Sujith Shivaprakash
Sujith Shivaprakash

Reputation: 161

Cannot instantiate Document error Lucene 6.2.0 API

I started using Lucene 6.2.0 in Java a couple of days ago and am trying to parse a .trectext file containing tags. I believe I am using the right parser for the tags; however, I get an error when creating a new Lucene Document: "Cannot instantiate the type Document". I have not found any suggestions about this online, and Eclipse does not offer any corrections, so I am not sure where I am going wrong. Could somebody help me with this? I have pasted the code below.

import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.DocumentBuilder;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.StringField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.index.MultiFields;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.Terms;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.document.Field;
import org.w3c.dom.Document;
import org.w3c.dom.NodeList;
import org.w3c.dom.Node;
import org.w3c.dom.Element;
import java.io.File;
import java.io.FileFilter;
import java.io.FileReader;
import java.io.IOException;
import java.nio.file.Paths;
import java.io.Reader;
import java.util.Iterator;

public class Indexing {
public static void main(String argv[]) {
    String[] tags={"DOCNO","HEAD","BYLINE","DATELINE","TEXT"};
    try {
    String indexPath="C:\\Users\\sujit\\Desktop\\lucene_indexed"; //Path to create the Lucene Document Index.
    File fXmlFile = new File("C:\\Users\\sujit\\Desktop\\sample.txt"); //Path to find the document to be indexed.

    DocumentBuilderFactory dbFactory = DocumentBuilderFactory.newInstance();
    DocumentBuilder dBuilder = dbFactory.newDocumentBuilder();
    Document doc = dBuilder.parse(fXmlFile);
    doc.getDocumentElement().normalize();

    Analyzer analyzer=new StandardAnalyzer();
    Directory dir = FSDirectory.open(Paths.get(indexPath));
    IndexWriterConfig iwc = new IndexWriterConfig(analyzer);
    iwc.setOpenMode(OpenMode.CREATE);
    IndexWriter writer=new IndexWriter(dir,iwc);
    String DocNo="" ,Head="",ByLine="",DateLine="",Text="";

    //System.out.println("Root element :" + doc.getDocumentElement().getNodeName());
    NodeList nList = doc.getElementsByTagName("DOC");
    //System.out.println("----------------------------");
    for (int temp = 0; temp < nList.getLength(); temp++) {
        //**Place where I see the error **
        Document luceneDoc=new Document();

        Node nNode = nList.item(temp);
        System.out.println("\nCurrent Element :" + nNode.getNodeName());
        if (nNode.getNodeType() == Node.ELEMENT_NODE) {
            Element eElement = (Element) nNode;
            for(int tagNo=0;tagNo<tags.length;tagNo++){

                for(int j=0;j<eElement.getElementsByTagName(tags[tagNo]).getLength();j++){
                    if(tags[tagNo]==tags[0])
                       DocNo+=eElement.getElementsByTagName(tags[tagNo]).item(j).getTextContent();
                    else if(tags[tagNo]==tags[1])
                        Head+=eElement.getElementsByTagName(tags[tagNo]).item(j).getTextContent();
                    else if(tags[tagNo]==tags[2])
                        ByLine+=eElement.getElementsByTagName(tags[tagNo]).item(j).getTextContent();
                    else if(tags[tagNo]==tags[3])
                        DateLine+=eElement.getElementsByTagName(tags[tagNo]).item(j).getTextContent();
                    else if(tags[tagNo]==tags[4])
                        Text+=eElement.getElementsByTagName(tags[tagNo]).item(j).getTextContent();
                }
                System.out.println(DocNo+Head+ByLine+DateLine+Text+"\n");
                luceneDoc.add(new StringField("DOCNO",DocNo,Field.Store.YES));
                luceneDoc.add(new StringField("HEAD",Head,Field.Store.YES));
                luceneDoc.add(new StringField("BYLINE",ByLine,Field.Store.YES));
                luceneDoc.add(new StringField("DATELINE",DateLine,Field.Store.YES));
                luceneDoc.add(new StringField("TEXT",Text,Field.Store.YES));
                writer.addDocument(luceneDoc);
                DocNo="";Head="";ByLine="";DateLine="";Text="";
            }
            writer.close();
        }
    }
    } catch (Exception e) {
    e.printStackTrace();
    }
  }

}

Upvotes: 1

Views: 419

Answers (1)

femtoRgon
femtoRgon

Reputation: 33351

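You are importing org.w3c.dom.Document instead of org.apache.lucene.document.Document. Since you appear to need both, you can refer to one of them by its fully qualified name rather than importing it, e.g. `org.apache.lucene.document.Document luceneDoc = new org.apache.lucene.document.Document();`.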

Upvotes: 1

Related Questions