Reputation: 35
I started use Lucene several days ago, but in debug my solution I found one trouble in Lucene. For try solve this problem I create new custom project and started test different solutions, but after 2 days of searching solution with Lucene I surrender...
My problem :
I create one custom class, create standard array of this class. Create Document objects and index it by IndexWriter. Its all works fine. Search working great. But when I try to update any document by using IndexWriter.UpdateDocument and say it update document with, for example, index '5', its create new document with id = 5. And finally I have 2 documents with id=5, and old, and new. If in constructor of IndexWriter 'true' in replace id, so when I update it same code its save only 1 updated document, and remove all indexed before. Exactly, I can't update all base all time, because my base is large (about 600 internet resources on my constructor), I need update only changed data (replace it new), and save indexed before. May be some one know what I doing wrong?
P.S. Sorry for my english.
class mydoc
{
public string id;
public string name;
public string content;
public mydoc(string ID, string Name, string Content)
{
id = ID;
name = Name;
this.content = Content;
}
}
class Program
{
static void Main(string[] args)
{
Console.WriteLine("Create data array...");
mydoc[] docs = new mydoc[11];
docs[0] = new mydoc("0", "Name0", "tet 5");
docs[1] = new mydoc("1", "Name1", "aaaa text");
docs[2] = new mydoc("2", "Name2", "and me test ");
docs[3] = new mydoc("3", "Name3", "I am new tes 3");
docs[4] = new mydoc("4", "Name4", "I am new tes 4");
docs[5] = new mydoc("5", "Name5", "I am new test 5");
docs[6] = new mydoc("6", "Name6", "I am new text 6");
docs[7] = new mydoc("7", "Name7", "I am new text 7");
docs[8] = new mydoc("8", "Name8", "I am new text 8");
docs[9] = new mydoc("9", "Name9", "I am new text 9");
docs[10] = new mydoc("10", "Name10", "I am new test 10");
Console.WriteLine("index processing...");
var dir = new DirectoryInfo("tmp");
FSDirectory fsdir = FSDirectory.Open(dir);
Analyzer analyzer = new StandardAnalyzer(Net.Util.Version.LUCENE_29);
IndexWriter writer = new IndexWriter(fsdir , analyzer,true, IndexWriter.MaxFieldLength.UNLIMITED);
for (int i = 0; i < docs.Length; i++)
{
writer.AddDocument(Convert(docs[i]));
}
writer.Optimize(true);
writer.Close(true);
Console.WriteLine("index done !");
IndexReader reader = IndexReader.Open(fsdir, true);
for (int i = 0; i < reader.MaxDoc;i++)
{
Document doc = reader.Document(i);
Console.WriteLine("id = \"{0}\", Name = \"{1}\", Context = \"{2}\"", doc.Get("ID"),doc.Get("Name"),doc.Get("Content"));
}
reader.Close();
// Update custom base
IndexWriter updater = new IndexWriter(fsdir, analyzer, true, IndexWriter.MaxFieldLength.UNLIMITED);
updater.UpdateDocument(new Term("0"), Convert(new mydoc("0", "New name 0", "prosto obitr test")), new StandardAnalyzer(Net.Util.Version.LUCENE_30));
updater.UpdateDocument(new Term("1"), Convert(new mydoc("1", "New name 1", "prosto obitr test")),new StandardAnalyzer(Net.Util.Version.LUCENE_30));
updater.UpdateDocument(new Term("2"), Convert(new mydoc("2", "New name 2", "prosto obitr test")), new StandardAnalyzer(Net.Util.Version.LUCENE_30));
updater.UpdateDocument(new Term("3"), Convert(new mydoc("3", "New name 3", "prosto obitr test")), new StandardAnalyzer(Net.Util.Version.LUCENE_30));
updater.UpdateDocument(new Term("4"), Convert(new mydoc("4", "New name 4", "prosto obitr test")), new StandardAnalyzer(Net.Util.Version.LUCENE_30));
updater.UpdateDocument(new Term("5"), Convert(new mydoc("5", "New name 5", "prosto obitr test")), new StandardAnalyzer(Net.Util.Version.LUCENE_30));
updater.UpdateDocument(new Term("6"), Convert(new mydoc("6", "New name 6", "prosto obitr test")), new StandardAnalyzer(Net.Util.Version.LUCENE_30));
updater.UpdateDocument(new Term("7"), Convert(new mydoc("7", "New name 7", "prosto obitr test")), new StandardAnalyzer(Net.Util.Version.LUCENE_30));
updater.UpdateDocument(new Term("8"), Convert(new mydoc("8", "New name 8", "prosto obitr test")), new StandardAnalyzer(Net.Util.Version.LUCENE_30));
updater.UpdateDocument(new Term("9"), Convert(new mydoc("9", "New name 9", "prosto obitr test")), new StandardAnalyzer(Net.Util.Version.LUCENE_30));
updater.Optimize();
updater.Close(true);
reader = IndexReader.Open(fsdir, true);
Console.WriteLine("New updated data:");
for (int i = 0; i < reader.MaxDoc; i++)
{
Document doc = reader.Document(i);
Console.WriteLine("id = \"{0}\", Name = \"{1}\", Context = \"{2}\"", doc.Get("ID"), doc.Get("Name"), doc.Get("Content"));
}
Console.ReadKey();
Console.WriteLine("search processing...");
string query = "test";
fsdir = FSDirectory.Open(dir);
IndexSearcher searcher = new IndexSearcher(fsdir, true);
Console.WriteLine("Searching phrase \"{0}\"", query);
List<KeyValuePair<int, int>> results = find(query, searcher);
searcher.Close();
fsdir.Close();
Console.WriteLine("Results:");
for (int i = 0; i < results.Count; i++)
{
try
{
// Display founded id
Console.WriteLine(results[i].Value);
}
catch (Exception ex)
{
continue;
}
}
Console.WriteLine("\n\rDone !");
Console.ReadKey();
}
static List<KeyValuePair<int,int>> find(string query, IndexSearcher searcher)
{
var parser = new MultiFieldQueryParser(Net.Util.Version.LUCENE_30, new[] { "Name", "Content" }, new SimpleAnalyzer());
var score = searcher.Search(parser.Parse(query), 99).ScoreDocs;
var docIDs = score.Select(x => new KeyValuePair<int, int>
(
x.Doc, int.Parse(searcher.Doc(x.Doc).Get("ID"))
)
).ToList();
return docIDs;
}
static Document Convert(mydoc doc)
{
var document = new Document();
document.Add(new Field("ID", doc.id, Field.Store.YES, Field.Index.NOT_ANALYZED));
document.Add(new Field("Name", doc.name, Field.Store.YES, Field.Index.ANALYZED));
document.Add(new Field("Content", doc.content, Field.Store.YES, Field.Index.ANALYZED));
return document;
}
}
In this case doc[10] just disapear from index. if in
IndexWriter updater = new IndexWriter(fsdir, analyzer, true, IndexWriter.MaxFieldLength.UNLIMITED);
'true' replace to 'false', it create new documents instead replace old.
updater.commit() also did not help.
Upvotes: 1
Views: 3486
Reputation: 35
Problem solved. My fault was in not correct understanding of type Term in:
IndexUpdater.UpdateDocument(Term term, Document doc);
Its need to create new instance of Term like this (in my case):
updater.UpdateDocument(new Term("ID", "5"), Convert(new mydoc("5", "New name 5", "simple new test text")), new StandardAnalyzer(Net.Util.Version.LUCENE_30));
Where in Term constructor field "ID" is my unique field with no index flag and "5" is text of old value field "ID" in old document in index.
Upvotes: 1