user1019042
user1019042

Reputation: 2218

Upgrade to Lucene.Net 4.8 has slowed down search

I'm running an index search that returns the last names of passengers from Lucene index files on my machine. I'm upgrading Lucene.Net from 3.0 to 4.8. I followed the migration documentation and converted my code (both the old and the new code are below) and was able to get the correct results, but I'm noticing that returning results is now slower than with the previous code. The search looks at 47K records and returns only 185 matching results. Examples of the slowdown:

returning 1st set of 25 results slowed down from 140ms to 396ms
returning 2nd set of 25 results slowed down from 216ms to 380ms
and so on...

The code that calls the searcher is very similar in 3.0 and 4.8; I changed only the parts where a change was mandatory:

// Fields the multi-field parser expands every query term over.
var searchFields = new[] { "Id", "FirstName", "LastName", "Tag" };

var analyzer = new StandardAnalyzer(LuceneVersion.LUCENE_48);
var parser = new MultiFieldQueryParser(LuceneVersion.LUCENE_48, searchFields, analyzer)
{
    AllowLeadingWildcard = true,
    DefaultOperator = QueryParserBase.AND_OPERATOR
};

var searchCriteria = "Jones";
var query = parser.Parse(searchCriteria);

// NOTE(review): the directory, reader and searcher are opened right before the
// timed section; presumably they are (or could be) created once and reused
// across searches - confirm against the calling code.
var directory = FSDirectory.Open(new DirectoryInfo(indexDirectory));
DirectoryReader di = DirectoryReader.Open(directory);
var searcher = new IndexSearcher(di);

// NOTE(review): DateTime.Now is a coarse clock for timings of this size;
// a Stopwatch would give more trustworthy numbers.
var start = DateTime.Now; // --> Start Timer
// NOTE(review): the filter wraps the very query being searched, so it cannot
// exclude any hit the query would return; it looks redundant and its cost is
// included in the measured time - confirm before removing.
var filter = new QueryWrapperFilter(query);
// `sort` is not defined in this snippet; it is expected to come from the
// surrounding code. (1 * 25) presumably means page 1 at 25 hits per page.
var topDocs = searcher.Search(query, filter, (1 * 25), sort);
var stop = DateTime.Now;  // --> End Timer

Most of the conversions took place in this file.

/*******************************************/
//**OLD CODE WITH 3.0 **
//*******************************************/
/// <summary>
/// Lucene.Net 3.0 sort comparator over the string values of a single field.
/// Null or empty values are treated as "missing": two missing values compare
/// equal, and a missing value compares as greater than a present one unless
/// <c>reversed</c> is set, in which case the sign is flipped.
/// </summary>
internal class TagComparator : FieldComparator
{
    // Value captured for each slot of the hit queue.
    private string[] slotValues;

    // Per-document field values of the reader currently being collected.
    private string[] readerValues;

    private string fieldName;

    // Value of the slot last passed to SetBottom.
    private string bottomValue;

    private bool isReversed;

    /// <param name="numHits">Number of slots to allocate.</param>
    /// <param name="field">Name of the field whose values are compared.</param>
    /// <param name="reversed">Flips the sign used when exactly one side is missing.</param>
    public TagComparator(int numHits, string field, bool reversed)
    {
        fieldName = field;
        isReversed = reversed;
        slotValues = new string[numHits];
    }

    /// <summary>Returns the value stored in the given slot.</summary>
    public override IComparable this[int slot] => slotValues[slot];

    /// <summary>Loads the field's string values for the next reader from the field cache.</summary>
    public override void SetNextReader(IndexReader reader, int docBase)
    {
        readerValues = FieldCache_Fields.DEFAULT.GetStrings(reader, fieldName);
    }

    /// <summary>Stores the value of <paramref name="doc"/> into <paramref name="slot"/>.</summary>
    public override void Copy(int slot, int doc)
    {
        slotValues[slot] = readerValues[doc];
    }

    /// <summary>Remembers the value held by the slot <paramref name="bottom"/>.</summary>
    public override void SetBottom(int bottom)
    {
        bottomValue = slotValues[bottom];
    }

    /// <summary>Compares the values stored in two slots.</summary>
    public override int Compare(int slot1, int slot2)
    {
        return DoCompare(slotValues[slot1], slotValues[slot2]);
    }

    /// <summary>Compares the remembered bottom value against the value of <paramref name="doc"/>.</summary>
    public override int CompareBottom(int doc)
    {
        return DoCompare(bottomValue, readerValues[doc]);
    }

    /// <summary>
    /// Shared comparison: handles missing (null/empty) values first, then
    /// falls back to <see cref="string.CompareTo(string)"/>.
    /// </summary>
    private int DoCompare(string v1, string v2)
    {
        bool leftMissing = string.IsNullOrEmpty(v1);
        bool rightMissing = string.IsNullOrEmpty(v2);

        if (leftMissing && rightMissing)
        {
            return 0;
        }

        if (leftMissing)
        {
            return isReversed ? -1 : 1;
        }

        if (rightMissing)
        {
            return isReversed ? 1 : -1;
        }

        return v1.CompareTo(v2);
    }
}

/*******************************************/
//** NEW CODE WITH 4.8 **
/*******************************************/
/// <summary>
/// Lucene.Net 4.8 sort comparer over the term bytes of a single field.
/// Null or zero-length values are treated as "missing": two missing values
/// compare equal, and a missing value compares as greater than a present one
/// unless <c>reversed</c> is set, in which case the sign is flipped.
/// Present values are ordered by <c>BytesRef.CompareTo</c>.
/// </summary>
internal class TagComparator : FieldComparer<BytesRef>
{
    // Never assigned in this class; kept because it is part of the protected surface.
    protected readonly ILog Log;

    // Value captured for each slot of the hit queue.
    private readonly BytesRef[] bvalues;
    private readonly string field;
    private readonly bool reversed;

    // Reused for every per-document lookup so that CompareBottom/CompareTop
    // do not allocate a new BytesRef for each document they inspect.
    private readonly BytesRef scratch = new BytesRef();

    // Doc values of the segment currently being collected.
    private SortedDocValues sortedResults;
    private int bottomSlot;

    // Value supplied through SetTopValue; null until then.
    private BytesRef topValue;

    /// <summary>Returns the value stored in the given slot.</summary>
    public override BytesRef this[int slot] => bvalues[slot];

    /// <param name="numHits">Number of slots to allocate.</param>
    /// <param name="field">Name of the field whose values are compared.</param>
    /// <param name="reversed">Flips the sign used when exactly one side is missing.</param>
    public TagComparator(int numHits, string field, bool reversed)
    {
        bvalues = new BytesRef[numHits];
        this.field = field;
        this.reversed = reversed;
    }

    /// <summary>Compares the values stored in two slots.</summary>
    public override int Compare(int slot1, int slot2)
    {
        return DoCompare(bvalues[slot1], bvalues[slot2]);
    }

    /// <summary>True when the value is absent (null) or has no bytes.</summary>
    private static bool IsMissing(BytesRef value)
    {
        return value == null || value.Length == 0;
    }

    /// <summary>
    /// Shared comparison: handles missing values first, then defers to
    /// <c>BytesRef.CompareTo</c>. Equal values yield 0, as the comparer
    /// contract requires.
    /// </summary>
    private int DoCompare(BytesRef v1, BytesRef v2)
    {
        bool leftMissing = IsMissing(v1);
        bool rightMissing = IsMissing(v2);

        if (leftMissing && rightMissing)
        {
            return 0;
        }

        if (leftMissing)
        {
            return reversed ? -1 : 1;
        }

        if (rightMissing)
        {
            return reversed ? 1 : -1;
        }

        return v1.CompareTo(v2);
    }

    /// <summary>Stores the value of <paramref name="doc"/> into <paramref name="slot"/>.</summary>
    public override void Copy(int slot, int doc)
    {
        // Each slot needs its own instance: the stored value must survive
        // later lookups, so the shared scratch buffer cannot be used here.
        var value = new BytesRef();
        sortedResults.Get(doc, value);
        bvalues[slot] = value;
    }

    /// <summary>Compares the current bottom slot against the value of <paramref name="doc"/>.</summary>
    public override int CompareBottom(int doc)
    {
        // Get() is used rather than GetOrd() + LookupOrd() so that documents
        // without a value yield a zero-length result instead of passing the
        // "no value" ordinal (-1) to LookupOrd.
        sortedResults.Get(doc, scratch);
        return DoCompare(bvalues[bottomSlot], scratch);
    }

    /// <summary>Remembers which slot currently holds the bottom entry.</summary>
    public override void SetBottom(int bottom)
    {
        bottomSlot = bottom;
    }

    /// <summary>Loads the field's sorted doc values for the next segment from the field cache.</summary>
    public override FieldComparer SetNextReader(AtomicReaderContext context)
    {
        sortedResults = FieldCache.DEFAULT.GetTermsIndex(context.AtomicReader, field);
        return this;
    }

    /// <summary>Compares the value supplied via <see cref="SetTopValue"/> against the value of <paramref name="doc"/>.</summary>
    public override int CompareTop(int doc)
    {
        sortedResults.Get(doc, scratch);
        return DoCompare(topValue, scratch);
    }

    /// <summary>Records the value that <see cref="CompareTop"/> compares documents against.</summary>
    public override void SetTopValue(BytesRef value)
    {
        topValue = value;
    }
}

I don't know which change is causing the longer processing time. There are new overrides that I didn't implement, but I don't think I need them. Nothing in the Lucene.Net documentation talks about speed; it contains only dry definitions of classes, constructors, and methods, so it didn't help. Do you see anything I did wrong here that is causing the longer execution time than before?

UPDATE #1 Please note that none of the unimplemented FieldComparer methods are being executed. The implemented methods execute in this order: SetNextReader --> Copy --> Compare. The new code currently works like the old one in that it sorts the results alphabetically by comparing the strings.

UPDATE #2 Following the comments, the new 4.8 code has been updated to replace the expensive string comparison with BytesRef comparison. Please see my latest attempt and suggest how to speed it up in code.

Upvotes: 0

Views: 154

Answers (0)

Related Questions