summaryrefslogtreecommitdiffstats
path: root/src/3rdparty/clucene/src/CLucene/search/IndexSearcher.cpp
blob: c948cfa4b850532e2f79a84bd48aa7fcfa46679b (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
/*
 * Copyright (C) 2003-2006 Ben van Klinken and the CLucene Team
 *
 * Distributable under the terms of either the Apache License (Version 2.0) or 
 * the GNU Lesser General Public License, as specified in the COPYING file.
 *
 * Changes are Copyright(C) 2007, 2008 by Nokia Corporation and/or its subsidiary(-ies), all rights reserved.
*/
#include "CLucene/StdHeader.h"
#include "IndexSearcher.h"

#include "SearchHeader.h"
#include "Scorer.h"
#include "FieldDocSortedHitQueue.h"
#include "CLucene/store/Directory.h"
#include "CLucene/document/Document.h"
#include "CLucene/index/IndexReader.h"
#include "CLucene/index/Term.h"
#include "CLucene/util/BitSet.h"
#include "FieldSortedHitQueue.h"

CL_NS_USE(index)
CL_NS_USE(util)
CL_NS_USE(document)

CL_NS_DEF(search)

class SimpleTopDocsCollector : public HitCollector
{ 
private:
    qreal minScore;
    const CL_NS(util)::BitSet* bits;
    HitQueue* hq;
    size_t nDocs;
    int32_t* totalHits;

public:
    SimpleTopDocsCollector(const CL_NS(util)::BitSet* bs, HitQueue* hitQueue,
        int32_t* totalhits, size_t ndocs, const qreal ms=-1.0f)
        : minScore(ms),
        bits(bs),
        hq(hitQueue),
        nDocs(ndocs),
        totalHits(totalhits) {}
    ~SimpleTopDocsCollector() {}

    void collect(const int32_t doc, const qreal score)
    {
        if (score > 0.0f    // ignore zeroed buckets
            && (bits == NULL || bits->get(doc))) {	  // skip docs not in bits
                ++totalHits[0];
                if (hq->size() < nDocs || (minScore==-1.0f || score >= minScore)) {
                    ScoreDoc sd = {doc, score};
                    hq->insert(sd);	  // update hit queue
                    if ( minScore != -1.0f )
                        minScore = hq->top().score; // maintain minScore
                }
        }
    }
};

class SortedTopDocsCollector : public HitCollector
{ 
private:
    const CL_NS(util)::BitSet* bits;
    FieldSortedHitQueue* hq;
    size_t nDocs;
    int32_t* totalHits;
public:
    SortedTopDocsCollector(const CL_NS(util)::BitSet* bs,
        FieldSortedHitQueue* hitQueue, int32_t* totalhits, size_t _nDocs)
        : bits(bs),
          hq(hitQueue),
          nDocs(_nDocs),
          totalHits(totalhits)
      {
      }
      ~SortedTopDocsCollector() {}
      
      void collect(const int32_t doc, const qreal score)
      {
          if (score > 0.0f &&			  // ignore zeroed buckets
              (bits==NULL || bits->get(doc))) {	  // skip docs not in bits
                  ++totalHits[0];
                  // TODO: see jlucene way... with fields def???
                  FieldDoc* fd = _CLNEW FieldDoc(doc, score);
                  if ( !hq->insert(fd) )	  // update hit queue
                      _CLDELETE(fd);
          }
      }
};

// HitCollector decorator: forwards a hit to the wrapped collector only when
// the document's bit is set in the supplied BitSet.
class SimpleFilteredCollector : public HitCollector
{
private:
    CL_NS(util)::BitSet* bits;   // filter bits consulted for every hit (dereferenced unconditionally)
    HitCollector* results;       // wrapped collector receiving the surviving hits
public:
    SimpleFilteredCollector(CL_NS(util)::BitSet* bs, HitCollector* collector)
        : bits(bs),
          results(collector) {}
    ~SimpleFilteredCollector() {}

protected:
    void collect(const int32_t doc, const qreal score)
    {
        const bool inFilter = bits->get(doc);
        if (!inFilter)
            return;     // skip docs not in bits

        results->collect(doc, score);
    }
};


IndexSearcher::IndexSearcher(const QString& path)
{
    //Func - Constructor
    //       Builds a searcher on top of a reader opened from the given path.
    //Pre  - path is not empty
    //Post - The instance has been created and is flagged as owner of its reader

    CND_PRECONDITION(!path.isEmpty(), "path is NULL");

    // The reader is opened here, so close() is responsible for releasing it.
    readerOwner = true;
    reader = IndexReader::open(path);
}

IndexSearcher::IndexSearcher(CL_NS(store)::Directory* directory)
{
    //Func - Constructor
    //       Builds a searcher on top of a reader opened from the given directory.
    //Pre  - directory != NULL
    //Post - The instance has been created and is flagged as owner of its reader

    CND_PRECONDITION(directory != NULL, "directory is NULL");

    // The reader is opened here, so close() is responsible for releasing it.
    readerOwner = true;
    reader = IndexReader::open(directory);
}

IndexSearcher::IndexSearcher(IndexReader* r)
{
    //Func - Constructor
    //       Builds a searcher on top of an IndexReader supplied by the caller.
    //Pre  - true (r is stored as given; it is not checked here)
    //Post - The instance has been created and is flagged as NOT owning the
    //       reader, so close() will leave it untouched

    readerOwner = false;
    reader      = r;
}

IndexSearcher::~IndexSearcher()
{
    //Func - Destructor
    //       Delegates all cleanup to close().
    //Pre  - true
    //Post - The instance has been destroyed

    this->close();
}

void IndexSearcher::close()
{
    //Func - Releases the reader when this searcher owns it.
    //Pre  - true
    //Post - An owned, non-NULL reader has been closed and deleted;
    //       otherwise nothing has happened

    // Nothing to do for a borrowed reader or when no reader is set.
    if (!readerOwner || !reader)
        return;

    reader->close();
    _CLDELETE(reader);
}

// inherit javadoc
int32_t IndexSearcher::docFreq(const Term* term) const
{
    //Func - Forwards the document-frequency lookup for term to the reader
    //Pre  - reader != NULL
    //Post - The value reported by reader->docFreq(term) has been returned

    CND_PRECONDITION(reader != NULL, "reader is NULL");

    const int32_t frequency = reader->docFreq(term);
    return frequency;
}

// inherit javadoc
bool IndexSearcher::doc(int32_t i, CL_NS(document)::Document* d)
{
    //Func - Asks the reader for document number i, passing d along to it
    //       For use by HitCollector implementations.
    //Pre  - reader != NULL
    //Post - The result of reader->document(i, d) has been returned

    CND_PRECONDITION(reader != NULL, "reader is NULL");

    const bool fetched = reader->document(i,d);
    return fetched;
}

// inherit javadoc
int32_t IndexSearcher::maxDoc() const
{
    //Func - Forwards to the reader's maxDoc(), which per the original notes
    //       counts documents including the ones marked deleted
    //Pre  - reader != NULL
    //Post - The value reported by reader->maxDoc() has been returned

    CND_PRECONDITION(reader != NULL, "reader is NULL");

    const int32_t upperBound = reader->maxDoc();
    return upperBound;
}

TopDocs* IndexSearcher::_search(Query* query, Filter* filter, const int32_t nDocs)
{
    //Func - Scores query against the reader, collecting hits in a HitQueue
    //       created with nDocs, and returns them together with the number
    //       of accepted hits.
    //Pre  - reader != NULL
    //       query != NULL
    //       filter may be NULL (then no document is filtered out)
    //Post - A newly allocated TopDocs has been returned. When the weight
    //       yields no scorer the TopDocs is empty (0 hits, NULL array).
    //       The weight and, if the query was re-written, the re-written
    //       query have been released on every path.

    CND_PRECONDITION(reader != NULL, "reader is NULL");
    CND_PRECONDITION(query != NULL, "query is NULL");

    Weight* weight = query->weight(this);
    Scorer* scorer = weight->scorer(reader);
    if (scorer == NULL){
        // Nothing to score: still release the weight and a re-written
        // query, exactly as the regular path below does.
        Query* wq = weight->getQuery();
        if ( query != wq ) //query was re-written
            _CLLDELETE(wq);
        _CLDELETE(weight);
        return _CLNEW TopDocs(0, NULL, 0);
    }

    BitSet* bits = filter != NULL ? filter->bits(reader) : NULL;
    HitQueue* hq = _CLNEW HitQueue(nDocs);

    //Check hq has been allocated properly
    CND_CONDITION(hq != NULL, "Could not allocate memory for HitQueue hq");

    // The collector only needs a pointer to a counter; a local is enough.
    int32_t totalHits = 0;

    SimpleTopDocsCollector hitCol(bits,hq,&totalHits,nDocs,0.0f);
    scorer->score( &hitCol );
    _CLDELETE(scorer);

    int32_t scoreDocsLength = hq->size();

    ScoreDoc* scoreDocs = _CL_NEWARRAY(ScoreDoc,scoreDocsLength);

    // The queue pops its top element first, so fill the array back to front.
    for (int32_t i = scoreDocsLength-1; i >= 0; --i)	  // put docs in array
        scoreDocs[i] = hq->pop();

    _CLDELETE(hq);
    // The filter decides whether the bit set it handed out is ours to free.
    if ( bits != NULL && filter->shouldDeleteBitSet(bits) )
        _CLDELETE(bits);
    Query* wq = weight->getQuery();
    if ( query != wq ) //query was re-written
        _CLLDELETE(wq);
    _CLDELETE(weight);

    return _CLNEW TopDocs(totalHits, scoreDocs, scoreDocsLength);
}

// inherit javadoc
TopFieldDocs* IndexSearcher::_search(Query* query, Filter* filter,
    const int32_t nDocs, const Sort* sort)
{
    //Func - Scores query against the reader, collecting hits in a
    //       FieldSortedHitQueue built from sort->getSort() and nDocs, and
    //       returns them together with the number of accepted hits.
    //Pre  - reader != NULL
    //       query != NULL
    //       sort != NULL (it is dereferenced to build the queue)
    //       filter may be NULL (then no document is filtered out)
    //Post - A newly allocated TopFieldDocs has been returned. When the
    //       weight yields no scorer it is empty (0 hits, NULL arrays).
    //       The weight and, if the query was re-written, the re-written
    //       query have been released on every path.

    CND_PRECONDITION(reader != NULL, "reader is NULL");
    CND_PRECONDITION(query != NULL, "query is NULL");
    CND_PRECONDITION(sort != NULL, "sort is NULL");

    Weight* weight = query->weight(this);
    Scorer* scorer = weight->scorer(reader);
    if (scorer == NULL) {
        // Nothing to score: still release the weight and a re-written
        // query, exactly as the regular path below does.
        Query* wq = weight->getQuery();
        if ( query != wq ) //query was re-written
            _CLLDELETE(wq);
        _CLDELETE(weight);
        return _CLNEW TopFieldDocs(0, NULL, 0, NULL );
    }

    BitSet* bits = filter != NULL ? filter->bits(reader) : NULL;
    FieldSortedHitQueue hq(reader, sort->getSort(), nDocs);

    // The collector only needs a pointer to a counter; a local is enough.
    int32_t totalHits = 0;

    SortedTopDocsCollector hitCol(bits,&hq,&totalHits,nDocs);
    scorer->score(&hitCol);
    _CLDELETE(scorer);

    int32_t hqLen = hq.size();
    FieldDoc** fieldDocs = _CL_NEWARRAY(FieldDoc*,hqLen);
    // The queue pops its top element first, so fill the array back to front.
    for (int32_t i = hqLen-1; i >= 0; --i){	  // put docs in array
        fieldDocs[i] = hq.fillFields (hq.pop());
    }

    Query* wq = weight->getQuery();
    if ( query != wq ) //query was re-written
        _CLLDELETE(wq);
    _CLDELETE(weight);

    SortField** hqFields = hq.getFields();
    hq.setFields(NULL); //move ownership of memory over to TopFieldDocs
    // The filter decides whether the bit set it handed out is ours to free.
    if ( bits != NULL && filter->shouldDeleteBitSet(bits) )
        _CLDELETE(bits);
    return _CLNEW TopFieldDocs(totalHits, fieldDocs, hqLen, hqFields );
}

void IndexSearcher::_search(Query* query, Filter* filter, HitCollector* results)
{
    //Func - _search an index and hand every hit to the given collector
    //       Applications should only use this if they need all of the
    //       matching documents.  The high-level search API (search(Query))
    //       is usually more efficient, as it skips non-high-scoring hits.
    //Pre  - reader != NULL
    //       query != NULL
    //       filter may be NULL; if non-NULL, its bit set is used to
    //       eliminate some documents before they reach results
    //       results is a valid HitCollector used to receive the hits
    //Post - Each scored document that passed the filter has been passed to
    //       results->collect. The weight and, if the query was re-written,
    //       the re-written query have been released.

    CND_PRECONDITION(reader != NULL, "reader is NULL");
    CND_PRECONDITION(query != NULL, "query is NULL");

    BitSet* bits = NULL;
    SimpleFilteredCollector* fc = NULL;

    if (filter != NULL){
        // Wrap the caller's collector so only documents in bits get through.
        bits = filter->bits(reader);
        fc = _CLNEW SimpleFilteredCollector(bits, results);
    }

    Weight* weight = query->weight(this);
    Scorer* scorer = weight->scorer(reader);
    if (scorer != NULL) {
        if (fc == NULL){
            scorer->score(results);
        }else{
            scorer->score((HitCollector*)fc);
        }
        _CLDELETE(scorer);
    }

    _CLDELETE(fc);
    // Same cleanup as the other _search overloads and explain(): a query
    // produced by re-writing is not the caller's object, so free it here.
    Query* wq = weight->getQuery();
    if ( query != wq ) //query was re-written
        _CLLDELETE(wq);
    _CLDELETE(weight);
    // The filter decides whether the bit set it handed out is ours to free.
    if ( bits != NULL && filter->shouldDeleteBitSet(bits) )
        _CLDELETE(bits);
}

Query* IndexSearcher::rewrite(Query* original)
{
    //Func - Re-writes the query repeatedly until rewrite() returns the very
    //       query it was called on.
    //Pre  - original is a valid query
    //Post - The final query has been returned (original itself when the
    //       first rewrite changes nothing). Intermediate results have been
    //       deleted; original is never deleted here.

    Query* current  = original;
    Query* previous = original;

    Query* next = current->rewrite(reader);
    while (next != current) {
        current = next;
        // Drop the superseded intermediate, but never the caller's query.
        if (current != previous && previous != original) {
            _CLDELETE(previous);
        }
        previous = current;
        next = current->rewrite(reader);
    }
    return current;
}

void IndexSearcher::explain(Query* query, int32_t doc, Explanation* ret)
{
    //Func - Builds the weight for query and lets it explain document doc
    //       against the reader, passing ret through to the weight.
    //Pre  - query is a valid query
    //Post - weight->explain has been invoked with ret; the weight and, if
    //       the query was re-written, the re-written query have been freed

    Weight* w = query->weight(this);
    w->explain(reader, doc, ret);

    Query* weighted = w->getQuery();
    const bool rewritten = (weighted != query);
    if (rewritten) {
        //query was re-written
        _CLLDELETE(weighted);
    }
    _CLDELETE(w);
}

CL_NS_END