Switch to unified view

a/src/rcldb/rcldb.cpp b/src/rcldb/rcldb.cpp
...
...
171
    }
171
    }
172
    return true;
172
    return true;
173
}
173
}
174
174
175
// Remove prefixes (caps) from a list of terms.
175
// Remove prefixes (caps) from a list of terms.
176
static list<string> noPrefixList(const list<string>& in) 
176
static void noPrefixList(const list<string>& in, list<string>& out) 
177
{
177
{
178
    list<string> out;
179
    for (list<string>::const_iterator qit = in.begin(); 
178
    for (list<string>::const_iterator qit = in.begin(); 
180
     qit != in.end(); qit++) {
179
     qit != in.end(); qit++) {
181
    if ('A' <= qit->at(0) && qit->at(0) <= 'Z') {
180
    if ('A' <= qit->at(0) && qit->at(0) <= 'Z') {
182
        string term = *qit;
181
        string term = *qit;
183
        while (term.length() && 'A' <= term.at(0) && term.at(0) <= 'Z')
182
        while (term.length() && 'A' <= term.at(0) && term.at(0) <= 'Z')
...
...
187
        continue;
186
        continue;
188
    } else {
187
    } else {
189
        out.push_back(*qit);
188
        out.push_back(*qit);
190
    }
189
    }
191
    }
190
    }
192
    return out;
193
}
191
}
194
192
195
//#define DEBUGABSTRACT  1
193
//#define DEBUGABSTRACT  1
196
#ifdef DEBUGABSTRACT
194
#ifdef DEBUGABSTRACT
197
#define LOGABS LOGDEB
195
#define LOGABS LOGDEB
198
#else
196
#else
199
#define LOGABS LOGDEB2
197
#define LOGABS LOGDEB2
200
#endif
198
#endif
199
static void listList(const string& what, const list<string>&l)
200
{
201
    string a;
202
    for (list<string>::const_iterator it = l.begin(); it != l.end(); it++) {
203
        a = a + *it + " ";
204
    }
205
    LOGDEB(("%s: %s\n", what.c_str(), a.c_str()));
206
}
201
207
202
// Build a document abstract by extracting text chunks around the query terms
208
// Build a document abstract by extracting text chunks around the query terms
203
// This uses the db termlists, not the original document.
209
// This uses the db termlists, not the original document.
204
//
210
//
205
// DatabaseModified and other general exceptions are caught and
211
// DatabaseModified and other general exceptions are caught and
...
...
208
{
214
{
209
    Chrono chron;
215
    Chrono chron;
210
    LOGDEB(("makeAbstract:%d: maxlen %d wWidth %d\n", chron.ms(),
216
    LOGDEB(("makeAbstract:%d: maxlen %d wWidth %d\n", chron.ms(),
211
         m_rcldb->m_synthAbsLen, m_rcldb->m_synthAbsWordCtxLen));
217
         m_rcldb->m_synthAbsLen, m_rcldb->m_synthAbsWordCtxLen));
212
218
213
    list<string> iterms;
219
    list<string> terms;
214
    query->getQueryTerms(iterms);
215
220
216
    list<string> terms = noPrefixList(iterms);
221
    {
222
        list<string> iterms;
223
        query->getMatchTerms(docid, iterms);
224
        noPrefixList(iterms, terms);
217
    if (terms.empty()) {
225
        if (terms.empty()) {
218
  return string();
226
            LOGDEB(("makeAbstract::Empty term list\n"));
227
            return string();
219
    }
228
        }
229
    }
230
//    listList("Match terms: ", terms);
220
231
221
    // Retrieve db-wide frequencies for the query terms
232
    // Retrieve db-wide frequencies for the query terms (we do this once per
233
    // query, using all the query terms, not only the document match terms)
222
    if (query->m_nq->termfreqs.empty()) {
234
    if (query->m_nq->termfreqs.empty()) {
235
        list<string> iqterms, qterms;
236
        query->getQueryTerms(iqterms);
237
        noPrefixList(iqterms, qterms);
238
//        listList("Query terms: ", qterms);
223
    double doccnt = xrdb.get_doccount();
239
    double doccnt = xrdb.get_doccount();
224
    if (doccnt == 0) doccnt = 1;
240
    if (doccnt == 0) doccnt = 1;
225
    for (list<string>::const_iterator qit = terms.begin(); 
241
    for (list<string>::const_iterator qit = qterms.begin(); 
226
         qit != terms.end(); qit++) {
242
         qit != qterms.end(); qit++) {
227
        query->m_nq->termfreqs[*qit] = xrdb.get_termfreq(*qit) / doccnt;
243
        query->m_nq->termfreqs[*qit] = xrdb.get_termfreq(*qit) / doccnt;
228
        LOGABS(("makeAbstract: [%s] db freq %.1e\n", qit->c_str(), 
244
        LOGDEB(("makeAbstract: [%s] db freq %.1e\n", qit->c_str(), 
229
            query->m_nq->termfreqs[*qit]));
245
            query->m_nq->termfreqs[*qit]));
230
    }
246
    }
231
    LOGABS(("makeAbstract:%d: got termfreqs\n", chron.ms()));
247
    LOGABS(("makeAbstract:%d: got termfreqs\n", chron.ms()));
232
    }
248
    }
233
249
...
...
448
        LOGDEB(("makeAbstract:vec[%d]: [%s]\n", ipos, it->c_str()));
464
        LOGDEB(("makeAbstract:vec[%d]: [%s]\n", ipos, it->c_str()));
449
    }
465
    }
450
    }
466
    }
451
#endif
467
#endif
452
468
453
    LOGDEB(("makeAbstract:%d: extracting\n", chron.millis()));
469
    LOGABS(("makeAbstract:%d: extracting\n", chron.millis()));
454
470
455
    // Finally build the abstract by walking the map (in order of position)
471
    // Finally build the abstract by walking the map (in order of position)
456
    string abstract;
472
    string abstract;
457
    abstract.reserve(sparseDoc.size() * 10);
473
    abstract.reserve(sparseDoc.size() * 10);
458
    bool incjk = false;
474
    bool incjk = false;