|
a/src/query/plaintorich.cpp |
|
b/src/query/plaintorich.cpp |
|
... |
|
... |
325 |
sterms += "GROUP: ";
|
325 |
sterms += "GROUP: ";
|
326 |
sterms += vecStringToString(*vit);
|
326 |
sterms += vecStringToString(*vit);
|
327 |
sterms += "\n";
|
327 |
sterms += "\n";
|
328 |
}
|
328 |
}
|
329 |
LOGDEB0((" %s", sterms.c_str()));
|
329 |
LOGDEB0((" %s", sterms.c_str()));
|
|
|
330 |
LOGDEB2((" TEXT:[%s]\n", in.c_str()));
|
330 |
}
|
331 |
}
|
331 |
|
332 |
|
332 |
// Compute the positions for the query terms. We use the text
|
333 |
// Compute the positions for the query terms. We use the text
|
333 |
// splitter to break the text into words, and compare the words to
|
334 |
// splitter to break the text into words, and compare the words to
|
334 |
// the search terms,
|
335 |
// the search terms,
|
|
... |
|
... |
362 |
}
|
363 |
}
|
363 |
#endif
|
364 |
#endif
|
364 |
|
365 |
|
365 |
// Input character iterator
|
366 |
// Input character iterator
|
366 |
Utf8Iter chariter(in);
|
367 |
Utf8Iter chariter(in);
|
|
|
368 |
|
367 |
// State variable used to limit the number of consecutive empty lines
|
369 |
// State variable used to limit the number of consecutive empty lines,
|
|
|
370 |
// and convert all eol to '\n'
|
368 |
int ateol = 0;
|
371 |
int eol = 0;
|
|
|
372 |
int hadcr = 0;
|
369 |
|
373 |
|
370 |
// Value for numbered anchors at each term match
|
374 |
// Value for numbered anchors at each term match
|
371 |
int anchoridx = 1;
|
375 |
int anchoridx = 1;
|
372 |
// html state
|
376 |
// HTML state
|
373 |
bool intag = false, inparamvalue = false;
|
377 |
bool intag = false, inparamvalue = false;
|
|
|
378 |
// My tag state
|
|
|
379 |
int inrcltag = 0;
|
|
|
380 |
|
374 |
unsigned int headend = 0;
|
381 |
unsigned int headend = 0;
|
375 |
if (m_inputhtml) {
|
382 |
if (m_inputhtml) {
|
376 |
headend = in.find("</head>");
|
383 |
headend = in.find("</head>");
|
377 |
if (headend == string::npos)
|
384 |
if (headend == string::npos)
|
378 |
headend = in.find("</HEAD>");
|
385 |
headend = in.find("</HEAD>");
|
379 |
if (headend != string::npos)
|
386 |
if (headend != string::npos)
|
380 |
headend += 7;
|
387 |
headend += 7;
|
381 |
}
|
388 |
}
|
|
|
389 |
|
382 |
for (string::size_type pos = 0; pos != string::npos; pos = chariter++) {
|
390 |
for (string::size_type pos = 0; pos != string::npos; pos = chariter++) {
|
383 |
// Check from time to time if we need to stop
|
391 |
// Check from time to time if we need to stop
|
384 |
if ((pos & 0xfff) == 0) {
|
392 |
if ((pos & 0xfff) == 0) {
|
385 |
CancelCheck::instance().checkCancel();
|
393 |
CancelCheck::instance().checkCancel();
|
386 |
}
|
394 |
}
|
|
... |
|
... |
393 |
if (!intag && ibyteidx > (int)headend) {
|
401 |
if (!intag && ibyteidx > (int)headend) {
|
394 |
*olit += startAnchor(anchoridx);
|
402 |
*olit += startAnchor(anchoridx);
|
395 |
*olit += startMatch();
|
403 |
*olit += startMatch();
|
396 |
}
|
404 |
}
|
397 |
anchoridx++;
|
405 |
anchoridx++;
|
|
|
406 |
inrcltag = 1;
|
398 |
} else if (ibyteidx == tPosIt->second) {
|
407 |
} else if (ibyteidx == tPosIt->second) {
|
399 |
// Output end or match region tags
|
408 |
// Output end of match region tags
|
400 |
if (!intag && ibyteidx > (int)headend) {
|
409 |
if (!intag && ibyteidx > (int)headend) {
|
401 |
*olit += endMatch();
|
410 |
*olit += endMatch();
|
402 |
*olit += endAnchor();
|
411 |
*olit += endAnchor();
|
403 |
}
|
412 |
}
|
404 |
// Skip all highlight areas that would overlap this one
|
413 |
// Skip all highlight areas that would overlap this one
|
405 |
int crend = tPosIt->second;
|
414 |
int crend = tPosIt->second;
|
406 |
while (tPosIt != cb.tboffs.end() && tPosIt->first < crend)
|
415 |
while (tPosIt != cb.tboffs.end() && tPosIt->first < crend)
|
407 |
tPosIt++;
|
416 |
tPosIt++;
|
408 |
|
417 |
inrcltag = 0;
|
409 |
// Maybe end this chunk, begin next. Don't do it on html
|
|
|
410 |
// there is just no way to do it right (qtextedit can't grok
|
|
|
411 |
// chunks cut in the middle of <a></a> for example).
|
|
|
412 |
if (!m_inputhtml && olit->size() > (unsigned int)chunksize) {
|
|
|
413 |
out.push_back("");
|
|
|
414 |
olit++;
|
|
|
415 |
}
|
|
|
416 |
}
|
418 |
}
|
417 |
}
|
419 |
}
|
|
|
420 |
|
|
|
421 |
unsigned int car = *chariter;
|
418 |
|
422 |
|
419 |
if (m_inputhtml) {
|
423 |
if (car == '\n') {
|
|
|
424 |
if (!hadcr)
|
|
|
425 |
eol++;
|
|
|
426 |
hadcr = 0;
|
|
|
427 |
continue;
|
|
|
428 |
} else if (car == '\r') {
|
|
|
429 |
hadcr++;
|
|
|
430 |
eol++;
|
|
|
431 |
continue;
|
|
|
432 |
} else if (eol) {
|
|
|
433 |
// Do line break;
|
|
|
434 |
hadcr = 0;
|
|
|
435 |
if (eol > 2)
|
|
|
436 |
eol = 2;
|
|
|
437 |
while (eol) {
|
|
|
438 |
*olit += "\n";
|
|
|
439 |
eol--;
|
|
|
440 |
}
|
|
|
441 |
// Maybe end this chunk, begin next. Don't do it on html
|
|
|
442 |
// there is just no way to do it right (qtextedit can't grok
|
|
|
443 |
// chunks cut in the middle of <a></a> for example).
|
|
|
444 |
if (!m_inputhtml && !inrcltag &&
|
|
|
445 |
olit->size() > (unsigned int)chunksize) {
|
|
|
446 |
out.push_back(string(startChunk()));
|
|
|
447 |
olit++;
|
|
|
448 |
}
|
|
|
449 |
}
|
|
|
450 |
|
420 |
switch (*chariter) {
|
451 |
switch (car) {
|
421 |
case '<':
|
452 |
case '<':
|
422 |
if (!inparamvalue)
|
453 |
if (m_inputhtml) {
|
423 |
intag = true;
|
454 |
if (!inparamvalue)
|
424 |
break;
|
455 |
intag = true;
|
|
|
456 |
chariter.appendchartostring(*olit);
|
|
|
457 |
} else {
|
|
|
458 |
*olit += "<";
|
|
|
459 |
}
|
|
|
460 |
break;
|
425 |
case '>':
|
461 |
case '>':
|
426 |
if (!inparamvalue)
|
462 |
if (m_inputhtml) {
|
427 |
intag = false;
|
463 |
if (!inparamvalue)
|
428 |
break;
|
464 |
intag = false;
|
|
|
465 |
}
|
|
|
466 |
chariter.appendchartostring(*olit);
|
|
|
467 |
break;
|
|
|
468 |
case '&':
|
|
|
469 |
if (m_inputhtml) {
|
|
|
470 |
chariter.appendchartostring(*olit);
|
|
|
471 |
} else {
|
|
|
472 |
*olit += "&";
|
|
|
473 |
}
|
|
|
474 |
break;
|
429 |
case '"':
|
475 |
case '"':
|
430 |
if (intag) {
|
476 |
if (m_inputhtml && intag) {
|
431 |
inparamvalue = !inparamvalue;
|
477 |
inparamvalue = !inparamvalue;
|
432 |
}
|
478 |
}
|
433 |
break;
|
|
|
434 |
}
|
|
|
435 |
chariter.appendchartostring(*olit);
|
479 |
chariter.appendchartostring(*olit);
|
436 |
} else switch (*chariter) {
|
480 |
break;
|
437 |
case '\n':
|
|
|
438 |
if (ateol < 2) {
|
|
|
439 |
*olit += "<br>\n";
|
|
|
440 |
ateol++;
|
|
|
441 |
}
|
|
|
442 |
break;
|
|
|
443 |
case '\r':
|
|
|
444 |
break;
|
|
|
445 |
case '<':
|
|
|
446 |
ateol = 0;
|
|
|
447 |
*olit += "<";
|
|
|
448 |
break;
|
|
|
449 |
case '&':
|
|
|
450 |
ateol = 0;
|
|
|
451 |
*olit += "&";
|
|
|
452 |
break;
|
|
|
453 |
default:
|
481 |
default:
|
454 |
// We don't change the eol status for whitespace, want
|
|
|
455 |
// a real line
|
|
|
456 |
if (!(*chariter == ' ' || *chariter == '\t')) {
|
|
|
457 |
ateol = 0;
|
|
|
458 |
}
|
|
|
459 |
chariter.appendchartostring(*olit);
|
482 |
chariter.appendchartostring(*olit);
|
460 |
}
|
|
|
461 |
}
|
483 |
}
|
|
|
484 |
|
|
|
485 |
} // End chariter loop
|
|
|
486 |
|
462 |
#if 0
|
487 |
#if 0
|
463 |
{
|
488 |
{
|
464 |
FILE *fp = fopen("/tmp/debugplaintorich", "a");
|
489 |
FILE *fp = fopen("/tmp/debugplaintorich", "a");
|
465 |
fprintf(fp, "BEGINOFPLAINTORICHOUTPUT\n");
|
490 |
fprintf(fp, "BEGINOFPLAINTORICHOUTPUT\n");
|
466 |
for (list<string>::iterator it = out.begin();
|
491 |
for (list<string>::iterator it = out.begin();
|