Switch to unified view

a/src/common/textsplit.cpp b/src/common/textsplit.cpp
1
#ifndef lint
1
#ifndef lint
2
static char rcsid[] = "@(#$Id: textsplit.cpp,v 1.23 2006-09-21 05:59:02 dockes Exp $ (C) 2004 J.F.Dockes";
2
static char rcsid[] = "@(#$Id: textsplit.cpp,v 1.24 2006-11-12 08:35:11 dockes Exp $ (C) 2004 J.F.Dockes";
3
#endif
3
#endif
4
/*
4
/*
5
 *   This program is free software; you can redistribute it and/or modify
5
 *   This program is free software; you can redistribute it and/or modify
6
 *   it under the terms of the GNU General Public License as published by
6
 *   it under the terms of the GNU General Public License as published by
7
 *   the Free Software Foundation; either version 2 of the License, or
7
 *   the Free Software Foundation; either version 2 of the License, or
...
...
142
    span << "' sp: " << spanpos << " spe: " << spanerase << " bp: " << bp 
142
    span << "' sp: " << spanpos << " spe: " << spanerase << " bp: " << bp 
143
     << endl;
143
     << endl;
144
#endif
144
#endif
145
145
146
    // Emit span. When splitting for query, we only emit final spans
146
    // Emit span. When splitting for query, we only emit final spans
147
    if (spanerase) {
147
    bool spanemitted = false;
148
    if (spanerase && !(m_flags & TXTS_NOSPANS)) {
148
    // Maybe trim at end. These are chars that we would keep inside 
149
    // Maybe trim at end. These are chars that we would keep inside 
149
    // a span, but not at the end
150
    // a span, but not at the end
150
    while (span.length() > 0) {
151
    while (span.length() > 0) {
151
        switch (span[span.length()-1]) {
152
        switch (span[span.length()-1]) {
152
        case '.':
153
        case '.':
...
...
160
        default:
161
        default:
161
        goto breakloop1;
162
        goto breakloop1;
162
        }
163
        }
163
    }
164
    }
164
    breakloop1:
165
    breakloop1:
166
  spanemitted = true;
165
    if (!emitterm(true, span, spanpos, bp-span.length(), bp))
167
    if (!emitterm(true, span, spanpos, bp-span.length(), bp))
166
        return false;
168
        return false;
167
    }
169
    }
168
170
169
171
170
    // Emit word if different from span and not query mode
172
    // Emit word if different from span and not 'no words' mode
173
    if (!(m_flags & TXTS_ONLYSPANS) && 
171
    if (!fq && (!spanerase || (word.length() != span.length())))
174
  (!spanemitted || word.length() != span.length()))
172
    if (!emitterm(false, word, wordpos, bp-word.length(), bp))
175
    if (!emitterm(false, word, wordpos, bp-word.length(), bp))
173
        return false;
176
        return false;
174
177
175
    // Adjust state
178
    // Adjust state
176
    wordpos++;
179
    wordpos++;
...
...
402
405
403
static string thisprog;
406
static string thisprog;
404
407
405
static string usage =
408
static string usage =
406
    " textsplit [opts] [filename]\n"
409
    " textsplit [opts] [filename]\n"
407
    "   -q: query mode\n"
410
    "   -s:  only spans\n"
411
    "   -w:  only words\n"
408
    " if filename is 'stdin', will read stdin for data (end with ^D)\n"
412
    " if filename is 'stdin', will read stdin for data (end with ^D)\n"
409
    "  \n\n"
413
    "  \n\n"
410
    ;
414
    ;
411
415
412
static void
416
static void
...
...
415
    cerr << thisprog  << ": usage:\n" << usage;
419
    cerr << thisprog  << ": usage:\n" << usage;
416
    exit(1);
420
    exit(1);
417
}
421
}
418
422
419
static int        op_flags;
423
static int        op_flags;
420
#define OPT_q      0x1 
424
#define OPT_s      0x1 
425
#define OPT_w   0x2
421
426
422
int main(int argc, char **argv)
427
int main(int argc, char **argv)
423
{
428
{
424
    thisprog = argv[0];
429
    thisprog = argv[0];
425
    argc--; argv++;
430
    argc--; argv++;
...
...
429
    if (!(**argv))
434
    if (!(**argv))
430
        /* The "adb - core" case */
435
        /* The "adb - core" case */
431
        Usage();
436
        Usage();
432
    while (**argv)
437
    while (**argv)
433
        switch (*(*argv)++) {
438
        switch (*(*argv)++) {
434
        case 'q':    op_flags |= OPT_q; break;
439
        case 's':    op_flags |= OPT_s; break;
440
      case 'w':   op_flags |= OPT_w; break;
435
        default: Usage();   break;
441
        default: Usage();   break;
436
        }
442
        }
437
    argc--; argv++;
443
    argc--; argv++;
438
    }
444
    }
439
    DebugLog::getdbl()->setloglevel(DEBDEB1);
445
    DebugLog::getdbl()->setloglevel(DEBDEB1);
440
    DebugLog::setfilename("stderr");
446
    DebugLog::setfilename("stderr");
441
    mySplitterCB cb;
447
    mySplitterCB cb;
442
    TextSplit splitter(&cb, (op_flags&OPT_q) ? true: false);
448
    TextSplit::Flags flags = TextSplit::TXTS_NONE;
449
    if (op_flags&OPT_s)
450
  flags = TextSplit::TXTS_ONLYSPANS;
451
    else if (op_flags&OPT_w)
452
  flags = TextSplit::TXTS_NOSPANS;
453
    TextSplit splitter(&cb,  flags);
443
    if (argc == 1) {
454
    if (argc == 1) {
444
    string data;
455
    string data;
445
    const char *filename = *argv++; argc--;
456
    const char *filename = *argv++; argc--;
446
    if (!strcmp(filename, "stdin")) {
457
    if (!strcmp(filename, "stdin")) {
447
        char buf[1024];
458
        char buf[1024];