Switch to unified view

a/src/common/textsplit.cpp b/src/common/textsplit.cpp
...
...
333
    m_spanpos = m_wordpos;
333
    m_spanpos = m_wordpos;
334
    m_wordStart = 0;
334
    m_wordStart = 0;
335
    m_wordLen = 0;
335
    m_wordLen = 0;
336
}
336
}
337
337
338
static inline bool isalphanum(int what, unsigned int flgs)
339
{
340
    return what == A_LLETTER || what == A_ULETTER ||
341
  what == DIGIT || what == LETTER ||
342
  ((flgs & TextSplit::TXTS_KEEPWILD) && what == WILD);
343
}
344
static inline bool isdigit(int what, unsigned int flgs)
345
{
346
    return what == DIGIT || ((flgs & TextSplit::TXTS_KEEPWILD) && what == WILD);
347
}
348
338
/** 
349
/** 
339
 * Splitting a text into terms to be indexed.
350
 * Splitting a text into terms to be indexed.
340
 * We basically emit a word every time we see a separator, but some chars are
351
 * We basically emit a word every time we see a separator, but some chars are
341
 * handled specially so that special cases, e.g. c++ and jfd@recoll.com etc.,
352
 * handled specially so that special cases, e.g. c++ and jfd@recoll.com etc.,
342
 * are handled properly,
353
 * are handled properly,
...
...
441
    case '+':
452
    case '+':
442
        curspanglue = cc;
453
        curspanglue = cc;
443
        if (m_wordLen == 0) {
454
        if (m_wordLen == 0) {
444
        // + or - don't start a term except if this looks like
455
        // + or - don't start a term except if this looks like
445
        // it's going to be a number
456
        // it's going to be a number
446
      if (whatcc(it[it.getCpos()+1]) == DIGIT) {
457
      if (isdigit(whatcc(it[it.getCpos()+1]), m_flags)) {
447
            // -10
458
            // -10
448
            m_inNumber = true;
459
            m_inNumber = true;
449
            m_wordLen += it.appendchartostring(m_span);
460
            m_wordLen += it.appendchartostring(m_span);
450
        } else {
461
        } else {
451
            goto SPACE;
462
            goto SPACE;
452
        } 
463
        } 
453
        } else if (m_inNumber && (m_span[m_span.length() - 1] == 'e' ||
464
        } else if (m_inNumber && (m_span[m_span.length() - 1] == 'e' ||
454
                      m_span[m_span.length() - 1] == 'E')) {
465
                      m_span[m_span.length() - 1] == 'E')) {
455
      if (whatcc(it[it.getCpos()+1]) == DIGIT) {
466
      if (isdigit(whatcc(it[it.getCpos()+1]), m_flags)) {
456
            m_wordLen += it.appendchartostring(m_span);
467
            m_wordLen += it.appendchartostring(m_span);
457
        } else {
468
        } else {
458
            goto SPACE;
469
            goto SPACE;
459
        }
470
        }
460
        } else {
471
        } else {
...
...
466
    {
477
    {
467
        // Need a little lookahead here. At worst this gets the end null
478
        // Need a little lookahead here. At worst this gets the end null
468
        int nextc = it[it.getCpos()+1];
479
        int nextc = it[it.getCpos()+1];
469
        int nextwhat = whatcc(nextc);
480
        int nextwhat = whatcc(nextc);
470
        if (m_inNumber) {
481
        if (m_inNumber) {
471
      if (nextwhat != DIGIT)
482
      if (!isdigit(nextwhat, m_flags))
472
            goto SPACE;
483
            goto SPACE;
473
        m_wordLen += it.appendchartostring(m_span);
484
        m_wordLen += it.appendchartostring(m_span);
474
        curspanglue = cc;
485
        curspanglue = cc;
475
        break;
486
        break;
476
        } else {
487
        } else {
...
...
480
                // Another problem is that something like .x-errs 
491
                // Another problem is that something like .x-errs 
481
        // will be split as .x-errs, x, errs but not x-errs
492
        // will be split as .x-errs, x, errs but not x-errs
482
        // A final comma in a word will be removed by doemit
493
        // A final comma in a word will be removed by doemit
483
494
484
        // Only letters and digits make sense after
495
        // Only letters and digits make sense after
485
      if (nextwhat != A_LLETTER && nextwhat != A_ULETTER && 
496
      if (!isalphanum(nextwhat, m_flags))
486
          nextwhat != DIGIT && nextwhat != LETTER)
487
            goto SPACE;
497
            goto SPACE;
488
498
489
        if (cc == '.') {
499
        if (cc == '.') {
490
                    // Check for number like .1
500
                    // Check for number like .1
491
                    if (m_span.length() == 0 && nextwhat == DIGIT) {
501
                    if (m_span.length() == 0 && isdigit(nextwhat, m_flags)) {
492
                        m_inNumber = true;
502
                        m_inNumber = true;
493
                        m_wordLen += it.appendchartostring(m_span);
503
                        m_wordLen += it.appendchartostring(m_span);
494
                        curspanglue = cc;
504
                        curspanglue = cc;
495
                        break;
505
                        break;
496
                    }
506
                    }