|
a/src/common/textsplit.cpp |
|
b/src/common/textsplit.cpp |
|
... |
|
... |
353 |
m_span.erase();
|
353 |
m_span.erase();
|
354 |
m_inNumber = false;
|
354 |
m_inNumber = false;
|
355 |
m_wordStart = m_wordLen = m_prevpos = m_prevlen = m_wordpos = m_spanpos = 0;
|
355 |
m_wordStart = m_wordLen = m_prevpos = m_prevlen = m_wordpos = m_spanpos = 0;
|
356 |
int curspanglue = 0;
|
356 |
int curspanglue = 0;
|
357 |
bool pagepending = false;
|
357 |
bool pagepending = false;
|
|
|
358 |
bool softhyphenpending = false;
|
358 |
|
359 |
|
359 |
// Running count of non-alphanum chars. Reset when we see one;
|
360 |
// Running count of non-alphanum chars. Reset when we see one;
|
360 |
int nonalnumcnt = 0;
|
361 |
int nonalnumcnt = 0;
|
361 |
|
362 |
|
362 |
Utf8Iter it(in);
|
363 |
Utf8Iter it(in);
|
|
... |
|
... |
391 |
}
|
392 |
}
|
392 |
|
393 |
|
393 |
int cc = whatcc(c);
|
394 |
int cc = whatcc(c);
|
394 |
switch (cc) {
|
395 |
switch (cc) {
|
395 |
case SKIP:
|
396 |
case SKIP:
|
|
|
397 |
// Special-case soft-hyphen. To work, this depends on the
|
|
|
398 |
// fact that only SKIP and the hyphen-at-end-of-line case use "continue" inside the
|
|
|
399 |
// switch. All the others will do the softhyphenpending
|
|
|
400 |
// reset after the switch
|
|
|
401 |
if (c == 0xad) {
|
|
|
402 |
softhyphenpending = true;
|
|
|
403 |
} else {
|
|
|
404 |
softhyphenpending = false;
|
|
|
405 |
}
|
|
|
406 |
// Skips the softhyphenpending reset
|
396 |
continue;
|
407 |
continue;
|
397 |
case DIGIT:
|
408 |
case DIGIT:
|
398 |
if (m_wordLen == 0)
|
409 |
if (m_wordLen == 0)
|
399 |
m_inNumber = true;
|
410 |
m_inNumber = true;
|
400 |
m_wordLen += it.appendchartostring(m_span);
|
411 |
m_wordLen += it.appendchartostring(m_span);
|
|
... |
|
... |
442 |
m_wordLen += it.appendchartostring(m_span);
|
453 |
m_wordLen += it.appendchartostring(m_span);
|
443 |
} else {
|
454 |
} else {
|
444 |
goto SPACE;
|
455 |
goto SPACE;
|
445 |
}
|
456 |
}
|
446 |
} else {
|
457 |
} else {
|
447 |
if (!doemit(false, it.getBpos()))
|
458 |
goto SPACE;
|
448 |
return false;
|
|
|
449 |
m_inNumber = false;
|
|
|
450 |
m_wordStart += it.appendchartostring(m_span);
|
|
|
451 |
}
|
459 |
}
|
452 |
break;
|
460 |
break;
|
453 |
|
461 |
|
454 |
case '.':
|
462 |
case '.':
|
455 |
case ',':
|
463 |
case ',':
|
|
... |
|
... |
553 |
}
|
561 |
}
|
554 |
goto SPACE;
|
562 |
goto SPACE;
|
555 |
break;
|
563 |
break;
|
556 |
case '\n':
|
564 |
case '\n':
|
557 |
case '\r':
|
565 |
case '\r':
|
558 |
if (m_span.length() && m_span[m_span.length() - 1] == '-') {
|
566 |
if ((m_span.length() && m_span[m_span.length() - 1] == '-') ||
|
|
|
567 |
softhyphenpending) {
|
559 |
// if '-' is the last char before end of line, just
|
568 |
// if '-' or a soft-hyphen is the last char before end of line, just
|
560 |
// ignore the line change. This is the right thing to
|
569 |
// ignore the line change. This is the right thing to
|
561 |
// do almost always. We'd then need a way to check if
|
570 |
// do almost always. We'd then need a way to check if
|
562 |
// the - was added as part of the word hyphenation, or was
|
571 |
// the - was added as part of the word hyphenation, or was
|
563 |
// there in the first place, but this would need a dictionary.
|
572 |
// there in the first place, but this would need a dictionary.
|
564 |
// Also we'd need to check for a soft-hyphen and remove it,
|
573 |
// Don't reset softhyphenpending ("continue" skips the reset after the switch)
|
565 |
// but this would require more utf-8 magic
|
574 |
continue;
|
566 |
} else {
|
575 |
} else {
|
567 |
// Handle like a normal separator
|
576 |
// Handle like a normal separator
|
568 |
goto SPACE;
|
577 |
goto SPACE;
|
569 |
}
|
578 |
}
|
570 |
break;
|
579 |
break;
|
|
... |
|
... |
620 |
}
|
629 |
}
|
621 |
m_wordLen += it.appendchartostring(m_span);
|
630 |
m_wordLen += it.appendchartostring(m_span);
|
622 |
nonalnumcnt = 0;
|
631 |
nonalnumcnt = 0;
|
623 |
break;
|
632 |
break;
|
624 |
}
|
633 |
}
|
|
|
634 |
softhyphenpending = false;
|
625 |
}
|
635 |
}
|
626 |
if (m_wordLen || m_span.length()) {
|
636 |
if (m_wordLen || m_span.length()) {
|
627 |
if (!doemit(true, it.getBpos()))
|
637 |
if (!doemit(true, it.getBpos()))
|
628 |
return false;
|
638 |
return false;
|
629 |
}
|
639 |
}
|
|
... |
|
... |
889 |
"A b C 2 . +"
|
899 |
"A b C 2 . +"
|
890 |
"','this\n"
|
900 |
"','this\n"
|
891 |
" ,able,test-domain "
|
901 |
" ,able,test-domain "
|
892 |
" -wl,--export-dynamic "
|
902 |
" -wl,--export-dynamic "
|
893 |
" ~/.xsession-errors "
|
903 |
" ~/.xsession-errors "
|
|
|
904 |
"soft\xc2\xadhyphen "
|
|
|
905 |
"soft\xc2\xad\nhyphen "
|
|
|
906 |
"soft\xc2\xad\n\rhyphen "
|
|
|
907 |
"hard-\nhyphen "
|
894 |
;
|
908 |
;
|
895 |
|
909 |
|
896 |
static string teststring1 = " nouvel-an ";
|
910 |
static string teststring1 = " nouvel-an ";
|
897 |
|
911 |
|
898 |
static string thisprog;
|
912 |
static string thisprog;
|