|
a/src/utils/smallut.cpp |
|
b/src/utils/smallut.cpp |
1 |
#ifndef lint
|
1 |
#ifndef lint
|
2 |
static char rcsid[] = "@(#$Id: smallut.cpp,v 1.15 2006-01-26 12:29:20 dockes Exp $ (C) 2004 J.F.Dockes";
|
2 |
static char rcsid[] = "@(#$Id: smallut.cpp,v 1.16 2006-04-11 06:49:45 dockes Exp $ (C) 2004 J.F.Dockes";
|
3 |
#endif
|
3 |
#endif
|
4 |
/*
|
4 |
/*
|
5 |
* This program is free software; you can redistribute it and/or modify
|
5 |
* This program is free software; you can redistribute it and/or modify
|
6 |
* it under the terms of the GNU General Public License as published by
|
6 |
* it under the terms of the GNU General Public License as published by
|
7 |
* the Free Software Foundation; either version 2 of the License, or
|
7 |
* the Free Software Foundation; either version 2 of the License, or
|
|
... |
|
... |
329 |
pos = s.find_last_not_of(ws);
|
329 |
pos = s.find_last_not_of(ws);
|
330 |
if (pos != string::npos && pos != s.length()-1)
|
330 |
if (pos != string::npos && pos != s.length()-1)
|
331 |
s.replace(pos+1, string::npos, "");
|
331 |
s.replace(pos+1, string::npos, "");
|
332 |
}
|
332 |
}
|
333 |
|
333 |
|
|
|
334 |
// Neutralize a set of characters: every run of characters from 'delims'
// that follows a token in 'str' is collapsed to a single space in the
// result. Delimiters at the very start of 'str' are dropped, and a
// trailing run of delimiters leaves one trailing space.
string neutchars(const string &str, string delims)
{
    string result;
    string::size_type cursor = 0;

    while (true) {
        // Jump over delimiters; if nothing else is left we are done.
        const string::size_type tokBegin =
            str.find_first_not_of(delims, cursor);
        if (tokBegin == string::npos)
            break;

        // The token extends to the next delimiter or to the end of input.
        cursor = str.find_first_of(delims, tokBegin);
        if (cursor == string::npos) {
            // Last token: copy the remainder, no separator is appended.
            result.append(str, tokBegin, string::npos);
            break;
        }

        // The token is never empty here: tokBegin indexes a
        // non-delimiter character and cursor lies beyond it.
        result.append(str, tokBegin, cursor - tokBegin);
        result += " ";
    }
    return result;
}
|
|
|
355 |
|
|
|
356 |
|
|
|
357 |
/* Shorten a string so that at most maxlen bytes of the original text are
 * kept, cutting at the last separator character found inside that
 * window and appending " ..." to mark the truncation. Inputs that
 * already fit are returned unchanged.
 * Note: textsplit could be used instead, stopping once enough text has
 * been gathered; that would be cleanly utf8-aware but would drop
 * punctuation. */
static const string SEPAR = " \t\n\r-:.;,/[]{}";
string truncate_to_word(string & input, string::size_type maxlen)
{
    // Nothing to cut: hand back a plain copy.
    if (input.length() <= maxlen)
        return input;

    string clipped(input, 0, maxlen);
    const string::size_type cut = clipped.find_last_of(SEPAR);

    // We always cut at a separator. An earlier version only did so when
    // the separator was past maxlen/2, but cutting anywhere else would
    // need utf8 handling to avoid splitting a multibyte character. With
    // no separator at all the text is dropped entirely; it probably has
    // no value, except possibly for Asian languages, which may deserve
    // a fix one day.
    const string::size_type keep = (cut == string::npos) ? 0 : cut;
    clipped.erase(keep);

    clipped += " ...";
    return clipped;
}
|
|
|
385 |
|
334 |
// Escape things that would look like markup
|
386 |
// Escape things that would look like markup
|
335 |
string escapeHtml(const string &in)
|
387 |
string escapeHtml(const string &in)
|
336 |
{
|
388 |
{
|
337 |
string out;
|
389 |
string out;
|
338 |
for (string::size_type pos = 0; pos < in.length(); pos++) {
|
390 |
for (string::size_type pos = 0; pos < in.length(); pos++) {
|