|
a/src/smallut.cpp |
|
b/src/smallut.cpp |
|
... |
|
... |
12 |
* You should have received a copy of the GNU General Public License
|
12 |
* You should have received a copy of the GNU General Public License
|
13 |
* along with this program; if not, write to the
|
13 |
* along with this program; if not, write to the
|
14 |
* Free Software Foundation, Inc.,
|
14 |
* Free Software Foundation, Inc.,
|
15 |
* 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
15 |
* 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
16 |
*/
|
16 |
*/
|
17 |
|
|
|
18 |
#ifndef TEST_SMALLUT
|
|
|
19 |
#ifdef BUILDING_RECOLL
|
17 |
#ifdef BUILDING_RECOLL
|
20 |
#include "autoconfig.h"
|
18 |
#include "autoconfig.h"
|
21 |
#else
|
19 |
#else
|
22 |
#include "config.h"
|
20 |
#include "config.h"
|
23 |
#endif
|
21 |
#endif
|
|
... |
|
... |
1330 |
void smallut_init_mt()
|
1328 |
void smallut_init_mt()
|
1331 |
{
|
1329 |
{
|
1332 |
// Init langtocode() static table
|
1330 |
// Init langtocode() static table
|
1333 |
langtocode("");
|
1331 |
langtocode("");
|
1334 |
}
|
1332 |
}
|
1335 |
|
|
|
1336 |
#else // TEST_SMALLUT
|
|
|
1337 |
|
|
|
1338 |
#include <string>
|
|
|
1339 |
using namespace std;
|
|
|
1340 |
#include <iostream>
|
|
|
1341 |
|
|
|
1342 |
#include "smallut.h"
|
|
|
1343 |
|
|
|
1344 |
// Two C strings used as the operands of one comparison test case.
struct spair {
    const char *s1;
    const char *s2;
};

// Operands for the stringicmp() / stringlowercmp() / stringuppercmp() scenario
// in main(): empty strings, case variations and prefix relations.
spair pairs[] = {
    {"", ""},     {"", "a"},    {"a", ""},    {"a", "a"},
    {"A", "a"},   {"a", "A"},   {"A", "A"},   {"12", "12"},
    {"a", "ab"},  {"ab", "a"},  {"A", "Ab"},  {"a", "Ab"},
};
// Number of entries in pairs[]
int npairs = sizeof(pairs) / sizeof(pairs[0]);

// Operands for the stringisuffcmp() scenario in main().
spair suffpairs[] = {
    {"", ""},
    {"", "a"},
    {"a", ""},
    {"a", "a"},
    {"toto.txt", ".txt"},
    {"TXT", "toto.txt"},
    {"toto.txt", ".txt1"},
    {"toto.txt1", ".txt"},
};
// Number of entries in suffpairs[]
int nsuffpairs = sizeof(suffpairs) / sizeof(suffpairs[0]);
|
|
|
1374 |
|
|
|
1375 |
|
|
|
1376 |
// Sample period expressions fed to parsedateinterval() by the corresponding
// scenario in main().
const char* periods[] = {
    "2001",                   // The year 2001
    "2001/",                  // 2001 or later
    "2001/P3Y",               // 2001 -> 2004 or 2005, ambiguous
    "2001-01-01/P3Y",         // 01-2001 -> 01 2004
    "2001-03-03/2001-05-01",  // Both ends given explicitly
    "P3M/",                   // From 3 months ago until now
    "P1Y1M/2001-03-01",       // 2000-02-01/2001-03-01
    "/2001",                  // From the epoch to the end of 2001
};
// Number of entries in periods[]
const int nperiods = sizeof(periods) / sizeof(periods[0]);
|
|
|
1388 |
|
|
|
1389 |
// Program name as invoked (argv[0]); assigned at the top of main().
const char *thisprog;
|
|
|
1390 |
static void cerrdip(const string& s, DateInterval *dip)
|
|
|
1391 |
{
|
|
|
1392 |
cerr << s << dip->y1 << "-" << dip->m1 << "-" << dip->d1 << "/"
|
|
|
1393 |
<< dip->y2 << "-" << dip->m2 << "-" << dip->d2
|
|
|
1394 |
<< endl;
|
|
|
1395 |
}
|
|
|
1396 |
|
|
|
1397 |
int main(int argc, char **argv)
|
|
|
1398 |
{
|
|
|
1399 |
thisprog = *argv++;
|
|
|
1400 |
argc--;
|
|
|
1401 |
|
|
|
1402 |
#if 0
|
|
|
1403 |
if (argc <= 0) {
|
|
|
1404 |
cerr << "Usage: smallut <stringtosplit>" << endl;
|
|
|
1405 |
exit(1);
|
|
|
1406 |
}
|
|
|
1407 |
string s = *argv++;
|
|
|
1408 |
argc--;
|
|
|
1409 |
vector<string> vs;
|
|
|
1410 |
stringToTokens(s, vs, "/");
|
|
|
1411 |
for (vector<string>::const_iterator it = vs.begin(); it != vs.end(); it++) {
|
|
|
1412 |
cerr << "[" << *it << "] ";
|
|
|
1413 |
}
|
|
|
1414 |
cerr << endl;
|
|
|
1415 |
exit(0);
|
|
|
1416 |
#elif 0
|
|
|
1417 |
if (argc <= 0) {
|
|
|
1418 |
cerr << "Usage: smallut <stringtosplit>" << endl;
|
|
|
1419 |
exit(1);
|
|
|
1420 |
}
|
|
|
1421 |
string s = *argv++;
|
|
|
1422 |
argc--;
|
|
|
1423 |
vector<string> vs;
|
|
|
1424 |
if (!stringToStrings(s, vs, ":-()")) {
|
|
|
1425 |
cerr << "Bad entry" << endl;
|
|
|
1426 |
exit(1);
|
|
|
1427 |
}
|
|
|
1428 |
for (vector<string>::const_iterator it = vs.begin(); it != vs.end(); it++) {
|
|
|
1429 |
cerr << "[" << *it << "] ";
|
|
|
1430 |
}
|
|
|
1431 |
cerr << endl;
|
|
|
1432 |
exit(0);
|
|
|
1433 |
#elif 0
|
|
|
1434 |
if (argc <= 0) {
|
|
|
1435 |
cerr << "Usage: smallut <dateinterval>" << endl;
|
|
|
1436 |
exit(1);
|
|
|
1437 |
}
|
|
|
1438 |
string s = *argv++;
|
|
|
1439 |
argc--;
|
|
|
1440 |
DateInterval di;
|
|
|
1441 |
if (!parsedateinterval(s, &di)) {
|
|
|
1442 |
cerr << "Parse failed" << endl;
|
|
|
1443 |
exit(1);
|
|
|
1444 |
}
|
|
|
1445 |
cerrdip("", &di);
|
|
|
1446 |
exit(0);
|
|
|
1447 |
#elif 0
|
|
|
1448 |
DateInterval di;
|
|
|
1449 |
for (int i = 0; i < nperiods; i++) {
|
|
|
1450 |
if (!parsedateinterval(periods[i], &di)) {
|
|
|
1451 |
cerr << "Parsing failed for [" << periods[i] << "]" << endl;
|
|
|
1452 |
} else {
|
|
|
1453 |
cerrdip(string(periods[i]).append(" : "), &di);
|
|
|
1454 |
}
|
|
|
1455 |
}
|
|
|
1456 |
exit(0);
|
|
|
1457 |
#elif 0
|
|
|
1458 |
for (int i = 0; i < npairs; i++) {
|
|
|
1459 |
{
|
|
|
1460 |
int c = stringicmp(pairs[i].s1, pairs[i].s2);
|
|
|
1461 |
printf("'%s' %s '%s' ", pairs[i].s1,
|
|
|
1462 |
c == 0 ? "==" : c < 0 ? "<" : ">", pairs[i].s2);
|
|
|
1463 |
}
|
|
|
1464 |
{
|
|
|
1465 |
int cl = stringlowercmp(pairs[i].s1, pairs[i].s2);
|
|
|
1466 |
printf("L '%s' %s '%s' ", pairs[i].s1,
|
|
|
1467 |
cl == 0 ? "==" : cl < 0 ? "<" : ">", pairs[i].s2);
|
|
|
1468 |
}
|
|
|
1469 |
{
|
|
|
1470 |
int cu = stringuppercmp(pairs[i].s1, pairs[i].s2);
|
|
|
1471 |
printf("U '%s' %s '%s' ", pairs[i].s1,
|
|
|
1472 |
cu == 0 ? "==" : cu < 0 ? "<" : ">", pairs[i].s2);
|
|
|
1473 |
}
|
|
|
1474 |
printf("\n");
|
|
|
1475 |
}
|
|
|
1476 |
#elif 0
|
|
|
1477 |
for (int i = 0; i < nsuffpairs; i++) {
|
|
|
1478 |
int c = stringisuffcmp(suffpairs[i].s1, suffpairs[i].s2);
|
|
|
1479 |
printf("[%s] %s [%s] \n", suffpairs[i].s1,
|
|
|
1480 |
c == 0 ? "matches" : c < 0 ? "<" : ">", suffpairs[i].s2);
|
|
|
1481 |
}
|
|
|
1482 |
#elif 0
|
|
|
1483 |
std::string testit("\303\251l\303\251gant");
|
|
|
1484 |
for (int sz = 10; sz >= 0; sz--) {
|
|
|
1485 |
utf8truncate(testit, sz);
|
|
|
1486 |
cout << testit << endl;
|
|
|
1487 |
}
|
|
|
1488 |
#elif 0
|
|
|
1489 |
std::string testit("ligne\ndeuxieme ligne\r3eme ligne\r\n");
|
|
|
1490 |
cout << "[" << neutchars(testit, "\r\n") << "]" << endl;
|
|
|
1491 |
string i, o;
|
|
|
1492 |
cout << "neutchars(null) is [" << neutchars(i, "\r\n") << "]" << endl;
|
|
|
1493 |
#elif 0
|
|
|
1494 |
map<string, string> substs;
|
|
|
1495 |
substs["a"] = "A_SUBST";
|
|
|
1496 |
substs["title"] = "TITLE_SUBST";
|
|
|
1497 |
string in = "a: %a title: %(title) pcpc: %% %";
|
|
|
1498 |
string out;
|
|
|
1499 |
pcSubst(in, out, substs);
|
|
|
1500 |
cout << in << " => " << out << endl;
|
|
|
1501 |
|
|
|
1502 |
in = "unfinished: %(unfinished";
|
|
|
1503 |
pcSubst(in, out, substs);
|
|
|
1504 |
cout << in << " => " << out << endl;
|
|
|
1505 |
in = "unfinished: %(";
|
|
|
1506 |
pcSubst(in, out, substs);
|
|
|
1507 |
cout << in << " => " << out << endl;
|
|
|
1508 |
in = "empty: %()";
|
|
|
1509 |
pcSubst(in, out, substs);
|
|
|
1510 |
cout << in << " => " << out << endl;
|
|
|
1511 |
substs.clear();
|
|
|
1512 |
in = "a: %a title: %(title) pcpc: %% %";
|
|
|
1513 |
pcSubst(in, out, substs);
|
|
|
1514 |
cout << "After map clear: " << in << " => " << out << endl;
|
|
|
1515 |
#elif 0
|
|
|
1516 |
list<string> tokens;
|
|
|
1517 |
tokens.push_back("");
|
|
|
1518 |
tokens.push_back("a,b");
|
|
|
1519 |
tokens.push_back("simple value");
|
|
|
1520 |
tokens.push_back("with \"quotes\"");
|
|
|
1521 |
string out;
|
|
|
1522 |
stringsToCSV(tokens, out);
|
|
|
1523 |
cout << "CSV line: [" << out << "]" << endl;
|
|
|
1524 |
#elif 0
|
|
|
1525 |
string sshort("ABC");
|
|
|
1526 |
string slong("ABCD");
|
|
|
1527 |
string sshortsmaller("ABB");
|
|
|
1528 |
|
|
|
1529 |
vector<pair<string, string> > cmps;
|
|
|
1530 |
cmps.push_back(pair<string, string>(sshort, sshort));
|
|
|
1531 |
cmps.push_back(pair<string, string>(sshort, slong));
|
|
|
1532 |
cmps.push_back(pair<string, string>(slong, sshort));
|
|
|
1533 |
cmps.push_back(pair<string, string>(sshortsmaller, sshort));
|
|
|
1534 |
cmps.push_back(pair<string, string>(sshort, sshortsmaller));
|
|
|
1535 |
|
|
|
1536 |
for (vector<pair<string, string> >::const_iterator it = cmps.begin();
|
|
|
1537 |
it != cmps.end(); it++) {
|
|
|
1538 |
cout << it->first << " " << it->second << " " <<
|
|
|
1539 |
stringicmp(it->first, it->second) << endl;
|
|
|
1540 |
}
|
|
|
1541 |
cout << endl;
|
|
|
1542 |
for (vector<pair<string, string> >::const_iterator it = cmps.begin();
|
|
|
1543 |
it != cmps.end(); it++) {
|
|
|
1544 |
cout << it->first << " " << it->second << " " <<
|
|
|
1545 |
stringlowercmp(stringtolower(it->first), it->second) << endl;
|
|
|
1546 |
}
|
|
|
1547 |
cout << endl;
|
|
|
1548 |
for (vector<pair<string, string> >::const_iterator it = cmps.begin();
|
|
|
1549 |
it != cmps.end(); it++) {
|
|
|
1550 |
cout << it->first << " " << it->second << " " <<
|
|
|
1551 |
stringuppercmp(it->first, it->second) << endl;
|
|
|
1552 |
}
|
|
|
1553 |
#elif 0
|
|
|
1554 |
SimpleRegexp exp("[ \t]*#[ \t]*([a-zA-Z0-9]+)[ \t]*=.*", 0, 1);
|
|
|
1555 |
//SimpleRegexp exp(" # ([a-zA-Z0-9]+) =.*", 0, 10);
|
|
|
1556 |
//SimpleRegexp exp(" # (varnm) = sdf sdf sdf ", 0, 10);
|
|
|
1557 |
//SimpleRegexp exp(".*", 0);
|
|
|
1558 |
string tomatch(" # varnm = sdf sdf sdf ");
|
|
|
1559 |
if (exp.simpleMatch(tomatch)) {
|
|
|
1560 |
cout << "Match !\n";
|
|
|
1561 |
cout << "Submatch[0]: [" << exp.getMatch(tomatch, 0) << "]\n";
|
|
|
1562 |
cout << "Submatch[1]: [" << exp.getMatch(tomatch, 1) << "]\n";
|
|
|
1563 |
return 0;
|
|
|
1564 |
} else {
|
|
|
1565 |
cerr << "No match\n";
|
|
|
1566 |
return 1;
|
|
|
1567 |
}
|
|
|
1568 |
#elif 1
|
|
|
1569 |
cout << makeCString("\"hello\" world\n2nd line") << endl;
|
|
|
1570 |
#endif
|
|
|
1571 |
}
|
|
|
1572 |
|
|
|
1573 |
#endif
|
|
|