Switch to unified view

a b/libupnpp/smallut.cpp
1
/* Copyright (C) 2004-2016 J.F.Dockes
2
 *   This program is free software; you can redistribute it and/or modify
3
 *   it under the terms of the GNU General Public License as published by
4
 *   the Free Software Foundation; either version 2 of the License, or
5
 *   (at your option) any later version.
6
 *
7
 *   This program is distributed in the hope that it will be useful,
8
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of
9
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
10
 *   GNU General Public License for more details.
11
 *
12
 *   You should have received a copy of the GNU General Public License
13
 *   along with this program; if not, write to the
14
 *   Free Software Foundation, Inc.,
15
 *   59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
16
 */
17
#ifdef BUILDING_RECOLL
18
#include "autoconfig.h"
19
#else
20
#include "config.h"
21
#endif
22
23
#include <stdio.h>
24
#include <stdlib.h>
25
#include <time.h>
26
#include <ctype.h>
27
#include <errno.h>
28
#include <time.h>
29
#include <string.h>
30
#include <math.h>
31
32
// Older compilers don't support stdc++ regex, but Windows does not
33
// have the Linux one. Have a simple class to solve the simple cases.
34
#if defined(_WIN32)
35
#define USE_STD_REGEX
36
#include <regex>
37
#else
38
#define USE_LINUX_REGEX
39
#include <regex.h>
40
#endif
41
42
#include <string>
43
#include <iostream>
44
#include <list>
45
#include UNORDERED_MAP_INCLUDE
46
#include UNORDERED_SET_INCLUDE
47
48
#include "smallut.h"
49
50
using namespace std;
51
52
// Case-insensitive comparison of two strings (both sides are uppercased
// with toupper() before being compared).
// Returns 0 if the strings are equal ignoring case, else -1 or 1. When one
// string is a case-insensitive prefix of the other, the shorter one
// compares lower.
int stringicmp(const std::string& s1, const std::string& s2)
{
    const std::string::size_type size1 = s1.length(), size2 = s2.length();
    const std::string::size_type common = size1 < size2 ? size1 : size2;

    for (std::string::size_type i = 0; i < common; i++) {
        // toupper() is only defined for unsigned char values and EOF:
        // go through unsigned char so that 8 bit characters are safe.
        const char c1 = char(::toupper(static_cast<unsigned char>(s1[i])));
        const char c2 = char(::toupper(static_cast<unsigned char>(s2[i])));
        if (c1 != c2) {
            return c1 > c2 ? 1 : -1;
        }
    }
    if (size1 == size2) {
        return 0;
    }
    return size1 < size2 ? -1 : 1;
}
83
// Convert a string to lowercase in place, byte by byte, using tolower().
void stringtolower(std::string& io)
{
    for (std::string::iterator it = io.begin(); it != io.end(); ++it) {
        // tolower() is undefined for negative values other than EOF, so
        // convert through unsigned char first.
        *it = char(::tolower(static_cast<unsigned char>(*it)));
    }
}

// Return a lowercased copy of the input string.
std::string stringtolower(const std::string& i)
{
    std::string o(i);
    stringtolower(o);
    return o;
}
98
99
// Convert a string to uppercase in place, byte by byte, using toupper().
void stringtoupper(std::string& io)
{
    for (std::string::iterator it = io.begin(); it != io.end(); ++it) {
        // toupper() is undefined for negative values other than EOF, so
        // convert through unsigned char first.
        *it = char(::toupper(static_cast<unsigned char>(*it)));
    }
}

// Return an uppercased copy of the input string.
std::string stringtoupper(const std::string& i)
{
    std::string o(i);
    stringtoupper(o);
    return o;
}
114
115
// Case-insensitive comparison of the ends of two strings. The strings are
// walked backwards in parallel until one of them is exhausted.
// Returns 0 if the shorter string is a case-insensitive suffix of the other
// (this includes the case where either is empty), else -1 or 1 according to
// the first differing character found from the end.
int stringisuffcmp(const std::string& s1, const std::string& s2)
{
    std::string::const_reverse_iterator r1 = s1.rbegin(), r2 = s2.rbegin();
    for (; r1 != s1.rend() && r2 != s2.rend(); ++r1, ++r2) {
        // toupper() needs an unsigned char value: plain char may be negative
        const char c1 = char(::toupper(static_cast<unsigned char>(*r1)));
        const char c2 = char(::toupper(static_cast<unsigned char>(*r2)));
        if (c1 != c2) {
            return c1 > c2 ? 1 : -1;
        }
    }
    return 0;
}
130
131
// Case-insensitive comparison where s1 is known to be lowercase already:
// only the characters of s2 are lowercased before comparing.
// Returns 0 on equality, else -1 or 1. If one string is a prefix of the
// other, the shorter one compares lower.
int stringlowercmp(const std::string& s1, const std::string& s2)
{
    const std::string::size_type size1 = s1.length(), size2 = s2.length();
    const std::string::size_type common = size1 < size2 ? size1 : size2;

    for (std::string::size_type i = 0; i < common; i++) {
        // tolower() needs an unsigned char value: plain char may be negative
        const char c2 = char(::tolower(static_cast<unsigned char>(s2[i])));
        if (s1[i] != c2) {
            return s1[i] > c2 ? 1 : -1;
        }
    }
    if (size1 == size2) {
        return 0;
    }
    return size1 < size2 ? -1 : 1;
}
161
162
// Case-insensitive comparison where s1 is known to be uppercase already:
// only the characters of s2 are uppercased before comparing.
// Returns 0 on equality, else -1 or 1. If one string is a prefix of the
// other, the shorter one compares lower.
int stringuppercmp(const std::string& s1, const std::string& s2)
{
    const std::string::size_type size1 = s1.length(), size2 = s2.length();
    const std::string::size_type common = size1 < size2 ? size1 : size2;

    for (std::string::size_type i = 0; i < common; i++) {
        // toupper() needs an unsigned char value: plain char may be negative
        const char c2 = char(::toupper(static_cast<unsigned char>(s2[i])));
        if (s1[i] != c2) {
            return s1[i] > c2 ? 1 : -1;
        }
    }
    if (size1 == size2) {
        return 0;
    }
    return size1 < size2 ? -1 : 1;
}
192
193
// Normalize a charset name for comparison: drop every '-' and '_' and
// lowercase what remains.
static std::string normalizedcharset(const std::string& cs)
{
    std::string out;
    for (std::string::size_type i = 0; i < cs.length(); i++) {
        if (cs[i] != '_' && cs[i] != '-') {
            // tolower() needs an unsigned char value: char may be negative
            out += char(::tolower(static_cast<unsigned char>(cs[i])));
        }
    }
    return out;
}

// Compare charset names, removing the more common spelling variations
// (case, '-' and '_'). Ex: "UTF-8", "utf8" and "Utf_8" are all the same.
bool samecharset(const std::string& cs1, const std::string& cs2)
{
    return normalizedcharset(cs1) == normalizedcharset(cs2);
}
210
211
// Split a string into tokens, shell-like, storing them into a container.
//
// - Tokens are separated by blanks (space, tab, newline, carriage return).
// - A double quote found while between tokens opens a quoted token, which
//   extends up to the next unescaped double quote and may contain blanks.
//   Inside quotes, a backslash escapes the next character.
// - Outside quotes, double quotes and backslashes found inside or at the
//   start of a token are plain characters.
// - Any character from addseps found outside quotes ends the current token
//   and is itself returned as a one-character token.
//
// The container is cleared first. Returns false if the input ends inside a
// quoted section, true otherwise.
template <class T> bool stringToStrings(const std::string& s, T& tokens,
                                        const std::string& addseps)
{
    enum states {SPACE, TOKEN, INQUOTE, ESCAPE};
    states state = SPACE;
    std::string current;
    tokens.clear();

    for (std::string::size_type pos = 0; pos < s.length(); pos++) {
        const char ch = s[pos];
        const bool blank =
            ch == ' ' || ch == '\t' || ch == '\n' || ch == '\r';
        // Quotes, backslashes and blanks are never treated as additional
        // separators, even if listed in addseps.
        const bool extrasep = ch != '"' && ch != '\\' && !blank &&
            !addseps.empty() && addseps.find(ch) != std::string::npos;

        switch (state) {
        case SPACE:
            if (ch == '"') {
                state = INQUOTE;
            } else if (blank) {
                // Still between tokens
            } else if (extrasep) {
                tokens.insert(tokens.end(), std::string(1, ch));
            } else {
                // Any other character (backslash included) starts a token
                current += ch;
                state = TOKEN;
            }
            break;

        case TOKEN:
            if (blank || extrasep) {
                tokens.insert(tokens.end(), current);
                current.clear();
                if (extrasep) {
                    tokens.insert(tokens.end(), std::string(1, ch));
                }
                state = SPACE;
            } else {
                // Includes quotes and backslashes: literal inside a token
                current += ch;
            }
            break;

        case INQUOTE:
            if (ch == '"') {
                tokens.insert(tokens.end(), current);
                current.clear();
                state = SPACE;
            } else if (ch == '\\') {
                state = ESCAPE;
            } else {
                current += ch;
            }
            break;

        case ESCAPE:
            // The escaped character is always kept. Note that an escaped
            // blank does not terminate the escape state.
            current += ch;
            if (!blank) {
                state = INQUOTE;
            }
            break;
        }
    }

    switch (state) {
    case SPACE:
        break;
    case TOKEN:
        tokens.insert(tokens.end(), current);
        break;
    case INQUOTE:
    case ESCAPE:
        // Unterminated quoted section
        return false;
    }
    return true;
}
319
320
// Explicit instantiations of stringToStrings() for the container types
// listed here.
template bool stringToStrings<std::list<std::string> >(
    const std::string&, std::list<std::string>&, const std::string&);
template bool stringToStrings<std::vector<std::string> >(
    const std::string&, std::vector<std::string>&, const std::string&);
template bool stringToStrings<std::set<std::string> >(
    const std::string&, std::set<std::string>&, const std::string&);
template bool stringToStrings<STD_UNORDERED_SET<std::string> >(
    const std::string&, STD_UNORDERED_SET<std::string>&, const std::string&);
328
329
// Append the elements of a string container to s as a blank-separated
// list. Elements containing a space, tab or newline are enclosed in double
// quotes, and every double quote inside an element is preceded by a
// backslash. Note that s is appended to, not reset.
template <class T> void stringsToString(const T& tokens, std::string& s)
{
    bool first = true;
    for (typename T::const_iterator tok = tokens.begin();
            tok != tokens.end(); ++tok) {
        if (!first) {
            s += ' ';
        }
        first = false;

        const bool quoted =
            tok->find_first_of(" \t\n") != std::string::npos;
        if (quoted) {
            s += '"';
        }
        for (std::string::const_iterator c = tok->begin();
                c != tok->end(); ++c) {
            if (*c == '"') {
                s += '\\';
            }
            s += *c;
        }
        if (quoted) {
            s += '"';
        }
    }
}
template void stringsToString<std::list<std::string> >(
    const std::list<std::string>&, std::string&);
template void stringsToString<std::vector<std::string> >(
    const std::vector<std::string>&, std::string&);
template void stringsToString<std::set<std::string> >(
    const std::set<std::string>&, std::string&);

// Same as above, returning the result as a new string.
template <class T> std::string stringsToString(const T& tokens)
{
    std::string result;
    stringsToString<T>(tokens, result);
    return result;
}
template std::string stringsToString<std::list<std::string> >(
    const std::list<std::string>&);
template std::string stringsToString<std::vector<std::string> >(
    const std::vector<std::string>&);
template std::string stringsToString<std::set<std::string> >(
    const std::set<std::string>&);
369
370
// Format the elements of a string container as one CSV line stored into s
// (the previous contents of s are discarded). Fields are joined with sep.
// Empty fields and fields containing the separator, a double quote or a
// newline are enclosed in double quotes, and double quotes inside a field
// are doubled.
template <class T> void stringsToCSV(const T& tokens, std::string& s,
                                     char sep)
{
    // Characters which force a field to be quoted
    std::string special(1, sep);
    special += "\"\n";

    s.erase();
    bool first = true;
    for (typename T::const_iterator field = tokens.begin();
            field != tokens.end(); ++field) {
        if (!first) {
            s += sep;
        }
        first = false;

        const bool needquotes = field->empty() ||
            field->find_first_of(special) != std::string::npos;
        if (needquotes) {
            s += '"';
        }
        for (std::string::const_iterator c = field->begin();
                c != field->end(); ++c) {
            if (*c == '"') {
                s += '"';
            }
            s += *c;
        }
        if (needquotes) {
            s += '"';
        }
    }
}
template void stringsToCSV<std::list<std::string> >(
    const std::list<std::string>&, std::string&, char);
template void stringsToCSV<std::vector<std::string> >(
    const std::vector<std::string>&, std::string&, char);
403
404
// Split str on any of the characters in delims, appending the pieces to
// tokens (which is not cleared first).
// Consecutive delimiters do not produce empty tokens, except that a
// delimiter found while tokens is still empty produces one initial empty
// token. If skipinit is set, leading delimiters are skipped first, and
// nothing at all is added if the string only contains delimiters.
void stringToTokens(const std::string& str, std::vector<std::string>& tokens,
                    const std::string& delims, bool skipinit)
{
    std::string::size_type begin = 0;
    if (skipinit) {
        begin = str.find_first_not_of(delims, 0);
        if (begin == std::string::npos) {
            return;
        }
    }

    while (begin < str.size()) {
        // End of the current token: next delimiter or end of string
        const std::string::size_type end = str.find_first_of(delims, begin);
        if (end == std::string::npos) {
            tokens.push_back(str.substr(begin));
            return;
        }
        if (end > begin) {
            tokens.push_back(str.substr(begin, end - begin));
        } else if (tokens.empty()) {
            // Don't push empty tokens after the first one
            tokens.push_back(std::string());
        }
        begin = end + 1;
    }
}
434
435
// Interpret a string as a boolean value.
// - Empty string: false.
// - String beginning with a digit: atoi() value is non-zero.
// - String beginning with one of y, Y, t, T (yes/true): true.
// - Anything else: false.
bool stringToBool(const std::string& s)
{
    if (s.empty()) {
        return false;
    }
    // isdigit() needs an unsigned char value: plain char may be negative
    const unsigned char first = static_cast<unsigned char>(s[0]);
    if (isdigit(first)) {
        return atoi(s.c_str()) != 0;
    }
    return first == 'y' || first == 'Y' || first == 't' || first == 'T';
}
449
450
// Remove from both ends of s, in place, all the characters which are part
// of the ws set (a nul-terminated list of characters). The string ends up
// empty if it contains nothing else.
void trimstring(std::string& s, const char *ws)
{
    const std::string::size_type first = s.find_first_not_of(ws);
    if (first == std::string::npos) {
        // Nothing but characters to be trimmed
        s.clear();
        return;
    }
    s.erase(0, first);

    const std::string::size_type last = s.find_last_not_of(ws);
    if (last != std::string::npos && last + 1 != s.length()) {
        s.erase(last + 1);
    }
}
464
465
// Neutralize some characters: copy str to out, replacing every run of
// characters belonging to chars with a single space. Leading runs are
// dropped altogether, while a trailing run does yield a final space.
// The result is appended to out, which is not cleared first.
void neutchars(const std::string& str, std::string& out,
               const std::string& chars)
{
    std::string::size_type cursor = 0;
    while (true) {
        // Skip the run of characters to be neutralized. Done if this
        // eats up all the remaining input.
        const std::string::size_type begin =
            str.find_first_not_of(chars, cursor);
        if (begin == std::string::npos) {
            return;
        }
        // Find the end of the segment to keep. It can't be empty here.
        cursor = str.find_first_of(chars, begin);
        if (cursor == std::string::npos) {
            out.append(str, begin, std::string::npos);
            return;
        }
        out.append(str, begin, cursor - begin);
        out += ' ';
    }
}

// Same as above, returning the result as a new string.
std::string neutchars(const std::string& str, const std::string& chars)
{
    std::string out;
    neutchars(str, out, chars);
    return out;
}
491
492
493
/* Truncate a string to at most maxlen bytes, cutting at a word boundary.
 * Note: we could also use textsplit, stopping when we have enough, this
 * would be cleanly utf8-aware but would remove punctuation.
 *
 * An input which already fits is returned unchanged. Else the first maxlen
 * bytes are cut back to just before the last separator they contain, and
 * the result is empty if there is no separator at all. */
static const std::string cstr_SEPAR = " \t\n\r-:.;,/[]{}";
std::string truncate_to_word(const std::string& input,
                             std::string::size_type maxlen)
{
    if (input.length() <= maxlen) {
        return input;
    }

    std::string head(input, 0, maxlen);
    const std::string::size_type cut = head.find_last_of(cstr_SEPAR);
    // An older version only cut at the separator if it was found in the
    // second half of the text. But we HAVE to cut at a separator, else we'd
    // need to do utf8 stuff to avoid truncating inside a multibyte
    // character. In any case, not finding a separator means that the text
    // probably has no value. Except probably for Asian languages, so we may
    // want to fix this one day.
    if (cut == std::string::npos) {
        return std::string();
    }
    head.resize(cut);
    return head;
}
520
521
// Escape things that would look like markup: '<' becomes "&lt;" and '&'
// becomes "&amp;". All other characters are copied unchanged.
std::string escapeHtml(const std::string& in)
{
    std::string out;
    for (std::string::const_iterator it = in.begin(); it != in.end(); ++it) {
        if (*it == '<') {
            out += "&lt;";
        } else if (*it == '&') {
            out += "&amp;";
        } else {
            out += *it;
        }
    }
    return out;
}
539
540
// Return the input enclosed in double quotes, with a backslash inserted in
// front of each of the following characters: $ ` " newline backslash.
std::string escapeShell(const std::string& in)
{
    std::string out("\"");
    for (std::string::const_iterator it = in.begin(); it != in.end(); ++it) {
        const char c = *it;
        if (c == '$' || c == '`' || c == '"' || c == '\n' || c == '\\') {
            out += '\\';
        }
        out += c;
    }
    out += '"';
    return out;
}
568
569
// Escape a value to make it suitable as a C++ source double-quoted string
// (for generating a C++ program). The result is enclosed in double quotes.
// Double quotes and backslashes get a backslash prefix, newlines and
// carriage returns are turned into \n and \r escape sequences.
std::string makeCString(const std::string& in)
{
    std::string out("\"");
    for (std::string::const_iterator it = in.begin(); it != in.end(); ++it) {
        const char c = *it;
        if (c == '"') {
            out += "\\\"";
        } else if (c == '\n') {
            out += "\\n";
        } else if (c == '\r') {
            out += "\\r";
        } else if (c == '\\') {
            out += "\\\\";
        } else {
            out += c;
        }
    }
    out += '"';
    return out;
}
596
597
598
// Substitute printf-like percent commands inside a string.
// Each %x sequence in the input is replaced by the value found for x in
// subs, or by nothing if x is not a key of the map. %% yields a single
// percent sign, and so does a lone % at the very end of the input.
// The result is appended to out, which is not cleared first.
// Always returns true.
bool pcSubst(const std::string& in, std::string& out,
             const std::map<char, std::string>& subs)
{
    for (std::string::size_type i = 0; i < in.size(); i++) {
        if (in[i] != '%') {
            out += in[i];
            continue;
        }
        // Look at the character which follows the percent sign
        if (++i == in.size()) {
            out += '%';
            break;
        }
        if (in[i] == '%') {
            out += '%';
            continue;
        }
        const std::map<char, std::string>::const_iterator hit =
            subs.find(in[i]);
        if (hit != subs.end()) {
            out += hit->second;
        }
        // Else: unknown key, substitute to nothing. Copying the key
        // character as was done in the past does not make sense.
    }
    return true;
}
625
626
// Substitute percent commands inside a string, using string keys.
// The input may contain both %x (single character key) and %(key)
// sequences. Each one is replaced by the value found for the key in subs,
// or by nothing if the key is unknown. %% yields a single percent sign.
// A lone % or %( at the very end of the input is copied as is, and so is
// the rest of the input after a %( with no closing parenthesis.
// out is cleared first. Always returns true.
bool pcSubst(const std::string& in, std::string& out,
             const std::map<std::string, std::string>& subs)
{
    out.erase();
    for (std::string::size_type i = 0; i < in.size(); i++) {
        if (in[i] != '%') {
            out += in[i];
            continue;
        }
        // Look at the character which follows the percent sign
        if (++i == in.size()) {
            out += '%';
            break;
        }
        if (in[i] == '%') {
            out += '%';
            continue;
        }

        std::string key;
        if (in[i] != '(') {
            key = in[i];
        } else {
            if (++i == in.size()) {
                out += "%(";
                break;
            }
            const std::string::size_type close = in.find(')', i);
            if (close == std::string::npos) {
                // No closing parenthesis: copy what remains, "%("
                // included, and stop.
                out += in.substr(i - 2);
                break;
            }
            key = in.substr(i, close - i);
            // Leave the index on the ')': the loop increment skips it
            i = close;
        }

        const std::map<std::string, std::string>::const_iterator hit =
            subs.find(key);
        if (hit != subs.end()) {
            out += hit->second;
        }
        // Else: substitute to nothing, that's the reasonable thing to do
        // instead of keeping the %(key)
    }
    return true;
}
671
// Write the decimal digits of val into rbuf in reverse order (least
// significant digit first), followed by a terminating nul. At least one
// digit is always produced, so that 0 yields "0".
// Returns the number of digits written. rbuf must have room for them and
// for the nul: 21 bytes are enough for a 64 bits value.
inline static int ulltorbuf(unsigned long long val, char *rbuf)
{
    int idx = 0;
    do {
        rbuf[idx++] = char('0' + val % 10);
        val /= 10;
    } while (val);
    rbuf[idx] = 0;
    return idx;
}

// Append the first idx characters of rbuf to buf, in reverse order.
inline static void ullcopyreverse(const char *rbuf, std::string& buf, int idx)
{
    buf.reserve(idx + 1);
    for (int i = idx - 1; i >= 0; i--) {
        buf.push_back(rbuf[i]);
    }
}

// Store the decimal representation of val into buf, replacing its
// previous contents.
void ulltodecstr(unsigned long long val, std::string& buf)
{
    buf.clear();
    char rbuf[30];
    const int idx = ulltorbuf(val, rbuf);
    ullcopyreverse(rbuf, buf, idx);
}

// Store the decimal representation of val into buf, replacing its
// previous contents. Negative values get a leading '-'.
void lltodecstr(long long val, std::string& buf)
{
    buf.clear();

    const bool neg = val < 0;
    // Compute the magnitude with unsigned arithmetic: negating the most
    // negative long long as a signed value would overflow, which is
    // undefined behaviour.
    const unsigned long long magnitude = neg ?
        0ULL - static_cast<unsigned long long>(val) :
        static_cast<unsigned long long>(val);

    char rbuf[30];
    int idx = ulltorbuf(magnitude, rbuf);
    if (neg) {
        // The digits are reversed, so the sign goes last
        rbuf[idx++] = '-';
    }
    rbuf[idx] = 0;

    ullcopyreverse(rbuf, buf, idx);
}

// Return the decimal representation of val.
std::string lltodecstr(long long val)
{
    std::string buf;
    lltodecstr(val, buf);
    return buf;
}

// Return the decimal representation of val.
std::string ulltodecstr(unsigned long long val)
{
    std::string buf;
    ulltodecstr(val, buf);
    return buf;
}
744
745
// Convert byte count into unit (KB/MB...) appropriate for display
746
string displayableBytes(off_t size)
747
{
748
    const char *unit;
749
750
    double roundable = 0;
751
    if (size < 1000) {
752
        unit = " B ";
753
        roundable = double(size);
754
    } else if (size < 1E6) {
755
        unit = " KB ";
756
        roundable = double(size) / 1E3;
757
    } else if (size < 1E9) {
758
        unit = " MB ";
759
        roundable = double(size) / 1E6;
760
    } else {
761
        unit = " GB ";
762
        roundable = double(size) / 1E9;
763
    }
764
    size = off_t(round(roundable));
765
    return lltodecstr(size).append(unit);
766
}
767
768
// Break a text into lines of around ll characters, each one terminated by
// a newline.
// Lines are preferably cut just after the last space found within the
// first ll characters. If there is none, the line extends up to and
// including the next space, or up to the end of the text.
// Once the line counter reaches maxlines, an " ... " line is added and
// processing stops.
std::string breakIntoLines(const std::string& in, unsigned int ll,
                           unsigned int maxlines)
{
    std::string remaining(in);
    std::string result;
    unsigned int emitted = 0;

    while (!remaining.empty()) {
        std::string line = remaining.substr(0, ll);
        if (line.length() == ll) {
            // Got a full width chunk: look for a better place to cut
            std::string::size_type blank = line.rfind(' ');
            if (blank != std::string::npos) {
                line.erase(blank + 1);
            } else {
                blank = remaining.find(' ');
                if (blank != std::string::npos) {
                    line = remaining.substr(0, blank + 1);
                } else {
                    line = remaining;
                }
            }
        }
        // This can't happen, but anyway. Be very sure to avoid an
        // infinite loop
        if (line.empty()) {
            result = remaining;
            break;
        }
        result += line;
        result += '\n';
        if (emitted++ >= maxlines) {
            result += " ... \n";
            break;
        }
        remaining.erase(0, line.length());
    }
    return result;
}
803
804
// Date is Y[-M[-D]]
805
static bool parsedate(vector<string>::const_iterator& it,
806
                      vector<string>::const_iterator end, DateInterval *dip)
807
{
808
    dip->y1 = dip->m1 = dip->d1 = dip->y2 = dip->m2 = dip->d2 = 0;
809
    if (it->length() > 4 || !it->length() ||
810
            it->find_first_not_of("0123456789") != string::npos) {
811
        return false;
812
    }
813
    if (it == end || sscanf(it++->c_str(), "%d", &dip->y1) != 1) {
814
        return false;
815
    }
816
    if (it == end || *it == "/") {
817
        return true;
818
    }
819
    if (*it++ != "-") {
820
        return false;
821
    }
822
823
    if (it->length() > 2 || !it->length() ||
824
            it->find_first_not_of("0123456789") != string::npos) {
825
        return false;
826
    }
827
    if (it == end || sscanf(it++->c_str(), "%d", &dip->m1) != 1) {
828
        return false;
829
    }
830
    if (it == end || *it == "/") {
831
        return true;
832
    }
833
    if (*it++ != "-") {
834
        return false;
835
    }
836
837
    if (it->length() > 2 || !it->length() ||
838
            it->find_first_not_of("0123456789") != string::npos) {
839
        return false;
840
    }
841
    if (it == end || sscanf(it++->c_str(), "%d", &dip->d1) != 1) {
842
        return false;
843
    }
844
845
    return true;
846
}
847
848
// Called with the 'P' already processed. Period ends at end of string
849
// or at '/'. We dont' do a lot effort at validation and will happily
850
// accept 10Y1Y4Y (the last wins)
851
static bool parseperiod(vector<string>::const_iterator& it,
852
                        vector<string>::const_iterator end, DateInterval *dip)
853
{
854
    dip->y1 = dip->m1 = dip->d1 = dip->y2 = dip->m2 = dip->d2 = 0;
855
    while (it != end) {
856
        int value;
857
        if (it->find_first_not_of("0123456789") != string::npos) {
858
            return false;
859
        }
860
        if (sscanf(it++->c_str(), "%d", &value) != 1) {
861
            return false;
862
        }
863
        if (it == end || it->empty()) {
864
            return false;
865
        }
866
        switch (it->at(0)) {
867
        case 'Y':
868
        case 'y':
869
            dip->y1 = value;
870
            break;
871
        case 'M':
872
        case 'm':
873
            dip->m1 = value;
874
            break;
875
        case 'D':
876
        case 'd':
877
            dip->d1 = value;
878
            break;
879
        default:
880
            return false;
881
        }
882
        it++;
883
        if (it == end) {
884
            return true;
885
        }
886
        if (*it == "/") {
887
            return true;
888
        }
889
    }
890
    return true;
891
}
892
893
#ifdef _WIN32
// Replacement for the POSIX setenv() call, built on _putenv_s().
// As specified by POSIX, if the variable already exists and overwrite is
// zero, the environment is left unchanged and this is a success (return
// value 0), not an error.
int setenv(const char *name, const char *value, int overwrite)
{
    if (!overwrite && getenv(name) != 0) {
        return 0;
    }
    return _putenv_s(name, value);
}

// Replacement for the POSIX unsetenv() call: setting an empty value with
// _putenv_s() removes the variable from the environment.
void unsetenv(const char *name)
{
    _putenv_s(name, "");
}
#endif
909
910
// Portable replacement for timegm(): convert a broken-down UTC time into a
// time_t value. This is done by calling mktime() with the TZ environment
// variable temporarily set to an empty value, and restoring the previous
// state afterwards. Like mktime(), this may normalize the fields of *tm.
// Note that the process environment is modified during the call.
time_t portable_timegm(struct tm *tm)
{
    // Keep a copy of the current value: the pointer returned by getenv()
    // may be invalidated by the setenv() call which follows.
    const char *curtz = getenv("TZ");
    const bool hadtz = curtz != 0;
    const std::string savedtz(hadtz ? curtz : "");

    setenv("TZ", "", 1);
    tzset();
    const time_t ret = mktime(tm);

    if (hadtz) {
        setenv("TZ", savedtz.c_str(), 1);
    } else {
        unsetenv("TZ");
    }
    tzset();
    return ret;
}
927
928
#if 0
// Debugging helper, currently compiled out: print a label followed by the
// interval as y1-m1-d1/y2-m2-d2 on the standard error output.
static void cerrdip(const std::string& s, DateInterval *dip)
{
    std::cerr << s
              << dip->y1 << "-" << dip->m1 << "-" << dip->d1
              << "/"
              << dip->y2 << "-" << dip->m2 << "-" << dip->d2
              << std::endl;
}
#endif
936
937
// Compute date + period. Won't work out of the unix era.
938
// or pre-1970 dates. Just convert everything to unixtime and
939
// seconds (with average durations for months/years), add and convert
940
// back
941
static bool addperiod(DateInterval *dp, DateInterval *pp)
942
{
943
    struct tm tm;
944
    // Create a struct tm with possibly non normalized fields and let
945
    // timegm sort it out
946
    memset(&tm, 0, sizeof(tm));
947
    tm.tm_year = dp->y1 - 1900 + pp->y1;
948
    tm.tm_mon = dp->m1 + pp->m1 - 1;
949
    tm.tm_mday = dp->d1 + pp->d1;
950
    time_t tres = mktime(&tm);
951
    localtime_r(&tres, &tm);
952
    dp->y1 = tm.tm_year + 1900;
953
    dp->m1 = tm.tm_mon + 1;
954
    dp->d1 = tm.tm_mday;
955
    //cerrdip("Addperiod return", dp);
956
    return true;
957
}
958
// Return the number of days in the given month (1 to 12) of the given
// year, using the Gregorian leap year rule for February: years divisible
// by 4 are leap years, except for those divisible by 100 but not by 400.
// Any month value out of the 1-12 range yields 30.
int monthdays(int mon, int year)
{
    switch (mon) {
    case 2: {
        const bool leap =
            (year % 4 == 0 && year % 100 != 0) || year % 400 == 0;
        return leap ? 29 : 28;
    }
    case 1:
    case 3:
    case 5:
    case 7:
    case 8:
    case 10:
    case 12:
        return 31;
    default:
        return 30;
    }
}
976
bool parsedateinterval(const string& s, DateInterval *dip)
977
{
978
    vector<string> vs;
979
    dip->y1 = dip->m1 = dip->d1 = dip->y2 = dip->m2 = dip->d2 = 0;
980
    DateInterval p1, p2, d1, d2;
981
    p1 = p2 = d1 = d2 = *dip;
982
    bool hasp1 = false, hasp2 = false, hasd1 = false, hasd2 = false,
983
         hasslash = false;
984
985
    if (!stringToStrings(s, vs, "PYMDpymd-/")) {
986
        return false;
987
    }
988
    if (vs.empty()) {
989
        return false;
990
    }
991
992
    vector<string>::const_iterator it = vs.begin();
993
    if (*it == "P" || *it == "p") {
994
        it++;
995
        if (!parseperiod(it, vs.end(), &p1)) {
996
            return false;
997
        }
998
        hasp1 = true;
999
        //cerrdip("p1", &p1);
1000
        p1.y1 = -p1.y1;
1001
        p1.m1 = -p1.m1;
1002
        p1.d1 = -p1.d1;
1003
    } else if (*it == "/") {
1004
        hasslash = true;
1005
        goto secondelt;
1006
    } else {
1007
        if (!parsedate(it, vs.end(), &d1)) {
1008
            return false;
1009
        }
1010
        hasd1 = true;
1011
    }
1012
1013
    // Got one element and/or /
1014
secondelt:
1015
    if (it != vs.end()) {
1016
        if (*it != "/") {
1017
            return false;
1018
        }
1019
        hasslash = true;
1020
        it++;
1021
        if (it == vs.end()) {
1022
            // ok
1023
        } else if (*it == "P" || *it == "p") {
1024
            it++;
1025
            if (!parseperiod(it, vs.end(), &p2)) {
1026
                return false;
1027
            }
1028
            hasp2 = true;
1029
        } else {
1030
            if (!parsedate(it, vs.end(), &d2)) {
1031
                return false;
1032
            }
1033
            hasd2 = true;
1034
        }
1035
    }
1036
1037
    // 2 periods dont' make sense
1038
    if (hasp1 && hasp2) {
1039
        return false;
1040
    }
1041
    // Nothing at all doesn't either
1042
    if (!hasp1 && !hasd1 && !hasp2 && !hasd2) {
1043
        return false;
1044
    }
1045
1046
    // Empty part means today IF other part is period, else means
1047
    // forever (stays at 0)
1048
    time_t now = time(0);
1049
    struct tm *tmnow = gmtime(&now);
1050
    if ((!hasp1 && !hasd1) && hasp2) {
1051
        d1.y1 = 1900 + tmnow->tm_year;
1052
        d1.m1 = tmnow->tm_mon + 1;
1053
        d1.d1 = tmnow->tm_mday;
1054
        hasd1 = true;
1055
    } else if ((!hasp2 && !hasd2) && hasp1) {
1056
        d2.y1 = 1900 + tmnow->tm_year;
1057
        d2.m1 = tmnow->tm_mon + 1;
1058
        d2.d1 = tmnow->tm_mday;
1059
        hasd2 = true;
1060
    }
1061
1062
    // Incomplete dates have different meanings depending if there is
1063
    // a period or not (actual or infinite indicated by a / + empty)
1064
    //
1065
    // If there is no explicit period, an incomplete date indicates a
1066
    // period of the size of the uncompleted elements. Ex: 1999
1067
    // actually means 1999/P12M
1068
    //
1069
    // If there is a period, the incomplete date should be extended
1070
    // to the beginning or end of the unspecified portion. Ex: 1999/
1071
    // means 1999-01-01/ and /1999 means /1999-12-31
1072
    if (hasd1) {
1073
        if (!(hasslash || hasp2)) {
1074
            if (d1.m1 == 0) {
1075
                p2.m1 = 12;
1076
                d1.m1 = 1;
1077
                d1.d1 = 1;
1078
            } else if (d1.d1 == 0) {
1079
                d1.d1 = 1;
1080
                p2.d1 = monthdays(d1.m1, d1.y1);
1081
            }
1082
            hasp2 = true;
1083
        } else {
1084
            if (d1.m1 == 0) {
1085
                d1.m1 = 1;
1086
                d1.d1 = 1;
1087
            } else if (d1.d1 == 0) {
1088
                d1.d1 = 1;
1089
            }
1090
        }
1091
    }
1092
    // if hasd2 is true we had a /
1093
    if (hasd2) {
1094
        if (d2.m1 == 0) {
1095
            d2.m1 = 12;
1096
            d2.d1 = 31;
1097
        } else if (d2.d1 == 0) {
1098
            d2.d1 = monthdays(d2.m1, d2.y1);
1099
        }
1100
    }
1101
    if (hasp1) {
1102
        // Compute d1
1103
        d1 = d2;
1104
        if (!addperiod(&d1, &p1)) {
1105
            return false;
1106
        }
1107
    } else if (hasp2) {
1108
        // Compute d2
1109
        d2 = d1;
1110
        if (!addperiod(&d2, &p2)) {
1111
            return false;
1112
        }
1113
    }
1114
1115
    dip->y1 = d1.y1;
1116
    dip->m1 = d1.m1;
1117
    dip->d1 = d1.d1;
1118
    dip->y2 = d2.y1;
1119
    dip->m2 = d2.m1;
1120
    dip->d2 = d2.d1;
1121
    return true;
1122
}
1123
1124
1125
#if !defined(sun) && !defined(_WIN32)
// strerror_r() exists in two incompatible flavours:
// - The POSIX/XSI one returns an int and always stores the message in the
//   supplied buffer.
// - The GNU one returns a pointer to the message, which may be the supplied
//   buffer or some static storage (in which case the buffer is left untouched).
// These two overloads let the compiler select the right handling from the
// actual return type, so that we get a message text in both cases.
static inline const char *strerror_r_text(int, const char *buf)
{
    // XSI flavour: whatever could be produced is in the buffer.
    return buf;
}
static inline const char *strerror_r_text(const char *msg, const char *buf)
{
    // GNU flavour: the returned pointer is the message.
    return msg ? msg : buf;
}
#endif

/**
 * Append a description of a system error to a message string.
 *
 * The appended text has the form "<what>: errno: <number> : <system message>".
 *
 * @param reason string to append to. Nothing is done if this is null.
 * @param what optional context (e.g. the name of the failed call). Skipped
 *    if null.
 * @param _errno the error number to describe.
 */
void catstrerror(std::string *reason, const char *what, int _errno)
{
    if (!reason) {
        return;
    }
    if (what) {
        reason->append(what);
    }

    reason->append(": errno: ");

    // 20 bytes are more than enough for any int in decimal (sign and
    // terminating null included).
    char nbuf[20];
    snprintf(nbuf, sizeof(nbuf), "%d", _errno);
    reason->append(nbuf);

    reason->append(" : ");

#if defined(sun) || defined(_WIN32)
    // Note: sun strerror is noted mt-safe ??
    reason->append(strerror(_errno));
#else
#define ERRBUFSZ 200
    char errbuf[ERRBUFSZ];
    // Start with an empty string: at worst we append no message at all.
    errbuf[0] = 0;
    reason->append(
        strerror_r_text(strerror_r(_errno, errbuf, ERRBUFSZ), errbuf));
#endif
}
1167
1168
1169
// Flat list of (language code, character encoding name) pairs: even
// indices hold the language, the following odd index holds the encoding
// returned by langtocode() for that language.
static const char *vlang_to_code[] = {
    "be", "cp1251",
    "bg", "cp1251",
    "cs", "iso-8859-2",
    "el", "iso-8859-7",
    "he", "iso-8859-8",
    "hr", "iso-8859-2",
    "hu", "iso-8859-2",
    "ja", "eucjp",
    "kk", "pt154",
    "ko", "euckr",
    "lt", "iso-8859-13",
    "lv", "iso-8859-13",
    "pl", "iso-8859-2",
    "rs", "iso-8859-2",
    "ro", "iso-8859-2",
    "ru", "koi8-r",
    "sk", "iso-8859-2",
    "sl", "iso-8859-2",
    "sr", "iso-8859-2",
    "th", "iso-8859-11",
    "tr", "iso-8859-9",
    "uk", "koi8-u",
};

// Value returned by langtocode() for languages missing from the table.
static const std::string cstr_cp1252("CP1252");

/**
 * Return the encoding name associated with a language code.
 *
 * The lookup map is built from vlang_to_code on the first call and kept in
 * a function-local static. The lookup is an exact, case-sensitive match.
 *
 * @param lang language code, e.g. "ru".
 * @return the encoding from the table, or "CP1252" if the language is not
 *    listed.
 */
std::string langtocode(const std::string& lang)
{
    static STD_UNORDERED_MAP<std::string, std::string> lang_to_code;
    if (lang_to_code.empty()) {
        const size_t nentries =
            sizeof(vlang_to_code) / sizeof(vlang_to_code[0]);
        // Entries go by pairs. Testing i + 1 ensures that a dangling last
        // element (odd-sized table) would be ignored instead of causing an
        // out of bounds read.
        for (size_t i = 0; i + 1 < nentries; i += 2) {
            lang_to_code[vlang_to_code[i]] = vlang_to_code[i + 1];
        }
    }
    STD_UNORDERED_MAP<std::string, std::string>::const_iterator it =
        lang_to_code.find(lang);

    // Use cp1252 by default...
    if (it == lang_to_code.end()) {
        return cstr_cp1252;
    }

    return it->second;
}
1215
1216
/**
 * Return the language part of the LANG environment variable.
 *
 * Locale names have the general form language[_territory][.codeset][@modifier].
 * Everything from the first '_', '.' or '@' is dropped, so that values
 * like "fr_FR.UTF-8", "de@euro" or "C.UTF-8" are handled, not only the ones
 * containing a territory.
 *
 * @return the language (e.g. "fr"), or "en" if LANG is unset, empty, or
 *    designates the "C" / "POSIX" locale.
 */
std::string localelang()
{
    const char *lang = getenv("LANG");
    if (lang == 0 || *lang == 0) {
        return "en";
    }

    std::string locale(lang);
    const std::string::size_type langend = locale.find_first_of("_.@");
    if (langend != std::string::npos) {
        locale.erase(langend);
    }

    // Test after truncation so that e.g. "C.UTF-8" is also seen as "C"
    if (locale.empty() || locale == "C" || locale == "POSIX") {
        return "en";
    }
    return locale;
}
1231
1232
#ifdef USE_STD_REGEX
1233
1234
// Private data for SimpleRegexp, std::regex based implementation.
class SimpleRegexp::Internal {
public:
    /**
     * Compile the expression using the extended syntax.
     *
     * @param exp the regular expression text.
     * @param flags combination of SRE_ICASE (case-insensitive match) and
     *    SRE_NOSUB (no sub-expression capture).
     * @param nm sub-match count requested by the caller. It is only stored
     *    by this implementation.
     */
    Internal(const std::string& exp, int flags, int nm)
        : ok(false), nmatch(nm) {
        std::regex_constants::syntax_option_type opts =
            std::regex_constants::extended;
        if (flags & SRE_ICASE) {
            opts = opts | std::regex_constants::icase;
        }
        if (flags & SRE_NOSUB) {
            opts = opts | std::regex_constants::nosubs;
        }
        // std::regex reports a bad expression by throwing. Translate this
        // into the ok flag, like the regcomp()-based implementation does,
        // instead of letting the exception escape from the constructor.
        try {
            expr.assign(exp, opts);
            ok = true;
        } catch (const std::regex_error&) {
            ok = false;
        }
    }
    // True if the expression was successfully compiled
    bool ok;
    // Compiled expression
    std::regex expr;
    // Results from the last match attempt
    std::smatch res;
    // Sub-match count received by the constructor
    int nmatch;
};
1248
1249
bool SimpleRegexp::simpleMatch(const string& val) const
1250
{
1251
    if (!ok())
1252
        return false;
1253
    return regex_match(val, m->res, m->expr);
1254
}
1255
1256
string SimpleRegexp::getMatch(const string& val, int i) const
1257
{
1258
    return m->res.str(i);
1259
}
1260
1261
#else // -> !WIN32
1262
1263
// Private data for SimpleRegexp, POSIX regcomp()/regexec() implementation.
class SimpleRegexp::Internal {
public:
    /**
     * Compile the expression using the extended syntax.
     *
     * @param exp the regular expression text.
     * @param flags combination of SRE_ICASE (case-insensitive match) and
     *    SRE_NOSUB (no sub-expression capture).
     * @param nm number of sub-matches to record, in addition to the whole
     *    match. Negative values are treated as 0.
     */
    Internal(const std::string& exp, int flags, int nm)
        : ok(false), nmatch(nm < 0 ? 0 : nm) {
        int cflags = REG_EXTENDED;
        if (flags & SRE_ICASE) {
            cflags |= REG_ICASE;
        }
        if (flags & SRE_NOSUB) {
            cflags |= REG_NOSUB;
        }
        ok = (regcomp(&expr, exp.c_str(), cflags) == 0);
        // regexec() writes nmatch + 1 elements (whole match + sub-matches)
        // into this array, so the elements must really exist: resize(), not
        // reserve(), which would leave the vector empty.
        matches.resize(nmatch + 1);
    }
    ~Internal() {
        // Only a successfully compiled expression may be freed.
        if (ok) {
            regfree(&expr);
        }
    }
    // True if the expression was successfully compiled
    bool ok;
    // Compiled expression. Only valid if ok is true
    regex_t expr;
    // Count of sub-matches to record (not counting the whole match)
    int nmatch;
    // Match offsets from the last regexec() call
    std::vector<regmatch_t> matches;

private:
    // Not copyable: a copy would end up calling regfree() twice on the
    // same compiled expression. Declared and not defined.
    Internal(const Internal&);
    Internal& operator=(const Internal&);
};
1283
1284
bool SimpleRegexp::simpleMatch(const string& val) const
1285
{
1286
    if (!ok())
1287
        return false;
1288
    if (regexec(&m->expr, val.c_str(), m->nmatch+1, &m->matches[0], 0) == 0) {
1289
        return true;
1290
    } else {
1291
        return false;
1292
    }
1293
}
1294
1295
string SimpleRegexp::getMatch(const string& val, int i) const
1296
{
1297
    if (i > m->nmatch) {
1298
        return string();
1299
    }
1300
    return val.substr(m->matches[i].rm_so,
1301
                      m->matches[i].rm_eo - m->matches[i].rm_so);
1302
}
1303
1304
#endif // win/notwinf
1305
1306
/**
 * Build a regular expression matcher.
 *
 * All parameters are forwarded to the platform-specific Internal object.
 * Use ok() to check that the expression could be compiled.
 *
 * @param exp the regular expression text.
 * @param flags combination of SRE_ICASE and SRE_NOSUB.
 * @param nmatch count of sub-matches which should be retrievable
 *    through getMatch().
 */
SimpleRegexp::SimpleRegexp(const std::string& exp, int flags, int nmatch)
    : m(new Internal(exp, flags, nmatch))
{
}
1310
1311
// Release the Internal object allocated by the constructor.
SimpleRegexp::~SimpleRegexp()
{
    delete m;
}
1315
1316
bool SimpleRegexp::ok() const
1317
{
1318
    return m->ok;
1319
}
1320
1321
bool SimpleRegexp::operator() (const string& val) const
1322
{
1323
    return simpleMatch(val);
1324
}
1325
1326
// Initialization for static stuff to be called from main thread before going
1327
// multiple
1328
void smallut_init_mt()
1329
{
1330
    // Init langtocode() static table
1331
    langtocode("");
1332
}