Switch to unified view

a/src/internfile/txtdcode.cpp b/src/internfile/txtdcode.cpp
...
...
13
 *  along with this program; if not, write to the Free Software
13
 *  along with this program; if not, write to the Free Software
14
 *  Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
14
 *  Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
15
 */
15
 */
16
#include "autoconfig.h"
16
#include "autoconfig.h"
17
17
18
#include <tr1/unordered_map>
19
using std::tr1::unordered_map;
20
21
#include "cstr.h"
18
#include "cstr.h"
22
#include "transcode.h"
19
#include "transcode.h"
23
#include "mimehandler.h"
20
#include "mimehandler.h"
24
#include "debuglog.h"
21
#include "debuglog.h"
25
#include "smallut.h"
22
#include "smallut.h"
26
27
static const char *vcountry_to_code[] = {
28
    "fr", "windows-1252",
29
    "al", "windows-1252", 
30
    "dk", "windows-1252",
31
    "en", "windows-1252",
32
    "de", "windows-1252",
33
    "is", "windows-1252",
34
    "my", "windows-1252",
35
    "ie", "windows-1252",
36
    "gb", "windows-1252",
37
    "it", "windows-1252",
38
    "lu", "windows-1252",
39
    "no", "windows-1252",
40
    "pt", "windows-1252",
41
    "es", "windows-1252",
42
    "se", "windows-1252",
43
    "ba", "iso-8859-2",
44
    "hr", "iso-8859-2",
45
    "cz", "iso-8859-2",
46
    "hu", "iso-8859-2",
47
    "pl", "iso-8859-2",
48
    "rs", "iso-8859-2",
49
    "sk", "iso-8859-2",
50
    "si", "iso-8859-2",
51
    "gr", "iso-8859-7",
52
    "il", "iso-8859-8",
53
    "tr", "iso-8859-9",
54
    "th", "iso-8859-11",
55
    "lv", "iso-8859-13",
56
    "lt", "iso-8859-13",
57
};
58
23
59
24
60
// Called after decoding from utf-8 failed. Handle the common case
25
// Called after decoding from utf-8 failed. Handle the common case
61
// where this is a good old 8bit-encoded text document left-over when
26
// where this is a good old 8bit-encoded text document left-over when
62
// the locale was switched to utf-8. We try to guess a charset
27
// the locale was switched to utf-8. We try to guess a charset
63
// according to the locale language and use it. This is a very rough
28
// according to the locale language and use it. This is a very rough
64
// heuristic, but may be better than discarding the data.
29
// heuristic, but may be better than discarding the data.
65
static bool alternate_decode(const string& in, string& out)
30
static bool alternate_decode(const string& in, string& out)
66
{
31
{
67
    static unordered_map<string, string> country_to_code;
32
    string lang = localelang();
68
    if (country_to_code.empty()) {
33
    string code = langtocode(lang);
69
  for (unsigned int i = 0; 
70
       i < sizeof(vcountry_to_code) / sizeof(char *); i += 2) {
71
      country_to_code[vcountry_to_code[i]] = vcountry_to_code[i+1];
72
  }
73
    }
74
75
    string locale = setlocale(LC_CTYPE, 0);
76
    LOGDEB(("RecollFilter::alternate_dcde: locale: [%s]\n", locale.c_str()));
77
    string::size_type under = locale.find_first_of("_");
78
    if (under == string::npos)
79
  return false;
80
    string country = locale.substr(0, under);
81
82
    unordered_map<string,string>::const_iterator it = 
83
  country_to_code.find(country);
84
    if (it == country_to_code.end())
85
  return false;
86
    string code = it->second;
87
88
    LOGDEB(("RecollFilter::txtdcode: trying alternate decode from %s\n",
34
    LOGDEB(("RecollFilter::txtdcode: trying alternate decode from %s\n",
89
        code.c_str()));
35
        code.c_str()));
90
    return transcode(in, out, code, cstr_utf8);
36
    return transcode(in, out, code, cstr_utf8);
91
}
37
}
92
38