Switch to unified view

a/src/internfile/txtdcode.cpp b/src/internfile/txtdcode.cpp
...
...
22
#include "mimehandler.h"
22
#include "mimehandler.h"
23
#include "log.h"
23
#include "log.h"
24
#include "smallut.h"
24
#include "smallut.h"
25
#include "listmem.h"
25
#include "listmem.h"
26
26
27
using std::string;
28
27
// Called after decoding from utf-8 failed. Handle the common case
29
// Called after decoding from utf-8 failed. Handle the common case
28
// where this is a good old 8bit-encoded text document left-over when
30
// where this is a good old 8bit-encoded text document left-over when
29
// the locale was switched to utf-8. We try to guess a charset
31
// the locale was switched to utf-8. We try to guess a charset
30
// according to the locale language and use it. This is a very rough
32
// according to the locale language and use it. This is a very rough
31
// heuristic, but may be better than discarding the data. 
33
// heuristic, but may be better than discarding the data. 
32
// If we still get a significant number of decode errors, the doc is
34
// If we still get a significant number of decode errors, the doc is
33
// quite probably binary, so just fail.
35
// quite probably binary, so just fail.
34
// Note that we could very well get a wrong transcoding (e.g. between
36
// Note that we could very well get a wrong transcoding (e.g. between
35
// iso-8859 variations), there is no way to detect it.
37
// iso-8859 variations), there is no way to detect it.
36
static bool alternate_decode(const string& in, string& out, const string& ocs)
38
static bool alternate_decode(const string& in, string& out, string& ocs)
37
{
39
{
38
    int ecnt;
40
    int ecnt;
39
    if (samecharset(ocs, cstr_utf8)) {
41
    if (samecharset(ocs, cstr_utf8)) {
40
        string lang = localelang();
42
        string lang = localelang();
41
        string code = langtocode(lang);
43
        string code = langtocode(lang);
42
        LOGDEB("RecollFilter::txtdcode: trying alternate decode from " <<
44
        LOGDEB("RecollFilter::txtdcode: trying alternate decode from " <<
43
               code << "\n");
45
               code << "\n");
44
        bool ret = transcode(in, out, code, cstr_utf8, &ecnt);
46
        bool ret = transcode(in, out, code, cstr_utf8, &ecnt);
45
        return ecnt > 5 ? false : ret;
47
        if (ecnt > 5)
48
            ret = false;
49
        if (ret) {
50
            ocs = code;
51
        }
52
        return ret;
46
    } else {
53
    } else {
47
        // Give a try to utf-8 anyway, as this is self-detecting. This
54
        // Give a try to utf-8 anyway, as this is self-detecting. This
48
        // handles UTF-8 docs in a non-utf-8 environment. Note that
55
        // handles UTF-8 docs in a non-utf-8 environment. Note that
49
        // this will almost never be called, as most encodings are
56
        // this will almost never be called, as most encodings are
50
        // unable to detect errors so that the first try at
57
        // unable to detect errors so that the first try at