Switch to unified view

a/src/internfile/mh_text.cpp b/src/internfile/mh_text.cpp
1
#ifndef lint
1
#ifndef lint
2
static char rcsid[] = "@(#$Id: mh_text.cpp,v 1.5 2006-03-20 15:14:08 dockes Exp $ (C) 2005 J.F.Dockes";
2
static char rcsid[] = "@(#$Id: mh_text.cpp,v 1.6 2006-12-15 12:40:02 dockes Exp $ (C) 2005 J.F.Dockes";
3
#endif
3
#endif
4
/*
4
/*
5
 *   This program is free software; you can redistribute it and/or modify
5
 *   This program is free software; you can redistribute it and/or modify
6
 *   it under the terms of the GNU General Public License as published by
6
 *   it under the terms of the GNU General Public License as published by
7
 *   the Free Software Foundation; either version 2 of the License, or
7
 *   the Free Software Foundation; either version 2 of the License, or
...
...
29
#include "debuglog.h"
29
#include "debuglog.h"
30
#include "readfile.h"
30
#include "readfile.h"
31
#include "transcode.h"
31
#include "transcode.h"
32
32
33
// Process a plain text file
33
// Process a plain text file
34
MimeHandler::Status MimeHandlerText::mkDoc(RclConfig *conf, const string &fn, 
34
bool MimeHandlerText::set_document_file(const string &fn)
35
               const string &mtype, Rcl::Doc &docout, string&)
36
{
35
{
37
    string otext;
36
    string otext;
38
    if (!file_to_string(fn, otext))
37
    if (!file_to_string(fn, otext))
39
  return MimeHandler::MHError;
38
  return false;
40
  
39
    return set_document_string(otext);
41
    // Try to guess charset, then convert to utf-8, and fill document
40
}
42
    // fields. The charset guesser really doesn't work well in general
41
    
43
    // and should be avoided (especially for short documents)
42
bool MimeHandlerText::set_document_string(const string& otext)
44
    string charset;
43
{
45
    if (conf->getGuessCharset()) {
44
    m_text = otext;
46
  charset = csguess(otext, conf->getDefCharset());
45
    m_havedoc = true;
47
    } else
46
    return true;
48
  charset = conf->getDefCharset();
47
}
49
48
49
bool MimeHandlerText::next_document()
50
{ 
51
    if (m_havedoc == false)
52
  return false;
53
    m_havedoc = false;
50
    LOGDEB1(("MimeHandlerText::mkDoc: transcod from %s to utf-8\n", 
54
    LOGDEB1(("MimeHandlerText::mkDoc: transcod from %s to utf-8\n", 
51
         charset.c_str()));
55
         m_defcharset.c_str()));
52
56
53
    string utf8;
57
    // Avoid unneeded copy. This gets a reference to an empty string which is
58
    // the entry for "content"
59
    string& utf8 = m_metaData["content"];
60
61
    // Note that we transcode always even if defcharset is already utf-8: 
62
    // this validates the encoding.
54
    if (!transcode(otext, utf8, charset, "UTF-8")) {
63
    if (!transcode(m_text, utf8, m_defcharset, "UTF-8")) {
55
    LOGERR(("MimeHandlerText::mkDoc: transcode to utf-8 failed "
64
    LOGERR(("MimeHandlerText::mkDoc: transcode to utf-8 failed "
56
        "for charset [%s]\n", charset.c_str()));
65
        "for charset [%s]\n", m_defcharset.c_str()));
57
    otext.erase();
66
    utf8.erase();
58
  return MimeHandler::MHError;
67
  return false;
59
    }
68
    }
60
69
61
    docout.origcharset = charset;
70
    m_metaData["origcharset"] = m_defcharset;
62
    docout.text = utf8;
71
    m_metaData["charset"] = "utf-8";
63
    return MimeHandler::MHDone;
72
    m_metaData["mimetype"] = "text/plain";
73
    return true;
64
}
74
}