--- a/src/internfile/mh_text.cpp
+++ b/src/internfile/mh_text.cpp
@@ -1,5 +1,5 @@
#ifndef lint
-static char rcsid[] = "@(#$Id: mh_text.cpp,v 1.5 2006-03-20 15:14:08 dockes Exp $ (C) 2005 J.F.Dockes";
+static char rcsid[] = "@(#$Id: mh_text.cpp,v 1.6 2006-12-15 12:40:02 dockes Exp $ (C) 2005 J.F.Dockes";
#endif
/*
* This program is free software; you can redistribute it and/or modify
@@ -31,34 +31,44 @@
#include "transcode.h"
// Process a plain text file
-MimeHandler::Status MimeHandlerText::mkDoc(RclConfig *conf, const string &fn,
- const string &mtype, Rcl::Doc &docout, string&)
+bool MimeHandlerText::set_document_file(const string &fn)
{
string otext;
if (!file_to_string(fn, otext))
- return MimeHandler::MHError;
-
- // Try to guess charset, then convert to utf-8, and fill document
- // fields The charset guesser really doesnt work well in general
- // and should be avoided (especially for short documents)
- string charset;
- if (conf->getGuessCharset()) {
- charset = csguess(otext, conf->getDefCharset());
- } else
- charset = conf->getDefCharset();
+ return false;
+ return set_document_string(otext);
+}
+
+bool MimeHandlerText::set_document_string(const string& otext)
+{
+ m_text = otext;
+ m_havedoc = true;
+ return true;
+}
+bool MimeHandlerText::next_document()
+{
+ if (m_havedoc == false)
+ return false;
+ m_havedoc = false;
LOGDEB1(("MimeHandlerText::mkDoc: transcod from %s to utf-8\n",
- charset.c_str()));
+ m_defcharset.c_str()));
- string utf8;
- if (!transcode(otext, utf8, charset, "UTF-8")) {
+ // Avoid unneeded copy. This gets a reference to an empty string which is
+ // the entry for "content"
+ string& utf8 = m_metaData["content"];
+
+ // Note that we transcode always even if defcharset is already utf-8:
+ // this validates the encoding.
+ if (!transcode(m_text, utf8, m_defcharset, "UTF-8")) {
LOGERR(("MimeHandlerText::mkDoc: transcode to utf-8 failed "
- "for charset [%s]\n", charset.c_str()));
- otext.erase();
- return MimeHandler::MHError;
+ "for charset [%s]\n", m_defcharset.c_str()));
+ utf8.erase();
+ return false;
}
- docout.origcharset = charset;
- docout.text = utf8;
- return MimeHandler::MHDone;
+ m_metaData["origcharset"] = m_defcharset;
+ m_metaData["charset"] = "utf-8";
+ m_metaData["mimetype"] = "text/plain";
+ return true;
}