--- a/src/utils/mimeparse.cpp
+++ b/src/utils/mimeparse.cpp
@@ -74,25 +74,25 @@
string::size_type pos1, pos2=0;
if (charset.empty()) {
- if ((pos1 = in.find("'")) == string::npos)
- return false;
- charset = in.substr(0, pos1);
- // fprintf(stderr, "Charset: [%s]\n", charset.c_str());
- pos1++;
-
- if ((pos2 = in.find("'", pos1)) == string::npos)
- return false;
- // We have no use for lang for now
- // string lang = in.substr(pos1, pos2-pos1);
- // fprintf(stderr, "Lang: [%s]\n", lang.c_str());
- pos2++;
+ if ((pos1 = in.find("'")) == string::npos)
+ return false;
+ charset = in.substr(0, pos1);
+ // fprintf(stderr, "Charset: [%s]\n", charset.c_str());
+ pos1++;
+
+ if ((pos2 = in.find("'", pos1)) == string::npos)
+ return false;
+ // We have no use for lang for now
+ // string lang = in.substr(pos1, pos2-pos1);
+ // fprintf(stderr, "Lang: [%s]\n", lang.c_str());
+ pos2++;
}
string raw;
qp_decode(in.substr(pos2), raw, '%');
// fprintf(stderr, "raw [%s]\n", raw.c_str());
if (!transcode(raw, out, charset, "UTF-8"))
- return false;
+ return false;
return true;
}
@@ -102,7 +102,7 @@
// The lexical token returned by find_next_token
class Lexical {
- public:
+public:
enum kind {none, token, separator};
kind what;
string value;
@@ -118,26 +118,26 @@
{
int commentlevel = 0;
for (; start < in.size(); start++) {
- if (in[start] == '\\') {
- // Skip escaped char.
- if (start+1 < in.size()) {
- start++;
- continue;
- } else {
- lex.error.append("\\ at end of string ");
- return in.size();
- }
- }
- if (in[start] == '(')
- commentlevel++;
- if (in[start] == ')') {
- if (--commentlevel == 0)
- break;
- }
+ if (in[start] == '\\') {
+ // Skip escaped char.
+ if (start+1 < in.size()) {
+ start++;
+ continue;
+ } else {
+ lex.error.append("\\ at end of string ");
+ return in.size();
+ }
+ }
+ if (in[start] == '(')
+ commentlevel++;
+ if (in[start] == ')') {
+ if (--commentlevel == 0)
+ break;
+ }
}
if (start == in.size() && commentlevel != 0) {
- lex.error.append("Unclosed comment ");
- return in.size();
+ lex.error.append("Unclosed comment ");
+ return in.size();
}
return start;
}
@@ -145,17 +145,17 @@
// Skip initial whitespace and (possibly nested) comments.
static string::size_type
skip_whitespace_and_comment(const string &in, string::size_type start,
- Lexical &lex)
+                            Lexical &lex)
{
while (1) {
- if ((start = in.find_first_not_of(" \t\r\n", start)) == string::npos)
- return in.size();
- if (in[start] == '(') {
- if ((start = skip_comment(in, start, lex)) == string::npos)
- return string::npos;
- } else {
- break;
- }
+        if ((start = in.find_first_not_of(" \t\r\n", start)) == string::npos)
+            return in.size();
+        if (in[start] != '(')
+            break;
+        // skip_comment() returns the index OF the closing ')' (in.size() on
+        // error, with lex.error set): step past it, else ')' becomes a token.
+        if ((start = skip_comment(in, start, lex) + 1) >= in.size())
+            return in.size();
}
return start;
}
@@ -168,20 +168,20 @@
/// @param delims separators we should look for
static string::size_type
find_next_token(const string &in, string::size_type start,
- Lexical &lex, string delims = ";=")
+ Lexical &lex, string delims = ";=")
{
char oquot, cquot;
start = skip_whitespace_and_comment(in, start, lex);
if (start == string::npos || start == in.size())
- return in.size();
+ return in.size();
// Begins with separator ? return it.
string::size_type delimi = delims.find_first_of(in[start]);
if (delimi != string::npos) {
- lex.what = Lexical::separator;
- lex.value = delims[delimi];
- return start+1;
+ lex.what = Lexical::separator;
+ lex.value = delims[delimi];
+ return start+1;
}
// Check for start of quoted string
@@ -193,41 +193,41 @@
}
if (cquot != 0) {
- // Quoted string parsing
- string::size_type end;
- start++; // Skip quote character
- for (end = start;end < in.size() && in[end] != cquot; end++) {
- if (in[end] == '\\') {
- // Skip escaped char.
- if (end+1 < in.size()) {
- end++;
- } else {
- // backslash at end of string: error
- lex.error.append("\\ at end of string ");
- return string::npos;
- }
- }
- }
- if (end == in.size()) {
- // Found end of string before closing quote character: error
- lex.error.append("Unclosed quoted string ");
- return string::npos;
- }
- lex.what = Lexical::token;
- lex.value = in.substr(start, end-start);
- lex.quote = oquot;
- return ++end;
+ // Quoted string parsing
+ string::size_type end;
+ start++; // Skip quote character
+ for (end = start;end < in.size() && in[end] != cquot; end++) {
+ if (in[end] == '\\') {
+ // Skip escaped char.
+ if (end+1 < in.size()) {
+ end++;
+ } else {
+ // backslash at end of string: error
+ lex.error.append("\\ at end of string ");
+ return string::npos;
+ }
+ }
+ }
+ if (end == in.size()) {
+ // Found end of string before closing quote character: error
+ lex.error.append("Unclosed quoted string ");
+ return string::npos;
+ }
+ lex.what = Lexical::token;
+ lex.value = in.substr(start, end-start);
+ lex.quote = oquot;
+ return ++end;
} else {
- string::size_type end = in.find_first_of(delims + "\r\n \t(", start);
- lex.what = Lexical::token;
- lex.quote = 0;
- if (end == string::npos) {
- end = in.size();
- lex.value = in.substr(start);
- } else {
- lex.value = in.substr(start, end-start);
- }
- return end;
+ string::size_type end = in.find_first_of(delims + "\r\n \t(", start);
+ lex.what = Lexical::token;
+ lex.quote = 0;
+ if (end == string::npos) {
+ end = in.size();
+ lex.value = in.substr(start);
+ } else {
+ lex.value = in.substr(start, end-start);
+ }
+ return end;
}
}
@@ -246,7 +246,7 @@
void stringtolower(string &out, const string& in)
{
for (string::size_type i = 0; i < in.size(); i++)
- out.append(1, char(tolower(in[i])));
+        out.append(1, char(tolower((unsigned char)in[i]))); // cast: tolower() on a negative char is undefined
}
// Parse MIME field value. Should look like:
@@ -262,43 +262,43 @@
// Get the field value
start = find_next_token(value, start, lex);
if (start == string::npos || lex.what != Lexical::token)
- return false;
+ return false;
parsed.value = lex.value;
map<string, string> rawparams;
// Look for parameters
for (;;) {
- string paramname, paramvalue;
- lex.reset();
- start = find_next_token(value, start, lex);
- if (start == value.size())
- break;
- if (start == string::npos) {
- //fprintf(stderr, "Find_next_token error(1)\n");
- return false;
- }
- if (lex.what == Lexical::separator && lex.value[0] == ';')
- continue;
- if (lex.what != Lexical::token)
- return false;
- stringtolower(paramname, lex.value);
-
- start = find_next_token(value, start, lex);
- if (start == string::npos || lex.what != Lexical::separator ||
- lex.value[0] != '=') {
- //fprintf(stderr, "Find_next_token error (2)\n");
- return false;
- }
-
- start = find_next_token(value, start, lex);
- if (start == string::npos || lex.what != Lexical::token) {
- //fprintf(stderr, "Parameter has no value!");
- return false;
- }
- paramvalue = lex.value;
- rawparams[paramname] = paramvalue;
- //fprintf(stderr, "RAW: name [%s], value [%s]\n", paramname.c_str(),
- // paramvalue.c_str());
+ string paramname, paramvalue;
+ lex.reset();
+ start = find_next_token(value, start, lex);
+ if (start == value.size())
+ break;
+ if (start == string::npos) {
+ //fprintf(stderr, "Find_next_token error(1)\n");
+ return false;
+ }
+ if (lex.what == Lexical::separator && lex.value[0] == ';')
+ continue;
+ if (lex.what != Lexical::token)
+ return false;
+ stringtolower(paramname, lex.value);
+
+ start = find_next_token(value, start, lex);
+ if (start == string::npos || lex.what != Lexical::separator ||
+ lex.value[0] != '=') {
+ //fprintf(stderr, "Find_next_token error (2)\n");
+ return false;
+ }
+
+ start = find_next_token(value, start, lex);
+ if (start == string::npos || lex.what != Lexical::token) {
+ //fprintf(stderr, "Parameter has no value!");
+ return false;
+ }
+ paramvalue = lex.value;
+ rawparams[paramname] = paramvalue;
+ //fprintf(stderr, "RAW: name [%s], value [%s]\n", paramname.c_str(),
+ // paramvalue.c_str());
}
// fprintf(stderr, "Number of raw params %d\n", rawparams.size());
@@ -309,38 +309,38 @@
map<string, Chunks> chunks;
for (map<string, string>::const_iterator it = rawparams.begin();
- it != rawparams.end(); it++) {
- string nm = it->first;
- // fprintf(stderr, "NM: [%s]\n", nm.c_str());
- if (nm.empty()) // ??
- continue;
-
- Chunk chunk;
- if (nm[nm.length()-1] == '*') {
- nm.erase(nm.length() - 1);
- chunk.decode = true;
- } else
- chunk.decode = false;
- // fprintf(stderr, "NM1: [%s]\n", nm.c_str());
-
- chunk.value = it->second;
-
- // Look for another asterisk in nm. If none, assign index 0
- string::size_type aster;
- int idx = 0;
- if ((aster = nm.rfind("*")) != string::npos) {
- string num = nm.substr(aster+1);
- //fprintf(stderr, "NUM: [%s]\n", num.c_str());
- nm.erase(aster);
- idx = atoi(num.c_str());
- }
- Chunks empty;
- if (chunks.find(nm) == chunks.end())
- chunks[nm] = empty;
- chunks[nm].chunks.resize(idx+1);
- chunks[nm].chunks[idx] = chunk;
- //fprintf(stderr, "CHNKS: nm [%s], idx %d, decode %d, value [%s]\n",
- // nm.c_str(), idx, int(chunk.decode), chunk.value.c_str());
+ it != rawparams.end(); it++) {
+ string nm = it->first;
+ // fprintf(stderr, "NM: [%s]\n", nm.c_str());
+ if (nm.empty()) // ??
+ continue;
+
+ Chunk chunk;
+ if (nm[nm.length()-1] == '*') {
+ nm.erase(nm.length() - 1);
+ chunk.decode = true;
+ } else
+ chunk.decode = false;
+ // fprintf(stderr, "NM1: [%s]\n", nm.c_str());
+
+ chunk.value = it->second;
+
+ // Look for another asterisk in nm. If none, assign index 0
+ string::size_type aster;
+ int idx = 0;
+ if ((aster = nm.rfind("*")) != string::npos) {
+ string num = nm.substr(aster+1);
+ //fprintf(stderr, "NUM: [%s]\n", num.c_str());
+ nm.erase(aster);
+ idx = atoi(num.c_str());
+ }
+ Chunks empty;
+ if (chunks.find(nm) == chunks.end())
+ chunks[nm] = empty;
+ chunks[nm].chunks.resize(idx+1);
+ chunks[nm].chunks[idx] = chunk;
+ //fprintf(stderr, "CHNKS: nm [%s], idx %d, decode %d, value [%s]\n",
+ // nm.c_str(), idx, int(chunk.decode), chunk.value.c_str());
}
// For each parameter name, concatenate its chunks and possibly
@@ -349,29 +349,29 @@
// which is not right because there might be uncoded chunks
// according to the rfc.
for (map<string, Chunks>::const_iterator it = chunks.begin();
- it != chunks.end(); it++) {
- if (it->second.chunks.empty())
- continue;
- string nm = it->first;
- // Create the name entry
- if (parsed.params.find(nm) == parsed.params.end())
- parsed.params[nm].clear();
- // Concatenate all chunks and decode the whole if the first one needs
- // to. Yes, this is not quite right.
- string value;
- for (vector<Chunk>::const_iterator vi = it->second.chunks.begin();
- vi != it->second.chunks.end(); vi++) {
- value += vi->value;
- }
- if (it->second.chunks[0].decode) {
- string charset;
- rfc2231_decode(value, parsed.params[nm], charset);
- } else {
- // rfc2047 MUST NOT but IS used by some agents
- rfc2047_decode(value, parsed.params[nm]);
- }
- //fprintf(stderr, "FINAL: nm [%s], value [%s]\n",
- //nm.c_str(), parsed.params[nm].c_str());
+ it != chunks.end(); it++) {
+ if (it->second.chunks.empty())
+ continue;
+ string nm = it->first;
+ // Create the name entry
+ if (parsed.params.find(nm) == parsed.params.end())
+ parsed.params[nm].clear();
+ // Concatenate all chunks and decode the whole if the first one needs
+ // to. Yes, this is not quite right.
+ string value;
+ for (vector<Chunk>::const_iterator vi = it->second.chunks.begin();
+ vi != it->second.chunks.end(); vi++) {
+ value += vi->value;
+ }
+ if (it->second.chunks[0].decode) {
+ string charset;
+ rfc2231_decode(value, parsed.params[nm], charset);
+ } else {
+ // rfc2047 MUST NOT but IS used by some agents
+ rfc2047_decode(value, parsed.params[nm]);
+ }
+ //fprintf(stderr, "FINAL: nm [%s], value [%s]\n",
+ //nm.c_str(), parsed.params[nm].c_str());
}
return true;
@@ -385,80 +385,80 @@
out.reserve(in.length());
string::size_type ii;
for (ii = 0; ii < in.length(); ii++) {
- if (in[ii] == esc) {
- ii++; // Skip '=' or '%'
- if(ii >= in.length() - 1) { // Need at least 2 more chars
- break;
- } else if (in[ii] == '\r' && in[ii+1] == '\n') { // Soft nl, skip
- ii++;
- } else if (in[ii] != '\n' && in[ii] != '\r') { // decode
- char c = in[ii];
- char co;
- if(c >= 'A' && c <= 'F') {
- co = char((c - 'A' + 10) * 16);
- } else if (c >= 'a' && c <= 'f') {
- co = char((c - 'a' + 10) * 16);
- } else if (c >= '0' && c <= '9') {
- co = char((c - '0') * 16);
- } else {
- return false;
- }
- if(++ii >= in.length())
- break;
- c = in[ii];
- if (c >= 'A' && c <= 'F') {
- co += char(c - 'A' + 10);
- } else if (c >= 'a' && c <= 'f') {
- co += char(c - 'a' + 10);
- } else if (c >= '0' && c <= '9') {
- co += char(c - '0');
- } else {
- return false;
- }
- out += co;
- }
- } else {
- out += in[ii];
- }
+ if (in[ii] == esc) {
+ ii++; // Skip '=' or '%'
+ if(ii >= in.length() - 1) { // Need at least 2 more chars
+ break;
+ } else if (in[ii] == '\r' && in[ii+1] == '\n') { // Soft nl, skip
+ ii++;
+ } else if (in[ii] != '\n' && in[ii] != '\r') { // decode
+ char c = in[ii];
+ char co;
+ if(c >= 'A' && c <= 'F') {
+ co = char((c - 'A' + 10) * 16);
+ } else if (c >= 'a' && c <= 'f') {
+ co = char((c - 'a' + 10) * 16);
+ } else if (c >= '0' && c <= '9') {
+ co = char((c - '0') * 16);
+ } else {
+ return false;
+ }
+ if(++ii >= in.length())
+ break;
+ c = in[ii];
+ if (c >= 'A' && c <= 'F') {
+ co += char(c - 'A' + 10);
+ } else if (c >= 'a' && c <= 'f') {
+ co += char(c - 'a' + 10);
+ } else if (c >= '0' && c <= '9') {
+ co += char(c - '0');
+ } else {
+ return false;
+ }
+ out += co;
+ }
+ } else {
+ out += in[ii];
+ }
}
return true;
}
// Decode an word encoded as quoted printable or base 64
static bool rfc2047_decodeParsed(const std::string& charset,
- const std::string& encoding,
- const std::string& value,
- std::string &utf8)
+                                 const std::string& encoding,
+                                 const std::string& value,
+                                 std::string &utf8)
{
DPRINT((stderr, "DecodeParsed: charset [%s] enc [%s] val [%s]\n",
- charset.c_str(), encoding.c_str(), value.c_str()));
+            charset.c_str(), encoding.c_str(), value.c_str()));
utf8.clear();
string decoded;
if (!stringlowercmp("b", encoding)) {
- if (!base64_decode(value, decoded))
- return false;
- DPRINT((stderr, "FromB64: [%s]\n", decoded.c_str()));
+        if (!base64_decode(value, decoded))
+            return false;
+        DPRINT((stderr, "FromB64: [%s]\n", decoded.c_str()));
} else if (!stringlowercmp("q", encoding)) {
- if (!qp_decode(value, decoded))
- return false;
- // Need to translate _ to ' ' here
- string temp;
- for (string::size_type pos = 0; pos < decoded.length(); pos++)
- if (decoded[pos] == '_')
- temp += ' ';
- else
- temp += decoded[pos];
- decoded = temp;
- DPRINT((stderr, "FromQP: [%s]\n", decoded.c_str()));
+        // RFC 2047 'Q' encoding: '_' stands for a space. Translate it in the
+        // raw text BEFORE hex decoding, so that "=5F" stays a literal '_'.
+        string temp;
+        for (string::size_type pos = 0; pos < value.length(); pos++)
+            if (value[pos] == '_')
+                temp += ' ';
+            else
+                temp += value[pos];
+        if (!qp_decode(temp, decoded))
+            return false;
+        DPRINT((stderr, "FromQP: [%s]\n", decoded.c_str()));
} else {
- DPRINT((stderr, "Bad encoding [%s]\n", encoding.c_str()));
- return false;
+        DPRINT((stderr, "Bad encoding [%s]\n", encoding.c_str()));
+        return false;
}
if (!transcode(decoded, utf8, charset, "UTF-8")) {
- DPRINT((stderr, "Transcode failed\n"));
- return false;
+        DPRINT((stderr, "Transcode failed\n"));
+        return false;
}
return true;
}
@@ -470,8 +470,8 @@
// - We should turn off decoding while inside quoted strings
//
typedef enum {rfc2047ready, rfc2047open_eq,
- rfc2047charset, rfc2047encoding,
- rfc2047value, rfc2047close_q} Rfc2047States;
+ rfc2047charset, rfc2047encoding,
+ rfc2047value, rfc2047close_q} Rfc2047States;
bool rfc2047_decode(const std::string& in, std::string &out)
{
@@ -483,106 +483,106 @@
out.clear();
for (string::size_type ii = 0; ii < in.length(); ii++) {
- char ch = in[ii];
- switch (state) {
- case rfc2047ready:
- {
- DPRINT((stderr, "STATE: ready, ch %c\n", ch));
- switch (ch) {
- // Whitespace: stay ready
- case ' ': case ' ': value += ch;break;
- // '=' -> forward to next state
- case '=': state = rfc2047open_eq; break;
- DPRINT((stderr, "STATE: open_eq\n"));
- // Other: go back to sleep
- default: value += ch; state = rfc2047ready;
- }
- }
- break;
- case rfc2047open_eq:
- {
- DPRINT((stderr, "STATE: open_eq, ch %c\n", ch));
- switch (ch) {
- case '?':
- {
- // Transcode current (unencoded part) value:
- // we sometimes find 8-bit chars in
- // there. Interpret as Iso8859.
- if (value.length() > 0) {
- transcode(value, utf8, "ISO-8859-1", "UTF-8");
- out += utf8;
- value.clear();
- }
- state = rfc2047charset;
- }
- break;
- default: state = rfc2047ready; out += '='; out += ch;break;
- }
- }
- break;
- case rfc2047charset:
- {
- DPRINT((stderr, "STATE: charset, ch %c\n", ch));
- switch (ch) {
- case '?': state = rfc2047encoding; break;
- default: charset += ch; break;
- }
- }
- break;
- case rfc2047encoding:
- {
- DPRINT((stderr, "STATE: encoding, ch %c\n", ch));
- switch (ch) {
- case '?': state = rfc2047value; break;
- default: encoding += ch; break;
- }
- }
- break;
- case rfc2047value:
- {
- DPRINT((stderr, "STATE: value, ch %c\n", ch));
- switch (ch) {
- case '?': state = rfc2047close_q; break;
- default: value += ch;break;
- }
- }
- break;
- case rfc2047close_q:
- {
- DPRINT((stderr, "STATE: close_q, ch %c\n", ch));
- switch (ch) {
- case '=':
- {
- DPRINT((stderr, "End of encoded area. Charset %s, Encoding %s\n", charset.c_str(), encoding.c_str()));
- string utf8;
- state = rfc2047ready;
- if (!rfc2047_decodeParsed(charset, encoding, value,
- utf8)) {
- return false;
- }
- out += utf8;
- charset.clear();
- encoding.clear();
- value.clear();
- }
- break;
- default: state = rfc2047value; value += '?';value += ch;break;
- }
- }
- break;
- default: // ??
+ char ch = in[ii];
+ switch (state) {
+ case rfc2047ready:
+ {
+ DPRINT((stderr, "STATE: ready, ch %c\n", ch));
+ switch (ch) {
+ // Whitespace: stay ready
+ case ' ': case '\t': value += ch;break;
+ // '=' -> forward to next state
+ case '=': state = rfc2047open_eq; break;
+ DPRINT((stderr, "STATE: open_eq\n"));
+ // Other: go back to sleep
+ default: value += ch; state = rfc2047ready;
+ }
+ }
+ break;
+ case rfc2047open_eq:
+ {
+ DPRINT((stderr, "STATE: open_eq, ch %c\n", ch));
+ switch (ch) {
+ case '?':
+ {
+ // Transcode current (unencoded part) value:
+ // we sometimes find 8-bit chars in
+ // there. Interpret as Iso8859.
+ if (value.length() > 0) {
+ transcode(value, utf8, "ISO-8859-1", "UTF-8");
+ out += utf8;
+ value.clear();
+ }
+ state = rfc2047charset;
+ }
+ break;
+ default: state = rfc2047ready; out += '='; out += ch;break;
+ }
+ }
+ break;
+ case rfc2047charset:
+ {
+ DPRINT((stderr, "STATE: charset, ch %c\n", ch));
+ switch (ch) {
+ case '?': state = rfc2047encoding; break;
+ default: charset += ch; break;
+ }
+ }
+ break;
+ case rfc2047encoding:
+ {
+ DPRINT((stderr, "STATE: encoding, ch %c\n", ch));
+ switch (ch) {
+ case '?': state = rfc2047value; break;
+ default: encoding += ch; break;
+ }
+ }
+ break;
+ case rfc2047value:
+ {
+ DPRINT((stderr, "STATE: value, ch %c\n", ch));
+ switch (ch) {
+ case '?': state = rfc2047close_q; break;
+ default: value += ch;break;
+ }
+ }
+ break;
+ case rfc2047close_q:
+ {
+ DPRINT((stderr, "STATE: close_q, ch %c\n", ch));
+ switch (ch) {
+ case '=':
+ {
+ DPRINT((stderr, "End of encoded area. Charset %s, Encoding %s\n", charset.c_str(), encoding.c_str()));
+ string utf8;
+ state = rfc2047ready;
+ if (!rfc2047_decodeParsed(charset, encoding, value,
+ utf8)) {
+ return false;
+ }
+ out += utf8;
+ charset.clear();
+ encoding.clear();
+ value.clear();
+ }
+ break;
+ default: state = rfc2047value; value += '?';value += ch;break;
+ }
+ }
+ break;
+ default: // ??
DPRINT((stderr, "STATE: default ?? ch %c\n", ch));
- return false;
- }
+ return false;
+ }
}
if (value.length() > 0) {
- transcode(value, utf8, "ISO-8859-1", "UTF-8");
- out += utf8;
- value.clear();
+ transcode(value, utf8, "ISO-8859-1", "UTF-8");
+ out += utf8;
+ value.clear();
}
if (state != rfc2047ready)
- return false;
+ return false;
return true;
}
@@ -604,16 +604,16 @@
vector<string> toks;
string::size_type idx;
if ((idx = dt.find_first_of(",")) != string::npos) {
- if (idx == dt.length() - 1) {
- DATEDEB((stderr, "Bad rfc822 date format (short1): [%s]\n",
- dt.c_str()));
- return (time_t)-1;
- }
- string date = dt.substr(idx+1);
- stringToTokens(date, toks, " \t:");
+ if (idx == dt.length() - 1) {
+ DATEDEB((stderr, "Bad rfc822 date format (short1): [%s]\n",
+ dt.c_str()));
+ return (time_t)-1;
+ }
+ string date = dt.substr(idx+1);
+ stringToTokens(date, toks, " \t:");
} else {
// No comma. Enter strangeland
- stringToTokens(dt, toks, " \t:");
+ stringToTokens(dt, toks, " \t:");
// Test for date like: Sun Nov 19 06:18:41 2006
// 0 1 2 3 4 5 6
// and change to: 19 Nov 2006 06:18:41
@@ -629,20 +629,20 @@
#if DEBUGDATE
for (list<string>::iterator it = toks.begin(); it != toks.end(); it++) {
- DATEDEB((stderr, "[%s] ", it->c_str()));
+ DATEDEB((stderr, "[%s] ", it->c_str()));
}
DATEDEB((stderr, "\n"));
#endif
if (toks.size() < 6) {
- DATEDEB((stderr, "Bad rfc822 date format (toks cnt): [%s]\n",
- dt.c_str()));
- return (time_t)-1;
+ DATEDEB((stderr, "Bad rfc822 date format (toks cnt): [%s]\n",
+ dt.c_str()));
+ return (time_t)-1;
}
if (toks.size() == 6) {
- // Probably no timezone, sometimes happens
- toks.push_back("+0000");
+ // Probably no timezone, sometimes happens
+ toks.push_back("+0000");
}
struct tm tm;
@@ -660,20 +660,20 @@
// Month. Only Jan-Dec are legal. January, February do happen
// though. Convert to 0-11
if (*it == "Jan" || *it == "January") tm.tm_mon = 0; else if
- (*it == "Feb" || *it == "February") tm.tm_mon = 1; else if
- (*it == "Mar" || *it == "March") tm.tm_mon = 2; else if
- (*it == "Apr" || *it == "April") tm.tm_mon = 3; else if
- (*it == "May") tm.tm_mon = 4; else if
- (*it == "Jun" || *it == "June") tm.tm_mon = 5; else if
- (*it == "Jul" || *it == "July") tm.tm_mon = 6; else if
- (*it == "Aug" || *it == "August") tm.tm_mon = 7; else if
- (*it == "Sep" || *it == "September") tm.tm_mon = 8; else if
- (*it == "Oct" || *it == "October") tm.tm_mon = 9; else if
- (*it == "Nov" || *it == "November") tm.tm_mon = 10; else if
- (*it == "Dec" || *it == "December") tm.tm_mon = 11; else {
- DATEDEB((stderr, "Bad rfc822 date format (month): [%s]\n",
- dt.c_str()));
- return (time_t)-1;
+ (*it == "Feb" || *it == "February") tm.tm_mon = 1; else if
+ (*it == "Mar" || *it == "March") tm.tm_mon = 2; else if
+ (*it == "Apr" || *it == "April") tm.tm_mon = 3; else if
+ (*it == "May") tm.tm_mon = 4; else if
+ (*it == "Jun" || *it == "June") tm.tm_mon = 5; else if
+ (*it == "Jul" || *it == "July") tm.tm_mon = 6; else if
+ (*it == "Aug" || *it == "August") tm.tm_mon = 7; else if
+ (*it == "Sep" || *it == "September") tm.tm_mon = 8; else if
+ (*it == "Oct" || *it == "October") tm.tm_mon = 9; else if
+ (*it == "Nov" || *it == "November") tm.tm_mon = 10; else if
+ (*it == "Dec" || *it == "December") tm.tm_mon = 11; else {
+ DATEDEB((stderr, "Bad rfc822 date format (month): [%s]\n",
+ dt.c_str()));
+ return (time_t)-1;
}
it++;
@@ -687,67 +687,67 @@
tm.tm_year += 1900;
}
if (tm.tm_year > 1900)
- tm.tm_year -= 1900;
+ tm.tm_year -= 1900;
it++;
// Hour minute second need no adjustments
tm.tm_hour = atoi(it->c_str()); it++;
tm.tm_min = atoi(it->c_str()); it++;
- tm.tm_sec = atoi(it->c_str()); it++;
+ tm.tm_sec = atoi(it->c_str()); it++;
// Timezone is supposed to be either +-XYZT or a zone name
int zonesecs = 0;
if (it->length() < 1) {
- DATEDEB((stderr, "Bad rfc822 date format (zlen): [%s]\n", dt.c_str()));
- return (time_t)-1;
+ DATEDEB((stderr, "Bad rfc822 date format (zlen): [%s]\n", dt.c_str()));
+ return (time_t)-1;
}
if (it->at(0) == '-' || it->at(0) == '+') {
- // Note that +xy:zt (instead of +xyzt) sometimes happen, we
- // may want to process it one day
- if (it->length() < 5) {
- DATEDEB((stderr, "Bad rfc822 date format (zlen1): [%s]\n",
- dt.c_str()));
- goto nozone;
- }
- zonesecs = 3600*((it->at(1)-'0') * 10 + it->at(2)-'0')+
- (it->at(3)-'0')*10 + it->at(4)-'0';
- zonesecs = it->at(0) == '+' ? -1 * zonesecs : zonesecs;
+ // Note that +xy:zt (instead of +xyzt) sometimes happen, we
+ // may want to process it one day
+ if (it->length() < 5) {
+ DATEDEB((stderr, "Bad rfc822 date format (zlen1): [%s]\n",
+ dt.c_str()));
+ goto nozone;
+ }
+ zonesecs = 3600*((it->at(1)-'0') * 10 + it->at(2)-'0')+
+ (it->at(3)-'0')*10 + it->at(4)-'0';
+ zonesecs = it->at(0) == '+' ? -1 * zonesecs : zonesecs;
} else {
- int hours;
- if (*it == "A") hours= 1; else if (*it == "B") hours= 2;
- else if (*it == "C") hours= 3; else if (*it == "D") hours= 4;
- else if (*it == "E") hours= 5; else if (*it == "F") hours= 6;
- else if (*it == "G") hours= 7; else if (*it == "H") hours= 8;
- else if (*it == "I") hours= 9; else if (*it == "K") hours= 10;
- else if (*it == "L") hours= 11; else if (*it == "M") hours= 12;
- else if (*it == "N") hours= -1; else if (*it == "O") hours= -2;
- else if (*it == "P") hours= -3; else if (*it == "Q") hours= -4;
- else if (*it == "R") hours= -5; else if (*it == "S") hours= -6;
- else if (*it == "T") hours= -7; else if (*it == "U") hours= -8;
- else if (*it == "V") hours= -9; else if (*it == "W") hours= -10;
- else if (*it == "X") hours= -11; else if (*it == "Y") hours= -12;
- else if (*it == "Z") hours= 0; else if (*it == "UT") hours= 0;
- else if (*it == "GMT") hours= 0; else if (*it == "EST") hours= 5;
- else if (*it == "EDT") hours= 4; else if (*it == "CST") hours= 6;
- else if (*it == "CDT") hours= 5; else if (*it == "MST") hours= 7;
- else if (*it == "MDT") hours= 6; else if (*it == "PST") hours= 8;
- else if (*it == "PDT") hours= 7;
- // Non standard names
- // Standard Time (or Irish Summer Time?) is actually +5.5
- else if (*it == "CET") hours= -1; else if (*it == "JST") hours= -9;
- else if (*it == "IST") hours= -5; else if (*it == "WET") hours= 0;
- else if (*it == "MET") hours= -1;
- else {
- DATEDEB((stderr, "Bad rfc822 date format (zname): [%s]\n",
- dt.c_str()));
- // Forget tz
- goto nozone;
- }
- zonesecs = 3600 * hours;
+ int hours;
+ if (*it == "A") hours= 1; else if (*it == "B") hours= 2;
+ else if (*it == "C") hours= 3; else if (*it == "D") hours= 4;
+ else if (*it == "E") hours= 5; else if (*it == "F") hours= 6;
+ else if (*it == "G") hours= 7; else if (*it == "H") hours= 8;
+ else if (*it == "I") hours= 9; else if (*it == "K") hours= 10;
+ else if (*it == "L") hours= 11; else if (*it == "M") hours= 12;
+ else if (*it == "N") hours= -1; else if (*it == "O") hours= -2;
+ else if (*it == "P") hours= -3; else if (*it == "Q") hours= -4;
+ else if (*it == "R") hours= -5; else if (*it == "S") hours= -6;
+ else if (*it == "T") hours= -7; else if (*it == "U") hours= -8;
+ else if (*it == "V") hours= -9; else if (*it == "W") hours= -10;
+ else if (*it == "X") hours= -11; else if (*it == "Y") hours= -12;
+ else if (*it == "Z") hours= 0; else if (*it == "UT") hours= 0;
+ else if (*it == "GMT") hours= 0; else if (*it == "EST") hours= 5;
+ else if (*it == "EDT") hours= 4; else if (*it == "CST") hours= 6;
+ else if (*it == "CDT") hours= 5; else if (*it == "MST") hours= 7;
+ else if (*it == "MDT") hours= 6; else if (*it == "PST") hours= 8;
+ else if (*it == "PDT") hours= 7;
+ // Non standard names
+ // Standard Time (or Irish Summer Time?) is actually +5.5
+ else if (*it == "CET") hours= -1; else if (*it == "JST") hours= -9;
+ else if (*it == "IST") hours= -5; else if (*it == "WET") hours= 0;
+ else if (*it == "MET") hours= -1;
+ else {
+ DATEDEB((stderr, "Bad rfc822 date format (zname): [%s]\n",
+ dt.c_str()));
+ // Forget tz
+ goto nozone;
+ }
+ zonesecs = 3600 * hours;
}
DATEDEB((stderr, "Tz: [%s] -> %d\n", it->c_str(), zonesecs));
- nozone:
+nozone:
// Compute the UTC Unix time value
#ifndef sun
@@ -792,14 +792,14 @@
static const char *thisprog;
static char usage [] =
-"-p: header value and parameter test\n"
-"-q: qp decoding\n"
-"-b: base64\n"
-"-7: rfc2047\n"
-"-1: rfc2331\n"
-"-t: date time\n"
-" \n\n"
-;
+ "-p: header value and parameter test\n"
+ "-q: qp decoding\n"
+ "-b: base64\n"
+ "-7: rfc2047\n"
+ "-1: rfc2331\n"
+ "-t: date time\n"
+ " \n\n"
+ ;
static void
Usage(void)
{
@@ -809,8 +809,8 @@
static int op_flags;
#define OPT_MOINS 0x1
-#define OPT_p 0x2
-#define OPT_q 0x4
+#define OPT_p 0x2
+#define OPT_q 0x4
#define OPT_b 0x8
#define OPT_7 0x10
#define OPT_1 0x20
@@ -818,159 +818,168 @@
int
main(int argc, const char **argv)
{
- int count = 10;
+ int count = 10;
- thisprog = argv[0];
- argc--; argv++;
-
- while (argc > 0 && **argv == '-') {
- (*argv)++;
- if (!(**argv))
- /* Cas du "adb - core" */
- Usage();
- while (**argv)
- switch (*(*argv)++) {
- case 'p': op_flags |= OPT_p; break;
- case 'q': op_flags |= OPT_q; break;
- case 'b': op_flags |= OPT_b; break;
- case '1': op_flags |= OPT_1; break;
- case '7': op_flags |= OPT_7; break;
- case 't': op_flags |= OPT_t; break;
- default: Usage(); break;
- }
- b1: argc--; argv++;
- }
-
- if (argc != 0)
- Usage();
-
- if (op_flags & OPT_p) {
- // Mime header value and parameters extraction
- const char *tr[] = {
- "text/html;charset = UTF-8 ; otherparam=garb; \n"
- "QUOTEDPARAM=\"quoted value\"",
-
- "text/plain; charset=ASCII\r\n name=\"809D3016_5691DPS_5.2.LIC\"",
-
- "application/x-stuff;"
- "title*0*=us-ascii'en'This%20is%20even%20more%20;"
- "title*1*=%2A%2A%2Afun%2A%2A%2A%20;"
- "title*2=\"isn't it!\""
- };
+ thisprog = argv[0];
+ argc--; argv++;
+
+ while (argc > 0 && **argv == '-') {
+ (*argv)++;
+ if (!(**argv))
+ /* Cas du "adb - core" */
+ Usage();
+ while (**argv)
+ switch (*(*argv)++) {
+ case 'p': op_flags |= OPT_p; break;
+ case 'q': op_flags |= OPT_q; break;
+ case 'b': op_flags |= OPT_b; break;
+ case '1': op_flags |= OPT_1; break;
+ case '7': op_flags |= OPT_7; break;
+ case 't': op_flags |= OPT_t; break;
+ default: Usage(); break;
+ }
+ b1: argc--; argv++;
+ }
+
+ if (argc != 0)
+ Usage();
+
+ if (op_flags & OPT_p) {
+ // Mime header value and parameters extraction
+ const char *tr[] = {
+ "text/html;charset = UTF-8 ; otherparam=garb; \n"
+ "QUOTEDPARAM=\"quoted value\"",
+
+ "text/plain; charset=ASCII\r\n name=\"809D3016_5691DPS_5.2.LIC\"",
+
+ "application/x-stuff;"
+ "title*0*=us-ascii'en'This%20is%20even%20more%20;"
+ "title*1*=%2A%2A%2Afun%2A%2A%2A%20;"
+ "title*2=\"isn't it!\"",
+
+ // The following are all invalid, trying to crash the parser...
+ "",
+ // This does not parse because of whitespace in the value.
+ " complete garbage;",
+ // This parses, but only the first word gets into the value
+ " some value",
+ " word ;", ";", "=", "; = ", "a;=\"toto tutu\"=", ";;;;a=b",
+ };
- for (unsigned int i = 0; i < sizeof(tr) / sizeof(char *); i++) {
- MimeHeaderValue parsed;
- if (!parseMimeHeaderValue(tr[i], parsed)) {
- fprintf(stderr, "PARSE ERROR for [%s]\n", tr[i]);
- }
- printf("Field value: [%s]\n", parsed.value.c_str());
- map<string, string>::iterator it;
- for (it = parsed.params.begin();it != parsed.params.end();it++) {
- if (it == parsed.params.begin())
- printf("Parameters:\n");
- printf(" [%s] = [%s]\n", it->first.c_str(), it->second.c_str());
- }
- }
-
- } else if (op_flags & OPT_q) {
- // Quoted printable stuff
- const char *qp =
- "=41=68 =e0 boire=\r\n continue 1ere\ndeuxieme\n\r3eme "
- "agrave is: '=E0' probable skipped decode error: =\n"
- "Actual decode error =xx this wont show";
-
- string out;
- if (!qp_decode(string(qp), out)) {
- fprintf(stderr, "qp_decode returned error\n");
- }
- printf("Decoded: '%s'\n", out.c_str());
- } else if (op_flags & OPT_b) {
- // Base64
- //'C'est � boire qu'il nous faut �viter l'exc�s.'
- //'Deuxi�me ligne'
- //'Troisi�me ligne'
- //'Et la fin (pas de nl). '
- const char *b64 =
- "Qydlc3Qg4CBib2lyZSBxdSdpbCBub3VzIGZhdXQg6XZpdGVyIGwnZXhj6HMuCkRldXhp6G1l\r\n"
- "IGxpZ25lClRyb2lzaehtZSBsaWduZQpFdCBsYSBmaW4gKHBhcyBkZSBubCkuIA==\r\n";
-
- string out;
- if (!base64_decode(string(b64), out)) {
- fprintf(stderr, "base64_decode returned error\n");
- exit(1);
- }
- printf("Decoded: [%s]\n", out.c_str());
+ for (unsigned int i = 0; i < sizeof(tr) / sizeof(char *); i++) {
+ MimeHeaderValue parsed;
+ if (!parseMimeHeaderValue(tr[i], parsed)) {
+ fprintf(stderr, "PARSE ERROR for [%s]\n", tr[i]);
+ continue;
+ }
+ printf("Field value: [%s]\n", parsed.value.c_str());
+ map<string, string>::iterator it;
+ for (it = parsed.params.begin();it != parsed.params.end();it++) {
+ if (it == parsed.params.begin())
+ printf("Parameters:\n");
+ printf(" [%s] = [%s]\n", it->first.c_str(), it->second.c_str());
+ }
+ }
+
+ } else if (op_flags & OPT_q) {
+ // Quoted printable stuff
+ const char *qp =
+ "=41=68 =e0 boire=\r\n continue 1ere\ndeuxieme\n\r3eme "
+ "agrave is: '=E0' probable skipped decode error: =\n"
+ "Actual decode error =xx this wont show";
+
+ string out;
+ if (!qp_decode(string(qp), out)) {
+ fprintf(stderr, "qp_decode returned error\n");
+ }
+ printf("Decoded: '%s'\n", out.c_str());
+ } else if (op_flags & OPT_b) {
+ // Base64
+ //'C'est � boire qu'il nous faut �viter l'exc�s.'
+ //'Deuxi�me ligne'
+ //'Troisi�me ligne'
+ //'Et la fin (pas de nl). '
+ const char *b64 =
+ "Qydlc3Qg4CBib2lyZSBxdSdpbCBub3VzIGZhdXQg6XZpdGVyIGwnZXhj6HMuCkRldXhp6G1l\r\n"
+ "IGxpZ25lClRyb2lzaehtZSBsaWduZQpFdCBsYSBmaW4gKHBhcyBkZSBubCkuIA==\r\n";
+
+ string out;
+ if (!base64_decode(string(b64), out)) {
+ fprintf(stderr, "base64_decode returned error\n");
+ exit(1);
+ }
+ printf("Decoded: [%s]\n", out.c_str());
#if 0
- string coded, decoded;
- const char *fname = "/tmp/recoll_decodefail";
- if (!file_to_string(fname, coded)) {
- fprintf(stderr, "Cant read %s\n", fname);
- exit(1);
- }
+ string coded, decoded;
+ const char *fname = "/tmp/recoll_decodefail";
+ if (!file_to_string(fname, coded)) {
+ fprintf(stderr, "Cant read %s\n", fname);
+ exit(1);
+ }
- if (!base64_decode(coded, decoded)) {
- fprintf(stderr, "base64_decode returned error\n");
- exit(1);
- }
- printf("Decoded: [%s]\n", decoded.c_str());
+ if (!base64_decode(coded, decoded)) {
+ fprintf(stderr, "base64_decode returned error\n");
+ exit(1);
+ }
+ printf("Decoded: [%s]\n", decoded.c_str());
#endif
- } else if (op_flags & (OPT_7|OPT_1)) {
- // rfc2047
- char line [1024];
- string out;
- bool res;
- while (fgets(line, 1023, stdin)) {
- int l = strlen(line);
- if (l == 0)
- continue;
- line[l-1] = 0;
- fprintf(stderr, "Line: [%s]\n", line);
- string charset;
- if (op_flags & OPT_7) {
- res = rfc2047_decode(line, out);
- } else {
- res = rfc2231_decode(line, out, charset);
- }
- if (res)
- fprintf(stderr, "Out: [%s] cs %s\n", out.c_str(), charset.c_str());
- else
- fprintf(stderr, "Decoding failed\n");
- }
- } else if (op_flags & OPT_t) {
- time_t t;
-
- const char *dates[] = {
- " Wed, 13 Sep 2006 11:40:26 -0700 (PDT)",
- " Mon, 3 Jul 2006 09:51:58 +0200",
- " Wed, 13 Sep 2006 08:19:48 GMT-07:00",
- " Wed, 13 Sep 2006 11:40:26 -0700 (PDT)",
- " Sat, 23 Dec 89 19:27:12 EST",
- " 13 Jan 90 08:23:29 GMT"};
-
- for (unsigned int i = 0; i <sizeof(dates) / sizeof(char *); i++) {
- t = rfc2822DateToUxTime(dates[i]);
- struct tm *tm = localtime(&t);
- char datebuf[100];
- strftime(datebuf, 99, " %Y-%m-%d %H:%M:%S %z", tm);
- printf("[%s] -> [%s]\n", dates[i], datebuf);
- }
- printf("Enter date:\n");
- char line [1024];
- while (fgets(line, 1023, stdin)) {
- int l = strlen(line);
- if (l == 0) continue;
- line[l-1] = 0;
- t = rfc2822DateToUxTime(line);
- struct tm *tm = localtime(&t);
- char datebuf[100];
- strftime(datebuf, 99, " %Y-%m-%d %H:%M:%S %z", tm);
- printf("[%s] -> [%s]\n", line, datebuf);
- }
-
-
- }
- exit(0);
+ } else if (op_flags & (OPT_7|OPT_1)) {
+ // rfc2047
+ char line [1024];
+ string out;
+ bool res;
+ while (fgets(line, 1023, stdin)) {
+ int l = strlen(line);
+ if (l == 0)
+ continue;
+ line[l-1] = 0;
+ fprintf(stderr, "Line: [%s]\n", line);
+ string charset;
+ if (op_flags & OPT_7) {
+ res = rfc2047_decode(line, out);
+ } else {
+ res = rfc2231_decode(line, out, charset);
+ }
+ if (res)
+ fprintf(stderr, "Out: [%s] cs %s\n", out.c_str(), charset.c_str());
+ else
+ fprintf(stderr, "Decoding failed\n");
+ }
+ } else if (op_flags & OPT_t) {
+ time_t t;
+
+ const char *dates[] = {
+ " Wed, 13 Sep 2006 11:40:26 -0700 (PDT)",
+ " Mon, 3 Jul 2006 09:51:58 +0200",
+ " Wed, 13 Sep 2006 08:19:48 GMT-07:00",
+ " Wed, 13 Sep 2006 11:40:26 -0700 (PDT)",
+ " Sat, 23 Dec 89 19:27:12 EST",
+ " 13 Jan 90 08:23:29 GMT"};
+
+ for (unsigned int i = 0; i <sizeof(dates) / sizeof(char *); i++) {
+ t = rfc2822DateToUxTime(dates[i]);
+ struct tm *tm = localtime(&t);
+ char datebuf[100];
+ strftime(datebuf, 99, " %Y-%m-%d %H:%M:%S %z", tm);
+ printf("[%s] -> [%s]\n", dates[i], datebuf);
+ }
+ printf("Enter date:\n");
+ char line [1024];
+ while (fgets(line, 1023, stdin)) {
+ int l = strlen(line);
+ if (l == 0) continue;
+ line[l-1] = 0;
+ t = rfc2822DateToUxTime(line);
+ struct tm *tm = localtime(&t);
+ char datebuf[100];
+ strftime(datebuf, 99, " %Y-%m-%d %H:%M:%S %z", tm);
+ printf("[%s] -> [%s]\n", line, datebuf);
+ }
+
+
+ }
+ exit(0);
}
#endif // TEST_MIMEPARSE