|
a/src/utils/mimeparse.cpp |
|
b/src/utils/mimeparse.cpp |
1 |
#ifndef lint
|
1 |
#ifndef lint
|
2 |
static char rcsid[] = "@(#$Id: mimeparse.cpp,v 1.8 2005-11-24 07:16:16 dockes Exp $ (C) 2004 J.F.Dockes";
|
2 |
static char rcsid[] = "@(#$Id: mimeparse.cpp,v 1.9 2005-12-16 10:08:08 dockes Exp $ (C) 2004 J.F.Dockes";
|
3 |
#endif
|
3 |
#endif
|
4 |
|
4 |
|
5 |
#ifndef TEST_MIMEPARSE
|
5 |
#ifndef TEST_MIMEPARSE
|
6 |
|
6 |
|
7 |
#include <string>
|
7 |
#include <string>
|
|
... |
|
... |
36 |
Lexical() : what(none), quote(0) {}
|
36 |
Lexical() : what(none), quote(0) {}
|
37 |
void reset() {what = none; value.erase(); error.erase();quote = 0;}
|
37 |
void reset() {what = none; value.erase(); error.erase();quote = 0;}
|
38 |
};
|
38 |
};
|
39 |
|
39 |
|
40 |
// Skip mime comment. This must be called with in[start] == '('
|
40 |
// Skip mime comment. This must be called with in[start] == '('
|
41 |
int skip_comment(const string &in, unsigned int start, Lexical &lex)
|
41 |
string::size_type skip_comment(const string &in, string::size_type start,
|
|
|
42 |
Lexical &lex)
|
42 |
{
|
43 |
{
|
43 |
int commentlevel = 0;
|
44 |
int commentlevel = 0;
|
44 |
for (; start < in.size(); start++) {
|
45 |
for (; start < in.size(); start++) {
|
45 |
if (in[start] == '\\') {
|
46 |
if (in[start] == '\\') {
|
46 |
// Skip escaped char.
|
47 |
// Skip escaped char.
|
|
... |
|
... |
65 |
}
|
66 |
}
|
66 |
return start;
|
67 |
return start;
|
67 |
}
|
68 |
}
|
68 |
|
69 |
|
69 |
// Skip initial whitespace and (possibly nested) comments.
|
70 |
// Skip initial whitespace and (possibly nested) comments.
|
70 |
int skip_whitespace_and_comment(const string &in, unsigned int start,
|
71 |
string::size_type skip_whitespace_and_comment(const string &in,
|
|
|
72 |
string::size_type start,
|
71 |
Lexical &lex)
|
73 |
Lexical &lex)
|
72 |
{
|
74 |
{
|
73 |
while (1) {
|
75 |
while (1) {
|
74 |
if ((start = in.find_first_not_of(" \t\r\n", start)) == string::npos)
|
76 |
if ((start = in.find_first_not_of(" \t\r\n", start)) == string::npos)
|
75 |
return in.size();
|
77 |
return in.size();
|
76 |
if (in[start] == '(') {
|
78 |
if (in[start] == '(') {
|
|
... |
|
... |
88 |
/// (ie unbalanced quoting)
|
90 |
/// (ie unbalanced quoting)
|
89 |
/// @param in the input string
|
91 |
/// @param in the input string
|
90 |
/// @param start the starting position
|
92 |
/// @param start the starting position
|
91 |
/// @param lex the returned token and its description
|
93 |
/// @param lex the returned token and its description
|
92 |
/// @param delims separators we should look for
|
94 |
/// @param delims separators we should look for
|
93 |
int find_next_token(const string &in, unsigned int start,
|
95 |
string::size_type find_next_token(const string &in, string::size_type start,
|
94 |
Lexical &lex, string delims = ";=")
|
96 |
Lexical &lex, string delims = ";=")
|
95 |
{
|
97 |
{
|
96 |
char oquot, cquot;
|
98 |
char oquot, cquot;
|
97 |
|
99 |
|
98 |
start = skip_whitespace_and_comment(in, start, lex);
|
100 |
start = skip_whitespace_and_comment(in, start, lex);
|
99 |
if (start == string::npos || start == in.size())
|
101 |
if (start == string::npos || start == in.size())
|
100 |
return start;
|
102 |
return start;
|
101 |
|
103 |
|
102 |
// Begins with separator ? return it.
|
104 |
// Begins with separator ? return it.
|
103 |
unsigned int delimi = delims.find_first_of(in[start]);
|
105 |
string::size_type delimi = delims.find_first_of(in[start]);
|
104 |
if (delimi != string::npos) {
|
106 |
if (delimi != string::npos) {
|
105 |
lex.what = Lexical::separator;
|
107 |
lex.what = Lexical::separator;
|
106 |
lex.value = delims[delimi];
|
108 |
lex.value = delims[delimi];
|
107 |
return start+1;
|
109 |
return start+1;
|
108 |
}
|
110 |
}
|
|
... |
|
... |
115 |
default: cquot = 0; break;
|
117 |
default: cquot = 0; break;
|
116 |
}
|
118 |
}
|
117 |
|
119 |
|
118 |
if (cquot != 0) {
|
120 |
if (cquot != 0) {
|
119 |
// Quoted string parsing
|
121 |
// Quoted string parsing
|
120 |
unsigned int end;
|
122 |
string::size_type end;
|
121 |
start++; // Skip quote character
|
123 |
start++; // Skip quote character
|
122 |
for (end = start;end < in.size() && in[end] != cquot; end++) {
|
124 |
for (end = start;end < in.size() && in[end] != cquot; end++) {
|
123 |
if (in[end] == '\\') {
|
125 |
if (in[end] == '\\') {
|
124 |
// Skip escaped char.
|
126 |
// Skip escaped char.
|
125 |
if (end+1 < in.size()) {
|
127 |
if (end+1 < in.size()) {
|
|
... |
|
... |
139 |
lex.what = Lexical::token;
|
141 |
lex.what = Lexical::token;
|
140 |
lex.value = in.substr(start, end-start);
|
142 |
lex.value = in.substr(start, end-start);
|
141 |
lex.quote = oquot;
|
143 |
lex.quote = oquot;
|
142 |
return ++end;
|
144 |
return ++end;
|
143 |
} else {
|
145 |
} else {
|
144 |
unsigned int end = in.find_first_of(delims + " \t(", start);
|
146 |
string::size_type end = in.find_first_of(delims + " \t(", start);
|
145 |
lex.what = Lexical::token;
|
147 |
lex.what = Lexical::token;
|
146 |
lex.quote = 0;
|
148 |
lex.quote = 0;
|
147 |
if (end == string::npos) {
|
149 |
if (end == string::npos) {
|
148 |
end = in.size();
|
150 |
end = in.size();
|
149 |
lex.value = in.substr(start);
|
151 |
lex.value = in.substr(start);
|
|
... |
|
... |
154 |
}
|
156 |
}
|
155 |
}
|
157 |
}
|
156 |
|
158 |
|
157 |
/// Append a lowercased copy of a string to another string.
///
/// Each byte of @p in is converted with tolower() and appended to @p out.
/// @p out is NOT cleared first: existing content is kept and the lowercased
/// text is added after it.
///
/// @param out destination string, extended in place
/// @param in  the input string, left unmodified
void stringtolower(std::string &out, const std::string& in)
{
    // One allocation up front instead of growing a byte at a time.
    out.reserve(out.size() + in.size());
    for (std::string::size_type i = 0; i < in.size(); i++) {
        // tolower() requires a value representable as unsigned char (or
        // EOF). Where plain char is signed, bytes >= 0x80 would otherwise be
        // passed as negative ints, which is undefined behaviour.
        const unsigned char c = static_cast<unsigned char>(in[i]);
        out.append(1, static_cast<char>(tolower(c)));
    }
}
|
162 |
|
164 |
|
163 |
bool parseMimeHeaderValue(const string& value, MimeHeaderValue& parsed)
|
165 |
bool parseMimeHeaderValue(const string& value, MimeHeaderValue& parsed)
|
164 |
{
|
166 |
{
|
165 |
parsed.value.erase();
|
167 |
parsed.value.erase();
|
166 |
parsed.params.clear();
|
168 |
parsed.params.clear();
|
167 |
|
169 |
|
168 |
Lexical lex;
|
170 |
Lexical lex;
|
169 |
unsigned int start = 0;
|
171 |
string::size_type start = 0;
|
170 |
start = find_next_token(value, start, lex);
|
172 |
start = find_next_token(value, start, lex);
|
171 |
if (start == string::npos || lex.what != Lexical::token)
|
173 |
if (start == string::npos || lex.what != Lexical::token)
|
172 |
return false;
|
174 |
return false;
|
173 |
|
175 |
|
174 |
parsed.value = lex.value;
|
176 |
parsed.value = lex.value;
|
|
... |
|
... |
203 |
|
205 |
|
204 |
// Decode a string encoded with quoted-printable encoding.
|
206 |
// Decode a string encoded with quoted-printable encoding.
|
205 |
bool qp_decode(const string& in, string &out)
|
207 |
bool qp_decode(const string& in, string &out)
|
206 |
{
|
208 |
{
|
207 |
out.reserve(in.length());
|
209 |
out.reserve(in.length());
|
208 |
unsigned int ii;
|
210 |
string::size_type ii;
|
209 |
for (ii = 0; ii < in.length(); ii++) {
|
211 |
for (ii = 0; ii < in.length(); ii++) {
|
210 |
if (in[ii] == '=') {
|
212 |
if (in[ii] == '=') {
|
211 |
ii++; // Skip '='
|
213 |
ii++; // Skip '='
|
212 |
if(ii >= in.length() - 1) { // Need at least 2 more chars
|
214 |
if(ii >= in.length() - 1) { // Need at least 2 more chars
|
213 |
break;
|
215 |
break;
|
|
... |
|
... |
298 |
Rfc2047States state = rfc2047base;
|
300 |
Rfc2047States state = rfc2047base;
|
299 |
string encoding, charset, value, utf8;
|
301 |
string encoding, charset, value, utf8;
|
300 |
|
302 |
|
301 |
out = "";
|
303 |
out = "";
|
302 |
|
304 |
|
303 |
for (unsigned int ii = 0; ii < in.length(); ii++) {
|
305 |
for (string::size_type ii = 0; ii < in.length(); ii++) {
|
304 |
char ch = in[ii];
|
306 |
char ch = in[ii];
|
305 |
switch (state) {
|
307 |
switch (state) {
|
306 |
case rfc2047base:
|
308 |
case rfc2047base:
|
307 |
{
|
309 |
{
|
308 |
switch (ch) {
|
310 |
switch (ch) {
|