recoll / Code / Diff of /src/utils/mimeparse.cpp

Diff of /src/utils/mimeparse.cpp [38ca57] .. [9de619]

Switch to unified view


#ifndef lint
static char rcsid[] = "@(#$Id: mimeparse.cpp,v 1.9 2005-12-16 10:08:08 dockes Exp $ (C) 2004 J.F.Dockes";
#endif

#ifndef TEST_MIMEPARSE

#include <string>
...
    Lexical() : what(none), quote(0) {}
    void reset() {what = none; value.erase(); error.erase();quote = 0;}
};

// Skip mime comment. This must be called with in[start] == '('
string::size_type skip_comment(const string &in, string::size_type start, 
                 Lexical &lex)
{
    int commentlevel = 0;
    for (; start < in.size(); start++) {
    if (in[start] == '\\') {
        // Skip escaped char. 
...
    }
    return start;
}

// Skip initial whitespace and (possibly nested) comments. 
string::size_type skip_whitespace_and_comment(const string &in, 
                        string::size_type start, 
                        Lexical &lex)
{
    while (1) {
    if ((start = in.find_first_not_of(" \t\r\n", start)) == string::npos)
        return in.size();
    if (in[start] == '(') {
...
///   (i.e. unbalanced quoting)
/// @param in the input string
/// @param start the starting position
/// @param lex  the returned token and its description
/// @param delims separators we should look for
string::size_type find_next_token(const string &in, string::size_type start, 
                Lexical &lex, string delims = ";=")
{
    char oquot, cquot;

    start = skip_whitespace_and_comment(in, start, lex);
    if (start == string::npos || start == in.size())
    return start;

    // Begins with separator ? return it.
    string::size_type delimi = delims.find_first_of(in[start]);
    if (delimi != string::npos) {
    lex.what = Lexical::separator;
    lex.value = delims[delimi];
    return start+1;
    }
...
    default: cquot = 0; break;
    }

    if (cquot != 0) {
    // Quoted string parsing
  string::size_type end;
    start++; // Skip quote character
    for (end = start;end < in.size() && in[end] != cquot; end++) {
        if (in[end] == '\\') {
        // Skip escaped char. 
        if (end+1 < in.size()) {
...
    lex.what = Lexical::token;
    lex.value = in.substr(start, end-start);
    lex.quote = oquot;
    return ++end;
    } else {
    string::size_type end = in.find_first_of(delims + " \t(", start);
    lex.what = Lexical::token;
    lex.quote = 0;
    if (end == string::npos) {
        end = in.size();
        lex.value = in.substr(start);
...
    }
}

/// Append a lower-cased copy of a string to another one.
///
/// The output string is not cleared first: the converted characters are
/// appended to whatever it already holds.
/// @param out the string which receives the lower-cased characters
/// @param in  the input string, left unchanged
void stringtolower(string &out, const string& in)
{
    for (string::size_type i = 0; i < in.size(); i++) {
        // tolower() is only defined for EOF and for values representable
        // as unsigned char. Go through unsigned char so that bytes >= 0x80
        // are not passed as negative values where plain char is signed.
        out.append(1, char(tolower(static_cast<unsigned char>(in[i]))));
    }
}

bool parseMimeHeaderValue(const string& value, MimeHeaderValue& parsed)
{
    parsed.value.erase();
    parsed.params.clear();

    Lexical lex;
    string::size_type start = 0;
    start = find_next_token(value, start, lex);
    if (start == string::npos || lex.what != Lexical::token) 
    return false;

    parsed.value = lex.value;
...

// Decode a string encoded with quoted-printable encoding. 
bool qp_decode(const string& in, string &out) 
{
    out.reserve(in.length());
    string::size_type ii;
    for (ii = 0; ii < in.length(); ii++) {
    if (in[ii] == '=') {
        ii++; // Skip '='
        if(ii >= in.length() - 1) { // Need at least 2 more chars
        break;
...
    Rfc2047States state = rfc2047base;
    string encoding, charset, value, utf8;

    out = "";

    for (string::size_type ii = 0; ii < in.length(); ii++) {
    char ch = in[ii];
    switch (state) {
    case rfc2047base: 
        {
        switch (ch) {

	a/src/utils/mimeparse.cpp		b/src/utils/mimeparse.cpp
1	#ifndef lint	1	#ifndef lint
2	static char rcsid[] = "@(#$Id: mimeparse.cpp,v 1.8 2005-11-24 07:16:16 dockes Exp $ (C) 2004 J.F.Dockes";	2	static char rcsid[] = "@(#$Id: mimeparse.cpp,v 1.9 2005-12-16 10:08:08 dockes Exp $ (C) 2004 J.F.Dockes";
3	#endif	3	#endif
4		4
5	#ifndef TEST_MIMEPARSE	5	#ifndef TEST_MIMEPARSE
6		6
7	#include <string>	7	#include <string>
	...		...
36	Lexical() : what(none), quote(0) {}	36	Lexical() : what(none), quote(0) {}
37	void reset() {what = none; value.erase(); error.erase();quote = 0;}	37	void reset() {what = none; value.erase(); error.erase();quote = 0;}
38	};	38	};
39		39
40	// Skip mime comment. This must be called with in[start] == '('	40	// Skip mime comment. This must be called with in[start] == '('
41	int skip_comment(const string &in, unsigned int start, Lexical &lex)	41	string::size_type skip_comment(const string &in, string::size_type start,
		42	Lexical &lex)
42	{	43	{
43	int commentlevel = 0;	44	int commentlevel = 0;
44	for (; start < in.size(); start++) {	45	for (; start < in.size(); start++) {
45	if (in[start] == '\\') {	46	if (in[start] == '\\') {
46	// Skip escaped char.	47	// Skip escaped char.
	...		...
65	}	66	}
66	return start;	67	return start;
67	}	68	}
68		69
69	// Skip initial whitespace and (possibly nested) comments.	70	// Skip initial whitespace and (possibly nested) comments.
70	int skip_whitespace_and_comment(const string &in, unsigned int start,	71	string::size_type skip_whitespace_and_comment(const string &in,
		72	string::size_type start,
71	Lexical &lex)	73	Lexical &lex)
72	{	74	{
73	while (1) {	75	while (1) {
74	if ((start = in.find_first_not_of(" \t\r\n", start)) == string::npos)	76	if ((start = in.find_first_not_of(" \t\r\n", start)) == string::npos)
75	return in.size();	77	return in.size();
76	if (in[start] == '(') {	78	if (in[start] == '(') {
	...		...
88	/// (ie unbalanced quoting)	90	/// (ie unbalanced quoting)
89	/// @param in the input string	91	/// @param in the input string
90	/// @param start the starting position	92	/// @param start the starting position
91	/// @param lex the returned token and its description	93	/// @param lex the returned token and its description
92	/// @param delims separators we should look for	94	/// @param delims separators we should look for
93	int find_next_token(const string &in, unsigned int start,	95	string::size_type find_next_token(const string &in, string::size_type start,
94	Lexical &lex, string delims = ";=")	96	Lexical &lex, string delims = ";=")
95	{	97	{
96	char oquot, cquot;	98	char oquot, cquot;
97		99
98	start = skip_whitespace_and_comment(in, start, lex);	100	start = skip_whitespace_and_comment(in, start, lex);
99	if (start == string::npos \|\| start == in.size())	101	if (start == string::npos \|\| start == in.size())
100	return start;	102	return start;
101		103
102	// Begins with separator ? return it.	104	// Begins with separator ? return it.
103	unsigned int delimi = delims.find_first_of(in[start]);	105	string::size_type delimi = delims.find_first_of(in[start]);
104	if (delimi != string::npos) {	106	if (delimi != string::npos) {
105	lex.what = Lexical::separator;	107	lex.what = Lexical::separator;
106	lex.value = delims[delimi];	108	lex.value = delims[delimi];
107	return start+1;	109	return start+1;
108	}	110	}
	...		...
115	default: cquot = 0; break;	117	default: cquot = 0; break;
116	}	118	}
117		119
118	if (cquot != 0) {	120	if (cquot != 0) {
119	// Quoted string parsing	121	// Quoted string parsing
120	unsigned int end;	122	string::size_type end;
121	start++; // Skip quote character	123	start++; // Skip quote character
122	for (end = start;end < in.size() && in[end] != cquot; end++) {	124	for (end = start;end < in.size() && in[end] != cquot; end++) {
123	if (in[end] == '\\') {	125	if (in[end] == '\\') {
124	// Skip escaped char.	126	// Skip escaped char.
125	if (end+1 < in.size()) {	127	if (end+1 < in.size()) {
	...		...
139	lex.what = Lexical::token;	141	lex.what = Lexical::token;
140	lex.value = in.substr(start, end-start);	142	lex.value = in.substr(start, end-start);
141	lex.quote = oquot;	143	lex.quote = oquot;
142	return ++end;	144	return ++end;
143	} else {	145	} else {
144	unsigned int end = in.find_first_of(delims + " \t(", start);	146	string::size_type end = in.find_first_of(delims + " \t(", start);
145	lex.what = Lexical::token;	147	lex.what = Lexical::token;
146	lex.quote = 0;	148	lex.quote = 0;
147	if (end == string::npos) {	149	if (end == string::npos) {
148	end = in.size();	150	end = in.size();
149	lex.value = in.substr(start);	151	lex.value = in.substr(start);
	...		...
154	}	156	}
155	}	157	}
156		158
157	void stringtolower(string &out, const string& in)	159	void stringtolower(string &out, const string& in)
158	{	160	{
159	for (unsigned int i = 0; i < in.size(); i++)	161	for (string::size_type i = 0; i < in.size(); i++)
160	out.append(1, char(tolower(in[i])));	162	out.append(1, char(tolower(in[i])));
161	}	163	}
162		164
163	bool parseMimeHeaderValue(const string& value, MimeHeaderValue& parsed)	165	bool parseMimeHeaderValue(const string& value, MimeHeaderValue& parsed)
164	{	166	{
165	parsed.value.erase();	167	parsed.value.erase();
166	parsed.params.clear();	168	parsed.params.clear();
167		169
168	Lexical lex;	170	Lexical lex;
169	unsigned int start = 0;	171	string::size_type start = 0;
170	start = find_next_token(value, start, lex);	172	start = find_next_token(value, start, lex);
171	if (start == string::npos \|\| lex.what != Lexical::token)	173	if (start == string::npos \|\| lex.what != Lexical::token)
172	return false;	174	return false;
173		175
174	parsed.value = lex.value;	176	parsed.value = lex.value;
	...		...
203		205
204	// Decode a string encoded with quoted-printable encoding.	206	// Decode a string encoded with quoted-printable encoding.
205	bool qp_decode(const string& in, string &out)	207	bool qp_decode(const string& in, string &out)
206	{	208	{
207	out.reserve(in.length());	209	out.reserve(in.length());
208	unsigned int ii;	210	string::size_type ii;
209	for (ii = 0; ii < in.length(); ii++) {	211	for (ii = 0; ii < in.length(); ii++) {
210	if (in[ii] == '=') {	212	if (in[ii] == '=') {
211	ii++; // Skip '='	213	ii++; // Skip '='
212	if(ii >= in.length() - 1) { // Need at least 2 more chars	214	if(ii >= in.length() - 1) { // Need at least 2 more chars
213	break;	215	break;
	...		...
298	Rfc2047States state = rfc2047base;	300	Rfc2047States state = rfc2047base;
299	string encoding, charset, value, utf8;	301	string encoding, charset, value, utf8;
300		302
301	out = "";	303	out = "";
302		304
303	for (unsigned int ii = 0; ii < in.length(); ii++) {	305	for (string::size_type ii = 0; ii < in.length(); ii++) {
304	char ch = in[ii];	306	char ch = in[ii];
305	switch (state) {	307	switch (state) {
306	case rfc2047base:	308	case rfc2047base:
307	{	309	{
308	switch (ch) {	310	switch (ch) {