recoll / Code / Diff of /src/utils/mimeparse.cpp

Diff of /src/utils/mimeparse.cpp [623065] .. [98688b]

Switch to unified view


...
#define DATEDEB(X)
#endif

// Convert rfc822 date to unix time. A date string normally looks like:
//  Mon, 3 Jul 2006 09:51:58 +0200
// But there are many close, common variations of this format,
// and also hopeless things like: Fri Nov  3 13:13:33 2006
time_t rfc2822DateToUxTime(const string& dt)
{
    // Strip everything up to first comma if any, we don't need weekday,
    // then break into tokens
    list<string> toks;
...
        return (time_t)-1;
    }
    string date = dt.substr(idx+1);
    stringToTokens(date, toks, " \t:");
    } else {
        // No comma. Enter strangeland
    stringToTokens(dt, toks, " \t:");
        // Test for date like: Sun Nov 19 06:18:41 2006
        //                      0   1  2   3 4  5  6
        // and change to:      19 Nov 2006 06:18:41
        if (toks.size() == 7) {
            list<string>::iterator it0 = toks.begin();
            if (it0->length() == 3 &&
                it0->find_first_of("0123456789") == string::npos) {
                list<string>::iterator it2 = it0;
                for (int i = 0; i < 2; i++) it2++;
                list<string>::iterator it6 = it2;
                for (int i = 0; i < 4; i++) it6++;
                iter_swap(it0, it2);
                iter_swap(it6, it2);
                toks.erase(it6);
            }
        }
    }

#if DEBUGDATE
    for (list<string>::iterator it = toks.begin(); it != toks.end(); it++) {
    DATEDEB((stderr, "[%s] ", it->c_str()));
    }
    DATEDEB((stderr, "\n"));
#endif

    if (toks.size() < 6) {
  DATEDEB((stderr, "Bad rfc822 date format (toks cnt): [%s]\n", 
       dt.c_str()));
  return (time_t)-1;
    }

    if (toks.size() == 6) {
    // Probably no timezone, sometimes happens
    toks.push_back("+0000");
    }







    struct tm tm;
    memset(&tm, 0, sizeof(tm));

    // Load struct tm with appropriate tokens, possibly converting
    // when needed
...
         dt.c_str()));
    return (time_t)-1;
    }
    it++;

    // Year. Struct tm counts from 1900. 2 char years are quite rare
    // but do happen. I've seen 00 happen so count small values from 2000
    tm.tm_year = atoi(it->c_str());
    if (it->length() == 2) {
        if (tm.tm_year < 10)
            tm.tm_year += 2000;
        else
            tm.tm_year += 1900;
    }
    if (tm.tm_year > 1900)
    tm.tm_year -= 1900;
    it++;

    // Hour minute second need no adjustments

	a/src/utils/mimeparse.cpp		b/src/utils/mimeparse.cpp
	...		...
598	#define DATEDEB(X)	598	#define DATEDEB(X)
599	#endif	599	#endif
600		600
601	// Convert rfc822 date to unix time. A date string normally looks like:	601	// Convert rfc822 date to unix time. A date string normally looks like:
602	// Mon, 3 Jul 2006 09:51:58 +0200	602	// Mon, 3 Jul 2006 09:51:58 +0200
603	// But there are many common variations	603	// But there are many close common variations
604	//	604	// And also hopeless things like: Fri Nov 3 13:13:33 2006
605	time_t rfc2822DateToUxTime(const string& dt)	605	time_t rfc2822DateToUxTime(const string& dt)
606	{	606	{
607	// Strip everything up to first comma if any, we don't need weekday,	607	// Strip everything up to first comma if any, we don't need weekday,
608	// then break into tokens	608	// then break into tokens
609	list<string> toks;	609	list<string> toks;
	...		...
615	return (time_t)-1;	615	return (time_t)-1;
616	}	616	}
617	string date = dt.substr(idx+1);	617	string date = dt.substr(idx+1);
618	stringToTokens(date, toks, " \t:");	618	stringToTokens(date, toks, " \t:");
619	} else {	619	} else {
		620	// No comma. Enter strangeland
620	stringToTokens(dt, toks, " \t:");	621	stringToTokens(dt, toks, " \t:");
		622	// Test for date like: Sun Nov 19 06:18:41 2006
		623	// 0 1 2 3 4 5 6
		624	// and change to: 19 Nov 2006 06:18:41
		625	if (toks.size() == 7) {
		626	list<string>::iterator it0 = toks.begin();
		627	if (it0->length() == 3 &&
		628	it0->find_first_of("0123456789") == string::npos) {
		629	list<string>::iterator it2 = it0;
		630	for (int i = 0; i < 2; i++) it2++;
		631	list<string>::iterator it6 = it2;
		632	for (int i = 0; i < 4; i++) it6++;
		633	iter_swap(it0, it2);
		634	iter_swap(it6, it2);
		635	toks.erase(it6);
		636	}
		637	}
621	}	638	}
622		639
623	#if DEBUGDATE	640	#if DEBUGDATE
624	for (list<string>::iterator it = toks.begin(); it != toks.end(); it++) {	641	for (list<string>::iterator it = toks.begin(); it != toks.end(); it++) {
625	DATEDEB((stderr, "[%s] ", it->c_str()));	642	DATEDEB((stderr, "[%s] ", it->c_str()));
626	}	643	}
627	DATEDEB((stderr, "\n"));	644	DATEDEB((stderr, "\n"));
628	#endif	645	#endif
629		646
		647	if (toks.size() < 6) {
		648	DATEDEB((stderr, "Bad rfc822 date format (toks cnt): [%s]\n",
		649	dt.c_str()));
		650	return (time_t)-1;
		651	}
		652
630	if (toks.size() == 6) {	653	if (toks.size() == 6) {
631	// Probably no timezone, sometimes happens	654	// Probably no timezone, sometimes happens
632	toks.push_back("+0000");	655	toks.push_back("+0000");
633	}	656	}
634		657
635	if (toks.size() < 7) {
636	DATEDEB((stderr, "Bad rfc822 date format (toks cnt): [%s]\n",
637	dt.c_str()));
638	return (time_t)-1;
639	}
640
641	struct tm tm;	658	struct tm tm;
642	memset(&tm, 0, sizeof(tm));	659	memset(&tm, 0, sizeof(tm));
643		660
644	// Load struct tm with appropriate tokens, possibly converting	661	// Load struct tm with appropriate tokens, possibly converting
645	// when needed	662	// when needed
	...		...
668	dt.c_str()));	685	dt.c_str()));
669	return (time_t)-1;	686	return (time_t)-1;
670	}	687	}
671	it++;	688	it++;
672		689
673	// Year. Struct tm counts from 1900	690	// Year. Struct tm counts from 1900. 2 char years are quite rare
		691	// but do happen. I've seen 00 happen so count small values from 2000
674	tm.tm_year = atoi(it->c_str());	692	tm.tm_year = atoi(it->c_str());
		693	if (it->length() == 2) {
		694	if (tm.tm_year < 10)
		695	tm.tm_year += 2000;
		696	else
		697	tm.tm_year += 1900;
		698	}
675	if (tm.tm_year > 1900)	699	if (tm.tm_year > 1900)
676	tm.tm_year -= 1900;	700	tm.tm_year -= 1900;
677	it++;	701	it++;
678		702
679	// Hour minute second need no adjustments	703	// Hour minute second need no adjustments