recoll / Code / Diff of /src/bincimapmime/mime-parsefull.cc

Diff of /src/bincimapmime/mime-parsefull.cc [138427] .. [bfc651]

Switch to unified view


...

#ifndef NO_NAMESPACES
using namespace ::std;
#endif /* NO_NAMESPACES */

// Debug tracing switch for this file.
// MPFDEB takes ONE fully parenthesized fprintf argument list, e.g.
//   MPFDEB((stderr, "fmt %d\n", value));
// so that the whole call expands to nothing when tracing is off.
// MPF is #undef'ed right before it is tested, so tracing is always compiled
// out unless the #undef line below is changed to a #define.
#undef MPF
#ifdef MPF
#define MPFDEB(X) fprintf X
#else
#define MPFDEB(X)
#endif

//------------------------------------------------------------------------
void Binc::MimeDocument::parseFull(int fd) const
{
  if (allIsParsed)
    return;
...
  delimiterqueue = 0;

  return foundBoundary;
}

// JFD: Things we do after finding a boundary (something like CRLF--somestring).
//
// Called once the boundary string itself has been consumed. Looks at the
// characters that follow it in order to:
//  - detect a closing delimiter (boundary followed by "--"): sets
//    *foundendofpart and adds the 2 dashes to *boundarysize;
//  - handle a trailing CRLF: if the CRLF is immediately followed by "--"
//    (another delimiter starts right away) all 4 characters are pushed back,
//    so that the CRLF belongs to the NEXT boundary; otherwise the CRLF is
//    kept as part of this boundary (*boundarysize += 2) and only the 2
//    look-ahead characters are pushed back;
//  - push back the 2 characters read when they are not CRLF.
//
// Parameters (all are in/out, none may be null):
//  eof            set to true when the input ends while reading here; it is
//                 never reset to false by this function.
//  nlines         incremented for every '\n' read right after the boundary
//                 (and after the closing "--").
//  boundarysize   grown by the number of characters recognised as belonging
//                 to this boundary ("--" and/or the kept CRLF).
//  foundendofpart set to true when a closing delimiter is seen; never reset.
//
// NOTE(review): a '\n' counted here and then pushed back with ungetChar()
// may be counted a second time by whoever re-reads it -- confirm against the
// callers before relying on *nlines being exact.
static inline void postBoundaryProcessing(bool *eof,
                    unsigned int *nlines,
                    int *boundarysize,
                    bool *foundendofpart)
{
    // Read two more characters. This may be CRLF, it may be "--" and
    // it may be any other two characters.
    char a = '\0';
    if (!mimeSource->getChar(&a))
      *eof = true;
    if (a == '\n')
      ++*nlines;

    char b = '\0';
    if (!mimeSource->getChar(&b))
      *eof = true;
    if (b == '\n')
      ++*nlines;

    // If eof, we're done here
    if (*eof)
      return;

    // If we find two dashes after the boundary, then this is the end
    // of boundary marker, and we need to get 2 more chars
    if (a == '-' && b == '-') {
      *foundendofpart = true;
      *boundarysize += 2;

      if (!mimeSource->getChar(&a))
        *eof = true;
      if (a == '\n')
        ++*nlines;

      if (!mimeSource->getChar(&b))
        *eof = true;
      if (b == '\n')
        ++*nlines;

      // Input ended right after the closing "--" (no trailing CRLF).
      // Same rule as for the first two reads: stop here. Nothing visible
      // here guarantees that a failed getChar() writes its argument, so a
      // and b may still hold the '-' '-' read above; going on would compare
      // those stale values and push back characters that were not read in
      // this step, rewinding the input over the closing "--".
      if (*eof)
        return;
    }

    // If the boundary is followed by CRLF, we need to handle the
    // special case where another boundary line follows
    // immediately. In this case we consider the CRLF to be part of
    // the NEXT boundary.
    if (a == '\r' && b == '\n') {
      // Get 2 more
      if (!mimeSource->getChar(&a) || !mimeSource->getChar(&b)) {
        *eof = true;
      } else if (a == '-' && b == '-') {
        // Push back the "--" look-ahead AND the CRLF before it (4 chars).
        MPFDEB((stderr, "BINC: consecutive delimiters, giving up CRLF\n"));
        mimeSource->ungetChar();
        mimeSource->ungetChar();
        mimeSource->ungetChar();
        mimeSource->ungetChar();
      } else {
        // We unget the 2 chars, and keep our crlf (increasing our own size)
        MPFDEB((stderr, "BINC: keeping my CRLF\n"));
        mimeSource->ungetChar();
        mimeSource->ungetChar();
        *boundarysize += 2;
      }

    } else {
      // Boundary string not followed by CRLF, don't read more and let
      // others skip the rest. Note that this is allowed but quite uncommon
      mimeSource->ungetChar();
      mimeSource->ungetChar();
    }
}

static void parseMultipart(const string &boundary,
               const string &toboundary,
               bool *eof,
               unsigned int *nlines,
               int *boundarysize,
               bool *foundendofpart,
               unsigned int *bodylength,
               vector<Binc::MimePart> *members)
{
  MPFDEB((stderr, "BINC: ParseMultipart: boundary [%s], toboundary[%s]\n", 
    boundary.c_str(),
    toboundary.c_str()));
  using namespace ::Binc;
  unsigned int bodystartoffsetcrlf = mimeSource->getOffset();

  // multipart parsing starts with skipping to the first
  // boundary. then we call parse() for all parts. the last parse()
...
  skipUntilBoundary(delimiter, nlines, eof);

  if (!eof)
    *boundarysize = delimiter.size();

  postBoundaryProcessing(eof, nlines, boundarysize, foundendofpart);


























































  // read all mime parts.
  if (!*foundendofpart && !*eof) {
    bool quit = false;
    do {
...
    // boundary. then we call parse() for all parts. the last parse()
    // command will return a code indicating that it found the last
    // boundary of this multipart. Note that the first boundary does
    // not have to start with CRLF.
    string delimiter = "\r\n--" + toboundary;

    skipUntilBoundary(delimiter, nlines, eof);

    if (!*eof)
      *boundarysize = delimiter.size();

    postBoundaryProcessing(eof, nlines, boundarysize, foundendofpart);

























































  }

  // make sure bodylength doesn't overflow    
  *bodylength = mimeSource->getOffset();
  if (*bodylength >= bodystartoffsetcrlf) {
...
      *bodylength = 0;
    }
  } else {
    *bodylength = 0;
  }
  MPFDEB((stderr, "BINC: ParseMultipart return\n"));
}

static void parseSinglePart(const string &toboundary,
                int *boundarysize,
                unsigned int *nbodylines,
                unsigned int *nlines,
                bool *eof, bool *foundendofpart,
                unsigned int *bodylength)
{
  MPFDEB((stderr, "BINC: parseSinglePart, boundary [%s]\n", 
    toboundary.c_str()));
  using namespace ::Binc;
  unsigned int bodystartoffsetcrlf = mimeSource->getOffset();

  // If toboundary is empty, then we read until the end of the
  // file. Otherwise we will read until we encounter toboundary.
...
  }

  delete [] boundaryqueue;

  if (toboundary != "") {
    postBoundaryProcessing(eof, nlines, boundarysize, foundendofpart);

















































  } else {
    // Recoll: in the case of a multipart body with a null
    // boundary (probably illegal but wtf), eof was not set and
    // multipart went into a loop until bad alloc.
    *eof = true;
  }

  // make sure bodylength doesn't overflow    
  *bodylength = mimeSource->getOffset();
...
      *bodylength = 0;
    }
  } else {
    *bodylength = 0;
  }
  MPFDEB((stderr, "BINC: parseSimple ret: bodylength %d, boundarysize %d\n",
    *bodylength, *boundarysize));
}

//------------------------------------------------------------------------
int Binc::MimePart::parseFull(const string &toboundary,
                  int &boundarysize) const
{
  MPFDEB((stderr, "BINC: parsefull, toboundary[%s]\n", toboundary.c_str()));
  headerstartoffsetcrlf = mimeSource->getOffset();

  // Parse the header of this mime part.
  parseHeader(&h, &nlines);

...
  } else {
    parseSinglePart(toboundary, &boundarysize, &nbodylines, &nlines,
            &eof, &foundendofpart, &bodylength);
  }

  MPFDEB((stderr, "BINC: parsefull ret, toboundary[%s]\n", toboundary.c_str()));
  return (eof || foundendofpart) ? 1 : 0;
}

	a/src/bincimapmime/mime-parsefull.cc		b/src/bincimapmime/mime-parsefull.cc
	...		...
46		46
47	#ifndef NO_NAMESPACES	47	#ifndef NO_NAMESPACES
48	using namespace ::std;	48	using namespace ::std;
49	#endif /* NO_NAMESPACES */	49	#endif /* NO_NAMESPACES */
50		50
		51	#undef MPF
		52	#ifdef MPF
		53	#define MPFDEB(X) fprintf X
		54	#else
		55	#define MPFDEB(X)
		56	#endif
		57
51	//------------------------------------------------------------------------	58	//------------------------------------------------------------------------
52	void Binc::MimeDocument::parseFull(int fd) const	59	void Binc::MimeDocument::parseFull(int fd) const
53	{	60	{
54	if (allIsParsed)	61	if (allIsParsed)
55	return;	62	return;
	...		...
345	delimiterqueue = 0;	352	delimiterqueue = 0;
346		353
347	return foundBoundary;	354	return foundBoundary;
348	}	355	}
349		356
		357	// JFD: Things we do after finding a boundary (something like CRLF--somestring)
		358	// Need to see if this is a final one (with an additional -- at the end),
		359	// and need to check if it is immediately followed by another boundary
		360	// (in this case, we give up our final CRLF in its favour)
		361	static inline void postBoundaryProcessing(bool *eof,
		362	unsigned int *nlines,
		363	int *boundarysize,
		364	bool *foundendofpart)
		365	{
		366	// Read two more characters. This may be CRLF, it may be "--" and
		367	// it may be any other two characters.
		368	char a = '\0';
		369	if (!mimeSource->getChar(&a))
		370	*eof = true;
		371	if (a == '\n')
		372	++*nlines;
		373
		374	char b = '\0';
		375	if (!mimeSource->getChar(&b))
		376	*eof = true;
		377	if (b == '\n')
		378	++*nlines;
		379
		380	// If eof, we're done here
		381	if (*eof)
		382	return;
		383
		384	// If we find two dashes after the boundary, then this is the end
		385	// of boundary marker, and we need to get 2 more chars
		386	if (a == '-' && b == '-') {
		387	*foundendofpart = true;
		388	*boundarysize += 2;
		389
		390	if (!mimeSource->getChar(&a))
		391	*eof = true;
		392	if (a == '\n')
		393	++*nlines;
		394
		395	if (!mimeSource->getChar(&b))
		396	*eof = true;
		397	if (b == '\n')
		398	++*nlines;
		399	}
		400
		401	// If the boundary is followed by CRLF, we need to handle the
		402	// special case where another boundary line follows
		403	// immediately. In this case we consider the CRLF to be part of
		404	// the NEXT boundary.
		405	if (a == '\r' && b == '\n') {
		406	// Get 2 more
		407	if (!mimeSource->getChar(&a) \|\| !mimeSource->getChar(&b)) {
		408	*eof = true;
		409	} else if (a == '-' && b == '-') {
		410	MPFDEB((stderr, "BINC: consecutive delimiters, giving up CRLF\n"));
		411	mimeSource->ungetChar();
		412	mimeSource->ungetChar();
		413	mimeSource->ungetChar();
		414	mimeSource->ungetChar();
		415	} else {
		416	// We unget the 2 chars, and keep our crlf (increasing our own size)
		417	MPFDEB((stderr, "BINC: keeping my CRLF\n"));
		418	mimeSource->ungetChar();
		419	mimeSource->ungetChar();
		420	*boundarysize += 2;
		421	}
		422
		423	} else {
		424	// Boundary string not followed by CRLF, don't read more and let
		425	// others skip the rest. Note that this is allowed but quite uncommon
		426	mimeSource->ungetChar();
		427	mimeSource->ungetChar();
		428	}
		429	}
350		430
351	static void parseMultipart(const string &boundary,	431	static void parseMultipart(const string &boundary,
352	const string &toboundary,	432	const string &toboundary,
353	bool *eof,	433	bool *eof,
354	unsigned int *nlines,	434	unsigned int *nlines,
355	int *boundarysize,	435	int *boundarysize,
356	bool *foundendofpart,	436	bool *foundendofpart,
357	unsigned int *bodylength,	437	unsigned int *bodylength,
358	vector<Binc::MimePart> *members)	438	vector<Binc::MimePart> *members)
359	{	439	{
		440	MPFDEB((stderr, "BINC: ParseMultipart: boundary [%s], toboundary[%s]\n",
		441	boundary.c_str(),
		442	toboundary.c_str()));
360	using namespace ::Binc;	443	using namespace ::Binc;
361	unsigned int bodystartoffsetcrlf = mimeSource->getOffset();	444	unsigned int bodystartoffsetcrlf = mimeSource->getOffset();
362		445
363	// multipart parsing starts with skipping to the first	446	// multipart parsing starts with skipping to the first
364	// boundary. then we call parse() for all parts. the last parse()	447	// boundary. then we call parse() for all parts. the last parse()
	...		...
370	skipUntilBoundary(delimiter, nlines, eof);	453	skipUntilBoundary(delimiter, nlines, eof);
371		454
372	if (!eof)	455	if (!eof)
373	*boundarysize = delimiter.size();	456	*boundarysize = delimiter.size();
374		457
375	// Read two more characters. This may be CRLF, it may be "--" and	458	postBoundaryProcessing(eof, nlines, boundarysize, foundendofpart);
376	// it may be any other two characters.
377	char a;
378	if (!mimeSource->getChar(&a))
379	*eof = true;
380
381	if (a == '\n')
382	++*nlines;
383
384	char b;
385	if (!mimeSource->getChar(&b))
386	*eof = true;
387
388	if (b == '\n')
389	++*nlines;
390
391	// If we find two dashes after the boundary, then this is the end
392	// of boundary marker.
393	if (!*eof) {
394	if (a == '-' && b == '-') {
395	*foundendofpart = true;
396	*boundarysize += 2;
397
398	if (!mimeSource->getChar(&a))
399	*eof = true;
400
401	if (a == '\n')
402	++*nlines;
403
404	if (!mimeSource->getChar(&b))
405	*eof = true;
406
407	if (b == '\n')
408	++*nlines;
409	}
410
411	if (a == '\r' && b == '\n') {
412	// This exception is to handle a special case where the
413	// delimiter of one part is not followed by CRLF, but
414	// immediately followed by a CRLF prefixed delimiter.
415	if (!mimeSource->getChar(&a) \|\| !mimeSource->getChar(&b))
416	*eof = true;
417	else if (a == '-' && b == '-') {
418	mimeSource->ungetChar();
419	mimeSource->ungetChar();
420	mimeSource->ungetChar();
421	mimeSource->ungetChar();
422	} else {
423	mimeSource->ungetChar();
424	mimeSource->ungetChar();
425	}
426
427	*boundarysize += 2;
428	} else {
429	mimeSource->ungetChar();
430	mimeSource->ungetChar();
431	}
432	}
433		459
434	// read all mime parts.	460	// read all mime parts.
435	if (!foundendofpart && !eof) {	461	if (!foundendofpart && !eof) {
436	bool quit = false;	462	bool quit = false;
437	do {	463	do {
	...		...
455	// boundary. then we call parse() for all parts. the last parse()	481	// boundary. then we call parse() for all parts. the last parse()
456	// command will return a code indicating that it found the last	482	// command will return a code indicating that it found the last
457	// boundary of this multipart. Note that the first boundary does	483	// boundary of this multipart. Note that the first boundary does
458	// not have to start with CRLF.	484	// not have to start with CRLF.
459	string delimiter = "\r\n--" + toboundary;	485	string delimiter = "\r\n--" + toboundary;
460
461	skipUntilBoundary(delimiter, nlines, eof);	486	skipUntilBoundary(delimiter, nlines, eof);
462		487
463	if (!*eof)	488	if (!*eof)
464	*boundarysize = delimiter.size();	489	*boundarysize = delimiter.size();
465		490
466	// Read two more characters. This may be CRLF, it may be "--" and	491	postBoundaryProcessing(eof, nlines, boundarysize, foundendofpart);
467	// it may be any other two characters.
468	char a = '\0';
469	if (!mimeSource->getChar(&a))
470	*eof = true;
471
472	if (a == '\n')
473	++*nlines;
474
475	char b = '\0';
476	if (!mimeSource->getChar(&b))
477	*eof = true;
478
479	if (b == '\n')
480	++*nlines;
481
482	// If we find two dashes after the boundary, then this is the end
483	// of boundary marker.
484	if (!*eof) {
485	if (a == '-' && b == '-') {
486	*foundendofpart = true;
487	*boundarysize += 2;
488
489	if (!mimeSource->getChar(&a))
490	*eof = true;
491
492	if (a == '\n')
493	++*nlines;
494
495	if (!mimeSource->getChar(&b))
496	*eof = true;
497
498	if (b == '\n')
499	++*nlines;
500	}
501
502	if (a == '\r' && b == '\n') {
503	// This exception is to handle a special case where the
504	// delimiter of one part is not followed by CRLF, but
505	// immediately followed by a CRLF prefixed delimiter.
506	if (!mimeSource->getChar(&a) \|\| !mimeSource->getChar(&b))
507	*eof = true;
508	else if (a == '-' && b == '-') {
509	mimeSource->ungetChar();
510	mimeSource->ungetChar();
511	mimeSource->ungetChar();
512	mimeSource->ungetChar();
513	} else {
514	mimeSource->ungetChar();
515	mimeSource->ungetChar();
516	}
517
518	*boundarysize += 2;
519	} else {
520	mimeSource->ungetChar();
521	mimeSource->ungetChar();
522	}
523	}
524	}	492	}
525		493
526	// make sure bodylength doesn't overflow	494	// make sure bodylength doesn't overflow
527	*bodylength = mimeSource->getOffset();	495	*bodylength = mimeSource->getOffset();
528	if (*bodylength >= bodystartoffsetcrlf) {	496	if (*bodylength >= bodystartoffsetcrlf) {
	...		...
533	*bodylength = 0;	501	*bodylength = 0;
534	}	502	}
535	} else {	503	} else {
536	*bodylength = 0;	504	*bodylength = 0;
537	}	505	}
		506	MPFDEB((stderr, "BINC: ParseMultipart return\n"));
538	}	507	}
539		508
540	static void parseSinglePart(const string &toboundary,	509	static void parseSinglePart(const string &toboundary,
541	int *boundarysize,	510	int *boundarysize,
542	unsigned int *nbodylines,	511	unsigned int *nbodylines,
543	unsigned int *nlines,	512	unsigned int *nlines,
544	bool eof, bool foundendofpart,	513	bool eof, bool foundendofpart,
545	unsigned int *bodylength)	514	unsigned int *bodylength)
546	{	515	{
		516	MPFDEB((stderr, "BINC: parseSinglePart, boundary [%s]\n",
		517	toboundary.c_str()));
547	using namespace ::Binc;	518	using namespace ::Binc;
548	unsigned int bodystartoffsetcrlf = mimeSource->getOffset();	519	unsigned int bodystartoffsetcrlf = mimeSource->getOffset();
549		520
550	// If toboundary is empty, then we read until the end of the	521	// If toboundary is empty, then we read until the end of the
551	// file. Otherwise we will read until we encounter toboundary.	522	// file. Otherwise we will read until we encounter toboundary.
	...		...
589	}	560	}
590		561
591	delete [] boundaryqueue;	562	delete [] boundaryqueue;
592		563
593	if (toboundary != "") {	564	if (toboundary != "") {
594	char a;	565	postBoundaryProcessing(eof, nlines, boundarysize, foundendofpart);
595	if (!mimeSource->getChar(&a))
596	*eof = true;
597
598	if (a == '\n')
599	++*nlines;
600	char b;
601	if (!mimeSource->getChar(&b))
602	*eof = true;
603
604	if (b == '\n')
605	++*nlines;
606
607	if (a == '-' && b == '-') {
608	*boundarysize += 2;
609	*foundendofpart = true;
610	if (!mimeSource->getChar(&a))
611	*eof = true;
612
613	if (a == '\n')
614	++*nlines;
615
616	if (!mimeSource->getChar(&b))
617	*eof = true;
618
619	if (b == '\n')
620	++*nlines;
621	}
622
623	if (a == '\r' && b == '\n') {
624	// This exception is to handle a special case where the
625	// delimiter of one part is not followed by CRLF, but
626	// immediately followed by a CRLF prefixed delimiter.
627	if (!mimeSource->getChar(&a) \|\| !mimeSource->getChar(&b))
628	*eof = true;
629	else if (a == '-' && b == '-') {
630	mimeSource->ungetChar();
631	mimeSource->ungetChar();
632	mimeSource->ungetChar();
633	mimeSource->ungetChar();
634	} else {
635	mimeSource->ungetChar();
636	mimeSource->ungetChar();
637	}
638
639	*boundarysize += 2;
640	} else {
641	mimeSource->ungetChar();
642	mimeSource->ungetChar();
643	}
644	} else {	566	} else {
645	// Recoll: in the case of a null boundary (probably illegal but wtf), eof	567	// Recoll: in the case of a multipart body with a null
		568	// boundary (probably illegal but wtf), eof was not set and
646	// was not set and multipart went into a loop until bad alloc.	569	// multipart went into a loop until bad alloc.
647	*eof = true;	570	*eof = true;
648	}	571	}
649		572
650	// make sure bodylength doesn't overflow	573	// make sure bodylength doesn't overflow
651	*bodylength = mimeSource->getOffset();	574	*bodylength = mimeSource->getOffset();
	...		...
657	*bodylength = 0;	580	*bodylength = 0;
658	}	581	}
659	} else {	582	} else {
660	*bodylength = 0;	583	*bodylength = 0;
661	}	584	}
662		585	MPFDEB((stderr, "BINC: parseSimple ret: bodylength %d, boundarysize %d\n",
		586	bodylength, boundarysize));
663	}	587	}
664		588
665	//------------------------------------------------------------------------	589	//------------------------------------------------------------------------
666	int Binc::MimePart::parseFull(const string &toboundary,	590	int Binc::MimePart::parseFull(const string &toboundary,
667	int &boundarysize) const	591	int &boundarysize) const
668	{	592	{
		593	MPFDEB((stderr, "BINC: parsefull, toboundary[%s]\n", toboundary.c_str()));
669	headerstartoffsetcrlf = mimeSource->getOffset();	594	headerstartoffsetcrlf = mimeSource->getOffset();
670		595
671	// Parse the header of this mime part.	596	// Parse the header of this mime part.
672	parseHeader(&h, &nlines);	597	parseHeader(&h, &nlines);
673		598
	...		...
695	} else {	620	} else {
696	parseSinglePart(toboundary, &boundarysize, &nbodylines, &nlines,	621	parseSinglePart(toboundary, &boundarysize, &nbodylines, &nlines,
697	&eof, &foundendofpart, &bodylength);	622	&eof, &foundendofpart, &bodylength);
698	}	623	}
699		624
		625	MPFDEB((stderr, "BINC: parsefull ret, toboundary[%s]\n", toboundary.c_str()));
700	return (eof \|\| foundendofpart) ? 1 : 0;	626	return (eof \|\| foundendofpart) ? 1 : 0;
701	}	627	}