|
a/src/bincimapmime/mime-parsefull.cc |
|
b/src/bincimapmime/mime-parsefull.cc |
|
... |
|
... |
46 |
|
46 |
|
47 |
#ifndef NO_NAMESPACES
|
47 |
#ifndef NO_NAMESPACES
|
48 |
using namespace ::std;
|
48 |
using namespace ::std;
|
49 |
#endif /* NO_NAMESPACES */
|
49 |
#endif /* NO_NAMESPACES */
|
50 |
|
50 |
|
|
|
51 |
#undef MPF
|
|
|
52 |
#ifdef MPF
|
|
|
53 |
#define MPFDEB(X) fprintf X
|
|
|
54 |
#else
|
|
|
55 |
#define MPFDEB(X)
|
|
|
56 |
#endif
|
|
|
57 |
|
51 |
//------------------------------------------------------------------------
|
58 |
//------------------------------------------------------------------------
|
52 |
void Binc::MimeDocument::parseFull(int fd) const
|
59 |
void Binc::MimeDocument::parseFull(int fd) const
|
53 |
{
|
60 |
{
|
54 |
if (allIsParsed)
|
61 |
if (allIsParsed)
|
55 |
return;
|
62 |
return;
|
|
... |
|
... |
345 |
delimiterqueue = 0;
|
352 |
delimiterqueue = 0;
|
346 |
|
353 |
|
347 |
return foundBoundary;
|
354 |
return foundBoundary;
|
348 |
}
|
355 |
}
|
349 |
|
356 |
|
|
|
357 |
// JFD: Things we do after finding a boundary (something like CRLF--somestring)
|
|
|
358 |
// Need to see if this is a final one (with an additional -- at the end),
|
|
|
359 |
// and need to check if it is immediately followed by another boundary
|
|
|
360 |
// (in this case, we give up our final CRLF in its favour)
|
|
|
361 |
static inline void postBoundaryProcessing(bool *eof,
|
|
|
362 |
unsigned int *nlines,
|
|
|
363 |
int *boundarysize,
|
|
|
364 |
bool *foundendofpart)
|
|
|
365 |
{
|
|
|
366 |
// Read two more characters. This may be CRLF, it may be "--" and
|
|
|
367 |
// it may be any other two characters.
|
|
|
368 |
char a = '\0';
|
|
|
369 |
if (!mimeSource->getChar(&a))
|
|
|
370 |
*eof = true;
|
|
|
371 |
if (a == '\n')
|
|
|
372 |
++*nlines;
|
|
|
373 |
|
|
|
374 |
char b = '\0';
|
|
|
375 |
if (!mimeSource->getChar(&b))
|
|
|
376 |
*eof = true;
|
|
|
377 |
if (b == '\n')
|
|
|
378 |
++*nlines;
|
|
|
379 |
|
|
|
380 |
// If eof, we're done here
|
|
|
381 |
if (*eof)
|
|
|
382 |
return;
|
|
|
383 |
|
|
|
384 |
// If we find two dashes after the boundary, then this is the end
|
|
|
385 |
// of boundary marker, and we need to get 2 more chars
|
|
|
386 |
if (a == '-' && b == '-') {
|
|
|
387 |
*foundendofpart = true;
|
|
|
388 |
*boundarysize += 2;
|
|
|
389 |
|
|
|
390 |
if (!mimeSource->getChar(&a))
|
|
|
391 |
*eof = true;
|
|
|
392 |
if (a == '\n')
|
|
|
393 |
++*nlines;
|
|
|
394 |
|
|
|
395 |
if (!mimeSource->getChar(&b))
|
|
|
396 |
*eof = true;
|
|
|
397 |
if (b == '\n')
|
|
|
398 |
++*nlines;
|
|
|
399 |
}
|
|
|
400 |
|
|
|
401 |
// If the boundary is followed by CRLF, we need to handle the
|
|
|
402 |
// special case where another boundary line follows
|
|
|
403 |
// immediately. In this case we consider the CRLF to be part of
|
|
|
404 |
// the NEXT boundary.
|
|
|
405 |
if (a == '\r' && b == '\n') {
|
|
|
406 |
// Get 2 more
|
|
|
407 |
if (!mimeSource->getChar(&a) || !mimeSource->getChar(&b)) {
|
|
|
408 |
*eof = true;
|
|
|
409 |
} else if (a == '-' && b == '-') {
|
|
|
410 |
MPFDEB((stderr, "BINC: consecutive delimiters, giving up CRLF\n"));
|
|
|
411 |
mimeSource->ungetChar();
|
|
|
412 |
mimeSource->ungetChar();
|
|
|
413 |
mimeSource->ungetChar();
|
|
|
414 |
mimeSource->ungetChar();
|
|
|
415 |
} else {
|
|
|
416 |
// We unget the 2 chars, and keep our crlf (increasing our own size)
|
|
|
417 |
MPFDEB((stderr, "BINC: keeping my CRLF\n"));
|
|
|
418 |
mimeSource->ungetChar();
|
|
|
419 |
mimeSource->ungetChar();
|
|
|
420 |
*boundarysize += 2;
|
|
|
421 |
}
|
|
|
422 |
|
|
|
423 |
} else {
|
|
|
424 |
// Boundary string not followed by CRLF, don't read more and let
|
|
|
425 |
// others skip the rest. Note that this is allowed but quite uncommon
|
|
|
426 |
mimeSource->ungetChar();
|
|
|
427 |
mimeSource->ungetChar();
|
|
|
428 |
}
|
|
|
429 |
}
|
350 |
|
430 |
|
351 |
static void parseMultipart(const string &boundary,
|
431 |
static void parseMultipart(const string &boundary,
|
352 |
const string &toboundary,
|
432 |
const string &toboundary,
|
353 |
bool *eof,
|
433 |
bool *eof,
|
354 |
unsigned int *nlines,
|
434 |
unsigned int *nlines,
|
355 |
int *boundarysize,
|
435 |
int *boundarysize,
|
356 |
bool *foundendofpart,
|
436 |
bool *foundendofpart,
|
357 |
unsigned int *bodylength,
|
437 |
unsigned int *bodylength,
|
358 |
vector<Binc::MimePart> *members)
|
438 |
vector<Binc::MimePart> *members)
|
359 |
{
|
439 |
{
|
|
|
440 |
MPFDEB((stderr, "BINC: ParseMultipart: boundary [%s], toboundary[%s]\n",
|
|
|
441 |
boundary.c_str(),
|
|
|
442 |
toboundary.c_str()));
|
360 |
using namespace ::Binc;
|
443 |
using namespace ::Binc;
|
361 |
unsigned int bodystartoffsetcrlf = mimeSource->getOffset();
|
444 |
unsigned int bodystartoffsetcrlf = mimeSource->getOffset();
|
362 |
|
445 |
|
363 |
// multipart parsing starts with skipping to the first
|
446 |
// multipart parsing starts with skipping to the first
|
364 |
// boundary. then we call parse() for all parts. the last parse()
|
447 |
// boundary. then we call parse() for all parts. the last parse()
|
|
... |
|
... |
370 |
skipUntilBoundary(delimiter, nlines, eof);
|
453 |
skipUntilBoundary(delimiter, nlines, eof);
|
371 |
|
454 |
|
372 |
if (!eof)
|
455 |
if (!eof)
|
373 |
*boundarysize = delimiter.size();
|
456 |
*boundarysize = delimiter.size();
|
374 |
|
457 |
|
375 |
// Read two more characters. This may be CRLF, it may be "--" and
|
458 |
postBoundaryProcessing(eof, nlines, boundarysize, foundendofpart);
|
376 |
// it may be any other two characters.
|
|
|
377 |
char a;
|
|
|
378 |
if (!mimeSource->getChar(&a))
|
|
|
379 |
*eof = true;
|
|
|
380 |
|
|
|
381 |
if (a == '\n')
|
|
|
382 |
++*nlines;
|
|
|
383 |
|
|
|
384 |
char b;
|
|
|
385 |
if (!mimeSource->getChar(&b))
|
|
|
386 |
*eof = true;
|
|
|
387 |
|
|
|
388 |
if (b == '\n')
|
|
|
389 |
++*nlines;
|
|
|
390 |
|
|
|
391 |
// If we find two dashes after the boundary, then this is the end
|
|
|
392 |
// of boundary marker.
|
|
|
393 |
if (!*eof) {
|
|
|
394 |
if (a == '-' && b == '-') {
|
|
|
395 |
*foundendofpart = true;
|
|
|
396 |
*boundarysize += 2;
|
|
|
397 |
|
|
|
398 |
if (!mimeSource->getChar(&a))
|
|
|
399 |
*eof = true;
|
|
|
400 |
|
|
|
401 |
if (a == '\n')
|
|
|
402 |
++*nlines;
|
|
|
403 |
|
|
|
404 |
if (!mimeSource->getChar(&b))
|
|
|
405 |
*eof = true;
|
|
|
406 |
|
|
|
407 |
if (b == '\n')
|
|
|
408 |
++*nlines;
|
|
|
409 |
}
|
|
|
410 |
|
|
|
411 |
if (a == '\r' && b == '\n') {
|
|
|
412 |
// This exception is to handle a special case where the
|
|
|
413 |
// delimiter of one part is not followed by CRLF, but
|
|
|
414 |
// immediately followed by a CRLF prefixed delimiter.
|
|
|
415 |
if (!mimeSource->getChar(&a) || !mimeSource->getChar(&b))
|
|
|
416 |
*eof = true;
|
|
|
417 |
else if (a == '-' && b == '-') {
|
|
|
418 |
mimeSource->ungetChar();
|
|
|
419 |
mimeSource->ungetChar();
|
|
|
420 |
mimeSource->ungetChar();
|
|
|
421 |
mimeSource->ungetChar();
|
|
|
422 |
} else {
|
|
|
423 |
mimeSource->ungetChar();
|
|
|
424 |
mimeSource->ungetChar();
|
|
|
425 |
}
|
|
|
426 |
|
|
|
427 |
*boundarysize += 2;
|
|
|
428 |
} else {
|
|
|
429 |
mimeSource->ungetChar();
|
|
|
430 |
mimeSource->ungetChar();
|
|
|
431 |
}
|
|
|
432 |
}
|
|
|
433 |
|
459 |
|
434 |
// read all mime parts.
|
460 |
// read all mime parts.
|
435 |
if (!*foundendofpart && !*eof) {
|
461 |
if (!*foundendofpart && !*eof) {
|
436 |
bool quit = false;
|
462 |
bool quit = false;
|
437 |
do {
|
463 |
do {
|
|
... |
|
... |
455 |
// boundary. then we call parse() for all parts. the last parse()
|
481 |
// boundary. then we call parse() for all parts. the last parse()
|
456 |
// command will return a code indicating that it found the last
|
482 |
// command will return a code indicating that it found the last
|
457 |
// boundary of this multipart. Note that the first boundary does
|
483 |
// boundary of this multipart. Note that the first boundary does
|
458 |
// not have to start with CRLF.
|
484 |
// not have to start with CRLF.
|
459 |
string delimiter = "\r\n--" + toboundary;
|
485 |
string delimiter = "\r\n--" + toboundary;
|
460 |
|
|
|
461 |
skipUntilBoundary(delimiter, nlines, eof);
|
486 |
skipUntilBoundary(delimiter, nlines, eof);
|
462 |
|
487 |
|
463 |
if (!*eof)
|
488 |
if (!*eof)
|
464 |
*boundarysize = delimiter.size();
|
489 |
*boundarysize = delimiter.size();
|
465 |
|
490 |
|
466 |
// Read two more characters. This may be CRLF, it may be "--" and
|
491 |
postBoundaryProcessing(eof, nlines, boundarysize, foundendofpart);
|
467 |
// it may be any other two characters.
|
|
|
468 |
char a = '\0';
|
|
|
469 |
if (!mimeSource->getChar(&a))
|
|
|
470 |
*eof = true;
|
|
|
471 |
|
|
|
472 |
if (a == '\n')
|
|
|
473 |
++*nlines;
|
|
|
474 |
|
|
|
475 |
char b = '\0';
|
|
|
476 |
if (!mimeSource->getChar(&b))
|
|
|
477 |
*eof = true;
|
|
|
478 |
|
|
|
479 |
if (b == '\n')
|
|
|
480 |
++*nlines;
|
|
|
481 |
|
|
|
482 |
// If we find two dashes after the boundary, then this is the end
|
|
|
483 |
// of boundary marker.
|
|
|
484 |
if (!*eof) {
|
|
|
485 |
if (a == '-' && b == '-') {
|
|
|
486 |
*foundendofpart = true;
|
|
|
487 |
*boundarysize += 2;
|
|
|
488 |
|
|
|
489 |
if (!mimeSource->getChar(&a))
|
|
|
490 |
*eof = true;
|
|
|
491 |
|
|
|
492 |
if (a == '\n')
|
|
|
493 |
++*nlines;
|
|
|
494 |
|
|
|
495 |
if (!mimeSource->getChar(&b))
|
|
|
496 |
*eof = true;
|
|
|
497 |
|
|
|
498 |
if (b == '\n')
|
|
|
499 |
++*nlines;
|
|
|
500 |
}
|
|
|
501 |
|
|
|
502 |
if (a == '\r' && b == '\n') {
|
|
|
503 |
// This exception is to handle a special case where the
|
|
|
504 |
// delimiter of one part is not followed by CRLF, but
|
|
|
505 |
// immediately followed by a CRLF prefixed delimiter.
|
|
|
506 |
if (!mimeSource->getChar(&a) || !mimeSource->getChar(&b))
|
|
|
507 |
*eof = true;
|
|
|
508 |
else if (a == '-' && b == '-') {
|
|
|
509 |
mimeSource->ungetChar();
|
|
|
510 |
mimeSource->ungetChar();
|
|
|
511 |
mimeSource->ungetChar();
|
|
|
512 |
mimeSource->ungetChar();
|
|
|
513 |
} else {
|
|
|
514 |
mimeSource->ungetChar();
|
|
|
515 |
mimeSource->ungetChar();
|
|
|
516 |
}
|
|
|
517 |
|
|
|
518 |
*boundarysize += 2;
|
|
|
519 |
} else {
|
|
|
520 |
mimeSource->ungetChar();
|
|
|
521 |
mimeSource->ungetChar();
|
|
|
522 |
}
|
|
|
523 |
}
|
|
|
524 |
}
|
492 |
}
|
525 |
|
493 |
|
526 |
// make sure bodylength doesn't overflow
|
494 |
// make sure bodylength doesn't overflow
|
527 |
*bodylength = mimeSource->getOffset();
|
495 |
*bodylength = mimeSource->getOffset();
|
528 |
if (*bodylength >= bodystartoffsetcrlf) {
|
496 |
if (*bodylength >= bodystartoffsetcrlf) {
|
|
... |
|
... |
533 |
*bodylength = 0;
|
501 |
*bodylength = 0;
|
534 |
}
|
502 |
}
|
535 |
} else {
|
503 |
} else {
|
536 |
*bodylength = 0;
|
504 |
*bodylength = 0;
|
537 |
}
|
505 |
}
|
|
|
506 |
MPFDEB((stderr, "BINC: ParseMultipart return\n"));
|
538 |
}
|
507 |
}
|
539 |
|
508 |
|
540 |
static void parseSinglePart(const string &toboundary,
|
509 |
static void parseSinglePart(const string &toboundary,
|
541 |
int *boundarysize,
|
510 |
int *boundarysize,
|
542 |
unsigned int *nbodylines,
|
511 |
unsigned int *nbodylines,
|
543 |
unsigned int *nlines,
|
512 |
unsigned int *nlines,
|
544 |
bool *eof, bool *foundendofpart,
|
513 |
bool *eof, bool *foundendofpart,
|
545 |
unsigned int *bodylength)
|
514 |
unsigned int *bodylength)
|
546 |
{
|
515 |
{
|
|
|
516 |
MPFDEB((stderr, "BINC: parseSinglePart, boundary [%s]\n",
|
|
|
517 |
toboundary.c_str()));
|
547 |
using namespace ::Binc;
|
518 |
using namespace ::Binc;
|
548 |
unsigned int bodystartoffsetcrlf = mimeSource->getOffset();
|
519 |
unsigned int bodystartoffsetcrlf = mimeSource->getOffset();
|
549 |
|
520 |
|
550 |
// If toboundary is empty, then we read until the end of the
|
521 |
// If toboundary is empty, then we read until the end of the
|
551 |
// file. Otherwise we will read until we encounter toboundary.
|
522 |
// file. Otherwise we will read until we encounter toboundary.
|
|
... |
|
... |
589 |
}
|
560 |
}
|
590 |
|
561 |
|
591 |
delete [] boundaryqueue;
|
562 |
delete [] boundaryqueue;
|
592 |
|
563 |
|
593 |
if (toboundary != "") {
|
564 |
if (toboundary != "") {
|
594 |
char a;
|
565 |
postBoundaryProcessing(eof, nlines, boundarysize, foundendofpart);
|
595 |
if (!mimeSource->getChar(&a))
|
|
|
596 |
*eof = true;
|
|
|
597 |
|
|
|
598 |
if (a == '\n')
|
|
|
599 |
++*nlines;
|
|
|
600 |
char b;
|
|
|
601 |
if (!mimeSource->getChar(&b))
|
|
|
602 |
*eof = true;
|
|
|
603 |
|
|
|
604 |
if (b == '\n')
|
|
|
605 |
++*nlines;
|
|
|
606 |
|
|
|
607 |
if (a == '-' && b == '-') {
|
|
|
608 |
*boundarysize += 2;
|
|
|
609 |
*foundendofpart = true;
|
|
|
610 |
if (!mimeSource->getChar(&a))
|
|
|
611 |
*eof = true;
|
|
|
612 |
|
|
|
613 |
if (a == '\n')
|
|
|
614 |
++*nlines;
|
|
|
615 |
|
|
|
616 |
if (!mimeSource->getChar(&b))
|
|
|
617 |
*eof = true;
|
|
|
618 |
|
|
|
619 |
if (b == '\n')
|
|
|
620 |
++*nlines;
|
|
|
621 |
}
|
|
|
622 |
|
|
|
623 |
if (a == '\r' && b == '\n') {
|
|
|
624 |
// This exception is to handle a special case where the
|
|
|
625 |
// delimiter of one part is not followed by CRLF, but
|
|
|
626 |
// immediately followed by a CRLF prefixed delimiter.
|
|
|
627 |
if (!mimeSource->getChar(&a) || !mimeSource->getChar(&b))
|
|
|
628 |
*eof = true;
|
|
|
629 |
else if (a == '-' && b == '-') {
|
|
|
630 |
mimeSource->ungetChar();
|
|
|
631 |
mimeSource->ungetChar();
|
|
|
632 |
mimeSource->ungetChar();
|
|
|
633 |
mimeSource->ungetChar();
|
|
|
634 |
} else {
|
|
|
635 |
mimeSource->ungetChar();
|
|
|
636 |
mimeSource->ungetChar();
|
|
|
637 |
}
|
|
|
638 |
|
|
|
639 |
*boundarysize += 2;
|
|
|
640 |
} else {
|
|
|
641 |
mimeSource->ungetChar();
|
|
|
642 |
mimeSource->ungetChar();
|
|
|
643 |
}
|
|
|
644 |
} else {
|
566 |
} else {
|
645 |
// Recoll: in the case of a null boundary (probably illegal but wtf), eof
|
567 |
// Recoll: in the case of a multipart body with a null
|
|
|
568 |
// boundary (probably illegal but wtf), eof was not set and
|
646 |
// was not set and multipart went into a loop until bad alloc.
|
569 |
// multipart went into a loop until bad alloc.
|
647 |
*eof = true;
|
570 |
*eof = true;
|
648 |
}
|
571 |
}
|
649 |
|
572 |
|
650 |
// make sure bodylength doesn't overflow
|
573 |
// make sure bodylength doesn't overflow
|
651 |
*bodylength = mimeSource->getOffset();
|
574 |
*bodylength = mimeSource->getOffset();
|
|
... |
|
... |
657 |
*bodylength = 0;
|
580 |
*bodylength = 0;
|
658 |
}
|
581 |
}
|
659 |
} else {
|
582 |
} else {
|
660 |
*bodylength = 0;
|
583 |
*bodylength = 0;
|
661 |
}
|
584 |
}
|
662 |
|
585 |
MPFDEB((stderr, "BINC: parseSimple ret: bodylength %d, boundarysize %d\n",
|
|
|
586 |
*bodylength, *boundarysize));
|
663 |
}
|
587 |
}
|
664 |
|
588 |
|
665 |
//------------------------------------------------------------------------
|
589 |
//------------------------------------------------------------------------
|
666 |
int Binc::MimePart::parseFull(const string &toboundary,
|
590 |
int Binc::MimePart::parseFull(const string &toboundary,
|
667 |
int &boundarysize) const
|
591 |
int &boundarysize) const
|
668 |
{
|
592 |
{
|
|
|
593 |
MPFDEB((stderr, "BINC: parsefull, toboundary[%s]\n", toboundary.c_str()));
|
669 |
headerstartoffsetcrlf = mimeSource->getOffset();
|
594 |
headerstartoffsetcrlf = mimeSource->getOffset();
|
670 |
|
595 |
|
671 |
// Parse the header of this mime part.
|
596 |
// Parse the header of this mime part.
|
672 |
parseHeader(&h, &nlines);
|
597 |
parseHeader(&h, &nlines);
|
673 |
|
598 |
|
|
... |
|
... |
695 |
} else {
|
620 |
} else {
|
696 |
parseSinglePart(toboundary, &boundarysize, &nbodylines, &nlines,
|
621 |
parseSinglePart(toboundary, &boundarysize, &nbodylines, &nlines,
|
697 |
&eof, &foundendofpart, &bodylength);
|
622 |
&eof, &foundendofpart, &bodylength);
|
698 |
}
|
623 |
}
|
699 |
|
624 |
|
|
|
625 |
MPFDEB((stderr, "BINC: parsefull ret, toboundary[%s]\n", toboundary.c_str()));
|
700 |
return (eof || foundendofpart) ? 1 : 0;
|
626 |
return (eof || foundendofpart) ? 1 : 0;
|
701 |
}
|
627 |
}
|