Switch to unified view

a/src/bincimapmime/mime-parsefull.cc b/src/bincimapmime/mime-parsefull.cc
...
...
46
46
47
#ifndef NO_NAMESPACES
47
#ifndef NO_NAMESPACES
48
using namespace ::std;
48
using namespace ::std;
49
#endif /* NO_NAMESPACES */
49
#endif /* NO_NAMESPACES */
50
50
51
#undef MPF
52
#ifdef MPF
53
#define MPFDEB(X) fprintf X
54
#else
55
#define MPFDEB(X)
56
#endif
57
51
//------------------------------------------------------------------------
58
//------------------------------------------------------------------------
52
void Binc::MimeDocument::parseFull(int fd) const
59
void Binc::MimeDocument::parseFull(int fd) const
53
{
60
{
54
  if (allIsParsed)
61
  if (allIsParsed)
55
    return;
62
    return;
...
...
345
  delimiterqueue = 0;
352
  delimiterqueue = 0;
346
353
347
  return foundBoundary;
354
  return foundBoundary;
348
}
355
}
349
356
357
// JFD: Things we do after finding a boundary (something like CRLF--somestring)
358
// Need to see if this is a final one (with an additional -- at the end),
359
// and need to check if it is immediately followed by another boundary 
360
// (in this case, we give up our final CRLF in its favour)
361
static inline void postBoundaryProcessing(bool *eof,
362
                    unsigned int *nlines,
363
                    int *boundarysize,
364
                    bool *foundendofpart)
365
{
366
    // Read two more characters. This may be CRLF, it may be "--" and
367
    // it may be any other two characters.
368
    char a = '\0';
369
    if (!mimeSource->getChar(&a))
370
      *eof = true;
371
    if (a == '\n')
372
      ++*nlines; 
373
374
    char b = '\0';
375
    if (!mimeSource->getChar(&b))
376
      *eof = true;
377
    if (b == '\n')
378
      ++*nlines;
379
    
380
    // If eof, we're done here
381
    if (*eof)
382
      return;
383
384
    // If we find two dashes after the boundary, then this is the end
385
    // of boundary marker, and we need to get 2 more chars
386
    if (a == '-' && b == '-') {
387
      *foundendofpart = true;
388
      *boundarysize += 2;
389
  
390
      if (!mimeSource->getChar(&a))
391
  *eof = true;
392
      if (a == '\n')
393
  ++*nlines; 
394
  
395
      if (!mimeSource->getChar(&b))
396
  *eof = true;
397
      if (b == '\n')
398
  ++*nlines;
399
    }
400
401
    // If the boundary is followed by CRLF, we need to handle the
402
    // special case where another boundary line follows
403
    // immediately. In this case we consider the CRLF to be part of
404
    // the NEXT boundary.
405
    if (a == '\r' && b == '\n') {
406
      // Get 2 more
407
      if (!mimeSource->getChar(&a) || !mimeSource->getChar(&b)) {
408
  *eof = true; 
409
      } else if (a == '-' && b == '-') {
410
  MPFDEB((stderr, "BINC: consecutive delimiters, giving up CRLF\n"));
411
  mimeSource->ungetChar();
412
  mimeSource->ungetChar();
413
  mimeSource->ungetChar();
414
  mimeSource->ungetChar();
415
      } else {
416
  // We unget the 2 chars, and keep our crlf (increasing our own size)
417
  MPFDEB((stderr, "BINC: keeping my CRLF\n"));
418
  mimeSource->ungetChar();
419
  mimeSource->ungetChar();
420
  *boundarysize += 2;
421
      }
422
423
    } else {
424
      // Boundary string not followed by CRLF, don't read more and let
425
      // others skip the rest. Note that this is allowed but quite uncommon
426
      mimeSource->ungetChar();
427
      mimeSource->ungetChar();
428
    }
429
}
350
430
351
static void parseMultipart(const string &boundary,
431
static void parseMultipart(const string &boundary,
352
               const string &toboundary,
432
               const string &toboundary,
353
               bool *eof,
433
               bool *eof,
354
               unsigned int *nlines,
434
               unsigned int *nlines,
355
               int *boundarysize,
435
               int *boundarysize,
356
               bool *foundendofpart,
436
               bool *foundendofpart,
357
               unsigned int *bodylength,
437
               unsigned int *bodylength,
358
               vector<Binc::MimePart> *members)
438
               vector<Binc::MimePart> *members)
359
{
439
{
440
  MPFDEB((stderr, "BINC: ParseMultipart: boundary [%s], toboundary[%s]\n", 
441
    boundary.c_str(),
442
    toboundary.c_str()));
360
  using namespace ::Binc;
443
  using namespace ::Binc;
361
  unsigned int bodystartoffsetcrlf = mimeSource->getOffset();
444
  unsigned int bodystartoffsetcrlf = mimeSource->getOffset();
362
445
363
  // multipart parsing starts with skipping to the first
446
  // multipart parsing starts with skipping to the first
364
  // boundary. then we call parse() for all parts. the last parse()
447
  // boundary. then we call parse() for all parts. the last parse()
...
...
370
  skipUntilBoundary(delimiter, nlines, eof);
453
  skipUntilBoundary(delimiter, nlines, eof);
371
454
372
  if (!eof)
455
  if (!eof)
373
    *boundarysize = delimiter.size();
456
    *boundarysize = delimiter.size();
374
457
375
  // Read two more characters. This may be CRLF, it may be "--" and
458
  postBoundaryProcessing(eof, nlines, boundarysize, foundendofpart);
376
  // it may be any other two characters.
377
  char a;
378
  if (!mimeSource->getChar(&a))
379
    *eof = true;
380
381
  if (a == '\n')
382
    ++*nlines; 
383
384
  char b;
385
  if (!mimeSource->getChar(&b))
386
    *eof = true;
387
    
388
  if (b == '\n')
389
    ++*nlines;
390
    
391
  // If we find two dashes after the boundary, then this is the end
392
  // of boundary marker.
393
  if (!*eof) {
394
    if (a == '-' && b == '-') {
395
      *foundendofpart = true;
396
      *boundarysize += 2;
397
  
398
      if (!mimeSource->getChar(&a))
399
  *eof = true;
400
  
401
      if (a == '\n')
402
  ++*nlines; 
403
  
404
      if (!mimeSource->getChar(&b))
405
  *eof = true;
406
  
407
      if (b == '\n')
408
  ++*nlines;
409
    }
410
411
    if (a == '\r' && b == '\n') {
412
      // This exception is to handle a special case where the
413
      // delimiter of one part is not followed by CRLF, but
414
      // immediately followed by a CRLF prefixed delimiter.
415
      if (!mimeSource->getChar(&a) || !mimeSource->getChar(&b))
416
  *eof = true; 
417
      else if (a == '-' && b == '-') {
418
  mimeSource->ungetChar();
419
  mimeSource->ungetChar();
420
  mimeSource->ungetChar();
421
  mimeSource->ungetChar();
422
      } else {
423
  mimeSource->ungetChar();
424
  mimeSource->ungetChar();
425
      }
426
427
      *boundarysize += 2;
428
    } else {
429
      mimeSource->ungetChar();
430
      mimeSource->ungetChar();
431
    }
432
  }
433
459
434
  // read all mime parts.
460
  // read all mime parts.
435
  if (!*foundendofpart && !*eof) {
461
  if (!*foundendofpart && !*eof) {
436
    bool quit = false;
462
    bool quit = false;
437
    do {
463
    do {
...
...
455
    // boundary. then we call parse() for all parts. the last parse()
481
    // boundary. then we call parse() for all parts. the last parse()
456
    // command will return a code indicating that it found the last
482
    // command will return a code indicating that it found the last
457
    // boundary of this multipart. Note that the first boundary does
483
    // boundary of this multipart. Note that the first boundary does
458
    // not have to start with CRLF.
484
    // not have to start with CRLF.
459
    string delimiter = "\r\n--" + toboundary;
485
    string delimiter = "\r\n--" + toboundary;
460
461
    skipUntilBoundary(delimiter, nlines, eof);
486
    skipUntilBoundary(delimiter, nlines, eof);
462
487
463
    if (!*eof)
488
    if (!*eof)
464
      *boundarysize = delimiter.size();
489
      *boundarysize = delimiter.size();
465
490
466
    // Read two more characters. This may be CRLF, it may be "--" and
491
    postBoundaryProcessing(eof, nlines, boundarysize, foundendofpart);
467
    // it may be any other two characters.
468
    char a = '\0';
469
    if (!mimeSource->getChar(&a))
470
      *eof = true;
471
472
    if (a == '\n')
473
      ++*nlines; 
474
475
    char b = '\0';
476
    if (!mimeSource->getChar(&b))
477
      *eof = true;
478
    
479
    if (b == '\n')
480
      ++*nlines;
481
    
482
    // If we find two dashes after the boundary, then this is the end
483
    // of boundary marker.
484
    if (!*eof) {
485
      if (a == '-' && b == '-') {
486
  *foundendofpart = true;
487
  *boundarysize += 2;
488
  
489
  if (!mimeSource->getChar(&a))
490
    *eof = true;
491
  
492
  if (a == '\n')
493
    ++*nlines; 
494
  
495
  if (!mimeSource->getChar(&b))
496
    *eof = true;
497
  
498
  if (b == '\n')
499
    ++*nlines;
500
      }
501
502
      if (a == '\r' && b == '\n') {
503
  // This exception is to handle a special case where the
504
  // delimiter of one part is not followed by CRLF, but
505
  // immediately followed by a CRLF prefixed delimiter.
506
  if (!mimeSource->getChar(&a) || !mimeSource->getChar(&b))
507
    *eof = true; 
508
  else if (a == '-' && b == '-') {
509
    mimeSource->ungetChar();
510
    mimeSource->ungetChar();
511
    mimeSource->ungetChar();
512
    mimeSource->ungetChar();
513
  } else {
514
    mimeSource->ungetChar();
515
    mimeSource->ungetChar();
516
  }
517
518
  *boundarysize += 2;
519
      } else {
520
  mimeSource->ungetChar();
521
  mimeSource->ungetChar();
522
      }
523
    }
524
  }
492
  }
525
493
526
  // make sure bodylength doesn't overflow    
494
  // make sure bodylength doesn't overflow    
527
  *bodylength = mimeSource->getOffset();
495
  *bodylength = mimeSource->getOffset();
528
  if (*bodylength >= bodystartoffsetcrlf) {
496
  if (*bodylength >= bodystartoffsetcrlf) {
...
...
533
      *bodylength = 0;
501
      *bodylength = 0;
534
    }
502
    }
535
  } else {
503
  } else {
536
    *bodylength = 0;
504
    *bodylength = 0;
537
  }
505
  }
506
  MPFDEB((stderr, "BINC: ParseMultipart return\n"));
538
}
507
}
539
508
540
static void parseSinglePart(const string &toboundary,
509
static void parseSinglePart(const string &toboundary,
541
                int *boundarysize,
510
                int *boundarysize,
542
                unsigned int *nbodylines,
511
                unsigned int *nbodylines,
543
                unsigned int *nlines,
512
                unsigned int *nlines,
544
                bool *eof, bool *foundendofpart,
513
                bool *eof, bool *foundendofpart,
545
                unsigned int *bodylength)
514
                unsigned int *bodylength)
546
{
515
{
516
  MPFDEB((stderr, "BINC: parseSinglePart, boundary [%s]\n", 
517
    toboundary.c_str()));
547
  using namespace ::Binc;
518
  using namespace ::Binc;
548
  unsigned int bodystartoffsetcrlf = mimeSource->getOffset();
519
  unsigned int bodystartoffsetcrlf = mimeSource->getOffset();
549
520
550
  // If toboundary is empty, then we read until the end of the
521
  // If toboundary is empty, then we read until the end of the
551
  // file. Otherwise we will read until we encounter toboundary.
522
  // file. Otherwise we will read until we encounter toboundary.
...
...
589
  }
560
  }
590
561
591
  delete [] boundaryqueue;
562
  delete [] boundaryqueue;
592
563
593
  if (toboundary != "") {
564
  if (toboundary != "") {
594
    char a;
565
    postBoundaryProcessing(eof, nlines, boundarysize, foundendofpart);
595
    if (!mimeSource->getChar(&a))
596
      *eof = true;
597
598
    if (a == '\n')
599
      ++*nlines;
600
    char b;
601
    if (!mimeSource->getChar(&b))
602
      *eof = true;
603
604
    if (b == '\n') 
605
      ++*nlines;
606
607
    if (a == '-' && b == '-') {
608
      *boundarysize += 2;
609
      *foundendofpart = true;
610
      if (!mimeSource->getChar(&a))
611
  *eof = true;
612
613
      if (a == '\n')
614
  ++*nlines;
615
616
      if (!mimeSource->getChar(&b))
617
  *eof = true;
618
    
619
      if (b == '\n')
620
  ++*nlines;
621
    }
622
623
    if (a == '\r' && b == '\n') {
624
      // This exception is to handle a special case where the
625
      // delimiter of one part is not followed by CRLF, but
626
      // immediately followed by a CRLF prefixed delimiter.
627
      if (!mimeSource->getChar(&a) || !mimeSource->getChar(&b))
628
  *eof = true; 
629
      else if (a == '-' && b == '-') {
630
  mimeSource->ungetChar();
631
  mimeSource->ungetChar();
632
  mimeSource->ungetChar();
633
  mimeSource->ungetChar();
634
      } else {
635
  mimeSource->ungetChar();
636
  mimeSource->ungetChar();
637
      }
638
639
      *boundarysize += 2;
640
    } else {
641
      mimeSource->ungetChar();
642
      mimeSource->ungetChar();
643
    }
644
  } else {
566
  } else {
645
    // Recoll: in the case of a null boundary (probably illegal but wtf), eof
567
    // Recoll: in the case of a multipart body with a null
568
    // boundary (probably illegal but wtf), eof was not set and
646
    // was not set and multipart went into a loop until bad alloc.
569
    // multipart went into a loop until bad alloc.
647
    *eof = true;
570
    *eof = true;
648
  }
571
  }
649
572
650
  // make sure bodylength doesn't overflow    
573
  // make sure bodylength doesn't overflow    
651
  *bodylength = mimeSource->getOffset();
574
  *bodylength = mimeSource->getOffset();
...
...
657
      *bodylength = 0;
580
      *bodylength = 0;
658
    }
581
    }
659
  } else {
582
  } else {
660
    *bodylength = 0;
583
    *bodylength = 0;
661
  }
584
  }
662
585
  MPFDEB((stderr, "BINC: parseSimple ret: bodylength %d, boundarysize %d\n",
586
    *bodylength, *boundarysize));
663
}
587
}
664
588
665
//------------------------------------------------------------------------
589
//------------------------------------------------------------------------
666
int Binc::MimePart::parseFull(const string &toboundary,
590
int Binc::MimePart::parseFull(const string &toboundary,
667
                  int &boundarysize) const
591
                  int &boundarysize) const
668
{
592
{
593
  MPFDEB((stderr, "BINC: parsefull, toboundary[%s]\n", toboundary.c_str()));
669
  headerstartoffsetcrlf = mimeSource->getOffset();
594
  headerstartoffsetcrlf = mimeSource->getOffset();
670
595
671
  // Parse the header of this mime part.
596
  // Parse the header of this mime part.
672
  parseHeader(&h, &nlines);
597
  parseHeader(&h, &nlines);
673
598
...
...
695
  } else {
620
  } else {
696
    parseSinglePart(toboundary, &boundarysize, &nbodylines, &nlines,
621
    parseSinglePart(toboundary, &boundarysize, &nbodylines, &nlines,
697
            &eof, &foundendofpart, &bodylength);
622
            &eof, &foundendofpart, &bodylength);
698
  }
623
  }
699
624
625
  MPFDEB((stderr, "BINC: parsefull ret, toboundary[%s]\n", toboundary.c_str()));
700
  return (eof || foundendofpart) ? 1 : 0;
626
  return (eof || foundendofpart) ? 1 : 0;
701
}
627
}