recoll / Code / Diff of /src/bincimapmime/mime-parsefull.cc

Diff of /src/bincimapmime/mime-parsefull.cc [138427] .. [bfc651]

Switch to side-by-side view

--- a/src/bincimapmime/mime-parsefull.cc
+++ b/src/bincimapmime/mime-parsefull.cc
@@ -48,6 +48,13 @@
 using namespace ::std;
 #endif /* NO_NAMESPACES */
 
+#undef MPF
+#ifdef MPF
+#define MPFDEB(X) fprintf X
+#else
+#define MPFDEB(X)
+#endif
+
 //------------------------------------------------------------------------
 void Binc::MimeDocument::parseFull(int fd) const
 {
@@ -347,6 +354,79 @@
   return foundBoundary;
 }
 
+// JFD: Things we do after finding a boundary (something like CRLF--somestring)
+// Need to see if this is a final one (with an additional -- at the end),
+// and need to check if it is immediately followed by another boundary 
+// (in this case, we give up our final CRLF in its favour)
+static inline void postBoundaryProcessing(bool *eof,
+					  unsigned int *nlines,
+					  int *boundarysize,
+					  bool *foundendofpart)
+{
+    // Read two more characters. This may be CRLF, it may be "--" and
+    // it may be any other two characters.
+    char a = '\0';
+    if (!mimeSource->getChar(&a))
+      *eof = true;
+    if (a == '\n')
+      ++*nlines; 
+
+    char b = '\0';
+    if (!mimeSource->getChar(&b))
+      *eof = true;
+    if (b == '\n')
+      ++*nlines;
+    
+    // If eof, we're done here
+    if (*eof)
+      return;
+
+    // If we find two dashes after the boundary, then this is the end
+    // of boundary marker, and we need to get 2 more chars
+    if (a == '-' && b == '-') {
+      *foundendofpart = true;
+      *boundarysize += 2;
+	
+      if (!mimeSource->getChar(&a))
+	*eof = true;
+      if (a == '\n')
+	++*nlines; 
+	
+      if (!mimeSource->getChar(&b))
+	*eof = true;
+      if (b == '\n')
+	++*nlines;
+    }
+
+    // If the boundary is followed by CRLF, we need to handle the
+    // special case where another boundary line follows
+    // immediately. In this case we consider the CRLF to be part of
+    // the NEXT boundary.
+    if (a == '\r' && b == '\n') {
+      // Get 2 more
+      if (!mimeSource->getChar(&a) || !mimeSource->getChar(&b)) {
+	*eof = true; 
+      } else if (a == '-' && b == '-') {
+	MPFDEB((stderr, "BINC: consecutive delimiters, giving up CRLF\n"));
+	mimeSource->ungetChar();
+	mimeSource->ungetChar();
+	mimeSource->ungetChar();
+	mimeSource->ungetChar();
+      } else {
+	// We unget the 2 chars, and keep our crlf (increasing our own size)
+	MPFDEB((stderr, "BINC: keeping my CRLF\n"));
+	mimeSource->ungetChar();
+	mimeSource->ungetChar();
+	*boundarysize += 2;
+      }
+
+    } else {
+      // Boundary string not followed by CRLF, don't read more and let
+      // others skip the rest. Note that this is allowed but quite uncommon
+      mimeSource->ungetChar();
+      mimeSource->ungetChar();
+    }
+}
 
 static void parseMultipart(const string &boundary,
 			   const string &toboundary,
@@ -357,6 +437,9 @@
 			   unsigned int *bodylength,
 			   vector<Binc::MimePart> *members)
 {
+  MPFDEB((stderr, "BINC: ParseMultipart: boundary [%s], toboundary[%s]\n", 
+	  boundary.c_str(),
+	  toboundary.c_str()));
   using namespace ::Binc;
   unsigned int bodystartoffsetcrlf = mimeSource->getOffset();
 
@@ -372,64 +455,7 @@
   if (!eof)
     *boundarysize = delimiter.size();
 
-  // Read two more characters. This may be CRLF, it may be "--" and
-  // it may be any other two characters.
-  char a;
-  if (!mimeSource->getChar(&a))
-    *eof = true;
-
-  if (a == '\n')
-    ++*nlines; 
-
-  char b;
-  if (!mimeSource->getChar(&b))
-    *eof = true;
-    
-  if (b == '\n')
-    ++*nlines;
-    
-  // If we find two dashes after the boundary, then this is the end
-  // of boundary marker.
-  if (!*eof) {
-    if (a == '-' && b == '-') {
-      *foundendofpart = true;
-      *boundarysize += 2;
-	
-      if (!mimeSource->getChar(&a))
-	*eof = true;
-	
-      if (a == '\n')
-	++*nlines; 
-	
-      if (!mimeSource->getChar(&b))
-	*eof = true;
-	
-      if (b == '\n')
-	++*nlines;
-    }
-
-    if (a == '\r' && b == '\n') {
-      // This exception is to handle a special case where the
-      // delimiter of one part is not followed by CRLF, but
-      // immediately followed by a CRLF prefixed delimiter.
-      if (!mimeSource->getChar(&a) || !mimeSource->getChar(&b))
-	*eof = true; 
-      else if (a == '-' && b == '-') {
-	mimeSource->ungetChar();
-	mimeSource->ungetChar();
-	mimeSource->ungetChar();
-	mimeSource->ungetChar();
-      } else {
-	mimeSource->ungetChar();
-	mimeSource->ungetChar();
-      }
-
-      *boundarysize += 2;
-    } else {
-      mimeSource->ungetChar();
-      mimeSource->ungetChar();
-    }
-  }
+  postBoundaryProcessing(eof, nlines, boundarysize, foundendofpart);
 
   // read all mime parts.
   if (!*foundendofpart && !*eof) {
@@ -457,70 +483,12 @@
     // boundary of this multipart. Note that the first boundary does
     // not have to start with CRLF.
     string delimiter = "\r\n--" + toboundary;
-
     skipUntilBoundary(delimiter, nlines, eof);
 
     if (!*eof)
       *boundarysize = delimiter.size();
 
-    // Read two more characters. This may be CRLF, it may be "--" and
-    // it may be any other two characters.
-    char a = '\0';
-    if (!mimeSource->getChar(&a))
-      *eof = true;
-
-    if (a == '\n')
-      ++*nlines; 
-
-    char b = '\0';
-    if (!mimeSource->getChar(&b))
-      *eof = true;
-    
-    if (b == '\n')
-      ++*nlines;
-    
-    // If we find two dashes after the boundary, then this is the end
-    // of boundary marker.
-    if (!*eof) {
-      if (a == '-' && b == '-') {
-	*foundendofpart = true;
-	*boundarysize += 2;
-	
-	if (!mimeSource->getChar(&a))
-	  *eof = true;
-	
-	if (a == '\n')
-	  ++*nlines; 
-	
-	if (!mimeSource->getChar(&b))
-	  *eof = true;
-	
-	if (b == '\n')
-	  ++*nlines;
-      }
-
-      if (a == '\r' && b == '\n') {
-	// This exception is to handle a special case where the
-	// delimiter of one part is not followed by CRLF, but
-	// immediately followed by a CRLF prefixed delimiter.
-	if (!mimeSource->getChar(&a) || !mimeSource->getChar(&b))
-	  *eof = true; 
-	else if (a == '-' && b == '-') {
-	  mimeSource->ungetChar();
-	  mimeSource->ungetChar();
-	  mimeSource->ungetChar();
-	  mimeSource->ungetChar();
-	} else {
-	  mimeSource->ungetChar();
-	  mimeSource->ungetChar();
-	}
-
-	*boundarysize += 2;
-      } else {
-	mimeSource->ungetChar();
-	mimeSource->ungetChar();
-      }
-    }
+    postBoundaryProcessing(eof, nlines, boundarysize, foundendofpart);
   }
 
   // make sure bodylength doesn't overflow    
@@ -535,6 +503,7 @@
   } else {
     *bodylength = 0;
   }
+  MPFDEB((stderr, "BINC: ParseMultipart return\n"));
 }
 
 static void parseSinglePart(const string &toboundary,
@@ -544,6 +513,8 @@
 			    bool *eof, bool *foundendofpart,
 			    unsigned int *bodylength)
 {
+  MPFDEB((stderr, "BINC: parseSinglePart, boundary [%s]\n", 
+	  toboundary.c_str()));
   using namespace ::Binc;
   unsigned int bodystartoffsetcrlf = mimeSource->getOffset();
 
@@ -591,59 +562,11 @@
   delete [] boundaryqueue;
 
   if (toboundary != "") {
-    char a;
-    if (!mimeSource->getChar(&a))
-      *eof = true;
-
-    if (a == '\n')
-      ++*nlines;
-    char b;
-    if (!mimeSource->getChar(&b))
-      *eof = true;
-
-    if (b == '\n') 
-      ++*nlines;
-
-    if (a == '-' && b == '-') {
-      *boundarysize += 2;
-      *foundendofpart = true;
-      if (!mimeSource->getChar(&a))
-	*eof = true;
-
-      if (a == '\n')
-	++*nlines;
-
-      if (!mimeSource->getChar(&b))
-	*eof = true;
-	  
-      if (b == '\n')
-	++*nlines;
-    }
-
-    if (a == '\r' && b == '\n') {
-      // This exception is to handle a special case where the
-      // delimiter of one part is not followed by CRLF, but
-      // immediately followed by a CRLF prefixed delimiter.
-      if (!mimeSource->getChar(&a) || !mimeSource->getChar(&b))
-	*eof = true; 
-      else if (a == '-' && b == '-') {
-	mimeSource->ungetChar();
-	mimeSource->ungetChar();
-	mimeSource->ungetChar();
-	mimeSource->ungetChar();
-      } else {
-	mimeSource->ungetChar();
-	mimeSource->ungetChar();
-      }
-
-      *boundarysize += 2;
-    } else {
-      mimeSource->ungetChar();
-      mimeSource->ungetChar();
-    }
+    postBoundaryProcessing(eof, nlines, boundarysize, foundendofpart);
   } else {
-    // Recoll: in the case of a null boundary (probably illegal but wtf), eof
-    // was not set and multipart went into a loop until bad alloc.
+    // Recoll: in the case of a multipart body with a null
+    // boundary (probably illegal but wtf), eof was not set and
+    // multipart went into a loop until bad alloc.
     *eof = true;
   }
 
@@ -659,13 +582,15 @@
   } else {
     *bodylength = 0;
   }
-
+  MPFDEB((stderr, "BINC: parseSimple ret: bodylength %d, boundarysize %d\n",
+	  *bodylength, *boundarysize));
 }
 
 //------------------------------------------------------------------------
 int Binc::MimePart::parseFull(const string &toboundary,
 			      int &boundarysize) const
 {
+  MPFDEB((stderr, "BINC: parsefull, toboundary[%s]\n", toboundary.c_str()));
   headerstartoffsetcrlf = mimeSource->getOffset();
 
   // Parse the header of this mime part.
@@ -697,5 +622,6 @@
 		    &eof, &foundendofpart, &bodylength);
   }
 
+  MPFDEB((stderr, "BINC: parsefull ret, toboundary[%s]\n", toboundary.c_str()));
   return (eof || foundendofpart) ? 1 : 0;
 }