Switch to side-by-side view

--- a/src/internfile/mh_text.cpp
+++ b/src/internfile/mh_text.cpp
@@ -33,6 +33,7 @@
 #include "readfile.h"
 #include "md5.h"
 #include "rclconfig.h"
+#include "pxattr.h"
 
 const int MB = 1024*1024;
 const int KB = 1024;
@@ -52,6 +53,10 @@
                 m_fn.c_str(), errno));
         return false;
     }
+
+    // Check for a charset defined in the file's extended attributes, as per:
+    // http://freedesktop.org/wiki/CommonExtendedAttributes
+    pxattr::get(m_fn, "charset", &m_charsetfromxattr);
 
     // Max file size parameter: texts over this size are not indexed
     int maxmbs = 20;
@@ -115,14 +120,18 @@
     if (m_havedoc == false)
 	return false;
 
-    // We transcode even if defcharset is supposedly already utf-8:
-    // this validates the encoding.
-    m_metaData[cstr_dj_keyorigcharset] = m_dfltInputCharset;
+    if (m_charsetfromxattr.empty())
+	m_metaData[cstr_dj_keyorigcharset] = m_dfltInputCharset;
+    else 
+	m_metaData[cstr_dj_keyorigcharset] = m_charsetfromxattr;
+
     m_metaData[cstr_dj_keymt] = cstr_textplain;
 
     size_t srclen = m_text.length();
     m_metaData[cstr_dj_keycontent].swap(m_text);
 
+    // We transcode even if defcharset is supposedly already utf-8:
+    // this validates the encoding.
     // txtdcode() truncates the text if transcoding fails
     (void)txtdcode("mh_text");