--- a/src/common/textsplit.h
+++ b/src/common/textsplit.h
@@ -16,7 +16,7 @@
  */
 #ifndef _TEXTSPLIT_H_INCLUDED_
 #define _TEXTSPLIT_H_INCLUDED_
-/* @(#$Id: textsplit.h,v 1.17 2007-09-18 20:35:31 dockes Exp $  (C) 2004 J.F.Dockes */
+/* @(#$Id: textsplit.h,v 1.18 2007-09-20 08:45:05 dockes Exp $  (C) 2004 J.F.Dockes */
 
 #include <string>
 #ifndef NO_NAMESPACES
@@ -36,6 +36,8 @@
 			  ) = 0; 
 };
 
+class Utf8Iter; // Forward declaration: only used by pointer here (see cjk_to_words)
+
 /** 
  * Split text into words. 
  * See comments at top of .cpp for more explanations.
@@ -47,14 +49,19 @@
     enum Flags {TXTS_NONE = 0, 
 		TXTS_ONLYSPANS = 1,  // Only return maximum spans (a@b.com) 
 		TXTS_NOSPANS = 2,  // Only return atomic words (a, b, com)
-		TXTS_KEEPWILD = 4 // Handle wildcards as letters
+		TXTS_KEEPWILD = 4, // Handle wildcards as letters
+		TXTS_NOCJK = 8     // Presumably turns off CJK special processing (confirm in .cpp)
     };
 
     /**
      * Constructor: just store callback object
      */
-    TextSplit(TextSplitCB *t, Flags flags = TXTS_NONE) 
-	: m_flags(flags), m_cb(t), m_maxWordLength(40), m_prevpos(-1) {}
+    TextSplit(TextSplitCB *t, Flags flags = Flags(TXTS_NONE))
+	: m_flags(flags), m_cb(t), m_maxWordLength(40), 
+	  m_nocjk((m_flags & TXTS_NOCJK) != 0), // m_flags is declared before m_nocjk, so it is already set here
+	  m_prevpos(-1)
+    {
+    }
 
     /**
      * Split text, emit words and positions.
@@ -69,11 +76,13 @@
     Flags         m_flags;
     TextSplitCB  *m_cb;
     int           m_maxWordLength;
+    int           m_nocjk; // Nonzero when TXTS_NOCJK is set in m_flags (computed in the constructor)
 
     // Current span. Might be jf.dockes@wanadoo.f
     string        m_span; 
 
-    // Current word: no punctuation at all in there
+    // Current word: no punctuation at all in there. m_wordStart is a byte
+    // offset relative to the current span, m_wordLen is a byte length
     int           m_wordStart;
     unsigned int  m_wordLen;
 
@@ -90,7 +99,7 @@
     unsigned int  m_prevlen;
 
     // This processes cjk text:
-    // bool cjk_to_words();
+    bool cjk_to_words(Utf8Iter *it, unsigned int *cp); // NOTE(review): roles of it/cp are not documented here; see .cpp
 
     bool emitterm(bool isspan, string &term, int pos, int bs, int be);
     bool doemit(bool spanerase, int bp);