recoll / Code / Diff of /src/qtgui/plaintorich.cpp

Diff of /src/qtgui/plaintorich.cpp [7b421d] .. [5856df]

Switch to unified view


#ifndef lint
static char rcsid[] = "@(#$Id: plaintorich.cpp,v 1.31 2008-07-01 08:27:58 dockes Exp $ (C) 2005 J.F.Dockes";
#endif
/*
 *   This program is free software; you can redistribute it and/or modify
 *   it under the terms of the GNU General Public License as published by
 *   the Free Software Foundation; either version 2 of the License, or
...
#include "utf8iter.h"
#include "smallut.h"
#include "plaintorich.h"
#include "cancelcheck.h"

// Definition of the PlainToRich::snull static member: an empty string.
const string PlainToRich::snull = "";

// For debug printing
static string vecStringToString(const vector<string>& t)
{
    string sterms;
    for (vector<string>::const_iterator it = t.begin(); it != t.end(); it++) {
...
// Text splitter callback used to take note of the position of query terms 
// inside the result text. This is then used to insert highlight tags. 
class myTextSplitCB : public TextSplitCB {
 public:







    // Out: begin and end byte positions of query terms/groups in text
    vector<pair<int, int> > tboffs;  

    myTextSplitCB(const vector<string>& its, 
          const vector<vector<string> >&groups, 
          const vector<int>& slacks) 
    :  m_wcount(0), m_groups(groups), m_slacks(slacks)
    {
    for (vector<string>::const_iterator it = its.begin(); 
         it != its.end(); it++) {
        m_terms.insert(*it);
    }
...
    // pos, bts, bte));

    // If this word is a search term, remember its byte-offset span. 
    if (m_terms.find(dumb) != m_terms.end()) {
        tboffs.push_back(pair<int, int>(bts, bte));





    }
    
    if (m_gterms.find(dumb) != m_gterms.end()) {
        // Term group (phrase/near) handling
        m_plists[dumb].push_back(pos);
...
};

// Widen the [STA, STO] range so that it includes POS: STA is lowered to
// POS when POS is below it, and STO is raised to POS when POS is above it.
// Each argument is expanded several times, so pass side-effect-free
// expressions; STA and STO must be assignable. The expansion is a
// brace-enclosed block rather than a single expression.
#define SETMINMAX(POS, STA, STO)  { \
    if ((STA) > (POS)) (STA) = (POS); \
    if ((STO) < (POS)) (STO) = (POS); }

// Recursively check that each term is inside the window (which is
// readjusted as the successive terms are found). i is the index for
// the next position list to use (initially 1)
static bool do_proximity_test(int window, vector<vector<int>* >& plists, 
                unsigned int i, int min, int max, 
                int *sp, int *ep)
{
    int tmp = max + 1;
    // take care to avoid underflow
    if (window <= tmp) 
    tmp -= window; 
...
    // stem-expanded: we don't know which matched)
    for (vector<string>::const_iterator it = terms.begin(); 
     it != terms.end(); it++) {
    map<string, vector<int> >::iterator pl = m_plists.find(*it);
    if (pl == m_plists.end()) {
        LOGDEB0(("myTextSplitCB::matchGroup: [%s] not found in m_plists\n",
            (*it).c_str()));
        continue;
    }
    plists.push_back(&(pl->second));
    plistToTerm[&(pl->second)] = *it;
    realgroup.push_back(*it);
    }
    LOGDEB0(("myTextSplitCB::matchGroup:d %d:real group after expansion %s\n", 
         window, vecStringToString(realgroup).c_str()));
    if (plists.size() < 2) {
  LOGDEB0(("myTextSplitCB::matchGroup: no actual groups found\n"));
    return false;
    }
    // Sort the positions lists so that the shorter is first
    std::sort(plists.begin(), plists.end(), VecIntCmpShorter());

    { // Debug
  map<vector<int>*, string>::iterator it;
  it =  plistToTerm.find(plists[0]);
  if (it == plistToTerm.end()) {
      // SuperWeird
      LOGERR(("matchGroup: term for first list not found !?!\n"));
      return false;
  }
  LOGDEB0(("matchGroup: walking the shortest plist. Term [%s], len %d\n",
      it->second.c_str(), plists[0]->size()));
    }

    // Walk the shortest plist and look for matches




    for (vector<int>::iterator it = plists[0]->begin(); 

     it != plists[0]->end(); it++) {

    int pos = *it;
  int sta = int(10E9), sto = 0;
  LOGDEB0(("MatchGroup: Testing at pos %d\n", pos));
  if (do_proximity_test(window, plists, 1, pos, pos, &sta, &sto)) {
      LOGDEB0(("myTextSplitCB::matchGroup: MATCH termpos [%d,%d]\n", 
           sta, sto)); 
      // Maybe extend the window by 1st term position, this was not
      // done by do_prox..
      SETMINMAX(pos, sta, sto);



      // Translate the position window into a byte offset window
      int bs = 0;
      map<int, pair<int, int> >::iterator i1 =  m_gpostobytes.find(sta);
      map<int, pair<int, int> >::iterator i2 =  m_gpostobytes.find(sto);
      if (i1 != m_gpostobytes.end() && i2 != m_gpostobytes.end()) {
     LOGDEB0(("myTextSplitCB::matchGroup: pushing bpos %d %d\n",
          i1->second.first, i2->second.second));
     tboffs.push_back(pair<int, int>(i1->second.first, 
                      i2->second.second));
     bs = i1->second.first;
      } else {
     LOGDEB(("matchGroup: no bpos found for %d or %d\n", sta, sto));

      }
  }















    }

    return true;
}

...

    // Sort by start and end offsets. The merging of overlapping entries
    // will be handled during output.
    std::sort(tboffs.begin(), tboffs.end(), PairIntCmpFirst());
    return true;














}


// Fix result text for display inside the gui text window.
//
...
// on qtextedit internals, and we don't do it any more, so we finally
// don't know the term par/car positions in the editor text.  
// Instead, we mark the search term positions either with html anchor
// (qt currently has problems with them), or a special string, and the
// caller will use the editor's find() function to position on it
bool PlainToRich::plaintorich(const string& in, list<string>& out, 
                const HiliteData& hdata,
                int chunksize)
{
    Chrono chron;
    const vector<string>& terms(hdata.terms);
    const vector<vector<string> >& groups(hdata.groups);
    const vector<int>& slacks(hdata.gslks);
...
    LOGDEB0(("  %s\n", sterms.c_str()));
    sterms = "\n";
    LOGDEB0(("plaintorich: groups: \n"));
    for (vector<vector<string> >::const_iterator vit = groups.begin(); 
         vit != groups.end(); vit++) {
      sterms += "GROUP: ";
        sterms += vecStringToString(*vit);
        sterms += "\n";
    }
    LOGDEB0(("  %s", sterms.c_str()));
    }
...

    cb.matchGroups();

    out.clear();
    out.push_back("");
    list<string>::iterator olit = out.begin();

    // Rich text output
    *olit = header();




    // Iterator for the list of input term positions. We use it to
    // output highlight tags and to compute term positions in the
    // output text
    vector<pair<int, int> >::iterator tPosIt = cb.tboffs.begin();
...
    // Input character iterator
    Utf8Iter chariter(in);
    // State variable used to limit the number of consecutive empty lines
    int ateol = 0;

    // Value for numbered anchors at each term match
    int anchoridx = 1;

    for (string::size_type pos = 0; pos != string::npos; pos = chariter++) {
  // Check from time to time if we need to stop
    if ((pos & 0xfff) == 0) {
        CancelCheck::instance().checkCancel();
    }

    // If we still have terms positions, check (byte) position. If
    // we are at or after a term match, mark.
    if (tPosIt != tboffsend) {
        int ibyteidx = chariter.getBpos();
        if (ibyteidx == tPosIt->first) {
      *olit += startAnchor(anchoridx++);
      *olit += startMatch();

        } else if (ibyteidx == tPosIt->second) {
        // Output end tag, then skip all highlight areas that
        // would overlap this one
      *olit += endMatch();
      *olit += endAnchor();
        int crend = tPosIt->second;
        while (tPosIt != cb.tboffs.end() && tPosIt->first < crend)
            tPosIt++;
      // Maybe end this chunk, begin next
        if (olit->size() > (unsigned int)chunksize) {
            out.push_back("");
            olit++;
        }
        }
    }

    switch(*chariter) {
    case '\n':
        if (ateol < 2) {
        *olit += "<br>\n";
        ateol++;
        }
        break;
    case '\r': 
        break;


    case '<':
        ateol = 0;
        *olit += "&lt;";
        break;
    case '&':
        ateol = 0;
        *olit += "&amp;";
        break;
    default:
        // We don't change the eol status for whitespace, want a real line
        if (!(*chariter == ' ' || *chariter == '\t')) {
        ateol = 0;
        }
        chariter.appendchartostring(*olit);
    }
    }


#if 1
    {
    FILE *fp = fopen("/tmp/debugplaintorich", "a");
    fprintf(fp, "BEGINOFPLAINTORICHOUTPUT\n");
    for (list<string>::iterator it = out.begin();
         it != out.end(); it++) {

	a/src/qtgui/plaintorich.cpp		b/src/qtgui/plaintorich.cpp
1	#ifndef lint	1	#ifndef lint
2	static char rcsid[] = "@(#$Id: plaintorich.cpp,v 1.30 2007-11-15 18:05:32 dockes Exp $ (C) 2005 J.F.Dockes";	2	static char rcsid[] = "@(#$Id: plaintorich.cpp,v 1.31 2008-07-01 08:27:58 dockes Exp $ (C) 2005 J.F.Dockes";
3	#endif	3	#endif
4	/*	4	/*
5	* This program is free software; you can redistribute it and/or modify	5	* This program is free software; you can redistribute it and/or modify
6	* it under the terms of the GNU General Public License as published by	6	* it under the terms of the GNU General Public License as published by
7	* the Free Software Foundation; either version 2 of the License, or	7	* the Free Software Foundation; either version 2 of the License, or
	...		...
41	#include "utf8iter.h"	41	#include "utf8iter.h"
42	#include "smallut.h"	42	#include "smallut.h"
43	#include "plaintorich.h"	43	#include "plaintorich.h"
44	#include "cancelcheck.h"	44	#include "cancelcheck.h"
45		45
		46	const string PlainToRich::snull = "";
		47
46	// For debug printing	48	// For debug printing
47	static string vecStringToString(const vector<string>& t)	49	static string vecStringToString(const vector<string>& t)
48	{	50	{
49	string sterms;	51	string sterms;
50	for (vector<string>::const_iterator it = t.begin(); it != t.end(); it++) {	52	for (vector<string>::const_iterator it = t.begin(); it != t.end(); it++) {
	...		...
56	// Text splitter callback used to take note of the position of query terms	58	// Text splitter callback used to take note of the position of query terms
57	// inside the result text. This is then used to insert highlight tags.	59	// inside the result text. This is then used to insert highlight tags.
58	class myTextSplitCB : public TextSplitCB {	60	class myTextSplitCB : public TextSplitCB {
59	public:	61	public:
60		62
61	// Out: first query term found in text
62	string firstTerm;
63	int firstTermOcc;
64	int m_firstTermPos;
65	int m_firstTermBPos;
66
67	// Out: begin and end byte positions of query terms/groups in text	63	// Out: begin and end byte positions of query terms/groups in text
68	vector<pair<int, int> > tboffs;	64	vector<pair<int, int> > tboffs;
69		65
70	myTextSplitCB(const vector<string>& its,	66	myTextSplitCB(const vector<string>& its,
71	const vector<vector<string> >&groups,	67	const vector<vector<string> >&groups,
72	const vector<int>& slacks)	68	const vector<int>& slacks)
73	: firstTermOcc(1), m_wcount(0), m_groups(groups), m_slacks(slacks)	69	: m_wcount(0), m_groups(groups), m_slacks(slacks)
74	{	70	{
75	for (vector<string>::const_iterator it = its.begin();	71	for (vector<string>::const_iterator it = its.begin();
76	it != its.end(); it++) {	72	it != its.end(); it++) {
77	m_terms.insert(*it);	73	m_terms.insert(*it);
78	}	74	}
	...		...
93	// pos, bts, bte));	89	// pos, bts, bte));
94		90
95	// If this word is a search term, remember its byte-offset span.	91	// If this word is a search term, remember its byte-offset span.
96	if (m_terms.find(dumb) != m_terms.end()) {	92	if (m_terms.find(dumb) != m_terms.end()) {
97	tboffs.push_back(pair<int, int>(bts, bte));	93	tboffs.push_back(pair<int, int>(bts, bte));
98	if (firstTerm.empty()) {
99	firstTerm = term;
100	m_firstTermPos = pos;
101	m_firstTermBPos = bts;
102	}
103	}	94	}
104		95
105	if (m_gterms.find(dumb) != m_gterms.end()) {	96	if (m_gterms.find(dumb) != m_gterms.end()) {
106	// Term group (phrase/near) handling	97	// Term group (phrase/near) handling
107	m_plists[dumb].push_back(pos);	98	m_plists[dumb].push_back(pos);
	...		...
146	};	137	};
147		138
148	#define SETMINMAX(POS, STA, STO) {if ((POS) < (STA)) (STA) = (POS); \	139	#define SETMINMAX(POS, STA, STO) {if ((POS) < (STA)) (STA) = (POS); \
149	if ((POS) > (STO)) (STO) = (POS);}	140	if ((POS) > (STO)) (STO) = (POS);}
150		141
151	// Recursively check that each term is inside the window (which is readjusted	142	// Recursively check that each term is inside the window (which is
152	// as the successive terms are found)	143	// readjusted as the successive terms are found). i is the index for
		144	// the next position list to use (initially 1)
153	static bool do_proximity_test(int window, vector<vector<int>* >& plists,	145	static bool do_proximity_test(int window, vector<vector<int>* >& plists,
154	unsigned int i, int min, int max, int sp, int ep)	146	unsigned int i, int min, int max,
		147	int sp, int ep)
155	{	148	{
156	int tmp = max + 1;	149	int tmp = max + 1;
157	// take care to avoid underflow	150	// take care to avoid underflow
158	if (window <= tmp)	151	if (window <= tmp)
159	tmp -= window;	152	tmp -= window;
	...		...
208	// stem-expanded: we don't know which matched)	201	// stem-expanded: we don't know which matched)
209	for (vector<string>::const_iterator it = terms.begin();	202	for (vector<string>::const_iterator it = terms.begin();
210	it != terms.end(); it++) {	203	it != terms.end(); it++) {
211	map<string, vector<int> >::iterator pl = m_plists.find(*it);	204	map<string, vector<int> >::iterator pl = m_plists.find(*it);
212	if (pl == m_plists.end()) {	205	if (pl == m_plists.end()) {
213	LOGDEB1(("myTextSplitCB::matchGroup: [%s] not found in m_plists\n",	206	LOGDEB0(("myTextSplitCB::matchGroup: [%s] not found in m_plists\n",
214	(*it).c_str()));	207	(*it).c_str()));
215	continue;	208	continue;
216	}	209	}
217	plists.push_back(&(pl->second));	210	plists.push_back(&(pl->second));
218	plistToTerm[&(pl->second)] = *it;	211	plistToTerm[&(pl->second)] = *it;
219	realgroup.push_back(*it);	212	realgroup.push_back(*it);
220	}	213	}
221	LOGDEB0(("myTextSplitCB::matchGroup:d %d:real group %s\n", window,	214	LOGDEB0(("myTextSplitCB::matchGroup:d %d:real group after expansion %s\n",
222	vecStringToString(realgroup).c_str()));	215	window, vecStringToString(realgroup).c_str()));
223	if (plists.size() < 2)	216	if (plists.size() < 2) {
		217	LOGDEB0(("myTextSplitCB::matchGroup: no actual groups found\n"));
224	return false;	218	return false;
		219	}
225	// Sort the positions lists so that the shorter is first	220	// Sort the positions lists so that the shorter is first
226	std::sort(plists.begin(), plists.end(), VecIntCmpShorter());	221	std::sort(plists.begin(), plists.end(), VecIntCmpShorter());
227		222
		223	{ // Debug
		224	map<vector<int>*, string>::iterator it;
		225	it = plistToTerm.find(plists[0]);
		226	if (it == plistToTerm.end()) {
		227	// SuperWeird
		228	LOGERR(("matchGroup: term for first list not found !?!\n"));
		229	return false;
		230	}
		231	LOGDEB0(("matchGroup: walking the shortest plist. Term [%s], len %d\n",
		232	it->second.c_str(), plists[0]->size()));
		233	}
		234
228	// Walk the shortest plist and look for matches	235	// Walk the shortest plist and look for matches
229	int sta = int(10E9), sto = 0;
230	int pos;
231	// Occurrences are from 1->N
232	firstTermOcc = 0;
233	vector<int>::iterator it = plists[0]->begin();	236	for (vector<int>::iterator it = plists[0]->begin();
234	do {
235	if (it == plists[0]->end())	237	it != plists[0]->end(); it++) {
236	return false;
237	pos = *it++;	238	int pos = *it;
238	firstTermOcc++;	239	int sta = int(10E9), sto = 0;
		240	LOGDEB0(("MatchGroup: Testing at pos %d\n", pos));
239	} while (!do_proximity_test(window, plists, 1, pos, pos, &sta, &sto));	241	if (do_proximity_test(window, plists, 1, pos, pos, &sta, &sto)) {
		242	LOGDEB0(("myTextSplitCB::matchGroup: MATCH termpos [%d,%d]\n",
		243	sta, sto));
		244	// Maybe extend the window by 1st term position, this was not
		245	// done by do_prox..
240	SETMINMAX(pos, sta, sto);	246	SETMINMAX(pos, sta, sto);
241
242	LOGDEB0(("myTextSplitCB::matchGroup: MATCH [%d,%d]\n", sta, sto));
243
244	// Translate the position window into a byte offset window	247	// Translate the position window into a byte offset window
245	int bs = 0;	248	int bs = 0;
246	map<int, pair<int, int> >::iterator i1 = m_gpostobytes.find(sta);	249	map<int, pair<int, int> >::iterator i1 = m_gpostobytes.find(sta);
247	map<int, pair<int, int> >::iterator i2 = m_gpostobytes.find(sto);	250	map<int, pair<int, int> >::iterator i2 = m_gpostobytes.find(sto);
248	if (i1 != m_gpostobytes.end() && i2 != m_gpostobytes.end()) {	251	if (i1 != m_gpostobytes.end() && i2 != m_gpostobytes.end()) {
249	LOGDEB1(("myTextSplitCB::matchGroup: pushing %d %d\n",	252	LOGDEB0(("myTextSplitCB::matchGroup: pushing bpos %d %d\n",
250	i1->second.first, i2->second.second));	253	i1->second.first, i2->second.second));
251	tboffs.push_back(pair<int, int>(i1->second.first, i2->second.second));	254	tboffs.push_back(pair<int, int>(i1->second.first,
		255	i2->second.second));
252	bs = i1->second.first;	256	bs = i1->second.first;
253	} else {	257	} else {
254	LOGDEB(("myTextSplitCB::matchGroup: no bpos found for %d or %d\n",	258	LOGDEB(("matchGroup: no bpos found for %d or %d\n", sta, sto));
255	sta, sto));
256	}	259	}
257		260	}
258	if (firstTerm.empty() \|\| m_firstTermPos > sta) {
259	// firsTerm is used to try an position the preview window over
260	// the match. As it's difficult to divine byte/word positions
261	// in qtextedit, we use a string search. Use the
262	// shortest plist for this, which hopefully gives a better
263	// chance for the group to be found (it's hopeless to try and
264	// match the whole group)
265	map<vector<int>*, string>::iterator it =
266	plistToTerm.find(plists.front());
267	if (it != plistToTerm.end())
268	firstTerm = it->second;
269	LOGDEB0(("myTextSplitCB:: best group term %s, firstTermOcc %d\n",
270	firstTerm.c_str(), firstTermOcc));
271	m_firstTermPos = sta;
272	m_firstTermBPos = bs;
273	}	261	}
274		262
275	return true;	263	return true;
276	}	264	}
277		265
	...		...
296		284
297	// Sort by start and end offsets. The merging of overlapping entries	285	// Sort by start and end offsets. The merging of overlapping entries
298	// will be handled during output.	286	// will be handled during output.
299	std::sort(tboffs.begin(), tboffs.end(), PairIntCmpFirst());	287	std::sort(tboffs.begin(), tboffs.end(), PairIntCmpFirst());
300	return true;	288	return true;
301	}
302
303	// Setting searchable beacons in the text to walk the term list.
304	static const char *termAnchorNameBase = "TRM";
305	string termAnchorName(int i)
306	{
307	char acname[sizeof(termAnchorNameBase) + 20];
308	sprintf(acname, "%s%d", termAnchorNameBase, i);
309	return string(acname);
310	}
311
312	static string termBeacon(int i)
313	{
314	return string("<a name=\"") + termAnchorName(i) + "\">";
315	}	289	}
316		290
317		291
318	// Fix result text for display inside the gui text window.	292	// Fix result text for display inside the gui text window.
319	//	293	//
	...		...
323	// on qtextedit internals, and we don't do it any more, so we finally	297	// on qtextedit internals, and we don't do it any more, so we finally
324	// don't know the term par/car positions in the editor text.	298	// don't know the term par/car positions in the editor text.
325	// Instead, we mark the search term positions either with html anchor	299	// Instead, we mark the search term positions either with html anchor
326	// (qt currently has problems with them), or a special string, and the	300	// (qt currently has problems with them), or a special string, and the
327	// caller will use the editor's find() function to position on it	301	// caller will use the editor's find() function to position on it
328	bool plaintorich(const string& in, list<string>& out,	302	bool PlainToRich::plaintorich(const string& in, list<string>& out,
329	const HiliteData& hdata,	303	const HiliteData& hdata,
330	bool noHeader, int *lastAnchor, int chunksize)	304	int chunksize)
331	{	305	{
332	Chrono chron;	306	Chrono chron;
333	const vector<string>& terms(hdata.terms);	307	const vector<string>& terms(hdata.terms);
334	const vector<vector<string> >& groups(hdata.groups);	308	const vector<vector<string> >& groups(hdata.groups);
335	const vector<int>& slacks(hdata.gslks);	309	const vector<int>& slacks(hdata.gslks);
	...		...
340	LOGDEB0((" %s\n", sterms.c_str()));	314	LOGDEB0((" %s\n", sterms.c_str()));
341	sterms = "\n";	315	sterms = "\n";
342	LOGDEB0(("plaintorich: groups: \n"));	316	LOGDEB0(("plaintorich: groups: \n"));
343	for (vector<vector<string> >::const_iterator vit = groups.begin();	317	for (vector<vector<string> >::const_iterator vit = groups.begin();
344	vit != groups.end(); vit++) {	318	vit != groups.end(); vit++) {
		319	sterms += "GROUP: ";
345	sterms += vecStringToString(*vit);	320	sterms += vecStringToString(*vit);
346	sterms += "\n";	321	sterms += "\n";
347	}	322	}
348	LOGDEB0((" %s", sterms.c_str()));	323	LOGDEB0((" %s", sterms.c_str()));
349	}	324	}
	...		...
360		335
361	cb.matchGroups();	336	cb.matchGroups();
362		337
363	out.clear();	338	out.clear();
364	out.push_back("");	339	out.push_back("");
365	list<string>::iterator sit = out.begin();	340	list<string>::iterator olit = out.begin();
366		341
367	// Rich text output	342	// Rich text output
368	if (noHeader)	343	*olit = header();
369	*sit = "";
370	else
371	*sit = "<qt><head><title></title></head><body><p>";
372		344
373	// Iterator for the list of input term positions. We use it to	345	// Iterator for the list of input term positions. We use it to
374	// output highlight tags and to compute term positions in the	346	// output highlight tags and to compute term positions in the
375	// output text	347	// output text
376	vector<pair<int, int> >::iterator tPosIt = cb.tboffs.begin();	348	vector<pair<int, int> >::iterator tPosIt = cb.tboffs.begin();
	...		...
386	// Input character iterator	358	// Input character iterator
387	Utf8Iter chariter(in);	359	Utf8Iter chariter(in);
388	// State variable used to limitate the number of consecutive empty lines	360	// State variable used to limitate the number of consecutive empty lines
389	int ateol = 0;	361	int ateol = 0;
390		362
391	// Stuff for numbered anchors at each term match	363	// Value for numbered anchors at each term match
392	int anchoridx = 1;	364	int anchoridx = 1;
393		365
394	for (string::size_type pos = 0; pos != string::npos; pos = chariter++) {	366	for (string::size_type pos = 0; pos != string::npos; pos = chariter++) {
		367	// Check from time to time if we need to stop
395	if ((pos & 0xfff) == 0) {	368	if ((pos & 0xfff) == 0) {
396	CancelCheck::instance().checkCancel();	369	CancelCheck::instance().checkCancel();
397	}	370	}
398		371
399	// If we still have terms positions, check (byte) position. If	372	// If we still have terms positions, check (byte) position. If
400	// we are at or after a term match, mark.	373	// we are at or after a term match, mark.
401	if (tPosIt != tboffsend) {	374	if (tPosIt != tboffsend) {
402	int ibyteidx = chariter.getBpos();	375	int ibyteidx = chariter.getBpos();
403	if (ibyteidx == tPosIt->first) {	376	if (ibyteidx == tPosIt->first) {
404	if (lastAnchor)	377	*olit += startAnchor(anchoridx++);
405	*sit += termBeacon(anchoridx++);	378	*olit += startMatch();
406	*sit += "<termtag>";
407	} else if (ibyteidx == tPosIt->second) {	379	} else if (ibyteidx == tPosIt->second) {
408	// Output end tag, then skip all highlight areas that	380	// Output end tag, then skip all highlight areas that
409	// would overlap this one	381	// would overlap this one
410	*sit += "</termtag>";	382	*olit += endMatch();
		383	*olit += endAnchor();
411	int crend = tPosIt->second;	384	int crend = tPosIt->second;
412	while (tPosIt != cb.tboffs.end() && tPosIt->first < crend)	385	while (tPosIt != cb.tboffs.end() && tPosIt->first < crend)
413	tPosIt++;	386	tPosIt++;
414	// Maybe end chunk	387	// Maybe end this chunk, begin next
415	if (sit->size() > (unsigned int)chunksize) {	388	if (olit->size() > (unsigned int)chunksize) {
416	out.push_back("");	389	out.push_back("");
417	sit++;	390	olit++;
418	}	391	}
419	}	392	}
420	}	393	}
421		394
422	switch(*chariter) {	395	switch(*chariter) {
423	case '\n':	396	case '\n':
424	if (ateol < 2) {	397	if (ateol < 2) {
425	*sit += "<br>\n";	398	*olit += "<br>\n";
426	ateol++;	399	ateol++;
427	}	400	}
428	break;	401	break;
429	case '\r':	402	case '\r':
430	break;	403	break;
431	case '\007': // used as anchor char, strip other instances
432	break;
433	case '<':	404	case '<':
434	ateol = 0;	405	ateol = 0;
435	*sit += "<";	406	*olit += "<";
436	break;	407	break;
437	case '&':	408	case '&':
438	ateol = 0;	409	ateol = 0;
439	*sit += "&";	410	*olit += "&";
440	break;	411	break;
441	default:	412	default:
442	// We don't change the eol status for whitespace, want a real line	413	// We don't change the eol status for whitespace, want a real line
443	if (!(chariter == ' ' \|\| chariter == '\t')) {	414	if (!(chariter == ' ' \|\| chariter == '\t')) {
444	ateol = 0;	415	ateol = 0;
445	}	416	}
446	chariter.appendchartostring(*sit);	417	chariter.appendchartostring(*olit);
447	}	418	}
448	}	419	}
449	if (lastAnchor)
450	*lastAnchor = anchoridx - 1;
451	#if 0	420	#if 1
452	{	421	{
453	FILE *fp = fopen("/tmp/debugplaintorich", "a");	422	FILE *fp = fopen("/tmp/debugplaintorich", "a");
454	fprintf(fp, "BEGINOFPLAINTORICHOUTPUT\n");	423	fprintf(fp, "BEGINOFPLAINTORICHOUTPUT\n");
455	for (list<string>::iterator it = out.begin();	424	for (list<string>::iterator it = out.begin();
456	it != out.end(); it++) {	425	it != out.end(); it++) {