recoll / Code / Diff of /src/rcldb/termproc.h

Diff of /src/rcldb/termproc.h [b9e672] .. [8f77b9]

Switch to unified view


...
    virtual bool takeword(const string& itrm, int pos, int bs, int be)
    {
        m_totalterms++;
        string otrm;
        if (!unacmaybefold(itrm, otrm, "UTF-8", UNACOP_UNACFOLD)) {
            LOGDEB("splitter::takeword: unac ["  << (itrm) << "] failed\n" );
            m_unacerrors++;
            // We don't generate a fatal error because of a bad term,
            // but one has to put the limit somewhere
            if (m_unacerrors > 500 &&
                    (double(m_totalterms) / double(m_unacerrors)) < 2.0) {
                // More than 1 error for every other term
                LOGERR("splitter::takeword: too many unac errors "  << (m_unacerrors) << "/"  << (m_totalterms) << "\n" );

                return false;
            }
            return true;
        }

...
    {
    }

    virtual bool takeword(const string& term, int pos, int bs, int be)
    {
        LOGDEB1("TermProcCom::takeword: pos "  << (pos) << " "  << (bs) << " "  << (be) << " ["  << (term) << "]\n" );

        bool isstop = m_stops.isStop(term);
        bool twogramemit = false;

        if (!m_prevterm.empty() && (m_prevstop || isstop)) {
            // create 2-gram. space unnecessary but improves
...


} // End namespace Rcl

#endif /* _TERMPROC_H_INCLUDED_ */


	a/src/rcldb/termproc.h		b/src/rcldb/termproc.h
	...		...
127	virtual bool takeword(const string& itrm, int pos, int bs, int be)	127	virtual bool takeword(const string& itrm, int pos, int bs, int be)
128	{	128	{
129	m_totalterms++;	129	m_totalterms++;
130	string otrm;	130	string otrm;
131	if (!unacmaybefold(itrm, otrm, "UTF-8", UNACOP_UNACFOLD)) {	131	if (!unacmaybefold(itrm, otrm, "UTF-8", UNACOP_UNACFOLD)) {
132	LOGDEB(("splitter::takeword: unac [%s] failed\n", itrm.c_str()));	132	LOGDEB("splitter::takeword: unac [" << (itrm) << "] failed\n" );
133	m_unacerrors++;	133	m_unacerrors++;
134	// We don't generate a fatal error because of a bad term,	134	// We don't generate a fatal error because of a bad term,
135	// but one has to put the limit somewhere	135	// but one has to put the limit somewhere
136	if (m_unacerrors > 500 &&	136	if (m_unacerrors > 500 &&
137	(double(m_totalterms) / double(m_unacerrors)) < 2.0) {	137	(double(m_totalterms) / double(m_unacerrors)) < 2.0) {
138	// More than 1 error for every other term	138	// More than 1 error for every other term
139	LOGERR(("splitter::takeword: too many unac errors %d/%d\n",	139	LOGERR("splitter::takeword: too many unac errors " << (m_unacerrors) << "/" << (m_totalterms) << "\n" );
140	m_unacerrors, m_totalterms));
141	return false;	140	return false;
142	}	141	}
143	return true;	142	return true;
144	}	143	}
145		144
	...		...
225	{	224	{
226	}	225	}
227		226
228	virtual bool takeword(const string& term, int pos, int bs, int be)	227	virtual bool takeword(const string& term, int pos, int bs, int be)
229	{	228	{
230	LOGDEB1(("TermProcCom::takeword: pos %d %d %d [%s]\n",	229	LOGDEB1("TermProcCom::takeword: pos " << (pos) << " " << (bs) << " " << (be) << " [" << (term) << "]\n" );
231	pos, bs, be, term.c_str()));
232	bool isstop = m_stops.isStop(term);	230	bool isstop = m_stops.isStop(term);
233	bool twogramemit = false;	231	bool twogramemit = false;
234		232
235	if (!m_prevterm.empty() && (m_prevstop || isstop)) {	233	if (!m_prevterm.empty() && (m_prevstop || isstop)) {
236	// create 2-gram. space unnecessary but improves	234	// create 2-gram. space unnecessary but improves
	...		...
298		296
299		297
300	} // End namespace Rcl	298	} // End namespace Rcl
301		299
302	#endif /* _TERMPROC_H_INCLUDED_ */	300	#endif /* _TERMPROC_H_INCLUDED_ */
		301