Switch to unified view

a/src/common/textsplit.h b/src/common/textsplit.h
...
...
14
 *   Free Software Foundation, Inc.,
14
 *   Free Software Foundation, Inc.,
15
 *   59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
15
 *   59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
16
 */
16
 */
17
#ifndef _TEXTSPLIT_H_INCLUDED_
17
#ifndef _TEXTSPLIT_H_INCLUDED_
18
#define _TEXTSPLIT_H_INCLUDED_
18
#define _TEXTSPLIT_H_INCLUDED_
19
/* @(#$Id: textsplit.h,v 1.20 2007-10-04 12:21:52 dockes Exp $  (C) 2004 J.F.Dockes */
19
/* @(#$Id: textsplit.h,v 1.21 2008-12-05 11:09:31 dockes Exp $  (C) 2004 J.F.Dockes */
20
20
21
#include <string>
21
#include <string>
22
#include <list>
22
#ifndef NO_NAMESPACES
23
#ifndef NO_NAMESPACES
23
using std::string;
24
using std::string;
25
using std::list;
24
#endif
26
#endif
25
27
26
/**
28
/**
27
 * Function class whose takeword method is called for every detected word while splitting text.
29
 * Function class whose takeword method is called for every detected word while splitting text.
28
 */
30
 */
...
...
72
    : m_flags(flags), m_cb(t), m_maxWordLength(40), 
74
    : m_flags(flags), m_cb(t), m_maxWordLength(40), 
73
      m_prevpos(-1)
75
      m_prevpos(-1)
74
    {
76
    {
75
    }
77
    }
76
78
77
    /**
78
     * Split text, emit words and positions.
79
    /** Split text, emit words and positions. */
79
     */
80
    bool text_to_words(const string &in);
80
    bool text_to_words(const string &in);
81
81
82
    // Utility functions: these do not need the user to set up a callback
82
    // Utility functions: these do not need the user to set up a callback
83
    // etc.
83
    // etc.
84
85
    /** Count words in string, as the splitter would generate them */
84
    static int countWords(const string &in, Flags flgs = TXTS_ONLYSPANS);
86
    static int countWords(const string &in, Flags flgs = TXTS_ONLYSPANS);
85
87
88
    /** Check if this is visibly not a single block of text */
89
    static bool hasVisibleWhite(const string &in);
90
91
    /** Split text span into strings, at white space, allowing for substrings
92
     * quoted with " . Escaping with \ works as usual inside the quoted areas.
93
     * This has to be kept separate from smallut.cpp's stringsToStrings, which
94
     * basically works only if whitespace is ascii, and which processes 
95
     * non-utf-8 input (iso-8859 config files work ok). This hopefully
96
     * handles all Unicode whitespace, but needs correct UTF-8 input.
97
     */
98
    static bool stringToStrings(const string &s, list<string> &tokens);
99
    
86
private:
100
private:
87
    Flags         m_flags;
101
    Flags         m_flags;
88
    TextSplitCB  *m_cb;
102
    TextSplitCB  *m_cb;
89
    int           m_maxWordLength;
103
    int           m_maxWordLength;
90
104