|
a/src/common/textsplit.h |
|
b/src/common/textsplit.h |
|
... |
|
... |
18 |
#define _TEXTSPLIT_H_INCLUDED_
|
18 |
#define _TEXTSPLIT_H_INCLUDED_
|
19 |
|
19 |
|
20 |
#include <string>
|
20 |
#include <string>
|
21 |
#include <vector>
|
21 |
#include <vector>
|
22 |
|
22 |
|
23 |
#ifndef NO_NAMESPACES
|
|
|
24 |
using std::string;
|
23 |
using std::string;
|
25 |
using std::vector;
|
24 |
using std::vector;
|
26 |
#endif
|
|
|
27 |
|
25 |
|
28 |
class Utf8Iter;
|
26 |
class Utf8Iter;
|
29 |
|
27 |
|
30 |
/**
|
28 |
/**
|
31 |
* Split text into words.
|
29 |
* Split text into words.
|
|
... |
|
... |
76 |
int pos, // term pos
|
74 |
int pos, // term pos
|
77 |
int bts, // byte offset of first char in term
|
75 |
int bts, // byte offset of first char in term
|
78 |
int bte // byte offset of first char after term
|
76 |
int bte // byte offset of first char after term
|
79 |
) = 0;
|
77 |
) = 0;
|
80 |
|
78 |
|
|
|
79 |
/** Called when a form feed character (\f, 0x0c) is encountered. Override to
|
|
|
80 |
* use the event: this default implementation has an empty body and ignores
* its position argument.
* Mostly or exclusively used with pdftoxx output. Other filters mostly
|
|
|
81 |
* just don't know about pages. */
|
|
|
82 |
virtual void newpage(int /*pos*/)
|
|
|
83 |
{
|
|
|
84 |
}
|
81 |
|
85 |
|
82 |
// Static utility functions:
|
86 |
// Static utility functions:
|
83 |
|
87 |
|
84 |
/** Count words in string, as the splitter would generate them */
|
88 |
/** Count words in string, as the splitter would generate them */
|
85 |
static int countWords(const string &in, Flags flgs = TXTS_ONLYSPANS);
|
89 |
static int countWords(const string &in, Flags flgs = TXTS_ONLYSPANS);
|