|
a/src/common/textsplit.cpp |
|
b/src/common/textsplit.cpp |
|
... |
|
... |
1007 |
#define OPT_k 0x10
|
1007 |
#define OPT_k 0x10
|
1008 |
#define OPT_C 0x20
|
1008 |
#define OPT_C 0x20
|
1009 |
#define OPT_n 0x40
|
1009 |
#define OPT_n 0x40
|
1010 |
#define OPT_S 0x80
|
1010 |
#define OPT_S 0x80
|
1011 |
#define OPT_u 0x100
|
1011 |
#define OPT_u 0x100
|
|
|
1012 |
#define OPT_p 0x200
|
1012 |
|
1013 |
|
1013 |
bool dosplit(const string& data, TextSplit::Flags flags, int op_flags)
|
1014 |
bool dosplit(const string& data, TextSplit::Flags flags, int op_flags)
|
1014 |
{
|
1015 |
{
|
1015 |
myTermProc printproc;
|
1016 |
myTermProc printproc;
|
1016 |
|
1017 |
|
|
... |
|
... |
1083 |
" -k : preserve wildcards (?*)\n"
|
1084 |
" -k : preserve wildcards (?*)\n"
|
1084 |
" -c : just count words\n"
|
1085 |
" -c : just count words\n"
|
1085 |
" -u : use unac\n"
|
1086 |
" -u : use unac\n"
|
1086 |
" -C [charset] : input charset\n"
|
1087 |
" -C [charset] : input charset\n"
|
1087 |
" -S [stopfile] : stopfile to use for commongrams\n"
|
1088 |
" -S [stopfile] : stopfile to use for commongrams\n"
|
1088 |
" if filename is 'stdin', will read stdin for data (end with ^D)\n"
|
1089 |
" if filename is 'stdin', will read stdin for data (end with ^D)\n\n"
|
|
|
1090 |
" textplit -p somephrase : display results from stringToStrings()\n"
|
1089 |
" \n\n"
|
1091 |
" \n"
|
1090 |
;
|
1092 |
;
|
1091 |
|
1093 |
|
1092 |
static void
|
1094 |
static void
|
1093 |
Usage(void)
|
1095 |
Usage(void)
|
1094 |
{
|
1096 |
{
|
|
... |
|
... |
1116 |
case 'C': op_flags |= OPT_C; if (argc < 2) Usage();
|
1118 |
case 'C': op_flags |= OPT_C; if (argc < 2) Usage();
|
1117 |
charset = *(++argv); argc--;
|
1119 |
charset = *(++argv); argc--;
|
1118 |
goto b1;
|
1120 |
goto b1;
|
1119 |
case 'k': op_flags |= OPT_k; break;
|
1121 |
case 'k': op_flags |= OPT_k; break;
|
1120 |
case 'n': op_flags |= OPT_n; break;
|
1122 |
case 'n': op_flags |= OPT_n; break;
|
|
|
1123 |
case 'p': op_flags |= OPT_p; break;
|
1121 |
case 'q': op_flags |= OPT_q; break;
|
1124 |
case 'q': op_flags |= OPT_q; break;
|
1122 |
case 's': op_flags |= OPT_s; break;
|
1125 |
case 's': op_flags |= OPT_s; break;
|
1123 |
case 'S': op_flags |= OPT_S; if (argc < 2) Usage();
|
1126 |
case 'S': op_flags |= OPT_S; if (argc < 2) Usage();
|
1124 |
stopfile = *(++argv); argc--;
|
1127 |
stopfile = *(++argv); argc--;
|
1125 |
goto b1;
|
1128 |
goto b1;
|
|
... |
|
... |
1151 |
}
|
1154 |
}
|
1152 |
}
|
1155 |
}
|
1153 |
string odata, reason;
|
1156 |
string odata, reason;
|
1154 |
if (argc == 1) {
|
1157 |
if (argc == 1) {
|
1155 |
const char *filename = *argv++; argc--;
|
1158 |
const char *filename = *argv++; argc--;
|
|
|
1159 |
if (op_flags& OPT_p) {
|
|
|
1160 |
vector<string> tokens;
|
|
|
1161 |
TextSplit::stringToStrings(filename, tokens);
|
|
|
1162 |
for (vector<string>::const_iterator it = tokens.begin();
|
|
|
1163 |
it != tokens.end(); it++) {
|
|
|
1164 |
cout << "[" << *it << "] ";
|
|
|
1165 |
}
|
|
|
1166 |
cout << endl;
|
|
|
1167 |
exit(0);
|
|
|
1168 |
}
|
1156 |
if (!strcmp(filename, "stdin")) {
|
1169 |
if (!strcmp(filename, "stdin")) {
|
1157 |
char buf[1024];
|
1170 |
char buf[1024];
|
1158 |
int nread;
|
1171 |
int nread;
|
1159 |
while ((nread = read(0, buf, 1024)) > 0) {
|
1172 |
while ((nread = read(0, buf, 1024)) > 0) {
|
1160 |
odata.append(buf, nread);
|
1173 |
odata.append(buf, nread);
|
|
... |
|
... |
1163 |
cerr << "Failed: file_to_string(" << filename << ") failed: "
|
1176 |
cerr << "Failed: file_to_string(" << filename << ") failed: "
|
1164 |
<< reason << endl;
|
1177 |
<< reason << endl;
|
1165 |
exit(1);
|
1178 |
exit(1);
|
1166 |
}
|
1179 |
}
|
1167 |
} else {
|
1180 |
} else {
|
|
|
1181 |
if (op_flags & OPT_p)
|
|
|
1182 |
Usage();
|
1168 |
for (int i = 0; i < teststrings_cnt; i++) {
|
1183 |
for (int i = 0; i < teststrings_cnt; i++) {
|
1169 |
cout << endl << teststrings[i] << endl;
|
1184 |
cout << endl << teststrings[i] << endl;
|
1170 |
dosplit(teststrings[i], flags, op_flags);
|
1185 |
dosplit(teststrings[i], flags, op_flags);
|
1171 |
}
|
1186 |
}
|
1172 |
exit(0);
|
1187 |
exit(0);
|