|
a/src/query/xadump.cpp |
|
b/src/query/xadump.cpp |
1 |
#ifndef lint
|
1 |
#ifndef lint
|
2 |
static char rcsid[] = "@(#$Id: xadump.cpp,v 1.19 2008-12-15 14:39:52 dockes Exp $ (C) 2004 J.F.Dockes";
|
2 |
static char rcsid[] = "@(#$Id: xadump.cpp,v 1.20 2008-12-18 14:11:01 dockes Exp $ (C) 2004 J.F.Dockes";
|
3 |
#endif
|
3 |
#endif
|
4 |
/*
|
4 |
/*
|
5 |
* This program is free software; you can redistribute it and/or modify
|
5 |
* This program is free software; you can redistribute it and/or modify
|
6 |
* it under the terms of the GNU General Public License as published by
|
6 |
* it under the terms of the GNU General Public License as published by
|
7 |
* the Free Software Foundation; either version 2 of the License, or
|
7 |
* the Free Software Foundation; either version 2 of the License, or
|
|
... |
|
... |
27 |
|
27 |
|
28 |
#ifndef NO_NAMESPACES
|
28 |
#ifndef NO_NAMESPACES
|
29 |
using namespace std;
|
29 |
using namespace std;
|
30 |
#endif /* NO_NAMESPACES */
|
30 |
#endif /* NO_NAMESPACES */
|
31 |
|
31 |
|
|
|
32 |
#include "utf8iter.h"
|
|
|
33 |
|
32 |
#include "xapian.h"
|
34 |
#include "xapian.h"
|
33 |
|
35 |
|
34 |
static string thisprog;
|
36 |
static string thisprog;
|
35 |
|
37 |
|
36 |
static string usage =
|
38 |
static string usage =
|
|
... |
|
... |
40 |
" -i docid -b : 'rebuild' document from term positions\n"
|
42 |
" -i docid -b : 'rebuild' document from term positions\n"
|
41 |
" -t term -E : term existence test\n"
|
43 |
" -t term -E : term existence test\n"
|
42 |
" -t term -F : retrieve term frequency data for given term\n"
|
44 |
" -t term -F : retrieve term frequency data for given term\n"
|
43 |
" -t term -P : retrieve postings for term\n"
|
45 |
" -t term -P : retrieve postings for term\n"
|
44 |
" -i docid -T : term list for doc docid\n"
|
46 |
" -i docid -T : term list for doc docid\n"
|
|
|
47 |
" -x : separate each output char with a space\n"
|
45 |
" -T : list all terms\n"
|
48 |
" -T : list all terms\n"
|
46 |
" -f : precede each term in the list with its occurrence count\n"
|
49 |
" -f : precede each term in the list with its occurrence count\n"
|
47 |
" -n : raw data (no [])\n"
|
50 |
" -n : raw data (no [])\n"
|
48 |
" -s : special mode to dump recoll stem db\n"
|
51 |
" -s : special mode to dump recoll stem db\n"
|
49 |
" -q term [term ...] : perform AND query\n"
|
52 |
" -q term [term ...] : perform AND query\n"
|
|
... |
|
... |
71 |
#define OPT_s 0x400
|
74 |
#define OPT_s 0x400
|
72 |
#define OPT_f 0x800
|
75 |
#define OPT_f 0x800
|
73 |
#define OPT_q 0x1000
|
76 |
#define OPT_q 0x1000
|
74 |
#define OPT_n 0x2000
|
77 |
#define OPT_n 0x2000
|
75 |
#define OPT_X 0x4000
|
78 |
#define OPT_X 0x4000
|
|
|
79 |
#define OPT_x 0x8000
|
|
|
80 |
|
|
|
81 |
// Compute an exploded version of string, inserting a space between each char.
|
|
|
82 |
// (no character combining possible)
|
|
|
83 |
static string detailstring(const string& in)
|
|
|
84 |
{
|
|
|
85 |
if (!(op_flags & OPT_x))
|
|
|
86 |
return in;
|
|
|
87 |
string out;
|
|
|
88 |
Utf8Iter it(in);
|
|
|
89 |
for (; !it.eof(); it++) {
|
|
|
90 |
it.appendchartostring(out);
|
|
|
91 |
out += ' ';
|
|
|
92 |
}
|
|
|
93 |
// Strip last space
|
|
|
94 |
if (!out.empty())
|
|
|
95 |
out.resize(out.size()-1);
|
|
|
96 |
return out;
|
|
|
97 |
}
|
76 |
|
98 |
|
77 |
Xapian::Database *db;
|
99 |
Xapian::Database *db;
|
78 |
|
100 |
|
79 |
static void cleanup()
|
101 |
static void cleanup()
|
80 |
{
|
102 |
{
|
|
... |
|
... |
130 |
case 't': op_flags |= OPT_t; if (argc < 2) Usage();
|
152 |
case 't': op_flags |= OPT_t; if (argc < 2) Usage();
|
131 |
aterm = *(++argv);
|
153 |
aterm = *(++argv);
|
132 |
argc--;
|
154 |
argc--;
|
133 |
goto b1;
|
155 |
goto b1;
|
134 |
case 'X': op_flags |= OPT_X; break;
|
156 |
case 'X': op_flags |= OPT_X; break;
|
|
|
157 |
case 'x': op_flags |= OPT_x; break;
|
135 |
default: Usage(); break;
|
158 |
default: Usage(); break;
|
136 |
}
|
159 |
}
|
137 |
b1: argc--; argv++;
|
160 |
b1: argc--; argv++;
|
138 |
}
|
161 |
}
|
139 |
|
162 |
|
|
... |
|
... |
172 |
string op = (op_flags & OPT_n) ? string(): "[";
|
195 |
string op = (op_flags & OPT_n) ? string(): "[";
|
173 |
string cl = (op_flags & OPT_n) ? string(): "]";
|
196 |
string cl = (op_flags & OPT_n) ? string(): "]";
|
174 |
if (op_flags & OPT_i) {
|
197 |
if (op_flags & OPT_i) {
|
175 |
for (term = db->termlist_begin(docid);
|
198 |
for (term = db->termlist_begin(docid);
|
176 |
term != db->termlist_end(docid);term++) {
|
199 |
term != db->termlist_end(docid);term++) {
|
177 |
cout << op << *term << cl << endl;
|
200 |
cout << op << detailstring(*term) << cl << endl;
|
178 |
}
|
201 |
}
|
179 |
} else {
|
202 |
} else {
|
180 |
for (term = db->allterms_begin();
|
203 |
for (term = db->allterms_begin();
|
181 |
term != db->allterms_end();term++) {
|
204 |
term != db->allterms_end();term++) {
|
182 |
if (op_flags & OPT_f)
|
205 |
if (op_flags & OPT_f)
|
183 |
cout << term.get_termfreq() << " ";
|
206 |
cout << term.get_termfreq() << " ";
|
184 |
cout << op << *term << cl << endl;
|
207 |
cout << op << detailstring(*term) << cl << endl;
|
185 |
}
|
208 |
}
|
186 |
}
|
209 |
}
|
187 |
} else if (op_flags & OPT_s) {
|
210 |
} else if (op_flags & OPT_s) {
|
188 |
for (unsigned int docid = 1;
|
211 |
for (unsigned int docid = 1;
|
189 |
docid < db->get_lastdocid(); docid++) {
|
212 |
docid < db->get_lastdocid(); docid++) {
|
190 |
// cout << docid << ": ";
|
213 |
// cout << docid << ": ";
|
191 |
Xapian::TermIterator term;
|
214 |
Xapian::TermIterator term;
|
192 |
for (term = db->termlist_begin(docid);
|
215 |
for (term = db->termlist_begin(docid);
|
193 |
term != db->termlist_end(docid);term++) {
|
216 |
term != db->termlist_end(docid);term++) {
|
194 |
cout << *term << " ";
|
217 |
cout << detailstring(*term) << " ";
|
195 |
Xapian::Document doc = db->get_document(docid);
|
218 |
Xapian::Document doc = db->get_document(docid);
|
196 |
string data = doc.get_data();
|
219 |
string data = doc.get_data();
|
197 |
cout << data;
|
220 |
cout << data;
|
198 |
}
|
221 |
}
|
199 |
}
|
222 |
}
|
|
... |
|
... |
223 |
Xapian::PositionIterator pos;
|
246 |
Xapian::PositionIterator pos;
|
224 |
for (pos = db->positionlist_begin(docid, *term);
|
247 |
for (pos = db->positionlist_begin(docid, *term);
|
225 |
pos != db->positionlist_end(docid, *term); pos++) {
|
248 |
pos != db->positionlist_end(docid, *term); pos++) {
|
226 |
if (buf.size() <= *pos)
|
249 |
if (buf.size() <= *pos)
|
227 |
buf.resize((*pos)+100);
|
250 |
buf.resize((*pos)+100);
|
228 |
buf[(*pos)] = *term;
|
251 |
buf[(*pos)] = detailstring(*term);
|
229 |
}
|
252 |
}
|
230 |
}
|
253 |
}
|
231 |
for (vector<string>::iterator it = buf.begin(); it != buf.end();
|
254 |
for (vector<string>::iterator it = buf.begin(); it != buf.end();
|
232 |
it++) {
|
255 |
it++) {
|
233 |
cout << *it << " ";
|
256 |
cout << *it << " ";
|