|
a/src/query/xadump.cpp |
|
b/src/query/xadump.cpp |
|
... |
|
... |
41 |
static string usage =
|
41 |
static string usage =
|
42 |
" -d <dbdir> \n"
|
42 |
" -d <dbdir> \n"
|
43 |
"-e <output encoding>\n"
|
43 |
"-e <output encoding>\n"
|
44 |
" -i docid -D : get document data for docid\n"
|
44 |
" -i docid -D : get document data for docid\n"
|
45 |
" -i docid -X : delete document docid\n"
|
45 |
" -i docid -X : delete document docid\n"
|
46 |
" -i docid -b : 'rebuild' document from term positions\n"
|
|
|
47 |
" -i docid -T : term list for doc docid\n"
|
46 |
" -i docid -T : term list for doc docid\n"
|
48 |
" -i docid -r : reconstructed text for docid\n"
|
47 |
" -i docid -r : reconstructed text for docid\n"
|
49 |
" -t term -E : term existence test\n"
|
48 |
" -t term -E : term existence test\n"
|
50 |
" -t term -F : retrieve term frequency data for given term\n"
|
49 |
" -t term -F : retrieve term frequency data for given term\n"
|
51 |
" -t term -P : retrieve postings for term\n"
|
50 |
" -t term -P : retrieve postings for term\n"
|
|
... |
|
... |
71 |
#define OPT_E 0x2
|
70 |
#define OPT_E 0x2
|
72 |
#define OPT_F 0x4
|
71 |
#define OPT_F 0x4
|
73 |
#define OPT_P 0x8
|
72 |
#define OPT_P 0x8
|
74 |
#define OPT_T 0x10
|
73 |
#define OPT_T 0x10
|
75 |
#define OPT_X 0x20
|
74 |
#define OPT_X 0x20
|
76 |
#define OPT_b 0x40
|
|
|
77 |
#define OPT_d 0x80
|
75 |
#define OPT_d 0x80
|
78 |
#define OPT_e 0x100
|
76 |
#define OPT_e 0x100
|
79 |
#define OPT_f 0x200
|
77 |
#define OPT_f 0x200
|
80 |
#define OPT_i 0x400
|
78 |
#define OPT_i 0x400
|
81 |
#define OPT_n 0x800
|
79 |
#define OPT_n 0x800
|
82 |
#define OPT_q 0x1000
|
80 |
#define OPT_q 0x1000
|
83 |
#define OPT_s 0x2000
|
|
|
84 |
#define OPT_t 0x4000
|
81 |
#define OPT_t 0x4000
|
85 |
#define OPT_x 0x8000
|
82 |
#define OPT_x 0x8000
|
86 |
#define OPT_l 0x10000
|
83 |
#define OPT_l 0x10000
|
87 |
#define OPT_r 0x20000
|
84 |
#define OPT_r 0x20000
|
88 |
|
85 |
|
|
... |
|
... |
139 |
Xapian::PositionIterator pos;
|
136 |
Xapian::PositionIterator pos;
|
140 |
for (pos = db->positionlist_begin(docid, *term);
|
137 |
for (pos = db->positionlist_begin(docid, *term);
|
141 |
pos != db->positionlist_end(docid, *term); pos++) {
|
138 |
pos != db->positionlist_end(docid, *term); pos++) {
|
142 |
if (buf.size() < *pos)
|
139 |
if (buf.size() < *pos)
|
143 |
buf.resize(2*((*pos)+1));
|
140 |
buf.resize(2*((*pos)+1));
|
144 |
buf[(*pos)] = *term;
|
141 |
buf[(*pos)] = detailstring(*term);
|
145 |
}
|
142 |
}
|
146 |
}
|
143 |
}
|
147 |
for (vector<string>::iterator it = buf.begin(); it != buf.end(); it++) {
|
144 |
for (vector<string>::iterator it = buf.begin(); it != buf.end(); it++) {
|
148 |
if (!it->empty())
|
145 |
if (!it->empty())
|
149 |
cout << *it << " ";
|
146 |
cout << *it << " ";
|
|
... |
|
... |
165 |
if (!(**argv))
|
162 |
if (!(**argv))
|
166 |
/* Cas du "adb - core" */
|
163 |
/* Cas du "adb - core" */
|
167 |
Usage();
|
164 |
Usage();
|
168 |
while (**argv)
|
165 |
while (**argv)
|
169 |
switch (*(*argv)++) {
|
166 |
switch (*(*argv)++) {
|
170 |
case 'b': op_flags |= OPT_b; break;
|
|
|
171 |
case 'D': op_flags |= OPT_D; break;
|
167 |
case 'D': op_flags |= OPT_D; break;
|
172 |
case 'd': op_flags |= OPT_d; if (argc < 2) Usage();
|
168 |
case 'd': op_flags |= OPT_d; if (argc < 2) Usage();
|
173 |
dbdir = *(++argv);
|
169 |
dbdir = *(++argv);
|
174 |
argc--;
|
170 |
argc--;
|
175 |
goto b1;
|
171 |
goto b1;
|
|
... |
|
... |
186 |
goto b1;
|
182 |
goto b1;
|
187 |
case 'l': op_flags |= OPT_l; break;
|
183 |
case 'l': op_flags |= OPT_l; break;
|
188 |
case 'n': op_flags |= OPT_n; break;
|
184 |
case 'n': op_flags |= OPT_n; break;
|
189 |
case 'P': op_flags |= OPT_P; break;
|
185 |
case 'P': op_flags |= OPT_P; break;
|
190 |
case 'q': op_flags |= OPT_q; break;
|
186 |
case 'q': op_flags |= OPT_q; break;
|
191 |
case 'r': op_flags |= OPT_r; break;
|
187 |
case 'r': case 'b': op_flags |= OPT_r; break;
|
192 |
case 's': op_flags |= OPT_s; break;
|
|
|
193 |
case 'T': op_flags |= OPT_T; break;
|
188 |
case 'T': op_flags |= OPT_T; break;
|
194 |
case 't': op_flags |= OPT_t; if (argc < 2) Usage();
|
189 |
case 't': op_flags |= OPT_t; if (argc < 2) Usage();
|
195 |
aterm = *(++argv);
|
190 |
aterm = *(++argv);
|
196 |
argc--;
|
191 |
argc--;
|
197 |
goto b1;
|
192 |
goto b1;
|
|
... |
|
... |
264 |
cout << db->get_collection_freq(*term) << " "
|
259 |
cout << db->get_collection_freq(*term) << " "
|
265 |
<< term.get_termfreq() << " ";
|
260 |
<< term.get_termfreq() << " ";
|
266 |
cout << op << detailstring(s) << cl << endl;
|
261 |
cout << op << detailstring(s) << cl << endl;
|
267 |
}
|
262 |
}
|
268 |
}
|
263 |
}
|
269 |
} else if (op_flags & OPT_s) {
|
|
|
270 |
for (unsigned int docid = 1;
|
|
|
271 |
docid < db->get_lastdocid(); docid++) {
|
|
|
272 |
// cout << docid << ": ";
|
|
|
273 |
Xapian::TermIterator term;
|
|
|
274 |
for (term = db->termlist_begin(docid);
|
|
|
275 |
term != db->termlist_end(docid);term++) {
|
|
|
276 |
cout << detailstring(*term) << " ";
|
|
|
277 |
Xapian::Document doc = db->get_document(docid);
|
|
|
278 |
string data = doc.get_data();
|
|
|
279 |
cout << data;
|
|
|
280 |
}
|
|
|
281 |
}
|
|
|
282 |
} else if (op_flags & OPT_D) {
|
264 |
} else if (op_flags & OPT_D) {
|
283 |
Xapian::Document doc = db->get_document(docid);
|
265 |
Xapian::Document doc = db->get_document(docid);
|
284 |
string data = doc.get_data();
|
266 |
string data = doc.get_data();
|
285 |
cout << data << endl;
|
267 |
cout << data << endl;
|
286 |
} else if (op_flags & OPT_r) {
|
268 |
} else if (op_flags & OPT_r) {
|
|
... |
|
... |
295 |
if (!rep.empty() && (rep[0] == 'y' || rep[0] == 'Y')) {
|
277 |
if (!rep.empty() && (rep[0] == 'y' || rep[0] == 'Y')) {
|
296 |
Xapian::WritableDatabase wdb(dbdir, Xapian::DB_OPEN);
|
278 |
Xapian::WritableDatabase wdb(dbdir, Xapian::DB_OPEN);
|
297 |
cout << "Deleting" << endl;
|
279 |
cout << "Deleting" << endl;
|
298 |
wdb.delete_document(docid);
|
280 |
wdb.delete_document(docid);
|
299 |
}
|
281 |
}
|
300 |
} else if (op_flags & OPT_b) {
|
|
|
301 |
if (!(op_flags & OPT_i))
|
|
|
302 |
Usage();
|
|
|
303 |
vector<string> buf;
|
|
|
304 |
Xapian::TermIterator term;
|
|
|
305 |
for (term = db->termlist_begin(docid);
|
|
|
306 |
term != db->termlist_end(docid); term++) {
|
|
|
307 |
Xapian::PositionIterator pos;
|
|
|
308 |
for (pos = db->positionlist_begin(docid, *term);
|
|
|
309 |
pos != db->positionlist_end(docid, *term); pos++) {
|
|
|
310 |
if (buf.size() <= *pos)
|
|
|
311 |
buf.resize((*pos)+100);
|
|
|
312 |
buf[(*pos)] = detailstring(*term);
|
|
|
313 |
}
|
|
|
314 |
}
|
|
|
315 |
for (vector<string>::iterator it = buf.begin(); it != buf.end();
|
|
|
316 |
it++) {
|
|
|
317 |
cout << *it << " ";
|
|
|
318 |
}
|
|
|
319 |
} else if (op_flags & OPT_P) {
|
282 |
} else if (op_flags & OPT_P) {
|
320 |
Xapian::PostingIterator doc;
|
283 |
Xapian::PostingIterator doc;
|
321 |
for (doc = db->postlist_begin(aterm);
|
284 |
for (doc = db->postlist_begin(aterm);
|
322 |
doc != db->postlist_end(aterm); doc++) {
|
285 |
doc != db->postlist_end(aterm); doc++) {
|
323 |
cout << *doc << "(" << doc.get_wdf() << ") : " ;
|
286 |
cout << *doc << "(" << doc.get_wdf() << ") : " ;
|