|
a/src/query/xadump.cpp |
|
b/src/query/xadump.cpp |
|
... |
|
... |
12 |
* You should have received a copy of the GNU General Public License
|
12 |
* You should have received a copy of the GNU General Public License
|
13 |
* along with this program; if not, write to the
|
13 |
* along with this program; if not, write to the
|
14 |
* Free Software Foundation, Inc.,
|
14 |
* Free Software Foundation, Inc.,
|
15 |
* 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
15 |
* 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
16 |
*/
|
16 |
*/
|
|
|
17 |
|
|
|
18 |
#include "autoconfig.h"
|
|
|
19 |
|
17 |
#include <stdio.h>
|
20 |
#include <stdio.h>
|
18 |
#include <stdlib.h>
|
21 |
#include <stdlib.h>
|
19 |
#include <signal.h>
|
22 |
#include <signal.h>
|
20 |
#include <strings.h>
|
23 |
#include <strings.h>
|
21 |
|
24 |
|
|
... |
|
... |
34 |
#include "xapian.h"
|
37 |
#include "xapian.h"
|
35 |
|
38 |
|
36 |
static string thisprog;
|
39 |
static string thisprog;
|
37 |
|
40 |
|
38 |
static string usage =
|
41 |
static string usage =
|
|
|
42 |
" -d <dbdir> \n"
|
39 |
" -d <dbdir> -e <output encoding>\n"
|
43 |
"-e <output encoding>\n"
|
40 |
" -i docid -D : get document data for docid\n"
|
44 |
" -i docid -D : get document data for docid\n"
|
41 |
" -i docid -X : delete document docid\n"
|
45 |
" -i docid -X : delete document docid\n"
|
42 |
" -i docid -b : 'rebuild' document from term positions\n"
|
46 |
" -i docid -b : 'rebuild' document from term positions\n"
|
43 |
" -i docid -T : term list for doc docid\n"
|
47 |
" -i docid -T : term list for doc docid\n"
|
44 |
" -t term -E : term existence test\n"
|
48 |
" -t term -E : term existence test\n"
|
|
... |
|
... |
108 |
static void sigcleanup(int sig)
|
112 |
static void sigcleanup(int sig)
|
109 |
{
|
113 |
{
|
110 |
fprintf(stderr, "sigcleanup\n");
|
114 |
fprintf(stderr, "sigcleanup\n");
|
111 |
cleanup();
|
115 |
cleanup();
|
112 |
exit(1);
|
116 |
exit(1);
|
|
|
117 |
}
|
|
|
118 |
|
|
|
119 |
#ifndef RCL_INDEX_STRIPCHARS
|
|
|
120 |
bool o_index_stripchars;
|
|
|
121 |
#endif
|
|
|
122 |
|
|
|
123 |
inline bool has_prefix(const string& trm)
|
|
|
124 |
{
|
|
|
125 |
#ifndef RCL_INDEX_STRIPCHARS
|
|
|
126 |
if (o_index_stripchars) {
|
|
|
127 |
#endif
|
|
|
128 |
return trm.size() && 'A' <= trm[0] && trm[0] <= 'Z';
|
|
|
129 |
#ifndef RCL_INDEX_STRIPCHARS
|
|
|
130 |
} else {
|
|
|
131 |
return trm.size() > 0 && trm[0] == ':';
|
|
|
132 |
}
|
|
|
133 |
#endif
|
113 |
}
|
134 |
}
|
114 |
|
135 |
|
115 |
int main(int argc, char **argv)
|
136 |
int main(int argc, char **argv)
|
116 |
{
|
137 |
{
|
117 |
string dbdir = path_cat(path_home(), ".recoll/xapiandb");
|
138 |
string dbdir = path_cat(path_home(), ".recoll/xapiandb");
|
|
... |
|
... |
186 |
if (signal(SIGTERM, SIG_IGN) != SIG_IGN)
|
207 |
if (signal(SIGTERM, SIG_IGN) != SIG_IGN)
|
187 |
signal(SIGTERM, sigcleanup);
|
208 |
signal(SIGTERM, sigcleanup);
|
188 |
|
209 |
|
189 |
try {
|
210 |
try {
|
190 |
db = new Xapian::Database(dbdir);
|
211 |
db = new Xapian::Database(dbdir);
|
191 |
|
|
|
192 |
cout << "DB: ndocs " << db->get_doccount() << " lastdocid " <<
|
212 |
cout << "DB: ndocs " << db->get_doccount() << " lastdocid " <<
|
193 |
db->get_lastdocid() << " avglength " << db->get_avlength() << endl;
|
213 |
db->get_lastdocid() << " avglength " << db->get_avlength() << endl;
|
|
|
214 |
|
|
|
215 |
#ifndef RCL_INDEX_STRIPCHARS
|
|
|
216 |
// If we have terms with a leading ':' it's a new style,
|
|
|
217 |
// unstripped index
|
|
|
218 |
{
|
|
|
219 |
Xapian::TermIterator term = db->allterms_begin(":");
|
|
|
220 |
if (term == db->allterms_end())
|
|
|
221 |
o_index_stripchars = true;
|
|
|
222 |
else
|
|
|
223 |
o_index_stripchars = false;
|
|
|
224 |
cout<<"DB: terms are "<<(o_index_stripchars?"stripped":"raw")<<endl;
|
|
|
225 |
}
|
|
|
226 |
#endif
|
194 |
|
227 |
|
195 |
if (op_flags & OPT_T) {
|
228 |
if (op_flags & OPT_T) {
|
196 |
Xapian::TermIterator term;
|
229 |
Xapian::TermIterator term;
|
197 |
string printable;
|
230 |
string printable;
|
198 |
string op = (op_flags & OPT_n) ? string(): "[";
|
231 |
string op = (op_flags & OPT_n) ? string(): "[";
|
199 |
string cl = (op_flags & OPT_n) ? string(): "]";
|
232 |
string cl = (op_flags & OPT_n) ? string(): "]";
|
200 |
if (op_flags & OPT_i) {
|
233 |
if (op_flags & OPT_i) {
|
201 |
for (term = db->termlist_begin(docid);
|
234 |
for (term = db->termlist_begin(docid);
|
202 |
term != db->termlist_end(docid);term++) {
|
235 |
term != db->termlist_end(docid);term++) {
|
203 |
const string& s = *term;
|
236 |
const string& s = *term;
|
204 |
if ((op_flags&OPT_l) &&
|
237 |
if ((op_flags&OPT_l) && has_prefix(s))
|
205 |
!s.empty() && s[0] >= 'A' && s[0] <= 'Z')
|
|
|
206 |
continue;
|
238 |
continue;
|
207 |
cout << op << detailstring(s) << cl << endl;
|
239 |
cout << op << detailstring(s) << cl << endl;
|
208 |
}
|
240 |
}
|
209 |
} else {
|
241 |
} else {
|
210 |
for (term = db->allterms_begin();
|
242 |
for (term = db->allterms_begin();
|
211 |
term != db->allterms_end();term++) {
|
243 |
term != db->allterms_end();term++) {
|
212 |
const string& s = *term;
|
244 |
const string& s = *term;
|
213 |
if ((op_flags&OPT_l) &&
|
245 |
if ((op_flags&OPT_l) && has_prefix(s))
|
214 |
!s.empty() && s[0] >= 'A' && s[0] <= 'Z')
|
|
|
215 |
continue;
|
246 |
continue;
|
216 |
if (op_flags & OPT_f)
|
247 |
if (op_flags & OPT_f)
|
217 |
cout << db->get_collection_freq(*term) << " "
|
248 |
cout << db->get_collection_freq(*term) << " "
|
218 |
<< term.get_termfreq() << " ";
|
249 |
<< term.get_termfreq() << " ";
|
219 |
cout << op << detailstring(s) << cl << endl;
|
250 |
cout << op << detailstring(s) << cl << endl;
|