|
a/src/rcldb/synfamily.h |
|
b/src/rcldb/synfamily.h |
|
... |
|
... |
40 |
#include "debuglog.h"
|
40 |
#include "debuglog.h"
|
41 |
#include "xmacros.h"
|
41 |
#include "xmacros.h"
|
42 |
|
42 |
|
43 |
namespace Rcl {
|
43 |
namespace Rcl {
|
44 |
|
44 |
|
45 |
class XapSynFamily {
|
45 |
class XapSynFamily {
|
46 |
public:
|
46 |
public:
|
47 |
/**
|
47 |
/**
|
48 |
* Construct from readable xapian database and family name (e.g.: Stm)
|
48 |
* Construct from readable xapian database and family name (e.g.: Stm)
|
49 |
*/
|
49 |
*/
|
50 |
XapSynFamily(Xapian::Database xdb, const std::string& familyname)
|
50 |
XapSynFamily(Xapian::Database xdb, const std::string& familyname)
|
51 |
: m_rdb(xdb)
|
51 |
: m_rdb(xdb)
|
52 |
{
|
52 |
{
|
53 |
m_prefix1 = std::string(":") + familyname;
|
53 |
m_prefix1 = std::string(":") + familyname;
|
54 |
}
|
54 |
}
|
55 |
|
55 |
|
56 |
/** Expand one term (e.g.: familier) inside one family member (e.g.: french)
|
|
|
57 |
*/
|
|
|
58 |
virtual bool synExpand(const std::string& fammember,
|
|
|
59 |
const std::string& key,
|
|
|
60 |
std::vector<std::string>& result);
|
|
|
61 |
|
|
|
62 |
/** Retrieve all members of this family (e.g: french english german...) */
|
56 |
/** Retrieve all members of this family (e.g: french english german...) */
|
63 |
virtual bool getMembers(std::vector<std::string>&);
|
57 |
virtual bool getMembers(std::vector<std::string>&);
|
64 |
|
58 |
|
65 |
/** debug: list map for one member to stdout */
|
59 |
/** debug: list map for one member to stdout */
|
66 |
virtual bool listMap(const std::string& fam);
|
60 |
virtual bool listMap(const std::string& fam);
|
|
|
61 |
|
|
|
62 |
/** Expand term to list of synonyms for given member */
|
|
|
63 |
bool synExpand(const std::string& membername,
|
|
|
64 |
const std::string& term, std::vector<std::string>& result);
|
|
|
65 |
|
|
|
66 |
// The prefix shared by all synonym entries inside a family member
|
|
|
67 |
virtual std::string entryprefix(const std::string& member)
|
|
|
68 |
{
|
|
|
69 |
return m_prefix1 + ":" + member + ":";
|
|
|
70 |
}
|
|
|
71 |
|
|
|
72 |
// The key for the "list of members" entry
|
|
|
73 |
virtual std::string memberskey()
|
|
|
74 |
{
|
|
|
75 |
return m_prefix1 + ";" + "members";
|
|
|
76 |
}
|
|
|
77 |
|
|
|
78 |
Xapian::Database& getdb()
|
|
|
79 |
{
|
|
|
80 |
return m_rdb;
|
|
|
81 |
}
|
67 |
|
82 |
|
68 |
protected:
|
83 |
protected:
|
69 |
Xapian::Database m_rdb;
|
84 |
Xapian::Database m_rdb;
|
70 |
std::string m_prefix1;
|
85 |
std::string m_prefix1;
|
71 |
|
|
|
72 |
virtual std::string entryprefix(const std::string& member)
|
|
|
73 |
{
|
|
|
74 |
return m_prefix1 + ":" + member + ":";
|
|
|
75 |
}
|
|
|
76 |
virtual std::string memberskey()
|
|
|
77 |
{
|
|
|
78 |
return m_prefix1 + ";" + "members";
|
|
|
79 |
}
|
|
|
80 |
|
|
|
81 |
};
|
86 |
};
|
82 |
|
87 |
|
|
|
88 |
/** Modify ops for a synonyms family
|
|
|
89 |
*
|
|
|
90 |
* A method to add a synonym entry inside a given member would make sense,
|
|
|
91 |
* but would not be used presently as all these ops go through
|
|
|
92 |
* ComputableSynFamMember objects
|
|
|
93 |
*/
|
83 |
class XapWritableSynFamily : public XapSynFamily {
|
94 |
class XapWritableSynFamily : public XapSynFamily {
|
84 |
public:
|
95 |
public:
|
85 |
/** Construct with Xapian db open for r/w */
|
96 |
/** Construct with Xapian db open for r/w */
|
86 |
XapWritableSynFamily(Xapian::WritableDatabase db, const std::string& pfx)
|
97 |
XapWritableSynFamily(Xapian::WritableDatabase db,
|
|
|
98 |
const std::string& familyname)
|
87 |
: XapSynFamily(db, pfx), m_wdb(db)
|
99 |
: XapSynFamily(db, familyname), m_wdb(db)
|
88 |
{
|
100 |
{
|
89 |
}
|
101 |
}
|
90 |
|
102 |
|
91 |
/** Delete all entries for one member (e.g. french), and remove from list
|
103 |
/** Delete all entries for one member (e.g. french), and remove from list
|
92 |
* of members */
|
104 |
* of members */
|
93 |
virtual bool deleteMember(const std::string& membername);
|
105 |
virtual bool deleteMember(const std::string& membername);
|
94 |
|
106 |
|
95 |
/** Add to list of members. Idempotent, does not affect actual expansions */
|
107 |
/** Add to list of members. Idempotent, does not affect actual expansions */
|
96 |
virtual bool createMember(const std::string& membername);
|
108 |
virtual bool createMember(const std::string& membername);
|
97 |
|
109 |
|
98 |
/** Add expansion list for term inside family member (e.g., inside
|
110 |
Xapian::WritableDatabase getdb() {return m_wdb;}
|
99 |
* the english member, add expansion for floor -> floors, flooring.. */
|
|
|
100 |
virtual bool addSynonyms(const std::string& membername,
|
|
|
101 |
const std::string& term,
|
|
|
102 |
const std::vector<std::string>& trans);
|
|
|
103 |
|
111 |
|
104 |
// Need to call setCurrentMemberName before addSynonym !
|
112 |
protected:
|
105 |
// We don't check it, for speed
|
113 |
Xapian::WritableDatabase m_wdb;
|
106 |
virtual void setCurrentMemberName(const std::string& nm)
|
114 |
};
|
|
|
115 |
|
|
|
116 |
/** A functor which transforms a string */
|
|
|
117 |
class SynTermTrans {
|
|
|
118 |
public:
|
|
|
119 |
virtual std::string operator()(const std::string&) = 0;
|
|
|
120 |
};
|
|
|
121 |
|
|
|
122 |
/** A member (set of root-synonyms associations) of a SynFamily for
|
|
|
123 |
* which the root is computable from the input term.
|
|
|
124 |
* The objects use a functor member to compute the term root on input
|
|
|
125 |
* (e.g. compute the term stem or casefold it).
|
|
|
126 |
*/
|
|
|
127 |
class XapComputableSynFamMember {
|
|
|
128 |
public:
|
|
|
129 |
XapComputableSynFamMember(Xapian::Database xdb, std::string familyname,
|
|
|
130 |
std::string membername, SynTermTrans* trans)
|
|
|
131 |
: m_family(xdb, familyname), m_membername(membername),
|
|
|
132 |
m_trans(trans), m_prefix(m_family.entryprefix(m_membername))
|
|
|
133 |
{
|
|
|
134 |
}
|
|
|
135 |
|
|
|
136 |
/** Expand a term to its list of synonyms. If filtertrans is set we
|
|
|
137 |
* keep only the results which transform to the same value as the input */
|
|
|
138 |
bool synExpand(const std::string& term, std::vector<std::string>& result,
|
|
|
139 |
SynTermTrans *filtertrans = 0);
|
107 |
{
|
140 |
|
108 |
m_currentPrefix = entryprefix(nm);
|
141 |
private:
|
|
|
142 |
XapSynFamily m_family;
|
|
|
143 |
std::string m_membername;
|
|
|
144 |
SynTermTrans *m_trans;
|
|
|
145 |
std::string m_prefix;
|
|
|
146 |
};
|
|
|
147 |
|
|
|
148 |
/** Computable term root SynFamily member, modify ops */
|
|
|
149 |
class XapWritableComputableSynFamMember {
|
|
|
150 |
public:
|
|
|
151 |
XapWritableComputableSynFamMember(
|
|
|
152 |
Xapian::WritableDatabase xdb, std::string familyname,
|
|
|
153 |
std::string membername, SynTermTrans* trans)
|
|
|
154 |
: m_family(xdb, familyname), m_membername(membername),
|
|
|
155 |
m_trans(trans), m_prefix(m_family.entryprefix(m_membername))
|
109 |
}
|
156 |
{
|
110 |
virtual bool addSynonym(const std::string& term, const std::string& trans)
|
|
|
111 |
{
|
157 |
}
|
112 |
std::string key = m_currentPrefix + term;
|
158 |
|
|
|
159 |
virtual bool addSynonym(const std::string& term)
|
|
|
160 |
{
|
|
|
161 |
LOGDEB2(("addSynonym:me %p term [%s] m_trans %p\n", this,
|
|
|
162 |
term.c_str(), m_trans));
|
|
|
163 |
std::string transformed = (*m_trans)(term);
|
|
|
164 |
LOGDEB2(("addSynonym: transformed [%s]\n", transformed.c_str()));
|
|
|
165 |
if (transformed == term)
|
|
|
166 |
return true;
|
|
|
167 |
|
113 |
std::string ermsg;
|
168 |
std::string ermsg;
|
114 |
try {
|
169 |
try {
|
115 |
m_wdb.add_synonym(key, trans);
|
170 |
m_family.getdb().add_synonym(m_prefix + transformed, term);
|
116 |
} XCATCHERROR(ermsg);
|
171 |
} XCATCHERROR(ermsg);
|
117 |
if (!ermsg.empty()) {
|
172 |
if (!ermsg.empty()) {
|
118 |
LOGERR(("XapSynFamily::addSynonym: xapian error %s\n",
|
173 |
LOGERR(("XapWritableComputableSynFamMember::addSynonym: "
|
119 |
ermsg.c_str()));
|
174 |
"xapian error %s\n", ermsg.c_str()));
|
120 |
return false;
|
175 |
return false;
|
121 |
}
|
176 |
}
|
122 |
return true;
|
177 |
return true;
|
123 |
}
|
178 |
}
|
124 |
|
179 |
|
125 |
protected:
|
180 |
void clear()
|
126 |
Xapian::WritableDatabase m_wdb;
|
181 |
{
|
|
|
182 |
m_family.deleteMember(m_membername);
|
|
|
183 |
}
|
|
|
184 |
|
|
|
185 |
void recreate()
|
|
|
186 |
{
|
|
|
187 |
clear();
|
|
|
188 |
m_family.createMember(m_membername);
|
|
|
189 |
}
|
|
|
190 |
|
|
|
191 |
private:
|
|
|
192 |
XapWritableSynFamily m_family;
|
|
|
193 |
std::string m_membername;
|
|
|
194 |
SynTermTrans *m_trans;
|
127 |
std::string m_currentPrefix;
|
195 |
std::string m_prefix;
|
128 |
};
|
196 |
};
|
129 |
|
197 |
|
130 |
|
198 |
|
131 |
//
|
199 |
//
|
132 |
// Prefixes are centrally defined here to avoid collisions
|
200 |
// Prefixes are centrally defined here to avoid collisions
|
133 |
//
|
201 |
//
|
134 |
// Stem expansion family prefix. The family member name is the
|
202 |
// Stem expansion family prefix. The family member name is the
|
135 |
// language ("all" for Dia and Cse)
|
203 |
// language ("all" for DCa)
|
|
|
204 |
|
|
|
205 |
// Lowercase accented stem to expansion
|
136 |
static const std::string synFamStem("Stm");
|
206 |
static const std::string synFamStem("Stm");
|
|
|
207 |
// Lowercase unaccented stem to expansion
|
|
|
208 |
static const std::string synFamStemUnac("StU");
|
|
|
209 |
// Lowercase unaccented term to case and accent variations
|
137 |
static const std::string synFamDiac("Dia");
|
210 |
static const std::string synFamDiCa("DCa");
|
138 |
static const std::string synFamCase("Cse");
|
|
|
139 |
|
|
|
140 |
|
|
|
141 |
}
|
211 |
}
|
142 |
|
212 |
|
143 |
#endif /* _SYNFAMILY_H_INCLUDED_ */
|
213 |
#endif /* _SYNFAMILY_H_INCLUDED_ */
|