|
a/src/common/unacpp.cpp |
|
b/src/common/unacpp.cpp |
|
... |
|
... |
22 |
|
22 |
|
23 |
#include <string>
|
23 |
#include <string>
|
24 |
|
24 |
|
25 |
#include "unacpp.h"
|
25 |
#include "unacpp.h"
|
26 |
#include "unac.h"
|
26 |
#include "unac.h"
|
27 |
#include "debuglog.h"
|
27 |
#include "log.h"
|
28 |
#include "utf8iter.h"
|
28 |
#include "utf8iter.h"
|
29 |
|
29 |
|
30 |
bool unacmaybefold(const string &in, string &out,
|
30 |
bool unacmaybefold(const string &in, string &out,
|
31 |
const char *encoding, UnacOp what)
|
31 |
const char *encoding, UnacOp what)
|
32 |
{
|
32 |
{
|
|
... |
|
... |
66 |
// Functions to determine upper-case or accented status could be implemented
|
66 |
// Functions to determine upper-case or accented status could be implemented
|
67 |
// hugely more efficiently inside the unac c code, but they're only used for
|
67 |
// hugely more efficiently inside the unac c code, but they're only used for
|
68 |
// testing user-entered terms, so we don't really care.
|
68 |
// testing user-entered terms, so we don't really care.
|
69 |
bool unaciscapital(const string& in)
|
69 |
bool unaciscapital(const string& in)
|
70 |
{
|
70 |
{
|
71 |
LOGDEB2(("unaciscapital: [%s]\n", in.c_str()));
|
71 |
LOGDEB2("unaciscapital: [" << (in) << "]\n" );
|
72 |
if (in.empty())
|
72 |
if (in.empty())
|
73 |
return false;
|
73 |
return false;
|
74 |
Utf8Iter it(in);
|
74 |
Utf8Iter it(in);
|
75 |
string shorter;
|
75 |
string shorter;
|
76 |
it.appendchartostring(shorter);
|
76 |
it.appendchartostring(shorter);
|
77 |
|
77 |
|
78 |
string lower;
|
78 |
string lower;
|
79 |
if (!unacmaybefold(shorter, lower, "UTF-8", UNACOP_FOLD)) {
|
79 |
if (!unacmaybefold(shorter, lower, "UTF-8", UNACOP_FOLD)) {
|
80 |
LOGINFO(("unaciscapital: unac/fold failed for [%s]\n", in.c_str()));
|
80 |
LOGINFO("unaciscapital: unac/fold failed for [" << (in) << "]\n" );
|
81 |
return false;
|
81 |
return false;
|
82 |
}
|
82 |
}
|
83 |
Utf8Iter it1(lower);
|
83 |
Utf8Iter it1(lower);
|
84 |
if (*it != *it1)
|
84 |
if (*it != *it1)
|
85 |
return true;
|
85 |
return true;
|
86 |
else
|
86 |
else
|
87 |
return false;
|
87 |
return false;
|
88 |
}
|
88 |
}
|
89 |
bool unachasuppercase(const string& in)
|
89 |
bool unachasuppercase(const string& in)
|
90 |
{
|
90 |
{
|
91 |
LOGDEB2(("unachasuppercase: [%s]\n", in.c_str()));
|
91 |
LOGDEB2("unachasuppercase: [" << (in) << "]\n" );
|
92 |
if (in.empty())
|
92 |
if (in.empty())
|
93 |
return false;
|
93 |
return false;
|
94 |
|
94 |
|
95 |
string lower;
|
95 |
string lower;
|
96 |
if (!unacmaybefold(in, lower, "UTF-8", UNACOP_FOLD)) {
|
96 |
if (!unacmaybefold(in, lower, "UTF-8", UNACOP_FOLD)) {
|
97 |
LOGINFO(("unachasuppercase: unac/fold failed for [%s]\n", in.c_str()));
|
97 |
LOGINFO("unachasuppercase: unac/fold failed for [" << (in) << "]\n" );
|
98 |
return false;
|
98 |
return false;
|
99 |
}
|
99 |
}
|
100 |
if (lower != in)
|
100 |
if (lower != in)
|
101 |
return true;
|
101 |
return true;
|
102 |
else
|
102 |
else
|
103 |
return false;
|
103 |
return false;
|
104 |
}
|
104 |
}
|
105 |
bool unachasaccents(const string& in)
|
105 |
bool unachasaccents(const string& in)
|
106 |
{
|
106 |
{
|
107 |
LOGDEB2(("unachasaccents: [%s]\n", in.c_str()));
|
107 |
LOGDEB2("unachasaccents: [" << (in) << "]\n" );
|
108 |
if (in.empty())
|
108 |
if (in.empty())
|
109 |
return false;
|
109 |
return false;
|
110 |
|
110 |
|
111 |
string noac;
|
111 |
string noac;
|
112 |
if (!unacmaybefold(in, noac, "UTF-8", UNACOP_UNAC)) {
|
112 |
if (!unacmaybefold(in, noac, "UTF-8", UNACOP_UNAC)) {
|
113 |
LOGINFO(("unachasaccents: unac/unac failed for [%s]\n", in.c_str()));
|
113 |
LOGINFO("unachasaccents: unac/unac failed for [" << (in) << "]\n" );
|
114 |
return false;
|
114 |
return false;
|
115 |
}
|
115 |
}
|
116 |
if (noac != in)
|
116 |
if (noac != in)
|
117 |
return true;
|
117 |
return true;
|
118 |
else
|
118 |
else
|
|
... |
|
... |
245 |
return 0;
|
245 |
return 0;
|
246 |
}
|
246 |
}
|
247 |
}
|
247 |
}
|
248 |
|
248 |
|
249 |
#endif
|
249 |
#endif
|
|
|
250 |
|