recoll / Code / Diff of /src/common/unacpp.cpp

Diff of /src/common/unacpp.cpp [d2f7f1] .. [0573e5]

Switch to unified view


...

#include <string>

#include "unacpp.h"
#include "unac.h"
#include "log.h"
#include "utf8iter.h"

bool unacmaybefold(const string &in, string &out, 
           const char *encoding, UnacOp what)
{
...
// Functions to determine upper-case or accented status could be implemented
// hugely more efficiently inside the unac C code, but they are only used for
// testing user-entered terms, so we don't really care.
bool unaciscapital(const string& in)
{
    LOGDEB2("unaciscapital: ["  << (in) << "]\n" );
    if (in.empty())
    return false;
    Utf8Iter it(in);
    string shorter;
    it.appendchartostring(shorter);

    string lower;
    if (!unacmaybefold(shorter, lower, "UTF-8", UNACOP_FOLD)) {
    LOGINFO("unaciscapital: unac/fold failed for ["  << (in) << "]\n" );
    return false;
    } 
    Utf8Iter it1(lower);
    if (*it != *it1)
    return true;
    else
    return false;
}
bool unachasuppercase(const string& in)
{
    LOGDEB2("unachasuppercase: ["  << (in) << "]\n" );
    if (in.empty())
    return false;

    string lower;
    if (!unacmaybefold(in, lower, "UTF-8", UNACOP_FOLD)) {
    LOGINFO("unachasuppercase: unac/fold failed for ["  << (in) << "]\n" );
    return false;
    } 
    if (lower != in)
    return true;
    else
    return false;
}
bool unachasaccents(const string& in)
{
    LOGDEB2("unachasaccents: ["  << (in) << "]\n" );
    if (in.empty())
    return false;

    string noac;
    if (!unacmaybefold(in, noac, "UTF-8", UNACOP_UNAC)) {
    LOGINFO("unachasaccents: unac/unac failed for ["  << (in) << "]\n" );
    return false;
    } 
    if (noac != in)
    return true;
    else
...
    return 0;
    }
}

#endif


	a/src/common/unacpp.cpp		b/src/common/unacpp.cpp
	...		...
22		22
23	#include <string>	23	#include <string>
24		24
25	#include "unacpp.h"	25	#include "unacpp.h"
26	#include "unac.h"	26	#include "unac.h"
27	#include "debuglog.h"	27	#include "log.h"
28	#include "utf8iter.h"	28	#include "utf8iter.h"
29		29
30	bool unacmaybefold(const string &in, string &out,	30	bool unacmaybefold(const string &in, string &out,
31	const char *encoding, UnacOp what)	31	const char *encoding, UnacOp what)
32	{	32	{
	...		...
66	// Functions to determine upper-case or accented status could be implemented	66	// Functions to determine upper-case or accented status could be implemented
67	// hugely more efficiently inside the unac c code, but there only used for	67	// hugely more efficiently inside the unac c code, but there only used for
68	// testing user-entered terms, so we don't really care.	68	// testing user-entered terms, so we don't really care.
69	bool unaciscapital(const string& in)	69	bool unaciscapital(const string& in)
70	{	70	{
71	LOGDEB2(("unaciscapital: [%s]\n", in.c_str()));	71	LOGDEB2("unaciscapital: [" << (in) << "]\n" );
72	if (in.empty())	72	if (in.empty())
73	return false;	73	return false;
74	Utf8Iter it(in);	74	Utf8Iter it(in);
75	string shorter;	75	string shorter;
76	it.appendchartostring(shorter);	76	it.appendchartostring(shorter);
77		77
78	string lower;	78	string lower;
79	if (!unacmaybefold(shorter, lower, "UTF-8", UNACOP_FOLD)) {	79	if (!unacmaybefold(shorter, lower, "UTF-8", UNACOP_FOLD)) {
80	LOGINFO(("unaciscapital: unac/fold failed for [%s]\n", in.c_str()));	80	LOGINFO("unaciscapital: unac/fold failed for [" << (in) << "]\n" );
81	return false;	81	return false;
82	}	82	}
83	Utf8Iter it1(lower);	83	Utf8Iter it1(lower);
84	if (it != it1)	84	if (it != it1)
85	return true;	85	return true;
86	else	86	else
87	return false;	87	return false;
88	}	88	}
89	bool unachasuppercase(const string& in)	89	bool unachasuppercase(const string& in)
90	{	90	{
91	LOGDEB2(("unachasuppercase: [%s]\n", in.c_str()));	91	LOGDEB2("unachasuppercase: [" << (in) << "]\n" );
92	if (in.empty())	92	if (in.empty())
93	return false;	93	return false;
94		94
95	string lower;	95	string lower;
96	if (!unacmaybefold(in, lower, "UTF-8", UNACOP_FOLD)) {	96	if (!unacmaybefold(in, lower, "UTF-8", UNACOP_FOLD)) {
97	LOGINFO(("unachasuppercase: unac/fold failed for [%s]\n", in.c_str()));	97	LOGINFO("unachasuppercase: unac/fold failed for [" << (in) << "]\n" );
98	return false;	98	return false;
99	}	99	}
100	if (lower != in)	100	if (lower != in)
101	return true;	101	return true;
102	else	102	else
103	return false;	103	return false;
104	}	104	}
105	bool unachasaccents(const string& in)	105	bool unachasaccents(const string& in)
106	{	106	{
107	LOGDEB2(("unachasaccents: [%s]\n", in.c_str()));	107	LOGDEB2("unachasaccents: [" << (in) << "]\n" );
108	if (in.empty())	108	if (in.empty())
109	return false;	109	return false;
110		110
111	string noac;	111	string noac;
112	if (!unacmaybefold(in, noac, "UTF-8", UNACOP_UNAC)) {	112	if (!unacmaybefold(in, noac, "UTF-8", UNACOP_UNAC)) {
113	LOGINFO(("unachasaccents: unac/unac failed for [%s]\n", in.c_str()));	113	LOGINFO("unachasaccents: unac/unac failed for [" << (in) << "]\n" );
114	return false;	114	return false;
115	}	115	}
116	if (noac != in)	116	if (noac != in)
117	return true;	117	return true;
118	else	118	else
	...		...
245	return 0;	245	return 0;
246	}	246	}
247	}	247	}
248		248
249	#endif	249	#endif
		250