|
a/src/utils/transcode.cpp |
|
b/src/utils/transcode.cpp |
|
... |
|
... |
38 |
#define ICV_P2_TYPE const char**
|
38 |
#define ICV_P2_TYPE const char**
|
39 |
#else
|
39 |
#else
|
40 |
#define ICV_P2_TYPE char**
|
40 |
#define ICV_P2_TYPE char**
|
41 |
#endif
|
41 |
#endif
|
42 |
|
42 |
|
|
|
43 |
// We save approximately 28% of execution time for word-at-a-time conversions by
|
|
|
44 |
// caching the iconv_open() conversion descriptor. This is probably not worth it.
|
|
|
45 |
//#define ICONV_CACHE_OPEN
|
|
|
46 |
|
43 |
bool transcode(const string &in, string &out, const string &icode,
|
47 |
bool transcode(const string &in, string &out, const string &icode,
|
44 |
const string &ocode, int *ecnt)
|
48 |
const string &ocode, int *ecnt)
|
45 |
{
|
49 |
{
|
|
|
50 |
#ifdef ICONV_CACHE_OPEN
|
|
|
51 |
static iconv_t ic;
|
|
|
52 |
static string cachedicode;
|
|
|
53 |
static string cachedocode;
|
|
|
54 |
#else
|
46 |
iconv_t ic;
|
55 |
iconv_t ic;
|
|
|
56 |
#endif
|
47 |
bool ret = false;
|
57 |
bool ret = false;
|
48 |
const int OBSIZ = 8192;
|
58 |
const int OBSIZ = 8192;
|
49 |
char obuf[OBSIZ], *op;
|
59 |
char obuf[OBSIZ], *op;
|
50 |
bool icopen = false;
|
60 |
bool icopen = false;
|
51 |
int mecnt = 0;
|
61 |
int mecnt = 0;
|
52 |
out.erase();
|
62 |
out.erase();
|
53 |
size_t isiz = in.length();
|
63 |
size_t isiz = in.length();
|
54 |
out.reserve(isiz);
|
64 |
out.reserve(isiz);
|
55 |
const char *ip = in.c_str();
|
65 |
const char *ip = in.c_str();
|
56 |
|
66 |
|
|
|
67 |
#ifdef ICONV_CACHE_OPEN
|
|
|
68 |
if (cachedicode.compare(icode) || cachedocode.compare(ocode)) {
|
|
|
69 |
#endif
|
57 |
if((ic = iconv_open(ocode.c_str(), icode.c_str())) == (iconv_t)-1) {
|
70 |
if((ic = iconv_open(ocode.c_str(), icode.c_str())) == (iconv_t)-1) {
|
58 |
out = string("iconv_open failed for ") + icode
|
71 |
out = string("iconv_open failed for ") + icode
|
59 |
+ " -> " + ocode;
|
72 |
+ " -> " + ocode;
|
60 |
goto error;
|
73 |
goto error;
|
|
|
74 |
}
|
|
|
75 |
#ifdef ICONV_CACHE_OPEN
|
|
|
76 |
cachedicode.assign(icode);
|
|
|
77 |
cachedocode.assign(ocode);
|
|
|
78 |
} else {
|
|
|
79 |
iconv(ic, 0, 0, 0, 0);
|
61 |
}
|
80 |
}
|
|
|
81 |
#else
|
62 |
icopen = true;
|
82 |
icopen = true;
|
|
|
83 |
#endif
|
63 |
|
84 |
|
64 |
while (isiz > 0) {
|
85 |
while (isiz > 0) {
|
65 |
size_t osiz;
|
86 |
size_t osiz;
|
66 |
op = obuf;
|
87 |
op = obuf;
|
67 |
osiz = OBSIZ;
|
88 |
osiz = OBSIZ;
|
|
... |
|
... |
87 |
}
|
108 |
}
|
88 |
|
109 |
|
89 |
out.append(obuf, OBSIZ - osiz);
|
110 |
out.append(obuf, OBSIZ - osiz);
|
90 |
}
|
111 |
}
|
91 |
|
112 |
|
|
|
113 |
#ifndef ICONV_CACHE_OPEN
|
92 |
if(iconv_close(ic) == -1) {
|
114 |
if(iconv_close(ic) == -1) {
|
93 |
out.erase();
|
115 |
out.erase();
|
94 |
out = string("iconv_close failed for ") + icode
|
116 |
out = string("iconv_close failed for ") + icode
|
95 |
+ " -> " + ocode;
|
117 |
+ " -> " + ocode;
|
96 |
goto error;
|
118 |
goto error;
|
97 |
}
|
119 |
}
|
98 |
icopen = false;
|
120 |
icopen = false;
|
|
|
121 |
#endif
|
99 |
ret = true;
|
122 |
ret = true;
|
100 |
error:
|
123 |
error:
|
|
|
124 |
#ifndef ICONV_CACHE_OPEN
|
101 |
if (icopen)
|
125 |
if (icopen)
|
102 |
iconv_close(ic);
|
126 |
iconv_close(ic);
|
|
|
127 |
#endif
|
103 |
if (mecnt)
|
128 |
if (mecnt)
|
104 |
LOGDEB(("transcode: [%s]->[%s] %d errors\n",
|
129 |
LOGDEB(("transcode: [%s]->[%s] %d errors\n",
|
105 |
icode.c_str(), ocode.c_str(), mecnt));
|
130 |
icode.c_str(), ocode.c_str(), mecnt));
|
106 |
if (ecnt)
|
131 |
if (ecnt)
|
107 |
*ecnt = mecnt;
|
132 |
*ecnt = mecnt;
|
|
... |
|
... |
122 |
using namespace std;
|
147 |
using namespace std;
|
123 |
|
148 |
|
124 |
#include "readfile.h"
|
149 |
#include "readfile.h"
|
125 |
#include "transcode.h"
|
150 |
#include "transcode.h"
|
126 |
|
151 |
|
|
|
152 |
// Repeatedly transcode a small string for timing measurements
|
|
|
153 |
static const string testword("\xc3\xa9\x6c\x69\x6d\x69\x6e\xc3\xa9\xc3\xa0");
|
|
|
154 |
// Without cache, 10e6 reps on a Mac Pro -> 1.88 s
|
|
|
155 |
// With cache -> 1.56 s
|
|
|
156 |
void looptest()
|
|
|
157 |
{
|
|
|
158 |
cout << testword << endl;
|
|
|
159 |
string out;
|
|
|
160 |
for (int i = 0; i < 1000*1000; i++) {
|
|
|
161 |
if (!transcode(testword, out, "UTF-8", "UTF-16BE")) {
|
|
|
162 |
cerr << "Transcode failed" << endl;
|
|
|
163 |
break;
|
|
|
164 |
}
|
|
|
165 |
}
|
|
|
166 |
}
|
|
|
167 |
|
127 |
int main(int argc, char **argv)
|
168 |
int main(int argc, char **argv)
|
128 |
{
|
169 |
{
|
|
|
170 |
#if 0
|
|
|
171 |
looptest();
|
|
|
172 |
exit(0);
|
|
|
173 |
#endif
|
129 |
if (argc != 5) {
|
174 |
if (argc != 5) {
|
130 |
cerr << "Usage: trcsguess ifilename icode ofilename ocode" << endl;
|
175 |
cerr << "Usage: trcsguess ifilename icode ofilename ocode" << endl;
|
131 |
exit(1);
|
176 |
exit(1);
|
132 |
}
|
177 |
}
|
133 |
const string ifilename = argv[1];
|
178 |
const string ifilename = argv[1];
|