|
a/src/utils/transcode.cpp |
|
b/src/utils/transcode.cpp |
|
... |
|
... |
19 |
#include "autoconfig.h"
|
19 |
#include "autoconfig.h"
|
20 |
|
20 |
|
21 |
#include <string>
|
21 |
#include <string>
|
22 |
#include <iostream>
|
22 |
#include <iostream>
|
23 |
#include <mutex>
|
23 |
#include <mutex>
|
24 |
using std::string;
|
|
|
25 |
|
24 |
|
26 |
#include <errno.h>
|
25 |
#include <errno.h>
|
27 |
#include <iconv.h>
|
26 |
#include <iconv.h>
|
|
|
27 |
#include <wchar.h>
|
28 |
|
28 |
|
29 |
#include "transcode.h"
|
29 |
#include "transcode.h"
|
30 |
#include "log.h"
|
30 |
#include "log.h"
|
|
|
31 |
|
|
|
32 |
using namespace std;
|
31 |
|
33 |
|
32 |
// We gain approximately 25% exec time for word at a time conversions by
|
34 |
// We gain approximately 25% exec time for word at a time conversions by
|
33 |
// caching the iconv_open thing.
|
35 |
// caching the iconv_open thing.
|
34 |
//
|
36 |
//
|
35 |
// We may also lose some concurrency on multiproc because of the
|
37 |
// We may also lose some concurrency on multiproc because of the
|
|
... |
|
... |
40 |
#define ICONV_CACHE_OPEN
|
42 |
#define ICONV_CACHE_OPEN
|
41 |
|
43 |
|
42 |
bool transcode(const string &in, string &out, const string &icode,
|
44 |
bool transcode(const string &in, string &out, const string &icode,
|
43 |
const string &ocode, int *ecnt)
|
45 |
const string &ocode, int *ecnt)
|
44 |
{
|
46 |
{
|
45 |
LOGDEB2("Transcode: " << (icode) << " -> " << (ocode) << "\n" );
|
47 |
LOGDEB2("Transcode: " << icode << " -> " << ocode << "\n");
|
46 |
#ifdef ICONV_CACHE_OPEN
|
48 |
#ifdef ICONV_CACHE_OPEN
|
47 |
static iconv_t ic = (iconv_t)-1;
|
49 |
static iconv_t ic = (iconv_t)-1;
|
48 |
static string cachedicode;
|
50 |
static string cachedicode;
|
49 |
static string cachedocode;
|
51 |
static string cachedocode;
|
50 |
static std::mutex o_cachediconv_mutex;
|
52 |
static std::mutex o_cachediconv_mutex;
|
|
... |
|
... |
98 |
out.erase();
|
100 |
out.erase();
|
99 |
out = string("iconv failed for ") + icode + " -> " + ocode +
|
101 |
out = string("iconv failed for ") + icode + " -> " + ocode +
|
100 |
" : " + strerror(errno);
|
102 |
" : " + strerror(errno);
|
101 |
#endif
|
103 |
#endif
|
102 |
if (errno == EILSEQ) {
|
104 |
if (errno == EILSEQ) {
|
103 |
LOGDEB1("transcode:iconv: bad input seq.: shift, retry\n" );
|
105 |
LOGDEB1("transcode:iconv: bad input seq.: shift, retry\n");
|
104 |
LOGDEB1(" Input consumed " << (ip - in) << " output produced " << (out.length() + OBSIZ - osiz) << "\n" );
|
106 |
LOGDEB1(" Input consumed " << ip - in << " output produced " <<
|
|
|
107 |
out.length() + OBSIZ - osiz << "\n");
|
105 |
out.append(obuf, OBSIZ - osiz);
|
108 |
out.append(obuf, OBSIZ - osiz);
|
106 |
out += "?";
|
109 |
out += "?";
|
107 |
mecnt++;
|
110 |
mecnt++;
|
108 |
ip++;isiz--;
|
111 |
ip++;isiz--;
|
109 |
continue;
|
112 |
continue;
|
|
... |
|
... |
142 |
iconv(ic, 0, 0, 0, 0);
|
145 |
iconv(ic, 0, 0, 0, 0);
|
143 |
#endif
|
146 |
#endif
|
144 |
}
|
147 |
}
|
145 |
|
148 |
|
146 |
if (mecnt)
|
149 |
if (mecnt)
|
147 |
LOGDEB("transcode: [" << (icode) << "]->[" << (ocode) << "] " << (mecnt) << " errors\n" );
|
150 |
LOGDEB("transcode: [" << icode << "]->[" << ocode << "] " <<
|
|
|
151 |
mecnt << " errors\n");
|
148 |
if (ecnt)
|
152 |
if (ecnt)
|
149 |
*ecnt = mecnt;
|
153 |
*ecnt = mecnt;
|
150 |
return ret;
|
154 |
return ret;
|
151 |
}
|
155 |
}
|
152 |
|
156 |
|
|
|
157 |
bool wchartoutf8(const wchar_t *in, std::string& out)
|
|
|
158 |
{
|
|
|
159 |
static iconv_t ic = (iconv_t)-1;
|
|
|
160 |
if (ic == (iconv_t)-1) {
|
|
|
161 |
if((ic = iconv_open("UTF-8", "WCHAR_T")) == (iconv_t)-1) {
|
|
|
162 |
LOGERR("wchartoutf8: iconv_open failed\n");
|
|
|
163 |
return false;
|
|
|
164 |
}
|
|
|
165 |
}
|
|
|
166 |
const int OBSIZ = 8192;
|
|
|
167 |
char obuf[OBSIZ], *op;
|
|
|
168 |
out.erase();
|
|
|
169 |
size_t isiz = 2 * wcslen(in);
|
|
|
170 |
out.reserve(isiz);
|
|
|
171 |
const char *ip = (const char *)in;
|
153 |
|
172 |
|
154 |
#else
|
173 |
while (isiz > 0) {
|
|
|
174 |
size_t osiz;
|
|
|
175 |
op = obuf;
|
|
|
176 |
osiz = OBSIZ;
|
|
|
177 |
|
|
|
178 |
if(iconv(ic, (ICONV_CONST char **)&ip, &isiz, &op, &osiz) == (size_t)-1
|
|
|
179 |
&& errno != E2BIG) {
|
|
|
180 |
LOGERR("wchartoutf8: iconv error, errno: " << errno << endl);
|
|
|
181 |
return false;
|
|
|
182 |
}
|
|
|
183 |
out.append(obuf, OBSIZ - osiz);
|
|
|
184 |
}
|
|
|
185 |
return true;
|
|
|
186 |
}
|
|
|
187 |
|
|
|
188 |
bool utf8towchar(const std::string& in, wchar_t *out, size_t obytescap)
|
|
|
189 |
{
|
|
|
190 |
static iconv_t ic = (iconv_t)-1;
|
|
|
191 |
if (ic == (iconv_t)-1) {
|
|
|
192 |
if((ic = iconv_open("WCHAR_T", "UTF-8")) == (iconv_t)-1) {
|
|
|
193 |
LOGERR("utf8towchar: iconv_open failed\n");
|
|
|
194 |
return false;
|
|
|
195 |
}
|
|
|
196 |
}
|
|
|
197 |
size_t isiz = in.size();
|
|
|
198 |
const char *ip = in.c_str();
|
|
|
199 |
size_t osiz = (size_t)obytescap-2;
|
|
|
200 |
char *op = (char *)out;
|
|
|
201 |
if (iconv(ic, (ICONV_CONST char **)&ip, &isiz, &op, &osiz) == (size_t)-1) {
|
|
|
202 |
LOGERR("utf8towchar: iconv error, errno: " << errno << endl);
|
|
|
203 |
return false;
|
|
|
204 |
}
|
|
|
205 |
*op++ = 0;
|
|
|
206 |
*op = 0;
|
|
|
207 |
return true;
|
|
|
208 |
}
|
|
|
209 |
|
|
|
210 |
#else // -> TEST
|
155 |
|
211 |
|
156 |
#include <stdio.h>
|
212 |
#include <stdio.h>
|
157 |
#include <stdlib.h>
|
213 |
#include <stdlib.h>
|
158 |
#include <errno.h>
|
214 |
#include <errno.h>
|
159 |
#include <fcntl.h>
|
215 |
#include <fcntl.h>
|
|
... |
|
... |
220 |
}
|
276 |
}
|
221 |
fclose(fp);
|
277 |
fclose(fp);
|
222 |
exit(0);
|
278 |
exit(0);
|
223 |
}
|
279 |
}
|
224 |
#endif
|
280 |
#endif
|
225 |
|
|
|