|
a/unac/unac.c |
|
b/unac/unac.c |
|
... |
|
... |
28 |
#include <vector>
|
28 |
#include <vector>
|
29 |
#include <map>
|
29 |
#include <map>
|
30 |
#include <string>
|
30 |
#include <string>
|
31 |
#include <algorithm>
|
31 |
#include <algorithm>
|
32 |
#include <iostream>
|
32 |
#include <iostream>
|
33 |
#include UNORDERED_MAP_INCLUDE
|
33 |
#include <unordered_map>
|
|
|
34 |
#include <mutex>
|
34 |
|
35 |
|
35 |
using std::string;
|
36 |
using std::string;
|
|
|
37 |
using std::vector;
|
36 |
|
38 |
|
37 |
#include "smallut.h"
|
39 |
#include "smallut.h"
|
38 |
|
40 |
|
39 |
/*
|
41 |
/*
|
40 |
Storage for the exception translations. These are chars which
|
42 |
Storage for the exception translations. These are chars which
|
41 |
should not be translated according to what UnicodeData says, but
|
43 |
should not be translated according to what UnicodeData says, but
|
42 |
instead according to some local rule. There will usually be very
|
44 |
instead according to some local rule. There will usually be very
|
43 |
few of them, but they must be looked up for every translated char.
|
45 |
few of them, but they must be looked up for every translated char.
|
44 |
*/
|
46 |
*/
|
45 |
STD_UNORDERED_MAP<unsigned short, string> except_trans;
|
47 |
std::unordered_map<unsigned short, string> except_trans;
|
46 |
static inline bool is_except_char(unsigned short c, string& trans)
|
48 |
static inline bool is_except_char(unsigned short c, string& trans)
|
47 |
{
|
49 |
{
|
48 |
STD_UNORDERED_MAP<unsigned short, string>::const_iterator it
|
|
|
49 |
= except_trans.find(c);
|
50 |
auto it = except_trans.find(c);
|
50 |
if (it == except_trans.end())
|
51 |
if (it == except_trans.end())
|
51 |
return false;
|
52 |
return false;
|
52 |
trans = it->second;
|
53 |
trans = it->second;
|
53 |
return true;
|
54 |
return true;
|
54 |
}
|
55 |
}
|
|
... |
|
... |
72 |
#include <errno.h>
|
73 |
#include <errno.h>
|
73 |
#ifdef HAVE_VSNPRINTF
|
74 |
#ifdef HAVE_VSNPRINTF
|
74 |
#include <stdio.h>
|
75 |
#include <stdio.h>
|
75 |
#include <stdarg.h>
|
76 |
#include <stdarg.h>
|
76 |
#endif /* HAVE_VSNPRINTF */
|
77 |
#endif /* HAVE_VSNPRINTF */
|
77 |
#include <pthread.h>
|
|
|
78 |
|
78 |
|
79 |
#include "unac.h"
|
79 |
#include "unac.h"
|
80 |
#include "unac_version.h"
|
80 |
#include "unac_version.h"
|
81 |
|
81 |
|
82 |
/* Generated by builder. Do not modify. Start tables */
|
82 |
/* Generated by builder. Do not modify. Start tables */
|
|
... |
|
... |
14310 |
}
|
14310 |
}
|
14311 |
|
14311 |
|
14312 |
static const char *utf16be = "UTF-16BE";
|
14312 |
static const char *utf16be = "UTF-16BE";
|
14313 |
static iconv_t u8tou16_cd = (iconv_t)-1;
|
14313 |
static iconv_t u8tou16_cd = (iconv_t)-1;
|
14314 |
static iconv_t u16tou8_cd = (iconv_t)-1;
|
14314 |
static iconv_t u16tou8_cd = (iconv_t)-1;
|
14315 |
static pthread_mutex_t o_unac_mutex;
|
14315 |
static std::mutex o_unac_mutex;
|
14316 |
static int unac_mutex_is_init;
|
|
|
14317 |
// Call this or take your chances with the auto init.
|
|
|
14318 |
void unac_init_mt()
|
|
|
14319 |
{
|
|
|
14320 |
pthread_mutex_init(&o_unac_mutex, 0);
|
|
|
14321 |
unac_mutex_is_init = 1;
|
|
|
14322 |
}
|
|
|
14323 |
|
14316 |
|
14324 |
/*
|
14317 |
/*
|
14325 |
* Convert buffer <in> containing string encoded in charset <from> into
|
14318 |
* Convert buffer <in> containing string encoded in charset <from> into
|
14326 |
* a string in charset <to> and return it in buffer <outp>. The <outp>
|
14319 |
* a string in charset <to> and return it in buffer <outp>. The <outp>
|
14327 |
* points to a malloced string large enough to hold the conversion result.
|
14320 |
* points to a malloced string large enough to hold the conversion result.
|
|
... |
|
... |
14339 |
size_t out_size;
|
14332 |
size_t out_size;
|
14340 |
char* out_base;
|
14333 |
char* out_base;
|
14341 |
int from_utf16, from_utf8, to_utf16, to_utf8, u8tou16, u16tou8;
|
14334 |
int from_utf16, from_utf8, to_utf16, to_utf8, u8tou16, u16tou8;
|
14342 |
const char space[] = { 0x00, 0x20 };
|
14335 |
const char space[] = { 0x00, 0x20 };
|
14343 |
|
14336 |
|
14344 |
/* Note: better call explicit unac_init_mt() before starting threads than
|
14337 |
std::unique_lock<std::mutex> lock(o_unac_mutex);
|
14345 |
rely on this.
|
|
|
14346 |
*/
|
|
|
14347 |
if (unac_mutex_is_init == 0) {
|
|
|
14348 |
pthread_mutex_init(&o_unac_mutex, 0);
|
|
|
14349 |
unac_mutex_is_init = 1;
|
|
|
14350 |
}
|
|
|
14351 |
pthread_mutex_lock(&o_unac_mutex);
|
|
|
14352 |
|
14338 |
|
14353 |
if (!strcmp(utf16be, from)) {
|
14339 |
if (!strcmp(utf16be, from)) {
|
14354 |
from_utf8 = 0;
|
14340 |
from_utf8 = 0;
|
14355 |
from_utf16 = 1;
|
14341 |
from_utf16 = 1;
|
14356 |
} else if (!strcasecmp("UTF-8", from)) {
|
14342 |
} else if (!strcasecmp("UTF-8", from)) {
|
|
... |
|
... |
14490 |
*out_lengthp = out - out_base;
|
14476 |
*out_lengthp = out - out_base;
|
14491 |
(*outp)[*out_lengthp] = '\0';
|
14477 |
(*outp)[*out_lengthp] = '\0';
|
14492 |
|
14478 |
|
14493 |
ret = 0;
|
14479 |
ret = 0;
|
14494 |
out:
|
14480 |
out:
|
14495 |
pthread_mutex_unlock(&o_unac_mutex);
|
|
|
14496 |
return ret;
|
14481 |
return ret;
|
14497 |
}
|
14482 |
}
|
14498 |
|
14483 |
|
14499 |
int unacmaybefold_string(const char* charset,
|
14484 |
int unacmaybefold_string(const char* charset,
|
14500 |
const char* in, size_t in_length,
|
14485 |
const char* in, size_t in_length,
|