|
a/src/unac/unac.c |
|
b/src/unac/unac.c |
|
... |
|
... |
13065 |
|
13065 |
|
13066 |
/*
|
13066 |
/*
|
13067 |
* Debug level. See unac.h for a detailed discussion of the
|
13067 |
* Debug level. See unac.h for a detailed discussion of the
|
13068 |
* values.
|
13068 |
* values.
|
13069 |
*/
|
13069 |
*/
|
13070 |
static int debug_level = UNAC_DEBUG_NONE;
|
13070 |
static int debug_level = UNAC_DEBUG_LOW;
|
13071 |
|
13071 |
|
13072 |
#ifdef UNAC_DEBUG_AVAILABLE
|
13072 |
#ifdef UNAC_DEBUG_AVAILABLE
|
13073 |
|
13073 |
|
13074 |
/*
|
13074 |
/*
|
13075 |
* Default debug function, printing on stderr.
|
13075 |
* Default debug function, printing on stderr.
|
|
... |
|
... |
13140 |
iconv_t cd;
|
13140 |
iconv_t cd;
|
13141 |
static char* name = 0;
|
13141 |
static char* name = 0;
|
13142 |
|
13142 |
|
13143 |
if(name == 0) {
|
13143 |
if(name == 0) {
|
13144 |
if((cd = iconv_open("UTF-16BE", "UTF-16BE")) == (iconv_t)-1) {
|
13144 |
if((cd = iconv_open("UTF-16BE", "UTF-16BE")) == (iconv_t)-1) {
|
13145 |
if(debug_level == UNAC_DEBUG_LOW) DEBUG("could not find UTF-16BE (see iconv -l), using UTF-16. If UTF-16 happens to be encoded in little endian, be prepared for an horrible mess.");
|
13145 |
if(debug_level >= UNAC_DEBUG_LOW) DEBUG("could not find UTF-16BE (see iconv -l), using UTF-16. If UTF-16 happens to be encoded in little endian, be prepared for an horrible mess.");
|
13146 |
name = "UTF-16";
|
13146 |
name = "UTF-16";
|
13147 |
} else {
|
13147 |
} else {
|
13148 |
iconv_close(cd);
|
13148 |
iconv_close(cd);
|
13149 |
name = "UTF-16BE";
|
13149 |
name = "UTF-16BE";
|
13150 |
}
|
13150 |
}
|
|
... |
|
... |
13160 |
int out_size;
|
13160 |
int out_size;
|
13161 |
int out_length;
|
13161 |
int out_length;
|
13162 |
int i;
|
13162 |
int i;
|
13163 |
|
13163 |
|
13164 |
out_size = in_length > 0 ? in_length : 1024;
|
13164 |
out_size = in_length > 0 ? in_length : 1024;
|
13165 |
if(*outp) {
|
13165 |
if (*outp) {
|
13166 |
out = *outp;
|
13166 |
out = *outp;
|
13167 |
/* +1 for null */
|
13167 |
/* +1 for null */
|
13168 |
out = realloc(out, out_size + 1);
|
13168 |
out = realloc(out, out_size + 1);
|
|
|
13169 |
if(out == 0) {
|
|
|
13170 |
if(debug_level >= UNAC_DEBUG_LOW)
|
|
|
13171 |
DEBUG("realloc %d bytes failed\n", out_size+1);
|
|
|
13172 |
// *outp is still valid. Let the caller free it
|
|
|
13173 |
return -1;
|
|
|
13174 |
}
|
13169 |
} else {
|
13175 |
} else {
|
13170 |
/* +1 for null */
|
13176 |
/* +1 for null */
|
13171 |
out = malloc(out_size + 1);
|
13177 |
out = malloc(out_size + 1);
|
13172 |
if(out == 0) return -1;
|
13178 |
if (out == 0) {
|
|
|
13179 |
if(debug_level >= UNAC_DEBUG_LOW)
|
|
|
13180 |
DEBUG("malloc %d bytes failed\n", out_size+1);
|
|
|
13181 |
return -1;
|
|
|
13182 |
}
|
13173 |
}
|
13183 |
}
|
13174 |
out_length = 0;
|
13184 |
out_length = 0;
|
13175 |
|
13185 |
|
13176 |
for(i = 0; i < in_length; i += 2) {
|
13186 |
for(i = 0; i < in_length; i += 2) {
|
13177 |
unsigned short c;
|
13187 |
unsigned short c;
|
|
... |
|
... |
13202 |
for(i = 0; i < l; i++)
|
13212 |
for(i = 0; i < l; i++)
|
13203 |
DEBUG_APPEND("0x%04x ", p[i]);
|
13213 |
DEBUG_APPEND("0x%04x ", p[i]);
|
13204 |
DEBUG_APPEND("\n");
|
13214 |
DEBUG_APPEND("\n");
|
13205 |
}
|
13215 |
}
|
13206 |
}
|
13216 |
}
|
|
|
13217 |
|
13207 |
/*
|
13218 |
/*
|
13208 |
* Make sure there is enough space to hold the decomposition
|
13219 |
* Make sure there is enough space to hold the decomposition
|
|
|
13220 |
* Note: a previous realloc may have succeeded, which means that *outp
|
|
|
13221 |
* is not valid any more. We have to do the freeing and zero out *outp
|
13209 |
*/
|
13222 |
*/
|
13210 |
if(out_length + ((l + 1) * 2) > out_size) {
|
13223 |
if(out_length + ((l + 1) * 2) > out_size) {
|
13211 |
out_size += ((l + 1) * 2) + 1024;
|
13224 |
out_size += ((l + 1) * 2) + 1024;
|
|
|
13225 |
char *saved = out;
|
13212 |
out = realloc(out, out_size);
|
13226 |
out = realloc(out, out_size);
|
13213 |
if(out == 0) {
|
13227 |
if(out == 0) {
|
13214 |
if(debug_level == UNAC_DEBUG_LOW)
|
13228 |
if(debug_level >= UNAC_DEBUG_LOW)
|
13215 |
DEBUG("realloc %d bytes failed\n", out_size);
|
13229 |
DEBUG("realloc %d bytes failed\n", out_size);
|
|
|
13230 |
free(saved);
|
|
|
13231 |
*outp = 0;
|
13216 |
return -1;
|
13232 |
return -1;
|
13217 |
}
|
13233 |
}
|
13218 |
}
|
13234 |
}
|
13219 |
if(l > 0) {
|
13235 |
if(l > 0) {
|
13220 |
/*
|
13236 |
/*
|
|
... |
|
... |
13281 |
out_size = in_length > 0 ? in_length : 1024;
|
13297 |
out_size = in_length > 0 ? in_length : 1024;
|
13282 |
if(*outp) {
|
13298 |
if(*outp) {
|
13283 |
out = *outp;
|
13299 |
out = *outp;
|
13284 |
/* +1 for null */
|
13300 |
/* +1 for null */
|
13285 |
out = realloc(out, out_size + 1);
|
13301 |
out = realloc(out, out_size + 1);
|
|
|
13302 |
if(out == 0) {
|
|
|
13303 |
// *outp still valid, no freeing
|
|
|
13304 |
if(debug_level >= UNAC_DEBUG_LOW)
|
|
|
13305 |
DEBUG("realloc %d bytes failed\n", out_size+1);
|
|
|
13306 |
return -1;
|
|
|
13307 |
}
|
13286 |
} else {
|
13308 |
} else {
|
13287 |
/* +1 for null */
|
13309 |
/* +1 for null */
|
13288 |
out = malloc(out_size + 1);
|
13310 |
out = malloc(out_size + 1);
|
13289 |
if(out == 0) return -1;
|
13311 |
if(out == 0) {
|
|
|
13312 |
if(debug_level >= UNAC_DEBUG_LOW)
|
|
|
13313 |
DEBUG("malloc %d bytes failed\n", out_size+1);
|
|
|
13314 |
return -1;
|
|
|
13315 |
}
|
13290 |
}
|
13316 |
}
|
13291 |
out_remain = out_size;
|
13317 |
out_remain = out_size;
|
13292 |
out_base = out;
|
13318 |
out_base = out;
|
13293 |
|
13319 |
|
13294 |
if((cd = iconv_open(to, from)) == (iconv_t)-1) {
|
13320 |
if((cd = iconv_open(to, from)) == (iconv_t)-1) {
|
|
... |
|
... |
13336 |
return -1;
|
13362 |
return -1;
|
13337 |
}
|
13363 |
}
|
13338 |
case E2BIG:
|
13364 |
case E2BIG:
|
13339 |
{
|
13365 |
{
|
13340 |
/*
|
13366 |
/*
|
13341 |
* The output does not fit in the current out buffer, enlarge it.
|
13367 |
* The output does not fit in the current out buffer, enlarge it.
|
13342 |
*/
|
13368 |
*/
|
13343 |
int length = out - out_base;
|
13369 |
int length = out - out_base;
|
13344 |
out_size *= 2;
|
13370 |
out_size *= 2;
|
|
|
13371 |
{
|
|
|
13372 |
char *saved = out_base;
|
13345 |
/* +1 for null */
|
13373 |
/* +1 for null */
|
13346 |
out_base = realloc(out_base, out_size + 1);
|
13374 |
out_base = realloc(out_base, out_size + 1);
|
13347 |
if(out_base == 0) return -1;
|
13375 |
if (out_base == 0) {
|
|
|
13376 |
// *outp potentially not valid any more. Free here,
|
|
|
13377 |
// and zero out
|
|
|
13378 |
if(debug_level >= UNAC_DEBUG_LOW)
|
|
|
13379 |
DEBUG("realloc %d bytes failed\n", out_size+1);
|
|
|
13380 |
free(saved);
|
|
|
13381 |
*outp = 0;
|
|
|
13382 |
return -1;
|
|
|
13383 |
}
|
|
|
13384 |
}
|
13348 |
out = out_base + length;
|
13385 |
out = out_base + length;
|
13349 |
out_remain = out_size - length;
|
13386 |
out_remain = out_size - length;
|
13350 |
}
|
13387 |
}
|
13351 |
break;
|
13388 |
break;
|
13352 |
default:
|
13389 |
default:
|
|
... |
|
... |
13370 |
{
|
13407 |
{
|
13371 |
/*
|
13408 |
/*
|
13372 |
* When converting an empty string, skip everything but alloc the
|
13409 |
* When converting an empty string, skip everything but alloc the
|
13373 |
* buffer if NULL pointer.
|
13410 |
* buffer if NULL pointer.
|
13374 |
*/
|
13411 |
*/
|
13375 |
if(in_length <= 0) {
|
13412 |
if (in_length <= 0) {
|
13376 |
if(!*outp)
|
13413 |
if(!*outp) {
|
13377 |
*outp = malloc(32);
|
13414 |
if ((*outp = malloc(32)) == 0)
|
|
|
13415 |
return -1;
|
|
|
13416 |
}
|
13378 |
(*outp)[0] = '\0';
|
13417 |
(*outp)[0] = '\0';
|
13379 |
*out_lengthp = 0;
|
13418 |
*out_lengthp = 0;
|
13380 |
} else {
|
13419 |
} else {
|
13381 |
char* utf16 = 0;
|
13420 |
char* utf16 = 0;
|
13382 |
size_t utf16_length = 0;
|
13421 |
size_t utf16_length = 0;
|
13383 |
char* utf16_unaccented = 0;
|
13422 |
char* utf16_unaccented = 0;
|
13384 |
size_t utf16_unaccented_length = 0;
|
13423 |
size_t utf16_unaccented_length = 0;
|