Parent: [8c627a] (diff)

Child: [0d24b5] (diff)

Download this file

unac.h    531 lines (514 with data), 19.8 kB

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
/*
* Copyright (C) 2000, 2001, 2002 Loic Dachary <loic@senga.org>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*/
/*
* Provides functions to strip accents from a string in all the
* charsets supported by iconv(3).
*
* See the unac(3) manual page for more information.
*
*/
#ifndef _unac_h
#define _unac_h
#include <stddef.h> /* size_t, used by the unac_string* prototypes */
#ifdef __cplusplus
extern "C" {
#endif
/* Generated by builder. Do not modify. Start defines */
/* A character's block is selected by shifting its 16-bit code right by this many bits. */
#define UNAC_BLOCK_SHIFT 4
/* Mask keeping the low UNAC_BLOCK_SHIFT bits of a code, i.e. its offset inside its block. */
#define UNAC_BLOCK_MASK ((1 << UNAC_BLOCK_SHIFT) - 1)
/* Number of character codes covered by one block (16 with a shift of 4). */
#define UNAC_BLOCK_SIZE (1 << UNAC_BLOCK_SHIFT)
/* Number of blocks: first dimension of unac_positions and unac_data_table. */
#define UNAC_BLOCK_COUNT 355
/* Number of entries in unac_indexes: one per UNAC_BLOCK_SIZE-wide slice of the 0x10000 codes. */
#define UNAC_INDEXES_SIZE (0x10000 >> UNAC_BLOCK_SHIFT)
/* Generated by builder. Do not modify. End defines */
/*
 * Look up the unaccented equivalent of the UTF-16 character <c>.
 * On return <p> points to the replacement, an array of unsigned
 * short, and <l> holds the number of elements of that array. When
 * the table entry is the single value 0xFFFF, <p> is set to 0 and
 * <l> to 0 instead (presumably meaning that <c> has no unaccented
 * form; see unac(3)).
 *
 * The C++ prototype of this macro would be:
 *
 * void unac_char_utf16(const unsigned short c, unsigned short*& p, int& l)
 *
 * <c> is evaluated exactly once and converted to unsigned short, so
 * an argument with side effects (for instance *s++) is safe. <p> and
 * <l> are evaluated several times and must be plain lvalues. The
 * temporaries of the macro use unac_..._ names so that they cannot
 * capture an argument that happens to be called "index" or
 * "position".
 *
 * The expansion is a brace-enclosed block, not an expression: do not
 * use it as the unbraced body of an if that has an else branch.
 *
 * See unac(3) in IMPLEMENTATION NOTES for more information about the
 * tables (unac_data_table, unac_positions) layout.
 */
#define unac_char_utf16(c,p,l) \
{ \
  /* Evaluate the character once; everything below works on the copy. */ \
  const unsigned short unac_c_ = (unsigned short)(c); \
  /* Block number of the character. */ \
  const unsigned short unac_index_ = unac_indexes[unac_c_ >> UNAC_BLOCK_SHIFT]; \
  /* Even slot: start of the unaccented entry of the character. */ \
  const unsigned char unac_position_ = (unsigned char)(2 * (unac_c_ & UNAC_BLOCK_MASK)); \
  (p) = &(unac_data_table[unac_index_][unac_positions[unac_index_][unac_position_]]); \
  /* The entry ends where the next slot starts. */ \
  (l) = unac_positions[unac_index_][unac_position_ + 1] - unac_positions[unac_index_][unac_position_]; \
  /* A lone 0xFFFF is the marker reported as "nothing": null pointer, zero length. */ \
  if((l) == 1 && *(p) == 0xFFFF) { \
    (p) = 0; \
    (l) = 0; \
  } \
}
/*
 * Same as unac_char_utf16, but the result is also case-folded: the
 * lookup reads the odd slot (2*k + 1) of unac_positions for the k-th
 * character of a block instead of the even one.
 *
 * On return <p> points to the replacement, an array of unsigned
 * short, and <l> holds its number of elements; when the table entry
 * is the single value 0xFFFF, <p> is set to 0 and <l> to 0.
 *
 * <c> is evaluated exactly once and converted to unsigned short.
 * <p> and <l> are evaluated several times and must be plain lvalues.
 * The temporaries of the macro use unac_..._ names so that they
 * cannot capture an argument that happens to be called "index" or
 * "position".
 *
 * The expansion is a brace-enclosed block, not an expression: do not
 * use it as the unbraced body of an if that has an else branch.
 */
#define unacfold_char_utf16(c,p,l) \
{ \
  /* Evaluate the character once; everything below works on the copy. */ \
  const unsigned short unac_c_ = (unsigned short)(c); \
  /* Block number of the character. */ \
  const unsigned short unac_index_ = unac_indexes[unac_c_ >> UNAC_BLOCK_SHIFT]; \
  /* Odd slot: start of the unaccented and case-folded entry. */ \
  const unsigned char unac_position_ = (unsigned char)(2 * (unac_c_ & UNAC_BLOCK_MASK) + 1); \
  (p) = &(unac_data_table[unac_index_][unac_positions[unac_index_][unac_position_]]); \
  /* The entry ends where the next slot starts. */ \
  (l) = unac_positions[unac_index_][unac_position_ + 1] - unac_positions[unac_index_][unac_position_]; \
  /* A lone 0xFFFF is the marker reported as "nothing": null pointer, zero length. */ \
  if((l) == 1 && *(p) == 0xFFFF) { \
    (p) = 0; \
    (l) = 0; \
  } \
}
/*
 * Strip the accents from the UTF-16 string <in>, whose length is
 * <in_length>. The result is handed back through <out> and its
 * length through <out_length>.
 *
 * Output buffer:
 *  - if *out is null, a new string is allocated with malloc(3);
 *  - if *out is not null, it must have been obtained from malloc(3),
 *    *out_length must hold the space available in it, and the buffer
 *    may be moved by realloc(3) when more room is needed.
 * In both cases the caller owns the pointer returned in *out and is
 * responsible for freeing it.
 *
 * Returns 0 on success, or -1 on error with errno set to the
 * corresponding error code.
 *
 * unacfold_string_utf16 takes the same arguments; judging by its name
 * it also case-folds the result (compare unacfold_char_utf16).
 */
int unac_string_utf16(const char *in, size_t in_length,
                      char **out, size_t *out_length);
int unacfold_string_utf16(const char *in, size_t in_length,
                          char **out, size_t *out_length);
/*
 * Charset-aware variant: the semantics are strictly those of
 * unac_string_utf16, except that <charset> names the encoding of the
 * input string. The input is converted to UTF-16 with iconv(3), the
 * accents are removed, and the result is converted from UTF-16 back
 * to <charset> before being returned through <out>.
 *
 * For efficiency, callers are advised to run iconv(3) and the UTF-16
 * function (presumably unac_string_utf16) themselves, which saves the
 * overhead of the intermediate buffer allocations.
 *
 * Returns 0 on success, or -1 on error with errno set to the
 * corresponding error code.
 *
 * unacfold_string takes the same arguments; judging by its name it
 * also case-folds the result (compare unacfold_char_utf16).
 */
int unac_string(const char *charset,
                const char *in, size_t in_length,
                char **out, size_t *out_length);
int unacfold_string(const char *charset,
                    const char *in, size_t in_length,
                    char **out, size_t *out_length);
/*
 * To be called before starting threads in multi-threaded programs.
 * Takes no argument and returns nothing. Declared with (void) so that
 * C compilers see a real prototype and reject calls with arguments.
 */
void unac_init_mt(void);
/*
 * Report the version number of unac as a string. Takes no argument.
 */
const char *unac_version(void);
/* Debug levels accepted by unac_debug and unac_debug_callback. */
#define UNAC_DEBUG_NONE 0x00 /* no debug messages at all */
#define UNAC_DEBUG_LOW 0x01 /* minimal information */
#define UNAC_DEBUG_HIGH 0x02 /* extremely verbose information */
/*
 * The debug interface below is only declared when HAVE_VSNPRINTF is
 * defined; UNAC_DEBUG_AVAILABLE is defined in that case only, so it can
 * presumably be tested by callers before they use unac_debug.
 */
#ifdef HAVE_VSNPRINTF
#define UNAC_DEBUG_AVAILABLE 1
/*
 * Set the unac debug level. <l> is one of:
 * UNAC_DEBUG_NONE for no debug messages at all
 * UNAC_DEBUG_LOW for minimal information
 * UNAC_DEBUG_HIGH for extremely verbose information,
 * only usable when translating a few short strings.
 *
 * Expands to a call of unac_debug_callback with a null <function>
 * and a null <data>. The expansion deliberately carries no trailing
 * semicolon: the caller writes it, which keeps
 * "if (x) unac_debug(l); else ..." well-formed.
 *
 * unac_debug with anything but UNAC_DEBUG_NONE is not
 * thread safe.
 */
#define unac_debug(l) unac_debug_callback((l), 0, (void*)0)
/*
 * Select the debug level and install the function that receives the
 * debug messages.
 *
 * <level>    same values as for unac_debug.
 * <function> callback in charge of the debug messages, presumably
 *            printing them for the user. It is called with two
 *            arguments: the message (const char*) and the <data>
 *            pointer given here. If <function> is NULL, messages are
 *            printed on the standard error output using
 *            fprintf(stderr...).
 * <data>     opaque pointer passed along to <function>, should it
 *            need to manage a persistent context.
 *
 * unac_debug_callback with anything but UNAC_DEBUG_NONE is not
 * thread safe.
 */
typedef void (*unac_debug_print_t)(const char *message, void *data);
void unac_debug_callback(int level,
                         unac_debug_print_t function,
                         void *data);
#endif /* HAVE_VSNPRINTF */
/* Generated by builder. Do not modify. Start declarations */
/* Maps (character >> UNAC_BLOCK_SHIFT) to the block number used as first index of the two tables below. */
extern unsigned short unac_indexes[UNAC_INDEXES_SIZE];
/*
 * Per block, 2*UNAC_BLOCK_SIZE + 1 offsets into the data array of the block:
 * slot 2*k starts the entry read by unac_char_utf16 for the k-th character of
 * the block, slot 2*k + 1 the entry read by unacfold_char_utf16. An entry ends
 * where the next slot starts, hence the extra trailing slot.
 */
extern unsigned char unac_positions[UNAC_BLOCK_COUNT][2*UNAC_BLOCK_SIZE + 1];
/* Per block, pointer to the unsigned short data that the unac_positions offsets refer to. */
extern unsigned short* unac_data_table[UNAC_BLOCK_COUNT];
/*
 * One data array per block, unac_data0 .. unac_data354 (UNAC_BLOCK_COUNT
 * arrays in total). Their sizes are left open here; the definitions are
 * not part of this header. Presumably unac_data_table[i] points to
 * unac_data<i> -- confirm against the generated table source.
 */
extern unsigned short unac_data0[];
extern unsigned short unac_data1[];
extern unsigned short unac_data2[];
extern unsigned short unac_data3[];
extern unsigned short unac_data4[];
extern unsigned short unac_data5[];
extern unsigned short unac_data6[];
extern unsigned short unac_data7[];
extern unsigned short unac_data8[];
extern unsigned short unac_data9[];
extern unsigned short unac_data10[];
extern unsigned short unac_data11[];
extern unsigned short unac_data12[];
extern unsigned short unac_data13[];
extern unsigned short unac_data14[];
extern unsigned short unac_data15[];
extern unsigned short unac_data16[];
extern unsigned short unac_data17[];
extern unsigned short unac_data18[];
extern unsigned short unac_data19[];
extern unsigned short unac_data20[];
extern unsigned short unac_data21[];
extern unsigned short unac_data22[];
extern unsigned short unac_data23[];
extern unsigned short unac_data24[];
extern unsigned short unac_data25[];
extern unsigned short unac_data26[];
extern unsigned short unac_data27[];
extern unsigned short unac_data28[];
extern unsigned short unac_data29[];
extern unsigned short unac_data30[];
extern unsigned short unac_data31[];
extern unsigned short unac_data32[];
extern unsigned short unac_data33[];
extern unsigned short unac_data34[];
extern unsigned short unac_data35[];
extern unsigned short unac_data36[];
extern unsigned short unac_data37[];
extern unsigned short unac_data38[];
extern unsigned short unac_data39[];
extern unsigned short unac_data40[];
extern unsigned short unac_data41[];
extern unsigned short unac_data42[];
extern unsigned short unac_data43[];
extern unsigned short unac_data44[];
extern unsigned short unac_data45[];
extern unsigned short unac_data46[];
extern unsigned short unac_data47[];
extern unsigned short unac_data48[];
extern unsigned short unac_data49[];
extern unsigned short unac_data50[];
extern unsigned short unac_data51[];
extern unsigned short unac_data52[];
extern unsigned short unac_data53[];
extern unsigned short unac_data54[];
extern unsigned short unac_data55[];
extern unsigned short unac_data56[];
extern unsigned short unac_data57[];
extern unsigned short unac_data58[];
extern unsigned short unac_data59[];
extern unsigned short unac_data60[];
extern unsigned short unac_data61[];
extern unsigned short unac_data62[];
extern unsigned short unac_data63[];
extern unsigned short unac_data64[];
extern unsigned short unac_data65[];
extern unsigned short unac_data66[];
extern unsigned short unac_data67[];
extern unsigned short unac_data68[];
extern unsigned short unac_data69[];
extern unsigned short unac_data70[];
extern unsigned short unac_data71[];
extern unsigned short unac_data72[];
extern unsigned short unac_data73[];
extern unsigned short unac_data74[];
extern unsigned short unac_data75[];
extern unsigned short unac_data76[];
extern unsigned short unac_data77[];
extern unsigned short unac_data78[];
extern unsigned short unac_data79[];
extern unsigned short unac_data80[];
extern unsigned short unac_data81[];
extern unsigned short unac_data82[];
extern unsigned short unac_data83[];
extern unsigned short unac_data84[];
extern unsigned short unac_data85[];
extern unsigned short unac_data86[];
extern unsigned short unac_data87[];
extern unsigned short unac_data88[];
extern unsigned short unac_data89[];
extern unsigned short unac_data90[];
extern unsigned short unac_data91[];
extern unsigned short unac_data92[];
extern unsigned short unac_data93[];
extern unsigned short unac_data94[];
extern unsigned short unac_data95[];
extern unsigned short unac_data96[];
extern unsigned short unac_data97[];
extern unsigned short unac_data98[];
extern unsigned short unac_data99[];
extern unsigned short unac_data100[];
extern unsigned short unac_data101[];
extern unsigned short unac_data102[];
extern unsigned short unac_data103[];
extern unsigned short unac_data104[];
extern unsigned short unac_data105[];
extern unsigned short unac_data106[];
extern unsigned short unac_data107[];
extern unsigned short unac_data108[];
extern unsigned short unac_data109[];
extern unsigned short unac_data110[];
extern unsigned short unac_data111[];
extern unsigned short unac_data112[];
extern unsigned short unac_data113[];
extern unsigned short unac_data114[];
extern unsigned short unac_data115[];
extern unsigned short unac_data116[];
extern unsigned short unac_data117[];
extern unsigned short unac_data118[];
extern unsigned short unac_data119[];
extern unsigned short unac_data120[];
extern unsigned short unac_data121[];
extern unsigned short unac_data122[];
extern unsigned short unac_data123[];
extern unsigned short unac_data124[];
extern unsigned short unac_data125[];
extern unsigned short unac_data126[];
extern unsigned short unac_data127[];
extern unsigned short unac_data128[];
extern unsigned short unac_data129[];
extern unsigned short unac_data130[];
extern unsigned short unac_data131[];
extern unsigned short unac_data132[];
extern unsigned short unac_data133[];
extern unsigned short unac_data134[];
extern unsigned short unac_data135[];
extern unsigned short unac_data136[];
extern unsigned short unac_data137[];
extern unsigned short unac_data138[];
extern unsigned short unac_data139[];
extern unsigned short unac_data140[];
extern unsigned short unac_data141[];
extern unsigned short unac_data142[];
extern unsigned short unac_data143[];
extern unsigned short unac_data144[];
extern unsigned short unac_data145[];
extern unsigned short unac_data146[];
extern unsigned short unac_data147[];
extern unsigned short unac_data148[];
extern unsigned short unac_data149[];
extern unsigned short unac_data150[];
extern unsigned short unac_data151[];
extern unsigned short unac_data152[];
extern unsigned short unac_data153[];
extern unsigned short unac_data154[];
extern unsigned short unac_data155[];
extern unsigned short unac_data156[];
extern unsigned short unac_data157[];
extern unsigned short unac_data158[];
extern unsigned short unac_data159[];
extern unsigned short unac_data160[];
extern unsigned short unac_data161[];
extern unsigned short unac_data162[];
extern unsigned short unac_data163[];
extern unsigned short unac_data164[];
extern unsigned short unac_data165[];
extern unsigned short unac_data166[];
extern unsigned short unac_data167[];
extern unsigned short unac_data168[];
extern unsigned short unac_data169[];
extern unsigned short unac_data170[];
extern unsigned short unac_data171[];
extern unsigned short unac_data172[];
extern unsigned short unac_data173[];
extern unsigned short unac_data174[];
extern unsigned short unac_data175[];
extern unsigned short unac_data176[];
extern unsigned short unac_data177[];
extern unsigned short unac_data178[];
extern unsigned short unac_data179[];
extern unsigned short unac_data180[];
extern unsigned short unac_data181[];
extern unsigned short unac_data182[];
extern unsigned short unac_data183[];
extern unsigned short unac_data184[];
extern unsigned short unac_data185[];
extern unsigned short unac_data186[];
extern unsigned short unac_data187[];
extern unsigned short unac_data188[];
extern unsigned short unac_data189[];
extern unsigned short unac_data190[];
extern unsigned short unac_data191[];
extern unsigned short unac_data192[];
extern unsigned short unac_data193[];
extern unsigned short unac_data194[];
extern unsigned short unac_data195[];
extern unsigned short unac_data196[];
extern unsigned short unac_data197[];
extern unsigned short unac_data198[];
extern unsigned short unac_data199[];
extern unsigned short unac_data200[];
extern unsigned short unac_data201[];
extern unsigned short unac_data202[];
extern unsigned short unac_data203[];
extern unsigned short unac_data204[];
extern unsigned short unac_data205[];
extern unsigned short unac_data206[];
extern unsigned short unac_data207[];
extern unsigned short unac_data208[];
extern unsigned short unac_data209[];
extern unsigned short unac_data210[];
extern unsigned short unac_data211[];
extern unsigned short unac_data212[];
extern unsigned short unac_data213[];
extern unsigned short unac_data214[];
extern unsigned short unac_data215[];
extern unsigned short unac_data216[];
extern unsigned short unac_data217[];
extern unsigned short unac_data218[];
extern unsigned short unac_data219[];
extern unsigned short unac_data220[];
extern unsigned short unac_data221[];
extern unsigned short unac_data222[];
extern unsigned short unac_data223[];
extern unsigned short unac_data224[];
extern unsigned short unac_data225[];
extern unsigned short unac_data226[];
extern unsigned short unac_data227[];
extern unsigned short unac_data228[];
extern unsigned short unac_data229[];
extern unsigned short unac_data230[];
extern unsigned short unac_data231[];
extern unsigned short unac_data232[];
extern unsigned short unac_data233[];
extern unsigned short unac_data234[];
extern unsigned short unac_data235[];
extern unsigned short unac_data236[];
extern unsigned short unac_data237[];
extern unsigned short unac_data238[];
extern unsigned short unac_data239[];
extern unsigned short unac_data240[];
extern unsigned short unac_data241[];
extern unsigned short unac_data242[];
extern unsigned short unac_data243[];
extern unsigned short unac_data244[];
extern unsigned short unac_data245[];
extern unsigned short unac_data246[];
extern unsigned short unac_data247[];
extern unsigned short unac_data248[];
extern unsigned short unac_data249[];
extern unsigned short unac_data250[];
extern unsigned short unac_data251[];
extern unsigned short unac_data252[];
extern unsigned short unac_data253[];
extern unsigned short unac_data254[];
extern unsigned short unac_data255[];
extern unsigned short unac_data256[];
extern unsigned short unac_data257[];
extern unsigned short unac_data258[];
extern unsigned short unac_data259[];
extern unsigned short unac_data260[];
extern unsigned short unac_data261[];
extern unsigned short unac_data262[];
extern unsigned short unac_data263[];
extern unsigned short unac_data264[];
extern unsigned short unac_data265[];
extern unsigned short unac_data266[];
extern unsigned short unac_data267[];
extern unsigned short unac_data268[];
extern unsigned short unac_data269[];
extern unsigned short unac_data270[];
extern unsigned short unac_data271[];
extern unsigned short unac_data272[];
extern unsigned short unac_data273[];
extern unsigned short unac_data274[];
extern unsigned short unac_data275[];
extern unsigned short unac_data276[];
extern unsigned short unac_data277[];
extern unsigned short unac_data278[];
extern unsigned short unac_data279[];
extern unsigned short unac_data280[];
extern unsigned short unac_data281[];
extern unsigned short unac_data282[];
extern unsigned short unac_data283[];
extern unsigned short unac_data284[];
extern unsigned short unac_data285[];
extern unsigned short unac_data286[];
extern unsigned short unac_data287[];
extern unsigned short unac_data288[];
extern unsigned short unac_data289[];
extern unsigned short unac_data290[];
extern unsigned short unac_data291[];
extern unsigned short unac_data292[];
extern unsigned short unac_data293[];
extern unsigned short unac_data294[];
extern unsigned short unac_data295[];
extern unsigned short unac_data296[];
extern unsigned short unac_data297[];
extern unsigned short unac_data298[];
extern unsigned short unac_data299[];
extern unsigned short unac_data300[];
extern unsigned short unac_data301[];
extern unsigned short unac_data302[];
extern unsigned short unac_data303[];
extern unsigned short unac_data304[];
extern unsigned short unac_data305[];
extern unsigned short unac_data306[];
extern unsigned short unac_data307[];
extern unsigned short unac_data308[];
extern unsigned short unac_data309[];
extern unsigned short unac_data310[];
extern unsigned short unac_data311[];
extern unsigned short unac_data312[];
extern unsigned short unac_data313[];
extern unsigned short unac_data314[];
extern unsigned short unac_data315[];
extern unsigned short unac_data316[];
extern unsigned short unac_data317[];
extern unsigned short unac_data318[];
extern unsigned short unac_data319[];
extern unsigned short unac_data320[];
extern unsigned short unac_data321[];
extern unsigned short unac_data322[];
extern unsigned short unac_data323[];
extern unsigned short unac_data324[];
extern unsigned short unac_data325[];
extern unsigned short unac_data326[];
extern unsigned short unac_data327[];
extern unsigned short unac_data328[];
extern unsigned short unac_data329[];
extern unsigned short unac_data330[];
extern unsigned short unac_data331[];
extern unsigned short unac_data332[];
extern unsigned short unac_data333[];
extern unsigned short unac_data334[];
extern unsigned short unac_data335[];
extern unsigned short unac_data336[];
extern unsigned short unac_data337[];
extern unsigned short unac_data338[];
extern unsigned short unac_data339[];
extern unsigned short unac_data340[];
extern unsigned short unac_data341[];
extern unsigned short unac_data342[];
extern unsigned short unac_data343[];
extern unsigned short unac_data344[];
extern unsigned short unac_data345[];
extern unsigned short unac_data346[];
extern unsigned short unac_data347[];
extern unsigned short unac_data348[];
extern unsigned short unac_data349[];
extern unsigned short unac_data350[];
extern unsigned short unac_data351[];
extern unsigned short unac_data352[];
extern unsigned short unac_data353[];
extern unsigned short unac_data354[];
/* Generated by builder. Do not modify. End declarations */
#ifdef __cplusplus
}
#endif
#endif /* _unac_h */