|
a/unac/builder.in |
|
b/unac/builder.in |
|
... |
|
... |
339 |
push(@blocks, $block);
|
339 |
push(@blocks, $block);
|
340 |
}
|
340 |
}
|
341 |
@values = ();
|
341 |
@values = ();
|
342 |
}
|
342 |
}
|
343 |
$code_value = uc(sprintf("%04x", $code_value));
|
343 |
$code_value = uc(sprintf("%04x", $code_value));
|
|
|
344 |
#print "$code_value UNAC ";
|
344 |
if(exists($decomposition->{$code_value})) {
|
345 |
if(exists($decomposition->{$code_value})) {
|
345 |
push(@values, $decomposition->{$code_value});
|
346 |
push(@values, $decomposition->{$code_value});
|
|
|
347 |
#print "$decomposition->{$code_value} ";
|
346 |
} else {
|
348 |
} else {
|
347 |
push(@values, "FFFF");
|
349 |
push(@values, "FFFF");
|
|
|
350 |
#print "FFFF ";
|
348 |
}
|
351 |
}
|
349 |
# We also push the case-folded version of the unaccented char
|
352 |
# We push both the case-folded version of the unaccented char
|
350 |
# Note that by pushing the case-folded version of the original
|
353 |
# and the case-folded version of the original one. This
|
351 |
# char, we'd have the possibility of independent unaccenting and
|
354 |
# makes the table a little bigger, but allows
|
352 |
# case folding, but with less performance.
|
355 |
# independently unaccenting, folding or both
|
353 |
# We could also keep the three chunks, using a little more memory
|
356 |
#print "UNACFOLD ";
|
354 |
if(exists($decomposition->{$code_value})) {
|
357 |
if(exists($decomposition->{$code_value})) {
|
355 |
my($cv);
|
358 |
my($cv);
|
356 |
my(@vl);
|
359 |
my(@vl);
|
357 |
foreach $cv (split(' ', $decomposition->{$code_value})) {
|
360 |
foreach $cv (split(' ', $decomposition->{$code_value})) {
|
358 |
if(exists($casefold->{$cv})) {
|
361 |
if(exists($casefold->{$cv})) {
|
359 |
push(@vl, $casefold->{$cv});
|
362 |
push(@vl, $casefold->{$cv});
|
|
|
363 |
#print "$casefold->{$cv} ";
|
360 |
} else {
|
364 |
} else {
|
361 |
push(@vl, $cv);
|
365 |
push(@vl, $cv);
|
|
|
366 |
#print "$cv ";
|
362 |
}
|
367 |
}
|
363 |
}
|
368 |
}
|
364 |
#print STDERR "Pushing " . join(" ", @vl) . " for " .
|
|
|
365 |
#$code_value . "\n";
|
|
|
366 |
push(@values, join(" ", @vl));
|
369 |
push(@values, join(" ", @vl));
|
367 |
} else {
|
370 |
} else {
|
368 |
if(exists($casefold->{$code_value})) {
|
371 |
if(exists($casefold->{$code_value})) {
|
369 |
push(@values, $casefold->{$code_value});
|
372 |
push(@values, $casefold->{$code_value});
|
|
|
373 |
#print "$casefold->{$code_value} ";
|
370 |
} else {
|
374 |
} else {
|
371 |
push(@values, "FFFF");
|
375 |
push(@values, "FFFF");
|
|
|
376 |
#print "FFFF ";
|
372 |
}
|
377 |
}
|
373 |
}
|
378 |
}
|
|
|
379 |
#print "FOLD ";
|
|
|
380 |
if(exists($casefold->{$code_value})) {
|
|
|
381 |
push(@values, $casefold->{$code_value});
|
|
|
382 |
#print "$casefold->{$code_value} ";
|
|
|
383 |
} else {
|
|
|
384 |
push(@values, "FFFF");
|
|
|
385 |
#print "FFFF ";
|
|
|
386 |
}
|
|
|
387 |
#print "\n";
|
374 |
}
|
388 |
}
|
375 |
print STDERR scalar(@blocks) . " blocks of " . $block_count . " entries, factorized $duplicate blocks\n\t" if($verbose);
|
389 |
print STDERR scalar(@blocks) . " blocks of " . $block_count . " entries, factorized $duplicate blocks\n\t" if($verbose);
|
376 |
my($block_size) = 0;
|
390 |
my($block_size) = 0;
|
377 |
my($block);
|
391 |
my($block);
|
378 |
foreach $block (@blocks) {
|
392 |
foreach $block (@blocks) {
|
|
... |
|
... |
464 |
push(@data_table_out, "unac_data$block_number");
|
478 |
push(@data_table_out, "unac_data$block_number");
|
465 |
push(@data_out, "unsigned short unac_data$block_number" . "[] = { 0x" . join(", 0x", @data) . " };\n");
|
479 |
push(@data_out, "unsigned short unac_data$block_number" . "[] = { 0x" . join(", 0x", @data) . " };\n");
|
466 |
$block_number++;
|
480 |
$block_number++;
|
467 |
}
|
481 |
}
|
468 |
my($position_type) = $highest_position >= 256 ? "short" : "char";
|
482 |
my($position_type) = $highest_position >= 256 ? "short" : "char";
|
469 |
my($positions_out) = "unsigned $position_type unac_positions[UNAC_BLOCK_COUNT][2*UNAC_BLOCK_SIZE + 1] = {\n";
|
483 |
my($positions_out) = "unsigned $position_type unac_positions[UNAC_BLOCK_COUNT][3*UNAC_BLOCK_SIZE + 1] = {\n";
|
470 |
|
484 |
|
471 |
$positions_out .= join(",\n", @positions_out);
|
485 |
$positions_out .= join(",\n", @positions_out);
|
472 |
$positions_out .= "\n};\n";
|
486 |
$positions_out .= "\n};\n";
|
473 |
my($data_out) = join("", @data_out);
|
487 |
my($data_out) = join("", @data_out);
|
474 |
$data_table_out .= join(",\n", @data_table_out);
|
488 |
$data_table_out .= join(",\n", @data_table_out);
|
|
... |
|
... |
479 |
# result : $declarations
|
493 |
# result : $declarations
|
480 |
#
|
494 |
#
|
481 |
my($declarations);
|
495 |
my($declarations);
|
482 |
$declarations = <<EOF;
|
496 |
$declarations = <<EOF;
|
483 |
extern unsigned short unac_indexes[UNAC_INDEXES_SIZE];
|
497 |
extern unsigned short unac_indexes[UNAC_INDEXES_SIZE];
|
484 |
extern unsigned $position_type unac_positions[UNAC_BLOCK_COUNT][2*UNAC_BLOCK_SIZE + 1];
|
498 |
extern unsigned $position_type unac_positions[UNAC_BLOCK_COUNT][3*UNAC_BLOCK_SIZE + 1];
|
485 |
extern unsigned short* unac_data_table[UNAC_BLOCK_COUNT];
|
499 |
extern unsigned short* unac_data_table[UNAC_BLOCK_COUNT];
|
486 |
EOF
|
500 |
EOF
|
487 |
for($block_number = 0; $block_number < $block_count; $block_number++) {
|
501 |
for($block_number = 0; $block_number < $block_count; $block_number++) {
|
488 |
$declarations .= "extern unsigned short unac_data$block_number" . "[];\n";
|
502 |
$declarations .= "extern unsigned short unac_data$block_number" . "[];\n";
|
489 |
}
|
503 |
}
|