|
a/unac/builder.in |
|
b/unac/builder.in |
|
... |
|
... |
80 |
|
80 |
|
81 |
#
|
81 |
#
|
82 |
# Generate compatibility decomposition and strip marks
|
82 |
# Generate compatibility decomposition and strip marks
|
83 |
# (marks == diacritics == accents)
|
83 |
# (marks == diacritics == accents)
|
84 |
#
|
84 |
#
|
|
|
85 |
# For Japanese kana characters, we don't strip accents. Note: we only
|
|
|
86 |
# need to test for the main kana block (hiragana + katakana, U+3040-U+30FF),
|
|
|
87 |
# because characters such as halfwidth variants are first decomposed into it.
|
85 |
my($from, $to);
|
88 |
my($from, $to);
|
86 |
while(($from, $to) = each(%decomposition)) {
|
89 |
while(($from, $to) = each(%decomposition)) {
|
87 |
my(@code_values) = split(' ', $to);
|
90 |
my(@code_values) = split(' ', $to);
|
88 |
my($code_value);
|
91 |
my($code_value);
|
89 |
my(@decomposition);
|
92 |
my(@decomposition);
|
90 |
while(@code_values) {
|
93 |
while(@code_values) {
|
91 |
my($code_value) = shift(@code_values);
|
94 |
my($code_value) = shift(@code_values);
|
92 |
if(exists($decomposition{$code_value})) {
|
95 |
if(exists($decomposition{$code_value})) {
|
93 |
push(@code_values, split(' ', $decomposition{$code_value}));
|
96 |
push(@code_values, split(' ', $decomposition{$code_value}));
|
94 |
} elsif(!exists($mark{$code_value})) {
|
97 |
} elsif (!exists($mark{$code_value}) ||
|
|
|
98 |
(hex $code_value >= 0x3040 && hex $code_value <= 0x30ff)) {
|
95 |
push(@decomposition, $code_value);
|
99 |
push(@decomposition, $code_value);
|
96 |
}
|
100 |
}
|
97 |
}
|
101 |
}
|
98 |
if(@decomposition) {
|
102 |
if(@decomposition) {
|
99 |
$decomposition{$from} = "@decomposition";
|
103 |
$decomposition{$from} = "@decomposition";
|