recoll / Code / Diff of /unac/builder.in

Diff of /unac/builder.in [5cf720] .. [869d75]

Switch to unified view


...
    
    #
    # Generate compatibility decomposition and strip marks
    # (marks == diacritics == accents)
    #
    # For kana japanese characters, we don't strip accents. Note: we just
    # need to test for the main kana (hiragana + katakana 3040-30ff) block,
    # characters such as halfwidth variations will be first decomposed into it
    my($from, $to);
    while(($from, $to) = each(%decomposition)) {
    my(@code_values) = split(' ', $to);
    my($code_value);
    my(@decomposition);
    while(@code_values) {
        my($code_value) = shift(@code_values);
        if(exists($decomposition{$code_value})) {
        push(@code_values, split(' ', $decomposition{$code_value}));
        } elsif (!exists($mark{$code_value}) || 
           (hex $code_value >= 0x3040 && hex $code_value <= 0x30ff)) {
        push(@decomposition, $code_value);
        }
    }
    if(@decomposition) {
        $decomposition{$from} = "@decomposition";

	a/unac/builder.in		b/unac/builder.in
	...		...
80		80
81	#	81	#
82	# Generate compatibility decomposition and strip marks	82	# Generate compatibility decomposition and strip marks
83	# (marks == diacritics == accents)	83	# (marks == diacritics == accents)
84	#	84	#
		85	# For kana japanese characters, we don't strip accents. Note: we just
		86	# need to test for the main kana (hiragana + katakana 3040-30ff) block,
		87	# characters such as halfwidth variations will be first decomposed into it
85	my($from, $to);	88	my($from, $to);
86	while(($from, $to) = each(%decomposition)) {	89	while(($from, $to) = each(%decomposition)) {
87	my(@code_values) = split(' ', $to);	90	my(@code_values) = split(' ', $to);
88	my($code_value);	91	my($code_value);
89	my(@decomposition);	92	my(@decomposition);
90	while(@code_values) {	93	while(@code_values) {
91	my($code_value) = shift(@code_values);	94	my($code_value) = shift(@code_values);
92	if(exists($decomposition{$code_value})) {	95	if(exists($decomposition{$code_value})) {
93	push(@code_values, split(' ', $decomposition{$code_value}));	96	push(@code_values, split(' ', $decomposition{$code_value}));
94	} elsif(!exists($mark{$code_value})) {	97	} elsif (!exists($mark{$code_value}) ||
		98	(hex $code_value >= 0x3040 && hex $code_value <= 0x30ff)) {
95	push(@decomposition, $code_value);	99	push(@decomposition, $code_value);
96	}	100	}
97	}	101	}
98	if(@decomposition) {	102	if(@decomposition) {
99	$decomposition{$from} = "@decomposition";	103	$decomposition{$from} = "@decomposition";