Switch to unified view

a/unac/builder.in b/unac/builder.in
...
...
71
    #
71
    #
72
    if($character_name =~ /^<(.*), (First|Last)>/) {
72
    if($character_name =~ /^<(.*), (First|Last)>/) {
73
        $ranges{$1}{$2} = $code_value;
73
        $ranges{$1}{$2} = $code_value;
74
    }
74
    }
75
75
76
  # Test for exceptions to unaccenting. Note that this is
77
  # mostly based on blocks when it should use the Unicode
78
  # script property. In practice, for the script concerned,
79
  # this does not look to be an issue currently
80
  # (following comment made for japanese but also concerns
81
  #  other exceptions)
76
    # For kana japanese characters, we don't want to strip accents as I'm
82
    # For kana japanese characters, we don't want to strip
77
  # told that they are essential and stripping them does not
83
  # accents as I'm told that they are essential and
78
  # make sense. Wonder why Unicode does these decompositions
84
  # stripping them does not make sense. 
79
    # then...  Problem: the first solution used was to decompose
85
    # Problem: the first solution used was to decompose the
80
    # the japanese accented kana and not remove accents. But then
86
    # Japanese accented kana and not remove accents. But then
81
    # the unaccented character would match the string with
87
    # the unaccented character would match the string with
82
    # accent. So now we don't decompose at all, but this means
88
    # accent. So now we don't decompose at all, but this means
83
    # that, if the original text was decomposed, things don't work
89
    # that, if the original text was decomposed, things don't
84
    # as intended as we should actually recombine the
90
    # work as intended as we should actually recombine the
85
    # letter+accents in this case for data to be unified.
91
    # letter+accents in this case for data to be unified.
92
93
  # Hiragana + Katakana 
94
  if (!(hex $code_value >= 0x3040 && hex $code_value <= 0x30ff)
95
      # Halfwidth katakana
96
      && !(hex $code_value >= 0xff65 && hex $code_value <= 0xff9f)
97
      #  Hindi Devanagari
98
      && !(hex $code_value >= 0x0900 && hex $code_value <= 0x097f)
99
      && !(hex $code_value >= 0xa8e0 && hex $code_value <= 0xa8ff)
100
      ) {
101
      # If a decomposition exists, record it
86
    if($character_decomposition_mapping =~ /(<.*>)?\s*(.+)/) {
102
        if($character_decomposition_mapping =~ /(<.*>)?\s*(.+)/) {
87
      # Not for Hiragana + Katakana 
88
      if (!(hex $code_value >= 0x3040 && hex $code_value <= 0x30ff) &&
89
      # and Halfwidth katakana
90
      !(hex $code_value >= 0xff65 && hex $code_value <= 0xff9f) ) {
91
        $decomposition{$code_value} = $2;
103
        $decomposition{$code_value} = $2;
92
        }
104
        }
93
  }
94
    if($general_category =~ /^M/) {
105
        if($general_category =~ /^M/) {
95
        $mark{$code_value} = 1;
106
      $mark{$code_value} = 1;
96
            # For mark characters, we generate a 0 entry in the
107
      # For mark characters, we generate a 0 entry in the
97
            # decomposition table. This signals to the c code that no
108
      # decomposition table. This signals to the c code that no
98
            # output should be generated. Slightly hacky but ok. The
109
      # output should be generated. Slightly hacky but ok. The
99
            # original code let mark characters go through (generating
110
      # original code let mark characters go through (generating
100
            # still accented output if the input was in decomposed
111
      # still accented output if the input was in decomposed
101
            # form). Decomposed text is rare, but, for example, macosx file
112
      # form). Decomposed text is rare, but, for example, macosx file
102
            # names have separate combining accent characters.
113
      # names have separate combining accent characters.
103
        $decomposition{$code_value} = "0000";
114
      $decomposition{$code_value} = "0000";
115
      }
104
    }
116
    }
105
    $name{$code_value} = $character_name;
117
    $name{$code_value} = $character_name;
106
    }
118
    }
107
    close(FILE);
119
    close(FILE);
108
    
120