Download this file

gencasefold.sh    122 lines (108 with data), 2.6 kB

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
#!/bin/sh
###############
## Step 1: build the gperf input from CaseFolding.txt with awk, then let
## gperf turn it into a perfect-hash lookup table in caseconvert.cpp.
awk -F';' '
BEGIN {
    # gperf declarations section: verbatim C++ prologue, then the
    # struct used for each keyword entry, then the section separator.
    print "%{"
    print "// Automatically generated by gencasefold.sh, do not edit"
    print "#ifndef TEST_CASECONVERT"
    print "%}"
    print "struct mapping { char *name; unsigned short value; };"
    print ""
    print "%%"
}

# Comment lines and empty lines carry no mapping.
/^#/ || /^$/ { next }

# Keep entries whose code field is at most 4 characters long and whose
# status field contains C or S. Every other line is silently skipped.
length($1) <= 4 && $2 ~ /[CS]/ {
    mapped = $3
    gsub(/ /, "", mapped)
    printf "%s, 0x%s\n", $1, mapped
}
' < CaseFolding.txt | gperf -I -n -LC++ -t > caseconvert.cpp
#############
## Append wrapper function
cat <<EOF >> caseconvert.cpp
#include <stdio.h>
#include <string>
#include "caseconvert.h"
using std::string;
// Input and output must be utf-16
bool ucs2lower(const string &in, string &out)
{
if (in.length() < 2) {
out.erase();
return true;
}
static const char hex[]="0123456789ABCDEF";
char key[5];
key[4] = 0;
for (unsigned int i = 0; i < in.length() - 1; i += 2) {
struct mapping *m;
// Convert 16 bits to 4 hex chars as key
key[0] = hex[(in[i]&0xf0) >> 4];
key[1] = hex[in[i] & 0x0f];
key[2] = hex[(in[i+1]&0xf0) >> 4];
key[3] = hex[in[i+1] & 0x0f];
//fprintf(stderr, "Key: %s\n", key);
if ((m = Perfect_Hash::in_word_set(key, 4)) && m->name[0]) {
#if 0
char sval[50];
sprintf(sval, "%X", (unsigned int)(m->value));
fprintf(stderr, "svalue: %s\n", sval);
#endif
out += char((m->value & 0xff00) >> 16);
out += char(m->value & 0x00ff);
} else
{
out += in[i];
out += in[i+1];
}
}
return true;
}
#else // TEST_CASECONVERT defined: build the standalone test driver
#include <errno.h>
#include <string>
#include <iostream>
#include <unistd.h>
#include <fcntl.h>
using namespace std;
#include "readfile.h"
#include "caseconvert.h"
// Test driver: read the input file, run it through ucs2lower() and write
// the result to the output file.
//
// argv[1]: input file name, argv[2]: output file name (created or
// truncated). Exits with status 0 on success and 1 on any failure.
int main(int argc, char **argv)
{
    if (argc != 3) {
        cerr << "Usage: trcaseconvert ifilename ofilename" << endl;
        cerr << "Input and output must be utf16be" << endl;
        exit(1);
    }
    const string ifilename = argv[1];
    const string ofilename = argv[2];

    string text;
    if (!file_to_string(ifilename, text)) {
        cerr << "Couldnt read file, errno " << errno << endl;
        exit(1);
    }

    string out;
    if (!ucs2lower(text, out)) {
        cerr << "ucs2lower failed" << endl;
        exit(1);
    }

    int fd = open(ofilename.c_str(), O_CREAT|O_TRUNC|O_WRONLY, 0666);
    if (fd < 0) {
        perror("Open/create output");
        exit(1);
    }

    // write() may transfer fewer bytes than requested or be interrupted
    // by a signal, so keep going until the whole buffer is written.
    const char *cursor = out.data();
    string::size_type remaining = out.length();
    while (remaining > 0) {
        ssize_t written = write(fd, cursor, remaining);
        if (written < 0 && errno == EINTR) {
            continue;
        }
        if (written <= 0) {
            perror("write");
            close(fd);
            exit(1);
        }
        cursor += written;
        remaining -= (string::size_type)written;
    }

    // Deferred write errors can surface at close() time.
    if (close(fd) < 0) {
        perror("close");
        exit(1);
    }
    exit(0);
}
#endif // TEST_CASECONVERT
EOF