#!/bin/sh
###############
## Use awk and gperf to generate an efficient case-folding lookup function
# Turn CaseFolding.txt into a gperf keyword file and let gperf write the
# perfect-hash lookup code to caseconvert.cpp (the file is overwritten).
# gperf options: -I include <string.h>, -n leave the key length out of the
# hash, -LC++ generate C++, -t the keyword file declares its own struct type.
awk -F';' '
    # Declarations section of the gperf input: a verbatim %{ ... %} header
    # followed by the struct that holds one keyword and its folded value.
    BEGIN {
        print "%{"
        print "// Automatically generated by gencasefold.sh, do not edit"
        print "#ifndef TEST_CASECONVERT"
        print "%}"
        print "struct mapping { char *name; unsigned short value; };"
        print ""
        print "%%"
    }

    # Comment lines and empty lines carry no mapping.
    /^#/ || /^$/ { next }

    # Keep entries whose code (field 1) has at most 4 characters and whose
    # status (field 2) contains C or S. Everything else (T, F, or codes
    # longer than 4 characters) is silently dropped.
    length($1) <= 4 && $2 ~ /[CS]/ {
        folded = $3
        gsub(/ /, "", folded)
        printf "%s, 0x%s\n", $1, folded
    }
' < CaseFolding.txt |
    gperf -I -n -LC++ -t > caseconvert.cpp
#############
## Append wrapper function
cat <<EOF >> caseconvert.cpp
#include <stdio.h>
#include <string>
#include "caseconvert.h"
using std::string;
// Input and output must be utf-16
bool ucs2lower(const string &in, string &out)
{
if (in.length() < 2) {
out.erase();
return true;
}
static const char hex[]="0123456789ABCDEF";
char key[5];
key[4] = 0;
for (unsigned int i = 0; i < in.length() - 1; i += 2) {
struct mapping *m;
// Convert 16 bits to 4 hex chars as key
key[0] = hex[(in[i]&0xf0) >> 4];
key[1] = hex[in[i] & 0x0f];
key[2] = hex[(in[i+1]&0xf0) >> 4];
key[3] = hex[in[i+1] & 0x0f];
//fprintf(stderr, "Key: %s\n", key);
if ((m = Perfect_Hash::in_word_set(key, 4)) && m->name[0]) {
#if 0
char sval[50];
sprintf(sval, "%X", (unsigned int)(m->value));
fprintf(stderr, "svalue: %s\n", sval);
#endif
out += char((m->value & 0xff00) >> 16);
out += char(m->value & 0x00ff);
} else
{
out += in[i];
out += in[i+1];
}
}
return true;
}
#else // TEST_CASECONVERT defined: build the standalone test driver
#include <errno.h>
#include <string>
#include <iostream>
#include <unistd.h>
#include <fcntl.h>
using namespace std;
#include "readfile.h"
#include "caseconvert.h"
// Test driver: read the file named by argv[1], run it through ucs2lower()
// and write the result to the file named by argv[2] (created or truncated).
// Returns 0 on success, 1 on a usage error or any I/O / conversion failure.
int main(int argc, char **argv)
{
    if (argc != 3) {
        cerr << "Usage: trcaseconvert ifilename ofilename" << endl;
        cerr << "Input and output must be utf16be" << endl;
        return 1;
    }
    const string ifilename = argv[1];
    const string ofilename = argv[2];

    // file_to_string() comes from readfile.h (not visible here); presumably
    // it loads the whole file into text and leaves errno set on failure.
    string text;
    if (!file_to_string(ifilename, text)) {
        cerr << "Couldnt read file, errno " << errno << endl;
        return 1;
    }

    string out;
    if (!ucs2lower(text, out)) {
        cerr << "ucs2lower failed" << endl;
        return 1;
    }

    int fd = open(ofilename.c_str(), O_CREAT|O_TRUNC|O_WRONLY, 0666);
    if (fd < 0) {
        perror("Open/create output");
        return 1;
    }

    // write() may transfer fewer bytes than requested or be interrupted by
    // a signal, so keep going until everything is out. Counting in size_t /
    // ssize_t also avoids truncating the length to int.
    const char *cursor = out.data();
    size_t remaining = out.length();
    while (remaining > 0) {
        ssize_t written = write(fd, cursor, remaining);
        if (written < 0) {
            if (errno == EINTR)
                continue;
            perror("write");
            close(fd);
            return 1;
        }
        cursor += written;
        remaining -= static_cast<size_t>(written);
    }

    // A failing close() can be the only report of a deferred write error.
    if (close(fd) < 0) {
        perror("close");
        return 1;
    }
    return 0;
}
#endif // TEST_CASECONVERT
EOF