|
a/src/utils/utf8iter.cpp |
|
b/src/utils/utf8iter.cpp |
1 |
#ifndef lint
|
1 |
#ifndef lint
|
2 |
static char rcsid[] = "@(#$Id: utf8iter.cpp,v 1.5 2006-11-20 11:16:54 dockes Exp $ (C) 2005 J.F.Dockes";
|
2 |
static char rcsid[] = "@(#$Id: utf8iter.cpp,v 1.6 2007-12-04 10:16:27 dockes Exp $ (C) 2005 J.F.Dockes";
|
3 |
#endif
|
3 |
#endif
|
4 |
/*
|
4 |
/*
|
5 |
* This program is free software; you can redistribute it and/or modify
|
5 |
* This program is free software; you can redistribute it and/or modify
|
6 |
* it under the terms of the GNU General Public License as published by
|
6 |
* it under the terms of the GNU General Public License as published by
|
7 |
* the Free Software Foundation; either version 2 of the License, or
|
7 |
* the Free Software Foundation; either version 2 of the License, or
|
|
... |
|
... |
33 |
|
33 |
|
34 |
#define UTF8ITER_CHECK
|
34 |
#define UTF8ITER_CHECK
|
35 |
#include "utf8iter.h"
|
35 |
#include "utf8iter.h"
|
36 |
#include "readfile.h"
|
36 |
#include "readfile.h"
|
37 |
|
37 |
|
|
|
38 |
// Program name; main() sets it from argv[0] and Usage() prints it.
const char *thisprog;
|
|
|
39 |
// Help text printed by Usage().
static char usage [] =
|
|
|
40 |
"utf8iter [opts] infile outfile\n"
|
|
|
41 |
" converts infile to 32 bits unicode (processor order), for testing\n"
|
|
|
42 |
"-v : print stuff as we go\n"
|
|
|
43 |
;
|
|
|
44 |
|
|
|
45 |
// Print the program name and the usage text on stderr, then exit with status 1.
void Usage() {
|
|
|
46 |
fprintf(stderr, "%s:%s\n", thisprog, usage);
|
|
|
47 |
exit(1);
|
|
|
48 |
}
|
|
|
49 |
// Command-line option bits; main() ORs OPT_v into it when -v is given.
static int op_flags;
|
|
|
50 |
// Bit for the -v option; main() tests it before printing each decoded value.
#define OPT_v 0x2
|
|
|
51 |
|
38 |
int main(int argc, char **argv)
|
52 |
int main(int argc, char **argv)
|
39 |
{
|
53 |
{
|
40 |
if (argc != 3) {
|
54 |
thisprog = argv[0];
|
41 |
cerr << "Usage: utf8iter infile outfile" << endl;
|
55 |
argc--; argv++;
|
42 |
exit(1);
|
56 |
|
|
|
57 |
while (argc > 0 && **argv == '-') {
|
|
|
58 |
(*argv)++;
|
|
|
59 |
if (!(**argv))
|
|
|
60 |
Usage();
|
|
|
61 |
while (**argv)
|
|
|
62 |
switch (*(*argv)++) {
|
|
|
63 |
case 'v': op_flags |= OPT_v; break;
|
|
|
64 |
|
|
|
65 |
default: Usage(); break;
|
|
|
66 |
}
|
|
|
67 |
argc--;argv++;
|
43 |
}
|
68 |
}
|
|
|
69 |
|
|
|
70 |
if (argc != 2) {
|
|
|
71 |
Usage();
|
|
|
72 |
}
|
44 |
const char *infile = argv[1];
|
73 |
const char *infile = *argv++;argc--;
|
45 |
const char *outfile = argv[2];
|
74 |
const char *outfile = *argv++;argc--;
|
46 |
string in;
|
75 |
string in;
|
47 |
if (!file_to_string(infile, in)) {
|
76 |
if (!file_to_string(infile, in)) {
|
48 |
cerr << "Cant read file\n" << endl;
|
77 |
cerr << "Cant read file\n" << endl;
|
49 |
exit(1);
|
78 |
exit(1);
|
50 |
}
|
79 |
}
|
|
... |
|
... |
60 |
|
89 |
|
61 |
int nchars = 0;
|
90 |
int nchars = 0;
|
62 |
for (;!it.eof(); it++) {
|
91 |
for (;!it.eof(); it++) {
|
63 |
unsigned int value = *it;
|
92 |
unsigned int value = *it;
|
64 |
if (value == (unsigned int)-1) {
|
93 |
if (value == (unsigned int)-1) {
|
65 |
fprintf(stderr, "Conversion error occurred\n");
|
94 |
cerr << "Conversion error occurred\n" << endl;
|
66 |
exit(1);
|
95 |
exit(1);
|
|
|
96 |
}
|
|
|
97 |
if (op_flags & OPT_v) {
|
|
|
98 |
printf("Value: 0x%x", value);
|
|
|
99 |
if (value < 0x7f)
|
|
|
100 |
printf(" (%c) ", value);
|
|
|
101 |
printf("\n");
|
67 |
}
|
102 |
}
|
68 |
// UTF-32LE or BE array
|
103 |
// UTF-32LE or BE array
|
69 |
ucsout1.push_back(value);
|
104 |
ucsout1.push_back(value);
|
70 |
// UTF-32LE or BE file
|
105 |
// UTF-32LE or BE file
|
71 |
fwrite(&value, 4, 1, fp);
|
106 |
fwrite(&value, 4, 1, fp);
|
|
... |
|
... |
138 |
fprintf(stderr, "Transcode back to utf-8 compare to in failed\n");
|
173 |
fprintf(stderr, "Transcode back to utf-8 compare to in failed\n");
|
139 |
exit(1);
|
174 |
exit(1);
|
140 |
}
|
175 |
}
|
141 |
exit(0);
|
176 |
exit(0);
|
142 |
}
|
177 |
}
|
143 |
|
|
|