|
a/src/utils/utf8iter.cpp |
|
b/src/utils/utf8iter.cpp |
1 |
#ifndef lint
|
1 |
#ifndef lint
|
2 |
static char rcsid[] = "@(#$Id: utf8iter.cpp,v 1.1 2005-02-10 19:52:50 dockes Exp $ (C) 2005 J.F.Dockes";
|
2 |
static char rcsid[] = "@(#$Id: utf8iter.cpp,v 1.2 2005-02-11 11:20:02 dockes Exp $ (C) 2005 J.F.Dockes";
|
3 |
#endif
|
3 |
#endif
|
4 |
#include <stdio.h>
|
4 |
#include <stdio.h>
|
5 |
#include <string>
|
5 |
#include <string>
|
6 |
#include <iostream>
|
6 |
#include <iostream>
|
7 |
#include <list>
|
7 |
#include <list>
|
|
|
8 |
#include <vector>
|
8 |
#include "debuglog.h"
|
9 |
#include "debuglog.h"
|
9 |
using namespace std;
|
10 |
using namespace std;
|
10 |
|
11 |
|
11 |
#include "utf8iter.h"
|
12 |
#include "utf8iter.h"
|
12 |
#include "readfile.h"
|
13 |
#include "readfile.h"
|
|
... |
|
... |
20 |
exit(1);
|
21 |
exit(1);
|
21 |
}
|
22 |
}
|
22 |
const char *infile = argv[1];
|
23 |
const char *infile = argv[1];
|
23 |
const char *outfile = argv[2];
|
24 |
const char *outfile = argv[2];
|
24 |
string in;
|
25 |
string in;
|
25 |
string out;
|
|
|
26 |
if (!file_to_string(infile, in)) {
|
26 |
if (!file_to_string(infile, in)) {
|
27 |
cerr << "Cant read file\n" << endl;
|
27 |
cerr << "Cant read file\n" << endl;
|
28 |
exit(1);
|
28 |
exit(1);
|
29 |
}
|
29 |
}
|
|
|
30 |
|
|
|
31 |
vector<unsigned int>ucsout1;
|
|
|
32 |
string out, out1;
|
30 |
Utf8Iter it(in);
|
33 |
Utf8Iter it(in);
|
31 |
FILE *fp = fopen(outfile, "w");
|
34 |
FILE *fp = fopen(outfile, "w");
|
32 |
if (fp == 0) {
|
35 |
if (fp == 0) {
|
33 |
fprintf(stderr, "cant create %s\n", outfile);
|
36 |
fprintf(stderr, "cant create %s\n", outfile);
|
34 |
exit(1);
|
37 |
exit(1);
|
35 |
}
|
38 |
}
|
36 |
while (!it.eof()) {
|
39 |
int nchars = 0;
|
|
|
40 |
for (;!it.eof(); it++) {
|
37 |
unsigned int value = *it;
|
41 |
unsigned int value = *it;
|
38 |
it.appendchartostring(out);
|
42 |
if (value == (unsigned int)-1) {
|
39 |
it++;
|
43 |
fprintf(stderr, "Conversion error occurred\n");
|
|
|
44 |
exit(1);
|
|
|
45 |
}
|
|
|
46 |
ucsout1.push_back(value);
|
40 |
fwrite(&value, 4, 1, fp);
|
47 |
fwrite(&value, 4, 1, fp);
|
|
|
48 |
if (!it.appendchartostring(out))
|
|
|
49 |
break;
|
|
|
50 |
out1 += it;
|
|
|
51 |
nchars++;
|
41 |
}
|
52 |
}
|
42 |
fclose(fp);
|
53 |
fprintf(stderr, "nchars1 %d\n", nchars);
|
43 |
if (it.error()) {
|
|
|
44 |
fprintf(stderr, "Conversion error occurred\n");
|
|
|
45 |
exit(1);
|
|
|
46 |
}
|
|
|
47 |
if (in != out) {
|
54 |
if (in != out) {
|
48 |
fprintf(stderr, "error: out != in\n");
|
55 |
fprintf(stderr, "error: out != in\n");
|
49 |
exit(1);
|
56 |
exit(1);
|
50 |
}
|
57 |
}
|
|
|
58 |
if (in != out1) {
|
|
|
59 |
fprintf(stderr, "error: out1 != in\n");
|
|
|
60 |
exit(1);
|
|
|
61 |
}
|
|
|
62 |
|
|
|
63 |
vector<unsigned int>ucsout2;
|
|
|
64 |
it.rewind();
|
|
|
65 |
for (int i = 0; ; i++) {
|
|
|
66 |
unsigned int value;
|
|
|
67 |
if ((value = it[i]) == (unsigned int)-1) {
|
|
|
68 |
fprintf(stderr, "%d chars\n", i);
|
|
|
69 |
break;
|
|
|
70 |
}
|
|
|
71 |
it++;
|
|
|
72 |
ucsout2.push_back(value);
|
|
|
73 |
}
|
|
|
74 |
|
|
|
75 |
if (ucsout1 != ucsout2) {
|
|
|
76 |
fprintf(stderr, "error: ucsout1 != ucsout2\n");
|
|
|
77 |
exit(1);
|
|
|
78 |
}
|
|
|
79 |
|
|
|
80 |
fclose(fp);
|
51 |
exit(0);
|
81 |
exit(0);
|
52 |
}
|
82 |
}
|
53 |
|
83 |
|