Switch to unified view

a/src/utils/utf8iter.cpp b/src/utils/utf8iter.cpp
1
#ifndef lint
1
#ifndef lint
2
static char rcsid[] = "@(#$Id: utf8iter.cpp,v 1.4 2006-01-23 13:32:28 dockes Exp $ (C) 2005 J.F.Dockes";
2
static char rcsid[] = "@(#$Id: utf8iter.cpp,v 1.5 2006-11-20 11:16:54 dockes Exp $ (C) 2005 J.F.Dockes";
3
#endif
3
#endif
4
/*
4
/*
5
 *   This program is free software; you can redistribute it and/or modify
5
 *   This program is free software; you can redistribute it and/or modify
6
 *   it under the terms of the GNU General Public License as published by
6
 *   it under the terms of the GNU General Public License as published by
7
 *   the Free Software Foundation; either version 2 of the License, or
7
 *   the Free Software Foundation; either version 2 of the License, or
...
...
20
#include <stdio.h>
20
#include <stdio.h>
21
#include <string>
21
#include <string>
22
#include <iostream>
22
#include <iostream>
23
#include <list>
23
#include <list>
24
#include <vector>
24
#include <vector>
25
26
25
#include "debuglog.h"
27
#include "debuglog.h"
28
#include "transcode.h"
29
26
#ifndef NO_NAMESPACES
30
#ifndef NO_NAMESPACES
27
using namespace std;
31
using namespace std;
28
#endif /* NO_NAMESPACES */
32
#endif /* NO_NAMESPACES */
29
33
34
#define UTF8ITER_CHECK
30
#include "utf8iter.h"
35
#include "utf8iter.h"
31
#include "readfile.h"
36
#include "readfile.h"
32
33
34
37
35
int main(int argc, char **argv)
38
int main(int argc, char **argv)
36
{
39
{
37
    if (argc != 3) {
40
    if (argc != 3) {
38
    cerr << "Usage: utf8iter infile outfile" << endl;
41
    cerr << "Usage: utf8iter infile outfile" << endl;
...
...
52
    FILE *fp = fopen(outfile, "w");
55
    FILE *fp = fopen(outfile, "w");
53
    if (fp == 0) {
56
    if (fp == 0) {
54
    fprintf(stderr, "cant create %s\n", outfile);
57
    fprintf(stderr, "cant create %s\n", outfile);
55
    exit(1);
58
    exit(1);
56
    }
59
    }
60
57
    int nchars = 0;
61
    int nchars = 0;
58
    for (;!it.eof(); it++) {
62
    for (;!it.eof(); it++) {
59
    unsigned int value = *it;
63
    unsigned int value = *it;
60
    if (value == (unsigned int)-1) {
64
    if (value == (unsigned int)-1) {
61
        fprintf(stderr, "Conversion error occurred\n");
65
        fprintf(stderr, "Conversion error occurred\n");
62
        exit(1);
66
        exit(1);
63
    }
67
    }
68
  // UTF-32LE or BE array
64
    ucsout1.push_back(value);
69
    ucsout1.push_back(value);
70
  // UTF-32LE or BE file
65
    fwrite(&value, 4, 1, fp);
71
    fwrite(&value, 4, 1, fp);
72
73
  // Reconstructed utf8 strings (2 methods)
66
    if (!it.appendchartostring(out))
74
    if (!it.appendchartostring(out))
67
        break;
75
        break;
76
  // conversion to string
68
    out1 += it;
77
    out1 += it;
78
  
79
  // fprintf(stderr, "%s", string(it).c_str());
69
    nchars++;
80
    nchars++;
70
    }
81
    }
82
    fclose(fp);
83
71
    fprintf(stderr, "nchars1 %d\n", nchars);
84
    fprintf(stderr, "nchars %d\n", nchars);
72
    if (in != out) {
85
    if (in.compare(out)) {
73
    fprintf(stderr, "error: out != in\n");
86
    fprintf(stderr, "error: out != in\n");
74
    exit(1);
87
    exit(1);
75
    }
88
    }
76
    if (in != out1) {
89
    if (in != out1) {
77
    fprintf(stderr, "error: out1 != in\n");
90
    fprintf(stderr, "error: out1 != in\n");
78
    exit(1);
91
    exit(1);
79
    }
92
    }
80
93
94
    // Rewind and do it a second time
81
    vector<unsigned int>ucsout2;
95
    vector<unsigned int>ucsout2;
82
    it.rewind();
96
    it.rewind();
83
    for (int i = 0; ; i++) {
97
    for (int i = 0; ; i++) {
84
    unsigned int value;
98
    unsigned int value;
85
    if ((value = it[i]) == (unsigned int)-1) {
99
    if ((value = it[i]) == (unsigned int)-1) {
...
...
93
    if (ucsout1 != ucsout2) {
107
    if (ucsout1 != ucsout2) {
94
    fprintf(stderr, "error: ucsout1 != ucsout2\n");
108
    fprintf(stderr, "error: ucsout1 != ucsout2\n");
95
    exit(1);
109
    exit(1);
96
    }
110
    }
97
111
98
    fclose(fp);
112
    ucsout2.clear();
113
    int ercnt;
114
    const char *encoding = "UTF-32LE"; // note: use "UTF-32BE" on a big-endian machine
115
    string ucs, ucs1;
116
    for (vector<unsigned int>::iterator it = ucsout1.begin(); 
117
   it != ucsout1.end(); it++) {
118
  unsigned int i = *it;
119
  ucs.append((const char *)&i, 4);
120
    }
121
    if (!transcode(ucs, ucs1, 
122
         encoding, encoding, &ercnt) || ercnt) {
123
  fprintf(stderr, "Transcode check failed, ercount: %d\n", ercnt);
124
  exit(1);
125
    }
126
    if (ucs.compare(ucs1)) {
127
  fprintf(stderr, "error: ucsout1 != ucsout2 after iconv\n");
128
  exit(1);
129
    }
130
131
    if (!transcode(ucs, ucs1, 
132
         encoding, "UTF-8", &ercnt) || ercnt) {
133
  fprintf(stderr, "Transcode back to utf-8 check failed, ercount: %d\n",
134
      ercnt);
135
  exit(1);
136
    }
137
    if (ucs1.compare(in)) {
138
  fprintf(stderr, "Transcode back to utf-8 compare to in failed\n");
139
  exit(1);
140
    }
99
    exit(0);
141
    exit(0);
100
}
142
}
101
143