Switch to unified view

a/src/common/unacpp.cpp b/src/common/unacpp.cpp
...
...
61
    if (cout)
61
    if (cout)
62
    free(cout);
62
    free(cout);
63
    return true;
63
    return true;
64
}
64
}
65
65
66
// Functions to determine upper-case or accented status could be implemented
67
// hugely more efficiently inside the unac c code, but they're only used for
68
// testing user-entered terms, so we don't really care.
66
bool unaciscapital(const string& in)
69
bool unaciscapital(const string& in)
67
{
70
{
71
    LOGDEB2(("unaciscapital: [%s]\n", in.c_str()));
68
    if (in.empty())
72
    if (in.empty())
69
    return false;
73
    return false;
70
    Utf8Iter it(in);
74
    Utf8Iter it(in);
71
    string shorter;
75
    string shorter;
72
    it.appendchartostring(shorter);
76
    it.appendchartostring(shorter);
73
77
74
    string noacterm, noaclowterm;
78
    string lower;
75
    if (!unacmaybefold(shorter, noacterm, "UTF-8", UNACOP_UNAC)) {
79
    if (!unacmaybefold(shorter, lower, "UTF-8", UNACOP_FOLD)) {
76
    LOGINFO(("unaciscapital: unac failed for [%s]\n", in.c_str()));
80
    LOGINFO(("unaciscapital: unac/fold failed for [%s]\n", in.c_str()));
77
    return false;
81
    return false;
78
    } 
82
    } 
79
    if (!unacmaybefold(noacterm, noaclowterm, "UTF-8", UNACOP_UNACFOLD)) {
80
  LOGINFO(("unaciscapital: unacfold failed for [%s]\n", in.c_str()));
81
  return false;
82
    }
83
    Utf8Iter it1(noacterm);
83
    Utf8Iter it1(lower);
84
    Utf8Iter it2(noaclowterm);
85
    if (*it1 != *it2)
84
    if (*it != *it1)
85
  return true;
86
    else
87
  return false;
88
}
89
bool unachasuppercase(const string& in)
90
{
91
    LOGDEB2(("unachasuppercase: [%s]\n", in.c_str()));
92
    if (in.empty())
93
  return false;
94
95
    string lower;
96
    if (!unacmaybefold(in, lower, "UTF-8", UNACOP_FOLD)) {
97
  LOGINFO(("unachasuppercase: unac/fold failed for [%s]\n", in.c_str()));
98
  return false;
99
    } 
100
    if (lower != in)
101
  return true;
102
    else
103
  return false;
104
}
105
bool unachasaccents(const string& in)
106
{
107
    LOGDEB2(("unachasaccents: [%s]\n", in.c_str()));
108
    if (in.empty())
109
  return false;
110
111
    string noac;
112
    if (!unacmaybefold(in, noac, "UTF-8", UNACOP_UNAC)) {
113
  LOGINFO(("unachasaccents: unac/unac failed for [%s]\n", in.c_str()));
114
  return false;
115
    } 
116
    if (noac != in)
86
    return true;
117
    return true;
87
    else
118
    else
88
    return false;
119
    return false;
89
}
120
}
90
121
...
...
105
136
106
// Program name, printed as a prefix of the usage message.
static char *thisprog;

// Command line help text printed by Usage().
// The definition ends at the closing "\n"; the stray empty declaration (a
// lone ';') which used to follow it has been removed.
static char usage [] = "\n"
    "[-c|-C] <encoding> <infile> <outfile>\n"
    "   Default : unaccent\n"
    "   -c : unaccent and casefold\n"
    "   -C : casefold only\n"
    "-t <string> test string as capitalized, upper-case anywhere, accents\n"
    "   the parameter is supposedly utf-8 so this can only work in an utf-8\n"
    "   locale\n"
    "\n";
149
116
static void
150
static void
117
Usage(void)
151
Usage(void)
118
{
152
{
119
    fprintf(stderr, "%s: usage: %s\n", thisprog, usage);
153
    fprintf(stderr, "%s: usage: %s\n", thisprog, usage);
120
    exit(1);
154
    exit(1);
121
}
155
}
122
156
123
// Bit set of the command line options seen by main(), built from the OPT_*
// values below.
static int op_flags;
#define OPT_c 0x2 /* set by option 'c' */
#define OPT_C 0x4 /* set by option 'C' */
#define OPT_t 0x8 /* set by option 't' */
126
161
127
int main(int argc, char **argv)
162
int main(int argc, char **argv)
128
{
163
{
129
    UnacOp op = UNACOP_UNAC;
164
    UnacOp op = UNACOP_UNAC;
130
165
...
...
138
        Usage();
173
        Usage();
139
    while (**argv)
174
    while (**argv)
140
        switch (*(*argv)++) {
175
        switch (*(*argv)++) {
141
        case 'c':   op_flags |= OPT_c; break;
176
        case 'c':   op_flags |= OPT_c; break;
142
        case 'C':   op_flags |= OPT_C; break;
177
        case 'C':   op_flags |= OPT_C; break;
178
      case 't':   op_flags |= OPT_t; break;
143
        default: Usage();   break;
179
        default: Usage();   break;
144
        }
180
        }
145
    argc--; argv++;
181
    argc--; argv++;
146
    }
182
    }
147
183
148
    if (op_flags & OPT_c) {
184
    if (op_flags & OPT_t) {
185
  if (argc != 1)
186
      Usage();
187
  string in = *argv++;argc--;
188
  bool capital, upper, accent;
189
  capital = unaciscapital(in);
190
  upper = unachasuppercase(in);
191
  accent = unachasaccents(in);
192
  cout << "[" << in << "] : " << 
193
      "capitalized: " << (capital ? "Yes. " : "No. ") <<
194
      "has uppercase: " << (upper ? "Yes. " : "No. ") <<
195
      "has accents: " << (accent ? "Yes. " : "No. ") << 
196
      endl;
197
  return 0;
198
    } else {
199
  if (argc != 3)
200
      Usage();
201
  if (op_flags & OPT_c) {
149
    op = UNACOP_UNACFOLD;
202
        op = UNACOP_UNACFOLD;
150
    } else if (op_flags & OPT_C) {
203
  } else if (op_flags & OPT_C) {
151
    op = UNACOP_FOLD;
204
        op = UNACOP_FOLD;
152
    }
205
  }
153
206
154
    if (argc != 3) {
155
  Usage();
156
    }
157
158
    const char *encoding = *argv++; argc--;
207
  const char *encoding = *argv++; argc--;
159
    string ifn = *argv++; argc--;
208
  string ifn = *argv++; argc--;
160
    if (!ifn.compare("stdin"))
209
  if (!ifn.compare("stdin"))
161
    ifn.clear();
210
        ifn.clear();
162
    const char *ofn = *argv++; argc--;
211
  const char *ofn = *argv++; argc--;
163
212
164
    string reason;
213
  string reason;
165
    (void)recollinit(RCLINIT_NONE, 0, 0, reason, 0);
214
  (void)recollinit(RCLINIT_NONE, 0, 0, reason, 0);
166
215
167
    string odata;
216
  string odata;
168
    if (!file_to_string(ifn, odata)) {
217
  if (!file_to_string(ifn, odata)) {
169
    cerr << "file_to_string " << ifn << " : " << odata << endl;
218
        cerr << "file_to_string " << ifn << " : " << odata << endl;
170
    return 1;
219
        return 1;
171
    }
220
  }
172
    string ndata;
221
  string ndata;
173
    if (!unacmaybefold(odata, ndata, encoding, op)) {
222
  if (!unacmaybefold(odata, ndata, encoding, op)) {
174
    cerr << "unac: " << ndata << endl;
223
        cerr << "unac: " << ndata << endl;
175
    return 1;
224
        return 1;
176
    }
225
  }
177
    
226
    
178
    int fd;
227
  int fd;
179
    if (strcmp(ofn, "stdout")) {
228
  if (strcmp(ofn, "stdout")) {
180
    fd = open(ofn, O_CREAT|O_EXCL|O_WRONLY, 0666);
229
        fd = open(ofn, O_CREAT|O_EXCL|O_WRONLY, 0666);
181
    } else {
230
  } else {
182
    fd = 1;
231
        fd = 1;
183
    }
232
  }
184
    if (fd < 0) {
233
  if (fd < 0) {
185
    cerr << "Open/Create " << ofn << " failed: " << strerror(errno) 
234
        cerr << "Open/Create " << ofn << " failed: " << strerror(errno) 
186
         << endl;
235
       << endl;
187
    return 1;
236
        return 1;
188
    }
237
  }
189
    if (write(fd, ndata.c_str(), ndata.length()) != (int)ndata.length()) {
238
  if (write(fd, ndata.c_str(), ndata.length()) != (int)ndata.length()) {
190
    cerr << "Write(2) failed: " << strerror(errno)  << endl;
239
        cerr << "Write(2) failed: " << strerror(errno)  << endl;
240
      return 1;
241
  }
242
  close(fd);
191
    return 1;
243
    return 0;
192
    }
244
    }
193
    close(fd);
194
    return 0;
195
}
245
}
196
246
197
#endif
247
#endif