Switch to unified view

a/unac/unac.c b/unac/unac.c
...
...
14
 * You should have received a copy of the GNU General Public License
14
 * You should have received a copy of the GNU General Public License
15
 * along with this program; if not, write to the Free Software
15
 * along with this program; if not, write to the Free Software
16
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
16
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
17
 */
17
 */
18
18
19
#ifdef HAVE_CONFIG_H
19
#ifdef BUILDING_RECOLL
20
#ifdef RECOLL_DATADIR
21
#include "autoconfig.h"
20
#include "autoconfig.h"
22
#else
21
#else
23
#include "config.h"
22
#include "config.h"
24
#endif /* RECOLL */
23
#endif /* RECOLL */
25
#endif /* HAVE_CONFIG_H */
26
24
27
#ifdef RECOLL_DATADIR
25
#ifdef BUILDING_RECOLL
28
/* Yes, recoll unac is actually c++, lets face modernity, I will not be
26
/* Yes, recoll unac is actually c++, lets face modernity, I will not be
29
   caught writing another binary search  */
27
   caught writing another binary search  */
30
#include <vector>
28
#include <vector>
31
#include <map>
29
#include <map>
32
#include <string>
30
#include <string>
33
#include <algorithm>
31
#include <algorithm>
34
#include "unordered_defs.h"
32
#include <iostream>
33
#include UNORDERED_MAP_INCLUDE
34
35
using std::string;
35
using std::string;
36
36
37
#include "smallut.h"
37
#include "smallut.h"
38
38
39
/* 
39
/* 
...
...
50
    if (it == except_trans.end())
50
    if (it == except_trans.end())
51
    return false;
51
    return false;
52
    trans = it->second;
52
    trans = it->second;
53
    return true;
53
    return true;
54
}
54
}
55
#endif /* RECOLL_DATADIR */
55
#endif /* BUILDING_RECOLL*/
56
56
57
/*
57
/*
58
 * If configure.in has not defined this symbol, assume const. It
58
 * If configure.in has not defined this symbol, assume const. It
59
 * does not harm much: a warning will be issued during compilation.
59
 * does not harm much: a warning will be issued during compilation.
60
 */
60
 */
...
...
14168
14168
14169
int unacmaybefold_string_utf16(const char* in, size_t in_length,
14169
int unacmaybefold_string_utf16(const char* in, size_t in_length,
14170
                   char** outp, size_t* out_lengthp, int what)
14170
                   char** outp, size_t* out_lengthp, int what)
14171
{
14171
{
14172
  char* out;
14172
  char* out;
14173
  int out_size;
14173
  size_t out_size;
14174
  int out_length;
14174
  size_t out_length;
14175
  unsigned int i;
14175
  size_t i;
14176
14176
14177
  out_size = in_length > 0 ? in_length : 1024;
14177
  out_size = in_length > 0 ? in_length : 1024;
14178
14178
14179
  out = *outp;
14179
  out = *outp;
14180
  out = (char*)realloc(out, out_size + 1);
14180
  out = (char*)realloc(out, out_size + 1);
...
...
14188
  out_length = 0;
14188
  out_length = 0;
14189
14189
14190
  for(i = 0; i < in_length; i += 2) {
14190
  for(i = 0; i < in_length; i += 2) {
14191
    unsigned short c;
14191
    unsigned short c;
14192
    unsigned short* p;
14192
    unsigned short* p;
14193
    int l;
14193
    size_t l;
14194
    int k;
14194
    size_t k;
14195
    c = (in[i] << 8) | (in[i + 1] & 0xff);
14195
    c = (in[i] << 8) | (in[i + 1] & 0xff);
14196
    /*
14196
    /*
14197
     * Lookup the tables for decomposition information
14197
     * Lookup the tables for decomposition information
14198
     */
14198
     */
14199
#ifdef RECOLL_DATADIR
14199
#ifdef BUILDING_RECOLL
14200
    // Exception unac/fold values set by user. There should be 3 arrays for
14200
    // Exception unac/fold values set by user. There should be 3 arrays for
14201
    // unac/fold/unac+fold. For now there is only one array, which used to
14201
    // unac/fold/unac+fold. For now there is only one array, which used to
14202
    // be set for unac+fold, and is mostly or only used to prevent diacritics
14202
    // be set for unac+fold, and is mostly or only used to prevent diacritics
14203
    // removal for some chars and languages where it should not be done.
14203
    // removal for some chars and languages where it should not be done.
14204
    // In conformance with current usage, but incorrectly, we do the following
14204
    // In conformance with current usage, but incorrectly, we do the following
...
...
14217
        // Has to be UNAC_UNACFOLD: use table
14217
        // Has to be UNAC_UNACFOLD: use table
14218
        p = (unsigned short *)trans.c_str();
14218
        p = (unsigned short *)trans.c_str();
14219
        l = trans.size() / 2;
14219
        l = trans.size() / 2;
14220
    }
14220
    }
14221
    } else {
14221
    } else {
14222
#endif /* RECOLL_DATADIR */
14222
#endif /* BUILDING_RECOLL */
14223
    unac_uf_char_utf16_(c, p, l, what)
14223
    unac_uf_char_utf16_(c, p, l, what)
14224
#ifdef RECOLL_DATADIR
14224
#ifdef BUILDING_RECOLL
14225
    }
14225
    }
14226
#endif /* RECOLL_DATADIR */
14226
#endif /* BUILDING_RECOLL */
14227
14227
14228
    /*
14228
    /*
14229
     * Explain what's done in great detail
14229
     * Explain what's done in great detail
14230
     */
14230
     */
14231
    if(debug_level == UNAC_DEBUG_HIGH) {
14231
    if(debug_level == UNAC_DEBUG_HIGH) {
...
...
14234
      DEBUG("unac_data%d[%d] & unac_positions[%d][%d]: ", index, unac_positions[index][position], index, position+1);
14234
      DEBUG("unac_data%d[%d] & unac_positions[%d][%d]: ", index, unac_positions[index][position], index, position+1);
14235
      DEBUG_APPEND("0x%04x => ", (c));
14235
      DEBUG_APPEND("0x%04x => ", (c));
14236
      if(l == 0) {
14236
      if(l == 0) {
14237
    DEBUG_APPEND("untouched\n");
14237
    DEBUG_APPEND("untouched\n");
14238
      } else {
14238
      } else {
14239
  int i;
14239
  size_t i;
14240
    for(i = 0; i < l; i++)
14240
    for(i = 0; i < l; i++)
14241
      DEBUG_APPEND("0x%04x ", p[i]);
14241
      DEBUG_APPEND("0x%04x ", p[i]);
14242
    DEBUG_APPEND("\n");
14242
    DEBUG_APPEND("\n");
14243
      }
14243
      }
14244
    }
14244
    }
...
...
14434
     */
14434
     */
14435
    if(from_utf16) {
14435
    if(from_utf16) {
14436
      const char* tmp = space;
14436
      const char* tmp = space;
14437
      size_t tmp_length = 2;
14437
      size_t tmp_length = 2;
14438
      if(iconv(cd, (ICONV_CONST char **) &tmp, &tmp_length, &out, &out_remain) == (size_t)-1) {
14438
      if(iconv(cd, (ICONV_CONST char **) &tmp, &tmp_length, &out, &out_remain) == (size_t)-1) {
14439
      if(errno == E2BIG)
14439
              if(errno == E2BIG) {
14440
          /* fall thru to the E2BIG case below */;
14440
          /* fall thru to the E2BIG case below */;
14441
      else
14441
              } else {
14442
        goto out;
14442
                  goto out;
14443
              }
14443
      } else {
14444
      } else {
14444
        /* The offending character was replaced by a SPACE, skip it. */
14445
        /* The offending character was replaced by a SPACE, skip it. */
14445
        in += 2;
14446
        in += 2;
14446
        in_length -= 2;
14447
        in_length -= 2;
14447
        /* And continue conversion. */
14448
        /* And continue conversion. */
...
...
14453
      case E2BIG:
14454
      case E2BIG:
14454
    {
14455
    {
14455
      /*
14456
      /*
14456
       * The output does not fit in the current out buffer, enlarge it.
14457
       * The output does not fit in the current out buffer, enlarge it.
14457
       */
14458
       */
14458
      int length = out - out_base;
14459
      size_t length = out - out_base;
14459
      out_size *= 2;
14460
      out_size *= 2;
14460
      {
14461
      {
14461
          char *saved = out_base;
14462
          char *saved = out_base;
14462
          /* +1 for null */
14463
          /* +1 for null */
14463
          out_base = (char *)realloc(out_base, out_size + 1);
14464
          out_base = (char *)realloc(out_base, out_size + 1);
...
...
14559
const char* unac_version(void)
14560
const char* unac_version(void)
14560
{
14561
{
14561
  return UNAC_VERSION;
14562
  return UNAC_VERSION;
14562
}
14563
}
14563
14564
14564
#ifdef RECOLL_DATADIR
14565
#ifdef BUILDING_RECOLL
14565
void unac_set_except_translations(const char *spectrans)
14566
void unac_set_except_translations(const char *spectrans)
14566
{
14567
{
14567
    except_trans.clear();
14568
    except_trans.clear();
14568
    if (!spectrans || !spectrans[0])
14569
    if (!spectrans || !spectrans[0])
14569
    return;
14570
    return;
...
...
14612
14613
14613
    except_trans[ch] = string((const char *)(out + 2), outsize-2);
14614
    except_trans[ch] = string((const char *)(out + 2), outsize-2);
14614
    free(out);
14615
    free(out);
14615
    }
14616
    }
14616
}
14617
}
14617
#endif /* RECOLL_DATADIR */
14618
#endif /* BUILDING_RECOLL */