Switch to unified view

a b/upmpd/base64.cxx
1
/* Copyright (C) 2005 J.F.Dockes
2
 *   This program is free software; you can redistribute it and/or modify
3
 *   it under the terms of the GNU General Public License as published by
4
 *   the Free Software Foundation; either version 2 of the License, or
5
 *   (at your option) any later version.
6
 *
7
 *   This program is distributed in the hope that it will be useful,
8
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of
9
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
10
 *   GNU General Public License for more details.
11
 *
12
 *   You should have received a copy of the GNU General Public License
13
 *   along with this program; if not, write to the
14
 *   Free Software Foundation, Inc.,
15
 *   59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
16
 */
17
#include <stdio.h>
18
19
#include <sys/types.h>
20
#include <cstring>
21
#include <string>
22
using std::string;
23
24
#include "base64.hxx"
25
26
#undef DEBUG_BASE64 
27
#ifdef DEBUG_BASE64
28
#define DPRINT(X) fprintf X
29
#else
30
#define DPRINT(X)
31
#endif
32
33
// This is adapted from FreeBSD's code, quite modified for performance.
34
// Tests on a Mac pro 2.1G with a 166MB base64 file
35
//
36
// The original version used strchr to lookup the base64 value from
37
// the input code:
38
//   real    0m13.053s user  0m12.574s sys   0m0.471s
39
// Using a direct access, 256 entries table:
40
//   real    0m3.073s user   0m2.600s sys    0m0.439s
41
// Using a variable to hold the array length (instead of in.length()):
42
//   real    0m2.972s user   0m2.527s sys    0m0.433s
43
// Using values from the table instead of isspace() (final)
44
//   real    0m2.513s user   0m2.059s sys    0m0.439s
45
//
46
// Decoding table, indexed by the input byte taken as an unsigned char
// (0-255). Invalid base64 chars take value 256, whitespace (ht, nl, vt,
// ff, cr, space) 255, Pad ('=') 254. Valid chars hold their base64
// value (0-63).
//
// The table is laid out as 16 rows of 16 entries so that its size can be
// checked by eye: it must have exactly 256 entries, or looking up a high
// byte value would read past the end of the array.
static const int b64values[] = {
/* 0x00: 9-13 are ht nl vt ff cr */
256,256,256,256,256,256,256,256, 256,255,255,255,255,255,256,256,
/* 0x10 */
256,256,256,256,256,256,256,256, 256,256,256,256,256,256,256,256,
/* 0x20: space, then '+' at 0x2b and '/' at 0x2f */
255,256,256,256,256,256,256,256, 256,256,256, 62,256,256,256, 63,
/* 0x30: '0'-'9', then '=' at 0x3d */
 52, 53, 54, 55, 56, 57, 58, 59,  60, 61,256,256,256,254,256,256,
/* 0x40: '@', then 'A'-'O' */
256,  0,  1,  2,  3,  4,  5,  6,   7,  8,  9, 10, 11, 12, 13, 14,
/* 0x50: 'P'-'Z' */
 15, 16, 17, 18, 19, 20, 21, 22,  23, 24, 25,256,256,256,256,256,
/* 0x60: '`', then 'a'-'o' */
256, 26, 27, 28, 29, 30, 31, 32,  33, 34, 35, 36, 37, 38, 39, 40,
/* 0x70: 'p'-'z' */
 41, 42, 43, 44, 45, 46, 47, 48,  49, 50, 51,256,256,256,256,256,
/* 0x80 - 0xff: never valid */
256,256,256,256,256,256,256,256, 256,256,256,256,256,256,256,256,
256,256,256,256,256,256,256,256, 256,256,256,256,256,256,256,256,
256,256,256,256,256,256,256,256, 256,256,256,256,256,256,256,256,
256,256,256,256,256,256,256,256, 256,256,256,256,256,256,256,256,
256,256,256,256,256,256,256,256, 256,256,256,256,256,256,256,256,
256,256,256,256,256,256,256,256, 256,256,256,256,256,256,256,256,
256,256,256,256,256,256,256,256, 256,256,256,256,256,256,256,256,
256,256,256,256,256,256,256,256, 256,256,256,256,256,256,256,256,
};
// Compile-time check: the array type below has a negative size, and so
// fails to compile, if the table does not have exactly 256 entries.
typedef char b64values_covers_all_bytes[
    (sizeof(b64values) / sizeof(b64values[0]) == 256) ? 1 : -1];
85
/* The base64 alphabet: the character at index n encodes the 6-bit value n. */
static const char Base64[] =
    "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
    "abcdefghijklmnopqrstuvwxyz"
    "0123456789"
    "+/";
/* Character used to pad the last output group up to 4 characters. */
static const char Pad64 = '=';
88
89
bool base64_decode(const string& in, string& out)
90
{
91
    int io = 0, state = 0, ch = 0;
92
    unsigned int ii = 0;
93
    out.clear();
94
    size_t ilen = in.length();
95
    out.reserve(ilen);
96
97
    for (ii = 0; ii < ilen; ii++) {
98
  ch = (unsigned char)in[ii];
99
  int value = b64values[ch];
100
101
  if (value == 255)        /* Skip whitespace anywhere. */
102
      continue;
103
  if (ch == Pad64)
104
      break;
105
  if (value == 256) {
106
      /* A non-base64 character. */
107
      DPRINT((stderr, "base64_dec: non-base64 char at pos %d\n", ii));
108
      return false;
109
  }
110
111
  switch (state) {
112
  case 0:
113
      out += value << 2;
114
      state = 1;
115
      break;
116
  case 1:
117
      out[io]   |=  value >> 4;
118
      out += (value & 0x0f) << 4 ;
119
      io++;
120
      state = 2;
121
      break;
122
  case 2:
123
      out[io]   |=  value >> 2;
124
      out += (value & 0x03) << 6;
125
      io++;
126
      state = 3;
127
      break;
128
  case 3:
129
      out[io] |= value;
130
      io++;
131
      state = 0;
132
      break;
133
  default:
134
      fprintf(stderr, "base64_dec: internal!bad state!\n");
135
      return false;
136
  }
137
    }
138
139
    /*
140
     * We are done decoding Base-64 chars.  Let's see if we ended
141
     * on a byte boundary, and/or with erroneous trailing characters.
142
     */
143
144
    if (ch == Pad64) {        /* We got a pad char. */
145
  ch = in[ii++];      /* Skip it, get next. */
146
  switch (state) {
147
  case 0:     /* Invalid = in first position */
148
  case 1:     /* Invalid = in second position */
149
      DPRINT((stderr, "base64_dec: pad char in state 0/1\n"));
150
      return false;
151
152
  case 2:     /* Valid, means one byte of info */
153
          /* Skip any number of spaces. */
154
      for (; ii < in.length(); ch = in[ii++])
155
      if (!isspace((unsigned char)ch))
156
          break;
157
      /* Make sure there is another trailing = sign. */
158
      if (ch != Pad64) {
159
      DPRINT((stderr, "base64_dec: missing pad char!\n"));
160
      // Well, there are bad encoders out there. Let it pass
161
      // return false;
162
      }
163
      ch = in[ii++];      /* Skip the = */
164
      /* Fall through to "single trailing =" case. */
165
      /* FALLTHROUGH */
166
167
  case 3:     /* Valid, means two bytes of info */
168
      /*
169
       * We know this char is an =.  Is there anything but
170
       * whitespace after it?
171
       */
172
      for (; ii < in.length(); ch = in[ii++])
173
      if (!isspace((unsigned char)ch)) {
174
          DPRINT((stderr, "base64_dec: non-white at eod: 0x%x\n", 
175
              (unsigned int)((unsigned char)ch)));
176
          // Well, there are bad encoders out there. Let it pass
177
          //return false;
178
      }
179
180
      /*
181
       * Now make sure for cases 2 and 3 that the "extra"
182
       * bits that slopped past the last full byte were
183
       * zeros.  If we don't check them, they become a
184
       * subliminal channel.
185
       */
186
      if (out[io] != 0) {
187
      DPRINT((stderr, "base64_dec: bad extra bits!\n"));
188
      // Well, there are bad encoders out there. Let it pass
189
      out[io] = 0;
190
      // return false;
191
      }
192
      // We've appended an extra 0.
193
      out.resize(io);
194
  }
195
    } else {
196
  /*
197
   * We ended by seeing the end of the string.  Make sure we
198
   * have no partial bytes lying around.
199
   */
200
  if (state != 0) {
201
      DPRINT((stderr, "base64_dec: bad final state\n"));
202
      return false;
203
  }
204
    }
205
206
    DPRINT((stderr, "base64_dec: ret ok, io %d sz %d len %d value [%s]\n", 
207
      io, (int)out.size(), (int)out.length(), out.c_str()));
208
    return true;
209
}
210
211
#undef Assert
212
#define Assert(X)
213
214
void base64_encode(const string &in, string &out)
215
{
216
    unsigned char input[3];
217
    unsigned char output[4];
218
219
    out.clear();
220
221
    int srclength = in.length();
222
    int sidx = 0;
223
    while (2 < srclength) {
224
  input[0] = in[sidx++];
225
  input[1] = in[sidx++];
226
  input[2] = in[sidx++];
227
  srclength -= 3;
228
229
  output[0] = input[0] >> 2;
230
  output[1] = ((input[0] & 0x03) << 4) + (input[1] >> 4);
231
  output[2] = ((input[1] & 0x0f) << 2) + (input[2] >> 6);
232
  output[3] = input[2] & 0x3f;
233
  Assert(output[0] < 64);
234
  Assert(output[1] < 64);
235
  Assert(output[2] < 64);
236
  Assert(output[3] < 64);
237
238
  out += Base64[output[0]];
239
  out += Base64[output[1]];
240
  out += Base64[output[2]];
241
  out += Base64[output[3]];
242
    }
243
    
244
    /* Now we worry about padding. */
245
    if (0 != srclength) {
246
  /* Get what's left. */
247
  input[0] = input[1] = input[2] = '\0';
248
  for (int i = 0; i < srclength; i++)
249
      input[i] = in[sidx++];
250
  
251
  output[0] = input[0] >> 2;
252
  output[1] = ((input[0] & 0x03) << 4) + (input[1] >> 4);
253
  output[2] = ((input[1] & 0x0f) << 2) + (input[2] >> 6);
254
  Assert(output[0] < 64);
255
  Assert(output[1] < 64);
256
  Assert(output[2] < 64);
257
258
  out += Base64[output[0]];
259
  out += Base64[output[1]];
260
  if (srclength == 1)
261
      out += Pad64;
262
  else
263
      out += Base64[output[2]];
264
  out += Pad64;
265
    }
266
    return;
267
}
268
269
#ifdef TEST_BASE64
270
#include <stdio.h>
271
#include <stdlib.h>
272
273
#include "readfile.h"
274
275
/* Name used in the usage message; main() sets it to argv[0]. */
const char *thisprog;

/* Argument summary printed by Usage(). */
static char usage[] = "testfile\n\n";

/* Print the usage message on stderr and terminate with exit status 1. */
static void Usage(void)
{
    fprintf(stderr, "%s: usage:\n%s", thisprog, usage);
    exit(1);
}
284
285
static int     op_flags;
286
#define OPT_MOINS 0x1
287
#define OPT_i   0x2 
288
#define OPT_P   0x4 
289
290
int main(int argc, char **argv)
291
{
292
    thisprog = argv[0];
293
    argc--; argv++;
294
295
    while (argc > 0 && **argv == '-') {
296
  (*argv)++;
297
  if (!(**argv))
298
      /* Cas du "adb - core" */
299
      Usage();
300
  while (**argv)
301
      switch (*(*argv)++) {
302
      case 'i':   op_flags |= OPT_i; break;
303
      default: Usage();   break;
304
      }
305
  argc--; argv++;
306
    }
307
    
308
    if (op_flags & OPT_i)  {
309
  const char *values[] = {"", "1", "12", "123", "1234", 
310
              "12345", "123456"};
311
  int nvalues = sizeof(values) / sizeof(char *);
312
  string in, out, back;
313
  int err = 0;
314
  for (int i = 0; i < nvalues; i++) {
315
      in = values[i];
316
      base64_encode(in, out);
317
      base64_decode(out, back);
318
      if (in != back) {
319
      fprintf(stderr, "In [%s] %d != back [%s] %d (out [%s] %d\n", 
320
          in.c_str(), int(in.length()), 
321
          back.c_str(), int(back.length()),
322
          out.c_str(), int(out.length())
323
          );
324
      err++;
325
      }
326
  }
327
  in.erase();
328
  in += char(0);
329
  in += char(0);
330
  in += char(0);
331
  in += char(0);
332
  base64_encode(in, out);
333
  base64_decode(out, back);
334
  if (in != back) {
335
      fprintf(stderr, "In [%s] %d != back [%s] %d (out [%s] %d\n", 
336
          in.c_str(), int(in.length()), 
337
          back.c_str(), int(back.length()),
338
          out.c_str(), int(out.length())
339
          );
340
      err++;
341
  }
342
  exit(!(err == 0));
343
    } else {
344
  if (argc > 1)
345
      Usage();
346
  string infile;
347
  if (argc == 1)
348
      infile = *argv++;argc--;
349
  string idata, reason;
350
  if (!file_to_string(infile, idata, &reason)) {
351
      fprintf(stderr, "Can't read file: %s\n", reason.c_str());
352
      exit(1);
353
  }
354
  string odata;
355
  if (!base64_decode(idata, odata)) {
356
      fprintf(stderr, "Decoding failed\n");
357
      exit(1);
358
  }
359
  write(1, odata.c_str(), 
360
        odata.size() * sizeof(string::value_type));
361
  exit(0);
362
    }
363
}
364
#endif