001    /*
002     * Copyright 2012-2016 UnboundID Corp.
003     * All Rights Reserved.
004     */
005    /*
006     * Copyright (C) 2012-2016 UnboundID Corp.
007     *
008     * This program is free software; you can redistribute it and/or modify
009     * it under the terms of the GNU General Public License (GPLv2 only)
010     * or the terms of the GNU Lesser General Public License (LGPLv2.1 only)
011     * as published by the Free Software Foundation.
012     *
013     * This program is distributed in the hope that it will be useful,
014     * but WITHOUT ANY WARRANTY; without even the implied warranty of
015     * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
016     * GNU General Public License for more details.
017     *
018     * You should have received a copy of the GNU General Public License
019     * along with this program; if not, see <http://www.gnu.org/licenses>.
020     */
021    package com.unboundid.util;
022    
023    
024    
025    import java.io.IOException;
026    import java.text.ParseException;
027    
028    import static com.unboundid.util.UtilityMessages.*;
029    import static com.unboundid.util.Validator.*;
030    
031    
032    
033    /**
034     * This class provides methods for encoding and decoding data in base32 as
035     * defined in <A HREF="http://www.ietf.org/rfc/rfc4648.txt">RFC 4648</A>.  It
036     * provides a somewhat compact way of representing binary data using only
037     * printable characters (a subset of ASCII letters and numeric digits selected
038     * to avoid ambiguity, like confusion between the number 1 and the uppercase
039     * letter I, and between the number 0 and the uppercase letter O).  It uses a
040     * five-bit encoding mechanism in which every five bytes of raw data is
041     * converted into eight bytes of base32-encoded data.
042     * <BR><BR>
043     * <H2>Example</H2>
044     * The following examples demonstrate the process for base32-encoding raw data,
045     * and for decoding a string containing base32-encoded data back to the raw
046     * data used to create it:
047     * <PRE>
048     * // Base32-encode some raw data:
049     * String base32String = Base32.encode(rawDataBytes);
050     *
051     * // Decode a base32 string back to raw data:
052     * byte[] decodedRawDataBytes;
053     * try
054     * {
055     *   decodedRawDataBytes = Base32.decode(base32String);
056     * }
057     * catch (ParseException pe)
058     * {
059     *   // The string did not represent a valid base32 encoding.
060     *   decodedRawDataBytes = null;
061     * }
062     * </PRE>
063     */
064    @ThreadSafety(level=ThreadSafetyLevel.COMPLETELY_THREADSAFE)
065    public final class Base32
066    {
067      /**
068       * The set of characters in the base32 alphabet.
069       */
070      private static final char[] BASE32_ALPHABET =
071           ("ABCDEFGHIJKLMNOPQRSTUVWXYZ234567").toCharArray();
072    
073    
074    
075      /**
076       * Prevent this class from being instantiated.
077       */
078      private Base32()
079      {
080        // No implementation is required.
081      }
082    
083    
084    
085      /**
086       * Encodes the UTF-8 representation of the provided string in base32 format.
087       *
088       * @param  data  The raw data to be encoded.  It must not be {@code null}.
089       *
090       * @return  The base32-encoded representation of the provided data.
091       */
092      public static String encode(final String data)
093      {
094        ensureNotNull(data);
095    
096        return encode(StaticUtils.getBytes(data));
097      }
098    
099    
100    
101      /**
102       * Encodes the provided data in base32 format.
103       *
104       * @param  data  The raw data to be encoded.  It must not be {@code null}.
105       *
106       * @return  The base32-encoded representation of the provided data.
107       */
108      public static String encode(final byte[] data)
109      {
110        ensureNotNull(data);
111    
112        final StringBuilder buffer = new StringBuilder(4*data.length/3+1);
113        encodeInternal(data, 0, data.length, buffer);
114        return buffer.toString();
115      }
116    
117    
118    
119      /**
120       * Appends a base32-encoded version of the contents of the provided buffer
121       * (using a UTF-8 representation) to the given buffer.
122       *
123       * @param  data    The raw data to be encoded.  It must not be {@code null}.
124       * @param  buffer  The buffer to which the base32-encoded data is to be
125       *                 written.
126       */
127      public static void encode(final String data, final StringBuilder buffer)
128      {
129        ensureNotNull(data);
130    
131        encode(StaticUtils.getBytes(data), buffer);
132      }
133    
134    
135    
136      /**
137       * Appends a base32-encoded version of the contents of the provided buffer
138       * (using a UTF-8 representation) to the given buffer.
139       *
140       * @param  data    The raw data to be encoded.  It must not be {@code null}.
141       * @param  buffer  The buffer to which the base32-encoded data is to be
142       *                 written.
143       */
144      public static void encode(final String data, final ByteStringBuffer buffer)
145      {
146        ensureNotNull(data);
147    
148        encode(StaticUtils.getBytes(data), buffer);
149      }
150    
151    
152    
153      /**
154       * Appends a base32-encoded representation of the provided data to the given
155       * buffer.
156       *
157       * @param  data    The raw data to be encoded.  It must not be {@code null}.
158       * @param  buffer  The buffer to which the base32-encoded data is to be
159       *                 written.
160       */
161      public static void encode(final byte[] data, final StringBuilder buffer)
162      {
163        encodeInternal(data, 0, data.length, buffer);
164      }
165    
166    
167    
168      /**
169       * Appends a base32-encoded representation of the provided data to the given
170       * buffer.
171       *
172       * @param  data    The array containing the raw data to be encoded.  It must
173       *                 not be {@code null}.
174       * @param  off     The offset in the array at which the data to encode begins.
175       * @param  length  The number of bytes to be encoded.
176       * @param  buffer  The buffer to which the base32-encoded data is to be
177       *                 written.
178       */
179      public static void encode(final byte[] data, final int off, final int length,
180                                final StringBuilder buffer)
181      {
182        encodeInternal(data, off, length, buffer);
183      }
184    
185    
186    
187      /**
188       * Appends a base32-encoded representation of the provided data to the given
189       * buffer.
190       *
191       * @param  data    The raw data to be encoded.  It must not be {@code null}.
192       * @param  buffer  The buffer to which the base32-encoded data is to be
193       *                 written.
194       */
195      public static void encode(final byte[] data, final ByteStringBuffer buffer)
196      {
197        encodeInternal(data, 0, data.length, buffer);
198      }
199    
200    
201    
202      /**
203       * Appends a base32-encoded representation of the provided data to the given
204       * buffer.
205       *
206       * @param  data    The raw data to be encoded.  It must not be {@code null}.
207       * @param  off     The offset in the array at which the data to encode begins.
208       * @param  length  The number of bytes to be encoded.
209       * @param  buffer  The buffer to which the base32-encoded data is to be
210       *                 written.
211       */
212      public static void encode(final byte[] data, final int off, final int length,
213                                final ByteStringBuffer buffer)
214      {
215        encodeInternal(data, off, length, buffer);
216      }
217    
218    
219    
220      /**
221       * Appends a base32-encoded representation of the provided data to the given
222       * buffer.
223       *
224       * @param  data    The raw data to be encoded.  It must not be {@code null}.
225       * @param  off     The offset in the array at which the data to encode begins.
226       * @param  length  The number of bytes to be encoded.
227       * @param  buffer  The buffer to which the base32-encoded data is to be
228       *                 written.
229       */
230      private static void encodeInternal(final byte[] data, final int off,
231                                         final int length, final Appendable buffer)
232      {
233        ensureNotNull(data);
234        ensureTrue(data.length >= off);
235        ensureTrue(data.length >= (off+length));
236    
237        if (length == 0)
238        {
239          return;
240        }
241    
242        try
243        {
244          int pos = off;
245          for (int i=0; i < (length / 5); i++)
246          {
247            final long longValue =
248                 (((data[pos++] & 0xFFL) << 32) |
249                  ((data[pos++] & 0xFFL) << 24) |
250                  ((data[pos++] & 0xFFL) << 16) |
251                  ((data[pos++] & 0xFFL) << 8) |
252                   (data[pos++] & 0xFFL));
253    
254            buffer.append(BASE32_ALPHABET[(int) ((longValue >> 35) & 0x1FL)]);
255            buffer.append(BASE32_ALPHABET[(int) ((longValue >> 30) & 0x1FL)]);
256            buffer.append(BASE32_ALPHABET[(int) ((longValue >> 25) & 0x1FL)]);
257            buffer.append(BASE32_ALPHABET[(int) ((longValue >> 20) & 0x1FL)]);
258            buffer.append(BASE32_ALPHABET[(int) ((longValue >> 15) & 0x1FL)]);
259            buffer.append(BASE32_ALPHABET[(int) ((longValue >> 10) & 0x1FL)]);
260            buffer.append(BASE32_ALPHABET[(int) ((longValue >> 5) & 0x1FL)]);
261            buffer.append(BASE32_ALPHABET[(int) (longValue & 0x1FL)]);
262          }
263    
264          switch ((off+length) - pos)
265          {
266            case 1:
267              long longValue = ((data[pos] & 0xFFL) << 32);
268              buffer.append(BASE32_ALPHABET[(int) ((longValue >> 35) & 0x1FL)]);
269              buffer.append(BASE32_ALPHABET[(int) ((longValue >> 30) & 0x1FL)]);
270              buffer.append("======");
271              return;
272    
273            case 2:
274              longValue = (((data[pos++] & 0xFFL) << 32) |
275                           ((data[pos] & 0xFFL) << 24));
276              buffer.append(BASE32_ALPHABET[(int) ((longValue >> 35) & 0x1FL)]);
277              buffer.append(BASE32_ALPHABET[(int) ((longValue >> 30) & 0x1FL)]);
278              buffer.append(BASE32_ALPHABET[(int) ((longValue >> 25) & 0x1FL)]);
279              buffer.append(BASE32_ALPHABET[(int) ((longValue >> 20) & 0x1FL)]);
280              buffer.append("====");
281              return;
282    
283            case 3:
284              longValue = (((data[pos++] & 0xFFL) << 32) |
285                           ((data[pos++] & 0xFFL) << 24) |
286                           ((data[pos] & 0xFFL) << 16));
287              buffer.append(BASE32_ALPHABET[(int) ((longValue >> 35) & 0x1FL)]);
288              buffer.append(BASE32_ALPHABET[(int) ((longValue >> 30) & 0x1FL)]);
289              buffer.append(BASE32_ALPHABET[(int) ((longValue >> 25) & 0x1FL)]);
290              buffer.append(BASE32_ALPHABET[(int) ((longValue >> 20) & 0x1FL)]);
291              buffer.append(BASE32_ALPHABET[(int) ((longValue >> 15) & 0x1FL)]);
292              buffer.append("===");
293              return;
294    
295            case 4:
296              longValue = (((data[pos++] & 0xFFL) << 32) |
297                           ((data[pos++] & 0xFFL) << 24) |
298                           ((data[pos++] & 0xFFL) << 16) |
299                           ((data[pos] & 0xFFL) << 8));
300              buffer.append(BASE32_ALPHABET[(int) ((longValue >> 35) & 0x1FL)]);
301              buffer.append(BASE32_ALPHABET[(int) ((longValue >> 30) & 0x1FL)]);
302              buffer.append(BASE32_ALPHABET[(int) ((longValue >> 25) & 0x1FL)]);
303              buffer.append(BASE32_ALPHABET[(int) ((longValue >> 20) & 0x1FL)]);
304              buffer.append(BASE32_ALPHABET[(int) ((longValue >> 15) & 0x1FL)]);
305              buffer.append(BASE32_ALPHABET[(int) ((longValue >> 10) & 0x1FL)]);
306              buffer.append(BASE32_ALPHABET[(int) ((longValue >> 5) & 0x1FL)]);
307              buffer.append("=");
308              return;
309          }
310        }
311        catch (final IOException ioe)
312        {
313          Debug.debugException(ioe);
314    
315          // This should never happen.
316          throw new RuntimeException(ioe.getMessage(), ioe);
317        }
318      }
319    
320    
321    
322      /**
323       * Decodes the contents of the provided base32-encoded string.
324       *
325       * @param  data  The base32-encoded string to decode.  It must not be
326       *               {@code null}.
327       *
328       * @return  A byte array containing the decoded data.
329       *
330       * @throws  ParseException  If the contents of the provided string cannot be
331       *                          parsed as base32-encoded data.
332       */
333      public static byte[] decode(final String data)
334             throws ParseException
335      {
336        ensureNotNull(data);
337    
338        final int length = data.length();
339        if (length == 0)
340        {
341          return new byte[0];
342        }
343    
344        if ((length % 8) != 0)
345        {
346          throw new ParseException(ERR_BASE32_DECODE_INVALID_LENGTH.get(), length);
347        }
348    
349        final ByteStringBuffer buffer = new ByteStringBuffer(5 * (length / 8));
350    
351        int stringPos = 0;
352        while (stringPos < length)
353        {
354          long longValue = 0x00;
355          for (int i=0; i < 8; i++)
356          {
357            longValue <<= 5;
358            switch (data.charAt(stringPos++))
359            {
360              case 'A':
361              case 'a':
362                longValue |= 0x00L;
363                break;
364              case 'B':
365              case 'b':
366                longValue |= 0x01L;
367                break;
368              case 'C':
369              case 'c':
370                longValue |= 0x02L;
371                break;
372              case 'D':
373              case 'd':
374                longValue |= 0x03L;
375                break;
376              case 'E':
377              case 'e':
378                longValue |= 0x04L;
379                break;
380              case 'F':
381              case 'f':
382                longValue |= 0x05L;
383                break;
384              case 'G':
385              case 'g':
386                longValue |= 0x06L;
387                break;
388              case 'H':
389              case 'h':
390                longValue |= 0x07L;
391                break;
392              case 'I':
393              case 'i':
394                longValue |= 0x08L;
395                break;
396              case 'J':
397              case 'j':
398                longValue |= 0x09L;
399                break;
400              case 'K':
401              case 'k':
402                longValue |= 0x0AL;
403                break;
404              case 'L':
405              case 'l':
406                longValue |= 0x0BL;
407                break;
408              case 'M':
409              case 'm':
410                longValue |= 0x0CL;
411                break;
412              case 'N':
413              case 'n':
414                longValue |= 0x0DL;
415                break;
416              case 'O':
417              case 'o':
418                longValue |= 0x0EL;
419                break;
420              case 'P':
421              case 'p':
422                longValue |= 0x0FL;
423                break;
424              case 'Q':
425              case 'q':
426                longValue |= 0x10L;
427                break;
428              case 'R':
429              case 'r':
430                longValue |= 0x11L;
431                break;
432              case 'S':
433              case 's':
434                longValue |= 0x12L;
435                break;
436              case 'T':
437              case 't':
438                longValue |= 0x13L;
439                break;
440              case 'U':
441              case 'u':
442                longValue |= 0x14L;
443                break;
444              case 'V':
445              case 'v':
446                longValue |= 0x15L;
447                break;
448              case 'W':
449              case 'w':
450                longValue |= 0x16L;
451                break;
452              case 'X':
453              case 'x':
454                longValue |= 0x17L;
455                break;
456              case 'Y':
457              case 'y':
458                longValue |= 0x18L;
459                break;
460              case 'Z':
461              case 'z':
462                longValue |= 0x19L;
463                break;
464              case '2':
465                longValue |= 0x1AL;
466                break;
467              case '3':
468                longValue |= 0x1BL;
469                break;
470              case '4':
471                longValue |= 0x1CL;
472                break;
473              case '5':
474                longValue |= 0x1DL;
475                break;
476              case '6':
477                longValue |= 0x1EL;
478                break;
479              case '7':
480                longValue |= 0x1FL;
481                break;
482    
483              case '=':
484                switch (length - stringPos)
485                {
486                  case 0:
487                    // The string ended with a single equal sign, so there are
488                    // four bytes left.
489                    buffer.append((byte) ((longValue >> 32) & 0xFFL));
490                    buffer.append((byte) ((longValue >> 24) & 0xFFL));
491                    buffer.append((byte) ((longValue >> 16) & 0xFFL));
492                    buffer.append((byte) ((longValue >> 8) & 0xFFL));
493                    return buffer.toByteArray();
494    
495                  case 2:
496                    // The string ended with three equal signs, so there are three
497                    // bytes left.
498                    longValue <<= 10;
499                    buffer.append((byte) ((longValue >> 32) & 0xFFL));
500                    buffer.append((byte) ((longValue >> 24) & 0xFFL));
501                    buffer.append((byte) ((longValue >> 16) & 0xFFL));
502                    return buffer.toByteArray();
503    
504                  case 3:
505                    // The string ended with four equal signs, so there are two
506                    // bytes left.
507                    longValue <<= 15;
508                    buffer.append((byte) ((longValue >> 32) & 0xFFL));
509                    buffer.append((byte) ((longValue >> 24) & 0xFFL));
510                    return buffer.toByteArray();
511    
512                  case 5:
513                    // The string ended with six equal signs, so there is one byte
514                    // left.
515                    longValue <<= 25;
516                    buffer.append((byte) ((longValue >> 32) & 0xFFL));
517                    return buffer.toByteArray();
518    
519                  default:
520                    throw new ParseException(
521                         ERR_BASE32_DECODE_UNEXPECTED_EQUAL.get((stringPos-1)),
522                         (stringPos-1));
523                }
524    
525              default:
526                throw new ParseException(
527                     ERR_BASE32_DECODE_UNEXPECTED_CHAR.get(
528                          data.charAt(stringPos-1)),
529                     (stringPos-1));
530            }
531          }
532    
533          buffer.append((byte) ((longValue >> 32) & 0xFFL));
534          buffer.append((byte) ((longValue >> 24) & 0xFFL));
535          buffer.append((byte) ((longValue >> 16) & 0xFFL));
536          buffer.append((byte) ((longValue >> 8) & 0xFFL));
537          buffer.append((byte) (longValue & 0xFFL));
538        }
539    
540        return buffer.toByteArray();
541      }
542    
543    
544    
545      /**
546       * Decodes the contents of the provided base32-encoded string to a string
547       * containing the raw data using the UTF-8 encoding.
548       *
549       * @param  data  The base32-encoded string to decode.  It must not be
550       *               {@code null}.
551       *
552       * @return  A string containing the decoded data.
553       *
554       * @throws  ParseException  If the contents of the provided string cannot be
555       *                          parsed as base32-encoded data using the UTF-8
556       *                          encoding.
557       */
558      public static String decodeToString(final String data)
559             throws ParseException
560      {
561        ensureNotNull(data);
562    
563        final byte[] decodedBytes = decode(data);
564        return StaticUtils.toUTF8String(decodedBytes);
565      }
566    }