001/*
002 * Copyright 2021-2022 Ping Identity Corporation
003 * All Rights Reserved.
004 */
005/*
006 * Copyright 2021-2022 Ping Identity Corporation
007 *
008 * Licensed under the Apache License, Version 2.0 (the "License");
009 * you may not use this file except in compliance with the License.
010 * You may obtain a copy of the License at
011 *
012 *    http://www.apache.org/licenses/LICENSE-2.0
013 *
014 * Unless required by applicable law or agreed to in writing, software
015 * distributed under the License is distributed on an "AS IS" BASIS,
016 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
017 * See the License for the specific language governing permissions and
018 * limitations under the License.
019 */
020/*
021 * Copyright (C) 2021-2022 Ping Identity Corporation
022 *
023 * This program is free software; you can redistribute it and/or modify
024 * it under the terms of the GNU General Public License (GPLv2 only)
025 * or the terms of the GNU Lesser General Public License (LGPLv2.1 only)
026 * as published by the Free Software Foundation.
027 *
028 * This program is distributed in the hope that it will be useful,
029 * but WITHOUT ANY WARRANTY; without even the implied warranty of
030 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
031 * GNU General Public License for more details.
032 *
033 * You should have received a copy of the GNU General Public License
034 * along with this program; if not, see <http://www.gnu.org/licenses>.
035 */
036package com.unboundid.ldif;
037
038
039
040import java.io.Serializable;
041
042import com.unboundid.util.ByteString;
043import com.unboundid.util.NotMutable;
044import com.unboundid.util.NotNull;
045import com.unboundid.util.StaticUtils;
046import com.unboundid.util.ThreadSafety;
047import com.unboundid.util.ThreadSafetyLevel;
048
049
050
051/**
052 * This class defines a set of properties that can be used to indicate which
053 * types of optional base64-encoding should be performed by the LDAP SDK.
054 */
055@NotMutable()
056@ThreadSafety(level=ThreadSafetyLevel.COMPLETELY_THREADSAFE)
057public final class Base64EncodingStrategy
058       implements Serializable
059{
060  /**
061   * A base64-encoding strategy that represents a safe default configuration.
062   * This includes:
063   * <UL>
064   *   <LI>
065   *     The presence of ASCII control characters will cause a value to be
066   *     base64-encoded.  This is not required by the LDIF specification, but is
067   *     recommended.
068   *   </LI>
069   *   <LI>
070   *     The presence of any non-ASCII characters (whether they may be
071   *     displayable or not) will cause a value to be base64-encoded as required
072   *     by the LDIF specification.
073   *   </LI>
074   *   <LI>
075   *     The presence of non-UTF-8 data will cause a value to be base64-encoded
076   *     as required by the LDIF specification.
077   *   </LI>
078   * </UL>
079   */
080  @NotNull public static final Base64EncodingStrategy DEFAULT =
081       new Base64EncodingStrategy(true, true, true, true);
082
083
084
085  /**
086   * A base64-encoding strategy that indicates that the LDAP SDK should perform
087   * the minimum amount of encoding required by the specification.  The presence
088   * of ASCII control characters (other than NUL, LF, and CR, which must always
089   * be base64-encoded) will not cause values to be encoded.  However, the
090   * presence of any non-ASCII characters or non-UTF-8 data will cause a value
091   * to be base64-=encoded as required by the LDIF specification.
092   */
093  @NotNull public static final Base64EncodingStrategy MINIMAL_COMPLIANT =
094       new Base64EncodingStrategy(false, true, true, true);
095
096
097
098  /**
099   * A base64-encoding strategy that indicates that the presence of non-ASCII
100   * characters that the LDAP SDK considers displayable should not cause a
101   * value to be encoded.  ASCII control characters, non-displayable non-ASCII
102   * characters, and non-UTF-8 data will cause a value to be base64-encoded.
103   * Note that this NOT compliant with the LDIF specification (which technically
104   * requires base64 encoding for all non-ASCII data), but it may be user
105   * friendly in some cases.
106   */
107  @NotNull public static final Base64EncodingStrategy
108       USER_FRIENDLY_NON_COMPLIANT =
109            new Base64EncodingStrategy(true, false, true, true);
110
111
112
113  /**
114   * A base64-encoding strategy that indicates that the LDAP SDK should perform
115   * the maximum amount of base64 encoding that it considers necessary.  Any
116   * ASCII control characters, any non-ASCII data, and any non-UTF-8 data will
117   * cause a value to be base64 encoded.  This is equivalent to the
118   * {@link #DEFAULT} strategy.
119   */
120  @NotNull public static final Base64EncodingStrategy MAXIMAL = DEFAULT;
121
122
123
124  /**
125   * The serial version UID for this serializable class.
126   */
127  private static final long serialVersionUID = -5787811215448347345L;
128
129
130
131  // Indicates whether the presence of one or more ASCII control characters
132  // should cause a value to be base64-encoded.
133  private final boolean encodeASCIIControlCharacters;
134
135  // Indicates whether the presence of one or more displayable non-ASCII
136  // characters should cause a value to be base64-encoded.
137  private final boolean encodeDisplayableNonASCIICharacters;
138
139  // Indicates whether the presence of one or more non-displayable non-ASCII
140  // characters should cause a value to be base64-encoded.
141  private final boolean encodeNonDisplayableNonASCIICharacters;
142
143  // Indicates whether values that do not represent valid UTF-8 strings should
144  // be base64-encoded.
145  private final boolean encodeNonUTF8Data;
146
147
148
149  /**
150   * Creates a new base64 encoding strategy with the specified settings.
151   *
152   * @param  encodeASCIIControlCharacters
153   *              Indicates whether the presence of one or more ASCII control
154   *              characters (characters whose Unicode code point is less than
155   *              or equal to 0x01F, or is equal to 0x7F) should cause a value
156   *              to be base64-encoded.  Note that as per RFC 2849, the presence
157   *              of the null (0x00), line feed (0x0A), and carriage return
158   *              (0x0D) ASCII control characters will always cause a value to
159   *              be base64-encoded.
160   * @param  encodeDisplayableNonASCIICharacters
161   *              Indicates whether the presence of one or more non-ASCII
162   *              characters (characters whose Unicode code point is greater
163   *              than 0x7F) that are believed to be displayable (as determined
164   *              by the {@link StaticUtils#isLikelyDisplayableCharacter}
165   *              method) should cause a value to be base64-encoded.
166   * @param  encodeNonDisplayableNonASCIICharacters
167   *              Indicates whether the presence of one or more non-ASCII
168   *              characters (characters whose Unicode code point is greater
169   *              than 0x7F) that are not believed to be displayable (as
170   *              determined by the
171   *              {@link StaticUtils#isLikelyDisplayableCharacter} method)
172   *              should cause a value to be base64-encoded.
173   * @param  encodeNonUTF8Data
174   *              Indicates whether non-UTF-8-encoded data should be
175   *              base64-encoded.  Note that if a value does not represent a
176   *              valid UTF-8 string, then the
177   *              {@code encodeDisplayableNonASCIICharacters} and
178   *              {@code encodeNonDisplayableNonASCIICharacters} arguments will
179   *              not be used.
180   */
181  public Base64EncodingStrategy(final boolean encodeASCIIControlCharacters,
182              final boolean encodeDisplayableNonASCIICharacters,
183              final boolean encodeNonDisplayableNonASCIICharacters,
184              final boolean encodeNonUTF8Data)
185  {
186    this.encodeASCIIControlCharacters = encodeASCIIControlCharacters;
187    this.encodeDisplayableNonASCIICharacters =
188         encodeDisplayableNonASCIICharacters;
189    this.encodeNonDisplayableNonASCIICharacters =
190         encodeNonDisplayableNonASCIICharacters;
191    this.encodeNonUTF8Data = encodeNonUTF8Data;
192  }
193
194
195
196  /**
197   * Indicates whether the presence of one or more ASCII control characters
198   * should cause a value to be base64-encoded.  ASCII control characters other
199   * than NUL, LF, and CR are not required to be base64-encoded by the LDIF
200   * specification, but it is generally recommended that they be encoded.
201   *
202   * @return  {@code true} if the presence of one or more ASCII control
203   *          characters should cause a value to be base64-encoded, or
204   *          {@code false} if not.
205   */
206  public boolean encodeASCIIControlCharacters()
207  {
208    return encodeASCIIControlCharacters;
209  }
210
211
212
213  /**
214   * Indicates whether the presence of one or more displayable non-ASCII
215   * characters (as determined by the
216   * {@link StaticUtils#isLikelyDisplayableCharacter} method) should cause a
217   * value to be base64-encoded.  Note that this only applies to values that
218   * represent valid UTF-8 strings.  Values that are not valid UTF-8 strings
219   * will use the setting represented by the {@link #encodeNonUTF8Data} method.
220   * Also note that all non-ASCII characters are required to be base64 encoded
221   * by the LDIF specification, but there may be cases in which it may be
222   * desirable to relax this behavior when displaying to an end user.
223   *
224   * @return  {@code true} if the presence of one or more displayable
225   *          non-ASCII characters should cause a value to be base64-encoded,
226   *          or {@code false} if not.
227   */
228  public boolean encodeDisplayableNonASCIICharacters()
229  {
230    return encodeDisplayableNonASCIICharacters;
231  }
232
233
234
235  /**
236   * Indicates whether the presence of one or more non-displayable non-ASCII
237   * characters (as determined by the
238   * {@link StaticUtils#isLikelyDisplayableCharacter} method) should cause a
239   * value to be base64-encoded.  Note that this only applies to values that
240   * represent valid UTF-8 strings.  Values that are not valid UTF-8 strings
241   * will use the setting represented by the {@link #encodeNonUTF8Data} method.
242   * Also note that all non-ASCII characters are required to be base64 encoded
243   * by the LDIF specification, but there may be cases in which it may be
244   * desirable to relax this behavior when displaying to an end user.
245   *
246   * @return  {@code true} if the presence of one or more non-displayable
247   *          non-ASCII characters should cause a value to be base64-encoded,
248   *          or {@code false} if not.
249   */
250  public boolean encodeNonDisplayableNonASCIICharacters()
251  {
252    return encodeNonDisplayableNonASCIICharacters;
253  }
254
255
256
257  /**
258   * Indicates whether values that do not represent valid UTF-8 strings (as
259   * determined by the {@link StaticUtils#isValidUTF8} method) should be
260   * base64-encoded.  Note that all non-ASCII data (which includes all non-UTF-8
261   * data) is required to be base64 encoded, but there may be cases in which it
262   * may be desirable to relax this behavior when displaying to an end user,
263   * especially when using non-UTF-8 character sets.
264   *
265   * @return  {@code true} if values that do not represent valid UTF-8 strings
266   *          should be base64-encoded, or {@code false} if not.
267   */
268  public boolean encodeNonUTF8Data()
269  {
270    return encodeNonUTF8Data;
271  }
272
273
274
275  /**
276   * Indicates whether the provided value should be base64-encoded in accordance
277   * with this strategy.
278   *
279   * @param  value  The value for which to make the determination.  It must not
280   *                be {@code null}.
281   *
282   * @return  {@code true} if the provided value should be base64-encoded in
283   *          accordance with this strategy, or {@code false} if not.
284   */
285  public boolean shouldBase64Encode(@NotNull final byte[] value)
286  {
287    // If the value is empty, then it does not need to be encoded.
288    if ((value == null) || (value.length == 0))
289    {
290      return false;
291    }
292
293
294    // If the value starts with a space, colon, or less-than character, then it
295    // must be base64-encoded.
296    switch (value[0])
297    {
298      case ' ':
299      case ':':
300      case '<':
301        return true;
302    }
303
304
305    // If the value ends with a space, then it must be base64-encoded.
306    if (value[value.length - 1] == ' ')
307    {
308      return true;
309    }
310
311
312    // Examine all the bytes that make up the value.  If we encounter any
313    // non-ASCII characters, then handle that specially.
314    for (int i=0; i < value.length; i++)
315    {
316      // Bytes that are between 0x00 and 0x1F are ASCII control characters.  The
317      // null (0x00), line feed (0x0A) and carriage return (0x0D) characters
318      // must always base base64-encoded.  For other bytes, use the
319      // encodeASCIIControlCharacters flag.
320      final byte b = value[i];
321      if ((b >= 0x00) && (b <= 0x1F))
322      {
323        switch (b)
324        {
325          case 0x00:
326          case 0x0A:
327          case 0x0D:
328            return true;
329          default:
330            if (encodeASCIIControlCharacters)
331            {
332              return true;
333            }
334            break;
335        }
336      }
337
338      // Byte 0x7F is the ASCII delete control character and should also be
339      // controlled by the encodeASCIIControlCharacters flag.
340      else if (b == 0x07F)
341      {
342        if (encodeASCIIControlCharacters)
343        {
344          return true;
345        }
346      }
347
348
349      // All bytes between 0x20 and 0x7E (inclusive) should be fine.  All other
350      // bytes will have the most significant bit set, and because Java bytes
351      // are signed, they will be negative.  If we encounter any negative bytes,
352      // then that means the value contains non-ASCII characters or doesn't
353      // represent a UTF-8 string.  If it's not valid UTF-8, then we'll handle
354      // it in accordance with the encodeNonUTF8Data flag.  Otherwise, we'll
355      // convert the remainder of the byte to a string and iterate across the
356      // code points for the rest of the determination.
357      else if (b < 0x00)
358      {
359        final byte[] remainingBytes = new byte[value.length - i];
360        System.arraycopy(value, i, remainingBytes, 0, remainingBytes.length);
361        if (StaticUtils.isValidUTF8(remainingBytes))
362        {
363          final String valueString = StaticUtils.toUTF8String(remainingBytes);
364          return shouldBase64EncodePreValidatedString(valueString);
365        }
366        else
367        {
368          return encodeNonUTF8Data;
369        }
370      }
371    }
372
373
374    // If we've gotten here, then the value does not need to be base64-encoded.
375    return false;
376  }
377
378
379
380  /**
381   * Indicates whether the provided value should be base64-encoded in accordance
382   * with this strategy.
383   *
384   * @param  value  The value for which to make the determination.  It must not
385   *                be {@code null}.
386   *
387   * @return  {@code true} if the provided value should be base64-encoded in
388   *          accordance with this strategy, or {@code false} if not.
389   */
390  public boolean shouldBase64Encode(@NotNull final String value)
391  {
392    // If the value is empty, then it does not need to be encoded.
393    if ((value == null) || (value.length() == 0))
394    {
395      return false;
396    }
397
398
399    // If the value starts with a space, colon, or less-than character, then it
400    // must be base64-encoded.
401    switch (value.charAt(0))
402    {
403      case ' ':
404      case ':':
405      case '<':
406        return true;
407    }
408
409
410    // If the value ends with a space, then it must be base64-encoded.
411    if (value.charAt(value.length() - 1) == ' ')
412    {
413      return true;
414    }
415
416
417    // Examine all of the characters in the string as code points so that we can
418    // handle non-ASCII characters properly.
419    return shouldBase64EncodePreValidatedString(value);
420  }
421
422
423
424  /**
425   * Indicates whether the provided string should be base64-encoded in
426   * accordance with this strategy.  Note that all of the appropriate first and
427   * last character validation must have already been performed.
428   *
429   * @param  s  The string to validate.  It must not be {@code null}.
430   *
431   * @return  {@code true} if the value should be base64-encoded in accordance
432   *          with this strategry, or {@code false} if not.
433   */
434  private boolean shouldBase64EncodePreValidatedString(@NotNull final String s)
435  {
436    int pos = 0;
437    while (pos < s.length())
438    {
439      final int codePoint = s.codePointAt(pos);
440
441
442      // Code points that are between 0x00 and 0x1F are ASCII control
443      // characters.  The null (0x00), line feed (0x0A), and carriage return
444      // (0x0D) characters must always be base64-encoded.  For other bytes, use
445      // the encodeASCIIControlCharacters flag.
446      //
447      // Note that code points will never be negative, so we don't have to check
448      // for a lower bound.
449      if (codePoint <=0x1F)
450      {
451        switch (codePoint)
452        {
453          case 0x00:
454          case 0x0A:
455          case 0x0D:
456            return true;
457          default:
458            if (encodeASCIIControlCharacters)
459            {
460              return true;
461            }
462            break;
463        }
464      }
465
466
467      // Code point 0x7F is the ASCII delete control character and should also
468      // be controlled by the encodeASCIIControlCharacters flag.
469      else if (codePoint == 0x7F)
470      {
471        if (encodeASCIIControlCharacters)
472        {
473          return true;
474        }
475      }
476
477
478      // If the code point is greater than 0x7F, then it's a non-ASCII character
479      // and the behavior should be controlled by either the
480      // encodeDisplayableNonASCIICharacters or
481      // encodeNonDisplayableNonASCIICharacters flag, whichever is appropriate.
482      else if (codePoint > 0x7F)
483      {
484        if (StaticUtils.isLikelyDisplayableCharacter(codePoint))
485        {
486          if (encodeDisplayableNonASCIICharacters)
487          {
488            return true;
489          }
490        }
491        else
492        {
493          if (encodeNonDisplayableNonASCIICharacters)
494          {
495            return true;
496          }
497        }
498      }
499
500
501      // Increment the position index based on the number of characters in the
502      // code point.  Some code points may require multiple characters to
503      // represent.
504      final int charsPerCodePoint = Character.charCount(codePoint);
505      pos += charsPerCodePoint;
506    }
507
508
509    // If we've gotten here, then the value does not need to be base64-encoded.
510    return false;
511  }
512
513
514
515  /**
516   * Indicates whether the provided value should be base64-encoded in accordance
517   * with this strategy.
518   *
519   * @param  value  The value for which to make the determination.  It must not
520   *                be {@code null}.
521   *
522   * @return  {@code true} if the provided value should be base64-encoded in
523   *          accordance with this strategy, or {@code false} if not.
524   */
525  public boolean shouldBase64Encode(@NotNull final ByteString value)
526  {
527    return shouldBase64Encode(value.getValue());
528  }
529
530
531
532  /**
533   * Retrieves a string representation of this base64 encoding strategy.
534   *
535   * @return  A string representation of this base64 encoding strategy.
536   */
537  @Override()
538  @NotNull()
539  public String toString()
540  {
541    final StringBuilder buffer = new StringBuilder();
542    toString(buffer);
543    return buffer.toString();
544  }
545
546
547
548  /**
549   * Appends a string representation of this base64 encoding strategy to the
550   * provided buffer.
551   *
552   * @param  buffer  The buffer to which the string representation should be
553   *                 appended.
554   */
555  public void toString(@NotNull final StringBuilder buffer)
556  {
557    buffer.append("Base64EncodingStrategy(encodeASCIIControlCharacters=");
558    buffer.append(encodeASCIIControlCharacters);
559    buffer.append(", encodeDisplayableNonASCIICharacters=");
560    buffer.append(encodeDisplayableNonASCIICharacters);
561    buffer.append(", encodeNonDisplayableNonASCIICharacters=");
562    buffer.append(encodeNonDisplayableNonASCIICharacters);
563    buffer.append(", encodeNonUTF8Data=");
564    buffer.append(encodeNonUTF8Data);
565    buffer.append(')');
566  }
567}