001/*
002 * Copyright 2021-2024 Ping Identity Corporation
003 * All Rights Reserved.
004 */
005/*
006 * Copyright 2021-2024 Ping Identity Corporation
007 *
008 * Licensed under the Apache License, Version 2.0 (the "License");
009 * you may not use this file except in compliance with the License.
010 * You may obtain a copy of the License at
011 *
012 *    http://www.apache.org/licenses/LICENSE-2.0
013 *
014 * Unless required by applicable law or agreed to in writing, software
015 * distributed under the License is distributed on an "AS IS" BASIS,
016 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
017 * See the License for the specific language governing permissions and
018 * limitations under the License.
019 */
020/*
021 * Copyright (C) 2021-2024 Ping Identity Corporation
022 *
023 * This program is free software; you can redistribute it and/or modify
024 * it under the terms of the GNU General Public License (GPLv2 only)
025 * or the terms of the GNU Lesser General Public License (LGPLv2.1 only)
026 * as published by the Free Software Foundation.
027 *
028 * This program is distributed in the hope that it will be useful,
029 * but WITHOUT ANY WARRANTY; without even the implied warranty of
030 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
031 * GNU General Public License for more details.
032 *
033 * You should have received a copy of the GNU General Public License
034 * along with this program; if not, see <http://www.gnu.org/licenses>.
035 */
036package com.unboundid.ldap.sdk;
037
038
039
040import java.io.Serializable;
041
042import com.unboundid.util.ByteString;
043import com.unboundid.util.ByteStringBuffer;
044import com.unboundid.util.NotMutable;
045import com.unboundid.util.NotNull;
046import com.unboundid.util.StaticUtils;
047import com.unboundid.util.ThreadSafety;
048import com.unboundid.util.ThreadSafetyLevel;
049
050
051
052/**
053 * This class defines a set of properties that can be used to indicate which
054 * types of optional escaping should be performed by the LDAP SDK when
055 * constructing the string representation of DNs and RDNs.
056 */
057@NotMutable()
058@ThreadSafety(level=ThreadSafetyLevel.COMPLETELY_THREADSAFE)
059public final class DNEscapingStrategy
060       implements Serializable
061{
062  /**
063   * A DN escaping strategy that represents a default, user-friendly
064   * configuration.  This includes:
065   * <UL>
066   *   <LI>
067   *     ASCII control characters will be escaped.
068   *   </LI>
069   *   <LI>
070   *     Displayable non-ASCII characters will not be escaped.
071   *   </LI>
072   *   <LI>
073   *     Non-displayable non-ASCII characters will be escaped.
074   *   </LI>
075   *   <LI>
076   *     In non-UTF-8 data, all bytes with the most significant bit set will be
077   *     escaped.
078   *   </LI>
079   * </UL>
080   */
081  @NotNull public static final DNEscapingStrategy DEFAULT =
082       new DNEscapingStrategy(true, false, true, true);
083
084
085
086  /**
087   * A DN escaping strategy that indicates that the LDAP SDK should only perform
088   * required escaping and should not perform any optional escaping.
089   */
090  @NotNull public static final DNEscapingStrategy MINIMAL =
091       new DNEscapingStrategy(false, false, false, false);
092
093
094
095  /**
096   * A base64-encoding strategy that indicates that the LDAP SDK should
097   * perform the maximum amount of DN escaping that is considered reasonable.
098   * All ASCII control characters, all non-ASCII characters and non-UTF-8 bytes
099   * will be escaped.
100   */
101  @NotNull public static final DNEscapingStrategy MAXIMAL =
102       new DNEscapingStrategy(true, true, true, true);
103
104
105
106  /**
107   * The serial version UID for this serializable class.
108   */
109  private static final long serialVersionUID = -5438646712027992419L;
110
111
112
113  // Indicates whether ASCII control characters should be escaped.
114  private final boolean escapeASCIIControlCharacters;
115
116  // Indicates whether displayable non-ASCII characters should be escaped.
117  private final boolean escapeDisplayableNonASCIICharacters;
118
119  // Indicates whether non-displayable non-ASCII characters should be escaped.
120  private final boolean escapeNonDisplayableNonASCIICharacters;
121
122  // Indicates whether bytes with the most significant bit set in non-UTF-8 data
123  // should be escaped.
124  private final boolean escapeNonUTF8Data;
125
126
127
128  /**
129   * Creates a new DN escaping strategy with the specified settings.
130   *
131   * @param  escapeASCIIControlCharacters
132   *              Indicates whether ASCII control characters (characters whose
133   *              Unicode code point is less than or equal to 0x1F, or is equal
134   *              to 0x7F) should be escaped.  Note that the ASCII NULL control
135   *              character (0x00) will always be escaped.
136   * @param  escapeDisplayableNonASCIICharacters
137   *              Indicates whether non-ASCII characters (characters whose
138   *              Unicode code point is greater than 0x7F) that are believed to
139   *              be displayable (as determined by the
140   *              {@link StaticUtils#isLikelyDisplayableCharacter} method)
141   *              should be escaped.
142   * @param  escapeNonDisplayableNonASCIICharacters
143   *              Indicates whether non-ASCII characters (characters whose
144   *              Unicode code point is greater than 0x7F) that are not believed
145   *              to be displayable (as determined by the
146   *              {@link StaticUtils#isLikelyDisplayableCharacter} method)
147   *              should be escaped.
148   * @param  escapeNonUTF8Data
149   *              Indicates whether bytes with the most significant bit set in
150   *              non-UTF-8 data should be escaped.  Note that if a value does
151   *              not represent a valid UTF-8 string, then the
152   *              {@code escapeDisplayableNonASCIICharacters} and
153   *              {@code escapeNonDisplayableNonASCIICharacters} arguments will
154   *              not be used.
155   */
156  public DNEscapingStrategy(final boolean escapeASCIIControlCharacters,
157              final boolean escapeDisplayableNonASCIICharacters,
158              final boolean escapeNonDisplayableNonASCIICharacters,
159              final boolean escapeNonUTF8Data)
160  {
161    this.escapeASCIIControlCharacters = escapeASCIIControlCharacters;
162    this.escapeDisplayableNonASCIICharacters =
163         escapeDisplayableNonASCIICharacters;
164    this.escapeNonDisplayableNonASCIICharacters =
165         escapeNonDisplayableNonASCIICharacters;
166    this.escapeNonUTF8Data = escapeNonUTF8Data;
167  }
168
169
170
171  /**
172   * Indicates whether ASCII control characters should be escaped.  Note that
173   * the ASCII NULL control character (0x00) will always be escaped.
174   *
175   * @return  {@code true} if ASCII control characters should be escaped, or
176   *          {@code false} if not.
177   */
178  public boolean escapeASCIIControlCharacters()
179  {
180    return escapeASCIIControlCharacters;
181  }
182
183
184
185  /**
186   * Indicates whether displayable non-ASCII characters (as determined by the
187   * {@link StaticUtils#isLikelyDisplayableCharacter} method) should be escaped.
188   * Note that this only applies to values that represent valid UTF-8 strings.
189   * Values that are not valid UTF-8 strings will use the setting represented
190   * by the {@link #escapeNonUTF8Data} method.
191   *
192   * @return  {@code true} if displayable non-ASCII characters should be
193   *          escaped, or {@code false} if not.
194   */
195  public boolean escapeDisplayableNonASCIICharacters()
196  {
197    return escapeDisplayableNonASCIICharacters;
198  }
199
200
201
202  /**
203   * Indicates whether non-displayable non-ASCII characters (as determined by
204   * the {@link StaticUtils#isLikelyDisplayableCharacter} method) should be
205   * escaped.  Note that this only applies to values that represent valid UTF-8
206   * strings.  Values that are not valid UTF-8 strings will use the setting
207   * represented by the {@link #escapeNonUTF8Data} method.
208   *
209   * @return  {@code true} if non-displayable non-ASCII characters should be
210   *          escaped, or {@code false} if not.
211   */
212  public boolean escapeNonDisplayableNonASCIICharacters()
213  {
214    return escapeNonDisplayableNonASCIICharacters;
215  }
216
217
218
219  /**
220   * Indicates whether bytes with the most significant bit set in non-UTF-8 data
221   * (as determined by the {@link StaticUtils#isValidUTF8} method) should be
222   * escaped.
223   *
224   * @return  {@code true} if bytes with the most significant bit set in
225   *          non-UTF-8 data should be escaped, or {@code false} if not.
226   */
227  public boolean escapeNonUTF8Data()
228  {
229    return escapeNonUTF8Data;
230  }
231
232
233
234  /**
235   * Appends an appropriately escaped representation of the provided value to
236   * the given buffer.
237   *
238   * @param  value   The value to be appended.  It must not be {@code null}.
239   * @param  buffer  The buffer to which the escaped value should be appended.
240   *                 It must not be {@code null}.
241   */
242  public void escape(@NotNull final byte[] value,
243                     @NotNull final ByteStringBuffer buffer)
244  {
245    // If the value is empty, then we don't need to do anything.
246    final int valueLength = value.length;
247    if ((value == null) || (valueLength == 0))
248    {
249      return;
250    }
251
252
253    // Iterate through the value and examine each byte.
254    Boolean isNonUTF8 = null;
255    for (int i=0; i < valueLength; i++)
256    {
257      final byte b = value[i];
258      switch (b)
259      {
260        // The following characters will always be escaped anywhere in a value.
261        case '"':
262        case '+':
263        case ',':
264        case ';':
265        case '<':
266        case '>':
267        case '\\':
268          buffer.append('\\');
269          buffer.append(b);
270          break;
271
272        // The ASCII NULL character must also always be escaped, but it should
273        // use a hex encoding.
274        case '\u0000':
275          buffer.append("\\00");
276          break;
277
278        // Spaces will only be escaped if they are the first or last character
279        // of the value.
280        case ' ':
281          if ((i == 0) || (i == (valueLength - 1)))
282          {
283            buffer.append('\\');
284          }
285          buffer.append(b);
286          break;
287
288        // The octothorpe character will only be escaped if it is the first
289        // character of a value.
290        case '#':
291          if (i == 0)
292          {
293            buffer.append('\\');
294          }
295          buffer.append(b);
296          break;
297
298        default:
299          // If the byte is between 0x00 and 0x1F (inclusive), or if it's 0x7F,
300          // then it's an ASCII control character.  Handle that appropriately.
301          if (((b >= 0x00) && (b <= 0x1F)) || (b == 0x07F))
302          {
303            if (escapeASCIIControlCharacters)
304            {
305              buffer.append('\\');
306              buffer.append(StaticUtils.toHex(b));
307            }
308            else
309            {
310              buffer.append(b);
311            }
312          }
313
314
315          // Because Java represents bytes as signed values, if a byte is
316          // greater than zero, then it's an ASCII byte and we won't escape it.
317          else if (b > 0x00)
318          {
319            buffer.append(b);
320          }
321
322
323          // If we've gotten here, then the byte is negative, which means that
324          // it's not ASCII.  If we know that it's non-UTF-8 data, then handle
325          // that in accordance with the escapeNonUTF8Data flag.  Otherwise,
326          // check to see whether it is valid UTF-8 and handle it as either a
327          // string comprised of code points or as non-UTF-8 data.
328          else
329          {
330            if (isNonUTF8 == null)
331            {
332              final byte[] remainingValueBytes = new byte[valueLength - i];
333              System.arraycopy(value, i, remainingValueBytes, 0,
334                   remainingValueBytes.length);
335              if (StaticUtils.isValidUTF8(remainingValueBytes))
336              {
337                escape(StaticUtils.toUTF8String(remainingValueBytes), buffer,
338                     (i == 0));
339                return;
340              }
341              else
342              {
343                isNonUTF8 = Boolean.TRUE;
344              }
345            }
346
347            // If we've gotten here, then we know that it's non-UTF-8 data
348            // (because we would have gone to a different method if it was
349            // valid UTF-8), so handle that in accordance with the
350            // escapeNonUTF8Data flag.
351            if (escapeNonUTF8Data)
352            {
353              buffer.append('\\');
354              buffer.append(StaticUtils.toHex(b));
355            }
356            else
357            {
358              buffer.append(b);
359            }
360          }
361          break;
362      }
363    }
364  }
365
366
367
368  /**
369   * Appends an appropriately escaped representation of the provided value to
370   * the given buffer.
371   *
372   * @param  value   The value to be appended.  It must not be {@code null}.
373   * @param  buffer  The buffer to which the escaped value should be appended.
374   *                 It must not be {@code null}.
375   */
376  public void escape(@NotNull final String value,
377                     @NotNull final ByteStringBuffer buffer)
378  {
379    escape(value, buffer, true);
380  }
381
382
383
384  /**
385   * Appends an appropriately escaped representation of the provided value to
386   * the given buffer.
387   *
388   * @param  value   The value to be appended.  It must not be {@code null}.
389   * @param  buffer  The buffer to which the escaped value should be appended.
390   *                 It must not be {@code null}.
391   */
392  public void escape(@NotNull final ByteString value,
393                     @NotNull final ByteStringBuffer buffer)
394  {
395    escape(value.getValue(), buffer);
396  }
397
398
399
400  /**
401   * Appends an appropriately escaped representation of the provided value to
402   * the given buffer.
403   *
404   * @param  value          The value to be appended.  It must not be
405   *                        {@code null}.
406   * @param  buffer         The buffer to which the escaped value should be
407   *                        appended.  It must not be {@code null}.
408   * @param  isWholeString  Indicates whether the provided string represents the
409   *                        entire value being processed, or if a portion of the
410   *                        value may have already been processed.
411   */
412  private void escape(@NotNull final String value,
413                      @NotNull final ByteStringBuffer buffer,
414                      final boolean isWholeString)
415  {
416    if ((value == null) || value.isEmpty())
417    {
418      return;
419    }
420
421    int pos = 0;
422    while (pos < value.length())
423    {
424      final int codePoint = value.codePointAt(pos);
425      switch (codePoint)
426      {
427        // The following characters will always be escaped anywhere in a value.
428        case '"':
429        case '+':
430        case ',':
431        case ';':
432        case '<':
433        case '>':
434        case '\\':
435          buffer.append('\\');
436          buffer.append((byte) codePoint);
437          break;
438
439        // The ASCII NULL character must also always be escaped, but it should
440        // use a hex encoding.
441        case '\u0000':
442          buffer.append("\\00");
443          break;
444
445        // Spaces will only be escaped if they are the first or last character
446        // of the value.
447        case ' ':
448          if (((pos == 0) && isWholeString) ||
449               (pos == (value.length() - 1)))
450          {
451            buffer.append('\\');
452          }
453          buffer.append(' ');
454          break;
455
456        // The octothorpe character will only be escaped if it is the first
457        // character of a value.
458        case '#':
459          if ((pos == 0) && isWholeString)
460          {
461            buffer.append('\\');
462          }
463          buffer.append('#');
464          break;
465
466        default:
467          // If the code point is between 0x00 and 0x1F (inclusive), or if it is
468          // 0x7F, then it's an ASCII control character.  Handle that
469          // appropriately.
470          if (((codePoint >= 0x00) && (codePoint <= 0x1F)) ||
471               (codePoint == 0x7F))
472          {
473            final byte codePointByte = (byte) codePoint;
474            if (escapeASCIIControlCharacters)
475            {
476              buffer.append('\\');
477              buffer.append(StaticUtils.toHex(codePointByte));
478            }
479            else
480            {
481              buffer.append(codePointByte);
482            }
483          }
484
485
486          // If the code point is less than 0x7F, then it's an ASCII character
487          // that we don't need to escape.
488          else if (codePoint < 0x7F)
489          {
490            buffer.append((byte) codePoint);
491          }
492
493
494          // If we've gotten here, then the code point must represent a
495          // non-ASCII character.  Determine whether it's displayable and handle
496          // it appropriately.
497          else
498          {
499            final String codePointString =
500                 new String(new int[] { codePoint }, 0, 1);
501            final byte[] codePointBytes = StaticUtils.getBytes(codePointString);
502            if (StaticUtils.isLikelyDisplayableCharacter(codePoint))
503            {
504              if (escapeDisplayableNonASCIICharacters)
505              {
506                for (final byte b : codePointBytes)
507                {
508                  buffer.append('\\');
509                  buffer.append(StaticUtils.toHex(b));
510                }
511              }
512              else
513              {
514                buffer.append(codePointBytes);
515              }
516            }
517            else
518            {
519              if (escapeNonDisplayableNonASCIICharacters)
520              {
521                for (final byte b : codePointBytes)
522                {
523                  buffer.append('\\');
524                  buffer.append(StaticUtils.toHex(b));
525                }
526              }
527              else
528              {
529                buffer.append(codePointBytes);
530              }
531            }
532          }
533          break;
534      }
535
536      final int charsPerCodePoint = Character.charCount(codePoint);
537      pos += charsPerCodePoint;
538    }
539  }
540
541
542
543  /**
544   * Retrieves a string representation of this base64 encoding strategy.
545   *
546   * @return  A string representation of this base64 encoding strategy.
547   */
548  @Override()
549  @NotNull()
550  public String toString()
551  {
552    final StringBuilder buffer = new StringBuilder();
553    toString(buffer);
554    return buffer.toString();
555  }
556
557
558
559  /**
560   * Appends a string representation of this base64 encoding strategy to the
561   * provided buffer.
562   *
563   * @param  buffer  The buffer to which the string representation should be
564   *                 appended.
565   */
566  public void toString(@NotNull final StringBuilder buffer)
567  {
568    buffer.append("DNEscapingStrategy(escapeASCIIControlCharacters=");
569    buffer.append(escapeASCIIControlCharacters);
570    buffer.append(", escapeDisplayableNonASCIICharacters=");
571    buffer.append(escapeDisplayableNonASCIICharacters);
572    buffer.append(", escapeNonDisplayableNonASCIICharacters=");
573    buffer.append(escapeNonDisplayableNonASCIICharacters);
574    buffer.append(", escapeNonUTF8Data=");
575    buffer.append(escapeNonUTF8Data);
576    buffer.append(')');
577  }
578}