001/* 002 * Copyright 2021-2024 Ping Identity Corporation 003 * All Rights Reserved. 004 */ 005/* 006 * Copyright 2021-2024 Ping Identity Corporation 007 * 008 * Licensed under the Apache License, Version 2.0 (the "License"); 009 * you may not use this file except in compliance with the License. 010 * You may obtain a copy of the License at 011 * 012 * http://www.apache.org/licenses/LICENSE-2.0 013 * 014 * Unless required by applicable law or agreed to in writing, software 015 * distributed under the License is distributed on an "AS IS" BASIS, 016 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 017 * See the License for the specific language governing permissions and 018 * limitations under the License. 019 */ 020/* 021 * Copyright (C) 2021-2024 Ping Identity Corporation 022 * 023 * This program is free software; you can redistribute it and/or modify 024 * it under the terms of the GNU General Public License (GPLv2 only) 025 * or the terms of the GNU Lesser General Public License (LGPLv2.1 only) 026 * as published by the Free Software Foundation. 027 * 028 * This program is distributed in the hope that it will be useful, 029 * but WITHOUT ANY WARRANTY; without even the implied warranty of 030 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 031 * GNU General Public License for more details. 032 * 033 * You should have received a copy of the GNU General Public License 034 * along with this program; if not, see <http://www.gnu.org/licenses>. 035 */ 036package com.unboundid.ldap.sdk; 037 038 039 040import java.io.Serializable; 041 042import com.unboundid.util.ByteString; 043import com.unboundid.util.ByteStringBuffer; 044import com.unboundid.util.NotMutable; 045import com.unboundid.util.NotNull; 046import com.unboundid.util.StaticUtils; 047import com.unboundid.util.ThreadSafety; 048import com.unboundid.util.ThreadSafetyLevel; 049 050 051 052/** 053 * This class defines a set of properties that can be used to indicate which 054 * types of optional escaping should be performed by the LDAP SDK when 055 * constructing the string representation of DNs and RDNs. 056 */ 057@NotMutable() 058@ThreadSafety(level=ThreadSafetyLevel.COMPLETELY_THREADSAFE) 059public final class DNEscapingStrategy 060 implements Serializable 061{ 062 /** 063 * A DN escaping strategy that represents a default, user-friendly 064 * configuration. This includes: 065 * <UL> 066 * <LI> 067 * ASCII control characters will be escaped. 068 * </LI> 069 * <LI> 070 * Displayable non-ASCII characters will not be escaped. 071 * </LI> 072 * <LI> 073 * Non-displayable non-ASCII characters will be escaped. 074 * </LI> 075 * <LI> 076 * In non-UTF-8 data, all bytes with the most significant bit set will be 077 * escaped. 078 * </LI> 079 * </UL> 080 */ 081 @NotNull public static final DNEscapingStrategy DEFAULT = 082 new DNEscapingStrategy(true, false, true, true); 083 084 085 086 /** 087 * A DN escaping strategy that indicates that the LDAP SDK should only perform 088 * required escaping and should not perform any optional escaping. 089 */ 090 @NotNull public static final DNEscapingStrategy MINIMAL = 091 new DNEscapingStrategy(false, false, false, false); 092 093 094 095 /** 096 * A base64-encoding strategy that indicates that the LDAP SDK should 097 * perform the maximum amount of DN escaping that is considered reasonable. 098 * All ASCII control characters, all non-ASCII characters and non-UTF-8 bytes 099 * will be escaped. 100 */ 101 @NotNull public static final DNEscapingStrategy MAXIMAL = 102 new DNEscapingStrategy(true, true, true, true); 103 104 105 106 /** 107 * The serial version UID for this serializable class. 108 */ 109 private static final long serialVersionUID = -5438646712027992419L; 110 111 112 113 // Indicates whether ASCII control characters should be escaped. 114 private final boolean escapeASCIIControlCharacters; 115 116 // Indicates whether displayable non-ASCII characters should be escaped. 117 private final boolean escapeDisplayableNonASCIICharacters; 118 119 // Indicates whether non-displayable non-ASCII characters should be escaped. 120 private final boolean escapeNonDisplayableNonASCIICharacters; 121 122 // Indicates whether bytes with the most significant bit set in non-UTF-8 data 123 // should be escaped. 124 private final boolean escapeNonUTF8Data; 125 126 127 128 /** 129 * Creates a new DN escaping strategy with the specified settings. 130 * 131 * @param escapeASCIIControlCharacters 132 * Indicates whether ASCII control characters (characters whose 133 * Unicode code point is less than or equal to 0x1F, or is equal 134 * to 0x7F) should be escaped. Note that the ASCII NULL control 135 * character (0x00) will always be escaped. 136 * @param escapeDisplayableNonASCIICharacters 137 * Indicates whether non-ASCII characters (characters whose 138 * Unicode code point is greater than 0x7F) that are believed to 139 * be displayable (as determined by the 140 * {@link StaticUtils#isLikelyDisplayableCharacter} method) 141 * should be escaped. 142 * @param escapeNonDisplayableNonASCIICharacters 143 * Indicates whether non-ASCII characters (characters whose 144 * Unicode code point is greater than 0x7F) that are not believed 145 * to be displayable (as determined by the 146 * {@link StaticUtils#isLikelyDisplayableCharacter} method) 147 * should be escaped. 148 * @param escapeNonUTF8Data 149 * Indicates whether bytes with the most significant bit set in 150 * non-UTF-8 data should be escaped. Note that if a value does 151 * not represent a valid UTF-8 string, then the 152 * {@code escapeDisplayableNonASCIICharacters} and 153 * {@code escapeNonDisplayableNonASCIICharacters} arguments will 154 * not be used. 155 */ 156 public DNEscapingStrategy(final boolean escapeASCIIControlCharacters, 157 final boolean escapeDisplayableNonASCIICharacters, 158 final boolean escapeNonDisplayableNonASCIICharacters, 159 final boolean escapeNonUTF8Data) 160 { 161 this.escapeASCIIControlCharacters = escapeASCIIControlCharacters; 162 this.escapeDisplayableNonASCIICharacters = 163 escapeDisplayableNonASCIICharacters; 164 this.escapeNonDisplayableNonASCIICharacters = 165 escapeNonDisplayableNonASCIICharacters; 166 this.escapeNonUTF8Data = escapeNonUTF8Data; 167 } 168 169 170 171 /** 172 * Indicates whether ASCII control characters should be escaped. Note that 173 * the ASCII NULL control character (0x00) will always be escaped. 174 * 175 * @return {@code true} if ASCII control characters should be escaped, or 176 * {@code false} if not. 177 */ 178 public boolean escapeASCIIControlCharacters() 179 { 180 return escapeASCIIControlCharacters; 181 } 182 183 184 185 /** 186 * Indicates whether displayable non-ASCII characters (as determined by the 187 * {@link StaticUtils#isLikelyDisplayableCharacter} method) should be escaped. 188 * Note that this only applies to values that represent valid UTF-8 strings. 189 * Values that are not valid UTF-8 strings will use the setting represented 190 * by the {@link #escapeNonUTF8Data} method. 191 * 192 * @return {@code true} if displayable non-ASCII characters should be 193 * escaped, or {@code false} if not. 194 */ 195 public boolean escapeDisplayableNonASCIICharacters() 196 { 197 return escapeDisplayableNonASCIICharacters; 198 } 199 200 201 202 /** 203 * Indicates whether non-displayable non-ASCII characters (as determined by 204 * the {@link StaticUtils#isLikelyDisplayableCharacter} method) should be 205 * escaped. Note that this only applies to values that represent valid UTF-8 206 * strings. Values that are not valid UTF-8 strings will use the setting 207 * represented by the {@link #escapeNonUTF8Data} method. 208 * 209 * @return {@code true} if non-displayable non-ASCII characters should be 210 * escaped, or {@code false} if not. 211 */ 212 public boolean escapeNonDisplayableNonASCIICharacters() 213 { 214 return escapeNonDisplayableNonASCIICharacters; 215 } 216 217 218 219 /** 220 * Indicates whether bytes with the most significant bit set in non-UTF-8 data 221 * (as determined by the {@link StaticUtils#isValidUTF8} method) should be 222 * escaped. 223 * 224 * @return {@code true} if bytes with the most significant bit set in 225 * non-UTF-8 data should be escaped, or {@code false} if not. 226 */ 227 public boolean escapeNonUTF8Data() 228 { 229 return escapeNonUTF8Data; 230 } 231 232 233 234 /** 235 * Appends an appropriately escaped representation of the provided value to 236 * the given buffer. 237 * 238 * @param value The value to be appended. It must not be {@code null}. 239 * @param buffer The buffer to which the escaped value should be appended. 240 * It must not be {@code null}. 241 */ 242 public void escape(@NotNull final byte[] value, 243 @NotNull final ByteStringBuffer buffer) 244 { 245 // If the value is empty, then we don't need to do anything. 246 final int valueLength = value.length; 247 if ((value == null) || (valueLength == 0)) 248 { 249 return; 250 } 251 252 253 // Iterate through the value and examine each byte. 254 Boolean isNonUTF8 = null; 255 for (int i=0; i < valueLength; i++) 256 { 257 final byte b = value[i]; 258 switch (b) 259 { 260 // The following characters will always be escaped anywhere in a value. 261 case '"': 262 case '+': 263 case ',': 264 case ';': 265 case '<': 266 case '>': 267 case '\\': 268 buffer.append('\\'); 269 buffer.append(b); 270 break; 271 272 // The ASCII NULL character must also always be escaped, but it should 273 // use a hex encoding. 274 case '\u0000': 275 buffer.append("\\00"); 276 break; 277 278 // Spaces will only be escaped if they are the first or last character 279 // of the value. 280 case ' ': 281 if ((i == 0) || (i == (valueLength - 1))) 282 { 283 buffer.append('\\'); 284 } 285 buffer.append(b); 286 break; 287 288 // The octothorpe character will only be escaped if it is the first 289 // character of a value. 290 case '#': 291 if (i == 0) 292 { 293 buffer.append('\\'); 294 } 295 buffer.append(b); 296 break; 297 298 default: 299 // If the byte is between 0x00 and 0x1F (inclusive), or if it's 0x7F, 300 // then it's an ASCII control character. Handle that appropriately. 301 if (((b >= 0x00) && (b <= 0x1F)) || (b == 0x07F)) 302 { 303 if (escapeASCIIControlCharacters) 304 { 305 buffer.append('\\'); 306 buffer.append(StaticUtils.toHex(b)); 307 } 308 else 309 { 310 buffer.append(b); 311 } 312 } 313 314 315 // Because Java represents bytes as signed values, if a byte is 316 // greater than zero, then it's an ASCII byte and we won't escape it. 317 else if (b > 0x00) 318 { 319 buffer.append(b); 320 } 321 322 323 // If we've gotten here, then the byte is negative, which means that 324 // it's not ASCII. If we know that it's non-UTF-8 data, then handle 325 // that in accordance with the escapeNonUTF8Data flag. Otherwise, 326 // check to see whether it is valid UTF-8 and handle it as either a 327 // string comprised of code points or as non-UTF-8 data. 328 else 329 { 330 if (isNonUTF8 == null) 331 { 332 final byte[] remainingValueBytes = new byte[valueLength - i]; 333 System.arraycopy(value, i, remainingValueBytes, 0, 334 remainingValueBytes.length); 335 if (StaticUtils.isValidUTF8(remainingValueBytes)) 336 { 337 escape(StaticUtils.toUTF8String(remainingValueBytes), buffer, 338 (i == 0)); 339 return; 340 } 341 else 342 { 343 isNonUTF8 = Boolean.TRUE; 344 } 345 } 346 347 // If we've gotten here, then we know that it's non-UTF-8 data 348 // (because we would have gone to a different method if it was 349 // valid UTF-8), so handle that in accordance with the 350 // escapeNonUTF8Data flag. 351 if (escapeNonUTF8Data) 352 { 353 buffer.append('\\'); 354 buffer.append(StaticUtils.toHex(b)); 355 } 356 else 357 { 358 buffer.append(b); 359 } 360 } 361 break; 362 } 363 } 364 } 365 366 367 368 /** 369 * Appends an appropriately escaped representation of the provided value to 370 * the given buffer. 371 * 372 * @param value The value to be appended. It must not be {@code null}. 373 * @param buffer The buffer to which the escaped value should be appended. 374 * It must not be {@code null}. 375 */ 376 public void escape(@NotNull final String value, 377 @NotNull final ByteStringBuffer buffer) 378 { 379 escape(value, buffer, true); 380 } 381 382 383 384 /** 385 * Appends an appropriately escaped representation of the provided value to 386 * the given buffer. 387 * 388 * @param value The value to be appended. It must not be {@code null}. 389 * @param buffer The buffer to which the escaped value should be appended. 390 * It must not be {@code null}. 391 */ 392 public void escape(@NotNull final ByteString value, 393 @NotNull final ByteStringBuffer buffer) 394 { 395 escape(value.getValue(), buffer); 396 } 397 398 399 400 /** 401 * Appends an appropriately escaped representation of the provided value to 402 * the given buffer. 403 * 404 * @param value The value to be appended. It must not be 405 * {@code null}. 406 * @param buffer The buffer to which the escaped value should be 407 * appended. It must not be {@code null}. 408 * @param isWholeString Indicates whether the provided string represents the 409 * entire value being processed, or if a portion of the 410 * value may have already been processed. 411 */ 412 private void escape(@NotNull final String value, 413 @NotNull final ByteStringBuffer buffer, 414 final boolean isWholeString) 415 { 416 if ((value == null) || value.isEmpty()) 417 { 418 return; 419 } 420 421 int pos = 0; 422 while (pos < value.length()) 423 { 424 final int codePoint = value.codePointAt(pos); 425 switch (codePoint) 426 { 427 // The following characters will always be escaped anywhere in a value. 428 case '"': 429 case '+': 430 case ',': 431 case ';': 432 case '<': 433 case '>': 434 case '\\': 435 buffer.append('\\'); 436 buffer.append((byte) codePoint); 437 break; 438 439 // The ASCII NULL character must also always be escaped, but it should 440 // use a hex encoding. 441 case '\u0000': 442 buffer.append("\\00"); 443 break; 444 445 // Spaces will only be escaped if they are the first or last character 446 // of the value. 447 case ' ': 448 if (((pos == 0) && isWholeString) || 449 (pos == (value.length() - 1))) 450 { 451 buffer.append('\\'); 452 } 453 buffer.append(' '); 454 break; 455 456 // The octothorpe character will only be escaped if it is the first 457 // character of a value. 458 case '#': 459 if ((pos == 0) && isWholeString) 460 { 461 buffer.append('\\'); 462 } 463 buffer.append('#'); 464 break; 465 466 default: 467 // If the code point is between 0x00 and 0x1F (inclusive), or if it is 468 // 0x7F, then it's an ASCII control character. Handle that 469 // appropriately. 470 if (((codePoint >= 0x00) && (codePoint <= 0x1F)) || 471 (codePoint == 0x7F)) 472 { 473 final byte codePointByte = (byte) codePoint; 474 if (escapeASCIIControlCharacters) 475 { 476 buffer.append('\\'); 477 buffer.append(StaticUtils.toHex(codePointByte)); 478 } 479 else 480 { 481 buffer.append(codePointByte); 482 } 483 } 484 485 486 // If the code point is less than 0x7F, then it's an ASCII character 487 // that we don't need to escape. 488 else if (codePoint < 0x7F) 489 { 490 buffer.append((byte) codePoint); 491 } 492 493 494 // If we've gotten here, then the code point must represent a 495 // non-ASCII character. Determine whether it's displayable and handle 496 // it appropriately. 497 else 498 { 499 final String codePointString = 500 new String(new int[] { codePoint }, 0, 1); 501 final byte[] codePointBytes = StaticUtils.getBytes(codePointString); 502 if (StaticUtils.isLikelyDisplayableCharacter(codePoint)) 503 { 504 if (escapeDisplayableNonASCIICharacters) 505 { 506 for (final byte b : codePointBytes) 507 { 508 buffer.append('\\'); 509 buffer.append(StaticUtils.toHex(b)); 510 } 511 } 512 else 513 { 514 buffer.append(codePointBytes); 515 } 516 } 517 else 518 { 519 if (escapeNonDisplayableNonASCIICharacters) 520 { 521 for (final byte b : codePointBytes) 522 { 523 buffer.append('\\'); 524 buffer.append(StaticUtils.toHex(b)); 525 } 526 } 527 else 528 { 529 buffer.append(codePointBytes); 530 } 531 } 532 } 533 break; 534 } 535 536 final int charsPerCodePoint = Character.charCount(codePoint); 537 pos += charsPerCodePoint; 538 } 539 } 540 541 542 543 /** 544 * Retrieves a string representation of this base64 encoding strategy. 545 * 546 * @return A string representation of this base64 encoding strategy. 547 */ 548 @Override() 549 @NotNull() 550 public String toString() 551 { 552 final StringBuilder buffer = new StringBuilder(); 553 toString(buffer); 554 return buffer.toString(); 555 } 556 557 558 559 /** 560 * Appends a string representation of this base64 encoding strategy to the 561 * provided buffer. 562 * 563 * @param buffer The buffer to which the string representation should be 564 * appended. 565 */ 566 public void toString(@NotNull final StringBuilder buffer) 567 { 568 buffer.append("DNEscapingStrategy(escapeASCIIControlCharacters="); 569 buffer.append(escapeASCIIControlCharacters); 570 buffer.append(", escapeDisplayableNonASCIICharacters="); 571 buffer.append(escapeDisplayableNonASCIICharacters); 572 buffer.append(", escapeNonDisplayableNonASCIICharacters="); 573 buffer.append(escapeNonDisplayableNonASCIICharacters); 574 buffer.append(", escapeNonUTF8Data="); 575 buffer.append(escapeNonUTF8Data); 576 buffer.append(')'); 577 } 578}