001/* 002 * Copyright 2021-2024 Ping Identity Corporation 003 * All Rights Reserved. 004 */ 005/* 006 * Copyright 2021-2024 Ping Identity Corporation 007 * 008 * Licensed under the Apache License, Version 2.0 (the "License"); 009 * you may not use this file except in compliance with the License. 010 * You may obtain a copy of the License at 011 * 012 * http://www.apache.org/licenses/LICENSE-2.0 013 * 014 * Unless required by applicable law or agreed to in writing, software 015 * distributed under the License is distributed on an "AS IS" BASIS, 016 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 017 * See the License for the specific language governing permissions and 018 * limitations under the License. 019 */ 020/* 021 * Copyright (C) 2021-2024 Ping Identity Corporation 022 * 023 * This program is free software; you can redistribute it and/or modify 024 * it under the terms of the GNU General Public License (GPLv2 only) 025 * or the terms of the GNU Lesser General Public License (LGPLv2.1 only) 026 * as published by the Free Software Foundation. 027 * 028 * This program is distributed in the hope that it will be useful, 029 * but WITHOUT ANY WARRANTY; without even the implied warranty of 030 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 031 * GNU General Public License for more details. 032 * 033 * You should have received a copy of the GNU General Public License 034 * along with this program; if not, see <http://www.gnu.org/licenses>. 035 */ 036package com.unboundid.ldif; 037 038 039 040import java.io.Serializable; 041 042import com.unboundid.util.ByteString; 043import com.unboundid.util.NotMutable; 044import com.unboundid.util.NotNull; 045import com.unboundid.util.StaticUtils; 046import com.unboundid.util.ThreadSafety; 047import com.unboundid.util.ThreadSafetyLevel; 048 049 050 051/** 052 * This class defines a set of properties that can be used to indicate which 053 * types of optional base64-encoding should be performed by the LDAP SDK. 054 */ 055@NotMutable() 056@ThreadSafety(level=ThreadSafetyLevel.COMPLETELY_THREADSAFE) 057public final class Base64EncodingStrategy 058 implements Serializable 059{ 060 /** 061 * A base64-encoding strategy that represents a safe default configuration. 062 * This includes: 063 * <UL> 064 * <LI> 065 * The presence of ASCII control characters will cause a value to be 066 * base64-encoded. This is not required by the LDIF specification, but is 067 * recommended. 068 * </LI> 069 * <LI> 070 * The presence of any non-ASCII characters (whether they may be 071 * displayable or not) will cause a value to be base64-encoded as required 072 * by the LDIF specification. 073 * </LI> 074 * <LI> 075 * The presence of non-UTF-8 data will cause a value to be base64-encoded 076 * as required by the LDIF specification. 077 * </LI> 078 * </UL> 079 */ 080 @NotNull public static final Base64EncodingStrategy DEFAULT = 081 new Base64EncodingStrategy(true, true, true, true); 082 083 084 085 /** 086 * A base64-encoding strategy that indicates that the LDAP SDK should perform 087 * the minimum amount of encoding required by the specification. The presence 088 * of ASCII control characters (other than NUL, LF, and CR, which must always 089 * be base64-encoded) will not cause values to be encoded. However, the 090 * presence of any non-ASCII characters or non-UTF-8 data will cause a value 091 * to be base64-=encoded as required by the LDIF specification. 092 */ 093 @NotNull public static final Base64EncodingStrategy MINIMAL_COMPLIANT = 094 new Base64EncodingStrategy(false, true, true, true); 095 096 097 098 /** 099 * A base64-encoding strategy that indicates that the presence of non-ASCII 100 * characters that the LDAP SDK considers displayable should not cause a 101 * value to be encoded. ASCII control characters, non-displayable non-ASCII 102 * characters, and non-UTF-8 data will cause a value to be base64-encoded. 103 * Note that this NOT compliant with the LDIF specification (which technically 104 * requires base64 encoding for all non-ASCII data), but it may be user 105 * friendly in some cases. 106 */ 107 @NotNull public static final Base64EncodingStrategy 108 USER_FRIENDLY_NON_COMPLIANT = 109 new Base64EncodingStrategy(true, false, true, true); 110 111 112 113 /** 114 * A base64-encoding strategy that indicates that the LDAP SDK should perform 115 * the maximum amount of base64 encoding that it considers necessary. Any 116 * ASCII control characters, any non-ASCII data, and any non-UTF-8 data will 117 * cause a value to be base64 encoded. This is equivalent to the 118 * {@link #DEFAULT} strategy. 119 */ 120 @NotNull public static final Base64EncodingStrategy MAXIMAL = DEFAULT; 121 122 123 124 /** 125 * The serial version UID for this serializable class. 126 */ 127 private static final long serialVersionUID = -5787811215448347345L; 128 129 130 131 // Indicates whether the presence of one or more ASCII control characters 132 // should cause a value to be base64-encoded. 133 private final boolean encodeASCIIControlCharacters; 134 135 // Indicates whether the presence of one or more displayable non-ASCII 136 // characters should cause a value to be base64-encoded. 137 private final boolean encodeDisplayableNonASCIICharacters; 138 139 // Indicates whether the presence of one or more non-displayable non-ASCII 140 // characters should cause a value to be base64-encoded. 141 private final boolean encodeNonDisplayableNonASCIICharacters; 142 143 // Indicates whether values that do not represent valid UTF-8 strings should 144 // be base64-encoded. 145 private final boolean encodeNonUTF8Data; 146 147 148 149 /** 150 * Creates a new base64 encoding strategy with the specified settings. 151 * 152 * @param encodeASCIIControlCharacters 153 * Indicates whether the presence of one or more ASCII control 154 * characters (characters whose Unicode code point is less than 155 * or equal to 0x01F, or is equal to 0x7F) should cause a value 156 * to be base64-encoded. Note that as per RFC 2849, the presence 157 * of the null (0x00), line feed (0x0A), and carriage return 158 * (0x0D) ASCII control characters will always cause a value to 159 * be base64-encoded. 160 * @param encodeDisplayableNonASCIICharacters 161 * Indicates whether the presence of one or more non-ASCII 162 * characters (characters whose Unicode code point is greater 163 * than 0x7F) that are believed to be displayable (as determined 164 * by the {@link StaticUtils#isLikelyDisplayableCharacter} 165 * method) should cause a value to be base64-encoded. 166 * @param encodeNonDisplayableNonASCIICharacters 167 * Indicates whether the presence of one or more non-ASCII 168 * characters (characters whose Unicode code point is greater 169 * than 0x7F) that are not believed to be displayable (as 170 * determined by the 171 * {@link StaticUtils#isLikelyDisplayableCharacter} method) 172 * should cause a value to be base64-encoded. 173 * @param encodeNonUTF8Data 174 * Indicates whether non-UTF-8-encoded data should be 175 * base64-encoded. Note that if a value does not represent a 176 * valid UTF-8 string, then the 177 * {@code encodeDisplayableNonASCIICharacters} and 178 * {@code encodeNonDisplayableNonASCIICharacters} arguments will 179 * not be used. 180 */ 181 public Base64EncodingStrategy(final boolean encodeASCIIControlCharacters, 182 final boolean encodeDisplayableNonASCIICharacters, 183 final boolean encodeNonDisplayableNonASCIICharacters, 184 final boolean encodeNonUTF8Data) 185 { 186 this.encodeASCIIControlCharacters = encodeASCIIControlCharacters; 187 this.encodeDisplayableNonASCIICharacters = 188 encodeDisplayableNonASCIICharacters; 189 this.encodeNonDisplayableNonASCIICharacters = 190 encodeNonDisplayableNonASCIICharacters; 191 this.encodeNonUTF8Data = encodeNonUTF8Data; 192 } 193 194 195 196 /** 197 * Indicates whether the presence of one or more ASCII control characters 198 * should cause a value to be base64-encoded. ASCII control characters other 199 * than NUL, LF, and CR are not required to be base64-encoded by the LDIF 200 * specification, but it is generally recommended that they be encoded. 201 * 202 * @return {@code true} if the presence of one or more ASCII control 203 * characters should cause a value to be base64-encoded, or 204 * {@code false} if not. 205 */ 206 public boolean encodeASCIIControlCharacters() 207 { 208 return encodeASCIIControlCharacters; 209 } 210 211 212 213 /** 214 * Indicates whether the presence of one or more displayable non-ASCII 215 * characters (as determined by the 216 * {@link StaticUtils#isLikelyDisplayableCharacter} method) should cause a 217 * value to be base64-encoded. Note that this only applies to values that 218 * represent valid UTF-8 strings. Values that are not valid UTF-8 strings 219 * will use the setting represented by the {@link #encodeNonUTF8Data} method. 220 * Also note that all non-ASCII characters are required to be base64 encoded 221 * by the LDIF specification, but there may be cases in which it may be 222 * desirable to relax this behavior when displaying to an end user. 223 * 224 * @return {@code true} if the presence of one or more displayable 225 * non-ASCII characters should cause a value to be base64-encoded, 226 * or {@code false} if not. 227 */ 228 public boolean encodeDisplayableNonASCIICharacters() 229 { 230 return encodeDisplayableNonASCIICharacters; 231 } 232 233 234 235 /** 236 * Indicates whether the presence of one or more non-displayable non-ASCII 237 * characters (as determined by the 238 * {@link StaticUtils#isLikelyDisplayableCharacter} method) should cause a 239 * value to be base64-encoded. Note that this only applies to values that 240 * represent valid UTF-8 strings. Values that are not valid UTF-8 strings 241 * will use the setting represented by the {@link #encodeNonUTF8Data} method. 242 * Also note that all non-ASCII characters are required to be base64 encoded 243 * by the LDIF specification, but there may be cases in which it may be 244 * desirable to relax this behavior when displaying to an end user. 245 * 246 * @return {@code true} if the presence of one or more non-displayable 247 * non-ASCII characters should cause a value to be base64-encoded, 248 * or {@code false} if not. 249 */ 250 public boolean encodeNonDisplayableNonASCIICharacters() 251 { 252 return encodeNonDisplayableNonASCIICharacters; 253 } 254 255 256 257 /** 258 * Indicates whether values that do not represent valid UTF-8 strings (as 259 * determined by the {@link StaticUtils#isValidUTF8} method) should be 260 * base64-encoded. Note that all non-ASCII data (which includes all non-UTF-8 261 * data) is required to be base64 encoded, but there may be cases in which it 262 * may be desirable to relax this behavior when displaying to an end user, 263 * especially when using non-UTF-8 character sets. 264 * 265 * @return {@code true} if values that do not represent valid UTF-8 strings 266 * should be base64-encoded, or {@code false} if not. 267 */ 268 public boolean encodeNonUTF8Data() 269 { 270 return encodeNonUTF8Data; 271 } 272 273 274 275 /** 276 * Indicates whether the provided value should be base64-encoded in accordance 277 * with this strategy. 278 * 279 * @param value The value for which to make the determination. It must not 280 * be {@code null}. 281 * 282 * @return {@code true} if the provided value should be base64-encoded in 283 * accordance with this strategy, or {@code false} if not. 284 */ 285 public boolean shouldBase64Encode(@NotNull final byte[] value) 286 { 287 // If the value is empty, then it does not need to be encoded. 288 if ((value == null) || (value.length == 0)) 289 { 290 return false; 291 } 292 293 294 // If the value starts with a space, colon, or less-than character, then it 295 // must be base64-encoded. 296 switch (value[0]) 297 { 298 case ' ': 299 case ':': 300 case '<': 301 return true; 302 } 303 304 305 // If the value ends with a space, then it must be base64-encoded. 306 if (value[value.length - 1] == ' ') 307 { 308 return true; 309 } 310 311 312 // Examine all the bytes that make up the value. If we encounter any 313 // non-ASCII characters, then handle that specially. 314 for (int i=0; i < value.length; i++) 315 { 316 // Bytes that are between 0x00 and 0x1F are ASCII control characters. The 317 // null (0x00), line feed (0x0A) and carriage return (0x0D) characters 318 // must always base base64-encoded. For other bytes, use the 319 // encodeASCIIControlCharacters flag. 320 final byte b = value[i]; 321 if ((b >= 0x00) && (b <= 0x1F)) 322 { 323 switch (b) 324 { 325 case 0x00: 326 case 0x0A: 327 case 0x0D: 328 return true; 329 default: 330 if (encodeASCIIControlCharacters) 331 { 332 return true; 333 } 334 break; 335 } 336 } 337 338 // Byte 0x7F is the ASCII delete control character and should also be 339 // controlled by the encodeASCIIControlCharacters flag. 340 else if (b == 0x07F) 341 { 342 if (encodeASCIIControlCharacters) 343 { 344 return true; 345 } 346 } 347 348 349 // All bytes between 0x20 and 0x7E (inclusive) should be fine. All other 350 // bytes will have the most significant bit set, and because Java bytes 351 // are signed, they will be negative. If we encounter any negative bytes, 352 // then that means the value contains non-ASCII characters or doesn't 353 // represent a UTF-8 string. If it's not valid UTF-8, then we'll handle 354 // it in accordance with the encodeNonUTF8Data flag. Otherwise, we'll 355 // convert the remainder of the byte to a string and iterate across the 356 // code points for the rest of the determination. 357 else if (b < 0x00) 358 { 359 final byte[] remainingBytes = new byte[value.length - i]; 360 System.arraycopy(value, i, remainingBytes, 0, remainingBytes.length); 361 if (StaticUtils.isValidUTF8(remainingBytes)) 362 { 363 final String valueString = StaticUtils.toUTF8String(remainingBytes); 364 return shouldBase64EncodePreValidatedString(valueString); 365 } 366 else 367 { 368 return encodeNonUTF8Data; 369 } 370 } 371 } 372 373 374 // If we've gotten here, then the value does not need to be base64-encoded. 375 return false; 376 } 377 378 379 380 /** 381 * Indicates whether the provided value should be base64-encoded in accordance 382 * with this strategy. 383 * 384 * @param value The value for which to make the determination. It must not 385 * be {@code null}. 386 * 387 * @return {@code true} if the provided value should be base64-encoded in 388 * accordance with this strategy, or {@code false} if not. 389 */ 390 public boolean shouldBase64Encode(@NotNull final String value) 391 { 392 // If the value is empty, then it does not need to be encoded. 393 if ((value == null) || (value.length() == 0)) 394 { 395 return false; 396 } 397 398 399 // If the value starts with a space, colon, or less-than character, then it 400 // must be base64-encoded. 401 switch (value.charAt(0)) 402 { 403 case ' ': 404 case ':': 405 case '<': 406 return true; 407 } 408 409 410 // If the value ends with a space, then it must be base64-encoded. 411 if (value.charAt(value.length() - 1) == ' ') 412 { 413 return true; 414 } 415 416 417 // Examine all of the characters in the string as code points so that we can 418 // handle non-ASCII characters properly. 419 return shouldBase64EncodePreValidatedString(value); 420 } 421 422 423 424 /** 425 * Indicates whether the provided string should be base64-encoded in 426 * accordance with this strategy. Note that all of the appropriate first and 427 * last character validation must have already been performed. 428 * 429 * @param s The string to validate. It must not be {@code null}. 430 * 431 * @return {@code true} if the value should be base64-encoded in accordance 432 * with this strategry, or {@code false} if not. 433 */ 434 private boolean shouldBase64EncodePreValidatedString(@NotNull final String s) 435 { 436 int pos = 0; 437 while (pos < s.length()) 438 { 439 final int codePoint = s.codePointAt(pos); 440 441 442 // Code points that are between 0x00 and 0x1F are ASCII control 443 // characters. The null (0x00), line feed (0x0A), and carriage return 444 // (0x0D) characters must always be base64-encoded. For other bytes, use 445 // the encodeASCIIControlCharacters flag. 446 // 447 // Note that code points will never be negative, so we don't have to check 448 // for a lower bound. 449 if (codePoint <=0x1F) 450 { 451 switch (codePoint) 452 { 453 case 0x00: 454 case 0x0A: 455 case 0x0D: 456 return true; 457 default: 458 if (encodeASCIIControlCharacters) 459 { 460 return true; 461 } 462 break; 463 } 464 } 465 466 467 // Code point 0x7F is the ASCII delete control character and should also 468 // be controlled by the encodeASCIIControlCharacters flag. 469 else if (codePoint == 0x7F) 470 { 471 if (encodeASCIIControlCharacters) 472 { 473 return true; 474 } 475 } 476 477 478 // If the code point is greater than 0x7F, then it's a non-ASCII character 479 // and the behavior should be controlled by either the 480 // encodeDisplayableNonASCIICharacters or 481 // encodeNonDisplayableNonASCIICharacters flag, whichever is appropriate. 482 else if (codePoint > 0x7F) 483 { 484 if (StaticUtils.isLikelyDisplayableCharacter(codePoint)) 485 { 486 if (encodeDisplayableNonASCIICharacters) 487 { 488 return true; 489 } 490 } 491 else 492 { 493 if (encodeNonDisplayableNonASCIICharacters) 494 { 495 return true; 496 } 497 } 498 } 499 500 501 // Increment the position index based on the number of characters in the 502 // code point. Some code points may require multiple characters to 503 // represent. 504 final int charsPerCodePoint = Character.charCount(codePoint); 505 pos += charsPerCodePoint; 506 } 507 508 509 // If we've gotten here, then the value does not need to be base64-encoded. 510 return false; 511 } 512 513 514 515 /** 516 * Indicates whether the provided value should be base64-encoded in accordance 517 * with this strategy. 518 * 519 * @param value The value for which to make the determination. It must not 520 * be {@code null}. 521 * 522 * @return {@code true} if the provided value should be base64-encoded in 523 * accordance with this strategy, or {@code false} if not. 524 */ 525 public boolean shouldBase64Encode(@NotNull final ByteString value) 526 { 527 return shouldBase64Encode(value.getValue()); 528 } 529 530 531 532 /** 533 * Retrieves a string representation of this base64 encoding strategy. 534 * 535 * @return A string representation of this base64 encoding strategy. 536 */ 537 @Override() 538 @NotNull() 539 public String toString() 540 { 541 final StringBuilder buffer = new StringBuilder(); 542 toString(buffer); 543 return buffer.toString(); 544 } 545 546 547 548 /** 549 * Appends a string representation of this base64 encoding strategy to the 550 * provided buffer. 551 * 552 * @param buffer The buffer to which the string representation should be 553 * appended. 554 */ 555 public void toString(@NotNull final StringBuilder buffer) 556 { 557 buffer.append("Base64EncodingStrategy(encodeASCIIControlCharacters="); 558 buffer.append(encodeASCIIControlCharacters); 559 buffer.append(", encodeDisplayableNonASCIICharacters="); 560 buffer.append(encodeDisplayableNonASCIICharacters); 561 buffer.append(", encodeNonDisplayableNonASCIICharacters="); 562 buffer.append(encodeNonDisplayableNonASCIICharacters); 563 buffer.append(", encodeNonUTF8Data="); 564 buffer.append(encodeNonUTF8Data); 565 buffer.append(')'); 566 } 567}