001 /* 002 * Copyright 2007-2016 UnboundID Corp. 003 * All Rights Reserved. 004 */ 005 /* 006 * Copyright (C) 2008-2016 UnboundID Corp. 007 * 008 * This program is free software; you can redistribute it and/or modify 009 * it under the terms of the GNU General Public License (GPLv2 only) 010 * or the terms of the GNU Lesser General Public License (LGPLv2.1 only) 011 * as published by the Free Software Foundation. 012 * 013 * This program is distributed in the hope that it will be useful, 014 * but WITHOUT ANY WARRANTY; without even the implied warranty of 015 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 016 * GNU General Public License for more details. 017 * 018 * You should have received a copy of the GNU General Public License 019 * along with this program; if not, see <http://www.gnu.org/licenses>. 020 */ 021 package com.unboundid.ldif; 022 023 024 025 import java.io.BufferedReader; 026 import java.io.BufferedWriter; 027 import java.io.File; 028 import java.io.FileInputStream; 029 import java.io.FileWriter; 030 import java.io.InputStream; 031 import java.io.InputStreamReader; 032 import java.io.IOException; 033 import java.text.ParseException; 034 import java.util.ArrayList; 035 import java.util.Collection; 036 import java.util.Iterator; 037 import java.util.HashSet; 038 import java.util.LinkedHashMap; 039 import java.util.List; 040 import java.util.Set; 041 import java.util.concurrent.BlockingQueue; 042 import java.util.concurrent.ArrayBlockingQueue; 043 import java.util.concurrent.TimeUnit; 044 import java.util.concurrent.atomic.AtomicBoolean; 045 import java.nio.charset.Charset; 046 047 import com.unboundid.asn1.ASN1OctetString; 048 import com.unboundid.ldap.matchingrules.CaseIgnoreStringMatchingRule; 049 import com.unboundid.ldap.matchingrules.MatchingRule; 050 import com.unboundid.ldap.sdk.Attribute; 051 import com.unboundid.ldap.sdk.Control; 052 import com.unboundid.ldap.sdk.Entry; 053 import com.unboundid.ldap.sdk.Modification; 054 
import com.unboundid.ldap.sdk.ModificationType; 055 import com.unboundid.ldap.sdk.LDAPException; 056 import com.unboundid.ldap.sdk.schema.AttributeTypeDefinition; 057 import com.unboundid.ldap.sdk.schema.Schema; 058 import com.unboundid.util.AggregateInputStream; 059 import com.unboundid.util.Base64; 060 import com.unboundid.util.LDAPSDKThreadFactory; 061 import com.unboundid.util.parallel.AsynchronousParallelProcessor; 062 import com.unboundid.util.parallel.Result; 063 import com.unboundid.util.parallel.ParallelProcessor; 064 import com.unboundid.util.parallel.Processor; 065 066 import static com.unboundid.ldif.LDIFMessages.*; 067 import static com.unboundid.util.Debug.*; 068 import static com.unboundid.util.StaticUtils.*; 069 import static com.unboundid.util.Validator.*; 070 071 /** 072 * This class provides an LDIF reader, which can be used to read and decode 073 * entries and change records from a data source using the LDAP Data Interchange 074 * Format as per <A HREF="http://www.ietf.org/rfc/rfc2849.txt">RFC 2849</A>. 075 * <BR> 076 * This class is not synchronized. If multiple threads read from the 077 * LDIFReader, they must be synchronized externally. 078 * <BR><BR> 079 * <H2>Example</H2> 080 * The following example iterates through all entries contained in an LDIF file 081 * and attempts to add them to a directory server: 082 * <PRE> 083 * LDIFReader ldifReader = new LDIFReader(pathToLDIFFile); 084 * 085 * int entriesRead = 0; 086 * int entriesAdded = 0; 087 * int errorsEncountered = 0; 088 * while (true) 089 * { 090 * Entry entry; 091 * try 092 * { 093 * entry = ldifReader.readEntry(); 094 * if (entry == null) 095 * { 096 * // All entries have been read. 
*       break;
 *     }
 *
 *     entriesRead++;
 *   }
 *   catch (LDIFException le)
 *   {
 *     errorsEncountered++;
 *     if (le.mayContinueReading())
 *     {
 *       // A recoverable error occurred while attempting to read an entry,
 *       // at or near line number le.getLineNumber()
 *       // The entry will be skipped, but we'll try to keep reading from the
 *       // LDIF file.
 *       continue;
 *     }
 *     else
 *     {
 *       // An unrecoverable error occurred while attempting to read an entry
 *       // at or near line number le.getLineNumber()
 *       // No further LDIF processing will be performed.
 *       break;
 *     }
 *   }
 *   catch (IOException ioe)
 *   {
 *     // An I/O error occurred while attempting to read from the LDIF file.
 *     // No further LDIF processing will be performed.
 *     errorsEncountered++;
 *     break;
 *   }
 *
 *   LDAPResult addResult;
 *   try
 *   {
 *     addResult = connection.add(entry);
 *     // If we got here, then the change should have been processed
 *     // successfully.
 *     entriesAdded++;
 *   }
 *   catch (LDAPException le)
 *   {
 *     // If we got here, then the change attempt failed.
 *     addResult = le.toLDAPResult();
 *     errorsEncountered++;
 *   }
 * }
 *
 * ldifReader.close();
 * </PRE>
 */
public final class LDIFReader
{
  /**
   * The default buffer size (128KB) that will be used when reading from the
   * data source.
   */
  public static final int DEFAULT_BUFFER_SIZE = 128 * 1024;



  /**
   * When processing asynchronously, this determines how many of the allocated
   * worker threads are used to parse each batch of read entries.
   * <BR>
   * NOTE(review): this value is passed as the final argument of the
   * {@code ParallelProcessor} constructor; the name suggests a minimum number
   * of records handed to each parsing thread -- confirm against
   * {@code ParallelProcessor}.
   */
  private static final int ASYNC_MIN_PER_PARSING_THREAD = 3;



  /**
   * When processing asynchronously, this specifies the size of the pending and
   * completed queues.
*/
  private static final int ASYNC_QUEUE_SIZE = 500;



  /**
   * Special entry used internally to signal that the LDIFReaderEntryTranslator
   * has signalled that a read Entry should be skipped by returning null,
   * which normally implies EOF.
   */
  private static final Entry SKIP_ENTRY = new Entry("cn=skipped");



  /**
   * The base path that is prepended to relative paths unless another one is
   * configured.  It is the absolute form of the {@code user.dir} system
   * property (or of "." when that property is not set) and always ends with
   * the platform file separator.
   */
  private static final String DEFAULT_RELATIVE_BASE_PATH;
  static
  {
    // Fall back to the current directory when user.dir is not defined.
    final String userDir = System.getProperty("user.dir");
    final File baseDir = new File((userDir == null) ? "." : userDir);

    // Guarantee exactly one trailing separator so that relative paths can be
    // appended directly.
    final String absoluteBase = baseDir.getAbsolutePath();
    DEFAULT_RELATIVE_BASE_PATH = absoluteBase.endsWith(File.separator)
         ? absoluteBase
         : (absoluteBase + File.separator);
  }



  // The buffered reader that will be used to read LDIF data.
  private final BufferedReader reader;

  // The behavior that should be exhibited when encountering duplicate attribute
  // values.
  private volatile DuplicateValueBehavior duplicateValueBehavior;

  // A line number counter.
  private long lineNumberCounter = 0;

  // The change record translator to use, if any.
  private final LDIFReaderChangeRecordTranslator changeRecordTranslator;

  // The entry translator to use, if any.
  private final LDIFReaderEntryTranslator entryTranslator;

  // The schema that will be used when processing, if applicable.
  private Schema schema;

  // Specifies the base path that will be prepended to relative paths for file
  // URLs.
private volatile String relativeBasePath;

  // The behavior that should be exhibited with regard to illegal trailing
  // spaces in attribute values.
  private volatile TrailingSpaceBehavior trailingSpaceBehavior;

  // True iff we are processing asynchronously.  This is decided once, in the
  // constructor, from the requested number of parse threads.
  private final boolean isAsync;

  //
  // The following only apply to asynchronous processing.  The constructor
  // sets all of them to null when zero parse threads are requested.
  //

  // Parses entries asynchronously.
  private final AsynchronousParallelProcessor<UnparsedLDIFRecord, LDIFRecord>
       asyncParser;

  // Set to true when the end of the input is reached.
  private final AtomicBoolean asyncParsingComplete;

  // The records that have been read and parsed.
  private final BlockingQueue<Result<UnparsedLDIFRecord, LDIFRecord>>
       asyncParsedRecords;



  /**
   * Creates a new LDIF reader that will read data from the specified file.
   * No parse threads are requested, so records are read and parsed
   * synchronously.
   *
   * @param  path  The path to the file from which the data is to be read.  It
   *               must not be {@code null}.
   *
   * @throws  IOException  If a problem occurs while opening the file for
   *                       reading.
   */
  public LDIFReader(final String path)
         throws IOException
  {
    this(new FileInputStream(path));
  }



  /**
   * Creates a new LDIF reader that will read data from the specified file
   * and optionally parses the LDIF records asynchronously using the specified
   * number of threads.
   *
   * @param  path             The path to the file from which the data is to be
   *                          read.  It must not be {@code null}.
   * @param  numParseThreads  If this value is greater than zero, then the
   *                          specified number of threads will be used to
   *                          asynchronously read and parse the LDIF file.  A
   *                          value of zero causes records to be read and
   *                          parsed synchronously.  It must not be negative.
   *
   * @throws  IOException  If a problem occurs while opening the file for
   *                       reading.
   *
   * @see #LDIFReader(BufferedReader, int, LDIFReaderEntryTranslator)
   *      constructor for more details about asynchronous processing.
*/
  public LDIFReader(final String path, final int numParseThreads)
         throws IOException
  {
    // The file is opened here; everything else is handled by the stream-based
    // constructor.
    this(new FileInputStream(path), numParseThreads);
  }



  /**
   * Creates a new LDIF reader that will read data from the specified file.
   * No parse threads are requested, so records are read and parsed
   * synchronously.
   *
   * @param  file  The file from which the data is to be read.  It must not be
   *               {@code null}.
   *
   * @throws  IOException  If a problem occurs while opening the file for
   *                       reading.
   */
  public LDIFReader(final File file)
         throws IOException
  {
    this(new FileInputStream(file));
  }



  /**
   * Creates a new LDIF reader that will read data from the specified file
   * and optionally parses the LDIF records asynchronously using the specified
   * number of threads.
   *
   * @param  file             The file from which the data is to be read.  It
   *                          must not be {@code null}.
   * @param  numParseThreads  If this value is greater than zero, then the
   *                          specified number of threads will be used to
   *                          asynchronously read and parse the LDIF file.  A
   *                          value of zero causes records to be read and
   *                          parsed synchronously.  It must not be negative.
   *
   * @throws  IOException  If a problem occurs while opening the file for
   *                       reading.
   */
  public LDIFReader(final File file, final int numParseThreads)
         throws IOException
  {
    this(new FileInputStream(file), numParseThreads);
  }



  /**
   * Creates a new LDIF reader that will read data from the specified files in
   * the order in which they are provided and optionally parses the LDIF records
   * asynchronously using the specified number of threads.
   *
   * @param  files            The files from which the data is to be read.  It
   *                          must not be {@code null} or empty.
   * @param  numParseThreads  If this value is greater than zero, then the
   *                          specified number of threads will be used to
   *                          asynchronously read and parse the LDIF file.  A
   *                          value of zero causes records to be read and
   *                          parsed synchronously.  It must not be negative.
   * @param  entryTranslator  The LDIFReaderEntryTranslator to apply to entries
   *                          before they are returned.
*                          This is normally
   *                          {@code null}, which causes entries to be returned
   *                          unaltered.  This is particularly useful when
   *                          parsing the input file in parallel because the
   *                          entry translation is also done in parallel.
   *
   * @throws  IOException  If no files were provided, or if a problem occurs
   *                       while opening a file for reading.
   */
  public LDIFReader(final File[] files, final int numParseThreads,
                    final LDIFReaderEntryTranslator entryTranslator)
         throws IOException
  {
    // No change record translator is used by this variant.
    this(files, numParseThreads, entryTranslator, null);
  }



  /**
   * Creates a new LDIF reader that will read data from the specified files in
   * the order in which they are provided and optionally parses the LDIF records
   * asynchronously using the specified number of threads.
   *
   * @param  files                   The files from which the data is to be
   *                                 read.  It must not be {@code null} or
   *                                 empty.
   * @param  numParseThreads         If this value is greater than zero, then
   *                                 the specified number of threads will be
   *                                 used to asynchronously read and parse the
   *                                 LDIF file.  A value of zero causes records
   *                                 to be read and parsed synchronously.  It
   *                                 must not be negative.
   * @param  entryTranslator         The LDIFReaderEntryTranslator to apply to
   *                                 entries before they are returned.  This is
   *                                 normally {@code null}, which causes entries
   *                                 to be returned unaltered.  This is
   *                                 particularly useful when parsing the input
   *                                 file in parallel because the entry
   *                                 translation is also done in parallel.
   * @param  changeRecordTranslator  The LDIFReaderChangeRecordTranslator to
   *                                 apply to change records before they are
   *                                 returned.  This is normally {@code null},
   *                                 which causes change records to be returned
   *                                 unaltered.  This is particularly useful
   *                                 when parsing the input file in parallel
   *                                 because the change record translation is
   *                                 also done in parallel.
   *
   * @throws  IOException  If no files were provided, or if a problem occurs
   *                       while opening a file for reading.
401 */ 402 public LDIFReader(final File[] files, final int numParseThreads, 403 final LDIFReaderEntryTranslator entryTranslator, 404 final LDIFReaderChangeRecordTranslator changeRecordTranslator) 405 throws IOException 406 { 407 this(createAggregateInputStream(files), numParseThreads, entryTranslator, 408 changeRecordTranslator); 409 } 410 411 412 413 /** 414 * Creates a new aggregate input stream that will read data from the specified 415 * files. If there are multiple files, then a "padding" file will be inserted 416 * between them to ensure that there is at least one blank line between the 417 * end of one file and the beginning of another. 418 * 419 * @param files The files from which the data is to be read. It must not be 420 * {@code null} or empty. 421 * 422 * @return The input stream to use to read data from the provided files. 423 * 424 * @throws IOException If a problem is encountered while attempting to 425 * create the input stream. 426 */ 427 private static InputStream createAggregateInputStream(final File... 
files) 428 throws IOException 429 { 430 if (files.length == 0) 431 { 432 throw new IOException(ERR_READ_NO_LDIF_FILES.get()); 433 } 434 else if (files.length == 1) 435 { 436 return new FileInputStream(files[0]); 437 } 438 else 439 { 440 final File spacerFile = 441 File.createTempFile("ldif-reader-spacer", ".ldif"); 442 spacerFile.deleteOnExit(); 443 444 final BufferedWriter spacerWriter = 445 new BufferedWriter(new FileWriter(spacerFile)); 446 try 447 { 448 spacerWriter.newLine(); 449 spacerWriter.newLine(); 450 } 451 finally 452 { 453 spacerWriter.close(); 454 } 455 456 final File[] returnArray = new File[(files.length * 2) - 1]; 457 returnArray[0] = files[0]; 458 459 int pos = 1; 460 for (int i=1; i < files.length; i++) 461 { 462 returnArray[pos++] = spacerFile; 463 returnArray[pos++] = files[i]; 464 } 465 466 return new AggregateInputStream(returnArray); 467 } 468 } 469 470 471 472 /** 473 * Creates a new LDIF reader that will read data from the provided input 474 * stream. 475 * 476 * @param inputStream The input stream from which the data is to be read. 477 * It must not be {@code null}. 478 */ 479 public LDIFReader(final InputStream inputStream) 480 { 481 this(inputStream, 0); 482 } 483 484 485 486 /** 487 * Creates a new LDIF reader that will read data from the specified stream 488 * and parses the LDIF records asynchronously using the specified number of 489 * threads. 490 * 491 * @param inputStream The input stream from which the data is to be read. 492 * It must not be {@code null}. 493 * @param numParseThreads If this value is greater than zero, then the 494 * specified number of threads will be used to 495 * asynchronously read and parse the LDIF file. 496 * 497 * @see #LDIFReader(BufferedReader, int, LDIFReaderEntryTranslator) 498 * constructor for more details about asynchronous processing. 499 */ 500 public LDIFReader(final InputStream inputStream, final int numParseThreads) 501 { 502 // UTF-8 is required by RFC 2849. 
Java guarantees it's always available. 503 this(new BufferedReader(new InputStreamReader(inputStream, 504 Charset.forName("UTF-8")), 505 DEFAULT_BUFFER_SIZE), 506 numParseThreads); 507 } 508 509 510 511 /** 512 * Creates a new LDIF reader that will read data from the specified stream 513 * and parses the LDIF records asynchronously using the specified number of 514 * threads. 515 * 516 * @param inputStream The input stream from which the data is to be read. 517 * It must not be {@code null}. 518 * @param numParseThreads If this value is greater than zero, then the 519 * specified number of threads will be used to 520 * asynchronously read and parse the LDIF file. 521 * @param entryTranslator The LDIFReaderEntryTranslator to apply to read 522 * entries before they are returned. This is normally 523 * {@code null}, which causes entries to be returned 524 * unaltered. This is particularly useful when parsing 525 * the input file in parallel because the entry 526 * translation is also done in parallel. 527 * 528 * @see #LDIFReader(BufferedReader, int, LDIFReaderEntryTranslator) 529 * constructor for more details about asynchronous processing. 530 */ 531 public LDIFReader(final InputStream inputStream, final int numParseThreads, 532 final LDIFReaderEntryTranslator entryTranslator) 533 { 534 this(inputStream, numParseThreads, entryTranslator, null); 535 } 536 537 538 539 /** 540 * Creates a new LDIF reader that will read data from the specified stream 541 * and parses the LDIF records asynchronously using the specified number of 542 * threads. 543 * 544 * @param inputStream The input stream from which the data is to 545 * be read. It must not be {@code null}. 546 * @param numParseThreads If this value is greater than zero, then 547 * the specified number of threads will be 548 * used to asynchronously read and parse the 549 * LDIF file. 550 * @param entryTranslator The LDIFReaderEntryTranslator to apply to 551 * entries before they are returned. 
*                                 This is
   *                                 normally {@code null}, which causes entries
   *                                 to be returned unaltered.  This is
   *                                 particularly useful when parsing the input
   *                                 file in parallel because the entry
   *                                 translation is also done in parallel.
   * @param  changeRecordTranslator  The LDIFReaderChangeRecordTranslator to
   *                                 apply to change records before they are
   *                                 returned.  This is normally {@code null},
   *                                 which causes change records to be returned
   *                                 unaltered.  This is particularly useful
   *                                 when parsing the input file in parallel
   *                                 because the change record translation is
   *                                 also done in parallel.
   *
   * @see #LDIFReader(BufferedReader, int, LDIFReaderEntryTranslator)
   *      constructor for more details about asynchronous processing.
   */
  public LDIFReader(final InputStream inputStream, final int numParseThreads,
              final LDIFReaderEntryTranslator entryTranslator,
              final LDIFReaderChangeRecordTranslator changeRecordTranslator)
  {
    // UTF-8 is required by RFC 2849.  Java guarantees it's always available.
    // The stream is wrapped in a reader whose buffer is DEFAULT_BUFFER_SIZE
    // characters.
    this(new BufferedReader(
              new InputStreamReader(inputStream, Charset.forName("UTF-8")),
              DEFAULT_BUFFER_SIZE),
         numParseThreads, entryTranslator, changeRecordTranslator);
  }



  /**
   * Creates a new LDIF reader that will use the provided buffered reader to
   * read the LDIF data.  The encoding of the underlying Reader must be set to
   * "UTF-8" as required by RFC 2849.  No parse threads are requested, so
   * records are read and parsed synchronously.
   *
   * @param  reader  The buffered reader that will be used to read the LDIF
   *                 data.  It must not be {@code null}.
   */
  public LDIFReader(final BufferedReader reader)
  {
    this(reader, 0);
  }



  /**
   * Creates a new LDIF reader that will read data from the specified buffered
   * reader and parses the LDIF records asynchronously using the specified
   * number of threads.  The encoding of the underlying Reader must be set to
   * "UTF-8" as required by RFC 2849.
*
   * @param  reader           The buffered reader that will be used to read the
   *                          LDIF data.  It must not be {@code null}.
   * @param  numParseThreads  If this value is greater than zero, then the
   *                          specified number of threads will be used to
   *                          asynchronously read and parse the LDIF file.  A
   *                          value of zero causes records to be read and
   *                          parsed synchronously.  It must not be negative.
   *
   * @see #LDIFReader(BufferedReader, int, LDIFReaderEntryTranslator)
   *      constructor for more details about asynchronous processing.
   */
  public LDIFReader(final BufferedReader reader, final int numParseThreads)
  {
    // No entry translator is used by this variant.
    this(reader, numParseThreads, null);
  }



  /**
   * Creates a new LDIF reader that will read data from the specified buffered
   * reader and parses the LDIF records asynchronously using the specified
   * number of threads.  The encoding of the underlying Reader must be set to
   * "UTF-8" as required by RFC 2849.
   *
   * @param  reader           The buffered reader that will be used to read the
   *                          LDIF data.  It must not be {@code null}.
   * @param  numParseThreads  If this value is greater than zero, then the
   *                          specified number of threads will be used to
   *                          asynchronously read and parse the LDIF file.
   *                          This should only be set to greater than zero when
   *                          performance analysis has demonstrated that reading
   *                          and parsing the LDIF is a bottleneck.  The default
   *                          synchronous processing is normally fast enough.
   *                          There is little benefit in passing in a value
   *                          greater than four (unless there is an
   *                          LDIFReaderEntryTranslator that does time-consuming
   *                          processing).  A value of zero implies the
   *                          default behavior of reading and parsing LDIF
   *                          records synchronously when one of the read
   *                          methods is called.  It must not be negative.
   * @param  entryTranslator  The LDIFReaderEntryTranslator to apply to read
   *                          entries before they are returned.  This is
   *                          normally {@code null}, which causes entries to be
   *                          returned unaltered.
*                          This is particularly useful when parsing
   *                          the input file in parallel because the entry
   *                          translation is also done in parallel.
   */
  public LDIFReader(final BufferedReader reader,
                    final int numParseThreads,
                    final LDIFReaderEntryTranslator entryTranslator)
  {
    // No change record translator is used by this variant.
    this(reader, numParseThreads, entryTranslator, null);
  }



  /**
   * Creates a new LDIF reader that will read data from the specified buffered
   * reader and parses the LDIF records asynchronously using the specified
   * number of threads.  The encoding of the underlying Reader must be set to
   * "UTF-8" as required by RFC 2849.
   *
   * @param  reader                  The buffered reader that will be used to
   *                                 read the LDIF data.  It must not be
   *                                 {@code null}.
   * @param  numParseThreads         If this value is greater than zero, then
   *                                 the specified number of threads will be
   *                                 used to asynchronously read and parse the
   *                                 LDIF file.  A value of zero causes records
   *                                 to be read and parsed synchronously.  It
   *                                 must not be negative.
   * @param  entryTranslator         The LDIFReaderEntryTranslator to apply to
   *                                 entries before they are returned.  This is
   *                                 normally {@code null}, which causes entries
   *                                 to be returned unaltered.  This is
   *                                 particularly useful when parsing the input
   *                                 file in parallel because the entry
   *                                 translation is also done in parallel.
   * @param  changeRecordTranslator  The LDIFReaderChangeRecordTranslator to
   *                                 apply to change records before they are
   *                                 returned.  This is normally {@code null},
   *                                 which causes change records to be returned
   *                                 unaltered.  This is particularly useful
   *                                 when parsing the input file in parallel
   *                                 because the change record translation is
   *                                 also done in parallel.
685 */ 686 public LDIFReader(final BufferedReader reader, final int numParseThreads, 687 final LDIFReaderEntryTranslator entryTranslator, 688 final LDIFReaderChangeRecordTranslator changeRecordTranslator) 689 { 690 ensureNotNull(reader); 691 ensureTrue(numParseThreads >= 0, 692 "LDIFReader.numParseThreads must not be negative."); 693 694 this.reader = reader; 695 this.entryTranslator = entryTranslator; 696 this.changeRecordTranslator = changeRecordTranslator; 697 698 duplicateValueBehavior = DuplicateValueBehavior.STRIP; 699 trailingSpaceBehavior = TrailingSpaceBehavior.REJECT; 700 701 relativeBasePath = DEFAULT_RELATIVE_BASE_PATH; 702 703 if (numParseThreads == 0) 704 { 705 isAsync = false; 706 asyncParser = null; 707 asyncParsingComplete = null; 708 asyncParsedRecords = null; 709 } 710 else 711 { 712 isAsync = true; 713 asyncParsingComplete = new AtomicBoolean(false); 714 715 // Decodes entries in parallel. 716 final LDAPSDKThreadFactory threadFactory = 717 new LDAPSDKThreadFactory("LDIFReader Worker", true, null); 718 final ParallelProcessor<UnparsedLDIFRecord, LDIFRecord> parallelParser = 719 new ParallelProcessor<UnparsedLDIFRecord, LDIFRecord>( 720 new RecordParser(), threadFactory, numParseThreads, 721 ASYNC_MIN_PER_PARSING_THREAD); 722 723 final BlockingQueue<UnparsedLDIFRecord> pendingQueue = new 724 ArrayBlockingQueue<UnparsedLDIFRecord>(ASYNC_QUEUE_SIZE); 725 726 // The output queue must be a little more than twice as big as the input 727 // queue to more easily handle being shutdown in the middle of processing 728 // when the queues are full and threads are blocked. 
729 asyncParsedRecords = new ArrayBlockingQueue 730 <Result<UnparsedLDIFRecord, LDIFRecord>>(2 * ASYNC_QUEUE_SIZE + 100); 731 732 asyncParser = new AsynchronousParallelProcessor 733 <UnparsedLDIFRecord, LDIFRecord>(pendingQueue, parallelParser, 734 asyncParsedRecords); 735 736 final LineReaderThread lineReaderThread = new LineReaderThread(); 737 lineReaderThread.start(); 738 } 739 } 740 741 742 743 /** 744 * Reads entries from the LDIF file with the specified path and returns them 745 * as a {@code List}. This is a convenience method that should only be used 746 * for data sets that are small enough so that running out of memory isn't a 747 * concern. 748 * 749 * @param path The path to the LDIF file containing the entries to be read. 750 * 751 * @return A list of the entries read from the given LDIF file. 752 * 753 * @throws IOException If a problem occurs while attempting to read data 754 * from the specified file. 755 * 756 * @throws LDIFException If a problem is encountered while attempting to 757 * decode data read as LDIF. 758 */ 759 public static List<Entry> readEntries(final String path) 760 throws IOException, LDIFException 761 { 762 return readEntries(new LDIFReader(path)); 763 } 764 765 766 767 /** 768 * Reads entries from the specified LDIF file and returns them as a 769 * {@code List}. This is a convenience method that should only be used for 770 * data sets that are small enough so that running out of memory isn't a 771 * concern. 772 * 773 * @param file A reference to the LDIF file containing the entries to be 774 * read. 775 * 776 * @return A list of the entries read from the given LDIF file. 777 * 778 * @throws IOException If a problem occurs while attempting to read data 779 * from the specified file. 780 * 781 * @throws LDIFException If a problem is encountered while attempting to 782 * decode data read as LDIF. 
783 */ 784 public static List<Entry> readEntries(final File file) 785 throws IOException, LDIFException 786 { 787 return readEntries(new LDIFReader(file)); 788 } 789 790 791 792 /** 793 * Reads and decodes LDIF entries from the provided input stream and 794 * returns them as a {@code List}. This is a convenience method that should 795 * only be used for data sets that are small enough so that running out of 796 * memory isn't a concern. 797 * 798 * @param inputStream The input stream from which the entries should be 799 * read. The input stream will be closed before 800 * returning. 801 * 802 * @return A list of the entries read from the given input stream. 803 * 804 * @throws IOException If a problem occurs while attempting to read data 805 * from the input stream. 806 * 807 * @throws LDIFException If a problem is encountered while attempting to 808 * decode data read as LDIF. 809 */ 810 public static List<Entry> readEntries(final InputStream inputStream) 811 throws IOException, LDIFException 812 { 813 return readEntries(new LDIFReader(inputStream)); 814 } 815 816 817 818 /** 819 * Reads entries from the provided LDIF reader and returns them as a list. 820 * 821 * @param reader The reader from which the entries should be read. It will 822 * be closed before returning. 823 * 824 * @return A list of the entries read from the provided reader. 825 * 826 * @throws IOException If a problem was encountered while attempting to read 827 * data from the LDIF data source. 828 * 829 * @throws LDIFException If a problem is encountered while attempting to 830 * decode data read as LDIF. 
831 */ 832 private static List<Entry> readEntries(final LDIFReader reader) 833 throws IOException, LDIFException 834 { 835 try 836 { 837 final ArrayList<Entry> entries = new ArrayList<Entry>(10); 838 while (true) 839 { 840 final Entry e = reader.readEntry(); 841 if (e == null) 842 { 843 break; 844 } 845 846 entries.add(e); 847 } 848 849 return entries; 850 } 851 finally 852 { 853 reader.close(); 854 } 855 } 856 857 858 859 /** 860 * Closes this LDIF reader and the underlying LDIF source. 861 * 862 * @throws IOException If a problem occurs while closing the underlying LDIF 863 * source. 864 */ 865 public void close() 866 throws IOException 867 { 868 reader.close(); 869 870 if (isAsync()) 871 { 872 // Closing the reader will trigger the LineReaderThread to complete, but 873 // not if it's blocked submitting the next UnparsedLDIFRecord. To avoid 874 // this, we clear out the completed output queue, which is larger than 875 // the input queue, so the LineReaderThread will stop reading and 876 // shutdown the asyncParser. 877 asyncParsedRecords.clear(); 878 } 879 } 880 881 882 883 /** 884 * Indicates whether to ignore any duplicate values encountered while reading 885 * LDIF records. 886 * 887 * @return {@code true} if duplicate values should be ignored, or 888 * {@code false} if any LDIF records containing duplicate values 889 * should be rejected. 890 * 891 * @deprecated Use the {@code getDuplicateValueBehavior} method instead. 892 */ 893 @Deprecated() 894 public boolean ignoreDuplicateValues() 895 { 896 return (duplicateValueBehavior == DuplicateValueBehavior.STRIP); 897 } 898 899 900 901 /** 902 * Specifies whether to ignore any duplicate values encountered while reading 903 * LDIF records. 904 * 905 * @param ignoreDuplicateValues Indicates whether to ignore duplicate 906 * attribute values encountered while reading 907 * LDIF records. 908 * 909 * @deprecated Use the {@code setDuplicateValueBehavior} method instead. 
910 */ 911 @Deprecated() 912 public void setIgnoreDuplicateValues(final boolean ignoreDuplicateValues) 913 { 914 if (ignoreDuplicateValues) 915 { 916 duplicateValueBehavior = DuplicateValueBehavior.STRIP; 917 } 918 else 919 { 920 duplicateValueBehavior = DuplicateValueBehavior.REJECT; 921 } 922 } 923 924 925 926 /** 927 * Retrieves the behavior that should be exhibited if the LDIF reader 928 * encounters an entry with duplicate values. 929 * 930 * @return The behavior that should be exhibited if the LDIF reader 931 * encounters an entry with duplicate values. 932 */ 933 public DuplicateValueBehavior getDuplicateValueBehavior() 934 { 935 return duplicateValueBehavior; 936 } 937 938 939 940 /** 941 * Specifies the behavior that should be exhibited if the LDIF reader 942 * encounters an entry with duplicate values. 943 * 944 * @param duplicateValueBehavior The behavior that should be exhibited if 945 * the LDIF reader encounters an entry with 946 * duplicate values. 947 */ 948 public void setDuplicateValueBehavior( 949 final DuplicateValueBehavior duplicateValueBehavior) 950 { 951 this.duplicateValueBehavior = duplicateValueBehavior; 952 } 953 954 955 956 /** 957 * Indicates whether to strip off any illegal trailing spaces that may appear 958 * in LDIF records (e.g., after an entry DN or attribute value). The LDIF 959 * specification strongly recommends that any value which legitimately 960 * contains trailing spaces be base64-encoded, and any spaces which appear 961 * after the end of non-base64-encoded values may therefore be considered 962 * invalid. If any such trailing spaces are encountered in an LDIF record and 963 * they are not to be stripped, then an {@code LDIFException} will be thrown 964 * for that record. 965 * <BR><BR> 966 * Note that this applies only to spaces after the end of a value, and not to 967 * spaces which may appear at the end of a line for a value that is wrapped 968 * and continued on the next line. 
969 * 970 * @return {@code true} if illegal trailing spaces should be stripped off, or 971 * {@code false} if LDIF records containing illegal trailing spaces 972 * should be rejected. 973 * 974 * @deprecated Use the {@code getTrailingSpaceBehavior} method instead. 975 */ 976 @Deprecated() 977 public boolean stripTrailingSpaces() 978 { 979 return (trailingSpaceBehavior == TrailingSpaceBehavior.STRIP); 980 } 981 982 983 984 /** 985 * Specifies whether to strip off any illegal trailing spaces that may appear 986 * in LDIF records (e.g., after an entry DN or attribute value). The LDIF 987 * specification strongly recommends that any value which legitimately 988 * contains trailing spaces be base64-encoded, and any spaces which appear 989 * after the end of non-base64-encoded values may therefore be considered 990 * invalid. If any such trailing spaces are encountered in an LDIF record and 991 * they are not to be stripped, then an {@code LDIFException} will be thrown 992 * for that record. 993 * <BR><BR> 994 * Note that this applies only to spaces after the end of a value, and not to 995 * spaces which may appear at the end of a line for a value that is wrapped 996 * and continued on the next line. 997 * 998 * @param stripTrailingSpaces Indicates whether to strip off any illegal 999 * trailing spaces, or {@code false} if LDIF 1000 * records containing them should be rejected. 1001 * 1002 * @deprecated Use the {@code setTrailingSpaceBehavior} method instead. 1003 */ 1004 @Deprecated() 1005 public void setStripTrailingSpaces(final boolean stripTrailingSpaces) 1006 { 1007 trailingSpaceBehavior = stripTrailingSpaces 1008 ? TrailingSpaceBehavior.STRIP 1009 : TrailingSpaceBehavior.REJECT; 1010 } 1011 1012 1013 1014 /** 1015 * Retrieves the behavior that should be exhibited when encountering attribute 1016 * values which are not base64-encoded but contain trailing spaces. 
The LDIF 1017 * specification strongly recommends that any value which legitimately 1018 * contains trailing spaces be base64-encoded, but the LDAP SDK LDIF parser 1019 * may be configured to automatically strip these spaces, to preserve them, or 1020 * to reject any entry or change record containing them. 1021 * 1022 * @return The behavior that should be exhibited when encountering attribute 1023 * values which are not base64-encoded but contain trailing spaces. 1024 */ 1025 public TrailingSpaceBehavior getTrailingSpaceBehavior() 1026 { 1027 return trailingSpaceBehavior; 1028 } 1029 1030 1031 1032 /** 1033 * Specifies the behavior that should be exhibited when encountering attribute 1034 * values which are not base64-encoded but contain trailing spaces. The LDIF 1035 * specification strongly recommends that any value which legitimately 1036 * contains trailing spaces be base64-encoded, but the LDAP SDK LDIF parser 1037 * may be configured to automatically strip these spaces, to preserve them, or 1038 * to reject any entry or change record containing them. 1039 * 1040 * @param trailingSpaceBehavior The behavior that should be exhibited when 1041 * encountering attribute values which are not 1042 * base64-encoded but contain trailing spaces. 1043 */ 1044 public void setTrailingSpaceBehavior( 1045 final TrailingSpaceBehavior trailingSpaceBehavior) 1046 { 1047 this.trailingSpaceBehavior = trailingSpaceBehavior; 1048 } 1049 1050 1051 1052 /** 1053 * Retrieves the base path that will be prepended to relative paths in order 1054 * to obtain an absolute path. This will only be used for "file:" URLs that 1055 * have paths which do not begin with a slash. 1056 * 1057 * @return The base path that will be prepended to relative paths in order to 1058 * obtain an absolute path. 
1059 */ 1060 public String getRelativeBasePath() 1061 { 1062 return relativeBasePath; 1063 } 1064 1065 1066 1067 /** 1068 * Specifies the base path that will be prepended to relative paths in order 1069 * to obtain an absolute path. This will only be used for "file:" URLs that 1070 * have paths which do not begin with a space. 1071 * 1072 * @param relativeBasePath The base path that will be prepended to relative 1073 * paths in order to obtain an absolute path. 1074 */ 1075 public void setRelativeBasePath(final String relativeBasePath) 1076 { 1077 setRelativeBasePath(new File(relativeBasePath)); 1078 } 1079 1080 1081 1082 /** 1083 * Specifies the base path that will be prepended to relative paths in order 1084 * to obtain an absolute path. This will only be used for "file:" URLs that 1085 * have paths which do not begin with a space. 1086 * 1087 * @param relativeBasePath The base path that will be prepended to relative 1088 * paths in order to obtain an absolute path. 1089 */ 1090 public void setRelativeBasePath(final File relativeBasePath) 1091 { 1092 final String path = relativeBasePath.getAbsolutePath(); 1093 if (path.endsWith(File.separator)) 1094 { 1095 this.relativeBasePath = path; 1096 } 1097 else 1098 { 1099 this.relativeBasePath = path + File.separator; 1100 } 1101 } 1102 1103 1104 1105 /** 1106 * Retrieves the schema that will be used when reading LDIF records, if 1107 * defined. 1108 * 1109 * @return The schema that will be used when reading LDIF records, or 1110 * {@code null} if no schema should be used and all attributes should 1111 * be treated as case-insensitive strings. 1112 */ 1113 public Schema getSchema() 1114 { 1115 return schema; 1116 } 1117 1118 1119 1120 /** 1121 * Specifies the schema that should be used when reading LDIF records. 1122 * 1123 * @param schema The schema that should be used when reading LDIF records, 1124 * or {@code null} if no schema should be used and all 1125 * attributes should be treated as case-insensitive strings. 
1126 */ 1127 public void setSchema(final Schema schema) 1128 { 1129 this.schema = schema; 1130 } 1131 1132 1133 1134 /** 1135 * Reads a record from the LDIF source. It may be either an entry or an LDIF 1136 * change record. 1137 * 1138 * @return The record read from the LDIF source, or {@code null} if there are 1139 * no more entries to be read. 1140 * 1141 * @throws IOException If a problem occurs while trying to read from the 1142 * LDIF source. 1143 * 1144 * @throws LDIFException If the data read could not be parsed as an entry or 1145 * an LDIF change record. 1146 */ 1147 public LDIFRecord readLDIFRecord() 1148 throws IOException, LDIFException 1149 { 1150 if (isAsync()) 1151 { 1152 return readLDIFRecordAsync(); 1153 } 1154 else 1155 { 1156 return readLDIFRecordInternal(); 1157 } 1158 } 1159 1160 1161 1162 /** 1163 * Reads an entry from the LDIF source. 1164 * 1165 * @return The entry read from the LDIF source, or {@code null} if there are 1166 * no more entries to be read. 1167 * 1168 * @throws IOException If a problem occurs while attempting to read from the 1169 * LDIF source. 1170 * 1171 * @throws LDIFException If the data read could not be parsed as an entry. 1172 */ 1173 public Entry readEntry() 1174 throws IOException, LDIFException 1175 { 1176 if (isAsync()) 1177 { 1178 return readEntryAsync(); 1179 } 1180 else 1181 { 1182 return readEntryInternal(); 1183 } 1184 } 1185 1186 1187 1188 /** 1189 * Reads an LDIF change record from the LDIF source. The LDIF record must 1190 * have a changetype. 1191 * 1192 * @return The change record read from the LDIF source, or {@code null} if 1193 * there are no more records to be read. 1194 * 1195 * @throws IOException If a problem occurs while attempting to read from the 1196 * LDIF source. 1197 * 1198 * @throws LDIFException If the data read could not be parsed as an LDIF 1199 * change record. 
1200 */ 1201 public LDIFChangeRecord readChangeRecord() 1202 throws IOException, LDIFException 1203 { 1204 return readChangeRecord(false); 1205 } 1206 1207 1208 1209 /** 1210 * Reads an LDIF change record from the LDIF source. Optionally, if the LDIF 1211 * record does not have a changetype, then it may be assumed to be an add 1212 * change record. 1213 * 1214 * @param defaultAdd Indicates whether an LDIF record not containing a 1215 * changetype should be retrieved as an add change record. 1216 * If this is {@code false} and the record read does not 1217 * include a changetype, then an {@code LDIFException} 1218 * will be thrown. 1219 * 1220 * @return The change record read from the LDIF source, or {@code null} if 1221 * there are no more records to be read. 1222 * 1223 * @throws IOException If a problem occurs while attempting to read from the 1224 * LDIF source. 1225 * 1226 * @throws LDIFException If the data read could not be parsed as an LDIF 1227 * change record. 1228 */ 1229 public LDIFChangeRecord readChangeRecord(final boolean defaultAdd) 1230 throws IOException, LDIFException 1231 { 1232 if (isAsync()) 1233 { 1234 return readChangeRecordAsync(defaultAdd); 1235 } 1236 else 1237 { 1238 return readChangeRecordInternal(defaultAdd); 1239 } 1240 } 1241 1242 1243 1244 /** 1245 * Reads the next {@code LDIFRecord}, which was read and parsed by a different 1246 * thread. 1247 * 1248 * @return The next parsed record or {@code null} if there are no more 1249 * records to read. 1250 * 1251 * @throws IOException If IOException was thrown when reading or parsing 1252 * the record. 1253 * 1254 * @throws LDIFException If LDIFException was thrown parsing the record. 
1255 */ 1256 private LDIFRecord readLDIFRecordAsync() 1257 throws IOException, LDIFException 1258 { 1259 final Result<UnparsedLDIFRecord, LDIFRecord> result = 1260 readLDIFRecordResultAsync(); 1261 if (result == null) 1262 { 1263 return null; 1264 } 1265 else 1266 { 1267 return result.getOutput(); 1268 } 1269 } 1270 1271 1272 1273 /** 1274 * Reads an entry asynchronously from the LDIF source. 1275 * 1276 * @return The entry read from the LDIF source, or {@code null} if there are 1277 * no more entries to be read. 1278 * 1279 * @throws IOException If a problem occurs while attempting to read from the 1280 * LDIF source. 1281 * @throws LDIFException If the data read could not be parsed as an entry. 1282 */ 1283 private Entry readEntryAsync() 1284 throws IOException, LDIFException 1285 { 1286 Result<UnparsedLDIFRecord, LDIFRecord> result = null; 1287 LDIFRecord record = null; 1288 while (record == null) 1289 { 1290 result = readLDIFRecordResultAsync(); 1291 if (result == null) 1292 { 1293 return null; 1294 } 1295 1296 record = result.getOutput(); 1297 1298 // This is a special value that means we should skip this Entry. We have 1299 // to use something different than null because null means EOF. 1300 if (record == SKIP_ENTRY) 1301 { 1302 record = null; 1303 } 1304 } 1305 1306 if (record instanceof Entry) 1307 { 1308 return (Entry) record; 1309 } 1310 else if (record instanceof LDIFChangeRecord) 1311 { 1312 try 1313 { 1314 // Some LDIFChangeRecord can be converted to an Entry. This is really 1315 // an edge case though. 
1316 return ((LDIFChangeRecord)record).toEntry(); 1317 } 1318 catch (LDIFException e) 1319 { 1320 debugException(e); 1321 final long firstLineNumber = result.getInput().getFirstLineNumber(); 1322 throw new LDIFException(e.getExceptionMessage(), 1323 firstLineNumber, true, e); 1324 } 1325 } 1326 1327 throw new AssertionError("LDIFRecords must either be an Entry or an " + 1328 "LDIFChangeRecord"); 1329 } 1330 1331 1332 1333 /** 1334 * Reads an LDIF change record from the LDIF source asynchronously. 1335 * Optionally, if the LDIF record does not have a changetype, then it may be 1336 * assumed to be an add change record. 1337 * 1338 * @param defaultAdd Indicates whether an LDIF record not containing a 1339 * changetype should be retrieved as an add change record. 1340 * If this is {@code false} and the record read does not 1341 * include a changetype, then an {@code LDIFException} will 1342 * be thrown. 1343 * 1344 * @return The change record read from the LDIF source, or {@code null} if 1345 * there are no more records to be read. 1346 * 1347 * @throws IOException If a problem occurs while attempting to read from the 1348 * LDIF source. 1349 * @throws LDIFException If the data read could not be parsed as an LDIF 1350 * change record. 1351 */ 1352 private LDIFChangeRecord readChangeRecordAsync(final boolean defaultAdd) 1353 throws IOException, LDIFException 1354 { 1355 Result<UnparsedLDIFRecord, LDIFRecord> result = null; 1356 LDIFRecord record = null; 1357 while (record == null) 1358 { 1359 result = readLDIFRecordResultAsync(); 1360 if (result == null) 1361 { 1362 return null; 1363 } 1364 1365 record = result.getOutput(); 1366 1367 // This is a special value that means we should skip this Entry. We have 1368 // to use something different than null because null means EOF. 
1369 if (record == SKIP_ENTRY) 1370 { 1371 record = null; 1372 } 1373 } 1374 1375 if (record instanceof LDIFChangeRecord) 1376 { 1377 return (LDIFChangeRecord) record; 1378 } 1379 else if (record instanceof Entry) 1380 { 1381 if (defaultAdd) 1382 { 1383 return new LDIFAddChangeRecord((Entry) record); 1384 } 1385 else 1386 { 1387 final long firstLineNumber = result.getInput().getFirstLineNumber(); 1388 throw new LDIFException( 1389 ERR_READ_NOT_CHANGE_RECORD.get(firstLineNumber), firstLineNumber, 1390 true); 1391 } 1392 } 1393 1394 throw new AssertionError("LDIFRecords must either be an Entry or an " + 1395 "LDIFChangeRecord"); 1396 } 1397 1398 1399 1400 /** 1401 * Reads the next LDIF record, which was read and parsed asynchronously by 1402 * separate threads. 1403 * 1404 * @return The next LDIF record or {@code null} if there are no more records. 1405 * 1406 * @throws IOException If a problem occurs while attempting to read from the 1407 * LDIF source. 1408 * 1409 * @throws LDIFException If the data read could not be parsed as an entry. 1410 */ 1411 private Result<UnparsedLDIFRecord, LDIFRecord> readLDIFRecordResultAsync() 1412 throws IOException, LDIFException 1413 { 1414 Result<UnparsedLDIFRecord, LDIFRecord> result = null; 1415 1416 // If the asynchronous reading and parsing is complete, then we don't have 1417 // to block waiting for the next record to show up on the queue. If there 1418 // isn't a record there, then return null (EOF) right away. 1419 if (asyncParsingComplete.get()) 1420 { 1421 result = asyncParsedRecords.poll(); 1422 } 1423 else 1424 { 1425 try 1426 { 1427 // We probably could just do a asyncParsedRecords.take() here, but 1428 // there are some edge case error scenarios where 1429 // asyncParsingComplete might be set without a special EOF sentinel 1430 // Result enqueued. So to guard against this, we have a very cautious 1431 // polling interval of 1 second. 
During normal processing, we never 1432 // have to wait for this to expire, when there is something to do 1433 // (like shutdown). 1434 while ((result == null) && (!asyncParsingComplete.get())) 1435 { 1436 result = asyncParsedRecords.poll(1, TimeUnit.SECONDS); 1437 } 1438 1439 // There's a very small chance that we missed the value, so double-check 1440 if (result == null) 1441 { 1442 result = asyncParsedRecords.poll(); 1443 } 1444 } 1445 catch (InterruptedException e) 1446 { 1447 debugException(e); 1448 throw createIOExceptionWithCause(null, e); 1449 } 1450 } 1451 if (result == null) 1452 { 1453 return null; 1454 } 1455 1456 rethrow(result.getFailureCause()); 1457 1458 // Check if we reached the end of the input 1459 final UnparsedLDIFRecord unparsedRecord = result.getInput(); 1460 if (unparsedRecord.isEOF()) 1461 { 1462 // This might have been set already by the LineReaderThread, but 1463 // just in case it hasn't gotten to it yet, do so here. 1464 asyncParsingComplete.set(true); 1465 1466 // Enqueue this EOF result again for any other thread that might be 1467 // blocked in asyncParsedRecords.take() even though having multiple 1468 // threads call this method concurrently breaks the contract of this 1469 // class. 1470 try 1471 { 1472 asyncParsedRecords.put(result); 1473 } 1474 catch (InterruptedException e) 1475 { 1476 // We shouldn't ever get interrupted because the put won't ever block. 1477 // Once we are done reading, this is the only item left in the queue, 1478 // so we should always be able to re-enqueue it. 1479 debugException(e); 1480 } 1481 return null; 1482 } 1483 1484 return result; 1485 } 1486 1487 1488 1489 /** 1490 * Indicates whether this LDIF reader was constructed to perform asynchronous 1491 * processing. 1492 * 1493 * @return {@code true} if this LDIFReader was constructed to perform 1494 * asynchronous processing, or {@code false} if not. 
1495 */ 1496 private boolean isAsync() 1497 { 1498 return isAsync; 1499 } 1500 1501 1502 1503 /** 1504 * If not {@code null}, rethrows the specified Throwable as either an 1505 * IOException or LDIFException. 1506 * 1507 * @param t The exception to rethrow. If it's {@code null}, then nothing 1508 * is thrown. 1509 * 1510 * @throws IOException If t is an IOException or a checked Exception that 1511 * is not an LDIFException. 1512 * @throws LDIFException If t is an LDIFException. 1513 */ 1514 static void rethrow(final Throwable t) 1515 throws IOException, LDIFException 1516 { 1517 if (t == null) 1518 { 1519 return; 1520 } 1521 1522 if (t instanceof IOException) 1523 { 1524 throw (IOException) t; 1525 } 1526 else if (t instanceof LDIFException) 1527 { 1528 throw (LDIFException) t; 1529 } 1530 else if (t instanceof RuntimeException) 1531 { 1532 throw (RuntimeException) t; 1533 } 1534 else if (t instanceof Error) 1535 { 1536 throw (Error) t; 1537 } 1538 else 1539 { 1540 throw createIOExceptionWithCause(null, t); 1541 } 1542 } 1543 1544 1545 1546 /** 1547 * Reads a record from the LDIF source. It may be either an entry or an LDIF 1548 * change record. 1549 * 1550 * @return The record read from the LDIF source, or {@code null} if there are 1551 * no more entries to be read. 1552 * 1553 * @throws IOException If a problem occurs while trying to read from the 1554 * LDIF source. 1555 * @throws LDIFException If the data read could not be parsed as an entry or 1556 * an LDIF change record. 1557 */ 1558 private LDIFRecord readLDIFRecordInternal() 1559 throws IOException, LDIFException 1560 { 1561 final UnparsedLDIFRecord unparsedRecord = readUnparsedRecord(); 1562 return decodeRecord(unparsedRecord, relativeBasePath, schema); 1563 } 1564 1565 1566 1567 /** 1568 * Reads an entry from the LDIF source. 1569 * 1570 * @return The entry read from the LDIF source, or {@code null} if there are 1571 * no more entries to be read. 
1572 * 1573 * @throws IOException If a problem occurs while attempting to read from the 1574 * LDIF source. 1575 * @throws LDIFException If the data read could not be parsed as an entry. 1576 */ 1577 private Entry readEntryInternal() 1578 throws IOException, LDIFException 1579 { 1580 Entry e = null; 1581 while (e == null) 1582 { 1583 final UnparsedLDIFRecord unparsedRecord = readUnparsedRecord(); 1584 if (unparsedRecord.isEOF()) 1585 { 1586 return null; 1587 } 1588 1589 e = decodeEntry(unparsedRecord, relativeBasePath); 1590 debugLDIFRead(e); 1591 1592 if (entryTranslator != null) 1593 { 1594 e = entryTranslator.translate(e, unparsedRecord.getFirstLineNumber()); 1595 } 1596 } 1597 return e; 1598 } 1599 1600 1601 1602 /** 1603 * Reads an LDIF change record from the LDIF source. Optionally, if the LDIF 1604 * record does not have a changetype, then it may be assumed to be an add 1605 * change record. 1606 * 1607 * @param defaultAdd Indicates whether an LDIF record not containing a 1608 * changetype should be retrieved as an add change record. 1609 * If this is {@code false} and the record read does not 1610 * include a changetype, then an {@code LDIFException} will 1611 * be thrown. 1612 * 1613 * @return The change record read from the LDIF source, or {@code null} if 1614 * there are no more records to be read. 1615 * 1616 * @throws IOException If a problem occurs while attempting to read from the 1617 * LDIF source. 1618 * @throws LDIFException If the data read could not be parsed as an LDIF 1619 * change record. 
1620 */ 1621 private LDIFChangeRecord readChangeRecordInternal(final boolean defaultAdd) 1622 throws IOException, LDIFException 1623 { 1624 LDIFChangeRecord r = null; 1625 while (r == null) 1626 { 1627 final UnparsedLDIFRecord unparsedRecord = readUnparsedRecord(); 1628 if (unparsedRecord.isEOF()) 1629 { 1630 return null; 1631 } 1632 1633 r = decodeChangeRecord(unparsedRecord, relativeBasePath, defaultAdd, 1634 schema); 1635 debugLDIFRead(r); 1636 1637 if (changeRecordTranslator != null) 1638 { 1639 r = changeRecordTranslator.translate(r, 1640 unparsedRecord.getFirstLineNumber()); 1641 } 1642 } 1643 return r; 1644 } 1645 1646 1647 1648 /** 1649 * Reads a record (either an entry or a change record) from the LDIF source 1650 * and places it in the line list. 1651 * 1652 * @return The line number for the first line of the entry that was read. 1653 * 1654 * @throws IOException If a problem occurs while attempting to read from the 1655 * LDIF source. 1656 * 1657 * @throws LDIFException If the data read could not be parsed as a valid 1658 * LDIF record. 1659 */ 1660 private UnparsedLDIFRecord readUnparsedRecord() 1661 throws IOException, LDIFException 1662 { 1663 final ArrayList<StringBuilder> lineList = new ArrayList<StringBuilder>(20); 1664 boolean lastWasComment = false; 1665 long firstLineNumber = lineNumberCounter + 1; 1666 while (true) 1667 { 1668 final String line = reader.readLine(); 1669 lineNumberCounter++; 1670 1671 if (line == null) 1672 { 1673 // We've hit the end of the LDIF source. If we haven't read any entry 1674 // data, then return null. Otherwise, the last entry wasn't followed by 1675 // a blank line, which is OK, and we should decode that entry. 1676 if (lineList.isEmpty()) 1677 { 1678 return new UnparsedLDIFRecord(new ArrayList<StringBuilder>(0), 1679 duplicateValueBehavior, trailingSpaceBehavior, schema, -1); 1680 } 1681 else 1682 { 1683 break; 1684 } 1685 } 1686 1687 if (line.length() == 0) 1688 { 1689 // It's a blank line. 
If we have read entry data, then this signals the 1690 // end of the entry. Otherwise, it's an extra space between entries, 1691 // which is OK. 1692 lastWasComment = false; 1693 if (lineList.isEmpty()) 1694 { 1695 firstLineNumber++; 1696 continue; 1697 } 1698 else 1699 { 1700 break; 1701 } 1702 } 1703 1704 if (line.charAt(0) == ' ') 1705 { 1706 // The line starts with a space, which means that it must be a 1707 // continuation of the previous line. This is true even if the last 1708 // line was a comment. 1709 if (lastWasComment) 1710 { 1711 // What we've read is part of a comment, so we don't care about its 1712 // content. 1713 } 1714 else if (lineList.isEmpty()) 1715 { 1716 throw new LDIFException( 1717 ERR_READ_UNEXPECTED_FIRST_SPACE.get(lineNumberCounter), 1718 lineNumberCounter, false); 1719 } 1720 else 1721 { 1722 lineList.get(lineList.size() - 1).append(line.substring(1)); 1723 lastWasComment = false; 1724 } 1725 } 1726 else if (line.charAt(0) == '#') 1727 { 1728 lastWasComment = true; 1729 } 1730 else 1731 { 1732 // We want to make sure that we skip over the "version:" line if it 1733 // exists, but that should only occur at the beginning of an entry where 1734 // it can't be confused with a possible "version" attribute. 1735 if (lineList.isEmpty() && line.startsWith("version:")) 1736 { 1737 lastWasComment = true; 1738 } 1739 else 1740 { 1741 lineList.add(new StringBuilder(line)); 1742 lastWasComment = false; 1743 } 1744 } 1745 } 1746 1747 return new UnparsedLDIFRecord(lineList, duplicateValueBehavior, 1748 trailingSpaceBehavior, schema, firstLineNumber); 1749 } 1750 1751 1752 1753 /** 1754 * Decodes the provided set of LDIF lines as an entry. The provided set of 1755 * lines must contain exactly one entry. Long lines may be wrapped as per the 1756 * LDIF specification, and it is acceptable to have one or more blank lines 1757 * following the entry. A default trailing space behavior of 1758 * {@code TrailingSpaceBehavior#REJECT} will be used. 
1759 * 1760 * @param ldifLines The set of lines that comprise the LDIF representation 1761 * of the entry. It must not be {@code null} or empty. 1762 * 1763 * @return The entry read from LDIF. 1764 * 1765 * @throws LDIFException If the provided LDIF data cannot be decoded as an 1766 * entry. 1767 */ 1768 public static Entry decodeEntry(final String... ldifLines) 1769 throws LDIFException 1770 { 1771 final Entry e = decodeEntry(prepareRecord(DuplicateValueBehavior.STRIP, 1772 TrailingSpaceBehavior.REJECT, null, ldifLines), 1773 DEFAULT_RELATIVE_BASE_PATH); 1774 debugLDIFRead(e); 1775 return e; 1776 } 1777 1778 1779 1780 /** 1781 * Decodes the provided set of LDIF lines as an entry. The provided set of 1782 * lines must contain exactly one entry. Long lines may be wrapped as per the 1783 * LDIF specification, and it is acceptable to have one or more blank lines 1784 * following the entry. A default trailing space behavior of 1785 * {@code TrailingSpaceBehavior#REJECT} will be used. 1786 * 1787 * @param ignoreDuplicateValues Indicates whether to ignore duplicate 1788 * attribute values encountered while parsing. 1789 * @param schema The schema to use when parsing the record, 1790 * if applicable. 1791 * @param ldifLines The set of lines that comprise the LDIF 1792 * representation of the entry. It must not be 1793 * {@code null} or empty. 1794 * 1795 * @return The entry read from LDIF. 1796 * 1797 * @throws LDIFException If the provided LDIF data cannot be decoded as an 1798 * entry. 1799 */ 1800 public static Entry decodeEntry(final boolean ignoreDuplicateValues, 1801 final Schema schema, 1802 final String... ldifLines) 1803 throws LDIFException 1804 { 1805 return decodeEntry(ignoreDuplicateValues, TrailingSpaceBehavior.REJECT, 1806 schema, ldifLines); 1807 } 1808 1809 1810 1811 /** 1812 * Decodes the provided set of LDIF lines as an entry. The provided set of 1813 * lines must contain exactly one entry. 
Long lines may be wrapped as per the 1814 * LDIF specification, and it is acceptable to have one or more blank lines 1815 * following the entry. 1816 * 1817 * @param ignoreDuplicateValues Indicates whether to ignore duplicate 1818 * attribute values encountered while parsing. 1819 * @param trailingSpaceBehavior The behavior that should be exhibited when 1820 * encountering attribute values which are not 1821 * base64-encoded but contain trailing spaces. 1822 * It must not be {@code null}. 1823 * @param schema The schema to use when parsing the record, 1824 * if applicable. 1825 * @param ldifLines The set of lines that comprise the LDIF 1826 * representation of the entry. It must not be 1827 * {@code null} or empty. 1828 * 1829 * @return The entry read from LDIF. 1830 * 1831 * @throws LDIFException If the provided LDIF data cannot be decoded as an 1832 * entry. 1833 */ 1834 public static Entry decodeEntry( 1835 final boolean ignoreDuplicateValues, 1836 final TrailingSpaceBehavior trailingSpaceBehavior, 1837 final Schema schema, 1838 final String... ldifLines) throws LDIFException 1839 { 1840 final Entry e = decodeEntry(prepareRecord( 1841 (ignoreDuplicateValues 1842 ? DuplicateValueBehavior.STRIP 1843 : DuplicateValueBehavior.REJECT), 1844 trailingSpaceBehavior, schema, ldifLines), 1845 DEFAULT_RELATIVE_BASE_PATH); 1846 debugLDIFRead(e); 1847 return e; 1848 } 1849 1850 1851 1852 /** 1853 * Decodes the provided set of LDIF lines as an LDIF change record. The 1854 * provided set of lines must contain exactly one change record and it must 1855 * include a changetype. Long lines may be wrapped as per the LDIF 1856 * specification, and it is acceptable to have one or more blank lines 1857 * following the entry. 1858 * 1859 * @param ldifLines The set of lines that comprise the LDIF representation 1860 * of the change record. It must not be {@code null} or 1861 * empty. 1862 * 1863 * @return The change record read from LDIF. 
1864 * 1865 * @throws LDIFException If the provided LDIF data cannot be decoded as a 1866 * change record. 1867 */ 1868 public static LDIFChangeRecord decodeChangeRecord(final String... ldifLines) 1869 throws LDIFException 1870 { 1871 return decodeChangeRecord(false, ldifLines); 1872 } 1873 1874 1875 1876 /** 1877 * Decodes the provided set of LDIF lines as an LDIF change record. The 1878 * provided set of lines must contain exactly one change record. Long lines 1879 * may be wrapped as per the LDIF specification, and it is acceptable to have 1880 * one or more blank lines following the entry. 1881 * 1882 * @param defaultAdd Indicates whether an LDIF record not containing a 1883 * changetype should be retrieved as an add change record. 1884 * If this is {@code false} and the record read does not 1885 * include a changetype, then an {@code LDIFException} 1886 * will be thrown. 1887 * @param ldifLines The set of lines that comprise the LDIF representation 1888 * of the change record. It must not be {@code null} or 1889 * empty. 1890 * 1891 * @return The change record read from LDIF. 1892 * 1893 * @throws LDIFException If the provided LDIF data cannot be decoded as a 1894 * change record. 1895 */ 1896 public static LDIFChangeRecord decodeChangeRecord(final boolean defaultAdd, 1897 final String... ldifLines) 1898 throws LDIFException 1899 { 1900 final LDIFChangeRecord r = 1901 decodeChangeRecord( 1902 prepareRecord(DuplicateValueBehavior.STRIP, 1903 TrailingSpaceBehavior.REJECT, null, ldifLines), 1904 DEFAULT_RELATIVE_BASE_PATH, defaultAdd, null); 1905 debugLDIFRead(r); 1906 return r; 1907 } 1908 1909 1910 1911 /** 1912 * Decodes the provided set of LDIF lines as an LDIF change record. The 1913 * provided set of lines must contain exactly one change record. Long lines 1914 * may be wrapped as per the LDIF specification, and it is acceptable to have 1915 * one or more blank lines following the entry. 
1916 * 1917 * @param ignoreDuplicateValues Indicates whether to ignore duplicate 1918 * attribute values encountered while parsing. 1919 * @param schema The schema to use when processing the change 1920 * record, or {@code null} if no schema should 1921 * be used and all values should be treated as 1922 * case-insensitive strings. 1923 * @param defaultAdd Indicates whether an LDIF record not 1924 * containing a changetype should be retrieved 1925 * as an add change record. If this is 1926 * {@code false} and the record read does not 1927 * include a changetype, then an 1928 * {@code LDIFException} will be thrown. 1929 * @param ldifLines The set of lines that comprise the LDIF 1930 * representation of the change record. It 1931 * must not be {@code null} or empty. 1932 * 1933 * @return The change record read from LDIF. 1934 * 1935 * @throws LDIFException If the provided LDIF data cannot be decoded as a 1936 * change record. 1937 */ 1938 public static LDIFChangeRecord decodeChangeRecord( 1939 final boolean ignoreDuplicateValues, 1940 final Schema schema, 1941 final boolean defaultAdd, 1942 final String... ldifLines) 1943 throws LDIFException 1944 { 1945 return decodeChangeRecord(ignoreDuplicateValues, 1946 TrailingSpaceBehavior.REJECT, schema, defaultAdd, ldifLines); 1947 } 1948 1949 1950 1951 /** 1952 * Decodes the provided set of LDIF lines as an LDIF change record. The 1953 * provided set of lines must contain exactly one change record. Long lines 1954 * may be wrapped as per the LDIF specification, and it is acceptable to have 1955 * one or more blank lines following the entry. 1956 * 1957 * @param ignoreDuplicateValues Indicates whether to ignore duplicate 1958 * attribute values encountered while parsing. 1959 * @param trailingSpaceBehavior The behavior that should be exhibited when 1960 * encountering attribute values which are not 1961 * base64-encoded but contain trailing spaces. 1962 * It must not be {@code null}. 
1963 * @param schema The schema to use when processing the change 1964 * record, or {@code null} if no schema should 1965 * be used and all values should be treated as 1966 * case-insensitive strings. 1967 * @param defaultAdd Indicates whether an LDIF record not 1968 * containing a changetype should be retrieved 1969 * as an add change record. If this is 1970 * {@code false} and the record read does not 1971 * include a changetype, then an 1972 * {@code LDIFException} will be thrown. 1973 * @param ldifLines The set of lines that comprise the LDIF 1974 * representation of the change record. It 1975 * must not be {@code null} or empty. 1976 * 1977 * @return The change record read from LDIF. 1978 * 1979 * @throws LDIFException If the provided LDIF data cannot be decoded as a 1980 * change record. 1981 */ 1982 public static LDIFChangeRecord decodeChangeRecord( 1983 final boolean ignoreDuplicateValues, 1984 final TrailingSpaceBehavior trailingSpaceBehavior, 1985 final Schema schema, 1986 final boolean defaultAdd, 1987 final String... ldifLines) 1988 throws LDIFException 1989 { 1990 final LDIFChangeRecord r = decodeChangeRecord( 1991 prepareRecord( 1992 (ignoreDuplicateValues 1993 ? DuplicateValueBehavior.STRIP 1994 : DuplicateValueBehavior.REJECT), 1995 trailingSpaceBehavior, schema, ldifLines), 1996 DEFAULT_RELATIVE_BASE_PATH, defaultAdd, null); 1997 debugLDIFRead(r); 1998 return r; 1999 } 2000 2001 2002 2003 /** 2004 * Parses the provided set of lines into a list of {@code StringBuilder} 2005 * objects suitable for decoding into an entry or LDIF change record. 2006 * Comments will be ignored and wrapped lines will be unwrapped. 2007 * 2008 * @param duplicateValueBehavior The behavior that should be exhibited if 2009 * the LDIF reader encounters an entry with 2010 * duplicate values. 2011 * @param trailingSpaceBehavior The behavior that should be exhibited when 2012 * encountering attribute values which are not 2013 * base64-encoded but contain trailing spaces. 
2014 * @param schema The schema to use when parsing the record, 2015 * if applicable. 2016 * @param ldifLines The set of lines that comprise the record 2017 * to decode. It must not be {@code null} or 2018 * empty. 2019 * 2020 * @return The prepared list of {@code StringBuilder} objects ready to be 2021 * decoded. 2022 * 2023 * @throws LDIFException If the provided lines do not contain valid LDIF 2024 * content. 2025 */ 2026 private static UnparsedLDIFRecord prepareRecord( 2027 final DuplicateValueBehavior duplicateValueBehavior, 2028 final TrailingSpaceBehavior trailingSpaceBehavior, 2029 final Schema schema, final String... ldifLines) 2030 throws LDIFException 2031 { 2032 ensureNotNull(ldifLines); 2033 ensureFalse(ldifLines.length == 0, 2034 "LDIFReader.prepareRecord.ldifLines must not be empty."); 2035 2036 boolean lastWasComment = false; 2037 final ArrayList<StringBuilder> lineList = 2038 new ArrayList<StringBuilder>(ldifLines.length); 2039 for (int i=0; i < ldifLines.length; i++) 2040 { 2041 final String line = ldifLines[i]; 2042 if (line.length() == 0) 2043 { 2044 // This is only acceptable if there are no more non-empty lines in the 2045 // array. 2046 for (int j=i+1; j < ldifLines.length; j++) 2047 { 2048 if (ldifLines[j].length() > 0) 2049 { 2050 throw new LDIFException(ERR_READ_UNEXPECTED_BLANK.get(i), i, true, 2051 ldifLines, null); 2052 } 2053 2054 // If we've gotten here, then we know that we're at the end of the 2055 // entry. If we have read data, then we can decode it as an entry. 2056 // Otherwise, there was no real data in the provided LDIF lines. 2057 if (lineList.isEmpty()) 2058 { 2059 throw new LDIFException(ERR_READ_ONLY_BLANKS.get(), 0, true, 2060 ldifLines, null); 2061 } 2062 else 2063 { 2064 return new UnparsedLDIFRecord(lineList, duplicateValueBehavior, 2065 trailingSpaceBehavior, schema, 0); 2066 } 2067 } 2068 } 2069 2070 if (line.charAt(0) == ' ') 2071 { 2072 if (i > 0) 2073 { 2074 if (! 
lastWasComment) 2075 { 2076 lineList.get(lineList.size() - 1).append(line.substring(1)); 2077 } 2078 } 2079 else 2080 { 2081 throw new LDIFException( 2082 ERR_READ_UNEXPECTED_FIRST_SPACE_NO_NUMBER.get(), 0, 2083 true, ldifLines, null); 2084 } 2085 } 2086 else if (line.charAt(0) == '#') 2087 { 2088 lastWasComment = true; 2089 } 2090 else 2091 { 2092 lineList.add(new StringBuilder(line)); 2093 lastWasComment = false; 2094 } 2095 } 2096 2097 if (lineList.isEmpty()) 2098 { 2099 throw new LDIFException(ERR_READ_NO_DATA.get(), 0, true, ldifLines, null); 2100 } 2101 else 2102 { 2103 return new UnparsedLDIFRecord(lineList, duplicateValueBehavior, 2104 trailingSpaceBehavior, schema, 0); 2105 } 2106 } 2107 2108 2109 2110 /** 2111 * Decodes the unparsed record that was read from the LDIF source. It may be 2112 * either an entry or an LDIF change record. 2113 * 2114 * @param unparsedRecord The unparsed LDIF record that was read from the 2115 * input. It must not be {@code null} or empty. 2116 * @param relativeBasePath The base path that will be prepended to relative 2117 * paths in order to obtain an absolute path. 2118 * @param schema The schema to use when parsing. 2119 * 2120 * @return The parsed record, or {@code null} if there are no more entries to 2121 * be read. 2122 * 2123 * @throws LDIFException If the data read could not be parsed as an entry or 2124 * an LDIF change record. 2125 */ 2126 private static LDIFRecord decodeRecord( 2127 final UnparsedLDIFRecord unparsedRecord, 2128 final String relativeBasePath, 2129 final Schema schema) 2130 throws LDIFException 2131 { 2132 // If there was an error reading from the input, then we rethrow it here. 2133 final Exception readError = unparsedRecord.getFailureCause(); 2134 if (readError != null) 2135 { 2136 if (readError instanceof LDIFException) 2137 { 2138 // If the error was an LDIFException, which will normally be the case, 2139 // then rethrow it with all of the same state. 
We could just 2140 // throw (LDIFException) readError; 2141 // but that's considered bad form. 2142 final LDIFException ldifEx = (LDIFException) readError; 2143 throw new LDIFException(ldifEx.getMessage(), 2144 ldifEx.getLineNumber(), 2145 ldifEx.mayContinueReading(), 2146 ldifEx.getDataLines(), 2147 ldifEx.getCause()); 2148 } 2149 else 2150 { 2151 throw new LDIFException(getExceptionMessage(readError), 2152 -1, true, readError); 2153 } 2154 } 2155 2156 if (unparsedRecord.isEOF()) 2157 { 2158 return null; 2159 } 2160 2161 final ArrayList<StringBuilder> lineList = unparsedRecord.getLineList(); 2162 if (unparsedRecord.getLineList() == null) 2163 { 2164 return null; // We can get here if there was an error reading the lines. 2165 } 2166 2167 final LDIFRecord r; 2168 if (lineList.size() == 1) 2169 { 2170 r = decodeEntry(unparsedRecord, relativeBasePath); 2171 } 2172 else 2173 { 2174 final String lowerSecondLine = toLowerCase(lineList.get(1).toString()); 2175 if (lowerSecondLine.startsWith("control:") || 2176 lowerSecondLine.startsWith("changetype:")) 2177 { 2178 r = decodeChangeRecord(unparsedRecord, relativeBasePath, true, schema); 2179 } 2180 else 2181 { 2182 r = decodeEntry(unparsedRecord, relativeBasePath); 2183 } 2184 } 2185 2186 debugLDIFRead(r); 2187 return r; 2188 } 2189 2190 2191 2192 /** 2193 * Decodes the provided set of LDIF lines as an entry. The provided list must 2194 * not contain any blank lines or comments, and lines are not allowed to be 2195 * wrapped. 2196 * 2197 * @param unparsedRecord The unparsed LDIF record that was read from the 2198 * input. It must not be {@code null} or empty. 2199 * @param relativeBasePath The base path that will be prepended to relative 2200 * paths in order to obtain an absolute path. 2201 * 2202 * @return The entry read from LDIF. 2203 * 2204 * @throws LDIFException If the provided LDIF data cannot be read as an 2205 * entry. 
2206 */ 2207 private static Entry decodeEntry(final UnparsedLDIFRecord unparsedRecord, 2208 final String relativeBasePath) 2209 throws LDIFException 2210 { 2211 final ArrayList<StringBuilder> ldifLines = unparsedRecord.getLineList(); 2212 final long firstLineNumber = unparsedRecord.getFirstLineNumber(); 2213 2214 final Iterator<StringBuilder> iterator = ldifLines.iterator(); 2215 2216 // The first line must start with either "version:" or "dn:". If the first 2217 // line starts with "version:" then the second must start with "dn:". 2218 StringBuilder line = iterator.next(); 2219 handleTrailingSpaces(line, null, firstLineNumber, 2220 unparsedRecord.getTrailingSpaceBehavior()); 2221 int colonPos = line.indexOf(":"); 2222 if ((colonPos > 0) && 2223 line.substring(0, colonPos).equalsIgnoreCase("version")) 2224 { 2225 // The first line is "version:". Under most conditions, this will be 2226 // handled by the LDIF reader, but this can happen if you call 2227 // decodeEntry with a set of data that includes a version. At any rate, 2228 // read the next line, which must specify the DN. 2229 line = iterator.next(); 2230 handleTrailingSpaces(line, null, firstLineNumber, 2231 unparsedRecord.getTrailingSpaceBehavior()); 2232 } 2233 2234 colonPos = line.indexOf(":"); 2235 if ((colonPos < 0) || 2236 (! line.substring(0, colonPos).equalsIgnoreCase("dn"))) 2237 { 2238 throw new LDIFException( 2239 ERR_READ_DN_LINE_DOESNT_START_WITH_DN.get(firstLineNumber), 2240 firstLineNumber, true, ldifLines, null); 2241 } 2242 2243 final String dn; 2244 final int length = line.length(); 2245 if (length == (colonPos+1)) 2246 { 2247 // The colon was the last character on the line. This is acceptable and 2248 // indicates that the entry has the null DN. 2249 dn = ""; 2250 } 2251 else if (line.charAt(colonPos+1) == ':') 2252 { 2253 // Skip over any spaces leading up to the value, and then the rest of the 2254 // string is the base64-encoded DN. 
2255 int pos = colonPos+2; 2256 while ((pos < length) && (line.charAt(pos) == ' ')) 2257 { 2258 pos++; 2259 } 2260 2261 try 2262 { 2263 final byte[] dnBytes = Base64.decode(line.substring(pos)); 2264 dn = new String(dnBytes, "UTF-8"); 2265 } 2266 catch (final ParseException pe) 2267 { 2268 debugException(pe); 2269 throw new LDIFException( 2270 ERR_READ_CANNOT_BASE64_DECODE_DN.get(firstLineNumber, 2271 pe.getMessage()), 2272 firstLineNumber, true, ldifLines, pe); 2273 } 2274 catch (final Exception e) 2275 { 2276 debugException(e); 2277 throw new LDIFException( 2278 ERR_READ_CANNOT_BASE64_DECODE_DN.get(firstLineNumber, e), 2279 firstLineNumber, true, ldifLines, e); 2280 } 2281 } 2282 else 2283 { 2284 // Skip over any spaces leading up to the value, and then the rest of the 2285 // string is the DN. 2286 int pos = colonPos+1; 2287 while ((pos < length) && (line.charAt(pos) == ' ')) 2288 { 2289 pos++; 2290 } 2291 2292 dn = line.substring(pos); 2293 } 2294 2295 2296 // The remaining lines must be the attributes for the entry. However, we 2297 // will allow the case in which an entry does not have any attributes, to be 2298 // able to support reading search result entries in which no attributes were 2299 // returned. 2300 if (! iterator.hasNext()) 2301 { 2302 return new Entry(dn, unparsedRecord.getSchema()); 2303 } 2304 2305 return new Entry(dn, unparsedRecord.getSchema(), 2306 parseAttributes(dn, unparsedRecord.getDuplicateValueBehavior(), 2307 unparsedRecord.getTrailingSpaceBehavior(), 2308 unparsedRecord.getSchema(), ldifLines, iterator, relativeBasePath, 2309 firstLineNumber)); 2310 } 2311 2312 2313 2314 /** 2315 * Decodes the provided set of LDIF lines as a change record. The provided 2316 * list must not contain any blank lines or comments, and lines are not 2317 * allowed to be wrapped. 2318 * 2319 * @param unparsedRecord The unparsed LDIF record that was read from the 2320 * input. It must not be {@code null} or empty. 
2321 * @param relativeBasePath The base path that will be prepended to relative 2322 * paths in order to obtain an absolute path. 2323 * @param defaultAdd Indicates whether an LDIF record not containing a 2324 * changetype should be retrieved as an add change 2325 * record. If this is {@code false} and the record 2326 * read does not include a changetype, then an 2327 * {@code LDIFException} will be thrown. 2328 * @param schema The schema to use in parsing. 2329 * 2330 * @return The change record read from LDIF. 2331 * 2332 * @throws LDIFException If the provided LDIF data cannot be decoded as a 2333 * change record. 2334 */ 2335 private static LDIFChangeRecord decodeChangeRecord( 2336 final UnparsedLDIFRecord unparsedRecord, 2337 final String relativeBasePath, 2338 final boolean defaultAdd, 2339 final Schema schema) 2340 throws LDIFException 2341 { 2342 final ArrayList<StringBuilder> ldifLines = unparsedRecord.getLineList(); 2343 final long firstLineNumber = unparsedRecord.getFirstLineNumber(); 2344 2345 Iterator<StringBuilder> iterator = ldifLines.iterator(); 2346 2347 // The first line must start with either "version:" or "dn:". If the first 2348 // line starts with "version:" then the second must start with "dn:". 2349 StringBuilder line = iterator.next(); 2350 handleTrailingSpaces(line, null, firstLineNumber, 2351 unparsedRecord.getTrailingSpaceBehavior()); 2352 int colonPos = line.indexOf(":"); 2353 int linesRead = 1; 2354 if ((colonPos > 0) && 2355 line.substring(0, colonPos).equalsIgnoreCase("version")) 2356 { 2357 // The first line is "version:". Under most conditions, this will be 2358 // handled by the LDIF reader, but this can happen if you call 2359 // decodeEntry with a set of data that includes a version. At any rate, 2360 // read the next line, which must specify the DN. 
2361 line = iterator.next(); 2362 linesRead++; 2363 handleTrailingSpaces(line, null, firstLineNumber, 2364 unparsedRecord.getTrailingSpaceBehavior()); 2365 } 2366 2367 colonPos = line.indexOf(":"); 2368 if ((colonPos < 0) || 2369 (! line.substring(0, colonPos).equalsIgnoreCase("dn"))) 2370 { 2371 throw new LDIFException( 2372 ERR_READ_DN_LINE_DOESNT_START_WITH_DN.get(firstLineNumber), 2373 firstLineNumber, true, ldifLines, null); 2374 } 2375 2376 final String dn; 2377 int length = line.length(); 2378 if (length == (colonPos+1)) 2379 { 2380 // The colon was the last character on the line. This is acceptable and 2381 // indicates that the entry has the null DN. 2382 dn = ""; 2383 } 2384 else if (line.charAt(colonPos+1) == ':') 2385 { 2386 // Skip over any spaces leading up to the value, and then the rest of the 2387 // string is the base64-encoded DN. 2388 int pos = colonPos+2; 2389 while ((pos < length) && (line.charAt(pos) == ' ')) 2390 { 2391 pos++; 2392 } 2393 2394 try 2395 { 2396 final byte[] dnBytes = Base64.decode(line.substring(pos)); 2397 dn = new String(dnBytes, "UTF-8"); 2398 } 2399 catch (final ParseException pe) 2400 { 2401 debugException(pe); 2402 throw new LDIFException( 2403 ERR_READ_CR_CANNOT_BASE64_DECODE_DN.get(firstLineNumber, 2404 pe.getMessage()), 2405 firstLineNumber, true, ldifLines, pe); 2406 } 2407 catch (final Exception e) 2408 { 2409 debugException(e); 2410 throw new LDIFException( 2411 ERR_READ_CR_CANNOT_BASE64_DECODE_DN.get(firstLineNumber, 2412 e), 2413 firstLineNumber, true, ldifLines, e); 2414 } 2415 } 2416 else 2417 { 2418 // Skip over any spaces leading up to the value, and then the rest of the 2419 // string is the DN. 2420 int pos = colonPos+1; 2421 while ((pos < length) && (line.charAt(pos) == ' ')) 2422 { 2423 pos++; 2424 } 2425 2426 dn = line.substring(pos); 2427 } 2428 2429 2430 // An LDIF change record may contain zero or more controls, with the end of 2431 // the controls signified by the changetype. 
The changetype element must be 2432 // present, unless defaultAdd is true in which case the first thing that is 2433 // neither control or changetype will trigger the start of add attribute 2434 // parsing. 2435 if (! iterator.hasNext()) 2436 { 2437 throw new LDIFException(ERR_READ_CR_TOO_SHORT.get(firstLineNumber), 2438 firstLineNumber, true, ldifLines, null); 2439 } 2440 2441 String changeType = null; 2442 ArrayList<Control> controls = null; 2443 while (true) 2444 { 2445 line = iterator.next(); 2446 handleTrailingSpaces(line, dn, firstLineNumber, 2447 unparsedRecord.getTrailingSpaceBehavior()); 2448 colonPos = line.indexOf(":"); 2449 if (colonPos < 0) 2450 { 2451 throw new LDIFException( 2452 ERR_READ_CR_SECOND_LINE_MISSING_COLON.get(firstLineNumber), 2453 firstLineNumber, true, ldifLines, null); 2454 } 2455 2456 final String token = toLowerCase(line.substring(0, colonPos)); 2457 if (token.equals("control")) 2458 { 2459 if (controls == null) 2460 { 2461 controls = new ArrayList<Control>(5); 2462 } 2463 2464 controls.add(decodeControl(line, colonPos, firstLineNumber, ldifLines, 2465 relativeBasePath)); 2466 } 2467 else if (token.equals("changetype")) 2468 { 2469 changeType = 2470 decodeChangeType(line, colonPos, firstLineNumber, ldifLines); 2471 break; 2472 } 2473 else if (defaultAdd) 2474 { 2475 // The line we read wasn't a control or changetype declaration, so we'll 2476 // assume it's an attribute in an add record. However, we're not ready 2477 // for that yet, and since we can't rewind an iterator we'll create a 2478 // new one that hasn't yet gotten to this line. 
2479 changeType = "add"; 2480 iterator = ldifLines.iterator(); 2481 for (int i=0; i < linesRead; i++) 2482 { 2483 iterator.next(); 2484 } 2485 break; 2486 } 2487 else 2488 { 2489 throw new LDIFException( 2490 ERR_READ_CR_CT_LINE_DOESNT_START_WITH_CONTROL_OR_CT.get( 2491 firstLineNumber), 2492 firstLineNumber, true, ldifLines, null); 2493 } 2494 2495 linesRead++; 2496 } 2497 2498 2499 // Make sure that the change type is acceptable and then decode the rest of 2500 // the change record accordingly. 2501 final String lowerChangeType = toLowerCase(changeType); 2502 if (lowerChangeType.equals("add")) 2503 { 2504 // There must be at least one more line. If not, then that's an error. 2505 // Otherwise, parse the rest of the data as attribute-value pairs. 2506 if (iterator.hasNext()) 2507 { 2508 final Collection<Attribute> attrs = 2509 parseAttributes(dn, unparsedRecord.getDuplicateValueBehavior(), 2510 unparsedRecord.getTrailingSpaceBehavior(), 2511 unparsedRecord.getSchema(), ldifLines, iterator, 2512 relativeBasePath, firstLineNumber); 2513 final Attribute[] attributes = new Attribute[attrs.size()]; 2514 final Iterator<Attribute> attrIterator = attrs.iterator(); 2515 for (int i=0; i < attributes.length; i++) 2516 { 2517 attributes[i] = attrIterator.next(); 2518 } 2519 2520 return new LDIFAddChangeRecord(dn, attributes, controls); 2521 } 2522 else 2523 { 2524 throw new LDIFException(ERR_READ_CR_NO_ATTRIBUTES.get(firstLineNumber), 2525 firstLineNumber, true, ldifLines, null); 2526 } 2527 } 2528 else if (lowerChangeType.equals("delete")) 2529 { 2530 // There shouldn't be any more data. If there is, then that's an error. 2531 // Otherwise, we can just return the delete change record with what we 2532 // already know. 
2533 if (iterator.hasNext()) 2534 { 2535 throw new LDIFException( 2536 ERR_READ_CR_EXTRA_DELETE_DATA.get(firstLineNumber), 2537 firstLineNumber, true, ldifLines, null); 2538 } 2539 else 2540 { 2541 return new LDIFDeleteChangeRecord(dn, controls); 2542 } 2543 } 2544 else if (lowerChangeType.equals("modify")) 2545 { 2546 // There must be at least one more line. If not, then that's an error. 2547 // Otherwise, parse the rest of the data as a set of modifications. 2548 if (iterator.hasNext()) 2549 { 2550 final Modification[] mods = parseModifications(dn, 2551 unparsedRecord.getTrailingSpaceBehavior(), ldifLines, iterator, 2552 firstLineNumber, schema); 2553 return new LDIFModifyChangeRecord(dn, mods, controls); 2554 } 2555 else 2556 { 2557 throw new LDIFException(ERR_READ_CR_NO_MODS.get(firstLineNumber), 2558 firstLineNumber, true, ldifLines, null); 2559 } 2560 } 2561 else if (lowerChangeType.equals("moddn") || 2562 lowerChangeType.equals("modrdn")) 2563 { 2564 // There must be at least one more line. If not, then that's an error. 2565 // Otherwise, parse the rest of the data as a set of modifications. 2566 if (iterator.hasNext()) 2567 { 2568 return parseModifyDNChangeRecord(ldifLines, iterator, dn, controls, 2569 unparsedRecord.getTrailingSpaceBehavior(), firstLineNumber); 2570 } 2571 else 2572 { 2573 throw new LDIFException(ERR_READ_CR_NO_NEWRDN.get(firstLineNumber), 2574 firstLineNumber, true, ldifLines, null); 2575 } 2576 } 2577 else 2578 { 2579 throw new LDIFException(ERR_READ_CR_INVALID_CT.get(changeType, 2580 firstLineNumber), 2581 firstLineNumber, true, ldifLines, null); 2582 } 2583 } 2584 2585 2586 2587 /** 2588 * Decodes information about a control from the provided line. 2589 * 2590 * @param line The line to process. 2591 * @param colonPos The position of the colon that separates the 2592 * control token string from tbe encoded control. 2593 * @param firstLineNumber The line number for the start of the record. 
2594 * @param ldifLines The lines that comprise the LDIF representation 2595 * of the full record being parsed. 2596 * @param relativeBasePath The base path that will be prepended to relative 2597 * paths in order to obtain an absolute path. 2598 * 2599 * @return The decoded control. 2600 * 2601 * @throws LDIFException If a problem is encountered while trying to decode 2602 * the changetype. 2603 */ 2604 private static Control decodeControl(final StringBuilder line, 2605 final int colonPos, 2606 final long firstLineNumber, 2607 final ArrayList<StringBuilder> ldifLines, 2608 final String relativeBasePath) 2609 throws LDIFException 2610 { 2611 final String controlString; 2612 int length = line.length(); 2613 if (length == (colonPos+1)) 2614 { 2615 // The colon was the last character on the line. This is not 2616 // acceptable. 2617 throw new LDIFException( 2618 ERR_READ_CONTROL_LINE_NO_CONTROL_VALUE.get(firstLineNumber), 2619 firstLineNumber, true, ldifLines, null); 2620 } 2621 else if (line.charAt(colonPos+1) == ':') 2622 { 2623 // Skip over any spaces leading up to the value, and then the rest of 2624 // the string is the base64-encoded control representation. This is 2625 // unusual and unnecessary, but is nevertheless acceptable. 
2626 int pos = colonPos+2; 2627 while ((pos < length) && (line.charAt(pos) == ' ')) 2628 { 2629 pos++; 2630 } 2631 2632 try 2633 { 2634 final byte[] controlBytes = Base64.decode(line.substring(pos)); 2635 controlString = new String(controlBytes, "UTF-8"); 2636 } 2637 catch (final ParseException pe) 2638 { 2639 debugException(pe); 2640 throw new LDIFException( 2641 ERR_READ_CANNOT_BASE64_DECODE_CONTROL.get( 2642 firstLineNumber, pe.getMessage()), 2643 firstLineNumber, true, ldifLines, pe); 2644 } 2645 catch (final Exception e) 2646 { 2647 debugException(e); 2648 throw new LDIFException( 2649 ERR_READ_CANNOT_BASE64_DECODE_CONTROL.get(firstLineNumber, e), 2650 firstLineNumber, true, ldifLines, e); 2651 } 2652 } 2653 else 2654 { 2655 // Skip over any spaces leading up to the value, and then the rest of 2656 // the string is the encoded control. 2657 int pos = colonPos+1; 2658 while ((pos < length) && (line.charAt(pos) == ' ')) 2659 { 2660 pos++; 2661 } 2662 2663 controlString = line.substring(pos); 2664 } 2665 2666 // If the resulting control definition is empty, then that's invalid. 2667 if (controlString.length() == 0) 2668 { 2669 throw new LDIFException( 2670 ERR_READ_CONTROL_LINE_NO_CONTROL_VALUE.get(firstLineNumber), 2671 firstLineNumber, true, ldifLines, null); 2672 } 2673 2674 2675 // The first element of the control must be the OID, and it must be followed 2676 // by a space (to separate it from the criticality), a colon (to separate it 2677 // from the value and indicate a default criticality of false), or the end 2678 // of the line (to indicate a default criticality of false and no value). 2679 String oid = null; 2680 boolean hasCriticality = false; 2681 boolean hasValue = false; 2682 int pos = 0; 2683 length = controlString.length(); 2684 while (pos < length) 2685 { 2686 final char c = controlString.charAt(pos); 2687 if (c == ':') 2688 { 2689 // This indicates that there is no criticality and that the value 2690 // immediately follows the OID. 
2691 oid = controlString.substring(0, pos++); 2692 hasValue = true; 2693 break; 2694 } 2695 else if (c == ' ') 2696 { 2697 // This indicates that there is a criticality. We don't know anything 2698 // about the presence of a value yet. 2699 oid = controlString.substring(0, pos++); 2700 hasCriticality = true; 2701 break; 2702 } 2703 else 2704 { 2705 pos++; 2706 } 2707 } 2708 2709 if (oid == null) 2710 { 2711 // This indicates that the string representation of the control is only 2712 // the OID. 2713 return new Control(controlString, false); 2714 } 2715 2716 2717 // See if we need to read the criticality. If so, then do so now. 2718 // Otherwise, assume a default criticality of false. 2719 final boolean isCritical; 2720 if (hasCriticality) 2721 { 2722 // Skip over any spaces before the criticality. 2723 while (controlString.charAt(pos) == ' ') 2724 { 2725 pos++; 2726 } 2727 2728 // Read until we find a colon or the end of the string. 2729 final int criticalityStartPos = pos; 2730 while (pos < length) 2731 { 2732 final char c = controlString.charAt(pos); 2733 if (c == ':') 2734 { 2735 hasValue = true; 2736 break; 2737 } 2738 else 2739 { 2740 pos++; 2741 } 2742 } 2743 2744 final String criticalityString = 2745 toLowerCase(controlString.substring(criticalityStartPos, pos)); 2746 if (criticalityString.equals("true")) 2747 { 2748 isCritical = true; 2749 } 2750 else if (criticalityString.equals("false")) 2751 { 2752 isCritical = false; 2753 } 2754 else 2755 { 2756 throw new LDIFException( 2757 ERR_READ_CONTROL_LINE_INVALID_CRITICALITY.get(criticalityString, 2758 firstLineNumber), 2759 firstLineNumber, true, ldifLines, null); 2760 } 2761 2762 if (hasValue) 2763 { 2764 pos++; 2765 } 2766 } 2767 else 2768 { 2769 isCritical = false; 2770 } 2771 2772 // See if we need to read the value. If so, then do so now. It may be 2773 // a string, or it may be base64-encoded. It could conceivably even be read 2774 // from a URL. 
2775 final ASN1OctetString value; 2776 if (hasValue) 2777 { 2778 // The character immediately after the colon that precedes the value may 2779 // be one of the following: 2780 // - A second colon (optionally followed by a single space) to indicate 2781 // that the value is base64-encoded. 2782 // - A less-than symbol to indicate that the value should be read from a 2783 // location specified by a URL. 2784 // - A single space that precedes the non-base64-encoded value. 2785 // - The first character of the non-base64-encoded value. 2786 switch (controlString.charAt(pos)) 2787 { 2788 case ':': 2789 try 2790 { 2791 if (controlString.length() == (pos+1)) 2792 { 2793 value = new ASN1OctetString(); 2794 } 2795 else if (controlString.charAt(pos+1) == ' ') 2796 { 2797 value = new ASN1OctetString( 2798 Base64.decode(controlString.substring(pos+2))); 2799 } 2800 else 2801 { 2802 value = new ASN1OctetString( 2803 Base64.decode(controlString.substring(pos+1))); 2804 } 2805 } 2806 catch (final Exception e) 2807 { 2808 debugException(e); 2809 throw new LDIFException( 2810 ERR_READ_CONTROL_LINE_CANNOT_BASE64_DECODE_VALUE.get( 2811 firstLineNumber, getExceptionMessage(e)), 2812 firstLineNumber, true, ldifLines, e); 2813 } 2814 break; 2815 case '<': 2816 try 2817 { 2818 final String urlString; 2819 if (controlString.charAt(pos+1) == ' ') 2820 { 2821 urlString = controlString.substring(pos+2); 2822 } 2823 else 2824 { 2825 urlString = controlString.substring(pos+1); 2826 } 2827 value = new ASN1OctetString(retrieveURLBytes(urlString, 2828 relativeBasePath, firstLineNumber)); 2829 } 2830 catch (final Exception e) 2831 { 2832 debugException(e); 2833 throw new LDIFException( 2834 ERR_READ_CONTROL_LINE_CANNOT_RETRIEVE_VALUE_FROM_URL.get( 2835 firstLineNumber, getExceptionMessage(e)), 2836 firstLineNumber, true, ldifLines, e); 2837 } 2838 break; 2839 case ' ': 2840 value = new ASN1OctetString(controlString.substring(pos+1)); 2841 break; 2842 default: 2843 value = new 
ASN1OctetString(controlString.substring(pos)); 2844 break; 2845 } 2846 } 2847 else 2848 { 2849 value = null; 2850 } 2851 2852 return new Control(oid, isCritical, value); 2853 } 2854 2855 2856 2857 /** 2858 * Decodes the changetype element from the provided line. 2859 * 2860 * @param line The line to process. 2861 * @param colonPos The position of the colon that separates the 2862 * changetype string from its value. 2863 * @param firstLineNumber The line number for the start of the record. 2864 * @param ldifLines The lines that comprise the LDIF representation of 2865 * the full record being parsed. 2866 * 2867 * @return The decoded changetype string. 2868 * 2869 * @throws LDIFException If a problem is encountered while trying to decode 2870 * the changetype. 2871 */ 2872 private static String decodeChangeType(final StringBuilder line, 2873 final int colonPos, final long firstLineNumber, 2874 final ArrayList<StringBuilder> ldifLines) 2875 throws LDIFException 2876 { 2877 final int length = line.length(); 2878 if (length == (colonPos+1)) 2879 { 2880 // The colon was the last character on the line. This is not 2881 // acceptable. 2882 throw new LDIFException( 2883 ERR_READ_CT_LINE_NO_CT_VALUE.get(firstLineNumber), firstLineNumber, 2884 true, ldifLines, null); 2885 } 2886 else if (line.charAt(colonPos+1) == ':') 2887 { 2888 // Skip over any spaces leading up to the value, and then the rest of 2889 // the string is the base64-encoded changetype. This is unusual and 2890 // unnecessary, but is nevertheless acceptable. 
2891 int pos = colonPos+2; 2892 while ((pos < length) && (line.charAt(pos) == ' ')) 2893 { 2894 pos++; 2895 } 2896 2897 try 2898 { 2899 final byte[] changeTypeBytes = Base64.decode(line.substring(pos)); 2900 return new String(changeTypeBytes, "UTF-8"); 2901 } 2902 catch (final ParseException pe) 2903 { 2904 debugException(pe); 2905 throw new LDIFException( 2906 ERR_READ_CANNOT_BASE64_DECODE_CT.get(firstLineNumber, 2907 pe.getMessage()), 2908 firstLineNumber, true, ldifLines, pe); 2909 } 2910 catch (final Exception e) 2911 { 2912 debugException(e); 2913 throw new LDIFException( 2914 ERR_READ_CANNOT_BASE64_DECODE_CT.get(firstLineNumber, e), 2915 firstLineNumber, true, ldifLines, e); 2916 } 2917 } 2918 else 2919 { 2920 // Skip over any spaces leading up to the value, and then the rest of 2921 // the string is the changetype. 2922 int pos = colonPos+1; 2923 while ((pos < length) && (line.charAt(pos) == ' ')) 2924 { 2925 pos++; 2926 } 2927 2928 return line.substring(pos); 2929 } 2930 } 2931 2932 2933 2934 /** 2935 * Parses the data available through the provided iterator as a collection of 2936 * attributes suitable for use in an entry or an add change record. 2937 * 2938 * @param dn The DN of the record being read. 2939 * @param duplicateValueBehavior The behavior that should be exhibited if 2940 * the LDIF reader encounters an entry with 2941 * duplicate values. 2942 * @param trailingSpaceBehavior The behavior that should be exhibited when 2943 * encountering attribute values which are not 2944 * base64-encoded but contain trailing spaces. 2945 * @param schema The schema to use when parsing the 2946 * attributes, or {@code null} if none is 2947 * needed. 2948 * @param ldifLines The lines that comprise the LDIF 2949 * representation of the full record being 2950 * parsed. 2951 * @param iterator The iterator to use to access the attribute 2952 * lines. 
2953 * @param relativeBasePath The base path that will be prepended to 2954 * relative paths in order to obtain an 2955 * absolute path. 2956 * @param firstLineNumber The line number for the start of the 2957 * record. 2958 * 2959 * @return The collection of attributes that were read. 2960 * 2961 * @throws LDIFException If the provided LDIF data cannot be decoded as a 2962 * set of attributes. 2963 */ 2964 private static ArrayList<Attribute> parseAttributes(final String dn, 2965 final DuplicateValueBehavior duplicateValueBehavior, 2966 final TrailingSpaceBehavior trailingSpaceBehavior, final Schema schema, 2967 final ArrayList<StringBuilder> ldifLines, 2968 final Iterator<StringBuilder> iterator, final String relativeBasePath, 2969 final long firstLineNumber) 2970 throws LDIFException 2971 { 2972 final LinkedHashMap<String,Object> attributes = 2973 new LinkedHashMap<String,Object>(ldifLines.size()); 2974 while (iterator.hasNext()) 2975 { 2976 final StringBuilder line = iterator.next(); 2977 handleTrailingSpaces(line, dn, firstLineNumber, trailingSpaceBehavior); 2978 final int colonPos = line.indexOf(":"); 2979 if (colonPos <= 0) 2980 { 2981 throw new LDIFException(ERR_READ_NO_ATTR_COLON.get(firstLineNumber), 2982 firstLineNumber, true, ldifLines, null); 2983 } 2984 2985 final String attributeName = line.substring(0, colonPos); 2986 final String lowerName = toLowerCase(attributeName); 2987 2988 final MatchingRule matchingRule; 2989 if (schema == null) 2990 { 2991 matchingRule = CaseIgnoreStringMatchingRule.getInstance(); 2992 } 2993 else 2994 { 2995 matchingRule = 2996 MatchingRule.selectEqualityMatchingRule(attributeName, schema); 2997 } 2998 2999 Attribute attr; 3000 final LDIFAttribute ldifAttr; 3001 final Object attrObject = attributes.get(lowerName); 3002 if (attrObject == null) 3003 { 3004 attr = null; 3005 ldifAttr = null; 3006 } 3007 else 3008 { 3009 if (attrObject instanceof Attribute) 3010 { 3011 attr = (Attribute) attrObject; 3012 ldifAttr = new 
LDIFAttribute(attr.getName(), matchingRule, 3013 attr.getRawValues()[0]); 3014 attributes.put(lowerName, ldifAttr); 3015 } 3016 else 3017 { 3018 attr = null; 3019 ldifAttr = (LDIFAttribute) attrObject; 3020 } 3021 } 3022 3023 final int length = line.length(); 3024 if (length == (colonPos+1)) 3025 { 3026 // This means that the attribute has a zero-length value, which is 3027 // acceptable. 3028 if (attrObject == null) 3029 { 3030 attr = new Attribute(attributeName, matchingRule, ""); 3031 attributes.put(lowerName, attr); 3032 } 3033 else 3034 { 3035 try 3036 { 3037 if (! ldifAttr.addValue(new ASN1OctetString(), 3038 duplicateValueBehavior)) 3039 { 3040 if (duplicateValueBehavior != DuplicateValueBehavior.STRIP) 3041 { 3042 throw new LDIFException(ERR_READ_DUPLICATE_VALUE.get(dn, 3043 firstLineNumber, attributeName), firstLineNumber, true, 3044 ldifLines, null); 3045 } 3046 } 3047 } 3048 catch (LDAPException le) 3049 { 3050 throw new LDIFException(ERR_READ_VALUE_SYNTAX_VIOLATION.get(dn, 3051 firstLineNumber, attributeName, getExceptionMessage(le)), 3052 firstLineNumber, true, ldifLines, le); 3053 } 3054 } 3055 } 3056 else if (line.charAt(colonPos+1) == ':') 3057 { 3058 // Skip over any spaces leading up to the value, and then the rest of 3059 // the string is the base64-encoded attribute value. 3060 int pos = colonPos+2; 3061 while ((pos < length) && (line.charAt(pos) == ' ')) 3062 { 3063 pos++; 3064 } 3065 3066 try 3067 { 3068 final byte[] valueBytes = Base64.decode(line.substring(pos)); 3069 if (attrObject == null) 3070 { 3071 attr = new Attribute(attributeName, matchingRule, valueBytes); 3072 attributes.put(lowerName, attr); 3073 } 3074 else 3075 { 3076 try 3077 { 3078 if (! 
ldifAttr.addValue(new ASN1OctetString(valueBytes), 3079 duplicateValueBehavior)) 3080 { 3081 if (duplicateValueBehavior != DuplicateValueBehavior.STRIP) 3082 { 3083 throw new LDIFException(ERR_READ_DUPLICATE_VALUE.get(dn, 3084 firstLineNumber, attributeName), firstLineNumber, true, 3085 ldifLines, null); 3086 } 3087 } 3088 } 3089 catch (LDAPException le) 3090 { 3091 throw new LDIFException(ERR_READ_VALUE_SYNTAX_VIOLATION.get(dn, 3092 firstLineNumber, attributeName, getExceptionMessage(le)), 3093 firstLineNumber, true, ldifLines, le); 3094 } 3095 } 3096 } 3097 catch (final ParseException pe) 3098 { 3099 debugException(pe); 3100 throw new LDIFException(ERR_READ_CANNOT_BASE64_DECODE_ATTR.get( 3101 attributeName, firstLineNumber, 3102 pe.getMessage()), 3103 firstLineNumber, true, ldifLines, pe); 3104 } 3105 } 3106 else if (line.charAt(colonPos+1) == '<') 3107 { 3108 // Skip over any spaces leading up to the value, and then the rest of 3109 // the string is a URL that indicates where to get the real content. 3110 // At the present time, we'll only support the file URLs. 3111 int pos = colonPos+2; 3112 while ((pos < length) && (line.charAt(pos) == ' ')) 3113 { 3114 pos++; 3115 } 3116 3117 final byte[] urlBytes; 3118 final String urlString = line.substring(pos); 3119 try 3120 { 3121 urlBytes = 3122 retrieveURLBytes(urlString, relativeBasePath, firstLineNumber); 3123 } 3124 catch (final Exception e) 3125 { 3126 debugException(e); 3127 throw new LDIFException( 3128 ERR_READ_URL_EXCEPTION.get(attributeName, urlString, 3129 firstLineNumber, e), 3130 firstLineNumber, true, ldifLines, e); 3131 } 3132 3133 if (attrObject == null) 3134 { 3135 attr = new Attribute(attributeName, matchingRule, urlBytes); 3136 attributes.put(lowerName, attr); 3137 } 3138 else 3139 { 3140 try 3141 { 3142 if (! 
ldifAttr.addValue(new ASN1OctetString(urlBytes), 3143 duplicateValueBehavior)) 3144 { 3145 if (duplicateValueBehavior != DuplicateValueBehavior.STRIP) 3146 { 3147 throw new LDIFException(ERR_READ_DUPLICATE_VALUE.get(dn, 3148 firstLineNumber, attributeName), firstLineNumber, true, 3149 ldifLines, null); 3150 } 3151 } 3152 } 3153 catch (final LDIFException le) 3154 { 3155 debugException(le); 3156 throw le; 3157 } 3158 catch (final Exception e) 3159 { 3160 debugException(e); 3161 throw new LDIFException( 3162 ERR_READ_URL_EXCEPTION.get(attributeName, urlString, 3163 firstLineNumber, e), 3164 firstLineNumber, true, ldifLines, e); 3165 } 3166 } 3167 } 3168 else 3169 { 3170 // Skip over any spaces leading up to the value, and then the rest of 3171 // the string is the value. 3172 int pos = colonPos+1; 3173 while ((pos < length) && (line.charAt(pos) == ' ')) 3174 { 3175 pos++; 3176 } 3177 3178 final String valueString = line.substring(pos); 3179 if (attrObject == null) 3180 { 3181 attr = new Attribute(attributeName, matchingRule, valueString); 3182 attributes.put(lowerName, attr); 3183 } 3184 else 3185 { 3186 try 3187 { 3188 if (! 
ldifAttr.addValue(new ASN1OctetString(valueString), 3189 duplicateValueBehavior)) 3190 { 3191 if (duplicateValueBehavior != DuplicateValueBehavior.STRIP) 3192 { 3193 throw new LDIFException(ERR_READ_DUPLICATE_VALUE.get(dn, 3194 firstLineNumber, attributeName), firstLineNumber, true, 3195 ldifLines, null); 3196 } 3197 } 3198 } 3199 catch (LDAPException le) 3200 { 3201 throw new LDIFException(ERR_READ_VALUE_SYNTAX_VIOLATION.get(dn, 3202 firstLineNumber, attributeName, getExceptionMessage(le)), 3203 firstLineNumber, true, ldifLines, le); 3204 } 3205 } 3206 } 3207 } 3208 3209 final ArrayList<Attribute> attrList = 3210 new ArrayList<Attribute>(attributes.size()); 3211 for (final Object o : attributes.values()) 3212 { 3213 if (o instanceof Attribute) 3214 { 3215 attrList.add((Attribute) o); 3216 } 3217 else 3218 { 3219 attrList.add(((LDIFAttribute) o).toAttribute()); 3220 } 3221 } 3222 3223 return attrList; 3224 } 3225 3226 3227 3228 /** 3229 * Retrieves the bytes that make up the file referenced by the given URL. 3230 * 3231 * @param urlString The string representation of the URL to retrieve. 3232 * @param relativeBasePath The base path that will be prepended to relative 3233 * paths in order to obtain an absolute path. 3234 * @param firstLineNumber The line number for the start of the record. 3235 * 3236 * @return The bytes contained in the specified file, or an empty array if 3237 * the specified file is empty. 3238 * 3239 * @throws LDIFException If the provided URL is malformed or references a 3240 * nonexistent file. 3241 * 3242 * @throws IOException If a problem is encountered while attempting to read 3243 * from the target file. 
3244 */ 3245 private static byte[] retrieveURLBytes(final String urlString, 3246 final String relativeBasePath, 3247 final long firstLineNumber) 3248 throws LDIFException, IOException 3249 { 3250 int pos; 3251 String path; 3252 final String lowerURLString = toLowerCase(urlString); 3253 if (lowerURLString.startsWith("file:/")) 3254 { 3255 pos = 6; 3256 while ((pos < urlString.length()) && (urlString.charAt(pos) == '/')) 3257 { 3258 pos++; 3259 } 3260 3261 path = urlString.substring(pos-1); 3262 } 3263 else if (lowerURLString.startsWith("file:")) 3264 { 3265 // A file: URL that doesn't include a slash will be interpreted as a 3266 // relative path. 3267 path = relativeBasePath + urlString.substring(5); 3268 } 3269 else 3270 { 3271 throw new LDIFException(ERR_READ_URL_INVALID_SCHEME.get(urlString), 3272 firstLineNumber, true); 3273 } 3274 3275 final File f = new File(path); 3276 if (! f.exists()) 3277 { 3278 throw new LDIFException( 3279 ERR_READ_URL_NO_SUCH_FILE.get(urlString, f.getAbsolutePath()), 3280 firstLineNumber, true); 3281 } 3282 3283 // In order to conserve memory, we'll only allow values to be read from 3284 // files no larger than 10 megabytes. 3285 final long fileSize = f.length(); 3286 if (fileSize > (10 * 1024 * 1024)) 3287 { 3288 throw new LDIFException( 3289 ERR_READ_URL_FILE_TOO_LARGE.get(urlString, f.getAbsolutePath(), 3290 (10*1024*1024)), 3291 firstLineNumber, true); 3292 } 3293 3294 int fileBytesRemaining = (int) fileSize; 3295 final byte[] fileData = new byte[(int) fileSize]; 3296 final FileInputStream fis = new FileInputStream(f); 3297 try 3298 { 3299 int fileBytesRead = 0; 3300 while (fileBytesRead < fileSize) 3301 { 3302 final int bytesRead = 3303 fis.read(fileData, fileBytesRead, fileBytesRemaining); 3304 if (bytesRead < 0) 3305 { 3306 // We hit the end of the file before we expected to. This shouldn't 3307 // happen unless the file size changed since we first looked at it, 3308 // which we won't allow. 
3309 throw new LDIFException( 3310 ERR_READ_URL_FILE_SIZE_CHANGED.get(urlString, 3311 f.getAbsolutePath()), 3312 firstLineNumber, true); 3313 } 3314 3315 fileBytesRead += bytesRead; 3316 fileBytesRemaining -= bytesRead; 3317 } 3318 3319 if (fis.read() != -1) 3320 { 3321 // There is still more data to read. This shouldn't happen unless the 3322 // file size changed since we first looked at it, which we won't allow. 3323 throw new LDIFException( 3324 ERR_READ_URL_FILE_SIZE_CHANGED.get(urlString, f.getAbsolutePath()), 3325 firstLineNumber, true); 3326 } 3327 } 3328 finally 3329 { 3330 fis.close(); 3331 } 3332 3333 return fileData; 3334 } 3335 3336 3337 3338 /** 3339 * Parses the data available through the provided iterator into an array of 3340 * modifications suitable for use in a modify change record. 3341 * 3342 * @param dn The DN of the entry being parsed. 3343 * @param trailingSpaceBehavior The behavior that should be exhibited when 3344 * encountering attribute values which are not 3345 * base64-encoded but contain trailing spaces. 3346 * @param ldifLines The lines that comprise the LDIF 3347 * representation of the full record being 3348 * parsed. 3349 * @param iterator The iterator to use to access the 3350 * modification data. 3351 * @param firstLineNumber The line number for the start of the record. 3352 * @param schema The schema to use in processing. 3353 * 3354 * @return An array containing the modifications that were read. 3355 * 3356 * @throws LDIFException If the provided LDIF data cannot be decoded as a 3357 * set of modifications. 
3358 */ 3359 private static Modification[] parseModifications(final String dn, 3360 final TrailingSpaceBehavior trailingSpaceBehavior, 3361 final ArrayList<StringBuilder> ldifLines, 3362 final Iterator<StringBuilder> iterator, 3363 final long firstLineNumber, final Schema schema) 3364 throws LDIFException 3365 { 3366 final ArrayList<Modification> modList = 3367 new ArrayList<Modification>(ldifLines.size()); 3368 3369 while (iterator.hasNext()) 3370 { 3371 // The first line must start with "add:", "delete:", "replace:", or 3372 // "increment:" followed by an attribute name. 3373 StringBuilder line = iterator.next(); 3374 handleTrailingSpaces(line, dn, firstLineNumber, trailingSpaceBehavior); 3375 int colonPos = line.indexOf(":"); 3376 if (colonPos < 0) 3377 { 3378 throw new LDIFException(ERR_READ_MOD_CR_NO_MODTYPE.get(firstLineNumber), 3379 firstLineNumber, true, ldifLines, null); 3380 } 3381 3382 final ModificationType modType; 3383 final String modTypeStr = toLowerCase(line.substring(0, colonPos)); 3384 if (modTypeStr.equals("add")) 3385 { 3386 modType = ModificationType.ADD; 3387 } 3388 else if (modTypeStr.equals("delete")) 3389 { 3390 modType = ModificationType.DELETE; 3391 } 3392 else if (modTypeStr.equals("replace")) 3393 { 3394 modType = ModificationType.REPLACE; 3395 } 3396 else if (modTypeStr.equals("increment")) 3397 { 3398 modType = ModificationType.INCREMENT; 3399 } 3400 else 3401 { 3402 throw new LDIFException(ERR_READ_MOD_CR_INVALID_MODTYPE.get(modTypeStr, 3403 firstLineNumber), 3404 firstLineNumber, true, ldifLines, null); 3405 } 3406 3407 String attributeName; 3408 int length = line.length(); 3409 if (length == (colonPos+1)) 3410 { 3411 // The colon was the last character on the line. This is not 3412 // acceptable. 
3413 throw new LDIFException(ERR_READ_MOD_CR_MODTYPE_NO_ATTR.get( 3414 firstLineNumber), 3415 firstLineNumber, true, ldifLines, null); 3416 } 3417 else if (line.charAt(colonPos+1) == ':') 3418 { 3419 // Skip over any spaces leading up to the value, and then the rest of 3420 // the string is the base64-encoded attribute name. 3421 int pos = colonPos+2; 3422 while ((pos < length) && (line.charAt(pos) == ' ')) 3423 { 3424 pos++; 3425 } 3426 3427 try 3428 { 3429 final byte[] dnBytes = Base64.decode(line.substring(pos)); 3430 attributeName = new String(dnBytes, "UTF-8"); 3431 } 3432 catch (final ParseException pe) 3433 { 3434 debugException(pe); 3435 throw new LDIFException( 3436 ERR_READ_MOD_CR_MODTYPE_CANNOT_BASE64_DECODE_ATTR.get( 3437 firstLineNumber, pe.getMessage()), 3438 firstLineNumber, true, ldifLines, pe); 3439 } 3440 catch (final Exception e) 3441 { 3442 debugException(e); 3443 throw new LDIFException( 3444 ERR_READ_MOD_CR_MODTYPE_CANNOT_BASE64_DECODE_ATTR.get( 3445 firstLineNumber, e), 3446 firstLineNumber, true, ldifLines, e); 3447 } 3448 } 3449 else 3450 { 3451 // Skip over any spaces leading up to the value, and then the rest of 3452 // the string is the attribute name. 3453 int pos = colonPos+1; 3454 while ((pos < length) && (line.charAt(pos) == ' ')) 3455 { 3456 pos++; 3457 } 3458 3459 attributeName = line.substring(pos); 3460 } 3461 3462 if (attributeName.length() == 0) 3463 { 3464 throw new LDIFException(ERR_READ_MOD_CR_MODTYPE_NO_ATTR.get( 3465 firstLineNumber), 3466 firstLineNumber, true, ldifLines, null); 3467 } 3468 3469 3470 // The next zero or more lines may be the set of attribute values. Keep 3471 // reading until we reach the end of the iterator or until we find a line 3472 // with just a "-". 
3473 final ArrayList<ASN1OctetString> valueList = 3474 new ArrayList<ASN1OctetString>(ldifLines.size()); 3475 while (iterator.hasNext()) 3476 { 3477 line = iterator.next(); 3478 handleTrailingSpaces(line, dn, firstLineNumber, trailingSpaceBehavior); 3479 if (line.toString().equals("-")) 3480 { 3481 break; 3482 } 3483 3484 colonPos = line.indexOf(":"); 3485 if (colonPos < 0) 3486 { 3487 throw new LDIFException(ERR_READ_NO_ATTR_COLON.get(firstLineNumber), 3488 firstLineNumber, true, ldifLines, null); 3489 } 3490 else if (! line.substring(0, colonPos).equalsIgnoreCase(attributeName)) 3491 { 3492 // There are a couple of cases in which this might be acceptable: 3493 // - If the two names are logically equivalent, but have an alternate 3494 // name (or OID) for the target attribute type, or if there are 3495 // attribute options and the options are just in a different order. 3496 // - If this is the first value for the target attribute and the 3497 // alternate name includes a "binary" option that the original 3498 // attribute name did not have. In this case, all subsequent values 3499 // will also be required to have the binary option. 3500 final String alternateName = line.substring(0, colonPos); 3501 3502 3503 // Check to see if the base names are equivalent. 
3504 boolean baseNameEquivalent = false; 3505 final String expectedBaseName = Attribute.getBaseName(attributeName); 3506 final String alternateBaseName = Attribute.getBaseName(alternateName); 3507 if (alternateBaseName.equalsIgnoreCase(expectedBaseName)) 3508 { 3509 baseNameEquivalent = true; 3510 } 3511 else 3512 { 3513 if (schema != null) 3514 { 3515 final AttributeTypeDefinition expectedAT = 3516 schema.getAttributeType(expectedBaseName); 3517 final AttributeTypeDefinition alternateAT = 3518 schema.getAttributeType(alternateBaseName); 3519 if ((expectedAT != null) && (alternateAT != null) && 3520 expectedAT.equals(alternateAT)) 3521 { 3522 baseNameEquivalent = true; 3523 } 3524 } 3525 } 3526 3527 3528 // Check to see if the attribute options are equivalent. 3529 final Set<String> expectedOptions = 3530 Attribute.getOptions(attributeName); 3531 final Set<String> lowerExpectedOptions = 3532 new HashSet<String>(expectedOptions.size()); 3533 for (final String s : expectedOptions) 3534 { 3535 lowerExpectedOptions.add(toLowerCase(s)); 3536 } 3537 3538 final Set<String> alternateOptions = 3539 Attribute.getOptions(alternateName); 3540 final Set<String> lowerAlternateOptions = 3541 new HashSet<String>(alternateOptions.size()); 3542 for (final String s : alternateOptions) 3543 { 3544 lowerAlternateOptions.add(toLowerCase(s)); 3545 } 3546 3547 final boolean optionsEquivalent = 3548 lowerAlternateOptions.equals(lowerExpectedOptions); 3549 3550 3551 if (baseNameEquivalent && optionsEquivalent) 3552 { 3553 // This is fine. The two attribute descriptions are logically 3554 // equivalent. We'll continue using the attribute description that 3555 // was provided first. 
3556 } 3557 else if (valueList.isEmpty() && baseNameEquivalent && 3558 lowerAlternateOptions.remove("binary") && 3559 lowerAlternateOptions.equals(lowerExpectedOptions)) 3560 { 3561 // This means that the provided value is the first value for the 3562 // attribute, and that the only significant difference is that the 3563 // provided attribute description included an unexpected "binary" 3564 // option. We'll accept this, but will require any additional 3565 // values for this modification to also include the binary option, 3566 // and we'll use the binary option in the attribute that is 3567 // eventually created. 3568 attributeName = alternateName; 3569 } 3570 else 3571 { 3572 // This means that either the base names are different or the sets 3573 // of options are incompatible. This is not acceptable. 3574 throw new LDIFException(ERR_READ_MOD_CR_ATTR_MISMATCH.get( 3575 firstLineNumber, 3576 line.substring(0, colonPos), 3577 attributeName), 3578 firstLineNumber, true, ldifLines, null); 3579 } 3580 } 3581 3582 length = line.length(); 3583 final ASN1OctetString value; 3584 if (length == (colonPos+1)) 3585 { 3586 // The colon was the last character on the line. This is fine. 3587 value = new ASN1OctetString(); 3588 } 3589 else if (line.charAt(colonPos+1) == ':') 3590 { 3591 // Skip over any spaces leading up to the value, and then the rest of 3592 // the string is the base64-encoded value. This is unusual and 3593 // unnecessary, but is nevertheless acceptable. 
3594 int pos = colonPos+2; 3595 while ((pos < length) && (line.charAt(pos) == ' ')) 3596 { 3597 pos++; 3598 } 3599 3600 try 3601 { 3602 value = new ASN1OctetString(Base64.decode(line.substring(pos))); 3603 } 3604 catch (final ParseException pe) 3605 { 3606 debugException(pe); 3607 throw new LDIFException(ERR_READ_CANNOT_BASE64_DECODE_ATTR.get( 3608 attributeName, firstLineNumber, pe.getMessage()), 3609 firstLineNumber, true, ldifLines, pe); 3610 } 3611 catch (final Exception e) 3612 { 3613 debugException(e); 3614 throw new LDIFException(ERR_READ_CANNOT_BASE64_DECODE_ATTR.get( 3615 firstLineNumber, e), 3616 firstLineNumber, true, ldifLines, e); 3617 } 3618 } 3619 else 3620 { 3621 // Skip over any spaces leading up to the value, and then the rest of 3622 // the string is the value. 3623 int pos = colonPos+1; 3624 while ((pos < length) && (line.charAt(pos) == ' ')) 3625 { 3626 pos++; 3627 } 3628 3629 value = new ASN1OctetString(line.substring(pos)); 3630 } 3631 3632 valueList.add(value); 3633 } 3634 3635 final ASN1OctetString[] values = new ASN1OctetString[valueList.size()]; 3636 valueList.toArray(values); 3637 3638 // If it's an add modification type, then there must be at least one 3639 // value. 3640 if ((modType.intValue() == ModificationType.ADD.intValue()) && 3641 (values.length == 0)) 3642 { 3643 throw new LDIFException(ERR_READ_MOD_CR_NO_ADD_VALUES.get(attributeName, 3644 firstLineNumber), 3645 firstLineNumber, true, ldifLines, null); 3646 } 3647 3648 // If it's an increment modification type, then there must be exactly one 3649 // value. 
3650 if ((modType.intValue() == ModificationType.INCREMENT.intValue()) && 3651 (values.length != 1)) 3652 { 3653 throw new LDIFException(ERR_READ_MOD_CR_INVALID_INCR_VALUE_COUNT.get( 3654 firstLineNumber, attributeName), 3655 firstLineNumber, true, ldifLines, null); 3656 } 3657 3658 modList.add(new Modification(modType, attributeName, values)); 3659 } 3660 3661 final Modification[] mods = new Modification[modList.size()]; 3662 modList.toArray(mods); 3663 return mods; 3664 } 3665 3666 3667 3668 /** 3669 * Parses the data available through the provided iterator as the body of a 3670 * modify DN change record (i.e., the newrdn, deleteoldrdn, and optional 3671 * newsuperior lines). 3672 * 3673 * @param ldifLines The lines that comprise the LDIF 3674 * representation of the full record being 3675 * parsed. 3676 * @param iterator The iterator to use to access the modify DN 3677 * data. 3678 * @param dn The current DN of the entry. 3679 * @param controls The set of controls to include in the change 3680 * record. 3681 * @param trailingSpaceBehavior The behavior that should be exhibited when 3682 * encountering attribute values which are not 3683 * base64-encoded but contain trailing spaces. 3684 * @param firstLineNumber The line number for the start of the record. 3685 * 3686 * @return The decoded modify DN change record. 3687 * 3688 * @throws LDIFException If the provided LDIF data cannot be decoded as a 3689 * modify DN change record. 3690 */ 3691 private static LDIFModifyDNChangeRecord parseModifyDNChangeRecord( 3692 final ArrayList<StringBuilder> ldifLines, 3693 final Iterator<StringBuilder> iterator, final String dn, 3694 final List<Control> controls, 3695 final TrailingSpaceBehavior trailingSpaceBehavior, 3696 final long firstLineNumber) 3697 throws LDIFException 3698 { 3699 // The next line must be the new RDN, and it must start with "newrdn:". 
3700 StringBuilder line = iterator.next(); 3701 handleTrailingSpaces(line, dn, firstLineNumber, trailingSpaceBehavior); 3702 int colonPos = line.indexOf(":"); 3703 if ((colonPos < 0) || 3704 (! line.substring(0, colonPos).equalsIgnoreCase("newrdn"))) 3705 { 3706 throw new LDIFException(ERR_READ_MODDN_CR_NO_NEWRDN_COLON.get( 3707 firstLineNumber), 3708 firstLineNumber, true, ldifLines, null); 3709 } 3710 3711 final String newRDN; 3712 int length = line.length(); 3713 if (length == (colonPos+1)) 3714 { 3715 // The colon was the last character on the line. This is not acceptable. 3716 throw new LDIFException(ERR_READ_MODDN_CR_NO_NEWRDN_VALUE.get( 3717 firstLineNumber), 3718 firstLineNumber, true, ldifLines, null); 3719 } 3720 else if (line.charAt(colonPos+1) == ':') 3721 { 3722 // Skip over any spaces leading up to the value, and then the rest of the 3723 // string is the base64-encoded new RDN. 3724 int pos = colonPos+2; 3725 while ((pos < length) && (line.charAt(pos) == ' ')) 3726 { 3727 pos++; 3728 } 3729 3730 try 3731 { 3732 final byte[] dnBytes = Base64.decode(line.substring(pos)); 3733 newRDN = new String(dnBytes, "UTF-8"); 3734 } 3735 catch (final ParseException pe) 3736 { 3737 debugException(pe); 3738 throw new LDIFException( 3739 ERR_READ_MODDN_CR_CANNOT_BASE64_DECODE_NEWRDN.get(firstLineNumber, 3740 pe.getMessage()), 3741 firstLineNumber, true, ldifLines, pe); 3742 } 3743 catch (final Exception e) 3744 { 3745 debugException(e); 3746 throw new LDIFException( 3747 ERR_READ_MODDN_CR_CANNOT_BASE64_DECODE_NEWRDN.get(firstLineNumber, 3748 e), 3749 firstLineNumber, true, ldifLines, e); 3750 } 3751 } 3752 else 3753 { 3754 // Skip over any spaces leading up to the value, and then the rest of the 3755 // string is the new RDN. 
3756 int pos = colonPos+1; 3757 while ((pos < length) && (line.charAt(pos) == ' ')) 3758 { 3759 pos++; 3760 } 3761 3762 newRDN = line.substring(pos); 3763 } 3764 3765 if (newRDN.length() == 0) 3766 { 3767 throw new LDIFException(ERR_READ_MODDN_CR_NO_NEWRDN_VALUE.get( 3768 firstLineNumber), 3769 firstLineNumber, true, ldifLines, null); 3770 } 3771 3772 3773 // The next line must be the deleteOldRDN flag, and it must start with 3774 // 'deleteoldrdn:'. 3775 if (! iterator.hasNext()) 3776 { 3777 throw new LDIFException(ERR_READ_MODDN_CR_NO_DELOLDRDN_COLON.get( 3778 firstLineNumber), 3779 firstLineNumber, true, ldifLines, null); 3780 } 3781 3782 line = iterator.next(); 3783 handleTrailingSpaces(line, dn, firstLineNumber, trailingSpaceBehavior); 3784 colonPos = line.indexOf(":"); 3785 if ((colonPos < 0) || 3786 (! line.substring(0, colonPos).equalsIgnoreCase("deleteoldrdn"))) 3787 { 3788 throw new LDIFException(ERR_READ_MODDN_CR_NO_DELOLDRDN_COLON.get( 3789 firstLineNumber), 3790 firstLineNumber, true, ldifLines, null); 3791 } 3792 3793 final String deleteOldRDNStr; 3794 length = line.length(); 3795 if (length == (colonPos+1)) 3796 { 3797 // The colon was the last character on the line. This is not acceptable. 3798 throw new LDIFException(ERR_READ_MODDN_CR_NO_DELOLDRDN_VALUE.get( 3799 firstLineNumber), 3800 firstLineNumber, true, ldifLines, null); 3801 } 3802 else if (line.charAt(colonPos+1) == ':') 3803 { 3804 // Skip over any spaces leading up to the value, and then the rest of the 3805 // string is the base64-encoded value. This is unusual and 3806 // unnecessary, but is nevertheless acceptable. 
3807 int pos = colonPos+2; 3808 while ((pos < length) && (line.charAt(pos) == ' ')) 3809 { 3810 pos++; 3811 } 3812 3813 try 3814 { 3815 final byte[] changeTypeBytes = Base64.decode(line.substring(pos)); 3816 deleteOldRDNStr = new String(changeTypeBytes, "UTF-8"); 3817 } 3818 catch (final ParseException pe) 3819 { 3820 debugException(pe); 3821 throw new LDIFException( 3822 ERR_READ_MODDN_CR_CANNOT_BASE64_DECODE_DELOLDRDN.get( 3823 firstLineNumber, pe.getMessage()), 3824 firstLineNumber, true, ldifLines, pe); 3825 } 3826 catch (final Exception e) 3827 { 3828 debugException(e); 3829 throw new LDIFException( 3830 ERR_READ_MODDN_CR_CANNOT_BASE64_DECODE_DELOLDRDN.get( 3831 firstLineNumber, e), 3832 firstLineNumber, true, ldifLines, e); 3833 } 3834 } 3835 else 3836 { 3837 // Skip over any spaces leading up to the value, and then the rest of the 3838 // string is the value. 3839 int pos = colonPos+1; 3840 while ((pos < length) && (line.charAt(pos) == ' ')) 3841 { 3842 pos++; 3843 } 3844 3845 deleteOldRDNStr = line.substring(pos); 3846 } 3847 3848 final boolean deleteOldRDN; 3849 if (deleteOldRDNStr.equals("0")) 3850 { 3851 deleteOldRDN = false; 3852 } 3853 else if (deleteOldRDNStr.equals("1")) 3854 { 3855 deleteOldRDN = true; 3856 } 3857 else if (deleteOldRDNStr.equalsIgnoreCase("false") || 3858 deleteOldRDNStr.equalsIgnoreCase("no")) 3859 { 3860 // This is technically illegal, but we'll allow it. 3861 deleteOldRDN = false; 3862 } 3863 else if (deleteOldRDNStr.equalsIgnoreCase("true") || 3864 deleteOldRDNStr.equalsIgnoreCase("yes")) 3865 { 3866 // This is also technically illegal, but we'll allow it. 3867 deleteOldRDN = false; 3868 } 3869 else 3870 { 3871 throw new LDIFException(ERR_READ_MODDN_CR_INVALID_DELOLDRDN.get( 3872 deleteOldRDNStr, firstLineNumber), 3873 firstLineNumber, true, ldifLines, null); 3874 } 3875 3876 3877 // If there is another line, then it must be the new superior DN and it must 3878 // start with "newsuperior:". If this is absent, then it's fine. 
3879 final String newSuperiorDN; 3880 if (iterator.hasNext()) 3881 { 3882 line = iterator.next(); 3883 handleTrailingSpaces(line, dn, firstLineNumber, trailingSpaceBehavior); 3884 colonPos = line.indexOf(":"); 3885 if ((colonPos < 0) || 3886 (! line.substring(0, colonPos).equalsIgnoreCase("newsuperior"))) 3887 { 3888 throw new LDIFException(ERR_READ_MODDN_CR_NO_NEWSUPERIOR_COLON.get( 3889 firstLineNumber), 3890 firstLineNumber, true, ldifLines, null); 3891 } 3892 3893 length = line.length(); 3894 if (length == (colonPos+1)) 3895 { 3896 // The colon was the last character on the line. This is fine. 3897 newSuperiorDN = ""; 3898 } 3899 else if (line.charAt(colonPos+1) == ':') 3900 { 3901 // Skip over any spaces leading up to the value, and then the rest of 3902 // the string is the base64-encoded new superior DN. 3903 int pos = colonPos+2; 3904 while ((pos < length) && (line.charAt(pos) == ' ')) 3905 { 3906 pos++; 3907 } 3908 3909 try 3910 { 3911 final byte[] dnBytes = Base64.decode(line.substring(pos)); 3912 newSuperiorDN = new String(dnBytes, "UTF-8"); 3913 } 3914 catch (final ParseException pe) 3915 { 3916 debugException(pe); 3917 throw new LDIFException( 3918 ERR_READ_MODDN_CR_CANNOT_BASE64_DECODE_NEWSUPERIOR.get( 3919 firstLineNumber, pe.getMessage()), 3920 firstLineNumber, true, ldifLines, pe); 3921 } 3922 catch (final Exception e) 3923 { 3924 debugException(e); 3925 throw new LDIFException( 3926 ERR_READ_MODDN_CR_CANNOT_BASE64_DECODE_NEWSUPERIOR.get( 3927 firstLineNumber, e), 3928 firstLineNumber, true, ldifLines, e); 3929 } 3930 } 3931 else 3932 { 3933 // Skip over any spaces leading up to the value, and then the rest of 3934 // the string is the new superior DN. 3935 int pos = colonPos+1; 3936 while ((pos < length) && (line.charAt(pos) == ' ')) 3937 { 3938 pos++; 3939 } 3940 3941 newSuperiorDN = line.substring(pos); 3942 } 3943 } 3944 else 3945 { 3946 newSuperiorDN = null; 3947 } 3948 3949 3950 // There must not be any more lines. 
3951 if (iterator.hasNext()) 3952 { 3953 throw new LDIFException(ERR_READ_CR_EXTRA_MODDN_DATA.get(firstLineNumber), 3954 firstLineNumber, true, ldifLines, null); 3955 } 3956 3957 return new LDIFModifyDNChangeRecord(dn, newRDN, deleteOldRDN, 3958 newSuperiorDN, controls); 3959 } 3960 3961 3962 3963 /** 3964 * Examines the line contained in the provided buffer to determine whether it 3965 * may contain one or more illegal trailing spaces. If it does, then those 3966 * spaces will either be stripped out or an exception will be thrown to 3967 * indicate that they are illegal. 3968 * 3969 * @param buffer The buffer to be examined. 3970 * @param dn The DN of the LDIF record being parsed. It 3971 * may be {@code null} if the DN is not yet 3972 * known (e.g., because the provided line is 3973 * expected to contain that DN). 3974 * @param firstLineNumber The approximate line number in the LDIF 3975 * source on which the LDIF record begins. 3976 * @param trailingSpaceBehavior The behavior that should be exhibited when 3977 * encountering attribute values which are not 3978 * base64-encoded but contain trailing spaces. 3979 * 3980 * @throws LDIFException If the line contained in the provided buffer ends 3981 * with one or more illegal trailing spaces and 3982 * {@code stripTrailingSpaces} was provided with a 3983 * value of {@code false}. 
3984 */ 3985 private static void handleTrailingSpaces(final StringBuilder buffer, 3986 final String dn, final long firstLineNumber, 3987 final TrailingSpaceBehavior trailingSpaceBehavior) 3988 throws LDIFException 3989 { 3990 int pos = buffer.length() - 1; 3991 boolean trailingFound = false; 3992 while ((pos >= 0) && (buffer.charAt(pos) == ' ')) 3993 { 3994 trailingFound = true; 3995 pos--; 3996 } 3997 3998 if (trailingFound && (buffer.charAt(pos) != ':')) 3999 { 4000 switch (trailingSpaceBehavior) 4001 { 4002 case STRIP: 4003 buffer.setLength(pos+1); 4004 break; 4005 4006 case REJECT: 4007 if (dn == null) 4008 { 4009 throw new LDIFException( 4010 ERR_READ_ILLEGAL_TRAILING_SPACE_WITHOUT_DN.get(firstLineNumber, 4011 buffer.toString()), 4012 firstLineNumber, true); 4013 } 4014 else 4015 { 4016 throw new LDIFException( 4017 ERR_READ_ILLEGAL_TRAILING_SPACE_WITH_DN.get(dn, 4018 firstLineNumber, buffer.toString()), 4019 firstLineNumber, true); 4020 } 4021 4022 case RETAIN: 4023 default: 4024 // No action will be taken. 4025 break; 4026 } 4027 } 4028 } 4029 4030 4031 4032 /** 4033 * This represents an unparsed LDIFRecord. It stores the line number of the 4034 * first line of the record and each line of the record. 4035 */ 4036 private static final class UnparsedLDIFRecord 4037 { 4038 private final ArrayList<StringBuilder> lineList; 4039 private final long firstLineNumber; 4040 private final Exception failureCause; 4041 private final boolean isEOF; 4042 private final DuplicateValueBehavior duplicateValueBehavior; 4043 private final Schema schema; 4044 private final TrailingSpaceBehavior trailingSpaceBehavior; 4045 4046 4047 4048 /** 4049 * Constructor. 4050 * 4051 * @param lineList The lines that comprise the LDIF record. 4052 * @param duplicateValueBehavior The behavior to exhibit if the entry 4053 * contains duplicate attribute values. 
4054 * @param trailingSpaceBehavior Specifies the behavior to exhibit when 4055 * encountering trailing spaces in 4056 * non-base64-encoded attribute values. 4057 * @param schema The schema to use when parsing, if 4058 * applicable. 4059 * @param firstLineNumber The first line number of the LDIF record. 4060 */ 4061 private UnparsedLDIFRecord(final ArrayList<StringBuilder> lineList, 4062 final DuplicateValueBehavior duplicateValueBehavior, 4063 final TrailingSpaceBehavior trailingSpaceBehavior, 4064 final Schema schema, final long firstLineNumber) 4065 { 4066 this.lineList = lineList; 4067 this.firstLineNumber = firstLineNumber; 4068 this.duplicateValueBehavior = duplicateValueBehavior; 4069 this.trailingSpaceBehavior = trailingSpaceBehavior; 4070 this.schema = schema; 4071 4072 failureCause = null; 4073 isEOF = 4074 (firstLineNumber < 0) || ((lineList != null) && lineList.isEmpty()); 4075 } 4076 4077 4078 4079 /** 4080 * Constructor. 4081 * 4082 * @param failureCause The Exception thrown when reading from the input. 4083 */ 4084 private UnparsedLDIFRecord(final Exception failureCause) 4085 { 4086 this.failureCause = failureCause; 4087 4088 lineList = null; 4089 firstLineNumber = 0; 4090 duplicateValueBehavior = DuplicateValueBehavior.REJECT; 4091 trailingSpaceBehavior = TrailingSpaceBehavior.REJECT; 4092 schema = null; 4093 isEOF = false; 4094 } 4095 4096 4097 4098 /** 4099 * Return the lines that comprise the LDIF record. 4100 * 4101 * @return The lines that comprise the LDIF record. 4102 */ 4103 private ArrayList<StringBuilder> getLineList() 4104 { 4105 return lineList; 4106 } 4107 4108 4109 4110 /** 4111 * Retrieves the behavior to exhibit when encountering duplicate attribute 4112 * values. 4113 * 4114 * @return The behavior to exhibit when encountering duplicate attribute 4115 * values. 
4116 */ 4117 private DuplicateValueBehavior getDuplicateValueBehavior() 4118 { 4119 return duplicateValueBehavior; 4120 } 4121 4122 4123 4124 /** 4125 * Retrieves the behavior that should be exhibited when encountering 4126 * attribute values which are not base64-encoded but contain trailing 4127 * spaces. The LDIF specification strongly recommends that any value which 4128 * legitimately contains trailing spaces be base64-encoded, but the LDAP SDK 4129 * LDIF parser may be configured to automatically strip these spaces, to 4130 * preserve them, or to reject any entry or change record containing them. 4131 * 4132 * @return The behavior that should be exhibited when encountering 4133 * attribute values which are not base64-encoded but contain 4134 * trailing spaces. 4135 */ 4136 private TrailingSpaceBehavior getTrailingSpaceBehavior() 4137 { 4138 return trailingSpaceBehavior; 4139 } 4140 4141 4142 4143 /** 4144 * Retrieves the schema that should be used when parsing the record, if 4145 * applicable. 4146 * 4147 * @return The schema that should be used when parsing the record, or 4148 * {@code null} if none should be used. 4149 */ 4150 private Schema getSchema() 4151 { 4152 return schema; 4153 } 4154 4155 4156 4157 /** 4158 * Return the first line number of the LDIF record. 4159 * 4160 * @return The first line number of the LDIF record. 4161 */ 4162 private long getFirstLineNumber() 4163 { 4164 return firstLineNumber; 4165 } 4166 4167 4168 4169 /** 4170 * Return {@code true} iff the end of the input was reached. 4171 * 4172 * @return {@code true} iff the end of the input was reached. 4173 */ 4174 private boolean isEOF() 4175 { 4176 return isEOF; 4177 } 4178 4179 4180 4181 /** 4182 * Returns the reason that reading the record lines failed. This normally 4183 * is only non-null if something bad happened to the input stream (like 4184 * a disk read error). 4185 * 4186 * @return The reason that reading the record lines failed. 
4187 */ 4188 private Exception getFailureCause() 4189 { 4190 return failureCause; 4191 } 4192 } 4193 4194 4195 /** 4196 * When processing in asynchronous mode, this thread is responsible for 4197 * reading the raw unparsed records from the input and submitting them for 4198 * processing. 4199 */ 4200 private final class LineReaderThread 4201 extends Thread 4202 { 4203 /** 4204 * Constructor. 4205 */ 4206 private LineReaderThread() 4207 { 4208 super("Asynchronous LDIF line reader"); 4209 setDaemon(true); 4210 } 4211 4212 4213 4214 /** 4215 * Reads raw, unparsed records from the input and submits them for 4216 * processing until the input is finished or closed. 4217 */ 4218 @Override() 4219 public void run() 4220 { 4221 try 4222 { 4223 boolean stopProcessing = false; 4224 while (!stopProcessing) 4225 { 4226 UnparsedLDIFRecord unparsedRecord = null; 4227 try 4228 { 4229 unparsedRecord = readUnparsedRecord(); 4230 } 4231 catch (IOException e) 4232 { 4233 debugException(e); 4234 unparsedRecord = new UnparsedLDIFRecord(e); 4235 stopProcessing = true; 4236 } 4237 catch (Exception e) 4238 { 4239 debugException(e); 4240 unparsedRecord = new UnparsedLDIFRecord(e); 4241 } 4242 4243 try 4244 { 4245 asyncParser.submit(unparsedRecord); 4246 } 4247 catch (InterruptedException e) 4248 { 4249 debugException(e); 4250 // If this thread is interrupted, then someone wants us to stop 4251 // processing, so that's what we'll do. 4252 stopProcessing = true; 4253 } 4254 4255 if ((unparsedRecord == null) || (unparsedRecord.isEOF())) 4256 { 4257 stopProcessing = true; 4258 } 4259 } 4260 } 4261 finally 4262 { 4263 try 4264 { 4265 asyncParser.shutdown(); 4266 } 4267 catch (InterruptedException e) 4268 { 4269 debugException(e); 4270 } 4271 finally 4272 { 4273 asyncParsingComplete.set(true); 4274 } 4275 } 4276 } 4277 } 4278 4279 4280 4281 /** 4282 * Used to parse Records asynchronously. 
*/
  private final class RecordParser implements Processor<UnparsedLDIFRecord,
                                                        LDIFRecord>
  {
    /**
     * Decodes the supplied unparsed record and then passes the result through
     * the entry translator or change record translator, when one is
     * configured for that kind of record.  A translator that returns
     * {@code null} causes {@code SKIP_ENTRY} to be returned in its place.
     *
     * @param  input  The unparsed record to decode.
     *
     * @return  The decoded (and possibly translated) record.
     *
     * @throws  LDIFException  If the record cannot be decoded or a translator
     *                         rejects it.
     */
    public LDIFRecord process(final UnparsedLDIFRecord input)
           throws LDIFException
    {
      LDIFRecord result = decodeRecord(input, relativeBasePath, schema);

      if ((entryTranslator != null) && (result instanceof Entry))
      {
        final Entry decodedEntry = (Entry) result;
        result = entryTranslator.translate(decodedEntry,
             input.getFirstLineNumber());
        if (result == null)
        {
          result = SKIP_ENTRY;
        }
      }

      if ((changeRecordTranslator != null) &&
          (result instanceof LDIFChangeRecord))
      {
        final LDIFChangeRecord decodedChange = (LDIFChangeRecord) result;
        result = changeRecordTranslator.translate(decodedChange,
             input.getFirstLineNumber());
        if (result == null)
        {
          result = SKIP_ENTRY;
        }
      }

      return result;
    }
  }
}