001 /* 002 * Copyright 2007-2016 UnboundID Corp. 003 * All Rights Reserved. 004 */ 005 /* 006 * Copyright (C) 2008-2016 UnboundID Corp. 007 * 008 * This program is free software; you can redistribute it and/or modify 009 * it under the terms of the GNU General Public License (GPLv2 only) 010 * or the terms of the GNU Lesser General Public License (LGPLv2.1 only) 011 * as published by the Free Software Foundation. 012 * 013 * This program is distributed in the hope that it will be useful, 014 * but WITHOUT ANY WARRANTY; without even the implied warranty of 015 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 016 * GNU General Public License for more details. 017 * 018 * You should have received a copy of the GNU General Public License 019 * along with this program; if not, see <http://www.gnu.org/licenses>. 020 */ 021 package com.unboundid.ldif; 022 023 024 025 import java.io.BufferedReader; 026 import java.io.BufferedWriter; 027 import java.io.Closeable; 028 import java.io.File; 029 import java.io.FileInputStream; 030 import java.io.FileWriter; 031 import java.io.InputStream; 032 import java.io.InputStreamReader; 033 import java.io.IOException; 034 import java.text.ParseException; 035 import java.util.ArrayList; 036 import java.util.Collection; 037 import java.util.Iterator; 038 import java.util.HashSet; 039 import java.util.LinkedHashMap; 040 import java.util.List; 041 import java.util.Set; 042 import java.util.concurrent.BlockingQueue; 043 import java.util.concurrent.ArrayBlockingQueue; 044 import java.util.concurrent.TimeUnit; 045 import java.util.concurrent.atomic.AtomicBoolean; 046 import java.nio.charset.Charset; 047 048 import com.unboundid.asn1.ASN1OctetString; 049 import com.unboundid.ldap.matchingrules.CaseIgnoreStringMatchingRule; 050 import com.unboundid.ldap.matchingrules.MatchingRule; 051 import com.unboundid.ldap.sdk.Attribute; 052 import com.unboundid.ldap.sdk.Control; 053 import com.unboundid.ldap.sdk.Entry; 054 import 
com.unboundid.ldap.sdk.Modification; 055 import com.unboundid.ldap.sdk.ModificationType; 056 import com.unboundid.ldap.sdk.LDAPException; 057 import com.unboundid.ldap.sdk.schema.AttributeTypeDefinition; 058 import com.unboundid.ldap.sdk.schema.Schema; 059 import com.unboundid.util.AggregateInputStream; 060 import com.unboundid.util.Base64; 061 import com.unboundid.util.LDAPSDKThreadFactory; 062 import com.unboundid.util.parallel.AsynchronousParallelProcessor; 063 import com.unboundid.util.parallel.Result; 064 import com.unboundid.util.parallel.ParallelProcessor; 065 import com.unboundid.util.parallel.Processor; 066 067 import static com.unboundid.ldif.LDIFMessages.*; 068 import static com.unboundid.util.Debug.*; 069 import static com.unboundid.util.StaticUtils.*; 070 import static com.unboundid.util.Validator.*; 071 072 /** 073 * This class provides an LDIF reader, which can be used to read and decode 074 * entries and change records from a data source using the LDAP Data Interchange 075 * Format as per <A HREF="http://www.ietf.org/rfc/rfc2849.txt">RFC 2849</A>. 076 * <BR> 077 * This class is not synchronized. If multiple threads read from the 078 * LDIFReader, they must be synchronized externally. 079 * <BR><BR> 080 * <H2>Example</H2> 081 * The following example iterates through all entries contained in an LDIF file 082 * and attempts to add them to a directory server: 083 * <PRE> 084 * LDIFReader ldifReader = new LDIFReader(pathToLDIFFile); 085 * 086 * int entriesRead = 0; 087 * int entriesAdded = 0; 088 * int errorsEncountered = 0; 089 * while (true) 090 * { 091 * Entry entry; 092 * try 093 * { 094 * entry = ldifReader.readEntry(); 095 * if (entry == null) 096 * { 097 * // All entries have been read. 
*       break;
 *     }
 *
 *     entriesRead++;
 *   }
 *   catch (LDIFException le)
 *   {
 *     errorsEncountered++;
 *     if (le.mayContinueReading())
 *     {
 *       // A recoverable error occurred while attempting to read an entry,
 *       // at or near line number le.getLineNumber().
 *       // The entry will be skipped, but we'll try to keep reading from the
 *       // LDIF file.
 *       continue;
 *     }
 *     else
 *     {
 *       // An unrecoverable error occurred while attempting to read an entry
 *       // at or near line number le.getLineNumber().
 *       // No further LDIF processing will be performed.
 *       break;
 *     }
 *   }
 *   catch (IOException ioe)
 *   {
 *     // An I/O error occurred while attempting to read from the LDIF file.
 *     // No further LDIF processing will be performed.
 *     errorsEncountered++;
 *     break;
 *   }
 *
 *   LDAPResult addResult;
 *   try
 *   {
 *     addResult = connection.add(entry);
 *     // If we got here, then the change should have been processed
 *     // successfully.
 *     entriesAdded++;
 *   }
 *   catch (LDAPException le)
 *   {
 *     // If we got here, then the change attempt failed.
 *     addResult = le.toLDAPResult();
 *     errorsEncountered++;
 *   }
 * }
 *
 * ldifReader.close();
 * </PRE>
 */
public final class LDIFReader
       implements Closeable
{
  /**
   * The default buffer size (128KB) that will be used when reading from the
   * data source.
   */
  public static final int DEFAULT_BUFFER_SIZE = 128 * 1024;



  /**
   * When processing asynchronously, this determines how many of the allocated
   * worker threads are used to parse each batch of read entries.
   */
  private static final int ASYNC_MIN_PER_PARSING_THREAD = 3;



  /**
   * When processing asynchronously, this specifies the size of the pending and
   * completed queues.
*/
  private static final int ASYNC_QUEUE_SIZE = 500;



  /**
   * Sentinel entry used internally to indicate that the
   * LDIFReaderEntryTranslator asked for a read entry to be skipped by
   * returning {@code null}, a value that normally implies EOF.
   */
  private static final Entry SKIP_ENTRY = new Entry("cn=skipped");



  /**
   * The default base path that will be prepended to relative paths. It is the
   * absolute form of the directory named by the "user.dir" system property
   * (or of "." when that property is not set) and it always ends with the
   * platform file separator.
   */
  private static final String DEFAULT_RELATIVE_BASE_PATH;
  static
  {
    final String userDir = System.getProperty("user.dir");
    final File workingDir = new File((userDir == null) ? "." : userDir);

    final String absolutePath = workingDir.getAbsolutePath();
    DEFAULT_RELATIVE_BASE_PATH = absolutePath.endsWith(File.separator)
         ? absolutePath
         : absolutePath + File.separator;
  }



  // The buffered reader from which the LDIF data is read.
  private final BufferedReader reader;

  // How duplicate attribute values are handled when they are encountered.
  private volatile DuplicateValueBehavior duplicateValueBehavior;

  // Counter used to keep track of line numbers.
  private long lineNumberCounter = 0;

  // The translator applied to change records, if any.
  private final LDIFReaderChangeRecordTranslator changeRecordTranslator;

  // The translator applied to entries, if any.
  private final LDIFReaderEntryTranslator entryTranslator;

  // The schema to use when processing, if applicable.
  private Schema schema;

  // Specifies the base path that will be prepended to relative paths for file
  // URLs.
private volatile String relativeBasePath;

  // The behavior that should be exhibited with regard to illegal trailing
  // spaces in attribute values.
  private volatile TrailingSpaceBehavior trailingSpaceBehavior;

  // True iff we are processing asynchronously (that is, the reader was created
  // with a nonzero number of parse threads).
  private final boolean isAsync;

  //
  // The following only apply to asynchronous processing.  They are all null
  // when the reader operates synchronously.
  //

  // Parses entries asynchronously.
  private final AsynchronousParallelProcessor<UnparsedLDIFRecord, LDIFRecord>
       asyncParser;

  // Set to true when the end of the input is reached.
  private final AtomicBoolean asyncParsingComplete;

  // The records that have been read and parsed.
  private final BlockingQueue<Result<UnparsedLDIFRecord, LDIFRecord>>
       asyncParsedRecords;



  /**
   * Creates a new LDIF reader that will read data from the specified file.
   * Records are read and parsed synchronously.
   *
   * @param path The path to the file from which the data is to be read. It
   *             must not be {@code null}.
   *
   * @throws IOException If a problem occurs while opening the file for
   *                     reading.
   */
  public LDIFReader(final String path)
         throws IOException
  {
    this(new FileInputStream(path));
  }



  /**
   * Creates a new LDIF reader that will read data from the specified file
   * and optionally parses the LDIF records asynchronously using the specified
   * number of threads.
   *
   * @param path The path to the file from which the data is to be read. It
   *             must not be {@code null}.
   * @param numParseThreads If this value is greater than zero, then the
   *                        specified number of threads will be used to
   *                        asynchronously read and parse the LDIF file. A
   *                        value of zero causes records to be read and parsed
   *                        synchronously. It must not be negative.
   *
   * @throws IOException If a problem occurs while opening the file for
   *                     reading.
   *
   * @see #LDIFReader(BufferedReader, int, LDIFReaderEntryTranslator)
   *      constructor for more details about asynchronous processing.
*/
  public LDIFReader(final String path, final int numParseThreads)
         throws IOException
  {
    this(new FileInputStream(path), numParseThreads);
  }



  /**
   * Creates a new LDIF reader that will read data from the specified file.
   * Records are read and parsed synchronously.
   *
   * @param file The file from which the data is to be read. It must not be
   *             {@code null}.
   *
   * @throws IOException If a problem occurs while opening the file for
   *                     reading.
   */
  public LDIFReader(final File file)
         throws IOException
  {
    this(new FileInputStream(file));
  }



  /**
   * Creates a new LDIF reader that will read data from the specified file
   * and optionally parses the LDIF records asynchronously using the specified
   * number of threads.
   *
   * @param file The file from which the data is to be read. It
   *             must not be {@code null}.
   * @param numParseThreads If this value is greater than zero, then the
   *                        specified number of threads will be used to
   *                        asynchronously read and parse the LDIF file. A
   *                        value of zero causes records to be read and parsed
   *                        synchronously. It must not be negative.
   *
   * @throws IOException If a problem occurs while opening the file for
   *                     reading.
   */
  public LDIFReader(final File file, final int numParseThreads)
         throws IOException
  {
    this(new FileInputStream(file), numParseThreads);
  }



  /**
   * Creates a new LDIF reader that will read data from the specified files in
   * the order in which they are provided and optionally parses the LDIF records
   * asynchronously using the specified number of threads.
   *
   * @param files The files from which the data is to be read. It
   *              must not be {@code null} or empty.
   * @param numParseThreads If this value is greater than zero, then the
   *                        specified number of threads will be used to
   *                        asynchronously read and parse the LDIF file.
   * @param entryTranslator The LDIFReaderEntryTranslator to apply to entries
   *                        before they are returned.
This is normally
   *                        {@code null}, which causes entries to be returned
   *                        unaltered. This is particularly useful when
   *                        parsing the input file in parallel because the
   *                        entry translation is also done in parallel.
   *
   * @throws IOException If a problem occurs while opening the file for
   *                     reading.
   */
  public LDIFReader(final File[] files, final int numParseThreads,
                    final LDIFReaderEntryTranslator entryTranslator)
         throws IOException
  {
    // No change record translator is used by this variant.
    this(files, numParseThreads, entryTranslator, null);
  }



  /**
   * Creates a new LDIF reader that will read data from the specified files in
   * the order in which they are provided and optionally parses the LDIF records
   * asynchronously using the specified number of threads. The files are read
   * using the UTF-8 character set.
   *
   * @param files                  The files from which the data is to be
   *                               read. It must not be {@code null} or
   *                               empty.
   * @param numParseThreads        If this value is greater than zero, then
   *                               the specified number of threads will be
   *                               used to asynchronously read and parse the
   *                               LDIF file.
   * @param entryTranslator        The LDIFReaderEntryTranslator to apply to
   *                               entries before they are returned. This is
   *                               normally {@code null}, which causes entries
   *                               to be returned unaltered. This is
   *                               particularly useful when parsing the input
   *                               file in parallel because the entry
   *                               translation is also done in parallel.
   * @param changeRecordTranslator The LDIFReaderChangeRecordTranslator to
   *                               apply to change records before they are
   *                               returned. This is normally {@code null},
   *                               which causes change records to be returned
   *                               unaltered. This is particularly useful
   *                               when parsing the input file in parallel
   *                               because the change record translation is
   *                               also done in parallel.
   *
   * @throws IOException If a problem occurs while opening the file for
   *                     reading.
*/
  public LDIFReader(final File[] files, final int numParseThreads,
              final LDIFReaderEntryTranslator entryTranslator,
              final LDIFReaderChangeRecordTranslator changeRecordTranslator)
         throws IOException
  {
    // Delegate to the character-set-aware constructor using UTF-8.
    this(files, numParseThreads, entryTranslator, changeRecordTranslator,
         "UTF-8");
  }



  /**
   * Creates a new LDIF reader that will read data from the specified files in
   * the order in which they are provided and optionally parses the LDIF records
   * asynchronously using the specified number of threads.
   *
   * @param files                  The files from which the data is to be
   *                               read. It must not be {@code null} or
   *                               empty.
   * @param numParseThreads        If this value is greater than zero, then
   *                               the specified number of threads will be
   *                               used to asynchronously read and parse the
   *                               LDIF file.
   * @param entryTranslator        The LDIFReaderEntryTranslator to apply to
   *                               entries before they are returned. This is
   *                               normally {@code null}, which causes entries
   *                               to be returned unaltered. This is
   *                               particularly useful when parsing the input
   *                               file in parallel because the entry
   *                               translation is also done in parallel.
   * @param changeRecordTranslator The LDIFReaderChangeRecordTranslator to
   *                               apply to change records before they are
   *                               returned. This is normally {@code null},
   *                               which causes change records to be returned
   *                               unaltered. This is particularly useful
   *                               when parsing the input file in parallel
   *                               because the change record translation is
   *                               also done in parallel.
   * @param characterSet           The character set to use when reading from
   *                               the input stream. It must not be
   *                               {@code null}.
   *
   * @throws IOException If a problem occurs while opening the file for
   *                     reading.
448 */ 449 public LDIFReader(final File[] files, final int numParseThreads, 450 final LDIFReaderEntryTranslator entryTranslator, 451 final LDIFReaderChangeRecordTranslator changeRecordTranslator, 452 final String characterSet) 453 throws IOException 454 { 455 this(createAggregateInputStream(files), numParseThreads, entryTranslator, 456 changeRecordTranslator, characterSet); 457 } 458 459 460 461 /** 462 * Creates a new aggregate input stream that will read data from the specified 463 * files. If there are multiple files, then a "padding" file will be inserted 464 * between them to ensure that there is at least one blank line between the 465 * end of one file and the beginning of another. 466 * 467 * @param files The files from which the data is to be read. It must not be 468 * {@code null} or empty. 469 * 470 * @return The input stream to use to read data from the provided files. 471 * 472 * @throws IOException If a problem is encountered while attempting to 473 * create the input stream. 474 */ 475 private static InputStream createAggregateInputStream(final File... 
files) 476 throws IOException 477 { 478 if (files.length == 0) 479 { 480 throw new IOException(ERR_READ_NO_LDIF_FILES.get()); 481 } 482 else if (files.length == 1) 483 { 484 return new FileInputStream(files[0]); 485 } 486 else 487 { 488 final File spacerFile = 489 File.createTempFile("ldif-reader-spacer", ".ldif"); 490 spacerFile.deleteOnExit(); 491 492 final BufferedWriter spacerWriter = 493 new BufferedWriter(new FileWriter(spacerFile)); 494 try 495 { 496 spacerWriter.newLine(); 497 spacerWriter.newLine(); 498 } 499 finally 500 { 501 spacerWriter.close(); 502 } 503 504 final File[] returnArray = new File[(files.length * 2) - 1]; 505 returnArray[0] = files[0]; 506 507 int pos = 1; 508 for (int i=1; i < files.length; i++) 509 { 510 returnArray[pos++] = spacerFile; 511 returnArray[pos++] = files[i]; 512 } 513 514 return new AggregateInputStream(returnArray); 515 } 516 } 517 518 519 520 /** 521 * Creates a new LDIF reader that will read data from the provided input 522 * stream. 523 * 524 * @param inputStream The input stream from which the data is to be read. 525 * It must not be {@code null}. 526 */ 527 public LDIFReader(final InputStream inputStream) 528 { 529 this(inputStream, 0); 530 } 531 532 533 534 /** 535 * Creates a new LDIF reader that will read data from the specified stream 536 * and parses the LDIF records asynchronously using the specified number of 537 * threads. 538 * 539 * @param inputStream The input stream from which the data is to be read. 540 * It must not be {@code null}. 541 * @param numParseThreads If this value is greater than zero, then the 542 * specified number of threads will be used to 543 * asynchronously read and parse the LDIF file. 544 * 545 * @see #LDIFReader(BufferedReader, int, LDIFReaderEntryTranslator) 546 * constructor for more details about asynchronous processing. 547 */ 548 public LDIFReader(final InputStream inputStream, final int numParseThreads) 549 { 550 // UTF-8 is required by RFC 2849. 
Java guarantees it's always available. 551 this(new BufferedReader(new InputStreamReader(inputStream, 552 Charset.forName("UTF-8")), 553 DEFAULT_BUFFER_SIZE), 554 numParseThreads); 555 } 556 557 558 559 /** 560 * Creates a new LDIF reader that will read data from the specified stream 561 * and parses the LDIF records asynchronously using the specified number of 562 * threads. 563 * 564 * @param inputStream The input stream from which the data is to be read. 565 * It must not be {@code null}. 566 * @param numParseThreads If this value is greater than zero, then the 567 * specified number of threads will be used to 568 * asynchronously read and parse the LDIF file. 569 * @param entryTranslator The LDIFReaderEntryTranslator to apply to read 570 * entries before they are returned. This is normally 571 * {@code null}, which causes entries to be returned 572 * unaltered. This is particularly useful when parsing 573 * the input file in parallel because the entry 574 * translation is also done in parallel. 575 * 576 * @see #LDIFReader(BufferedReader, int, LDIFReaderEntryTranslator) 577 * constructor for more details about asynchronous processing. 578 */ 579 public LDIFReader(final InputStream inputStream, final int numParseThreads, 580 final LDIFReaderEntryTranslator entryTranslator) 581 { 582 this(inputStream, numParseThreads, entryTranslator, null); 583 } 584 585 586 587 /** 588 * Creates a new LDIF reader that will read data from the specified stream 589 * and parses the LDIF records asynchronously using the specified number of 590 * threads. 591 * 592 * @param inputStream The input stream from which the data is to 593 * be read. It must not be {@code null}. 594 * @param numParseThreads If this value is greater than zero, then 595 * the specified number of threads will be 596 * used to asynchronously read and parse the 597 * LDIF file. 598 * @param entryTranslator The LDIFReaderEntryTranslator to apply to 599 * entries before they are returned. 
This is
   *                               normally {@code null}, which causes entries
   *                               to be returned unaltered. This is
   *                               particularly useful when parsing the input
   *                               file in parallel because the entry
   *                               translation is also done in parallel.
   * @param changeRecordTranslator The LDIFReaderChangeRecordTranslator to
   *                               apply to change records before they are
   *                               returned. This is normally {@code null},
   *                               which causes change records to be returned
   *                               unaltered. This is particularly useful
   *                               when parsing the input file in parallel
   *                               because the change record translation is
   *                               also done in parallel.
   *
   * @see #LDIFReader(BufferedReader, int, LDIFReaderEntryTranslator)
   *      constructor for more details about asynchronous processing.
   */
  public LDIFReader(final InputStream inputStream, final int numParseThreads,
              final LDIFReaderEntryTranslator entryTranslator,
              final LDIFReaderChangeRecordTranslator changeRecordTranslator)
  {
    // UTF-8 is required by RFC 2849. Java guarantees it's always available.
    this(inputStream, numParseThreads, entryTranslator, changeRecordTranslator,
         "UTF-8");
  }



  /**
   * Creates a new LDIF reader that will read data from the specified stream
   * and optionally parses the LDIF records asynchronously using the specified
   * number of threads.
   *
   * @param inputStream            The input stream from which the data is to
   *                               be read. It must not be {@code null}.
   * @param numParseThreads        If this value is greater than zero, then
   *                               the specified number of threads will be
   *                               used to asynchronously read and parse the
   *                               LDIF file.
   * @param entryTranslator        The LDIFReaderEntryTranslator to apply to
   *                               entries before they are returned. This is
   *                               normally {@code null}, which causes entries
   *                               to be returned unaltered. This is
   *                               particularly useful when parsing the input
   *                               file in parallel because the entry
   *                               translation is also done in parallel.
* @param changeRecordTranslator The LDIFReaderChangeRecordTranslator to
   *                               apply to change records before they are
   *                               returned. This is normally {@code null},
   *                               which causes change records to be returned
   *                               unaltered. This is particularly useful
   *                               when parsing the input file in parallel
   *                               because the change record translation is
   *                               also done in parallel.
   * @param characterSet           The character set to use when reading from
   *                               the input stream. It must not be
   *                               {@code null}.
   *
   * @see #LDIFReader(BufferedReader, int, LDIFReaderEntryTranslator)
   *      constructor for more details about asynchronous processing.
   */
  public LDIFReader(final InputStream inputStream, final int numParseThreads,
              final LDIFReaderEntryTranslator entryTranslator,
              final LDIFReaderChangeRecordTranslator changeRecordTranslator,
              final String characterSet)
  {
    // Wrap the stream in a buffered reader that decodes with the requested
    // character set and uses the default buffer size.
    this(new BufferedReader(
              new InputStreamReader(inputStream, Charset.forName(characterSet)),
              DEFAULT_BUFFER_SIZE),
         numParseThreads, entryTranslator, changeRecordTranslator);
  }



  /**
   * Creates a new LDIF reader that will use the provided buffered reader to
   * read the LDIF data. The encoding of the underlying Reader must be set to
   * "UTF-8" as required by RFC 2849. Records are read and parsed
   * synchronously.
   *
   * @param reader The buffered reader that will be used to read the LDIF
   *               data. It must not be {@code null}.
   */
  public LDIFReader(final BufferedReader reader)
  {
    this(reader, 0);
  }



  /**
   * Creates a new LDIF reader that will read data from the specified buffered
   * reader and optionally parses the LDIF records asynchronously using the
   * specified number of threads. The encoding of the underlying Reader must be
   * set to "UTF-8" as required by RFC 2849.
   *
   * @param reader The buffered reader that will be used to read the LDIF data.
   *               It must not be {@code null}.
* @param numParseThreads If this value is greater than zero, then the
   *                        specified number of threads will be used to
   *                        asynchronously read and parse the LDIF file.
   *
   * @see #LDIFReader(BufferedReader, int, LDIFReaderEntryTranslator)
   *      constructor for more details about asynchronous processing.
   */
  public LDIFReader(final BufferedReader reader, final int numParseThreads)
  {
    // No entry translator is used by this variant.
    this(reader, numParseThreads, null);
  }



  /**
   * Creates a new LDIF reader that will read data from the specified buffered
   * reader and optionally parses the LDIF records asynchronously using the
   * specified number of threads. The encoding of the underlying Reader must be
   * set to "UTF-8" as required by RFC 2849.
   *
   * @param reader The buffered reader that will be used to read the LDIF data.
   *               It must not be {@code null}.
   * @param numParseThreads If this value is greater than zero, then the
   *                        specified number of threads will be used to
   *                        asynchronously read and parse the LDIF file.
   *                        This should only be set to greater than zero when
   *                        performance analysis has demonstrated that reading
   *                        and parsing the LDIF is a bottleneck. The default
   *                        synchronous processing is normally fast enough.
   *                        There is little benefit in passing in a value
   *                        greater than four (unless there is an
   *                        LDIFReaderEntryTranslator that does time-consuming
   *                        processing). A value of zero implies the
   *                        default behavior of reading and parsing LDIF
   *                        records synchronously when one of the read
   *                        methods is called.
   * @param entryTranslator The LDIFReaderEntryTranslator to apply to read
   *                        entries before they are returned. This is normally
   *                        {@code null}, which causes entries to be returned
   *                        unaltered. This is particularly useful when parsing
   *                        the input file in parallel because the entry
   *                        translation is also done in parallel.
*/
  public LDIFReader(final BufferedReader reader,
                    final int numParseThreads,
                    final LDIFReaderEntryTranslator entryTranslator)
  {
    // No change record translator is used by this variant.
    this(reader, numParseThreads, entryTranslator, null);
  }



  /**
   * Creates a new LDIF reader that will read data from the specified buffered
   * reader and optionally parses the LDIF records asynchronously using the
   * specified number of threads. The encoding of the underlying Reader must be
   * set to "UTF-8" as required by RFC 2849.
   *
   * @param reader                 The buffered reader that will be used to
   *                               read the LDIF data. It must not be
   *                               {@code null}.
   * @param numParseThreads        If this value is greater than zero, then
   *                               the specified number of threads will be
   *                               used to asynchronously read and parse the
   *                               LDIF file. A value of zero causes records
   *                               to be read and parsed synchronously. It
   *                               must not be negative.
   * @param entryTranslator        The LDIFReaderEntryTranslator to apply to
   *                               entries before they are returned. This is
   *                               normally {@code null}, which causes entries
   *                               to be returned unaltered. This is
   *                               particularly useful when parsing the input
   *                               file in parallel because the entry
   *                               translation is also done in parallel.
   * @param changeRecordTranslator The LDIFReaderChangeRecordTranslator to
   *                               apply to change records before they are
   *                               returned. This is normally {@code null},
   *                               which causes change records to be returned
   *                               unaltered. This is particularly useful
   *                               when parsing the input file in parallel
   *                               because the change record translation is
   *                               also done in parallel.
777 */ 778 public LDIFReader(final BufferedReader reader, final int numParseThreads, 779 final LDIFReaderEntryTranslator entryTranslator, 780 final LDIFReaderChangeRecordTranslator changeRecordTranslator) 781 { 782 ensureNotNull(reader); 783 ensureTrue(numParseThreads >= 0, 784 "LDIFReader.numParseThreads must not be negative."); 785 786 this.reader = reader; 787 this.entryTranslator = entryTranslator; 788 this.changeRecordTranslator = changeRecordTranslator; 789 790 duplicateValueBehavior = DuplicateValueBehavior.STRIP; 791 trailingSpaceBehavior = TrailingSpaceBehavior.REJECT; 792 793 relativeBasePath = DEFAULT_RELATIVE_BASE_PATH; 794 795 if (numParseThreads == 0) 796 { 797 isAsync = false; 798 asyncParser = null; 799 asyncParsingComplete = null; 800 asyncParsedRecords = null; 801 } 802 else 803 { 804 isAsync = true; 805 asyncParsingComplete = new AtomicBoolean(false); 806 807 // Decodes entries in parallel. 808 final LDAPSDKThreadFactory threadFactory = 809 new LDAPSDKThreadFactory("LDIFReader Worker", true, null); 810 final ParallelProcessor<UnparsedLDIFRecord, LDIFRecord> parallelParser = 811 new ParallelProcessor<UnparsedLDIFRecord, LDIFRecord>( 812 new RecordParser(), threadFactory, numParseThreads, 813 ASYNC_MIN_PER_PARSING_THREAD); 814 815 final BlockingQueue<UnparsedLDIFRecord> pendingQueue = new 816 ArrayBlockingQueue<UnparsedLDIFRecord>(ASYNC_QUEUE_SIZE); 817 818 // The output queue must be a little more than twice as big as the input 819 // queue to more easily handle being shutdown in the middle of processing 820 // when the queues are full and threads are blocked. 
821 asyncParsedRecords = new ArrayBlockingQueue 822 <Result<UnparsedLDIFRecord, LDIFRecord>>(2 * ASYNC_QUEUE_SIZE + 100); 823 824 asyncParser = new AsynchronousParallelProcessor 825 <UnparsedLDIFRecord, LDIFRecord>(pendingQueue, parallelParser, 826 asyncParsedRecords); 827 828 final LineReaderThread lineReaderThread = new LineReaderThread(); 829 lineReaderThread.start(); 830 } 831 } 832 833 834 835 /** 836 * Reads entries from the LDIF file with the specified path and returns them 837 * as a {@code List}. This is a convenience method that should only be used 838 * for data sets that are small enough so that running out of memory isn't a 839 * concern. 840 * 841 * @param path The path to the LDIF file containing the entries to be read. 842 * 843 * @return A list of the entries read from the given LDIF file. 844 * 845 * @throws IOException If a problem occurs while attempting to read data 846 * from the specified file. 847 * 848 * @throws LDIFException If a problem is encountered while attempting to 849 * decode data read as LDIF. 850 */ 851 public static List<Entry> readEntries(final String path) 852 throws IOException, LDIFException 853 { 854 return readEntries(new LDIFReader(path)); 855 } 856 857 858 859 /** 860 * Reads entries from the specified LDIF file and returns them as a 861 * {@code List}. This is a convenience method that should only be used for 862 * data sets that are small enough so that running out of memory isn't a 863 * concern. 864 * 865 * @param file A reference to the LDIF file containing the entries to be 866 * read. 867 * 868 * @return A list of the entries read from the given LDIF file. 869 * 870 * @throws IOException If a problem occurs while attempting to read data 871 * from the specified file. 872 * 873 * @throws LDIFException If a problem is encountered while attempting to 874 * decode data read as LDIF. 
875 */ 876 public static List<Entry> readEntries(final File file) 877 throws IOException, LDIFException 878 { 879 return readEntries(new LDIFReader(file)); 880 } 881 882 883 884 /** 885 * Reads and decodes LDIF entries from the provided input stream and 886 * returns them as a {@code List}. This is a convenience method that should 887 * only be used for data sets that are small enough so that running out of 888 * memory isn't a concern. 889 * 890 * @param inputStream The input stream from which the entries should be 891 * read. The input stream will be closed before 892 * returning. 893 * 894 * @return A list of the entries read from the given input stream. 895 * 896 * @throws IOException If a problem occurs while attempting to read data 897 * from the input stream. 898 * 899 * @throws LDIFException If a problem is encountered while attempting to 900 * decode data read as LDIF. 901 */ 902 public static List<Entry> readEntries(final InputStream inputStream) 903 throws IOException, LDIFException 904 { 905 return readEntries(new LDIFReader(inputStream)); 906 } 907 908 909 910 /** 911 * Reads entries from the provided LDIF reader and returns them as a list. 912 * 913 * @param reader The reader from which the entries should be read. It will 914 * be closed before returning. 915 * 916 * @return A list of the entries read from the provided reader. 917 * 918 * @throws IOException If a problem was encountered while attempting to read 919 * data from the LDIF data source. 920 * 921 * @throws LDIFException If a problem is encountered while attempting to 922 * decode data read as LDIF. 
923 */ 924 private static List<Entry> readEntries(final LDIFReader reader) 925 throws IOException, LDIFException 926 { 927 try 928 { 929 final ArrayList<Entry> entries = new ArrayList<Entry>(10); 930 while (true) 931 { 932 final Entry e = reader.readEntry(); 933 if (e == null) 934 { 935 break; 936 } 937 938 entries.add(e); 939 } 940 941 return entries; 942 } 943 finally 944 { 945 reader.close(); 946 } 947 } 948 949 950 951 /** 952 * Closes this LDIF reader and the underlying LDIF source. 953 * 954 * @throws IOException If a problem occurs while closing the underlying LDIF 955 * source. 956 */ 957 public void close() 958 throws IOException 959 { 960 reader.close(); 961 962 if (isAsync()) 963 { 964 // Closing the reader will trigger the LineReaderThread to complete, but 965 // not if it's blocked submitting the next UnparsedLDIFRecord. To avoid 966 // this, we clear out the completed output queue, which is larger than 967 // the input queue, so the LineReaderThread will stop reading and 968 // shutdown the asyncParser. 969 asyncParsedRecords.clear(); 970 } 971 } 972 973 974 975 /** 976 * Indicates whether to ignore any duplicate values encountered while reading 977 * LDIF records. 978 * 979 * @return {@code true} if duplicate values should be ignored, or 980 * {@code false} if any LDIF records containing duplicate values 981 * should be rejected. 982 * 983 * @deprecated Use the {@code getDuplicateValueBehavior} method instead. 984 */ 985 @Deprecated() 986 public boolean ignoreDuplicateValues() 987 { 988 return (duplicateValueBehavior == DuplicateValueBehavior.STRIP); 989 } 990 991 992 993 /** 994 * Specifies whether to ignore any duplicate values encountered while reading 995 * LDIF records. 996 * 997 * @param ignoreDuplicateValues Indicates whether to ignore duplicate 998 * attribute values encountered while reading 999 * LDIF records. 1000 * 1001 * @deprecated Use the {@code setDuplicateValueBehavior} method instead. 
1002 */ 1003 @Deprecated() 1004 public void setIgnoreDuplicateValues(final boolean ignoreDuplicateValues) 1005 { 1006 if (ignoreDuplicateValues) 1007 { 1008 duplicateValueBehavior = DuplicateValueBehavior.STRIP; 1009 } 1010 else 1011 { 1012 duplicateValueBehavior = DuplicateValueBehavior.REJECT; 1013 } 1014 } 1015 1016 1017 1018 /** 1019 * Retrieves the behavior that should be exhibited if the LDIF reader 1020 * encounters an entry with duplicate values. 1021 * 1022 * @return The behavior that should be exhibited if the LDIF reader 1023 * encounters an entry with duplicate values. 1024 */ 1025 public DuplicateValueBehavior getDuplicateValueBehavior() 1026 { 1027 return duplicateValueBehavior; 1028 } 1029 1030 1031 1032 /** 1033 * Specifies the behavior that should be exhibited if the LDIF reader 1034 * encounters an entry with duplicate values. 1035 * 1036 * @param duplicateValueBehavior The behavior that should be exhibited if 1037 * the LDIF reader encounters an entry with 1038 * duplicate values. 1039 */ 1040 public void setDuplicateValueBehavior( 1041 final DuplicateValueBehavior duplicateValueBehavior) 1042 { 1043 this.duplicateValueBehavior = duplicateValueBehavior; 1044 } 1045 1046 1047 1048 /** 1049 * Indicates whether to strip off any illegal trailing spaces that may appear 1050 * in LDIF records (e.g., after an entry DN or attribute value). The LDIF 1051 * specification strongly recommends that any value which legitimately 1052 * contains trailing spaces be base64-encoded, and any spaces which appear 1053 * after the end of non-base64-encoded values may therefore be considered 1054 * invalid. If any such trailing spaces are encountered in an LDIF record and 1055 * they are not to be stripped, then an {@code LDIFException} will be thrown 1056 * for that record. 
1057 * <BR><BR> 1058 * Note that this applies only to spaces after the end of a value, and not to 1059 * spaces which may appear at the end of a line for a value that is wrapped 1060 * and continued on the next line. 1061 * 1062 * @return {@code true} if illegal trailing spaces should be stripped off, or 1063 * {@code false} if LDIF records containing illegal trailing spaces 1064 * should be rejected. 1065 * 1066 * @deprecated Use the {@code getTrailingSpaceBehavior} method instead. 1067 */ 1068 @Deprecated() 1069 public boolean stripTrailingSpaces() 1070 { 1071 return (trailingSpaceBehavior == TrailingSpaceBehavior.STRIP); 1072 } 1073 1074 1075 1076 /** 1077 * Specifies whether to strip off any illegal trailing spaces that may appear 1078 * in LDIF records (e.g., after an entry DN or attribute value). The LDIF 1079 * specification strongly recommends that any value which legitimately 1080 * contains trailing spaces be base64-encoded, and any spaces which appear 1081 * after the end of non-base64-encoded values may therefore be considered 1082 * invalid. If any such trailing spaces are encountered in an LDIF record and 1083 * they are not to be stripped, then an {@code LDIFException} will be thrown 1084 * for that record. 1085 * <BR><BR> 1086 * Note that this applies only to spaces after the end of a value, and not to 1087 * spaces which may appear at the end of a line for a value that is wrapped 1088 * and continued on the next line. 1089 * 1090 * @param stripTrailingSpaces Indicates whether to strip off any illegal 1091 * trailing spaces, or {@code false} if LDIF 1092 * records containing them should be rejected. 1093 * 1094 * @deprecated Use the {@code setTrailingSpaceBehavior} method instead. 1095 */ 1096 @Deprecated() 1097 public void setStripTrailingSpaces(final boolean stripTrailingSpaces) 1098 { 1099 trailingSpaceBehavior = stripTrailingSpaces 1100 ? 
TrailingSpaceBehavior.STRIP 1101 : TrailingSpaceBehavior.REJECT; 1102 } 1103 1104 1105 1106 /** 1107 * Retrieves the behavior that should be exhibited when encountering attribute 1108 * values which are not base64-encoded but contain trailing spaces. The LDIF 1109 * specification strongly recommends that any value which legitimately 1110 * contains trailing spaces be base64-encoded, but the LDAP SDK LDIF parser 1111 * may be configured to automatically strip these spaces, to preserve them, or 1112 * to reject any entry or change record containing them. 1113 * 1114 * @return The behavior that should be exhibited when encountering attribute 1115 * values which are not base64-encoded but contain trailing spaces. 1116 */ 1117 public TrailingSpaceBehavior getTrailingSpaceBehavior() 1118 { 1119 return trailingSpaceBehavior; 1120 } 1121 1122 1123 1124 /** 1125 * Specifies the behavior that should be exhibited when encountering attribute 1126 * values which are not base64-encoded but contain trailing spaces. The LDIF 1127 * specification strongly recommends that any value which legitimately 1128 * contains trailing spaces be base64-encoded, but the LDAP SDK LDIF parser 1129 * may be configured to automatically strip these spaces, to preserve them, or 1130 * to reject any entry or change record containing them. 1131 * 1132 * @param trailingSpaceBehavior The behavior that should be exhibited when 1133 * encountering attribute values which are not 1134 * base64-encoded but contain trailing spaces. 1135 */ 1136 public void setTrailingSpaceBehavior( 1137 final TrailingSpaceBehavior trailingSpaceBehavior) 1138 { 1139 this.trailingSpaceBehavior = trailingSpaceBehavior; 1140 } 1141 1142 1143 1144 /** 1145 * Retrieves the base path that will be prepended to relative paths in order 1146 * to obtain an absolute path. This will only be used for "file:" URLs that 1147 * have paths which do not begin with a slash. 
1148 * 1149 * @return The base path that will be prepended to relative paths in order to 1150 * obtain an absolute path. 1151 */ 1152 public String getRelativeBasePath() 1153 { 1154 return relativeBasePath; 1155 } 1156 1157 1158 1159 /** 1160 * Specifies the base path that will be prepended to relative paths in order 1161 * to obtain an absolute path. This will only be used for "file:" URLs that 1162 * have paths which do not begin with a space. 1163 * 1164 * @param relativeBasePath The base path that will be prepended to relative 1165 * paths in order to obtain an absolute path. 1166 */ 1167 public void setRelativeBasePath(final String relativeBasePath) 1168 { 1169 setRelativeBasePath(new File(relativeBasePath)); 1170 } 1171 1172 1173 1174 /** 1175 * Specifies the base path that will be prepended to relative paths in order 1176 * to obtain an absolute path. This will only be used for "file:" URLs that 1177 * have paths which do not begin with a space. 1178 * 1179 * @param relativeBasePath The base path that will be prepended to relative 1180 * paths in order to obtain an absolute path. 1181 */ 1182 public void setRelativeBasePath(final File relativeBasePath) 1183 { 1184 final String path = relativeBasePath.getAbsolutePath(); 1185 if (path.endsWith(File.separator)) 1186 { 1187 this.relativeBasePath = path; 1188 } 1189 else 1190 { 1191 this.relativeBasePath = path + File.separator; 1192 } 1193 } 1194 1195 1196 1197 /** 1198 * Retrieves the schema that will be used when reading LDIF records, if 1199 * defined. 1200 * 1201 * @return The schema that will be used when reading LDIF records, or 1202 * {@code null} if no schema should be used and all attributes should 1203 * be treated as case-insensitive strings. 1204 */ 1205 public Schema getSchema() 1206 { 1207 return schema; 1208 } 1209 1210 1211 1212 /** 1213 * Specifies the schema that should be used when reading LDIF records. 
1214 * 1215 * @param schema The schema that should be used when reading LDIF records, 1216 * or {@code null} if no schema should be used and all 1217 * attributes should be treated as case-insensitive strings. 1218 */ 1219 public void setSchema(final Schema schema) 1220 { 1221 this.schema = schema; 1222 } 1223 1224 1225 1226 /** 1227 * Reads a record from the LDIF source. It may be either an entry or an LDIF 1228 * change record. 1229 * 1230 * @return The record read from the LDIF source, or {@code null} if there are 1231 * no more entries to be read. 1232 * 1233 * @throws IOException If a problem occurs while trying to read from the 1234 * LDIF source. 1235 * 1236 * @throws LDIFException If the data read could not be parsed as an entry or 1237 * an LDIF change record. 1238 */ 1239 public LDIFRecord readLDIFRecord() 1240 throws IOException, LDIFException 1241 { 1242 if (isAsync()) 1243 { 1244 return readLDIFRecordAsync(); 1245 } 1246 else 1247 { 1248 return readLDIFRecordInternal(); 1249 } 1250 } 1251 1252 1253 1254 /** 1255 * Reads an entry from the LDIF source. 1256 * 1257 * @return The entry read from the LDIF source, or {@code null} if there are 1258 * no more entries to be read. 1259 * 1260 * @throws IOException If a problem occurs while attempting to read from the 1261 * LDIF source. 1262 * 1263 * @throws LDIFException If the data read could not be parsed as an entry. 1264 */ 1265 public Entry readEntry() 1266 throws IOException, LDIFException 1267 { 1268 if (isAsync()) 1269 { 1270 return readEntryAsync(); 1271 } 1272 else 1273 { 1274 return readEntryInternal(); 1275 } 1276 } 1277 1278 1279 1280 /** 1281 * Reads an LDIF change record from the LDIF source. The LDIF record must 1282 * have a changetype. 1283 * 1284 * @return The change record read from the LDIF source, or {@code null} if 1285 * there are no more records to be read. 1286 * 1287 * @throws IOException If a problem occurs while attempting to read from the 1288 * LDIF source. 
1289 * 1290 * @throws LDIFException If the data read could not be parsed as an LDIF 1291 * change record. 1292 */ 1293 public LDIFChangeRecord readChangeRecord() 1294 throws IOException, LDIFException 1295 { 1296 return readChangeRecord(false); 1297 } 1298 1299 1300 1301 /** 1302 * Reads an LDIF change record from the LDIF source. Optionally, if the LDIF 1303 * record does not have a changetype, then it may be assumed to be an add 1304 * change record. 1305 * 1306 * @param defaultAdd Indicates whether an LDIF record not containing a 1307 * changetype should be retrieved as an add change record. 1308 * If this is {@code false} and the record read does not 1309 * include a changetype, then an {@code LDIFException} 1310 * will be thrown. 1311 * 1312 * @return The change record read from the LDIF source, or {@code null} if 1313 * there are no more records to be read. 1314 * 1315 * @throws IOException If a problem occurs while attempting to read from the 1316 * LDIF source. 1317 * 1318 * @throws LDIFException If the data read could not be parsed as an LDIF 1319 * change record. 1320 */ 1321 public LDIFChangeRecord readChangeRecord(final boolean defaultAdd) 1322 throws IOException, LDIFException 1323 { 1324 if (isAsync()) 1325 { 1326 return readChangeRecordAsync(defaultAdd); 1327 } 1328 else 1329 { 1330 return readChangeRecordInternal(defaultAdd); 1331 } 1332 } 1333 1334 1335 1336 /** 1337 * Reads the next {@code LDIFRecord}, which was read and parsed by a different 1338 * thread. 1339 * 1340 * @return The next parsed record or {@code null} if there are no more 1341 * records to read. 1342 * 1343 * @throws IOException If IOException was thrown when reading or parsing 1344 * the record. 1345 * 1346 * @throws LDIFException If LDIFException was thrown parsing the record. 
1347 */ 1348 private LDIFRecord readLDIFRecordAsync() 1349 throws IOException, LDIFException 1350 { 1351 Result<UnparsedLDIFRecord, LDIFRecord> result = null; 1352 LDIFRecord record = null; 1353 while (record == null) 1354 { 1355 result = readLDIFRecordResultAsync(); 1356 if (result == null) 1357 { 1358 return null; 1359 } 1360 1361 record = result.getOutput(); 1362 1363 // This is a special value that means we should skip this Entry. We have 1364 // to use something different than null because null means EOF. 1365 if (record == SKIP_ENTRY) 1366 { 1367 record = null; 1368 } 1369 } 1370 return record; 1371 } 1372 1373 1374 1375 /** 1376 * Reads an entry asynchronously from the LDIF source. 1377 * 1378 * @return The entry read from the LDIF source, or {@code null} if there are 1379 * no more entries to be read. 1380 * 1381 * @throws IOException If a problem occurs while attempting to read from the 1382 * LDIF source. 1383 * @throws LDIFException If the data read could not be parsed as an entry. 1384 */ 1385 private Entry readEntryAsync() 1386 throws IOException, LDIFException 1387 { 1388 Result<UnparsedLDIFRecord, LDIFRecord> result = null; 1389 LDIFRecord record = null; 1390 while (record == null) 1391 { 1392 result = readLDIFRecordResultAsync(); 1393 if (result == null) 1394 { 1395 return null; 1396 } 1397 1398 record = result.getOutput(); 1399 1400 // This is a special value that means we should skip this Entry. We have 1401 // to use something different than null because null means EOF. 1402 if (record == SKIP_ENTRY) 1403 { 1404 record = null; 1405 } 1406 } 1407 1408 if (record instanceof Entry) 1409 { 1410 return (Entry) record; 1411 } 1412 else if (record instanceof LDIFChangeRecord) 1413 { 1414 try 1415 { 1416 // Some LDIFChangeRecord can be converted to an Entry. This is really 1417 // an edge case though. 
1418 return ((LDIFChangeRecord)record).toEntry(); 1419 } 1420 catch (LDIFException e) 1421 { 1422 debugException(e); 1423 final long firstLineNumber = result.getInput().getFirstLineNumber(); 1424 throw new LDIFException(e.getExceptionMessage(), 1425 firstLineNumber, true, e); 1426 } 1427 } 1428 1429 throw new AssertionError("LDIFRecords must either be an Entry or an " + 1430 "LDIFChangeRecord"); 1431 } 1432 1433 1434 1435 /** 1436 * Reads an LDIF change record from the LDIF source asynchronously. 1437 * Optionally, if the LDIF record does not have a changetype, then it may be 1438 * assumed to be an add change record. 1439 * 1440 * @param defaultAdd Indicates whether an LDIF record not containing a 1441 * changetype should be retrieved as an add change record. 1442 * If this is {@code false} and the record read does not 1443 * include a changetype, then an {@code LDIFException} will 1444 * be thrown. 1445 * 1446 * @return The change record read from the LDIF source, or {@code null} if 1447 * there are no more records to be read. 1448 * 1449 * @throws IOException If a problem occurs while attempting to read from the 1450 * LDIF source. 1451 * @throws LDIFException If the data read could not be parsed as an LDIF 1452 * change record. 1453 */ 1454 private LDIFChangeRecord readChangeRecordAsync(final boolean defaultAdd) 1455 throws IOException, LDIFException 1456 { 1457 Result<UnparsedLDIFRecord, LDIFRecord> result = null; 1458 LDIFRecord record = null; 1459 while (record == null) 1460 { 1461 result = readLDIFRecordResultAsync(); 1462 if (result == null) 1463 { 1464 return null; 1465 } 1466 1467 record = result.getOutput(); 1468 1469 // This is a special value that means we should skip this Entry. We have 1470 // to use something different than null because null means EOF. 
1471 if (record == SKIP_ENTRY) 1472 { 1473 record = null; 1474 } 1475 } 1476 1477 if (record instanceof LDIFChangeRecord) 1478 { 1479 return (LDIFChangeRecord) record; 1480 } 1481 else if (record instanceof Entry) 1482 { 1483 if (defaultAdd) 1484 { 1485 return new LDIFAddChangeRecord((Entry) record); 1486 } 1487 else 1488 { 1489 final long firstLineNumber = result.getInput().getFirstLineNumber(); 1490 throw new LDIFException( 1491 ERR_READ_NOT_CHANGE_RECORD.get(firstLineNumber), firstLineNumber, 1492 true); 1493 } 1494 } 1495 1496 throw new AssertionError("LDIFRecords must either be an Entry or an " + 1497 "LDIFChangeRecord"); 1498 } 1499 1500 1501 1502 /** 1503 * Reads the next LDIF record, which was read and parsed asynchronously by 1504 * separate threads. 1505 * 1506 * @return The next LDIF record or {@code null} if there are no more records. 1507 * 1508 * @throws IOException If a problem occurs while attempting to read from the 1509 * LDIF source. 1510 * 1511 * @throws LDIFException If the data read could not be parsed as an entry. 1512 */ 1513 private Result<UnparsedLDIFRecord, LDIFRecord> readLDIFRecordResultAsync() 1514 throws IOException, LDIFException 1515 { 1516 Result<UnparsedLDIFRecord, LDIFRecord> result = null; 1517 1518 // If the asynchronous reading and parsing is complete, then we don't have 1519 // to block waiting for the next record to show up on the queue. If there 1520 // isn't a record there, then return null (EOF) right away. 1521 if (asyncParsingComplete.get()) 1522 { 1523 result = asyncParsedRecords.poll(); 1524 } 1525 else 1526 { 1527 try 1528 { 1529 // We probably could just do a asyncParsedRecords.take() here, but 1530 // there are some edge case error scenarios where 1531 // asyncParsingComplete might be set without a special EOF sentinel 1532 // Result enqueued. So to guard against this, we have a very cautious 1533 // polling interval of 1 second. 
During normal processing, we never 1534 // have to wait for this to expire, when there is something to do 1535 // (like shutdown). 1536 while ((result == null) && (!asyncParsingComplete.get())) 1537 { 1538 result = asyncParsedRecords.poll(1, TimeUnit.SECONDS); 1539 } 1540 1541 // There's a very small chance that we missed the value, so double-check 1542 if (result == null) 1543 { 1544 result = asyncParsedRecords.poll(); 1545 } 1546 } 1547 catch (InterruptedException e) 1548 { 1549 debugException(e); 1550 throw createIOExceptionWithCause(null, e); 1551 } 1552 } 1553 if (result == null) 1554 { 1555 return null; 1556 } 1557 1558 rethrow(result.getFailureCause()); 1559 1560 // Check if we reached the end of the input 1561 final UnparsedLDIFRecord unparsedRecord = result.getInput(); 1562 if (unparsedRecord.isEOF()) 1563 { 1564 // This might have been set already by the LineReaderThread, but 1565 // just in case it hasn't gotten to it yet, do so here. 1566 asyncParsingComplete.set(true); 1567 1568 // Enqueue this EOF result again for any other thread that might be 1569 // blocked in asyncParsedRecords.take() even though having multiple 1570 // threads call this method concurrently breaks the contract of this 1571 // class. 1572 try 1573 { 1574 asyncParsedRecords.put(result); 1575 } 1576 catch (InterruptedException e) 1577 { 1578 // We shouldn't ever get interrupted because the put won't ever block. 1579 // Once we are done reading, this is the only item left in the queue, 1580 // so we should always be able to re-enqueue it. 1581 debugException(e); 1582 } 1583 return null; 1584 } 1585 1586 return result; 1587 } 1588 1589 1590 1591 /** 1592 * Indicates whether this LDIF reader was constructed to perform asynchronous 1593 * processing. 1594 * 1595 * @return {@code true} if this LDIFReader was constructed to perform 1596 * asynchronous processing, or {@code false} if not. 
1597 */ 1598 private boolean isAsync() 1599 { 1600 return isAsync; 1601 } 1602 1603 1604 1605 /** 1606 * If not {@code null}, rethrows the specified Throwable as either an 1607 * IOException or LDIFException. 1608 * 1609 * @param t The exception to rethrow. If it's {@code null}, then nothing 1610 * is thrown. 1611 * 1612 * @throws IOException If t is an IOException or a checked Exception that 1613 * is not an LDIFException. 1614 * @throws LDIFException If t is an LDIFException. 1615 */ 1616 static void rethrow(final Throwable t) 1617 throws IOException, LDIFException 1618 { 1619 if (t == null) 1620 { 1621 return; 1622 } 1623 1624 if (t instanceof IOException) 1625 { 1626 throw (IOException) t; 1627 } 1628 else if (t instanceof LDIFException) 1629 { 1630 throw (LDIFException) t; 1631 } 1632 else if (t instanceof RuntimeException) 1633 { 1634 throw (RuntimeException) t; 1635 } 1636 else if (t instanceof Error) 1637 { 1638 throw (Error) t; 1639 } 1640 else 1641 { 1642 throw createIOExceptionWithCause(null, t); 1643 } 1644 } 1645 1646 1647 1648 /** 1649 * Reads a record from the LDIF source. It may be either an entry or an LDIF 1650 * change record. 1651 * 1652 * @return The record read from the LDIF source, or {@code null} if there are 1653 * no more entries to be read. 1654 * 1655 * @throws IOException If a problem occurs while trying to read from the 1656 * LDIF source. 1657 * @throws LDIFException If the data read could not be parsed as an entry or 1658 * an LDIF change record. 1659 */ 1660 private LDIFRecord readLDIFRecordInternal() 1661 throws IOException, LDIFException 1662 { 1663 final UnparsedLDIFRecord unparsedRecord = readUnparsedRecord(); 1664 return decodeRecord(unparsedRecord, relativeBasePath, schema); 1665 } 1666 1667 1668 1669 /** 1670 * Reads an entry from the LDIF source. 1671 * 1672 * @return The entry read from the LDIF source, or {@code null} if there are 1673 * no more entries to be read. 
1674 * 1675 * @throws IOException If a problem occurs while attempting to read from the 1676 * LDIF source. 1677 * @throws LDIFException If the data read could not be parsed as an entry. 1678 */ 1679 private Entry readEntryInternal() 1680 throws IOException, LDIFException 1681 { 1682 Entry e = null; 1683 while (e == null) 1684 { 1685 final UnparsedLDIFRecord unparsedRecord = readUnparsedRecord(); 1686 if (unparsedRecord.isEOF()) 1687 { 1688 return null; 1689 } 1690 1691 e = decodeEntry(unparsedRecord, relativeBasePath); 1692 debugLDIFRead(e); 1693 1694 if (entryTranslator != null) 1695 { 1696 e = entryTranslator.translate(e, unparsedRecord.getFirstLineNumber()); 1697 } 1698 } 1699 return e; 1700 } 1701 1702 1703 1704 /** 1705 * Reads an LDIF change record from the LDIF source. Optionally, if the LDIF 1706 * record does not have a changetype, then it may be assumed to be an add 1707 * change record. 1708 * 1709 * @param defaultAdd Indicates whether an LDIF record not containing a 1710 * changetype should be retrieved as an add change record. 1711 * If this is {@code false} and the record read does not 1712 * include a changetype, then an {@code LDIFException} will 1713 * be thrown. 1714 * 1715 * @return The change record read from the LDIF source, or {@code null} if 1716 * there are no more records to be read. 1717 * 1718 * @throws IOException If a problem occurs while attempting to read from the 1719 * LDIF source. 1720 * @throws LDIFException If the data read could not be parsed as an LDIF 1721 * change record. 
1722 */ 1723 private LDIFChangeRecord readChangeRecordInternal(final boolean defaultAdd) 1724 throws IOException, LDIFException 1725 { 1726 LDIFChangeRecord r = null; 1727 while (r == null) 1728 { 1729 final UnparsedLDIFRecord unparsedRecord = readUnparsedRecord(); 1730 if (unparsedRecord.isEOF()) 1731 { 1732 return null; 1733 } 1734 1735 r = decodeChangeRecord(unparsedRecord, relativeBasePath, defaultAdd, 1736 schema); 1737 debugLDIFRead(r); 1738 1739 if (changeRecordTranslator != null) 1740 { 1741 r = changeRecordTranslator.translate(r, 1742 unparsedRecord.getFirstLineNumber()); 1743 } 1744 } 1745 return r; 1746 } 1747 1748 1749 1750 /** 1751 * Reads a record (either an entry or a change record) from the LDIF source 1752 * and places it in the line list. 1753 * 1754 * @return The line number for the first line of the entry that was read. 1755 * 1756 * @throws IOException If a problem occurs while attempting to read from the 1757 * LDIF source. 1758 * 1759 * @throws LDIFException If the data read could not be parsed as a valid 1760 * LDIF record. 1761 */ 1762 private UnparsedLDIFRecord readUnparsedRecord() 1763 throws IOException, LDIFException 1764 { 1765 final ArrayList<StringBuilder> lineList = new ArrayList<StringBuilder>(20); 1766 boolean lastWasComment = false; 1767 long firstLineNumber = lineNumberCounter + 1; 1768 while (true) 1769 { 1770 final String line = reader.readLine(); 1771 lineNumberCounter++; 1772 1773 if (line == null) 1774 { 1775 // We've hit the end of the LDIF source. If we haven't read any entry 1776 // data, then return null. Otherwise, the last entry wasn't followed by 1777 // a blank line, which is OK, and we should decode that entry. 1778 if (lineList.isEmpty()) 1779 { 1780 return new UnparsedLDIFRecord(new ArrayList<StringBuilder>(0), 1781 duplicateValueBehavior, trailingSpaceBehavior, schema, -1); 1782 } 1783 else 1784 { 1785 break; 1786 } 1787 } 1788 1789 if (line.length() == 0) 1790 { 1791 // It's a blank line. 
If we have read entry data, then this signals the 1792 // end of the entry. Otherwise, it's an extra space between entries, 1793 // which is OK. 1794 lastWasComment = false; 1795 if (lineList.isEmpty()) 1796 { 1797 firstLineNumber++; 1798 continue; 1799 } 1800 else 1801 { 1802 break; 1803 } 1804 } 1805 1806 if (line.charAt(0) == ' ') 1807 { 1808 // The line starts with a space, which means that it must be a 1809 // continuation of the previous line. This is true even if the last 1810 // line was a comment. 1811 if (lastWasComment) 1812 { 1813 // What we've read is part of a comment, so we don't care about its 1814 // content. 1815 } 1816 else if (lineList.isEmpty()) 1817 { 1818 throw new LDIFException( 1819 ERR_READ_UNEXPECTED_FIRST_SPACE.get(lineNumberCounter), 1820 lineNumberCounter, false); 1821 } 1822 else 1823 { 1824 lineList.get(lineList.size() - 1).append(line.substring(1)); 1825 lastWasComment = false; 1826 } 1827 } 1828 else if (line.charAt(0) == '#') 1829 { 1830 lastWasComment = true; 1831 } 1832 else 1833 { 1834 // We want to make sure that we skip over the "version:" line if it 1835 // exists, but that should only occur at the beginning of an entry where 1836 // it can't be confused with a possible "version" attribute. 1837 if (lineList.isEmpty() && line.startsWith("version:")) 1838 { 1839 lastWasComment = true; 1840 } 1841 else 1842 { 1843 lineList.add(new StringBuilder(line)); 1844 lastWasComment = false; 1845 } 1846 } 1847 } 1848 1849 return new UnparsedLDIFRecord(lineList, duplicateValueBehavior, 1850 trailingSpaceBehavior, schema, firstLineNumber); 1851 } 1852 1853 1854 1855 /** 1856 * Decodes the provided set of LDIF lines as an entry. The provided set of 1857 * lines must contain exactly one entry. Long lines may be wrapped as per the 1858 * LDIF specification, and it is acceptable to have one or more blank lines 1859 * following the entry. A default trailing space behavior of 1860 * {@code TrailingSpaceBehavior#REJECT} will be used. 
1861 * 1862 * @param ldifLines The set of lines that comprise the LDIF representation 1863 * of the entry. It must not be {@code null} or empty. 1864 * 1865 * @return The entry read from LDIF. 1866 * 1867 * @throws LDIFException If the provided LDIF data cannot be decoded as an 1868 * entry. 1869 */ 1870 public static Entry decodeEntry(final String... ldifLines) 1871 throws LDIFException 1872 { 1873 final Entry e = decodeEntry(prepareRecord(DuplicateValueBehavior.STRIP, 1874 TrailingSpaceBehavior.REJECT, null, ldifLines), 1875 DEFAULT_RELATIVE_BASE_PATH); 1876 debugLDIFRead(e); 1877 return e; 1878 } 1879 1880 1881 1882 /** 1883 * Decodes the provided set of LDIF lines as an entry. The provided set of 1884 * lines must contain exactly one entry. Long lines may be wrapped as per the 1885 * LDIF specification, and it is acceptable to have one or more blank lines 1886 * following the entry. A default trailing space behavior of 1887 * {@code TrailingSpaceBehavior#REJECT} will be used. 1888 * 1889 * @param ignoreDuplicateValues Indicates whether to ignore duplicate 1890 * attribute values encountered while parsing. 1891 * @param schema The schema to use when parsing the record, 1892 * if applicable. 1893 * @param ldifLines The set of lines that comprise the LDIF 1894 * representation of the entry. It must not be 1895 * {@code null} or empty. 1896 * 1897 * @return The entry read from LDIF. 1898 * 1899 * @throws LDIFException If the provided LDIF data cannot be decoded as an 1900 * entry. 1901 */ 1902 public static Entry decodeEntry(final boolean ignoreDuplicateValues, 1903 final Schema schema, 1904 final String... ldifLines) 1905 throws LDIFException 1906 { 1907 return decodeEntry(ignoreDuplicateValues, TrailingSpaceBehavior.REJECT, 1908 schema, ldifLines); 1909 } 1910 1911 1912 1913 /** 1914 * Decodes the provided set of LDIF lines as an entry. The provided set of 1915 * lines must contain exactly one entry. 
Long lines may be wrapped as per the 1916 * LDIF specification, and it is acceptable to have one or more blank lines 1917 * following the entry. 1918 * 1919 * @param ignoreDuplicateValues Indicates whether to ignore duplicate 1920 * attribute values encountered while parsing. 1921 * @param trailingSpaceBehavior The behavior that should be exhibited when 1922 * encountering attribute values which are not 1923 * base64-encoded but contain trailing spaces. 1924 * It must not be {@code null}. 1925 * @param schema The schema to use when parsing the record, 1926 * if applicable. 1927 * @param ldifLines The set of lines that comprise the LDIF 1928 * representation of the entry. It must not be 1929 * {@code null} or empty. 1930 * 1931 * @return The entry read from LDIF. 1932 * 1933 * @throws LDIFException If the provided LDIF data cannot be decoded as an 1934 * entry. 1935 */ 1936 public static Entry decodeEntry( 1937 final boolean ignoreDuplicateValues, 1938 final TrailingSpaceBehavior trailingSpaceBehavior, 1939 final Schema schema, 1940 final String... ldifLines) throws LDIFException 1941 { 1942 final Entry e = decodeEntry(prepareRecord( 1943 (ignoreDuplicateValues 1944 ? DuplicateValueBehavior.STRIP 1945 : DuplicateValueBehavior.REJECT), 1946 trailingSpaceBehavior, schema, ldifLines), 1947 DEFAULT_RELATIVE_BASE_PATH); 1948 debugLDIFRead(e); 1949 return e; 1950 } 1951 1952 1953 1954 /** 1955 * Decodes the provided set of LDIF lines as an LDIF change record. The 1956 * provided set of lines must contain exactly one change record and it must 1957 * include a changetype. Long lines may be wrapped as per the LDIF 1958 * specification, and it is acceptable to have one or more blank lines 1959 * following the entry. 1960 * 1961 * @param ldifLines The set of lines that comprise the LDIF representation 1962 * of the change record. It must not be {@code null} or 1963 * empty. 1964 * 1965 * @return The change record read from LDIF. 
1966 * 1967 * @throws LDIFException If the provided LDIF data cannot be decoded as a 1968 * change record. 1969 */ 1970 public static LDIFChangeRecord decodeChangeRecord(final String... ldifLines) 1971 throws LDIFException 1972 { 1973 return decodeChangeRecord(false, ldifLines); 1974 } 1975 1976 1977 1978 /** 1979 * Decodes the provided set of LDIF lines as an LDIF change record. The 1980 * provided set of lines must contain exactly one change record. Long lines 1981 * may be wrapped as per the LDIF specification, and it is acceptable to have 1982 * one or more blank lines following the entry. 1983 * 1984 * @param defaultAdd Indicates whether an LDIF record not containing a 1985 * changetype should be retrieved as an add change record. 1986 * If this is {@code false} and the record read does not 1987 * include a changetype, then an {@code LDIFException} 1988 * will be thrown. 1989 * @param ldifLines The set of lines that comprise the LDIF representation 1990 * of the change record. It must not be {@code null} or 1991 * empty. 1992 * 1993 * @return The change record read from LDIF. 1994 * 1995 * @throws LDIFException If the provided LDIF data cannot be decoded as a 1996 * change record. 1997 */ 1998 public static LDIFChangeRecord decodeChangeRecord(final boolean defaultAdd, 1999 final String... ldifLines) 2000 throws LDIFException 2001 { 2002 final LDIFChangeRecord r = 2003 decodeChangeRecord( 2004 prepareRecord(DuplicateValueBehavior.STRIP, 2005 TrailingSpaceBehavior.REJECT, null, ldifLines), 2006 DEFAULT_RELATIVE_BASE_PATH, defaultAdd, null); 2007 debugLDIFRead(r); 2008 return r; 2009 } 2010 2011 2012 2013 /** 2014 * Decodes the provided set of LDIF lines as an LDIF change record. The 2015 * provided set of lines must contain exactly one change record. Long lines 2016 * may be wrapped as per the LDIF specification, and it is acceptable to have 2017 * one or more blank lines following the entry. 
2018 * 2019 * @param ignoreDuplicateValues Indicates whether to ignore duplicate 2020 * attribute values encountered while parsing. 2021 * @param schema The schema to use when processing the change 2022 * record, or {@code null} if no schema should 2023 * be used and all values should be treated as 2024 * case-insensitive strings. 2025 * @param defaultAdd Indicates whether an LDIF record not 2026 * containing a changetype should be retrieved 2027 * as an add change record. If this is 2028 * {@code false} and the record read does not 2029 * include a changetype, then an 2030 * {@code LDIFException} will be thrown. 2031 * @param ldifLines The set of lines that comprise the LDIF 2032 * representation of the change record. It 2033 * must not be {@code null} or empty. 2034 * 2035 * @return The change record read from LDIF. 2036 * 2037 * @throws LDIFException If the provided LDIF data cannot be decoded as a 2038 * change record. 2039 */ 2040 public static LDIFChangeRecord decodeChangeRecord( 2041 final boolean ignoreDuplicateValues, 2042 final Schema schema, 2043 final boolean defaultAdd, 2044 final String... ldifLines) 2045 throws LDIFException 2046 { 2047 return decodeChangeRecord(ignoreDuplicateValues, 2048 TrailingSpaceBehavior.REJECT, schema, defaultAdd, ldifLines); 2049 } 2050 2051 2052 2053 /** 2054 * Decodes the provided set of LDIF lines as an LDIF change record. The 2055 * provided set of lines must contain exactly one change record. Long lines 2056 * may be wrapped as per the LDIF specification, and it is acceptable to have 2057 * one or more blank lines following the entry. 2058 * 2059 * @param ignoreDuplicateValues Indicates whether to ignore duplicate 2060 * attribute values encountered while parsing. 2061 * @param trailingSpaceBehavior The behavior that should be exhibited when 2062 * encountering attribute values which are not 2063 * base64-encoded but contain trailing spaces. 2064 * It must not be {@code null}. 
2065 * @param schema The schema to use when processing the change 2066 * record, or {@code null} if no schema should 2067 * be used and all values should be treated as 2068 * case-insensitive strings. 2069 * @param defaultAdd Indicates whether an LDIF record not 2070 * containing a changetype should be retrieved 2071 * as an add change record. If this is 2072 * {@code false} and the record read does not 2073 * include a changetype, then an 2074 * {@code LDIFException} will be thrown. 2075 * @param ldifLines The set of lines that comprise the LDIF 2076 * representation of the change record. It 2077 * must not be {@code null} or empty. 2078 * 2079 * @return The change record read from LDIF. 2080 * 2081 * @throws LDIFException If the provided LDIF data cannot be decoded as a 2082 * change record. 2083 */ 2084 public static LDIFChangeRecord decodeChangeRecord( 2085 final boolean ignoreDuplicateValues, 2086 final TrailingSpaceBehavior trailingSpaceBehavior, 2087 final Schema schema, 2088 final boolean defaultAdd, 2089 final String... ldifLines) 2090 throws LDIFException 2091 { 2092 final LDIFChangeRecord r = decodeChangeRecord( 2093 prepareRecord( 2094 (ignoreDuplicateValues 2095 ? DuplicateValueBehavior.STRIP 2096 : DuplicateValueBehavior.REJECT), 2097 trailingSpaceBehavior, schema, ldifLines), 2098 DEFAULT_RELATIVE_BASE_PATH, defaultAdd, null); 2099 debugLDIFRead(r); 2100 return r; 2101 } 2102 2103 2104 2105 /** 2106 * Parses the provided set of lines into a list of {@code StringBuilder} 2107 * objects suitable for decoding into an entry or LDIF change record. 2108 * Comments will be ignored and wrapped lines will be unwrapped. 2109 * 2110 * @param duplicateValueBehavior The behavior that should be exhibited if 2111 * the LDIF reader encounters an entry with 2112 * duplicate values. 2113 * @param trailingSpaceBehavior The behavior that should be exhibited when 2114 * encountering attribute values which are not 2115 * base64-encoded but contain trailing spaces. 
2116 * @param schema The schema to use when parsing the record, 2117 * if applicable. 2118 * @param ldifLines The set of lines that comprise the record 2119 * to decode. It must not be {@code null} or 2120 * empty. 2121 * 2122 * @return The prepared list of {@code StringBuilder} objects ready to be 2123 * decoded. 2124 * 2125 * @throws LDIFException If the provided lines do not contain valid LDIF 2126 * content. 2127 */ 2128 private static UnparsedLDIFRecord prepareRecord( 2129 final DuplicateValueBehavior duplicateValueBehavior, 2130 final TrailingSpaceBehavior trailingSpaceBehavior, 2131 final Schema schema, final String... ldifLines) 2132 throws LDIFException 2133 { 2134 ensureNotNull(ldifLines); 2135 ensureFalse(ldifLines.length == 0, 2136 "LDIFReader.prepareRecord.ldifLines must not be empty."); 2137 2138 boolean lastWasComment = false; 2139 final ArrayList<StringBuilder> lineList = 2140 new ArrayList<StringBuilder>(ldifLines.length); 2141 for (int i=0; i < ldifLines.length; i++) 2142 { 2143 final String line = ldifLines[i]; 2144 if (line.length() == 0) 2145 { 2146 // This is only acceptable if there are no more non-empty lines in the 2147 // array. 2148 for (int j=i+1; j < ldifLines.length; j++) 2149 { 2150 if (ldifLines[j].length() > 0) 2151 { 2152 throw new LDIFException(ERR_READ_UNEXPECTED_BLANK.get(i), i, true, 2153 ldifLines, null); 2154 } 2155 2156 // If we've gotten here, then we know that we're at the end of the 2157 // entry. If we have read data, then we can decode it as an entry. 2158 // Otherwise, there was no real data in the provided LDIF lines. 2159 if (lineList.isEmpty()) 2160 { 2161 throw new LDIFException(ERR_READ_ONLY_BLANKS.get(), 0, true, 2162 ldifLines, null); 2163 } 2164 else 2165 { 2166 return new UnparsedLDIFRecord(lineList, duplicateValueBehavior, 2167 trailingSpaceBehavior, schema, 0); 2168 } 2169 } 2170 } 2171 2172 if (line.charAt(0) == ' ') 2173 { 2174 if (i > 0) 2175 { 2176 if (! 
lastWasComment) 2177 { 2178 lineList.get(lineList.size() - 1).append(line.substring(1)); 2179 } 2180 } 2181 else 2182 { 2183 throw new LDIFException( 2184 ERR_READ_UNEXPECTED_FIRST_SPACE_NO_NUMBER.get(), 0, 2185 true, ldifLines, null); 2186 } 2187 } 2188 else if (line.charAt(0) == '#') 2189 { 2190 lastWasComment = true; 2191 } 2192 else 2193 { 2194 lineList.add(new StringBuilder(line)); 2195 lastWasComment = false; 2196 } 2197 } 2198 2199 if (lineList.isEmpty()) 2200 { 2201 throw new LDIFException(ERR_READ_NO_DATA.get(), 0, true, ldifLines, null); 2202 } 2203 else 2204 { 2205 return new UnparsedLDIFRecord(lineList, duplicateValueBehavior, 2206 trailingSpaceBehavior, schema, 0); 2207 } 2208 } 2209 2210 2211 2212 /** 2213 * Decodes the unparsed record that was read from the LDIF source. It may be 2214 * either an entry or an LDIF change record. 2215 * 2216 * @param unparsedRecord The unparsed LDIF record that was read from the 2217 * input. It must not be {@code null} or empty. 2218 * @param relativeBasePath The base path that will be prepended to relative 2219 * paths in order to obtain an absolute path. 2220 * @param schema The schema to use when parsing. 2221 * 2222 * @return The parsed record, or {@code null} if there are no more entries to 2223 * be read. 2224 * 2225 * @throws LDIFException If the data read could not be parsed as an entry or 2226 * an LDIF change record. 2227 */ 2228 private static LDIFRecord decodeRecord( 2229 final UnparsedLDIFRecord unparsedRecord, 2230 final String relativeBasePath, 2231 final Schema schema) 2232 throws LDIFException 2233 { 2234 // If there was an error reading from the input, then we rethrow it here. 2235 final Exception readError = unparsedRecord.getFailureCause(); 2236 if (readError != null) 2237 { 2238 if (readError instanceof LDIFException) 2239 { 2240 // If the error was an LDIFException, which will normally be the case, 2241 // then rethrow it with all of the same state. 
We could just 2242 // throw (LDIFException) readError; 2243 // but that's considered bad form. 2244 final LDIFException ldifEx = (LDIFException) readError; 2245 throw new LDIFException(ldifEx.getMessage(), 2246 ldifEx.getLineNumber(), 2247 ldifEx.mayContinueReading(), 2248 ldifEx.getDataLines(), 2249 ldifEx.getCause()); 2250 } 2251 else 2252 { 2253 throw new LDIFException(getExceptionMessage(readError), 2254 -1, true, readError); 2255 } 2256 } 2257 2258 if (unparsedRecord.isEOF()) 2259 { 2260 return null; 2261 } 2262 2263 final ArrayList<StringBuilder> lineList = unparsedRecord.getLineList(); 2264 if (unparsedRecord.getLineList() == null) 2265 { 2266 return null; // We can get here if there was an error reading the lines. 2267 } 2268 2269 final LDIFRecord r; 2270 if (lineList.size() == 1) 2271 { 2272 r = decodeEntry(unparsedRecord, relativeBasePath); 2273 } 2274 else 2275 { 2276 final String lowerSecondLine = toLowerCase(lineList.get(1).toString()); 2277 if (lowerSecondLine.startsWith("control:") || 2278 lowerSecondLine.startsWith("changetype:")) 2279 { 2280 r = decodeChangeRecord(unparsedRecord, relativeBasePath, true, schema); 2281 } 2282 else 2283 { 2284 r = decodeEntry(unparsedRecord, relativeBasePath); 2285 } 2286 } 2287 2288 debugLDIFRead(r); 2289 return r; 2290 } 2291 2292 2293 2294 /** 2295 * Decodes the provided set of LDIF lines as an entry. The provided list must 2296 * not contain any blank lines or comments, and lines are not allowed to be 2297 * wrapped. 2298 * 2299 * @param unparsedRecord The unparsed LDIF record that was read from the 2300 * input. It must not be {@code null} or empty. 2301 * @param relativeBasePath The base path that will be prepended to relative 2302 * paths in order to obtain an absolute path. 2303 * 2304 * @return The entry read from LDIF. 2305 * 2306 * @throws LDIFException If the provided LDIF data cannot be read as an 2307 * entry. 
2308 */ 2309 private static Entry decodeEntry(final UnparsedLDIFRecord unparsedRecord, 2310 final String relativeBasePath) 2311 throws LDIFException 2312 { 2313 final ArrayList<StringBuilder> ldifLines = unparsedRecord.getLineList(); 2314 final long firstLineNumber = unparsedRecord.getFirstLineNumber(); 2315 2316 final Iterator<StringBuilder> iterator = ldifLines.iterator(); 2317 2318 // The first line must start with either "version:" or "dn:". If the first 2319 // line starts with "version:" then the second must start with "dn:". 2320 StringBuilder line = iterator.next(); 2321 handleTrailingSpaces(line, null, firstLineNumber, 2322 unparsedRecord.getTrailingSpaceBehavior()); 2323 int colonPos = line.indexOf(":"); 2324 if ((colonPos > 0) && 2325 line.substring(0, colonPos).equalsIgnoreCase("version")) 2326 { 2327 // The first line is "version:". Under most conditions, this will be 2328 // handled by the LDIF reader, but this can happen if you call 2329 // decodeEntry with a set of data that includes a version. At any rate, 2330 // read the next line, which must specify the DN. 2331 line = iterator.next(); 2332 handleTrailingSpaces(line, null, firstLineNumber, 2333 unparsedRecord.getTrailingSpaceBehavior()); 2334 } 2335 2336 colonPos = line.indexOf(":"); 2337 if ((colonPos < 0) || 2338 (! line.substring(0, colonPos).equalsIgnoreCase("dn"))) 2339 { 2340 throw new LDIFException( 2341 ERR_READ_DN_LINE_DOESNT_START_WITH_DN.get(firstLineNumber), 2342 firstLineNumber, true, ldifLines, null); 2343 } 2344 2345 final String dn; 2346 final int length = line.length(); 2347 if (length == (colonPos+1)) 2348 { 2349 // The colon was the last character on the line. This is acceptable and 2350 // indicates that the entry has the null DN. 2351 dn = ""; 2352 } 2353 else if (line.charAt(colonPos+1) == ':') 2354 { 2355 // Skip over any spaces leading up to the value, and then the rest of the 2356 // string is the base64-encoded DN. 
2357 int pos = colonPos+2; 2358 while ((pos < length) && (line.charAt(pos) == ' ')) 2359 { 2360 pos++; 2361 } 2362 2363 try 2364 { 2365 final byte[] dnBytes = Base64.decode(line.substring(pos)); 2366 dn = new String(dnBytes, "UTF-8"); 2367 } 2368 catch (final ParseException pe) 2369 { 2370 debugException(pe); 2371 throw new LDIFException( 2372 ERR_READ_CANNOT_BASE64_DECODE_DN.get(firstLineNumber, 2373 pe.getMessage()), 2374 firstLineNumber, true, ldifLines, pe); 2375 } 2376 catch (final Exception e) 2377 { 2378 debugException(e); 2379 throw new LDIFException( 2380 ERR_READ_CANNOT_BASE64_DECODE_DN.get(firstLineNumber, e), 2381 firstLineNumber, true, ldifLines, e); 2382 } 2383 } 2384 else 2385 { 2386 // Skip over any spaces leading up to the value, and then the rest of the 2387 // string is the DN. 2388 int pos = colonPos+1; 2389 while ((pos < length) && (line.charAt(pos) == ' ')) 2390 { 2391 pos++; 2392 } 2393 2394 dn = line.substring(pos); 2395 } 2396 2397 2398 // The remaining lines must be the attributes for the entry. However, we 2399 // will allow the case in which an entry does not have any attributes, to be 2400 // able to support reading search result entries in which no attributes were 2401 // returned. 2402 if (! iterator.hasNext()) 2403 { 2404 return new Entry(dn, unparsedRecord.getSchema()); 2405 } 2406 2407 return new Entry(dn, unparsedRecord.getSchema(), 2408 parseAttributes(dn, unparsedRecord.getDuplicateValueBehavior(), 2409 unparsedRecord.getTrailingSpaceBehavior(), 2410 unparsedRecord.getSchema(), ldifLines, iterator, relativeBasePath, 2411 firstLineNumber)); 2412 } 2413 2414 2415 2416 /** 2417 * Decodes the provided set of LDIF lines as a change record. The provided 2418 * list must not contain any blank lines or comments, and lines are not 2419 * allowed to be wrapped. 2420 * 2421 * @param unparsedRecord The unparsed LDIF record that was read from the 2422 * input. It must not be {@code null} or empty. 
2423 * @param relativeBasePath The base path that will be prepended to relative 2424 * paths in order to obtain an absolute path. 2425 * @param defaultAdd Indicates whether an LDIF record not containing a 2426 * changetype should be retrieved as an add change 2427 * record. If this is {@code false} and the record 2428 * read does not include a changetype, then an 2429 * {@code LDIFException} will be thrown. 2430 * @param schema The schema to use in parsing. 2431 * 2432 * @return The change record read from LDIF. 2433 * 2434 * @throws LDIFException If the provided LDIF data cannot be decoded as a 2435 * change record. 2436 */ 2437 private static LDIFChangeRecord decodeChangeRecord( 2438 final UnparsedLDIFRecord unparsedRecord, 2439 final String relativeBasePath, 2440 final boolean defaultAdd, 2441 final Schema schema) 2442 throws LDIFException 2443 { 2444 final ArrayList<StringBuilder> ldifLines = unparsedRecord.getLineList(); 2445 final long firstLineNumber = unparsedRecord.getFirstLineNumber(); 2446 2447 Iterator<StringBuilder> iterator = ldifLines.iterator(); 2448 2449 // The first line must start with either "version:" or "dn:". If the first 2450 // line starts with "version:" then the second must start with "dn:". 2451 StringBuilder line = iterator.next(); 2452 handleTrailingSpaces(line, null, firstLineNumber, 2453 unparsedRecord.getTrailingSpaceBehavior()); 2454 int colonPos = line.indexOf(":"); 2455 int linesRead = 1; 2456 if ((colonPos > 0) && 2457 line.substring(0, colonPos).equalsIgnoreCase("version")) 2458 { 2459 // The first line is "version:". Under most conditions, this will be 2460 // handled by the LDIF reader, but this can happen if you call 2461 // decodeEntry with a set of data that includes a version. At any rate, 2462 // read the next line, which must specify the DN. 
2463 line = iterator.next(); 2464 linesRead++; 2465 handleTrailingSpaces(line, null, firstLineNumber, 2466 unparsedRecord.getTrailingSpaceBehavior()); 2467 } 2468 2469 colonPos = line.indexOf(":"); 2470 if ((colonPos < 0) || 2471 (! line.substring(0, colonPos).equalsIgnoreCase("dn"))) 2472 { 2473 throw new LDIFException( 2474 ERR_READ_DN_LINE_DOESNT_START_WITH_DN.get(firstLineNumber), 2475 firstLineNumber, true, ldifLines, null); 2476 } 2477 2478 final String dn; 2479 int length = line.length(); 2480 if (length == (colonPos+1)) 2481 { 2482 // The colon was the last character on the line. This is acceptable and 2483 // indicates that the entry has the null DN. 2484 dn = ""; 2485 } 2486 else if (line.charAt(colonPos+1) == ':') 2487 { 2488 // Skip over any spaces leading up to the value, and then the rest of the 2489 // string is the base64-encoded DN. 2490 int pos = colonPos+2; 2491 while ((pos < length) && (line.charAt(pos) == ' ')) 2492 { 2493 pos++; 2494 } 2495 2496 try 2497 { 2498 final byte[] dnBytes = Base64.decode(line.substring(pos)); 2499 dn = new String(dnBytes, "UTF-8"); 2500 } 2501 catch (final ParseException pe) 2502 { 2503 debugException(pe); 2504 throw new LDIFException( 2505 ERR_READ_CR_CANNOT_BASE64_DECODE_DN.get(firstLineNumber, 2506 pe.getMessage()), 2507 firstLineNumber, true, ldifLines, pe); 2508 } 2509 catch (final Exception e) 2510 { 2511 debugException(e); 2512 throw new LDIFException( 2513 ERR_READ_CR_CANNOT_BASE64_DECODE_DN.get(firstLineNumber, 2514 e), 2515 firstLineNumber, true, ldifLines, e); 2516 } 2517 } 2518 else 2519 { 2520 // Skip over any spaces leading up to the value, and then the rest of the 2521 // string is the DN. 2522 int pos = colonPos+1; 2523 while ((pos < length) && (line.charAt(pos) == ' ')) 2524 { 2525 pos++; 2526 } 2527 2528 dn = line.substring(pos); 2529 } 2530 2531 2532 // An LDIF change record may contain zero or more controls, with the end of 2533 // the controls signified by the changetype. 
The changetype element must be 2534 // present, unless defaultAdd is true in which case the first thing that is 2535 // neither control or changetype will trigger the start of add attribute 2536 // parsing. 2537 if (! iterator.hasNext()) 2538 { 2539 throw new LDIFException(ERR_READ_CR_TOO_SHORT.get(firstLineNumber), 2540 firstLineNumber, true, ldifLines, null); 2541 } 2542 2543 String changeType = null; 2544 ArrayList<Control> controls = null; 2545 while (true) 2546 { 2547 line = iterator.next(); 2548 handleTrailingSpaces(line, dn, firstLineNumber, 2549 unparsedRecord.getTrailingSpaceBehavior()); 2550 colonPos = line.indexOf(":"); 2551 if (colonPos < 0) 2552 { 2553 throw new LDIFException( 2554 ERR_READ_CR_SECOND_LINE_MISSING_COLON.get(firstLineNumber), 2555 firstLineNumber, true, ldifLines, null); 2556 } 2557 2558 final String token = toLowerCase(line.substring(0, colonPos)); 2559 if (token.equals("control")) 2560 { 2561 if (controls == null) 2562 { 2563 controls = new ArrayList<Control>(5); 2564 } 2565 2566 controls.add(decodeControl(line, colonPos, firstLineNumber, ldifLines, 2567 relativeBasePath)); 2568 } 2569 else if (token.equals("changetype")) 2570 { 2571 changeType = 2572 decodeChangeType(line, colonPos, firstLineNumber, ldifLines); 2573 break; 2574 } 2575 else if (defaultAdd) 2576 { 2577 // The line we read wasn't a control or changetype declaration, so we'll 2578 // assume it's an attribute in an add record. However, we're not ready 2579 // for that yet, and since we can't rewind an iterator we'll create a 2580 // new one that hasn't yet gotten to this line. 
2581 changeType = "add"; 2582 iterator = ldifLines.iterator(); 2583 for (int i=0; i < linesRead; i++) 2584 { 2585 iterator.next(); 2586 } 2587 break; 2588 } 2589 else 2590 { 2591 throw new LDIFException( 2592 ERR_READ_CR_CT_LINE_DOESNT_START_WITH_CONTROL_OR_CT.get( 2593 firstLineNumber), 2594 firstLineNumber, true, ldifLines, null); 2595 } 2596 2597 linesRead++; 2598 } 2599 2600 2601 // Make sure that the change type is acceptable and then decode the rest of 2602 // the change record accordingly. 2603 final String lowerChangeType = toLowerCase(changeType); 2604 if (lowerChangeType.equals("add")) 2605 { 2606 // There must be at least one more line. If not, then that's an error. 2607 // Otherwise, parse the rest of the data as attribute-value pairs. 2608 if (iterator.hasNext()) 2609 { 2610 final Collection<Attribute> attrs = 2611 parseAttributes(dn, unparsedRecord.getDuplicateValueBehavior(), 2612 unparsedRecord.getTrailingSpaceBehavior(), 2613 unparsedRecord.getSchema(), ldifLines, iterator, 2614 relativeBasePath, firstLineNumber); 2615 final Attribute[] attributes = new Attribute[attrs.size()]; 2616 final Iterator<Attribute> attrIterator = attrs.iterator(); 2617 for (int i=0; i < attributes.length; i++) 2618 { 2619 attributes[i] = attrIterator.next(); 2620 } 2621 2622 return new LDIFAddChangeRecord(dn, attributes, controls); 2623 } 2624 else 2625 { 2626 throw new LDIFException(ERR_READ_CR_NO_ATTRIBUTES.get(firstLineNumber), 2627 firstLineNumber, true, ldifLines, null); 2628 } 2629 } 2630 else if (lowerChangeType.equals("delete")) 2631 { 2632 // There shouldn't be any more data. If there is, then that's an error. 2633 // Otherwise, we can just return the delete change record with what we 2634 // already know. 
2635 if (iterator.hasNext()) 2636 { 2637 throw new LDIFException( 2638 ERR_READ_CR_EXTRA_DELETE_DATA.get(firstLineNumber), 2639 firstLineNumber, true, ldifLines, null); 2640 } 2641 else 2642 { 2643 return new LDIFDeleteChangeRecord(dn, controls); 2644 } 2645 } 2646 else if (lowerChangeType.equals("modify")) 2647 { 2648 // There must be at least one more line. If not, then that's an error. 2649 // Otherwise, parse the rest of the data as a set of modifications. 2650 if (iterator.hasNext()) 2651 { 2652 final Modification[] mods = parseModifications(dn, 2653 unparsedRecord.getTrailingSpaceBehavior(), ldifLines, iterator, 2654 firstLineNumber, schema); 2655 return new LDIFModifyChangeRecord(dn, mods, controls); 2656 } 2657 else 2658 { 2659 throw new LDIFException(ERR_READ_CR_NO_MODS.get(firstLineNumber), 2660 firstLineNumber, true, ldifLines, null); 2661 } 2662 } 2663 else if (lowerChangeType.equals("moddn") || 2664 lowerChangeType.equals("modrdn")) 2665 { 2666 // There must be at least one more line. If not, then that's an error. 2667 // Otherwise, parse the rest of the data as a set of modifications. 2668 if (iterator.hasNext()) 2669 { 2670 return parseModifyDNChangeRecord(ldifLines, iterator, dn, controls, 2671 unparsedRecord.getTrailingSpaceBehavior(), firstLineNumber); 2672 } 2673 else 2674 { 2675 throw new LDIFException(ERR_READ_CR_NO_NEWRDN.get(firstLineNumber), 2676 firstLineNumber, true, ldifLines, null); 2677 } 2678 } 2679 else 2680 { 2681 throw new LDIFException(ERR_READ_CR_INVALID_CT.get(changeType, 2682 firstLineNumber), 2683 firstLineNumber, true, ldifLines, null); 2684 } 2685 } 2686 2687 2688 2689 /** 2690 * Decodes information about a control from the provided line. 2691 * 2692 * @param line The line to process. 2693 * @param colonPos The position of the colon that separates the 2694 * control token string from tbe encoded control. 2695 * @param firstLineNumber The line number for the start of the record. 
2696 * @param ldifLines The lines that comprise the LDIF representation 2697 * of the full record being parsed. 2698 * @param relativeBasePath The base path that will be prepended to relative 2699 * paths in order to obtain an absolute path. 2700 * 2701 * @return The decoded control. 2702 * 2703 * @throws LDIFException If a problem is encountered while trying to decode 2704 * the changetype. 2705 */ 2706 private static Control decodeControl(final StringBuilder line, 2707 final int colonPos, 2708 final long firstLineNumber, 2709 final ArrayList<StringBuilder> ldifLines, 2710 final String relativeBasePath) 2711 throws LDIFException 2712 { 2713 final String controlString; 2714 int length = line.length(); 2715 if (length == (colonPos+1)) 2716 { 2717 // The colon was the last character on the line. This is not 2718 // acceptable. 2719 throw new LDIFException( 2720 ERR_READ_CONTROL_LINE_NO_CONTROL_VALUE.get(firstLineNumber), 2721 firstLineNumber, true, ldifLines, null); 2722 } 2723 else if (line.charAt(colonPos+1) == ':') 2724 { 2725 // Skip over any spaces leading up to the value, and then the rest of 2726 // the string is the base64-encoded control representation. This is 2727 // unusual and unnecessary, but is nevertheless acceptable. 
2728 int pos = colonPos+2; 2729 while ((pos < length) && (line.charAt(pos) == ' ')) 2730 { 2731 pos++; 2732 } 2733 2734 try 2735 { 2736 final byte[] controlBytes = Base64.decode(line.substring(pos)); 2737 controlString = new String(controlBytes, "UTF-8"); 2738 } 2739 catch (final ParseException pe) 2740 { 2741 debugException(pe); 2742 throw new LDIFException( 2743 ERR_READ_CANNOT_BASE64_DECODE_CONTROL.get( 2744 firstLineNumber, pe.getMessage()), 2745 firstLineNumber, true, ldifLines, pe); 2746 } 2747 catch (final Exception e) 2748 { 2749 debugException(e); 2750 throw new LDIFException( 2751 ERR_READ_CANNOT_BASE64_DECODE_CONTROL.get(firstLineNumber, e), 2752 firstLineNumber, true, ldifLines, e); 2753 } 2754 } 2755 else 2756 { 2757 // Skip over any spaces leading up to the value, and then the rest of 2758 // the string is the encoded control. 2759 int pos = colonPos+1; 2760 while ((pos < length) && (line.charAt(pos) == ' ')) 2761 { 2762 pos++; 2763 } 2764 2765 controlString = line.substring(pos); 2766 } 2767 2768 // If the resulting control definition is empty, then that's invalid. 2769 if (controlString.length() == 0) 2770 { 2771 throw new LDIFException( 2772 ERR_READ_CONTROL_LINE_NO_CONTROL_VALUE.get(firstLineNumber), 2773 firstLineNumber, true, ldifLines, null); 2774 } 2775 2776 2777 // The first element of the control must be the OID, and it must be followed 2778 // by a space (to separate it from the criticality), a colon (to separate it 2779 // from the value and indicate a default criticality of false), or the end 2780 // of the line (to indicate a default criticality of false and no value). 2781 String oid = null; 2782 boolean hasCriticality = false; 2783 boolean hasValue = false; 2784 int pos = 0; 2785 length = controlString.length(); 2786 while (pos < length) 2787 { 2788 final char c = controlString.charAt(pos); 2789 if (c == ':') 2790 { 2791 // This indicates that there is no criticality and that the value 2792 // immediately follows the OID. 
2793 oid = controlString.substring(0, pos++); 2794 hasValue = true; 2795 break; 2796 } 2797 else if (c == ' ') 2798 { 2799 // This indicates that there is a criticality. We don't know anything 2800 // about the presence of a value yet. 2801 oid = controlString.substring(0, pos++); 2802 hasCriticality = true; 2803 break; 2804 } 2805 else 2806 { 2807 pos++; 2808 } 2809 } 2810 2811 if (oid == null) 2812 { 2813 // This indicates that the string representation of the control is only 2814 // the OID. 2815 return new Control(controlString, false); 2816 } 2817 2818 2819 // See if we need to read the criticality. If so, then do so now. 2820 // Otherwise, assume a default criticality of false. 2821 final boolean isCritical; 2822 if (hasCriticality) 2823 { 2824 // Skip over any spaces before the criticality. 2825 while (controlString.charAt(pos) == ' ') 2826 { 2827 pos++; 2828 } 2829 2830 // Read until we find a colon or the end of the string. 2831 final int criticalityStartPos = pos; 2832 while (pos < length) 2833 { 2834 final char c = controlString.charAt(pos); 2835 if (c == ':') 2836 { 2837 hasValue = true; 2838 break; 2839 } 2840 else 2841 { 2842 pos++; 2843 } 2844 } 2845 2846 final String criticalityString = 2847 toLowerCase(controlString.substring(criticalityStartPos, pos)); 2848 if (criticalityString.equals("true")) 2849 { 2850 isCritical = true; 2851 } 2852 else if (criticalityString.equals("false")) 2853 { 2854 isCritical = false; 2855 } 2856 else 2857 { 2858 throw new LDIFException( 2859 ERR_READ_CONTROL_LINE_INVALID_CRITICALITY.get(criticalityString, 2860 firstLineNumber), 2861 firstLineNumber, true, ldifLines, null); 2862 } 2863 2864 if (hasValue) 2865 { 2866 pos++; 2867 } 2868 } 2869 else 2870 { 2871 isCritical = false; 2872 } 2873 2874 // See if we need to read the value. If so, then do so now. It may be 2875 // a string, or it may be base64-encoded. It could conceivably even be read 2876 // from a URL. 
2877 final ASN1OctetString value; 2878 if (hasValue) 2879 { 2880 // The character immediately after the colon that precedes the value may 2881 // be one of the following: 2882 // - A second colon (optionally followed by a single space) to indicate 2883 // that the value is base64-encoded. 2884 // - A less-than symbol to indicate that the value should be read from a 2885 // location specified by a URL. 2886 // - A single space that precedes the non-base64-encoded value. 2887 // - The first character of the non-base64-encoded value. 2888 switch (controlString.charAt(pos)) 2889 { 2890 case ':': 2891 try 2892 { 2893 if (controlString.length() == (pos+1)) 2894 { 2895 value = new ASN1OctetString(); 2896 } 2897 else if (controlString.charAt(pos+1) == ' ') 2898 { 2899 value = new ASN1OctetString( 2900 Base64.decode(controlString.substring(pos+2))); 2901 } 2902 else 2903 { 2904 value = new ASN1OctetString( 2905 Base64.decode(controlString.substring(pos+1))); 2906 } 2907 } 2908 catch (final Exception e) 2909 { 2910 debugException(e); 2911 throw new LDIFException( 2912 ERR_READ_CONTROL_LINE_CANNOT_BASE64_DECODE_VALUE.get( 2913 firstLineNumber, getExceptionMessage(e)), 2914 firstLineNumber, true, ldifLines, e); 2915 } 2916 break; 2917 case '<': 2918 try 2919 { 2920 final String urlString; 2921 if (controlString.charAt(pos+1) == ' ') 2922 { 2923 urlString = controlString.substring(pos+2); 2924 } 2925 else 2926 { 2927 urlString = controlString.substring(pos+1); 2928 } 2929 value = new ASN1OctetString(retrieveURLBytes(urlString, 2930 relativeBasePath, firstLineNumber)); 2931 } 2932 catch (final Exception e) 2933 { 2934 debugException(e); 2935 throw new LDIFException( 2936 ERR_READ_CONTROL_LINE_CANNOT_RETRIEVE_VALUE_FROM_URL.get( 2937 firstLineNumber, getExceptionMessage(e)), 2938 firstLineNumber, true, ldifLines, e); 2939 } 2940 break; 2941 case ' ': 2942 value = new ASN1OctetString(controlString.substring(pos+1)); 2943 break; 2944 default: 2945 value = new 
ASN1OctetString(controlString.substring(pos)); 2946 break; 2947 } 2948 } 2949 else 2950 { 2951 value = null; 2952 } 2953 2954 return new Control(oid, isCritical, value); 2955 } 2956 2957 2958 2959 /** 2960 * Decodes the changetype element from the provided line. 2961 * 2962 * @param line The line to process. 2963 * @param colonPos The position of the colon that separates the 2964 * changetype string from its value. 2965 * @param firstLineNumber The line number for the start of the record. 2966 * @param ldifLines The lines that comprise the LDIF representation of 2967 * the full record being parsed. 2968 * 2969 * @return The decoded changetype string. 2970 * 2971 * @throws LDIFException If a problem is encountered while trying to decode 2972 * the changetype. 2973 */ 2974 private static String decodeChangeType(final StringBuilder line, 2975 final int colonPos, final long firstLineNumber, 2976 final ArrayList<StringBuilder> ldifLines) 2977 throws LDIFException 2978 { 2979 final int length = line.length(); 2980 if (length == (colonPos+1)) 2981 { 2982 // The colon was the last character on the line. This is not 2983 // acceptable. 2984 throw new LDIFException( 2985 ERR_READ_CT_LINE_NO_CT_VALUE.get(firstLineNumber), firstLineNumber, 2986 true, ldifLines, null); 2987 } 2988 else if (line.charAt(colonPos+1) == ':') 2989 { 2990 // Skip over any spaces leading up to the value, and then the rest of 2991 // the string is the base64-encoded changetype. This is unusual and 2992 // unnecessary, but is nevertheless acceptable. 
2993 int pos = colonPos+2; 2994 while ((pos < length) && (line.charAt(pos) == ' ')) 2995 { 2996 pos++; 2997 } 2998 2999 try 3000 { 3001 final byte[] changeTypeBytes = Base64.decode(line.substring(pos)); 3002 return new String(changeTypeBytes, "UTF-8"); 3003 } 3004 catch (final ParseException pe) 3005 { 3006 debugException(pe); 3007 throw new LDIFException( 3008 ERR_READ_CANNOT_BASE64_DECODE_CT.get(firstLineNumber, 3009 pe.getMessage()), 3010 firstLineNumber, true, ldifLines, pe); 3011 } 3012 catch (final Exception e) 3013 { 3014 debugException(e); 3015 throw new LDIFException( 3016 ERR_READ_CANNOT_BASE64_DECODE_CT.get(firstLineNumber, e), 3017 firstLineNumber, true, ldifLines, e); 3018 } 3019 } 3020 else 3021 { 3022 // Skip over any spaces leading up to the value, and then the rest of 3023 // the string is the changetype. 3024 int pos = colonPos+1; 3025 while ((pos < length) && (line.charAt(pos) == ' ')) 3026 { 3027 pos++; 3028 } 3029 3030 return line.substring(pos); 3031 } 3032 } 3033 3034 3035 3036 /** 3037 * Parses the data available through the provided iterator as a collection of 3038 * attributes suitable for use in an entry or an add change record. 3039 * 3040 * @param dn The DN of the record being read. 3041 * @param duplicateValueBehavior The behavior that should be exhibited if 3042 * the LDIF reader encounters an entry with 3043 * duplicate values. 3044 * @param trailingSpaceBehavior The behavior that should be exhibited when 3045 * encountering attribute values which are not 3046 * base64-encoded but contain trailing spaces. 3047 * @param schema The schema to use when parsing the 3048 * attributes, or {@code null} if none is 3049 * needed. 3050 * @param ldifLines The lines that comprise the LDIF 3051 * representation of the full record being 3052 * parsed. 3053 * @param iterator The iterator to use to access the attribute 3054 * lines. 
3055 * @param relativeBasePath The base path that will be prepended to 3056 * relative paths in order to obtain an 3057 * absolute path. 3058 * @param firstLineNumber The line number for the start of the 3059 * record. 3060 * 3061 * @return The collection of attributes that were read. 3062 * 3063 * @throws LDIFException If the provided LDIF data cannot be decoded as a 3064 * set of attributes. 3065 */ 3066 private static ArrayList<Attribute> parseAttributes(final String dn, 3067 final DuplicateValueBehavior duplicateValueBehavior, 3068 final TrailingSpaceBehavior trailingSpaceBehavior, final Schema schema, 3069 final ArrayList<StringBuilder> ldifLines, 3070 final Iterator<StringBuilder> iterator, final String relativeBasePath, 3071 final long firstLineNumber) 3072 throws LDIFException 3073 { 3074 final LinkedHashMap<String,Object> attributes = 3075 new LinkedHashMap<String,Object>(ldifLines.size()); 3076 while (iterator.hasNext()) 3077 { 3078 final StringBuilder line = iterator.next(); 3079 handleTrailingSpaces(line, dn, firstLineNumber, trailingSpaceBehavior); 3080 final int colonPos = line.indexOf(":"); 3081 if (colonPos <= 0) 3082 { 3083 throw new LDIFException(ERR_READ_NO_ATTR_COLON.get(firstLineNumber), 3084 firstLineNumber, true, ldifLines, null); 3085 } 3086 3087 final String attributeName = line.substring(0, colonPos); 3088 final String lowerName = toLowerCase(attributeName); 3089 3090 final MatchingRule matchingRule; 3091 if (schema == null) 3092 { 3093 matchingRule = CaseIgnoreStringMatchingRule.getInstance(); 3094 } 3095 else 3096 { 3097 matchingRule = 3098 MatchingRule.selectEqualityMatchingRule(attributeName, schema); 3099 } 3100 3101 Attribute attr; 3102 final LDIFAttribute ldifAttr; 3103 final Object attrObject = attributes.get(lowerName); 3104 if (attrObject == null) 3105 { 3106 attr = null; 3107 ldifAttr = null; 3108 } 3109 else 3110 { 3111 if (attrObject instanceof Attribute) 3112 { 3113 attr = (Attribute) attrObject; 3114 ldifAttr = new 
LDIFAttribute(attr.getName(), matchingRule, 3115 attr.getRawValues()[0]); 3116 attributes.put(lowerName, ldifAttr); 3117 } 3118 else 3119 { 3120 attr = null; 3121 ldifAttr = (LDIFAttribute) attrObject; 3122 } 3123 } 3124 3125 final int length = line.length(); 3126 if (length == (colonPos+1)) 3127 { 3128 // This means that the attribute has a zero-length value, which is 3129 // acceptable. 3130 if (attrObject == null) 3131 { 3132 attr = new Attribute(attributeName, matchingRule, ""); 3133 attributes.put(lowerName, attr); 3134 } 3135 else 3136 { 3137 try 3138 { 3139 if (! ldifAttr.addValue(new ASN1OctetString(), 3140 duplicateValueBehavior)) 3141 { 3142 if (duplicateValueBehavior != DuplicateValueBehavior.STRIP) 3143 { 3144 throw new LDIFException(ERR_READ_DUPLICATE_VALUE.get(dn, 3145 firstLineNumber, attributeName), firstLineNumber, true, 3146 ldifLines, null); 3147 } 3148 } 3149 } 3150 catch (LDAPException le) 3151 { 3152 throw new LDIFException(ERR_READ_VALUE_SYNTAX_VIOLATION.get(dn, 3153 firstLineNumber, attributeName, getExceptionMessage(le)), 3154 firstLineNumber, true, ldifLines, le); 3155 } 3156 } 3157 } 3158 else if (line.charAt(colonPos+1) == ':') 3159 { 3160 // Skip over any spaces leading up to the value, and then the rest of 3161 // the string is the base64-encoded attribute value. 3162 int pos = colonPos+2; 3163 while ((pos < length) && (line.charAt(pos) == ' ')) 3164 { 3165 pos++; 3166 } 3167 3168 try 3169 { 3170 final byte[] valueBytes = Base64.decode(line.substring(pos)); 3171 if (attrObject == null) 3172 { 3173 attr = new Attribute(attributeName, matchingRule, valueBytes); 3174 attributes.put(lowerName, attr); 3175 } 3176 else 3177 { 3178 try 3179 { 3180 if (! 
ldifAttr.addValue(new ASN1OctetString(valueBytes), 3181 duplicateValueBehavior)) 3182 { 3183 if (duplicateValueBehavior != DuplicateValueBehavior.STRIP) 3184 { 3185 throw new LDIFException(ERR_READ_DUPLICATE_VALUE.get(dn, 3186 firstLineNumber, attributeName), firstLineNumber, true, 3187 ldifLines, null); 3188 } 3189 } 3190 } 3191 catch (LDAPException le) 3192 { 3193 throw new LDIFException(ERR_READ_VALUE_SYNTAX_VIOLATION.get(dn, 3194 firstLineNumber, attributeName, getExceptionMessage(le)), 3195 firstLineNumber, true, ldifLines, le); 3196 } 3197 } 3198 } 3199 catch (final ParseException pe) 3200 { 3201 debugException(pe); 3202 throw new LDIFException(ERR_READ_CANNOT_BASE64_DECODE_ATTR.get( 3203 attributeName, firstLineNumber, 3204 pe.getMessage()), 3205 firstLineNumber, true, ldifLines, pe); 3206 } 3207 } 3208 else if (line.charAt(colonPos+1) == '<') 3209 { 3210 // Skip over any spaces leading up to the value, and then the rest of 3211 // the string is a URL that indicates where to get the real content. 3212 // At the present time, we'll only support the file URLs. 3213 int pos = colonPos+2; 3214 while ((pos < length) && (line.charAt(pos) == ' ')) 3215 { 3216 pos++; 3217 } 3218 3219 final byte[] urlBytes; 3220 final String urlString = line.substring(pos); 3221 try 3222 { 3223 urlBytes = 3224 retrieveURLBytes(urlString, relativeBasePath, firstLineNumber); 3225 } 3226 catch (final Exception e) 3227 { 3228 debugException(e); 3229 throw new LDIFException( 3230 ERR_READ_URL_EXCEPTION.get(attributeName, urlString, 3231 firstLineNumber, e), 3232 firstLineNumber, true, ldifLines, e); 3233 } 3234 3235 if (attrObject == null) 3236 { 3237 attr = new Attribute(attributeName, matchingRule, urlBytes); 3238 attributes.put(lowerName, attr); 3239 } 3240 else 3241 { 3242 try 3243 { 3244 if (! 
ldifAttr.addValue(new ASN1OctetString(urlBytes), 3245 duplicateValueBehavior)) 3246 { 3247 if (duplicateValueBehavior != DuplicateValueBehavior.STRIP) 3248 { 3249 throw new LDIFException(ERR_READ_DUPLICATE_VALUE.get(dn, 3250 firstLineNumber, attributeName), firstLineNumber, true, 3251 ldifLines, null); 3252 } 3253 } 3254 } 3255 catch (final LDIFException le) 3256 { 3257 debugException(le); 3258 throw le; 3259 } 3260 catch (final Exception e) 3261 { 3262 debugException(e); 3263 throw new LDIFException( 3264 ERR_READ_URL_EXCEPTION.get(attributeName, urlString, 3265 firstLineNumber, e), 3266 firstLineNumber, true, ldifLines, e); 3267 } 3268 } 3269 } 3270 else 3271 { 3272 // Skip over any spaces leading up to the value, and then the rest of 3273 // the string is the value. 3274 int pos = colonPos+1; 3275 while ((pos < length) && (line.charAt(pos) == ' ')) 3276 { 3277 pos++; 3278 } 3279 3280 final String valueString = line.substring(pos); 3281 if (attrObject == null) 3282 { 3283 attr = new Attribute(attributeName, matchingRule, valueString); 3284 attributes.put(lowerName, attr); 3285 } 3286 else 3287 { 3288 try 3289 { 3290 if (! 
ldifAttr.addValue(new ASN1OctetString(valueString), 3291 duplicateValueBehavior)) 3292 { 3293 if (duplicateValueBehavior != DuplicateValueBehavior.STRIP) 3294 { 3295 throw new LDIFException(ERR_READ_DUPLICATE_VALUE.get(dn, 3296 firstLineNumber, attributeName), firstLineNumber, true, 3297 ldifLines, null); 3298 } 3299 } 3300 } 3301 catch (LDAPException le) 3302 { 3303 throw new LDIFException(ERR_READ_VALUE_SYNTAX_VIOLATION.get(dn, 3304 firstLineNumber, attributeName, getExceptionMessage(le)), 3305 firstLineNumber, true, ldifLines, le); 3306 } 3307 } 3308 } 3309 } 3310 3311 final ArrayList<Attribute> attrList = 3312 new ArrayList<Attribute>(attributes.size()); 3313 for (final Object o : attributes.values()) 3314 { 3315 if (o instanceof Attribute) 3316 { 3317 attrList.add((Attribute) o); 3318 } 3319 else 3320 { 3321 attrList.add(((LDIFAttribute) o).toAttribute()); 3322 } 3323 } 3324 3325 return attrList; 3326 } 3327 3328 3329 3330 /** 3331 * Retrieves the bytes that make up the file referenced by the given URL. 3332 * 3333 * @param urlString The string representation of the URL to retrieve. 3334 * @param relativeBasePath The base path that will be prepended to relative 3335 * paths in order to obtain an absolute path. 3336 * @param firstLineNumber The line number for the start of the record. 3337 * 3338 * @return The bytes contained in the specified file, or an empty array if 3339 * the specified file is empty. 3340 * 3341 * @throws LDIFException If the provided URL is malformed or references a 3342 * nonexistent file. 3343 * 3344 * @throws IOException If a problem is encountered while attempting to read 3345 * from the target file. 
3346 */ 3347 private static byte[] retrieveURLBytes(final String urlString, 3348 final String relativeBasePath, 3349 final long firstLineNumber) 3350 throws LDIFException, IOException 3351 { 3352 int pos; 3353 String path; 3354 final String lowerURLString = toLowerCase(urlString); 3355 if (lowerURLString.startsWith("file:/")) 3356 { 3357 pos = 6; 3358 while ((pos < urlString.length()) && (urlString.charAt(pos) == '/')) 3359 { 3360 pos++; 3361 } 3362 3363 path = urlString.substring(pos-1); 3364 } 3365 else if (lowerURLString.startsWith("file:")) 3366 { 3367 // A file: URL that doesn't include a slash will be interpreted as a 3368 // relative path. 3369 path = relativeBasePath + urlString.substring(5); 3370 } 3371 else 3372 { 3373 throw new LDIFException(ERR_READ_URL_INVALID_SCHEME.get(urlString), 3374 firstLineNumber, true); 3375 } 3376 3377 final File f = new File(path); 3378 if (! f.exists()) 3379 { 3380 throw new LDIFException( 3381 ERR_READ_URL_NO_SUCH_FILE.get(urlString, f.getAbsolutePath()), 3382 firstLineNumber, true); 3383 } 3384 3385 // In order to conserve memory, we'll only allow values to be read from 3386 // files no larger than 10 megabytes. 3387 final long fileSize = f.length(); 3388 if (fileSize > (10 * 1024 * 1024)) 3389 { 3390 throw new LDIFException( 3391 ERR_READ_URL_FILE_TOO_LARGE.get(urlString, f.getAbsolutePath(), 3392 (10*1024*1024)), 3393 firstLineNumber, true); 3394 } 3395 3396 int fileBytesRemaining = (int) fileSize; 3397 final byte[] fileData = new byte[(int) fileSize]; 3398 final FileInputStream fis = new FileInputStream(f); 3399 try 3400 { 3401 int fileBytesRead = 0; 3402 while (fileBytesRead < fileSize) 3403 { 3404 final int bytesRead = 3405 fis.read(fileData, fileBytesRead, fileBytesRemaining); 3406 if (bytesRead < 0) 3407 { 3408 // We hit the end of the file before we expected to. This shouldn't 3409 // happen unless the file size changed since we first looked at it, 3410 // which we won't allow. 
3411 throw new LDIFException( 3412 ERR_READ_URL_FILE_SIZE_CHANGED.get(urlString, 3413 f.getAbsolutePath()), 3414 firstLineNumber, true); 3415 } 3416 3417 fileBytesRead += bytesRead; 3418 fileBytesRemaining -= bytesRead; 3419 } 3420 3421 if (fis.read() != -1) 3422 { 3423 // There is still more data to read. This shouldn't happen unless the 3424 // file size changed since we first looked at it, which we won't allow. 3425 throw new LDIFException( 3426 ERR_READ_URL_FILE_SIZE_CHANGED.get(urlString, f.getAbsolutePath()), 3427 firstLineNumber, true); 3428 } 3429 } 3430 finally 3431 { 3432 fis.close(); 3433 } 3434 3435 return fileData; 3436 } 3437 3438 3439 3440 /** 3441 * Parses the data available through the provided iterator into an array of 3442 * modifications suitable for use in a modify change record. 3443 * 3444 * @param dn The DN of the entry being parsed. 3445 * @param trailingSpaceBehavior The behavior that should be exhibited when 3446 * encountering attribute values which are not 3447 * base64-encoded but contain trailing spaces. 3448 * @param ldifLines The lines that comprise the LDIF 3449 * representation of the full record being 3450 * parsed. 3451 * @param iterator The iterator to use to access the 3452 * modification data. 3453 * @param firstLineNumber The line number for the start of the record. 3454 * @param schema The schema to use in processing. 3455 * 3456 * @return An array containing the modifications that were read. 3457 * 3458 * @throws LDIFException If the provided LDIF data cannot be decoded as a 3459 * set of modifications. 
3460 */ 3461 private static Modification[] parseModifications(final String dn, 3462 final TrailingSpaceBehavior trailingSpaceBehavior, 3463 final ArrayList<StringBuilder> ldifLines, 3464 final Iterator<StringBuilder> iterator, 3465 final long firstLineNumber, final Schema schema) 3466 throws LDIFException 3467 { 3468 final ArrayList<Modification> modList = 3469 new ArrayList<Modification>(ldifLines.size()); 3470 3471 while (iterator.hasNext()) 3472 { 3473 // The first line must start with "add:", "delete:", "replace:", or 3474 // "increment:" followed by an attribute name. 3475 StringBuilder line = iterator.next(); 3476 handleTrailingSpaces(line, dn, firstLineNumber, trailingSpaceBehavior); 3477 int colonPos = line.indexOf(":"); 3478 if (colonPos < 0) 3479 { 3480 throw new LDIFException(ERR_READ_MOD_CR_NO_MODTYPE.get(firstLineNumber), 3481 firstLineNumber, true, ldifLines, null); 3482 } 3483 3484 final ModificationType modType; 3485 final String modTypeStr = toLowerCase(line.substring(0, colonPos)); 3486 if (modTypeStr.equals("add")) 3487 { 3488 modType = ModificationType.ADD; 3489 } 3490 else if (modTypeStr.equals("delete")) 3491 { 3492 modType = ModificationType.DELETE; 3493 } 3494 else if (modTypeStr.equals("replace")) 3495 { 3496 modType = ModificationType.REPLACE; 3497 } 3498 else if (modTypeStr.equals("increment")) 3499 { 3500 modType = ModificationType.INCREMENT; 3501 } 3502 else 3503 { 3504 throw new LDIFException(ERR_READ_MOD_CR_INVALID_MODTYPE.get(modTypeStr, 3505 firstLineNumber), 3506 firstLineNumber, true, ldifLines, null); 3507 } 3508 3509 String attributeName; 3510 int length = line.length(); 3511 if (length == (colonPos+1)) 3512 { 3513 // The colon was the last character on the line. This is not 3514 // acceptable. 
3515 throw new LDIFException(ERR_READ_MOD_CR_MODTYPE_NO_ATTR.get( 3516 firstLineNumber), 3517 firstLineNumber, true, ldifLines, null); 3518 } 3519 else if (line.charAt(colonPos+1) == ':') 3520 { 3521 // Skip over any spaces leading up to the value, and then the rest of 3522 // the string is the base64-encoded attribute name. 3523 int pos = colonPos+2; 3524 while ((pos < length) && (line.charAt(pos) == ' ')) 3525 { 3526 pos++; 3527 } 3528 3529 try 3530 { 3531 final byte[] dnBytes = Base64.decode(line.substring(pos)); 3532 attributeName = new String(dnBytes, "UTF-8"); 3533 } 3534 catch (final ParseException pe) 3535 { 3536 debugException(pe); 3537 throw new LDIFException( 3538 ERR_READ_MOD_CR_MODTYPE_CANNOT_BASE64_DECODE_ATTR.get( 3539 firstLineNumber, pe.getMessage()), 3540 firstLineNumber, true, ldifLines, pe); 3541 } 3542 catch (final Exception e) 3543 { 3544 debugException(e); 3545 throw new LDIFException( 3546 ERR_READ_MOD_CR_MODTYPE_CANNOT_BASE64_DECODE_ATTR.get( 3547 firstLineNumber, e), 3548 firstLineNumber, true, ldifLines, e); 3549 } 3550 } 3551 else 3552 { 3553 // Skip over any spaces leading up to the value, and then the rest of 3554 // the string is the attribute name. 3555 int pos = colonPos+1; 3556 while ((pos < length) && (line.charAt(pos) == ' ')) 3557 { 3558 pos++; 3559 } 3560 3561 attributeName = line.substring(pos); 3562 } 3563 3564 if (attributeName.length() == 0) 3565 { 3566 throw new LDIFException(ERR_READ_MOD_CR_MODTYPE_NO_ATTR.get( 3567 firstLineNumber), 3568 firstLineNumber, true, ldifLines, null); 3569 } 3570 3571 3572 // The next zero or more lines may be the set of attribute values. Keep 3573 // reading until we reach the end of the iterator or until we find a line 3574 // with just a "-". 
3575 final ArrayList<ASN1OctetString> valueList = 3576 new ArrayList<ASN1OctetString>(ldifLines.size()); 3577 while (iterator.hasNext()) 3578 { 3579 line = iterator.next(); 3580 handleTrailingSpaces(line, dn, firstLineNumber, trailingSpaceBehavior); 3581 if (line.toString().equals("-")) 3582 { 3583 break; 3584 } 3585 3586 colonPos = line.indexOf(":"); 3587 if (colonPos < 0) 3588 { 3589 throw new LDIFException(ERR_READ_NO_ATTR_COLON.get(firstLineNumber), 3590 firstLineNumber, true, ldifLines, null); 3591 } 3592 else if (! line.substring(0, colonPos).equalsIgnoreCase(attributeName)) 3593 { 3594 // There are a couple of cases in which this might be acceptable: 3595 // - If the two names are logically equivalent, but have an alternate 3596 // name (or OID) for the target attribute type, or if there are 3597 // attribute options and the options are just in a different order. 3598 // - If this is the first value for the target attribute and the 3599 // alternate name includes a "binary" option that the original 3600 // attribute name did not have. In this case, all subsequent values 3601 // will also be required to have the binary option. 3602 final String alternateName = line.substring(0, colonPos); 3603 3604 3605 // Check to see if the base names are equivalent. 
3606 boolean baseNameEquivalent = false; 3607 final String expectedBaseName = Attribute.getBaseName(attributeName); 3608 final String alternateBaseName = Attribute.getBaseName(alternateName); 3609 if (alternateBaseName.equalsIgnoreCase(expectedBaseName)) 3610 { 3611 baseNameEquivalent = true; 3612 } 3613 else 3614 { 3615 if (schema != null) 3616 { 3617 final AttributeTypeDefinition expectedAT = 3618 schema.getAttributeType(expectedBaseName); 3619 final AttributeTypeDefinition alternateAT = 3620 schema.getAttributeType(alternateBaseName); 3621 if ((expectedAT != null) && (alternateAT != null) && 3622 expectedAT.equals(alternateAT)) 3623 { 3624 baseNameEquivalent = true; 3625 } 3626 } 3627 } 3628 3629 3630 // Check to see if the attribute options are equivalent. 3631 final Set<String> expectedOptions = 3632 Attribute.getOptions(attributeName); 3633 final Set<String> lowerExpectedOptions = 3634 new HashSet<String>(expectedOptions.size()); 3635 for (final String s : expectedOptions) 3636 { 3637 lowerExpectedOptions.add(toLowerCase(s)); 3638 } 3639 3640 final Set<String> alternateOptions = 3641 Attribute.getOptions(alternateName); 3642 final Set<String> lowerAlternateOptions = 3643 new HashSet<String>(alternateOptions.size()); 3644 for (final String s : alternateOptions) 3645 { 3646 lowerAlternateOptions.add(toLowerCase(s)); 3647 } 3648 3649 final boolean optionsEquivalent = 3650 lowerAlternateOptions.equals(lowerExpectedOptions); 3651 3652 3653 if (baseNameEquivalent && optionsEquivalent) 3654 { 3655 // This is fine. The two attribute descriptions are logically 3656 // equivalent. We'll continue using the attribute description that 3657 // was provided first. 
3658 } 3659 else if (valueList.isEmpty() && baseNameEquivalent && 3660 lowerAlternateOptions.remove("binary") && 3661 lowerAlternateOptions.equals(lowerExpectedOptions)) 3662 { 3663 // This means that the provided value is the first value for the 3664 // attribute, and that the only significant difference is that the 3665 // provided attribute description included an unexpected "binary" 3666 // option. We'll accept this, but will require any additional 3667 // values for this modification to also include the binary option, 3668 // and we'll use the binary option in the attribute that is 3669 // eventually created. 3670 attributeName = alternateName; 3671 } 3672 else 3673 { 3674 // This means that either the base names are different or the sets 3675 // of options are incompatible. This is not acceptable. 3676 throw new LDIFException(ERR_READ_MOD_CR_ATTR_MISMATCH.get( 3677 firstLineNumber, 3678 line.substring(0, colonPos), 3679 attributeName), 3680 firstLineNumber, true, ldifLines, null); 3681 } 3682 } 3683 3684 length = line.length(); 3685 final ASN1OctetString value; 3686 if (length == (colonPos+1)) 3687 { 3688 // The colon was the last character on the line. This is fine. 3689 value = new ASN1OctetString(); 3690 } 3691 else if (line.charAt(colonPos+1) == ':') 3692 { 3693 // Skip over any spaces leading up to the value, and then the rest of 3694 // the string is the base64-encoded value. This is unusual and 3695 // unnecessary, but is nevertheless acceptable. 
3696 int pos = colonPos+2; 3697 while ((pos < length) && (line.charAt(pos) == ' ')) 3698 { 3699 pos++; 3700 } 3701 3702 try 3703 { 3704 value = new ASN1OctetString(Base64.decode(line.substring(pos))); 3705 } 3706 catch (final ParseException pe) 3707 { 3708 debugException(pe); 3709 throw new LDIFException(ERR_READ_CANNOT_BASE64_DECODE_ATTR.get( 3710 attributeName, firstLineNumber, pe.getMessage()), 3711 firstLineNumber, true, ldifLines, pe); 3712 } 3713 catch (final Exception e) 3714 { 3715 debugException(e); 3716 throw new LDIFException(ERR_READ_CANNOT_BASE64_DECODE_ATTR.get( 3717 firstLineNumber, e), 3718 firstLineNumber, true, ldifLines, e); 3719 } 3720 } 3721 else 3722 { 3723 // Skip over any spaces leading up to the value, and then the rest of 3724 // the string is the value. 3725 int pos = colonPos+1; 3726 while ((pos < length) && (line.charAt(pos) == ' ')) 3727 { 3728 pos++; 3729 } 3730 3731 value = new ASN1OctetString(line.substring(pos)); 3732 } 3733 3734 valueList.add(value); 3735 } 3736 3737 final ASN1OctetString[] values = new ASN1OctetString[valueList.size()]; 3738 valueList.toArray(values); 3739 3740 // If it's an add modification type, then there must be at least one 3741 // value. 3742 if ((modType.intValue() == ModificationType.ADD.intValue()) && 3743 (values.length == 0)) 3744 { 3745 throw new LDIFException(ERR_READ_MOD_CR_NO_ADD_VALUES.get(attributeName, 3746 firstLineNumber), 3747 firstLineNumber, true, ldifLines, null); 3748 } 3749 3750 // If it's an increment modification type, then there must be exactly one 3751 // value. 
3752 if ((modType.intValue() == ModificationType.INCREMENT.intValue()) && 3753 (values.length != 1)) 3754 { 3755 throw new LDIFException(ERR_READ_MOD_CR_INVALID_INCR_VALUE_COUNT.get( 3756 firstLineNumber, attributeName), 3757 firstLineNumber, true, ldifLines, null); 3758 } 3759 3760 modList.add(new Modification(modType, attributeName, values)); 3761 } 3762 3763 final Modification[] mods = new Modification[modList.size()]; 3764 modList.toArray(mods); 3765 return mods; 3766 } 3767 3768 3769 3770 /** 3771 * Parses the data available through the provided iterator as the body of a 3772 * modify DN change record (i.e., the newrdn, deleteoldrdn, and optional 3773 * newsuperior lines). 3774 * 3775 * @param ldifLines The lines that comprise the LDIF 3776 * representation of the full record being 3777 * parsed. 3778 * @param iterator The iterator to use to access the modify DN 3779 * data. 3780 * @param dn The current DN of the entry. 3781 * @param controls The set of controls to include in the change 3782 * record. 3783 * @param trailingSpaceBehavior The behavior that should be exhibited when 3784 * encountering attribute values which are not 3785 * base64-encoded but contain trailing spaces. 3786 * @param firstLineNumber The line number for the start of the record. 3787 * 3788 * @return The decoded modify DN change record. 3789 * 3790 * @throws LDIFException If the provided LDIF data cannot be decoded as a 3791 * modify DN change record. 3792 */ 3793 private static LDIFModifyDNChangeRecord parseModifyDNChangeRecord( 3794 final ArrayList<StringBuilder> ldifLines, 3795 final Iterator<StringBuilder> iterator, final String dn, 3796 final List<Control> controls, 3797 final TrailingSpaceBehavior trailingSpaceBehavior, 3798 final long firstLineNumber) 3799 throws LDIFException 3800 { 3801 // The next line must be the new RDN, and it must start with "newrdn:". 
3802 StringBuilder line = iterator.next(); 3803 handleTrailingSpaces(line, dn, firstLineNumber, trailingSpaceBehavior); 3804 int colonPos = line.indexOf(":"); 3805 if ((colonPos < 0) || 3806 (! line.substring(0, colonPos).equalsIgnoreCase("newrdn"))) 3807 { 3808 throw new LDIFException(ERR_READ_MODDN_CR_NO_NEWRDN_COLON.get( 3809 firstLineNumber), 3810 firstLineNumber, true, ldifLines, null); 3811 } 3812 3813 final String newRDN; 3814 int length = line.length(); 3815 if (length == (colonPos+1)) 3816 { 3817 // The colon was the last character on the line. This is not acceptable. 3818 throw new LDIFException(ERR_READ_MODDN_CR_NO_NEWRDN_VALUE.get( 3819 firstLineNumber), 3820 firstLineNumber, true, ldifLines, null); 3821 } 3822 else if (line.charAt(colonPos+1) == ':') 3823 { 3824 // Skip over any spaces leading up to the value, and then the rest of the 3825 // string is the base64-encoded new RDN. 3826 int pos = colonPos+2; 3827 while ((pos < length) && (line.charAt(pos) == ' ')) 3828 { 3829 pos++; 3830 } 3831 3832 try 3833 { 3834 final byte[] dnBytes = Base64.decode(line.substring(pos)); 3835 newRDN = new String(dnBytes, "UTF-8"); 3836 } 3837 catch (final ParseException pe) 3838 { 3839 debugException(pe); 3840 throw new LDIFException( 3841 ERR_READ_MODDN_CR_CANNOT_BASE64_DECODE_NEWRDN.get(firstLineNumber, 3842 pe.getMessage()), 3843 firstLineNumber, true, ldifLines, pe); 3844 } 3845 catch (final Exception e) 3846 { 3847 debugException(e); 3848 throw new LDIFException( 3849 ERR_READ_MODDN_CR_CANNOT_BASE64_DECODE_NEWRDN.get(firstLineNumber, 3850 e), 3851 firstLineNumber, true, ldifLines, e); 3852 } 3853 } 3854 else 3855 { 3856 // Skip over any spaces leading up to the value, and then the rest of the 3857 // string is the new RDN. 
3858 int pos = colonPos+1; 3859 while ((pos < length) && (line.charAt(pos) == ' ')) 3860 { 3861 pos++; 3862 } 3863 3864 newRDN = line.substring(pos); 3865 } 3866 3867 if (newRDN.length() == 0) 3868 { 3869 throw new LDIFException(ERR_READ_MODDN_CR_NO_NEWRDN_VALUE.get( 3870 firstLineNumber), 3871 firstLineNumber, true, ldifLines, null); 3872 } 3873 3874 3875 // The next line must be the deleteOldRDN flag, and it must start with 3876 // 'deleteoldrdn:'. 3877 if (! iterator.hasNext()) 3878 { 3879 throw new LDIFException(ERR_READ_MODDN_CR_NO_DELOLDRDN_COLON.get( 3880 firstLineNumber), 3881 firstLineNumber, true, ldifLines, null); 3882 } 3883 3884 line = iterator.next(); 3885 handleTrailingSpaces(line, dn, firstLineNumber, trailingSpaceBehavior); 3886 colonPos = line.indexOf(":"); 3887 if ((colonPos < 0) || 3888 (! line.substring(0, colonPos).equalsIgnoreCase("deleteoldrdn"))) 3889 { 3890 throw new LDIFException(ERR_READ_MODDN_CR_NO_DELOLDRDN_COLON.get( 3891 firstLineNumber), 3892 firstLineNumber, true, ldifLines, null); 3893 } 3894 3895 final String deleteOldRDNStr; 3896 length = line.length(); 3897 if (length == (colonPos+1)) 3898 { 3899 // The colon was the last character on the line. This is not acceptable. 3900 throw new LDIFException(ERR_READ_MODDN_CR_NO_DELOLDRDN_VALUE.get( 3901 firstLineNumber), 3902 firstLineNumber, true, ldifLines, null); 3903 } 3904 else if (line.charAt(colonPos+1) == ':') 3905 { 3906 // Skip over any spaces leading up to the value, and then the rest of the 3907 // string is the base64-encoded value. This is unusual and 3908 // unnecessary, but is nevertheless acceptable. 
3909 int pos = colonPos+2; 3910 while ((pos < length) && (line.charAt(pos) == ' ')) 3911 { 3912 pos++; 3913 } 3914 3915 try 3916 { 3917 final byte[] changeTypeBytes = Base64.decode(line.substring(pos)); 3918 deleteOldRDNStr = new String(changeTypeBytes, "UTF-8"); 3919 } 3920 catch (final ParseException pe) 3921 { 3922 debugException(pe); 3923 throw new LDIFException( 3924 ERR_READ_MODDN_CR_CANNOT_BASE64_DECODE_DELOLDRDN.get( 3925 firstLineNumber, pe.getMessage()), 3926 firstLineNumber, true, ldifLines, pe); 3927 } 3928 catch (final Exception e) 3929 { 3930 debugException(e); 3931 throw new LDIFException( 3932 ERR_READ_MODDN_CR_CANNOT_BASE64_DECODE_DELOLDRDN.get( 3933 firstLineNumber, e), 3934 firstLineNumber, true, ldifLines, e); 3935 } 3936 } 3937 else 3938 { 3939 // Skip over any spaces leading up to the value, and then the rest of the 3940 // string is the value. 3941 int pos = colonPos+1; 3942 while ((pos < length) && (line.charAt(pos) == ' ')) 3943 { 3944 pos++; 3945 } 3946 3947 deleteOldRDNStr = line.substring(pos); 3948 } 3949 3950 final boolean deleteOldRDN; 3951 if (deleteOldRDNStr.equals("0")) 3952 { 3953 deleteOldRDN = false; 3954 } 3955 else if (deleteOldRDNStr.equals("1")) 3956 { 3957 deleteOldRDN = true; 3958 } 3959 else if (deleteOldRDNStr.equalsIgnoreCase("false") || 3960 deleteOldRDNStr.equalsIgnoreCase("no")) 3961 { 3962 // This is technically illegal, but we'll allow it. 3963 deleteOldRDN = false; 3964 } 3965 else if (deleteOldRDNStr.equalsIgnoreCase("true") || 3966 deleteOldRDNStr.equalsIgnoreCase("yes")) 3967 { 3968 // This is also technically illegal, but we'll allow it. 3969 deleteOldRDN = false; 3970 } 3971 else 3972 { 3973 throw new LDIFException(ERR_READ_MODDN_CR_INVALID_DELOLDRDN.get( 3974 deleteOldRDNStr, firstLineNumber), 3975 firstLineNumber, true, ldifLines, null); 3976 } 3977 3978 3979 // If there is another line, then it must be the new superior DN and it must 3980 // start with "newsuperior:". If this is absent, then it's fine. 
3981 final String newSuperiorDN; 3982 if (iterator.hasNext()) 3983 { 3984 line = iterator.next(); 3985 handleTrailingSpaces(line, dn, firstLineNumber, trailingSpaceBehavior); 3986 colonPos = line.indexOf(":"); 3987 if ((colonPos < 0) || 3988 (! line.substring(0, colonPos).equalsIgnoreCase("newsuperior"))) 3989 { 3990 throw new LDIFException(ERR_READ_MODDN_CR_NO_NEWSUPERIOR_COLON.get( 3991 firstLineNumber), 3992 firstLineNumber, true, ldifLines, null); 3993 } 3994 3995 length = line.length(); 3996 if (length == (colonPos+1)) 3997 { 3998 // The colon was the last character on the line. This is fine. 3999 newSuperiorDN = ""; 4000 } 4001 else if (line.charAt(colonPos+1) == ':') 4002 { 4003 // Skip over any spaces leading up to the value, and then the rest of 4004 // the string is the base64-encoded new superior DN. 4005 int pos = colonPos+2; 4006 while ((pos < length) && (line.charAt(pos) == ' ')) 4007 { 4008 pos++; 4009 } 4010 4011 try 4012 { 4013 final byte[] dnBytes = Base64.decode(line.substring(pos)); 4014 newSuperiorDN = new String(dnBytes, "UTF-8"); 4015 } 4016 catch (final ParseException pe) 4017 { 4018 debugException(pe); 4019 throw new LDIFException( 4020 ERR_READ_MODDN_CR_CANNOT_BASE64_DECODE_NEWSUPERIOR.get( 4021 firstLineNumber, pe.getMessage()), 4022 firstLineNumber, true, ldifLines, pe); 4023 } 4024 catch (final Exception e) 4025 { 4026 debugException(e); 4027 throw new LDIFException( 4028 ERR_READ_MODDN_CR_CANNOT_BASE64_DECODE_NEWSUPERIOR.get( 4029 firstLineNumber, e), 4030 firstLineNumber, true, ldifLines, e); 4031 } 4032 } 4033 else 4034 { 4035 // Skip over any spaces leading up to the value, and then the rest of 4036 // the string is the new superior DN. 4037 int pos = colonPos+1; 4038 while ((pos < length) && (line.charAt(pos) == ' ')) 4039 { 4040 pos++; 4041 } 4042 4043 newSuperiorDN = line.substring(pos); 4044 } 4045 } 4046 else 4047 { 4048 newSuperiorDN = null; 4049 } 4050 4051 4052 // There must not be any more lines. 
4053 if (iterator.hasNext()) 4054 { 4055 throw new LDIFException(ERR_READ_CR_EXTRA_MODDN_DATA.get(firstLineNumber), 4056 firstLineNumber, true, ldifLines, null); 4057 } 4058 4059 return new LDIFModifyDNChangeRecord(dn, newRDN, deleteOldRDN, 4060 newSuperiorDN, controls); 4061 } 4062 4063 4064 4065 /** 4066 * Examines the line contained in the provided buffer to determine whether it 4067 * may contain one or more illegal trailing spaces. If it does, then those 4068 * spaces will be stripped out, left in place, or reported as illegal by 4069 * throwing an exception, depending on the provided behavior. 4070 * 4071 * @param buffer The buffer to be examined. 4072 * @param dn The DN of the LDIF record being parsed. It 4073 * may be {@code null} if the DN is not yet 4074 * known (e.g., because the provided line is 4075 * expected to contain that DN). 4076 * @param firstLineNumber The approximate line number in the LDIF 4077 * source on which the LDIF record begins. 4078 * @param trailingSpaceBehavior The behavior that should be exhibited when 4079 * encountering attribute values which are not 4080 * base64-encoded but contain trailing spaces. 4081 * 4082 * @throws LDIFException If the line contained in the provided buffer ends 4083 * with one or more illegal trailing spaces and 4084 * {@code trailingSpaceBehavior} was provided with a 4085 * value of {@code REJECT}. 
4086 */ 4087 private static void handleTrailingSpaces(final StringBuilder buffer, 4088 final String dn, final long firstLineNumber, 4089 final TrailingSpaceBehavior trailingSpaceBehavior) 4090 throws LDIFException 4091 { 4092 int pos = buffer.length() - 1; 4093 boolean trailingFound = false; 4094 while ((pos >= 0) && (buffer.charAt(pos) == ' ')) 4095 { 4096 trailingFound = true; 4097 pos--; 4098 } 4099 4100 if (trailingFound && (buffer.charAt(pos) != ':')) 4101 { 4102 switch (trailingSpaceBehavior) 4103 { 4104 case STRIP: 4105 buffer.setLength(pos+1); 4106 break; 4107 4108 case REJECT: 4109 if (dn == null) 4110 { 4111 throw new LDIFException( 4112 ERR_READ_ILLEGAL_TRAILING_SPACE_WITHOUT_DN.get(firstLineNumber, 4113 buffer.toString()), 4114 firstLineNumber, true); 4115 } 4116 else 4117 { 4118 throw new LDIFException( 4119 ERR_READ_ILLEGAL_TRAILING_SPACE_WITH_DN.get(dn, 4120 firstLineNumber, buffer.toString()), 4121 firstLineNumber, true); 4122 } 4123 4124 case RETAIN: 4125 default: 4126 // No action will be taken. 4127 break; 4128 } 4129 } 4130 } 4131 4132 4133 4134 /** 4135 * This represents an unparsed LDIFRecord. It stores the line number of the 4136 * first line of the record and each line of the record. 4137 */ 4138 private static final class UnparsedLDIFRecord 4139 { 4140 private final ArrayList<StringBuilder> lineList; 4141 private final long firstLineNumber; 4142 private final Exception failureCause; 4143 private final boolean isEOF; 4144 private final DuplicateValueBehavior duplicateValueBehavior; 4145 private final Schema schema; 4146 private final TrailingSpaceBehavior trailingSpaceBehavior; 4147 4148 4149 4150 /** 4151 * Constructor. 4152 * 4153 * @param lineList The lines that comprise the LDIF record. 4154 * @param duplicateValueBehavior The behavior to exhibit if the entry 4155 * contains duplicate attribute values. 
4156 * @param trailingSpaceBehavior Specifies the behavior to exhibit when 4157 * encountering trailing spaces in 4158 * non-base64-encoded attribute values. 4159 * @param schema The schema to use when parsing, if 4160 * applicable. 4161 * @param firstLineNumber The first line number of the LDIF record. 4162 */ 4163 private UnparsedLDIFRecord(final ArrayList<StringBuilder> lineList, 4164 final DuplicateValueBehavior duplicateValueBehavior, 4165 final TrailingSpaceBehavior trailingSpaceBehavior, 4166 final Schema schema, final long firstLineNumber) 4167 { 4168 this.lineList = lineList; 4169 this.firstLineNumber = firstLineNumber; 4170 this.duplicateValueBehavior = duplicateValueBehavior; 4171 this.trailingSpaceBehavior = trailingSpaceBehavior; 4172 this.schema = schema; 4173 4174 failureCause = null; 4175 isEOF = 4176 (firstLineNumber < 0) || ((lineList != null) && lineList.isEmpty()); 4177 } 4178 4179 4180 4181 /** 4182 * Constructor. 4183 * 4184 * @param failureCause The Exception thrown when reading from the input. 4185 */ 4186 private UnparsedLDIFRecord(final Exception failureCause) 4187 { 4188 this.failureCause = failureCause; 4189 4190 lineList = null; 4191 firstLineNumber = 0; 4192 duplicateValueBehavior = DuplicateValueBehavior.REJECT; 4193 trailingSpaceBehavior = TrailingSpaceBehavior.REJECT; 4194 schema = null; 4195 isEOF = false; 4196 } 4197 4198 4199 4200 /** 4201 * Return the lines that comprise the LDIF record. 4202 * 4203 * @return The lines that comprise the LDIF record. 4204 */ 4205 private ArrayList<StringBuilder> getLineList() 4206 { 4207 return lineList; 4208 } 4209 4210 4211 4212 /** 4213 * Retrieves the behavior to exhibit when encountering duplicate attribute 4214 * values. 4215 * 4216 * @return The behavior to exhibit when encountering duplicate attribute 4217 * values. 
4218 */ 4219 private DuplicateValueBehavior getDuplicateValueBehavior() 4220 { 4221 return duplicateValueBehavior; 4222 } 4223 4224 4225 4226 /** 4227 * Retrieves the behavior that should be exhibited when encountering 4228 * attribute values which are not base64-encoded but contain trailing 4229 * spaces. The LDIF specification strongly recommends that any value which 4230 * legitimately contains trailing spaces be base64-encoded, but the LDAP SDK 4231 * LDIF parser may be configured to automatically strip these spaces, to 4232 * preserve them, or to reject any entry or change record containing them. 4233 * 4234 * @return The behavior that should be exhibited when encountering 4235 * attribute values which are not base64-encoded but contain 4236 * trailing spaces. 4237 */ 4238 private TrailingSpaceBehavior getTrailingSpaceBehavior() 4239 { 4240 return trailingSpaceBehavior; 4241 } 4242 4243 4244 4245 /** 4246 * Retrieves the schema that should be used when parsing the record, if 4247 * applicable. 4248 * 4249 * @return The schema that should be used when parsing the record, or 4250 * {@code null} if none should be used. 4251 */ 4252 private Schema getSchema() 4253 { 4254 return schema; 4255 } 4256 4257 4258 4259 /** 4260 * Return the first line number of the LDIF record. 4261 * 4262 * @return The first line number of the LDIF record. 4263 */ 4264 private long getFirstLineNumber() 4265 { 4266 return firstLineNumber; 4267 } 4268 4269 4270 4271 /** 4272 * Return {@code true} iff the end of the input was reached. 4273 * 4274 * @return {@code true} iff the end of the input was reached. 4275 */ 4276 private boolean isEOF() 4277 { 4278 return isEOF; 4279 } 4280 4281 4282 4283 /** 4284 * Returns the reason that reading the record lines failed. This normally 4285 * is only non-null if something bad happened to the input stream (like 4286 * a disk read error). 4287 * 4288 * @return The reason that reading the record lines failed. 
4289 */ 4290 private Exception getFailureCause() 4291 { 4292 return failureCause; 4293 } 4294 } 4295 4296 4297 /** 4298 * When processing in asynchronous mode, this thread is responsible for 4299 * reading the raw unparsed records from the input and submitting them for 4300 * processing. 4301 */ 4302 private final class LineReaderThread 4303 extends Thread 4304 { 4305 /** 4306 * Constructor. 4307 */ 4308 private LineReaderThread() 4309 { 4310 super("Asynchronous LDIF line reader"); 4311 setDaemon(true); 4312 } 4313 4314 4315 4316 /** 4317 * Reads raw, unparsed records from the input and submits them for 4318 * processing until the input is finished or closed. 4319 */ 4320 @Override() 4321 public void run() 4322 { 4323 try 4324 { 4325 boolean stopProcessing = false; 4326 while (!stopProcessing) 4327 { 4328 UnparsedLDIFRecord unparsedRecord = null; 4329 try 4330 { 4331 unparsedRecord = readUnparsedRecord(); 4332 } 4333 catch (IOException e) 4334 { 4335 debugException(e); 4336 unparsedRecord = new UnparsedLDIFRecord(e); 4337 stopProcessing = true; 4338 } 4339 catch (Exception e) 4340 { 4341 debugException(e); 4342 unparsedRecord = new UnparsedLDIFRecord(e); 4343 } 4344 4345 try 4346 { 4347 asyncParser.submit(unparsedRecord); 4348 } 4349 catch (InterruptedException e) 4350 { 4351 debugException(e); 4352 // If this thread is interrupted, then someone wants us to stop 4353 // processing, so that's what we'll do. 4354 stopProcessing = true; 4355 } 4356 4357 if ((unparsedRecord == null) || (unparsedRecord.isEOF())) 4358 { 4359 stopProcessing = true; 4360 } 4361 } 4362 } 4363 finally 4364 { 4365 try 4366 { 4367 asyncParser.shutdown(); 4368 } 4369 catch (InterruptedException e) 4370 { 4371 debugException(e); 4372 } 4373 finally 4374 { 4375 asyncParsingComplete.set(true); 4376 } 4377 } 4378 } 4379 } 4380 4381 4382 4383 /** 4384 * Used to parse Records asynchronously. 
*/
  private final class RecordParser implements Processor<UnparsedLDIFRecord,
                                                         LDIFRecord>
  {
    /**
     * Decodes the provided unparsed record and, when a translator has been
     * configured for the kind of record that was decoded, passes the record
     * through that translator.
     *
     * @param  input  The unparsed record to be decoded.
     *
     * @return  The decoded (and possibly translated) record, or
     *          {@code SKIP_ENTRY} if a translator returned {@code null}.
     *
     * @throws  LDIFException  If one is raised while decoding or translating
     *                         the record.
     */
    public LDIFRecord process(final UnparsedLDIFRecord input)
           throws LDIFException
    {
      LDIFRecord result = decodeRecord(input, relativeBasePath, schema);

      // Entries go through the entry translator, if there is one.  A null
      // translation is replaced with the SKIP_ENTRY marker.
      if ((entryTranslator != null) && (result instanceof Entry))
      {
        final LDIFRecord translatedEntry = entryTranslator.translate(
             (Entry) result, input.getFirstLineNumber());
        result = (translatedEntry == null) ? SKIP_ENTRY : translatedEntry;
      }

      // Change records get the same treatment from the change record
      // translator.  This check looks at the current result, so it is
      // evaluated after any entry translation above.
      if ((changeRecordTranslator != null) &&
          (result instanceof LDIFChangeRecord))
      {
        final LDIFRecord translatedChange = changeRecordTranslator.translate(
             (LDIFChangeRecord) result, input.getFirstLineNumber());
        result = (translatedChange == null) ? SKIP_ENTRY : translatedChange;
      }

      return result;
    }
  }
}