001    /*
002     * Copyright 2007-2016 UnboundID Corp.
003     * All Rights Reserved.
004     */
005    /*
006     * Copyright (C) 2008-2016 UnboundID Corp.
007     *
008     * This program is free software; you can redistribute it and/or modify
009     * it under the terms of the GNU General Public License (GPLv2 only)
010     * or the terms of the GNU Lesser General Public License (LGPLv2.1 only)
011     * as published by the Free Software Foundation.
012     *
013     * This program is distributed in the hope that it will be useful,
014     * but WITHOUT ANY WARRANTY; without even the implied warranty of
015     * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
016     * GNU General Public License for more details.
017     *
018     * You should have received a copy of the GNU General Public License
019     * along with this program; if not, see <http://www.gnu.org/licenses>.
020     */
021    package com.unboundid.ldap.matchingrules;
022    
023    
024    
025    import com.unboundid.asn1.ASN1OctetString;
026    import com.unboundid.util.ThreadSafety;
027    import com.unboundid.util.ThreadSafetyLevel;
028    
029    import static com.unboundid.util.StaticUtils.*;
030    
031    
032    
033    /**
034     * This class provides an implementation of a matching rule that uses
035     * case-insensitive matching that also treats multiple consecutive (non-escaped)
036     * spaces as a single space.
037     */
038    @ThreadSafety(level=ThreadSafetyLevel.COMPLETELY_THREADSAFE)
039    public final class CaseIgnoreStringMatchingRule
040           extends AcceptAllSimpleMatchingRule
041    {
042      /**
043       * The singleton instance that will be returned from the {@code getInstance}
044       * method.
045       */
046      private static final CaseIgnoreStringMatchingRule INSTANCE =
047           new CaseIgnoreStringMatchingRule();
048    
049    
050    
051      /**
052       * The name for the caseIgnoreMatch equality matching rule.
053       */
054      public static final String EQUALITY_RULE_NAME = "caseIgnoreMatch";
055    
056    
057    
058      /**
059       * The name for the caseIgnoreMatch equality matching rule, formatted in all
060       * lowercase characters.
061       */
062      static final String LOWER_EQUALITY_RULE_NAME =
063           toLowerCase(EQUALITY_RULE_NAME);
064    
065    
066    
067      /**
068       * The OID for the caseIgnoreMatch equality matching rule.
069       */
070      public static final String EQUALITY_RULE_OID = "2.5.13.2";
071    
072    
073    
074      /**
075       * The name for the caseIgnoreOrderingMatch ordering matching rule.
076       */
077      public static final String ORDERING_RULE_NAME = "caseIgnoreOrderingMatch";
078    
079    
080    
081      /**
082       * The name for the caseIgnoreOrderingMatch ordering matching rule, formatted
083       * in all lowercase characters.
084       */
085      static final String LOWER_ORDERING_RULE_NAME =
086           toLowerCase(ORDERING_RULE_NAME);
087    
088    
089    
090      /**
091       * The OID for the caseIgnoreOrderingMatch ordering matching rule.
092       */
093      public static final String ORDERING_RULE_OID = "2.5.13.3";
094    
095    
096    
097      /**
098       * The name for the caseIgnoreSubstringsMatch substring matching rule.
099       */
100      public static final String SUBSTRING_RULE_NAME = "caseIgnoreSubstringsMatch";
101    
102    
103    
104      /**
105       * The name for the caseIgnoreSubstringsMatch substring matching rule,
106       * formatted in all lowercase characters.
107       */
108      static final String LOWER_SUBSTRING_RULE_NAME =
109           toLowerCase(SUBSTRING_RULE_NAME);
110    
111    
112    
113      /**
114       * The OID for the caseIgnoreSubstringsMatch substring matching rule.
115       */
116      public static final String SUBSTRING_RULE_OID = "2.5.13.4";
117    
118    
119    
120      /**
121       * The serial version UID for this serializable class.
122       */
123      private static final long serialVersionUID = -1293370922676445525L;
124    
125    
126    
127      /**
128       * Creates a new instance of this case ignore string matching rule.
129       */
130      public CaseIgnoreStringMatchingRule()
131      {
132        // No implementation is required.
133      }
134    
135    
136    
137      /**
138       * Retrieves a singleton instance of this matching rule.
139       *
140       * @return  A singleton instance of this matching rule.
141       */
142      public static CaseIgnoreStringMatchingRule getInstance()
143      {
144        return INSTANCE;
145      }
146    
147    
148    
149      /**
150       * {@inheritDoc}
151       */
152      @Override()
153      public String getEqualityMatchingRuleName()
154      {
155        return EQUALITY_RULE_NAME;
156      }
157    
158    
159    
160      /**
161       * {@inheritDoc}
162       */
163      @Override()
164      public String getEqualityMatchingRuleOID()
165      {
166        return EQUALITY_RULE_OID;
167      }
168    
169    
170    
171      /**
172       * {@inheritDoc}
173       */
174      @Override()
175      public String getOrderingMatchingRuleName()
176      {
177        return ORDERING_RULE_NAME;
178      }
179    
180    
181    
182      /**
183       * {@inheritDoc}
184       */
185      @Override()
186      public String getOrderingMatchingRuleOID()
187      {
188        return ORDERING_RULE_OID;
189      }
190    
191    
192    
193      /**
194       * {@inheritDoc}
195       */
196      @Override()
197      public String getSubstringMatchingRuleName()
198      {
199        return SUBSTRING_RULE_NAME;
200      }
201    
202    
203    
204      /**
205       * {@inheritDoc}
206       */
207      @Override()
208      public String getSubstringMatchingRuleOID()
209      {
210        return SUBSTRING_RULE_OID;
211      }
212    
213    
214    
215      /**
216       * {@inheritDoc}
217       */
218      @Override()
219      public boolean valuesMatch(final ASN1OctetString value1,
220                                 final ASN1OctetString value2)
221      {
222        // Try to use a quick, no-copy determination if possible.  If this fails,
223        // then we'll fall back on a more thorough, but more costly, approach.
224        final byte[] value1Bytes = value1.getValue();
225        final byte[] value2Bytes = value2.getValue();
226        if (value1Bytes.length == value2Bytes.length)
227        {
228          for (int i=0; i< value1Bytes.length; i++)
229          {
230            final byte b1 = value1Bytes[i];
231            final byte b2 = value2Bytes[i];
232    
233            if (((b1 & 0x7F) != (b1 & 0xFF)) ||
234                ((b2 & 0x7F) != (b2 & 0xFF)))
235            {
236              return normalize(value1).equals(normalize(value2));
237            }
238            else if (b1 != b2)
239            {
240              if ((b1 == ' ') || (b2 == ' '))
241              {
242                return normalize(value1).equals(normalize(value2));
243              }
244              else if (Character.isUpperCase((char) b1))
245              {
246                final char c = Character.toLowerCase((char) b1);
247                if (c != ((char) b2))
248                {
249                  return false;
250                }
251              }
252              else if (Character.isUpperCase((char) b2))
253              {
254                final char c = Character.toLowerCase((char) b2);
255                if (c != ((char) b1))
256                {
257                  return false;
258                }
259              }
260              else
261              {
262                return false;
263              }
264            }
265          }
266    
267          // If we've gotten to this point, then the values must be equal.
268          return true;
269        }
270        else
271        {
272          return normalizeInternal(value1, false, (byte) 0x00).equals(
273                      normalizeInternal(value2, false, (byte) 0x00));
274        }
275      }
276    
277    
278    
279      /**
280       * {@inheritDoc}
281       */
282      @Override()
283      public ASN1OctetString normalize(final ASN1OctetString value)
284      {
285        return normalizeInternal(value, false, (byte) 0x00);
286      }
287    
288    
289    
290      /**
291       * {@inheritDoc}
292       */
293      @Override()
294      public ASN1OctetString normalizeSubstring(final ASN1OctetString value,
295                                                final byte substringType)
296      {
297        return normalizeInternal(value, true, substringType);
298      }
299    
300    
301    
302      /**
303       * Normalizes the provided value for use in either an equality or substring
304       * matching operation.
305       *
306       * @param  value          The value to be normalized.
307       * @param  isSubstring    Indicates whether the value should be normalized as
308       *                        part of a substring assertion rather than an
309       *                        equality assertion.
310       * @param  substringType  The substring type for the element, if it is to be
311       *                        part of a substring assertion.
312       *
313       * @return  The appropriately normalized form of the provided value.
314       */
315      private static ASN1OctetString normalizeInternal(final ASN1OctetString value,
316                                                       final boolean isSubstring,
317                                                       final byte substringType)
318      {
319        final byte[] valueBytes = value.getValue();
320        if (valueBytes.length == 0)
321        {
322          return value;
323        }
324    
325        final boolean trimInitial;
326        final boolean trimFinal;
327        if (isSubstring)
328        {
329          switch (substringType)
330          {
331            case SUBSTRING_TYPE_SUBINITIAL:
332              trimInitial = true;
333              trimFinal   = false;
334              break;
335    
336            case SUBSTRING_TYPE_SUBFINAL:
337              trimInitial = false;
338              trimFinal   = true;
339              break;
340    
341            default:
342              trimInitial = false;
343              trimFinal   = false;
344              break;
345          }
346        }
347        else
348        {
349          trimInitial = true;
350          trimFinal   = true;
351        }
352    
353        // Count the number of duplicate spaces in the value, and determine whether
354        // there are any non-space characters.  Also, see if there are any non-ASCII
355        // characters.
356        boolean containsNonSpace = false;
357        boolean lastWasSpace = trimInitial;
358        int numDuplicates = 0;
359        for (final byte b : valueBytes)
360        {
361          if ((b & 0x7F) != (b & 0xFF))
362          {
363            return normalizeNonASCII(value, trimInitial, trimFinal);
364          }
365    
366          if (b == ' ')
367          {
368            if (lastWasSpace)
369            {
370              numDuplicates++;
371            }
372            else
373            {
374              lastWasSpace = true;
375            }
376          }
377          else
378          {
379            containsNonSpace = true;
380            lastWasSpace = false;
381          }
382        }
383    
384        if (! containsNonSpace)
385        {
386          return new ASN1OctetString(" ");
387        }
388    
389        if (lastWasSpace && trimFinal)
390        {
391          numDuplicates++;
392        }
393    
394    
395        // Create a new byte array to hold the normalized value.
396        lastWasSpace = trimInitial;
397        int targetPos = 0;
398        final byte[] normalizedBytes = new byte[valueBytes.length - numDuplicates];
399        for (int i=0; i < valueBytes.length; i++)
400        {
401          switch (valueBytes[i])
402          {
403            case ' ':
404              if (lastWasSpace || (trimFinal && (i == (valueBytes.length - 1))))
405              {
406                // No action is required.
407              }
408              else
409              {
410                // This condition is needed to handle the special case in which
411                // there are multiple spaces at the end of the value.
412                if (targetPos < normalizedBytes.length)
413                {
414                  normalizedBytes[targetPos++] = ' ';
415                  lastWasSpace = true;
416                }
417              }
418    
419              break;
420            case 'A':
421              normalizedBytes[targetPos++] = 'a';
422              lastWasSpace = false;
423              break;
424            case 'B':
425              normalizedBytes[targetPos++] = 'b';
426              lastWasSpace = false;
427              break;
428            case 'C':
429              normalizedBytes[targetPos++] = 'c';
430              lastWasSpace = false;
431              break;
432            case 'D':
433              normalizedBytes[targetPos++] = 'd';
434              lastWasSpace = false;
435              break;
436            case 'E':
437              normalizedBytes[targetPos++] = 'e';
438              lastWasSpace = false;
439              break;
440            case 'F':
441              normalizedBytes[targetPos++] = 'f';
442              lastWasSpace = false;
443              break;
444            case 'G':
445              normalizedBytes[targetPos++] = 'g';
446              lastWasSpace = false;
447              break;
448            case 'H':
449              normalizedBytes[targetPos++] = 'h';
450              lastWasSpace = false;
451              break;
452            case 'I':
453              normalizedBytes[targetPos++] = 'i';
454              lastWasSpace = false;
455              break;
456            case 'J':
457              normalizedBytes[targetPos++] = 'j';
458              lastWasSpace = false;
459              break;
460            case 'K':
461              normalizedBytes[targetPos++] = 'k';
462              lastWasSpace = false;
463              break;
464            case 'L':
465              normalizedBytes[targetPos++] = 'l';
466              lastWasSpace = false;
467              break;
468            case 'M':
469              normalizedBytes[targetPos++] = 'm';
470              lastWasSpace = false;
471              break;
472            case 'N':
473              normalizedBytes[targetPos++] = 'n';
474              lastWasSpace = false;
475              break;
476            case 'O':
477              normalizedBytes[targetPos++] = 'o';
478              lastWasSpace = false;
479              break;
480            case 'P':
481              normalizedBytes[targetPos++] = 'p';
482              lastWasSpace = false;
483              break;
484            case 'Q':
485              normalizedBytes[targetPos++] = 'q';
486              lastWasSpace = false;
487              break;
488            case 'R':
489              normalizedBytes[targetPos++] = 'r';
490              lastWasSpace = false;
491              break;
492            case 'S':
493              normalizedBytes[targetPos++] = 's';
494              lastWasSpace = false;
495              break;
496            case 'T':
497              normalizedBytes[targetPos++] = 't';
498              lastWasSpace = false;
499              break;
500            case 'U':
501              normalizedBytes[targetPos++] = 'u';
502              lastWasSpace = false;
503              break;
504            case 'V':
505              normalizedBytes[targetPos++] = 'v';
506              lastWasSpace = false;
507              break;
508            case 'W':
509              normalizedBytes[targetPos++] = 'w';
510              lastWasSpace = false;
511              break;
512            case 'X':
513              normalizedBytes[targetPos++] = 'x';
514              lastWasSpace = false;
515              break;
516            case 'Y':
517              normalizedBytes[targetPos++] = 'y';
518              lastWasSpace = false;
519              break;
520            case 'Z':
521              normalizedBytes[targetPos++] = 'z';
522              lastWasSpace = false;
523              break;
524            default:
525              normalizedBytes[targetPos++] = valueBytes[i];
526              lastWasSpace = false;
527              break;
528          }
529        }
530    
531    
532        return new ASN1OctetString(normalizedBytes);
533      }
534    
535    
536    
537      /**
538       * Normalizes the provided value a string representation, properly handling
539       * any non-ASCII characters.
540       *
541       * @param  value        The value to be normalized.
542       * @param  trimInitial  Indicates whether to trim off all leading spaces at
543       *                      the beginning of the value.
544       * @param  trimFinal    Indicates whether to trim off all trailing spaces at
545       *                      the end of the value.
546       *
547       * @return  The normalized form of the value.
548       */
549      private static ASN1OctetString normalizeNonASCII(final ASN1OctetString value,
550                                                       final boolean trimInitial,
551                                                       final boolean trimFinal)
552      {
553        final StringBuilder buffer = new StringBuilder(value.stringValue());
554    
555        int pos = 0;
556        boolean lastWasSpace = trimInitial;
557        while (pos < buffer.length())
558        {
559          final char c = buffer.charAt(pos++);
560          if (c == ' ')
561          {
562            if (lastWasSpace || (trimFinal && (pos >= buffer.length())))
563            {
564              buffer.deleteCharAt(--pos);
565            }
566            else
567            {
568              lastWasSpace = true;
569            }
570          }
571          else
572          {
573            if (Character.isUpperCase(c))
574            {
575              buffer.setCharAt((pos-1), Character.toLowerCase(c));
576            }
577    
578            lastWasSpace = false;
579          }
580        }
581    
582        // It is possible that there could be an extra space at the end.  If that's
583        // the case, then remove it.
584        if (trimFinal && (buffer.length() > 0) &&
585            (buffer.charAt(buffer.length() - 1) == ' '))
586        {
587          buffer.deleteCharAt(buffer.length() - 1);
588        }
589    
590        return new ASN1OctetString(buffer.toString());
591      }
592    }