001    /*
002     * Copyright 2007-2016 UnboundID Corp.
003     * All Rights Reserved.
004     */
005    /*
006     * Copyright (C) 2008-2016 UnboundID Corp.
007     *
008     * This program is free software; you can redistribute it and/or modify
009     * it under the terms of the GNU General Public License (GPLv2 only)
010     * or the terms of the GNU Lesser General Public License (LGPLv2.1 only)
011     * as published by the Free Software Foundation.
012     *
013     * This program is distributed in the hope that it will be useful,
014     * but WITHOUT ANY WARRANTY; without even the implied warranty of
015     * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
016     * GNU General Public License for more details.
017     *
018     * You should have received a copy of the GNU General Public License
019     * along with this program; if not, see <http://www.gnu.org/licenses>.
020     */
021    package com.unboundid.ldap.matchingrules;
022    
023    
024    
025    import com.unboundid.asn1.ASN1OctetString;
026    
027    import static com.unboundid.util.StaticUtils.*;
028    
029    
030    
031    /**
032     * This class provides an implementation of a matching rule that uses
033     * case-insensitive matching that also treats multiple consecutive (non-escaped)
034     * spaces as a single space.
035     */
036    public final class CaseIgnoreStringMatchingRule
037           extends AcceptAllSimpleMatchingRule
038    {
039      /**
040       * The singleton instance that will be returned from the {@code getInstance}
041       * method.
042       */
043      private static final CaseIgnoreStringMatchingRule INSTANCE =
044           new CaseIgnoreStringMatchingRule();
045    
046    
047    
048      /**
049       * The name for the caseIgnoreMatch equality matching rule.
050       */
051      public static final String EQUALITY_RULE_NAME = "caseIgnoreMatch";
052    
053    
054    
055      /**
056       * The name for the caseIgnoreMatch equality matching rule, formatted in all
057       * lowercase characters.
058       */
059      static final String LOWER_EQUALITY_RULE_NAME =
060           toLowerCase(EQUALITY_RULE_NAME);
061    
062    
063    
064      /**
065       * The OID for the caseIgnoreMatch equality matching rule.
066       */
067      public static final String EQUALITY_RULE_OID = "2.5.13.2";
068    
069    
070    
071      /**
072       * The name for the caseIgnoreOrderingMatch ordering matching rule.
073       */
074      public static final String ORDERING_RULE_NAME = "caseIgnoreOrderingMatch";
075    
076    
077    
078      /**
079       * The name for the caseIgnoreOrderingMatch ordering matching rule, formatted
080       * in all lowercase characters.
081       */
082      static final String LOWER_ORDERING_RULE_NAME =
083           toLowerCase(ORDERING_RULE_NAME);
084    
085    
086    
087      /**
088       * The OID for the caseIgnoreOrderingMatch ordering matching rule.
089       */
090      public static final String ORDERING_RULE_OID = "2.5.13.3";
091    
092    
093    
094      /**
095       * The name for the caseIgnoreSubstringsMatch substring matching rule.
096       */
097      public static final String SUBSTRING_RULE_NAME = "caseIgnoreSubstringsMatch";
098    
099    
100    
101      /**
102       * The name for the caseIgnoreSubstringsMatch substring matching rule,
103       * formatted in all lowercase characters.
104       */
105      static final String LOWER_SUBSTRING_RULE_NAME =
106           toLowerCase(SUBSTRING_RULE_NAME);
107    
108    
109    
110      /**
111       * The OID for the caseIgnoreSubstringsMatch substring matching rule.
112       */
113      public static final String SUBSTRING_RULE_OID = "2.5.13.4";
114    
115    
116    
117      /**
118       * The serial version UID for this serializable class.
119       */
120      private static final long serialVersionUID = -1293370922676445525L;
121    
122    
123    
124      /**
125       * Creates a new instance of this case ignore string matching rule.
126       */
127      public CaseIgnoreStringMatchingRule()
128      {
129        // No implementation is required.
130      }
131    
132    
133    
134      /**
135       * Retrieves a singleton instance of this matching rule.
136       *
137       * @return  A singleton instance of this matching rule.
138       */
139      public static CaseIgnoreStringMatchingRule getInstance()
140      {
141        return INSTANCE;
142      }
143    
144    
145    
146      /**
147       * {@inheritDoc}
148       */
149      @Override()
150      public String getEqualityMatchingRuleName()
151      {
152        return EQUALITY_RULE_NAME;
153      }
154    
155    
156    
157      /**
158       * {@inheritDoc}
159       */
160      @Override()
161      public String getEqualityMatchingRuleOID()
162      {
163        return EQUALITY_RULE_OID;
164      }
165    
166    
167    
168      /**
169       * {@inheritDoc}
170       */
171      @Override()
172      public String getOrderingMatchingRuleName()
173      {
174        return ORDERING_RULE_NAME;
175      }
176    
177    
178    
179      /**
180       * {@inheritDoc}
181       */
182      @Override()
183      public String getOrderingMatchingRuleOID()
184      {
185        return ORDERING_RULE_OID;
186      }
187    
188    
189    
190      /**
191       * {@inheritDoc}
192       */
193      @Override()
194      public String getSubstringMatchingRuleName()
195      {
196        return SUBSTRING_RULE_NAME;
197      }
198    
199    
200    
201      /**
202       * {@inheritDoc}
203       */
204      @Override()
205      public String getSubstringMatchingRuleOID()
206      {
207        return SUBSTRING_RULE_OID;
208      }
209    
210    
211    
212      /**
213       * {@inheritDoc}
214       */
215      @Override()
216      public boolean valuesMatch(final ASN1OctetString value1,
217                                 final ASN1OctetString value2)
218      {
219        // Try to use a quick, no-copy determination if possible.  If this fails,
220        // then we'll fall back on a more thorough, but more costly, approach.
221        final byte[] value1Bytes = value1.getValue();
222        final byte[] value2Bytes = value2.getValue();
223        if (value1Bytes.length == value2Bytes.length)
224        {
225          for (int i=0; i< value1Bytes.length; i++)
226          {
227            final byte b1 = value1Bytes[i];
228            final byte b2 = value2Bytes[i];
229    
230            if (((b1 & 0x7F) != (b1 & 0xFF)) ||
231                ((b2 & 0x7F) != (b2 & 0xFF)))
232            {
233              return normalize(value1).equals(normalize(value2));
234            }
235            else if (b1 != b2)
236            {
237              if ((b1 == ' ') || (b2 == ' '))
238              {
239                return normalize(value1).equals(normalize(value2));
240              }
241              else if (Character.isUpperCase((char) b1))
242              {
243                final char c = Character.toLowerCase((char) b1);
244                if (c != ((char) b2))
245                {
246                  return false;
247                }
248              }
249              else if (Character.isUpperCase((char) b2))
250              {
251                final char c = Character.toLowerCase((char) b2);
252                if (c != ((char) b1))
253                {
254                  return false;
255                }
256              }
257              else
258              {
259                return false;
260              }
261            }
262          }
263    
264          // If we've gotten to this point, then the values must be equal.
265          return true;
266        }
267        else
268        {
269          return normalizeInternal(value1, false, (byte) 0x00).equals(
270                      normalizeInternal(value2, false, (byte) 0x00));
271        }
272      }
273    
274    
275    
276      /**
277       * {@inheritDoc}
278       */
279      @Override()
280      public ASN1OctetString normalize(final ASN1OctetString value)
281      {
282        return normalizeInternal(value, false, (byte) 0x00);
283      }
284    
285    
286    
287      /**
288       * {@inheritDoc}
289       */
290      @Override()
291      public ASN1OctetString normalizeSubstring(final ASN1OctetString value,
292                                                final byte substringType)
293      {
294        return normalizeInternal(value, true, substringType);
295      }
296    
297    
298    
299      /**
300       * Normalizes the provided value for use in either an equality or substring
301       * matching operation.
302       *
303       * @param  value          The value to be normalized.
304       * @param  isSubstring    Indicates whether the value should be normalized as
305       *                        part of a substring assertion rather than an
306       *                        equality assertion.
307       * @param  substringType  The substring type for the element, if it is to be
308       *                        part of a substring assertion.
309       *
310       * @return  The appropriately normalized form of the provided value.
311       */
312      private static ASN1OctetString normalizeInternal(final ASN1OctetString value,
313                                                       final boolean isSubstring,
314                                                       final byte substringType)
315      {
316        final byte[] valueBytes = value.getValue();
317        if (valueBytes.length == 0)
318        {
319          return value;
320        }
321    
322        final boolean trimInitial;
323        final boolean trimFinal;
324        if (isSubstring)
325        {
326          switch (substringType)
327          {
328            case SUBSTRING_TYPE_SUBINITIAL:
329              trimInitial = true;
330              trimFinal   = false;
331              break;
332    
333            case SUBSTRING_TYPE_SUBFINAL:
334              trimInitial = false;
335              trimFinal   = true;
336              break;
337    
338            default:
339              trimInitial = false;
340              trimFinal   = false;
341              break;
342          }
343        }
344        else
345        {
346          trimInitial = true;
347          trimFinal   = true;
348        }
349    
350        // Count the number of duplicate spaces in the value, and determine whether
351        // there are any non-space characters.  Also, see if there are any non-ASCII
352        // characters.
353        boolean containsNonSpace = false;
354        boolean lastWasSpace = trimInitial;
355        int numDuplicates = 0;
356        for (final byte b : valueBytes)
357        {
358          if ((b & 0x7F) != (b & 0xFF))
359          {
360            return normalizeNonASCII(value, trimInitial, trimFinal);
361          }
362    
363          if (b == ' ')
364          {
365            if (lastWasSpace)
366            {
367              numDuplicates++;
368            }
369            else
370            {
371              lastWasSpace = true;
372            }
373          }
374          else
375          {
376            containsNonSpace = true;
377            lastWasSpace = false;
378          }
379        }
380    
381        if (! containsNonSpace)
382        {
383          return new ASN1OctetString(" ");
384        }
385    
386        if (lastWasSpace && trimFinal)
387        {
388          numDuplicates++;
389        }
390    
391    
392        // Create a new byte array to hold the normalized value.
393        lastWasSpace = trimInitial;
394        int targetPos = 0;
395        final byte[] normalizedBytes = new byte[valueBytes.length - numDuplicates];
396        for (int i=0; i < valueBytes.length; i++)
397        {
398          switch (valueBytes[i])
399          {
400            case ' ':
401              if (lastWasSpace || (trimFinal && (i == (valueBytes.length - 1))))
402              {
403                // No action is required.
404              }
405              else
406              {
407                // This condition is needed to handle the special case in which
408                // there are multiple spaces at the end of the value.
409                if (targetPos < normalizedBytes.length)
410                {
411                  normalizedBytes[targetPos++] = ' ';
412                  lastWasSpace = true;
413                }
414              }
415    
416              break;
417            case 'A':
418              normalizedBytes[targetPos++] = 'a';
419              lastWasSpace = false;
420              break;
421            case 'B':
422              normalizedBytes[targetPos++] = 'b';
423              lastWasSpace = false;
424              break;
425            case 'C':
426              normalizedBytes[targetPos++] = 'c';
427              lastWasSpace = false;
428              break;
429            case 'D':
430              normalizedBytes[targetPos++] = 'd';
431              lastWasSpace = false;
432              break;
433            case 'E':
434              normalizedBytes[targetPos++] = 'e';
435              lastWasSpace = false;
436              break;
437            case 'F':
438              normalizedBytes[targetPos++] = 'f';
439              lastWasSpace = false;
440              break;
441            case 'G':
442              normalizedBytes[targetPos++] = 'g';
443              lastWasSpace = false;
444              break;
445            case 'H':
446              normalizedBytes[targetPos++] = 'h';
447              lastWasSpace = false;
448              break;
449            case 'I':
450              normalizedBytes[targetPos++] = 'i';
451              lastWasSpace = false;
452              break;
453            case 'J':
454              normalizedBytes[targetPos++] = 'j';
455              lastWasSpace = false;
456              break;
457            case 'K':
458              normalizedBytes[targetPos++] = 'k';
459              lastWasSpace = false;
460              break;
461            case 'L':
462              normalizedBytes[targetPos++] = 'l';
463              lastWasSpace = false;
464              break;
465            case 'M':
466              normalizedBytes[targetPos++] = 'm';
467              lastWasSpace = false;
468              break;
469            case 'N':
470              normalizedBytes[targetPos++] = 'n';
471              lastWasSpace = false;
472              break;
473            case 'O':
474              normalizedBytes[targetPos++] = 'o';
475              lastWasSpace = false;
476              break;
477            case 'P':
478              normalizedBytes[targetPos++] = 'p';
479              lastWasSpace = false;
480              break;
481            case 'Q':
482              normalizedBytes[targetPos++] = 'q';
483              lastWasSpace = false;
484              break;
485            case 'R':
486              normalizedBytes[targetPos++] = 'r';
487              lastWasSpace = false;
488              break;
489            case 'S':
490              normalizedBytes[targetPos++] = 's';
491              lastWasSpace = false;
492              break;
493            case 'T':
494              normalizedBytes[targetPos++] = 't';
495              lastWasSpace = false;
496              break;
497            case 'U':
498              normalizedBytes[targetPos++] = 'u';
499              lastWasSpace = false;
500              break;
501            case 'V':
502              normalizedBytes[targetPos++] = 'v';
503              lastWasSpace = false;
504              break;
505            case 'W':
506              normalizedBytes[targetPos++] = 'w';
507              lastWasSpace = false;
508              break;
509            case 'X':
510              normalizedBytes[targetPos++] = 'x';
511              lastWasSpace = false;
512              break;
513            case 'Y':
514              normalizedBytes[targetPos++] = 'y';
515              lastWasSpace = false;
516              break;
517            case 'Z':
518              normalizedBytes[targetPos++] = 'z';
519              lastWasSpace = false;
520              break;
521            default:
522              normalizedBytes[targetPos++] = valueBytes[i];
523              lastWasSpace = false;
524              break;
525          }
526        }
527    
528    
529        return new ASN1OctetString(normalizedBytes);
530      }
531    
532    
533    
534      /**
535       * Normalizes the provided value a string representation, properly handling
536       * any non-ASCII characters.
537       *
538       * @param  value        The value to be normalized.
539       * @param  trimInitial  Indicates whether to trim off all leading spaces at
540       *                      the beginning of the value.
541       * @param  trimFinal    Indicates whether to trim off all trailing spaces at
542       *                      the end of the value.
543       *
544       * @return  The normalized form of the value.
545       */
546      private static ASN1OctetString normalizeNonASCII(final ASN1OctetString value,
547                                                       final boolean trimInitial,
548                                                       final boolean trimFinal)
549      {
550        final StringBuilder buffer = new StringBuilder(value.stringValue());
551    
552        int pos = 0;
553        boolean lastWasSpace = trimInitial;
554        while (pos < buffer.length())
555        {
556          final char c = buffer.charAt(pos++);
557          if (c == ' ')
558          {
559            if (lastWasSpace || (trimFinal && (pos >= buffer.length())))
560            {
561              buffer.deleteCharAt(--pos);
562            }
563            else
564            {
565              lastWasSpace = true;
566            }
567          }
568          else
569          {
570            if (Character.isUpperCase(c))
571            {
572              buffer.setCharAt((pos-1), Character.toLowerCase(c));
573            }
574    
575            lastWasSpace = false;
576          }
577        }
578    
579        // It is possible that there could be an extra space at the end.  If that's
580        // the case, then remove it.
581        if (trimFinal && (buffer.length() > 0) &&
582            (buffer.charAt(buffer.length() - 1) == ' '))
583        {
584          buffer.deleteCharAt(buffer.length() - 1);
585        }
586    
587        return new ASN1OctetString(buffer.toString());
588      }
589    }