001/*
002 * Copyright 2007-2024 Ping Identity Corporation
003 * All Rights Reserved.
004 */
005/*
006 * Copyright 2007-2024 Ping Identity Corporation
007 *
008 * Licensed under the Apache License, Version 2.0 (the "License");
009 * you may not use this file except in compliance with the License.
010 * You may obtain a copy of the License at
011 *
012 *    http://www.apache.org/licenses/LICENSE-2.0
013 *
014 * Unless required by applicable law or agreed to in writing, software
015 * distributed under the License is distributed on an "AS IS" BASIS,
016 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
017 * See the License for the specific language governing permissions and
018 * limitations under the License.
019 */
020/*
021 * Copyright (C) 2007-2024 Ping Identity Corporation
022 *
023 * This program is free software; you can redistribute it and/or modify
024 * it under the terms of the GNU General Public License (GPLv2 only)
025 * or the terms of the GNU Lesser General Public License (LGPLv2.1 only)
026 * as published by the Free Software Foundation.
027 *
028 * This program is distributed in the hope that it will be useful,
029 * but WITHOUT ANY WARRANTY; without even the implied warranty of
030 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
031 * GNU General Public License for more details.
032 *
033 * You should have received a copy of the GNU General Public License
034 * along with this program; if not, see <http://www.gnu.org/licenses>.
035 */
036package com.unboundid.ldap.matchingrules;
037
038
039
040import com.unboundid.asn1.ASN1OctetString;
041import com.unboundid.util.NotNull;
042import com.unboundid.util.StaticUtils;
043import com.unboundid.util.ThreadSafety;
044import com.unboundid.util.ThreadSafetyLevel;
045
046
047
048/**
049 * This class provides an implementation of a matching rule that uses
050 * case-insensitive matching that also treats multiple consecutive (non-escaped)
051 * spaces as a single space.
052 */
053@ThreadSafety(level=ThreadSafetyLevel.COMPLETELY_THREADSAFE)
054public final class CaseIgnoreStringMatchingRule
055       extends AcceptAllSimpleMatchingRule
056{
057  /**
058   * The singleton instance that will be returned from the {@code getInstance}
059   * method.
060   */
061  @NotNull private static final CaseIgnoreStringMatchingRule INSTANCE =
062       new CaseIgnoreStringMatchingRule();
063
064
065
066  /**
067   * The name for the caseIgnoreMatch equality matching rule.
068   */
069  @NotNull public static final String EQUALITY_RULE_NAME = "caseIgnoreMatch";
070
071
072
073  /**
074   * The name for the caseIgnoreMatch equality matching rule, formatted in all
075   * lowercase characters.
076   */
077  @NotNull static final String LOWER_EQUALITY_RULE_NAME =
078       StaticUtils.toLowerCase(EQUALITY_RULE_NAME);
079
080
081
082  /**
083   * The OID for the caseIgnoreMatch equality matching rule.
084   */
085  @NotNull public static final String EQUALITY_RULE_OID = "2.5.13.2";
086
087
088
089  /**
090   * The name for the caseIgnoreOrderingMatch ordering matching rule.
091   */
092  @NotNull public static final String ORDERING_RULE_NAME =
093       "caseIgnoreOrderingMatch";
094
095
096
097  /**
098   * The name for the caseIgnoreOrderingMatch ordering matching rule, formatted
099   * in all lowercase characters.
100   */
101  @NotNull static final String LOWER_ORDERING_RULE_NAME =
102       StaticUtils.toLowerCase(ORDERING_RULE_NAME);
103
104
105
106  /**
107   * The OID for the caseIgnoreOrderingMatch ordering matching rule.
108   */
109  @NotNull public static final String ORDERING_RULE_OID = "2.5.13.3";
110
111
112
113  /**
114   * The name for the caseIgnoreSubstringsMatch substring matching rule.
115   */
116  @NotNull public static final String SUBSTRING_RULE_NAME =
117       "caseIgnoreSubstringsMatch";
118
119
120
121  /**
122   * The name for the caseIgnoreSubstringsMatch substring matching rule,
123   * formatted in all lowercase characters.
124   */
125  @NotNull static final String LOWER_SUBSTRING_RULE_NAME =
126       StaticUtils.toLowerCase(SUBSTRING_RULE_NAME);
127
128
129
130  /**
131   * The OID for the caseIgnoreSubstringsMatch substring matching rule.
132   */
133  @NotNull public static final String SUBSTRING_RULE_OID = "2.5.13.4";
134
135
136
137  /**
138   * The serial version UID for this serializable class.
139   */
140  private static final long serialVersionUID = -1293370922676445525L;
141
142
143
144  /**
145   * Creates a new instance of this case ignore string matching rule.
146   */
147  public CaseIgnoreStringMatchingRule()
148  {
149    // No implementation is required.
150  }
151
152
153
154  /**
155   * Retrieves a singleton instance of this matching rule.
156   *
157   * @return  A singleton instance of this matching rule.
158   */
159  @NotNull()
160  public static CaseIgnoreStringMatchingRule getInstance()
161  {
162    return INSTANCE;
163  }
164
165
166
167  /**
168   * {@inheritDoc}
169   */
170  @Override()
171  @NotNull()
172  public String getEqualityMatchingRuleName()
173  {
174    return EQUALITY_RULE_NAME;
175  }
176
177
178
179  /**
180   * {@inheritDoc}
181   */
182  @Override()
183  @NotNull()
184  public String getEqualityMatchingRuleOID()
185  {
186    return EQUALITY_RULE_OID;
187  }
188
189
190
191  /**
192   * {@inheritDoc}
193   */
194  @Override()
195  @NotNull()
196  public String getOrderingMatchingRuleName()
197  {
198    return ORDERING_RULE_NAME;
199  }
200
201
202
203  /**
204   * {@inheritDoc}
205   */
206  @Override()
207  @NotNull()
208  public String getOrderingMatchingRuleOID()
209  {
210    return ORDERING_RULE_OID;
211  }
212
213
214
215  /**
216   * {@inheritDoc}
217   */
218  @Override()
219  @NotNull()
220  public String getSubstringMatchingRuleName()
221  {
222    return SUBSTRING_RULE_NAME;
223  }
224
225
226
227  /**
228   * {@inheritDoc}
229   */
230  @Override()
231  @NotNull()
232  public String getSubstringMatchingRuleOID()
233  {
234    return SUBSTRING_RULE_OID;
235  }
236
237
238
239  /**
240   * {@inheritDoc}
241   */
242  @Override()
243  public boolean valuesMatch(@NotNull final ASN1OctetString value1,
244                             @NotNull final ASN1OctetString value2)
245  {
246    // Try to use a quick, no-copy determination if possible.  If this fails,
247    // then we'll fall back on a more thorough, but more costly, approach.
248    final byte[] value1Bytes = value1.getValue();
249    final byte[] value2Bytes = value2.getValue();
250    if (value1Bytes.length == value2Bytes.length)
251    {
252      for (int i=0; i< value1Bytes.length; i++)
253      {
254        final byte b1 = value1Bytes[i];
255        final byte b2 = value2Bytes[i];
256
257        if (((b1 & 0x7F) != (b1 & 0xFF)) ||
258            ((b2 & 0x7F) != (b2 & 0xFF)))
259        {
260          return normalize(value1).equals(normalize(value2));
261        }
262        else if (b1 != b2)
263        {
264          if ((b1 == ' ') || (b2 == ' '))
265          {
266            return normalize(value1).equals(normalize(value2));
267          }
268          else if (Character.isUpperCase((char) b1))
269          {
270            final char c = Character.toLowerCase((char) b1);
271            if (c != ((char) b2))
272            {
273              return false;
274            }
275          }
276          else if (Character.isUpperCase((char) b2))
277          {
278            final char c = Character.toLowerCase((char) b2);
279            if (c != ((char) b1))
280            {
281              return false;
282            }
283          }
284          else
285          {
286            return false;
287          }
288        }
289      }
290
291      // If we've gotten to this point, then the values must be equal.
292      return true;
293    }
294    else
295    {
296      return normalizeInternal(value1, false, (byte) 0x00).equals(
297                  normalizeInternal(value2, false, (byte) 0x00));
298    }
299  }
300
301
302
303  /**
304   * {@inheritDoc}
305   */
306  @Override()
307  @NotNull()
308  public ASN1OctetString normalize(@NotNull final ASN1OctetString value)
309  {
310    return normalizeInternal(value, false, (byte) 0x00);
311  }
312
313
314
315  /**
316   * {@inheritDoc}
317   */
318  @Override()
319  @NotNull()
320  public ASN1OctetString normalizeSubstring(
321                              @NotNull final ASN1OctetString value,
322                              final byte substringType)
323  {
324    return normalizeInternal(value, true, substringType);
325  }
326
327
328
329  /**
330   * Normalizes the provided value for use in either an equality or substring
331   * matching operation.
332   *
333   * @param  value          The value to be normalized.
334   * @param  isSubstring    Indicates whether the value should be normalized as
335   *                        part of a substring assertion rather than an
336   *                        equality assertion.
337   * @param  substringType  The substring type for the element, if it is to be
338   *                        part of a substring assertion.
339   *
340   * @return  The appropriately normalized form of the provided value.
341   */
342  @NotNull()
343  private static ASN1OctetString normalizeInternal(
344                                      @NotNull final ASN1OctetString value,
345                                      final boolean isSubstring,
346                                      final byte substringType)
347  {
348    final byte[] valueBytes = value.getValue();
349    if (valueBytes.length == 0)
350    {
351      return value;
352    }
353
354    final boolean trimInitial;
355    final boolean trimFinal;
356    if (isSubstring)
357    {
358      switch (substringType)
359      {
360        case SUBSTRING_TYPE_SUBINITIAL:
361          trimInitial = true;
362          trimFinal   = false;
363          break;
364
365        case SUBSTRING_TYPE_SUBFINAL:
366          trimInitial = false;
367          trimFinal   = true;
368          break;
369
370        default:
371          trimInitial = false;
372          trimFinal   = false;
373          break;
374      }
375    }
376    else
377    {
378      trimInitial = true;
379      trimFinal   = true;
380    }
381
382    // Count the number of duplicate spaces in the value, and determine whether
383    // there are any non-space characters.  Also, see if there are any non-ASCII
384    // characters.
385    boolean containsNonSpace = false;
386    boolean lastWasSpace = trimInitial;
387    int numDuplicates = 0;
388    for (final byte b : valueBytes)
389    {
390      if ((b & 0x7F) != (b & 0xFF))
391      {
392        return normalizeNonASCII(value, trimInitial, trimFinal);
393      }
394
395      if (b == ' ')
396      {
397        if (lastWasSpace)
398        {
399          numDuplicates++;
400        }
401        else
402        {
403          lastWasSpace = true;
404        }
405      }
406      else
407      {
408        containsNonSpace = true;
409        lastWasSpace = false;
410      }
411    }
412
413    if (! containsNonSpace)
414    {
415      return new ASN1OctetString(" ");
416    }
417
418    if (lastWasSpace && trimFinal)
419    {
420      numDuplicates++;
421    }
422
423
424    // Create a new byte array to hold the normalized value.
425    lastWasSpace = trimInitial;
426    int targetPos = 0;
427    final byte[] normalizedBytes = new byte[valueBytes.length - numDuplicates];
428    for (int i=0; i < valueBytes.length; i++)
429    {
430      switch (valueBytes[i])
431      {
432        case ' ':
433          if (lastWasSpace || (trimFinal && (i == (valueBytes.length - 1))))
434          {
435            // No action is required.
436          }
437          else
438          {
439            // This condition is needed to handle the special case in which
440            // there are multiple spaces at the end of the value.
441            if (targetPos < normalizedBytes.length)
442            {
443              normalizedBytes[targetPos++] = ' ';
444              lastWasSpace = true;
445            }
446          }
447
448          break;
449        case 'A':
450          normalizedBytes[targetPos++] = 'a';
451          lastWasSpace = false;
452          break;
453        case 'B':
454          normalizedBytes[targetPos++] = 'b';
455          lastWasSpace = false;
456          break;
457        case 'C':
458          normalizedBytes[targetPos++] = 'c';
459          lastWasSpace = false;
460          break;
461        case 'D':
462          normalizedBytes[targetPos++] = 'd';
463          lastWasSpace = false;
464          break;
465        case 'E':
466          normalizedBytes[targetPos++] = 'e';
467          lastWasSpace = false;
468          break;
469        case 'F':
470          normalizedBytes[targetPos++] = 'f';
471          lastWasSpace = false;
472          break;
473        case 'G':
474          normalizedBytes[targetPos++] = 'g';
475          lastWasSpace = false;
476          break;
477        case 'H':
478          normalizedBytes[targetPos++] = 'h';
479          lastWasSpace = false;
480          break;
481        case 'I':
482          normalizedBytes[targetPos++] = 'i';
483          lastWasSpace = false;
484          break;
485        case 'J':
486          normalizedBytes[targetPos++] = 'j';
487          lastWasSpace = false;
488          break;
489        case 'K':
490          normalizedBytes[targetPos++] = 'k';
491          lastWasSpace = false;
492          break;
493        case 'L':
494          normalizedBytes[targetPos++] = 'l';
495          lastWasSpace = false;
496          break;
497        case 'M':
498          normalizedBytes[targetPos++] = 'm';
499          lastWasSpace = false;
500          break;
501        case 'N':
502          normalizedBytes[targetPos++] = 'n';
503          lastWasSpace = false;
504          break;
505        case 'O':
506          normalizedBytes[targetPos++] = 'o';
507          lastWasSpace = false;
508          break;
509        case 'P':
510          normalizedBytes[targetPos++] = 'p';
511          lastWasSpace = false;
512          break;
513        case 'Q':
514          normalizedBytes[targetPos++] = 'q';
515          lastWasSpace = false;
516          break;
517        case 'R':
518          normalizedBytes[targetPos++] = 'r';
519          lastWasSpace = false;
520          break;
521        case 'S':
522          normalizedBytes[targetPos++] = 's';
523          lastWasSpace = false;
524          break;
525        case 'T':
526          normalizedBytes[targetPos++] = 't';
527          lastWasSpace = false;
528          break;
529        case 'U':
530          normalizedBytes[targetPos++] = 'u';
531          lastWasSpace = false;
532          break;
533        case 'V':
534          normalizedBytes[targetPos++] = 'v';
535          lastWasSpace = false;
536          break;
537        case 'W':
538          normalizedBytes[targetPos++] = 'w';
539          lastWasSpace = false;
540          break;
541        case 'X':
542          normalizedBytes[targetPos++] = 'x';
543          lastWasSpace = false;
544          break;
545        case 'Y':
546          normalizedBytes[targetPos++] = 'y';
547          lastWasSpace = false;
548          break;
549        case 'Z':
550          normalizedBytes[targetPos++] = 'z';
551          lastWasSpace = false;
552          break;
553        default:
554          normalizedBytes[targetPos++] = valueBytes[i];
555          lastWasSpace = false;
556          break;
557      }
558    }
559
560
561    return new ASN1OctetString(normalizedBytes);
562  }
563
564
565
566  /**
567   * Normalizes the provided value a string representation, properly handling
568   * any non-ASCII characters.
569   *
570   * @param  value        The value to be normalized.
571   * @param  trimInitial  Indicates whether to trim off all leading spaces at
572   *                      the beginning of the value.
573   * @param  trimFinal    Indicates whether to trim off all trailing spaces at
574   *                      the end of the value.
575   *
576   * @return  The normalized form of the value.
577   */
578  @NotNull()
579  private static ASN1OctetString normalizeNonASCII(
580                                      @NotNull final ASN1OctetString value,
581                                      final boolean trimInitial,
582                                      final boolean trimFinal)
583  {
584    final StringBuilder buffer = new StringBuilder(value.stringValue());
585
586    int pos = 0;
587    boolean lastWasSpace = trimInitial;
588    while (pos < buffer.length())
589    {
590      final char c = buffer.charAt(pos++);
591      if (c == ' ')
592      {
593        if (lastWasSpace || (trimFinal && (pos >= buffer.length())))
594        {
595          buffer.deleteCharAt(--pos);
596        }
597        else
598        {
599          lastWasSpace = true;
600        }
601      }
602      else
603      {
604        if (Character.isHighSurrogate(c))
605        {
606          if (pos < buffer.length())
607          {
608            final char c2 = buffer.charAt(pos++);
609            if (Character.isLowSurrogate(c2))
610            {
611              final int codePoint = Character.toCodePoint(c, c2);
612              if (Character.isUpperCase(codePoint))
613              {
614                final int lowerCaseCodePoint = Character.toLowerCase(codePoint);
615                buffer.setCharAt((pos-2),
616                     Character.highSurrogate(lowerCaseCodePoint));
617                buffer.setCharAt((pos-1),
618                     Character.lowSurrogate(lowerCaseCodePoint));
619              }
620            }
621          }
622        }
623        else if (Character.isUpperCase(c))
624        {
625          buffer.setCharAt((pos-1), Character.toLowerCase(c));
626        }
627
628        lastWasSpace = false;
629      }
630    }
631
632    // It is possible that there could be an extra space at the end.  If that's
633    // the case, then remove it.
634    if (trimFinal && (buffer.length() > 0) &&
635        (buffer.charAt(buffer.length() - 1) == ' '))
636    {
637      buffer.deleteCharAt(buffer.length() - 1);
638    }
639
640    return new ASN1OctetString(buffer.toString());
641  }
642}