/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.commons.lang3.text;

import java.util.Arrays;

import org.apache.commons.lang3.ArraySorter;
import org.apache.commons.lang3.StringUtils;

/**
 * A matcher class that can be queried to determine if a character array
 * portion matches.
 * <p>
 * This class comes complete with various factory methods.
 * If these do not suffice, you can subclass and implement your own matcher.
 *
 * @since 2.2
 * @deprecated as of 3.6, use commons-text
 * <a href="https://commons.apache.org/proper/commons-text/javadocs/api-release/org/apache/commons/text/matcher/StringMatcherFactory.html">
 * StringMatcherFactory</a> instead
 */
@Deprecated
public abstract class StrMatcher {

    /**
     * Matches the comma character.
     */
    private static final StrMatcher COMMA_MATCHER = new CharMatcher(',');
    /**
     * Matches the tab character.
     */
    private static final StrMatcher TAB_MATCHER = new CharMatcher('\t');
    /**
     * Matches the space character.
     */
    private static final StrMatcher SPACE_MATCHER = new CharMatcher(' ');
    /**
     * Matches the same characters as StringTokenizer,
     * namely space, tab, newline, carriage return and formfeed.
     */
    private static final StrMatcher SPLIT_MATCHER = new CharSetMatcher(" \t\n\r\f".toCharArray());
    /**
     * Matches the String trim() whitespace characters.
     */
    private static final StrMatcher TRIM_MATCHER = new TrimMatcher();
    /**
     * Matches the single quote character.
     */
    private static final StrMatcher SINGLE_QUOTE_MATCHER = new CharMatcher('\'');
    /**
     * Matches the double quote character.
     */
    private static final StrMatcher DOUBLE_QUOTE_MATCHER = new CharMatcher('"');
    /**
     * Matches the single or double quote character.
     */
    private static final StrMatcher QUOTE_MATCHER = new CharSetMatcher("'\"".toCharArray());
    /**
     * Matches no characters.
     */
    private static final StrMatcher NONE_MATCHER = new NoMatcher();

    // -----------------------------------------------------------------------

    /**
     * Returns a matcher which matches the comma character.
     *
     * @return a matcher for a comma
     */
    public static StrMatcher commaMatcher() {
        return COMMA_MATCHER;
    }

    /**
     * Returns a matcher which matches the tab character.
     *
     * @return a matcher for a tab
     */
    public static StrMatcher tabMatcher() {
        return TAB_MATCHER;
    }

    /**
     * Returns a matcher which matches the space character.
     *
     * @return a matcher for a space
     */
    public static StrMatcher spaceMatcher() {
        return SPACE_MATCHER;
    }

    /**
     * Matches the same characters as StringTokenizer,
     * namely space, tab, newline, carriage return and formfeed.
     *
     * @return the split matcher
     */
    public static StrMatcher splitMatcher() {
        return SPLIT_MATCHER;
    }

    /**
     * Matches the String trim() whitespace characters.
     *
     * @return the trim matcher
     */
    public static StrMatcher trimMatcher() {
        return TRIM_MATCHER;
    }

    /**
     * Returns a matcher which matches the single quote character.
     *
     * @return a matcher for a single quote
     */
    public static StrMatcher singleQuoteMatcher() {
        return SINGLE_QUOTE_MATCHER;
    }

    /**
     * Returns a matcher which matches the double quote character.
     *
     * @return a matcher for a double quote
     */
    public static StrMatcher doubleQuoteMatcher() {
        return DOUBLE_QUOTE_MATCHER;
    }

    /**
     * Returns a matcher which matches the single or double quote character.
     *
     * @return a matcher for a single or double quote
     */
    public static StrMatcher quoteMatcher() {
        return QUOTE_MATCHER;
    }

    /**
     * Matches no characters.
     *
     * @return a matcher that matches nothing
     */
    public static StrMatcher noneMatcher() {
        return NONE_MATCHER;
    }

    /**
     * Constructor that creates a matcher from a character.
     *
     * @param ch  the character to match
     * @return a new Matcher for the given char
     */
    public static StrMatcher charMatcher(final char ch) {
        return new CharMatcher(ch);
    }

    /**
     * Constructor that creates a matcher from a set of characters.
     * <p>
     * A single-element array yields a single-character matcher rather than
     * a set matcher.
     *
     * @param chars  the characters to match, null or empty matches nothing
     * @return a new matcher for the given char[]
     */
    public static StrMatcher charSetMatcher(final char... chars) {
        if (chars == null || chars.length == 0) {
            return NONE_MATCHER;
        }
        if (chars.length == 1) {
            return new CharMatcher(chars[0]);
        }
        return new CharSetMatcher(chars);
    }

    /**
     * Constructor that creates a matcher from a string representing a set of characters.
     * <p>
     * A one-character string yields a single-character matcher rather than
     * a set matcher.
     *
     * @param chars  the characters to match, null or empty matches nothing
     * @return a new Matcher for the given characters
     */
    public static StrMatcher charSetMatcher(final String chars) {
        if (StringUtils.isEmpty(chars)) {
            return NONE_MATCHER;
        }
        if (chars.length() == 1) {
            return new CharMatcher(chars.charAt(0));
        }
        return new CharSetMatcher(chars.toCharArray());
    }

    /**
     * Constructor that creates a matcher from a string.
     *
     * @param str  the string to match, null or empty matches nothing
     * @return a new Matcher for the given String
     */
    public static StrMatcher stringMatcher(final String str) {
        if (StringUtils.isEmpty(str)) {
            return NONE_MATCHER;
        }
        return new StringMatcher(str);
    }

    //-----------------------------------------------------------------------
    /**
     * Constructor.
     */
    protected StrMatcher() {
    }

    /**
     * Returns the number of matching characters, zero for no match.
     * <p>
     * This method is called to check for a match.
     * The parameter {@code pos} represents the current position to be
     * checked in the string {@code buffer} (a character array which must
     * not be changed).
     * The API guarantees that {@code pos} is a valid index for {@code buffer}.
     * <p>
     * The character array may be larger than the active area to be matched.
     * Only values in the buffer between the specified indices may be accessed.
     * <p>
     * The matching code may check one character or many.
     * It may check characters preceding {@code pos} as well as those
     * after, so long as no checks exceed the bounds specified.
     * <p>
     * It must return zero for no match, or a positive number if a match was found.
     * The number indicates the number of characters that matched.
     *
     * @param buffer  the text content to match against, do not change
     * @param pos  the starting position for the match, valid for buffer
     * @param bufferStart  the first active index in the buffer, valid for buffer
     * @param bufferEnd  the end index (exclusive) of the active buffer, valid for buffer
     * @return the number of matching characters, zero for no match
     */
    public abstract int isMatch(char[] buffer, int pos, int bufferStart, int bufferEnd);

    /**
     * Returns the number of matching characters, zero for no match.
     * <p>
     * This method is called to check for a match.
     * The parameter {@code pos} represents the current position to be
     * checked in the string {@code buffer} (a character array which must
     * not be changed).
     * The API guarantees that {@code pos} is a valid index for {@code buffer}.
     * <p>
     * The matching code may check one character or many.
     * It may check characters preceding {@code pos} as well as those after.
     * <p>
     * It must return zero for no match, or a positive number if a match was found.
     * The number indicates the number of characters that matched.
     * <p>
     * This convenience overload treats the whole array as the active area,
     * delegating with {@code bufferStart = 0} and {@code bufferEnd = buffer.length}.
     *
     * @param buffer  the text content to match against, do not change
     * @param pos  the starting position for the match, valid for buffer
     * @return the number of matching characters, zero for no match
     * @since 2.4
     */
    public int isMatch(final char[] buffer, final int pos) {
        return isMatch(buffer, pos, 0, buffer.length);
    }

    //-----------------------------------------------------------------------
    /**
     * Class used to define a set of characters for matching purposes.
     */
    static final class CharSetMatcher extends StrMatcher {
        /** The set of characters to match, kept sorted for binary search. */
        private final char[] chars;

        /**
         * Constructor that creates a matcher from a character array.
         * <p>
         * The array is cloned before sorting, so the caller's array is
         * neither reordered nor shared with this matcher.
         *
         * @param chars  the characters to match, must not be null
         */
        CharSetMatcher(final char[] chars) {
            this.chars = ArraySorter.sort(chars.clone());
        }

        /**
         * Returns whether or not the given character matches.
         * <p>
         * Only the single character at {@code pos} is examined.
         *
         * @param buffer  the text content to match against, do not change
         * @param pos  the starting position for the match, valid for buffer
         * @param bufferStart  the first active index in the buffer, valid for buffer
         * @param bufferEnd  the end index (exclusive) of the active buffer, valid for buffer
         * @return the number of matching characters, zero for no match
         */
        @Override
        public int isMatch(final char[] buffer, final int pos, final int bufferStart, final int bufferEnd) {
            return Arrays.binarySearch(chars, buffer[pos]) >= 0 ? 1 : 0;
        }
    }

    //-----------------------------------------------------------------------
    /**
     * Class used to define a character for matching purposes.
     */
    static final class CharMatcher extends StrMatcher {
        /** The character to match. */
        private final char ch;

        /**
         * Constructor that creates a matcher that matches a single character.
         *
         * @param ch  the character to match
         */
        CharMatcher(final char ch) {
            this.ch = ch;
        }

        /**
         * Returns whether or not the given character matches.
         *
         * @param buffer  the text content to match against, do not change
         * @param pos  the starting position for the match, valid for buffer
         * @param bufferStart  the first active index in the buffer, valid for buffer
         * @param bufferEnd  the end index (exclusive) of the active buffer, valid for buffer
         * @return the number of matching characters, zero for no match
         */
        @Override
        public int isMatch(final char[] buffer, final int pos, final int bufferStart, final int bufferEnd) {
            return ch == buffer[pos] ? 1 : 0;
        }
    }

    //-----------------------------------------------------------------------
    /**
     * Class used to define a literal string for matching purposes.
     */
    static final class StringMatcher extends StrMatcher {
        /** The string to match, as a character array. */
        private final char[] chars;

        /**
         * Constructor that creates a matcher from a String.
         *
         * @param str  the string to match, must not be null
         */
        StringMatcher(final String str) {
            chars = str.toCharArray();
        }

        /**
         * Returns whether or not the given text matches the stored string.
         *
         * @param buffer  the text content to match against, do not change
         * @param pos  the starting position for the match, valid for buffer
         * @param bufferStart  the first active index in the buffer, valid for buffer
         * @param bufferEnd  the end index (exclusive) of the active buffer, valid for buffer
         * @return the length of the stored string if it occurs at {@code pos}, zero for no match
         */
        @Override
        public int isMatch(final char[] buffer, int pos, final int bufferStart, final int bufferEnd) {
            final int len = chars.length;
            // The whole string must fit before the end of the active area.
            if (pos + len > bufferEnd) {
                return 0;
            }
            for (int i = 0; i < len; i++, pos++) {
                if (chars[i] != buffer[pos]) {
                    return 0;
                }
            }
            return len;
        }

        /**
         * Returns the default string form followed by the characters being matched.
         *
         * @return a description of this matcher
         */
        @Override
        public String toString() {
            return super.toString() + ' ' + Arrays.toString(chars);
        }

    }

    //-----------------------------------------------------------------------
    /**
     * Class used to match no characters.
     */
    static final class NoMatcher extends StrMatcher {

        /**
         * Constructs a new instance of {@code NoMatcher}.
         */
        NoMatcher() {
        }

        /**
         * Always returns zero, i.e. never matches.
         *
         * @param buffer  the text content to match against, do not change
         * @param pos  the starting position for the match, valid for buffer
         * @param bufferStart  the first active index in the buffer, valid for buffer
         * @param bufferEnd  the end index (exclusive) of the active buffer, valid for buffer
         * @return the number of matching characters, zero for no match
         */
        @Override
        public int isMatch(final char[] buffer, final int pos, final int bufferStart, final int bufferEnd) {
            return 0;
        }
    }

    //-----------------------------------------------------------------------
    /**
     * Class used to match whitespace as per trim().
     */
    static final class TrimMatcher extends StrMatcher {

        /**
         * Constructs a new instance of {@code TrimMatcher}.
         */
        TrimMatcher() {
        }

        /**
         * Returns whether or not the given character matches.
         * <p>
         * A character matches when its code is 32 (space) or lower.
         *
         * @param buffer  the text content to match against, do not change
         * @param pos  the starting position for the match, valid for buffer
         * @param bufferStart  the first active index in the buffer, valid for buffer
         * @param bufferEnd  the end index (exclusive) of the active buffer, valid for buffer
         * @return the number of matching characters, zero for no match
         */
        @Override
        public int isMatch(final char[] buffer, final int pos, final int bufferStart, final int bufferEnd) {
            return buffer[pos] <= 32 ? 1 : 0;
        }
    }

}