001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017package org.apache.commons.io;
018
019import java.io.File;
020import java.util.ArrayDeque;
021import java.util.ArrayList;
022import java.util.Arrays;
023import java.util.Collection;
024import java.util.Deque;
025import java.util.List;
026import java.util.Objects;
027import java.util.regex.Matcher;
028import java.util.regex.Pattern;
029
030/**
031 * General file name and file path manipulation utilities.
032 * <p>
033 * When dealing with file names you can hit problems when moving from a Windows
034 * based development machine to a Unix based production machine.
035 * This class aims to help avoid those problems.
036 * <p>
037 * <b>NOTE</b>: You may be able to avoid using this class entirely simply by
038 * using JDK {@link java.io.File File} objects and the two argument constructor
039 * {@link java.io.File#File(java.io.File, java.lang.String) File(File,String)}.
040 * <p>
041 * Most methods on this class are designed to work the same on both Unix and Windows.
 * Those that do not work the same include 'System', 'Unix' or 'Windows' in their name.
043 * <p>
044 * Most methods recognize both separators (forward and back), and both
045 * sets of prefixes. See the Javadoc of each method for details.
046 * <p>
047 * This class defines six components within a file name
048 * (example C:\dev\project\file.txt):
049 * <ul>
050 * <li>the prefix - C:\</li>
051 * <li>the path - dev\project\</li>
052 * <li>the full path - C:\dev\project\</li>
053 * <li>the name - file.txt</li>
054 * <li>the base name - file</li>
055 * <li>the extension - txt</li>
056 * </ul>
057 * Note that this class works best if directory file names end with a separator.
058 * If you omit the last separator, it is impossible to determine if the file name
059 * corresponds to a file or a directory. As a result, we have chosen to say
060 * it corresponds to a file.
061 * <p>
062 * This class only supports Unix and Windows style names.
063 * Prefixes are matched as follows:
064 * <pre>
065 * Windows:
066 * a\b\c.txt           --&gt; ""          --&gt; relative
067 * \a\b\c.txt          --&gt; "\"         --&gt; current drive absolute
068 * C:a\b\c.txt         --&gt; "C:"        --&gt; drive relative
069 * C:\a\b\c.txt        --&gt; "C:\"       --&gt; absolute
070 * \\server\a\b\c.txt  --&gt; "\\server\" --&gt; UNC
071 *
072 * Unix:
073 * a/b/c.txt           --&gt; ""          --&gt; relative
074 * /a/b/c.txt          --&gt; "/"         --&gt; absolute
075 * ~/a/b/c.txt         --&gt; "~/"        --&gt; current user
076 * ~                   --&gt; "~/"        --&gt; current user (slash added)
077 * ~user/a/b/c.txt     --&gt; "~user/"    --&gt; named user
078 * ~user               --&gt; "~user/"    --&gt; named user (slash added)
079 * </pre>
080 * Both prefix styles are matched always, irrespective of the machine that you are
081 * currently running on.
082 * <p>
083 * Origin of code: Excalibur, Alexandria, Tomcat, Commons-Utils.
084 *
085 * @since 1.1
086 */
087public class FilenameUtils {
088
089    private static final String[] EMPTY_STRING_ARRAY = {};
090
091    private static final String EMPTY_STRING = "";
092
093    private static final int NOT_FOUND = -1;
094
095    /**
096     * The extension separator character.
097     * @since 1.4
098     */
099    public static final char EXTENSION_SEPARATOR = '.';
100
101    /**
102     * The extension separator String.
103     * @since 1.4
104     */
105    public static final String EXTENSION_SEPARATOR_STR = Character.toString(EXTENSION_SEPARATOR);
106
107    /**
108     * The Unix separator character.
109     */
110    private static final char UNIX_SEPARATOR = '/';
111
112    /**
113     * The Windows separator character.
114     */
115    private static final char WINDOWS_SEPARATOR = '\\';
116
117    /**
118     * The system separator character.
119     */
120    private static final char SYSTEM_SEPARATOR = File.separatorChar;
121
122    /**
123     * The separator character that is the opposite of the system separator.
124     */
125    private static final char OTHER_SEPARATOR;
126    static {
127        if (isSystemWindows()) {
128            OTHER_SEPARATOR = UNIX_SEPARATOR;
129        } else {
130            OTHER_SEPARATOR = WINDOWS_SEPARATOR;
131        }
132    }
133
134    /**
135     * Instances should NOT be constructed in standard programming.
136     */
137    public FilenameUtils() {
138    }
139
140    /**
141     * Determines if Windows file system is in use.
142     *
143     * @return true if the system is Windows
144     */
145    static boolean isSystemWindows() {
146        return SYSTEM_SEPARATOR == WINDOWS_SEPARATOR;
147    }
148
149    /**
150     * Checks if the character is a separator.
151     *
152     * @param ch  the character to check
153     * @return true if it is a separator character
154     */
155    private static boolean isSeparator(final char ch) {
156        return ch == UNIX_SEPARATOR || ch == WINDOWS_SEPARATOR;
157    }
158
159    /**
160     * Normalizes a path, removing double and single dot path steps.
161     * <p>
162     * This method normalizes a path to a standard format.
163     * The input may contain separators in either Unix or Windows format.
164     * The output will contain separators in the format of the system.
165     * <p>
166     * A trailing slash will be retained.
167     * A double slash will be merged to a single slash (but UNC names are handled).
168     * A single dot path segment will be removed.
169     * A double dot will cause that path segment and the one before to be removed.
170     * If the double dot has no parent path segment to work with, {@code null}
171     * is returned.
172     * <p>
173     * The output will be the same on both Unix and Windows except
174     * for the separator character.
175     * <pre>
176     * /foo//               --&gt;   /foo/
177     * /foo/./              --&gt;   /foo/
178     * /foo/../bar          --&gt;   /bar
179     * /foo/../bar/         --&gt;   /bar/
180     * /foo/../bar/../baz   --&gt;   /baz
181     * //foo//./bar         --&gt;   /foo/bar
182     * /../                 --&gt;   null
183     * ../foo               --&gt;   null
184     * foo/bar/..           --&gt;   foo/
185     * foo/../../bar        --&gt;   null
186     * foo/../bar           --&gt;   bar
187     * //server/foo/../bar  --&gt;   //server/bar
188     * //server/../bar      --&gt;   null
189     * C:\foo\..\bar        --&gt;   C:\bar
190     * C:\..\bar            --&gt;   null
191     * ~/foo/../bar/        --&gt;   ~/bar/
192     * ~/../bar             --&gt;   null
193     * </pre>
194     * (Note the file separator returned will be correct for Windows/Unix)
195     *
196     * @param fileName  the fileName to normalize, null returns null
197     * @return the normalized fileName, or null if invalid. Null bytes inside string will be removed
198     */
199    public static String normalize(final String fileName) {
200        return doNormalize(fileName, SYSTEM_SEPARATOR, true);
201    }
202    /**
203     * Normalizes a path, removing double and single dot path steps.
204     * <p>
205     * This method normalizes a path to a standard format.
206     * The input may contain separators in either Unix or Windows format.
207     * The output will contain separators in the format specified.
208     * <p>
209     * A trailing slash will be retained.
210     * A double slash will be merged to a single slash (but UNC names are handled).
211     * A single dot path segment will be removed.
212     * A double dot will cause that path segment and the one before to be removed.
213     * If the double dot has no parent path segment to work with, {@code null}
214     * is returned.
215     * <p>
     * The output will be the same on both Unix and Windows, including
     * the separator character, which is selected by {@code unixSeparator}.
218     * <pre>
219     * /foo//               --&gt;   /foo/
220     * /foo/./              --&gt;   /foo/
221     * /foo/../bar          --&gt;   /bar
222     * /foo/../bar/         --&gt;   /bar/
223     * /foo/../bar/../baz   --&gt;   /baz
224     * //foo//./bar         --&gt;   /foo/bar
225     * /../                 --&gt;   null
226     * ../foo               --&gt;   null
227     * foo/bar/..           --&gt;   foo/
228     * foo/../../bar        --&gt;   null
229     * foo/../bar           --&gt;   bar
230     * //server/foo/../bar  --&gt;   //server/bar
231     * //server/../bar      --&gt;   null
232     * C:\foo\..\bar        --&gt;   C:\bar
233     * C:\..\bar            --&gt;   null
234     * ~/foo/../bar/        --&gt;   ~/bar/
235     * ~/../bar             --&gt;   null
236     * </pre>
237     * The output will be the same on both Unix and Windows including
238     * the separator character.
239     *
240     * @param fileName  the fileName to normalize, null returns null
241     * @param unixSeparator {@code true} if a unix separator should
242     * be used or {@code false} if a windows separator should be used.
243     * @return the normalized fileName, or null if invalid. Null bytes inside string will be removed
244     * @since 2.0
245     */
246    public static String normalize(final String fileName, final boolean unixSeparator) {
247        final char separator = unixSeparator ? UNIX_SEPARATOR : WINDOWS_SEPARATOR;
248        return doNormalize(fileName, separator, true);
249    }
250
251    /**
252     * Normalizes a path, removing double and single dot path steps,
253     * and removing any final directory separator.
254     * <p>
255     * This method normalizes a path to a standard format.
256     * The input may contain separators in either Unix or Windows format.
257     * The output will contain separators in the format of the system.
258     * <p>
259     * A trailing slash will be removed.
260     * A double slash will be merged to a single slash (but UNC names are handled).
261     * A single dot path segment will be removed.
262     * A double dot will cause that path segment and the one before to be removed.
263     * If the double dot has no parent path segment to work with, {@code null}
264     * is returned.
265     * <p>
266     * The output will be the same on both Unix and Windows except
267     * for the separator character.
268     * <pre>
269     * /foo//               --&gt;   /foo
270     * /foo/./              --&gt;   /foo
271     * /foo/../bar          --&gt;   /bar
272     * /foo/../bar/         --&gt;   /bar
273     * /foo/../bar/../baz   --&gt;   /baz
274     * //foo//./bar         --&gt;   /foo/bar
275     * /../                 --&gt;   null
276     * ../foo               --&gt;   null
277     * foo/bar/..           --&gt;   foo
278     * foo/../../bar        --&gt;   null
279     * foo/../bar           --&gt;   bar
280     * //server/foo/../bar  --&gt;   //server/bar
281     * //server/../bar      --&gt;   null
282     * C:\foo\..\bar        --&gt;   C:\bar
283     * C:\..\bar            --&gt;   null
284     * ~/foo/../bar/        --&gt;   ~/bar
285     * ~/../bar             --&gt;   null
286     * </pre>
287     * (Note the file separator returned will be correct for Windows/Unix)
288     *
289     * @param fileName  the fileName to normalize, null returns null
290     * @return the normalized fileName, or null if invalid. Null bytes inside string will be removed
291     */
292    public static String normalizeNoEndSeparator(final String fileName) {
293        return doNormalize(fileName, SYSTEM_SEPARATOR, false);
294    }
295
296    /**
297     * Normalizes a path, removing double and single dot path steps,
298     * and removing any final directory separator.
299     * <p>
300     * This method normalizes a path to a standard format.
301     * The input may contain separators in either Unix or Windows format.
302     * The output will contain separators in the format specified.
303     * <p>
304     * A trailing slash will be removed.
305     * A double slash will be merged to a single slash (but UNC names are handled).
306     * A single dot path segment will be removed.
307     * A double dot will cause that path segment and the one before to be removed.
308     * If the double dot has no parent path segment to work with, {@code null}
309     * is returned.
310     * <p>
311     * The output will be the same on both Unix and Windows including
312     * the separator character.
313     * <pre>
314     * /foo//               --&gt;   /foo
315     * /foo/./              --&gt;   /foo
316     * /foo/../bar          --&gt;   /bar
317     * /foo/../bar/         --&gt;   /bar
318     * /foo/../bar/../baz   --&gt;   /baz
319     * //foo//./bar         --&gt;   /foo/bar
320     * /../                 --&gt;   null
321     * ../foo               --&gt;   null
322     * foo/bar/..           --&gt;   foo
323     * foo/../../bar        --&gt;   null
324     * foo/../bar           --&gt;   bar
325     * //server/foo/../bar  --&gt;   //server/bar
326     * //server/../bar      --&gt;   null
327     * C:\foo\..\bar        --&gt;   C:\bar
328     * C:\..\bar            --&gt;   null
329     * ~/foo/../bar/        --&gt;   ~/bar
330     * ~/../bar             --&gt;   null
331     * </pre>
332     *
333     * @param fileName  the fileName to normalize, null returns null
334     * @param unixSeparator {@code true} if a unix separator should
335     * be used or {@code false} if a windows separator should be used.
336     * @return the normalized fileName, or null if invalid. Null bytes inside string will be removed
337     * @since 2.0
338     */
339    public static String normalizeNoEndSeparator(final String fileName, final boolean unixSeparator) {
340         final char separator = unixSeparator ? UNIX_SEPARATOR : WINDOWS_SEPARATOR;
341        return doNormalize(fileName, separator, false);
342    }
343
344    /**
345     * Internal method to perform the normalization.
346     *
347     * @param fileName  the fileName
348     * @param separator The separator character to use
349     * @param keepSeparator  true to keep the final separator
350     * @return the normalized fileName. Null bytes inside string will be removed.
351     */
352    private static String doNormalize(final String fileName, final char separator, final boolean keepSeparator) {
353        if (fileName == null) {
354            return null;
355        }
356
357        requireNonNullChars(fileName);
358
359        int size = fileName.length();
360        if (size == 0) {
361            return fileName;
362        }
363        final int prefix = getPrefixLength(fileName);
364        if (prefix < 0) {
365            return null;
366        }
367
368        final char[] array = new char[size + 2];  // +1 for possible extra slash, +2 for arraycopy
369        fileName.getChars(0, fileName.length(), array, 0);
370
371        // fix separators throughout
372        final char otherSeparator = separator == SYSTEM_SEPARATOR ? OTHER_SEPARATOR : SYSTEM_SEPARATOR;
373        for (int i = 0; i < array.length; i++) {
374            if (array[i] == otherSeparator) {
375                array[i] = separator;
376            }
377        }
378
379        // add extra separator on the end to simplify code below
380        boolean lastIsDirectory = true;
381        if (array[size - 1] != separator) {
382            array[size++] = separator;
383            lastIsDirectory = false;
384        }
385
386        // adjoining slashes
387        // If we get here, prefix can only be 0 or greater, size 1 or greater
388        // If prefix is 0, set loop start to 1 to prevent index errors
389        for (int i = (prefix != 0) ? prefix : 1; i < size; i++) {
390            if (array[i] == separator && array[i - 1] == separator) {
391                System.arraycopy(array, i, array, i - 1, size - i);
392                size--;
393                i--;
394            }
395        }
396
397        // dot slash
398        for (int i = prefix + 1; i < size; i++) {
399            if (array[i] == separator && array[i - 1] == '.' &&
400                    (i == prefix + 1 || array[i - 2] == separator)) {
401                if (i == size - 1) {
402                    lastIsDirectory = true;
403                }
404                System.arraycopy(array, i + 1, array, i - 1, size - i);
405                size -=2;
406                i--;
407            }
408        }
409
410        // double dot slash
411        outer:
412        for (int i = prefix + 2; i < size; i++) {
413            if (array[i] == separator && array[i - 1] == '.' && array[i - 2] == '.' &&
414                    (i == prefix + 2 || array[i - 3] == separator)) {
415                if (i == prefix + 2) {
416                    return null;
417                }
418                if (i == size - 1) {
419                    lastIsDirectory = true;
420                }
421                int j;
422                for (j = i - 4 ; j >= prefix; j--) {
423                    if (array[j] == separator) {
424                        // remove b/../ from a/b/../c
425                        System.arraycopy(array, i + 1, array, j + 1, size - i);
426                        size -= i - j;
427                        i = j + 1;
428                        continue outer;
429                    }
430                }
431                // remove a/../ from a/../c
432                System.arraycopy(array, i + 1, array, prefix, size - i);
433                size -= i + 1 - prefix;
434                i = prefix + 1;
435            }
436        }
437
438        if (size <= 0) {  // should never be less than 0
439            return EMPTY_STRING;
440        }
441        if (size <= prefix) {  // should never be less than prefix
442            return new String(array, 0, size);
443        }
444        if (lastIsDirectory && keepSeparator) {
445            return new String(array, 0, size);  // keep trailing separator
446        }
447        return new String(array, 0, size - 1);  // lose trailing separator
448    }
449
450    /**
451     * Concatenates a fileName to a base path using normal command line style rules.
452     * <p>
     * The effect is equivalent to the resultant directory after changing
454     * directory to the first argument, followed by changing directory to
455     * the second argument.
456     * <p>
457     * The first argument is the base path, the second is the path to concatenate.
458     * The returned path is always normalized via {@link #normalize(String)},
459     * thus {@code ..} is handled.
460     * <p>
461     * If {@code pathToAdd} is absolute (has an absolute prefix), then
462     * it will be normalized and returned.
463     * Otherwise, the paths will be joined, normalized and returned.
464     * <p>
465     * The output will be the same on both Unix and Windows except
466     * for the separator character.
467     * <pre>
468     * /foo/      + bar        --&gt;  /foo/bar
469     * /foo       + bar        --&gt;  /foo/bar
470     * /foo       + /bar       --&gt;  /bar
471     * /foo       + C:/bar     --&gt;  C:/bar
472     * /foo       + C:bar      --&gt;  C:bar (*)
473     * /foo/a/    + ../bar     --&gt;  /foo/bar
474     * /foo/      + ../../bar  --&gt;  null
475     * /foo/      + /bar       --&gt;  /bar
476     * /foo/..    + /bar       --&gt;  /bar
477     * /foo       + bar/c.txt  --&gt;  /foo/bar/c.txt
478     * /foo/c.txt + bar        --&gt;  /foo/c.txt/bar (!)
479     * </pre>
480     * (*) Note that the Windows relative drive prefix is unreliable when
481     * used with this method.
482     * (!) Note that the first parameter must be a path. If it ends with a name, then
483     * the name will be built into the concatenated path. If this might be a problem,
484     * use {@link #getFullPath(String)} on the base path argument.
485     *
486     * @param basePath  the base path to attach to, always treated as a path
487     * @param fullFileNameToAdd  the fileName (or path) to attach to the base
488     * @return the concatenated path, or null if invalid.  Null bytes inside string will be removed
489     */
490    public static String concat(final String basePath, final String fullFileNameToAdd) {
491        final int prefix = getPrefixLength(fullFileNameToAdd);
492        if (prefix < 0) {
493            return null;
494        }
495        if (prefix > 0) {
496            return normalize(fullFileNameToAdd);
497        }
498        if (basePath == null) {
499            return null;
500        }
501        final int len = basePath.length();
502        if (len == 0) {
503            return normalize(fullFileNameToAdd);
504        }
505        final char ch = basePath.charAt(len - 1);
506        if (isSeparator(ch)) {
507            return normalize(basePath + fullFileNameToAdd);
508        }
509        return normalize(basePath + '/' + fullFileNameToAdd);
510    }
511
512    /**
513     * Determines whether the {@code parent} directory contains the {@code child} element (a file or directory).
514     * <p>
     * The file names are expected to be normalized.
516     * </p>
517     *
518     * Edge cases:
519     * <ul>
     * <li>The {@code canonicalParent} must not be null: if null, a NullPointerException is thrown</li>
521     * <li>A directory does not contain itself: return false</li>
522     * <li>A null child file is not contained in any parent: return false</li>
523     * </ul>
524     *
525     * @param canonicalParent
526     *            the file to consider as the parent.
527     * @param canonicalChild
528     *            the file to consider as the child.
     * @return true if the child is located under the specified parent, false otherwise.
530     * @since 2.2
531     * @see FileUtils#directoryContains(File, File)
532     */
533    public static boolean directoryContains(final String canonicalParent, final String canonicalChild) {
534        Objects.requireNonNull(canonicalParent, "canonicalParent");
535
536        if (canonicalChild == null) {
537            return false;
538        }
539
540        if (IOCase.SYSTEM.checkEquals(canonicalParent, canonicalChild)) {
541            return false;
542        }
543
544        return IOCase.SYSTEM.checkStartsWith(canonicalChild, canonicalParent);
545    }
546
547    /**
548     * Converts all separators to the Unix separator of forward slash.
549     *
550     * @param path  the path to be changed, null ignored
551     * @return the updated path
552     */
553    public static String separatorsToUnix(final String path) {
554        if (path == null || path.indexOf(WINDOWS_SEPARATOR) == NOT_FOUND) {
555            return path;
556        }
557        return path.replace(WINDOWS_SEPARATOR, UNIX_SEPARATOR);
558    }
559
560    /**
561     * Converts all separators to the Windows separator of backslash.
562     *
563     * @param path  the path to be changed, null ignored
564     * @return the updated path
565     */
566    public static String separatorsToWindows(final String path) {
567        if (path == null || path.indexOf(UNIX_SEPARATOR) == NOT_FOUND) {
568            return path;
569        }
570        return path.replace(UNIX_SEPARATOR, WINDOWS_SEPARATOR);
571    }
572
573    /**
574     * Converts all separators to the system separator.
575     *
576     * @param path  the path to be changed, null ignored
577     * @return the updated path
578     */
579    public static String separatorsToSystem(final String path) {
580        if (path == null) {
581            return null;
582        }
583        return isSystemWindows() ? separatorsToWindows(path) : separatorsToUnix(path);
584    }
585
586    /**
587     * Returns the length of the fileName prefix, such as {@code C:/} or {@code ~/}.
588     * <p>
589     * This method will handle a file in either Unix or Windows format.
590     * <p>
591     * The prefix length includes the first slash in the full fileName
592     * if applicable. Thus, it is possible that the length returned is greater
593     * than the length of the input string.
594     * <pre>
595     * Windows:
596     * a\b\c.txt           --&gt; 0           --&gt; relative
597     * \a\b\c.txt          --&gt; 1           --&gt; current drive absolute
598     * C:a\b\c.txt         --&gt; 2           --&gt; drive relative
599     * C:\a\b\c.txt        --&gt; 3           --&gt; absolute
600     * \\server\a\b\c.txt  --&gt; 9           --&gt; UNC
601     * \\\a\b\c.txt        --&gt; -1          --&gt; error
602     *
603     * Unix:
604     * a/b/c.txt           --&gt; 0           --&gt; relative
605     * /a/b/c.txt          --&gt; 1           --&gt; absolute
606     * ~/a/b/c.txt         --&gt; 2           --&gt; current user
607     * ~                   --&gt; 2           --&gt; current user (slash added)
608     * ~user/a/b/c.txt     --&gt; 6           --&gt; named user
609     * ~user               --&gt; 6           --&gt; named user (slash added)
610     * //server/a/b/c.txt  --&gt; 9
611     * ///a/b/c.txt        --&gt; -1          --&gt; error
612     * C:                  --&gt; 0           --&gt; valid filename as only null byte and / are reserved characters
613     * </pre>
614     * <p>
615     * The output will be the same irrespective of the machine that the code is running on.
616     * ie. both Unix and Windows prefixes are matched regardless.
617     *
618     * Note that a leading // (or \\) is used to indicate a UNC name on Windows.
619     * These must be followed by a server name, so double-slashes are not collapsed
620     * to a single slash at the start of the fileName.
621     *
622     * @param fileName  the fileName to find the prefix in, null returns -1
623     * @return the length of the prefix, -1 if invalid or null
624     */
625    public static int getPrefixLength(final String fileName) {
626        if (fileName == null) {
627            return NOT_FOUND;
628        }
629        final int len = fileName.length();
630        if (len == 0) {
631            return 0;
632        }
633        char ch0 = fileName.charAt(0);
634        if (ch0 == ':') {
635            return NOT_FOUND;
636        }
637        if (len == 1) {
638            if (ch0 == '~') {
639                return 2;  // return a length greater than the input
640            }
641            return isSeparator(ch0) ? 1 : 0;
642        }
643        if (ch0 == '~') {
644            int posUnix = fileName.indexOf(UNIX_SEPARATOR, 1);
645            int posWin = fileName.indexOf(WINDOWS_SEPARATOR, 1);
646            if (posUnix == NOT_FOUND && posWin == NOT_FOUND) {
647                return len + 1;  // return a length greater than the input
648            }
649            posUnix = posUnix == NOT_FOUND ? posWin : posUnix;
650            posWin = posWin == NOT_FOUND ? posUnix : posWin;
651            return Math.min(posUnix, posWin) + 1;
652        }
653        final char ch1 = fileName.charAt(1);
654        if (ch1 == ':') {
655            ch0 = Character.toUpperCase(ch0);
656            if (ch0 >= 'A' && ch0 <= 'Z') {
657                if (len == 2 && !FileSystem.getCurrent().supportsDriveLetter()) {
658                    return 0;
659                }
660                if (len == 2 || !isSeparator(fileName.charAt(2))) {
661                    return 2;
662                }
663                return 3;
664            }
665            if (ch0 == UNIX_SEPARATOR) {
666                return 1;
667            }
668            return NOT_FOUND;
669
670        }
671        if (!isSeparator(ch0) || !isSeparator(ch1)) {
672            return isSeparator(ch0) ? 1 : 0;
673        }
674        int posUnix = fileName.indexOf(UNIX_SEPARATOR, 2);
675        int posWin = fileName.indexOf(WINDOWS_SEPARATOR, 2);
676        if (posUnix == NOT_FOUND && posWin == NOT_FOUND || posUnix == 2 || posWin == 2) {
677            return NOT_FOUND;
678        }
679        posUnix = posUnix == NOT_FOUND ? posWin : posUnix;
680        posWin = posWin == NOT_FOUND ? posUnix : posWin;
681        final int pos = Math.min(posUnix, posWin) + 1;
682        final String hostnamePart = fileName.substring(2, pos - 1);
683        return isValidHostName(hostnamePart) ? pos : NOT_FOUND;
684    }
685
686    /**
687     * Returns the index of the last directory separator character.
688     * <p>
689     * This method will handle a file in either Unix or Windows format.
690     * The position of the last forward or backslash is returned.
691     * <p>
692     * The output will be the same irrespective of the machine that the code is running on.
693     *
694     * @param fileName  the fileName to find the last path separator in, null returns -1
695     * @return the index of the last separator character, or -1 if there
696     * is no such character
697     */
698    public static int indexOfLastSeparator(final String fileName) {
699        if (fileName == null) {
700            return NOT_FOUND;
701        }
702        final int lastUnixPos = fileName.lastIndexOf(UNIX_SEPARATOR);
703        final int lastWindowsPos = fileName.lastIndexOf(WINDOWS_SEPARATOR);
704        return Math.max(lastUnixPos, lastWindowsPos);
705    }
706
707    /**
708     * Returns the index of the last extension separator character, which is a dot.
709     * <p>
710     * This method also checks that there is no directory separator after the last dot. To do this it uses
711     * {@link #indexOfLastSeparator(String)} which will handle a file in either Unix or Windows format.
712     * </p>
713     * <p>
714     * The output will be the same irrespective of the machine that the code is running on, with the
715     * exception of a possible {@link IllegalArgumentException} on Windows (see below).
716     * </p>
717     * <b>Note:</b> This method used to have a hidden problem for names like "foo.exe:bar.txt".
718     * In this case, the name wouldn't be the name of a file, but the identifier of an
719     * alternate data stream (bar.txt) on the file foo.exe. The method used to return
720     * ".txt" here, which would be misleading. Commons IO 2.7, and later versions, are throwing
721     * an {@link IllegalArgumentException} for names like this.
722     *
723     * @param fileName
724     *            the fileName to find the last extension separator in, null returns -1
725     * @return the index of the last extension separator character, or -1 if there is no such character
726     * @throws IllegalArgumentException <b>Windows only:</b> The fileName parameter is, in fact,
727     * the identifier of an Alternate Data Stream, for example "foo.exe:bar.txt".
728     */
729    public static int indexOfExtension(final String fileName) throws IllegalArgumentException {
730        if (fileName == null) {
731            return NOT_FOUND;
732        }
733        if (isSystemWindows()) {
734            // Special handling for NTFS ADS: Don't accept colon in the fileName.
735            final int offset = fileName.indexOf(':', getAdsCriticalOffset(fileName));
736            if (offset != -1) {
737                throw new IllegalArgumentException("NTFS ADS separator (':') in file name is forbidden.");
738            }
739        }
740        final int extensionPos = fileName.lastIndexOf(EXTENSION_SEPARATOR);
741        final int lastSeparator = indexOfLastSeparator(fileName);
742        return lastSeparator > extensionPos ? NOT_FOUND : extensionPos;
743    }
744
745    /**
746     * Gets the prefix from a full fileName, such as {@code C:/}
747     * or {@code ~/}.
748     * <p>
749     * This method will handle a file in either Unix or Windows format.
750     * The prefix includes the first slash in the full fileName where applicable.
751     * <pre>
752     * Windows:
753     * a\b\c.txt           --&gt; ""          --&gt; relative
754     * \a\b\c.txt          --&gt; "\"         --&gt; current drive absolute
755     * C:a\b\c.txt         --&gt; "C:"        --&gt; drive relative
756     * C:\a\b\c.txt        --&gt; "C:\"       --&gt; absolute
757     * \\server\a\b\c.txt  --&gt; "\\server\" --&gt; UNC
758     *
759     * Unix:
760     * a/b/c.txt           --&gt; ""          --&gt; relative
761     * /a/b/c.txt          --&gt; "/"         --&gt; absolute
762     * ~/a/b/c.txt         --&gt; "~/"        --&gt; current user
763     * ~                   --&gt; "~/"        --&gt; current user (slash added)
764     * ~user/a/b/c.txt     --&gt; "~user/"    --&gt; named user
765     * ~user               --&gt; "~user/"    --&gt; named user (slash added)
766     * </pre>
767     * <p>
768     * The output will be the same irrespective of the machine that the code is running on.
769     * ie. both Unix and Windows prefixes are matched regardless.
770     *
771     * @param fileName  the fileName to query, null returns null
772     * @return the prefix of the file, null if invalid. Null bytes inside string will be removed
773     */
774    public static String getPrefix(final String fileName) {
775        if (fileName == null) {
776            return null;
777        }
778        final int len = getPrefixLength(fileName);
779        if (len < 0) {
780            return null;
781        }
782        if (len > fileName.length()) {
783            requireNonNullChars(fileName + UNIX_SEPARATOR);
784            return fileName + UNIX_SEPARATOR;
785        }
786        final String path = fileName.substring(0, len);
787        requireNonNullChars(path);
788        return path;
789    }
790
791    /**
792     * Gets the path from a full fileName, which excludes the prefix.
793     * <p>
794     * This method will handle a file in either Unix or Windows format.
795     * The method is entirely text based, and returns the text before and
796     * including the last forward or backslash.
797     * <pre>
798     * C:\a\b\c.txt --&gt; a\b\
799     * ~/a/b/c.txt  --&gt; a/b/
800     * a.txt        --&gt; ""
801     * a/b/c        --&gt; a/b/
802     * a/b/c/       --&gt; a/b/c/
803     * </pre>
804     * <p>
805     * The output will be the same irrespective of the machine that the code is running on.
806     * <p>
807     * This method drops the prefix from the result.
808     * See {@link #getFullPath(String)} for the method that retains the prefix.
809     *
810     * @param fileName  the fileName to query, null returns null
811     * @return the path of the file, an empty string if none exists, null if invalid.
812     * Null bytes inside string will be removed
813     */
814    public static String getPath(final String fileName) {
815        return doGetPath(fileName, 1);
816    }
817
818    /**
819     * Gets the path from a full fileName, which excludes the prefix, and
820     * also excluding the final directory separator.
821     * <p>
822     * This method will handle a file in either Unix or Windows format.
823     * The method is entirely text based, and returns the text before the
824     * last forward or backslash.
825     * <pre>
826     * C:\a\b\c.txt --&gt; a\b
827     * ~/a/b/c.txt  --&gt; a/b
828     * a.txt        --&gt; ""
829     * a/b/c        --&gt; a/b
830     * a/b/c/       --&gt; a/b/c
831     * </pre>
832     * <p>
833     * The output will be the same irrespective of the machine that the code is running on.
834     * <p>
835     * This method drops the prefix from the result.
836     * See {@link #getFullPathNoEndSeparator(String)} for the method that retains the prefix.
837     *
838     * @param fileName  the fileName to query, null returns null
839     * @return the path of the file, an empty string if none exists, null if invalid.
840     * Null bytes inside string will be removed
841     */
842    public static String getPathNoEndSeparator(final String fileName) {
843        return doGetPath(fileName, 0);
844    }
845
846    /**
847     * Does the work of getting the path.
848     *
849     * @param fileName  the fileName
850     * @param separatorAdd  0 to omit the end separator, 1 to return it
851     * @return the path. Null bytes inside string will be removed
852     */
853    private static String doGetPath(final String fileName, final int separatorAdd) {
854        if (fileName == null) {
855            return null;
856        }
857        final int prefix = getPrefixLength(fileName);
858        if (prefix < 0) {
859            return null;
860        }
861        final int index = indexOfLastSeparator(fileName);
862        final int endIndex = index+separatorAdd;
863        if (prefix >= fileName.length() || index < 0 || prefix >= endIndex) {
864            return EMPTY_STRING;
865        }
866        final String path = fileName.substring(prefix, endIndex);
867        requireNonNullChars(path);
868        return path;
869    }
870
871    /**
872     * Gets the full path from a full fileName, which is the prefix + path.
873     * <p>
874     * This method will handle a file in either Unix or Windows format.
875     * The method is entirely text based, and returns the text before and
876     * including the last forward or backslash.
877     * <pre>
878     * C:\a\b\c.txt --&gt; C:\a\b\
879     * ~/a/b/c.txt  --&gt; ~/a/b/
880     * a.txt        --&gt; ""
881     * a/b/c        --&gt; a/b/
882     * a/b/c/       --&gt; a/b/c/
883     * C:           --&gt; C:
884     * C:\          --&gt; C:\
885     * ~            --&gt; ~/
886     * ~/           --&gt; ~/
887     * ~user        --&gt; ~user/
888     * ~user/       --&gt; ~user/
889     * </pre>
890     * <p>
891     * The output will be the same irrespective of the machine that the code is running on.
892     *
893     * @param fileName  the fileName to query, null returns null
894     * @return the path of the file, an empty string if none exists, null if invalid
895     */
896    public static String getFullPath(final String fileName) {
897        return doGetFullPath(fileName, true);
898    }
899
900    /**
901     * Gets the full path from a full fileName, which is the prefix + path,
902     * and also excluding the final directory separator.
903     * <p>
904     * This method will handle a file in either Unix or Windows format.
905     * The method is entirely text based, and returns the text before the
906     * last forward or backslash.
907     * <pre>
908     * C:\a\b\c.txt --&gt; C:\a\b
909     * ~/a/b/c.txt  --&gt; ~/a/b
910     * a.txt        --&gt; ""
911     * a/b/c        --&gt; a/b
912     * a/b/c/       --&gt; a/b/c
913     * C:           --&gt; C:
914     * C:\          --&gt; C:\
915     * ~            --&gt; ~
916     * ~/           --&gt; ~
917     * ~user        --&gt; ~user
918     * ~user/       --&gt; ~user
919     * </pre>
920     * <p>
921     * The output will be the same irrespective of the machine that the code is running on.
922     *
923     * @param fileName  the fileName to query, null returns null
924     * @return the path of the file, an empty string if none exists, null if invalid
925     */
926    public static String getFullPathNoEndSeparator(final String fileName) {
927        return doGetFullPath(fileName, false);
928    }
929
930    /**
931     * Does the work of getting the path.
932     *
933     * @param fileName  the fileName
934     * @param includeSeparator  true to include the end separator
935     * @return the path
936     */
937    private static String doGetFullPath(final String fileName, final boolean includeSeparator) {
938        if (fileName == null) {
939            return null;
940        }
941        final int prefix = getPrefixLength(fileName);
942        if (prefix < 0) {
943            return null;
944        }
945        if (prefix >= fileName.length()) {
946            if (includeSeparator) {
947                return getPrefix(fileName);  // add end slash if necessary
948            }
949            return fileName;
950        }
951        final int index = indexOfLastSeparator(fileName);
952        if (index < 0) {
953            return fileName.substring(0, prefix);
954        }
955        int end = index + (includeSeparator ?  1 : 0);
956        if (end == 0) {
957            end++;
958        }
959        return fileName.substring(0, end);
960    }
961
962    /**
963     * Gets the name minus the path from a full fileName.
964     * <p>
965     * This method will handle a file in either Unix or Windows format.
966     * The text after the last forward or backslash is returned.
967     * <pre>
968     * a/b/c.txt --&gt; c.txt
969     * a.txt     --&gt; a.txt
970     * a/b/c     --&gt; c
971     * a/b/c/    --&gt; ""
972     * </pre>
973     * <p>
974     * The output will be the same irrespective of the machine that the code is running on.
975     *
976     * @param fileName  the fileName to query, null returns null
977     * @return the name of the file without the path, or an empty string if none exists.
978     * Null bytes inside string will be removed
979     */
980    public static String getName(final String fileName) {
981        if (fileName == null) {
982            return null;
983        }
984        requireNonNullChars(fileName);
985        final int index = indexOfLastSeparator(fileName);
986        return fileName.substring(index + 1);
987    }
988
989    /**
990     * Checks the input for null bytes, a sign of unsanitized data being passed to to file level functions.
991     *
992     * This may be used for poison byte attacks.
993     *
994     * @param path the path to check
995     */
996    private static void requireNonNullChars(final String path) {
997        if (path.indexOf(0) >= 0) {
998            throw new IllegalArgumentException("Null byte present in file/path name. There are no "
999                + "known legitimate use cases for such data, but several injection attacks may use it");
1000        }
1001    }
1002
1003    /**
1004     * Gets the base name, minus the full path and extension, from a full fileName.
1005     * <p>
1006     * This method will handle a file in either Unix or Windows format.
1007     * The text after the last forward or backslash and before the last dot is returned.
1008     * <pre>
1009     * a/b/c.txt --&gt; c
1010     * a.txt     --&gt; a
1011     * a/b/c     --&gt; c
1012     * a/b/c/    --&gt; ""
1013     * </pre>
1014     * <p>
1015     * The output will be the same irrespective of the machine that the code is running on.
1016     *
1017     * @param fileName  the fileName to query, null returns null
1018     * @return the name of the file without the path, or an empty string if none exists. Null bytes inside string
1019     * will be removed
1020     */
1021    public static String getBaseName(final String fileName) {
1022        return removeExtension(getName(fileName));
1023    }
1024
1025    /**
1026     * Gets the extension of a fileName.
1027     * <p>
1028     * This method returns the textual part of the fileName after the last dot.
1029     * There must be no directory separator after the dot.
1030     * <pre>
1031     * foo.txt      --&gt; "txt"
1032     * a/b/c.jpg    --&gt; "jpg"
1033     * a/b.txt/c    --&gt; ""
1034     * a/b/c        --&gt; ""
1035     * </pre>
1036     * <p>
1037     * The output will be the same irrespective of the machine that the code is running on, with the
1038     * exception of a possible {@link IllegalArgumentException} on Windows (see below).
1039     * </p>
1040     * <p>
1041     * <b>Note:</b> This method used to have a hidden problem for names like "foo.exe:bar.txt".
1042     * In this case, the name wouldn't be the name of a file, but the identifier of an
1043     * alternate data stream (bar.txt) on the file foo.exe. The method used to return
1044     * ".txt" here, which would be misleading. Commons IO 2.7, and later versions, are throwing
1045     * an {@link IllegalArgumentException} for names like this.
1046     *
1047     * @param fileName the fileName to retrieve the extension of.
1048     * @return the extension of the file or an empty string if none exists or {@code null}
1049     * if the fileName is {@code null}.
1050     * @throws IllegalArgumentException <b>Windows only:</b> The fileName parameter is, in fact,
1051     * the identifier of an Alternate Data Stream, for example "foo.exe:bar.txt".
1052     */
1053    public static String getExtension(final String fileName) throws IllegalArgumentException {
1054        if (fileName == null) {
1055            return null;
1056        }
1057        final int index = indexOfExtension(fileName);
1058        if (index == NOT_FOUND) {
1059            return EMPTY_STRING;
1060        }
1061        return fileName.substring(index + 1);
1062    }
1063
1064    /**
1065     * Special handling for NTFS ADS: Don't accept colon in the fileName.
1066     *
1067     * @param fileName a file name
1068     * @return ADS offsets.
1069     */
1070    private static int getAdsCriticalOffset(final String fileName) {
1071        // Step 1: Remove leading path segments.
1072        final int offset1 = fileName.lastIndexOf(SYSTEM_SEPARATOR);
1073        final int offset2 = fileName.lastIndexOf(OTHER_SEPARATOR);
1074        if (offset1 == -1) {
1075            if (offset2 == -1) {
1076                return 0;
1077            }
1078            return offset2 + 1;
1079        }
1080        if (offset2 == -1) {
1081            return offset1 + 1;
1082        }
1083        return Math.max(offset1, offset2) + 1;
1084    }
1085
1086    /**
1087     * Removes the extension from a fileName.
1088     * <p>
1089     * This method returns the textual part of the fileName before the last dot.
1090     * There must be no directory separator after the dot.
1091     * <pre>
1092     * foo.txt    --&gt; foo
1093     * a\b\c.jpg  --&gt; a\b\c
1094     * a\b\c      --&gt; a\b\c
1095     * a.b\c      --&gt; a.b\c
1096     * </pre>
1097     * <p>
1098     * The output will be the same irrespective of the machine that the code is running on.
1099     *
1100     * @param fileName  the fileName to query, null returns null
1101     * @return the fileName minus the extension
1102     */
1103    public static String removeExtension(final String fileName) {
1104        if (fileName == null) {
1105            return null;
1106        }
1107        requireNonNullChars(fileName);
1108
1109        final int index = indexOfExtension(fileName);
1110        if (index == NOT_FOUND) {
1111            return fileName;
1112        }
1113        return fileName.substring(0, index);
1114    }
1115
1116    /**
1117     * Checks whether two fileNames are equal exactly.
1118     * <p>
1119     * No processing is performed on the fileNames other than comparison,
1120     * thus this is merely a null-safe case-sensitive equals.
1121     *
1122     * @param fileName1  the first fileName to query, may be null
1123     * @param fileName2  the second fileName to query, may be null
1124     * @return true if the fileNames are equal, null equals null
1125     * @see IOCase#SENSITIVE
1126     */
1127    public static boolean equals(final String fileName1, final String fileName2) {
1128        return equals(fileName1, fileName2, false, IOCase.SENSITIVE);
1129    }
1130
1131    /**
1132     * Checks whether two fileNames are equal using the case rules of the system.
1133     * <p>
1134     * No processing is performed on the fileNames other than comparison.
1135     * The check is case-sensitive on Unix and case-insensitive on Windows.
1136     *
1137     * @param fileName1  the first fileName to query, may be null
1138     * @param fileName2  the second fileName to query, may be null
1139     * @return true if the fileNames are equal, null equals null
1140     * @see IOCase#SYSTEM
1141     */
1142    public static boolean equalsOnSystem(final String fileName1, final String fileName2) {
1143        return equals(fileName1, fileName2, false, IOCase.SYSTEM);
1144    }
1145
1146    /**
1147     * Checks whether two fileNames are equal after both have been normalized.
1148     * <p>
1149     * Both fileNames are first passed to {@link #normalize(String)}.
1150     * The check is then performed in a case-sensitive manner.
1151     *
1152     * @param fileName1  the first fileName to query, may be null
1153     * @param fileName2  the second fileName to query, may be null
1154     * @return true if the fileNames are equal, null equals null
1155     * @see IOCase#SENSITIVE
1156     */
1157    public static boolean equalsNormalized(final String fileName1, final String fileName2) {
1158        return equals(fileName1, fileName2, true, IOCase.SENSITIVE);
1159    }
1160
1161    /**
1162     * Checks whether two fileNames are equal after both have been normalized
1163     * and using the case rules of the system.
1164     * <p>
1165     * Both fileNames are first passed to {@link #normalize(String)}.
1166     * The check is then performed case-sensitive on Unix and
1167     * case-insensitive on Windows.
1168     *
1169     * @param fileName1  the first fileName to query, may be null
1170     * @param fileName2  the second fileName to query, may be null
1171     * @return true if the fileNames are equal, null equals null
1172     * @see IOCase#SYSTEM
1173     */
1174    public static boolean equalsNormalizedOnSystem(final String fileName1, final String fileName2) {
1175        return equals(fileName1, fileName2, true, IOCase.SYSTEM);
1176    }
1177
1178    /**
1179     * Checks whether two fileNames are equal, optionally normalizing and providing
1180     * control over the case-sensitivity.
1181     *
1182     * @param fileName1  the first fileName to query, may be null
1183     * @param fileName2  the second fileName to query, may be null
1184     * @param normalized  whether to normalize the fileNames
1185     * @param caseSensitivity  what case sensitivity rule to use, null means case-sensitive
1186     * @return true if the fileNames are equal, null equals null
1187     * @since 1.3
1188     */
1189    public static boolean equals(
1190            String fileName1, String fileName2,
1191            final boolean normalized, IOCase caseSensitivity) {
1192
1193        if (fileName1 == null || fileName2 == null) {
1194            return fileName1 == null && fileName2 == null;
1195        }
1196        if (normalized) {
1197            fileName1 = normalize(fileName1);
1198            if (fileName1 == null) {
1199                return false;
1200            }
1201            fileName2 = normalize(fileName2);
1202            if (fileName2 == null) {
1203                return false;
1204            }
1205        }
1206        if (caseSensitivity == null) {
1207            caseSensitivity = IOCase.SENSITIVE;
1208        }
1209        return caseSensitivity.checkEquals(fileName1, fileName2);
1210    }
1211
1212    /**
1213     * Checks whether the extension of the fileName is that specified.
1214     * <p>
1215     * This method obtains the extension as the textual part of the fileName
1216     * after the last dot. There must be no directory separator after the dot.
1217     * The extension check is case-sensitive on all platforms.
1218     *
1219     * @param fileName  the fileName to query, null returns false
1220     * @param extension  the extension to check for, null or empty checks for no extension
1221     * @return true if the fileName has the specified extension
1222     * @throws java.lang.IllegalArgumentException if the supplied fileName contains null bytes
1223     */
1224    public static boolean isExtension(final String fileName, final String extension) {
1225        if (fileName == null) {
1226            return false;
1227        }
1228        requireNonNullChars(fileName);
1229
1230        if (extension == null || extension.isEmpty()) {
1231            return indexOfExtension(fileName) == NOT_FOUND;
1232        }
1233        final String fileExt = getExtension(fileName);
1234        return fileExt.equals(extension);
1235    }
1236
1237    /**
1238     * Checks whether the extension of the fileName is one of those specified.
1239     * <p>
1240     * This method obtains the extension as the textual part of the fileName
1241     * after the last dot. There must be no directory separator after the dot.
1242     * The extension check is case-sensitive on all platforms.
1243     *
1244     * @param fileName  the fileName to query, null returns false
1245     * @param extensions  the extensions to check for, null checks for no extension
1246     * @return true if the fileName is one of the extensions
1247     * @throws java.lang.IllegalArgumentException if the supplied fileName contains null bytes
1248     */
1249    public static boolean isExtension(final String fileName, final String... extensions) {
1250        if (fileName == null) {
1251            return false;
1252        }
1253        requireNonNullChars(fileName);
1254
1255        if (extensions == null || extensions.length == 0) {
1256            return indexOfExtension(fileName) == NOT_FOUND;
1257        }
1258        final String fileExt = getExtension(fileName);
1259        for (final String extension : extensions) {
1260            if (fileExt.equals(extension)) {
1261                return true;
1262            }
1263        }
1264        return false;
1265    }
1266
1267    /**
1268     * Checks whether the extension of the fileName is one of those specified.
1269     * <p>
1270     * This method obtains the extension as the textual part of the fileName
1271     * after the last dot. There must be no directory separator after the dot.
1272     * The extension check is case-sensitive on all platforms.
1273     *
1274     * @param fileName  the fileName to query, null returns false
1275     * @param extensions  the extensions to check for, null checks for no extension
1276     * @return true if the fileName is one of the extensions
1277     * @throws java.lang.IllegalArgumentException if the supplied fileName contains null bytes
1278     */
1279    public static boolean isExtension(final String fileName, final Collection<String> extensions) {
1280        if (fileName == null) {
1281            return false;
1282        }
1283        requireNonNullChars(fileName);
1284
1285        if (extensions == null || extensions.isEmpty()) {
1286            return indexOfExtension(fileName) == NOT_FOUND;
1287        }
1288        final String fileExt = getExtension(fileName);
1289        for (final String extension : extensions) {
1290            if (fileExt.equals(extension)) {
1291                return true;
1292            }
1293        }
1294        return false;
1295    }
1296
1297    /**
1298     * Checks a fileName to see if it matches the specified wildcard matcher,
1299     * always testing case-sensitive.
1300     * <p>
1301     * The wildcard matcher uses the characters '?' and '*' to represent a
1302     * single or multiple (zero or more) wildcard characters.
1303     * This is the same as often found on Dos/Unix command lines.
1304     * The check is case-sensitive always.
1305     * <pre>
1306     * wildcardMatch("c.txt", "*.txt")      --&gt; true
1307     * wildcardMatch("c.txt", "*.jpg")      --&gt; false
1308     * wildcardMatch("a/b/c.txt", "a/b/*")  --&gt; true
1309     * wildcardMatch("c.txt", "*.???")      --&gt; true
1310     * wildcardMatch("c.txt", "*.????")     --&gt; false
1311     * </pre>
1312     * N.B. the sequence "*?" does not work properly at present in match strings.
1313     *
1314     * @param fileName  the fileName to match on
1315     * @param wildcardMatcher  the wildcard string to match against
1316     * @return true if the fileName matches the wildcard string
1317     * @see IOCase#SENSITIVE
1318     */
1319    public static boolean wildcardMatch(final String fileName, final String wildcardMatcher) {
1320        return wildcardMatch(fileName, wildcardMatcher, IOCase.SENSITIVE);
1321    }
1322
1323    /**
1324     * Checks a fileName to see if it matches the specified wildcard matcher
1325     * using the case rules of the system.
1326     * <p>
1327     * The wildcard matcher uses the characters '?' and '*' to represent a
1328     * single or multiple (zero or more) wildcard characters.
1329     * This is the same as often found on Dos/Unix command lines.
1330     * The check is case-sensitive on Unix and case-insensitive on Windows.
1331     * <pre>
1332     * wildcardMatch("c.txt", "*.txt")      --&gt; true
1333     * wildcardMatch("c.txt", "*.jpg")      --&gt; false
1334     * wildcardMatch("a/b/c.txt", "a/b/*")  --&gt; true
1335     * wildcardMatch("c.txt", "*.???")      --&gt; true
1336     * wildcardMatch("c.txt", "*.????")     --&gt; false
1337     * </pre>
1338     * N.B. the sequence "*?" does not work properly at present in match strings.
1339     *
1340     * @param fileName  the fileName to match on
1341     * @param wildcardMatcher  the wildcard string to match against
1342     * @return true if the fileName matches the wildcard string
1343     * @see IOCase#SYSTEM
1344     */
1345    public static boolean wildcardMatchOnSystem(final String fileName, final String wildcardMatcher) {
1346        return wildcardMatch(fileName, wildcardMatcher, IOCase.SYSTEM);
1347    }
1348
1349    /**
1350     * Checks a fileName to see if it matches the specified wildcard matcher
1351     * allowing control over case-sensitivity.
1352     * <p>
1353     * The wildcard matcher uses the characters '?' and '*' to represent a
1354     * single or multiple (zero or more) wildcard characters.
1355     * N.B. the sequence "*?" does not work properly at present in match strings.
1356     *
1357     * @param fileName  the fileName to match on
1358     * @param wildcardMatcher  the wildcard string to match against
1359     * @param caseSensitivity  what case sensitivity rule to use, null means case-sensitive
1360     * @return true if the fileName matches the wildcard string
1361     * @since 1.3
1362     */
1363    public static boolean wildcardMatch(final String fileName, final String wildcardMatcher, IOCase caseSensitivity) {
1364        if (fileName == null && wildcardMatcher == null) {
1365            return true;
1366        }
1367        if (fileName == null || wildcardMatcher == null) {
1368            return false;
1369        }
1370        if (caseSensitivity == null) {
1371            caseSensitivity = IOCase.SENSITIVE;
1372        }
1373        final String[] wcs = splitOnTokens(wildcardMatcher);
1374        boolean anyChars = false;
1375        int textIdx = 0;
1376        int wcsIdx = 0;
1377        final Deque<int[]> backtrack = new ArrayDeque<>(wcs.length);
1378
1379        // loop around a backtrack stack, to handle complex * matching
1380        do {
1381            if (!backtrack.isEmpty()) {
1382                final int[] array = backtrack.pop();
1383                wcsIdx = array[0];
1384                textIdx = array[1];
1385                anyChars = true;
1386            }
1387
1388            // loop whilst tokens and text left to process
1389            while (wcsIdx < wcs.length) {
1390
1391                if (wcs[wcsIdx].equals("?")) {
1392                    // ? so move to next text char
1393                    textIdx++;
1394                    if (textIdx > fileName.length()) {
1395                        break;
1396                    }
1397                    anyChars = false;
1398
1399                } else if (wcs[wcsIdx].equals("*")) {
1400                    // set any chars status
1401                    anyChars = true;
1402                    if (wcsIdx == wcs.length - 1) {
1403                        textIdx = fileName.length();
1404                    }
1405
1406                } else {
1407                    // matching text token
1408                    if (anyChars) {
1409                        // any chars then try to locate text token
1410                        textIdx = caseSensitivity.checkIndexOf(fileName, textIdx, wcs[wcsIdx]);
1411                        if (textIdx == NOT_FOUND) {
1412                            // token not found
1413                            break;
1414                        }
1415                        final int repeat = caseSensitivity.checkIndexOf(fileName, textIdx + 1, wcs[wcsIdx]);
1416                        if (repeat >= 0) {
1417                            backtrack.push(new int[] {wcsIdx, repeat});
1418                        }
1419                    } else if (!caseSensitivity.checkRegionMatches(fileName, textIdx, wcs[wcsIdx])) {
1420                        // matching from current position
1421                        // couldn't match token
1422                        break;
1423                    }
1424
1425                    // matched text token, move text index to end of matched token
1426                    textIdx += wcs[wcsIdx].length();
1427                    anyChars = false;
1428                }
1429
1430                wcsIdx++;
1431            }
1432
1433            // full match
1434            if (wcsIdx == wcs.length && textIdx == fileName.length()) {
1435                return true;
1436            }
1437
1438        } while (!backtrack.isEmpty());
1439
1440        return false;
1441    }
1442
1443    /**
1444     * Splits a string into a number of tokens.
1445     * The text is split by '?' and '*'.
1446     * Where multiple '*' occur consecutively they are collapsed into a single '*'.
1447     *
1448     * @param text  the text to split
1449     * @return the array of tokens, never null
1450     */
1451    static String[] splitOnTokens(final String text) {
1452        // used by wildcardMatch
1453        // package level so a unit test may run on this
1454
1455        if (text.indexOf('?') == NOT_FOUND && text.indexOf('*') == NOT_FOUND) {
1456            return new String[] { text };
1457        }
1458
1459        final char[] array = text.toCharArray();
1460        final ArrayList<String> list = new ArrayList<>();
1461        final StringBuilder buffer = new StringBuilder();
1462        char prevChar = 0;
1463        for (final char ch : array) {
1464            if (ch == '?' || ch == '*') {
1465                if (buffer.length() != 0) {
1466                    list.add(buffer.toString());
1467                    buffer.setLength(0);
1468                }
1469                if (ch == '?') {
1470                    list.add("?");
1471                } else if (prevChar != '*') {// ch == '*' here; check if previous char was '*'
1472                    list.add("*");
1473                }
1474            } else {
1475                buffer.append(ch);
1476            }
1477            prevChar = ch;
1478        }
1479        if (buffer.length() != 0) {
1480            list.add(buffer.toString());
1481        }
1482
1483        return list.toArray(EMPTY_STRING_ARRAY);
1484    }
1485
1486    /**
1487     * Checks whether a given string is a valid host name according to
1488     * RFC 3986.
1489     *
1490     * <p>Accepted are IP addresses (v4 and v6) as well as what the
1491     * RFC calls a "reg-name". Percent encoded names don't seem to be
1492     * valid names in UNC paths.</p>
1493     *
1494     * @see "https://tools.ietf.org/html/rfc3986#section-3.2.2"
1495     * @param name the hostname to validate
1496     * @return true if the given name is a valid host name
1497     */
1498    private static boolean isValidHostName(final String name) {
1499        return isIPv6Address(name) || isRFC3986HostName(name);
1500    }
1501
    /** Matches four dot separated groups of one to three digits and captures each group. */
    private static final Pattern IPV4_PATTERN =
        Pattern.compile("^(\\d{1,3})\\.(\\d{1,3})\\.(\\d{1,3})\\.(\\d{1,3})$");
    /** Largest value accepted for a single segment of an IPv4 address. */
    private static final int IPV4_MAX_OCTET_VALUE = 255;
1505
1506    /**
1507     * Checks whether a given string represents a valid IPv4 address.
1508     *
1509     * @param name the name to validate
1510     * @return true if the given name is a valid IPv4 address
1511     */
1512    // mostly copied from org.apache.commons.validator.routines.InetAddressValidator#isValidInet4Address
1513    private static boolean isIPv4Address(final String name) {
1514        final Matcher m = IPV4_PATTERN.matcher(name);
1515        if (!m.matches() || m.groupCount() != 4) {
1516            return false;
1517        }
1518
1519        // verify that address subgroups are legal
1520        for (int i = 1; i <= 4; i++) {
1521            final String ipSegment = m.group(i);
1522            final int iIpSegment = Integer.parseInt(ipSegment);
1523            if (iIpSegment > IPV4_MAX_OCTET_VALUE) {
1524                return false;
1525            }
1526
1527            if (ipSegment.length() > 1 && ipSegment.startsWith("0")) {
1528                return false;
1529            }
1530
1531        }
1532
1533        return true;
1534    }
1535
    /** Maximum number of colon separated groups in an IPv6 address. */
    private static final int IPV6_MAX_HEX_GROUPS = 8;
    /** Maximum number of hexadecimal digits in a single IPv6 group. */
    private static final int IPV6_MAX_HEX_DIGITS_PER_GROUP = 4;
    /** Upper bound for the numeric value of a single IPv6 group. */
    private static final int MAX_UNSIGNED_SHORT = 0xffff;
    /** Radix used when reading the hexadecimal groups of an IPv6 address. */
    private static final int BASE_16 = 16;
1540
1541    // copied from org.apache.commons.validator.routines.InetAddressValidator#isValidInet6Address
1542    /**
1543     * Checks whether a given string represents a valid IPv6 address.
1544     *
1545     * @param inet6Address the name to validate
1546     * @return true if the given name is a valid IPv6 address
1547     */
1548    private static boolean isIPv6Address(final String inet6Address) {
1549        final boolean containsCompressedZeroes = inet6Address.contains("::");
1550        if (containsCompressedZeroes && (inet6Address.indexOf("::") != inet6Address.lastIndexOf("::"))) {
1551            return false;
1552        }
1553        if ((inet6Address.startsWith(":") && !inet6Address.startsWith("::"))
1554                || (inet6Address.endsWith(":") && !inet6Address.endsWith("::"))) {
1555            return false;
1556        }
1557        String[] octets = inet6Address.split(":");
1558        if (containsCompressedZeroes) {
1559            final List<String> octetList = new ArrayList<>(Arrays.asList(octets));
1560            if (inet6Address.endsWith("::")) {
1561                // String.split() drops ending empty segments
1562                octetList.add("");
1563            } else if (inet6Address.startsWith("::") && !octetList.isEmpty()) {
1564                octetList.remove(0);
1565            }
1566            octets = octetList.toArray(EMPTY_STRING_ARRAY);
1567        }
1568        if (octets.length > IPV6_MAX_HEX_GROUPS) {
1569            return false;
1570        }
1571        int validOctets = 0;
1572        int emptyOctets = 0; // consecutive empty chunks
1573        for (int index = 0; index < octets.length; index++) {
1574            final String octet = octets[index];
1575            if (octet.isEmpty()) {
1576                emptyOctets++;
1577                if (emptyOctets > 1) {
1578                    return false;
1579                }
1580            } else {
1581                emptyOctets = 0;
1582                // Is last chunk an IPv4 address?
1583                if (index == octets.length - 1 && octet.contains(".")) {
1584                    if (!isIPv4Address(octet)) {
1585                        return false;
1586                    }
1587                    validOctets += 2;
1588                    continue;
1589                }
1590                if (octet.length() > IPV6_MAX_HEX_DIGITS_PER_GROUP) {
1591                    return false;
1592                }
1593                final int octetInt;
1594                try {
1595                    octetInt = Integer.parseInt(octet, BASE_16);
1596                } catch (final NumberFormatException e) {
1597                    return false;
1598                }
1599                if (octetInt < 0 || octetInt > MAX_UNSIGNED_SHORT) {
1600                    return false;
1601                }
1602            }
1603            validOctets++;
1604        }
1605        return validOctets <= IPV6_MAX_HEX_GROUPS && (validOctets >= IPV6_MAX_HEX_GROUPS || containsCompressedZeroes);
1606    }
1607
    /**
     * Matches a single dot-free part of a host name: an ASCII letter or digit
     * followed by any number of ASCII letters, digits or hyphens.
     */
    private static final Pattern REG_NAME_PART_PATTERN = Pattern.compile("^[a-zA-Z0-9][a-zA-Z0-9-]*$");
1609
1610    /**
1611     * Checks whether a given string is a valid host name according to
1612     * RFC 3986 - not accepting IP addresses.
1613     *
1614     * @see "https://tools.ietf.org/html/rfc3986#section-3.2.2"
1615     * @param name the hostname to validate
1616     * @return true if the given name is a valid host name
1617     */
1618    private static boolean isRFC3986HostName(final String name) {
1619        final String[] parts = name.split("\\.", -1);
1620        for (int i = 0; i < parts.length; i++) {
1621            if (parts[i].isEmpty()) {
1622                // trailing dot is legal, otherwise we've hit a .. sequence
1623                return i == parts.length - 1;
1624            }
1625            if (!REG_NAME_PART_PATTERN.matcher(parts[i]).matches()) {
1626                return false;
1627            }
1628        }
1629        return true;
1630    }
1631}