diff --git a/README.md b/README.md index 56ec788..7b0ca8a 100644 --- a/README.md +++ b/README.md @@ -81,6 +81,10 @@ Special Terms | Meaning ------------- | ------- `*` | matches any sequence of non-path-separators `/**/` | matches zero or more directories +`[^abc]` | matches any single character except `a`, `b`, or `c` +`[!abc]` | identical to `[^abc]` +`[^a-z]` | matches any single character outside the range `a` to `z` +`[!a-z]` | identical to `[^a-z]` Any character with a special meaning can be escaped with a backslash (`\`). diff --git a/default.nix b/default.nix index 41af8e7..e094039 100644 --- a/default.nix +++ b/default.nix @@ -105,18 +105,24 @@ let # # Examples: # match "a*/b" "abc/b" # Returns true + # match "a*/∫" "abc/∫" # Returns true # match "a*/b" "a/c/b" # Returns false + # match "a*/∫" "a/c/∫" # Returns false # match "**/c" "a/b/c" # Returns true # match "**/c" "a/b" # Returns false # match "a\\*b" "ab" # Returns false # match "a\\*b" "a*b" # Returns true + # match "å\\*b" "åb" # Returns false + # match "å\\*b" "å*b" # Returns true match = pattern: name: let patLen = stringLength pattern; nameLen = stringLength name; - charAt = str: i: substring i 1 str; + charAt = str: i: internal.decodeUtf8 str i; + + nextCharLen = str: i: stringLength (charAt str i); isSeparator = char: char == "/"; @@ -136,6 +142,8 @@ let isEscape = patChar == "\\"; isClass = patChar == "["; nextPatChar = if (patIdx + 1) < patLen then charAt pattern (patIdx + 1) else ""; + nameCharLen = nextCharLen name nameIdx; + patCharLen = nextCharLen pattern patIdx; in if isStar then handleStar args @@ -147,16 +155,16 @@ let false else if nextPatChar == nameChar then doMatch (args // { - nameIdx = nameIdx + 1; - patIdx = patIdx + 2; + nameIdx = nameIdx + nameCharLen; + patIdx = patIdx + patCharLen + (nextCharLen pattern (patIdx + 1)); startOfSegment = isSeparator nameChar; }) else handleBacktrack args else if patChar == nameChar then doMatch (args // { - nameIdx 
= nameIdx + 1; - patIdx = patIdx + 1; + nameIdx = nameIdx + nameCharLen; + patIdx = patIdx + patCharLen; startOfSegment = isSeparator patChar; }) else @@ -173,11 +181,12 @@ let handleCharClass = args: let classInfo = internal.parseCharClass pattern args.patIdx; - matches = internal.matchesCharClass classInfo.content - (charAt name args.nameIdx); + nameChar = charAt name args.nameIdx; + nameCharLen = nextCharLen name args.nameIdx; + matches = internal.matchesCharClass classInfo.content nameChar; in if (if classInfo.isNegated then !matches else matches) then doMatch (args // { - nameIdx = args.nameIdx + 1; + nameIdx = args.nameIdx + nameCharLen; patIdx = classInfo.endIdx + 1; startOfSegment = false; }) @@ -187,7 +196,7 @@ let handleStar = args: let # Check ahead for a second '*'. - nextPatIdx = args.patIdx + 1; + nextPatIdx = args.patIdx + nextCharLen pattern args.patIdx; isDoublestar = nextPatIdx < patLen && charAt pattern nextPatIdx == "*"; @@ -195,17 +204,15 @@ let inherit (args) nameIdx; # Doublestar must begin with separator, otherwise we're going to # treat it like a single star like bash. - patIdx = nextPatIdx + (if isDoublestar then 1 else 0); + patIdx = nextPatIdx + (if isDoublestar then nextCharLen pattern nextPatIdx else 0); }; # Doublestar must also end with separator, treating as single star. - doublestarAfterChar = charAt pattern (nextPatIdx + 1); + doublestarAfterChar = charAt pattern (nextPatIdx + (if isDoublestar then nextCharLen pattern nextPatIdx else 0)); doublestarBacktrack = { inherit (args) nameIdx; - # Add two to be after the separator. - # e.g. '**/?' where nextPatIdx is index of '?'. 
- patIdx = nextPatIdx + 2; + patIdx = nextPatIdx + (2 * nextCharLen pattern nextPatIdx); }; in @@ -231,13 +238,13 @@ let let starBacktrack = { inherit (args.starBacktrack) patIdx; - nameIdx = args.starBacktrack.nameIdx + 1; + nameIdx = args.starBacktrack.nameIdx + nextCharLen name args.starBacktrack.nameIdx; }; starNameChar = charAt name args.starBacktrack.nameIdx; nextSeparatorIdx = - internal.findNextSeparator name args.doublestarBacktrack.nameIdx; + internal.findUnescapedChar name args.doublestarBacktrack.nameIdx [ "/" ]; doublestarBacktrack = { inherit (args.doublestarBacktrack) patIdx; diff --git a/dev/flake.lock b/dev/flake.lock index 6b203b3..ff79baf 100644 --- a/dev/flake.lock +++ b/dev/flake.lock @@ -2,11 +2,11 @@ "nodes": { "nixpkgs": { "locked": { - "lastModified": 1729540133, - "narHash": "sha256-dnsuHMwINdJbqDOiFYL869Aqp5TDGjnOWGicq7hjUr4=", + "lastModified": 1772898310, + "narHash": "sha256-cJ2LJgqTYCxth8TK4CKyIRWEt779eI622ATo9pdTqLQ=", "owner": "nixos", "repo": "nixpkgs", - "rev": "1bdef915db69c83bcefed550ac82852f7026f3bd", + "rev": "a686b1af21fad0eaf19ec88073c9a8461fbd9f1b", "type": "github" }, "original": { diff --git a/flake.lock b/flake.lock index f2b5a18..2f52521 100644 --- a/flake.lock +++ b/flake.lock @@ -17,7 +17,23 @@ }, "root": { "inputs": { - "nixpkgs-lib": "nixpkgs-lib" + "nixpkgs-lib": "nixpkgs-lib", + "utf8": "utf8" + } + }, + "utf8": { + "locked": { + "lastModified": 1771960330, + "narHash": "sha256-cuMzkLOAcw9Vt9KLVv/3MdLeuwvHK2uborUKiJgXr/U=", + "owner": "figsoda", + "repo": "utf8", + "rev": "4ef55eff198b6b21cd1e5df5dc6b802ec16185f1", + "type": "github" + }, + "original": { + "owner": "figsoda", + "repo": "utf8", + "type": "github" } } }, diff --git a/flake.nix b/flake.nix index a765de9..7f6ce79 100644 --- a/flake.nix +++ b/flake.nix @@ -2,9 +2,10 @@ description = "Simplify Nix source management using familiar glob patterns"; inputs.nixpkgs-lib.url = "github:nix-community/nixpkgs.lib"; + inputs.utf8.url = "github:figsoda/utf8"; - 
outputs = { self, nixpkgs-lib }: - let + outputs = { self, nixpkgs-lib, utf8 }: + let inherit (builtins) fromJSON readFile @@ -27,17 +28,17 @@ pkgsFor = system: import nixpkgs { inherit system; }; - globset = import self { inherit (nixpkgs-lib) lib; }; + globset = import self { lib = nixpkgs-lib.lib // { utf8 = utf8.lib; }; }; in { lib = globset; tests = forAllSystems (system: import ./internal/tests.nix { - lib = nixpkgs-lib.lib // { inherit globset; }; + lib = nixpkgs-lib.lib // { inherit globset; utf8 = utf8.lib; }; }); packages = forAllSystems (system: { - default = (import ./integration-tests.nix { pkgs = pkgsFor system; }); - integration-tests = (import ./integration-tests.nix { pkgs = pkgsFor system; }); + default = (import ./integration-tests.nix { pkgs = pkgsFor system; utf8 = utf8.lib; }); + integration-tests = (import ./integration-tests.nix { pkgs = pkgsFor system; utf8 = utf8.lib; }); }); checks = forAllSystems (system: { @@ -48,11 +49,12 @@ --eval-store "$HOME" \ --extra-experimental-features flakes \ --override-input nixpkgs-lib ${nixpkgs-lib} \ + --override-input utf8 ${utf8} \ --flake ${self}#tests touch $out ''; - integration-tests = (import ./integration-tests.nix { pkgs = pkgsFor system; }); + integration-tests = (import ./integration-tests.nix { pkgs = pkgsFor system; utf8 = utf8.lib; }); }); }; } diff --git a/integration-tests.nix b/integration-tests.nix index 7ed70e7..dbbf0b9 100644 --- a/integration-tests.nix +++ b/integration-tests.nix @@ -1,39 +1,44 @@ -{ pkgs }: +{ pkgs, utf8 ? pkgs.lib.utf8 }: let - lib = pkgs.lib; + lib = pkgs.lib // { inherit utf8; }; globset = import ./. 
{ inherit lib; }; testRoot = ./test-data; + sanitizePath = p: builtins.unsafeDiscardStringContext (toString p); + normalizeFileset = fileset: builtins.sort builtins.lessThan - (map (p: lib.removePrefix "${toString testRoot}/" (toString p)) + (map (p: lib.removePrefix "${toString testRoot}/" (sanitizePath p)) (lib.fileset.toList fileset)); runTest = name: result: expected: pkgs.stdenv.mkDerivation { - name = "test-${name}"; - src = null; - dontUnpack = true; - doCheck = true; - checkPhase = '' - #!/usr/bin/env bash + name = "test-${lib.strings.sanitizeDerivationName name}"; + passAsFile = [ "expectedJson" "resultJson" ]; + expectedJson = builtins.toJSON expected; + resultJson = builtins.toJSON result; + + builder = pkgs.writeShellScript "builder.sh" '' + source $stdenv/setup + # Create output directory + mkdir -p $out echo "Testing ${name}..." - expected='${builtins.toJSON expected}' - result='${builtins.toJSON result}' - if [ "$result" = "$expected" ]; then - echo "PASS: ${name}" + # Compare the JSON files + if diff -u "$expectedJsonPath" "$resultJsonPath" > $out/diff; then + echo "PASS: ${name}" | tee $out/result exit 0 else - echo "FAIL: ${name}" - echo "Expected: $expected" - echo "Got: $result" + echo "FAIL: ${name}" | tee $out/result + echo "Expected:" | tee -a $out/result + cat "$expectedJsonPath" | tee -a $out/result + echo "Got:" | tee -a $out/result + cat "$resultJsonPath" | tee -a $out/result exit 1 fi ''; - buildPhase = '' - touch $out - ''; + dontUnpack = true; + nativeBuildInputs = [ pkgs.diffutils ]; }; testCases = { @@ -45,6 +50,19 @@ let "pkg/lib/utils.go" ]; + testUTFChars = runTest "globs files with an utf8 char match constraint" + (normalizeFileset (globset.globs testRoot [ "gø.*" "**/*.gø" ])) [ + "foo*.gø" + "foo.gø" + "gø.foo" + ]; + + testUTFCharsWithNegation = runTest "globs files with an utf8 char match constraint with negation" + (normalizeFileset (globset.globs testRoot [ "gø.*" "**/*.gø" "!*.foo" ])) [ + "foo*.gø" + "foo.gø" + ]; + 
testCProject = runTest "globs all C files that aren't tests" (normalizeFileset (globset.globs testRoot [ "**/*.c" "**/*.h" "!**/test_*.c" ])) [ @@ -90,6 +108,11 @@ let result = normalizeFileset testFileset; in runTest "escaping" result [ "src/foo*.c" ]; + testEscapingWithUTF8 = let + testFileset = globset.globs testRoot [ "foo\\*.gø" ]; + result = normalizeFileset testFileset; + in runTest "escaping with utf-8" result [ "foo*.gø" ]; + testGlobsOrdering = runTest "globs ordering" (normalizeFileset (globset.globs testRoot [ "**/*.c" "!**/test_*.c" "src/test/**/*.c" ])) [ "src/foo*.c" @@ -134,10 +157,20 @@ let "src/foobar.c" "src/lib.c" ]; + + testCharClassWithUTF8 = runTest "character class matching w/ utf-8" + (normalizeFileset (globset.glob testRoot "*.g[ø¬˚]")) [ + "foo*.gø" + "foo.gø" + ]; testCharClassWithEscaping = runTest "character class matching w/ escaping" (normalizeFileset (globset.glob testRoot "src/[e-g]oo\\*.c")) [ "src/foo*.c" ]; + + testCharClassWithEscapingAndUTF8 = runTest "character class matching w/ escaping and utf8" + (normalizeFileset (globset.glob testRoot "[e-g]oo\\*.[f-h][ø¬˚]")) + [ "foo*.gø" ]; testCharClassWithEscaping2 = runTest "character class matching w/ escaping 2" (normalizeFileset (globset.glob testRoot "src/[e-g]oo\\-.[oc]")) @@ -151,6 +184,12 @@ let "src/foo[.o" "src/foo].o" ]; + + testCharClassWithEscapingInsideClassAndUTF8 = + runTest "character class matching w/ escaping inside class and utf8" + (normalizeFileset (globset.glob testRoot "[e-g]oo[\\*].gø")) [ + "foo*.gø" + ]; testMultipleCharClassWithEscaping = runTest "multiple character class matching w/ escaping" @@ -160,6 +199,10 @@ let testCharRange = runTest "character range matching" (normalizeFileset (globset.glob testRoot "**/[a-m]*.py")) [ "scripts/main.py" ]; + + testCharRangeWithUTF8 = runTest "character range matching with utf8" + (normalizeFileset (globset.glob testRoot "**/*.g[ø-ÿ]")) + [ "foo*.gø" "foo.gø" ]; testNegatedClass = runTest "negated character class" 
(normalizeFileset (globset.glob testRoot "src/[^t]*.c")) [ @@ -168,6 +211,16 @@ let "src/lib.c" "src/main.c" ]; + + testNegatedClassWithUTF8 = runTest "negated character class w/ utf8" + (normalizeFileset (globset.glob testRoot "g[^˜∂∆].foo")) [ + "gø.foo" + ]; + + testAlternateNegatedClassWithUTF8 = runTest "alternate negated character class w/ utf8" + (normalizeFileset (globset.glob testRoot "g[!˜∂∆].foo")) [ + "gø.foo" + ]; testNegatedClassMultiple = runTest "negated character class multiple" (normalizeFileset (globset.glob testRoot "src/[^lt]*.c")) [ @@ -208,7 +261,7 @@ let testEmptyCharClass = runTest "empty char class" (normalizeFileset (globset.glob testRoot "src/[]*.c")) [ ]; - + testBasicBrace = runTest "simple brace expansion" (normalizeFileset (globset.glob testRoot "src/*.{c,h,x}")) [ "src/bar1.x" @@ -221,6 +274,13 @@ let "src/lib.h" "src/main.c" ]; + + testBasicBraceWithUTF8 = runTest "simple brace expansion w/ utf8" + (normalizeFileset (globset.glob testRoot "g{o,ø}.*")) [ + "go.mod" + "go.sum" + "gø.foo" + ]; testEmptyBrace = runTest "empty alternatives in brace" (normalizeFileset (globset.glob testRoot "src/{,test/}*.c")) [ @@ -230,6 +290,12 @@ let "src/main.c" "src/test/test_main.c" ]; + + testEmptyBraceWithUTF8 = runTest "empty alternatives in brace w/ utf-8" + (normalizeFileset (globset.glob testRoot "foo{,\\*}.gø")) [ + "foo*.gø" + "foo.gø" + ]; testMultipleBraces = runTest "multiple brace expressions" (normalizeFileset (globset.glob testRoot "{src,scripts}/{main,utils}.{c,py}")) [ @@ -237,6 +303,12 @@ let "scripts/utils.py" "src/main.c" ]; + + testMultipleBracesWithUTF8 = runTest "multiple brace expressions w/ utf-8" (normalizeFileset + (globset.glob testRoot "{foo,foo*}.{go,gø}")) [ + "foo*.gø" + "foo.gø" + ]; testBracesWithEscapedAsterisk = runTest "Braces with escaped asterisk" (normalizeFileset (globset.globs testRoot [ "src/{,foo\\*}.c" ])) @@ -282,6 +354,12 @@ let "src/foo1.x" "src/foo2.x" ]; + + testBracesWithRangeInsideAndUTF8 = runTest "Braces 
with range inside w/ utf8" + (normalizeFileset + (globset.globs testRoot [ "foo.{g[ø-ÿ]}" ])) [ + "foo.gø" + ]; testBracesWithEmptyResult = runTest "Braces with empty result" (normalizeFileset (globset.globs testRoot [ "{foo,bar}/*.c" ])) [ ]; @@ -364,7 +442,7 @@ let ])) [ "pkg/lib/utils.go" ]; - + testComplexPattern5 = runTest "complex pattern combining multiple features 5" (normalizeFileset (globset.globs testRoot [ "**/*.{[c-x],go,nix}" @@ -374,10 +452,7 @@ let ]; }; -in pkgs.runCommand "run-all-tests" { - nativeBuildInputs = [ pkgs.bash ]; - buildInputs = builtins.attrValues testCases; -} '' - mkdir -p $out - echo "All tests passed!" > $out/result -'' + runAllTests = pkgs.linkFarm "run-all-tests" + (map (drv: { name = drv.name; path = drv; }) (builtins.attrValues testCases)); + +in runAllTests diff --git a/internal/default.nix b/internal/default.nix index 57ef07e..ff78814 100644 --- a/internal/default.nix +++ b/internal/default.nix @@ -10,10 +10,12 @@ let stringLength substring tail + elem elemAt ; inherit (lib) + utf8 hasInfix hasPrefix concatLists @@ -28,30 +30,33 @@ let ; in rec { - findOpenBrace = str: idx: - let len = stringLength str; - in if idx >= len then -1 - else if substring idx 1 str == "{" && (idx == 0 || (idx > 0 && substring (idx - 1) 1 str != "\\")) then idx - else findOpenBrace str (idx + 1); - - findCloseBrace = str: idx: - if idx >= stringLength str then -1 - else if substring idx 1 str == "}" && (idx == 0 || (idx > 0 && substring (idx - 1) 1 str != "\\")) then idx - else findCloseBrace str (idx + 1); - - findNextComma = str: idx: len: - if idx >= len then -1 - else if substring idx 1 str == "," && - (idx == 0 || substring (idx - 1) 1 str != "\\") - then idx - else findNextComma str (idx + 1) len; + decodeUtf8 = str: offset: + let + remaining = substring offset (stringLength str - offset) str; + in + if remaining == "" then "" else head (utf8.chars remaining); + + findUnescapedChar = str: idx: chars: + let + find = i: + if i >= stringLength str then -1 + else + let + 
curChar = decodeUtf8 str i; + prevPrevChar = if i > 1 then decodeUtf8 str (i - 2) else ""; + prevChar = if i > 0 then decodeUtf8 str (i - 1) else ""; + isEscaped = prevChar == "\\" && prevPrevChar != "\\"; + in + if elem curChar chars && !isEscaped then i + else find (i + 1); + in find idx; collectParts = str: let len = stringLength str; doCollect = start: parts: let - nextComma = findNextComma str start len; + nextComma = findUnescapedChar str start [ "," ]; segment = if start < 0 || len < 0 then "" else substring start (if nextComma == -1 then len - start @@ -63,8 +68,8 @@ in rec { parseAlternates = pattern: let - openIdx = findOpenBrace pattern 0; - closeIdx = if openIdx == -1 then -1 else findCloseBrace pattern (openIdx + 1); + openIdx = findUnescapedChar pattern 0 [ "{" ]; + closeIdx = if openIdx == -1 then -1 else findUnescapedChar pattern (openIdx + 1) [ "}" ]; in if openIdx == -1 || closeIdx == -1 then { prefix = ""; alternates = [ pattern ]; suffix = ""; } else { prefix = substring 0 openIdx pattern; @@ -88,7 +93,7 @@ in rec { globSegments' = root: pattern: firstSegment: let - patternStart = firstUnescapedMeta pattern; + patternStart = findUnescapedChar pattern 0 [ "*" "[" ]; splitIndex = lastIndexSlash pattern; @@ -186,32 +191,14 @@ in rec { || pattern == "/**" || pattern == "**/" || pattern == "/**/"; - - firstUnescapedMeta = str: - let - chars = stringToCharacters str; - - find = i: chars: - if chars == [] then -1 - else let - char = head chars; - rest = tail chars; - in - if char == "*" || char == "[" then i - else if char == "\\" then - if rest == [] then -1 - else find (i + 2) (tail rest) - else find (i + 1) rest; - - in find 0 chars; lastIndexSlash = str: let len = stringLength str; isUnescapedSlash = i: - (substring i 1 str == "/") && - (i == 0 || substring (i - 1) 1 str != "\\"); + (decodeUtf8 str i == "/") && + (i == 0 || decodeUtf8 str (i - 1) != "\\"); findLastSlash = i: if i < 0 then -1 @@ -220,19 +207,8 @@ in rec { in findLastSlash (len - 1); 
- findNextSeparator = str: startIdx: - let - len = stringLength str; - - findSeparator = i: - if i >= len then -1 - else if substring i 1 str == "/" then i - else findSeparator (i + 1); - - in findSeparator startIdx; - unescapeMeta = chars: str: - replaceStrings + replaceStrings (map (c: "\\" + c) chars) chars str; @@ -251,28 +227,12 @@ in rec { parseCharClass = str: startIdx: let len = stringLength str; - - findClosingBracket = idx: - if idx >= len then - -1 - else - let - char = substring idx 1 str; - nextChar = - if (idx + 1) < len then substring (idx + 1) 1 str else ""; - in if char == "\\" && nextChar == "]" then - findClosingBracket (idx + 2) - else if char == "]" && idx > startIdx + 1 then - idx - else - findClosingBracket (idx + 1); - - endIdx = findClosingBracket (startIdx + 1); + endIdx = findUnescapedChar str (startIdx + 1) [ "]" ]; rawContent = substring (startIdx + 1) (endIdx - startIdx - 1) str; - firstChar = substring (startIdx + 1) 1 str; + firstChar = decodeUtf8 str (startIdx + 1); content = - let chars = stringToCharacters rawContent; + let chars = utf8.chars rawContent; isNegation = firstChar == "^" || firstChar == "!"; skipFirst = if isNegation then tail chars else chars; in concatStrings skipFirst; @@ -287,6 +247,7 @@ in rec { Examples: matchesCharClass "abc" "b" => true # Direct match + matchesCharClass "abç" "ç" => true # Direct utf-8 match matchesCharClass "a-z" "m" => true # Range match matchesCharClass "^0-9" "a" => true # Negated match matchesCharClass "!aeiou" "x" => true # Alternative negation @@ -295,7 +256,7 @@ in rec { let isNegated = hasPrefix "^" class || hasPrefix "!" 
class; actualClass = if isNegated then substring 1 (stringLength class - 1) class else class; - chars = stringToCharacters actualClass; + chars = utf8.chars actualClass; matches = if length chars >= 3 && elemAt chars 1 == "-" then @@ -318,8 +279,10 @@ in rec { */ inCharRange = start: end: char: let - startCode = charToInt start; - endCode = charToInt end; - charCode = charToInt char; - in charCode >= startCode && charCode <= endCode; + compareChars = a: b: + let + seqA = utf8.chars a; + seqB = utf8.chars b; + in builtins.lessThan seqA seqB || seqA == seqB; + in compareChars start char && compareChars char end; } diff --git a/internal/tests.nix b/internal/tests.nix index d53fe02..63bbd94 100644 --- a/internal/tests.nix +++ b/internal/tests.nix @@ -31,6 +31,43 @@ let ); in { + decodeUtf8 = mkSuite { + testNameFn = testCase: ''decodeUtf8 "${testCase.str}" ${toString testCase.offset}''; + valueFn = testCase: internal.decodeUtf8 testCase.str testCase.offset; + tests = [ + { + str = "•foo"; + offset = 0; + expected = "•"; + } + { + str = "•foo"; + offset = 3; + expected = "f"; + } + ]; + }; + + lastIndexSlash = mkSuite { + testNameFn = testCase: ''lastIndexSlash "${testCase.str}"''; + valueFn = testCase: internal.lastIndexSlash testCase.str; + tests = [ + { str = ""; expected = -1; } + { str = "/"; expected = 0; } + { str = "å/"; expected = 2; } + { str = "∫/"; expected = 3; } + { str = "a/b/c"; expected = 3; } + { str = "å/b/c"; expected = 4; } + { str = "å/∫/ç"; expected = 6; } + { str = "a\\/b/c"; expected = 4; } + { str = "√\\/b\\/c"; expected = -1; } + { str = "a/b\\/"; expected = 1; } + { str = "å/b\\/"; expected = 2; } + { str = "a\\//b"; expected = 3; } + { str = "\\//b"; expected = 2; } + ]; + }; + parseCharClass = mkSuite { testNameFn = testCase: ''parseCharClass "${testCase.input}" ${toString testCase.startIdx}''; valueFn = testCase: internal.parseCharClass testCase.input testCase.startIdx; @@ -44,6 +81,15 @@ in { isNegated = false; }; } + { + input = 
"[åbç]def"; + startIdx = 0; + expected = { + content = "åbç"; + endIdx = 6; + isNegated = false; + }; + } { input = "x[^0-9]"; startIdx = 1; @@ -53,6 +99,15 @@ in { isNegated = true; }; } + { + input = "x[^º-ª]"; + startIdx = 1; + expected = { + content = "º-ª"; + endIdx = 8; + isNegated = true; + }; + } { input = "[a\\]b]"; startIdx = 0; @@ -62,6 +117,15 @@ in { isNegated = false; }; } + { + input = "[å\\]∫]"; + startIdx = 0; + expected = { + content = "å\\]∫"; + endIdx = 8; + isNegated = false; + }; + } { input = "[\\^abc]"; startIdx = 0; @@ -71,6 +135,15 @@ in { isNegated = false; }; } + { + input = "[\\^åbc]"; + startIdx = 0; + expected = { + content = "\\^åbc"; + endIdx = 7; + isNegated = false; + }; + } { input = "[!a-z]123"; startIdx = 0; @@ -80,6 +153,15 @@ in { isNegated = true; }; } + { + input = "[!a-z]¡23"; + startIdx = 0; + expected = { + content = "a-z"; + endIdx = 5; + isNegated = true; + }; + } { input = "[^abc]"; startIdx = 0; @@ -89,6 +171,15 @@ in { isNegated = true; }; } + { + input = "[^a∫c]"; + startIdx = 0; + expected = { + content = "a∫c"; + endIdx = 7; + isNegated = true; + }; + } { input = "[abc"; startIdx = 0; @@ -107,14 +198,26 @@ in { tests = [ {class = "abc"; char = "b"; expected = true;} {class = "abc"; char = "d"; expected = false;} + {class = "abç"; char = "ç"; expected = true;} + {class = "abc"; char = "ç"; expected = false;} {class = "a-c"; char = "c"; expected = true;} {class = "a-c"; char = "d"; expected = false;} + {class = "α-ε"; char = "δ"; expected = true;} + {class = "α-γ"; char = "δ"; expected = false;} + {class = "^α-ε"; char = "δ"; expected = false;} + {class = "^α-γ"; char = "δ"; expected = true;} + {class = "!α-ε"; char = "δ"; expected = false;} + {class = "!α-γ"; char = "δ"; expected = true;} {class = "^0-9"; char = "a"; expected = true;} {class = "^0-9"; char = "9"; expected = false;} {class = "!e-g"; char = "d"; expected = true;} {class = "!e-g"; char = "f"; expected = false;} {class = "^abc"; char = "d"; expected = 
true;} {class = "!abc"; char = "c"; expected = false;} + {class = "^ab√"; char = "v"; expected = true;} + {class = "!ab√"; char = "v"; expected = true;} + {class = "^ab√"; char = "√"; expected = false;} + {class = "!ab√"; char = "√"; expected = false;} ]; }; @@ -128,64 +231,58 @@ in { {start = "3"; end = "6"; char = "7"; expected = false;} {start = "3"; end = "6"; char = "6"; expected = true;} {start = "3"; end = "6"; char = "3"; expected = true;} + {start = "α"; end = "ε"; char = "δ"; expected = true;} + {start = "α"; end = "γ"; char = "δ"; expected = false;} + {start = "←"; end = "↓"; char = "→"; expected = true;} + {start = "가"; end = "힣"; char = "박"; expected = true;} + {start = "あ"; end = "ん"; char = "き"; expected = true;} + {start = "😀"; end = "😎"; char = "😄"; expected = true;} ]; }; - firstUnescapedMeta = mkSuite { - testNameFn = testCase: ''firstUnescapedMeta "${testCase.str}"''; - valueFn = testCase: internal.firstUnescapedMeta testCase.str; - tests = [ - { str = ""; expected = -1; } - { str = "*abc"; expected = 0; } - { str = "\\*a*"; expected = 3; } - { str = "abc\\*def"; expected = -1; } - { str = "ab\\*cd*ef"; expected = 6; } - { str = "no\\*meta"; expected = -1; } - { str = "\\\\*meta"; expected = 2; } - { str = "escaped\\"; expected = -1; } - { str = "escaped\\\\"; expected = -1; } - ]; - }; - - findOpenBrace = mkSuite { - testNameFn = testCase: ''findOpenBrace "${testCase.str}" "${toString testCase.idx}"''; - valueFn = testCase: internal.findOpenBrace testCase.str testCase.idx; + findUnescapedChar = mkSuite { + testNameFn = testCase: ''findUnescapedChar "${testCase.str}" "${toString testCase.idx}" "${builtins.toJSON testCase.chars}"''; + valueFn = testCase: internal.findUnescapedChar testCase.str testCase.idx testCase.chars; tests = [ - { str = ""; idx = 0; expected = -1; } - { str = "{abc"; idx = 0; expected = 0; } - { str = "{abc"; idx = 1; expected = -1; } - { str = "\\{a{"; idx = 0; expected = 3; } - { str = "abc\\{def"; idx = 0; expected = -1; 
} - { str = "ab\\{cd{ef"; idx = 0; expected = 6; } - { str = "escaped\\"; idx = 0; expected = -1; } - { str = "escaped\\\\"; idx = 0; expected = -1; } - ]; - }; - - findCloseBrace = mkSuite { - testNameFn = testCase: ''findCloseBrace "${testCase.str}" "${toString testCase.idx}"''; - valueFn = testCase: internal.findCloseBrace testCase.str testCase.idx; - tests = [ - { str = ""; idx = 0; expected = -1; } - { str = "}abc"; idx = 0; expected = 0; } - { str = "}abc"; idx = 1; expected = -1; } - { str = "\\}a}"; idx = 0; expected = 3; } - { str = "abc\\}def"; idx = 0; expected = -1; } - { str = "ab\\}cd}ef"; idx = 0; expected = 6; } - { str = "escaped\\"; idx = 0; expected = -1; } - { str = "escaped\\\\"; idx = 0; expected = -1; } - ]; - }; - - findNextComma = mkSuite { - testNameFn = testCase: ''findNextComma "${testCase.str}" "${toString testCase.idx}" "${toString testCase.len}"''; - valueFn = testCase: internal.findNextComma testCase.str testCase.idx testCase.len; - tests = [ - { str = ""; idx = 0; len = 0; expected = -1; } - { str = ",abc"; idx = 0; len = 4; expected = 0; } - { str = "abc,def"; idx = 0; len = 7; expected = 3; } - { str = "abc\\,def"; idx = 0; len = 9; expected = -1; } - { str = "abc\\,def,ghi"; idx = 0; len = 13; expected = 8; } + { str = ""; idx = 0; chars = [ "{" ]; expected = -1; } + { str = "{abc"; idx = 0; chars = [ "{" ]; expected = 0; } + { str = "{abc"; idx = 1; chars = [ "{" ]; expected = -1; } + { str = "\\{a{"; idx = 0; chars = [ "{" ]; expected = 3; } + { str = "abc\\{def"; idx = 0; chars = [ "{" ]; expected = -1; } + { str = "ab\\{cd{ef"; idx = 0; chars = [ "{" ]; expected = 6; } + { str = "åb\\{cd{ef"; idx = 0; chars = [ "{" ]; expected = 7; } + { str = "å{"; idx = 0; chars = [ "{" ]; expected = 2; } + { str = "∫{"; idx = 0; chars = [ "{" ]; expected = 3; } + { str = "escaped\\"; idx = 0; chars = [ "{" ]; expected = -1; } + { str = "escaped\\\\"; idx = 0; chars = [ "{" ]; expected = -1; } + { str = ""; idx = 0; chars = [ "}" ]; 
expected = -1; } + { str = "}abc"; idx = 0; chars = [ "}" ]; expected = 0; } + { str = "}abc"; idx = 1; chars = [ "}" ]; expected = -1; } + { str = "\\}a}"; idx = 0; chars = [ "}" ]; expected = 3; } + { str = "abc\\}def"; idx = 0; chars = [ "}" ]; expected = -1; } + { str = "ab\\}cd}ef"; idx = 0; chars = [ "}" ]; expected = 6; } + { str = "aå\\}cd}ef"; idx = 0; chars = [ "}" ]; expected = 7; } + { str = "å}"; idx = 0; chars = [ "}" ]; expected = 2; } + { str = "∫}"; idx = 0; chars = [ "}" ]; expected = 3; } + { str = "escaped\\"; idx = 0; chars = [ "}" ]; expected = -1; } + { str = "escaped\\\\"; idx = 0; chars = [ "}" ]; expected = -1; } + { str = ""; idx = 0; chars = [ "," ]; expected = -1; } + { str = ",abc"; idx = 0; chars = [ "," ]; expected = 0; } + { str = ",åbc"; idx = 0; chars = [ "," ]; expected = 0; } + { str = "abc,def"; idx = 0; chars = [ "," ]; expected = 3; } + { str = "abå,def"; idx = 0; chars = [ "," ]; expected = 4; } + { str = "abc\\,def"; idx = 0; chars = [ "," ]; expected = -1; } + { str = "abc\\,def,ghi"; idx = 0; chars = [ "," ]; expected = 8; } + { str = ""; idx = 0; chars = [ "*" "[" ]; expected = -1; } + { str = "*abc"; idx = 0; chars = [ "*" "[" ]; expected = 0; } + { str = "\\*a*"; idx = 0; chars = [ "*" "[" ]; expected = 3; } + { str = "abc\\*def"; idx = 0; chars = [ "*" "[" ]; expected = -1; } + { str = "ab\\*cd*ef"; idx = 0; chars = [ "*" "[" ]; expected = 6; } + { str = "aå\\*cd*ef"; idx = 0; chars = [ "*" "[" ]; expected = 7; } + { str = "no\\*meta"; idx = 0; chars = [ "*" "[" ]; expected = -1; } + { str = "\\\\*meta"; idx = 0; chars = [ "*" "[" ]; expected = 2; } + { str = "escaped\\"; idx = 0; chars = [ "*" "[" ]; expected = -1; } + { str = "escaped\\\\"; idx = 0; chars = [ "*" "[" ]; expected = -1; } ]; }; @@ -194,16 +291,21 @@ in { valueFn = testCase: internal.collectParts testCase.str; tests = [ { str = "a,b,c"; expected = ["a" "b" "c"]; } + { str = "å,b,c"; expected = ["å" "b" "c"]; } + { str = "å,b,cç,d"; expected = ["å" "b" 
"cç" "d"]; } { str = "a\\,b,c"; expected = ["a\\,b" "c"]; } { str = "a\\[b,c"; expected = ["a\\[b" "c"]; } + { str = "a\\[∫,c"; expected = ["a\\[∫" "c"]; } { str = "a\\]b,c"; expected = ["a\\]b" "c"]; } { str = "a\\-b,c"; expected = ["a\\-b" "c"]; } { str = "a\\*b,c"; expected = ["a\\*b" "c"]; } { str = "a,b,[cd]"; expected = ["a" "b" "[cd]"]; } { str = "a\\,b,c,d\\{"; expected = ["a\\,b" "c" "d\\{"]; } + { str = "a\\,∫,c,ƒ\\{"; expected = ["a\\,∫" "c" "ƒ\\{"]; } { str = "foo\\,bar,baz"; expected = ["foo\\,bar" "baz"]; } { str = "single"; expected = ["single"]; } { str = "\\,"; expected = ["\\,"]; } + { str = "\\µ"; expected = ["\\µ"]; } { str = ","; expected = ["" ""]; } ]; }; @@ -213,13 +315,20 @@ in { valueFn = testCase: internal.expandAlternates testCase.pattern; tests = [ { pattern = "{a,b}"; expected = ["a" "b"]; } + { pattern = "{å,b}"; expected = ["å" "b"]; } { pattern = "{a*,b}"; expected = ["a*" "b"]; } + { pattern = "{å*,b}"; expected = ["å*" "b"]; } + { pattern = "{å*µ*,b}"; expected = ["å*µ*" "b"]; } { pattern = "{foo.[ch],test_foo.[ch]}"; expected = ["foo.[ch]" "test_foo.[ch]"]; } + { pattern = "{foo.[çh],test_foo.[çh]}"; expected = ["foo.[çh]" "test_foo.[çh]"]; } { pattern = "{[x-z],b}"; expected = ["[x-z]" "b"]; } + { pattern = "{[ƒ-√],b}"; expected = ["[ƒ-√]" "b"]; } { pattern = "pre{a\\,b,c}post"; expected = ["prea,bpost" "precpost"]; } { pattern = "foo\\{bar,baz}"; expected = ["foo\\{bar,baz}"]; } { pattern = "{a,b\\,c,d}"; expected = ["a" "b,c" "d"]; } + { pattern = "{a,b\\,c,∂}"; expected = ["a" "b,c" "∂"]; } { pattern = "{foo,bar}.{c,h}"; expected = ["foo.c" "foo.h" "bar.c" "bar.h"]; } + { pattern = "{foo,bar}.{ç,˙}"; expected = ["foo.ç" "foo.˙" "bar.ç" "bar.˙"]; } { pattern = "{,foo}"; expected = ["" "foo"]; } { pattern = "pre{a,b}post{1,2}"; expected = ["preapost1" "preapost2" "prebpost1" "prebpost2"]; } ]; @@ -230,9 +339,14 @@ in { valueFn = testCase: internal.parseAlternates testCase.pattern; tests = [ { pattern = "{a,b}"; expected = { 
prefix = ""; alternates = ["a" "b"]; suffix = ""; }; } + { pattern = "{å,b}"; expected = { prefix = ""; alternates = ["å" "b"]; suffix = ""; }; } + { pattern = "{ßå,b}"; expected = { prefix = ""; alternates = ["ßå" "b"]; suffix = ""; }; } + { pattern = "{ßå,b,√∫˜}"; expected = { prefix = ""; alternates = ["ßå" "b" "√∫˜"]; suffix = ""; }; } { pattern = "pre{a\\,b,c}post"; expected = { prefix = "pre"; alternates = ["a\\,b" "c"]; suffix = "post"; }; } + { pattern = "pre{a\\,∫,c}pos†"; expected = { prefix = "pre"; alternates = ["a\\,∫" "c"]; suffix = "pos†"; }; } { pattern = "foo\\{bar,baz}"; expected = { prefix = ""; alternates = ["foo\\{bar,baz}"]; suffix = ""; }; } { pattern = "{a,b\\,c,d}"; expected = { prefix = ""; alternates = ["a" "b\\,c" "d"]; suffix = ""; }; } + { pattern = "{å,b\\,ç,d}"; expected = { prefix = ""; alternates = ["å" "b\\,ç" "d"]; suffix = ""; }; } ]; }; @@ -247,13 +361,16 @@ in { { pattern = "/*"; path = "/debug/"; expected = false; } { pattern = "/*"; path = "//"; expected = false; } { pattern = "abc"; path = "abc"; expected = true; } + { pattern = "åbc"; path = "åbc"; expected = true; } { pattern = "*"; path = "abc"; expected = true; } { pattern = "*c"; path = "abc"; expected = true; } + { pattern = "*ç"; path = "abç"; expected = true; } { pattern = "*/"; path = "a/"; expected = true; } { pattern = "a*"; path = "a"; expected = true; } { pattern = "a*"; path = "abc"; expected = true; } { pattern = "a*"; path = "ab/c"; expected = false; } { pattern = "a*/b"; path = "abc/b"; expected = true; } + { pattern = "å*/b"; path = "åbc/b"; expected = true; } { pattern = "a*/b"; path = "a/c/b"; expected = false; } { pattern = "a*/c/"; path = "a/b"; expected = false; } { pattern = "a*b*c*d*e*"; path = "axbxcxdxe"; expected = true; } @@ -262,10 +379,12 @@ in { { pattern = "a*b*c*d*e*/f"; path = "axbxcxdxe/xxx/f"; expected = false; } { pattern = "a*b*c*d*e*/f"; path = "axbxcxdxexxx/fff"; expected = false; } { pattern = "a\\*b"; path = "ab"; expected = false; 
} + { pattern = "å\\*∫*"; path = "å∫¡™£"; expected = false; } # Globstar / doublestar { pattern = "**"; path = ""; expected = true; } { pattern = "a/**"; path = "a"; expected = true; } + { pattern = "å/**"; path = "å"; expected = true; } { pattern = "a/**/"; path = "a"; expected = true; } { pattern = "a/**"; path = "a/"; expected = true; } { pattern = "a/**/"; path = "a/"; expected = true; } @@ -274,12 +393,15 @@ in { { pattern = "**/c"; path = "c"; expected = true; } { pattern = "**/c"; path = "b/c"; expected = true; } { pattern = "**/c"; path = "a/b/c"; expected = true; } + { pattern = "**/ç"; path = "a/b/ç"; expected = true; } { pattern = "**/c"; path = "a/b"; expected = false; } { pattern = "**/c"; path = "abcd"; expected = false; } { pattern = "**/c"; path = "a/abc"; expected = false; } { pattern = "a/**/b"; path = "a/b"; expected = true; } + { pattern = "å/**/∫"; path = "å/∫"; expected = true; } { pattern = "a/**/c"; path = "a/b/c"; expected = true; } { pattern = "a/**/d"; path = "a/b/c/d"; expected = true; } + { pattern = "a/**/d"; path = "a/∫/ç/d"; expected = true; } { pattern = "a/\\**"; path = "a/b/c"; expected = false; } # Character Class @@ -294,15 +416,45 @@ in { expected = false; } { - pattern = "[a-z]"; - path = "m"; + pattern = "[abç]"; + path = "ç"; + expected = true; + } + { + pattern = "[abç]"; + path = "d"; + expected = false; + } + { + pattern = "[α-θ]"; + path = "θ"; + expected = true; + } + { + pattern = "[α-θ]"; + path = "ω"; + expected = false; + } + { + pattern = "[^α-θ]"; + path = "θ"; + expected = false; + } + { + pattern = "[^α-θ]"; + path = "ω"; expected = true; } { - pattern = "[a-z]"; - path = "3"; + pattern = "[!α-θ]"; + path = "θ"; expected = false; } + { + pattern = "[!α-θ]"; + path = "ω"; + expected = true; + } { pattern = "[^abc]"; path = "d"; @@ -363,11 +515,21 @@ in { path = "f].c"; expected = true; } + { + pattern = "ƒ[\\[\\]].ç"; + path = "ƒ].ç"; + expected = true; + } { pattern = "[ef]\\*.c"; path = "f*.c"; expected = true; 
} + { + pattern = "[´f]\\*.c"; + path = "´*.c"; + expected = true; + } ]; }; } diff --git "a/test-data/foo*.g\303\270" "b/test-data/foo*.g\303\270" new file mode 100644 index 0000000..e69de29 diff --git "a/test-data/foo.g\303\270" "b/test-data/foo.g\303\270" new file mode 100644 index 0000000..e69de29 diff --git "a/test-data/g\303\270.foo" "b/test-data/g\303\270.foo" new file mode 100644 index 0000000..e69de29