From ce774d06ed82c778f1f92c12846178c8173999c6 Mon Sep 17 00:00:00 2001 From: Cameron Clark Date: Fri, 24 Oct 2025 14:46:55 +0100 Subject: [PATCH 1/3] fix: panic in `regexp.MustCompile` when building wildcarddirectories --- .../tsoptions/wildcarddirectories_test.go | 65 +++++++++++++++++++ internal/vfs/utilities.go | 8 +-- 2 files changed, 69 insertions(+), 4 deletions(-) create mode 100644 internal/tsoptions/wildcarddirectories_test.go diff --git a/internal/tsoptions/wildcarddirectories_test.go b/internal/tsoptions/wildcarddirectories_test.go new file mode 100644 index 0000000000..65976295ab --- /dev/null +++ b/internal/tsoptions/wildcarddirectories_test.go @@ -0,0 +1,65 @@ +package tsoptions + +import ( + "testing" + + "github.com/microsoft/typescript-go/internal/tspath" +) + +func TestGetWildcardDirectories_NonASCIICharacters(t *testing.T) { + t.Parallel() + + tests := []struct { + name string + include []string + exclude []string + currentDirectory string + useCaseSensitiveFileNames bool + }{ + { + name: "Norwegian character æ in path", + include: []string{"src/**/*.test.ts", "src/**/*.stories.ts", "src/**/*.mdx"}, + exclude: []string{"node_modules"}, + currentDirectory: "C:/Users/TobiasLægreid/dev/app/frontend/packages/react", + useCaseSensitiveFileNames: false, + }, + { + name: "Japanese characters in path", + include: []string{"src/**/*.ts"}, + exclude: []string{"テスト"}, + currentDirectory: "/Users/ユーザー/プロジェクト", + useCaseSensitiveFileNames: true, + }, + { + name: "Chinese characters in path", + include: []string{"源代码/**/*.js"}, + exclude: []string{"节点模块"}, + currentDirectory: "/home/用户/项目", + useCaseSensitiveFileNames: true, + }, + { + name: "Various Unicode characters", + include: []string{"src/**/*.ts"}, + exclude: []string{"node_modules"}, + currentDirectory: "/Users/Müller/café/naïve/résumé", + useCaseSensitiveFileNames: false, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + + comparePathsOptions := 
tspath.ComparePathsOptions{ + CurrentDirectory: tt.currentDirectory, + UseCaseSensitiveFileNames: tt.useCaseSensitiveFileNames, + } + + result := getWildcardDirectories(tt.include, tt.exclude, comparePathsOptions) + + if result == nil { + t.Fatalf("expected non-nil result") + } + }) + } +} diff --git a/internal/vfs/utilities.go b/internal/vfs/utilities.go index d86a7f0d10..4b44664d64 100644 --- a/internal/vfs/utilities.go +++ b/internal/vfs/utilities.go @@ -81,11 +81,11 @@ func IsImplicitGlob(lastPathComponent string) bool { return !strings.ContainsAny(lastPathComponent, ".*?") } -// Reserved characters, forces escaping of any non-word (or digit), non-whitespace character. -// It may be inefficient (we could just match (/[-[\]{}()*+?.,\\^$|#\s]/g), but this is future -// proof. +// Reserved characters - only escape actual regex metacharacters. +// Go's regexp doesn't support \x escape sequences for arbitrary characters, +// so we only escape characters that have special meaning in regex. 
var ( - reservedCharacterPattern *regexp.Regexp = regexp.MustCompile(`[^\w\s/]`) + reservedCharacterPattern *regexp.Regexp = regexp.MustCompile(`[\\.\+*?()\[\]{}^$|#]`) wildcardCharCodes = []rune{'*', '?'} ) From 6bf55437f89bf2fe227a395f3ea9bb5dafc8e108 Mon Sep 17 00:00:00 2001 From: Cameron Clark Date: Fri, 24 Oct 2025 18:54:06 +0100 Subject: [PATCH 2/3] remove use of regex --- internal/vfs/utilities.go | 25 +++++++++++++++++-------- 1 file changed, 17 insertions(+), 8 deletions(-) diff --git a/internal/vfs/utilities.go b/internal/vfs/utilities.go index 4b44664d64..7817e812fc 100644 --- a/internal/vfs/utilities.go +++ b/internal/vfs/utilities.go @@ -2,7 +2,6 @@ package vfs import ( "fmt" - "regexp" "sort" "strings" "sync" @@ -75,18 +74,28 @@ func replaceWildcardCharacter(match string, singleAsteriskRegexFragment string) } } +func escapeRegexMetacharacters(s string, replaceWildcard func(string) string) string { + var result strings.Builder + result.Grow(len(s)) + for _, ch := range s { + switch ch { + case '\\', '.', '+', '*', '?', '(', ')', '[', ']', '{', '}', '^', '$', '|', '#': + result.WriteString(replaceWildcard(string(ch))) + default: + result.WriteRune(ch) + } + } + return result.String() +} + // An "includes" path "foo" is implicitly a glob "foo/** /*" (without the space) if its last component has no extension, // and does not contain any glob characters itself. func IsImplicitGlob(lastPathComponent string) bool { return !strings.ContainsAny(lastPathComponent, ".*?") } -// Reserved characters - only escape actual regex metacharacters. -// Go's regexp doesn't support \x escape sequences for arbitrary characters, -// so we only escape characters that have special meaning in regex. 
var ( - reservedCharacterPattern *regexp.Regexp = regexp.MustCompile(`[\\.\+*?()\[\]{}^$|#]`) - wildcardCharCodes = []rune{'*', '?'} + wildcardCharCodes = []rune{'*', '?'} ) var ( @@ -206,7 +215,7 @@ func getSubPatternFromSpec( componentPattern.WriteString("[^./]") component = component[1:] } - componentPattern.WriteString(reservedCharacterPattern.ReplaceAllStringFunc(component, replaceWildcardCharacter)) + componentPattern.WriteString(escapeRegexMetacharacters(component, replaceWildcardCharacter)) // Patterns should not include subfolders like node_modules unless they are // explicitly included as part of the path. @@ -219,7 +228,7 @@ func getSubPatternFromSpec( } subpattern.WriteString(componentPattern.String()) } else { - subpattern.WriteString(reservedCharacterPattern.ReplaceAllStringFunc(component, replaceWildcardCharacter)) + subpattern.WriteString(escapeRegexMetacharacters(component, replaceWildcardCharacter)) } } hasWrittenComponent = true From 853ac9db5b07969902f3fdd01c8ad2cf16fea2cd Mon Sep 17 00:00:00 2001 From: Cameron Clark Date: Sat, 25 Oct 2025 12:49:29 +0100 Subject: [PATCH 3/3] use regexp2 for the exclude regex and restore reservedCharacterPattern --- internal/tsoptions/wildcarddirectories.go | 15 +++++++------- internal/vfs/utilities.go | 25 ++++++++--------------- 2 files changed, 16 insertions(+), 24 deletions(-) diff --git a/internal/tsoptions/wildcarddirectories.go b/internal/tsoptions/wildcarddirectories.go index 33068745ac..a782b1123c 100644 --- a/internal/tsoptions/wildcarddirectories.go +++ b/internal/tsoptions/wildcarddirectories.go @@ -1,7 +1,6 @@ package tsoptions import ( - "regexp" "strings" "github.com/dlclark/regexp2" @@ -28,13 +27,13 @@ func getWildcardDirectories(include []string, exclude []string, comparePathsOpti } rawExcludeRegex := vfs.GetRegularExpressionForWildcard(exclude, comparePathsOptions.CurrentDirectory, "exclude") - var excludeRegex *regexp.Regexp + var excludeRegex *regexp2.Regexp if rawExcludeRegex != "" { - options := "" + flags := regexp2.ECMAScript if 
!comparePathsOptions.UseCaseSensitiveFileNames { - options = "(?i)" + flags |= regexp2.IgnoreCase } - excludeRegex = regexp.MustCompile(options + rawExcludeRegex) + excludeRegex = regexp2.MustCompile(rawExcludeRegex, regexp2.RegexOptions(flags)) } wildcardDirectories := make(map[string]bool) @@ -44,8 +43,10 @@ func getWildcardDirectories(include []string, exclude []string, comparePathsOpti for _, file := range include { spec := tspath.NormalizeSlashes(tspath.CombinePaths(comparePathsOptions.CurrentDirectory, file)) - if excludeRegex != nil && excludeRegex.MatchString(spec) { - continue + if excludeRegex != nil { + if matched, _ := excludeRegex.MatchString(spec); matched { + continue + } } match := getWildcardDirectoryFromSpec(spec, comparePathsOptions.UseCaseSensitiveFileNames) diff --git a/internal/vfs/utilities.go b/internal/vfs/utilities.go index 7817e812fc..4b44664d64 100644 --- a/internal/vfs/utilities.go +++ b/internal/vfs/utilities.go @@ -2,6 +2,7 @@ package vfs import ( "fmt" + "regexp" "sort" "strings" "sync" @@ -74,28 +75,18 @@ func replaceWildcardCharacter(match string, singleAsteriskRegexFragment string) } } -func escapeRegexMetacharacters(s string, replaceWildcard func(string) string) string { - var result strings.Builder - result.Grow(len(s)) - for _, ch := range s { - switch ch { - case '\\', '.', '+', '*', '?', '(', ')', '[', ']', '{', '}', '^', '$', '|', '#': - result.WriteString(replaceWildcard(string(ch))) - default: - result.WriteRune(ch) - } - } - return result.String() -} - // An "includes" path "foo" is implicitly a glob "foo/** /*" (without the space) if its last component has no extension, // and does not contain any glob characters itself. func IsImplicitGlob(lastPathComponent string) bool { return !strings.ContainsAny(lastPathComponent, ".*?") } +// Reserved characters - only escape actual regex metacharacters. 
+// Go's regexp doesn't support \x escape sequences for arbitrary characters, +// so we only escape characters that have special meaning in regex. var ( - wildcardCharCodes = []rune{'*', '?'} + reservedCharacterPattern *regexp.Regexp = regexp.MustCompile(`[\\.\+*?()\[\]{}^$|#]`) + wildcardCharCodes = []rune{'*', '?'} ) var ( @@ -215,7 +206,7 @@ func getSubPatternFromSpec( componentPattern.WriteString("[^./]") component = component[1:] } - componentPattern.WriteString(escapeRegexMetacharacters(component, replaceWildcardCharacter)) + componentPattern.WriteString(reservedCharacterPattern.ReplaceAllStringFunc(component, replaceWildcardCharacter)) // Patterns should not include subfolders like node_modules unless they are // explicitly included as part of the path. @@ -228,7 +219,7 @@ func getSubPatternFromSpec( } subpattern.WriteString(componentPattern.String()) } else { - subpattern.WriteString(escapeRegexMetacharacters(component, replaceWildcardCharacter)) + subpattern.WriteString(reservedCharacterPattern.ReplaceAllStringFunc(component, replaceWildcardCharacter)) } } hasWrittenComponent = true