From f8a7ad7afa5683472fd5e9ffbf6e3174ba66f50f Mon Sep 17 00:00:00 2001 From: RT Date: Thu, 17 Jul 2025 14:31:50 -0400 Subject: [PATCH 1/3] adjust loader inference --- platform/script/loader/inference.go | 72 +++-- platform/script/loader/inference_test.go | 331 +++++++++++++++++++++-- 2 files changed, 362 insertions(+), 41 deletions(-) diff --git a/platform/script/loader/inference.go b/platform/script/loader/inference.go index e3d2d21..0462833 100644 --- a/platform/script/loader/inference.go +++ b/platform/script/loader/inference.go @@ -8,14 +8,20 @@ import ( "strings" ) -// InferLoader analyzes the input and returns an appropriate loader based on type inference. -// It supports the following input types: -// - string: Infers from URI scheme (http/https -> HTTP, file -> Disk) or content (inline string with base64 decoding) -// - []byte: Returns FromBytes loader -// - io.Reader: Returns FromIoReader loader -// - Loader: Returns as-is +// InferLoader determines the appropriate loader for the given input. // -// Returns an error if the input type is unsupported or if loader creation fails. +// For string inputs, it applies the following checks in order: +// 1. URI scheme detection (http/https -> HTTP, file -> Disk) +// 2. File path format detection (absolute paths, known extensions) +// 3. Base64 validation (attempts decode) +// 4. String fallback +// +// Other input types: +// - []byte: FromBytes loader +// - io.Reader: FromIoReader loader +// - Loader: returned unchanged +// +// Returns an error for unsupported input types or loader creation failures. func InferLoader(input any) (Loader, error) { switch v := input.(type) { case string: @@ -32,21 +38,20 @@ func InferLoader(input any) (Loader, error) { } // inferFromString analyzes a string input and returns the appropriate loader. -// It checks for URI schemes and falls back to treating the string as inline content. +// It follows this order: scheme check -> file path check -> base64 check -> string fallback. func inferFromString(input string) (Loader, error) { input = strings.TrimSpace(input) if input == "" { return nil, fmt.Errorf("empty string input") } - // Try to parse as URL to detect scheme + // 1. Check for URI scheme if parsed, err := url.Parse(input); err == nil && parsed.Scheme != "" { switch parsed.Scheme { case "http", "https": return NewFromHTTP(input) case "file": path := parsed.Path - // Convert relative paths to absolute paths if !filepath.IsAbs(path) { absPath, err := filepath.Abs(path) if err != nil { @@ -55,26 +60,53 @@ func inferFromString(input string) (Loader, error) { path = absPath } return NewFromDisk(path) - default: - // Unknown scheme, treat as inline content - return NewFromStringBase64(input) } } - // Check if it looks like a file path - if filepath.IsAbs(input) || strings.Contains(input, "/") || strings.Contains(input, "\\") { + // 2. Check if it's a valid file path + if isValidFilePath(input) { + path := input // Convert relative paths to absolute paths if !filepath.IsAbs(input) { absPath, err := filepath.Abs(input) if err != nil { - // If we can't resolve the path, treat as inline content - return NewFromStringBase64(input) + return nil, fmt.Errorf("failed to resolve relative path %q: %w", input, err) } - input = absPath + path = absPath } - return NewFromDisk(input) + return NewFromDisk(path) } - // Default to treating as inline string content + // 3. Use the base64 check to determine if the input is valid base64 + // (it returns FromBytes if valid, otherwise falls back to FromString) return NewFromStringBase64(input) } + +// isValidFilePath checks if a string looks like a valid file path format. +func isValidFilePath(s string) bool { + // Don't consider strings with newlines/carriage returns as file paths + if strings.ContainsAny(s, "\n\r") { + return false + } + + // Don't consider strings that look like code as file paths + if strings.Contains(s, " -c ") || // shell commands + strings.Contains(s, "';") || // code with semicolons after quotes + strings.Contains(s, "console.") || // JavaScript console calls + strings.Contains(s, "var ") || // variable declarations + strings.Contains(s, "function ") { // function declarations + return false + } + + // Check for specific script file extensions (case insensitive) + validExtensions := []string{".wasm", ".risor", ".star", ".starlark"} + lower := strings.ToLower(s) + for _, ext := range validExtensions { + if strings.HasSuffix(lower, ext) { + return true + } + } + + // Don't treat absolute paths as file paths unless they have supported extensions + return false +} diff --git a/platform/script/loader/inference_test.go b/platform/script/loader/inference_test.go index 4002593..1cdd83a 100644 --- a/platform/script/loader/inference_test.go +++ b/platform/script/loader/inference_test.go @@ -26,22 +26,42 @@ func TestInferLoader(t *testing.T) { }{ { name: "HTTP URL", - input: "http://example.com/script.js", + input: "http://example.com/script.wasm", expectedType: (*FromHTTP)(nil), }, { name: "HTTPS URL", - input: "https://example.com/script.js", + input: "https://example.com/script.risor", expectedType: (*FromHTTP)(nil), }, { name: "file URL", - input: "file:///path/to/script.js", + input: "file:///path/to/script.star", expectedType: (*FromDisk)(nil), }, { - name: "absolute path", - input: "/absolute/path/script.js", + name: "absolute path with invalid extension", + input: "/absolute/path/script.invalid", + expectedType: (*FromString)(nil), + }, + { + name: "absolute path with wasm extension", + input: "/absolute/path/script.wasm", + expectedType: (*FromDisk)(nil), + }, + { + name: "absolute path with risor extension", + input: "/absolute/path/script.risor", + expectedType: (*FromDisk)(nil), + }, + { + name: "absolute path with star extension", + input: "/absolute/path/script.star", + expectedType: (*FromDisk)(nil), + }, + { + name: "absolute path with starlark extension", + input: "/absolute/path/script.starlark", expectedType: (*FromDisk)(nil), }, { @@ -186,17 +206,17 @@ func TestInferFromString(t *testing.T) { }{ { name: "http scheme", - input: "http://localhost:8080/script.js", + input: "http://localhost:8080/script.wasm", expectedType: (*FromHTTP)(nil), }, { name: "https scheme", - input: "https://api.example.com/script.js", + input: "https://api.example.com/script.risor", expectedType: (*FromHTTP)(nil), }, { name: "file scheme", - input: "file:///usr/local/scripts/test.js", + input: "file:///usr/local/scripts/test.star", expectedType: (*FromDisk)(nil), }, { @@ -223,15 +243,20 @@ func TestInferFromString(t *testing.T) { expectedType any }{ { - name: "absolute unix path", - input: "/usr/local/bin/script.js", - expectedType: (*FromDisk)(nil), + name: "absolute unix path with invalid extension", + input: "/usr/local/bin/script.invalid", + expectedType: (*FromString)(nil), }, { - name: "path with forward slash", - input: "some/path/script.js", + name: "absolute unix path with wasm extension", + input: "/usr/local/bin/script.wasm", expectedType: (*FromDisk)(nil), }, + { + name: "path with forward slash and invalid extension", + input: "some/path/script.invalid", + expectedType: (*FromString)(nil), + }, } for _, tc := range tests { @@ -255,14 +280,14 @@ func TestInferFromString(t *testing.T) { expectedType any }{ { - name: "windows absolute path", - input: "C:\\Program Files\\script.js", - expectedType: (*FromDisk)(nil), + name: "windows absolute path with invalid extension", + input: "C:\\Program Files\\script.invalid", + expectedType: (*FromString)(nil), }, { - name: "windows drive with colon", - input: "D:\\scripts\\test.js", - expectedType: (*FromDisk)(nil), + name: "windows drive with colon and invalid extension", + input: "D:\\scripts\\test.invalid", + expectedType: (*FromString)(nil), }, } @@ -358,7 +383,7 @@ func TestInferLoader_WindowsPath(t *testing.T) { t.Skip("Windows path test only runs on Windows") } - result, err := InferLoader("C:\\windows\\path\\script.js") + result, err := InferLoader("C:\\windows\\path\\script.starlark") require.NoError(t, err) assert.IsType(t, (*FromDisk)(nil), result) } @@ -446,7 +471,7 @@ func TestInferLoader_Integration(t *testing.T) { t.Run("file path inference with temp file", func(t *testing.T) { // Create a temporary directory and file tempDir := t.TempDir() - tempFile := filepath.Join(tempDir, "test_script.js") + tempFile := filepath.Join(tempDir, "test_script.wasm") // Write some content to the file content := "console.log('temporary file test');" @@ -470,3 +495,267 @@ func TestInferLoader_Integration(t *testing.T) { assert.Equal(t, content, string(actualContent)) }) } + +// TestInferLoader_MultilineScriptContent tests multiline content that might be misinterpreted as paths. +func TestInferLoader_MultilineScriptContent(t *testing.T) { + t.Parallel() + + tests := []struct { + name string + input string + }{ + { + name: "script with comment header", + input: `// JavaScript comment +function test() { + return "hello world"; +}`, + }, + { + name: "script starting with path-like comment", + input: `// /usr/bin/node +const fs = require('fs'); +console.log("test");`, + }, + { + name: "script with windows path in content", + input: `const path = "C:\\Program Files\\App\\script.invalid"; +function loadScript() { + return path; +}`, + }, + { + name: "multiline with control characters", + input: "function test() {\n\treturn 'hello\\nworld';\n}", + }, + { + name: "content starting with /usr/bin", + input: `/usr/bin/node +function test() { return 42; }`, + }, + { + name: "risor script with data access patterns", + input: `func process() { + service_name := ctx.get("service_name", "unknown") + version := ctx.get("version", "1.0.0") + return {"message": "Hello from Risor!", "version": version} +} +process()`, + }, + } + + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + result, err := InferLoader(tc.input) + require.NoError(t, err, "Input: %s", tc.input) + assert.IsType( + t, + (*FromString)(nil), + result, + "Should treat multiline content as string, not path", + ) + + // Verify content can be read correctly + reader, err := result.GetReader() + require.NoError(t, err) + defer func() { + assert.NoError(t, reader.Close()) + }() + + content, err := io.ReadAll(reader) + require.NoError(t, err) + assert.Equal(t, tc.input, string(content)) + }) + } +} + +// TestInferLoader_AmbiguousContentDetection tests edge cases between paths and content. +func TestInferLoader_AmbiguousContentDetection(t *testing.T) { + t.Parallel() + + tests := []struct { + name string + input string + expectedType any + description string + }{ + { + name: "path that doesn't exist should return FromString", + input: "nonexistent/path/script.invalid", + expectedType: (*FromString)(nil), + description: "Paths with unsupported extensions should be treated as content", + }, + { + name: "path with spaces and unsupported extension", + input: "some path with spaces.invalid", + expectedType: (*FromString)(nil), + description: "Paths with unsupported extensions should be treated as content", + }, + { + name: "relative path without extension", + input: "relative/path/file", + expectedType: (*FromString)(nil), + description: "Relative paths should be treated as content", + }, + { + name: "wasm file extension", + input: "script.wasm", + expectedType: (*FromDisk)(nil), + description: "WASM files should be treated as paths", + }, + { + name: "risor file extension", + input: "script.risor", + expectedType: (*FromDisk)(nil), + description: "Risor files should be treated as paths", + }, + { + name: "star file extension", + input: "config.star", + expectedType: (*FromDisk)(nil), + description: "Star files should be treated as paths", + }, + { + name: "starlark file extension", + input: "build.starlark", + expectedType: (*FromDisk)(nil), + description: "Starlark files should be treated as paths", + }, + { + name: "case insensitive WASM", + input: "MODULE.WASM", + expectedType: (*FromDisk)(nil), + description: "Case insensitive file extensions should work", + }, + { + name: "single line code that looks like path", + input: "/bin/bash -c 'echo hello'", + expectedType: (*FromString)(nil), + description: "Shell commands should be treated as content", + }, + { + name: "function call looks like content", + input: "function() { return 42; }", + expectedType: (*FromString)(nil), + description: "Function definitions should be treated as content", + }, + { + name: "javascript var declaration", + input: "var x = '/some/path'; console.log(x);", + expectedType: (*FromString)(nil), + description: "Variable declarations should be treated as content", + }, + } + + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + result, err := InferLoader(tc.input) + require.NoError(t, err) + assert.IsType(t, tc.expectedType, result, tc.description) + }) + } +} + +// TestInferLoader_URLParsingEdgeCases tests graceful handling of URL parsing failures. +func TestInferLoader_URLParsingEdgeCases(t *testing.T) { + t.Parallel() + + tests := []struct { + name string + input string + shouldError bool + errorContains string + expectedType any + }{ + { + name: "URL with control characters should fall back to content", + input: "http://example.com/script\nwith\nnewlines.invalid", + shouldError: false, + expectedType: (*FromString)(nil), + }, + { + name: "file URL with spaces should be handled gracefully", + input: "file:///path with spaces/script.wasm", + shouldError: false, + expectedType: (*FromDisk)(nil), // Valid file URL should return FromDisk + }, + { + name: "malformed scheme treated as content", + input: "ht!tp://invalid-scheme.com/script.invalid", + expectedType: (*FromString)(nil), + }, + { + name: "content that looks like URL with control chars", + input: "http://example.com\nfunction test() {}", + expectedType: (*FromString)(nil), + }, + } + + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + result, err := InferLoader(tc.input) + + if tc.shouldError { + assert.Error(t, err) + if tc.errorContains != "" { + assert.Contains(t, err.Error(), tc.errorContains) + } + return + } + + require.NoError(t, err) + if tc.expectedType != nil { + assert.IsType(t, tc.expectedType, result) + } + }) + } +} + +// TestInferLoader_ControlCharactersAndSpecialCases tests handling of various special characters. +func TestInferLoader_ControlCharactersAndSpecialCases(t *testing.T) { + t.Parallel() + + tests := []struct { + name string + input string + expectedType any + }{ + { + name: "content with newlines and tabs", + input: "function test() {\n\treturn 'hello\\nworld';\n}", + expectedType: (*FromString)(nil), + }, + { + name: "content with carriage returns", + input: "line1\r\nline2\r\nfunction test() {}", + expectedType: (*FromString)(nil), + }, + { + name: "content with null bytes as binary", + input: string( + []byte{0x00, 0x61, 0x73, 0x6D, 0x01, 0x00, 0x00, 0x00}, + ), // WASM magic as string + expectedType: (*FromString)( + nil, + ), // Raw binary should be string, not auto-detected + }, + { + name: "content with multiple spaces", + input: "const x = 'value';", + expectedType: (*FromString)(nil), + }, + { + name: "windows path with spaces and unsupported extension", + input: "C:\\Program Files\\App with spaces\\script.invalid", + expectedType: (*FromString)(nil), + }, + } + + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + result, err := InferLoader(tc.input) + require.NoError(t, err) + assert.IsType(t, tc.expectedType, result, "Input: %q", tc.input) + }) + } +} From 72bd6cda4f1e4d73c4227c59c0dd1a2590461e0b Mon Sep 17 00:00:00 2001 From: RT Date: Thu, 17 Jul 2025 14:33:01 -0400 Subject: [PATCH 2/3] remove the content assumption --- platform/script/loader/inference.go | 9 --------- 1 file changed, 9 deletions(-) diff --git a/platform/script/loader/inference.go b/platform/script/loader/inference.go index 0462833..cb90da9 100644 --- a/platform/script/loader/inference.go +++ b/platform/script/loader/inference.go @@ -89,15 +89,6 @@ func isValidFilePath(s string) bool { return false } - // Don't consider strings that look like code as file paths - if strings.Contains(s, " -c ") || // shell commands - strings.Contains(s, "';") || // code with semicolons after quotes - strings.Contains(s, "console.") || // JavaScript console calls - strings.Contains(s, "var ") || // variable declarations - strings.Contains(s, "function ") { // function declarations - return false - } - // Check for specific script file extensions (case insensitive) validExtensions := []string{".wasm", ".risor", ".star", ".starlark"} lower := strings.ToLower(s) From dec3b26a02ad539f739cacc8051e6becd0b1a13f Mon Sep 17 00:00:00 2001 From: RT Date: Thu, 17 Jul 2025 14:39:49 -0400 Subject: [PATCH 3/3] adjust the file path detection logic --- platform/script/loader/inference.go | 4 ++-- platform/script/loader/inference_test.go | 26 +++++++++++++++--------- 2 files changed, 18 insertions(+), 12 deletions(-) diff --git a/platform/script/loader/inference.go b/platform/script/loader/inference.go index cb90da9..aee6327 100644 --- a/platform/script/loader/inference.go +++ b/platform/script/loader/inference.go @@ -98,6 +98,6 @@ func isValidFilePath(s string) bool { } } - // Don't treat absolute paths as file paths unless they have supported extensions - return false + // Basic file path patterns without filesystem check + return filepath.IsAbs(s) } diff --git a/platform/script/loader/inference_test.go b/platform/script/loader/inference_test.go index 1cdd83a..63582f9 100644 --- a/platform/script/loader/inference_test.go +++ b/platform/script/loader/inference_test.go @@ -40,9 +40,11 @@ func TestInferLoader(t *testing.T) { expectedType: (*FromDisk)(nil), }, { - name: "absolute path with invalid extension", - input: "/absolute/path/script.invalid", - expectedType: (*FromString)(nil), + name: "absolute path with invalid extension", + input: "/absolute/path/script.invalid", + expectedType: (*FromDisk)( + nil, + ), // Absolute paths are treated as file paths regardless of extension }, { name: "absolute path with wasm extension", @@ -243,9 +245,11 @@ func TestInferFromString(t *testing.T) { expectedType any }{ { - name: "absolute unix path with invalid extension", - input: "/usr/local/bin/script.invalid", - expectedType: (*FromString)(nil), + name: "absolute unix path with invalid extension", + input: "/usr/local/bin/script.invalid", + expectedType: (*FromDisk)( + nil, + ), // Absolute paths are file paths even with unsupported extensions }, { name: "absolute unix path with wasm extension", @@ -628,10 +632,12 @@ func TestInferLoader_AmbiguousContentDetection(t *testing.T) { description: "Case insensitive file extensions should work", }, { - name: "single line code that looks like path", - input: "/bin/bash -c 'echo hello'", - expectedType: (*FromString)(nil), - description: "Shell commands should be treated as content", + name: "single line code that looks like path", + input: "/bin/bash -c 'echo hello'", + expectedType: (*FromDisk)( + nil, + ), // Absolute paths are treated as file paths even if they look like commands + description: "Absolute paths take precedence over content detection", }, { name: "function call looks like content",