Skip to content

Commit f0bbadd

Browse files
authored
fix(piperenv): add JSON data cleaning to handle invalid UTF-8 and control characters (#5449)
1 parent a7406e0 commit f0bbadd

File tree

4 files changed

+222
-2
lines changed

4 files changed

+222
-2
lines changed

cmd/writePipelineEnv.go

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -107,7 +107,11 @@ func handleEncryption(password string, data []byte) ([]byte, error) {
107107

108108
func parseInput(data []byte) (piperenv.CPEMap, error) {
109109
commonPipelineEnv := piperenv.CPEMap{}
110-
decoder := json.NewDecoder(bytes.NewReader(data))
110+
111+
// Clean invalid UTF-8 sequences that can cause JSON parsing errors
112+
cleanData := piperenv.CleanJSONData(data)
113+
114+
decoder := json.NewDecoder(bytes.NewReader(cleanData))
111115
decoder.UseNumber()
112116
if err := decoder.Decode(&commonPipelineEnv); err != nil {
113117
return nil, err

cmd/writePipelineEnv_test.go

Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,85 @@
1+
//go:build unit
2+
// +build unit
3+
4+
package cmd
5+
6+
import (
7+
"encoding/json"
8+
"testing"
9+
10+
"github.com/SAP/jenkins-library/pkg/piperenv"
11+
"github.com/stretchr/testify/require"
12+
)
13+
14+
func TestCleanJSONDataWritePipeline(t *testing.T) {
15+
t.Parallel()
16+
17+
// Test valid UTF-8 (should be unchanged)
18+
validData := []byte(`{"git":{"commitMessage":"This is a valid commit message"}}`)
19+
result := piperenv.CleanJSONData(validData)
20+
require.Equal(t, validData, result)
21+
22+
// Test emoji with valid UTF-8 (should be unchanged)
23+
emojiData := []byte(`{"git":{"commitMessage":"🚀 feat: add new feature"}}`)
24+
result = piperenv.CleanJSONData(emojiData)
25+
require.Equal(t, emojiData, result)
26+
27+
// Test data with JSON control character (like \x16)
28+
invalidData := []byte("{\"git\":{\"commitMessage\":\"Test \x16 control char\"}}")
29+
result = piperenv.CleanJSONData(invalidData)
30+
require.NotEqual(t, invalidData, result)
31+
require.True(t, json.Valid(result), "Result should be valid JSON")
32+
33+
// Verify we can parse the cleaned data as JSON
34+
var parsed map[string]interface{}
35+
err := json.Unmarshal(result, &parsed)
36+
require.NoError(t, err)
37+
require.Contains(t, parsed, "git")
38+
39+
// The control character should be escaped as unicode
40+
require.Contains(t, string(result), "\\u0016")
41+
}
42+
43+
func TestParseInputWithInvalidUTF8(t *testing.T) {
44+
t.Parallel()
45+
46+
// Test parsing JSON with control character
47+
invalidJSON := []byte("{\"git\":{\"commitMessage\":\"Test \x16 control char commit\"}}")
48+
49+
// Should not fail due to control characters
50+
cpeMap, err := parseInput(invalidJSON)
51+
require.NoError(t, err)
52+
require.NotNil(t, cpeMap)
53+
54+
// Verify we can access the git section
55+
if git, ok := cpeMap["git"]; ok {
56+
require.NotNil(t, git)
57+
if gitMap, ok := git.(map[string]interface{}); ok {
58+
commitMsg, exists := gitMap["commitMessage"]
59+
require.True(t, exists)
60+
require.IsType(t, "", commitMsg)
61+
// The message should be present and non-empty
62+
require.NotEmpty(t, commitMsg)
63+
}
64+
}
65+
}
66+
67+
func TestParseInputWithValidEmoji(t *testing.T) {
68+
t.Parallel()
69+
70+
// Test parsing JSON with valid emoji
71+
validJSON := []byte(`{"git":{"commitMessage":"🚀 feat: add new feature"}}`)
72+
73+
cpeMap, err := parseInput(validJSON)
74+
require.NoError(t, err)
75+
require.NotNil(t, cpeMap)
76+
77+
// Verify emoji is preserved
78+
if git, ok := cpeMap["git"]; ok {
79+
if gitMap, ok := git.(map[string]interface{}); ok {
80+
commitMsg, exists := gitMap["commitMessage"]
81+
require.True(t, exists)
82+
require.Equal(t, "🚀 feat: add new feature", commitMsg)
83+
}
84+
}
85+
}

pkg/piperenv/CPEMap.go

Lines changed: 76 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ import (
88
"path"
99
"path/filepath"
1010
"strings"
11+
"unicode/utf8"
1112

1213
"github.com/SAP/jenkins-library/pkg/log"
1314
)
@@ -122,7 +123,11 @@ func readFileContent(fullPath string) (string, interface{}, bool, error) {
122123
if strings.HasSuffix(fullPath, ".json") {
123124
// value is json encoded
124125
var value interface{}
125-
decoder := json.NewDecoder(bytes.NewReader(fileContent))
126+
127+
// Clean invalid UTF-8 sequences that can cause JSON parsing errors
128+
cleanContent := CleanJSONData(fileContent)
129+
130+
decoder := json.NewDecoder(bytes.NewReader(cleanContent))
126131
decoder.UseNumber()
127132
err = decoder.Decode(&value)
128133
if err != nil {
@@ -135,3 +140,73 @@ func readFileContent(fullPath string) (string, interface{}, bool, error) {
135140
}
136141
return fileName, string(fileContent), toBeEmptied, nil
137142
}
143+
144+
// CleanJSONData repairs byte sequences that commonly make an otherwise
// well-formed JSON document undecodable and returns the cleaned bytes.
//
// The steps are:
//  1. If data is not valid UTF-8, every run of invalid bytes is replaced by a
//     single U+FFFD replacement character.
//  2. If the result is valid JSON it is returned as is (for input that was
//     already valid UTF-8 and valid JSON this is the input slice itself).
//  3. Otherwise the document is scanned once and every raw control character
//     (0x00-0x1F) found inside a string literal is replaced by its JSON
//     escape: \b, \f, \n, \r, \t, or \u00XX for the rest.
//
// The repair is best effort. Bytes outside string literals are copied
// unchanged, so structurally broken input is still rejected by the decoder.
// The returned slice is never nil when a rewrite took place.
func CleanJSONData(data []byte) []byte {
	if !utf8.Valid(data) {
		data = bytes.ToValidUTF8(data, []byte("\uFFFD"))
	}

	// Fast path: nothing to repair.
	if json.Valid(data) {
		return data
	}

	// data is valid UTF-8 here, so every byte of a multi-byte rune is >= 0x80
	// and can never be mistaken for '"', '\\' or a control character. That
	// makes a plain byte scan equivalent to a rune scan.
	out := make([]byte, 0, len(data))
	inString := false       // currently inside a "..." literal
	afterBackslash := false // previous byte inside the literal was an unconsumed '\'

	for _, c := range data {
		switch {
		case !inString:
			out = append(out, c)
			inString = c == '"'
		case afterBackslash && c >= 0x20:
			// Regular escape sequence such as \" or \\ : copy the escaped byte.
			afterBackslash = false
			out = append(out, c)
		case afterBackslash:
			// A raw control character cannot complete an escape sequence, so
			// the preceding backslash is treated as a literal backslash
			// (doubled) and the control character is escaped on its own.
			afterBackslash = false
			out = append(out, '\\')
			out = appendControlEscape(out, c)
		case c == '\\':
			afterBackslash = true
			out = append(out, c)
		case c == '"':
			inString = false
			out = append(out, c)
		case c < 0x20:
			out = appendControlEscape(out, c)
		default:
			out = append(out, c)
		}
	}

	return out
}

// appendControlEscape appends the JSON escape sequence for the control
// character c (expected to be < 0x20) to dst and returns the extended slice.
func appendControlEscape(dst []byte, c byte) []byte {
	const hexDigits = "0123456789abcdef"

	switch c {
	case '\b':
		return append(dst, '\\', 'b')
	case '\f':
		return append(dst, '\\', 'f')
	case '\n':
		return append(dst, '\\', 'n')
	case '\r':
		return append(dst, '\\', 'r')
	case '\t':
		return append(dst, '\\', 't')
	default:
		// No short form exists; emit \u00XX with lowercase hex digits.
		return append(dst, '\\', 'u', '0', '0', hexDigits[c>>4], hexDigits[c&0x0f])
	}
}

pkg/piperenv/CPEMap_test.go

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -107,3 +107,59 @@ func TestCommonPipelineEnvDirNotPresent(t *testing.T) {
107107
require.NoError(t, err)
108108
require.Len(t, cpe, 0)
109109
}
110+
111+
func TestCleanJSONData(t *testing.T) {
112+
t.Parallel()
113+
114+
// Test valid UTF-8 (should be unchanged)
115+
validData := []byte(`{"commitMessage":"This is a valid commit message"}`)
116+
result := CleanJSONData(validData)
117+
require.Equal(t, validData, result)
118+
119+
// Test emoji with valid UTF-8 (should be unchanged)
120+
emojiData := []byte(`{"commitMessage":"🚀 feat: add new feature"}`)
121+
result = CleanJSONData(emojiData)
122+
require.Equal(t, emojiData, result)
123+
124+
// Test data with JSON control character (like \x16)
125+
invalidData := []byte("{\"commitMessage\":\"Test \x16 invalid char\"}")
126+
result = CleanJSONData(invalidData)
127+
require.NotEqual(t, invalidData, result)
128+
require.True(t, json.Valid(result), "Result should be valid JSON")
129+
130+
// Verify we can parse the cleaned data as JSON
131+
var parsed map[string]interface{}
132+
err := json.Unmarshal(result, &parsed)
133+
require.NoError(t, err)
134+
require.Contains(t, parsed, "commitMessage")
135+
136+
// The control character should be escaped as unicode
137+
require.Contains(t, string(result), "\\u0016")
138+
}
139+
140+
func TestReadFileContentWithInvalidUTF8(t *testing.T) {
141+
t.Parallel()
142+
143+
// Create a temporary JSON file with control characters
144+
tmpDir := t.TempDir()
145+
jsonFile := path.Join(tmpDir, "test.json")
146+
147+
// Write JSON with control character that causes parsing errors
148+
invalidJSON := []byte("{\"commitMessage\":\"Test \x16 control char commit\"}")
149+
err := os.WriteFile(jsonFile, invalidJSON, 0644)
150+
require.NoError(t, err)
151+
152+
// Try to read the file - should not fail due to control characters
153+
_, value, _, err := readFileContent(jsonFile)
154+
require.NoError(t, err)
155+
require.NotNil(t, value)
156+
157+
// Verify we can extract the commit message
158+
if valueMap, ok := value.(map[string]interface{}); ok {
159+
commitMsg, exists := valueMap["commitMessage"]
160+
require.True(t, exists)
161+
require.IsType(t, "", commitMsg)
162+
// The control character should be properly handled in the message
163+
require.NotEmpty(t, commitMsg)
164+
}
165+
}

0 commit comments

Comments
 (0)