Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add support for complicated filenames and file copies #61

Merged
merged 3 commits into from
May 26, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
63 changes: 63 additions & 0 deletions diff/diff_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -704,6 +704,69 @@ func TestParseMultiFileDiffHeaders(t *testing.T) {
},
},
},
{
filename: "complicated_filenames.diff",
wantDiffs: []*FileDiff{
{
OrigName: "/dev/null",
NewName: "b/new empty file with spaces",
Extended: []string{
"diff --git a/new empty file with spaces b/new empty file with spaces",
"new file mode 100644",
"index 0000000..e69de29",
},
},
{
OrigName: "/dev/null",
NewName: "b/new file with text",
Extended: []string{
"diff --git a/new file with text b/new file with text",
"new file mode 100644",
"index 0000000..c3ed4be",
},
},
{
OrigName: "a/existing file with spaces",
NewName: "b/new file with spaces",
Extended: []string{
"diff --git a/existing file with spaces b/new file with spaces",
"similarity index 100%",
"copy from existing file with spaces",
"copy to new file with spaces",
},
},
{
OrigName: "a/existing file with spaces",
NewName: "b/new, complicated\nfilenøme",
Extended: []string{
`diff --git a/existing file with spaces "b/new, complicated\nfilen\303\270me"`,
"similarity index 100%",
"copy from existing file with spaces",
`copy to "new, complicated\nfilen\303\270me"`,
},
},
{
OrigName: "a/existing file with spaces",
NewName: `b/new "complicated" filename`,
Extended: []string{
`diff --git a/existing file with spaces "b/new \"complicated\" filename"`,
"similarity index 100%",
"copy from existing file with spaces",
`copy to "new \"complicated\" filename"`,
},
},
{
OrigName: `a/existing "complicated" filename`,
NewName: "b/new, simpler filename",
Extended: []string{
`diff --git "a/existing \"complicated\" filename" b/new, simpler filename`,
"similarity index 100%",
`copy from "existing \"complicated\" filename"`,
"copy to new, simpler filename",
},
},
},
},
mrnugget marked this conversation as resolved.
Show resolved Hide resolved
}
for _, test := range tests {
t.Run(test.filename, func(t *testing.T) {
Expand Down
149 changes: 135 additions & 14 deletions diff/parse.go
Original file line number Diff line number Diff line change
Expand Up @@ -372,6 +372,102 @@ func (r *FileDiffReader) ReadExtendedHeaders() ([]string, error) {
}
}

// readQuotedFilename extracts a quoted filename from the beginning of a string,
// returning the unquoted filename and any remaining text after the filename.
func readQuotedFilename(text string) (value string, remainder string, err error) {
if text == "" || text[0] != '"' {
return "", "", fmt.Errorf(`string must start with a '"': %s`, text)
}

// The end quote is the first quote NOT preceeded by an uneven number of backslashes.
numberOfBackslashes := 0
for i, c := range text {
if c == '"' && i > 0 && numberOfBackslashes%2 == 0 {
value, err = strconv.Unquote(text[:i+1])
remainder = text[i+1:]
return
} else if c == '\\' {
numberOfBackslashes++
} else {
numberOfBackslashes = 0
}
}
return "", "", fmt.Errorf(`end of string found while searching for '"': %s`, text)
}

// parseDiffGitArgs extracts the two filenames from a 'diff --git' line.
mrnugget marked this conversation as resolved.
Show resolved Hide resolved
// Returns false on syntax error, true if syntax is valid. Even with a
// valid syntax, it may be impossible to extract filenames; if so, the
// function returns ("", "", true).
func parseDiffGitArgs(diffArgs string) (string, string, bool) {
length := len(diffArgs)
if length < 3 {
return "", "", false
}

if diffArgs[0] != '"' && diffArgs[length-1] != '"' {
// Both filenames are unquoted.
firstSpace := strings.IndexByte(diffArgs, ' ')
if firstSpace <= 0 || firstSpace == length-1 {
return "", "", false
}

secondSpace := strings.IndexByte(diffArgs[firstSpace+1:], ' ')
if secondSpace == -1 {
if diffArgs[firstSpace+1] == '"' {
mrnugget marked this conversation as resolved.
Show resolved Hide resolved
// The second filename begins with '"', but doesn't end with one.
return "", "", false
}
return diffArgs[:firstSpace], diffArgs[firstSpace+1:], true
}

// One or both filenames contain a space, but the names are
// unquoted. Here, the 'diff --git' syntax is ambiguous, and
// we have to obtain the filenames elsewhere (e.g. from the
// hunk headers or extended headers). HOWEVER, if the file
// is newly created and empty, there IS no other place to
// find the filename. In this case, the two filenames are
// identical (except for the leading 'a/' prefix), and we have
// to handle that case here.
first := diffArgs[:length/2]
second := diffArgs[length/2+1:]
if len(first) >= 3 && length%2 == 1 && first[1] == '/' && first[1:] == second[1:] {
return first, second, true
}

// The syntax is (unfortunately) valid, but we could not extract
// the filenames.
return "", "", true
}

if diffArgs[0] == '"' {
first, remainder, err := readQuotedFilename(diffArgs)
if err != nil || len(remainder) < 2 || remainder[0] != ' ' {
return "", "", false
}
if remainder[1] == '"' {
second, remainder, err := readQuotedFilename(remainder[1:])
if remainder != "" || err != nil {
return "", "", false
}
return first, second, true
}
return first, remainder[1:], true
}

// In this case, second argument MUST be quoted (or it's a syntax error)
i := strings.IndexByte(diffArgs, '"')
if i == -1 || i+2 >= length || diffArgs[i-1] != ' ' {
return "", "", false
}

second, remainder, err := readQuotedFilename(diffArgs[i:])
if remainder != "" || err != nil {
return "", "", false
}
return diffArgs[:i-1], second, true
}

// handleEmpty detects when FileDiff was an empty diff and will not have any hunks
// that follow. It updates fd fields from the parsed extended headers.
func handleEmpty(fd *FileDiff) (wasEmpty bool) {
Expand All @@ -388,6 +484,10 @@ func handleEmpty(fd *FileDiff) (wasEmpty bool) {
return lineHasPrefix(idx1, prefix1) && lineHasPrefix(idx2, prefix2)
}

isCopy := (lineCount == 4 && linesHavePrefixes(2, "copy from ", 3, "copy to ")) ||
(lineCount == 6 && linesHavePrefixes(2, "copy from ", 3, "copy to ") && lineHasPrefix(5, "Binary files ")) ||
(lineCount == 6 && linesHavePrefixes(1, "old mode ", 2, "new mode ") && linesHavePrefixes(4, "copy from ", 5, "copy to "))

isRename := (lineCount == 4 && linesHavePrefixes(2, "rename from ", 3, "rename to ")) ||
(lineCount == 6 && linesHavePrefixes(2, "rename from ", 3, "rename to ") && lineHasPrefix(5, "Binary files ")) ||
(lineCount == 6 && linesHavePrefixes(1, "old mode ", 2, "new mode ") && linesHavePrefixes(4, "rename from ", 5, "rename to "))
Expand All @@ -402,22 +502,12 @@ func handleEmpty(fd *FileDiff) (wasEmpty bool) {

isBinaryPatch := lineCount == 3 && lineHasPrefix(2, "Binary files ") || lineCount > 3 && lineHasPrefix(2, "GIT binary patch")

if !isModeChange && !isRename && !isBinaryPatch && !isNewFile && !isDeletedFile {
if !isModeChange && !isCopy && !isRename && !isBinaryPatch && !isNewFile && !isDeletedFile {
return false
}

names := strings.SplitN(fd.Extended[0][len("diff --git "):], " ", 2)

var err error
fd.OrigName, err = strconv.Unquote(names[0])
if err != nil {
fd.OrigName = names[0]
}
fd.NewName, err = strconv.Unquote(names[1])
if err != nil {
fd.NewName = names[1]
}

var success bool
fd.OrigName, fd.NewName, success = parseDiffGitArgs(fd.Extended[0][len("diff --git "):])
if isNewFile {
fd.OrigName = "/dev/null"
}
Expand All @@ -426,7 +516,38 @@ func handleEmpty(fd *FileDiff) (wasEmpty bool) {
fd.NewName = "/dev/null"
}

return true
// For ambiguous 'diff --git' lines, try to reconstruct filenames using extended headers.
if success && (isCopy || isRename) && fd.OrigName == "" && fd.NewName == "" {
diffArgs := fd.Extended[0][len("diff --git "):]

tryReconstruct := func(header string, prefix string, whichFile int, result *string) {
if !strings.HasPrefix(header, prefix) {
return
}
rawFilename := header[len(prefix):]

// extract the filename prefix (e.g. "a/") from the 'diff --git' line.
var prefixLetterIndex int
if whichFile == 1 {
prefixLetterIndex = 0
} else if whichFile == 2 {
prefixLetterIndex = len(diffArgs) - len(rawFilename) - 2
}
if prefixLetterIndex < 0 || diffArgs[prefixLetterIndex+1] != '/' {
return
}

*result = diffArgs[prefixLetterIndex:prefixLetterIndex+2] + rawFilename
}

for _, header := range fd.Extended {
tryReconstruct(header, "copy from ", 1, &fd.OrigName)
tryReconstruct(header, "copy to ", 2, &fd.NewName)
tryReconstruct(header, "rename from ", 1, &fd.OrigName)
tryReconstruct(header, "rename to ", 2, &fd.NewName)
}
}
return success
}

var (
Expand Down
93 changes: 93 additions & 0 deletions diff/parse_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
package diff

import (
"testing"
)

func TestReadQuotedFilename_Success(t *testing.T) {
tests := []struct {
input, value, remainder string
}{
{input: `""`, value: "", remainder: ""},
{input: `"aaa"`, value: "aaa", remainder: ""},
{input: `"aaa" bbb`, value: "aaa", remainder: " bbb"},
{input: `"aaa" "bbb" ccc`, value: "aaa", remainder: ` "bbb" ccc`},
{input: `"\""`, value: "\"", remainder: ""},
{input: `"uh \"oh\""`, value: "uh \"oh\"", remainder: ""},
{input: `"uh \\"oh\\""`, value: "uh \\", remainder: `oh\\""`},
{input: `"uh \\\"oh\\\""`, value: "uh \\\"oh\\\"", remainder: ""},
}
for _, tc := range tests {
value, remainder, err := readQuotedFilename(tc.input)
if err != nil {
t.Errorf("readQuotedFilename(`%s`): expected success, got '%s'", tc.input, err)
} else if value != tc.value || remainder != tc.remainder {
t.Errorf("readQuotedFilename(`%s`): expected `%s` and `%s`, got `%s` and `%s`", tc.input, tc.value, tc.remainder, value, remainder)
}
}
}

func TestReadQuotedFilename_Error(t *testing.T) {
tests := []string{
// Doesn't start with a quote
``,
`foo`,
` "foo"`,
// Missing end quote
`"`,
`"\"`,
// "\x" is not a valid Go string literal escape
`"\xxx"`,
}
for _, input := range tests {
_, _, err := readQuotedFilename(input)
if err == nil {
t.Errorf("readQuotedFilename(`%s`): expected error", input)
}
}
}

func TestParseDiffGitArgs_Success(t *testing.T) {
tests := []struct {
input, first, second string
}{
{input: `aaa bbb`, first: "aaa", second: "bbb"},
{input: `"aaa" bbb`, first: "aaa", second: "bbb"},
{input: `aaa "bbb"`, first: "aaa", second: "bbb"},
{input: `"aaa" "bbb"`, first: "aaa", second: "bbb"},
{input: `1/a 2/z`, first: "1/a", second: "2/z"},
{input: `1/hello world 2/hello world`, first: "1/hello world", second: "2/hello world"},
{input: `"new\nline" and spaces`, first: "new\nline", second: "and spaces"},
{input: `a/existing file with spaces "b/new, complicated\nfilen\303\270me"`, first: "a/existing file with spaces", second: "b/new, complicated\nfilen\303\270me"},
}
for _, tc := range tests {
first, second, success := parseDiffGitArgs(tc.input)
if !success {
t.Errorf("`diff --git %s`: expected success", tc.input)
} else if first != tc.first || second != tc.second {
t.Errorf("`diff --git %s`: expected `%s` and `%s`, got `%s` and `%s`", tc.input, tc.first, tc.second, first, second)
}
}
}

func TestParseDiffGitArgs_Unsuccessful(t *testing.T) {
tests := []string{
``,
`hello_world.txt`,
`word `,
` word`,
`"a/bad_quoting b/bad_quoting`,
`a/bad_quoting "b/bad_quoting`,
`a/bad_quoting b/bad_quoting"`,
`"a/bad_quoting b/bad_quoting"`,
`"a/bad""b/bad"`,
`"a/bad" "b/bad" "c/bad"`,
`a/bad "b/bad" "c/bad"`,
}
for _, input := range tests {
first, second, success := parseDiffGitArgs(input)
if success {
t.Errorf("`diff --git %s`: expected unsuccessful; got `%s` and `%s`", input, first, second)
}
}
}
26 changes: 26 additions & 0 deletions diff/testdata/complicated_filenames.diff
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
diff --git a/new empty file with spaces b/new empty file with spaces
new file mode 100644
index 0000000..e69de29
diff --git a/new file with text b/new file with text
new file mode 100644
index 0000000..c3ed4be
--- /dev/null
+++ b/new file with text
@@ -0,0 +1 @@
+new file with text
diff --git a/existing file with spaces b/new file with spaces
similarity index 100%
copy from existing file with spaces
copy to new file with spaces
diff --git a/existing file with spaces "b/new, complicated\nfilen\303\270me"
similarity index 100%
copy from existing file with spaces
copy to "new, complicated\nfilen\303\270me"
diff --git a/existing file with spaces "b/new \"complicated\" filename"
similarity index 100%
copy from existing file with spaces
copy to "new \"complicated\" filename"
diff --git "a/existing \"complicated\" filename" b/new, simpler filename
similarity index 100%
copy from "existing \"complicated\" filename"
copy to new, simpler filename