From 90818a792e8dd8fc290231a76f98c33e245c104c Mon Sep 17 00:00:00 2001 From: Adam Shannon Date: Mon, 7 Aug 2023 11:17:06 -0500 Subject: [PATCH] feat: detect Windows-1252 encoding and decode to utf-8 --- encoding_test.go | 37 ++++++++++++++++++++++++++ go.mod | 3 +++ go.sum | 13 +++++++++ reader.go | 21 +++++++++++++-- test/testdata/nonascii-utf8.ach | 20 ++++++++++++++ test/testdata/nonascii-windows1252.ach | 20 ++++++++++++++ test/testdata/nonascii.ach | 20 ++++++++++++++ 7 files changed, 132 insertions(+), 2 deletions(-) create mode 100644 test/testdata/nonascii-utf8.ach create mode 100644 test/testdata/nonascii-windows1252.ach create mode 100644 test/testdata/nonascii.ach diff --git a/encoding_test.go b/encoding_test.go index a7ee8ba6f..41be93310 100644 --- a/encoding_test.go +++ b/encoding_test.go @@ -19,8 +19,12 @@ package ach import ( "bytes" + "os" + "path/filepath" "testing" + "golang.org/x/net/html/charset" + "github.com/stretchr/testify/require" ) @@ -107,4 +111,37 @@ func TestExtendedCharacters(t *testing.T) { require.Equal(t, `My {Store} `, entries[0].IndividualName) require.Equal(t, `RF1¦RF2`, entries[0].Addenda02.ReferenceInformationOne) }) + + t.Run("detect", func(t *testing.T) { + bs, err := os.ReadFile(filepath.Join("test", "testdata", "nonascii-utf8.ach")) + require.NoError(t, err) + _, name, _ := charset.DetermineEncoding(bs, "plain/text") + require.Equal(t, "utf-8", name) + + bs, err = os.ReadFile(filepath.Join("test", "testdata", "nonascii-windows1252.ach")) + require.NoError(t, err) + _, name, _ = charset.DetermineEncoding(bs, "plain/text") + require.Equal(t, "windows-1252", name) + + bs, err = os.ReadFile(filepath.Join("test", "testdata", "nonascii.ach")) + require.NoError(t, err) + _, name, _ = charset.DetermineEncoding(bs, "plain/text") + require.Equal(t, "windows-1252", name) + }) + + t.Run("parse windows-1252", func(t *testing.T) { + file, err := ReadFile(filepath.Join("test", "testdata", "nonascii.ach")) + require.NoError(t, err) + + require.Len(t, file.Batches, 1) + bh := file.Batches[0].GetHeader() + require.Equal(t, "REG.SALARY", bh.CompanyEntryDescription) + + entries := file.Batches[0].GetEntries() + require.Len(t, entries, 1) + require.Equal(t, "0012Receiver Acc Name ", entries[0].IndividualName) + + require.Len(t, entries[0].Addenda05, 12) + require.Contains(t, entries[0].Addenda05[0].PaymentRelatedInformation, "¦ZZ¦PAYEXPENSEPAY") + }) } diff --git a/go.mod b/go.mod index dade7e32b..45e319d00 100644 --- a/go.mod +++ b/go.mod @@ -13,6 +13,8 @@ require ( github.com/moov-io/iso4217 v0.3.0 github.com/prometheus/client_golang v1.16.0 github.com/stretchr/testify v1.8.4 + golang.org/x/net v0.10.0 + golang.org/x/oauth2 v0.8.0 golang.org/x/text v0.11.0 ) @@ -33,6 +35,7 @@ require ( github.com/rickar/cal/v2 v2.1.13 // indirect github.com/rogpeppe/go-internal v1.10.0 // indirect golang.org/x/sys v0.8.0 // indirect + google.golang.org/appengine v1.6.7 // indirect google.golang.org/protobuf v1.31.0 // indirect gopkg.in/yaml.v3 v3.0.1 // indirect ) diff --git a/go.sum b/go.sum index bd3bbd004..db2904383 100644 --- a/go.sum +++ b/go.sum @@ -16,6 +16,7 @@ github.com/go-kit/log v0.2.1/go.mod h1:NwTd00d/i8cPZ3xOwwiv2PO5MOcx78fFErGNcVmBj github.com/go-logfmt/logfmt v0.6.0 h1:wGYYu3uicYdqXVgoYbvnkrPVXkuLM1p1ifugDMEdRi4= github.com/go-logfmt/logfmt v0.6.0/go.mod h1:WYhtIu8zTZfxdn5+rREduYbwxfcBr/Vr6KEVveWlfTs= github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= +github.com/golang/protobuf v1.3.1/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= github.com/golang/protobuf v1.3.5/go.mod h1:6O5/vntMXwX2lRkT1hjjk0nAC1IDOTvTlVgjlRvqsdk= github.com/golang/protobuf v1.5.0/go.mod h1:FsONVRAS9T7sI+LIUmWTfcYkHO4aIWwzhcaSAoJOfIk= github.com/golang/protobuf v1.5.3 h1:KhyjKVUg7Usr/dYsdSqoFveMYd5ko72D+zANwlG1mmg= @@ -64,16 +65,28 @@ github.com/rogpeppe/go-internal v1.10.0 h1:TMyTOH3F/DB16zRVcYyreMH6GnZZrwQVAoYjR github.com/rogpeppe/go-internal v1.10.0/go.mod h1:UQnix2H7Ngw/k4C5ijL5+65zddjncjaFoBhdsK/akog= github.com/stretchr/testify v1.8.4 h1:CcVxjf3Q8PM0mHUKJCdn+eZZtm5yQwehR5yeSVQQcUk= github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo= +golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= +golang.org/x/net v0.0.0-20190603091049-60506f45cf65/go.mod h1:HSz+uSET+XFnRR8LxR5pz3Of3rY3CfYBVs4xY44aLks= +golang.org/x/net v0.10.0 h1:X2//UzNDwYmtCLn7To6G58Wr6f5ahEAQgKNzv9Y951M= +golang.org/x/net v0.10.0/go.mod h1:0qNGK6F8kojg2nk9dLZ2mShWaEBan6FAoqfSigmmuDg= +golang.org/x/oauth2 v0.8.0 h1:6dkIjl3j3LtZ/O3sTgZTMsLKSftL/B8Zgq4huOIIUu8= +golang.org/x/oauth2 v0.8.0/go.mod h1:yr7u4HXZRm1R1kBWqr/xKNqewf0plRYoB7sla+BCIXE= golang.org/x/sync v0.0.0-20181221193216-37e7f081c4d4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20200116001909-b77594299b42/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200223170610-d5e6a3e2c0ae/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210630005230-0f9fa26af87c/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.8.0 h1:EBmGv8NaZBZTWvrbjNoL6HVt+IVy3QDQpJs7VRIw3tU= golang.org/x/sys v0.8.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= +golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk= golang.org/x/text v0.11.0 h1:LAntKIrcmeSKERyiOh0XMV39LXS8IE9UL2yP7+f5ij4= golang.org/x/text v0.11.0/go.mod h1:TvPlkZtksWOMsz7fbANvkp4WM8x/WCo/om8BMLbz+aE= +golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +google.golang.org/appengine v1.6.7 h1:FZR1q0exgwxzPzp/aF+VccGrSfxfPpkBqjIIEq3ru6c= +google.golang.org/appengine v1.6.7/go.mod h1:8WjMMxjGQR8xUklV/ARdw2HLXBOI7O7uCIDZVag1xfc= google.golang.org/protobuf v1.26.0-rc.1/go.mod h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp09yW+WbY/TyQbw= google.golang.org/protobuf v1.26.0/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc= google.golang.org/protobuf v1.31.0 h1:g0LDEJHgrBl9N9r17Ru3sqWhkIx2NB67okBHPwC7hs8= diff --git a/reader.go b/reader.go index b5f159234..ac829b54f 100644 --- a/reader.go +++ b/reader.go @@ -26,6 +26,8 @@ import ( "unicode/utf8" "github.com/moov-io/base" + + "golang.org/x/net/html/charset" ) var ( @@ -134,10 +136,25 @@ func ReadFiles(paths []string) ([]*File, error) { // NewReader returns a new ACH Reader that reads from r. func NewReader(r io.Reader) *Reader { - return &Reader{ + out := &Reader{ maxLines: defaultMaxLines, - scanner: bufio.NewScanner(r), } + + // charset.Reader will decode windows-1252 strings into utf-8 automatically. + rr, err := charset.NewReader(r, "text/plain") + if err != nil { + // Fake an empty reader if we read nothing + if err == io.EOF || err == io.ErrUnexpectedEOF { + out.scanner = bufio.NewScanner(strings.NewReader("")) + } else { + out.errors.Add(err) + } + } + if rr != nil { + out.scanner = bufio.NewScanner(rr) + } + + return out } func (r *Reader) SetMaxLines(max int) { diff --git a/test/testdata/nonascii-utf8.ach b/test/testdata/nonascii-utf8.ach new file mode 100644 index 000000000..51fdb55ca --- /dev/null +++ b/test/testdata/nonascii-utf8.ach @@ -0,0 +1,20 @@ +101 23138010401111111121906240000A094101Federal Reserve Bank My Bank Name +5200Name on Account 111111112 CTXREG.SALARY 190625 1111111110000001 +62723138010412345678 0100000000 0012Receiver Acc Name 1111111110000001 +705ISA¦00¦ ¦00¦ ¦ZZ¦PAYEXPENSEPAY ¦ZZ¦PAYAECSUSO ¦230628¦02100010000001 +7059¦U¦00401¦017587397¦0¦P¦^~GS¦RA¦PAYEXPENSEPAY¦PAYAECSUSO¦20230628¦0219¦17587397¦00020000001 +705X¦004010~ST¦820¦0069~BPR¦C¦1352.88¦C¦ACH¦CTX¦01¦026009593¦DA¦8765117001¦8201665000030000001 +70519¦¦01¦091218445¦DA¦4233388950¦20230629~TRN¦1¦M4986O3TM2¦¦M4986O3TM2~CUR¦PE¦USD¦00040000001 +705¦PR¦USD~REF¦BT¦J33QZD22QW~REF¦C2¦PAYEXPENSEPAY_120603391031634~REF¦TN¦¦B33TKE25Q00050000001 +705A~REF¦PH¦1~REF¦CA¦01~REF¦VI¦¦/PHON 888-888-8888~REF¦7U¦M4986O3TM2~REF¦8M¦¦B7033D00060000001 +7053WU8~REF¦SEK¦¦USUS_TRF_NURG_Y_N_US_CTX__USD~N1¦RB¦M1 FINANCE~N3¦ADDRESS UNKNOWN~00070000001 +705N4¦CITY UNKNOWN¦..¦¦US~N1¦PR¦META PLATFORMS INC~N3¦873 CALIFORNIA STREET 19TH FL00080000001 +705OOR~N4¦SAN FRANCISCO¦CA¦94104¦US~REF¦2U¦8201665019~N1¦PE¦JO SMITH~N4¦BELLEVUE¦¦900090000001 +7058005¦US~N1¦CE¦JO SMITH~N4¦BELLEVUE¦..¦98005¦US~N1¦DE¦META PLATFORMS INC~N3¦873 C00100000001 +705ALIFORNIA STREET 19TH FLOOR~N4¦SAN FRANCISCO¦CA¦94104¦US~ENT¦1~RMR¦IK¦/PHON 888-00110000001 +705888-8888 /CONT JO SM~RMR¦IK¦ANG~SE¦31¦0069~GE¦0001¦17587397~IEA¦0001¦017587397~ 00120000001 +82000000130023138010000100000000000000000000111111112 111111110000001 +9000001000001000000010023138010000100000000000000000000 +9999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999 +9999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999 +9999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999 diff --git a/test/testdata/nonascii-windows1252.ach b/test/testdata/nonascii-windows1252.ach new file mode 100644 index 000000000..1a635618f --- /dev/null +++ b/test/testdata/nonascii-windows1252.ach @@ -0,0 +1,20 @@ +101 23138010401210428821906240000A094101Federal Reserve Bank My Bank Name +5200Name on Account 121042882 CTXREG.SALARY 190625 1121042880000001 +62723138010412345678 0100000000 Receiver Account Name 1111111110000001 +705ISA¦00¦ ¦00¦ ¦ZZ¦PAYEXPENSEPAY ¦ZZ¦PAYAECSUSO ¦230628¦02100010871559 +7059¦U¦00401¦017587397¦0¦P¦^~GS¦RA¦PAYEXPENSEPAY¦PAYAECSUSO¦20230628¦0219¦17587397¦00020871559 +705X¦004010~ST¦820¦0069~BPR¦C¦1352.88¦C¦ACH¦CTX¦01¦026009593¦DA¦8765117001¦8201665000030871559 +70519¦¦01¦091218445¦DA¦4233388950¦20230629~TRN¦1¦M4986O3TM2¦¦M4986O3TM2~CUR¦PE¦USD¦00040871559 +705¦PR¦USD~REF¦BT¦J33QZD22QW~REF¦C2¦PAYEXPENSEPAY_120603391031634~REF¦TN¦¦B33TKE25Q00050871559 +705A~REF¦PH¦1~REF¦CA¦01~REF¦VI¦¦/PHON 888-888-8888~REF¦7U¦M4986O3TM2~REF¦8M¦¦B7033D00060871559 +7053WU8~REF¦SEK¦¦USUS_TRF_NURG_Y_N_US_CTX__USD~N1¦RB¦M1 FINANCE~N3¦ADDRESS UNKNOWN~00070871559 +705N4¦CITY UNKNOWN¦..¦¦US~N1¦PR¦META PLATFORMS INC~N3¦873 CALIFORNIA STREET 19TH FL00080871559 +705OOR~N4¦SAN FRANCISCO¦CA¦94104¦US~REF¦2U¦8201665019~N1¦PE¦JO SMITH~N4¦BELLEVUE¦¦900090871559 +7058005¦US~N1¦CE¦BO ZHANG~N4¦BELLEVUE¦..¦98005¦US~N1¦DE¦META PLATFORMS INC~N3¦315 M00100871559 +705ONTGOMERY STREET 13TH FLOOR~N4¦SAN FRANCISCO¦CA¦94104¦US~ENT¦1~RMR¦IK¦/PHON 888-00110871559 +705888-8888 /CONT BO ZH~RMR¦IK¦ANG~SE¦31¦0069~GE¦0001¦17587397~IEA¦0001¦017587397~ 00120871559 +82000000120023138010000100000000000000000000121042882 121042880000001 +9000001000001000000010023138010000100000000000000000000 +9999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999 +9999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999 +9999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999 diff --git a/test/testdata/nonascii.ach b/test/testdata/nonascii.ach new file mode 100644 index 000000000..ce663afaa --- /dev/null +++ b/test/testdata/nonascii.ach @@ -0,0 +1,20 @@ +101 23138010401111111121906240000A094101Federal Reserve Bank My Bank Name +5200Name on Account 111111112 CTXREG.SALARY 190625 1111111110000001 +62723138010412345678 0100000000 0012Receiver Acc Name 1111111110000001 +705ISA¦00¦ ¦00¦ ¦ZZ¦PAYEXPENSEPAY ¦ZZ¦PAYAECSUSO ¦230628¦02100010000001 +7059¦U¦00401¦017587397¦0¦P¦^~GS¦RA¦PAYEXPENSEPAY¦PAYAECSUSO¦20230628¦0219¦17587397¦00020000001 +705X¦004010~ST¦820¦0069~BPR¦C¦1352.88¦C¦ACH¦CTX¦01¦026009593¦DA¦8765117001¦8201665000030000001 +70519¦¦01¦091218445¦DA¦4233388950¦20230629~TRN¦1¦M4986O3TM2¦¦M4986O3TM2~CUR¦PE¦USD¦00040000001 +705¦PR¦USD~REF¦BT¦J33QZD22QW~REF¦C2¦PAYEXPENSEPAY_120603391031634~REF¦TN¦¦B33TKE25Q00050000001 +705A~REF¦PH¦1~REF¦CA¦01~REF¦VI¦¦/PHON 888-888-8888~REF¦7U¦M4986O3TM2~REF¦8M¦¦B7033D00060000001 +7053WU8~REF¦SEK¦¦USUS_TRF_NURG_Y_N_US_CTX__USD~N1¦RB¦M1 FINANCE~N3¦ADDRESS UNKNOWN~00070000001 +705N4¦CITY UNKNOWN¦..¦¦US~N1¦PR¦META PLATFORMS INC~N3¦873 CALIFORNIA STREET 19TH FL00080000001 +705OOR~N4¦SAN FRANCISCO¦CA¦94104¦US~REF¦2U¦8201665019~N1¦PE¦JO SMITH~N4¦BELLEVUE¦¦900090000001 +7058005¦US~N1¦CE¦JO SMITH~N4¦BELLEVUE¦..¦98005¦US~N1¦DE¦META PLATFORMS INC~N3¦873 C00100000001 +705ALIFORNIA STREET 19TH FLOOR~N4¦SAN FRANCISCO¦CA¦94104¦US~ENT¦1~RMR¦IK¦/PHON 888-00110000001 +705888-8888 /CONT JO SM~RMR¦IK¦ANG~SE¦31¦0069~GE¦0001¦17587397~IEA¦0001¦017587397~ 00120000001 +82000000130023138010000100000000000000000000111111112 111111110000001 +9000001000001000000010023138010000100000000000000000000 +9999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999 +9999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999 +9999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999