Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: detect Windows-1252 encoding and decode to utf-8 #1272

Merged
merged 1 commit into from
Aug 7, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 37 additions & 0 deletions encoding_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,12 @@ package ach

import (
"bytes"
"os"
"path/filepath"
"testing"

"golang.org/x/net/html/charset"

"github.com/stretchr/testify/require"
)

Expand Down Expand Up @@ -107,4 +111,37 @@ func TestExtendedCharacters(t *testing.T) {
require.Equal(t, `My {Store} `, entries[0].IndividualName)
require.Equal(t, `RF1¦RF2`, entries[0].Addenda02.ReferenceInformationOne)
})

t.Run("detect", func(t *testing.T) {
	// Each fixture is read from test/testdata and sniffed with
	// charset.DetermineEncoding; the reported encoding name must match.
	// The content type is "text/plain", the same value NewReader passes to
	// charset.NewReader, and carries no charset parameter.
	cases := []struct {
		filename string
		expected string
	}{
		{filename: "nonascii-utf8.ach", expected: "utf-8"},
		{filename: "nonascii-windows1252.ach", expected: "windows-1252"},
		{filename: "nonascii.ach", expected: "windows-1252"},
	}

	for _, tc := range cases {
		bs, err := os.ReadFile(filepath.Join("test", "testdata", tc.filename))
		require.NoError(t, err, tc.filename)

		// Only the encoding name is asserted; the encoding value and the
		// "certain" flag are intentionally not checked here.
		_, name, _ := charset.DetermineEncoding(bs, "text/plain")
		require.Equal(t, tc.expected, name, tc.filename)
	}
})

t.Run("parse windows-1252", func(t *testing.T) {
	// nonascii.ach is parsed through ReadFile; the assertions below check
	// that the batch header, entry detail and addenda come back with the
	// expected text, including the non-ASCII broken-bar separator.
	fixture := filepath.Join("test", "testdata", "nonascii.ach")
	parsed, err := ReadFile(fixture)
	require.NoError(t, err)

	// Exactly one batch is expected.
	require.Len(t, parsed.Batches, 1)
	batch := parsed.Batches[0]
	header := batch.GetHeader()
	require.Equal(t, "REG.SALARY", header.CompanyEntryDescription)

	// The batch holds a single entry.
	details := batch.GetEntries()
	require.Len(t, details, 1)
	entry := details[0]
	require.Equal(t, "0012Receiver Acc Name ", entry.IndividualName)

	// Twelve Addenda05 records follow the entry; the first one carries the
	// broken-bar delimited payload.
	require.Len(t, entry.Addenda05, 12)
	first := entry.Addenda05[0]
	require.Contains(t, first.PaymentRelatedInformation, "¦ZZ¦PAYEXPENSEPAY")
})
}
3 changes: 3 additions & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@ require (
github.com/moov-io/iso4217 v0.3.0
github.com/prometheus/client_golang v1.16.0
github.com/stretchr/testify v1.8.4
golang.org/x/net v0.10.0
golang.org/x/oauth2 v0.8.0
golang.org/x/text v0.11.0
)

Expand All @@ -33,6 +35,7 @@ require (
github.com/rickar/cal/v2 v2.1.13 // indirect
github.com/rogpeppe/go-internal v1.10.0 // indirect
golang.org/x/sys v0.8.0 // indirect
google.golang.org/appengine v1.6.7 // indirect
google.golang.org/protobuf v1.31.0 // indirect
gopkg.in/yaml.v3 v3.0.1 // indirect
)
13 changes: 13 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ github.com/go-kit/log v0.2.1/go.mod h1:NwTd00d/i8cPZ3xOwwiv2PO5MOcx78fFErGNcVmBj
github.com/go-logfmt/logfmt v0.6.0 h1:wGYYu3uicYdqXVgoYbvnkrPVXkuLM1p1ifugDMEdRi4=
github.com/go-logfmt/logfmt v0.6.0/go.mod h1:WYhtIu8zTZfxdn5+rREduYbwxfcBr/Vr6KEVveWlfTs=
github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=
github.com/golang/protobuf v1.3.1/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=
github.com/golang/protobuf v1.3.5/go.mod h1:6O5/vntMXwX2lRkT1hjjk0nAC1IDOTvTlVgjlRvqsdk=
github.com/golang/protobuf v1.5.0/go.mod h1:FsONVRAS9T7sI+LIUmWTfcYkHO4aIWwzhcaSAoJOfIk=
github.com/golang/protobuf v1.5.3 h1:KhyjKVUg7Usr/dYsdSqoFveMYd5ko72D+zANwlG1mmg=
Expand Down Expand Up @@ -64,16 +65,28 @@ github.com/rogpeppe/go-internal v1.10.0 h1:TMyTOH3F/DB16zRVcYyreMH6GnZZrwQVAoYjR
github.com/rogpeppe/go-internal v1.10.0/go.mod h1:UQnix2H7Ngw/k4C5ijL5+65zddjncjaFoBhdsK/akog=
github.com/stretchr/testify v1.8.4 h1:CcVxjf3Q8PM0mHUKJCdn+eZZtm5yQwehR5yeSVQQcUk=
github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo=
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
golang.org/x/net v0.0.0-20190603091049-60506f45cf65/go.mod h1:HSz+uSET+XFnRR8LxR5pz3Of3rY3CfYBVs4xY44aLks=
golang.org/x/net v0.10.0 h1:X2//UzNDwYmtCLn7To6G58Wr6f5ahEAQgKNzv9Y951M=
golang.org/x/net v0.10.0/go.mod h1:0qNGK6F8kojg2nk9dLZ2mShWaEBan6FAoqfSigmmuDg=
golang.org/x/oauth2 v0.8.0 h1:6dkIjl3j3LtZ/O3sTgZTMsLKSftL/B8Zgq4huOIIUu8=
golang.org/x/oauth2 v0.8.0/go.mod h1:yr7u4HXZRm1R1kBWqr/xKNqewf0plRYoB7sla+BCIXE=
golang.org/x/sync v0.0.0-20181221193216-37e7f081c4d4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20200116001909-b77594299b42/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20200223170610-d5e6a3e2c0ae/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20210630005230-0f9fa26af87c/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.8.0 h1:EBmGv8NaZBZTWvrbjNoL6HVt+IVy3QDQpJs7VRIw3tU=
golang.org/x/sys v0.8.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk=
golang.org/x/text v0.11.0 h1:LAntKIrcmeSKERyiOh0XMV39LXS8IE9UL2yP7+f5ij4=
golang.org/x/text v0.11.0/go.mod h1:TvPlkZtksWOMsz7fbANvkp4WM8x/WCo/om8BMLbz+aE=
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
google.golang.org/appengine v1.6.7 h1:FZR1q0exgwxzPzp/aF+VccGrSfxfPpkBqjIIEq3ru6c=
google.golang.org/appengine v1.6.7/go.mod h1:8WjMMxjGQR8xUklV/ARdw2HLXBOI7O7uCIDZVag1xfc=
google.golang.org/protobuf v1.26.0-rc.1/go.mod h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp09yW+WbY/TyQbw=
google.golang.org/protobuf v1.26.0/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc=
google.golang.org/protobuf v1.31.0 h1:g0LDEJHgrBl9N9r17Ru3sqWhkIx2NB67okBHPwC7hs8=
Expand Down
21 changes: 19 additions & 2 deletions reader.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,8 @@ import (
"unicode/utf8"

"github.com/moov-io/base"

"golang.org/x/net/html/charset"
)

var (
Expand Down Expand Up @@ -134,10 +136,25 @@ func ReadFiles(paths []string) ([]*File, error) {

// NewReader returns a new ACH Reader that reads from r.
func NewReader(r io.Reader) *Reader {
return &Reader{
out := &Reader{
maxLines: defaultMaxLines,
scanner: bufio.NewScanner(r),
}

// charset.Reader will decode windows-1252 strings into utf-8 automatically.
rr, err := charset.NewReader(r, "text/plain")
if err != nil {
// Fake an empty reader if we read nothing
if err == io.EOF || err == io.ErrUnexpectedEOF {
out.scanner = bufio.NewScanner(strings.NewReader(""))
} else {
out.errors.Add(err)
}
}
if rr != nil {
out.scanner = bufio.NewScanner(rr)
}

return out
}

func (r *Reader) SetMaxLines(max int) {
Expand Down
20 changes: 20 additions & 0 deletions test/testdata/nonascii-utf8.ach
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
101 23138010401111111121906240000A094101Federal Reserve Bank My Bank Name
5200Name on Account 111111112 CTXREG.SALARY 190625 1111111110000001
62723138010412345678 0100000000 0012Receiver Acc Name 1111111110000001
705ISA¦00¦ ¦00¦ ¦ZZ¦PAYEXPENSEPAY ¦ZZ¦PAYAECSUSO ¦230628¦02100010000001
7059¦U¦00401¦017587397¦0¦P¦^~GS¦RA¦PAYEXPENSEPAY¦PAYAECSUSO¦20230628¦0219¦17587397¦00020000001
705X¦004010~ST¦820¦0069~BPR¦C¦1352.88¦C¦ACH¦CTX¦01¦026009593¦DA¦8765117001¦8201665000030000001
70519¦¦01¦091218445¦DA¦4233388950¦20230629~TRN¦1¦M4986O3TM2¦¦M4986O3TM2~CUR¦PE¦USD¦00040000001
705¦PR¦USD~REF¦BT¦J33QZD22QW~REF¦C2¦PAYEXPENSEPAY_120603391031634~REF¦TN¦¦B33TKE25Q00050000001
705A~REF¦PH¦1~REF¦CA¦01~REF¦VI¦¦/PHON 888-888-8888~REF¦7U¦M4986O3TM2~REF¦8M¦¦B7033D00060000001
7053WU8~REF¦SEK¦¦USUS_TRF_NURG_Y_N_US_CTX__USD~N1¦RB¦M1 FINANCE~N3¦ADDRESS UNKNOWN~00070000001
705N4¦CITY UNKNOWN¦..¦¦US~N1¦PR¦META PLATFORMS INC~N3¦873 CALIFORNIA STREET 19TH FL00080000001
705OOR~N4¦SAN FRANCISCO¦CA¦94104¦US~REF¦2U¦8201665019~N1¦PE¦JO SMITH~N4¦BELLEVUE¦¦900090000001
7058005¦US~N1¦CE¦JO SMITH~N4¦BELLEVUE¦..¦98005¦US~N1¦DE¦META PLATFORMS INC~N3¦873 C00100000001
705ALIFORNIA STREET 19TH FLOOR~N4¦SAN FRANCISCO¦CA¦94104¦US~ENT¦1~RMR¦IK¦/PHON 888-00110000001
705888-8888 /CONT JO SM~RMR¦IK¦ANG~SE¦31¦0069~GE¦0001¦17587397~IEA¦0001¦017587397~ 00120000001
82000000130023138010000100000000000000000000111111112 111111110000001
9000001000001000000010023138010000100000000000000000000
9999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999
9999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999
9999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999
20 changes: 20 additions & 0 deletions test/testdata/nonascii-windows1252.ach
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
101 23138010401210428821906240000A094101Federal Reserve Bank My Bank Name
5200Name on Account 121042882 CTXREG.SALARY 190625 1121042880000001
62723138010412345678 0100000000 Receiver Account Name 1111111110000001
705ISA¦00¦ ¦00¦ ¦ZZ¦PAYEXPENSEPAY ¦ZZ¦PAYAECSUSO ¦230628¦02100010871559
7059¦U¦00401¦017587397¦0¦P¦^~GS¦RA¦PAYEXPENSEPAY¦PAYAECSUSO¦20230628¦0219¦17587397¦00020871559
705X¦004010~ST¦820¦0069~BPR¦C¦1352.88¦C¦ACH¦CTX¦01¦026009593¦DA¦8765117001¦8201665000030871559
70519¦¦01¦091218445¦DA¦4233388950¦20230629~TRN¦1¦M4986O3TM2¦¦M4986O3TM2~CUR¦PE¦USD¦00040871559
705¦PR¦USD~REF¦BT¦J33QZD22QW~REF¦C2¦PAYEXPENSEPAY_120603391031634~REF¦TN¦¦B33TKE25Q00050871559
705A~REF¦PH¦1~REF¦CA¦01~REF¦VI¦¦/PHON 888-888-8888~REF¦7U¦M4986O3TM2~REF¦8M¦¦B7033D00060871559
7053WU8~REF¦SEK¦¦USUS_TRF_NURG_Y_N_US_CTX__USD~N1¦RB¦M1 FINANCE~N3¦ADDRESS UNKNOWN~00070871559
705N4¦CITY UNKNOWN¦..¦¦US~N1¦PR¦META PLATFORMS INC~N3¦873 CALIFORNIA STREET 19TH FL00080871559
705OOR~N4¦SAN FRANCISCO¦CA¦94104¦US~REF¦2U¦8201665019~N1¦PE¦JO SMITH~N4¦BELLEVUE¦¦900090871559
7058005¦US~N1¦CE¦BO ZHANG~N4¦BELLEVUE¦..¦98005¦US~N1¦DE¦META PLATFORMS INC~N3¦315 M00100871559
705ONTGOMERY STREET 13TH FLOOR~N4¦SAN FRANCISCO¦CA¦94104¦US~ENT¦1~RMR¦IK¦/PHON 888-00110871559
705888-8888 /CONT BO ZH~RMR¦IK¦ANG~SE¦31¦0069~GE¦0001¦17587397~IEA¦0001¦017587397~ 00120871559
82000000120023138010000100000000000000000000121042882 121042880000001
9000001000001000000010023138010000100000000000000000000
9999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999
9999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999
9999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999
20 changes: 20 additions & 0 deletions test/testdata/nonascii.ach
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
101 23138010401111111121906240000A094101Federal Reserve Bank My Bank Name
5200Name on Account 111111112 CTXREG.SALARY 190625 1111111110000001
62723138010412345678 0100000000 0012Receiver Acc Name 1111111110000001
705ISA¦00¦ ¦00¦ ¦ZZ¦PAYEXPENSEPAY ¦ZZ¦PAYAECSUSO ¦230628¦02100010000001
7059¦U¦00401¦017587397¦0¦P¦^~GS¦RA¦PAYEXPENSEPAY¦PAYAECSUSO¦20230628¦0219¦17587397¦00020000001
705X¦004010~ST¦820¦0069~BPR¦C¦1352.88¦C¦ACH¦CTX¦01¦026009593¦DA¦8765117001¦8201665000030000001
70519¦¦01¦091218445¦DA¦4233388950¦20230629~TRN¦1¦M4986O3TM2¦¦M4986O3TM2~CUR¦PE¦USD¦00040000001
705¦PR¦USD~REF¦BT¦J33QZD22QW~REF¦C2¦PAYEXPENSEPAY_120603391031634~REF¦TN¦¦B33TKE25Q00050000001
705A~REF¦PH¦1~REF¦CA¦01~REF¦VI¦¦/PHON 888-888-8888~REF¦7U¦M4986O3TM2~REF¦8M¦¦B7033D00060000001
7053WU8~REF¦SEK¦¦USUS_TRF_NURG_Y_N_US_CTX__USD~N1¦RB¦M1 FINANCE~N3¦ADDRESS UNKNOWN~00070000001
705N4¦CITY UNKNOWN¦..¦¦US~N1¦PR¦META PLATFORMS INC~N3¦873 CALIFORNIA STREET 19TH FL00080000001
705OOR~N4¦SAN FRANCISCO¦CA¦94104¦US~REF¦2U¦8201665019~N1¦PE¦JO SMITH~N4¦BELLEVUE¦¦900090000001
7058005¦US~N1¦CE¦JO SMITH~N4¦BELLEVUE¦..¦98005¦US~N1¦DE¦META PLATFORMS INC~N3¦873 C00100000001
705ALIFORNIA STREET 19TH FLOOR~N4¦SAN FRANCISCO¦CA¦94104¦US~ENT¦1~RMR¦IK¦/PHON 888-00110000001
705888-8888 /CONT JO SM~RMR¦IK¦ANG~SE¦31¦0069~GE¦0001¦17587397~IEA¦0001¦017587397~ 00120000001
82000000130023138010000100000000000000000000111111112 111111110000001
9000001000001000000010023138010000100000000000000000000
9999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999
9999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999
9999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999
Loading