From 5e618c5c555134a95bc611bf84bb90c8d65d8b79 Mon Sep 17 00:00:00 2001 From: Pavel Pogodaev Date: Mon, 18 Nov 2024 15:12:45 +0300 Subject: [PATCH] HW10 is completed Signed-off-by: Pavel Pogodaev --- hw10_program_optimization/.sync | 0 hw10_program_optimization/go.mod | 3 + hw10_program_optimization/go.sum | 4 + hw10_program_optimization/stats.go | 40 +++--- hw10_program_optimization/stats_easyjson.go | 127 ++++++++++++++++++++ hw10_program_optimization/stats_test.go | 72 ++++++++++- 6 files changed, 222 insertions(+), 24 deletions(-) delete mode 100644 hw10_program_optimization/.sync create mode 100644 hw10_program_optimization/stats_easyjson.go diff --git a/hw10_program_optimization/.sync b/hw10_program_optimization/.sync deleted file mode 100644 index e69de29..0000000 diff --git a/hw10_program_optimization/go.mod b/hw10_program_optimization/go.mod index 8bccd17..94223fb 100644 --- a/hw10_program_optimization/go.mod +++ b/hw10_program_optimization/go.mod @@ -4,8 +4,11 @@ go 1.22 require github.com/stretchr/testify v1.7.0 +require github.com/mailru/easyjson v0.7.7 + require ( github.com/davecgh/go-spew v1.1.1 // indirect + github.com/josharian/intern v1.0.0 // indirect github.com/pmezard/go-difflib v1.0.0 // indirect gopkg.in/yaml.v3 v3.0.0-20210107192922-496545a6307b // indirect ) diff --git a/hw10_program_optimization/go.sum b/hw10_program_optimization/go.sum index c221f64..e1709f5 100644 --- a/hw10_program_optimization/go.sum +++ b/hw10_program_optimization/go.sum @@ -1,6 +1,10 @@ github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/josharian/intern v1.0.0 h1:vlS4z54oSdjm0bgjRigI+G1HpF+tI+9rE5LLzOg8HmY= +github.com/josharian/intern v1.0.0/go.mod h1:5DoeVV0s6jJacbCEi61lwdGj/aVlrQvzHFFd8Hwg//Y= +github.com/mailru/easyjson v0.7.7 h1:UGYAvKxe3sBsEDzO8ZeWOSlIQfWFlxbzLZe7hwFURr0= +github.com/mailru/easyjson v0.7.7/go.mod h1:xzfreul335JAWq5oZzymOObrkdz5UnU4kGfJJLY9Nlc= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= diff --git a/hw10_program_optimization/stats.go b/hw10_program_optimization/stats.go index affb108..ecd79a4 100644 --- a/hw10_program_optimization/stats.go +++ b/hw10_program_optimization/stats.go @@ -1,11 +1,13 @@ package hw10programoptimization import ( - "encoding/json" + "bufio" + "errors" "fmt" "io" - "regexp" "strings" + + "github.com/mailru/easyjson" ) type User struct { @@ -20,7 +22,13 @@ type User struct { type DomainStat map[string]int +var ErrEmptyDomain = errors.New("empty domain") + func GetDomainStat(r io.Reader, domain string) (DomainStat, error) { + if domain == "" { + return nil, ErrEmptyDomain + } + u, err := getUsers(r) if err != nil { return nil, fmt.Errorf("get users error: %w", err) @@ -31,35 +39,29 @@ func GetDomainStat(r io.Reader, domain string) (DomainStat, error) { type users [100_000]User func getUsers(r io.Reader) (result users, err error) { - content, err := io.ReadAll(r) - if err != nil { - return - } + scanner := bufio.NewScanner(r) + var i int - lines := strings.Split(string(content), "\n") - for i, line := range lines { + for scanner.Scan() { var user User - if err = json.Unmarshal([]byte(line), &user); err != nil { + if err = easyjson.Unmarshal(scanner.Bytes(), &user); err != nil { return } + result[i] = user + i++ } - return + + return result, scanner.Err() } func countDomains(u users, domain string) (DomainStat, error) { result := make(DomainStat) + domain = strings.ToLower(domain) for _, user := range u { - matched, err := regexp.Match("\\."+domain, []byte(user.Email)) - if err != nil { - return nil, err - } - - if matched { - num := result[strings.ToLower(strings.SplitN(user.Email, "@", 2)[1])] - num++ - result[strings.ToLower(strings.SplitN(user.Email, "@", 2)[1])] = num + if strings.Contains(user.Email, "."+domain) { + result[strings.ToLower(strings.SplitN(user.Email, "@", 2)[1])]++ } } return result, nil diff --git a/hw10_program_optimization/stats_easyjson.go b/hw10_program_optimization/stats_easyjson.go new file mode 100644 index 0000000..2bccabf --- /dev/null +++ b/hw10_program_optimization/stats_easyjson.go @@ -0,0 +1,127 @@ +// Code generated by easyjson for marshaling/unmarshaling. DO NOT EDIT. + +package hw10programoptimization + +import ( + json "encoding/json" + easyjson "github.com/mailru/easyjson" + jlexer "github.com/mailru/easyjson/jlexer" + jwriter "github.com/mailru/easyjson/jwriter" +) + +// suppress unused package warning +var ( + _ *json.RawMessage + _ *jlexer.Lexer + _ *jwriter.Writer + _ easyjson.Marshaler +) + +func easyjsonE3ab7953DecodeGithubComFixmeMyFriendHw10ProgramOptimization(in *jlexer.Lexer, out *User) { + isTopLevel := in.IsStart() + if in.IsNull() { + if isTopLevel { + in.Consumed() + } + in.Skip() + return + } + in.Delim('{') + for !in.IsDelim('}') { + key := in.UnsafeFieldName(false) + in.WantColon() + if in.IsNull() { + in.Skip() + in.WantComma() + continue + } + switch key { + case "ID": + out.ID = int(in.Int()) + case "Name": + out.Name = string(in.String()) + case "Username": + out.Username = string(in.String()) + case "Email": + out.Email = string(in.String()) + case "Phone": + out.Phone = string(in.String()) + case "Password": + out.Password = string(in.String()) + case "Address": + out.Address = string(in.String()) + default: + in.SkipRecursive() + } + in.WantComma() + } + in.Delim('}') + if isTopLevel { + in.Consumed() + } +} +func easyjsonE3ab7953EncodeGithubComFixmeMyFriendHw10ProgramOptimization(out *jwriter.Writer, in User) { + out.RawByte('{') + first := true + _ = first + { + const prefix string = ",\"ID\":" + out.RawString(prefix[1:]) + out.Int(int(in.ID)) + } + { + const prefix string = ",\"Name\":" + out.RawString(prefix) + out.String(string(in.Name)) + } + { + const prefix string = ",\"Username\":" + out.RawString(prefix) + out.String(string(in.Username)) + } + { + const prefix string = ",\"Email\":" + out.RawString(prefix) + out.String(string(in.Email)) + } + { + const prefix string = ",\"Phone\":" + out.RawString(prefix) + out.String(string(in.Phone)) + } + { + const prefix string = ",\"Password\":" + out.RawString(prefix) + out.String(string(in.Password)) + } + { + const prefix string = ",\"Address\":" + out.RawString(prefix) + out.String(string(in.Address)) + } + out.RawByte('}') +} + +// MarshalJSON supports json.Marshaler interface +func (v User) MarshalJSON() ([]byte, error) { + w := jwriter.Writer{} + easyjsonE3ab7953EncodeGithubComFixmeMyFriendHw10ProgramOptimization(&w, v) + return w.Buffer.BuildBytes(), w.Error +} + +// MarshalEasyJSON supports easyjson.Marshaler interface +func (v User) MarshalEasyJSON(w *jwriter.Writer) { + easyjsonE3ab7953EncodeGithubComFixmeMyFriendHw10ProgramOptimization(w, v) +} + +// UnmarshalJSON supports json.Unmarshaler interface +func (v *User) UnmarshalJSON(data []byte) error { + r := jlexer.Lexer{Data: data} + easyjsonE3ab7953DecodeGithubComFixmeMyFriendHw10ProgramOptimization(&r, v) + return r.Error() +} + +// UnmarshalEasyJSON supports easyjson.Unmarshaler interface +func (v *User) UnmarshalEasyJSON(l *jlexer.Lexer) { + easyjsonE3ab7953DecodeGithubComFixmeMyFriendHw10ProgramOptimization(l, v) +} diff --git a/hw10_program_optimization/stats_test.go b/hw10_program_optimization/stats_test.go index f2c20a7..b40adba 100644 --- a/hw10_program_optimization/stats_test.go +++ b/hw10_program_optimization/stats_test.go @@ -1,3 +1,4 @@ +//go:build !bench // +build !bench package hw10programoptimization @@ -6,15 +7,30 @@ import ( "bytes" "testing" - "github.com/stretchr/testify/require" + "github.com/stretchr/testify/require" //nolint:depguard ) func TestGetDomainStat(t *testing.T) { data := `{"Id":1,"Name":"Howard Mendoza","Username":"0Oliver","Email":"aliquid_qui_ea@Browsedrive.gov","Phone":"6-866-899-36-79","Password":"InAQJvsq","Address":"Blackbird Place 25"} -{"Id":2,"Name":"Jesse Vasquez","Username":"qRichardson","Email":"mLynch@broWsecat.com","Phone":"9-373-949-64-00","Password":"SiZLeNSGn","Address":"Fulton Hill 80"} -{"Id":3,"Name":"Clarence Olson","Username":"RachelAdams","Email":"RoseSmith@Browsecat.com","Phone":"988-48-97","Password":"71kuz3gA5w","Address":"Monterey Park 39"} -{"Id":4,"Name":"Gregory Reid","Username":"tButler","Email":"5Moore@Teklist.net","Phone":"520-04-16","Password":"r639qLNu","Address":"Sunfield Park 20"} -{"Id":5,"Name":"Janice Rose","Username":"KeithHart","Email":"nulla@Linktype.com","Phone":"146-91-01","Password":"acSBF5","Address":"Russell Trail 61"}` + {"Id":2,"Name":"Jesse Vasquez","Username":"qRichardson","Email":"mLynch@broWsecat.com","Phone":"9-373-949-64-00","Password":"SiZLeNSGn","Address":"Fulton Hill 80"} + {"Id":3,"Name":"Clarence Olson","Username":"RachelAdams","Email":"RoseSmith@Browsecat.com","Phone":"988-48-97","Password":"71kuz3gA5w","Address":"Monterey Park 39"} + {"Id":4,"Name":"Gregory Reid","Username":"tButler","Email":"5Moore@Teklist.net","Phone":"520-04-16","Password":"r639qLNu","Address":"Sunfield Park 20"} + {"Id":5,"Name":"Janice Rose","Username":"KeithHart","Email":"nulla@Linktype.com","Phone":"146-91-01","Password":"acSBF5","Address":"Russell Trail 61"}` + + data2 := `{"Id":1,"Name":"Howard Mendoza","Username":"0Oliver","Email":"aliquid_qui_ea@я.ру","Phone":"6-866-899-36-79","Password":"InAQJvsq","Address":"Blackbird Place 25"} + {"Id":2,"Name":"Jesse Vasquez","Username":"qRichardson","Email":"mLynch@я.ру","Phone":"6-866-899-36-79","Password":"InAQJvsq","Address":"Blackbird Place 25"}` + + data3 := `{"Id":1,"Name":"Howard Mendoza","Username":"0Oliver","Email":"aliquid_qui_ea@Browsedrive","Phone":"6-866-899-36-79","Password":"InAQJvsq","Address":"Blackbird Place 25"} + {"Id":2,"Name":"Jesse Vasquez","Username":"qRichardson","Email":"mLynch@broWsecat.com","Phone":"9-373-949-64-00","Password":"SiZLeNSGn","Address":"Fulton Hill 80"} + {"Id":3,"Name":"Clarence Olson","Username":"RachelAdams","Email":"RoseSmith@Browsecat","Phone":"988-48-97","Password":"71kuz3gA5w","Address":"Monterey Park 39"}` + + data4 := `{"Id":1,"Name":"Howard Mendoza","Username":"0Oliver","Email":"@browsedrive.su","Phone":"6-866-899-36-79","Password":"InAQJvsq","Address":"Blackbird Place 25"} + {"Id":2,"Name":"Jesse Vasquez","Username":"qRichardson","Email":"@browsecat.com","Phone":"9-373-949-64-00","Password":"SiZLeNSGn","Address":"Fulton Hill 80"} + {"Id":3,"Name":"Clarence Olson","Username":"RachelAdams","Email":"RoseSmith@browsecat.ru","Phone":"988-48-97","Password":"71kuz3gA5w","Address":"Monterey Park 39"}` + + data5 := `{"Id":1,"Name":"Howard Mendoza","Username":"0Oliver","Email":"aliquid_qui_ea@","Phone":"6-866-899-36-79","Password":"InAQJvsq","Address":"Blackbird Place 25"} + {"Id":2,"Name":"Jesse Vasquez","Username":"qRichardson","Email":"mLynch@browsecat.com","Phone":"9-373-949-64-00","Password":"SiZLeNSGn","Address":"Fulton Hill 80"} + {"Id":3,"Name":"Clarence Olson","Username":"RachelAdams","Email":"RoseSmith@","Phone":"988-48-97","Password":"71kuz3gA5w","Address":"Monterey Park 39"}` t.Run("find 'com'", func(t *testing.T) { result, err := GetDomainStat(bytes.NewBufferString(data), "com") @@ -36,4 +52,50 @@ func TestGetDomainStat(t *testing.T) { require.NoError(t, err) require.Equal(t, DomainStat{}, result) }) + + t.Run("find empty domain", func(t *testing.T) { + _, err := GetDomainStat(bytes.NewBufferString(data), "") + require.ErrorIs(t, err, ErrEmptyDomain) + }) + + t.Run("find domain with capital letter", func(t *testing.T) { + result, err := GetDomainStat(bytes.NewBufferString(data), "Com") + require.NoError(t, err) + require.Equal(t, DomainStat{ + "browsecat.com": 2, + "linktype.com": 1, + }, result) + }) + + t.Run("find cyrillic 'ру'", func(t *testing.T) { + result, err := GetDomainStat(bytes.NewBufferString(data2), "ру") + require.NoError(t, err) + require.Equal(t, DomainStat{ + "я.ру": 2, + }, result) + }) + + t.Run("find 'com' without domain", func(t *testing.T) { + result, err := GetDomainStat(bytes.NewBufferString(data3), "com") + require.NoError(t, err) + require.Equal(t, DomainStat{ + "browsecat.com": 1, + }, result) + }) + + t.Run("find 'com' without user name", func(t *testing.T) { + result, err := GetDomainStat(bytes.NewBufferString(data4), "com") + require.NoError(t, err) + require.Equal(t, DomainStat{ + "browsecat.com": 1, + }, result) + }) + + t.Run("find 'com' none symbols after @", func(t *testing.T) { + result, err := GetDomainStat(bytes.NewBufferString(data5), "com") + require.NoError(t, err) + require.Equal(t, DomainStat{ + "browsecat.com": 1, + }, result) + }) }