diff --git a/README.md b/README.md index 4a3c012..307a4bf 100644 --- a/README.md +++ b/README.md @@ -8,4 +8,5 @@ |3|Decouple reading and processing of file content. A buffered goroutine is used to communicate between the two processes.|5:22.83|+57.24|[2babf7d](https://github.com/shraddhaag/1brc/commit/2babf7dda72d92c72722b220b8b663e747075bd7)| |4|Instead of sending each line to the channel, now sending 100 lines chunked together. Also, to minimise garbage collection, not freeing up memory when resetting a slice. |3:41.76|-161.07|[b7b1781](https://github.com/shraddhaag/1brc/commit/b7b1781f58fd258a06940bd6c05eb404c8a14af6)| |5|Read file in chunks of 100 MB instead of reading line by line. |3:32.62|-9.14|[c26fea4](https://github.com/shraddhaag/1brc/commit/c26fea40019552a7e4fc1c864236f433b1b686f0)| -|6|Convert temperature from `string` to `int64`, process in `int64` and convert to `float64` at the end. |2:51.50|-41.14|| \ No newline at end of file +|6|Convert temperature from `string` to `int64`, process in `int64` and convert to `float64` at the end. 
|2:51.50|-41.14|[7812da4](https://github.com/shraddhaag/1brc/commit/7812da4d0be07dd4686d5f9b9df1e93b08cd0dd1)| +|7|In the value of city <> temperatures map, save preprocessed min, max, count and sum of all temperatures instead of saving all recorded temperatures for the city.|1:39.81|-71.79|| \ No newline at end of file diff --git a/main.go b/main.go index 55ca81d..faf8c1c 100644 --- a/main.go +++ b/main.go @@ -1,7 +1,6 @@ package main import ( - "cmp" "errors" "flag" "fmt" @@ -12,11 +11,9 @@ import ( "runtime" "runtime/pprof" "runtime/trace" + "sort" "strconv" "strings" - "sync" - - "slices" ) var cpuprofile = flag.String("cpuprofile", "", "write cpu profile to `file`") @@ -71,66 +68,44 @@ type result struct { } func evaluate(input string) string { - // mapOfTemp, err := readFileLineByLineIntoAMap("./test_cases/measurements-rounding.txt") - // mapOfTemp, err := readFileLineByLineIntoAMap("measurements.txt") mapOfTemp, err := readFileLineByLineIntoAMap(input) if err != nil { panic(err) } - var resultArr []result - var wg sync.WaitGroup - var mx sync.Mutex - - updateResult := func(city, temp string) { - mx.Lock() - defer mx.Unlock() - - resultArr = append(resultArr, result{city, temp}) + resultArr := make([]string, len(mapOfTemp)) + var count int + for city, _ := range mapOfTemp { + resultArr[count] = city + count++ } - for city, temps := range mapOfTemp { - wg.Add(1) - go func(city string, temps []int64) { - defer wg.Done() - var min, max, avg int64 - min, max = math.MaxInt64, math.MinInt64 - - for _, temp := range temps { - if temp < min { - min = temp - } - - if temp > max { - max = temp - } - avg += temp - } - - updateResult(city, fmt.Sprintf("%.1f/%.1f/%.1f", round(float64(min)/10.0), round(float64(avg)/10.0/float64(len(temps))), round(float64(max)/10.0))) - - }(city, temps) - } - - wg.Wait() - slices.SortFunc(resultArr, func(i, j result) int { - return cmp.Compare(i.city, j.city) - }) + sort.Strings(resultArr) var stringsBuilder strings.Builder for _, i := 
range resultArr { - stringsBuilder.WriteString(fmt.Sprintf("%s=%s, ", i.city, i.temp)) + stringsBuilder.WriteString(fmt.Sprintf("%s=%.1f/%.1f/%.1f, ", i, + round(float64(mapOfTemp[i].min)/10.0), + round(float64(mapOfTemp[i].sum)/10.0/float64(mapOfTemp[i].count)), + round(float64(mapOfTemp[i].max)/10.0))) } return stringsBuilder.String()[:stringsBuilder.Len()-2] } -func readFileLineByLineIntoAMap(filepath string) (map[string][]int64, error) { +type cityTemperatureInfo struct { + count int64 + min int64 + max int64 + sum int64 +} + +func readFileLineByLineIntoAMap(filepath string) (map[string]cityTemperatureInfo, error) { file, err := os.Open(filepath) if err != nil { panic(err) } - mapOfTemp := make(map[string][]int64) + mapOfTemp := make(map[string]cityTemperatureInfo) chanOwner := func() <-chan []string { resultStream := make(chan []string, 100) @@ -170,21 +145,31 @@ func readFileLineByLineIntoAMap(filepath string) (map[string][]int64, error) { } city := text[:index] temp := convertStringToInt64(text[index+1:]) - if _, ok := mapOfTemp[city]; ok { - mapOfTemp[city] = append(mapOfTemp[city], temp) + if val, ok := mapOfTemp[city]; ok { + val.count++ + val.sum += temp + if temp < val.min { + val.min = temp + } + + if temp > val.max { + val.max = temp + } + mapOfTemp[city] = val } else { - mapOfTemp[city] = []int64{temp} + mapOfTemp[city] = cityTemperatureInfo{ + count: 1, + min: temp, + max: temp, + sum: temp, + } } } } + // fmt.Println(mapOfTemp) return mapOfTemp, nil } -type cityTemp struct { - city string - temp float64 -} - func convertStringToInt64(input string) int64 { input = input[:len(input)-2] + input[len(input)-1:] output, _ := strconv.ParseInt(input, 10, 64) diff --git a/profiles/cpu-int64.prof b/profiles/cpu-int64.prof new file mode 100644 index 0000000..9829a08 Binary files /dev/null and b/profiles/cpu-int64.prof differ diff --git a/profiles/cpu-preprocess.prof b/profiles/cpu-preprocess.prof new file mode 100644 index 0000000..1aa70d5 Binary files 
/dev/null and b/profiles/cpu-preprocess.prof differ