diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md index 6e4a38a52..ed779442c 100644 --- a/.github/pull_request_template.md +++ b/.github/pull_request_template.md @@ -1,8 +1,16 @@ +**NOTE:** The challenge has been closed for new submissions. No new pull requests for adding submissions are accepted at this time. +Any pending pull requests will be reviewed over the next few days, as described [here](https://github.com/gunnarmorling/1brc/discussions/687). +The final leader board will be published by Feb 5. + #### Check List: + +- [ ] You have run `./mvnw verify` and the project builds successfully - [ ] Tests pass (`./test.sh <username>` shows no differences between expected and actual outputs) - [ ] All formatting changes by the build are committed - [ ] Your launch script is named `calculate_average_<username>.sh` (make sure to match casing of your GH user name) and is executable - [ ] Output matches that of `calculate_average_baseline.sh` +- [ ] For new entries, or after substantial changes: When implementing custom hash structures, please point to where you deal with hash collisions (line number) + * Execution time: * Execution time of reference implementation: diff --git a/.github/workflows/maven.yml b/.github/workflows/maven.yml index b5f09651c..859795578 100644 --- a/.github/workflows/maven.yml +++ b/.github/workflows/maven.yml @@ -17,6 +17,8 @@ name: Build on: + # Enable manual re-run + workflow_dispatch: { } push: branches: [ main ] pull_request: @@ -32,11 +34,12 @@ jobs: with: submodules: 'true' - - name: 'Set up Java' - uses: actions/setup-java@v2 + - name: Cache SDKMan + id: cache-sdkman + uses: actions/cache@v4 with: - java-version: 21 - distribution: 'temurin' + path: ~/.sdkman + key: ${{ runner.os }}-sdkman - name: 'Cache Maven packages' uses: actions/cache@v3 @@ -45,5 +48,23 @@ key: ${{ runner.os }}-m2-${{ hashFiles('**/pom.xml') }} restore-keys: ${{ runner.os }}-m2 + - name: 'Setup SDKMAN' + uses: 
sdkman/sdkman-action@b1f9b696c79148b66d3d3a06f7ea801820318d0f + id: sdkman + - name: 'Build project' - run: mvn -B clean verify -Pci + shell: bash + run: | + source "$HOME/.sdkman/bin/sdkman-init.sh" + if [ -f ${{ format('src/main/java-22/dev/morling/onebrc/CalculateAverage_{0}.java', github.event.pull_request.user.login || '') }} ]; then + sdk install java 22.ea.32-open || true + sdk use java 22.ea.32-open + fi + ./mvnw --version + ./mvnw -B clean verify -Pci + + - name: 'Test submission' + shell: bash + run: | + ./test_ci.sh ${{ github.event.pull_request.user.login }} + if: github.event_name == 'pull_request' diff --git a/.gitignore b/.gitignore index 63e0c144b..828124488 100644 --- a/.gitignore +++ b/.gitignore @@ -46,3 +46,5 @@ out/ /measurements*.txt /*.out out_expected*.txt +/*-timing.json + diff --git a/.sdkmanrc b/.sdkmanrc new file mode 100644 index 000000000..dfb233bfa --- /dev/null +++ b/.sdkmanrc @@ -0,0 +1,3 @@ +# Enable auto-env through the sdkman_auto_env config +# Add key=value pairs of SDKs to use below +java=21.0.1-open diff --git a/ENVIRONMENT.md b/ENVIRONMENT.md new file mode 100644 index 000000000..5df4e69fe --- /dev/null +++ b/ENVIRONMENT.md @@ -0,0 +1,92 @@ +# Environment +This file just contains some intel about the environment in use and what has been done to get it into that state. 
+ +## Machine Type + +* Hetzner AX161, Dedicated Hosted Hardware +* CPU: AMD EPYC 7502P 32 cores / 64 threads @ 2.5 GHz +* Memory: 128 GB ECC DDR4 RAM +* 2x SAMSUNG MZQL2960HCJR-00A07, 1 TB, Software RAID-1 +* CentOS 9, Linux 5.14.0-378.el9.x86_64 + +## Configuration + +* SMT off +* Turbo Boost Off +* Filesystem EXT4 + +## Details + +### CPU +``` +$ cat /proc/cpuinfo +processor : 0 +vendor_id : AuthenticAMD +cpu family : 23 +model : 49 +model name : AMD EPYC 7502P 32-Core Processor +stepping : 0 +microcode : 0x8301055 +cpu MHz : 2500.000 +cache size : 512 KB +physical id : 0 +siblings : 32 +core id : 0 +cpu cores : 32 +apicid : 0 +initial apicid : 0 +fpu : yes +fpu_exception : yes +cpuid level : 16 +wp : yes +flags : fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ht syscall nx mmxext fxsr_opt pdpe1gb rdtscp lm constant_tsc rep_good nopl nonstop_tsc cpuid extd_apicid aperfmperf rapl pni pclmulqdq monitor ssse3 fma cx16 sse4_1 sse4_2 movbe popcnt aes xsave avx f16c rdrand lahf_lm cmp_legacy svm extapic cr8_legacy abm sse4a misalignsse 3dnowprefetch osvw ibs skinit wdt tce topoext perfctr_core perfctr_nb bpext perfctr_llc mwaitx cpb cat_l3 cdp_l3 hw_pstate ssbd mba ibrs ibpb stibp vmmcall fsgsbase bmi1 avx2 smep bmi2 cqm rdt_a rdseed adx smap clflushopt clwb sha_ni xsaveopt xsavec xgetbv1 xsaves cqm_llc cqm_occup_llc cqm_mbm_total cqm_mbm_local clzero irperf xsaveerptr rdpru wbnoinvd arat npt lbrv svm_lock nrip_save tsc_scale vmcb_clean flushbyasid decodeassists pausefilter pfthreshold avic v_vmsave_vmload vgif v_spec_ctrl umip rdpid overflow_recov succor smca sme sev sev_es +bugs : sysret_ss_attrs spectre_v1 spectre_v2 spec_store_bypass retbleed smt_rsb +bogomips : 4990.70 +TLB size : 3072 4K pages +clflush size : 64 +cache_alignment : 64 +address sizes : 43 bits physical, 48 bits virtual +power management: ts ttp tm hwpstate cpb eff_freq_ro [13] [14] +... 
more for all other cores +``` + +## Setup + +### Turn SMT off +Disable during boot via boot-param, able to switch it on later again, if needed. + +Add `nosmt` to grub boot config in `/etc/default/grub` + +``` +# Added nosmt to command line +GRUB_CMDLINE_LINUX="biosdevname=0 crashkernel=auto rd.auto=1 consoleblank=0 nosmt" +``` + +Update boot config: +``` +sudo grub2-mkconfig -o /boot/grub2/grub.cfg +``` + +### Turbo Off +Using the legacy `/etc/rc.local` concept to change things during boot: + +``` +# Turn SMT off via software as well, already got nosmt in grub +echo off > /sys/devices/system/cpu/smt/control + +# Turn off turbo boost +echo 0 |tee /sys/devices/system/cpu/cpufreq/boost +``` +### Reduce Swapping +Reduce from default 60 to 10% memory pressure by adding `vm.swappiness = 10` to `/etc/sysctl.conf`. + +## Verify +Check after boot if all settings have been applied. Can also be used to control these during runtime. + +* SMT off: `cat /sys/devices/system/cpu/smt/active` must be 0 +* SWAP: `cat /proc/sys/vm/swappiness` must be 10 +* Turbo off: `cat /sys/devices/system/cpu/cpufreq/boost` must be 0 + + + + diff --git a/README.md b/README.md index 6e5250622..8d61f032a 100644 --- a/README.md +++ b/README.md @@ -1,9 +1,24 @@ # 1️⃣🐝🏎️ The One Billion Row Challenge +_Status Feb 1: The challenge has been closed for new submissions. No new pull requests for adding submissions are accepted at this time. +Pending PRs will be evaluated over the next few days. Please don't push any changes to pending PRs after today, unless being asked to do so. +This will be the case if I spot an issue during evaluation (failing tests, etc.). In this case, I will comment on the PR, and you are allowed to push one update. +Only changes strictly needed to fix the bug at hand may be pushed at this point. +No force-pushes are allowed, so as to make sure I can see which changes have been made. 
+I will re-evaluate the entry, and if there are still remaining issues, you'll get one more — and last — opportunity to update the PR. +If it is still not valid at this point, it will be closed. +The final leader board will be published by Monday Feb 5._ + +_Status Jan 31: The challenge will close today at midnight UTC._ + _Status Jan 12: As there has been such a large number of entries to this challenge so far (100+), and this is becoming hard to manage, please only create new submissions if you expect them to run in 10 seconds or less on the evaluation machine._ _Status Jan 1: This challenge is [open for submissions](https://www.morling.dev/blog/one-billion-row-challenge/)!_ +> **Sponsorship** +> +> A big thank you to my employer [Decodable](https://www.decodable.co/) for funding the evaluation environment and supporting this challenge! + The One Billion Row Challenge (1BRC) is a fun exploration of how far modern Java can be pushed for aggregating one billion rows from a text file. Grab all your (virtual) threads, reach out to SIMD, optimize your GC, or pull any other trick, and create the fastest implementation for solving this task! 
@@ -41,106 +56,175 @@ These are the results from running all entries into the challenge on eight cores | # | Result (m:s.ms) | Implementation | JDK | Submitter | Notes | |---|-----------------|--------------------|-----|---------------|-----------| -| 1 | 00:02.575 | [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_merykittyunsafe.java)| 21.0.1-open | [Quan Anh Mai](https://github.com/merykitty) | Quan Anh Mai's implementation, using `Unsafe` | -| 2 | 00:02.708 | [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_thomaswue.java)| 21.0.1-graal | [Thomas Wuerthinger](https://github.com/thomaswue), [Quan Anh Mai](https://github.com/merykitty), [Alfonso² Peterssen](https://github.com/mukel) | GraalVM native binary | -| 3 | 00:02.855 | [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_royvanrijn.java)| 21.0.1-graal | [Roy van Rijn](https://github.com/royvanrijn) | GraalVM native binary | -| | 00:03.258 | [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_merykitty.java)| 21.0.1-open | [Quan Anh Mai](https://github.com/merykitty) | | -| | 00:03.321 | [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_artsiomkorzun.java)| 21.0.1-graal | [Artsiom Korzun](https://github.com/artsiomkorzun) | | -| | 00:03.539 | [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_mtopolnik.java)| 21.0.1-graal | [Marko Topolnik](https://github.com/mtopolnik) | | -| | 00:03.714 | [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_hundredwatt.java)| 21.0.1-graal | [Jason Nochlin](https://github.com/hundredwatt) | | -| | 00:04.362 | 
[link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_abeobk.java)| 21.0.1-open | [Van Phu DO](https://github.com/abeobk) | | -| | 00:04.726 | [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_ebarlas.java)| 21.0.1-graal | [Elliot Barlas](https://github.com/ebarlas) | | -| | 00:04.741 | [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_cliffclick.java)| 21.0.1-open | [Cliff Click](https://github.com/cliffclick) | | -| | 00:04.823 | [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_JamalMulla.java)| 21.0.1-graal | [Jamal Mulla](https://github.com/JamalMulla) | | -| | 00:04.959 | [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_yavuztas.java)| 21.0.1-graal | [Yavuz Tas](https://github.com/yavuztas) | | -| | 00:05.218 | [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_iziamos.java)| 21.0.1-open | [John Ziamos](https://github.com/iziamos) | | -| | 00:05.478 | [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_obourgain.java)| 21.0.1-open | [Olivier Bourgain](https://github.com/obourgain) | | -| | 00:05.530 | [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_vemana.java)| 21.0.1-graal | [Subrahmanyam](https://github.com/vemana) | | -| | 00:05.887 | [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_charlibot.java)| 21.0.1-graal | [Charlie Evans](https://github.com/charlibot) | | -| | 00:05.960 | [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_vaidhy.java)| 21.0.1-graal | [Vaidhy Mayilrangam](https://github.com/vaidhy) | | +| 1 | 
00:01.535 | [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_thomaswue.java)| 21.0.2-graal | [Thomas Wuerthinger](https://github.com/thomaswue), [Quan Anh Mai](https://github.com/merykitty), [Alfonso² Peterssen](https://github.com/mukel) | GraalVM native binary, uses Unsafe | +| 2 | 00:01.587 | [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_artsiomkorzun.java)| 21.0.2-graal | [Artsiom Korzun](https://github.com/artsiomkorzun) | GraalVM native binary, uses Unsafe | +| 3 | 00:01.608 | [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_jerrinot.java)| 21.0.2-graal | [Jaromir Hamala](https://github.com/jerrinot) | GraalVM native binary, uses Unsafe | +| | 00:01.880 | [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_serkan_ozal.java)| 21.0.1-open | [Serkan ÖZAL](https://github.com/serkan-ozal) | uses Unsafe | +| | 00:01.921 | [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_abeobk.java)| 21.0.2-graal | [Van Phu DO](https://github.com/abeobk) | GraalVM native binary, uses Unsafe | +| | 00:02.018 | [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_stephenvonworley.java)| 21.0.2-graal | [Stephen Von Worley](https://github.com/stephenvonworley) | GraalVM native binary, uses Unsafe | +| | 00:02.157 | [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_royvanrijn.java)| 21.0.2-graal | [Roy van Rijn](https://github.com/royvanrijn) | GraalVM native binary, uses Unsafe | +| | 00:02.319 | [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_yavuztas.java)| 21.0.2-graal | [Yavuz Tas](https://github.com/yavuztas) | GraalVM native binary, uses Unsafe 
| +| | 00:02.332 | [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_mtopolnik.java)| 21.0.2-graal | [Marko Topolnik](https://github.com/mtopolnik) | GraalVM native binary, uses Unsafe | +| | 00:02.367 | [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_merykittyunsafe.java)| 21.0.1-open | [Quan Anh Mai](https://github.com/merykitty) | uses Unsafe | +| | 00:02.507 | [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_gonixunsafe.java)| 21.0.1-open | [gonix](https://github.com/gonix) | uses Unsafe | +| | 00:02.557 | [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_yourwass.java)| 21.0.1-open | [yourwass](https://github.com/yourwass) | uses Unsafe | +| | 00:02.820 | [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_linl33.java)| 22.ea.32-open | [Li Lin](https://github.com/linl33) | uses Unsafe | +| | 00:02.995 | [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_tivrfoa.java)| 21.0.2-graal | [tivrfoa](https://github.com/tivrfoa) | GraalVM native binary, uses Unsafe | +| | 00:02.997 | [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_gonix.java)| 21.0.1-open | [gonix](https://github.com/gonix) | | +| | 00:03.095 | [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_JamalMulla.java)| 21.0.2-graal | [Jamal Mulla](https://github.com/JamalMulla) | GraalVM native binary, uses Unsafe | +| | 00:03.210 | [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_merykitty.java)| 21.0.1-open | [Quan Anh Mai](https://github.com/merykitty) | | +| | 00:03.298 | 
[link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_vemanaNonIdiomatic.java)| 21.0.1-graal | [Subrahmanyam (non-idiomatic)](https://github.com/vemana) | uses Unsafe | +| | 00:03.431 | [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_roman_r_m.java)| 21.0.1-graal | [Roman Musin](https://github.com/roman-r-m) | GraalVM native binary, uses Unsafe | +| | 00:03.469 | [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_ebarlas.java)| 21.0.2-graal | [Elliot Barlas](https://github.com/ebarlas) | GraalVM native binary, uses Unsafe | +| | 00:03.698 | [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_hundredwatt.java)| 21.0.1-graal | [Jason Nochlin](https://github.com/hundredwatt) | | +| | 00:03.785 | [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_zerninv.java)| 21.0.2-graal | [zerninv](https://github.com/zerninv) | GraalVM native binary, uses Unsafe | +| | 00:03.820 | [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_iziamos.java)| 21.0.2-graal | [John Ziamos](https://github.com/iziamos) | GraalVM native binary, uses Unsafe | +| | 00:03.902 | [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_jparera.java)| 21.0.1-open | [Juan Parera](https://github.com/jparera) | | +| | 00:03.966 | [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_jincongho.java)| 21.0.1-open | [Jin Cong Ho](https://github.com/jincongho) | uses Unsafe | +| | 00:03.991 | [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_vaidhy.java)| 21.0.1-graal | [Vaidhy Mayilrangam](https://github.com/vaidhy) | uses Unsafe | +| | 
00:04.066 | [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_JesseVanRooy.java)| 21.0.1-open | [JesseVanRooy](https://github.com/JesseVanRooy) | uses Unsafe | +| | 00:04.101 | [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_JaimePolidura.java)| 21.0.2-graal | [Jaime Polidura](https://github.com/JaimePolidura) | GraalVM native binary, uses Unsafe | +| | 00:04.209 | [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_giovannicuccu.java)| 21.0.1-open | [Giovanni Cuccu](https://github.com/giovannicuccu) | | +| | 00:04.474 | [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_gamlerhart.java)| 21.0.1-open | [Roman Stoffel](https://github.com/gamlerhart) | | +| | 00:04.676 | [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_plevart.java)| 21.0.2-tem | [Peter Levart](https://github.com/plevart) | | +| | 00:04.684 | [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_gigiblender.java)| 21.0.1-open | [Florin Blanaru](https://github.com/gigiblender) | uses Unsafe | +| | 00:04.701 | [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_ianopolousfast.java)| 21.0.1-open | [Dr Ian Preston](https://github.com/ianopolousfast) | | +| | 00:04.741 | [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_cliffclick.java)| 21.0.1-open | [Cliff Click](https://github.com/cliffclick) | uses Unsafe | +| | 00:04.800 | [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_parkertimmins.java)| 21.0.1-open | [Parker Timmins](https://github.com/parkertimmins) | | +| | 00:04.884 | 
[link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_shipilev.java)| 21.0.1-open | [Aleksey Shipilëv](https://github.com/shipilev) | | +| | 00:04.920 | [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_vemana.java)| 21.0.1-graal | [Subrahmanyam](https://github.com/vemana) | | +| | 00:05.077 | [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_jonathanaotearoa.java)| 21.0.2-graal | [Jonathan Wright](https://github.com/jonathan-aotearoa) | GraalVM native binary, uses Unsafe | +| | 00:05.142 | [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_arjenw.java)| 21.0.1-open | [Arjen Wisse](https://github.com/arjenw) | | +| | 00:05.167 | [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_melgenek.java)| 21.0.2-open | [Yevhenii Melnyk](https://github.com/melgenek) | | +| | 00:05.235 | [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_unbounded.java)| 21.0.1-open | [unbounded](https://github.com/unbounded) | | +| | 00:05.336 | [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_EduardoSaverin.java)| java | [Sumit Chaudhary](https://github.com/EduardoSaverin) | uses Unsafe | +| | 00:05.354 | [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_armandino.java)| 21.0.2-graal | [Arman Sharif](https://github.com/armandino) | GraalVM native binary, uses Unsafe | +| | 00:05.478 | [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_obourgain.java)| 21.0.1-open | [Olivier Bourgain](https://github.com/obourgain) | uses Unsafe | +| | 00:05.559 | 
[link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_PanagiotisDrakatos.java)| 21.0.1-graal | [Panagiotis Drakatos](https://github.com/PanagiotisDrakatos) | GraalVM native binary | +| | 00:05.887 | [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_charlibot.java)| 21.0.1-graal | [Charlie Evans](https://github.com/charlibot) | uses Unsafe | | | 00:05.979 | [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_spullara.java)| 21.0.1-graal | [Sam Pullara](https://github.com/spullara) | | -| | 00:06.140 | [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_zerninv.java)| 21.0.1-open | [zerninv](https://github.com/zerninv) | | | | 00:06.166 | [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_isolgpus.java)| 21.0.1-open | [Jamie Stansfield](https://github.com/isolgpus) | | +| | 00:06.257 | [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_flippingbits.java)| 21.0.1-graal | [Stefan Sprenger](https://github.com/flippingbits) | uses Unsafe | +| | 00:06.392 | [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_dpsoft.java)| 21.0.2-graal | [Diego Parra](https://github.com/dpsoft) | | +| | 00:06.576 | [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_as-com.java)| 21.0.1-open | [Andrew Sun](https://github.com/as-com) | uses Unsafe | +| | 00:06.635 | [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_justplainlaake.java)| 21.0.1-graal | [Laake Scates-Gervasi](https://github.com/justplainlaake) | GraalVM native binary, uses Unsafe | | | 00:06.654 | 
[link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_jbachorik.java)| 21.0.1-graal | [Jaroslav Bachorik](https://github.com/jbachorik) | | -| | 00:06.576 | [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_as-com.java)| 21.0.1-open | [Andrew Sun](https://github.com/as-com) | | | | 00:06.715 | [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_algirdasrascius.java)| 21.0.1-open | [Algirdas Raščius](https://github.com/algirdasrascius) | | +| | 00:06.884 | [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_rcasteltrione.java)| 21.0.1-graal | [rcasteltrione](https://github.com/rcasteltrione) | | +| | 00:06.982 | [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_ChrisBellew.java)| 21.0.1-open | [Chris Bellew](https://github.com/ChrisBellew) | | +| | 00:07.563 | [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_3j5a.java)| 21.0.1-graal | [3j5a](https://github.com/3j5a) | | +| | 00:07.680 | [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_C5H12O5.java)| 21.0.1-graal | [Xylitol](https://github.com/C5H12O5) | uses Unsafe | +| | 00:07.712 | [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_anitasv.java)| 21.0.1-graal | [Anita SV](https://github.com/anitasv) | | | | 00:07.730 | [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_jotschi.java)| 21.0.1-open | [Johannes Schüth](https://github.com/jotschi) | | -| | 00:07.809 | [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_roman-r-m.java)| 21.0.1-graal | [Roman 
Musin](https://github.com/roman-r-m) | | +| | 00:07.894 | [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_tonivade.java)| 21.0.2-tem | [Antonio Muñoz](https://github.com/tonivade) | | | | 00:07.925 | [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_ricardopieper.java)| 21.0.1-graal | [Ricardo Pieper](https://github.com/ricardopieper) | | -| | 00:07.913 | [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_parkertimmins.java)| 21.0.1-open | [parkertimmins](https://github.com/parkertimmins) | | +| | 00:07.948 | [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_Smoofie.java)| java | [Smoofie](https://github.com/Smoofie) | uses Unsafe | +| | 00:08.157 | [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_JurenIvan.java)| 21.0.1-open | [JurenIvan](https://github.com/JurenIvan) | | | | 00:08.167 | [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_ddimtirov.java)| 21.0.1-tem | [Dimitar Dimitrov](https://github.com/ddimtirov) | | | | 00:08.214 | [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_deemkeen.java)| 21.0.1-open | [deemkeen](https://github.com/deemkeen) | | -| | 00:08.398 | [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_artpar.java)| 21.0.1-open | [Parth Mudgal](https://github.com/artpar) | | +| | 00:08.255 | [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_mattiz.java)| 21.0.1-open | [Mathias Bjerke](https://github.com/mattiz) | | +| | 00:08.398 | [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_artpar.java)| 
21.0.1-open | [Parth Mudgal](https://github.com/artpar) | uses Unsafe | | | 00:08.489 | [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_gnabyl.java)| 21.0.1-graal | [Bang NGUYEN](https://github.com/gnabyl) | | -| | 00:08.517 | [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_ags313.java)| 21.0.1-graal | [ags](https://github.com/ags313) | | -| | 00:08.689 | [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_gamlerhart.java)| 21.0.1-open | [Roman Stoffel](https://github.com/gamlerhart) | | +| | 00:08.517 | [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_ags313.java)| 21.0.1-graal | [ags](https://github.com/ags313) | uses Unsafe | +| | 00:08.557 | [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_adriacabeza.java)| 21.0.1-graal | [Adrià Cabeza](https://github.com/adriacabeza) | | +| | 00:08.622 | [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_kuduwa_keshavram.java)| 21.0.1-graal | [Keshavram Kuduwa](https://github.com/kuduwa-keshavram) | uses Unsafe | | | 00:08.892 | [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_fatroom.java)| 21.0.1-open | [Roman Romanchuk](https://github.com/fatroom) | | +| | 00:08.896 | [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_anestoruk.java)| 21.0.1-open | [Andrzej Nestoruk](https://github.com/anestoruk) | | | | 00:09.020 | [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_yemreinci.java)| 21.0.1-open | [yemreinci](https://github.com/yemreinci) | | | | 00:09.071 | 
[link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_gabrielreid.java)| 21.0.1-open | [Gabriel Reid](https://github.com/gabrielreid) | | -| | 00:09.117 | [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_kuduwa-keshavram.java)| 21.0.1-graal | [Keshavram Kuduwa](https://github.com/kuduwa-keshavram) | | | | 00:09.352 | [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_filiphr.java)| 21.0.1-graal | [Filip Hrisafov](https://github.com/filiphr) | | +| | 00:09.725 | [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_martin2038.java)| 21.0.2-graal | [Martin](https://github.com/martin2038) | GraalVM native binary | | | 00:09.867 | [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_ricardopieper.java)| 21.0.1-graal | [Ricardo Pieper](https://github.com/ricardopieper) | | -| | 00:10.127 | [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_artpar.java)| 21.0.1-open | [Parth Mudgal](https://github.com/artpar) | | -| | 00:10.553 | [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_C5H12O5.java)| 21.0.1-graal | [Xylitol](https://github.com/C5H12O5) | | +| | 00:09.945 | [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_japplis.java)| 21.0.1-open | [Anthony Goubard](https://github.com/japplis) | | +| | 00:10.092 | [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_phd3.java)| 21.0.1-graal | [Pratham](https://github.com/phd3) | | +| | 00:10.127 | [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_artpar.java)| 21.0.1-open | [Parth 
Mudgal](https://github.com/artpar) | uses Unsafe | +| | 00:11.577 | [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_netrunnereve.java)| 21.0.1-open | [Eve](https://github.com/netrunnereve) | | | | 00:10.473 | [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_raipc.java)| 21.0.1-open | [Anton Rybochkin](https://github.com/raipc) | | | | 00:11.119 | [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_lawrey.java)| 21.0.1-open | [lawrey](https://github.com/lawrey) | | +| | 00:11.156 | [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_YannMoisan.java)| java | [Yann Moisan](https://github.com/YannMoisan) | | | | 00:11.167 | [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_palmr.java)| 21.0.1-open | [Nick Palmer](https://github.com/palmr) | | +| | 00:11.352 | [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_karthikeyan97.java)| 21.0.1-open | [karthikeyan97](https://github.com/karthikeyan97) | uses Unsafe | +| | 00:11.363 | [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_godofwharf.java)| 21.0.2-tem | [Guruprasad Sridharan](https://github.com/godofwharf) | | | | 00:11.405 | [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_imrafaelmerino.java)| 21.0.1-graal | [Rafael Merino García](https://github.com/imrafaelmerino) | | +| | 00:11.406 | [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_gabrielfoo.java)| 21.0.1-graal | [gabrielfoo](https://github.com/gabrielfoo) | | | | 00:11.433 | 
[link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_jatingala.java)| 21.0.1-graal | [Jatin Gala](https://github.com/jatingala) | | +| | 00:11.505 | [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_bufistov.java)| 21.0.1-open | [Dmitry Bufistov](https://github.com/dmitry-midokura) | uses Unsafe | +| | 00:11.744 | [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_slovdahl.java)| 21.0.2-tem | [Sebastian Lövdahl](https://github.com/slovdahl) | | | | 00:11.805 | [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_coolmineman.java)| 21.0.1-graal | [Cool_Mineman](https://github.com/coolmineman) | | | | 00:11.934 | [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_arjenvaneerde.java)| 21.0.1-open | [arjenvaneerde](https://github.com/arjenvaneerde) | | -| | 00:11.987 | [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_flippingbits.java)| 21.0.1-graal | [Stefan Sprenger](https://github.com/flippingbits) | | | | 00:12.220 | [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_richardstartin.java)| 21.0.1-open | [Richard Startin](https://github.com/richardstartin) | | | | 00:12.495 | [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_SamuelYvon.java)| 21.0.1-graal | [Samuel Yvon](https://github.com/SamuelYvon) | GraalVM native binary | -| | 00:12.565 | [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_japplis.java)| 21.0.1-open | [Anthony Goubard](https://github.com/japplis) | | | | 00:12.568 | 
[link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_MeanderingProgrammer.java)| 21.0.1-graal | [Vlad](https://github.com/MeanderingProgrammer) | | +| | 00:12.800 | [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_yonatang.java)| java | [Yonatan Graber](https://github.com/yonatang) | | | | 00:13.013 | [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_thanhtrinity.java)| 21.0.1-graal | [Thanh Duong](https://github.com/thanhtrinity) | | -| | 00:13.623 | [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_phd3.java)| 21.0.1-open | [Pratham](https://github.com/phd3) | | +| | 00:13.071 | [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_ianopolous.java)| 21.0.1-open | [Dr Ian Preston](https://github.com/ianopolous) | | +| | 00:13.729 | [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_cb0s.java)| java | [Cedric Boes](https://github.com/cb0s) | | | | 00:13.817 | [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_entangled90.java)| 21.0.1-open | [Carlo](https://github.com/entangled90) | | +| | 00:14.502 | [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_eriklumme.java)| 21.0.1-graal | [eriklumme](https://github.com/eriklumme) | | | | 00:14.772 | [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_kevinmcmurtrie.java)| 21.0.1-open | [Kevin McMurtrie](https://github.com/kevinmcmurtrie) | | | | 00:14.867 | [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_berry120.java)| 21.0.1-open | [Michael Berry](https://github.com/berry120) | | +| | 
00:14.900 | [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_Judekeyser.java)| java | [Judekeyser](https://github.com/Judekeyser) | | +| | 00:15.006 | [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_PawelAdamski.java)| java | [Paweł Adamski](https://github.com/PawelAdamski) | | | | 00:15.662 | [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_semotpan.java)| 21.0.1-open | [Serghei Motpan](https://github.com/semotpan) | | -| | 00:16.379 | [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_ianopolous.java)| 21.0.1-open | [Dr Ian Preston](https://github.com/ianopolous) | | +| | 00:16.063 | [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_makohn.java)| 21.0.1-open | [Marek Kohn](https://github.com/makohn) | | +| | 00:16.457 | [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_bytesfellow.java)| 21.0.1-open | [Aleksei](https://github.com/bytesfellow) | | +| | 00:16.953 | [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_gauravdeshmukh.java)| 21.0.1-open | [Gaurav Anantrao Deshmukh](https://github.com/gauravdeshmukh) | | +| | 00:17.046 | [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_dkarampi.java)| 21.0.1-open | [Dimitris Karampinas](https://github.com/dkarampi) | | +| | 00:17.086 | [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_breejesh.java)| java | [Breejesh Rathod](https://github.com/breejesh) | | | | 00:17.490 | [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_kgeri.java)| 21.0.1-open | [Gergely 
Kiss](https://github.com/kgeri) | | +| | 00:17.255 | [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_tkosachev.java)| 21.0.1-open | [tkosachev](https://github.com/tkosachev) | | +| | 00:17.520 | [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_faridtmammadov.java)| 21.0.1-open | [Farid](https://github.com/faridtmammadov) | | | | 00:17.717 | [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_omarchenko4j.java)| 21.0.1-open | [Oleh Marchenko](https://github.com/omarchenko4j) | | | | 00:17.815 | [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_hallvard.java)| 21.0.1-open | [Hallvard Trætteberg](https://github.com/hallvard) | | +| | 00:17.932 | [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_plbpietrz.java)| 21.0.1-open | [Bartłomiej Pietrzyk](https://github.com/plbpietrz) | | | | 00:18.251 | [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_seijikun.java)| 21.0.1-graal | [Markus Ebner](https://github.com/seijikun) | | -| | 00:18.313 | [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_jgrateron.java)| 21.0.1-open | [Jairo Graterón](https://github.com/jgrateron) | | | | 00:18.448 | [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_moysesb.java)| 21.0.1-open | [Moysés Borges Furtado](https://github.com/moysesb) | | | | 00:18.771 | [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_davecom.java)| 21.0.1-graal | [David Kopec](https://github.com/davecom) | | | | 00:18.902 | 
[link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_maximz101.java)| 21.0.1-graal | [Maxime](https://github.com/maximz101) | | | | 00:19.357 | [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_truelive.java)| 21.0.1-graalce | [Roman Schweitzer](https://github.com/truelive) | | | | 00:20.691 | [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_Kidlike.java)| 21.0.1-graal | [Kidlike](https://github.com/Kidlike) | GraalVM native binary | | | 00:21.989 | [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_couragelee.java)| 21.0.1-open | [couragelee](https://github.com/couragelee) | | +| | 00:22.188 | [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_jgrateron.java)| 21.0.1-open | [Jairo Graterón](https://github.com/jgrateron) | | +| | 00:22.334 | [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_albertoventurini.java)| 21.0.1-open | [Alberto Venturini](https://github.com/albertoventurini) | | | | 00:22.457 | [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_rby.java)| 21.0.1-open | [Ramzi Ben Yahya](https://github.com/rby) | | +| | 00:22.471 | [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_0xshivamagarwal.java)| 21.0.1-open | [Shivam Agarwal](https://github.com/0xshivamagarwal) | | +| | 00:24.986 | [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_kumarsaurav123.java)| 21.0.1-open | [kumarsaurav123](https://github.com/kumarsaurav123) | | +| | 00:25.064 | [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_sudhirtumati.java)| 
21.0.2-open | [Sudhir Tumati](https://github.com/sudhirtumati) | | +| | 00:26.500 | [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_felix19350.java)| 21.0.1-open | [Bruno Félix](https://github.com/felix19350) | | | | 00:28.381 | [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_bjhara.java)| 21.0.1-open | [Hampus](https://github.com/bjhara) | | +| | 00:29.741 | [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_xpmatteo.java)| 21.0.1-open | [Matteo Vaccari](https://github.com/xpmatteo) | | | | 00:32.018 | [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_padreati.java)| 21.0.1-open | [Aurelian Tutuianu](https://github.com/padreati) | | | | 00:34.388 | [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_twobiers.java)| 21.0.1-tem | [Tobi](https://github.com/twobiers) | | +| | 00:35.875 | [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_MahmoudFawzyKhalil.java)| 21.0.1-open | [MahmoudFawzyKhalil](https://github.com/MahmoudFawzyKhalil) | | | | 00:36.180 | [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_hchiorean.java)| 21.0.1-open | [Horia Chiorean](https://github.com/hchiorean) | | -| | 00:36.212 | [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_kumarsaurav123.java)| 21.0.1-open | [kumarsaurav123](https://github.com/kumarsaurav123) | | +| | 00:36.424 | [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_manishgarg90.java)| java | [Manish Garg](https://github.com/manishgarg90) | | | | 00:38.340 | 
[link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_AbstractKamen.java)| 21.0.1-open | [AbstractKamen](https://github.com/AbstractKamen) | | | | 00:41.982 | [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_criccomini.java)| 21.0.1-open | [Chris Riccomini](https://github.com/criccomini) | | | | 00:42.893 | [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_javamak.java)| 21.0.1-open | [javamak](https://github.com/javamak) | | -| | 00:45.447 | [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_armandino.java)| 21.0.1-open | [Arman Sharif](https://github.com/armandino) | | | | 00:46.597 | [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_maeda6uiui.java)| 21.0.1-open | [Maeda-san](https://github.com/maeda6uiui) | | | | 00:58.811 | [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_Ujjwalbharti.java)| 21.0.1-open | [Ujjwal Bharti](https://github.com/Ujjwalbharti) | | -| | 01:05.094 | [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_mudit-saxena.java)| 21.0.1-open | [Mudit Saxena](https://github.com/mudit-saxena) | | +| | 01:05.094 | [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_muditsaxena.java)| 21.0.1-open | [Mudit Saxena](https://github.com/mudit-saxena) | | +| | 01:05.979 | [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_dqhieuu.java)| 21.0.1-graal | [Hieu Dao Quang](https://github.com/dqhieuu) | | | | 01:06.790 | [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_khmarbaise.java)| 21.0.1-open | [Karl Heinz 
Marbaise](https://github.com/khmarbaise) | | | | 01:06.944 | [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_santanu.java)| 21.0.1-open | [santanu](https://github.com/santanu) | | | | 01:07.014 | [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_pedestrianlove.java)| 21.0.1-open | [pedestrianlove](https://github.com/pedestrianlove) | | +| | 01:07.101 | [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_jeevjyot.java)| 21.0.1-open | [Jeevjyot Singh Chhabda](https://github.com/jeevjyot) | | | | 01:08.811 | [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_alesj.java)| 21.0.1-open | [Aleš Justin](https://github.com/alesj) | | | | 01:08.908 | [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_itaske.java)| 21.0.1-open | [itaske](https://github.com/itaske) | | +| | 01:09.595 | [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_agoncal.java)| 21.0.1-tem | [Antonio Goncalves](https://github.com/agoncal) | | | | 01:09.882 | [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_rprabhu.java)| 21.0.1-open | [Prabhu R](https://github.com/rprabhu) | | | | 01:14.815 | [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_anandmattikopp.java)| 21.0.1-open | [twohardthings](https://github.com/anandmattikopp) | | | | 01:25.801 | [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_ivanklaric.java)| 21.0.1-open | [ivanklaric](https://github.com/ivanklaric) | | | | 01:33.594 | [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_gnmathur.java)| 21.0.1-open | 
[Gaurav Mathur](https://github.com/gnmathur) | | -| | 01:45.082 | [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_netrunnereve.java)| 21.0.1-open | [Eve](https://github.com/netrunnereve) | | +| | 01:53.208 | [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_mahadev_k.java)| java | [Mahadev K](https://github.com/mahadev-k) | | | | 01:56.607 | [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_abfrmblr.java)| 21.0.1-open | [Abhilash](https://github.com/abfrmblr) | | | | 03:43.521 | [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_yehwankim23.java)| 21.0.1-open | [김예환 Ye-Hwan Kim (Sam)](https://github.com/yehwankim23) | | | | 03:59.760 | [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_fragmede.java)| 21.0.1-open | [Samson](https://github.com/fragmede) | | | | --- | | | | | | | 04:49.679 | [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_baseline.java) (Baseline) | 21.0.1-open | [Gunnar Morling](https://github.com/gunnarmorling) | | +\* These two entries have such a similar runtime (below the error margin I can reliably measure), that they share position #1 in the leaderboard. + Note that I am not super-scientific in the way I'm running the contenders (see [Evaluating Results](#evaluating-results) for the details). This is not a high-fidelity micro-benchmark and there can be variations of ~ +-5% between runs. @@ -164,50 +248,69 @@ For the 1BRC challenge, only the results in the previous section are of importan #### 32 Cores / 64 Threads For officially evaluating entries into the challenge, each contender is run on eight cores of the evaluation machine (AMD EPYC™ 7502P).
-Here are the results from running the top 15 entries (as of commit [2c26b511](https://github.com/gunnarmorling/1brc/commit/2c26b511e741f4d96a51dda831001946ea27a591)) on all 32 cores / 64 threads (i.e. SMT is enabled) of the machine: +Here are the results from running the top 25 entries (as of commit [1ba9cdcf](https://github.com/gunnarmorling/1brc/commit/1ba9cdcf1552b7dcff8d46a9e9724671dd479fac), Feb 1) on all 32 cores / 64 threads (i.e. SMT is enabled) of the machine: | # | Result (m:s.ms) | Implementation | JDK | Submitter | Notes | |---|-----------------|--------------------|-----|---------------|-----------| -| 1 | 00:00.799 | [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_thomaswue.java)| 21.0.1-graal | [Thomas Wuerthinger](https://github.com/thomaswue) | GraalVM native binary | -| 2 | 00:00.933 | [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_royvanrijn.java)| 21.0.1-graal | [Roy van Rijn](https://github.com/royvanrijn) | GraalVM native binary | -| 3 | 00:01.236 | [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_artsiomkorzun.java)| 21.0.1-graal | [Artsiom Korzun](https://github.com/artsiomkorzun) | | -| | 00:01.380 | [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_merykittyunsafe.java)| 21.0.1-open | [merykittyunsafe](https://github.com/merykittyunsafe) | | -| | 00:01.383 | [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_cliffclick.java)| 21.0.1-open | [Cliff Click](https://github.com/cliffclick) | | -| | 00:01.429 | [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_iziamos.java)| 21.0.1-open | [John Ziamos](https://github.com/iziamos) | | -| | 00:01.464 | 
[link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_obourgain.java)| 21.0.1-open | [Olivier Bourgain](https://github.com/obourgain) | | -| | 00:01.603 | [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_abeobk.java)| 21.0.1-open | [Van Phu DO](https://github.com/abeobk) | | -| | 00:01.748 | [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_yavuztas.java)| 21.0.1-graal | [Yavuz Tas](https://github.com/yavuztas) | | -| | 00:01.778 | [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_merykitty.java)| 21.0.1-open | [Quan Anh Mai](https://github.com/merykitty) | | -| | 00:01.942 | [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_mtopolnik.java)| 21.0.1-graal | [Marko Topolnik](https://github.com/mtopolnik) | | -| | 00:01.972 | [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_ebarlas.java)| 21.0.1-graal | [Elliot Barlas](https://github.com/ebarlas) | | -| | 00:02.111 | [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_JamalMulla.java)| 21.0.1-graal | [Jamal Mulla](https://github.com/JamalMulla) | | -| | 00:02.644 | [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_vaidhy.java)| 21.0.1-graal | [Vaidhy Mayilrangam](https://github.com/vaidhy) | | -| | 00:03.697 | [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_hundredwatt.java)| 21.0.1-graal | [Jason Nochlin](https://github.com/hundredwatt) | | +| 1* | 00:00.324 | [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_jerrinot.java)| 21.0.2-graal | [Jaromir 
Hamala](https://github.com/jerrinot) | GraalVM native binary, uses Unsafe | +| 1* | 00:00.326 | [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_thomaswue.java)| 21.0.2-graal | [Thomas Wuerthinger](https://github.com/thomaswue), [Quan Anh Mai](https://github.com/merykitty), [Alfonso² Peterssen](https://github.com/mukel) | GraalVM native binary, uses Unsafe | +| 2* | 00:00.350 | [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_artsiomkorzun.java)| 21.0.2-graal | [Artsiom Korzun](https://github.com/artsiomkorzun) | GraalVM native binary, uses Unsafe | +| 2* | 00:00.351 | [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_abeobk.java)| 21.0.2-graal | [Van Phu DO](https://github.com/abeobk) | GraalVM native binary, uses Unsafe | +| 3 | 00:00.389 | [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_stephenvonworley.java)| 21.0.2-graal | [Stephen Von Worley](https://github.com/stephenvonworley) | GraalVM native binary, uses Unsafe | +| | 00:00.410 | [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_yavuztas.java)| 21.0.2-graal | [Yavuz Tas](https://github.com/yavuztas) | GraalVM native binary, uses Unsafe | +| | 00:00.410 | [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_royvanrijn.java)| 21.0.2-graal | [Roy van Rijn](https://github.com/royvanrijn) | GraalVM native binary, uses Unsafe | +| | 00:00.502 | [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_mtopolnik.java)| 21.0.2-graal | [Marko Topolnik](https://github.com/mtopolnik) | GraalVM native binary, uses Unsafe | +| | 00:00.609 | 
[link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_roman_r_m.java)| 21.0.1-graal | [Roman Musin](https://github.com/roman-r-m) | GraalVM native binary, uses Unsafe | +| | 00:00.611 | [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_gonixunsafe.java)| 21.0.1-open | [gonixunsafe](https://github.com/gonixunsafe) | uses Unsafe | +| | 00:00.716 | [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_JamalMulla.java)| 21.0.2-graal | [Jamal Mulla](https://github.com/JamalMulla) | GraalVM native binary, uses Unsafe | +| | 00:00.728 | [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_tivrfoa.java)| 21.0.2-graal | [tivrfoa](https://github.com/tivrfoa) | GraalVM native binary, uses Unsafe | +| | 00:00.764 | [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_serkan_ozal.java)| 21.0.1-open | [Serkan ÖZAL](https://github.com/serkan-ozal) | uses Unsafe | +| | 00:00.785 | [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_ebarlas.java)| 21.0.2-graal | [Elliot Barlas](https://github.com/ebarlas) | GraalVM native binary, uses Unsafe | +| | 00:00.814 | [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_gonix.java)| 21.0.1-open | [gonix](https://github.com/gonix) | | +| | 00:00.838 | [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_zerninv.java)| 21.0.2-graal | [zerninv](https://github.com/zerninv) | GraalVM native binary, uses Unsafe | +| | 00:00.877 | [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_iziamos.java)| 21.0.2-graal | [John Ziamos](https://github.com/iziamos) | GraalVM native binary, uses 
Unsafe | +| | 00:01.179 | [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_vemanaNonIdiomatic.java)| 21.0.1-graal | [vemanaNonIdiomatic](https://github.com/vemanaNonIdiomatic) | uses Unsafe | +| | 00:01.268 | [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_merykittyunsafe.java)| 21.0.1-open | [merykittyunsafe](https://github.com/merykittyunsafe) | uses Unsafe | +| | 00:01.289 | [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_linl33.java)| 22.ea.32-open | [Li Lin](https://github.com/linl33) | uses Unsafe | +| | 00:01.345 | [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_hundredwatt.java)| 21.0.1-graal | [Jason Nochlin](https://github.com/hundredwatt) | | +| | 00:01.393 | [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_merykitty.java)| 21.0.1-open | [Quan Anh Mai](https://github.com/merykitty) | | +| | 00:01.478 | [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_yourwass.java)| 21.0.1-open | [yourwass](https://github.com/yourwass) | uses Unsafe | +| | 00:01.770 | [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_jincongho.java)| 21.0.1-open | [Jin Cong Ho](https://github.com/jincongho) | uses Unsafe | +| | 00:02.918 | [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_jparera.java)| 21.0.1-open | [Juan Parera](https://github.com/jparera) | | #### 10K Key Set The 1BRC challenge data set contains 413 distinct weather stations, whereas the rules allow for 10,000 different station names to occur. 
-Here are the results from running the top 15 entries (as of commit [2c26b511](https://github.com/gunnarmorling/1brc/commit/2c26b511e741f4d96a51dda831001946ea27a591)) against 1,000,000,000 measurement values across 10K stations (created via _./create_measurements3.sh 1000000000_), +Here are the results from running the top 25 entries (as of commit [1ba9cdcf](https://github.com/gunnarmorling/1brc/commit/1ba9cdcf1552b7dcff8d46a9e9724671dd479fac), Feb 1) against 1,000,000,000 measurement values across 10K stations (created via _./create_measurements3.sh 1000000000_), using eight cores on the evaluation machine: | # | Result (m:s.ms) | Implementation | JDK | Submitter | Notes | |---|-----------------|--------------------|-----|---------------|-----------| -| 1 | 00:04.589 | [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_artsiomkorzun.java)| 21.0.1-graal | [Artsiom Korzun](https://github.com/artsiomkorzun) | | -| 2 | 00:05.296 | [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_royvanrijn.java)| 21.0.1-graal | [Roy van Rijn](https://github.com/royvanrijn) | GraalVM native binary | -| 3 | 00:05.308 | [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_thomaswue.java)| 21.0.1-graal | [Thomas Wuerthinger](https://github.com/thomaswue) | GraalVM native binary | -| | 00:05.881 | [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_mtopolnik.java)| 21.0.1-graal | [Marko Topolnik](https://github.com/mtopolnik) | | -| | 00:07.120 | [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_JamalMulla.java)| 21.0.1-graal | [Jamal Mulla](https://github.com/JamalMulla) | | -| | 00:07.915 | [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_cliffclick.java)| 21.0.1-open 
| [Cliff Click](https://github.com/cliffclick) | | -| | 00:08.979 | [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_yavuztas.java)| 21.0.1-graal | [Yavuz Tas](https://github.com/yavuztas) | | -| | 00:10.052 | [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_merykittyunsafe.java)| 21.0.1-open | [merykittyunsafe](https://github.com/merykittyunsafe) | | -| | 00:10.134 | [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_vaidhy.java)| 21.0.1-graal | [Vaidhy Mayilrangam](https://github.com/vaidhy) | | -| | 00:10.599 | [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_ebarlas.java)| 21.0.1-graal | [Elliot Barlas](https://github.com/ebarlas) | | -| | 00:12.750 | [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_merykitty.java)| 21.0.1-open | [Quan Anh Mai](https://github.com/merykitty) | | -| | --- | | | | | -| | DNF | [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_hundredwatt.java)| 21.0.1-graal | [Jason Nochlin](https://github.com/hundredwatt) | Didn't complete in 60 sec | -| | DNF | [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_abeobk.java)| 21.0.1-open | [Van Phu DO](https://github.com/abeobk) | Didn't complete in 60 sec | -| | DNF | [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_iziamos.java)| 21.0.1-open | [John Ziamos](https://github.com/iziamos) | Didn't complete in 60 sec | -| | DNF | [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_obourgain.java)| 21.0.1-open | [Olivier Bourgain](https://github.com/obourgain) | Failed with java.lang.OutOfMemoryError: Java heap 
space | +| 1 | 00:02.977 | [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_artsiomkorzun.java)| 21.0.2-graal | [Artsiom Korzun](https://github.com/artsiomkorzun) | GraalVM native binary, uses Unsafe | +| 2 | 00:03.068 | [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_mtopolnik.java)| 21.0.2-graal | [Marko Topolnik](https://github.com/mtopolnik) | GraalVM native binary, uses Unsafe | +| 3 | 00:03.175 | [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_stephenvonworley.java)| 21.0.2-graal | [Stephen Von Worley](https://github.com/stephenvonworley) | GraalVM native binary, uses Unsafe | +| | 00:04.022 | [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_royvanrijn.java)| 21.0.2-graal | [Roy van Rijn](https://github.com/royvanrijn) | GraalVM native binary, uses Unsafe | +| | 00:04.047 | [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_jerrinot.java)| 21.0.2-graal | [Jaromir Hamala](https://github.com/jerrinot) | GraalVM native binary, uses Unsafe | +| | 00:04.122 | [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_gonixunsafe.java)| 21.0.1-open | [gonixunsafe](https://github.com/gonixunsafe) | uses Unsafe | +| | 00:04.520 | [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_tivrfoa.java)| 21.0.2-graal | [tivrfoa](https://github.com/tivrfoa) | GraalVM native binary, uses Unsafe | +| | 00:04.655 | [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_JamalMulla.java)| 21.0.2-graal | [Jamal Mulla](https://github.com/JamalMulla) | GraalVM native binary, uses Unsafe | +| | 00:04.708 | 
[link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_gonix.java)| 21.0.1-open | [gonix](https://github.com/gonix) | | +| | 00:04.797 | [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_thomaswue.java)| 21.0.2-graal | [Thomas Wuerthinger](https://github.com/thomaswue), [Quan Anh Mai](https://github.com/merykitty), [Alfonso² Peterssen](https://github.com/mukel) | GraalVM native binary, uses Unsafe | +| | 00:04.814 | [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_vemanaNonIdiomatic.java)| 21.0.1-graal | [vemanaNonIdiomatic](https://github.com/vemanaNonIdiomatic) | uses Unsafe | +| | 00:05.248 | [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_zerninv.java)| 21.0.2-graal | [zerninv](https://github.com/zerninv) | GraalVM native binary, uses Unsafe | +| | 00:05.367 | [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_yavuztas.java)| 21.0.2-graal | [Yavuz Tas](https://github.com/yavuztas) | GraalVM native binary, uses Unsafe | +| | 00:05.894 | [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_ebarlas.java)| 21.0.2-graal | [Elliot Barlas](https://github.com/ebarlas) | GraalVM native binary, uses Unsafe | +| | 00:06.014 | [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_abeobk.java)| 21.0.2-graal | [Van Phu DO](https://github.com/abeobk) | GraalVM native binary, uses Unsafe | +| | 00:06.380 | [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_iziamos.java)| 21.0.2-graal | [John Ziamos](https://github.com/iziamos) | GraalVM native binary, uses Unsafe | +| | 00:08.830 | 
[link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_serkan_ozal.java)| 21.0.1-open | [Serkan ÖZAL](https://github.com/serkan-ozal) | uses Unsafe | +| | 00:09.349 | [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_yourwass.java)| 21.0.1-open | [yourwass](https://github.com/yourwass) | uses Unsafe | +| | 00:10.388 | [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_merykittyunsafe.java)| 21.0.1-open | [merykittyunsafe](https://github.com/merykittyunsafe) | uses Unsafe | +| | 00:12.467 | [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_jparera.java)| 21.0.1-open | [Juan Parera](https://github.com/jparera) | | +| | 00:13.225 | [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_merykitty.java)| 21.0.1-open | [Quan Anh Mai](https://github.com/merykitty) | | +| | 00:15.901 | [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_jincongho.java)| 21.0.1-open | [Jin Cong Ho](https://github.com/jincongho) | uses Unsafe | +| | 00:17.972 | [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_hundredwatt.java)| 21.0.1-graal | [Jason Nochlin](https://github.com/hundredwatt) | | +| | 00:20.174 | [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_roman_r_m.java)| 21.0.1-graal | [Roman Musin](https://github.com/roman-r-m) | GraalVM native binary, uses Unsafe | +| | 00:21.591 | [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_linl33.java)| 22.ea.32-open | [Li Lin](https://github.com/linl33) | uses Unsafe | ## Prerequisites @@ -311,7 +414,7 @@ To submit your own implementation to 1BRC, follow these steps: * 
Create a fork of the [onebrc](https://github.com/gunnarmorling/onebrc/) GitHub repository. * Run `./create_fork.sh ` to copy the baseline implementation to your personal files, or do this manually: - * Create a copy of _CalculateAverage.java_, named _CalculateAverage\_.java_, e.g. _CalculateAverage\_doloreswilson.java_. + * Create a copy of _CalculateAverage\_baseline.java_, named _CalculateAverage\_.java_, e.g. _CalculateAverage\_doloreswilson.java_. * Create a copy of _calculate\_average\_baseline.sh_, named _calculate\_average\_.sh_, e.g. _calculate\_average\_doloreswilson.sh_. * Adjust that script so that it references your implementation class name. If needed, provide any JVM arguments via the `JAVA_OPTS` variable in that script. Make sure that script does not write anything to standard output other than calculation results. @@ -384,10 +487,25 @@ A: Probably not :) 1BRC results are reported in wallclock time, thus results of _Q: Why_ 1️⃣🐝🏎️ _?_\ A: It's the abbreviation of the project name: **One** **B**illion **R**ow **C**hallenge. 
-## Sponsorship +## 1BRC on the Web + +A list of external resources such as blog posts and videos, discussing 1BRC and specific implementations: + +* [The One Billion Row Challenge Shows That Java Can Process a One Billion Rows File in Two Seconds ](https://www.infoq.com/news/2024/01/1brc-fast-java-processing), by Olimpiu Pop (interview) +* [Cliff Click discussing his 1BRC solution on the Coffee Compiler Club](https://www.youtube.com/watch?v=NJNIbgV6j-Y) (video) +* [1️⃣🐝🏎️🦆 (1BRC in SQL with DuckDB)](https://rmoff.net/2024/01/03/1%EF%B8%8F%E2%83%A3%EF%B8%8F-1brc-in-sql-with-duckdb/), by Robin Moffatt (blog post) +* [1 billion rows challenge in PostgreSQL and ClickHouse](https://ftisiot.net/posts/1brows/), by Francesco Tisiot (blog post) +* [The One Billion Row Challenge with Snowflake](https://medium.com/snowflake/the-one-billion-row-challenge-with-snowflake-f612ae76dbd5), by Sean Falconer (blog post) +* [One billion row challenge using base R](https://www.r-bloggers.com/2024/01/one-billion-row-challenge-using-base-r/), by David Schoch (blog post) +* [1 Billion Row Challenge with Apache Pinot](https://hubertdulay.substack.com/p/1-billion-row-challenge-in-apache), by Hubert Dulay (blog post) +* [One Billion Row Challenge In C](https://www.dannyvankooten.com/blog/2024/1brc/), by Danny Van Kooten (blog post) +* [One Billion Row Challenge in Racket](https://defn.io/2024/01/10/one-billion-row-challenge-in-racket/), by Bogdan Popa (blog post) +* [The One Billion Row Challenge - .NET Edition](https://dev.to/mergeconflict/392-the-one-billion-row-challenge-net-edition), by Frank A. 
Krueger (podcast) +* [One Billion Row Challenge](https://curiouscoding.nl/posts/1brc/), by Ragnar Groot Koerkamp (blog post) +* [ClickHouse and The One Billion Row Challenge](https://clickhouse.com/blog/clickhouse-one-billion-row-challenge), by Dale McDiarmid (blog post) +* [One Billion Row Challenge & Azure Data Explorer](https://nielsberglund.com/post/2024-01-28-one-billion-row-challenge--azure-data-explorer/), by Niels Berglund (blog post) +* [One Billion Row Challenge - view from sidelines](https://www.chashnikov.dev/post/one-billion-row-challenge-view-from-sidelines), by Leo Chashnikov (blog post) -A big thank you to my employer [Decodable](https://www.decodable.co/) for funding the evaluation environment and supporting this challenge! - ## License This code base is available under the Apache License, version 2. diff --git a/calculate_average_0xshivamagarwal.sh b/calculate_average_0xshivamagarwal.sh new file mode 100755 index 000000000..32298fb31 --- /dev/null +++ b/calculate_average_0xshivamagarwal.sh @@ -0,0 +1,22 @@ +#!/bin/sh +# +# Copyright 2023 The original authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +JAVA_OPTS="--enable-preview -XX:+UnlockExperimentalVMOptions -XX:+TrustFinalNonStaticFields -dsa -XX:+UseNUMA" +if [[ ! 
"$(uname -s)" = "Darwin" ]]; then + JAVA_OPTS="$JAVA_OPTS -XX:+UseTransparentHugePages" +fi +java $JAVA_OPTS --class-path target/average-1.0.0-SNAPSHOT.jar dev.morling.onebrc.CalculateAverage_0xshivamagarwal diff --git a/calculate_average_3j5a.sh b/calculate_average_3j5a.sh new file mode 100755 index 000000000..b4a427732 --- /dev/null +++ b/calculate_average_3j5a.sh @@ -0,0 +1,19 @@ +#!/bin/sh +# +# Copyright 2023 The original authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +JAVA_OPTS="--add-opens=java.base/jdk.internal.util=ALL-UNNAMED" +java $JAVA_OPTS --class-path target/average-1.0.0-SNAPSHOT.jar dev.morling.onebrc.CalculateAverage_3j5a diff --git a/calculate_average_AlexanderYastrebov.sh b/calculate_average_AlexanderYastrebov.sh new file mode 100755 index 000000000..ea951bd4e --- /dev/null +++ b/calculate_average_AlexanderYastrebov.sh @@ -0,0 +1,20 @@ +#!/bin/bash +# +# Copyright 2023 The original authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +INPUT=${1:-"measurements.txt"} + +target/AlexanderYastrebov/1brc "$INPUT" diff --git a/calculate_average_ChrisBellew.sh b/calculate_average_ChrisBellew.sh new file mode 100755 index 000000000..122ddce64 --- /dev/null +++ b/calculate_average_ChrisBellew.sh @@ -0,0 +1,19 @@ +#!/bin/sh +# +# Copyright 2023 The original authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +JAVA_OPTS="--add-modules jdk.incubator.vector --enable-preview" +java $JAVA_OPTS --class-path target/average-1.0.0-SNAPSHOT.jar dev.morling.onebrc.CalculateAverage_chrisbellew \ No newline at end of file diff --git a/calculate_average_EduardoSaverin.sh b/calculate_average_EduardoSaverin.sh new file mode 100755 index 000000000..d94e7f77d --- /dev/null +++ b/calculate_average_EduardoSaverin.sh @@ -0,0 +1,19 @@ +#!/bin/sh +# +# Copyright 2023 The original authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +JAVA_OPTS="--enable-preview" +java $JAVA_OPTS --class-path target/average-1.0.0-SNAPSHOT.jar dev.morling.onebrc.CalculateAverage_EduardoSaverin diff --git a/calculate_average_JaimePolidura.sh b/calculate_average_JaimePolidura.sh new file mode 100755 index 000000000..dfd890848 --- /dev/null +++ b/calculate_average_JaimePolidura.sh @@ -0,0 +1,26 @@ +#!/bin/sh +# +# Copyright 2023 The original authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +if [ -f target/CalculateAverage_JaimePolidura_image ]; then + target/CalculateAverage_JaimePolidura_image +else + echo "Native image not found. Running in JVM mode" + JAVA_OPTS="--enable-preview -XX:+UnlockExperimentalVMOptions -XX:+UseEpsilonGC -XX:+UseTransparentHugePages -XX:+TrustFinalNonStaticFields" + java $JAVA_OPTS --class-path target/average-1.0.0-SNAPSHOT.jar dev.morling.onebrc.CalculateAverage_JaimePolidura +fi + + diff --git a/calculate_average_JamalMulla.sh b/calculate_average_JamalMulla.sh index 228d56bfb..119263bad 100755 --- a/calculate_average_JamalMulla.sh +++ b/calculate_average_JamalMulla.sh @@ -15,5 +15,11 @@ # limitations under the License. 
# -JAVA_OPTS="--enable-preview -XX:+UnlockExperimentalVMOptions -XX:+TrustFinalNonStaticFields -XX:+UseTransparentHugePages" -java $JAVA_OPTS --class-path target/average-1.0.0-SNAPSHOT.jar dev.morling.onebrc.CalculateAverage_JamalMulla + + +if [ -f target/CalculateAverage_JamalMulla_image ]; then + target/CalculateAverage_JamalMulla_image +else + JAVA_OPTS="--enable-preview -XX:+UnlockExperimentalVMOptions -XX:+TrustFinalNonStaticFields -XX:+UseTransparentHugePages -XX:-TieredCompilation" + java $JAVA_OPTS --class-path target/average-1.0.0-SNAPSHOT.jar dev.morling.onebrc.CalculateAverage_JamalMulla +fi \ No newline at end of file diff --git a/calculate_average_JesseVanRooy.sh b/calculate_average_JesseVanRooy.sh new file mode 100755 index 000000000..c680e974c --- /dev/null +++ b/calculate_average_JesseVanRooy.sh @@ -0,0 +1,20 @@ +#!/bin/sh +# +# Copyright 2023 The original authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + + +JAVA_OPTS="--enable-preview -XX:-TieredCompilation -Dsun.stdout.encoding=UTF-8" +java $JAVA_OPTS --class-path target/average-1.0.0-SNAPSHOT.jar dev.morling.onebrc.CalculateAverage_JesseVanRooy diff --git a/calculate_average_Judekeyser.sh b/calculate_average_Judekeyser.sh new file mode 100755 index 000000000..9490c15c5 --- /dev/null +++ b/calculate_average_Judekeyser.sh @@ -0,0 +1,19 @@ +#!/bin/sh +# +# Copyright 2023 The original authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +JAVA_OPTS="--enable-preview --add-modules jdk.incubator.vector" +java $JAVA_OPTS --class-path target/average-1.0.0-SNAPSHOT.jar dev.morling.onebrc.CalculateAverage_Judekeyser diff --git a/calculate_average_JurenIvan.sh b/calculate_average_JurenIvan.sh new file mode 100755 index 000000000..73d956e90 --- /dev/null +++ b/calculate_average_JurenIvan.sh @@ -0,0 +1,19 @@ +#!/bin/sh +# +# Copyright 2023 The original authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +JAVA_OPTS="--enable-preview" +java $JAVA_OPTS --class-path target/average-1.0.0-SNAPSHOT.jar dev.morling.onebrc.CalculateAverage_JurenIvan diff --git a/calculate_average_MahmoudFawzyKhalil.sh b/calculate_average_MahmoudFawzyKhalil.sh new file mode 100755 index 000000000..761d7e675 --- /dev/null +++ b/calculate_average_MahmoudFawzyKhalil.sh @@ -0,0 +1,19 @@ +#!/bin/sh +# +# Copyright 2023 The original authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +JAVA_OPTS="--enable-preview" +java $JAVA_OPTS --class-path target/average-1.0.0-SNAPSHOT.jar dev.morling.onebrc.CalculateAverage_MahmoudFawzyKhalil diff --git a/calculate_average_PanagiotisDrakatos.sh b/calculate_average_PanagiotisDrakatos.sh new file mode 100755 index 000000000..699ebdb28 --- /dev/null +++ b/calculate_average_PanagiotisDrakatos.sh @@ -0,0 +1,36 @@ +# +# Copyright 2023 The original authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + + +# +# Copyright 2023 The original authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +source "$HOME/.sdkman/bin/sdkman-init.sh" +sdk use java 21.0.1-graal 1>&2 +JAVA_OPTS="--enable-preview -Xms1536m -Xmx10536m -XX:NewSize=256m -XX:MaxNewSize=512m -XX:MaxMetaspaceSize=512m -XX:+DisableExplicitGC -XX:+UseSerialGC -XX:-TieredCompilation -XX:+UnlockExperimentalVMOptions -XX:+TrustFinalNonStaticFields -dsa -XX:+UseNUMA" +java $JAVA_OPTS --class-path target/average-1.0.0-SNAPSHOT.jar dev.morling.onebrc.CalculateAverage_PanagiotisDrakatos diff --git a/calculate_average_PawelAdamski.sh b/calculate_average_PawelAdamski.sh new file mode 100755 index 000000000..e8d4bd4ce --- /dev/null +++ b/calculate_average_PawelAdamski.sh @@ -0,0 +1,19 @@ +#!/bin/sh +# +# Copyright 2023 The original authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +JAVA_OPTS="-Xnoclassgc" +java $JAVA_OPTS --class-path target/average-1.0.0-SNAPSHOT.jar dev.morling.onebrc.CalculateAverage_PawelAdamski diff --git a/calculate_average_Smoofie.sh b/calculate_average_Smoofie.sh new file mode 100755 index 000000000..3688c3a34 --- /dev/null +++ b/calculate_average_Smoofie.sh @@ -0,0 +1,19 @@ +#!/bin/sh +# +# Copyright 2023 The original authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +JAVA_OPTS="--enable-preview" +java $JAVA_OPTS --class-path target/average-1.0.0-SNAPSHOT.jar dev.morling.onebrc.CalculateAverage_Smoofie diff --git a/calculate_average_YannMoisan.sh b/calculate_average_YannMoisan.sh new file mode 100755 index 000000000..74552f0c4 --- /dev/null +++ b/calculate_average_YannMoisan.sh @@ -0,0 +1,19 @@ +#!/bin/sh +# +# Copyright 2023 The original authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +JAVA_OPTS="" +java $JAVA_OPTS --class-path target/average-1.0.0-SNAPSHOT.jar dev.morling.onebrc.CalculateAverage_YannMoisan diff --git a/calculate_average_abeobk.sh b/calculate_average_abeobk.sh index a7b43d404..18c4c9448 100755 --- a/calculate_average_abeobk.sh +++ b/calculate_average_abeobk.sh @@ -15,5 +15,12 @@ # limitations under the License. # -JAVA_OPTS="--enable-preview" -java $JAVA_OPTS --class-path target/average-1.0.0-SNAPSHOT.jar dev.morling.onebrc.CalculateAverage_abeobk +if [ -f target/CalculateAverage_abeobk_image ]; then + echo "Picking up existing native image 'target/CalculateAverage_abeobk_image', delete the file to select JVM mode." 1>&2 + target/CalculateAverage_abeobk_image +else + JAVA_OPTS="--enable-preview" + echo "Chosing to run the app in JVM mode as no native image was found, use prepare_abeobk.sh to generate." 1>&2 + java $JAVA_OPTS --class-path target/average-1.0.0-SNAPSHOT.jar dev.morling.onebrc.CalculateAverage_abeobk +fi + diff --git a/calculate_average_adriacabeza.sh b/calculate_average_adriacabeza.sh new file mode 100755 index 000000000..7d4be43d5 --- /dev/null +++ b/calculate_average_adriacabeza.sh @@ -0,0 +1,21 @@ +#!/bin/sh +# +# Copyright 2023 The original authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + + +JAVA_OPTS="-XX:+UseStringDeduplication -XX:+UnlockExperimentalVMOptions -XX:+UseEpsilonGC -XX:+AlwaysPreTouch" +java --enable-preview -classpath target/average-1.0.0-SNAPSHOT.jar dev.morling.onebrc.CalculateAverage_adriacabeza + diff --git a/calculate_average_agoncal.sh b/calculate_average_agoncal.sh new file mode 100755 index 000000000..9a295fc46 --- /dev/null +++ b/calculate_average_agoncal.sh @@ -0,0 +1,21 @@ +#!/bin/sh +# +# Copyright 2023 The original authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# sdk use java 21.0.1-tem + +JAVA_OPTS="--enable-preview -XX:+UseShenandoahGC -XX:+UseStringDeduplication -da" +java $JAVA_OPTS --class-path target/average-1.0.0-SNAPSHOT.jar dev.morling.onebrc.CalculateAverage_agoncal \ No newline at end of file diff --git a/calculate_average_albertoventurini.sh b/calculate_average_albertoventurini.sh new file mode 100755 index 000000000..6263e14a0 --- /dev/null +++ b/calculate_average_albertoventurini.sh @@ -0,0 +1,19 @@ +#!/bin/sh +# +# Copyright 2023 The original authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +# + +JAVA_OPTS="-Xnoclassgc" +java $JAVA_OPTS --class-path target/average-1.0.0-SNAPSHOT.jar dev.morling.onebrc.CalculateAverage_albertoventurini diff --git a/calculate_average_anestoruk.sh b/calculate_average_anestoruk.sh new file mode 100755 index 000000000..9db63c898 --- /dev/null +++ b/calculate_average_anestoruk.sh @@ -0,0 +1,19 @@ +#!/bin/sh +# +# Copyright 2023 The original authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +JAVA_OPTS="--enable-preview" +java $JAVA_OPTS --class-path target/average-1.0.0-SNAPSHOT.jar dev.morling.onebrc.CalculateAverage_anestoruk diff --git a/calculate_average_anitasv.sh b/calculate_average_anitasv.sh new file mode 100755 index 000000000..01d0d745b --- /dev/null +++ b/calculate_average_anitasv.sh @@ -0,0 +1,19 @@ +#!/bin/sh +# +# Copyright 2023 The original authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +JAVA_OPTS="--enable-preview" +java $JAVA_OPTS --class-path target/average-1.0.0-SNAPSHOT.jar dev.morling.onebrc.CalculateAverage_anitasv diff --git a/calculate_average_arjenw.sh b/calculate_average_arjenw.sh new file mode 100755 index 000000000..750ced9e0 --- /dev/null +++ b/calculate_average_arjenw.sh @@ -0,0 +1,20 @@ +#!/bin/sh +# +# Copyright 2023 The original authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +JAVA_OPTS="-Xms500m -Xmx500m --enable-preview -dsa -XX:+UnlockExperimentalVMOptions -XX:+UseEpsilonGC -XX:-AlwaysPreTouch" + +java $JAVA_OPTS --class-path target/average-1.0.0-SNAPSHOT.jar dev.morling.onebrc.CalculateAverage_arjenw $@ diff --git a/calculate_average_armandino.sh b/calculate_average_armandino.sh index 719953d52..21a4f8ccf 100755 --- a/calculate_average_armandino.sh +++ b/calculate_average_armandino.sh @@ -15,6 +15,11 @@ # limitations under the License. # - -JAVA_OPTS="" -java $JAVA_OPTS --class-path target/average-1.0.0-SNAPSHOT.jar dev.morling.onebrc.CalculateAverage_armandino +if [ -f target/CalculateAverage_armandino_image ]; then + echo "Picking up existing native image 'target/CalculateAverage_armandino_image', delete the file to select JVM mode." 1>&2 + target/CalculateAverage_armandino_image +else + echo "Chosing to run the app in JVM mode as no native image was found, use prepare_armandino.sh to generate." 
1>&2 + JAVA_OPTS="--enable-preview -da -dsa -Xms128m -Xmx128m -XX:+UnlockExperimentalVMOptions -XX:+UseEpsilonGC -XX:+AlwaysPreTouch" + java $JAVA_OPTS --class-path target/average-1.0.0-SNAPSHOT.jar dev.morling.onebrc.CalculateAverage_armandino +fi diff --git a/calculate_average_artsiomkorzun.sh b/calculate_average_artsiomkorzun.sh index 96e3467d2..d9c18284e 100755 --- a/calculate_average_artsiomkorzun.sh +++ b/calculate_average_artsiomkorzun.sh @@ -15,5 +15,11 @@ # limitations under the License. # -JAVA_OPTS="--enable-preview -Xmx128m -XX:+UseSerialGC -XX:-TieredCompilation" -java $JAVA_OPTS --class-path target/average-1.0.0-SNAPSHOT.jar dev.morling.onebrc.CalculateAverage_artsiomkorzun +if [ -f target/CalculateAverage_artsiomkorzun_image ]; then + echo "Picking up existing native image 'target/CalculateAverage_artsiomkorzun_image', delete the file to select JVM mode." 1>&2 + target/CalculateAverage_artsiomkorzun_image +else + JAVA_OPTS="--enable-preview -Xmx128m -XX:+UseSerialGC -XX:-TieredCompilation" + echo "Chosing to run the app in JVM mode as no native image was found, use prepare_artsiomkorzun.sh to generate." 1>&2 + java $JAVA_OPTS --class-path target/average-1.0.0-SNAPSHOT.jar dev.morling.onebrc.CalculateAverage_artsiomkorzun +fi \ No newline at end of file diff --git a/calculate_average_breejesh.sh b/calculate_average_breejesh.sh new file mode 100755 index 000000000..0f0738b2b --- /dev/null +++ b/calculate_average_breejesh.sh @@ -0,0 +1,19 @@ +#!/bin/sh +# +# Copyright 2023 The original authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +# + +JAVA_OPTS="" +java $JAVA_OPTS --class-path target/average-1.0.0-SNAPSHOT.jar dev.morling.onebrc.CalculateAverage_breejesh diff --git a/calculate_average_bytesfellow.sh b/calculate_average_bytesfellow.sh new file mode 100755 index 000000000..eb21169e3 --- /dev/null +++ b/calculate_average_bytesfellow.sh @@ -0,0 +1,19 @@ +#!/bin/sh +# +# Copyright 2023 The original authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +JAVA_OPTS="-Xms12g -Xmx12g -XX:+AlwaysPreTouch -XX:+UseParallelGC -XX:-OmitStackTraceInFastThrow " +java $JAVA_OPTS --class-path target/average-1.0.0-SNAPSHOT.jar dev.morling.onebrc.CalculateAverage_bytesfellow diff --git a/calculate_average_cb0s.sh b/calculate_average_cb0s.sh new file mode 100755 index 000000000..af5a93ab4 --- /dev/null +++ b/calculate_average_cb0s.sh @@ -0,0 +1,20 @@ +#!/bin/sh +# +# Copyright 2023 The original authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +# + +# Arguments +JAVA_OPTS="--enable-preview -XX:MaxGCPauseMillis=1 -XX:-AlwaysPreTouch -XX:+UseParallelGC -XX:+TieredCompilation" +java $JAVA_OPTS --class-path target/average-1.0.0-SNAPSHOT.jar dev.morling.onebrc.CalculateAverage_cb0s diff --git a/calculate_average_dkarampi.sh b/calculate_average_dkarampi.sh new file mode 100755 index 000000000..a6ce60945 --- /dev/null +++ b/calculate_average_dkarampi.sh @@ -0,0 +1,19 @@ +#!/bin/sh +# +# Copyright 2023 The original authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +JAVA_OPTS="-XX:+AlwaysCompileLoopMethods" +java $JAVA_OPTS --class-path target/average-1.0.0-SNAPSHOT.jar dev.morling.onebrc.CalculateAverage_dkarampi diff --git a/calculate_average_dmitry-midokura.sh b/calculate_average_dmitry-midokura.sh new file mode 100755 index 000000000..1bb529b8d --- /dev/null +++ b/calculate_average_dmitry-midokura.sh @@ -0,0 +1,21 @@ +#!/bin/bash +# +# Copyright 2023 The original authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + + +#JAVA_OPTS="-verbose:gc" +JAVA_OPTS="--enable-preview -Xmx128m -XX:+UseSerialGC -XX:-TieredCompilation" +java $JAVA_OPTS --class-path target/average-1.0.0-SNAPSHOT.jar dev.morling.onebrc.CalculateAverage_bufistov $1 $2 diff --git a/calculate_average_dpsoft.sh b/calculate_average_dpsoft.sh new file mode 100755 index 000000000..fd4d4634b --- /dev/null +++ b/calculate_average_dpsoft.sh @@ -0,0 +1,20 @@ +#!/bin/sh +# +# Copyright 2023 The original authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +JAVA_OPTS="--enable-preview -XX:+UnlockExperimentalVMOptions -XX:-EnableJVMCI -XX:+UseEpsilonGC -Xms128m -Xmx128m -XX:+AlwaysPreTouch -XX:+UseTransparentHugePages -XX:-TieredCompilation -XX:+TrustFinalNonStaticFields" + +java $JAVA_OPTS --class-path target/average-1.0.0-SNAPSHOT.jar dev.morling.onebrc.CalculateAverage_dpsoft \ No newline at end of file diff --git a/calculate_average_dqhieuu.sh b/calculate_average_dqhieuu.sh new file mode 100755 index 000000000..f0d7fd7ad --- /dev/null +++ b/calculate_average_dqhieuu.sh @@ -0,0 +1,19 @@ +#!/bin/sh +# +# Copyright 2023 The original authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +JAVA_OPTS="" +java $JAVA_OPTS --class-path target/average-1.0.0-SNAPSHOT.jar dev.morling.onebrc.CalculateAverage_dqhieuu diff --git a/calculate_average_ebarlas.sh b/calculate_average_ebarlas.sh index 422867d82..c73cb1abf 100755 --- a/calculate_average_ebarlas.sh +++ b/calculate_average_ebarlas.sh @@ -15,5 +15,10 @@ # limitations under the License. # -JAVA_OPTS="" -java $JAVA_OPTS --class-path target/average-1.0.0-SNAPSHOT.jar dev.morling.onebrc.CalculateAverage_ebarlas +if [ -f target/CalculateAverage_ebarlas_image ]; then + echo "Picking up existing native image 'target/CalculateAverage_ebarlas_image', delete the file to select JVM mode." 1>&2 + target/CalculateAverage_ebarlas_image +else + echo "Choosing to run the app in JVM mode as no native image was found, use prepare_ebarlas.sh to generate." 
1>&2 + java --enable-preview --class-path target/average-1.0.0-SNAPSHOT.jar dev.morling.onebrc.CalculateAverage_ebarlas +fi diff --git a/calculate_average_eriklumme.sh b/calculate_average_eriklumme.sh new file mode 100755 index 000000000..793af9b09 --- /dev/null +++ b/calculate_average_eriklumme.sh @@ -0,0 +1,19 @@ +#!/bin/sh +# +# Copyright 2023 The original authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +JAVA_OPTS="-Xms6g -Xmx6g" +java $JAVA_OPTS --class-path target/average-1.0.0-SNAPSHOT.jar dev.morling.onebrc.CalculateAverage_eriklumme diff --git a/calculate_average_faridtmammadov.sh b/calculate_average_faridtmammadov.sh new file mode 100755 index 000000000..c521e9a1b --- /dev/null +++ b/calculate_average_faridtmammadov.sh @@ -0,0 +1,21 @@ +#!/bin/sh +# +# Copyright 2023 The original authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + + +JAVA_OPTS="--enable-preview" +java $JAVA_OPTS --class-path target/average-1.0.0-SNAPSHOT.jar dev.morling.onebrc.CalculateAverage_faridtmammadov + diff --git a/calculate_average_felix19350.sh b/calculate_average_felix19350.sh index 4007d7564..e84f8371c 100755 --- a/calculate_average_felix19350.sh +++ b/calculate_average_felix19350.sh @@ -15,6 +15,16 @@ # limitations under the License. # +# ParallelGC test - Time (measured by evaluate2.sh): 00:33.130 +# JAVA_OPTS="--enable-preview -XX:+UseParallelGC -XX:+UseTransparentHugePages" + +# G1GC test - Time (measured by evaluate2.sh): 00:26.447 +# JAVA_OPTS="--enable-preview -XX:+UseG1GC -XX:+UseTransparentHugePages" + +# ZGC test - Time (measured by evaluate2.sh): 00:22.813 +JAVA_OPTS="--enable-preview -XX:+UseZGC -XX:+UseTransparentHugePages" + +# EpsilonGC test - for now doesn't work because heap space gets exhausted +#JAVA_OPTS="--enable-preview -XX:+UnlockExperimentalVMOptions -XX:+UseEpsilonGC -XX:+AlwaysPreTouch" -JAVA_OPTS="--enable-preview -XX:+UseParallelGC -Xms4g -Xmx4g" java $JAVA_OPTS --class-path target/average-1.0.0-SNAPSHOT.jar dev.morling.onebrc.CalculateAverage_felix19350 diff --git a/calculate_average_flippingbits.sh b/calculate_average_flippingbits.sh index b37baa0e5..7dcbe74bb 100755 --- a/calculate_average_flippingbits.sh +++ b/calculate_average_flippingbits.sh @@ -15,5 +15,5 @@ # limitations under the License.
# -JAVA_OPTS="--add-modules=jdk.incubator.vector" +JAVA_OPTS="--add-modules=jdk.incubator.vector --enable-preview" java $JAVA_OPTS --class-path target/average-1.0.0-SNAPSHOT.jar dev.morling.onebrc.CalculateAverage_flippingbits diff --git a/calculate_average_gabrielfoo.sh b/calculate_average_gabrielfoo.sh new file mode 100755 index 000000000..bc684dfd8 --- /dev/null +++ b/calculate_average_gabrielfoo.sh @@ -0,0 +1,23 @@ +#!/bin/sh +# +# Copyright 2023 The original authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +JAVA_OPTS="-Xmx64m" +JAVA_OPTS="$JAVA_OPTS -XX:+UnlockExperimentalVMOptions" +JAVA_OPTS="$JAVA_OPTS -XX:+AlwaysPreTouch" +JAVA_OPTS="$JAVA_OPTS -XX:+TrustFinalNonStaticFields -XX:InlineSmallCode=10000" +JAVA_OPTS="$JAVA_OPTS -XX:-TieredCompilation -XX:CICompilerCount=2 -XX:CompileThreshold=1000" +java $JAVA_OPTS --class-path target/average-1.0.0-SNAPSHOT.jar dev.morling.onebrc.CalculateAverage_gabrielfoo \ No newline at end of file diff --git a/calculate_average_gamlerhart.sh b/calculate_average_gamlerhart.sh index c52a25bfb..7427ea657 100755 --- a/calculate_average_gamlerhart.sh +++ b/calculate_average_gamlerhart.sh @@ -15,5 +15,5 @@ # limitations under the License. 
# -JAVA_OPTS="--enable-preview --add-modules=jdk.incubator.vector" +JAVA_OPTS="--enable-preview --add-modules=jdk.incubator.vector -XX:+UnlockExperimentalVMOptions -XX:+UseEpsilonGC -Xmx512m -Xlog:all=error" java $JAVA_OPTS --class-path target/average-1.0.0-SNAPSHOT.jar dev.morling.onebrc.CalculateAverage_gamlerhart diff --git a/calculate_average_gauravdeshmukh.sh b/calculate_average_gauravdeshmukh.sh new file mode 100755 index 000000000..4f941e4bd --- /dev/null +++ b/calculate_average_gauravdeshmukh.sh @@ -0,0 +1,19 @@ +#!/bin/sh +# +# Copyright 2023 The original authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +JAVA_OPTS="" +java $JAVA_OPTS --class-path target/average-1.0.0-SNAPSHOT.jar dev.morling.onebrc.CalculateAverage_gauravdeshmukh diff --git a/calculate_average_gigiblender.sh b/calculate_average_gigiblender.sh new file mode 100755 index 000000000..7d51bdc36 --- /dev/null +++ b/calculate_average_gigiblender.sh @@ -0,0 +1,19 @@ +#!/bin/sh +# +# Copyright 2023 The original authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +# + +JAVA_OPTS="--enable-preview" +java $JAVA_OPTS --class-path target/average-1.0.0-SNAPSHOT.jar dev.morling.onebrc.CalculateAverage_gigiblender diff --git a/calculate_average_giovannicuccu.sh b/calculate_average_giovannicuccu.sh new file mode 100755 index 000000000..218838559 --- /dev/null +++ b/calculate_average_giovannicuccu.sh @@ -0,0 +1,19 @@ +#!/bin/sh +# +# Copyright 2023 The original authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +JAVA_OPTS="--enable-preview --add-modules=jdk.incubator.vector -XX:-TieredCompilation" +java $JAVA_OPTS --class-path target/average-1.0.0-SNAPSHOT.jar dev.morling.onebrc.CalculateAverage_giovannicuccu diff --git a/calculate_average_godofwharf.sh b/calculate_average_godofwharf.sh new file mode 100755 index 000000000..b8df7a052 --- /dev/null +++ b/calculate_average_godofwharf.sh @@ -0,0 +1,19 @@ +#!/bin/sh +# +# Copyright 2023 The original authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +# + +JAVA_OPTS="--enable-preview --add-modules jdk.incubator.vector -DpageSize=262144 -XX:+UseParallelGC -Xms2600m -XX:ParallelGCThreads=8 -XX:Tier4CompileThreshold=1000 -XX:Tier3CompileThreshold=500 -XX:Tier3CompileThreshold=250 -Dthreads=9 -Djava.util.concurrent.ForkJoinPool.common.parallelism=9" +java $JAVA_OPTS --class-path target/average-1.0.0-SNAPSHOT.jar dev.morling.onebrc.CalculateAverage_godofwharf 2>/dev/null \ No newline at end of file diff --git a/calculate_average_gonix.sh b/calculate_average_gonix.sh new file mode 100755 index 000000000..c3f00893c --- /dev/null +++ b/calculate_average_gonix.sh @@ -0,0 +1,20 @@ +#!/bin/bash +# +# Copyright 2023 The original authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + + +JAVA_OPTS="--enable-preview" +exec cat < <(exec java $JAVA_OPTS --class-path target/average-1.0.0-SNAPSHOT.jar dev.morling.onebrc.CalculateAverage_gonix) diff --git a/calculate_average_gonixunsafe.sh b/calculate_average_gonixunsafe.sh new file mode 100755 index 000000000..24bee2797 --- /dev/null +++ b/calculate_average_gonixunsafe.sh @@ -0,0 +1,31 @@ +#!/bin/bash +# +# Copyright 2023 The original authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + + +JAVA_OPTS="--enable-preview" +# Copied from @serkan-ozal +# Unsure if it helps (maybe something within ~10ms), +# but at least it doesn't seem to make anything worse. +JAVA_OPTS="$JAVA_OPTS -XX:+UnlockExperimentalVMOptions -XX:+UnlockDiagnosticVMOptions" +JAVA_OPTS="$JAVA_OPTS -XX:-TieredCompilation -XX:MaxInlineSize=10000 -XX:InlineSmallCode=10000 -XX:FreqInlineSize=10000" +JAVA_OPTS="$JAVA_OPTS -XX:-UseCountedLoopSafepoints -XX:GuaranteedSafepointInterval=0" +JAVA_OPTS="$JAVA_OPTS -XX:+TrustFinalNonStaticFields -da -dsa -XX:+UseNUMA -XX:-EnableJVMCI" +if [[ ! "$(uname -s)" = "Darwin" ]]; then + JAVA_OPTS="$JAVA_OPTS -XX:+UseTransparentHugePages" +fi + +exec cat < <(exec java $JAVA_OPTS --class-path target/average-1.0.0-SNAPSHOT.jar dev.morling.onebrc.CalculateAverage_gonixunsafe) diff --git a/calculate_average_ianopolousfast.sh b/calculate_average_ianopolousfast.sh new file mode 100755 index 000000000..56d5a856e --- /dev/null +++ b/calculate_average_ianopolousfast.sh @@ -0,0 +1,20 @@ +#!/bin/sh +# +# Copyright 2023 The original authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +# + +JAVA_OPTS="--enable-preview --add-modules=jdk.incubator.vector -Djdk.incubator.vector.VECTOR_ACCESS_OOB_CHECK=0 -XX:-UseTransparentHugePages" + +java $JAVA_OPTS --class-path target/average-1.0.0-SNAPSHOT.jar dev.morling.onebrc.CalculateAverage_ianopolousfast diff --git a/calculate_average_iziamos.sh b/calculate_average_iziamos.sh index 7ce3ff1ad..0f9178797 100755 --- a/calculate_average_iziamos.sh +++ b/calculate_average_iziamos.sh @@ -15,5 +15,18 @@ # limitations under the License. # -JAVA_OPTS="--enable-preview --add-modules=jdk.incubator.vector -Djdk.incubator.vector.VECTOR_ACCESS_OOB_CHECK=0 -XX:+UnlockExperimentalVMOptions -XX:+UseEpsilonGC -Xms16m -Xmx16m -XX:-AlwaysPreTouch -XX:-TieredCompilation -XX:CICompilerCount=1" -java $JAVA_OPTS --class-path target/average-1.0.0-SNAPSHOT.jar dev.morling.onebrc.CalculateAverage_iziamos + +if [ -f target/CalculateAverage_iziamos_image ]; then + echo "Using graal" 1>&2 + target/CalculateAverage_iziamos_image +else + echo "Using openjdk" 1>&2 + JAVA_OPTS="--enable-preview + -XX:+UnlockExperimentalVMOptions \ + -XX:+UseEpsilonGC -Xms16m -Xmx16m -XX:-AlwaysPreTouch \ + -XX:-TieredCompilation -XX:CICompilerCount=1 -XX:CompilationMode=high-only \ + -XX:C1MaxTrivialSize=500 -XX:-UseCountedLoopSafepoints -XX:+UseCMoveUnconditionally -XX:+DisableAttachMechanism \ + -XX:-PreserveFramePointer -Xnoclassgc -disablesystemassertions -XX:-UsePerfData \ + -XX:-UseTransparentHugePages -XX:-UseCompressedOops" + java $JAVA_OPTS --class-path target/average-1.0.0-SNAPSHOT.jar dev.morling.onebrc.CalculateAverage_iziamos +fi diff --git a/calculate_average_japplis.sh b/calculate_average_japplis.sh index 47ba3e7e0..38a59786f 100755 --- a/calculate_average_japplis.sh +++ b/calculate_average_japplis.sh @@ -15,5 +15,5 @@ # limitations under the License. 
# -JAVA_OPTS="-Xmx2G" +JAVA_OPTS="" java $JAVA_OPTS --class-path target/average-1.0.0-SNAPSHOT.jar dev.morling.onebrc.CalculateAverage_japplis $* diff --git a/calculate_average_jeevjyot.sh b/calculate_average_jeevjyot.sh new file mode 100755 index 000000000..215eeff14 --- /dev/null +++ b/calculate_average_jeevjyot.sh @@ -0,0 +1,19 @@ +#!/bin/sh +# +# Copyright 2023 The original authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +JAVA_OPTS="" +java $JAVA_OPTS --class-path target/average-1.0.0-SNAPSHOT.jar dev.morling.onebrc.CalculateAverage_jeevjyot diff --git a/calculate_average_jerrinot.sh b/calculate_average_jerrinot.sh new file mode 100755 index 000000000..731172373 --- /dev/null +++ b/calculate_average_jerrinot.sh @@ -0,0 +1,27 @@ +#!/bin/bash +# +# Copyright 2023 The original authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +# -XX:+UnlockDiagnosticVMOptions -XX:PrintAssemblyOptions=intel -XX:CompileCommand=print,*.CalculateAverage_mtopolnik::recordMeasurementAndAdvanceCursor" +# -XX:InlineSmallCode=10000 -XX:-TieredCompilation -XX:CICompilerCount=2 -XX:CompileThreshold=1000\ +if [ -f target/CalculateAverage_jerrinot_image ]; then + echo "Picking up existing native image 'target/CalculateAverage_jerrinot_image', delete the file to select JVM mode." 1>&2 + target/CalculateAverage_jerrinot_image +else + JAVA_OPTS="--enable-preview" + echo "Choosing to run the app in JVM mode as no native image was found, use prepare_jerrinot.sh to generate." 1>&2 + java $JAVA_OPTS --class-path target/average-1.0.0-SNAPSHOT.jar dev.morling.onebrc.CalculateAverage_jerrinot +fi diff --git a/calculate_average_jincongho.sh b/calculate_average_jincongho.sh new file mode 100755 index 000000000..8edda54dd --- /dev/null +++ b/calculate_average_jincongho.sh @@ -0,0 +1,21 @@ +#!/bin/sh +# +# Copyright 2023 The original authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +JAVA_OPTS="--enable-preview --add-modules=jdk.incubator.vector --enable-native-access=ALL-UNNAMED" +JAVA_OPTS="$JAVA_OPTS -XX:-TieredCompilation -XX:InlineSmallCode=10000 -XX:FreqInlineSize=10000" +JAVA_OPTS="$JAVA_OPTS -Djdk.incubator.vector.VECTOR_ACCESS_OOB_CHECK=0" +java $JAVA_OPTS --class-path target/average-1.0.0-SNAPSHOT.jar dev.morling.onebrc.CalculateAverage_jincongho \ No newline at end of file diff --git a/calculate_average_jonathan-aotearoa.sh b/calculate_average_jonathan-aotearoa.sh new file mode 100755 index 000000000..4375c3ca4 --- /dev/null +++ b/calculate_average_jonathan-aotearoa.sh @@ -0,0 +1,27 @@ +#!/bin/sh +# +# Copyright 2023 The original authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +if [ -f target/CalculateAverage_jonathan-aotearoa_image ]; then + echo "Using native image 'target/CalculateAverage_jonathan-aotearoa_image'. Delete this file to select JVM mode." 1>&2 + target/CalculateAverage_jonathan-aotearoa_image +else + JAVA_OPTS="--enable-preview -XX:+UnlockExperimentalVMOptions -XX:+TrustFinalNonStaticFields -dsa -XX:+UseNUMA" + JAVA_OPTS="$JAVA_OPTS -XX:+UseTransparentHugePages" + echo "Running in JVM mode as no native image was found. Run 'prepare_jonathan-aotearoa.sh' to generate a native image." 
1>&2 + java $JAVA_OPTS --class-path target/average-1.0.0-SNAPSHOT.jar dev.morling.onebrc.CalculateAverage_jonathanaotearoa +fi + diff --git a/calculate_average_jparera.sh b/calculate_average_jparera.sh new file mode 100755 index 000000000..4c7a9e7d3 --- /dev/null +++ b/calculate_average_jparera.sh @@ -0,0 +1,19 @@ +#!/bin/sh +# +# Copyright 2023 The original authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +JAVA_OPTS="--enable-preview --add-modules=jdk.incubator.vector -XX:-TieredCompilation" +java $JAVA_OPTS --class-path target/average-1.0.0-SNAPSHOT.jar dev.morling.onebrc.CalculateAverage_jparera diff --git a/calculate_average_justplainlaake.sh b/calculate_average_justplainlaake.sh new file mode 100755 index 000000000..2c0341f54 --- /dev/null +++ b/calculate_average_justplainlaake.sh @@ -0,0 +1,23 @@ +#!/bin/sh +# +# Copyright 2023 The original authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +if [ -f target/CalculateAverage_justplainlaake_image ]; then #if there is a native image, then lets run it. Else fallback to standard java execution + target/CalculateAverage_justplainlaake_image +else + java -XX:+UseG1GC --enable-preview -XX:+UnlockExperimentalVMOptions -XX:+TrustFinalNonStaticFields -dsa -XX:+UseNUMA --class-path target/average-1.0.0-SNAPSHOT.jar dev.morling.onebrc.CalculateAverage_justplainlaake +fi + diff --git a/calculate_average_karthikeyan97.sh b/calculate_average_karthikeyan97.sh new file mode 100755 index 000000000..cca36e97a --- /dev/null +++ b/calculate_average_karthikeyan97.sh @@ -0,0 +1,29 @@ +#!/bin/sh +# +# Copyright 2023 The original authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +JAVA_OPTS="-Xms10240m -Xmx40960m " + +if [ -f target/CalculateAverage_karthikeyan97_image ]; then + #echo "Picking up existing native image 'target/CalculateAverage_karthikeyan97_image', delete the file to select JVM mode." 1>&2 + target/CalculateAverage_karthikeyan97_image -Xms10240m -Xmx40960m +else + #echo "Chosing to run the app in JVM mode as no native image was found, use prepare_karthikeyan97.sh to generate." 
1>&2 + java -Xms10240m -Xmx40960m --enable-preview --class-path target/average-1.0.0-SNAPSHOT.jar dev.morling.onebrc.CalculateAverage_karthikeyan97 + +fi + + diff --git a/calculate_average_kuduwa-keshavram.sh b/calculate_average_kuduwa-keshavram.sh index 904c8db88..33941d335 100755 --- a/calculate_average_kuduwa-keshavram.sh +++ b/calculate_average_kuduwa-keshavram.sh @@ -16,5 +16,5 @@ # -JAVA_OPTS="" +JAVA_OPTS="--enable-preview" java $JAVA_OPTS --class-path target/average-1.0.0-SNAPSHOT.jar dev.morling.onebrc.CalculateAverage_kuduwa_keshavram diff --git a/calculate_average_kumarsaurav123.sh b/calculate_average_kumarsaurav123.sh index 1c823e5bd..4567dcf28 100755 --- a/calculate_average_kumarsaurav123.sh +++ b/calculate_average_kumarsaurav123.sh @@ -16,6 +16,6 @@ # -JAVA_OPTS="-Xms6G -Xmx16G" +JAVA_OPTS="-Xms16G -Xmx32G --enable-preview" java $JAVA_OPTS --class-path target/average-1.0.0-SNAPSHOT.jar dev.morling.onebrc.CalculateAverage_kumarsaurav123 diff --git a/calculate_average_linl33.sh b/calculate_average_linl33.sh new file mode 100755 index 000000000..5610895d1 --- /dev/null +++ b/calculate_average_linl33.sh @@ -0,0 +1,38 @@ +#!/bin/sh +# +# Copyright 2023 The original authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + + +JAVA_OPTS="-Xrs --enable-preview --add-modules jdk.incubator.vector --enable-native-access=ALL-UNNAMED" +JAVA_OPTS="${JAVA_OPTS} -XX:+UnlockDiagnosticVMOptions -XX:+UnlockExperimentalVMOptions" +JAVA_OPTS="${JAVA_OPTS} -Xms128m -XX:+AlwaysPreTouch -XX:+AlwaysPreTouchStacks -XX:-UseTransparentHugePages" +JAVA_OPTS="${JAVA_OPTS} -XX:-UseCompressedClassPointers -XX:+ForceUnreachable -XX:-CompactStrings" +JAVA_OPTS="${JAVA_OPTS} -XX:CodeEntryAlignment=64 -XX:OptoLoopAlignment=64 -XX:MaxLoopPad=16 -XX:ObjectAlignmentInBytes=64" +JAVA_OPTS="${JAVA_OPTS} -XX:-UseLoopPredicate -XX:LoopStripMiningIter=0 -XX:LoopStripMiningIterShortLoop=0" +JAVA_OPTS="${JAVA_OPTS} -XX:-UseCountedLoopSafepoints -XX:GuaranteedSafepointInterval=0 -XX:AllocatePrefetchStyle=0" +JAVA_OPTS="${JAVA_OPTS} -XX:+TrustFinalNonStaticFields -XX:LockingMode=2 -XX:+UseSystemMemoryBarrier" +JAVA_OPTS="${JAVA_OPTS} -XX:-UseDynamicNumberOfCompilerThreads -XX:-UseDynamicNumberOfGCThreads" +JAVA_OPTS="${JAVA_OPTS} -XX:ArchiveRelocationMode=0 -XX:-UsePerfData -XX:-UseNotificationThread -XX:-CheckIntrinsics" +#JAVA_OPTS="${JAVA_OPTS} -XX:+UseZGC -XX:-ZProactive -XX:+ZCollectionIntervalOnly -XX:ZCollectionInterval=0 -XX:-ZUncommit -XX:-ZBufferStoreBarriers -XX:ZIndexDistributorStrategy=1" +JAVA_OPTS="${JAVA_OPTS} -XX:+UseEpsilonGC -XX:-UseCompressedOops" +#JAVA_OPTS="${JAVA_OPTS} -XX:+UseParallelGC -XX:-UseCompressedOops" +#JAVA_OPTS="${JAVA_OPTS} -XX:+UseG1GC -XX:-UseCompressedOops" +JAVA_OPTS="${JAVA_OPTS} -Djdk.incubator.vector.VECTOR_ACCESS_OOB_CHECK=0 -Djava.lang.invoke.VarHandle.VAR_HANDLE_GUARDS=false -Djava.lang.invoke.MethodHandle.DONT_INLINE_THRESHOLD=-1" +JAVA_OPTS="${JAVA_OPTS} -Dfile.encoding=UTF-8 -Dsun.stdout.encoding=UTF-8 -Dsun.stderr.encoding=UTF-8" + +JAVA_OPTS="${JAVA_OPTS} -Xlog:all=off -Xverify:none -XX:SharedArchiveFile=target/CalculateAverage_linl33_dynamic.jsa" + +MALLOC_ARENA_MAX=1 java ${JAVA_OPTS} --class-path target/average-1.0.0-SNAPSHOT.jar 
dev.morling.onebrc.CalculateAverage_linl33 2>/dev/null diff --git a/calculate_average_mahadev-k.sh b/calculate_average_mahadev-k.sh new file mode 100755 index 000000000..6f686be77 --- /dev/null +++ b/calculate_average_mahadev-k.sh @@ -0,0 +1,19 @@ +#!/bin/sh +# +# Copyright 2023 The original authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +JAVA_OPTS="" +java $JAVA_OPTS --class-path target/average-1.0.0-SNAPSHOT.jar dev.morling.onebrc.CalculateAverage_mahadev_k diff --git a/calculate_average_makohn.sh b/calculate_average_makohn.sh new file mode 100755 index 000000000..092bae1c5 --- /dev/null +++ b/calculate_average_makohn.sh @@ -0,0 +1,19 @@ +#!/bin/sh +# +# Copyright 2023 The original authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +JAVA_OPTS="--enable-preview" +java $JAVA_OPTS --class-path target/average-1.0.0-SNAPSHOT.jar dev.morling.onebrc.CalculateAverage_makohn diff --git a/calculate_average_manishgarg90.sh b/calculate_average_manishgarg90.sh new file mode 100755 index 000000000..93c6a3794 --- /dev/null +++ b/calculate_average_manishgarg90.sh @@ -0,0 +1,19 @@ +#!/bin/sh +# +# Copyright 2023 The original authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +JAVA_OPTS="" +java $JAVA_OPTS --class-path target/average-1.0.0-SNAPSHOT.jar dev.morling.onebrc.CalculateAverage_manishgarg90 diff --git a/calculate_average_martin2038.sh b/calculate_average_martin2038.sh new file mode 100755 index 000000000..c141e2bdb --- /dev/null +++ b/calculate_average_martin2038.sh @@ -0,0 +1,30 @@ +#!/bin/sh +# +# Copyright 2023 The original authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + + +if [ -f target/CalculateAverage_martin2038_image ]; then + echo "Picking up existing native image 'target/CalculateAverage_martin2038_image', delete the file to select JVM mode." 1>&2 + target/CalculateAverage_martin2038_image +else + + #JAVA_OPTS="--enable-preview" + echo "Chosing to run the app in JVM mode as no native image was found, use prepare_martin2038.sh to generate." 1>&2 + # JAVA_OPTS="-XX:-EnableJVMCI -Xms16g -Xmx16g -XX:+AlwaysPreTouch -XX:+UnlockExperimentalVMOptions -XX:+UseEpsilonGC" + JAVA_OPTS="" + java $JAVA_OPTS --class-path target/average-1.0.0-SNAPSHOT.jar dev.morling.onebrc.CalculateAverage_martin2038 + +fi diff --git a/calculate_average_mattiz.sh b/calculate_average_mattiz.sh new file mode 100755 index 000000000..2432b7f4a --- /dev/null +++ b/calculate_average_mattiz.sh @@ -0,0 +1,19 @@ +#!/bin/sh +# +# Copyright 2023 The original authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +JAVA_OPTS="" +java $JAVA_OPTS --class-path target/average-1.0.0-SNAPSHOT.jar dev.morling.onebrc.CalculateAverage_mattiz diff --git a/calculate_average_melgenek.sh b/calculate_average_melgenek.sh new file mode 100755 index 000000000..ad709c31b --- /dev/null +++ b/calculate_average_melgenek.sh @@ -0,0 +1,37 @@ +#!/bin/bash +# +# Copyright 2023 The original authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +JAVA_OPTS="--enable-preview --add-modules jdk.incubator.vector -Djdk.incubator.vector.VECTOR_ACCESS_OOB_CHECK=0" +JAVA_OPTS="$JAVA_OPTS -XX:+UnlockExperimentalVMOptions -XX:+UseEpsilonGC -XX:+AlwaysPreTouch" +# These flags are mostly copied from the shipilev's branch. They don't really give a predictable benefit, but they don't hurt either. +JAVA_OPTS="$JAVA_OPTS -XX:-TieredCompilation -XX:CICompilerCount=1 -XX:CompileThreshold=2048 -XX:-UseCountedLoopSafepoints -XX:+TrustFinalNonStaticFields" + +if [[ "$(uname -s)" == "Linux" ]]; then + JAVA_OPTS="$JAVA_OPTS -XX:+UseTransparentHugePages" +fi + +# https://stackoverflow.com/a/23378780/7221823 +logicalCpuCount=$([ $(uname) = 'Darwin' ] && + sysctl -n hw.logicalcpu_max || + lscpu -p | egrep -v '^#' | wc -l) +# The required heap is proportional to the number of cores. +# There's roughly 6MB heap per thread required for the 10k problem. +requiredMemory=$(echo "(l(15 + 6 * $logicalCpuCount)/l(2))" | bc -l) +heapSize=$(echo "scale=0; 2^(($requiredMemory+1)/1)" | bc) + +JAVA_OPTS="$JAVA_OPTS -Xms${heapSize}m -Xmx${heapSize}m" +java $JAVA_OPTS --class-path target/average-1.0.0-SNAPSHOT.jar dev.morling.onebrc.CalculateAverage_melgenek diff --git a/calculate_average_mtopolnik.sh b/calculate_average_mtopolnik.sh index e48711a19..acd102459 100755 --- a/calculate_average_mtopolnik.sh +++ b/calculate_average_mtopolnik.sh @@ -15,7 +15,11 @@ # limitations under the License. 
# -# -XX:+UnlockDiagnosticVMOptions -XX:PrintAssemblyOptions=intel -XX:CompileCommand=print,*.CalculateAverage_mtopolnik::recordMeasurementAndAdvanceCursor" -# -XX:InlineSmallCode=10000 -XX:-TieredCompilation -XX:CICompilerCount=2 -XX:CompileThreshold=1000\ -java --enable-preview \ - --class-path target/average-1.0.0-SNAPSHOT.jar dev.morling.onebrc.CalculateAverage_mtopolnik +if [ -f target/CalculateAverage_mtopolnik_image ]; then + echo "Using native image 'target/CalculateAverage_mtopolnik_image'" 1>&2 + target/CalculateAverage_mtopolnik_image +else + JAVA_OPTS="--enable-preview" + echo "Native image not found, using JVM mode." 1>&2 + java $JAVA_OPTS --class-path target/average-1.0.0-SNAPSHOT.jar dev.morling.onebrc.CalculateAverage_mtopolnik +fi diff --git a/calculate_average_plbpietrz.sh b/calculate_average_plbpietrz.sh new file mode 100755 index 000000000..bcd76ad61 --- /dev/null +++ b/calculate_average_plbpietrz.sh @@ -0,0 +1,20 @@ +#!/bin/sh +# +# Copyright 2023 The original authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + + +JAVA_OPTS="" +java $JAVA_OPTS -Xmx99m --class-path target/average-1.0.0-SNAPSHOT.jar dev.morling.onebrc.CalculateAverage_plbpietrz diff --git a/calculate_average_plevart.sh b/calculate_average_plevart.sh new file mode 100755 index 000000000..32cee488a --- /dev/null +++ b/calculate_average_plevart.sh @@ -0,0 +1,23 @@ +#!/bin/sh +# +# Copyright 2023 The original authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +JAVA_OPTS="--enable-preview --add-modules=jdk.incubator.vector" +JAVA_OPTS="$JAVA_OPTS -XX:-TieredCompilation" +JAVA_OPTS="$JAVA_OPTS -XX:+UnlockExperimentalVMOptions -XX:+TrustFinalNonStaticFields" +JAVA_OPTS="$JAVA_OPTS -XX:InlineSmallCode=15000 -XX:FreqInlineSize=400 -XX:MaxInlineSize=400" +#JAVA_OPTS="$JAVA_OPTS -XX:+PrintCompilation -XX:+UnlockDiagnosticVMOptions -XX:+PrintInlining" +java $JAVA_OPTS --class-path target/average-1.0.0-SNAPSHOT.jar dev.morling.onebrc.CalculateAverage_plevart $* diff --git a/calculate_average_rcasteltrione.sh b/calculate_average_rcasteltrione.sh new file mode 100755 index 000000000..e68a2482b --- /dev/null +++ b/calculate_average_rcasteltrione.sh @@ -0,0 +1,20 @@ +#!/bin/sh +# +# Copyright 2023 The original authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + + +JAVA_OPTS="--enable-preview" +time java $JAVA_OPTS --class-path target/average-1.0.0-SNAPSHOT.jar dev.morling.onebrc.CalculateAverage_rcasteltrione diff --git a/calculate_average_roman-r-m.sh b/calculate_average_roman-r-m.sh index 47626a1ac..5ba132f12 100755 --- a/calculate_average_roman-r-m.sh +++ b/calculate_average_roman-r-m.sh @@ -15,5 +15,16 @@ # limitations under the License. # -JAVA_OPTS="--enable-preview -XX:+UseTransparentHugePages" -java $JAVA_OPTS --class-path target/average-1.0.0-SNAPSHOT.jar dev.morling.onebrc.CalculateAverage_roman_r_m +if [ -f target/CalculateAverage_roman_r_m_image ]; then + echo "Running native image 'target/CalculateAverage_roman_r_m_image'." 
1>&2 + target/CalculateAverage_roman_r_m_image +else + JAVA_OPTS="--enable-preview -XX:+UseTransparentHugePages" + JAVA_OPTS="$JAVA_OPTS -XX:+UnlockExperimentalVMOptions -XX:+TrustFinalNonStaticFields -dsa -XX:+UseNUMA" + # epsilon GC needs enough memory or it makes things worse + # see https://stackoverflow.com/questions/58087596/why-are-repeated-memory-allocations-observed-to-be-slower-using-epsilon-vs-g1 + JAVA_OPTS="$JAVA_OPTS -XX:+UnlockExperimentalVMOptions -XX:-EnableJVMCI -XX:+UseEpsilonGC -Xmx1G -Xms1G -XX:+AlwaysPreTouch" + + echo "Running on JVM" 1>&2 + java $JAVA_OPTS --class-path target/average-1.0.0-SNAPSHOT.jar dev.morling.onebrc.CalculateAverage_roman_r_m +fi diff --git a/calculate_average_serkan-ozal.sh b/calculate_average_serkan-ozal.sh new file mode 100755 index 000000000..3cfbb661d --- /dev/null +++ b/calculate_average_serkan-ozal.sh @@ -0,0 +1,33 @@ +#!/bin/bash +# +# Copyright 2023 The original authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +JAVA_OPTS="--enable-preview --enable-native-access=ALL-UNNAMED --add-modules=jdk.incubator.vector " +JAVA_OPTS="$JAVA_OPTS -XX:+UnlockExperimentalVMOptions -XX:+UnlockDiagnosticVMOptions" +JAVA_OPTS="$JAVA_OPTS -XX:-TieredCompilation -XX:MaxInlineSize=10000 -XX:InlineSmallCode=10000 -XX:FreqInlineSize=10000" +JAVA_OPTS="$JAVA_OPTS -XX:-UseCountedLoopSafepoints -XX:LoopStripMiningIter=0 -XX:GuaranteedSafepointInterval=0" +JAVA_OPTS="$JAVA_OPTS -XX:+TrustFinalNonStaticFields -da -dsa -XX:+UseNUMA -XX:-EnableJVMCI" +JAVA_OPTS="$JAVA_OPTS -XX:SharedArchiveFile=target/CalculateAverage_serkan_ozal_cds.jsa" +JAVA_OPTS="$JAVA_OPTS -Djdk.incubator.vector.VECTOR_ACCESS_OOB_CHECK=0" +if [[ ! "$(uname -s)" = "Darwin" ]]; then + JAVA_OPTS="$JAVA_OPTS -XX:+UseTransparentHugePages" +fi + +#echo "Process started at $(date +%s%N | cut -b1-13)" +eval "exec 3< <({ java $JAVA_OPTS --class-path target/average-1.0.0-SNAPSHOT.jar dev.morling.onebrc.CalculateAverage_serkan_ozal; })" +read <&3 result +echo -e "$result" +#echo "Process finished at $(date +%s%N | cut -b1-13)" diff --git a/calculate_average_shipilev.sh b/calculate_average_shipilev.sh new file mode 100755 index 000000000..13a12cd68 --- /dev/null +++ b/calculate_average_shipilev.sh @@ -0,0 +1,25 @@ +#!/bin/bash +# +# Copyright 2023 The original authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +JAVA_OPTS="-XX:+UnlockExperimentalVMOptions -XX:+UseEpsilonGC -Xms1g -Xmx1g -XX:-AlwaysPreTouch -XX:+UseTransparentHugePages +-XX:-TieredCompilation -XX:-UseCountedLoopSafepoints -XX:+TrustFinalNonStaticFields -XX:CompileThreshold=2048 +--add-opens java.base/java.nio=ALL-UNNAMED --add-exports java.base/jdk.internal.ref=ALL-UNNAMED +-XX:+UnlockDiagnosticVMOptions -XX:CompileCommand=quiet +-XX:CompileCommand=dontinline,dev.morling.onebrc.CalculateAverage_shipilev\$ParsingTask::seqCompute +-XX:CompileCommand=dontinline,dev.morling.onebrc.CalculateAverage_shipilev\$MeasurementsMap::updateSlow +-XX:CompileCommand=inline,dev.morling.onebrc.CalculateAverage_shipilev\$Bucket::matches" +java $JAVA_OPTS --class-path target/average-1.0.0-SNAPSHOT.jar dev.morling.onebrc.CalculateAverage_shipilev diff --git a/calculate_average_slovdahl.sh b/calculate_average_slovdahl.sh new file mode 100755 index 000000000..3f99dc03e --- /dev/null +++ b/calculate_average_slovdahl.sh @@ -0,0 +1,22 @@ +#!/bin/sh +# +# Copyright 2023 The original authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +JAVA_OPTS="${JAVA_OPTS} --enable-preview -XX:+UnlockExperimentalVMOptions -XX:+UnlockDiagnosticVMOptions" +JAVA_OPTS="${JAVA_OPTS} -Xmx8g -Xms8g" +JAVA_OPTS="${JAVA_OPTS} -XX:+TrustFinalNonStaticFields -XX:-UseCompressedOops" + +java $JAVA_OPTS --class-path target/average-1.0.0-SNAPSHOT.jar dev.morling.onebrc.CalculateAverage_slovdahl diff --git a/calculate_average_stephenvonworley.sh b/calculate_average_stephenvonworley.sh new file mode 100755 index 000000000..2fca19ffa --- /dev/null +++ b/calculate_average_stephenvonworley.sh @@ -0,0 +1,25 @@ +#!/bin/sh +# +# Copyright 2023 The original authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +if [ -f target/CalculateAverage_stephenvonworley_image ]; then + target/CalculateAverage_stephenvonworley_image +else + JAVA_OPTS="--enable-preview" + echo "Chosing to run the app in JVM mode as no native image was found, use prepare_stephenvonworley.sh to generate." 1>&2 + java $JAVA_OPTS --class-path target/average-1.0.0-SNAPSHOT.jar dev.morling.onebrc.CalculateAverage_stephenvonworley +fi + diff --git a/calculate_average_sudhirtumati.sh b/calculate_average_sudhirtumati.sh new file mode 100755 index 000000000..fb31f8672 --- /dev/null +++ b/calculate_average_sudhirtumati.sh @@ -0,0 +1,19 @@ +#!/bin/sh +# +# Copyright 2023 The original authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +JAVA_OPTS="--enable-preview -Xmx128m -XX:+UseSerialGC -XX:-TieredCompilation" +java $JAVA_OPTS --class-path target/average-1.0.0-SNAPSHOT.jar dev.morling.onebrc.CalculateAverage_sudhirtumati diff --git a/calculate_average_tivrfoa.sh b/calculate_average_tivrfoa.sh new file mode 100755 index 000000000..cec66fdd8 --- /dev/null +++ b/calculate_average_tivrfoa.sh @@ -0,0 +1,24 @@ +#!/bin/sh +# +# Copyright 2023 The original authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +if [ -f target/CalculateAverage_tivrfoa_image ]; then + target/CalculateAverage_tivrfoa_image +else + JAVA_OPTS="--enable-preview" + java $JAVA_OPTS --class-path target/average-1.0.0-SNAPSHOT.jar dev.morling.onebrc.CalculateAverage_tivrfoa +fi + diff --git a/calculate_average_tkosachev.sh b/calculate_average_tkosachev.sh new file mode 100755 index 000000000..6b4ec6023 --- /dev/null +++ b/calculate_average_tkosachev.sh @@ -0,0 +1,20 @@ +#!/bin/sh +# +# Copyright 2023 The original authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + + +JAVA_OPTS="--enable-preview" +java $JAVA_OPTS --class-path target/average-1.0.0-SNAPSHOT.jar dev.morling.onebrc.CalculateAverage_tkosachev diff --git a/calculate_average_tonivade.sh b/calculate_average_tonivade.sh new file mode 100755 index 000000000..a484a5343 --- /dev/null +++ b/calculate_average_tonivade.sh @@ -0,0 +1,19 @@ +#!/bin/sh +# +# Copyright 2023 The original authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +JAVA_OPTS="-Xmx1G -Xms1G -XX:+AlwaysPreTouch -XX:+UseParallelGC -XX:-UseCompressedOops --enable-preview" +java $JAVA_OPTS --class-path target/average-1.0.0-SNAPSHOT.jar dev.morling.onebrc.CalculateAverage_tonivade diff --git a/calculate_average_unbounded.sh b/calculate_average_unbounded.sh new file mode 100755 index 000000000..ab874052e --- /dev/null +++ b/calculate_average_unbounded.sh @@ -0,0 +1,19 @@ +#!/bin/sh +# +# Copyright 2023 The original authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +JAVA_OPTS="--enable-preview --add-modules jdk.incubator.vector -XX:-TieredCompilation -XX:InlineSmallCode=10000 -XX:FreqInlineSize=10000" +java $JAVA_OPTS --class-path target/average-1.0.0-SNAPSHOT.jar dev.morling.onebrc.CalculateAverage_unbounded diff --git a/calculate_average_vemana.sh b/calculate_average_vemana.sh index b3437f208..06a911a21 100755 --- a/calculate_average_vemana.sh +++ b/calculate_average_vemana.sh @@ -18,6 +18,8 @@ # Basics JAVA_OPTS="" JAVA_OPTS="$JAVA_OPTS --enable-preview" +JAVA_OPTS="$JAVA_OPTS --add-exports java.base/jdk.internal.ref=ALL-UNNAMED" +JAVA_OPTS="$JAVA_OPTS --add-opens java.base/java.nio=ALL-UNNAMED" #JAVA_OPTS="$JAVA_OPTS --add-modules jdk.incubator.vector" #JAVA_OPTS="$JAVA_OPTS -XX:+UnlockDiagnosticVMOptions" diff --git a/calculate_average_vemanaNonIdiomatic.sh b/calculate_average_vemanaNonIdiomatic.sh new file mode 100755 index 000000000..99974ee03 --- /dev/null +++ b/calculate_average_vemanaNonIdiomatic.sh @@ -0,0 +1,31 @@ +#!/bin/bash +# +# Copyright 2023 The original authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +# Basics +JAVA_OPTS="" +JAVA_OPTS="$JAVA_OPTS --enable-preview" +JAVA_OPTS="$JAVA_OPTS --add-exports java.base/jdk.internal.ref=ALL-UNNAMED" +JAVA_OPTS="$JAVA_OPTS --add-opens java.base/java.nio=ALL-UNNAMED" + +# JIT parameters +JAVA_OPTS="$JAVA_OPTS -XX:+AlwaysCompileLoopMethods" + +# GC parameters +JAVA_OPTS="$JAVA_OPTS -XX:+UseParallelGC" + + +java $JAVA_OPTS --class-path target/average-1.0.0-SNAPSHOT.jar dev.morling.onebrc.CalculateAverage_vemanaNonIdiomatic "$@" diff --git a/calculate_average_xpmatteo.sh b/calculate_average_xpmatteo.sh new file mode 100755 index 000000000..d1cd87039 --- /dev/null +++ b/calculate_average_xpmatteo.sh @@ -0,0 +1,20 @@ +#!/bin/sh +# +# Copyright 2023 The original authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +JAVA_OPTS="--enable-preview" +java $JAVA_OPTS --class-path target/average-1.0.0-SNAPSHOT.jar dev.morling.onebrc.CalculateAverage_xpmatteo + diff --git a/calculate_average_yavuztas.sh b/calculate_average_yavuztas.sh index bfa7b1090..bbcd403e0 100755 --- a/calculate_average_yavuztas.sh +++ b/calculate_average_yavuztas.sh @@ -15,5 +15,11 @@ # limitations under the License. 
# -JAVA_OPTS="-Xms128m -Xmx128m -XX:MaxGCPauseMillis=1 -XX:-AlwaysPreTouch -XX:+UseSerialGC --enable-preview" -java $JAVA_OPTS --class-path target/average-1.0.0-SNAPSHOT.jar dev.morling.onebrc.CalculateAverage_yavuztas +if [ -f target/CalculateAverage_yavuztas_image ]; then + echo "Picking up existing native image 'target/CalculateAverage_yavuztas_image', delete the file to select JVM mode." 1>&2 + target/CalculateAverage_yavuztas_image +else + JAVA_OPTS="-XX:MaxGCPauseMillis=1 -XX:-AlwaysPreTouch -XX:+UseSerialGC -XX:+TieredCompilation --enable-preview" + echo "Choosing to run the app in JVM mode as no native image was found, use prepare_yavuztas.sh to generate." 1>&2 + java $JAVA_OPTS --class-path target/average-1.0.0-SNAPSHOT.jar dev.morling.onebrc.CalculateAverage_yavuztas +fi diff --git a/calculate_average_yonatang.sh b/calculate_average_yonatang.sh new file mode 100755 index 000000000..6bc44bda3 --- /dev/null +++ b/calculate_average_yonatang.sh @@ -0,0 +1,20 @@ +#!/bin/sh +# +# Copyright 2023 The original authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +# GC is overrated +JAVA_OPTS="-XX:+UnlockExperimentalVMOptions -XX:+UseEpsilonGC -XX:+AlwaysPreTouch -Xms512m -Xmx512m" +java $JAVA_OPTS --class-path target/average-1.0.0-SNAPSHOT.jar dev.morling.onebrc.CalculateAverage_yonatang diff --git a/calculate_average_yourwass.sh b/calculate_average_yourwass.sh new file mode 100755 index 000000000..50e31fb0b --- /dev/null +++ b/calculate_average_yourwass.sh @@ -0,0 +1,26 @@ +#!/bin/bash +# +# Copyright 2023 The original authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# Uncomment below to use sdk +# source "$HOME/.sdkman/bin/sdkman-init.sh" +# sdk use java 21.0.1-graal 1>&2 + +JAVA_OPTS="-Xlog:all=off -Djdk.incubator.vector.VECTOR_ACCESS_OOB_CHECK=0 --enable-preview --enable-native-access=ALL-UNNAMED --add-modules jdk.incubator.vector" + +eval "exec 3< <({ java $JAVA_OPTS --class-path target/average-1.0.0-SNAPSHOT.jar dev.morling.onebrc.CalculateAverage_yourwass; })" +read <&3 result +echo -e "$result" diff --git a/calculate_average_zerninv.sh b/calculate_average_zerninv.sh index 2b76c7d7d..6dbda3022 100755 --- a/calculate_average_zerninv.sh +++ b/calculate_average_zerninv.sh @@ -15,5 +15,11 @@ # limitations under the License. 
# -JAVA_OPTS="--enable-preview" -java $JAVA_OPTS --class-path target/average-1.0.0-SNAPSHOT.jar dev.morling.onebrc.CalculateAverage_zerninv \ No newline at end of file +if [ -f target/CalculateAverage_zerninv_image ]; then + echo "Picking up existing native image 'target/CalculateAverage_zerninv_image', delete the file to select JVM mode." 1>&2 + target/CalculateAverage_zerninv_image +else + JAVA_OPTS="--enable-preview -Xmx512m -XX:+UseSerialGC -XX:-TieredCompilation" + echo "Chosing to run the app in JVM mode as no native image was found, use prepare_zerninv.sh to generate." 1>&2 + java $JAVA_OPTS --class-path target/average-1.0.0-SNAPSHOT.jar dev.morling.onebrc.CalculateAverage_zerninv +fi \ No newline at end of file diff --git a/evaluate.sh b/evaluate.sh index f92a4562f..5f89b6cf8 100755 --- a/evaluate.sh +++ b/evaluate.sh @@ -19,7 +19,7 @@ set -eo pipefail if [ -z "$1" ] then - echo "Usage: evaluate2.sh ( ...)" + echo "Usage: evaluate.sh ( ...)" echo " for each fork, there must be a 'calculate_average_.sh' script and an optional 'prepare_.sh'." 
exit 1 fi @@ -34,8 +34,9 @@ BOLD_YELLOW='\033[1;33m' RESET='\033[0m' # No Color MEASUREMENTS_FILE="measurements_1B.txt" -RUNS=5 +RUNS=10 DEFAULT_JAVA_VERSION="21.0.1-open" +: "${BUILD_JAVA_VERSION:=21.0.1-open}" RUN_TIME_LIMIT=300 # seconds TIMEOUT="" @@ -115,6 +116,7 @@ if [ -f "/sys/devices/system/cpu/cpufreq/boost" ]; then fi fi +print_and_execute sdk use java $BUILD_JAVA_VERSION print_and_execute java --version print_and_execute ./mvnw --quiet clean verify @@ -269,6 +271,12 @@ for fork in "$@"; do fi fi + # check if Java source file uses Unsafe + if grep -F "theUnsafe" -q ./src/main/java*/dev/morling/onebrc/CalculateAverage_$fork.java ; then + # if notes is not empty, append a comma and space before the unsafe note + notes="${notes:+$notes, }uses Unsafe" + fi + echo -n "$trimmed_mean;" >> $leaderboard_temp_file # for sorting echo -n "| # " >> $leaderboard_temp_file echo -n "| $trimmed_mean_formatted " >> $leaderboard_temp_file diff --git a/evaluate_10K.sh b/evaluate_10K.sh new file mode 100755 index 000000000..6847d279b --- /dev/null +++ b/evaluate_10K.sh @@ -0,0 +1,324 @@ +#!/bin/bash +# +# Copyright 2023 The original authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +set -eo pipefail + +if [ -z "$1" ] + then + echo "Usage: evaluate_10K.sh ( ...)" + echo " for each fork, there must be a 'calculate_average_.sh' script and an optional 'prepare_.sh'." 
+ exit 1 +fi + +BOLD_WHITE='\033[1;37m' +CYAN='\033[0;36m' +GREEN='\033[0;32m' +PURPLE='\033[0;35m' +BOLD_RED='\033[1;31m' +RED='\033[0;31m' +BOLD_YELLOW='\033[1;33m' +RESET='\033[0m' # No Color + +MEASUREMENTS_FILE="measurements_10K_1B.txt" +RUNS=5 +DEFAULT_JAVA_VERSION="21.0.1-open" +: "${BUILD_JAVA_VERSION:=21.0.1-open}" +RUN_TIME_LIMIT=300 # seconds + +TIMEOUT="" +if [ "$(uname -s)" == "Linux" ]; then + TIMEOUT="timeout -v $RUN_TIME_LIMIT" +else # MacOs + if [ -x "$(command -v gtimeout)" ]; then + TIMEOUT="gtimeout -v $RUN_TIME_LIMIT" # from `brew install coreutils` + else + echo -e "${BOLD_YELLOW}WARNING${RESET} gtimeout not available, benchmark runs may take indefinitely long." + fi +fi + +function check_command_installed { + if ! [ -x "$(command -v $1)" ]; then + echo "Error: $1 is not installed." >&2 + exit 1 + fi +} + +function print_and_execute() { + echo "+ $@" >&2 + "$@" +} + +check_command_installed java +check_command_installed hyperfine +check_command_installed jq +check_command_installed bc + +# Validate that ./calculate_average_.sh exists for each fork +for fork in "$@"; do + if [ ! -f "./calculate_average_$fork.sh" ]; then + echo -e "${BOLD_RED}ERROR${RESET}: ./calculate_average_$fork.sh does not exist." >&2 + exit 1 + fi +done + +## SDKMAN Setup +# 1. Custom check for sdkman installed; not sure why check_command_installed doesn't detect it properly +if [ ! -f "$HOME/.sdkman/bin/sdkman-init.sh" ]; then + echo -e "${BOLD_RED}ERROR${RESET}: sdkman is not installed." >&2 + exit 1 +fi + +# 2. Init sdkman in this script +source "$HOME/.sdkman/bin/sdkman-init.sh" + +# 3. make sure the default java version is installed +if [ ! -d "$HOME/.sdkman/candidates/java/$DEFAULT_JAVA_VERSION" ]; then + print_and_execute sdk install java $DEFAULT_JAVA_VERSION +fi + +# 4. 
Install missing SDK java versions in any of the prepare_*.sh scripts for the provided forks +for fork in "$@"; do + if [ -f "./prepare_$fork.sh" ]; then + grep -h "^sdk use" "./prepare_$fork.sh" | cut -d' ' -f4 | while read -r version; do + if [ ! -d "$HOME/.sdkman/candidates/java/$version" ]; then + print_and_execute sdk install java $version + fi + done || true # grep returns exit code 1 when no match, `|| true` prevents the script from exiting early + fi +done +## END - SDKMAN Setup + +# Check if SMT is enabled (we want it disabled) +if [ -f "/sys/devices/system/cpu/smt/active" ]; then + if [ "$(cat /sys/devices/system/cpu/smt/active)" != "0" ]; then + echo -e "${BOLD_YELLOW}WARNING${RESET} SMT is enabled" + fi +fi + +# Check if Turbo Boost is enabled (we want it disabled) +if [ -f "/sys/devices/system/cpu/cpufreq/boost" ]; then + if [ "$(cat /sys/devices/system/cpu/cpufreq/boost)" != "0" ]; then + echo -e "${BOLD_YELLOW}WARNING${RESET} Turbo Boost is enabled" + fi +fi + +print_and_execute sdk use java $BUILD_JAVA_VERSION +print_and_execute java --version +# print_and_execute ./mvnw --quiet clean verify + +print_and_execute rm -f measurements.txt +print_and_execute ln -s $MEASUREMENTS_FILE measurements.txt + +echo "" + +# check if measurements_xxx.out exists +if [ ! -f "${MEASUREMENTS_FILE%.txt}.out" ]; then + echo -e "${BOLD_RED}ERROR${RESET}: ${MEASUREMENTS_FILE%.txt}.out does not exist." 
>&2 + echo "Please create it with:" + echo "" + echo " ./calculate_average_baseline.sh > ${MEASUREMENTS_FILE%.txt}.out" + echo "" + exit 1 +fi + +# Run tests and benchmark for each fork +filetimestamp=$(date +"%Y%m%d%H%M%S") # same for all fork.out files from this run +failed=() +for fork in "$@"; do + set +e # we don't want prepare.sh, test.sh or hyperfine failing on 1 fork to exit the script early + + # Run prepare script + if [ -f "./prepare_$fork.sh" ]; then + print_and_execute source "./prepare_$fork.sh" + else + print_and_execute sdk use java $DEFAULT_JAVA_VERSION + fi + + # Run the test suite + print_and_execute $TIMEOUT ./test.sh $fork + if [ $? -ne 0 ]; then + failed+=("$fork") + echo "" + echo -e "${BOLD_RED}FAILURE${RESET}: ./test.sh $fork failed" + + continue + fi + echo "" + + # Run the test on $MEASUREMENTS_FILE; this serves as the warmup + print_and_execute $TIMEOUT ./test.sh $fork $MEASUREMENTS_FILE + if [ $? -ne 0 ]; then + failed+=("$fork") + echo "" + echo -e "${BOLD_RED}FAILURE${RESET}: ./test.sh $fork $MEASUREMENTS_FILE failed" + + continue + fi + echo "" + + # re-link measurements.txt since test.sh deleted it + print_and_execute rm -f measurements.txt + print_and_execute ln -s $MEASUREMENTS_FILE measurements.txt + + # Use hyperfine to run the benchmark for each fork + HYPERFINE_OPTS="--warmup 0 --runs $RUNS --export-json $fork-$filetimestamp-timing.json --output ./$fork-$filetimestamp.out" + + # check if this script is running on a Linux box + if [ "$(uname -s)" == "Linux" ]; then + check_command_installed numactl + + # Linux platform + # prepend this with numactl --physcpubind=0-7 for running it only with 8 cores + numactl --physcpubind=0-7 hyperfine $HYPERFINE_OPTS "$TIMEOUT ./calculate_average_$fork.sh 2>&1" + else # MacOS + hyperfine $HYPERFINE_OPTS "$TIMEOUT ./calculate_average_$fork.sh 2>&1" + fi + # Catch hyperfine command failed + if [ $? 
-ne 0 ]; then + failed+=("$fork") + # Hyperfine already prints the error message + echo "" + continue + fi +done +set -e + +# Summary +echo -e "${BOLD_WHITE}Summary${RESET}" +for fork in "$@"; do + # skip reporting results for failed forks + if [[ " ${failed[@]} " =~ " ${fork} " ]]; then + echo -e " ${RED}$fork${RESET}: command failed or output did not match" + continue + fi + + # Trimmed mean = The slowest and the fastest runs are discarded, the + # mean value of the remaining three runs is the result for that contender + trimmed_mean=$(jq -r '.results[0].times | sort_by(.|tonumber) | .[1:-1] | add / length' $fork-$filetimestamp-timing.json) + raw_times=$(jq -r '.results[0].times | join(",")' $fork-$filetimestamp-timing.json) + + if [ "$fork" == "$1" ]; then + color=$CYAN + elif [ "$fork" == "$2" ]; then + color=$GREEN + else + color=$PURPLE + fi + + echo -e " ${color}$fork${RESET}: trimmed mean ${BOLD_WHITE}$trimmed_mean${RESET}, raw times ${BOLD_WHITE}$raw_times${RESET}" +done +echo "" + +## Leaderboard - prints the leaderboard in Markdown table format +echo -e "${BOLD_WHITE}Leaderboard${RESET}" + +# 1. Create a temp file to store the leaderboard entries +leaderboard_temp_file=$(mktemp) + +# 2. 
Process each fork and append the 1-line entry to the temp file +for fork in "$@"; do + # skip reporting results for failed forks + if [[ " ${failed[@]} " =~ " ${fork} " ]]; then + continue + fi + + trimmed_mean=$(jq -r '.results[0].times | sort_by(.|tonumber) | .[1:-1] | add / length' $fork-$filetimestamp-timing.json) + + # trimmed_mean is in seconds + # Format trimmed_mean as MM::SS.mmm + # using bc + trimmed_mean_minutes=$(echo "$trimmed_mean / 60" | bc) + trimmed_mean_seconds=$(echo "$trimmed_mean % 60 / 1" | bc) + trimmed_mean_ms=$(echo "($trimmed_mean - $trimmed_mean_minutes * 60 - $trimmed_mean_seconds) * 1000 / 1" | bc) + trimmed_mean_formatted=$(printf "%02d:%02d.%03d" $trimmed_mean_minutes $trimmed_mean_seconds $trimmed_mean_ms) + + # Get Github user's name from public Github API (rate limited after ~50 calls, so results are cached in github_users.txt) + set +e + github_user__name=$(grep "^$fork;" github_users.txt | cut -d ';' -f2) + if [ -z "$github_user__name" ]; then + github_user__name=$(curl -s https://api.github.com/users/$fork | jq -r '.name' | tr -d '"') + if [ "$github_user__name" != "null" ]; then + echo "$fork;$github_user__name" >> github_users.txt + else + github_user__name=$fork + fi + fi + set -e + + # Read java version from prepare_$fork.sh if it exists, otherwise assume 21.0.1-open + java_version="21.0.1-open" + # Hard-coding the note message for now + notes="" + if [ -f "./prepare_$fork.sh" ]; then + java_version=$(grep -F "sdk use java" ./prepare_$fork.sh | cut -d' ' -f4) + + if grep -F "native-image" -q ./prepare_$fork.sh ; then + notes="GraalVM native binary" + fi + fi + + # check if Java source file uses Unsafe + if grep -F "theUnsafe" -q ./src/main/java*/dev/morling/onebrc/CalculateAverage_$fork.java ; then + # if notes is not empty, append a comma and space before the unsafe note + notes="${notes:+$notes, }uses Unsafe" + fi + + echo -n "$trimmed_mean;" >> $leaderboard_temp_file # for sorting + echo -n "| # " >> 
$leaderboard_temp_file + echo -n "| $trimmed_mean_formatted " >> $leaderboard_temp_file + echo -n "| [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_$fork.java)" >> $leaderboard_temp_file + echo -n "| $java_version " >> $leaderboard_temp_file + echo -n "| [$github_user__name](https://github.com/$fork) " >> $leaderboard_temp_file + echo -n "| $notes " >> $leaderboard_temp_file + echo "|" >> $leaderboard_temp_file +done + +# 3. Sort leaderboard_temp_file by trimmed_mean and remove the sorting column +sort -n $leaderboard_temp_file | cut -d ';' -f 2 > $leaderboard_temp_file.sorted + +# 4. Print the leaderboard +echo "" +echo "| # | Result (m:s.ms) | Implementation | JDK | Submitter | Notes |" +echo "|---|-----------------|--------------------|-----|---------------|-----------|" +# If $leaderboard_temp_file.sorted has more than 3 entries, include rankings +if [ $(wc -l < $leaderboard_temp_file.sorted) -gt 3 ]; then + head -n 1 $leaderboard_temp_file.sorted | tr '#' 1 + head -n 2 $leaderboard_temp_file.sorted | tail -n 1 | tr '#' 2 + head -n 3 $leaderboard_temp_file.sorted | tail -n 1 | tr '#' 3 + tail -n+4 $leaderboard_temp_file.sorted | tr '#' ' ' +else + # Don't show rankings + cat $leaderboard_temp_file.sorted | tr '#' ' ' +fi +echo "" + +# 5. 
Cleanup +rm $leaderboard_temp_file +## END - Leaderboard + +# Finalize .out files +echo "Raw results saved to file(s):" +for fork in "$@"; do + if [ -f "$fork-$filetimestamp-timing.json" ]; then + cat $fork-$filetimestamp-timing.json >> $fork-$filetimestamp.out + rm $fork-$filetimestamp-timing.json + fi + + if [ -f "$fork-$filetimestamp.out" ]; then + echo " $fork-$filetimestamp.out" + fi +done diff --git a/github_users.txt b/github_users.txt index ef5ef51b7..eb3ac2ca1 100644 --- a/github_users.txt +++ b/github_users.txt @@ -1,3 +1,4 @@ +giovannicuccu;Giovanni Cuccu Ujjwalbharti;Ujjwal Bharti abfrmblr;Abhilash ags313;ags @@ -50,3 +51,7 @@ yehwankim23;김예환 Ye-Hwan Kim (Sam) hundredwatt;Jason Nochlin gnmathur;Gaurav Mathur vemana;Subrahmanyam +jincongho;Jin Cong Ho +yonatang;Yonatan Graber +adriacabeza;Adrià Cabeza +AlexanderYastrebov;Alexander Yastrebov diff --git a/pom.xml b/pom.xml index 72d7ad8ab..3fc164725 100644 --- a/pom.xml +++ b/pom.xml @@ -83,7 +83,11 @@ formatter-maven-plugin 2.16.0 - etc/eclipse-formatter-config.xml + etc/eclipse-formatter-config.xml + + ${project.build.sourceDirectory} + ${project.basedir}/src/main/java-22 + @@ -211,6 +215,7 @@ github_users.txt src/main/java/dev/morling/onebrc/CalculateAverage_cliffclick.java + .sdkmanrc @@ -286,5 +291,33 @@ true + + jdk22 + + 22 + + + + + + + + maven-compiler-plugin + + 22 + + ${project.basedir}/src/main/java-22 + + + + + + + + + diff --git a/prepare_3j5a.sh b/prepare_3j5a.sh new file mode 100755 index 000000000..06b81c4dd --- /dev/null +++ b/prepare_3j5a.sh @@ -0,0 +1,20 @@ +#!/bin/bash +# +# Copyright 2023 The original authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# Uncomment below to use sdk +source "$HOME/.sdkman/bin/sdkman-init.sh" +sdk use java 21.0.1-graal 1>&2 diff --git a/prepare_AlexanderYastrebov.sh b/prepare_AlexanderYastrebov.sh new file mode 100755 index 000000000..3521ecb03 --- /dev/null +++ b/prepare_AlexanderYastrebov.sh @@ -0,0 +1,18 @@ +#!/bin/bash +# +# Copyright 2023 The original authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +DOCKER_BUILDKIT=1 docker build -o target/AlexanderYastrebov src/main/go/AlexanderYastrebov diff --git a/prepare_EduardoSaverin.sh b/prepare_EduardoSaverin.sh new file mode 100755 index 000000000..4cda7b411 --- /dev/null +++ b/prepare_EduardoSaverin.sh @@ -0,0 +1,20 @@ +#!/bin/bash +# +# Copyright 2023 The original authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# Uncomment below to use sdk +# source "$HOME/.sdkman/bin/sdkman-init.sh" +# sdk use java 21.0.1-graal 1>&2 diff --git a/prepare_JaimePolidura.sh b/prepare_JaimePolidura.sh new file mode 100755 index 000000000..8c4e0e040 --- /dev/null +++ b/prepare_JaimePolidura.sh @@ -0,0 +1,24 @@ +#!/bin/bash +# +# Copyright 2023 The original authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +source "$HOME/.sdkman/bin/sdkman-init.sh" +sdk use java 21.0.2-graal 1>&2 + +if [ ! 
-f target/CalculateAverage_JaimePolidura_image ]; then + OPTS="--gc=epsilon -O3 --enable-preview --initialize-at-build-time=dev.morling.onebrc.CalculateAverage_JaimePolidura" + native-image $OPTS -cp target/average-1.0.0-SNAPSHOT.jar -o target/CalculateAverage_JaimePolidura_image dev.morling.onebrc.CalculateAverage_JaimePolidura +fi diff --git a/prepare_JamalMulla.sh b/prepare_JamalMulla.sh index ec0f35f1c..d950d43ce 100755 --- a/prepare_JamalMulla.sh +++ b/prepare_JamalMulla.sh @@ -16,4 +16,10 @@ # source "$HOME/.sdkman/bin/sdkman-init.sh" -sdk use java 21.0.1-graal 1>&2 \ No newline at end of file +sdk use java 21.0.2-graal 1>&2 + +# ./mvnw clean verify removes target/ and will re-trigger native image creation. +if [ ! -f target/CalculateAverage_JamalMulla_image ]; then + NATIVE_IMAGE_OPTS="--gc=epsilon -O3 -march=native --enable-preview --strict-image-heap --link-at-build-time -R:MaxHeapSize=64m -da -dsa --no-fallback --initialize-at-build-time=dev.morling.onebrc.CalculateAverage_JamalMulla" + native-image $NATIVE_IMAGE_OPTS -cp target/average-1.0.0-SNAPSHOT.jar -o target/CalculateAverage_JamalMulla_image dev.morling.onebrc.CalculateAverage_JamalMulla +fi \ No newline at end of file diff --git a/prepare_Judekeyser.sh b/prepare_Judekeyser.sh new file mode 100755 index 000000000..4cda7b411 --- /dev/null +++ b/prepare_Judekeyser.sh @@ -0,0 +1,20 @@ +#!/bin/bash +# +# Copyright 2023 The original authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +# Uncomment below to use sdk +# source "$HOME/.sdkman/bin/sdkman-init.sh" +# sdk use java 21.0.1-graal 1>&2 diff --git a/prepare_PanagiotisDrakatos.sh b/prepare_PanagiotisDrakatos.sh new file mode 100755 index 000000000..35fadfcb5 --- /dev/null +++ b/prepare_PanagiotisDrakatos.sh @@ -0,0 +1,23 @@ +# +# Copyright 2023 The original authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +source "$HOME/.sdkman/bin/sdkman-init.sh" +sdk use java 21.0.1-graal 1>&2 + +if [ ! -f target/CalculateAverage_PanagiotisDrakatos_image ]; then + NATIVE_IMAGE_OPTS="--gc=epsilon -O3 -R:MaxHeapSize=10536m --initialize-at-build-time=dev.morling.onebrc.CalculateAverage_PanagiotisDrakatos" + native-image $NATIVE_IMAGE_OPTS -cp target/average-1.0.0-SNAPSHOT.jar -o target/CalculateAverage_PanagiotisDrakatos_image dev.morling.onebrc.CalculateAverage_PanagiotisDrakatos +fi \ No newline at end of file diff --git a/prepare_PawelAdamski.sh b/prepare_PawelAdamski.sh new file mode 100755 index 000000000..4cda7b411 --- /dev/null +++ b/prepare_PawelAdamski.sh @@ -0,0 +1,20 @@ +#!/bin/bash +# +# Copyright 2023 The original authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# Uncomment below to use sdk +# source "$HOME/.sdkman/bin/sdkman-init.sh" +# sdk use java 21.0.1-graal 1>&2 diff --git a/prepare_Smoofie.sh b/prepare_Smoofie.sh new file mode 100755 index 000000000..4cda7b411 --- /dev/null +++ b/prepare_Smoofie.sh @@ -0,0 +1,20 @@ +#!/bin/bash +# +# Copyright 2023 The original authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# Uncomment below to use sdk +# source "$HOME/.sdkman/bin/sdkman-init.sh" +# sdk use java 21.0.1-graal 1>&2 diff --git a/prepare_YannMoisan.sh b/prepare_YannMoisan.sh new file mode 100755 index 000000000..4cda7b411 --- /dev/null +++ b/prepare_YannMoisan.sh @@ -0,0 +1,20 @@ +#!/bin/bash +# +# Copyright 2023 The original authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# Uncomment below to use sdk +# source "$HOME/.sdkman/bin/sdkman-init.sh" +# sdk use java 21.0.1-graal 1>&2 diff --git a/prepare_abeobk.sh b/prepare_abeobk.sh new file mode 100755 index 000000000..380e2093c --- /dev/null +++ b/prepare_abeobk.sh @@ -0,0 +1,25 @@ +#!/bin/bash +# +# Copyright 2023 The original authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +source "$HOME/.sdkman/bin/sdkman-init.sh" +sdk use java 21.0.2-graal 1>&2 + +# ./mvnw clean verify removes target/ and will re-trigger native image creation. +if [ ! 
-f target/CalculateAverage_abeobk_image ]; then + NATIVE_IMAGE_OPTS="--gc=epsilon -O3 -march=native -H:InlineAllBonus=10 -H:-GenLoopSafepoints --enable-preview --initialize-at-build-time=dev.morling.onebrc.CalculateAverage_abeobk" + native-image $NATIVE_IMAGE_OPTS -cp target/average-1.0.0-SNAPSHOT.jar -o target/CalculateAverage_abeobk_image dev.morling.onebrc.CalculateAverage_abeobk +fi diff --git a/prepare_adriacabeza.sh b/prepare_adriacabeza.sh new file mode 100755 index 000000000..f83a3ff69 --- /dev/null +++ b/prepare_adriacabeza.sh @@ -0,0 +1,19 @@ +#!/bin/bash +# +# Copyright 2023 The original authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +source "$HOME/.sdkman/bin/sdkman-init.sh" +sdk use java 21.0.1-graal 1>&2 diff --git a/prepare_agoncal.sh b/prepare_agoncal.sh new file mode 100755 index 000000000..d2a3c6ba1 --- /dev/null +++ b/prepare_agoncal.sh @@ -0,0 +1,19 @@ +#!/bin/bash +# +# Copyright 2023 The original authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +# + +source "$HOME/.sdkman/bin/sdkman-init.sh" +sdk use java 21.0.1-tem 1>&2 diff --git a/prepare_anitasv.sh b/prepare_anitasv.sh new file mode 100755 index 000000000..f83a3ff69 --- /dev/null +++ b/prepare_anitasv.sh @@ -0,0 +1,19 @@ +#!/bin/bash +# +# Copyright 2023 The original authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +source "$HOME/.sdkman/bin/sdkman-init.sh" +sdk use java 21.0.1-graal 1>&2 diff --git a/prepare_armandino.sh b/prepare_armandino.sh new file mode 100755 index 000000000..19a71f9ea --- /dev/null +++ b/prepare_armandino.sh @@ -0,0 +1,25 @@ +#!/bin/bash +# +# Copyright 2023 The original authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +source "$HOME/.sdkman/bin/sdkman-init.sh" +sdk use java 21.0.2-graal 1>&2 + +# ./mvnw clean verify removes target/ and will re-trigger native image creation. +if [ ! 
-f target/CalculateAverage_armandino_image ]; then + NATIVE_IMAGE_OPTS="--gc=epsilon -O3 -march=native --enable-preview -H:InlineAllBonus=10 -H:-ParseRuntimeOptions --initialize-at-build-time=dev.morling.onebrc.CalculateAverage_armandino\$Scanner" + native-image $NATIVE_IMAGE_OPTS -cp target/average-1.0.0-SNAPSHOT.jar -o target/CalculateAverage_armandino_image dev.morling.onebrc.CalculateAverage_armandino +fi diff --git a/prepare_artsiomkorzun.sh b/prepare_artsiomkorzun.sh index f83a3ff69..7cbcdfc8a 100755 --- a/prepare_artsiomkorzun.sh +++ b/prepare_artsiomkorzun.sh @@ -16,4 +16,9 @@ # source "$HOME/.sdkman/bin/sdkman-init.sh" -sdk use java 21.0.1-graal 1>&2 +sdk use java 21.0.2-graal 1>&2 + +if [ ! -f target/CalculateAverage_artsiomkorzun_image ]; then + NATIVE_IMAGE_OPTS="--gc=epsilon -O3 -march=native -H:TuneInlinerExploration=1 -R:MaxHeapSize=64m -H:-GenLoopSafepoints --enable-preview --initialize-at-build-time=dev.morling.onebrc.CalculateAverage_artsiomkorzun" + native-image $NATIVE_IMAGE_OPTS -cp target/average-1.0.0-SNAPSHOT.jar -o target/CalculateAverage_artsiomkorzun_image dev.morling.onebrc.CalculateAverage_artsiomkorzun +fi \ No newline at end of file diff --git a/prepare_breejesh.sh b/prepare_breejesh.sh new file mode 100755 index 000000000..4cda7b411 --- /dev/null +++ b/prepare_breejesh.sh @@ -0,0 +1,20 @@ +#!/bin/bash +# +# Copyright 2023 The original authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +# Uncomment below to use sdk +# source "$HOME/.sdkman/bin/sdkman-init.sh" +# sdk use java 21.0.1-graal 1>&2 diff --git a/prepare_cb0s.sh b/prepare_cb0s.sh new file mode 100755 index 000000000..4cda7b411 --- /dev/null +++ b/prepare_cb0s.sh @@ -0,0 +1,20 @@ +#!/bin/bash +# +# Copyright 2023 The original authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# Uncomment below to use sdk +# source "$HOME/.sdkman/bin/sdkman-init.sh" +# sdk use java 21.0.1-graal 1>&2 diff --git a/prepare_chrisbellew.sh b/prepare_chrisbellew.sh new file mode 100755 index 000000000..4cda7b411 --- /dev/null +++ b/prepare_chrisbellew.sh @@ -0,0 +1,20 @@ +#!/bin/bash +# +# Copyright 2023 The original authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +# Uncomment below to use sdk +# source "$HOME/.sdkman/bin/sdkman-init.sh" +# sdk use java 21.0.1-graal 1>&2 diff --git a/prepare_dpsoft.sh b/prepare_dpsoft.sh new file mode 100755 index 000000000..5e6393e94 --- /dev/null +++ b/prepare_dpsoft.sh @@ -0,0 +1,20 @@ +#!/bin/bash +# +# Copyright 2023 The original authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# Uncomment below to use sdk +source "$HOME/.sdkman/bin/sdkman-init.sh" +sdk use java 21.0.2-graal 1>&2 \ No newline at end of file diff --git a/prepare_dqhieuu.sh b/prepare_dqhieuu.sh new file mode 100755 index 000000000..4cda7b411 --- /dev/null +++ b/prepare_dqhieuu.sh @@ -0,0 +1,20 @@ +#!/bin/bash +# +# Copyright 2023 The original authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +# Uncomment below to use sdk +# source "$HOME/.sdkman/bin/sdkman-init.sh" +# sdk use java 21.0.1-graal 1>&2 diff --git a/prepare_ebarlas.sh b/prepare_ebarlas.sh index f83a3ff69..64b2bea81 100755 --- a/prepare_ebarlas.sh +++ b/prepare_ebarlas.sh @@ -16,4 +16,9 @@ # source "$HOME/.sdkman/bin/sdkman-init.sh" -sdk use java 21.0.1-graal 1>&2 +sdk use java 21.0.2-graal 1>&2 + +if [ ! -f target/CalculateAverage_ebarlas_image ]; then + NATIVE_IMAGE_OPTS="-H:+UnlockExperimentalVMOptions --initialize-at-build-time=dev.morling.onebrc.CalculateAverage_ebarlas --gc=epsilon -O3 -march=native -R:MaxHeapSize=128m -H:-GenLoopSafepoints --enable-preview" + native-image $NATIVE_IMAGE_OPTS -cp target/average-1.0.0-SNAPSHOT.jar -o target/CalculateAverage_ebarlas_image dev.morling.onebrc.CalculateAverage_ebarlas +fi diff --git a/prepare_eriklumme.sh b/prepare_eriklumme.sh new file mode 100755 index 000000000..f83a3ff69 --- /dev/null +++ b/prepare_eriklumme.sh @@ -0,0 +1,19 @@ +#!/bin/bash +# +# Copyright 2023 The original authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +source "$HOME/.sdkman/bin/sdkman-init.sh" +sdk use java 21.0.1-graal 1>&2 diff --git a/prepare_gabrielfoo.sh b/prepare_gabrielfoo.sh new file mode 100755 index 000000000..e19dea509 --- /dev/null +++ b/prepare_gabrielfoo.sh @@ -0,0 +1,19 @@ +#!/bin/bash +# +# Copyright 2023 The original authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +source "$HOME/.sdkman/bin/sdkman-init.sh" +sdk use java 21.0.1-graal > /dev/null 2>&1 \ No newline at end of file diff --git a/prepare_godofwharf.sh b/prepare_godofwharf.sh new file mode 100755 index 000000000..907c86d8d --- /dev/null +++ b/prepare_godofwharf.sh @@ -0,0 +1,19 @@ +#!/bin/bash +# +# Copyright 2023 The original authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +source "$HOME/.sdkman/bin/sdkman-init.sh" +sdk use java 21.0.2-tem 1>&2 \ No newline at end of file diff --git a/prepare_iziamos.sh b/prepare_iziamos.sh new file mode 100755 index 000000000..621937ca2 --- /dev/null +++ b/prepare_iziamos.sh @@ -0,0 +1,24 @@ +#!/bin/bash +# +# Copyright 2023 The original authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +source "$HOME/.sdkman/bin/sdkman-init.sh" +sdk use java 21.0.2-graal 1>&2 + +if [ ! -f target/CalculateAverage_iziamos_image ]; then + NATIVE_IMAGE_OPTS="-H:+UnlockExperimentalVMOptions --gc=epsilon -O3 -march=native -R:MaxHeapSize=64m -H:-GenLoopSafepoints --enable-preview -H:InlineAllBonus=10 -H:-ParseRuntimeOptions --initialize-at-build-time=dev.morling.onebrc.CalculateAverage_iziamos" + native-image $NATIVE_IMAGE_OPTS -cp target/average-1.0.0-SNAPSHOT.jar -o target/CalculateAverage_iziamos_image dev.morling.onebrc.CalculateAverage_iziamos +fi diff --git a/prepare_jerrinot.sh b/prepare_jerrinot.sh new file mode 100755 index 000000000..58aac6bbd --- /dev/null +++ b/prepare_jerrinot.sh @@ -0,0 +1,24 @@ +#!/bin/bash +# +# Copyright 2023 The original authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +source "$HOME/.sdkman/bin/sdkman-init.sh" +sdk use java 21.0.2-graal 1>&2 + +if [ ! -f target/CalculateAverage_jerrinot_image ]; then + NATIVE_IMAGE_OPTS="--gc=epsilon -O3 -march=native --enable-preview -H:-GenLoopSafepoints -H:InlineAllBonus=10 --initialize-at-build-time=dev.morling.onebrc.CalculateAverage_jerrinot" + native-image $NATIVE_IMAGE_OPTS -cp target/average-1.0.0-SNAPSHOT.jar -o target/CalculateAverage_jerrinot_image dev.morling.onebrc.CalculateAverage_jerrinot +fi diff --git a/prepare_jonathan-aotearoa.sh b/prepare_jonathan-aotearoa.sh new file mode 100755 index 000000000..bcf76acfa --- /dev/null +++ b/prepare_jonathan-aotearoa.sh @@ -0,0 +1,28 @@ +#!/bin/bash +# +# Copyright 2023 The original authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +source "$HOME/.sdkman/bin/sdkman-init.sh" +sdk use java 21.0.2-graal 1>&2 + +if [ ! -f target/CalculateAverage_jonathan-aotearoa_image ]; then + # Enable preview features and disable system assertions. + JAVA_OPTS="--enable-preview -dsa" + # Use the no-op GC. 
+ # Enable CPU features (-march=native) and level-3 optimisations (-O3) + NATIVE_IMAGE_OPTS="--initialize-at-build-time=dev.morling.onebrc.CalculateAverage_jonathanaotearoa --gc=epsilon -O3 -march=native --strict-image-heap $JAVA_OPTS" + native-image $NATIVE_IMAGE_OPTS -cp target/average-1.0.0-SNAPSHOT.jar -o target/CalculateAverage_jonathan-aotearoa_image dev.morling.onebrc.CalculateAverage_jonathanaotearoa +fi \ No newline at end of file diff --git a/prepare_justplainlaake.sh b/prepare_justplainlaake.sh new file mode 100755 index 000000000..bc7c6dce9 --- /dev/null +++ b/prepare_justplainlaake.sh @@ -0,0 +1,30 @@ +#!/bin/bash +# +# Copyright 2023 The original authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +source "$HOME/.sdkman/bin/sdkman-init.sh" +sdk use java 21.0.1-graal 1>&2 + +if [ ! -f target/CalculateAverage_justplainlaake_image ]; then + #disable assertions + #optimize code for best performance + #native march gives best performance for machine image is built on + #strict image heap allows all classes ot be used at build time + #native image info prints the trace of the build + #enable preview allows for preview features of current release + #epsilon garbage collector is a gc that doesn't gc... 
haha + native-image -dsa -O3 -march=native --strict-image-heap --native-image-info --enable-preview --gc=epsilon -cp target/average-1.0.0-SNAPSHOT.jar -o target/CalculateAverage_justplainlaake_image dev.morling.onebrc.CalculateAverage_justplainlaake +fi diff --git a/prepare_linl33.sh b/prepare_linl33.sh new file mode 100755 index 000000000..f943c90ef --- /dev/null +++ b/prepare_linl33.sh @@ -0,0 +1,45 @@ +#!/bin/bash +# +# Copyright 2023 The original authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + + +source "$HOME/.sdkman/bin/sdkman-init.sh" +sdk use java 22.ea.32-open 1>&2 + +CLASS_NAME="CalculateAverage_linl33" + +JAVA_OPTS="-Xrs --enable-preview --add-modules jdk.incubator.vector --enable-native-access=ALL-UNNAMED" +JAVA_OPTS="${JAVA_OPTS} -XX:+UnlockDiagnosticVMOptions -XX:+UnlockExperimentalVMOptions" +JAVA_OPTS="${JAVA_OPTS} -Xms128m -XX:+AlwaysPreTouch -XX:+AlwaysPreTouchStacks -XX:-UseTransparentHugePages" +JAVA_OPTS="${JAVA_OPTS} -XX:-UseCompressedClassPointers -XX:+ForceUnreachable -XX:-CompactStrings" +JAVA_OPTS="${JAVA_OPTS} -XX:CodeEntryAlignment=64 -XX:OptoLoopAlignment=64 -XX:MaxLoopPad=16 -XX:ObjectAlignmentInBytes=64" +JAVA_OPTS="${JAVA_OPTS} -XX:-UseLoopPredicate -XX:LoopStripMiningIter=0 -XX:LoopStripMiningIterShortLoop=0" +JAVA_OPTS="${JAVA_OPTS} -XX:-UseCountedLoopSafepoints -XX:GuaranteedSafepointInterval=0 -XX:AllocatePrefetchStyle=0" +JAVA_OPTS="${JAVA_OPTS} -XX:+TrustFinalNonStaticFields -XX:LockingMode=2 -XX:+UseSystemMemoryBarrier" +JAVA_OPTS="${JAVA_OPTS} -XX:-UseDynamicNumberOfCompilerThreads -XX:-UseDynamicNumberOfGCThreads" +JAVA_OPTS="${JAVA_OPTS} -XX:ArchiveRelocationMode=0 -XX:-UsePerfData -XX:-UseNotificationThread -XX:-CheckIntrinsics" +#JAVA_OPTS="${JAVA_OPTS} -XX:+UseZGC -XX:-ZProactive -XX:+ZCollectionIntervalOnly -XX:ZCollectionInterval=0 -XX:-ZUncommit -XX:-ZBufferStoreBarriers -XX:ZIndexDistributorStrategy=1" +JAVA_OPTS="${JAVA_OPTS} -XX:+UseEpsilonGC -XX:-UseCompressedOops" +#JAVA_OPTS="${JAVA_OPTS} -XX:+UseParallelGC -XX:-UseCompressedOops" +#JAVA_OPTS="${JAVA_OPTS} -XX:+UseG1GC -XX:-UseCompressedOops" +JAVA_OPTS="${JAVA_OPTS} -Djdk.incubator.vector.VECTOR_ACCESS_OOB_CHECK=0 -Djava.lang.invoke.VarHandle.VAR_HANDLE_GUARDS=false -Djava.lang.invoke.MethodHandle.DONT_INLINE_THRESHOLD=-1" +JAVA_OPTS="${JAVA_OPTS} -Dfile.encoding=UTF-8 -Dsun.stdout.encoding=UTF-8 -Dsun.stderr.encoding=UTF-8" +JAVA_OPTS="${JAVA_OPTS} 
-Ddev.morling.onebrc.CalculateAverage_linl33.measurementsPath=src/test/resources/samples/measurements-10000-unique-keys.txt" + +# create CDS archive +java ${JAVA_OPTS} -Xshare:off -XX:DumpLoadedClassList=target/${CLASS_NAME}.classlist --class-path target/average-1.0.0-SNAPSHOT.jar dev.morling.onebrc.${CLASS_NAME} +java ${JAVA_OPTS} -Xshare:dump -XX:SharedClassListFile=target/${CLASS_NAME}.classlist -XX:SharedArchiveFile=target/${CLASS_NAME}.jsa --class-path target/average-1.0.0-SNAPSHOT.jar +java ${JAVA_OPTS} -Xshare:on -XX:SharedArchiveFile=target/${CLASS_NAME}.jsa -XX:ArchiveClassesAtExit=target/${CLASS_NAME}_dynamic.jsa --class-path target/average-1.0.0-SNAPSHOT.jar dev.morling.onebrc.${CLASS_NAME} diff --git a/prepare_mahadev-k.sh b/prepare_mahadev-k.sh new file mode 100755 index 000000000..4cda7b411 --- /dev/null +++ b/prepare_mahadev-k.sh @@ -0,0 +1,20 @@ +#!/bin/bash +# +# Copyright 2023 The original authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# Uncomment below to use sdk +# source "$HOME/.sdkman/bin/sdkman-init.sh" +# sdk use java 21.0.1-graal 1>&2 diff --git a/prepare_manishgarg90.sh b/prepare_manishgarg90.sh new file mode 100755 index 000000000..4cda7b411 --- /dev/null +++ b/prepare_manishgarg90.sh @@ -0,0 +1,20 @@ +#!/bin/bash +# +# Copyright 2023 The original authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# Uncomment below to use sdk +# source "$HOME/.sdkman/bin/sdkman-init.sh" +# sdk use java 21.0.1-graal 1>&2 diff --git a/prepare_martin2038.sh b/prepare_martin2038.sh new file mode 100755 index 000000000..cf8e83f77 --- /dev/null +++ b/prepare_martin2038.sh @@ -0,0 +1,26 @@ +#!/bin/bash +# +# Copyright 2023 The original authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# Uncomment below to use sdk +source "$HOME/.sdkman/bin/sdkman-init.sh" +sdk use java 21.0.2-graal 1>&2 +## +#if [ ! 
-f target/CalculateAverage_martin2038 ]; then +# MAIN=dev.morling.onebrc.CalculateAverage_martin2038 +# NATIVE_IMAGE_OPTS="-H:+UnlockExperimentalVMOptions --initialize-at-build-time=$MAIN --gc=epsilon -O3 -march=native -R:MaxHeapSize=515m -H:-GenLoopSafepoints -H:InlineAllBonus=10 -H:-ParseRuntimeOptions" +# native-image $NATIVE_IMAGE_OPTS -cp target/average-1.0.0-SNAPSHOT.jar -o target/CalculateAverage_martin2038_image $MAIN +#fi \ No newline at end of file diff --git a/prepare_melgenek.sh b/prepare_melgenek.sh new file mode 100755 index 000000000..09c53f634 --- /dev/null +++ b/prepare_melgenek.sh @@ -0,0 +1,19 @@ +#!/bin/bash +# +# Copyright 2023 The original authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +source "$HOME/.sdkman/bin/sdkman-init.sh" +sdk use java 21.0.2-open 1>&2 diff --git a/prepare_mtopolnik.sh b/prepare_mtopolnik.sh index f83a3ff69..d84f20dd8 100755 --- a/prepare_mtopolnik.sh +++ b/prepare_mtopolnik.sh @@ -16,4 +16,9 @@ # source "$HOME/.sdkman/bin/sdkman-init.sh" -sdk use java 21.0.1-graal 1>&2 +sdk use java 21.0.2-graal 1>&2 + +if [ ! 
-f target/CalculateAverage_mtopolnik_image ]; then + NATIVE_IMAGE_OPTS="--gc=epsilon -O3 -H:+UnlockExperimentalVMOptions -H:-GenLoopSafepoints -march=native --enable-preview -H:InlineAllBonus=10 -H:-ParseRuntimeOptions --initialize-at-build-time=dev.morling.onebrc.CalculateAverage_mtopolnik" + native-image $NATIVE_IMAGE_OPTS -cp target/average-1.0.0-SNAPSHOT.jar -o target/CalculateAverage_mtopolnik_image dev.morling.onebrc.CalculateAverage_mtopolnik +fi diff --git a/prepare_phd3.sh b/prepare_phd3.sh new file mode 100755 index 000000000..f83a3ff69 --- /dev/null +++ b/prepare_phd3.sh @@ -0,0 +1,19 @@ +#!/bin/bash +# +# Copyright 2023 The original authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +source "$HOME/.sdkman/bin/sdkman-init.sh" +sdk use java 21.0.1-graal 1>&2 diff --git a/prepare_plevart.sh b/prepare_plevart.sh new file mode 100755 index 000000000..5259fbe65 --- /dev/null +++ b/prepare_plevart.sh @@ -0,0 +1,19 @@ +#!/bin/bash +# +# Copyright 2023 The original authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +# + +source "$HOME/.sdkman/bin/sdkman-init.sh" +sdk use java 21.0.2-tem 1>&2 diff --git a/prepare_rcasteltrione.sh b/prepare_rcasteltrione.sh new file mode 100755 index 000000000..f83a3ff69 --- /dev/null +++ b/prepare_rcasteltrione.sh @@ -0,0 +1,19 @@ +#!/bin/bash +# +# Copyright 2023 The original authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +source "$HOME/.sdkman/bin/sdkman-init.sh" +sdk use java 21.0.1-graal 1>&2 diff --git a/prepare_roman-r-m.sh b/prepare_roman-r-m.sh index f83a3ff69..dcd5500df 100755 --- a/prepare_roman-r-m.sh +++ b/prepare_roman-r-m.sh @@ -17,3 +17,14 @@ source "$HOME/.sdkman/bin/sdkman-init.sh" sdk use java 21.0.1-graal 1>&2 + +# ./mvnw clean verify removes target/ and will re-trigger native image creation. +if [ ! 
-f target/CalculateAverage_roman_r_m_image ]; then + + JAVA_OPTS="--enable-preview -dsa" + NATIVE_IMAGE_OPTS="--initialize-at-build-time=dev.morling.onebrc.CalculateAverage_roman_r_m --gc=epsilon -Ob -O3 -march=native --strict-image-heap $JAVA_OPTS" + NATIVE_IMAGE_OPTS="$NATIVE_IMAGE_OPTS -R:MaxHeapSize=128m" + NATIVE_IMAGE_OPTS="$NATIVE_IMAGE_OPTS -H:+UnlockExperimentalVMOptions -H:-GenLoopSafepoints -H:InlineAllBonus=10 -H:-ParseRuntimeOptions" + + native-image $NATIVE_IMAGE_OPTS -cp target/average-1.0.0-SNAPSHOT.jar -o target/CalculateAverage_roman_r_m_image dev.morling.onebrc.CalculateAverage_roman_r_m +fi \ No newline at end of file diff --git a/prepare_royvanrijn.sh b/prepare_royvanrijn.sh index 2088b7b30..81672e8f9 100755 --- a/prepare_royvanrijn.sh +++ b/prepare_royvanrijn.sh @@ -16,13 +16,13 @@ # source "$HOME/.sdkman/bin/sdkman-init.sh" -sdk use java 21.0.1-graal 1>&2 +sdk use java 21.0.2-graal 1>&2 # ./mvnw clean verify removes target/ and will re-trigger native image creation. if [ ! 
-f target/CalculateAverage_royvanrijn_image ]; then - JAVA_OPTS="--enable-preview -dsa" - NATIVE_IMAGE_OPTS="--gc=epsilon -Ob -O3 -march=native --strict-image-heap $JAVA_OPTS" + JAVA_OPTS="--enable-preview" + NATIVE_IMAGE_OPTS="-H:+UnlockExperimentalVMOptions --initialize-at-build-time=dev.morling.onebrc.CalculateAverage_royvanrijn --gc=epsilon -O3 -march=native -R:MaxHeapSize=515m -H:-GenLoopSafepoints -H:InlineAllBonus=10 -H:-ParseRuntimeOptions $JAVA_OPTS" native-image $NATIVE_IMAGE_OPTS -cp target/average-1.0.0-SNAPSHOT.jar -o target/CalculateAverage_royvanrijn_image dev.morling.onebrc.CalculateAverage_royvanrijn fi diff --git a/prepare_serkan-ozal.sh b/prepare_serkan-ozal.sh new file mode 100755 index 000000000..75df48a5b --- /dev/null +++ b/prepare_serkan-ozal.sh @@ -0,0 +1,42 @@ +#!/bin/bash +# +# Copyright 2023 The original authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +source "$HOME/.sdkman/bin/sdkman-init.sh" +sdk use java 21.0.1-open 1>&2 + +JAVA_OPTS="--enable-preview --enable-native-access=ALL-UNNAMED --add-modules=jdk.incubator.vector " +JAVA_OPTS="$JAVA_OPTS -XX:+UnlockExperimentalVMOptions -XX:+UnlockDiagnosticVMOptions" +JAVA_OPTS="$JAVA_OPTS -XX:-TieredCompilation -XX:MaxInlineSize=10000 -XX:InlineSmallCode=10000 -XX:FreqInlineSize=10000" +JAVA_OPTS="$JAVA_OPTS -XX:-UseCountedLoopSafepoints -XX:GuaranteedSafepointInterval=0" +JAVA_OPTS="$JAVA_OPTS -XX:+TrustFinalNonStaticFields -da -dsa -XX:+UseNUMA -XX:-EnableJVMCI" +JAVA_OPTS="$JAVA_OPTS -Djdk.incubator.vector.VECTOR_ACCESS_OOB_CHECK=0" +JAVA_OPTS="${JAVA_OPTS} -Dfile.path=src/test/resources/samples/measurements-10000-unique-keys.txt" +if [[ ! "$(uname -s)" = "Darwin" ]]; then + JAVA_OPTS="$JAVA_OPTS -XX:+UseTransparentHugePages" +fi + +# Set configs +export USE_SHARED_ARENA=true +export USE_SHARED_REGION=true +export CLOSE_STDOUT_ON_RESULT=true + +CLASS_NAME="CalculateAverage_serkan_ozal" + +# Create CDS archive +java ${JAVA_OPTS} -Xshare:off -XX:DumpLoadedClassList=target/${CLASS_NAME}.classlist --class-path target/average-1.0.0-SNAPSHOT.jar dev.morling.onebrc.${CLASS_NAME} +java ${JAVA_OPTS} -Xshare:dump -XX:SharedClassListFile=target/${CLASS_NAME}.classlist -XX:SharedArchiveFile=target/${CLASS_NAME}.jsa --class-path target/average-1.0.0-SNAPSHOT.jar +java ${JAVA_OPTS} -Xshare:on -XX:SharedArchiveFile=target/${CLASS_NAME}.jsa -XX:ArchiveClassesAtExit=target/${CLASS_NAME}_cds.jsa --class-path target/average-1.0.0-SNAPSHOT.jar dev.morling.onebrc.${CLASS_NAME} diff --git a/prepare_slovdahl.sh b/prepare_slovdahl.sh new file mode 100755 index 000000000..52791308f --- /dev/null +++ b/prepare_slovdahl.sh @@ -0,0 +1,22 @@ +#!/bin/bash +# +# Copyright 2023 The original authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# Uncomment below to use sdk +source "$HOME/.sdkman/bin/sdkman-init.sh" + +sdk use java 21.0.2-tem 1>&2 > /dev/null +./mvnw verify diff --git a/prepare_stephenvonworley.sh b/prepare_stephenvonworley.sh new file mode 100755 index 000000000..4e8d22511 --- /dev/null +++ b/prepare_stephenvonworley.sh @@ -0,0 +1,25 @@ +#!/bin/bash +# +# Copyright 2023 The original authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +source "$HOME/.sdkman/bin/sdkman-init.sh" +sdk use java 21.0.2-graal 1>&2 + +# ./mvnw clean verify removes target/ and will re-trigger native image creation. +if [ ! 
-f target/CalculateAverage_stephenvonworley_image ]; then + NATIVE_IMAGE_OPTS="--gc=epsilon -O3 -H:TuneInlinerExploration=1 -march=native --enable-preview --initialize-at-build-time=dev.morling.onebrc.CalculateAverage_stephenvonworley" + native-image $NATIVE_IMAGE_OPTS -cp target/average-1.0.0-SNAPSHOT.jar -o target/CalculateAverage_stephenvonworley_image dev.morling.onebrc.CalculateAverage_stephenvonworley +fi diff --git a/prepare_sudhirtumati.sh b/prepare_sudhirtumati.sh new file mode 100755 index 000000000..735bdab4c --- /dev/null +++ b/prepare_sudhirtumati.sh @@ -0,0 +1,20 @@ +#!/bin/bash +# +# Copyright 2023 The original authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# Uncomment below to use sdk +source "$HOME/.sdkman/bin/sdkman-init.sh" +sdk use java 21.0.2-open 1>&2 diff --git a/prepare_thomaswue.sh b/prepare_thomaswue.sh index 1c6be6494..3e75233f9 100755 --- a/prepare_thomaswue.sh +++ b/prepare_thomaswue.sh @@ -16,11 +16,23 @@ # source "$HOME/.sdkman/bin/sdkman-init.sh" -sdk use java 21.0.1-graal 1>&2 +sdk use java 21.0.2-graal 1>&2 # ./mvnw clean verify removes target/ and will re-trigger native image creation. if [ ! -f target/CalculateAverage_thomaswue_image ]; then - NATIVE_IMAGE_OPTS="--gc=epsilon -O3 -march=native --enable-preview" - # Use -H:MethodFilter=CalculateAverage_thomaswue.* -H:Dump=:2 -H:PrintGraph=Network for IdealGraphVisualizer graph dumping. 
+ + # Performance tuning flags, optimization level 3, maximum inlining exploration, and compile for the architecture where the native image is generated. + NATIVE_IMAGE_OPTS="-O3 -H:TuneInlinerExploration=1 -march=native" + + # Need to enable preview for accessing the raw address of the foreign memory access API. + # Initializing the Scanner to make sure the unsafe access object is known as a non-null compile time constant. + NATIVE_IMAGE_OPTS="$NATIVE_IMAGE_OPTS --enable-preview --initialize-at-build-time=dev.morling.onebrc.CalculateAverage_thomaswue\$Scanner" + + # There is no need for garbage collection and therefore also no safepoints required. + NATIVE_IMAGE_OPTS="$NATIVE_IMAGE_OPTS --gc=epsilon -H:-GenLoopSafepoints" + + # Uncomment the following line for outputting the compiler graph to the IdealGraphVisualizer + # NATIVE_IMAGE_OPTS="$NATIVE_IMAGE_OPTS -H:MethodFilter=CalculateAverage_thomaswue.* -H:Dump=:2 -H:PrintGraph=Network" + native-image $NATIVE_IMAGE_OPTS -cp target/average-1.0.0-SNAPSHOT.jar -o target/CalculateAverage_thomaswue_image dev.morling.onebrc.CalculateAverage_thomaswue fi diff --git a/prepare_tivrfoa.sh b/prepare_tivrfoa.sh new file mode 100755 index 000000000..024d6f984 --- /dev/null +++ b/prepare_tivrfoa.sh @@ -0,0 +1,26 @@ +#!/bin/bash +# +# Copyright 2023 The original authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +source "$HOME/.sdkman/bin/sdkman-init.sh" +sdk use java 21.0.2-graal 1>&2 + +# ./mvnw clean verify removes target/ and will re-trigger native image creation. +if [ ! -f target/CalculateAverage_tivrfoa_image ]; then + NATIVE_IMAGE_OPTS="--gc=epsilon -O3 -H:-GenLoopSafepoints -march=native --enable-preview -H:InlineAllBonus=10 -H:-ParseRuntimeOptions --initialize-at-build-time=dev.morling.onebrc.CalculateAverage_tivrfoa\$Scanner" + # Use -H:MethodFilter=CalculateAverage_tivrfoa.* -H:Dump=:2 -H:PrintGraph=Network for IdealGraphVisualizer graph dumping. + native-image $NATIVE_IMAGE_OPTS -cp target/average-1.0.0-SNAPSHOT.jar -o target/CalculateAverage_tivrfoa_image dev.morling.onebrc.CalculateAverage_tivrfoa +fi diff --git a/prepare_tonivade.sh b/prepare_tonivade.sh new file mode 100755 index 000000000..cdf474f87 --- /dev/null +++ b/prepare_tonivade.sh @@ -0,0 +1,20 @@ +#!/bin/bash +# +# Copyright 2023 The original authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# Uncomment below to use sdk +source "$HOME/.sdkman/bin/sdkman-init.sh" +sdk use java 21.0.2-tem 1>&2 diff --git a/prepare_vemanaNonIdiomatic.sh b/prepare_vemanaNonIdiomatic.sh new file mode 100755 index 000000000..58dbc240f --- /dev/null +++ b/prepare_vemanaNonIdiomatic.sh @@ -0,0 +1,20 @@ +#!/bin/bash +# +# Copyright 2023 The original authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +source "$HOME/.sdkman/bin/sdkman-init.sh" +sdk use java 21.0.1-graal 1>&2 + diff --git a/prepare_yavuztas.sh b/prepare_yavuztas.sh index f83a3ff69..f9871afd7 100755 --- a/prepare_yavuztas.sh +++ b/prepare_yavuztas.sh @@ -16,4 +16,9 @@ # source "$HOME/.sdkman/bin/sdkman-init.sh" -sdk use java 21.0.1-graal 1>&2 +sdk use java 21.0.2-graal 1>&2 + +if [ ! -f target/CalculateAverage_yavuztas_image ]; then + NATIVE_IMAGE_OPTS="--initialize-at-build-time=dev.morling.onebrc.CalculateAverage_yavuztas --gc=epsilon -O3 -march=native -R:MaxHeapSize=128m -H:-GenLoopSafepoints --enable-preview" + native-image $NATIVE_IMAGE_OPTS -cp target/average-1.0.0-SNAPSHOT.jar -o target/CalculateAverage_yavuztas_image dev.morling.onebrc.CalculateAverage_yavuztas +fi diff --git a/prepare_yonatang.sh b/prepare_yonatang.sh new file mode 100755 index 000000000..4cda7b411 --- /dev/null +++ b/prepare_yonatang.sh @@ -0,0 +1,20 @@ +#!/bin/bash +# +# Copyright 2023 The original authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +# Uncomment below to use sdk +# source "$HOME/.sdkman/bin/sdkman-init.sh" +# sdk use java 21.0.1-graal 1>&2 diff --git a/prepare_zerninv.sh b/prepare_zerninv.sh new file mode 100755 index 000000000..ae7343301 --- /dev/null +++ b/prepare_zerninv.sh @@ -0,0 +1,25 @@ +#!/bin/sh +# +# Copyright 2023 The original authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + + +source "$HOME/.sdkman/bin/sdkman-init.sh" +sdk use java 21.0.2-graal 1>&2 + +if [ ! -f target/CalculateAverage_zerninv_image ]; then + NATIVE_IMAGE_OPTS="--gc=epsilon -O3 -march=native -R:MaxHeapSize=512m -H:-GenLoopSafepoints --enable-preview --initialize-at-build-time=dev.morling.onebrc.CalculateAverage_zerninv" + native-image $NATIVE_IMAGE_OPTS -cp target/average-1.0.0-SNAPSHOT.jar -o target/CalculateAverage_zerninv_image dev.morling.onebrc.CalculateAverage_zerninv +fi \ No newline at end of file diff --git a/src/main/go/AlexanderYastrebov/Dockerfile b/src/main/go/AlexanderYastrebov/Dockerfile new file mode 100644 index 000000000..a3b28067f --- /dev/null +++ b/src/main/go/AlexanderYastrebov/Dockerfile @@ -0,0 +1,22 @@ +# +# Copyright 2023 The original authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +FROM golang AS build-stage +COPY . src/ +RUN cd src && go build . + +FROM scratch AS export-stage +COPY --from=build-stage /go/src/1brc / diff --git a/src/main/go/AlexanderYastrebov/README.md b/src/main/go/AlexanderYastrebov/README.md new file mode 100644 index 000000000..dc0119252 --- /dev/null +++ b/src/main/go/AlexanderYastrebov/README.md @@ -0,0 +1,58 @@ +# 1brc in go + +It uses Docker with BuildKit plugin to build and [export](https://docs.docker.com/engine/reference/commandline/build/#output) the binary, +see [prepare_AlexanderYastrebov.sh](../../../../prepare_AlexanderYastrebov.sh) +and [calculate_average_AlexanderYastrebov.sh](../../../../calculate_average_AlexanderYastrebov.sh). + +Demo: +```sh +$ ./test.sh AlexanderYastrebov +[+] Building 0.2s (9/9) FINISHED + => [internal] load .dockerignore 0.0s + => => transferring context: 2B 0.0s + => [internal] load build definition from Dockerfile 0.0s + => => transferring dockerfile: 172B 0.0s + => [internal] load metadata for docker.io/library/golang:latest 0.0s + => [internal] load build context 0.0s + => => transferring context: 145B 0.0s + => [build-stage 1/3] FROM docker.io/library/golang 0.0s + => CACHED [build-stage 2/3] COPY . src/ 0.0s + => CACHED [build-stage 3/3] RUN cd src && go build . 
0.0s + => CACHED [export-stage 1/1] COPY --from=build-stage /go/src/1brc / 0.0s + => exporting to client directory 0.1s + => => copying files 2.03MB 0.0s +Validating calculate_average_AlexanderYastrebov.sh -- src/test/resources/samples/measurements-10000-unique-keys.txt +Validating calculate_average_AlexanderYastrebov.sh -- src/test/resources/samples/measurements-10.txt +Validating calculate_average_AlexanderYastrebov.sh -- src/test/resources/samples/measurements-1.txt +Validating calculate_average_AlexanderYastrebov.sh -- src/test/resources/samples/measurements-20.txt +Validating calculate_average_AlexanderYastrebov.sh -- src/test/resources/samples/measurements-2.txt +Validating calculate_average_AlexanderYastrebov.sh -- src/test/resources/samples/measurements-3.txt +Validating calculate_average_AlexanderYastrebov.sh -- src/test/resources/samples/measurements-boundaries.txt +Validating calculate_average_AlexanderYastrebov.sh -- src/test/resources/samples/measurements-complex-utf8.txt +Validating calculate_average_AlexanderYastrebov.sh -- src/test/resources/samples/measurements-dot.txt +Validating calculate_average_AlexanderYastrebov.sh -- src/test/resources/samples/measurements-shortest.txt +Validating calculate_average_AlexanderYastrebov.sh -- src/test/resources/samples/measurements-short.txt + +# Run once to setup the benchmark +# ./create_measurements.sh 1000000000 +# mv measurements.txt measurements_1B.txt +# ln -s measurements_1B.txt measurements.txt +# ./calculate_average_baseline.sh > out_expected.txt + +$ wc -l measurements_1B.txt +1000000000 measurements_1B.txt + +$ ./evaluate2.sh AlexanderYastrebov royvanrijn +... 0.0s +Benchmark 1: ./calculate_average_AlexanderYastrebov.sh 2>&1 + Time (mean ± σ): 16.786 s ± 0.545 s [User: 56.030 s, System: 10.068 s] + Range (min … max): 15.918 s … 17.309 s 5 runs +... 
+Benchmark 1: ./calculate_average_royvanrijn.sh 2>&1 + Time (mean ± σ): 16.731 s ± 0.190 s [User: 56.485 s, System: 10.279 s] + Range (min … max): 16.490 s … 16.951 s 5 runs + +Summary + AlexanderYastrebov: trimmed mean 16.901712789513336, raw times 16.69836470718,17.30911065018,16.83413600418,15.91787706218,17.17263765718 + royvanrijn: trimmed mean 16.738037123633333, raw times 16.4900939703,16.9513459953,16.5794539913,16.8297746273,16.8048827523 +``` diff --git a/src/main/go/AlexanderYastrebov/calc.go b/src/main/go/AlexanderYastrebov/calc.go new file mode 100644 index 000000000..149d38db7 --- /dev/null +++ b/src/main/go/AlexanderYastrebov/calc.go @@ -0,0 +1,283 @@ +package main + +import ( + "bytes" + "fmt" + "log" + "math" + "os" + "runtime" + "sort" + "sync" + "syscall" +) + +type measurement struct { + min, max, sum, count int64 +} + +func main() { + if len(os.Args) != 2 { + log.Fatalf("Missing measurements filename") + } + + measurements := processFile(os.Args[1]) + + ids := make([]string, 0, len(measurements)) + for id := range measurements { + ids = append(ids, id) + } + sort.Strings(ids) + + fmt.Print("{") + for i, id := range ids { + if i > 0 { + fmt.Print(", ") + } + m := measurements[id] + fmt.Printf("%s=%.1f/%.1f/%.1f", id, round(float64(m.min)/10.0), round(float64(m.sum)/10.0/float64(m.count)), round(float64(m.max)/10.0)) + } + fmt.Println("}") +} + +func processFile(filename string) map[string]*measurement { + f, err := os.Open(filename) + if err != nil { + log.Fatalf("Open: %v", err) + } + defer f.Close() + + fi, err := f.Stat() + if err != nil { + log.Fatalf("Stat: %v", err) + } + + size := fi.Size() + if size <= 0 || size != int64(int(size)) { + log.Fatalf("Invalid file size: %d", size) + } + + data, err := syscall.Mmap(int(f.Fd()), 0, int(size), syscall.PROT_READ, syscall.MAP_SHARED) + if err != nil { + log.Fatalf("Mmap: %v", err) + } + + defer func() { + if err := syscall.Munmap(data); err != nil { + log.Fatalf("Munmap: %v", err) + } + }() + + 
return process(data) +} + +func process(data []byte) map[string]*measurement { + nChunks := runtime.NumCPU() + + chunkSize := len(data) / nChunks + if chunkSize == 0 { + chunkSize = len(data) + } + + chunks := make([]int, 0, nChunks) + offset := 0 + for offset < len(data) { + offset += chunkSize + if offset >= len(data) { + chunks = append(chunks, len(data)) + break + } + + nlPos := bytes.IndexByte(data[offset:], '\n') + if nlPos == -1 { + chunks = append(chunks, len(data)) + break + } else { + offset += nlPos + 1 + chunks = append(chunks, offset) + } + } + + var wg sync.WaitGroup + wg.Add(len(chunks)) + + results := make([]map[string]*measurement, len(chunks)) + start := 0 + for i, chunk := range chunks { + go func(data []byte, i int) { + results[i] = processChunk(data) + wg.Done() + }(data[start:chunk], i) + start = chunk + } + wg.Wait() + + measurements := make(map[string]*measurement) + for _, r := range results { + for id, rm := range r { + m := measurements[id] + if m == nil { + measurements[id] = rm + } else { + m.min = min(m.min, rm.min) + m.max = max(m.max, rm.max) + m.sum += rm.sum + m.count += rm.count + } + } + } + return measurements +} + +func processChunk(data []byte) map[string]*measurement { + // Use fixed size linear probe lookup table + const ( + // use power of 2 for fast modulo calculation, + // should be larger than max number of keys which is 10_000 + entriesSize = 1 << 14 + + // use FNV-1a hash + fnv1aOffset64 = 14695981039346656037 + fnv1aPrime64 = 1099511628211 + ) + + type entry struct { + m measurement + hash uint64 + vlen int + value [128]byte // use power of 2 > 100 for alignment + } + entries := make([]entry, entriesSize) + entriesCount := 0 + + // keep short and inlinable + getMeasurement := func(hash uint64, value []byte) *measurement { + i := hash & uint64(entriesSize-1) + entry := &entries[i] + + // bytes.Equal could be commented to speedup assuming no hash collisions + for entry.vlen > 0 && !(entry.hash == hash && 
bytes.Equal(entry.value[:entry.vlen], value)) { + i = (i + 1) & uint64(entriesSize-1) + entry = &entries[i] + } + + if entry.vlen == 0 { + entry.hash = hash + entry.vlen = copy(entry.value[:], value) + entriesCount++ + } + return &entry.m + } + + // assume valid input + for len(data) > 0 { + + idHash := uint64(fnv1aOffset64) + semiPos := 0 + for i, b := range data { + if b == ';' { + semiPos = i + break + } + + // calculate FNV-1a hash + idHash ^= uint64(b) + idHash *= fnv1aPrime64 + } + + idData := data[:semiPos] + + data = data[semiPos+1:] + + var temp int64 + // parseNumber + { + negative := data[0] == '-' + if negative { + data = data[1:] + } + + _ = data[3] + if data[1] == '.' { + // 1.2\n + temp = int64(data[0])*10 + int64(data[2]) - '0'*(10+1) + data = data[4:] + // 12.3\n + } else { + _ = data[4] + temp = int64(data[0])*100 + int64(data[1])*10 + int64(data[3]) - '0'*(100+10+1) + data = data[5:] + } + + if negative { + temp = -temp + } + } + + m := getMeasurement(idHash, idData) + if m.count == 0 { + m.min = temp + m.max = temp + m.sum = temp + m.count = 1 + } else { + m.min = min(m.min, temp) + m.max = max(m.max, temp) + m.sum += temp + m.count++ + } + } + + result := make(map[string]*measurement, entriesCount) + for i := range entries { + entry := &entries[i] + if entry.m.count > 0 { + result[string(entry.value[:entry.vlen])] = &entry.m + } + } + return result +} + +func round(x float64) float64 { + return roundJava(x*10.0) / 10.0 +} + +// roundJava returns the closest integer to the argument, with ties +// rounding to positive infinity, see java's Math.round +func roundJava(x float64) float64 { + t := math.Trunc(x) + if x < 0.0 && t-x == 0.5 { + //return t + } else if math.Abs(x-t) >= 0.5 { + t += math.Copysign(1, x) + } + + if t == 0 { // check -0 + return 0.0 + } + return t +} + +// parseNumber reads decimal number that matches "^-?[0-9]{1,2}[.][0-9]" pattern, +// e.g.: -12.3, -3.4, 5.6, 78.9 and return the value*10, i.e. -123, -34, 56, 789. 
+func parseNumber(data []byte) int64 { + negative := data[0] == '-' + if negative { + data = data[1:] + } + + var result int64 + switch len(data) { + // 1.2 + case 3: + result = int64(data[0])*10 + int64(data[2]) - '0'*(10+1) + // 12.3 + case 4: + result = int64(data[0])*100 + int64(data[1])*10 + int64(data[3]) - '0'*(100+10+1) + } + + if negative { + return -result + } + return result +} diff --git a/src/main/go/AlexanderYastrebov/calc_test.go b/src/main/go/AlexanderYastrebov/calc_test.go new file mode 100644 index 000000000..db7e27a2c --- /dev/null +++ b/src/main/go/AlexanderYastrebov/calc_test.go @@ -0,0 +1,86 @@ +package main + +import ( + "fmt" + "os" + "testing" +) + +func TestRoundJava(t *testing.T) { + for _, tc := range []struct { + value float64 + expected string + }{ + {value: -1.5, expected: "-1.0"}, + {value: -1.0, expected: "-1.0"}, + {value: -0.7, expected: "-1.0"}, + {value: -0.5, expected: "0.0"}, + {value: -0.3, expected: "0.0"}, + {value: 0.0, expected: "0.0"}, + {value: 0.3, expected: "0.0"}, + {value: 0.5, expected: "1.0"}, + {value: 0.7, expected: "1.0"}, + {value: 1.0, expected: "1.0"}, + {value: 1.5, expected: "2.0"}, + } { + if rounded := roundJava(tc.value); fmt.Sprintf("%.1f", rounded) != tc.expected { + t.Errorf("Wrong rounding of %v, expected: %s, got: %.1f", tc.value, tc.expected, rounded) + } + } +} + +func TestParseNumber(t *testing.T) { + for _, tc := range []struct { + value string + expected string + }{ + {value: "-99.9", expected: "-999"}, + {value: "-12.3", expected: "-123"}, + {value: "-1.5", expected: "-15"}, + {value: "-1.0", expected: "-10"}, + {value: "0.0", expected: "0"}, + {value: "0.3", expected: "3"}, + {value: "12.3", expected: "123"}, + {value: "99.9", expected: "999"}, + } { + if number := parseNumber([]byte(tc.value)); fmt.Sprintf("%d", number) != tc.expected { + t.Errorf("Wrong parsing of %v, expected: %s, got: %d", tc.value, tc.expected, number) + } + } +} + +var parseNumberSink int64 + +func 
BenchmarkParseNumber(b *testing.B) { + data1 := []byte("1.2") + data2 := []byte("-12.3") + + for i := 0; i < b.N; i++ { + parseNumberSink = parseNumber(data1) + parseNumber(data2) + } +} + +func BenchmarkProcess(b *testing.B) { + // $ ./create_measurements.sh 1000000 && mv measurements.txt measurements-1e6.txt + // Created file with 1,000,000 measurements in 514 ms + const filename = "../../../../measurements-1e6.txt" + + data, err := os.ReadFile(filename) + if err != nil { + b.Fatal(err) + } + + measurements := process(data) + rows := int64(0) + for _, m := range measurements { + rows += m.count + } + + b.ReportAllocs() + b.ResetTimer() + b.ReportMetric(float64(rows), "rows/op") + + for i := 0; i < b.N; i++ { + process(data) + } +} diff --git a/src/main/go/AlexanderYastrebov/go.mod b/src/main/go/AlexanderYastrebov/go.mod new file mode 100644 index 000000000..08f5bd193 --- /dev/null +++ b/src/main/go/AlexanderYastrebov/go.mod @@ -0,0 +1,3 @@ +module github.com/AlexanderYastrebov/1brc + +go 1.21.5 diff --git a/src/main/java-22/dev/morling/onebrc/CalculateAverage_linl33.java b/src/main/java-22/dev/morling/onebrc/CalculateAverage_linl33.java new file mode 100644 index 000000000..dc9fd23af --- /dev/null +++ b/src/main/java-22/dev/morling/onebrc/CalculateAverage_linl33.java @@ -0,0 +1,519 @@ +/* + * Copyright 2023 The original authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package dev.morling.onebrc; + +import jdk.incubator.vector.ByteVector; +import jdk.incubator.vector.VectorSpecies; +import sun.misc.Unsafe; + +import java.io.IOException; +import java.lang.foreign.*; +import java.lang.invoke.MethodHandle; +import java.nio.ByteOrder; +import java.nio.channels.FileChannel; +import java.nio.charset.StandardCharsets; +import java.nio.file.Paths; +import java.util.Arrays; +import java.util.concurrent.CompletableFuture; +import java.util.concurrent.Executors; +import java.util.stream.IntStream; + +public class CalculateAverage_linl33 { + private static final String FILE_PATH_PROPERTY = "dev.morling.onebrc.CalculateAverage_linl33.measurementsPath"; + private static final int WEATHER_STATION_LENGTH_MAX = 100; + private static final long WEATHER_STATION_DISTINCT_MAX = 10_000L; + private static final int N_THREADS = Runtime.getRuntime().availableProcessors(); + + private static final MemorySegment ALL = MemorySegment.NULL.reinterpret(Long.MAX_VALUE); + private static final VectorSpecies BYTE_SPECIES = ByteVector.SPECIES_PREFERRED; + + private static final Thread.Builder THREAD_BUILDER = Thread + .ofPlatform() + .name("1brc-CalculateAverage-", 0) + .inheritInheritableThreadLocals(false); + + private static final Unsafe UNSAFE; + + static { + if (ByteOrder.nativeOrder() != ByteOrder.LITTLE_ENDIAN) { + throw new UnsupportedOperationException("Error: BE JVMs are not supported"); + } + if ((BYTE_SPECIES.vectorByteSize() & (BYTE_SPECIES.vectorByteSize() - 1)) != 0) { + throw new UnsupportedOperationException(STR."Unsupported vectorByteSize \{BYTE_SPECIES.vectorByteSize()}"); + } + + try { + var f = Unsafe.class.getDeclaredField("theUnsafe"); + f.setAccessible(true); + UNSAFE = (Unsafe) f.get(null); + } catch (NoSuchFieldException | IllegalAccessException e) { + throw new RuntimeException(e); + } + } + + public static void main() throws InterruptedException, IOException { + final var filePath = Paths.get(System.getProperty(FILE_PATH_PROPERTY, 
"./measurements.txt")); + + try (final var channel = FileChannel.open(filePath)) { + final var inputMapped = channel.map(FileChannel.MapMode.READ_ONLY, 0, channel.size(), Arena.global()); + + final var chunkBounds = calcChunkBounds(inputMapped.address(), inputMapped.byteSize()); + final var maps = new HashTable[N_THREADS]; + + try (final var threadPool = Executors.newFixedThreadPool(N_THREADS, THREAD_BUILDER.factory()); + final var singleThreadExecutor = Executors.newSingleThreadExecutor(Thread.ofVirtual().factory())) { + final var rootTask = CompletableFuture.runAsync(new CalculateAverageTask(maps, chunkBounds, 0), threadPool); + + final var futures = IntStream + .range(1, N_THREADS) + .mapToObj(t -> CompletableFuture + .runAsync(new CalculateAverageTask(maps, chunkBounds, t), threadPool) + .runAfterBothAsync(rootTask, () -> maps[0].merge(maps[t]), singleThreadExecutor)) + .toArray(CompletableFuture[]::new); + + CompletableFuture.allOf(futures).join(); + } + + printSorted(maps[0]); + } + } + + private static long[] calcChunkBounds(final long mappedAddr, final long fileSizeBytes) { + final var chunkBounds = new long[N_THREADS + 1]; + chunkBounds[0] = mappedAddr; + chunkBounds[chunkBounds.length - 1] = mappedAddr + fileSizeBytes; + + final var chunkSize = (fileSizeBytes / N_THREADS) & -CalculateAverageTask.BATCH_SIZE_BYTES; + for (int i = 1; i < chunkBounds.length - 1; i++) { + chunkBounds[i] = chunkBounds[i - 1] + chunkSize; + } + + return chunkBounds; + } + + private static void printSorted(final HashTable temperatureMeasurements) { + final var weatherStations = new AggregatedMeasurement[(int) temperatureMeasurements.size]; + final var nameBuffer = new byte[WEATHER_STATION_LENGTH_MAX]; + + for (int i = 0; i < weatherStations.length; i++) { + final var offset = temperatureMeasurements.getOffset(i); + final var nameAddr = UNSAFE.getLong(offset); + final var nameLength = UNSAFE.getInt(offset + Integer.BYTES * 7); + MemorySegment.copy(ALL, ValueLayout.JAVA_BYTE, 
nameAddr, nameBuffer, 0, nameLength); + final var nameStr = new String(nameBuffer, 0, nameLength, StandardCharsets.UTF_8); + weatherStations[i] = new AggregatedMeasurement(nameStr, i); + } + + Arrays.sort(weatherStations); + + System.out.print('{'); + for (int i = 0; i < weatherStations.length - 1; i++) { + printAggMeasurement(weatherStations[i], temperatureMeasurements); + System.out.print(','); + System.out.print(' '); + } + printAggMeasurement(weatherStations[weatherStations.length - 1], temperatureMeasurements); + System.out.println('}'); + } + + private static void printAggMeasurement(final AggregatedMeasurement aggMeasurement, + final HashTable temperatureMeasurements) { + final var offset = temperatureMeasurements.getOffset(aggMeasurement.id()); + + // name + System.out.print(aggMeasurement.name()); + System.out.print('='); + + // min + printAsDouble(offset + Integer.BYTES * 5); + System.out.print('/'); + + // mean + final double total = UNSAFE.getLong(offset + Integer.BYTES * 2); + final var count = UNSAFE.getInt(offset + Integer.BYTES * 4); + System.out.print(round(total / count / 10d)); + System.out.print('/'); + + // max + printAsDouble(offset + Integer.BYTES * 6); + } + + private static void printAsDouble(final long addr) { + final var val = (double) UNSAFE.getInt(addr); + System.out.print(val / 10d); + } + + private static double round(final double d) { + return Math.round(d * 10d) / 10d; + } + + private static class CalculateAverageTask implements Runnable { + public static final int BATCH_SIZE_BYTES = BYTE_SPECIES.vectorByteSize(); + + private final HashTable[] maps; + private final long[] chunkBounds; + private final long chunkStart; + private final long chunkEnd; + private final int t; + + private HashTable map; + + public CalculateAverageTask(HashTable[] maps, long[] chunkBounds, int t) { + this.maps = maps; + this.chunkBounds = chunkBounds; + this.chunkStart = chunkBounds[t]; + this.chunkEnd = chunkBounds[t + 1]; + this.t = t; + } + + @Override + 
public void run() { + this.maps[this.t] = new HashTable(); + this.map = this.maps[this.t]; + + var lineStart = this.chunkBounds[0]; + // walk back to find the previous '\n' and use it as lineStart + for (long i = this.chunkStart - 1; i > this.chunkBounds[0]; i--) { + if (UNSAFE.getByte(i) == (byte) '\n') { + lineStart = i + 1L; + break; + } + } + + final var vectorLimit = this.chunkStart + ((this.chunkEnd - this.chunkStart) & -BATCH_SIZE_BYTES); + for (long i = this.chunkStart; i < vectorLimit; i += BATCH_SIZE_BYTES) { + var lfMask = ByteVector.fromMemorySegment(BYTE_SPECIES, ALL, i, ByteOrder.nativeOrder()) + .eq((byte) '\n') + .toLong(); + + final var lfCount = Long.bitCount(lfMask); + for (int j = 0; j < lfCount; j++) { + final var lfPosRelative = Long.numberOfTrailingZeros(lfMask); + final var lfAddress = i + lfPosRelative; + processLine(lineStart, lfAddress); + + lineStart = lfAddress + 1L; + // unset the lowest set bit, should compile to BLSR + lfMask &= lfMask - 1L; + } + } + + if (vectorLimit != this.chunkEnd) { + processTrailingBytes(lineStart, vectorLimit, this.chunkEnd); + } + } + + private void processTrailingBytes(long lineStart, + final long start, + final long end) { + for (long i = start; i < end; i++) { + final var b = UNSAFE.getByte(i); + if (b != (byte) '\n') { + continue; + } + + processLine(lineStart, i); + lineStart = i + 1; + } + } + + private void processLine(final long lineStart, final long lfAddress) { + // read 5 bytes before '\n' + // the temperature is formatted to 1 decimal place + // therefore the shortest temperature value is 0.0 + // so there are always at least 5 bytes between the location name and '\n' + final var trailing5Bytes = UNSAFE.getLong(lfAddress - 5); + final int trailingDWordRaw = (int) (trailing5Bytes >>> 8); + + // select the low nibble for each byte, '0'-'9' -> 0-9, ';' -> 11, '-' -> 13 + final var trailingDWordLowNibble = trailingDWordRaw & 0x0f_0f_0f_0f; + // parse the 2 digits around the decimal point (note that 
these 2 digits must be present) + final var trailingDigitsParsed = (trailingDWordLowNibble * 0x00_0a_00_01) >>> 24; + + // this byte must be ('-' & 0xf), (';' & 0xf), or a valid digit (0-9) + final var secondHighestByte = trailingDWordLowNibble & 0xf; + + var temperature = trailingDigitsParsed; + var lineLength = lfAddress - lineStart - 4; + + if (secondHighestByte > 9) { + if (secondHighestByte == ('-' & 0xf)) { + lineLength--; + temperature = -temperature; + } + } + else { + lineLength--; + temperature += secondHighestByte * 100; + + final var isNegative = (trailing5Bytes & 0xffL) == '-'; + if (isNegative) { + lineLength--; + temperature = -temperature; + } + } + + this.map.putEntry(lineStart, (int) lineLength, temperature); + } + } + + /** + * Open addressing, linear probing hash map backed by off-heap memory + */ + private static class HashTable { + private static final int TRUNCATED_HASH_BITS = 26; + // max # of unique keys + private static final long DENSE_SIZE = WEATHER_STATION_DISTINCT_MAX; + // max hash code (exclusive) + private static final long SPARSE_SIZE = 1L << (TRUNCATED_HASH_BITS + 1); + public static final long SPARSE_SCALE = 32; + public static final long DENSE_SCALE = 8; + + public final long sparseAddress; + public final long denseAddress; + public long size; + + public HashTable() { + var arena = new MallocArena(Arena.global()); + var callocArena = new CallocArena(Arena.global()); + + final var sparse = callocArena.allocate(ValueLayout.JAVA_BYTE, SPARSE_SIZE * SPARSE_SCALE); + this.sparseAddress = (sparse.address() + MallocArena.MAX_ALIGN) & -MallocArena.MAX_ALIGN; + + final var dense = arena.allocate(ValueLayout.JAVA_BYTE, DENSE_SIZE * DENSE_SCALE); + this.denseAddress = (dense.address() + MallocArena.MAX_ALIGN) & -MallocArena.MAX_ALIGN; + } + + public long getOffset(final long index) { + return UNSAFE.getLong(this.denseAddress + index * DENSE_SCALE); + } + + public void putEntry(final long keyAddress, final int keyLength, final int value) { 
+ final var hash = hash(keyAddress, keyLength); + this.putEntryInternal(hash, keyAddress, keyLength, value, 1, value, value); + } + + private void putEntryInternal(final long hash, + final long keyAddress, + final int keyLength, + final long temperature, + final int count, + final int temperatureMin, + final int temperatureMax) { + final var sparseOffset = this.sparseAddress + truncateHash(hash) * SPARSE_SCALE; + + for (long n = 0, sparseLinearOffset = sparseOffset; n < WEATHER_STATION_DISTINCT_MAX; n++, sparseLinearOffset += SPARSE_SCALE) { + final var entryKeyAddress = UNSAFE.getLong(sparseLinearOffset); + + if (entryKeyAddress == 0L) { + this.add(sparseLinearOffset, keyAddress, keyLength, temperature, count, temperatureMin, temperatureMax); + this.size++; + return; + } + + if (mismatch(keyAddress, entryKeyAddress, keyLength)) { + continue; + } + + final var currMin = UNSAFE.getInt(sparseLinearOffset + Integer.BYTES * 5); + final var currMax = UNSAFE.getInt(sparseLinearOffset + Integer.BYTES * 6); + final var currTotal = UNSAFE.getLong(sparseLinearOffset + Integer.BYTES * 2); + final var currCount = UNSAFE.getInt(sparseLinearOffset + Integer.BYTES * 4); + + UNSAFE.putLong(sparseLinearOffset + Integer.BYTES * 2, currTotal + temperature); + UNSAFE.putInt(sparseLinearOffset + Integer.BYTES * 4, currCount + count); + + if (temperatureMin < currMin) { + UNSAFE.putInt(sparseLinearOffset + Integer.BYTES * 5, temperatureMin); + } + + if (temperatureMax > currMax) { + UNSAFE.putInt(sparseLinearOffset + Integer.BYTES * 6, temperatureMax); + } + + return; + } + } + + public void merge(final HashTable other) { + final var otherSize = other.size; + for (long i = 0; i < otherSize; i++) { + final var offset = other.getOffset(i); + + final var keyAddress = UNSAFE.getLong(offset); + final var keyLength = UNSAFE.getInt(offset + Integer.BYTES * 7); + final var hash = hash(keyAddress, keyLength); + + this.putEntryInternal( + hash, + keyAddress, + keyLength, + UNSAFE.getLong(offset + 
Integer.BYTES * 2), + UNSAFE.getInt(offset + Integer.BYTES * 4), + UNSAFE.getInt(offset + Integer.BYTES * 5), + UNSAFE.getInt(offset + Integer.BYTES * 6)); + } + } + + private void add(final long sparseOffset, + final long keyAddress, + final int keyLength, + final long temperature, + final int count, + final int temperatureMin, + final int temperatureMax) { + // new entry, initialize sparse and dense + final var denseOffset = this.denseAddress + this.size * DENSE_SCALE; + UNSAFE.putLong(denseOffset, sparseOffset); + + UNSAFE.putLong(sparseOffset, keyAddress); + UNSAFE.putLong(sparseOffset + Integer.BYTES * 2, temperature); + UNSAFE.putInt(sparseOffset + Integer.BYTES * 4, count); + UNSAFE.putInt(sparseOffset + Integer.BYTES * 5, temperatureMin); + UNSAFE.putInt(sparseOffset + Integer.BYTES * 6, temperatureMax); + UNSAFE.putInt(sparseOffset + Integer.BYTES * 7, keyLength); + } + + private static boolean mismatch(final long leftAddr, final long rightAddr, final int length) { + // key length compare is unnecessary + // strings compared through delimiter byte ';' + + final var loopBound = length >= (BYTE_SPECIES.vectorByteSize() - 1) ? 
((length + 1) & -BYTE_SPECIES.vectorByteSize()) : 0; + for (long i = 0; i < loopBound; i += BYTE_SPECIES.vectorByteSize()) { + final var l = ByteVector.fromMemorySegment(BYTE_SPECIES, ALL, leftAddr + i, ByteOrder.nativeOrder()); + final var r = ByteVector.fromMemorySegment(BYTE_SPECIES, ALL, rightAddr + i, ByteOrder.nativeOrder()); + if (!l.eq(r).allTrue()) { + return true; + } + } + + final var l = ByteVector.fromMemorySegment(BYTE_SPECIES, ALL, leftAddr + loopBound, ByteOrder.nativeOrder()); + final var r = ByteVector.fromMemorySegment(BYTE_SPECIES, ALL, rightAddr + loopBound, ByteOrder.nativeOrder()); + final var eqMask = l.eq(r).toLong(); + + return Long.numberOfTrailingZeros(~eqMask) < ((length + 1) & (BYTE_SPECIES.vectorByteSize() - 1)); + // to support platforms without TZCNT, the check can be replaced with + // a comparison to lowestZero = ~eqMask & (eqMask + 1) + } + + // Use the leading and trailing few bytes as hash + // this performs better than computing a good hash + private static long hash(final long keyAddress, final int keyLength) { + final var leadingQWord = UNSAFE.getLong(keyAddress); + // the constant is the 64 bit FNV-1 offset basis + final var hash = -3750763034362895579L ^ leadingQWord; + if (keyLength < Integer.BYTES) { + // the key is at least 2 bytes (if you count the delimiter) + return hash & 0xffffL; + } + else { + final var trailingDWord = UNSAFE.getLong(keyAddress + keyLength - Integer.BYTES) & 0xffffffffL; + // only the lower dword in hash is guaranteed to exist so shift left 32 + return (hash << Integer.SIZE) ^ trailingDWord; + } + } + + private static long truncateHash(final long hash) { + return ((hash >>> TRUNCATED_HASH_BITS) ^ hash) & ((1L << TRUNCATED_HASH_BITS) - 1L); + } + } + + private static class MallocArena implements Arena { + public static final long MAX_ALIGN = 1L << 21; + + protected static final Linker LINKER = Linker.nativeLinker(); + protected static final AddressLayout C_POINTER = (AddressLayout) 
LINKER.canonicalLayouts().get("void*"); + protected static final ValueLayout C_SIZE_T = (ValueLayout) LINKER.canonicalLayouts().get("size_t"); + private static final MethodHandle MALLOC = LINKER.downcallHandle( + LINKER.defaultLookup().find("malloc").orElseThrow(), + FunctionDescriptor.of(C_POINTER, C_SIZE_T), + Linker.Option.critical(false)); + private static final MethodHandle FREE = LINKER.downcallHandle( + LINKER.defaultLookup().find("free").orElseThrow(), + FunctionDescriptor.ofVoid(C_POINTER), + Linker.Option.critical(false)); + protected static final MethodHandle CALLOC = LINKER.downcallHandle( + LINKER.defaultLookup().find("calloc").orElseThrow(), + FunctionDescriptor.of(C_POINTER, C_SIZE_T, C_SIZE_T), + Linker.Option.critical(false)); + + private final Arena arena; + + public MallocArena(Arena arena) { + this.arena = arena; + } + + @Override + public MemorySegment allocate(final long byteSize, final long byteAlignment) { + return malloc(byteSize + MAX_ALIGN).reinterpret(this, MallocArena::free); + } + + @Override + public MemorySegment.Scope scope() { + return arena.scope(); + } + + @Override + public void close() { + arena.close(); + } + + private static MemorySegment malloc(final long byteSize) { + try { + return ((MemorySegment) MALLOC.invokeExact(byteSize)).reinterpret(byteSize); + } + catch (Throwable e) { + throw new RuntimeException(e); + } + } + + protected static void free(final MemorySegment address) { + try { + FREE.invokeExact(address); + } + catch (Throwable e) { + throw new RuntimeException(e); + } + } + } + + private static class CallocArena extends MallocArena { + public CallocArena(Arena arena) { + super(arena); + } + + @Override + public MemorySegment allocate(final long byteSize, final long byteAlignment) { + return calloc(byteSize + MAX_ALIGN).reinterpret(this, MallocArena::free); + } + + private static MemorySegment calloc(final long byteSize) { + try { + return ((MemorySegment) MallocArena.CALLOC.invokeExact(1L, 
byteSize)).reinterpret(byteSize); + } + catch (Throwable e) { + throw new RuntimeException(e); + } + } + } + + private record AggregatedMeasurement(String name, long id) implements Comparable { + + @Override + public int compareTo(final AggregatedMeasurement other) { + return name.compareTo(other.name); + } +}} diff --git a/src/main/java/dev/morling/onebrc/CalculateAverage_0xshivamagarwal.java b/src/main/java/dev/morling/onebrc/CalculateAverage_0xshivamagarwal.java new file mode 100644 index 000000000..77a04bf4d --- /dev/null +++ b/src/main/java/dev/morling/onebrc/CalculateAverage_0xshivamagarwal.java @@ -0,0 +1,137 @@ +/* + * Copyright 2023 The original authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package dev.morling.onebrc; + +import static java.lang.foreign.ValueLayout.JAVA_BYTE; +import static java.nio.file.StandardOpenOption.READ; + +import java.io.IOException; +import java.lang.foreign.Arena; +import java.lang.foreign.MemorySegment; +import java.nio.channels.FileChannel; +import java.nio.file.Path; +import java.util.HashMap; +import java.util.Map; +import java.util.stream.Collectors; +import java.util.stream.IntStream; + +public class CalculateAverage_0xshivamagarwal { + private static final Path FILE = Path.of("./measurements.txt"); + private static final byte COLON = ';'; + private static final byte NEW_LINE = '\n'; + private static final byte HYPHEN = '-'; + private static final byte DOT = '.'; + private static final int NO_OF_THREADS = Runtime.getRuntime().availableProcessors(); + + private static long[] mergeFn(final long[] v1, final long[] v2) { + v1[0] = Math.min(v1[0], v2[0]); + v1[1] = Math.max(v1[1], v2[1]); + v1[2] += v2[2]; + v1[3] += v2[3]; + return v1; + } + + private static String toString(final Map.Entry entry) { + var m = entry.getValue(); + + return entry.getKey() + + '=' + + m[0] / 10.0 + + '/' + + Math.round(1.0 * m[2] / m[3]) / 10.0 + + '/' + + m[1] / 10.0; + } + + private static Map parseData( + final MemorySegment data, long offset, final long limit) { + var map = new HashMap(10000, 1); + var sep = false; + var neg = false; + var key = new byte[100]; + var len = 0; + var val = 0; + + while (offset < limit) { + var b = data.get(JAVA_BYTE, offset++); + if (sep) { + if (b == NEW_LINE) { + val = neg ? 
-val : val; + map.merge( + new String(key, 0, len), + new long[]{ val, val, val, 1 }, + CalculateAverage_0xshivamagarwal::mergeFn); + sep = false; + neg = false; + len = 0; + val = 0; + } + else if (b == HYPHEN) { + neg = true; + } + else if (b != DOT) { + val = val * 10 + (b - 48); + } + } + else if (b == COLON) { + sep = true; + } + else { + key[len++] = b; + } + } + + return map; + } + + public static void main(String[] args) throws IOException { + final String result; + + try (var channel = FileChannel.open(FILE, READ); + var arena = Arena.ofShared()) { + var data = channel.map(FileChannel.MapMode.READ_ONLY, 0, channel.size(), arena); + var chunkSize = data.byteSize() / NO_OF_THREADS; + var chunks = new long[NO_OF_THREADS + 1]; + chunks[NO_OF_THREADS] = data.byteSize(); + + for (int i = 1; i < NO_OF_THREADS; ++i) { + var chunkPos = i * chunkSize; + + while (data.get(JAVA_BYTE, chunkPos++) != NEW_LINE) { + } + + chunks[i] = chunkPos; + } + + result = IntStream.range(0, NO_OF_THREADS) + .mapToObj(i -> parseData(data, chunks[i], chunks[i + 1])) + .parallel() + .reduce( + (m1, m2) -> { + m2.forEach((k, v) -> m1.merge(k, v, CalculateAverage_0xshivamagarwal::mergeFn)); + return m1; + }) + .map( + map -> map.entrySet().parallelStream() + .sorted(Map.Entry.comparingByKey()) + .map(CalculateAverage_0xshivamagarwal::toString) + .collect(Collectors.joining(", ", "{", "}"))) + .orElse(null); + } + + System.out.println(result); + } +} diff --git a/src/main/java/dev/morling/onebrc/CalculateAverage_3j5a.java b/src/main/java/dev/morling/onebrc/CalculateAverage_3j5a.java new file mode 100644 index 000000000..178cfacee --- /dev/null +++ b/src/main/java/dev/morling/onebrc/CalculateAverage_3j5a.java @@ -0,0 +1,277 @@ +/* + * Copyright 2023 The original authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package dev.morling.onebrc; + +import java.io.IOException; +import java.io.RandomAccessFile; +import java.lang.invoke.MethodHandle; +import java.nio.MappedByteBuffer; +import java.nio.channels.FileChannel; +import java.nio.charset.StandardCharsets; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashMap; +import java.util.List; + +import static java.lang.Class.forName; +import static java.lang.System.out; +import static java.lang.invoke.MethodHandles.lookup; +import static java.util.Comparator.comparing; + +public class CalculateAverage_3j5a { + + private static final String FILE = "./measurements.txt"; + + public static void main(String[] args) throws IOException { + try (RandomAccessFile measurementsFile = new RandomAccessFile(FILE, "r")) { + var slices = slice(measurementsFile); + var measurementsChannel = measurementsFile.getChannel(); + slices.stream().parallel().map(slice -> { + MappedByteBuffer measurementsSlice = map(slice, measurementsChannel); + var measurementBuffer = new byte[rules.maxMeasurementLength]; + var measurements = HashMap. 
newHashMap(rules.uniqueStationsCount); + while (measurementsSlice.hasRemaining()) { + var a = nextStationMeasurement(measurementBuffer, measurementsSlice); + var stats = measurements.get(a.station); + if (stats == null) { + a.station.detachFromMeasurementBuffer(); + stats = new StationMeasurementStatistics(a); + measurements.put(a.station, stats); + } + else { + stats.add(a); + } + } + return measurements; + }).reduce((aslice, bslice) -> { + aslice.forEach((astation, astats) -> { + var bstats = bslice.putIfAbsent(astation, astats); + if (bstats != null) { + bstats.merge(astats); + } + }); + return bslice; + }).ifPresent(measurements -> { + var results = new StringBuilder(measurements.size() * (rules.maxStationNameLength + rules.maxStationStatisticsOutputLength)); + measurements.values().stream() + .sorted(comparing(StationMeasurementStatistics::getName)) + .forEach(stationStats -> results.append(stationStats).append(", ")); + out.println("{" + results.substring(0, results.length() - 2) + "}"); + }); + } + } + + record Rules(int minMeasurementLength, int maxStationNameLength, + int maxMeasurementLength, int maxStationStatisticsOutputLength, + int uniqueStationsCount) { + Rules() { + this(5, 100, 106, 18, 10_000); + } + } + + private static final Rules rules = new Rules(); + + record MeasurementsSlice(long start, long length) { + } + + static class Station { + + private byte[] name; + final int length; + private int hash; + + private static final MethodHandle vectorizedHashCode; + private static final int T_BYTE = 8; + + static { + try { + var arraysSupport = forName("jdk.internal.util.ArraysSupport"); + Class[] vectorizedHashCodeSignature = { Object.class, int.class, int.class, int.class, int.class }; + var vectorizedHashCodeMethod = arraysSupport.getDeclaredMethod("vectorizedHashCode", vectorizedHashCodeSignature); + vectorizedHashCode = lookup().unreflect(vectorizedHashCodeMethod); + } + catch (NoSuchMethodException | IllegalAccessException | 
ClassNotFoundException e) { + throw new RuntimeException(e); + } + } + + Station(byte[] name, int length) { + this.name = name; + this.length = length; + } + + public void detachFromMeasurementBuffer() { + var n = new byte[length]; + System.arraycopy(name, 0, n, 0, length); + this.name = n; + } + + @Override + public boolean equals(Object that) { + return Arrays.mismatch(this.name, 0, length, ((Station) that).name, 0, length) < 0; + } + + @Override + public int hashCode() { + if (hash == 0) { + try { + hash = (int) vectorizedHashCode.invokeExact((Object) name, 0, length, 1, T_BYTE); + } + catch (Throwable e) { + throw new RuntimeException(e); + } + } + return hash; + } + + } + + record StationMeasurement(Station station, int temperature) { + } + + private static class StationMeasurementStatistics { + + private final byte[] bname; + private String name; + private int min; + private int max; + private long sum; + private int count = 1; + + StationMeasurementStatistics(StationMeasurement stationMeasurement) { + this.bname = stationMeasurement.station.name; + this.min = stationMeasurement.temperature; + this.max = stationMeasurement.temperature; + this.sum = stationMeasurement.temperature; + } + + public String getName() { + if (name == null) { + name = new String(bname, StandardCharsets.UTF_8); + } + return name; + } + + void add(StationMeasurement measurement) { + var temperature = measurement.temperature; + update(1, temperature, temperature, temperature); + } + + void merge(StationMeasurementStatistics other) { + update(other.count, other.min, other.max, other.sum); + } + + private void update(int count, int min, int max, long sum) { + this.count += count; + if (this.min > min) { + this.min = min; + } + if (this.max < max) { + this.max = max; + } + this.sum += sum; + } + + @Override + public String toString() { + var name = getName(); + var min = this.min / 10f; + var mean = Math.round(this.sum / (float) this.count) / 10f; + var max = this.max / 10f; + return new 
StringBuilder(name.length() + rules.maxStationStatisticsOutputLength) + .append(name).append("=").append(min).append("/").append(mean).append("/").append(max) + .toString(); + } + } + + private static StationMeasurement nextStationMeasurement(byte[] measurement, MappedByteBuffer memoryMappedSlice) { + byte b; + int i = rules.minMeasurementLength; + memoryMappedSlice.get(measurement, 0, i); + while ((b = memoryMappedSlice.get()) != '\n') { + measurement[i] = b; + i++; + } + var zeroOffset = '0'; + int temperature = measurement[--i] - zeroOffset; + i--; // skipping dot + var base = 10; + while ((b = measurement[--i]) != ';') { + if (b == '-') { + temperature = -temperature; + } + else { + temperature = base * (b - zeroOffset) + temperature; + base *= base; + } + } + return new StationMeasurement(new Station(measurement, i), temperature); + } + + private static MappedByteBuffer map(MeasurementsSlice slice, FileChannel measurements) { + try { + return measurements.map(FileChannel.MapMode.READ_ONLY, slice.start, slice.length); + } + catch (IOException e) { + throw new RuntimeException(e); + } + } + + private static List slice(RandomAccessFile measurements) throws IOException { + int chunks = Runtime.getRuntime().availableProcessors(); + List measurementSlices; + while ((measurementSlices = slice(measurements, chunks)) == null) { + chunks++; + } + return measurementSlices; + } + + private static List slice(RandomAccessFile measurements, int chunks) throws IOException { + long measurementsFileLength = measurements.length(); + long chunkLength = 0; + long remainder; + if (chunks < measurementsFileLength) { + chunks--; + do { + chunkLength = measurementsFileLength / ++chunks; + remainder = measurementsFileLength % chunkLength; + } while (chunkLength + remainder > Integer.MAX_VALUE); + } + if (chunkLength <= rules.maxMeasurementLength) { + return List.of(new MeasurementsSlice(0, measurementsFileLength)); + } + var measurementSlices = new ArrayList(chunks); + var sliceStart = 
0L; + for (int i = 0; i < chunks - 1; i++) { + var sliceLength = chunkLength; + measurements.seek(sliceStart + sliceLength); + while (measurements.readByte() != '\n') { + measurements.seek(sliceStart + ++sliceLength); + } + sliceLength++; + if (sliceLength > Integer.MAX_VALUE) { + return null; + } + measurementSlices.add(new MeasurementsSlice(sliceStart, sliceLength)); + sliceStart = sliceStart + sliceLength; + } + var previousSlice = measurementSlices.getLast(); + var lastSliceStart = previousSlice.start + previousSlice.length; + measurementSlices.addLast(new MeasurementsSlice(lastSliceStart, measurementsFileLength - lastSliceStart)); + return measurementSlices; + } + +} diff --git a/src/main/java/dev/morling/onebrc/CalculateAverage_C5H12O5.java b/src/main/java/dev/morling/onebrc/CalculateAverage_C5H12O5.java index a7baf9baf..4c0351a28 100644 --- a/src/main/java/dev/morling/onebrc/CalculateAverage_C5H12O5.java +++ b/src/main/java/dev/morling/onebrc/CalculateAverage_C5H12O5.java @@ -15,136 +15,386 @@ */ package dev.morling.onebrc; +import sun.misc.Unsafe; + import java.io.IOException; +import java.io.RandomAccessFile; +import java.lang.reflect.Field; import java.nio.ByteBuffer; +import java.nio.ByteOrder; import java.nio.channels.AsynchronousFileChannel; import java.nio.channels.CompletionHandler; import java.nio.charset.StandardCharsets; -import java.nio.file.Paths; +import java.nio.file.Files; +import java.nio.file.Path; import java.nio.file.StandardOpenOption; +import java.util.ArrayList; import java.util.Arrays; import java.util.HashMap; +import java.util.List; import java.util.Map; import java.util.Set; import java.util.TreeMap; -import java.util.concurrent.BlockingQueue; import java.util.concurrent.Callable; -import java.util.concurrent.ExecutionException; +import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; import java.util.concurrent.FutureTask; -import java.util.concurrent.LinkedBlockingQueue; +import 
java.util.concurrent.LinkedTransferQueue; +import java.util.concurrent.TransferQueue; /** - * Calculates the average using AIO and multiple threads. + * Results on Mac mini (Apple M2 with 8-core CPU / 8GB unified memory): + *
+ *   using AIO and multiple threads:
+ *     120.15s user 4.33s system 710% cpu 17.522 total
+ *
+ *   reduce the number of memory copies:
+ *      45.87s user 2.82s system 530% cpu  9.185 total
+ *
+ *   processing byte array backwards and using bitwise operation to find specific byte (inspired by thomaswue):
+ *      25.38s user 3.44s system 342% cpu  8.406 total
+ * 
* * @author Xylitol */ +@SuppressWarnings("unchecked") public class CalculateAverage_C5H12O5 { - private static final int BUFFER_CAPACITY = 1024 * 1024 * 10; - private static final int MAP_CAPACITY = 10000; - private static final int PROCESSORS = Runtime.getRuntime().availableProcessors(); - private static final BlockingQueue BYTES_QUEUE = new LinkedBlockingQueue<>(PROCESSORS); - private static long readPosition; + private static final int AVAILABLE_PROCESSOR_NUM = Runtime.getRuntime().availableProcessors(); + private static final int TRANSFER_QUEUE_CAPACITY = 1024 / 16 / AVAILABLE_PROCESSOR_NUM; // 1GB memory max + private static final int BYTE_BUFFER_CAPACITY = 1024 * 1024 * 16; // 16MB one time + private static final int EXPECTED_MAPPINGS_NUM = 10000; + + /** + * Fragment the file into chunks. + */ + private static long[] fragment(Path path) throws IOException { + long size = Files.size(path); + long chunk = size / AVAILABLE_PROCESSOR_NUM; + List positions = new ArrayList<>(); + try (RandomAccessFile file = new RandomAccessFile(path.toFile(), "r")) { + long position = chunk; + for (int i = 0; i < AVAILABLE_PROCESSOR_NUM - 1; i++) { + if (position >= size) { + break; + } + file.seek(position); + // move the position to the next newline byte + while (file.read() != '\n') { + position++; + } + positions.add(++position); + position += chunk; + } + } + if (positions.isEmpty() || positions.getLast() < size) { + positions.add(size); + } + return positions.stream().mapToLong(Long::longValue).toArray(); + } public static void main(String[] args) throws Exception { - System.out.println(calc("./measurements.txt")); + // fragment the input file + Path path = Path.of("./measurements.txt"); + long[] positions = fragment(path); + + // start the calculation tasks + FutureTask>[] tasks = new FutureTask[positions.length]; + for (int i = 0; i < positions.length; i++) { + tasks[i] = new FutureTask<>(new Calculator(path, (i == 0 ? 
0 : positions[i - 1]), positions[i])); + new Thread(tasks[i]).start(); + } + + // wait for the results + Map result = HashMap.newHashMap(EXPECTED_MAPPINGS_NUM); + for (FutureTask> task : tasks) { + task.get().forEach((k, v) -> result.merge(k, v, MeasurementData::merge)); + } + + // sort and print the results + TreeMap sorted = new TreeMap<>(); + for (Map.Entry entry : result.entrySet()) { + sorted.put(new String(entry.getKey().bytes, StandardCharsets.UTF_8), entry.getValue()); + } + System.out.println(sorted); } /** - * Calculate the average. + * The calculation task. */ - public static String calc(String path) throws IOException, ExecutionException, InterruptedException { - readPosition = 0; - Map result = HashMap.newHashMap(MAP_CAPACITY); - // read and offer to queue - try (AsynchronousFileChannel channel = AsynchronousFileChannel.open( - Paths.get(path), Set.of(StandardOpenOption.READ), Executors.newVirtualThreadPerTaskExecutor())) { - ByteBuffer buffer = ByteBuffer.allocateDirect(BUFFER_CAPACITY); - channel.read(buffer, readPosition, buffer, new CompletionHandler<>() { + private static class Calculator implements Callable> { + private final TransferQueue transfer = new LinkedTransferQueue<>(); + private final AsynchronousFileChannel asyncChannel; + private final long limit; + private long position; + + public Calculator(Path file, long position, long limit) throws IOException { + ExecutorService executor = Executors.newVirtualThreadPerTaskExecutor(); + this.asyncChannel = AsynchronousFileChannel.open(file, Set.of(StandardOpenOption.READ), executor); + this.position = position; + this.limit = limit; + } + + @Override + public Map call() throws InterruptedException { + ByteBuffer buffer = ByteBuffer.allocateDirect(BYTE_BUFFER_CAPACITY); + asyncChannel.read(buffer, position, buffer, new CompletionHandler<>() { @Override - public void completed(Integer bytesRead, ByteBuffer buffer) { - try { - if (bytesRead > 0) { - for (int i = buffer.position() - 1; i >= 0; i--) 
{ - if (buffer.get(i) == '\n') { - buffer.limit(i + 1); - break; - } - } - buffer.flip(); - byte[] bytes = new byte[buffer.remaining()]; - buffer.get(bytes); - readPosition += buffer.limit(); - BYTES_QUEUE.put(bytes); - buffer.clear(); - channel.read(buffer, readPosition, buffer, this); - } - else { - for (int i = 0; i < PROCESSORS; i++) { - BYTES_QUEUE.put(new byte[0]); + public void completed(Integer readSize, ByteBuffer buffer) { + if (position + readSize >= limit) { + buffer.limit(readSize - (int) (position + readSize - limit)); + } + else { + for (int i = buffer.position() - 1; i >= 0; i--) { + if (buffer.get(i) == '\n') { + // truncate the buffer to the last newline byte + buffer.limit(i + 1); + break; } } } - catch (InterruptedException e) { - Thread.currentThread().interrupt(); + buffer.flip(); + byte[] bytes = new byte[buffer.limit() + 1]; + // add a newline byte at the beginning + bytes[0] = '\n'; + buffer.get(bytes, 1, buffer.limit()); + transfer(bytes); + if ((position += buffer.limit()) < limit) { + buffer.clear(); + asyncChannel.read(buffer, position, buffer, this); + } + else { + // stop signal + transfer(new byte[0]); } } @Override public void failed(Throwable exc, ByteBuffer buffer) { - // ignore + transfer(new byte[0]); } }); + return process(); + } - @SuppressWarnings("unchecked") - FutureTask>[] tasks = new FutureTask[PROCESSORS]; - for (int i = 0; i < PROCESSORS; i++) { - tasks[i] = new FutureTask<>(new Task()); - new Thread(tasks[i]).start(); + /** + * Transfer or put the bytes to the queue. 
+ */ + private void transfer(byte[] bytes) { + try { + if (transfer.size() >= TRANSFER_QUEUE_CAPACITY) { + transfer.transfer(bytes); + } + else { + transfer.put(bytes); + } } - for (FutureTask> task : tasks) { - task.get().forEach((k, v) -> result.merge(k.toString(), v, MeasurementData::merge)); + catch (InterruptedException e) { + throw new RuntimeException(e); } } - return new TreeMap<>(result).toString(); + + /** + * Take and process the bytes from the queue. + */ + private Map process() throws InterruptedException { + Map result = HashMap.newHashMap(EXPECTED_MAPPINGS_NUM); + for (byte[] bytes = transfer.take(); bytes.length > 0; bytes = transfer.take()) { + Station station = new Station(bytes); + // read the bytes backwards + for (int position = bytes.length - 2; position >= 1; position--) { + + // calculate the temperature value + int temperature = bytes[position] - '0' + (bytes[position -= 2] - '0') * 10; + byte unknownByte = bytes[--position]; + int semicolon = switch (unknownByte) { + case ';' -> position; + case '-' -> { + temperature = -temperature; + yield --position; + } + default -> { + temperature += (unknownByte - '0') * 100; + if (bytes[--position] == '-') { + temperature = -temperature; + --position; + } + yield position; + } + }; + + // calculate the station name hash + int hash = 1; + while (true) { + long temp = LineFinder.previousLong(bytes, position); + int distance = LineFinder.NATIVE.fromRight(temp); + if (distance == 0) { + // current byte is '\n' + break; + } + position -= distance; + if (distance == 8) { + // can't find '\n' in previous 8 bytes + hash = 31 * hash + (int) (temp ^ (temp >>> 32)); + continue; + } + // clear the redundant bytes + temp = LineFinder.NATIVE.clearLeft(temp, distance); + hash = 31 * hash + (int) (temp ^ (temp >>> 32)); + } + + // merge data to the result map + MeasurementData data = result.get(station.slice(hash, position + 1, semicolon)); + if (data == null) { + result.put(station.copy(), new 
MeasurementData(temperature)); + } else { + data.merge(temperature); + } + } + } + return result; + } } /** - * The measurement name. + * To find the nearest newline byte position in a long. */ - private record MeasurementName(byte[] bytes, int length) { + private interface LineFinder { + // choose the implementation according to the native byte order + LineFinder NATIVE = ByteOrder.nativeOrder() == ByteOrder.LITTLE_ENDIAN ? LELineFinder.INST : BELineFinder.INST; - @Override - public boolean equals(Object name) { - MeasurementName other = (MeasurementName) name; - if (other.length != length) { - return false; + Unsafe UNSAFE = initUnsafe(); + int BYTE_ARRAY_BASE_OFFSET = UNSAFE.arrayBaseOffset(byte[].class); + int LONG_BYTES = Long.SIZE / Byte.SIZE; + + static Unsafe initUnsafe() { + try { + Field theUnsafe = Unsafe.class.getDeclaredField("theUnsafe"); + theUnsafe.setAccessible(true); + return (Unsafe) theUnsafe.get(Unsafe.class); + } + catch (NoSuchFieldException | IllegalAccessException e) { + throw new RuntimeException(e); } - return Arrays.compare(bytes, 0, length, other.bytes, 0, length) == 0; } - @Override - public int hashCode() { - int result = 1; - for (int i = 0; i < length; i++) { - result = 31 * result + bytes[i]; + static long previousLong(byte[] bytes, long offset) { + return UNSAFE.getLong(bytes, BYTE_ARRAY_BASE_OFFSET + offset + 1 - LONG_BYTES); + } + + /** + * Mark the highest bit of newline byte (0x0A) to 1. + */ + static long markHighestBit(long longBytes) { + long temp = longBytes ^ 0x0A0A0A0A0A0A0A0AL; + return (temp - 0x0101010101010101L) & ~temp & 0x8080808080808080L; + } + + /** + * Find the nearest newline byte position from right to left. + */ + int fromRight(long longBytes); + + /** + * Clear the left bytes out of the range. 
+ */ + long clearLeft(long longBytes, int keepNum); + + enum LELineFinder implements LineFinder { + INST; + + private static final long[] MASKS = new long[8]; + + static { + for (int i = 1; i <= 7; i++) { + MASKS[i] = 0xFFFFFFFFFFFFFFFFL << ((8 - i) << 3); + } } - return result; + + @Override + public int fromRight(long longBytes) { + return Long.numberOfLeadingZeros(markHighestBit(longBytes)) >>> 3; + } + + @Override + public long clearLeft(long longBytes, int keepNum) { + return longBytes & MASKS[keepNum]; + } + } + + enum BELineFinder implements LineFinder { + INST; + + private static final long[] MASKS = new long[8]; + + static { + for (int i = 1; i <= 7; i++) { + MASKS[i] = 0xFFFFFFFFFFFFFFFFL >>> ((8 - i) << 3); + } + } + + @Override + public int fromRight(long longBytes) { + return Long.numberOfTrailingZeros(markHighestBit(longBytes)) >>> 3; + } + + @Override + public long clearLeft(long longBytes, int keepNum) { + return longBytes & MASKS[keepNum]; + } + } + } + + /** + * The station name wrapper ( bytes[from, to) ). 
+ */ + private static class Station { + private final byte[] bytes; + private int from; + private int to; + private int hash; + + public Station(byte[] bytes) { + this(bytes, 0, 0, 0); + } + + public Station(byte[] bytes, int hash, int from, int to) { + this.bytes = bytes; + this.slice(hash, from, to); + } + + public Station slice(int hash, int from, int to) { + this.hash = hash; + this.from = from; + this.to = to; + return this; + } + + public Station copy() { + int length = to - from; + byte[] newBytes = new byte[length]; + System.arraycopy(bytes, from, newBytes, 0, length); + return new Station(newBytes, hash, 0, length); } @Override - public String toString() { - return new String(bytes, 0, length, StandardCharsets.UTF_8); + public boolean equals(Object station) { + Station other = (Station) station; + return Arrays.equals(bytes, from, to, other.bytes, other.from, other.to); + } + + @Override + public int hashCode() { + return hash; } } /** - * The measurement data. + * The measurement data wrapper ( temperature * 10 ). 
*/ private static class MeasurementData { private int min; private int max; - private int sum; + private long sum; private int count; public MeasurementData(int value) { @@ -154,11 +404,15 @@ public MeasurementData(int value) { this.count = 1; } - public MeasurementData merge(MeasurementData data) { - return merge(data.min, data.max, data.sum, data.count); + public MeasurementData merge(int value) { + return merge(value, value, value, 1); + } + + public MeasurementData merge(MeasurementData other) { + return merge(other.min, other.max, other.sum, other.count); } - public MeasurementData merge(int min, int max, int sum, int count) { + public MeasurementData merge(int min, int max, long sum, int count) { this.min = Math.min(this.min, min); this.max = Math.max(this.max, max); this.sum += sum; @@ -168,67 +422,7 @@ public MeasurementData merge(int min, int max, int sum, int count) { @Override public String toString() { - return (min / 10.0) + "/" + (Math.round((double) sum / count) / 10.0) + "/" + (max / 10.0); - } - } - - /** - * The task to calculate. 
- */ - private static class Task implements Callable> { - - @Override - public Map call() throws InterruptedException { - // poll from queue and calculate - Map result = HashMap.newHashMap(MAP_CAPACITY); - for (byte[] bytes = BYTES_QUEUE.take(); true; bytes = BYTES_QUEUE.take()) { - if (bytes.length == 0) { - break; - } - int start = 0; - for (int end = 0; end < bytes.length; end++) { - if (bytes[end] == '\n') { - byte[] newBytes = new byte[end - start]; - System.arraycopy(bytes, start, newBytes, 0, newBytes.length); - int semicolon = newBytes.length - 4; - for (; semicolon >= 0; semicolon--) { - if (newBytes[semicolon] == ';') { - break; - } - } - MeasurementName station = new MeasurementName(newBytes, semicolon); - int value = toInt(newBytes, semicolon + 1); - MeasurementData data = result.get(station); - if (data != null) { - data.merge(value, value, value, 1); - } - else { - result.put(station, new MeasurementData(value)); - } - start = end + 1; - } - } - } - return result; - } - - /** - * Convert the byte array to int. - */ - private static int toInt(byte[] bytes, int start) { - boolean negative = false; - int result = 0; - for (int i = start; i < bytes.length; i++) { - byte b = bytes[i]; - if (b == '-') { - negative = true; - continue; - } - if (b != '.') { - result = result * 10 + (b - '0'); - } - } - return negative ? -result : result; + return STR."\{min / 10.0}/\{Math.round((double) sum / count) / 10.0}/\{max / 10.0}"; } } } diff --git a/src/main/java/dev/morling/onebrc/CalculateAverage_EduardoSaverin.java b/src/main/java/dev/morling/onebrc/CalculateAverage_EduardoSaverin.java new file mode 100644 index 000000000..e33e4cf04 --- /dev/null +++ b/src/main/java/dev/morling/onebrc/CalculateAverage_EduardoSaverin.java @@ -0,0 +1,319 @@ +/* + * Copyright 2023 The original authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package dev.morling.onebrc; + +import sun.misc.Unsafe; + +import java.io.IOException; +import java.lang.foreign.Arena; +import java.lang.reflect.Field; +import java.nio.MappedByteBuffer; +import java.nio.channels.FileChannel; +import java.nio.charset.StandardCharsets; +import java.nio.file.Path; +import java.util.*; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.locks.Lock; +import java.util.concurrent.locks.ReentrantLock; + +import static java.nio.file.StandardOpenOption.READ; + +public class CalculateAverage_EduardoSaverin { + private static final Path FILE = Path.of("./measurements.txt"); + private static final int NO_OF_THREADS = Runtime.getRuntime().availableProcessors(); + private static final Unsafe UNSAFE = initUnsafe(); + private static final int FNV_32_OFFSET = 0x811c9dc5; + private static final int FNV_32_PRIME = 0x01000193; + private static final Map resultRowMap = new HashMap<>(); + private static final Lock lock = new ReentrantLock(); + + private static Unsafe initUnsafe() { + try { + Field theUnsafe = Unsafe.class.getDeclaredField("theUnsafe"); + theUnsafe.setAccessible(true); + return (Unsafe) theUnsafe.get(Unsafe.class); + } + catch (NoSuchFieldException | IllegalAccessException e) { + throw new RuntimeException(e); + } + } + + public record Chunk(long start, long length) { + } + + record MapEntry(String key, ResultRow row) { + } + + private static final class ResultRow { + private double min; + private double max; + private double sum; + private int count; + + private ResultRow(double v) { + 
this.min = v; + this.max = v; + this.sum = v; + this.count = 1; + } + + public String toString() { + return round(min) + "/" + round(sum / count) + "/" + round(max); + } + + private double round(double value) { + return Math.round(value) / 10.0; + } + } + + /** + * 0xA - Represents New Line + * + * @param fileChannel + * @return + * @throws IOException + */ + static List getChunks(FileChannel fileChannel) throws IOException { + int numThreads = 1; + if (fileChannel.size() > 64000) { + numThreads = NO_OF_THREADS; + } + final long fileBytes = fileChannel.size(); + final long chunkSize = fileBytes / numThreads; + final List chunks = new ArrayList<>(numThreads); + final long mappedAddress = fileChannel.map(FileChannel.MapMode.READ_ONLY, 0, fileBytes, Arena.global()).address(); + long chunkStart = 0; + // Ensures that the chunk size does not exceed the remaining bytes in the file. + long chunkLength = Math.min(fileBytes - chunkStart - 1, chunkSize); + while (chunkStart < fileBytes) { + MappedByteBuffer mappedByteBuffer = fileChannel.map(FileChannel.MapMode.READ_ONLY, chunkStart + chunkLength, + Math.min(Math.min(fileBytes - chunkStart - chunkLength, chunkLength), 100)); + // Until \n found + while (mappedByteBuffer.get() != 0xA) { + chunkLength++; + } + chunks.add(new Chunk(mappedAddress + chunkStart, chunkLength + 1)); + chunkStart += (chunkLength + 1); + chunkLength = Math.min(fileBytes - chunkStart - 1, chunkSize); + } + return chunks; + } + + static class SimplerHashMap { + final int MAPSIZE = 65536; + final ResultRow[] slots = new ResultRow[MAPSIZE]; + final byte[][] keys = new byte[MAPSIZE][]; + + public void putOrMerge(final byte[] key, final short length, final int hash, final int temp) { + int slot = hash; + ResultRow slotValue; + + // Doing Linear Probing if Collision + while ((slotValue = slots[slot]) != null && (keys[slot].length != length || !unsafeEquals(keys[slot], key, length))) { + slot++; + } + + // Existing Key + if (slotValue != null) { + 
slotValue.min = Math.min(slotValue.min, temp); + slotValue.max = Math.max(slotValue.max, temp); + slotValue.sum += temp; + slotValue.count++; + return; + } + + // New Key + slots[slot] = new ResultRow(temp); + byte[] bytes = new byte[length]; + System.arraycopy(key, 0, bytes, 0, length); + keys[slot] = bytes; + } + + static boolean unsafeEquals(final byte[] a, final byte[] b, final short length) { + // byte by byte comparisons are slow, so do as big chunks as possible + final int baseOffset = Unsafe.ARRAY_BYTE_BASE_OFFSET; + + short i = 0; + // Double + for (; i < (length & -8); i += 8) { + if (UNSAFE.getDouble(a, i + baseOffset) != UNSAFE.getDouble(b, i + baseOffset)) { + return false; + } + } + + // Long + for (; i < (length & -8); i += 8) { + if (UNSAFE.getLong(a, i + baseOffset) != UNSAFE.getLong(b, i + baseOffset)) { + return false; + } + } + if (i == length) { + return true; + } + // Int + for (; i < (length - i & -4); i += 4) { + if (UNSAFE.getInt(a, i + baseOffset) != UNSAFE.getInt(b, i + baseOffset)) { + return false; + } + } + if (i == length) { + return true; + } + // Short + for (; i < (length - i & -2); i += 2) { + if (UNSAFE.getShort(a, i + baseOffset) != UNSAFE.getShort(b, i + baseOffset)) { + return false; + } + } + if (i == length) { + return true; + } + // Byte + for (; i < (length - i); i++) { + if (UNSAFE.getByte(a, i + baseOffset) != UNSAFE.getByte(b, i + baseOffset)) { + return false; + } + } + + return true; + } + + // Get all pairs + public List getAll() { + final List result = new ArrayList<>(slots.length); + for (int i = 0; i < slots.length; i++) { + ResultRow slotValue = slots[i]; + if (slotValue != null) { + result.add(new MapEntry(new String(keys[i], StandardCharsets.UTF_8), slotValue)); + } + } + return result; + } + } + + private static class Task implements Runnable { + + private final SimplerHashMap results; + private final Chunk chunk; + + public Task(Chunk chunk) { + this.results = new SimplerHashMap(); + this.chunk = chunk; + } + 
+ @Override + public void run() { + // Max length of any city name + final byte[] nameBytes = new byte[100]; + short nameIndex = 0; + int ot; + int hash = FNV_32_OFFSET; + + long i = chunk.start; + final long cl = chunk.start + chunk.length; + while (i < cl) { + byte c; + // 0x3B is ; + while ((c = UNSAFE.getByte(i++)) != 0x3B) { + nameBytes[nameIndex++] = c; + // FNV-1a hash : https://en.wikipedia.org/wiki/Fowler–Noll–Vo_hash_function + hash ^= c; + hash *= FNV_32_PRIME; + } + + // Temperature just after Semicolon + c = UNSAFE.getByte(i++); + // 0x2D is Minus(-) + // Below you will see -48 which is used to convert from ASCII to Integer, 48 represents 0 in ASCII + if (c == 0x2D) { + // X.X or XX.X + if (UNSAFE.getByte(i + 3) == 0xA) { + ot = (UNSAFE.getByte(i++) - 48) * 10; + } + else { + ot = (UNSAFE.getByte(i++) - 48) * 100; + ot += (UNSAFE.getByte(i++) - 48) * 10; + } + // Now dot + i++; // Skipping Dot + ot += (UNSAFE.getByte(i++) - 48); + // Make Number Negative Since we detected (-) sign + ot = -ot; + } + else { + // X.X or XX.X + if (UNSAFE.getByte(i + 2) == 0xA) { + ot = (c - 48) * 10; + } + else { + ot = (c - 48) * 100; + ot += (UNSAFE.getByte(i++) - 48) * 10; + } + // Now dot + i++; // Skipping Dot + // Number after dot + ot += (UNSAFE.getByte(i++) - 48); + } + // Since Parsed Line, Next thing must be newline + i++; + hash &= 65535; + results.putOrMerge(nameBytes, nameIndex, hash, ot); + // Reset + nameIndex = 0; + hash = FNV_32_OFFSET; + } + List all = results.getAll(); + lock.lock(); + try { + for (MapEntry me : all) { + ResultRow rr; + ResultRow lr = me.row; + if ((rr = resultRowMap.get(me.key)) != null) { + rr.min = Math.min(rr.min, lr.min); + rr.max = Math.max(rr.max, lr.max); + rr.count += lr.count; + rr.sum += lr.sum; + } + else { + resultRowMap.put(me.key, lr); + } + } + } + catch (Exception e) { + e.printStackTrace(); + } + finally { + lock.unlock(); + } + } + } + + public static void main(String[] args) throws IOException, InterruptedException { 
+ FileChannel fileChannel = FileChannel.open(FILE, READ); + List chunks = getChunks(fileChannel); + List threads = new ArrayList<>(); + for (Chunk chunk : chunks) { + Thread thread = new Thread(new Task(chunk)); + thread.setPriority(Thread.MAX_PRIORITY); // Make this thread of highest priority + threads.add(thread); + thread.start(); + } + for (Thread thread : threads) { + thread.join(); + } + System.out.println(new TreeMap<>(resultRowMap)); + } +} diff --git a/src/main/java/dev/morling/onebrc/CalculateAverage_JaimePolidura.java b/src/main/java/dev/morling/onebrc/CalculateAverage_JaimePolidura.java new file mode 100644 index 000000000..bc9070cf8 --- /dev/null +++ b/src/main/java/dev/morling/onebrc/CalculateAverage_JaimePolidura.java @@ -0,0 +1,401 @@ +/* + * Copyright 2023 The original authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package dev.morling.onebrc; + +import sun.misc.Unsafe; + +import java.io.RandomAccessFile; +import java.lang.foreign.Arena; +import java.lang.foreign.MemorySegment; +import java.lang.reflect.Field; +import java.nio.channels.FileChannel; +import java.util.Map; +import java.util.TreeMap; + +public final class CalculateAverage_JaimePolidura { + private static final String FILE = "./measurements.txt"; + private static final Unsafe UNSAFE = initUnsafe(); + private static final long SEMICOLON_PATTERN = 0X3B3B3B3B3B3B3B3BL; + + private static Unsafe initUnsafe() { + try { + Field theUnsafe = Unsafe.class.getDeclaredField("theUnsafe"); + theUnsafe.setAccessible(true); + return (Unsafe) theUnsafe.get(Unsafe.class); + } + catch (Exception e) { + throw new RuntimeException(e); + } + } + + public static void main(String[] args) throws Exception { + Worker[] workers = createWorkers(); + + startWorkers(workers); + joinWorkers(workers); + + Map results = mergeWorkersResults(workers); + printResults(results); + } + + private static void joinWorkers(Worker[] workers) throws InterruptedException { + for (int i = 0; i < workers.length; i++) { + workers[i].join(); + } + } + + private static void startWorkers(Worker[] workers) { + for (int i = 0; i < workers.length; i++) { + workers[i].start(); + } + } + + private static Worker[] createWorkers() throws Exception { + FileChannel channel = new RandomAccessFile(FILE, "r").getChannel(); + MemorySegment mmappedFile = channel.map(FileChannel.MapMode.READ_ONLY, 0, channel.size(), Arena.global()); + + int nWorkers = channel.size() > 1024 * 1024 ? 
Runtime.getRuntime().availableProcessors() : 1; + Worker[] workers = new Worker[nWorkers]; + long quantityPerWorker = Math.floorDiv(channel.size(), nWorkers); + long quantityLastWorker = quantityPerWorker + (channel.size() % nWorkers); + + for (int i = 0; i < nWorkers; i++) { + boolean isLastWorker = i == nWorkers - 1; + + long startAddr = mmappedFile.address() + quantityPerWorker * i; + long endAddr = startAddr + (isLastWorker ? quantityLastWorker : quantityPerWorker); + workers[i] = new Worker(mmappedFile, channel.size(), startAddr, endAddr); + workers[i].setPriority(Thread.MAX_PRIORITY); + } + + return workers; + } + + private static Map mergeWorkersResults(Worker[] workers) { + Map mergedResults = new TreeMap<>(); + + for (int i = 0; i < workers.length; i++) { + Worker worker = workers[i]; + + for (Result entry : worker.results.entries) { + if (entry != null) { + String name = new String(entry.name, 0, entry.nameLength); + Result alreadyExistingResult = mergedResults.get(name); + if (alreadyExistingResult != null) { + alreadyExistingResult.min = Math.min(alreadyExistingResult.min, entry.min); + alreadyExistingResult.max = Math.max(alreadyExistingResult.max, entry.max); + alreadyExistingResult.count = alreadyExistingResult.count + entry.count; + alreadyExistingResult.sum = alreadyExistingResult.sum + entry.sum; + } + else { + mergedResults.put(name, entry); + } + } + } + } + + return mergedResults; + } + + private static void printResults(Map results) { + StringBuilder stringBuilder = new StringBuilder(results.size() * 32); + stringBuilder.append('{'); + + for (Map.Entry entry : results.entrySet()) { + if (stringBuilder.length() > 1) { + stringBuilder.append(", "); + } + + Result result = entry.getValue(); + stringBuilder.append(entry.getKey()) + .append('=') + .append(round(((double) result.min) / 10.0)) + .append('/') + .append(round((double) result.sum / (result.count * 10))) + .append('/') + .append(round(((double) result.max) / 10.0d)); + + } + + 
stringBuilder.append('}'); + + System.out.println(stringBuilder); + } + + static class Worker extends Thread { + private final byte[] lastParsedNameBytes = new byte[100]; + private int lastParsedNameLength; + private long lastParsedNameHash; + private int lastParsedTemperature; + + private final SimpleMap results; + private final MemorySegment mmappedFile; + private final long mmappedFileSize; + private long currentAddr; // Will point to beginning of string + private long endAddr; // Will point to \n + + public Worker(MemorySegment mmappedFile, long mmappedFileSize, long startAddr, long endAddr) { + super("Worker[" + startAddr + ", " + endAddr + "]"); + + this.mmappedFileSize = mmappedFileSize; + this.mmappedFile = mmappedFile; + this.currentAddr = startAddr; + this.endAddr = endAddr; + + this.results = new SimpleMap(roundUpToPowerOfTwo(1 << 16)); // 2^16 + } + + @Override + public void run() { + adjustStartAddr(); + adjustEndAddr(); + + if (this.currentAddr >= endAddr) { + return; + } + + while (currentAddr < endAddr) { + parseName(); + parseTemperature(); + + this.currentAddr++; // We don't want it to point to \n + + results.put(this.lastParsedNameHash, this.lastParsedNameBytes, this.lastParsedNameLength, this.lastParsedTemperature); + } + } + + // Idea from Quan Anh Mai's implementation + private void parseTemperature() { + long numberWord = UNSAFE.getLong(currentAddr); + + // The 4th binary digit of the ascii (Starting from left) of a digit is 1 while '.' is 0 + int decimalSepPos = Long.numberOfTrailingZeros(~numberWord & 0x10101000); + // 28 = 4 + 8 * 3 (4 bytes is the number of tail zeros in the byte of decimalPos) + // xxxn.nn- shift: 28 - 28 = 0 + // xxxxxn.n shift: 28 - 12 = 16 + // xxxxn.nn shift: 28 - 20 = 8 + int shift = 28 - decimalSepPos; + + // Negative in ASCII: 00101101 2D. 
In ascii every digit starts with hex digit 3 + // So in order to know if a number is positive, we simpy need the first bit of the 2º half + // If signed is 0 the number is positive. If it is negative signed will be -1. + long signed = (~numberWord << 59) >> 63; + + // If signed is 0 (positive), designMask will be 0xFFFFFFFFFFFFFFFF (-256) + // If signed is -1, all 1s (negative), designMask will be 0xFFFFFFFFFFFFFF00 (-1) + long designMask = ~(signed & 0xFF); + + // Align the number to a fixed position + // (x represents any non-related character, _ represents 0x00, n represents the actual digit and - negative) + // xxxn.nn- -> xxxn.nn- + // xxxxxn.n -> xxxn.n__ + // xxxxn.nn -> xxxn.nn_ + long numberAligned = (numberWord & designMask) << shift; + + // We convert ascii representation to number value + long numberConvertedFromAscii = numberAligned & 0x0F000F0F00L; + + // Now digits is in the form 0xUU00TTHH00 (UU: units digit, TT: tens digit, HH: hundreds digit) + // 0xUU00TTHH00 * (100 * 0x1000000 + 10 * 0x10000 + 1) = + // 0x000000UU00TTHH00 + + // 0x00UU00TTHH000000 * 10 + + // 0xUU00TTHH00000000 * 100 + // Now TT * 100 has 2 trailing zeroes and HH * 100 + TT * 10 + UU < 0x400 + // This results in our value lies in the bit 32 to 41 of this product + // That was close :) + long absValue = ((numberConvertedFromAscii * 0x640a0001) >>> 32) & 0x3FF; + + long signedValue = (absValue ^ signed) - signed; + + this.currentAddr += (((decimalSepPos - 4) / 8) + 2); + + this.lastParsedTemperature = (int) signedValue; + } + + // I first saw this idea in Artsiom Korzun's implementation + private void parseName() { + this.lastParsedNameHash = 0; + + long totalWordHash = 0; + int totalWordLength = 0; + + for (;;) { + long actualWord = UNSAFE.getLong(currentAddr + totalWordLength); + long hasSemicolon = hasByte(actualWord, SEMICOLON_PATTERN); + + if (hasSemicolon != 0) { + int actualLength = Long.numberOfTrailingZeros(hasSemicolon) >> 3; + if (actualLength == 0) { + actualWord = 0; 
+ } + + actualWord = mask(actualWord, actualLength); + + UNSAFE.putLong(this.lastParsedNameBytes, Unsafe.ARRAY_BYTE_BASE_OFFSET + totalWordLength, actualWord); + + totalWordHash ^= actualWord; + totalWordLength += actualLength; + + this.lastParsedNameLength = totalWordLength; + this.lastParsedNameHash = totalWordHash; + this.currentAddr += totalWordLength + 1; // +1 Because we don't want to point to ';' + + break; + } + else { + UNSAFE.putLong(this.lastParsedNameBytes, Unsafe.ARRAY_BYTE_BASE_OFFSET + totalWordLength, actualWord); + + totalWordLength += 8; + totalWordHash ^= actualWord; + } + } + } + + // Removes "garbage" of a word byte + private long mask(long word, int length) { + int shift = (8 - length) * 8; + return (word << shift) >> shift; + } + + private long hasByte(long word, long pattern) { + long patternMatch = word ^ pattern; + return (patternMatch - 0x0101010101010101L) & (~patternMatch & 0x8080808080808080L); + } + + private void adjustStartAddr() { + if (currentAddr == this.mmappedFile.address()) { + return; + } + + while (UNSAFE.getByte(currentAddr) != '\n' && currentAddr != endAddr) { + currentAddr++; + } + + currentAddr++; // We want it to point to the first character instead of \n + } + + private void adjustEndAddr() { + long endAddressMmappedFile = mmappedFile.address() + mmappedFileSize; + if (endAddr >= endAddressMmappedFile) { + return; + } + + while (UNSAFE.getByte(endAddr) != '\n' && endAddr != endAddressMmappedFile) { + endAddr++; + } + } + } + + static class SimpleMap { + private final Result[] entries; + private final long size; + + public SimpleMap(int size) { + this.entries = new Result[size]; + this.size = size; + } + + public void put(long hashToPut, byte[] nameToPut, int nameLength, int valueToPut) { + int index = toIndex(hashToPut); + + for (;;) { + Result actualEntry = entries[index]; + + if (actualEntry == null) { + byte[] nameToPutCopy = new byte[nameLength]; + UNSAFE.copyMemory(nameToPut, Unsafe.ARRAY_BYTE_BASE_OFFSET, 
nameToPutCopy, Unsafe.ARRAY_BYTE_BASE_OFFSET, nameLength); + + entries[index] = new Result(hashToPut, nameToPutCopy, nameLength, valueToPut, + valueToPut, valueToPut, 1); + return; + } + if (actualEntry.isSameName(nameToPut, nameLength)) { + actualEntry.min = Math.min(actualEntry.min, valueToPut); + actualEntry.max = Math.max(actualEntry.max, valueToPut); + actualEntry.count++; + actualEntry.sum = actualEntry.sum + valueToPut; + return; + } + + index = toIndex(index + 31); + } + } + + private int toIndex(long hash) { + return (int) (((hash >> 32) ^ ((int) hash)) & (this.size - 1)); + } + } + + static class Result { + public byte[] name; + public int nameLength; + public int max; + public int min; + public int sum; + public int count; + public long hash; + + public Result(long hash, byte[] name, int nameLength, int max, int min, int sum, int occ) { + this.nameLength = nameLength; + this.count = occ; + this.hash = hash; + this.name = name; + this.max = max; + this.min = min; + this.sum = sum; + } + + public boolean isSameName(byte[] otherNameBytes, int otherNameLength) { + return this.nameLength == otherNameLength && isSameNameBytes(otherNameBytes); + } + + private boolean isSameNameBytes(byte[] otherNameBytes) { + for (int i = 0; i < this.nameLength; i += 8) { + long thisNameBytesAsLong = UNSAFE.getLong(this.name, Unsafe.ARRAY_BYTE_BASE_OFFSET + i); + long otherNameBytesAsLong = UNSAFE.getLong(otherNameBytes, Unsafe.ARRAY_BYTE_BASE_OFFSET + i); + + int isPositiveAsInt = (((8 - nameLength + i) >> 31) & 1) ^ 0x01; + int shift = ((8 - nameLength + i) * isPositiveAsInt) * 8; + otherNameBytesAsLong = (otherNameBytesAsLong << shift) >>> shift; + + if (thisNameBytesAsLong != otherNameBytesAsLong) { + return false; + } + } + + return true; + } + } + + private static double round(double value) { + return Math.round(value * 10.0) / 10.0; + } + + private static int roundUpToPowerOfTwo(int number) { + if (number <= 0) { + return 1; + } + + number--; + number |= number >> 1; + 
number |= number >> 2; + number |= number >> 4; + number |= number >> 8; + number |= number >> 16; + + return number + 1; + } +} diff --git a/src/main/java/dev/morling/onebrc/CalculateAverage_JamalMulla.java b/src/main/java/dev/morling/onebrc/CalculateAverage_JamalMulla.java index 770588556..7daf1997f 100644 --- a/src/main/java/dev/morling/onebrc/CalculateAverage_JamalMulla.java +++ b/src/main/java/dev/morling/onebrc/CalculateAverage_JamalMulla.java @@ -21,21 +21,32 @@ import java.io.RandomAccessFile; import java.lang.foreign.Arena; import java.lang.reflect.Field; -import java.nio.MappedByteBuffer; import java.nio.channels.FileChannel; import java.nio.charset.StandardCharsets; -import java.util.*; +import java.util.Map; +import java.util.TreeMap; import java.util.concurrent.locks.Lock; import java.util.concurrent.locks.ReentrantLock; public class CalculateAverage_JamalMulla { - private static final Map global = new HashMap<>(); + private static final long ALL_SEMIS = 0x3B3B3B3B3B3B3B3BL; + private static final Map global = new TreeMap<>(); private static final String FILE = "./measurements.txt"; private static final Unsafe UNSAFE = initUnsafe(); private static final Lock lock = new ReentrantLock(); - private static final int FNV_32_INIT = 0x811c9dc5; - private static final int FNV_32_PRIME = 0x01000193; + private static final long FXSEED = 0x517cc1b727220a95L; + + private static final long[] masks = { + 0x0, + 0x00000000000000FFL, + 0x000000000000FFFFL, + 0x0000000000FFFFFFL, + 0x00000000FFFFFFFFL, + 0x000000FFFFFFFFFFL, + 0x0000FFFFFFFFFFFFL, + 0x00FFFFFFFFFFFFFFL + }; private static Unsafe initUnsafe() { try { @@ -53,12 +64,16 @@ private static final class ResultRow { private int max; private long sum; private int count; + private final long keyStart; + private final byte keyLength; - private ResultRow(int v) { + private ResultRow(int v, final long keyStart, final byte keyLength) { this.min = v; this.max = v; this.sum = v; this.count = 1; + this.keyStart = 
keyStart; + this.keyLength = keyLength; } public String toString() { @@ -68,236 +83,197 @@ public String toString() { private double round(double value) { return Math.round(value) / 10.0; } + } private record Chunk(Long start, Long length) { } - static List getChunks(int numThreads, FileChannel channel) throws IOException { + static Chunk[] getChunks(int numThreads, FileChannel channel) throws IOException { // get all chunk boundaries final long filebytes = channel.size(); final long roughChunkSize = filebytes / numThreads; - final List chunks = new ArrayList<>(numThreads); + final Chunk[] chunks = new Chunk[numThreads]; final long mappedAddress = channel.map(FileChannel.MapMode.READ_ONLY, 0, filebytes, Arena.global()).address(); long chunkStart = 0; long chunkLength = Math.min(filebytes - chunkStart - 1, roughChunkSize); + int i = 0; while (chunkStart < filebytes) { - // unlikely we need to read more than this many bytes to find the next newline - MappedByteBuffer mbb = channel.map(FileChannel.MapMode.READ_ONLY, chunkStart + chunkLength, - Math.min(Math.min(filebytes - chunkStart - chunkLength, chunkLength), 100)); - - while (mbb.get() != 0xA /* \n */) { + while (UNSAFE.getByte(mappedAddress + chunkStart + chunkLength) != 0xA /* \n */) { chunkLength++; } - chunks.add(new Chunk(mappedAddress + chunkStart, chunkLength + 1)); + chunks[i++] = new Chunk(mappedAddress + chunkStart, chunkLength + 1); // to skip the nl in the next chunk chunkStart += chunkLength + 1; chunkLength = Math.min(filebytes - chunkStart - 1, roughChunkSize); } + return chunks; } - private static class CalculateTask implements Runnable { + private static void run(Chunk chunk) { - private final SimplerHashMap results; - private final Chunk chunk; + // can't have more than 10000 unique keys but want to match max hash + final int MAPSIZE = 65536; + final ResultRow[] slots = new ResultRow[MAPSIZE]; - public CalculateTask(Chunk chunk) { - this.results = new SimplerHashMap(); - this.chunk = chunk; - } + 
byte nameLength; + int temp; + long hash; + + long i = chunk.start; + final long cl = chunk.start + chunk.length; + long word; + long hs; + long start; + byte c; + int slot; + long n; + ResultRow slotValue; + + while (i < cl) { + start = i; + hash = 0; + + word = UNSAFE.getLong(i); + + while (true) { + n = word ^ ALL_SEMIS; + hs = (n - 0x0101010101010101L) & (~n & 0x8080808080808080L); + if (hs != 0) + break; + hash = (hash ^ word) * FXSEED; + i += 8; + word = UNSAFE.getLong(i); + } - @Override - public void run() { - // no names bigger than this - final byte[] nameBytes = new byte[100]; - short nameIndex = 0; - int ot; - // fnv hash - int hash = FNV_32_INIT; - - long i = chunk.start; - final long cl = chunk.start + chunk.length; - while (i < cl) { - byte c; - while ((c = UNSAFE.getByte(i++)) != 0x3B /* semi-colon */) { - nameBytes[nameIndex++] = c; - hash ^= c; - hash *= FNV_32_PRIME; + i += Long.numberOfTrailingZeros(hs) >> 3; + + // hash of what's left ((hs >>> 7) - 1) masks off the bytes from word that are before the semicolon + hash = (hash ^ word & (hs >>> 7) - 1) * FXSEED; + nameLength = (byte) (i++ - start); + + // temperature value follows + c = UNSAFE.getByte(i++); + // we know the val has to be between -99.9 and 99.8 + // always with a single fractional digit + // represented as a byte array of either 4 or 5 characters + if (c != 0x2D /* minus sign */) { + // could be either n.x or nn.x + if (UNSAFE.getByte(i + 2) == 0xA) { + temp = (c - 48) * 10; // char 1 } - - // temperature value follows - c = UNSAFE.getByte(i++); - // we know the val has to be between -99.9 and 99.8 - // always with a single fractional digit - // represented as a byte array of either 4 or 5 characters - if (c == 0x2D /* minus sign */) { - // could be either n.x or nn.x - if (UNSAFE.getByte(i + 3) == 0xA) { - ot = (UNSAFE.getByte(i++) - 48) * 10; // char 1 - } - else { - ot = (UNSAFE.getByte(i++) - 48) * 100; // char 1 - ot += (UNSAFE.getByte(i++) - 48) * 10; // char 2 - } - i++; // 
skip dot - ot += (UNSAFE.getByte(i++) - 48); // char 2 - ot = -ot; + else { + temp = (c - 48) * 100; // char 1 + temp += (UNSAFE.getByte(i++) - 48) * 10; // char 2 + } + temp += (UNSAFE.getByte(++i) - 48); // char 3 + } + else { + // could be either n.x or nn.x + if (UNSAFE.getByte(i + 3) == 0xA) { + temp = (UNSAFE.getByte(i) - 48) * 10; // char 1 + i += 2; } else { - // could be either n.x or nn.x - if (UNSAFE.getByte(i + 2) == 0xA) { - ot = (c - 48) * 10; // char 1 - } - else { - ot = (c - 48) * 100; // char 1 - ot += (UNSAFE.getByte(i++) - 48) * 10; // char 2 - } - i++; // skip dot - ot += (UNSAFE.getByte(i++) - 48); // char 3 + temp = (UNSAFE.getByte(i) - 48) * 100; // char 1 + temp += (UNSAFE.getByte(i + 1) - 48) * 10; // char 2 + i += 3; + } + temp += (UNSAFE.getByte(i) - 48); // char 2 + temp = -temp; + } + i += 2; + + // xor folding + slot = (int) (hash ^ hash >> 32) & 65535; + + // Linear probe for open slot + while ((slotValue = slots[slot]) != null && (slotValue.keyLength != nameLength || !unsafeEquals(slotValue.keyStart, start, nameLength))) { + slot = (slot + 1) % MAPSIZE; + } + + // existing + if (slotValue != null) { + slotValue.sum += temp; + slotValue.count++; + if (temp > slotValue.max) { + slotValue.max = temp; + continue; } + if (temp < slotValue.min) + slotValue.min = temp; - i++;// nl - hash &= 65535; - results.putOrMerge(nameBytes, nameIndex, hash, ot); - // reset - nameIndex = 0; - hash = 0x811c9dc5; } + else { + // new value + slots[slot] = new ResultRow(temp, start, nameLength); + } + } - // merge results with overall results - List all = results.getAll(); - lock.lock(); - try { - for (MapEntry me : all) { - ResultRow rr; - ResultRow lr = me.row; - if ((rr = global.get(me.key)) != null) { - rr.min = Math.min(rr.min, lr.min); - rr.max = Math.max(rr.max, lr.max); - rr.count += lr.count; - rr.sum += lr.sum; + // merge results with overall results + ResultRow rr; + String key; + byte[] bytes; + lock.lock(); + try { + for (ResultRow resultRow : 
slots) { + if (resultRow != null) { + bytes = new byte[resultRow.keyLength]; + // copy the name bytes + UNSAFE.copyMemory(null, resultRow.keyStart, bytes, Unsafe.ARRAY_BYTE_BASE_OFFSET, resultRow.keyLength); + key = new String(bytes, StandardCharsets.UTF_8); + if ((rr = global.get(key)) != null) { + rr.min = Math.min(rr.min, resultRow.min); + rr.max = Math.max(rr.max, resultRow.max); + rr.count += resultRow.count; + rr.sum += resultRow.sum; } else { - global.put(me.key, lr); + global.put(key, resultRow); } } } - finally { - lock.unlock(); + } + finally { + lock.unlock(); + } + + } + + static boolean unsafeEquals(final long a_address, final long b_address, final byte b_length) { + // byte by byte comparisons are slow, so do as big chunks as possible + byte i = 0; + for (; i < (b_length & -8); i += 8) { + if (UNSAFE.getLong(a_address + i) != UNSAFE.getLong(b_address + i)) { + return false; } } + if (i == b_length) + return true; + return (UNSAFE.getLong(a_address + i) & masks[b_length - i]) == (UNSAFE.getLong(b_address + i) & masks[b_length - i]); } public static void main(String[] args) throws IOException, InterruptedException { - FileChannel channel = new RandomAccessFile(FILE, "r").getChannel(); int numThreads = 1; + FileChannel channel = new RandomAccessFile(FILE, "r").getChannel(); if (channel.size() > 64000) { numThreads = Runtime.getRuntime().availableProcessors(); } - List chunks = getChunks(numThreads, channel); - List threads = new ArrayList<>(); - for (Chunk chunk : chunks) { - Thread thread = new Thread(new CalculateTask(chunk)); + Chunk[] chunks = getChunks(numThreads, channel); + Thread[] threads = new Thread[chunks.length]; + for (int i = 0; i < chunks.length; i++) { + int finalI = i; + Thread thread = new Thread(() -> run(chunks[finalI])); thread.setPriority(Thread.MAX_PRIORITY); thread.start(); - threads.add(thread); + threads[i] = thread; } for (Thread t : threads) { t.join(); } - // create treemap just to sort - System.out.println(new 
TreeMap<>(global)); + System.out.println(global); + channel.close(); } - - record MapEntry(String key, ResultRow row) { - } - - static class SimplerHashMap { - // can't have more than 10000 unique keys but want to match max hash - final int MAPSIZE = 65536; - final ResultRow[] slots = new ResultRow[MAPSIZE]; - final byte[][] keys = new byte[MAPSIZE][]; - - public void putOrMerge(final byte[] key, final short length, final int hash, final int temp) { - int slot = hash; - ResultRow slotValue; - - // Linear probe for open slot - while ((slotValue = slots[slot]) != null && (keys[slot].length != length || !unsafeEquals(keys[slot], key, length))) { - slot++; - } - - // existing - if (slotValue != null) { - slotValue.min = Math.min(slotValue.min, temp); - slotValue.max = Math.max(slotValue.max, temp); - slotValue.sum += temp; - slotValue.count++; - return; - } - - // new value - slots[slot] = new ResultRow(temp); - byte[] bytes = new byte[length]; - System.arraycopy(key, 0, bytes, 0, length); - keys[slot] = bytes; - } - - static boolean unsafeEquals(final byte[] a, final byte[] b, final short length) { - // byte by byte comparisons are slow, so do as big chunks as possible - final int baseOffset = Unsafe.ARRAY_BYTE_BASE_OFFSET; - - short i = 0; - // round down to nearest power of 8 - for (; i < (length & -8); i += 8) { - if (UNSAFE.getLong(a, i + baseOffset) != UNSAFE.getLong(b, i + baseOffset)) { - return false; - } - } - if (i == length) { - return true; - } - // leftover ints - for (; i < (length - i & -4); i += 4) { - if (UNSAFE.getInt(a, i + baseOffset) != UNSAFE.getInt(b, i + baseOffset)) { - return false; - } - } - if (i == length) { - return true; - } - // leftover shorts - for (; i < (length - i & -2); i += 2) { - if (UNSAFE.getShort(a, i + baseOffset) != UNSAFE.getShort(b, i + baseOffset)) { - return false; - } - } - if (i == length) { - return true; - } - // leftover bytes - for (; i < (length - i); i++) { - if (UNSAFE.getByte(a, i + baseOffset) != 
UNSAFE.getByte(b, i + baseOffset)) { - return false; - } - } - - return true; - } - - // Get all pairs - public List getAll() { - final List result = new ArrayList<>(slots.length); - for (int i = 0; i < slots.length; i++) { - ResultRow slotValue = slots[i]; - if (slotValue != null) { - result.add(new MapEntry(new String(keys[i], StandardCharsets.UTF_8), slotValue)); - } - } - return result; - } - } - } diff --git a/src/main/java/dev/morling/onebrc/CalculateAverage_JesseVanRooy.java b/src/main/java/dev/morling/onebrc/CalculateAverage_JesseVanRooy.java new file mode 100644 index 000000000..ba0475e35 --- /dev/null +++ b/src/main/java/dev/morling/onebrc/CalculateAverage_JesseVanRooy.java @@ -0,0 +1,256 @@ +/* + * Copyright 2023 The original authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package dev.morling.onebrc; + +import sun.misc.Unsafe; + +import java.io.IOException; +import java.lang.foreign.Arena; +import java.lang.foreign.MemorySegment; +import java.lang.foreign.ValueLayout; +import java.lang.reflect.Field; +import java.nio.channels.FileChannel; +import java.nio.file.Path; +import java.nio.file.StandardOpenOption; +import java.util.*; +import java.util.stream.IntStream; + +//Disclaimer: The idea from the segmentation into #core amount of chunks came from previously submitted solutions. 
+public class CalculateAverage_JesseVanRooy { + + private static final String FILE = "./measurements.txt"; + + private static final ValueLayout.OfByte DATA_LAYOUT = ValueLayout.JAVA_BYTE; + + private static final Unsafe UNSAFE = initUnsafe(); + + private static Unsafe initUnsafe() { + try { + Field theUnsafe = Unsafe.class.getDeclaredField("theUnsafe"); + theUnsafe.setAccessible(true); + return (Unsafe) theUnsafe.get(Unsafe.class); + } + catch (NoSuchFieldException | IllegalAccessException e) { + throw new RuntimeException(e); + } + } + + public static class Result { + long nameStart; + long nameSize; + String name; + int min; + int max; + long sum; + int count; + + double min() { + return min / 10.0; + } + + double max() { + return max / 10.0; + } + + double mean() { + return (sum / 10.0) / count; + } + } + + public static class ThreadResult { + Result[] results; + } + + static final int MAP_SIZE = 16384; + static final int MAP_MASK = MAP_SIZE - 1; + static final int VALUE_CAPACITY = 10000; + + static void process(MemorySegment memorySegment, ThreadResult threadResult) { + // initialize hash table + final int[] keys = new int[MAP_SIZE]; + Arrays.fill(keys, -1); + final Result[] values = new Result[MAP_SIZE]; + + // pre-create the result objects + final Result[] preCreatedResults = new Result[VALUE_CAPACITY]; + int usedPreCreatedResults = 0; + for (int i = 0; i < VALUE_CAPACITY; i++) + preCreatedResults[i] = new Result(); + + // load address info + final long size = memorySegment.byteSize(); + final long address = memorySegment.address(); + final long end = address + size; + + for (long index = address; index < end;) { + final long nameStart = index; + + byte next = UNSAFE.getByte(index); + + // hash the city name + int hash = 0; + while (next != ';') { + hash = (hash * 33) + next; + + index++; + next = UNSAFE.getByte(index); + } + + final long nameEnd = index; + + // skip the separator + index++; + next = UNSAFE.getByte(index); + + // check for negative + boolean 
negative = next == '-'; + if (negative) { + index++; + next = UNSAFE.getByte(index); + } + + // count the temperature + int temperature = next - '0'; + index++; + next = UNSAFE.getByte(index); + + if (next != '.') { + temperature = (temperature * 10) + (next - '0'); + index++; + } + + // skip the . + index++; + next = UNSAFE.getByte(index); + + // add the last digit to temperature + temperature = (temperature * 10) + (next - '0'); + index++; + + // negate the temperature if needed + if (negative) { + temperature = -temperature; + } + + // skip the newline + index++; + + // insert into map + for (int i = hash; i < hash + MAP_SIZE; i++) { + int mapIndex = i & MAP_MASK; + if (keys[mapIndex] == -1) { + Result result = preCreatedResults[usedPreCreatedResults++]; + result.nameStart = nameStart; + result.nameSize = nameEnd - nameStart; + result.min = temperature; + result.max = temperature; + result.sum = temperature; + result.count = 1; + + keys[mapIndex] = hash; + values[mapIndex] = result; + break; + } + if (keys[mapIndex] == hash) { + Result result = values[mapIndex]; + result.min = Math.min(result.min, temperature); + result.max = Math.max(result.max, temperature); + result.sum += temperature; + result.count++; + break; + } + } + } + + threadResult.results = Arrays.stream(values).filter(Objects::nonNull).toArray(Result[]::new); + + for (Result result : threadResult.results) { + result.name = new String(memorySegment.asSlice(result.nameStart - address, result.nameSize).toArray(DATA_LAYOUT)); + } + } + + public static void main(String[] args) throws IOException, InterruptedException { + int numberOfChunks = Runtime.getRuntime().availableProcessors(); + + try (var fileChannel = FileChannel.open(Path.of(FILE), StandardOpenOption.READ)) { + + long fileSize = fileChannel.size(); + MemorySegment allData = fileChannel.map(FileChannel.MapMode.READ_ONLY, 0, fileSize, Arena.global()); + + long segmentSize = (fileSize + numberOfChunks - 1) / numberOfChunks; + long[] 
segmentBounds = new long[numberOfChunks + 1]; + + segmentBounds[0] = 0; + for (int i = 1; i < numberOfChunks; i++) { + long chunkAddress = i * segmentSize; + while (chunkAddress < fileSize && allData.getAtIndex(DATA_LAYOUT, chunkAddress++) != '\n') { + } + segmentBounds[i] = Math.min(chunkAddress, fileSize); + } + segmentBounds[numberOfChunks] = fileSize; + + ThreadResult[] threadResults = IntStream.range(0, numberOfChunks) + .parallel() + .mapToObj(i -> { + long size = segmentBounds[i + 1] - segmentBounds[i]; + long offset = segmentBounds[i]; + MemorySegment segment = allData.asSlice(offset, size); + ThreadResult result = new ThreadResult(); + process(segment, result); + return result; + }) + .toArray(ThreadResult[]::new); + + HashMap combinedResults = new HashMap<>(1024); + + for (int i = 0; i < numberOfChunks; i++) { + for (Result result : threadResults[i].results) { + if (!combinedResults.containsKey(result.name)) { + Result newResult = new Result(); + newResult.name = result.name; + newResult.min = result.min; + newResult.max = result.max; + newResult.sum = result.sum; + newResult.count = result.count; + combinedResults.put(result.name, newResult); + } + else { + Result existingResult = combinedResults.get(result.name); + existingResult.min = Math.min(existingResult.min, result.min); + existingResult.max = Math.max(existingResult.max, result.max); + existingResult.sum += result.sum; + existingResult.count += result.count; + } + } + } + + Result[] sortedResults = combinedResults.values().toArray(Result[]::new); + Arrays.sort(sortedResults, Comparator.comparing(result -> result.name)); + + System.out.print("{"); + + for (int i = 0; i < sortedResults.length; i++) { + Result sortedResult = sortedResults[i]; + if (i != 0) { + System.out.print(", "); + } + System.out.printf(Locale.US, "%s=%.1f/%.1f/%.1f", sortedResult.name, sortedResult.min(), sortedResult.mean(), sortedResult.max()); + } + + System.out.printf("}\n"); + } + } +} diff --git 
a/src/main/java/dev/morling/onebrc/CalculateAverage_Judekeyser.java b/src/main/java/dev/morling/onebrc/CalculateAverage_Judekeyser.java new file mode 100644 index 000000000..40bcf08e0 --- /dev/null +++ b/src/main/java/dev/morling/onebrc/CalculateAverage_Judekeyser.java @@ -0,0 +1,414 @@ +/* + * Copyright 2023 The original authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package dev.morling.onebrc; + +import jdk.incubator.vector.ByteVector; +import jdk.incubator.vector.VectorSpecies; + +import java.io.IOException; +import java.io.RandomAccessFile; +import java.io.UncheckedIOException; +import java.lang.foreign.Arena; +import java.lang.foreign.MemorySegment; +import java.nio.ByteOrder; +import java.nio.channels.FileChannel; +import java.nio.charset.StandardCharsets; +import java.nio.file.Paths; +import java.text.DecimalFormat; +import java.text.DecimalFormatSymbols; +import java.util.*; +import java.util.concurrent.CompletableFuture; +import java.util.concurrent.Executors; +import java.util.concurrent.Future; + +import static java.lang.foreign.ValueLayout.OfByte.JAVA_BYTE; +import static java.lang.foreign.ValueLayout.OfByte.JAVA_INT_UNALIGNED; + +public class CalculateAverage_Judekeyser { + private static final String FILE = "./measurements.txt"; + private static final int chunkSize = (1 << 7) << 12; // This can't go beyond 2^21, because otherwise we might exceed int capacity + + private static final int numberOfIOWorkers = 1 << 8; // We are 
going to need (numberOfIOWorkers-1) * chunkSize capacity + private static final int numberOfParallelWorkers = Runtime.getRuntime().availableProcessors() - 1; + + private static final VectorSpecies SPECIES = ByteVector.SPECIES_PREFERRED; + + public static void main(String[] args) throws Exception { + class SimpleStatistics { + int min, max, sum, count; + SimpleStatistics() { + min = Integer.MAX_VALUE; + max = Integer.MIN_VALUE; + sum = 0; + count = 0; + } + + void accept(int value) { + min = Math.min(min, value); + max = Math.max(max, value); + sum += value; + count++; + } + } + class Statistics { + double min, max, avg; + long count; + Statistics(SimpleStatistics simple) { + min = simple.min/10.; + max = simple.max/10.; + avg = simple.sum/10./simple.count; + count = simple.count; + } + + void accept(SimpleStatistics simple) { + min = Math.min(min, simple.min/10.); + max = Math.max(max, simple.max/10.); + var nextCount = count + simple.count; + avg = (avg * count + simple.sum/10.)/nextCount; + count = nextCount; + } + + static final DecimalFormat format; + static { + var decimalFormatSymbols = DecimalFormatSymbols.getInstance(); + decimalFormatSymbols.setDecimalSeparator('.'); + format = new DecimalFormat("#0.0", decimalFormatSymbols); + } + @Override + public String toString() { + return STR."\{format.format(round(min))}/\{format.format(round(avg))}/\{format.format(round(max))}"; + } + + static double round(double d) { + return Math.round(d*10.)/10.; + } + } + class Name { + final int[] data; + final int hash; + Name(int[] data) { + this.data = data; + { + var hash = 0; + for (var d : data) { + hash = 31 * hash + d; + } + this.hash = hash; + } + } + + @Override + public int hashCode() { + return hash; + } + + @Override + public boolean equals(Object obj) { + if(obj == this) return true; + else if(obj instanceof Name name && name.data.length == data.length) { + int size = 0; + while(size < data.length) { + if(data[size] != name.data[size]) { + return false; + } else 
size++; + } + return true; + } else return false; + } + + @Override + public String toString() { + var bdata = new byte[data.length * 4]; + int j = 0; + for(int i = 0;i < data.length; i++) { + bdata[j++] = (byte)((data[i] >>> 0) & 255); + bdata[j++] = (byte)((data[i] >>> 8) & 255); + bdata[j++] = (byte)((data[i] >>> 16) & 255); + bdata[j++] = (byte)((data[i] >>> 24) & 255); + } + while(bdata[--j] == 0); + return new String(bdata, 0, j+1, StandardCharsets.UTF_8); + } + } + + record Line(Name name, int value) {} + + var results = new HashMap(); + try(var file = new RandomAccessFile(Paths.get(FILE).toFile(), "r")) { + class Ls implements Iterator { + final int M = chunkSize; + final Arena arena = Arena.ofShared(); + final long length; + + long offset; + + Ls() throws IOException { + offset = 0L; + length = file.length(); + } + + @Override + public MemorySegment next() { + MemorySegment memorySegment; + try { + memorySegment = file.getChannel().map( + FileChannel.MapMode.READ_ONLY, + offset, Math.min(M + 128L, file.getChannel().size() - offset), + arena + ); + } catch (IOException e) { + throw new UncheckedIOException(e); + } + + var size = M; + if (offset + M < length) { + b: + { + for (int N = 0; N < 128; N++) { + var b = memorySegment.get(JAVA_BYTE, size); + size += 1; + if (b == '\n') { + break b; + } + } + assert false : "Lines are smaller than 128 bytes"; + } + offset += size; + } else { + size = (int) (length - offset); + offset = length; + } + + return memorySegment.asSlice(0, size); + } + + @Override + public boolean hasNext() { + return offset < length; + } + } + + class It implements Iterator { + int offset; + final int length; + final MemorySegment memorySegment; + final ByteOrder endian; + + It(MemorySegment memorySegment) { + offset = 0; + endian = ByteOrder.nativeOrder(); + this.memorySegment = memorySegment; + length = (int) memorySegment.byteSize(); + assert '\n' == memorySegment.get(JAVA_BYTE, length - 1); + } + + @Override + public boolean hasNext() 
{ + return offset < length; + } + + @Override + public Line next() { + int size; + b: { + /* + * Vectorization does not seem to bring anything interesting. + * This is a bit disappointing. What am I doing wrong? + */ + + size = 0; + + while (offset+size+SPECIES.length() <= length) { + var vector = ByteVector.fromMemorySegment( + SPECIES, memorySegment, + offset+size, endian + ); + var j = vector.eq((byte) '\n').firstTrue(); + if (j < SPECIES.length()) { + assert j >= 0; + size += j; + assert memorySegment.get(JAVA_BYTE, offset+size) == '\n'; + break b; + } else { + assert j == SPECIES.length(); + size += SPECIES.length(); + } + } + { + byte b; + for (; size < 128; size++) { + b = memorySegment.get(JAVA_BYTE, offset+size); + if (b == '\n') break b; + } + assert false : "Lines are smaller than 128 bytes"; + } + assert memorySegment.get(JAVA_BYTE, offset+size) == '\n'; + assert size < 128; + } + + Name name; + int value; + { + long cursor = offset+size - 1L; + { + value = memorySegment.get(JAVA_BYTE, cursor) - '0'; + value += (memorySegment.get(JAVA_BYTE, cursor-2L) - '0') * 10; + cursor -= 3L; + if (memorySegment.get(JAVA_BYTE, cursor) == '-') { + value *= -1; + cursor -= 1L; + } else if (memorySegment.get(JAVA_BYTE, cursor) != ';') { + value += (memorySegment.get(JAVA_BYTE, cursor) - '0') * 100; + cursor -= 1L; + if (memorySegment.get(JAVA_BYTE, cursor) == '-') { + value *= -1; + cursor -= 1L; + } + } + } + //var data = memorySegment.asSlice(offset, cursor-offset).toArray(JAVA_BYTE); + //System.arraycopy(chunk, 0, data, 0, data.length); + //assert ';' != data[data.length - 1]; + //name = new Name(data); + { + int mod4StringSize = ((int)(cursor-offset+3))/4 * 4; + var data = memorySegment.asSlice(offset, mod4StringSize).toArray(JAVA_INT_UNALIGNED); + switch(((int)(cursor - offset)) % 4) { + case 0: break; + case 1: { + data[data.length - 1] &= 255; + } break; + case 2: { + data[data.length - 1] &= 65535; + } break; + case 3: { + data[data.length - 1] &= 16777215; + } 
break; + } + name = new Name(data); + } + } + offset += size + 1; + return new Line(name, value); + } + } + + record Pair(MemorySegment segment, Map simple) { + Pair(MemorySegment segment) { + this(segment, apply(segment)); + } + + private static Map apply(MemorySegment memorySegment) { + try { + return call(memorySegment); + } catch (IOException e) { + throw new UncheckedIOException(e); + } + } + + private static Map call(MemorySegment memorySegment) throws IOException { + var it = new It(memorySegment); + var simple = new HashMap(); + while (it.hasNext()) { + var line = it.next(); + var name = line.name(); + var value = line.value(); + + var statistics = simple.get(name); + if (statistics == null) { + statistics = new SimpleStatistics(); + simple.put(name, statistics); + } + statistics.accept(value); + } + return simple; + } + } + + var ls = new Ls(); + + try( + var nioService = Executors.newVirtualThreadPerTaskExecutor(); + var parallelService =Executors.newFixedThreadPool(numberOfParallelWorkers) + ) { + var tasksQueue = new ArrayList>(); + for(;;) { + assert tasksQueue.size() <= numberOfIOWorkers; + if(tasksQueue.size() < numberOfIOWorkers) { + if(ls.hasNext()) { + var memseg = ls.next(); + var task = CompletableFuture.supplyAsync( + () -> { + memseg.load(); + return memseg; + }, nioService + ).thenApplyAsync(Pair::new, parallelService); + + tasksQueue.add(task); + } else if(tasksQueue.isEmpty()) break; + } + /* + * Wait for the tasks and merge what's ready + */ + { + var copy = new ArrayList>(tasksQueue.size()); + for(var worker: tasksQueue) { + if(worker.isDone()) { + /* + * Merge the maps + */ + var p = worker.get(); + var simple = p.simple(); + p.segment().unload(); + for (var entry : simple.entrySet()) { + var name = entry.getKey(); + + var statistics = results.get(name); + if (statistics == null) { + statistics = new Statistics(entry.getValue()); + results.put(name, statistics); + } else { + statistics.accept(entry.getValue()); + } + } + } else 
copy.add(worker); + } + tasksQueue.clear(); + tasksQueue.addAll(copy); + } + } + } + } + + /* + * Print + */ + { + var sortedMap = new TreeMap(); + for(var entry: results.entrySet()) { + sortedMap.put( + entry.getKey().toString(), + entry.getValue() + ); + } + var joiner = new StringJoiner(", ", "{", "}"); + for (var entry : sortedMap.entrySet()) { + joiner.add(STR. "\{ entry.getKey() }=\{ entry.getValue() }" ); + } + System.out.println(joiner); + } + } +} diff --git a/src/main/java/dev/morling/onebrc/CalculateAverage_JurenIvan.java b/src/main/java/dev/morling/onebrc/CalculateAverage_JurenIvan.java new file mode 100644 index 000000000..3f9306899 --- /dev/null +++ b/src/main/java/dev/morling/onebrc/CalculateAverage_JurenIvan.java @@ -0,0 +1,219 @@ +/* + * Copyright 2023 The original authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package dev.morling.onebrc; + +import java.io.IOException; +import java.io.RandomAccessFile; +import java.nio.channels.FileChannel; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.Arrays; +import java.util.Objects; +import java.util.TreeMap; +import java.util.stream.Collectors; +import java.util.stream.IntStream; + +import static java.lang.Math.round; +import static java.nio.channels.FileChannel.MapMode.READ_ONLY; +import static java.nio.file.StandardOpenOption.READ; + +public class CalculateAverage_JurenIvan { + + private static final String FILE_NAME = "./measurements.txt"; + + public static void main(String[] args) throws IOException { + long[] segments = getSegments(Runtime.getRuntime().availableProcessors()); + + var result = IntStream.range(0, segments.length - 1) + .parallel() + .mapToObj(i -> processSegment(segments[i], segments[i + 1])) + .flatMap(m -> Arrays.stream(m.hashTable).filter(Objects::nonNull)) + .collect(Collectors.toMap(m -> new String(m.city), m -> m, Measurement::merge, TreeMap::new)); + + System.out.println(result); + } + + private static LinearProbingHashMap processSegment(long start, long end) { + var results = new LinearProbingHashMap(1 << 19); + + try (var fileChannel = (FileChannel) Files.newByteChannel(Path.of(FILE_NAME), READ)) { + var bb = fileChannel.map(READ_ONLY, start, end - start); + var buffer = new byte[100]; + + int limit = bb.limit(); + for (int startLine = bb.position(); startLine < limit; startLine = bb.position()) { + int currentPosition = startLine; + + byte b; + int hash = 7; + int wordLen = 0; + while (currentPosition < end && (b = bb.get(currentPosition++)) != ';') { + buffer[wordLen++] = b; + hash = hash * 31 + b; + } + + int temp; + int negative = 1; + if (bb.get(currentPosition) == '-') { + negative = -1; + currentPosition++; + } + + if (bb.get(currentPosition + 1) == '.') { + temp = negative * ((bb.get(currentPosition) - '0') * 10 + (bb.get(currentPosition + 2) - '0')); + 
currentPosition += 3; + } + else { + temp = negative * ((bb.get(currentPosition) - '0') * 100 + ((bb.get(currentPosition + 1) - '0') * 10 + (bb.get(currentPosition + 3) - '0'))); + currentPosition += 4; + } + + currentPosition++; + + results.put(hash, buffer, wordLen, temp); + + bb.position(currentPosition); + } + } + catch (IOException e) { + throw new RuntimeException(e); + } + return results; + } + + private static long[] getSegments(int segmentCount) throws IOException { + try (var raf = new RandomAccessFile(FILE_NAME, "r")) { + long fileSize = raf.length(); + + if (fileSize < 100000) { + long[] chunks = new long[2]; + chunks[1] = fileSize; + return chunks; + } + + while (fileSize / segmentCount >= (Integer.MAX_VALUE - 150)) { + segmentCount *= 2; + } + + long[] chunks = new long[segmentCount + 1]; + + chunks[0] = 0; + long segmentSize = fileSize / segmentCount; + + for (int i = 1; i < segmentCount; i++) { + long chunkOffset = chunks[i - 1] + segmentSize; + raf.seek(chunkOffset); + while (raf.readByte() != '\n') { + } + chunks[i] = raf.getFilePointer(); + } + chunks[segmentCount] = fileSize; + return chunks; + } + } + + public static class LinearProbingHashMap { + final Measurement[] hashTable; + int slots; + + public LinearProbingHashMap(int slots) { + this.slots = slots; + this.hashTable = new Measurement[slots]; + } + + void put(int hash, byte[] key, int len, int temperature) { + hash = Math.abs(hash); + int index = hash & (slots - 1); + + int i = index; + while (hashTable[i] != null) { + if (keyIsEqual(key, hashTable[i].city, len)) { // handling hash collisions + hashTable[i].add(temperature); + return; + } + i++; + if (i == slots) { + i = 0; + } + } + + var cityArr = new byte[len]; + System.arraycopy(key, 0, cityArr, 0, len); + hashTable[i] = new Measurement(cityArr, hash, temperature, temperature, 1, temperature); + } + + private boolean keyIsEqual(byte[] one, byte[] other, int len) { + if (len != other.length) + return false; + for (int i = 0; i < len; 
i++) { + if (one[i] != other[i]) { + return false; + } + } + return true; + } + + } + + static class Measurement { + byte[] city; + int hash; + int min; + int max; + int count; + long sum; + + public Measurement(byte[] city, int hash, int min, int max, int count, long sum) { + this.city = city; + this.hash = hash; + this.min = min; + this.max = max; + this.count = count; + this.sum = sum; + } + + public void add(int temperature) { + min = Math.min(min, temperature); + max = Math.max(max, temperature); + count++; + sum += temperature; + } + + public Measurement merge(Measurement other) { + min = Math.min(min, other.min); + max = Math.max(max, other.max); + count += other.count; + sum += other.sum; + return this; + } + + @Override + public String toString() { + return (min * 1.0) / 10 + "/" + round((sum * 1.0) / count) / 10.0 + "/" + (max * 1.0) / 10; + } + + @Override + public int hashCode() { + return hash; + } + + @Override + public boolean equals(Object obj) { + return Arrays.equals(city, ((Measurement) obj).city); + } + } +} diff --git a/src/main/java/dev/morling/onebrc/CalculateAverage_MahmoudFawzyKhalil.java b/src/main/java/dev/morling/onebrc/CalculateAverage_MahmoudFawzyKhalil.java new file mode 100644 index 000000000..6eb426a15 --- /dev/null +++ b/src/main/java/dev/morling/onebrc/CalculateAverage_MahmoudFawzyKhalil.java @@ -0,0 +1,190 @@ +/* + * Copyright 2023 The original authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package dev.morling.onebrc; + +import java.io.File; +import java.io.IOException; +import java.io.RandomAccessFile; +import java.lang.foreign.Arena; +import java.lang.foreign.MemorySegment; +import java.lang.foreign.ValueLayout; +import java.nio.channels.FileChannel; +import java.util.*; +import java.util.concurrent.ForkJoinPool; + +// Solution using project Panama and Map Reduce +public class CalculateAverage_MahmoudFawzyKhalil { + + private static final String FILE = "./measurements.txt"; + + public static void main(String[] args) throws Exception { + mapReduce(); + } + + private static void mapReduce() throws IOException { + var f = new File(FILE); + try (var raf = new RandomAccessFile(f, "r")) { + FileChannel channel = raf.getChannel(); + long fileSize = channel.size(); + MemorySegment ms = channel.map(FileChannel.MapMode.READ_ONLY, 0, fileSize, Arena.global()); + long chunkSize = fileSize / ForkJoinPool.commonPool().getParallelism(); + List chunks = getChunks(ms, chunkSize); + Map result = chunks.stream() + .parallel() + .map(c -> readChunkToMap(c, ms)) + .reduce(Collections.emptyMap(), (a, b) -> combine(a, b)); + System.out.println(new TreeMap<>(result)); + } + } + + private static List getChunks(MemorySegment ms, long chunkSize) { + List chunks = new ArrayList<>(32); + long start = 0; + long fileSize = ms.byteSize(); + long end = chunkSize; + + while (start < fileSize) { + byte b = ms.get(ValueLayout.JAVA_BYTE, end); + if (b == '\n') { + chunks.add(new Chunk(start, end)); + start = end + 1; + end = Math.min(end + chunkSize, fileSize - 2); + } + end++; + } + return chunks; + } + + private static Map readChunkToMap(Chunk chunk, MemorySegment ms) { + Map map = new HashMap<>(); + + long start = chunk.start(); + while (start < chunk.end()) { + long cityNameSize = 0; + while (ms.get(ValueLayout.JAVA_BYTE, start + cityNameSize) != ';') { + cityNameSize++; + } + + String cityName = readString(ms, start, cityNameSize); + start = start + cityNameSize + 1; + + 
long temperatureSize = 0; + while (ms.get(ValueLayout.JAVA_BYTE, start + temperatureSize) != '\n') { + temperatureSize++; + } + + String temperature = readString(ms, start, temperatureSize); + start = start + temperatureSize + 1; + + // System.out.println(STR."\{cityName};\{temperature}"); + addMeasurement(map, cityName, temperature); + } + + return map; + } + + // Credit goes to imrafaelmerino for combine function + private static Map combine(Map xs, Map ys) { + Map result = new HashMap<>(); + + for (var key : xs.keySet()) { + var m1 = xs.get(key); + var m2 = ys.get(key); + var combined = (m2 == null) ? m1 : (m1 == null) ? m2 : m1.combine(m2); + result.put(key, combined); + } + + for (var key : ys.keySet()) + result.putIfAbsent(key, ys.get(key)); + return result; + } + + private static String readString(MemorySegment ms, long start, long size) { + byte[] stringBytes = ms.asSlice(start, size) + .toArray(ValueLayout.JAVA_BYTE); + return new String(stringBytes); + } + + private static void addMeasurement(Map measurements, String station, String reading) { + measurements.compute(station, + (_, oldMeasurements) -> oldMeasurements == null ? 
MeasurementAggregate.of(reading) : oldMeasurements.update(reading)); + } + + record Chunk(long start, long end) { + } + + private static final class MeasurementAggregate { + private double min; + private double max; + private double sum; + private long count; + + private MeasurementAggregate(double min, double max, double sum, long count) { + this.min = min; + this.max = max; + this.sum = sum; + this.count = count; + } + + public static MeasurementAggregate of(String temperature) { + double measurement = Double.parseDouble(temperature); + return new MeasurementAggregate(measurement, measurement, measurement, 1); + } + + @Override + public boolean equals(Object obj) { + if (obj == this) + return true; + if (obj == null || obj.getClass() != this.getClass()) + return false; + var that = (MeasurementAggregate) obj; + return Double.doubleToLongBits(this.min) == Double.doubleToLongBits(that.min) && + Double.doubleToLongBits(this.max) == Double.doubleToLongBits(that.max) && + Double.doubleToLongBits(this.sum) == Double.doubleToLongBits(that.sum) && + this.count == that.count; + } + + @Override + public int hashCode() { + return Objects.hash(min, max, sum, count); + } + + public MeasurementAggregate update(String part) { + double measurement = Double.parseDouble(part); + this.min = Math.min(this.min, measurement); + this.max = Math.max(this.max, measurement); + this.sum += measurement; + this.count++; + return this; + } + + public String toString() { + return min + "/" + round(round(sum) / count) + "/" + max; + } + + private double round(double value) { + return Math.round(value * 10.0) / 10.0; + } + + public MeasurementAggregate combine(MeasurementAggregate m2) { + return new MeasurementAggregate( + Math.min(this.min, m2.min), + Math.max(this.max, m2.max), + this.sum + m2.sum, + this.count + m2.count); + } + } +} diff --git a/src/main/java/dev/morling/onebrc/CalculateAverage_PanagiotisDrakatos.java 
b/src/main/java/dev/morling/onebrc/CalculateAverage_PanagiotisDrakatos.java new file mode 100644 index 000000000..04633948f --- /dev/null +++ b/src/main/java/dev/morling/onebrc/CalculateAverage_PanagiotisDrakatos.java @@ -0,0 +1,449 @@ +/* + * Copyright 2023 The original authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package dev.morling.onebrc; + +import java.io.File; +import java.io.FileInputStream; +import java.io.IOException; +import java.io.RandomAccessFile; +import java.nio.ByteBuffer; +import java.nio.ByteOrder; +import java.nio.MappedByteBuffer; +import java.nio.channels.FileChannel; +import java.util.*; +import java.util.stream.Collectors; +import java.util.stream.Stream; + +public class CalculateAverage_PanagiotisDrakatos { + private static final String FILE = "./measurements.txt"; + private static final long MAP_SIZE = 1024 * 1024 * 12L; + private static TreeMap sortedCities; + + public static void main(String[] args) throws IOException { + SeekableByteRead(FILE); + System.out.println(sortedCities.toString()); + boolean DEBUG = true; + } + + private static void SeekableByteRead(String path) throws IOException { + FileInputStream fileInputStream = new FileInputStream(new File(FILE)); + FileChannel fileChannel = fileInputStream.getChannel(); + try { + sortedCities = getFileSegments(new File(FILE), fileChannel).stream() + .map(CalculateAverage_PanagiotisDrakatos::SplitSeekableByteChannel) + .parallel() + 
.map(CalculateAverage_PanagiotisDrakatos::MappingByteBufferToData) + .flatMap(MeasurementRepository::get) + .collect(Collectors.toMap(e -> e.cityName, MeasurementRepository.Entry::measurement, MeasurementObject::updateWith, TreeMap::new)); + } + catch (NullPointerException e) { + } + fileChannel.close(); + } + + record FileSegment(long start, long end, FileChannel fileChannel) { + } + + private static List getFileSegments(final File file, final FileChannel fileChannel) throws IOException { + final int numberOfSegments = Runtime.getRuntime().availableProcessors(); + final long fileSize = file.length(); + final long segmentSize = fileSize / numberOfSegments; + final List segments = new ArrayList<>(); + if (segmentSize < 1000) { + segments.add(new FileSegment(0, fileSize, fileChannel)); + return segments; + } + try (RandomAccessFile randomAccessFile = new RandomAccessFile(file, "r")) { + long segStart = 0; + long segEnd = segmentSize; + while (segStart < fileSize) { + segEnd = findSegment(randomAccessFile, segEnd, fileSize); + segments.add(new FileSegment(segStart, segEnd, fileChannel)); + segStart = segEnd; // Just re-use the end and go from there. 
+ segEnd = Math.min(fileSize, segEnd + segmentSize); + } + } + return segments; + } + + private static long findSegment(RandomAccessFile raf, long location, final long fileSize) throws IOException { + raf.seek(location); + while (location < fileSize) { + location++; + if (raf.read() == '\n') + return location; + } + return location; + } + + private static ByteBuffer SplitSeekableByteChannel(FileSegment segment) { + try { + MappedByteBuffer buffer = segment.fileChannel.map(FileChannel.MapMode.READ_ONLY, segment.start(), segment.end - segment.start()); + return buffer; + } + catch (Exception ex) { + long start = segment.start; + long end = 0; + try { + end = segment.fileChannel.size(); + } + catch (IOException e) { + throw new RuntimeException(e); + } + MappedByteBuffer buffer = null; + ArrayList list = new ArrayList<>(); + while (start < end) { + try { + buffer = segment.fileChannel.map(FileChannel.MapMode.READ_ONLY, start, Math.min(MAP_SIZE, end - start)); + // don't split the data in the middle of lines + // find the closest previous newline + int realEnd = buffer.limit() - 1; + while (buffer.get(realEnd) != '\n') + realEnd--; + + realEnd++; + buffer.limit(realEnd); + start += realEnd; + list.add(buffer.slice(0, realEnd - 1)); + } + catch (Exception e) { + e.printStackTrace(); + } + } + sortedCities = list.stream().parallel().map(CalculateAverage_PanagiotisDrakatos::MappingByteBufferToData).flatMap(MeasurementRepository::get) + .collect(Collectors.toMap(e -> e.cityName, MeasurementRepository.Entry::measurement, MeasurementObject::updateWith, TreeMap::new)); + return null; + } + } + + public static ByteBuffer concat(ByteBuffer[] buffers) { + int overAllCapacity = 0; + for (int i = 0; i < buffers.length; i++) + overAllCapacity += buffers[i].limit() - buffers[i].position(); + overAllCapacity += buffers[0].limit() - buffers[0].position(); + ByteBuffer all = ByteBuffer.allocate(overAllCapacity); + for (int i = 0; i < buffers.length; i++) { + ByteBuffer curr = 
buffers[i]; + all.put(curr); + } + + all.flip(); + return all; + } + + private static TreeMap combineMaps(Stream stream1, Stream stream2) { + Stream resultingStream = Stream.concat(stream1, stream2); + return resultingStream.collect(Collectors.toMap(e -> e.cityName, MeasurementRepository.Entry::measurement, MeasurementObject::updateWith, TreeMap::new)); + } + + private static int longHashStep(final int hash, final long word) { + return 31 * hash + (int) (word ^ (word >>> 32)); + } + + private static final long SEPARATOR_PATTERN = compilePattern((byte) ';'); + + private static long compilePattern(final byte value) { + return ((long) value << 56) | ((long) value << 48) | ((long) value << 40) | ((long) value << 32) | ((long) value << 24) | ((long) value << 16) + | ((long) value << 8) | (long) value; + } + + private static MeasurementRepository MappingByteBufferToData(ByteBuffer byteBuffer) { + MeasurementRepository measurements = new MeasurementRepository(); + ByteBuffer bb = byteBuffer.duplicate(); + + int start = 0; + int limit = bb.limit(); + + long[] cityNameAsLongArray = new long[16]; + int[] delimiterPointerAndHash = new int[2]; + + bb.order(ByteOrder.nativeOrder()); + final boolean bufferIsBigEndian = bb.order().equals(ByteOrder.BIG_ENDIAN); + + while ((start = bb.position()) < limit + 1) { + + int delimiterPointer; + + findNextDelimiterAndCalculateHash(bb, SEPARATOR_PATTERN, start, limit, delimiterPointerAndHash, cityNameAsLongArray, bufferIsBigEndian); + delimiterPointer = delimiterPointerAndHash[0]; + // Simple lookup is faster for '\n' (just three options) + if (delimiterPointer >= limit) { + return measurements; + } + final int cityNameLength = delimiterPointer - start; + + int temp_counter = 0; + int temp_end = delimiterPointer + 1; + try { + // bb.position(delimiterPointer++); + while (bb.get(temp_end) != '\n') { + temp_counter++; + temp_end++; + } + } + catch (IndexOutOfBoundsException e) { + // temp_counter--; + // temp_end--; + } + ByteBuffer temp = 
bb.duplicate().slice(delimiterPointer + 1, temp_counter); + int tempPointer = 0; + int abs = 1; + if (temp.get(0) == '-') { + abs = -1; + tempPointer++; + } + int measuredValue; + if (temp.get(tempPointer + 1) == '.') { + measuredValue = abs * ((temp.get(tempPointer)) * 10 + (temp.get(tempPointer + 2)) - 528); + } + else { + measuredValue = abs * (temp.get(tempPointer) * 100 + temp.get(tempPointer + 1) * 10 + temp.get(tempPointer + 3) - 5328); + } + + measurements.update(cityNameAsLongArray, bb, cityNameLength, delimiterPointerAndHash[1]).updateWith(measuredValue); + + if (temp_end + 1 > limit) + return measurements; + bb.position(temp_end + 1); + } + return measurements; + } + + private static void findNextDelimiterAndCalculateHash(final ByteBuffer bb, final long pattern, final int start, final int limit, final int[] output, + final long[] asLong, final boolean bufferBigEndian) { + int hash = 1; + int i; + int lCnt = 0; + for (i = start; i <= limit - 8; i += 8) { + long word = bb.getLong(i); + if (bufferBigEndian) { + word = Long.reverseBytes(word); // Reversing the bytes is the cheapest way to do this + } + final long match = word ^ pattern; + long mask = ((match - 0x0101010101010101L) & ~match) & 0x8080808080808080L; + + if (mask != 0) { + final int index = Long.numberOfTrailingZeros(mask) >> 3; + output[0] = (i + index); + + final long partialHash = word & ((mask >> 7) - 1); + asLong[lCnt] = partialHash; + output[1] = longHashStep(hash, partialHash); + return; + } + asLong[lCnt++] = word; + hash = longHashStep(hash, word); + } + // Handle remaining bytes near the limit of the buffer: + long partialHash = 0; + int len = 0; + for (; i < limit; i++) { + byte read; + if ((read = bb.get(i)) == (byte) pattern) { + asLong[lCnt] = partialHash; + output[0] = i; + output[1] = longHashStep(hash, partialHash); + return; + } + partialHash = partialHash | ((long) read << (len << 3)); + len++; + } + output[0] = limit; // delimiter not found + } + + static class 
MeasurementRepository { + private int tableSize = 1 << 20; // can grow in theory, made large enough not to (this is faster) + private int tableMask = (tableSize - 1); + private int tableLimit = (int) (tableSize * LOAD_FACTOR); + private int tableFilled = 0; + private static final float LOAD_FACTOR = 0.8f; + + private Entry[] table = new Entry[tableSize]; + + record Entry(int hash, long[] nameBytesInLong, String cityName, MeasurementObject measurement) { + @Override + public String toString() { + return cityName + "=" + measurement; + } + } + + public MeasurementObject update(long[] nameBytesInLong, ByteBuffer bb, int length, int calculatedHash) { + + final int nameBytesInLongLength = 1 + (length >>> 3); + + int index = calculatedHash & tableMask; + Entry tableEntry; + while ((tableEntry = table[index]) != null + && (tableEntry.hash != calculatedHash || !arrayEquals(tableEntry.nameBytesInLong, nameBytesInLong, nameBytesInLongLength))) { // search for the right spot + index = (index + 1) & tableMask; + } + + if (tableEntry != null) { + return tableEntry.measurement; + } + + // --- This is a brand new entry, insert into the hashtable and do the extra calculations (once!) do slower calculations here. 
+ MeasurementObject measurement = new MeasurementObject(); + + // Now create a string: + byte[] buffer = new byte[length]; + bb.get(buffer, 0, length); + String cityName = new String(buffer, 0, length); + + // Store the long[] for faster equals: + long[] nameBytesInLongCopy = new long[nameBytesInLongLength]; + System.arraycopy(nameBytesInLong, 0, nameBytesInLongCopy, 0, nameBytesInLongLength); + + // And add entry: + Entry toAdd = new Entry(calculatedHash, nameBytesInLongCopy, cityName, measurement); + table[index] = toAdd; + + // Resize the table if filled too much: + if (++tableFilled > tableLimit) { + resizeTable(); + } + + return toAdd.measurement; + } + + private void resizeTable() { + // Resize the table: + Entry[] oldEntries = table; + table = new Entry[tableSize <<= 2]; // x2 + tableMask = (tableSize - 1); + tableLimit = (int) (tableSize * LOAD_FACTOR); + + for (Entry entry : oldEntries) { + if (entry != null) { + int updatedTableIndex = entry.hash & tableMask; + while (table[updatedTableIndex] != null) { + updatedTableIndex = (updatedTableIndex + 1) & tableMask; + } + table[updatedTableIndex] = entry; + } + } + } + + public Stream get() { + return Arrays.stream(table).filter(Objects::nonNull); + } + } + + private static boolean arrayEquals(final long[] a, final long[] b, final int length) { + for (int i = 0; i < length; i++) { + if (a[i] != b[i]) + return false; + } + return true; + } + + private static final class MeasurementObject { + + private int MAX; + private int MIN; + + private long SUM; + + private int REPEAT; + + public MeasurementObject(int MAX, int MIN, long SUM, int REPEAT) { + this.MAX = MAX; + this.MIN = MIN; + this.SUM = SUM; + this.REPEAT = REPEAT; + } + + public MeasurementObject() { + this.MAX = -999; + this.MIN = 9999; + this.SUM = 0; + this.REPEAT = 0; + } + + public MeasurementObject(int MAX, int MIN, long SUM) { + this.MAX = MAX; + this.MIN = MIN; + this.SUM = SUM; + } + + public MeasurementObject(int MAX, int MIN) { + this.MAX = 
MAX; + this.MIN = MIN; + } + + public static MeasurementObject combine(MeasurementObject m1, MeasurementObject m2) { + var mres = new MeasurementObject(); + mres.MIN = MeasurementObject.min(m1.MIN, m2.MIN); + mres.MAX = MeasurementObject.max(m1.MAX, m2.MAX); + mres.SUM = m1.SUM + m2.SUM; + mres.REPEAT = m1.REPEAT + m2.REPEAT; + return mres; + } + + public static MeasurementObject updateWith(MeasurementObject m1, MeasurementObject m2) { + var mres = new MeasurementObject(); + mres.MIN = MeasurementObject.min(m1.MIN, m2.MIN); + mres.MAX = MeasurementObject.max(m1.MAX, m2.MAX); + mres.SUM = m1.SUM + m2.SUM; + mres.REPEAT = m1.REPEAT + m2.REPEAT; + return mres; + } + + public MeasurementObject updateWith(int measurement) { + MIN = MeasurementObject.min(MIN, measurement); + MAX = MeasurementObject.max(MAX, measurement); + SUM += measurement; + REPEAT++; + return this; + } + + private static int max(final int a, final int b) { + final int diff = a - b; + final int dsgn = diff >> 31; + return a - (diff & dsgn); + } + + private static int min(final int a, final int b) { + final int diff = a - b; + final int dsgn = diff >> 31; + return b + (diff & dsgn); + } + + private double round(double value) { + return Math.round(value) / 10.0; + } + + @Override + public boolean equals(Object o) { + if (this == o) + return true; + if (o == null || getClass() != o.getClass()) + return false; + MeasurementObject that = (MeasurementObject) o; + return MAX == that.MAX && MIN == that.MIN && REPEAT == that.REPEAT; + } + + @Override + public int hashCode() { + return Objects.hash(MAX, MIN, REPEAT); + } + + @Override + public String toString() { + return round(MIN) + "/" + round((1.0 * SUM) / REPEAT) + "/" + round(MAX); + } + } +} \ No newline at end of file diff --git a/src/main/java/dev/morling/onebrc/CalculateAverage_PawelAdamski.java b/src/main/java/dev/morling/onebrc/CalculateAverage_PawelAdamski.java new file mode 100644 index 000000000..45470558f --- /dev/null +++ 
b/src/main/java/dev/morling/onebrc/CalculateAverage_PawelAdamski.java @@ -0,0 +1,209 @@ +/* + * Copyright 2023 The original authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package dev.morling.onebrc; + +import java.io.IOException; +import java.io.RandomAccessFile; +import java.nio.channels.FileChannel; +import java.util.*; +import java.util.stream.Collectors; + +import static java.nio.charset.StandardCharsets.UTF_8; +import static java.util.stream.Collectors.groupingByConcurrent; + +public class CalculateAverage_PawelAdamski { + + private static final long READ_SIZE = 100_000_000; + private static final String FILE = "./measurements.txt"; + + private static record ResultRow(double min, double mean, double max) { + + public ResultRow(MeasurementAggregator ma) { + this(ma.min / 10.0, ((Math.round(ma.sum * 100.0) / 100.0) / (double) ma.count) / 10.0, ma.max / 10.0); + } + + public String toString() { + return round(min) + "/" + round(mean) + "/" + round(max); + } + + private double round(double value) { + return Math.round(value * 10.0) / 10.0; + } + } + + private static class Station { + byte[] bytes; + int hash; + + public Station(byte[] station) { + this.bytes = station; + this.hash = Arrays.hashCode(bytes); + } + + @Override + public int hashCode() { + return hash; + } + + @Override + public boolean equals(Object o) { + return Arrays.equals(bytes, ((Station) o).bytes); + } + + } + + private static class MeasurementAggregator { + private long 
min; + private long max; + private long sum; + private long count; + + public MeasurementAggregator(long temp) { + min = temp; + max = temp; + sum = temp; + count = 1; + } + + public MeasurementAggregator() { + min = Long.MAX_VALUE; + max = Long.MIN_VALUE; + sum = 0; + count = 0; + } + + public MeasurementAggregator merge(MeasurementAggregator measurement) { + MeasurementAggregator ma = new MeasurementAggregator(); + ma.min = Math.min(min, measurement.min); + ma.max = Math.max(max, measurement.max); + ma.sum = sum + measurement.sum; + ma.count = count + measurement.count; + return ma; + } + } + + public static void main(String[] args) throws IOException { + try (RandomAccessFile raf = new RandomAccessFile(FILE, "r")) { + List parts = splitFileIntoParts(raf); + Map rr = calculateTemperatureStats(parts, raf); + Map results = prepareResults(rr); + System.out.println(results); + } + } + + private static Map prepareResults(Map rr) { + Map measurements = new TreeMap<>(); + rr.forEach((k, v) -> measurements.put(new String(k.bytes, UTF_8), new ResultRow(v))); + return measurements; + } + + private static Map calculateTemperatureStats(List parts, RandomAccessFile raf) { + return parts.parallelStream() + .map(filePart -> parse(filePart, raf)) + .flatMap(m -> m.entrySet().stream()) + .collect(groupingByConcurrent( + Map.Entry::getKey, + Collectors.reducing( + new MeasurementAggregator(), + Map.Entry::getValue, + MeasurementAggregator::merge))); + } + + private static ArrayList splitFileIntoParts(RandomAccessFile raf) throws IOException { + ArrayList parts = new ArrayList<>((int) (raf.length() / READ_SIZE)); + long pointer = 0; + long nextPointer = 0; + long fileLength = raf.length(); + while (pointer < fileLength) { + if (pointer + READ_SIZE > fileLength) { + nextPointer = fileLength; + } + else { + nextPointer = findNextLine(raf, pointer + READ_SIZE); + } + parts.add(new FilePart(pointer, nextPointer - pointer)); + pointer = nextPointer; + } + return parts; + } + + private 
static Map parse(FilePart filePart, RandomAccessFile raf) { + try { + byte[] bytes = readBytesFromFile(filePart, raf); + return parseBytesIntoStationsMap(bytes); + } + catch (IOException e) { + throw new RuntimeException(e); + } + } + + private static HashMap parseBytesIntoStationsMap(byte[] bytes) { + HashMap measurementAggregator = new HashMap<>(500); + int semicolonIndex = 0; + int newLineIndex = -1; + for (int i = 0; i < bytes.length; i++) { + if (bytes[i] == ';') { + semicolonIndex = i; + } + else if (bytes[i] == '\n') { + byte[] station = Arrays.copyOfRange(bytes, newLineIndex + 1, semicolonIndex); + long temp = parseDouble(bytes, semicolonIndex + 1, i); + MeasurementAggregator measurement = new MeasurementAggregator(temp); + measurementAggregator.compute(new Station(station), (k, prevV) -> prevV == null ? measurement : prevV.merge(measurement)); + newLineIndex = i; + } + } + return measurementAggregator; + } + + private static byte[] readBytesFromFile(FilePart filePart, RandomAccessFile raf) throws IOException { + var bb = raf.getChannel().map(FileChannel.MapMode.READ_ONLY, filePart.start(), filePart.len()); + byte[] bytes = new byte[bb.remaining()]; + bb.get(bytes); + return bytes; + } + + private static long parseDouble(byte[] text, int start, int end) { + boolean negative = false; + int result = 0; + for (int i = start; i < end; i++) { + byte c = text[i]; + if (c == '-') { + negative = true; + } + else if (c != '.') { + result *= 10; + result += c - '0'; + } + } + if (negative) { + return -result; + } + else { + return result; + } + } + + private static long findNextLine(RandomAccessFile raf, long currentPosition) throws IOException { + raf.seek(currentPosition); + while (raf.readByte() != '\n') + ; + return raf.getFilePointer(); + } + + record FilePart(long start, long len) { + } +} diff --git a/src/main/java/dev/morling/onebrc/CalculateAverage_Smoofie.java b/src/main/java/dev/morling/onebrc/CalculateAverage_Smoofie.java new file mode 100644 index 
000000000..4b7533744 --- /dev/null +++ b/src/main/java/dev/morling/onebrc/CalculateAverage_Smoofie.java @@ -0,0 +1,457 @@ +/* + * Copyright 2023 The original authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package dev.morling.onebrc; + +import sun.misc.Unsafe; + +import java.io.IOException; +import java.io.RandomAccessFile; +import java.lang.foreign.Arena; +import java.nio.channels.FileChannel; +import java.nio.charset.StandardCharsets; +import java.util.TreeMap; +import java.util.concurrent.Executors; +import java.util.stream.IntStream; + +public class CalculateAverage_Smoofie { + + private static final String FILE = "./measurements.txt"; + private static final Unsafe unsafe = getUnsafe(); + + private static class MeasurementAggregator { + private int min = -1000; + private int max = 1000; + private long sum = 0; + private int count = 0; + + @Override + public String toString() { + return ((double) min) / 10 + "/" + round(sum / 10.0 / count) + "/" + ((double) max) / 10; + } + + private double round(double value) { + return Math.round(value * 10.0) / 10.0; + } + } + + private static final class CountResult { + private final long cityHashTableAddress; + private final long countsAddress; + private int cityIdCounter; + private long nextCollisionAddress; + + private CountResult( + + // cityId|cityLength|cityNameAddress|nextElementAddress|cityCountsAddress + long cityHashTableAddress, + long countsAddress, + int cityIdCounter, + long 
nextCollisionAddress) { + this.cityHashTableAddress = cityHashTableAddress; + this.countsAddress = countsAddress; + this.cityIdCounter = cityIdCounter; + this.nextCollisionAddress = nextCollisionAddress; + } + + } + + private static int hash(long cityNameAddress, short cityLength) { + if (cityLength < 17) { + long[] city = new long[2]; + unsafe.copyMemory(null, cityNameAddress, city, Unsafe.ARRAY_LONG_BASE_OFFSET, cityLength); + long hash = city[0] ^ (city[1] >> 1); + int foldedHash = (int) (hash ^ (hash >>> 31)); + return (foldedHash & foldedHash >>> 15) & 0xffff; + } + else { + long[] city = new long[cityLength >> 3 + 1]; + unsafe.copyMemory(null, cityNameAddress, city, Unsafe.ARRAY_LONG_BASE_OFFSET, cityLength); + + long hash = city[0]; + for (int i = 1; i < city.length; i++) { + hash ^= city[i]; + } + + int foldedHash = (int) (hash ^ (hash >>> 30)); + return (foldedHash & foldedHash >>> 15) & 0xffff; + } + } + + private static Unsafe getUnsafe() { + try { + var field = Unsafe.class.getDeclaredField("theUnsafe"); + field.setAccessible(true); + return (Unsafe) field.get(null); + } + catch (NoSuchFieldException | IllegalAccessException e) { + throw new RuntimeException(e); + } + } + + private static long locateSemicolon(long input) { + long semiXor = input ^ 0x3B3B3B3B3B3B3B3BL; + return (semiXor - 0x0101010101010101L) & ~semiXor & 0x8080808080808080L; + } + + public static void main(String[] args) throws IOException, InterruptedException { + var numberOfThreads = Runtime.getRuntime().availableProcessors(); + var executorService = Executors.newFixedThreadPool(numberOfThreads); + var resultMap = new TreeMap(); + var subCountResults = new CountResult[numberOfThreads]; + + try (RandomAccessFile randomAccessFile = new RandomAccessFile(FILE, "r"); + FileChannel fileChannel = randomAccessFile.getChannel()) { + + long fileSize = randomAccessFile.length(); + if (fileSize < numberOfThreads * 1024) { + numberOfThreads = fileSize < 1024 ? 
1 : (int) (fileSize / 1024); + } + long chunkSize = fileSize / numberOfThreads; + + long inputFileAddress = fileChannel.map(FileChannel.MapMode.READ_ONLY, 0, fileSize, Arena.global()).address(); + final long[] inputFileMemoryOffsets = new long[numberOfThreads + 1]; + inputFileMemoryOffsets[0] = inputFileAddress; + inputFileMemoryOffsets[numberOfThreads] = inputFileAddress + fileSize; + for (long i = inputFileAddress + chunkSize, index = 1; index < numberOfThreads; i += chunkSize, index++) { + while (unsafe.getByte(i++) != '\n') + ; + inputFileMemoryOffsets[(int) index] = i; + } + + for (int i = 0; i < numberOfThreads; i++) { + final long start = inputFileMemoryOffsets[i]; + final long end = inputFileMemoryOffsets[i + 1]; + + final int threadIndex = i; + executorService.execute(() -> { + var cityHashTableAddress = unsafe.allocateMemory(75536 * 32); + unsafe.setMemory(cityHashTableAddress, 75536 * 32, (byte) 0); + long nextCollisionAddress = cityHashTableAddress + (65536 << 5); + + var countsAddress = unsafe.allocateMemory(10000 * 2 * 1000 * 4); + int cityId; + int temperature = 0; + int cityIdCounter = 0; + long position = start; + byte c; + long input; + long inputSemicolon; + int cityHash; + long hashAddress; + short cityLength; + long cityStart; + long temperatureAddress; + while (position < end) { + cityStart = position; + input = unsafe.getLong(position); + inputSemicolon = locateSemicolon(input); + if (inputSemicolon == 0) { + position += 8; + input = unsafe.getLong(position); + inputSemicolon = locateSemicolon(input); + + if (inputSemicolon == 0) { + // probably not gonna happen very often + while (inputSemicolon == 0) { + position += 8; + input = unsafe.getLong(position); + inputSemicolon = locateSemicolon(input); + } + } + } + position += Long.numberOfTrailingZeros(inputSemicolon) >> 3; + + cityLength = (short) (position - cityStart); + + cityHash = hash(cityStart, cityLength); + hashAddress = cityHashTableAddress + ((long) cityHash << 5); + cityId = -1; + 
outer: for (;;) { + if (cityLength != unsafe.getShort(hashAddress + 4)) { + if (unsafe.getShort(hashAddress + 4) == 0) { + // new hash slot init + cityId = cityIdCounter++; + unsafe.setMemory(countsAddress + cityId * 8000, 8000, (byte) 0); + unsafe.putInt(hashAddress, cityId); + unsafe.putShort(hashAddress + 4, cityLength); + unsafe.putLong(hashAddress + 6, cityStart); + unsafe.putLong(hashAddress + 22, countsAddress + cityId * 8000); + break; + } + if (unsafe.getLong(hashAddress + 14) != 0) { + hashAddress = unsafe.getLong(hashAddress + 14); + continue; + } + break; + } + long cityNameAddress = unsafe.getLong(hashAddress + 6); + int j; + for (j = 0; j < cityLength >> 3 << 3; j += 8) { + if (unsafe.getLong(cityStart + j) != unsafe.getLong(cityNameAddress + j)) { + if (unsafe.getLong(hashAddress + 14) != 0) { + hashAddress = unsafe.getLong(hashAddress + 14); + continue outer; + } + break outer; + } + } + if (j < cityLength) { + if ((unsafe.getLong(cityStart + j) << ((0x8 - cityLength & 0x7) << 3)) != (unsafe + .getLong(cityNameAddress + j) << ((0x8 - cityLength & 0x7) << 3))) { + if (unsafe.getLong(hashAddress + 14) != 0) { + hashAddress = unsafe.getLong(hashAddress + 14); + continue; + } + break; + } + } + cityId = unsafe.getInt(hashAddress); + break; + } + + if (cityId == -1) { + // collision + cityId = cityIdCounter++; + unsafe.setMemory(countsAddress + cityId * 8000, 8000, (byte) 0); + unsafe.putLong(hashAddress + 14, nextCollisionAddress); + hashAddress = nextCollisionAddress; + nextCollisionAddress += 32; + unsafe.putInt(hashAddress, cityId); + unsafe.putShort(hashAddress + 4, cityLength); + unsafe.putLong(hashAddress + 6, cityStart); + unsafe.putLong(hashAddress + 22, countsAddress + cityId * 8000); + } + + position++; // skip semicolon + + // long inputDecimalPoint = locateDecimalPoint(unsafe.getLong(position)); + // position += (Long.numberOfTrailingZeros(inputDecimalPoint) >> 3) + 3; + + temperature = 0; + c = unsafe.getByte(position++); + if (c == '-') { 
+ while ((c = unsafe.getByte(position++)) != '\n') { + if (c != '.') { + temperature = temperature * 10 + (c ^ 0x30); + } + } + temperatureAddress = unsafe.getLong(hashAddress + 22) + (1000 + temperature) * 4; + unsafe.putInt(temperatureAddress, unsafe.getInt(temperatureAddress) + 1); + } + else { + temperature = c - '0'; + while ((c = unsafe.getByte(position++)) != '\n') { + if (c != '.') { + temperature = temperature * 10 + (c ^ 0x30); + } + } + + temperatureAddress = unsafe.getLong(hashAddress + 22) + temperature * 4; + unsafe.putInt(temperatureAddress, unsafe.getInt(temperatureAddress) + 1); + } + } + subCountResults[threadIndex] = new CountResult(cityHashTableAddress, countsAddress, cityIdCounter, nextCollisionAddress); + }); + } + + executorService.shutdown(); + executorService.awaitTermination(120, java.util.concurrent.TimeUnit.SECONDS); + + // aggregate results 1..n to 0 + var subCountA = subCountResults[0]; + for (int r = 1; r < numberOfThreads; r++) { + CountResult subCountB = subCountResults[r]; + for (int i = 0; i < 65536; i++) { + long bHashAddress = subCountB.cityHashTableAddress + ((long) i << 5); + if (unsafe.getShort(bHashAddress + 4) == 0) { + continue; + } + long aHashAddress = subCountA.cityHashTableAddress + ((long) i << 5); + // check if a initialized + if (unsafe.getShort(aHashAddress + 4) == 0) { + // new hash slot init + for (long addressA = aHashAddress, addressB = bHashAddress; addressB != 0;) { + unsafe.putInt(addressA, subCountA.cityIdCounter++); + unsafe.putShort(addressA + 4, unsafe.getShort(addressB + 4)); + unsafe.putLong(addressA + 6, unsafe.getLong(addressB + 6)); + addressB = unsafe.getLong(addressB + 14); + if (addressB != 0) { + unsafe.putLong(addressA + 14, subCountA.nextCollisionAddress); + addressA = subCountA.nextCollisionAddress; + subCountA.nextCollisionAddress += 32; + } + } + } + else { + // check to copy collision list too + outerB: for (long addressB = bHashAddress; addressB != 0; addressB = unsafe.getLong(addressB + 
14)) { + short cityLength = unsafe.getShort(addressB + 4); + long cityNameAddress = unsafe.getLong(addressB + 6); + // compare to each city in A slot + outerA: for (long aAddress = aHashAddress; aAddress != 0; aAddress = unsafe.getLong(aAddress + 14)) { + if (unsafe.getShort(aAddress + 4) == cityLength) { + long aCityNameAddress = unsafe.getLong(aAddress + 6); + int j; + for (j = 0; j < cityLength >> 3 << 3; j += 8) { + if (unsafe.getLong(cityNameAddress + j) != unsafe.getLong(aCityNameAddress + j)) { + // nope, not the same, try next + continue outerA; + } + } + if (j == cityLength || + (unsafe.getLong(cityNameAddress + j) << ((0x8 - cityLength & 0x7) << 3)) == (unsafe + .getLong(aCityNameAddress + j) << ((0x8 - cityLength & 0x7) << 3))) { + // found the same city, continue with next city in B slot + continue outerB; + } + } + } + // city not found in A slot, add it. It's a collision too + long addressA = aHashAddress; + while (unsafe.getLong(addressA + 14) != 0) { + addressA = unsafe.getLong(addressA + 14); + } + unsafe.putLong(addressA + 14, subCountA.nextCollisionAddress); + addressA = subCountA.nextCollisionAddress; + subCountA.nextCollisionAddress += 32; + + unsafe.putInt(addressA, subCountA.cityIdCounter++); + unsafe.putShort(addressA + 4, cityLength); + unsafe.putLong(addressA + 6, cityNameAddress); + } + } + } + + int[] cityIdMap = new int[10000]; + for (int i = 0; i < 10000; i++) { + cityIdMap[i] = -1; + } + + for (int i = 0; i < 65536; i++) { + long bHashAddress = subCountB.cityHashTableAddress + ((long) i << 5); + long aHashAddress = subCountA.cityHashTableAddress + ((long) i << 5); + if (unsafe.getShort(aHashAddress + 4) == 0) { + continue; + } + // for each city in A slot + outerA: for (long aAddress = aHashAddress; aAddress != 0; aAddress = unsafe.getLong(aAddress + 14)) { + short cityLength = unsafe.getShort(aAddress + 4); + long cityNameAddress = unsafe.getLong(aAddress + 6); + int cityIdA = unsafe.getInt(aAddress); + // compare to each city in B 
slot + outer: for (long bAddress = bHashAddress; bAddress != 0; bAddress = unsafe.getLong(bAddress + 14)) { + if (unsafe.getShort(bAddress + 4) == cityLength) { + long bCityNameAddress = unsafe.getLong(bAddress + 6); + int j; + for (j = 0; j < cityLength >> 3 << 3; j += 8) { + if (unsafe.getLong(cityNameAddress + j) != unsafe.getLong(bCityNameAddress + j)) { + // nope, not the same, try next + continue outer; + } + } + if (j == cityLength || + (unsafe.getLong(cityNameAddress + j) << ((0x8 - cityLength & 0x7) << 3)) == (unsafe + .getLong(bCityNameAddress + j) << ((0x8 - cityLength & 0x7) << 3))) { + cityIdMap[cityIdA] = unsafe.getInt(bAddress); + // found the same city, continue with next city in A slot + continue outerA; + } + } + } + } + } + + for (int i = 0; i < subCountA.cityIdCounter; i++) { + int cityId2 = cityIdMap[i]; + if (cityId2 != -1) { + for (int j = 0; j < 2; j++) { + for (int k = 0; k < 1000; k++) { + unsafe.putInt(subCountA.countsAddress + i * 8000 + j * 4000 + k * 4, + unsafe.getInt(subCountA.countsAddress + i * 8000 + j * 4000 + k * 4) + + unsafe.getInt(subCountB.countsAddress + cityId2 * 8000 + j * 4000 + k * 4)); + } + } + } + } + } + + var countResult = subCountResults[0]; + var reverseCityIds = new String[10000]; + for (int i = 0; i < 65536; i++) { + long resultHashAddress = countResult.cityHashTableAddress + ((long) i << 5); + if (unsafe.getShort(resultHashAddress + 4) != 0) { + for (long address = resultHashAddress; address != 0; address = unsafe.getLong(address + 14)) { + int cityId = unsafe.getInt(address); + int cityLength = unsafe.getShort(address + 4); + long cityNameAddress = unsafe.getLong(address + 6); + byte[] cityBytes = new byte[cityLength]; + unsafe.copyMemory(null, cityNameAddress, cityBytes, Unsafe.ARRAY_BYTE_BASE_OFFSET, cityLength); + reverseCityIds[cityId] = new String(cityBytes, StandardCharsets.UTF_8); + } + } + } + + // count result as stream + IntStream.range(0, 10000).parallel().forEach(cityId -> { + var cityName = 
reverseCityIds[cityId]; + if (cityName == null) { + return; + } + var cityAddress = countResult.countsAddress + cityId * 8000; + var cityResult = new MeasurementAggregator(); + for (int i = 999; i > -1; i--) { + if (unsafe.getInt(cityAddress + 4000 + i * 4) > 0) { + cityResult.min = -i; + break; + } + } + if (cityResult.min == -1000) { + for (int i = 0; i < 1000; i++) { + if (unsafe.getInt(cityAddress + i * 4) > 0) { + cityResult.min = i; + break; + } + } + } + for (int i = 999; i > -1; i--) { + if (unsafe.getInt(cityAddress + i * 4) > 0) { + cityResult.max = i; + break; + } + } + if (cityResult.max == 1000) { + for (int i = 0; i < 1000; i++) { + if (unsafe.getInt(cityAddress + 4000 + i * 4) > 0) { + cityResult.max = -i; + break; + } + } + } + for (int i = 0; i < 1000; i++) { + cityResult.sum += ((long) unsafe.getInt(cityAddress + i * 4)) * i; + cityResult.sum -= ((long) unsafe.getInt(cityAddress + 4000 + i * 4)) * i; + cityResult.count += unsafe.getInt(cityAddress + i * 4); + cityResult.count += unsafe.getInt(cityAddress + 4000 + i * 4); + } + synchronized (resultMap) { + resultMap.put(cityName, cityResult); + } + }); + + System.out.println(resultMap); + } + } +} diff --git a/src/main/java/dev/morling/onebrc/CalculateAverage_YannMoisan.java b/src/main/java/dev/morling/onebrc/CalculateAverage_YannMoisan.java new file mode 100644 index 000000000..03370e67b --- /dev/null +++ b/src/main/java/dev/morling/onebrc/CalculateAverage_YannMoisan.java @@ -0,0 +1,275 @@ +/* + * Copyright 2023 The original authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +package dev.morling.onebrc; + +import java.io.IOException; +import java.io.UncheckedIOException; +import java.nio.ByteBuffer; +import java.nio.channels.FileChannel; +import java.nio.channels.FileChannel.MapMode; +import java.nio.file.Paths; +import java.nio.file.StandardOpenOption; +import java.util.*; +import java.util.concurrent.ForkJoinPool; +import java.util.function.Supplier; +import java.util.stream.Stream; +import java.util.stream.StreamSupport; + +/** + * based on imrafaelmerino + * ./calculate_average_imrafaelmerino.sh 129.10s user 4.73s system 1395% cpu 9.591 total + * + * ./calculate_average_baseline.sh 193.27s user 5.81s system 100% cpu 3:17.85 total + * + * addition to copied implementation + * - use a Location object as a key in the Map to avoid String instantiations. + * ./calculate_average_YannMoisan.sh 118.36s user 5.72s system 1425% cpu 8.705 total + * + * Model Name: MacBook Pro + * Chip: Intel Core i9 + * Total Number of Cores: 8 + * Memory: 64 GB + * */ +public class CalculateAverage_YannMoisan { + + private static final String FILE = "./measurements.txt"; + private static final int FIELD_SIZE = 128; + + public static void main(String[] args) throws IOException { + var chunkSize = 1024 * 1024 * 50L; // Long.parseLong(args[0].trim()); + var result = calculateStats(FILE, chunkSize); + System.out.println(result); + } + + private static Map calculateStats(String file, + long chunkSize) + throws IOException { + + try (var fileChannel = FileChannel.open(Paths.get(file), + StandardOpenOption.READ)) { + var stats = fileMemoryStream(fileChannel, chunkSize) + .parallel() + .map(p -> ManagedComputation.compute(() -> parse(p))) + .reduce(Collections.emptyMap(), + (stat1, stat2) -> combine(stat1, stat2)); + + var tm = new TreeMap(); + stats.forEach((k, v) -> tm.put(new String(k.value, 0, k.value.length), v)); + return tm; + } + + } + + private 
static Map combine(Map xs, + Map ys) { + + Map result = new HashMap<>(); + + for (var key : xs.keySet()) { + var m1 = xs.get(key); + var m2 = ys.get(key); + var combined = (m2 == null) ? m1 : (m1 == null) ? m2 : Stat.combine(m1, m2); + result.put(key, combined); + } + + for (var key : ys.keySet()) + result.putIfAbsent(key, ys.get(key)); + return result; + + } + + private static Map parse(ByteBuffer bb) { + Map stats = new HashMap<>(); + var limit = bb.limit(); + var field = new byte[FIELD_SIZE]; + while (bb.position() < limit) { + var fieldCurrentIndex = 0; + field[fieldCurrentIndex++] = bb.get(); + while (bb.position() < limit) { + var fieldByte = bb.get(); + if (fieldByte == ';') + break; + field[fieldCurrentIndex++] = fieldByte; + } + var fieldStr = new Location(Arrays.copyOfRange(field, 0, fieldCurrentIndex)); + var number = 0; + var sign = 1; + while (bb.position() < limit) { + var numberByte = bb.get(); + if (numberByte == '-') + sign = -1; + else if (numberByte == '\n') + break; + else if (numberByte != '.') + number = number * 10 + (numberByte - '0'); + } + var v = stats.get(fieldStr); + if (v == null) { + var vv = new Stat(); + vv.update(sign * number); + stats.put(fieldStr, vv); + } + else { + v.update(sign * number); + } + } + + return stats; + } + + private static Stream fileMemoryStream(FileChannel fileChannel, + long chunkSize) + throws IOException { + + var spliterator = Spliterators.spliteratorUnknownSize(fileMemoryIterator(fileChannel, + chunkSize), + Spliterator.IMMUTABLE); + return StreamSupport.stream(spliterator, + false); + } + + private static Iterator fileMemoryIterator(FileChannel fileChannel, long chunkSize) throws IOException { + return new Iterator<>() { + + private final long size = fileChannel.size(); + private long start = 0; + + @Override + public boolean hasNext() { + return start < size; + } + + @Override + public ByteBuffer next() { + try { + var buffer = fileChannel.map(MapMode.READ_ONLY, + start, + Math.min(chunkSize, + size - 
start)); + var limmit = buffer.limit() - 1; + while (buffer.get(limmit) != '\n') + limmit--; + limmit++; + buffer.limit(limmit); + start += limmit; + return buffer; + } + catch (IOException ex) { + throw new UncheckedIOException(ex); + } + } + }; + } + + private static final class Location { + public final byte[] value; + + public Location(byte[] value) { + this.value = value; + } + + @Override + public boolean equals(Object o) { + if (this == o) + return true; + if (o == null || getClass() != o.getClass()) + return false; + Location location = (Location) o; + return Arrays.equals(value, location.value); + } + + @Override + public int hashCode() { + return Arrays.hashCode(value); + } + } + + private static final class Stat { + + private int min = Integer.MAX_VALUE; + private int max = Integer.MIN_VALUE; + private long sum = 0L; + private long count = 0L; + + public static Stat combine(Stat m1, + Stat m2) { + var stat = new Stat(); + stat.min = Math.min(m1.min, m2.min); + stat.max = Math.max(m1.max, m2.max); + stat.sum = m1.sum + m2.sum; + stat.count = m1.count + m2.count; + return stat; + } + + private void update(int value) { + this.min = Math.min(this.min, value); + this.max = Math.max(this.max, value); + this.sum += value; + this.count++; + } + + @Override + public String toString() { + return round(min / 10.0) + "/" + round((sum / 10.0) / count) + "/" + round(max / 10.0); + } + + private double round(double value) { + return Math.round(value * 10.0) / 10.0; + } + } + + private static final class ManagedComputation { + static T compute(final Supplier supplier) { + var managedBlocker = new ManagedSupplier<>(supplier); + try { + ForkJoinPool.managedBlock(managedBlocker); + return managedBlocker.getResult(); + } + catch (InterruptedException e) { + Thread.currentThread().interrupt(); + throw new RuntimeException(e); + } + + } + + private static class ManagedSupplier implements ForkJoinPool.ManagedBlocker { + private final Supplier task; + private T result; + 
private boolean isDone = false; + + private ManagedSupplier(final Supplier supplier) { + task = supplier; + } + + @Override + public boolean block() { + result = task.get(); + isDone = true; + return true; + } + + @Override + public boolean isReleasable() { + return isDone; + } + + T getResult() { + return result; + } + } + + } +} diff --git a/src/main/java/dev/morling/onebrc/CalculateAverage_abeobk.java b/src/main/java/dev/morling/onebrc/CalculateAverage_abeobk.java index 1a71349b3..88de5d2a9 100644 --- a/src/main/java/dev/morling/onebrc/CalculateAverage_abeobk.java +++ b/src/main/java/dev/morling/onebrc/CalculateAverage_abeobk.java @@ -24,14 +24,24 @@ import java.nio.charset.StandardCharsets; import java.nio.file.Path; import java.nio.file.StandardOpenOption; +import java.util.ArrayList; +import java.util.Arrays; import java.util.TreeMap; +import java.util.concurrent.atomic.AtomicInteger; +import java.util.concurrent.atomic.AtomicReference; +import java.util.stream.IntStream; + import sun.misc.Unsafe; public class CalculateAverage_abeobk { + private static final int CPU_CNT = Runtime.getRuntime().availableProcessors(); + private static final String FILE = "./measurements.txt"; private static final int BUCKET_SIZE = 1 << 16; - private static final int BUCKET_MASK = BUCKET_SIZE - 1; + private static final long BUCKET_MASK = BUCKET_SIZE - 1; private static final int MAX_STR_LEN = 100; + private static final int MAX_STATIONS = 10000; + private static final long CHUNK_SZ = 1 << 22; private static final Unsafe UNSAFE = initUnsafe(); private static final long[] HASH_MASKS = new long[]{ 0x0L, @@ -44,6 +54,11 @@ public class CalculateAverage_abeobk { 0xffffffffffffffL, 0xffffffffffffffffL, }; + private static AtomicInteger chunk_id = new AtomicInteger(0); + private static AtomicReference mapref = new AtomicReference<>(null); + private static int chunk_cnt; + private static long start_addr, end_addr; + private static Unsafe initUnsafe() { try { Field theUnsafe = 
Unsafe.class.getDeclaredField("theUnsafe"); @@ -55,176 +70,429 @@ private static Unsafe initUnsafe() { } } - // stat - private static class Stat { - private int min; - private int max; - private long sum; - private int count; - - Stat(int v) { - sum = min = max = v; - count = 1; + /* + * MAIN FUNCTION + */ + public static void main(String[] args) throws InterruptedException, IOException { + // thomaswue trick + if (args.length == 0 || !("--worker".equals(args[0]))) { + spawnWorker(); + return; } - void add(int val) { - min = Math.min(val, min); - max = Math.max(val, max); - sum += val; - count++; + var file = FileChannel.open(Path.of(FILE), StandardOpenOption.READ); + long file_size = file.size(); + start_addr = file.map(MapMode.READ_ONLY, 0, file.size(), Arena.global()).address(); + end_addr = start_addr + file_size; + + // only use all cpus on large file + int cpu_cnt = file_size < 1e6 ? 1 : CPU_CNT; + chunk_cnt = (int) Math.ceilDiv(file_size, CHUNK_SZ); + + // spawn workers + for (var w : IntStream.range(0, cpu_cnt).mapToObj(i -> new Worker(i)).toList()) { + w.join(); } - void merge(Stat other) { - min = Math.min(other.min, min); - max = Math.max(other.max, max); - sum += other.sum; - count += other.count; + // collect results + TreeMap ms = new TreeMap<>(); + for (var crr : mapref.get()) { + if (crr == null) + continue; + var prev = ms.putIfAbsent(crr.key(), crr); + if (prev != null) + prev.merge(crr); } + // print result + System.out.println(ms); + System.out.close(); + } + + /* + * HELPER FUNCTIONS + */ - public String toString() { - return String.format("%.1f/%.1f/%.1f", min * 0.1, sum * 0.1 / count, max * 0.1); + // Get semicolon pos code + static final long getSemiCode(final long w) { + long x = w ^ 0x3b3b3b3b3b3b3b3bL; // xor with ;;;;;;;; + return (x - 0x0101010101010101L) & (~x & 0x8080808080808080L); + } + + // Get new line pos code + static final long getLFCode(final long w) { + long x = w ^ 0x0A0A0A0A0A0A0A0AL; // xor with \n\n\n\n\n\n\n\n + return 
(x - 0x0101010101010101L) & (~x & 0x8080808080808080L); + } + + // Get decimal point pos code + static final int getDotCode(final long w) { + return Long.numberOfTrailingZeros(~w & 0x10101000); + } + + // Convert semicolon pos code to position + static final int getSemiPos(final long spc) { + return Long.numberOfTrailingZeros(spc) >>> 3; + } + + // Find next line address + static final long nextLF(long addr) { + long word = UNSAFE.getLong(addr); + long lfpos_code = getLFCode(word); + while (lfpos_code == 0) { + addr += 8; + word = UNSAFE.getLong(addr); + lfpos_code = getLFCode(word); } + return addr + (Long.numberOfTrailingZeros(lfpos_code) >>> 3) + 1; + } + + // Parse number + // great idea from merykitty (Quan Anh Mai) + static final long num(long w, int d) { + int shift = 28 - d; + long signed = (~w << 59) >> 63; + long dsmask = ~(signed & 0xFF); + long digits = ((w & dsmask) << shift) & 0x0F000F0F00L; + long abs_val = ((digits * 0x640a0001) >>> 32) & 0x3FF; + return ((abs_val ^ signed) - signed); + } + + // Hash mixer + static final long mix(long hash) { + long h = hash * 37; + return (h ^ (h >>> 29)); + } + + // Spawn worker (thomaswue trick + private static void spawnWorker() throws IOException { + ProcessHandle.Info info = ProcessHandle.current().info(); + ArrayList workerCommand = new ArrayList<>(); + info.command().ifPresent(workerCommand::add); + info.arguments().ifPresent(args -> workerCommand.addAll(Arrays.asList(args))); + workerCommand.add("--worker"); + new ProcessBuilder() + .command(workerCommand) + .start() + .getInputStream() + .transferTo(System.out); } - static class Node { + final static class Node { long addr; + long hash; + long word0; + long sum; + long min, max; int keylen; - int hash; - long[] buf = new long[13]; - Stat stat; + int count; - String key() { - byte[] buf = new byte[MAX_STR_LEN]; - UNSAFE.copyMemory(null, addr, buf, Unsafe.ARRAY_BYTE_BASE_OFFSET, keylen); - return new String(buf, 0, keylen, StandardCharsets.UTF_8); + public 
final String toString() { + return (min / 10.0) + "/" + + (Math.round(((double) sum / count)) / 10.0) + "/" + + (max / 10.0); } - Node(long a, int kl, int h, int v, long[] b) { - stat = new Stat(v); + final String key() { + byte[] sbuf = new byte[MAX_STR_LEN]; + UNSAFE.copyMemory(null, addr, sbuf, Unsafe.ARRAY_BYTE_BASE_OFFSET, keylen); + return new String(sbuf, 0, (int) keylen, StandardCharsets.UTF_8); + } + + Node(long a, long h, int kl, long v) { + addr = a; + min = max = v; + keylen = kl; + hash = h; + } + + Node(long a, long h, int kl) { addr = a; + hash = h; + min = 999; + max = -999; + keylen = kl; + } + + Node(long a, long w0, long h, int kl, long v) { + addr = a; + word0 = w0; + hash = h; + min = max = v; keylen = kl; + } + + Node(long a, long w0, long h, int kl) { + addr = a; + word0 = w0; hash = h; - System.arraycopy(b, 0, buf, 0, Math.ceilDiv(kl, 8)); + min = 999; + max = -999; + keylen = kl; } - boolean contentEquals(final long[] other_buf) { - int k = keylen / 8; - int r = keylen % 8; - // Since the city name is most likely shorter than 16 characters - // this should be faster than typical conditional checks - long sum = 0; - for (int i = 0; i < k; i++) { - sum += buf[i] ^ other_buf[i]; + final void add(long val) { + sum += val; + count++; + if (val > max) { + max = val; + } + if (val < min) { + min = val; } - sum += (buf[k] ^ other_buf[k]) & HASH_MASKS[r]; - return sum == 0; } - } - // split into chunks - static long[] slice(long start_addr, long end_addr, long chunk_size, int cpu_cnt) { - long[] ptrs = new long[cpu_cnt + 1]; - ptrs[0] = start_addr; - for (int i = 1; i < cpu_cnt; i++) { - long addr = start_addr + i * chunk_size; - while (addr < end_addr && UNSAFE.getByte(addr++) != '\n') - ; - ptrs[i] = Math.min(addr, end_addr); - } - ptrs[cpu_cnt] = end_addr; - return ptrs; + final void merge(Node other) { + sum += other.sum; + count += other.count; + if (other.max > max) { + max = other.max; + } + if (other.min < min) { + min = other.min; + } + } + 
+ final boolean contentEquals(long other_addr, long other_word0, long other_hash, long kl) { + if (word0 != other_word0 || hash != other_hash) + return false; + // this is faster than comparision if key is short + long xsum = 0; + long n = kl & 0xF8; + for (long i = 8; i < n; i += 8) { + xsum |= (UNSAFE.getLong(addr + i) ^ UNSAFE.getLong(other_addr + i)); + } + return xsum == 0; + } + + final boolean contentEquals(Node other) { + if (hash != other.hash) + return false; + long n = keylen & 0xF8; + for (long i = 0; i < n; i += 8) { + if (UNSAFE.getLong(addr + i) != UNSAFE.getLong(other.addr + i)) + return false; + } + return true; + } } - public static void main(String[] args) throws InterruptedException, IOException { - int cpu_cnt = Runtime.getRuntime().availableProcessors(); - try (var file = FileChannel.open(Path.of(FILE), StandardOpenOption.READ)) { - long start_addr = file.map(MapMode.READ_ONLY, 0, file.size(), Arena.global()).address(); - long file_size = file.size(); - long end_addr = start_addr + file_size; - long chunk_size = Math.ceilDiv(file_size, cpu_cnt); - - // processing - var threads = new Thread[cpu_cnt]; - var maps = new Node[cpu_cnt][]; - var ptrs = slice(start_addr, end_addr, chunk_size, cpu_cnt); - - for (int i = 0; i < cpu_cnt; i++) { - int thread_id = i; - long start = ptrs[i]; - long end = ptrs[i + 1]; - maps[i] = new Node[BUCKET_SIZE + 16]; // extra space for collisions - - (threads[i] = new Thread(() -> { - long addr = start; - var map = maps[thread_id]; - long[] buf = new long[13]; - // parse loop - while (addr < end) { - int idx = 0; - long hash = 0; - long word = 0; - long row_addr = addr; - int semi_pos = 8; - while (semi_pos == 8) { - word = UNSAFE.getLong(addr); - buf[idx++] = word; - // idea from thomaswue & royvanrijn - long xor_semi = word ^ 0x3b3b3b3b3b3b3b3bL; // xor with ;;;;;;;; - long semipos_code = (xor_semi - 0x0101010101010101L) & ~xor_semi & 0x8080808080808080L; - semi_pos = Long.numberOfTrailingZeros(semipos_code) >>> 3; 
- addr += semi_pos; - hash ^= word & HASH_MASKS[semi_pos]; - } + // Thread pool worker + static final class Worker extends Thread { + final int thread_id; // for debug use only + + Worker(int i) { + thread_id = i; + this.setPriority(Thread.MAX_PRIORITY); + this.start(); + } - int hash32 = (int) (hash ^ (hash >>> 31)); - int keylen = (int) (addr - row_addr); + @Override + public void run() { + var map = new Node[BUCKET_SIZE + MAX_STATIONS]; // extra space for collisions - // great idea from merykitty (Quan Anh Mai) - long num_word = UNSAFE.getLong(++addr); - int dot_pos = Long.numberOfTrailingZeros(~num_word & 0x10101000); - addr += (dot_pos >>> 3) + 3; + int id; + // process in small chunk to maintain disk locality (artsiomkorzun trick) + while ((id = chunk_id.getAndIncrement()) < chunk_cnt) { + long addr = start_addr + id * CHUNK_SZ; + long end = Math.min(addr + CHUNK_SZ, end_addr); - int shift = 28 - dot_pos; - long signed = (~num_word << 59) >> 63; - long dsmask = ~(signed & 0xFF); - long digits = ((num_word & dsmask) << shift) & 0x0F000F0F00L; - long abs_val = ((digits * 0x640a0001) >>> 32) & 0x3FF; - int val = (int) ((abs_val ^ signed) - signed); + // find start of line + if (id > 0) { + addr = nextLF(addr); + } - int bucket = (hash32 & BUCKET_MASK); + final int num_segs = 3; + long seglen = (end - addr) / num_segs; + + long a0 = addr; + long a1 = nextLF(addr + 1 * seglen); + long a2 = nextLF(addr + 2 * seglen); + ChunkParser p0 = new ChunkParser(map, a0, a1); + ChunkParser p1 = new ChunkParser(map, a1, a2); + ChunkParser p2 = new ChunkParser(map, a2, end); + + while (p0.ok() && p1.ok() && p2.ok()) { + long w0 = p0.word(); + long w1 = p1.word(); + long w2 = p2.word(); + long sc0 = getSemiCode(w0); + long sc1 = getSemiCode(w1); + long sc2 = getSemiCode(w2); + Node n0 = p0.key(w0, sc0); + Node n1 = p1.key(w1, sc1); + Node n2 = p2.key(w2, sc2); + long v0 = p0.val(); + long v1 = p1.val(); + long v2 = p2.val(); + n0.add(v0); + n1.add(v1); + n2.add(v2); + } + + 
while (p0.ok()) { + long w = p0.word(); + long sc = getSemiCode(w); + Node n = p0.key(w, sc); + long v = p0.val(); + n.add(v); + } + while (p1.ok()) { + long w = p1.word(); + long sc = getSemiCode(w); + Node n = p1.key(w, sc); + long v = p1.val(); + n.add(v); + } + while (p2.ok()) { + long w = p2.word(); + long sc = getSemiCode(w); + Node n = p2.key(w, sc); + long v = p2.val(); + n.add(v); + } + } + + // merge is cheaper than string casting (artsiomkorzun) + while (!mapref.compareAndSet(null, map)) { + var other_map = mapref.getAndSet(null); + if (other_map != null) { + for (int i = 0; i < other_map.length; i++) { + var other = other_map[i]; + if (other == null) + continue; + int bucket = (int) (other.hash & BUCKET_MASK); while (true) { var node = map[bucket]; if (node == null) { - map[bucket] = new Node(row_addr, keylen, hash32, val, buf); + map[bucket] = other; break; } - if (node.keylen == keylen && node.hash == hash32 && node.contentEquals(buf)) { - node.stat.add(val); + if (node.contentEquals(other)) { + node.merge(other); break; } bucket++; } } - })).start(); + } } + } + } + + static final class ChunkParser { + long addr; + long end; + Node[] map; - // join all - for (var thread : threads) - thread.join(); - - // collect results - TreeMap ms = new TreeMap<>(); - for (var map : maps) { - for (var node : map) { - if (node == null) - continue; - var stat = ms.putIfAbsent(node.key(), node.stat); - if (stat != null) - stat.merge(node.stat); + ChunkParser(Node[] m, long a, long e) { + map = m; + addr = a; + end = e; + } + + final boolean ok() { + return addr < end; + } + + final long word() { + return UNSAFE.getLong(addr); + } + + final void skip(int n) { + addr += n; + } + + final void skip(long n) { + addr += n; + } + + final long val0() { + long w = word(); + int d = getDotCode(w); + return num(w, d); + } + + final long val() { + long w = word(); + int d = getDotCode(w); + skip((d >>> 3) + 3); + return num(w, d); + } + + // optimize for contest + // save as much 
slow memory access as possible + // about 50% key < 8chars, 25% key bettween 8-10 chars + // keylength histogram (%) = [0, 0, 0, 0, 4, 10, 21, 15, 13, 11, 6, 6, 4, 2... + final Node key(long word0, long semipos_code) { + long row_addr = addr; + // about 50% chance key < 8 chars + if (semipos_code != 0) { + int semi_pos = Long.numberOfTrailingZeros(semipos_code) >>> 3; + skip(semi_pos + 1); + long tail = word0 & HASH_MASKS[semi_pos]; + long hash = mix(tail); + int bucket = (int) (hash & BUCKET_MASK); + while (true) { + Node node = map[bucket]; + if (node == null) { + return (map[bucket] = new Node(row_addr, hash, semi_pos)); + } + if (node.hash == hash) { + return node; + } + bucket++; + } + } + + skip(8); + long word = UNSAFE.getLong(addr); + semipos_code = getSemiCode(word); + // 43% chance + if (semipos_code != 0) { + int semi_pos = Long.numberOfTrailingZeros(semipos_code) >>> 3; + skip(semi_pos + 1); + long tail = word0 ^ (word & HASH_MASKS[semi_pos]); + long hash = mix(tail); + int bucket = (int) (hash & BUCKET_MASK); + while (true) { + Node node = map[bucket]; + if (node == null) { + return (map[bucket] = new Node(row_addr, word0, hash, semi_pos + 8)); + } + if (node.word0 == word0 && node.hash == hash) { + return node; + } + bucket++; } } - System.out.println(ms); + // why not going for more? 
tested, slower + long hash = word0; + while (semipos_code == 0) { + hash ^= word; + skip(8); + word = UNSAFE.getLong(addr); + semipos_code = getSemiCode(word); + } + + int semi_pos = Long.numberOfTrailingZeros(semipos_code) >>> 3; + skip(semi_pos); + long keylen = addr - row_addr; + skip(1); + long tail = hash ^ (word & HASH_MASKS[semi_pos]); + hash = mix(tail); + int bucket = (int) (hash & BUCKET_MASK); + + while (true) { + Node node = map[bucket]; + if (node == null) { + return (map[bucket] = new Node(row_addr, word0, hash, (int) keylen)); + } + if (node.contentEquals(row_addr, word0, hash, keylen)) { + return node; + } + bucket++; + } } } } \ No newline at end of file diff --git a/src/main/java/dev/morling/onebrc/CalculateAverage_adriacabeza.java b/src/main/java/dev/morling/onebrc/CalculateAverage_adriacabeza.java new file mode 100644 index 000000000..99936b235 --- /dev/null +++ b/src/main/java/dev/morling/onebrc/CalculateAverage_adriacabeza.java @@ -0,0 +1,244 @@ +/* + * Copyright 2023 The original authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package dev.morling.onebrc; + +import java.io.IOException; +import java.nio.ByteBuffer; +import java.nio.MappedByteBuffer; +import java.nio.channels.FileChannel; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.nio.file.StandardOpenOption; +import java.util.ArrayList; +import java.util.Comparator; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.concurrent.ConcurrentHashMap; +import java.util.stream.Collectors; + +/** + * This class calculates average measurements from a file in a parallelized manner. + */ +public class CalculateAverage_adriacabeza { + + private static final Path FILE_PATH = Paths.get("./measurements.txt"); + public static final int CITY_NAME_MAX_CHARACTERS = 128; + private static final int N_PROCESSORS = Runtime.getRuntime().availableProcessors(); + private static final int DJB2_INIT = 5381; + private static final Map cityMap = new ConcurrentHashMap<>(10_000, 1, N_PROCESSORS); + + /** + * Represents result containing a HashMap with city as key and ResultRow as value. 
+ */ + private static class Result { + public void addStation(int hash, int value) { + resultMap.put(hash, new StationData(value)); + } + + public StationData getData(int hash) { + return resultMap.get(hash); + } + + private static class StationData { + private int min, sum, count, max; + + public StationData(int value) { + this.count = 1; + this.sum = value; + this.min = value; + this.max = value; + } + + public void update(int value) { + this.count++; + this.sum += value; + this.min = Math.min(this.min, value); + this.max = Math.max(this.max, value); + } + + public String toString() { + return "%.1f/%.1f/%.1f".formatted(min / 10.0, sum / 10.0 / count, max / 10.0); + } + + } + + private final Map resultMap; + + public Result() { + this.resultMap = new HashMap<>(10_000, 1); + } + + public Map getResultMap() { + return resultMap; + } + + public void merge(Result other) { + other.getResultMap().forEach((city, resultRow) -> resultMap.merge(city, resultRow, (existing, incoming) -> { + existing.min = Math.min(existing.min, incoming.min); + existing.max = Math.max(existing.max, incoming.max); + existing.sum += incoming.sum; + existing.count += incoming.count; + return existing; + })); + } + + public String toString() { + return this.resultMap.entrySet().parallelStream() + .map(entry -> "%s=%s".formatted(cityMap.get(entry.getKey()), entry.getValue())) + .sorted(Comparator.comparing(s -> s.split("=")[0])) + .collect(Collectors.joining(", ", "{", "}")); + } + } + + /** + * Finds the ending position in the file, ensuring it ends at the beginning of a line. 
+ * + * @param channel File channel + * @param position Current position in the file + * @return Ending position at the beginning of a line + * @throws IOException If an I/O error occurs + */ + private static long findEndPosition(FileChannel channel, long position) throws IOException { + ByteBuffer buffer = ByteBuffer.allocate(1); + + // Iterate over the file from the given position to find the next newline character + while (position < channel.size()) { + channel.read(buffer, position); + + // Check if the current byte is a newline character + if (buffer.get(0) == '\n') { + return position + 1; // Return the position immediately after the newline + } + + position++; + buffer.clear(); + } + + return channel.size(); // Return the end of the file if no newline is found after the current position + } + + /** + * Gets the mapped byte buffers for parallel processing. + * + * @param nProcessors Number of processors for parallelization + * @return List of MappedByteBuffers + * @throws IOException If an I/O error occurs + */ + private static List getMappedByteBuffers(int nProcessors) throws IOException { + try (FileChannel channel = FileChannel.open(FILE_PATH, StandardOpenOption.READ)) { + long fileSize = channel.size(); + long chunkSize = (fileSize + nProcessors - 1) / nProcessors; + long pos = 0; + + List buffers = new ArrayList<>(nProcessors); + for (int i = 0; i < nProcessors; i++) { + long endPosition = findEndPosition(channel, pos + chunkSize); + long size = endPosition - pos; + MappedByteBuffer buffer = channel.map(FileChannel.MapMode.READ_ONLY, pos, size); + pos = pos + size; + buffers.add(buffer); + } + return buffers; + } + } + + private static int readNumberFromBuffer(ByteBuffer buffer, int limit) { + var number = 0; + var sign = 1; + while (buffer.position() < limit) { + var numberByte = buffer.get(); + if (numberByte == '-') + sign = -1; + else if (numberByte == '\n') + break; + else if (numberByte != '.') + number = number * 10 + (numberByte - '0'); + } + 
return sign * number; + } + + /** + * Calculates average measurements from the file. + * + * @return Result containing min/mean/max values for each city + */ + private static Result calculateAverageMeasurements(List chunks) { + // Process each buffer in parallel + return chunks.parallelStream() + .map(buffer -> { + Result partialResult = new Result(); + var limit = buffer.limit(); + var field = new byte[CITY_NAME_MAX_CHARACTERS]; + Set seenHashes = new HashSet<>(10_000, 1); + while (buffer.position() < limit) { + var fieldCurrentIndex = 0; + var fieldByte = buffer.get(); + field[fieldCurrentIndex++] = fieldByte; + // implement djb2 hash: https://theartincode.stanis.me/008-djb2/ + int hash = DJB2_INIT; + while (buffer.position() < limit) { + // hash = hash * 33 + fieldByte + hash = (((hash << 5) + hash) + fieldByte); + fieldByte = buffer.get(); + if (fieldByte == ';') + break; + field[fieldCurrentIndex++] = fieldByte; + } + + var number = readNumberFromBuffer(buffer, limit); + if (!seenHashes.contains(hash)) { + seenHashes.add(hash); + cityMap.put(hash, new String(field, 0, fieldCurrentIndex)); + partialResult.addStation(hash, number); + } + else { + partialResult.getData(hash).update(number); + } + } + return partialResult; + }).reduce(new Result(), (partialResult1, partialResult2) -> { + Result result = new Result(); + result.merge(partialResult1); + result.merge(partialResult2); + return result; + }); + } + + /** + * The main method to run the average measurements calculations program. + * + * @param args Command line arguments. Not utilized in this program. + */ + public static void main(String[] args) { + try { + // Get the MappedByteBuffers by splitting the file evenly across available processors + var buffers = getMappedByteBuffers(Runtime.getRuntime().availableProcessors()); + + // Calculate the average measurements from the buffers obtained + var measurements = calculateAverageMeasurements(buffers); + + // Print the measurements result to the console. 
+ System.out.println(measurements); + + } catch (IOException e) { + // Handle any potential I/O exceptions by printing the error message to the console + System.err.println(STR."Error processing file: \{e.getMessage()}"); + } + } +} diff --git a/src/main/java/dev/morling/onebrc/CalculateAverage_agoncal.java b/src/main/java/dev/morling/onebrc/CalculateAverage_agoncal.java new file mode 100644 index 000000000..fe6a0a64d --- /dev/null +++ b/src/main/java/dev/morling/onebrc/CalculateAverage_agoncal.java @@ -0,0 +1,153 @@ +/* + * Copyright 2023 The original authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package dev.morling.onebrc; + +import java.io.BufferedReader; +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Paths; +import java.util.Iterator; +import java.util.Map; +import java.util.TreeMap; +import java.util.concurrent.ConcurrentHashMap; + +/** + * This is the solution from GitHut Copilot Chat with the help of Antonio Goncalves (prompting and guiding, but trying not to change code directly on my own, always using Copilot). + *

+ * List of prompts that has been used: + *

+ * ============= + * ============= + * ============= + * v1 - 73603 ms + * You are entering The One Billion Row Challenge (1BRC) which is an exploration of how far modern Java can be pushed for aggregating one billion rows from a text file. Grab all the (virtual) threads, reach out to SIMD, optimize the GC, or pull any other trick, and create the fastest implementation for solving this task! + * The text file contains temperature values for a range of weather stations. Each row is one measurement in the format ;, with the measurement value having exactly one fractional digit. The following delimited with --- shows ten rows as an example: + * --- + * Hamburg;12.0 + * Bulawayo;8.9 + * Palembang;38.8 + * St. John's;15.2 + * Cracow;12.6 + * Bridgetown;26.9 + * Istanbul;6.2 + * Roseau;34.4 + * Conakry;31.2 + * Istanbul;23.0 + * --- + * You have to write a Java program which reads the file, calculates the min, mean, and max temperature value per weather station, and emits the results on stdout like the result below delimited by --- (i.e. sorted alphabetically by station name, and the result values per station in the format //, rounded to one fractional digit). Notice the curly braces: + * --- + * {Abha=-23.0/18.0/59.2, Abidjan=-16.2/26.0/67.3, Abéché=-10.0/29.4/69.0, Accra=-10.1/26.4/66.4, Addis Ababa=-23.7/16.0/67.0, Adelaide=-27.8/17.3/58.5, ...} + * --- + * You must use Java 21. + * Create an algorithm in any way you see fit including parallelizing the computation, using the (incubating) Vector API, memory-mapping different sections of the file concurrently, using AppCDS, GraalVM, CRaC, etc. for speeding up the application start-up, choosing and tuning the garbage collector, and much more. + * No external library dependencies may be used. 
+ * ============= + * ============= + * ============= + * (Here I had to chat with Copilot about formatting the output, there were commas missing, the curly brackets were also missed) + * ============= + * ============= + * ============= + * v2 - 71831 ms + * Being written in Java 21, please use records instead of classes for Measurement. + * ============= + * ============= + * ============= + * v3 - 69333 ms + * If the temperatures are small numbers, why use double? Can't you use another datatype ? + *

+ * The profiler mentions that this line of code has very bad performance. Can you refactor it so it has better performance: + * --- + * String[] parts = line.split(";") + * --- + *

+ * There is a maximum of 10000 unique station names. Can you optimize the code taking this into account? + * ============= + * ============= + * ============= + * v4 - 56417 ms + * Which parameters can I pass to the JVM to make it run faster ? + * Which GC can I use and what is the most optimized to run CalculateAverage ? + */ +public class CalculateAverage_agoncal { + + private static final String FILE = "./measurements.txt"; + + record Measurement(String station, double temperature) { + } + + static class StationStats { + double min; + double max; + double sum; + int count; + + public StationStats(double temperature) { + this.min = temperature; + this.max = temperature; + this.sum = 0; + this.count = 0; + } + + synchronized void update(double temperature) { + min = Math.min(min, temperature); + max = Math.max(max, temperature); + sum += temperature; + count++; + } + + double getAverage() { + return round(sum) / count; + } + + @Override + public String toString() { + return String.format("%.1f/%.1f/%.1f", round(min), round(getAverage()), round(max)); + } + } + + public static void main(String[] args) throws IOException { + Map stats = new ConcurrentHashMap<>(10_000); + try (BufferedReader reader = Files.newBufferedReader(Paths.get(FILE))) { + reader.lines().parallel().forEach(line -> { + int separatorIndex = line.indexOf(';'); + String station = line.substring(0, separatorIndex); + String temperature = line.substring(separatorIndex + 1); + Measurement m = new Measurement(station, Double.parseDouble(temperature)); + stats.computeIfAbsent(m.station, k -> new StationStats(m.temperature)).update(m.temperature); + }); + } + + TreeMap sortedStats = new TreeMap<>(stats); + Iterator> iterator = sortedStats.entrySet().iterator(); + System.out.print("{"); + while (iterator.hasNext()) { + Map.Entry entry = iterator.next(); + StationStats s = entry.getValue(); + if (iterator.hasNext()) { + System.out.printf("%s=%s, ", entry.getKey(), s.toString()); + } + else { + 
System.out.printf("%s=%s", entry.getKey(), s.toString()); + } + } + System.out.println("}"); + } + + private static double round(double value) { + return Math.round(value * 10.0) / 10.0; + } +} diff --git a/src/main/java/dev/morling/onebrc/CalculateAverage_albertoventurini.java b/src/main/java/dev/morling/onebrc/CalculateAverage_albertoventurini.java new file mode 100644 index 000000000..91e00e332 --- /dev/null +++ b/src/main/java/dev/morling/onebrc/CalculateAverage_albertoventurini.java @@ -0,0 +1,328 @@ +/* + * Copyright 2023 The original authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package dev.morling.onebrc; + +import java.io.EOFException; +import java.io.IOException; +import java.io.RandomAccessFile; +import java.util.concurrent.CountDownLatch; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; + +/** + * == File reading == + * The file is read using RandomAccessFile, and split into chunks. Each thread is assigned a chunk. + * E.g. if the file size is 100, and we have two threads, the first thread will read from 0 to 49, + * the second from 50 to 99. + * Each chunk is aligned to the next end-of-line (or to the end-of-file), so that each thread + * consumes full input lines. + * Further, each file chunk is split into smaller pieces (byte arrays), with each piece up to 2^22 bytes. + * This particular size seems to work best on my machine. 
+ * == Data structure == + * Each thread stores its results in a prefix tree (trie). Each node in the trie represents + * one byte of a location's name. Non-ASCII characters are represented by multiple nodes in the trie. + * Each leaf contains the statistics for a location. + */ +public class CalculateAverage_albertoventurini { + + // The maximum byte that can ever appear in a UTF-8-encoded string is 11110111, i.e., 0xF7 + private static final int MAX_UTF8_BYTE_VALUE = 0xF7; + + // Define a prefix tree that is used to store results. + // Each node in the trie represents a byte (NOT character) from a location name. + // A nice side effect is, when traversing the trie to print results, + // the names will be printed in alphabetical order. + private static final class TrieNode { + final TrieNode[] children = new TrieNode[MAX_UTF8_BYTE_VALUE]; + int min = Integer.MAX_VALUE; + int max = Integer.MIN_VALUE; + int sum; + int count; + } + + private static final int TWO_BYTE_TO_INT = 480 + 48; + private static final int THREE_BYTE_TO_INT = 4800 + 480 + 48; + + // Process a chunk and write results in a Trie rooted at 'root'. 
+ private static void processChunk(final TrieNode root, final ChunkReader cr) { + // Rows have the form "<station>;<temperature>\n"; temperatures always carry exactly one + // decimal digit and are accumulated as ints scaled by 10 (tenths of a degree). + while (cr.ensureHasMoreRows()) { + TrieNode node = root; + + // Process the location name navigating through the trie + int b = cr.getNext(); + do { + // Mask the sign-extended byte to 0..255 so it can index 'children'. + // NOTE(review): 'children' has MAX_UTF8_BYTE_VALUE (0xF7) slots; a raw byte in 0xF8-0xFF + // would overflow the array — safe only for well-formed UTF-8 input. Confirm. + b &= 0xFF; + if (node.children[b] == null) { + node.children[b] = new TrieNode(); + } + node = node.children[b]; + b = cr.getNext(); + } while (b != ';'); + + // Process the reading value (temperature) + // Branch on where '.' and '-' appear: the possible forms are n.n, nn.n, -n.n and -nn.n. + // TWO_BYTE_TO_INT / THREE_BYTE_TO_INT subtract the combined ASCII '0' offsets in one step. + final int reading; + + final byte b1 = cr.getNext(); + final byte b2 = cr.getNext(); + if (b2 == '.') { // value is n.n + reading = (b1 * 10 + cr.getNext() - TWO_BYTE_TO_INT); + } + else { + final byte b3 = cr.getNext(); + final byte b4 = cr.getNext(); + if (b4 == '.') { // value is -nn.n + reading = -(b2 * 100 + b3 * 10 + cr.getNext() - THREE_BYTE_TO_INT); + } + else if (b1 == '-') { // value is -n.n + reading = -(b2 * 10 + b4 - TWO_BYTE_TO_INT); + } + else { // value is nn.n + reading = (b1 * 100 + b2 * 10 + b4 - THREE_BYTE_TO_INT); + } + } + cr.cursor++; // new line + + // The node reached by the last name byte is the leaf holding this station's statistics. + if (reading < node.min) { + node.min = reading; + } + if (reading > node.max) { + node.max = reading; + } + node.sum += reading; + node.count++; + } + } + + // Print results. + // Because there are multiple tries (one for each thread), this method + // aggregates results from all tries. + static class ResultPrinter { + // Contains the bytes for the current location name. 100 bytes should be enough + // to represent each location name encoded in UTF-8. + final byte[] bytes = new byte[100]; + + boolean firstOutput = true; + + void printResults(final TrieNode[] roots) { + System.out.print("{"); + printResultsRec(roots, bytes, 0); + System.out.println("}"); + } + + // 'value' holds tenths of a degree (an exact long, so Math.round is an identity here); + // dividing by 10.0 converts it back to degrees for printing. + private static double round(long value) { + return Math.round(value) / 10.0; + } + + // Find and print results recursively. 
+ private void printResultsRec(final TrieNode[] nodes, final byte[] bytes, final int index) { + // 'nodes' are the trie nodes at the current depth, one per thread (entries may be null). + // bytes[0..index) is the UTF-8 name prefix that leads to these nodes. + // First aggregate this prefix's stats across all per-thread tries, then recurse into children. + long min = Long.MAX_VALUE; + long max = Long.MIN_VALUE; + long sum = 0; + long count = 0; + + for (final TrieNode node : nodes) { + if (node != null && node.count > 0) { + min = Math.min(min, node.min); + max = Math.max(max, node.max); + sum += node.sum; + count += node.count; + } + } + + // count > 0 means at least one thread finished a full station name at exactly this prefix. + if (count > 0) { + // NOTE(review): decodes the UTF-8 name bytes with the platform default charset — + // assumes the runtime default is UTF-8; confirm (or it would garble non-ASCII names). + final String location = new String(bytes, 0, index); + if (firstOutput) { + firstOutput = false; + } + else { + System.out.print(", "); + } + // sum/count are in tenths of a degree: round the mean to the nearest tenth, then scale. + double mean = Math.round((double) sum / (double) count) / 10.0; + System.out.print(location + "=" + round(min) + "/" + mean + "/" + round(max)); + } + + for (int i = 0; i < MAX_UTF8_BYTE_VALUE; i++) { + final TrieNode[] childNodes = new TrieNode[nodes.length]; + boolean shouldRecurse = false; + for (int j = 0; j < nodes.length; j++) { + if (nodes[j] != null && nodes[j].children[i] != null) { + childNodes[j] = nodes[j].children[i]; + + // Only recurse if there's at least one trie that has non-null child for index 'i'. + shouldRecurse = true; + } + } + if (shouldRecurse) { + bytes[index] = (byte) i; + printResultsRec(childNodes, bytes, index + 1); + } + } + } + } + + private static final String FILE = "./measurements.txt"; + + /** + * Read a chunk of a {@link RandomAccessFile} file. + * Internally, the chunk is further subdivided into "sub-chunks" (byte arrays). + */ + private static final class ChunkReader { + // Byte arrays of size 2^20 seem to have the best performance on my machine. + private static final int BYTE_ARRAY_SIZE = 1 << 20; + private final byte[] bytes; + + private final RandomAccessFile file; + + // The initial position of this chunk. + private final long chunkBegin; + + // The length of this chunk. + private final long chunkLength; + + // The beginning of the current "sub-chunk", relative to the initial position of the chunk. + private long offset = 0; + + // The size of the current "sub-chunk". 
+ private int subChunkSize = 0; + + // The current position within the current "sub-chunk". + private int cursor = 0; + + // The maximum size of a row + private static final int MAX_ROW_SIZE_BYTES = 107; + + ChunkReader( + final RandomAccessFile file, + final long chunkBegin, + final long chunkLength) { + this.file = file; + this.chunkBegin = chunkBegin; + this.chunkLength = chunkLength; + + int byteArraySize = chunkLength < BYTE_ARRAY_SIZE ? (int) chunkLength : BYTE_ARRAY_SIZE; + this.bytes = new byte[byteArraySize]; + + readSubChunk(); + } + + // Return true if this ChunkReader has more bytes available, false otherwise. + // If this ChunkReader needs to read a new "sub-chunk", it does so in this method. + boolean ensureHasMoreRows() { + if (cursor >= subChunkSize) { + offset += cursor; + if (offset >= chunkLength) { + return false; + } + readSubChunk(); + } + + return true; + } + + byte getNext() { + return bytes[cursor++]; + } + + private void readSubChunk() { + try { + synchronized (file) { + file.seek(chunkBegin + offset); + subChunkSize = file.read(bytes); + } + } + catch (IOException e) { + throw new RuntimeException(e); + } + + // Always "pretend" that we've read a few bytes less, + // so that we don't stop in the middle of reading a row + subChunkSize -= MAX_ROW_SIZE_BYTES; + + cursor = 0; + } + } + + private static ChunkReader[] makeChunkReaders( + final int count, + final RandomAccessFile file) + throws Exception { + + final ChunkReader[] chunkReaders = new ChunkReader[count]; + + // The total size of each chunk + final long chunkReaderSize = file.length() / count; + + long previousPosition = 0; + long currentPosition; + + for (int i = 0; i < count; i++) { + // Go to the end of the chunk + file.seek(chunkReaderSize * (i + 1)); + + // Align to the next end of line or end of file + try { + while (file.readByte() != '\n') + ; + } + catch (EOFException e) { + } + + currentPosition = file.getFilePointer(); + long chunkBegin = previousPosition; + long 
chunkLength = currentPosition - previousPosition; + chunkReaders[i] = new ChunkReader(file, chunkBegin, chunkLength); + + previousPosition = currentPosition; + } + + return chunkReaders; + } + + // Spin up threads and assign a file chunk to each one. + // Then use the 'ResultPrinter' class to aggregate and print the results. + private static void processWithChunkReaders() throws Exception { + final var randomAccessFile = new RandomAccessFile(FILE, "r"); + + final int nThreads = randomAccessFile.length() < 1 << 20 ? 1 : Runtime.getRuntime().availableProcessors(); + + final CountDownLatch latch = new CountDownLatch(nThreads); + + final ChunkReader[] chunkReaders = makeChunkReaders(nThreads, randomAccessFile); + final TrieNode[] roots = new TrieNode[nThreads]; + for (int i = 0; i < nThreads; i++) { + roots[i] = new TrieNode(); + } + + final ExecutorService executorService = Executors.newFixedThreadPool(nThreads); + for (int i = 0; i < nThreads; i++) { + final int idx = i; + executorService.submit(() -> { + processChunk(roots[idx], chunkReaders[idx]); + latch.countDown(); + }); + } + executorService.shutdown(); + latch.await(); + + new ResultPrinter().printResults(roots); + + executorService.close(); + } + + public static void main(String[] args) throws Exception { + processWithChunkReaders(); + } +} \ No newline at end of file diff --git a/src/main/java/dev/morling/onebrc/CalculateAverage_anestoruk.java b/src/main/java/dev/morling/onebrc/CalculateAverage_anestoruk.java new file mode 100644 index 000000000..07440d7b0 --- /dev/null +++ b/src/main/java/dev/morling/onebrc/CalculateAverage_anestoruk.java @@ -0,0 +1,207 @@ +/* + * Copyright 2023 The original authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package dev.morling.onebrc; + +import java.io.IOException; +import java.lang.foreign.Arena; +import java.lang.foreign.MemorySegment; +import java.nio.channels.FileChannel; +import java.nio.file.Path; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.TreeMap; +import java.util.concurrent.CompletableFuture; +import java.util.concurrent.ExecutionException; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; + +import static java.lang.Math.ceil; +import static java.lang.Math.max; +import static java.lang.Math.min; +import static java.lang.Runtime.getRuntime; +import static java.lang.foreign.ValueLayout.JAVA_BYTE; +import static java.nio.channels.FileChannel.MapMode.READ_ONLY; +import static java.nio.charset.StandardCharsets.UTF_8; +import static java.util.concurrent.CompletableFuture.supplyAsync; + +public class CalculateAverage_anestoruk { + + private static final String path = "./measurements.txt"; + private static final int cpus = getRuntime().availableProcessors(); + + public static void main(String[] args) throws IOException { + List rangeList = new ArrayList<>(); + MemorySegment segment; + + try (FileChannel channel = FileChannel.open(Path.of(path))) { + final long fileSize = channel.size(); + final long chunkSize = calculateChunkSize(fileSize); + final int chunks = (int) ceil((double) fileSize / chunkSize); + segment = channel.map(READ_ONLY, 0, fileSize, Arena.global()); + long startOffset = 0; + long size = chunkSize; + for (int i = 0; i < chunks && size > 0; 
i++) { + long endOffset = startOffset + size; + while (endOffset < fileSize && segment.get(JAVA_BYTE, endOffset) != '\n') { + endOffset++; + } + rangeList.add(new SegmentRange(startOffset, endOffset)); + startOffset = endOffset + 1; + size = min(chunkSize, fileSize - startOffset); + } + } + + TreeMap result = new TreeMap<>(); + try (ExecutorService executor = Executors.newFixedThreadPool(cpus)) { + List> futures = new ArrayList<>(); + for (SegmentRange range : rangeList) { + futures.add(supplyAsync(() -> process(range, segment), executor)); + } + for (CompletableFuture future : futures) { + try { + Record[] partialResult = future.get(); + mergeResult(result, partialResult); + } + catch (InterruptedException | ExecutionException ex) { + throw new RuntimeException(ex); + } + } + } + + System.out.println(result); + } + + private static long calculateChunkSize(long fileSize) { + int divisor = cpus; + long chunkSize; + if (fileSize > 10_000) { + while ((chunkSize = fileSize / divisor) > Integer.MAX_VALUE - 512) { + divisor *= 2; + } + return chunkSize; + } + return fileSize; + } + + private static Record[] process(SegmentRange range, MemorySegment segment) { + Record[] records = new Record[1024 * 100]; + byte[] cityBuffer = new byte[100]; + long offset = range.startOffset; + byte b; + while (offset < range.endOffset) { + int cityLength = 0; + int hash = 0; + while ((b = segment.get(JAVA_BYTE, offset++)) != ';') { + cityBuffer[cityLength++] = b; + hash = hash * 31 + b; + } + hash = Math.abs(hash); + int value = 0; + boolean negative; + if ((b = segment.get(JAVA_BYTE, offset++)) == '-') { + negative = true; + } + else { + negative = false; + value = b - '0'; + } + while ((b = segment.get(JAVA_BYTE, offset++)) != '\n') { + if (b != '.') { + value = value * 10 + (b - '0'); + } + } + int temperature = negative ? 
-value : value; + addRecord(records, hash, cityBuffer, cityLength, temperature); + } + return records; + } + + private static void addRecord(Record[] records, int hash, byte[] cityBuffer, int cityLength, int temperature) { + int idx = hash % records.length; + Record record; + while ((record = records[idx]) != null) { + if (record.hash == hash && Arrays.equals(record.city, 0, record.city.length, cityBuffer, 0, cityLength)) { + record.add(temperature); + return; + } + idx = (idx + 1) % records.length; + } + byte[] city = new byte[cityLength]; + System.arraycopy(cityBuffer, 0, city, 0, cityLength); + records[idx] = new Record(hash, city, temperature); + } + + private static void mergeResult(TreeMap result, Record[] partialResult) { + for (Record partialRecord : partialResult) { + if (partialRecord == null) { + continue; + } + String cityName = new String(partialRecord.city, UTF_8); + result.compute(cityName, (_, record) -> { + if (record == null) { + return partialRecord; + } + record.merge(partialRecord); + return record; + }); + } + } + + private record SegmentRange(long startOffset, long endOffset) { + } + + private static class Record { + + private final int hash; + private final byte[] city; + private int min; + private int max; + private long sum; + private int count; + + public Record(int hash, byte[] city, int temperature) { + this.hash = hash; + this.city = city; + this.min = temperature; + this.max = temperature; + this.sum = temperature; + this.count = 1; + } + + public void add(int temperature) { + min = min(min, temperature); + max = max(max, temperature); + sum += temperature; + count++; + } + + public void merge(Record other) { + min = min(min, other.min); + max = max(max, other.max); + sum += other.sum; + count += other.count; + } + + @Override + public String toString() { + return "%.1f/%.1f/%.1f".formatted( + (min / 10.0), + ((double) sum / count / 10.0), + (max / 10.0)); + } + } +} diff --git 
a/src/main/java/dev/morling/onebrc/CalculateAverage_anitasv.java b/src/main/java/dev/morling/onebrc/CalculateAverage_anitasv.java new file mode 100644 index 000000000..7d3d6af7b --- /dev/null +++ b/src/main/java/dev/morling/onebrc/CalculateAverage_anitasv.java @@ -0,0 +1,288 @@ +/* + * Copyright 2023 The original authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package dev.morling.onebrc; + +import java.io.IOException; +import java.lang.foreign.Arena; +import java.lang.foreign.MemorySegment; +import java.lang.foreign.ValueLayout; +import java.nio.ByteBuffer; +import java.nio.channels.FileChannel; +import java.nio.charset.StandardCharsets; +import java.nio.file.Path; +import java.nio.file.StandardOpenOption; +import java.util.*; +import java.util.stream.IntStream; + +public class CalculateAverage_anitasv { + private static final String FILE = "./measurements.txt"; + + private record Shard(MemorySegment mmapMemory, + long chunkStart, long chunkEnd) { + + byte getByte(long address) { + return mmapMemory.get(ValueLayout.JAVA_BYTE, address); + } + + long indexOf(long position, byte ch) { + ByteBuffer buf = mmapMemory.asSlice(position, + Math.min(128, mmapMemory.byteSize() - position)) + .asByteBuffer(); + while (buf.hasRemaining()) { + if (buf.get() == ch) { + return position + (buf.position() - 1); + } + } + return -1; + } + + MemorySegment getRange(long start, long end) { + return mmapMemory.asSlice(start, end - start); + } + + int 
parseDouble(long start, long end) { + int normalized = 0; + boolean sign = true; + long index = start; + if (getByte(index) == '-') { + index++; + sign = false; + } + boolean hasDot = false; + for (; index < end; index++) { + byte ch = getByte(index); + if (ch != '.') { + normalized = normalized * 10 + (ch - '0'); + } else { + hasDot = true; + } + } + if (!hasDot) { + normalized *= 10; + } + if (!sign) { + normalized = -normalized; + } + return normalized; + } + + public int computeHash(long position, long stationEnd) { + ByteBuffer buf2 = mmapMemory.asSlice(position, stationEnd - position) + .asByteBuffer(); + return buf2.hashCode(); + } + + public long truncate(long index) { + return Math.min(index, mmapMemory.byteSize()); + } + + public long getLong(long position) { + return mmapMemory.get(ValueLayout.JAVA_LONG_UNALIGNED, position); + } + } + + private record ResultRow(IntSummaryStatistics statistics, int keyLength, int next) { + } + + private static class FastHashMap { + private final byte[] keys; + private final ResultRow[] values; + + private final int capacityMinusOne; + + private final MemorySegment keySegment; + + private int next = -1; + + private FastHashMap(int capacity) { + this.capacityMinusOne = capacity - 1; + this.keys = new byte[capacity << 7]; + this.keySegment = MemorySegment.ofArray(keys); + this.values = new ResultRow[capacity]; + } + + IntSummaryStatistics find(int hash, Shard shard, long stationStart, long stationEnd) { + int initialIndex = hash & capacityMinusOne; + int lookupLength = (int) (stationEnd - stationStart); + int lookupAligned = ((lookupLength + 7) & (-8)); + int i = initialIndex; + + lookupAligned = (int) (shard.truncate(stationStart + lookupAligned) - stationStart) - 7; + + do { + int keyIndex = i << 7; + + if (keys[keyIndex] != 0 && keys[keyIndex + lookupLength] == 0) { + + int mismatch = -1, j; + for (j = 0; j < lookupAligned; j += 8) { + long entryLong = keySegment.get(ValueLayout.JAVA_LONG_UNALIGNED, keyIndex + j); + long 
lookupLong = shard.getLong(stationStart + j); + if (entryLong != lookupLong) { + int diff = Long.numberOfTrailingZeros(entryLong ^ lookupLong); + mismatch = j + (diff >> 3); + break; + } + } + if (mismatch == -1) { + for (; j < lookupLength; j++) { + byte entryByte = keys[keyIndex + j]; + byte lookupByte = shard.getByte(stationStart + j); + if (entryByte != lookupByte) { + mismatch = j; + break; + } + } + } + if (mismatch == -1 || mismatch >= lookupLength) { + return this.values[i].statistics; + } + } + if (keys[keyIndex] == 0) { + MemorySegment fullLookup = shard.getRange(stationStart, stationEnd); + + keySegment.asSlice(keyIndex, lookupLength) + .copyFrom(fullLookup); + + keys[keyIndex + lookupLength] = 0; + IntSummaryStatistics stats = new IntSummaryStatistics(); + ResultRow resultRow = new ResultRow(stats, lookupLength, this.next); + this.next = i; + this.values[i] = resultRow; + return stats; + } + + if (i == capacityMinusOne) { + i = 0; + } + else { + i++; + } + } while (i != initialIndex); + throw new IllegalStateException("Hash size too small"); + } + + Iterable> values() { + return () -> new Iterator<>() { + + int scan = FastHashMap.this.next; + + @Override + public boolean hasNext() { + return scan != -1; + } + + @Override + public Map.Entry next() { + ResultRow resultRow = values[scan]; + IntSummaryStatistics stats = resultRow.statistics; + String key = new String(keys, scan << 7, resultRow.keyLength, + StandardCharsets.UTF_8); + scan = resultRow.next; + return new AbstractMap.SimpleEntry<>(key, stats); + } + }; + } + + } + + private static Iterable> process(Shard shard) { + FastHashMap result = new FastHashMap(1 << 14); + + boolean skip = shard.chunkStart != 0; + for (long position = shard.chunkStart; position < shard.chunkEnd; position++) { + if (skip) { + position = shard.indexOf(position, (byte) '\n'); + skip = false; + } + else { + long stationEnd = shard.indexOf(position, (byte) ';'); + int hash = shard.computeHash(position, stationEnd); + + long 
temperatureEnd = shard.indexOf(stationEnd + 1, (byte) '\n'); + int temperature = shard.parseDouble(stationEnd + 1, temperatureEnd); + + IntSummaryStatistics stats = result.find(hash, shard, position, stationEnd); + stats.accept(temperature); + position = temperatureEnd; + } + } + + return result.values(); + } + + private static Map combineResults(List>> list) { + Map output = HashMap.newHashMap(1024); + for (Iterable> map : list) { + for (Map.Entry entry : map) { + output.compute(entry.getKey(), (ignore, val) -> { + if (val == null) { + return entry.getValue(); + } + else { + val.combine(entry.getValue()); + return val; + } + }); + } + } + + return output; + } + + private static Map master(MemorySegment mmapMemory) { + long totalBytes = mmapMemory.byteSize(); + int numWorkers = Runtime.getRuntime().availableProcessors(); + long chunkSize = Math.ceilDiv(totalBytes, numWorkers); + return combineResults(IntStream.range(0, numWorkers) + .parallel() + .mapToObj(workerId -> { + long chunkStart = workerId * chunkSize; + long chunkEnd = Math.min(chunkStart + chunkSize + 1, totalBytes); + return new Shard(mmapMemory, chunkStart, chunkEnd); + }) + .map(CalculateAverage_anitasv::process) + .toList()); + } + + public static Map start() throws IOException { + try (FileChannel fileChannel = FileChannel.open(Path.of(FILE), + StandardOpenOption.READ)) { + long fileSize = fileChannel.size(); + MemorySegment mmapMemory = fileChannel.map( + FileChannel.MapMode.READ_ONLY, + 0, fileSize, Arena.global()); + return master(mmapMemory); + } + } + + private static Map toPrintMap(Map output) { + Map outputStr = new TreeMap<>(); + for (Map.Entry entry : output.entrySet()) { + IntSummaryStatistics stat = entry.getValue(); + outputStr.put(entry.getKey(), statToString(stat)); + } + return outputStr; + } + + private static String statToString(IntSummaryStatistics stat) { + return STR."\{stat.getMin() / 10.0}/\{Math.round(stat.getAverage()) / 10.0}/\{stat.getMax() / 10.0}"; + } + + public static 
void main(String[] args) throws IOException { + System.out.println(toPrintMap(start())); + } +} diff --git a/src/main/java/dev/morling/onebrc/CalculateAverage_arjenw.java b/src/main/java/dev/morling/onebrc/CalculateAverage_arjenw.java new file mode 100644 index 000000000..9355d4729 --- /dev/null +++ b/src/main/java/dev/morling/onebrc/CalculateAverage_arjenw.java @@ -0,0 +1,236 @@ +/* + * Copyright 2023 The original authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package dev.morling.onebrc; + +import java.io.File; +import java.io.IOException; +import java.nio.channels.FileChannel; +import java.nio.charset.StandardCharsets; +import java.nio.file.Files; +import java.nio.file.StandardOpenOption; +import java.util.Arrays; +import java.util.Comparator; +import java.util.Iterator; +import java.util.NoSuchElementException; +import java.util.function.Consumer; +import java.util.function.Supplier; +import java.util.stream.IntStream; + +// Calculate Average +// * baseline: 3m7s +// * single-threaded chunk-based reading: 0m45s +// * multi-threaded chunk-based reading: 0m14s +// * less branches in parsing: 0m12s +// * list approach iso map: 0m5.5s +// * chunk finetuning: 0m4.5s +// * threadlocal result gathering: 0m4.3s (trying graalvm-ce) +// * memory-mapped file approach: 0m3.2s (also way simpler and neater code; inspired by spullara) +// * smarter number parsing: 0m2.95s (inspired by iziamos) +// * switching back to 21-tem vm 0m2.6s +// * small 
optimizations 0m2.5s (skip byte-array copy, optimal StationList array size avoiding collisions) + +public class CalculateAverage_arjenw { + private static final int TWO_BYTE_TO_INT = 480 + 48; // 48 is the ASCII code for '0' + private static final int THREE_BYTE_TO_INT = 4800 + 480 + 48; + private static final String FILE = "./measurements.txt"; + + public static void main(String[] args) { + var file = new File(args.length > 0 ? args[0] : FILE); + var fileSize = file.length(); + var numberOfProcessors = fileSize > 1_000_000 ? Runtime.getRuntime().availableProcessors() : 1; + var segmentSize = (int) Math.min(Integer.MAX_VALUE, fileSize / numberOfProcessors); // bytebuffer position is an int, so can be max Integer.MAX_VALUE + var segmentCount = (int) (fileSize / segmentSize); + var results = IntStream.range(0, segmentCount) + .mapToObj(segmentNr -> parseSegment(file, fileSize, segmentSize, segmentNr)) + .parallel() + .reduce(StationList::merge) + .orElseGet(StationList::new) + .toStringArray(); + Arrays.sort(results, Comparator.comparing(o -> takeUntil(o, '='))); + System.out.format("{%s}%n", String.join(", ", results)); + } + + private static StationList parseSegment(File file, long fileSize, int segmentSize, int segmentNr) { + long segmentStart = segmentNr * (long) segmentSize; + long segmentEnd = Math.min(fileSize, segmentStart + segmentSize + 100); + try (var fileChannel = (FileChannel) Files.newByteChannel(file.toPath(), StandardOpenOption.READ)) { + var bb = fileChannel.map(FileChannel.MapMode.READ_ONLY, segmentStart, segmentEnd - segmentStart); + if (segmentStart > 0) { + // noinspection StatementWithEmptyBody + while (bb.get() != '\n') + ; // skip to first new line + } + StationList stationList = new StationList(); + var buffer = new byte[100]; + while (bb.position() < segmentSize) { + byte b; + var i = 0; + int hash = 0; + while ((b = bb.get()) != ';') { + hash = hash * 31 + b; + buffer[i++] = b; + } + + int value; + byte b1 = bb.get(); + byte b2 = bb.get(); 
+ byte b3 = bb.get(); + byte b4 = bb.get(); + if (b2 == '.') {// value is n.n + value = (b1 * 10 + b3 - TWO_BYTE_TO_INT); + // b4 == \n + } + else { + if (b4 == '.') { // value is -nn.n + value = -(b2 * 100 + b3 * 10 + bb.get() - THREE_BYTE_TO_INT); + } + else if (b1 == '-') { // value is -n.n + value = -(b2 * 10 + b4 - TWO_BYTE_TO_INT); + } + else { // value is nn.n + value = (b1 * 100 + b2 * 10 + b4 - THREE_BYTE_TO_INT); + } + bb.get(); // new line + } + + if (stationList.add(buffer, i, Math.abs(hash), value)) + buffer = new byte[100]; // station was new, create new buffer to contain the next station's name + } + + return stationList; + } + catch (IOException e) { + throw new RuntimeException(e); + } + } + + private static final class Station { + private final byte[] data; + private final int hash; + private final int length; + + private int min; + private int max; + private int total; + private int count; + + private Station(byte[] data, int length, int hash, int value) { + this.data = data; + this.hash = hash; + this.length = length; + + min = max = total = value; + count = 1; + } + + @Override + public String toString() { + return STR."\{new String(data, 0, length, StandardCharsets.UTF_8)}=\{min / 10.0}/\{Math.round(((double) total) / count) / 10.0}/\{max / 10.0}"; + } + + private void append(int min, int max, int total, int count) { + if (min < this.min) + this.min = min; + if (max > this.max) + this.max = max; + this.total += total; + this.count += count; + } + + public void append(int value) { + append(value, value, value, 1); + } + + public void merge(Station other) { + append(other.min, other.max, other.total, other.count); + } + } + + private static class StationList implements Iterable { + private final static int MAX_ENTRY = 65375; // choose a value that _eliminates_ collisions on the test set. 
+ private final Station[] array = new Station[MAX_ENTRY]; + private int size = 0; + + private boolean add(int hash, Supplier create, Consumer update) { + var position = hash % MAX_ENTRY; + Station existing; + while ((existing = array[position]) != null && existing.hash != hash) { + position = (position + 1) % MAX_ENTRY; + } + if (existing == null) { + array[position] = create.get(); + size++; + return true; + } + else { + update.accept(existing); + return false; + } + } + + public boolean add(byte[] data, int stationNameLength, int stationHash, int value) { + return add(stationHash, () -> new Station(data, stationNameLength, stationHash, value), existing -> existing.append(value)); + } + + public void add(Station station) { + add(station.hash, () -> station, existing -> existing.merge(station)); + } + + public String[] toStringArray() { + var destination = new String[size]; + + var i = 0; + for (Station station : this) + destination[i++] = station.toString(); + + return destination; + } + + public StationList merge(StationList other) { + for (Station station : other) + add(station); + return this; + } + + @Override + public Iterator iterator() { + return new Iterator<>() { + private int index = 0; + + @Override + public boolean hasNext() { + Station station = null; + while (index < MAX_ENTRY && (station = array[index]) == null) + index++; + return station != null; + } + + @Override + public Station next() { + if (hasNext()) { + return array[index++]; + } + throw new NoSuchElementException(); + } + }; + } + } + + private static String takeUntil(String s, char c) { + var pos = s.indexOf(c); + return pos > -1 ? 
s.substring(0, pos) : s; + } +} diff --git a/src/main/java/dev/morling/onebrc/CalculateAverage_armandino.java b/src/main/java/dev/morling/onebrc/CalculateAverage_armandino.java index 21abbb193..0e9125337 100644 --- a/src/main/java/dev/morling/onebrc/CalculateAverage_armandino.java +++ b/src/main/java/dev/morling/onebrc/CalculateAverage_armandino.java @@ -15,227 +15,337 @@ */ package dev.morling.onebrc; +import sun.misc.Unsafe; + import java.io.IOException; import java.io.PrintStream; -import java.nio.ByteBuffer; +import java.lang.foreign.Arena; +import java.lang.reflect.Field; import java.nio.channels.FileChannel; import java.nio.file.Path; import java.nio.file.StandardOpenOption; -import java.util.ArrayList; -import java.util.Collections; +import java.util.Collection; import java.util.Map; -import java.util.concurrent.ConcurrentHashMap; +import java.util.TreeMap; import static java.nio.channels.FileChannel.MapMode.READ_ONLY; import static java.nio.charset.StandardCharsets.UTF_8; public class CalculateAverage_armandino { - private static final String FILE = "./measurements.txt"; + private static final Path FILE = Path.of("./measurements.txt"); - private static final int MAX_KEY_LENGTH = 100; + private static final int NUM_CHUNKS = Math.max(8, Runtime.getRuntime().availableProcessors()); + private static final int INITIAL_MAP_CAPACITY = 8192; private static final byte SEMICOLON = 59; private static final byte NL = 10; - private static final byte DOT = 46; - private static final byte MINUS = 45; + private static final int PRIME = 1117; + + private static final int KEY_OFFSET = 0, // 100b + HASH_OFFSET = 100, // int + KEY_LENGTH_OFFSET = 104, // short + MIN_OFFSET = 106, // short + MAX_OFFSET = 108, // short + COUNT_OFFSET = 110, // int + SUM_OFFSET = 114; // long + + private static final long ENTRY_SIZE = 100 // key: offset=0 + + 4 // keyHash: offset=100 + + 2 // keyLength: offset=104 + + 2 // min: 108; offset=106 + + 2 // max: 110; offset=108 + + 4 // count: 114; 
offset=110 + + 8; // sum: 122; offset=118 + + private static final Unsafe UNSAFE = getUnsafe(); public static void main(String[] args) throws Exception { - Aggregator aggregator = new Aggregator(); - aggregator.process(); - aggregator.printStats(); - } - - private static class Aggregator { + var channel = FileChannel.open(FILE, StandardOpenOption.READ); - private final Map map = new ConcurrentHashMap<>(2048); + Chunk[] chunks = split(channel); + ChunkProcessor[] processors = new ChunkProcessor[chunks.length]; - private record Chunk(long start, long end) { + for (int i = 0; i < processors.length; i++) { + processors[i] = new ChunkProcessor(chunks[i].start, chunks[i].end); + processors[i].start(); } - void process() throws Exception { - var channel = FileChannel.open(Path.of(FILE), StandardOpenOption.READ); - final Chunk[] chunks = split(channel); - final Thread[] threads = new Thread[chunks.length]; + Map results = new TreeMap<>(); - for (int i = 0; i < chunks.length; i++) { - final Chunk chunk = chunks[i]; + for (int i = 0; i < processors.length; i++) { + processors[i].join(); + final long end = processors[i].map.mapEnd; - threads[i] = Thread.ofVirtual().start(() -> { - try { - var bb = channel.map(READ_ONLY, chunk.start, chunk.end - chunk.start); - process(bb); - } - catch (IOException e) { - throw new RuntimeException(e); - } - }); - } + for (long addr = processors[i].map.mapStart; addr < end; addr += ENTRY_SIZE) { + final short keyLength = UNSAFE.getShort(addr + KEY_LENGTH_OFFSET); + + if (keyLength == 0) + continue; - for (Thread t : threads) { - t.join(); + final byte[] keyBytes = new byte[keyLength]; + UNSAFE.copyMemory(null, addr, keyBytes, Unsafe.ARRAY_BYTE_BASE_OFFSET, keyLength); + final short min = UNSAFE.getShort(addr + MIN_OFFSET); + final short max = UNSAFE.getShort(addr + MAX_OFFSET); + final int count = UNSAFE.getInt(addr + COUNT_OFFSET); + final long sum = UNSAFE.getLong(addr + SUM_OFFSET); + final Stats s = new Stats(new String(keyBytes, 0, 
keyLength, UTF_8), min, max, count, sum); + results.merge(s.key, s, CalculateAverage_armandino::mergeStats); } } - private static Chunk[] split(final FileChannel channel) throws IOException { - final long fileSize = channel.size(); - if (fileSize < 10000) { - return new Chunk[]{ new Chunk(0, fileSize) }; - } + print(results.values()); + } - final int numChunks = 8; - final long chunkSize = fileSize / numChunks; - final var chunks = new Chunk[numChunks]; + private static Stats mergeStats(final Stats x, final Stats y) { + x.min = Math.min(x.min, y.min); + x.max = Math.max(x.max, y.max); + x.count += y.count; + x.sum += y.sum; + return x; + } - for (int i = 0; i < numChunks; i++) { - long start = 0; - long end = chunkSize; + private static class ChunkProcessor extends Thread { + private final UnsafeMap map = new UnsafeMap(INITIAL_MAP_CAPACITY); - if (i > 0) { - start = chunks[i - 1].end + 1; - end = Math.min(start + chunkSize, fileSize); + final long chunkStart; + final long chunkEnd; + + private ChunkProcessor(long chunkStart, long chunkEnd) { + this.chunkStart = chunkStart; + this.chunkEnd = chunkEnd; + } + + @Override + public void run() { + long i = chunkStart; + while (i < chunkEnd) { + final long keyAddress = i; + int keyHash = 0; + byte b; + + while ((b = UNSAFE.getByte(i++)) != SEMICOLON) { + keyHash = PRIME * keyHash + b; } - end = end == fileSize ? 
end : seekNextNewline(channel, end); - chunks[i] = new Chunk(start, end); + final short keyLength = (short) (i - keyAddress - 1); + final long numberWord = UNSAFE.getLong(i); + final int decimalSepPos = Long.numberOfTrailingZeros(~numberWord & 0x10101000); + final short measurement = parseNumber(decimalSepPos, numberWord); + final int addOffset = (decimalSepPos >>> 3) + 3; + i += addOffset; + + map.addEntry(keyHash, keyAddress, keyLength, measurement); } - return chunks; } - private static long seekNextNewline(final FileChannel channel, final long end) throws IOException { - var bb = ByteBuffer.allocate(MAX_KEY_LENGTH); - channel.position(end).read(bb); + // credit: merykitty + private static short parseNumber(int decimalSepPos, long numberWord) { + int shift = 28 - decimalSepPos; + // signed is -1 if negative, 0 otherwise + long signed = (~numberWord << 59) >> 63; + long designMask = ~(signed & 0xFF); + // Align the number to a specific position and transform the ascii to digit value + long digits = ((numberWord & designMask) << shift) & 0x0F000F0F00L; + // Now digits is in the form 0xUU00TTHH00 (UU: units digit, TT: tens digit, HH: hundreds digit) + // 0xUU00TTHH00 * (100 * 0x1000000 + 10 * 0x10000 + 1) = + // 0x000000UU00TTHH00 + 0x00UU00TTHH000000 * 10 + 0xUU00TTHH00000000 * 100 + long absValue = ((digits * 0x640a0001) >>> 32) & 0x3FF; + return (short) ((absValue ^ signed) - signed); + } + } - for (int i = 0; i < bb.limit(); i++) { - if (bb.get(i) == NL) { - return end + i; - } - } + private static class Stats { + private final String key; + private int min; + private int max; + private int count; + private long sum; - throw new IllegalStateException("Couldn't find next newline"); + Stats(final String key, final int min, final int max, final int count, final long sum) { + this.min = min; + this.max = max; + this.count = count; + this.sum = sum; + this.key = key; } - private void process(final ByteBuffer bb) { - final var sample = new Sample(); - var isKey = 
true; + void print(final PrintStream out) { + out.print(key); + out.print('='); + out.print(round(min / 10f)); + out.print('/'); + out.print(round((sum / 10f) / count)); + out.print('/'); + out.print(round(max) / 10f); + } - for (long i = 0, sz = bb.limit(); i < sz; i++) { + private static double round(double value) { + return Math.round(value * 10.0) / 10.0; + } + } - final byte b = bb.get(); + private static void print(final Collection sorted) { + int size = sorted.size(); + System.out.print('{'); + for (Stats stats : sorted) { + stats.print(System.out); + if (--size > 0) { + System.out.print(", "); + } + } + System.out.println('}'); + } - if (b == SEMICOLON) { - isKey = false; - } - else if (b == NL) { - isKey = true; - addSample(sample); - sample.reset(); - } - else if (isKey) { - sample.pushKey(b); - } - else if (b == DOT) { - // skip - } - else if (b == MINUS) { - sample.sign = -1; - } - else { - sample.pushMeasurement(b); + private static Chunk[] split(final FileChannel channel) throws IOException { + final long fileSize = channel.size(); + long start = channel.map(READ_ONLY, 0, fileSize, Arena.global()).address(); + final long endAddress = start + fileSize; + if (fileSize < 10000) { + return new Chunk[]{ new Chunk(start, endAddress) }; + } + + final long chunkSize = fileSize / NUM_CHUNKS; + final var chunks = new Chunk[NUM_CHUNKS]; + long end = start + chunkSize; + + for (int i = 0; i < NUM_CHUNKS; i++) { + if (i > 0) { + start = chunks[i - 1].end; + end = Math.min(start + chunkSize, endAddress); + } + if (end < endAddress) { + while (UNSAFE.getByte(end) != NL) { + end++; } + end++; } + chunks[i] = new Chunk(start, end); } + return chunks; + } - private void addSample(final Sample sample) { - final Stats stats = map.computeIfAbsent(sample.keyHash, - k -> new Stats(new String(sample.keyBytes, 0, sample.keyLength, UTF_8))); + private record Chunk(long start, long end) { + } - final var val = sample.getMeasurement(); + private static Unsafe getUnsafe() { + try 
{ + Field unsafe = Unsafe.class.getDeclaredField("theUnsafe"); + unsafe.setAccessible(true); + return (Unsafe) unsafe.get(null); + } + catch (Exception e) { + throw new RuntimeException(e); + } + } - if (val < stats.min) - stats.min = val; + private static class UnsafeMap { - if (val > stats.max) - stats.max = val; + long mapStart; + long mapEnd; + int capacity; // num entries - stats.sum += val; - stats.count++; + UnsafeMap(int numEntries) { + capacity = numEntries; + final long size = ENTRY_SIZE * numEntries; + mapStart = UNSAFE.allocateMemory(size); + mapEnd = mapStart + size; + UNSAFE.setMemory(mapStart, size, (byte) 0); } - void printStats() { - var sorted = new ArrayList<>(map.values()); - Collections.sort(sorted); + void addEntry(final int keyHash, final long keyAddress, final short keyLength, final short measurement) { + final int pos = (capacity - 1) & keyHash; - int size = sorted.size(); + long addr = mapStart + pos * ENTRY_SIZE; + int hash = UNSAFE.getInt(addr + HASH_OFFSET); - System.out.print('{'); - - for (Stats stats : sorted) { - stats.print(System.out); - if (--size > 0) { - System.out.print(", "); - } + if (hash == 0) { // new entry + initEntry(addr, keyAddress, keyLength, measurement, keyHash); + return; + } + if (hash == keyHash && keysEqual(addr, keyAddress, keyLength)) { + updateEntry(addr, measurement); + return; } - System.out.println('}'); - } - } - private static class Stats implements Comparable { - private final String city; - private int min = Integer.MAX_VALUE; - private int max = Integer.MIN_VALUE; - private long sum; - private int count; + // this can be improved to avoid clustering at the start. 
+ // should only affect the 10k test + addr = mapStart; - private Stats(String city) { - this.city = city; - } + while (addr < mapEnd) { + addr += ENTRY_SIZE; + hash = UNSAFE.getInt(addr + HASH_OFFSET); - @Override - public int compareTo(final Stats o) { - return city.compareTo(o.city); - } + if (hash == 0) { + initEntry(addr, keyAddress, keyLength, measurement, keyHash); + return; + } + if (hash == keyHash && keysEqual(addr, keyAddress, keyLength)) { + updateEntry(addr, measurement); + return; + } + } - void print(final PrintStream out) { - out.print(city); - out.print('='); - out.print(round(min / 10f)); - out.print('/'); - out.print(round((sum / 10f) / count)); - out.print('/'); - out.print(round(max) / 10f); + resize(keyHash, keyAddress, keyLength, measurement); } - private static double round(double value) { - return Math.round(value * 10.0) / 10.0; - } - } + private void resize(final int keyHash, final long keyAddress, final short keyLength, final short measurement) { + UnsafeMap newMap = new UnsafeMap(capacity * 2); + + for (long addr = mapStart; addr < mapEnd; addr += ENTRY_SIZE) { + final short oKeyLength = UNSAFE.getShort(addr + KEY_LENGTH_OFFSET); + final int oKeyHsh = UNSAFE.getInt(addr + HASH_OFFSET); + final short oMin = UNSAFE.getShort(addr + MIN_OFFSET); + final short oMax = UNSAFE.getShort(addr + MAX_OFFSET); + final int oCount = UNSAFE.getInt(addr + COUNT_OFFSET); + final long oSum = UNSAFE.getLong(addr + SUM_OFFSET); + + final int newPos = (newMap.capacity - 1) & oKeyHsh; + long newAddr = newMap.mapStart + newPos * ENTRY_SIZE; + + UNSAFE.putShort(newAddr + KEY_LENGTH_OFFSET, oKeyLength); + UNSAFE.putInt(newAddr + HASH_OFFSET, oKeyHsh); + UNSAFE.putShort(newAddr + MIN_OFFSET, oMin); + UNSAFE.putShort(newAddr + MAX_OFFSET, oMax); + UNSAFE.putInt(newAddr + COUNT_OFFSET, oCount); + UNSAFE.putLong(newAddr + SUM_OFFSET, oSum); + } - private static class Sample { - private final byte[] keyBytes = new byte[MAX_KEY_LENGTH]; - private int keyLength; - 
private int keyHash; - private int measurement; - private int sign = 1; + newMap.addEntry(keyHash, keyAddress, keyLength, measurement); - void pushKey(byte b) { - keyBytes[keyLength++] = b; - keyHash = 31 * keyHash + b; + this.mapStart = newMap.mapStart; + this.mapEnd = newMap.mapEnd; + this.capacity = newMap.capacity; } - void pushMeasurement(byte b) { - final int i = b - '0'; - measurement = measurement * 10 + i; + private static void initEntry(final long entry, final long keyAddress, final short keyLength, final short measurement, final int keyHash) { + UNSAFE.copyMemory(keyAddress, entry, keyLength); + UNSAFE.putInt(entry + HASH_OFFSET, keyHash); + UNSAFE.putShort(entry + KEY_LENGTH_OFFSET, keyLength); + UNSAFE.putShort(entry + MIN_OFFSET, Short.MAX_VALUE); + UNSAFE.putShort(entry + MAX_OFFSET, Short.MIN_VALUE); + + updateEntry(entry, measurement); } - int getMeasurement() { - return sign * measurement; + private static void updateEntry(final long entry, final short measurement) { + UNSAFE.putShort(entry + MIN_OFFSET, + (short) Math.min(UNSAFE.getShort(entry + MIN_OFFSET), measurement)); + UNSAFE.putShort(entry + MAX_OFFSET, + (short) Math.max(UNSAFE.getShort(entry + MAX_OFFSET), measurement)); + UNSAFE.putInt(entry + COUNT_OFFSET, + UNSAFE.getInt(entry + COUNT_OFFSET) + 1); + UNSAFE.putLong(entry + SUM_OFFSET, + UNSAFE.getLong(entry + SUM_OFFSET) + measurement); } + } - void reset() { - keyHash = 0; - keyLength = 0; - measurement = 0; - sign = 1; + private static boolean keysEqual(long key1Address, long key2Address, final int keyLength) { + // credit: abeobk + long xsum = 0; + int n = keyLength & 0xF8; + for (int i = 0; i < n; i += 8) { + xsum |= (UNSAFE.getLong(key1Address + i) ^ UNSAFE.getLong(key2Address + i)); } + return xsum == 0; } } diff --git a/src/main/java/dev/morling/onebrc/CalculateAverage_artsiomkorzun.java b/src/main/java/dev/morling/onebrc/CalculateAverage_artsiomkorzun.java index 4f6c8fd10..cc6e3b95a 100644 --- 
a/src/main/java/dev/morling/onebrc/CalculateAverage_artsiomkorzun.java +++ b/src/main/java/dev/morling/onebrc/CalculateAverage_artsiomkorzun.java @@ -20,11 +20,13 @@ import java.lang.foreign.Arena; import java.lang.foreign.MemorySegment; import java.lang.reflect.Field; -import java.nio.ByteOrder; import java.nio.channels.FileChannel; import java.nio.file.Path; import java.nio.file.StandardOpenOption; +import java.util.ArrayList; +import java.util.Arrays; import java.util.Map; +import java.util.Optional; import java.util.TreeMap; import java.util.concurrent.atomic.AtomicInteger; import java.util.concurrent.atomic.AtomicReference; @@ -32,17 +34,14 @@ public class CalculateAverage_artsiomkorzun { private static final Path FILE = Path.of("./measurements.txt"); - private static final MemorySegment MAPPED_FILE = map(FILE); - - private static final int PARALLELISM = Runtime.getRuntime().availableProcessors(); - private static final int SEGMENT_SIZE = 32 * 1024 * 1024; - private static final int SEGMENT_COUNT = (int) ((MAPPED_FILE.byteSize() + SEGMENT_SIZE - 1) / SEGMENT_SIZE); - private static final int SEGMENT_OVERLAP = 1024; - private static final long COMMA_PATTERN = pattern(';'); + private static final long SEGMENT_SIZE = 2 * 1024 * 1024; + private static final long COMMA_PATTERN = 0x3B3B3B3B3B3B3B3BL; + private static final long LINE_PATTERN = 0x0A0A0A0A0A0A0A0AL; private static final long DOT_BITS = 0x10101000; private static final long MAGIC_MULTIPLIER = (100 * 0x1000000 + 10 * 0x10000 + 1); + private static final long[] WORD_MASK = { 0, 0, 0, 0, 0, 0, 0, 0, -1 }; + private static final int[] LENGTH_MASK = { 0, 0, 0, 0, 0, 0, 0, 0, -1 }; - private static final ByteOrder BYTE_ORDER = ByteOrder.nativeOrder(); private static final Unsafe UNSAFE; static { @@ -64,16 +63,61 @@ public static void main(String[] args) throws Exception { // System.err.println("Time: " + (end - start)); // } + if (isSpawn(args)) { + spawn(); + return; + } + execute(); } + private static 
boolean isSpawn(String[] args) { + for (String arg : args) { + if ("--worker".equals(arg)) { + return false; + } + } + + return true; + } + + private static void spawn() throws Exception { + ProcessHandle.Info info = ProcessHandle.current().info(); + ArrayList commands = new ArrayList<>(); + Optional command = info.command(); + Optional arguments = info.arguments(); + + if (command.isPresent()) { + commands.add(command.get()); + } + + if (arguments.isPresent()) { + commands.addAll(Arrays.asList(arguments.get())); + } + + commands.add("--worker"); + + new ProcessBuilder() + .command(commands) + .start() + .getInputStream() + .transferTo(System.out); + } + private static void execute() throws Exception { + MemorySegment fileMemory = map(FILE); + long fileAddress = fileMemory.address(); + long fileSize = fileMemory.byteSize(); + int segmentCount = (int) ((fileSize + SEGMENT_SIZE - 1) / SEGMENT_SIZE); + AtomicInteger counter = new AtomicInteger(); AtomicReference result = new AtomicReference<>(); - Aggregator[] aggregators = new Aggregator[PARALLELISM]; + + int parallelism = Runtime.getRuntime().availableProcessors(); + Aggregator[] aggregators = new Aggregator[parallelism]; for (int i = 0; i < aggregators.length; i++) { - aggregators[i] = new Aggregator(counter, result); + aggregators[i] = new Aggregator(counter, result, fileAddress, fileSize, segmentCount); aggregators[i].start(); } @@ -81,8 +125,9 @@ private static void execute() throws Exception { aggregators[i].join(); } - Map aggregates = result.get().aggregate(); + Map aggregates = result.get().build(); System.out.println(text(aggregates)); + System.out.close(); } private static MemorySegment map(Path file) { @@ -95,19 +140,15 @@ private static MemorySegment map(Path file) { } } - private static long pattern(char c) { - long b = c & 0xFFL; - return b | (b << 8) | (b << 16) | (b << 24) | (b << 32) | (b << 40) | (b << 48) | (b << 56); - } - - private static long getLongLittleEndian(long address) { - long value = 
UNSAFE.getLong(address); - - if (BYTE_ORDER == ByteOrder.BIG_ENDIAN) { - value = Long.reverseBytes(value); - } - - return value; + private static long word(long address) { + return UNSAFE.getLong(address); + /* + * if (BYTE_ORDER == ByteOrder.BIG_ENDIAN) { + * value = Long.reverseBytes(value); + * } + * + * return value; + */ } private static String text(Map aggregates) { @@ -139,96 +180,111 @@ private record Aggregate(int min, int max, long sum, int cnt) { private static class Aggregates { - private static final int ENTRIES = 64 * 1024; - private static final int SIZE = 32 * ENTRIES; + private static final long ENTRIES = 64 * 1024; + private static final long SIZE = 128 * ENTRIES; + private static final long MASK = (ENTRIES - 1) << 7; private final long pointer; public Aggregates() { - long address = UNSAFE.allocateMemory(SIZE + 8096); + long address = UNSAFE.allocateMemory(SIZE + 4096); pointer = (address + 4095) & (~4095); UNSAFE.setMemory(pointer, SIZE, (byte) 0); } - public void add(long reference, int length, int hash, int value) { - for (int offset = offset(hash);; offset = next(offset)) { + public long find(long word1, long word2, long hash) { + long address = pointer + offset(hash); + long w1 = word(address + 24); + long w2 = word(address + 32); + return (word1 == w1) && (word2 == w2) ? 
address : 0; + } + + public long put(long reference, long word, long length, long hash) { + for (long offset = offset(hash);; offset = next(offset)) { long address = pointer + offset; - long ref = UNSAFE.getLong(address); + if (equal(reference, word, address + 24, length)) { + return address; + } - if (ref == 0) { - alloc(reference, length, hash, value, address); - break; + int len = UNSAFE.getInt(address); + if (len == 0) { + alloc(reference, length, hash, address); + return address; } + } + } - if (equal(ref, reference, length)) { - long sum = UNSAFE.getLong(address + 16) + value; - int cnt = UNSAFE.getInt(address + 24) + 1; - short min = (short) Math.min(UNSAFE.getShort(address + 28), value); - short max = (short) Math.max(UNSAFE.getShort(address + 30), value); + public static void update(long address, long value) { + long sum = UNSAFE.getLong(address + 8) + value; + int cnt = UNSAFE.getInt(address + 16) + 1; + short min = UNSAFE.getShort(address + 20); + short max = UNSAFE.getShort(address + 22); - UNSAFE.putLong(address + 16, sum); - UNSAFE.putInt(address + 24, cnt); - UNSAFE.putShort(address + 28, min); - UNSAFE.putShort(address + 30, max); - break; - } + UNSAFE.putLong(address + 8, sum); + UNSAFE.putInt(address + 16, cnt); + + if (value < min) { + UNSAFE.putShort(address + 20, (short) value); + } + + if (value > max) { + UNSAFE.putShort(address + 22, (short) value); } } public void merge(Aggregates rights) { - for (int rightOffset = 0; rightOffset < SIZE; rightOffset += 32) { + for (long rightOffset = 0; rightOffset < SIZE; rightOffset += 128) { long rightAddress = rights.pointer + rightOffset; - long reference = UNSAFE.getLong(rightAddress); + int length = UNSAFE.getInt(rightAddress); - if (reference == 0) { + if (length == 0) { continue; } - int hash = UNSAFE.getInt(rightAddress + 8); - int length = UNSAFE.getInt(rightAddress + 12); + int hash = UNSAFE.getInt(rightAddress + 4); - for (int offset = offset(hash);; offset = next(offset)) { + for (long offset 
= offset(hash);; offset = next(offset)) { long address = pointer + offset; - long ref = UNSAFE.getLong(address); - if (ref == 0) { - UNSAFE.copyMemory(rightAddress, address, 32); + if (equal(address + 24, rightAddress + 24, length)) { + long sum = UNSAFE.getLong(address + 8) + UNSAFE.getLong(rightAddress + 8); + int cnt = UNSAFE.getInt(address + 16) + UNSAFE.getInt(rightAddress + 16); + short min = (short) Math.min(UNSAFE.getShort(address + 20), UNSAFE.getShort(rightAddress + 20)); + short max = (short) Math.max(UNSAFE.getShort(address + 22), UNSAFE.getShort(rightAddress + 22)); + + UNSAFE.putLong(address + 8, sum); + UNSAFE.putInt(address + 16, cnt); + UNSAFE.putShort(address + 20, min); + UNSAFE.putShort(address + 22, max); break; } - if (equal(ref, reference, length)) { - long sum = UNSAFE.getLong(address + 16) + UNSAFE.getLong(rightAddress + 16); - int cnt = UNSAFE.getInt(address + 24) + UNSAFE.getInt(rightAddress + 24); - short min = (short) Math.min(UNSAFE.getShort(address + 28), UNSAFE.getShort(rightAddress + 28)); - short max = (short) Math.max(UNSAFE.getShort(address + 30), UNSAFE.getShort(rightAddress + 30)); + int len = UNSAFE.getInt(address); - UNSAFE.putLong(address + 16, sum); - UNSAFE.putInt(address + 24, cnt); - UNSAFE.putShort(address + 28, min); - UNSAFE.putShort(address + 30, max); + if (len == 0) { + UNSAFE.copyMemory(rightAddress, address, length + 24); break; } } } } - public Map aggregate() { + public Map build() { TreeMap set = new TreeMap<>(); - for (int offset = 0; offset < SIZE; offset += 32) { + for (long offset = 0; offset < SIZE; offset += 128) { long address = pointer + offset; - long ref = UNSAFE.getLong(address); + int length = UNSAFE.getInt(address); - if (ref != 0) { - int length = UNSAFE.getInt(address + 12) - 1; - byte[] array = new byte[length]; - UNSAFE.copyMemory(null, ref, array, Unsafe.ARRAY_BYTE_BASE_OFFSET, length); + if (length != 0) { + byte[] array = new byte[length - 1]; + UNSAFE.copyMemory(null, address + 24, array, 
Unsafe.ARRAY_BYTE_BASE_OFFSET, array.length); String key = new String(array); - long sum = UNSAFE.getLong(address + 16); - int cnt = UNSAFE.getInt(address + 24); - short min = UNSAFE.getShort(address + 28); - short max = UNSAFE.getShort(address + 30); + long sum = UNSAFE.getLong(address + 8); + int cnt = UNSAFE.getInt(address + 16); + short min = UNSAFE.getShort(address + 20); + short max = UNSAFE.getShort(address + 22); Aggregate aggregate = new Aggregate(min, max, sum, cnt); set.put(key, aggregate); @@ -238,25 +294,23 @@ public Map aggregate() { return set; } - private static void alloc(long reference, int length, int hash, int value, long address) { - UNSAFE.putLong(address, reference); - UNSAFE.putInt(address + 8, hash); - UNSAFE.putInt(address + 12, length); - UNSAFE.putLong(address + 16, value); - UNSAFE.putInt(address + 24, 1); - UNSAFE.putShort(address + 28, (short) value); - UNSAFE.putShort(address + 30, (short) value); + private static void alloc(long reference, long length, long hash, long address) { + UNSAFE.putInt(address, (int) length); + UNSAFE.putInt(address + 4, (int) hash); + UNSAFE.putShort(address + 20, Short.MAX_VALUE); + UNSAFE.putShort(address + 22, Short.MIN_VALUE); + UNSAFE.copyMemory(reference, address + 24, length); } - private static int offset(int hash) { - return ((hash) & (ENTRIES - 1)) << 5; + private static long offset(long hash) { + return hash & MASK; } - private static int next(int prev) { - return (prev + 32) & (SIZE - 1); + private static long next(long prev) { + return (prev + 128) & (SIZE - 1); } - private static boolean equal(long leftAddress, long rightAddress, int length) { + private static boolean equal(long leftAddress, long leftWord, long rightAddress, long length) { while (length > 8) { long left = UNSAFE.getLong(leftAddress); long right = UNSAFE.getLong(rightAddress); @@ -270,10 +324,24 @@ private static boolean equal(long leftAddress, long rightAddress, int length) { length -= 8; } - int shift = (8 - length) << 3; - 
long left = getLongLittleEndian(leftAddress) << shift; - long right = getLongLittleEndian(rightAddress) << shift; - return (left == right); + return leftWord == word(rightAddress); + } + + private static boolean equal(long leftAddress, long rightAddress, long length) { + do { + long left = UNSAFE.getLong(leftAddress); + long right = UNSAFE.getLong(rightAddress); + + if (left != right) { + return false; + } + + leftAddress += 8; + rightAddress += 8; + length -= 8; + } while (length > 0); + + return true; } } @@ -281,28 +349,108 @@ private static class Aggregator extends Thread { private final AtomicInteger counter; private final AtomicReference result; + private final long fileAddress; + private final long fileSize; + private final int segmentCount; - public Aggregator(AtomicInteger counter, AtomicReference result) { + public Aggregator(AtomicInteger counter, AtomicReference result, + long fileAddress, long fileSize, int segmentCount) { super("aggregator"); this.counter = counter; this.result = result; + this.fileAddress = fileAddress; + this.fileSize = fileSize; + this.segmentCount = segmentCount; } @Override public void run() { Aggregates aggregates = new Aggregates(); - for (int segment; (segment = counter.getAndIncrement()) < SEGMENT_COUNT;) { - long position = (long) SEGMENT_SIZE * segment; - int size = (int) Math.min(SEGMENT_SIZE + SEGMENT_OVERLAP, MAPPED_FILE.byteSize() - position); - long address = MAPPED_FILE.address() + position; - long limit = address + Math.min(SEGMENT_SIZE, size - 1); + for (int segment; (segment = counter.getAndIncrement()) < segmentCount;) { + long position = SEGMENT_SIZE * segment; + long size = Math.min(SEGMENT_SIZE + 1, fileSize - position); + long start = fileAddress + position; + long end = start + size; if (segment > 0) { - address = next(address); + start = next(start); + } + + long chunk = (end - start) / 3; + long left = next(start + chunk); + long right = next(start + chunk + chunk); + + Chunk chunk1 = new Chunk(start, 
left); + Chunk chunk2 = new Chunk(left, right); + Chunk chunk3 = new Chunk(right, end); + + while (chunk1.has() && chunk2.has() && chunk3.has()) { + long word1 = word(chunk1.position); + long word2 = word(chunk2.position); + long word3 = word(chunk3.position); + long word4 = word(chunk1.position + 8); + long word5 = word(chunk2.position + 8); + long word6 = word(chunk3.position + 8); + + long separator1 = separator(word1); + long separator2 = separator(word2); + long separator3 = separator(word3); + long separator4 = separator(word4); + long separator5 = separator(word5); + long separator6 = separator(word6); + + long pointer1 = find(aggregates, chunk1, word1, word4, separator1, separator4); + long pointer2 = find(aggregates, chunk2, word2, word5, separator2, separator5); + long pointer3 = find(aggregates, chunk3, word3, word6, separator3, separator6); + + long value1 = value(chunk1); + long value2 = value(chunk2); + long value3 = value(chunk3); + + Aggregates.update(pointer1, value1); + Aggregates.update(pointer2, value2); + Aggregates.update(pointer3, value3); } - aggregate(aggregates, address, limit); + while (chunk1.has()) { + long word1 = word(chunk1.position); + long word2 = word(chunk1.position + 8); + + long separator1 = separator(word1); + long separator2 = separator(word2); + + long pointer = find(aggregates, chunk1, word1, word2, separator1, separator2); + long value = value(chunk1); + + Aggregates.update(pointer, value); + } + + while (chunk2.has()) { + long word1 = word(chunk2.position); + long word2 = word(chunk2.position + 8); + + long separator1 = separator(word1); + long separator2 = separator(word2); + + long pointer = find(aggregates, chunk2, word1, word2, separator1, separator2); + long value = value(chunk2); + + Aggregates.update(pointer, value); + } + + while (chunk3.has()) { + long word1 = word(chunk3.position); + long word2 = word(chunk3.position + 8); + + long separator1 = separator(word1); + long separator2 = separator(word2); + + long 
pointer = find(aggregates, chunk3, word1, word2, separator1, separator2); + long value = value(chunk3); + + Aggregates.update(pointer, value); + } } while (!result.compareAndSet(null, aggregates)) { @@ -314,62 +462,122 @@ public void run() { } } - private static void aggregate(Aggregates aggregates, long position, long limit) { - // this parsing can produce seg fault at page boundaries - // e.g. file size is 4096 and the last entry is X=0.0, which is less than 8 bytes - // as a result a read will be split across pages, where one of them is not mapped - // but for some reason it works on my machine, leaving to investigate + private static long next(long position) { + while (true) { + long word = word(position); + long match = word ^ LINE_PATTERN; + long line = (match - 0x0101010101010101L) & (~match & 0x8080808080808080L); + + if (line == 0) { + position += 8; + continue; + } + + return position + length(line) + 1; + } + } + + private static long find(Aggregates aggregates, Chunk chunk, long word1, long word2, long separator1, long separator2) { + boolean small = (separator1 | separator2) != 0; + long start = chunk.position; + long hash; + long word; + + if (small) { + int length1 = length(separator1); + int length2 = length(separator2); + word1 = mask(word1, separator1); + word2 = mask(word2 & WORD_MASK[length1], separator2); + hash = mix(word1 ^ word2); - for (long start = position, hash = 0; position <= limit;) { - int length; // idea: royvanrijn, explanation: https://richardstartin.github.io/posts/finding-bytes - { - long word = getLongLittleEndian(position); - long match = word ^ COMMA_PATTERN; - long mask = (match - 0x0101010101010101L) & ~match & 0x8080808080808080L; + chunk.position += length1 + (length2 & LENGTH_MASK[length1]) + 1; + long pointer = aggregates.find(word1, word2, hash); - if (mask == 0) { + if (pointer != 0) { + return pointer; + } + + word = (separator1 == 0) ? 
word2 : word1; + } + else { + chunk.position += 16; + hash = word1 ^ word2; + + while (true) { + word = word(chunk.position); + long separator = separator(word); + + if (separator == 0) { + chunk.position += 8; hash ^= word; - position += 8; continue; } - int bit = Long.numberOfTrailingZeros(mask); - position += (bit >>> 3) + 1; // +sep - hash ^= (word << (69 - bit)); - length = (int) (position - start); + word = mask(word, separator); + hash = mix(hash ^ word); + chunk.position += length(separator) + 1; + break; } + } - int value; // idea: merykitty - { - long word = getLongLittleEndian(position); - long inverted = ~word; - int dot = Long.numberOfTrailingZeros(inverted & DOT_BITS); - long signed = (inverted << 59) >> 63; - long mask = ~(signed & 0xFF); - long digits = ((word & mask) << (28 - dot)) & 0x0F000F0F00L; - long abs = ((digits * MAGIC_MULTIPLIER) >>> 32) & 0x3FF; - value = (int) ((abs ^ signed) - signed); - position += (dot >> 3) + 3; - } + long length = chunk.position - start; + return aggregates.put(start, word, length, hash); + } - aggregates.add(start, length, mix(hash), value); + private static long value(Chunk chunk) { + long num = word(chunk.position); + long dot = dot(num); + long value = value(num, dot); + chunk.position += (dot >> 3) + 3; + return value; + } - start = position; - hash = 0; - } + private static long separator(long word) { + long match = word ^ COMMA_PATTERN; + return (match - 0x0101010101010101L) & (~match & 0x8080808080808080L); } - private static long next(long position) { - while (UNSAFE.getByte(position++) != '\n') { - // continue - } - return position; + private static long mask(long word, long separator) { + long mask = separator ^ (separator - 1); + return word & mask; } - private static int mix(long x) { + private static int length(long separator) { + return Long.numberOfTrailingZeros(separator) >>> 3; + } + + private static long mix(long x) { long h = x * -7046029254386353131L; - h ^= h >>> 32; - return (int) (h ^ h >>> 
16); + h ^= h >>> 35; + return h; + // h ^= h >>> 32; + // return (int) (h ^ h >>> 16); + } + + private static long dot(long num) { + return Long.numberOfTrailingZeros(~num & DOT_BITS); + } + + private static long value(long w, long dot) { + long signed = (~w << 59) >> 63; + long mask = ~(signed & 0xFF); + long digits = ((w & mask) << (28 - dot)) & 0x0F000F0F00L; + long abs = ((digits * MAGIC_MULTIPLIER) >>> 32) & 0x3FF; + return (abs ^ signed) - signed; + } + } + + private static class Chunk { + final long limit; + long position; + + public Chunk(long position, long limit) { + this.position = position; + this.limit = limit; + } + + boolean has() { + return position < limit; } } -} +} \ No newline at end of file diff --git a/src/main/java/dev/morling/onebrc/CalculateAverage_breejesh.java b/src/main/java/dev/morling/onebrc/CalculateAverage_breejesh.java new file mode 100644 index 000000000..3ee87c943 --- /dev/null +++ b/src/main/java/dev/morling/onebrc/CalculateAverage_breejesh.java @@ -0,0 +1,180 @@ +/* + * Copyright 2023 The original authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package dev.morling.onebrc; + +import java.io.File; +import java.io.IOException; +import java.nio.MappedByteBuffer; +import java.nio.channels.FileChannel; +import java.nio.charset.StandardCharsets; +import java.nio.file.Files; +import java.nio.file.StandardOpenOption; +import java.util.*; +import java.util.concurrent.CompletableFuture; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; + +public class CalculateAverage_breejesh { + private static final String FILE = "./measurements.txt"; + private static final int TWO_BYTE_TO_INT = 480 + 48; // 48 is the ASCII code for '0' + private static final int THREE_BYTE_TO_INT = 4800 + 480 + 48; + + private static final class Measurement { + + private int min; + private int max; + private int total; + private int count; + + public Measurement(int value) { + this.min = value; + this.max = value; + this.total = value; + this.count = 1; + } + + @Override + public String toString() { + StringBuilder result = new StringBuilder(); + result.append(min / 10.0); + result.append("/"); + result.append(Math.round(((double) total) / count) / 10.0); + result.append("/"); + result.append(max / 10.0); + return result.toString(); + } + + private void append(int min, int max, int total, int count) { + if (min < this.min) + this.min = min; + if (max > this.max) + this.max = max; + this.total += total; + this.count += count; + } + + public void append(int value) { + append(value, value, value, 1); + } + + public void merge(Measurement other) { + append(other.min, other.max, other.total, other.count); + } + } + + public static void main(String[] args) throws Exception { + // long start = System.currentTimeMillis(); + // Find system details to determine cores and + var file = new File(args.length > 0 ? args[0] : FILE); + long fileSize = file.length(); + var numberOfCores = fileSize > 1_000_000 ? 
Runtime.getRuntime().availableProcessors() : 1; + var splitSectionSize = (int) Math.min(Integer.MAX_VALUE, fileSize / numberOfCores); // bytebuffer position is an int, so can be max Integer.MAX_VALUE + var segmentCount = (int) (fileSize / splitSectionSize); + + // Divide file into segments + ExecutorService executor = Executors.newFixedThreadPool(segmentCount); + List>> futures = new ArrayList<>(); + for (int i = 0; i < segmentCount; i++) { + long sectionStart = i * (long) splitSectionSize; + long sectionEnd = Math.min(fileSize, sectionStart + splitSectionSize + 100); + var fileChannel = (FileChannel) Files.newByteChannel(file.toPath(), StandardOpenOption.READ); + CompletableFuture> future = CompletableFuture.supplyAsync(() -> { + MappedByteBuffer currentBuffer = null; + try { + currentBuffer = fileChannel.map(FileChannel.MapMode.READ_ONLY, sectionStart, sectionEnd - sectionStart); + } + catch (IOException e) { + throw new RuntimeException(e); + } + // Skip till new line for unequal segments, not to be done for first section + if (sectionStart > 0) { + while (currentBuffer.get() != '\n') + ; + } + Map map = new HashMap<>(); + while (currentBuffer.position() < splitSectionSize) { + // Read station + String str = getStationFromBuffer(currentBuffer); + // Read number + int value = getValueFromBuffer(currentBuffer); + if (map.containsKey(str)) { + map.get(str).append(value); + } + else { + map.put(str, new Measurement(value)); + } + } + return map; + }, executor); + futures.add(future); + } + + CompletableFuture.allOf(futures.toArray(new CompletableFuture[0])).join(); + Map finalMap = new TreeMap<>(); + for (CompletableFuture> future : futures) { + Map map = future.get(); + map.keySet().stream().forEach( + key -> { + if (finalMap.containsKey(key)) { + finalMap.get(key).merge(map.get(key)); + } + else { + finalMap.put(key, map.get(key)); + } + }); + } + + System.out.println(finalMap); + // System.out.printf("Time %s", System.currentTimeMillis() - start); + 
System.exit(0); + } + + private static String getStationFromBuffer(MappedByteBuffer currentBuffer) { + byte currentByte; + var byteCounter = 0; + var buffer = new byte[100]; + while ((currentByte = currentBuffer.get()) != ';') { + buffer[byteCounter++] = currentByte; + } + return new String(buffer, 0, byteCounter, StandardCharsets.UTF_8); + } + + private static int getValueFromBuffer(MappedByteBuffer currentBuffer) { + int value; + byte[] nums = new byte[4]; + currentBuffer.get(nums); + if (nums[1] == '.') { + // case of n.n + value = (nums[0] * 10 + nums[2] - TWO_BYTE_TO_INT); + } + else { + if (nums[3] == '.') { + // case of -nn.n + value = -(nums[1] * 100 + nums[2] * 10 + currentBuffer.get() - THREE_BYTE_TO_INT); + } + else if (nums[0] == '-') { + // case of -n.n + value = -(nums[1] * 10 + nums[3] - TWO_BYTE_TO_INT); + } + else { + // case of nn.n + value = (nums[0] * 100 + nums[1] * 10 + nums[3] - THREE_BYTE_TO_INT); + } + currentBuffer.get(); // new line + } + return value; + } +} diff --git a/src/main/java/dev/morling/onebrc/CalculateAverage_bufistov.java b/src/main/java/dev/morling/onebrc/CalculateAverage_bufistov.java new file mode 100644 index 000000000..178a6e11e --- /dev/null +++ b/src/main/java/dev/morling/onebrc/CalculateAverage_bufistov.java @@ -0,0 +1,380 @@ +/* + * Copyright 2023 The original authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package dev.morling.onebrc; + +import sun.misc.Unsafe; + +import static java.lang.Math.toIntExact; + +import java.lang.foreign.Arena; +import java.lang.reflect.Field; +import java.nio.MappedByteBuffer; +import java.nio.channels.FileChannel; +import java.nio.charset.StandardCharsets; +import java.nio.file.Paths; +import java.time.Instant; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Comparator; +import java.util.HashMap; +import java.util.concurrent.Callable; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; + +import java.io.FileInputStream; +import java.io.IOException; +import java.util.concurrent.Future; + +class ByteArrayWrapper { + private final byte[] data; + + public ByteArrayWrapper(byte[] data) { + this.data = data; + } + + @Override + public boolean equals(Object other) { + return Arrays.equals(data, ((ByteArrayWrapper) other).data); + } + + @Override + public int hashCode() { + return Arrays.hashCode(data); + } +} + +public class CalculateAverage_bufistov { + + static class ResultRow { + byte[] station; + + String stationString; + long min, max, count, suma; + + ResultRow() { + } + + ResultRow(byte[] station, long value) { + this.station = new byte[station.length]; + System.arraycopy(station, 0, this.station, 0, station.length); + this.min = value; + this.max = value; + this.count = 1; + this.suma = value; + } + + ResultRow(long value) { + this.min = value; + this.max = value; + this.count = 1; + this.suma = value; + } + + void setStation(long startPosition, long endPosition) { + this.station = new byte[(int) (endPosition - startPosition)]; + for (int i = 0; i < this.station.length; ++i) { + this.station[i] = UNSAFE.getByte(startPosition + i); + } + } + + public String toString() { + stationString = new String(station, StandardCharsets.UTF_8); + return stationString + "=" + round(min / 10.0) + "/" + round(suma / 10.0 / count) + "/" + round(max / 10.0); + } + + private double 
round(double value) { + return Math.round(value * 10.0) / 10.0; + } + + void update(long newValue) { + this.count += 1; + this.suma += newValue; + if (newValue < this.min) { + this.min = newValue; + } + else if (newValue > this.max) { + this.max = newValue; + } + } + + ResultRow merge(ResultRow another) { + this.count += another.count; + this.suma += another.suma; + this.min = Math.min(this.min, another.min); + this.max = Math.max(this.max, another.max); + return this; + } + } + + static class OpenHash { + ResultRow[] data; + int dataSizeMask; + + // ResultRow metrics = new ResultRow(); + + public OpenHash(int capacityPow2) { + assert capacityPow2 <= 20; + int dataSize = 1 << capacityPow2; + dataSizeMask = dataSize - 1; + data = new ResultRow[dataSize]; + } + + int hashByteArray(byte[] array) { + int result = 0; + long mask = 0; + for (int i = 0; i < array.length; ++i, mask = ((mask + 1) & 3)) { + result += array[i] << mask; + } + return result & dataSizeMask; + } + + void merge(byte[] station, long value, int hashValue) { + while (data[hashValue] != null && !Arrays.equals(station, data[hashValue].station)) { + hashValue += 1; + hashValue &= dataSizeMask; + } + if (data[hashValue] == null) { + data[hashValue] = new ResultRow(station, value); + } + else { + data[hashValue].update(value); + } + // metrics.update(delta); + } + + void merge(byte[] station, long value) { + merge(station, value, hashByteArray(station)); + } + + void merge(final long startPosition, long endPosition, int hashValue, long value) { + while (data[hashValue] != null && !equalsToStation(startPosition, endPosition, data[hashValue].station)) { + hashValue += 1; + hashValue &= dataSizeMask; + } + if (data[hashValue] == null) { + data[hashValue] = new ResultRow(value); + data[hashValue].setStation(startPosition, endPosition); + } + else { + data[hashValue].update(value); + } + } + + boolean equalsToStation(long startPosition, long endPosition, byte[] station) { + if (endPosition - startPosition != 
station.length) { + return false; + } + for (int i = 0; i < station.length; ++i, ++startPosition) { + if (UNSAFE.getByte(startPosition) != station[i]) + return false; + } + return true; + } + + HashMap toJavaHashMap() { + HashMap result = new HashMap<>(20000); + for (int i = 0; i < data.length; ++i) { + if (data[i] != null) { + var key = new ByteArrayWrapper(data[i].station); + result.put(key, data[i]); + } + } + return result; + } + } + + static final Unsafe UNSAFE; + + static { + try { + Field unsafe = Unsafe.class.getDeclaredField("theUnsafe"); + unsafe.setAccessible(true); + UNSAFE = (Unsafe) unsafe.get(Unsafe.class); + } + catch (Throwable e) { + throw new RuntimeException(e); + } + } + + static final long LINE_SEPARATOR = '\n'; + + public static class FileRead implements Callable> { + + private final FileChannel fileChannel; + + private long currentLocation; + private long bytesToRead; + + private static final int hashCapacityPow2 = 18; + + static final int hashCapacityMask = (1 << hashCapacityPow2) - 1; + + public FileRead(FileChannel fileChannel, long startLocation, long bytesToRead, boolean firstSegment) { + this.fileChannel = fileChannel; + this.currentLocation = startLocation; + this.bytesToRead = bytesToRead; + } + + @Override + public HashMap call() throws IOException { + try { + OpenHash openHash = new OpenHash(hashCapacityPow2); + log("Reading the channel: " + currentLocation + ":" + bytesToRead); + if (currentLocation > 0) { + toLineBeginPrefix(); + } + toLineBeginSuffix(); + var memorySegment = fileChannel.map(FileChannel.MapMode.READ_ONLY, currentLocation, bytesToRead, Arena.global()); + currentLocation = memorySegment.address(); + processChunk(openHash); + log("Done Reading the channel: " + currentLocation + ":" + bytesToRead); + return openHash.toJavaHashMap(); + } + catch (Exception e) { + e.printStackTrace(); + throw e; + } + } + + byte getByte(long position) throws IOException { + MappedByteBuffer byteBuffer = 
fileChannel.map(FileChannel.MapMode.READ_ONLY, position, 1); + return byteBuffer.get(); + } + + void toLineBeginPrefix() throws IOException { + while (getByte(currentLocation - 1) != LINE_SEPARATOR) { + ++currentLocation; + --bytesToRead; + } + } + + void toLineBeginSuffix() throws IOException { + while (getByte(currentLocation + bytesToRead - 1) != LINE_SEPARATOR) { + ++bytesToRead; + } + } + + void processChunk(OpenHash result) { + long nameBegin = currentLocation; + long nameEnd = -1; + long numberBegin = -1; + int currentHash = 0; + int currentMask = 0; + int nameHash = 0; + long end = currentLocation + bytesToRead; + byte nextByte; + for (; currentLocation < end; ++currentLocation) { + nextByte = UNSAFE.getByte(currentLocation); + if (nextByte == ';') { + nameEnd = currentLocation; + numberBegin = currentLocation + 1; + nameHash = currentHash & hashCapacityMask; + } + else if (nextByte == LINE_SEPARATOR) { + long value = getValue(numberBegin, currentLocation); + // log("Station name: '" + getStationName(nameBegin, nameEnd) + "' value: " + value + " hash: " + nameHash); + result.merge(nameBegin, nameEnd, nameHash, value); + nameBegin = currentLocation + 1; + currentHash = 0; + currentMask = 0; + } + else { + currentHash += (nextByte << currentMask); + currentMask = (currentMask + 1) & 3; + } + } + } + + long getValue(long startLocation, long endLocation) { + byte nextByte = UNSAFE.getByte(startLocation); + boolean negate = nextByte == '-'; + long result = negate ? 0 : nextByte - '0'; + for (long i = startLocation + 1; i < endLocation; ++i) { + nextByte = UNSAFE.getByte(i); + if (nextByte != '.') { + result *= 10; + result += nextByte - '0'; + } + } + return negate ? 
-result : result; + } + + String getStationName(long from, long to) { + byte[] bytes = new byte[(int) (to - from)]; + for (int i = 0; i < bytes.length; ++i) { + bytes[i] = UNSAFE.getByte(from + i); + } + return new String(bytes, StandardCharsets.UTF_8); + } + } + + public static void main(String[] args) throws Exception { + String fileName = "measurements.txt"; + if (args.length > 0 && args[0].length() > 0) { + fileName = args[0]; + } + log("InputFile: " + fileName); + FileInputStream fileInputStream = new FileInputStream(fileName); + int numThreads = 2 * Runtime.getRuntime().availableProcessors(); + if (args.length > 1) { + numThreads = Integer.parseInt(args[1]); + } + log("NumThreads: " + numThreads); + FileChannel channel = fileInputStream.getChannel(); + final long fileSize = channel.size(); + long remaining_size = fileSize; + long chunk_size = Math.min((fileSize + numThreads - 1) / numThreads, Integer.MAX_VALUE - 5); + + ExecutorService executor = Executors.newFixedThreadPool(numThreads); + + long startLocation = 0; + ArrayList>> results = new ArrayList<>(numThreads); + var fileChannel = FileChannel.open(Paths.get(fileName)); + boolean firstSegment = true; + while (remaining_size > 0) { + long actualSize = Math.min(chunk_size, remaining_size); + results.add(executor.submit(new FileRead(fileChannel, startLocation, toIntExact(actualSize), firstSegment))); + firstSegment = false; + remaining_size -= actualSize; + startLocation += actualSize; + } + executor.shutdown(); + + // Wait for all threads to finish + while (!executor.isTerminated()) { + Thread.yield(); + } + log("Finished all threads"); + fileInputStream.close(); + HashMap result = new HashMap<>(20000); + for (var future : results) { + for (var entry : future.get().entrySet()) { + result.merge(entry.getKey(), entry.getValue(), ResultRow::merge); + } + } + ResultRow[] finalResult = result.values().toArray(new ResultRow[0]); + for (var row : finalResult) { + row.toString(); + } + Arrays.sort(finalResult, 
Comparator.comparing(a -> a.stationString)); + System.out.println("{" + String.join(", ", Arrays.stream(finalResult).map(ResultRow::toString).toList()) + "}"); + log("All done!"); + } + + static void log(String message) { + // System.err.println(Instant.now() + "[" + Thread.currentThread().getName() + "]: " + message); + } +} diff --git a/src/main/java/dev/morling/onebrc/CalculateAverage_bytesfellow.java b/src/main/java/dev/morling/onebrc/CalculateAverage_bytesfellow.java new file mode 100644 index 000000000..869b1950e --- /dev/null +++ b/src/main/java/dev/morling/onebrc/CalculateAverage_bytesfellow.java @@ -0,0 +1,557 @@ +/* + * Copyright 2023 The original authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package dev.morling.onebrc; + +import java.io.FileInputStream; +import java.io.IOException; +import java.io.InputStream; +import java.nio.charset.StandardCharsets; +import java.util.*; +import java.util.concurrent.*; +import java.util.concurrent.atomic.AtomicInteger; +import java.util.function.Consumer; +import java.util.stream.IntStream; + +public class CalculateAverage_bytesfellow { + + public static final String CPU_CORES_1BRC_ENV_VARIABLE = "CPU_CORES_1BRC"; + private static final byte Separator = ';'; + + private static final double SchedulerCpuRatio = 0.4; + + private static final int availableCpu = System.getenv(CPU_CORES_1BRC_ENV_VARIABLE) != null ? 
Integer.parseInt(System.getenv(CPU_CORES_1BRC_ENV_VARIABLE)) + : Runtime.getRuntime().availableProcessors(); + + private static final int SchedulerPoolSize = Math.max((int) (availableCpu * SchedulerCpuRatio), 1); + private static final int SchedulerQueueSize = Math.min(SchedulerPoolSize * 3, 12); + private static final int PartitionsNumber = Math.max((availableCpu - SchedulerPoolSize), 1); + private static final int PartitionExecutorQueueSize = 1000; + + private static final int InputStreamBlockSize = 4096; + private static final int InputStreamReadBufferLen = 250 * InputStreamBlockSize; + + static class Partition { + + private static final AtomicInteger cntr = new AtomicInteger(-1); + private final Map partitionResult = new HashMap<>(10000); // as per requirement we have not more than 10K keys + private final AtomicInteger leftToExecute = new AtomicInteger(0); + + private final String name = "partition-" + cntr.incrementAndGet(); + + private final Executor executor = new ThreadPoolExecutor(1, 1, + 0L, TimeUnit.MILLISECONDS, + new LinkedBlockingQueue<>(PartitionExecutorQueueSize) { // some limit to avoid OOM + @Override + public boolean offer(Runnable runnable) { + try { + put(runnable); // block if limit was exceeded + } + catch (InterruptedException e) { + throw new RuntimeException(e); + } + return true; + } + }, r -> { + Thread t = new Thread(r); + t.setDaemon(true); + t.setName(name); + return t; + }); + + public void scheduleToProcess(byte[] slice, List lines) { + + if (!lines.isEmpty()) { + leftToExecute.incrementAndGet(); + executor.execute( + () -> { + for (int i = 0; i < lines.size(); i++) { + LineParams lineParams = lines.get(i); + + Measurement measurement = getMeasurement(slice, lineParams); + + MeasurementAggregator measurementAggregator = partitionResult.get(measurement.station); + if (measurementAggregator == null) { + partitionResult.put(new Station(measurement.station), new MeasurementAggregator().withMeasurement(measurement)); + } + else { + 
measurementAggregator.withMeasurement(measurement); + } + } + + leftToExecute.decrementAndGet(); + }); + } + + } + + public void materializeNames() { + partitionResult.keySet().forEach(Station::materializeName); + } + + public Map getResult() { + return partitionResult; + } + + public boolean allTasksCompleted() { + return leftToExecute.get() == 0; + } + + } + + record LineParams(int start, int length) { + } + + static class Partitioner { + + private final List allPartitions = new ArrayList<>(); + private final int partitionsSize; + + AtomicInteger jobsScheduled = new AtomicInteger(0); + + final Executor scheduler = new ThreadPoolExecutor(SchedulerPoolSize, SchedulerPoolSize, + 0L, TimeUnit.MILLISECONDS, + new LinkedBlockingQueue<>(SchedulerQueueSize) { // some limit to avoid OOM + + @Override + public Runnable take() throws InterruptedException { + return super.take(); + } + + @Override + public boolean offer(Runnable runnable) { + try { + put(runnable); // preventing unlimited scheduling due to possible OOM + } + catch (InterruptedException e) { + throw new RuntimeException(e); + } + return true; + } + }, r -> { + Thread t = new Thread(r); + t.setDaemon(true); + t.setName("scheduler"); + return t; + }); + + Partitioner(int partitionsSize) { + IntStream.range(0, partitionsSize).forEach((i) -> allPartitions.add(new Partition())); + this.partitionsSize = partitionsSize; + } + + private int partitionsSize() { + return partitionsSize; + } + + void processSlice(byte[] slice) { + + jobsScheduled.incrementAndGet(); + + scheduler.execute(() -> { + List> partitionedLines = new ArrayList<>(partitionsSize()); + // allocate some capacity, assuming that on average lines are half of the max (407 bytes) length + IntStream.range(0, partitionsSize()).forEach((p) -> partitionedLines.add(new ArrayList<>(slice.length / 407 / 2))); + + int start = 0; + int i = 0; + int startCharLen = 0; + while (i < slice.length) { + + if (slice[i] == '\n' || i == (slice.length - 1)) { + + int 
lineLength = i - start + (i == (slice.length - 1) ? 1 : 0); + LineParams lineParams = new LineParams(start, lineLength); + + int partitioningCode = getPartitioningCode(slice, start, getUtf8CharNumberOfBytes(slice[start])); + int partition = computePartition(partitioningCode); + + partitionedLines.get(partition).add(lineParams); + start = i + 1; + + } + + i++; + } + + processPartitionedBatch(slice, partitionedLines); + + jobsScheduled.decrementAndGet(); + }); + + } + + private static byte[] getLine(byte[] slice, int lineLength, int start) { + byte[] line = new byte[lineLength]; + System.arraycopy(slice, start, line, 0, lineLength); + return line; + } + + private void processPartitionedBatch(byte[] slice, List> partitionedLines) { + for (int i = 0; i < partitionedLines.size(); i++) { + allPartitions.get(i).scheduleToProcess(slice, partitionedLines.get(i)); + } + } + + private int computePartition(int code) { + return Math.abs(code % partitionsSize()); + } + + private static int getPartitioningCode(byte[] line, int start, int utf8CharNumberOfBytes) { + // seems good enough + if (utf8CharNumberOfBytes == 4) { + return line[start] + line[start + 1] + line[start + 2] + line[start + 3]; + } + else if (utf8CharNumberOfBytes == 3) { + return line[start] + line[start + 1] + line[start + 2]; + } + else if (utf8CharNumberOfBytes == 2) { + return line[start] + line[start + 1]; + } + else { + return line[start]; + } + } + + SortedMap getAllResults() { + allPartitions.parallelStream().forEach(Partition::materializeNames); + SortedMap result = new TreeMap<>(); + allPartitions.forEach((p) -> result.putAll(p.getResult())); + return result; + } + + public boolean allTasksCompleted() { + return allPartitions.stream().allMatch(Partition::allTasksCompleted); + } + + } + + private static final String FILE = "./measurements.txt"; + + public static class Station implements Comparable { + + private final byte[] inputSlice; + private final int hash; + + private final int startIdx; + private 
final int len; + + private volatile String nameAsString; + + public Station(byte[] inputSlice, int startIdx, int len) { + this.inputSlice = inputSlice; + this.startIdx = startIdx; + this.len = len; + this.hash = hashcodeFast(); + } + + public Station(Station from) { + this.inputSlice = new byte[from.len]; + System.arraycopy(from.inputSlice, from.startIdx, this.inputSlice, 0, from.len); + this.startIdx = 0; + this.len = from.len; + this.hash = from.hash; + } + + private int hashcodeFast() { + if (len == 0) { + return 0; + } + else if (len == 1) { + return inputSlice[startIdx] * 109; + } + else if (len == 2) { + return inputSlice[startIdx + 1] * 109 * 109 + inputSlice[startIdx]; + } + else if (len == 3) { + return inputSlice[startIdx + 2] * 109 * 109 * 109 + inputSlice[startIdx + 1] * 109 * 109 + inputSlice[startIdx]; + } + else { + return inputSlice[startIdx + 3] * 109 * 109 * 109 * 109 + inputSlice[startIdx + 2] * 109 * 109 * 109 + inputSlice[startIdx + 1] * 109 * 109 + + inputSlice[startIdx]; + } + } + + @Override + public boolean equals(Object o) { + if (this == o) + return true; + if (o == null || getClass() != o.getClass()) + return false; + + Station station = (Station) o; + + if (len != station.len) { + return false; + } + + return Arrays.equals(inputSlice, startIdx, startIdx + len, station.inputSlice, station.startIdx, station.startIdx + len); + } + + @Override + public int hashCode() { + return hash; + } + + @Override + public int compareTo(Station o) { + return materializeName().compareTo(o.materializeName()); // + } + + public String materializeName() { + if (nameAsString == null) { + byte[] nameForMaterialization = new byte[len]; + System.arraycopy(inputSlice, startIdx, nameForMaterialization, 0, len); + nameAsString = new String(nameForMaterialization, StandardCharsets.UTF_8); + } + + return nameAsString; + } + + @Override + public String toString() { + return materializeName(); + } + } + + private record Measurement(Station station, long value) { + } + 
+ private record ResultRow(long min, long sum, long count, long max) { + + public String toString() { + return fakeDouble(min) + "/" + round((double) sum / (double) count / 10.0) + "/" + fakeDouble(max); + } + + private String fakeDouble(long value) { + long positiveValue = value < 0 ? -value : value; + long wholePart = positiveValue / 10; + String positiveDouble = wholePart + "." + (positiveValue - wholePart * 10); + + + return (value < 0 ? "-" : "") + positiveDouble; + } + + private double round(double value) { + return Math.round(value * 10.0) / 10.0; + } + + } + + public static class MeasurementAggregator { + private long min = Long.MAX_VALUE; + private long max = Long.MIN_VALUE; + private long sum; + private long count; + + MeasurementAggregator withMeasurement(Measurement m) { + + min = Math.min(min, m.value); + max = Math.max(max, m.value); + sum += m.value; + count++; + + return this; + } + + @Override + public String toString() { + return new ResultRow(min, sum, count, max).toString(); + } + + } + + private static long parseToLongIgnoringDecimalPoint(byte[] slice, int startIndex, int len) { + long value = 0; + + int start = startIndex; + if (slice[startIndex] == '-') { + start = startIndex + 1; + } + + for (int i = start; i < startIndex + len; i++) { + if (slice[i] == '.') { + continue; + } + + if (i > 0) { + value = multipleByTen(value); // *= 10; + } + value += digitAsLong(slice, i); + } + + return start > startIndex ? 
-value : value; + } + + private static long multipleByTen(long value) { + return (value << 3) + (value << 1); + } + + private static long digitAsLong(byte[] digits, int position) { + return (digits[position] - 48); + } + + public static void main(String[] args) throws IOException { + + Partitioner partitioner = new Partitioner(PartitionsNumber); + + try (FileInputStream fileInputStream = new FileInputStream(FILE)) { + parseStreamWithBytes(fileInputStream, InputStreamReadBufferLen, partitioner::processSlice); + } + catch (Exception e) { + throw new RuntimeException(e); + } + + showResults(partitioner); + + } + + static void parseStreamWithBytes(InputStream inputStream, int bufferLen, Consumer sliceConsumer) throws IOException { + + byte[] byteArray = new byte[bufferLen]; + int offset = 0; + int lenToRead = bufferLen; + + int readLen; + + while ((readLen = inputStream.read(byteArray, offset, lenToRead)) > -1) { + if (readLen == 0) { + continue; + } + + int traverseLen = Math.min(offset + readLen, bufferLen); + int lastLineBreakInSlicePosition = traverseLen; + + for (int j = traverseLen - 1; j >= 0; j--) { + if (byteArray[j] == '\n') { + lastLineBreakInSlicePosition = j + 1; + break; + } + } + + if (lastLineBreakInSlicePosition == traverseLen) { + // todo: end of line was not found in a slice? 
+ } + + int sliceSize = lastLineBreakInSlicePosition / SchedulerPoolSize; + + int s = 0; + + int j = Math.min(sliceSize, lastLineBreakInSlicePosition - 1); + while (s < lastLineBreakInSlicePosition && j < lastLineBreakInSlicePosition) { + if (byteArray[j] == '\n') { + int len = j - s; + byte[] slice = new byte[len]; + System.arraycopy(byteArray, s, slice, 0, len); + sliceConsumer.accept(slice); + + s = j + 1; + j = Math.min(s + sliceSize, lastLineBreakInSlicePosition - 1); + + } + else { + j++; + } + } + + if (s < traverseLen && lastLineBreakInSlicePosition < traverseLen) { + // some tail left, carry it over to the next read + int len = traverseLen - s; + System.arraycopy(byteArray, s, byteArray, 0, len); + offset = len; + lenToRead = bufferLen - len; + } + else { + offset = 0; + lenToRead = bufferLen; + } + } + } + + static int getUtf8CharNumberOfBytes(byte firstByteOfChar) { + int masked = firstByteOfChar & 0b11111000; + if (masked == 0b11110000) { + return 4; + } + else if (masked == 0b11100000) { + return 3; + } + else if (masked == 0b11000000) { + return 2; + } + else { + return 1; + } + } + + static void showResults(Partitioner partitioner) { + + CountDownLatch c = new CountDownLatch(1); + partitioner.scheduler.execute(() -> { + + try { + // check if any unprocessed slices + while (partitioner.jobsScheduled.get() > 0) { + } + + // check if anything left in partitions + while (!partitioner.allTasksCompleted()) { + } + + SortedMap result = partitioner.getAllResults(); + System.out.println(result); // output aggregated measurements according to the requirement + } + catch (Exception e) { + System.out.println(e); + } + c.countDown(); + }); + + try { + c.await(); + } + catch (InterruptedException e) { + throw new RuntimeException(e); + } + + } + + private static Measurement getMeasurement(byte[] slice, LineParams lineParams) { + int idx = lastIndexOfSeparator(slice, lineParams); + return new Measurement( + new Station(slice, lineParams.start, idx - 
lineParams.start), + parseToLongIgnoringDecimalPoint(slice, idx + 1, lineParams.start + lineParams.length - (idx + 1))); + } + + private static int lastIndexOfSeparator(byte[] slice, LineParams lineParams) { + // hacky - we know that from the end of the line we have only + // single byte characters + // -2 is also hacky since we expect a particular format at the end of the line + + int lastIdx = lineParams.start + lineParams.length() - 1; + if (slice[lastIdx - 3] == Separator) { + return lastIdx - 3; + } + else if (slice[lastIdx - 4] == Separator) { + return lastIdx - 4; + } + else if (slice[lastIdx - 5] == Separator) { + return lastIdx - 5; + } + + return -1; + } + +} diff --git a/src/main/java/dev/morling/onebrc/CalculateAverage_cb0s.java b/src/main/java/dev/morling/onebrc/CalculateAverage_cb0s.java new file mode 100644 index 000000000..1e9c7058f --- /dev/null +++ b/src/main/java/dev/morling/onebrc/CalculateAverage_cb0s.java @@ -0,0 +1,338 @@ +/* + * Copyright 2023 The original authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package dev.morling.onebrc; + +import java.io.BufferedInputStream; +import java.io.File; +import java.io.FileInputStream; +import java.io.IOException; +import java.nio.charset.StandardCharsets; +import java.util.*; +import java.util.stream.Collectors; +import java.util.stream.IntStream; + +public class CalculateAverage_cb0s { + + private static final String FILE = "./measurements.txt"; + private static final int INPUT_BUFFER_SIZE = 1 << 16; // yields the best performance on my system... + + public static void main(String[] args) throws IOException, InterruptedException { + run(); + // benchmark(); + } + + private static void benchmark() throws IOException { + var startTime = System.currentTimeMillis(); + for (int count = 0; count < 3; ++count) { + run(); + } + var stopTime = System.currentTimeMillis(); + + System.out.println(STR."Running 3 times took: \{stopTime - startTime}ms (1 run: \{(stopTime - startTime) / 3}ms)"); + } + + private static void run() throws IOException { + var fileSize = getFileSize(); + + // for consistency for smaller files (actually a mess, could be solved more elegantly in the parsing step) + var processors = Runtime.getRuntime().availableProcessors(); + processors = Math.max(1, Math.min(processors, (int) fileSize / 106)); + while (fileSize / processors < INPUT_BUFFER_SIZE && processors > 1) + --processors; + + var chunkSize = fileSize / processors; + + System.out.write('{'); + + // for getting a bit more out of this solution, we don't check for null + var mergedResults = IntStream.range(0, processors) + .parallel() + .mapToObj(i -> processChunk(i, chunkSize)) + .reduce(TempResultStorage::merge).get(); + + var endResult = mergedResults.aggregatedResultsPreOrdered.stream() + .map(Station::toString) + .collect(Collectors.joining(", ")); + + System.out.write(endResult.getBytes()); + + System.out.write(new byte[]{ '}', '\n' }); + } + + private static class MeasurementAggregator { + public MeasurementAggregator(int initialValue) { + min = 
initialValue; + max = initialValue; + count = 1; + sum = initialValue; + } + + public int min, max, count; + // we need to long if the possible absolute sum is greater than 2^31 + public long sum; + } + + private record Station( + MeasurementAggregator results, + RawName rawName + ) implements Comparable { + + @Override + public boolean equals(Object otherObject) { + if (otherObject instanceof Station otherStation) { + return otherStation.rawName.equals(rawName); + } + return false; + } + + @Override + public int compareTo(Station otherStation) { + return rawName.compareTo(otherStation.rawName); + } + + @Override + public String toString() { + return STR."\{rawName}=\{results.min/10.0}/\{Math.round(results.sum / (float) results.count) / 10.0}/\{results.max/10.0}"; + } + + @Override + public int hashCode() { + return rawName.hashCode(); + } + + } + + private record RawName( + byte[] rawName + ) implements Comparable { + + @Override + public boolean equals(Object otherObject) { + RawName otherRawName = (RawName) otherObject; + return Arrays.equals(otherRawName.rawName, this.rawName); + + /* + * Although being safer, comparing actually is a small bottleneck + * if (otherObject instanceof RawName otherRawName) { + * return Arrays.equals(otherRawName.rawName, this.rawName); + * } + * return false; + */ + } + + @Override + public int hashCode() { + return Arrays.hashCode(rawName); + } + + @Override + public String toString() { + return new String(rawName, 0, rawName.length, StandardCharsets.UTF_8); + } + + @Override + public int compareTo(RawName otherRawName) { + int result = 0; + // Math.min is SLIGHTLY less efficient, but we don't care at this point + var lowerIndex = Math.min(rawName.length, otherRawName.rawName.length); + for (int i = 0; i < lowerIndex && result == 0; ++i) { + result = Byte.compareUnsigned(rawName[i], otherRawName.rawName[i]); + } + + return result == 0 ? 
rawName.length - otherRawName.rawName.length : result; + } +} + +private static class TempResultStorage { + public void insertMeasurement(byte[] dataRow, int from, int to) { + // 1st parse measurement + var sepIndex = from + 1; + while (dataRow[sepIndex] != ';') + ++sepIndex; + + var parsedMeasurement = parseMeasurement(dataRow, sepIndex + 1, to); + + // 2nd handle if city occurs the first time + var rawName = new RawName(Arrays.copyOfRange(dataRow, from, sepIndex)); + var tempIndex = indexCache.get(rawName); + if (tempIndex == null) { + var aggregator = new MeasurementAggregator(parsedMeasurement); + var tempStation = new Station(aggregator, rawName); + aggregatedResults.add(tempStation); + indexCache.put(rawName, aggregatedResults.size() - 1); + aggregatedResultsPreOrdered.add(tempStation); + return; + } + + // or update already existing station + var tempResults = aggregatedResults.get(tempIndex).results; + // TODO: compare to: add simd vector storage and process once every 8 iterations + + tempResults.sum += parsedMeasurement; + tempResults.count++; + + if (tempResults.max < parsedMeasurement) { + tempResults.max = parsedMeasurement; + } + else if (tempResults.min > parsedMeasurement) { + tempResults.min = parsedMeasurement; + } + } + + public TempResultStorage() { + aggregatedResults = new ArrayList<>(INITIAL_RESULT_SIZE); + indexCache = new HashMap<>(INITIAL_RESULT_SIZE); + aggregatedResultsPreOrdered = new TreeSet<>(); + } + + public static TempResultStorage merge(TempResultStorage storage0, TempResultStorage storage1) { + // default case + if (storage0 == null) { + return storage1; + } + + // TODO: Implementation with SIMD commands + for (var station1 : storage1.aggregatedResults) { + // System.out.println(station1.results.count + " " + station1.results.sum); + var key = storage0.indexCache.get(station1.rawName); + if (key == null) { + storage0.aggregatedResults.add(station1); + storage0.indexCache.put(station1.rawName, storage0.aggregatedResults.size() - 
1); + storage0.aggregatedResultsPreOrdered.add(station1); + continue; + } + + var station0 = storage0.aggregatedResults.get(key); + station0.results.count += station1.results.count; + station0.results.sum += station1.results.sum; + + if (station0.results.min > station1.results.min) { + station0.results.min = station1.results.min; + } + + if (station1.results.max > station0.results.max) { + station0.results.max = station1.results.max; + } + } + + return storage0; + } + + // the closer it is to the actual value the better -> for 10_000 stations 10_000 is obviously better + private static final int INITIAL_RESULT_SIZE = 420; + + // we use a custom name mapping for faster access to aggregatedResults and easier sorting + private final List aggregatedResults; + private final TreeSet aggregatedResultsPreOrdered; + private final HashMap indexCache; + + /** + * Parses a char[] array to the contained number in a fixed point format. + * The number can be between [-99.9, 99.9] (i.e. has either 2 or 3 digits and might contain a sign) + * and represents a temperature measurement. + * Note that no checking takes place. Incorrect formats yield unexpected results. + * + * @param dataRow char array actually containing the number + * @param from the start index of the number inside the array (included) + * @param to the end index of the number (not included, i.e. the char after the number or the length) + * @return fixed point (int) representation of the contained measurement + */ + private int parseMeasurement(byte[] dataRow, int from, int to) { + // almost branch-less solution + int sign = -1 + 2 * ((dataRow[from] >> 4) & 1); + + int floatingPoint = dataRow[to - 1] - 48; + int lastIntDigit = dataRow[to - 3] - 48; + int firstIntDigit = to - from - 4 >= 0 ? 
(sign + 1) / 2 * dataRow[to - 4] - 48 : 0; + + if (to - from >= 4) { + firstIntDigit = dataRow[to - 4] - 48; + + if (to - from == 4 && sign == -1) { + firstIntDigit = 0; + } + } + + return (firstIntDigit * 100 + lastIntDigit * 10 + floatingPoint) * sign; + } + + } + + private static TempResultStorage processChunk(int i, long chunkSize) { + var storage = new TempResultStorage(); + var readBuffer = new byte[INPUT_BUFFER_SIZE]; + + try (var inputStream = new BufferedInputStream(new FileInputStream(FILE), INPUT_BUFFER_SIZE)) { + var readBytes = 0L; // we set it to one because our first loop will not register last read byte + var readBytesDelta = 0; + + // preparation + if (i != 0) { + --readBytes; + inputStream.skip(i * chunkSize - 1); + int c; + while ((c = inputStream.read()) != '\n' && c != -1) + ++readBytes; + } + + // actual parsing + // worst case: only \n is missing for a whole line + var carryOver = new byte[107]; + var carryOverSize = 0; + + while (readBytes < chunkSize && inputStream.available() > 0) { + readBytes += (readBytesDelta = inputStream.read(readBuffer, 0, readBuffer.length)); + int from = 0, to = 0; + + if (carryOverSize != 0) { + while (readBuffer[to] != '\n') + ++to; + System.arraycopy(readBuffer, from, carryOver, carryOverSize, to - from + 1); + + storage.insertMeasurement(carryOver, 0, carryOverSize + to - from); + from = ++to; + } + + // Actually looking 5 ahead instead of 1 at each new line + // Minimal line consists of: [name-byte];[first_digit].[last_digit]\n + while (to <= readBytesDelta && (readBytes - readBytesDelta + to) < chunkSize) { + to += 5; + + while (to < readBytesDelta && readBuffer[to] != '\n') + ++to; + + if (to >= readBytesDelta) { + System.arraycopy(readBuffer, from, carryOver, 0, readBytesDelta - from); + carryOverSize = readBytesDelta - from; + break; + } + + storage.insertMeasurement(readBuffer, from, to); + from = ++to; + } + } + } + catch (IOException e) { + return null; // shouldn't happen + } + + return storage; + } + 
+ private static long getFileSize() { + return new File(CalculateAverage_cb0s.FILE).length(); + } +} diff --git a/src/main/java/dev/morling/onebrc/CalculateAverage_chrisbellew.java b/src/main/java/dev/morling/onebrc/CalculateAverage_chrisbellew.java new file mode 100644 index 000000000..8b8b8fcdc --- /dev/null +++ b/src/main/java/dev/morling/onebrc/CalculateAverage_chrisbellew.java @@ -0,0 +1,738 @@ +/* + * Copyright 2023 The original authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package dev.morling.onebrc; + +import java.io.FileInputStream; +import java.io.IOException; +import java.io.RandomAccessFile; +import java.lang.reflect.Field; +import java.nio.MappedByteBuffer; +import java.nio.channels.FileChannel; +import java.util.HashMap; +import java.util.TreeMap; +import jdk.incubator.vector.ByteVector; +import jdk.incubator.vector.VectorMask; +import jdk.incubator.vector.VectorOperators; +import jdk.incubator.vector.VectorSpecies; +import sun.misc.Unsafe; + +/** + * This is Chris Bellew's implementation. Here are the key points: + * + * - The file is equally split into ranges, one range per thread. + * 18 threads was experimentally found to be optimal. + * + * - Each thread memory maps the file range it is responsible for and + * then iterates through the range, one smaller buffer at a time. 
+ * + * - The contents are parsed by using SIMD vector equality comparisons + * between the source data and the newline character, effectively + * delimiting each line. The measurement of each line is discovered + * by moving back from the end of the line, parsing into an integer + * as it goes. The integer representation is 10x the actual value + * but is used because integer parsing was found to be much faster + * than floating point parsing, and it's also immune to floating + * point arithmetic errors when aggregating the measurements later. + * + * - Once the name and the measurement is parsed for a line, the name + * is hashed and used a lookup into a hash table. The value of the + * hash table at the given slot is an index into another array, this + * time an array of SIMD vectors that represent that name as a series + * of vectors. The vectors are used to compare equality of the name of + * the source line with the name in the slot to confirm the slot is + * occupied by the same city name. The indirection of having a hash + * table storing lookups into another array of vectors is to allow + * the hash table slots to have a fixed size, while allowing the city + * names to be arbitrarily long. The hash table can then use open + * addressing to resolve collisions and remain efficient for lookups. + * + * - After the range has been processed, the results are collected by + * iterating through the hash table and looking up the corresponding + * integer table for each slot then collecting the min, max, count + * and sum of the measurements for each city. Then the results are + * combined from all threads, using a treemap for sorting, and printed. + */ +public final class CalculateAverage_chrisbellew { + public static final long FILE_SIZE = getFileSize(); + + /** + * The overlap is the number of bytes that is peeked into the next buffer + * in order to find the end of the last newline in the current buffer. 
+ * Every buffer ignores the characters before the first newline character + * and peeks into the next buffer to find the first newline character. This + * way no data is lost even though the buffers are arbitrarily sliced. + * 100 is the maximum length of a city name, 1 is the semicolon character, + * 5 is the maximum length of a measurement, 1 is the newline character, + * 8 is one extra vector length so that we don't overflow the buffer. + * If we overlap to this length then we will always be able to complete the + * last line in the buffer. + */ + public static final int OVERLAP = 100 + 1 + 5 + 1 + 8; + + public static void main(String[] args) throws IOException { + /** + * The test cases use small test files. This causes issues because we + * are trying to open the file at different locations on 16 threads. + */ + final int NUM_THREADS = FILE_SIZE < 12_000_000_000L ? 1 : 16; + + /** + * Experimentally optimal buffer size for iterating over each + * memory mapped segment of the file. + */ + final int BUFFER_SIZE = 1024 * 256; + + /** + * Split the whole file into slices. One slice per thread. 
+ */ + var ranges = getThreadRanges(NUM_THREADS); + + var processors = new ThreadProcessor[NUM_THREADS]; + Thread[] threads = new Thread[NUM_THREADS]; + for (var i = 0; i < NUM_THREADS; i++) { + processors[i] = new ThreadProcessor(ranges[i].start, ranges[i].end, BUFFER_SIZE); + threads[i] = new Thread(processors[i]); + threads[i].start(); + } + + var results = new TreeMap(); + for (int i = 0; i < NUM_THREADS; i++) { + try { + threads[i].join(); + processors[i].collectResults(results); + } + catch (InterruptedException e) { + throw new RuntimeException(e); + } + } + printResults(results); + } + + private static void printResults(TreeMap results) { + var builder = new StringBuilder(); + builder.append("{"); + boolean first = true; + for (var entry : results.entrySet()) { + var city = entry.getKey(); + var result = entry.getValue(); + var average = ((float) Math.round((float) result.sum / (float) result.count)) / 10.0; + var min = ((float) result.min) / 10.0; + var max = ((float) result.max) / 10.0; + + if (first) { + first = false; + } + else { + builder.append(", "); + } + builder.append(city).append("=").append(min).append("/").append(average).append("/").append(max); + } + builder.append("}"); + System.out.println(builder.toString()); + } + + /** + * Splits the measurements file into ranges for each thread, ensuring that the last + * range ends at the end of the file. + */ + public static final FileRange[] getThreadRanges(int threads) throws IOException { + var chunkSize = FILE_SIZE / threads; + var ranges = new FileRange[threads]; + for (var i = 0; i < threads; i++) { + var start = i * chunkSize; + var end = i == threads - 1 ? 
FILE_SIZE : (i + 1) * chunkSize; + ranges[i] = new FileRange(start, end); + } + return ranges; + } + + private static final long getFileSize() { + try (var stream = new FileInputStream("measurements.txt")) { + return stream.getChannel().size(); + } + catch (IOException e) { + throw new RuntimeException("Failed to get file size", e); + } + } + + /** + * Processes a range of the file. The range is defined by a start and end + * position. The start is inclusive and the end is exclusive. + */ + static final class ThreadProcessor implements Runnable { + /** + * The number of slots in the hash table. This number was found to be the + * minimum number to use in conjunction with the hashing function to + * produce no collisions on the test data. The test data is a hint, but the + * correctness of the implementation is not coupled to the test data because + * the hash table is able to handle collisions in other arbitrary source data. + */ + private static final int NUM_SLOTS = 12133; + + /** + * The size of the SIMD vector to use when striding through the source data + * in order to detect newlines, and when comparing equality of the source line + * with a given slot in the hash table. + */ + private static final VectorSpecies SPECIES = ByteVector.SPECIES_64; + + /** + * A precomputed lookup table of vector masks to use when comparing equality of + * the source line and a given slot in the hash table. Each slot in the hash table + * has a set of vectors associated with it. The source name is split into vectors + * and each source vector is compared with the corresponding slot vector for equality. + * Unless the length of the city name is a multiple of the vector length, the last + * vector in the slot will be a partial vector. The masks are used to ignore the + * unused bytes in the last vector. 
+ */ + private static final VectorMask[] MASKS = generateMasks(SPECIES); + + /** + * The unsafe instance is used to allocate memory for the hash table slots + * and integer table slots. It skips the JVM's garbage collector and allows + * the memory to be accessed directly without overhead such as bounds checks. + */ + private static final Unsafe unsafe = getUnsafe(); + + /** + * The start and end positions this thread will iterate through. + */ + private final long start; + private final long end; + + private final int bufferSize; + + /** + * The main memory address at the beginning of the hash table slots. + */ + private final long slotsAddress; + + /** + * The main memory address at the beginning of the integer table slots. + */ + private final long numbersAddress; + + /** + * The main memory address at the beginning of the name length table slots. + */ + private final long lengthsAddress; + + /** + * The SIMD vectors associated with each slot in the hash table. The + * content of a given slot in a hash table is a lookup into this array. + * The intent of having this array as an extra lookup is to allow N + * vectors per slot while having fixed size slots. + */ + private ByteVector[] vectors = new ByteVector[200000]; + private String[] cityNames = new String[NUM_SLOTS]; + + /** + * The next available index in the vectors array. + */ + private short nextVectorIndex = 8; + + /** + * A map of city name strings to their corresponding slot index in the + * hash table. When the hash table slots will be sparsely populated it's + * not efficient to iterate through the slots when collecting the results. + * This map provides a way to discover the occupied slots. + */ + private final HashMap cityVectorLookup = new HashMap<>(); + + public ThreadProcessor(long start, long end, int bufferSize) { + this.start = start; + this.end = end; + this.bufferSize = bufferSize; + + /** + * Allocate memory for the hash table and the integer table. 
+ * Initialise the hash table slots to 0, so we can use 0 to + * indicate an empty slot. + */ + slotsAddress = unsafe.allocateMemory(NUM_SLOTS * 2); + for (int i = 0; i < NUM_SLOTS; i++) { + unsafe.putShort(slotsAddress + i * 2, (short) 0); + } + numbersAddress = unsafe.allocateMemory(NUM_SLOTS * 16); + lengthsAddress = unsafe.allocateMemory(NUM_SLOTS); + } + + public final void run() { + try (RandomAccessFile file = new RandomAccessFile("measurements.txt", "r")) { + FileChannel fileChannel = file.getChannel(); + + /** + * Work out whether we need to peek into the next range. If this is the last + * range then the end of this range will be the end of the file, so we won't + * peek. Otherwise, we'll peek just enough into the next slot to complete the + * last line in this range. + */ + boolean lastRange = end == FILE_SIZE; + long length = lastRange ? end - start : end - start + OVERLAP; + + MappedByteBuffer buffer = fileChannel.map(FileChannel.MapMode.READ_ONLY, start, length); + processRange(buffer, lastRange); + } + catch (IOException e) { + throw new RuntimeException(e); + } + } + + /** + * Iterates through the entire memory mapped range, one buffer at a time. + * The buffers are made to overlap to allow each buffer to peek into the next + * range to complete the last line. + */ + private final void processRange(MappedByteBuffer buffer, boolean lastRange) { + byte[] buf = new byte[bufferSize]; + int remaining; + long globalPosition = start; + while ((remaining = buffer.remaining()) != 0) { + int numBytes = Math.min(remaining, bufferSize); + boolean lastBuffer = remaining == numBytes; + + /** + * Fill this buffer and process it. + */ + buffer.get(buf, 0, numBytes); + processBuffer(buf, numBytes, lastRange, lastBuffer, globalPosition); + + /** + * Start the next range slightly before the end of this range. 
+ */ + if (!lastBuffer) { + buffer.position(buffer.position() - OVERLAP); + } + + globalPosition += numBytes; + } + } + + /** + * Parses and processes each line from a buffer. + */ + private final void processBuffer(byte[] buffer, int numBytes, boolean lastRange, boolean lastBuffer, long globalPosition) { + + /** + * Skip past any characters before the first newline because the previous + * segment will have already processed them. That is unless this if the + * first buffer in the first range (global position zero), in which case + * we will start from the first character. + */ + int index = globalPosition == 0 ? 0 : findFirstNewline(buffer) + 1; + + /** + * Keep track of the start of the city name. + */ + int nameStart = index; + + while (true) { + /** + * Take a slice of bytes and convert it into a vector so we can apply + * SIMD operations to find newlines. + */ + ByteVector vector = ByteVector.fromArray(SPECIES, buffer, index); + + /** + * Find the newline using SIMD. + */ + VectorMask newLineMask = vector.eq((byte) '\n'); + int firstTrue = newLineMask.firstTrue(); + if (firstTrue == SPECIES.length()) { + /** + * We haven't found a newline in this vector, so move on to the + * next vector. + */ + index += SPECIES.length(); + continue; + } + + slice(buffer, index + firstTrue, nameStart); + + index = index + firstTrue + 1; + nameStart = index; + + /** + * If this is the last buffer in the last range then we want to + * process every character until the very end of the file. + */ + if (lastRange && lastBuffer) { + if (index == numBytes) { + return; + } + + /** + * If we're less than one vector length away from the end + * of the buffer then just take the remaining bytes as the + * final line. If we tried to use a vector it would overflow. 
+ */ + if (index >= numBytes - SPECIES.length()) { + slice(buffer, numBytes - 1, nameStart); + return; + } + continue; + } + + /** + * If it's not the last buffer or it's not the last range then + * we want to overlap into the next buffer, but only by enough + * to complete the last line. + */ + if (index > numBytes - OVERLAP) { + return; + } + } + } + + /** + * Finds the first newline in a buffer using SIMD. Used to skip past a + * partial line at the beginning of a buffer. + */ + private final int findFirstNewline(byte[] buffer) { + int index = 0; + while (true) { + ByteVector vector = ByteVector.fromArray(SPECIES, buffer, index); + VectorMask newLineMask = vector.eq((byte) '\n'); + int firstTrue = newLineMask.firstTrue(); + if (firstTrue == SPECIES.length()) { + index += SPECIES.length(); + continue; + } + return index + firstTrue; + } + } + + /** + * Given the index in the buffer of where a name starts, and the index of + * the next newline, creeps back from the next newline to find the structure + * of the measurement, parsing it into a number as it goes. It is parsed + * into an integer because it's faster than parsing as a float, and it's also + * immune to floating point arithmetic errors when aggregating the measurements + * later. + * + * Then proceeds to record the fully parsed name and measurement in the hash table. 
+ */ + private final void slice(byte[] buffer, int newlineIndex, int nameStart) { + int i = newlineIndex - 1; + int measurement = buffer[i] - '0'; + i -= 2; // Skip before the decimal point + measurement += (buffer[i] - '0') * 10; + i--; + + if (buffer[i] == ';') { + // 1.2 + record(buffer, nameStart, i, measurement); + } + else { + // 12.3 or -1.2 or -12.3 + if (buffer[i] == '-') { + // -1.2 + record(buffer, nameStart, i - 1, -measurement); + } + else { + // 12.3 or -12.3 + measurement += (buffer[i] - '0') * 100; + i--; + if (buffer[i] == '-') { + // -12.3 + record(buffer, nameStart, i - 1, -measurement); + } + else { + // 12.3 + record(buffer, nameStart, i, measurement); + } + } + } + } + + /** + * Given a name and measurement, looks up a slot in the hash table by hashing + * the city name as a key, then applies the measurement to the accumulated + * aggregation of that city's measurements. + */ + private final void record(byte[] buffer, int nameStart, int nameEnd, int measurement) { + int nameLength = nameEnd - nameStart; + + /** + * The length of most city names will not be a multiple of the SIMD + * vector length so there will be a remainder in the final vector + * of extraneous bytes. We need to mask these bytes out when comparing. + */ + var remainder = nameLength % SPECIES.length(); + + var numVectors = nameLength / SPECIES.length() + (remainder == 0 ? 0 : 1); + + /** + * Lookup the slot index in the hash table for the city name. + */ + var slotIndex = nameToSlotIndex(buffer, nameStart, nameLength); + + /** + * Identify if the slot is occupied, then check the equality of the + * slot with the city name. + */ + var vectorOffset = unsafe.getShort(slotsAddress + slotIndex * 2); + while (vectorOffset != 0) { + + /** + * Check the set of vectors in the slot match the city name + */ + if (slotEquals(buffer, nameStart, vectorOffset, numVectors, remainder, slotIndex)) { + + /** + * Check the length of the slot name and city name match. 
This + * check is needed because the vector equality check can give + * false positives if one city name starts with another. + */ + byte slotNameLength = unsafe.getByte(lengthsAddress + slotIndex); + if (slotNameLength == nameLength) { + updateSlot(slotIndex, measurement); + break; + } + } + + /** + * If the slot is occupied but the city name doesn't match, then + * we try the next slot in the hash table through linear probing. + */ + slotIndex = (slotIndex + 1) % NUM_SLOTS; + vectorOffset = unsafe.getShort(slotsAddress + slotIndex * 2); + } + + /** + * If the slot was unoccupied, then we can initialise it with the + * city name and measurement. + */ + if (vectorOffset == 0) { + /** + * Record where the city name length is recorded for this slot. + */ + unsafe.putByte(lengthsAddress + slotIndex, (byte) nameLength); + + /** + * Record where the start of the set of vectors are recorded for + */ + unsafe.putShort(slotsAddress + slotIndex * 2, nextVectorIndex); + + /** + * Records the vectors for the city name. + */ + for (int v = 0; v < numVectors; v++) { + vectors[nextVectorIndex] = ByteVector.fromArray(SPECIES, buffer, nameStart + v * SPECIES.length()); + nextVectorIndex++; + } + + cityVectorLookup.put(new String(buffer, nameStart, nameLength), slotIndex); + + /** + * Min, max, count, sum + */ + var numbersIndex = getNumbersIndex(slotIndex); + unsafe.putInt(numbersIndex, measurement); + unsafe.putInt(numbersIndex + 4, measurement); + unsafe.putInt(numbersIndex + 8, 1); + unsafe.putInt(numbersIndex + 12, measurement); + + cityNames[slotIndex] = new String(buffer, nameStart, nameLength); + } + } + + /** + * Given the index bounds of a name in a buffer, creates a hash of the name + * by multiplying the first twelve characters. This was experimentally found + * to provide a good distribution of hash values for the test data. In + * combination with the number of slots in the hash table, this produces no + * collisions on the test data. 
The test data is a hint, but the correctness + * of the implementation is not coupled to the test data because the hash + * table is able to handle collisions in other arbitrary source data. + */ + private final int nameToSlotIndex(byte[] buffer, int nameStart, int nameLength) { + var integer = 1; + integer *= buffer[nameStart + 0]; + if (nameLength > 1) { + integer *= buffer[nameStart + 1]; + if (nameLength > 2) { + integer *= buffer[nameStart + 2]; + if (nameLength > 3) { + integer *= buffer[nameStart + 3]; + if (nameLength > 4) { + integer *= buffer[nameStart + 4]; + if (nameLength > 5) { + integer *= buffer[nameStart + 5]; + if (nameLength > 6) { + integer *= buffer[nameStart + 6]; + if (nameLength > 7) { + integer *= buffer[nameStart + 7]; + if (nameLength > 8) { + integer *= buffer[nameStart + 8]; + if (nameLength > 9) { + integer *= buffer[nameStart + 9]; + if (nameLength > 10) { + integer *= buffer[nameStart + 10]; + if (nameLength > 11) { + integer *= buffer[nameStart + 11]; + } + } + } + } + } + } + } + } + } + } + } + return Math.abs(integer) % NUM_SLOTS; + } + + /** + * Given a slot index and a measurement, updates the aggregation of the + * measurements for the city in that slot. + */ + private final void updateSlot(int slotIndex, int measurement) { + var numbersIndex = getNumbersIndex(slotIndex); + var min = unsafe.getInt(numbersIndex); + var max = unsafe.getInt(numbersIndex + 4); + var count = unsafe.getInt(numbersIndex + 8); + var sum = unsafe.getInt(numbersIndex + 12); + + unsafe.putInt(numbersIndex, Math.min(min, measurement)); + unsafe.putInt(numbersIndex + 4, Math.max(max, measurement)); + unsafe.putInt(numbersIndex + 8, count + 1); + unsafe.putInt(numbersIndex + 12, sum + measurement); + } + + /** + * Given a name in a buffer, a slot index, and a number of vectors, checks + * the equality of the name and the slot. 
+ * + * The length of the name is not necessarily a multiple of the SIMD vector + * length, so the last vector in the slot will be a partial vector. The + * masks are used to ignore the unused bytes in the last vector. + */ + private final boolean slotEquals(byte[] buffer, int nameStart, int vectorOffset, int numVectors, int remainder, int slotIndex) { + for (int v = 0; v < numVectors; v++) { + var nameVector = ByteVector.fromArray(SPECIES, buffer, nameStart + v * SPECIES.length()); + var slotVector = vectors[vectorOffset + v]; + if (v == numVectors - 1) { + if (remainder == 0) { + if (!slotVector.eq(nameVector).allTrue()) { + return false; + } + } + else { + var mask = MASKS[remainder - 1]; + if (!slotVector.compare(VectorOperators.EQ, nameVector, mask).equals(mask)) { + return false; + } + } + break; + } + else { + if (!slotVector.eq(nameVector).allTrue()) { + return false; + } + } + } + + return true; + } + + /** + * Given a slot index, returns the main memory address of the integer table + * where the min, max, count and sum of the measurements are stored. + */ + private final long getNumbersIndex(int slotIndex) { + return numbersAddress + slotIndex * 16; + } + + public void collectResults(TreeMap results) { + for (var entry : cityVectorLookup.entrySet()) { + var city = entry.getKey(); + var slotIndex = entry.getValue(); + var numbersIndex = getNumbersIndex(slotIndex); + var min = unsafe.getInt(numbersIndex); + var max = unsafe.getInt(numbersIndex + 4); + var count = unsafe.getInt(numbersIndex + 8); + var sum = unsafe.getInt(numbersIndex + 12); + results.compute(city, (k, v) -> { + if (v == null) { + return new CityResult(min, max, sum, count); + } + else { + v.min = Math.min(v.min, min); + v.max = Math.max(v.max, max); + v.sum += sum; + v.count += count; + return v; + } + }); + } + } + + /** + * Generates a lookup table of vector masks to use when comparing equality of + * the last vector of the source line and a given slot in the hash table. 
+ */ + private static final VectorMask[] generateMasks(VectorSpecies species) { + VectorMask[] masks = new VectorMask[species.length() - 1]; + masks[0] = VectorMask.fromArray(species, new boolean[]{ true, false, false, false, false, false, false, false }, 0); + masks[1] = VectorMask.fromArray(species, new boolean[]{ true, true, false, false, false, false, false, false }, 0); + masks[2] = VectorMask.fromArray(species, new boolean[]{ true, true, true, false, false, false, false, false }, 0); + masks[3] = VectorMask.fromArray(species, new boolean[]{ true, true, true, true, false, false, false, false }, 0); + masks[4] = VectorMask.fromArray(species, new boolean[]{ true, true, true, true, true, false, false, false }, 0); + masks[5] = VectorMask.fromArray(species, new boolean[]{ true, true, true, true, true, true, false, false }, 0); + masks[6] = VectorMask.fromArray(species, new boolean[]{ true, true, true, true, true, true, true, false }, 0); + return masks; + } + + private static final Unsafe getUnsafe() { + Field field; + try { + field = Unsafe.class.getDeclaredField("theUnsafe"); + field.setAccessible(true); + return (Unsafe) field.get(null); + } + catch (NoSuchFieldException | SecurityException | IllegalArgumentException | IllegalAccessException e) { + throw new RuntimeException("Failed to get unsafe", e); + } + } + } + + static final class CityResult { + public int min; + public int max; + public int sum; + public int count; + + public CityResult(int min, int max, int sum, int count) { + this.min = min; + this.max = max; + this.sum = sum; + this.count = count; + } + } + + static final class FileRange { + public final long start; + public final long end; + + public FileRange(long start, long end) { + this.start = start; + this.end = end; + } + } +} \ No newline at end of file diff --git a/src/main/java/dev/morling/onebrc/CalculateAverage_dkarampi.java b/src/main/java/dev/morling/onebrc/CalculateAverage_dkarampi.java new file mode 100644 index 000000000..2b826eef6 
--- /dev/null +++ b/src/main/java/dev/morling/onebrc/CalculateAverage_dkarampi.java @@ -0,0 +1,260 @@ +/* + * Copyright 2023 The original authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package dev.morling.onebrc; + +import java.nio.ByteBuffer; +import java.nio.MappedByteBuffer; +import java.nio.channels.FileChannel; +import java.nio.file.Path; +import java.util.ArrayList; +import java.util.List; +import java.util.Map; +import java.util.TreeMap; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.Future; + +import static java.nio.channels.FileChannel.MapMode.READ_ONLY; + +public class CalculateAverage_dkarampi { + private static final String FILE = "./measurements.txt"; + private static final int BUFFER_SIZE = (1 << 29); // 500mb + private static final int HT_SIZE = nextPowerOfTwo(10000); + private static final int NUM_THREADS = 8; + private final List stationHashTables = new ArrayList<>(); + + public static void main(String[] args) throws Exception { + new CalculateAverage_dkarampi().runFast(); + } + + private static double round(double value) { + return Math.round(value * 10.0) / 10.0; + } + + private static boolean areEqual(byte[] a, int aLen, byte[] b, int bLen) { + if (aLen != bLen) { + return false; + } + for (byte i = 0; i < aLen; i++) { + if (a[i] != b[i]) { + return false; + } + } + return true; + } + + public static int nextPowerOfTwo(int n) { + for (int i = 1; i < 32; i 
<<= 1) { + n |= n >> i; + } + return n + 1; + } + + private void runFast() throws Exception { + createStationHashTables(); + FileChannel channel = FileChannel.open(Path.of(FILE)); + + List> buffersList = new ArrayList<>(); + for (int i = 0; i < NUM_THREADS; i++) { + buffersList.add(new ArrayList<>()); + } + + List buffers = createBuffers(channel); + for (int i = 0; i < buffers.size(); i++) { + buffersList.get(i % NUM_THREADS).add(buffers.get(i)); + } + + List tasks = new ArrayList<>(); + for (int i = 0; i < NUM_THREADS; i++) { + tasks.add(new Task(stationHashTables.get(i), buffersList.get(i))); + } + + ExecutorService executorService = Executors.newFixedThreadPool(NUM_THREADS); + Future[] futures = new Future[NUM_THREADS]; + for (int i = 0; i < NUM_THREADS; i++) { + futures[i] = executorService.submit(tasks.get(i)); + } + + for (Future future : futures) { + future.get(); + } + + sortAndPrint(); + + executorService.shutdown(); + channel.close(); + } + + private void createStationHashTables() { + for (int i = 0; i < NUM_THREADS; i++) { + Station[] stationsHashTable = new Station[HT_SIZE]; + for (int j = 0; j < HT_SIZE; j++) { + stationsHashTable[j] = new Station(); + } + stationHashTables.add(stationsHashTable); + } + } + + private List createBuffers(FileChannel channel) throws Exception { + List buffers = new ArrayList<>(); + long size = channel.size(); + int lastByte; + for (long offset = 0; offset < size; offset += lastByte + 1) { + long sizeToMap = Math.min(size - offset, BUFFER_SIZE); + MappedByteBuffer buffer = channel.map(READ_ONLY, offset, sizeToMap); + lastByte = (int) sizeToMap - 1; + while (buffer.get(lastByte) != '\n') + --lastByte; + buffers.add(new Buffer(buffer, lastByte + 1)); + } + return buffers; + } + + private void sortAndPrint() { + TreeMap sortedStations = new TreeMap<>(); + + for (Station[] stationHashTable : stationHashTables) { + for (Station station : stationHashTable) { + if (station.freq == 0) { + continue; + } + String key = new 
String(station.name, 0, station.nameLen); + Station st = sortedStations.get(key); + if (st == null) { + sortedStations.put(key, station); + } + else { + st.min = Math.min(st.min, station.min); + st.max = Math.max(st.max, station.max); + st.sum += station.sum; + st.freq += station.freq; + } + } + } + + StringBuilder sb = new StringBuilder(); + sb.append("{"); + for (Map.Entry entry : sortedStations.entrySet()) { + String name = entry.getKey(); + Station station = entry.getValue(); + sb.append(name); + sb.append("="); + sb.append(round(station.min)); + sb.append("/"); + sb.append(round(round(station.sum) / station.freq)); + sb.append("/"); + sb.append(round(station.max)); + sb.append(", "); + } + sb.delete(sb.length() - 2, sb.length()); + sb.append("}"); + System.out.println(sb); + } + + private record Buffer(ByteBuffer byteBuffer, int length) { + } + + private static class Station { + double sum; + double min = 100; + double max = -100; + int freq; + short nameLen; + byte[] name = new byte[nextPowerOfTwo(100)]; + } + + private record Task(Station[] stations, List buffers) implements Runnable { + + @Override + public void run() { + for (Buffer buffer : buffers) { + process(buffer); + } + } + + private void process(Buffer buffer) { + short nameLen = 0; + int hash = 5381; + int temperature; + byte[] name = new byte[100]; + + for (int i = 0; i < buffer.length; i++) { + byte c = buffer.byteBuffer.get(i); + if (c == ';') { + int sign = 1; + c = buffer.byteBuffer.get(++i); + if (c == '-') { + sign = -1; + c = buffer.byteBuffer.get(++i); + temperature = (c - '0') * 10; + c = buffer.byteBuffer.get(++i); + if (c == '.') { + c = buffer.byteBuffer.get(++i); + temperature = temperature + c - '0'; + } + else { + temperature = temperature + c - '0'; + ++i; // dot + c = buffer.byteBuffer.get(++i); + temperature = temperature * 10 + c - '0'; + } + } + else { + temperature = (c - '0') * 10; + c = buffer.byteBuffer.get(++i); + if (c == '.') { + c = buffer.byteBuffer.get(++i); + 
temperature = temperature + c - '0'; + } + else { + temperature = temperature + c - '0'; + ++i; // dot + c = buffer.byteBuffer.get(++i); + temperature = temperature * 10 + c - '0'; + } + } + hash = hash & 0x7FFFFFFF; + updateStations(hash, name, nameLen, sign * (double) temperature / 10); + ++i; // For '\n' + nameLen = 0; + hash = 5383; + } + else { + name[nameLen++] = c; + hash = ((hash << 5) + hash) + c; + } + } + } + + private void updateStations(int hash, byte[] name, short nameLen, double temperature) { + int idx; + for (idx = hash % HT_SIZE; stations[idx].freq != 0; idx = (idx + 1) % HT_SIZE) { + if (areEqual(stations[idx].name, stations[idx].nameLen, name, nameLen)) { + stations[idx].sum += temperature; + stations[idx].min = Math.min(stations[idx].min, temperature); + stations[idx].max = Math.max(stations[idx].max, temperature); + ++stations[idx].freq; + return; + } + } + stations[idx].sum = temperature; + stations[idx].min = temperature; + stations[idx].max = temperature; + stations[idx].nameLen = nameLen; + System.arraycopy(name, 0, stations[idx].name, 0, nameLen); + stations[idx].freq = 1; + } +}} diff --git a/src/main/java/dev/morling/onebrc/CalculateAverage_dpsoft.java b/src/main/java/dev/morling/onebrc/CalculateAverage_dpsoft.java new file mode 100644 index 000000000..671d8bab7 --- /dev/null +++ b/src/main/java/dev/morling/onebrc/CalculateAverage_dpsoft.java @@ -0,0 +1,324 @@ +/* + * Copyright 2023 The original authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +package dev.morling.onebrc; + +import java.io.File; +import java.io.IOException; +import java.io.RandomAccessFile; +import java.nio.ByteOrder; +import java.nio.MappedByteBuffer; +import java.nio.channels.FileChannel; +import java.nio.charset.StandardCharsets; +import java.nio.file.Path; +import java.nio.file.StandardOpenOption; +import java.util.*; +import java.util.concurrent.Phaser; + +public class CalculateAverage_dpsoft { + private static final String FILE = "./measurements.txt"; + private static final int MAX_ROWS = 1 << 15; + private static final int ROWS_MASK = MAX_ROWS - 1; + + public static void main(String[] args) throws IOException { + final var cpus = Runtime.getRuntime().availableProcessors(); + final var segments = getMemorySegments(cpus); + final var tasks = new MeasurementExtractor[segments.size()]; + final var phaser = new Phaser(segments.size()); + + for (int i = 0; i < segments.size(); i++) { + tasks[i] = new MeasurementExtractor(segments.get(i), phaser); + } + + phaser.awaitAdvance(phaser.getPhase()); + + final var allMeasurements = Arrays.stream(tasks) + .parallel() + .map(MeasurementExtractor::getMeasurements) + .reduce(MeasurementMap::merge) + .orElseThrow(); + + System.out.println(sortSequentially(allMeasurements)); + + System.exit(0); + } + + private static Map sortSequentially(MeasurementMap allMeasurements) { + final Map sorted = new TreeMap<>(); + for (Measurement m : allMeasurements.measurements) { + if (m != null) { + sorted.put(new String(m.name, StandardCharsets.UTF_8), m); + } + } + return sorted; + } + + // Inspired by @spullara + private static List getMemorySegments(int numberOfSegments) throws IOException { + var file = new File(FILE); + long fileSize = file.length(); + long segmentSize = fileSize / numberOfSegments; + List segments = new ArrayList<>(numberOfSegments); + + if (fileSize < 1_000_000) { + 
segments.add(new FileSegment(0, fileSize)); + return segments; + } + + while (segmentSize >= Integer.MAX_VALUE) { + numberOfSegments += 1; + segmentSize = fileSize / numberOfSegments; + } + + try (RandomAccessFile randomAccessFile = new RandomAccessFile(file, "r")) { + for (int i = 0; i < numberOfSegments; i++) { + long segStart = i * segmentSize; + long segEnd = (i == numberOfSegments - 1) ? fileSize : segStart + segmentSize; + segStart = findSegment(i, 0, randomAccessFile, segStart, segEnd); + segEnd = findSegment(i, numberOfSegments - 1, randomAccessFile, segEnd, fileSize); + + segments.add(new FileSegment(segStart, segEnd)); + } + } + return segments; + } + + private static long findSegment(int i, int skipSegment, RandomAccessFile raf, long location, long fileSize) throws IOException { + if (i != skipSegment) { + raf.seek(location); + while (location < fileSize) { + location++; + if (raf.read() == '\n') + break; + } + } + return location; + } + + record FileSegment(long start, long end) { + } + + static final class MeasurementExtractor implements Runnable { + private final FileSegment segment; + private final Phaser phaser; + private final MeasurementMap measurements = new MeasurementMap(); + + MeasurementExtractor(FileSegment memorySegment, Phaser phaser) { + this.segment = memorySegment; + this.phaser = phaser; + (new Thread(this)).start(); + } + + @Override + public void run() { + long segmentEnd = segment.end(); + try (var fileChannel = FileChannel.open(Path.of(FILE), StandardOpenOption.READ)) { + var mbb = fileChannel.map(FileChannel.MapMode.READ_ONLY, segment.start(), segmentEnd - segment.start()); + mbb.order(ByteOrder.nativeOrder()); + + if (segment.start() > 0) { + skipToFirstLine(mbb); + } + + while (mbb.remaining() > 0 && mbb.position() <= segmentEnd) { + int pos = mbb.position(); + int nameHash = hashAndRewind(mbb); + var m = measurements.getOrCompute(nameHash, mbb, pos); + int temp = readTemperatureFromBuffer(mbb); + + m.sample(temp); + } + } + 
catch (IOException e) { + throw new RuntimeException("Error reading file", e); + } + finally { + phaser.arriveAndAwaitAdvance(); + } + } + + // inspired by @lawrey + private static int hashAndRewind(MappedByteBuffer mbb) { + int hash = 0; + int idx = mbb.position(); + outer: while (true) { + int name = mbb.getInt(); + for (int c = 0; c < 4; c++) { + int b = (name >> (c << 3)) & 0xFF; + if (b == ';') { + idx += c + 1; + break outer; + } + hash ^= b * 82805; + } + idx += 4; + } + + var rewind = mbb.position() - idx; + mbb.position(mbb.position() - rewind); + return hash; + } + + private static int readTemperatureFromBuffer(MappedByteBuffer mbb) { + int temp = 0; + boolean negative = false; + + outer: while (mbb.remaining() > 0) { + int b = mbb.get(); + switch (b) { + case '-': + negative = true; + break; + default: + temp = 10 * temp + (b - '0'); + break; + case '.': + b = mbb.get(); + temp = 10 * temp + (b - '0'); + case '\r': + mbb.get(); + case '\n': + break outer; + } + } + if (negative) + temp = -temp; + return temp; + } + + public MeasurementMap getMeasurements() { + return measurements; + } + + // Skips to the first line in the buffer, used for chunk processing. + private static void skipToFirstLine(MappedByteBuffer mbb) { + while ((mbb.get() & 0xFF) >= ' ') { + // Skip bytes until reaching the start of a line. 
+ } + } + } + + // credits to @shipilev + static class MeasurementMap { + private final Measurement[] measurements = new Measurement[MAX_ROWS]; + + public Measurement getOrCompute(int hash, MappedByteBuffer mbb, int position) { + int index = hash & ROWS_MASK; + var measurement = measurements[index]; + if (measurement != null && hash == measurement.nameHash && Measurement.equalsTo(measurement.name, mbb, position)) { + return measurement; + } + else { + return compute(hash, mbb, position); + } + } + + private Measurement compute(int hash, MappedByteBuffer mbb, int position) { + var index = hash & ROWS_MASK; + Measurement m; + + while (true) { + m = measurements[index]; + if (m == null || (hash == m.nameHash && Measurement.equalsTo(m.name, mbb, position))) { + break; + } + index = (index + 1) & ROWS_MASK; + } + + if (m == null) { + int len = mbb.position() - position - 1; + byte[] bytes = new byte[len]; + mbb.position(position); + mbb.get(bytes, 0, len); + mbb.get(); + measurements[index] = m = new Measurement(bytes, hash); + } + + return m; + } + + public MeasurementMap merge(MeasurementMap otherMap) { + for (Measurement other : otherMap.measurements) { + if (other == null) + continue; + int index = other.nameHash & ROWS_MASK; + while (true) { + Measurement m = measurements[index]; + if (m == null) { + measurements[index] = other; + break; + } + else if (Arrays.equals(m.name, other.name)) { + m.merge(other); + break; + } + else { + index = (index + 1) & ROWS_MASK; + } + } + } + return this; + } + } + + static final class Measurement { + public final int nameHash; + public final byte[] name; + + public long sum; + public int count = 0; + public int min = Integer.MAX_VALUE; + public int max = Integer.MIN_VALUE; + + public Measurement(byte[] name, int nameHash) { + this.name = name; + this.nameHash = nameHash; + } + + public static boolean equalsTo(byte[] name, MappedByteBuffer mbb, int position) { + int len = mbb.position() - position - 1; + if (len != name.length) + 
return false; + for (int i = 0; i < len; i++) { + if (name[i] != mbb.get(position + i)) + return false; + } + return true; + } + + public void sample(int temp) { + min = Math.min(min, temp); + max = Math.max(max, temp); + sum += temp; + count++; + } + + public Measurement merge(Measurement m2) { + min = Math.min(min, m2.min); + max = Math.max(max, m2.max); + sum += m2.sum; + count += m2.count; + return this; + } + + public String toString() { + return round(((double) min) / 10.0) + "/" + round((((double) sum) / 10.0) / count) + "/" + round(((double) max) / 10.0); + } + + private static double round(double value) { + return Math.round(value * 10.0) / 10.0; + } + } +} \ No newline at end of file diff --git a/src/main/java/dev/morling/onebrc/CalculateAverage_dqhieuu.java b/src/main/java/dev/morling/onebrc/CalculateAverage_dqhieuu.java new file mode 100644 index 000000000..8c155773e --- /dev/null +++ b/src/main/java/dev/morling/onebrc/CalculateAverage_dqhieuu.java @@ -0,0 +1,117 @@ +/* + * Copyright 2023 The original authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package dev.morling.onebrc; + +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Paths; +import java.util.Map; +import java.util.TreeMap; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.locks.Lock; +import java.util.concurrent.locks.ReentrantLock; + +public class CalculateAverage_dqhieuu { + private static final String FILE = "measurements.txt"; + + private static double round(double value) { + return Math.round(value * 10.0) / 10.0; + } + + private static class MeasurementAggregator { + private Lock lock = new ReentrantLock(); + private double min = Double.POSITIVE_INFINITY; + private double max = Double.NEGATIVE_INFINITY; + private double sum = 0; + private int count = 0; + + @Override + public String toString() { + return round(min) + "/" + round(round(sum) / count) + "/" + round(max); + } + } + + public static void main(String[] args) throws IOException { + var lineStream = Files.lines(Paths.get(FILE)).parallel(); + + Map measurements = new ConcurrentHashMap<>(10_000); + + lineStream.forEach( + l -> { + var sepIdx = 0; + while (l.charAt(sepIdx) != ';') { + sepIdx++; + } + + var station = l.substring(0, sepIdx); + + int valueInt = 0; + int sign = l.charAt(sepIdx + 1) == '-' ? 
-1 : 1; + + var lineLength = l.length(); + for (var i = sepIdx + 1; i < lineLength; i++) { + var c = l.charAt(i); + if (c == '-' || c == '.') { + continue; + } + valueInt = valueInt * 10 + (c - '0'); + } + + var value = ((double) valueInt / 10.0) * sign; + + var agg = measurements.computeIfAbsent(station, k -> new MeasurementAggregator()); + + agg.lock.lock(); + + if (value < agg.min) { + agg.min = value; + } + if (value > agg.max) { + agg.max = value; + } + agg.sum += value; + agg.count++; + + agg.lock.unlock(); + }); + + Map sortedEntries = new TreeMap<>(measurements); + + var res = new StringBuilder(); + res.append("{"); + + var first = true; + for (var entry : sortedEntries.entrySet()) { + if (first) { + first = false; + } + else { + res.append(", "); + } + + var k = entry.getKey(); + var v = entry.getValue(); + + res.append(k); + res.append('='); + res.append(v); + } + + res.append("}"); + + System.out.println(res); + } +} diff --git a/src/main/java/dev/morling/onebrc/CalculateAverage_ebarlas.java b/src/main/java/dev/morling/onebrc/CalculateAverage_ebarlas.java index 7c24afd76..3d8ad900d 100644 --- a/src/main/java/dev/morling/onebrc/CalculateAverage_ebarlas.java +++ b/src/main/java/dev/morling/onebrc/CalculateAverage_ebarlas.java @@ -18,9 +18,9 @@ import sun.misc.Unsafe; import java.io.IOException; -import java.nio.BufferUnderflowException; -import java.nio.ByteBuffer; -import java.nio.ByteOrder; +import java.lang.foreign.Arena; +import java.lang.foreign.MemorySegment; +import java.lang.foreign.ValueLayout; import java.nio.channels.FileChannel; import java.nio.charset.StandardCharsets; import java.nio.file.Paths; @@ -30,9 +30,12 @@ public class CalculateAverage_ebarlas { - private static final int MAX_KEY_SIZE = 100; - private static final int HASH_FACTOR = 433; - private static final int HASH_TBL_SIZE = 16_383; // range of allowed hash values, inclusive + private static final Arena ARENA = Arena.global(); + + private static final int MAX_KEY_SIZE = 104; // 4 
additional bytes to allow for single-int overflow due to padding + private static final int MAX_VAL_SIZE = 5; // -dd.d + private static final int MAX_LINE_SIZE = MAX_KEY_SIZE + MAX_VAL_SIZE + 2; // key, semicolon, val, newline + private static final int HASH_TBL_SIZE = 131_071; // range of allowed hash values, inclusive private static final Unsafe UNSAFE = makeUnsafe(); @@ -50,7 +53,7 @@ private static Unsafe makeUnsafe() { public static void main(String[] args) throws IOException, InterruptedException { var path = Paths.get("measurements.txt"); var channel = FileChannel.open(path, StandardOpenOption.READ); - var numPartitions = (int) Math.max((channel.size() / Integer.MAX_VALUE) + 1, Runtime.getRuntime().availableProcessors()); + var numPartitions = Runtime.getRuntime().availableProcessors(); var partitionSize = channel.size() / numPartitions; var partitions = new Partition[numPartitions]; var threads = new Thread[numPartitions]; @@ -63,8 +66,8 @@ public static void main(String[] args) throws IOException, InterruptedException var pSize = pEnd - pStart; Runnable r = () -> { try { - var buffer = channel.map(FileChannel.MapMode.READ_ONLY, pStart, pSize).order(ByteOrder.LITTLE_ENDIAN); - partitions[pIdx] = processBuffer(buffer, pIdx == 0); + var ms = channel.map(FileChannel.MapMode.READ_ONLY, pStart, pSize, ARENA); + partitions[pIdx] = processSegment(ms, pIdx == 0, pIdx == numPartitions - 1); } catch (IOException e) { throw new RuntimeException(e); @@ -142,7 +145,7 @@ private static void foldFootersAndHeaders(List partitions) { // fold var merged = mergeFooterAndHeader(pPrev.footer, pNext.header); if (merged != null && merged.length != 0) { if (merged[merged.length - 1] == '\n') { // fold into prev partition - doProcessBuffer(ByteBuffer.wrap(merged).order(ByteOrder.LITTLE_ENDIAN), true, pPrev.stats); + doProcessSegment(ARENA.allocateArray(ValueLayout.JAVA_BYTE, merged), 0, pPrev.stats, true); } else { // no newline appeared in partition, carry forward pNext.footer = 
merged; @@ -164,143 +167,209 @@ private static byte[] mergeFooterAndHeader(byte[] footer, byte[] header) { return merged; } - private static Partition processBuffer(ByteBuffer buffer, boolean first) { - return doProcessBuffer(buffer, first, new Stats[HASH_TBL_SIZE + 1]); - } - - private static Partition doProcessBuffer(ByteBuffer buffer, boolean first, Stats[] stats) { - var header = first ? null : readHeader(buffer); - var keyStart = reallyDoProcessBuffer(buffer, stats); - var footer = keyStart < buffer.limit() ? readFooter(buffer, keyStart) : null; - return new Partition(header, footer, stats); + private static Partition processSegment(MemorySegment ms, boolean first, boolean last) { + var stats = new Stats[HASH_TBL_SIZE + 1]; // vals range from [0, size] inclusive + var header = first ? null : readHeader(ms); + var keyStart = doProcessSegment(ms, header == null ? 0 : header.offset, stats, last); // last segment is complete + var footer = keyStart < ms.byteSize() ? readFooter(ms, keyStart) : null; + return new Partition(header == null ? 
null : header.data, footer, stats); } - private static int reallyDoProcessBuffer(ByteBuffer buffer, Stats[] stats) { - long keyBaseAddr = UNSAFE.allocateMemory(MAX_KEY_SIZE); - int keyStart = 0; // start of key in buffer used for footer calc - try { // abort with exception to allow optimistic line processing - while (true) { // one line per iteration - keyStart = buffer.position(); // preserve line start - int keyHash = 0; // key hash code - long keyAddr = keyBaseAddr; // address for next int - int keyArrLen = 0; // number of key 4-byte ints - int keyLastBytes; // occupancy in last byte (1, 2, 3, or 4) - int val; // temperature value - while (true) { - int n = buffer.getInt(); - byte b0 = (byte) (n & 0xFF); - byte b1 = (byte) ((n >> 8) & 0xFF); - byte b2 = (byte) ((n >> 16) & 0xFF); - byte b3 = (byte) ((n >> 24) & 0xFF); - if (b0 == ';') { // ...;1.1 - val = getVal(buffer, b1, b2, b3, buffer.get()); - keyLastBytes = 4; - break; + private static long doProcessSegment(MemorySegment ms, long offset, Stats[] stats, boolean complete) { + long cursor = ms.address() + offset; + long keyBaseAddr = UNSAFE.allocateMemory(MAX_KEY_SIZE); // reusable target for current key data + long lineStart = cursor; // start of key in segment used for footer calc + long limit = ms.address() + (complete ? 
ms.byteSize() : ms.byteSize() - MAX_LINE_SIZE); // stop short of longest line, sweep up at the end + while (cursor < limit) { // one line per iteration + int keyHash = 0; // key hash code + long keyAddr = keyBaseAddr; // address for next int + int keyArrLen = 0; // number of key 4-byte ints + int keyLastBytes; // occupancy in last byte (1, 2, 3, or 4) + int val; + while (true) { + int n = UNSAFE.getInt(cursor); + cursor += 4; + if ((n & 0xFF) == ';') { // ;vvv + UNSAFE.putInt(keyAddr, 0); // always pad with extra int to facilitate 8-byte aligned comparisons + keyLastBytes = 4; + byte b0 = (byte) ((n >> 8) & 0xFF); + byte b1 = (byte) ((n >> 16) & 0xFF); + byte b2 = (byte) ((n >> 24) & 0xFF); + if (b0 == '-') { + if (b2 != '.') { // 6 bytes: -dd.dn + cursor++; // decimal point + byte b4 = UNSAFE.getByte(cursor); + cursor += 2; // adv beyond digit and newline + val = -(((b1 - '0') * 10 + (b2 - '0')) * 10 + (b4 - '0')); + } + else { // 5 bytes: -d.dn + byte b3 = UNSAFE.getByte(cursor); + cursor += 2; // digit and newline + val = -((b1 - '0') * 10 + (b3 - '0')); + } } - else if (b1 == ';') { // ...a;1.1 - val = getVal(buffer, b2, b3, buffer.get(), buffer.get()); - UNSAFE.putInt(keyAddr, b0); - keyLastBytes = 1; - keyArrLen++; - keyHash = HASH_FACTOR * keyHash + b0; - break; + else { + if (b1 != '.') { // 5 bytes: dd.dn + var b3 = UNSAFE.getByte(cursor); + cursor += 2; // digit and newline + val = ((b0 - '0') * 10 + (b1 - '0')) * 10 + (b3 - '0'); + } + else { // 4 bytes: d.dn + cursor++; // newline + val = (b0 - '0') * 10 + (b2 - '0'); + } } - else if (b2 == ';') { // ...ab;1.1 - val = getVal(buffer, b3, buffer.get(), buffer.get(), buffer.get()); - UNSAFE.putInt(keyAddr, n & 0x0000FFFF); - keyLastBytes = 2; - keyArrLen++; - keyHash = HASH_FACTOR * (HASH_FACTOR * keyHash + b0) + b1; - break; + break; + } + else if ((n & 0xFF00) == 0x3b00) { // k;vv + int k = n & 0xFF; + UNSAFE.putLong(keyAddr, k); // pad with extra int for comparison alignment + keyLastBytes = 1; + 
keyArrLen++; + keyHash += k; + byte b0 = (byte) ((n >> 16) & 0xFF); + byte b1 = (byte) ((n >> 24) & 0xFF); + byte b2 = UNSAFE.getByte(cursor++); + if (b0 == '-') { + if (b2 != '.') { // 6 bytes: -dd.dn + cursor++; // decimal point + byte b4 = UNSAFE.getByte(cursor); + cursor += 2; // adv beyond digit and newline + val = -(((b1 - '0') * 10 + (b2 - '0')) * 10 + (b4 - '0')); + } + else { // 5 bytes: -d.dn + byte b3 = UNSAFE.getByte(cursor); + cursor += 2; // digit newline + val = -((b1 - '0') * 10 + (b3 - '0')); + } } - else if (b3 == ';') { // ...abc;1.1 - UNSAFE.putInt(keyAddr, n & 0x00FFFFFF); - keyLastBytes = 3; - keyArrLen++; - keyHash = HASH_FACTOR * (HASH_FACTOR * (HASH_FACTOR * keyHash + b0) + b1) + b2; - n = buffer.getInt(); - b0 = (byte) (n & 0xFF); - b1 = (byte) ((n >> 8) & 0xFF); - b2 = (byte) ((n >> 16) & 0xFF); - b3 = (byte) ((n >> 24) & 0xFF); - val = getVal(buffer, b0, b1, b2, b3); - break; + else { + if (b1 != '.') { // 5 bytes: dd.dn + byte b3 = UNSAFE.getByte(cursor); + cursor += 2; // newline + val = ((b0 - '0') * 10 + (b1 - '0')) * 10 + (b3 - '0'); + } + else { // 4 bytes: d.dn + cursor++; + val = (b0 - '0') * 10 + (b2 - '0'); + } + } + break; + } + else if ((n & 0xFF0000) == 0x3b0000) { // kk;v + int k = n & 0xFFFF; + UNSAFE.putLong(keyAddr, k); // pad with extra int for comparison alignment + keyLastBytes = 2; + keyArrLen++; + keyHash += k; + byte b0 = (byte) ((n >> 24) & 0xFF); + if (b0 == '-') { + n = UNSAFE.getInt(cursor); + cursor += 4; + byte b1 = (byte) (n & 0xFF); + byte b2 = (byte) ((n >> 8) & 0xFF); + byte b3 = (byte) ((n >> 16) & 0xFF); + if (b2 != '.') { // 6 bytes: -dd.dn + byte b4 = (byte) ((n >> 24) & 0xFF); + cursor++; // newline + val = -(((b1 - '0') * 10 + (b2 - '0')) * 10 + (b4 - '0')); + } + else { // 5 bytes: -d.dn + val = -((b1 - '0') * 10 + (b3 - '0')); + } } else { - UNSAFE.putInt(keyAddr, n); - keyArrLen++; - keyAddr += 4; - keyHash = HASH_FACTOR * (HASH_FACTOR * (HASH_FACTOR * (HASH_FACTOR * keyHash + b0) + b1) + b2) + 
b3; + byte b1 = UNSAFE.getByte(cursor++); + byte b2 = UNSAFE.getByte(cursor++); + byte b3 = UNSAFE.getByte(cursor++); + if (b1 != '.') { // 5 bytes: dd.dn + cursor++; // newline + val = ((b0 - '0') * 10 + (b1 - '0')) * 10 + (b3 - '0'); + } + else { // 4 bytes: d.dn + val = (b0 - '0') * 10 + (b2 - '0'); + } } + break; } - var idx = keyHash & HASH_TBL_SIZE; - var st = stats[idx]; - if (st == null) { // nothing in table, eagerly claim spot - st = stats[idx] = newStats(keyBaseAddr, keyArrLen, keyLastBytes, keyHash); + else if ((n & 0xFF000000) == 0x3b000000) { // kkk; + int k = n & 0xFFFFFF; + UNSAFE.putLong(keyAddr, k); // pad with extra int for comparison alignment + keyLastBytes = 3; + keyArrLen++; + keyHash += k; + n = UNSAFE.getInt(cursor); + cursor += 4; + byte b0 = (byte) (n & 0xFF); + byte b1 = (byte) ((n >> 8) & 0xFF); + byte b2 = (byte) ((n >> 16) & 0xFF); + byte b3 = (byte) ((n >> 24) & 0xFF); + if (b0 == '-') { + if (b2 != '.') { // 6 bytes: -dd.dn + byte b4 = UNSAFE.getByte(cursor); + cursor += 2; // adv beyond digit and newline + val = -(((b1 - '0') * 10 + (b2 - '0')) * 10 + (b4 - '0')); + } + else { // 5 bytes: -d.dn + cursor++; // newline + val = -((b1 - '0') * 10 + (b3 - '0')); + } + } + else { + if (b1 != '.') { // 5 bytes: dd.dn + cursor++; // newline + val = ((b0 - '0') * 10 + (b1 - '0')) * 10 + (b3 - '0'); + } + else { // 4 bytes: d.dn + val = (b0 - '0') * 10 + (b2 - '0'); + } + } + break; } - else if (!equals(st.keyAddr, st.keyLen, keyBaseAddr, keyArrLen)) { - st = findInTable(stats, keyHash, keyBaseAddr, keyArrLen, keyLastBytes); + else { // kkkk + UNSAFE.putInt(keyAddr, n); + keyArrLen++; + keyAddr += 4; + keyHash += n; } - st.min = Math.min(st.min, val); - st.max = Math.max(st.max, val); - st.sum += val; - st.count++; } + keyHash ^= keyHash >>> 13; + var idx = keyHash & HASH_TBL_SIZE; + var st = stats[idx]; + if (st == null) { // nothing in table, eagerly claim spot + st = stats[idx] = newStats(keyBaseAddr, keyArrLen, keyLastBytes, keyHash); + 
} + else if (!equals(st.keyAddr, st.keyLen, keyBaseAddr, keyArrLen)) { + st = findInTable(stats, keyHash, keyBaseAddr, keyArrLen, keyLastBytes); + } + st.min = Math.min(st.min, val); + st.max = Math.max(st.max, val); + st.sum += val; + st.count++; + lineStart = cursor; // preserve line start } - catch (BufferUnderflowException ignore) { - - } - return keyStart; + return lineStart - ms.address(); } private static boolean equals(long key1, int len1, long key2, int len2) { if (len1 != len2) { return false; } - if (len1 == 2) { + if (len1 <= 2) { return UNSAFE.getLong(key1) == UNSAFE.getLong(key2); } - if (len1 == 3) { - return UNSAFE.getInt(key1) == UNSAFE.getInt(key2) && UNSAFE.getInt(key1 + 4) == UNSAFE.getInt(key2 + 4); - } - if (len1 == 1) { - return UNSAFE.getInt(key1) == UNSAFE.getInt(key2); - } - if (len1 == 4) { + if (len1 <= 4) { return UNSAFE.getLong(key1) == UNSAFE.getLong(key2) && UNSAFE.getLong(key1 + 8) == UNSAFE.getLong(key2 + 8); } - for (int i = 0; i < len1; i++) { + for (int i = 0; i < len1; i += 2) { var offset = i << 2; - if (UNSAFE.getInt(key1 + offset) != UNSAFE.getInt(key2 + offset)) { + if (UNSAFE.getLong(key1 + offset) != UNSAFE.getLong(key2 + offset)) { return false; } } return true; } - private static int getVal(ByteBuffer buffer, byte b0, byte b1, byte b2, byte b3) { - if (b0 == '-') { - if (b2 != '.') { // 6 bytes: -dd.dn - var b = buffer.get(); - buffer.get(); // newline - return -(((b1 - '0') * 10 + (b2 - '0')) * 10 + (b - '0')); - } - else { // 5 bytes: -d.dn - buffer.get(); // newline - return -((b1 - '0') * 10 + (b3 - '0')); - } - } - else { - if (b1 != '.') { // 5 bytes: dd.dn - buffer.get(); // newline - return ((b0 - '0') * 10 + (b1 - '0')) * 10 + (b3 - '0'); - } - else { // 4 bytes: d.dn - return (b0 - '0') * 10 + (b2 - '0'); - } - } - } - private static Stats findInTable(Stats[] stats, int hash, long keyAddr, int keyLen, int keyLastBytes) { // open-addressing scan var idx = hash & HASH_TBL_SIZE; var st = stats[idx]; @@ -315,24 
+384,32 @@ private static Stats findInTable(Stats[] stats, int hash, long keyAddr, int keyL } private static Stats newStats(long keyAddr, int keyLen, int keyLastBytes, int hash) { - var bytes = keyLen << 2; + var bytes = (keyLen + 1) << 2; // include overflow chunk long k = UNSAFE.allocateMemory(bytes); UNSAFE.copyMemory(keyAddr, k, bytes); return new Stats(k, keyLen, keyLastBytes, hash); } - private static byte[] readFooter(ByteBuffer buffer, int lineStart) { // read from line start to current pos (end-of-input) - var footer = new byte[buffer.limit() - lineStart]; - buffer.get(lineStart, footer, 0, footer.length); + private static byte[] readFooter(MemorySegment ms, long offset) { // read from line start to current pos (end-of-input) + var footer = new byte[(int) (ms.byteSize() - offset)]; + for (int i = 0; i < footer.length; i++) { + footer[i] = ms.get(ValueLayout.JAVA_BYTE, offset + i); + } return footer; } - private static byte[] readHeader(ByteBuffer buffer) { // read up to and including first newline (or end-of-input) - while (buffer.hasRemaining() && buffer.get() != '\n') + private static ByteArrayOffset readHeader(MemorySegment ms) { // read up to and including first newline (or end-of-input) + long offset = 0; + while (offset < ms.byteSize() && ms.get(ValueLayout.JAVA_BYTE, offset++) != '\n') ; - var header = new byte[buffer.position()]; - buffer.get(0, header, 0, header.length); - return header; + var header = new byte[(int) offset]; + for (int i = 0; i < offset; i++) { + header[i] = ms.get(ValueLayout.JAVA_BYTE, i); + } + return new ByteArrayOffset(header, offset); + } + + record ByteArrayOffset(byte[] data, long offset) { } private static class Partition { diff --git a/src/main/java/dev/morling/onebrc/CalculateAverage_eriklumme.java b/src/main/java/dev/morling/onebrc/CalculateAverage_eriklumme.java new file mode 100644 index 000000000..768be4a25 --- /dev/null +++ b/src/main/java/dev/morling/onebrc/CalculateAverage_eriklumme.java @@ -0,0 +1,373 @@ +/* + 
* Copyright 2023 The original authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package dev.morling.onebrc; + +import java.io.FileInputStream; +import java.nio.MappedByteBuffer; +import java.nio.channels.FileChannel; +import java.nio.charset.StandardCharsets; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Comparator; +import java.util.HashMap; +import java.util.List; +import java.util.Locale; +import java.util.Map; +import java.util.concurrent.Callable; +import java.util.concurrent.CountDownLatch; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.Future; + +public class CalculateAverage_eriklumme { + + private static final String FILE = "./measurements.txt"; + private static final int NUM_CPUS = Runtime.getRuntime().availableProcessors(); + private static final int LINE_OVERHEAD = 208; + private static final int NUM_TASKS = NUM_CPUS * 6; + + private final CountDownLatch countDownLatch = new CountDownLatch(NUM_TASKS); + + private final FileInputStream fileInputStream = new FileInputStream(FILE); + private final FileChannel fileChannel = fileInputStream.getChannel(); + private final long fileSize = fileChannel.size(); + private final int fileSizePerThread = (int) Math.max(Math.ceil(fileSize / (float) NUM_TASKS), 1000); + + private CalculateAverage_eriklumme() throws Exception { + Map map = new HashMap<>(); + + try (ExecutorService executorService = 
Executors.newFixedThreadPool(NUM_CPUS); fileInputStream; fileChannel) { + long sizeAccountedFor = 0; + + List>> futures = new ArrayList<>(NUM_TASKS); + for (int i = 0; i < NUM_TASKS; i++) { + if (sizeAccountedFor >= fileSize) { + // The file is so small that because of the minimum file size per thread, we've covered it in less + // threads than expected + countDownLatch.countDown(); + continue; + } + futures.add(executorService.submit(new DataProcessor(i))); + sizeAccountedFor += fileSizePerThread; + } + countDownLatch.await(); + + for (Future> future : futures) { + Map futureMap = future.get(); + futureMap.forEach((key, value) -> map.merge(key, value, + (st1, st2) -> { + st1.sum += st2.sum; + st1.count += st2.count; + st1.min = Math.min(st1.min, st2.min); + st1.max = Math.max(st1.max, st2.max); + return st1; + })); + } + } + + StringBuilder result = new StringBuilder("{"); + boolean first = true; + List values = new ArrayList<>(map.values()); + values.sort(Comparator.comparing(StationMeasurement::stringName)); + + for (StationMeasurement stationMeasurement : values) { + if (!first) { + result.append(", "); + } + first = false; + result.append(new String(stationMeasurement.stationName.value, StandardCharsets.UTF_8)).append("="); + result.append(DECIMAL_LOOKUP[stationMeasurement.min + 1000]); + result.append(String.format("/%.1f/", (stationMeasurement.sum / (stationMeasurement.count * 10.0)))); + result.append(DECIMAL_LOOKUP[stationMeasurement.max + 1000]); + } + result.append("}"); + + System.out.println(result); + } + + private static class StationMeasurement { + private final ByteArrayWrapper stationName; + + private StationMeasurement(ByteArrayWrapper stationName) { + this.stationName = stationName; + } + + private int min = Integer.MAX_VALUE; + private int max = Integer.MIN_VALUE; + private long sum = 0; + private int count = 0; + + public String stringName() { + return new String(stationName.value, StandardCharsets.UTF_8); + } + } + + private enum Mode { + 
UNINITIALIZED, + READ_STATION, + READ_VALUE + } + + private record ByteArrayWrapper(byte[] value) { + + @Override + public boolean equals(Object o) { + if (this == o) + return true; + if (o instanceof ByteArrayWrapper that) { + return Arrays.equals(value, that.value); + } + return false; + } + + @Override + public int hashCode() { + return Arrays.hashCode(value); + } + } + + public class DataProcessor implements Callable> { + + private final int processorIndex; + + public DataProcessor(int processorIndex) { + this.processorIndex = processorIndex; + } + + @Override + public Map call() throws Exception { + Map map = new HashMap<>(); + + byte[] stationBuffer = new byte[200]; + int stationIndex = 0; + + byte[] valueBuffer = new byte[10]; + int valueIndex = 0; + + Mode mode = processorIndex == 0 ? Mode.READ_STATION : Mode.UNINITIALIZED; + byte b; + + long offset = ((long) fileSizePerThread) * processorIndex; + long sizeWithOverhead = Math.min(((long) fileSizePerThread) + LINE_OVERHEAD, fileSize - offset); + + try { + MappedByteBuffer buffer = fileChannel.map(FileChannel.MapMode.READ_ONLY, offset, sizeWithOverhead); + // Read from buffer in chunks for improved performance + byte[] bytes = new byte[(int) (sizeWithOverhead / 6) + 1]; + + while (buffer.hasRemaining()) { + long bytesRemaining = sizeWithOverhead - buffer.position(); + int bytesOffset, bytesLength; + if (bytesRemaining >= bytes.length) { + bytesOffset = 0; + bytesLength = bytes.length; + } + else { + bytesOffset = (int) (bytes.length - bytesRemaining); + bytesLength = (int) bytesRemaining; + } + buffer.get(bytes, bytesOffset, bytesLength); + + for (int i = bytesOffset; i < bytes.length; i++) { + b = bytes[i]; + if (b == '\n') { + // We have a station to store + if (mode == Mode.READ_VALUE) { + storeStation(map, stationBuffer, stationIndex, valueBuffer, valueIndex); + stationIndex = 0; + valueIndex = 0; + } + mode = Mode.READ_STATION; + + // We've run past our size, can happen + if (buffer.position() - 
bytes.length + i >= fileSizePerThread) { + return map; + } + } + else if (mode == Mode.UNINITIALIZED) { + // Do-nothing, read more + } + else if (b == ';') { + mode = Mode.READ_VALUE; + } + else if (mode == Mode.READ_STATION) { + stationBuffer[stationIndex++] = b; + } + else { + valueBuffer[valueIndex++] = b; + } + } + } + if (mode == Mode.READ_VALUE && valueIndex > 0) { + // One value left to store + storeStation(map, stationBuffer, stationIndex, valueBuffer, valueIndex); + } + } + finally { + countDownLatch.countDown(); + } + return map; + } + + private void storeStation(Map map, byte[] stationBuffer, int stationIndex, byte[] valueBuffer, int valueIndex) { + ByteArrayWrapper stationName = new ByteArrayWrapper(Arrays.copyOfRange(stationBuffer, 0, stationIndex)); + + int value = 0; + for (int i = 0; i < valueIndex; i++) { + byte b = valueBuffer[valueIndex - i - 1]; + if (i == 1) { + // Skip the decimal point + } + else if (b == '-') { + // Number is negative + value = (-value); + } + else { + int valueAtIndex = b - 48; + if (i == 0) { + value += valueAtIndex; + } + else { + value += valueAtIndex * (i == 2 ? 
10 : 100); + } + } + } + StationMeasurement stationMeasurement = map.computeIfAbsent(stationName, StationMeasurement::new); + stationMeasurement.count++; + stationMeasurement.min = Math.min(value, stationMeasurement.min); + stationMeasurement.max = Math.max(value, stationMeasurement.max); + stationMeasurement.sum += value; + } + } + + public static void main(String[] args) throws Exception { + Locale.setDefault(Locale.US); + new CalculateAverage_eriklumme(); + } + + private static final String[] DECIMAL_LOOKUP = new String[]{ + "-100.0", "-99.9", "-99.8", "-99.7", "-99.6", "-99.5", "-99.4", "-99.3", "-99.2", "-99.1", "-99.0", "-98.9", "-98.8", "-98.7", "-98.6", "-98.5", "-98.4", + "-98.3", "-98.2", "-98.1", "-98.0", "-97.9", "-97.8", "-97.7", "-97.6", "-97.5", "-97.4", "-97.3", "-97.2", "-97.1", "-97.0", "-96.9", "-96.8", "-96.7", + "-96.6", "-96.5", "-96.4", "-96.3", "-96.2", "-96.1", "-96.0", "-95.9", "-95.8", "-95.7", "-95.6", "-95.5", "-95.4", "-95.3", "-95.2", "-95.1", "-95.0", + "-94.9", "-94.8", "-94.7", "-94.6", "-94.5", "-94.4", "-94.3", "-94.2", "-94.1", "-94.0", "-93.9", "-93.8", "-93.7", "-93.6", "-93.5", "-93.4", "-93.3", + "-93.2", "-93.1", "-93.0", "-92.9", "-92.8", "-92.7", "-92.6", "-92.5", "-92.4", "-92.3", "-92.2", "-92.1", "-92.0", "-91.9", "-91.8", "-91.7", "-91.6", + "-91.5", "-91.4", "-91.3", "-91.2", "-91.1", "-91.0", "-90.9", "-90.8", "-90.7", "-90.6", "-90.5", "-90.4", "-90.3", "-90.2", "-90.1", "-90.0", "-89.9", + "-89.8", "-89.7", "-89.6", "-89.5", "-89.4", "-89.3", "-89.2", "-89.1", "-89.0", "-88.9", "-88.8", "-88.7", "-88.6", "-88.5", "-88.4", "-88.3", "-88.2", + "-88.1", "-88.0", "-87.9", "-87.8", "-87.7", "-87.6", "-87.5", "-87.4", "-87.3", "-87.2", "-87.1", "-87.0", "-86.9", "-86.8", "-86.7", "-86.6", "-86.5", + "-86.4", "-86.3", "-86.2", "-86.1", "-86.0", "-85.9", "-85.8", "-85.7", "-85.6", "-85.5", "-85.4", "-85.3", "-85.2", "-85.1", "-85.0", "-84.9", "-84.8", + "-84.7", "-84.6", "-84.5", "-84.4", "-84.3", "-84.2", "-84.1", 
"-84.0", "-83.9", "-83.8", "-83.7", "-83.6", "-83.5", "-83.4", "-83.3", "-83.2", "-83.1", + "-83.0", "-82.9", "-82.8", "-82.7", "-82.6", "-82.5", "-82.4", "-82.3", "-82.2", "-82.1", "-82.0", "-81.9", "-81.8", "-81.7", "-81.6", "-81.5", "-81.4", + "-81.3", "-81.2", "-81.1", "-81.0", "-80.9", "-80.8", "-80.7", "-80.6", "-80.5", "-80.4", "-80.3", "-80.2", "-80.1", "-80.0", "-79.9", "-79.8", "-79.7", + "-79.6", "-79.5", "-79.4", "-79.3", "-79.2", "-79.1", "-79.0", "-78.9", "-78.8", "-78.7", "-78.6", "-78.5", "-78.4", "-78.3", "-78.2", "-78.1", "-78.0", + "-77.9", "-77.8", "-77.7", "-77.6", "-77.5", "-77.4", "-77.3", "-77.2", "-77.1", "-77.0", "-76.9", "-76.8", "-76.7", "-76.6", "-76.5", "-76.4", "-76.3", + "-76.2", "-76.1", "-76.0", "-75.9", "-75.8", "-75.7", "-75.6", "-75.5", "-75.4", "-75.3", "-75.2", "-75.1", "-75.0", "-74.9", "-74.8", "-74.7", "-74.6", + "-74.5", "-74.4", "-74.3", "-74.2", "-74.1", "-74.0", "-73.9", "-73.8", "-73.7", "-73.6", "-73.5", "-73.4", "-73.3", "-73.2", "-73.1", "-73.0", "-72.9", + "-72.8", "-72.7", "-72.6", "-72.5", "-72.4", "-72.3", "-72.2", "-72.1", "-72.0", "-71.9", "-71.8", "-71.7", "-71.6", "-71.5", "-71.4", "-71.3", "-71.2", + "-71.1", "-71.0", "-70.9", "-70.8", "-70.7", "-70.6", "-70.5", "-70.4", "-70.3", "-70.2", "-70.1", "-70.0", "-69.9", "-69.8", "-69.7", "-69.6", "-69.5", + "-69.4", "-69.3", "-69.2", "-69.1", "-69.0", "-68.9", "-68.8", "-68.7", "-68.6", "-68.5", "-68.4", "-68.3", "-68.2", "-68.1", "-68.0", "-67.9", "-67.8", + "-67.7", "-67.6", "-67.5", "-67.4", "-67.3", "-67.2", "-67.1", "-67.0", "-66.9", "-66.8", "-66.7", "-66.6", "-66.5", "-66.4", "-66.3", "-66.2", "-66.1", + "-66.0", "-65.9", "-65.8", "-65.7", "-65.6", "-65.5", "-65.4", "-65.3", "-65.2", "-65.1", "-65.0", "-64.9", "-64.8", "-64.7", "-64.6", "-64.5", "-64.4", + "-64.3", "-64.2", "-64.1", "-64.0", "-63.9", "-63.8", "-63.7", "-63.6", "-63.5", "-63.4", "-63.3", "-63.2", "-63.1", "-63.0", "-62.9", "-62.8", "-62.7", + "-62.6", "-62.5", "-62.4", "-62.3", "-62.2", 
"-62.1", "-62.0", "-61.9", "-61.8", "-61.7", "-61.6", "-61.5", "-61.4", "-61.3", "-61.2", "-61.1", "-61.0", + "-60.9", "-60.8", "-60.7", "-60.6", "-60.5", "-60.4", "-60.3", "-60.2", "-60.1", "-60.0", "-59.9", "-59.8", "-59.7", "-59.6", "-59.5", "-59.4", "-59.3", + "-59.2", "-59.1", "-59.0", "-58.9", "-58.8", "-58.7", "-58.6", "-58.5", "-58.4", "-58.3", "-58.2", "-58.1", "-58.0", "-57.9", "-57.8", "-57.7", "-57.6", + "-57.5", "-57.4", "-57.3", "-57.2", "-57.1", "-57.0", "-56.9", "-56.8", "-56.7", "-56.6", "-56.5", "-56.4", "-56.3", "-56.2", "-56.1", "-56.0", "-55.9", + "-55.8", "-55.7", "-55.6", "-55.5", "-55.4", "-55.3", "-55.2", "-55.1", "-55.0", "-54.9", "-54.8", "-54.7", "-54.6", "-54.5", "-54.4", "-54.3", "-54.2", + "-54.1", "-54.0", "-53.9", "-53.8", "-53.7", "-53.6", "-53.5", "-53.4", "-53.3", "-53.2", "-53.1", "-53.0", "-52.9", "-52.8", "-52.7", "-52.6", "-52.5", + "-52.4", "-52.3", "-52.2", "-52.1", "-52.0", "-51.9", "-51.8", "-51.7", "-51.6", "-51.5", "-51.4", "-51.3", "-51.2", "-51.1", "-51.0", "-50.9", "-50.8", + "-50.7", "-50.6", "-50.5", "-50.4", "-50.3", "-50.2", "-50.1", "-50.0", "-49.9", "-49.8", "-49.7", "-49.6", "-49.5", "-49.4", "-49.3", "-49.2", "-49.1", + "-49.0", "-48.9", "-48.8", "-48.7", "-48.6", "-48.5", "-48.4", "-48.3", "-48.2", "-48.1", "-48.0", "-47.9", "-47.8", "-47.7", "-47.6", "-47.5", "-47.4", + "-47.3", "-47.2", "-47.1", "-47.0", "-46.9", "-46.8", "-46.7", "-46.6", "-46.5", "-46.4", "-46.3", "-46.2", "-46.1", "-46.0", "-45.9", "-45.8", "-45.7", + "-45.6", "-45.5", "-45.4", "-45.3", "-45.2", "-45.1", "-45.0", "-44.9", "-44.8", "-44.7", "-44.6", "-44.5", "-44.4", "-44.3", "-44.2", "-44.1", "-44.0", + "-43.9", "-43.8", "-43.7", "-43.6", "-43.5", "-43.4", "-43.3", "-43.2", "-43.1", "-43.0", "-42.9", "-42.8", "-42.7", "-42.6", "-42.5", "-42.4", "-42.3", + "-42.2", "-42.1", "-42.0", "-41.9", "-41.8", "-41.7", "-41.6", "-41.5", "-41.4", "-41.3", "-41.2", "-41.1", "-41.0", "-40.9", "-40.8", "-40.7", "-40.6", + "-40.5", "-40.4", "-40.3", 
"-40.2", "-40.1", "-40.0", "-39.9", "-39.8", "-39.7", "-39.6", "-39.5", "-39.4", "-39.3", "-39.2", "-39.1", "-39.0", "-38.9", + "-38.8", "-38.7", "-38.6", "-38.5", "-38.4", "-38.3", "-38.2", "-38.1", "-38.0", "-37.9", "-37.8", "-37.7", "-37.6", "-37.5", "-37.4", "-37.3", "-37.2", + "-37.1", "-37.0", "-36.9", "-36.8", "-36.7", "-36.6", "-36.5", "-36.4", "-36.3", "-36.2", "-36.1", "-36.0", "-35.9", "-35.8", "-35.7", "-35.6", "-35.5", + "-35.4", "-35.3", "-35.2", "-35.1", "-35.0", "-34.9", "-34.8", "-34.7", "-34.6", "-34.5", "-34.4", "-34.3", "-34.2", "-34.1", "-34.0", "-33.9", "-33.8", + "-33.7", "-33.6", "-33.5", "-33.4", "-33.3", "-33.2", "-33.1", "-33.0", "-32.9", "-32.8", "-32.7", "-32.6", "-32.5", "-32.4", "-32.3", "-32.2", "-32.1", + "-32.0", "-31.9", "-31.8", "-31.7", "-31.6", "-31.5", "-31.4", "-31.3", "-31.2", "-31.1", "-31.0", "-30.9", "-30.8", "-30.7", "-30.6", "-30.5", "-30.4", + "-30.3", "-30.2", "-30.1", "-30.0", "-29.9", "-29.8", "-29.7", "-29.6", "-29.5", "-29.4", "-29.3", "-29.2", "-29.1", "-29.0", "-28.9", "-28.8", "-28.7", + "-28.6", "-28.5", "-28.4", "-28.3", "-28.2", "-28.1", "-28.0", "-27.9", "-27.8", "-27.7", "-27.6", "-27.5", "-27.4", "-27.3", "-27.2", "-27.1", "-27.0", + "-26.9", "-26.8", "-26.7", "-26.6", "-26.5", "-26.4", "-26.3", "-26.2", "-26.1", "-26.0", "-25.9", "-25.8", "-25.7", "-25.6", "-25.5", "-25.4", "-25.3", + "-25.2", "-25.1", "-25.0", "-24.9", "-24.8", "-24.7", "-24.6", "-24.5", "-24.4", "-24.3", "-24.2", "-24.1", "-24.0", "-23.9", "-23.8", "-23.7", "-23.6", + "-23.5", "-23.4", "-23.3", "-23.2", "-23.1", "-23.0", "-22.9", "-22.8", "-22.7", "-22.6", "-22.5", "-22.4", "-22.3", "-22.2", "-22.1", "-22.0", "-21.9", + "-21.8", "-21.7", "-21.6", "-21.5", "-21.4", "-21.3", "-21.2", "-21.1", "-21.0", "-20.9", "-20.8", "-20.7", "-20.6", "-20.5", "-20.4", "-20.3", "-20.2", + "-20.1", "-20.0", "-19.9", "-19.8", "-19.7", "-19.6", "-19.5", "-19.4", "-19.3", "-19.2", "-19.1", "-19.0", "-18.9", "-18.8", "-18.7", "-18.6", "-18.5", + "-18.4", 
"-18.3", "-18.2", "-18.1", "-18.0", "-17.9", "-17.8", "-17.7", "-17.6", "-17.5", "-17.4", "-17.3", "-17.2", "-17.1", "-17.0", "-16.9", "-16.8", + "-16.7", "-16.6", "-16.5", "-16.4", "-16.3", "-16.2", "-16.1", "-16.0", "-15.9", "-15.8", "-15.7", "-15.6", "-15.5", "-15.4", "-15.3", "-15.2", "-15.1", + "-15.0", "-14.9", "-14.8", "-14.7", "-14.6", "-14.5", "-14.4", "-14.3", "-14.2", "-14.1", "-14.0", "-13.9", "-13.8", "-13.7", "-13.6", "-13.5", "-13.4", + "-13.3", "-13.2", "-13.1", "-13.0", "-12.9", "-12.8", "-12.7", "-12.6", "-12.5", "-12.4", "-12.3", "-12.2", "-12.1", "-12.0", "-11.9", "-11.8", "-11.7", + "-11.6", "-11.5", "-11.4", "-11.3", "-11.2", "-11.1", "-11.0", "-10.9", "-10.8", "-10.7", "-10.6", "-10.5", "-10.4", "-10.3", "-10.2", "-10.1", "-10.0", + "-9.9", "-9.8", "-9.7", "-9.6", "-9.5", "-9.4", "-9.3", "-9.2", "-9.1", "-9.0", "-8.9", "-8.8", "-8.7", "-8.6", "-8.5", "-8.4", "-8.3", "-8.2", "-8.1", + "-8.0", "-7.9", "-7.8", "-7.7", "-7.6", "-7.5", "-7.4", "-7.3", "-7.2", "-7.1", "-7.0", "-6.9", "-6.8", "-6.7", "-6.6", "-6.5", "-6.4", "-6.3", "-6.2", + "-6.1", "-6.0", "-5.9", "-5.8", "-5.7", "-5.6", "-5.5", "-5.4", "-5.3", "-5.2", "-5.1", "-5.0", "-4.9", "-4.8", "-4.7", "-4.6", "-4.5", "-4.4", "-4.3", + "-4.2", "-4.1", "-4.0", "-3.9", "-3.8", "-3.7", "-3.6", "-3.5", "-3.4", "-3.3", "-3.2", "-3.1", "-3.0", "-2.9", "-2.8", "-2.7", "-2.6", "-2.5", "-2.4", + "-2.3", "-2.2", "-2.1", "-2.0", "-1.9", "-1.8", "-1.7", "-1.6", "-1.5", "-1.4", "-1.3", "-1.2", "-1.1", "-1.0", "-0.9", "-0.8", "-0.7", "-0.6", "-0.5", + "-0.4", "-0.3", "-0.2", "-0.1", "0.0", "0.1", "0.2", "0.3", "0.4", "0.5", "0.6", "0.7", "0.8", "0.9", "1.0", "1.1", "1.2", "1.3", "1.4", "1.5", "1.6", "1.7", + "1.8", "1.9", "2.0", "2.1", "2.2", "2.3", "2.4", "2.5", "2.6", "2.7", "2.8", "2.9", "3.0", "3.1", "3.2", "3.3", "3.4", "3.5", "3.6", "3.7", "3.8", "3.9", + "4.0", "4.1", "4.2", "4.3", "4.4", "4.5", "4.6", "4.7", "4.8", "4.9", "5.0", "5.1", "5.2", "5.3", "5.4", "5.5", "5.6", "5.7", "5.8", "5.9", "6.0", 
"6.1", + "6.2", "6.3", "6.4", "6.5", "6.6", "6.7", "6.8", "6.9", "7.0", "7.1", "7.2", "7.3", "7.4", "7.5", "7.6", "7.7", "7.8", "7.9", "8.0", "8.1", "8.2", "8.3", + "8.4", "8.5", "8.6", "8.7", "8.8", "8.9", "9.0", "9.1", "9.2", "9.3", "9.4", "9.5", "9.6", "9.7", "9.8", "9.9", "10.0", "10.1", "10.2", "10.3", "10.4", + "10.5", "10.6", "10.7", "10.8", "10.9", "11.0", "11.1", "11.2", "11.3", "11.4", "11.5", "11.6", "11.7", "11.8", "11.9", "12.0", "12.1", "12.2", "12.3", + "12.4", "12.5", "12.6", "12.7", "12.8", "12.9", "13.0", "13.1", "13.2", "13.3", "13.4", "13.5", "13.6", "13.7", "13.8", "13.9", "14.0", "14.1", "14.2", + "14.3", "14.4", "14.5", "14.6", "14.7", "14.8", "14.9", "15.0", "15.1", "15.2", "15.3", "15.4", "15.5", "15.6", "15.7", "15.8", "15.9", "16.0", "16.1", + "16.2", "16.3", "16.4", "16.5", "16.6", "16.7", "16.8", "16.9", "17.0", "17.1", "17.2", "17.3", "17.4", "17.5", "17.6", "17.7", "17.8", "17.9", "18.0", + "18.1", "18.2", "18.3", "18.4", "18.5", "18.6", "18.7", "18.8", "18.9", "19.0", "19.1", "19.2", "19.3", "19.4", "19.5", "19.6", "19.7", "19.8", "19.9", + "20.0", "20.1", "20.2", "20.3", "20.4", "20.5", "20.6", "20.7", "20.8", "20.9", "21.0", "21.1", "21.2", "21.3", "21.4", "21.5", "21.6", "21.7", "21.8", + "21.9", "22.0", "22.1", "22.2", "22.3", "22.4", "22.5", "22.6", "22.7", "22.8", "22.9", "23.0", "23.1", "23.2", "23.3", "23.4", "23.5", "23.6", "23.7", + "23.8", "23.9", "24.0", "24.1", "24.2", "24.3", "24.4", "24.5", "24.6", "24.7", "24.8", "24.9", "25.0", "25.1", "25.2", "25.3", "25.4", "25.5", "25.6", + "25.7", "25.8", "25.9", "26.0", "26.1", "26.2", "26.3", "26.4", "26.5", "26.6", "26.7", "26.8", "26.9", "27.0", "27.1", "27.2", "27.3", "27.4", "27.5", + "27.6", "27.7", "27.8", "27.9", "28.0", "28.1", "28.2", "28.3", "28.4", "28.5", "28.6", "28.7", "28.8", "28.9", "29.0", "29.1", "29.2", "29.3", "29.4", + "29.5", "29.6", "29.7", "29.8", "29.9", "30.0", "30.1", "30.2", "30.3", "30.4", "30.5", "30.6", "30.7", "30.8", "30.9", "31.0", "31.1", 
"31.2", "31.3", + "31.4", "31.5", "31.6", "31.7", "31.8", "31.9", "32.0", "32.1", "32.2", "32.3", "32.4", "32.5", "32.6", "32.7", "32.8", "32.9", "33.0", "33.1", "33.2", + "33.3", "33.4", "33.5", "33.6", "33.7", "33.8", "33.9", "34.0", "34.1", "34.2", "34.3", "34.4", "34.5", "34.6", "34.7", "34.8", "34.9", "35.0", "35.1", + "35.2", "35.3", "35.4", "35.5", "35.6", "35.7", "35.8", "35.9", "36.0", "36.1", "36.2", "36.3", "36.4", "36.5", "36.6", "36.7", "36.8", "36.9", "37.0", + "37.1", "37.2", "37.3", "37.4", "37.5", "37.6", "37.7", "37.8", "37.9", "38.0", "38.1", "38.2", "38.3", "38.4", "38.5", "38.6", "38.7", "38.8", "38.9", + "39.0", "39.1", "39.2", "39.3", "39.4", "39.5", "39.6", "39.7", "39.8", "39.9", "40.0", "40.1", "40.2", "40.3", "40.4", "40.5", "40.6", "40.7", "40.8", + "40.9", "41.0", "41.1", "41.2", "41.3", "41.4", "41.5", "41.6", "41.7", "41.8", "41.9", "42.0", "42.1", "42.2", "42.3", "42.4", "42.5", "42.6", "42.7", + "42.8", "42.9", "43.0", "43.1", "43.2", "43.3", "43.4", "43.5", "43.6", "43.7", "43.8", "43.9", "44.0", "44.1", "44.2", "44.3", "44.4", "44.5", "44.6", + "44.7", "44.8", "44.9", "45.0", "45.1", "45.2", "45.3", "45.4", "45.5", "45.6", "45.7", "45.8", "45.9", "46.0", "46.1", "46.2", "46.3", "46.4", "46.5", + "46.6", "46.7", "46.8", "46.9", "47.0", "47.1", "47.2", "47.3", "47.4", "47.5", "47.6", "47.7", "47.8", "47.9", "48.0", "48.1", "48.2", "48.3", "48.4", + "48.5", "48.6", "48.7", "48.8", "48.9", "49.0", "49.1", "49.2", "49.3", "49.4", "49.5", "49.6", "49.7", "49.8", "49.9", "50.0", "50.1", "50.2", "50.3", + "50.4", "50.5", "50.6", "50.7", "50.8", "50.9", "51.0", "51.1", "51.2", "51.3", "51.4", "51.5", "51.6", "51.7", "51.8", "51.9", "52.0", "52.1", "52.2", + "52.3", "52.4", "52.5", "52.6", "52.7", "52.8", "52.9", "53.0", "53.1", "53.2", "53.3", "53.4", "53.5", "53.6", "53.7", "53.8", "53.9", "54.0", "54.1", + "54.2", "54.3", "54.4", "54.5", "54.6", "54.7", "54.8", "54.9", "55.0", "55.1", "55.2", "55.3", "55.4", "55.5", "55.6", "55.7", 
"55.8", "55.9", "56.0", + "56.1", "56.2", "56.3", "56.4", "56.5", "56.6", "56.7", "56.8", "56.9", "57.0", "57.1", "57.2", "57.3", "57.4", "57.5", "57.6", "57.7", "57.8", "57.9", + "58.0", "58.1", "58.2", "58.3", "58.4", "58.5", "58.6", "58.7", "58.8", "58.9", "59.0", "59.1", "59.2", "59.3", "59.4", "59.5", "59.6", "59.7", "59.8", + "59.9", "60.0", "60.1", "60.2", "60.3", "60.4", "60.5", "60.6", "60.7", "60.8", "60.9", "61.0", "61.1", "61.2", "61.3", "61.4", "61.5", "61.6", "61.7", + "61.8", "61.9", "62.0", "62.1", "62.2", "62.3", "62.4", "62.5", "62.6", "62.7", "62.8", "62.9", "63.0", "63.1", "63.2", "63.3", "63.4", "63.5", "63.6", + "63.7", "63.8", "63.9", "64.0", "64.1", "64.2", "64.3", "64.4", "64.5", "64.6", "64.7", "64.8", "64.9", "65.0", "65.1", "65.2", "65.3", "65.4", "65.5", + "65.6", "65.7", "65.8", "65.9", "66.0", "66.1", "66.2", "66.3", "66.4", "66.5", "66.6", "66.7", "66.8", "66.9", "67.0", "67.1", "67.2", "67.3", "67.4", + "67.5", "67.6", "67.7", "67.8", "67.9", "68.0", "68.1", "68.2", "68.3", "68.4", "68.5", "68.6", "68.7", "68.8", "68.9", "69.0", "69.1", "69.2", "69.3", + "69.4", "69.5", "69.6", "69.7", "69.8", "69.9", "70.0", "70.1", "70.2", "70.3", "70.4", "70.5", "70.6", "70.7", "70.8", "70.9", "71.0", "71.1", "71.2", + "71.3", "71.4", "71.5", "71.6", "71.7", "71.8", "71.9", "72.0", "72.1", "72.2", "72.3", "72.4", "72.5", "72.6", "72.7", "72.8", "72.9", "73.0", "73.1", + "73.2", "73.3", "73.4", "73.5", "73.6", "73.7", "73.8", "73.9", "74.0", "74.1", "74.2", "74.3", "74.4", "74.5", "74.6", "74.7", "74.8", "74.9", "75.0", + "75.1", "75.2", "75.3", "75.4", "75.5", "75.6", "75.7", "75.8", "75.9", "76.0", "76.1", "76.2", "76.3", "76.4", "76.5", "76.6", "76.7", "76.8", "76.9", + "77.0", "77.1", "77.2", "77.3", "77.4", "77.5", "77.6", "77.7", "77.8", "77.9", "78.0", "78.1", "78.2", "78.3", "78.4", "78.5", "78.6", "78.7", "78.8", + "78.9", "79.0", "79.1", "79.2", "79.3", "79.4", "79.5", "79.6", "79.7", "79.8", "79.9", "80.0", "80.1", "80.2", "80.3", 
"80.4", "80.5", "80.6", "80.7", + "80.8", "80.9", "81.0", "81.1", "81.2", "81.3", "81.4", "81.5", "81.6", "81.7", "81.8", "81.9", "82.0", "82.1", "82.2", "82.3", "82.4", "82.5", "82.6", + "82.7", "82.8", "82.9", "83.0", "83.1", "83.2", "83.3", "83.4", "83.5", "83.6", "83.7", "83.8", "83.9", "84.0", "84.1", "84.2", "84.3", "84.4", "84.5", + "84.6", "84.7", "84.8", "84.9", "85.0", "85.1", "85.2", "85.3", "85.4", "85.5", "85.6", "85.7", "85.8", "85.9", "86.0", "86.1", "86.2", "86.3", "86.4", + "86.5", "86.6", "86.7", "86.8", "86.9", "87.0", "87.1", "87.2", "87.3", "87.4", "87.5", "87.6", "87.7", "87.8", "87.9", "88.0", "88.1", "88.2", "88.3", + "88.4", "88.5", "88.6", "88.7", "88.8", "88.9", "89.0", "89.1", "89.2", "89.3", "89.4", "89.5", "89.6", "89.7", "89.8", "89.9", "90.0", "90.1", "90.2", + "90.3", "90.4", "90.5", "90.6", "90.7", "90.8", "90.9", "91.0", "91.1", "91.2", "91.3", "91.4", "91.5", "91.6", "91.7", "91.8", "91.9", "92.0", "92.1", + "92.2", "92.3", "92.4", "92.5", "92.6", "92.7", "92.8", "92.9", "93.0", "93.1", "93.2", "93.3", "93.4", "93.5", "93.6", "93.7", "93.8", "93.9", "94.0", + "94.1", "94.2", "94.3", "94.4", "94.5", "94.6", "94.7", "94.8", "94.9", "95.0", "95.1", "95.2", "95.3", "95.4", "95.5", "95.6", "95.7", "95.8", "95.9", + "96.0", "96.1", "96.2", "96.3", "96.4", "96.5", "96.6", "96.7", "96.8", "96.9", "97.0", "97.1", "97.2", "97.3", "97.4", "97.5", "97.6", "97.7", "97.8", + "97.9", "98.0", "98.1", "98.2", "98.3", "98.4", "98.5", "98.6", "98.7", "98.8", "98.9", "99.0", "99.1", "99.2", "99.3", "99.4", "99.5", "99.6", "99.7", + "99.8", "99.9" }; +} diff --git a/src/main/java/dev/morling/onebrc/CalculateAverage_faridtmammadov.java b/src/main/java/dev/morling/onebrc/CalculateAverage_faridtmammadov.java new file mode 100644 index 000000000..f4b920bb0 --- /dev/null +++ b/src/main/java/dev/morling/onebrc/CalculateAverage_faridtmammadov.java @@ -0,0 +1,203 @@ +/* + * Copyright 2023 The original authors + * + * Licensed under the Apache License, 
Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package dev.morling.onebrc; + +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.lang.foreign.Arena; +import java.lang.foreign.MemorySegment; +import java.lang.foreign.ValueLayout; +import java.nio.channels.FileChannel; +import java.nio.charset.StandardCharsets; +import java.nio.file.Path; +import java.nio.file.StandardOpenOption; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.TreeMap; +import java.util.stream.Collectors; + +public class CalculateAverage_faridtmammadov { + private static final String FILE = "./measurements.txt"; + + public static void main(String[] args) throws IOException { + int availableProcessors = Runtime.getRuntime().availableProcessors(); + + var map = getSegments(availableProcessors).stream() + .map(CalculateAverage_faridtmammadov::aggregate).parallel() + .flatMap(f -> f.entrySet().stream()) + .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue, Aggregate::update, TreeMap::new)); + + printFormatted(map); + } + + private static List getSegments(int numberOfChunks) throws IOException { + try (var fileChannel = FileChannel.open(Path.of(FILE), StandardOpenOption.READ)) { + var fileSize = fileChannel.size(); + var segmentSize = fileSize / numberOfChunks; + var segment = fileChannel.map(FileChannel.MapMode.READ_ONLY, 0, fileSize, Arena.global()); + var baseAddress = segment.address(); + var endAddress = 
baseAddress + fileSize; + var segments = new ArrayList(); + var startAddress = baseAddress; + + for (var i = 0; i < numberOfChunks; i++) { + var pointer = startAddress + segmentSize; + while (pointer < endAddress) { + long offset = pointer - baseAddress; + byte b = segment.get(ValueLayout.JAVA_BYTE, offset); + if (b == '\n') { + break; + } + pointer++; + } + if (pointer >= endAddress) { + var offsetStart = startAddress - baseAddress; + var offsetEnd = endAddress - baseAddress - offsetStart; + segments.add(segment.asSlice(offsetStart, offsetEnd)); + break; + } + var offsetStart = startAddress - baseAddress; + var offsetEnd = pointer - baseAddress - offsetStart; + segments.add(segment.asSlice(offsetStart, offsetEnd)); + startAddress = pointer + 1; + } + + return segments; + } + } + + private static Map aggregate(MemorySegment segment) { + var map = new HashMap(); + var iterator = new MemorySegmentIterator(segment); + + while (iterator.hasNext()) { + String city = parseCity(iterator); + long temperature = parseTemperature(iterator); + + map.compute(city, (key, value) -> { + if (value == null) { + return new Aggregate(temperature); + } + else { + return value.update(temperature); + } + }); + } + + return map; + } + + private static String parseCity(MemorySegmentIterator iterator) { + var byteStream = new ByteArrayOutputStream(); + while (iterator.hasNext()) { + var b = iterator.getNextByte(); + if (b == ';') { + return byteStream.toString(StandardCharsets.UTF_8); + } + byteStream.write(b); + } + + return null; + } + + public static long parseTemperature(MemorySegmentIterator iterator) { + long value = 0L; + int sign = 1; + while (iterator.hasNext()) { + byte b = iterator.getNextByte(); + if (b >= '0' && b <= '9') { + value = value * 10 + b - '0'; + } + else if (b == '\n') { + return value * sign; + } + else if (b == '-') { + sign = -1; + } + } + + return value * sign; + } + + private static void printFormatted(Map map) { + var iterator = map.entrySet().iterator(); + 
var length = map.entrySet().size(); + System.out.print("{"); + for (int i = 0; i < length - 1; i++) { + var entry = iterator.next(); + System.out.printf("%s=%s, ", entry.getKey(), entry.getValue().toString()); + } + var lastEntry = iterator.next(); + System.out.printf("%s=%s}\n", lastEntry.getKey(), lastEntry.getValue().toString()); + } + + static class Aggregate { + long min; + long max; + long sum; + int count; + + public Aggregate(long temperature) { + min = temperature; + max = temperature; + sum = temperature; + count = 1; + } + + public Aggregate update(long temp) { + min = Math.min(min, temp); + max = Math.max(max, temp); + sum += temp; + count++; + return this; + } + + public Aggregate update(Aggregate agg) { + min = Math.min(min, agg.min); + max = Math.max(max, agg.max); + sum += agg.sum; + count += agg.count; + return this; + } + + public String toString() { + return String.format("%s/%s/%s", min / 10.0f, Math.round(sum * 1.0f / count) / 10.0f, max / 10.0f); + } + } + + static class MemorySegmentIterator { + private long offset; + private final MemorySegment segment; + private final long segmentSize; + + public MemorySegmentIterator(MemorySegment segment) { + this.segment = segment; + this.segmentSize = segment.byteSize(); + } + + public boolean hasNext() { + return offset < segmentSize; + } + + public byte getNextByte() { + var b = segment.get(ValueLayout.JAVA_BYTE, offset); + offset++; + return b; + } + } +} \ No newline at end of file diff --git a/src/main/java/dev/morling/onebrc/CalculateAverage_felix19350.java b/src/main/java/dev/morling/onebrc/CalculateAverage_felix19350.java index c54976d58..8c047b72a 100644 --- a/src/main/java/dev/morling/onebrc/CalculateAverage_felix19350.java +++ b/src/main/java/dev/morling/onebrc/CalculateAverage_felix19350.java @@ -16,17 +16,16 @@ package dev.morling.onebrc; import java.io.IOException; -import java.io.RandomAccessFile; import java.lang.foreign.Arena; -import java.lang.foreign.MemorySegment; import 
java.lang.foreign.ValueLayout; import java.nio.ByteBuffer; import java.nio.channels.FileChannel; import java.nio.charset.StandardCharsets; +import java.nio.file.Paths; import java.util.ArrayList; +import java.util.HashMap; import java.util.List; import java.util.Map; -import java.util.TreeMap; import java.util.concurrent.CompletableFuture; import java.util.concurrent.Executors; import java.util.stream.Collectors; @@ -36,6 +35,55 @@ public class CalculateAverage_felix19350 { private static final String FILE = "./measurements.txt"; private static final int NEW_LINE_SEEK_BUFFER_LEN = 128; + private static final int EXPECTED_MAX_NUM_CITIES = 15_000; // 10K cities + a buffer no to trigger the load factor + + private static class CityRef { + + final int length; + final int fingerprint; + final byte[] stringBytes; + + public CityRef(ByteBuffer byteBuffer, int startIdx, int length, int fingerprint) { + this.length = length; + this.stringBytes = new byte[length]; + byteBuffer.get(startIdx, this.stringBytes, 0, this.stringBytes.length); + this.fingerprint = fingerprint; + } + + public String cityName() { + return new String(stringBytes, StandardCharsets.UTF_8); + } + + @Override + public int hashCode() { + return fingerprint; + } + + @Override + public boolean equals(Object other) { + if (other instanceof CityRef otherRef) { + if (fingerprint != otherRef.fingerprint) { + return false; + } + + if (this.length != otherRef.length) { + return false; + } + + for (var i = 0; i < this.length; i++) { + if (this.stringBytes[i] != otherRef.stringBytes[i]) { + return false; + } + } + return true; + } + else { + return false; + } + } + + } + private static class ResultRow { private int min; @@ -73,95 +121,104 @@ public void mergeResult(ResultRow value) { } } - private record AverageAggregatorTask(MemorySegment memSegment) { + private record AverageAggregatorTask(ByteBuffer byteBuffer) { + private static final int HASH_FACTOR = 31; // Mersenne prime - public static Stream 
createStreamOf(List memorySegments) { - return memorySegments.stream().map(AverageAggregatorTask::new); + + public static Stream createStreamOf(List byteBuffers) { + return byteBuffers.stream().map(AverageAggregatorTask::new); } - public Map processChunk() { - final var result = new TreeMap(); - var offset = 0L; - var lineStart = 0L; - while (offset < memSegment.byteSize()) { - byte nextByte = memSegment.get(ValueLayout.OfByte.JAVA_BYTE, offset); - if ((char) nextByte == '\n') { - this.processLine(result, memSegment.asSlice(lineStart, (offset - lineStart)).asByteBuffer()); - lineStart = offset + ValueLayout.JAVA_BYTE.byteSize(); - } - offset += ValueLayout.OfByte.JAVA_BYTE.byteSize(); + public Map processChunk() { + final var measurements = new HashMap(EXPECTED_MAX_NUM_CITIES); + var lineStart = 0; + // process line by line playing with the fact that a line is no longer than 106 bytes + // 100 bytes for city name + 1 byte for separator + 1 bytes for negative sign + 4 bytes for number + while (lineStart < byteBuffer.limit()) { + lineStart = this.processLine(measurements, byteBuffer, lineStart); } - - return result; + return measurements; } - private void processLine(Map result, ByteBuffer lineBytes) { + private int processLine(Map measurements, ByteBuffer byteBuffer, int start) { + var fingerPrint = 0; var separatorIdx = -1; - for (int i = 0; i < lineBytes.limit(); i++) { - if ((char) lineBytes.get() == ';') { - separatorIdx = i; - lineBytes.clear(); - break; + var sign = 1; + var value = 0; + var lineEnd = -1; + // Lines are processed in two stages: + // 1 - prior do the city name separator + // 2 - after the separator + // this ensures less if clauses + + // stage 1 loop + { + for (int i = 0; i < NEW_LINE_SEEK_BUFFER_LEN; i++) { + final var currentByte = byteBuffer.get(start + i); + if (currentByte == ';') { + separatorIdx = i; + break; + } else { + fingerPrint = HASH_FACTOR * fingerPrint + currentByte; + } } } - assert (separatorIdx > 0); - var valueCapacity = 
lineBytes.capacity() - (separatorIdx + 1); - var cityBytes = new byte[separatorIdx]; - var valueBytes = new byte[valueCapacity]; - lineBytes.get(cityBytes, 0, separatorIdx); - lineBytes.get(separatorIdx + 1, valueBytes); + // stage 2 loop: + { + for (int i = separatorIdx + 1; i < NEW_LINE_SEEK_BUFFER_LEN; i++) { + final var currentByte = byteBuffer.get(start + i); + switch (currentByte) { + case '-': + sign = -1; + break; + case '.': + break; + case '\n': + lineEnd = start + i + 1; + break; + default: + // only digits are expected here + value = value * 10 + (currentByte - '0'); + } + + if (lineEnd != -1) { + break; + } + } + } - var city = new String(cityBytes, StandardCharsets.UTF_8); - var value = parseInt(valueBytes); + assert (separatorIdx > 0); + final var cityRef = new CityRef(byteBuffer, start, separatorIdx,fingerPrint); + value = sign * value; - var latestValue = result.get(city); - if (latestValue != null) { - latestValue.mergeValue(value); + final var existingMeasurement = measurements.get(cityRef); + if (existingMeasurement == null) { + measurements.put(cityRef, new ResultRow(value)); } else { - result.put(city, new ResultRow(value)); + existingMeasurement.mergeValue(value); } - } - private static int parseInt(byte[] valueBytes) { - int multiplier = 1; - int digitValue = 0; - var numDigits = valueBytes.length-1; // there is always one decimal place - var ds = new int[]{1,10,100}; - - for (byte valueByte : valueBytes) { - switch ((char) valueByte) { - case '-': - multiplier = -1; - numDigits -= 1; - break; - case '.': - break; - default: - digitValue += ((int) valueByte - 48) * (ds[numDigits - 1]); - numDigits -= 1; - break;// TODO continue here - } - } - return multiplier*digitValue; + return lineEnd; //to account for the line end } } public static void main(String[] args) throws IOException { // memory map the files and divide by number of cores - var numProcessors = Runtime.getRuntime().availableProcessors(); - var memorySegments = 
calculateMemorySegments(numProcessors); - var tasks = AverageAggregatorTask.createStreamOf(memorySegments); - assert (memorySegments.size() == numProcessors); + final var numProcessors = Runtime.getRuntime().availableProcessors(); + final var byteBuffers = calculateMemorySegments(numProcessors); + final var tasks = AverageAggregatorTask.createStreamOf(byteBuffers); + assert (byteBuffers.size() <= numProcessors); + assert (!byteBuffers.isEmpty()); try (var pool = Executors.newFixedThreadPool(numProcessors)) { - var results = tasks + final Map aggregatedCities = tasks .parallel() .map(task -> CompletableFuture.supplyAsync(task::processChunk, pool)) .map(CompletableFuture::join) - .reduce(new TreeMap<>(), (partialMap, accumulator) -> { - partialMap.forEach((key, value) -> { - var prev = accumulator.get(key); + .reduce(new HashMap<>(EXPECTED_MAX_NUM_CITIES), (currentMap, accumulator) -> { + currentMap.forEach((key, value) -> { + final var prev = accumulator.get(key); if (prev == null) { accumulator.put(key, value); } @@ -172,6 +229,9 @@ public static void main(String[] args) throws IOException { return accumulator; }); + var results = new HashMap(EXPECTED_MAX_NUM_CITIES); + aggregatedCities.forEach((key, value) -> results.put(key.cityName(), value)); + System.out.print("{"); String output = results.keySet() .stream() @@ -183,16 +243,16 @@ public static void main(String[] args) throws IOException { } } - private static List calculateMemorySegments(int numChunks) throws IOException { - try (RandomAccessFile raf = new RandomAccessFile(FILE, "r")) { - var result = new ArrayList(numChunks); - var chunks = new ArrayList(numChunks); + private static List calculateMemorySegments(int numChunks) throws IOException { + try (FileChannel fc = FileChannel.open(Paths.get(FILE))) { + var memMappedFile = fc.map(FileChannel.MapMode.READ_ONLY, 0L, fc.size(), Arena.ofAuto()); + var result = new ArrayList(numChunks); - var fileSize = raf.length(); - var chunkSize = fileSize / numChunks; + 
var fileSize = fc.size(); + var chunkSize = fileSize / numChunks; // TODO: if chunksize > MAX INT we will need to adjust + var previousChunkEnd = 0L; for (int i = 0; i < numChunks; i++) { - var previousChunkEnd = i == 0 ? 0L : chunks.get(i - 1)[1]; if (previousChunkEnd >= fileSize) { // There is a scenario for very small files where the number of chunks may be greater than // the number of lines. @@ -205,31 +265,27 @@ private static List calculateMemorySegments(int numChunks) throws } else { // all other chunks are end at a new line (\n) - var theoreticalEnd = previousChunkEnd + chunkSize; - var buffer = new byte[NEW_LINE_SEEK_BUFFER_LEN]; - raf.seek(theoreticalEnd); - raf.read(buffer, 0, NEW_LINE_SEEK_BUFFER_LEN); - + var theoreticalEnd = Math.min(previousChunkEnd + chunkSize, fileSize); var newLineOffset = 0; - for (byte b : buffer) { + for (int j = 0; j < NEW_LINE_SEEK_BUFFER_LEN; j++) { + var candidateOffset = theoreticalEnd + j; + if (candidateOffset >= fileSize) { + break; + } + byte b = memMappedFile.get(ValueLayout.OfByte.JAVA_BYTE, candidateOffset); newLineOffset += 1; if ((char) b == '\n') { break; } } chunk[1] = Math.min(fileSize, theoreticalEnd + newLineOffset); + previousChunkEnd = chunk[1]; } - assert (chunk[0] >= 0L); - assert (chunk[0] <= fileSize); assert (chunk[1] > chunk[0]); assert (chunk[1] <= fileSize); - var memMappedFile = raf.getChannel() - .map(FileChannel.MapMode.READ_ONLY, chunk[0], (chunk[1] - chunk[0]), Arena.ofAuto()); - memMappedFile.load(); - chunks.add(chunk); - result.add(memMappedFile); + result.add(memMappedFile.asSlice(chunk[0], (chunk[1] - chunk[0])).asByteBuffer()); } return result; } diff --git a/src/main/java/dev/morling/onebrc/CalculateAverage_flippingbits.java b/src/main/java/dev/morling/onebrc/CalculateAverage_flippingbits.java index 2510d8526..3489877f6 100644 --- a/src/main/java/dev/morling/onebrc/CalculateAverage_flippingbits.java +++ b/src/main/java/dev/morling/onebrc/CalculateAverage_flippingbits.java @@ -18,8 
+18,13 @@ import jdk.incubator.vector.ShortVector; import jdk.incubator.vector.VectorOperators; +import sun.misc.Unsafe; +import java.lang.foreign.Arena; +import java.lang.reflect.Field; + import java.io.IOException; import java.io.RandomAccessFile; +import java.nio.channels.FileChannel; import java.nio.charset.StandardCharsets; import java.util.*; @@ -34,14 +39,31 @@ public class CalculateAverage_flippingbits { private static final String FILE = "./measurements.txt"; - private static final long CHUNK_SIZE = 10 * 1024 * 1024; // 10 MB + private static final long MINIMUM_FILE_SIZE_PARTITIONING = 10 * 1024 * 1024; // 10 MB private static final int SIMD_LANE_LENGTH = ShortVector.SPECIES_MAX.length(); - private static final int MAX_STATION_NAME_LENGTH = 100; + private static final int NUM_STATIONS = 10_000; + + private static final int HASH_MAP_OFFSET_CAPACITY = 200_000; + + private static final Unsafe UNSAFE = initUnsafe(); + + private static int HASH_PRIME_NUMBER = 31; + + private static Unsafe initUnsafe() { + try { + Field theUnsafe = Unsafe.class.getDeclaredField("theUnsafe"); + theUnsafe.setAccessible(true); + return (Unsafe) theUnsafe.get(Unsafe.class); + } + catch (NoSuchFieldException | IllegalAccessException e) { + throw new RuntimeException(e); + } + } public static void main(String[] args) throws IOException { - var result = Arrays.asList(getSegments()).stream() + var result = Arrays.asList(getSegments()).parallelStream() .map(segment -> { try { return processSegment(segment[0], segment[1]); @@ -50,126 +72,137 @@ public static void main(String[] args) throws IOException { throw new RuntimeException(e); } }) - .parallel() - .reduce((firstMap, secondMap) -> { - for (var entry : secondMap.entrySet()) { - PartitionAggregate firstAggregate = firstMap.get(entry.getKey()); - if (firstAggregate == null) { - firstMap.put(entry.getKey(), entry.getValue()); - } - else { - firstAggregate.mergeWith(entry.getValue()); - } - } - return firstMap; - }) - 
.map(TreeMap::new).get(); + .reduce(FasterHashMap::mergeWith) + .get(); + + var sortedMap = new TreeMap(); + for (Station station : result.getEntries()) { + sortedMap.put(station.getName(), station); + } - System.out.println(result); + System.out.println(sortedMap); } private static long[][] getSegments() throws IOException { try (var file = new RandomAccessFile(FILE, "r")) { - var fileSize = file.length(); + var channel = file.getChannel(); + + var fileSize = channel.size(); + var startAddress = channel + .map(FileChannel.MapMode.READ_ONLY, 0, fileSize, Arena.global()) + .address(); + // Split file into segments, so we can work around the size limitation of channels - var numSegments = (int) (fileSize / CHUNK_SIZE); + var numSegments = (fileSize > MINIMUM_FILE_SIZE_PARTITIONING) + ? Runtime.getRuntime().availableProcessors() + : 1; + var segmentSize = fileSize / numSegments; - var boundaries = new long[numSegments + 1][2]; - var endPointer = 0L; + var boundaries = new long[numSegments][2]; + var endPointer = startAddress; - for (var i = 0; i < numSegments; i++) { + for (var i = 0; i < numSegments - 1; i++) { // Start of segment - boundaries[i][0] = Math.min(Math.max(endPointer, i * CHUNK_SIZE), fileSize); - - // Seek end of segment, limited by the end of the file - file.seek(Math.min(boundaries[i][0] + CHUNK_SIZE - 1, fileSize)); + boundaries[i][0] = endPointer; // Extend segment until end of line or file - while (file.read() != '\n') { + endPointer = endPointer + segmentSize; + while (UNSAFE.getByte(endPointer) != '\n') { + endPointer++; } // End of segment - endPointer = file.getFilePointer(); - boundaries[i][1] = endPointer; + boundaries[i][1] = endPointer++; } - boundaries[numSegments][0] = Math.max(endPointer, numSegments * CHUNK_SIZE); - boundaries[numSegments][1] = fileSize; + boundaries[numSegments - 1][0] = endPointer; + boundaries[numSegments - 1][1] = startAddress + fileSize; return boundaries; } } - private static Map processSegment(long 
startOfSegment, long endOfSegment) - throws IOException { - Map stationAggregates = new HashMap<>(50_000); - var byteChunk = new byte[(int) (endOfSegment - startOfSegment)]; - var stationBuffer = new byte[MAX_STATION_NAME_LENGTH]; - try (var file = new RandomAccessFile(FILE, "r")) { - file.seek(startOfSegment); - file.read(byteChunk); - var i = 0; - while (i < byteChunk.length) { - // Station name has at least one byte - stationBuffer[0] = byteChunk[i]; - i++; - // Read station name - var j = 1; - while (byteChunk[i] != ';') { - stationBuffer[j] = byteChunk[i]; - j++; - i++; - } - var station = new String(stationBuffer, 0, j, StandardCharsets.UTF_8); + private static FasterHashMap processSegment(long startOfSegment, long endOfSegment) throws IOException { + var fasterHashMap = new FasterHashMap(); + for (var i = startOfSegment; i < endOfSegment; i += 3) { + // Read station name + int nameHash = UNSAFE.getByte(i); + final var nameStartAddress = i++; + var character = UNSAFE.getByte(i); + while (character != ';') { + nameHash = nameHash * HASH_PRIME_NUMBER + character; i++; + character = UNSAFE.getByte(i); + } + var nameLength = (int) (i - nameStartAddress); + i++; - // Read measurement - var isNegative = byteChunk[i] == '-'; - var measurement = 0; - if (isNegative) { + // Read measurement + var isNegative = UNSAFE.getByte(i) == '-'; + var measurement = 0; + if (isNegative) { + i++; + character = UNSAFE.getByte(i); + while (character != '.') { + measurement = measurement * 10 + character - '0'; i++; - while (byteChunk[i] != '.') { - measurement = measurement * 10 + byteChunk[i] - '0'; - i++; - } - measurement = (measurement * 10 + byteChunk[i + 1] - '0') * -1; + character = UNSAFE.getByte(i); } - else { - while (byteChunk[i] != '.') { - measurement = measurement * 10 + byteChunk[i] - '0'; - i++; - } - measurement = measurement * 10 + byteChunk[i + 1] - '0'; + measurement = (measurement * 10 + UNSAFE.getByte(i + 1) - '0') * -1; + } + else { + character = 
UNSAFE.getByte(i); + while (character != '.') { + measurement = measurement * 10 + character - '0'; + i++; + character = UNSAFE.getByte(i); } - - // Update aggregate - var aggregate = stationAggregates.computeIfAbsent(station, x -> new PartitionAggregate()); - aggregate.addMeasurementAndComputeAggregate((short) measurement); - i += 3; + measurement = measurement * 10 + UNSAFE.getByte(i + 1) - '0'; } - stationAggregates.values().forEach(PartitionAggregate::aggregateRemainingMeasurements); + + fasterHashMap.addEntry(nameHash, nameLength, nameStartAddress, (short) measurement); + } + + for (Station station : fasterHashMap.getEntries()) { + station.aggregateRemainingMeasurements(); } - return stationAggregates; + return fasterHashMap; } - private static class PartitionAggregate { - final short[] doubleLane = new short[SIMD_LANE_LENGTH * 2]; + private static class Station { + final short[] measurements = new short[SIMD_LANE_LENGTH * 2]; // Assume that we do not have more than Integer.MAX_VALUE measurements for the same station per partition - int count = 0; + int count = 1; long sum = 0; short min = Short.MAX_VALUE; short max = Short.MIN_VALUE; + final long nameAddress; + final int nameLength; + final int nameHash; + + public Station(int nameHash, int nameLength, long nameAddress, short measurement) { + this.nameHash = nameHash; + this.nameLength = nameLength; + this.nameAddress = nameAddress; + measurements[0] = measurement; + } + + public String getName() { + byte[] name = new byte[nameLength]; + UNSAFE.copyMemory(null, nameAddress, name, Unsafe.ARRAY_BYTE_BASE_OFFSET, nameLength); + return new String(name, StandardCharsets.UTF_8); + } public void addMeasurementAndComputeAggregate(short measurement) { // Add measurement to buffer, which is later processed by SIMD instructions - doubleLane[count % doubleLane.length] = measurement; + measurements[count % measurements.length] = measurement; count++; // Once lane is full, use SIMD instructions to calculate aggregates - if 
(count % doubleLane.length == 0) { - var firstVector = ShortVector.fromArray(ShortVector.SPECIES_MAX, doubleLane, 0); - var secondVector = ShortVector.fromArray(ShortVector.SPECIES_MAX, doubleLane, SIMD_LANE_LENGTH); + if (count % measurements.length == 0) { + var firstVector = ShortVector.fromArray(ShortVector.SPECIES_MAX, measurements, 0); + var secondVector = ShortVector.fromArray(ShortVector.SPECIES_MAX, measurements, SIMD_LANE_LENGTH); var simdMin = firstVector.min(secondVector).reduceLanes(VectorOperators.MIN); min = (short) Math.min(min, simdMin); @@ -182,19 +215,35 @@ public void addMeasurementAndComputeAggregate(short measurement) { } public void aggregateRemainingMeasurements() { - for (var i = 0; i < count % doubleLane.length; i++) { - var measurement = doubleLane[i]; + for (var i = 0; i < count % measurements.length; i++) { + var measurement = measurements[i]; min = (short) Math.min(min, measurement); max = (short) Math.max(max, measurement); sum += measurement; } } - public void mergeWith(PartitionAggregate otherAggregate) { - min = (short) Math.min(min, otherAggregate.min); - max = (short) Math.max(max, otherAggregate.max); - count = count + otherAggregate.count; - sum = sum + otherAggregate.sum; + public void mergeWith(Station otherStation) { + min = (short) Math.min(min, otherStation.min); + max = (short) Math.max(max, otherStation.max); + count = count + otherStation.count; + sum = sum + otherStation.sum; + } + + public boolean nameEquals(long otherNameAddress) { + var swarLimit = (nameLength / Long.BYTES) * Long.BYTES; + var i = 0; + for (; i < swarLimit; i += Long.BYTES) { + if (UNSAFE.getLong(nameAddress + i) != UNSAFE.getLong(otherNameAddress + i)) { + return false; + } + } + for (; i < nameLength; i++) { + if (UNSAFE.getByte(nameAddress + i) != UNSAFE.getByte(otherNameAddress + i)) { + return false; + } + } + return true; } public String toString() { @@ -206,4 +255,67 @@ public String toString() { (max / 10.0)); } } + + /** + * Use two arrays 
for implementing the hash map: + * - The array `entries` holds the map values, in our case instances of the class Station. + * - The array `offsets` maps hashes of the keys to indexes in the `entries` array. + * + * We create `offsets` with a much larger capacity than `entries`, so we minimize collisions. + */ + private static class FasterHashMap { + // Using 16-bit integers (shorts) for offsets supports up to 2^15 (=32,767) entries + // If you need to store more entries, consider replacing short with int + short[] offsets = new short[HASH_MAP_OFFSET_CAPACITY]; + Station[] entries = new Station[NUM_STATIONS + 1]; + int slotsInUse = 0; + + private int getOffsetIdx(int nameHash, int nameLength, long nameAddress) { + var offsetIdx = nameHash & (offsets.length - 1); + var offset = offsets[offsetIdx]; + + while (offset != 0 && + (nameLength != entries[offset].nameLength || !entries[offset].nameEquals(nameAddress))) { + offsetIdx = (offsetIdx + 1) % offsets.length; + offset = offsets[offsetIdx]; + } + + return offsetIdx; + } + + public void addEntry(int nameHash, int nameLength, long nameAddress, short measurement) { + var offsetIdx = getOffsetIdx(nameHash, nameLength, nameAddress); + var offset = offsets[offsetIdx]; + + if (offset == 0) { + slotsInUse++; + entries[slotsInUse] = new Station(nameHash, nameLength, nameAddress, measurement); + offsets[offsetIdx] = (short) slotsInUse; + } + else { + entries[offset].addMeasurementAndComputeAggregate(measurement); + } + } + + public FasterHashMap mergeWith(FasterHashMap otherMap) { + for (Station station : otherMap.getEntries()) { + var offsetIdx = getOffsetIdx(station.nameHash, station.nameLength, station.nameAddress); + var offset = offsets[offsetIdx]; + + if (offset == 0) { + slotsInUse++; + entries[slotsInUse] = station; + offsets[offsetIdx] = (short) slotsInUse; + } + else { + entries[offset].mergeWith(station); + } + } + return this; + } + + public List getEntries() { + return Arrays.asList(entries).subList(1, slotsInUse 
+ 1); + } + } } diff --git a/src/main/java/dev/morling/onebrc/CalculateAverage_gabrielfoo.java b/src/main/java/dev/morling/onebrc/CalculateAverage_gabrielfoo.java new file mode 100644 index 000000000..35e8bb36f --- /dev/null +++ b/src/main/java/dev/morling/onebrc/CalculateAverage_gabrielfoo.java @@ -0,0 +1,180 @@ +/* + * Copyright 2023 The original authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package dev.morling.onebrc; + +import java.io.RandomAccessFile; +import java.nio.MappedByteBuffer; +import java.nio.channels.FileChannel; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.TreeMap; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.Future; +import java.util.concurrent.ThreadFactory; + +public class CalculateAverage_gabrielfoo { + private static final String FILE = "./measurements.txt"; + private static final int UTF8_MAX_LEN_100_BYTES = 400; + private static final int DOUBLE_DIGITS_MAX = 3; + private static final int UNIQUE_STATION_NAMES = 10000; + + private static class ResultRow { + private double min = Double.POSITIVE_INFINITY; + private double sum = 0.0; + private double max = Double.NEGATIVE_INFINITY; + private int count = 0; + + public String toString() { + return min + "/" + (Math.round(sum / count) / 10.0) + "/" + max; + } + + public void updateMinMax(double incoming) { + min = Math.min(min, incoming); + max = Math.max(max, incoming); + sum 
+= incoming * 10.0; + count += 1; + } + + public void combine(ResultRow other) { + min = Math.min(min, other.min); + max = Math.max(max, other.max); + sum += other.sum; + count += other.count; + } + } + + public static MappedByteBuffer[] mapFileToMemory(final RandomAccessFile file, final int chunkCount) throws Exception { + FileChannel channel = file.getChannel(); + final long chunkSize = Math.ceilDiv(file.length(), chunkCount); + + MappedByteBuffer buffers[] = new MappedByteBuffer[chunkCount]; + + long position = 0; + for (int i = 0; i < chunkCount - 1; ++i) { + file.seek(position + chunkSize); + long ptr = file.getFilePointer(); + + while (file.readByte() != '\n') { + file.seek(++ptr); + } + + buffers[i] = channel.map(FileChannel.MapMode.READ_ONLY, position, ptr - position + 1); + + position = ptr + 1; + } + + buffers[buffers.length - 1] = channel.map(FileChannel.MapMode.READ_ONLY, position, file.length() - position); + + return buffers; + } + + public static void main(String[] args) throws Exception { + final RandomAccessFile file = new RandomAccessFile(FILE, "r"); + final int coreCount = file.length() < 2147483647 ? 
1 : Runtime.getRuntime().availableProcessors(); + ArrayList> maps = new ArrayList<>(); + + final ThreadFactory threadFactory = new ThreadFactory() { + public Thread newThread(Runnable r) { + Thread t = new Thread(r); + t.setPriority(Thread.MAX_PRIORITY); + return t; + } + }; + ExecutorService executor = Executors.newFixedThreadPool(coreCount, threadFactory); + + Future initFuture = executor.submit(() -> { + for (int i = 0; i < coreCount; ++i) { + maps.add(new HashMap<>(UNIQUE_STATION_NAMES, 0.9f)); + } + }); + + MappedByteBuffer[] buffers = mapFileToMemory(file, coreCount); + initFuture.get(); + + Future[] futures = new Future[buffers.length]; + + for (int k = 0; k < buffers.length; ++k) { + final MappedByteBuffer buffer = buffers[k]; + final var map = maps.get(k); + futures[k] = executor.submit(() -> { + int start = 0; + byte[] stationArr = new byte[UTF8_MAX_LEN_100_BYTES]; + double[] floatArr = new double[DOUBLE_DIGITS_MAX]; + byte currentByte; + + while (buffer.hasRemaining()) { + currentByte = buffer.get(); + stationArr[buffer.position() - start - 1] = currentByte; + + if (currentByte == ';') { + final int stationEnd = buffer.position() - 1; + // convert to double now + currentByte = buffer.get(); + boolean neg = currentByte == '-'; + if (neg) + currentByte = buffer.get(); + floatArr[0] = currentByte - '0'; + currentByte = buffer.get(); + if (currentByte == '.') { + floatArr[1] = (buffer.get() - '0') / 10.0; + floatArr[2] = 0.0; + } + else { + floatArr[0] *= 10.0; + floatArr[1] = (currentByte - '0'); + buffer.get(); + floatArr[2] = (buffer.get() - '0') / 10.0; + } + final double f = (neg ? -1 : 1) * (floatArr[0] + floatArr[1] + floatArr[2]); + + buffer.get(); // discard \n + + String station = new String(stationArr, 0, stationEnd - start); + + map.compute(station, (key, existingRow) -> { + ResultRow row = (existingRow == null) ? 
new ResultRow() : existingRow; + row.updateMinMax(f); + return row; + }); + + start = buffer.position(); + } + } + + }); + } + + for (Future future : futures) { + future.get(); + } + + HashMap resultHashMap = maps.get(0); + + maps.stream().skip(1).flatMap(map -> map.entrySet().stream()).forEach(entry -> { + resultHashMap.merge(entry.getKey(), entry.getValue(), (oldVal, newVal) -> { + oldVal.combine(newVal); + return oldVal; + }); + }); + + TreeMap res = new TreeMap<>(resultHashMap); + + executor.shutdown(); + + System.out.println(res); + } +} diff --git a/src/main/java/dev/morling/onebrc/CalculateAverage_gamlerhart.java b/src/main/java/dev/morling/onebrc/CalculateAverage_gamlerhart.java index 2f73a3348..4d44494b9 100644 --- a/src/main/java/dev/morling/onebrc/CalculateAverage_gamlerhart.java +++ b/src/main/java/dev/morling/onebrc/CalculateAverage_gamlerhart.java @@ -24,12 +24,14 @@ import java.nio.channels.FileChannel; import java.nio.file.Path; import java.util.ArrayList; +import java.util.Iterator; import java.util.TreeMap; +import java.util.stream.Collector; +import java.util.stream.Collectors; import static java.lang.Double.doubleToRawLongBits; import static java.lang.Double.longBitsToDouble; -import static java.lang.foreign.ValueLayout.JAVA_BYTE; -import static java.lang.foreign.ValueLayout.JAVA_LONG_UNALIGNED; +import static java.lang.foreign.ValueLayout.*; /** * Broad experiments in this implementation: @@ -69,19 +71,16 @@ public static void main(String[] args) throws Exception { ArrayList

sections = splitFileIntoSections(fileSize, fileContent); var loopBound = byteVec.loopBound(fileSize) - vecLen; - PrivateHashMap result = sections.stream() + var result = sections.stream() .parallel() .map(s -> { return parseSection(s.start, s.end, loopBound, fileContent); - }).reduce((mine, other) -> { - assert mine != other; - mine.mergeFrom(fileContent, other); - return mine; - }) - .get(); + }); var measurements = new TreeMap(); - result.fill(fileContent, measurements); + result.forEachOrdered(m -> { + m.fillMerge(fileContent, measurements); + }); System.out.println(measurements); } } @@ -160,11 +159,22 @@ private static class PrivateHashMap { // Encoding: // - Key: long // - 48 bits index, 16 bits length - // - min: double - // - max: double - // - sum: double - // - double: double - final long[] keyValues = new long[SIZE * 5]; + final long[] keys = new long[SIZE]; + final Value[] values = new Value[SIZE]; + + private class Value { + public Value(double min, double max, double sum, long count) { + this.min = min; + this.max = max; + this.sum = sum; + this.count = count; + } + + public double min; + public double max; + public double sum; + public long count; + } // int debug_size = 0; @@ -179,43 +189,40 @@ public void add(MemorySegment file, long pos, int len, double val) { } private static int calculateHash(MemorySegment file, long pos, int len) { - int hashCode = 1; - int i = 0; - int intBound = (len / 4) * 4; - for (; i < intBound; i += 4) { - int v = file.get(INT_UNALIGNED_BIG_ENDIAN, pos + i); - hashCode = 31 * hashCode + v; + if (len > 4) { + return file.get(INT_UNALIGNED_BIG_ENDIAN, pos) + 31 * len; } - for (; i < len; i++) { - int v = file.get(JAVA_BYTE, pos + i); - hashCode = 31 * hashCode + v; + else { + int hashCode = len; + int i = 0; + for (; i < len; i++) { + int v = file.get(JAVA_BYTE, pos + i); + hashCode = 31 * hashCode + v; + } + return hashCode; } - return hashCode; } private void doAdd(MemorySegment file, int hash, long pos, int len, double 
val) { int slot = hash & MASK; for (var probe = 0; probe < 20000; probe++) { - var iSl = ((slot + probe) & MASK) * 5; - var slotEntry = keyValues[iSl]; + var iSl = ((slot + probe) & MASK); + var slotEntry = keys[iSl]; var emtpy = slotEntry == 0; if (emtpy) { long keyInfo = pos << SHIFT_POS | len; - long valueBits = doubleToRawLongBits(val); - keyValues[iSl] = keyInfo; - keyValues[iSl + 1] = valueBits; - keyValues[iSl + 2] = valueBits; - keyValues[iSl + 3] = valueBits; - keyValues[iSl + 4] = 1; + keys[iSl] = keyInfo; + values[iSl] = new Value(val, val, val, 1); // debug_size++; return; } else if (isSameEntry(file, slotEntry, pos, len)) { - keyValues[iSl + 1] = doubleToRawLongBits(Math.min(longBitsToDouble(keyValues[iSl + 1]), val)); - keyValues[iSl + 2] = doubleToRawLongBits(Math.max(longBitsToDouble(keyValues[iSl + 2]), val)); - keyValues[iSl + 3] = doubleToRawLongBits(longBitsToDouble(keyValues[iSl + 3]) + val); - keyValues[iSl + 4] = keyValues[iSl + 4] + 1; + var vE = values[iSl]; + vE.min = Math.min(vE.min, val); + vE.max = Math.max(vE.max, val); + vE.sum = vE.sum + val; + vE.count++; return; } else { @@ -234,7 +241,7 @@ else if (isSameEntry(file, slotEntry, pos, len)) { private boolean isSameEntry(MemorySegment file, long slotEntry, long pos, int len) { long keyPos = (slotEntry & MASK_POS) >> SHIFT_POS; int keyLen = (int) (slotEntry & MASK_LEN); - var isSame = isSame(file, keyPos, pos, len); + var isSame = len == keyLen && isSame(file, keyPos, pos, len); return isSame; } @@ -243,8 +250,8 @@ private static boolean isSame(MemorySegment file, long i1, long i2, int len) { var i1len = i1 + vecLen; var i2len = i2 + vecLen; if (len < vecLen && i1len <= file.byteSize() && i2len <= file.byteSize()) { - var v1 = byteVec.fromMemorySegment(file, i1, ByteOrder.BIG_ENDIAN); - var v2 = byteVec.fromMemorySegment(file, i2, ByteOrder.BIG_ENDIAN); + var v1 = byteVec.fromMemorySegment(file, i1, ByteOrder.nativeOrder()); + var v2 = byteVec.fromMemorySegment(file, i2, 
ByteOrder.nativeOrder()); var isTrue = v1.compare(VectorOperators.EQ, v2, allTrue.indexInRange(0, len)); return isTrue.trueCount() == len; } @@ -268,118 +275,65 @@ private static boolean isSame(MemorySegment file, long i1, long i2, int len) { return true; } - public PrivateHashMap mergeFrom(MemorySegment file, PrivateHashMap other) { - for (int slot = 0; slot < other.keyValues.length / 5; slot++) { - int srcI = slot * 5; - long keyE = other.keyValues[srcI]; - if (keyE != 0) { - long oPos = (keyE & MASK_POS) >> SHIFT_POS; - int oLen = (int) (keyE & MASK_LEN); - addMerge(file, other, srcI, oPos, oLen); - } - } - return this; - } - - private void addMerge(MemorySegment file, PrivateHashMap other, int srcI, long oPos, int oLen) { - int slot = calculateHash(file, oPos, oLen) & MASK; - for (var probe = 0; probe < 20000; probe++) { - var iSl = ((slot + probe) & MASK) * 5; - var slotEntry = keyValues[iSl]; - - var emtpy = slotEntry == 0; - // var debugKey = new String(file.asSlice(oPos, oLen).toArray(JAVA_BYTE)); - if (emtpy) { - // if (debugKey.equals("Cabo San Lucas")) { - // System.out.println("=> VALUES (init) " + debugKey + "@" + iSl + " max: " + longBitsToDouble(other.keyValues[srcI + 2]) + "," + longBitsToDouble(keyValues[iSl + 2])); - // } - keyValues[iSl] = other.keyValues[srcI]; - keyValues[iSl + 1] = other.keyValues[srcI + 1]; - keyValues[iSl + 2] = other.keyValues[srcI + 2]; - keyValues[iSl + 3] = other.keyValues[srcI + 3]; - keyValues[iSl + 4] = other.keyValues[srcI + 4]; - // debug_size++; - return; - } - else if (isSameEntry(file, slotEntry, oPos, oLen)) { - // if (debugKey.equals("Cabo San Lucas")) { - // System.out.println("=> VALUES (merge) " + "@" + iSl + debugKey + " max: " + longBitsToDouble(other.keyValues[srcI + 2]) + "," - // + longBitsToDouble(keyValues[iSl + 2]) + "=> " - // + Math.max(longBitsToDouble(keyValues[iSl + 2]), longBitsToDouble(other.keyValues[srcI + 2]))); - // } - keyValues[iSl + 1] = 
doubleToRawLongBits(Math.min(longBitsToDouble(keyValues[iSl + 1]), longBitsToDouble(other.keyValues[srcI + 1]))); - keyValues[iSl + 2] = doubleToRawLongBits(Math.max(longBitsToDouble(keyValues[iSl + 2]), longBitsToDouble(other.keyValues[srcI + 2]))); - keyValues[iSl + 3] = doubleToRawLongBits(longBitsToDouble(keyValues[iSl + 3]) + longBitsToDouble(other.keyValues[srcI + 3])); - keyValues[iSl + 4] = keyValues[iSl + 4] + other.keyValues[srcI + 4]; - // if (debugKey.equals("Cabo San Lucas")) { - // System.out.println("=> VALUES (after-merge) self: "+ "@" + iSl + System.identityHashCode(this) + ":"+ debugKey + " max: " + - // + longBitsToDouble(keyValues[iSl + 2]) + "=> "); - // } - return; - } - else { - // long keyPos = (slotEntry & MASK_POS) >> SHIFT_POS; - // int keyLen = (int) (slotEntry & MASK_LEN); - // System.out.println("Colliding " + new String(file.asSlice(pos,len).toArray(ValueLayout.JAVA_BYTE)) + - // " with key" + new String(file.asSlice(keyPos,keyLen).toArray(ValueLayout.JAVA_BYTE)) + - // " hash " + hash + " slot " + slot + "+" + probe + " at " + iSl); - // debug_reprobeMax = Math.max(debug_reprobeMax, probe); - } - } - throw new IllegalStateException("More than 20000 reprobes"); - } - - public void fill(MemorySegment file, TreeMap treeMap) { - for (int i = 0; i < keyValues.length / 5; i++) { - var ji = i * 5; - long keyE = keyValues[ji]; + public void fillMerge(MemorySegment file, TreeMap treeMap) { + for (int i = 0; i < keys.length; i++) { + var ji = i; + long keyE = keys[ji]; if (keyE != 0) { long keyPos = (keyE & MASK_POS) >> SHIFT_POS; int keyLen = (int) (keyE & MASK_LEN); byte[] keyBytes = new byte[keyLen]; MemorySegment.copy(file, JAVA_BYTE, keyPos, keyBytes, 0, keyLen); var key = new String(keyBytes); - var min = longBitsToDouble(keyValues[ji + 1]); - var max = longBitsToDouble(keyValues[ji + 2]); - var sum = longBitsToDouble(keyValues[ji + 3]); - var count = keyValues[ji + 4]; - treeMap.put(key, new ResultRow(min, sum / count, max)); + var vE = 
values[ji]; + var min = vE.min; + var max = vE.max; + var sum = vE.sum; + var count = vE.count; + treeMap.compute(key, (k, e) -> { + if (e == null) { + return new ResultRow(min, max, sum, count); + } + else { + return new ResultRow(Math.min(e.min, min), Math.max(e.max, max), e.sum + sum, e.count + count); + } + }); } } } - public String debugPrint(MemorySegment file) { - StringBuilder b = new StringBuilder(); - for (int i = 0; i < keyValues.length / 5; i++) { - var ji = i * 5; - long keyE = keyValues[ji]; - if (keyE != 0) { - long keyPos = (keyE & MASK_POS) >> SHIFT_POS; - int keyLen = (int) (keyE & MASK_LEN); - byte[] keyBytes = new byte[keyLen]; - MemorySegment.copy(file, JAVA_BYTE, keyPos, keyBytes, 0, keyLen); - var key = new String(keyBytes); - var min = longBitsToDouble(keyValues[ji + 1]); - var max = longBitsToDouble(keyValues[ji + 2]); - var sum = longBitsToDouble(keyValues[ji + 3]); - var count = keyValues[ji + 4]; - b.append("{").append(key).append("@").append(ji) - .append(",").append(min) - .append(",").append(max) - .append(",").append(sum) - .append(",").append(count).append("},"); - } - } - return b.toString(); - } + // public String debugPrint(MemorySegment file) { + // StringBuilder b = new StringBuilder(); + // for (int i = 0; i < keyValues.length / 5; i++) { + // var ji = i * 5; + // long keyE = keyValues[ji]; + // if (keyE != 0) { + // long keyPos = (keyE & MASK_POS) >> SHIFT_POS; + // int keyLen = (int) (keyE & MASK_LEN); + // byte[] keyBytes = new byte[keyLen]; + // MemorySegment.copy(file, JAVA_BYTE, keyPos, keyBytes, 0, keyLen); + // var key = new String(keyBytes); + // var min = longBitsToDouble(keyValues[ji + 1]); + // var max = longBitsToDouble(keyValues[ji + 2]); + // var sum = longBitsToDouble(keyValues[ji + 3]); + // var count = keyValues[ji + 4]; + // b.append("{").append(key).append("@").append(ji) + // .append(",").append(min) + // .append(",").append(max) + // .append(",").append(sum) + // .append(",").append(count).append("},"); + 
// } + // } + // return b.toString(); + // } } record Section(long start, long end) { } - private static record ResultRow(double min, double mean, double max) { + private static record ResultRow(double min, double max, double sum, long count) { public String toString() { - return round(min) + "/" + round(mean) + "/" + round(max); + return round(min) + "/" + round(((Math.round(sum * 10.0) / 10.0) / count)) + "/" + round(max); } private double round(double value) { diff --git a/src/main/java/dev/morling/onebrc/CalculateAverage_gauravdeshmukh.java b/src/main/java/dev/morling/onebrc/CalculateAverage_gauravdeshmukh.java new file mode 100644 index 000000000..def75ecfe --- /dev/null +++ b/src/main/java/dev/morling/onebrc/CalculateAverage_gauravdeshmukh.java @@ -0,0 +1,308 @@ +/* + * Copyright 2023 The original authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package dev.morling.onebrc; + +import java.io.File; +import java.io.RandomAccessFile; +import java.nio.ByteBuffer; +import java.nio.MappedByteBuffer; +import java.nio.channels.FileChannel; +import java.nio.charset.StandardCharsets; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.TreeMap; +import java.util.concurrent.Callable; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.Future; + +public class CalculateAverage_gauravdeshmukh { + + private static final String FILE = "./measurements.txt"; + private static final byte NEGATIVE_SIGN_BYTE = 0x2D; + private static final byte DOT_BYTE = 0x2E; + private static final int SEARCH_SPACE_BUFFER_SIZE = 140; + + private static final long SEMI_COLON_MASK = 0x3B3B3B3B3B3B3B3BL; + private static final long EOL_MASK = 0x0A0A0A0A0A0A0A0AL; + + private static class ByteString { + final private String string; + final private int staticHashCode; + + public ByteString(byte[] bytes) { + this.string = new String(bytes, StandardCharsets.UTF_8); + this.staticHashCode = this.string.hashCode(); + } + + public byte[] getBytes() { + return string.getBytes(StandardCharsets.UTF_8); + } + + @Override + public boolean equals(Object bs) { + return this.string.equals(bs.toString()); + } + + @Override + public int hashCode() { + return staticHashCode; + } + + @Override + public String toString() { + return this.string; + } + } + + private static class Measurement { + public ByteString station; + public int value; + + public Measurement(ByteString station, int value) { + this.station = station; + this.value = value; + } + + @Override + public String toString() { + StringBuffer sb = new StringBuffer(); + sb.append(station.toString()); + sb.append(";"); + sb.append(value); + return sb.toString(); + } + } + + private static class MeasurementAggregator { + private double min = Double.POSITIVE_INFINITY; + private 
double max = Double.NEGATIVE_INFINITY; + private int sum; + private long count; + + public String toString() { + return round(min / 10.0) + "/" + round(sum * 1.0 / 10.0 / count) + "/" + round(max / 10.0); + } + + private double round(double value) { + return Math.round(value * 10.0) / 10.0; + } + } + + public static void main(String[] args) throws Exception { + // long st = System.currentTimeMillis(); + int cores = 1; + + File file = new File(FILE); + long fileSize = file.length(); + if (fileSize > 1048576) { + cores = Runtime.getRuntime().availableProcessors(); + } + long chunkSize = fileSize / cores; + + ExecutorService executorService = Executors.newFixedThreadPool(cores); + List callableTasks = new ArrayList<>(cores); + RandomAccessFile raf = new RandomAccessFile(file, "r"); + long end = chunkSize, start = 0; + for (int i = 0; i < cores; i++) { + if (i < cores - 1) { + MappedByteBuffer mbb = raf.getChannel().map(FileChannel.MapMode.READ_ONLY, end, Math.min(SEARCH_SPACE_BUFFER_SIZE, fileSize - end)); + int eolIndex = -1; + int extraBytes = 0; + while (true) { + long word; + try { + word = mbb.getLong(); + } + catch (java.nio.BufferUnderflowException ex) { + byte[] remainingBytes = ByteBuffer.allocate(8).putLong(0).array(); + mbb.get(mbb.position(), remainingBytes, 0, mbb.remaining()); + word = ByteBuffer.wrap(remainingBytes).getLong(); + } + eolIndex = findEolInLong(word); + if (eolIndex > -1) { + extraBytes = extraBytes + eolIndex + 1; + break; + } + extraBytes += 8; + } + end = end + extraBytes; + } + + callableTasks.add(new ParallelFileReaderTask(start, (end - start), + raf.getChannel().map(FileChannel.MapMode.READ_ONLY, start, (end - start)))); + start = end; + end = Math.min(end + chunkSize, fileSize - 1); + } + List>> futures = executorService.invokeAll(callableTasks); + List> resultList = new ArrayList<>(futures.size()); + for (Future> future : futures) { + resultList.add(future.get()); + } + + Map resultMap = new TreeMap<>(); + for (Map map : resultList) 
{ + for (Map.Entry entry : map.entrySet()) { + MeasurementAggregator agg = resultMap.get(entry.getKey().toString()); + if (agg == null) { + agg = new MeasurementAggregator(); + resultMap.put(entry.getKey().toString(), agg); + } + agg.min = Math.min(agg.min, entry.getValue().min); + agg.max = Math.max(agg.max, entry.getValue().max); + agg.sum = agg.sum + entry.getValue().sum; + agg.count = agg.count + entry.getValue().count; + } + } + System.out.println(resultMap); + executorService.shutdown(); + // System.out.println("Time taken: " + (System.currentTimeMillis() - st)); + } + + private static int findEolInLong(long word) { + return findPositionInLong(word, EOL_MASK); + } + + private static int findSemiColonInLong(long word) { + return findPositionInLong(word, SEMI_COLON_MASK); + } + + private static int findPositionInLong(long word, long searchMask) { + long maskedWord = word ^ searchMask; + long tmp = (maskedWord - 0x0101010101010101L) & ~maskedWord & 0x8080808080808080L; + return tmp == 0 ? 
-1 : (Long.numberOfLeadingZeros(tmp) >>> 3); + } + + private static class ParallelFileReaderTask implements Callable> { + private long start; + private int size; + private MappedByteBuffer mbf; + byte[] bytes; + private static final int BATCH_READ_SIZE = 64; + Map map; + + public ParallelFileReaderTask(long start, long size, MappedByteBuffer mbf) { + this.start = start; + this.size = (int) size; + this.mbf = mbf; + this.bytes = new byte[BATCH_READ_SIZE]; + this.map = new HashMap<>(10000); + } + + @Override + public Map call() throws Exception { + int bytesReadTillNow = 0; + int startOfStation = 0, startOfNumber = -1, endOfStation = -1, endOfNumber = -1; + boolean isLastRead = false; + try { + while (bytesReadTillNow < this.size) { + int semiColonIndex = -1; + while (semiColonIndex == -1 && bytesReadTillNow < this.size) { + long currentWord; + try { + currentWord = mbf.getLong(); + } + catch (java.nio.BufferUnderflowException ex) { + int remainingBytesCount = this.size - bytesReadTillNow; + byte[] remainingBytes = ByteBuffer.allocate(8).putLong(0).array(); + mbf.get(bytesReadTillNow, remainingBytes, 0, remainingBytesCount); + currentWord = ByteBuffer.wrap(remainingBytes).getLong(); + } + semiColonIndex = findSemiColonInLong(currentWord); + if (semiColonIndex > -1) { + endOfStation = bytesReadTillNow + semiColonIndex; + startOfNumber = bytesReadTillNow + semiColonIndex + 1; + mbf.position(startOfNumber); + bytesReadTillNow += semiColonIndex + 1; + } + else { + bytesReadTillNow += 8; + } + } + + int stationLength = endOfStation - startOfStation; + byte[] stationBytes = new byte[stationLength]; + mbf.get(startOfStation, stationBytes, 0, stationLength); + + int eolIndex = -1; + while (eolIndex == -1 && bytesReadTillNow < this.size) { + long currentWord; + try { + currentWord = mbf.getLong(); + } + catch (java.nio.BufferUnderflowException ex) { + int remainingBytesCount = this.size - bytesReadTillNow; + byte[] remainingBytes = ByteBuffer.allocate(8).putLong(0).array(); + 
mbf.get(bytesReadTillNow, remainingBytes, 0, remainingBytesCount); + currentWord = ByteBuffer.wrap(remainingBytes).getLong(); + isLastRead = true; + } + eolIndex = findEolInLong(currentWord); + if (eolIndex > -1) { + endOfNumber = bytesReadTillNow + eolIndex; + startOfStation = bytesReadTillNow + eolIndex + 1; + mbf.position(startOfStation); + bytesReadTillNow += eolIndex + 1; + } + else { + bytesReadTillNow += 8; + } + if (isLastRead) { + bytesReadTillNow = this.size; + if (eolIndex == -1) { + endOfNumber = this.size; + } + } + } + + int numberLength = endOfNumber - startOfNumber; + byte[] numberBytes = new byte[numberLength]; + mbf.get(startOfNumber, numberBytes, 0, numberLength); + + Measurement measurement = new Measurement(new ByteString(stationBytes), + getIntegerFromTemperatureBytes(numberBytes)); + MeasurementAggregator aggregator = this.map.get(measurement.station); + if (aggregator == null) { + aggregator = new MeasurementAggregator(); + this.map.put(measurement.station, aggregator); + } + aggregator.min = Math.min(aggregator.min, measurement.value); + aggregator.max = Math.max(aggregator.max, measurement.value); + aggregator.sum += measurement.value; + aggregator.count++; + } + } + catch (Exception ex) { + throw ex; + } + + return this.map; + } + + private int getIntegerFromTemperatureBytes(byte[] numberBytes) { + int firstDigitIndex = (numberBytes[0] ^ NEGATIVE_SIGN_BYTE) == 0 ? 1 : 0; + int ret = 0; + for (int i = firstDigitIndex; i < numberBytes.length; i++) { + if ((numberBytes[i] ^ DOT_BYTE) != 0) { + ret = (ret << 3) + (ret << 1) + ((int) numberBytes[i] - 48); + } + } + return (firstDigitIndex > 0) ? 
-ret : ret; + } + } +} diff --git a/src/main/java/dev/morling/onebrc/CalculateAverage_gigiblender.java b/src/main/java/dev/morling/onebrc/CalculateAverage_gigiblender.java new file mode 100644 index 000000000..162d71209 --- /dev/null +++ b/src/main/java/dev/morling/onebrc/CalculateAverage_gigiblender.java @@ -0,0 +1,501 @@ +/* + * Copyright 2023 The original authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package dev.morling.onebrc; + +import sun.misc.Unsafe; + +import java.io.IOException; +import java.lang.foreign.Arena; +import java.lang.reflect.Field; +import java.nio.channels.FileChannel; +import java.nio.charset.StandardCharsets; +import java.nio.file.Paths; +import java.nio.file.StandardOpenOption; +import java.util.TreeMap; + +public class CalculateAverage_gigiblender { + private static final int AVAIL_CORES = Runtime.getRuntime().availableProcessors(); + private static final HashTable[] tables = new HashTable[AVAIL_CORES]; + + private static Unsafe unsafe; + static { + Field theUnsafe = null; + try { + theUnsafe = Unsafe.class.getDeclaredField("theUnsafe"); + theUnsafe.setAccessible(true); + unsafe = (Unsafe) theUnsafe.get(Unsafe.class); + } + catch (IllegalAccessException | NoSuchFieldException ignored) { + } + } + + private static final String FILE = "./measurements.txt"; + + static class HashTable { + + // 10_000 unique hashes -> + private static final int ENTRY_SIZE = 32; + private static final int NUM_ENTRIES = 16384; + private 
static final int DATA_SIZE = NUM_ENTRIES * ENTRY_SIZE; + + /* + * data[i -> i + 7] = 8 bytes hash + * data[i + 8 -> i + 15] = 7 bytes masked address of the string in the file. 1 byte for the length of the string + * data[i + 16 -> i + 19] = 4 bytes count + * data[i + 20 -> i + 21] = 2 bytes max + * data[i + 22 -> i + 23] = 2 bytes min -- sign preserved + * data[i + 24 -> i + 31] = 8 bytes sum + */ + byte[] data; + + private static final int HASH_OFFSET = 0; + + private static final int ADDR_OFFSET = 8; + private static final long ADDR_MASK = 0x00FFFFFFFFFFFFFFL; + private static final int STRING_LENGTH_SHIFT = 56; + + private static final int COUNT_OFFSET = 16; + + private static final int SUM_OFFSET = 24; + + private int reprobe_count; + + public HashTable() { + data = new byte[DATA_SIZE]; + // reprobe_count = 0; + } + + private long string_addr_and_length(long hash) { + return unsafe.getLong(data, Unsafe.ARRAY_BYTE_BASE_OFFSET + hash + ADDR_OFFSET); + } + + private static long string_addr(long encoded_str_addr) { + return (encoded_str_addr & ADDR_MASK); + } + + private static long string_length(long encoded_str_addr) { + return encoded_str_addr >>> STRING_LENGTH_SHIFT; + } + + private long count_max_min(long hash) { + return unsafe.getLong(data, Unsafe.ARRAY_BYTE_BASE_OFFSET + hash + COUNT_OFFSET); + } + + private static short mask_min(long count_max_min) { + // Preserve the sign + return (short) (count_max_min >> 6 * Byte.SIZE); + } + + private static short mask_max(long count_max_min) { + return (short) (count_max_min >>> 4 * Byte.SIZE); + } + + private static int mask_count(long count_max_min) { + return (int) count_max_min; + } + + private static long encode_count_max_min(int count, short max, short min) { + return ((long) count) | ((((long) max) & 0xFFFF) << 4 * Byte.SIZE) | (((long) min) << 6 * Byte.SIZE); + } + + private long sum(long hash) { + return unsafe.getLong(data, Unsafe.ARRAY_BYTE_BASE_OFFSET + hash + SUM_OFFSET); + } + + private static boolean 
string_equals(long string_addr, long entry_string_addr, int size_bytes) { + int remaining_bytes = size_bytes % 8; + int i = 0; + for (; i < size_bytes - remaining_bytes; i += 8) { + long entry_bytes = unsafe.getLong(entry_string_addr + i); + long string_bytes = unsafe.getLong(string_addr + i); + if (entry_bytes != string_bytes) { + return false; + } + } + // The hash function is not great, so I end up in this case a lot, so I take some risks. + // This never caused a SIGSEGV even though it might :) If it does, fall back to the commented version below. + // I will try to improve on the hash function + if (remaining_bytes != 0) { + long entry_bytes = unsafe.getLong(entry_string_addr + i); + long string_bytes = unsafe.getLong(string_addr + i); + // mask the bytes we care about + long mask = (1L << (remaining_bytes * Byte.SIZE)) - 1; + entry_bytes &= mask; + string_bytes &= mask; + return entry_bytes == string_bytes; + } + // for (; i < size_bytes; i++) { + // byte entry_byte = unsafe.getByte(entry_string_addr + i); + // byte string_byte = unsafe.getByte(string_addr + i); + // if (entry_byte != string_byte) { + // return false; + // } + // } + return true; + } + + public void insert(long hash, long string_addr, byte string_size, long final_number) { + assert string_addr >>> 56 == 0 : String.format("Expected final 8 bytes to be 0, got %s", Long.toBinaryString(string_addr)); + + long encoded_string_addr_and_length = string_addr | ((long) string_size << STRING_LENGTH_SHIFT); + assert string_addr(encoded_string_addr_and_length) == string_addr : String.format("Expected string addr to be %s, got %s", Long.toHexString(string_addr), + Long.toHexString(string_addr(encoded_string_addr_and_length))); + assert string_length(encoded_string_addr_and_length) == string_size + : String.format("Expected string length to be %s, got %s", string_size, string_length(encoded_string_addr_and_length)); + + long map_entry = apply_mask(hash * ENTRY_SIZE); + while (true) { + int entry_count0 = 
unsafe.getInt(data, Unsafe.ARRAY_BYTE_BASE_OFFSET + map_entry + COUNT_OFFSET); + if (entry_count0 == 0) { + // dump_insert(map_entry, hash, string_addr, string_size, final_number); + // Found an empty slot. Insert the entry here + unsafe.putLong(data, Unsafe.ARRAY_BYTE_BASE_OFFSET + map_entry + HASH_OFFSET, hash); + unsafe.putLong(data, Unsafe.ARRAY_BYTE_BASE_OFFSET + map_entry + ADDR_OFFSET, encoded_string_addr_and_length); + unsafe.putLong(data, Unsafe.ARRAY_BYTE_BASE_OFFSET + map_entry + COUNT_OFFSET, encode_count_max_min(1, (short) final_number, (short) final_number)); + unsafe.putLong(data, Unsafe.ARRAY_BYTE_BASE_OFFSET + map_entry + SUM_OFFSET, final_number); + + assert mask_count(encode_count_max_min(1, (short) final_number, (short) final_number)) == 1 : String.format("Expected count to be 1, got %s", + Integer.toBinaryString(mask_count(encode_count_max_min(1, (short) final_number, (short) final_number)))); + assert mask_max(encode_count_max_min(1, (short) final_number, (short) final_number)) == (short) final_number + : String.format("Expected max to be %s, got %s", final_number, + Integer.toBinaryString(mask_max(encode_count_max_min(1, (short) final_number, (short) final_number)))); + assert mask_min(encode_count_max_min(1, (short) final_number, (short) final_number)) == (short) final_number + : String.format("Expected min to be %s, got %s", final_number, + Integer.toBinaryString(mask_min(encode_count_max_min(1, (short) final_number, (short) final_number)))); + return; + } + else { + // Check if strings match. If yes, update. Otherwise, look for the next available slot + long entry_string_addr_and_length = string_addr_and_length(map_entry); + long entry_str_size = string_length(entry_string_addr_and_length); + + if (string_size != entry_str_size) { + // Strings are not the same size. 
Continue looking for the next slot + map_entry = apply_mask(map_entry + ENTRY_SIZE); + // reprobe_count++; + } + else { + long entry_string_addr = string_addr(entry_string_addr_and_length); + if (string_equals(string_addr, entry_string_addr, string_size)) { + // Strings are the same. Update the entry + long entry_count_max_min = count_max_min(map_entry); + int entry_count = mask_count(entry_count_max_min); + short entry_max = mask_max(entry_count_max_min); + short entry_min = mask_min(entry_count_max_min); + + entry_count++; + assert (int) final_number == final_number : String.format("Expected final number to be an int, got %s", final_number); + entry_max = (short) Math.max(entry_max, (int) final_number); + entry_min = (short) Math.min(entry_min, (int) final_number); + + long entry_sum = sum(map_entry); + entry_sum += final_number; + + unsafe.putLong(data, Unsafe.ARRAY_BYTE_BASE_OFFSET + map_entry + COUNT_OFFSET, encode_count_max_min(entry_count, entry_max, entry_min)); + unsafe.putLong(data, Unsafe.ARRAY_BYTE_BASE_OFFSET + map_entry + SUM_OFFSET, entry_sum); + return; + } + else { + // Strings are not the same. 
Continue looking for the next slot + map_entry = apply_mask(map_entry + ENTRY_SIZE); + // reprobe_count++; + } + } + } + } + } + + private static long apply_mask(long hash) { + return hash & (DATA_SIZE - 1); + } + + public void update_res(TreeMap result_map) { + // System.err.println("Reprobe count: " + reprobe_count); + Result r = new Result(); + + for (int i = 0; i < NUM_ENTRIES; i++) { + long entry_addr_offset = (long) i * ENTRY_SIZE; + long entry_count_max_min = count_max_min(entry_addr_offset); + int entry_count = mask_count(entry_count_max_min); + if (entry_count == 0) { + continue; + } + long entry_string_addr_and_length = string_addr_and_length(entry_addr_offset); + long entry_string_addr = string_addr(entry_string_addr_and_length); + long entry_string_length = string_length(entry_string_addr_and_length); + + // no reason to copy the byte array twice here but what can you do... + byte[] bytes = new byte[(int) entry_string_length]; + unsafe.copyMemory(null, entry_string_addr, bytes, Unsafe.ARRAY_BYTE_BASE_OFFSET, entry_string_length); + String s = new String(bytes, StandardCharsets.UTF_8); + + short entry_max = mask_max(entry_count_max_min); + short entry_min = mask_min(entry_count_max_min); + + long entry_sum = sum(entry_addr_offset); + + Result ret = result_map.putIfAbsent(s, r); + if (ret == null) { + r.count = entry_count; + r.max = entry_max; + r.min = entry_min; + r.sum = entry_sum; + r = new Result(); + } + else { + ret.count += entry_count; + ret.max = (short) Math.max(ret.max, entry_max); + ret.min = (short) Math.min(ret.min, entry_min); + ret.sum += entry_sum; + } + } + } + + public void dump_insert(long map_entry, long hash, long string_addr, byte string_size, long final_number) { + System.out.println("START dump_insert"); + System.out.println("Inserting " + final_number + " with hash " + hash); + System.out.println("Map entry: " + map_entry); + System.out.println("String addr: " + string_addr + " with length " + string_size); + dump(string_addr, 
string_addr + string_size); + System.out.println("END dump_insert"); + } + } + + static class Result { + public int count; + public short max; + public short min; + public long sum; + + private double round(double value) { + return Math.round(value * 10.0) / 10.0; + } + + @Override + public String toString() { + return round(min / 10.) + "/" + round(sum / (double) (10 * count)) + "/" + round(max / 10.); + } + } + + private static void compute_slice(final long base_addr, final long slice_size, final long file_size, final int thread_index) { + HashTable my_table; + if (!SINGLE_CORE) { + my_table = new HashTable(); + tables[thread_index] = my_table; + } + else { + if (tables[0] == null) { + tables[0] = new HashTable(); + } + my_table = tables[0]; + } + + long cur_addr = base_addr + (long) thread_index * slice_size; + // Lookup the next newline. If thread_index == 0 then start right away + if (thread_index != 0) { + while (unsafe.getByte(cur_addr) != '\n') { + cur_addr++; + } + cur_addr++; + } + + long end_addr = base_addr + (long) (thread_index + 1) * slice_size; + if (thread_index == (AVAIL_CORES - 1)) { + // Last thread. We need to read until the end of the file + end_addr = base_addr + file_size; + } + else { + // look ahead for the next newline + while (unsafe.getByte(end_addr) != '\n') { + end_addr++; + } + end_addr++; + } + + // We now have a well-defined interval [cur_addr, end_addr) to work on + long hash = -2346162244362633811L; + byte string_size = 0; + long string_addr = cur_addr; + while (cur_addr < end_addr) { + long value_mem = unsafe.getLong(cur_addr); + int semicolon_byte_index = get_semicolon_index(value_mem); + + string_size += (byte) semicolon_byte_index; + + // dump(cur_addr, cur_addr + semicolon_byte_index); + + if (semicolon_byte_index != 8) { + long value_mem_up_to_semicolon = value_mem & ((1L << (semicolon_byte_index * Byte.SIZE)) - 1); + + // We have a semicolon, so the hash is complete now. 
We can construct the number + // and insert it into the hash table + long start_num_addr = cur_addr + semicolon_byte_index + 1; + + // Always read the next 8 bytes for the number. It seems that this is faster than + // checking if the whole number is in the current 8 bytes and only reading if it is not + long number_mem_value = unsafe.getLong(start_num_addr); + long number_len_bytes = get_newline_index(number_mem_value); + + long final_number = extract_number(number_mem_value, number_len_bytes); + + // 0.2421196 % reprobe rate + hash = compute_hash(hash ^ value_mem_up_to_semicolon); + + // We have the final number now. We can insert it into the hash table + my_table.insert(hash, string_addr, string_size, final_number); + // Now we can move on to the next line + hash = -2346162244362633811L; + string_size = 0; + cur_addr = start_num_addr + number_len_bytes + 1; + string_addr = cur_addr; + } + else { + // No semicolon in the 8 bytes read. Continue reading + hash = hash ^ value_mem; + cur_addr += 8; + } + } + assert cur_addr == end_addr : String.format("Expected cur_addr to be %s, got %s", end_addr, cur_addr); + } + + private static long extract_number(long number_mem_value, long number_len_bytes) { + // Pray for GVN/CSE and Sea of Nodes moving the mess below in the proper places because + // I don't want to spend the time to do it properly :) + long number_mem_dot_index = get_dot_index(number_mem_value); + + int fractional_part = get_fractional_part(number_mem_value, number_len_bytes); + int sign = get_sign(number_mem_value); + int skip_sign = skip_sign(number_mem_value); + + long number_mem_value_no_sign = number_mem_value >>> (skip_sign << 3); + // Two cases: either there's a single digit before the dot, or there's two + // Start from the dot index and go backwards + long new_number_mem_dot_index = number_mem_dot_index - skip_sign; + long read_byte_mask = 0xFFL << ((new_number_mem_dot_index - 1) * Byte.SIZE); + long ones = ((number_mem_value_no_sign & 
read_byte_mask) >>> ((new_number_mem_dot_index - 1) * Byte.SIZE)) - 0x30; + // Should be 0 due to the multiplication if there's only one digit before the dot + long tens = ((number_mem_value_no_sign & 0xFFL) - 0x30) * (new_number_mem_dot_index - 1); + + long final_number = (tens * 100 + ones * 10 + fractional_part) * sign; + return final_number; + } + + private static int get_fractional_part(long number_mem_value, long number_len_bytes) { + return (int) ((number_mem_value >>> ((number_len_bytes - 1) * Byte.SIZE)) & 0xFF) - 0x30; + } + + private static int skip_sign(long number_mem_value) { + // return 1 if char is '-', 0 if it is not + long diff = (number_mem_value & 0xFF) - 0x2D; + long sign = (diff | -diff) >>> 63; + return (int) ((sign - 1) * -1); + } + + private static int get_sign(long number_mem_value) { + // return 1 if char is not '-', -1 if it is + long diff = (number_mem_value & 0xFF) - 0x2D; + long sign = (diff | -diff) >>> 63; + return (int) (-2 * sign + 1) * -1; + } + + private static long compute_hash(long x) { // Hash burrowed from artsiomkorzun and slightly changed + long h = x * -7046029254386353131L; + long h1 = h ^ (h >>> 32); + h = h ^ (h << 32); + return h1 ^ h; + } + + private static void dump(long startAddr, long endAddr) { + byte[] bytes = new byte[(int) (endAddr - startAddr)]; + unsafe.copyMemory(null, startAddr, bytes, Unsafe.ARRAY_BYTE_BASE_OFFSET, bytes.length); + String s = new String(bytes, StandardCharsets.UTF_8); + System.out.println(s); + // Dump the bytes to binary form + for (byte b : bytes) { + System.out.print(Integer.toBinaryString(b & 0xFF)); + System.out.print(" "); + } + System.out.println(); + // Dump the bytes to hex form + for (byte b : bytes) { + System.out.print(Integer.toHexString(b & 0xFF)); + System.out.print(" "); + } + System.out.println(); + } + + private static int get_byte_0_index(long value) { + long res = (value - 0x0101010101010101L) & (~value & 0x8080808080808080L); + res = Long.numberOfTrailingZeros(res) >> 
3; + return (int) res; + } + + private static int get_dot_index(long value) { + long temp = value ^ 0x2E2E2E2E2E2E2E2EL; + return get_byte_0_index(temp); + } + + private static int get_newline_index(long value) { + long temp = value ^ 0x0A0A0A0A0A0A0A0AL; + return get_byte_0_index(temp); + } + + private static int get_semicolon_index(long value) { + long temp = value ^ 0x3B3B3B3B3B3B3B3BL; + return get_byte_0_index(temp); + } + + private static final boolean SINGLE_CORE = false; + + public static void main(String[] args) throws IOException, InterruptedException { + FileChannel file_channel = FileChannel.open(Paths.get(FILE), StandardOpenOption.READ); + long file_size = file_channel.size(); + long base_addr = file_channel.map(FileChannel.MapMode.READ_ONLY, 0, file_size, Arena.global()).address(); + + if (!SINGLE_CORE) { + int num_threads = AVAIL_CORES; + Thread[] threads = new Thread[num_threads]; + for (int i = 0; i < num_threads; i++) { + int finalI = i; + threads[i] = new Thread(() -> { + long slice_size = file_size / AVAIL_CORES; + compute_slice(base_addr, slice_size, file_size, finalI); + }); + threads[i].start(); + } + + TreeMap result_map = new TreeMap<>(); + for (int i = 0; i < num_threads; i++) { + threads[i].join(); + tables[i].update_res(result_map); + } + + System.out.println(result_map); + } + else { + for (int i = 0; i < AVAIL_CORES; i++) { + int finalI = i; + long slice_size = file_size / AVAIL_CORES; + compute_slice(base_addr, slice_size, file_size, finalI); + } + + TreeMap result_map = new TreeMap<>(); + tables[0].update_res(result_map); + + System.out.println(result_map); + } + } +} diff --git a/src/main/java/dev/morling/onebrc/CalculateAverage_giovannicuccu.java b/src/main/java/dev/morling/onebrc/CalculateAverage_giovannicuccu.java new file mode 100644 index 000000000..cd9591f1a --- /dev/null +++ b/src/main/java/dev/morling/onebrc/CalculateAverage_giovannicuccu.java @@ -0,0 +1,457 @@ +/* + * Copyright 2023 The original authors + * + * Licensed 
under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package dev.morling.onebrc; + +import jdk.incubator.vector.ByteVector; +import jdk.incubator.vector.IntVector; +import jdk.incubator.vector.VectorOperators; +import jdk.incubator.vector.VectorSpecies; + +import static java.util.stream.Collectors.*; + +import java.io.IOException; +import java.io.RandomAccessFile; +import java.lang.foreign.Arena; +import java.lang.foreign.MemorySegment; +import java.lang.foreign.ValueLayout; +import java.nio.ByteBuffer; +import java.nio.ByteOrder; +import java.nio.MappedByteBuffer; +import java.nio.channels.FileChannel; +import java.nio.charset.StandardCharsets; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.nio.file.StandardOpenOption; +import java.util.*; +import java.util.concurrent.*; + +/* + Solution without unsafe that borrows the ideas of splullara, thomasvue, royvanrijn and merykitty + */ + +public class CalculateAverage_giovannicuccu { + + private static final String FILE = "./measurements.txt"; + + private static final VectorSpecies BYTE_SPECIES = ByteVector.SPECIES_256; + private static final int BYTE_SPECIES_LANES = BYTE_SPECIES.length(); + private static final ByteOrder NATIVE_ORDER = ByteOrder.nativeOrder(); + public static final VectorSpecies INT_SPECIES = IntVector.SPECIES_256; + public static final int INT_SPECIES_LANES = INT_SPECIES.length(); + + public static final int KEY_SIZE = 128; + + public static record 
PartitionBoundary(Path path, long start, long end) { + } + + public static interface PartitionCalculator { + List computePartitionsBoundaries(Path path); + } + + public static class ProcessorPartitionCalculator implements PartitionCalculator { + + public List computePartitionsBoundaries(Path path) { + try { + int numberOfSegments = Runtime.getRuntime().availableProcessors(); + long fileSize = path.toFile().length(); + long segmentSize = fileSize / numberOfSegments; + List segmentBoundaries = new ArrayList<>(numberOfSegments); + try (RandomAccessFile randomAccessFile = new RandomAccessFile(path.toFile(), "r")) { + long segStart = 0; + long segEnd = segmentSize; + for (int i = 0; i < numberOfSegments; i++) { + segEnd = findEndSegment(randomAccessFile, segEnd, fileSize); + segmentBoundaries.add(new PartitionBoundary(path, segStart, segEnd)); + segStart = segEnd; + segEnd = Math.min(segEnd + segmentSize, fileSize); + } + } + return segmentBoundaries; + } + catch (IOException e) { + throw new RuntimeException(e); + } + } + + private long findEndSegment(RandomAccessFile raf, long location, long fileSize) throws IOException { + raf.seek(location); + while (location < fileSize) { + location++; + if (raf.read() == 10) + break; + } + return location; + } + } + + private static class MeasurementAggregatorVectorized { + + private int min; + private int max; + private double sum; + private long count; + private final int len; + private final int hash; + + private final int offset; + private byte[] data; + + public MeasurementAggregatorVectorized(byte[] data, int offset, int len, int hash, int initialValue) { + min = initialValue; + max = initialValue; + sum = initialValue; + count = 1; + this.len = len; + this.hash = hash; + this.offset = offset; + this.data = data; + } + + public void add(int value) { + if (value < min) { + min = value; + } + if (value > max) { + max = value; + } + sum += value; + count++; + } + + public void merge(MeasurementAggregatorVectorized other) { + 
min = Math.min(min, other.min); + max = Math.max(max, other.max); + sum += other.sum; + count += other.count; + } + + @Override + public String toString() { + return round(min / 10.) + "/" + round(sum / (double) (10 * count)) + "/" + round(max / 10.); + } + + private double round(double value) { + return Math.round(value * 10.0) / 10.0; + } + + public int getMin() { + return min; + } + + public int getHash() { + return hash; + } + + public int getLen() { + return len; + } + + public boolean dataEquals(byte[] data, int offset) { + return Arrays.equals(this.data, this.offset, this.offset + len, data, offset, offset + len); + + } + + public String getName() { + return new String(data, offset, len, StandardCharsets.UTF_8); + } + + public int getOffset() { + return offset; + } + + public byte[] getData() { + return data; + } + } + + private static class MeasurementListVectorized { + private static final int SIZE = 1024 * 64; + private final MeasurementAggregatorVectorized[] measurements = new MeasurementAggregatorVectorized[SIZE]; + private final byte[] keyData = new byte[SIZE * KEY_SIZE]; + + private final MemorySegment dataSegment = MemorySegment.ofArray(keyData); + + private final byte[] lineData = new byte[SIZE]; + + private final MemorySegment lineSegment = MemorySegment.ofArray(lineData); + + public void add(int len, int hash, int value, MemorySegment memorySegment, long offset) { + MemorySegment.copy(memorySegment, offset, lineSegment, 0, len); + int index = hash & (SIZE - 1); + while (measurements[index] != null) { + if (measurements[index].getHash() == hash && measurements[index].getLen() == len) { + if (Arrays.equals(keyData, index * KEY_SIZE, index * KEY_SIZE + len, lineData, 0, len)) { + measurements[index].add(value); + return; + } + } + index = (index + 1) & (SIZE - 1); + } + MemorySegment.copy(memorySegment, offset, dataSegment, (long) index * KEY_SIZE, len); + measurements[index] = new MeasurementAggregatorVectorized(keyData, index * KEY_SIZE, len, hash, 
value); + } + + public void addWithByteVector(ByteVector chunk1, int len, int hash, int value, MemorySegment memorySegment, long offset) { + int index = hash & (SIZE - 1); + while (measurements[index] != null) { + if (measurements[index].getLen() == len && measurements[index].getHash() == hash) { + var nodeKey = ByteVector.fromArray(BYTE_SPECIES, keyData, index * KEY_SIZE); + long eqMask = chunk1.compare(VectorOperators.EQ, nodeKey).toLong(); + long validMask = -1L >>> (64 - len); + if ((eqMask & validMask) == validMask) { + measurements[index].add(value); + return; + } + } + index = (index + 1) & (SIZE - 1); + } + MemorySegment.copy(memorySegment, offset, dataSegment, (long) index * KEY_SIZE, len); + measurements[index] = new MeasurementAggregatorVectorized(keyData, index * KEY_SIZE, len, hash, value); + } + + public void merge(MeasurementAggregatorVectorized measurementAggregator) { + int index = measurementAggregator.getHash() & (SIZE - 1); + while (measurements[index] != null) { + if (measurements[index].getLen() == measurementAggregator.getLen() && measurements[index].getHash() == measurementAggregator.getHash()) { + if (measurementAggregator.dataEquals(measurements[index].getData(), measurements[index].getOffset())) { + measurements[index].merge(measurementAggregator); + return; + } + } + index = (index + 1) & (SIZE - 1); + } + measurements[index] = measurementAggregator; + } + + public MeasurementAggregatorVectorized[] getMeasurements() { + return measurements; + } + + } + + private static class MMapReaderMemorySegment { + + private final Path path; + private final List boundaries; + private final boolean serial; + private static final ValueLayout.OfLong JAVA_LONG_LT = ValueLayout.JAVA_LONG_UNALIGNED.withOrder(ByteOrder.LITTLE_ENDIAN); + + public MMapReaderMemorySegment(Path path, PartitionCalculator partitionCalculator, boolean serial) { + this.path = path; + this.serial = serial; + boundaries = partitionCalculator.computePartitionsBoundaries(path); + } + + 
public TreeMap elaborate() throws IOException { + try (ExecutorService executor = Executors.newFixedThreadPool(boundaries.size()); + FileChannel fileChannel = (FileChannel) Files.newByteChannel((path), StandardOpenOption.READ); + var arena = Arena.ofShared()) { + + List> futures = new ArrayList<>(); + for (PartitionBoundary boundary : boundaries) { + if (serial) { + FutureTask future = new FutureTask<>(() -> computeListForPartition( + fileChannel, boundary)); + future.run(); + futures.add(future); + } + else { + Future future = executor.submit(() -> computeListForPartition( + fileChannel, boundary)); + futures.add(future); + } + } + TreeMap ris = reduce(futures); + return ris; + } + } + + private TreeMap reduce(List> futures) { + try { + TreeMap risMap = new TreeMap<>(); + MeasurementListVectorized ris = new MeasurementListVectorized(); + for (Future future : futures) { + MeasurementListVectorized results = future.get(); + merge(ris, results); + } + for (MeasurementAggregatorVectorized m : ris.getMeasurements()) { + if (m != null) { + risMap.put(m.getName(), m); + } + } + return risMap; + } + catch (InterruptedException | ExecutionException ie) { + System.err.println(ie); + throw new RuntimeException(ie); + } + } + + private void merge(MeasurementListVectorized result, MeasurementListVectorized partial) { + for (MeasurementAggregatorVectorized m : partial.getMeasurements()) { + if (m != null) { + result.merge(m); + } + } + } + + private final long ALL_ONE = -1L; + private static final long DELIMITER_MASK = 0x3B3B3B3B3B3B3B3BL; + + private static final byte SEPARATOR = ';'; + private final static ByteVector SEPARATORS = ByteVector.broadcast(BYTE_SPECIES, SEPARATOR); + + private MeasurementListVectorized computeListForPartition(FileChannel fileChannel, PartitionBoundary boundary) { + try (var arena = Arena.ofConfined()) { + var memorySegment = fileChannel.map(FileChannel.MapMode.READ_ONLY, boundary.start(), boundary.end() - boundary.start(), arena); + 
MeasurementListVectorized list = new MeasurementListVectorized(); + long size = memorySegment.byteSize(); + long offset = 0; + long safe = size - KEY_SIZE; + while (offset < safe) { + int len = 0; + var line = ByteVector.fromMemorySegment(BYTE_SPECIES, memorySegment, offset, NATIVE_ORDER); + len = line.compare(VectorOperators.EQ, SEPARATORS).firstTrue(); + if (len == BYTE_SPECIES_LANES) { + int position1 = -1; + int incr = BYTE_SPECIES_LANES; + while (position1 == -1) { + long readBuffer = memorySegment.get(JAVA_LONG_LT, offset + incr); + long comparisonResult1 = (readBuffer ^ DELIMITER_MASK); + long highBitMask1 = (comparisonResult1 - 0x0101010101010101L) & (~comparisonResult1 & 0x8080808080808080L); + + boolean noContent1 = highBitMask1 == 0; + position1 = noContent1 ? -1 : Long.numberOfTrailingZeros(highBitMask1) >> 3; + len += noContent1 ? 8 : position1; + incr += 8; + } + int hash = hash(memorySegment, offset, len); + long prevOffset = offset; + offset += len + 1; + + long numberWord = memorySegment.get(JAVA_LONG_LT, offset); + int decimalSepPos = Long.numberOfTrailingZeros(~numberWord & 0x10101000); + int value = convertIntoNumber(decimalSepPos, numberWord); + offset += (decimalSepPos >>> 3) + 3; + list.add(len, hash, value, memorySegment, prevOffset); + } + else { + int hash = hash(memorySegment, offset, len); + long prevOffset = offset; + offset += len + 1; + + long numberWord = memorySegment.get(JAVA_LONG_LT, offset); + int decimalSepPos = Long.numberOfTrailingZeros(~numberWord & 0x10101000); + int value = convertIntoNumber(decimalSepPos, numberWord); + offset += (decimalSepPos >>> 3) + 3; + list.addWithByteVector(line, len, hash, value, memorySegment, prevOffset); + } + } + + while (offset < size) { + int len = 0; + while (memorySegment.get(ValueLayout.JAVA_BYTE, offset + len) != ';') { + len++; + } + int hash = hash(memorySegment, offset, len); + long prevOffset = offset; + offset += len + 1; + + int value = 0; + if (offset < size - 8) { + long 
numberWord = memorySegment.get(JAVA_LONG_LT, offset); + int decimalSepPos = Long.numberOfTrailingZeros(~numberWord & 0x10101000); + value = convertIntoNumber(decimalSepPos, numberWord); + offset += (decimalSepPos >>> 3) + 3; + } + else { + long currentPosition = offset; + int sign = 1; + byte b = memorySegment.get(ValueLayout.JAVA_BYTE, currentPosition++); + if (b == '-') { + sign = -1; + } + else { + value = b - '0'; + } + while ((b = memorySegment.get(ValueLayout.JAVA_BYTE, currentPosition++)) != '.') { + value = value * 10 + (b - '0'); + } + b = memorySegment.get(ValueLayout.JAVA_BYTE, currentPosition); + value = value * 10 + (b - '0'); + if (sign == -1) { + value = -value; + } + offset = currentPosition + 2; + } + list.add(len, hash, value, memorySegment, prevOffset); + } + return list; + } + catch (IOException e) { + throw new RuntimeException(e); + } + } + + private static final int GOLDEN_RATIO = 0x9E3779B9; + private static final int HASH_LROTATE = 5; + + private static int hash(MemorySegment memorySegment, long start, int len) { + int x; + int y; + if (len >= Integer.BYTES) { + x = memorySegment.get(ValueLayout.JAVA_INT_UNALIGNED, start); + y = memorySegment.get(ValueLayout.JAVA_INT_UNALIGNED, start + len - Integer.BYTES); + } + else { + x = memorySegment.get(ValueLayout.JAVA_BYTE, start); + y = memorySegment.get(ValueLayout.JAVA_BYTE, start + len - Byte.BYTES); + } + return (Integer.rotateLeft(x * GOLDEN_RATIO, HASH_LROTATE) ^ y) * GOLDEN_RATIO; + } + + private static int convertIntoNumber(int decimalSepPos, long numberWord) { + int shift = 28 - decimalSepPos; + // signed is -1 if negative, 0 otherwise + long signed = (~numberWord << 59) >> 63; + long designMask = ~(signed & 0xFF); + // Align the number to a specific position and transform the ascii code + // to actual digit value in each byte + long digits = ((numberWord & designMask) << shift) & 0x0F000F0F00L; + + // Now digits is in the form 0xUU00TTHH00 (UU: units digit, TT: tens digit, HH: hundreds 
digit) + // 0xUU00TTHH00 * (100 * 0x1000000 + 10 * 0x10000 + 1) = + // 0x000000UU00TTHH00 + + // 0x00UU00TTHH000000 * 10 + + // 0xUU00TTHH00000000 * 100 + // Now TT * 100 has 2 trailing zeroes and HH * 100 + TT * 10 + UU < 0x400 + // This results in our value lies in the bit 32 to 41 of this product + // That was close :) + long absValue = ((digits * 0x640a0001) >>> 32) & 0x3FF; + long value = (absValue ^ signed) - signed; + return (int) value; + } + + } + + public static void main(String[] args) throws IOException { + MMapReaderMemorySegment reader = new MMapReaderMemorySegment(Paths.get(FILE), new ProcessorPartitionCalculator(), false); + Map measurements = reader.elaborate(); + System.out.println(measurements); + + } +} diff --git a/src/main/java/dev/morling/onebrc/CalculateAverage_godofwharf.java b/src/main/java/dev/morling/onebrc/CalculateAverage_godofwharf.java new file mode 100644 index 000000000..3d3e0a75b --- /dev/null +++ b/src/main/java/dev/morling/onebrc/CalculateAverage_godofwharf.java @@ -0,0 +1,588 @@ +package dev.morling.onebrc; + +/* + * Copyright 2023 The original authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import jdk.incubator.vector.ByteVector; +import jdk.incubator.vector.Vector; +import jdk.incubator.vector.VectorSpecies; + +import java.io.IOException; +import java.io.RandomAccessFile; +import java.lang.foreign.Arena; +import java.lang.foreign.MemorySegment; +import java.lang.foreign.ValueLayout; +import java.lang.management.ManagementFactory; +import java.nio.ByteBuffer; +import java.nio.channels.FileChannel; +import java.util.*; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.Future; +import java.util.function.BiConsumer; +import java.util.stream.IntStream; + +import static java.nio.charset.StandardCharsets.UTF_8; + +public class CalculateAverage_godofwharf { + private static final String FILE = "./measurements.txt"; + private static final boolean DEBUG = Boolean.parseBoolean(System.getProperty("debug", "false")); + private static final int NCPU = Runtime.getRuntime().availableProcessors(); + + private static final VectorSpecies PREFERRED_SPECIES = VectorSpecies.ofPreferred(byte.class); + + private static final Vector NEW_LINE_VEC = PREFERRED_SPECIES.broadcast('\n'); + // This array is used for quick conversion of fractional part + private static final double[] DOUBLES = new double[]{ 0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9 }; + // This array is used for quick conversion from ASCII to digit + private static final int[] DIGIT_LOOKUP = new int[]{ + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, 0, 1, + 2, 3, 4, 5, 6, 7, 8, 9, -1, -1 }; + private static final int MAX_STR_LEN = 108; + private static final int DEFAULT_HASH_TBL_SIZE = 4096; + private static final int DEFAULT_PAGE_SIZE = 8_388_608; // 8 MB + private static final int PAGE_SIZE = 
Integer.parseInt(System.getProperty("pageSize", STR."\{DEFAULT_PAGE_SIZE}")); + + public static void main(String[] args) throws Exception { + long startTimeMs = System.currentTimeMillis(); + Map measurements = compute(); + long time1 = System.nanoTime(); + System.out.println(measurements); + printDebugMessage("Print took %d ns%n", (System.nanoTime() - time1)); + printDebugMessage("Took %d ms%n", System.currentTimeMillis() - startTimeMs); + printDebugMessage("Time spent on GC=%d ms%n", ManagementFactory.getGarbageCollectorMXBeans().get(0).getCollectionTime()); + System.exit(0); + } + + private static Map compute() throws Exception { + int nThreads = Integer.parseInt( + System.getProperty("threads", STR."\{NCPU}")); + printDebugMessage("Running program with %d threads %n", nThreads); + Job job = new Job(nThreads - 1); + job.compute(FILE); + return job.sort(); + } + + public static class Job { + private final int nThreads; + private final State[] threadLocalStates; + private final Map globalMap = new ConcurrentHashMap<>(DEFAULT_HASH_TBL_SIZE); + private final ExecutorService executorService; + + public Job(final int nThreads) { + this.threadLocalStates = new State[(nThreads << 4)]; + IntStream.range(0, nThreads << 4) + .forEach(i -> threadLocalStates[i] = new State()); + this.nThreads = nThreads; + this.executorService = Executors.newFixedThreadPool(nThreads); + } + + public void compute(final String path) throws Exception { + // Create a random access file so that we can map the contents of the file into native memory for faster access + try (RandomAccessFile file = new RandomAccessFile(path, "r")) { + // Create a memory segment for the entire file + MemorySegment globalSegment = file.getChannel().map( + FileChannel.MapMode.READ_ONLY, 0, file.length(), Arena.global()); + long fileLength = file.length(); + // Ensure that the split length never exceeds Integer.MAX_VALUE. This is because ByteBuffers cannot + // be larger than 2 GiB. 
+ int splitLength = (int) Math.min(Integer.MAX_VALUE, Math.max(PAGE_SIZE, Math.rint(fileLength * 1.0 / nThreads))); + printDebugMessage("fileLength = %d, splitLength = %d%n", file.length(), splitLength); + long time1 = System.nanoTime(); + // Break the file into multiple splits. One thread would process one split. + // This routine makes sure that the splits are uniformly sized to the best extent possible. + // Each split would either end with a '\n' character or EOF + List splits = breakFileIntoSplits(file, splitLength, PAGE_SIZE, globalSegment, false); + printDebugMessage("Number of splits = %d, splits = [%s]%n", splits.size(), splits); + printDebugMessage("Splits calculation took %d ns%n", System.nanoTime() - time1); + // consume splits in parallel using the common fork join pool + long time = System.nanoTime(); + List> futures = new ArrayList<>(splits.size() * 2); + splits + .forEach(split -> { + // process splits concurrently using a thread pool + futures.add(executorService.submit(() -> { + MemorySegment splitSegment = globalSegment.asSlice(split.offset, split.length); + splitSegment.load(); + int tid = (int) Thread.currentThread().threadId(); + byte[] currentPage = new byte[PAGE_SIZE + MAX_STR_LEN]; + // iterate over each page in split + for (Page page : split.pages) { + // this byte buffer should end with '\n' or EOF + MemorySegment segment = globalSegment.asSlice(page.offset, page.length); + MemorySegment.copy(segment, ValueLayout.JAVA_BYTE, 0L, currentPage, 0, (int) page.length); + SearchResult searchResult = findNewLinesVectorized(currentPage, (int) page.length); + int prevOffset = 0; + int j = 0; + // iterate over search results + while (j < searchResult.len) { + int curOffset = searchResult.offsets[j]; + byte ch1 = currentPage[curOffset - 4]; + byte ch2 = currentPage[curOffset - 5]; + int temperatureLen = 5; + if (ch1 == ';') { + temperatureLen = 3; + } + else if (ch2 == ';') { + temperatureLen = 4; + } + int lineLength = curOffset - prevOffset; + int 
stationLen = lineLength - temperatureLen - 1; + byte[] station = new byte[stationLen]; + System.arraycopy(currentPage, prevOffset, station, 0, stationLen); + int hashcode = Arrays.hashCode(station); + double temperature = NumberUtils.parseDouble2(currentPage, prevOffset + stationLen + 1, temperatureLen); + Measurement m = new Measurement(station, temperature, hashcode); + threadLocalStates[tid].update(m); + prevOffset = curOffset + 1; + j++; + } + // Explicitly commented out because unload seems to take a lot of time + // segment.unload(); + } + mergeInternal(threadLocalStates[tid]); + })); + }); + for (Future future : futures) { + future.get(); + } + printDebugMessage("Aggregate took %d ns%n", (System.nanoTime() - time)); + } + } + + private void mergeInternal(final State state) { + state.state.forEach((k, v) -> { + globalMap.compute(k.toString(), (ignored, agg) -> { + if (agg == null) { + agg = v; + } + else { + agg.merge(v); + } + return agg; + }); + }); + } + + public Map sort() { + long time = System.nanoTime(); + Map sortedMap = new TreeMap<>(globalMap); + printDebugMessage("Tree map construction took %d ns%n", (System.nanoTime() - time)); + return sortedMap; + } + + private static LineMetadata findNextOccurrenceOfNewLine(final ByteBuffer buffer, + final int capacity, + final int offset) { + int maxLen = capacity - offset; + byte[] src = new byte[Math.min(MAX_STR_LEN, maxLen)]; + byte[] station = new byte[src.length]; + byte[] temperature = new byte[5]; + buffer.position(offset); + buffer.get(src); + int i = 0; + int j = 0; + int k = 0; + boolean isAscii = true; + boolean afterDelim = false; + int hashCode = 0; + for (; i < src.length; i++) { + byte b = src[i]; + if (b < 0) { + isAscii = false; + } + if (!afterDelim && b != '\n') { + if (b == ';') { + afterDelim = true; + } + else { + hashCode = hashCode * 31 + b; + station[j++] = b; + } + } + else if (b != '\n') { + temperature[k++] = b; + } + else { + return new LineMetadata( + station, temperature, j, k, 
offset + i + 1, hashCode, isAscii); + } + } + if (i == 0 & j == 0 && k == 0) { + hashCode = -1; + } + return new LineMetadata( + station, temperature, j, k, offset + i, hashCode, isAscii); + } + + private static SearchResult findNewLinesVectorized(final byte[] page, + final int pageLen) { + SearchResult ret = new SearchResult(new int[pageLen / 5], 0); + VectorSpecies species = PREFERRED_SPECIES; + int loopBound = pageLen - species.length() * 4; + int i = 0; + int j = 0; + while (j < loopBound) { + Vector v1 = ByteVector.fromArray(species, page, j); + Vector v2 = ByteVector.fromArray(species, page, j + species.length()); + Vector v3 = ByteVector.fromArray(species, page, j + species.length() * 2); + Vector v4 = ByteVector.fromArray(species, page, j + species.length() * 3); + long l1 = NEW_LINE_VEC.eq(v1).toLong(); + long l2 = NEW_LINE_VEC.eq(v2).toLong(); + long l3 = NEW_LINE_VEC.eq(v3).toLong(); + long l4 = NEW_LINE_VEC.eq(v4).toLong(); + long r1 = l1 & 0xFFFFFFFFL | (l2 << species.length()); + long r2 = l3 & 0xFFFFFFFFL | (l4 << (species.length())); + int b1 = Long.bitCount(r1); + int b2 = Long.bitCount(r2); + int k = i; + int it = b1; + while (it > 0) { + int idx = Long.numberOfTrailingZeros(r1); + ret.offsets[k++] = j + idx; + r1 &= (r1 - 1); + it--; + idx = Long.numberOfTrailingZeros(r1); + ret.offsets[k++] = j + idx; + r1 &= (r1 - 1); + it--; + idx = Long.numberOfTrailingZeros(r1); + ret.offsets[k++] = j + idx; + r1 &= (r1 - 1); + it--; + idx = Long.numberOfTrailingZeros(r1); + ret.offsets[k++] = j + idx; + r1 &= (r1 - 1); + it--; + idx = Long.numberOfTrailingZeros(r1); + ret.offsets[k++] = j + idx; + r1 &= (r1 - 1); + it--; + idx = Long.numberOfTrailingZeros(r1); + ret.offsets[k++] = j + idx; + r1 &= (r1 - 1); + it--; + } + i += b1; + j += species.length() * 2; + k = i; + it = b2; + while (it > 0) { + int idx = Long.numberOfTrailingZeros(r2); + ret.offsets[k++] = j + idx; + r2 &= (r2 - 1); + it--; + idx = Long.numberOfTrailingZeros(r2); + ret.offsets[k++] = j 
+ idx; + r2 &= (r2 - 1); + it--; + idx = Long.numberOfTrailingZeros(r2); + ret.offsets[k++] = j + idx; + r2 &= (r2 - 1); + it--; + idx = Long.numberOfTrailingZeros(r2); + ret.offsets[k++] = j + idx; + r2 &= (r2 - 1); + it--; + idx = Long.numberOfTrailingZeros(r2); + ret.offsets[k++] = j + idx; + r2 &= (r2 - 1); + it--; + idx = Long.numberOfTrailingZeros(r2); + ret.offsets[k++] = j + idx; + r2 &= (r2 - 1); + it--; + } + i += b2; + j += species.length() * 2; + } + + // tail loop + while (j < pageLen) { + byte b = page[j]; + if (b == '\n') { + ret.offsets[i++] = j; + } + j++; + } + ret.len = i; + return ret; + } + + private static List breakFileIntoSplits(final RandomAccessFile file, + final int splitLength, + final int pageLength, + final MemorySegment memorySegment, + final boolean enableChecks) + throws IOException { + final List splits = new ArrayList<>(); + // Try to break the file into multiple splits while ensuring that each split has at least splitLength bytes + // and ends with '\n' or EOF + for (long i = 0; i < file.length();) { + long splitStartOffset = i; + long splitEndOffset = Math.min(file.length(), splitStartOffset + splitLength); // not inclusive + if (splitEndOffset == file.length()) { // reached EOF + List pages = breakSplitIntoPages(splitStartOffset, splitEndOffset, pageLength, memorySegment, enableChecks); + splits.add(new Split(splitStartOffset, splitEndOffset - splitStartOffset, pages)); + break; + } + // Look past the end offset to find next '\n' or EOF + long segmentLength = Math.min(MAX_STR_LEN, file.length() - i); + // Create a new memory segment for reading contents beyond splitEndOffset + MemorySegment lookahead = memorySegment.asSlice(splitEndOffset, segmentLength); + ByteBuffer bb = lookahead.asByteBuffer(); + // Find the next offset which has either '\n' or EOF + LineMetadata lineMetadata = findNextOccurrenceOfNewLine(bb, (int) segmentLength, 0); + splitEndOffset += lineMetadata.offset; + if (enableChecks && + 
memorySegment.asSlice(splitEndOffset - 1, 1).asByteBuffer().get(0) != '\n') { + throw new IllegalStateException("Page doesn't end with NL char"); + } + // Break the split further into multiple pages based on pageLength + List pages = breakSplitIntoPages(splitStartOffset, splitEndOffset, pageLength, memorySegment, enableChecks); + splits.add(new Split(splitStartOffset, splitEndOffset - splitStartOffset, pages)); + i = splitEndOffset; + lookahead.unload(); + } + return splits; + } + + private static List breakSplitIntoPages(final long splitStartOffset, + final long splitEndOffset, + final int pageLength, + final MemorySegment memorySegment, + final boolean enableChecks) { + List pages = new ArrayList<>(); + for (long i = splitStartOffset; i < splitEndOffset;) { + long pageStartOffset = i; + long pageEndOffset = Math.min(splitEndOffset, pageStartOffset + pageLength); // not inclusive + if (pageEndOffset == splitEndOffset) { + pages.add(new Page(pageStartOffset, pageEndOffset - pageStartOffset)); + break; + } + // Look past the end offset to find next '\n' till we reach the end of split + long lookaheadLength = Math.min(MAX_STR_LEN, splitEndOffset - i); + MemorySegment lookahead = memorySegment.asSlice(pageEndOffset, lookaheadLength); + ByteBuffer bb = lookahead.asByteBuffer(); + // Find next offset which has either '\n' or the end of split + LineMetadata lineMetadata = findNextOccurrenceOfNewLine(bb, (int) lookaheadLength, 0); + pageEndOffset += lineMetadata.offset; + if (enableChecks && + memorySegment.asSlice(pageEndOffset - 1, 1).asByteBuffer().get(0) != '\n') { + throw new IllegalStateException("Page doesn't end with NL char"); + } + pages.add(new Page(pageStartOffset, pageEndOffset - pageStartOffset)); + i = pageEndOffset; + lookahead.unload(); + } + return pages; + } + } + + public static class State { + private final Map state; + + public State() { + this.state = new HashMap<>(DEFAULT_HASH_TBL_SIZE); + // insert a DUMMY key to prime the hashmap for usage + 
AggregationKey dummy = new AggregationKey("DUMMY".getBytes(UTF_8), -1); + this.state.put(dummy, null); + this.state.remove(dummy); + } + + public void update(final Measurement m) { + MeasurementAggregator agg = state.get(m.aggregationKey); + if (agg == null) { + state.put(m.aggregationKey, new MeasurementAggregator(m.temperature, m.temperature, m.temperature, 1L)); + return; + } + agg.count++; + agg.min = m.temperature <= agg.min ? m.temperature : agg.min; + agg.max = m.temperature >= agg.max ? m.temperature : agg.max; + agg.sum += m.temperature; + } + + public static class AggregationKey { + private final byte[] station; + private final int hashCode; + + public AggregationKey(final byte[] station, + final int hashCode) { + this.station = station; + this.hashCode = hashCode; + } + + @Override + public String toString() { + return new String(station, UTF_8); + } + + @Override + public int hashCode() { + return hashCode; + } + + @Override + public boolean equals(Object other) { + if (!(other instanceof AggregationKey)) { + return false; + } + AggregationKey sk = (AggregationKey) other; + return station.length == sk.station.length && Arrays.mismatch(station, sk.station) < 0; + } + } + } + + public static class MeasurementAggregator { + private double min; + private double max; + private double sum; + private long count; + + public MeasurementAggregator(final double min, + final double max, + final double sum, + final long count) { + this.min = min; + this.max = max; + this.sum = sum; + this.count = count; + } + + public String toString() { + double min1 = round(min); + double max1 = round(max); + double mean = round(round(sum) / count); + return min1 + "/" + mean + "/" + max1; + } + + private double round(double value) { + return Math.round(value * 10.0) / 10.0; + } + + private void merge(final MeasurementAggregator m2) { + count += m2.count; + min = Math.min(min, m2.min); + max = Math.max(max, m2.max); + sum += m2.sum; + } + } + + public static class NumberUtils { + 
public static int toDigit(final char c) { + return DIGIT_LOOKUP[c]; + } + + public static int fastMul10(final int i) { + return (i << 1) + (i << 3); + } + + public static double parseDouble2(final byte[] b, + final int offset, + final int len) { + try { + char ch0 = (char) b[offset]; + char ch1 = (char) b[offset + 1]; + char ch2 = (char) b[offset + 2]; + char ch3 = len > 3 ? (char) b[offset + 3] : ' '; + char ch4 = len > 4 ? (char) b[offset + 4] : ' '; + if (len == 3) { + int decimal = toDigit(ch0); + double fractional = DOUBLES[toDigit(ch2)]; + return decimal + fractional; + } + else if (len == 4) { + // -1.2 or 11.2 + int decimal = (ch0 == '-' ? toDigit(ch1) : (fastMul10(toDigit(ch0)) + toDigit(ch1))); + double fractional = DOUBLES[toDigit(ch3)]; + if (ch0 == '-') { + return Math.negateExact(decimal) - fractional; + } + else { + return decimal + fractional; + } + } + else { + int decimal = fastMul10(toDigit(ch1)) + toDigit(ch2); + double fractional = DOUBLES[toDigit(ch4)]; + return Math.negateExact(decimal) - fractional; + } + } + catch (ArrayIndexOutOfBoundsException e) { + printDebugMessage("Array index out of bounds for string: %s%n", new String(b, 0, len)); + throw new RuntimeException(e); + } + catch (StringIndexOutOfBoundsException e) { + printDebugMessage("String index out of bounds for string: %s%n", new String(b, 0, len)); + throw new RuntimeException(e); + } + } + } + + // record classes + record Measurement(byte[] station, + double temperature, + int hash, + State.AggregationKey aggregationKey) { + + public Measurement(byte[] station, + double temperature, + int hashCode) { + this(station, + temperature, + hashCode, + new State.AggregationKey(station, hashCode)); + } + + } + + record LineMetadata(byte[] station, + byte[] temperature, + int stationLen, + int temperatureLen, + int offset, + int precomputedHashCode, boolean isAscii) { + } + + record Split(long offset, long length, List pages) { + } + + record Page(long offset, long length) { + } + + 
public static class SearchResult { + private int[] offsets; + private int len; + + public SearchResult(final int[] offsets, + final int len) { + this.offsets = offsets; + this.len = len; + } + } + + private static void printDebugMessage(final String message, + final Object... args) { + if (DEBUG) { + System.err.printf(message, args); + } + } +} diff --git a/src/main/java/dev/morling/onebrc/CalculateAverage_gonix.java b/src/main/java/dev/morling/onebrc/CalculateAverage_gonix.java new file mode 100644 index 000000000..cbc1127ae --- /dev/null +++ b/src/main/java/dev/morling/onebrc/CalculateAverage_gonix.java @@ -0,0 +1,433 @@ +/* + * Copyright 2023 The original authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package dev.morling.onebrc; + +import java.io.IOException; +import java.io.RandomAccessFile; +import java.nio.ByteBuffer; +import java.nio.ByteOrder; +import java.nio.MappedByteBuffer; +import java.nio.channels.FileChannel; +import java.nio.charset.StandardCharsets; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.TreeMap; +import java.util.stream.Collectors; +import java.util.stream.Stream; + +public class CalculateAverage_gonix { + + private static final String FILE = "./measurements.txt"; + + public static void main(String[] args) throws IOException { + + var file = new RandomAccessFile(FILE, "r"); + + var res = buildChunks(file).stream().parallel() + .flatMap(chunk -> new Aggregator().processChunk(chunk).stream()) + .collect(Collectors.toMap( + Aggregator.Entry::getKey, + Aggregator.Entry::getValue, + Aggregator.Entry::add, + TreeMap::new)); + + System.out.println(res); + System.out.close(); + } + + private static List buildChunks(RandomAccessFile file) throws IOException { + var fileSize = file.length(); + var chunkSize = Math.min(Integer.MAX_VALUE - 512, fileSize / Runtime.getRuntime().availableProcessors()); + if (chunkSize <= 0) { + chunkSize = fileSize; + } + var chunks = new ArrayList((int) (fileSize / chunkSize) + 1); + var start = 0L; + while (start < fileSize) { + var pos = start + chunkSize; + if (pos < fileSize) { + file.seek(pos); + while (file.read() != '\n') { + pos += 1; + } + pos += 1; + } + else { + pos = fileSize; + } + var buf = file.getChannel().map(FileChannel.MapMode.READ_ONLY, start, pos - start); + buf.order(ByteOrder.nativeOrder()); + chunks.add(buf); + start = pos; + } + return chunks; + } + + private static class Aggregator { + private static final int MAX_STATIONS = 10_000; + private static final int MAX_STATION_SIZE = Math.ceilDiv(100, 8) + 5; + private static final int INDEX_SIZE = 1024 * 1024; + private static final int INDEX_MASK = INDEX_SIZE - 1; + private static final int 
FLD_COUNT = 0; + private static final int FLD_SUM = 1; + private static final int FLD_MIN = 2; + private static final int FLD_MAX = 3; + + // Poor man's hash map: hash code to offset in `mem`. + private final int[] index; + + // Contiguous storage of key (station name) and stats fields of all + // unique stations. + // The idea here is to improve locality so that stats fields would + // possibly be already in the CPU cache after we are done comparing + // the key. + private final long[] mem; + private int memUsed; + + Aggregator() { + assert ((INDEX_SIZE & (INDEX_SIZE - 1)) == 0) : "INDEX_SIZE must be power of 2"; + assert (INDEX_SIZE > MAX_STATIONS) : "INDEX_SIZE must be greater than MAX_STATIONS"; + + index = new int[INDEX_SIZE]; + mem = new long[1 + (MAX_STATIONS * MAX_STATION_SIZE)]; + memUsed = 1; + } + + Aggregator processChunk(MappedByteBuffer buf) { + // To avoid checking if it is safe to read a whole long near the + // end of a chunk, we copy last couple of lines to a padded buffer + // and process that part separately. 
+ int limit = buf.limit(); + int pos = Math.max(limit - 16, -1); + while (pos >= 0 && buf.get(pos) != '\n') { + pos--; + } + pos++; + if (pos > 0) { + processChunkLongs(buf, pos); + } + int tailLen = limit - pos; + var tailBuf = ByteBuffer.allocate(tailLen + 8).order(ByteOrder.nativeOrder()); + buf.get(pos, tailBuf.array(), 0, tailLen); + processChunkLongs(tailBuf, tailLen); + return this; + } + + Aggregator processChunkLongs(ByteBuffer buf, int limit) { + int pos = 0; + while (pos < limit) { + + int start = pos; + long keyLong = buf.getLong(pos); + long valueSepMark = valueSepMark(keyLong); + if (valueSepMark != 0) { + int tailBits = tailBits(valueSepMark); + pos += valueOffset(tailBits); + // assert (UNSAFE.getByte(pos - 1) == ';') : "Expected ';' (1), pos=" + (pos - startAddr); + long tailAndLen = tailAndLen(tailBits, keyLong, pos - start - 1); + + long valueLong = buf.getLong(pos); + int decimalSepMark = decimalSepMark(valueLong); + pos += nextKeyOffset(decimalSepMark); + // assert (UNSAFE.getByte(pos - 1) == '\n') : "Expected '\\n' (1), pos=" + (pos - startAddr); + int measurement = decimalValue(decimalSepMark, valueLong); + + add1(buf, start, tailAndLen, hash(hash1(tailAndLen)), measurement); + continue; + } + + pos += 8; + long keyLong1 = keyLong; + keyLong = buf.getLong(pos); + valueSepMark = valueSepMark(keyLong); + if (valueSepMark != 0) { + int tailBits = tailBits(valueSepMark); + pos += valueOffset(tailBits); + // assert (UNSAFE.getByte(pos - 1) == ';') : "Expected ';' (2), pos=" + (pos - startAddr); + long tailAndLen = tailAndLen(tailBits, keyLong, pos - start - 1); + + long valueLong = buf.getLong(pos); + int decimalSepMark = decimalSepMark(valueLong); + pos += nextKeyOffset(decimalSepMark); + // assert (UNSAFE.getByte(pos - 1) == '\n') : "Expected '\\n' (2), pos=" + (pos - startAddr); + int measurement = decimalValue(decimalSepMark, valueLong); + + add2(buf, start, keyLong1, tailAndLen, hash(hash(hash1(keyLong1), tailAndLen)), measurement); + 
continue; + } + + long hash = hash1(keyLong1); + do { + pos += 8; + hash = hash(hash, keyLong); + keyLong = buf.getLong(pos); + valueSepMark = valueSepMark(keyLong); + } while (valueSepMark == 0); + int tailBits = tailBits(valueSepMark); + pos += valueOffset(tailBits); + // assert (UNSAFE.getByte(pos - 1) == ';') : "Expected ';' (N), pos=" + (pos - startAddr); + long tailAndLen = tailAndLen(tailBits, keyLong, pos - start - 1); + hash = hash(hash, tailAndLen); + + long valueLong = buf.getLong(pos); + int decimalSepMark = decimalSepMark(valueLong); + pos += nextKeyOffset(decimalSepMark); + // assert (UNSAFE.getByte(pos - 1) == '\n') : "Expected '\\n' (N), pos=" + (pos - startAddr); + int measurement = decimalValue(decimalSepMark, valueLong); + + addN(buf, start, tailAndLen, hash(hash), measurement); + } + + return this; + } + + public Stream stream() { + return Arrays.stream(index) + .filter(offset -> offset != 0) + .mapToObj(offset -> new Entry(mem, offset)); + } + + private static long hash1(long value) { + return value; + } + + private static long hash(long hash, long value) { + return hash ^ value; + } + + private static int hash(long hash) { + hash *= 0x9E3779B97F4A7C15L; // Fibonacci hashing multiplier + return (int) (hash >>> 39); + } + + private static long valueSepMark(long keyLong) { + // Seen this trick used in multiple other solutions. 
+ // Nice breakdown here: https://graphics.stanford.edu/~seander/bithacks.html#ZeroInWord + long match = keyLong ^ 0x3B3B3B3B_3B3B3B3BL; // 3B == ';' + match = (match - 0x01010101_01010101L) & (~match & 0x80808080_80808080L); + return match; + } + + private static int tailBits(long valueSepMark) { + return Long.numberOfTrailingZeros(valueSepMark >>> 7); + } + + private static int valueOffset(int tailBits) { + return (int) (tailBits >>> 3) + 1; + } + + private static long tailAndLen(int tailBits, long keyLong, long keyLen) { + long tailMask = ~(-1L << tailBits); + long tail = keyLong & tailMask; + return (tail << 8) | ((keyLen >> 3) & 0xFF); + } + + private static int decimalSepMark(long value) { + // Seen this trick used in multiple other solutions. + // Looks like the original author is @merykitty. + + // The 4th binary digit of the ascii of a digit is 1 while + // that of the '.' is 0. This finds the decimal separator + // The value can be 12, 20, 28 + return Long.numberOfTrailingZeros(~value & 0x10101000); + } + + private static int decimalValue(int decimalSepMark, long value) { + // Seen this trick used in multiple other solutions. + // Looks like the original author is @merykitty. 
+ + int shift = 28 - decimalSepMark; + // signed is -1 if negative, 0 otherwise + long signed = (~value << 59) >> 63; + long designMask = ~(signed & 0xFF); + // Align the number to a specific position and transform the ascii code + // to actual digit value in each byte + long digits = ((value & designMask) << shift) & 0x0F000F0F00L; + + // Now digits is in the form 0xUU00TTHH00 (UU: units digit, TT: tens digit, HH: hundreds digit) + // 0xUU00TTHH00 * (100 * 0x1000000 + 10 * 0x10000 + 1) = + // 0x000000UU00TTHH00 + + // 0x00UU00TTHH000000 * 10 + + // 0xUU00TTHH00000000 * 100 + // Now TT * 100 has 2 trailing zeroes and HH * 100 + TT * 10 + UU < 0x400 + // This results in our value lies in the bit 32 to 41 of this product + // That was close :) + long absValue = ((digits * 0x640a0001) >>> 32) & 0x3FF; + return (int) ((absValue ^ signed) - signed); + } + + private static int nextKeyOffset(int decimalSepMark) { + return (decimalSepMark >>> 3) + 3; + } + + private void add1(ByteBuffer buf, int start, long tailAndLen, int hash, int measurement) { + int idx = hash & INDEX_MASK; + for (; index[idx] != 0; idx = (idx + 1) & INDEX_MASK) { + if (update1(index[idx], tailAndLen, measurement)) { + return; + } + } + index[idx] = create(buf, start, tailAndLen, measurement); + } + + private void add2(ByteBuffer buf, int start, long keyLong, long tailAndLen, int hash, int measurement) { + int idx = hash & INDEX_MASK; + for (; index[idx] != 0; idx = (idx + 1) & INDEX_MASK) { + if (update2(index[idx], keyLong, tailAndLen, measurement)) { + return; + } + } + index[idx] = create(buf, start, tailAndLen, measurement); + } + + private void addN(ByteBuffer buf, int start, long tailAndLen, int hash, int measurement) { + int idx = hash & INDEX_MASK; + for (; index[idx] != 0; idx = (idx + 1) & INDEX_MASK) { + if (updateN(index[idx], buf, start, tailAndLen, measurement)) { + return; + } + } + index[idx] = create(buf, start, tailAndLen, measurement); + } + + private int create(ByteBuffer buf, int 
start, long tailAndLen, int measurement) { + int offset = memUsed; + + mem[offset] = tailAndLen; + + int memPos = offset + 1; + int memEnd = memPos + (int) (tailAndLen & 0xFF); + int bufPos = start; + while (memPos < memEnd) { + mem[memPos] = buf.getLong(bufPos); + memPos += 1; + bufPos += 8; + } + + mem[memPos + FLD_MIN] = measurement; + mem[memPos + FLD_MAX] = measurement; + mem[memPos + FLD_SUM] = measurement; + mem[memPos + FLD_COUNT] = 1; + memUsed = memPos + 4; + + return offset; + } + + private boolean update1(int offset, long tailAndLen, int measurement) { + if (mem[offset] != tailAndLen) { + return false; + } + updateStats(offset + 1, measurement); + return true; + } + + private boolean update2(int offset, long keyLong, long tailAndLen, int measurement) { + if (mem[offset] != tailAndLen || mem[offset + 1] != keyLong) { + return false; + } + updateStats(offset + 2, measurement); + return true; + } + + private boolean updateN(int offset, ByteBuffer buf, int start, long tailAndLen, int measurement) { + var mem = this.mem; + if (mem[offset] != tailAndLen) { + return false; + } + int memPos = offset + 1; + int memEnd = memPos + (int) (tailAndLen & 0xFF); + int bufPos = start; + while (memPos < memEnd) { + if (mem[memPos] != buf.getLong(bufPos)) { + return false; + } + memPos += 1; + bufPos += 8; + } + updateStats(memPos, measurement); + return true; + } + + private void updateStats(int memPos, int measurement) { + mem[memPos + FLD_COUNT] += 1; + mem[memPos + FLD_SUM] += measurement; + if (measurement < mem[memPos + FLD_MIN]) { + mem[memPos + FLD_MIN] = measurement; + } + if (measurement > mem[memPos + FLD_MAX]) { + mem[memPos + FLD_MAX] = measurement; + } + } + + public static class Entry { + private final long[] mem; + private final int offset; + private String key; + + Entry(long[] mem, int offset) { + this.mem = mem; + this.offset = offset; + } + + public String getKey() { + if (key == null) { + int pos = this.offset; + long tailAndLen = mem[pos++]; + int 
keyLen = (int) (tailAndLen & 0xFF); + var tmpBuf = ByteBuffer.allocate((keyLen << 3) + 8).order(ByteOrder.nativeOrder()); + for (int i = 0; i < keyLen; i++) { + tmpBuf.putLong(mem[pos++]); + } + long tail = tailAndLen >>> 8; + tmpBuf.putLong(tail); + int keyLenBytes = (keyLen << 3) + 8 - (Long.numberOfLeadingZeros(tail) >> 3); + key = new String(tmpBuf.array(), 0, keyLenBytes, StandardCharsets.UTF_8); + } + return key; + } + + public Entry add(Entry other) { + int fldOffset = (int) (mem[offset] & 0xFF) + 1; + int pos = offset + fldOffset; + int otherPos = other.offset + fldOffset; + long[] otherMem = other.mem; + mem[pos + FLD_MIN] = Math.min((int) mem[pos + FLD_MIN], (int) otherMem[otherPos + FLD_MIN]); + mem[pos + FLD_MAX] = Math.max((int) mem[pos + FLD_MAX], (int) otherMem[otherPos + FLD_MAX]); + mem[pos + FLD_SUM] += otherMem[otherPos + FLD_SUM]; + mem[pos + FLD_COUNT] += otherMem[otherPos + FLD_COUNT]; + return this; + } + + public Entry getValue() { + return this; + } + + @Override + public String toString() { + int pos = offset + (int) (mem[offset] & 0xFF) + 1; + return round(mem[pos + FLD_MIN]) + + "/" + round(((double) mem[pos + FLD_SUM]) / mem[pos + FLD_COUNT]) + + "/" + round(mem[pos + FLD_MAX]); + } + + private static double round(double value) { + return Math.round(value) / 10.0; + } + } + } + +} diff --git a/src/main/java/dev/morling/onebrc/CalculateAverage_gonixunsafe.java b/src/main/java/dev/morling/onebrc/CalculateAverage_gonixunsafe.java new file mode 100644 index 000000000..bf75389a9 --- /dev/null +++ b/src/main/java/dev/morling/onebrc/CalculateAverage_gonixunsafe.java @@ -0,0 +1,553 @@ +/* + * Copyright 2023 The original authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package dev.morling.onebrc; + +import java.io.IOException; +import java.io.RandomAccessFile; +import java.lang.foreign.Arena; +import java.lang.foreign.MemorySegment; +import java.lang.reflect.Field; +import java.nio.channels.FileChannel; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.concurrent.atomic.AtomicReference; + +import sun.misc.Unsafe; + +public class CalculateAverage_gonixunsafe { + + private static final String FILE = "./measurements.txt"; + private static final int MAX_THREADS = Runtime.getRuntime().availableProcessors(); + + public static void main(String[] args) throws Exception { + + var file = new RandomAccessFile(FILE, "r"); + + var chunks = Aggregator.buildChunks(file, MAX_THREADS); + var chunksCount = chunks.size(); + var threads = new Thread[chunksCount]; + var result = new AtomicReference(); + for (int i = 0; i < chunksCount; ++i) { + var agg = new Aggregator(); + var chunk = chunks.get(i); + var thread = new Thread(() -> { + agg.processChunk(chunk); + while (!result.compareAndSet(null, agg)) { + Aggregator other = result.getAndSet(null); + if (other != null) { + agg.merge(other); + } + } + }); + thread.start(); + threads[i] = thread; + } + for (int i = 0; i < chunksCount; ++i) { + threads[i].join(); + } + System.out.println(result.get().toString()); + System.out.close(); + } + + private static class Aggregator { + private static final int MAX_STATIONS = 10_000; + private static final int INDEX_SIZE = 256 * 1024 * 8; + private static final int INDEX_MASK = (INDEX_SIZE - 
1) & ~7; + + private static final int HEADER_SIZE = 8; + private static final int MAX_KEY_SIZE = 100; + private static final int FLD_COUNT = 0; // long + private static final int FLD_SUM = 8; // long + private static final int FLD_MIN = 16; // int + private static final int FLD_MAX = 20; // int + private static final int FLD_HASH = 24; // int + private static final int FIELDS_SIZE = 28 + 4; // +padding to align to 8 bytes + private static final int MAX_STATION_SIZE = HEADER_SIZE + MAX_KEY_SIZE + FIELDS_SIZE; + + private static final Unsafe UNSAFE; + + static { + try { + Field unsafe = Unsafe.class.getDeclaredField("theUnsafe"); + unsafe.setAccessible(true); + UNSAFE = (Unsafe) unsafe.get(Unsafe.class); + } + catch (Throwable e) { + throw new RuntimeException(e); + } + } + + private static long alloc(long size) { + long addr = UNSAFE.allocateMemory(size); + UNSAFE.setMemory(addr, size, (byte) 0); + return addr; + } + + // Poor man's hash map: hash code to offset in `mem`. + private final long indexAddr = alloc(INDEX_SIZE); + + // Contiguous storage of key (station name) and stats fields of all + // unique stations. + // The idea here is to improve locality so that stats fields would + // possibly be already in the CPU cache after we are done comparing + // the key. 
+ private final long memAddr = alloc(MAX_STATIONS * MAX_STATION_SIZE); + private long memUsed = memAddr; + private int count = 0; + + static List buildChunks(RandomAccessFile file, int count) throws IOException { + var fileSize = file.length(); + var chunkSize = Math.min(Integer.MAX_VALUE - 512, fileSize / count); + if (chunkSize <= 0) { + chunkSize = fileSize; + } + var chunks = new ArrayList((int) (fileSize / chunkSize) + 1); + var mmap = file.getChannel().map(FileChannel.MapMode.READ_ONLY, 0, fileSize, Arena.global()); + var fileStartAddr = mmap.address(); + var fileEndAddr = mmap.address() + mmap.byteSize(); + var chunkStartAddr = fileStartAddr; + while (chunkStartAddr < fileEndAddr) { + var pos = chunkStartAddr + chunkSize; + if (pos < fileEndAddr) { + while (UNSAFE.getByte(pos) != '\n') { + pos += 1; + } + pos += 1; + } + else { + pos = fileEndAddr; + } + chunks.add(new Chunk(mmap, chunkStartAddr, pos, fileStartAddr, fileEndAddr)); + chunkStartAddr = pos; + } + return chunks; + } + + Aggregator processChunk(Chunk chunk) { + // As an optimization, we assume that we can read past the end + // of file size if as we don't cross page boundary. + final int WANT_PADDING = 8; + final int PAGE_SIZE = UNSAFE.pageSize(); + if (((chunk.chunkEndAddr + WANT_PADDING) / PAGE_SIZE) <= (chunk.fileEndAddr / PAGE_SIZE)) { + return processChunk(chunk.chunkStartAddr, chunk.chunkEndAddr); + } + + // Otherwise, to avoid checking if it is safe to read a whole long + // near the end of a chunk, we copy the last couple of lines to a + // padded buffer and process that part separately. 
+ long pos = Math.max(-1, chunk.chunkEndAddr - WANT_PADDING - 1); + while (pos >= 0 && UNSAFE.getByte(pos) != '\n') { + pos--; + } + pos++; + if (pos > 0) { + processChunk(chunk.chunkStartAddr, pos); + } + long tailLen = chunk.chunkEndAddr - pos; + var tailAddr = alloc(tailLen + WANT_PADDING); + UNSAFE.copyMemory(pos, tailAddr, tailLen); + processChunk(tailAddr, tailAddr + tailLen); + return this; + } + + private Aggregator processChunk(long startAddr, long endAddr) { + long pos = startAddr; + while (pos < endAddr) { + + long start = pos; + long keyLong = UNSAFE.getLong(pos); + long valueSepMark = valueSepMark(keyLong); + if (valueSepMark != 0) { + int tailBits = tailBits(valueSepMark); + pos += valueOffset(tailBits); + // assert (UNSAFE.getByte(pos - 1) == ';') : "Expected ';' (1), pos=" + (pos - startAddr); + long tailAndLen = tailAndLen(tailBits, keyLong, pos - start - 1); + + long valueLong = UNSAFE.getLong(pos); + int decimalSepMark = decimalSepMark(valueLong); + pos += nextKeyOffset(decimalSepMark); + // assert (UNSAFE.getByte(pos - 1) == '\n') : "Expected '\\n' (1), pos=" + (pos - startAddr); + int measurement = decimalValue(decimalSepMark, valueLong); + + add1(start, tailAndLen, hash(hash1(tailAndLen)), measurement); + continue; + } + + pos += 8; + long keyLong1 = keyLong; + keyLong = UNSAFE.getLong(pos); + valueSepMark = valueSepMark(keyLong); + if (valueSepMark != 0) { + int tailBits = tailBits(valueSepMark); + pos += valueOffset(tailBits); + // assert (UNSAFE.getByte(pos - 1) == ';') : "Expected ';' (2), pos=" + (pos - startAddr); + long tailAndLen = tailAndLen(tailBits, keyLong, pos - start - 1); + + long valueLong = UNSAFE.getLong(pos); + int decimalSepMark = decimalSepMark(valueLong); + pos += nextKeyOffset(decimalSepMark); + // assert (UNSAFE.getByte(pos - 1) == '\n') : "Expected '\\n' (2), pos=" + (pos - startAddr); + int measurement = decimalValue(decimalSepMark, valueLong); + + add2(start, keyLong1, tailAndLen, hash(hash(hash1(keyLong1), 
tailAndLen)), measurement); + continue; + } + + long hash = hash1(keyLong1); + do { + pos += 8; + hash = hash(hash, keyLong); + keyLong = UNSAFE.getLong(pos); + valueSepMark = valueSepMark(keyLong); + } while (valueSepMark == 0); + int tailBits = tailBits(valueSepMark); + pos += valueOffset(tailBits); + // assert (UNSAFE.getByte(pos - 1) == ';') : "Expected ';' (N), pos=" + (pos - startAddr); + long tailAndLen = tailAndLen(tailBits, keyLong, pos - start - 1); + hash = hash(hash, tailAndLen); + + long valueLong = UNSAFE.getLong(pos); + int decimalSepMark = decimalSepMark(valueLong); + pos += nextKeyOffset(decimalSepMark); + // assert (UNSAFE.getByte(pos - 1) == '\n') : "Expected '\\n' (N), pos=" + (pos - startAddr); + int measurement = decimalValue(decimalSepMark, valueLong); + + addN(start, tailAndLen, hash(hash), measurement); + } + + return this; + } + + private static long hash1(long value) { + return value; + } + + private static long hash(long hash, long value) { + return hash ^ value; + } + + private static int hash(long hash) { + hash *= 0x9E3779B97F4A7C15L; // Fibonacci hashing multiplier + return (int) (hash >>> 39); + } + + private static long valueSepMark(long keyLong) { + // Seen this trick used in multiple other solutions. 
+ // Nice breakdown here: https://graphics.stanford.edu/~seander/bithacks.html#ZeroInWord + long match = keyLong ^ 0x3B3B3B3B_3B3B3B3BL; // 3B == ';' + match = (match - 0x01010101_01010101L) & (~match & 0x80808080_80808080L); + return match; + } + + private static int tailBits(long valueSepMark) { + return Long.numberOfTrailingZeros(valueSepMark >>> 7); + } + + private static int valueOffset(int tailBits) { + return (int) (tailBits >>> 3) + 1; + } + + private static long tailAndLen(int tailBits, long keyLong, long keyLen) { + long tailMask = ~(-1L << tailBits); + long tail = keyLong & tailMask; + return (tail << 8) | (keyLen & 0xFF); + } + + private static int decimalSepMark(long value) { + // Seen this trick used in multiple other solutions. + // Looks like the original author is @merykitty. + + // The 4th binary digit of the ascii of a digit is 1 while + // that of the '.' is 0. This finds the decimal separator + // The value can be 12, 20, 28 + return Long.numberOfTrailingZeros(~value & 0x10101000); + } + + private static int decimalValue(int decimalSepMark, long value) { + // Seen this trick used in multiple other solutions. + // Looks like the original author is @merykitty. 
+ + int shift = 28 - decimalSepMark; + // signed is -1 if negative, 0 otherwise + long signed = (~value << 59) >> 63; + long designMask = ~(signed & 0xFF); + // Align the number to a specific position and transform the ascii code + // to actual digit value in each byte + long digits = ((value & designMask) << shift) & 0x0F000F0F00L; + + // Now digits is in the form 0xUU00TTHH00 (UU: units digit, TT: tens digit, HH: hundreds digit) + // 0xUU00TTHH00 * (100 * 0x1000000 + 10 * 0x10000 + 1) = + // 0x000000UU00TTHH00 + + // 0x00UU00TTHH000000 * 10 + + // 0xUU00TTHH00000000 * 100 + // Now TT * 100 has 2 trailing zeroes and HH * 100 + TT * 10 + UU < 0x400 + // This results in our value lies in the bit 32 to 41 of this product + // That was close :) + long absValue = ((digits * 0x640a0001) >>> 32) & 0x3FF; + return (int) ((absValue ^ signed) - signed); + } + + private static int nextKeyOffset(int decimalSepMark) { + return (decimalSepMark >>> 3) + 3; + } + + private void add1(long keyStartAddr, long tailAndLen, int hash, int measurement) { + int idx = hash & INDEX_MASK; + for (long entryAddr; (entryAddr = UNSAFE.getLong(indexAddr + idx)) != 0; idx = (idx + 8) & INDEX_MASK) { + if (update1(entryAddr, tailAndLen, measurement)) { + return; + } + } + UNSAFE.putLong(indexAddr + idx, create(keyStartAddr, tailAndLen, hash, measurement, '1')); + } + + private void add2(long keyStartAddr, long keyLong, long tailAndLen, int hash, int measurement) { + int idx = hash & INDEX_MASK; + for (long entryAddr; (entryAddr = UNSAFE.getLong(indexAddr + idx)) != 0; idx = (idx + 8) & INDEX_MASK) { + if (update2(entryAddr, keyLong, tailAndLen, measurement)) { + return; + } + } + UNSAFE.putLong(indexAddr + idx, create(keyStartAddr, tailAndLen, hash, measurement, '2')); + } + + private void addN(long keyStartAddr, long tailAndLen, int hash, int measurement) { + int idx = hash & INDEX_MASK; + for (long entryAddr; (entryAddr = UNSAFE.getLong(indexAddr + idx)) != 0; idx = (idx + 8) & INDEX_MASK) { + if 
(updateN(entryAddr, keyStartAddr, tailAndLen, measurement)) { + return; + } + } + UNSAFE.putLong(indexAddr + idx, create(keyStartAddr, tailAndLen, hash, measurement, 'N')); + } + + private long create(long keyStartAddr, long tailAndLen, int hash, int measurement, char _origin) { + // assert (memUsed + MAX_STATION_SIZE < memAddr + MAX_STATION_SIZE * MAX_STATIONS) : "Too many stations"; + + final long entryAddr = memUsed; + + int keySize = (int) (tailAndLen & 0xF8); + long fieldsAddr = entryAddr + HEADER_SIZE + keySize; + memUsed += HEADER_SIZE + keySize + FIELDS_SIZE; + count++; + + UNSAFE.putLong(entryAddr, tailAndLen); + UNSAFE.copyMemory(keyStartAddr, entryAddr + HEADER_SIZE, keySize); + UNSAFE.putLong(fieldsAddr + FLD_COUNT, 1); + UNSAFE.putLong(fieldsAddr + FLD_SUM, measurement); + UNSAFE.putInt(fieldsAddr + FLD_MIN, measurement); + UNSAFE.putInt(fieldsAddr + FLD_MAX, measurement); + UNSAFE.putInt(fieldsAddr + FLD_HASH, hash); + + return entryAddr; + } + + private static boolean update1(long entryAddr, long tailAndLen, int measurement) { + if (UNSAFE.getLong(entryAddr) != tailAndLen) { + return false; + } + + updateStats(entryAddr + HEADER_SIZE, measurement); + return true; + } + + private static boolean update2(long entryAddr, long keyLong, long tailAndLen, int measurement) { + if (UNSAFE.getLong(entryAddr) != tailAndLen) { + return false; + } + if (UNSAFE.getLong(entryAddr + 8) != keyLong) { + return false; + } + + updateStats(entryAddr + HEADER_SIZE + 8, measurement); + return true; + } + + private static boolean updateN(long entryAddr, long keyStartAddr, long tailAndLen, int measurement) { + if (UNSAFE.getLong(entryAddr) != tailAndLen) { + return false; + } + long memPos = entryAddr + HEADER_SIZE; + long memEnd = memPos + ((int) (tailAndLen & 0xF8)); + long bufPos = keyStartAddr; + while (memPos != memEnd) { + if (UNSAFE.getLong(memPos) != UNSAFE.getLong(bufPos)) { + return false; + } + memPos += 8; + bufPos += 8; + } + + updateStats(memPos, measurement); + 
return true; + } + + private static void updateStats(long addr, int measurement) { + long oldCount = UNSAFE.getLong(addr + FLD_COUNT); + long oldSum = UNSAFE.getLong(addr + FLD_SUM); + long oldMin = UNSAFE.getInt(addr + FLD_MIN); + long oldMax = UNSAFE.getInt(addr + FLD_MAX); + + UNSAFE.putLong(addr + FLD_COUNT, oldCount + 1); + UNSAFE.putLong(addr + FLD_SUM, oldSum + measurement); + if (measurement < oldMin) { + UNSAFE.putInt(addr + FLD_MIN, measurement); + } + if (measurement > oldMax) { + UNSAFE.putInt(addr + FLD_MAX, measurement); + } + } + + private static void updateStats(long addr, long count, long sum, int min, int max) { + long oldCount = UNSAFE.getLong(addr + FLD_COUNT); + long oldSum = UNSAFE.getLong(addr + FLD_SUM); + long oldMin = UNSAFE.getInt(addr + FLD_MIN); + long oldMax = UNSAFE.getInt(addr + FLD_MAX); + + UNSAFE.putLong(addr + FLD_COUNT, oldCount + count); + UNSAFE.putLong(addr + FLD_SUM, oldSum + sum); + if (min < oldMin) { + UNSAFE.putInt(addr + FLD_MIN, min); + } + if (max > oldMax) { + UNSAFE.putInt(addr + FLD_MAX, max); + } + } + + public Aggregator merge(Aggregator other) { + var otherMemPos = other.memAddr; + var otherMemEnd = other.memUsed; + merge: for (long entrySize; otherMemPos < otherMemEnd; otherMemPos += entrySize) { + int keySize = (int) (UNSAFE.getLong(otherMemPos) & 0xF8); + long otherKeyEnd = otherMemPos + HEADER_SIZE + keySize; + entrySize = HEADER_SIZE + keySize + FIELDS_SIZE; + int hash = UNSAFE.getInt(otherKeyEnd + FLD_HASH); + int idx = hash & INDEX_MASK; + search: for (long entryAddr; (entryAddr = UNSAFE.getLong(indexAddr + idx)) != 0; idx = (idx + 8) & INDEX_MASK) { + var thisPos = entryAddr; + var otherPos = otherMemPos; + while (otherPos < otherKeyEnd) { + if (UNSAFE.getLong(thisPos) != UNSAFE.getLong(otherPos)) { + continue search; + } + thisPos += 8; + otherPos += 8; + } + updateStats( + thisPos, + UNSAFE.getLong(otherPos + FLD_COUNT), + UNSAFE.getLong(otherPos + FLD_SUM), + UNSAFE.getInt(otherPos + FLD_MIN), + 
UNSAFE.getInt(otherPos + FLD_MAX)); + continue merge; + } + + // create + // assert (memUsed + MAX_STATION_SIZE < memAddr + MAX_STATION_SIZE * MAX_STATIONS) : "Too many stations (merge)"; + long entryAddr = memUsed; + memUsed += entrySize; + count++; + UNSAFE.copyMemory(otherMemPos, entryAddr, entrySize); + UNSAFE.putLong(indexAddr + idx, entryAddr); + } + return this; + } + + @Override + public String toString() { + if (count == 0) { + return "{}"; + } + var entries = new Entry[count]; + int i = 0; + for (long pos = memAddr; pos < memUsed; pos += (int) (UNSAFE.getLong(pos) & 0xF8) + HEADER_SIZE + FIELDS_SIZE) { + entries[i++] = new Entry(pos); + } + Arrays.sort(entries); + var sb = new StringBuilder(count * 50); + sb.append('{'); + entries[0].appendTo(sb); + for (int j = 1; j < entries.length; ++j) { + sb.append(", "); + entries[j].appendTo(sb); + } + sb.append('}'); + return sb.toString(); + } + + static class Chunk { + final MemorySegment file; + final long chunkStartAddr; + final long chunkEndAddr; + final long fileStartAddr; + final long fileEndAddr; + + Chunk(MemorySegment file, long chunkStartAddr, long chunkEndAddr, long fileStartAddr, long fileEndAddr) { + this.file = file; + this.chunkStartAddr = chunkStartAddr; + this.chunkEndAddr = chunkEndAddr; + this.fileStartAddr = fileStartAddr; + this.fileEndAddr = fileEndAddr; + } + } + + static class Entry implements Comparable { + private final long entryAddr; + private final int keySize; + private final String key; + + Entry(long entryAddr) { + this.entryAddr = entryAddr; + this.keySize = (int) UNSAFE.getLong(entryAddr) & 0xF8; + try (var arena = Arena.ofConfined()) { + var ms = arena.allocate(keySize + 8); + UNSAFE.copyMemory(entryAddr + HEADER_SIZE, ms.address(), keySize); + UNSAFE.copyMemory(entryAddr + 1, ms.address() + keySize, 7); + this.key = ms.getUtf8String(0); + } + } + + @Override + public int compareTo(Entry other) { + return key.compareTo(other.key); + } + + @Override + public String toString() { + 
long pos = entryAddr + HEADER_SIZE + keySize; + return round(UNSAFE.getInt(pos + FLD_MIN)) + + "/" + round(((double) UNSAFE.getLong(pos + FLD_SUM)) / UNSAFE.getLong(pos + FLD_COUNT)) + + "/" + round(UNSAFE.getInt(pos + FLD_MAX)); + } + + void appendTo(StringBuilder sb) { + long pos = entryAddr + HEADER_SIZE + keySize; + sb.append(key); + sb.append('='); + sb.append(round(UNSAFE.getInt(pos + FLD_MIN))); + sb.append('/'); + sb.append(round(((double) UNSAFE.getLong(pos + FLD_SUM)) / UNSAFE.getLong(pos + FLD_COUNT))); + sb.append('/'); + sb.append(round(UNSAFE.getInt(pos + FLD_MAX))); + } + + private static double round(double value) { + return Math.round(value) / 10.0; + } + } + } +} diff --git a/src/main/java/dev/morling/onebrc/CalculateAverage_hundredwatt.java b/src/main/java/dev/morling/onebrc/CalculateAverage_hundredwatt.java index 9d935ffce..24a173ae0 100644 --- a/src/main/java/dev/morling/onebrc/CalculateAverage_hundredwatt.java +++ b/src/main/java/dev/morling/onebrc/CalculateAverage_hundredwatt.java @@ -31,7 +31,7 @@ public class CalculateAverage_hundredwatt { private static final String FILE = "./measurements.txt"; private static final int MAX_ROW_SIZE = 100 + 1 + 5 + 1; // 100 for city name, 1 for ;, 5 for temperature, 1 for \n - private static final int THREAD_COUNT = Math.min(8, Runtime.getRuntime().availableProcessors()); + private static final int THREAD_COUNT = Runtime.getRuntime().availableProcessors(); private static final long BUFFER_SIZE = 128 * 1024 * 1024; // 128MB private static final long CHUNK_SIZE = BUFFER_SIZE / THREAD_COUNT; private static final long FILE_CHUNK_SIZE = CHUNK_SIZE - MAX_ROW_SIZE; @@ -209,10 +209,10 @@ private static int processChunk(ByteBuffer bb, HashTable hashTable, long start, short temperature_value; int hashInt; - int i = 0; + int rc = 0; int end = (int) (size - MAX_ROW_SIZE); - while (position < end) { - i++; + while (position <= end) { + // rc++; offset = -1; // Parse city name @@ -257,11 +257,11 @@ private static int 
processChunk(ByteBuffer bb, HashTable hashTable, long start, position = position + newlinePos / 8 + 2; // +1 for \n - hashInt = (int) (hash ^ (hash >> 32)); + hashInt = (int) (hash ^ (hash >> 32) ^ (hash >> 17)); hashTable.putOrMerge(hashInt, offset + 1, key, temperature_value); } - return i; + return rc; } public static void main(String[] args) throws IOException { @@ -282,7 +282,7 @@ public static void main(String[] args) throws IOException { byte[] trailing = new byte[MAX_ROW_SIZE * 2]; fileChannel.read(ByteBuffer.wrap(trailing), Math.max(0, fileSize - MAX_ROW_SIZE)); var rc = processChunk(ByteBuffer.wrap(trailing), hashTable, Math.max(0, fileSize - MAX_ROW_SIZE), - MAX_ROW_SIZE + Math.min(fileSize, MAX_ROW_SIZE)); + MAX_ROW_SIZE + Math.min(fileSize, MAX_ROW_SIZE) - 1); // rowCount.addAndGet(rc); return hashTable; @@ -292,11 +292,16 @@ public static void main(String[] args) throws IOException { } } + // if file is smaller than max row size, we're done b/c the trailing bytes handler processed the whole file + if (fileSize <= MAX_ROW_SIZE) { + return hashTable; + } + while (start < fileSize) { long end = Math.min(start + CHUNK_SIZE, fileSize); MappedByteBuffer bb = null; try { - bb = fileChannel.map(FileChannel.MapMode.READ_ONLY, start, end - start); + bb = fileChannel.map(FileChannel.MapMode.READ_ONLY, start, Math.min(end - start + 8, fileSize - start)); } catch (IOException e) { throw new RuntimeException(e); diff --git a/src/main/java/dev/morling/onebrc/CalculateAverage_ianopolous.java b/src/main/java/dev/morling/onebrc/CalculateAverage_ianopolous.java index 834de7460..4d82d8809 100644 --- a/src/main/java/dev/morling/onebrc/CalculateAverage_ianopolous.java +++ b/src/main/java/dev/morling/onebrc/CalculateAverage_ianopolous.java @@ -18,85 +18,88 @@ import java.io.*; import java.nio.*; import java.nio.channels.*; +import java.util.concurrent.*; import java.util.stream.*; import java.util.*; -/* A simple implementation that memory maps the file, reads chunks in 
parallel and minimises allocation without any unsafe. +/* A simple implementation aiming for readability. + * Features: + * * memory mapped file + * * read chunks in parallel + * * minimise allocation + * * no unsafe * * Timings on 4 core i7-7500U CPU @ 2.70GHz: * average_baseline: 4m48s - * ianopolous: 48s + * ianopolous: 36s */ public class CalculateAverage_ianopolous { public static final int MAX_LINE_LENGTH = 107; - public static final int MAX_STATIONS = 10000; + public static final int MAX_STATIONS = 10_000; - public static void main(String[] args) { + public static void main(String[] args) throws Exception { File input = new File("./measurements.txt"); long filesize = input.length(); - long chunkSize = 256 * 1024 * 1024; + // keep chunk size between 256 MB and 1G (1 chunk for files < 256MB) + long chunkSize = Math.min(Math.max(filesize / 32, 256 * 1024 * 1024), 1024 * 1024 * 1024L); int nChunks = (int) ((filesize + chunkSize - 1) / chunkSize); - List> allResults = IntStream.range(0, nChunks).mapToObj(i -> { - HashMap results = new HashMap(512); - parseStats(i * chunkSize, Math.min((i + 1) * chunkSize, filesize), results); - return results; - }).parallel().toList(); - HashMap result = allResults.getFirst(); - for (int i = 1; i < allResults.size(); ++i) { - for (Map.Entry entry : allResults.get(i).entrySet()) { - Stat current = result.putIfAbsent(entry.getKey(), entry.getValue()); - if (current != null) { - current.merge(entry.getValue()); - } - } - } - - System.out.println(new TreeMap<>(result)); - } + ExecutorService pool = Executors.newVirtualThreadPerTaskExecutor(); + List>>> allResults = IntStream.range(0, nChunks) + .mapToObj(i -> pool.submit(() -> parseStats(i * chunkSize, Math.min((i + 1) * chunkSize, filesize)))) + .toList(); - public record Station(String name, ByteBuffer buf) { + TreeMap merged = allResults.stream() + .parallel() + .flatMap(f -> { + try { + return f.get().stream().filter(Objects::nonNull).flatMap(Collection::stream); + } + catch 
(Exception e) { + return Stream.empty(); + } + }) + .collect(Collectors.toMap(s -> s.name(), s -> s, (a, b) -> a.merge(b), TreeMap::new)); + System.out.println(merged); } - public static boolean matchingStationBytes(int start, int end, MappedByteBuffer buffer, Station existing) { - buffer.position(start); + public static boolean matchingStationBytes(int start, int end, MappedByteBuffer buffer, Stat existing) { for (int i = start; i < end; i++) { - if (existing.buf.get(i - start) != buffer.get(i)) + if (existing.name[i - start] != buffer.get(i)) return false; } return true; } - public static Station parseStation(int start, int end, int hash, MappedByteBuffer buffer, List> stations) { + public static Stat parseStation(int start, int end, int hash, MappedByteBuffer buffer, List> stations) { int index = Math.floorMod(hash, MAX_STATIONS); - List matches = stations.get(index); + List matches = stations.get(index); if (matches == null) { - List value = new ArrayList<>(); + List value = new ArrayList<>(); byte[] stationBuffer = new byte[end - start]; buffer.position(start); buffer.get(stationBuffer); - String name = new String(stationBuffer); - Station res = new Station(name, ByteBuffer.wrap(stationBuffer)); + Stat res = new Stat(stationBuffer); value.add(res); stations.set(index, value); return res; } else { for (int i = 0; i < matches.size(); i++) { - Station s = matches.get(i); + Stat s = matches.get(i); if (matchingStationBytes(start, end, buffer, s)) return s; } byte[] stationBuffer = new byte[end - start]; buffer.position(start); buffer.get(stationBuffer); - Station res = new Station(new String(stationBuffer), ByteBuffer.wrap(stationBuffer)); + Stat res = new Stat(stationBuffer); matches.add(res); return res; } } - public static void parseStats(long startByte, long endByte, Map results) { + public static List> parseStats(long startByte, long endByte) { try { RandomAccessFile file = new RandomAccessFile("./measurements.txt", "r"); long maxEnd = Math.min(file.length(), 
endByte + MAX_LINE_LENGTH); @@ -117,30 +120,22 @@ public static void parseStats(long startByte, long endByte, Map re } } - List> stations = new ArrayList<>(MAX_STATIONS); + List> stations = new ArrayList<>(MAX_STATIONS); for (int i = 0; i < MAX_STATIONS; i++) stations.add(null); int lineStart = done; int lineSplit = 0; - long temperature = 0; + short temperature = 0; int hash = 1; boolean negative = false; while (done < maxDone) { - Station station = null; + Stat station = null; for (int i = done; i < done + MAX_LINE_LENGTH && i < maxEnd; i++) { byte b = buffer.get(i); if (b == '\n') { done = i + 1; - Stat res = results.get(station.name); - temperature = negative ? -temperature : temperature; - if (res != null) { - res.add(temperature); - } - else { - res = new Stat(); - res.add(temperature); - results.put(station.name, res); - } + temperature = negative ? (short) -temperature : temperature; + station.add(temperature); lineStart = done; station = null; hash = 1; @@ -152,17 +147,18 @@ public static void parseStats(long startByte, long endByte, Map re temperature = 0; negative = false; } - else if (b == '-' && station != null) { - negative = true; + else if (station == null) { + hash = 31 * hash + b; } - else if (b != '.' && station != null) { - temperature = temperature * 10 + (b - 0x30); + else if (b == '-') { + negative = true; } - else { - hash = 31 * hash + b; + else if (b != '.') { + temperature = (short) (temperature * 10 + (b - 0x30)); } } } + return stations; } catch (IOException e) { throw new RuntimeException(e); @@ -170,9 +166,16 @@ else if (b != '.' 
&& station != null) { } public static class Stat { - long min = Long.MAX_VALUE, max = Long.MIN_VALUE, total = 0, count = 0; + final byte[] name; + int count = 0; + short min = Short.MAX_VALUE, max = Short.MIN_VALUE; + long total = 0; - public void add(long value) { + public Stat(byte[] name) { + this.name = name; + } + + public void add(short value) { if (value < min) min = value; if (value > max) @@ -181,19 +184,24 @@ public void add(long value) { count++; } - public void merge(Stat value) { + public Stat merge(Stat value) { if (value.min < min) min = value.min; if (value.max > max) max = value.max; total += value.total; count += value.count; + return this; } private static double round(double value) { return Math.round(value) / 10.0; } + public String name() { + return new String(name); + } + public String toString() { return round((double) min) + "/" + round(((double) total) / count) + "/" + round((double) max); } diff --git a/src/main/java/dev/morling/onebrc/CalculateAverage_ianopolousfast.java b/src/main/java/dev/morling/onebrc/CalculateAverage_ianopolousfast.java new file mode 100644 index 000000000..92e2f6ecb --- /dev/null +++ b/src/main/java/dev/morling/onebrc/CalculateAverage_ianopolousfast.java @@ -0,0 +1,271 @@ +/* + * Copyright 2023 The original authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package dev.morling.onebrc; + +import jdk.incubator.vector.ByteVector; +import jdk.incubator.vector.VectorOperators; +import jdk.incubator.vector.VectorSpecies; + +import java.lang.foreign.Arena; +import java.lang.foreign.MemorySegment; +import java.nio.ByteOrder; +import java.nio.channels.*; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.StandardOpenOption; +import java.util.stream.*; +import java.util.*; + +import static java.lang.foreign.ValueLayout.*; + +/* A fast implementation with no unsafe. + * Features: + * * memory mapped file using preview Arena FFI + * * semicolon finding and name comparison using incubator vector api + * * read chunks in parallel + * * minimise allocation + * * no unsafe + * * process multiple lines in each thread for better ILP +*/ +public class CalculateAverage_ianopolousfast { + + public static final int MAX_LINE_LENGTH = 107; + public static final int MAX_STATIONS = 1 << 14; + private static final OfLong LONG_LAYOUT = JAVA_LONG_UNALIGNED.withOrder(ByteOrder.BIG_ENDIAN); + private static final VectorSpecies BYTE_SPECIES = ByteVector.SPECIES_PREFERRED.length() >= 16 + ? ByteVector.SPECIES_128 + : ByteVector.SPECIES_64; + + public static void main(String[] args) throws Exception { + Arena arena = Arena.global(); + Path input = Path.of("measurements.txt"); + FileChannel channel = (FileChannel) Files.newByteChannel(input, StandardOpenOption.READ); + long filesize = Files.size(input); + MemorySegment mmap = channel.map(FileChannel.MapMode.READ_ONLY, 0, filesize, arena); + int nChunks = filesize < 4 * 1024 * 1024 ? 
1 : Runtime.getRuntime().availableProcessors(); + long chunkSize = (filesize + nChunks - 1) / nChunks; + List allResults = IntStream.range(0, nChunks) + .parallel() + .mapToObj(i -> parseStats(i * chunkSize, Math.min((i + 1) * chunkSize, filesize), mmap)) + .toList(); + + TreeMap merged = allResults.stream() + .parallel() + .flatMap(f -> { + try { + return Arrays.stream(f).filter(Objects::nonNull); + } + catch (Exception e) { + e.printStackTrace(); + return Stream.empty(); + } + }) + .collect(Collectors.toMap(s -> s.name(), s -> s, (a, b) -> a.merge(b), TreeMap::new)); + System.out.println(merged); + } + + public static boolean matchingStationBytes(long start, long end, MemorySegment buffer, Stat existing) { + for (int index = 0; index < end - start; index += BYTE_SPECIES.vectorByteSize()) { + ByteVector line = ByteVector.fromMemorySegment(BYTE_SPECIES, buffer, start + index, ByteOrder.nativeOrder(), BYTE_SPECIES.indexInRange(start + index, end)); + ByteVector found = ByteVector.fromArray(BYTE_SPECIES, existing.name, index); + if (!found.eq(line).allTrue()) + return false; + } + return true; + } + + private static final int GOLDEN_RATIO = 0x9E3779B9; + private static final int HASH_LROTATE = 5; + + // hash from giovannicuccu + private static int hash(MemorySegment memorySegment, long start, int len) { + int x; + int y; + if (len >= Integer.BYTES) { + x = memorySegment.get(JAVA_INT_UNALIGNED, start); + y = memorySegment.get(JAVA_INT_UNALIGNED, start + len - Integer.BYTES); + } + else { + x = memorySegment.get(JAVA_BYTE, start); + y = memorySegment.get(JAVA_BYTE, start + len - Byte.BYTES); + } + return (Integer.rotateLeft(x * GOLDEN_RATIO, HASH_LROTATE) ^ y) * GOLDEN_RATIO; + } + + public static Stat createStation(long start, long end, MemorySegment buffer) { + byte[] stationBuffer = new byte[(int) (end - start)]; + for (long off = start; off < end; off++) + stationBuffer[(int) (off - start)] = buffer.get(JAVA_BYTE, off); + return new Stat(stationBuffer); + } + + 
public static Stat dedupeStation(long start, long end, MemorySegment buffer, Stat[] stations) { + int hash = hash(buffer, start, (int) (end - start)); + int index = hash & (MAX_STATIONS - 1); + Stat match = stations[index]; + while (match != null) { + if (matchingStationBytes(start, end, buffer, match)) + return match; + index = (index + 1) % stations.length; + match = stations[index]; + } + Stat res = createStation(start, end, buffer); + stations[index] = res; + return res; + } + + public static short getMinus(long d) { + return ((d & 0xff00000000000000L) ^ 0x2d00000000000000L) != 0 ? 0 : (short) -1; + } + + public static void processTemperature(long lineSplit, int size, MemorySegment buffer, Stat station) { + long d = buffer.get(LONG_LAYOUT, lineSplit); + // negative is either 0 or -1 + short negative = getMinus(d); + d = d << (negative * -8); + int dotIndex = size - 2 + negative; + d = (d >> 8) | 0x30000000_00000000L; // add a leading 0 digit + d = d >> 8 * (5 - dotIndex); + short temperature = (short) ((byte) d - '0' + + 10 * (((byte) (d >> 16)) - '0') + + 100 * (((byte) (d >> 24)) - '0')); + temperature = (short) ((temperature ^ negative) - negative); // negative treatment inspired by merkitty + station.add(temperature); + } + + private static int lineSize(long lineStart, MemorySegment buffer) { + ByteVector line = ByteVector.fromMemorySegment(BYTE_SPECIES, buffer, lineStart, ByteOrder.nativeOrder()); + int lineSize = line.compare(VectorOperators.EQ, '\n').firstTrue(); + int index = lineSize; + while (index == BYTE_SPECIES.vectorByteSize()) { + index = ByteVector.fromMemorySegment(BYTE_SPECIES, buffer, lineStart + lineSize, + ByteOrder.nativeOrder()).compare(VectorOperators.EQ, '\n').firstTrue(); + lineSize += index; + } + return lineSize; + } + + private static int keySize(int lineSize, long lineStart, MemorySegment buffer) { + return lineSize - 6 + ByteVector.fromMemorySegment(BYTE_SPECIES, buffer, lineStart + lineSize - 6, + 
ByteOrder.nativeOrder()).compare(VectorOperators.EQ, ';').firstTrue(); + } + + public static Stat[] parseStats(long start1, long end2, MemorySegment buffer) { + // read first partial line + if (start1 > 0) { + for (int i = 0; i < MAX_LINE_LENGTH; i++) { + byte b = buffer.get(JAVA_BYTE, start1++); + if (b == '\n') { + break; + } + } + } + + Stat[] stations = new Stat[MAX_STATIONS]; + + // Handle reading the very last few lines in the file + // this allows us to not worry about reading beyond the end + // in the inner loop (reducing branches) + // We need at least the vector lane size bytes back + if (end2 == buffer.byteSize()) { + // reverse at least vector lane width + end2 = Math.max(buffer.byteSize() - 2 * BYTE_SPECIES.vectorByteSize(), 0); + while (end2 > 0 && buffer.get(JAVA_BYTE, end2) != '\n') + end2--; + + if (end2 > 0) + end2++; + // copy into a larger buffer to avoid reading off end + MemorySegment end = Arena.global().allocate(MAX_LINE_LENGTH + 2 * BYTE_SPECIES.vectorByteSize()); + for (long i = end2; i < buffer.byteSize(); i++) + end.set(JAVA_BYTE, i - end2, buffer.get(JAVA_BYTE, i)); + int index = 0; + while (end2 + index < buffer.byteSize()) { + int lineSize1 = lineSize(index, end); + int semiSearchStart = index + Math.max(0, lineSize1 - 6); + int keySize1 = semiSearchStart - index + ByteVector.fromMemorySegment(BYTE_SPECIES, end, semiSearchStart, + ByteOrder.nativeOrder()).compare(VectorOperators.EQ, ';').firstTrue(); + Stat station1 = dedupeStation(index, index + keySize1, end, stations); + processTemperature(index + keySize1 + 1, lineSize1 - keySize1 - 1, end, station1); + index += lineSize1 + 1; + } + } + + while (start1 < end2) { + int lineSize1 = lineSize(start1, buffer); + long start2 = start1 + lineSize1 + 1; + int lineSize2 = start2 < end2 ? 
lineSize(start2, buffer) : 0; + int keySize1 = keySize(lineSize1, start1, buffer); + int keySize2 = keySize(lineSize2, start2, buffer); + Stat station1 = dedupeStation(start1, start1 + keySize1, buffer, stations); + processTemperature(start1 + keySize1 + 1, lineSize1 - keySize1 - 1, buffer, station1); + if (start2 < end2) { + Stat station2 = dedupeStation(start2, start2 + keySize2, buffer, stations); + processTemperature(start2 + keySize2 + 1, lineSize2 - keySize2 - 1, buffer, station2); + start1 = start2 + lineSize2 + 1; + } + else + start1 += lineSize1 + 1; + } + return stations; + } + + public static class Stat { + final byte[] name; + final int namelen; + int count = 0; + short min = Short.MAX_VALUE, max = Short.MIN_VALUE; + long total = 0; + + public Stat(byte[] name) { + int vecSize = BYTE_SPECIES.vectorByteSize(); + int arrayLen = (name.length + vecSize - 1) / vecSize * vecSize; + this.name = Arrays.copyOfRange(name, 0, arrayLen); + this.namelen = name.length; + } + + public void add(short value) { + if (value < min) + min = value; + if (value > max) + max = value; + total += value; + count++; + } + + public Stat merge(Stat value) { + if (value.min < min) + min = value.min; + if (value.max > max) + max = value.max; + total += value.total; + count += value.count; + return this; + } + + private static double round(double value) { + return Math.round(value) / 10.0; + } + + public String name() { + return new String(Arrays.copyOfRange(name, 0, namelen)); + } + + public String toString() { + return round((double) min) + "/" + round(((double) total) / count) + "/" + round((double) max); + } + } +} diff --git a/src/main/java/dev/morling/onebrc/CalculateAverage_iziamos.java b/src/main/java/dev/morling/onebrc/CalculateAverage_iziamos.java index c0358b9c4..f4ca68f36 100644 --- a/src/main/java/dev/morling/onebrc/CalculateAverage_iziamos.java +++ b/src/main/java/dev/morling/onebrc/CalculateAverage_iziamos.java @@ -15,12 +15,10 @@ */ package dev.morling.onebrc; -import 
sun.misc.Unsafe; - import java.io.IOException; +import java.io.UncheckedIOException; import java.lang.foreign.Arena; -import java.lang.foreign.MemorySegment; -import java.lang.reflect.Field; +import java.nio.ByteOrder; import java.nio.channels.FileChannel; import java.nio.file.Files; import java.nio.file.Path; @@ -28,50 +26,47 @@ import java.util.TreeMap; import java.util.concurrent.CompletableFuture; -import static dev.morling.onebrc.CalculateAverage_iziamos.ByteBackedResultSet.mask; import static java.nio.channels.FileChannel.MapMode.READ_ONLY; import static java.nio.charset.StandardCharsets.UTF_8; import static java.nio.file.StandardOpenOption.READ; public class CalculateAverage_iziamos { - private static final Unsafe UNSAFE; - - private static final String FILE = "./measurements.txt"; - private static final Arena GLOBAL_ARENA = Arena.global(); - private final static MemorySegment WHOLE_FILE_SEGMENT; - private final static long FILE_SIZE; - private final static long BASE_POINTER; - private final static long END_POINTER; + private static final sun.misc.Unsafe UNSAFE = initUnsafe(); - static { + private static sun.misc.Unsafe initUnsafe() { try { - final Field theUnsafe = Unsafe.class.getDeclaredField("theUnsafe"); + java.lang.reflect.Field theUnsafe = sun.misc.Unsafe.class.getDeclaredField("theUnsafe"); theUnsafe.setAccessible(true); - UNSAFE = (Unsafe) theUnsafe.get(Unsafe.class); - - final var fileChannel = (FileChannel) Files.newByteChannel(Path.of(FILE), READ); - WHOLE_FILE_SEGMENT = fileChannel.map(READ_ONLY, 0, fileChannel.size(), GLOBAL_ARENA); - + return (sun.misc.Unsafe) theUnsafe.get(sun.misc.Unsafe.class); } - catch (final NoSuchFieldException | IllegalAccessException | IOException e) { + catch (NoSuchFieldException | IllegalAccessException e) { throw new RuntimeException(e); } - - FILE_SIZE = WHOLE_FILE_SEGMENT.byteSize(); - BASE_POINTER = WHOLE_FILE_SEGMENT.address(); - END_POINTER = BASE_POINTER + FILE_SIZE; } - private static final long CHUNK_SIZE 
= 64 * 1024 * 1024; - // private static final long CHUNK_SIZE = Long.MAX_VALUE; + + private static final String FILE = "./measurements.txt"; + private static final Arena GLOBAL_ARENA = Arena.global(); public static void main(String[] args) throws Exception { - // Thread.sleep(10_000); + // final long chunkSize = Long.MAX_VALUE; + final long chunkSize = 64 * 1024 * 1024; + + final FileChannel fileChannel; + try { + fileChannel = (FileChannel) Files.newByteChannel(Path.of(FILE), READ); + } + catch (final IOException e) { + throw new UncheckedIOException(e); + } + + final var seg = fileChannel.map(READ_ONLY, 0, fileChannel.size(), GLOBAL_ARENA); - final long threadCount = 1 + FILE_SIZE / CHUNK_SIZE; + final long fileSize = seg.byteSize(); + final long threadCount = 1 + fileSize / chunkSize; final var processingFutures = new CompletableFuture[(int) threadCount]; for (int i = 0; i < threadCount; ++i) { - processingFutures[i] = processSegment(i, CHUNK_SIZE); + processingFutures[i] = processSegment(seg.address(), seg.address() + fileSize, i, chunkSize); } final long aggregate = (long) processingFutures[0].get(); @@ -101,15 +96,18 @@ private double round(double value) { } } - private static CompletableFuture processSegment(final long chunkNumber, final long chunkSize) { + private static CompletableFuture processSegment(final long basePointer, + final long endPointer, + final long chunkNumber, + final long chunkSize) { final var ret = new CompletableFuture(); Thread.ofVirtual().start(() -> { final long relativeStart = chunkNumber * chunkSize; - final long absoluteStart = BASE_POINTER + relativeStart; + final long absoluteStart = basePointer + relativeStart; - final long absoluteEnd = computeAbsoluteEndWithSlack(absoluteStart + chunkSize); - final long startOffsetAfterSkipping = skipIncomplete(WHOLE_FILE_SEGMENT.address(), absoluteStart); + final long absoluteEnd = computeAbsoluteEndWithSlack(absoluteStart + chunkSize, endPointer); + final long startOffsetAfterSkipping = 
skipIncomplete(basePointer, absoluteStart); final long result = processEvents(startOffsetAfterSkipping, absoluteEnd); ret.complete(result); @@ -118,8 +116,8 @@ private static CompletableFuture processSegment(final long chunkNumber, fi return ret; } - private static long computeAbsoluteEndWithSlack(final long chunk) { - return Long.compareUnsigned(END_POINTER, chunk) > 0 ? chunk : END_POINTER; + private static long computeAbsoluteEndWithSlack(final long chunk, final long endPointer) { + return Long.compareUnsigned(endPointer, chunk) > 0 ? chunk : endPointer; } private static long skipIncomplete(final long basePointer, final long start) { @@ -141,7 +139,7 @@ private static long processEvents(final long start, final long limit) { } private static void scalarLoop(final long start, final long limit, final long result) { - final var cursor = new ScalarLoopCursor(start, limit); + final LoopCursor cursor = new LoopCursor(start, limit); while (cursor.hasMore()) { final long address = cursor.getCurrentAddress(); final int length = cursor.getStringLength(); @@ -151,13 +149,13 @@ private static void scalarLoop(final long start, final long limit, final long re } } - public static class ScalarLoopCursor { + public static class LoopCursor { private long pointer; private final long limit; private int hash = 0; - public ScalarLoopCursor(final long pointer, final long limit) { + public LoopCursor(final long pointer, final long limit) { this.pointer = pointer; this.limit = limit; } @@ -172,7 +170,7 @@ public int getStringLength() { byte b = UNSAFE.getByte(pointer); for (; b != ';'; ++strLen, b = UNSAFE.getByte(pointer + strLen)) { - hash += b << strLen; + hash = 31 * hash + b; } pointer += strLen + 1; @@ -180,41 +178,35 @@ public int getStringLength() { } public int getHash() { - return mask(hash); + return hash; } public int getCurrentValue() { - final byte first = UNSAFE.getByte(pointer++); - final byte second = UNSAFE.getByte(pointer++); - final byte third = 
UNSAFE.getByte(pointer++); - final byte fourth = UNSAFE.getByte(pointer++); - final byte fifth = UNSAFE.getByte(pointer++); - - int value; - if (second == '.') { - // D.D\n - value = appendDigit(digitCharToInt(first), third); - pointer--; - return value; - } - else if (fourth == '.') { - // -DD.D\n - value = digitCharToInt(second); - value = appendDigit(value, third); - value = -appendDigit(value, fifth); - pointer++; - return value; - } - else if (first == '-') { - // -D.D\n - return -appendDigit(digitCharToInt(second), fourth); - } - else { - // DD.D\n - value = digitCharToInt(first); - value = appendDigit(value, second); - return appendDigit(value, fourth); + return getCurrentValueMeryKitty(); + } + + /** + * No point rewriting what would essentially be the same code <3. + */ + public int getCurrentValueMeryKitty() { + long word = UNSAFE.getLong(pointer); + if (ByteOrder.nativeOrder() == ByteOrder.BIG_ENDIAN) { + word = Long.reverseBytes(word); } + + int decimalSepPos = Long.numberOfTrailingZeros(~word & 0x10101000); + int shift = 28 - decimalSepPos; + + long signed = (~word << 59) >> 63; + long designMask = ~(signed & 0xFF); + + long digits = ((word & designMask) << shift) & 0x0F000F0F00L; + + long absValue = ((digits * 0x640a0001) >>> 32) & 0x3FF; + int increment = (decimalSepPos >>> 3) + 3; + + pointer += increment; + return (int) ((absValue ^ signed) - signed); } public boolean hasMore() { @@ -222,22 +214,12 @@ public boolean hasMore() { } } - private static int appendDigit(int value, final byte b) { - value *= 10; - value += digitCharToInt(b); - return value; - } - - private static int digitCharToInt(final byte b) { - return b - '0'; - } - public interface ResultConsumer { void consume(final String name, final int min, final int max, final long sum, final long count); } static class ByteBackedResultSet { - private static final int MAP_SIZE = 16384; + private static final int MAP_SIZE = 16384 * 4; private static final int MASK = MAP_SIZE - 1; private static 
final long STRUCT_SIZE = 64; private static final long BYTE_SIZE = MAP_SIZE * STRUCT_SIZE; @@ -338,7 +320,7 @@ private static int findSlot(final long baseAddress, final long otherStringAddress, final int otherStringLength) { - for (int slot = hash;; slot = mask(++slot)) { + for (int slot = mask(hash);; slot = mask(++slot)) { final long structBase = baseAddress + ((long) slot * STRUCT_SIZE); final long nameStart = UNSAFE.getLong(structBase); if (nameStart == 0) { @@ -354,22 +336,25 @@ private static int findSlot(final long baseAddress, } } - private static boolean stringEquals(final long thisNameAddress, final int thisStringLength, final long otherNameAddress, final long otherNameLength) { + private static boolean stringEquals(final long thisNameAddress, + final int thisStringLength, + final long otherNameAddress, + final long otherNameLength) { if (thisStringLength != otherNameLength) { return false; } int i = 0; - for (; i < thisStringLength - 3; i += 4) { - if (UNSAFE.getInt(thisNameAddress + i) != UNSAFE.getInt(otherNameAddress + i)) { + for (; i < thisStringLength - 7; i += 8) { + if (UNSAFE.getLong(thisNameAddress + i) != UNSAFE.getLong(otherNameAddress + i)) { return false; } } - final int remainingToCheck = thisStringLength - i; - final int finalBytesMask = ((1 << remainingToCheck * 8)) - 1; - final int thisLastWord = UNSAFE.getInt(thisNameAddress + i); - final int otherLastWord = UNSAFE.getInt(otherNameAddress + i); + final long remainingToCheck = thisStringLength - i; + final long finalBytesMask = ((1L << remainingToCheck * 8)) - 1; + final long thisLastWord = UNSAFE.getLong(thisNameAddress + i); + final long otherLastWord = UNSAFE.getLong(otherNameAddress + i); return 0 == ((thisLastWord ^ otherLastWord) & finalBytesMask); } diff --git a/src/main/java/dev/morling/onebrc/CalculateAverage_japplis.java b/src/main/java/dev/morling/onebrc/CalculateAverage_japplis.java index fb386bfa5..36eb0172c 100644 --- 
a/src/main/java/dev/morling/onebrc/CalculateAverage_japplis.java +++ b/src/main/java/dev/morling/onebrc/CalculateAverage_japplis.java @@ -41,7 +41,9 @@ * - Replaced compute lambda call with synchronized(city.intern()): 43" (due to intern()) * - Removed BufferedInputStream and replaced Measurement with IntSummaryStatistics (thanks davecom): still 23" but cleaner code * - Execute same code on 1BRC server: 41" - * - One HashMap per thread: 17" locally + * - One HashMap per thread: 17" locally (12" on 1BRC server) + * - Read file in multiple threads if available and + * - Changed String to (byte[]) Text with cache: 18" locally (but 8" -> 5" on laptop) * * @author Anthony Goubard - Japplis */ @@ -53,63 +55,112 @@ public class CalculateAverage_japplis { private int precision = -1; private int precisionLimitTenth; - - private Map cityMeasurementMap = new ConcurrentHashMap<>(); + private long fileSize; + private Map cityMeasurementMap = new ConcurrentHashMap<>(10_000); private List previousBlockLastLine = new ArrayList<>(); - private Semaphore readFileLock = new Semaphore(MAX_COMPUTE_THREADS); + private Queue bufferPool = new ConcurrentLinkedQueue<>(); private void parseTemperatures(File measurementsFile) throws Exception { - try (InputStream measurementsFileIS = new FileInputStream(measurementsFile)) { - int readCount = BUFFER_SIZE; - ExecutorService threadPool = Executors.newFixedThreadPool(MAX_COMPUTE_THREADS); - List parseBlockTasks = new ArrayList<>(); - while (readCount > 0) { - byte[] buffer = new byte[BUFFER_SIZE]; - readCount = measurementsFileIS.read(buffer); - if (readCount > 0) { - readFileLock.acquire(); // Wait if all threads are busy + fileSize = measurementsFile.length(); + int blockIndex = 0; + int totalBlocks = (int) (fileSize / BUFFER_SIZE) + 1; + ExecutorService threadPool = Executors.newFixedThreadPool(MAX_COMPUTE_THREADS); + List parseBlockTasks = new ArrayList<>(); - // Process the block in a thread while the main thread continues to read the file - 
Future parseBlockTask = threadPool.submit(parseTemperaturesBlock(buffer, readCount)); + while (blockIndex < totalBlocks) { + int availableReadThreads = Math.min(readFileLock.availablePermits(), totalBlocks - blockIndex); + if (availableReadThreads == 0) { + readFileLock.acquire(); // No need to loop in the 'while' if all threads are busy + readFileLock.release(); + } + List> readBlockTasks = new ArrayList<>(); + for (int i = 0; i < availableReadThreads; i++) { + readFileLock.acquire(); // Wait if all threads are busy + Callable blockReader = readBlock(measurementsFile, blockIndex); + Future readBlockTask = threadPool.submit(blockReader); + readBlockTasks.add(readBlockTask); + blockIndex++; + } + for (Future readBlockTask : readBlockTasks) { + ByteArray buffer = readBlockTask.get(); + if (buffer.array().length > 0) { + int startIndex = handleSplitLine(buffer.array()); + readFileLock.acquire(); // Wait if all threads are busy + Runnable blockParser = parseTemperaturesBlock(buffer, startIndex); + Future parseBlockTask = threadPool.submit(blockParser); parseBlockTasks.add(parseBlockTask); } } - for (Future parseBlockTask : parseBlockTasks) // Wait for all tasks to finish - parseBlockTask.get(); - threadPool.shutdownNow(); } + for (Future parseBlockTask : parseBlockTasks) { // Wait for all tasks to finish + parseBlockTask.get(); + } + threadPool.shutdownNow(); } - private Runnable parseTemperaturesBlock(byte[] buffer, int readCount) { - int startIndex = handleSplitLine(buffer, readCount); + private Callable readBlock(File measurementsFile, long blockIndex) { + return () -> { + long fileIndex = blockIndex * BUFFER_SIZE; + if (fileIndex >= fileSize) { + readFileLock.release(); + return new ByteArray(0); + } + try (InputStream measurementsFileIS = new FileInputStream(measurementsFile)) { + if (fileIndex > 0) { + long skipped = measurementsFileIS.skip(fileIndex); + while (skipped != fileIndex) { + skipped += measurementsFileIS.skip(fileIndex - skipped); + } + } + long 
bufferSize = Math.min(BUFFER_SIZE, fileSize - fileIndex); + ByteArray buffer = bufferSize == BUFFER_SIZE ? bufferPool.poll() : new ByteArray((int) bufferSize); + if (buffer == null) { + buffer = new ByteArray(BUFFER_SIZE); + } + int totalRead = measurementsFileIS.read(buffer.array(), 0, (int) bufferSize); + while (totalRead < bufferSize) { + byte[] extraBuffer = new byte[(int) (bufferSize - totalRead)]; + int readCount = measurementsFileIS.read(extraBuffer); + System.arraycopy(extraBuffer, 0, buffer.array(), totalRead, readCount); + totalRead += readCount; + } + readFileLock.release(); + return buffer; + } + }; + } + + private Runnable parseTemperaturesBlock(ByteArray buffer, int startIndex) { Runnable countAverageRun = () -> { int bufferIndex = startIndex; - Map blockCityMeasurementMap = new HashMap<>(); + Map blockCityMeasurementMap = new HashMap<>(10_000); + Map textPool = new HashMap<>(10_000); + byte[] bufferArray = buffer.array(); try { - while (bufferIndex < readCount) { - bufferIndex = readNextLine(bufferIndex, buffer, blockCityMeasurementMap); + while (bufferIndex < bufferArray.length) { + bufferIndex = readNextLine(bufferIndex, bufferArray, blockCityMeasurementMap, textPool); } } catch (ArrayIndexOutOfBoundsException ex) { // Done reading and parsing the buffer } + if (bufferArray.length == BUFFER_SIZE) + bufferPool.add(buffer); mergeBlockResults(blockCityMeasurementMap); readFileLock.release(); }; return countAverageRun; } - private int handleSplitLine(byte[] buffer, int readCount) { + private int handleSplitLine(byte[] buffer) { int bufferIndex = readFirstLines(buffer); - List lastLine = new ArrayList<>(); // Store the last (partial) line of the block - int tailIndex = readCount; - if (tailIndex == buffer.length) { - byte car = buffer[--tailIndex]; - while (car != '\n') { - lastLine.add(0, car); - car = buffer[--tailIndex]; - } + List lastLine = new ArrayList<>(100); // Store the last (partial) line of the block + int tailIndex = buffer.length; + byte 
car = buffer[--tailIndex]; + while (car != '\n') { + lastLine.add(0, car); + car = buffer[--tailIndex]; } if (previousBlockLastLine.isEmpty()) { previousBlockLastLine = lastLine; @@ -132,7 +183,7 @@ private int readSplitLine(byte[] buffer) { for (int i = 0; i < splitLineBytes.length; i++) { splitLineBytes[i] = previousBlockLastLine.get(i); } - readNextLine(0, splitLineBytes, cityMeasurementMap); + readNextLine(0, splitLineBytes, cityMeasurementMap, new HashMap<>()); return bufferIndex; } @@ -148,8 +199,9 @@ private int readFirstLines(byte[] buffer) { int dotPos = bufferIndex; byte car = buffer[bufferIndex++]; while (car != '\n') { - if (car == '.') + if (car == '.') { dotPos = bufferIndex; + } car = buffer[bufferIndex++]; } precision = bufferIndex - dotPos - 1; @@ -158,40 +210,47 @@ private int readFirstLines(byte[] buffer) { return startIndex; } - private int readNextLine(int bufferIndex, byte[] buffer, Map blockCityMeasurementMap) { + private int readNextLine(int bufferIndex, byte[] buffer, Map blockCityMeasurementMap, Map textPool) { int startLineIndex = bufferIndex; - while (buffer[bufferIndex] != ';') + while (buffer[bufferIndex] != (byte) ';') { bufferIndex++; - String city = new String(buffer, startLineIndex, bufferIndex - startLineIndex, StandardCharsets.UTF_8); + } + // String city = new String(buffer, startLineIndex, bufferIndex - startLineIndex, StandardCharsets.UTF_8); + Text city = Text.getByteText(buffer, startLineIndex, bufferIndex - startLineIndex, textPool); bufferIndex++; // skip ';' int temperature = readTemperature(buffer, bufferIndex); bufferIndex += precision + 3; // digit, dot and CR - if (temperature < 0) + if (temperature < 0) { bufferIndex++; - if (temperature <= -precisionLimitTenth || temperature >= precisionLimitTenth) + } + if (temperature <= -precisionLimitTenth || temperature >= precisionLimitTenth) { bufferIndex++; + } addTemperature(city, temperature, blockCityMeasurementMap); return bufferIndex; } - private int 
readTemperature(byte[] text, int measurementIndex) { - boolean negative = text[measurementIndex] == '-'; - if (negative) - measurementIndex++; - byte digitChar = text[measurementIndex++]; + private int readTemperature(byte[] buffer, int bufferIndex) { + boolean negative = buffer[bufferIndex] == (byte) '-'; + if (negative) { + bufferIndex++; + } + byte digit = buffer[bufferIndex++]; int temperature = 0; - while (digitChar != '\n') { - temperature = temperature * 10 + (digitChar - '0'); - digitChar = text[measurementIndex++]; - if (digitChar == '.') - digitChar = text[measurementIndex++]; + while (digit != (byte) '\n') { + temperature = temperature * 10 + (digit - (byte) '0'); + digit = buffer[bufferIndex++]; + if (digit == (byte) '.') { // Skip '.' + digit = buffer[bufferIndex++]; + } } - if (negative) + if (negative) { temperature = -temperature; + } return temperature; } - private void addTemperature(String city, int temperature, Map blockCityMeasurementMap) { + private void addTemperature(Text city, int temperature, Map blockCityMeasurementMap) { IntSummaryStatistics measurement = blockCityMeasurementMap.get(city); if (measurement == null) { measurement = new IntSummaryStatistics(); @@ -200,16 +259,20 @@ private void addTemperature(String city, int temperature, Map blockCityMeasurementMap) { + private void mergeBlockResults(Map blockCityMeasurementMap) { blockCityMeasurementMap.forEach((city, measurement) -> { - IntSummaryStatistics oldMeasurement = cityMeasurementMap.putIfAbsent(city, measurement); - if (oldMeasurement != null) - oldMeasurement.combine(measurement); + cityMeasurementMap.compute(city, (town, currentMeasurement) -> { + if (currentMeasurement == null) { + return measurement; + } + currentMeasurement.combine(measurement); + return currentMeasurement; + }); }); } private void printTemperatureStatsByCity() { - Set sortedCities = new TreeSet<>(cityMeasurementMap.keySet()); + Set sortedCities = new TreeSet<>(cityMeasurementMap.keySet()); StringBuilder 
result = new StringBuilder(cityMeasurementMap.size() * 40); result.append('{'); sortedCities.forEach(city -> { @@ -217,7 +280,9 @@ private void printTemperatureStatsByCity() { result.append(city); result.append(getTemperatureStats(measurement)); }); - result.delete(result.length() - 2, result.length()); + if (!sortedCities.isEmpty()) { + result.delete(result.length() - 2, result.length()); + } result.append('}'); String temperaturesByCity = result.toString(); System.out.println(temperaturesByCity); @@ -242,9 +307,10 @@ private void appendTemperature(StringBuilder resultBuilder, int temperature) { for (int i = temperatureAsText.length(); i < minCharacters; i++) { temperatureAsText = temperature < 0 ? "-0" + temperatureAsText.substring(1) : "0" + temperatureAsText; } - resultBuilder.append(temperatureAsText.substring(0, temperatureAsText.length() - precision)); + int dotPosition = temperatureAsText.length() - precision; + resultBuilder.append(temperatureAsText.substring(0, dotPosition)); resultBuilder.append('.'); - resultBuilder.append(temperatureAsText.substring(temperatureAsText.length() - precision)); + resultBuilder.append(temperatureAsText.substring(dotPosition)); } public static final void main(String... args) throws Exception { @@ -253,4 +319,76 @@ public static final void main(String... 
args) throws Exception { cityTemperaturesCalculator.parseTemperatures(new File(measurementFile)); cityTemperaturesCalculator.printTemperatureStatsByCity(); } -} + + private class ByteArray { + + private byte[] array; + + private ByteArray(int size) { + array = new byte[size]; + } + + private byte[] array() { + return array; + } + } + + private static class Text implements Comparable { + + private final byte[] textBytes; + private final int hash; + private String text; + + private Text(byte[] buffer, int startIndex, int length, int hash) { + textBytes = new byte[length]; + this.hash = hash; + System.arraycopy(buffer, startIndex, textBytes, 0, length); + } + + private static Text getByteText(byte[] buffer, int startIndex, int length, Map textPool) { + int hash = hashCode(buffer, startIndex, length); + Text textFromPool = textPool.get(hash); + if (textFromPool == null || !Arrays.equals(buffer, startIndex, startIndex + length, textFromPool.textBytes, 0, length)) { + Text newText = new Text(buffer, startIndex, length, hash); + textPool.put(hash, newText); + return newText; + } + return textFromPool; + } + + private static int hashCode(byte[] buffer, int startIndex, int length) { + int hash = 31; + int endIndex = startIndex + length; + for (int i = startIndex; i < endIndex; i++) { + hash = 31 * hash + buffer[i]; + } + return hash; + } + + @Override + public int hashCode() { + return hash; + } + + @Override + public boolean equals(Object other) { + return other != null && + hashCode() == other.hashCode() && + other instanceof Text && + Arrays.equals(textBytes, ((Text) other).textBytes); + } + + @Override + public int compareTo(Text other) { + return toString().compareTo(other.toString()); + } + + @Override + public String toString() { + if (text == null) { + text = new String(textBytes, StandardCharsets.UTF_8); + } + return text; + } + } +} \ No newline at end of file diff --git a/src/main/java/dev/morling/onebrc/CalculateAverage_jeevjyot.java 
b/src/main/java/dev/morling/onebrc/CalculateAverage_jeevjyot.java new file mode 100644 index 000000000..191e407bc --- /dev/null +++ b/src/main/java/dev/morling/onebrc/CalculateAverage_jeevjyot.java @@ -0,0 +1,107 @@ +/* + * Copyright 2023 The original authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package dev.morling.onebrc; + +import static java.lang.Math.round; +import static java.util.stream.Collectors.*; + +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.util.Map; +import java.util.TreeMap; +import java.util.concurrent.ConcurrentHashMap; +import java.util.stream.Collector; + +public class CalculateAverage_jeevjyot { + + public static final String MEAUREMENT_FILE = "./measurements.txt"; + + public static void main(String[] args) throws IOException { + Map result = new ConcurrentHashMap<>(); + Files.lines(Path.of(MEAUREMENT_FILE)) + .parallel() + .forEach(s -> { + var separatorIndex = s.indexOf(";"); + var stationName = s.substring(0, separatorIndex); + var temp = s.substring(separatorIndex + 1); + result.computeIfAbsent(stationName, d -> new tempMeasurement(parseDoubleFast(temp))) + .recordTemp(parseDoubleFast(temp)); + }); + + TreeMap sortedStats = new TreeMap<>(result); + System.out.println(sortedStats); + } + + public static double parseDoubleFast(String str) { + // Simple implementation - can be improved with more error checking and support for different 
formats + boolean negative = false; + double result = 0; + int length = str.length(); + int i = 0; + if (str.charAt(0) == '-') { + negative = true; + i++; + } + for (; i < length; i++) { + char c = str.charAt(i); + if (c == '.') { + int divisor = 1; + for (i++; i < length; i++) { + result += (double) (str.charAt(i) - '0') / (divisor *= 10); + } + break; + } + result = result * 10 + (c - '0'); + } + return negative ? -result : result; + } + + private static double round(double value) { + return Math.round(value * 10.0) / 10.0; + } + + public static class tempMeasurement { + double minTemp; + double maxTemp; + double sum; + int count; + + public tempMeasurement(double temString) { + this.minTemp = temString; + this.maxTemp = temString; + this.sum = 0.0; + this.count = 0; + } + + public synchronized void recordTemp(Double temp) { + this.minTemp = Math.min(minTemp, temp); + this.maxTemp = Math.max(maxTemp, temp); + sum += temp; + count++; + } + + double getAverage() { + return round(sum) / count; + } + + @Override + public String toString() { + return String.format("%.1f/%.1f/%.1f", round(minTemp), round(getAverage()), round(maxTemp)); + } + } +} diff --git a/src/main/java/dev/morling/onebrc/CalculateAverage_jerrinot.java b/src/main/java/dev/morling/onebrc/CalculateAverage_jerrinot.java new file mode 100644 index 000000000..88173dac5 --- /dev/null +++ b/src/main/java/dev/morling/onebrc/CalculateAverage_jerrinot.java @@ -0,0 +1,686 @@ +/* + * Copyright 2023 The original authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +package dev.morling.onebrc; + +import sun.misc.Unsafe; + +import java.io.File; +import java.io.IOException; +import java.io.RandomAccessFile; +import java.lang.foreign.Arena; +import java.lang.reflect.Field; +import java.nio.channels.FileChannel.MapMode; +import java.util.*; +import java.util.concurrent.atomic.AtomicLong; + +/** + * I figured out it would be very hard to win the main competition of the One Billion Rows Challenge. + * but I think this code has a good chance to win a special prize for the Ugliest Solution ever! :) + * + * Anyway, if you can make sense out of not exactly idiomatic Java code, and you enjoy pushing performance limits + * then QuestDB - the fastest open-source time-series database - is hiring: https://questdb.io/careers/core-database-engineer/ + *

+ * Credit + *

+ * I stand on shoulders of giants. I wouldn't be able to code this without analyzing and borrowing from solutions of others. + * People who helped me the most: + *

    + *
  • Thomas Wuerthinger (thomaswue): The munmap() trick and work-stealing. In both cases, I shameless copy-pasted their code. + * Including SWAR for detecting new lines. Thomas also gave me helpful hints on how to detect register spilling issues.
  • + *
  • Quan Anh Mai (merykitty): I borrowed their phenomenal branch-free parser.
  • + *
  • Marko Topolnik (mtopolnik): I use a hashing function I saw in his code. It seems the produce good quality hashes + * and it's next-level in speed. Marko joined the challenge before me and our discussions made me to join too!
  • + *
  • Van Phu DO (abeobk): I saw the idea with simple lookup tables instead of complicated bit-twiddling in their code first.
  • + *
  • Roy van Rijn (royvanrijn): I borrowed their SWAR code and initially their hash code impl
  • + *
  • Francesco Nigro (franz1981): For our online discussions about performance. Both before and during this challenge. + * Francesco gave me the idea to check register spilling.
  • + *
+ */ +public class CalculateAverage_jerrinot { + private static final Unsafe UNSAFE = unsafe(); + private static final String MEASUREMENTS_TXT = "measurements.txt"; + // todo: with hyper-threading enable we would be better of with availableProcessors / 2; + // todo: validate the testing env. params. + private static final int EXTRA_THREAD_COUNT = Runtime.getRuntime().availableProcessors() - 1; + // private static final int THREAD_COUNT = 1; + + private static final long SEPARATOR_PATTERN = 0x3B3B3B3B3B3B3B3BL; + private static final long NEW_LINE_PATTERN = 0x0A0A0A0A0A0A0A0AL; + private static final int SEGMENT_SIZE = 4 * 1024 * 1024; + + // credits for the idea with lookup tables instead of bit-shifting: abeobk + private static final long[] HASH_MASKS = new long[]{ + 0x0000000000000000L, // semicolon is the first char + 0x00000000000000ffL, + 0x000000000000ffffL, + 0x0000000000ffffffL, + 0x00000000ffffffffL, + 0x000000ffffffffffL, + 0x0000ffffffffffffL, + 0x00ffffffffffffffL, // semicolon is the last char + 0xffffffffffffffffL // there is no semicolon at all + }; + + private static final long[] ADVANCE_MASKS = new long[]{ + 0x0000000000000000L, + 0x0000000000000000L, + 0x0000000000000000L, + 0x0000000000000000L, + 0x0000000000000000L, + 0x0000000000000000L, + 0x0000000000000000L, + 0x0000000000000000L, + 0xffffffffffffffffL, + }; + + private static Unsafe unsafe() { + try { + Field theUnsafe = Unsafe.class.getDeclaredField("theUnsafe"); + theUnsafe.setAccessible(true); + return (Unsafe) theUnsafe.get(Unsafe.class); + } + catch (NoSuchFieldException | IllegalAccessException e) { + throw new RuntimeException(e); + } + } + + public static void main(String[] args) throws Exception { + // credits for spawning new workers: thomaswue + if (args.length == 0 || !("--worker".equals(args[0]))) { + spawnWorker(); + return; + } + calculate(); + } + + private static void spawnWorker() throws IOException { + ProcessHandle.Info info = ProcessHandle.current().info(); + ArrayList 
workerCommand = new ArrayList<>(); + info.command().ifPresent(workerCommand::add); + info.arguments().ifPresent(args -> workerCommand.addAll(Arrays.asList(args))); + workerCommand.add("--worker"); + new ProcessBuilder() + .command(workerCommand) + .inheritIO() + .redirectOutput(ProcessBuilder.Redirect.PIPE) + .start() + .getInputStream() + .transferTo(System.out); + } + + static void calculate() throws Exception { + final File file = new File(MEASUREMENTS_TXT); + final long length = file.length(); + try (var raf = new RandomAccessFile(file, "r")) { + long fileStart = raf.getChannel().map(MapMode.READ_ONLY, 0, length, Arena.global()).address(); + long fileEnd = fileStart + length; + var globalCursor = new AtomicLong(fileStart); + + Processor[] processors = new Processor[EXTRA_THREAD_COUNT]; + Thread[] threads = new Thread[EXTRA_THREAD_COUNT]; + + for (int i = 0; i < EXTRA_THREAD_COUNT; i++) { + Processor processor = new Processor(fileStart, fileEnd, globalCursor); + Thread thread = new Thread(processor); + processors[i] = processor; + threads[i] = thread; + thread.start(); + } + + Processor processor = new Processor(fileStart, fileEnd, globalCursor); + processor.run(); + + var accumulator = new TreeMap(); + processor.accumulateStatus(accumulator); + + for (int i = 0; i < EXTRA_THREAD_COUNT; i++) { + Thread t = threads[i]; + t.join(); + processors[i].accumulateStatus(accumulator); + } + + printResults(accumulator); + } + } + + private static void printResults(TreeMap accumulator) { + var sb = new StringBuilder(10000); + boolean first = true; + for (Map.Entry statsEntry : accumulator.entrySet()) { + if (first) { + sb.append("{"); + first = false; + } + else { + sb.append(", "); + } + var value = statsEntry.getValue(); + var name = statsEntry.getKey(); + int min = value.min; + int max = value.max; + int count = value.count; + long sum2 = value.sum; + sb.append(String.format("%s=%.1f/%.1f/%.1f", name, min / 10.0, Math.round((double) sum2 / count) / 10.0, max / 10.0)); + 
} + sb.append('}'); + System.out.println(sb); + System.out.close(); + } + + public static int ceilPow2(int i) { + i--; + i |= i >> 1; + i |= i >> 2; + i |= i >> 4; + i |= i >> 8; + i |= i >> 16; + return i + 1; + } + + private static class Processor implements Runnable { + private static final int MAX_UNIQUE_KEYS = 10000; + private static final int MAPS_SLOT_COUNT = ceilPow2(MAX_UNIQUE_KEYS); + private static final int STATION_MAX_NAME_BYTES = 104; + + private static final long MAP_COUNT_OFFSET = 0; + private static final long MAP_MIN_OFFSET = 4; + private static final long MAP_MAX_OFFSET = 8; + private static final long MAP_SUM_OFFSET = 12; + private static final long MAP_LEN_OFFSET = 20; + private static final long SLOW_MAP_NAME_OFFSET = 24; + + // private int longestChain = 0; + + private static final int SLOW_MAP_ENTRY_SIZE_BYTES = Integer.BYTES // count // 0 + + Integer.BYTES // min // +4 + + Integer.BYTES // max // +8 + + Long.BYTES // sum // +12 + + Integer.BYTES // station name len // +20 + + Long.BYTES; // station name ptr // 24 + + private static final long FAST_MAP_NAME_PART1 = 24; + private static final long FAST_MAP_NAME_PART2 = 32; + + private static final int FAST_MAP_ENTRY_SIZE_BYTES = Integer.BYTES // count // 0 + + Integer.BYTES // min // +4 + + Integer.BYTES // max // +8 + + Long.BYTES // sum // +12 + + Integer.BYTES // station name len // +20 + + Long.BYTES // station name part 1 // 24 + + Long.BYTES; // station name part 2 // 32 + + private static final int SLOW_MAP_SIZE_BYTES = MAPS_SLOT_COUNT * SLOW_MAP_ENTRY_SIZE_BYTES; + private static final int FAST_MAP_SIZE_BYTES = MAPS_SLOT_COUNT * FAST_MAP_ENTRY_SIZE_BYTES; + private static final int SLOW_MAP_MAP_NAMES_BYTES = MAX_UNIQUE_KEYS * STATION_MAX_NAME_BYTES; + private static final int MAP_MASK = MAPS_SLOT_COUNT - 1; + private final AtomicLong globalCursor; + + private long slowMap; + private long slowMapNamesPtr; + private long cursorA; + private long endA; + private long cursorB; + private 
long endB; + private HashMap stats = new HashMap<>(1000); + private final long fileEnd; + private final long fileStart; + + // credit: merykitty + private long parseAndStoreTemperature(long startCursor, long baseEntryPtr, long word) { + long countPtr = baseEntryPtr + MAP_COUNT_OFFSET; + int cnt = UNSAFE.getInt(countPtr); + UNSAFE.putInt(countPtr, cnt + 1); + + long minPtr = baseEntryPtr + MAP_MIN_OFFSET; + long maxPtr = baseEntryPtr + MAP_MAX_OFFSET; + long sumPtr = baseEntryPtr + MAP_SUM_OFFSET; + + int min = UNSAFE.getInt(minPtr); + int max = UNSAFE.getInt(maxPtr); + long sum = UNSAFE.getLong(sumPtr); + + final long negateda = ~word; + final int dotPos = Long.numberOfTrailingZeros(negateda & 0x10101000); + final long signed = (negateda << 59) >> 63; + final long removeSignMask = ~(signed & 0xFF); + final long digits = ((word & removeSignMask) << (28 - dotPos)) & 0x0F000F0F00L; + final long absValue = ((digits * 0x640a0001) >>> 32) & 0x3FF; + final int temperature = (int) ((absValue ^ signed) - signed); + sum += temperature; + UNSAFE.putLong(sumPtr, sum); + + if (temperature > max) { + UNSAFE.putInt(maxPtr, temperature); + } + if (temperature < min) { + UNSAFE.putInt(minPtr, temperature); + } + return startCursor + (dotPos / 8) + 3; + } + + private static long getDelimiterMask(final long word) { + // credit royvanrijn + final long match = word ^ SEPARATOR_PATTERN; + return (match - 0x0101010101010101L) & (~match & 0x8080808080808080L); + } + + void accumulateStatus(TreeMap accumulator) { + for (Map.Entry entry : stats.entrySet()) { + String name = entry.getKey(); + CalculateAverage_jerrinot.StationStats localStats = entry.getValue(); + + CalculateAverage_jerrinot.StationStats globalStats = accumulator.get(name); + if (globalStats == null) { + accumulator.put(name, localStats); + } + else { + accumulator.put(name, globalStats.mergeWith(localStats)); + } + } + } + + Processor(long fileStart, long fileEnd, AtomicLong globalCursor) { + this.globalCursor = 
globalCursor; + this.fileEnd = fileEnd; + this.fileStart = fileStart; + } + + private void transferToHeap(long fastMap) { + for (long baseAddress = slowMap; baseAddress < slowMap + SLOW_MAP_SIZE_BYTES; baseAddress += SLOW_MAP_ENTRY_SIZE_BYTES) { + long len = UNSAFE.getInt(baseAddress + MAP_LEN_OFFSET); + if (len == 0) { + continue; + } + byte[] nameArr = new byte[(int) len]; + long baseNameAddr = UNSAFE.getLong(baseAddress + SLOW_MAP_NAME_OFFSET); + for (int i = 0; i < len; i++) { + nameArr[i] = UNSAFE.getByte(baseNameAddr + i); + } + String name = new String(nameArr); + int min = UNSAFE.getInt(baseAddress + MAP_MIN_OFFSET); + int max = UNSAFE.getInt(baseAddress + MAP_MAX_OFFSET); + int count = UNSAFE.getInt(baseAddress + MAP_COUNT_OFFSET); + long sum = UNSAFE.getLong(baseAddress + MAP_SUM_OFFSET); + + stats.put(name, new CalculateAverage_jerrinot.StationStats(min, max, count, sum)); + } + + for (long baseAddress = fastMap; baseAddress < fastMap + FAST_MAP_SIZE_BYTES; baseAddress += FAST_MAP_ENTRY_SIZE_BYTES) { + long len = UNSAFE.getInt(baseAddress + MAP_LEN_OFFSET); + if (len == 0) { + continue; + } + byte[] nameArr = new byte[(int) len]; + long baseNameAddr = baseAddress + FAST_MAP_NAME_PART1; + for (int i = 0; i < len; i++) { + nameArr[i] = UNSAFE.getByte(baseNameAddr + i); + } + String name = new String(nameArr); + int min = UNSAFE.getInt(baseAddress + MAP_MIN_OFFSET); + int max = UNSAFE.getInt(baseAddress + MAP_MAX_OFFSET); + int count = UNSAFE.getInt(baseAddress + MAP_COUNT_OFFSET); + long sum = UNSAFE.getLong(baseAddress + MAP_SUM_OFFSET); + + var v = stats.get(name); + if (v == null) { + stats.put(name, new CalculateAverage_jerrinot.StationStats(min, max, count, sum)); + } + else { + stats.put(name, new CalculateAverage_jerrinot.StationStats(Math.min(v.min, min), Math.max(v.max, max), v.count + count, v.sum + sum)); + } + } + } + + private void doOne(long cursor, long end, long fastMap) { + while (cursor < end) { + // it seems that when pulling just from a 
single chunk + // then bit-twiddling is faster than lookup tables + // hypothesis: when processing multiple things at once then LOAD latency is partially hidden + // but when processing just one thing then it's better to keep things local as much as possible? maybe:) + + long start = cursor; + long currentWord = UNSAFE.getLong(cursor); + long mask = getDelimiterMask(currentWord); + long firstWordMask = ((mask - 1) ^ mask) >>> 8; + final long isMaskZeroA = ((mask | -mask) >>> 63) ^ 1; + long ext = -isMaskZeroA; + firstWordMask |= ext; + + long maskedFirstWord = currentWord & firstWordMask; + int hash = hash(maskedFirstWord); + int mapIndex = hash & MAP_MASK; + while (mask == 0) { + cursor += 8; + currentWord = UNSAFE.getLong(cursor); + mask = getDelimiterMask(currentWord); + } + final int delimiterByte = Long.numberOfTrailingZeros(mask); + final long semicolon = cursor + (delimiterByte >> 3); + final long maskedWord = currentWord & ((mask - 1) ^ mask) >>> 8; + + int len = (int) (semicolon - start); + if (len > 15) { + long baseEntryPtr = getOrCreateEntryBaseOffsetSlow(len, start, hash, maskedWord); + long temperatureWord = UNSAFE.getLong(semicolon + 1); + cursor = parseAndStoreTemperature(semicolon + 1, baseEntryPtr, temperatureWord); + } + else { + long baseEntryPtr = getOrCreateEntryBaseOffsetFast(mapIndex, len, maskedWord, maskedFirstWord, fastMap); + long temperatureWord = UNSAFE.getLong(semicolon + 1); + cursor = parseAndStoreTemperature(semicolon + 1, baseEntryPtr, temperatureWord); + } + } + } + + private static int hash(long word) { + // credit: mtopolnik + long seed = 0x51_7c_c1_b7_27_22_0a_95L; + int rotDist = 17; + // + long hash = word; + hash *= seed; + hash = Long.rotateLeft(hash, rotDist); + return (int) hash; + } + + private static long nextNewLine(long prev) { + // again: credits to @thomaswue for this code, literally copy'n'paste + while (true) { + long currentWord = UNSAFE.getLong(prev); + long input = currentWord ^ NEW_LINE_PATTERN; + long pos = 
(input - 0x0101010101010101L) & ~input & 0x8080808080808080L; + if (pos != 0) { + prev += Long.numberOfTrailingZeros(pos) >>> 3; + break; + } + else { + prev += 8; + } + } + return prev; + } + + @Override + public void run() { + long fastMap = allocateMem(); + for (;;) { + long startingPtr = globalCursor.addAndGet(SEGMENT_SIZE) - SEGMENT_SIZE; + if (startingPtr >= fileEnd) { + break; + } + setCursors(startingPtr); + mainLoop(fastMap); + doOne(cursorA, endA, fastMap); + doOne(cursorB, endB, fastMap); + } + transferToHeap(fastMap); + } + + private long allocateMem() { + this.slowMap = UNSAFE.allocateMemory(SLOW_MAP_SIZE_BYTES); + this.slowMapNamesPtr = UNSAFE.allocateMemory(SLOW_MAP_MAP_NAMES_BYTES); + long fastMap = UNSAFE.allocateMemory(FAST_MAP_SIZE_BYTES); + UNSAFE.setMemory(slowMap, SLOW_MAP_SIZE_BYTES, (byte) 0); + UNSAFE.setMemory(fastMap, FAST_MAP_SIZE_BYTES, (byte) 0); + UNSAFE.setMemory(slowMapNamesPtr, SLOW_MAP_MAP_NAMES_BYTES, (byte) 0); + return fastMap; + } + + private void mainLoop(long fastMap) { + while (cursorA < endA && cursorB < endB) { + long currentWordA = UNSAFE.getLong(cursorA); + long currentWordB = UNSAFE.getLong(cursorB); + + long delimiterMaskA = getDelimiterMask(currentWordA); + long delimiterMaskB = getDelimiterMask(currentWordB); + + long candidateWordA = UNSAFE.getLong(cursorA + 8); + long candidateWordB = UNSAFE.getLong(cursorB + 8); + + long startA = cursorA; + long startB = cursorB; + + int trailingZerosA = Long.numberOfTrailingZeros(delimiterMaskA) >> 3; + int trailingZerosB = Long.numberOfTrailingZeros(delimiterMaskB) >> 3; + + long advanceMaskA = ADVANCE_MASKS[trailingZerosA]; + long advanceMaskB = ADVANCE_MASKS[trailingZerosB]; + + long wordMaskA = HASH_MASKS[trailingZerosA]; + long wordMaskB = HASH_MASKS[trailingZerosB]; + + long maskedMaskA = advanceMaskA & 8; + long maskedMaskB = advanceMaskB & 8; + + long negAdvanceMaskA = ~advanceMaskA; + long negAdvanceMaskB = ~advanceMaskB; + + cursorA += maskedMaskA; + cursorB += 
maskedMaskB; + + long nextWordA = (advanceMaskA & candidateWordA) | (negAdvanceMaskA & currentWordA); + long nextWordB = (advanceMaskB & candidateWordB) | (negAdvanceMaskB & currentWordB); + + delimiterMaskA = getDelimiterMask(nextWordA); + delimiterMaskB = getDelimiterMask(nextWordB); + + boolean slowA = delimiterMaskA == 0; + boolean slowB = delimiterMaskB == 0; + trailingZerosA = Long.numberOfTrailingZeros(delimiterMaskA) >> 3; + trailingZerosB = Long.numberOfTrailingZeros(delimiterMaskB) >> 3; + boolean slowSome = (slowA || slowB); + + long maskedFirstWordA = wordMaskA & currentWordA; + long maskedFirstWordB = wordMaskB & currentWordB; + + int hashA = hash(maskedFirstWordA); + int hashB = hash(maskedFirstWordB); + + currentWordA = nextWordA; + currentWordB = nextWordB; + + if (slowSome) { + doSlow(fastMap, delimiterMaskA, currentWordA, delimiterMaskB, currentWordB, startA, startB, hashA, hashB, slowA, maskedFirstWordA, slowB, + maskedFirstWordB); + } + else { + final long semicolonA = cursorA + trailingZerosA; + final long semicolonB = cursorB + trailingZerosB; + + long digitStartA = semicolonA + 1; + long digitStartB = semicolonB + 1; + + long lastWordMaskA = HASH_MASKS[trailingZerosA]; + long lastWordMaskB = HASH_MASKS[trailingZerosB]; + + long temperatureWordA = UNSAFE.getLong(digitStartA); + long temperatureWordB = UNSAFE.getLong(digitStartB); + + final long maskedLastWordA = currentWordA & lastWordMaskA; + final long maskedLastWordB = currentWordB & lastWordMaskB; + + int lenA = (int) (semicolonA - startA); + int lenB = (int) (semicolonB - startB); + + int mapIndexA = hashA & MAP_MASK; + int mapIndexB = hashB & MAP_MASK; + + long baseEntryPtrA; + long baseEntryPtrB; + + baseEntryPtrA = getOrCreateEntryBaseOffsetFast(mapIndexA, lenA, maskedLastWordA, maskedFirstWordA, fastMap); + baseEntryPtrB = getOrCreateEntryBaseOffsetFast(mapIndexB, lenB, maskedLastWordB, maskedFirstWordB, fastMap); + + cursorA = parseAndStoreTemperature(digitStartA, baseEntryPtrA, 
temperatureWordA); + cursorB = parseAndStoreTemperature(digitStartB, baseEntryPtrB, temperatureWordB); + } + } + } + + private void doSlow(long fastMap, long delimiterMaskA, long currentWordA, long delimiterMaskB, long currentWordB, long startA, long startB, int hashA, int hashB, + boolean slowA, long maskedFirstWordA, boolean slowB, long maskedFirstWordB) { + int trailingZerosB; + int trailingZerosA; + while (delimiterMaskA == 0) { + cursorA += 8; + currentWordA = UNSAFE.getLong(cursorA); + delimiterMaskA = getDelimiterMask(currentWordA); + } + + while (delimiterMaskB == 0) { + cursorB += 8; + currentWordB = UNSAFE.getLong(cursorB); + delimiterMaskB = getDelimiterMask(currentWordB); + } + trailingZerosA = Long.numberOfTrailingZeros(delimiterMaskA) >> 3; + trailingZerosB = Long.numberOfTrailingZeros(delimiterMaskB) >> 3; + + final long semicolonA = cursorA + trailingZerosA; + final long semicolonB = cursorB + trailingZerosB; + + long digitStartA = semicolonA + 1; + long digitStartB = semicolonB + 1; + + long lastWordMaskA = HASH_MASKS[trailingZerosA]; + long lastWordMaskB = HASH_MASKS[trailingZerosB]; + + long temperatureWordA = UNSAFE.getLong(digitStartA); + long temperatureWordB = UNSAFE.getLong(digitStartB); + + final long maskedLastWordA = currentWordA & lastWordMaskA; + final long maskedLastWordB = currentWordB & lastWordMaskB; + + int lenA = (int) (semicolonA - startA); + int lenB = (int) (semicolonB - startB); + + int mapIndexA = hashA & MAP_MASK; + int mapIndexB = hashB & MAP_MASK; + + long baseEntryPtrA; + long baseEntryPtrB; + + if (slowA) { + baseEntryPtrA = getOrCreateEntryBaseOffsetSlow(lenA, startA, hashA, maskedLastWordA); + } + else { + baseEntryPtrA = getOrCreateEntryBaseOffsetFast(mapIndexA, lenA, maskedLastWordA, maskedFirstWordA, fastMap); + } + + if (slowB) { + baseEntryPtrB = getOrCreateEntryBaseOffsetSlow(lenB, startB, hashB, maskedLastWordB); + } + else { + baseEntryPtrB = getOrCreateEntryBaseOffsetFast(mapIndexB, lenB, maskedLastWordB, 
maskedFirstWordB, fastMap); + } + cursorA = parseAndStoreTemperature(digitStartA, baseEntryPtrA, temperatureWordA); + cursorB = parseAndStoreTemperature(digitStartB, baseEntryPtrB, temperatureWordB); + } + + private void setCursors(long current) { + // Credit for the whole work-stealing scheme: @thomaswue + // I have totally stolen it from him. I changed the order a bit to suite my taste better, + // but it's his code + long segmentStart; + if (current == fileStart) { + segmentStart = current; + } + else { + segmentStart = nextNewLine(current) + 1; + } + long segmentEnd = nextNewLine(Math.min(fileEnd - 1, current + SEGMENT_SIZE)); + + long size = (segmentEnd - segmentStart) / 2; + long mid = nextNewLine(segmentStart + size); + + cursorA = segmentStart; + endA = mid; + cursorB = mid + 1; + endB = segmentEnd; + } + + private static long getOrCreateEntryBaseOffsetFast(int mapIndexA, int lenA, long maskedLastWord, long maskedFirstWord, long fastMap) { + for (;;) { + long basePtr = mapIndexA * FAST_MAP_ENTRY_SIZE_BYTES + fastMap; + long namePart1 = UNSAFE.getLong(basePtr + FAST_MAP_NAME_PART1); + long namePart2 = UNSAFE.getLong(basePtr + FAST_MAP_NAME_PART2); + if (namePart1 == maskedFirstWord && namePart2 == maskedLastWord) { + return basePtr; + } + long lenPtr = basePtr + MAP_LEN_OFFSET; + int len = UNSAFE.getInt(lenPtr); + if (len == 0) { + return newEntryFast(lenA, maskedLastWord, maskedFirstWord, lenPtr, basePtr); + } + mapIndexA = ++mapIndexA & MAP_MASK; + } + } + + private static long newEntryFast(int lenA, long maskedLastWord, long maskedFirstWord, long lenPtr, long basePtr) { + UNSAFE.putInt(lenPtr, lenA); + // todo: this could be a single putLong() + UNSAFE.putInt(basePtr + MAP_MAX_OFFSET, Integer.MIN_VALUE); + UNSAFE.putInt(basePtr + MAP_MIN_OFFSET, Integer.MAX_VALUE); + UNSAFE.putLong(basePtr + FAST_MAP_NAME_PART1, maskedFirstWord); + UNSAFE.putLong(basePtr + FAST_MAP_NAME_PART2, maskedLastWord); + return basePtr; + } + + private long 
getOrCreateEntryBaseOffsetSlow(int lenA, long startPtr, int hash, long maskedLastWord) { + long fullLen = lenA & ~7L; + long mapIndexA = hash & MAP_MASK; + for (;;) { + long basePtr = mapIndexA * SLOW_MAP_ENTRY_SIZE_BYTES + slowMap; + long lenPtr = basePtr + MAP_LEN_OFFSET; + long namePtr = basePtr + SLOW_MAP_NAME_OFFSET; + int len = UNSAFE.getInt(lenPtr); + if (len == lenA) { + namePtr = UNSAFE.getLong(basePtr + SLOW_MAP_NAME_OFFSET); + if (nameMatchSlow(startPtr, namePtr, fullLen, maskedLastWord)) { + return basePtr; + } + } + else if (len == 0) { + UNSAFE.putLong(namePtr, slowMapNamesPtr); + UNSAFE.putInt(lenPtr, lenA); + UNSAFE.putInt(basePtr + MAP_MAX_OFFSET, Integer.MIN_VALUE); + UNSAFE.putInt(basePtr + MAP_MIN_OFFSET, Integer.MAX_VALUE); + UNSAFE.copyMemory(startPtr, slowMapNamesPtr, lenA); + long alignedLen = (lenA & ~7L) + 8; + slowMapNamesPtr += alignedLen; + return basePtr; + } + mapIndexA = ++mapIndexA & MAP_MASK; + } + } + + private static boolean nameMatchSlow(long start, long namePtr, long fullLen, long maskedLastWord) { + long offset; + for (offset = 0; offset < fullLen; offset += 8) { + if (UNSAFE.getLong(start + offset) != UNSAFE.getLong(namePtr + offset)) { + return false; + } + } + long maskedWordInMap = UNSAFE.getLong(namePtr + fullLen); + return (maskedWordInMap == maskedLastWord); + } + } + + record StationStats(int min, int max, int count, long sum) { + StationStats mergeWith(StationStats other) { + return new StationStats(Math.min(min, other.min), Math.max(max, other.max), count + other.count, sum + other.sum); + } + } +} diff --git a/src/main/java/dev/morling/onebrc/CalculateAverage_jgrateron.java b/src/main/java/dev/morling/onebrc/CalculateAverage_jgrateron.java index 488650853..f79fe7a30 100644 --- a/src/main/java/dev/morling/onebrc/CalculateAverage_jgrateron.java +++ b/src/main/java/dev/morling/onebrc/CalculateAverage_jgrateron.java @@ -20,19 +20,21 @@ import java.io.IOException; import java.io.RandomAccessFile; import 
java.util.ArrayList; +import java.util.Arrays; import java.util.Comparator; import java.util.HashMap; import java.util.List; import java.util.Locale; import java.util.Map; -import java.util.Map.Entry; import java.util.stream.Collectors; public class CalculateAverage_jgrateron { private static final String FILE = "./measurements.txt"; - private static final int MAX_LENGTH_LINE = 115; + private static final int MAX_LENGTH_LINE = 255; private static final int MAX_BUFFER = 1024 * 8; private static boolean DEBUG = false; + public static int DECENAS[] = { 0, 10, 20, 30, 40, 50, 60, 70, 80, 90 }; + public static int CENTENAS[] = { 0, 100, 200, 300, 400, 500, 600, 700, 800, 900 }; public record Particion(long offset, long size) { } @@ -93,21 +95,25 @@ public static void main(String[] args) throws InterruptedException, IOException Locale.setDefault(Locale.US); var startTime = System.nanoTime(); var archivo = new File(FILE); - var totalMediciones = new HashMap(); var tareas = new ArrayList(); + var totalMediciones = new HashMap(); var particiones = dividirArchivo(archivo); for (var p : particiones) { var hilo = Thread.ofVirtual().start(() -> { try (var miTarea = new MiTarea(archivo, p)) { var mediciones = miTarea.calcularMediciones(); - synchronized (totalMediciones) { - for (var entry : mediciones.entrySet()) { - var medicion = totalMediciones.get(entry.getKey()); + for (var entry : mediciones.entrySet()) { + Medicion medicion; + synchronized (totalMediciones) { + medicion = totalMediciones.get(entry.getKey()); if (medicion == null) { totalMediciones.put(entry.getKey(), entry.getValue()); + medicion = entry.getValue(); } - else { + } + synchronized (medicion) { + if (!medicion.equals(entry.getValue())) { var otraMed = entry.getValue(); medicion.update(otraMed.count, otraMed.tempMin, otraMed.tempMax, otraMed.tempSum); } @@ -120,16 +126,18 @@ public static void main(String[] args) throws InterruptedException, IOException }); tareas.add(hilo); } + + Comparator> comparar = (a, 
b) -> { + return a.getValue().getNombreEstacion().compareTo(b.getValue().getNombreEstacion()); + }; + for (var hilo : tareas) { hilo.join(); } - Comparator> comparar = (a, b) -> { - return a.getKey().compareTo(b.getKey()); - }; var result = totalMediciones.entrySet().stream()// - .sorted(comparar)// - .map(e -> e.getKey() + "=" + e.getValue().toString())// + .sorted(comparar) + .map(e -> e.getValue().toString())// .collect(Collectors.joining(", ")); System.out.println("{" + result + "}"); @@ -143,17 +151,38 @@ public static void main(String[] args) throws InterruptedException, IOException */ static class Index { private int hash; + private byte[] data; + private int fromIndex; + private int length; public Index() { this.hash = 0; } - public Index(int hash) { - this.hash = hash; + public Index(byte data[], int fromIndex, int length) { + this.data = data; + this.fromIndex = fromIndex; + this.length = length; + this.hash = calcHashCode(length, data, fromIndex, length); } - public void setHash(int hash) { - this.hash = hash; + public void setData(byte data[], int fromIndex, int length) { + this.data = data; + this.fromIndex = fromIndex; + this.length = length; + this.hash = calcHashCode(length, data, fromIndex, length); + } + + /* + * Calcula el hash de cada estacion, + * variation of Daniel J Bernstein's algorithm + */ + private int calcHashCode(int result, byte[] a, int fromIndex, int length) { + int end = fromIndex + length; + for (int i = fromIndex; i < end; i++) { + result = ((result << 5) + result) ^ a[i]; + } + return result; } @Override @@ -167,7 +196,8 @@ public boolean equals(Object obj) { return true; } var otro = (Index) obj; - return this.hash == otro.hash; + return Arrays.equals(this.data, this.fromIndex, this.fromIndex + this.length, otro.data, otro.fromIndex, + otro.fromIndex + otro.length); } } @@ -176,14 +206,12 @@ public boolean equals(Object obj) { * RandomAccessFile permite dezplazar el puntero de lectura del archivo * Tenemos un Map para guardar 
las estadisticas y un map para guardar los * nombres de las estaciones - * */ static class MiTarea implements AutoCloseable { private final RandomAccessFile rFile; private long maxRead; private Index index = new Index(); private Map mediciones = new HashMap<>(); - private Map estaciones = new HashMap<>(); public MiTarea(File file, Particion particion) throws IOException { rFile = new RandomAccessFile(file, "r"); @@ -202,7 +230,7 @@ public void close() throws IOException { * obtiene la posicion de separacion ";" de la estacion y su temperatura * calcula el hash, convierte a double y actualiza las estadisticas */ - public Map calcularMediciones() throws IOException { + public Map calcularMediciones() throws IOException { var buffer = new byte[MAX_BUFFER];// buffer para lectura en el archivo var rest = new byte[MAX_LENGTH_LINE];// Resto que sobra en cada lectura del buffer var lenRest = 0;// Longitud que sobró en cada lectura del buffer @@ -216,17 +244,15 @@ public Map calcularMediciones() throws IOException { if (numBytes == -1) { break; } - var totalLeidos = totalRead + numBytes; - if (totalLeidos > maxRead) { - numBytes = maxRead - totalRead; - } + numBytes = totalRead + numBytes > maxRead ? 
maxRead - totalRead : numBytes; totalRead += numBytes; int pos = 0; int len = 0; int idx = 0; int semicolon = 0; while (pos < numBytes) { - if (buffer[pos] == '\n' || buffer[pos] == '\r') { + var b = buffer[pos]; + if (b == '\n' || b == '\r') { if (lenRest > 0) { // concatenamos el sobrante anterior con la nueva linea System.arraycopy(buffer, idx, rest, lenRest, len); @@ -243,7 +269,7 @@ public Map calcularMediciones() throws IOException { semicolon = 0; } else { - if (buffer[pos] == ';') { + if (b == ';') { semicolon = len; } len++; @@ -255,7 +281,7 @@ public Map calcularMediciones() throws IOException { lenRest = len; } } - return transformMediciones(); + return mediciones; } /* @@ -275,19 +301,14 @@ public int buscarSemicolon(byte data[], int len) { * Busca una medicion por su hash y crea o actualiza la temperatura */ public void updateMediciones(byte data[], int pos, int semicolon) { - var hashEstacion = calcHashCode(0, data, pos, semicolon); - var temp = strToDouble(data, pos, semicolon); - index.setHash(hashEstacion); - var estacion = estaciones.get(index); - if (estacion == null) { - estacion = new String(data, pos, semicolon); - estaciones.put(new Index(hashEstacion), estacion); - } - index.setHash(hashEstacion); + var temp = strToInt(data, pos, semicolon); + index.setData(data, pos, semicolon); var medicion = mediciones.get(index); if (medicion == null) { - medicion = new Medicion(1, temp, temp, temp); - mediciones.put(new Index(hashEstacion), medicion); + var estacion = new byte[semicolon]; + System.arraycopy(data, pos, estacion, 0, semicolon); + medicion = new Medicion(estacion, 1, temp, temp, temp); + mediciones.put(new Index(estacion, 0, semicolon), medicion); } else { medicion.update(1, temp, temp, temp); @@ -295,51 +316,16 @@ public void updateMediciones(byte data[], int pos, int semicolon) { } /* - * Convierte las estaciones de hash a string - */ - private Map transformMediciones() { - var newMediciones = new HashMap(); - for (var e : 
mediciones.entrySet()) { - var estacion = estaciones.get(e.getKey()); - var medicion = e.getValue(); - newMediciones.put(estacion, medicion); - } - return newMediciones; - } - - /* - * Calcula el hash de cada estacion, esto es una copia de java.internal.hashcode + * convierte de un arreglo de bytes a integer */ - private int calcHashCode(int result, byte[] a, int fromIndex, int length) { - int end = fromIndex + length; - for (int i = fromIndex; i < end; i++) { - result = 31 * result + a[i]; - } - return result; - } - /* - * convierte de un arreglo de bytes a double - */ - public double strToDouble(byte linea[], int idx, int posSeparator) { - double number = 0; + public int strToInt(byte linea[], int idx, int posSeparator) { int pos = idx + posSeparator + 1; - int esNegativo = linea[pos] == '-' ? -1 : 1; - if (esNegativo == -1) { - pos++; - } - int digit1 = linea[pos] - 48; - pos++; - if (linea[pos] == '.') { - pos++; - number = (digit1 * 10) + (linea[pos] - 48); - } - else { - int digit2 = linea[pos] - 48; - pos += 2; - number = (digit1 * 100) + (digit2 * 10) + (linea[pos] - 48); - } - return number / 10 * esNegativo; + boolean esNegativo = linea[pos] == '-'; + pos = esNegativo ? pos + 1 : pos; + int number = linea[pos + 1] == '.' ? DECENAS[(linea[pos] - 48)] + linea[pos + 2] - 48 + : CENTENAS[(linea[pos] - 48)] + DECENAS[(linea[pos + 1] - 48)] + (linea[pos + 3] - 48); + return esNegativo ? 
-number : number; } } @@ -348,34 +334,46 @@ public double strToDouble(byte linea[], int idx, int posSeparator) { */ static class Medicion { private int count; - private double tempMin; - private double tempMax; - private double tempSum; + private int tempMin; + private int tempMax; + private int tempSum; + private byte estacion[]; + private String nombreEstacion; - public Medicion(int count, double tempMin, double tempMax, double tempSum) { + public Medicion(byte estacion[], int count, int tempMin, int tempMax, int tempSum) { super(); + this.estacion = estacion; this.count = count; this.tempMin = tempMin; this.tempMax = tempMax; this.tempSum = tempSum; } - public void update(int count, double tempMin, double tempMax, double tempSum) { + public void update(int count, int tempMin, int tempMax, int tempSum) { this.count += count; - if (tempMin < this.tempMin) { - this.tempMin = tempMin; - } - if (tempMax > this.tempMax) { - this.tempMax = tempMax; - } + this.tempMin = Math.min(tempMin, this.tempMin); + this.tempMax = Math.max(tempMax, this.tempMax); this.tempSum += tempSum; } + public double round(double number) { + return Math.round(number) / 10.0; + } + + public String getNombreEstacion() { + if (nombreEstacion == null) { + nombreEstacion = new String(estacion); + } + return nombreEstacion; + } + @Override public String toString() { - double tempPro = (double) tempSum; - tempPro = tempPro / count; - return "%.1f/%.1f/%.1f".formatted(tempMin, tempPro, tempMax); + var min = round(tempMin); + var mid = round(1.0 * tempSum / count); + var max = round(tempMax); + var nombre = getNombreEstacion(); + return "%s=%.1f/%.1f/%.1f".formatted(nombre, min, mid, max); } } } diff --git a/src/main/java/dev/morling/onebrc/CalculateAverage_jincongho.java b/src/main/java/dev/morling/onebrc/CalculateAverage_jincongho.java new file mode 100644 index 000000000..0758703bc --- /dev/null +++ b/src/main/java/dev/morling/onebrc/CalculateAverage_jincongho.java @@ -0,0 +1,534 @@ +/* + * 
Copyright 2023 The original authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package dev.morling.onebrc; + +import jdk.incubator.vector.ByteVector; +import jdk.incubator.vector.VectorOperators; +import jdk.incubator.vector.VectorSpecies; +import sun.misc.Unsafe; + +import java.io.IOException; +import java.lang.foreign.Arena; +import java.lang.foreign.MemorySegment; +import java.lang.reflect.Field; +import java.nio.ByteOrder; +import java.nio.channels.FileChannel; +import java.nio.charset.StandardCharsets; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.StandardOpenOption; +import java.util.*; + +/** + * Changelog (based on Macbook Pro Intel i7 6-cores 2.6GHz): + * + * Initial 40000 ms + * Parse key as byte vs string 30000 ms + * Parse temp as fixed vs double 15000 ms + * HashMap optimization 10000 ms + * Simd + reduce memory copy 8000 ms + * + */ +public class CalculateAverage_jincongho { + + private static final String FILE = "./measurements.txt"; + + private static final Unsafe UNSAFE = initUnsafe(); + + private static Unsafe initUnsafe() { + try { + Field theUnsafe = Unsafe.class.getDeclaredField("theUnsafe"); + theUnsafe.setAccessible(true); + return (Unsafe) theUnsafe.get(Unsafe.class); + } + catch (NoSuchFieldException | IllegalAccessException e) { + throw new RuntimeException(e); + } + } + + /** + * Vectorization utilities with 1BRC-specific optimizations + */ + protected static class VectorUtils { + + // 
key length is usually less than 32 bytes, having more is just expensive + public static final VectorSpecies BYTE_SPECIES = ByteVector.SPECIES_256; + + /** Vectorized field delimiter search **/ + + public static int findDelimiter(MemorySegment data, long offset) { + return ByteVector.fromMemorySegment(VectorUtils.BYTE_SPECIES, data, offset, ByteOrder.nativeOrder()) + .compare(VectorOperators.EQ, ';') + .firstTrue(); + } + + /** Vectorized Hashing (explicit vectorization seems slower, overkill?) **/ + + // private static int[] HASH_ARRAY = initHashArray(); + // private static final IntVector HASH_VECTOR = IntVector.fromArray(IntVector.SPECIES_256, HASH_ARRAY, 0); + // private static final int HASH_ACCUM = HASH_ARRAY[0] * 31; + // + // private static int[] initHashArray() { + // int[] x = new int[IntVector.SPECIES_256.length()]; + // x[x.length - 1] = 1; + // for (int i = x.length - 2; i >= 0; i--) + // x[i] = x[i + 1] * 31; + // + // return x; + // } + + /** + * Ref: https://github.com/PaulSandoz/vector-api-dev-live-10-2021/blob/main/src/main/java/jmh/BytesHashcode.java + * + * Essentially we are doing this calculation: + * h = h * 31 * 31 * 31 * 31 * 31 * 31 * 31 * 31 + + * a[i + 0] * 31 * 31 * 31 * 31 * 31 * 31 * 31 + + * a[i + 1] * 31 * 31 * 31 * 31 * 31 * 31 + + * a[i + 2] * 31 * 31 * 31 * 31 * 31 + + * a[i + 3] * 31 * 31 * 31 * 31 + + * a[i + 4] * 31 * 31 * 31 + + * a[i + 5] * 31 * 31 + + * a[i + 6] * 31 + + * a[i + 7]; + */ + // public static int hashCode(MemorySegment array, long offset, short length) { + // int h = 1; + // long i = offset, loopBound = offset + ByteVector.SPECIES_64.loopBound(length), tailBound = offset + length; + // for (; i < loopBound; i += ByteVector.SPECIES_64.length()) { + // // load 8 bytes, into a 64-bit vector + // ByteVector b = ByteVector.fromMemorySegment(ByteVector.SPECIES_64, array, i, ByteOrder.nativeOrder()); + // // convert 8 bytes into 8 ints (hashing calculation needs int!) 
+ // IntVector x = (IntVector) b.castShape(IntVector.SPECIES_256, 0); + // h = h * HASH_ACCUM + x.mul(HASH_VECTOR).reduceLanes(VectorOperators.ADD); + // } + // + // for (; i < tailBound; i++) { + // h = 31 * h + array.get(ValueLayout.JAVA_BYTE, i); + // } + // return h; + // } + + // scalar implementation + // public static int hashCode(final MemorySegment array, final long offset, final short length) { + // final long limit = offset + length; + // int h = 1; + // for (long i = offset; i < limit; i++) { + // h = 31 * h + UNSAFE.getByte(array.address() + i); + // } + // return h; + // } + + // fxhash + public static int hashCode(final MemorySegment array, final long offset, final short length) { + final int seed = 0x9E3779B9; + final int rotate = 5; + + int x, y; + if (length >= Integer.BYTES) { + x = UNSAFE.getInt(array.address() + offset); + y = UNSAFE.getInt(array.address() + offset + length - Integer.BYTES); + } + else { + x = UNSAFE.getByte(array.address() + offset); + y = UNSAFE.getByte(array.address() + offset + length - Byte.BYTES); + } + + return (Integer.rotateLeft(x * seed, rotate) ^ y) * seed; + } + + /** Vectorized Key Comparison **/ + + private static boolean notEquals(MemorySegment a, long aOffset, MemorySegment b, long bOffset, short length, VectorSpecies BYTE_SPECIES) { + final long aLimit = aOffset + length, bLimit = bOffset + length; + + // main loop + long loopBound = bOffset + BYTE_SPECIES.loopBound(length); + for (; bOffset < loopBound; aOffset += BYTE_SPECIES.length(), bOffset += BYTE_SPECIES.length()) { + ByteVector av = ByteVector.fromMemorySegment(BYTE_SPECIES, a, + aOffset, ByteOrder.nativeOrder() /* , BYTE_SPECIES.indexInRange(aOffset, Math.min(aOffset + BYTE_SPECIES.length(), aLimit)) */); + ByteVector bv = ByteVector.fromMemorySegment(BYTE_SPECIES, b, + bOffset, ByteOrder.nativeOrder() /* , BYTE_SPECIES.indexInRange(bOffset, Math.min(bOffset + BYTE_SPECIES.length(), bLimit)) */); + if (av.compare(VectorOperators.NE, bv).anyTrue()) + 
return true; + } + + // tail cleanup - load last N bytes with mask + if (bOffset < bLimit) { + ByteVector av = ByteVector.fromMemorySegment(BYTE_SPECIES, a, aOffset, ByteOrder.nativeOrder(), BYTE_SPECIES.indexInRange(aOffset, aLimit)); + ByteVector bv = ByteVector.fromMemorySegment(BYTE_SPECIES, b, bOffset, ByteOrder.nativeOrder(), BYTE_SPECIES.indexInRange(bOffset, bLimit)); + if (av.compare(VectorOperators.NE, bv).anyTrue()) + return true; + } + + return false; + } + + // scalar implementation + // private static boolean equals(byte[] a, int aOffset, byte[] b, int bOffset, int len) { + // while (bOffset < len) + // if (a[aOffset++] != b[bOffset++]) + // return false; + // return true; + // } + + } + + /** + * Measurement Hash Table (for each partition) + * Uses contiguous byte array to optimize for cache-line (hopefully) + * + * Each entry: + * - KEYS: keyLength (2 bytes) + key (100 bytes) + * - VALUES: min (2 bytes) + max (2 bytes) + count (4 bytes) + sum ( 8 bytes) + */ + protected static class PartitionAggr { + + private static int MAP_SIZE = 1 << 14; // 2^14 = 16384, closes to 10000 + private static int KEY_SIZE = 128; // key length (2 bytes) + key (100 bytes) + private static int KEY_MASK = (MAP_SIZE - 1); + private static int VALUE_SIZE = 16; // min (2 bytes) + max ( 2 bytes) + count (4 bytes) + sum (8 bytes) + + private MemorySegment KEYS = Arena.ofShared().allocate(MAP_SIZE * KEY_SIZE, 64); + private MemorySegment VALUES = Arena.ofShared().allocate(MAP_SIZE * VALUE_SIZE, 16); + + public PartitionAggr() { + // init min and max + final long limit = VALUES.address() + (MAP_SIZE * VALUE_SIZE); + for (long offset = VALUES.address(); offset < limit; offset += VALUE_SIZE) { + UNSAFE.putShort(offset, Short.MAX_VALUE); + UNSAFE.putShort(offset + 2, Short.MIN_VALUE); + } + } + + public void update(MemorySegment key, long keyStart, short keyLength, int keyHash, short value) { + int index = keyHash & KEY_MASK; + long keyOffset = KEYS.address() + (index * KEY_SIZE); + 
while (((UNSAFE.getShort(keyOffset) != keyLength) || + VectorUtils.notEquals(KEYS, ((index * KEY_SIZE) + 2), key, keyStart, keyLength, VectorUtils.BYTE_SPECIES))) { + if (UNSAFE.getShort(keyOffset) == 0) { + // put key + UNSAFE.putShort(keyOffset, keyLength); + MemorySegment.copy(key, keyStart, KEYS, (index * KEY_SIZE) + 2, keyLength); + break; + } + else { + index = (index + 1) & KEY_MASK; + keyOffset = KEYS.address() + (index * KEY_SIZE); + } + } + + long valueOffset = VALUES.address() + (index * VALUE_SIZE); + UNSAFE.putShort(valueOffset, (short) Math.min(UNSAFE.getShort(valueOffset), value)); + valueOffset += 2; + UNSAFE.putShort(valueOffset, (short) Math.max(UNSAFE.getShort(valueOffset), value)); + valueOffset += 2; + UNSAFE.putInt(valueOffset, UNSAFE.getInt(valueOffset) + 1); + valueOffset += 4; + UNSAFE.putLong(valueOffset, UNSAFE.getLong(valueOffset) + value); + } + + public void mergeTo(ResultAggr result) { + long keyOffset; + short keyLength; + for (int i = 0; i < MAP_SIZE; i++) { + // extract key + keyOffset = KEYS.address() + (i * KEY_SIZE); + if ((keyLength = UNSAFE.getShort(keyOffset)) == 0) + continue; + + // extract values (if key is not null) + final long valueOffset = VALUES.address() + (i * VALUE_SIZE); + result.compute(new ResultAggr.ByteKey(KEYS, (i * KEY_SIZE) + 2, keyLength), (k, v) -> { + if (v == null) { + v = new ResultAggr.Measurement(); + } + v.min = (short) Math.min(UNSAFE.getShort(valueOffset), v.min); + v.max = (short) Math.max(UNSAFE.getShort(valueOffset + 2), v.max); + v.count += UNSAFE.getInt(valueOffset + 4); + v.sum += UNSAFE.getLong(valueOffset + 8); + + return v; + }); + } + } + + } + + /** + * Measurement Aggregation (for all partitions) + * Simple Concurrent Hash Table so all partitions can merge concurrently + */ + protected static class ResultAggr extends HashMap { + + public static class ByteKey implements Comparable { + private final MemorySegment data; + private final long offset; + private final short length; + private 
String str; + + public ByteKey(MemorySegment data, long offset, short length) { + this.data = data; + this.offset = offset; + this.length = length; + } + + @Override + public boolean equals(Object other) { + return (length == ((ByteKey) other).length) + && !VectorUtils.notEquals(data, offset, ((ByteKey) other).data, ((ByteKey) other).offset, length, VectorUtils.BYTE_SPECIES); + } + + @Override + public int hashCode() { + return VectorUtils.hashCode(data, offset, length); + } + + @Override + public String toString() { + if (str == null) { + // finally has to do a copy! + byte[] copy = new byte[length]; + MemorySegment.copy(data, offset, MemorySegment.ofArray(copy), 0, length); + str = new String(copy, StandardCharsets.UTF_8); + } + return str; + } + + @Override + public int compareTo(ByteKey o) { + return toString().compareTo(o.toString()); + } + } + + protected static class Measurement { + public short min = Short.MAX_VALUE; + public short max = Short.MIN_VALUE; + public int count = 0; + public long sum = 0; + + @Override + public String toString() { + return ((double) min / 10) + "/" + (Math.round((1.0 * sum) / count) / 10.0) + "/" + ((double) max / 10); + } + + } + + public ResultAggr(int initialCapacity, float loadFactor) { + super(initialCapacity, loadFactor); + } + + public Map toSorted() { + return new TreeMap(this); + } + + } + + protected static class Partition implements Runnable { + + private final MemorySegment data; + private long offset; + private final long limit; + private final PartitionAggr result; + + public Partition(MemorySegment data, long offset, long limit, PartitionAggr result) { + this.data = data; + this.offset = offset; + this.limit = limit; + this.result = result; + } + + @Override + public void run() { + // measurement parsing + final PartitionAggr aggr = this.result; + + // main loop (vectorized) + final long loopLimit = limit - (VectorUtils.BYTE_SPECIES.length() * Math.ceilDiv(100, VectorUtils.BYTE_SPECIES.length()) + Long.BYTES); + 
while (offset < loopLimit) { + long offsetStart = offset; + + // find station name upto ";" + int found; + do { + found = VectorUtils.findDelimiter(data, offset); + offset += found; + } while (found == VectorUtils.BYTE_SPECIES.length()); + short stationLength = (short) (offset - offsetStart); + int stationHash = VectorUtils.hashCode(data, offsetStart, stationLength); + + // find measurement upto "\n" (credit: merykitty) + long numberBits = UNSAFE.getLong(data.address() + ++offset); + final long invNumberBits = ~numberBits; + final int decimalSepPos = Long.numberOfTrailingZeros(invNumberBits & 0x10101000); + + int shift = 28 - decimalSepPos; + long signed = (invNumberBits << 59) >> 63; + long designMask = ~(signed & 0xFF); + long digits = ((numberBits & designMask) << shift) & 0x0F000F0F00L; + long absValue = ((digits * 0x640a0001) >>> 32) & 0x3FF; + + short fixed = (short) ((absValue ^ signed) - signed); + offset += (decimalSepPos >>> 3) + 3; + + // update measurement + aggr.update(data, offsetStart, stationLength, stationHash, fixed); + } + + // tail loop (simple) + while (offset < limit) { + long offsetStart = offset; + + // find station name upto ";" + short stationLength = 0; + while (UNSAFE.getByte(data.address() + offset++) != ';') + stationLength++; + int stationHash = VectorUtils.hashCode(data, offsetStart, stationLength); + + // find measurement upto "\n" + byte tempBuffer = UNSAFE.getByte(data.address() + offset++); + boolean isNegative = (tempBuffer == '-'); + short fixed = (short) (isNegative ? 0 : (tempBuffer - '0')); + while (true) { + tempBuffer = UNSAFE.getByte(data.address() + offset++); + if (tempBuffer == '.') { + fixed = (short) (fixed * 10 + (UNSAFE.getByte(data.address() + offset) - '0')); + offset += 2; + break; + } + fixed = (short) (fixed * 10 + (tempBuffer - '0')); + } + fixed = isNegative ? 
(short) -fixed : fixed; + + // update measurement + aggr.update(data, offsetStart, stationLength, stationHash, fixed); + } + + // measurement result collection + // aggr.mergeTo(result); + } + + } + + public static void main(String[] args) throws IOException, InterruptedException { + + // long startTime = System.currentTimeMillis(); + + try (FileChannel fileChannel = (FileChannel) Files.newByteChannel(Path.of(FILE), EnumSet.of(StandardOpenOption.READ)); + Arena arena = Arena.ofShared()) { + + // scan data + MemorySegment data = fileChannel.map(FileChannel.MapMode.READ_ONLY, 0, fileChannel.size(), arena); + final int processors = Runtime.getRuntime().availableProcessors(); + + // partition split + long[] partition = new long[processors + 1]; + long partitionSize = Math.ceilDiv(data.byteSize(), processors); + for (int i = 0; i < processors; i++) { + partition[i + 1] = partition[i] + partitionSize; + if (partition[i + 1] >= data.byteSize()) { + partition[i + 1] = data.byteSize(); + break; + } + + // note: vectorize this made performance worse :( + while (UNSAFE.getByte(data.address() + partition[i + 1]++) != '\n') + ; + } + + // partition aggregation + var threadList = new Thread[processors]; + PartitionAggr[] partAggrs = new PartitionAggr[processors]; + for (int i = 0; i < processors; i++) { + if (partition[i] == data.byteSize()) + break; + + partAggrs[i] = new PartitionAggr(); + threadList[i] = new Thread(new Partition(data, partition[i], partition[i + 1], partAggrs[i])); + threadList[i].start(); + } + + // result + ResultAggr result = new ResultAggr(1 << 14, 1); + for (int i = 0; i < processors; i++) { + if (partition[i] == data.byteSize()) + break; + + threadList[i].join(); + partAggrs[i].mergeTo(result); + } + System.out.println(result.toSorted()); + } + + // long elapsed = System.currentTimeMillis() - startTime; + // System.out.println("Elapsed: " + ((double) elapsed / 1000.0)); + + } + + /** Unit Tests **/ + + public static void testMain(String[] args) { + 
testHashCode(); + testNotEquals(); + } + + private static void testHashCode() { + // test key length from 1 to 100 + for (int i = 1; i <= 100; i++) { + byte[] array = new byte[i]; + for (int j = 0; j < i; j++) + array[j] = (byte) j; + + // compare with java default implementation + assertTrue(VectorUtils.hashCode(MemorySegment.ofArray(array), 0, (short) i) == Arrays.hashCode(array)); + } + } + + private static void testNotEquals() { + byte[] a = new byte[128]; + byte[] b = new byte[128]; + + // all equals + for (int i = 1; i < 100; i++) { + a[(i + 2) - 1] = 0; + b[i - 1] = 0; + a[(i + 2)] = 10; + b[i] = 10; + assertTrue(!VectorUtils.notEquals(MemorySegment.ofArray(a), 2, MemorySegment.ofArray(b), 0, (short) 100, ByteVector.SPECIES_64)); + assertTrue(!VectorUtils.notEquals(MemorySegment.ofArray(a), 2, MemorySegment.ofArray(b), 0, (short) 100, ByteVector.SPECIES_128)); + assertTrue(!VectorUtils.notEquals(MemorySegment.ofArray(a), 2, MemorySegment.ofArray(b), 0, (short) 100, ByteVector.SPECIES_256)); + assertTrue(!VectorUtils.notEquals(MemorySegment.ofArray(a), 2, MemorySegment.ofArray(b), 0, (short) 100, ByteVector.SPECIES_512)); + } + + // one el not equals + for (int i = 1; i < 100; i++) { + a[(i + 2) - 1] = 0; + b[i - 1] = 0; + a[(i + 2)] = 20; + b[i] = 10; + assertTrue(VectorUtils.notEquals(MemorySegment.ofArray(a), 2, MemorySegment.ofArray(b), 0, (short) 100, ByteVector.SPECIES_64)); + assertTrue(VectorUtils.notEquals(MemorySegment.ofArray(a), 2, MemorySegment.ofArray(b), 0, (short) 100, ByteVector.SPECIES_128)); + assertTrue(VectorUtils.notEquals(MemorySegment.ofArray(a), 2, MemorySegment.ofArray(b), 0, (short) 100, ByteVector.SPECIES_256)); + assertTrue(VectorUtils.notEquals(MemorySegment.ofArray(a), 2, MemorySegment.ofArray(b), 0, (short) 100, ByteVector.SPECIES_512)); + } + } + + private static void assertTrue(boolean condition) { + if (!condition) { + throw new RuntimeException("Failed test"); + } + } + +} diff --git 
a/src/main/java/dev/morling/onebrc/CalculateAverage_jonathanaotearoa.java b/src/main/java/dev/morling/onebrc/CalculateAverage_jonathanaotearoa.java new file mode 100644 index 000000000..cd626347b --- /dev/null +++ b/src/main/java/dev/morling/onebrc/CalculateAverage_jonathanaotearoa.java @@ -0,0 +1,587 @@ +/* + * Copyright 2023 The original authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package dev.morling.onebrc; + +import sun.misc.Unsafe; + +import java.io.IOException; +import java.lang.foreign.Arena; +import java.lang.reflect.Field; +import java.nio.ByteBuffer; +import java.nio.ByteOrder; +import java.nio.channels.FileChannel; +import java.nio.charset.StandardCharsets; +import java.nio.file.DirectoryStream; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.StandardOpenOption; +import java.util.*; +import java.util.concurrent.ForkJoinPool; +import java.util.stream.Collectors; +import java.util.stream.Stream; + +public class CalculateAverage_jonathanaotearoa { + + public static final Unsafe UNSAFE; + + static { + try { + final Field theUnsafe = Unsafe.class.getDeclaredField("theUnsafe"); + theUnsafe.setAccessible(true); + UNSAFE = (Unsafe) theUnsafe.get(null); + } catch (NoSuchFieldException | IllegalAccessException e) { + throw new RuntimeException(STR."Error getting instance of \{Unsafe.class.getName()}"); + } + } + + private static final int WORD_BYTES = Long.BYTES; + private static final Path FILE_PATH = 
Path.of("./measurements.txt"); + private static final Path SAMPLE_DIR_PATH = Path.of("./src/test/resources/samples"); + private static final byte MAX_LINE_BYTES = 107; + private static final byte NEW_LINE_BYTE = '\n'; + private static final long SEPARATOR_XOR_MASK = 0x3b3b3b3b3b3b3b3bL; + + // A mask where the 4th bit of the 5th, 6th and 7th bytes is set to 1. + // Leverages the fact that the 4th bit of a digit byte will 1. + // Whereas the 4th bit of the decimal point byte will be 0. + // Assumes little endianness. + private static final long DECIMAL_POINT_MASK = 0x10101000L; + + // This mask performs two tasks: + // Sets the right-most and 3 left-most bytes to zero. + // Given a temp value be at most 5 bytes in length, .e.g -99.9, we can safely ignore the last 3 bytes. + // Subtracts 48, i.e. the UFT-8 value offset, from the digits bytes. + // As a result, '0' (48) becomes 0, '1' (49) becomes 1, and so on. + private static final long TEMP_DIGITS_MASK = 0x0f000f0f00L; + + public static void main(final String[] args) throws IOException { + assert ByteOrder.nativeOrder() == ByteOrder.LITTLE_ENDIAN : "Big endian byte order is not supported"; + System.out.println(resultsToString(processFile(FILE_PATH))); + } + + /** + * A custom version of AbstractMap's toString() method. + *
     * <p>
     * This should be more performant as we can:
     * <ul>
     *   <li>Set the initial capacity of the string builder</li>
     *   <li>Append double values directly, which avoids string creation</li>
     * </ul>
     * </p>
+ * + * @param results the results. + * @return a string representation of the results. + */ + private static String resultsToString(final Map results) { + final Iterator> i = results.entrySet().iterator(); + if (!i.hasNext()) { + System.out.println("{}"); + } + // Capacity based the output for measurements.txt. + final StringBuilder sb = new StringBuilder(1100).append('{'); + while (i.hasNext()) { + Map.Entry e = i.next(); + sb.append(e.getKey()) + .append('=') + .append(e.getValue().getMin()) + .append('/') + .append(e.getValue().getMean()) + .append('/') + .append(e.getValue().getMax()); + if (i.hasNext()) { + sb.append(',').append(' '); + } + } + sb.append('}'); + return sb.toString(); + } + + /** + * Processes the specified file. + *

+ * Extracted from the main method for testability. + *

+ * + * @param filePath the path of the file we want to process. + * @return a sorted map of station data keyed by station name. + * @throws IOException if an error occurs. + */ + private static SortedMap processFile(final Path filePath) throws IOException { + assert filePath != null : "filePath cannot be null"; + assert Files.isRegularFile(filePath) : STR."\{filePath.toAbsolutePath()} is not a valid file"; + + try (final FileChannel fc = FileChannel.open(filePath, StandardOpenOption.READ)) { + final long fileSize = fc.size(); + if (fileSize < WORD_BYTES) { + // The file size is less than our word size. + // Keep it simple and fall back to non-performant processing. + return processTinyFile(fc, fileSize); + } + return processFile(fc, fileSize); + } + } + + /** + * An unoptimised method for processing a tiny file. + *

+ * Handling tiny files in a separate method reduces the complexity of {@link #processFile(FileChannel, long)}. + *

+ * + * @param fc the file channel to read from. + * @param fileSize the file size in bytes. + * @return a sorted map of station data keyed by station name. + * @throws IOException if an error occurs reading from the file channel. + */ + private static SortedMap processTinyFile(final FileChannel fc, final long fileSize) throws IOException { + final ByteBuffer byteBuffer = ByteBuffer.allocate((int) fileSize); + fc.read(byteBuffer); + return new String(byteBuffer.array(), StandardCharsets.UTF_8) + .lines() + .map(line -> line.trim().split(";")) + .map(tokens -> { + final String stationName = tokens[0]; + final short temp = Short.parseShort(tokens[1].replace(".", "")); + return new SimpleStationData(stationName, temp); + }) + .collect(Collectors.toMap( + sd -> sd.name, + sd -> sd, + TemperatureData::merge, + TreeMap::new)); + } + + /** + * An optimised method for processing files > {@link Long#BYTES} in size. + * + * @param fc the file channel to map into memory. + * @param fileSize the file size in bytes. + * @return a sorted map of station data keyed by station name. + * @throws IOException if an error occurs mapping the file channel into memory. + */ + private static SortedMap processFile(final FileChannel fc, final long fileSize) throws IOException { + assert fileSize >= WORD_BYTES : STR."File size cannot be less than word size \{WORD_BYTES}, but was \{fileSize}"; + + try (final Arena arena = Arena.ofConfined()) { + final long fileAddress = fc.map(FileChannel.MapMode.READ_ONLY, 0, fileSize, arena).address(); + return createChunks(fileAddress, fileSize) + .parallel() + .map(CalculateAverage_jonathanaotearoa::processChunk) + .flatMap(Repository::entries) + .collect(Collectors.toMap( + StationData::getName, + sd -> sd, + TemperatureData::merge, + TreeMap::new)); + } + } + + /** + * Divides the file into chunks that can be processed in parallel. + *

+ * If dividing the file into {@link ForkJoinPool#getCommonPoolParallelism() parallelism} chunks would result in a + * chunk size less than the maximum line size in bytes, then a single chunk is returned for the entire file. + *

+ * + * @param fileAddress the address of the file. + * @param fileSize the size of the file in bytes. + * @return a stream of chunks. + */ + private static Stream createChunks(final long fileAddress, final long fileSize) { + // The number of cores - 1. + final int parallelism = ForkJoinPool.getCommonPoolParallelism(); + final long chunkStep = fileSize / parallelism; + final long lastFileByteAddress = fileAddress + fileSize - 1; + if (chunkStep < MAX_LINE_BYTES) { + // We're dealing with a small file, return a single chunk. + return Stream.of(new Chunk(fileAddress, lastFileByteAddress, true)); + } + final Chunk[] chunks = new Chunk[parallelism]; + long startAddress = fileAddress; + for (int i = 0, n = parallelism - 1; i < n; i++) { + // Find end of the *previous* line. + // We know there's a previous line in this chunk because chunkStep >= MAX_LINE_BYTES. + // The last chunk may be slightly bigger than the others. + // For a 1 billion line file, this has zero impact. + long lastByteAddress = startAddress + chunkStep; + while (UNSAFE.getByte(lastByteAddress) != NEW_LINE_BYTE) { + lastByteAddress--; + } + // We've found the end of the previous line. + chunks[i] = new Chunk(startAddress, lastByteAddress, false); + startAddress = ++lastByteAddress; + } + // The remaining bytes are assigned to the last chunk. + chunks[chunks.length - 1] = (new Chunk(startAddress, lastFileByteAddress, true)); + return Stream.of(chunks); + } + + /** + * Does the work of processing a chunk. + * + * @param chunk the chunk to process. + * @return a repository containing the chunk's station data. + */ + private static Repository processChunk(final Chunk chunk) { + final Repository repo = new Repository(); + long address = chunk.startAddress; + + while (address <= chunk.lastByteAddress) { + // Read station name. 
+ long nameAddress = address; + long nameWord; + long separatorMask; + int nameHash = 1; + + while (true) { + nameWord = chunk.getWord(address); + + // Based on the Hacker's Delight "Find First 0-Byte" branch-free, 5-instruction, algorithm. + // See also https://graphics.stanford.edu/~seander/bithacks.html#ZeroInWord + final long separatorXorResult = nameWord ^ SEPARATOR_XOR_MASK; + // If the separator is not present, all bits in the mask will be zero. + // If the separator is present, the first bit of the corresponding byte in the mask will be 1. + separatorMask = (separatorXorResult - 0x0101010101010101L) & (~separatorXorResult & 0x8080808080808080L); + if (separatorMask == 0) { + address += Long.BYTES; + // Multiplicative hashing, as per Arrays.hashCode(). + // We could use XOR here, but it "might" produce more collisions. + nameHash = 31 * nameHash + Long.hashCode(nameWord); + } + else { + break; + } + } + + // We've found the separator. + // We only support little endian, so we use the *trailing* number of zeros to get the number of name bits. + final int numberOfNameBits = Long.numberOfTrailingZeros(separatorMask) & ~7; + final int numberOfNameBytes = numberOfNameBits >> 3; + final long separatorAddress = address + numberOfNameBytes; + + if (numberOfNameBytes > 0) { + // Truncate the word, so we only have the portion before the separator, i.e. the name bytes. + final int bitsToDiscard = Long.SIZE - numberOfNameBits; + // Little endian. + final long truncatedNameWord = (nameWord << bitsToDiscard) >>> bitsToDiscard; + nameHash = 31 * nameHash + Long.hashCode(truncatedNameWord); + } + + final long tempAddress = separatorAddress + 1; + final long tempWord = chunk.getWord(tempAddress); + + // "0" in UTF-8 is 48, which is 00110000 in binary. + // The first 4 bits of any UTF-8 digit byte are therefore 0011. + + // Get the position of the decimal point... + // "." in UTF-8 is 46, which is 00101110 in binary. 
+ // We can therefore use the 4th bit to check which byte is the decimal point. + final int decimalPointIndex = Long.numberOfTrailingZeros(~tempWord & DECIMAL_POINT_MASK) >> 3; + + // Check if we've got a negative or positive number... + // "-" in UTF-8 is 45, which is 00101101 in binary. + // As per above, we use the 4th bit to check if the word contains a positive, or negative, temperature. + // If the temperature is negative, the value of "sign" will be -1. If it's positive, it'll be 0. + final long sign = (~tempWord << 59) >> 63; + + // Create a mask that zeros out the minus-sign byte, if present. + // Little endian, i.e. the minus sign is the right-most byte. + final long signMask = ~(sign & 0xFF); + + // To get the temperature value, we left-shift the digit bytes into the following, known, positions. + // 0x00 0x00 0x00 0x00 0x00 + // Because we're ANDing with the sign mask, if the value only has a single integer-part digit, the right-most one will be zero. + final int leftShift = (3 - decimalPointIndex) * Byte.SIZE; + final long digitsWord = ((tempWord & signMask) << leftShift) & TEMP_DIGITS_MASK; + + // Get the unsigned int value. + final byte b100 = (byte) (digitsWord >> 8); + final byte b10 = (byte) (digitsWord >> 16); + final byte b1 = (byte) (digitsWord >> 32); + final short unsignedTemp = (short) (b100 * 100 + b10 * 10 + b1); + final short temp = (short) ((unsignedTemp + sign) ^ sign); + + final byte nameSize = (byte) (separatorAddress - nameAddress); + repo.addTemp(nameHash, nameAddress, nameSize, temp); + + // Calculate the address of the next line. + address = tempAddress + decimalPointIndex + 3; + } + + return repo; + } + + /** + * Represents a portion of a file containing 1 or more whole lines. + * + * @param startAddress the memory address of the first byte. + * @param lastByteAddress the memory address of the last byte. + * @param lastWordAddress the memory address of the last whole word. + * @param isLast whether this is the last chunk. 
+ */ + private record Chunk(long startAddress, long lastByteAddress, long lastWordAddress, boolean isLast) { + + public Chunk(final long startAddress, final long lastByteAddress, final boolean isLast) { + this(startAddress, lastByteAddress, lastByteAddress - (Long.BYTES - 1), isLast); + + assert lastByteAddress > startAddress : STR."lastByteAddress \{lastByteAddress} must be > startAddress \{startAddress}"; + assert lastWordAddress >= startAddress : STR."lastWordAddress \{lastWordAddress} must be >= startAddress \{startAddress}"; + } + + /** + * Gets an 8 byte word from this chunk. + *

+ * If the specified address is greater than {@link Chunk#lastWordAddress} and {@link Chunk#isLast}, the word + * will be truncated. This ensures we never read beyond the end of the file. + *

+ * + * @param address the address of the word we want. + * @return the word at the specified address. + */ + public long getWord(final long address) { + assert address >= startAddress : STR."address must be >= startAddress \{startAddress}, but was \{address}"; + assert address < lastByteAddress : STR."address must be < lastByteAddress \{lastByteAddress}, but was \{address}"; + + if (isLast && address > lastWordAddress) { + // Make sure we don't read beyond the end of the file and potentially crash the JVM. + final long word = UNSAFE.getLong(lastWordAddress); + final int bytesToDiscard = (int) (address - lastWordAddress); + // As with elsewhere, this assumes little endianness. + return word >>> (bytesToDiscard << 3); + } + return UNSAFE.getLong(address); + } + } + + /** + * Abstract class encapsulating temperature data. + */ + private static abstract class TemperatureData { + + private short min; + private short max; + private long sum; + private int count; + + protected TemperatureData(final short temp) { + min = max = temp; + sum = temp; + count = 1; + } + + void addTemp(final short temp) { + if (temp < min) { + min = temp; + } + else if (temp > max) { + max = temp; + } + sum += temp; + count++; + } + + TemperatureData merge(final TemperatureData other) { + if (other.min < min) { + min = other.min; + } + if (other.max > max) { + max = other.max; + } + sum += other.sum; + count += other.count; + return this; + } + + double getMin() { + return round(((double) min) / 10.0); + } + + double getMax() { + return round(((double) max) / 10.0); + } + + double getMean() { + return round((((double) sum) / 10.0) / count); + } + + private static double round(double value) { + return Math.round(value * 10.0) / 10.0; + } + } + + /** + * For use with tiny files. + * + * @see CalculateAverage_jonathanaotearoa#processTinyFile(FileChannel, long). 
+ */ + private static final class SimpleStationData extends TemperatureData implements Comparable { + + private final String name; + + SimpleStationData(final String name, final short temp) { + super(temp); + this.name = name; + } + + @Override + public int compareTo(final SimpleStationData other) { + return name.compareTo(other.name); + } + } + + private static final class StationData extends TemperatureData implements Comparable { + + private final int nameHash; + private final long nameAddress; + private final byte nameSize; + private String name; + + StationData(final int nameHash, final long nameAddress, final byte nameSize, final short temp) { + super(temp); + this.nameAddress = nameAddress; + this.nameSize = nameSize; + this.nameHash = nameHash; + } + + @Override + public int compareTo(final StationData other) { + return getName().compareTo(other.getName()); + } + + String getName() { + if (name == null) { + final byte[] nameBytes = new byte[nameSize]; + UNSAFE.copyMemory(null, nameAddress, nameBytes, UNSAFE.arrayBaseOffset(nameBytes.getClass()), nameSize); + name = new String(nameBytes, StandardCharsets.UTF_8); + } + return name; + } + } + + /** + * Open addressing, linear probing, hash map repository. + */ + private static final class Repository { + + private static final int CAPACITY = 100_003; + private static final int LAST_INDEX = CAPACITY - 1; + + private final StationData[] table; + + public Repository() { + this.table = new StationData[CAPACITY]; + } + + /** + * Adds a station temperature value to this repository. + * + * @param nameHash the station name hash. + * @param nameAddress the station name address in memory. + * @param nameSize the station name size in bytes. + * @param temp the temperature value. 
+ */ + public void addTemp(final int nameHash, final long nameAddress, final byte nameSize, short temp) { + final int index = findIndex(nameHash, nameAddress, nameSize); + if (table[index] == null) { + table[index] = new StationData(nameHash, nameAddress, nameSize, temp); + } + else { + table[index].addTemp(temp); + } + } + + public Stream entries() { + return Arrays.stream(table).filter(Objects::nonNull); + } + + private int findIndex(int nameHash, final long nameAddress, final byte nameSize) { + // Think about replacing modulo. + // https://lemire.me/blog/2018/08/20/performance-of-ranged-accesses-into-arrays-modulo-multiply-shift-and-masks/ + int index = (nameHash & 0x7FFFFFFF) % CAPACITY; + while (isCollision(index, nameHash, nameAddress, nameSize)) { + index = index == LAST_INDEX ? 0 : index + 1; + } + return index; + } + + private boolean isCollision(final int index, final long nameHash, final long nameAddress, final byte nameSize) { + final StationData existing = table[index]; + if (existing == null) { + return false; + } + if (nameHash != existing.nameHash) { + return true; + } + if (nameSize != existing.nameSize) { + return true; + } + // Last resort; check if the names are the same. + // This is real performance hit :( + return !isMemoryEqual(nameAddress, existing.nameAddress, nameSize); + } + + /** + * Checks if two locations in memory have the same value. + * + * @param address1 the address of the first location. + * @param address2 the address of the second locations. + * @param size the number of bytes to check for equality. + * @return true if both addresses contain the same bytes. + */ + private static boolean isMemoryEqual(final long address1, final long address2, final byte size) { + // Checking 1 byte at a time, so we can bail as early as possible. 
+ for (int offset = 0; offset < size; offset++) { + final byte b1 = UNSAFE.getByte(address1 + offset); + final byte b2 = UNSAFE.getByte(address2 + offset); + if (b1 != b2) { + return false; + } + } + return true; + } + } + + /** + * Helper for running tests without blowing away the main measurements.txt file. + * Saves regenerating the 1 billion line file after each test run. + * Enable assertions in the IDE run config. + */ + public static final class TestRunner { + public static void main(String[] args) throws IOException { + final StringBuilder testResults = new StringBuilder(); + try (DirectoryStream dirStream = Files.newDirectoryStream(SAMPLE_DIR_PATH, "*.txt")) { + dirStream.forEach(filePath -> { + testResults.append(STR."Testing '\{filePath.getFileName()}'... "); + final String expectedResultFileName = filePath.getFileName().toString().replace(".txt", ".out"); + try { + final String expected = Files.readString(SAMPLE_DIR_PATH.resolve(expectedResultFileName)); + final SortedMap results = processFile(filePath); + // Appending \n to the results string to mimic println(). + final String actual = STR."\{resultsToString(results)}\n"; + if (actual.equals(expected)) { + testResults.append("Passed\n"); + } else { + testResults.append("Failed. 
Actual output does not match expected\n"); + } + } catch (IOException e) { + throw new RuntimeException(STR."Error testing '\{filePath.getFileName()}"); + } + }); + } finally { + System.out.println(testResults); + } + } + } +} diff --git a/src/main/java/dev/morling/onebrc/CalculateAverage_jparera.java b/src/main/java/dev/morling/onebrc/CalculateAverage_jparera.java new file mode 100644 index 000000000..194dbccec --- /dev/null +++ b/src/main/java/dev/morling/onebrc/CalculateAverage_jparera.java @@ -0,0 +1,382 @@ +//COMPILE_OPTIONS -source 21 --enable-preview --add-modules jdk.incubator.vector +//RUNTIME_OPTIONS --enable-preview --add-modules jdk.incubator.vector +/* + * Copyright 2023 The original authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package dev.morling.onebrc; + +import java.io.IOException; +import java.lang.foreign.Arena; +import java.lang.foreign.MemorySegment; +import java.lang.foreign.ValueLayout; +import java.lang.invoke.MethodHandles; +import java.lang.invoke.VarHandle; +import java.nio.ByteOrder; +import java.nio.channels.FileChannel; +import java.nio.channels.FileChannel.MapMode; +import java.nio.charset.StandardCharsets; +import java.nio.file.Path; +import java.nio.file.StandardOpenOption; +import java.util.ArrayList; +import java.util.List; +import java.util.TreeMap; +import java.util.function.Function; +import java.util.stream.Collectors; + +import jdk.incubator.vector.ByteVector; +import jdk.incubator.vector.VectorSpecies; +import jdk.incubator.vector.VectorOperators; + +public class CalculateAverage_jparera { + private static final String FILE = "./measurements.txt"; + + private static final VarHandle BYTE_HANDLE = MethodHandles + .memorySegmentViewVarHandle(ValueLayout.JAVA_BYTE); + + private static final VarHandle INT_HANDLE = MethodHandles + .memorySegmentViewVarHandle(ValueLayout.JAVA_INT_UNALIGNED); + + private static final VarHandle LONG_LE_HANDLE = MethodHandles + .memorySegmentViewVarHandle(ValueLayout.JAVA_LONG_UNALIGNED.withOrder(ByteOrder.LITTLE_ENDIAN)); + + private static final VectorSpecies BYTE_SPECIES = ByteVector.SPECIES_PREFERRED; + + private static final int BYTE_SPECIES_LANES = BYTE_SPECIES.length(); + + private static final ByteOrder NATIVE_ORDER = ByteOrder.nativeOrder(); + + private static final byte LF = '\n'; + + private static final byte SEPARATOR = ';'; + + private static final byte DECIMAL_SEPARATOR = '.'; + + private static final byte NEG = '-'; + + public static void main(String[] args) throws IOException, InterruptedException { + try (var fc = FileChannel.open(Path.of(FILE), StandardOpenOption.READ)) { + try (var arena = Arena.ofShared()) { + var fs = fc.map(MapMode.READ_ONLY, 0, fc.size(), arena); + var cpus = 
Runtime.getRuntime().availableProcessors(); + var output = chunks(fs, cpus).stream() + .parallel() + .map(Chunk::parse) + .flatMap(List::stream) + .collect(Collectors.toMap( + Entry::key, + Function.identity(), + Entry::merge, + TreeMap::new)); + System.out.println(output); + } + } + } + + private static List chunks(MemorySegment ms, int splits) { + long fileSize = ms.byteSize(); + long expectedChunkSize = Math.ceilDiv(fileSize, splits); + var chunks = new ArrayList(); + long offset = 0; + while (offset < fileSize) { + var end = Math.min(offset + expectedChunkSize, fileSize); + while (end < fileSize && (byte) BYTE_HANDLE.get(ms, end++) != LF) { + } + long len = end - offset; + chunks.add(new Chunk(ms.asSlice(offset, len))); + offset = end; + } + return chunks; + } + + private static final class Chunk { + private static final int KEY_LOG2_BYTES = 7; + + private static final int KEY_BYTES = 1 << KEY_LOG2_BYTES; + + private static final int ENTRIES_LOG2_CAPACITY = 16; + + private static final int ENTRIES_CAPACITY = 1 << ENTRIES_LOG2_CAPACITY; + + private static final int ENTRIES_MASK = ENTRIES_CAPACITY - 1; + + private final MemorySegment segment; + + private final long size; + + private final Entry[] entries = new Entry[ENTRIES_CAPACITY]; + + private final byte[] keys = new byte[ENTRIES_CAPACITY * KEY_BYTES]; + + private final MemorySegment kms = MemorySegment.ofArray(this.keys); + + private static final int KEYS_MASK = (ENTRIES_CAPACITY * KEY_BYTES) - 1; + + private long offset; + + private byte current; + + private boolean hasCurrent = true; + + Chunk(MemorySegment segment) { + this.segment = segment; + this.size = segment.byteSize(); + } + + public List parse() { + long safe = size - KEY_BYTES; + while (offset < safe) { + vectorizedEntry().add(vectorizedValue()); + } + next(); + while (hasCurrent()) { + entry().add(value()); + } + var output = new ArrayList(entries.length); + for (int i = 0, o = 0; i < entries.length; i++, o += KEY_BYTES) { + var e = entries[i]; + 
if (e != null) { + e.setkey(keys, o); + output.add(e); + } + } + return output; + } + + private Entry vectorizedEntry() { + var separators = ByteVector.broadcast(BYTE_SPECIES, SEPARATOR); + int len = 0; + for (int i = 0;; i += BYTE_SPECIES_LANES) { + var block = ByteVector.fromMemorySegment(BYTE_SPECIES, this.segment, offset + i, NATIVE_ORDER); + int equals = block.compare(VectorOperators.EQ, separators).firstTrue(); + len += equals; + if (equals != BYTE_SPECIES_LANES) { + break; + } + } + var start = this.offset; + this.offset = start + len + 1; + int hash = hash(segment, start, len); + int index = (hash - (hash >>> -ENTRIES_LOG2_CAPACITY)) & ENTRIES_MASK; + int keyOffset = index << KEY_LOG2_BYTES; + int count = 0; + while (count < ENTRIES_MASK) { + index = index & ENTRIES_MASK; + keyOffset = keyOffset & KEYS_MASK; + var e = this.entries[index]; + if (e == null) { + MemorySegment.copy(this.segment, start, kms, keyOffset, len); + return this.entries[index] = new Entry(len, hash); + } + else if (e.hash == hash && e.keyLength == len) { + int total = 0; + for (int i = 0; i < KEY_BYTES; i += BYTE_SPECIES_LANES) { + var ekey = ByteVector.fromArray(BYTE_SPECIES, keys, keyOffset + i); + var okey = ByteVector.fromMemorySegment(BYTE_SPECIES, this.segment, start + i, NATIVE_ORDER); + int equals = ekey.compare(VectorOperators.NE, okey).firstTrue(); + total += equals; + if (equals != BYTE_SPECIES_LANES) { + break; + } + } + if (total >= len) { + return e; + } + } + count++; + index++; + keyOffset += KEY_BYTES; + } + throw new IllegalStateException("Map is full!"); + } + + private Entry entry() { + long start = this.offset - 1; + int len = 0; + while (hasCurrent() && current != SEPARATOR) { + len++; + next(); + } + expect(SEPARATOR); + int hash = hash(segment, start, len); + int index = (hash - (hash >>> -ENTRIES_LOG2_CAPACITY)) & ENTRIES_MASK; + int keyOffset = index << KEY_LOG2_BYTES; + int count = 0; + while (count < ENTRIES_MASK) { + index = index & ENTRIES_MASK; + 
keyOffset = keyOffset & KEYS_MASK; + var e = this.entries[index]; + if (e == null) { + MemorySegment.copy(this.segment, start, kms, keyOffset, len); + return this.entries[index] = new Entry(len, hash); + } + else if (e.hash == hash && e.keyLength == len) { + int total = 0; + for (int i = 0; i < len; i++) { + if (((byte) BYTE_HANDLE.get(this.segment, start + i)) != this.keys[keyOffset + i]) { + break; + } + total++; + } + if (total >= len) { + return e; + } + } + count++; + index++; + keyOffset += KEY_BYTES; + } + throw new IllegalStateException("Map is full!"); + } + + private static final long MULTIPLY_ADD_DIGITS = 100 * (1L << 24) + 10 * (1L << 16) + 1; + + private int vectorizedValue() { + long dw = (long) LONG_LE_HANDLE.get(this.segment, this.offset); + int zeros = Long.numberOfTrailingZeros(~dw & 0x10101000L); + boolean negative = ((dw & 0xFF) ^ NEG) == 0; + dw = ((negative ? (dw & ~0xFF) : dw) << (28 - zeros)) & 0x0F000F0F00L; + int value = (int) (((dw * MULTIPLY_ADD_DIGITS) >>> 32) & 0x3FF); + this.offset += (zeros >>> 3) + 3; + return negative ? -value : value; + } + + private int value() { + int value = 0; + var negative = false; + if (consume(NEG)) { + negative = true; + } + while (hasCurrent()) { + if ((current & 0xF0) == 0x30) { + value *= 10; + value += current - '0'; + } + else if (current != DECIMAL_SEPARATOR) { + break; + } + next(); + } + if (hasCurrent()) { + expect(LF); + } + return negative ? 
-value : value; + } + + private static final int GOLDEN_RATIO = 0x9E3779B9; + private static final int HASH_LROTATE = 5; + + private static int hash(MemorySegment ms, long start, int len) { + int x, y; + if (len >= Integer.BYTES) { + x = (int) INT_HANDLE.get(ms, start); + y = (int) INT_HANDLE.get(ms, start + len - Integer.BYTES); + } + else { + x = (byte) BYTE_HANDLE.get(ms, start) & 0xFF; + y = (byte) BYTE_HANDLE.get(ms, start + len - Byte.BYTES) & 0xFF; + } + return (Integer.rotateLeft(x * GOLDEN_RATIO, HASH_LROTATE) ^ y) * GOLDEN_RATIO; + } + + private void expect(byte b) { + if (!consume(b)) { + throw new IllegalStateException("Unexpected token!"); + } + } + + private boolean consume(byte b) { + if (current == b) { + next(); + return true; + } + return false; + } + + private boolean hasCurrent() { + return hasCurrent; + } + + private void next() { + if (offset < size) { + this.current = (byte) BYTE_HANDLE.get(segment, offset++); + } + else { + this.hasCurrent = false; + } + } + } + + private static final class Entry { + final int keyLength; + + final int hash; + + private int min = Integer.MAX_VALUE; + + private int max = Integer.MIN_VALUE; + + private long sum; + + private int count; + + private String key; + + Entry(int keyLength, int hash) { + this.keyLength = keyLength; + this.hash = hash; + } + + public String key() { + return key; + } + + void setkey(byte[] keys, int offset) { + this.key = new String(keys, offset, keyLength, StandardCharsets.UTF_8); + } + + public void add(int value) { + min = Math.min(min, value); + max = Math.max(max, value); + sum += value; + count++; + } + + public Entry merge(Entry o) { + min = Math.min(min, o.min); + max = Math.max(max, o.max); + sum += o.sum; + count += o.count; + return this; + } + + @Override + public String toString() { + var average = Math.round(((sum / 10.0) / count) * 10.0); + return decimal(min) + '/' + decimal(average) + '/' + decimal(max); + } + + private static String decimal(long value) { + var builder = 
new StringBuilder(); + if (value < 0) { + builder.append((char) NEG); + } + value = Math.abs(value); + builder.append(value / 10); + builder.append((char) DECIMAL_SEPARATOR); + builder.append(value % 10); + return builder.toString(); + } + } +} diff --git a/src/main/java/dev/morling/onebrc/CalculateAverage_justplainlaake.java b/src/main/java/dev/morling/onebrc/CalculateAverage_justplainlaake.java new file mode 100644 index 000000000..505ec3f53 --- /dev/null +++ b/src/main/java/dev/morling/onebrc/CalculateAverage_justplainlaake.java @@ -0,0 +1,459 @@ +/* + * Copyright 2023 The original authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package dev.morling.onebrc; + +import java.io.IOException; +import java.lang.foreign.Arena; +import java.lang.reflect.Field; +import java.nio.channels.FileChannel; +import java.nio.charset.StandardCharsets; +import java.nio.file.Path; +import java.nio.file.StandardOpenOption; +import java.text.DecimalFormat; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.concurrent.ExecutionException; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.Future; + +import sun.misc.Unsafe; + +/* + Possibilities to improve: + * Reduce Standard Memory Reads and/or Swaps for threading + - For the read file; using Unsafe or MemorySegment to map the file to an existing register instead of keeping the bytes local + - For normal variables; Most of the time reading a value performs a load from memory and registers it for faster lookups, but with multithreading causes each thread to re read and register each get [volatile] keyword + * Add multithreading to process multiple segments at once (When you have 1,000,000,000 cars driving might as well open as many lanes as possible) + * Improve Mapping of entries (More O(1) lookups the better, i.e. 
hashed key maps, preferebly open maps to skip needing linked lists or trees, also simplifies since we don't need to delete anything) + * Remove use of java streams (They can be much slower than expected, good for developer readability but not for performance 90% of the time) + * Reduce amount of bytecode instructions (Usually just a micro-optimization, but since we are reading 1,000,000,000 lines, then this is really helpful in the processing code) + * Never use division in processing code, division is 2x+ slower than multiplication (Easy fix is multiplying by decimal 2/2 vs 2*0.5) + + My System: + Device: + Processor(16) 11th Gen Intel(R) Core(TM) i7-11700K @ 3.60GHz 3.60 GHz + Installed RAM 32.0 GB (31.8 GB usable) + System type 64-bit operating system, x64-based processor + Pen and touch No pen or touch input is available for this display + Windows Specification: + Edition Windows 11 Home + Version 23H2 + OS build 22635.3061 + Experience Windows Feature Experience Pack 1000.22684.1000.0 + + + Runs (Only IDE open, just after complete shutdown, measured using System.nanoTime around main method): + - Baseline + * 144,403.3814ms + - merrykittyunsafe (#1 on LB) + * 2,757.8295ms + - royvanrijn (#2 on LB) + * 1,643.9123ms ??? 
Assuming this is because of my system specs compared to specs on testing system + //Obviously there were more runs than this, but these were the significant jumps + - Me run 1 (Initial attempt;multithreading, file mapped to global Unsafe, long hash of name, read byte by byte, store in hashmap and merge from threads) + * 5,423.4432ms + - Me run 2 (Read longs instead of bytes to determine name hash) + * 3,937.3234ms + - Me run 3 (Swap to using a rolling long hash with murmur3 hashing function, change hashmap to be an openmap with unboxed long as the key) + * 2,951.6891ms + - Me run 4 (Change entire line reading to be long based with bit operations to determine number) + * 2,684.9823ms + - Me run 5 (Use main thread as one of the processing threads) + * 2,307.3038ms + - Me run 6 (Remove use of math.min and math.max in favor of ternary operator (Reduces getStatic operation)) + * 2,265.3521ms + */ + +public class CalculateAverage_justplainlaake { + + // Constants + private static final String FILE = "./measurements.txt"; + private static final byte SEPERATOR_BYTE = ';'; + private static final byte NEW_LINE_BYTE = '\n'; + private static final DecimalFormat STATION_FORMAT = new DecimalFormat("#,##0.0"); + + private static final long[] OFFSET_CLEARS = { + 0x0000000000000000L, // 8 Offset (Clear whole thing) + 0x00000000000000FFL, + 0x000000000000FFFFL, + 0x0000000000FFFFFFL, + 0x00000000FFFFFFFFL, + 0x000000FFFFFFFFFFL, + 0x0000FFFFFFFFFFFFL, + 0x00FFFFFFFFFFFFFFL, + 0xFFFFFFFFFFFFFFFFL,// 0 Offset (Clear nothing) + }; + + private static final Unsafe UNSAFE; + static { + Unsafe _unsafe = null; + try { + Field unsafe = Unsafe.class.getDeclaredField("theUnsafe"); + unsafe.setAccessible(true); + _unsafe = (Unsafe) unsafe.get(Unsafe.class); + } + catch (NoSuchFieldException | SecurityException | IllegalArgumentException | IllegalAccessException e) { + e.printStackTrace(); + System.exit(1); + } + UNSAFE = _unsafe;// Just to get around "The blank final field UNSAFE may not have 
been initialized" + } + + public static void main(String[] args) throws IOException { + int processors = Runtime.getRuntime().availableProcessors(); + + ExecutorService e = null; + + List> futures = new ArrayList<>(); + OpenMap mainMap = null; + try (FileChannel channel = FileChannel.open(Path.of(FILE), StandardOpenOption.READ)) { + long fileSize = channel.size(); + + if (fileSize < 10_000) {// File is smaller than 10,000 bytes, we will lose performance trying to multithread so just set processors to 1 which will skip the futures and only use main thread + processors = 1; + } + else { + e = Executors.newFixedThreadPool(processors);// Create a ThreadPool based executor using the count of processors available + } + + long chunkSize = fileSize / processors;// Determine approximate size of each chunk based on amount of processors available + + long startAddress = channel.map(FileChannel.MapMode.READ_ONLY, 0, fileSize, Arena.global())// Map the file channel into memory using the global arena (accessible by all threads) + .address();// And get the starting address of mapped section + + long endAddress = startAddress + fileSize; + long currentAddress = startAddress + chunkSize; + long chunkStart = startAddress; + + for (int i = 0; i < processors; i++) {// We need to chunk the file for each processor/thread + + while (currentAddress < endAddress) {// While loop to locate the next new line character from the chunk we are in + long match = UNSAFE.getLong(currentAddress);// Read the next 8 bytes as a long from the memory address + short offset = getMaskOffset(match, NEW_LINE_BYTE);// find the byte in the long which equals 10 aka '\n', if it is not found this returns -1 + if (offset != -1) {// We found the offset, so add it to the current adress and break the while loop + currentAddress += offset; + break; + } + currentAddress += 8;// No offset was found so advance 8 bytes, aka 1 long + } + + long finalChunkStart = chunkStart, finalChunkEnd = Math.min(endAddress, 
currentAddress - 1);// Create final fields to pass to the thread call below, + // Also Math.min doesn't matter here since its called x times where x = count of processors + + if (i == processors - 1) {// if on last processor use main thread to optimize threading, doing on last processor means the others are already processing while this runs + mainMap = process(finalChunkStart, finalChunkEnd); + } + else { + futures.add(e.submit(() -> process(finalChunkStart, finalChunkEnd))); + } + chunkStart = currentAddress + 1;// Advance the start of the next chunk to be the end of this chunk + 1 to move past the new line character + currentAddress = Math.min(currentAddress + chunkSize, endAddress);// Advance the next chunks end to be the end of the mapped file or the end of the approximated chunk + } + } + + OpenMap merged = mainMap;// Set the main map created with the process called on main thread to make it effectively final + + if (processors > 1) {// If there is only one processor then we only used the main thread so no point in merging the futures + // The merging of processing takes ~10ms + for (Future f : futures) { + try { + + OpenMap processed = f.get();// Waits until the process task is done but then returns the callable value from the process method + + // Simple way to merge both lists, tried doing it more inline inside the map and ended up taking a 10ms longer + processed.forEach((i, s) -> { + merged.merge(i, s); + }); + } + catch (InterruptedException | ExecutionException e1) { + e1.printStackTrace(); + } + } + // Mark threadpool to be shutdown, call it here to let the threadpool finish out while the rest of the processing occurs + e.shutdown(); + } + + // Ordering and printing takes 50ms + Station[] nameOrdered = merged.toArray();// Turn the merged map into an array to quickly sort it + + Arrays.sort(nameOrdered, (n1, n2) -> n1.name.compareTo(n2.name));// Sort based on name, this might be optimizable based on the longs of the name, but would likely only gain 
some ms?? + + // Print results to the sys out + System.out.print("{"); + for (int i = 0; i < nameOrdered.length; i++) { + if (i != 0) { + System.out.print(", "); + } + System.out.print(nameOrdered[i]); + } + System.out.print("}\n");// Need newline character to meet specs + } + + // Core processing functionality, processes a chunk of memory + private static OpenMap process(long fromAddress, long toAddress) { + + OpenMap stationsLookup = new OpenMap();// Create a new map for this specific chunk, this is also the returned value for the callable + + long blockStart = fromAddress; + long currentAddress = fromAddress; + + while (currentAddress < toAddress) {// Just keep looping until we exhaust the chunk + + long read = 0l; + short offset = -1; + // The hash is a long hash based on the murmur3 algorithm. Look at the getMurmurHash3 method to find link + long hash = 1; + + while ((offset = getMaskOffset(read = UNSAFE.getLong(currentAddress), SEPERATOR_BYTE)) == -1) {// Read and compute the hash until we locate the seperator byte 59 or ';' + currentAddress += 8;// forwardscan + hash = (997 * hash) ^ getMurmurHash3(991 * read); + } + + // Compute the final hash based using the last read long but only the effective bits (anything before the byte 59 or ';'). 
+ // Using the OFFSET_CLEARS masks that are defined statically we can essentially segregate the important bits of the name based on the offset read above + hash = (997 * hash) ^ getMurmurHash3(991 * (read & OFFSET_CLEARS[offset])); + + // Advance the current address/pointer to be 1 character past the end of the name Example: BillyJoel;29 would make the current address start at the '2' character + currentAddress += offset + 1; + + Station station = stationsLookup.getOrCreate(hash, currentAddress, blockStart); + + /* + * Possible combinations (x = number) -99.9 -> 99.9; ex: 54.4, -31.7, -4.5, 1.9 + * x.x + * xx.x + * -x.x + * -xx.x + */ + + // Encoding is UTF8 however, since numbers in UTF8 are all single byte characters we can do some byte math to determin the number; 0=48 and 9=57, so character - 48 = number + // And since - and . are also single byte characters we can make some assumptions, leading us with the primary one that no matter what the number will be 3 to 5 bytes (see above combinations) + // Unfortunately since an integer is only 4 bytes we must read the long; Something to test would be to see if we could read an integer and then read an extra byte if it is the 5 character edge case + read = UNSAFE.getLong(currentAddress); + + offset = 0;// reinitiate the offset to reuse the local address + + byte sign = (byte) ((read >> offset) ^ 45);// Check the first byte of the new long to see if it is 45 aka '-', if it is this byte will be 0 + + // The logic below is based on the fact that we are reading + int num = sign == 0 ? 
(((byte) (read >> (offset += 8))) - 48) : (((byte) read) - 48);// Start the number reading, if it is a negative advance 8 bits in the long (8 bits = 1 byte) + currentAddress += 4;// There will always be at least 3 digits to read and the newline digit (4 total) + if ((byte) ((read >> (offset + 8)) ^ 46) != 0) {// There can only be one more possible number for cases of (XY.X | -XY.X) where Y is that other number + num *= 10; + num += ((byte) (read >> (offset += 8))) - 48; + currentAddress++;// Add one digit read if temp is 3 digits + } + num *= 10; + num += ((byte) (read >> (offset + 16))) - 48;// Read the decimal character (no matter what it is 16 bits past the offset here, since 8 bits is the last number and 8 bits is the decimal) + if (sign == 0) { + num *= -1; + currentAddress++;// Add another digit read for the negative sign + } + + // Assign the values, don't use Math.min or any special bit manipulation. Faster to just use ternary + station.min = station.min < num ? station.min : num; + station.max = station.max > num ? station.max : num; + station.count++; + station.sum += num; + // And now set the next block to start at the current address + blockStart = currentAddress; + } + return stationsLookup; + } + + // Avalanche hashing function for longs: https://github.com/aappleby/smhasher/blob/master/README.md + public final static long getMurmurHash3(long x) { + x ^= x >>> 33; + x *= 0xff51afd7ed558ccdL; + x ^= x >>> 33; + x *= 0xc4ceb9fe1a85ec53L; + x ^= x >>> 33; + return x; + } + + // Simple way to identify if a byte is set in a long at any of the 8 spots, and also to get the offset of that byte. + // On average this is fast but certain cases could make it slow (checking 500,000,000,000 longs that don't have the test byte at all...) 
+ private static short getMaskOffset(long value, byte test) { + for (short i = 0; i < 8; i++) { + if (((byte) value & 0xFF) == test) { + return i; + } + value = value >> 8; + } + return -1; + } + + private static class Station { + private final long nameStart, nameEnd;// Store the starting and ending address of the name, to fill it later + private final int nameLength; + private int min = Integer.MAX_VALUE, max = Integer.MIN_VALUE, count; + private long sum; + private String name; + + Station(long nameStart, long nameEnd) { + this.nameStart = nameStart; + this.nameEnd = nameEnd; + this.nameLength = (int) (nameEnd - nameStart) + 1;// Add 1 to include seperator + } + + protected void fillName() { + byte[] nameBuffer = new byte[(int) (nameEnd - nameStart)]; + UNSAFE.copyMemory(null, this.nameStart, nameBuffer, Unsafe.ARRAY_BYTE_BASE_OFFSET, nameBuffer.length);// Quick memory copy, using null as src copies from the file we mapped earlier + name = new String(nameBuffer, StandardCharsets.UTF_8); + } + + @Override + public String toString() {// Use decimal format to print numbers + return name + "=" + STATION_FORMAT.format(Math.round(min) * 0.1) + "/" + STATION_FORMAT.format(Math.round(((double) sum) / count) * 0.1) + "/" + + STATION_FORMAT.format(Math.round(max) * 0.1); + } + + } + + public static class OpenMap { + public static final float LOAD_FACTOR = 0.75f; + public static final int EXPECTED_INITIAL_SIZE = 100_000; + + protected transient long[] keys;// Use unboxed long values as a key, faster than a doing new HashMap() as with generics it will box/unbox every action (can be costly in large quantities) + protected transient Station[] values; + protected transient int capacity; + protected transient int maxFill; + protected transient int mask; + protected int size; + + public OpenMap() { + // capacity = (int) getNextPowerOfTwo((long) Math.ceil(EXPECTED_INITIAL_SIZE / LOAD_FACTOR));// need to base the capacity on the next power of two for the mask to work properly + // 
initial size of 100k gives 262,144 Capacity, since we know this and its way oversized for a max of 10k keys theres no need to recalculate + capacity = 262_144; + mask = capacity - 1; + maxFill = (int) Math.ceil(capacity * 0.75f);// Only allow 75% of capacity before resizing + keys = new long[capacity]; + values = new Station[capacity]; + } + + public void merge(long key, Station toMerge) { + // Simple compute function, if exists pass existing, if it doesn't pass null + int pos = (int) key & mask;// Key has already been hashed as we read, but cap it by mask + while (values[pos] != null) { + if (keys[pos] == key) { + final Station oldValue = values[pos]; + + // If names are different size but key was same, then continue to next step as hash collided + // Compare memory values to see if the name is same as well, prevents hash collision + if (oldValue.nameLength == toMerge.nameLength && compareMemory(toMerge.nameStart, oldValue.nameStart, oldValue.nameLength)) { + // Memory was the same, making these the same station + oldValue.count += toMerge.count; + oldValue.sum += toMerge.sum; + oldValue.min = oldValue.min < toMerge.min ? oldValue.min : toMerge.min; + oldValue.max = oldValue.max > toMerge.max ? 
oldValue.max : toMerge.max; + return; + } + } + pos = (pos + 1) & mask; + } + keys[pos] = key; + values[pos] = toMerge; + size++; + } + + public Station getOrCreate(final long key, long currentAddress, long blockStart) { + int pos = (int) key & mask;// Key has already been hashed as we read, but cap it by mask + while (values[pos] != null) {// While position is set + if (keys[pos] == key) {// Check if key is correct + + // If names are different size but key was same, then continue to next step as hash collided + // Compare memory values to see if the name is same as well, prevents hash collision + if (values[pos].nameLength == currentAddress - blockStart && compareMemory(blockStart, values[pos].nameStart, values[pos].nameLength)) { + return values[pos]; + } + } + pos = (pos + 1) & mask;// Since this is an open map we keep checking next masked key for an open spot (Faster than tree or linked list on a specific node) + } + keys[pos] = key; + size++; + return values[pos] = new Station(blockStart, currentAddress - 1);// Since current address contains the splitter (we will subtract by 1 here, better to do here since this is only called when it doesn't exist less math = performance) + } + + // Simple iterator for each set value + public void forEach(OpenConsumer consumer) { + for (int i = 0; i < this.capacity; i++) { + if (values[i] != null) { + consumer.accept(keys[i], values[i]); + } + } + } + + public Station[] toArray() { + Station[] array = new Station[size]; + int setter = 0; + for (int i = 0; i < capacity; i++) { + if (values[i] != null) { + array[setter++] = values[i]; + values[i].fillName(); + } + } + return array; + } + + // Bit function to get the next power of two on some number, used to determine best capacity based on initial size + public long getNextPowerOfTwo(long length) { + if (length-- == 0) + return 1; + length |= length >> 1; + length |= length >> 2; + length |= length >> 4; + length |= length >> 8; + length |= length >> 16; + return (length | 
length >> 32) + 1; + } + + private boolean compareMemory(long start1, long start2, int length) { + while (length > 0) { + if (length >= 8) { + if (UNSAFE.getLong(start1) != UNSAFE.getLong(start2)) { + return false; + } + } + else { + if ((UNSAFE.getLong(start1) & OFFSET_CLEARS[length]) != (UNSAFE.getLong(start2) & OFFSET_CLEARS[length])) { + System.out.println("Found collision: " + start1 + ": " + start2); + System.out.println("Found collision: " + UNSAFE.getLong(start1) + ": " + UNSAFE.getLong(start2)); + System.out.println("Length: " + length); + return false; + } + } + length -= 8; + start1 += 8; + start2 += 8; + } + return true; + } + + @FunctionalInterface + public static interface OpenConsumer { + void accept(long key, Station value); + } + + @FunctionalInterface + public static interface OpenFunction { + Station action(long key, Station value); + } + + } + +} diff --git a/src/main/java/dev/morling/onebrc/CalculateAverage_karthikeyan97.java b/src/main/java/dev/morling/onebrc/CalculateAverage_karthikeyan97.java new file mode 100644 index 000000000..de151c1a5 --- /dev/null +++ b/src/main/java/dev/morling/onebrc/CalculateAverage_karthikeyan97.java @@ -0,0 +1,368 @@ +/* + * Copyright 2023 The original authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package dev.morling.onebrc; + +import sun.misc.Unsafe; + +import static java.util.stream.Collectors.*; + +import java.io.FileInputStream; + +import java.io.RandomAccessFile; +import java.lang.foreign.Arena; +import java.lang.reflect.Field; +import java.nio.ByteBuffer; +import java.nio.MappedByteBuffer; +import java.nio.channels.FileChannel; +import java.nio.charset.Charset; +import java.nio.charset.StandardCharsets; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Comparator; +import java.util.HashMap; +import java.util.LinkedList; +import java.util.List; +import java.util.Map; +import java.util.Scanner; +import java.util.Set; +import java.util.TreeMap; +import java.util.function.BiConsumer; +import java.util.function.BinaryOperator; +import java.util.function.Function; +import java.util.function.Supplier; +import java.util.stream.Collector; +import java.util.stream.Collectors; + +public class CalculateAverage_karthikeyan97 { + + private static final Unsafe UNSAFE = initUnsafe(); + + private static final String FILE = "./measurements.txt"; + + private static Unsafe initUnsafe() { + try { + Field theUnsafe = Unsafe.class.getDeclaredField("theUnsafe"); + theUnsafe.setAccessible(true); + return (Unsafe) theUnsafe.get(Unsafe.class); + } + catch (NoSuchFieldException | IllegalAccessException e) { + throw new RuntimeException(e); + } + } + + private record Measurement(modifiedbytearray station, double value) { + } + + private record customPair(String stationName, MeasurementAggregator agg) { + } + + private static class MeasurementAggregator { + private long min = Long.MAX_VALUE; + private long max = Long.MIN_VALUE; + private long sum; + private long count; + + public String toString() { + return new StringBuffer(14) + .append(round((1.0 * min))) + .append("/") + .append(round((1.0 * sum) / count)) + .append("/") + .append(round((1.0 * max))).toString(); + } + + private double round(double value) { + return Math.round(value) / 10.0; + } + } 
+ + public static void main(String[] args) throws Exception { + // long start = System.nanoTime(); + // System.setSecurityManager(null); + Collector, MeasurementAggregator, MeasurementAggregator> collector = Collector.of( + MeasurementAggregator::new, + (a, m) -> { + MeasurementAggregator agg = m.getValue(); + if (a.min >= agg.min) { + a.min = agg.min; + } + if (a.max <= agg.max) { + a.max = agg.max; + } + a.max = Math.max(a.max, m.getValue().max); + a.sum += m.getValue().sum; + a.count += m.getValue().count; + }, + (agg1, agg2) -> { + if (agg1.min <= agg2.min) { + agg2.min = agg1.min; + } + if (agg1.max >= agg2.max) { + agg2.max = agg1.max; + } + agg2.sum = agg1.sum + agg2.sum; + agg2.count = agg1.count + agg2.count; + + return agg2; + }, + agg -> agg); + + RandomAccessFile raf = new RandomAccessFile(FILE, "r"); + FileChannel fileChannel = raf.getChannel(); + final long mappedAddress = fileChannel.map(FileChannel.MapMode.READ_ONLY, 0, raf.length(), Arena.global()).address(); + long length = raf.length(); + final long endAddress = mappedAddress + length - 1; + int cores = length > 1000 ? 
Runtime.getRuntime().availableProcessors() : 1; + long boundary[][] = new long[cores][2]; + long segments = length / (cores); + long before = -1; + for (int i = 0; i < cores - 1; i++) { + boundary[i][0] = before + 1; + if (before + segments - 107 > 0) { + raf.seek(before + segments - 107); + } + else { + raf.seek(0); + } + while (raf.read() != '\n') { + } + boundary[i][1] = raf.getChannel().position() - 1; + before = boundary[i][1]; + } + boundary[cores - 1][0] = before + 1; + boundary[cores - 1][1] = length - 1; + + int l3Size = (12 * 1024 * 1024);// unsafe.l3Size(); + + System.out.println(new TreeMap((Arrays.stream(boundary).parallel().map(i -> { + try { + int seglen = (int) (i[1] - i[0] + 1); + HashMap resultmap = new HashMap<>(4000); + long segstart = mappedAddress + i[0]; + int bytesRemaining = seglen; + long num = 0; + boolean isNumber = false; + byte bi; + int sign = 1; + modifiedbytearray stationName = null; + int hascode = 5381; + // System.out.println("start:" + System.nanoTime() / 1000000); + while (bytesRemaining > 0) { + int bytesptr = 0; + // int bytesread = buffer.remaining() > l3Size ? l3Size : buffer.remaining(); + // byte[] bufferArr = new byte[bytesread]; + // buffer.get(bufferArr); + int bbstart = 0; + int readSize = bytesRemaining > l3Size ? l3Size : bytesRemaining; + int actualReadSize = (segstart + readSize + 110 > endAddress || readSize + 110 > i[1]) ? 
readSize : readSize + 110; + byte[] readArr = new byte[actualReadSize]; + + UNSAFE.copyMemory(null, segstart, readArr, UNSAFE.ARRAY_BYTE_BASE_OFFSET, actualReadSize); + while (bytesptr < actualReadSize) { + bi = readArr[bytesptr++];// UNSAFE.getByte(segstart + bytesReading++); + if (!isNumber) { + while (bi != 59) { + hascode = (hascode << 5) + hascode ^ bi; + bi = readArr[bytesptr++]; + } + isNumber = true; + stationName = new modifiedbytearray(readArr, bbstart, bytesptr - 2, hascode & 0xFFFFFFFF); + bbstart = 0; + hascode = 5381; + } + else { + while (bi != 10) { + if (bi == 0x2D) { + sign = -1; + } + else if (bi != 0x2E) { + num = num * 10 + (bi - 0x30); + } + bi = readArr[bytesptr++]; + } + hascode = 5381; + isNumber = false; + bbstart = bytesptr; + num *= sign; + MeasurementAggregator agg = resultmap.get(stationName); + if (agg == null) { + agg = new MeasurementAggregator(); + agg.min = num; + agg.max = num; + agg.sum = (long) (num); + agg.count = 1; + resultmap.put(stationName, agg); + } + else { + if (agg.min >= num) { + agg.min = num; + } + if (agg.max <= num) { + agg.max = num; + } + agg.sum += (long) (num); + agg.count++; + } + num = 0; + sign = 1; + if (bytesptr >= readSize) { + break; + } + } + } + bytesRemaining -= bytesptr; + segstart += bytesptr; + } + // System.out.println("end:" + System.nanoTime() / 1000000); + /* + * while (bytesReading < (i[1] - i[0] + 1) && buffer.position() < buffer.limit()) { + * buffer.clear(); + * bytesRead = fileChannel.read(buffer); + * buffer.flip(); + * while (bytesReading <= (i[1] - i[0]) && buffer.position() < buffer.limit()) { + * bytesReading += 1; + * bi = buffer.get(); + * String s; + * if (ctr > 0) { + * hascode = 31 * hascode + bi; + * ctr--; + * } + * else { + * if (bi >= 240) { + * ctr = 3; + * } + * else if (bi >= 224) { + * ctr = 2; + * } + * else if (bi >= 192) { + * ctr = 1; + * } + * else if (bi == 59) { + * isNumber = true; + * System.out.println(buffer); + * stationName = new modifiedbytearray(bbstart, 
buffer.position() - 1, hascode, buffer); + * hascode = 1; + * bbstart = buffer.position(); + * } + * else if (bi == 10) { + * hascode = 1; + * isNumber = false; + * MeasurementAggregator agg = resultmap.get(stationName); + * if (agg == null) { + * agg = new MeasurementAggregator(); + * agg.min = num * sign; + * agg.max = num * sign; + * agg.sum = (long) (num * sign); + * agg.count = 1; + * resultmap.put(stationName, agg); + * } + * else { + * agg.min = Math.min(agg.min, num * sign); + * agg.max = Math.max(agg.max, num * sign); + * agg.sum += (long) (num * sign); + * agg.count++; + * } + * num = 1; + * bbstart = buffer.position(); + * } + * else { + * hascode = 31 * hascode + bi; + * if (isNumber) { + * switch (bi) { + * case 0x2E: + * break; + * case 0x2D: + * num = num * -1; + * break; + * default: + * num = num * 10 + (bi - 0x30); + * } + * } + * } + * } + * } + * } + */ + return resultmap; + } + catch (Exception e) { + e.printStackTrace(); + } + return null; + }).flatMap(e -> e.entrySet().stream()).collect(groupingBy(e -> e.getKey(), collector)))) { + @Override + public Object put(Object key, Object value) { + return super.put(((modifiedbytearray) key).getStationName(), value); + } + }); + + /* + * .map(a -> { + * return a.stream().parallel().collect(groupingBy(m -> m.station(), collector)); + * }).flatMap(m -> m.entrySet() + * .stream() + */ + // Get the FileChannel from the FileInputStream + + // System.out.println("time taken1:" + (System.nanoTime() - start) / 1000000); + // System.out.println(measurements); + } + +} + +class modifiedbytearray { + private int length; + private int start; + private int end; + private byte[] arr; + public int hashcode; + + modifiedbytearray(byte[] arr, int start, int end, int hashcode) { + this.arr = arr; + this.length = end - start + 1; + this.end = end; + this.start = start; + this.hashcode = hashcode; + } + + public String getStationName() { + return new String(this.getArr(), start, length, StandardCharsets.UTF_8); + } + + 
public byte[] getArr() { + return this.arr; + } + + @Override + public String toString() { + return getStationName(); + } + + @Override + public boolean equals(Object obj) { + modifiedbytearray b = (modifiedbytearray) obj; + return Arrays.equals(this.getArr(), start, end, b.arr, b.start, b.end); + } + + public int getHashcode() { + return hashcode; + } + + @Override + public int hashCode() { + return hashcode; + } +} diff --git a/src/main/java/dev/morling/onebrc/CalculateAverage_kuduwa_keshavram.java b/src/main/java/dev/morling/onebrc/CalculateAverage_kuduwa_keshavram.java index c61116656..68ace02cf 100644 --- a/src/main/java/dev/morling/onebrc/CalculateAverage_kuduwa_keshavram.java +++ b/src/main/java/dev/morling/onebrc/CalculateAverage_kuduwa_keshavram.java @@ -17,75 +17,77 @@ import java.io.File; import java.io.IOException; -import java.io.RandomAccessFile; -import java.nio.ByteOrder; -import java.nio.MappedByteBuffer; +import java.lang.foreign.Arena; +import java.lang.reflect.Field; import java.nio.channels.FileChannel; import java.nio.channels.FileChannel.MapMode; -import java.nio.file.Files; -import java.nio.file.Path; import java.nio.file.StandardOpenOption; -import java.util.Arrays; -import java.util.List; -import java.util.Objects; +import java.util.Iterator; +import java.util.Spliterator; +import java.util.Spliterators; import java.util.TreeMap; import java.util.function.Function; import java.util.stream.Collectors; import java.util.stream.IntStream; +import java.util.stream.Stream; +import java.util.stream.StreamSupport; +import sun.misc.Unsafe; public class CalculateAverage_kuduwa_keshavram { private static final String FILE = "./measurements.txt"; + private static final Unsafe UNSAFE = initUnsafe(); + + private static Unsafe initUnsafe() { + try { + final Field theUnsafe = Unsafe.class.getDeclaredField("theUnsafe"); + theUnsafe.setAccessible(true); + return (Unsafe) theUnsafe.get(Unsafe.class); + } + catch (NoSuchFieldException | IllegalAccessException 
e) { + throw new RuntimeException(e); + } + } public static void main(String[] args) throws IOException, InterruptedException { - TreeMap resultMap = getFileSegments(new File(FILE)).stream() - .parallel() - .map( + TreeMap resultMap = getFileSegments(new File(FILE)) + .flatMap( segment -> { - final Measurement[][] measurements = new Measurement[1024 * 128][3]; - try (FileChannel fileChannel = (FileChannel) Files.newByteChannel(Path.of(FILE), StandardOpenOption.READ)) { - MappedByteBuffer byteBuffer = fileChannel.map( - MapMode.READ_ONLY, segment.start, segment.end - segment.start); - byteBuffer.order(ByteOrder.nativeOrder()); - while (byteBuffer.hasRemaining()) { - byte[] city = new byte[100]; - byte b; - int hash = 0; - int i = 0; - while ((b = byteBuffer.get()) != 59) { - hash = 31 * hash + b; - city[i++] = b; - } + Result result = new Result(); + while (segment.start < segment.end) { + byte[] city = new byte[100]; + byte b; + int hash = 0; + int i = 0; + while ((b = UNSAFE.getByte(segment.start++)) != 59) { + hash = 31 * hash + b; + city[i++] = b; + } - byte[] newCity = new byte[i]; - System.arraycopy(city, 0, newCity, 0, i); - int measurement = 0; - boolean negative = false; - while ((b = byteBuffer.get()) != 10) { - if (b == 45) { - negative = true; - } - else if (b == 46) { - // skip - } - else { - final int n = b - '0'; - measurement = measurement * 10 + n; - } + byte[] newCity = new byte[i]; + System.arraycopy(city, 0, newCity, 0, i); + int measurement = 0; + boolean negative = false; + while ((b = UNSAFE.getByte(segment.start++)) != 10) { + if (b == 45) { + negative = true; + } + else if (b == 46) { + // skip + } + else { + final int n = b - '0'; + measurement = measurement * 10 + n; } - putOrMerge( - measurements, - new Measurement( - hash, newCity, negative ? measurement * -1 : measurement)); } + putOrMerge( + result, + new Measurement(hash, newCity, negative ? 
measurement * -1 : measurement)); } - catch (IOException e) { - throw new RuntimeException(e); - } - return measurements; + Iterator iterator = getMeasurementIterator(result); + return StreamSupport.stream( + Spliterators.spliteratorUnknownSize(iterator, Spliterator.NONNULL), true); }) - .flatMap(measurements -> Arrays.stream(measurements).flatMap(Arrays::stream)) - .filter(Objects::nonNull) .collect( Collectors.toMap( measurement -> new String(measurement.city), @@ -99,13 +101,48 @@ else if (b == 46) { System.out.println(resultMap); } - private static void putOrMerge(Measurement[][] measurements, Measurement measurement) { - int index = measurement.hash & (measurements.length - 1); - Measurement[] existing = measurements[index]; + private static Iterator getMeasurementIterator(Result result) { + return new Iterator<>() { + final int uniqueIndex = result.uniqueIndex; + final int[] indexArray = result.indexArray; + final Measurement[][] measurements = result.measurements; + + int i = 0; + int j = 0; + + @Override + public boolean hasNext() { + return i < uniqueIndex; + } + + @Override + public Measurement next() { + Measurement measurement = measurements[indexArray[i]][j++]; + if (measurements[indexArray[i]][j] == null) { + i++; + j = 0; + } + return measurement; + } + }; + } + + static class Result { + final Measurement[][] measurements = new Measurement[1024 * 128][3]; + final int[] indexArray = new int[10_000]; + int uniqueIndex = 0; + } + + private static void putOrMerge(Result result, Measurement measurement) { + int index = measurement.hash & (result.measurements.length - 1); + Measurement[] existing = result.measurements[index]; for (int i = 0; i < existing.length; i++) { Measurement existingMeasurement = existing[i]; if (existingMeasurement == null) { - measurements[index][i] = measurement; + result.measurements[index][i] = measurement; + if (i == 0) { + result.indexArray[result.uniqueIndex++] = index; + } return; } if (equals(existingMeasurement.city, 
measurement.city)) { @@ -124,13 +161,20 @@ private static boolean equals(byte[] city1, byte[] city2) { return true; } - private record FileSegment(long start, long end) { + private static final class FileSegment { + long start; + long end; + + private FileSegment(long start, long end) { + this.start = start; + this.end = end; + } } private static final class Measurement { - private int hash; - private byte[] city; + private final int hash; + private final byte[] city; int min; int max; @@ -158,45 +202,28 @@ public String toString() { } } - private static List getFileSegments(final File file) throws IOException { + private static Stream getFileSegments(final File file) throws IOException { final int numberOfSegments = Runtime.getRuntime().availableProcessors() * 4; - final long fileSize = file.length(); - final long segmentSize = fileSize / numberOfSegments; - if (segmentSize < 1000) { - return List.of(new FileSegment(0, fileSize)); - } - - try (RandomAccessFile randomAccessFile = new RandomAccessFile(file, "r")) { - int lastSegment = numberOfSegments - 1; - return IntStream.range(0, numberOfSegments) - .mapToObj( - i -> { - long segStart = i * segmentSize; - long segEnd = (i == lastSegment) ? 
fileSize : segStart + segmentSize; - try { - segStart = findSegment(i, 0, randomAccessFile, segStart, segEnd); - segEnd = findSegment(i, lastSegment, randomAccessFile, segEnd, fileSize); - } - catch (IOException e) { - throw new RuntimeException(e); - } - return new FileSegment(segStart, segEnd); - }) - .toList(); - } - } - - private static long findSegment( - final int i, final int skipSegment, RandomAccessFile raf, long location, final long fileSize) - throws IOException { - if (i != skipSegment) { - raf.seek(location); - while (location < fileSize) { - location++; - if (raf.read() == '\n') - return location; + final long[] chunks = new long[numberOfSegments + 1]; + try (var fileChannel = FileChannel.open(file.toPath(), StandardOpenOption.READ)) { + final long fileSize = fileChannel.size(); + final long segmentSize = (fileSize + numberOfSegments - 1) / numberOfSegments; + final long mappedAddress = fileChannel.map(MapMode.READ_ONLY, 0, fileSize, Arena.global()).address(); + chunks[0] = mappedAddress; + final long endAddress = mappedAddress + fileSize; + for (int i = 1; i < numberOfSegments; ++i) { + long chunkAddress = mappedAddress + i * segmentSize; + // Align to first row start. 
+ while (chunkAddress < endAddress && UNSAFE.getByte(chunkAddress++) != '\n') { + // nop + } + chunks[i] = Math.min(chunkAddress, endAddress); } + chunks[numberOfSegments] = endAddress; } - return location; + return IntStream.range(0, chunks.length - 1) + .mapToObj(chunkIndex -> new FileSegment(chunks[chunkIndex], chunks[chunkIndex + 1])) + .parallel(); } + } diff --git a/src/main/java/dev/morling/onebrc/CalculateAverage_kumarsaurav123.java b/src/main/java/dev/morling/onebrc/CalculateAverage_kumarsaurav123.java index 5b59d057c..87458d1d3 100644 --- a/src/main/java/dev/morling/onebrc/CalculateAverage_kumarsaurav123.java +++ b/src/main/java/dev/morling/onebrc/CalculateAverage_kumarsaurav123.java @@ -15,242 +15,250 @@ */ package dev.morling.onebrc; +import java.io.IOException; import java.io.RandomAccessFile; -import java.nio.ByteBuffer; -import java.nio.ByteOrder; +import java.lang.foreign.Arena; +import java.lang.foreign.MemorySegment; +import java.lang.foreign.ValueLayout; +import java.nio.channels.FileChannel; import java.nio.charset.StandardCharsets; -import java.nio.file.Paths; import java.util.*; -import java.util.concurrent.ConcurrentSkipListMap; -import java.util.concurrent.ExecutorService; -import java.util.concurrent.Executors; -import java.util.concurrent.TimeUnit; +import java.util.concurrent.*; +import java.util.concurrent.atomic.AtomicInteger; +import java.util.concurrent.locks.ReentrantLock; import java.util.stream.Collector; -import java.util.stream.IntStream; +import java.util.stream.Collectors; import static java.util.stream.Collectors.groupingBy; public class CalculateAverage_kumarsaurav123 { private static final String FILE = "./measurements.txt"; + private static AtomicInteger indexCount = new AtomicInteger(0); + private static final ReentrantLock lock = new ReentrantLock(); + private static final int MAX_UNIQUE_KEYS = 11000; + private static Map indexMap; - private static record Measurement(String station, double value) { - private 
Measurement(String[] parts) { - this(parts[0], Double.parseDouble(parts[1])); - } - } + private static record Store(double[] min, double[] max, double[] sum, + int[] count) { - private static record ResultRow(String station,double min, double mean, double max,double sum,double count) { - public String toString() { - return round(min) + "/" + round(mean) + "/" + round(max); - } private double round(double value) { return Math.round(value * 10.0) / 10.0; } - } - ; + @Override + public String toString() { + return new TreeMap<>(indexMap.entrySet() + .stream() + .map(e -> Map.entry(e.getKey().toString(), + round(min[e.getValue()]) + "/" + round((Math.round(sum[e.getValue()] * 10.0) / 10.0) / count[e.getValue()]) + "/" + round(max[e.getValue()]) + )) + .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue))).toString(); + } + } - private static class MeasurementAggregator { - private double min = Double.POSITIVE_INFINITY; - private double max = Double.NEGATIVE_INFINITY; - private double sum; - private long count; + private static record Pair(long start, int size) { + } - private String station; + public static void main(String[] args) throws IOException, ExecutionException, InterruptedException { + long start = System.currentTimeMillis(); + System.out.println(run(FILE)); } - public static void main(String[] args) { - HashMap map = new HashMap<>(); - map.put((byte) 48, 0); - map.put((byte) 49, 1); - map.put((byte) 50, 2); - map.put((byte) 51, 3); - map.put((byte) 52, 4); - map.put((byte) 53, 5); - map.put((byte) 54, 6); - map.put((byte) 55, 7); - map.put((byte) 56, 8); - map.put((byte) 57, 9); - Collector collector2 = Collector.of( - MeasurementAggregator::new, - (a, m) -> { - a.min = Math.min(a.min, m.min); - a.max = Math.max(a.max, m.max); - a.sum += m.sum; - a.count += m.count; - }, - (agg1, agg2) -> { - var res = new MeasurementAggregator(); - res.min = Math.min(agg1.min, agg2.min); - res.max = Math.max(agg1.max, agg2.max); - res.sum = agg1.sum + agg2.sum; 
- res.count = agg1.count + agg2.count; + public static String run(String filePath) throws IOException, InterruptedException, ExecutionException { + indexCount = new AtomicInteger(0); + indexMap = new HashMap<>(MAX_UNIQUE_KEYS); + ExecutorService executorService = Executors.newFixedThreadPool(Runtime.getRuntime().availableProcessors() * 2); + CompletionService completionService = new ExecutorCompletionService<>(executorService); + Map> leftOutsMap = new ConcurrentSkipListMap<>(); + RandomAccessFile file = new RandomAccessFile(filePath, "r"); + long filelength = file.length(); + AtomicInteger kk = new AtomicInteger(); + MemorySegment memorySegment = file.getChannel().map(FileChannel.MapMode.READ_ONLY, 0, filelength, Arena.ofShared()); + int nChunks = 1000; - return res; - }, - agg -> { - return new ResultRow(agg.station, agg.min, agg.sum / agg.count, agg.max, agg.sum, agg.count); - }); - Collector collector = Collector.of( - MeasurementAggregator::new, - (a, m) -> { - a.min = Math.min(a.min, m.value); - a.max = Math.max(a.max, m.value); - a.sum += m.value; - a.station = m.station; - a.count++; - }, - (agg1, agg2) -> { - var res = new MeasurementAggregator(); - res.min = Math.min(agg1.min, agg2.min); - res.max = Math.max(agg1.max, agg2.max); - res.sum = agg1.sum + agg2.sum; - res.count = agg1.count + agg2.count; + int pChunkSize = Math.min(Integer.MAX_VALUE, (int) (memorySegment.byteSize() / (1000))); + if (pChunkSize < 100) { + pChunkSize = (int) memorySegment.byteSize(); + nChunks = 1; + } + ArrayList chunks = createStartAndEnd(pChunkSize, nChunks, memorySegment); + chunks.stream() + .parallel() + .map(p -> { - return res; - }, - agg -> { - return new ResultRow(agg.station, agg.min, agg.sum / agg.count, agg.max, agg.sum, agg.count); - }); + return createRunnable(memorySegment, p); + }) + .forEach(completionService::submit); + executorService.shutdown(); + int i = 0; + double[] min = new double[MAX_UNIQUE_KEYS]; + double[] max = new double[MAX_UNIQUE_KEYS]; + 
double[] sum = new double[MAX_UNIQUE_KEYS]; + int[] count = new int[MAX_UNIQUE_KEYS]; + initArray(i, count, min, max, sum); + i = 0; + final Store cureentStore = new Store(min, max, sum, count); + while (i < chunks.size()) { + Store newStore = completionService.take().get(); + Map reverseMap = indexMap.entrySet() + .stream().collect(Collectors.toMap(Map.Entry::getValue, Map.Entry::getKey)); + reverseMap.forEach((key, value) -> { + cureentStore.sum[key] += newStore.sum[key]; + cureentStore.count[key] += newStore.count[key]; + cureentStore.min[key] = Math.min(cureentStore.min[key], + newStore.min[key]); + cureentStore.max[key] = Math.max(cureentStore.max[key], + newStore.max[key]); + }); + i++; + } - long start = System.currentTimeMillis(); - long len = Paths.get(FILE).toFile().length(); - Map> leftOutsMap = new ConcurrentSkipListMap<>(); - int chunkSize = 1_0000_00; - long proc = Math.max(1, (len / chunkSize)); - ExecutorService executor = Executors.newFixedThreadPool(Runtime.getRuntime().availableProcessors() * 2 * 2 * 2); - List measurements = Collections.synchronizedList(new ArrayList()); - IntStream.range(0, (int) proc) - .mapToObj(i -> { - return new Runnable() { - @Override - public void run() { - try { - RandomAccessFile file = new RandomAccessFile(FILE, "r"); - byte[] allBytes2 = new byte[chunkSize]; - file.seek((long) i * (long) chunkSize); - int l = file.read(allBytes2); - byte[] eol = "\n".getBytes(StandardCharsets.UTF_8); - byte[] sep = ";".getBytes(StandardCharsets.UTF_8); + return cureentStore.toString(); + } - List mst = new ArrayList<>(); - int st = 0; - int cnt = 0; - ArrayList local = new ArrayList<>(); + private static void initArray(int i, int[] count, double[] min, double[] max, double[] sum) { + for (; i < count.length; i++) { + min[i] = Double.POSITIVE_INFINITY; + max[i] = Double.NEGATIVE_INFINITY; + sum[i] = 0.0d; + count[i] = 0; + } + } - for (int i = 0; i < l; i++) { - if (allBytes2[i] == eol[0]) { - if (i != 0) { - byte[] s2 = new byte[i - 
st]; - System.arraycopy(allBytes2, st, s2, 0, s2.length); - if (cnt != 0) { - for (int j = 0; j < s2.length; j++) { - if (s2[j] == sep[0]) { - byte[] city = new byte[j]; - byte[] value = new byte[s2.length - j - 1]; - System.arraycopy(s2, 0, city, 0, city.length); - System.arraycopy(s2, city.length + 1, value, 0, value.length); - double d = 0.0; - int s = -1; - for (int k = value.length - 1; k >= 0; k--) { - if (value[k] == 45) { - d = d * -1; - } - else if (value[k] == 46) { - } - else { - d = d + map.get(value[k]).intValue() * Math.pow(10, s); - s++; - } - } - mst.add(new Measurement(new String(city), d)); + private static ArrayList createStartAndEnd(int chunksize, int nChunks, MemorySegment memorySegment) { + ArrayList startSizePairs = new ArrayList<>(); + byte eol = "\n".getBytes(StandardCharsets.UTF_8)[0]; + long start = 0; + long end = -1; + if (nChunks == 1) { + startSizePairs.add(new Pair(0, chunksize)); + return startSizePairs; + } + else { + while (start < memorySegment.byteSize()) { + start = end + 1; + end = Math.min(memorySegment.byteSize() - 1, start + chunksize - 1); + while (memorySegment.get(ValueLayout.JAVA_BYTE, end) != eol) { + end--; - } - } + } + startSizePairs.add(new Pair(start, (int) (end - start + 1))); + } + } + return startSizePairs; + } + + public static Callable createRunnable(MemorySegment memorySegment, Pair p) { + return new Callable() { + @Override + public Store call() { + try { + double[] min = new double[MAX_UNIQUE_KEYS]; + double[] max = new double[MAX_UNIQUE_KEYS]; + double[] sum = new double[MAX_UNIQUE_KEYS]; + int[] count = new int[MAX_UNIQUE_KEYS]; + for (int i = 0; i < count.length; i++) { + min[i] = Double.POSITIVE_INFINITY; + max[i] = Double.NEGATIVE_INFINITY; + sum[i] = 0.0d; + count[i] = 0; + } - } - else { - local.add(s2); - } + byte[] allBytes2 = memorySegment.asSlice(p.start, p.size).toArray(ValueLayout.JAVA_BYTE); + byte[] eol = "\n".getBytes(StandardCharsets.UTF_8); + byte[] sep = 
";".getBytes(StandardCharsets.UTF_8); + + int st = 0; + for (int i = 0; i < allBytes2.length; i++) { + if (allBytes2[i] == eol[0]) { + ; + byte[] s2 = new byte[i - st]; + System.arraycopy(allBytes2, st, s2, 0, s2.length); + for (int j = 0; j < s2.length; j++) { + if (s2[j] == sep[0]) { + byte[] city = new byte[j]; + byte[] value = new byte[s2.length - j - 1]; + System.arraycopy(s2, 0, city, 0, city.length); + System.arraycopy(s2, city.length + 1, value, 0, value.length); + double d = getaDouble(value); + StringHolder citys = new StringHolder(city); + Integer index = indexMap.get(citys); + if (Objects.isNull(index)) { + lock.lock(); + if (Objects.isNull(indexMap.get(citys))) { + index = indexCount.getAndIncrement(); + indexMap.putIfAbsent(citys, index); } - cnt++; - st = i + 1; + index = indexMap.get(citys); + lock.unlock(); } + + count[index] = count[index] + 1; + max[index] = Math.max(max[index], d); + min[index] = Math.min(min[index], d); + sum[index] = Double.sum(sum[index], d); + break; } - if (st < l) { - byte[] s2 = new byte[allBytes2.length - st]; - System.arraycopy(allBytes2, st, s2, 0, s2.length); - local.add(s2); - } - leftOutsMap.put(i, local); - allBytes2 = null; - measurements.addAll(mst.stream() - .collect(groupingBy(Measurement::station, collector)) - .values()); - // System.out.println(measurements.size()); - } - catch (Exception e) { - // throw new RuntimeException(e); - System.out.println(""); } + st = i + 1; } - }; - }) - .forEach(executor::submit); - executor.shutdown(); + } + // System.out.println("Task " + kk + "Completed in " + (System.nanoTime() - start)); + return new Store(min, max, sum, count); + } + catch (Exception e) { + // throw new RuntimeException(e); + throw e; + } + } + }; + } - try { - executor.awaitTermination(10, TimeUnit.MINUTES); - } - catch (InterruptedException e) { - throw new RuntimeException(e); + private static double getaDouble(byte[] value) { + double d = 0.0; + int s = -1; + for (int k = value.length - 1; k >= 0; 
k--) { + if (value[k] == 45) { + d = d * -1; + } + else if (value[k] == 46) { + } + else { + d = d + (((int) value[k]) - 48) * Math.pow(10, s); + s++; + } } - Collection lMeasure = new ArrayList<>(); - List leftOuts = leftOutsMap.values() - .stream() - .flatMap(List::stream) - .toList(); - int size = 0; - for (int i = 0; i < leftOuts.size(); i++) { - size = size + leftOuts.get(i).length; - } - byte[] allBytes = new byte[size]; - int pos = 0; - for (int i = 0; i < leftOuts.size(); i++) { - System.arraycopy(leftOuts.get(i), 0, allBytes, pos, leftOuts.get(i).length); - pos = pos + leftOuts.get(i).length; - } - List l = Arrays.asList(new String(allBytes).split(";")); - List measurements1 = new ArrayList<>(); - String city = l.get(0); - for (int i = 0; i < l.size() - 1; i++) { - int sIndex = l.get(i + 1).indexOf('.') + 2; + return d; + } - String tempp = l.get(i + 1).substring(0, sIndex); + static class StringHolder implements Comparable { + byte[] bytes; - measurements1.add(new Measurement(city, Double.parseDouble(tempp))); - city = l.get(i + 1).substring(sIndex); + public StringHolder(byte[] bytes) { + this.bytes = bytes; + } + + @Override + public String toString() { + return new String(this.bytes); } - measurements.addAll(measurements1.stream() - .collect(groupingBy(Measurement::station, collector)) - .values()); - Map measurements2 = new TreeMap<>(measurements - .stream() - .parallel() - .collect(groupingBy(ResultRow::station, collector2))); - // Read from bytes 1000 to 2000 - // Something like this + @Override + public int hashCode() { + return Arrays.hashCode(this.bytes); + } - // - // Map measurements = new TreeMap<>(Files.lines(Paths.get(FILE)) - // .map(l -> new Measurement(l.split(";"))) - // .collect(groupingBy(m -> m.station(), collector))); + @Override + public boolean equals(Object obj) { + return Arrays.equals(this.bytes, ((StringHolder) obj).bytes); + } - System.out.println(measurements2); - // System.out.println(System.currentTimeMillis() - start); + 
@Override + public int compareTo(StringHolder o) { + return new String(this.bytes).compareTo(new String(o.bytes)); + } } } diff --git a/src/main/java/dev/morling/onebrc/CalculateAverage_mahadev_k.java b/src/main/java/dev/morling/onebrc/CalculateAverage_mahadev_k.java new file mode 100644 index 000000000..4d4ccd5b2 --- /dev/null +++ b/src/main/java/dev/morling/onebrc/CalculateAverage_mahadev_k.java @@ -0,0 +1,152 @@ +/* + * Copyright 2023 The original authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package dev.morling.onebrc; + +import java.io.FileDescriptor; +import java.io.FileOutputStream; +import java.io.IOException; +import java.io.PrintStream; +import java.io.RandomAccessFile; +import java.io.UnsupportedEncodingException; +import java.nio.ByteBuffer; +import java.nio.charset.StandardCharsets; +import java.util.Map; +import java.util.StringTokenizer; +import java.util.concurrent.ConcurrentSkipListMap; +import java.util.concurrent.Executors; +import java.util.concurrent.ThreadFactory; + +public class CalculateAverage_mahadev_k { + + private static final String FILE = "./measurements.txt"; + + private static Map stationMap = new ConcurrentSkipListMap<>(); + + private static double round(double value) { + return Math.round(value * 10.0) / 10.0; + } + + private static class MeasurementAggregator { + double minima = Double.POSITIVE_INFINITY, maxima = Double.NEGATIVE_INFINITY, total = 0, count = 0; + + public synchronized void accept(double value) { + if (minima > value) + minima = value; + if (maxima < value) + maxima = value; + total += value; + count++; + } + + public double min() { + return round(minima); + } + + public double max() { + return round(maxima); + } + + public double avg() { + return round((Math.round(total * 10.0) / 10.0) / count); + } + } + + public static void main(String[] args) throws IOException { + int chunkSize = args.length == 1 ? 
Integer.parseInt(args[0]) : 1_000_000; + readAndProcess(chunkSize); + print(); + } + + public static void readAndProcess(int chunkSize) { + final ThreadFactory factory = Thread.ofVirtual().name("routine-", 0).factory(); + + try (RandomAccessFile file = new RandomAccessFile(FILE, "r")) { + try (var executor = Executors.newThreadPerTaskExecutor(factory)) { + + var channel = file.getChannel(); + var size = channel.size(); + long start = 0; + while (start <= size) { + long end = start + chunkSize; + String letter = ""; + do { + end--; + ByteBuffer buffer = ByteBuffer.allocate(1); + channel.read(buffer, end); + buffer.flip(); + letter = StandardCharsets.UTF_8.decode(buffer).toString(); + } while (!letter.equals("\n")); + + if (end < start) + end = start + chunkSize; + + final long currentStart = start; + final long currentEnd = end; + executor.submit(() -> { + ByteBuffer buffer = ByteBuffer.allocate((int) (currentEnd - currentStart + 1)); + try { + channel.read(buffer, currentStart); + } + catch (IOException e) { + e.printStackTrace(); + } + buffer.flip(); + String data = StandardCharsets.UTF_8.decode(buffer).toString(); + processData(data); + }); + start = end + 1; + } + } + + } + catch (IOException e) { + e.printStackTrace(); + } + } + + public static void processData(String dataBlock) { + StringTokenizer tokenizer = new StringTokenizer(dataBlock, "\n"); + while (tokenizer.hasMoreElements()) { + StringTokenizer tokens = new StringTokenizer(tokenizer.nextToken(), ";"); + String station = tokens.nextToken(); + double value = Double.parseDouble(tokens.nextToken()); + processMinMaxMean(station, value); + } + } + + private static void processMinMaxMean(String station, double temp) { + var values = stationMap.get(station); + if (values == null) { + values = new MeasurementAggregator(); + stationMap.putIfAbsent(station, values); + } + values = stationMap.get(station); + values.accept(temp); + } + + public static void print() throws UnsupportedEncodingException { + 
System.setOut(new PrintStream(new FileOutputStream(FileDescriptor.out), true, StandardCharsets.UTF_8)); + System.out.print("{"); + int i = stationMap.size(); + for (var kv : stationMap.entrySet()) { + System.out.printf("%s=%s/%s/%s", kv.getKey(), kv.getValue().min(), kv.getValue().avg(), kv.getValue().max()); + if (i > 1) + System.out.print(", "); + i--; + } + System.out.println("}"); + } +} \ No newline at end of file diff --git a/src/main/java/dev/morling/onebrc/CalculateAverage_makohn.java b/src/main/java/dev/morling/onebrc/CalculateAverage_makohn.java new file mode 100644 index 000000000..7b1a08057 --- /dev/null +++ b/src/main/java/dev/morling/onebrc/CalculateAverage_makohn.java @@ -0,0 +1,287 @@ +/* + * Copyright 2023 The original authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package dev.morling.onebrc; + +import java.lang.foreign.Arena; +import java.lang.foreign.MemorySegment; +import java.lang.foreign.ValueLayout; +import java.nio.ByteBuffer; +import java.nio.channels.FileChannel; +import java.nio.charset.StandardCharsets; +import java.nio.file.Paths; +import java.nio.file.StandardOpenOption; +import java.util.ArrayList; +import java.util.Collection; +import java.util.stream.Collectors; + +// +// This implementation is partially inspired by +// +// - GavinRay97: 1BRC in Kotlin (memory mapping, chunking) | https://github.com/gunnarmorling/1brc/discussions/154 +// - dannyvankooten: 1BRC in C (integer parsing, linear probing) | https://github.com/gunnarmorling/1brc/discussions/46 +// +public class CalculateAverage_makohn { + + private static final String FILE = "./measurements.txt"; + + private static class Measurement implements Comparable { + final String city; + int min; + int max; + int count = 1; + int sum; + + Measurement(String city, int val) { + this.city = city; + this.min = val; + this.max = val; + this.sum = val; + } + + @Override + public String toString() { + return STR."\{city}=\{round(min)}/\{round((1.0 * sum) / count)}/\{round(max)}"; + } + + private double round(double value) { + return Math.round(value) / 10.0; + } + + @Override + public int compareTo(Measurement other) { + return this.city.compareTo(other.city); + } + } + + // Convert a given byte array of temperature data to an int value + // Since the temperate values only have one decimal, we can use integer arithmetic until the end + // + // buffer: [..., '-', '1', '9', '.', '7', ...] + // -------------> offset + // ............ = s + // + // We initialize a "pointer" s with the offset. Depending on whether the first char is a '-' or not, we set the + // sign and increment the pointer. + // + // Then we only have to distinguish between one-digit and two-digit numbers. + // Depending on that, we set an index for the respective parts of the number. 
+ // + private static int toInt(byte[] in, int offset) { + int sign = 1; + int s = offset; + if (in[s] == '-') { + sign = -1; + s++; + } + + if (in[s + 1] == '.') + return sign * ((in[s] - '0') * 10 + (in[s + 2] - '0')); + + return sign * ((in[s] - '0') * 100 + (in[s + 1] - '0') * 10 + (in[s + 3] - '0')); + } + + // 10_000 distinct station names as per specification + // We use the next power of two (2^14 = 16384) to allow for bit-masking our hash (instead of using modulo) + private static final int MAX_STATIONS = 2 << 14; + + // Twice as big as the maximum number of stations + private static final int MAP_CAPACITY = MAX_STATIONS * 2; + + // We start at 1 to allow for checking our hash-index map for > 0 + private static final int RES_FIRST_INDEX = 1; + + private static class ResultMap { + final int[] map = new int[MAP_CAPACITY]; // hash -> index + final Measurement[] measurements = new Measurement[MAX_STATIONS]; // index -> measurement + private int lastIndex = 0; + + private void put(int hash, Measurement measurement) { + lastIndex++; + measurements[lastIndex] = measurement; + map[hash] = lastIndex; + } + + private boolean contains(int hash) { + return map[hash] > 0; + } + + private Measurement get(int hash) { + return measurements[map[hash]]; + } + } + + // We use linear probing as our hash-collision strategy + // + // We use MAP_CAPACITY - 1 as a bitmask to force the hash to be lower than our capacity + // Let's consider a hash 16390. If our capacity is 2^14 = 16384, the hash is out of bounds. + // + // 16390 : 100000000000110 + // 16383 : 011111111111111 + // ....... 
000000000000110 = 3 + private static int linearProbe(ResultMap res, String key) { + var hash = key.hashCode() & (MAP_CAPACITY - 1); + while (res.map[hash] > 0 && !(res.measurements[res.map[hash]].city.equals(key))) { + hash = (hash + 1) & (MAP_CAPACITY - 1); + } + return hash; + } + + // Custom Quicksort implementation, seems to be slightly faster than Arrays.sort + private static void quickSort(Measurement[] arr, int begin, int end) { + if (begin < end) { + final var partitionIndex = partition(arr, begin, end); + + quickSort(arr, begin, partitionIndex - 1); + quickSort(arr, partitionIndex + 1, end); + } + } + + private static int partition(Measurement[] arr, int begin, int end) { + final var pivot = arr[end]; + int i = (begin - 1); + + for (int j = begin; j < end; j++) { + if (arr[j].compareTo(pivot) <= 0) { + i++; + final var tmp = arr[i]; + arr[i] = arr[j]; + arr[j] = tmp; + } + } + + final var tmp = arr[i + 1]; + arr[i + 1] = arr[end]; + arr[end] = tmp; + + return i + 1; + } + + private static Collection getChunks(MemorySegment memory, long chunkSize, long fileSize) { + final var chunks = new ArrayList(); + var chunkStart = 0L; + var chunkEnd = 0L; + while (chunkStart < fileSize) { + chunkEnd = Math.min((chunkStart + chunkSize), fileSize); + // starting from the calculated chunkEnd, seek the next newline to get the real chunkEnd + while (chunkEnd < fileSize && (memory.getAtIndex(ValueLayout.JAVA_BYTE, chunkEnd) & 0xFF) != '\n') + chunkEnd++; + // we have found our chunk boundaries, add a slice of memory with these boundaries to our list of chunks + if (chunkEnd < fileSize) + chunks.add(memory.asSlice(chunkStart, chunkEnd - chunkStart + 1).asByteBuffer()); + else + // special case: we are at the end of the file + chunks.add(memory.asSlice(chunkStart, chunkEnd - chunkStart).asByteBuffer()); + + // next chunk + chunkStart = chunkEnd + 1; + } + return chunks; + } + + // Station name: <= 100 bytes + // Temperature: <= 5 bytes + // + // Semicolon and new line are 
ignored + private static final int MAX_BYTES_PER_ROW = 105; + + private static ResultMap processChunk(ByteBuffer chunk) { + final var map = new ResultMap(); + final var buffer = new byte[MAX_BYTES_PER_ROW]; + var i = 0; + var delimiter = 0; + // Process the chunk byte by byte and store each line in buffer + while (chunk.hasRemaining()) { + final var c = chunk.get(); + // System.out.println((char) (c & 0xFF)); + switch (c & 0xFF) { + // Memorize the position of the semicolon, such that we can divide the buffer afterward + case ';' -> delimiter = i; + // If we encounter newline, we can do the actual calculations for the current line + case '\n' -> { + final var key = new String(buffer, 0, delimiter, StandardCharsets.UTF_8); + final var value = toInt(buffer, delimiter); + final var hash = linearProbe(map, key); + if (map.contains(hash)) { + final var current = map.get(hash); + current.min = Math.min(current.min, value); + current.max = Math.max(current.max, value); + current.count++; + current.sum += value; + } + else { + map.put(hash, new Measurement(key, value)); + } + i = 0; + delimiter = 0; + } + default -> { + buffer[i] = c; + i++; + } + } + } + return map; + } + + // File size is approximately 13 GB, ByteBuffer has a 2 GB limit + // Chunks should have a maximum size of approximately 13 GB / 8 = 1.625 GB + private static final int MIN_NUMBER_THREADS = 8; + + public static void main(String[] args) throws Exception { + final var numProcessors = Math.max(Runtime.getRuntime().availableProcessors(), MIN_NUMBER_THREADS); + // memory-map the input file + try (final var channel = FileChannel.open(Paths.get(FILE), StandardOpenOption.READ)) { + final var fileSize = channel.size(); + final var chunkSize = (fileSize / numProcessors); + final var mappedMemory = channel.map(FileChannel.MapMode.READ_ONLY, 0, fileSize, Arena.global()); + // process the mapped data concurrently in chunks. 
Each chunk is processed on a dedicated thread + final var chunks = getChunks(mappedMemory, chunkSize, fileSize); + final var processed = chunks + .parallelStream() + .map(CalculateAverage_makohn::processChunk) + .collect(Collectors.toList()); // materialize and thus synchronize + // merge the results, we can initialize with the first result, to avoid redundant probing + final var first = processed.removeFirst(); + final var res = processed + .stream() + .reduce(first, (acc, partial) -> { + for (int i = RES_FIRST_INDEX; i <= partial.lastIndex; i++) { + final var value = partial.measurements[i]; + final var hash = linearProbe(acc, value.city); + if (acc.contains(hash)) { + final var cur = acc.get(hash); + cur.min = Math.min(cur.min, value.min); + cur.max = Math.max(cur.max, value.max); + cur.count += value.count; + cur.sum += value.sum; + } + else { + acc.put(hash, value); + } + } + return acc; + }); + + quickSort(res.measurements, RES_FIRST_INDEX, res.lastIndex); + final var sb = new StringBuilder("{"); + for (int i = RES_FIRST_INDEX; i < res.lastIndex; i++) { + sb.append(res.measurements[i]).append(',').append(' '); + } + sb.append(res.measurements[res.lastIndex]).append('}'); + System.out.println(sb); + } + } +} diff --git a/src/main/java/dev/morling/onebrc/CalculateAverage_manishgarg90.java b/src/main/java/dev/morling/onebrc/CalculateAverage_manishgarg90.java new file mode 100644 index 000000000..11cad07ff --- /dev/null +++ b/src/main/java/dev/morling/onebrc/CalculateAverage_manishgarg90.java @@ -0,0 +1,169 @@ +/* + * Copyright 2023 The original authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package dev.morling.onebrc; + +import java.io.IOException; +import java.nio.ByteBuffer; +import java.nio.MappedByteBuffer; +import java.nio.channels.FileChannel; +import java.nio.charset.StandardCharsets; +import java.nio.file.Paths; +import java.nio.file.StandardOpenOption; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.TreeMap; + +public class CalculateAverage_manishgarg90 { + + private static final String FILE = "./measurements.txt"; + private static int nProcessors = Runtime.getRuntime().availableProcessors(); + + public static void main(String[] args) throws IOException { + try (FileChannel channel = FileChannel.open(Paths.get(FILE), StandardOpenOption.READ)) { + long fileSize = channel.size(); + long chunkSize = (fileSize + nProcessors - 1) / nProcessors; + long pos = 0; + + List buffers = new ArrayList<>(nProcessors); + + for (int i = 0; i < nProcessors; i++) { + long endPosition = getEndPosition(channel, pos + chunkSize); + long size = endPosition - pos; + MappedByteBuffer buffer = channel.map(FileChannel.MapMode.READ_ONLY, pos, size); + pos = pos + size; + buffers.add(buffer); + } + + Map s = readBufferAndCalculateMeauremenst(buffers); + Map tm = new TreeMap(s); + System.out.println(tm); + } + catch (IOException e) { + e.printStackTrace(); + } + + } + + private static Map readBufferAndCalculateMeauremenst(List chunks) { + return chunks.parallelStream().map(buffer -> { + Map map = new HashMap<>(10_000, 1); + int lineStart = 0; + int doubleStart = 0; + int length 
= buffer.limit(); + String station = null; + for (int i = 0; i < length; ++i) { + byte b = buffer.get(i); + if (b == ';') { + byte[] stationBuffer = new byte[i - lineStart]; + buffer.position(lineStart); + buffer.get(stationBuffer); + station = new String(stationBuffer, StandardCharsets.UTF_8); + doubleStart = i + 1; + } + else if (b == '\n') { + byte[] doubleBuffer = new byte[i - doubleStart]; + buffer.position(doubleStart); + buffer.get(doubleBuffer); + Double temperature = Double.parseDouble(new String(doubleBuffer)); + lineStart = i + 1; + + // I have station name and temp + Stat s = map.get(station); + if (s == null) { + map.put(station, new Stat(temperature)); + } + else { + s.update(temperature); + } + } + } + return map; + }).reduce(new HashMap<>(), (map1, map2) -> { + Stat s = new Stat(); + s.merge(map1); + s.merge(map2); + return s.getResultMap(); + }); + + } + + private static long getEndPosition(FileChannel channel, long position) throws IOException { + ByteBuffer buffer = ByteBuffer.allocate(1); + while (position < channel.size()) { + channel.read(buffer, position); + + if (buffer.get(0) == '\n') { + return position + 1; + } + position++; + buffer.clear(); + } + return channel.size(); + } + + private static final class Stat { + + private Double min = Double.MAX_VALUE; + private Double max = Double.MIN_VALUE; + private Double sum = 0d; + private long count = 0L; + + private Map resultMap = null; + + public Stat() { + this.resultMap = new HashMap<>(10_000, 1); + } + + public Stat(Double value) { + this.min = value; + this.max = value; + this.sum += value; + this.count++; + } + + private void update(Double value) { + this.min = Math.min(this.min, value); + this.max = Math.max(this.max, value); + this.sum = round(this.sum + value); + this.count++; + } + + private void merge(Map result) { + result.forEach((city, resultRow) -> resultMap.merge(city, resultRow, (existing, incoming) -> { + existing.min = Math.min(existing.min, incoming.min); + existing.max = 
Math.max(existing.max, incoming.max); + existing.sum += incoming.sum; + existing.count += incoming.count; + return existing; + })); + } + + public Map getResultMap() { + return resultMap; + } + + private double round(double value) { + return Math.round(value * 10.0) / 10.0; + } + + @Override + public String toString() { + return round(min) + "/" + round(sum / count) + "/" + round(max); + } + } +} diff --git a/src/main/java/dev/morling/onebrc/CalculateAverage_martin2038.java b/src/main/java/dev/morling/onebrc/CalculateAverage_martin2038.java new file mode 100644 index 000000000..073f157c3 --- /dev/null +++ b/src/main/java/dev/morling/onebrc/CalculateAverage_martin2038.java @@ -0,0 +1,337 @@ +/* + * Copyright 2023 The original authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package dev.morling.onebrc; + +import java.io.IOException; +import java.io.RandomAccessFile; +import java.lang.invoke.MethodHandles; +import java.lang.invoke.VarHandle; +import java.nio.ByteOrder; +import java.nio.MappedByteBuffer; +import java.nio.channels.FileChannel.MapMode; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +public class CalculateAverage_martin2038 { + + // private static final String FILE = "/Users/martin/Garden/blog/1BRC/1brc/./measurements.txt"; + + private static final String FILE = "./measurements.txt"; + + private static class MeasurementAggregator { + private int min = Integer.MAX_VALUE; + private int max = Integer.MIN_VALUE; + private long sum; + private int count; + + void update(int temp) { + update(1, temp, temp, temp); + } + + void update(int cnt, long sm, int min, int max) { + sum += sm; + count += cnt; + if (this.min > min) { + this.min = min; + } + if (this.max < max) { + this.max = max; + } + } + + void merge(MeasurementAggregator it) { + update(it.count, it.sum, it.min, it.max); + } + + public String toString() { + var mean = this.sum / 10.0 / this.count; + return (min / 10f) + "/" + Math.round(mean * 10) / 10f + "/" + (max / 10f); + } + } + + public static void main(String[] args) throws IOException { + + var file = new RandomAccessFile(FILE, "r"); + final int maxNameLength = 110; + var fc = file.getChannel(); + split(file).stream().parallel().map(ck -> { + // StrFastHashKey 比string快500ms + var map = new HashMap(200); + // var pb = System.currentTimeMillis(); + try { + var mb = fc.map(MapMode.READ_ONLY, ck.start, ck.length); + var buff = new byte[maxNameLength]; + while (mb.hasRemaining()) { + var name = readNextHashKey(buff, mb); + // var name = readNextString(buff, mb);// .intern(); + var temp = readNextInt10Times(buff, mb); + add2map(map, name, temp); + } + // long end = ck.start + ck.length; + // do { + // var name = readNext(file, ';', 
30).intern(); + // var temp = Double.parseDouble(readNext(file, '\n', 6)); + // var agg = map.computeIfAbsent(name,it->new MeasurementAggregator()); + // agg.update(temp); + // }while (file.getFilePointer() { + + var sb = new StringBuilder(map.size() * 100); + sb.append('{'); + map.entrySet().stream().sorted(Map.Entry.comparingByKey()) + .forEachOrdered(kv -> sb.append(kv.getKey()).append('=').append(kv.getValue()).append(", ")); + sb.deleteCharAt(sb.length() - 1); + sb.setCharAt(sb.length() - 1, '}'); + var resultStr = sb.toString(); + System.out.println(resultStr); + // System.out.println(resultStr.hashCode()); + }); + + } + + static HashMap reduceMap(HashMap aMap, HashMap bMap) { + aMap.forEach((k, v) -> { + var b = bMap.get(k); + if (null == b) { + bMap.put(k, v); + } + else { + b.merge(v); + } + }); + return bMap; + } + + static void add2map(Map map, Key name, int temp) { + // 比computeIfAbsent 节约1秒 + var agg = map.get(name); + if (null == agg) { + agg = new MeasurementAggregator(); + map.put(name, agg); + } + // var agg = map.computeIfAbsent(name,it->new MeasurementAggregator()); + agg.update(temp); + } + + record FileChunk(long start, long length) { + } + + static List split(RandomAccessFile file) throws IOException { + long total = file.length(); + var threadNum = Math.max((int) (total / Integer.MAX_VALUE + 1), Runtime.getRuntime().availableProcessors()); + long avgChunkSize = total / threadNum; + // System.out.println(avgChunkSize +" \t avgChunkSize : INT/MAX \t"+Integer.MAX_VALUE); + // Exception in thread "main" java.lang.IllegalArgumentException: Size exceeds Integer.MAX_VALUE + // at java.base/sun.nio.ch.FileChannelImpl.map(FileChannelImpl.java:1183) + long lastStart = 0; + var list = new ArrayList(threadNum); + for (var i = 0; i < threadNum - 1; i++) { + var length = avgChunkSize; + file.seek(lastStart + length); + while (file.readByte() != '\n') { + // file.seek(lastStart+ ++length); + ++length; + } + // include the '\n' + length++; + list.add(new 
FileChunk(lastStart, length)); + lastStart += length; + if (lastStart >= total) { + return list; + } + } + list.add(new FileChunk(lastStart, total - lastStart)); + return list; + } + + static StrFastHashKey readNextHashKey(byte[] buf, MappedByteBuffer mb) { + int i = 1; + mb.get(buf, 0, i); + byte b; + while ((b = mb.get()) != ';') { + buf[i++] = b; + } + return new StrFastHashKey(buf, i); + } + + static String readNextString(byte[] buf, MappedByteBuffer mb) { + int i = 1; + mb.get(buf, 0, i); + byte b; + while ((b = mb.get()) != ';') { + buf[i++] = b; + } + return new String(buf, 0, i); + } + + // copy from CalculateAverage_3j5a + // 替换 Double.parse + // 时间 38秒 -> 5418 ms + static int readNextInt10Times(byte[] buf, MappedByteBuffer mb) { + final int min_number_len = 3; + int i = min_number_len; + mb.get(buf, 0, i); + byte b; + while ((b = mb.get()) != '\n') { + buf[i++] = b; + } + // -3.2 + var zeroAscii = '0'; + int temperature = buf[--i] - zeroAscii; + i--; // skipping dot + var base = 10; + while (i > 0) { + b = buf[--i]; + if (b == '-') { + temperature = -temperature; + } + else { + temperature = base * (b - zeroAscii) + temperature; + base *= base; + } + } + return temperature; + } + + // static String readNext(RandomAccessFile file, char endFlag,int initLength) throws IOException { + // StringBuilder input = new StringBuilder(initLength); + // int c = -1; + // //boolean eol = false; + // + // while (true) { + // c = file.read(); + // if( c == endFlag || c == -1) { + // break; + // } + // input.append((char)c); + // } + // + // //if ((c == -1) && (input.length() == 0)) { + // // return null; + // //} + // return input.toString(); + // } + + static class StrFastHashKey implements Comparable { + final byte[] name; + final int hash; + + String nameStr; + + StrFastHashKey(byte[] buf, int size) { + name = new byte[size]; + System.arraycopy(buf, 0, name, 0, size); + // hash = calculateHash(name, 0, size - 1); + // FNV1a save 100+ms than calculateHash + hash = 
hashFNV1a(name, size); + } + + @Override + public boolean equals(Object o) { + // if (this == o) {return true;} + // if (o == null || getClass() != o.getClass()) {return false;} + StrFastHashKey that = (StrFastHashKey) o; + return hash == that.hash && Arrays.equals(name, that.name); + } + + @Override + public int hashCode() { + return hash; + } + + @Override + public String toString() { + if (null == nameStr) { + nameStr = new String(name); + } + return nameStr; + } + + @Override + public int compareTo(StrFastHashKey o) { + return toString().compareTo(o.toString()); + } + } + + private static final VarHandle LONG_VIEW = MethodHandles.byteArrayViewVarHandle(long[].class, ByteOrder.nativeOrder()) + .withInvokeExactBehavior(); + private static final VarHandle INT_VIEW = MethodHandles.byteArrayViewVarHandle(int[].class, ByteOrder.nativeOrder()) + .withInvokeExactBehavior(); + + /** + * This is a prime number that gives pretty + * good hash distributions + * on the data in this challenge. + */ + private static final long RANDOM_PRIME = 0x7A646E4D; + + /** + * The hash calculation is inspired by + * QuestDB FastMap + */ + private static int calculateHash(byte[] buffer, int startPosition, int endPosition) { + long hash = 0; + + int position = startPosition; + for (; position + Long.BYTES <= endPosition; position += Long.BYTES) { + long value = (long) LONG_VIEW.get(buffer, position); + hash = hash * RANDOM_PRIME + value; + } + + if (position + Integer.BYTES <= endPosition) { + int value = (int) INT_VIEW.get(buffer, position); + hash = hash * RANDOM_PRIME + value; + position += Integer.BYTES; + } + + for (; position <= endPosition; position++) { + hash = hash * RANDOM_PRIME + buffer[position]; + } + hash = hash * RANDOM_PRIME; + return (int) hash ^ (int) (hash >>> 32); + } + + private static final int FNV1_32_INIT = 0x811c9dc5; + private static final int FNV1_PRIME_32 = 16777619; + + /** + * https://github.com/prasanthj/hasher/blob/master/src/main/java/hasher/FNV1a.java + * 
+ * FNV1a 32 bit variant. + * + * @param data - input byte array + * @param length - length of array + * @return - hashcode + */ + public static int hashFNV1a(byte[] data, int length) { + int hash = FNV1_32_INIT; + for (int i = 0; i < length; i++) { + hash ^= (data[i] & 0xff); + hash *= FNV1_PRIME_32; + } + + return hash; + } +} diff --git a/src/main/java/dev/morling/onebrc/CalculateAverage_mattiz.java b/src/main/java/dev/morling/onebrc/CalculateAverage_mattiz.java new file mode 100644 index 000000000..52c31ba39 --- /dev/null +++ b/src/main/java/dev/morling/onebrc/CalculateAverage_mattiz.java @@ -0,0 +1,324 @@ +/* + * Copyright 2023 The original authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package dev.morling.onebrc; + +import java.io.*; +import java.nio.ByteBuffer; +import java.nio.MappedByteBuffer; +import java.nio.channels.FileChannel; +import java.nio.charset.StandardCharsets; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.nio.file.StandardOpenOption; +import java.util.*; +import static java.nio.channels.FileChannel.MapMode.READ_ONLY; + +public class CalculateAverage_mattiz { + private static final int TWO_BYTE_TO_INT = 480 + 48; // 48 is the ASCII code for '0' + private static final int THREE_BYTE_TO_INT = 4800 + 480 + 48; + private static final String FILE = "./measurements.txt"; + public static final int PARTS = 8; + + public static void main(String[] args) throws Exception { + var result = new CalculateAverage_mattiz().calculate(FILE, PARTS); + System.out.println(result); + } + + StationList calculate(String file, int numParts) throws Exception { + var buffers = createBuffers(Paths.get(file), numParts); + + return buffers + .parallelStream() + .map(this::aggregate) + .reduce(StationList::merge) + .orElseThrow(); + } + + record BufferAndSize(ByteBuffer buffer, long size) { + } + + List createBuffers(Path file, int numParts) throws IOException { + FileChannel fileChannel = FileChannel.open(file, StandardOpenOption.READ); + + var fileSize = fileChannel.size(); + + if (fileSize < (1024 * 1024)) { // Only one core for small files + numParts = 1; + } + + var chunkSize = fileSize / numParts; + var buffers = new ArrayList(); + long filePointer = 0; + + for (int i = 0; i < numParts; i++) { + if (i != numParts - 1) { // not last element + var adjustedChunkSize = getBuffer(fileChannel, filePointer, chunkSize, true); + buffers.add(adjustedChunkSize.buffer()); + filePointer += adjustedChunkSize.size(); + } + else { + var adjustedChunkSize = getBuffer(fileChannel, filePointer, fileSize - filePointer, false); + buffers.add(adjustedChunkSize.buffer()); + } + } + + return buffers; + } + + BufferAndSize getBuffer(FileChannel 
fileChannel, long start, long size, boolean adjust) throws IOException { + MappedByteBuffer buffer = fileChannel.map(READ_ONLY, start, size); + + var actualSize = ((int) size); + + if (adjust) { + while (buffer.get(actualSize - 1) != '\n') { + actualSize--; + } + } + + buffer.limit(actualSize); + + return new BufferAndSize(buffer, actualSize); + } + + private StationList aggregate(ByteBuffer buffer) { + var measurements = new StationList(); + + while (buffer.hasRemaining()) { + int startPos = buffer.position(); + + byte b; + int hash = 0; + while ((b = buffer.get()) != ';') { + hash = ((hash << 5) - hash) + b; + } + + if (hash < 0) { + hash = -hash; + } + + int length = buffer.position() - startPos - 1; + byte[] station = new byte[length]; + buffer.get(startPos, station); + + int value = readValue(buffer); + + measurements.update(station, length, hash, value); + } + + return measurements; + } + + /* + * Read decimal number from ascii characters (copied from arjenw) + * + * Example: + * If you have the decimal number 1.4, + * then byte 1 contain 49 (ascii code for '1') + * and byte 3 contain 52 (ascii code for '4') + * Subtract 480 + 48 (48 is the ASCII code for '0') + * to move number from ascii number to int + * + * 49 * 10 + 52 - 528 = 14 + */ + private static int readValue(ByteBuffer buffer) { + int value; + byte b1 = buffer.get(); + byte b2 = buffer.get(); + byte b3 = buffer.get(); + byte b4 = buffer.get(); + + if (b2 == '.') {// value is n.n + value = (b1 * 10 + b3 - TWO_BYTE_TO_INT); + } + else { + if (b4 == '.') { // value is -nn.n + value = -(b2 * 100 + b3 * 10 + buffer.get() - THREE_BYTE_TO_INT); + } + else if (b1 == '-') { // value is -n.n + value = -(b2 * 10 + b4 - TWO_BYTE_TO_INT); + } + else { // value is nn.n + value = (b1 * 100 + b2 * 10 + b4 - THREE_BYTE_TO_INT); + } + buffer.get(); // new line + } + return value; + } +} + +class CustomMap { + private static final int SIZE = 1024 * 64; + private final Station[] stationList = new Station[SIZE]; + + 
public void addOrUpdate(byte[] stationName, int length, int hash, int value) { + int slot = hash & (SIZE - 1); + var station = stationList[slot]; + + while (station != null + && station.getHash() != hash + && !Arrays.equals( + station.getName(), 0, station.getName().length, + stationName, 0, length)) { + + slot = (slot + 1) & (SIZE - 1); + station = stationList[slot]; + } + + if (station == null) { + stationList[slot] = new Station(stationName, hash); + } + + stationList[slot].add(value); + } + + public Station get(byte[] stationName) { + return stationList[findSlot(stationName)]; + } + + public void put(byte[] stationName, Station newStation) { + stationList[findSlot(stationName)] = newStation; + } + + private int findSlot(byte[] stationName) { + int hash = getHash(stationName); + int slot = hash & (SIZE - 1); + var station = stationList[slot]; + + while (station != null + && station.getHash() != hash + && !Arrays.equals(station.getName(), stationName)) { + + slot = (slot + 1) & (SIZE - 1); + station = stationList[slot]; + } + + return slot; + } + + private int getHash(byte[] key) { + int hash = 0; + + for (byte b : key) { + hash = hash * 31 + b; + } + + if (hash < 0) { + hash = -hash; + } + + return hash; + } + + public Set> entrySet() { + var sorted = new HashMap(); + + for (var s : stationList) { + if (s != null) { + sorted.put(s.getName(), s); + } + } + + return sorted.entrySet(); + } + + public Map sorted() { + var sorted = new TreeMap(); + + for (var s : stationList) { + if (s != null) { + sorted.put(new String(s.getName(), StandardCharsets.UTF_8), s); + } + } + + return sorted; + } +} + +class StationList { + private final CustomMap stations = new CustomMap(); + + public void update(byte[] stationName, int length, int hash, int value) { + stations.addOrUpdate(stationName, length, hash, value); + } + + public StationList merge(StationList other) { + for (var aggregator : other.stations.entrySet()) { + var agg = stations.get(aggregator.getKey()); + + if (agg 
== null) { + stations.put(aggregator.getKey(), aggregator.getValue()); + } + else { + agg.merge(aggregator.getValue()); + } + } + + return this; + } + + @Override + public String toString() { + return stations.sorted().toString(); + } +} + +class Station { + private final byte[] name; + private final int hash; + private int min = Integer.MAX_VALUE; + private int max = Integer.MIN_VALUE; + private int sum; + private int count; + + public Station(byte[] name, int hash) { + this.name = name; + this.hash = hash; + } + + public void add(int max, int min, int sum, int count) { + this.max = Math.max(this.max, max); + this.min = Math.min(this.min, min); + this.sum += sum; + this.count += count; + } + + public void add(int value) { + this.max = Math.max(this.max, value); + this.min = Math.min(this.min, value); + this.sum += value; + this.count++; + } + + public void merge(Station other) { + this.max = Math.max(this.max, other.max); + this.min = Math.min(this.min, other.min); + this.sum += other.sum; + this.count += other.count; + } + + public String toString() { + return (min / 10.0) + "/" + (Math.round(((double) sum) / count)) / 10.0 + "/" + (max / 10.0); + } + + public byte[] getName() { + return name; + } + + public int getHash() { + return hash; + } +} \ No newline at end of file diff --git a/src/main/java/dev/morling/onebrc/CalculateAverage_melgenek.java b/src/main/java/dev/morling/onebrc/CalculateAverage_melgenek.java new file mode 100644 index 000000000..924cf15d8 --- /dev/null +++ b/src/main/java/dev/morling/onebrc/CalculateAverage_melgenek.java @@ -0,0 +1,551 @@ +/* + * Copyright 2023 The original authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package dev.morling.onebrc; + +import jdk.incubator.vector.*; + +import java.io.IOException; +import java.io.RandomAccessFile; +import java.lang.invoke.MethodHandles; +import java.lang.invoke.VarHandle; +import java.nio.ByteOrder; +import java.nio.charset.StandardCharsets; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.TreeMap; +import java.util.concurrent.*; + +/** + * The implementation: + * - reads a file with buffered IO + * - uses VarHandles to get longs/ints from a byte array + * - delimiter search is vectorized + * - there is a custom hash function, that provides a low collision rate and short probe distances in hash tables + * - has 2 custom open addressing hash tables: one for strings <=8 bytes in length, and one more for strings of any length + */ +public class CalculateAverage_melgenek { + + private static final VarHandle LONG_VIEW = MethodHandles.byteArrayViewVarHandle(long[].class, ByteOrder.nativeOrder()).withInvokeExactBehavior(); + private static final VarHandle INT_VIEW = MethodHandles.byteArrayViewVarHandle(int[].class, ByteOrder.nativeOrder()).withInvokeExactBehavior(); + private static final int CORES_COUNT = Runtime.getRuntime().availableProcessors(); + + private static final String FILE = "./measurements.txt"; + /** + * This is a prime number that gives pretty + * good hash distributions + * on the data in this challenge. 
+ */ + private static final long RANDOM_PRIME = 0x7A646E4D; + private static final int ZERO_CHAR_3_SUM = 100 * '0' + 10 * '0' + '0'; + private static final int ZERO_CHAR_2_SUM = 10 * '0' + '0'; + private static final byte NEWLINE = '\n'; + private static final byte SEMICOLON = ';'; + private static final VectorSpecies BYTE_SPECIES = ByteVector.SPECIES_PREFERRED; + private static final int BYTE_SPECIES_BYTE_SIZE = BYTE_SPECIES.vectorByteSize(); + private static final Vector NEWLINE_VECTOR = BYTE_SPECIES.broadcast(NEWLINE); + private static final Vector SEMICOLON_VECTOR = BYTE_SPECIES.broadcast(SEMICOLON); + private static final int MAX_LINE_LENGTH = 107; // 100 + len(";-11.1\n") = 100+7 + + public static void main(String[] args) throws Throwable { + long totalSize = Files.size(Path.of(FILE)); + long chunkSize = Math.max(MAX_LINE_LENGTH, totalSize / CORES_COUNT); + var result = new TreeMap(); + try (var executor = Executors.newFixedThreadPool(CORES_COUNT)) { + var service = new ExecutorCompletionService(executor); + int i = 0; + for (; i * chunkSize < totalSize; i++) { + long currentOffset = Math.max(0, i * chunkSize - 1); + long maxOffset = Math.min((i + 1) * chunkSize, totalSize); + service.submit(() -> processRange(currentOffset, maxOffset)); + } + for (; i > 0; i--) { + service.take().get().addRows(result); + } + } + System.out.println(printTree(result)); + } + + private static String printTree(TreeMap result) { + var sb = new StringBuilder(50 * result.size()); + sb.append("{"); + boolean first = true; + for (var entry : result.entrySet()) { + if (first) { + first = false; + } + else { + sb.append(", "); + } + sb.append(entry.getKey()); + sb.append('='); + entry.getValue().appendToStringBuffer(sb); + } + sb.append("}"); + return sb.toString(); + } + + private static CompositeTable processRange(long startOffset, long maxOffset) { + final var table = new CompositeTable(); + try (var file = new BufferedFile(startOffset, maxOffset)) { + processChunk(file, table); + } 
+ catch (Exception e) { + throw new RuntimeException(e); + } + return table; + } + + private static void processChunk(BufferedFile file, CompositeTable table) throws IOException { + if (file.offset != 0) { + file.refillBuffer(); + int newlinePosition = findDelimiter(file, 0, NEWLINE_VECTOR, NEWLINE); + file.bufferPosition = newlinePosition + 1; + file.offset += file.bufferPosition; + } + while (file.offset < file.maxOffset) { + file.refillBuffer(); + int bytesProcessed = processOneRow(file, table); + file.offset += bytesProcessed; + } + } + + private static int processOneRow(BufferedFile file, CompositeTable table) { + int stringStart = file.bufferPosition; + int stringEnd = findDelimiter(file, stringStart, SEMICOLON_VECTOR, SEMICOLON); + + file.bufferPosition = stringEnd + 1; + short value = parseValue(file); + + table.add(file.buffer, stringStart, stringEnd, value); + + return file.bufferPosition - stringStart; + } + + private static short parseValue(BufferedFile file) { + byte firstDigit = file.buffer[file.bufferPosition]; + int sign = 1; + if (firstDigit == '-') { + sign = -1; + file.bufferPosition++; + firstDigit = file.buffer[file.bufferPosition]; + } + + byte secondDigit = file.buffer[file.bufferPosition + 1]; + int result; + if (secondDigit == '.') { + result = firstDigit * 10 + file.buffer[file.bufferPosition + 2] - ZERO_CHAR_2_SUM; + file.bufferPosition += 4; + } + else { + result = firstDigit * 100 + secondDigit * 10 + file.buffer[file.bufferPosition + 3] - ZERO_CHAR_3_SUM; + file.bufferPosition += 5; + } + return (short) (result * sign); + } + + /** + * Finds a delimiter in a byte array using vectorized comparisons. 
+ */ + private static int findDelimiter(BufferedFile file, int startPosition, Vector repeatedDelimiter, byte delimiter) { + int position = startPosition; + int vectorLoopBound = startPosition + BYTE_SPECIES.loopBound(file.bufferLimit - startPosition); + for (; position < vectorLoopBound; position += BYTE_SPECIES_BYTE_SIZE) { + var vector = ByteVector.fromArray(BYTE_SPECIES, file.buffer, position); + var comparisonResult = vector.compare(VectorOperators.EQ, repeatedDelimiter); + if (comparisonResult.anyTrue()) { + return position + comparisonResult.firstTrue(); + } + } + + while (file.buffer[position] != delimiter) { + position++; + } + + return position; + } + + private static long keepLastBytes(long value, int numBytesToKeep) { + // Number of bits to shift, so that the mask covers only `numBytesToKeep` least significant bits + int bitShift = (Long.BYTES - numBytesToKeep) * Byte.SIZE; + // Mask with the specified number of the least significant bits set to 1 + long mask = -1L >>> bitShift; + return value & mask; + } + + /** + * The function transforms a string with the length <=8 bytes to a java String. + * The function assumes that the string is 0 terminated. + */ + private static String longToString(long value) { + int strLength = Long.BYTES - Long.numberOfLeadingZeros(value) / Byte.SIZE; + var bytes = new byte[strLength]; + for (int i = 0; i < strLength; i++) { + bytes[i] = (byte) (value >> (i * Byte.SIZE)); + } + return new String(bytes, StandardCharsets.UTF_8); + } + + /** + * Store measurements based on string lengths. + * Stores strings of length <= 8 and other strings separately. + * This table is a simplified implementation of strings hash table in ClickHouse. + * The original parer that describes benefits of the approach is SAHA: A String Adaptive Hash Table for Analytical Databases. 
+ */ + private static final class CompositeTable { + private final LongTable longTable = new LongTable(); + private final RegularTable regularTable = new RegularTable(); + + private void add(byte[] buffer, int stringStart, int stringEnd, short value) { + int stringLength = stringEnd - stringStart; + if (stringLength <= Long.BYTES) { + long str = keepLastBytes((long) LONG_VIEW.get(buffer, stringStart), stringLength); + this.longTable.add(str, value); + } + else { + int hash = calculateHash(buffer, stringStart, stringEnd); + this.regularTable.add(buffer, stringStart, stringLength, hash, value); + } + } + + public void addRows(TreeMap result) { + this.longTable.addRows(result); + this.regularTable.addRows(result); + } + } + + /** + * The hash calculation is inspired by + * QuestDB FastMap + */ + private static int calculateHash(byte[] buffer, int startPosition, int endPosition) { + long hash = 0; + + int position = startPosition; + for (; position + Long.BYTES < endPosition; position += Long.BYTES) { + long value = (long) LONG_VIEW.get(buffer, position); + hash = hash * RANDOM_PRIME + value; + } + + if (position + Integer.BYTES < endPosition) { + int value = (int) INT_VIEW.get(buffer, position); + hash = hash * RANDOM_PRIME + value; + position += Integer.BYTES; + } + + for (; position < endPosition; position++) { + hash = hash * RANDOM_PRIME + buffer[position]; + } + hash = hash * RANDOM_PRIME; + return (int) hash ^ (int) (hash >>> 32); + } + + private static int calculateLongHash(long str) { + long hash = str * RANDOM_PRIME; + return (int) hash ^ (int) (hash >>> 32); + } + + /** + * This table stores strings of length <= 8 bytes. + * Does not store hashes. 
+ */ + private static final class LongTable { + private static final int TABLE_CAPACITY = 32768; + private static final int TABLE_CAPACITY_MASK = TABLE_CAPACITY - 1; + /** + * The buckets use 3 longs to store strings and measurements: + * long 1) station name + * long 2) sum of measurements + * long 3) count (int) | min (short) | max (short) <-- packed into one long + */ + private final long[] buckets = new long[TABLE_CAPACITY * 3]; + + public void add(long str, short value) { + int hash = calculateLongHash(str); + int bucketIdx = hash & TABLE_CAPACITY_MASK; + + long bucketStr = buckets[bucketIdx * 3]; + if (bucketStr == str) { + updateBucket(bucketIdx, value); + } + else if (bucketStr == 0L) { + createBucket(bucketIdx, str, value); + } + else { + addWithProbing(str, value, (bucketIdx + 1) & TABLE_CAPACITY_MASK); + } + } + + private void addWithProbing(long str, short value, int bucketIdx) { + int distance = 1; + while (true) { + long bucketStr = buckets[bucketIdx * 3]; + if (bucketStr == str) { + updateBucket(bucketIdx, value); + break; + } + else if (bucketStr == 0L) { + createBucket(bucketIdx, str, value); + break; + } + else { + distance++; + // A new bucket index is calculated based on quadratic probing https://thenumb.at/Hashtables/#quadratic-probing + // Quadratic probing decreases the number of collisions and max probing distance. 
+ // Linear: + // - capacity 16k, 28.6M collisions, 14-17 max distance + // - capacity 32k, 9.5M collisions, 5-7 max distance + // Quadratic: + // - capacity 16k 25M collisions, 8-10 max distance + // - capacity 32k, 9.3M collisions, 4-7 max distance + bucketIdx = (bucketIdx + distance) & TABLE_CAPACITY_MASK; + } + } + } + + public void addRows(TreeMap result) { + for (int bucketIdx = 0; bucketIdx < TABLE_CAPACITY; bucketIdx++) { + int bucketOffset = bucketIdx * 3; + long str = buckets[bucketOffset]; + if (str != 0L) { + long sum = buckets[bucketOffset + 1]; + long countMinMax = buckets[bucketOffset + 2]; + int count = (int) ((countMinMax >> 32)); + short min = (short) ((countMinMax >> 16) & 0xFFFF); + short max = (short) (countMinMax & 0xFFFF); + + result.compute(longToString(str), (k, resultRow) -> { + if (resultRow == null) { + return new ResultRow(sum, count, min, max); + } + else { + resultRow.add(sum, count, min, max); + return resultRow; + } + }); + } + } + } + + private void createBucket(int bucketIdx, long str, short value) { + int offset = bucketIdx * 3; + buckets[offset] = str; + buckets[offset + 1] = value; + buckets[offset + 2] = (1L << 32) | ((long) (value & 0xFFFF) << 16) | (long) (value & 0xFFFF); + } + + private void updateBucket(int bucketIdx, short value) { + int offset = bucketIdx * 3; + long sum = buckets[offset + 1]; + buckets[offset + 1] = sum + value; + + long countMinMax = buckets[offset + 2]; + int count = (int) ((countMinMax >> 32)); + short min = (short) ((countMinMax >> 16) & 0xFFFF); + short max = (short) (countMinMax & 0xFFFF); + if (value < min) { + min = value; + } + if (value > max) { + max = value; + } + buckets[offset + 2] = ((long) (count + 1) << 32) | ((long) (min & 0xFFFF) << 16) | (long) (max & 0xFFFF); + } + } + + /** + * An open addressing hash table that stores strings as byte arrays. + * Stores hashes. 
+ */ + private static final class RegularTable { + private static final int TABLE_CAPACITY = 16384; + private static final int TABLE_CAPACITY_MASK = TABLE_CAPACITY - 1; + private final Bucket[] buckets = new Bucket[TABLE_CAPACITY]; + + public void add(byte[] data, int start, int stringLength, int hash, short value) { + int bucketIdx = hash & TABLE_CAPACITY_MASK; + + var bucket = buckets[bucketIdx]; + if (bucket == null) { + buckets[bucketIdx] = new Bucket(data, start, stringLength, hash, value); + } + else if (hash == bucket.hash && bucket.isEqual(data, start, stringLength)) { + bucket.update(value); + } + else { + addWithProbing(data, start, stringLength, hash, value, (bucketIdx + 1) & TABLE_CAPACITY_MASK); + } + } + + private void addWithProbing(byte[] data, int start, int stringLength, int hash, short value, int bucketIdx) { + int distance = 1; + while (true) { + var bucket = buckets[bucketIdx]; + if (bucket == null) { + buckets[bucketIdx] = new Bucket(data, start, stringLength, hash, value); + break; + } + else if (hash == bucket.hash && bucket.isEqual(data, start, stringLength)) { + bucket.update(value); + break; + } + else { + distance++; + bucketIdx = (bucketIdx + distance) & TABLE_CAPACITY_MASK; + } + } + } + + public void addRows(TreeMap result) { + for (var bucket : buckets) { + if (bucket != null) { + result.compute(new String(bucket.str, StandardCharsets.UTF_8), (k, resultRow) -> { + if (resultRow == null) { + return new ResultRow(bucket.sum, bucket.count, bucket.min, bucket.max); + } + else { + resultRow.add(bucket.sum, bucket.count, bucket.min, bucket.max); + return resultRow; + } + }); + } + } + } + + private static final class Bucket { + int hash; + byte[] str; + long sum; + int count; + short max = Short.MIN_VALUE; + short min = Short.MAX_VALUE; + + Bucket(byte[] data, int start, int stringLength, int hash, short value) { + this.str = new byte[stringLength]; + System.arraycopy(data, start, this.str, 0, stringLength); + this.hash = hash; + 
update(value); + } + + public void update(short value) { + this.sum += value; + this.count++; + if (max < value) + max = value; + if (min > value) + min = value; + } + + public boolean isEqual(byte[] data, int start, int length) { + if (str.length != length) + return false; + int i = 0; + int vectorLoopBound = BYTE_SPECIES.loopBound(str.length); + for (; i < vectorLoopBound; i += BYTE_SPECIES_BYTE_SIZE) { + var vector1 = ByteVector.fromArray(BYTE_SPECIES, str, i); + var vector2 = ByteVector.fromArray(BYTE_SPECIES, data, start + i); + var comparisonResult = vector1.compare(VectorOperators.NE, vector2); + if (comparisonResult.anyTrue()) + return false; + } + for (; i + Long.BYTES < str.length; i += Long.BYTES) { + long value1 = (long) LONG_VIEW.get(str, i); + long value2 = (long) LONG_VIEW.get(data, start + i); + if (value1 != value2) + return false; + } + if (i + Integer.BYTES < str.length) { + int value1 = (int) INT_VIEW.get(str, i); + int value2 = (int) INT_VIEW.get(data, start + i); + if (value1 != value2) + return false; + i += Integer.BYTES; + } + for (; i < str.length; i++) { + if (data[start + i] != str[i]) + return false; + } + return true; + } + } + } + + private static class ResultRow { + long sum; + int count; + short min; + short max; + + public ResultRow(long sum, int count, short min, short max) { + this.sum = sum; + this.count = count; + this.min = min; + this.max = max; + } + + public void add(long anotherSum, int anotherCount, short anotherMin, short anotherMax) { + sum += anotherSum; + count += anotherCount; + if (max < anotherMax) + max = anotherMax; + if (min > anotherMin) + min = anotherMin; + } + + public void appendToStringBuffer(StringBuilder sb) { + sb.append(Math.round((double) min) / 10.0); + sb.append('/'); + sb.append(Math.round((double) sum / count) / 10.0); + sb.append('/'); + sb.append(Math.round((double) max) / 10.0); + } + } + + /** + * A utility class that uses the RandomAccessFile to read at offset. 
+ * Keeps the in-memory buffer, as well as current offsets in the buffer and the file. + */ + private static final class BufferedFile implements AutoCloseable { + private static final int BUFFER_SIZE = 512 * 1024; + private final byte[] buffer = new byte[BUFFER_SIZE]; + private int bufferLimit = 0; + private int bufferPosition = 0; + private final long maxOffset; + private final RandomAccessFile file; + private long offset; + + private BufferedFile(long startOffset, long maxOffset) throws IOException { + this.offset = startOffset; + this.maxOffset = maxOffset; + this.file = new RandomAccessFile(FILE, "r"); + } + + private void refillBuffer() throws IOException { + int remainingBytes = bufferLimit - bufferPosition; + if (remainingBytes < MAX_LINE_LENGTH) { + bufferPosition = 0; + file.seek(offset); + int bytesRead = file.read(buffer, 0, BUFFER_SIZE); + bufferLimit = Math.max(bytesRead, 0); + } + } + + @Override + public void close() throws Exception { + file.close(); + } + } + +} diff --git a/src/main/java/dev/morling/onebrc/CalculateAverage_merykitty.java b/src/main/java/dev/morling/onebrc/CalculateAverage_merykitty.java index 1f5acf376..502002f09 100644 --- a/src/main/java/dev/morling/onebrc/CalculateAverage_merykitty.java +++ b/src/main/java/dev/morling/onebrc/CalculateAverage_merykitty.java @@ -75,8 +75,12 @@ private static class PoorManMap { } void observe(Aggregator node, long value) { - node.min = Math.min(node.min, value); - node.max = Math.max(node.max, value); + if (node.min > value) { + node.min = value; + } + if (node.max < value) { + node.max = value; + } node.sum += value; node.count++; } @@ -109,7 +113,7 @@ Aggregator insertInto(int bucket, MemorySegment data, long offset, int size) { var node = new Aggregator(); node.keySize = size; this.nodes[bucket] = node; - MemorySegment.copy(data, offset, MemorySegment.ofArray(this.keyData), (long) bucket * KEY_SIZE, size); + MemorySegment.copy(data, offset, MemorySegment.ofArray(this.keyData), (long) bucket * 
KEY_SIZE, size + 1); return node; } @@ -222,11 +226,12 @@ private static long iterate(PoorManMap aggrMap, MemorySegment data, long offset) var line = ByteVector.fromMemorySegment(BYTE_SPECIES, data, offset, ByteOrder.nativeOrder()); // Find the delimiter ';' - int keySize = line.compare(VectorOperators.EQ, ';').firstTrue(); + long semicolons = line.compare(VectorOperators.EQ, ';').toLong(); // If we cannot find the delimiter in the vector, that means the key is // longer than the vector, fall back to scalar processing - if (keySize == BYTE_SPECIES.vectorByteSize()) { + if (semicolons == 0) { + int keySize = BYTE_SPECIES.length(); while (data.get(ValueLayout.JAVA_BYTE, offset + keySize) != ';') { keySize++; } @@ -235,6 +240,7 @@ private static long iterate(PoorManMap aggrMap, MemorySegment data, long offset) } // We inline the searching of the value in the hash map + int keySize = Long.numberOfTrailingZeros(semicolons); int x; int y; if (keySize >= Integer.BYTES) { @@ -260,7 +266,7 @@ private static long iterate(PoorManMap aggrMap, MemorySegment data, long offset) var nodeKey = ByteVector.fromArray(BYTE_SPECIES, aggrMap.keyData, bucket * PoorManMap.KEY_SIZE); long eqMask = line.compare(VectorOperators.EQ, nodeKey).toLong(); - long validMask = -1L >>> -keySize; + long validMask = semicolons ^ (semicolons - 1); if ((eqMask & validMask) == validMask) { break; } @@ -269,28 +275,63 @@ private static long iterate(PoorManMap aggrMap, MemorySegment data, long offset) return parseDataPoint(aggrMap, node, data, offset + keySize + 1); } - // Process all lines that start in [offset, limit) - private static PoorManMap processFile(MemorySegment data, long offset, long limit) { - var aggrMap = new PoorManMap(); - // Find the start of a new line - if (offset != 0) { - offset--; - while (offset < limit) { - if (data.get(ValueLayout.JAVA_BYTE, offset++) == '\n') { - break; - } + private static long findOffset(MemorySegment data, long offset, long limit) { + if (offset == 0) { + 
return offset; + } + + offset--; + while (offset < limit) { + if (data.get(ValueLayout.JAVA_BYTE, offset++) == '\n') { + break; } } + return offset; + } - // If there is no line starting in this segment, just return + // Process all lines that start in [offset, limit) + private static PoorManMap processFile(MemorySegment data, long offset, long limit) { + var aggrMap = new PoorManMap(); if (offset == limit) { return aggrMap; } + int batches = 2; + long batchSize = Math.ceilDiv(limit - offset, batches); + long offset0 = offset; + long offset1 = offset + batchSize; + long limit0 = Math.min(offset1, limit); + long limit1 = limit; - // The main loop, optimized for speed - while (offset < limit - Math.max(BYTE_SPECIES.vectorByteSize(), - Long.BYTES + 1 + KEY_MAX_SIZE)) { - offset = iterate(aggrMap, data, offset); + // Find the start of a new line + offset0 = findOffset(data, offset0, limit0); + offset1 = findOffset(data, offset1, limit1); + + long mainLoopMinWidth = Math.max(BYTE_SPECIES.vectorByteSize(), KEY_MAX_SIZE + 1 + Long.BYTES); + if (limit1 - offset1 < mainLoopMinWidth) { + offset = findOffset(data, offset, limit); + while (offset < limit - mainLoopMinWidth) { + offset = iterate(aggrMap, data, offset); + } + } + else { + while (true) { + boolean finish = false; + if (offset0 < limit0) { + offset0 = iterate(aggrMap, data, offset0); + } + else { + finish = true; + } + if (offset1 < limit1 - mainLoopMinWidth) { + offset1 = iterate(aggrMap, data, offset1); + } + else { + if (finish) { + break; + } + } + } + offset = offset1; } // Now we are at the tail, just be simple diff --git a/src/main/java/dev/morling/onebrc/CalculateAverage_merykittyunsafe.java b/src/main/java/dev/morling/onebrc/CalculateAverage_merykittyunsafe.java index 498369410..4b1fc0ddf 100644 --- a/src/main/java/dev/morling/onebrc/CalculateAverage_merykittyunsafe.java +++ b/src/main/java/dev/morling/onebrc/CalculateAverage_merykittyunsafe.java @@ -91,10 +91,12 @@ private static class PoorManMap { void 
observe(long entryOffset, long value) { long baseOffset = Unsafe.ARRAY_BYTE_BASE_OFFSET + entryOffset; - UNSAFE.putShort(this.data, baseOffset + MIN_OFFSET, - (short) Math.min(value, UNSAFE.getShort(this.data, baseOffset + MIN_OFFSET))); - UNSAFE.putShort(this.data, baseOffset + MAX_OFFSET, - (short) Math.max(value, UNSAFE.getShort(this.data, baseOffset + MAX_OFFSET))); + if (UNSAFE.getShort(this.data, baseOffset + MIN_OFFSET) > value) { + UNSAFE.putShort(this.data, baseOffset + MIN_OFFSET, (short) value); + } + if (UNSAFE.getShort(this.data, baseOffset + MAX_OFFSET) < value) { + UNSAFE.putShort(this.data, baseOffset + MAX_OFFSET, (short) value); + } UNSAFE.putLong(this.data, baseOffset + SUM_OFFSET, value + UNSAFE.getLong(this.data, baseOffset + SUM_OFFSET)); UNSAFE.putLong(this.data, baseOffset + COUNT_OFFSET, @@ -307,31 +309,70 @@ private static long iterate(PoorManMap aggrMap, long address) { return parseDataPoint(aggrMap, entryOffset, address + keySize + 1); } + private static long findOffset(long base, long offset, long limit) { + if (offset == 0) { + return offset; + } + + offset--; + while (offset < limit) { + if (UNSAFE.getByte(base + (offset++)) == '\n') { + break; + } + } + return offset; + } + // Process all lines that start in [offset, limit) private static PoorManMap processFile(MemorySegment data, long offset, long limit) { var aggrMap = new PoorManMap(); + if (offset == limit) { + return aggrMap; + } long base = data.address(); - long begin = base + offset; - long end = base + limit; + int batches = 2; + long batchSize = Math.ceilDiv(limit - offset, batches); + long offset0 = offset; + long offset1 = offset + batchSize; + long limit0 = Math.min(offset1, limit); + long limit1 = limit; + // Find the start of a new line - if (offset != 0) { - begin--; - while (begin < end) { - if (UNSAFE.getByte(begin++) == '\n') { - break; - } - } - } + offset0 = findOffset(base, offset0, limit0); + offset1 = findOffset(base, offset1, limit1); - // If there is no line 
starting in this segment, just return - if (begin == end) { - return aggrMap; + long begin; + long end = base + limit; + long mainLoopMinWidth = Math.max(BYTE_SPECIES.vectorByteSize(), KEY_MAX_SIZE + 1 + Long.BYTES); + if (limit1 - offset1 < mainLoopMinWidth) { + begin = base + findOffset(base, offset, limit); + while (begin < end - mainLoopMinWidth) { + begin = iterate(aggrMap, begin); + } } - - // The main loop, optimized for speed - while (begin < end - Math.max(BYTE_SPECIES.vectorByteSize(), - Long.BYTES + 1 + KEY_MAX_SIZE)) { - begin = iterate(aggrMap, begin); + else { + long begin0 = base + offset0; + long begin1 = base + offset1; + long end0 = base + limit0; + long end1 = base + limit1; + while (true) { + boolean finish = false; + if (begin0 < end0) { + begin0 = iterate(aggrMap, begin0); + } + else { + finish = true; + } + if (begin1 < end1 - mainLoopMinWidth) { + begin1 = iterate(aggrMap, begin1); + } + else { + if (finish) { + break; + } + } + } + begin = begin1; } // Now we are at the tail, just be simple diff --git a/src/main/java/dev/morling/onebrc/CalculateAverage_mtopolnik.java b/src/main/java/dev/morling/onebrc/CalculateAverage_mtopolnik.java index fe487fcda..61294a4f9 100644 --- a/src/main/java/dev/morling/onebrc/CalculateAverage_mtopolnik.java +++ b/src/main/java/dev/morling/onebrc/CalculateAverage_mtopolnik.java @@ -29,18 +29,15 @@ import java.util.ArrayList; import java.util.Arrays; +import static java.lang.ProcessBuilder.Redirect.PIPE; +import static java.util.Arrays.asList; + public class CalculateAverage_mtopolnik { private static final Unsafe UNSAFE = unsafe(); private static final int MAX_NAME_LEN = 100; private static final int STATS_TABLE_SIZE = 1 << 16; private static final int TABLE_INDEX_MASK = STATS_TABLE_SIZE - 1; private static final String MEASUREMENTS_TXT = "measurements.txt"; - private static final byte SEMICOLON = ';'; - private static final long BROADCAST_SEMICOLON = broadcastByte(SEMICOLON); - - // These two are just 
informative, I let the IDE calculate them for me - private static final long NATIVE_MEM_PER_THREAD = StatsAccessor.SIZEOF * STATS_TABLE_SIZE; - private static final long NATIVE_MEM_ON_8_THREADS = 8 * NATIVE_MEM_PER_THREAD; private static Unsafe unsafe() { try { @@ -53,31 +50,23 @@ private static Unsafe unsafe() { } } - static class StationStats implements Comparable { - String name; - long sum; - int count; - int min; - int max; - - @Override - public String toString() { - return String.format("%s=%.1f/%.1f/%.1f", name, min / 10.0, Math.round((double) sum / count) / 10.0, max / 10.0); - } - - @Override - public boolean equals(Object that) { - return that.getClass() == StationStats.class && ((StationStats) that).name.equals(this.name); - } - - @Override - public int compareTo(StationStats that) { - return name.compareTo(that.name); - } - } - public static void main(String[] args) throws Exception { - calculate(); + if (args.length >= 1 && args[0].equals("--worker")) { + calculate(); + System.out.close(); + return; + } + var curProcInfo = ProcessHandle.current().info(); + var cmdLine = new ArrayList(); + cmdLine.add(curProcInfo.command().get()); + cmdLine.addAll(asList(curProcInfo.arguments().get())); + cmdLine.add("--worker"); + var process = new ProcessBuilder() + .command(cmdLine) + .inheritIO().redirectOutput(PIPE) + .start() + .getInputStream().transferTo(System.out); + } static void calculate() throws Exception { @@ -113,7 +102,6 @@ static void calculate() throws Exception { } private static class ChunkProcessor implements Runnable { - private static final long NAMEBUF_SIZE = 2 * Long.BYTES; private static final int CACHELINE_SIZE = 64; private final long inputBase; @@ -122,8 +110,6 @@ private static class ChunkProcessor implements Runnable { private final int myIndex; private StatsAccessor stats; - private long nameBufBase; - private long cursor; ChunkProcessor(long chunkStart, long chunkLimit, StationStats[][] results, int myIndex) { this.inputBase = 
chunkStart; @@ -138,16 +124,12 @@ public void run() { long totalAllocated = 0; String threadName = Thread.currentThread().getName(); long statsByteSize = STATS_TABLE_SIZE * StatsAccessor.SIZEOF; - var diagnosticString = String.format("Thread %s needs %,d bytes, managed to allocate before OOM: ", - threadName, statsByteSize + NAMEBUF_SIZE); + var diagnosticString = String.format("Thread %s needs %,d bytes", threadName, statsByteSize); try { stats = new StatsAccessor(confinedArena.allocate(statsByteSize, CACHELINE_SIZE)); - totalAllocated = statsByteSize; - nameBufBase = confinedArena.allocate(NAMEBUF_SIZE).address(); } catch (OutOfMemoryError e) { System.err.print(diagnosticString); - System.err.println(totalAllocated); throw e; } processChunk(); @@ -156,197 +138,109 @@ public void run() { } private void processChunk() { + final long inputSize = this.inputSize; + final long inputBase = this.inputBase; + long cursor = 0; + long lastNameWord; while (cursor < inputSize) { - long word1; - long word2; - if (cursor + 2 * Long.BYTES <= inputSize) { - word1 = UNSAFE.getLong(inputBase + cursor); - word2 = UNSAFE.getLong(inputBase + cursor + Long.BYTES); + long nameStartAddress = inputBase + cursor; + long nameWord0 = UNSAFE.getLong(nameStartAddress); + long nameWord1 = 0; + long matchBits = semicolonMatchBits(nameWord0); + long hash; + int nameLen; + int temperature; + if (matchBits != 0) { + nameLen = nameLen(matchBits); + nameWord0 = maskWord(nameWord0, matchBits); + cursor += nameLen; + long tempWord = UNSAFE.getLong(inputBase + cursor); + int dotPos = dotPos(tempWord); + temperature = parseTemperature(tempWord, dotPos); + cursor += (dotPos >> 3) + 3; + hash = hash(nameWord0); + if (stats.gotoName0(hash, nameWord0)) { + stats.observe(temperature); + continue; + } + lastNameWord = nameWord0; } - else { - UNSAFE.putLong(nameBufBase, 0); - UNSAFE.putLong(nameBufBase + Long.BYTES, 0); - UNSAFE.copyMemory(inputBase + cursor, nameBufBase, Long.min(NAMEBUF_SIZE, inputSize - 
cursor)); - word1 = UNSAFE.getLong(nameBufBase); - word2 = UNSAFE.getLong(nameBufBase + Long.BYTES); + else { // nameLen > 8 + hash = hash(nameWord0); + nameWord1 = UNSAFE.getLong(nameStartAddress + Long.BYTES); + matchBits = semicolonMatchBits(nameWord1); + if (matchBits != 0) { + nameLen = Long.BYTES + nameLen(matchBits); + nameWord1 = maskWord(nameWord1, matchBits); + cursor += nameLen; + long tempWord = UNSAFE.getLong(inputBase + cursor); + int dotPos = dotPos(tempWord); + temperature = parseTemperature(tempWord, dotPos); + cursor += (dotPos >> 3) + 3; + if (stats.gotoName1(hash, nameWord0, nameWord1)) { + stats.observe(temperature); + continue; + } + lastNameWord = nameWord1; + } + else { // nameLen > 16 + nameLen = 2 * Long.BYTES; + while (true) { + lastNameWord = UNSAFE.getLong(nameStartAddress + nameLen); + matchBits = semicolonMatchBits(lastNameWord); + if (matchBits != 0) { + nameLen += nameLen(matchBits); + lastNameWord = maskWord(lastNameWord, matchBits); + cursor += nameLen; + long tempWord = UNSAFE.getLong(inputBase + cursor); + int dotPos = dotPos(tempWord); + temperature = parseTemperature(tempWord, dotPos); + cursor += (dotPos >> 3) + 3; + break; + } + nameLen += Long.BYTES; + } + } } - long posOfSemicolon = posOfSemicolon(word1, word2); - word1 = maskWord(word1, posOfSemicolon - cursor); - word2 = maskWord(word2, posOfSemicolon - cursor - Long.BYTES); - long hash = hash(word1); - long namePos = cursor; - long nameLen = posOfSemicolon - cursor; - assert nameLen <= 100 : "nameLen > 100"; - int temperature = parseTemperatureAndAdvanceCursor(posOfSemicolon); - updateStats(hash, namePos, nameLen, word1, word2, temperature); + stats.gotoAndObserve(hash, nameStartAddress, nameLen, nameWord0, nameWord1, lastNameWord, temperature); } } - private void updateStats(long hash, long namePos, long nameLen, long nameWord1, long nameWord2, int temperature) { - int tableIndex = (int) (hash & TABLE_INDEX_MASK); - while (true) { - stats.gotoIndex(tableIndex); - if 
(stats.hash() == hash && stats.nameLen() == nameLen - && nameEquals(stats.nameAddress(), inputBase + namePos, nameLen, nameWord1, nameWord2)) { - stats.setSum(stats.sum() + temperature); - stats.setCount(stats.count() + 1); - stats.setMin((short) Integer.min(stats.min(), temperature)); - stats.setMax((short) Integer.max(stats.max(), temperature)); - return; - } - if (stats.nameLen() != 0) { - tableIndex = (tableIndex + 1) & TABLE_INDEX_MASK; - continue; - } - stats.setHash(hash); - stats.setNameLen((int) nameLen); - stats.setSum(temperature); - stats.setCount(1); - stats.setMin((short) temperature); - stats.setMax((short) temperature); - UNSAFE.copyMemory(inputBase + namePos, stats.nameAddress(), nameLen); - return; - } + private static final long BROADCAST_SEMICOLON = 0x3B3B3B3B3B3B3B3BL; + private static final long BROADCAST_0x01 = 0x0101010101010101L; + private static final long BROADCAST_0x80 = 0x8080808080808080L; + + private static long semicolonMatchBits(long word) { + long diff = word ^ BROADCAST_SEMICOLON; + return (diff - BROADCAST_0x01) & (~diff & BROADCAST_0x80); } - private int parseTemperatureAndAdvanceCursor(long semicolonPos) { - long startOffset = semicolonPos + 1; - if (startOffset <= inputSize - Long.BYTES) { - return parseTemperatureSwarAndAdvanceCursor(startOffset); - } - return parseTemperatureSimpleAndAdvanceCursor(startOffset); + // credit: artsiomkorzun + private static long maskWord(long word, long matchBits) { + long mask = matchBits ^ (matchBits - 1); + return word & mask; } - // Credit: merykitty - private int parseTemperatureSwarAndAdvanceCursor(long startOffset) { - long word = UNSAFE.getLong(inputBase + startOffset); - final long negated = ~word; - final int dotPos = Long.numberOfTrailingZeros(negated & 0x10101000); - final long signed = (negated << 59) >> 63; + // credit: merykitty + private static int dotPos(long word) { + return Long.numberOfTrailingZeros(~word & 0x10101000); + } + + // credit: merykitty + private static int 
parseTemperature(long word, int dotPos) { + final long signed = (~word << 59) >> 63; final long removeSignMask = ~(signed & 0xFF); final long digits = ((word & removeSignMask) << (28 - dotPos)) & 0x0F000F0F00L; final long absValue = ((digits * 0x640a0001) >>> 32) & 0x3FF; - final int temperature = (int) ((absValue ^ signed) - signed); - cursor = startOffset + (dotPos / 8) + 3; - return temperature; - } - - private int parseTemperatureSimpleAndAdvanceCursor(long startOffset) { - final byte minus = (byte) '-'; - final byte zero = (byte) '0'; - final byte dot = (byte) '.'; - - // Temperature plus the following newline is at least 4 chars, so this is always safe: - int fourCh = UNSAFE.getInt(inputBase + startOffset); - final int mask = 0xFF; - byte ch = (byte) (fourCh & mask); - int shift = 0; - int temperature; - int sign; - if (ch == minus) { - sign = -1; - shift += 8; - ch = (byte) ((fourCh & (mask << shift)) >>> shift); - } - else { - sign = 1; - } - temperature = ch - zero; - shift += 8; - ch = (byte) ((fourCh & (mask << shift)) >>> shift); - if (ch == dot) { - shift += 8; - ch = (byte) ((fourCh & (mask << shift)) >>> shift); - } - else { - temperature = 10 * temperature + (ch - zero); - shift += 16; - // The last character may be past the four loaded bytes, load it from memory. - // Checking that with another `if` is self-defeating for performance. - ch = UNSAFE.getByte(inputBase + startOffset + (shift / 8)); - } - temperature = 10 * temperature + (ch - zero); - // `shift` holds the number of bits in the temperature field. - // A newline character follows the temperature, and so we advance - // the cursor past the newline to the start of the next line. 
- cursor = startOffset + (shift / 8) + 2; - return sign * temperature; - } - - private static long hash(long word1) { - long seed = 0x51_7c_c1_b7_27_22_0a_95L; - int rotDist = 17; - - long hash = word1; - hash *= seed; - hash = Long.rotateLeft(hash, rotDist); - // hash ^= word2; - // hash *= seed; - // hash = Long.rotateLeft(hash, rotDist); - return hash; - } - - private static boolean nameEquals(long statsAddr, long inputAddr, long len, long inputWord1, long inputWord2) { - boolean mismatch1 = maskWord(inputWord1, len) != UNSAFE.getLong(statsAddr); - boolean mismatch2 = maskWord(inputWord2, len - Long.BYTES) != UNSAFE.getLong(statsAddr + Long.BYTES); - if (mismatch1 | mismatch2) { - return false; - } - for (int i = 2 * Long.BYTES; i < len; i++) { - if (UNSAFE.getByte(inputAddr + i) != UNSAFE.getByte(statsAddr + i)) { - return false; - } - } - return true; - } - - private static long maskWord(long word, long len) { - long halfShiftDistance = Long.max(0, Long.BYTES - len) << 2; - long mask = (~0L >>> halfShiftDistance) >>> halfShiftDistance; // avoid Java trap of shiftDist % 64 - return word & mask; + return (int) ((absValue ^ signed) - signed); } - private static final long BROADCAST_0x01 = broadcastByte(0x01); - private static final long BROADCAST_0x80 = broadcastByte(0x80); - - // Adapted from https://jameshfisher.com/2017/01/24/bitwise-check-for-zero-byte/ - // and https://github.com/ashvardanian/StringZilla/blob/14e7a78edcc16b031c06b375aac1f66d8f19d45a/stringzilla/stringzilla.h#L139-L169 - long posOfSemicolon(long word1, long word2) { - long diff = word1 ^ BROADCAST_SEMICOLON; - long matchBits1 = (diff - BROADCAST_0x01) & ~diff & BROADCAST_0x80; - diff = word2 ^ BROADCAST_SEMICOLON; - long matchBits2 = (diff - BROADCAST_0x01) & ~diff & BROADCAST_0x80; - if ((matchBits1 | matchBits2) != 0) { - int trailing1 = Long.numberOfTrailingZeros(matchBits1); - int match1IsNonZero = trailing1 & 63; - match1IsNonZero |= match1IsNonZero >>> 3; - match1IsNonZero |= 
match1IsNonZero >>> 1; - match1IsNonZero |= match1IsNonZero >>> 1; - // Now match1IsNonZero is 1 if it's non-zero, else 0. Use it to - // raise the lowest bit in traling2 if trailing1 is nonzero. This forces - // trailing2 to be zero if trailing1 is non-zero. - int trailing2 = Long.numberOfTrailingZeros(matchBits2 | match1IsNonZero) & 63; - return cursor + ((trailing1 | trailing2) >> 3); - } - long offset = cursor + 2 * Long.BYTES; - for (; offset <= inputSize - Long.BYTES; offset += Long.BYTES) { - var block = UNSAFE.getLong(inputBase + offset); - diff = block ^ BROADCAST_SEMICOLON; - long matchBits = (diff - BROADCAST_0x01) & ~diff & BROADCAST_0x80; - if (matchBits != 0) { - return offset + Long.numberOfTrailingZeros(matchBits) / 8; - } - } - return posOfSemicolonSimple(offset); + private static int nameLen(long separator) { + return (Long.numberOfTrailingZeros(separator) >>> 3) + 1; } - private long posOfSemicolonSimple(long offset) { - for (; offset < inputSize; offset++) { - if (UNSAFE.getByte(inputBase + offset) == SEMICOLON) { - return offset; - } - } - throw new RuntimeException("Semicolon not found"); + private static long hash(long word) { + return Long.rotateLeft(word * 0x51_7c_c1_b7_27_22_0a_95L, 17); } // Copies the results from native memory to Java heap and puts them into the results array. 
@@ -374,22 +268,6 @@ private void exportResults() { Arrays.sort(exported); results[myIndex] = exported; } - - private final ByteBuffer buf = ByteBuffer.allocate(8).order(ByteOrder.nativeOrder()); - - private String longToString(long word) { - buf.clear(); - buf.putLong(word); - return new String(buf.array(), StandardCharsets.UTF_8); // + "|" + Arrays.toString(buf.array()); - } - } - - private static long broadcastByte(int b) { - long nnnnnnnn = b; - nnnnnnnn |= nnnnnnnn << 8; - nnnnnnnn |= nnnnnnnn << 16; - nnnnnnnn |= nnnnnnnn << 32; - return nnnnnnnn; } static class StatsAccessor { @@ -417,6 +295,16 @@ void gotoIndex(int index) { slotBase = address + index * SIZEOF; } + private boolean gotoName0(long hash, long nameWord0) { + gotoIndex((int) (hash & TABLE_INDEX_MASK)); + return hash() == hash && nameWord0() == nameWord0; + } + + private boolean gotoName1(long hash, long nameWord0, long nameWord1) { + gotoIndex((int) (hash & TABLE_INDEX_MASK)); + return hash() == hash && nameWord0() == nameWord0 && nameWord1() == nameWord1; + } + long hash() { return UNSAFE.getLong(slotBase + HASH_OFFSET); } @@ -445,9 +333,17 @@ long nameAddress() { return slotBase + NAME_OFFSET; } + long nameWord0() { + return UNSAFE.getLong(nameAddress()); + } + + long nameWord1() { + return UNSAFE.getLong(nameAddress() + Long.BYTES); + } + String exportNameString() { - final var bytes = new byte[nameLen()]; - UNSAFE.copyMemory(null, nameAddress(), bytes, ARRAY_BASE_OFFSET, nameLen()); + final var bytes = new byte[nameLen() - 1]; + UNSAFE.copyMemory(null, nameAddress(), bytes, ARRAY_BASE_OFFSET, bytes.length); return new String(bytes, StandardCharsets.UTF_8); } @@ -474,6 +370,59 @@ void setMin(short min) { void setMax(short max) { UNSAFE.putShort(slotBase + MAX_OFFSET, max); } + + void gotoAndObserve( + long hash, long nameStartAddress, int nameLen, long nameWord0, long nameWord1, long lastNameWord, + int temperature) { + int tableIndex = (int) (hash & TABLE_INDEX_MASK); + while (true) { + 
gotoIndex(tableIndex); + if (hash() == hash && nameLen() == nameLen && nameEquals( + nameAddress(), nameStartAddress, nameLen, nameWord0, nameWord1, lastNameWord)) { + observe(temperature); + break; + } + if (nameLen() != 0) { + tableIndex = (tableIndex + 1) & TABLE_INDEX_MASK; + continue; + } + initialize(hash, nameLen, nameStartAddress, temperature); + break; + } + } + + void initialize(long hash, long nameLen, long nameStartAddress, int temperature) { + setHash(hash); + setNameLen((int) nameLen); + setSum(temperature); + setCount(1); + setMin((short) temperature); + setMax((short) temperature); + UNSAFE.copyMemory(nameStartAddress, nameAddress(), nameLen); + } + + void observe(int temperature) { + setSum(sum() + temperature); + setCount(count() + 1); + setMin((short) Integer.min(min(), temperature)); + setMax((short) Integer.max(max(), temperature)); + } + + private static boolean nameEquals( + long statsAddr, long inputAddr, long len, long inputWord1, long inputWord2, long lastInputWord) { + boolean mismatch1 = inputWord1 != UNSAFE.getLong(statsAddr); + boolean mismatch2 = inputWord2 != UNSAFE.getLong(statsAddr + Long.BYTES); + if (len <= 2 * Long.BYTES) { + return !(mismatch1 | mismatch2); + } + int i = 2 * Long.BYTES; + for (; i <= len - Long.BYTES; i += Long.BYTES) { + if (UNSAFE.getLong(inputAddr + i) != UNSAFE.getLong(statsAddr + i)) { + return false; + } + } + return i == len || lastInputWord == UNSAFE.getLong(statsAddr + i); + } } private static void mergeSortAndPrint(StationStats[][] results) { @@ -527,4 +476,34 @@ else if (min.equals(curr)) { } System.out.println('}'); } + + static class StationStats implements Comparable { + String name; + long sum; + int count; + int min; + int max; + + @Override + public String toString() { + return String.format("%s=%.1f/%.1f/%.1f", name, min / 10.0, Math.round((double) sum / count) / 10.0, max / 10.0); + } + + @Override + public boolean equals(Object that) { + return that.getClass() == StationStats.class && 
((StationStats) that).name.equals(this.name); + } + + @Override + public int compareTo(StationStats that) { + return name.compareTo(that.name); + } + } + + private static String longToString(long word) { + final ByteBuffer buf = ByteBuffer.allocate(8).order(ByteOrder.nativeOrder()); + buf.clear(); + buf.putLong(word); + return new String(buf.array(), StandardCharsets.UTF_8); + } } diff --git a/src/main/java/dev/morling/onebrc/CalculateAverage_netrunnereve.java b/src/main/java/dev/morling/onebrc/CalculateAverage_netrunnereve.java index 7ff3cdd16..13919cfb6 100644 --- a/src/main/java/dev/morling/onebrc/CalculateAverage_netrunnereve.java +++ b/src/main/java/dev/morling/onebrc/CalculateAverage_netrunnereve.java @@ -21,11 +21,18 @@ import java.nio.MappedByteBuffer; import java.nio.channels.FileChannel; import java.nio.charset.StandardCharsets; +import java.util.Map; +import java.util.TreeMap; +import java.util.concurrent.CountDownLatch; import java.lang.Math; public class CalculateAverage_netrunnereve { private static final String FILE = "./measurements.txt"; + private static final int NUM_THREADS = 8; // test machine + private static final int LEN_EXTEND = 200; // guarantees a newline + private static final int HASHT_SIZE = 16384; // size of hash table, adjust tradeoff between colisions and cache utilization + private static final int DJB2_INIT = 5831; private static class MeasurementAggregator { // min, max, sum stored as 0.1/unit private MeasurementAggregator next = null; // linked list of entries for handling hash colisions @@ -36,62 +43,92 @@ private static class MeasurementAggregator { // min, max, sum stored as 0.1/unit private int count = 0; } + private static class ThreadCalcs { + private MeasurementAggregator[] hashSpace = null; + private String[] staArr = null; + private int numStations = 0; + } + // djb2 hash private static int calc_hash(byte[] input, int len) { - int hash = 5831; + int hash = DJB2_INIT; for (int i = 0; i < len; i++) { hash = ((hash << 5) + 
hash) + Byte.toUnsignedInt(input[i]); } - return Math.abs(hash % 16384); + return Math.abs(hash % HASHT_SIZE); } - public static void main(String[] args) { - try { - RandomAccessFile mraf = new RandomAccessFile(FILE, "r"); - long fileSize = mraf.getChannel().size(); - long bufSize = Integer.MAX_VALUE; // Java requirement is <= Integer.MAX_VALUE - int numStations = 0; + private static class ThreadedParser extends Thread { + private MappedByteBuffer mbuf; + private int mbs; + private ThreadCalcs[] threadOut; + private int threadID; + private CountDownLatch tpLatch; + + private ThreadedParser(MappedByteBuffer mbuf, int mbs, ThreadCalcs[] threadOut, int threadID, CountDownLatch tpLatch) { + this.mbuf = mbuf; + this.mbs = mbs; + this.threadOut = threadOut; + this.threadID = threadID; + this.tpLatch = tpLatch; + } - MeasurementAggregator[] hashSpace = new MeasurementAggregator[16384]; // 14-bit hash + public void run() { + MeasurementAggregator[] hashSpace = new MeasurementAggregator[HASHT_SIZE]; // hash table byte[] scratch = new byte[100]; // <= 100 characters in station name String[] staArr = new String[10000]; // max 10000 station names MeasurementAggregator ma = null; - long h = 0; - while (h < fileSize) { - long length = bufSize; - boolean finished = false; - if (h + length > fileSize) { - length = fileSize - h; - finished = true; - } + int numStations = 0; + int negMul = 1; + int head = 0; + int tempCnt = -1; // 0 if 1 digit measurement, 1 if 2 digit + int hash = DJB2_INIT; // do calc_hash manually in loop - MappedByteBuffer mbuf = mraf.getChannel().map(FileChannel.MapMode.READ_ONLY, h, length); - int mbs = mbuf.capacity(); + int i = 0; // byte by byte iterator + while (true) { + byte cur = mbuf.get(i); + if (cur == 59) { // ; + hash = Math.abs(hash % HASHT_SIZE); - // check for last newline and split there, anything after goes to next buffer - if (!finished) { - for (int i = mbs - 1; true; i--) { - byte cur = mbuf.get(i - 1); - if (cur == 10) { // \n - mbs = i; + 
// this is faster than filling scratch immediately after each byte is read + int len = i - head; + mbuf.position(head); + mbuf.get(scratch, 0, len); + + ma = hashSpace[hash]; + MeasurementAggregator prev = null; + + while (true) { + if (ma == null) { + ma = new MeasurementAggregator(); + ma.station = Arrays.copyOfRange(scratch, 0, len); + staArr[numStations] = new String(scratch, 0, len, StandardCharsets.UTF_8); + + if (prev != null) { + prev.next = ma; + } + else { + hashSpace[hash] = ma; + } + + numStations++; + break; + } + else if ((len != ma.station.length) || (Arrays.compare(scratch, 0, len, ma.station, 0, len) != 0)) { // hash collision + prev = ma; + ma = ma.next; + } + else { // hit break; } } - } - - boolean state = false; // 0 for station pickup, 1 for measurement pickup - int negMul = 1; - int head = 0; - int tempCnt = -1; // 0 if 1 digit measurement, 1 if 2 digit - for (int i = 0; i < mbs; i++) { - byte cur = mbuf.get(i); - if (state == true) { + i++; + while (true) { + cur = mbuf.get(i); if (cur == 46) { // . - int tempa = mbuf.get(i + 1) - 48; - tempa += (scratch[0] - 48) * (10 + 90 * tempCnt) + (scratch[1] - 48) * (10 * tempCnt); // branchless - tempa *= negMul; + int tempa = (negMul) * ((10 + 90 * tempCnt) * (scratch[0] - 48) + (10 * tempCnt) * (scratch[1] - 48) + (mbuf.get(i + 1) - 48)); // branchless if (tempa < ma.min) { ma.min = tempa; @@ -102,11 +139,13 @@ public static void main(String[] args) { ma.sum += tempa; ma.count++; - i += 2; // go to start of new line - state = false; + // this line is finished! 
+ i += 2; // newline char + hash = DJB2_INIT; negMul = 1; - head = i + 1; + head = i + 1; // start of next line tempCnt = -1; + break; } else if (cur == 45) { // ascii - negMul = -1; @@ -115,74 +154,135 @@ else if (cur == 45) { // ascii - scratch[tempCnt + 1] = cur; tempCnt++; } + i++; } - else if (cur == 59) { // ; - int len = i - head; - - // this is faster than filling scratch immediately after each byte is read - mbuf.position(head); - mbuf.get(scratch, 0, len); - - int hash = calc_hash(scratch, len); - ma = hashSpace[hash]; - MeasurementAggregator prev = null; - - while (true) { - if (ma == null) { - ma = new MeasurementAggregator(); - ma.station = Arrays.copyOfRange(scratch, 0, len); - staArr[numStations] = new String(scratch, 0, len, StandardCharsets.UTF_8); - - if (prev != null) { - prev.next = ma; - } - else { - hashSpace[hash] = ma; - } - - numStations++; - break; - } - else if ((len != ma.station.length) || (Arrays.compare(scratch, 0, len, ma.station, 0, len) != 0)) { // hash collision - prev = ma; - ma = ma.next; - } - else { // hit - break; - } + if (head >= mbs) { + break; + } + } + else { + hash = ((hash << 5) + hash) + Byte.toUnsignedInt(cur); + } + i++; + } + threadOut[threadID] = new ThreadCalcs(); + threadOut[threadID].hashSpace = hashSpace; + threadOut[threadID].staArr = staArr; + threadOut[threadID].numStations = numStations; + tpLatch.countDown(); + } + } + + public static void main(String[] args) { + try { + RandomAccessFile mraf = new RandomAccessFile(FILE, "r"); + long fileSize = mraf.getChannel().size(); + long threadNum = NUM_THREADS; + + long minThreads = (fileSize / Integer.MAX_VALUE) + 1; // minimum # of threads required due to MappedByteBuffer size limit + if (threadNum < minThreads) { + threadNum = minThreads; + } + long bufSize = fileSize / threadNum; + + // don't bother multithreading for small files + if (bufSize < 1000000) { + threadNum = 1; + bufSize = Integer.MAX_VALUE; + } + + ThreadCalcs[] threadOut = new ThreadCalcs[(int) 
threadNum]; + CountDownLatch tpLatch = new CountDownLatch((int) threadNum); + int threadID = 0; + + long h = 0; + while (h < fileSize) { + long length = bufSize; + boolean finished = false; + + if ((h == 0) && (length + LEN_EXTEND < Integer.MAX_VALUE)) { // add a bit of extra bytes to first thread to avoid generating new thread for the remainder + length += LEN_EXTEND; // arbitary bytes to guarantee a newline somewhere + } + if (h + length > fileSize) { // past the end + length = fileSize - h; + finished = true; + } + + MappedByteBuffer mbuf = mraf.getChannel().map(FileChannel.MapMode.READ_ONLY, h, length); + int mbs = mbuf.capacity(); + + // check for last newline and split there, anything after goes to next buffer + if (!finished) { + for (int i = mbs - 1; true; i--) { + byte cur = mbuf.get(i - 1); + if (cur == 10) { // \n + mbs = i; + break; } - state = true; - head = i + 1; } } + + ThreadedParser tpThr = new ThreadedParser(mbuf, mbs, threadOut, threadID, tpLatch); + tpThr.start(); + h += mbs; + threadID++; } - Arrays.sort(staArr, 0, numStations); + try { + tpLatch.await(); + } + catch (InterruptedException ex) { + System.exit(1); + } + // use treemap to sort and uniquify + Map staMap = new TreeMap<>(); + for (int i = 0; i < threadID; i++) { + for (int j = 0; j < threadOut[i].numStations; j++) { + staMap.put(threadOut[i].staArr[j], false); + } + } + + boolean started = false; String out = "{"; - for (int i = 0; i < numStations; i++) { - byte[] strBuf = staArr[i].getBytes(StandardCharsets.UTF_8); + for (String i : staMap.keySet()) { + if (started) { + out += ", "; + } + else { + started = true; + } + + byte[] strBuf = i.getBytes(StandardCharsets.UTF_8); int hash = calc_hash(strBuf, strBuf.length); - ma = hashSpace[hash]; + MeasurementAggregator mSum = new MeasurementAggregator(); + for (int j = 0; j < threadID; j++) { + MeasurementAggregator ma = threadOut[j].hashSpace[hash]; - while (true) { - if ((strBuf.length != ma.station.length) || (Arrays.compare(strBuf, 
ma.station) != 0)) { // hash collision - ma = ma.next; - continue; - } - else { // hit - double min = Math.round(Double.valueOf(ma.min)) / 10.0; - double avg = Math.round(Double.valueOf(ma.sum) / Double.valueOf(ma.count)) / 10.0; - double max = Math.round(Double.valueOf(ma.max)) / 10.0; - out += staArr[i] + "=" + min + "/" + avg + "/" + max; - if (i != (numStations - 1)) { - out += ", "; + while (true) { + if ((strBuf.length != ma.station.length) || (Arrays.compare(strBuf, ma.station) != 0)) { // hash collision + ma = ma.next; + continue; + } + else { // hit + if (ma.min < mSum.min) { + mSum.min = ma.min; + } + if (ma.max > mSum.max) { + mSum.max = ma.max; + } + mSum.sum += ma.sum; + mSum.count += ma.count; + break; } - break; } } + double min = Math.round(Double.valueOf(mSum.min)) / 10.0; + double avg = Math.round(Double.valueOf(mSum.sum) / Double.valueOf(mSum.count)) / 10.0; + double max = Math.round(Double.valueOf(mSum.max)) / 10.0; + out += i + "=" + min + "/" + avg + "/" + max; } out += "}\n"; System.out.print(out); diff --git a/src/main/java/dev/morling/onebrc/CalculateAverage_parkertimmins.java b/src/main/java/dev/morling/onebrc/CalculateAverage_parkertimmins.java index 71412fb78..c689ff1ad 100644 --- a/src/main/java/dev/morling/onebrc/CalculateAverage_parkertimmins.java +++ b/src/main/java/dev/morling/onebrc/CalculateAverage_parkertimmins.java @@ -16,28 +16,21 @@ package dev.morling.onebrc; import jdk.incubator.vector.ByteVector; -import jdk.incubator.vector.VectorMask; -import jdk.incubator.vector.VectorOperators; import java.lang.foreign.Arena; import java.lang.foreign.MemorySegment; import java.lang.foreign.ValueLayout; -import java.lang.reflect.Array; -import java.nio.ByteBuffer; import java.nio.ByteOrder; import java.nio.charset.StandardCharsets; import java.io.IOException; import java.io.RandomAccessFile; -import java.nio.MappedByteBuffer; import java.nio.channels.FileChannel; import java.util.*; import java.util.concurrent.atomic.AtomicLong; -import 
java.util.zip.CRC32C; public class CalculateAverage_parkertimmins { private static final String FILE = "./measurements.txt"; - // private static final String FILE = "./full_measurements.no_license"; private static record ResultRow(double min, double mean, double max) { public String toString() { @@ -51,14 +44,16 @@ private double round(double value) { static class OpenHashTable { static class Entry { + + // key always stored as multiple of 32 bytes byte[] key; - short min; - short max; + byte keyLen; + short min = Short.MAX_VALUE; + short max = Short.MIN_VALUE; long sum = 0; long count = 0; - int hash; - void merge(OpenHashTable.Entry other) { + void merge(Entry other) { min = (short) Math.min(min, other.min); max = (short) Math.max(max, other.max); sum += other.sum; @@ -80,15 +75,20 @@ void add(byte[] buf, int sLen, short val, int hash) { // key not present, so add it if (entry == null) { entry = entries[idx] = new Entry(); - entry.key = Arrays.copyOf(buf, sLen); + + int rem = sLen % 32; + int arrayLen = rem == 0 ? 
sLen : sLen + 32 - rem; + entry.key = Arrays.copyOf(buf, arrayLen); + Arrays.fill(entry.key, sLen, arrayLen, (byte) 0); + entry.keyLen = (byte) sLen; + entry.min = entry.max = val; entry.sum += val; entry.count++; - entry.hash = hash; break; } else { - if (entry.hash == hash && entry.key.length == sLen && Arrays.equals(entry.key, 0, sLen, buf, 0, sLen)) { + if (entry.keyLen == sLen && eq(buf, entry.key, entry.keyLen)) { entry.min = (short) Math.min(entry.min, val); entry.max = (short) Math.max(entry.max, val); entry.sum += val; @@ -103,6 +103,23 @@ void add(byte[] buf, int sLen, short val, int hash) { } } + static boolean eq(byte[] buf, byte[] entryKey, int sLen) { + int needed = sLen; + for (int offset = 0; offset <= 96; offset += 32) { + var a = ByteVector.fromArray(ByteVector.SPECIES_256, buf, offset); + var b = ByteVector.fromArray(ByteVector.SPECIES_256, entryKey, offset); + int matches = a.eq(b).not().firstTrue(); + if (needed <= 32) { + return matches >= needed; + } + else if (matches < 32) { + return false; + } + needed -= 32; + } + return false; + } + static long findNextEntryStart(MemorySegment ms, long offset) { long curr = offset; while (ms.get(ValueLayout.JAVA_BYTE, curr) != '\n') { @@ -112,8 +129,17 @@ static long findNextEntryStart(MemorySegment ms, long offset) { return curr; } - static short[] digits10s = { 0, 100, 200, 300, 400, 500, 600, 700, 800, 900 }; - static short[] digits1s = { 0, 10, 20, 30, 40, 50, 60, 70, 80, 90 }; + static short[] digits2s = new short[256]; + static short[] digits1s = new short[256]; + static short[] digits0s = new short[256]; + + static { + for (int i = 0; i < 10; ++i) { + digits2s[i + ((int) '0')] = (short) (i * 100); + digits1s[i + ((int) '0')] = (short) (i * 10); + digits0s[i + ((int) '0')] = (short) i; + } + } static void processRangeScalar(MemorySegment ms, long start, long end, final OpenHashTable localAgg) { byte[] buf = new byte[128]; @@ -139,9 +165,10 @@ static void processRangeScalar(MemorySegment ms, long 
start, long end, final Ope boolean neg = ms.get(ValueLayout.JAVA_BYTE, tempIdx) == '-'; boolean twoDig = ms.get(ValueLayout.JAVA_BYTE, tempIdx + 1 + (neg ? 1 : 0)) == '.'; int len = 3 + (neg ? 1 : 0) + (twoDig ? 0 : 1); - int d0 = ((char) ms.get(ValueLayout.JAVA_BYTE, tempIdx + len - 1)) - '0'; - int d1 = ((char) ms.get(ValueLayout.JAVA_BYTE, tempIdx + len - 3)) - '0'; - int base = d0 + digits1s[d1] + (twoDig ? 0 : digits10s[((char) ms.get(ValueLayout.JAVA_BYTE, tempIdx + len - 4)) - '0']); + int d0 = ((char) ms.get(ValueLayout.JAVA_BYTE, tempIdx + len - 1)); + int d1 = ((char) ms.get(ValueLayout.JAVA_BYTE, tempIdx + len - 3)); + int d2 = ((char) ms.get(ValueLayout.JAVA_BYTE, tempIdx + len - 4)); // could be - or \n + int base = digits0s[d0] + digits1s[d1] + digits2s[d2]; short temp = (short) (neg ? -base : base); localAgg.add(buf, sLen, temp, hash); @@ -150,100 +177,55 @@ static void processRangeScalar(MemorySegment ms, long start, long end, final Ope } static int hash(byte[] buf, int sLen) { - // TODO find a hash that works directly from byte array - // if shorter than 8 chars, mask out upper bits - long mask = sLen < 8 ? 
-(1L << ((8 - sLen) << 3)) : 0xFFFFFFFFL; - long val = ((buf[0] & 0xffL) << 56) | ((buf[1] & 0xffL) << 48) | ((buf[2] & 0xffL) << 40) | ((buf[3] & 0xffL) << 32) | ((buf[4] & 0xffL) << 24) - | ((buf[5] & 0xffL) << 16) | ((buf[6] & 0xFFL) << 8) | (buf[7] & 0xffL); + int shift = Math.max(0, 8 - sLen) << 3; + long mask = (~0L) >>> shift; + long val = ((buf[7] & 0xffL) << 56) | ((buf[6] & 0xffL) << 48) | ((buf[5] & 0xffL) << 40) | ((buf[4] & 0xffL) << 32) | ((buf[3] & 0xffL) << 24) + | ((buf[2] & 0xffL) << 16) | ((buf[1] & 0xFFL) << 8) | (buf[0] & 0xffL); val &= mask; - - // also worth trying: https://lemire.me/blog/2015/10/22/faster-hashing-without-effort/ // lemire: https://lemire.me/blog/2023/07/14/recognizing-string-prefixes-with-simd-instructions/ int hash = (int) (((((val >> 32) ^ val) & 0xffffffffL) * 3523216699L) >> 32); return hash; } - static void processRangeSIMD(MemorySegment ms, boolean frontPad, boolean backPad, long start, long end, final OpenHashTable localAgg) { + static void processRangeSIMD(MemorySegment ms, boolean isFirst, boolean isLast, long start, long end, final OpenHashTable localAgg) { byte[] buf = new byte[128]; - long curr = frontPad ? findNextEntryStart(ms, start) : start; - long limit = end - padding; + long curr = isFirst ? start : findNextEntryStart(ms, start); + long limit = isLast ? 
end - padding : end; - var needle = ByteVector.broadcast(ByteVector.SPECIES_256, ';'); while (curr < limit) { - - int segStart = 0; - int sLen; - - while (true) { - var section = ByteVector.fromMemorySegment(ByteVector.SPECIES_256, ms, curr + segStart, ByteOrder.LITTLE_ENDIAN); - section.intoArray(buf, segStart); - VectorMask matches = section.compare(VectorOperators.EQ, needle); - int idx = matches.firstTrue(); + int nl = 0; + for (int offset = 0; offset < 128; offset += 32) { + ByteVector section = ByteVector.fromMemorySegment(ByteVector.SPECIES_256, ms, curr + offset, ByteOrder.LITTLE_ENDIAN); + section.intoArray(buf, offset); + var idx = section.eq((byte) '\n').firstTrue(); if (idx != 32) { - sLen = segStart + idx; + nl = offset + idx; break; } - segStart += 32; } - int hash = hash(buf, sLen); - - curr += sLen; - curr++; // semicolon - - long tempIdx = curr; - boolean neg = ms.get(ValueLayout.JAVA_BYTE, tempIdx) == '-'; - boolean twoDig = ms.get(ValueLayout.JAVA_BYTE, tempIdx + 1 + (neg ? 1 : 0)) == '.'; - int len = 3 + (neg ? 1 : 0) + (twoDig ? 0 : 1); - int d0 = ((char) ms.get(ValueLayout.JAVA_BYTE, tempIdx + len - 1)) - '0'; - int d1 = ((char) ms.get(ValueLayout.JAVA_BYTE, tempIdx + len - 3)) - '0'; - int base = d0 + digits1s[d1] + (twoDig ? 0 : digits10s[((char) ms.get(ValueLayout.JAVA_BYTE, tempIdx + len - 4)) - '0']); + int nl1 = buf[nl - 1]; + int nl3 = buf[nl - 3]; + int nl4 = buf[nl - 4]; + int nl5 = buf[nl - 5]; + int base = (nl1 - '0') + 10 * (nl3 - '0') + digits2s[nl4]; + boolean neg = nl4 == '-' || (nl4 != ';' && nl5 == '-'); short temp = (short) (neg ? -base : base); + int tempLen = 4 + (neg ? 1 : 0) + (base >= 100 ? 
1 : 0); + int semi = nl - tempLen; - localAgg.add(buf, sLen, temp, hash); - curr = tempIdx + len + 1; + int hash = hash(buf, semi); + localAgg.add(buf, semi, temp, hash); + curr += (nl + 1); } // last batch is near end of file, process without SIMD to avoid out-of-bounds - if (!backPad) { + if (isLast) { processRangeScalar(ms, curr, end, localAgg); } } - /** - * For debugging issues with hash function - */ - static void checkHashDistributionQuality(ArrayList localAggs) { - HashSet uniquesHashValues = new HashSet(); - HashSet uniqueCities = new HashSet(); - HashMap> cityToHash = new HashMap<>(); - - for (var agg : localAggs) { - for (OpenHashTable.Entry entry : agg.entries) { - if (entry == null) { - continue; - } - uniquesHashValues.add(entry.hash); - String station = new String(entry.key, StandardCharsets.UTF_8); // for UTF-8 encoding - uniqueCities.add(station); - - if (!cityToHash.containsKey(station)) { - cityToHash.put(station, new HashSet<>()); - } - cityToHash.get(station).add(entry.hash); - } - } - - for (var pair : cityToHash.entrySet()) { - if (pair.getValue().size() > 1) { - System.err.println("multiple hashes: " + pair.getKey() + " " + pair.getValue()); - } - } - - System.err.println("Unique stations: " + uniqueCities.size() + ", unique hash values: " + uniquesHashValues.size()); - } - /** * Combine thread local values */ @@ -254,7 +236,7 @@ static HashMap mergeAggregations(ArrayList mergeAggregations(ArrayList localAggs = new ArrayList<>(numThreads); Thread[] threads = new Thread[numThreads]; @@ -299,11 +282,9 @@ public void run() { break; } final long endBatch = Math.min(startBatch + batchSize, fileSize); - final boolean first = startBatch == 0; - final boolean frontPad = !first; - final boolean last = endBatch == fileSize; - final boolean backPad = !last; - processRangeSIMD(ms, frontPad, backPad, startBatch, endBatch, localAgg); + final boolean isFirstBatch = startBatch == 0; + final boolean isLastBatch = endBatch == fileSize; + processRangeSIMD(ms, 
isFirstBatch, isLastBatch, startBatch, endBatch, localAgg); } } } diff --git a/src/main/java/dev/morling/onebrc/CalculateAverage_phd3.java b/src/main/java/dev/morling/onebrc/CalculateAverage_phd3.java index e3d1cdbef..97f832b30 100644 --- a/src/main/java/dev/morling/onebrc/CalculateAverage_phd3.java +++ b/src/main/java/dev/morling/onebrc/CalculateAverage_phd3.java @@ -15,18 +15,24 @@ */ package dev.morling.onebrc; -import static java.nio.charset.StandardCharsets.*; import static java.util.stream.Collectors.*; import java.io.File; import java.io.RandomAccessFile; +import java.nio.charset.StandardCharsets; +import java.util.ArrayList; +import java.util.Arrays; import java.util.HashMap; +import java.util.List; import java.util.Map; import java.util.TreeMap; -import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.Callable; +import java.util.concurrent.ExecutionException; import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; +import java.util.concurrent.Future; import java.util.concurrent.TimeUnit; +import java.util.stream.Collectors; import java.util.stream.IntStream; public class CalculateAverage_phd3 { @@ -34,12 +40,16 @@ public class CalculateAverage_phd3 { private static final int NUM_THREADS = Runtime.getRuntime().availableProcessors() * 2; private static final String FILE = "./measurements.txt"; private static final long FILE_SIZE = new File(FILE).length(); + // A chunk is a unit for processing, the file will be divided in chunks of the following size private static final int CHUNK_SIZE = 65536 * 1024; + // Read a little more data into the buffer to finish processing current line private static final int PADDING = 512; + // Minor : Precompute powers to avoid recalculating while parsing doubles (temperatures) private static final double[] POWERS_OF_10 = IntStream.range(0, 6).mapToDouble(x -> Math.pow(10.0, x)).toArray(); - private static final Map globalMap = new ConcurrentHashMap<>(); - + /** + * A Utility to 
print aggregated information in the desired format + */ private record ResultRow(double min, double mean, double max) { public String toString() { @@ -52,7 +62,7 @@ private double round(double value) { }; public static ResultRow resultRow(AggregationInfo aggregationInfo) { - return new ResultRow(aggregationInfo.min, aggregationInfo.sum / aggregationInfo.count, aggregationInfo.max); + return new ResultRow(aggregationInfo.min, (Math.round(aggregationInfo.sum * 10.0) / 10.0) / (aggregationInfo.count), aggregationInfo.max); } public static void main(String[] args) throws Exception { @@ -60,19 +70,37 @@ public static void main(String[] args) throws Exception { int numChunks = (int) Math.ceil(fileLength * 1.0 / CHUNK_SIZE); ExecutorService executorService = Executors.newFixedThreadPool(NUM_THREADS); BufferDataProvider provider = new RandomAccessBasedProvider(FILE, FILE_SIZE); + List> futures = new ArrayList<>(); + // Process chunks in parallel for (int chunkIndex = 0; chunkIndex < numChunks; chunkIndex++) { - executorService.submit(new Aggregator(chunkIndex, provider)); + futures.add(executorService.submit(new Aggregator(chunkIndex, provider))); } executorService.shutdown(); executorService.awaitTermination(10, TimeUnit.MINUTES); - Map measurements = new TreeMap<>(globalMap.entrySet().stream() + Map info = futures.stream().map(f -> { + try { + return f.get(); + } + catch (ExecutionException | InterruptedException e) { + throw new RuntimeException(e); + } + }) + .map(LinearProbingHashMap::toMap) + .flatMap(map -> map.entrySet().stream()) + .sequential() + .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue, AggregationInfo::update)); + + Map measurements = new TreeMap<>(info.entrySet().stream() .collect(toMap(Map.Entry::getKey, e -> resultRow(e.getValue())))); System.out.println(measurements); } + /** + * Stores required running aggregation information to be able to compute min/max/average at the end + */ private static class AggregationInfo { double min = 
Double.POSITIVE_INFINITY; double max = Double.NEGATIVE_INFINITY; @@ -108,13 +136,14 @@ private interface BufferDataProvider { int read(byte[] buffer, long offset) throws Exception; } + /** + * uses RandomAccessFile seek and read APIs to load data into a buffer. + */ private static class RandomAccessBasedProvider implements BufferDataProvider { private final String filePath; - private final long fileSize; RandomAccessBasedProvider(String filePath, long fileSize) { this.filePath = filePath; - this.fileSize = fileSize; } @Override @@ -133,7 +162,10 @@ public int read(byte[] buffer, long offset) throws Exception { } } - private static class Aggregator implements Runnable { + /** + * Task to processes a chunk of file and return a custom linear probing hashmap for performance + */ + private static class Aggregator implements Callable { private final long startByte; private final BufferDataProvider dataProvider; @@ -143,7 +175,7 @@ public Aggregator(long chunkIndex, BufferDataProvider dataProvider) { } @Override - public void run() { + public LinearProbingHashMap call() { try { // offset for the last byte to be processed (excluded) long endByte = Math.min(startByte + CHUNK_SIZE, FILE_SIZE); @@ -151,25 +183,15 @@ public void run() { long bufferSize = endByte - startByte + ((endByte == FILE_SIZE) ? 
0 : PADDING); byte[] buffer = new byte[(int) bufferSize]; int bytes = dataProvider.read(buffer, startByte); - // Partial aggregation to avoid accessing global concurrent map for every entry - Map updated = processBuffer( - buffer, startByte == 0, endByte - startByte); - // Full aggregation with global map - updated.entrySet().forEach(entry -> { - globalMap.compute(entry.getKey(), (k, v) -> { - if (v == null) { - return entry.getValue(); - } - return v.update(entry.getValue()); - }); - }); + // Partial aggregation in a hashmap + return processBuffer(buffer, startByte == 0, endByte - startByte); } catch (Throwable e) { throw new RuntimeException(e); } } - private static Map processBuffer(byte[] buffer, boolean isFileStart, long nextChunkStart) { + private static LinearProbingHashMap processBuffer(byte[] buffer, boolean isFileStart, long nextChunkStart) { int start = 0; // Move to the next entry after '\n'. Don't do this if we're at the start of // the file to avoid missing first entry. @@ -180,13 +202,15 @@ private static Map processBuffer(byte[] buffer, boolean start += 1; } - // local map for this thread, don't need thread safety - Map chunkMap = new HashMap<>(); + LinearProbingHashMap chunkLocalMap = new LinearProbingHashMap(); while (true) { LineInfo lineInfo = getNextLine(buffer, start); - String key = new String(buffer, start, lineInfo.semicolonIndex - start); + byte[] keyBytes = new byte[lineInfo.semicolonIndex - start]; + System.arraycopy(buffer, start, keyBytes, 0, keyBytes.length); double value = parseDouble(buffer, lineInfo.semicolonIndex + 1, lineInfo.nextStart - 1); - update(chunkMap, key, value); + // Update aggregated value for the given key with the new line + AggregationInfo info = chunkLocalMap.get(keyBytes, lineInfo.keyHash); + info.update(value); if ((lineInfo.nextStart > nextChunkStart) || (lineInfo.nextStart >= buffer.length)) { // we are already at a point where the next line will be processed in the next chunk, @@ -196,9 +220,12 @@ private 
static Map processBuffer(byte[] buffer, boolean start = lineInfo.nextStart(); } - return chunkMap; + return chunkLocalMap; } + /** + * Converts bytes to double value without intermediate string conversion, faster than Double.parseDouble. + */ private static double parseDouble(byte[] bytes, int offset, int end) { boolean negative = (bytes[offset] == '-'); int current = negative ? offset + 1 : offset; @@ -216,26 +243,97 @@ private static double parseDouble(byte[] bytes, int offset, int end) { return (preFloat + ((postFloat) / POWERS_OF_10[end - postFloatStart])) * (negative ? -1 : 1); } - private static void update(Map state, String key, double value) { - AggregationInfo info = state.computeIfAbsent(key, k -> new AggregationInfo()); - info.update(value); - } - - // identifies indexes of the next ';' and '\n', which will be used to get entry key and value from line + /** + * Identifies indexes of the next ';' and '\n', which will be used to get entry key and value from line. Also + * computes the hash value for the key while iterating. + */ private static LineInfo getNextLine(byte[] buffer, int start) { // caller guarantees that the access is in bounds, so no index check + int hash = 0; while (buffer[start] != ';') { start++; + hash = hash * 31 + buffer[start]; } + // The following is just to further reduce the probability of collisions + hash = hash ^ (hash << 16); int semicolonIndex = start; // caller guarantees that the access is in bounds, so no index check while (buffer[start] != '\n') { start++; } - return new LineInfo(semicolonIndex, start + 1); + return new LineInfo(semicolonIndex, start + 1, hash); + } + } + + private record LineInfo(int semicolonIndex, int nextStart, int keyHash) { + } + + /** + * A simple map with pre-configured fixed bucket count. With 2^13 buckets and current hash function, seeing 4 + * collisions which is not too bad. Every bucket is implemented with a linked list. The map is NOT thread safe. 
+ */ + private static class LinearProbingHashMap { + private final static int BUCKET_COUNT = 8191; + private final Node[] buckets; + + LinearProbingHashMap() { + this.buckets = new Node[BUCKET_COUNT]; + } + + /** + * Given a key, returns the current value of AggregationInfo. If not present, creates a new empty node at the + * front of the bucket + */ + public AggregationInfo get(byte[] key, int keyHash) { + // find bucket index through bitwise AND, works for bucketCount = (2^p - 1) + int bucketIndex = BUCKET_COUNT & keyHash; + Node current = buckets[bucketIndex]; + while (current != null) { + if (Arrays.equals(current.entry.key(), key)) { + return current.entry.aggregationInfo(); + } + current = current.next; + } + + // Entry does not exist, so add a new node in the linked list + AggregationInfo newInfo = new AggregationInfo(); + KeyValuePair pair = new KeyValuePair(key, keyHash, newInfo); + Node newNode = new Node(pair, buckets[bucketIndex]); + buckets[bucketIndex] = newNode; + return newNode.entry.aggregationInfo(); + } + + /** + * A helper to convert to Java's hash map to build the final aggregation after partial aggregations + */ + private Map toMap() { + Map map = new HashMap<>(); + for (Node bucket : buckets) { + while (bucket != null) { + map.put(new String(bucket.entry.key, StandardCharsets.UTF_8), bucket.entry.aggregationInfo()); + bucket = bucket.next; + } + } + return map; + } + } + + /** + * Linked List node to implement a bucket of custom hash map + */ + private static class Node { + KeyValuePair entry; + Node next; + + public Node(KeyValuePair entry, Node next) { + this.entry = entry; + this.next = next; } } - private record LineInfo(int semicolonIndex, int nextStart) { + /** + * a wrapper class to store information needed for storing a measurement information in the hashmap + */ + private record KeyValuePair(byte[] key, int keyHash, AggregationInfo aggregationInfo) { } } diff --git a/src/main/java/dev/morling/onebrc/CalculateAverage_plbpietrz.java 
b/src/main/java/dev/morling/onebrc/CalculateAverage_plbpietrz.java new file mode 100644 index 000000000..9fb382582 --- /dev/null +++ b/src/main/java/dev/morling/onebrc/CalculateAverage_plbpietrz.java @@ -0,0 +1,273 @@ +/* + * Copyright 2023 The original authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package dev.morling.onebrc; + +import java.io.BufferedOutputStream; +import java.io.IOException; +import java.io.PrintWriter; +import java.io.RandomAccessFile; +import java.io.UncheckedIOException; +import java.nio.MappedByteBuffer; +import java.nio.channels.FileChannel; +import java.nio.charset.Charset; +import java.nio.file.Path; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Comparator; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.stream.Collectors; + +public class CalculateAverage_plbpietrz { + + private static final String FILE = "./measurements.txt"; + private static final int READ_SIZE = 1024; + private static final int CPU_COUNT = Runtime.getRuntime().availableProcessors(); + + private static class TemperatureStats { + double min = 999, max = -999d; + double accumulated; + int count; + + public void update(double temp) { + this.min = Math.min(this.min, temp); + this.max = Math.max(this.max, temp); + this.accumulated += temp; + this.count++; + } + } + + private record FilePart(long pos, long size) { + } + + private static class WeatherStation { + private int length; + 
private int nameHash; + private byte[] nameBytes; + private String string; + + public WeatherStation() { + nameBytes = new byte[128]; + } + + public WeatherStation(WeatherStation station) { + this.nameBytes = Arrays.copyOf(station.nameBytes, station.length); + this.length = station.length; + this.nameHash = station.nameHash; + } + + @Override + public int hashCode() { + return nameHash; + } + + @Override + public boolean equals(Object o) { + if (this == o) + return true; + if (o instanceof WeatherStation s) { + return this.nameHash == s.nameHash && Arrays.equals(this.nameBytes, 0, this.length, s.nameBytes, 0, s.length); + } + return false; + } + + @Override + public String toString() { + if (string == null) + string = new String(nameBytes, 0, length, Charset.defaultCharset()); + return string; + } + + public void appendByte(byte b) { + string = null; + nameBytes[length++] = b; + nameHash = nameHash * 31 + b; + } + + public void clear() { + this.length = 0; + this.nameHash = 0; + this.string = null; + } + + } + + public static void main(String[] args) throws IOException { + Path inputFilePath = Path.of(FILE); + Map results; + try (RandomAccessFile inputFile = new RandomAccessFile(inputFilePath.toFile(), "r")) { + var parsedBuffers = partitionInput(inputFile) + .stream() + .parallel() + .map(fp -> getMappedByteBuffer(fp, inputFile)) + .map(CalculateAverage_plbpietrz::parseBuffer); + results = parsedBuffers.flatMap(m -> m.entrySet().stream()) + .collect( + Collectors.groupingBy( + Map.Entry::getKey, + Collectors.reducing( + new TemperatureStats(), + Map.Entry::getValue, + CalculateAverage_plbpietrz::mergeTemperatureStats))); + try (PrintWriter pw = new PrintWriter(new BufferedOutputStream(System.out))) { + formatResults(pw, results); + } + } + } + + private static List partitionInput(RandomAccessFile inputFile) throws IOException { + List fileParts = new ArrayList<>(); + long fileLength = inputFile.length(); + + long blockSize = Math.min(fileLength, 
Math.max(READ_SIZE, fileLength / CPU_COUNT)); + + for (long start = 0, end; start < fileLength; start = end) { + end = findMinBlockOffset(inputFile, start, blockSize); + fileParts.add(new FilePart(start, end - start)); + } + return fileParts; + } + + private static long findMinBlockOffset(RandomAccessFile file, long startPosition, long minBlockSize) throws IOException { + long length = file.length(); + if (startPosition + minBlockSize < length) { + file.seek(startPosition + minBlockSize); + while (file.readByte() != '\n') { + } + return file.getFilePointer(); + } + else { + return length; + } + } + + private static MappedByteBuffer getMappedByteBuffer(FilePart fp, RandomAccessFile inputFile) { + try { + return inputFile.getChannel().map(FileChannel.MapMode.READ_ONLY, fp.pos, fp.size); + } + catch (IOException e) { + throw new UncheckedIOException(e); + } + } + + private static Map parseBuffer(MappedByteBuffer buffer) { + byte[] readLong = new byte[READ_SIZE]; + byte[] temperature = new byte[32]; + int temperatureLineLenght = 0; + + int limit = buffer.limit(); + boolean readingName = true; + Map temperatures = new HashMap<>(); + WeatherStation station = new WeatherStation(); + + int bytesToRead = Math.min(READ_SIZE, limit - buffer.position()); + while (bytesToRead > 0) { + if (bytesToRead == READ_SIZE) { + buffer.get(readLong); + } + else { + for (int j = 0; j < bytesToRead; ++j) + readLong[j] = buffer.get(); + } + + for (int i = 0; i < bytesToRead; ++i) { + byte aChar = readLong[i]; + if (readingName) { + if (aChar != ';') { + if (aChar != '\n') { + station.appendByte(aChar); + } + } + else { + readingName = false; + } + } + else { + if (aChar != '\n') { + temperature[temperatureLineLenght++] = aChar; + } + else { + double temp = parseTemperature(temperature, temperatureLineLenght); + + if (!temperatures.containsKey(station)) { + temperatures.put(new WeatherStation(station), new TemperatureStats()); + } + TemperatureStats weatherStats = temperatures.get(station); + 
weatherStats.update(temp); + + station.clear(); + temperatureLineLenght = 0; + readingName = true; + } + } + } + + bytesToRead = Math.min(READ_SIZE, limit - buffer.position()); + } + return temperatures; + } + + private static double parseTemperature(byte[] temperature, int temperatureSize) { + double sign = 1; + double manitssa = 0; + double exponent = 1; + for (int i = 0; i < temperatureSize; ++i) { + byte c = temperature[i]; + switch (c) { + case '-': + sign = -1; + break; + case '.': + for (int j = i; j < temperatureSize - 1; ++j) + exponent *= 0.1; + break; + default: + manitssa = manitssa * 10 + (c - 48); + } + } + return sign * manitssa * exponent; + } + + private static TemperatureStats mergeTemperatureStats(TemperatureStats v1, TemperatureStats v2) { + TemperatureStats acc = new TemperatureStats(); + acc.min = Math.min(v1.min, v2.min); + acc.max = Math.max(v1.max, v2.max); + acc.accumulated = v1.accumulated + v2.accumulated; + acc.count = v1.count + v2.count; + return acc; + } + + private static void formatResults(PrintWriter pw, Map resultsMap) { + pw.print('{'); + var results = new ArrayList<>(resultsMap.entrySet()); + results.sort(Comparator.comparing(e -> e.getKey().toString())); + var iterator = results.iterator(); + while (iterator.hasNext()) { + var entry = iterator.next(); + TemperatureStats stats = entry.getValue(); + pw.printf("%s=%.1f/%.1f/%.1f", + entry.getKey(), + stats.min, + stats.accumulated / stats.count, + stats.max); + if ((iterator.hasNext())) + pw.print(", "); + } + pw.println('}'); + } + +} diff --git a/src/main/java/dev/morling/onebrc/CalculateAverage_plevart.java b/src/main/java/dev/morling/onebrc/CalculateAverage_plevart.java new file mode 100644 index 000000000..80c9e892a --- /dev/null +++ b/src/main/java/dev/morling/onebrc/CalculateAverage_plevart.java @@ -0,0 +1,402 @@ +/* + * Copyright 2023 The original authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in 
compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package dev.morling.onebrc; + +import jdk.incubator.vector.ByteVector; +import jdk.incubator.vector.VectorOperators; + +import java.io.IOException; +import java.lang.foreign.Arena; +import java.lang.foreign.MemorySegment; +import java.lang.foreign.ValueLayout; +import java.nio.ByteOrder; +import java.nio.channels.FileChannel; +import java.nio.charset.StandardCharsets; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.StandardOpenOption; +import java.util.Comparator; +import java.util.Objects; +import java.util.stream.Collectors; +import java.util.stream.IntStream; +import java.util.stream.Stream; + +public class CalculateAverage_plevart { + private static final Path FILE = Path.of("measurements.txt"); + + private static final int MAX_CITY_LEN = 100; + // 100 (city name) + 1 (;) + 5 (-99.9) + 1 (NL) + private static final int MAX_LINE_LEN = MAX_CITY_LEN + 7; + + private static final int INITIAL_TABLE_CAPACITY = 8192; + + public static void main(String[] args) throws IOException { + System.setProperty("jdk.incubator.vector.VECTOR_ACCESS_OOB_CHECK", "0"); + try ( + var channel = (FileChannel) Files.newByteChannel(FILE, StandardOpenOption.READ); + var arena = Arena.ofShared()) { + var segment = channel.map(FileChannel.MapMode.READ_ONLY, 0, Files.size(FILE), arena); + int regions = Runtime.getRuntime().availableProcessors(); + IntStream + .range(0, regions) + .parallel() + .mapToObj(r -> calculateRegion(segment, regions, r)) + .reduce(StatsTable::reduce) + .ifPresent(System.out::println); + 
} + } + + private static StatsTable calculateRegion(MemorySegment segment, int regions, int r) { + long start = (segment.byteSize() * r) / regions; + long end = (segment.byteSize() * (r + 1)) / regions; + if (r > 0) { + start = skipPastNl(segment, start); + } + if (r + 1 < regions) { + end = skipPastNl(segment, end); + } + + return calculateAdjustedRegion(segment, start, end); + } + + private static long skipPastNl(MemorySegment segment, long i) { + int skipped = 0; + while (skipped++ < MAX_LINE_LEN && segment.get(ValueLayout.JAVA_BYTE, i++) != '\n') { + } + if (skipped > MAX_LINE_LEN) { + throw new IllegalArgumentException( + "Encountered line that exceeds " + MAX_LINE_LEN + " bytes at offset: " + i); + } + return i; + } + + private static StatsTable calculateAdjustedRegion(MemorySegment segment, long start, long end) { + var stats = new StatsTable(segment, INITIAL_TABLE_CAPACITY); + + var species = ByteVector.SPECIES_PREFERRED; + + long cityStart = start, numberStart = 0; + int cityLen = 0; + + for (long i = start, j = i; i < end; j = i) { + long semiNlSet; + if (end - i >= species.vectorByteSize()) { + var vec = ByteVector.fromMemorySegment(species, segment, i, ByteOrder.nativeOrder()); + semiNlSet = vec.compare(VectorOperators.EQ, (byte) ';') + .or(vec.compare(VectorOperators.EQ, (byte) '\n')) + .toLong(); + i += species.vectorByteSize(); + } + else { // tail, smaller than speciesByteSize + semiNlSet = 0; + long mask = 1; + while (i < end && mask != 0) { + int c = segment.get(ValueLayout.JAVA_BYTE, i++); + if (c == '\n' || c == ';') { + semiNlSet |= mask; + } + mask <<= 1; + } + } + + for (int step = Long.numberOfTrailingZeros(semiNlSet); step < 64; semiNlSet >>>= (step + 1), step = Long.numberOfTrailingZeros(semiNlSet)) { + j += step; + if (numberStart == 0) { // semi + cityLen = (int) (j - cityStart); + numberStart = ++j; + } + else { // nl + int numberLen = (int) (j - numberStart); + stats.calculateEntry(cityStart, cityLen, numberStart, numberLen); + 
cityStart = ++j; + numberStart = 0; + } + } + } + + return stats; + } + + final static class StatsTable { + private static final int LOAD_FACTOR = 16; + // offsets of fields + private static final int _lenHash = 0, + _off = 1, + _count = 2, + _sum = 3, + _min = 4, + _max = 5; + private final MemorySegment segment; + private int pow2cap, loadedSize; + private long[] table; + + StatsTable(MemorySegment segment, int capacity) { + this.segment = Objects.requireNonNull(segment); + int pow2cap = Integer.highestOneBit(capacity); + if (pow2cap < capacity) { + pow2cap <<= 1; + } + this.pow2cap = pow2cap; + this.table = new long[idx(pow2cap)]; + } + + private StatsTable(StatsTable st) { + this.segment = st.segment; + this.pow2cap = st.pow2cap; + this.loadedSize = st.loadedSize; + this.table = st.table; + } + + private static int idx(int i) { + return i << 3; + } + + private static long lenHash(int len, int hash) { + return ((long) len << 32) | ((long) hash & 0x00000000FFFFFFFFL); + } + + private static int len(long lenHash) { + return (int) (lenHash >>> 32); + } + + private static int hash(long lenHash) { + return (int) (lenHash & 0x00000000FFFFFFFFL); + } + + private static final long[] LEN_LONG_MASK; + private static final int[] LEN_INT_MASK; + + static { + LEN_LONG_MASK = new long[Long.BYTES + 1]; + for (int len = 0; len <= Long.BYTES; len++) { + LEN_LONG_MASK[len] = len == 0 + ? 0L + : ValueLayout.JAVA_LONG_UNALIGNED.order() == ByteOrder.LITTLE_ENDIAN + ? -1L >>> ((Long.BYTES - len) * Byte.SIZE) + : -1L << ((Long.BYTES - len) * Byte.SIZE); + } + LEN_INT_MASK = new int[Integer.BYTES + 1]; + for (int len = 0; len <= Integer.BYTES; len++) { + LEN_INT_MASK[len] = len == 0 + ? 0 + : ValueLayout.JAVA_LONG_UNALIGNED.order() == ByteOrder.LITTLE_ENDIAN + ? 
-1 >>> ((Integer.BYTES - len) * Byte.SIZE) + : -1 << ((Integer.BYTES - len) * Byte.SIZE); + } + } + + void calculateEntry(long cityStart, int cityLen, long numberStart, int numberLen) { + int hash = hash(cityStart, cityLen); + int number = parseNumber(numberStart, numberLen); + aggregate(cityStart, cityLen, hash, 1, number, number, number); + } + + int parseNumber(long off, int len) { + int c0 = segment.get(ValueLayout.JAVA_BYTE, off); + int d0; + int sign; + if (c0 == '-') { + off++; + len--; + d0 = segment.get(ValueLayout.JAVA_BYTE, off) - '0'; + sign = -1; + } else { + d0 = c0 - '0'; + sign = 1; + } + return sign * switch (len) { + case 1 -> d0 * 10; // 9 + case 2 -> { + int d1 = segment.get(ValueLayout.JAVA_BYTE, off + 1) - '0'; + yield d0 * 100 + d1 * 10; // 99 + } + case 3 -> { + int d2 = segment.get(ValueLayout.JAVA_BYTE, off + 2) - '0'; + yield d0 * 10 + d2; // 9.9 + } + case 4 -> { + int d1 = segment.get(ValueLayout.JAVA_BYTE, off + 1) - '0'; + int d3 = segment.get(ValueLayout.JAVA_BYTE, off + 3) - '0'; + yield d0 * 100 + d1 * 10 + d3; // 99.9 + } + default -> + throw new IllegalArgumentException( + "Invalid number: " + + new String(segment.asSlice(off, len).toArray(ValueLayout.JAVA_BYTE), StandardCharsets.UTF_8) + ); + }; + } + + int hash(long off, int len) { + if (len > Integer.BYTES) { + int head = segment.get(ValueLayout.JAVA_INT_UNALIGNED, off); + int tail = segment.get(ValueLayout.JAVA_INT_UNALIGNED, off + len - Integer.BYTES); + return (head * 31) ^ tail; + } + else { + // assert len >= 0 && len <= 4; + // each city name starts at least 4 bytes before segment end + // assert off + Integer.BYTES <= segment.byteSize(); + return segment.get(ValueLayout.JAVA_INT_UNALIGNED, off) & LEN_INT_MASK[len]; + } + } + + private static boolean bothLessThan(long a, long b, long threshold) { + return (a < threshold) && (b < threshold); + } + + boolean equals(long off1, long off2, int len) { + while (len >= Long.BYTES) { + if 
(segment.get(ValueLayout.JAVA_LONG_UNALIGNED, off1) != segment.get(ValueLayout.JAVA_LONG_UNALIGNED, off2)) { + return false; + } + off1 += Long.BYTES; + off2 += Long.BYTES; + len -= Long.BYTES; + } + // still enough memory to compare two longs, but masked? + if (bothLessThan(off1, off2, segment.byteSize() - Long.BYTES + 1)) { + long mask = LEN_LONG_MASK[len]; + return (segment.get(ValueLayout.JAVA_LONG_UNALIGNED, off1) & mask) == (segment.get(ValueLayout.JAVA_LONG_UNALIGNED, off2) & mask); + } + else { + return equalsAtBorder(off1, off2, len); + } + } + + private boolean equalsAtBorder(long off1, long off2, int len) { + if (len > Integer.BYTES) { + if (segment.get(ValueLayout.JAVA_INT_UNALIGNED, off1) != segment.get(ValueLayout.JAVA_INT_UNALIGNED, off2)) { + return false; + } + len -= Integer.BYTES; + off1 += Integer.BYTES; + off2 += Integer.BYTES; + } + // assert len >= 0 && len <= 4; + // each city name starts at least 4 bytes before segment end + // assert Math.max(off1, off2) + Integer.BYTES <= segment.byteSize(); + int mask = LEN_INT_MASK[len]; + return (segment.get(ValueLayout.JAVA_INT_UNALIGNED, off1) & mask) == (segment.get(ValueLayout.JAVA_INT_UNALIGNED, off2) & mask); + } + + void aggregate( + // key + long off, int len, int hash, + // value + long count, long sum, int min, int max) { + long lenHash = lenHash(len, hash); + int mask = pow2cap - 1; + for (int i = hash & mask, probe = 0; probe < pow2cap; i = (i + 1) & mask, probe++) { + int idx = idx(i); + long lenHash_i = table[idx + _lenHash]; + if (lenHash_i == 0) { + table[idx + _lenHash] = lenHash; + table[idx + _off] = off; + table[idx + _count] = count; + table[idx + _sum] = sum; + table[idx + _min] = min; + table[idx + _max] = max; + loadedSize += LOAD_FACTOR; + if (loadedSize >= pow2cap) { + grow(); + } + return; + } + if (lenHash_i == lenHash && equals(off, table[idx + _off], len)) { + table[idx + _count] += count; + table[idx + _sum] += sum; + table[idx + _min] = Math.min(min, (int) table[idx + 
_min]); + table[idx + _max] = Math.max(max, (int) table[idx + _max]); + return; + } + } + throw new OutOfMemoryError("StatsTable capacity exceeded due to poor hash"); + } + + private void grow() { + if (idx(pow2cap) >= 0x4000_0000) { + throw new OutOfMemoryError("StatsTable capacity exceeded"); + } + else { + var oldStats = new StatsTable(this); + pow2cap <<= 1; + table = new long[idx(pow2cap)]; + loadedSize = 0; + reduce(oldStats); + } + } + + StatsTable reduce(StatsTable other) { + other + .idxStream() + .forEach( + idx -> aggregate( + other.table[idx + _off], + len(other.table[idx + _lenHash]), + hash(other.table[idx + _lenHash]), + other.table[idx + _count], + other.table[idx + _sum], + (int) other.table[idx + _min], + (int) other.table[idx + _max])); + return this; + } + + IntStream idxStream() { + return IntStream + .range(0, pow2cap) + .map(StatsTable::idx) + .filter(idx -> table[idx + _lenHash] != 0); + } + + Stream stream() { + return idxStream() + .mapToObj( + idx -> new Entry( + new String( + segment + .asSlice(table[idx + _off], len(table[idx + _lenHash])) + .toArray(ValueLayout.JAVA_BYTE), + StandardCharsets.UTF_8), + table[idx + _count], + table[idx + _sum], + table[idx + _min], + table[idx + _max])); + } + + @Override + public String toString() { + return stream() + .sorted(Comparator.comparing(StatsTable.Entry::city)) + .map(Entry::toString) + .collect(Collectors.joining(", ", "{", "}")); + } + + record Entry(String city, long count, long sum, long min, long max) { + double average() { + return count > 0L ? 
(double) sum / (double) count : 0d; + } + + @Override + public String toString() { + return String.format( + "%s=%.1f/%.1f/%.1f", + city(), (double) min() / 10d, average() / 10d, (double) max() / 10d + ); + } + } + } +} \ No newline at end of file diff --git a/src/main/java/dev/morling/onebrc/CalculateAverage_rcasteltrione.java b/src/main/java/dev/morling/onebrc/CalculateAverage_rcasteltrione.java new file mode 100644 index 000000000..d7d93e548 --- /dev/null +++ b/src/main/java/dev/morling/onebrc/CalculateAverage_rcasteltrione.java @@ -0,0 +1,309 @@ +/* + * Copyright 2023 The original authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package dev.morling.onebrc; + +import java.io.IOException; +import java.io.RandomAccessFile; +import java.nio.MappedByteBuffer; +import java.nio.channels.FileChannel; +import java.nio.charset.StandardCharsets; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.nio.file.StandardOpenOption; +import java.time.Duration; +import java.time.Instant; +import java.util.*; + +import static java.util.stream.Collectors.toMap; + +//baseline: 266s + +public class CalculateAverage_rcasteltrione { + private static final String FILE = "./measurements.txt"; + // private static final String FILE = "./backup/measurements.txt"; + + public static void main(String[] args) throws IOException, InterruptedException { + Path path = Paths.get(FILE); + Instant start = Instant.now(); + + var segList = FileSegment.forFile(path, Runtime.getRuntime().availableProcessors()); + var results = new ByteArrayToMeasurementMap[segList.size()]; + var threads = new Thread[segList.size()]; + try (var channel = FileChannel.open(path, StandardOpenOption.READ)) { + for (int i = 0; i < segList.size(); i++) { + int finalI = i; + FileSegment fileSegment = segList.get(finalI); + var t = Thread.ofPlatform().start(() -> results[finalI] = processSegment(channel, fileSegment)); + threads[i] = t; + } + for (Thread thread : threads) { + thread.join(); + } + } + + Map aggregatedMap = Arrays.stream(results) + .flatMap(m -> m.entries().stream()) + .collect(toMap( + ByteArrayToMeasurementMap.Entry::key, + ByteArrayToMeasurementMap.Entry::value, + Measurement::merge, + TreeMap::new)); + + System.out.println(aggregatedMap); + // System.out.println(Duration.between(start, Instant.now()).toMillis()); + } + + private static ByteArrayToMeasurementMap processSegment(FileChannel channel, FileSegment seg) { + try { + MappedByteBuffer mbb = channel.map(FileChannel.MapMode.READ_ONLY, seg.start(), seg.size()); + byte b; + var result = new ByteArrayToMeasurementMap(); + var lineBuffer = new byte[1 << 13]; + var 
segmentPosition = mbb.position(); + var limit = mbb.limit(); + var lastLineOffset = 0; + + while (segmentPosition < mbb.limit()) { + + int remaining = limit - segmentPosition; + int chunk = Math.min(remaining, lineBuffer.length); + mbb.get(segmentPosition, lineBuffer, 0, chunk); + for (int i = chunk - 1; i >= 0; i--) { + if (lineBuffer[i] == '\n') { + lastLineOffset = i; + break; + } + } + for (int lineBufferOffset = 0; lineBufferOffset < lastLineOffset;) { + int nameHash = 0; + int nameLength = 0; + int nameStart = lineBufferOffset; + while ((b = lineBuffer[lineBufferOffset++]) != ';') { + nameHash = 31 * nameHash + b; + nameLength++; + } + + int temp; + int negative = 1; + // var s = new String(Arrays.copyOfRange(lineBuffer, nameStart, lineOffset - 1), StandardCharsets.UTF_8); + if (lineBuffer[lineBufferOffset] == '-') { + lineBufferOffset++; + negative = -1; + } + + // Temperature value: non-null double between -99.9 (inclusive) and 99.9 (inclusive), always with one fractional digit + if (lineBuffer[lineBufferOffset + 1] == '.') { + temp = (lineBuffer[lineBufferOffset] - '0') * 10 + (lineBuffer[lineBufferOffset + 2] - '0'); + lineBufferOffset += 3; + } + else { + temp = (lineBuffer[lineBufferOffset] - '0') * 100 + + (lineBuffer[lineBufferOffset + 1] - '0') * 10 + + (lineBuffer[lineBufferOffset + 3] - '0'); + lineBufferOffset += 4; + } + if (lineBuffer[lineBufferOffset] == '\r') { + lineBufferOffset++; + } + lineBufferOffset++; + + temp *= negative; + result.mergeOrCreate(lineBuffer, nameStart, nameLength, nameHash, temp); + // segmentPosition += lineOffset; + // i += lineoffset; + } + + segmentPosition += lastLineOffset + 1; + + } + + return result; + } + catch (IOException e) { + throw new RuntimeException(e); + } + } + + record FileSegment(long start, long size) { + public static List forFile(Path file, int desiredSegmentsCount) throws IOException { + try (var raf = new RandomAccessFile(file.toFile(), "r")) { + var segments = new ArrayList(); + var fileSize = 
raf.length(); + if (fileSize < 1000000) { + return Collections.singletonList(new FileSegment(0, fileSize)); + } + var segmentSize = fileSize / desiredSegmentsCount; + for (int segmentIdx = 0; segmentIdx < desiredSegmentsCount; segmentIdx++) { + var segStart = segmentIdx * segmentSize; + var segEnd = (segmentIdx == desiredSegmentsCount - 1) ? fileSize : segStart + segmentSize; + segStart = findSegmentBoundary(raf, segmentIdx, 0, segStart, segEnd); + segEnd = findSegmentBoundary(raf, segmentIdx, desiredSegmentsCount - 1, segEnd, fileSize); + + var segSize = segEnd - segStart; + + segments.add(new FileSegment(segStart, segSize)); + } + return segments; + } + } + + private static long findSegmentBoundary(RandomAccessFile raf, int i, int skipForSegment, long location, long fileSize) throws IOException { + if (i == skipForSegment) return location; + + raf.seek(location); + while (location < fileSize) { + location++; + if (raf.read() == '\n') break; + } + return location; + } + } + + static class Measurement { + int min, max, n; + long sum; + + private Measurement(int min, int max, long sum, int n) { + this.min = min; + this.max = max; + this.sum = sum; + this.n = n; + } + + public Measurement(int temp) { + this(temp, temp, temp, 1); + } + + final Measurement merge(Measurement other) { + this.min = Math.min(other.min, this.min); + this.max = Math.max(other.max, this.max); + this.sum += other.sum; + this.n += other.n; + return this; + } + + @Override + public String toString() { + return STR."\{round(min)}/\{round(((double) sum / n))}/\{round(max)}"; + } + + double round(double v) { + return Math.round(v) / 10.0; + } + } + + static class ByteArrayToMeasurementMap { + + public static final int DEFAULT_CAPACITY = 1024; + public static final float LOAD_FACTOR = 0.75f; + MeasurementSlot[] slots = new MeasurementSlot[DEFAULT_CAPACITY]; + int threshold = (int) (DEFAULT_CAPACITY * LOAD_FACTOR); + int size = 0; + + private record MeasurementSlot(int hash, byte[] key, String city, 
Measurement measurement) { + } + + public final void mergeOrCreate(byte[] line, int nameStart, int nameLength, int hash, int temperature) { + int hashMask = slots.length - 1; + + for (int idx = hash & hashMask;; idx = (idx + 1) & hashMask) { + MeasurementSlot slot = slots[idx]; + if (slot == null) { + size++; + if (size > threshold) { + idx = resize(hash); + } + byte[] nameBuffer = new byte[nameLength]; + System.arraycopy(line, nameStart, nameBuffer, 0, nameLength); + slots[idx] = new MeasurementSlot( + hash, + nameBuffer, + new String(nameBuffer, StandardCharsets.UTF_8), + new Measurement(temperature)); + return; + } + + if (slot.hash == hash && arrayEquals(slot.key, line, nameStart, nameLength)) { + Measurement value = slots[idx].measurement; + value.min = Math.min(value.min, temperature); + value.max = Math.max(value.max, temperature); + value.sum += temperature; + value.n++; + return; + } + } + } + + private int resize(int hash) { + var oldSlots = slots; + var newSlots = new MeasurementSlot[oldSlots.length << 1]; + var mask = newSlots.length - 1; + for (MeasurementSlot oldSlot : oldSlots) { + if (oldSlot == null) { + continue; + } + int idx = oldSlot.hash & mask; + while (newSlots[idx] != null) { + idx = (idx + 1) & mask; + } + newSlots[idx] = oldSlot; + } + + slots = newSlots; + threshold = (int) (newSlots.length * LOAD_FACTOR); + int hashMask = slots.length - 1; + int idx; + for (idx = hash & hashMask; slots[idx] != null; idx = (idx + 1) & hashMask) { + } + return idx; + } + + private boolean arrayEquals(byte[] storedKey, byte[] line, int nameStart, int nameLength) { + if (storedKey.length != nameLength) { + return false; + } + + for (int i = 0; i < storedKey.length; i++) { + if (storedKey[i] != line[nameStart + i]) { + return false; + } + } + return true; + } + + private static int hashCode(int h) { + h ^= (h >>> 20) ^ (h >>> 12); + h ^= (h >>> 7) ^ (h >>> 4); + h += h << 7; + return h; + } + + public final List entries() { + var result = new 
ArrayList(slots.length); + for (MeasurementSlot slot : slots) { + if (slot != null) { + result.add(new Entry(slot.city, slot.measurement)); + } + } + return result; + } + + public record Entry(String key, Measurement value) { + } + + } + +} diff --git a/src/main/java/dev/morling/onebrc/CalculateAverage_roman_r_m.java b/src/main/java/dev/morling/onebrc/CalculateAverage_roman_r_m.java index c869b7d9c..7529e8ad8 100644 --- a/src/main/java/dev/morling/onebrc/CalculateAverage_roman_r_m.java +++ b/src/main/java/dev/morling/onebrc/CalculateAverage_roman_r_m.java @@ -24,51 +24,54 @@ import java.lang.reflect.Field; import java.nio.channels.FileChannel; import java.nio.file.Paths; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.Map; import java.util.TreeMap; import java.util.stream.IntStream; public class CalculateAverage_roman_r_m { - public static final int DOT_3_RD_BYTE_MASK = (byte) '.' << 16; private static final String FILE = "./measurements.txt"; - private static MemorySegment ms; private static Unsafe UNSAFE; - // based on http://0x80.pl/notesen/2023-03-06-swar-find-any.html - static long hasZeroByte(long l) { - return ((l - 0x0101010101010101L) & ~(l) & 0x8080808080808080L); - } - - static long firstSetByteIndex(long l) { - return ((((l - 1) & 0x101010101010101L) * 0x101010101010101L) >> 56) - 1; - } - - static long broadcast(byte b) { + private static long broadcast(byte b) { return 0x101010101010101L * b; } - static long SEMICOLON_MASK = broadcast((byte) ';'); - static long LINE_END_MASK = broadcast((byte) '\n'); - - static long find(long l, long mask) { - long xor = l ^ mask; - long match = hasZeroByte(xor); - return match != 0 ? 
firstSetByteIndex(match) : -1; + private static final long SEMICOLON_MASK = broadcast((byte) ';'); + private static final long LINE_END_MASK = broadcast((byte) '\n'); + private static final long DOT_MASK = broadcast((byte) '.'); + private static final long ZEROES_MASK = broadcast((byte) '0'); + + // from netty + + /** + * Applies a compiled pattern to given word. + * Returns a word where each byte that matches the pattern has the highest bit set. + */ + private static long applyPattern(final long word, final long pattern) { + long input = word ^ pattern; + long tmp = (input & 0x7F7F7F7F7F7F7F7FL) + 0x7F7F7F7F7F7F7F7FL; + return ~(tmp | input | 0x7F7F7F7F7F7F7F7FL); } - static long nextNewline(long from) { + static long nextNewline(long from, MemorySegment ms) { long start = from; long i; long next = ms.get(ValueLayout.JAVA_LONG_UNALIGNED, start); - while ((i = find(next, LINE_END_MASK)) < 0) { + while ((i = applyPattern(next, LINE_END_MASK)) == 0) { start += 8; next = ms.get(ValueLayout.JAVA_LONG_UNALIGNED, start); } - return start + i; + return start + Long.numberOfTrailingZeros(i) / 8; + } + + static int hashFull(long word) { + return (int) (word ^ (word >>> 32)); + } + + static int hashPartial(long word, int bytes) { + long h = Long.reverseBytes(word) >>> (8 * (8 - bytes)); + return (int) (h ^ (h >>> 32)); } public static void main(String[] args) throws Exception { @@ -79,98 +82,115 @@ public static void main(String[] args) throws Exception { long fileSize = new File(FILE).length(); var channel = FileChannel.open(Paths.get(FILE)); - ms = channel.map(FileChannel.MapMode.READ_ONLY, 0, fileSize, Arena.ofAuto()); + MemorySegment ms = channel.map(FileChannel.MapMode.READ_ONLY, 0, fileSize, Arena.ofConfined()); int numThreads = fileSize > Integer.MAX_VALUE ? Runtime.getRuntime().availableProcessors() : 1; long chunk = fileSize / numThreads; + + var bounds = IntStream.range(0, numThreads).mapToLong(i -> { + boolean lastChunk = i == numThreads - 1; + return lastChunk ? 
fileSize : nextNewline((i + 1) * chunk, ms); + }).toArray(); + + ms.unload(); + var result = IntStream.range(0, numThreads) .parallel() .mapToObj(i -> { - boolean lastChunk = i == numThreads - 1; - long chunkStart = i == 0 ? 0 : nextNewline(i * chunk) + 1; - long chunkEnd = lastChunk ? fileSize : nextNewline((i + 1) * chunk); - - var resultStore = new ResultStore(); - var station = new ByteString(); - - long offset = chunkStart; - while (offset < chunkEnd) { - long start = offset; - long pos = -1; - - while (chunkEnd - offset >= 8) { - long next = UNSAFE.getLong(ms.address() + offset); - pos = find(next, SEMICOLON_MASK); - if (pos >= 0) { - offset += pos; - break; + try { + long segmentStart = i == 0 ? 0 : bounds[i - 1] + 1; + long segmentEnd = bounds[i]; + var segment = channel.map(FileChannel.MapMode.READ_ONLY, segmentStart, segmentEnd - segmentStart, Arena.ofConfined()); + + var resultStore = new ResultStore(); + var station = new ByteString(segment); + long offset = segment.address(); + long end = offset + segment.byteSize(); + long tailMask; + while (offset < end) { + // parsing station name + long start = offset; + long next = UNSAFE.getLong(offset); + long pattern = applyPattern(next, SEMICOLON_MASK); + int bytes; + if (pattern == 0) { + station.hash = hashFull(next); + do { + offset += 8; + next = UNSAFE.getLong(offset); + pattern = applyPattern(next, SEMICOLON_MASK); + } while (pattern == 0); + + bytes = Long.numberOfTrailingZeros(pattern) / 8; + offset += bytes; + tailMask = ((1L << (8 * bytes)) - 1); } else { - offset += 8; - } - } - if (pos < 0) { - while (UNSAFE.getByte(ms.address() + offset++) != ';') { + bytes = Long.numberOfTrailingZeros(pattern) / 8; + offset += bytes; + tailMask = ((1L << (8 * bytes)) - 1); + + station.hash = hashPartial(next, bytes); } - offset--; - } - int len = (int) (offset - start); - // TODO can we not copy and use a reference into the memory segment to perform table lookup? 
+ int len = (int) (offset - start); + station.offset = start; + station.len = len; + station.tail = next & tailMask; - station.offset = start; - station.len = len; - station.hash = 0; + offset++; - offset++; + // parsing temperature + // TODO next may contain temperature as well, maybe try using it if we know the full number is there + // 8 - bytes >= 5 -> bytes <= 3 + long val; + if (end - offset >= 8) { + long encodedVal = UNSAFE.getLong(offset); - long val; - boolean neg; - if (!lastChunk || fileSize - offset >= 8) { - long encodedVal = UNSAFE.getLong(ms.address() + offset); - neg = (encodedVal & (byte) '-') == (byte) '-'; - if (neg) { - encodedVal >>= 8; - offset++; - } + int neg = 1 - Integer.bitCount((int) (encodedVal & 0x10)); + encodedVal >>>= 8 * neg; + + long numLen = applyPattern(encodedVal, DOT_MASK); + numLen = Long.numberOfTrailingZeros(numLen) / 8; + + encodedVal ^= ZEROES_MASK; + + int intPart = (int) (encodedVal & ((1 << (8 * numLen)) - 1)); + intPart <<= 8 * (2 - numLen); + intPart *= (100 * 256 + 10); + intPart = (intPart & 0x3FF80) >>> 8; - if ((encodedVal & DOT_3_RD_BYTE_MASK) == DOT_3_RD_BYTE_MASK) { - val = (encodedVal & 0xFF - 0x30) * 100 + (encodedVal >> 8 & 0xFF - 0x30) * 10 + (encodedVal >> 24 & 0xFF - 0x30); - offset += 5; + int frac = (int) ((encodedVal >>> (8 * (numLen + 1))) & 0xFF); + + offset += neg + numLen + 3; // 1 for . 
+ 1 for fractional part + 1 for new line char + int sign = 1 - 2 * neg; + val = sign * (intPart + frac); } else { - // based on http://0x80.pl/articles/simd-parsing-int-sequences.html#parsing-and-conversion-of-signed-numbers - val = Long.compress(encodedVal, 0xFF00FFL) - 0x303030; - val = ((val * 2561) >> 8) & 0xff; - offset += 4; - } - } - else { - neg = UNSAFE.getByte(ms.address() + offset) == '-'; - if (neg) { - offset++; - } - val = UNSAFE.getByte(ms.address() + offset++) - '0'; - byte b; - while ((b = UNSAFE.getByte(ms.address() + offset++)) != '.') { + int neg = 1 - Integer.bitCount(UNSAFE.getByte(offset) & 0x10); + offset += neg; + + val = UNSAFE.getByte(offset++) - '0'; + byte b; + while ((b = UNSAFE.getByte(offset++)) != '.') { + val = val * 10 + (b - '0'); + } + b = UNSAFE.getByte(offset); val = val * 10 + (b - '0'); + offset += 2; + val *= 1 - (2L * neg); } - b = UNSAFE.getByte(ms.address() + offset); - val = val * 10 + (b - '0'); - offset += 2; - } - if (neg) { - val = -val; + resultStore.update(station, (int) val); } - var a = resultStore.get(station); - a.min = Math.min(a.min, val); - a.max = Math.max(a.max, val); - a.sum += val; - a.count++; + segment.unload(); + + return resultStore.toMap(); + } + catch (Exception e) { + throw new RuntimeException(e); } - return resultStore.toMap(); }).reduce((m1, m2) -> { m2.forEach((k, v) -> m1.merge(k, v, ResultRow::merge)); return m1; @@ -181,22 +201,27 @@ public static void main(String[] args) throws Exception { static final class ByteString { + private final MemorySegment ms; private long offset; private int len = 0; private int hash = 0; + private long tail = 0L; - @Override - public String toString() { - var bytes = new byte[len]; - MemorySegment.copy(ms, ValueLayout.JAVA_BYTE, offset, bytes, 0, len); - return new String(bytes, 0, len); + ByteString(MemorySegment ms) { + this.ms = ms; + } + + public String asString(byte[] reusable) { + UNSAFE.copyMemory(null, offset, reusable, Unsafe.ARRAY_BYTE_BASE_OFFSET, 
len); + return new String(reusable, 0, len); } public ByteString copy() { - var copy = new ByteString(); + var copy = new ByteString(ms); copy.offset = this.offset; copy.len = this.len; copy.hash = this.hash; + copy.tail = this.tail; return copy; } @@ -212,44 +237,40 @@ public boolean equals(Object o) { if (len != that.len) return false; - int i = 0; - - long base1 = ms.address() + offset; - long base2 = ms.address() + that.offset; - for (; i + 3 < len; i += 4) { - int i1 = UNSAFE.getInt(base1 + i); - int i2 = UNSAFE.getInt(base2 + i); - if (i1 != i2) { + for (int i = 0; i + 7 < len; i += 8) { + long l1 = UNSAFE.getLong(offset + i); + long l2 = UNSAFE.getLong(that.offset + i); + if (l1 != l2) { return false; } } - for (; i < len; i++) { - byte i1 = UNSAFE.getByte(base1 + i); - byte i2 = UNSAFE.getByte(base2 + i); - if (i1 != i2) { - return false; - } - } - return true; + return this.tail == that.tail; } @Override public int hashCode() { - if (hash == 0) { - // not sure why but it seems to be working a bit better - hash = UNSAFE.getInt(ms.address() + offset); - hash = hash >>> (8 * Math.max(0, 4 - len)); - hash |= len; - } return hash; } + + @Override + public String toString() { + byte[] buf = new byte[100]; + return asString(buf); + } } private static final class ResultRow { - long min = 1000; - long sum = 0; - long max = -1000; - int count = 0; + long min; + long sum; + long max; + int count; + + public ResultRow(int[] values) { + min = values[0]; + max = values[1]; + sum = values[2]; + count = values[3]; + } public String toString() { return round(min / 10.0) + "/" + round(sum / 10.0 / count) + "/" + round(max / 10.0); @@ -269,25 +290,50 @@ public ResultRow merge(ResultRow other) { } static class ResultStore { - private final ArrayList results = new ArrayList<>(10000); - private final Map indices = new HashMap<>(10000); + private static final int SIZE = 16384; + private final ByteString[] keys = new ByteString[SIZE]; + private final int[][] values = new 
int[SIZE][]; - ResultRow get(ByteString s) { - var idx = indices.get(s); - if (idx != null) { - return results.get(idx); - } - else { - ResultRow next = new ResultRow(); - results.add(next); - indices.put(s.copy(), results.size() - 1); - return next; + void update(ByteString s, int value) { + int h = s.hashCode(); + int idx = (SIZE - 1) & h; + + var keys = this.keys; + + int idx0 = idx; + int i = 0; + while (true) { + if (keys[idx] != null && keys[idx].equals(s)) { + values[idx][0] = Math.min(values[idx][0], value); + values[idx][1] = Math.max(values[idx][1], value); + values[idx][2] += value; + values[idx][3] += 1; + return; + } + else if (keys[idx] == null) { + keys[idx] = s.copy(); + values[idx] = new int[4]; + values[idx][0] = value; + values[idx][1] = value; + values[idx][2] = value; + values[idx][3] = 1; + return; + } + else { + i++; + idx = (idx0 + i * i) % SIZE; + } } } TreeMap toMap() { + byte[] buf = new byte[100]; var result = new TreeMap(); - indices.forEach((name, idx) -> result.put(name.toString(), results.get(idx))); + for (int i = 0; i < SIZE; i++) { + if (keys[i] != null) { + result.put(keys[i].asString(buf), new ResultRow(values[i])); + } + } return result; } } diff --git a/src/main/java/dev/morling/onebrc/CalculateAverage_royvanrijn.java b/src/main/java/dev/morling/onebrc/CalculateAverage_royvanrijn.java index 307833f70..14c40e2e2 100644 --- a/src/main/java/dev/morling/onebrc/CalculateAverage_royvanrijn.java +++ b/src/main/java/dev/morling/onebrc/CalculateAverage_royvanrijn.java @@ -22,10 +22,11 @@ import java.nio.charset.StandardCharsets; import java.nio.file.Path; import java.nio.file.StandardOpenOption; -import java.util.HashMap; +import java.util.ArrayList; +import java.util.Arrays; import java.util.Map; +import java.util.concurrent.ConcurrentHashMap; import java.util.stream.Collectors; -import java.util.stream.IntStream; import sun.misc.Unsafe; @@ -55,220 +56,480 @@ * Remove writing to buffer: 1335 ms * Optimized collecting at the end: 1310 
ms * Adding a lot of comments: priceless + * Changed to flyweight byte[]: 1290 ms (adds even more Unsafe, was initially slower, now faster) + * More LOC now parallel: 1260 ms (moved more to processMemoryArea, recombining in ConcurrentHashMap) + * Storing only the address: 1240 ms (this is now faster, tried before, was slower) + * Unrolling scan-loop: 1200 ms (seems to help, perhaps even more on target machine) + * Adding more readable reader: 1300 ms (scores got worse on target machine anyway) * - * Big thanks to Francesco Nigro, Thomas Wuerthinger, Quan Anh Mai for ideas. + * Using old x86 MacBook and perf: 3500 ms (different machine for testing) + * Decided to rewrite loop for 16 b: 3050 ms + * Small changes, limited heap: 2950 ms + * + * I have some instructions that could be removed, but faster with... + * + * Big thanks to Francesco Nigro, Thomas Wuerthinger, Quan Anh Mai and many others for ideas. * * Follow me at: @royvanrijn */ public class CalculateAverage_royvanrijn { private static final String FILE = "./measurements.txt"; + // private static final String FILE = "src/test/resources/samples/measurements-1.txt"; private static final Unsafe UNSAFE = initUnsafe(); - private static Unsafe initUnsafe() { - try { - final Field theUnsafe = Unsafe.class.getDeclaredField("theUnsafe"); - theUnsafe.setAccessible(true); - return (Unsafe) theUnsafe.get(Unsafe.class); - } - catch (NoSuchFieldException | IllegalAccessException e) { - throw new RuntimeException(e); - } + // Twice the processors, smoothens things out. + private static final int PROCESSORS = Runtime.getRuntime().availableProcessors(); + + /** + * Flyweight entry in a byte[], max 128 bytes. + *

+ * long: sum + * int: min + * int: max + * int: count + * byte: length + * byte[]: cityname + */ + // ------------------------------------------------------------------------ + private static final int ENTRY_LENGTH = (Unsafe.ARRAY_BYTE_BASE_OFFSET); + private static final int ENTRY_SUM = (ENTRY_LENGTH + Byte.BYTES); + private static final int ENTRY_MIN = (ENTRY_SUM + Long.BYTES); + private static final int ENTRY_MAX = (ENTRY_MIN + Integer.BYTES); + private static final int ENTRY_COUNT = (ENTRY_MAX + Integer.BYTES); + private static final int ENTRY_NAME = (ENTRY_COUNT + Integer.BYTES); + private static final int ENTRY_NAME_8 = ENTRY_NAME + 8; + private static final int ENTRY_NAME_16 = ENTRY_NAME + 16; + + private static final int ENTRY_BASESIZE_WHITESPACE = ENTRY_NAME + 7; // with enough empty bytes to fill a long + // ------------------------------------------------------------------------ + private static final int PREMADE_MAX_SIZE = 1 << 5; // pre-initialize some entries in memory, keep them close + private static final int PREMADE_ENTRIES = 512; // amount of pre-created entries we should use + private static final int TABLE_SIZE = 1 << 19; // large enough for the contest. + private static final int TABLE_MASK = (TABLE_SIZE - 1); + + // Idea of thomaswue, don't wait for slow unmap: + private static void spawnWorker() throws IOException { + ProcessHandle.Info info = ProcessHandle.current().info(); + ArrayList workerCommand = new ArrayList<>(); + info.command().ifPresent(workerCommand::add); + info.arguments().ifPresent(args -> workerCommand.addAll(Arrays.asList(args))); + workerCommand.add("--worker"); + new ProcessBuilder() + .command(workerCommand) + .inheritIO() + .redirectOutput(ProcessBuilder.Redirect.PIPE) + .start() + .getInputStream() + .transferTo(System.out); } public static void main(String[] args) throws Exception { + + if (args.length == 0 || !("--worker".equals(args[0]))) { + spawnWorker(); + return; + } + // Calculate input segments. 
- final int numberOfChunks = Runtime.getRuntime().availableProcessors(); - final long[] chunks = getSegments(numberOfChunks); - - final Map measurements = HashMap.newHashMap(1 << 10); - IntStream.range(0, chunks.length - 1) - .mapToObj(chunkIndex -> processMemoryArea(chunks[chunkIndex], chunks[chunkIndex + 1])) - .parallel() - .forEachOrdered(repo -> { // make sure it's ordered, no concurrent map - for (Entry entry : repo) { - if (entry != null) - measurements.merge(turnLongArrayIntoString(entry.data, entry.length), entry, Entry::mergeWith); + final FileChannel fileChannel = FileChannel.open(Path.of(FILE), StandardOpenOption.READ); + final long fileSize = fileChannel.size(); + final long segmentSize = (fileSize + PROCESSORS - 1) / PROCESSORS; + final long mapAddress = fileChannel.map(FileChannel.MapMode.READ_ONLY, 0, fileSize, Arena.global()).address(); + + final Thread[] parallelThreads = new Thread[PROCESSORS - 1]; + + // This is where the entries will land: + final ConcurrentHashMap measurements = new ConcurrentHashMap(1 << 10); + + // We create separate threads for twice the amount of processors. + long lastAddress = mapAddress; + final long endOfFile = mapAddress + fileSize; + for (int i = 0; i < PROCESSORS - 1; ++i) { + + final long fromAddress = lastAddress; + final long toAddress = Math.min(endOfFile, fromAddress + segmentSize); + + final Thread thread = new Thread(() -> { + // The actual work is done here: + final byte[][] table = processMemoryArea(fromAddress, toAddress, fromAddress == mapAddress); + + for (byte[] entry : table) { + if (entry != null) { + measurements.merge(entryToName(entry), entry, CalculateAverage_royvanrijn::mergeEntry); } - }); + } + }); + thread.start(); // start a.s.a.p. 
+ parallelThreads[i] = thread; + lastAddress = toAddress; + } + + // Use the current thread for the part of memory: + final byte[][] table = processMemoryArea(lastAddress, mapAddress + fileSize, false); + for (byte[] entry : table) { + if (entry != null) { + measurements.merge(entryToName(entry), entry, CalculateAverage_royvanrijn::mergeEntry); + } + } + // Wait for all threads to finish: + for (Thread thread : parallelThreads) { + // Can we implement work-stealing? Not sure how... + thread.join(); + } + + // If we don't reach start of file, System.out.print("{" + - measurements.entrySet().stream().sorted(Map.Entry.comparingByKey()).map(Object::toString).collect(Collectors.joining(", "))); + measurements.entrySet().stream().sorted(Map.Entry.comparingByKey()) + .map(entry -> entry.getKey() + '=' + entryValuesToString(entry.getValue())) + .collect(Collectors.joining(", "))); System.out.println("}"); + + System.out.close(); // close the stream to stop } - /** - * Simpler way to get the segments and launch parallel processing by thomaswue - */ - private static long[] getSegments(final int numberOfChunks) throws IOException { - try (var fileChannel = FileChannel.open(Path.of(FILE), StandardOpenOption.READ)) { - final long fileSize = fileChannel.size(); - final long segmentSize = (fileSize + numberOfChunks - 1) / numberOfChunks; - final long[] chunks = new long[numberOfChunks + 1]; - final long mappedAddress = fileChannel.map(FileChannel.MapMode.READ_ONLY, 0, fileSize, Arena.global()).address(); - chunks[0] = mappedAddress; - final long endAddress = mappedAddress + fileSize; - for (int i = 1; i < numberOfChunks; ++i) { - long chunkAddress = mappedAddress + i * segmentSize; - // Align to first row start. 
- while (chunkAddress < endAddress && UNSAFE.getByte(chunkAddress++) != '\n') { - // nop - } - chunks[i] = Math.min(chunkAddress, endAddress); - } - chunks[numberOfChunks] = endAddress; - return chunks; - } + private static byte[] fillEntry(final byte[] entry, final long fromAddress, final int entryLength, final int temp, final long readBuffer1, final long readBuffer2) { + UNSAFE.putLong(entry, ENTRY_SUM, temp); + UNSAFE.putInt(entry, ENTRY_MIN, temp); + UNSAFE.putInt(entry, ENTRY_MAX, temp); + UNSAFE.putInt(entry, ENTRY_COUNT, 1); + UNSAFE.putByte(entry, ENTRY_LENGTH, (byte) entryLength); + UNSAFE.copyMemory(null, fromAddress, entry, ENTRY_NAME, entryLength - 16); + UNSAFE.putLong(entry, ENTRY_NAME + entryLength - 16, readBuffer1); + UNSAFE.putLong(entry, ENTRY_NAME + entryLength - 8, readBuffer2); + return entry; } - // This is where I store the hashtable entry data in the "hot loop" - // The long[] contains the name in bytes (yeah, confusing) - // I've tried flyweight-ing, carrying all the data in a single byte[], - // where you offset type-indices: min:int,max:int,count:int,etc. - // - // The performance was just a little worse than this simple class. 
- static final class Entry { - - private int min, max, count; - private byte length; - private long sum; - private final long[] data; - - Entry(final long[] data, byte length, int temp) { - this.data = data; - this.length = length; - this.min = temp; - this.max = temp; - this.sum = temp; - this.count = 1; - } + private static byte[] fillEntry16(final byte[] entry, final int entryLength, final int temp, final long readBuffer1, final long readBuffer2) { + UNSAFE.putLong(entry, ENTRY_SUM, temp); + UNSAFE.putInt(entry, ENTRY_MIN, temp); + UNSAFE.putInt(entry, ENTRY_MAX, temp); + UNSAFE.putInt(entry, ENTRY_COUNT, 1); + UNSAFE.putByte(entry, ENTRY_LENGTH, (byte) entryLength); + UNSAFE.putLong(entry, ENTRY_NAME + entryLength - 16, readBuffer1); + UNSAFE.putLong(entry, ENTRY_NAME + entryLength - 8, readBuffer2); + return entry; + } - public void updateWith(int measurement) { - min = Math.min(min, measurement); - max = Math.max(max, measurement); - sum += measurement; - count++; - } + public static void updateEntry(final byte[] entry, final int temp) { - public Entry mergeWith(Entry entry) { - min = Math.min(min, entry.min); - max = Math.max(max, entry.max); - sum += entry.sum; - count += entry.count; - return this; - } + int entryMin = UNSAFE.getInt(entry, ENTRY_MIN); + int entryMax = UNSAFE.getInt(entry, ENTRY_MAX); + long entrySum = UNSAFE.getLong(entry, ENTRY_SUM) + temp; + int entryCount = UNSAFE.getInt(entry, ENTRY_COUNT) + 1; - public String toString() { - return round(min) + "/" + round((1.0 * sum) / count) + "/" + round(max); + if (temp < entryMin) { + UNSAFE.putInt(entry, ENTRY_MIN, temp); } + else if (temp > entryMax) { + UNSAFE.putInt(entry, ENTRY_MAX, temp); + } + UNSAFE.putInt(entry, ENTRY_COUNT, entryCount); + UNSAFE.putLong(entry, ENTRY_SUM, entrySum); + } + + public static byte[] mergeEntry(final byte[] entry, final byte[] merge) { + + long sum = UNSAFE.getLong(merge, ENTRY_SUM); + final int mergeMin = UNSAFE.getInt(merge, ENTRY_MIN); + final int mergeMax = 
UNSAFE.getInt(merge, ENTRY_MAX); + int count = UNSAFE.getInt(merge, ENTRY_COUNT); + + sum += UNSAFE.getLong(entry, ENTRY_SUM); + count += UNSAFE.getInt(entry, ENTRY_COUNT); + + int entryMin = UNSAFE.getInt(entry, ENTRY_MIN); + int entryMax = UNSAFE.getInt(entry, ENTRY_MAX); + entryMin = Math.min(entryMin, mergeMin); + entryMax = Math.max(entryMax, mergeMax); + UNSAFE.putInt(entry, ENTRY_MIN, entryMin); + UNSAFE.putInt(entry, ENTRY_MAX, entryMax); + + UNSAFE.putLong(entry, ENTRY_SUM, sum); + UNSAFE.putInt(entry, ENTRY_COUNT, count); + return entry; + } + + private static String entryToName(final byte[] entry) { + // Get the length from memory: + int length = UNSAFE.getByte(entry, ENTRY_LENGTH); + + byte[] name = new byte[length]; + UNSAFE.copyMemory(entry, ENTRY_NAME, name, Unsafe.ARRAY_BYTE_BASE_OFFSET, length); + + // Create a new String with the existing byte[]: + return new String(name, StandardCharsets.UTF_8).trim(); + } + + private static String entryValuesToString(final byte[] entry) { + return (round(UNSAFE.getInt(entry, ENTRY_MIN)) + + "/" + + round((1.0 * UNSAFE.getLong(entry, ENTRY_SUM)) / + UNSAFE.getInt(entry, ENTRY_COUNT)) + + "/" + + round(UNSAFE.getInt(entry, ENTRY_MAX))); + } - private static double round(double value) { - return Math.round(value) / 10.0; + // Print a piece of memory: + // For debug. + private static String printMemory(final Object target, final long address, int length) { + String result = ""; + for (int i = 0; i < length; i++) { + result += (char) UNSAFE.getByte(target, address + i); } + return result; } - // Only parse the String at the final end, when we have only the needed entries left that we need to output: - private static String turnLongArrayIntoString(final long[] data, final int length) { - // Create our target byte[] - final byte[] bytes = new byte[length]; - // The power of magic allows us to just copy the memory in there. 
- UNSAFE.copyMemory(data, Unsafe.ARRAY_LONG_BASE_OFFSET, bytes, Unsafe.ARRAY_BYTE_BASE_OFFSET, length); - // And construct a String() - return new String(bytes, StandardCharsets.UTF_8); + // Print a piece of memory: + // For debug. + private static String printMemory(final long value, int length) { + String result = ""; + for (int i = 0; i < length; i++) { + result += (char) ((value >> (i << 3)) & 0xFF); + } + return result; } - private static Entry createNewEntry(final long fromAddress, final int lengthLongs, final byte lengthBytes, final int temp) { - // Make a copy of our working buffer, store this in a new Entry: - final long[] bufferCopy = new long[lengthLongs]; - // Just copy everything over, bytes into the long[] - UNSAFE.copyMemory(null, fromAddress, bufferCopy, Unsafe.ARRAY_BYTE_BASE_OFFSET, lengthBytes); - return new Entry(bufferCopy, lengthBytes, temp); + private static double round(final double value) { + return Math.round(value) / 10.0; } - private static final int TABLE_SIZE = 1 << 19; - private static final int TABLE_MASK = (TABLE_SIZE - 1); + private static final class Reader { - private static Entry[] processMemoryArea(final long fromAddress, final long toAddress) { + private long ptr; + private long readBuffer1; + private long readBuffer2; - int packedBytes; - long hash; - long ptr = fromAddress; - long word; - long mask; + private long hash; + private long entryStart; + private int entryLength; // in bytes rounded to nearest 16 - final Entry[] table = new Entry[TABLE_SIZE]; + private final long endAddress; - // Go from start to finish address through the bytes: - while (ptr < toAddress) { + Reader(final long startAddress, final long endAddress, final boolean isFileStart) { - final long startAddress = ptr; + this.ptr = startAddress; + this.endAddress = endAddress; - packedBytes = 1; + // Adjust start to next delimiter: + if (!isFileStart) { + ptr--; + while (ptr < endAddress) { + if (UNSAFE.getByte(ptr++) == '\n') { + break; + } + } + } + } + + 
private void processStart() { hash = 0; - word = UNSAFE.getLong(ptr); - mask = getDelimiterMask(word); - - // Removed writing to a buffer here, why would we, we know the address and we'll need to check there anyway. - while (mask == 0) { - // If the mask is zero, we have no ';' - packedBytes++; - // So we continue building the hash: - hash ^= word; - ptr += 8; - - // And getting a new value and mask: - word = UNSAFE.getLong(ptr); - mask = getDelimiterMask(word); + entryStart = ptr; + entryLength = 0; + } + + private boolean hasNext() { + return (ptr < endAddress); + } + + private static final long DELIMITER_MASK = 0x3B3B3B3B3B3B3B3BL; + + private boolean readNext() { + + long lastRead = UNSAFE.getLong(ptr); + + entryLength += 16; + + // Find delimiter and create mask for long1 + long comparisonResult1 = (lastRead ^ DELIMITER_MASK); + long highBitMask1 = (comparisonResult1 - 0x0101010101010101L) & (~comparisonResult1 & 0x8080808080808080L); + + boolean noContent1 = highBitMask1 == 0; + long mask1 = noContent1 ? 0 : ~((highBitMask1 >>> 7) - 1); + int position1 = noContent1 ? 0 : 1 + (Long.numberOfTrailingZeros(highBitMask1) >> 3); + + readBuffer1 = lastRead & ~mask1; + hash ^= readBuffer1; + + int delimiter1 = position1 == 0 ? 0 : position1; // not nnecessary, but faster? + + if (delimiter1 != 0) { + hash ^= hash >> 32; + readBuffer2 = 0; + ptr += delimiter1; + return false; } - // Found delimiter: - final int delimiterByte = Long.numberOfTrailingZeros(mask); - final long delimiterAddress = ptr + (delimiterByte >> 3); + lastRead = UNSAFE.getLong(ptr + 8); + + // Repeat for long2 + long comparisonResult2 = (lastRead ^ DELIMITER_MASK); + long highBitMask2 = (comparisonResult2 - 0x0101010101010101L) & (~comparisonResult2 & 0x8080808080808080L); + boolean noContent2 = highBitMask2 == 0; + long mask2 = noContent2 ? 0 : ~((highBitMask2 >>> 7) - 1); + int position2 = noContent2 ? 
0 : 1 + (Long.numberOfTrailingZeros(highBitMask2) >> 3); + + // Apply masks + readBuffer2 = lastRead & ~mask2; + hash ^= readBuffer2; + + int delimiter2 = position2 == 0 ? 0 : position2 + 8; // not necessary, but faster? - // Finish the masks and hash: - final long partialWord = word & ((mask >>> 7) - 1); - hash ^= partialWord; + hash ^= hash >> 32; - // Read a long value from memory starting from the delimiter + 1, the number part: - final long numberBytes = UNSAFE.getLong(delimiterAddress + 1); + if (delimiter2 != 0) { + ptr += delimiter2; + return false; + } + ptr += 16; + return true; + } + + private int processEndAndGetTemperature() { + finalizeHash(); + return readTemperature(); + } + + private void finalizeHash() { + hash ^= hash >> 17; // extra entropy + } + + private static final long DOT_BITS = 0x10101000; + private static final long MAGIC_MULTIPLIER = (100 * 0x1000000 + 10 * 0x10000 + 1); + + // Awesome idea of merykitty: + private int readTemperature() { + // This is the number part: X.X, -X.X, XX.x or -XX.X + final long numberBytes = UNSAFE.getLong(ptr); final long invNumberBytes = ~numberBytes; - // Adjust our pointer - final int decimalSepPos = Long.numberOfTrailingZeros(invNumberBytes & DOT_BITS); - ptr = delimiterAddress + (decimalSepPos >> 3) + 4; + final int dotPosition = Long.numberOfTrailingZeros(invNumberBytes & DOT_BITS); + + // Calculates the sign + final long signed = (invNumberBytes << 59) >> 63; + final int min28 = (dotPosition ^ 0b11100); + final long minusFilter = ~(signed & 0xFF); + // Use the pre-calculated decimal position to adjust the values + final long digits = ((numberBytes & minusFilter) << min28) & 0x0F000F0F00L; + + // Update the pointer here, bit awkward, but we have all the data + ptr += (dotPosition >> 3) + 3; + + // Multiply by a magic (100 * 0x1000000 + 10 * 0x10000 + 1), to get the result + final long absValue = ((digits * MAGIC_MULTIPLIER) >>> 32) & 0x3FF; + // And perform abs() + return (int) ((absValue + signed) ^ 
signed); // non-patented method of doing the same trick + } + + private boolean matches(final byte[] entry) { + int step = 0; + for (; step < entryLength - 16;) { + if (compare(null, entryStart + step, entry, ENTRY_NAME + step)) { + return false; + } + step += 8; + } + if (compare(readBuffer1, entry, ENTRY_NAME + step)) { + return false; + } + step += 8; + if (compare(readBuffer2, entry, ENTRY_NAME + step)) { + return false; + } + return true; + } + + private boolean matches16(final byte[] entry) { + if (compare(readBuffer1, entry, ENTRY_NAME)) { + return false; + } + if (compare(readBuffer2, entry, ENTRY_NAME + 8)) { + return false; + } + return true; + } + } + + private static byte[][] processMemoryArea(final long startAddress, final long endAddress, boolean isFileStart) { + + final byte[][] table = new byte[TABLE_SIZE][]; + final byte[][] preConstructedEntries = new byte[PREMADE_ENTRIES][ENTRY_BASESIZE_WHITESPACE + PREMADE_MAX_SIZE]; + + final Reader reader = new Reader(startAddress, endAddress, isFileStart); - // Calculate the final hash and index of the table: - int intHash = (int) (hash ^ (hash >> 32)); - intHash = intHash ^ (intHash >> 17); - int index = intHash & TABLE_MASK; + byte[] entry; + int entryCount = 0; + + // Find the correct starting position + while (reader.hasNext()) { + + reader.processStart(); + + if (!reader.readNext()) { + // First 16 bytes: + + int temperature = reader.processEndAndGetTemperature(); + + // Find or insert the entry: + int index = (int) (reader.hash & TABLE_MASK); + while (true) { + entry = table[index]; + if (entry == null) { + byte[] entryBytes = (entryCount < PREMADE_ENTRIES) ? 
preConstructedEntries[entryCount++] + : new byte[ENTRY_BASESIZE_WHITESPACE + 16]; // with enough room + table[index] = fillEntry16(entryBytes, 16, temperature, reader.readBuffer1, reader.readBuffer2); + break; + } + else if (reader.matches16(entry)) { + updateEntry(entry, temperature); + break; + } + else { + // Move to the next index + index = (index + 1) & TABLE_MASK; + } + } + continue; + } + while (reader.readNext()) + ; + + int temperature = reader.processEndAndGetTemperature(); // Find or insert the entry: + int index = (int) (reader.hash & TABLE_MASK); while (true) { - Entry tableEntry = table[index]; - if (tableEntry == null) { - final int temp = extractTemp(decimalSepPos, invNumberBytes, numberBytes); - // Create a new entry: - final byte length = (byte) (delimiterAddress - startAddress); - table[index] = createNewEntry(startAddress, packedBytes, length, temp); + entry = table[index]; + if (entry == null) { + int length = reader.entryLength; + byte[] entryBytes = (length < PREMADE_MAX_SIZE && entryCount < PREMADE_ENTRIES) ? preConstructedEntries[entryCount++] + : new byte[ENTRY_BASESIZE_WHITESPACE + length]; // with enough room + table[index] = fillEntry(entryBytes, reader.entryStart, length, temperature, reader.readBuffer1, reader.readBuffer2); break; } - // Don't bother re-checking things here like hash or length. 
- // we'll need to check the content anyway if it's a hit, which is most times - else if (memoryEqualsEntry(startAddress, tableEntry.data, partialWord, packedBytes)) { - // temperature, you're not temporary my friend - final int temp = extractTemp(decimalSepPos, invNumberBytes, numberBytes); - // No differences, same entry: - tableEntry.updateWith(temp); + else if (reader.matches(entry)) { + updateEntry(entry, temperature); break; } - // Move to the next in the table, linear probing: - index = (index + 1) & TABLE_MASK; + else { + // Move to the next index + index = (index + 1) & TABLE_MASK; + } } } return table; } + private static boolean compare(final Object object1, final long address1, final Object object2, final long address2) { + return UNSAFE.getLong(object1, address1) != UNSAFE.getLong(object2, address2); + } + + private static boolean compare(final long value1, final Object object2, final long address2) { + return value1 != UNSAFE.getLong(object2, address2); + } + /* * `___` ___ ___ _ ___` ` ___ ` _ ` _ ` _` ___ * / ` \| _ \ __| \| \ \ / /_\ | | | | | | __| @@ -277,52 +538,16 @@ else if (memoryEqualsEntry(startAddress, tableEntry.data, partialWord, packedByt * ---------------- BETTER SOFTWARE, FASTER -- * * https://www.openvalue.eu/ - * - * Made you look. 
- * */ - private static final long DOT_BITS = 0x10101000; - private static final long MAGIC_MULTIPLIER = (100 * 0x1000000 + 10 * 0x10000 + 1); - - private static int extractTemp(final int decimalSepPos, final long invNumberBits, final long numberBits) { - // Awesome idea of merykitty: - int min28 = (28 - decimalSepPos); - // Calculates the sign - final long signed = (invNumberBits << 59) >> 63; - final long minusFilter = ~(signed & 0xFF); - // Use the pre-calculated decimal position to adjust the values - final long digits = ((numberBits & minusFilter) << min28) & 0x0F000F0F00L; - // Multiply by a magic (100 * 0x1000000 + 10 * 0x10000 + 1), to get the result - final long absValue = ((digits * MAGIC_MULTIPLIER) >>> 32) & 0x3FF; - // And perform abs() - final int temp = (int) ((absValue + signed) ^ signed); // non-patented method of doing the same trick - return temp; - } - - private static final long SEPARATOR_PATTERN = 0x3B3B3B3B3B3B3B3BL; - - // Takes a long and finds the bytes where this exact pattern is present. - // Cool bit manipulation technique: SWAR (SIMD as a Register). - private static long getDelimiterMask(final long word) { - final long match = word ^ SEPARATOR_PATTERN; - return (match - 0x0101010101010101L) & (~match & 0x8080808080808080L); - // I've put some brackets separating the first and second part, this is faster. - // Now they run simultaneous after 'match' is altered, instead of waiting on each other. 
- } - - /** - * For case multiple hashes are equal (however unlikely) check the actual key (using longs) - */ - private static boolean memoryEqualsEntry(final long startAddress, final long[] entry, final long finalBytes, final int amountLong) { - for (int i = 0; i < (amountLong - 1); i++) { - int step = i << 3; // step by 8 bytes - if (UNSAFE.getLong(startAddress + step) != entry[i]) - return false; + private static Unsafe initUnsafe() { + try { + final Field theUnsafe = Unsafe.class.getDeclaredField("theUnsafe"); + theUnsafe.setAccessible(true); + return (Unsafe) theUnsafe.get(Unsafe.class); + } + catch (NoSuchFieldException | IllegalAccessException e) { + throw new RuntimeException(e); } - // If all previous 'whole' 8-packed byte-long values are equal - // We still need to check the final bytes that don't fit. - // and we've already calculated them for the hash. - return finalBytes == entry[amountLong - 1]; } } diff --git a/src/main/java/dev/morling/onebrc/CalculateAverage_serkan_ozal.java b/src/main/java/dev/morling/onebrc/CalculateAverage_serkan_ozal.java new file mode 100644 index 000000000..e4f5aaa82 --- /dev/null +++ b/src/main/java/dev/morling/onebrc/CalculateAverage_serkan_ozal.java @@ -0,0 +1,854 @@ +/* + * Copyright 2023 The original authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package dev.morling.onebrc; + +import jdk.incubator.vector.ByteVector; +import jdk.incubator.vector.VectorOperators; +import jdk.incubator.vector.VectorSpecies; +import sun.misc.Unsafe; + +import java.io.IOException; +import java.io.RandomAccessFile; +import java.lang.foreign.Arena; +import java.lang.foreign.MemorySegment; +import java.lang.reflect.Field; +import java.nio.ByteBuffer; +import java.nio.ByteOrder; +import java.nio.channels.FileChannel; +import java.nio.charset.StandardCharsets; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.Map; +import java.util.Queue; +import java.util.TreeMap; +import java.util.concurrent.Callable; +import java.util.concurrent.ConcurrentLinkedQueue; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.Future; +import java.util.concurrent.ThreadFactory; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.locks.Lock; +import java.util.concurrent.locks.ReentrantLock; + +/** + * @author serkan-ozal + */ +public class CalculateAverage_serkan_ozal { + + private static final String FILE = System.getProperty("file.path", "./measurements.txt"); + + private static final VectorSpecies BYTE_SPECIES = ByteVector.SPECIES_PREFERRED.length() >= 16 + // Since majority (99%) of the city names <= 16 bytes, according to my experiments, + // 128 bit (16 byte) vectors perform better than 256 bit (32 byte) or 512 bit (64 byte) vectors + // even though supported by platform. + ? 
ByteVector.SPECIES_128 + : ByteVector.SPECIES_64; + private static final int BYTE_SPECIES_SIZE = BYTE_SPECIES.vectorByteSize(); + private static final MemorySegment NULL = MemorySegment.NULL.reinterpret(Long.MAX_VALUE); + private static final ByteOrder NATIVE_BYTE_ORDER = ByteOrder.nativeOrder(); + + private static final char NEW_LINE_SEPARATOR = '\n'; + private static final char KEY_VALUE_SEPARATOR = ';'; + private static final int MAX_LINE_LENGTH = 128; + + // Get configurations + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + private static final boolean VERBOSE = false; // getBooleanConfig("VERBOSE", false); + private static final int THREAD_COUNT = Runtime.getRuntime().availableProcessors(); // getIntegerConfig("THREAD_COUNT", Runtime.getRuntime().availableProcessors()); + private static final boolean USE_VTHREADS = false; // getBooleanConfig("USE_VTHREADS", false); + private static final int VTHREAD_COUNT = 1024; // getIntegerConfig("VTHREAD_COUNT", 1024); + private static final int REGION_COUNT = 256; // getIntegerConfig("REGION_COUNT", -1); + private static final boolean USE_SHARED_ARENA = true; // getBooleanConfig("USE_SHARED_ARENA", true); + private static final boolean USE_SHARED_REGION = true; // getBooleanConfig("USE_SHARED_REGION", true); + private static final int MAP_CAPACITY = 1 << 17; // getIntegerConfig("MAP_CAPACITY", 1 << 17); + private static final boolean CLOSE_STDOUT_ON_RESULT = true; // getBooleanConfig("CLOSE_STDOUT_ON_RESULT", true); + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + + // My dear old friend Unsafe + private static final Unsafe U; + + static { + try { + Field f = Unsafe.class.getDeclaredField("theUnsafe"); + f.setAccessible(true); + U = (Unsafe) f.get(null); + } + catch (Exception e) { + throw new IllegalStateException(e); + } + } + + public static void main(String[] args) throws 
Exception { + long start = System.currentTimeMillis(); + if (VERBOSE) { + System.out.println("Processing started at " + start); + System.out.println("Vector byte size: " + BYTE_SPECIES.vectorByteSize()); + System.out.println("Use shared memory arena: " + USE_SHARED_ARENA); + if (USE_VTHREADS) { + System.out.println("Virtual thread count: " + VTHREAD_COUNT); + } + else { + System.out.println("Thread count: " + THREAD_COUNT); + } + System.out.println("Map capacity: " + MAP_CAPACITY); + } + + int concurrency = USE_VTHREADS ? VTHREAD_COUNT : THREAD_COUNT; + int regionCount = REGION_COUNT > 0 ? REGION_COUNT : concurrency; + ByteBuffer lineBuffer = getByteBuffer(MAX_LINE_LENGTH); + Result result = new Result(); + + RandomAccessFile file = new RandomAccessFile(FILE, "r"); + FileChannel fc = file.getChannel(); + Arena arena = USE_SHARED_ARENA ? Arena.ofShared() : null; + try { + long fileSize = fc.size(); + long regionSize = fileSize / regionCount; + long startPos = 0; + ExecutorService executor = USE_VTHREADS + ? Executors.newVirtualThreadPerTaskExecutor() + : Executors.newFixedThreadPool(concurrency, new RegionProcessorThreadFactory()); + MemorySegment region = null; + if (USE_SHARED_REGION) { + arena = Arena.ofShared(); + region = fc.map(FileChannel.MapMode.READ_ONLY, 0, fileSize, arena); + } + + List tasks = new ArrayList<>(regionCount); + // Split whole file into regions and create tasks for each region + List> futures = new ArrayList<>(regionCount); + for (int i = 0; i < regionCount; i++) { + long endPos = Math.min(fileSize, startPos + regionSize); + // Lines might split into different regions. + // If so, move back to the line starting at the end of previous region + long closestLineEndPos = (i < regionCount - 1) + ? 
findClosestLineEnd(fc, endPos, lineBuffer) + : fileSize; + Task task = new Task(fc, region, startPos, closestLineEndPos); + tasks.add(task); + startPos = closestLineEndPos; + } + + Queue sharedTasks = new ConcurrentLinkedQueue<>(tasks); + + // Start region processors to process tasks for each region + for (int i = 0; i < concurrency; i++) { + Request request = new Request(arena, sharedTasks, result); + RegionProcessor regionProcessor = createRegionProcessor(request); + Future future = executor.submit(regionProcessor); + futures.add(future); + } + + // Wait processors to complete + for (Future future : futures) { + future.get(); + } + + long finish = System.currentTimeMillis(); + if (VERBOSE) { + System.out.println("Processing completed at " + finish); + System.out.println("Processing completed in " + (finish - start) + " milliseconds"); + } + + // Print result to stdout + result.print(); + + if (CLOSE_STDOUT_ON_RESULT) { + // After printing result, close stdout. + // So parent process can complete without waiting this process completed. + // Saves a few hundred milliseconds caused by unmap. 
+ System.out.close(); + } + } + finally { + // Close memory arena if it is managed globally here (shared arena) + if (arena != null) { + arena.close(); + } + fc.close(); + if (VERBOSE) { + long finish = System.currentTimeMillis(); + System.out.println("All completed at " + finish); + System.out.println("All Completed in " + ((finish - start)) + " milliseconds"); + } + } + } + + private static boolean getBooleanConfig(String envVarName, boolean defaultValue) { + String envVarValue = System.getenv(envVarName); + if (envVarValue == null) { + return defaultValue; + } + else { + return Boolean.parseBoolean(envVarValue); + } + } + + private static int getIntegerConfig(String envVarName, int defaultValue) { + String envVarValue = System.getenv(envVarName); + if (envVarValue == null) { + return defaultValue; + } + else { + return Integer.parseInt(envVarValue); + } + } + + private static ByteBuffer getByteBuffer(int size) { + ByteBuffer bb = ByteBuffer.allocateDirect(size); + bb.order(NATIVE_BYTE_ORDER); + return bb; + } + + private static long findClosestLineEnd(FileChannel fc, long endPos, ByteBuffer lineBuffer) throws IOException { + long lineCheckStartPos = Math.max(0, endPos - MAX_LINE_LENGTH); + lineBuffer.rewind(); + fc.read(lineBuffer, lineCheckStartPos); + int i = MAX_LINE_LENGTH; + while (lineBuffer.get(i - 1) != NEW_LINE_SEPARATOR) { + i--; + } + return lineCheckStartPos + i; + } + + private static RegionProcessor createRegionProcessor(Request request) { + return new RegionProcessor(request); + } + + private static class RegionProcessorThreadFactory implements ThreadFactory { + + @Override + public Thread newThread(Runnable r) { + Thread t = new Thread(r); + t.setDaemon(true); + t.setPriority(Thread.MAX_PRIORITY); + return t; + } + + } + + /** + * Region processor + */ + private static class RegionProcessor implements Callable { + + private final Arena arena; + private final Queue sharedTasks; + private final Result result; + private OpenMap map; + + private 
RegionProcessor(Request request) { + this.arena = request.arena; + this.sharedTasks = request.sharedTasks; + this.result = request.result; + } + + @Override + public Response call() throws Exception { + if (VERBOSE) { + System.out.println("[Processor-" + Thread.currentThread().getName() + "] Processing started at " + System.currentTimeMillis()); + } + try { + processRegion(); + return new Response(map); + } + finally { + if (VERBOSE) { + System.out.println("[Processor-" + Thread.currentThread().getName() + "] Processing finished at " + System.currentTimeMillis()); + } + } + } + + private void processRegion() throws Exception { + // Create map in its own thread + this.map = new OpenMap(); + + boolean arenaGiven = arena != null; + // If no shared global memory arena is used, create and use its own local memory arena + Arena a = arenaGiven ? arena : Arena.ofConfined(); + try { + for (Task task = sharedTasks.poll(); task != null; task = sharedTasks.poll()) { + boolean regionGiven = task.region != null; + MemorySegment r = regionGiven + ? task.region + : task.fileChannel.map(FileChannel.MapMode.READ_ONLY, task.start, task.size, a); + long regionStart = regionGiven ? (r.address() + task.start) : r.address(); + long regionEnd = regionStart + task.size; + + doProcessRegion(regionStart, regionEnd); + } + + if (VERBOSE) { + System.out.println("[Processor-" + Thread.currentThread().getName() + "] Region processed at " + System.currentTimeMillis()); + } + + // Some threads/processors might finish slightly before others. + // So, instead of releasing their cores idle, merge their own results here. + + // If there is no another processor merging its results now, merge now. + // Otherwise (there is already another thread/processor got the lock of merging), + // Close current processor's own local memory arena (if no shared global memory arena is used) now + // and merge its own results after then. 
+ + boolean merged = result.tryMergeInto(map); + if (VERBOSE && merged) { + System.out.println("[Processor-" + Thread.currentThread().getName() + "] Result merged at " + System.currentTimeMillis()); + } + if (!merged) { + if (!arenaGiven) { + a.close(); + a = null; + if (VERBOSE) { + System.out.println("[Processor-" + Thread.currentThread().getName() + "] Arena closed at " + System.currentTimeMillis()); + } + } + result.mergeInto(map); + if (VERBOSE) { + System.out.println("[Processor-" + Thread.currentThread().getName() + "] Result merged at " + System.currentTimeMillis()); + } + } + } + finally { + // If local memory arena is managed here and not closed yet, close it here + if (!arenaGiven && a != null) { + a.close(); + if (VERBOSE) { + System.out.println("[Processor-" + Thread.currentThread().getName() + "] Arena closed at " + System.currentTimeMillis()); + } + } + } + } + + private long findClosestLineEnd(long endPos, long minPos) { + int i = 0; + int maxI = Math.min(MAX_LINE_LENGTH, (int) (endPos - minPos)); + while (i <= maxI && U.getByte(endPos - i) != NEW_LINE_SEPARATOR) { + i++; + } + return endPos - i + 1; + } + + // Credits: merykitty + private long extractValue(long regionPtr, long word, OpenMap map, int entryOffset) { + // Parse and extract value + + // 1. level instruction set (no dependency between each other so can be run in parallel) + long signed = (~word << 59) >> 63; + int decimalSepPos = Long.numberOfTrailingZeros(~word & 0x10101000); + + // 2. 
level instruction set (no dependency between each other so can be run in parallel) + long nextPtr = regionPtr + (decimalSepPos >>> 3) + 3; + int shift = 28 - decimalSepPos; + long designMask = ~(signed & 0xFF); + + long digits = ((word & designMask) << shift) & 0x0F000F0F00L; + long absValue = ((digits * 0x640a0001) >>> 32) & 0x3FF; + int value = (int) ((absValue ^ signed) - signed); + + // Put extracted value into map + map.putValue(entryOffset, value); + + // Return new position + return nextPtr; + } + + private void doProcessRegion(long regionStart, long regionEnd) { + final long size = regionEnd - regionStart; + final long segmentSize = size / 2; + + final long regionStart1 = regionStart; + final long regionEnd1 = Math.max(regionStart1, findClosestLineEnd(regionStart1 + segmentSize, regionStart)); + + final long regionStart2 = regionEnd1; + final long regionEnd2 = regionEnd; + + long regionPtr1, regionPtr2; + + // Read and process region - main + // Inspired by: @jerrinot + // - two lines at a time (according to my experiment, this is optimum value in terms of register spilling) + // - most of the implementation is inlined + // - so get the benefit of ILP (Instruction Level Parallelism) better + for (regionPtr1 = regionStart1, regionPtr2 = regionStart2; regionPtr1 < regionEnd1 && regionPtr2 < regionEnd2;) { + // Search key/value separators and find keys' start and end positions + //////////////////////////////////////////////////////////////////////////////////////////////////////// + long keyStartPtr1 = regionPtr1; + long keyStartPtr2 = regionPtr2; + + ByteVector keyVector1 = ByteVector.fromMemorySegment(BYTE_SPECIES, NULL, regionPtr1, NATIVE_BYTE_ORDER); + ByteVector keyVector2 = ByteVector.fromMemorySegment(BYTE_SPECIES, NULL, regionPtr2, NATIVE_BYTE_ORDER); + + int keyLength1 = keyVector1.compare(VectorOperators.EQ, KEY_VALUE_SEPARATOR).firstTrue(); + int keyLength2 = keyVector2.compare(VectorOperators.EQ, KEY_VALUE_SEPARATOR).firstTrue(); + + if 
(keyLength1 != BYTE_SPECIES_SIZE && keyLength2 != BYTE_SPECIES_SIZE) { + regionPtr1 += (keyLength1 + 1); + regionPtr2 += (keyLength2 + 1); + } + else { + if (keyLength1 != BYTE_SPECIES_SIZE) { + regionPtr1 += (keyLength1 + 1); + } + else { + regionPtr1 += BYTE_SPECIES_SIZE; + for (; U.getByte(regionPtr1) != KEY_VALUE_SEPARATOR; regionPtr1++) + ; + keyLength1 = (int) (regionPtr1 - keyStartPtr1); + regionPtr1++; + } + if (keyLength2 != BYTE_SPECIES_SIZE) { + regionPtr2 += (keyLength2 + 1); + } + else { + regionPtr2 += BYTE_SPECIES_SIZE; + for (; U.getByte(regionPtr2) != KEY_VALUE_SEPARATOR; regionPtr2++) + ; + keyLength2 = (int) (regionPtr2 - keyStartPtr2); + regionPtr2++; + } + } + + // Read first words as they will be used while extracting values later + long word1 = U.getLong(regionPtr1); + long word2 = U.getLong(regionPtr2); + if (NATIVE_BYTE_ORDER == ByteOrder.BIG_ENDIAN) { + word1 = Long.reverseBytes(word1); + word2 = Long.reverseBytes(word2); + } + //////////////////////////////////////////////////////////////////////////////////////////////////////// + + // Calculate key hashes and find entry indexes + //////////////////////////////////////////////////////////////////////////////////////////////////////// + int x1, y1, x2, y2; + if (keyLength1 > 3 && keyLength2 > 3) { + x1 = U.getInt(keyStartPtr1); + y1 = U.getInt(regionPtr1 - 5); + x2 = U.getInt(keyStartPtr2); + y2 = U.getInt(regionPtr2 - 5); + } + else { + if (keyLength1 > 3) { + x1 = U.getInt(keyStartPtr1); + y1 = U.getInt(regionPtr1 - 5); + } + else { + x1 = U.getByte(keyStartPtr1); + y1 = U.getByte(regionPtr1 - 2); + } + if (keyLength2 > 3) { + x2 = U.getInt(keyStartPtr2); + y2 = U.getInt(regionPtr2 - 5); + } + else { + x2 = U.getByte(keyStartPtr2); + y2 = U.getByte(regionPtr2 - 2); + } + } + + int keyHash1 = (Integer.rotateLeft(x1 * OpenMap.HASH_SEED, OpenMap.HASH_ROTATE) ^ y1) * OpenMap.HASH_SEED; + int keyHash2 = (Integer.rotateLeft(x2 * OpenMap.HASH_SEED, OpenMap.HASH_ROTATE) ^ y2) * 
OpenMap.HASH_SEED; + + int entryIdx1 = (keyHash1 & OpenMap.ENTRY_HASH_MASK) << OpenMap.ENTRY_SIZE_SHIFT; + int entryIdx2 = (keyHash2 & OpenMap.ENTRY_HASH_MASK) << OpenMap.ENTRY_SIZE_SHIFT; + //////////////////////////////////////////////////////////////////////////////////////////////////////// + + // Put keys and calculate entry offsets to put values + //////////////////////////////////////////////////////////////////////////////////////////////////////// + int entryOffset1 = map.putKey(keyVector1, keyStartPtr1, keyLength1, entryIdx1); + int entryOffset2 = map.putKey(keyVector2, keyStartPtr2, keyLength2, entryIdx2); + //////////////////////////////////////////////////////////////////////////////////////////////////////// + + // Extract values by parsing and put them into map + //////////////////////////////////////////////////////////////////////////////////////////////////////// + regionPtr1 = extractValue(regionPtr1, word1, map, entryOffset1); + regionPtr2 = extractValue(regionPtr2, word2, map, entryOffset2); + //////////////////////////////////////////////////////////////////////////////////////////////////////// + } + + // Read and process region - tail + doProcessTail(regionPtr1, regionEnd1, regionPtr2, regionEnd2); + } + + private void doProcessTail(long regionPtr1, long regionEnd1, long regionPtr2, long regionEnd2) { + while (regionPtr1 < regionEnd1) { + long keyStartPtr1 = regionPtr1; + ByteVector keyVector1 = ByteVector.fromMemorySegment(BYTE_SPECIES, NULL, regionPtr1, NATIVE_BYTE_ORDER); + int keyLength1 = keyVector1.compare(VectorOperators.EQ, KEY_VALUE_SEPARATOR).firstTrue(); + if (keyLength1 != BYTE_SPECIES_SIZE) { + regionPtr1 += (keyLength1 + 1); + } + else { + regionPtr1 += BYTE_SPECIES_SIZE; + for (; U.getByte(regionPtr1) != KEY_VALUE_SEPARATOR; regionPtr1++) + ; + keyLength1 = (int) (regionPtr1 - keyStartPtr1); + regionPtr1++; + } + int entryIdx1 = map.calculateEntryIndex(keyStartPtr1, keyLength1); + int entryOffset1 = map.putKey(keyVector1, 
keyStartPtr1, keyLength1, entryIdx1); + long word1 = U.getLong(regionPtr1); + if (NATIVE_BYTE_ORDER == ByteOrder.BIG_ENDIAN) { + word1 = Long.reverseBytes(word1); + } + regionPtr1 = extractValue(regionPtr1, word1, map, entryOffset1); + } + while (regionPtr2 < regionEnd2) { + long keyStartPtr2 = regionPtr2; + ByteVector keyVector2 = ByteVector.fromMemorySegment(BYTE_SPECIES, NULL, regionPtr2, NATIVE_BYTE_ORDER); + int keyLength2 = keyVector2.compare(VectorOperators.EQ, KEY_VALUE_SEPARATOR).firstTrue(); + if (keyLength2 != BYTE_SPECIES_SIZE) { + regionPtr2 += (keyLength2 + 1); + } + else { + regionPtr2 += BYTE_SPECIES_SIZE; + for (; U.getByte(regionPtr2) != KEY_VALUE_SEPARATOR; regionPtr2++) + ; + keyLength2 = (int) (regionPtr2 - keyStartPtr2); + regionPtr2++; + } + int entryIdx2 = map.calculateEntryIndex(keyStartPtr2, keyLength2); + int entryOffset2 = map.putKey(keyVector2, keyStartPtr2, keyLength2, entryIdx2); + long word2 = U.getLong(regionPtr2); + if (NATIVE_BYTE_ORDER == ByteOrder.BIG_ENDIAN) { + word2 = Long.reverseBytes(word2); + } + regionPtr2 = extractValue(regionPtr2, word2, map, entryOffset2); + } + } + + } + + /** + * Region processor task + */ + private static final class Task { + + private final FileChannel fileChannel; + private final MemorySegment region; + private final long start; + private final long end; + private final long size; + + private Task(FileChannel fileChannel, MemorySegment region, long start, long end) { + this.fileChannel = fileChannel; + this.region = region; + this.start = start; + this.end = end; + this.size = end - start; + } + + } + + /** + * Region processor request + */ + private static final class Request { + + private final Arena arena; + private final Queue sharedTasks; + private final Result result; + + private Request(Arena arena, Queue sharedTasks, Result result) { + this.arena = arena; + this.sharedTasks = sharedTasks; + this.result = result; + } + + } + + /** + * Region processor response + */ + private static final 
class Response { + + private final OpenMap map; + + private Response(OpenMap map) { + this.map = map; + } + + } + + /** + * Result of each key (city) + */ + private static final class KeyResult { + + private int count; + private int minValue; + private int maxValue; + private long sum; + + private KeyResult(int count, int minValue, int maxValue, long sum) { + this.count = count; + this.minValue = minValue; + this.maxValue = maxValue; + this.sum = sum; + } + + private void merge(KeyResult result) { + count += result.count; + minValue = Math.min(minValue, result.minValue); + maxValue = Math.max(maxValue, result.maxValue); + sum += result.sum; + } + + @Override + public String toString() { + return (minValue / 10.0) + "/" + round(sum / (double) (count * 10)) + "/" + (maxValue / 10.0); + } + + private double round(double value) { + return Math.round(value * 10.0) / 10.0; + } + + } + + /** + * Global result + */ + private static final class Result { + + private final Lock lock = new ReentrantLock(); + private final Map resultMap; + + private Result() { + this.resultMap = new TreeMap<>(); + } + + private boolean tryMergeInto(OpenMap map) { + // Use lock (not "synchronized" block) to be virtual threads friendly + if (!lock.tryLock()) { + return false; + } + try { + map.merge(this.resultMap); + return true; + } + finally { + lock.unlock(); + } + } + + private void mergeInto(OpenMap map) { + // Use lock (not "synchronized" block) to be virtual threads friendly + lock.lock(); + try { + map.merge(this.resultMap); + } + finally { + lock.unlock(); + } + } + + private void print() { + StringBuilder sb = new StringBuilder(1 << 14); + boolean firstEntryAppended = false; + sb.append("{"); + for (Map.Entry e : resultMap.entrySet()) { + if (firstEntryAppended) { + sb.append(", "); + } + String key = e.getKey(); + KeyResult value = e.getValue(); + sb.append(key).append("=").append(value); + firstEntryAppended = true; + } + sb.append('}'); + System.out.println(sb); + } + + } + + /** + 
* Custom map implementation to store results + */ + private static final class OpenMap { + + // Layout + // ================================ + // 0 : 4 bytes - count + // 4 : 2 bytes - min value + // 6 : 2 bytes - max value + // 8 : 8 bytes - value sum + // 16 : 4 bytes - key size + // 20 : 4 bytes - padding + // 24 : 100 bytes - key + // 124 : 4 bytes - padding + // ================================ + // 128 bytes - total + + private static final int ENTRY_SIZE = 128; + private static final int ENTRY_SIZE_SHIFT = 7; + + private static final int COUNT_OFFSET = 0; + private static final int MIN_VALUE_OFFSET = 4; + private static final int MAX_VALUE_OFFSET = 6; + private static final int VALUE_SUM_OFFSET = 8; + private static final int KEY_SIZE_OFFSET = 16; + private static final int KEY_OFFSET = 24; + + private static final int ENTRY_HASH_MASK = MAP_CAPACITY - 1; + private static final int MAP_SIZE = ENTRY_SIZE * MAP_CAPACITY; + private static final int ENTRY_MASK = MAP_SIZE - 1; + private static final int KEY_ARRAY_OFFSET = KEY_OFFSET - Unsafe.ARRAY_BYTE_BASE_OFFSET; + + private static final int HASH_SEED = 0x9E3779B9; + private static final int HASH_ROTATE = 5; + + private final byte[] data; + private final int[] entryOffsets; + private int entryOffsetIdx; + + private OpenMap() { + this.data = new byte[MAP_SIZE]; + // Max number of unique keys are 10K, so 1 << 14 (16384) is long enough to hold offsets for all of them + this.entryOffsets = new int[1 << 14]; + this.entryOffsetIdx = 0; + } + + // Credits: merykitty + private int calculateEntryIndex(long address, int keyLength) { + int x, y; + if (keyLength >= Integer.BYTES) { + x = U.getInt(address); + y = U.getInt(address + keyLength - Integer.BYTES); + } + else { + x = U.getByte(address); + y = U.getByte(address + keyLength - Byte.BYTES); + } + // Calculate key hash + int keyHash = (Integer.rotateLeft(x * HASH_SEED, HASH_ROTATE) ^ y) * HASH_SEED; + // Get the position of the entry in the linear map based on 
calculated hash + return (keyHash & ENTRY_HASH_MASK) << ENTRY_SIZE_SHIFT; + } + + private int putKey(ByteVector keyVector, long keyStartAddress, int keyLength, int entryIdx) { + // Start searching from the calculated position + // and continue until find an available slot in case of hash collision + // TODO Prevent infinite loop if all the slots are in use for other keys + for (int entryOffset = Unsafe.ARRAY_BYTE_BASE_OFFSET + entryIdx;; entryOffset = (entryOffset + ENTRY_SIZE) & ENTRY_MASK) { + int keySize = U.getInt(data, entryOffset + KEY_SIZE_OFFSET); + // Check whether current index is empty (no another key is inserted yet) + if (keySize == 0) { + // Initialize entry slot for new key + U.putShort(data, entryOffset + MIN_VALUE_OFFSET, Short.MAX_VALUE); + U.putShort(data, entryOffset + MAX_VALUE_OFFSET, Short.MIN_VALUE); + U.putInt(data, entryOffset + KEY_SIZE_OFFSET, keyLength); + U.copyMemory(null, keyStartAddress, data, entryOffset + KEY_OFFSET, keyLength); + entryOffsets[entryOffsetIdx++] = entryOffset; + return entryOffset; + } + // Check for hash collision (hashes are same, but keys are different). + // If there is no collision (both hashes and keys are equals), return current slot's offset. + // Otherwise, continue iterating until find an available slot. + if (keySize == keyLength && keysEqual(keyVector, keyStartAddress, keyLength, entryOffset + KEY_ARRAY_OFFSET)) { + return entryOffset; + } + } + } + + private boolean keysEqual(ByteVector keyVector, long keyStartAddress, int keyLength, int keyStartArrayOffset) { + // Use vectorized search for the comparison of keys. + // Since majority of the city names >= 8 bytes and <= 16 bytes, + // this way is more efficient (according to my experiments) than any other comparisons (byte by byte or 2 longs). 
+ ByteVector entryKeyVector = ByteVector.fromArray(BYTE_SPECIES, data, keyStartArrayOffset); + int eqCount = keyVector.compare(VectorOperators.EQ, entryKeyVector).trueCount(); + if (eqCount == keyLength) { + return true; + } + else if (keyLength <= BYTE_SPECIES_SIZE) { + return false; + } + + // Compare remaining parts of the keys + + int normalizedKeyLength = keyLength; + if (NATIVE_BYTE_ORDER == ByteOrder.BIG_ENDIAN) { + normalizedKeyLength = Integer.reverseBytes(normalizedKeyLength); + } + + long keyStartOffset = keyStartArrayOffset + Unsafe.ARRAY_BYTE_BASE_OFFSET; + int alignedKeyLength = normalizedKeyLength & 0xFFFFFFF8; + int i; + for (i = BYTE_SPECIES_SIZE; i < alignedKeyLength; i += Long.BYTES) { + if (U.getLong(keyStartAddress + i) != U.getLong(data, keyStartOffset + i)) { + return false; + } + } + + long wordA = U.getLong(keyStartAddress + i); + long wordB = U.getLong(data, keyStartOffset + i); + if (NATIVE_BYTE_ORDER == ByteOrder.BIG_ENDIAN) { + wordA = Long.reverseBytes(wordA); + wordB = Long.reverseBytes(wordB); + } + int halfShift = (Long.BYTES - (normalizedKeyLength & 0x00000007)) << 2; + long mask = (0xFFFFFFFFFFFFFFFFL >>> halfShift) >> halfShift; + wordA = wordA & mask; + // No need to mask "wordB" (word from key in the map), because it is already padded with 0s + return wordA == wordB; + } + + private void putValue(int entryOffset, int value) { + int countOffset = entryOffset + COUNT_OFFSET; + int minValueOffset = entryOffset + MIN_VALUE_OFFSET; + int maxValueOffset = entryOffset + MAX_VALUE_OFFSET; + int sumOffset = entryOffset + VALUE_SUM_OFFSET; + + U.putInt(data, countOffset, U.getInt(data, countOffset) + 1); + if (value < U.getShort(data, minValueOffset)) { + U.putShort(data, minValueOffset, (short) value); + } + if (value > U.getShort(data, maxValueOffset)) { + U.putShort(data, maxValueOffset, (short) value); + } + U.putLong(data, sumOffset, U.getLong(data, sumOffset) + value); + } + + private void merge(Map resultMap) { + // Merge this 
local map into global result map + Arrays.sort(entryOffsets, 0, entryOffsetIdx); + for (int i = 0; i < entryOffsetIdx; i++) { + int entryOffset = entryOffsets[i]; + int keyLength = U.getInt(data, entryOffset + KEY_SIZE_OFFSET); + if (keyLength == 0) { + // No entry is available for this index, so continue iterating + continue; + } + int entryArrayIdx = entryOffset + KEY_OFFSET - Unsafe.ARRAY_BYTE_BASE_OFFSET; + String key = new String(data, entryArrayIdx, keyLength, StandardCharsets.UTF_8); + int count = U.getInt(data, entryOffset + COUNT_OFFSET); + short minValue = U.getShort(data, entryOffset + MIN_VALUE_OFFSET); + short maxValue = U.getShort(data, entryOffset + MAX_VALUE_OFFSET); + long sum = U.getLong(data, entryOffset + VALUE_SUM_OFFSET); + KeyResult result = new KeyResult(count, minValue, maxValue, sum); + KeyResult existingResult = resultMap.get(key); + if (existingResult == null) { + resultMap.put(key, result); + } + else { + existingResult.merge(result); + } + } + } + + } + +} diff --git a/src/main/java/dev/morling/onebrc/CalculateAverage_shipilev.java b/src/main/java/dev/morling/onebrc/CalculateAverage_shipilev.java new file mode 100644 index 000000000..f8b78a050 --- /dev/null +++ b/src/main/java/dev/morling/onebrc/CalculateAverage_shipilev.java @@ -0,0 +1,729 @@ +/* + * Copyright 2023 The original authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package dev.morling.onebrc; + +import java.io.IOException; +import java.lang.reflect.InaccessibleObjectException; +import java.lang.reflect.InvocationTargetException; +import java.lang.reflect.Method; +import java.nio.ByteBuffer; +import java.nio.ByteOrder; +import java.nio.MappedByteBuffer; +import java.nio.channels.FileChannel; +import java.nio.file.Path; +import java.nio.file.StandardOpenOption; +import java.util.Arrays; +import java.util.concurrent.*; +import java.util.function.Supplier; + +public class CalculateAverage_shipilev { + + // Detour: This implementation tries to balance the speed and readability. + // + // While the original contest suggests we pull off every trick in the + // book to get the peak performance, here we set a more pragmatic goal: + // how fast we can get without going too far into hacks. Or, putting it + // in another way, what would be the reasonably fast implementation that + // would *also* pass a code review in a reasonable project, would be usable + // in production without waking people up in the middle of the night, and + // would work through JDK updates, upgrades, and migrations. + // + // To that end, this implementation uses vanilla and standard Java as much + // as possible, without relying on Unsafe tricks and preview features. + // When any non-standard things are used, they are guarded by a feature flag, + // which allows to cleanly turn them off when anything goes off the rails. + // + // For performance reasons, the implementation takes more care to be reliably + // parallel to survive I/O stalls and scheduling oddities. This would not + // show up in laboratory conditions, but it is a necessary thing for a reliable + // code in production. It also tries not to miss simple optimizations without + // going too far into the woods. + // + // Note that some of the magic to run this workload fast in evaluation + // conditions is done separately in the invocation script. 
Most of that + // is only needed for the short-running scenarios. In real life, this code + // would likely run well without any of that. + // + + // ========================= Tunables ========================= + + // Workload data file. + private static final String FILE = "./measurements.txt"; + + // Max distance to search for line separator when scanning for line + // boundaries. 100 bytes name should fit into this power-of-two buffer. + // Should probably never change. + private static final int MAX_LINE_LENGTH = 128; + + // Fixed size of the measurements map. Must be the power of two. Should + // be large enough to accomodate all the station names. Rules say there are + // 10K station names max, so anything more than 16K works well. + private static final int MAP_SIZE = 1 << 15; + + // The largest mmap-ed chunk. This can be be Integer.MAX_VALUE, but + // it is normally tuned down to seed the workers with smaller mmap regions + // more efficiently. This also allows to incrementally unmap chunks as we + // complete working on them. + private static final int MMAP_CHUNK_SIZE = Integer.MAX_VALUE / 32; + + // The largest slice as unit of work, processed serially by a worker. + // Set it too low and there would be more tasks and less batching, but + // more parallelism. Set it too high, and the reverse would be true. + // Something around a large page would likely hit the right balance. + private static final int UNIT_SLICE_SIZE = 4 * 1024 * 1024; + + // Employ direct unmapping techniques to alleviate the cost of system + // unmmapping on process termination. This matters for very short runs + // on highly parallel machines. This unfortunately calls into private + // methods of buffers themselves. If not available on target JVM, the + // feature would automatically turn off. + private static final boolean DIRECT_UNMMAPS = true; + + // ========================= Storage ========================= + + // Thread-local measurement maps, each thread gets one. 
+ // This allows workers to work nearly unimpeded without synchronization. + // Even though crude, avoid lambdas here to alleviate startup costs. + private static final ThreadLocal MAPS = ThreadLocal.withInitial(new Supplier<>() { + @Override + public MeasurementsMap get() { + MeasurementsMap m = new MeasurementsMap(); + ALL_MAPS.add(m); + return m; + } + }); + + // After worker threads finish, the data is available here. The reporting + // code would pull the maps from here, once all workers finish. + private static final ConcurrentLinkedQueue ALL_MAPS = new ConcurrentLinkedQueue<>(); + + // Releasable mmaped buffers that workers are done with. These can be un-mapped + // in background. Main thread would wait on this queue, until it gets the poison + // pill from the root task. + private static final LinkedBlockingQueue RELEASABLE_BUFFERS = new LinkedBlockingQueue<>(); + private static final ByteBuffer RELEASABLE_BUFFER_POISON_PILL = ByteBuffer.allocate(1); + + // ========================= MEATY GRITTY PARTS: PARSE AND AGGREGATE ========================= + + public static final class Bucket { + // Raw station name, encoded as two prefixes and the name tail, + // its total length, and hash. + public final byte[] nameTail; + public final int len; + public final int hash; + public final int prefix1, prefix2; + + // Temperature values, in 10x scale. + public long sum; + public int count; + public int min; + public int max; + + public Bucket(ByteBuffer slice, int begin, int end, int hash, int temp) { + len = end - begin; + + // Decode the station name. It is handy to have a few prefixes + // available to simplify matches later. + int tailStart = 0; + if (len >= 8) { + prefix1 = slice.getInt(begin + 0); + prefix2 = slice.getInt(begin + 4); + tailStart += 8; + } + else if (len >= 4) { + prefix1 = slice.getInt(begin + 0); + prefix2 = 0; + tailStart += 4; + } + else { + prefix1 = 0; + prefix2 = 0; + } + + // The rest goes to tail byte array. 
We are checking reading it on hot-path. + // Therefore, it is convenient to keep allocation for names near the buckets. + // One can avoid this by carefully recording the tail in a separate field, + // like the prefixes above, but this is simple enough to gain enough perf. + int tailLen = len - tailStart; + nameTail = new byte[tailLen]; + slice.get(begin + tailStart, nameTail, 0, tailLen); + + // Seed the bucket with initial value. + this.hash = hash; + this.sum = temp; + this.count = 1; + this.min = temp; + this.max = temp; + } + + // Little helper method to compare the array with given ByteBuffer range. + public boolean matches(ByteBuffer cand, int begin, int end) { + int origLen = len; + int candLen = end - begin; + if (origLen != candLen) { + return false; + } + + // Check the prefixes first, if we can. + int tailStart = 0; + if (origLen >= 8) { + if (prefix1 != cand.getInt(begin)) { + return false; + } + if (prefix2 != cand.getInt(begin + 4)) { + return false; + } + tailStart += 8; + } + else if (origLen >= 4) { + if (prefix1 != cand.getInt(begin)) { + return false; + } + tailStart += 4; + } + + // Check the rest. + for (int i = 0; i < origLen - tailStart; i++) { + if (nameTail[i] != cand.get(begin + tailStart + i)) { + return false; + } + } + return true; + } + + // Check if current Bucket matches another. + public boolean matches(Bucket other) { + return len == other.len && + prefix1 == other.prefix1 && + prefix2 == other.prefix2 && + Arrays.equals(nameTail, other.nameTail); + } + + // Merge the temp value. Hot-path, should be fairly efficient. + public void merge(int value) { + sum += value; + count++; + + // We rarely do the updates, so these branches are almost + // never taken. Writing them as explicit branches instead of + // Math.{min,max} improves performance a bit. + if (value < min) { + min = value; + } + if (value > max) { + max = value; + } + } + + // Merge the buckets. Called during reporting, not a hot path. 
+ public void merge(Bucket s) { + sum += s.sum; + count += s.count; + min = Math.min(min, s.min); + max = Math.max(max, s.max); + } + + public Row toRow() { + // Reconstruct the name first. The prefixes and the tail were copied + // from the little-endian slice, so we need to match the endianness here. + ByteBuffer bb = ByteBuffer.allocate(len); + bb.order(ByteOrder.LITTLE_ENDIAN); + if (len >= 4) { + bb.putInt(prefix1); + } + if (len >= 8) { + bb.putInt(prefix2); + } + bb.put(nameTail); + + return new Row( + new String(Arrays.copyOf(bb.array(), len)), + Math.round((double) min) / 10.0, + Math.round((double) sum / count) / 10.0, + Math.round((double) max) / 10.0); + } + } + + // Quick and dirty linear-probing hash map. YOLO. + public static final class MeasurementsMap { + // Individual map buckets. Inlining these straight into map complicates + // the implementation without much of the performance improvement. + // The map is likely sparse, so whatever footprint loss we have due to + // Bucket headers we gain by allocating the buckets lazily. The memory + // dereference costs are still high in both cases. The additional benefit + // for explicit fields in Bucket is that we only need to pay for a single + // null-check on bucket instead of multiple range-checks on inlined array. + private final Bucket[] buckets = new Bucket[MAP_SIZE]; + + // Fast path is inlined in seqCompute. This is a slow-path that is taken + // rarely, usually when there is a hash collision. We normally do not enter here. + private void updateSlow(ByteBuffer name, int begin, int end, int hash, int temp) { + int idx = hash & (MAP_SIZE - 1); + + while (true) { + Bucket cur = buckets[idx]; + if (cur == null) { + // No bucket yet, lucky us. Create the bucket and be done. + buckets[idx] = new Bucket(name, begin, end, hash, temp); + return; + } + else if ((cur.hash == hash) && cur.matches(name, begin, end)) { + // Same as bucket fastpath. 
Check for collision by checking the full hash + // first (since the index is truncated by map size), and then the exact name. + cur.merge(temp); + return; + } + else { + // No dice. Keep searching. + idx = (idx + 1) & (MAP_SIZE - 1); + } + } + } + + // Same as update(), really, but for merging maps. See the comments there. + public void merge(MeasurementsMap otherMap) { + for (Bucket other : otherMap.buckets) { + if (other == null) + continue; + int idx = other.hash & (MAP_SIZE - 1); + while (true) { + Bucket cur = buckets[idx]; + if (cur == null) { + buckets[idx] = other; + break; + } + else if ((cur.hash == other.hash) && cur.matches(other)) { + cur.merge(other); + break; + } + else { + idx = (idx + 1) & (MAP_SIZE - 1); + } + } + } + } + + // Convert from internal representation to the rows. This does several + // major things: filters away null-s, instantates full Strings, and + // computes the final rows. + public int fill(Row[] rows) { + int idx = 0; + for (Bucket bucket : buckets) { + if (bucket == null) + continue; + rows[idx++] = bucket.toRow(); + } + return idx; + } + } + + // The heavy-weight, where most of the magic happens. This is not a usual + // RecursiveAction, but rather a CountedCompleter in order to be more robust + // in presence of I/O stalls and other scheduling irregularities. + public static final class ParsingTask extends CountedCompleter { + private final MappedByteBuffer mappedBuf; + private final ByteBuffer buf; + + // Entered from the root task, records the original mmap-ed slice + // for later cleanup. + public ParsingTask(CountedCompleter p, MappedByteBuffer mappedBuf) { + super(p); + this.mappedBuf = mappedBuf; + this.buf = mappedBuf; + } + + // Entered from the other parsing tasks. + public ParsingTask(CountedCompleter p, ByteBuffer buf) { + super(p); + this.mappedBuf = null; + this.buf = buf; + } + + @Override + public void compute() { + try { + internalCompute(); + } + catch (Exception e) { + // Meh, YOLO. 
+ e.printStackTrace(); + throw new IllegalStateException("Internal error", e); + } + } + + @Override + public void onCompletion(CountedCompleter caller) { + // FJP API: Would be called when this task completes. At that point, + // we know the mmap-ed slice is not needed anymore, and can give it + // out for unmmaps. We do not do unmmap here, let the main thread + // handle it for us, as we go on doing other hot work. + if (DIRECT_UNMMAPS && (mappedBuf != null)) { + RELEASABLE_BUFFERS.offer(mappedBuf); + } + } + + private void internalCompute() throws Exception { + int len = buf.limit(); + if (len > UNIT_SLICE_SIZE) { + // Still a large chunk, let's split it in half. + int mid = len / 2; + + // Figure out the boundary that does not split the line. + int w = mid + MAX_LINE_LENGTH; + while (buf.get(w - 1) != '\n') { + w--; + } + mid = w; + + // Fork out! The stack depth would be shallow enough for us to + // execute one of the computations directly. + // FJP API: Tell there is a pending task. + setPendingCount(1); + new ParsingTask(this, buf.slice(0, mid)).fork(); + + // The stack depth would be shallow enough for us to + // execute one of the computations directly. + new ParsingTask(this, buf.slice(mid, len - mid)).compute(); + } + else { + // Small enough chunk, time to process it. + // The call to seqCompute would normally be non-inlined. + // Do setup stuff here to save inlining budget. + MeasurementsMap map = MAPS.get(); + + // Force the order we need for bit extraction to work. This fits + // most of the hardware very well without introducing platform + // dependencies. Note that it would be wrong to use nativeOrder() + // here, because we _need_ a particular byte ordering for our + // computations to work. It just so happens that most hardware + // we have is LE. + buf.order(ByteOrder.LITTLE_ENDIAN); + + // Go! + seqCompute(map, buf, len); + + // FJP API: Notify that this task have completed. 
+ tryComplete(); + } + } + + private void seqCompute(MeasurementsMap map, ByteBuffer origSlice, int length) throws IOException { + Bucket[] buckets = map.buckets; + + // Slice up our slice! Pecular note here: this instantiates a full new buffer + // object, which allows compiler to trust its fields more thoroughly. + ByteBuffer slice = origSlice.slice(); + + // New slice lost the endianness setting, set it up as the original slice. + slice.order(ByteOrder.LITTLE_ENDIAN); + + // Touch the buffer once to let the compiler eject the common checks + // for this slice from the loop here. This is an odd, flaky, and sometimes + // desperate, but a safe thing to do. + slice.get(0); + + int idx = 0; + while (idx < length) { + // Parse out the name, computing the hash on the fly. + // Reading with ints allows us to guarantee that read would always + // be in bounds, since the temperature+EOL is at least 4 bytes + // long themselves. This implementation prefers simplicity over + // advanced tricks like SWAR. + int nameBegin = idx; + int nameHash = 0; + + outer: while (true) { + int intName = slice.getInt(idx); + for (int c = 0; c < 4; c++) { + int b = (intName >> (c << 3)) & 0xFF; + if (b == ';') { + idx += c + 1; + break outer; + } + nameHash ^= b * 82805; + } + idx += 4; + } + int nameEnd = idx - 1; + + // Parse out the temperature. The rules specify temperatures + // are within -99.9..99.9. This means even in the shortest case of + // "0.0", we are not out of bounds for the int-sized read. + int intTemp = slice.getInt(idx); + + int neg = 1; + if ((intTemp & 0xFF) == '-') { + // Unlucky, there is a sign. Record it, shift one byte and read + // the remaining digit again. Surprisingly, doing a second read + // is not significantly worse than reading into long and trying + // to do bit shifts on it. But it is significantly simpler. 
+ neg = -1; + intTemp >>>= 8; + intTemp |= slice.get(idx + 4) << 24; + idx++; + } + + // Since the sign is consumed, we are only left with two cases, + // which means we can trivially extract the number from int. + int temp = 0; + if ((intTemp >>> 24) == '\n') { + // Case 1: EOL-digitL-point-digitH + temp = (((intTemp & 0xFF)) - '0') * 10 + + ((intTemp >> 16) & 0xFF) - '0'; + idx += 4; + } + else { + // Case 2: digitL-point-digitH-digitHH + temp = (((intTemp & 0xFF)) - '0') * 100 + + (((intTemp >> 8) & 0xFF) - '0') * 10 + + (((intTemp >>> 24)) - '0'); + idx += 5; + } + + // All done, just flip the sign, if needed. + temp *= neg; + + // Time to update! + Bucket bucket = buckets[nameHash & (MAP_SIZE - 1)]; + if ((bucket != null) && (nameHash == bucket.hash) && bucket.matches(slice, nameBegin, nameEnd)) { + // Lucky fast path: matching bucket hit. Most of the time we complete here. + bucket.merge(temp); + } + else { + // Unlucky, slow path. The method would not be inlined, it is useful + // to give it the original slice, so that we keep current hot slice + // metadata provably unmodified. + map.updateSlow(origSlice, nameBegin, nameEnd, nameHash, temp); + } + } + } + } + + // Fork out the initial tasks. We would normally just fork out one large + // task and let it split, but unfortunately buffer API does not allow us + // "long" start-s and length-s. So we have to chunk at least by mmap-ed + // size first. It is a CountedCompleter for the same reason ParsingTask is. + // This also gives us a very nice opportunity to process mmap-ed chunks + // one by one, thus allowing incremental unmmaps. + public static final class RootTask extends CountedCompleter { + public RootTask() { + super(null); + } + + @Override + public void compute() { + try { + internalCompute(); + } + catch (Exception e) { + // Meh, YOLO. 
+ e.printStackTrace(); + throw new IllegalStateException("Internal error", e); + } + } + + private void internalCompute() throws Exception { + ByteBuffer buf = ByteBuffer.allocateDirect(MAX_LINE_LENGTH); + FileChannel fc = FileChannel.open(Path.of(FILE), StandardOpenOption.READ); + + long start = 0; + long size = fc.size(); + while (start < size) { + long end = Math.min(size, start + MMAP_CHUNK_SIZE); + + // Read a little chunk into a little buffer. + long minEnd = Math.max(0, end - MAX_LINE_LENGTH); + buf.rewind(); + fc.read(buf, minEnd); + + // Figure out the boundary that does not split the line. + int w = MAX_LINE_LENGTH; + while (buf.get(w - 1) != '\n') { + w--; + } + end = minEnd + w; + + // Fork out the large slice. + long len = end - start; + MappedByteBuffer slice = fc.map(FileChannel.MapMode.READ_ONLY, start, len); + start += len; + + // FJP API: Announce we have a pending task before forking. + addToPendingCount(1); + + // ...and fork it! + new ParsingTask(this, slice).fork(); + } + + // All mappings are up, can close the channel now. + fc.close(); + + // FJP API: We have finished, try to complete the whole task tree. + propagateCompletion(); + } + + @Override + public void onCompletion(CountedCompleter caller) { + // FJP API: This would be called when root task completes along with + // all subtasks. This means the processing is done, we can go and + // tell main thread about that. + try { + RELEASABLE_BUFFERS.put(RELEASABLE_BUFFER_POISON_PILL); + } + catch (Exception e) { + throw new IllegalStateException(e); + } + } + } + + // ========================= Invocation ========================= + + public static void main(String[] args) throws Exception { + // Instantiate a separate FJP to match the parallelism accurately, without + // relying on common pool defaults. 
+ ForkJoinPool pool = new ForkJoinPool(Runtime.getRuntime().availableProcessors()); + + // This little line carries the whole world + pool.submit(new RootTask()); + + // While the root task is working, prepare what we need for the + // end of the run. Go and try to report something to prepare the + // reporting code for execution. This prepares classes, storage, + // and some profiles for eventual execution. + MeasurementsMap map = new MeasurementsMap(); + Row[] rows = new Row[MAP_SIZE]; + StringBuilder sb = new StringBuilder(16384); + + report(map, rows, sb); + sb.setLength(0); + + // Nothing else is left to do preparation-wise. Now see if we can clean up + // buffers that tasks do not need anymore. The root task would communicate + // that it is done by giving us a poison pill. + ByteBuffer buf; + while ((buf = RELEASABLE_BUFFERS.take()) != RELEASABLE_BUFFER_POISON_PILL) { + DirectUnmaps.invokeCleaner(buf); + } + + // All done. Merge results from thread-local maps... + for (MeasurementsMap m : ALL_MAPS) { + map.merge(m); + } + + // ...and truly report them + System.out.println(report(map, rows, sb)); + } + + private static String report(MeasurementsMap map, Row[] rows, StringBuilder sb) { + int rowCount = map.fill(rows); + Arrays.sort(rows, 0, rowCount); + + sb.append("{"); + boolean first = true; + for (int c = 0; c < rowCount; c++) { + if (c != 0) { + sb.append(", "); + } + rows[c].printTo(sb); + } + sb.append("}"); + return sb.toString(); + } + + // ========================= Reporting ========================= + + private static final class Row implements Comparable { + private final String name; + private final double min; + private final double max; + private final double avg; + + public Row(String name, double min, double avg, double max) { + this.name = name; + this.min = min; + this.max = max; + this.avg = avg; + } + + @Override + public int compareTo(Row o) { + return name.compareTo(o.name); + } + + public void printTo(StringBuilder sb) { + 
sb.append(name); + sb.append("="); + sb.append(min); + sb.append("/"); + sb.append(avg); + sb.append("/"); + sb.append(max); + } + } + + // ========================= Utils ========================= + + // Tries to figure out if calling Cleaner directly on the DirectByteBuffer + // is possible. If this fails, we still go on. + public static class DirectUnmaps { + private static final Method METHOD_GET_CLEANER; + private static final Method METHOD_CLEANER_CLEAN; + + static Method getCleaner() { + try { + ByteBuffer dbb = ByteBuffer.allocateDirect(1); + Method m = dbb.getClass().getMethod("cleaner"); + m.setAccessible(true); + return m; + } + catch (NoSuchMethodException | InaccessibleObjectException e) { + return null; + } + } + + static Method getCleanerClean(Method methodGetCleaner) { + try { + ByteBuffer dbb = ByteBuffer.allocateDirect(1); + Object cleaner = methodGetCleaner.invoke(dbb); + Method m = cleaner.getClass().getMethod("clean"); + m.setAccessible(true); + m.invoke(cleaner); + return m; + } + catch (NoSuchMethodException | IllegalAccessException | InvocationTargetException | InaccessibleObjectException e) { + return null; + } + } + + static { + METHOD_GET_CLEANER = getCleaner(); + METHOD_CLEANER_CLEAN = (METHOD_GET_CLEANER != null) ? 
getCleanerClean(METHOD_GET_CLEANER) : null; + } + + public static void invokeCleaner(ByteBuffer bb) { + if (METHOD_GET_CLEANER == null || METHOD_CLEANER_CLEAN == null) { + return; + } + try { + METHOD_CLEANER_CLEAN.invoke(METHOD_GET_CLEANER.invoke(bb)); + } + catch (InvocationTargetException | IllegalAccessException e) { + throw new IllegalStateException("Cannot happen at this point", e); + } + } + } + +} diff --git a/src/main/java/dev/morling/onebrc/CalculateAverage_slovdahl.java b/src/main/java/dev/morling/onebrc/CalculateAverage_slovdahl.java new file mode 100644 index 000000000..d22409177 --- /dev/null +++ b/src/main/java/dev/morling/onebrc/CalculateAverage_slovdahl.java @@ -0,0 +1,278 @@ +/* + * Copyright 2023 The original authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package dev.morling.onebrc; + +import java.io.IOException; +import java.lang.foreign.Arena; +import java.lang.foreign.MemorySegment; +import java.lang.foreign.ValueLayout; +import java.nio.channels.FileChannel; +import java.nio.file.Paths; +import java.nio.file.StandardOpenOption; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashMap; +import java.util.Map; +import java.util.StringJoiner; +import java.util.TreeMap; +import java.util.concurrent.ExecutionException; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.Future; + +import static java.util.stream.Collectors.collectingAndThen; +import static java.util.stream.Collectors.groupingBy; +import static java.util.stream.Collectors.reducing; + +public class CalculateAverage_slovdahl { + + private static final String FILE = "./measurements.txt"; + + private static final int SLICE_SIZE = 1_048_576; + + public static void main(String[] args) throws IOException, ExecutionException, InterruptedException { + int segments = Runtime.getRuntime().availableProcessors() - 1; + + try (Arena arena = Arena.ofShared(); + FileChannel channel = FileChannel.open(Paths.get(FILE), StandardOpenOption.READ); + ExecutorService executor = Executors.newThreadPerTaskExecutor(Executors.defaultThreadFactory())) { + + long size = channel.size(); + if (size < SLICE_SIZE) { + segments = 1; + } + + long idealSegmentSize = size / segments; + + MemorySegment mappedFile = channel.map(FileChannel.MapMode.READ_ONLY, 0, size, arena); + var futures = new ArrayList>>(segments); + + long segmentStart = 0; + for (int i = 1; i <= segments; i++) { + long actualSegmentOffset = idealSegmentSize * i; + + while (actualSegmentOffset < size && mappedFile.get(ValueLayout.JAVA_BYTE, actualSegmentOffset) != (byte) '\n') { + actualSegmentOffset++; + } + + long end = actualSegmentOffset - segmentStart; + if (segmentStart + actualSegmentOffset - segmentStart + 1 < size) { + 
end += 1; + } + + MemorySegment segment = mappedFile.asSlice(segmentStart, end); + segmentStart = actualSegmentOffset + 1; + + futures.add(executor.submit(() -> { + byte[] array = new byte[SLICE_SIZE]; + MemorySegment bufferSegment = MemorySegment.ofArray(array); + + long position = 0; + long segmentSize = segment.byteSize(); + Map map = HashMap.newHashMap(10_000); + + while (position < segmentSize) { + long thisSliceSize = Math.min(SLICE_SIZE, segmentSize - position); + + MemorySegment.copy( + segment, + ValueLayout.JAVA_BYTE, + position, + bufferSegment, + ValueLayout.JAVA_BYTE, + 0, + thisSliceSize); + + if (thisSliceSize % 8 != 0) { + bufferSegment + .asSlice(thisSliceSize) + .fill((byte) 0); + } + + int newlinePosition = 0; + int startOffset = 0; + while (true) { + int semicolonPosition = nextOccurrence(array, (byte) ';', startOffset); + if (semicolonPosition < 0) { + break; + } + + int eolPosition = nextOccurrence(array, (byte) '\n', startOffset); + if (eolPosition < 0) { + if (semicolonPosition < segmentSize - 4) { + break; + } + else { + newlinePosition = (int) segmentSize; + } + } + else { + newlinePosition = eolPosition; + } + + byte[] nameArray = new byte[semicolonPosition - startOffset]; + System.arraycopy(array, startOffset, nameArray, 0, semicolonPosition - startOffset); + Station station = new Station(nameArray); + + int temperatureStart = semicolonPosition + 1; + int temperatureLength = newlinePosition - semicolonPosition - 1; + + int temperatureIntValue; + if (array[temperatureStart] == '-') { + if (temperatureLength == 4) { + temperatureIntValue = -1 * ((array[temperatureStart + 1] - 48) * 10 + + (array[temperatureStart + 3] - 48)); + } + else { + temperatureIntValue = -1 * ((array[temperatureStart + 1] - 48) * 100 + + (array[temperatureStart + 2] - 48) * 10 + + (array[temperatureStart + 4] - 48)); + } + } + else { + if (temperatureLength == 3) { + temperatureIntValue = (array[temperatureStart] - 48) * 10 + + (array[temperatureStart + 2] - 48); + 
} + else { + temperatureIntValue = (array[temperatureStart] - 48) * 100 + + (array[temperatureStart + 1] - 48) * 10 + + (array[temperatureStart + 3] - 48); + } + } + + MeasurementAggregator agg = map.get(station); + if (agg == null) { + agg = new MeasurementAggregator(); + map.put(station, agg); + } + + agg.min = Math.min(agg.min, temperatureIntValue); + agg.max = Math.max(agg.max, temperatureIntValue); + agg.sum += temperatureIntValue; + agg.count++; + + // Make sure the next iteration won't find the same delimiters. + array[semicolonPosition] = (byte) 0; + array[newlinePosition] = (byte) 0; + + startOffset = newlinePosition + 1; + } + + position += newlinePosition + 1; + } + + return map; + })); + } + + TreeMap result = futures.stream() + .map(f -> { + try { + return f.get(); + } + catch (InterruptedException | ExecutionException e) { + throw new RuntimeException(e); + } + }) + .flatMap(m -> m.entrySet().stream()) + .collect(groupingBy( + e -> new String(e.getKey().name()), + TreeMap::new, + collectingAndThen( + reducing( + new MeasurementAggregator(), + Map.Entry::getValue, + (agg1, agg2) -> { + MeasurementAggregator res = new MeasurementAggregator(); + res.min = Math.min(agg1.min, agg2.min); + res.max = Math.max(agg1.max, agg2.max); + res.sum = agg1.sum + agg2.sum; + res.count = agg1.count + agg2.count; + + return res; + }), + agg -> new ResultRow( + agg.min / 10.0, + (Math.round((agg.sum / 10.0) * 10.0) / 10.0) / agg.count, + agg.max / 10.0)))); + + System.out.println(result); + + executor.shutdownNow(); + } + } + + private static int nextOccurrence(byte[] data, byte needle, int offset) { + while (offset < data.length) { + if (data[offset] == needle) { + return offset; + } + offset++; + } + return -1; + } + + private record Station(byte[] name, int hash) { + private Station(byte[] name) { + this(name, Arrays.hashCode(name)); + } + + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } + if (o == null || getClass() != 
o.getClass()) { + return false; + } + Station station = (Station) o; + return Arrays.equals(name, station.name); + } + + @Override + public int hashCode() { + return hash; + } + + @Override + public String toString() { + return new StringJoiner(", ", Station.class.getSimpleName() + "[", "]") + .add("name=" + new String(name)) + .add("hash=" + hash) + .toString(); + } + } + + private static class MeasurementAggregator { + private int min = Integer.MAX_VALUE; + private int max = Integer.MIN_VALUE; + private long sum; + private long count; + } + + private record ResultRow(double min, double mean, double max) { + + @Override + public String toString() { + return round(min) + "/" + round(mean) + "/" + round(max); + } + + private double round(double value) { + return Math.round(value * 10.0) / 10.0; + } + } +} diff --git a/src/main/java/dev/morling/onebrc/CalculateAverage_stephenvonworley.java b/src/main/java/dev/morling/onebrc/CalculateAverage_stephenvonworley.java new file mode 100644 index 000000000..a51b24d71 --- /dev/null +++ b/src/main/java/dev/morling/onebrc/CalculateAverage_stephenvonworley.java @@ -0,0 +1,530 @@ +/* + * Copyright 2023 The original authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package dev.morling.onebrc; + +import java.io.*; +import java.lang.foreign.*; +import java.lang.reflect.Field; +import java.nio.*; +import java.nio.channels.*; +import java.nio.file.*; +import java.nio.charset.*; +import java.util.*; +import java.util.concurrent.*; +import java.util.stream.*; +import sun.misc.Unsafe; + +/* + * Stephen Von Worley's (von@von.io) entry to Gunnar Morling's "One Billion Row Challenge": + * https://www.morling.dev/blog/one-billion-row-challenge/ + * + * To compute the desired result, this program: + * 1. Memory maps the input file. + * 2. Partitions the file into a queue of Chunks, which delimit sections of the file. + * 3. Spawns one thread per processor. Each thread: + * a. Allocates a Table, which will accumulate names and tallies (min/max/total/count). + * b. Get a Chunk from the queue. + * c. Processes the Chunk using a parser that reads the Chunk simultaneously at three + * different, evenly-spaced locations, using heavily-optimized scalar code. + * d. Repeats steps b and c until there are no more Chunks. + * 4. Aggregates the resulting Tables into a treemap of names to Tallies. + * 5. Outputs the names and Tallies in ascending name order. + * + * Runs fastest as a natively-compiled, standalone binary, as might be produced by Graal's + * `native-image` utility. Tested with Oracle Graal 21.0.2. + * + * Incorporates code authored by a number of submitters, including Thomas Wue, Quan Anh + * Mai, and others. + * + * Thanks y'all, and Happy Rowing! 
+ * Steve + * von@von.io + * www.von.io + */ + +public class CalculateAverage_stephenvonworley { + + private static final int NAME_LIMIT = 10000; + + private static final long CHUNK_SIZE = 5000000; + private static final long CHUNK_PAD = 200; + private static final long CHUNK_PARSE3_LIMIT = 1000; + + private static final long GOLDEN_LONG = 0x9e3779b97f4a7c15L; + private static final long TALLY_BITS = 7; + private static final long TALLY_SIZE = 1L << TALLY_BITS; + private static final long HASH_BITS = 16; + private static final long HASH_MASK = ((1L << HASH_BITS) - 1) << TALLY_BITS; + private static final long TABLE_SIZE = 1L << (HASH_BITS + TALLY_BITS); + + private static final long OFFSET_MIN = 0; + private static final long OFFSET_MAX = 2; + private static final long OFFSET_COUNT = 4; + private static final long OFFSET_TOTAL = 8; + private static final long OFFSET_LEN = 16; + private static final long OFFSET_NAME = 17; + + private static final Unsafe unsafe; + static { + try { + Field f = Unsafe.class.getDeclaredField("theUnsafe"); + f.setAccessible(true); + unsafe = (Unsafe) f.get(null); + } + catch (Exception e) { + throw new RuntimeException("Exception initializing unsafe", e); + } + } + + public static void main(String[] args) throws IOException, InterruptedException { + if (!List.of(args).contains("--worker")) { + spawnWorker(); + return; + } + + MemorySegment in = map("./measurements.txt"); + Queue chunks = partition(in); + List tables = process(chunks, processorCount()); + Map nameToTally = aggregate(tables); + + System.out.println(nameToTally); + System.out.close(); + } + + // credit: "Spawn worker" code by Thomas Wue + private static void spawnWorker() throws IOException { + ProcessHandle.Info info = ProcessHandle.current().info(); + ArrayList workerCommand = new ArrayList<>(); + info.command().ifPresent(workerCommand::add); + info.arguments().ifPresent(args -> workerCommand.addAll(Arrays.asList(args))); + workerCommand.add("--worker"); + new 
ProcessBuilder().command(workerCommand).inheritIO().redirectOutput(ProcessBuilder.Redirect.PIPE) + .start().getInputStream().transferTo(System.out); + } + + private static int processorCount() { + return Runtime.getRuntime().availableProcessors(); + } + + private static MemorySegment map(String path) throws IOException { + FileChannel file = FileChannel.open(Path.of(path), StandardOpenOption.READ); + return file.map(FileChannel.MapMode.READ_ONLY, 0, file.size(), Arena.global()); + } + + private static MemorySegment allocate(long len) { + return Arena.global().allocate(len, 4096); + } + + private static Queue partition(MemorySegment in) throws IOException { + Queue chunks = new ConcurrentLinkedDeque<>(); + long address = in.address(); + long len = in.byteSize(); + long start = address; + while (start < address + len) { + long end = start + CHUNK_SIZE; + if (end >= address + len) { + end = address + len; + } + else { + end = afterNewline(end); + } + Chunk chunk; + if (end + CHUNK_PAD < address + len) { + chunk = new Chunk(start, end); + } + else { + MemorySegment padded = allocate(end - start + CHUNK_PAD); + MemorySegment.copy(in, start - address, padded, 0, end - start); + chunk = new Chunk(padded.address(), padded.address() + (end - start)); + } + chunks.offer(chunk); + start = end; + } + return chunks; + } + + private static List
process(Queue chunks, int threadCount) throws InterruptedException { + List
tables = Collections.synchronizedList(new ArrayList<>(threadCount)); + List threads = new ArrayList<>(threadCount); + for (int i = 0; i < threadCount; i++) { + Thread thread = new Thread(() -> { + Table t = new Table(); + tables.add(t); + Chunk chunk; + while ((chunk = chunks.poll()) != null) { + parse3(chunk.start(), chunk.end(), t); + } + }); + threads.add(thread); + thread.start(); + } + for (Thread thread : threads) { + thread.join(); + } + return tables; + } + + private static Map aggregate(List
tables) { + Map nameToTally = new TreeMap<>(); + tables.forEach(table -> aggregate(nameToTally, table)); + return nameToTally; + } + + private static void aggregate(Map nameToTally, Table table) { + table.process((name, min, max, total, count) -> nameToTally.computeIfAbsent(name, _ -> new Tally()).add(min, max, total, count)); + } + + private static void parse3(long start, long end, Table table) { + + if (end - start < CHUNK_PARSE3_LIMIT) { + parse1(start, end, table); + return; + } + + final long tallies = table.tallies; + + long part = (end - start) / 3; + long startA = start; + long startB = afterNewline(start + part); + long startC = afterNewline(start + 2 * part); + long endA = startB; + long endB = startC; + long endC = end; + + while (true) { + long N = min( + remaining(startA, endA), + remaining(startB, endB), + remaining(startC, endC)); + + if (N <= 1) { + break; + } + + while (N > 0) { + long semicolonA = semicolon(startA); + long semicolonB = semicolon(startB); + long semicolonC = semicolon(startC); + + long tallyA = locate(startA, semicolonA, tallies, table); + long tallyB = locate(startB, semicolonB, tallies, table); + long tallyC = locate(startC, semicolonC, tallies, table); + + long numberA = number(semicolonA); + tally(tallyA, numberA); + long numberB = number(semicolonB); + tally(tallyB, numberB); + long numberC = number(semicolonC); + tally(tallyC, numberC); + + startA = next(semicolonA); + startB = next(semicolonB); + startC = next(semicolonC); + N--; + } + } + + parse1(startA, endA, table); + parse1(startB, endB, table); + parse1(startC, endC, table); + } + + private static void parse1(long start, long end, Table table) { + final long tallies = table.tallies; + + while (start < end) { + long semicolon = semicolon(start); + long tally = locate(start, semicolon, tallies, table); + long number = number(semicolon); + tally(tally, number); + start = next(semicolon); + } + } + + private static long remaining(long start, long end) { + return (end - 
start) >> 7; + } + + // credit: Adapted from code by Thomas Wue + private static long semicolon(long start) { + start++; + long word = getLong(start); + long input = word ^ 0x3B3B3B3B3B3B3B3BL; + long tmp = (input - 0x0101010101010101L) & ~input & 0x8080808080808080L; + if (tmp != 0) { + return start + (Long.numberOfTrailingZeros(tmp) >>> 3); + } + while (true) { + start += 8; + long word2 = getLong(start); + long input2 = word2 ^ 0x3B3B3B3B3B3B3B3BL; + long tmp2 = (input2 - 0x0101010101010101L) & ~input2 & 0x8080808080808080L; + if (tmp2 != 0) { + return start + (Long.numberOfTrailingZeros(tmp2) >>> 3); + } + } + } + + private static long trim(long value, long remove) { + long shift = remove << 3; + return ((value << shift) >>> shift); + } + + // https://softwareengineering.stackexchange.com/questions/402542/where-do-magic-hashing-constants-like-0x9e3779b9-and-0x9e3779b1-come-from + private static long locate(long start, long semicolon, long tallies, Table table) { + long len = semicolon - start; + long word = getLong(start); + if (len <= 8) { + word = trim(word, 8 - len); + long hash = word * GOLDEN_LONG; + long offset = (hash >>> (64 - HASH_BITS)) << TALLY_BITS; + while (true) { + long tally = tallies + offset; + long tlen = getByte(tally + OFFSET_LEN); + long tword = getLong(tally + OFFSET_NAME); + if (len == tlen && word == tword) { + return tally; + } + if (tword == 0) { + init(tally, start, len, table); + return tally; + } + offset = (offset + TALLY_SIZE) & HASH_MASK; + } + } + else { + long word2 = getLong(semicolon - 8); + long hash = (word + word2) * GOLDEN_LONG; + long offset = (hash >>> (64 - HASH_BITS)) << TALLY_BITS; + while (true) { + long tally = tallies + offset; + long tword = getLong(tally + OFFSET_NAME); + if (len <= 16) { + long tlen = getByte(tally + OFFSET_LEN); + long tword2 = getLong(tally + OFFSET_NAME + len - 8); + if (len == tlen && word == tword && word2 == tword2) { + return tally; + } + } + else { + if (match(tally, start, len)) { + 
return tally; + } + } + if (tword == 0) { + init(tally, start, len, table); + return tally; + } + offset = (offset + TALLY_SIZE) & HASH_MASK; + } + } + } + + private static void init(long tally, long start, long len, Table t) { + setShort(tally + OFFSET_MIN, Short.MAX_VALUE); + setShort(tally + OFFSET_MAX, Short.MIN_VALUE); + setByte(tally + OFFSET_LEN, (byte) len); + copyMemory(start, tally + OFFSET_NAME, len); + t.addresses[t.count++] = tally; + } + + private static boolean match(long tally, long name, long len) { + if (getByte(tally + OFFSET_LEN) != len) { + return false; + } + long a = name; + long b = tally + OFFSET_NAME; + while (len > 7) { + if (getLong(a) != getLong(b)) { + return false; + } + a += 8; + b += 8; + len -= 8; + } + if (len > 0) { + return (trim(getLong(a), 8 - len) == getLong(b)); + } + return true; + } + + // credit: Wonderfully-fast number parsing implementation by Quan Anh Mai + private static long number(long semicolon) { + long numberWord = getLong(semicolon + 1); + int decimalSepPos = Long.numberOfTrailingZeros(~numberWord & 0x10101000); + int shift = 28 - decimalSepPos; + // signed is -1 if negative, 0 otherwise + long signed = (~numberWord << 59) >> 63; + long designMask = ~(signed & 0xFF); + // Align the number to a specific position and transform the ascii to digit value + long digits = ((numberWord & designMask) << shift) & 0x0F000F0F00L; + // Now digits is in the form 0xUU00TTHH00 (UU: units digit, TT: tens digit, HH: hundreds digit) + // 0xUU00TTHH00 * (100 * 0x1000000 + 10 * 0x10000 + 1) = + // 0x000000UU00TTHH00 + 0x00UU00TTHH000000 * 10 + 0xUU00TTHH00000000 * 100 + long absValue = ((digits * 0x640a0001) >>> 32) & 0x3FF; + return (absValue ^ signed) - signed; + } + + private static void tally(long tally, long number) { + short min = getShort(tally + OFFSET_MIN); + short max = getShort(tally + OFFSET_MAX); + int count = getInt(tally + OFFSET_COUNT); + long total = getLong(tally + OFFSET_TOTAL); + if (number < min) { + 
setShort(tally + OFFSET_MIN, (short) number); + } + if (number > max) { + setShort(tally + OFFSET_MAX, (short) number); + } + setInt(tally + OFFSET_COUNT, count + 1); + setLong(tally + OFFSET_TOTAL, total + number); + } + + private static long next(long semicolon) { + long word = getLong(semicolon); + semicolon += 7; + semicolon -= (~word >>> (24 + 4)) & 1; + semicolon -= (~word >>> (16 + 4 - 1)) & 2; + return semicolon; + } + + private static long afterNewline(long start) { + while (getByte(start) != '\n') + start++; + return start + 1; + } + + private static long min(long a, long b, long c) { + return Math.min(a, Math.min(b, c)); + } + + private static byte getByte(long addr) { + return unsafe.getByte(addr); + } + + private static short getShort(long addr) { + return unsafe.getShort(addr); + } + + private static int getInt(long addr) { + return unsafe.getInt(addr); + } + + private static long getLong(long addr) { + return unsafe.getLong(addr); + } + + private static void setByte(long addr, byte value) { + unsafe.putByte(addr, value); + } + + private static void setShort(long addr, short value) { + unsafe.putShort(addr, value); + } + + private static void setInt(long addr, int value) { + unsafe.putInt(addr, value); + } + + private static void setLong(long addr, long value) { + unsafe.putLong(addr, value); + } + + private static void copyMemory(long srcAddr, long dstAddr, long count) { + unsafe.copyMemory(srcAddr, dstAddr, count); + } + + private static record Chunk(long start, long end) { + } + + private static class Table { + public final long tallies; + public final long[] addresses; + public int count; + + public Table() { + tallies = allocate(TABLE_SIZE).address(); + addresses = new long[NAME_LIMIT]; + count = 0; + } + + public void process(Consumer consumer) { + for (int i = 0; i < count; i++) { + long address = addresses[i]; + int len = getByte(address + OFFSET_LEN); + byte[] bytes = new byte[len]; + for (int j = 0; j < len; j++) { + bytes[j] = 
getByte(address + OFFSET_NAME + j); + } + String name = new String(bytes, StandardCharsets.UTF_8); + long min = getShort(address + OFFSET_MIN); + long max = getShort(address + OFFSET_MAX); + long total = getLong(address + OFFSET_TOTAL); + long count = getInt(address + OFFSET_COUNT); + consumer.consume(name, min, max, total, count); + } + } + } + + private static interface Consumer { + public void consume(String name, long min, long max, long total, long count); + } + + private static class Tally { + + private long min; + private long max; + private long total; + private long count; + + public Tally() { + this.min = Short.MAX_VALUE; + this.max = Short.MIN_VALUE; + this.total = 0; + this.count = 0; + } + + public void add(long addMin, long addMax, long addTotal, long addCount) { + min = Math.min(min, addMin); + max = Math.max(max, addMax); + total += addTotal; + count += addCount; + } + + public long getMin() { + return min; + } + + public long getMax() { + return max; + } + + public long getTotal() { + return total; + } + + public long getCount() { + return count; + } + + public String toString() { + return String.format("%.1f/%.1f/%.1f", + getMin() / 10.0, + getTotal() / (10.0 * getCount()), + getMax() / 10.0); + } + } +} diff --git a/src/main/java/dev/morling/onebrc/CalculateAverage_sudhirtumati.java b/src/main/java/dev/morling/onebrc/CalculateAverage_sudhirtumati.java new file mode 100644 index 000000000..813c561a5 --- /dev/null +++ b/src/main/java/dev/morling/onebrc/CalculateAverage_sudhirtumati.java @@ -0,0 +1,304 @@ +/* + * Copyright 2023 The original authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package dev.morling.onebrc; + +import java.io.FileInputStream; +import java.io.IOException; +import java.nio.ByteBuffer; +import java.nio.channels.FileChannel; +import java.util.Arrays; +import java.util.HashMap; +import java.util.Map; +import java.util.TreeMap; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.Semaphore; + +public class CalculateAverage_sudhirtumati { + + private static final String FILE = "./measurements.txt"; + private static final int bufferSize = 8192; + private static final byte SEMICOLON = (byte) ';'; + private static final byte NEW_LINE = (byte) '\n'; + private static final int THREAD_COUNT = Runtime.getRuntime().availableProcessors(); + private static final Semaphore PERMITS = new Semaphore(THREAD_COUNT); + private static final MeasurementAggregator globalAggregator = new MeasurementAggregator(); + private static final Semaphore AGGREGATOR_PERMITS = new Semaphore(1); + private static final Map LOCATION_STORE = new ConcurrentHashMap<>(); + + public static void main(String[] args) throws IOException, InterruptedException { + CalculateAverage_sudhirtumati instance = new CalculateAverage_sudhirtumati(); + instance.chunkProcess(); + } + + private void chunkProcess() throws IOException, InterruptedException { + try (FileInputStream is = new FileInputStream(FILE); + FileChannel fc = is.getChannel()) { + for (int i = 0; i < THREAD_COUNT; i++) { + PERMITS.acquire(); + Thread t = new ChunkProcessingThread(i, fc); + t.setName(STR."T\{i}"); + t.start(); + } + do { + Thread.sleep(100); + } while 
(PERMITS.availablePermits() != THREAD_COUNT); + } + System.out.println(globalAggregator.getResult()); + } + + static class ChunkProcessingThread extends Thread { + + private int index; + private final FileChannel fc; + private final MeasurementAggregator aggregator; + + ChunkProcessingThread(int index, FileChannel fc) { + this.index = index; + this.fc = fc; + aggregator = new MeasurementAggregator(); + } + + @Override + public void run() { + ByteBuffer buffer = ByteBuffer.allocate(index == 0 ? bufferSize : bufferSize + 50); + long fcPosition = index == 0 ? 0 : (((long) index * bufferSize) - 50); + try { + while (fc.read(buffer, fcPosition) != -1) { + buffer.flip(); + if (index != 0 /* && fc.position() != bufferSize */) { + seekStartPos(buffer); + } + processBuffer(buffer); + index += THREAD_COUNT; + fcPosition = ((long) index * bufferSize) - 50L; + if (buffer.capacity() == 8192) { + buffer = ByteBuffer.allocate(bufferSize + 50); + } + buffer.position(0); + } + AGGREGATOR_PERMITS.acquire(); + globalAggregator.process(aggregator); + AGGREGATOR_PERMITS.release(); + } + catch (IOException | InterruptedException e) { + throw new RuntimeException(e); + } + PERMITS.release(); + } + + private void processBuffer(ByteBuffer buffer) throws IOException { + int mStartMark = buffer.position(); + int tStartMark = -1; + int count = buffer.position(); + do { + byte b = buffer.get(count); + if (b == SEMICOLON) { + tStartMark = count; + } + else if (b == NEW_LINE) { + byte[] locArr = new byte[tStartMark - mStartMark]; + byte[] tempArr = new byte[count - tStartMark]; + buffer.get(mStartMark, locArr); + buffer.get(mStartMark + locArr.length + 1, tempArr); + aggregator.process(locArr, tempArr); + mStartMark = count + 1; + } + count++; + } while (count < buffer.limit()); + } + + private void seekStartPos(ByteBuffer buffer) { + int i = buffer.limit() > 50 ? 
49 : buffer.limit() - 2; + for (; i >= 0; i--) { + if (buffer.get(i) == NEW_LINE) { + buffer.position(i + 1); + break; + } + } + } + } + + static final class MeasurementAggregator { + private static final long MAX_VALUE_DIVIDE_10 = Long.MAX_VALUE / 10; + private final Map store = new HashMap<>(); + + public void process(MeasurementAggregator other) { + other.store.forEach((k, v) -> { + Measurement m = store.get(k); + if (m == null) { + m = new Measurement(); + store.put(k, m); + } + m.process(v); + }); + } + + public void process(byte[] location, byte[] temperature) throws IOException { + Integer hashCode = Arrays.hashCode(location); + LOCATION_STORE.computeIfAbsent(hashCode, _ -> new String(location)); + // String loc = new String(location); + Measurement measurement = store.get(hashCode); + if (measurement == null) { + measurement = new Measurement(); + store.put(hashCode, measurement); + } + double tempD = parseDouble(temperature); + measurement.process(tempD); + } + + public double parseDouble(byte[] bytes) { + long value = 0; + int exp = 0; + boolean negative = false; + int decimalPlaces = Integer.MIN_VALUE; + int index = 0; + int ch = bytes[index]; + if (ch == '-') { + negative = true; + ch = bytes[++index]; + } + while (index < bytes.length) { + if (ch >= '0' && ch <= '9') { + while (value >= MAX_VALUE_DIVIDE_10) { + value >>>= 1; + exp++; + } + value = value * 10 + (ch - '0'); + decimalPlaces++; + + } + else if (ch == '.') { + decimalPlaces = 0; + } + if (index == bytes.length - 1) { + break; + } + else { + ch = bytes[++index]; + } + } + return asDouble(value, exp, negative, decimalPlaces); + } + + private static double asDouble(long value, int exp, boolean negative, int decimalPlaces) { + if (decimalPlaces > 0 && value < Long.MAX_VALUE / 2) { + if (value < Long.MAX_VALUE / (1L << 32)) { + exp -= 32; + value <<= 32; + } + if (value < Long.MAX_VALUE / (1L << 16)) { + exp -= 16; + value <<= 16; + } + if (value < Long.MAX_VALUE / (1L << 8)) { + exp -= 8; + 
value <<= 8; + } + if (value < Long.MAX_VALUE / (1L << 4)) { + exp -= 4; + value <<= 4; + } + if (value < Long.MAX_VALUE / (1L << 2)) { + exp -= 2; + value <<= 2; + } + if (value < Long.MAX_VALUE / (1L << 1)) { + exp -= 1; + value <<= 1; + } + } + for (; decimalPlaces > 0; decimalPlaces--) { + exp--; + long mod = value % 5; + value /= 5; + int modDiv = 1; + if (value < Long.MAX_VALUE / (1L << 4)) { + exp -= 4; + value <<= 4; + modDiv <<= 4; + } + if (value < Long.MAX_VALUE / (1L << 2)) { + exp -= 2; + value <<= 2; + modDiv <<= 2; + } + if (value < Long.MAX_VALUE / (1L << 1)) { + exp -= 1; + value <<= 1; + modDiv <<= 1; + } + if (decimalPlaces > 1) + value += modDiv * mod / 5; + else + value += (modDiv * mod + 4) / 5; + } + final double d = Math.scalb((double) value, exp); + return negative ? -d : d; + } + + public String getResult() { + Map sortedMap = new TreeMap<>(); + store.forEach((k, v) -> sortedMap.put(LOCATION_STORE.get(k), v)); + return sortedMap.toString(); + } + } + + static final class Measurement { + private double min = Double.POSITIVE_INFINITY; + private double max = Double.NEGATIVE_INFINITY; + private double sum; + private long count; + + public void process(double value) { + if (value < min) { + min = value; + } + if (value > max) { + max = value; + } + sum += value; + count++; + } + + public void process(Measurement other) { + if (other.min < min) { + this.min = other.min; + } + if (other.max > max) { + this.max = other.max; + } + this.sum += other.sum; + this.count += other.count; + } + + public String toString() { + ResultRow result = new ResultRow(min, sum, count, max); + return result.toString(); + } + } + + private record ResultRow(double min, double sum, double count, double max) { + + public String toString() { + return STR."\{round(min)}/\{round((Math.round(sum * 10.0) / 10.0) / count)}/\{round(max)}"; + } + + private double round(double value) { + return Math.round(value * 10.0) / 10.0; + } + } + +} diff --git 
a/src/main/java/dev/morling/onebrc/CalculateAverage_thomaswue.java b/src/main/java/dev/morling/onebrc/CalculateAverage_thomaswue.java index 10e92fc6d..8e311fa89 100644 --- a/src/main/java/dev/morling/onebrc/CalculateAverage_thomaswue.java +++ b/src/main/java/dev/morling/onebrc/CalculateAverage_thomaswue.java @@ -15,305 +15,394 @@ */ package dev.morling.onebrc; -import sun.misc.Unsafe; - import java.io.IOException; -import java.lang.foreign.Arena; -import java.lang.reflect.Field; import java.nio.channels.FileChannel; -import java.nio.channels.FileChannel.MapMode; -import java.nio.charset.StandardCharsets; -import java.nio.file.Path; -import java.nio.file.StandardOpenOption; import java.util.*; -import java.util.stream.IntStream; +import java.util.concurrent.atomic.AtomicLong; /** - * Simple solution that memory maps the input file, then splits it into one segment per available core and uses - * sun.misc.Unsafe to directly access the mapped memory. Uses a long at a time when checking for collision. - *

- * Runs in 0.66s on my Intel i9-13900K - * Perf stats: - * 35,935,262,091 cpu_core/cycles/ - * 47,305,591,173 cpu_atom/cycles/ + * The solution starts a child worker process for the actual work such that clean up of the memory mapping can occur + * while the main process already returns with the result. The worker then memory maps the input file, creates a worker + * thread per available core, and then processes segments of size {@link #SEGMENT_SIZE} at a time. The segments are + * split into 3 parts and cursors for each of those parts are processing the segment simultaneously in the same thread. + * Results are accumulated into {@link Result} objects and a tree map is used to sequentially accumulate the results in + * the end. + * Runs in 0.31 on an Intel i9-13900K while the reference implementation takes 120.37s. + * Credit: + * Quan Anh Mai for branchless number parsing code + * Alfonso² Peterssen for suggesting memory mapping with unsafe and the subprocess idea + * Artsiom Korzun for showing the benefits of work stealing at 2MB segments instead of equal split between workers + * Jaromir Hamala for showing that avoiding the branch misprediction between <8 and 8-16 cases is a big win even if + * more work is performed + * Van Phu DO for demonstrating the lookup tables based on masks instead of bit shifting */ public class CalculateAverage_thomaswue { private static final String FILE = "./measurements.txt"; - - // Holding the current result for a single city. 
- private static class Result { - long lastNameLong, secondLastNameLong, nameAddress; - int nameLength, remainingShift; - int min, max, count; - long sum; - - private Result(long nameAddress) { - this.nameAddress = nameAddress; - this.min = Integer.MAX_VALUE; - this.max = Integer.MIN_VALUE; - } - - public String toString() { - return round(((double) min) / 10.0) + "/" + round((((double) sum) / 10.0) / count) + "/" + round(((double) max) / 10.0); - } - - private static double round(double value) { - return Math.round(value * 10.0) / 10.0; + private static final int MIN_TEMP = -999; + private static final int MAX_TEMP = 999; + private static final int MAX_NAME_LENGTH = 100; + private static final int MAX_CITIES = 10000; + private static final int SEGMENT_SIZE = 1 << 21; + private static final int HASH_TABLE_SIZE = 1 << 17; + + public static void main(String[] args) throws IOException, InterruptedException { + // Start worker subprocess if this process is not the worker. + if (args.length == 0 || !("--worker".equals(args[0]))) { + spawnWorker(); + return; } - // Accumulate another result into this one. - private void add(Result other) { - min = Math.min(min, other.min); - max = Math.max(max, other.max); - sum += other.sum; - count += other.count; - } + int numberOfWorkers = Runtime.getRuntime().availableProcessors(); + try (var fileChannel = FileChannel.open(java.nio.file.Path.of(FILE), java.nio.file.StandardOpenOption.READ)) { + long fileSize = fileChannel.size(); + final long fileStart = fileChannel.map(FileChannel.MapMode.READ_ONLY, 0, fileSize, java.lang.foreign.Arena.global()).address(); + final long fileEnd = fileStart + fileSize; + final AtomicLong cursor = new AtomicLong(fileStart); + + // Parallel processing of segments. 
+ Thread[] threads = new Thread[numberOfWorkers]; + List[] allResults = new List[numberOfWorkers]; + for (int i = 0; i < threads.length; ++i) { + final int index = i; + threads[i] = new Thread(() -> { + List results = new ArrayList<>(MAX_CITIES); + parseLoop(cursor, fileEnd, fileStart, results); + allResults[index] = results; + }); + threads[i].start(); + } + for (Thread thread : threads) { + thread.join(); + } - public String calcName() { - return new Scanner(nameAddress, nameAddress + nameLength).getString(nameLength); + // Final output. + System.out.println(accumulateResults(allResults)); + System.out.close(); } } - public static void main(String[] args) throws IOException { - // Calculate input segments. - int numberOfChunks = Runtime.getRuntime().availableProcessors(); - long[] chunks = getSegments(numberOfChunks); - - // Parallel processing of segments. - List> allResults = IntStream.range(0, chunks.length - 1).mapToObj(chunkIndex -> parseLoop(chunks[chunkIndex], chunks[chunkIndex + 1])) - .map(resultArray -> { - List results = new ArrayList<>(); - for (Result r : resultArray) { - if (r != null) { - results.add(r); - } - } - return results; - }).parallel().toList(); - - // Final output. - System.out.println(accumulateResults(allResults)); + private static void spawnWorker() throws IOException { + ProcessHandle.Info info = ProcessHandle.current().info(); + ArrayList workerCommand = new ArrayList<>(); + info.command().ifPresent(workerCommand::add); + info.arguments().ifPresent(args -> workerCommand.addAll(Arrays.asList(args))); + workerCommand.add("--worker"); + new ProcessBuilder().command(workerCommand).inheritIO().redirectOutput(ProcessBuilder.Redirect.PIPE) + .start().getInputStream().transferTo(System.out); } - // Accumulate results sequentially for simplicity. 
- private static TreeMap accumulateResults(List> allResults) { + private static TreeMap accumulateResults(List[] allResults) { TreeMap result = new TreeMap<>(); for (List resultArr : allResults) { for (Result r : resultArr) { - String name = r.calcName(); - Result current = result.putIfAbsent(name, r); + Result current = result.putIfAbsent(r.calcName(), r); if (current != null) { - current.add(r); + current.accumulate(r); } } } return result; } - // Main parse loop. - private static Result[] parseLoop(long chunkStart, long chunkEnd) { - Result[] results = new Result[1 << 18]; - Scanner scanner = new Scanner(chunkStart, chunkEnd); - while (scanner.hasNext()) { - long nameAddress = scanner.pos(); - long hash = 0; - - // Search for ';', one long at a time. - long word = scanner.getLong(); - int pos = findDelimiter(word); - if (pos != 8) { - scanner.add(pos); - word = mask(word, pos); - hash ^= word; - - Result existingResult = results[hashToIndex(hash, results)]; - if (existingResult != null && existingResult.lastNameLong == word) { - scanAndRecord(scanner, existingResult); - continue; - } + private static void parseLoop(AtomicLong counter, long fileEnd, long fileStart, List collectedResults) { + Result[] results = new Result[HASH_TABLE_SIZE]; + while (true) { + long current = counter.addAndGet(SEGMENT_SIZE) - SEGMENT_SIZE; + if (current >= fileEnd) { + return; + } + + long segmentEnd = nextNewLine(Math.min(fileEnd - 1, current + SEGMENT_SIZE)); + long segmentStart; + if (current == fileStart) { + segmentStart = current; } else { - scanner.add(8); - hash ^= word; - long prevWord = word; + segmentStart = nextNewLine(current) + 1; + } + + long dist = (segmentEnd - segmentStart) / 3; + long midPoint1 = nextNewLine(segmentStart + dist); + long midPoint2 = nextNewLine(segmentStart + dist + dist); + + Scanner scanner1 = new Scanner(segmentStart, midPoint1); + Scanner scanner2 = new Scanner(midPoint1 + 1, midPoint2); + Scanner scanner3 = new Scanner(midPoint2 + 1, 
segmentEnd); + while (true) { + if (!scanner1.hasNext()) { + break; + } + if (!scanner2.hasNext()) { + break; + } + if (!scanner3.hasNext()) { + break; + } + long word1 = scanner1.getLong(); + long word2 = scanner2.getLong(); + long word3 = scanner3.getLong(); + long delimiterMask1 = findDelimiter(word1); + long delimiterMask2 = findDelimiter(word2); + long delimiterMask3 = findDelimiter(word3); + long word1b = scanner1.getLongAt(scanner1.pos() + 8); + long word2b = scanner2.getLongAt(scanner2.pos() + 8); + long word3b = scanner3.getLongAt(scanner3.pos() + 8); + long delimiterMask1b = findDelimiter(word1b); + long delimiterMask2b = findDelimiter(word2b); + long delimiterMask3b = findDelimiter(word3b); + Result existingResult1 = findResult(word1, delimiterMask1, word1b, delimiterMask1b, scanner1, results, collectedResults); + Result existingResult2 = findResult(word2, delimiterMask2, word2b, delimiterMask2b, scanner2, results, collectedResults); + Result existingResult3 = findResult(word3, delimiterMask3, word3b, delimiterMask3b, scanner3, results, collectedResults); + long number1 = scanNumber(scanner1); + long number2 = scanNumber(scanner2); + long number3 = scanNumber(scanner3); + record(existingResult1, number1); + record(existingResult2, number2); + record(existingResult3, number3); + } + + while (scanner1.hasNext()) { + long word = scanner1.getLong(); + long pos = findDelimiter(word); + long wordB = scanner1.getLongAt(scanner1.pos() + 8); + long posB = findDelimiter(wordB); + record(findResult(word, pos, wordB, posB, scanner1, results, collectedResults), scanNumber(scanner1)); + } + while (scanner2.hasNext()) { + long word = scanner2.getLong(); + long pos = findDelimiter(word); + long wordB = scanner2.getLongAt(scanner2.pos() + 8); + long posB = findDelimiter(wordB); + record(findResult(word, pos, wordB, posB, scanner2, results, collectedResults), scanNumber(scanner2)); + } + while (scanner3.hasNext()) { + long word = scanner3.getLong(); + long pos = 
findDelimiter(word); + long wordB = scanner3.getLongAt(scanner3.pos() + 8); + long posB = findDelimiter(wordB); + record(findResult(word, pos, wordB, posB, scanner3, results, collectedResults), scanNumber(scanner3)); + } + } + } + + private static final long[] MASK1 = new long[]{ 0xFFL, 0xFFFFL, 0xFFFFFFL, 0xFFFFFFFFL, 0xFFFFFFFFFFL, 0xFFFFFFFFFFFFL, 0xFFFFFFFFFFFFFFL, 0xFFFFFFFFFFFFFFFFL, + 0xFFFFFFFFFFFFFFFFL }; + private static final long[] MASK2 = new long[]{ 0x00L, 0x00L, 0x00L, 0x00L, 0x00L, 0x00L, 0x00L, 0x00L, 0xFFFFFFFFFFFFFFFFL }; + + private static Result findResult(long initialWord, long initialDelimiterMask, long wordB, long delimiterMaskB, Scanner scanner, Result[] results, + List collectedResults) { + Result existingResult; + long word = initialWord; + long delimiterMask = initialDelimiterMask; + long hash; + long nameAddress = scanner.pos(); + long word2 = wordB; + long delimiterMask2 = delimiterMaskB; + if ((delimiterMask | delimiterMask2) != 0) { + int letterCount1 = Long.numberOfTrailingZeros(delimiterMask) >>> 3; // value between 1 and 8 + int letterCount2 = Long.numberOfTrailingZeros(delimiterMask2) >>> 3; // value between 0 and 8 + long mask = MASK2[letterCount1]; + word = word & MASK1[letterCount1]; + word2 = mask & word2 & MASK1[letterCount2]; + hash = word ^ word2; + existingResult = results[hashToIndex(hash, results)]; + scanner.add(letterCount1 + (letterCount2 & mask)); + if (existingResult != null && existingResult.firstNameWord == word && existingResult.secondNameWord == word2) { + return existingResult; + } + } + else { + // Slow-path for when the ';' could not be found in the first 16 bytes. 
+ hash = word ^ word2; + scanner.add(16); + while (true) { word = scanner.getLong(); - pos = findDelimiter(word); - if (pos != 8) { - scanner.add(pos); - word = mask(word, pos); + delimiterMask = findDelimiter(word); + if (delimiterMask != 0) { + int trailingZeros = Long.numberOfTrailingZeros(delimiterMask); + word = (word << (63 - trailingZeros)); + scanner.add(trailingZeros >>> 3); hash ^= word; - Result existingResult = results[hashToIndex(hash, results)]; - if (existingResult != null && existingResult.lastNameLong == word && existingResult.secondLastNameLong == prevWord) { - scanAndRecord(scanner, existingResult); - continue; - } + break; } else { scanner.add(8); hash ^= word; - while (true) { - word = scanner.getLong(); - pos = findDelimiter(word); - if (pos != 8) { - scanner.add(pos); - word = mask(word, pos); - hash ^= word; - break; - } - else { - scanner.add(8); - hash ^= word; - } - } } } + } - // Save length of name for later. - int nameLength = (int) (scanner.pos() - nameAddress); - scanner.add(1); - - long numberWord = scanner.getLong(); - int decimalSepPos = Long.numberOfTrailingZeros(~numberWord & 0x10101000); - int number = convertIntoNumber(decimalSepPos, numberWord); - scanner.add((decimalSepPos >>> 3) + 3); - - // Final calculation for index into hash table. - int tableIndex = hashToIndex(hash, results); - outer: while (true) { - Result existingResult = results[tableIndex]; - if (existingResult == null) { - existingResult = newEntry(results, nameAddress, tableIndex, nameLength, scanner); - } - // Check for collision. 
- int i = 0; - for (; i < nameLength + 1 - 8; i += 8) { - if (scanner.getLongAt(existingResult.nameAddress + i) != scanner.getLongAt(nameAddress + i)) { - tableIndex = (tableIndex + 1) & (results.length - 1); - continue outer; - } - } - if (((existingResult.lastNameLong ^ scanner.getLongAt(nameAddress + i)) << existingResult.remainingShift) == 0) { - record(existingResult, number); - break; - } - else { + // Save length of name for later. + int nameLength = (int) (scanner.pos() - nameAddress); + + // Final calculation for index into hash table. + int tableIndex = hashToIndex(hash, results); + outer: while (true) { + existingResult = results[tableIndex]; + if (existingResult == null) { + existingResult = newEntry(results, nameAddress, tableIndex, nameLength, scanner, collectedResults); + } + // Check for collision. + int i = 0; + for (; i < nameLength + 1 - 8; i += 8) { + if (scanner.getLongAt(existingResult.nameAddress + i) != scanner.getLongAt(nameAddress + i)) { // Collision error, try next. - tableIndex = (tableIndex + 1) & (results.length - 1); + tableIndex = (tableIndex + 31) & (results.length - 1); + continue outer; } } + + int remainingShift = (64 - ((nameLength + 1 - i) << 3)); + if (((scanner.getLongAt(existingResult.nameAddress + i) ^ (scanner.getLongAt(nameAddress + i))) << remainingShift) == 0) { + break; + } + else { + // Collision error, try next. 
+ tableIndex = (tableIndex + 31) & (results.length - 1); + } } - return results; + return existingResult; } - private static void scanAndRecord(Scanner scanPtr, Result existingResult) { - scanPtr.add(1); - long numberWord = scanPtr.getLong(); - int decimalSepPos = Long.numberOfTrailingZeros(~numberWord & 0x10101000); - int number = convertIntoNumber(decimalSepPos, numberWord); - scanPtr.add((decimalSepPos >>> 3) + 3); - record(existingResult, number); + private static long nextNewLine(long prev) { + while (true) { + long currentWord = Scanner.UNSAFE.getLong(prev); + long input = currentWord ^ 0x0A0A0A0A0A0A0A0AL; + long pos = (input - 0x0101010101010101L) & ~input & 0x8080808080808080L; + if (pos != 0) { + prev += Long.numberOfTrailingZeros(pos) >>> 3; + break; + } + else { + prev += 8; + } + } + return prev; } - private static void record(Result existingResult, int number) { - existingResult.min = Math.min(existingResult.min, number); - existingResult.max = Math.max(existingResult.max, number); + private static long scanNumber(Scanner scanPtr) { + long numberWord = scanPtr.getLongAt(scanPtr.pos() + 1); + int decimalSepPos = Long.numberOfTrailingZeros(~numberWord & 0x10101000L); + long number = convertIntoNumber(decimalSepPos, numberWord); + scanPtr.add((decimalSepPos >>> 3) + 4); + return number; + } + + private static void record(Result existingResult, long number) { + if (number < existingResult.min) { + existingResult.min = (short) number; + } + if (number > existingResult.max) { + existingResult.max = (short) number; + } existingResult.sum += number; existingResult.count++; } private static int hashToIndex(long hash, Result[] results) { - int hashAsInt = (int) (hash ^ (hash >>> 32)); - int finalHash = (hashAsInt ^ (hashAsInt >>> 18)); - return (finalHash & (results.length - 1)); - } - - private static long mask(long word, int pos) { - return word & (-1L >>> ((8 - pos - 1) << 3)); + long hashAsInt = hash ^ (hash >>> 33) ^ (hash >>> 15); + return (int) 
(hashAsInt & (results.length - 1)); } - // Special method to convert a number in the specific format into an int value without branches created by - // Quan Anh Mai. - private static int convertIntoNumber(int decimalSepPos, long numberWord) { + // Special method to convert a number in the ascii number into an int without branches created by Quan Anh Mai. + private static long convertIntoNumber(int decimalSepPos, long numberWord) { int shift = 28 - decimalSepPos; // signed is -1 if negative, 0 otherwise long signed = (~numberWord << 59) >> 63; long designMask = ~(signed & 0xFF); - // Align the number to a specific position and transform the ascii code - // to actual digit value in each byte + // Align the number to a specific position and transform the ascii to digit value long digits = ((numberWord & designMask) << shift) & 0x0F000F0F00L; - // Now digits is in the form 0xUU00TTHH00 (UU: units digit, TT: tens digit, HH: hundreds digit) // 0xUU00TTHH00 * (100 * 0x1000000 + 10 * 0x10000 + 1) = - // 0x000000UU00TTHH00 + - // 0x00UU00TTHH000000 * 10 + - // 0xUU00TTHH00000000 * 100 - // Now TT * 100 has 2 trailing zeroes and HH * 100 + TT * 10 + UU < 0x400 - // This results in our value lies in the bit 32 to 41 of this product - // That was close :) + // 0x000000UU00TTHH00 + 0x00UU00TTHH000000 * 10 + 0xUU00TTHH00000000 * 100 long absValue = ((digits * 0x640a0001) >>> 32) & 0x3FF; - long value = (absValue ^ signed) - signed; - return (int) value; + return (absValue ^ signed) - signed; } - private static int findDelimiter(long word) { + private static long findDelimiter(long word) { long input = word ^ 0x3B3B3B3B3B3B3B3BL; - long tmp = (input - 0x0101010101010101L) & ~input & 0x8080808080808080L; - return Long.numberOfTrailingZeros(tmp) >>> 3; + return (input - 0x0101010101010101L) & ~input & 0x8080808080808080L; } - private static Result newEntry(Result[] results, long nameAddress, int hash, int nameLength, Scanner scanner) { - Result r = new Result(nameAddress); + 
private static Result newEntry(Result[] results, long nameAddress, int hash, int nameLength, Scanner scanner, List collectedResults) { + Result r = new Result(); results[hash] = r; - - int i = 0; - for (; i < nameLength + 1 - 8; i += 8) { - r.secondLastNameLong = (scanner.getLongAt(nameAddress + i)); + int totalLength = nameLength + 1; + r.firstNameWord = scanner.getLongAt(nameAddress); + r.secondNameWord = scanner.getLongAt(nameAddress + 8); + if (totalLength <= 8) { + r.firstNameWord = r.firstNameWord & MASK1[totalLength - 1]; + r.secondNameWord = 0; + } + else if (totalLength < 16) { + r.secondNameWord = r.secondNameWord & MASK1[totalLength - 9]; } - r.remainingShift = (64 - (nameLength + 1 - i) << 3); - r.lastNameLong = (scanner.getLongAt(nameAddress + i) & (-1L >>> r.remainingShift)); - r.nameLength = nameLength; + r.nameAddress = nameAddress; + collectedResults.add(r); return r; } - private static long[] getSegments(int numberOfChunks) throws IOException { - try (var fileChannel = FileChannel.open(Path.of(FILE), StandardOpenOption.READ)) { - long fileSize = fileChannel.size(); - long segmentSize = (fileSize + numberOfChunks - 1) / numberOfChunks; - long[] chunks = new long[numberOfChunks + 1]; - long mappedAddress = fileChannel.map(MapMode.READ_ONLY, 0, fileSize, Arena.global()).address(); - chunks[0] = mappedAddress; - long endAddress = mappedAddress + fileSize; - Scanner s = new Scanner(mappedAddress, mappedAddress + fileSize); - for (int i = 1; i < numberOfChunks; ++i) { - long chunkAddress = mappedAddress + i * segmentSize; - // Align to first row start. 
- while (chunkAddress < endAddress && (s.getLongAt(chunkAddress++) & 0xFF) != '\n') { - // nop - } - chunks[i] = Math.min(chunkAddress, endAddress); + private static final class Result { + long firstNameWord, secondNameWord; + short min, max; + int count; + long sum; + long nameAddress; + + private Result() { + this.min = MAX_TEMP; + this.max = MIN_TEMP; + } + + public String toString() { + return round(((double) min) / 10.0) + "/" + round((((double) sum) / 10.0) / count) + "/" + round(((double) max) / 10.0); + } + + private static double round(double value) { + return Math.round(value * 10.0) / 10.0; + } + + private void accumulate(Result other) { + if (other.min < min) { + min = other.min; + } + if (other.max > max) { + max = other.max; } - chunks[numberOfChunks] = endAddress; - return chunks; + sum += other.sum; + count += other.count; } - } - private static class Scanner { + public String calcName() { + Scanner scanner = new Scanner(nameAddress, nameAddress + MAX_NAME_LENGTH + 1); + int nameLength = 0; + while (scanner.getByteAt(nameAddress + nameLength) != ';') { + nameLength++; + } + byte[] array = new byte[nameLength]; + for (int i = 0; i < nameLength; ++i) { + array[i] = scanner.getByteAt(nameAddress + i); + } + return new String(array, java.nio.charset.StandardCharsets.UTF_8); + } + } - private static final Unsafe UNSAFE = initUnsafe(); + private static final class Scanner { + private static final sun.misc.Unsafe UNSAFE = initUnsafe(); + private long pos; + private final long end; - private static Unsafe initUnsafe() { + private static sun.misc.Unsafe initUnsafe() { try { - Field theUnsafe = Unsafe.class.getDeclaredField("theUnsafe"); + java.lang.reflect.Field theUnsafe = sun.misc.Unsafe.class.getDeclaredField("theUnsafe"); theUnsafe.setAccessible(true); - return (Unsafe) theUnsafe.get(Unsafe.class); + return (sun.misc.Unsafe) theUnsafe.get(sun.misc.Unsafe.class); } catch (NoSuchFieldException | IllegalAccessException e) { throw new RuntimeException(e); } 
} - long pos, end; - public Scanner(long start, long end) { this.pos = start; this.end = end; @@ -327,7 +416,7 @@ long pos() { return pos; } - void add(int delta) { + void add(long delta) { pos += delta; } @@ -339,10 +428,8 @@ long getLongAt(long pos) { return UNSAFE.getLong(pos); } - public String getString(int nameLength) { - byte[] bytes = new byte[nameLength]; - UNSAFE.copyMemory(null, pos, bytes, Unsafe.ARRAY_BYTE_BASE_OFFSET, nameLength); - return new String(bytes, StandardCharsets.UTF_8); + byte getByteAt(long pos) { + return UNSAFE.getByte(pos); } } -} +} \ No newline at end of file diff --git a/src/main/java/dev/morling/onebrc/CalculateAverage_tivrfoa.java b/src/main/java/dev/morling/onebrc/CalculateAverage_tivrfoa.java new file mode 100644 index 000000000..e6e963281 --- /dev/null +++ b/src/main/java/dev/morling/onebrc/CalculateAverage_tivrfoa.java @@ -0,0 +1,386 @@ +/* + * Copyright 2023 The original authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package dev.morling.onebrc; + +import java.io.IOException; +import java.nio.ByteBuffer; +import java.nio.ByteOrder; +import java.nio.channels.FileChannel; +import java.nio.charset.StandardCharsets; +import java.nio.file.Path; +import java.nio.file.StandardOpenOption; +import java.util.*; +import java.util.concurrent.atomic.AtomicInteger; + +/** + * Solution based on thomaswue solution, commit: + * commit d0a28599c293d3afe3291fc3cf169a7b25ae9ae6 + * Author: Thomas Wuerthinger + * Date: Sun Jan 21 20:13:48 2024 +0100 + * + * The goal here was to try to improve the runtime of his 10k + * solution of: 00:04.516 + * + * With Thomas latest changes, his time is probably much better + * already, and maybe even 1st place for the 10k too. + * See: https://github.com/gunnarmorling/1brc/pull/606 + * + * As I was not able to make it faster ... so I'll make it slower, + * because my current solution should *not* stay at the top, as it added + * basically nothing. + */ +public class CalculateAverage_tivrfoa { + private static final String FILE = "./measurements.txt"; + + private static final int MAX_CITIES = 10_000; + private static final int BUCKETS_LEN = 1 << 17; + private static final int LAST_BUCKET_ENTRY = BUCKETS_LEN - 1; + private static final int NUM_CPUS = Runtime.getRuntime().availableProcessors(); + private static final AtomicInteger chunkIdx = new AtomicInteger(); + private static long[] chunks; + private static int numChunks; + + // Holding the current result for a single city. 
+ private static class Result { + long lastNameLong; + long[] name; + int count; + short min, max; + long sum; + + private Result(short number, long nameAddress, byte nameLength, Scanner scanner) { + this.min = number; + this.max = number; + this.sum = number; + this.count = 1; + + name = new long[(nameLength / Long.BYTES) + 1]; + int pos = 0, i = 0; + for (; i < nameLength + 1 - Long.BYTES; i += Long.BYTES) { + name[pos++] = scanner.getLongAt(nameAddress + i); + } + + int remainingShift = (64 - (nameLength + 1 - i) << 3); + lastNameLong = (scanner.getLongAt(nameAddress + i) << remainingShift); + name[pos] = lastNameLong >> remainingShift; + } + + public String toString() { + return round(((double) min) / 10.0) + "/" + round((((double) sum) / 10.0) / count) + "/" + round(((double) max) / 10.0); + } + + private static double round(double value) { + return Math.round(value * 10.0) / 10.0; + } + + // Accumulate another result into this one. + private void add(Result other) { + if (other.min < min) { + min = other.min; + } + if (other.max > max) { + max = other.max; + } + sum += other.sum; + count += other.count; + } + + private void add(short number) { + if (number < min) { + min = number; + } + if (number > max) { + max = number; + } + sum += number; + count++; + } + + public String calcName() { + ByteBuffer bb = ByteBuffer.allocate(name.length * Long.BYTES).order(ByteOrder.nativeOrder()); + bb.asLongBuffer().put(name); + byte[] array = bb.array(); + int i = 0; + while (array[i++] != ';') + ; + return new String(array, 0, i - 1, StandardCharsets.UTF_8); + } + } + + /** + * From: + * https://github.com/OpenHFT/Zero-Allocation-Hashing/blob/ea/src/main/java/net/openhft/hashing/XXH3.java + * + * Less collisions, but it will make the code slower. 
xD + * + * One interesting thing about Thomas' solution that I + * started to work with (d0a28599), is that it basically does not have + * any collision for the small data set (sometimes none!), but it + * has lots of collisions for the 10k, hence its poor performance. + * + */ + private static long XXH3_avalanche(long h64) { + h64 ^= h64 >>> 37; + h64 *= 0x165667919E3779F9L; + return h64 ^ (h64 >>> 32); + } + + private static final class SolveChunk extends Thread { + private int chunkStartIdx; + private Result[] results = new Result[MAX_CITIES]; + private Result[] buckets = new Result[BUCKETS_LEN]; + private int resIdx = 0; + + public SolveChunk(int chunkStartIdx) { + this.chunkStartIdx = chunkStartIdx; + } + + @Override + public void run() { + for (; chunkStartIdx < numChunks; chunkStartIdx = chunkIdx.getAndIncrement()) { + Scanner scanner = new Scanner(chunks[chunkStartIdx], chunks[chunkStartIdx + 1]); + long word = scanner.getLong(); + long pos = findDelimiter(word); + while (scanner.hasNext()) { + long nameAddress = scanner.pos(); + long hash = 0; + + while (true) { + if (pos != 0) { + pos = Long.numberOfTrailingZeros(pos) >>> 3; + scanner.add(pos); + word = mask(word, pos); + hash ^= XXH3_avalanche(word); + break; + } + else { + scanner.add(8); + hash ^= XXH3_avalanche(word); + } + + word = scanner.getLong(); + pos = findDelimiter(word); + } + + byte nameLength = (byte) (scanner.pos() - nameAddress); + short number = scanNumber(scanner); + + int tableIndex = hashToIndex(hash); + outer: while (true) { + Result existingResult = buckets[tableIndex]; + if (existingResult == null) { + var newResult = new Result(number, nameAddress, nameLength, scanner); + buckets[tableIndex] = newResult; + results[resIdx++] = newResult; + break; + } + int i = 0; + int namePos = 0; + for (; i < nameLength + 1 - 8; i += 8) { + if (namePos >= existingResult.name.length || existingResult.name[namePos++] != scanner.getLongAt(nameAddress + i)) { + tableIndex = (tableIndex + 31) & 
(LAST_BUCKET_ENTRY); + continue outer; + } + } + + int remainingShift = (64 - (nameLength + 1 - i) << 3); + if (((existingResult.lastNameLong ^ (scanner.getLongAt(nameAddress + i) << remainingShift)) == 0)) { + existingResult.add(number); + break; + } + else { + tableIndex = (tableIndex + 31) & (LAST_BUCKET_ENTRY); + } + } + + word = scanner.getLong(); + pos = findDelimiter(word); + } + } + } + } + + private static void mergeIntoFinalMap(TreeMap map, Result[] newResults) { + for (var r : newResults) { + if (r == null) + return; + Result current = map.putIfAbsent(r.calcName(), r); + if (current != null) { + current.add(r); + } + } + } + + public static void main(String[] args) throws InterruptedException, IOException { + chunks = getSegments(NUM_CPUS); + numChunks = chunks.length - 1; + final SolveChunk[] threads = new SolveChunk[NUM_CPUS]; + chunkIdx.set(NUM_CPUS); + for (int i = 0; i < NUM_CPUS; i++) { + threads[i] = new SolveChunk(i); + threads[i].start(); + } + + System.out.println(getMap(threads)); + System.out.close(); + } + + private static TreeMap getMap(SolveChunk[] threads) throws InterruptedException { + TreeMap map = new TreeMap<>(); + threads[0].join(); + for (var r : threads[0].results) { + if (r == null) + break; + map.put(r.calcName(), r); + } + for (int i = 1; i < NUM_CPUS; ++i) { + threads[i].join(); + mergeIntoFinalMap(map, threads[i].results); + } + + return map; + } + + private static short scanNumber(Scanner scanPtr) { + scanPtr.add(1); + long numberWord = scanPtr.getLong(); + int decimalSepPos = Long.numberOfTrailingZeros(~numberWord & 0x10101000); + int number = convertIntoNumber(decimalSepPos, numberWord); + scanPtr.add((decimalSepPos >>> 3) + 3); + return (short) number; + } + + private static int hashToIndex(long hash) { + int hashAsInt = (int) (hash ^ (hash >>> 28)); + int finalHash = (hashAsInt ^ (hashAsInt >>> 17)); + return (finalHash & LAST_BUCKET_ENTRY); + } + + private static long mask(long word, long pos) { + return (word << ((7 - 
pos) << 3)); + } + + // Special method to convert a number in the ascii number into an int without branches created by Quan Anh Mai. + private static int convertIntoNumber(int decimalSepPos, long numberWord) { + int shift = 28 - decimalSepPos; + // signed is -1 if negative, 0 otherwise + long signed = (~numberWord << 59) >> 63; + long designMask = ~(signed & 0xFF); + // Align the number to a specific position and transform the ascii to digit value + long digits = ((numberWord & designMask) << shift) & 0x0F000F0F00L; + // Now digits is in the form 0xUU00TTHH00 (UU: units digit, TT: tens digit, HH: hundreds digit) + // 0xUU00TTHH00 * (100 * 0x1000000 + 10 * 0x10000 + 1) = + // 0x000000UU00TTHH00 + 0x00UU00TTHH000000 * 10 + 0xUU00TTHH00000000 * 100 + long absValue = ((digits * 0x640a0001) >>> 32) & 0x3FF; + long value = (absValue ^ signed) - signed; + return (int) value; + } + + private static long findDelimiter(long word) { + long input = word ^ 0x3B3B3B3B3B3B3B3BL; + long tmp = (input - 0x0101010101010101L) & ~input & 0x8080808080808080L; + return tmp; + } + + /** + * - Split 70% of the file in even chunks for all cpus; + * - Create smaller chunks for the remainder of the file. 
+ */ + private static long[] getSegments(int cpus) throws IOException { + try (var fileChannel = FileChannel.open(Path.of(FILE), StandardOpenOption.READ)) { + final long fileSize = fileChannel.size(); + final long part1 = (long) (fileSize * 0.7); + final long part2 = (long) (fileSize * 0.2); + final long part3 = fileSize - part1 - part2; + final long bigChunkSize = (part1 - 1) / cpus; + final long smallChunkSize1 = (part2 - 1) / (cpus * 3); + final long smallChunkSize2 = (part3 - 1) / (cpus * 3); + final int numChunks = cpus + cpus * 3 + cpus * 3; + final long[] sizes = new long[numChunks]; + int l = 0, r = cpus; + Arrays.fill(sizes, l, r, bigChunkSize); + l = r; + r = l + cpus * 3; + Arrays.fill(sizes, l, r, smallChunkSize1); + l = r; + r = l + cpus * 3; + Arrays.fill(sizes, l, r, smallChunkSize2); + final long[] chunks = new long[sizes.length + 1]; + final long mappedAddress = fileChannel.map(FileChannel.MapMode.READ_ONLY, 0, fileSize, java.lang.foreign.Arena.global()).address(); + chunks[0] = mappedAddress; + final long endAddress = mappedAddress + fileSize; + final Scanner s = new Scanner(mappedAddress, mappedAddress + fileSize); + for (int i = 1, sizeIdx = 0; i < chunks.length - 1; ++i, sizeIdx = (sizeIdx + 1) % sizes.length) { + long chunkAddress = chunks[i - 1] + sizes[sizeIdx]; + // Align to first row start. 
+ while (chunkAddress < endAddress && (s.getLongAt(chunkAddress++) & 0xFF) != '\n') + ; + chunks[i] = Math.min(chunkAddress, endAddress); + // System.err.printf("Chunk size %d\n", chunks[i] - chunks[i - 1]); + } + chunks[chunks.length - 1] = endAddress; + // System.err.printf("Chunk size %d\n", chunks[chunks.length - 1] - chunks[chunks.length - 2]); + return chunks; + } + } + + private static class Scanner { + + private static final sun.misc.Unsafe UNSAFE = initUnsafe(); + + private static sun.misc.Unsafe initUnsafe() { + try { + java.lang.reflect.Field theUnsafe = sun.misc.Unsafe.class.getDeclaredField("theUnsafe"); + theUnsafe.setAccessible(true); + return (sun.misc.Unsafe) theUnsafe.get(sun.misc.Unsafe.class); + } + catch (NoSuchFieldException | IllegalAccessException e) { + throw new RuntimeException(e); + } + } + + long pos, end; + + public Scanner(long start, long end) { + this.pos = start; + this.end = end; + } + + boolean hasNext() { + return pos < end; + } + + long pos() { + return pos; + } + + void add(long delta) { + pos += delta; + } + + long getLong() { + return UNSAFE.getLong(pos); + } + + long getLongAt(long pos) { + return UNSAFE.getLong(pos); + } + + void setPos(long l) { + this.pos = l; + } + } +} diff --git a/src/main/java/dev/morling/onebrc/CalculateAverage_tkosachev.java b/src/main/java/dev/morling/onebrc/CalculateAverage_tkosachev.java new file mode 100644 index 000000000..cfacfe1f5 --- /dev/null +++ b/src/main/java/dev/morling/onebrc/CalculateAverage_tkosachev.java @@ -0,0 +1,172 @@ +/* + * Copyright 2023 The original authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package dev.morling.onebrc; + +import java.io.IOException; +import java.io.RandomAccessFile; +import java.nio.ByteBuffer; +import java.nio.channels.FileChannel; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.util.*; +import java.util.concurrent.ExecutionException; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.Future; + +public class CalculateAverage_tkosachev { + + private static final String FILE = "./measurements.txt"; + public static int numThreads = Math.min(Runtime.getRuntime().availableProcessors(), 8); + + private record ResultRow(int min, double mean, int max) { + public String toString() { + return STR."\{round(min)}/\{round(mean)}/\{round(max)}"; + } + + private double round(double value) { + return Math.round(value) / 10.0; + } + } + + private static class MeasurementAggregator { + private int min = Integer.MAX_VALUE; + private int max = Integer.MIN_VALUE; + private long sum = 0; + private long count = 0; + + public void newValue(int m) { + if (m < min) { + min = m; + } + if (m > max) { + max = m; + } + sum += m; + count++; + } + + public void mergeIn(MeasurementAggregator add) { + if (add.min < min) { + min = add.min; + } + if (add.max > max) { + max = add.max; + } + sum += add.sum; + count += add.count; + } + } + + public static void main(String[] args) { + Path path = Paths.get(args.length == 0 ? 
FILE : args[0]); + + Map total; + try (RandomAccessFile aFile = new RandomAccessFile(path.toFile(), "r"); + ExecutorService executorService = Executors.newFixedThreadPool(numThreads)) { + FileChannel inChannel = aFile.getChannel(); + int numChunks = args.length > 1 ? Integer.parseInt(args[1]) : 100; + + if (inChannel.size() < 1024 * 1024 * 1024) { + numThreads = 1; + numChunks = 1; + } + + List>> futures = new ArrayList<>(numThreads); + int bufferSize = (int) (inChannel.size() / numChunks) + 100; + for (int i = 0; i < numChunks; i++) { + final int finalI = i; + futures.add(executorService.submit(() -> processBuffer(inChannel, bufferSize, finalI))); + } + executorService.shutdown(); + total = new HashMap<>(); + for (Future> future : futures) { + mergeIn(total, future.get()); + } + } + catch (IOException | InterruptedException | ExecutionException e) { + throw new RuntimeException(e); + } + printResults(total); + } + + private static void mergeIn(Map total, Map result) { + for (String name : result.keySet()) { + MeasurementAggregator totalAggregator = total.computeIfAbsent(name, _ -> new MeasurementAggregator()); + totalAggregator.mergeIn(result.get(name)); + } + } + + private static Map processBuffer(FileChannel channel, int bufferSize, int nr) throws IOException { + HashMap aggregatorMap = new HashMap<>(); + long start = ((long) nr) * bufferSize; + long length = Math.min(bufferSize, channel.size() - start); + ByteBuffer byteBuffer = channel.map( + FileChannel.MapMode.READ_ONLY, + start, + length); + int i = 0; + int smcIndex = -1; + byte[] buf = new byte[1024]; + int count = 0; + if (nr > 0) { + do { + i++; + } while (byteBuffer.get() != '\n'); + } + while (i < length) { + byte b = byteBuffer.get(); + buf[count] = b; + if (b == ';') { + smcIndex = count; + } + count++; + if (b == '\n') { + String name = new String(buf, 0, smcIndex); + int value = fastParse(buf, smcIndex + 1, count - smcIndex - 2); + aggregatorMap.computeIfAbsent(name, _ -> new 
MeasurementAggregator()).newValue(value); + count = 0; + } + i++; + } + + return aggregatorMap; + } + + private static void printResults(Map result) { + Map measurements = new TreeMap<>(); + for (Map.Entry entry : result.entrySet()) { + MeasurementAggregator value = entry.getValue(); + measurements.put(entry.getKey(), new ResultRow(value.min, ((double) value.sum / value.count), value.max)); + } + System.out.println(measurements); + } + + public static int fastParse(byte[] buf, int start, int len) { + int i = 0; + int sign = 1; + for (int index = start; index < start + len; index++) { + byte b = buf[index]; + if (b == '-') { + sign = -1; + } + if (b >= '0' && b <= '9') { + i = i * 10 + (b - '0'); + } + } + return i * sign; + } +} diff --git a/src/main/java/dev/morling/onebrc/CalculateAverage_tonivade.java b/src/main/java/dev/morling/onebrc/CalculateAverage_tonivade.java new file mode 100644 index 000000000..9deb3f229 --- /dev/null +++ b/src/main/java/dev/morling/onebrc/CalculateAverage_tonivade.java @@ -0,0 +1,286 @@ +/* + * Copyright 2023 The original authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package dev.morling.onebrc; + +import java.io.IOException; +import java.nio.ByteBuffer; +import java.nio.channels.FileChannel; +import java.nio.channels.FileChannel.MapMode; +import java.nio.charset.StandardCharsets; +import java.nio.file.Paths; +import java.nio.file.StandardOpenOption; +import java.util.ArrayList; +import java.util.Map; +import java.util.TreeMap; +import java.util.concurrent.ExecutionException; +import java.util.concurrent.StructuredTaskScope; +import java.util.concurrent.StructuredTaskScope.Subtask; + +public class CalculateAverage_tonivade { + + private static final String FILE = "./measurements.txt"; + + private static final int MIN_CHUNK_SIZE = 1024; + private static final int MAX_NAME_LENGTH = 128; + private static final int MAX_TEMP_LENGTH = 8; + + public static void main(String[] args) throws IOException, InterruptedException, ExecutionException { + System.out.println(readFile()); + } + + private static Map readFile() throws IOException, InterruptedException, ExecutionException { + Map result = new TreeMap<>(); + try (var channel = FileChannel.open(Paths.get(FILE), StandardOpenOption.READ)) { + long consumed = 0; + long remaining = channel.size(); + while (remaining > 0) { + var buffer = channel.map( + MapMode.READ_ONLY, consumed, Math.min(remaining, Integer.MAX_VALUE)); + + int chunks = Runtime.getRuntime().availableProcessors(); + int chunkSize = buffer.remaining() / chunks; + int leftover = buffer.remaining() % chunks; + if (chunkSize < MIN_CHUNK_SIZE) { + var partialResult = new Chunk(buffer, 0, buffer.remaining()).read(); + + consumed += partialResult.end(); + remaining -= partialResult.end(); + + partialResult.merge(result); + } + else { + try (var scope = new StructuredTaskScope.ShutdownOnFailure()) { + var tasks = new ArrayList>(chunks); + for (int i = 0; i < chunks; i++) { + int start = i * chunkSize; + int length = chunkSize + (i < chunks ? 
leftover : 0); + tasks.add(scope.fork(new Chunk(buffer, start, length)::read)); + } + scope.join(); + scope.throwIfFailed(); + + for (var subtask : tasks) { + subtask.get().merge(result); + } + consumed += tasks.getLast().get().end(); + remaining -= tasks.getLast().get().end(); + } + } + } + } + return result; + } + + static final class Chunk { + + private static final int EOL = 10; + private static final int MINUS = 45; + private static final int SEMICOLON = 59; + + final ByteBuffer buffer; + final int start; + final int end; + + final byte[] name = new byte[MAX_NAME_LENGTH]; + final byte[] temp = new byte[MAX_TEMP_LENGTH]; + final Stations stations = new Stations(); + + int hash; + + Chunk(ByteBuffer buffer, int start, int length) { + this.buffer = buffer; + this.start = findStart(buffer, start); + this.end = start + length; + } + + private static int findStart(ByteBuffer buffer, int start) { + if (start > 0 && buffer.get(start - 1) != EOL) { + for (int i = start - 2; i > 0; i--) { + byte b = buffer.get(i); + if (b == EOL) { + return i + 1; + } + } + } + return start; + } + + PartialResult read() { + int position = start; + while (position < end) { + int semicolon = readName(position, end - position); + if (semicolon < 0) { + break; + } + + int endOfLine = readTemp(semicolon + 1, end - semicolon - 1); + if (endOfLine < 0) { + break; + } + + stations.find(name, semicolon - position, hash) + .add(parseTemp(temp, endOfLine - semicolon - 1)); + + // skip end of line + position = endOfLine + 1; + } + return new PartialResult(position, stations.buckets); + } + + private int readName(int offset, int length) { + hash = 1; + for (int i = 0; i < length; i++) { + byte b = buffer.get(i + offset); + if (b == SEMICOLON) { + return i + offset; + } + name[i] = b; + hash = 31 * hash + b; + } + return -1; + } + + private int readTemp(int offset, int length) { + for (int i = 0; i < length; i++) { + byte b = buffer.get(i + offset); + if (b == EOL) { + return i + offset; + } + 
temp[i] = b; + } + return -1; + } + + // non null double between -99.9 (inclusive) and 99.9 (inclusive), always with one fractional digit + private static int parseTemp(byte[] value, int length) { + int period = length - 2; + if (value[0] == MINUS) { + int left = parseLeft(value, 1, period - 1); + int right = toInt(value[period + 1]); + return -(left + right); + } + int left = parseLeft(value, 0, period); + int right = toInt(value[period + 1]); + return left + right; + } + + private static int parseLeft(byte[] value, int start, int length) { + if (length == 1) { + return toInt(value[start]) * 10; + } + // two chars + int a = toInt(value[start]) * 100; + int b = toInt(value[start + 1]) * 10; + return a + b; + } + + private static int toInt(byte c) { + return c - 48; + } + } + + static final class Stations { + + private static final int NUMBER_OF_BUCKETS = 1000; + private static final int BUCKET_SIZE = 50; + + final Station[][] buckets = new Station[NUMBER_OF_BUCKETS][BUCKET_SIZE]; + + Station find(byte[] name, int length, int hash) { + var bucket = buckets[Math.abs(hash % NUMBER_OF_BUCKETS)]; + for (int i = 0; i < BUCKET_SIZE; i++) { + if (bucket[i] == null) { + bucket[i] = new Station(name, length, hash); + return bucket[i]; + } + else if (bucket[i].sameName(length, hash)) { + return bucket[i]; + } + } + throw new IllegalStateException("no more space left"); + } + } + + static final class Station { + + private final byte[] name; + private final int hash; + + private int min = 1000; + private int max = -1000; + private int sum; + private long count; + + Station(byte[] source, int length, int hash) { + name = new byte[length]; + System.arraycopy(source, 0, name, 0, length); + this.hash = hash; + } + + String getName() { + return new String(name, StandardCharsets.UTF_8); + } + + void add(int value) { + min = Math.min(min, value); + max = Math.max(max, value); + sum += value; + count++; + } + + Station merge(Station other) { + min = Math.min(min, other.min); + max = 
Math.max(max, other.max); + sum += other.sum; + count += other.count; + return this; + } + + @Override + public String toString() { + return toDouble(min) + "/" + round(mean()) + "/" + toDouble(max); + } + + boolean sameName(int length, int hash) { + return name.length == length && this.hash == hash; + } + + private double mean() { + return toDouble(sum) / count; + } + + private double toDouble(int value) { + return value / 10.; + } + + private double round(double value) { + return Math.round(value * 10.) / 10.; + } + } + + static record PartialResult(int end, Station[][] stations) { + + void merge(Map result) { + for (Station[] bucket : stations) { + for (Station station : bucket) { + if (station != null) { + result.merge(station.getName(), station, Station::merge); + } + } + } + } + } +} diff --git a/src/main/java/dev/morling/onebrc/CalculateAverage_unbounded.java b/src/main/java/dev/morling/onebrc/CalculateAverage_unbounded.java new file mode 100644 index 000000000..351dc49a5 --- /dev/null +++ b/src/main/java/dev/morling/onebrc/CalculateAverage_unbounded.java @@ -0,0 +1,437 @@ +/* + * Copyright 2023 The original authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package dev.morling.onebrc; + +import jdk.incubator.vector.*; + +import java.io.IOException; +import java.lang.foreign.Arena; +import java.lang.foreign.MemorySegment; +import java.nio.ByteOrder; +import java.nio.channels.FileChannel; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.StandardOpenOption; +import java.util.*; +import java.util.concurrent.ArrayBlockingQueue; +import java.util.function.Consumer; +import java.util.stream.Collectors; +import java.util.stream.IntStream; + +import static java.lang.foreign.ValueLayout.*; +import static java.nio.ByteOrder.BIG_ENDIAN; + +public class CalculateAverage_unbounded { + private static final Path FILE = Path.of("./measurements.txt"); + private static final int MAX_STATION_NAME_LEN = 100; + private static final int MAX_UNIQUE_STATIONS = 10000; + + // this is *really* expensive + private static final OfInt BIG_ENDIAN_INT = JAVA_INT_UNALIGNED.withOrder(BIG_ENDIAN); + private static final VectorSpecies LINE_SCAN_SPECIES = ByteVector.SPECIES_256; + private static final int LINE_SCAN_LEN = LINE_SCAN_SPECIES.length(); + private static final VectorSpecies NAME_HASH_SPECIES = IntVector.SPECIES_256; + private static final VectorSpecies HASH_LOOKUP_SPECIES = ShortVector.SPECIES_256; + private static final VectorSpecies ACCUMULATOR_SPECIES = LongVector.SPECIES_256; + + private static final int CHUNK_SIZE = 16 * 1024 * 1024; + + // Arbitrarily chosen primes + private static final int[] HASH_PRIMES = { 661, 1663, 2293, 3581, 5449, 5953, 6311, 6841, 7573, 7669, 7703, 7789, 7901, 8887, 8581, 8831 }; + private static final byte[] PREFIX_MASK = { -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; + private static final int[] DIGIT_MULTIPLIERS = { + 0, 10, 1, 1, + 100, 10, 1, 1, + 0, -10, 1, -1, + -100, -10, 1, -1, + }; + 
private static final int[] DIGIT_MASK = { + 0x000fff0f, + 0x0f0fff0f, + 0x000fff0f, + 0x0f0fff0f, + }; + private static final int[] DIGIT_FLIPS = { 0, 0, -1, -1 }; + + record Segment(long start, int len) { + } + + static class StationStat { + long count; + long totalTemp; + int min; + int max; + + StationStat(long count, long totalTemp, int min, int max) { + this.count = count; + this.totalTemp = totalTemp; + this.min = min; + this.max = max; + } + + StationStat merge(StationStat other) { + this.count += other.count; + this.totalTemp += other.totalTemp; + this.min = Math.min(this.min, other.min); + this.max = Math.max(this.max, other.max); + return this; + } + + @Override + public String toString() { + return STR."\{min/10.0}/\{Math.round(1.0 * totalTemp / count)/10.0}/\{max/10.0}"; + } + } + + public static void main(String[] args) throws IOException, InterruptedException { + long fileSize = Files.size(FILE); + int lastChunkSize = (int) Math.min(200, fileSize); + int numSegments = (int) (fileSize / CHUNK_SIZE + 10); + + var segments = new ArrayBlockingQueue((int) (fileSize / CHUNK_SIZE + 10)); + for (long i = 0; i < fileSize - lastChunkSize; i += CHUNK_SIZE) { + segments.put(new Segment(i, (int) Math.min(CHUNK_SIZE, fileSize - i - lastChunkSize))); + } + + int numThreads = Runtime.getRuntime().availableProcessors(); + var results = new ArrayBlockingQueue>(numThreads); + var toMerge = new ArrayList>(numThreads + 1); + try (var ch = FileChannel.open(FILE, StandardOpenOption.READ); var arena = Arena.ofConfined()) { + var threads = IntStream.range(0, numThreads).mapToObj((ignored) -> new ProcessorThread(segments, ch, results::add)).toList(); + threads.forEach(Thread::start); + + // Process last piece without OOB + int margin = lastChunkSize < fileSize ? 
1 : 0; + var mem = ch.map(FileChannel.MapMode.READ_ONLY, fileSize - lastChunkSize - margin, lastChunkSize + margin, arena); + slowProcessChunk(mem, margin, lastChunkSize, toMerge::add); + + for (var thread : threads) { + thread.join(); + } + } + + results.drainTo(toMerge); + var merged = toMerge.stream().reduce((a, b) -> { + b.forEach((k, v) -> a.merge(k, v, StationStat::merge)); + return a; + }).get(); + printResult(merged); + } + + // Simple implementation for the end - so we don't need to worry about reading past the end of the file + private static void slowProcessChunk(MemorySegment mem, int startPos, int endPos, Consumer> report) { + int index = scanForStartPos(mem, startPos); + byte[] nameBuf = new byte[MAX_STATION_NAME_LEN]; + while (index < endPos) { + int nameLen = 0; + while (mem.get(JAVA_BYTE, index) != ';') { + nameBuf[nameLen++] = mem.get(JAVA_BYTE, index); + index++; + } + var name = new String(nameBuf, 0, nameLen); + index++; + StringBuilder numStr = new StringBuilder(5); + while (mem.get(JAVA_BYTE, index) != '\n') { + if (mem.get(JAVA_BYTE, index) != '.') { + numStr.append((char) mem.get(JAVA_BYTE, index)); + } + index++; + } + index++; + int num = Integer.parseInt(numStr.toString()); + var entry = new HashMap(1); + entry.put(name, new StationStat(1, num, num, num)); + report.accept(entry); + } + } + + static class ProcessorThread extends Thread { + + static final int NUM_BUCKETS = 1024; + static final int BUCKET_MASK = 0x3ff; + static final int BUCKET_SIZE = 16; + + // n-way hash table state + // 16 buckets, then 16 name pointers + private final short[] hashTable = new short[2 * BUCKET_SIZE * NUM_BUCKETS]; + // storage of station name keys for hash collision check + private final byte[] nameTable = new byte[MAX_UNIQUE_STATIONS * (MAX_STATION_NAME_LEN + 1)]; + // values for the hash key stable + private final short[] stationIndexes = new short[BUCKET_SIZE * NUM_BUCKETS]; + private final int[] nextNamePos = { 0 }; + private final int[] 
nextStationIndex = { 0 }; + + // Accumulator for (10s, 1s, (count*-2), .1s) per station + private final long[] accumulators = new long[4 * MAX_UNIQUE_STATIONS]; + // min and max per station + private final int[] minMax = new int[2 * MAX_UNIQUE_STATIONS]; + + private final Queue segments; + private final FileChannel channel; + private final Consumer> report; + + ProcessorThread(Queue segments, FileChannel channel, Consumer> report) { + this.segments = segments; + this.channel = channel; + this.report = report; + for (int i = 0; i < minMax.length; i += 2) { + minMax[i] = Integer.MAX_VALUE; + minMax[i + 1] = Integer.MIN_VALUE; + } + } + + @Override + public void run() { + try { + while (true) { + var segment = segments.poll(); + if (segment == null) { + break; + } + int startMargin = segment.start == 0 ? 0 : 1; + int endMargin = 64; + try (var arena = Arena.ofConfined()) { + var mem = channel.map(FileChannel.MapMode.READ_ONLY, segment.start - startMargin, segment.len + endMargin + startMargin, arena); + processChunk(mem, startMargin, segment.len + startMargin, hashTable, nameTable, stationIndexes, minMax, accumulators, nextNamePos, nextStationIndex); + } + } + report.accept(decodeResult(hashTable, nameTable, stationIndexes, accumulators, minMax)); + } catch (IOException e) { + System.err.println(STR."I/O Exception: \{e}"); + throw new RuntimeException(e); + } + } + + private static void processChunk(MemorySegment mem, int startPos, int endPos, short[] hashTable, byte[] nameTable, short[] stationIndexes, int[] minMax, + long[] accumulators, int[] nextNamePos, int[] nextStationIndex) { + int index = scanForStartPos(mem, startPos); + var primeVec = IntVector.fromArray(NAME_HASH_SPECIES, HASH_PRIMES, 0); + while (index < endPos) { + var lineVec = ByteVector.fromMemorySegment(LINE_SCAN_SPECIES, mem, index, ByteOrder.LITTLE_ENDIAN); + int numPos = lineVec.eq((byte) ';').firstTrue() + 1; + int nlPos = 0; + int stationIndex; + if (numPos != LINE_SCAN_LEN + 1) { + // Fast 
path, station name fits in one SIMD register + nlPos = lineVec.eq((byte) '\n').firstTrue(); + if (nlPos == LINE_SCAN_LEN) { + while (mem.get(JAVA_BYTE, index + nlPos) != '\n') { + nlPos++; + } + } + var nameVec = lineVec.and(ByteVector.fromArray(LINE_SCAN_SPECIES, PREFIX_MASK, 33 - numPos)); + int nameHash = nameVec.reinterpretAsInts().mul(primeVec).reduceLanes(VectorOperators.ADD); + + stationIndex = fastLookupHash(nameHash, nameVec, hashTable, nameTable, stationIndexes, nextNamePos, nextStationIndex); + } + else { + // Slow path, station name larger than SIMD register + while (mem.get(JAVA_BYTE, index + numPos - 1) != ';') + numPos++; + while (mem.get(JAVA_BYTE, index + nlPos) != '\n') + nlPos++; + + int nameHash = lineVec.reinterpretAsInts().mul(primeVec).reduceLanes(VectorOperators.ADD); + for (int i = LINE_SCAN_LEN; i < numPos - 1; i++) { + nameHash = nameHash * 33 + mem.get(JAVA_BYTE, index + i); + } + stationIndex = lookupHash(nameHash, mem.asSlice(index, numPos - 1), hashTable, nameTable, stationIndexes, nextNamePos, nextStationIndex); + } + boolean isNegative = mem.get(JAVA_BYTE, index + numPos) == '-'; + // format; 0: 9.9, 1: 99.9, 2: -9.9, 3: -99.9 + int numFormat = nlPos - numPos - 3 + (isNegative ? 
1 : 0); + + // accumulate sums for mean + var numPartsVec = ByteVector.fromMemorySegment(ByteVector.SPECIES_128, mem, index + nlPos - 4, ByteOrder.LITTLE_ENDIAN) + .sub((byte) '0') + .convert(VectorOperators.B2I, 0); + var multiplyVec = IntVector.fromArray(IntVector.SPECIES_128, DIGIT_MULTIPLIERS, 4 * numFormat); + var toAdd = numPartsVec.mul(multiplyVec).castShape(ACCUMULATOR_SPECIES, 0); + var acc = LongVector.fromArray(ACCUMULATOR_SPECIES, accumulators, 4 * stationIndex); + acc.add(toAdd).intoArray(accumulators, 4 * stationIndex); + + // record min/max + // encode ASCII value to sortable format without parsing + int encoded = (mem.get(BIG_ENDIAN_INT, index + nlPos - 4) & DIGIT_MASK[numFormat]) ^ DIGIT_FLIPS[numFormat]; + minMax[2 * stationIndex] = Math.min(minMax[2 * stationIndex], encoded); + minMax[2 * stationIndex + 1] = Math.max(minMax[2 * stationIndex + 1], encoded); + + index += nlPos + 1; + } + } + + // Look up name that fits in a vector + private static int fastLookupHash(int nameHash, ByteVector nameVec, short[] hashTable, byte[] nameTable, short[] stationIndexes, int[] nextNamePos, + int[] nextStationIndex) { + int bucketIdx = nameHash & BUCKET_MASK; + short shortHash = (short) (0x8000 | (nameHash >> 16)); + + // Look up the station name to find the index + while (true) { + var bucketVec = ShortVector.fromArray(HASH_LOOKUP_SPECIES, hashTable, 2 * BUCKET_SIZE * bucketIdx); + var bucketPos = bucketVec.eq(shortHash).firstTrue(); + if (bucketPos != HASH_LOOKUP_SPECIES.length()) { + int slotNamePos = 32 * Short.toUnsignedInt(hashTable[2 * BUCKET_SIZE * bucketIdx + BUCKET_SIZE + bucketPos]); + var slotNameVec = ByteVector.fromArray(LINE_SCAN_SPECIES, nameTable, slotNamePos); + if (nameVec.eq(slotNameVec).allTrue()) { + // Hit + return stationIndexes[BUCKET_SIZE * bucketIdx + bucketPos]; + } + else { + bucketPos = handleHashCollision(shortHash, bucketIdx, MemorySegment.ofArray(nameVec.toArray()), hashTable, nameTable); + if (bucketPos != -1) { + return 
stationIndexes[BUCKET_SIZE * bucketIdx + bucketPos]; + } + } + } + var emptyPos = bucketVec.eq((short) 0).firstTrue(); + if (emptyPos != HASH_LOOKUP_SPECIES.length()) { + // Miss, insert + int stationIndex = nextStationIndex[0]++; + nameVec.intoArray(nameTable, nextNamePos[0]); + hashTable[2 * BUCKET_SIZE * bucketIdx + emptyPos] = shortHash; + hashTable[2 * BUCKET_SIZE * bucketIdx + BUCKET_SIZE + emptyPos] = (short) (nextNamePos[0] / 32); + stationIndexes[BUCKET_SIZE * bucketIdx + emptyPos] = (short) stationIndex; + nextNamePos[0] += nameVec.length(); + return stationIndex; + } + // Try next bucket + bucketIdx = (bucketIdx + 1) & BUCKET_MASK; + } + } + + // Look up long name + private static int lookupHash(int nameHash, MemorySegment nameSeg, short[] hashTable, byte[] nameTable, short[] stationIndexes, int[] nextNamePos, + int[] nextStationIndex) { + int bucketIdx = nameHash & BUCKET_MASK; + short shortHash = (short) (0x8000 | (nameHash >> 16)); + + // Look up the station name to find the index + while (true) { + var bucketVec = ShortVector.fromArray(HASH_LOOKUP_SPECIES, hashTable, 2 * BUCKET_SIZE * bucketIdx); + var bucketPos = bucketVec.eq(shortHash).firstTrue(); + if (bucketPos != HASH_LOOKUP_SPECIES.length()) { + int slotNamePos = 32 * Short.toUnsignedInt(hashTable[2 * BUCKET_SIZE * bucketIdx + BUCKET_SIZE + bucketPos]); + boolean match = true; + for (int i = 0; i < nameSeg.byteSize(); i++) { + if (nameSeg.get(JAVA_BYTE, i) != nameTable[slotNamePos + i]) { + match = false; + } + } + match = match && nameTable[slotNamePos + (int) nameSeg.byteSize()] == '\0'; + if (match) { + // Hit + return stationIndexes[BUCKET_SIZE * bucketIdx + bucketPos]; + } + else { + bucketPos = handleHashCollision(shortHash, bucketIdx, nameSeg, hashTable, nameTable); + if (bucketPos != -1) { + return stationIndexes[BUCKET_SIZE * bucketIdx + bucketPos]; + } + } + } + var emptyPos = bucketVec.eq((short) 0).firstTrue(); + if (emptyPos != HASH_LOOKUP_SPECIES.length()) { + // Miss, insert + 
int stationIndex = nextStationIndex[0]++; + hashTable[2 * BUCKET_SIZE * bucketIdx + emptyPos] = shortHash; + hashTable[2 * BUCKET_SIZE * bucketIdx + BUCKET_SIZE + emptyPos] = (short) (nextNamePos[0] / 32); + stationIndexes[BUCKET_SIZE * bucketIdx + emptyPos] = (short) stationIndex; + for (int i = 0; i < nameSeg.byteSize(); i++) { + nameTable[nextNamePos[0]++] = nameSeg.get(JAVA_BYTE, i); + } + nameTable[nextNamePos[0]++] = '\0'; + while (nextNamePos[0] % 32 != 0) + nextNamePos[0]++; + return stationIndex; + } + // Try next bucket + bucketIdx = (bucketIdx + 1) & BUCKET_MASK; + } + } + + private static int handleHashCollision(short shortHash, int bucketIdx, MemorySegment nameSeg, short[] hashTable, byte[] nameTable) { + for (int i = 0; i < BUCKET_SIZE; i++) { + if (hashTable[2 * BUCKET_SIZE * bucketIdx + i] == shortHash) { + int namePos = 32 * Short.toUnsignedInt(hashTable[2 * BUCKET_SIZE * bucketIdx + BUCKET_SIZE + i]); + if (Arrays.equals(nameSeg.toArray(JAVA_BYTE), Arrays.copyOfRange(nameTable, namePos, namePos + (int) nameSeg.byteSize())) + && nameTable[namePos + (int) nameSeg.byteSize()] == '\0') { + return i; + } + } + } + return -1; + } + } + + // Find next record + private static int scanForStartPos(MemorySegment mem, int startPos) { + if (startPos == 0) { + return startPos; + } + while (mem.get(JAVA_BYTE, startPos - 1) != '\n') { + startPos++; + } + return startPos; + } + + // Decode the accumulator values to StationStats + private static Map decodeResult(short[] hashTable, byte[] nameTable, short[] stationIndexes, long[] accumulators, int[] minMax) { + var result = new HashMap(MAX_UNIQUE_STATIONS); + for (int i = 0; i < hashTable.length; i += 32) { + for (int j = 0; j < 16; j++) { + if (hashTable[i + j] != 0) { + int namePos = 32 * Short.toUnsignedInt(hashTable[i + j + 16]); + int nameLen = 1; + while (nameTable[namePos + nameLen] != '\0') { + nameLen++; + } + int stationIdx = stationIndexes[i / 2 + j]; + // Number of '-2' valued dots seen + long count = 
accumulators[4 * stationIdx + 2] / -2; + long total = accumulators[4 * stationIdx]; + total += accumulators[4 * stationIdx + 1]; + total += accumulators[4 * stationIdx + 3]; + int min = decodeInteger(minMax[2 * stationIdx]); + int max = decodeInteger(minMax[2 * stationIdx + 1]); + result.put(new String(nameTable, namePos, nameLen), new StationStat(count, total, min, max)); + } + } + } + return result; + } + + private static int decodeInteger(int encoded) { + int mask = encoded >> 31; + int orig = (encoded ^ mask) & 0x7fffffff; + int val = (orig & 0xff) + ((orig >> 16) & 0xff) * 10 + ((orig >> 24) & 0xff) * 100; + return val * (mask | 1); + } + + private static void printResult(Map stats) { + System.out.print("{"); + System.out.print( + stats.keySet().stream().sorted() + .map(key -> { + var s = stats.get(key); + return STR."\{key}=\{s}"; + }) + .collect(Collectors.joining(", ")) + ); + System.out.println("}"); + } +} diff --git a/src/main/java/dev/morling/onebrc/CalculateAverage_vaidhy.java b/src/main/java/dev/morling/onebrc/CalculateAverage_vaidhy.java index 5795077b3..f63374a10 100644 --- a/src/main/java/dev/morling/onebrc/CalculateAverage_vaidhy.java +++ b/src/main/java/dev/morling/onebrc/CalculateAverage_vaidhy.java @@ -21,6 +21,7 @@ import java.lang.foreign.Arena; import java.lang.reflect.Field; import java.nio.ByteBuffer; +import java.nio.ByteOrder; import java.nio.channels.FileChannel; import java.nio.charset.StandardCharsets; import java.nio.file.Path; @@ -37,69 +38,149 @@ public class CalculateAverage_vaidhy { private static final class HashEntry { private long startAddress; - private long endAddress; + private long keyLength; private long suffix; - private int hash; - + private int next; IntSummaryStatistics value; } private static class PrimitiveHashMap { private final HashEntry[] entries; + private final long[] hashes; + private final int twoPow; + private int next = -1; PrimitiveHashMap(int twoPow) { this.twoPow = twoPow; this.entries = new HashEntry[1 
<< twoPow]; + this.hashes = new long[1 << twoPow]; for (int i = 0; i < entries.length; i++) { this.entries[i] = new HashEntry(); } } - public HashEntry find(long startAddress, long endAddress, long suffix, int hash) { + public IntSummaryStatistics find(long startAddress, long endAddress, long hash, long suffix) { int len = entries.length; - int i = (hash ^ (hash >> twoPow)) & (len - 1); + int h = Long.hashCode(hash); + int initialIndex = (h ^ (h >> twoPow)) & (len - 1); + int i = initialIndex; + long lookupLength = endAddress - startAddress; - do { + long hashEntry = hashes[i]; + + if (hashEntry == hash) { HashEntry entry = entries[i]; - if (entry.value == null) { - return entry; + if (lookupLength <= 7) { + // This works because + // hash = suffix , when simpleHash is just xor. + // Since length is not 8, suffix will have a 0 at the end. + // Since utf-8 strings can't have 0 in middle of a string this means + // we can stop here. + return entry.value; } - if (entry.hash == hash) { - long entryLength = entry.endAddress - entry.startAddress; - long lookupLength = endAddress - startAddress; - if ((entryLength == lookupLength) && (entry.suffix == suffix)) { - boolean found = compareEntryKeys(startAddress, endAddress, entry); - - if (found) { - return entry; - } + boolean found = (entry.suffix == suffix && + compareEntryKeys(startAddress, endAddress, entry.startAddress)); + if (found) { + return entry.value; + } + } + + if (hashEntry == 0) { + HashEntry entry = entries[i]; + entry.startAddress = startAddress; + entry.keyLength = lookupLength; + hashes[i] = hash; + entry.suffix = suffix; + entry.next = next; + this.next = i; + entry.value = new IntSummaryStatistics(); + return entry.value; + } + + i++; + if (i == len) { + i = 0; + } + + if (i == initialIndex) { + return null; + } + + do { + hashEntry = hashes[i]; + if (hashEntry == hash) { + HashEntry entry = entries[i]; + if (lookupLength <= 7) { + return entry.value; + } + boolean found = (entry.suffix == suffix && + 
compareEntryKeys(startAddress, endAddress, entry.startAddress)); + if (found) { + return entry.value; } } + if (hashEntry == 0) { + HashEntry entry = entries[i]; + entry.startAddress = startAddress; + entry.keyLength = lookupLength; + hashes[i] = hash; + entry.suffix = suffix; + entry.next = next; + this.next = i; + entry.value = new IntSummaryStatistics(); + return entry.value; + } + i++; if (i == len) { i = 0; } - } while (i != hash); + } while (i != initialIndex); return null; } - private static boolean compareEntryKeys(long startAddress, long endAddress, HashEntry entry) { - long entryIndex = entry.startAddress; + private static boolean compareEntryKeys(long startAddress, long endAddress, long entryStartAddress) { + long entryIndex = entryStartAddress; long lookupIndex = startAddress; + long endAddressStop = endAddress - 7; - for (; (lookupIndex + 7) < endAddress; lookupIndex += 8) { + for (; lookupIndex < endAddressStop; lookupIndex += 8) { if (UNSAFE.getLong(entryIndex) != UNSAFE.getLong(lookupIndex)) { return false; } entryIndex += 8; } + return true; } + + public Iterable entrySet() { + return () -> new Iterator<>() { + int scan = next; + + @Override + public boolean hasNext() { + return scan != -1; + } + + @Override + public HashEntry next() { + HashEntry entry = entries[scan]; + scan = entry.next; + return entry; + } + }; + } } private static final String FILE = "./measurements.txt"; + private static long simpleHash(long hash, long nextData) { + return hash ^ nextData; + // return (hash ^ Long.rotateLeft((nextData * C1), R1)) * C2; + } + private static Unsafe initUnsafe() { try { Field theUnsafe = Unsafe.class.getDeclaredField("theUnsafe"); @@ -145,7 +226,7 @@ private static int parseDouble(long startAddress, long endAddress) { interface MapReduce { - void process(long keyStartAddress, long keyEndAddress, int hash, int temperature, long suffix); + void process(long keyStartAddress, long keyEndAddress, long hash, long suffix, int temperature); I result(); 
} @@ -173,9 +254,13 @@ static class LineStream { private final long chunkEnd; private long position; - private int hash; + private long hash; + private long suffix; - byte[] b = new byte[4]; + + private final ByteBuffer buf = ByteBuffer + .allocate(8) + .order(ByteOrder.LITTLE_ENDIAN); public LineStream(FileService fileService, long offset, long chunkSize) { long fileStart = fileService.address(); @@ -186,50 +271,38 @@ public LineStream(FileService fileService, long offset, long chunkSize) { } public boolean hasNext() { - return position <= chunkEnd && position < fileEnd; + return position <= chunkEnd; } public long findSemi() { - int h = 0; - long s = 0; - long i = position; - while ((i + 3) < fileEnd) { - // Adding 16 as it is the offset for primitive arrays - ByteBuffer.wrap(b).putInt(UNSAFE.getInt(i)); - - if (b[3] == 0x3B) { - break; - } - i++; - h = ((h << 5) - h) ^ b[3]; - s = (s << 8) ^ b[3]; + long h = 0; + buf.rewind(); - if (b[2] == 0x3B) { - break; + for (long i = position; i < fileEnd; i++) { + byte ch = UNSAFE.getByte(i); + if (ch == ';') { + int discard = buf.remaining(); + buf.rewind(); + long nextData = (buf.getLong() << discard) >>> discard; + this.suffix = nextData; + this.hash = simpleHash(h, nextData); + position = i + 1; + return i; } - i++; - h = ((h << 5) - h) ^ b[2]; - s = (s << 8) ^ b[2]; - - if (b[1] == 0x3B) { - break; + if (buf.hasRemaining()) { + buf.put(ch); } - i++; - h = ((h << 5) - h) ^ b[1]; - s = (s << 8) ^ b[1]; - - if (b[0] == 0x3B) { - break; + else { + buf.flip(); + long nextData = buf.getLong(); + h = simpleHash(h, nextData); + buf.rewind(); } - i++; - h = ((h << 5) - h) ^ b[0]; - s = (s << 8) ^ b[0]; } - this.hash = h; - this.suffix = s; - position = i + 1; - return i; + this.suffix = buf.getLong(); + position = fileEnd; + return fileEnd; } public long skipLine() { @@ -258,7 +331,94 @@ public long findTemperature() { } } - private void worker(long offset, long chunkSize, MapReduce lineConsumer) { + private static final long 
START_BYTE_INDICATOR = 0x0101_0101_0101_0101L; + private static final long END_BYTE_INDICATOR = START_BYTE_INDICATOR << 7; + + private static final long NEW_LINE_DETECTION = START_BYTE_INDICATOR * '\n'; + + private static final long SEMI_DETECTION = START_BYTE_INDICATOR * ';'; + + private static final long ALL_ONES = 0xffff_ffff_ffff_ffffL; + + private long findByteOctet(long data, long pattern) { + long match = data ^ pattern; + return (match - START_BYTE_INDICATOR) & ((~match) & END_BYTE_INDICATOR); + } + + private void bigWorker(long offset, long chunkSize, MapReduce lineConsumer) { + long chunkStart = offset + fileService.address(); + long chunkEnd = chunkStart + chunkSize; + long fileEnd = fileService.address() + fileService.length(); + long stopPoint = Math.min(chunkEnd + 1, fileEnd); + + boolean skip = offset != 0; + for (long position = chunkStart; position < stopPoint;) { + if (skip) { + long data = UNSAFE.getLong(position); + long newLineMask = findByteOctet(data, NEW_LINE_DETECTION); + if (newLineMask != 0) { + int newLinePosition = Long.numberOfTrailingZeros(newLineMask) >>> 3; + skip = false; + position = position + newLinePosition + 1; + } + else { + position = position + 8; + } + continue; + } + + long stationStart = position; + long stationEnd = -1; + long hash = 0; + long suffix = 0; + do { + long data = UNSAFE.getLong(position); + long semiMask = findByteOctet(data, SEMI_DETECTION); + if (semiMask != 0) { + int semiPosition = Long.numberOfTrailingZeros(semiMask) >>> 3; + stationEnd = position + semiPosition; + position = stationEnd + 1; + + if (semiPosition != 0) { + suffix = data & (ALL_ONES >>> (64 - (semiPosition << 3))); + } + else { + suffix = UNSAFE.getLong(position - 8); + } + hash = simpleHash(hash, suffix); + break; + } + else { + hash = simpleHash(hash, data); + position = position + 8; + } + } while (true); + + int temperature = 0; + { + byte ch = UNSAFE.getByte(position++); + boolean negative = false; + if (ch == '-') { + negative = 
true; + ch = UNSAFE.getByte(position++); + } + do { + if (ch != '.') { + temperature *= 10; + temperature += (ch ^ '0'); + } + ch = UNSAFE.getByte(position++); + } while (ch != '\n'); + if (negative) { + temperature = -temperature; + } + } + + lineConsumer.process(stationStart, stationEnd, hash, suffix, temperature); + } + } + + private void smallWorker(long offset, long chunkSize, MapReduce lineConsumer) { LineStream lineStream = new LineStream(fileService, offset, chunkSize); if (offset != 0) { @@ -274,29 +434,58 @@ private void worker(long offset, long chunkSize, MapReduce lineConsumer) { while (lineStream.hasNext()) { long keyStartAddress = lineStream.position; long keyEndAddress = lineStream.findSemi(); - long keySuffix = lineStream.suffix; - int keyHash = lineStream.hash; + long keyHash = lineStream.hash; + long suffix = lineStream.suffix; long valueStartAddress = lineStream.position; long valueEndAddress = lineStream.findTemperature(); int temperature = parseDouble(valueStartAddress, valueEndAddress); - lineConsumer.process(keyStartAddress, keyEndAddress, keyHash, temperature, keySuffix); + // System.out.println("Small worker!"); + lineConsumer.process(keyStartAddress, keyEndAddress, keyHash, suffix, temperature); } } - public T master(long chunkSize, ExecutorService executor) { - long len = fileService.length(); + // file size = 7 + // (0,0) (0,0) small chunk= (0,7) + // a;0.1\n + + public T master(int shards, ExecutorService executor) { List> summaries = new ArrayList<>(); + long len = fileService.length(); + + if (len > 128) { + long bigChunk = Math.floorDiv(len, shards); + long bigChunkReAlign = bigChunk & 0xffff_ffff_ffff_fff8L; + + long smallChunkStart = bigChunkReAlign * shards; + long smallChunkSize = len - smallChunkStart; + + for (long offset = 0; offset < smallChunkStart; offset += bigChunkReAlign) { + MapReduce mr = chunkProcessCreator.get(); + final long transferOffset = offset; + Future task = executor.submit(() -> { + bigWorker(transferOffset, 
bigChunkReAlign, mr); + return mr.result(); + }); + summaries.add(task); + } + + MapReduce mrLast = chunkProcessCreator.get(); + Future lastTask = executor.submit(() -> { + smallWorker(smallChunkStart, smallChunkSize - 1, mrLast); + return mrLast.result(); + }); + summaries.add(lastTask); + } + else { - for (long offset = 0; offset < len; offset += chunkSize) { - long workerLength = Math.min(len, offset + chunkSize) - offset; - MapReduce mr = chunkProcessCreator.get(); - final long transferOffset = offset; - Future task = executor.submit(() -> { - worker(transferOffset, workerLength, mr); - return mr.result(); + MapReduce mrLast = chunkProcessCreator.get(); + Future lastTask = executor.submit(() -> { + smallWorker(0, len - 1, mrLast); + return mrLast.result(); }); - summaries.add(task); + summaries.add(lastTask); } + List summariesDone = summaries.stream() .map(task -> { try { @@ -336,22 +525,12 @@ public long address() { private static class ChunkProcessorImpl implements MapReduce { // 1 << 14 > 10,000 so it works - private final PrimitiveHashMap statistics = new PrimitiveHashMap(14); + private final PrimitiveHashMap statistics = new PrimitiveHashMap(15); @Override - public void process(long keyStartAddress, long keyEndAddress, int hash, int temperature, long suffix) { - HashEntry entry = statistics.find(keyStartAddress, keyEndAddress, suffix, hash); - if (entry == null) { - throw new IllegalStateException("Hash table too small :("); - } - if (entry.value == null) { - entry.startAddress = keyStartAddress; - entry.endAddress = keyEndAddress; - entry.suffix = suffix; - entry.hash = hash; - entry.value = new IntSummaryStatistics(); - } - entry.value.accept(temperature); + public void process(long keyStartAddress, long keyEndAddress, long hash, long suffix, int temperature) { + IntSummaryStatistics stats = statistics.find(keyStartAddress, keyEndAddress, hash, suffix); + stats.accept(temperature); } @Override @@ -368,13 +547,10 @@ public static void main(String[] args) 
throws IOException { ChunkProcessorImpl::new, CalculateAverage_vaidhy::combineOutputs); - int proc = 2 * Runtime.getRuntime().availableProcessors(); - - long fileSize = diskFileService.length(); - long chunkSize = Math.ceilDiv(fileSize, proc); + int proc = Runtime.getRuntime().availableProcessors(); ExecutorService executor = Executors.newFixedThreadPool(proc); - Map output = calculateAverageVaidhy.master(chunkSize, executor); + Map output = calculateAverageVaidhy.master(2 * proc, executor); executor.shutdown(); Map outputStr = toPrintMap(output); @@ -395,11 +571,12 @@ private static Map toPrintMap(Map private static Map combineOutputs( List list) { - Map output = new HashMap<>(10000); + Map output = HashMap.newHashMap(10000); for (PrimitiveHashMap map : list) { - for (HashEntry entry : map.entries) { + for (HashEntry entry : map.entrySet()) { if (entry.value != null) { - String keyStr = unsafeToString(entry.startAddress, entry.endAddress); + String keyStr = unsafeToString(entry.startAddress, + entry.startAddress + entry.keyLength); output.compute(keyStr, (ignore, val) -> { if (val == null) { diff --git a/src/main/java/dev/morling/onebrc/CalculateAverage_vemana.java b/src/main/java/dev/morling/onebrc/CalculateAverage_vemana.java index 7673fb573..3e64ac905 100644 --- a/src/main/java/dev/morling/onebrc/CalculateAverage_vemana.java +++ b/src/main/java/dev/morling/onebrc/CalculateAverage_vemana.java @@ -17,21 +17,27 @@ import java.io.IOException; import java.io.RandomAccessFile; +import java.lang.reflect.Method; +import java.nio.ByteBuffer; import java.nio.ByteOrder; import java.nio.MappedByteBuffer; import java.nio.channels.FileChannel.MapMode; import java.nio.file.Path; import java.util.ArrayList; import java.util.Arrays; +import java.util.LinkedHashMap; import java.util.List; import java.util.Map; import java.util.Map.Entry; +import java.util.Optional; import java.util.TreeMap; import java.util.concurrent.Callable; import java.util.concurrent.ExecutionException; 
import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; import java.util.concurrent.Future; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicLong; import java.util.stream.Collectors; @@ -41,55 +47,68 @@ * remain readable for a majority of SWEs. At a high level, the approach relies on a few principles * listed herein. * - *

- * [Exploit Parallelism] Distribute the work into Shards. Separate threads (one per core) process + *

[Exploit Parallelism] Distribute the work into Shards. Separate threads (one per core) process * Shards and follow it up by merging the results. parallelStream() is appealing but carries * potential run-time variance (i.e. std. deviation) penalties based on informal testing. Variance * is not ideal when trying to minimize the maximum worker latency. * - *

- * [Use ByteBuffers over MemorySegment] Each Shard is further divided in Chunks. This would've been - * unnecessary except that Shards are too big to be backed by ByteBuffers. Besides, MemorySegment - * appears slower than ByteBuffers. So, to use ByteBuffers, we have to use smaller chunks. + *

[Understand that unmapping is serial and runs in exit()]. This is very much about exploiting + * parallelism. After adding tracing (plain old printfs), it was clear that the JVM was taking 400ms + * (out of 1500ms) just to exit. Turns out that the kernel tries to unmap all the mappings as part + * of the exit() call. Even strace wouldn't report this because the unmapping is running as part of + * the exit() call. perf stat barely hinted at it, but we had more insights by actually running a + * couple of experiments: reduce touched pages --> JVM shutdown latency went down; manually run + * unmap() call to free up the ByteBuffers --> parallel execution doesn't help at all. From this it + * was conclusive that unmap() executes serially and the 400ms was being spent purely unmapping. + * Now, the challenge is to both (1) unmap a MappedByteBuffer (no such methods exposed) from code + * rather than via exit() syscall and (2) do it in parallel without causing lock contention. For 1, + * use Reflection and (2) is an interesting math problem with a provably optimal solution. + * Parallelism in munmap() is achieved by using a fast lock that prevents two threads from + * simultaneously cleaning (i.e. munmap()) the ByteBuffer. * - *

- * [Straggler freedom] The optimization function here is to minimize the maximal worker thread + *

[Use ByteBuffers over MemorySegment] Each Shard is further divided in Chunks. This would've + * been unnecessary except that Shards are too big to be backed by ByteBuffers. Besides, + * MemorySegment appears slower than ByteBuffers. So, to use ByteBuffers, we have to use smaller + * chunks. + * + *

[Straggler freedom] The optimization function here is to minimize the maximal worker thread * completion. Law of large number averages means that all the threads will end up with similar * amounts of work and similar completion times; but, however ever so often there could be a bad * sharding and more importantly, Cores are not created equal; some will be throttled more than * others. So, we have a shared {@code LazyShardQueue} that aims to distribute work to minimize the * latest completion time. * - *

- * [Work Assignment with LazyShardQueue] The queue provides each thread with its next big-chunk + *

[Work Assignment with LazyShardQueue] The queue provides each thread with its next big-chunk * until X% of the work remains. Big-chunks belong to the thread and will not be provided to another - * thread. Then, it switches to providing small-chunk sizes. Small-chunks comprise the last X% of + * thread. Then, it switches to providing small-chunk sizes. Small-chunks comprise the last X% of * work and every thread can participate in completing the chunk. Even though the queue is shared * across threads, there's no communication across thread during the big-chunk phases. The queue is * effectively a per-thread queue while processing big-chunks. The small-chunk phase uses an * AtomicLong to coordinate chunk allocation across threads. * - *

- * [Chunk processing] Chunk processing is typical. Process line by line. Find a hash function + *

[Chunk processing] Chunk processing is typical. Process line by line. Find a hash function * (polynomial hash fns are slow, but will work fine), hash the city name, resolve conflicts using * linear probing and then accumulate the temperature into the appropriate hash slot. The key * element then is how fast can you identify the hash slot, read the temperature and update the new * temperature in the slot (i.e. min, max, count). * - *

- * [Cache friendliness] 7502P and my machine (7950X) offer 4MB L3 cache/core. This means we can hope - * to fit all our datastructures in L3 cache. Since SMT is turned on, the Runtime's available + *

[Cache friendliness] 7502P and my machine (7950X) offer 4MB L3 cache/core. This means we can + * hope to fit all our datastructures in L3 cache. Since SMT is turned on, the Runtime's available * processors will show twice the number of actual cores and so we get 2MB L3 cache/thread. To be * safe, we try to stay within 1.8 MB/thread and size our hashtable appropriately. * - *

- * [Allocation] Since MemorySegment seemed slower than ByteBuffers, backing Chunks by bytebuffers + *

[Native ByteOrder is MUCH better] There was almost a 10% lift by reading ints from bytebuffers + * using native byteorder . It so happens that both the eval machine (7502P) and my machine 7950X + * use native LITTLE_ENDIAN order, which again apparently is because X86[-64] is little-endian. But, + * by default, ByteBuffers use BIG_ENDIAN order, which appears to be a somewhat strange default from + * Java. + * + *

[Allocation] Since MemorySegment seemed slower than ByteBuffers, backing Chunks by bytebuffers * was the logical option. Creating one ByteBuffer per chunk was no bueno because the system doesn't * like it (JVM runs out of mapped file handle quota). Other than that, allocation in the hot path * was avoided. * - *

- * [General approach to fast hashing and temperature reading] Here, it helps to understand the + *

[General approach to fast hashing and temperature reading] Here, it helps to understand the * various bottlenecks in execution. One particular thing that I kept coming back to was to * understand the relative costs of instructions: See * https://www.agner.org/optimize/instruction_tables.pdf It is helpful to think of hardware as a @@ -102,24 +121,22 @@ * endPos" in a tight loop by breaking it into two pieces: one piece where the check will not be * needed and a tail piece where it will be needed. * - *

- * [Understand What Cores like]. Cores like to go straight and loop back. Despite good branch + *

[Understand What Cores like]. Cores like to go straight and loop back. Despite good branch * prediction, performance sucks with mispredicted branches. * - *

- * [JIT] Java performance requires understanding the JIT. It is helpful to understand what the JIT - * likes though it is still somewhat of a mystery to me. In general, it inlines small methods very - * well and after constant folding, it can optimize quite well across a reasonably deep call chain. - * My experience with the JIT was that everything I tried to tune it made it worse except for one - * parameter. I have a new-found respect for JIT - it likes and understands typical Java idioms. + *

[JIT] Java performance requires understanding the JIT. It is helpful to understand what the + * JIT likes though it is still somewhat of a mystery to me. In general, it inlines small methods + * very well and after constant folding, it can optimize quite well across a reasonably deep call + * chain. My experience with the JIT was that everything I tried to tune it made it worse except for + * one parameter. I have a new-found respect for JIT - it likes and understands typical Java idioms. * - *

[Tuning] Nothing was more insightful than actually playing with various tuning parameters. - * I can have all the theories but the hardware and JIT are giant blackboxes. I used a bunch of - * tools to optimize: (1) Command line parameters to tune big and small chunk sizes etc. This was - * also very helpful in forming a mental model of the JIT. Sometimes, it would compile some methods - * and sometimes it would just run them interpreted since the compilation threshold wouldn't be - * reached for intermediate methods. (2) AsyncProfiler - this was the first line tool to understand - * cache misses and cpu time to figure where to aim the next optimization effort. (3) JitWatch - + *

[Tuning] Nothing was more insightful than actually playing with various tuning parameters. I + * can have all the theories but the hardware and JIT are giant blackboxes. I used a bunch of tools + * to optimize: (1) Command line parameters to tune big and small chunk sizes etc. This was also + * very helpful in forming a mental model of the JIT. Sometimes, it would compile some methods and + * sometimes it would just run them interpreted since the compilation threshold wouldn't be reached + * for intermediate methods. (2) AsyncProfiler - this was the first line tool to understand cache + * misses and cpu time to figure where to aim the next optimization effort. (3) JitWatch - * invaluable for forming a mental model and attempting to tune the JIT. * *

[Things that didn't work]. This is a looong list and the hit rate is quite low. In general, @@ -140,19 +157,21 @@ */ public class CalculateAverage_vemana { - public static void checkArg(boolean condition) { - if (!condition) { - throw new IllegalArgumentException(); - } - } - public static void main(String[] args) throws Exception { + Tracing.recordAppStart(); + Runtime.getRuntime() + .addShutdownHook( + new Thread( + () -> { + Tracing.recordEvent("In Shutdown hook"); + })); + // First process in large chunks without coordination among threads // Use chunkSizeBits for the large-chunk size int chunkSizeBits = 20; // For the last commonChunkFraction fraction of total work, use smaller chunk sizes - double commonChunkFraction = 0; + double commonChunkFraction = 0.03; // Use commonChunkSizeBits for the small-chunk size int commonChunkSizeBits = 18; @@ -160,20 +179,47 @@ public static void main(String[] args) throws Exception { // Size of the hashtable (attempt to fit in L3) int hashtableSizeBits = 14; - if (args.length > 0) { - chunkSizeBits = Integer.parseInt(args[0]); - } + int minReservedBytesAtFileTail = 9; - if (args.length > 1) { - commonChunkFraction = Double.parseDouble(args[1]); - } + int nThreads = -1; - if (args.length > 2) { - commonChunkSizeBits = Integer.parseInt(args[2]); - } + String inputFile = "measurements.txt"; - if (args.length > 3) { - hashtableSizeBits = Integer.parseInt(args[3]); + double munmapFraction = 0.03; + + boolean fakeAdvance = false; + + for (String arg : args) { + String key = arg.substring(0, arg.indexOf('=')).trim(); + String value = arg.substring(key.length() + 1).trim(); + switch (key) { + case "chunkSizeBits": + chunkSizeBits = Integer.parseInt(value); + break; + case "commonChunkFraction": + commonChunkFraction = Double.parseDouble(value); + break; + case "commonChunkSizeBits": + commonChunkSizeBits = Integer.parseInt(value); + break; + case "hashtableSizeBits": + hashtableSizeBits = Integer.parseInt(value); + break; + case 
"inputfile": + inputFile = value; + break; + case "munmapFraction": + munmapFraction = Double.parseDouble(value); + break; + case "fakeAdvance": + fakeAdvance = Boolean.parseBoolean(value); + break; + case "nThreads": + nThreads = Integer.parseInt(value); + break; + default: + throw new IllegalArgumentException("Unknown argument: " + arg); + } } // System.err.println(STR.""" @@ -184,18 +230,32 @@ public static void main(String[] args) throws Exception { // - hashtableSizeBits = \{hashtableSizeBits} // """); - System.out.println(new Runner( - Path.of("measurements.txt"), - chunkSizeBits, - commonChunkFraction, - commonChunkSizeBits, - hashtableSizeBits).getSummaryStatistics()); + System.out.println( + new Runner( + Path.of(inputFile), + nThreads, + chunkSizeBits, + commonChunkFraction, + commonChunkSizeBits, + hashtableSizeBits, + minReservedBytesAtFileTail, + munmapFraction, + fakeAdvance) + .getSummaryStatistics()); + + Tracing.recordEvent("Final result printed"); } - public interface LazyShardQueue { + public record AggregateResult(Map tempStats) { - ByteRange take(int shardIdx); + @Override + public String toString() { + return this.tempStats().entrySet().stream() + .sorted(Entry.comparingByKey()) + .map(entry -> "%s=%s".formatted(entry.getKey(), entry.getValue())) + .collect(Collectors.joining(", ", "{", "}")); } + } // Mutable to avoid allocation public static class ByteRange { @@ -203,7 +263,9 @@ public static class ByteRange { private static final int BUF_SIZE = 1 << 30; private final long fileSize; + private final long maxEndPos; // Treat as if the file ends here private final RandomAccessFile raf; + private final List unclosedBuffers = new ArrayList<>(); // ***************** What this is doing and why ***************** // Reading from ByteBuffer appears faster from MemorySegment, but ByteBuffer can only be @@ -221,7 +283,6 @@ public static class ByteRange { // tuning // - This enables (relatively) allocation free chunking implementation. 
Our chunking impl uses // fine grained chunking for the last say X% of work to avoid being hostage to stragglers - // The PUBLIC API public MappedByteBuffer byteBuffer; public int endInBuf; // where the chunk ends inside the buffer @@ -231,8 +292,9 @@ public static class ByteRange { private long bufferStart; // byteBuffer's begin coordinate // Uninitialized; for mutability - public ByteRange(RandomAccessFile raf) { + public ByteRange(RandomAccessFile raf, long maxEndPos) { this.raf = raf; + this.maxEndPos = maxEndPos; try { this.fileSize = raf.length(); } @@ -242,6 +304,20 @@ public ByteRange(RandomAccessFile raf) { bufferEnd = bufferStart = -1; } + public void close(String closerId, int shardIdx) { + Tracing.recordWorkStart(closerId, shardIdx); + if (byteBuffer != null) { + unclosedBuffers.add(byteBuffer); + } + for (MappedByteBuffer buf : unclosedBuffers) { + close(buf); + } + unclosedBuffers.clear(); + bufferEnd = bufferStart = -1; + byteBuffer = null; + Tracing.recordWorkEnd(closerId, shardIdx); + } + public void setRange(long rangeStart, long rangeEnd) { if (rangeEnd + 1024 > bufferEnd || rangeStart < bufferStart) { bufferStart = rangeStart; @@ -252,12 +328,15 @@ public void setRange(long rangeStart, long rangeEnd) { if (rangeStart > 0) { rangeStart = 1 + nextNewLine(rangeStart); } + else { + rangeStart = 0; + } - if (rangeEnd < fileSize) { + if (rangeEnd < maxEndPos) { rangeEnd = 1 + nextNewLine(rangeEnd); } else { - rangeEnd = fileSize; + rangeEnd = maxEndPos; } startInBuf = (int) (rangeStart - bufferStart); @@ -267,13 +346,25 @@ public void setRange(long rangeStart, long rangeEnd) { @Override public String toString() { return STR.""" - ByteRange { - startInBuf = \{startInBuf} - endInBuf = \{endInBuf} - } - """; + ByteRange { + bufferStart = \{bufferStart} + bufferEnd = \{bufferEnd} + startInBuf = \{startInBuf} + endInBuf = \{endInBuf} + } + """; } + private void close(MappedByteBuffer buffer) { + Method cleanerMethod = Reflection.findMethodNamed(buffer, 
"cleaner"); + cleanerMethod.setAccessible(true); + Object cleaner = Reflection.invoke(buffer, cleanerMethod); + + Method cleanMethod = Reflection.findMethodNamed(cleaner, "clean"); + cleanMethod.setAccessible(true); + Reflection.invoke(cleaner, cleanMethod); + } + private long nextNewLine(long pos) { int nextPos = (int) (pos - bufferStart); while (byteBuffer.get(nextPos) != '\n') { @@ -283,8 +374,12 @@ private long nextNewLine(long pos) { } private void setByteBufferToRange(long start, long end) { + if (byteBuffer != null) { + unclosedBuffers.add(byteBuffer); + } try { byteBuffer = raf.getChannel().map(MapMode.READ_ONLY, start, end - start); + byteBuffer.order(ByteOrder.nativeOrder()); } catch (IOException e) { throw new RuntimeException(e); @@ -292,72 +387,155 @@ private void setByteBufferToRange(long start, long end) { } } - public record Result(Map tempStats) { + public static final class Checks { - @Override - public String toString() { - return this.tempStats() - .entrySet() - .stream() - .sorted(Entry.comparingByKey()) - .map(entry -> "%s=%s".formatted(entry.getKey(), entry.getValue())) - .collect(Collectors.joining(", ", "{", "}")); + public static void checkArg(boolean condition) { + if (!condition) { + throw new IllegalArgumentException(); + } + } + + private Checks() { + } + } + + public interface LazyShardQueue { + + void close(String closerId, int shardIdx); + + Optional fileTailEndWork(int idx); + + ByteRange take(int shardIdx); + } + + static final class Reflection { + + static Method findMethodNamed(Object object, String name, Class... paramTypes) { + try { + return object.getClass().getMethod(name, paramTypes); + } + catch (NoSuchMethodException e) { + throw new RuntimeException(e); + } + } + + static Object invoke(Object receiver, Method method, Object... 
params) { + try { + return method.invoke(receiver, params); + } + catch (Exception e) { + throw new RuntimeException(e); + } + } } - } public static class Runner { private final double commonChunkFraction; private final int commonChunkSizeBits; + private final boolean fakeAdvance; private final int hashtableSizeBits; private final Path inputFile; + private final int minReservedBytesAtFileTail; + private final double munmapFraction; + private final int nThreads; private final int shardSizeBits; public Runner( - Path inputFile, int chunkSizeBits, double commonChunkFraction, int commonChunkSizeBits, - int hashtableSizeBits) { + Path inputFile, + int nThreads, + int chunkSizeBits, + double commonChunkFraction, + int commonChunkSizeBits, + int hashtableSizeBits, + int minReservedBytesAtFileTail, + double munmapFraction, + boolean fakeAdvance) { this.inputFile = inputFile; + this.nThreads = nThreads; this.shardSizeBits = chunkSizeBits; this.commonChunkFraction = commonChunkFraction; this.commonChunkSizeBits = commonChunkSizeBits; this.hashtableSizeBits = hashtableSizeBits; + this.minReservedBytesAtFileTail = minReservedBytesAtFileTail; + this.munmapFraction = munmapFraction; + this.fakeAdvance = fakeAdvance; } - Result getSummaryStatistics() throws Exception { - int processors = Runtime.getRuntime().availableProcessors(); + AggregateResult getSummaryStatistics() throws Exception { + int nThreads = this.nThreads < 0 ? 
Runtime.getRuntime().availableProcessors() : this.nThreads; + LazyShardQueue shardQueue = new SerialLazyShardQueue( 1L << shardSizeBits, inputFile, - processors, + nThreads, commonChunkFraction, - commonChunkSizeBits); + commonChunkSizeBits, + minReservedBytesAtFileTail, + munmapFraction, + fakeAdvance); - List> results = new ArrayList<>(); ExecutorService executorService = Executors.newFixedThreadPool( - processors, + nThreads, runnable -> { Thread thread = new Thread(runnable); thread.setDaemon(true); return thread; }); - long[] finishTimes = new long[processors]; - - for (int i = 0; i < processors; i++) { - final int I = i; - final Callable callable = () -> { - Result result = new ShardProcessor(shardQueue, hashtableSizeBits, I).processShard(); - finishTimes[I] = System.nanoTime(); + List> results = new ArrayList<>(); + for (int i = 0; i < nThreads; i++) { + final int shardIdx = i; + final Callable callable = () -> { + Tracing.recordWorkStart("Shard", shardIdx); + AggregateResult result = new ShardProcessor(shardQueue, hashtableSizeBits, shardIdx).processShard(); + Tracing.recordWorkEnd("Shard", shardIdx); return result; }; results.add(executorService.submit(callable)); } - // printFinishTimes(finishTimes); - return executorService.submit(() -> merge(results)).get(); + Tracing.recordEvent("Basic push time"); + + // This particular sequence of Futures is so that both merge and munmap() can work as shards + // finish their computation without blocking on the entire set of shards to complete. In + // particular, munmap() doesn't need to wait on merge. + // First, submit a task to merge the results and then submit a task to cleanup bytebuffers + // from completed shards. + Future resultFutures = executorService.submit(() -> merge(results)); + // Note that munmap() is serial and not parallel and hence we use just one thread. 
+ executorService.submit(() -> closeByteBuffers(results, shardQueue)); + + AggregateResult result = resultFutures.get(); + Tracing.recordEvent("Merge results received"); + + Tracing.recordEvent("About to shutdown executor and wait"); + executorService.shutdown(); + executorService.awaitTermination(Long.MAX_VALUE, TimeUnit.MILLISECONDS); + Tracing.recordEvent("Executor terminated"); + + Tracing.analyzeWorkThreads(nThreads); + return result; } - private Result merge(List> results) + private void closeByteBuffers( + List> results, LazyShardQueue shardQueue) { + int n = results.size(); + boolean[] isDone = new boolean[n]; + int remaining = results.size(); + while (remaining > 0) { + for (int i = 0; i < n; i++) { + if (!isDone[i] && results.get(i).isDone()) { + remaining--; + isDone[i] = true; + shardQueue.close("Ending Cleaner", i); + } + } + } + } + + private AggregateResult merge(List> results) throws ExecutionException, InterruptedException { + Tracing.recordEvent("Merge start time"); Map output = null; boolean[] isDone = new boolean[results.size()]; int remaining = results.size(); @@ -374,60 +552,82 @@ private Result merge(List> results) for (Entry entry : results.get(i).get().tempStats().entrySet()) { output.compute( entry.getKey(), - (key, value) -> value == null ? entry.getValue() - : Stat.merge(value, entry.getValue())); + (key, value) -> value == null ? 
entry.getValue() : Stat.merge(value, entry.getValue())); } } } } } - return new Result(output); + Tracing.recordEvent("Merge end time"); + return new AggregateResult(output); } - - private void printFinishTimes(long[] finishTimes) { - Arrays.sort(finishTimes); - int n = finishTimes.length; - System.err.println(STR."Finish Delta: \{(finishTimes[n - 1] - finishTimes[0]) / 1_000_000}ms"); - } } public static class SerialLazyShardQueue implements LazyShardQueue { - private static long roundToNearestHigherMultipleOf(long divisor, long value) { - return (value + divisor - 1) / divisor * divisor; + private static long roundToNearestLowerMultipleOf(long divisor, long value) { + return value / divisor * divisor; } private final ByteRange[] byteRanges; private final long chunkSize; private final long commonChunkSize; private final AtomicLong commonPool; + private final long effectiveFileSize; + private final boolean fakeAdvance; private final long fileSize; - private final long[] nextStarts; + private final long[] perThreadData; + private final RandomAccessFile raf; + private final SeqLock seqLock; public SerialLazyShardQueue( - long chunkSize, Path filePath, int shards, double commonChunkFraction, - int commonChunkSizeBits) + long chunkSize, + Path filePath, + int shards, + double commonChunkFraction, + int commonChunkSizeBits, + int fileTailReservedBytes, + double munmapFraction, + boolean fakeAdvance) throws IOException { - checkArg(commonChunkFraction < 0.9 && commonChunkFraction >= 0); - var raf = new RandomAccessFile(filePath.toFile(), "r"); + this.fakeAdvance = fakeAdvance; + Checks.checkArg(commonChunkFraction < 0.9 && commonChunkFraction >= 0); + Checks.checkArg(fileTailReservedBytes >= 0); + this.raf = new RandomAccessFile(filePath.toFile(), "r"); this.fileSize = raf.length(); + fileTailReservedBytes = fileTailReservedBytes == 0 + ? 
0 + : consumeToPreviousNewLineExclusive(raf, fileTailReservedBytes); + this.effectiveFileSize = fileSize - fileTailReservedBytes; // Common pool long commonPoolStart = Math.min( - roundToNearestHigherMultipleOf(chunkSize, (long) (fileSize * (1 - commonChunkFraction))), - fileSize); + roundToNearestLowerMultipleOf( + chunkSize, (long) (effectiveFileSize * (1 - commonChunkFraction))), + effectiveFileSize); this.commonPool = new AtomicLong(commonPoolStart); this.commonChunkSize = 1L << commonChunkSizeBits; // Distribute chunks to shards - this.nextStarts = new long[shards << 4]; // thread idx -> 16*idx to avoid cache line conflict - for (long i = 0, currentStart = 0, remainingChunks = (commonPoolStart + chunkSize - 1) / chunkSize; i < shards; i++) { + this.perThreadData = new long[shards << 4]; // thread idx -> 16*idx to avoid cache line conflict + for (long i = 0, + currentStart = 0, + remainingChunks = (commonPoolStart + chunkSize - 1) / chunkSize; i < shards; i++) { long remainingShards = shards - i; long currentChunks = (remainingChunks + remainingShards - 1) / remainingShards; // Shard i handles: [currentStart, currentStart + currentChunks * chunkSize) int pos = (int) i << 4; - nextStarts[pos] = currentStart; - nextStarts[pos + 1] = currentStart + currentChunks * chunkSize; + perThreadData[pos] = currentStart; // next chunk begin + perThreadData[pos + 1] = currentStart + currentChunks * chunkSize; // shard end + perThreadData[pos + 2] = currentChunks; // active chunks remaining + // threshold below which need to shrink + // 0.03 is a practical number but the optimal strategy is this: + // Shard number N (1-based) should unmap as soon as it completes (R/(R+1))^N fraction of + // its work, where R = relative speed of unmap compared to the computation. + // For our problem, R ~ 75 because unmap unmaps 30GB/sec (but, it is serial) while + // cores go through data at the rate of 400MB/sec. 
+ perThreadData[pos + 3] = (long) (currentChunks * (munmapFraction * (shards - i))); + perThreadData[pos + 4] = 1; // true iff munmap() hasn't been triggered yet currentStart += currentChunks * chunkSize; remainingChunks -= currentChunks; } @@ -435,53 +635,132 @@ public SerialLazyShardQueue( this.byteRanges = new ByteRange[shards << 4]; for (int i = 0; i < shards; i++) { - byteRanges[i << 4] = new ByteRange(raf); + byteRanges[i << 4] = new ByteRange(raf, effectiveFileSize); } + + this.seqLock = new SeqLock(); } @Override - public ByteRange take(int idx) { - // Try for thread local range - final int pos = idx << 4; - long rangeStart = nextStarts[pos]; - final long chunkEnd = nextStarts[pos + 1]; + public void close(String closerId, int shardIdx) { + byteRanges[shardIdx << 4].close(closerId, shardIdx); + } + @Override + public Optional fileTailEndWork(int idx) { + if (idx == 0 && effectiveFileSize < fileSize) { + ByteRange chunk = new ByteRange(raf, fileSize); + chunk.setRange( + effectiveFileSize == 0 ? 0 : effectiveFileSize - 1 /* will consume newline at eFS-1 */, + fileSize); + return Optional.of(chunk); + } + return Optional.empty(); + } + + @Override + public ByteRange take(int shardIdx) { + // Try for thread local range + final int pos = shardIdx << 4; + final long rangeStart; final long rangeEnd; - if (rangeStart < chunkEnd) { + if (perThreadData[pos + 2] >= 1) { + rangeStart = perThreadData[pos]; rangeEnd = rangeStart + chunkSize; - nextStarts[pos] = rangeEnd; + // Don't do this in the if-check; it causes negative values that trigger intermediate + // cleanup + perThreadData[pos + 2]--; + if (!fakeAdvance) { + perThreadData[pos] = rangeEnd; + } } else { rangeStart = commonPool.getAndAdd(commonChunkSize); // If that's exhausted too, nothing remains! 
- if (rangeStart >= fileSize) { + if (rangeStart >= effectiveFileSize) { return null; } rangeEnd = rangeStart + commonChunkSize; } + if (perThreadData[pos + 2] < perThreadData[pos + 3] && perThreadData[pos + 4] > 0) { + if (attemptIntermediateClose(shardIdx)) { + perThreadData[pos + 4]--; + } + } + ByteRange chunk = byteRanges[pos]; chunk.setRange(rangeStart, rangeEnd); return chunk; } + + private boolean attemptIntermediateClose(int shardIdx) { + if (seqLock.acquire()) { + close("Intermediate Cleaner", shardIdx); + seqLock.release(); + return true; + } + return false; + } + + private int consumeToPreviousNewLineExclusive(RandomAccessFile raf, int minReservedBytes) { + try { + long pos = Math.max(raf.length() - minReservedBytes - 1, -1); + if (pos < 0) { + return (int) raf.length(); + } + + long start = Math.max(pos - 512, 0); + ByteBuffer buf = raf.getChannel().map(MapMode.READ_ONLY, start, pos + 1 - start); + while (pos >= 0 && buf.get((int) (pos - start)) != '\n') { + pos--; + } + pos++; + return (int) (raf.length() - pos); + } + catch (Exception e) { + throw new RuntimeException(e); + } + } + } + + /** A low-traffic non-blocking lock. 
*/ + static class SeqLock { + + private final AtomicBoolean isOccupied = new AtomicBoolean(false); + + boolean acquire() { + return !isOccupied.get() && isOccupied.compareAndSet(false, true); + } + + void release() { + isOccupied.set(false); + } } public static class ShardProcessor { + private final int shardIdx; private final LazyShardQueue shardQueue; private final ShardProcessorState state; - private final int threadIdx; - public ShardProcessor(LazyShardQueue shardQueue, int hashtableSizeBits, int threadIdx) { + public ShardProcessor(LazyShardQueue shardQueue, int hashtableSizeBits, int shardIdx) { this.shardQueue = shardQueue; - this.threadIdx = threadIdx; + this.shardIdx = shardIdx; this.state = new ShardProcessorState(hashtableSizeBits); } - public Result processShard() { + public AggregateResult processShard() { + return processShardReal(); + } + + public AggregateResult processShardReal() { + // First process the file tail work to give ourselves freedom to go past ranges in parsing + shardQueue.fileTailEndWork(shardIdx).ifPresent(this::processRangeSlow); + ByteRange range; - while ((range = shardQueue.take(threadIdx)) != null) { + while ((range = shardQueue.take(shardIdx)) != null) { processRange(range); } return result(); @@ -497,15 +776,23 @@ private void processRange(ByteRange range) { } } - private Result result() { + private void processRangeSlow(ByteRange range) { + int nextPos = range.startInBuf; + while (nextPos < range.endInBuf) { + nextPos = state.processLineSlow(range.byteBuffer, nextPos); + } + } + + private AggregateResult result() { return state.result(); } } public static class ShardProcessorState { + public static final long ONE_MASK = 0x0101010101010101L; private static final ByteOrder NATIVE_BYTE_ORDER = ByteOrder.nativeOrder(); - + private static final long SEMICOLON_MASK = 0x3b3b3b3b3b3b3b3bL; private final byte[][] cityNames; private final int slotsMask; private final Stat[] stats; @@ -527,30 +814,30 @@ public int 
processLine(MappedByteBuffer mmb, int nextPos) { x = Integer.reverseBytes(x); } - byte a = (byte) (x >>> 24); + byte a = (byte) (x >>> 0); if (a == ';') { nextPos += 1; break; } - byte b = (byte) (x >>> 16); + byte b = (byte) (x >>> 8); if (b == ';') { nextPos += 2; - hash = hash * 31 + ((0xFF000000 & x)); + hash = hash * 31 + (0xFF & x); break; } - byte c = (byte) (x >>> 8); + byte c = (byte) (x >>> 16); if (c == ';') { nextPos += 3; - hash = hash * 31 + ((0xFFFF0000 & x)); + hash = hash * 31 + (0xFFFF & x); break; } - byte d = (byte) (x >>> 0); + byte d = (byte) (x >>> 24); if (d == ';') { nextPos += 4; - hash = hash * 31 + ((0xFFFFFF00 & x)); + hash = hash * 31 + (0xFFFFFF & x); break; } @@ -582,24 +869,58 @@ public int processLine(MappedByteBuffer mmb, int nextPos) { } linearProbe( - cityLen, - hash & slotsMask, - negative ? -temperature : temperature, - mmb, - originalPos); + cityLen, hash & slotsMask, negative ? -temperature : temperature, mmb, originalPos); return nextPos; } - public Result result() { + /** A slow version which is used only for the tail part of the file. */ + public int processLineSlow(MappedByteBuffer mmb, int nextPos) { + int originalPos = nextPos; + byte nextByte; + int hash = 0; + + outer: while (true) { + int accumulated = 0; + for (int i = 0; i < 4; i++) { + nextByte = mmb.get(nextPos++); + if (nextByte == ';') { + if (i > 0) { + hash = hash * 31 + accumulated; + } + break outer; + } + else { + accumulated |= ((int) nextByte << (8 * i)); + } + } + hash = hash * 31 + accumulated; + } + int cityLen = nextPos - 1 - originalPos; + + int temperature = 0; + boolean negative = mmb.get(nextPos) == '-'; + while ((nextByte = mmb.get(nextPos++)) != '\n') { + if (nextByte != '-' && nextByte != '.') { + temperature = temperature * 10 + (nextByte - '0'); + } + } + + linearProbe( + cityLen, hash & slotsMask, negative ? 
-temperature : temperature, mmb, originalPos); + + return nextPos; + } + + public AggregateResult result() { int N = stats.length; - TreeMap map = new TreeMap<>(); + Map map = new LinkedHashMap<>(5_000); for (int i = 0; i < N; i++) { if (stats[i] != null) { map.put(new String(cityNames[i]), stats[i]); } } - return new Result(map); + return new AggregateResult(map); } private byte[] copyFrom(MappedByteBuffer mmb, int offsetInMmb, int len) { @@ -619,6 +940,11 @@ private boolean equals(byte[] left, MappedByteBuffer right, int offsetInMmb, int return true; } + private boolean hasSemicolonByte(long value) { + long a = value ^ SEMICOLON_MASK; + return (((a - ONE_MASK) & ~a) & (0x8080808080808080L)) != 0; + } + private void linearProbe(int len, int hash, int temp, MappedByteBuffer mmb, int offsetInMmb) { for (int i = hash;; i = (i + 1) & slotsMask) { var curBytes = cityNames[i]; @@ -628,11 +954,6 @@ private void linearProbe(int len, int hash, int temp, MappedByteBuffer mmb, int return; } else { - // Overall, this tradeoff seems better than Arrays.equals(..) - // City name param is encoded as (mmb, offsetnInMmb, len) - // This avoids copying it into a (previously allocated) byte[] - // The downside is that we have to manually implement 'equals' and it can lose out - // to vectorized 'equals'; but the trade off seems to work in this particular case if (len == curBytes.length && equals(curBytes, mmb, offsetInMmb, len)) { stats[i].mergeReading(temp); return; @@ -642,6 +963,7 @@ private void linearProbe(int len, int hash, int temp, MappedByteBuffer mmb, int } } + /** Represents aggregate stats. 
*/ public static class Stat { public static Stat firstReading(int temp) { @@ -689,4 +1011,125 @@ public String toString() { return "%.1f/%.1f/%.1f".formatted(min / 10.0, sum / 10.0 / count, max / 10.0); } } + + static class Tracing { + + private static final Map knownWorkThreadEvents; + private static long startTime; + + static { + // Maintain the ordering to be chronological in execution + // Map.of(..) screws up ordering + knownWorkThreadEvents = new LinkedHashMap<>(); + for (String id : List.of("Shard", "Intermediate Cleaner", "Ending Cleaner")) { + knownWorkThreadEvents.put(id, new ThreadTimingsArray(id, 1 << 6 << 1)); + } + } + + static void analyzeWorkThreads(int nThreads) { + for (ThreadTimingsArray array : knownWorkThreadEvents.values()) { + errPrint(array.analyze(nThreads)); + } + } + + static void recordAppStart() { + startTime = System.nanoTime(); + } + + static void recordEvent(String event) { + printEvent(event, System.nanoTime()); + } + + static void recordWorkEnd(String id, int threadId) { + knownWorkThreadEvents.get(id).recordEnd(threadId); + } + + static void recordWorkStart(String id, int threadId) { + knownWorkThreadEvents.get(id).recordStart(threadId); + } + + ///////////////////////////////////////////////////////////////////////////////////////////////// + + private static void errPrint(String message) { + System.err.println(message); + } + + private static void printEvent(String message, long nanoTime) { + errPrint(STR."\{message} = \{(nanoTime - startTime) / 1_000_000}ms"); + } + + public static class ThreadTimingsArray { + + private static String toString(long[] array) { + return Arrays.stream(array) + .map(x -> x < 0 ? 
-1 : x) + .mapToObj(x -> String.format("%6d", x)) + .collect(Collectors.joining(", ", "[ ", " ]")); + } + + private final String id; + private final long[] timestamps; + private boolean hasData = false; + + public ThreadTimingsArray(String id, int maxSize) { + this.timestamps = new long[maxSize]; + this.id = id; + } + + public String analyze(int nThreads) { + if (!hasData) { + return "%s has no thread timings data".formatted(id); + } + Checks.checkArg(nThreads <= timestamps.length); + long minDuration = Long.MAX_VALUE, maxDuration = Long.MIN_VALUE; + long minBegin = Long.MAX_VALUE, maxCompletion = Long.MIN_VALUE; + long maxBegin = Long.MIN_VALUE, minCompletion = Long.MAX_VALUE; + + long[] durationsMs = new long[nThreads]; + long[] completionsMs = new long[nThreads]; + long[] beginMs = new long[nThreads]; + for (int i = 0; i < nThreads; i++) { + long durationNs = timestamps[2 * i + 1] - timestamps[2 * i]; + durationsMs[i] = durationNs / 1_000_000; + completionsMs[i] = (timestamps[2 * i + 1] - startTime) / 1_000_000; + beginMs[i] = (timestamps[2 * i] - startTime) / 1_000_000; + + minDuration = Math.min(minDuration, durationNs); + maxDuration = Math.max(maxDuration, durationNs); + + minBegin = Math.min(minBegin, timestamps[2 * i] - startTime); + maxBegin = Math.max(maxBegin, timestamps[2 * i] - startTime); + + maxCompletion = Math.max(maxCompletion, timestamps[2 * i + 1] - startTime); + minCompletion = Math.min(minCompletion, timestamps[2 * i + 1] - startTime); + } + return STR.""" + ------------------------------------------------------------------------------------------- + \{id} Stats + ------------------------------------------------------------------------------------------- + Max duration = \{maxDuration / 1_000_000} ms + Min duration = \{minDuration / 1_000_000} ms + Timespan[max(end)-min(start)] = \{(maxCompletion - minBegin) / 1_000_000} ms [\{maxCompletion / 1_000_000} - \{minBegin / 1_000_000} ] + Completion Timespan[max(end)-min(end)] = \{(maxCompletion - 
minCompletion) / 1_000_000} ms + Begin Timespan[max(begin)-min(begin)] = \{(maxBegin - minBegin) / 1_000_000} ms + Average Duration = \{Arrays.stream(durationsMs) + .average() + .getAsDouble()} ms + Durations = \{toString(durationsMs)} ms + Begin Timestamps = \{toString(beginMs)} ms + Completion Timestamps = \{toString(completionsMs)} ms + """; + } + + public void recordEnd(int idx) { + timestamps[2 * idx + 1] = System.nanoTime(); + hasData = true; + } + + public void recordStart(int idx) { + timestamps[2 * idx] = System.nanoTime(); + hasData = true; + } + } + } } diff --git a/src/main/java/dev/morling/onebrc/CalculateAverage_vemanaNonIdiomatic.java b/src/main/java/dev/morling/onebrc/CalculateAverage_vemanaNonIdiomatic.java new file mode 100644 index 000000000..10b9c1e89 --- /dev/null +++ b/src/main/java/dev/morling/onebrc/CalculateAverage_vemanaNonIdiomatic.java @@ -0,0 +1,1654 @@ +/* + * Copyright 2023 The original authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package dev.morling.onebrc; + +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.io.RandomAccessFile; +import java.lang.invoke.MethodHandles; +import java.lang.reflect.Field; +import java.lang.reflect.Method; +import java.nio.ByteBuffer; +import java.nio.ByteOrder; +import java.nio.MappedByteBuffer; +import java.nio.channels.FileChannel.MapMode; +import java.nio.file.Path; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashMap; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; +import java.util.Map.Entry; +import java.util.Optional; +import java.util.TreeMap; +import java.util.concurrent.Callable; +import java.util.concurrent.ExecutionException; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.Future; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicBoolean; +import java.util.concurrent.atomic.AtomicLong; +import java.util.stream.Collectors; +import sun.misc.Unsafe; + +/** + * Unlike its sister submission {@code CalculateAverage_vemana}, this submission employs non + * idiomatic methods such as SWAR and Unsafe. + * + *

For details on how this solution works, check the documentation on the sister submission. + */ +public class CalculateAverage_vemanaNonIdiomatic { + + public static void main(String[] args) throws Exception { + String className = MethodHandles.lookup().lookupClass().getSimpleName(); + System.err.println( + STR.""" + ------------------------------------------------ + Running \{className} + ------------------------------------------------- + """); + Tracing.recordAppStart(); + Runtime.getRuntime() + .addShutdownHook( + new Thread( + () -> { + Tracing.recordEvent("In Shutdown hook"); + })); + + // First process in large chunks without coordination among threads + // Use chunkSizeBits for the large-chunk size + int chunkSizeBits = 20; + + // For the last commonChunkFraction fraction of total work, use smaller chunk sizes + double commonChunkFraction = 0.03; + + // Use commonChunkSizeBits for the small-chunk size + int commonChunkSizeBits = 18; + + // Size of the hashtable (attempt to fit in L2 of 512KB of eval machine) + int hashtableSizeBits = className.toLowerCase().contains("nonidiomatic") ? 13 : 16; + + // Reserve some number of lines at the end to give us freedom in reading LONGs past ranges + int minReservedBytesAtFileTail = 9; + + // Number of threads + int nThreads = -1; + + String inputFile = "measurements.txt"; + + // Parallelize unmap. Thread #n (n=1,2,..N) unmaps its bytebuffer when + // munmapFraction * n work remains. 
+ double munmapFraction = 0.03; + + boolean fakeAdvance = false; + + for (String arg : args) { + String key = arg.substring(0, arg.indexOf('=')).trim(); + String value = arg.substring(key.length() + 1).trim(); + switch (key) { + case "chunkSizeBits": + chunkSizeBits = Integer.parseInt(value); + break; + case "commonChunkFraction": + commonChunkFraction = Double.parseDouble(value); + break; + case "commonChunkSizeBits": + commonChunkSizeBits = Integer.parseInt(value); + break; + case "hashtableSizeBits": + hashtableSizeBits = Integer.parseInt(value); + break; + case "inputFile": + inputFile = value; + break; + case "munmapFraction": + munmapFraction = Double.parseDouble(value); + break; + case "fakeAdvance": + fakeAdvance = Boolean.parseBoolean(value); + break; + case "nThreads": + nThreads = Integer.parseInt(value); + break; + default: + throw new IllegalArgumentException("Unknown argument: " + arg); + } + } + + System.out.println( + new Runner( + Path.of(inputFile), + nThreads, + chunkSizeBits, + commonChunkFraction, + commonChunkSizeBits, + hashtableSizeBits, + minReservedBytesAtFileTail, + munmapFraction, + fakeAdvance) + .getSummaryStatistics()); + + Tracing.recordEvent("Final result printed"); + } + + public record AggregateResult(Map tempStats) { + + @Override + public String toString() { + return this.tempStats().entrySet().stream() + .sorted(Map.Entry.comparingByKey()) + .map(entry -> "%s=%s".formatted(entry.getKey(), entry.getValue())) + .collect(Collectors.joining(", ", "{", "}")); + } + } + + // Mutable to avoid allocation + public static class ByteRange { + + private static final int BUF_SIZE = 1 << 28; + + private final long fileSize; + private final long maxEndPos; // Treat as if the file ends here + private final RandomAccessFile raf; + private final int shardIdx; + private final List unclosedBuffers = new ArrayList<>(); + // ***************** What this is doing and why ***************** + // Reading from ByteBuffer appears faster from MemorySegment, 
but ByteBuffer can only be + // Integer.MAX_VALUE long; Creating one byteBuffer per chunk kills native memory quota + // and JVM crashes without futher parameters. + // + // So, in this solution, create a sliding window of bytebuffers: + // - Create a large bytebuffer that spans the chunk + // - If the next chunk falls outside the byteBuffer, create another byteBuffer that spans the + // chunk. Because chunks are allocated serially, a single large (1<<30) byteBuffer spans + // many successive chunks. + // - In fact, for serial chunk allocation (which is friendly to page faulting anyway), + // the number of created ByteBuffers doesn't exceed [size of shard/(1<<30)] which is less than + // 100/thread and is comfortably below what the JVM can handle (65K) without further param + // tuning + // - This enables (relatively) allocation free chunking implementation. Our chunking impl uses + // fine grained chunking for the last say X% of work to avoid being hostage to stragglers + + ///////////// The PUBLIC API + + public MappedByteBuffer byteBuffer; + public long endAddress; // the virtual memory address corresponding to 'endInBuf' + public int endInBuf; // where the chunk ends inside the buffer + public long startAddress; // the virtual memory address corresponding to 'startInBuf' + public int startInBuf; // where the chunk starts inside the buffer + + ///////////// Private State + + long bufferBaseAddr; // buffer's base virtual memory address + long extentEnd; // byteBuffer's ending coordinate + long extentStart; // byteBuffer's begin coordinate + + // Uninitialized; for mutability + public ByteRange(RandomAccessFile raf, long maxEndPos, int shardIdx) { + this.raf = raf; + this.maxEndPos = maxEndPos; + this.shardIdx = shardIdx; + try { + this.fileSize = raf.length(); + } + catch (IOException e) { + throw new RuntimeException(e); + } + bufferCleanSlate(); + } + + public void close(String closerId) { + Tracing.recordWorkStart(closerId, shardIdx); + bufferCleanSlate(); + 
for (MappedByteBuffer buf : unclosedBuffers) { + close(buf); + } + unclosedBuffers.clear(); + Tracing.recordWorkEnd(closerId, shardIdx); + } + + public void setRange(long rangeStart, long rangeEnd) { + if (rangeEnd + 1024 > extentEnd || rangeStart < extentStart) { + setByteBufferExtent(rangeStart, Math.min(rangeStart + BUF_SIZE, fileSize)); + } + + if (rangeStart > 0) { + rangeStart = 1 + nextNewLine(rangeStart); + } + else { + rangeStart = 0; + } + + if (rangeEnd < maxEndPos) { + // rangeEnd = 1 + nextNewLine(rangeEnd); // not needed + rangeEnd = 1 + rangeEnd; + } + else { + rangeEnd = maxEndPos; + } + + startInBuf = (int) (rangeStart - extentStart); + endInBuf = (int) (rangeEnd - extentStart); + startAddress = bufferBaseAddr + startInBuf; + endAddress = bufferBaseAddr + endInBuf; + } + + @Override + public String toString() { + return STR.""" + ByteRange { + shard = \{shardIdx} + extentStart = \{extentStart} + extentEnd = \{extentEnd} + startInBuf = \{startInBuf} + endInBuf = \{endInBuf} + startAddress = \{startAddress} + endAddress = \{endAddress} + } + """; + } + + private void bufferCleanSlate() { + if (byteBuffer != null) { + unclosedBuffers.add(byteBuffer); + byteBuffer = null; + } + extentEnd = extentStart = bufferBaseAddr = startAddress = endAddress = -1; + } + + private void close(MappedByteBuffer buffer) { + Method cleanerMethod = Reflection.findMethodNamed(buffer, "cleaner"); + cleanerMethod.setAccessible(true); + Object cleaner = Reflection.invoke(buffer, cleanerMethod); + + Method cleanMethod = Reflection.findMethodNamed(cleaner, "clean"); + cleanMethod.setAccessible(true); + Reflection.invoke(cleaner, cleanMethod); + } + + private long getBaseAddr(MappedByteBuffer buffer) { + Method addressMethod = Reflection.findMethodNamed(buffer, "address"); + addressMethod.setAccessible(true); + return (long) Reflection.invoke(buffer, addressMethod); + } + + private long nextNewLine(long pos) { + int nextPos = (int) (pos - extentStart); + while 
(byteBuffer.get(nextPos) != '\n') { + nextPos++; + } + return nextPos + extentStart; + } + + /** + * Extent different from Range. Range is what needs to be processed. Extent is what the byte + * buffer can read without failing. + */ + private void setByteBufferExtent(long start, long end) { + bufferCleanSlate(); + try { + byteBuffer = raf.getChannel().map(MapMode.READ_ONLY, start, end - start); + byteBuffer.order(ByteOrder.nativeOrder()); + } + catch (IOException e) { + throw new RuntimeException(e); + } + extentStart = start; + extentEnd = end; + bufferBaseAddr = getBaseAddr(byteBuffer); + } + } + + public static final class Checks { + + public static void checkArg(boolean condition) { + if (!condition) { + throw new IllegalArgumentException(); + } + } + + private Checks() { + } + } + + /* + * ENTRY SHAPE + * Ensure alignment boundaries. 4 bytes on 4 byte, 2 bytes on 2 byte etc. + * 32 bytes per entry. + * 96 KB L1 cache. 2048 entries should fully fit + * ------------------- + * str: 14 bytes [Defined by constant STR_FIELD_LEN] + * hash: 2 bytes + * cityNameOffset: 3 bytes // Index in city names array if len > STR_FIELD_LEN bytes + * len: 1 byte // Length of string, in bytes + * sum: 4 bytes + * count: 4 bytes + * max: 2 bytes + * min: 2 bytes + */ + static class EntryData { + + public static final int ENTRY_SIZE_BITS = 5; + + /////////// OFFSETS /////////////// + private static final int OFFSET_STR = 0; + private static final int STR_FIELD_LEN = 14; + private static final int OFFSET_HASH = OFFSET_STR + STR_FIELD_LEN; + private static final int OFFSET_CITY_NAME_EXTRA = OFFSET_HASH + 2; + private static final int OFFSET_LEN = OFFSET_CITY_NAME_EXTRA + 3; + private static final int OFFSET_SUM = OFFSET_LEN + 1; + private static final int OFFSET_COUNT = OFFSET_SUM + 4; + private static final int OFFSET_MAX = OFFSET_COUNT + 4; + private static final int OFFSET_MIN = OFFSET_MAX + 2; + + public static int strFieldLen() { + return STR_FIELD_LEN; + } + + private final 
EntryMeta entryMeta; + + private long baseAddress; + + public EntryData(EntryMeta entryMeta) { + this.entryMeta = entryMeta; + } + + public long baseAddress() { + return baseAddress; + } + + public String cityNameString() { + int len = len(); + byte[] zeBytes = new byte[len]; + + for (int i = 0; i < Math.min(len, strFieldLen()); i++) { + zeBytes[i] = Unsafely.readByte(baseAddress + i); + } + + if (len > strFieldLen()) { + int rem = len - strFieldLen(); + long ptr = entryMeta.cityNamesAddress(cityNamesOffset()); + for (int i = 0; i < rem; i++) { + zeBytes[strFieldLen() + i] = Unsafely.readByte(ptr + i); + } + } + + return new String(zeBytes); + } + + public int cityNamesOffset() { + return Unsafely.readInt(baseAddress + OFFSET_CITY_NAME_EXTRA) & 0xFFFFFF; + } + + public int count() { + return Unsafely.readInt(baseAddress + OFFSET_COUNT); + } + + public short hash16() { + return Unsafely.readShort(baseAddress + OFFSET_HASH); + } + + public int index() { + return (int) ((baseAddress() - entryMeta.baseAddress(0)) >> ENTRY_SIZE_BITS); + } + + public void init(long srcAddr, int len, short hash16, short temperature) { + // Copy the string + Unsafely.copyMemory(srcAddr, strAddress(), Math.min(len, EntryData.strFieldLen())); + if (len > EntryData.strFieldLen()) { + int remaining = len - EntryData.strFieldLen(); + int cityNamesOffset = entryMeta.getAndIncrementCityNames(remaining); + Unsafely.copyMemory( + srcAddr + EntryData.strFieldLen(), + entryMeta.cityNamesAddress(cityNamesOffset), + remaining); + setCityNameOffset(cityNamesOffset, len); + } + else { + setLen((byte) len); + } + + // and then update the others + setHash16(hash16); + setSum(temperature); + setCount(1); + setMax(temperature); + setMin(temperature); + } + + public boolean isPresent() { + return len() > 0; + } + + public int len() { + return Unsafely.readByte(baseAddress + OFFSET_LEN); + } + + public short max() { + return Unsafely.readShort(baseAddress + OFFSET_MAX); + } + + public short min() { + return 
Unsafely.readShort(baseAddress + OFFSET_MIN); + } + + public void setBaseAddress(long baseAddress) { + this.baseAddress = baseAddress; + } + + public void setCityNameOffset(int cityNamesOffset, int len) { + // The 24 here is 3 bytes for Cityname extra index + 1 byte for actual len + // that writes 4 bytes in one shot. It is not an offset. + Unsafely.setInt(baseAddress + OFFSET_CITY_NAME_EXTRA, cityNamesOffset | (len << 24)); + } + + public void setCount(int value) { + Unsafely.setInt(baseAddress + OFFSET_COUNT, value); + } + + public void setHash16(short value) { + Unsafely.setShort(baseAddress + OFFSET_HASH, value); + } + + public void setIndex(int index) { + setBaseAddress(entryMeta.baseAddress(index)); + } + + public void setLen(byte value) { + Unsafely.setByte(baseAddress + OFFSET_LEN, value); + } + + public void setMax(short value) { + Unsafely.setShort(baseAddress + OFFSET_MAX, value); + } + + public void setMin(short value) { + Unsafely.setShort(baseAddress + OFFSET_MIN, value); + } + + public void setSum(int value) { + Unsafely.setInt(baseAddress + OFFSET_SUM, value); + } + + public Stat stat() { + return new Stat(min(), max(), sum(), count()); + } + + public long strAddress() { + return baseAddress + OFFSET_STR; + } + + public int sum() { + return Unsafely.readInt(baseAddress + OFFSET_SUM); + } + + public String toString() { + return STR.""" + min = \{min()} + max = \{max()} + count = \{count()} + sum = \{sum()} + """; + } + + public void update(short temperature) { + setMin((short) Math.min(min(), temperature)); + setMax((short) Math.max(max(), temperature)); + setCount(count() + 1); + setSum(sum() + temperature); + } + + public boolean updateOnMatch( + EntryMeta entryMeta, long srcAddr, int len, short hash16, short temperature) { + + // Quick paths + if (len() != len) { + return false; + } + if (hash16() != hash16) { + return false; + } + + // Actual string comparison + if (len <= STR_FIELD_LEN) { + if (!Unsafely.matches(srcAddr, strAddress(), len)) { + 
return false; + } + } + else { + if (!Unsafely.matches(srcAddr, strAddress(), STR_FIELD_LEN)) { + return false; + } + if (!Unsafely.matches( + srcAddr + STR_FIELD_LEN, + entryMeta.cityNamesAddress(cityNamesOffset()), + len - STR_FIELD_LEN)) { + return false; + } + } + update(temperature); + return true; + } + } + + /** Metadata for the collection of entries */ + static class EntryMeta { + + static int toIntFromUnsignedShort(short x) { + int ret = x; + if (ret < 0) { + ret += (1 << 16); + } + return ret; + } + + private final long baseAddress; + private final long cityNamesBaseAddress; // For city names that overflow Entry.STR_FIELD_LEN + private final int hashMask; + private final int n_entries; + private final int n_entriesBits; + private long cityNamesEndAddress; // [cityNamesBaseAddress, cityNamesEndAddress) + + EntryMeta(int n_entriesBits, EntryMeta oldEntryMeta) { + this.n_entries = 1 << n_entriesBits; + this.hashMask = (1 << n_entriesBits) - 1; + this.n_entriesBits = n_entriesBits; + this.baseAddress = Unsafely.allocateZeroedCacheLineAligned(this.n_entries << EntryData.ENTRY_SIZE_BITS); + if (oldEntryMeta == null) { + this.cityNamesBaseAddress = Unsafely.allocateZeroedCacheLineAligned(1 << 17); + this.cityNamesEndAddress = cityNamesBaseAddress; + } + else { + this.cityNamesBaseAddress = oldEntryMeta.cityNamesBaseAddress; + this.cityNamesEndAddress = oldEntryMeta.cityNamesEndAddress; + } + } + + public long cityNamesAddress(int extraLenOffset) { + return cityNamesBaseAddress + extraLenOffset; + } + + public int indexFromHash16(short hash16) { + return indexFromHash32(toIntFromUnsignedShort(hash16)); + } + + public int nEntriesBits() { + return n_entriesBits; + } + + // Base Address of nth entry + long baseAddress(int n) { + return baseAddress + ((long) n << EntryData.ENTRY_SIZE_BITS); + } + + // Size of each entry + int entrySizeInBytes() { + return 1 << EntryData.ENTRY_SIZE_BITS; + } + + int getAndIncrementCityNames(int len) { + long ret = 
cityNamesEndAddress; + cityNamesEndAddress += ((len + 7) >> 3) << 3; // use aligned 8 bytes + return (int) (ret - cityNamesBaseAddress); + } + + // Index of an entry with given hash32 + int indexFromHash32(int hash32) { + return hash32 & hashMask; + } + + // Number of entries + int nEntries() { + return n_entries; + } + + int nextIndex(int index) { + return (index + 1) & hashMask; + } + } + + static class Hashtable { + + // State + int n_filledEntries; + // A single Entry to avoid local allocation + private EntryData entry; + private EntryMeta entryMeta; + // Invariants + // hash16 = (short) hash32 + // index = hash16 & hashMask + private int hashHits = 0, hashMisses = 0; + + Hashtable(int slotsBits) { + entryMeta = new EntryMeta(slotsBits, null); + this.entry = new EntryData(entryMeta); + } + + public void addDataPoint(long srcAddr, int len, int hash32, short temperature) { + // hashHits++; + for (int index = entryMeta.indexFromHash32(hash32);; index = entryMeta.nextIndex(index)) { + entry.setIndex(index); + + if (!entry.isPresent()) { + entry.init(srcAddr, len, (short) hash32, temperature); + onNewEntry(); + return; + } + + if (entry.updateOnMatch(entryMeta, srcAddr, len, (short) hash32, temperature)) { + return; + } + // hashMisses++; + } + } + + public AggregateResult result() { + Map map = new LinkedHashMap<>(5_000); + for (int i = 0; i < entryMeta.nEntries(); i++) { + entry.setIndex(i); + if (entry.isPresent()) { + map.put(entry.cityNameString(), entry.stat()); + } + } + System.err.println( + STR.""" + HashHits = \{hashHits} + HashMisses = \{hashMisses} (\{hashMisses * 100.0 / hashHits}) + """); + return new AggregateResult(map); + } + + private EntryData getNewEntry(EntryData oldEntry, EntryMeta newEntryMeta) { + EntryData newEntry = new EntryData(newEntryMeta); + for (int index = newEntryMeta.indexFromHash16(oldEntry.hash16());; index = newEntryMeta.nextIndex(index)) { + newEntry.setIndex(index); + if (!newEntry.isPresent()) { + return newEntry; + } + } + } 
+ + private void onNewEntry() { + if (++n_filledEntries == 450) { + reHash(16); + } + } + + private void reHash(int new_N_EntriesBits) { + EntryMeta oldEntryMeta = this.entryMeta; + EntryData oldEntry = new EntryData(oldEntryMeta); + Checks.checkArg(new_N_EntriesBits <= 16); + Checks.checkArg(new_N_EntriesBits > oldEntryMeta.nEntriesBits()); + EntryMeta newEntryMeta = new EntryMeta(new_N_EntriesBits, oldEntryMeta); + for (int i = 0; i < oldEntryMeta.nEntries(); i++) { + oldEntry.setIndex(i); + if (oldEntry.isPresent()) { + Unsafely.copyMemory( + oldEntry.baseAddress(), + getNewEntry(oldEntry, newEntryMeta).baseAddress(), + oldEntryMeta.entrySizeInBytes()); + } + } + this.entryMeta = newEntryMeta; + this.entry = new EntryData(this.entryMeta); + } + } + + public interface LazyShardQueue { + + void close(String closerId, int shardIdx); + + Optional fileTailEndWork(int idx); + + ByteRange take(int shardIdx); + } + + static final class Reflection { + + static Method findMethodNamed(Object object, String name, Class... paramTypes) { + try { + return object.getClass().getMethod(name, paramTypes); + } + catch (NoSuchMethodException e) { + throw new RuntimeException(e); + } + } + + static Object invoke(Object receiver, Method method, Object... 
params) { + try { + return method.invoke(receiver, params); + } + catch (Exception e) { + throw new RuntimeException(e); + } + } + } + + public static class Runner { + + private final double commonChunkFraction; + private final int commonChunkSizeBits; + private final boolean fakeAdvance; + private final int hashtableSizeBits; + private final Path inputFile; + private final int minReservedBytesAtFileTail; + private final double munmapFraction; + private final int nThreads; + private final int shardSizeBits; + + public Runner( + Path inputFile, + int nThreads, + int chunkSizeBits, + double commonChunkFraction, + int commonChunkSizeBits, + int hashtableSizeBits, + int minReservedBytesAtFileTail, + double munmapFraction, + boolean fakeAdvance) { + this.inputFile = inputFile; + this.nThreads = nThreads; + this.shardSizeBits = chunkSizeBits; + this.commonChunkFraction = commonChunkFraction; + this.commonChunkSizeBits = commonChunkSizeBits; + this.hashtableSizeBits = hashtableSizeBits; + this.minReservedBytesAtFileTail = minReservedBytesAtFileTail; + this.munmapFraction = munmapFraction; + this.fakeAdvance = fakeAdvance; + } + + AggregateResult getSummaryStatistics() throws Exception { + int nThreads = this.nThreads < 0 ? 
Runtime.getRuntime().availableProcessors() : this.nThreads; + + LazyShardQueue shardQueue = new SerialLazyShardQueue( + 1L << shardSizeBits, + inputFile, + nThreads, + commonChunkFraction, + commonChunkSizeBits, + minReservedBytesAtFileTail, + munmapFraction, + fakeAdvance); + + ExecutorService executorService = Executors.newFixedThreadPool( + nThreads, + runnable -> { + Thread thread = new Thread(runnable); + thread.setDaemon(true); + return thread; + }); + + List> results = new ArrayList<>(); + for (int i = 0; i < nThreads; i++) { + final int shardIdx = i; + final Callable callable = () -> { + Tracing.recordWorkStart("Shard", shardIdx); + AggregateResult result = new ShardProcessor(shardQueue, hashtableSizeBits, shardIdx).processShard(); + Tracing.recordWorkEnd("Shard", shardIdx); + return result; + }; + results.add(executorService.submit(callable)); + } + Tracing.recordEvent("Basic push time"); + + // This particular sequence of Futures is so that both merge and munmap() can work as shards + // finish their computation without blocking on the entire set of shards to complete. In + // particular, munmap() doesn't need to wait on merge. + // First, submit a task to merge the results and then submit a task to cleanup bytebuffers + // from completed shards. + Future resultFutures = executorService.submit(() -> merge(results)); + // Note that munmap() is serial and not parallel and hence we use just one thread. 
+ executorService.submit(() -> closeByteBuffers(results, shardQueue)); + + AggregateResult result = resultFutures.get(); + Tracing.recordEvent("Merge results received"); + + Tracing.recordEvent("About to shutdown executor and wait"); + executorService.shutdown(); + executorService.awaitTermination(Long.MAX_VALUE, TimeUnit.MILLISECONDS); + Tracing.recordEvent("Executor terminated"); + + Tracing.analyzeWorkThreads(nThreads); + return result; + } + + private void closeByteBuffers( + List> results, LazyShardQueue shardQueue) { + int n = results.size(); + boolean[] isDone = new boolean[n]; + int remaining = results.size(); + while (remaining > 0) { + for (int i = 0; i < n; i++) { + if (!isDone[i] && results.get(i).isDone()) { + remaining--; + isDone[i] = true; + shardQueue.close("Ending Cleaner", i); + } + } + } + } + + private AggregateResult merge(List> results) + throws ExecutionException, InterruptedException { + Tracing.recordEvent("Merge start time"); + Map output = null; + boolean[] isDone = new boolean[results.size()]; + int remaining = results.size(); + // Let's be naughty and spin in a busy loop + while (remaining > 0) { + for (int i = 0; i < results.size(); i++) { + if (!isDone[i] && results.get(i).isDone()) { + isDone[i] = true; + remaining--; + if (output == null) { + output = new TreeMap<>(results.get(i).get().tempStats()); + } + else { + for (Entry entry : results.get(i).get().tempStats().entrySet()) { + output.compute( + entry.getKey(), + (key, value) -> value == null ? 
entry.getValue() : Stat.merge(value, entry.getValue())); + } + } + } + } + } + Tracing.recordEvent("Merge end time"); + return new AggregateResult(output); + } + } + + public static class SerialLazyShardQueue implements LazyShardQueue { + + private static long roundToNearestLowerMultipleOf(long divisor, long value) { + return value / divisor * divisor; + } + + private final ByteRange[] byteRanges; + private final long chunkSize; + private final long commonChunkSize; + private final AtomicLong commonPool; + private final long effectiveFileSize; + private final boolean fakeAdvance; + private final long fileSize; + private final long[] perThreadData; + private final RandomAccessFile raf; + private final SeqLock seqLock; + + public SerialLazyShardQueue( + long chunkSize, + Path filePath, + int shards, + double commonChunkFraction, + int commonChunkSizeBits, + int fileTailReservedBytes, + double munmapFraction, + boolean fakeAdvance) + throws IOException { + this.fakeAdvance = fakeAdvance; + Checks.checkArg(commonChunkFraction < 0.9 && commonChunkFraction >= 0); + Checks.checkArg(fileTailReservedBytes >= 0); + this.raf = new RandomAccessFile(filePath.toFile(), "r"); + this.fileSize = raf.length(); + fileTailReservedBytes = fileTailReservedBytes == 0 + ? 
0 + : consumeToPreviousNewLineExclusive(raf, fileTailReservedBytes); + this.effectiveFileSize = fileSize - fileTailReservedBytes; + + // Common pool + long commonPoolStart = Math.min( + roundToNearestLowerMultipleOf( + chunkSize, (long) (effectiveFileSize * (1 - commonChunkFraction))), + effectiveFileSize); + this.commonPool = new AtomicLong(commonPoolStart); + this.commonChunkSize = 1L << commonChunkSizeBits; + + // Distribute chunks to shards + this.perThreadData = new long[shards << 4]; // thread idx -> 16*idx to avoid cache line conflict + for (long i = 0, + currentStart = 0, + remainingChunks = (commonPoolStart + chunkSize - 1) / chunkSize; i < shards; i++) { + long remainingShards = shards - i; + long currentChunks = (remainingChunks + remainingShards - 1) / remainingShards; + // Shard i handles: [currentStart, currentStart + currentChunks * chunkSize) + int pos = (int) i << 4; + perThreadData[pos] = currentStart; // next chunk begin + perThreadData[pos + 1] = currentStart + currentChunks * chunkSize; // shard end + perThreadData[pos + 2] = currentChunks; // active chunks remaining + // threshold below which need to shrink + // 0.03 is a practical number but the optimal strategy is this: + // Shard number N (1-based) should unmap as soon as it completes (R/(R+1))^N fraction of + // its work, where R = relative speed of unmap compared to the computation. + // For our problem, R ~ 75 because unmap unmaps 30GB/sec (but, it is serial) while + // cores go through data at the rate of 400MB/sec. 
+ perThreadData[pos + 3] = (long) (currentChunks * (munmapFraction * (shards - i))); + perThreadData[pos + 4] = 1; // true iff munmap() hasn't been triggered yet + currentStart += currentChunks * chunkSize; + remainingChunks -= currentChunks; + } + this.chunkSize = chunkSize; + + this.byteRanges = new ByteRange[shards << 4]; + for (int i = 0; i < shards; i++) { + byteRanges[i << 4] = new ByteRange(raf, effectiveFileSize, i); + } + + this.seqLock = new SeqLock(); + } + + @Override + public void close(String closerId, int shardIdx) { + byteRanges[shardIdx << 4].close(closerId); + } + + @Override + public Optional fileTailEndWork(int idx) { + if (idx == 0 && effectiveFileSize < fileSize) { + ByteRange chunk = new ByteRange(raf, fileSize, 0); + chunk.setRange( + effectiveFileSize == 0 ? 0 : effectiveFileSize - 1 /* will consume newline at eFS-1 */, + fileSize); + return Optional.of(chunk); + } + return Optional.empty(); + } + + @Override + public ByteRange take(int shardIdx) { + // Try for thread local range + final int pos = shardIdx << 4; + final long rangeStart; + final long rangeEnd; + + if (perThreadData[pos + 2] >= 1) { + rangeStart = perThreadData[pos]; + rangeEnd = rangeStart + chunkSize; + // Don't do this in the if-check; it causes negative values that trigger intermediate + // cleanup + perThreadData[pos + 2]--; + if (!fakeAdvance) { + perThreadData[pos] = rangeEnd; + } + } + else { + rangeStart = commonPool.getAndAdd(commonChunkSize); + // If that's exhausted too, nothing remains! 
+ if (rangeStart >= effectiveFileSize) { + return null; + } + rangeEnd = rangeStart + commonChunkSize; + } + + if (perThreadData[pos + 2] < perThreadData[pos + 3] && perThreadData[pos + 4] > 0) { + if (attemptIntermediateClose(shardIdx)) { + perThreadData[pos + 4]--; + } + } + + ByteRange chunk = byteRanges[pos]; + chunk.setRange(rangeStart, rangeEnd); + return chunk; + } + + private boolean attemptIntermediateClose(int shardIdx) { + if (seqLock.acquire()) { + close("Intermediate Cleaner", shardIdx); + seqLock.release(); + return true; + } + return false; + } + + private int consumeToPreviousNewLineExclusive(RandomAccessFile raf, int minReservedBytes) { + try { + long pos = Math.max(raf.length() - minReservedBytes - 1, -1); + if (pos < 0) { + return (int) raf.length(); + } + + long start = Math.max(pos - 512, 0); + ByteBuffer buf = raf.getChannel().map(MapMode.READ_ONLY, start, pos + 1 - start); + while (pos >= 0 && buf.get((int) (pos - start)) != '\n') { + pos--; + } + pos++; + return (int) (raf.length() - pos); + } + catch (Exception e) { + throw new RuntimeException(e); + } + } + } + + /** A low-traffic non-blocking lock. 
*/ + static class SeqLock { + + private final AtomicBoolean isOccupied = new AtomicBoolean(false); + + boolean acquire() { + return !isOccupied.get() && isOccupied.compareAndSet(false, true); + } + + void release() { + isOccupied.set(false); + } + } + + public static class ShardProcessor { + + private final int shardIdx; + private final LazyShardQueue shardQueue; + private final FastShardProcessorState state; + + public ShardProcessor(LazyShardQueue shardQueue, int hashtableSizeBits, int shardIdx) { + this.shardQueue = shardQueue; + this.shardIdx = shardIdx; + this.state = new FastShardProcessorState(hashtableSizeBits); + } + + public AggregateResult processShard() { + return processShardReal(); + } + + private void processRange(ByteRange range) { + long nextPos = range.startAddress; + while (nextPos < range.endAddress) { + nextPos = state.processLine(nextPos); + } + } + + private void processRangeSlow(ByteRange range) { + long nextPos = range.startAddress; + while (nextPos < range.endAddress) { + nextPos = state.processLineSlow(nextPos); + } + } + + private AggregateResult processShardReal() { + // First process the file tail work to give ourselves freedom to go past ranges in parsing + shardQueue.fileTailEndWork(shardIdx).ifPresent(this::processRangeSlow); + + ByteRange range; + while ((range = shardQueue.take(shardIdx)) != null) { + processRange(range); + } + return result(); + } + + private AggregateResult result() { + return state.result(); + } + } + + public static class FastShardProcessorState { + + private static final long LEADING_ONE_BIT_MASK = 0x8080808080808080L; + private static final long ONE_MASK = 0x0101010101010101L; + private static final long SEMICOLON_MASK = 0x3b3b3b3b3b3b3b3bL; + private final Hashtable hashtable; + private final Map slowProcessStats = new HashMap<>(); + + public FastShardProcessorState(int slotsBits) { + this.hashtable = new Hashtable(slotsBits); + Checks.checkArg(slotsBits <= 16); // since this.hashes is 'short' + } + + 
public long processLine(long nextPos) { + final long origPos = nextPos; + + // Trying to extract this into a function made it slower.. so, leaving it at inlining. + // It's a pity since the extracted version was more elegant to read + long firstLong; + int hash = 0; + // Don't run Long.numberOfTrailingZeros in hasSemiColon; it is not needed to establish + // whether there's a semicolon; only needed for pin-pointing length of the tail. + long s = hasSemicolon(firstLong = Unsafely.readLong(nextPos)); + final int trailingZeroes; + if (s == 0) { + hash = doHash(firstLong); + do { + nextPos += 8; + s = hasSemicolon(Unsafely.readLong(nextPos)); + } while (s == 0); + trailingZeroes = Long.numberOfTrailingZeros(s) + 1; // 8, 16, 24, .. # past ; + } + else { + trailingZeroes = Long.numberOfTrailingZeros(s) + 1; // 8, 16, 24, .. # past ; + hash = doHash(firstLong & maskOf(trailingZeroes - 8)); + } + // Sometimes we do mix a tail of length 0.. + nextPos += (trailingZeroes >> 3); + + final int temp = readTemperature(nextPos); + hashtable.addDataPoint(origPos, (int) (nextPos - 1 - origPos), hash, (short) (temp >> 3)); + return nextPos + (temp & 7); + } + + /** + * A slow version which is used only for the tail part of the file. Maintaining hashcode sync + * between this and the fast version is a pain for experimentation. So, we'll simply use a naive + * approach. 
+ */ + public long processLineSlow(long nextPos) { + byte nextByte; + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + while ((nextByte = Unsafely.readByte(nextPos++)) != ';') { + baos.write(nextByte); + } + + int temperature = 0; + boolean negative = Unsafely.readByte(nextPos) == '-'; + while ((nextByte = Unsafely.readByte(nextPos++)) != '\n') { + if (nextByte != '-' && nextByte != '.') { + temperature = temperature * 10 + (nextByte - '0'); + } + } + if (negative) { + temperature = -temperature; + } + + updateStat(slowProcessStats, baos.toString(), Stat.firstReading(temperature)); + return nextPos; + } + + public AggregateResult result() { + AggregateResult result = hashtable.result(); + if (!slowProcessStats.isEmpty()) { + // bah.. just mutate the arg of the record... + for (Entry entry : slowProcessStats.entrySet()) { + updateStat(result.tempStats(), entry.getKey(), entry.getValue()); + } + } + return result; + } + + int readTemperature(long nextPos) { + // This Dependency chain + // read -> shift -> xor -> compare -> 2 in parallel [ shift -> read ] -> add -> shift + // Chain latency: 2 reads + 2 add + 4 logical [assuming compare = add] + // vs + // Prior Dependency chain (slightly optimized by hand) + // read -> compare to '-' -> read -> compare to '.' 
-> 3 in parallel [read -> imul] -> add + // Chain latency: 3 reads + 3 add + 1 mul [assuming compare = add] + long data = Unsafely.readLong(nextPos); + long d = data ^ (data >> 4); + if ((data & 0xFF) == '-') { + return TemperatureLookup.firstNeg(d >> 8) + TemperatureLookup.secondNeg(d >> 24); + } + else { + return TemperatureLookup.firstPos(d >> 0) + TemperatureLookup.secondPos(d >> 16); + } + } + + private int doHash(long value) { + long hash = 31L * (int) value + (int) (value >> 32); + return (int) (hash ^ (hash >> 17) ^ (hash >> 28)); + } + + private long hasSemicolon(long x) { + long a = (x ^ SEMICOLON_MASK); + return (a - ONE_MASK) & (~a) & LEADING_ONE_BIT_MASK; + } + + private long maskOf(int bits) { + return ~(-1L << bits); + } + + private void updateStat(Map map, String key, Stat curValue) { + map.compute(key, (_, value) -> value == null ? curValue : Stat.merge(value, curValue)); + } + } + + /** Represents aggregate stats. */ + public static class Stat { + + public static Stat firstReading(int temp) { + return new Stat(temp, temp, temp, 1); + } + + public static Stat merge(Stat left, Stat right) { + return new Stat( + Math.min(left.min, right.min), + Math.max(left.max, right.max), + left.sum + right.sum, + left.count + right.count); + } + + long count, sum; + int min, max; + + public Stat(int min, int max, long sum, long count) { + this.min = min; + this.max = max; + this.sum = sum; + this.count = count; + } + + // Caution: Mutates + public Stat mergeReading(int curTemp) { + // Can this be improved furhter? + // Assuming random values for curTemp, + // min (&max) gets updated roughly log(N)/N fraction of the time (a small number) + // In the worst case, there will be at-most one branch misprediction. + if (curTemp > min) { // Mostly passes. On branch misprediction, just update min. + if (curTemp > max) { // Mostly fails. On branch misprediction, just update max. 
+ max = curTemp; + } + } + else { + min = curTemp; + } + sum += curTemp; + count++; + return this; + } + + @Override + public String toString() { + return "%.1f/%.1f/%.1f".formatted(min / 10.0, sum / 10.0 / count, max / 10.0); + } + } + + /** + * Lookup table for temperature parsing. + * + *

+     * 0       0011-0000
+     * 9       0011-1001
+     * .       0010-1110
+     * \n      0000-1010
+     *
+     * Notice that there's no overlap in the last 4 bits. This means, if we are given two successive
+     * bytes X, Y all of which belong to the above characters, we can REVERSIBLY hash it to
+     * a single byte by doing 8-bit-hash = (last 4 bits of X) concat (last 4 bits of Y).
+     *
+     * Such a hash requires a few more operations than ideal. A more practical hash is:
+     * (X << 4) ^ Y ^ (Y >> 4). This means if you read 4 bytes after the '-',
+     * L = X Y Z W, where each of X Y Z W is a byte, then,
+     * L ^ (L >> 4) = D hash(X, Y) hash(Y, Z) hash(Z, W) where D = don't care. In other words, we
+     * can SWAR the hash.
+     *
+     * This has potential for minor conflicts; e.g., (3, NewLine) collides with (0, 9). But, we
+     * don't have any collisions between two digits. That is (x, y) will never collide with (a, b)
+     * where x, y, a, b are digits (proof left as an exercise, lol). Along with a couple of other
+     * such no-conflict observations, it suffices for our purposes.
+     *
+     * If we just precompute some values like
+     * - BigTable[hash(X,Y)] = 100*X + 10*Y
+     * - SmallTable[hash(Z,W)] = 10*Z + W
+     *
+     * where potentially X, Y, Z, W can be '.' or '\n', (and the arithmetic adjusted), we can lookup
+     * the temperature pieces from BigTable and SmallTable and add them together.
+     * 
+ * + *

This class is an implementation of the above idea. The lookup tables being 256 ints long + * will always be resident in L1 cache. What remains then is to also add the information on how + * much input is to be consumed; i.e. count the - and newlines too. That can be piggy backed on + * top of the values. + * + *

FWIW, this lookup appears to have reduced the temperature reading overhead substantially on + * a Ryzen 7950X machine. But, it wasn't done systematically; so, YMMV. + */ + public static class TemperatureLookup { + + // Second is the smaller (units place) + // First is the larger (100 & 10) + + // _NEG tables simply negate the value so that call-site can always simply add the values from + // the first and second units. Call-sites adding-up First and Second units adds up the + // amount of input to consume. + + // Here, 2 is the amount of bytes consumed. This informs how much the reading pointer + // should move. + // For pattern XY value = ((-100*X -10*Y) << 3) + 2 [2 = 1 for X, 1 for Y] + // For pattern Y. value = ((-10*Y) << 3) + 2 [2 = 1 for Y, 1 for .] + private static final int[] FIRST_NEG = make(true, true); + + // For pattern XY value = ((100*X + 10*Y) << 3) + 2 + // For pattern Y. value = ((10*Y) << 3) + 2 + private static final int[] FIRST_POS = make(true, false); + + // We count newline and any initial '-' as part of SECOND + // For pattern .Z value = (-Z << 3) + 2 + 2 [1 each for . and Z, 1 for newline, 1 for minus] + // For pattern Zn value = (-Z << 3) + 1 + 2 [1 for Z, 1 for newline, 1 for minus] + private static final int[] SECOND_NEG = make(false, true); + + // For pattern .Z value = (Z << 3) + 2 + 1 [1 each for . 
and Z, 1 for newline] + // For pattern Zn value = (Z << 3) + 1 + 1 [1 for Z, 1 for newline] + private static final int[] SECOND_POS = make(false, false); + + public static int firstNeg(long b) { + return FIRST_NEG[(int) (b & 255)]; + } + + public static int firstPos(long b) { + return FIRST_POS[(int) (b & 255)]; + } + + public static int secondNeg(long b) { + return SECOND_NEG[(int) (b & 255)]; + } + + public static int secondPos(long b) { + return SECOND_POS[(int) (b & 255)]; + } + + private static byte[] allDigits() { + byte[] out = new byte[10]; + for (byte a = '0'; a <= '9'; a++) { + out[a - '0'] = a; + } + return out; + } + + private static int hash(byte msb, byte lsb) { + // If K = [D msb lsb], then (K ^ (K>>4)) & 255 == hash(msb, lsb). D = don't care + return (msb << 4) ^ lsb ^ (lsb >> 4); + } + + private static int[] make(boolean isFirst, boolean isNegative) { + int[] ret = new int[256]; + boolean[] done = new boolean[256]; + + // Conventions: X = 100s place, Y = 10s place, Z = 1s place, n = new line + + // All the cases to handle + // X Y . Z + // Y . Z n + + // In little-endian order it becomes (byte-wise), shown in place value notation + // Z . Y X + // n Z . Y + // First = YX or .Y + // Second = Z. or nZ + + // Pattern 'YX' + for (byte x : allDigits()) { + for (byte y : allDigits()) { + int index = hash(y, x); + // Shouldn't occur in Second + int value = isFirst ? (y - '0') * 10 + (x - '0') * 100 : 12345; + int delta = isFirst ? 2 : 12345; + update(index, isNegative ? -value : value, delta, ret, done); + } + } + + // Pattern 'Z.' + for (byte z : allDigits()) { + int index = hash(z, (byte) '.'); + // shouldn't occur in First + int value = isFirst ? 12345 : (z - '0'); + int delta = isFirst ? 12345 : 2; + update(index, isNegative ? -value : value, delta, ret, done); + } + + // Pattern '.Y' + for (byte y : allDigits()) { + int index = hash((byte) '.', y); + // Shouldn't occur in Second + int value = isFirst ? 10 * (y - '0') : 12345; + int delta = isFirst ? 
2 : 12345; + update(index, isNegative ? -value : value, delta, ret, done); + } + + // Pattern 'nZ' + for (byte z : allDigits()) { + int index = hash((byte) '\n', z); + // shouldn't occur in First + int value = isFirst ? 12345 : (z - '0'); + int delta = isFirst ? 12345 : 1; + update(index, isNegative ? -value : value, delta, ret, done); + } + + if (!isFirst) { + // Adjust the deltas to reflect how much input needs to be consumed + // need to consume the newline and any - sign in front + for (int i = 0; i < ret.length; i++) { + ret[i] += (isNegative ? 1 : 0) /* for - sign */ + 1 /* for new line */; + } + } + return ret; + } + + private static void update(int index, int value, int delta, int[] ret, boolean[] done) { + index &= 255; + Checks.checkArg(!done[index]); // just a sanity check that our hashing is indeed reversible + ret[index] = (value << 3) | delta; + done[index] = true; + } + } + + static class Tracing { + + private static final Map knownWorkThreadEvents; + private static long startTime; + + static { + // Maintain the ordering to be chronological in execution + // Map.of(..) 
screws up ordering + knownWorkThreadEvents = new LinkedHashMap<>(); + for (String id : List.of("Shard", "Intermediate Cleaner", "Ending Cleaner", "Buffer Creation")) { + knownWorkThreadEvents.put(id, new ThreadTimingsArray(id, 1 << 10)); + } + } + + static void analyzeWorkThreads(int nThreads) { + for (ThreadTimingsArray array : knownWorkThreadEvents.values()) { + errPrint(array.analyze(nThreads)); + } + } + + static void recordAppStart() { + startTime = System.nanoTime(); + printEvent("Start time", startTime); + } + + static void recordEvent(String event) { + printEvent(event, System.nanoTime()); + } + + static void recordWorkEnd(String id, int threadId) { + knownWorkThreadEvents.get(id).recordEnd(threadId); + } + + static void recordWorkStart(String id, int threadId) { + knownWorkThreadEvents.get(id).recordStart(threadId); + } + + ///////////////////////////////////////////////////////////////////////////////////////////////// + + private static void errPrint(String message) { + System.err.println(message); + } + + private static void printEvent(String message, long nanoTime) { + errPrint(STR."\{message} = \{(nanoTime - startTime) / 1_000_000}ms"); + } + + public static class ThreadTimingsArray { + + private static String toString(long[] array) { + return Arrays.stream(array) + .map(x -> x < 0 ? 
-1 : x) + .mapToObj(x -> String.format("%6d", x)) + .collect(Collectors.joining(", ", "[ ", " ]")); + } + + private final String id; + private final long[] timestamps; + private boolean hasData = false; + + public ThreadTimingsArray(String id, int maxSize) { + this.timestamps = new long[maxSize]; + this.id = id; + } + + public String analyze(int nThreads) { + if (!hasData) { + return "%s has no thread timings data".formatted(id); + } + Checks.checkArg(nThreads <= timestamps.length); + long minDuration = Long.MAX_VALUE, maxDuration = Long.MIN_VALUE; + long minBegin = Long.MAX_VALUE, maxCompletion = Long.MIN_VALUE; + long maxBegin = Long.MIN_VALUE, minCompletion = Long.MAX_VALUE; + + long[] durationsMs = new long[nThreads]; + long[] completionsMs = new long[nThreads]; + long[] beginMs = new long[nThreads]; + for (int i = 0; i < nThreads; i++) { + long durationNs = timestamps[2 * i + 1] - timestamps[2 * i]; + durationsMs[i] = durationNs / 1_000_000; + completionsMs[i] = (timestamps[2 * i + 1] - startTime) / 1_000_000; + beginMs[i] = (timestamps[2 * i] - startTime) / 1_000_000; + + minDuration = Math.min(minDuration, durationNs); + maxDuration = Math.max(maxDuration, durationNs); + + minBegin = Math.min(minBegin, timestamps[2 * i] - startTime); + maxBegin = Math.max(maxBegin, timestamps[2 * i] - startTime); + + maxCompletion = Math.max(maxCompletion, timestamps[2 * i + 1] - startTime); + minCompletion = Math.min(minCompletion, timestamps[2 * i + 1] - startTime); + } + return STR.""" + ------------------------------------------------------------------------------------------- + \{id} Stats + ------------------------------------------------------------------------------------------- + Max duration = \{maxDuration / 1_000_000} ms + Min duration = \{minDuration / 1_000_000} ms + Timespan[max(end)-min(start)] = \{(maxCompletion - minBegin) / 1_000_000} ms [\{maxCompletion / 1_000_000} - \{minBegin / 1_000_000} ] + Completion Timespan[max(end)-min(end)] = \{(maxCompletion - 
minCompletion) / 1_000_000} ms + Begin Timespan[max(begin)-min(begin)] = \{(maxBegin - minBegin) / 1_000_000} ms + Average Duration = \{Arrays.stream(durationsMs) + .average() + .getAsDouble()} ms + Durations = \{toString(durationsMs)} ms + Begin Timestamps = \{toString(beginMs)} ms + Completion Timestamps = \{toString(completionsMs)} ms + """; + } + + public void recordEnd(int idx) { + timestamps[2 * idx + 1] = System.nanoTime(); + hasData = true; + } + + public void recordStart(int idx) { + timestamps[2 * idx] = System.nanoTime(); + hasData = true; + } + } + } + + static class Unsafely { + + private static final Unsafe unsafe = getUnsafe(); + + public static long allocateZeroedCacheLineAligned(int size) { + long address = unsafe.allocateMemory(size + 63); + unsafe.setMemory(address, size + 63, (byte) 0); + return (address + 63) & ~63; + } + + public static void copyMemory(long srcAddress, long destAddress, long byteCount) { + unsafe.copyMemory(srcAddress, destAddress, byteCount); + } + + public static boolean matches(long srcAddr, long destAddress, int len) { + if (len < 8) { + return (readLong(srcAddr) & ~(-1L << (len << 3))) == (readLong(destAddress) & ~(-1L << (len << 3))); + } + if (readLong(srcAddr) != readLong(destAddress)) { + return false; + } + len -= 8; + + if (len < 8) { + return (readLong(srcAddr + 8) & ~(-1L << (len << 3))) == (readLong(destAddress + 8) & ~(-1L << (len << 3))); + } + if (readLong(srcAddr + 8) != readLong(destAddress + 8)) { + return false; + } + len -= 8; + srcAddr += 16; + destAddress += 16; + + int idx = 0; + for (; idx < (len & ~7); idx += 8) { + if (Unsafely.readLong(srcAddr + idx) != Unsafely.readLong(destAddress + idx)) { + return false; + } + } + + if (idx < (len & ~3)) { + if (Unsafely.readInt(srcAddr + idx) != Unsafely.readInt(destAddress + idx)) { + return false; + } + idx += 4; + } + + if (idx < (len & ~1)) { + if (Unsafely.readShort(srcAddr + idx) != Unsafely.readShort(destAddress + idx)) { + return false; + } + idx += 2; 
+ } + + return idx >= len || Unsafely.readByte(srcAddr + idx) == Unsafely.readByte(destAddress + idx); + } + + public static byte readByte(long address) { + return unsafe.getByte(address); + } + + public static int readInt(long address) { + return unsafe.getInt(address); + } + + public static long readLong(long address) { + return unsafe.getLong(address); + } + + public static short readShort(long address) { + return unsafe.getShort(address); + } + + public static void setByte(long address, byte len) { + unsafe.putByte(address, len); + } + + public static void setInt(long address, int value) { + unsafe.putInt(address, value); + } + + public static void setShort(long address, short len) { + unsafe.putShort(address, len); + } + + private static Unsafe getUnsafe() { + try { + Field unsafeField = Unsafe.class.getDeclaredField("theUnsafe"); + unsafeField.setAccessible(true); + return (Unsafe) unsafeField.get(null); + } + catch (NoSuchFieldException | IllegalAccessException e) { + throw new RuntimeException(e); + } + } + } +} diff --git a/src/main/java/dev/morling/onebrc/CalculateAverage_xpmatteo.java b/src/main/java/dev/morling/onebrc/CalculateAverage_xpmatteo.java new file mode 100644 index 000000000..94904ffb4 --- /dev/null +++ b/src/main/java/dev/morling/onebrc/CalculateAverage_xpmatteo.java @@ -0,0 +1,261 @@ +/* + * Copyright 2023 The original authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package dev.morling.onebrc; + +import java.io.File; +import java.io.IOException; +import java.io.RandomAccessFile; +import java.nio.ByteBuffer; +import java.nio.channels.FileChannel; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.ArrayList; +import java.util.List; +import java.util.Objects; +import java.util.TreeMap; +import java.util.stream.Collectors; + +@SuppressWarnings({ "ReassignedVariable", "StatementWithEmptyBody" }) +public class CalculateAverage_xpmatteo { + + private static final String FILE = "./measurements.txt"; + + public static void main(String[] args) throws IOException, InterruptedException { + var fileName = dataFileName(args); + + try ( + var file = new RandomAccessFile(new File(fileName), "r"); + var channel = file.getChannel()) { + var numCpus = Runtime.getRuntime().availableProcessors(); + var threads = split(channel, numCpus).stream() + .map(Worker::new) + .toList(); + threads.forEach(Thread::start); + for (Worker thread : threads) { + thread.join(); + } + var results = threads.stream().map(Worker::getResults) + .reduce(CalculateAverage_xpmatteo::merge) + .orElseThrow(); + printCities(results); + } + } + + public static class Worker extends Thread { + private final ByteBuffer buffer; + private Results results; + + public Worker(ByteBuffer buffer) { + this.buffer = buffer; + } + + @Override + public void run() { + this.results = parseData(this.buffer); + } + + public Results getResults() { + return results; + } + } + + protected static List split(FileChannel channel, int numCpus) throws IOException { + if (channel.size() < 10_000) { + return List.of(channel.map(FileChannel.MapMode.READ_ONLY, 0, channel.size())); + } + + long[] increments = new long[numCpus + 1]; + for (int i = 0; i < numCpus; i++) { + increments[i] = i * channel.size() / numCpus; + // adjust the increments so that they start on the beginning of a city + while (increments[i] > 0 && byteAt(channel, increments[i] - 1) != '\n') { + 
increments[i]--; + } + } + increments[numCpus] = channel.size(); + + List result = new ArrayList<>(numCpus); + for (int i = 0; i < numCpus; i++) { + long from = increments[i]; + long to = increments[i + 1]; + result.add(channel.map(FileChannel.MapMode.READ_ONLY, from, to - from)); + } + return result; + } + + private static byte byteAt(FileChannel channel, long offset) throws IOException { + ByteBuffer buf = ByteBuffer.allocate(1); + channel.position(offset); + channel.read(buf); + buf.flip(); + var bytes = new byte[1]; + buf.get(bytes); + return bytes[0]; + } + + public static String dataFileName(String[] args) { + if (args.length == 1) { + return args[0]; + } + return FILE; + } + + protected static byte[] readAllData(String fileName) throws IOException { + return Files.readAllBytes(Path.of(fileName)); + } + + protected static ByteBuffer memoryMap(String fileName) throws IOException { + try (RandomAccessFile file = new RandomAccessFile(new File(fileName), "r")) { + // Get file channel in read-only mode + FileChannel fileChannel = file.getChannel(); + + return fileChannel.map(FileChannel.MapMode.READ_ONLY, 0, fileChannel.size()); + } + } + + protected enum State { + PARSING_CITY_NAME, + SKIPPING_SEMICOLON, + PARSING_TEMPERATURE + } + + protected static Results parseData(ByteBuffer data) { + var results = new Results(); + var state = State.PARSING_CITY_NAME; + int cityStartOffset = 0, cityEndOffset = 0; + int temp = 0, sign = 0; + + for (int i = 0; i < data.limit(); i++) { + byte currentChar = data.get(); + if (state == State.PARSING_CITY_NAME && currentChar == ';') { + state = State.SKIPPING_SEMICOLON; + cityEndOffset = i; + } + else if (state == State.PARSING_CITY_NAME) { + // do nothing + } + else if (state == State.SKIPPING_SEMICOLON && currentChar == '-') { + state = State.PARSING_TEMPERATURE; + temp = 0; + sign = -1; + } + else if (state == State.SKIPPING_SEMICOLON && currentChar >= '0' && currentChar <= '9') { + state = State.PARSING_TEMPERATURE; + temp = 
currentChar - '0'; + sign = 1; + } + else if (state == State.PARSING_TEMPERATURE && currentChar >= '0' && currentChar <= '9') { + temp = temp * 10 + currentChar - '0'; + } + else if (state == State.PARSING_TEMPERATURE && currentChar == '.') { + // do nothing + } + else if (state == State.PARSING_TEMPERATURE && currentChar == '\n') { + byte[] bytes = new byte[cityEndOffset - cityStartOffset]; + data.get(cityStartOffset, bytes); + var cityName = new String(bytes); + accumulate(results, cityName, temp * sign); + state = State.PARSING_CITY_NAME; + cityStartOffset = i + 1; + } + } + + return results; + } + + private static void accumulate(Results results, String cityName, int tempTimesTen) { + var existing = results.get(cityName); + if (existing == null) { + results.put(cityName, new CityData(tempTimesTen, tempTimesTen, tempTimesTen, 1)); + } + else { + existing.min = Math.min(existing.min, tempTimesTen); + existing.sum = existing.sum + tempTimesTen; + existing.max = Math.max(existing.max, tempTimesTen); + existing.count++; + } + } + + protected static Results merge(Results a, Results b) { + for (var entry : b.entrySet()) { + CityData valueInA = a.get(entry.getKey()); + if (null == valueInA) { + a.put(entry.getKey(), entry.getValue()); + } + else { + var valueInB = entry.getValue(); + valueInA.min = Math.min(valueInA.min, valueInB.min); + valueInA.sum += valueInB.sum; + valueInA.max = Math.max(valueInA.max, valueInB.max); + valueInA.count += valueInB.count; + } + } + + return a; + } + + protected static class Results extends TreeMap { + + } + + protected static class CityData { + int min, sum, max, count; + + public CityData(int min, int sum, int max, int count) { + this.min = min; + this.sum = sum; + this.max = max; + this.count = count; + } + + @Override + public boolean equals(Object o) { + if (this == o) + return true; + if (o == null || getClass() != o.getClass()) + return false; + CityData cityData = (CityData) o; + return min == cityData.min && sum == 
cityData.sum && max == cityData.max && count == cityData.count; + } + + @Override + public int hashCode() { + return Objects.hash(min, sum, max, count); + } + + @Override + public String toString() { + return STR."CityData{min=\{min}, sum=\{sum}, max=\{max}, count=\{count}\{'}'}"; + } + } + + protected static void printCities(Results cities) { + System.out.print("{"); + for (String city : cities.keySet()) { + CityData data = cities.get(city); + var min = data.min / 10.0; + var mean = (data.sum * 10.0 / data.count) / 100.0; + var max = data.max / 10.0; + System.out.printf( + "%s=%.1f/%.1f/%.1f, ", + city, + min, + mean, + max); + } + System.out.print("}"); + } +} diff --git a/src/main/java/dev/morling/onebrc/CalculateAverage_yavuztas.java b/src/main/java/dev/morling/onebrc/CalculateAverage_yavuztas.java index e33fe7e92..0e589a4b7 100644 --- a/src/main/java/dev/morling/onebrc/CalculateAverage_yavuztas.java +++ b/src/main/java/dev/morling/onebrc/CalculateAverage_yavuztas.java @@ -17,15 +17,16 @@ import sun.misc.Unsafe; -import java.io.IOException; import java.lang.foreign.Arena; import java.lang.reflect.Field; -import java.nio.ByteBuffer; import java.nio.channels.FileChannel; import java.nio.charset.StandardCharsets; import java.nio.file.Files; import java.nio.file.Path; import java.nio.file.StandardOpenOption; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; import java.util.TreeMap; import java.util.function.Consumer; @@ -35,8 +36,9 @@ public class CalculateAverage_yavuztas { private static final Unsafe UNSAFE = unsafe(); - // Tried all there: MappedByteBuffer, MemorySegment and Unsafe - // Accessing the memory using Unsafe is still the fastest in my experience + // I compared all three: MappedByteBuffer, MemorySegment and Unsafe. + // Accessing the memory using Unsafe is still the fastest in my experience. + // However, I would never use it in production, single programming error will crash your app. 
private static Unsafe unsafe() { try { final Field f = Unsafe.class.getDeclaredField("theUnsafe"); @@ -48,296 +50,419 @@ private static Unsafe unsafe() { } } - // Only one object, both for measurements and keys, less object creation in hotpots is always faster - static class Record { - - // keep memory starting address for each segment - // since we use Unsafe, this is enough to align and fetch the data - long segment; - int start; - int length; - int hash; + /** + * Extract bytes from a long + */ + private static long partial(long word, int length) { + final long mask = (~0L) << (length << 3); + return word & (~mask); + } - private int min = 1000; // calculations over int is faster than double, we convert to double in the end only once - private int max = -1000; + // Only one object, both for measurements and keys, less object creation in hotpots is always faster + private static final class Record { + + private final long start; // memory address of the underlying data + private final int length; + private final long word1; + private final long word2; + private final long wordLast; + private final int hash; + private Record next; // linked list to resolve hash collisions + + private int min; // calculations over int is faster than double, we convert to double in the end only once + private int max; private long sum; - private long count; + private int count; - public Record(long segment, int start, int length, int hash) { - this.segment = segment; + public Record(long start, int length, long word1, long word2, long wordLast, int hash, int temp) { this.start = start; this.length = length; + this.word1 = word1; + this.word2 = word2; + this.wordLast = wordLast; this.hash = hash; + this.min = temp; + this.max = temp; + this.sum = temp; + this.count = 1; } @Override public boolean equals(Object o) { final Record record = (Record) o; - return equals(record.segment, record.start, record.length, record.hash); + return equals(record.start, record.word1, record.word2, 
record.wordLast, record.length); } - /** - * Stateless equals, no Record object needed - */ - public boolean equals(long segment, int start, int length, int hash) { - if (this.length != length || this.hash != hash) - return false; + private static boolean notEquals(long address1, long address2, int step) { + return UNSAFE.getLong(address1 + step) != UNSAFE.getLong(address2 + step); + } - int i = 0; // bytes mismatch check - while (i < this.length - && UNSAFE.getByte(this.segment + this.start + i) == UNSAFE.getByte(segment + start + i)) { - i++; + private static boolean equalsComparingLongs(long start1, long start2, int length) { + // first shortcuts + if (length < 24) + return true; + if (length < 32) + return !notEquals(start1, start2, 16); + + int step = 24; // starting from 3rd long + length -= step; + while (length >= 8) { // scan longs + if (notEquals(start1, start2, step)) { + return false; + } + length -= 8; + step += 8; // 8 bytes } - return i == this.length; + return true; } - @Override - public int hashCode() { - return this.hash; + private boolean equals(long start, long word1, long word2, long last, int length) { + if (this.word1 != word1) + return false; + if (this.word2 != word2) + return false; + + // equals check is done by comparing longs instead of byte by byte check, this is faster + return equalsComparingLongs(this.start, start, length) && this.wordLast == last; } @Override public String toString() { final byte[] bytes = new byte[this.length]; - int i = 0; - while (i < this.length) { - bytes[i] = UNSAFE.getByte(this.segment + this.start + i++); - } - + UNSAFE.copyMemory(null, this.start, bytes, Unsafe.ARRAY_BYTE_BASE_OFFSET, this.length); return new String(bytes, StandardCharsets.UTF_8); } - public Record collect(int temp) { - this.min = Math.min(this.min, temp); - this.max = Math.max(this.max, temp); + private void collect(int temp) { + if (temp < this.min) + this.min = temp; + if (temp > this.max) + this.max = temp; this.sum += temp; 
this.count++; - return this; } - public void merge(Record other) { - this.min = Math.min(this.min, other.min); - this.max = Math.max(this.max, other.max); - this.sum += other.sum; - this.count += other.count; + private void merge(Record that) { + if (that.min < this.min) + this.min = that.min; + if (that.max > this.max) + this.max = that.max; + this.sum += that.sum; + this.count += that.count; } - public String measurements() { + private String measurements() { // here is only executed once for each unique key, so StringBuilder creation doesn't harm final StringBuilder sb = new StringBuilder(14); - sb.append(this.min / 10.0); - sb.append("/"); - sb.append(round((this.sum / 10.0) / this.count)); - sb.append("/"); - sb.append(this.max / 10.0); + sb.append(round(this.min)).append("/"); + sb.append(round(1.0 * this.sum / this.count)).append("/"); + sb.append(round(this.max)); return sb.toString(); } } // Inspired by @spullara - customized hashmap on purpose - // The main difference is we hold only one array instead of two - static class RecordMap { + // The main difference is we hold only one array instead of two, fewer objects is faster + private static final class RecordMap { - static final int SIZE = 1 << 15; // 32k - bigger bucket size less collisions - static final int BITMASK = SIZE - 1; - Record[] keys = new Record[SIZE]; + // Bigger bucket size less collisions, but you have to find a sweet spot otherwise it is becoming slower. 
+ // Also works good enough for 10K stations + private static final int SIZE = 1 << 14; // 16kb - enough for 10K + private static final int BITMASK = SIZE - 1; + private final Record[] keys = new Record[SIZE]; - static int hashBucket(int hash) { + // int collision; + + private boolean hasNoRecord(int index) { + return this.keys[index] == null; + } + + private Record getRecord(int index) { + return this.keys[index]; + } + + private static int hashBucket(int hash) { hash = hash ^ (hash >>> 16); // naive bit spreading but surprisingly decreases collision :) return hash & BITMASK; // fast modulo, to find bucket } - void putAndCollect(long segment, int start, int length, int hash, int temp) { - int bucket = hashBucket(hash); - Record existing = this.keys[bucket]; - if (existing == null) { - this.keys[bucket] = new Record(segment, start, length, hash) - .collect(temp); + private void putAndCollect(int hash, int temp, long start, int length, long word1, long word2, long wordLast) { + final int bucket = hashBucket(hash); + if (hasNoRecord(bucket)) { + this.keys[bucket] = new Record(start, length, word1, word2, wordLast, hash, temp); return; } - if (!existing.equals(segment, start, length, hash)) { - // collision, linear probing to find a slot - while ((existing = this.keys[++bucket & BITMASK]) != null && !existing.equals(segment, start, length, hash)) { - // can be stuck here if all the buckets are full :( - // However, since the data set is max 10K (unique) this shouldn't happen - // So, I'm happily leave here branchless :) - } - if (existing == null) { - this.keys[bucket & BITMASK] = new Record(segment, start, length, hash) - .collect(temp); - return; - } + Record existing = getRecord(bucket); + if (existing.equals(start, word1, word2, wordLast, length)) { existing.collect(temp); + return; } - else { - existing.collect(temp); + + // collision++; + // find possible slot by scanning the slot linked list + while (existing.next != null) { + if (existing.next.equals(start, 
word1, word2, wordLast, length)) { + existing.next.collect(temp); + return; + } + existing = existing.next; // go on to next + // collision++; } + existing.next = new Record(start, length, word1, word2, wordLast, hash, temp); } - void putOrMerge(Record key) { - int bucket = hashBucket(key.hash); - Record existing = this.keys[bucket]; - if (existing == null) { + private void putOrMerge(Record key) { + final int bucket = hashBucket(key.hash); + if (hasNoRecord(bucket)) { + key.next = null; this.keys[bucket] = key; return; } - if (!existing.equals(key)) { - // collision, linear probing to find a slot - while ((existing = this.keys[++bucket & BITMASK]) != null && !existing.equals(key)) { - // can be stuck here if all the buckets are full :( - // However, since the data set is max 10K (unique keys) this shouldn't happen - // So, I'm happily leave here branchless :) - } - if (existing == null) { - this.keys[bucket & BITMASK] = key; - return; - } + Record existing = getRecord(bucket); + if (existing.equals(key)) { existing.merge(key); + return; } - else { - existing.merge(key); + + // collision++; + // find possible slot by scanning the slot linked list + while (existing.next != null) { + if (existing.next.equals(key)) { + existing.next.merge(key); + return; + } + existing = existing.next; // go on to next + // collision++; } + key.next = null; + existing.next = key; } - void forEach(Consumer consumer) { + private void forEach(Consumer consumer) { int pos = 0; Record key; - while (pos < this.keys.length) { + while (pos < SIZE) { if ((key = this.keys[pos++]) == null) { continue; } + Record next = key.next; consumer.accept(key); + while (next != null) { // also traverse the records in the collision list + final Record tmp = next.next; + consumer.accept(next); + next = tmp; + } } } - void merge(RecordMap other) { + private void merge(RecordMap other) { other.forEach(this::putOrMerge); } } // One actor for one thread, no synchronization - static class RegionActor { - - final 
FileChannel channel; - final long startPos; - final int size; - final RecordMap map = new RecordMap(); - long segmentAddress; - int position; - Thread runner; // each actor has its own thread - - public RegionActor(FileChannel channel, long startPos, int size) { - this.channel = channel; + private static final class RegionActor extends Thread { + + private final long startPos; // start of region memory address + private final int size; + + private final RecordMap map = new RecordMap(); + + public RegionActor(long startPos, int size) { this.startPos = startPos; this.size = size; } - void accumulate() { - this.runner = new Thread(() -> { - try { - // get the segment memory address, this is the only thing we need for Unsafe - this.segmentAddress = this.channel.map(FileChannel.MapMode.READ_ONLY, this.startPos, this.size, Arena.global()).address(); - } - catch (IOException e) { - // no-op - skip intentionally, no handling for the purpose of this challenge - } - - int start; - int keyHash; - int length; - while (this.position < this.size) { - byte b; - start = this.position; // save line start position - keyHash = UNSAFE.getByte(this.segmentAddress + this.position++); // first byte is guaranteed not to be ';' - length = 1; // min key length - while ((b = UNSAFE.getByte(this.segmentAddress + this.position++)) != ';') { // read until semicolon - keyHash = calculateHash(keyHash, b); // calculate key hash ahead, eleminates one more loop later - length++; - } + private static long getWord(long address) { + return UNSAFE.getLong(address); + } - final int temp = readTemperature(); - this.map.putAndCollect(this.segmentAddress, start, length, keyHash, temp); + // hasvalue & haszero + // adapted from https://graphics.stanford.edu/~seander/bithacks.html#ZeroInWord + private static long hasSemicolon(long word) { + // semicolon pattern + final long hasVal = word ^ 0x3B3B3B3B3B3B3B3BL; // hasvalue + return ((hasVal - 0x0101010101010101L) & ~hasVal & 0x8080808080808080L); // haszero + 
} - this.position++; // skip linebreak - } - }); - this.runner.start(); + private static int semicolonPos(long hasVal) { + return Long.numberOfTrailingZeros(hasVal) >>> 3; } - static int calculateHash(int hash, int b) { - return 31 * hash + b; + private static int decimalPos(long numberWord) { + return Long.numberOfTrailingZeros(~numberWord & 0x10101000); } - // 1. Inspired by @yemreinci - Reading temparature value without Double.parse - // 2. Inspired by @obourgain - Fetching first 4 bytes ahead, then masking - int readTemperature() { - int temp = 0; - // read 4 bytes ahead - final int first4 = UNSAFE.getInt(this.segmentAddress + this.position); - this.position += 3; + private static final int MAX_INNER_LOOP_SIZE = 11; - final byte b1 = (byte) first4; // first byte - final byte b2 = (byte) ((first4 >> 8) & 0xFF); // second byte - final byte b3 = (byte) ((first4 >> 16) & 0xFF); // third byte - if (b1 == '-') { - if (b3 == '.') { - temp -= 10 * (b2 - '0') + (byte) ((first4 >> 24) & 0xFF) - '0'; // fourth byte - this.position++; - } - else { - this.position++; // skip dot - temp -= 100 * (b2 - '0') + 10 * (b3 - '0') + UNSAFE.getByte(this.segmentAddress + this.position++) - '0'; // fifth byte - } - } - else { - if (b2 == '.') { - temp = 10 * (b1 - '0') + b3 - '0'; + @Override + public void run() { + long pointer = this.startPos; + final long size = pointer + this.size; + while (pointer < size) { // line start + long hash = 0; // reset hash + long s; // semicolon check word + final int pos; // semicolon position + long word1 = getWord(pointer); + if ((s = hasSemicolon(word1)) != 0) { + pos = semicolonPos(s); + // read temparature + final long numberWord = getWord(pointer + pos + 1); + final int decimalPos = decimalPos(numberWord); + final int temp = convertIntoNumber(decimalPos, numberWord); + + word1 = partial(word1, pos); // last word + this.map.putAndCollect(completeHash(hash, word1), temp, pointer, pos, word1, 0, 0); + + pointer += pos + (decimalPos >>> 3) + 4; } 
else { - temp = 100 * (b1 - '0') + 10 * (b2 - '0') + (byte) ((first4 >> 24) & 0xFF) - '0'; // fourth byte - this.position++; + long word2 = getWord(pointer + 8); + if ((s = hasSemicolon(word2)) != 0) { + pos = semicolonPos(s); + // read temparature + final int length = pos + 8; + final long numberWord = getWord(pointer + length + 1); + final int decimalPos = decimalPos(numberWord); + final int temp = convertIntoNumber(decimalPos, numberWord); + + word2 = partial(word2, pos); // last word + this.map.putAndCollect(completeHash(hash, word1, word2), temp, pointer, length, word1, word2, 0); + + pointer += length + (decimalPos >>> 3) + 4; // seek to the line end + } + else { + long word = 0; + int length = 16; + hash = appendHash(hash, word1, word2); + // Let the compiler know the loop size ahead + // Then it's automatically unrolled + // Max key length is 13 longs, 2 we've read before, 11 left + for (int i = 0; i < MAX_INNER_LOOP_SIZE; i++) { + if ((s = hasSemicolon((word = getWord(pointer + length)))) != 0) { + break; + } + hash = appendHash(hash, word); + length += 8; + } + + pos = semicolonPos(s); + length += pos; + // read temparature + final long numberWord = getWord(pointer + length + 1); + final int decimalPos = decimalPos(numberWord); + final int temp = convertIntoNumber(decimalPos, numberWord); + + word = partial(word, pos); // last word + this.map.putAndCollect(completeHash(hash, word), temp, pointer, length, word1, word2, word); + + pointer += length + (decimalPos >>> 3) + 4; // seek to the line end + } } } + } + + // Hashes are calculated by a Mersenne Prime (1 << 7) -1 + // This is faster than multiplication in some machines + private static long appendHash(long hash, long word) { + return (hash << 7) - hash + word; + } + + private static long appendHash(long hash, long word1, long word2) { + hash = (hash << 7) - hash + word1; + return (hash << 7) - hash + word2; + } + + private static int completeHash(long hash, long partial) { + hash = (hash << 7) - hash 
+ partial; + return (int) (hash ^ (hash >>> 25)); + } + + private static int completeHash(long hash, long word1, long word2) { + hash = (hash << 7) - hash + word1; + hash = (hash << 7) - hash + word2; + return (int) hash ^ (int) (hash >>> 25); + } - return temp; + // Credits to @merrykitty. Magical solution to parse temparature values branchless! + // Taken as without modification, comments belong to @merrykitty + private static int convertIntoNumber(int decimalSepPos, long numberWord) { + final int shift = 28 - decimalSepPos; + // signed is -1 if negative, 0 otherwise + final long signed = (~numberWord << 59) >> 63; + final long designMask = ~(signed & 0xFF); + // Align the number to a specific position and transform the ascii code + // to actual digit value in each byte + final long digits = ((numberWord & designMask) << shift) & 0x0F000F0F00L; + // Now digits is in the form 0xUU00TTHH00 (UU: units digit, TT: tens digit, HH: hundreds digit) + // 0xUU00TTHH00 * (100 * 0x1000000 + 10 * 0x10000 + 1) = + // 0x000000UU00TTHH00 + + // 0x00UU00TTHH000000 * 10 + + // 0xUU00TTHH00000000 * 100 + // Now TT * 100 has 2 trailing zeroes and HH * 100 + TT * 10 + UU < 0x400 + // This results in our value lies in the bit 32 to 41 of this product + // That was close :) + final long absValue = ((digits * 0x640a0001) >>> 32) & 0x3FF; + final long value = (absValue ^ signed) - signed; + return (int) value; } /** * blocks until the map is fully collected */ - RecordMap get() throws InterruptedException { - this.runner.join(); + private RecordMap get() throws InterruptedException { + join(); return this.map; } } private static double round(double value) { - return Math.round(value * 10.0) / 10.0; + return Math.round(value) / 10.0; } /** * Scans the given buffer to the left */ - private static long findClosestLineEnd(long start, int size, FileChannel channel) throws IOException { - final long position = start + size; - final long left = Math.max(position - 101, 0); - final ByteBuffer 
buffer = ByteBuffer.allocate(101); // enough size to find at least one '\n' - if (channel.read(buffer.clear(), left) != -1) { - int bufferPos = buffer.position() - 1; - while (buffer.get(bufferPos) != '\n') { - bufferPos--; - size--; - } + private static long findClosestLineEnd(long start, int size) { + long position = start + size; + while (UNSAFE.getByte(--position) != '\n') { + // read until a linebreak + size--; } return size; } - public static void main(String[] args) throws IOException, InterruptedException { + private static boolean isWorkerProcess(String[] args) { + return Arrays.asList(args).contains("--worker"); + } + + private static void runAsWorker() throws Exception { + final ProcessHandle.Info info = ProcessHandle.current().info(); + final List commands = new ArrayList<>(); + info.command().ifPresent(commands::add); + info.arguments().ifPresent(args -> commands.addAll(Arrays.asList(args))); + commands.add("--worker"); + + new ProcessBuilder() + .command(commands) + .start() + .getInputStream() + .transferTo(System.out); + } + + public static void main(String[] args) throws Exception { - var concurrency = Runtime.getRuntime().availableProcessors(); + // Dased on @thomaswue's idea, to cut unmapping delay. 
+ // Strangely, unmapping delay doesn't occur on macOS/M1 however in Linux/AMD it's substantial - ~200ms + if (!isWorkerProcess(args)) { + runAsWorker(); + return; + } + + var concurrency = 2 * Runtime.getRuntime().availableProcessors(); final long fileSize = Files.size(FILE); long regionSize = fileSize / concurrency; @@ -353,30 +478,36 @@ public static void main(String[] args) throws IOException, InterruptedException long startPos = 0; final FileChannel channel = (FileChannel) Files.newByteChannel(FILE, StandardOpenOption.READ); + // get the memory address, this is the only thing we need for Unsafe + final long memoryAddress = channel.map(FileChannel.MapMode.READ_ONLY, startPos, fileSize, Arena.global()).address(); + final RegionActor[] actors = new RegionActor[concurrency]; for (int i = 0; i < concurrency; i++) { // calculate boundaries long maxSize = (startPos + regionSize > fileSize) ? fileSize - startPos : regionSize; // shift position to back until we find a linebreak - maxSize = findClosestLineEnd(startPos, (int) maxSize, channel); + maxSize = findClosestLineEnd(memoryAddress + startPos, (int) maxSize); - final RegionActor region = (actors[i] = new RegionActor(channel, startPos, (int) maxSize)); - region.accumulate(); + final RegionActor region = (actors[i] = new RegionActor(memoryAddress + startPos, (int) maxSize)); + region.start(); // start processing startPos += maxSize; } - final RecordMap output = new RecordMap(); // output to merge all regions + final RecordMap output = new RecordMap(); // output to merge all records for (RegionActor actor : actors) { final RecordMap partial = actor.get(); // blocks until get the result output.merge(partial); + // System.out.println("collisions: " + partial.collision); } // sort and print the result final TreeMap sorted = new TreeMap<>(); - output.forEach(key -> sorted.put(key.toString(), key.measurements())); + output.forEach(key -> { + sorted.put(key.toString(), key.measurements()); + }); System.out.println(sorted); 
- + System.out.close(); // closing the stream will trigger the main process to pick up the output early } } diff --git a/src/main/java/dev/morling/onebrc/CalculateAverage_yonatang.java b/src/main/java/dev/morling/onebrc/CalculateAverage_yonatang.java new file mode 100644 index 000000000..be02e5bd3 --- /dev/null +++ b/src/main/java/dev/morling/onebrc/CalculateAverage_yonatang.java @@ -0,0 +1,320 @@ +/* + * Copyright 2023 The original authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package dev.morling.onebrc; + +import java.io.File; +import java.io.RandomAccessFile; +import java.nio.ByteBuffer; +import java.nio.ByteOrder; +import java.nio.MappedByteBuffer; +import java.nio.channels.FileChannel; +import java.nio.charset.Charset; +import java.util.Arrays; +import java.util.Map; +import java.util.TreeMap; + +public class CalculateAverage_yonatang { + private static final String FILE = "./measurements.txt"; + + private static final int DICT_OFFSET_STATION = 2; + private static final int DICT_OFFSET_SUM = 1; + private static final int DICT_SIZE = 15000; + private static final int DICT_STATION_RECORD_SIZE = 13; + private static final int DICT_RECORD_SIZE = DICT_OFFSET_STATION + DICT_STATION_RECORD_SIZE; + private static final int DICT_SIZE_BYTES = DICT_SIZE * DICT_RECORD_SIZE; + private static final long[] DICT_ZERO_RECORD = new long[DICT_RECORD_SIZE]; + private static final long DICT_BASELINE_MEASURES = ((long) Short.MAX_VALUE & 0xFFFF) | (((long) Short.MIN_VALUE & 0xFFFF) << 16); + + public static class HashTable { + + // Continuous array of [key, min, max, count, sum], which will be more CPU cache friendly. 
+ private final long[] data = new long[DICT_SIZE_BYTES]; + + public HashTable() { + for (int i = 0; i < DICT_SIZE_BYTES; i += DICT_RECORD_SIZE) { + data[i] = DICT_BASELINE_MEASURES; + } + } + + private int getIndex(long[] station) { + long key = 0; + short len = (short) (station[0] & 0xFF); + int longs = ((len + 1) / 8) + 1; + for (int i = 0; i < longs; i++) { + key = key ^ station[i]; + } + int idx = Math.abs((int) (key % DICT_SIZE)) * DICT_RECORD_SIZE; + + while (true) { + if (data[idx] == DICT_BASELINE_MEASURES) { + break; + } + if (Arrays.equals(station, 0, longs, + data, + idx + DICT_OFFSET_STATION, idx + DICT_OFFSET_STATION + longs)) { + break; + } + idx += DICT_RECORD_SIZE; + if (idx >= DICT_SIZE_BYTES) { + idx = 0; + } + } + return idx; + } + + private void addRawMeasurementAgg(long[] title, long measurements, long sum) { + int idx = getIndex(title); + short currentMin = (short) (data[idx] & 0xFFFF); + short currentMax = (short) ((data[idx] >> 16) & 0xFFFF); + int currentCount = (int) (data[idx] >> 32); + + short thisMin = (short) (measurements & 0xFFFF); + short thisMax = (short) ((measurements >> 16) & 0xFFFF); + int thisCount = (int) (measurements >> 32); + + thisMin = (short) Math.min(thisMin, currentMin); + thisMax = (short) Math.max(thisMax, currentMax); + thisCount += currentCount; + + data[idx] = ((long) thisMin & 0xFFFF) | (((long) thisMax & 0xFFFF) << 16) | (((long) thisCount) << 32); + + data[idx + DICT_OFFSET_SUM] += sum; + System.arraycopy(title, 0, data, idx + DICT_OFFSET_STATION, DICT_STATION_RECORD_SIZE); + } + + public TreeMap toMap() { + TreeMap finalMap = new TreeMap<>(); + byte[] bytes = new byte[128]; + ByteBuffer bb = ByteBuffer.allocate(136); + bb.order(ByteOrder.nativeOrder()); + for (int i = 0; i < DICT_SIZE_BYTES; i += DICT_RECORD_SIZE) { + if (data[i] == DICT_BASELINE_MEASURES) + continue; + + short min = (short) (data[i] & 0xFFFF); + short max = (short) ((data[i] >> 16) & 0xFFFF); + int count = (int) (data[i] >> 32); + long sum = 
data[i + DICT_OFFSET_SUM]; + for (int j = 0; j < DICT_STATION_RECORD_SIZE; j++) { + bb.putLong(data[i + DICT_OFFSET_STATION + j]); + } + bb.flip(); + byte len = bb.get(); + bb.get(1, bytes, 0, len); + bb.clear(); + String station = new String(bytes, 0, len, Charset.defaultCharset()); + finalMap.put(station, new ResultRow(min / 10.0, (sum / 10.0) / count, max / 10.0)); + + } + return finalMap; + } + + public void addMeasurement(long[] title, short temp) { + int idx = getIndex(title); + short min = (short) (data[idx] & 0xFFFF); + short max = (short) ((data[idx] >> 16) & 0xFFFF); + int count = (int) (data[idx] >> 32); + min = (short) Math.min(min, temp); + max = (short) Math.max(max, temp); + count += 1; + + data[idx] = ((long) min & 0xFFFF) | (((long) max & 0xFFFF) << 16) | (((long) count) << 32); + data[idx + DICT_OFFSET_SUM] += temp; + System.arraycopy(title, 0, data, idx + DICT_OFFSET_STATION, DICT_STATION_RECORD_SIZE); + } + + public void mergeInto(HashTable other) { + long[] title = new long[DICT_STATION_RECORD_SIZE]; + for (int i = 0; i < DICT_SIZE_BYTES; i += DICT_RECORD_SIZE) { + if (data[i] == DICT_BASELINE_MEASURES) + continue; + System.arraycopy(data, i + DICT_OFFSET_STATION, title, 0, DICT_STATION_RECORD_SIZE); + other.addRawMeasurementAgg(title, data[i], data[i + DICT_OFFSET_SUM]); + } + } + + } + + private static class ResultRow { + final double min; + final double mean; + final double max; + + ResultRow(double min, double mean, double max) { + this.min = min; + this.mean = mean; + this.max = max; + } + + public String toString() { + return round(min) + "/" + round(mean) + "/" + round(max); + } + + private double round(double value) { + return Math.round(value * 10.0) / 10.0; + } + } + + public static boolean parseStation(MappedByteBuffer byteBuffer, ByteBuffer tempBb, long[] station) { + System.arraycopy(DICT_ZERO_RECORD, 0, station, 0, DICT_STATION_RECORD_SIZE); + byte len = 1; + boolean valid = false; + tempBb.clear(); + tempBb.put((byte) 0); + while 
(byteBuffer.hasRemaining()) { + byte ch = byteBuffer.get(); + if (ch == '\n') { + continue; + } + if (ch == ';') { + valid = true; + break; + } + tempBb.put(ch); + // long theNew = ((long) ch) << (len * 8); + // stationId[0] = stationId[0] ^ theNew; + // int arrIdx = len / 8; + // station[arrIdx] = station[arrIdx] ^ theNew; + len++; + } + tempBb.put(0, (byte) (len - 1)); + if (!valid) { + return false; + } + tempBb.position(0); + tempBb.asLongBuffer().get(station); + + int pivotIdx = (len) / 8; + long pivotBits = (len % 8) * 8; + long pivotMask = (1L << pivotBits) - 1; + station[pivotIdx] = station[pivotIdx] & pivotMask; + return true; + } + + public static short parseShort(MappedByteBuffer byteBuffer) { + boolean valid = false; + boolean negative = false; + int num = 0; + while (byteBuffer.hasRemaining()) { + byte ch = byteBuffer.get(); + if (ch == '\n') { + valid = true; + break; + } + if (ch == '-') { + negative = true; + } + else if (ch == '.') { + // noop + } + else { + num = (num * 10 + (ch - '0')); + } + } + if (!valid) { + return Short.MIN_VALUE; + } + + return (short) (negative ? -num : num); + } + + private static final int MARGIN = 130; + + private static void processChunk(FileChannel fc, int j, long chunkSize, HashTable[] maps, boolean isLast) { + try { + HashTable agg = new HashTable(); + maps[j] = agg; + long[] station = new long[DICT_STATION_RECORD_SIZE]; + ByteBuffer tempBb = ByteBuffer.allocate((DICT_STATION_RECORD_SIZE + 1) * Long.BYTES); + tempBb.order(ByteOrder.nativeOrder()); + + long startIdx = Math.max(j * chunkSize - MARGIN, 0); + int padding; + if (isLast) { + chunkSize = fc.size() - startIdx; + padding = 0; + } + else { + padding = j == 0 ? 
0 : MARGIN; + } + if (chunkSize == 0) { + return; + } + MappedByteBuffer byteBuffer = fc.map(FileChannel.MapMode.READ_ONLY, startIdx, chunkSize + padding); + // search back for the actual start line, at \n + if (startIdx > 0) { + int i = MARGIN; + while (i > 0) { + byte ch = byteBuffer.get(i); + if (ch == '\n') { + break; + } + i--; + } + byteBuffer.position(i); + } + + while (byteBuffer.hasRemaining()) { + if (!parseStation(byteBuffer, tempBb, station)) { + continue; + } + short value = parseShort(byteBuffer); + if (value == Short.MIN_VALUE) { + continue; + } + agg.addMeasurement(station, value); + } + } + catch (Exception e) { + e.printStackTrace(); + } + } + + public static void main(String[] args) throws Exception { + // long start = System.nanoTime(); + + File f = new File(FILE); + try (RandomAccessFile raf = new RandomAccessFile(f, "r"); + FileChannel fc = raf.getChannel()) { + + int chunks = f.length() < 1_048_576 ? 1 : (Runtime.getRuntime().availableProcessors()); + + long chunkSize = f.length() / chunks; + + Thread[] threads = new Thread[chunks]; + HashTable totalAgg = new HashTable(); + HashTable[] maps = new HashTable[chunks]; + + for (int i = 0; i < chunks; i++) { + final int j = i; + Thread thread = new Thread(() -> processChunk(fc, j, chunkSize, maps, j == chunks - 1)); + threads[i] = thread; + thread.start(); + } + for (int i = 0; i < chunks; i++) { + threads[i].join(); + maps[i].mergeInto(totalAgg); + } + + Map finalMap = totalAgg.toMap(); + // long end = System.nanoTime(); + + System.out.println(finalMap); + // System.err.println("Total time: " + java.time.Duration.ofNanos(end - start).toMillis() + "ms"); + } + + } +} diff --git a/src/main/java/dev/morling/onebrc/CalculateAverage_yourwass.java b/src/main/java/dev/morling/onebrc/CalculateAverage_yourwass.java new file mode 100644 index 000000000..ad57b5004 --- /dev/null +++ b/src/main/java/dev/morling/onebrc/CalculateAverage_yourwass.java @@ -0,0 +1,291 @@ +/* + * Copyright 2023 The original authors 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package dev.morling.onebrc; + +import java.util.TreeMap; +import java.util.concurrent.locks.Lock; +import java.util.concurrent.locks.ReentrantLock; +import java.io.IOException; +import java.lang.foreign.Arena; +import java.lang.foreign.MemorySegment; +import java.lang.reflect.Field; +import java.nio.channels.FileChannel; +import java.nio.file.Path; +import java.nio.file.StandardOpenOption; +import java.nio.charset.StandardCharsets; +import java.nio.ByteOrder; +import jdk.incubator.vector.ByteVector; +import jdk.incubator.vector.VectorOperators; +import jdk.incubator.vector.VectorSpecies; +import sun.misc.Unsafe; + +public class CalculateAverage_yourwass { + static final class Record { + private long cityAddr; + private long cityLength; + private int min; + private int max; + private int count; + private long sum; + + Record(final long cityAddr, final long cityLength) { + this.cityAddr = cityAddr; + this.cityLength = cityLength; + this.min = 1000; + this.max = -1000; + this.sum = 0; + this.count = 0; + } + + private Record merge(Record r) { + if (r.min < this.min) + this.min = r.min; + if (r.max > this.max) + this.max = r.max; + this.sum += r.sum; + this.count += r.count; + return this; + } + } + + private final static Lock _mutex = new ReentrantLock(true); + private final static TreeMap aggregateResults = new TreeMap<>(); + private static short lookupDecimal[]; + private static byte lookupFraction[]; + 
private static byte lookupDotPositive[]; + private static byte lookupDotNegative[]; + private static MemorySegment VAS; + private static final VectorSpecies SPECIES = ByteVector.SPECIES_PREFERRED; + private static final int MAXINDEX = (1 << 16) + 10000; // short hash + max allowed cities for collisions at the end :p + private static final String FILE = "measurements.txt"; + private static long unsafeResults; + private static int RECORDSIZE = 36; + private static final Unsafe UNSAFE = getUnsafe(); + + private static Unsafe getUnsafe() { + try { + final Field theUnsafe = Unsafe.class.getDeclaredField("theUnsafe"); + theUnsafe.setAccessible(true); + Unsafe unsafe = (Unsafe) theUnsafe.get(null); + return unsafe; + } + catch (NoSuchFieldException | IllegalAccessException e) { + throw new RuntimeException(e); + } + } + + public static void main(String[] args) throws IOException, Throwable { + // prepare lookup tables + // the parsing reads two shorts after possible '-' + // first short, the Decimal part, can be N. or NN with N:[0..9] + // second short, the Fraction part, can be N\n or .N + lookupDecimal = new short[('9' << 8) + '9' + 1]; + lookupFraction = new byte[('9' << 8) + '.' + 1]; + lookupDotPositive = new byte[('9' << 8) + '.' + 1]; + lookupDotNegative = new byte[('9' << 8) + '.' + 1]; + for (short i = 0; i < 10; i++) { + final int ones = i * 10; + final int ix256 = i << 8; + // case N. i.e. single digit decimals: skip to 11824 = ('.'<<8)+'0' + lookupDecimal[11824 + i] = (short) ones; + for (short j = 1; j < 10; j++) { + // case NN i.e double digits decimals: skip to 12236 = ('0'<<8)+'0' + lookupDecimal[12336 + ix256 + j] = (short) (j * 100 + ones); + } + // case N\n skip to 2608 = ('\n'<<8)+'0' + lookupFraction[2608 + i] = (byte) i; + lookupDotPositive[2608 + i] = 4; + lookupDotNegative[2608 + i] = 5; + // case .N skip to 12334 = ('0'<<8)+'.' 
+ lookupFraction[12334 + ix256] = (byte) i; + lookupDotPositive[12334 + ix256] = 5; + lookupDotNegative[12334 + ix256] = 6; + } + + // open file + final FileChannel fileChannel = FileChannel.open(Path.of(FILE), StandardOpenOption.READ); + final long fileSize = fileChannel.size(); + final long mmapAddr = fileChannel.map(FileChannel.MapMode.READ_ONLY, 0, fileSize, Arena.global()).address(); + // VAS: Virtual Address Space, as a MemorySegment upto and including the mmaped file. + // If the mmaped MemorySegment is used for Vector creation as is, then there are two problems: + // 1) fromMemorySegment takes an offset and not an address, so we have to do arithmetic + // this is solved by creating a MemorySegment from Address=0 + // 2) fromMemorySegment checks bounds for memory segment's size - Vector size + // this is solved by adding SPECIES.length() to the size of the segment, but + // XXX there lies the possibility for an out of bounds read at the end of file, which is not handled here. + VAS = MemorySegment.ofAddress(0).reinterpret(mmapAddr + fileSize + SPECIES.length()); + + // allocate memory for results + final int nThreads = Runtime.getRuntime().availableProcessors(); + unsafeResults = UNSAFE.allocateMemory(RECORDSIZE * MAXINDEX * nThreads); + UNSAFE.setMemory(unsafeResults, RECORDSIZE * MAXINDEX * nThreads, (byte) 0); + + // start and wait for threads to finish + Thread[] threadList = new Thread[nThreads]; + final long chunkSize = fileSize / nThreads; + for (int i = 0; i < nThreads; i++) { + final int threadIndex = i; + final long startAddr = mmapAddr + i * chunkSize; + final long endAddr = (i == nThreads - 1) ? 
mmapAddr + fileSize : mmapAddr + (i + 1) * chunkSize; + threadList[i] = new Thread(() -> threadMain(threadIndex, startAddr, endAddr, nThreads)); + threadList[i].start(); + } + for (int i = 0; i < nThreads; i++) + threadList[i].join(); + + // prepare string and print + StringBuilder sb = new StringBuilder(); + sb.append("{"); + for (var entry : aggregateResults.entrySet()) { + Record record = entry.getValue(); + float min = record.min; + min /= 10.f; + float max = record.max; + max /= 10.f; + double avg = Math.round((record.sum * 1.0) / record.count) / 10.; + sb.append(entry.getKey()).append("=").append(min).append("/").append(avg).append("/").append(max).append(", "); + } + int stringLength = sb.length(); + sb.setCharAt(stringLength - 2, '}'); + sb.setCharAt(stringLength - 1, '\n'); + System.out.print(sb.toString()); + System.out.close(); + } + + private static final boolean citiesDiffer(final long a, final long b, final long len) { + int part = 0; + for (; part < (len - 1) >> 3; part++) + if (UNSAFE.getLong(a + (part << 3)) != UNSAFE.getLong(b + (part << 3))) + return true; + if (((UNSAFE.getLong(a + (part << 3)) ^ (UNSAFE.getLong(b + (part << 3)))) << ((8 - (len & 7)) << 3)) != 0) + return true; + return false; + } + + private static void threadMain(int id, long startAddr, long endAddr, long nThreads) { + // snap to newlines + if (id != 0) + while (UNSAFE.getByte(startAddr++) != '\n') + ; + if (id != nThreads - 1) + while (UNSAFE.getByte(endAddr++) != '\n') + ; + + final long threadResults = unsafeResults + id * MAXINDEX * RECORDSIZE; + final Record[] results = new Record[MAXINDEX]; + final long VECTORBYTESIZE = SPECIES.length(); + final ByteOrder BYTEORDER = ByteOrder.nativeOrder(); + final ByteVector delim = ByteVector.broadcast(SPECIES, ';'); + long cityAddr = startAddr; + long ptr = 0; + while (cityAddr < endAddr) { + // parse city + ByteVector parsed = ByteVector.fromMemorySegment(SPECIES, VAS, cityAddr, BYTEORDER); + long mask = 
parsed.compare(VectorOperators.EQ, delim).toLong(); + while (mask == 0) { + ptr += VECTORBYTESIZE; + mask = ByteVector.fromMemorySegment(SPECIES, VAS, cityAddr + ptr, BYTEORDER).compare(VectorOperators.EQ, delim).toLong(); + } + final long cityLength = ptr + Long.numberOfTrailingZeros(mask); + final long tempAddr = cityAddr + cityLength + 1; + ptr = 0; + + // compute hash table index + int index; + if (cityLength > 1) + index = (UNSAFE.getByte(cityAddr) // mix the first, + ^ (UNSAFE.getByte(cityAddr + 2) << 4) // the third (even if it is the delimiter ';') + ^ (UNSAFE.getByte(tempAddr - 2) << 8) // and the last two bytes of each city's name + ^ (UNSAFE.getByte(tempAddr - 3) << 12)) + & 0xFFFF; + else + index = (UNSAFE.getByte(cityAddr) << 8) & 0xFF00; + // resolve collisions with linear probing + // use vector api here also, but only if city name fits in one vector length, for faster default case + long record = threadResults + index * RECORDSIZE; + long recordCityLength = UNSAFE.getLong(record); + if (cityLength <= VECTORBYTESIZE) { + while (recordCityLength > 0) { + if (cityLength == recordCityLength) { + long sameMask = ByteVector.fromMemorySegment(SPECIES, VAS, UNSAFE.getLong(record + 8), BYTEORDER) + .compare(VectorOperators.EQ, parsed).toLong(); + if (Long.numberOfTrailingZeros(~sameMask) >= cityLength) + break; + } + index++; + record = threadResults + index * RECORDSIZE; + recordCityLength = UNSAFE.getLong(record); + } + } + else { // slower normal case for city names with length > VECTORBYTESIZE + while (recordCityLength > 0 && (cityLength != recordCityLength || citiesDiffer(UNSAFE.getLong(record + 8), cityAddr, cityLength))) { + index++; + record = threadResults + index * RECORDSIZE; + recordCityLength = UNSAFE.getLong(record); + } + } + + // add record for new key + if (recordCityLength == 0) { + UNSAFE.putLong(record, cityLength); + UNSAFE.putLong(record + 8, cityAddr); + UNSAFE.putInt(record + 16, 1000); + UNSAFE.putInt(record + 20, -1000); + } + + // 
parse temp with lookup tables + int temp; + if (UNSAFE.getByte(tempAddr) == '-') { + temp = -lookupDecimal[UNSAFE.getShort(tempAddr + 1)] - lookupFraction[UNSAFE.getShort(tempAddr + 3)]; + cityAddr = tempAddr + lookupDotNegative[UNSAFE.getShort(tempAddr + 3)]; + } + else { + temp = lookupDecimal[UNSAFE.getShort(tempAddr)] + lookupFraction[UNSAFE.getShort(tempAddr + 2)]; + cityAddr = tempAddr + lookupDotPositive[UNSAFE.getShort(tempAddr + 2)]; + } + + // merge + if (temp < UNSAFE.getInt(record + 16)) + UNSAFE.putInt(record + 16, temp); + if (temp > UNSAFE.getInt(record + 20)) + UNSAFE.putInt(record + 20, temp); + UNSAFE.putLong(record + 24, UNSAFE.getLong(record + 24) + temp); + UNSAFE.putInt(record + 32, UNSAFE.getInt(record + 32) + 1); + } + + // create strings from raw data + // and aggregate results onto TreeMap + int idx = 0; + byte b[] = new byte[100]; + _mutex.lock(); + for (int i = 0; i < MAXINDEX; i++) { + if (UNSAFE.getLong(threadResults + i * RECORDSIZE) == 0) + continue; + final long recordAddress = threadResults + i * RECORDSIZE; + + results[idx] = new Record(UNSAFE.getLong(recordAddress + 8), UNSAFE.getLong(recordAddress)); + results[idx].min = UNSAFE.getInt(recordAddress + 16); + results[idx].max = UNSAFE.getInt(recordAddress + 20); + results[idx].sum = UNSAFE.getLong(recordAddress + 24); + results[idx].count = UNSAFE.getInt(recordAddress + 32); + UNSAFE.copyMemory(null, UNSAFE.getLong(recordAddress + 8), b, Unsafe.ARRAY_BYTE_BASE_OFFSET, UNSAFE.getLong(recordAddress)); + final Record record = results[idx]; + aggregateResults.compute(new String(b, 0, (int) results[idx].cityLength, StandardCharsets.UTF_8), (k, v) -> (v == null) ? 
record : v.merge(record)); + idx++; + } + _mutex.unlock(); + } +} diff --git a/src/main/java/dev/morling/onebrc/CalculateAverage_zerninv.java b/src/main/java/dev/morling/onebrc/CalculateAverage_zerninv.java index cd4e3d74e..47974ce67 100644 --- a/src/main/java/dev/morling/onebrc/CalculateAverage_zerninv.java +++ b/src/main/java/dev/morling/onebrc/CalculateAverage_zerninv.java @@ -26,64 +26,14 @@ import java.nio.file.Path; import java.nio.file.StandardOpenOption; import java.util.*; -import java.util.concurrent.ExecutionException; -import java.util.concurrent.Executors; -import java.util.concurrent.Future; public class CalculateAverage_zerninv { private static final String FILE = "./measurements.txt"; - private static final int MIN_FILE_SIZE = 1024 * 1024 * 16; - private static final char DELIMITER = ';'; - private static final char LINE_SEPARATOR = '\n'; - private static final char ZERO = '0'; - private static final char NINE = '9'; - private static final char MINUS = '-'; + private static final int CORES = Runtime.getRuntime().availableProcessors(); + private static final int CHUNK_SIZE = 1024 * 1024 * 32; private static final Unsafe UNSAFE = initUnsafe(); - public static void main(String[] args) throws IOException { - var results = new HashMap(); - try (var channel = FileChannel.open(Path.of(FILE), StandardOpenOption.READ)) { - var fileSize = channel.size(); - var memorySegment = channel.map(FileChannel.MapMode.READ_ONLY, 0, fileSize, Arena.global()); - long address = memorySegment.address(); - var cores = Runtime.getRuntime().availableProcessors(); - var minChunkSize = fileSize < MIN_FILE_SIZE ? 
fileSize : fileSize / cores; - var chunks = splitByChunks(address, address + fileSize, minChunkSize); - - var executor = Executors.newFixedThreadPool(cores); - List>> fResults = new ArrayList<>(); - for (int i = 1; i < chunks.size(); i++) { - final long prev = chunks.get(i - 1); - final long curr = chunks.get(i); - fResults.add(executor.submit(() -> calcForChunk(prev, curr))); - } - - fResults.forEach(f -> { - try { - f.get().forEach((key, value) -> { - var result = results.get(key); - if (result != null) { - result.merge(value); - } - else { - results.put(key, value); - } - }); - } - catch (InterruptedException | ExecutionException e) { - e.printStackTrace(); - } - }); - executor.shutdown(); - } - - var bos = new BufferedOutputStream(System.out); - bos.write(new TreeMap<>(results).toString().getBytes(StandardCharsets.UTF_8)); - bos.write('\n'); - bos.flush(); - } - private static Unsafe initUnsafe() { try { Field unsafe = Unsafe.class.getDeclaredField("theUnsafe"); @@ -95,152 +45,160 @@ private static Unsafe initUnsafe() { } } - private static List splitByChunks(long address, long end, long minChunkSize) { - List result = new ArrayList<>(); - result.add(address); - while (address < end) { - address += Math.min(end - address, minChunkSize); - while (address < end && UNSAFE.getByte(address++) != LINE_SEPARATOR) { - } - result.add(address); - } - return result; - } + public static void main(String[] args) throws IOException, InterruptedException { + try (var channel = FileChannel.open(Path.of(FILE), StandardOpenOption.READ)) { + var fileSize = channel.size(); + var minChunkSize = Math.min(fileSize, CHUNK_SIZE); + var segment = channel.map(FileChannel.MapMode.READ_ONLY, 0, fileSize, Arena.global()); - private static Map calcForChunk(long offset, long end) { - var results = new MeasurementContainer(); + var tasks = new TaskThread[CORES]; + for (int i = 0; i < tasks.length; i++) { + tasks[i] = new TaskThread((int) (fileSize / minChunkSize / CORES + 1)); + } - long 
cityOffset; - int hashCode, temperature, multiplier; - byte cityNameSize, b; + var chunks = splitByChunks(segment.address(), segment.address() + fileSize, minChunkSize); + for (int i = 0; i < chunks.size() - 1; i++) { + var task = tasks[i % tasks.length]; + task.addChunk(chunks.get(i), chunks.get(i + 1)); + } - while (offset < end) { - cityOffset = offset; - hashCode = 0; - while ((b = UNSAFE.getByte(offset++)) != DELIMITER) { - hashCode = hashCode * 31 + b; + for (var task : tasks) { + task.start(); } - cityNameSize = (byte) (offset - cityOffset - 1); - multiplier = 1; - temperature = UNSAFE.getByte(offset++) - ZERO; - if (temperature == MINUS - ZERO) { - multiplier = -1; - temperature = 0; + var results = new HashMap(); + for (var task : tasks) { + task.join(); + task.collectTo(results); } - while ((b = UNSAFE.getByte(offset++)) != LINE_SEPARATOR) { - if (b >= ZERO && b <= NINE) { - temperature = temperature * 10 + (b - ZERO); - } + + var bos = new BufferedOutputStream(System.out); + bos.write(new TreeMap<>(results).toString().getBytes(StandardCharsets.UTF_8)); + bos.write('\n'); + bos.flush(); + } + } + + private static List splitByChunks(long address, long end, long minChunkSize) { + // split by chunks + List result = new ArrayList<>((int) ((end - address) / minChunkSize + 1)); + result.add(address); + while (address < end) { + address += Math.min(end - address, minChunkSize); + while (address < end && UNSAFE.getByte(address++) != '\n') { } - results.put(cityOffset, cityNameSize, hashCode, (short) (temperature * multiplier)); + result.add(address); } - return results.toStringMap(); + return result; } - private static final class MeasurementAggregation { + private static final class TemperatureAggregation { private long sum; private int count; private short min; private short max; - public MeasurementAggregation(long sum, int count, short min, short max) { + public TemperatureAggregation(long sum, int count, short min, short max) { this.sum = sum; this.count = 
count; this.min = min; this.max = max; } - public void merge(MeasurementAggregation o) { - if (o == null) { - return; - } - sum += o.sum; - count += o.count; - min = min < o.min ? min : o.min; - max = max > o.max ? max : o.max; + public void merge(long sum, int count, short min, short max) { + this.sum += sum; + this.count += count; + this.min = this.min < min ? this.min : min; + this.max = this.max > max ? this.max : max; } @Override public String toString() { - return String.format("%.1f/%.1f/%.1f", min / 10d, sum / 10d / count, max / 10d); + return min / 10d + "/" + Math.round(sum / 1d / count) / 10d + "/" + max / 10d; } } private static final class MeasurementContainer { - private static final int SIZE = 1024 * 16; + private static final int SIZE = 1 << 17; - private static final int ENTRY_SIZE = 4 + 4 + 1 + 8 + 8 + 2 + 2; + private static final int ENTRY_SIZE = 4 + 4 + 8 + 1 + 8 + 8 + 2 + 2; private static final int COUNT_OFFSET = 0; private static final int HASH_OFFSET = 4; - private static final int SIZE_OFFSET = 8; - private static final int ADDRESS_OFFSET = 9; - private static final int SUM_OFFSET = 17; - private static final int MIN_OFFSET = 25; - private static final int MAX_OFFSET = 27; + private static final int LAST_BYTES_OFFSET = 8; + private static final int SIZE_OFFSET = 16; + private static final int ADDRESS_OFFSET = 17; + private static final int SUM_OFFSET = 25; + private static final int MIN_OFFSET = 33; + private static final int MAX_OFFSET = 35; private final long address; private MeasurementContainer() { address = UNSAFE.allocateMemory(ENTRY_SIZE * SIZE); UNSAFE.setMemory(address, ENTRY_SIZE * SIZE, (byte) 0); - for (long ptr = address; ptr < address + SIZE * ENTRY_SIZE; ptr += ENTRY_SIZE) { - UNSAFE.putShort(ptr + MIN_OFFSET, Short.MAX_VALUE); - UNSAFE.putShort(ptr + MAX_OFFSET, Short.MIN_VALUE); - } } - public void put(long address, byte size, int hash, short value) { + public void put(long address, byte size, int hash, long lastBytes, 
short value) { int idx = Math.abs(hash % SIZE); long ptr = this.address + idx * ENTRY_SIZE; int count; + boolean fastEqual; while ((count = UNSAFE.getInt(ptr + COUNT_OFFSET)) != 0) { - if (UNSAFE.getInt(ptr + HASH_OFFSET) == hash - && UNSAFE.getByte(ptr + SIZE_OFFSET) == size - && isEqual(UNSAFE.getLong(ptr + ADDRESS_OFFSET), address, size)) { - break; + fastEqual = UNSAFE.getInt(ptr + HASH_OFFSET) == hash && UNSAFE.getLong(ptr + LAST_BYTES_OFFSET) == lastBytes; + if (fastEqual && UNSAFE.getByte(ptr + SIZE_OFFSET) == size && isEqual(UNSAFE.getLong(ptr + ADDRESS_OFFSET), address, size - 8)) { + + UNSAFE.putInt(ptr + COUNT_OFFSET, count + 1); + UNSAFE.putLong(ptr + ADDRESS_OFFSET, address); + UNSAFE.putLong(ptr + SUM_OFFSET, UNSAFE.getLong(ptr + SUM_OFFSET) + value); + if (value < UNSAFE.getShort(ptr + MIN_OFFSET)) { + UNSAFE.putShort(ptr + MIN_OFFSET, value); + } + if (value > UNSAFE.getShort(ptr + MAX_OFFSET)) { + UNSAFE.putShort(ptr + MAX_OFFSET, value); + } + return; } idx = (idx + 1) % SIZE; ptr = this.address + idx * ENTRY_SIZE; } - UNSAFE.putInt(ptr + COUNT_OFFSET, count + 1); + UNSAFE.putInt(ptr + COUNT_OFFSET, 1); UNSAFE.putInt(ptr + HASH_OFFSET, hash); + UNSAFE.putLong(ptr + LAST_BYTES_OFFSET, lastBytes); UNSAFE.putByte(ptr + SIZE_OFFSET, size); UNSAFE.putLong(ptr + ADDRESS_OFFSET, address); - UNSAFE.putLong(ptr + SUM_OFFSET, UNSAFE.getLong(ptr + SUM_OFFSET) + value); - if (value < UNSAFE.getShort(ptr + MIN_OFFSET)) { - UNSAFE.putShort(ptr + MIN_OFFSET, value); - } - if (value > UNSAFE.getShort(ptr + MAX_OFFSET)) { - UNSAFE.putShort(ptr + MAX_OFFSET, value); - } + UNSAFE.putLong(ptr + SUM_OFFSET, value); + UNSAFE.putShort(ptr + MIN_OFFSET, value); + UNSAFE.putShort(ptr + MAX_OFFSET, value); } - public Map toStringMap() { - var result = new HashMap(); + public void collectTo(Map results) { int count; for (int i = 0; i < SIZE; i++) { long ptr = this.address + i * ENTRY_SIZE; count = UNSAFE.getInt(ptr + COUNT_OFFSET); if (count != 0) { - var measurements = new 
MeasurementAggregation( - UNSAFE.getLong(ptr + SUM_OFFSET), - count, - UNSAFE.getShort(ptr + MIN_OFFSET), - UNSAFE.getShort(ptr + MAX_OFFSET)); - var key = createString(UNSAFE.getLong(ptr + ADDRESS_OFFSET), UNSAFE.getByte(ptr + SIZE_OFFSET)); - result.put(key, measurements); + var station = createString(UNSAFE.getLong(ptr + ADDRESS_OFFSET), UNSAFE.getByte(ptr + SIZE_OFFSET)); + var result = results.get(station); + if (result == null) { + results.put(station, new TemperatureAggregation( + UNSAFE.getLong(ptr + SUM_OFFSET), + count, + UNSAFE.getShort(ptr + MIN_OFFSET), + UNSAFE.getShort(ptr + MAX_OFFSET))); + } + else { + result.merge(UNSAFE.getLong(ptr + SUM_OFFSET), count, UNSAFE.getShort(ptr + MIN_OFFSET), UNSAFE.getShort(ptr + MAX_OFFSET)); + } } } - return result; } - private boolean isEqual(long address, long address2, byte size) { - for (int i = 0; i < size; i++) { - if (UNSAFE.getByte(address + i) != UNSAFE.getByte(address2 + i)) { + private boolean isEqual(long address, long address2, int size) { + for (int i = 0; i < size; i += 8) { + if (UNSAFE.getLong(address + i) != UNSAFE.getLong(address2 + i)) { return false; } } @@ -255,4 +213,145 @@ private String createString(long address, byte size) { return new String(arr); } } + + private static class TaskThread extends Thread { + // #.## + private static final int THREE_DIGITS_MASK = 0x2e0000; + // #.# + private static final int TWO_DIGITS_MASK = 0x2e00; + // #.#- + private static final int TWO_NEGATIVE_DIGITS_MASK = 0x2e002d; + private static final int BYTE_MASK = 0xff; + + private static final int ZERO = '0'; + private static final long DELIMITER_MASK = 0x3b3b3b3b3b3b3b3bL; + private static final long[] SIGNIFICANT_BYTES_MASK = { + 0, + 0xff, + 0xffff, + 0xffffff, + 0xffffffffL, + 0xffffffffffL, + 0xffffffffffffL, + 0xffffffffffffffL, + 0xffffffffffffffffL + }; + + private final MeasurementContainer container; + private final List begins; + private final List ends; + + private TaskThread(int chunks) { + 
this.container = new MeasurementContainer(); + this.begins = new ArrayList<>(chunks); + this.ends = new ArrayList<>(chunks); + } + + public void addChunk(long begin, long end) { + begins.add(begin); + ends.add(end); + } + + @Override + public void run() { + for (int i = 0; i < begins.size(); i++) { + var begin = begins.get(i); + var end = ends.get(i) - 1; + while (end > begin && UNSAFE.getByte(end - 1) != '\n') { + end--; + } + calcForChunk(begin, end); + calcLastLine(end); + } + } + + private void calcLastLine(long offset) { + long cityOffset = offset; + long lastBytes = 0; + int hashCode = 0; + byte cityNameSize = 0; + + byte b; + while ((b = UNSAFE.getByte(offset++)) != ';') { + lastBytes = (lastBytes << 8) | b; + hashCode = hashCode * 31 + b; + cityNameSize++; + } + + int temperature; + int word = UNSAFE.getInt(offset); + offset += 4; + + if ((word & TWO_NEGATIVE_DIGITS_MASK) == TWO_NEGATIVE_DIGITS_MASK) { + word >>>= 8; + temperature = ZERO * 11 - ((word & BYTE_MASK) * 10 + ((word >>> 16) & BYTE_MASK)); + } + else if ((word & THREE_DIGITS_MASK) == THREE_DIGITS_MASK) { + temperature = (word & BYTE_MASK) * 100 + ((word >>> 8) & BYTE_MASK) * 10 + ((word >>> 24) & BYTE_MASK) - ZERO * 111; + } + else if ((word & TWO_DIGITS_MASK) == TWO_DIGITS_MASK) { + temperature = (word & BYTE_MASK) * 10 + ((word >>> 16) & BYTE_MASK) - ZERO * 11; + } + else { + // #.##- + word = (word >>> 8) | (UNSAFE.getByte(offset) << 24); + temperature = ZERO * 111 - ((word & BYTE_MASK) * 100 + ((word >>> 8) & BYTE_MASK) * 10 + ((word >>> 24) & BYTE_MASK)); + } + container.put(cityOffset, cityNameSize, hashCode, lastBytes, (short) temperature); + } + + private void calcForChunk(long offset, long end) { + long cityOffset, lastBytes, city, masked, hashCode; + int temperature, word, delimiterIdx; + byte cityNameSize; + + while (offset < end) { + cityOffset = offset; + lastBytes = 0; + hashCode = 0; + delimiterIdx = 8; + + while (delimiterIdx == 8) { + city = UNSAFE.getLong(offset); + masked = 
city ^ DELIMITER_MASK; + masked = (masked - 0x0101010101010101L) & ~masked & 0x8080808080808080L; + delimiterIdx = Long.numberOfTrailingZeros(masked) >>> 3; + if (delimiterIdx == 0) { + break; + } + offset += delimiterIdx; + lastBytes = city & SIGNIFICANT_BYTES_MASK[delimiterIdx]; + hashCode = ((hashCode >>> 5) ^ lastBytes) * 0x517cc1b727220a95L; + } + + cityNameSize = (byte) (offset - cityOffset); + + word = UNSAFE.getInt(++offset); + offset += 4; + + if ((word & TWO_NEGATIVE_DIGITS_MASK) == TWO_NEGATIVE_DIGITS_MASK) { + word >>>= 8; + temperature = ZERO * 11 - ((word & BYTE_MASK) * 10 + ((word >>> 16) & BYTE_MASK)); + } + else if ((word & THREE_DIGITS_MASK) == THREE_DIGITS_MASK) { + temperature = (word & BYTE_MASK) * 100 + ((word >>> 8) & BYTE_MASK) * 10 + ((word >>> 24) & BYTE_MASK) - ZERO * 111; + } + else if ((word & TWO_DIGITS_MASK) == TWO_DIGITS_MASK) { + temperature = (word & BYTE_MASK) * 10 + ((word >>> 16) & BYTE_MASK) - ZERO * 11; + offset--; + } + else { + // #.##- + word = (word >>> 8) | (UNSAFE.getByte(offset++) << 24); + temperature = ZERO * 111 - ((word & BYTE_MASK) * 100 + ((word >>> 8) & BYTE_MASK) * 10 + ((word >>> 24) & BYTE_MASK)); + } + offset++; + container.put(cityOffset, cityNameSize, Long.hashCode(hashCode), lastBytes, (short) temperature); + } + } + + public void collectTo(Map results) { + container.collectTo(results); + } + } } diff --git a/src/main/java/dev/morling/onebrc/CreateMeasurements3.java b/src/main/java/dev/morling/onebrc/CreateMeasurements3.java index 804b83ca9..9bcc16dfb 100644 --- a/src/main/java/dev/morling/onebrc/CreateMeasurements3.java +++ b/src/main/java/dev/morling/onebrc/CreateMeasurements3.java @@ -55,7 +55,7 @@ public static void main(String[] args) throws Exception { out.write(station.name); out.write(';'); out.write(Double.toString(Math.round(temp * 10.0) / 10.0)); - out.newLine(); + out.write('\n'); if (i % 50_000_000 == 0) { System.out.printf("Wrote %,d measurements in %,d ms%n", i, System.currentTimeMillis() - 
start); } diff --git a/src/main/python/create_measurements.py b/src/main/python/create_measurements.py index f48972aba..26ec768a0 100755 --- a/src/main/python/create_measurements.py +++ b/src/main/python/create_measurements.py @@ -1,4 +1,19 @@ #!/usr/bin/env python +# +# Copyright 2023 The original authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# # Based on https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CreateMeasurements.java @@ -95,15 +110,18 @@ def build_test_data(weather_station_names, num_rows_to_create): coldest_temp = -99.9 hottest_temp = 99.9 station_names_10k_max = random.choices(weather_station_names, k=10_000) - progress_step = max(1, int(num_rows_to_create / 100)) + batch_size = 10000 # instead of writing line by line to file, process a batch of stations and put it to disk + progress_step = max(1, (num_rows_to_create // batch_size) // 100) print('Building test data...') try: with open("../../../data/measurements.txt", 'w') as file: - for s in range(0,num_rows_to_create): - random_station = random.choice(station_names_10k_max) - random_temp = round(random.uniform(coldest_temp, hottest_temp), 1) - file.write(f"{random_station};{random_temp}\n") + for s in range(0,num_rows_to_create // batch_size): + + batch = random.choices(station_names_10k_max, k=batch_size) + prepped_deviated_batch = '\n'.join([f"{station};{random.uniform(coldest_temp, hottest_temp):.1f}" for station in batch]) # :.1f should quicker than round on 
a large scale, because round utilizes a mathematical operation + file.write(prepped_deviated_batch + '\n') + # Update progress bar every 1% if s % progress_step == 0 or s == num_rows_to_create - 1: sys.stdout.write('\r') diff --git a/test_ci.sh b/test_ci.sh new file mode 100755 index 000000000..1f399e66e --- /dev/null +++ b/test_ci.sh @@ -0,0 +1,124 @@ +#!/bin/bash +# +# Copyright 2023 The original authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +set -eo pipefail + +if [ -z "$1" ] + then + echo "Usage: test_ci.sh ( ...)" + echo " for each fork, there must be a 'calculate_average_.sh' script and an optional 'prepare_.sh'." + exit 1 +fi + +BOLD_WHITE='\033[1;37m' +CYAN='\033[0;36m' +GREEN='\033[0;32m' +PURPLE='\033[0;35m' +BOLD_RED='\033[1;31m' +RED='\033[0;31m' +BOLD_YELLOW='\033[1;33m' +RESET='\033[0m' # No Color + +MEASUREMENTS_FILE="measurements_10M.txt" +RUNS=5 +DEFAULT_JAVA_VERSION="21.0.1-open" +RUN_TIME_LIMIT=300 # seconds + +TIMEOUT="" +if [ "$(uname -s)" == "Linux" ]; then + TIMEOUT="timeout -v $RUN_TIME_LIMIT" +else # macOS + if [ -x "$(command -v gtimeout)" ]; then + TIMEOUT="gtimeout -v $RUN_TIME_LIMIT" # from `brew install coreutils` + else + echo -e "${BOLD_YELLOW}WARNING${RESET} gtimeout not available, benchmark runs may take indefinitely long." + fi +fi + +function check_command_installed { + if ! [ -x "$(command -v $1)" ]; then + echo "Error: $1 is not installed." 
>&2 + exit 1 + fi +} + +function print_and_execute() { + echo "+ $@" >&2 + "$@" +} + +check_command_installed java + +# Validate that ./calculate_average_.sh exists for each fork +for fork in "$@"; do + if [ ! -f "./calculate_average_$fork.sh" ]; then + echo -e "${BOLD_RED}ERROR${RESET}: ./calculate_average_$fork.sh does not exist." >&2 + exit 1 + fi +done + +## SDKMAN Setup +# 1. Custom check for sdkman installed; not sure why check_command_installed doesn't detect it properly +if [ ! -f "$HOME/.sdkman/bin/sdkman-init.sh" ]; then + echo -e "${BOLD_RED}ERROR${RESET}: sdkman is not installed." >&2 + exit 1 +fi + +# 2. Init sdkman in this script +source "$HOME/.sdkman/bin/sdkman-init.sh" + +# 3. make sure the default java version is installed +if [ ! -d "$HOME/.sdkman/candidates/java/$DEFAULT_JAVA_VERSION" ]; then + print_and_execute sdk install java $DEFAULT_JAVA_VERSION +fi + +# 4. Install missing SDK java versions in any of the prepare_*.sh scripts for the provided forks +for fork in "$@"; do + if [ -f "./prepare_$fork.sh" ]; then + grep -h "^sdk use" "./prepare_$fork.sh" | cut -d' ' -f4 | while read -r version; do + if [ ! -d "$HOME/.sdkman/candidates/java/$version" ]; then + print_and_execute sdk install java $version + fi + done || true # grep returns exit code 1 when no match, `|| true` prevents the script from exiting early + fi +done +## END - SDKMAN Setup + +# Run tests and benchmark for each fork +filetimestamp=$(date +"%Y%m%d%H%M%S") # same for all fork.out files from this run +failed=() +for fork in "$@"; do + set +e # we don't want prepare.sh, test.sh or hyperfine failing on 1 fork to exit the script early + + # Run prepare script + if [ -f "./prepare_$fork.sh" ]; then + print_and_execute source "./prepare_$fork.sh" + else + print_and_execute sdk use java $DEFAULT_JAVA_VERSION + fi + + # Run the test suite + print_and_execute $TIMEOUT ./test.sh $fork + if [ $? 
-ne 0 ]; then + failed+=("$fork") + echo "" + echo -e "${BOLD_RED}FAILURE${RESET}: ./test.sh $fork failed" + + exit 1 + fi + echo "" +done