diff --git a/go.mod b/go.mod index ef086cd84..051b44be0 100644 --- a/go.mod +++ b/go.mod @@ -19,7 +19,7 @@ require ( github.com/pkg/errors v0.9.1 github.com/prometheus/client_golang v1.12.1 github.com/prometheus/common v0.32.1 - github.com/segmentio/kafka-go v0.4.35 + github.com/segmentio/kafka-go v0.4.38 github.com/sirupsen/logrus v1.8.1 github.com/spf13/cobra v1.3.0 github.com/spf13/pflag v1.0.5 @@ -62,7 +62,7 @@ require ( github.com/inconshreveable/mousetrap v1.0.0 // indirect github.com/josharian/intern v1.0.0 // indirect github.com/jpillora/backoff v1.0.0 // indirect - github.com/klauspost/compress v1.15.7 // indirect + github.com/klauspost/compress v1.15.9 // indirect github.com/libp2p/go-reuseport v0.1.0 // indirect github.com/magiconair/properties v1.8.5 // indirect github.com/mailru/easyjson v0.7.6 // indirect @@ -72,7 +72,6 @@ require ( github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect github.com/mwitkow/go-conntrack v0.0.0-20190716064945-2f068394615f // indirect github.com/pelletier/go-toml v1.9.4 // indirect - github.com/pierrec/lz4 v2.6.1+incompatible // indirect github.com/pierrec/lz4/v4 v4.1.15 // indirect github.com/pmezard/go-difflib v1.0.0 // indirect github.com/prometheus/client_model v0.2.0 // indirect diff --git a/go.sum b/go.sum index 9d85e65bc..546a2fb41 100644 --- a/go.sum +++ b/go.sum @@ -202,7 +202,6 @@ github.com/docopt/docopt-go v0.0.0-20180111231733-ee0de3bc6815/go.mod h1:WwZ+bS3 github.com/dustin/go-humanize v0.0.0-20171111073723-bb3d318650d4/go.mod h1:HtrtbFcZ19U5GC7JDqmcUSB87Iq5E25KnS6fMYU6eOk= github.com/eapache/go-resiliency v1.1.0/go.mod h1:kFI+JgMyC7bLPUVY133qvEBtVayf5mFgVsvEsIPBvNs= github.com/eapache/go-resiliency v1.2.0/go.mod h1:kFI+JgMyC7bLPUVY133qvEBtVayf5mFgVsvEsIPBvNs= -github.com/eapache/go-xerial-snappy v0.0.0-20180814174437-776d5712da21 h1:YEetp8/yCZMuEPMUDHG0CW/brkkEp8mzqk2+ODEitlw= github.com/eapache/go-xerial-snappy v0.0.0-20180814174437-776d5712da21/go.mod h1:+020luEh2TKB4/GOp8oxxtq0Daoen/Cii55CzbTV6DU= github.com/eapache/queue v1.1.0/go.mod h1:6eCeP0CKFpHLu8blIFXhExK/dRa7WDZfr6jVFPTqq+I= github.com/eclipse/paho.mqtt.golang v1.2.0/go.mod h1:H9keYFcgq3Qr5OUJm/JZI/i6U7joQ8SYLhZwfeOo6Ts= @@ -235,10 +234,7 @@ github.com/form3tech-oss/jwt-go v3.2.3+incompatible/go.mod h1:pbq4aXjuKjdthFRnoD github.com/fortytw2/leaktest v1.3.0/go.mod h1:jDsjWgpAGjm2CA7WthBh/CdZYEPF31XHquHwclZch5g= github.com/franela/goblin v0.0.0-20200105215937-c9ffbefa60db/go.mod h1:7dvUGVsVBjqR7JHJk0brhHOZYGmfBYOrK0ZhYMEtBr4= github.com/franela/goreq v0.0.0-20171204163338-bcd34c9993f8/go.mod h1:ZhphrRTfi2rbfLwlschooIH4+wKKDR4Pdxhh+TRoA20= -github.com/frankban/quicktest v1.10.2 h1:19ARM85nVi4xH7xPXuc5eM/udya5ieh7b/Sv+d844Tk= github.com/frankban/quicktest v1.10.2/go.mod h1:K+q6oSqb0W0Ininfk863uOk1lMy69l/P6txr3mVT54s= -github.com/frankban/quicktest v1.11.3 h1:8sXhOn0uLys67V8EsXLc6eszDs8VXWxL3iRvebPhedY= -github.com/frankban/quicktest v1.11.3/go.mod h1:wRf/ReqHper53s+kmmSZizM8NamnL3IM0I9ntUbOk+k= github.com/fsnotify/fsnotify v1.4.7/go.mod h1:jwhsz4b93w/PPRr/qN1Yymfu8t87LnFCMoQvtojpjFo= github.com/fsnotify/fsnotify v1.4.9/go.mod h1:znqG4EE+3YCdAaPaxE2ZRY/06pZUdp0tY4IgpuI1SZQ= github.com/fsnotify/fsnotify v1.5.1 h1:mZcQUHVQUQWoPXXtuf9yuEXKudkV2sx1E06UadKWpgI= @@ -591,12 +587,9 @@ github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck= github.com/klauspost/compress v1.4.0/go.mod h1:RyIbtBH6LamlWaDj8nUwkbUhJ87Yi3uG0guNDohfE1A= 
github.com/klauspost/compress v1.9.5/go.mod h1:RyIbtBH6LamlWaDj8nUwkbUhJ87Yi3uG0guNDohfE1A= -github.com/klauspost/compress v1.9.8/go.mod h1:RyIbtBH6LamlWaDj8nUwkbUhJ87Yi3uG0guNDohfE1A= github.com/klauspost/compress v1.11.0/go.mod h1:aoV0uJVorq1K+umq18yTdKaF57EivdYsUV+/s2qKfXs= -github.com/klauspost/compress v1.13.6 h1:P76CopJELS0TiO2mebmnzgWaajssP/EszplttgQxcgc= -github.com/klauspost/compress v1.13.6/go.mod h1:/3/Vjq9QcHkK5uEr5lBEmyoZ1iFhe47etQ6QUkpK6sk= -github.com/klauspost/compress v1.15.7 h1:7cgTQxJCU/vy+oP/E3B9RGbQTgbiVzIJWIKOLoAsPok= -github.com/klauspost/compress v1.15.7/go.mod h1:PhcZ0MbTNciWF3rruxRgKxI5NkcHHrHUDtV4Yw2GlzU= +github.com/klauspost/compress v1.15.9 h1:wKRjX6JRtDdrE9qwa4b/Cip7ACOshUI4smpCQanqjSY= +github.com/klauspost/compress v1.15.9/go.mod h1:PhcZ0MbTNciWF3rruxRgKxI5NkcHHrHUDtV4Yw2GlzU= github.com/klauspost/cpuid v0.0.0-20170728055534-ae7887de9fa5/go.mod h1:Pj4uuM528wm8OyEC2QMXAi2YiTZ96dNQPGgoMS4s3ek= github.com/klauspost/crc32 v0.0.0-20161016154125-cb6bfca970f6/go.mod h1:+ZoRqAPRLkC4NPOvfYeR5KNOrY6TD+/sAC3HXPZgDYg= github.com/klauspost/pgzip v1.0.2-0.20170402124221-0bf5dcad4ada/go.mod h1:Ch1tH69qFZu15pkjo5kYi6mth2Zzwzt50oCQKQE9RUs= @@ -704,8 +697,6 @@ github.com/netobserv/gopipes v0.2.0 h1:CnJQq32+xNuM85eVYy/HOf+StTJdh2K6RdaEg7NAJ github.com/netobserv/gopipes v0.2.0/go.mod h1:eGoHZW1ON8Dx/zmDXUhsbVNqatPjtpdO0UZBmGZGmVI= github.com/netobserv/loki-client-go v0.0.0-20220927092034-f37122a54500 h1:RmnoJe/ci5q+QdM7upFdxiU+D8F3L3qTd5wXCwwHefw= github.com/netobserv/loki-client-go v0.0.0-20220927092034-f37122a54500/go.mod h1:LHXpc5tjKvsfZn0pwLKrvlgEhZcCaw3Di9mUEZGAI4E= -github.com/netobserv/netobserv-ebpf-agent v0.1.1-0.20220608092850-3fd4695b7cc2 h1:K7SjoqEfzpMfIjHV85Lg8UDMvZu8rPfrsgKRoo7W30o= -github.com/netobserv/netobserv-ebpf-agent v0.1.1-0.20220608092850-3fd4695b7cc2/go.mod h1:996FEHp8Xj+AKCkiN4eH3dl/yF2DzuYM0kchWZOrapM= github.com/netobserv/netobserv-ebpf-agent v0.2.2-0.20221031160841-ccb7addde22f h1:SBWcIM6Y3u9YQHyMmO2MwTa1OvzffZxhF/NR9O/DH3I= github.com/netobserv/netobserv-ebpf-agent v0.2.2-0.20221031160841-ccb7addde22f/go.mod h1:R42+rKAUVXzr0KExHMhPu1FlTg6I7lCyNnV9ZrBhNUI= github.com/netobserv/prometheus-common v0.31.2-0.20220720134304-43e74fd22881 h1:hx5bi6xBovRjmwUoVJBzhJ3EDo4K4ZUsqqKrJuQ2vMI= @@ -765,9 +756,6 @@ github.com/philhofer/fwd v1.0.0/go.mod h1:gk3iGcWd9+svBvR0sR+KPcfE+RNWozjowpeBVG github.com/pierrec/lz4 v1.0.2-0.20190131084431-473cd7ce01a1/go.mod h1:3/3N9NVKO0jef7pBehbT1qWhCMrIgbYNnFAZCqQ5LRc= github.com/pierrec/lz4 v2.0.5+incompatible/go.mod h1:pdkljMzZIN41W+lC3N2tnIh5sFi+IEE17M5jbnwPHcY= github.com/pierrec/lz4 v2.5.2+incompatible/go.mod h1:pdkljMzZIN41W+lC3N2tnIh5sFi+IEE17M5jbnwPHcY= -github.com/pierrec/lz4 v2.6.0+incompatible/go.mod h1:pdkljMzZIN41W+lC3N2tnIh5sFi+IEE17M5jbnwPHcY= -github.com/pierrec/lz4 v2.6.1+incompatible h1:9UY3+iC23yxF0UfGaYrGplQ+79Rg+h/q9FV9ix19jjM= -github.com/pierrec/lz4 v2.6.1+incompatible/go.mod h1:pdkljMzZIN41W+lC3N2tnIh5sFi+IEE17M5jbnwPHcY= github.com/pierrec/lz4/v4 v4.1.15 h1:MO0/ucJhngq7299dKLwIMtgTfbkoSPF6AoMYDd8Q4q0= github.com/pierrec/lz4/v4 v4.1.15/go.mod h1:gZWDp/Ze/IJXGXf23ltt2EXimqmTUXEy0GFuRQyBid4= github.com/pion/dtls/v2 v2.0.3/go.mod h1:TUjyL8bf8LH95h81Xj7kATmzMRt29F/4lxpIPj2Xe4Y= @@ -850,10 +838,8 @@ github.com/satori/go.uuid v1.2.0/go.mod h1:dA0hQrYB0VpLJoorglMZABFdXlWrHn1NEOzdh github.com/sean-/seed v0.0.0-20170313163322-e2103e2c3529/go.mod h1:DxrIzT+xaE7yg65j358z/aeFdxmN0P9QXhEzd20vsDc= github.com/segmentio/kafka-go v0.1.0/go.mod h1:X6itGqS9L4jDletMsxZ7Dz+JFWxM6JHfPOCvTvk+EJo= github.com/segmentio/kafka-go 
v0.2.0/go.mod h1:X6itGqS9L4jDletMsxZ7Dz+JFWxM6JHfPOCvTvk+EJo= -github.com/segmentio/kafka-go v0.4.28 h1:ATYbyenAlsoFxnV+VpIJMF87bvRuRsX7fezHNfpwkdM= -github.com/segmentio/kafka-go v0.4.28/go.mod h1:XzMcoMjSzDGHcIwpWUI7GB43iKZ2fTVmryPSGLf/MPg= -github.com/segmentio/kafka-go v0.4.35 h1:TAsQ7q1SjS39PcFvU0zDJhCuVAxHomy7xOAfbdSuhzs= -github.com/segmentio/kafka-go v0.4.35/go.mod h1:GAjxBQJdQMB5zfNA21AhpaqOB2Mu+w3De4ni3Gbm8y0= +github.com/segmentio/kafka-go v0.4.38 h1:iQdOBbUSdfuYlFpvjuALgj7N6DrdPA0HfB4AhREOdtg= +github.com/segmentio/kafka-go v0.4.38/go.mod h1:ikyuGon/60MN/vXFgykf7Zm8P5Be49gJU6vezwjnnhU= github.com/sergi/go-diff v1.0.0/go.mod h1:0CfEIISq7TuYL3j771MWULgwwjU+GofnZX9QAmXWZgo= github.com/shurcooL/httpfs v0.0.0-20190707220628-8d4bc4ba7749/go.mod h1:ZY1cvUeJuFPAdZ/B6v7RHavJWZn2YPVFQ1OSXhCGOkg= github.com/shurcooL/sanitized_anchor_name v1.0.0/go.mod h1:1NzhyTcUVG4SuEtjjoZeVRXNmyL/1OwPU0+IJeTBvfc= @@ -901,7 +887,6 @@ github.com/streadway/amqp v0.0.0-20190827072141-edfb9018d271/go.mod h1:AZpEONHx3 github.com/streadway/handy v0.0.0-20190108123426-d5acb3125c2a/go.mod h1:qNTQ5P5JnDBl6z3cMAg/SywNDC5ABu5ApDIw6lUbRmI= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= github.com/stretchr/objx v0.1.1/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= -github.com/stretchr/objx v0.2.0 h1:Hbg2NidpLE8veEBkEZTL3CvlkUIVzuU9jDplZO54c48= github.com/stretchr/objx v0.2.0/go.mod h1:qt09Ya8vawLte6SNmTgCsAVtYtaKzEcn8ATUoHMkEqE= github.com/stretchr/objx v0.4.0 h1:M2gUjqZET1qApGOWNSnZ49BAIMX4F/1plDv3+l31EJ4= github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw= @@ -912,7 +897,6 @@ github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81P github.com/stretchr/testify v1.5.1/go.mod h1:5W2xD1RspED5o8YsWQXVCued0rvSQ+mT+I5cxcmMvtA= github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= -github.com/stretchr/testify v1.7.1 h1:5TQK59W5E3v0r2duFAb7P95B6hEeOyEnHRa8MjYSMTY= github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= github.com/stretchr/testify v1.8.0 h1:pSgiaMZlXftHpm5L7V1+rVB+AZJydKsMxsQBIJw4PKk= github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= @@ -933,12 +917,10 @@ github.com/vladimirvivien/gexe v0.1.1/go.mod h1:LHQL00w/7gDUKIak24n801ABp8C+ni6e github.com/vmware/go-ipfix v0.5.12 h1:mqQknlvnvDY25apPNy9c27ri3FMDFIhzvO68Kk5Qp58= github.com/vmware/go-ipfix v0.5.12/go.mod h1:yzbG1rv+yJ8GeMrRm+MDhOV3akygNZUHLhC1pDoD2AY= github.com/willf/bitset v1.1.3/go.mod h1:RjeCKbqT1RxIR/KWY6phxZiaY1IyutSBfGjNPySAYV4= -github.com/xdg/scram v0.0.0-20180814205039-7eeb5667e42c h1:u40Z8hqBAAQyv+vATcGgV0YCnDjqSL7/q/JyPhhJSPk= github.com/xdg/scram v0.0.0-20180814205039-7eeb5667e42c/go.mod h1:lB8K/P019DLNhemzwFU4jHLhdvlE6uDZjXFejJXr49I= github.com/xdg/scram v1.0.5 h1:TuS0RFmt5Is5qm9Tm2SoD89OPqe4IRiFtyFY4iwWXsw= github.com/xdg/scram v1.0.5/go.mod h1:lB8K/P019DLNhemzwFU4jHLhdvlE6uDZjXFejJXr49I= github.com/xdg/stringprep v0.0.0-20180714160509-73f8eece6fdc/go.mod h1:Jhud4/sHMO4oL310DaZAKk9ZaJ08SJfe+sJh0HrGL1Y= -github.com/xdg/stringprep v1.0.0 h1:d9X0esnoa3dFsV0FG35rAT0RIhYFlPq7MiP+DW89La0= github.com/xdg/stringprep v1.0.0/go.mod h1:Jhud4/sHMO4oL310DaZAKk9ZaJ08SJfe+sJh0HrGL1Y= github.com/xdg/stringprep v1.0.3 h1:cmL5Enob4W83ti/ZHuZLuKD/xqJfus4fVPwE+/BDm+4= github.com/xdg/stringprep v1.0.3/go.mod h1:Jhud4/sHMO4oL310DaZAKk9ZaJ08SJfe+sJh0HrGL1Y= @@ 
-994,7 +976,6 @@ golang.org/x/crypto v0.0.0-20181029021203-45a5f77698d3/go.mod h1:6SG95UA2DQfeDnf golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= golang.org/x/crypto v0.0.0-20190320223903-b7391e95e576/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= golang.org/x/crypto v0.0.0-20190422162423-af44ce270edf/go.mod h1:WFFai1msRO1wXaEeE5yQxYXgSfI8pQAWXbQop6sCtWE= -golang.org/x/crypto v0.0.0-20190506204251-e1dfcc566284/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= golang.org/x/crypto v0.0.0-20190510104115-cbcb75029529/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= golang.org/x/crypto v0.0.0-20190530122614-20be4c3c3ed5/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= golang.org/x/crypto v0.0.0-20190605123033-f99c8df09eb5/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= @@ -1011,7 +992,6 @@ golang.org/x/crypto v0.0.0-20200820211705-5c72a883971a/go.mod h1:LzIPMQfyMNhhGPh golang.org/x/crypto v0.0.0-20201002170205-7f63de1d35b0/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= golang.org/x/crypto v0.0.0-20210220033148-5ea612d1eb83/go.mod h1:jdWPYTVW3xRLrWPugEBEK3UY2ZEsg3UU495nc5E+M+I= golang.org/x/crypto v0.0.0-20210817164053-32db794688a5/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc= -golang.org/x/crypto v0.0.0-20220214200702-86341886e292 h1:f+lwQ+GtmgoY+A2YaQxlSOnDjXcQ7ZRLWOHbC6HtRqE= golang.org/x/crypto v0.0.0-20220214200702-86341886e292/go.mod h1:IxCIyHEi3zRg3s0A5j5BB6A9Jmi73HwBIUl50j+osU4= golang.org/x/crypto v0.0.0-20220622213112-05595931fe9d h1:sK3txAijHtOK88l68nt020reeT1ZdKLIYetKl95FzVY= golang.org/x/crypto v0.0.0-20220622213112-05595931fe9d/go.mod h1:IxCIyHEi3zRg3s0A5j5BB6A9Jmi73HwBIUl50j+osU4= @@ -1112,8 +1092,6 @@ golang.org/x/net v0.0.0-20210525063256-abc453219eb5/go.mod h1:9nx3DQGgdP8bBQD5qx golang.org/x/net v0.0.0-20210813160813-60bc85c4be6d/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= golang.org/x/net v0.0.0-20211112202133-69e39bad7dc2/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= golang.org/x/net v0.0.0-20220127200216-cd36cc0744dd/go.mod h1:CfG3xpIq0wQ8r1q4Su4UZFWDARRcnwPjda9FqA0JpMk= -golang.org/x/net v0.0.0-20220412020605-290c469a71a5 h1:bRb386wvrE+oBNdF1d/Xh9mQrfQ4ecYhW5qJ5GvTGT4= -golang.org/x/net v0.0.0-20220412020605-290c469a71a5/go.mod h1:CfG3xpIq0wQ8r1q4Su4UZFWDARRcnwPjda9FqA0JpMk= golang.org/x/net v0.0.0-20220706163947-c90051bbdb60 h1:8NSylCMxLW4JvserAndSgFL7aPli6A68yf0bYFTcWCM= golang.org/x/net v0.0.0-20220706163947-c90051bbdb60/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c= golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U= @@ -1244,8 +1222,6 @@ golang.org/x/sys v0.0.0-20211205182925-97ca703d548d/go.mod h1:oPkhp1MJrh7nUepCBc golang.org/x/sys v0.0.0-20211216021012-1d35b9e2eb4e/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220114195835-da31bd327af9/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220209214540-3681064d5158/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.0.0-20220412211240-33da011f77ad h1:ntjMns5wyP/fN65tdBD4g8J5w8n015+iIIs9rtjXkY0= -golang.org/x/sys v0.0.0-20220412211240-33da011f77ad/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a h1:dGzPydgVsqGcTRVwiLJ1jVbufYwmzD3LfVPLKsKg+0k= golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/term 
v0.0.0-20201117132131-f5c789dd3221/go.mod h1:Nr5EML6q2oocZ2LXRh80K7BxOlk5/8JxuGnuhpl+muw= @@ -1566,7 +1542,6 @@ gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY= gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ= gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= gopkg.in/yaml.v3 v3.0.0-20200615113413-eeeca48fe776/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= -gopkg.in/yaml.v3 v3.0.0-20210107192922-496545a6307b h1:h8qDotaEPuJATrMmW04NCwg7v22aHH28wwpauUhK9Oo= gopkg.in/yaml.v3 v3.0.0-20210107192922-496545a6307b/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= diff --git a/pkg/pipeline/encode/encode_prom.go b/pkg/pipeline/encode/encode_prom.go index 8e278cf08..a57f82d92 100644 --- a/pkg/pipeline/encode/encode_prom.go +++ b/pkg/pipeline/encode/encode_prom.go @@ -83,7 +83,7 @@ var ( // Encode encodes a metric before being stored func (e *EncodeProm) Encode(metricRecord config.GenericMap) { - log.Debugf("entering EncodeMetric. metricRecord = %v", metricRecord) + log.Tracef("entering EncodeMetric. metricRecord = %v", metricRecord) // Process counters for _, mInfo := range e.counters { diff --git a/pkg/pipeline/ingest/ingest_grpc.go b/pkg/pipeline/ingest/ingest_grpc.go index 283b2c19c..c6f6b7712 100644 --- a/pkg/pipeline/ingest/ingest_grpc.go +++ b/pkg/pipeline/ingest/ingest_grpc.go @@ -11,12 +11,14 @@ import ( "github.com/netobserv/flowlogs-pipeline/pkg/pipeline/utils" "github.com/netobserv/netobserv-ebpf-agent/pkg/grpc" "github.com/netobserv/netobserv-ebpf-agent/pkg/pbflow" - log "github.com/sirupsen/logrus" + "github.com/sirupsen/logrus" grpc2 "google.golang.org/grpc" "google.golang.org/grpc/status" "google.golang.org/protobuf/proto" ) +var glog = logrus.WithField("component", "ingest.GRPCProtobuf") + const ( defaultBufferLen = 100 ) @@ -62,7 +64,7 @@ func (no *GRPCProtobuf) Ingest(out chan<- config.GenericMap) { no.collector.Close() }() for fp := range no.flowPackets { - log.Debugf("GRPCProtobuf records len = %v", len(fp.Entries)) + glog.Debugf("Ingested %v records", len(fp.Entries)) for _, entry := range fp.Entries { out <- decode.PBFlowToMap(entry) } diff --git a/pkg/pipeline/ingest/ingest_kafka.go b/pkg/pipeline/ingest/ingest_kafka.go index 6571f94ea..353112f74 100644 --- a/pkg/pipeline/ingest/ingest_kafka.go +++ b/pkg/pipeline/ingest/ingest_kafka.go @@ -27,13 +27,16 @@ import ( "github.com/netobserv/flowlogs-pipeline/pkg/pipeline/decode" "github.com/netobserv/flowlogs-pipeline/pkg/pipeline/utils" kafkago "github.com/segmentio/kafka-go" - log "github.com/sirupsen/logrus" + "github.com/sirupsen/logrus" "golang.org/x/net/context" ) +var klog = logrus.WithField("component", "ingest.Kafka") + type kafkaReadMessage interface { ReadMessage(ctx context.Context) (kafkago.Message, error) Config() kafkago.ReaderConfig + Stats() kafkago.ReaderStats } type ingestKafka struct { @@ -51,9 +54,11 @@ const defaultBatchReadTimeout = int64(1000) const defaultKafkaBatchMaxLength = 500 const defaultKafkaCommitInterval = 500 +const kafkaStatsPeriod = 15 * time.Second + // Ingest ingests entries from kafka topic func (k *ingestKafka) Ingest(out chan<- config.GenericMap) { - log.Debugf("entering ingestKafka.Ingest") + klog.Debugf("entering ingestKafka.Ingest") k.metrics.createOutQueueLen(out) // initialize background listener @@ -65,19 +70,27 @@ 
func (k *ingestKafka) Ingest(out chan<- config.GenericMap) { // background thread to read kafka messages; place received items into ingestKafka input channel func (k *ingestKafka) kafkaListener() { - log.Debugf("entering kafkaListener") + klog.Debugf("entering kafkaListener") + + if logrus.IsLevelEnabled(logrus.DebugLevel) { + go k.reportStats() + } go func() { for { + if k.isStopped() { + klog.Info("gracefully exiting") + return + } + klog.Trace("fetching messages from Kafka") // block until a message arrives - log.Debugf("before ReadMessage") kafkaMessage, err := k.kafkaReader.ReadMessage(context.Background()) if err != nil { - log.Errorln(err) + klog.Errorln(err) continue } - if k.canLogMessages { - log.Debugf("string(kafkaMessage) = %s\n", string(kafkaMessage.Value)) + if k.canLogMessages && logrus.IsLevelEnabled(logrus.TraceLevel) { + klog.Tracef("string(kafkaMessage) = %s\n", string(kafkaMessage.Value)) } k.metrics.flowsProcessed.Inc() messageLen := len(kafkaMessage.Value) @@ -90,6 +103,15 @@ func (k *ingestKafka) kafkaListener() { }() } +func (k *ingestKafka) isStopped() bool { + select { + case <-k.exitChan: + return true + default: + return false + } +} + func (k *ingestKafka) processRecordDelay(record config.GenericMap) { TimeFlowEndInterface, ok := record["TimeFlowEnd"] if !ok { @@ -109,7 +131,7 @@ func (k *ingestKafka) processRecord(record []byte, out chan<- config.GenericMap) // Decode batch decoded, err := k.decoder.Decode(record) if err != nil { - log.WithError(err).Warnf("ignoring flow") + klog.WithError(err).Warnf("ignoring flow") return } k.processRecordDelay(decoded) @@ -123,7 +145,7 @@ func (k *ingestKafka) processLogLines(out chan<- config.GenericMap) { for { select { case <-k.exitChan: - log.Debugf("exiting ingestKafka because of signal") + klog.Debugf("exiting ingestKafka because of signal") return case record := <-k.in: k.processRecord(record, out) @@ -131,9 +153,23 @@ func (k *ingestKafka) processLogLines(out chan<- config.GenericMap) { } } +// reportStats periodically reports kafka stats +func (k *ingestKafka) reportStats() { + ticker := time.NewTicker(kafkaStatsPeriod) + defer ticker.Stop() + for { + select { + case <-k.exitChan: + klog.Debug("gracefully exiting stats reporter") + return + case <-ticker.C: + klog.Debugf("reader stats: %#v", k.kafkaReader.Stats()) + } + } +} + // NewIngestKafka create a new ingester func NewIngestKafka(opMetrics *operational.Metrics, params config.StageParam) (Ingester, error) { - log.Debugf("entering NewIngestKafka") + klog.Debugf("entering NewIngestKafka") jsonIngestKafka := api.IngestKafka{} if params.Ingest != nil && params.Ingest.Kafka != nil { jsonIngestKafka = *params.Ingest.Kafka @@ -149,9 +185,9 @@ func NewIngestKafka(opMetrics *operational.Metrics, params config.StageParam) (I startOffset = kafkago.LastOffset default: startOffset = kafkago.FirstOffset - log.Errorf("illegal value for StartOffset: %s\n", startOffsetString) + klog.Errorf("illegal value for StartOffset: %s\n", startOffsetString) } - log.Debugf("startOffset = %v", startOffset) + klog.Debugf("startOffset = %v", startOffset) groupBalancers := make([]kafkago.GroupBalancer, 0) for _, gb := range jsonIngestKafka.GroupBalancers { switch gb { @@ -162,7 +198,7 @@ func NewIngestKafka(opMetrics *operational.Metrics, params config.StageParam) (I case "rackAffinity": groupBalancers = append(groupBalancers, &kafkago.RackAffinityGroupBalancer{}) default: - log.Warningf("groupbalancers parameter missing") + klog.Warningf("groupbalancers parameter missing") groupBalancers = 
append(groupBalancers, &kafkago.RoundRobinGroupBalancer{}) } } @@ -171,20 +207,20 @@ func NewIngestKafka(opMetrics *operational.Metrics, params config.StageParam) (I if jsonIngestKafka.BatchReadTimeout != 0 { batchReadTimeout = jsonIngestKafka.BatchReadTimeout } - log.Infof("batchReadTimeout = %d", batchReadTimeout) + klog.Infof("batchReadTimeout = %d", batchReadTimeout) commitInterval := int64(defaultKafkaCommitInterval) if jsonIngestKafka.CommitInterval != 0 { commitInterval = jsonIngestKafka.CommitInterval } - log.Infof("commitInterval = %d", jsonIngestKafka.CommitInterval) + klog.Infof("commitInterval = %d", jsonIngestKafka.CommitInterval) dialer := &kafkago.Dialer{ Timeout: kafkago.DefaultDialer.Timeout, DualStack: kafkago.DefaultDialer.DualStack, } if jsonIngestKafka.TLS != nil { - log.Infof("Using TLS configuration: %v", jsonIngestKafka.TLS) + klog.Infof("Using TLS configuration: %v", jsonIngestKafka.TLS) tlsConfig, err := jsonIngestKafka.TLS.Build() if err != nil { return nil, err @@ -203,22 +239,21 @@ func NewIngestKafka(opMetrics *operational.Metrics, params config.StageParam) (I } if jsonIngestKafka.PullQueueCapacity > 0 { - log.Infof("pullQueueCapacity = %d", jsonIngestKafka.PullQueueCapacity) readerConfig.QueueCapacity = jsonIngestKafka.PullQueueCapacity } if jsonIngestKafka.PullMaxBytes > 0 { - log.Infof("pullMaxBytes = %d", jsonIngestKafka.PullMaxBytes) readerConfig.MaxBytes = jsonIngestKafka.PullMaxBytes } + klog.Debugf("reader config: %#v", readerConfig) + kafkaReader := kafkago.NewReader(readerConfig) if kafkaReader == nil { errMsg := "NewIngestKafka: failed to create kafka-go reader" - log.Errorf("%s", errMsg) + klog.Errorf("%s", errMsg) return nil, errors.New(errMsg) } - log.Debugf("kafkaReader = %v", kafkaReader) decoder, err := decode.GetDecoder(jsonIngestKafka.Decoder) if err != nil { diff --git a/pkg/pipeline/ingest/ingest_kafka_test.go b/pkg/pipeline/ingest/ingest_kafka_test.go index 7adcddf7c..150c81c46 100644 --- a/pkg/pipeline/ingest/ingest_kafka_test.go +++ b/pkg/pipeline/ingest/ingest_kafka_test.go @@ -180,6 +180,10 @@ func (f *fakeKafkaReader) Config() kafkago.ReaderConfig { return kafkago.ReaderConfig{} } +func (f *fakeKafkaReader) Stats() kafkago.ReaderStats { + return kafkago.ReaderStats{} +} + func Test_KafkaListener(t *testing.T) { ingestOutput := make(chan config.GenericMap) newIngest := initNewIngestKafka(t, testConfig1) diff --git a/pkg/pipeline/transform/transform.go b/pkg/pipeline/transform/transform.go index 598fb83bf..5b51a41ae 100644 --- a/pkg/pipeline/transform/transform.go +++ b/pkg/pipeline/transform/transform.go @@ -20,7 +20,7 @@ package transform import ( "github.com/netobserv/flowlogs-pipeline/pkg/api" "github.com/netobserv/flowlogs-pipeline/pkg/config" - log "github.com/sirupsen/logrus" + "github.com/sirupsen/logrus" ) type Transformer interface { @@ -37,7 +37,7 @@ func (t *transformNone) Transform(f config.GenericMap) (config.GenericMap, bool) // NewTransformNone create a new transform func NewTransformNone() (Transformer, error) { - log.Debugf("entering NewTransformNone") + logrus.Debugf("entering NewTransformNone") return &transformNone{}, nil } diff --git a/pkg/pipeline/transform/transform_filter.go b/pkg/pipeline/transform/transform_filter.go index 9cca6cb18..9db5de2a0 100644 --- a/pkg/pipeline/transform/transform_filter.go +++ b/pkg/pipeline/transform/transform_filter.go @@ -20,19 +20,21 @@ package transform import ( "github.com/netobserv/flowlogs-pipeline/pkg/api" "github.com/netobserv/flowlogs-pipeline/pkg/config" - log 
"github.com/sirupsen/logrus" + "github.com/sirupsen/logrus" ) +var tlog = logrus.WithField("component", "transform.Filter") + type Filter struct { Rules []api.TransformFilterRule } // Transform transforms a flow func (f *Filter) Transform(entry config.GenericMap) (config.GenericMap, bool) { - log.Tracef("f = %v", f) + tlog.Tracef("f = %v", f) outputEntry := entry.Copy() for _, rule := range f.Rules { - log.Debugf("rule = %v", rule) + tlog.Tracef("rule = %v", rule) switch rule.Type { case api.TransformFilterOperationName("RemoveField"): delete(outputEntry, rule.Input) @@ -57,7 +59,7 @@ func (f *Filter) Transform(entry config.GenericMap) (config.GenericMap, bool) { } } default: - log.Panicf("unknown type %s for transform.Filter rule: %v", rule.Type, rule) + tlog.Panicf("unknown type %s for transform.Filter rule: %v", rule.Type, rule) } } return outputEntry, true @@ -65,7 +67,7 @@ func (f *Filter) Transform(entry config.GenericMap) (config.GenericMap, bool) { // NewTransformFilter create a new filter transform func NewTransformFilter(params config.StageParam) (Transformer, error) { - log.Debugf("entering NewTransformFilter") + tlog.Debugf("entering NewTransformFilter") rules := []api.TransformFilterRule{} if params.Transform != nil && params.Transform.Filter != nil { rules = params.Transform.Filter.Rules diff --git a/pkg/pipeline/transform/transform_generic.go b/pkg/pipeline/transform/transform_generic.go index 9b9a84bcf..57765abb9 100644 --- a/pkg/pipeline/transform/transform_generic.go +++ b/pkg/pipeline/transform/transform_generic.go @@ -20,9 +20,11 @@ package transform import ( "github.com/netobserv/flowlogs-pipeline/pkg/api" "github.com/netobserv/flowlogs-pipeline/pkg/config" - log "github.com/sirupsen/logrus" + "github.com/sirupsen/logrus" ) +var glog = logrus.WithField("component", "transform.Generic") + type Generic struct { policy string rules []api.GenericTransformRule @@ -30,45 +32,41 @@ type Generic struct { // Transform transforms a flow to a new set of keys func (g *Generic) Transform(entry config.GenericMap) (config.GenericMap, bool) { - log.Tracef("entering Generic Transform g = %v", g) - log.Tracef("entry.GenericMap = %v", entry) - outputEntry := make(config.GenericMap) + var outputEntry config.GenericMap + log.Tracef("Transform input = %v", entry) if g.policy != "replace_keys" { - // copy old map to new map - for key, value := range entry { - outputEntry[key] = value - } + outputEntry = entry.Copy() + } else { + outputEntry = config.GenericMap{} } - log.Debugf("outputEntry = %v", outputEntry) for _, transformRule := range g.rules { - log.Debugf("transformRule = %v", transformRule) outputEntry[transformRule.Output] = entry[transformRule.Input] } - log.Debugf("Transform.GenericMap = %v", outputEntry) + glog.Tracef("Transform output = %v", outputEntry) return outputEntry, true } // NewTransformGeneric create a new transform func NewTransformGeneric(params config.StageParam) (Transformer, error) { - log.Debugf("entering NewTransformGeneric") + glog.Debugf("entering NewTransformGeneric") genConfig := api.TransformGeneric{} if params.Transform != nil && params.Transform.Generic != nil { genConfig = *params.Transform.Generic } - log.Debugf("params.Transform.Generic = %v", genConfig) + glog.Debugf("params.Transform.Generic = %v", genConfig) rules := genConfig.Rules policy := genConfig.Policy switch policy { case "replace_keys", "preserve_original_keys", "": // valid; nothing to do - log.Infof("NewTransformGeneric, policy = %s", policy) + glog.Infof("NewTransformGeneric, policy = 
%s", policy) default: - log.Panicf("unknown policy %s for transform.generic", policy) + glog.Panicf("unknown policy %s for transform.generic", policy) } transformGeneric := &Generic{ policy: policy, rules: rules, } - log.Debugf("transformGeneric = %v", transformGeneric) + glog.Debugf("transformGeneric = %v", transformGeneric) return transformGeneric, nil } diff --git a/pkg/pipeline/transform/transform_network.go b/pkg/pipeline/transform/transform_network.go index 06fe92723..bc78e2831 100644 --- a/pkg/pipeline/transform/transform_network.go +++ b/pkg/pipeline/transform/transform_network.go @@ -29,10 +29,12 @@ import ( "github.com/netobserv/flowlogs-pipeline/pkg/config" "github.com/netobserv/flowlogs-pipeline/pkg/pipeline/transform/kubernetes" "github.com/netobserv/flowlogs-pipeline/pkg/pipeline/transform/location" - netdb "github.com/netobserv/flowlogs-pipeline/pkg/pipeline/transform/netdb" - log "github.com/sirupsen/logrus" + "github.com/netobserv/flowlogs-pipeline/pkg/pipeline/transform/netdb" + "github.com/sirupsen/logrus" ) +var log = logrus.WithField("component", "transform.Network") + type Network struct { api.TransformNetwork svcNames *netdb.ServiceNames @@ -116,7 +118,7 @@ func (n *Network) Transform(inputEntry config.GenericMap) (config.GenericMap, bo case api.OpAddKubernetes: kubeInfo, err := kubernetes.Data.GetInfo(fmt.Sprintf("%s", outputEntry[rule.Input])) if err != nil { - log.Debugf("Can't find kubernetes info for IP %v err %v", outputEntry[rule.Input], err) + logrus.WithError(err).Tracef("can't find kubernetes info for IP %v", outputEntry[rule.Input]) continue } // NETOBSERV-666: avoid putting empty namespaces or Loki aggregation queries will diff --git a/pkg/pipeline/utils/exit.go b/pkg/pipeline/utils/exit.go index 90c2ef66c..b2c1eb3d7 100644 --- a/pkg/pipeline/utils/exit.go +++ b/pkg/pipeline/utils/exit.go @@ -22,7 +22,7 @@ import ( "os/signal" "syscall" - log "github.com/sirupsen/logrus" + "github.com/sirupsen/logrus" ) var ( @@ -34,18 +34,18 @@ func ExitChannel() <-chan struct{} { } func SetupElegantExit() { - log.Debugf("entering SetupElegantExit") + logrus.Debugf("entering SetupElegantExit") // handle elegant exit; create support for channels of go routines that want to exit cleanly exitChannel = make(chan struct{}) exitSigChan := make(chan os.Signal, 1) - log.Debugf("registered exit signal channel") + logrus.Debugf("registered exit signal channel") signal.Notify(exitSigChan, syscall.SIGINT, syscall.SIGTERM) go func() { // wait for exit signal; then stop all the other go functions sig := <-exitSigChan - log.Debugf("received exit signal = %v", sig) + logrus.Debugf("received exit signal = %v", sig) close(exitChannel) - log.Debugf("exiting SetupElegantExit go function") + logrus.Debugf("exiting SetupElegantExit go function") }() - log.Debugf("exiting SetupElegantExit") + logrus.Debugf("exiting SetupElegantExit") } diff --git a/pkg/pipeline/utils/timed_cache.go b/pkg/pipeline/utils/timed_cache.go index f98e81661..034acac93 100644 --- a/pkg/pipeline/utils/timed_cache.go +++ b/pkg/pipeline/utils/timed_cache.go @@ -22,9 +22,11 @@ import ( "sync" "time" - log "github.com/sirupsen/logrus" + "github.com/sirupsen/logrus" ) +var log = logrus.WithField("component", "utils.TimedCache") + // Functions to manage an LRU cache with an expiry // When an item expires, allow a callback to allow the specific implementation to perform its particular cleanup @@ -58,6 +60,8 @@ func (tc *TimedCache) GetCacheEntry(key string) (interface{}, bool) { } } +var uclog = log.WithField("method", 
"UpdateCacheEntry") + func (tc *TimedCache) UpdateCacheEntry(key string, entry interface{}) *cacheEntry { nowInSecs := time.Now().Unix() tc.mu.Lock() @@ -75,11 +79,10 @@ func (tc *TimedCache) UpdateCacheEntry(key string, entry interface{}) *cacheEntr key: key, SourceEntry: entry, } + uclog.Debugf("adding entry: %#v", cEntry) // place at end of list - log.Debugf("adding entry = %v", cEntry) cEntry.e = tc.cacheList.PushBack(cEntry) tc.cacheMap[key] = cEntry - log.Debugf("cacheList = %v", tc.cacheList) } return cEntry } @@ -103,13 +106,18 @@ func (tc *TimedCache) Iterate(f func(key string, value interface{})) { // CleanupExpiredEntries removes items from cache that were last touched more than expiryTime seconds ago func (tc *TimedCache) CleanupExpiredEntries(expiryTime int64, callback CacheCallback) { - log.Debugf("entering cleanupExpiredEntries") tc.mu.Lock() defer tc.mu.Unlock() - log.Debugf("cache = %v", tc.cacheMap) - log.Debugf("list = %v", tc.cacheList) + + clog := log.WithFields(logrus.Fields{ + "mapLen": len(tc.cacheMap), + "listLen": tc.cacheList.Len(), + }) + clog.Debugf("cleaning up expried entries") + nowInSecs := time.Now().Unix() expireTime := nowInSecs - expiryTime + deleted := 0 // go through the list until we reach recently used entries for { listEntry := tc.cacheList.Front() @@ -117,12 +125,12 @@ func (tc *TimedCache) CleanupExpiredEntries(expiryTime int64, callback CacheCall return } pCacheInfo := listEntry.Value.(*cacheEntry) - log.Debugf("lastUpdatedTime = %d, expireTime = %d", pCacheInfo.lastUpdatedTime, expireTime) - log.Debugf("pCacheInfo = %v", pCacheInfo) if pCacheInfo.lastUpdatedTime > expireTime { // no more expired items + clog.Debugf("deleted %d expired entries", deleted) return } + deleted++ callback.Cleanup(pCacheInfo.SourceEntry) delete(tc.cacheMap, pCacheInfo.key) tc.cacheList.Remove(listEntry) diff --git a/pkg/pipeline/write/write.go b/pkg/pipeline/write/write.go index 46c2db27f..da05d2207 100644 --- a/pkg/pipeline/write/write.go +++ b/pkg/pipeline/write/write.go @@ -21,7 +21,7 @@ import ( "sync" "github.com/netobserv/flowlogs-pipeline/pkg/config" - log "github.com/sirupsen/logrus" + "github.com/sirupsen/logrus" ) type Writer interface { @@ -35,7 +35,7 @@ type WriteNone struct { // Write writes entries func (t *WriteNone) Write(in config.GenericMap) { - log.Debugf("entering Write none, in = %v", in) + logrus.Debugf("entering Write none, in = %v", in) t.mt.Lock() t.prevRecords = append(t.prevRecords, in) t.mt.Unlock() diff --git a/pkg/pipeline/write/write_fake.go b/pkg/pipeline/write/write_fake.go index 3c4080352..518573a1c 100644 --- a/pkg/pipeline/write/write_fake.go +++ b/pkg/pipeline/write/write_fake.go @@ -21,7 +21,7 @@ import ( "sync" "github.com/netobserv/flowlogs-pipeline/pkg/config" - log "github.com/sirupsen/logrus" + "github.com/sirupsen/logrus" ) type WriteFake struct { @@ -32,7 +32,7 @@ type WriteFake struct { // Write stores in memory all records. func (w *WriteFake) Write(in config.GenericMap) { - log.Trace("entering writeFake Write") + logrus.Trace("entering writeFake Write") w.mt.Lock() w.allRecords = append(w.allRecords, in.Copy()) w.mt.Unlock() @@ -50,7 +50,7 @@ func (w *WriteFake) AllRecords() []config.GenericMap { // NewWriteFake creates a new write. 
func NewWriteFake(_ config.StageParam) (Writer, error) { - log.Debugf("entering NewWriteFake") + logrus.Debugf("entering NewWriteFake") w := &WriteFake{} return w, nil } diff --git a/pkg/pipeline/write/write_loki.go b/pkg/pipeline/write/write_loki.go index 9a53bf11a..fcaaa5fe5 100644 --- a/pkg/pipeline/write/write_loki.go +++ b/pkg/pipeline/write/write_loki.go @@ -34,7 +34,7 @@ import ( "github.com/netobserv/loki-client-go/pkg/backoff" "github.com/netobserv/loki-client-go/pkg/urlutil" "github.com/prometheus/common/model" - log "github.com/sirupsen/logrus" + "github.com/sirupsen/logrus" ) var jsonEncodingConfig = jsonIter.Config{}.Froze() @@ -43,6 +43,8 @@ var ( keyReplacer = strings.NewReplacer("/", "_", ".", "_", "-", "_") ) +var log = logrus.WithField("component", "write.Loki") + type emitter interface { Handle(labels model.LabelSet, timestamp time.Time, record string) error } @@ -172,7 +174,7 @@ func (l *Loki) addLabels(record config.GenericMap, labels model.LabelSet) { } lv := model.LabelValue(fmt.Sprint(val)) if !lv.IsValid() { - log.WithFields(log.Fields{"key": label, "value": val}). + log.WithFields(logrus.Fields{"key": label, "value": val}). Debug("Invalid label value. Ignoring it") continue } @@ -204,10 +206,10 @@ func getFloat64(timestamp interface{}) (ft float64, ok bool) { // Write writes a flow before being stored func (l *Loki) Write(entry config.GenericMap) { - log.Debugf("entering Loki Write") + log.Tracef("writing entry: %#v", entry) err := l.ProcessRecord(entry) if err != nil { - log.Errorf("Write (Loki) error %v", err) + log.WithError(err).Warn("can't write into loki") } } @@ -246,7 +248,7 @@ func NewWriteLoki(opMetrics *operational.Metrics, params config.StageParam) (*Lo if sanitized.IsValid() { saneLabels[label] = sanitized } else { - log.WithFields(log.Fields{"key": label, "sanitized": sanitized}). + log.WithFields(logrus.Fields{"key": label, "sanitized": sanitized}). Debug("Invalid label. 
Ignoring it") } } diff --git a/pkg/pipeline/write/write_loki_test.go b/pkg/pipeline/write/write_loki_test.go index 19dda1754..b1623f750 100644 --- a/pkg/pipeline/write/write_loki_test.go +++ b/pkg/pipeline/write/write_loki_test.go @@ -31,11 +31,10 @@ import ( "github.com/netobserv/flowlogs-pipeline/pkg/operational" "github.com/netobserv/flowlogs-pipeline/pkg/test" "github.com/prometheus/common/model" + "github.com/sirupsen/logrus" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/mock" "github.com/stretchr/testify/require" - - log "github.com/sirupsen/logrus" ) const timeout = 5 * time.Second @@ -325,7 +324,7 @@ func hundredFlows() []config.GenericMap { } func BenchmarkWriteLoki(b *testing.B) { - log.SetLevel(log.ErrorLevel) + logrus.SetLevel(logrus.ErrorLevel) lokiFlows := make(chan map[string]interface{}, 256) fakeLoki := httptest.NewServer(test.FakeLokiHandler(lokiFlows)) diff --git a/pkg/pipeline/write/write_stdout.go b/pkg/pipeline/write/write_stdout.go index f37086e6e..5c228305a 100644 --- a/pkg/pipeline/write/write_stdout.go +++ b/pkg/pipeline/write/write_stdout.go @@ -26,7 +26,7 @@ import ( "time" "github.com/netobserv/flowlogs-pipeline/pkg/config" - log "github.com/sirupsen/logrus" + "github.com/sirupsen/logrus" ) type writeStdout struct { @@ -35,7 +35,7 @@ type writeStdout struct { // Write writes a flow before being stored func (t *writeStdout) Write(v config.GenericMap) { - log.Tracef("entering writeStdout Write") + logrus.Tracef("entering writeStdout Write") if t.format == "json" { txt, _ := json.Marshal(v) fmt.Println(string(txt)) @@ -58,7 +58,7 @@ func (t *writeStdout) Write(v config.GenericMap) { // NewWriteStdout create a new write func NewWriteStdout(params config.StageParam) (Writer, error) { - log.Debugf("entering NewWriteStdout") + logrus.Debugf("entering NewWriteStdout") writeStdout := &writeStdout{} if params.Write != nil && params.Write.Stdout != nil { writeStdout.format = params.Write.Stdout.Format diff --git a/vendor/github.com/klauspost/compress/README.md b/vendor/github.com/klauspost/compress/README.md index 1f6b7e9a2..ad5c63a82 100644 --- a/vendor/github.com/klauspost/compress/README.md +++ b/vendor/github.com/klauspost/compress/README.md @@ -17,6 +17,25 @@ This package provides various compression algorithms. 
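
Not part of the diff: a minimal, runnable sketch of the component-scoped logger pattern the pipeline files above converge on. Each file declares a package-level *logrus.Entry built with logrus.WithField("component", ...), and expensive per-record formatting is guarded behind an explicit level check, as the Kafka ingester does for message dumps. The component names mirror ones added in the diff; the error and payload values are placeholders.

```go
package main

import (
	"errors"

	"github.com/sirupsen/logrus"
)

// Component-scoped loggers, mirroring the ones added in the diff
// (e.g. ingest_kafka.go and write_loki.go). Every line they emit carries
// a "component" field, which is the point of the refactor.
var (
	klog = logrus.WithField("component", "ingest.Kafka")
	wlog = logrus.WithField("component", "write.Loki")
)

func main() {
	logrus.SetLevel(logrus.TraceLevel)

	klog.Debug("entering ingestKafka.Ingest")
	wlog.WithError(errors.New("connection refused")).Warn("can't write into loki")

	// Guard per-record payload formatting behind a level check, as the PR
	// does for Kafka message dumps, so the cost is only paid when trace
	// logging is actually enabled.
	payload := []byte(`{"SrcAddr":"10.0.0.1"}`)
	if logrus.IsLevelEnabled(logrus.TraceLevel) {
		klog.Tracef("string(kafkaMessage) = %s", string(payload))
	}
}
```
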
# changelog +* July 13, 2022 (v1.15.8) + + * gzip: fix stack exhaustion bug in Reader.Read https://github.com/klauspost/compress/pull/641 + * s2: Add Index header trim/restore https://github.com/klauspost/compress/pull/638 + * zstd: Optimize seqdeq amd64 asm by @greatroar in https://github.com/klauspost/compress/pull/636 + * zstd: Improve decoder memcopy https://github.com/klauspost/compress/pull/637 + * huff0: Pass a single bitReader pointer to asm by @greatroar in https://github.com/klauspost/compress/pull/634 + * zstd: Branchless getBits for amd64 w/o BMI2 by @greatroar in https://github.com/klauspost/compress/pull/640 + * gzhttp: Remove header before writing https://github.com/klauspost/compress/pull/639 + +* June 29, 2022 (v1.15.7) + + * s2: Fix absolute forward seeks https://github.com/klauspost/compress/pull/633 + * zip: Merge upstream https://github.com/klauspost/compress/pull/631 + * zip: Re-add zip64 fix https://github.com/klauspost/compress/pull/624 + * zstd: translate fseDecoder.buildDtable into asm by @WojciechMula in https://github.com/klauspost/compress/pull/598 + * flate: Faster histograms https://github.com/klauspost/compress/pull/620 + * deflate: Use compound hcode https://github.com/klauspost/compress/pull/622 + * June 3, 2022 (v1.15.6) * s2: Improve coding for long, close matches https://github.com/klauspost/compress/pull/613 * s2c: Add Snappy/S2 stream recompression https://github.com/klauspost/compress/pull/611 diff --git a/vendor/github.com/klauspost/compress/flate/deflate.go b/vendor/github.com/klauspost/compress/flate/deflate.go new file mode 100644 index 000000000..f8435998e --- /dev/null +++ b/vendor/github.com/klauspost/compress/flate/deflate.go @@ -0,0 +1,903 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Copyright (c) 2015 Klaus Post +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package flate + +import ( + "encoding/binary" + "fmt" + "io" + "math" +) + +const ( + NoCompression = 0 + BestSpeed = 1 + BestCompression = 9 + DefaultCompression = -1 + + // HuffmanOnly disables Lempel-Ziv match searching and only performs Huffman + // entropy encoding. This mode is useful in compressing data that has + // already been compressed with an LZ style algorithm (e.g. Snappy or LZ4) + // that lacks an entropy encoder. Compression gains are achieved when + // certain bytes in the input stream occur more frequently than others. + // + // Note that HuffmanOnly produces a compressed output that is + // RFC 1951 compliant. That is, any valid DEFLATE decompressor will + // continue to be able to decompress this output. + HuffmanOnly = -2 + ConstantCompression = HuffmanOnly // compatibility alias. + + logWindowSize = 15 + windowSize = 1 << logWindowSize + windowMask = windowSize - 1 + logMaxOffsetSize = 15 // Standard DEFLATE + minMatchLength = 4 // The smallest match that the compressor looks for + maxMatchLength = 258 // The longest match for the compressor + minOffsetSize = 1 // The shortest offset that makes any sense + + // The maximum number of tokens we will encode at the time. + // Smaller sizes usually creates less optimal blocks. + // Bigger can make context switching slow. + // We use this for levels 7-9, so we make it big. 
+ maxFlateBlockTokens = 1 << 15 + maxStoreBlockSize = 65535 + hashBits = 17 // After 17 performance degrades + hashSize = 1 << hashBits + hashMask = (1 << hashBits) - 1 + hashShift = (hashBits + minMatchLength - 1) / minMatchLength + maxHashOffset = 1 << 28 + + skipNever = math.MaxInt32 + + debugDeflate = false +) + +type compressionLevel struct { + good, lazy, nice, chain, fastSkipHashing, level int +} + +// Compression levels have been rebalanced from zlib deflate defaults +// to give a bigger spread in speed and compression. +// See https://blog.klauspost.com/rebalancing-deflate-compression-levels/ +var levels = []compressionLevel{ + {}, // 0 + // Level 1-6 uses specialized algorithm - values not used + {0, 0, 0, 0, 0, 1}, + {0, 0, 0, 0, 0, 2}, + {0, 0, 0, 0, 0, 3}, + {0, 0, 0, 0, 0, 4}, + {0, 0, 0, 0, 0, 5}, + {0, 0, 0, 0, 0, 6}, + // Levels 7-9 use increasingly more lazy matching + // and increasingly stringent conditions for "good enough". + {8, 12, 16, 24, skipNever, 7}, + {16, 30, 40, 64, skipNever, 8}, + {32, 258, 258, 1024, skipNever, 9}, +} + +// advancedState contains state for the advanced levels, with bigger hash tables, etc. +type advancedState struct { + // deflate state + length int + offset int + maxInsertIndex int + chainHead int + hashOffset int + + ii uint16 // position of last match, intended to overflow to reset. + + // input window: unprocessed data is window[index:windowEnd] + index int + estBitsPerByte int + hashMatch [maxMatchLength + minMatchLength]uint32 + + // Input hash chains + // hashHead[hashValue] contains the largest inputIndex with the specified hash value + // If hashHead[hashValue] is within the current window, then + // hashPrev[hashHead[hashValue] & windowMask] contains the previous index + // with the same hash value. + hashHead [hashSize]uint32 + hashPrev [windowSize]uint32 +} + +type compressor struct { + compressionLevel + + h *huffmanEncoder + w *huffmanBitWriter + + // compression algorithm + fill func(*compressor, []byte) int // copy data to window + step func(*compressor) // process window + + window []byte + windowEnd int + blockStart int // window index where current tokens start + err error + + // queued output tokens + tokens tokens + fast fastEnc + state *advancedState + + sync bool // requesting flush + byteAvailable bool // if true, still need to process window[index-1]. +} + +func (d *compressor) fillDeflate(b []byte) int { + s := d.state + if s.index >= 2*windowSize-(minMatchLength+maxMatchLength) { + // shift the window by windowSize + copy(d.window[:], d.window[windowSize:2*windowSize]) + s.index -= windowSize + d.windowEnd -= windowSize + if d.blockStart >= windowSize { + d.blockStart -= windowSize + } else { + d.blockStart = math.MaxInt32 + } + s.hashOffset += windowSize + if s.hashOffset > maxHashOffset { + delta := s.hashOffset - 1 + s.hashOffset -= delta + s.chainHead -= delta + // Iterate over slices instead of arrays to avoid copying + // the entire table onto the stack (Issue #18625). 
+ for i, v := range s.hashPrev[:] { + if int(v) > delta { + s.hashPrev[i] = uint32(int(v) - delta) + } else { + s.hashPrev[i] = 0 + } + } + for i, v := range s.hashHead[:] { + if int(v) > delta { + s.hashHead[i] = uint32(int(v) - delta) + } else { + s.hashHead[i] = 0 + } + } + } + } + n := copy(d.window[d.windowEnd:], b) + d.windowEnd += n + return n +} + +func (d *compressor) writeBlock(tok *tokens, index int, eof bool) error { + if index > 0 || eof { + var window []byte + if d.blockStart <= index { + window = d.window[d.blockStart:index] + } + d.blockStart = index + //d.w.writeBlock(tok, eof, window) + d.w.writeBlockDynamic(tok, eof, window, d.sync) + return d.w.err + } + return nil +} + +// writeBlockSkip writes the current block and uses the number of tokens +// to determine if the block should be stored on no matches, or +// only huffman encoded. +func (d *compressor) writeBlockSkip(tok *tokens, index int, eof bool) error { + if index > 0 || eof { + if d.blockStart <= index { + window := d.window[d.blockStart:index] + // If we removed less than a 64th of all literals + // we huffman compress the block. + if int(tok.n) > len(window)-int(tok.n>>6) { + d.w.writeBlockHuff(eof, window, d.sync) + } else { + // Write a dynamic huffman block. + d.w.writeBlockDynamic(tok, eof, window, d.sync) + } + } else { + d.w.writeBlock(tok, eof, nil) + } + d.blockStart = index + return d.w.err + } + return nil +} + +// fillWindow will fill the current window with the supplied +// dictionary and calculate all hashes. +// This is much faster than doing a full encode. +// Should only be used after a start/reset. +func (d *compressor) fillWindow(b []byte) { + // Do not fill window if we are in store-only or huffman mode. + if d.level <= 0 { + return + } + if d.fast != nil { + // encode the last data, but discard the result + if len(b) > maxMatchOffset { + b = b[len(b)-maxMatchOffset:] + } + d.fast.Encode(&d.tokens, b) + d.tokens.Reset() + return + } + s := d.state + // If we are given too much, cut it. + if len(b) > windowSize { + b = b[len(b)-windowSize:] + } + // Add all to window. + n := copy(d.window[d.windowEnd:], b) + + // Calculate 256 hashes at the time (more L1 cache hits) + loops := (n + 256 - minMatchLength) / 256 + for j := 0; j < loops; j++ { + startindex := j * 256 + end := startindex + 256 + minMatchLength - 1 + if end > n { + end = n + } + tocheck := d.window[startindex:end] + dstSize := len(tocheck) - minMatchLength + 1 + + if dstSize <= 0 { + continue + } + + dst := s.hashMatch[:dstSize] + bulkHash4(tocheck, dst) + var newH uint32 + for i, val := range dst { + di := i + startindex + newH = val & hashMask + // Get previous value with the same hash. + // Our chain should point to the previous value. + s.hashPrev[di&windowMask] = s.hashHead[newH] + // Set the head of the hash chain to us. + s.hashHead[newH] = uint32(di + s.hashOffset) + } + } + // Update window information. + d.windowEnd += n + s.index = n +} + +// Try to find a match starting at index whose length is greater than prevSize. +// We only look at chainCount possibilities before giving up. 
+// pos = s.index, prevHead = s.chainHead-s.hashOffset, prevLength=minMatchLength-1, lookahead +func (d *compressor) findMatch(pos int, prevHead int, lookahead int) (length, offset int, ok bool) { + minMatchLook := maxMatchLength + if lookahead < minMatchLook { + minMatchLook = lookahead + } + + win := d.window[0 : pos+minMatchLook] + + // We quit when we get a match that's at least nice long + nice := len(win) - pos + if d.nice < nice { + nice = d.nice + } + + // If we've got a match that's good enough, only look in 1/4 the chain. + tries := d.chain + length = minMatchLength - 1 + + wEnd := win[pos+length] + wPos := win[pos:] + minIndex := pos - windowSize + if minIndex < 0 { + minIndex = 0 + } + offset = 0 + + cGain := 0 + if d.chain < 100 { + for i := prevHead; tries > 0; tries-- { + if wEnd == win[i+length] { + n := matchLen(win[i:i+minMatchLook], wPos) + if n > length { + length = n + offset = pos - i + ok = true + if n >= nice { + // The match is good enough that we don't try to find a better one. + break + } + wEnd = win[pos+n] + } + } + if i <= minIndex { + // hashPrev[i & windowMask] has already been overwritten, so stop now. + break + } + i = int(d.state.hashPrev[i&windowMask]) - d.state.hashOffset + if i < minIndex { + break + } + } + return + } + + // Some like it higher (CSV), some like it lower (JSON) + const baseCost = 6 + // Base is 4 bytes at with an additional cost. + // Matches must be better than this. + for i := prevHead; tries > 0; tries-- { + if wEnd == win[i+length] { + n := matchLen(win[i:i+minMatchLook], wPos) + if n > length { + // Calculate gain. Estimate + newGain := d.h.bitLengthRaw(wPos[:n]) - int(offsetExtraBits[offsetCode(uint32(pos-i))]) - baseCost - int(lengthExtraBits[lengthCodes[(n-3)&255]]) + + //fmt.Println(n, "gain:", newGain, "prev:", cGain, "raw:", d.h.bitLengthRaw(wPos[:n])) + if newGain > cGain { + length = n + offset = pos - i + cGain = newGain + ok = true + if n >= nice { + // The match is good enough that we don't try to find a better one. + break + } + wEnd = win[pos+n] + } + } + } + if i <= minIndex { + // hashPrev[i & windowMask] has already been overwritten, so stop now. + break + } + i = int(d.state.hashPrev[i&windowMask]) - d.state.hashOffset + if i < minIndex { + break + } + } + return +} + +func (d *compressor) writeStoredBlock(buf []byte) error { + if d.w.writeStoredHeader(len(buf), false); d.w.err != nil { + return d.w.err + } + d.w.writeBytes(buf) + return d.w.err +} + +// hash4 returns a hash representation of the first 4 bytes +// of the supplied slice. +// The caller must ensure that len(b) >= 4. +func hash4(b []byte) uint32 { + return hash4u(binary.LittleEndian.Uint32(b), hashBits) +} + +// bulkHash4 will compute hashes using the same +// algorithm as hash4 +func bulkHash4(b []byte, dst []uint32) { + if len(b) < 4 { + return + } + hb := binary.LittleEndian.Uint32(b) + + dst[0] = hash4u(hb, hashBits) + end := len(b) - 4 + 1 + for i := 1; i < end; i++ { + hb = (hb >> 8) | uint32(b[i+3])<<24 + dst[i] = hash4u(hb, hashBits) + } +} + +func (d *compressor) initDeflate() { + d.window = make([]byte, 2*windowSize) + d.byteAvailable = false + d.err = nil + if d.state == nil { + return + } + s := d.state + s.index = 0 + s.hashOffset = 1 + s.length = minMatchLength - 1 + s.offset = 0 + s.chainHead = -1 +} + +// deflateLazy is the same as deflate, but with d.fastSkipHashing == skipNever, +// meaning it always has lazy matching on. +func (d *compressor) deflateLazy() { + s := d.state + // Sanity enables additional runtime tests. 
+ // It's intended to be used during development + // to supplement the currently ad-hoc unit tests. + const sanity = debugDeflate + + if d.windowEnd-s.index < minMatchLength+maxMatchLength && !d.sync { + return + } + if d.windowEnd != s.index && d.chain > 100 { + // Get literal huffman coder. + if d.h == nil { + d.h = newHuffmanEncoder(maxFlateBlockTokens) + } + var tmp [256]uint16 + for _, v := range d.window[s.index:d.windowEnd] { + tmp[v]++ + } + d.h.generate(tmp[:], 15) + } + + s.maxInsertIndex = d.windowEnd - (minMatchLength - 1) + + for { + if sanity && s.index > d.windowEnd { + panic("index > windowEnd") + } + lookahead := d.windowEnd - s.index + if lookahead < minMatchLength+maxMatchLength { + if !d.sync { + return + } + if sanity && s.index > d.windowEnd { + panic("index > windowEnd") + } + if lookahead == 0 { + // Flush current output block if any. + if d.byteAvailable { + // There is still one pending token that needs to be flushed + d.tokens.AddLiteral(d.window[s.index-1]) + d.byteAvailable = false + } + if d.tokens.n > 0 { + if d.err = d.writeBlock(&d.tokens, s.index, false); d.err != nil { + return + } + d.tokens.Reset() + } + return + } + } + if s.index < s.maxInsertIndex { + // Update the hash + hash := hash4(d.window[s.index:]) + ch := s.hashHead[hash] + s.chainHead = int(ch) + s.hashPrev[s.index&windowMask] = ch + s.hashHead[hash] = uint32(s.index + s.hashOffset) + } + prevLength := s.length + prevOffset := s.offset + s.length = minMatchLength - 1 + s.offset = 0 + minIndex := s.index - windowSize + if minIndex < 0 { + minIndex = 0 + } + + if s.chainHead-s.hashOffset >= minIndex && lookahead > prevLength && prevLength < d.lazy { + if newLength, newOffset, ok := d.findMatch(s.index, s.chainHead-s.hashOffset, lookahead); ok { + s.length = newLength + s.offset = newOffset + } + } + + if prevLength >= minMatchLength && s.length <= prevLength { + // Check for better match at end... + // + // checkOff must be >=2 since we otherwise risk checking s.index + // Offset of 2 seems to yield best results. + const checkOff = 2 + prevIndex := s.index - 1 + if prevIndex+prevLength+checkOff < s.maxInsertIndex { + end := lookahead + if lookahead > maxMatchLength { + end = maxMatchLength + } + end += prevIndex + idx := prevIndex + prevLength - (4 - checkOff) + h := hash4(d.window[idx:]) + ch2 := int(s.hashHead[h]) - s.hashOffset - prevLength + (4 - checkOff) + if ch2 > minIndex { + length := matchLen(d.window[prevIndex:end], d.window[ch2:]) + // It seems like a pure length metric is best. + if length > prevLength { + prevLength = length + prevOffset = prevIndex - ch2 + } + } + } + // There was a match at the previous step, and the current match is + // not better. Output the previous match. + d.tokens.AddMatch(uint32(prevLength-3), uint32(prevOffset-minOffsetSize)) + + // Insert in the hash table all strings up to the end of the match. + // index and index-1 are already inserted. If there is not enough + // lookahead, the last two strings are not inserted into the hash + // table. 
+ newIndex := s.index + prevLength - 1 + // Calculate missing hashes + end := newIndex + if end > s.maxInsertIndex { + end = s.maxInsertIndex + } + end += minMatchLength - 1 + startindex := s.index + 1 + if startindex > s.maxInsertIndex { + startindex = s.maxInsertIndex + } + tocheck := d.window[startindex:end] + dstSize := len(tocheck) - minMatchLength + 1 + if dstSize > 0 { + dst := s.hashMatch[:dstSize] + bulkHash4(tocheck, dst) + var newH uint32 + for i, val := range dst { + di := i + startindex + newH = val & hashMask + // Get previous value with the same hash. + // Our chain should point to the previous value. + s.hashPrev[di&windowMask] = s.hashHead[newH] + // Set the head of the hash chain to us. + s.hashHead[newH] = uint32(di + s.hashOffset) + } + } + + s.index = newIndex + d.byteAvailable = false + s.length = minMatchLength - 1 + if d.tokens.n == maxFlateBlockTokens { + // The block includes the current character + if d.err = d.writeBlock(&d.tokens, s.index, false); d.err != nil { + return + } + d.tokens.Reset() + } + s.ii = 0 + } else { + // Reset, if we got a match this run. + if s.length >= minMatchLength { + s.ii = 0 + } + // We have a byte waiting. Emit it. + if d.byteAvailable { + s.ii++ + d.tokens.AddLiteral(d.window[s.index-1]) + if d.tokens.n == maxFlateBlockTokens { + if d.err = d.writeBlock(&d.tokens, s.index, false); d.err != nil { + return + } + d.tokens.Reset() + } + s.index++ + + // If we have a long run of no matches, skip additional bytes + // Resets when s.ii overflows after 64KB. + if n := int(s.ii) - d.chain; n > 0 { + n = 1 + int(n>>6) + for j := 0; j < n; j++ { + if s.index >= d.windowEnd-1 { + break + } + d.tokens.AddLiteral(d.window[s.index-1]) + if d.tokens.n == maxFlateBlockTokens { + if d.err = d.writeBlock(&d.tokens, s.index, false); d.err != nil { + return + } + d.tokens.Reset() + } + // Index... + if s.index < s.maxInsertIndex { + h := hash4(d.window[s.index:]) + ch := s.hashHead[h] + s.chainHead = int(ch) + s.hashPrev[s.index&windowMask] = ch + s.hashHead[h] = uint32(s.index + s.hashOffset) + } + s.index++ + } + // Flush last byte + d.tokens.AddLiteral(d.window[s.index-1]) + d.byteAvailable = false + // s.length = minMatchLength - 1 // not needed, since s.ii is reset above, so it should never be > minMatchLength + if d.tokens.n == maxFlateBlockTokens { + if d.err = d.writeBlock(&d.tokens, s.index, false); d.err != nil { + return + } + d.tokens.Reset() + } + } + } else { + s.index++ + d.byteAvailable = true + } + } + } +} + +func (d *compressor) store() { + if d.windowEnd > 0 && (d.windowEnd == maxStoreBlockSize || d.sync) { + d.err = d.writeStoredBlock(d.window[:d.windowEnd]) + d.windowEnd = 0 + } +} + +// fillWindow will fill the buffer with data for huffman-only compression. +// The number of bytes copied is returned. +func (d *compressor) fillBlock(b []byte) int { + n := copy(d.window[d.windowEnd:], b) + d.windowEnd += n + return n +} + +// storeHuff will compress and store the currently added data, +// if enough has been accumulated or we at the end of the stream. +// Any error that occurred will be in d.err +func (d *compressor) storeHuff() { + if d.windowEnd < len(d.window) && !d.sync || d.windowEnd == 0 { + return + } + d.w.writeBlockHuff(false, d.window[:d.windowEnd], d.sync) + d.err = d.w.err + d.windowEnd = 0 +} + +// storeFast will compress and store the currently added data, +// if enough has been accumulated or we at the end of the stream. 
+// Any error that occurred will be in d.err +func (d *compressor) storeFast() { + // We only compress if we have maxStoreBlockSize. + if d.windowEnd < len(d.window) { + if !d.sync { + return + } + // Handle extremely small sizes. + if d.windowEnd < 128 { + if d.windowEnd == 0 { + return + } + if d.windowEnd <= 32 { + d.err = d.writeStoredBlock(d.window[:d.windowEnd]) + } else { + d.w.writeBlockHuff(false, d.window[:d.windowEnd], true) + d.err = d.w.err + } + d.tokens.Reset() + d.windowEnd = 0 + d.fast.Reset() + return + } + } + + d.fast.Encode(&d.tokens, d.window[:d.windowEnd]) + // If we made zero matches, store the block as is. + if d.tokens.n == 0 { + d.err = d.writeStoredBlock(d.window[:d.windowEnd]) + // If we removed less than 1/16th, huffman compress the block. + } else if int(d.tokens.n) > d.windowEnd-(d.windowEnd>>4) { + d.w.writeBlockHuff(false, d.window[:d.windowEnd], d.sync) + d.err = d.w.err + } else { + d.w.writeBlockDynamic(&d.tokens, false, d.window[:d.windowEnd], d.sync) + d.err = d.w.err + } + d.tokens.Reset() + d.windowEnd = 0 +} + +// write will add input byte to the stream. +// Unless an error occurs all bytes will be consumed. +func (d *compressor) write(b []byte) (n int, err error) { + if d.err != nil { + return 0, d.err + } + n = len(b) + for len(b) > 0 { + if d.windowEnd == len(d.window) || d.sync { + d.step(d) + } + b = b[d.fill(d, b):] + if d.err != nil { + return 0, d.err + } + } + return n, d.err +} + +func (d *compressor) syncFlush() error { + d.sync = true + if d.err != nil { + return d.err + } + d.step(d) + if d.err == nil { + d.w.writeStoredHeader(0, false) + d.w.flush() + d.err = d.w.err + } + d.sync = false + return d.err +} + +func (d *compressor) init(w io.Writer, level int) (err error) { + d.w = newHuffmanBitWriter(w) + + switch { + case level == NoCompression: + d.window = make([]byte, maxStoreBlockSize) + d.fill = (*compressor).fillBlock + d.step = (*compressor).store + case level == ConstantCompression: + d.w.logNewTablePenalty = 10 + d.window = make([]byte, 32<<10) + d.fill = (*compressor).fillBlock + d.step = (*compressor).storeHuff + case level == DefaultCompression: + level = 5 + fallthrough + case level >= 1 && level <= 6: + d.w.logNewTablePenalty = 7 + d.fast = newFastEnc(level) + d.window = make([]byte, maxStoreBlockSize) + d.fill = (*compressor).fillBlock + d.step = (*compressor).storeFast + case 7 <= level && level <= 9: + d.w.logNewTablePenalty = 8 + d.state = &advancedState{} + d.compressionLevel = levels[level] + d.initDeflate() + d.fill = (*compressor).fillDeflate + d.step = (*compressor).deflateLazy + default: + return fmt.Errorf("flate: invalid compression level %d: want value in range [-2, 9]", level) + } + d.level = level + return nil +} + +// reset the state of the compressor. +func (d *compressor) reset(w io.Writer) { + d.w.reset(w) + d.sync = false + d.err = nil + // We only need to reset a few things for Snappy. + if d.fast != nil { + d.fast.Reset() + d.windowEnd = 0 + d.tokens.Reset() + return + } + switch d.compressionLevel.chain { + case 0: + // level was NoCompression or ConstantCompresssion. 
+ d.windowEnd = 0 + default: + s := d.state + s.chainHead = -1 + for i := range s.hashHead { + s.hashHead[i] = 0 + } + for i := range s.hashPrev { + s.hashPrev[i] = 0 + } + s.hashOffset = 1 + s.index, d.windowEnd = 0, 0 + d.blockStart, d.byteAvailable = 0, false + d.tokens.Reset() + s.length = minMatchLength - 1 + s.offset = 0 + s.ii = 0 + s.maxInsertIndex = 0 + } +} + +func (d *compressor) close() error { + if d.err != nil { + return d.err + } + d.sync = true + d.step(d) + if d.err != nil { + return d.err + } + if d.w.writeStoredHeader(0, true); d.w.err != nil { + return d.w.err + } + d.w.flush() + d.w.reset(nil) + return d.w.err +} + +// NewWriter returns a new Writer compressing data at the given level. +// Following zlib, levels range from 1 (BestSpeed) to 9 (BestCompression); +// higher levels typically run slower but compress more. +// Level 0 (NoCompression) does not attempt any compression; it only adds the +// necessary DEFLATE framing. +// Level -1 (DefaultCompression) uses the default compression level. +// Level -2 (ConstantCompression) will use Huffman compression only, giving +// a very fast compression for all types of input, but sacrificing considerable +// compression efficiency. +// +// If level is in the range [-2, 9] then the error returned will be nil. +// Otherwise the error returned will be non-nil. +func NewWriter(w io.Writer, level int) (*Writer, error) { + var dw Writer + if err := dw.d.init(w, level); err != nil { + return nil, err + } + return &dw, nil +} + +// NewWriterDict is like NewWriter but initializes the new +// Writer with a preset dictionary. The returned Writer behaves +// as if the dictionary had been written to it without producing +// any compressed output. The compressed data written to w +// can only be decompressed by a Reader initialized with the +// same dictionary. +func NewWriterDict(w io.Writer, level int, dict []byte) (*Writer, error) { + zw, err := NewWriter(w, level) + if err != nil { + return nil, err + } + zw.d.fillWindow(dict) + zw.dict = append(zw.dict, dict...) // duplicate dictionary for Reset method. + return zw, err +} + +// A Writer takes data written to it and writes the compressed +// form of that data to an underlying writer (see NewWriter). +type Writer struct { + d compressor + dict []byte +} + +// Write writes data to w, which will eventually write the +// compressed form of data to its underlying writer. +func (w *Writer) Write(data []byte) (n int, err error) { + return w.d.write(data) +} + +// Flush flushes any pending data to the underlying writer. +// It is useful mainly in compressed network protocols, to ensure that +// a remote reader has enough data to reconstruct a packet. +// Flush does not return until the data has been written. +// Calling Flush when there is no pending data still causes the Writer +// to emit a sync marker of at least 4 bytes. +// If the underlying writer returns an error, Flush returns that error. +// +// In the terminology of the zlib library, Flush is equivalent to Z_SYNC_FLUSH. +func (w *Writer) Flush() error { + // For more about flushing: + // http://www.bolet.org/~pornin/deflate-flush.html + return w.d.syncFlush() +} + +// Close flushes and closes the writer. +func (w *Writer) Close() error { + return w.d.close() +} + +// Reset discards the writer's state and makes it equivalent to +// the result of NewWriter or NewWriterDict called with dst +// and w's level and dictionary. 
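Editorial aside (illustrative, not part of the vendored file): the exported surface added above — NewWriter, Write, Flush, Close — already covers a full compression round. A minimal usage sketch, assuming the vendored import path github.com/klauspost/compress/flate and an arbitrary level of 5:

package main

import (
	"bytes"
	"fmt"
	"log"

	"github.com/klauspost/compress/flate"
)

func main() {
	var buf bytes.Buffer

	// Level 5 falls in the "fast" range handled by storeFast above;
	// DefaultCompression (-1) is mapped to the same level in init.
	zw, err := flate.NewWriter(&buf, 5)
	if err != nil {
		log.Fatal(err)
	}
	if _, err := zw.Write([]byte("hello, deflate")); err != nil {
		log.Fatal(err)
	}
	// Flush corresponds to syncFlush above: it emits an empty stored
	// block as a sync marker so a reader can decode everything so far.
	if err := zw.Flush(); err != nil {
		log.Fatal(err)
	}
	if err := zw.Close(); err != nil {
		log.Fatal(err)
	}
	fmt.Println("compressed bytes:", buf.Len())
}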
+func (w *Writer) Reset(dst io.Writer) { + if len(w.dict) > 0 { + // w was created with NewWriterDict + w.d.reset(dst) + if dst != nil { + w.d.fillWindow(w.dict) + } + } else { + // w was created with NewWriter + w.d.reset(dst) + } +} + +// ResetDict discards the writer's state and makes it equivalent to +// the result of NewWriter or NewWriterDict called with dst +// and w's level, but sets a specific dictionary. +func (w *Writer) ResetDict(dst io.Writer, dict []byte) { + w.dict = dict + w.d.reset(dst) + w.d.fillWindow(w.dict) +} diff --git a/vendor/github.com/klauspost/compress/flate/dict_decoder.go b/vendor/github.com/klauspost/compress/flate/dict_decoder.go new file mode 100644 index 000000000..71c75a065 --- /dev/null +++ b/vendor/github.com/klauspost/compress/flate/dict_decoder.go @@ -0,0 +1,184 @@ +// Copyright 2016 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package flate + +// dictDecoder implements the LZ77 sliding dictionary as used in decompression. +// LZ77 decompresses data through sequences of two forms of commands: +// +// * Literal insertions: Runs of one or more symbols are inserted into the data +// stream as is. This is accomplished through the writeByte method for a +// single symbol, or combinations of writeSlice/writeMark for multiple symbols. +// Any valid stream must start with a literal insertion if no preset dictionary +// is used. +// +// * Backward copies: Runs of one or more symbols are copied from previously +// emitted data. Backward copies come as the tuple (dist, length) where dist +// determines how far back in the stream to copy from and length determines how +// many bytes to copy. Note that it is valid for the length to be greater than +// the distance. Since LZ77 uses forward copies, that situation is used to +// perform a form of run-length encoding on repeated runs of symbols. +// The writeCopy and tryWriteCopy are used to implement this command. +// +// For performance reasons, this implementation performs little to no sanity +// checks about the arguments. As such, the invariants documented for each +// method call must be respected. +type dictDecoder struct { + hist []byte // Sliding window history + + // Invariant: 0 <= rdPos <= wrPos <= len(hist) + wrPos int // Current output position in buffer + rdPos int // Have emitted hist[:rdPos] already + full bool // Has a full window length been written yet? +} + +// init initializes dictDecoder to have a sliding window dictionary of the given +// size. If a preset dict is provided, it will initialize the dictionary with +// the contents of dict. +func (dd *dictDecoder) init(size int, dict []byte) { + *dd = dictDecoder{hist: dd.hist} + + if cap(dd.hist) < size { + dd.hist = make([]byte, size) + } + dd.hist = dd.hist[:size] + + if len(dict) > len(dd.hist) { + dict = dict[len(dict)-len(dd.hist):] + } + dd.wrPos = copy(dd.hist, dict) + if dd.wrPos == len(dd.hist) { + dd.wrPos = 0 + dd.full = true + } + dd.rdPos = dd.wrPos +} + +// histSize reports the total amount of historical data in the dictionary. +func (dd *dictDecoder) histSize() int { + if dd.full { + return len(dd.hist) + } + return dd.wrPos +} + +// availRead reports the number of bytes that can be flushed by readFlush. +func (dd *dictDecoder) availRead() int { + return dd.wrPos - dd.rdPos +} + +// availWrite reports the available amount of output buffer space. 
+func (dd *dictDecoder) availWrite() int { + return len(dd.hist) - dd.wrPos +} + +// writeSlice returns a slice of the available buffer to write data to. +// +// This invariant will be kept: len(s) <= availWrite() +func (dd *dictDecoder) writeSlice() []byte { + return dd.hist[dd.wrPos:] +} + +// writeMark advances the writer pointer by cnt. +// +// This invariant must be kept: 0 <= cnt <= availWrite() +func (dd *dictDecoder) writeMark(cnt int) { + dd.wrPos += cnt +} + +// writeByte writes a single byte to the dictionary. +// +// This invariant must be kept: 0 < availWrite() +func (dd *dictDecoder) writeByte(c byte) { + dd.hist[dd.wrPos] = c + dd.wrPos++ +} + +// writeCopy copies a string at a given (dist, length) to the output. +// This returns the number of bytes copied and may be less than the requested +// length if the available space in the output buffer is too small. +// +// This invariant must be kept: 0 < dist <= histSize() +func (dd *dictDecoder) writeCopy(dist, length int) int { + dstBase := dd.wrPos + dstPos := dstBase + srcPos := dstPos - dist + endPos := dstPos + length + if endPos > len(dd.hist) { + endPos = len(dd.hist) + } + + // Copy non-overlapping section after destination position. + // + // This section is non-overlapping in that the copy length for this section + // is always less than or equal to the backwards distance. This can occur + // if a distance refers to data that wraps-around in the buffer. + // Thus, a backwards copy is performed here; that is, the exact bytes in + // the source prior to the copy is placed in the destination. + if srcPos < 0 { + srcPos += len(dd.hist) + dstPos += copy(dd.hist[dstPos:endPos], dd.hist[srcPos:]) + srcPos = 0 + } + + // Copy possibly overlapping section before destination position. + // + // This section can overlap if the copy length for this section is larger + // than the backwards distance. This is allowed by LZ77 so that repeated + // strings can be succinctly represented using (dist, length) pairs. + // Thus, a forwards copy is performed here; that is, the bytes copied is + // possibly dependent on the resulting bytes in the destination as the copy + // progresses along. This is functionally equivalent to the following: + // + // for i := 0; i < endPos-dstPos; i++ { + // dd.hist[dstPos+i] = dd.hist[srcPos+i] + // } + // dstPos = endPos + // + for dstPos < endPos { + dstPos += copy(dd.hist[dstPos:endPos], dd.hist[srcPos:dstPos]) + } + + dd.wrPos = dstPos + return dstPos - dstBase +} + +// tryWriteCopy tries to copy a string at a given (distance, length) to the +// output. This specialized version is optimized for short distances. +// +// This method is designed to be inlined for performance reasons. +// +// This invariant must be kept: 0 < dist <= histSize() +func (dd *dictDecoder) tryWriteCopy(dist, length int) int { + dstPos := dd.wrPos + endPos := dstPos + length + if dstPos < dist || endPos > len(dd.hist) { + return 0 + } + dstBase := dstPos + srcPos := dstPos - dist + + // Copy possibly overlapping section before destination position. +loop: + dstPos += copy(dd.hist[dstPos:endPos], dd.hist[srcPos:dstPos]) + if dstPos < endPos { + goto loop // Avoid for-loop so that this function can be inlined + } + + dd.wrPos = dstPos + return dstPos - dstBase +} + +// readFlush returns a slice of the historical buffer that is ready to be +// emitted to the user. The data returned by readFlush must be fully consumed +// before calling any other dictDecoder methods. 
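Editorial aside (illustrative, not part of the vendored file): writeCopy above depends on the LZ77 rule that a match length may exceed its backward distance, in which case the forward copy re-reads bytes it has just written and effectively performs run-length expansion. A toy sketch of that behaviour, independent of the dictDecoder buffer management:

package main

import "fmt"

// lz77Copy appends length bytes to dst, each taken dist bytes back from the
// current end. Because the copy moves forward one byte at a time, a distance
// smaller than the length repeats the bytes it has just produced, exactly as
// DEFLATE allows.
func lz77Copy(dst []byte, dist, length int) []byte {
	for i := 0; i < length; i++ {
		dst = append(dst, dst[len(dst)-dist])
	}
	return dst
}

func main() {
	out := lz77Copy([]byte("ab"), 2, 6)
	fmt.Println(string(out)) // "abababab": (dist=2, length=6) repeats "ab"
}

The hot path in writeCopy does the same work with chunked copy calls instead of a byte-at-a-time loop.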
+func (dd *dictDecoder) readFlush() []byte { + toRead := dd.hist[dd.rdPos:dd.wrPos] + dd.rdPos = dd.wrPos + if dd.wrPos == len(dd.hist) { + dd.wrPos, dd.rdPos = 0, 0 + dd.full = true + } + return toRead +} diff --git a/vendor/github.com/klauspost/compress/flate/fast_encoder.go b/vendor/github.com/klauspost/compress/flate/fast_encoder.go new file mode 100644 index 000000000..f781aaa62 --- /dev/null +++ b/vendor/github.com/klauspost/compress/flate/fast_encoder.go @@ -0,0 +1,233 @@ +// Copyright 2011 The Snappy-Go Authors. All rights reserved. +// Modified for deflate by Klaus Post (c) 2015. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package flate + +import ( + "encoding/binary" + "fmt" + "math/bits" +) + +type fastEnc interface { + Encode(dst *tokens, src []byte) + Reset() +} + +func newFastEnc(level int) fastEnc { + switch level { + case 1: + return &fastEncL1{fastGen: fastGen{cur: maxStoreBlockSize}} + case 2: + return &fastEncL2{fastGen: fastGen{cur: maxStoreBlockSize}} + case 3: + return &fastEncL3{fastGen: fastGen{cur: maxStoreBlockSize}} + case 4: + return &fastEncL4{fastGen: fastGen{cur: maxStoreBlockSize}} + case 5: + return &fastEncL5{fastGen: fastGen{cur: maxStoreBlockSize}} + case 6: + return &fastEncL6{fastGen: fastGen{cur: maxStoreBlockSize}} + default: + panic("invalid level specified") + } +} + +const ( + tableBits = 15 // Bits used in the table + tableSize = 1 << tableBits // Size of the table + tableShift = 32 - tableBits // Right-shift to get the tableBits most significant bits of a uint32. + baseMatchOffset = 1 // The smallest match offset + baseMatchLength = 3 // The smallest match length per the RFC section 3.2.5 + maxMatchOffset = 1 << 15 // The largest match offset + + bTableBits = 17 // Bits used in the big tables + bTableSize = 1 << bTableBits // Size of the table + allocHistory = maxStoreBlockSize * 5 // Size to preallocate for history. + bufferReset = (1 << 31) - allocHistory - maxStoreBlockSize - 1 // Reset the buffer offset when reaching this. +) + +const ( + prime3bytes = 506832829 + prime4bytes = 2654435761 + prime5bytes = 889523592379 + prime6bytes = 227718039650203 + prime7bytes = 58295818150454627 + prime8bytes = 0xcf1bbcdcb7a56463 +) + +func load32(b []byte, i int) uint32 { + // Help the compiler eliminate bounds checks on the read so it can be done in a single read. + b = b[i:] + b = b[:4] + return uint32(b[0]) | uint32(b[1])<<8 | uint32(b[2])<<16 | uint32(b[3])<<24 +} + +func load64(b []byte, i int) uint64 { + return binary.LittleEndian.Uint64(b[i:]) +} + +func load3232(b []byte, i int32) uint32 { + return binary.LittleEndian.Uint32(b[i:]) +} + +func load6432(b []byte, i int32) uint64 { + return binary.LittleEndian.Uint64(b[i:]) +} + +func hash(u uint32) uint32 { + return (u * 0x1e35a7bd) >> tableShift +} + +type tableEntry struct { + offset int32 +} + +// fastGen maintains the table for matches, +// and the previous byte block for level 2. +// This is the generic implementation. 
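Editorial aside (illustrative, not part of the vendored file): the fast encoder finds match candidates by hashing a few input bytes into a small table, as in hash above, which multiplies by a large odd constant and keeps the top tableBits bits. A self-contained sketch of that scheme (hashBucket is a helper local to this example, not the package's hash4):

package main

import (
	"encoding/binary"
	"fmt"
)

const (
	tableBits  = 15             // as defined above
	tableShift = 32 - tableBits // keep the most significant tableBits bits
)

// hashBucket maps the first four bytes of b into a table with 1<<tableBits
// entries: multiply by a large odd constant, then keep the high bits — the
// same shape as hash(u) in fast_encoder.go.
func hashBucket(b []byte) uint32 {
	u := binary.LittleEndian.Uint32(b)
	return (u * 0x1e35a7bd) >> tableShift
}

func main() {
	fmt.Println(hashBucket([]byte("abcd"))) // a bucket index in [0, 32768)
	fmt.Println(hashBucket([]byte("abce"))) // different 4-byte prefix, usually a different bucket
}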
+type fastGen struct { + hist []byte + cur int32 +} + +func (e *fastGen) addBlock(src []byte) int32 { + // check if we have space already + if len(e.hist)+len(src) > cap(e.hist) { + if cap(e.hist) == 0 { + e.hist = make([]byte, 0, allocHistory) + } else { + if cap(e.hist) < maxMatchOffset*2 { + panic("unexpected buffer size") + } + // Move down + offset := int32(len(e.hist)) - maxMatchOffset + copy(e.hist[0:maxMatchOffset], e.hist[offset:]) + e.cur += offset + e.hist = e.hist[:maxMatchOffset] + } + } + s := int32(len(e.hist)) + e.hist = append(e.hist, src...) + return s +} + +// hash4 returns the hash of u to fit in a hash table with h bits. +// Preferably h should be a constant and should always be <32. +func hash4u(u uint32, h uint8) uint32 { + return (u * prime4bytes) >> (32 - h) +} + +type tableEntryPrev struct { + Cur tableEntry + Prev tableEntry +} + +// hash4x64 returns the hash of the lowest 4 bytes of u to fit in a hash table with h bits. +// Preferably h should be a constant and should always be <32. +func hash4x64(u uint64, h uint8) uint32 { + return (uint32(u) * prime4bytes) >> ((32 - h) & reg8SizeMask32) +} + +// hash7 returns the hash of the lowest 7 bytes of u to fit in a hash table with h bits. +// Preferably h should be a constant and should always be <64. +func hash7(u uint64, h uint8) uint32 { + return uint32(((u << (64 - 56)) * prime7bytes) >> ((64 - h) & reg8SizeMask64)) +} + +// hash8 returns the hash of u to fit in a hash table with h bits. +// Preferably h should be a constant and should always be <64. +func hash8(u uint64, h uint8) uint32 { + return uint32((u * prime8bytes) >> ((64 - h) & reg8SizeMask64)) +} + +// hash6 returns the hash of the lowest 6 bytes of u to fit in a hash table with h bits. +// Preferably h should be a constant and should always be <64. +func hash6(u uint64, h uint8) uint32 { + return uint32(((u << (64 - 48)) * prime6bytes) >> ((64 - h) & reg8SizeMask64)) +} + +// matchlen will return the match length between offsets and t in src. +// The maximum length returned is maxMatchLength - 4. +// It is assumed that s > t, that t >=0 and s < len(src). +func (e *fastGen) matchlen(s, t int32, src []byte) int32 { + if debugDecode { + if t >= s { + panic(fmt.Sprint("t >=s:", t, s)) + } + if int(s) >= len(src) { + panic(fmt.Sprint("s >= len(src):", s, len(src))) + } + if t < 0 { + panic(fmt.Sprint("t < 0:", t)) + } + if s-t > maxMatchOffset { + panic(fmt.Sprint(s, "-", t, "(", s-t, ") > maxMatchLength (", maxMatchOffset, ")")) + } + } + s1 := int(s) + maxMatchLength - 4 + if s1 > len(src) { + s1 = len(src) + } + + // Extend the match to be as long as possible. + return int32(matchLen(src[s:s1], src[t:])) +} + +// matchlenLong will return the match length between offsets and t in src. +// It is assumed that s > t, that t >=0 and s < len(src). +func (e *fastGen) matchlenLong(s, t int32, src []byte) int32 { + if debugDeflate { + if t >= s { + panic(fmt.Sprint("t >=s:", t, s)) + } + if int(s) >= len(src) { + panic(fmt.Sprint("s >= len(src):", s, len(src))) + } + if t < 0 { + panic(fmt.Sprint("t < 0:", t)) + } + if s-t > maxMatchOffset { + panic(fmt.Sprint(s, "-", t, "(", s-t, ") > maxMatchLength (", maxMatchOffset, ")")) + } + } + // Extend the match to be as long as possible. + return int32(matchLen(src[s:], src[t:])) +} + +// Reset the encoding table. +func (e *fastGen) Reset() { + if cap(e.hist) < allocHistory { + e.hist = make([]byte, 0, allocHistory) + } + // We offset current position so everything will be out of reach. 
+ // If we are above the buffer reset it will be cleared anyway since len(hist) == 0. + if e.cur <= bufferReset { + e.cur += maxMatchOffset + int32(len(e.hist)) + } + e.hist = e.hist[:0] +} + +// matchLen returns the maximum length. +// 'a' must be the shortest of the two. +func matchLen(a, b []byte) int { + var checked int + + for len(a) >= 8 { + if diff := binary.LittleEndian.Uint64(a) ^ binary.LittleEndian.Uint64(b); diff != 0 { + return checked + (bits.TrailingZeros64(diff) >> 3) + } + checked += 8 + a = a[8:] + b = b[8:] + } + b = b[:len(a)] + for i := range a { + if a[i] != b[i] { + return i + checked + } + } + return len(a) + checked +} diff --git a/vendor/github.com/klauspost/compress/flate/huffman_bit_writer.go b/vendor/github.com/klauspost/compress/flate/huffman_bit_writer.go new file mode 100644 index 000000000..40ef45c2f --- /dev/null +++ b/vendor/github.com/klauspost/compress/flate/huffman_bit_writer.go @@ -0,0 +1,1185 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package flate + +import ( + "encoding/binary" + "fmt" + "io" + "math" +) + +const ( + // The largest offset code. + offsetCodeCount = 30 + + // The special code used to mark the end of a block. + endBlockMarker = 256 + + // The first length code. + lengthCodesStart = 257 + + // The number of codegen codes. + codegenCodeCount = 19 + badCode = 255 + + // maxPredefinedTokens is the maximum number of tokens + // where we check if fixed size is smaller. + maxPredefinedTokens = 250 + + // bufferFlushSize indicates the buffer size + // after which bytes are flushed to the writer. + // Should preferably be a multiple of 6, since + // we accumulate 6 bytes between writes to the buffer. + bufferFlushSize = 246 + + // bufferSize is the actual output byte buffer size. + // It must have additional headroom for a flush + // which can contain up to 8 bytes. + bufferSize = bufferFlushSize + 8 +) + +// Minimum length code that emits bits. +const lengthExtraBitsMinCode = 8 + +// The number of extra bits needed by length code X - LENGTH_CODES_START. +var lengthExtraBits = [32]uint8{ + /* 257 */ 0, 0, 0, + /* 260 */ 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, + /* 270 */ 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, + /* 280 */ 4, 5, 5, 5, 5, 0, +} + +// The length indicated by length code X - LENGTH_CODES_START. +var lengthBase = [32]uint8{ + 0, 1, 2, 3, 4, 5, 6, 7, 8, 10, + 12, 14, 16, 20, 24, 28, 32, 40, 48, 56, + 64, 80, 96, 112, 128, 160, 192, 224, 255, +} + +// Minimum offset code that emits bits. +const offsetExtraBitsMinCode = 4 + +// offset code word extra bits. +var offsetExtraBits = [32]int8{ + 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, + 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, + 9, 9, 10, 10, 11, 11, 12, 12, 13, 13, + /* extended window */ + 14, 14, +} + +var offsetCombined = [32]uint32{} + +func init() { + var offsetBase = [32]uint32{ + /* normal deflate */ + 0x000000, 0x000001, 0x000002, 0x000003, 0x000004, + 0x000006, 0x000008, 0x00000c, 0x000010, 0x000018, + 0x000020, 0x000030, 0x000040, 0x000060, 0x000080, + 0x0000c0, 0x000100, 0x000180, 0x000200, 0x000300, + 0x000400, 0x000600, 0x000800, 0x000c00, 0x001000, + 0x001800, 0x002000, 0x003000, 0x004000, 0x006000, + + /* extended window */ + 0x008000, 0x00c000, + } + + for i := range offsetCombined[:] { + // Don't use extended window values... 
+ if offsetExtraBits[i] == 0 || offsetBase[i] > 0x006000 { + continue + } + offsetCombined[i] = uint32(offsetExtraBits[i]) | (offsetBase[i] << 8) + } +} + +// The odd order in which the codegen code sizes are written. +var codegenOrder = []uint32{16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15} + +type huffmanBitWriter struct { + // writer is the underlying writer. + // Do not use it directly; use the write method, which ensures + // that Write errors are sticky. + writer io.Writer + + // Data waiting to be written is bytes[0:nbytes] + // and then the low nbits of bits. + bits uint64 + nbits uint8 + nbytes uint8 + lastHuffMan bool + literalEncoding *huffmanEncoder + tmpLitEncoding *huffmanEncoder + offsetEncoding *huffmanEncoder + codegenEncoding *huffmanEncoder + err error + lastHeader int + // Set between 0 (reused block can be up to 2x the size) + logNewTablePenalty uint + bytes [256 + 8]byte + literalFreq [lengthCodesStart + 32]uint16 + offsetFreq [32]uint16 + codegenFreq [codegenCodeCount]uint16 + + // codegen must have an extra space for the final symbol. + codegen [literalCount + offsetCodeCount + 1]uint8 +} + +// Huffman reuse. +// +// The huffmanBitWriter supports reusing huffman tables and thereby combining block sections. +// +// This is controlled by several variables: +// +// If lastHeader is non-zero the Huffman table can be reused. +// This also indicates that a Huffman table has been generated that can output all +// possible symbols. +// It also indicates that an EOB has not yet been emitted, so if a new tabel is generated +// an EOB with the previous table must be written. +// +// If lastHuffMan is set, a table for outputting literals has been generated and offsets are invalid. +// +// An incoming block estimates the output size of a new table using a 'fresh' by calculating the +// optimal size and adding a penalty in 'logNewTablePenalty'. +// A Huffman table is not optimal, which is why we add a penalty, and generating a new table +// is slower both for compression and decompression. 
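Editorial aside (illustrative, not part of the vendored file): the reuse strategy described above comes down to a size comparison in which a freshly generated table is charged an extra estimate>>logNewTablePenalty bits, since a real table is rarely as small as the optimal estimate. A reduced sketch of that decision with hypothetical bit counts:

package main

import "fmt"

// shouldStartNewTable reports whether emitting a fresh Huffman table is
// estimated to beat reusing the previous one. estNewBits is the optimal size
// of the block under a new table (including its header), reuseBits is the
// size under the previous table, and logPenalty plays the role of
// logNewTablePenalty above.
func shouldStartNewTable(estNewBits, reuseBits int, logPenalty uint) bool {
	// Penalize the fresh estimate before comparing, because generating and
	// transmitting a new table costs more than the optimal bound suggests.
	estNewBits += estNewBits >> logPenalty
	return estNewBits < reuseBits
}

func main() {
	// With a penalty shift of 7 (~0.8% overhead), a marginally smaller
	// fresh table still loses to reuse.
	fmt.Println(shouldStartNewTable(10000, 10050, 7)) // false: 10078 >= 10050
	fmt.Println(shouldStartNewTable(9000, 10050, 7))  // true: 9070 < 10050
}

writeBlockDynamic below performs this comparison against dynamicReuseSize plus the extra match bits before deciding whether to emit the EOB it still owes.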
+ +func newHuffmanBitWriter(w io.Writer) *huffmanBitWriter { + return &huffmanBitWriter{ + writer: w, + literalEncoding: newHuffmanEncoder(literalCount), + tmpLitEncoding: newHuffmanEncoder(literalCount), + codegenEncoding: newHuffmanEncoder(codegenCodeCount), + offsetEncoding: newHuffmanEncoder(offsetCodeCount), + } +} + +func (w *huffmanBitWriter) reset(writer io.Writer) { + w.writer = writer + w.bits, w.nbits, w.nbytes, w.err = 0, 0, 0, nil + w.lastHeader = 0 + w.lastHuffMan = false +} + +func (w *huffmanBitWriter) canReuse(t *tokens) (ok bool) { + a := t.offHist[:offsetCodeCount] + b := w.offsetEncoding.codes + b = b[:len(a)] + for i, v := range a { + if v != 0 && b[i].zero() { + return false + } + } + + a = t.extraHist[:literalCount-256] + b = w.literalEncoding.codes[256:literalCount] + b = b[:len(a)] + for i, v := range a { + if v != 0 && b[i].zero() { + return false + } + } + + a = t.litHist[:256] + b = w.literalEncoding.codes[:len(a)] + for i, v := range a { + if v != 0 && b[i].zero() { + return false + } + } + return true +} + +func (w *huffmanBitWriter) flush() { + if w.err != nil { + w.nbits = 0 + return + } + if w.lastHeader > 0 { + // We owe an EOB + w.writeCode(w.literalEncoding.codes[endBlockMarker]) + w.lastHeader = 0 + } + n := w.nbytes + for w.nbits != 0 { + w.bytes[n] = byte(w.bits) + w.bits >>= 8 + if w.nbits > 8 { // Avoid underflow + w.nbits -= 8 + } else { + w.nbits = 0 + } + n++ + } + w.bits = 0 + w.write(w.bytes[:n]) + w.nbytes = 0 +} + +func (w *huffmanBitWriter) write(b []byte) { + if w.err != nil { + return + } + _, w.err = w.writer.Write(b) +} + +func (w *huffmanBitWriter) writeBits(b int32, nb uint8) { + w.bits |= uint64(b) << (w.nbits & 63) + w.nbits += nb + if w.nbits >= 48 { + w.writeOutBits() + } +} + +func (w *huffmanBitWriter) writeBytes(bytes []byte) { + if w.err != nil { + return + } + n := w.nbytes + if w.nbits&7 != 0 { + w.err = InternalError("writeBytes with unfinished bits") + return + } + for w.nbits != 0 { + w.bytes[n] = byte(w.bits) + w.bits >>= 8 + w.nbits -= 8 + n++ + } + if n != 0 { + w.write(w.bytes[:n]) + } + w.nbytes = 0 + w.write(bytes) +} + +// RFC 1951 3.2.7 specifies a special run-length encoding for specifying +// the literal and offset lengths arrays (which are concatenated into a single +// array). This method generates that run-length encoding. +// +// The result is written into the codegen array, and the frequencies +// of each code is written into the codegenFreq array. +// Codes 0-15 are single byte codes. Codes 16-18 are followed by additional +// information. Code badCode is an end marker +// +// numLiterals The number of literals in literalEncoding +// numOffsets The number of offsets in offsetEncoding +// litenc, offenc The literal and offset encoder to use +func (w *huffmanBitWriter) generateCodegen(numLiterals int, numOffsets int, litEnc, offEnc *huffmanEncoder) { + for i := range w.codegenFreq { + w.codegenFreq[i] = 0 + } + // Note that we are using codegen both as a temporary variable for holding + // a copy of the frequencies, and as the place where we put the result. + // This is fine because the output is always shorter than the input used + // so far. + codegen := w.codegen[:] // cache + // Copy the concatenated code sizes to codegen. Put a marker at the end. 
+ cgnl := codegen[:numLiterals] + for i := range cgnl { + cgnl[i] = litEnc.codes[i].len() + } + + cgnl = codegen[numLiterals : numLiterals+numOffsets] + for i := range cgnl { + cgnl[i] = offEnc.codes[i].len() + } + codegen[numLiterals+numOffsets] = badCode + + size := codegen[0] + count := 1 + outIndex := 0 + for inIndex := 1; size != badCode; inIndex++ { + // INVARIANT: We have seen "count" copies of size that have not yet + // had output generated for them. + nextSize := codegen[inIndex] + if nextSize == size { + count++ + continue + } + // We need to generate codegen indicating "count" of size. + if size != 0 { + codegen[outIndex] = size + outIndex++ + w.codegenFreq[size]++ + count-- + for count >= 3 { + n := 6 + if n > count { + n = count + } + codegen[outIndex] = 16 + outIndex++ + codegen[outIndex] = uint8(n - 3) + outIndex++ + w.codegenFreq[16]++ + count -= n + } + } else { + for count >= 11 { + n := 138 + if n > count { + n = count + } + codegen[outIndex] = 18 + outIndex++ + codegen[outIndex] = uint8(n - 11) + outIndex++ + w.codegenFreq[18]++ + count -= n + } + if count >= 3 { + // count >= 3 && count <= 10 + codegen[outIndex] = 17 + outIndex++ + codegen[outIndex] = uint8(count - 3) + outIndex++ + w.codegenFreq[17]++ + count = 0 + } + } + count-- + for ; count >= 0; count-- { + codegen[outIndex] = size + outIndex++ + w.codegenFreq[size]++ + } + // Set up invariant for next time through the loop. + size = nextSize + count = 1 + } + // Marker indicating the end of the codegen. + codegen[outIndex] = badCode +} + +func (w *huffmanBitWriter) codegens() int { + numCodegens := len(w.codegenFreq) + for numCodegens > 4 && w.codegenFreq[codegenOrder[numCodegens-1]] == 0 { + numCodegens-- + } + return numCodegens +} + +func (w *huffmanBitWriter) headerSize() (size, numCodegens int) { + numCodegens = len(w.codegenFreq) + for numCodegens > 4 && w.codegenFreq[codegenOrder[numCodegens-1]] == 0 { + numCodegens-- + } + return 3 + 5 + 5 + 4 + (3 * numCodegens) + + w.codegenEncoding.bitLength(w.codegenFreq[:]) + + int(w.codegenFreq[16])*2 + + int(w.codegenFreq[17])*3 + + int(w.codegenFreq[18])*7, numCodegens +} + +// dynamicSize returns the size of dynamically encoded data in bits. +func (w *huffmanBitWriter) dynamicReuseSize(litEnc, offEnc *huffmanEncoder) (size int) { + size = litEnc.bitLength(w.literalFreq[:]) + + offEnc.bitLength(w.offsetFreq[:]) + return size +} + +// dynamicSize returns the size of dynamically encoded data in bits. +func (w *huffmanBitWriter) dynamicSize(litEnc, offEnc *huffmanEncoder, extraBits int) (size, numCodegens int) { + header, numCodegens := w.headerSize() + size = header + + litEnc.bitLength(w.literalFreq[:]) + + offEnc.bitLength(w.offsetFreq[:]) + + extraBits + return size, numCodegens +} + +// extraBitSize will return the number of bits that will be written +// as "extra" bits on matches. +func (w *huffmanBitWriter) extraBitSize() int { + total := 0 + for i, n := range w.literalFreq[257:literalCount] { + total += int(n) * int(lengthExtraBits[i&31]) + } + for i, n := range w.offsetFreq[:offsetCodeCount] { + total += int(n) * int(offsetExtraBits[i&31]) + } + return total +} + +// fixedSize returns the size of dynamically encoded data in bits. +func (w *huffmanBitWriter) fixedSize(extraBits int) int { + return 3 + + fixedLiteralEncoding.bitLength(w.literalFreq[:]) + + fixedOffsetEncoding.bitLength(w.offsetFreq[:]) + + extraBits +} + +// storedSize calculates the stored size, including header. 
+// The function returns the size in bits and whether the block +// fits inside a single block. +func (w *huffmanBitWriter) storedSize(in []byte) (int, bool) { + if in == nil { + return 0, false + } + if len(in) <= maxStoreBlockSize { + return (len(in) + 5) * 8, true + } + return 0, false +} + +func (w *huffmanBitWriter) writeCode(c hcode) { + // The function does not get inlined if we "& 63" the shift. + w.bits |= c.code64() << (w.nbits & 63) + w.nbits += c.len() + if w.nbits >= 48 { + w.writeOutBits() + } +} + +// writeOutBits will write bits to the buffer. +func (w *huffmanBitWriter) writeOutBits() { + bits := w.bits + w.bits >>= 48 + w.nbits -= 48 + n := w.nbytes + + // We over-write, but faster... + binary.LittleEndian.PutUint64(w.bytes[n:], bits) + n += 6 + + if n >= bufferFlushSize { + if w.err != nil { + n = 0 + return + } + w.write(w.bytes[:n]) + n = 0 + } + + w.nbytes = n +} + +// Write the header of a dynamic Huffman block to the output stream. +// +// numLiterals The number of literals specified in codegen +// numOffsets The number of offsets specified in codegen +// numCodegens The number of codegens used in codegen +func (w *huffmanBitWriter) writeDynamicHeader(numLiterals int, numOffsets int, numCodegens int, isEof bool) { + if w.err != nil { + return + } + var firstBits int32 = 4 + if isEof { + firstBits = 5 + } + w.writeBits(firstBits, 3) + w.writeBits(int32(numLiterals-257), 5) + w.writeBits(int32(numOffsets-1), 5) + w.writeBits(int32(numCodegens-4), 4) + + for i := 0; i < numCodegens; i++ { + value := uint(w.codegenEncoding.codes[codegenOrder[i]].len()) + w.writeBits(int32(value), 3) + } + + i := 0 + for { + var codeWord = uint32(w.codegen[i]) + i++ + if codeWord == badCode { + break + } + w.writeCode(w.codegenEncoding.codes[codeWord]) + + switch codeWord { + case 16: + w.writeBits(int32(w.codegen[i]), 2) + i++ + case 17: + w.writeBits(int32(w.codegen[i]), 3) + i++ + case 18: + w.writeBits(int32(w.codegen[i]), 7) + i++ + } + } +} + +// writeStoredHeader will write a stored header. +// If the stored block is only used for EOF, +// it is replaced with a fixed huffman block. +func (w *huffmanBitWriter) writeStoredHeader(length int, isEof bool) { + if w.err != nil { + return + } + if w.lastHeader > 0 { + // We owe an EOB + w.writeCode(w.literalEncoding.codes[endBlockMarker]) + w.lastHeader = 0 + } + + // To write EOF, use a fixed encoding block. 10 bits instead of 5 bytes. + if length == 0 && isEof { + w.writeFixedHeader(isEof) + // EOB: 7 bits, value: 0 + w.writeBits(0, 7) + w.flush() + return + } + + var flag int32 + if isEof { + flag = 1 + } + w.writeBits(flag, 3) + w.flush() + w.writeBits(int32(length), 16) + w.writeBits(int32(^uint16(length)), 16) +} + +func (w *huffmanBitWriter) writeFixedHeader(isEof bool) { + if w.err != nil { + return + } + if w.lastHeader > 0 { + // We owe an EOB + w.writeCode(w.literalEncoding.codes[endBlockMarker]) + w.lastHeader = 0 + } + + // Indicate that we are a fixed Huffman block + var value int32 = 2 + if isEof { + value = 3 + } + w.writeBits(value, 3) +} + +// writeBlock will write a block of tokens with the smallest encoding. +// The original input can be supplied, and if the huffman encoded data +// is larger than the original bytes, the data will be written as a +// stored block. +// If the input is nil, the tokens will always be Huffman encoded. 
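Editorial aside (illustrative, not part of the vendored file): the "smallest encoding" choice that writeBlock makes reduces to comparing three bit counts — fixed Huffman, dynamic Huffman, and stored — where a stored block costs (len(input)+5)*8 bits per storedSize above. A simplified sketch with hypothetical sizes:

package main

import "fmt"

// chooseBlockType picks the cheapest of the three DEFLATE block types given
// their estimated sizes in bits. storedBits only matters when the raw input
// is available (storable), mirroring the storedSize check above.
func chooseBlockType(fixedBits, dynamicBits, storedBits int, storable bool) string {
	size, kind := fixedBits, "fixed"
	if dynamicBits < size {
		size, kind = dynamicBits, "dynamic"
	}
	// Stored wins ties (<=), matching the comparison in writeBlock.
	if storable && storedBits <= size {
		return "stored"
	}
	return kind
}

func main() {
	// A 1000-byte block stored costs (1000+5)*8 = 8040 bits.
	fmt.Println(chooseBlockType(8600, 7900, 8040, true)) // dynamic
	fmt.Println(chooseBlockType(8600, 8300, 8040, true)) // stored
}

The real method additionally skips the fixed-size estimate once a block holds more than maxPredefinedTokens tokens.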
+func (w *huffmanBitWriter) writeBlock(tokens *tokens, eof bool, input []byte) { + if w.err != nil { + return + } + + tokens.AddEOB() + if w.lastHeader > 0 { + // We owe an EOB + w.writeCode(w.literalEncoding.codes[endBlockMarker]) + w.lastHeader = 0 + } + numLiterals, numOffsets := w.indexTokens(tokens, false) + w.generate() + var extraBits int + storedSize, storable := w.storedSize(input) + if storable { + extraBits = w.extraBitSize() + } + + // Figure out smallest code. + // Fixed Huffman baseline. + var literalEncoding = fixedLiteralEncoding + var offsetEncoding = fixedOffsetEncoding + var size = math.MaxInt32 + if tokens.n < maxPredefinedTokens { + size = w.fixedSize(extraBits) + } + + // Dynamic Huffman? + var numCodegens int + + // Generate codegen and codegenFrequencies, which indicates how to encode + // the literalEncoding and the offsetEncoding. + w.generateCodegen(numLiterals, numOffsets, w.literalEncoding, w.offsetEncoding) + w.codegenEncoding.generate(w.codegenFreq[:], 7) + dynamicSize, numCodegens := w.dynamicSize(w.literalEncoding, w.offsetEncoding, extraBits) + + if dynamicSize < size { + size = dynamicSize + literalEncoding = w.literalEncoding + offsetEncoding = w.offsetEncoding + } + + // Stored bytes? + if storable && storedSize <= size { + w.writeStoredHeader(len(input), eof) + w.writeBytes(input) + return + } + + // Huffman. + if literalEncoding == fixedLiteralEncoding { + w.writeFixedHeader(eof) + } else { + w.writeDynamicHeader(numLiterals, numOffsets, numCodegens, eof) + } + + // Write the tokens. + w.writeTokens(tokens.Slice(), literalEncoding.codes, offsetEncoding.codes) +} + +// writeBlockDynamic encodes a block using a dynamic Huffman table. +// This should be used if the symbols used have a disproportionate +// histogram distribution. +// If input is supplied and the compression savings are below 1/16th of the +// input size the block is stored. +func (w *huffmanBitWriter) writeBlockDynamic(tokens *tokens, eof bool, input []byte, sync bool) { + if w.err != nil { + return + } + + sync = sync || eof + if sync { + tokens.AddEOB() + } + + // We cannot reuse pure huffman table, and must mark as EOF. + if (w.lastHuffMan || eof) && w.lastHeader > 0 { + // We will not try to reuse. + w.writeCode(w.literalEncoding.codes[endBlockMarker]) + w.lastHeader = 0 + w.lastHuffMan = false + } + + // fillReuse enables filling of empty values. + // This will make encodings always reusable without testing. + // However, this does not appear to benefit on most cases. + const fillReuse = false + + // Check if we can reuse... + if !fillReuse && w.lastHeader > 0 && !w.canReuse(tokens) { + w.writeCode(w.literalEncoding.codes[endBlockMarker]) + w.lastHeader = 0 + } + + numLiterals, numOffsets := w.indexTokens(tokens, !sync) + extraBits := 0 + ssize, storable := w.storedSize(input) + + const usePrefs = true + if storable || w.lastHeader > 0 { + extraBits = w.extraBitSize() + } + + var size int + + // Check if we should reuse. + if w.lastHeader > 0 { + // Estimate size for using a new table. + // Use the previous header size as the best estimate. + newSize := w.lastHeader + tokens.EstimatedBits() + newSize += int(w.literalEncoding.codes[endBlockMarker].len()) + newSize>>w.logNewTablePenalty + + // The estimated size is calculated as an optimal table. + // We add a penalty to make it more realistic and re-use a bit more. + reuseSize := w.dynamicReuseSize(w.literalEncoding, w.offsetEncoding) + extraBits + + // Check if a new table is better. 
+ if newSize < reuseSize { + // Write the EOB we owe. + w.writeCode(w.literalEncoding.codes[endBlockMarker]) + size = newSize + w.lastHeader = 0 + } else { + size = reuseSize + } + + if tokens.n < maxPredefinedTokens { + if preSize := w.fixedSize(extraBits) + 7; usePrefs && preSize < size { + // Check if we get a reasonable size decrease. + if storable && ssize <= size { + w.writeStoredHeader(len(input), eof) + w.writeBytes(input) + return + } + w.writeFixedHeader(eof) + if !sync { + tokens.AddEOB() + } + w.writeTokens(tokens.Slice(), fixedLiteralEncoding.codes, fixedOffsetEncoding.codes) + return + } + } + // Check if we get a reasonable size decrease. + if storable && ssize <= size { + w.writeStoredHeader(len(input), eof) + w.writeBytes(input) + return + } + } + + // We want a new block/table + if w.lastHeader == 0 { + if fillReuse && !sync { + w.fillTokens() + numLiterals, numOffsets = maxNumLit, maxNumDist + } else { + w.literalFreq[endBlockMarker] = 1 + } + + w.generate() + // Generate codegen and codegenFrequencies, which indicates how to encode + // the literalEncoding and the offsetEncoding. + w.generateCodegen(numLiterals, numOffsets, w.literalEncoding, w.offsetEncoding) + w.codegenEncoding.generate(w.codegenFreq[:], 7) + + var numCodegens int + if fillReuse && !sync { + // Reindex for accurate size... + w.indexTokens(tokens, true) + } + size, numCodegens = w.dynamicSize(w.literalEncoding, w.offsetEncoding, extraBits) + + // Store predefined, if we don't get a reasonable improvement. + if tokens.n < maxPredefinedTokens { + if preSize := w.fixedSize(extraBits); usePrefs && preSize <= size { + // Store bytes, if we don't get an improvement. + if storable && ssize <= preSize { + w.writeStoredHeader(len(input), eof) + w.writeBytes(input) + return + } + w.writeFixedHeader(eof) + if !sync { + tokens.AddEOB() + } + w.writeTokens(tokens.Slice(), fixedLiteralEncoding.codes, fixedOffsetEncoding.codes) + return + } + } + + if storable && ssize <= size { + // Store bytes, if we don't get an improvement. + w.writeStoredHeader(len(input), eof) + w.writeBytes(input) + return + } + + // Write Huffman table. + w.writeDynamicHeader(numLiterals, numOffsets, numCodegens, eof) + if !sync { + w.lastHeader, _ = w.headerSize() + } + w.lastHuffMan = false + } + + if sync { + w.lastHeader = 0 + } + // Write the tokens. + w.writeTokens(tokens.Slice(), w.literalEncoding.codes, w.offsetEncoding.codes) +} + +func (w *huffmanBitWriter) fillTokens() { + for i, v := range w.literalFreq[:literalCount] { + if v == 0 { + w.literalFreq[i] = 1 + } + } + for i, v := range w.offsetFreq[:offsetCodeCount] { + if v == 0 { + w.offsetFreq[i] = 1 + } + } +} + +// indexTokens indexes a slice of tokens, and updates +// literalFreq and offsetFreq, and generates literalEncoding +// and offsetEncoding. +// The number of literal and offset tokens is returned. +func (w *huffmanBitWriter) indexTokens(t *tokens, filled bool) (numLiterals, numOffsets int) { + copy(w.literalFreq[:], t.litHist[:]) + copy(w.literalFreq[256:], t.extraHist[:]) + copy(w.offsetFreq[:], t.offHist[:offsetCodeCount]) + + if t.n == 0 { + return + } + if filled { + return maxNumLit, maxNumDist + } + // get the number of literals + numLiterals = len(w.literalFreq) + for w.literalFreq[numLiterals-1] == 0 { + numLiterals-- + } + // get the number of offsets + numOffsets = len(w.offsetFreq) + for numOffsets > 0 && w.offsetFreq[numOffsets-1] == 0 { + numOffsets-- + } + if numOffsets == 0 { + // We haven't found a single match. 
If we want to go with the dynamic encoding, + // we should count at least one offset to be sure that the offset huffman tree could be encoded. + w.offsetFreq[0] = 1 + numOffsets = 1 + } + return +} + +func (w *huffmanBitWriter) generate() { + w.literalEncoding.generate(w.literalFreq[:literalCount], 15) + w.offsetEncoding.generate(w.offsetFreq[:offsetCodeCount], 15) +} + +// writeTokens writes a slice of tokens to the output. +// codes for literal and offset encoding must be supplied. +func (w *huffmanBitWriter) writeTokens(tokens []token, leCodes, oeCodes []hcode) { + if w.err != nil { + return + } + if len(tokens) == 0 { + return + } + + // Only last token should be endBlockMarker. + var deferEOB bool + if tokens[len(tokens)-1] == endBlockMarker { + tokens = tokens[:len(tokens)-1] + deferEOB = true + } + + // Create slices up to the next power of two to avoid bounds checks. + lits := leCodes[:256] + offs := oeCodes[:32] + lengths := leCodes[lengthCodesStart:] + lengths = lengths[:32] + + // Go 1.16 LOVES having these on stack. + bits, nbits, nbytes := w.bits, w.nbits, w.nbytes + + for _, t := range tokens { + if t < 256 { + //w.writeCode(lits[t.literal()]) + c := lits[t] + bits |= c.code64() << (nbits & 63) + nbits += c.len() + if nbits >= 48 { + binary.LittleEndian.PutUint64(w.bytes[nbytes:], bits) + //*(*uint64)(unsafe.Pointer(&w.bytes[nbytes])) = bits + bits >>= 48 + nbits -= 48 + nbytes += 6 + if nbytes >= bufferFlushSize { + if w.err != nil { + nbytes = 0 + return + } + _, w.err = w.writer.Write(w.bytes[:nbytes]) + nbytes = 0 + } + } + continue + } + + // Write the length + length := t.length() + lengthCode := lengthCode(length) & 31 + if false { + w.writeCode(lengths[lengthCode]) + } else { + // inlined + c := lengths[lengthCode] + bits |= c.code64() << (nbits & 63) + nbits += c.len() + if nbits >= 48 { + binary.LittleEndian.PutUint64(w.bytes[nbytes:], bits) + //*(*uint64)(unsafe.Pointer(&w.bytes[nbytes])) = bits + bits >>= 48 + nbits -= 48 + nbytes += 6 + if nbytes >= bufferFlushSize { + if w.err != nil { + nbytes = 0 + return + } + _, w.err = w.writer.Write(w.bytes[:nbytes]) + nbytes = 0 + } + } + } + + if lengthCode >= lengthExtraBitsMinCode { + extraLengthBits := lengthExtraBits[lengthCode] + //w.writeBits(extraLength, extraLengthBits) + extraLength := int32(length - lengthBase[lengthCode]) + bits |= uint64(extraLength) << (nbits & 63) + nbits += extraLengthBits + if nbits >= 48 { + binary.LittleEndian.PutUint64(w.bytes[nbytes:], bits) + //*(*uint64)(unsafe.Pointer(&w.bytes[nbytes])) = bits + bits >>= 48 + nbits -= 48 + nbytes += 6 + if nbytes >= bufferFlushSize { + if w.err != nil { + nbytes = 0 + return + } + _, w.err = w.writer.Write(w.bytes[:nbytes]) + nbytes = 0 + } + } + } + // Write the offset + offset := t.offset() + offsetCode := (offset >> 16) & 31 + if false { + w.writeCode(offs[offsetCode]) + } else { + // inlined + c := offs[offsetCode] + bits |= c.code64() << (nbits & 63) + nbits += c.len() + if nbits >= 48 { + binary.LittleEndian.PutUint64(w.bytes[nbytes:], bits) + //*(*uint64)(unsafe.Pointer(&w.bytes[nbytes])) = bits + bits >>= 48 + nbits -= 48 + nbytes += 6 + if nbytes >= bufferFlushSize { + if w.err != nil { + nbytes = 0 + return + } + _, w.err = w.writer.Write(w.bytes[:nbytes]) + nbytes = 0 + } + } + } + + if offsetCode >= offsetExtraBitsMinCode { + offsetComb := offsetCombined[offsetCode] + //w.writeBits(extraOffset, extraOffsetBits) + bits |= uint64((offset-(offsetComb>>8))&matchOffsetOnlyMask) << (nbits & 63) + nbits += uint8(offsetComb) + if nbits >= 48 { 
+ binary.LittleEndian.PutUint64(w.bytes[nbytes:], bits) + //*(*uint64)(unsafe.Pointer(&w.bytes[nbytes])) = bits + bits >>= 48 + nbits -= 48 + nbytes += 6 + if nbytes >= bufferFlushSize { + if w.err != nil { + nbytes = 0 + return + } + _, w.err = w.writer.Write(w.bytes[:nbytes]) + nbytes = 0 + } + } + } + } + // Restore... + w.bits, w.nbits, w.nbytes = bits, nbits, nbytes + + if deferEOB { + w.writeCode(leCodes[endBlockMarker]) + } +} + +// huffOffset is a static offset encoder used for huffman only encoding. +// It can be reused since we will not be encoding offset values. +var huffOffset *huffmanEncoder + +func init() { + w := newHuffmanBitWriter(nil) + w.offsetFreq[0] = 1 + huffOffset = newHuffmanEncoder(offsetCodeCount) + huffOffset.generate(w.offsetFreq[:offsetCodeCount], 15) +} + +// writeBlockHuff encodes a block of bytes as either +// Huffman encoded literals or uncompressed bytes if the +// results only gains very little from compression. +func (w *huffmanBitWriter) writeBlockHuff(eof bool, input []byte, sync bool) { + if w.err != nil { + return + } + + // Clear histogram + for i := range w.literalFreq[:] { + w.literalFreq[i] = 0 + } + if !w.lastHuffMan { + for i := range w.offsetFreq[:] { + w.offsetFreq[i] = 0 + } + } + + const numLiterals = endBlockMarker + 1 + const numOffsets = 1 + + // Add everything as literals + // We have to estimate the header size. + // Assume header is around 70 bytes: + // https://stackoverflow.com/a/25454430 + const guessHeaderSizeBits = 70 * 8 + histogram(input, w.literalFreq[:numLiterals]) + ssize, storable := w.storedSize(input) + if storable && len(input) > 1024 { + // Quick check for incompressible content. + abs := float64(0) + avg := float64(len(input)) / 256 + max := float64(len(input) * 2) + for _, v := range w.literalFreq[:256] { + diff := float64(v) - avg + abs += diff * diff + if abs > max { + break + } + } + if abs < max { + if debugDeflate { + fmt.Println("stored", abs, "<", max) + } + // No chance we can compress this... + w.writeStoredHeader(len(input), eof) + w.writeBytes(input) + return + } + } + w.literalFreq[endBlockMarker] = 1 + w.tmpLitEncoding.generate(w.literalFreq[:numLiterals], 15) + estBits := w.tmpLitEncoding.canReuseBits(w.literalFreq[:numLiterals]) + if estBits < math.MaxInt32 { + estBits += w.lastHeader + if w.lastHeader == 0 { + estBits += guessHeaderSizeBits + } + estBits += estBits >> w.logNewTablePenalty + } + + // Store bytes, if we don't get a reasonable improvement. + if storable && ssize <= estBits { + if debugDeflate { + fmt.Println("stored,", ssize, "<=", estBits) + } + w.writeStoredHeader(len(input), eof) + w.writeBytes(input) + return + } + + if w.lastHeader > 0 { + reuseSize := w.literalEncoding.canReuseBits(w.literalFreq[:256]) + + if estBits < reuseSize { + if debugDeflate { + fmt.Println("NOT reusing, reuse:", reuseSize/8, "> new:", estBits/8, "header est:", w.lastHeader/8, "bytes") + } + // We owe an EOB + w.writeCode(w.literalEncoding.codes[endBlockMarker]) + w.lastHeader = 0 + } else if debugDeflate { + fmt.Println("reusing, reuse:", reuseSize/8, "> new:", estBits/8, "- header est:", w.lastHeader/8) + } + } + + count := 0 + if w.lastHeader == 0 { + // Use the temp encoding, so swap. + w.literalEncoding, w.tmpLitEncoding = w.tmpLitEncoding, w.literalEncoding + // Generate codegen and codegenFrequencies, which indicates how to encode + // the literalEncoding and the offsetEncoding. 
+ w.generateCodegen(numLiterals, numOffsets, w.literalEncoding, huffOffset) + w.codegenEncoding.generate(w.codegenFreq[:], 7) + numCodegens := w.codegens() + + // Huffman. + w.writeDynamicHeader(numLiterals, numOffsets, numCodegens, eof) + w.lastHuffMan = true + w.lastHeader, _ = w.headerSize() + if debugDeflate { + count += w.lastHeader + fmt.Println("header:", count/8) + } + } + + encoding := w.literalEncoding.codes[:256] + // Go 1.16 LOVES having these on stack. At least 1.5x the speed. + bits, nbits, nbytes := w.bits, w.nbits, w.nbytes + + if debugDeflate { + count -= int(nbytes)*8 + int(nbits) + } + // Unroll, write 3 codes/loop. + // Fastest number of unrolls. + for len(input) > 3 { + // We must have at least 48 bits free. + if nbits >= 8 { + n := nbits >> 3 + binary.LittleEndian.PutUint64(w.bytes[nbytes:], bits) + bits >>= (n * 8) & 63 + nbits -= n * 8 + nbytes += n + } + if nbytes >= bufferFlushSize { + if w.err != nil { + nbytes = 0 + return + } + if debugDeflate { + count += int(nbytes) * 8 + } + _, w.err = w.writer.Write(w.bytes[:nbytes]) + nbytes = 0 + } + a, b := encoding[input[0]], encoding[input[1]] + bits |= a.code64() << (nbits & 63) + bits |= b.code64() << ((nbits + a.len()) & 63) + c := encoding[input[2]] + nbits += b.len() + a.len() + bits |= c.code64() << (nbits & 63) + nbits += c.len() + input = input[3:] + } + + // Remaining... + for _, t := range input { + if nbits >= 48 { + binary.LittleEndian.PutUint64(w.bytes[nbytes:], bits) + //*(*uint64)(unsafe.Pointer(&w.bytes[nbytes])) = bits + bits >>= 48 + nbits -= 48 + nbytes += 6 + if nbytes >= bufferFlushSize { + if w.err != nil { + nbytes = 0 + return + } + if debugDeflate { + count += int(nbytes) * 8 + } + _, w.err = w.writer.Write(w.bytes[:nbytes]) + nbytes = 0 + } + } + // Bitwriting inlined, ~30% speedup + c := encoding[t] + bits |= c.code64() << (nbits & 63) + + nbits += c.len() + if debugDeflate { + count += int(c.len()) + } + } + // Restore... + w.bits, w.nbits, w.nbytes = bits, nbits, nbytes + + if debugDeflate { + nb := count + int(nbytes)*8 + int(nbits) + fmt.Println("wrote", nb, "bits,", nb/8, "bytes.") + } + // Flush if needed to have space. + if w.nbits >= 48 { + w.writeOutBits() + } + + if eof || sync { + w.writeCode(w.literalEncoding.codes[endBlockMarker]) + w.lastHeader = 0 + w.lastHuffMan = false + } +} diff --git a/vendor/github.com/klauspost/compress/flate/huffman_code.go b/vendor/github.com/klauspost/compress/flate/huffman_code.go new file mode 100644 index 000000000..5ac144f28 --- /dev/null +++ b/vendor/github.com/klauspost/compress/flate/huffman_code.go @@ -0,0 +1,412 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package flate + +import ( + "math" + "math/bits" +) + +const ( + maxBitsLimit = 16 + // number of valid literals + literalCount = 286 +) + +// hcode is a huffman code with a bit code and bit length. +type hcode uint32 + +func (h hcode) len() uint8 { + return uint8(h) +} + +func (h hcode) code64() uint64 { + return uint64(h >> 8) +} + +func (h hcode) zero() bool { + return h == 0 +} + +type huffmanEncoder struct { + codes []hcode + bitCount [17]int32 + + // Allocate a reusable buffer with the longest possible frequency table. + // Possible lengths are codegenCodeCount, offsetCodeCount and literalCount. + // The largest of these is literalCount, so we allocate for that case. 
+ freqcache [literalCount + 1]literalNode +} + +type literalNode struct { + literal uint16 + freq uint16 +} + +// A levelInfo describes the state of the constructed tree for a given depth. +type levelInfo struct { + // Our level. for better printing + level int32 + + // The frequency of the last node at this level + lastFreq int32 + + // The frequency of the next character to add to this level + nextCharFreq int32 + + // The frequency of the next pair (from level below) to add to this level. + // Only valid if the "needed" value of the next lower level is 0. + nextPairFreq int32 + + // The number of chains remaining to generate for this level before moving + // up to the next level + needed int32 +} + +// set sets the code and length of an hcode. +func (h *hcode) set(code uint16, length uint8) { + *h = hcode(length) | (hcode(code) << 8) +} + +func newhcode(code uint16, length uint8) hcode { + return hcode(length) | (hcode(code) << 8) +} + +func reverseBits(number uint16, bitLength byte) uint16 { + return bits.Reverse16(number << ((16 - bitLength) & 15)) +} + +func maxNode() literalNode { return literalNode{math.MaxUint16, math.MaxUint16} } + +func newHuffmanEncoder(size int) *huffmanEncoder { + // Make capacity to next power of two. + c := uint(bits.Len32(uint32(size - 1))) + return &huffmanEncoder{codes: make([]hcode, size, 1<= 3 +// The cases of 0, 1, and 2 literals are handled by special case code. +// +// list An array of the literals with non-zero frequencies +// and their associated frequencies. The array is in order of increasing +// frequency, and has as its last element a special element with frequency +// MaxInt32 +// maxBits The maximum number of bits that should be used to encode any literal. +// Must be less than 16. +// return An integer array in which array[i] indicates the number of literals +// that should be encoded in i bits. +func (h *huffmanEncoder) bitCounts(list []literalNode, maxBits int32) []int32 { + if maxBits >= maxBitsLimit { + panic("flate: maxBits too large") + } + n := int32(len(list)) + list = list[0 : n+1] + list[n] = maxNode() + + // The tree can't have greater depth than n - 1, no matter what. This + // saves a little bit of work in some small cases + if maxBits > n-1 { + maxBits = n - 1 + } + + // Create information about each of the levels. + // A bogus "Level 0" whose sole purpose is so that + // level1.prev.needed==0. This makes level1.nextPairFreq + // be a legitimate value that never gets chosen. + var levels [maxBitsLimit]levelInfo + // leafCounts[i] counts the number of literals at the left + // of ancestors of the rightmost node at level i. + // leafCounts[i][j] is the number of literals at the left + // of the level j ancestor. + var leafCounts [maxBitsLimit][maxBitsLimit]int32 + + // Descending to only have 1 bounds check. + l2f := int32(list[2].freq) + l1f := int32(list[1].freq) + l0f := int32(list[0].freq) + int32(list[1].freq) + + for level := int32(1); level <= maxBits; level++ { + // For every level, the first two items are the first two characters. + // We initialize the levels as if we had already figured this out. + levels[level] = levelInfo{ + level: level, + lastFreq: l1f, + nextCharFreq: l2f, + nextPairFreq: l0f, + } + leafCounts[level][level] = 2 + if level == 1 { + levels[level].nextPairFreq = math.MaxInt32 + } + } + + // We need a total of 2*n - 2 items at top level and have already generated 2. 
+ levels[maxBits].needed = 2*n - 4 + + level := uint32(maxBits) + for level < 16 { + l := &levels[level] + if l.nextPairFreq == math.MaxInt32 && l.nextCharFreq == math.MaxInt32 { + // We've run out of both leafs and pairs. + // End all calculations for this level. + // To make sure we never come back to this level or any lower level, + // set nextPairFreq impossibly large. + l.needed = 0 + levels[level+1].nextPairFreq = math.MaxInt32 + level++ + continue + } + + prevFreq := l.lastFreq + if l.nextCharFreq < l.nextPairFreq { + // The next item on this row is a leaf node. + n := leafCounts[level][level] + 1 + l.lastFreq = l.nextCharFreq + // Lower leafCounts are the same of the previous node. + leafCounts[level][level] = n + e := list[n] + if e.literal < math.MaxUint16 { + l.nextCharFreq = int32(e.freq) + } else { + l.nextCharFreq = math.MaxInt32 + } + } else { + // The next item on this row is a pair from the previous row. + // nextPairFreq isn't valid until we generate two + // more values in the level below + l.lastFreq = l.nextPairFreq + // Take leaf counts from the lower level, except counts[level] remains the same. + if true { + save := leafCounts[level][level] + leafCounts[level] = leafCounts[level-1] + leafCounts[level][level] = save + } else { + copy(leafCounts[level][:level], leafCounts[level-1][:level]) + } + levels[l.level-1].needed = 2 + } + + if l.needed--; l.needed == 0 { + // We've done everything we need to do for this level. + // Continue calculating one level up. Fill in nextPairFreq + // of that level with the sum of the two nodes we've just calculated on + // this level. + if l.level == maxBits { + // All done! + break + } + levels[l.level+1].nextPairFreq = prevFreq + l.lastFreq + level++ + } else { + // If we stole from below, move down temporarily to replenish it. + for levels[level-1].needed > 0 { + level-- + } + } + } + + // Somethings is wrong if at the end, the top level is null or hasn't used + // all of the leaves. + if leafCounts[maxBits][maxBits] != n { + panic("leafCounts[maxBits][maxBits] != n") + } + + bitCount := h.bitCount[:maxBits+1] + bits := 1 + counts := &leafCounts[maxBits] + for level := maxBits; level > 0; level-- { + // chain.leafCount gives the number of literals requiring at least "bits" + // bits to encode. + bitCount[bits] = counts[level] - counts[level-1] + bits++ + } + return bitCount +} + +// Look at the leaves and assign them a bit count and an encoding as specified +// in RFC 1951 3.2.2 +func (h *huffmanEncoder) assignEncodingAndSize(bitCount []int32, list []literalNode) { + code := uint16(0) + for n, bits := range bitCount { + code <<= 1 + if n == 0 || bits == 0 { + continue + } + // The literals list[len(list)-bits] .. list[len(list)-bits] + // are encoded using "bits" bits, and get the values + // code, code + 1, .... The code values are + // assigned in literal order (not frequency order). + chunk := list[len(list)-int(bits):] + + sortByLiteral(chunk) + for _, node := range chunk { + h.codes[node.literal] = newhcode(reverseBits(code, uint8(n)), uint8(n)) + code++ + } + list = list[0 : len(list)-int(bits)] + } +} + +// Update this Huffman Code object to be the minimum code for the specified frequency count. +// +// freq An array of frequencies, in which frequency[i] gives the frequency of literal i. +// maxBits The maximum number of bits to use for any literal. 
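Editorial aside (illustrative, not part of the vendored file): assignEncodingAndSize above applies the canonical code assignment of RFC 1951 section 3.2.2 — codes of a given length are consecutive and handed out in literal order — and additionally bit-reverses each code for LSB-first emission. A standalone sketch of the canonical rule itself, using the worked example from the RFC:

package main

import "fmt"

// canonicalCodes returns the canonical Huffman code for every symbol given
// its bit length (0 means the symbol is unused), following the two-step
// procedure from RFC 1951 section 3.2.2.
func canonicalCodes(lengths []uint8) []uint16 {
	const maxBits = 15
	var blCount [maxBits + 1]uint16
	for _, l := range lengths {
		blCount[l]++
	}
	blCount[0] = 0

	// Smallest code value for each bit length.
	var nextCode [maxBits + 1]uint16
	code := uint16(0)
	for n := 1; n <= maxBits; n++ {
		code = (code + blCount[n-1]) << 1
		nextCode[n] = code
	}

	// Assign consecutive codes to symbols of equal length, in symbol order.
	codes := make([]uint16, len(lengths))
	for sym, l := range lengths {
		if l != 0 {
			codes[sym] = nextCode[l]
			nextCode[l]++
		}
	}
	return codes
}

func main() {
	// RFC 1951 example: symbols A..H with lengths 3,3,3,3,3,2,4,4.
	fmt.Println(canonicalCodes([]uint8{3, 3, 3, 3, 3, 2, 4, 4}))
	// Prints [2 3 4 5 6 0 14 15], i.e. F gets the single 2-bit code.
}

This matches the lengths-to-codes mapping the encoder above computes internally before reverseBits is applied.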
+func (h *huffmanEncoder) generate(freq []uint16, maxBits int32) { + list := h.freqcache[:len(freq)+1] + codes := h.codes[:len(freq)] + // Number of non-zero literals + count := 0 + // Set list to be the set of all non-zero literals and their frequencies + for i, f := range freq { + if f != 0 { + list[count] = literalNode{uint16(i), f} + count++ + } else { + codes[i] = 0 + } + } + list[count] = literalNode{} + + list = list[:count] + if count <= 2 { + // Handle the small cases here, because they are awkward for the general case code. With + // two or fewer literals, everything has bit length 1. + for i, node := range list { + // "list" is in order of increasing literal value. + h.codes[node.literal].set(uint16(i), 1) + } + return + } + sortByFreq(list) + + // Get the number of literals for each bit count + bitCount := h.bitCounts(list, maxBits) + // And do the assignment + h.assignEncodingAndSize(bitCount, list) +} + +// atLeastOne clamps the result between 1 and 15. +func atLeastOne(v float32) float32 { + if v < 1 { + return 1 + } + if v > 15 { + return 15 + } + return v +} + +func histogram(b []byte, h []uint16) { + if true && len(b) >= 8<<10 { + // Split for bigger inputs + histogramSplit(b, h) + } else { + h = h[:256] + for _, t := range b { + h[t]++ + } + } +} + +func histogramSplit(b []byte, h []uint16) { + // Tested, and slightly faster than 2-way. + // Writing to separate arrays and combining is also slightly slower. + h = h[:256] + for len(b)&3 != 0 { + h[b[0]]++ + b = b[1:] + } + n := len(b) / 4 + x, y, z, w := b[:n], b[n:], b[n+n:], b[n+n+n:] + y, z, w = y[:len(x)], z[:len(x)], w[:len(x)] + for i, t := range x { + v0 := &h[t] + v1 := &h[y[i]] + v3 := &h[w[i]] + v2 := &h[z[i]] + *v0++ + *v1++ + *v2++ + *v3++ + } +} diff --git a/vendor/github.com/klauspost/compress/flate/huffman_sortByFreq.go b/vendor/github.com/klauspost/compress/flate/huffman_sortByFreq.go new file mode 100644 index 000000000..207780299 --- /dev/null +++ b/vendor/github.com/klauspost/compress/flate/huffman_sortByFreq.go @@ -0,0 +1,178 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package flate + +// Sort sorts data. +// It makes one call to data.Len to determine n, and O(n*log(n)) calls to +// data.Less and data.Swap. The sort is not guaranteed to be stable. +func sortByFreq(data []literalNode) { + n := len(data) + quickSortByFreq(data, 0, n, maxDepth(n)) +} + +func quickSortByFreq(data []literalNode, a, b, maxDepth int) { + for b-a > 12 { // Use ShellSort for slices <= 12 elements + if maxDepth == 0 { + heapSort(data, a, b) + return + } + maxDepth-- + mlo, mhi := doPivotByFreq(data, a, b) + // Avoiding recursion on the larger subproblem guarantees + // a stack depth of at most lg(b-a). + if mlo-a < b-mhi { + quickSortByFreq(data, a, mlo, maxDepth) + a = mhi // i.e., quickSortByFreq(data, mhi, b) + } else { + quickSortByFreq(data, mhi, b, maxDepth) + b = mlo // i.e., quickSortByFreq(data, a, mlo) + } + } + if b-a > 1 { + // Do ShellSort pass with gap 6 + // It could be written in this simplified form cause b-a <= 12 + for i := a + 6; i < b; i++ { + if data[i].freq == data[i-6].freq && data[i].literal < data[i-6].literal || data[i].freq < data[i-6].freq { + data[i], data[i-6] = data[i-6], data[i] + } + } + insertionSortByFreq(data, a, b) + } +} + +// siftDownByFreq implements the heap property on data[lo, hi). +// first is an offset into the array where the root of the heap lies. 
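The encoder above (generate, together with bitCounts and assignEncodingAndSize) ultimately applies the canonical-code rule of RFC 1951 section 3.2.2: count how many symbols use each bit length, derive the first code of every length, then hand codes out in symbol order. The following standalone sketch shows that rule in isolation; it is illustrative only, not part of the vendored file, and all names in it are mine.

package main

import "fmt"

func main() {
	// Example code lengths per symbol; shorter lengths go to more frequent symbols.
	symbols := []byte{'a', 'b', 'c', 'd', 'e'}
	lengths := map[byte]int{'a': 1, 'b': 3, 'c': 3, 'd': 3, 'e': 3}

	// Count codes per length, then compute the first code of each length
	// exactly as RFC 1951 section 3.2.2 prescribes.
	maxLen := 0
	count := map[int]int{}
	for _, l := range lengths {
		count[l]++
		if l > maxLen {
			maxLen = l
		}
	}
	code := 0
	next := make([]int, maxLen+1)
	for n := 1; n <= maxLen; n++ {
		code = (code + count[n-1]) << 1
		next[n] = code
	}

	// Assign codes in symbol order within each length.
	for _, s := range symbols {
		l := lengths[s]
		fmt.Printf("%c -> %0*b (%d bits)\n", s, l, next[l], l)
		next[l]++
	}
}

The vendored encoder additionally bit-reverses each code before storing it (via reverseBits), since DEFLATE emits Huffman codes least-significant bit first.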
+func siftDownByFreq(data []literalNode, lo, hi, first int) { + root := lo + for { + child := 2*root + 1 + if child >= hi { + break + } + if child+1 < hi && (data[first+child].freq == data[first+child+1].freq && data[first+child].literal < data[first+child+1].literal || data[first+child].freq < data[first+child+1].freq) { + child++ + } + if data[first+root].freq == data[first+child].freq && data[first+root].literal > data[first+child].literal || data[first+root].freq > data[first+child].freq { + return + } + data[first+root], data[first+child] = data[first+child], data[first+root] + root = child + } +} +func doPivotByFreq(data []literalNode, lo, hi int) (midlo, midhi int) { + m := int(uint(lo+hi) >> 1) // Written like this to avoid integer overflow. + if hi-lo > 40 { + // Tukey's ``Ninther,'' median of three medians of three. + s := (hi - lo) / 8 + medianOfThreeSortByFreq(data, lo, lo+s, lo+2*s) + medianOfThreeSortByFreq(data, m, m-s, m+s) + medianOfThreeSortByFreq(data, hi-1, hi-1-s, hi-1-2*s) + } + medianOfThreeSortByFreq(data, lo, m, hi-1) + + // Invariants are: + // data[lo] = pivot (set up by ChoosePivot) + // data[lo < i < a] < pivot + // data[a <= i < b] <= pivot + // data[b <= i < c] unexamined + // data[c <= i < hi-1] > pivot + // data[hi-1] >= pivot + pivot := lo + a, c := lo+1, hi-1 + + for ; a < c && (data[a].freq == data[pivot].freq && data[a].literal < data[pivot].literal || data[a].freq < data[pivot].freq); a++ { + } + b := a + for { + for ; b < c && (data[pivot].freq == data[b].freq && data[pivot].literal > data[b].literal || data[pivot].freq > data[b].freq); b++ { // data[b] <= pivot + } + for ; b < c && (data[pivot].freq == data[c-1].freq && data[pivot].literal < data[c-1].literal || data[pivot].freq < data[c-1].freq); c-- { // data[c-1] > pivot + } + if b >= c { + break + } + // data[b] > pivot; data[c-1] <= pivot + data[b], data[c-1] = data[c-1], data[b] + b++ + c-- + } + // If hi-c<3 then there are duplicates (by property of median of nine). + // Let's be a bit more conservative, and set border to 5. 
+ protect := hi-c < 5 + if !protect && hi-c < (hi-lo)/4 { + // Lets test some points for equality to pivot + dups := 0 + if data[pivot].freq == data[hi-1].freq && data[pivot].literal > data[hi-1].literal || data[pivot].freq > data[hi-1].freq { // data[hi-1] = pivot + data[c], data[hi-1] = data[hi-1], data[c] + c++ + dups++ + } + if data[b-1].freq == data[pivot].freq && data[b-1].literal > data[pivot].literal || data[b-1].freq > data[pivot].freq { // data[b-1] = pivot + b-- + dups++ + } + // m-lo = (hi-lo)/2 > 6 + // b-lo > (hi-lo)*3/4-1 > 8 + // ==> m < b ==> data[m] <= pivot + if data[m].freq == data[pivot].freq && data[m].literal > data[pivot].literal || data[m].freq > data[pivot].freq { // data[m] = pivot + data[m], data[b-1] = data[b-1], data[m] + b-- + dups++ + } + // if at least 2 points are equal to pivot, assume skewed distribution + protect = dups > 1 + } + if protect { + // Protect against a lot of duplicates + // Add invariant: + // data[a <= i < b] unexamined + // data[b <= i < c] = pivot + for { + for ; a < b && (data[b-1].freq == data[pivot].freq && data[b-1].literal > data[pivot].literal || data[b-1].freq > data[pivot].freq); b-- { // data[b] == pivot + } + for ; a < b && (data[a].freq == data[pivot].freq && data[a].literal < data[pivot].literal || data[a].freq < data[pivot].freq); a++ { // data[a] < pivot + } + if a >= b { + break + } + // data[a] == pivot; data[b-1] < pivot + data[a], data[b-1] = data[b-1], data[a] + a++ + b-- + } + } + // Swap pivot into middle + data[pivot], data[b-1] = data[b-1], data[pivot] + return b - 1, c +} + +// Insertion sort +func insertionSortByFreq(data []literalNode, a, b int) { + for i := a + 1; i < b; i++ { + for j := i; j > a && (data[j].freq == data[j-1].freq && data[j].literal < data[j-1].literal || data[j].freq < data[j-1].freq); j-- { + data[j], data[j-1] = data[j-1], data[j] + } + } +} + +// quickSortByFreq, loosely following Bentley and McIlroy, +// ``Engineering a Sort Function,'' SP&E November 1993. + +// medianOfThreeSortByFreq moves the median of the three values data[m0], data[m1], data[m2] into data[m1]. +func medianOfThreeSortByFreq(data []literalNode, m1, m0, m2 int) { + // sort 3 elements + if data[m1].freq == data[m0].freq && data[m1].literal < data[m0].literal || data[m1].freq < data[m0].freq { + data[m1], data[m0] = data[m0], data[m1] + } + // data[m0] <= data[m1] + if data[m2].freq == data[m1].freq && data[m2].literal < data[m1].literal || data[m2].freq < data[m1].freq { + data[m2], data[m1] = data[m1], data[m2] + // data[m0] <= data[m2] && data[m1] < data[m2] + if data[m1].freq == data[m0].freq && data[m1].literal < data[m0].literal || data[m1].freq < data[m0].freq { + data[m1], data[m0] = data[m0], data[m1] + } + } + // now data[m0] <= data[m1] <= data[m2] +} diff --git a/vendor/github.com/klauspost/compress/flate/huffman_sortByLiteral.go b/vendor/github.com/klauspost/compress/flate/huffman_sortByLiteral.go new file mode 100644 index 000000000..93f1aea10 --- /dev/null +++ b/vendor/github.com/klauspost/compress/flate/huffman_sortByLiteral.go @@ -0,0 +1,201 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package flate + +// Sort sorts data. +// It makes one call to data.Len to determine n, and O(n*log(n)) calls to +// data.Less and data.Swap. The sort is not guaranteed to be stable. 
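Both hand-rolled sorts in these two files implement fixed orderings over literalNode: sortByFreq above orders by ascending frequency with ties broken by literal value, and sortByLiteral below orders by literal value alone. For reference, the same orderings expressed with sort.Slice; this is an illustrative sketch, not part of the package, and the hand-rolled versions presumably exist to avoid the closure and interface overhead of sort.Slice on this hot path.

package main

import (
	"fmt"
	"sort"
)

type literalNode struct {
	literal uint16
	freq    uint16
}

func main() {
	nodes := []literalNode{{literal: 'b', freq: 3}, {literal: 'a', freq: 3}, {literal: 'c', freq: 1}}

	// sortByFreq ordering: ascending frequency, ties broken by ascending literal.
	sort.Slice(nodes, func(i, j int) bool {
		if nodes[i].freq == nodes[j].freq {
			return nodes[i].literal < nodes[j].literal
		}
		return nodes[i].freq < nodes[j].freq
	})
	fmt.Println(nodes) // [{99 1} {97 3} {98 3}]

	// sortByLiteral ordering: ascending literal only.
	sort.Slice(nodes, func(i, j int) bool { return nodes[i].literal < nodes[j].literal })
	fmt.Println(nodes) // [{97 3} {98 3} {99 1}]
}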
+func sortByLiteral(data []literalNode) { + n := len(data) + quickSort(data, 0, n, maxDepth(n)) +} + +func quickSort(data []literalNode, a, b, maxDepth int) { + for b-a > 12 { // Use ShellSort for slices <= 12 elements + if maxDepth == 0 { + heapSort(data, a, b) + return + } + maxDepth-- + mlo, mhi := doPivot(data, a, b) + // Avoiding recursion on the larger subproblem guarantees + // a stack depth of at most lg(b-a). + if mlo-a < b-mhi { + quickSort(data, a, mlo, maxDepth) + a = mhi // i.e., quickSort(data, mhi, b) + } else { + quickSort(data, mhi, b, maxDepth) + b = mlo // i.e., quickSort(data, a, mlo) + } + } + if b-a > 1 { + // Do ShellSort pass with gap 6 + // It could be written in this simplified form cause b-a <= 12 + for i := a + 6; i < b; i++ { + if data[i].literal < data[i-6].literal { + data[i], data[i-6] = data[i-6], data[i] + } + } + insertionSort(data, a, b) + } +} +func heapSort(data []literalNode, a, b int) { + first := a + lo := 0 + hi := b - a + + // Build heap with greatest element at top. + for i := (hi - 1) / 2; i >= 0; i-- { + siftDown(data, i, hi, first) + } + + // Pop elements, largest first, into end of data. + for i := hi - 1; i >= 0; i-- { + data[first], data[first+i] = data[first+i], data[first] + siftDown(data, lo, i, first) + } +} + +// siftDown implements the heap property on data[lo, hi). +// first is an offset into the array where the root of the heap lies. +func siftDown(data []literalNode, lo, hi, first int) { + root := lo + for { + child := 2*root + 1 + if child >= hi { + break + } + if child+1 < hi && data[first+child].literal < data[first+child+1].literal { + child++ + } + if data[first+root].literal > data[first+child].literal { + return + } + data[first+root], data[first+child] = data[first+child], data[first+root] + root = child + } +} +func doPivot(data []literalNode, lo, hi int) (midlo, midhi int) { + m := int(uint(lo+hi) >> 1) // Written like this to avoid integer overflow. + if hi-lo > 40 { + // Tukey's ``Ninther,'' median of three medians of three. + s := (hi - lo) / 8 + medianOfThree(data, lo, lo+s, lo+2*s) + medianOfThree(data, m, m-s, m+s) + medianOfThree(data, hi-1, hi-1-s, hi-1-2*s) + } + medianOfThree(data, lo, m, hi-1) + + // Invariants are: + // data[lo] = pivot (set up by ChoosePivot) + // data[lo < i < a] < pivot + // data[a <= i < b] <= pivot + // data[b <= i < c] unexamined + // data[c <= i < hi-1] > pivot + // data[hi-1] >= pivot + pivot := lo + a, c := lo+1, hi-1 + + for ; a < c && data[a].literal < data[pivot].literal; a++ { + } + b := a + for { + for ; b < c && data[pivot].literal > data[b].literal; b++ { // data[b] <= pivot + } + for ; b < c && data[pivot].literal < data[c-1].literal; c-- { // data[c-1] > pivot + } + if b >= c { + break + } + // data[b] > pivot; data[c-1] <= pivot + data[b], data[c-1] = data[c-1], data[b] + b++ + c-- + } + // If hi-c<3 then there are duplicates (by property of median of nine). + // Let's be a bit more conservative, and set border to 5. 
+ protect := hi-c < 5 + if !protect && hi-c < (hi-lo)/4 { + // Lets test some points for equality to pivot + dups := 0 + if data[pivot].literal > data[hi-1].literal { // data[hi-1] = pivot + data[c], data[hi-1] = data[hi-1], data[c] + c++ + dups++ + } + if data[b-1].literal > data[pivot].literal { // data[b-1] = pivot + b-- + dups++ + } + // m-lo = (hi-lo)/2 > 6 + // b-lo > (hi-lo)*3/4-1 > 8 + // ==> m < b ==> data[m] <= pivot + if data[m].literal > data[pivot].literal { // data[m] = pivot + data[m], data[b-1] = data[b-1], data[m] + b-- + dups++ + } + // if at least 2 points are equal to pivot, assume skewed distribution + protect = dups > 1 + } + if protect { + // Protect against a lot of duplicates + // Add invariant: + // data[a <= i < b] unexamined + // data[b <= i < c] = pivot + for { + for ; a < b && data[b-1].literal > data[pivot].literal; b-- { // data[b] == pivot + } + for ; a < b && data[a].literal < data[pivot].literal; a++ { // data[a] < pivot + } + if a >= b { + break + } + // data[a] == pivot; data[b-1] < pivot + data[a], data[b-1] = data[b-1], data[a] + a++ + b-- + } + } + // Swap pivot into middle + data[pivot], data[b-1] = data[b-1], data[pivot] + return b - 1, c +} + +// Insertion sort +func insertionSort(data []literalNode, a, b int) { + for i := a + 1; i < b; i++ { + for j := i; j > a && data[j].literal < data[j-1].literal; j-- { + data[j], data[j-1] = data[j-1], data[j] + } + } +} + +// maxDepth returns a threshold at which quicksort should switch +// to heapsort. It returns 2*ceil(lg(n+1)). +func maxDepth(n int) int { + var depth int + for i := n; i > 0; i >>= 1 { + depth++ + } + return depth * 2 +} + +// medianOfThree moves the median of the three values data[m0], data[m1], data[m2] into data[m1]. +func medianOfThree(data []literalNode, m1, m0, m2 int) { + // sort 3 elements + if data[m1].literal < data[m0].literal { + data[m1], data[m0] = data[m0], data[m1] + } + // data[m0] <= data[m1] + if data[m2].literal < data[m1].literal { + data[m2], data[m1] = data[m1], data[m2] + // data[m0] <= data[m2] && data[m1] < data[m2] + if data[m1].literal < data[m0].literal { + data[m1], data[m0] = data[m0], data[m1] + } + } + // now data[m0] <= data[m1] <= data[m2] +} diff --git a/vendor/github.com/klauspost/compress/flate/inflate.go b/vendor/github.com/klauspost/compress/flate/inflate.go new file mode 100644 index 000000000..414c0bea9 --- /dev/null +++ b/vendor/github.com/klauspost/compress/flate/inflate.go @@ -0,0 +1,793 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Package flate implements the DEFLATE compressed data format, described in +// RFC 1951. The gzip and zlib packages implement access to DEFLATE-based file +// formats. +package flate + +import ( + "bufio" + "compress/flate" + "fmt" + "io" + "math/bits" + "sync" +) + +const ( + maxCodeLen = 16 // max length of Huffman code + maxCodeLenMask = 15 // mask for max length of Huffman code + // The next three numbers come from the RFC section 3.2.7, with the + // additional proviso in section 3.2.5 which implies that distance codes + // 30 and 31 should never occur in compressed data. + maxNumLit = 286 + maxNumDist = 30 + numCodes = 19 // number of codes in Huffman meta-code + + debugDecode = false +) + +// Value of length - 3 and extra bits. 
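The lengthExtra table defined just below stores, for every length symbol, the base match length minus 3 plus the number of extra bits to read. A worked example using only the table entry and RFC 1951 (standalone and illustrative, not part of the vendored file): length symbol 269 maps to decCodeToLen[269-257] = {length: 0x10, extra: 0x2}, so with extra bits 0b10 the decoded match length is 16 + 3 + 2 = 21, inside the 19..22 range RFC 1951 assigns to that symbol.

package main

import "fmt"

func main() {
	// decCodeToLen[269-257] from the table below.
	val := struct{ length, extra uint8 }{length: 0x10, extra: 0x2}
	extraBits := uint32(2) // the two extra bits read from the stream
	length := int(val.length) + 3 + int(extraBits)
	fmt.Println(length) // 21
}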
+type lengthExtra struct { + length, extra uint8 +} + +var decCodeToLen = [32]lengthExtra{{length: 0x0, extra: 0x0}, {length: 0x1, extra: 0x0}, {length: 0x2, extra: 0x0}, {length: 0x3, extra: 0x0}, {length: 0x4, extra: 0x0}, {length: 0x5, extra: 0x0}, {length: 0x6, extra: 0x0}, {length: 0x7, extra: 0x0}, {length: 0x8, extra: 0x1}, {length: 0xa, extra: 0x1}, {length: 0xc, extra: 0x1}, {length: 0xe, extra: 0x1}, {length: 0x10, extra: 0x2}, {length: 0x14, extra: 0x2}, {length: 0x18, extra: 0x2}, {length: 0x1c, extra: 0x2}, {length: 0x20, extra: 0x3}, {length: 0x28, extra: 0x3}, {length: 0x30, extra: 0x3}, {length: 0x38, extra: 0x3}, {length: 0x40, extra: 0x4}, {length: 0x50, extra: 0x4}, {length: 0x60, extra: 0x4}, {length: 0x70, extra: 0x4}, {length: 0x80, extra: 0x5}, {length: 0xa0, extra: 0x5}, {length: 0xc0, extra: 0x5}, {length: 0xe0, extra: 0x5}, {length: 0xff, extra: 0x0}, {length: 0x0, extra: 0x0}, {length: 0x0, extra: 0x0}, {length: 0x0, extra: 0x0}} + +var bitMask32 = [32]uint32{ + 0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 0xFF, + 0x1FF, 0x3FF, 0x7FF, 0xFFF, 0x1FFF, 0x3FFF, 0x7FFF, 0xFFFF, + 0x1ffff, 0x3ffff, 0x7FFFF, 0xfFFFF, 0x1fFFFF, 0x3fFFFF, 0x7fFFFF, 0xffFFFF, + 0x1ffFFFF, 0x3ffFFFF, 0x7ffFFFF, 0xfffFFFF, 0x1fffFFFF, 0x3fffFFFF, 0x7fffFFFF, +} // up to 32 bits + +// Initialize the fixedHuffmanDecoder only once upon first use. +var fixedOnce sync.Once +var fixedHuffmanDecoder huffmanDecoder + +// A CorruptInputError reports the presence of corrupt input at a given offset. +type CorruptInputError = flate.CorruptInputError + +// An InternalError reports an error in the flate code itself. +type InternalError string + +func (e InternalError) Error() string { return "flate: internal error: " + string(e) } + +// A ReadError reports an error encountered while reading input. +// +// Deprecated: No longer returned. +type ReadError = flate.ReadError + +// A WriteError reports an error encountered while writing output. +// +// Deprecated: No longer returned. +type WriteError = flate.WriteError + +// Resetter resets a ReadCloser returned by NewReader or NewReaderDict to +// to switch to a new underlying Reader. This permits reusing a ReadCloser +// instead of allocating a new one. +type Resetter interface { + // Reset discards any buffered data and resets the Resetter as if it was + // newly initialized with the given reader. + Reset(r io.Reader, dict []byte) error +} + +// The data structure for decoding Huffman tables is based on that of +// zlib. There is a lookup table of a fixed bit width (huffmanChunkBits), +// For codes smaller than the table width, there are multiple entries +// (each combination of trailing bits has the same value). For codes +// larger than the table width, the table contains a link to an overflow +// table. The width of each entry in the link table is the maximum code +// size minus the chunk width. +// +// Note that you can do a lookup in the table even without all bits +// filled. Since the extra bits are zero, and the DEFLATE Huffman codes +// have the property that shorter codes come before longer ones, the +// bit length estimate in the result is a lower bound on the actual +// number of bits. 
+//
+// See the following:
+//	http://www.gzip.org/algorithm.txt
+
+// chunk & 15 is number of bits
+// chunk >> 4 is value, including table link
+
+const (
+	huffmanChunkBits  = 9
+	huffmanNumChunks  = 1 << huffmanChunkBits
+	huffmanCountMask  = 15
+	huffmanValueShift = 4
+)
+
+type huffmanDecoder struct {
+	maxRead  int                       // the maximum number of bits we can read and not overread
+	chunks   *[huffmanNumChunks]uint16 // chunks as described above
+	links    [][]uint16                // overflow links
+	linkMask uint32                    // mask the width of the link table
+}
+
+// Initialize Huffman decoding tables from array of code lengths.
+// Following this function, h is guaranteed to be initialized into a complete
+// tree (i.e., neither over-subscribed nor under-subscribed). The exception is a
+// degenerate case where the tree has only a single symbol with length 1. Empty
+// trees are permitted.
+func (h *huffmanDecoder) init(lengths []int) bool {
+	// Sanity enables additional runtime tests during Huffman
+	// table construction. It's intended to be used during
+	// development to supplement the currently ad-hoc unit tests.
+	const sanity = false
+
+	if h.chunks == nil {
+		h.chunks = &[huffmanNumChunks]uint16{}
+	}
+	if h.maxRead != 0 {
+		*h = huffmanDecoder{chunks: h.chunks, links: h.links}
+	}
+
+	// Count number of codes of each length,
+	// compute maxRead and max length.
+	var count [maxCodeLen]int
+	var min, max int
+	for _, n := range lengths {
+		if n == 0 {
+			continue
+		}
+		if min == 0 || n < min {
+			min = n
+		}
+		if n > max {
+			max = n
+		}
+		count[n&maxCodeLenMask]++
+	}
+
+	// Empty tree. The decompressor.huffSym function will fail later if the tree
+	// is used. Technically, an empty tree is only valid for the HDIST tree and
+	// not the HCLEN and HLIT tree. However, a stream with an empty HCLEN tree
+	// is guaranteed to fail since it will attempt to use the tree to decode the
+	// codes for the HLIT and HDIST trees. Similarly, an empty HLIT tree is
+	// guaranteed to fail later since the compressed data section must be
+	// composed of at least one symbol (the end-of-block marker).
+	if max == 0 {
+		return true
+	}
+
+	code := 0
+	var nextcode [maxCodeLen]int
+	for i := min; i <= max; i++ {
+		code <<= 1
+		nextcode[i&maxCodeLenMask] = code
+		code += count[i&maxCodeLenMask]
+	}
+
+	// Check that the coding is complete (i.e., that we've
+	// assigned all 2-to-the-max possible bit sequences).
+	// Exception: To be compatible with zlib, we also need to
+	// accept degenerate single-code codings. See also
+	// TestDegenerateHuffmanCoding.
+	if code != 1<<uint(max) && !(code == 1 && max == 1) {
+		return false
+	}
+
+	h.maxRead = min
+
+	chunks := h.chunks[:]
+	for i := range chunks {
+		chunks[i] = 0
+	}
+
+	if max > huffmanChunkBits {
+		numLinks := 1 << (uint(max) - huffmanChunkBits)
+		h.linkMask = uint32(numLinks - 1)
+
+		// create link tables
+		link := nextcode[huffmanChunkBits+1] >> 1
+		if cap(h.links) < huffmanNumChunks-link {
+			h.links = make([][]uint16, huffmanNumChunks-link)
+		} else {
+			h.links = h.links[:huffmanNumChunks-link]
+		}
+		for j := uint(link); j < huffmanNumChunks; j++ {
+			reverse := int(bits.Reverse16(uint16(j)))
+			reverse >>= uint(16 - huffmanChunkBits)
+			off := j - uint(link)
+			if sanity && h.chunks[reverse] != 0 {
+				panic("impossible: overwriting existing chunk")
+			}
+			h.chunks[reverse] = uint16(off<<huffmanValueShift | (huffmanChunkBits + 1))
+			h.links[off] = make([]uint16, numLinks)
+		}
+	} else {
+		h.links = h.links[:0]
+	}
+
+	for i, n := range lengths {
+		if n == 0 {
+			continue
+		}
+		code := nextcode[n]
+		nextcode[n]++
+		chunk := uint16(i<<huffmanValueShift | n)
+		reverse := int(bits.Reverse16(uint16(code)))
+		reverse >>= uint(16 - n)
+		if n <= huffmanChunkBits {
+			for off := reverse; off < len(h.chunks); off += 1 << uint(n) {
+				// We should never need to overwrite
+				// an existing chunk. Also, 0 is
+				// never a valid chunk, because the
+				// lower 4 "count" bits should be
+				// between 1 and 15.
+ if sanity && h.chunks[off] != 0 { + panic("impossible: overwriting existing chunk") + } + h.chunks[off] = chunk + } + } else { + j := reverse & (huffmanNumChunks - 1) + if sanity && h.chunks[j]&huffmanCountMask != huffmanChunkBits+1 { + // Longer codes should have been + // associated with a link table above. + panic("impossible: not an indirect chunk") + } + value := h.chunks[j] >> huffmanValueShift + linktab := h.links[value] + reverse >>= huffmanChunkBits + for off := reverse; off < len(linktab); off += 1 << uint(n-huffmanChunkBits) { + if sanity && linktab[off] != 0 { + panic("impossible: overwriting existing chunk") + } + linktab[off] = chunk + } + } + } + + if sanity { + // Above we've sanity checked that we never overwrote + // an existing entry. Here we additionally check that + // we filled the tables completely. + for i, chunk := range h.chunks { + if chunk == 0 { + // As an exception, in the degenerate + // single-code case, we allow odd + // chunks to be missing. + if code == 1 && i%2 == 1 { + continue + } + panic("impossible: missing chunk") + } + } + for _, linktab := range h.links { + for _, chunk := range linktab { + if chunk == 0 { + panic("impossible: missing chunk") + } + } + } + } + + return true +} + +// The actual read interface needed by NewReader. +// If the passed in io.Reader does not also have ReadByte, +// the NewReader will introduce its own buffering. +type Reader interface { + io.Reader + io.ByteReader +} + +// Decompress state. +type decompressor struct { + // Input source. + r Reader + roffset int64 + + // Huffman decoders for literal/length, distance. + h1, h2 huffmanDecoder + + // Length arrays used to define Huffman codes. + bits *[maxNumLit + maxNumDist]int + codebits *[numCodes]int + + // Output history, buffer. + dict dictDecoder + + // Next step in the decompression, + // and decompression state. + step func(*decompressor) + stepState int + err error + toRead []byte + hl, hd *huffmanDecoder + copyLen int + copyDist int + + // Temporary buffer (avoids repeated allocation). + buf [4]byte + + // Input bits, in top of b. + b uint32 + + nb uint + final bool +} + +func (f *decompressor) nextBlock() { + for f.nb < 1+2 { + if f.err = f.moreBits(); f.err != nil { + return + } + } + f.final = f.b&1 == 1 + f.b >>= 1 + typ := f.b & 3 + f.b >>= 2 + f.nb -= 1 + 2 + switch typ { + case 0: + f.dataBlock() + if debugDecode { + fmt.Println("stored block") + } + case 1: + // compressed, fixed Huffman tables + f.hl = &fixedHuffmanDecoder + f.hd = nil + f.huffmanBlockDecoder()() + if debugDecode { + fmt.Println("predefinied huffman block") + } + case 2: + // compressed, dynamic Huffman tables + if f.err = f.readHuffman(); f.err != nil { + break + } + f.hl = &f.h1 + f.hd = &f.h2 + f.huffmanBlockDecoder()() + if debugDecode { + fmt.Println("dynamic huffman block") + } + default: + // 3 is reserved. + if debugDecode { + fmt.Println("reserved data block encountered") + } + f.err = CorruptInputError(f.roffset) + } +} + +func (f *decompressor) Read(b []byte) (int, error) { + for { + if len(f.toRead) > 0 { + n := copy(b, f.toRead) + f.toRead = f.toRead[n:] + if len(f.toRead) == 0 { + return n, f.err + } + return n, nil + } + if f.err != nil { + return 0, f.err + } + f.step(f) + if f.err != nil && len(f.toRead) == 0 { + f.toRead = f.dict.readFlush() // Flush what's left in case of error + } + } +} + +// Support the io.WriteTo interface for io.Copy and friends. 
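Because the decompressor implements WriteTo (below), io.Copy can stream decompressed data straight into a writer without an intermediate buffer. A minimal usage sketch against the public github.com/klauspost/compress/flate API; it is illustrative only and not part of this file.

package main

import (
	"bytes"
	"io"
	"os"

	"github.com/klauspost/compress/flate"
)

func main() {
	// Produce a small DEFLATE stream.
	var compressed bytes.Buffer
	w, _ := flate.NewWriter(&compressed, flate.BestSpeed)
	w.Write([]byte("hello, deflate\n"))
	w.Close()

	// io.Copy detects the io.WriterTo implementation and uses it directly.
	r := flate.NewReader(&compressed)
	defer r.Close()
	io.Copy(os.Stdout, r)
}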
+func (f *decompressor) WriteTo(w io.Writer) (int64, error) {
+	total := int64(0)
+	flushed := false
+	for {
+		if len(f.toRead) > 0 {
+			n, err := w.Write(f.toRead)
+			total += int64(n)
+			if err != nil {
+				f.err = err
+				return total, err
+			}
+			if n != len(f.toRead) {
+				return total, io.ErrShortWrite
+			}
+			f.toRead = f.toRead[:0]
+		}
+		if f.err != nil && flushed {
+			if f.err == io.EOF {
+				return total, nil
+			}
+			return total, f.err
+		}
+		if f.err == nil {
+			f.step(f)
+		}
+		if len(f.toRead) == 0 && f.err != nil && !flushed {
+			f.toRead = f.dict.readFlush() // Flush what's left in case of error
+			flushed = true
+		}
+	}
+}
+
+func (f *decompressor) Close() error {
+	if f.err == io.EOF {
+		return nil
+	}
+	return f.err
+}
+
+// RFC 1951 section 3.2.7.
+// Compression with dynamic Huffman codes
+
+var codeOrder = [...]int{16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15}
+
+func (f *decompressor) readHuffman() error {
+	// HLIT[5], HDIST[5], HCLEN[4].
+	for f.nb < 5+5+4 {
+		if err := f.moreBits(); err != nil {
+			return err
+		}
+	}
+	nlit := int(f.b&0x1F) + 257
+	if nlit > maxNumLit {
+		if debugDecode {
+			fmt.Println("nlit > maxNumLit", nlit)
+		}
+		return CorruptInputError(f.roffset)
+	}
+	f.b >>= 5
+	ndist := int(f.b&0x1F) + 1
+	if ndist > maxNumDist {
+		if debugDecode {
+			fmt.Println("ndist > maxNumDist", ndist)
+		}
+		return CorruptInputError(f.roffset)
+	}
+	f.b >>= 5
+	nclen := int(f.b&0xF) + 4
+	// numCodes is 19, so nclen is always valid.
+	f.b >>= 4
+	f.nb -= 5 + 5 + 4
+
+	// (HCLEN+4)*3 bits: code lengths in the magic codeOrder order.
+	for i := 0; i < nclen; i++ {
+		for f.nb < 3 {
+			if err := f.moreBits(); err != nil {
+				return err
+			}
+		}
+		f.codebits[codeOrder[i]] = int(f.b & 0x7)
+		f.b >>= 3
+		f.nb -= 3
+	}
+	for i := nclen; i < len(codeOrder); i++ {
+		f.codebits[codeOrder[i]] = 0
+	}
+	if !f.h1.init(f.codebits[0:]) {
+		if debugDecode {
+			fmt.Println("init codebits failed")
+		}
+		return CorruptInputError(f.roffset)
+	}
+
+	// HLIT + 257 code lengths, HDIST + 1 code lengths,
+	// using the code length Huffman code.
+	for i, n := 0, nlit+ndist; i < n; {
+		x, err := f.huffSym(&f.h1)
+		if err != nil {
+			return err
+		}
+		if x < 16 {
+			// Actual length.
+			f.bits[i] = x
+			i++
+			continue
+		}
+		// Repeat previous length or zero.
+		var rep int
+		var nb uint
+		var b int
+		switch x {
+		default:
+			return InternalError("unexpected length code")
+		case 16:
+			rep = 3
+			nb = 2
+			if i == 0 {
+				if debugDecode {
+					fmt.Println("i==0")
+				}
+				return CorruptInputError(f.roffset)
+			}
+			b = f.bits[i-1]
+		case 17:
+			rep = 3
+			nb = 3
+			b = 0
+		case 18:
+			rep = 11
+			nb = 7
+			b = 0
+		}
+		for f.nb < nb {
+			if err := f.moreBits(); err != nil {
+				if debugDecode {
+					fmt.Println("morebits:", err)
+				}
+				return err
+			}
+		}
+		rep += int(f.b & uint32(1<<(nb&regSizeMaskUint32)-1))
+		f.b >>= nb & regSizeMaskUint32
+		f.nb -= nb
+		if i+rep > n {
+			if debugDecode {
+				fmt.Println("i+rep > n", i, rep, n)
+			}
+			return CorruptInputError(f.roffset)
+		}
+		for j := 0; j < rep; j++ {
+			f.bits[i] = b
+			i++
+		}
+	}
+
+	if !f.h1.init(f.bits[0:nlit]) || !f.h2.init(f.bits[nlit:nlit+ndist]) {
+		if debugDecode {
+			fmt.Println("init2 failed")
+		}
+		return CorruptInputError(f.roffset)
+	}
+
+	// As an optimization, we can initialize the maxRead bits to read at a time
+	// for the HLIT tree to the length of the EOB marker since we know that
+	// every block must terminate with one. This preserves the property that
+	// we never read any extra bytes after the end of the DEFLATE stream.
+ if f.h1.maxRead < f.bits[endBlockMarker] { + f.h1.maxRead = f.bits[endBlockMarker] + } + if !f.final { + // If not the final block, the smallest block possible is + // a predefined table, BTYPE=01, with a single EOB marker. + // This will take up 3 + 7 bits. + f.h1.maxRead += 10 + } + + return nil +} + +// Copy a single uncompressed data block from input to output. +func (f *decompressor) dataBlock() { + // Uncompressed. + // Discard current half-byte. + left := (f.nb) & 7 + f.nb -= left + f.b >>= left + + offBytes := f.nb >> 3 + // Unfilled values will be overwritten. + f.buf[0] = uint8(f.b) + f.buf[1] = uint8(f.b >> 8) + f.buf[2] = uint8(f.b >> 16) + f.buf[3] = uint8(f.b >> 24) + + f.roffset += int64(offBytes) + f.nb, f.b = 0, 0 + + // Length then ones-complement of length. + nr, err := io.ReadFull(f.r, f.buf[offBytes:4]) + f.roffset += int64(nr) + if err != nil { + f.err = noEOF(err) + return + } + n := uint16(f.buf[0]) | uint16(f.buf[1])<<8 + nn := uint16(f.buf[2]) | uint16(f.buf[3])<<8 + if nn != ^n { + if debugDecode { + ncomp := ^n + fmt.Println("uint16(nn) != uint16(^n)", nn, ncomp) + } + f.err = CorruptInputError(f.roffset) + return + } + + if n == 0 { + f.toRead = f.dict.readFlush() + f.finishBlock() + return + } + + f.copyLen = int(n) + f.copyData() +} + +// copyData copies f.copyLen bytes from the underlying reader into f.hist. +// It pauses for reads when f.hist is full. +func (f *decompressor) copyData() { + buf := f.dict.writeSlice() + if len(buf) > f.copyLen { + buf = buf[:f.copyLen] + } + + cnt, err := io.ReadFull(f.r, buf) + f.roffset += int64(cnt) + f.copyLen -= cnt + f.dict.writeMark(cnt) + if err != nil { + f.err = noEOF(err) + return + } + + if f.dict.availWrite() == 0 || f.copyLen > 0 { + f.toRead = f.dict.readFlush() + f.step = (*decompressor).copyData + return + } + f.finishBlock() +} + +func (f *decompressor) finishBlock() { + if f.final { + if f.dict.availRead() > 0 { + f.toRead = f.dict.readFlush() + } + f.err = io.EOF + } + f.step = (*decompressor).nextBlock +} + +// noEOF returns err, unless err == io.EOF, in which case it returns io.ErrUnexpectedEOF. +func noEOF(e error) error { + if e == io.EOF { + return io.ErrUnexpectedEOF + } + return e +} + +func (f *decompressor) moreBits() error { + c, err := f.r.ReadByte() + if err != nil { + return noEOF(err) + } + f.roffset++ + f.b |= uint32(c) << (f.nb & regSizeMaskUint32) + f.nb += 8 + return nil +} + +// Read the next Huffman-encoded symbol from f according to h. +func (f *decompressor) huffSym(h *huffmanDecoder) (int, error) { + // Since a huffmanDecoder can be empty or be composed of a degenerate tree + // with single element, huffSym must error on these two edge cases. In both + // cases, the chunks slice will be 0 for the invalid sequence, leading it + // satisfy the n == 0 check below. + n := uint(h.maxRead) + // Optimization. Compiler isn't smart enough to keep f.b,f.nb in registers, + // but is smart enough to keep local variables in registers, so use nb and b, + // inline call to moreBits and reassign b,nb back to f on return. 
+ nb, b := f.nb, f.b + for { + for nb < n { + c, err := f.r.ReadByte() + if err != nil { + f.b = b + f.nb = nb + return 0, noEOF(err) + } + f.roffset++ + b |= uint32(c) << (nb & regSizeMaskUint32) + nb += 8 + } + chunk := h.chunks[b&(huffmanNumChunks-1)] + n = uint(chunk & huffmanCountMask) + if n > huffmanChunkBits { + chunk = h.links[chunk>>huffmanValueShift][(b>>huffmanChunkBits)&h.linkMask] + n = uint(chunk & huffmanCountMask) + } + if n <= nb { + if n == 0 { + f.b = b + f.nb = nb + if debugDecode { + fmt.Println("huffsym: n==0") + } + f.err = CorruptInputError(f.roffset) + return 0, f.err + } + f.b = b >> (n & regSizeMaskUint32) + f.nb = nb - n + return int(chunk >> huffmanValueShift), nil + } + } +} + +func makeReader(r io.Reader) Reader { + if rr, ok := r.(Reader); ok { + return rr + } + return bufio.NewReader(r) +} + +func fixedHuffmanDecoderInit() { + fixedOnce.Do(func() { + // These come from the RFC section 3.2.6. + var bits [288]int + for i := 0; i < 144; i++ { + bits[i] = 8 + } + for i := 144; i < 256; i++ { + bits[i] = 9 + } + for i := 256; i < 280; i++ { + bits[i] = 7 + } + for i := 280; i < 288; i++ { + bits[i] = 8 + } + fixedHuffmanDecoder.init(bits[:]) + }) +} + +func (f *decompressor) Reset(r io.Reader, dict []byte) error { + *f = decompressor{ + r: makeReader(r), + bits: f.bits, + codebits: f.codebits, + h1: f.h1, + h2: f.h2, + dict: f.dict, + step: (*decompressor).nextBlock, + } + f.dict.init(maxMatchOffset, dict) + return nil +} + +// NewReader returns a new ReadCloser that can be used +// to read the uncompressed version of r. +// If r does not also implement io.ByteReader, +// the decompressor may read more data than necessary from r. +// It is the caller's responsibility to call Close on the ReadCloser +// when finished reading. +// +// The ReadCloser returned by NewReader also implements Resetter. +func NewReader(r io.Reader) io.ReadCloser { + fixedHuffmanDecoderInit() + + var f decompressor + f.r = makeReader(r) + f.bits = new([maxNumLit + maxNumDist]int) + f.codebits = new([numCodes]int) + f.step = (*decompressor).nextBlock + f.dict.init(maxMatchOffset, nil) + return &f +} + +// NewReaderDict is like NewReader but initializes the reader +// with a preset dictionary. The returned Reader behaves as if +// the uncompressed data stream started with the given dictionary, +// which has already been read. NewReaderDict is typically used +// to read data compressed by NewWriterDict. +// +// The ReadCloser returned by NewReader also implements Resetter. +func NewReaderDict(r io.Reader, dict []byte) io.ReadCloser { + fixedHuffmanDecoderInit() + + var f decompressor + f.r = makeReader(r) + f.bits = new([maxNumLit + maxNumDist]int) + f.codebits = new([numCodes]int) + f.step = (*decompressor).nextBlock + f.dict.init(maxMatchOffset, dict) + return &f +} diff --git a/vendor/github.com/klauspost/compress/flate/inflate_gen.go b/vendor/github.com/klauspost/compress/flate/inflate_gen.go new file mode 100644 index 000000000..61342b6b8 --- /dev/null +++ b/vendor/github.com/klauspost/compress/flate/inflate_gen.go @@ -0,0 +1,1283 @@ +// Code generated by go generate gen_inflate.go. DO NOT EDIT. + +package flate + +import ( + "bufio" + "bytes" + "fmt" + "math/bits" + "strings" +) + +// Decode a single Huffman block from f. +// hl and hd are the Huffman states for the lit/length values +// and the distance values, respectively. If hd == nil, using the +// fixed distance encoding associated with fixed Huffman blocks. 
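NewReader, NewReaderDict and Resetter above are the package entry points; the per-reader-type decoders that follow are generated specializations behind them. A hedged sketch of reader reuse via Resetter (illustrative only; the deflate helper is mine and error handling is omitted for brevity):

package main

import (
	"bytes"
	"io"

	"github.com/klauspost/compress/flate"
)

// deflate compresses s into a raw DEFLATE stream (illustrative helper).
func deflate(s string) []byte {
	var b bytes.Buffer
	w, _ := flate.NewWriter(&b, flate.DefaultCompression)
	w.Write([]byte(s))
	w.Close()
	return b.Bytes()
}

func main() {
	r := flate.NewReader(bytes.NewReader(deflate("first stream")))
	io.Copy(io.Discard, r)
	r.Close()

	// The returned ReadCloser also implements Resetter, so it can be pointed at
	// a new source (and optional preset dictionary) instead of allocating a new reader.
	r.(flate.Resetter).Reset(bytes.NewReader(deflate("second stream")), nil)
	io.Copy(io.Discard, r)
	r.Close()
}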
+func (f *decompressor) huffmanBytesBuffer() { + const ( + stateInit = iota // Zero value must be stateInit + stateDict + ) + fr := f.r.(*bytes.Buffer) + + // Optimization. Compiler isn't smart enough to keep f.b,f.nb in registers, + // but is smart enough to keep local variables in registers, so use nb and b, + // inline call to moreBits and reassign b,nb back to f on return. + fnb, fb, dict := f.nb, f.b, &f.dict + + switch f.stepState { + case stateInit: + goto readLiteral + case stateDict: + goto copyHistory + } + +readLiteral: + // Read literal and/or (length, distance) according to RFC section 3.2.3. + { + var v int + { + // Inlined v, err := f.huffSym(f.hl) + // Since a huffmanDecoder can be empty or be composed of a degenerate tree + // with single element, huffSym must error on these two edge cases. In both + // cases, the chunks slice will be 0 for the invalid sequence, leading it + // satisfy the n == 0 check below. + n := uint(f.hl.maxRead) + for { + for fnb < n { + c, err := fr.ReadByte() + if err != nil { + f.b, f.nb = fb, fnb + f.err = noEOF(err) + return + } + f.roffset++ + fb |= uint32(c) << (fnb & regSizeMaskUint32) + fnb += 8 + } + chunk := f.hl.chunks[fb&(huffmanNumChunks-1)] + n = uint(chunk & huffmanCountMask) + if n > huffmanChunkBits { + chunk = f.hl.links[chunk>>huffmanValueShift][(fb>>huffmanChunkBits)&f.hl.linkMask] + n = uint(chunk & huffmanCountMask) + } + if n <= fnb { + if n == 0 { + f.b, f.nb = fb, fnb + if debugDecode { + fmt.Println("huffsym: n==0") + } + f.err = CorruptInputError(f.roffset) + return + } + fb = fb >> (n & regSizeMaskUint32) + fnb = fnb - n + v = int(chunk >> huffmanValueShift) + break + } + } + } + + var length int + switch { + case v < 256: + dict.writeByte(byte(v)) + if dict.availWrite() == 0 { + f.toRead = dict.readFlush() + f.step = (*decompressor).huffmanBytesBuffer + f.stepState = stateInit + f.b, f.nb = fb, fnb + return + } + goto readLiteral + case v == 256: + f.b, f.nb = fb, fnb + f.finishBlock() + return + // otherwise, reference to older data + case v < 265: + length = v - (257 - 3) + case v < maxNumLit: + val := decCodeToLen[(v - 257)] + length = int(val.length) + 3 + n := uint(val.extra) + for fnb < n { + c, err := fr.ReadByte() + if err != nil { + f.b, f.nb = fb, fnb + if debugDecode { + fmt.Println("morebits n>0:", err) + } + f.err = err + return + } + f.roffset++ + fb |= uint32(c) << (fnb & regSizeMaskUint32) + fnb += 8 + } + length += int(fb & bitMask32[n]) + fb >>= n & regSizeMaskUint32 + fnb -= n + default: + if debugDecode { + fmt.Println(v, ">= maxNumLit") + } + f.err = CorruptInputError(f.roffset) + f.b, f.nb = fb, fnb + return + } + + var dist uint32 + if f.hd == nil { + for fnb < 5 { + c, err := fr.ReadByte() + if err != nil { + f.b, f.nb = fb, fnb + if debugDecode { + fmt.Println("morebits f.nb<5:", err) + } + f.err = err + return + } + f.roffset++ + fb |= uint32(c) << (fnb & regSizeMaskUint32) + fnb += 8 + } + dist = uint32(bits.Reverse8(uint8(fb & 0x1F << 3))) + fb >>= 5 + fnb -= 5 + } else { + // Since a huffmanDecoder can be empty or be composed of a degenerate tree + // with single element, huffSym must error on these two edge cases. In both + // cases, the chunks slice will be 0 for the invalid sequence, leading it + // satisfy the n == 0 check below. + n := uint(f.hd.maxRead) + // Optimization. Compiler isn't smart enough to keep f.b,f.nb in registers, + // but is smart enough to keep local variables in registers, so use nb and b, + // inline call to moreBits and reassign b,nb back to f on return. 
+			for {
+				for fnb < n {
+					c, err := fr.ReadByte()
+					if err != nil {
+						f.b, f.nb = fb, fnb
+						f.err = noEOF(err)
+						return
+					}
+					f.roffset++
+					fb |= uint32(c) << (fnb & regSizeMaskUint32)
+					fnb += 8
+				}
+				chunk := f.hd.chunks[fb&(huffmanNumChunks-1)]
+				n = uint(chunk & huffmanCountMask)
+				if n > huffmanChunkBits {
+					chunk = f.hd.links[chunk>>huffmanValueShift][(fb>>huffmanChunkBits)&f.hd.linkMask]
+					n = uint(chunk & huffmanCountMask)
+				}
+				if n <= fnb {
+					if n == 0 {
+						f.b, f.nb = fb, fnb
+						if debugDecode {
+							fmt.Println("huffsym: n==0")
+						}
+						f.err = CorruptInputError(f.roffset)
+						return
+					}
+					fb = fb >> (n & regSizeMaskUint32)
+					fnb = fnb - n
+					dist = uint32(chunk >> huffmanValueShift)
+					break
+				}
+			}
+		}
+
+		switch {
+		case dist < 4:
+			dist++
+		case dist < maxNumDist:
+			nb := uint(dist-2) >> 1
+			// have 1 bit in bottom of dist, need nb more.
+			extra := (dist & 1) << (nb & regSizeMaskUint32)
+			for fnb < nb {
+				c, err := fr.ReadByte()
+				if err != nil {
+					f.b, f.nb = fb, fnb
+					if debugDecode {
+						fmt.Println("morebits f.nb<nb:", err)
+					}
+					f.err = err
+					return
+				}
+				f.roffset++
+				fb |= uint32(c) << (fnb & regSizeMaskUint32)
+				fnb += 8
+			}
+			extra |= fb & bitMask32[nb]
+			fb >>= nb & regSizeMaskUint32
+			fnb -= nb
+			dist = 1<<((nb+1)&regSizeMaskUint32) + 1 + extra
+			// slower: dist = bitMask32[nb+1] + 2 + extra
+		default:
+			f.b, f.nb = fb, fnb
+			if debugDecode {
+				fmt.Println("dist too big:", dist, maxNumDist)
+			}
+			f.err = CorruptInputError(f.roffset)
+			return
+		}
+
+		// No check on length; encoding can be prescient.
+		if dist > uint32(dict.histSize()) {
+			f.b, f.nb = fb, fnb
+			if debugDecode {
+				fmt.Println("dist > dict.histSize():", dist, dict.histSize())
+			}
+			f.err = CorruptInputError(f.roffset)
+			return
+		}
+
+		f.copyLen, f.copyDist = length, int(dist)
+		goto copyHistory
+	}
+
+copyHistory:
+	// Perform a backwards copy according to RFC section 3.2.3.
+	{
+		cnt := dict.tryWriteCopy(f.copyDist, f.copyLen)
+		if cnt == 0 {
+			cnt = dict.writeCopy(f.copyDist, f.copyLen)
+		}
+		f.copyLen -= cnt
+
+		if dict.availWrite() == 0 || f.copyLen > 0 {
+			f.toRead = dict.readFlush()
+			f.step = (*decompressor).huffmanBytesBuffer // We need to continue this work
+			f.stepState = stateDict
+			f.b, f.nb = fb, fnb
+			return
+		}
+		goto readLiteral
+	}
+	// Not reached
+}
+
+// Decode a single Huffman block from f.
+// hl and hd are the Huffman states for the lit/length values
+// and the distance values, respectively. If hd == nil, using the
+// fixed distance encoding associated with fixed Huffman blocks.
+func (f *decompressor) huffmanBytesReader() {
+	const (
+		stateInit = iota // Zero value must be stateInit
+		stateDict
+	)
+	fr := f.r.(*bytes.Reader)
+
+	// Optimization. Compiler isn't smart enough to keep f.b,f.nb in registers,
+	// but is smart enough to keep local variables in registers, so use nb and b,
+	// inline call to moreBits and reassign b,nb back to f on return.
+	fnb, fb, dict := f.nb, f.b, &f.dict
+
+	switch f.stepState {
+	case stateInit:
+		goto readLiteral
+	case stateDict:
+		goto copyHistory
+	}
+
+readLiteral:
+	// Read literal and/or (length, distance) according to RFC section 3.2.3.
+	{
+		var v int
+		{
+			// Inlined v, err := f.huffSym(f.hl)
+			// Since a huffmanDecoder can be empty or be composed of a degenerate tree
+			// with single element, huffSym must error on these two edge cases. In both
+			// cases, the chunks slice will be 0 for the invalid sequence, leading it
+			// satisfy the n == 0 check below.
+ n := uint(f.hl.maxRead) + for { + for fnb < n { + c, err := fr.ReadByte() + if err != nil { + f.b, f.nb = fb, fnb + f.err = noEOF(err) + return + } + f.roffset++ + fb |= uint32(c) << (fnb & regSizeMaskUint32) + fnb += 8 + } + chunk := f.hl.chunks[fb&(huffmanNumChunks-1)] + n = uint(chunk & huffmanCountMask) + if n > huffmanChunkBits { + chunk = f.hl.links[chunk>>huffmanValueShift][(fb>>huffmanChunkBits)&f.hl.linkMask] + n = uint(chunk & huffmanCountMask) + } + if n <= fnb { + if n == 0 { + f.b, f.nb = fb, fnb + if debugDecode { + fmt.Println("huffsym: n==0") + } + f.err = CorruptInputError(f.roffset) + return + } + fb = fb >> (n & regSizeMaskUint32) + fnb = fnb - n + v = int(chunk >> huffmanValueShift) + break + } + } + } + + var length int + switch { + case v < 256: + dict.writeByte(byte(v)) + if dict.availWrite() == 0 { + f.toRead = dict.readFlush() + f.step = (*decompressor).huffmanBytesReader + f.stepState = stateInit + f.b, f.nb = fb, fnb + return + } + goto readLiteral + case v == 256: + f.b, f.nb = fb, fnb + f.finishBlock() + return + // otherwise, reference to older data + case v < 265: + length = v - (257 - 3) + case v < maxNumLit: + val := decCodeToLen[(v - 257)] + length = int(val.length) + 3 + n := uint(val.extra) + for fnb < n { + c, err := fr.ReadByte() + if err != nil { + f.b, f.nb = fb, fnb + if debugDecode { + fmt.Println("morebits n>0:", err) + } + f.err = err + return + } + f.roffset++ + fb |= uint32(c) << (fnb & regSizeMaskUint32) + fnb += 8 + } + length += int(fb & bitMask32[n]) + fb >>= n & regSizeMaskUint32 + fnb -= n + default: + if debugDecode { + fmt.Println(v, ">= maxNumLit") + } + f.err = CorruptInputError(f.roffset) + f.b, f.nb = fb, fnb + return + } + + var dist uint32 + if f.hd == nil { + for fnb < 5 { + c, err := fr.ReadByte() + if err != nil { + f.b, f.nb = fb, fnb + if debugDecode { + fmt.Println("morebits f.nb<5:", err) + } + f.err = err + return + } + f.roffset++ + fb |= uint32(c) << (fnb & regSizeMaskUint32) + fnb += 8 + } + dist = uint32(bits.Reverse8(uint8(fb & 0x1F << 3))) + fb >>= 5 + fnb -= 5 + } else { + // Since a huffmanDecoder can be empty or be composed of a degenerate tree + // with single element, huffSym must error on these two edge cases. In both + // cases, the chunks slice will be 0 for the invalid sequence, leading it + // satisfy the n == 0 check below. + n := uint(f.hd.maxRead) + // Optimization. Compiler isn't smart enough to keep f.b,f.nb in registers, + // but is smart enough to keep local variables in registers, so use nb and b, + // inline call to moreBits and reassign b,nb back to f on return. + for { + for fnb < n { + c, err := fr.ReadByte() + if err != nil { + f.b, f.nb = fb, fnb + f.err = noEOF(err) + return + } + f.roffset++ + fb |= uint32(c) << (fnb & regSizeMaskUint32) + fnb += 8 + } + chunk := f.hd.chunks[fb&(huffmanNumChunks-1)] + n = uint(chunk & huffmanCountMask) + if n > huffmanChunkBits { + chunk = f.hd.links[chunk>>huffmanValueShift][(fb>>huffmanChunkBits)&f.hd.linkMask] + n = uint(chunk & huffmanCountMask) + } + if n <= fnb { + if n == 0 { + f.b, f.nb = fb, fnb + if debugDecode { + fmt.Println("huffsym: n==0") + } + f.err = CorruptInputError(f.roffset) + return + } + fb = fb >> (n & regSizeMaskUint32) + fnb = fnb - n + dist = uint32(chunk >> huffmanValueShift) + break + } + } + } + + switch { + case dist < 4: + dist++ + case dist < maxNumDist: + nb := uint(dist-2) >> 1 + // have 1 bit in bottom of dist, need nb more. 
+			extra := (dist & 1) << (nb & regSizeMaskUint32)
+			for fnb < nb {
+				c, err := fr.ReadByte()
+				if err != nil {
+					f.b, f.nb = fb, fnb
+					if debugDecode {
+						fmt.Println("morebits f.nb<nb:", err)
+					}
+					f.err = err
+					return
+				}
+				f.roffset++
+				fb |= uint32(c) << (fnb & regSizeMaskUint32)
+				fnb += 8
+			}
+			extra |= fb & bitMask32[nb]
+			fb >>= nb & regSizeMaskUint32
+			fnb -= nb
+			dist = 1<<((nb+1)&regSizeMaskUint32) + 1 + extra
+			// slower: dist = bitMask32[nb+1] + 2 + extra
+		default:
+			f.b, f.nb = fb, fnb
+			if debugDecode {
+				fmt.Println("dist too big:", dist, maxNumDist)
+			}
+			f.err = CorruptInputError(f.roffset)
+			return
+		}
+
+		// No check on length; encoding can be prescient.
+		if dist > uint32(dict.histSize()) {
+			f.b, f.nb = fb, fnb
+			if debugDecode {
+				fmt.Println("dist > dict.histSize():", dist, dict.histSize())
+			}
+			f.err = CorruptInputError(f.roffset)
+			return
+		}
+
+		f.copyLen, f.copyDist = length, int(dist)
+		goto copyHistory
+	}
+
+copyHistory:
+	// Perform a backwards copy according to RFC section 3.2.3.
+	{
+		cnt := dict.tryWriteCopy(f.copyDist, f.copyLen)
+		if cnt == 0 {
+			cnt = dict.writeCopy(f.copyDist, f.copyLen)
+		}
+		f.copyLen -= cnt
+
+		if dict.availWrite() == 0 || f.copyLen > 0 {
+			f.toRead = dict.readFlush()
+			f.step = (*decompressor).huffmanBytesReader // We need to continue this work
+			f.stepState = stateDict
+			f.b, f.nb = fb, fnb
+			return
+		}
+		goto readLiteral
+	}
+	// Not reached
+}
+
+// Decode a single Huffman block from f.
+// hl and hd are the Huffman states for the lit/length values
+// and the distance values, respectively. If hd == nil, using the
+// fixed distance encoding associated with fixed Huffman blocks.
+func (f *decompressor) huffmanBufioReader() {
+	const (
+		stateInit = iota // Zero value must be stateInit
+		stateDict
+	)
+	fr := f.r.(*bufio.Reader)
+
+	// Optimization. Compiler isn't smart enough to keep f.b,f.nb in registers,
+	// but is smart enough to keep local variables in registers, so use nb and b,
+	// inline call to moreBits and reassign b,nb back to f on return.
+	fnb, fb, dict := f.nb, f.b, &f.dict
+
+	switch f.stepState {
+	case stateInit:
+		goto readLiteral
+	case stateDict:
+		goto copyHistory
+	}
+
+readLiteral:
+	// Read literal and/or (length, distance) according to RFC section 3.2.3.
+	{
+		var v int
+		{
+			// Inlined v, err := f.huffSym(f.hl)
+			// Since a huffmanDecoder can be empty or be composed of a degenerate tree
+			// with single element, huffSym must error on these two edge cases. In both
+			// cases, the chunks slice will be 0 for the invalid sequence, leading it
+			// satisfy the n == 0 check below.
+ n := uint(f.hl.maxRead) + for { + for fnb < n { + c, err := fr.ReadByte() + if err != nil { + f.b, f.nb = fb, fnb + f.err = noEOF(err) + return + } + f.roffset++ + fb |= uint32(c) << (fnb & regSizeMaskUint32) + fnb += 8 + } + chunk := f.hl.chunks[fb&(huffmanNumChunks-1)] + n = uint(chunk & huffmanCountMask) + if n > huffmanChunkBits { + chunk = f.hl.links[chunk>>huffmanValueShift][(fb>>huffmanChunkBits)&f.hl.linkMask] + n = uint(chunk & huffmanCountMask) + } + if n <= fnb { + if n == 0 { + f.b, f.nb = fb, fnb + if debugDecode { + fmt.Println("huffsym: n==0") + } + f.err = CorruptInputError(f.roffset) + return + } + fb = fb >> (n & regSizeMaskUint32) + fnb = fnb - n + v = int(chunk >> huffmanValueShift) + break + } + } + } + + var length int + switch { + case v < 256: + dict.writeByte(byte(v)) + if dict.availWrite() == 0 { + f.toRead = dict.readFlush() + f.step = (*decompressor).huffmanBufioReader + f.stepState = stateInit + f.b, f.nb = fb, fnb + return + } + goto readLiteral + case v == 256: + f.b, f.nb = fb, fnb + f.finishBlock() + return + // otherwise, reference to older data + case v < 265: + length = v - (257 - 3) + case v < maxNumLit: + val := decCodeToLen[(v - 257)] + length = int(val.length) + 3 + n := uint(val.extra) + for fnb < n { + c, err := fr.ReadByte() + if err != nil { + f.b, f.nb = fb, fnb + if debugDecode { + fmt.Println("morebits n>0:", err) + } + f.err = err + return + } + f.roffset++ + fb |= uint32(c) << (fnb & regSizeMaskUint32) + fnb += 8 + } + length += int(fb & bitMask32[n]) + fb >>= n & regSizeMaskUint32 + fnb -= n + default: + if debugDecode { + fmt.Println(v, ">= maxNumLit") + } + f.err = CorruptInputError(f.roffset) + f.b, f.nb = fb, fnb + return + } + + var dist uint32 + if f.hd == nil { + for fnb < 5 { + c, err := fr.ReadByte() + if err != nil { + f.b, f.nb = fb, fnb + if debugDecode { + fmt.Println("morebits f.nb<5:", err) + } + f.err = err + return + } + f.roffset++ + fb |= uint32(c) << (fnb & regSizeMaskUint32) + fnb += 8 + } + dist = uint32(bits.Reverse8(uint8(fb & 0x1F << 3))) + fb >>= 5 + fnb -= 5 + } else { + // Since a huffmanDecoder can be empty or be composed of a degenerate tree + // with single element, huffSym must error on these two edge cases. In both + // cases, the chunks slice will be 0 for the invalid sequence, leading it + // satisfy the n == 0 check below. + n := uint(f.hd.maxRead) + // Optimization. Compiler isn't smart enough to keep f.b,f.nb in registers, + // but is smart enough to keep local variables in registers, so use nb and b, + // inline call to moreBits and reassign b,nb back to f on return. + for { + for fnb < n { + c, err := fr.ReadByte() + if err != nil { + f.b, f.nb = fb, fnb + f.err = noEOF(err) + return + } + f.roffset++ + fb |= uint32(c) << (fnb & regSizeMaskUint32) + fnb += 8 + } + chunk := f.hd.chunks[fb&(huffmanNumChunks-1)] + n = uint(chunk & huffmanCountMask) + if n > huffmanChunkBits { + chunk = f.hd.links[chunk>>huffmanValueShift][(fb>>huffmanChunkBits)&f.hd.linkMask] + n = uint(chunk & huffmanCountMask) + } + if n <= fnb { + if n == 0 { + f.b, f.nb = fb, fnb + if debugDecode { + fmt.Println("huffsym: n==0") + } + f.err = CorruptInputError(f.roffset) + return + } + fb = fb >> (n & regSizeMaskUint32) + fnb = fnb - n + dist = uint32(chunk >> huffmanValueShift) + break + } + } + } + + switch { + case dist < 4: + dist++ + case dist < maxNumDist: + nb := uint(dist-2) >> 1 + // have 1 bit in bottom of dist, need nb more. 
+			extra := (dist & 1) << (nb & regSizeMaskUint32)
+			for fnb < nb {
+				c, err := fr.ReadByte()
+				if err != nil {
+					f.b, f.nb = fb, fnb
+					if debugDecode {
+						fmt.Println("morebits f.nb<nb:", err)
+					}
+					f.err = err
+					return
+				}
+				f.roffset++
+				fb |= uint32(c) << (fnb & regSizeMaskUint32)
+				fnb += 8
+			}
+			extra |= fb & bitMask32[nb]
+			fb >>= nb & regSizeMaskUint32
+			fnb -= nb
+			dist = 1<<((nb+1)&regSizeMaskUint32) + 1 + extra
+			// slower: dist = bitMask32[nb+1] + 2 + extra
+		default:
+			f.b, f.nb = fb, fnb
+			if debugDecode {
+				fmt.Println("dist too big:", dist, maxNumDist)
+			}
+			f.err = CorruptInputError(f.roffset)
+			return
+		}
+
+		// No check on length; encoding can be prescient.
+		if dist > uint32(dict.histSize()) {
+			f.b, f.nb = fb, fnb
+			if debugDecode {
+				fmt.Println("dist > dict.histSize():", dist, dict.histSize())
+			}
+			f.err = CorruptInputError(f.roffset)
+			return
+		}
+
+		f.copyLen, f.copyDist = length, int(dist)
+		goto copyHistory
+	}
+
+copyHistory:
+	// Perform a backwards copy according to RFC section 3.2.3.
+	{
+		cnt := dict.tryWriteCopy(f.copyDist, f.copyLen)
+		if cnt == 0 {
+			cnt = dict.writeCopy(f.copyDist, f.copyLen)
+		}
+		f.copyLen -= cnt
+
+		if dict.availWrite() == 0 || f.copyLen > 0 {
+			f.toRead = dict.readFlush()
+			f.step = (*decompressor).huffmanBufioReader // We need to continue this work
+			f.stepState = stateDict
+			f.b, f.nb = fb, fnb
+			return
+		}
+		goto readLiteral
+	}
+	// Not reached
+}
+
+// Decode a single Huffman block from f.
+// hl and hd are the Huffman states for the lit/length values
+// and the distance values, respectively. If hd == nil, using the
+// fixed distance encoding associated with fixed Huffman blocks.
+func (f *decompressor) huffmanStringsReader() {
+	const (
+		stateInit = iota // Zero value must be stateInit
+		stateDict
+	)
+	fr := f.r.(*strings.Reader)
+
+	// Optimization. Compiler isn't smart enough to keep f.b,f.nb in registers,
+	// but is smart enough to keep local variables in registers, so use nb and b,
+	// inline call to moreBits and reassign b,nb back to f on return.
+	fnb, fb, dict := f.nb, f.b, &f.dict
+
+	switch f.stepState {
+	case stateInit:
+		goto readLiteral
+	case stateDict:
+		goto copyHistory
+	}
+
+readLiteral:
+	// Read literal and/or (length, distance) according to RFC section 3.2.3.
+	{
+		var v int
+		{
+			// Inlined v, err := f.huffSym(f.hl)
+			// Since a huffmanDecoder can be empty or be composed of a degenerate tree
+			// with single element, huffSym must error on these two edge cases. In both
+			// cases, the chunks slice will be 0 for the invalid sequence, leading it
+			// satisfy the n == 0 check below.
+ n := uint(f.hl.maxRead) + for { + for fnb < n { + c, err := fr.ReadByte() + if err != nil { + f.b, f.nb = fb, fnb + f.err = noEOF(err) + return + } + f.roffset++ + fb |= uint32(c) << (fnb & regSizeMaskUint32) + fnb += 8 + } + chunk := f.hl.chunks[fb&(huffmanNumChunks-1)] + n = uint(chunk & huffmanCountMask) + if n > huffmanChunkBits { + chunk = f.hl.links[chunk>>huffmanValueShift][(fb>>huffmanChunkBits)&f.hl.linkMask] + n = uint(chunk & huffmanCountMask) + } + if n <= fnb { + if n == 0 { + f.b, f.nb = fb, fnb + if debugDecode { + fmt.Println("huffsym: n==0") + } + f.err = CorruptInputError(f.roffset) + return + } + fb = fb >> (n & regSizeMaskUint32) + fnb = fnb - n + v = int(chunk >> huffmanValueShift) + break + } + } + } + + var length int + switch { + case v < 256: + dict.writeByte(byte(v)) + if dict.availWrite() == 0 { + f.toRead = dict.readFlush() + f.step = (*decompressor).huffmanStringsReader + f.stepState = stateInit + f.b, f.nb = fb, fnb + return + } + goto readLiteral + case v == 256: + f.b, f.nb = fb, fnb + f.finishBlock() + return + // otherwise, reference to older data + case v < 265: + length = v - (257 - 3) + case v < maxNumLit: + val := decCodeToLen[(v - 257)] + length = int(val.length) + 3 + n := uint(val.extra) + for fnb < n { + c, err := fr.ReadByte() + if err != nil { + f.b, f.nb = fb, fnb + if debugDecode { + fmt.Println("morebits n>0:", err) + } + f.err = err + return + } + f.roffset++ + fb |= uint32(c) << (fnb & regSizeMaskUint32) + fnb += 8 + } + length += int(fb & bitMask32[n]) + fb >>= n & regSizeMaskUint32 + fnb -= n + default: + if debugDecode { + fmt.Println(v, ">= maxNumLit") + } + f.err = CorruptInputError(f.roffset) + f.b, f.nb = fb, fnb + return + } + + var dist uint32 + if f.hd == nil { + for fnb < 5 { + c, err := fr.ReadByte() + if err != nil { + f.b, f.nb = fb, fnb + if debugDecode { + fmt.Println("morebits f.nb<5:", err) + } + f.err = err + return + } + f.roffset++ + fb |= uint32(c) << (fnb & regSizeMaskUint32) + fnb += 8 + } + dist = uint32(bits.Reverse8(uint8(fb & 0x1F << 3))) + fb >>= 5 + fnb -= 5 + } else { + // Since a huffmanDecoder can be empty or be composed of a degenerate tree + // with single element, huffSym must error on these two edge cases. In both + // cases, the chunks slice will be 0 for the invalid sequence, leading it + // satisfy the n == 0 check below. + n := uint(f.hd.maxRead) + // Optimization. Compiler isn't smart enough to keep f.b,f.nb in registers, + // but is smart enough to keep local variables in registers, so use nb and b, + // inline call to moreBits and reassign b,nb back to f on return. + for { + for fnb < n { + c, err := fr.ReadByte() + if err != nil { + f.b, f.nb = fb, fnb + f.err = noEOF(err) + return + } + f.roffset++ + fb |= uint32(c) << (fnb & regSizeMaskUint32) + fnb += 8 + } + chunk := f.hd.chunks[fb&(huffmanNumChunks-1)] + n = uint(chunk & huffmanCountMask) + if n > huffmanChunkBits { + chunk = f.hd.links[chunk>>huffmanValueShift][(fb>>huffmanChunkBits)&f.hd.linkMask] + n = uint(chunk & huffmanCountMask) + } + if n <= fnb { + if n == 0 { + f.b, f.nb = fb, fnb + if debugDecode { + fmt.Println("huffsym: n==0") + } + f.err = CorruptInputError(f.roffset) + return + } + fb = fb >> (n & regSizeMaskUint32) + fnb = fnb - n + dist = uint32(chunk >> huffmanValueShift) + break + } + } + } + + switch { + case dist < 4: + dist++ + case dist < maxNumDist: + nb := uint(dist-2) >> 1 + // have 1 bit in bottom of dist, need nb more. 
+ extra := (dist & 1) << (nb & regSizeMaskUint32) + for fnb < nb { + c, err := fr.ReadByte() + if err != nil { + f.b, f.nb = fb, fnb + if debugDecode { + fmt.Println("morebits f.nb<nb:", err) + } + f.err = err + return + } + f.roffset++ + fb |= uint32(c) << (fnb & regSizeMaskUint32) + fnb += 8 + } + extra |= fb & bitMask32[nb] + fb >>= nb & regSizeMaskUint32 + fnb -= nb + dist = 1<<((nb+1)&regSizeMaskUint32) + 1 + extra + // slower: dist = bitMask32[nb+1] + 2 + extra + default: + f.b, f.nb = fb, fnb + if debugDecode { + fmt.Println("dist too big:", dist, maxNumDist) + } + f.err = CorruptInputError(f.roffset) + return + } + + // No check on length; encoding can be prescient. + if dist > uint32(dict.histSize()) { + f.b, f.nb = fb, fnb + if debugDecode { + fmt.Println("dist > dict.histSize():", dist, dict.histSize()) + } + f.err = CorruptInputError(f.roffset) + return + } + + f.copyLen, f.copyDist = length, int(dist) + goto copyHistory + } + +copyHistory: + // Perform a backwards copy according to RFC section 3.2.3. + { + cnt := dict.tryWriteCopy(f.copyDist, f.copyLen) + if cnt == 0 { + cnt = dict.writeCopy(f.copyDist, f.copyLen) + } + f.copyLen -= cnt + + if dict.availWrite() == 0 || f.copyLen > 0 { + f.toRead = dict.readFlush() + f.step = (*decompressor).huffmanStringsReader // We need to continue this work + f.stepState = stateDict + f.b, f.nb = fb, fnb + return + } + goto readLiteral + } + // Not reached +} + +// Decode a single Huffman block from f. +// hl and hd are the Huffman states for the lit/length values +// and the distance values, respectively. If hd == nil, using the +// fixed distance encoding associated with fixed Huffman blocks. +func (f *decompressor) huffmanGenericReader() { + const ( + stateInit = iota // Zero value must be stateInit + stateDict + ) + fr := f.r.(Reader) + + // Optimization. Compiler isn't smart enough to keep f.b,f.nb in registers, + // but is smart enough to keep local variables in registers, so use nb and b, + // inline call to moreBits and reassign b,nb back to f on return. + fnb, fb, dict := f.nb, f.b, &f.dict + + switch f.stepState { + case stateInit: + goto readLiteral + case stateDict: + goto copyHistory + } + +readLiteral: + // Read literal and/or (length, distance) according to RFC section 3.2.3. + { + var v int + { + // Inlined v, err := f.huffSym(f.hl) + // Since a huffmanDecoder can be empty or be composed of a degenerate tree + // with single element, huffSym must error on these two edge cases. In both + // cases, the chunks slice will be 0 for the invalid sequence, leading it + // satisfy the n == 0 check below.
+ n := uint(f.hl.maxRead) + for { + for fnb < n { + c, err := fr.ReadByte() + if err != nil { + f.b, f.nb = fb, fnb + f.err = noEOF(err) + return + } + f.roffset++ + fb |= uint32(c) << (fnb & regSizeMaskUint32) + fnb += 8 + } + chunk := f.hl.chunks[fb&(huffmanNumChunks-1)] + n = uint(chunk & huffmanCountMask) + if n > huffmanChunkBits { + chunk = f.hl.links[chunk>>huffmanValueShift][(fb>>huffmanChunkBits)&f.hl.linkMask] + n = uint(chunk & huffmanCountMask) + } + if n <= fnb { + if n == 0 { + f.b, f.nb = fb, fnb + if debugDecode { + fmt.Println("huffsym: n==0") + } + f.err = CorruptInputError(f.roffset) + return + } + fb = fb >> (n & regSizeMaskUint32) + fnb = fnb - n + v = int(chunk >> huffmanValueShift) + break + } + } + } + + var length int + switch { + case v < 256: + dict.writeByte(byte(v)) + if dict.availWrite() == 0 { + f.toRead = dict.readFlush() + f.step = (*decompressor).huffmanGenericReader + f.stepState = stateInit + f.b, f.nb = fb, fnb + return + } + goto readLiteral + case v == 256: + f.b, f.nb = fb, fnb + f.finishBlock() + return + // otherwise, reference to older data + case v < 265: + length = v - (257 - 3) + case v < maxNumLit: + val := decCodeToLen[(v - 257)] + length = int(val.length) + 3 + n := uint(val.extra) + for fnb < n { + c, err := fr.ReadByte() + if err != nil { + f.b, f.nb = fb, fnb + if debugDecode { + fmt.Println("morebits n>0:", err) + } + f.err = err + return + } + f.roffset++ + fb |= uint32(c) << (fnb & regSizeMaskUint32) + fnb += 8 + } + length += int(fb & bitMask32[n]) + fb >>= n & regSizeMaskUint32 + fnb -= n + default: + if debugDecode { + fmt.Println(v, ">= maxNumLit") + } + f.err = CorruptInputError(f.roffset) + f.b, f.nb = fb, fnb + return + } + + var dist uint32 + if f.hd == nil { + for fnb < 5 { + c, err := fr.ReadByte() + if err != nil { + f.b, f.nb = fb, fnb + if debugDecode { + fmt.Println("morebits f.nb<5:", err) + } + f.err = err + return + } + f.roffset++ + fb |= uint32(c) << (fnb & regSizeMaskUint32) + fnb += 8 + } + dist = uint32(bits.Reverse8(uint8(fb & 0x1F << 3))) + fb >>= 5 + fnb -= 5 + } else { + // Since a huffmanDecoder can be empty or be composed of a degenerate tree + // with single element, huffSym must error on these two edge cases. In both + // cases, the chunks slice will be 0 for the invalid sequence, leading it + // satisfy the n == 0 check below. + n := uint(f.hd.maxRead) + // Optimization. Compiler isn't smart enough to keep f.b,f.nb in registers, + // but is smart enough to keep local variables in registers, so use nb and b, + // inline call to moreBits and reassign b,nb back to f on return. + for { + for fnb < n { + c, err := fr.ReadByte() + if err != nil { + f.b, f.nb = fb, fnb + f.err = noEOF(err) + return + } + f.roffset++ + fb |= uint32(c) << (fnb & regSizeMaskUint32) + fnb += 8 + } + chunk := f.hd.chunks[fb&(huffmanNumChunks-1)] + n = uint(chunk & huffmanCountMask) + if n > huffmanChunkBits { + chunk = f.hd.links[chunk>>huffmanValueShift][(fb>>huffmanChunkBits)&f.hd.linkMask] + n = uint(chunk & huffmanCountMask) + } + if n <= fnb { + if n == 0 { + f.b, f.nb = fb, fnb + if debugDecode { + fmt.Println("huffsym: n==0") + } + f.err = CorruptInputError(f.roffset) + return + } + fb = fb >> (n & regSizeMaskUint32) + fnb = fnb - n + dist = uint32(chunk >> huffmanValueShift) + break + } + } + } + + switch { + case dist < 4: + dist++ + case dist < maxNumDist: + nb := uint(dist-2) >> 1 + // have 1 bit in bottom of dist, need nb more. 
+ extra := (dist & 1) << (nb & regSizeMaskUint32) + for fnb < nb { + c, err := fr.ReadByte() + if err != nil { + f.b, f.nb = fb, fnb + if debugDecode { + fmt.Println("morebits f.nb<nb:", err) + } + f.err = err + return + } + f.roffset++ + fb |= uint32(c) << (fnb & regSizeMaskUint32) + fnb += 8 + } + extra |= fb & bitMask32[nb] + fb >>= nb & regSizeMaskUint32 + fnb -= nb + dist = 1<<((nb+1)&regSizeMaskUint32) + 1 + extra + // slower: dist = bitMask32[nb+1] + 2 + extra + default: + f.b, f.nb = fb, fnb + if debugDecode { + fmt.Println("dist too big:", dist, maxNumDist) + } + f.err = CorruptInputError(f.roffset) + return + } + + // No check on length; encoding can be prescient. + if dist > uint32(dict.histSize()) { + f.b, f.nb = fb, fnb + if debugDecode { + fmt.Println("dist > dict.histSize():", dist, dict.histSize()) + } + f.err = CorruptInputError(f.roffset) + return + } + + f.copyLen, f.copyDist = length, int(dist) + goto copyHistory + } + +copyHistory: + // Perform a backwards copy according to RFC section 3.2.3. + { + cnt := dict.tryWriteCopy(f.copyDist, f.copyLen) + if cnt == 0 { + cnt = dict.writeCopy(f.copyDist, f.copyLen) + } + f.copyLen -= cnt + + if dict.availWrite() == 0 || f.copyLen > 0 { + f.toRead = dict.readFlush() + f.step = (*decompressor).huffmanGenericReader // We need to continue this work + f.stepState = stateDict + f.b, f.nb = fb, fnb + return + } + goto readLiteral + } + // Not reached +} + +func (f *decompressor) huffmanBlockDecoder() func() { + switch f.r.(type) { + case *bytes.Buffer: + return f.huffmanBytesBuffer + case *bytes.Reader: + return f.huffmanBytesReader + case *bufio.Reader: + return f.huffmanBufioReader + case *strings.Reader: + return f.huffmanStringsReader + case Reader: + return f.huffmanGenericReader + default: + return f.huffmanGenericReader + } +} diff --git a/vendor/github.com/klauspost/compress/flate/level1.go b/vendor/github.com/klauspost/compress/flate/level1.go new file mode 100644 index 000000000..0f14f8d63 --- /dev/null +++ b/vendor/github.com/klauspost/compress/flate/level1.go @@ -0,0 +1,240 @@ +package flate + +import ( + "encoding/binary" + "fmt" + "math/bits" +) + +// fastGen maintains the table for matches, +// and the previous byte block for level 2. +// This is the generic implementation. +type fastEncL1 struct { + fastGen + table [tableSize]tableEntry +} + +// EncodeL1 uses a similar algorithm to level 1 +func (e *fastEncL1) Encode(dst *tokens, src []byte) { + const ( + inputMargin = 12 - 1 + minNonLiteralBlockSize = 1 + 1 + inputMargin + ) + if debugDeflate && e.cur < 0 { + panic(fmt.Sprint("e.cur < 0: ", e.cur)) + } + + // Protect against e.cur wraparound. + for e.cur >= bufferReset { + if len(e.hist) == 0 { + for i := range e.table[:] { + e.table[i] = tableEntry{} + } + e.cur = maxMatchOffset + break + } + // Shift down everything in the table that isn't already too far away. + minOff := e.cur + int32(len(e.hist)) - maxMatchOffset + for i := range e.table[:] { + v := e.table[i].offset + if v <= minOff { + v = 0 + } else { + v = v - e.cur + maxMatchOffset + } + e.table[i].offset = v + } + e.cur = maxMatchOffset + } + + s := e.addBlock(src) + + // This check isn't in the Snappy implementation, but there, the caller + // instead of the callee handles this case. + if len(src) < minNonLiteralBlockSize { + // We do not fill the token table. + // This will be picked up by caller. + dst.n = uint16(len(src)) + return + } + + // Override src + src = e.hist + nextEmit := s + + // sLimit is when to stop looking for offset/length copies. The inputMargin + // lets us use a fast path for emitLiteral in the main loop, while we are + // looking for copies.
+ sLimit := int32(len(src) - inputMargin) + + // nextEmit is where in src the next emitLiteral should start from. + cv := load3232(src, s) + + for { + const skipLog = 5 + const doEvery = 2 + + nextS := s + var candidate tableEntry + for { + nextHash := hash(cv) + candidate = e.table[nextHash] + nextS = s + doEvery + (s-nextEmit)>>skipLog + if nextS > sLimit { + goto emitRemainder + } + + now := load6432(src, nextS) + e.table[nextHash] = tableEntry{offset: s + e.cur} + nextHash = hash(uint32(now)) + + offset := s - (candidate.offset - e.cur) + if offset < maxMatchOffset && cv == load3232(src, candidate.offset-e.cur) { + e.table[nextHash] = tableEntry{offset: nextS + e.cur} + break + } + + // Do one right away... + cv = uint32(now) + s = nextS + nextS++ + candidate = e.table[nextHash] + now >>= 8 + e.table[nextHash] = tableEntry{offset: s + e.cur} + + offset = s - (candidate.offset - e.cur) + if offset < maxMatchOffset && cv == load3232(src, candidate.offset-e.cur) { + e.table[nextHash] = tableEntry{offset: nextS + e.cur} + break + } + cv = uint32(now) + s = nextS + } + + // A 4-byte match has been found. We'll later see if more than 4 bytes + // match. But, prior to the match, src[nextEmit:s] are unmatched. Emit + // them as literal bytes. + for { + // Invariant: we have a 4-byte match at s, and no need to emit any + // literal bytes prior to s. + + // Extend the 4-byte match as long as possible. + t := candidate.offset - e.cur + var l = int32(4) + if false { + l = e.matchlenLong(s+4, t+4, src) + 4 + } else { + // inlined: + a := src[s+4:] + b := src[t+4:] + for len(a) >= 8 { + if diff := binary.LittleEndian.Uint64(a) ^ binary.LittleEndian.Uint64(b); diff != 0 { + l += int32(bits.TrailingZeros64(diff) >> 3) + break + } + l += 8 + a = a[8:] + b = b[8:] + } + if len(a) < 8 { + b = b[:len(a)] + for i := range a { + if a[i] != b[i] { + break + } + l++ + } + } + } + + // Extend backwards + for t > 0 && s > nextEmit && src[t-1] == src[s-1] { + s-- + t-- + l++ + } + if nextEmit < s { + if false { + emitLiteral(dst, src[nextEmit:s]) + } else { + for _, v := range src[nextEmit:s] { + dst.tokens[dst.n] = token(v) + dst.litHist[v]++ + dst.n++ + } + } + } + + // Save the match found + if false { + dst.AddMatchLong(l, uint32(s-t-baseMatchOffset)) + } else { + // Inlined... + xoffset := uint32(s - t - baseMatchOffset) + xlength := l + oc := offsetCode(xoffset) + xoffset |= oc << 16 + for xlength > 0 { + xl := xlength + if xl > 258 { + if xl > 258+baseMatchLength { + xl = 258 + } else { + xl = 258 - baseMatchLength + } + } + xlength -= xl + xl -= baseMatchLength + dst.extraHist[lengthCodes1[uint8(xl)]]++ + dst.offHist[oc]++ + dst.tokens[dst.n] = token(matchType | uint32(xl)<= s { + s = nextS + 1 + } + if s >= sLimit { + // Index first pair after match end. + if int(s+l+4) < len(src) { + cv := load3232(src, s) + e.table[hash(cv)] = tableEntry{offset: s + e.cur} + } + goto emitRemainder + } + + // We could immediately start working at s now, but to improve + // compression we first update the hash table at s-2 and at s. If + // another emitCopy is not our next move, also calculate nextHash + // at s+1. At least on GOARCH=amd64, these three hash calculations + // are faster as one load64 call (with some shifts) instead of + // three load32 calls. 
+ x := load6432(src, s-2) + o := e.cur + s - 2 + prevHash := hash(uint32(x)) + e.table[prevHash] = tableEntry{offset: o} + x >>= 16 + currHash := hash(uint32(x)) + candidate = e.table[currHash] + e.table[currHash] = tableEntry{offset: o + 2} + + offset := s - (candidate.offset - e.cur) + if offset > maxMatchOffset || uint32(x) != load3232(src, candidate.offset-e.cur) { + cv = uint32(x >> 8) + s++ + break + } + } + } + +emitRemainder: + if int(nextEmit) < len(src) { + // If nothing was added, don't encode literals. + if dst.n == 0 { + return + } + emitLiteral(dst, src[nextEmit:]) + } +} diff --git a/vendor/github.com/klauspost/compress/flate/level2.go b/vendor/github.com/klauspost/compress/flate/level2.go new file mode 100644 index 000000000..8603fbd55 --- /dev/null +++ b/vendor/github.com/klauspost/compress/flate/level2.go @@ -0,0 +1,213 @@ +package flate + +import "fmt" + +// fastGen maintains the table for matches, +// and the previous byte block for level 2. +// This is the generic implementation. +type fastEncL2 struct { + fastGen + table [bTableSize]tableEntry +} + +// EncodeL2 uses a similar algorithm to level 1, but is capable +// of matching across blocks giving better compression at a small slowdown. +func (e *fastEncL2) Encode(dst *tokens, src []byte) { + const ( + inputMargin = 12 - 1 + minNonLiteralBlockSize = 1 + 1 + inputMargin + ) + + if debugDeflate && e.cur < 0 { + panic(fmt.Sprint("e.cur < 0: ", e.cur)) + } + + // Protect against e.cur wraparound. + for e.cur >= bufferReset { + if len(e.hist) == 0 { + for i := range e.table[:] { + e.table[i] = tableEntry{} + } + e.cur = maxMatchOffset + break + } + // Shift down everything in the table that isn't already too far away. + minOff := e.cur + int32(len(e.hist)) - maxMatchOffset + for i := range e.table[:] { + v := e.table[i].offset + if v <= minOff { + v = 0 + } else { + v = v - e.cur + maxMatchOffset + } + e.table[i].offset = v + } + e.cur = maxMatchOffset + } + + s := e.addBlock(src) + + // This check isn't in the Snappy implementation, but there, the caller + // instead of the callee handles this case. + if len(src) < minNonLiteralBlockSize { + // We do not fill the token table. + // This will be picked up by caller. + dst.n = uint16(len(src)) + return + } + + // Override src + src = e.hist + nextEmit := s + + // sLimit is when to stop looking for offset/length copies. The inputMargin + // lets us use a fast path for emitLiteral in the main loop, while we are + // looking for copies. + sLimit := int32(len(src) - inputMargin) + + // nextEmit is where in src the next emitLiteral should start from. + cv := load3232(src, s) + for { + // When should we start skipping if we haven't found matches in a long while. + const skipLog = 5 + const doEvery = 2 + + nextS := s + var candidate tableEntry + for { + nextHash := hash4u(cv, bTableBits) + s = nextS + nextS = s + doEvery + (s-nextEmit)>>skipLog + if nextS > sLimit { + goto emitRemainder + } + candidate = e.table[nextHash] + now := load6432(src, nextS) + e.table[nextHash] = tableEntry{offset: s + e.cur} + nextHash = hash4u(uint32(now), bTableBits) + + offset := s - (candidate.offset - e.cur) + if offset < maxMatchOffset && cv == load3232(src, candidate.offset-e.cur) { + e.table[nextHash] = tableEntry{offset: nextS + e.cur} + break + } + + // Do one right away... 
+ cv = uint32(now) + s = nextS + nextS++ + candidate = e.table[nextHash] + now >>= 8 + e.table[nextHash] = tableEntry{offset: s + e.cur} + + offset = s - (candidate.offset - e.cur) + if offset < maxMatchOffset && cv == load3232(src, candidate.offset-e.cur) { + break + } + cv = uint32(now) + } + + // A 4-byte match has been found. We'll later see if more than 4 bytes + // match. But, prior to the match, src[nextEmit:s] are unmatched. Emit + // them as literal bytes. + + // Call emitCopy, and then see if another emitCopy could be our next + // move. Repeat until we find no match for the input immediately after + // what was consumed by the last emitCopy call. + // + // If we exit this loop normally then we need to call emitLiteral next, + // though we don't yet know how big the literal will be. We handle that + // by proceeding to the next iteration of the main loop. We also can + // exit this loop via goto if we get close to exhausting the input. + for { + // Invariant: we have a 4-byte match at s, and no need to emit any + // literal bytes prior to s. + + // Extend the 4-byte match as long as possible. + t := candidate.offset - e.cur + l := e.matchlenLong(s+4, t+4, src) + 4 + + // Extend backwards + for t > 0 && s > nextEmit && src[t-1] == src[s-1] { + s-- + t-- + l++ + } + if nextEmit < s { + if false { + emitLiteral(dst, src[nextEmit:s]) + } else { + for _, v := range src[nextEmit:s] { + dst.tokens[dst.n] = token(v) + dst.litHist[v]++ + dst.n++ + } + } + } + + dst.AddMatchLong(l, uint32(s-t-baseMatchOffset)) + s += l + nextEmit = s + if nextS >= s { + s = nextS + 1 + } + + if s >= sLimit { + // Index first pair after match end. + if int(s+l+4) < len(src) { + cv := load3232(src, s) + e.table[hash4u(cv, bTableBits)] = tableEntry{offset: s + e.cur} + } + goto emitRemainder + } + + // Store every second hash in-between, but offset by 1. + for i := s - l + 2; i < s-5; i += 7 { + x := load6432(src, i) + nextHash := hash4u(uint32(x), bTableBits) + e.table[nextHash] = tableEntry{offset: e.cur + i} + // Skip one + x >>= 16 + nextHash = hash4u(uint32(x), bTableBits) + e.table[nextHash] = tableEntry{offset: e.cur + i + 2} + // Skip one + x >>= 16 + nextHash = hash4u(uint32(x), bTableBits) + e.table[nextHash] = tableEntry{offset: e.cur + i + 4} + } + + // We could immediately start working at s now, but to improve + // compression we first update the hash table at s-2 to s. If + // another emitCopy is not our next move, also calculate nextHash + // at s+1. At least on GOARCH=amd64, these three hash calculations + // are faster as one load64 call (with some shifts) instead of + // three load32 calls. + x := load6432(src, s-2) + o := e.cur + s - 2 + prevHash := hash4u(uint32(x), bTableBits) + prevHash2 := hash4u(uint32(x>>8), bTableBits) + e.table[prevHash] = tableEntry{offset: o} + e.table[prevHash2] = tableEntry{offset: o + 1} + currHash := hash4u(uint32(x>>16), bTableBits) + candidate = e.table[currHash] + e.table[currHash] = tableEntry{offset: o + 2} + + offset := s - (candidate.offset - e.cur) + if offset > maxMatchOffset || uint32(x>>16) != load3232(src, candidate.offset-e.cur) { + cv = uint32(x >> 24) + s++ + break + } + } + } + +emitRemainder: + if int(nextEmit) < len(src) { + // If nothing was added, don't encode literals. 
+ if dst.n == 0 { + return + } + + emitLiteral(dst, src[nextEmit:]) + } +} diff --git a/vendor/github.com/klauspost/compress/flate/level3.go b/vendor/github.com/klauspost/compress/flate/level3.go new file mode 100644 index 000000000..039639f89 --- /dev/null +++ b/vendor/github.com/klauspost/compress/flate/level3.go @@ -0,0 +1,240 @@ +package flate + +import "fmt" + +// fastEncL3 +type fastEncL3 struct { + fastGen + table [1 << 16]tableEntryPrev +} + +// Encode uses a similar algorithm to level 2, will check up to two candidates. +func (e *fastEncL3) Encode(dst *tokens, src []byte) { + const ( + inputMargin = 8 - 1 + minNonLiteralBlockSize = 1 + 1 + inputMargin + tableBits = 16 + tableSize = 1 << tableBits + ) + + if debugDeflate && e.cur < 0 { + panic(fmt.Sprint("e.cur < 0: ", e.cur)) + } + + // Protect against e.cur wraparound. + for e.cur >= bufferReset { + if len(e.hist) == 0 { + for i := range e.table[:] { + e.table[i] = tableEntryPrev{} + } + e.cur = maxMatchOffset + break + } + // Shift down everything in the table that isn't already too far away. + minOff := e.cur + int32(len(e.hist)) - maxMatchOffset + for i := range e.table[:] { + v := e.table[i] + if v.Cur.offset <= minOff { + v.Cur.offset = 0 + } else { + v.Cur.offset = v.Cur.offset - e.cur + maxMatchOffset + } + if v.Prev.offset <= minOff { + v.Prev.offset = 0 + } else { + v.Prev.offset = v.Prev.offset - e.cur + maxMatchOffset + } + e.table[i] = v + } + e.cur = maxMatchOffset + } + + s := e.addBlock(src) + + // Skip if too small. + if len(src) < minNonLiteralBlockSize { + // We do not fill the token table. + // This will be picked up by caller. + dst.n = uint16(len(src)) + return + } + + // Override src + src = e.hist + nextEmit := s + + // sLimit is when to stop looking for offset/length copies. The inputMargin + // lets us use a fast path for emitLiteral in the main loop, while we are + // looking for copies. + sLimit := int32(len(src) - inputMargin) + + // nextEmit is where in src the next emitLiteral should start from. + cv := load3232(src, s) + for { + const skipLog = 6 + nextS := s + var candidate tableEntry + for { + nextHash := hash4u(cv, tableBits) + s = nextS + nextS = s + 1 + (s-nextEmit)>>skipLog + if nextS > sLimit { + goto emitRemainder + } + candidates := e.table[nextHash] + now := load3232(src, nextS) + + // Safe offset distance until s + 4... + minOffset := e.cur + s - (maxMatchOffset - 4) + e.table[nextHash] = tableEntryPrev{Prev: candidates.Cur, Cur: tableEntry{offset: s + e.cur}} + + // Check both candidates + candidate = candidates.Cur + if candidate.offset < minOffset { + cv = now + // Previous will also be invalid, we have nothing. + continue + } + + if cv == load3232(src, candidate.offset-e.cur) { + if candidates.Prev.offset < minOffset || cv != load3232(src, candidates.Prev.offset-e.cur) { + break + } + // Both match and are valid, pick longest. + offset := s - (candidate.offset - e.cur) + o2 := s - (candidates.Prev.offset - e.cur) + l1, l2 := matchLen(src[s+4:], src[s-offset+4:]), matchLen(src[s+4:], src[s-o2+4:]) + if l2 > l1 { + candidate = candidates.Prev + } + break + } else { + // We only check if value mismatches. + // Offset will always be invalid in other cases. + candidate = candidates.Prev + if candidate.offset > minOffset && cv == load3232(src, candidate.offset-e.cur) { + break + } + } + cv = now + } + + // Call emitCopy, and then see if another emitCopy could be our next + // move. 
Repeat until we find no match for the input immediately after + // what was consumed by the last emitCopy call. + // + // If we exit this loop normally then we need to call emitLiteral next, + // though we don't yet know how big the literal will be. We handle that + // by proceeding to the next iteration of the main loop. We also can + // exit this loop via goto if we get close to exhausting the input. + for { + // Invariant: we have a 4-byte match at s, and no need to emit any + // literal bytes prior to s. + + // Extend the 4-byte match as long as possible. + // + t := candidate.offset - e.cur + l := e.matchlenLong(s+4, t+4, src) + 4 + + // Extend backwards + for t > 0 && s > nextEmit && src[t-1] == src[s-1] { + s-- + t-- + l++ + } + if nextEmit < s { + if false { + emitLiteral(dst, src[nextEmit:s]) + } else { + for _, v := range src[nextEmit:s] { + dst.tokens[dst.n] = token(v) + dst.litHist[v]++ + dst.n++ + } + } + } + + dst.AddMatchLong(l, uint32(s-t-baseMatchOffset)) + s += l + nextEmit = s + if nextS >= s { + s = nextS + 1 + } + + if s >= sLimit { + t += l + // Index first pair after match end. + if int(t+4) < len(src) && t > 0 { + cv := load3232(src, t) + nextHash := hash4u(cv, tableBits) + e.table[nextHash] = tableEntryPrev{ + Prev: e.table[nextHash].Cur, + Cur: tableEntry{offset: e.cur + t}, + } + } + goto emitRemainder + } + + // Store every 5th hash in-between. + for i := s - l + 2; i < s-5; i += 5 { + nextHash := hash4u(load3232(src, i), tableBits) + e.table[nextHash] = tableEntryPrev{ + Prev: e.table[nextHash].Cur, + Cur: tableEntry{offset: e.cur + i}} + } + // We could immediately start working at s now, but to improve + // compression we first update the hash table at s-2 to s. + x := load6432(src, s-2) + prevHash := hash4u(uint32(x), tableBits) + + e.table[prevHash] = tableEntryPrev{ + Prev: e.table[prevHash].Cur, + Cur: tableEntry{offset: e.cur + s - 2}, + } + x >>= 8 + prevHash = hash4u(uint32(x), tableBits) + + e.table[prevHash] = tableEntryPrev{ + Prev: e.table[prevHash].Cur, + Cur: tableEntry{offset: e.cur + s - 1}, + } + x >>= 8 + currHash := hash4u(uint32(x), tableBits) + candidates := e.table[currHash] + cv = uint32(x) + e.table[currHash] = tableEntryPrev{ + Prev: candidates.Cur, + Cur: tableEntry{offset: s + e.cur}, + } + + // Check both candidates + candidate = candidates.Cur + minOffset := e.cur + s - (maxMatchOffset - 4) + + if candidate.offset > minOffset { + if cv == load3232(src, candidate.offset-e.cur) { + // Found a match... + continue + } + candidate = candidates.Prev + if candidate.offset > minOffset && cv == load3232(src, candidate.offset-e.cur) { + // Match at prev... + continue + } + } + cv = uint32(x >> 8) + s++ + break + } + } + +emitRemainder: + if int(nextEmit) < len(src) { + // If nothing was added, don't encode literals. + if dst.n == 0 { + return + } + + emitLiteral(dst, src[nextEmit:]) + } +} diff --git a/vendor/github.com/klauspost/compress/flate/level4.go b/vendor/github.com/klauspost/compress/flate/level4.go new file mode 100644 index 000000000..1cbffa1ae --- /dev/null +++ b/vendor/github.com/klauspost/compress/flate/level4.go @@ -0,0 +1,220 @@ +package flate + +import "fmt" + +type fastEncL4 struct { + fastGen + table [tableSize]tableEntry + bTable [tableSize]tableEntry +} + +func (e *fastEncL4) Encode(dst *tokens, src []byte) { + const ( + inputMargin = 12 - 1 + minNonLiteralBlockSize = 1 + 1 + inputMargin + ) + if debugDeflate && e.cur < 0 { + panic(fmt.Sprint("e.cur < 0: ", e.cur)) + } + // Protect against e.cur wraparound. 
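Every level encoder in this diff guards wraparound the same way: once e.cur approaches bufferReset, table offsets are rebased so that e.cur can restart at maxMatchOffset. A minimal sketch of the per-entry rule applied by the loop below (hypothetical helper name, not part of the vendored file):

func rebaseOffset(off, cur, histLen int32) int32 {
	minOff := cur + histLen - maxMatchOffset
	if off <= minOff {
		return 0 // already out of matching range, drop the entry
	}
	return off - cur + maxMatchOffset // keep the same distance from the rebased e.cur
}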
+ for e.cur >= bufferReset { + if len(e.hist) == 0 { + for i := range e.table[:] { + e.table[i] = tableEntry{} + } + for i := range e.bTable[:] { + e.bTable[i] = tableEntry{} + } + e.cur = maxMatchOffset + break + } + // Shift down everything in the table that isn't already too far away. + minOff := e.cur + int32(len(e.hist)) - maxMatchOffset + for i := range e.table[:] { + v := e.table[i].offset + if v <= minOff { + v = 0 + } else { + v = v - e.cur + maxMatchOffset + } + e.table[i].offset = v + } + for i := range e.bTable[:] { + v := e.bTable[i].offset + if v <= minOff { + v = 0 + } else { + v = v - e.cur + maxMatchOffset + } + e.bTable[i].offset = v + } + e.cur = maxMatchOffset + } + + s := e.addBlock(src) + + // This check isn't in the Snappy implementation, but there, the caller + // instead of the callee handles this case. + if len(src) < minNonLiteralBlockSize { + // We do not fill the token table. + // This will be picked up by caller. + dst.n = uint16(len(src)) + return + } + + // Override src + src = e.hist + nextEmit := s + + // sLimit is when to stop looking for offset/length copies. The inputMargin + // lets us use a fast path for emitLiteral in the main loop, while we are + // looking for copies. + sLimit := int32(len(src) - inputMargin) + + // nextEmit is where in src the next emitLiteral should start from. + cv := load6432(src, s) + for { + const skipLog = 6 + const doEvery = 1 + + nextS := s + var t int32 + for { + nextHashS := hash4x64(cv, tableBits) + nextHashL := hash7(cv, tableBits) + + s = nextS + nextS = s + doEvery + (s-nextEmit)>>skipLog + if nextS > sLimit { + goto emitRemainder + } + // Fetch a short+long candidate + sCandidate := e.table[nextHashS] + lCandidate := e.bTable[nextHashL] + next := load6432(src, nextS) + entry := tableEntry{offset: s + e.cur} + e.table[nextHashS] = entry + e.bTable[nextHashL] = entry + + t = lCandidate.offset - e.cur + if s-t < maxMatchOffset && uint32(cv) == load3232(src, lCandidate.offset-e.cur) { + // We got a long match. Use that. + break + } + + t = sCandidate.offset - e.cur + if s-t < maxMatchOffset && uint32(cv) == load3232(src, sCandidate.offset-e.cur) { + // Found a 4 match... + lCandidate = e.bTable[hash7(next, tableBits)] + + // If the next long is a candidate, check if we should use that instead... + lOff := nextS - (lCandidate.offset - e.cur) + if lOff < maxMatchOffset && load3232(src, lCandidate.offset-e.cur) == uint32(next) { + l1, l2 := matchLen(src[s+4:], src[t+4:]), matchLen(src[nextS+4:], src[nextS-lOff+4:]) + if l2 > l1 { + s = nextS + t = lCandidate.offset - e.cur + } + } + break + } + cv = next + } + + // A 4-byte match has been found. We'll later see if more than 4 bytes + // match. But, prior to the match, src[nextEmit:s] are unmatched. Emit + // them as literal bytes. + + // Extend the 4-byte match as long as possible. + l := e.matchlenLong(s+4, t+4, src) + 4 + + // Extend backwards + for t > 0 && s > nextEmit && src[t-1] == src[s-1] { + s-- + t-- + l++ + } + if nextEmit < s { + if false { + emitLiteral(dst, src[nextEmit:s]) + } else { + for _, v := range src[nextEmit:s] { + dst.tokens[dst.n] = token(v) + dst.litHist[v]++ + dst.n++ + } + } + } + if debugDeflate { + if t >= s { + panic("s-t") + } + if (s - t) > maxMatchOffset { + panic(fmt.Sprintln("mmo", t)) + } + if l < baseMatchLength { + panic("bml") + } + } + + dst.AddMatchLong(l, uint32(s-t-baseMatchOffset)) + s += l + nextEmit = s + if nextS >= s { + s = nextS + 1 + } + + if s >= sLimit { + // Index first pair after match end. 
+ if int(s+8) < len(src) { + cv := load6432(src, s) + e.table[hash4x64(cv, tableBits)] = tableEntry{offset: s + e.cur} + e.bTable[hash7(cv, tableBits)] = tableEntry{offset: s + e.cur} + } + goto emitRemainder + } + + // Store every 3rd hash in-between + if true { + i := nextS + if i < s-1 { + cv := load6432(src, i) + t := tableEntry{offset: i + e.cur} + t2 := tableEntry{offset: t.offset + 1} + e.bTable[hash7(cv, tableBits)] = t + e.bTable[hash7(cv>>8, tableBits)] = t2 + e.table[hash4u(uint32(cv>>8), tableBits)] = t2 + + i += 3 + for ; i < s-1; i += 3 { + cv := load6432(src, i) + t := tableEntry{offset: i + e.cur} + t2 := tableEntry{offset: t.offset + 1} + e.bTable[hash7(cv, tableBits)] = t + e.bTable[hash7(cv>>8, tableBits)] = t2 + e.table[hash4u(uint32(cv>>8), tableBits)] = t2 + } + } + } + + // We could immediately start working at s now, but to improve + // compression we first update the hash table at s-1 and at s. + x := load6432(src, s-1) + o := e.cur + s - 1 + prevHashS := hash4x64(x, tableBits) + prevHashL := hash7(x, tableBits) + e.table[prevHashS] = tableEntry{offset: o} + e.bTable[prevHashL] = tableEntry{offset: o} + cv = x >> 8 + } + +emitRemainder: + if int(nextEmit) < len(src) { + // If nothing was added, don't encode literals. + if dst.n == 0 { + return + } + + emitLiteral(dst, src[nextEmit:]) + } +} diff --git a/vendor/github.com/klauspost/compress/flate/level5.go b/vendor/github.com/klauspost/compress/flate/level5.go new file mode 100644 index 000000000..4b97576bd --- /dev/null +++ b/vendor/github.com/klauspost/compress/flate/level5.go @@ -0,0 +1,302 @@ +package flate + +import "fmt" + +type fastEncL5 struct { + fastGen + table [tableSize]tableEntry + bTable [tableSize]tableEntryPrev +} + +func (e *fastEncL5) Encode(dst *tokens, src []byte) { + const ( + inputMargin = 12 - 1 + minNonLiteralBlockSize = 1 + 1 + inputMargin + ) + if debugDeflate && e.cur < 0 { + panic(fmt.Sprint("e.cur < 0: ", e.cur)) + } + + // Protect against e.cur wraparound. + for e.cur >= bufferReset { + if len(e.hist) == 0 { + for i := range e.table[:] { + e.table[i] = tableEntry{} + } + for i := range e.bTable[:] { + e.bTable[i] = tableEntryPrev{} + } + e.cur = maxMatchOffset + break + } + // Shift down everything in the table that isn't already too far away. + minOff := e.cur + int32(len(e.hist)) - maxMatchOffset + for i := range e.table[:] { + v := e.table[i].offset + if v <= minOff { + v = 0 + } else { + v = v - e.cur + maxMatchOffset + } + e.table[i].offset = v + } + for i := range e.bTable[:] { + v := e.bTable[i] + if v.Cur.offset <= minOff { + v.Cur.offset = 0 + v.Prev.offset = 0 + } else { + v.Cur.offset = v.Cur.offset - e.cur + maxMatchOffset + if v.Prev.offset <= minOff { + v.Prev.offset = 0 + } else { + v.Prev.offset = v.Prev.offset - e.cur + maxMatchOffset + } + } + e.bTable[i] = v + } + e.cur = maxMatchOffset + } + + s := e.addBlock(src) + + // This check isn't in the Snappy implementation, but there, the caller + // instead of the callee handles this case. + if len(src) < minNonLiteralBlockSize { + // We do not fill the token table. + // This will be picked up by caller. + dst.n = uint16(len(src)) + return + } + + // Override src + src = e.hist + nextEmit := s + + // sLimit is when to stop looking for offset/length copies. The inputMargin + // lets us use a fast path for emitLiteral in the main loop, while we are + // looking for copies. + sLimit := int32(len(src) - inputMargin) + + // nextEmit is where in src the next emitLiteral should start from. 
+ cv := load6432(src, s) + for { + const skipLog = 6 + const doEvery = 1 + + nextS := s + var l int32 + var t int32 + for { + nextHashS := hash4x64(cv, tableBits) + nextHashL := hash7(cv, tableBits) + + s = nextS + nextS = s + doEvery + (s-nextEmit)>>skipLog + if nextS > sLimit { + goto emitRemainder + } + // Fetch a short+long candidate + sCandidate := e.table[nextHashS] + lCandidate := e.bTable[nextHashL] + next := load6432(src, nextS) + entry := tableEntry{offset: s + e.cur} + e.table[nextHashS] = entry + eLong := &e.bTable[nextHashL] + eLong.Cur, eLong.Prev = entry, eLong.Cur + + nextHashS = hash4x64(next, tableBits) + nextHashL = hash7(next, tableBits) + + t = lCandidate.Cur.offset - e.cur + if s-t < maxMatchOffset { + if uint32(cv) == load3232(src, lCandidate.Cur.offset-e.cur) { + // Store the next match + e.table[nextHashS] = tableEntry{offset: nextS + e.cur} + eLong := &e.bTable[nextHashL] + eLong.Cur, eLong.Prev = tableEntry{offset: nextS + e.cur}, eLong.Cur + + t2 := lCandidate.Prev.offset - e.cur + if s-t2 < maxMatchOffset && uint32(cv) == load3232(src, lCandidate.Prev.offset-e.cur) { + l = e.matchlen(s+4, t+4, src) + 4 + ml1 := e.matchlen(s+4, t2+4, src) + 4 + if ml1 > l { + t = t2 + l = ml1 + break + } + } + break + } + t = lCandidate.Prev.offset - e.cur + if s-t < maxMatchOffset && uint32(cv) == load3232(src, lCandidate.Prev.offset-e.cur) { + // Store the next match + e.table[nextHashS] = tableEntry{offset: nextS + e.cur} + eLong := &e.bTable[nextHashL] + eLong.Cur, eLong.Prev = tableEntry{offset: nextS + e.cur}, eLong.Cur + break + } + } + + t = sCandidate.offset - e.cur + if s-t < maxMatchOffset && uint32(cv) == load3232(src, sCandidate.offset-e.cur) { + // Found a 4 match... + l = e.matchlen(s+4, t+4, src) + 4 + lCandidate = e.bTable[nextHashL] + // Store the next match + + e.table[nextHashS] = tableEntry{offset: nextS + e.cur} + eLong := &e.bTable[nextHashL] + eLong.Cur, eLong.Prev = tableEntry{offset: nextS + e.cur}, eLong.Cur + + // If the next long is a candidate, use that... + t2 := lCandidate.Cur.offset - e.cur + if nextS-t2 < maxMatchOffset { + if load3232(src, lCandidate.Cur.offset-e.cur) == uint32(next) { + ml := e.matchlen(nextS+4, t2+4, src) + 4 + if ml > l { + t = t2 + s = nextS + l = ml + break + } + } + // If the previous long is a candidate, use that... + t2 = lCandidate.Prev.offset - e.cur + if nextS-t2 < maxMatchOffset && load3232(src, lCandidate.Prev.offset-e.cur) == uint32(next) { + ml := e.matchlen(nextS+4, t2+4, src) + 4 + if ml > l { + t = t2 + s = nextS + l = ml + break + } + } + } + break + } + cv = next + } + + // A 4-byte match has been found. We'll later see if more than 4 bytes + // match. But, prior to the match, src[nextEmit:s] are unmatched. Emit + // them as literal bytes. + + if l == 0 { + // Extend the 4-byte match as long as possible. + l = e.matchlenLong(s+4, t+4, src) + 4 + } else if l == maxMatchLength { + l += e.matchlenLong(s+l, t+l, src) + } + + // Try to locate a better match by checking the end of best match... 
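// In other words: hash the bytes that start where the current match ends
// (sAt = s+l) and look them up in the long table. If an earlier position also
// continued with those same bytes, then t2 = thatOffset - l is a candidate
// start whose match would end on them, so matchlenLong(s, t2, src) can beat
// the current length l. (Explanatory note, not part of the vendored file.)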
+ if sAt := s + l; l < 30 && sAt < sLimit { + eLong := e.bTable[hash7(load6432(src, sAt), tableBits)].Cur.offset + // Test current + t2 := eLong - e.cur - l + off := s - t2 + if t2 >= 0 && off < maxMatchOffset && off > 0 { + if l2 := e.matchlenLong(s, t2, src); l2 > l { + t = t2 + l = l2 + } + } + } + + // Extend backwards + for t > 0 && s > nextEmit && src[t-1] == src[s-1] { + s-- + t-- + l++ + } + if nextEmit < s { + if false { + emitLiteral(dst, src[nextEmit:s]) + } else { + for _, v := range src[nextEmit:s] { + dst.tokens[dst.n] = token(v) + dst.litHist[v]++ + dst.n++ + } + } + } + if debugDeflate { + if t >= s { + panic(fmt.Sprintln("s-t", s, t)) + } + if (s - t) > maxMatchOffset { + panic(fmt.Sprintln("mmo", s-t)) + } + if l < baseMatchLength { + panic("bml") + } + } + + dst.AddMatchLong(l, uint32(s-t-baseMatchOffset)) + s += l + nextEmit = s + if nextS >= s { + s = nextS + 1 + } + + if s >= sLimit { + goto emitRemainder + } + + // Store every 3rd hash in-between. + if true { + const hashEvery = 3 + i := s - l + 1 + if i < s-1 { + cv := load6432(src, i) + t := tableEntry{offset: i + e.cur} + e.table[hash4x64(cv, tableBits)] = t + eLong := &e.bTable[hash7(cv, tableBits)] + eLong.Cur, eLong.Prev = t, eLong.Cur + + // Do an long at i+1 + cv >>= 8 + t = tableEntry{offset: t.offset + 1} + eLong = &e.bTable[hash7(cv, tableBits)] + eLong.Cur, eLong.Prev = t, eLong.Cur + + // We only have enough bits for a short entry at i+2 + cv >>= 8 + t = tableEntry{offset: t.offset + 1} + e.table[hash4x64(cv, tableBits)] = t + + // Skip one - otherwise we risk hitting 's' + i += 4 + for ; i < s-1; i += hashEvery { + cv := load6432(src, i) + t := tableEntry{offset: i + e.cur} + t2 := tableEntry{offset: t.offset + 1} + eLong := &e.bTable[hash7(cv, tableBits)] + eLong.Cur, eLong.Prev = t, eLong.Cur + e.table[hash4u(uint32(cv>>8), tableBits)] = t2 + } + } + } + + // We could immediately start working at s now, but to improve + // compression we first update the hash table at s-1 and at s. + x := load6432(src, s-1) + o := e.cur + s - 1 + prevHashS := hash4x64(x, tableBits) + prevHashL := hash7(x, tableBits) + e.table[prevHashS] = tableEntry{offset: o} + eLong := &e.bTable[prevHashL] + eLong.Cur, eLong.Prev = tableEntry{offset: o}, eLong.Cur + cv = x >> 8 + } + +emitRemainder: + if int(nextEmit) < len(src) { + // If nothing was added, don't encode literals. + if dst.n == 0 { + return + } + + emitLiteral(dst, src[nextEmit:]) + } +} diff --git a/vendor/github.com/klauspost/compress/flate/level6.go b/vendor/github.com/klauspost/compress/flate/level6.go new file mode 100644 index 000000000..62888edf3 --- /dev/null +++ b/vendor/github.com/klauspost/compress/flate/level6.go @@ -0,0 +1,315 @@ +package flate + +import "fmt" + +type fastEncL6 struct { + fastGen + table [tableSize]tableEntry + bTable [tableSize]tableEntryPrev +} + +func (e *fastEncL6) Encode(dst *tokens, src []byte) { + const ( + inputMargin = 12 - 1 + minNonLiteralBlockSize = 1 + 1 + inputMargin + ) + if debugDeflate && e.cur < 0 { + panic(fmt.Sprint("e.cur < 0: ", e.cur)) + } + + // Protect against e.cur wraparound. + for e.cur >= bufferReset { + if len(e.hist) == 0 { + for i := range e.table[:] { + e.table[i] = tableEntry{} + } + for i := range e.bTable[:] { + e.bTable[i] = tableEntryPrev{} + } + e.cur = maxMatchOffset + break + } + // Shift down everything in the table that isn't already too far away. 
+ minOff := e.cur + int32(len(e.hist)) - maxMatchOffset + for i := range e.table[:] { + v := e.table[i].offset + if v <= minOff { + v = 0 + } else { + v = v - e.cur + maxMatchOffset + } + e.table[i].offset = v + } + for i := range e.bTable[:] { + v := e.bTable[i] + if v.Cur.offset <= minOff { + v.Cur.offset = 0 + v.Prev.offset = 0 + } else { + v.Cur.offset = v.Cur.offset - e.cur + maxMatchOffset + if v.Prev.offset <= minOff { + v.Prev.offset = 0 + } else { + v.Prev.offset = v.Prev.offset - e.cur + maxMatchOffset + } + } + e.bTable[i] = v + } + e.cur = maxMatchOffset + } + + s := e.addBlock(src) + + // This check isn't in the Snappy implementation, but there, the caller + // instead of the callee handles this case. + if len(src) < minNonLiteralBlockSize { + // We do not fill the token table. + // This will be picked up by caller. + dst.n = uint16(len(src)) + return + } + + // Override src + src = e.hist + nextEmit := s + + // sLimit is when to stop looking for offset/length copies. The inputMargin + // lets us use a fast path for emitLiteral in the main loop, while we are + // looking for copies. + sLimit := int32(len(src) - inputMargin) + + // nextEmit is where in src the next emitLiteral should start from. + cv := load6432(src, s) + // Repeat MUST be > 1 and within range + repeat := int32(1) + for { + const skipLog = 7 + const doEvery = 1 + + nextS := s + var l int32 + var t int32 + for { + nextHashS := hash4x64(cv, tableBits) + nextHashL := hash7(cv, tableBits) + s = nextS + nextS = s + doEvery + (s-nextEmit)>>skipLog + if nextS > sLimit { + goto emitRemainder + } + // Fetch a short+long candidate + sCandidate := e.table[nextHashS] + lCandidate := e.bTable[nextHashL] + next := load6432(src, nextS) + entry := tableEntry{offset: s + e.cur} + e.table[nextHashS] = entry + eLong := &e.bTable[nextHashL] + eLong.Cur, eLong.Prev = entry, eLong.Cur + + // Calculate hashes of 'next' + nextHashS = hash4x64(next, tableBits) + nextHashL = hash7(next, tableBits) + + t = lCandidate.Cur.offset - e.cur + if s-t < maxMatchOffset { + if uint32(cv) == load3232(src, lCandidate.Cur.offset-e.cur) { + // Long candidate matches at least 4 bytes. + + // Store the next match + e.table[nextHashS] = tableEntry{offset: nextS + e.cur} + eLong := &e.bTable[nextHashL] + eLong.Cur, eLong.Prev = tableEntry{offset: nextS + e.cur}, eLong.Cur + + // Check the previous long candidate as well. + t2 := lCandidate.Prev.offset - e.cur + if s-t2 < maxMatchOffset && uint32(cv) == load3232(src, lCandidate.Prev.offset-e.cur) { + l = e.matchlen(s+4, t+4, src) + 4 + ml1 := e.matchlen(s+4, t2+4, src) + 4 + if ml1 > l { + t = t2 + l = ml1 + break + } + } + break + } + // Current value did not match, but check if previous long value does. + t = lCandidate.Prev.offset - e.cur + if s-t < maxMatchOffset && uint32(cv) == load3232(src, lCandidate.Prev.offset-e.cur) { + // Store the next match + e.table[nextHashS] = tableEntry{offset: nextS + e.cur} + eLong := &e.bTable[nextHashL] + eLong.Cur, eLong.Prev = tableEntry{offset: nextS + e.cur}, eLong.Cur + break + } + } + + t = sCandidate.offset - e.cur + if s-t < maxMatchOffset && uint32(cv) == load3232(src, sCandidate.offset-e.cur) { + // Found a 4 match... 
+ l = e.matchlen(s+4, t+4, src) + 4 + + // Look up next long candidate (at nextS) + lCandidate = e.bTable[nextHashL] + + // Store the next match + e.table[nextHashS] = tableEntry{offset: nextS + e.cur} + eLong := &e.bTable[nextHashL] + eLong.Cur, eLong.Prev = tableEntry{offset: nextS + e.cur}, eLong.Cur + + // Check repeat at s + repOff + const repOff = 1 + t2 := s - repeat + repOff + if load3232(src, t2) == uint32(cv>>(8*repOff)) { + ml := e.matchlen(s+4+repOff, t2+4, src) + 4 + if ml > l { + t = t2 + l = ml + s += repOff + // Not worth checking more. + break + } + } + + // If the next long is a candidate, use that... + t2 = lCandidate.Cur.offset - e.cur + if nextS-t2 < maxMatchOffset { + if load3232(src, lCandidate.Cur.offset-e.cur) == uint32(next) { + ml := e.matchlen(nextS+4, t2+4, src) + 4 + if ml > l { + t = t2 + s = nextS + l = ml + // This is ok, but check previous as well. + } + } + // If the previous long is a candidate, use that... + t2 = lCandidate.Prev.offset - e.cur + if nextS-t2 < maxMatchOffset && load3232(src, lCandidate.Prev.offset-e.cur) == uint32(next) { + ml := e.matchlen(nextS+4, t2+4, src) + 4 + if ml > l { + t = t2 + s = nextS + l = ml + break + } + } + } + break + } + cv = next + } + + // A 4-byte match has been found. We'll later see if more than 4 bytes + // match. But, prior to the match, src[nextEmit:s] are unmatched. Emit + // them as literal bytes. + + // Extend the 4-byte match as long as possible. + if l == 0 { + l = e.matchlenLong(s+4, t+4, src) + 4 + } else if l == maxMatchLength { + l += e.matchlenLong(s+l, t+l, src) + } + + // Try to locate a better match by checking the end-of-match... + if sAt := s + l; sAt < sLimit { + eLong := &e.bTable[hash7(load6432(src, sAt), tableBits)] + // Test current + t2 := eLong.Cur.offset - e.cur - l + off := s - t2 + if off < maxMatchOffset { + if off > 0 && t2 >= 0 { + if l2 := e.matchlenLong(s, t2, src); l2 > l { + t = t2 + l = l2 + } + } + // Test next: + t2 = eLong.Prev.offset - e.cur - l + off := s - t2 + if off > 0 && off < maxMatchOffset && t2 >= 0 { + if l2 := e.matchlenLong(s, t2, src); l2 > l { + t = t2 + l = l2 + } + } + } + } + + // Extend backwards + for t > 0 && s > nextEmit && src[t-1] == src[s-1] { + s-- + t-- + l++ + } + if nextEmit < s { + if false { + emitLiteral(dst, src[nextEmit:s]) + } else { + for _, v := range src[nextEmit:s] { + dst.tokens[dst.n] = token(v) + dst.litHist[v]++ + dst.n++ + } + } + } + if false { + if t >= s { + panic(fmt.Sprintln("s-t", s, t)) + } + if (s - t) > maxMatchOffset { + panic(fmt.Sprintln("mmo", s-t)) + } + if l < baseMatchLength { + panic("bml") + } + } + + dst.AddMatchLong(l, uint32(s-t-baseMatchOffset)) + repeat = s - t + s += l + nextEmit = s + if nextS >= s { + s = nextS + 1 + } + + if s >= sLimit { + // Index after match end. + for i := nextS + 1; i < int32(len(src))-8; i += 2 { + cv := load6432(src, i) + e.table[hash4x64(cv, tableBits)] = tableEntry{offset: i + e.cur} + eLong := &e.bTable[hash7(cv, tableBits)] + eLong.Cur, eLong.Prev = tableEntry{offset: i + e.cur}, eLong.Cur + } + goto emitRemainder + } + + // Store every long hash in-between and every second short. 
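// (Explanatory note, not part of the vendored file: compared with the lower
// levels above, which index only every few skipped positions, level 6 indexes
// every in-between position in the long table and every second one in the
// short table - more table maintenance in exchange for more future matches.)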
+ if true { + for i := nextS + 1; i < s-1; i += 2 { + cv := load6432(src, i) + t := tableEntry{offset: i + e.cur} + t2 := tableEntry{offset: t.offset + 1} + eLong := &e.bTable[hash7(cv, tableBits)] + eLong2 := &e.bTable[hash7(cv>>8, tableBits)] + e.table[hash4x64(cv, tableBits)] = t + eLong.Cur, eLong.Prev = t, eLong.Cur + eLong2.Cur, eLong2.Prev = t2, eLong2.Cur + } + } + + // We could immediately start working at s now, but to improve + // compression we first update the hash table at s-1 and at s. + cv = load6432(src, s) + } + +emitRemainder: + if int(nextEmit) < len(src) { + // If nothing was added, don't encode literals. + if dst.n == 0 { + return + } + + emitLiteral(dst, src[nextEmit:]) + } +} diff --git a/vendor/github.com/klauspost/compress/flate/regmask_amd64.go b/vendor/github.com/klauspost/compress/flate/regmask_amd64.go new file mode 100644 index 000000000..6ed28061b --- /dev/null +++ b/vendor/github.com/klauspost/compress/flate/regmask_amd64.go @@ -0,0 +1,37 @@ +package flate + +const ( + // Masks for shifts with register sizes of the shift value. + // This can be used to work around the x86 design of shifting by mod register size. + // It can be used when a variable shift is always smaller than the register size. + + // reg8SizeMaskX - shift value is 8 bits, shifted is X + reg8SizeMask8 = 7 + reg8SizeMask16 = 15 + reg8SizeMask32 = 31 + reg8SizeMask64 = 63 + + // reg16SizeMaskX - shift value is 16 bits, shifted is X + reg16SizeMask8 = reg8SizeMask8 + reg16SizeMask16 = reg8SizeMask16 + reg16SizeMask32 = reg8SizeMask32 + reg16SizeMask64 = reg8SizeMask64 + + // reg32SizeMaskX - shift value is 32 bits, shifted is X + reg32SizeMask8 = reg8SizeMask8 + reg32SizeMask16 = reg8SizeMask16 + reg32SizeMask32 = reg8SizeMask32 + reg32SizeMask64 = reg8SizeMask64 + + // reg64SizeMaskX - shift value is 64 bits, shifted is X + reg64SizeMask8 = reg8SizeMask8 + reg64SizeMask16 = reg8SizeMask16 + reg64SizeMask32 = reg8SizeMask32 + reg64SizeMask64 = reg8SizeMask64 + + // regSizeMaskUintX - shift value is uint, shifted is X + regSizeMaskUint8 = reg8SizeMask8 + regSizeMaskUint16 = reg8SizeMask16 + regSizeMaskUint32 = reg8SizeMask32 + regSizeMaskUint64 = reg8SizeMask64 +) diff --git a/vendor/github.com/klauspost/compress/flate/regmask_other.go b/vendor/github.com/klauspost/compress/flate/regmask_other.go new file mode 100644 index 000000000..1b7a2cbd7 --- /dev/null +++ b/vendor/github.com/klauspost/compress/flate/regmask_other.go @@ -0,0 +1,40 @@ +//go:build !amd64 +// +build !amd64 + +package flate + +const ( + // Masks for shifts with register sizes of the shift value. + // This can be used to work around the x86 design of shifting by mod register size. + // It can be used when a variable shift is always smaller than the register size. 
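A note on why these masks exist: Go defines x << n as 0 once n reaches the width of x, so the compiler normally has to guard variable shifts against oversized counts. On amd64 the shift instructions already reduce the count modulo the operand width, so writing the count as, for example,

// fb |= uint32(c) << (fnb & regSizeMaskUint32)   // taken from the decoder above

with the mask being 31 on amd64 lets the compiler prove the count is in range and emit a bare shift. On the other architectures covered by this file the masks are all ones, so the same expression keeps its portable meaning unchanged.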
+ + // reg8SizeMaskX - shift value is 8 bits, shifted is X + reg8SizeMask8 = 0xff + reg8SizeMask16 = 0xff + reg8SizeMask32 = 0xff + reg8SizeMask64 = 0xff + + // reg16SizeMaskX - shift value is 16 bits, shifted is X + reg16SizeMask8 = 0xffff + reg16SizeMask16 = 0xffff + reg16SizeMask32 = 0xffff + reg16SizeMask64 = 0xffff + + // reg32SizeMaskX - shift value is 32 bits, shifted is X + reg32SizeMask8 = 0xffffffff + reg32SizeMask16 = 0xffffffff + reg32SizeMask32 = 0xffffffff + reg32SizeMask64 = 0xffffffff + + // reg64SizeMaskX - shift value is 64 bits, shifted is X + reg64SizeMask8 = 0xffffffffffffffff + reg64SizeMask16 = 0xffffffffffffffff + reg64SizeMask32 = 0xffffffffffffffff + reg64SizeMask64 = 0xffffffffffffffff + + // regSizeMaskUintX - shift value is uint, shifted is X + regSizeMaskUint8 = ^uint(0) + regSizeMaskUint16 = ^uint(0) + regSizeMaskUint32 = ^uint(0) + regSizeMaskUint64 = ^uint(0) +) diff --git a/vendor/github.com/klauspost/compress/flate/stateless.go b/vendor/github.com/klauspost/compress/flate/stateless.go new file mode 100644 index 000000000..93a1d1503 --- /dev/null +++ b/vendor/github.com/klauspost/compress/flate/stateless.go @@ -0,0 +1,305 @@ +package flate + +import ( + "io" + "math" + "sync" +) + +const ( + maxStatelessBlock = math.MaxInt16 + // dictionary will be taken from maxStatelessBlock, so limit it. + maxStatelessDict = 8 << 10 + + slTableBits = 13 + slTableSize = 1 << slTableBits + slTableShift = 32 - slTableBits +) + +type statelessWriter struct { + dst io.Writer + closed bool +} + +func (s *statelessWriter) Close() error { + if s.closed { + return nil + } + s.closed = true + // Emit EOF block + return StatelessDeflate(s.dst, nil, true, nil) +} + +func (s *statelessWriter) Write(p []byte) (n int, err error) { + err = StatelessDeflate(s.dst, p, false, nil) + if err != nil { + return 0, err + } + return len(p), nil +} + +func (s *statelessWriter) Reset(w io.Writer) { + s.dst = w + s.closed = false +} + +// NewStatelessWriter will do compression but without maintaining any state +// between Write calls. +// There will be no memory kept between Write calls, +// but compression and speed will be suboptimal. +// Because of this, the size of actual Write calls will affect output size. +func NewStatelessWriter(dst io.Writer) io.WriteCloser { + return &statelessWriter{dst: dst} +} + +// bitWriterPool contains bit writers that can be reused. +var bitWriterPool = sync.Pool{ + New: func() interface{} { + return newHuffmanBitWriter(nil) + }, +} + +// StatelessDeflate allows compressing directly to a Writer without retaining state. +// When returning everything will be flushed. +// Up to 8KB of an optional dictionary can be given which is presumed to precede the block. +// Longer dictionaries will be truncated and will still produce valid output. +// Sending nil dictionary is perfectly fine. +func StatelessDeflate(out io.Writer, in []byte, eof bool, dict []byte) error { + var dst tokens + bw := bitWriterPool.Get().(*huffmanBitWriter) + bw.reset(out) + defer func() { + // don't keep a reference to our output + bw.reset(nil) + bitWriterPool.Put(bw) + }() + if eof && len(in) == 0 { + // Just write an EOF block. + // Could be faster... 
+ bw.writeStoredHeader(0, true) + bw.flush() + return bw.err + } + + // Truncate dict + if len(dict) > maxStatelessDict { + dict = dict[len(dict)-maxStatelessDict:] + } + + for len(in) > 0 { + todo := in + if len(todo) > maxStatelessBlock-len(dict) { + todo = todo[:maxStatelessBlock-len(dict)] + } + in = in[len(todo):] + uncompressed := todo + if len(dict) > 0 { + // combine dict and source + bufLen := len(todo) + len(dict) + combined := make([]byte, bufLen) + copy(combined, dict) + copy(combined[len(dict):], todo) + todo = combined + } + // Compress + statelessEnc(&dst, todo, int16(len(dict))) + isEof := eof && len(in) == 0 + + if dst.n == 0 { + bw.writeStoredHeader(len(uncompressed), isEof) + if bw.err != nil { + return bw.err + } + bw.writeBytes(uncompressed) + } else if int(dst.n) > len(uncompressed)-len(uncompressed)>>4 { + // If we removed less than 1/16th, huffman compress the block. + bw.writeBlockHuff(isEof, uncompressed, len(in) == 0) + } else { + bw.writeBlockDynamic(&dst, isEof, uncompressed, len(in) == 0) + } + if len(in) > 0 { + // Retain a dict if we have more + dict = todo[len(todo)-maxStatelessDict:] + dst.Reset() + } + if bw.err != nil { + return bw.err + } + } + if !eof { + // Align, only a stored block can do that. + bw.writeStoredHeader(0, false) + } + bw.flush() + return bw.err +} + +func hashSL(u uint32) uint32 { + return (u * 0x1e35a7bd) >> slTableShift +} + +func load3216(b []byte, i int16) uint32 { + // Help the compiler eliminate bounds checks on the read so it can be done in a single read. + b = b[i:] + b = b[:4] + return uint32(b[0]) | uint32(b[1])<<8 | uint32(b[2])<<16 | uint32(b[3])<<24 +} + +func load6416(b []byte, i int16) uint64 { + // Help the compiler eliminate bounds checks on the read so it can be done in a single read. + b = b[i:] + b = b[:8] + return uint64(b[0]) | uint64(b[1])<<8 | uint64(b[2])<<16 | uint64(b[3])<<24 | + uint64(b[4])<<32 | uint64(b[5])<<40 | uint64(b[6])<<48 | uint64(b[7])<<56 +} + +func statelessEnc(dst *tokens, src []byte, startAt int16) { + const ( + inputMargin = 12 - 1 + minNonLiteralBlockSize = 1 + 1 + inputMargin + ) + + type tableEntry struct { + offset int16 + } + + var table [slTableSize]tableEntry + + // This check isn't in the Snappy implementation, but there, the caller + // instead of the callee handles this case. + if len(src)-int(startAt) < minNonLiteralBlockSize { + // We do not fill the token table. + // This will be picked up by caller. + dst.n = 0 + return + } + // Index until startAt + if startAt > 0 { + cv := load3232(src, 0) + for i := int16(0); i < startAt; i++ { + table[hashSL(cv)] = tableEntry{offset: i} + cv = (cv >> 8) | (uint32(src[i+4]) << 24) + } + } + + s := startAt + 1 + nextEmit := startAt + // sLimit is when to stop looking for offset/length copies. The inputMargin + // lets us use a fast path for emitLiteral in the main loop, while we are + // looking for copies. + sLimit := int16(len(src) - inputMargin) + + // nextEmit is where in src the next emitLiteral should start from. + cv := load3216(src, s) + + for { + const skipLog = 5 + const doEvery = 2 + + nextS := s + var candidate tableEntry + for { + nextHash := hashSL(cv) + candidate = table[nextHash] + nextS = s + doEvery + (s-nextEmit)>>skipLog + if nextS > sLimit || nextS <= 0 { + goto emitRemainder + } + + now := load6416(src, nextS) + table[nextHash] = tableEntry{offset: s} + nextHash = hashSL(uint32(now)) + + if cv == load3216(src, candidate.offset) { + table[nextHash] = tableEntry{offset: nextS} + break + } + + // Do one right away... 
+ cv = uint32(now) + s = nextS + nextS++ + candidate = table[nextHash] + now >>= 8 + table[nextHash] = tableEntry{offset: s} + + if cv == load3216(src, candidate.offset) { + table[nextHash] = tableEntry{offset: nextS} + break + } + cv = uint32(now) + s = nextS + } + + // A 4-byte match has been found. We'll later see if more than 4 bytes + // match. But, prior to the match, src[nextEmit:s] are unmatched. Emit + // them as literal bytes. + for { + // Invariant: we have a 4-byte match at s, and no need to emit any + // literal bytes prior to s. + + // Extend the 4-byte match as long as possible. + t := candidate.offset + l := int16(matchLen(src[s+4:], src[t+4:]) + 4) + + // Extend backwards + for t > 0 && s > nextEmit && src[t-1] == src[s-1] { + s-- + t-- + l++ + } + if nextEmit < s { + if false { + emitLiteral(dst, src[nextEmit:s]) + } else { + for _, v := range src[nextEmit:s] { + dst.tokens[dst.n] = token(v) + dst.litHist[v]++ + dst.n++ + } + } + } + + // Save the match found + dst.AddMatchLong(int32(l), uint32(s-t-baseMatchOffset)) + s += l + nextEmit = s + if nextS >= s { + s = nextS + 1 + } + if s >= sLimit { + goto emitRemainder + } + + // We could immediately start working at s now, but to improve + // compression we first update the hash table at s-2 and at s. If + // another emitCopy is not our next move, also calculate nextHash + // at s+1. At least on GOARCH=amd64, these three hash calculations + // are faster as one load64 call (with some shifts) instead of + // three load32 calls. + x := load6416(src, s-2) + o := s - 2 + prevHash := hashSL(uint32(x)) + table[prevHash] = tableEntry{offset: o} + x >>= 16 + currHash := hashSL(uint32(x)) + candidate = table[currHash] + table[currHash] = tableEntry{offset: o + 2} + + if uint32(x) != load3216(src, candidate.offset) { + cv = uint32(x >> 8) + s++ + break + } + } + } + +emitRemainder: + if int(nextEmit) < len(src) { + // If nothing was added, don't encode literals. + if dst.n == 0 { + return + } + emitLiteral(dst, src[nextEmit:]) + } +} diff --git a/vendor/github.com/klauspost/compress/flate/token.go b/vendor/github.com/klauspost/compress/flate/token.go new file mode 100644 index 000000000..d818790c1 --- /dev/null +++ b/vendor/github.com/klauspost/compress/flate/token.go @@ -0,0 +1,379 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
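A short usage sketch for the stateless API defined in stateless.go above (illustrative only; buf and payload are placeholder variables, and flate is github.com/klauspost/compress/flate):

var buf bytes.Buffer
w := flate.NewStatelessWriter(&buf)          // keeps no state between Writes,
if _, err := w.Write(payload); err != nil {  // so fewer, larger Writes compress better
	return err
}
return w.Close() // emits the final EOF block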
+ +package flate + +import ( + "bytes" + "encoding/binary" + "fmt" + "io" + "math" +) + +const ( + // bits 0-16 xoffset = offset - MIN_OFFSET_SIZE, or literal - 16 bits + // bits 16-22 offsetcode - 5 bits + // bits 22-30 xlength = length - MIN_MATCH_LENGTH - 8 bits + // bits 30-32 type 0 = literal 1=EOF 2=Match 3=Unused - 2 bits + lengthShift = 22 + offsetMask = 1<maxnumlit + offHist [32]uint16 // offset codes + litHist [256]uint16 // codes 0->255 + nFilled int + n uint16 // Must be able to contain maxStoreBlockSize + tokens [maxStoreBlockSize + 1]token +} + +func (t *tokens) Reset() { + if t.n == 0 { + return + } + t.n = 0 + t.nFilled = 0 + for i := range t.litHist[:] { + t.litHist[i] = 0 + } + for i := range t.extraHist[:] { + t.extraHist[i] = 0 + } + for i := range t.offHist[:] { + t.offHist[i] = 0 + } +} + +func (t *tokens) Fill() { + if t.n == 0 { + return + } + for i, v := range t.litHist[:] { + if v == 0 { + t.litHist[i] = 1 + t.nFilled++ + } + } + for i, v := range t.extraHist[:literalCount-256] { + if v == 0 { + t.nFilled++ + t.extraHist[i] = 1 + } + } + for i, v := range t.offHist[:offsetCodeCount] { + if v == 0 { + t.offHist[i] = 1 + } + } +} + +func indexTokens(in []token) tokens { + var t tokens + t.indexTokens(in) + return t +} + +func (t *tokens) indexTokens(in []token) { + t.Reset() + for _, tok := range in { + if tok < matchType { + t.AddLiteral(tok.literal()) + continue + } + t.AddMatch(uint32(tok.length()), tok.offset()&matchOffsetOnlyMask) + } +} + +// emitLiteral writes a literal chunk and returns the number of bytes written. +func emitLiteral(dst *tokens, lit []byte) { + for _, v := range lit { + dst.tokens[dst.n] = token(v) + dst.litHist[v]++ + dst.n++ + } +} + +func (t *tokens) AddLiteral(lit byte) { + t.tokens[t.n] = token(lit) + t.litHist[lit]++ + t.n++ +} + +// from https://stackoverflow.com/a/28730362 +func mFastLog2(val float32) float32 { + ux := int32(math.Float32bits(val)) + log2 := (float32)(((ux >> 23) & 255) - 128) + ux &= -0x7f800001 + ux += 127 << 23 + uval := math.Float32frombits(uint32(ux)) + log2 += ((-0.34484843)*uval+2.02466578)*uval - 0.67487759 + return log2 +} + +// EstimatedBits will return an minimum size estimated by an *optimal* +// compression of the block. +// The size of the block +func (t *tokens) EstimatedBits() int { + shannon := float32(0) + bits := int(0) + nMatches := 0 + total := int(t.n) + t.nFilled + if total > 0 { + invTotal := 1.0 / float32(total) + for _, v := range t.litHist[:] { + if v > 0 { + n := float32(v) + shannon += atLeastOne(-mFastLog2(n*invTotal)) * n + } + } + // Just add 15 for EOB + shannon += 15 + for i, v := range t.extraHist[1 : literalCount-256] { + if v > 0 { + n := float32(v) + shannon += atLeastOne(-mFastLog2(n*invTotal)) * n + bits += int(lengthExtraBits[i&31]) * int(v) + nMatches += int(v) + } + } + } + if nMatches > 0 { + invTotal := 1.0 / float32(nMatches) + for i, v := range t.offHist[:offsetCodeCount] { + if v > 0 { + n := float32(v) + shannon += atLeastOne(-mFastLog2(n*invTotal)) * n + bits += int(offsetExtraBits[i&31]) * int(v) + } + } + } + return int(shannon) + bits +} + +// AddMatch adds a match to the tokens. +// This function is very sensitive to inlining and right on the border. 
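Editorial aside, not part of the vendored patch: the EstimatedBits helper above relies on mFastLog2, a bit-twiddling base-2 logarithm approximation (credited in the source to a Stack Overflow answer). The standalone sketch below copies that helper verbatim into a main package and compares it against math.Log2, purely to illustrate the size of the approximation error; the sample values are arbitrary.

package main

import (
	"fmt"
	"math"
)

// mFastLog2 is copied from the vendored token.go above: it extracts the
// float exponent as an integer, rescales the mantissa into [1,2), and
// applies a small polynomial to approximate log2 of the mantissa.
func mFastLog2(val float32) float32 {
	ux := int32(math.Float32bits(val))
	log2 := (float32)(((ux >> 23) & 255) - 128)
	ux &= -0x7f800001
	ux += 127 << 23
	uval := math.Float32frombits(uint32(ux))
	log2 += ((-0.34484843)*uval+2.02466578)*uval - 0.67487759
	return log2
}

func main() {
	// Arbitrary positive probabilities/counts, as used by EstimatedBits.
	for _, v := range []float32{0.001, 0.1, 0.5, 1, 2, 1000} {
		fmt.Printf("v=%8.3f  fast=%9.4f  exact=%9.4f\n",
			v, mFastLog2(v), math.Log2(float64(v)))
	}
}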
+func (t *tokens) AddMatch(xlength uint32, xoffset uint32) { + if debugDeflate { + if xlength >= maxMatchLength+baseMatchLength { + panic(fmt.Errorf("invalid length: %v", xlength)) + } + if xoffset >= maxMatchOffset+baseMatchOffset { + panic(fmt.Errorf("invalid offset: %v", xoffset)) + } + } + oCode := offsetCode(xoffset) + xoffset |= oCode << 16 + + t.extraHist[lengthCodes1[uint8(xlength)]]++ + t.offHist[oCode&31]++ + t.tokens[t.n] = token(matchType | xlength<= maxMatchOffset+baseMatchOffset { + panic(fmt.Errorf("invalid offset: %v", xoffset)) + } + } + oc := offsetCode(xoffset) + xoffset |= oc << 16 + for xlength > 0 { + xl := xlength + if xl > 258 { + // We need to have at least baseMatchLength left over for next loop. + if xl > 258+baseMatchLength { + xl = 258 + } else { + xl = 258 - baseMatchLength + } + } + xlength -= xl + xl -= baseMatchLength + t.extraHist[lengthCodes1[uint8(xl)]]++ + t.offHist[oc&31]++ + t.tokens[t.n] = token(matchType | uint32(xl)<> lengthShift) } + +// Convert length to code. +func lengthCode(len uint8) uint8 { return lengthCodes[len] } + +// Returns the offset code corresponding to a specific offset +func offsetCode(off uint32) uint32 { + if false { + if off < uint32(len(offsetCodes)) { + return offsetCodes[off&255] + } else if off>>7 < uint32(len(offsetCodes)) { + return offsetCodes[(off>>7)&255] + 14 + } else { + return offsetCodes[(off>>14)&255] + 28 + } + } + if off < uint32(len(offsetCodes)) { + return offsetCodes[uint8(off)] + } + return offsetCodes14[uint8(off>>7)] +} diff --git a/vendor/github.com/klauspost/compress/gzip/gunzip.go b/vendor/github.com/klauspost/compress/gzip/gunzip.go new file mode 100644 index 000000000..66fe5ddf7 --- /dev/null +++ b/vendor/github.com/klauspost/compress/gzip/gunzip.go @@ -0,0 +1,349 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Package gzip implements reading and writing of gzip format compressed files, +// as specified in RFC 1952. +package gzip + +import ( + "bufio" + "compress/gzip" + "encoding/binary" + "hash/crc32" + "io" + "time" + + "github.com/klauspost/compress/flate" +) + +const ( + gzipID1 = 0x1f + gzipID2 = 0x8b + gzipDeflate = 8 + flagText = 1 << 0 + flagHdrCrc = 1 << 1 + flagExtra = 1 << 2 + flagName = 1 << 3 + flagComment = 1 << 4 +) + +var ( + // ErrChecksum is returned when reading GZIP data that has an invalid checksum. + ErrChecksum = gzip.ErrChecksum + // ErrHeader is returned when reading GZIP data that has an invalid header. + ErrHeader = gzip.ErrHeader +) + +var le = binary.LittleEndian + +// noEOF converts io.EOF to io.ErrUnexpectedEOF. +func noEOF(err error) error { + if err == io.EOF { + return io.ErrUnexpectedEOF + } + return err +} + +// The gzip file stores a header giving metadata about the compressed file. +// That header is exposed as the fields of the Writer and Reader structs. +// +// Strings must be UTF-8 encoded and may only contain Unicode code points +// U+0001 through U+00FF, due to limitations of the GZIP file format. +type Header struct { + Comment string // comment + Extra []byte // "extra data" + ModTime time.Time // modification time + Name string // file name + OS byte // operating system type +} + +// A Reader is an io.Reader that can be read to retrieve +// uncompressed data from a gzip-format compressed file. +// +// In general, a gzip file can be a concatenation of gzip files, +// each with its own header. 
Reads from the Reader +// return the concatenation of the uncompressed data of each. +// Only the first header is recorded in the Reader fields. +// +// Gzip files store a length and checksum of the uncompressed data. +// The Reader will return a ErrChecksum when Read +// reaches the end of the uncompressed data if it does not +// have the expected length or checksum. Clients should treat data +// returned by Read as tentative until they receive the io.EOF +// marking the end of the data. +type Reader struct { + Header // valid after NewReader or Reader.Reset + r flate.Reader + br *bufio.Reader + decompressor io.ReadCloser + digest uint32 // CRC-32, IEEE polynomial (section 8) + size uint32 // Uncompressed size (section 2.3.1) + buf [512]byte + err error + multistream bool +} + +// NewReader creates a new Reader reading the given reader. +// If r does not also implement io.ByteReader, +// the decompressor may read more data than necessary from r. +// +// It is the caller's responsibility to call Close on the Reader when done. +// +// The Reader.Header fields will be valid in the Reader returned. +func NewReader(r io.Reader) (*Reader, error) { + z := new(Reader) + if err := z.Reset(r); err != nil { + return nil, err + } + return z, nil +} + +// Reset discards the Reader z's state and makes it equivalent to the +// result of its original state from NewReader, but reading from r instead. +// This permits reusing a Reader rather than allocating a new one. +func (z *Reader) Reset(r io.Reader) error { + *z = Reader{ + decompressor: z.decompressor, + multistream: true, + } + if rr, ok := r.(flate.Reader); ok { + z.r = rr + } else { + // Reuse if we can. + if z.br != nil { + z.br.Reset(r) + } else { + z.br = bufio.NewReader(r) + } + z.r = z.br + } + z.Header, z.err = z.readHeader() + return z.err +} + +// Multistream controls whether the reader supports multistream files. +// +// If enabled (the default), the Reader expects the input to be a sequence +// of individually gzipped data streams, each with its own header and +// trailer, ending at EOF. The effect is that the concatenation of a sequence +// of gzipped files is treated as equivalent to the gzip of the concatenation +// of the sequence. This is standard behavior for gzip readers. +// +// Calling Multistream(false) disables this behavior; disabling the behavior +// can be useful when reading file formats that distinguish individual gzip +// data streams or mix gzip data streams with other data streams. +// In this mode, when the Reader reaches the end of the data stream, +// Read returns io.EOF. If the underlying reader implements io.ByteReader, +// it will be left positioned just after the gzip stream. +// To start the next stream, call z.Reset(r) followed by z.Multistream(false). +// If there is no next stream, z.Reset(r) will return io.EOF. +func (z *Reader) Multistream(ok bool) { + z.multistream = ok +} + +// readString reads a NUL-terminated string from z.r. +// It treats the bytes read as being encoded as ISO 8859-1 (Latin-1) and +// will output a string encoded using UTF-8. +// This method always updates z.digest with the data read. +func (z *Reader) readString() (string, error) { + var err error + needConv := false + for i := 0; ; i++ { + if i >= len(z.buf) { + return "", ErrHeader + } + z.buf[i], err = z.r.ReadByte() + if err != nil { + return "", err + } + if z.buf[i] > 0x7f { + needConv = true + } + if z.buf[i] == 0 { + // Digest covers the NUL terminator. 
+ z.digest = crc32.Update(z.digest, crc32.IEEETable, z.buf[:i+1]) + + // Strings are ISO 8859-1, Latin-1 (RFC 1952, section 2.3.1). + if needConv { + s := make([]rune, 0, i) + for _, v := range z.buf[:i] { + s = append(s, rune(v)) + } + return string(s), nil + } + return string(z.buf[:i]), nil + } + } +} + +// readHeader reads the GZIP header according to section 2.3.1. +// This method does not set z.err. +func (z *Reader) readHeader() (hdr Header, err error) { + if _, err = io.ReadFull(z.r, z.buf[:10]); err != nil { + // RFC 1952, section 2.2, says the following: + // A gzip file consists of a series of "members" (compressed data sets). + // + // Other than this, the specification does not clarify whether a + // "series" is defined as "one or more" or "zero or more". To err on the + // side of caution, Go interprets this to mean "zero or more". + // Thus, it is okay to return io.EOF here. + return hdr, err + } + if z.buf[0] != gzipID1 || z.buf[1] != gzipID2 || z.buf[2] != gzipDeflate { + return hdr, ErrHeader + } + flg := z.buf[3] + hdr.ModTime = time.Unix(int64(le.Uint32(z.buf[4:8])), 0) + // z.buf[8] is XFL and is currently ignored. + hdr.OS = z.buf[9] + z.digest = crc32.ChecksumIEEE(z.buf[:10]) + + if flg&flagExtra != 0 { + if _, err = io.ReadFull(z.r, z.buf[:2]); err != nil { + return hdr, noEOF(err) + } + z.digest = crc32.Update(z.digest, crc32.IEEETable, z.buf[:2]) + data := make([]byte, le.Uint16(z.buf[:2])) + if _, err = io.ReadFull(z.r, data); err != nil { + return hdr, noEOF(err) + } + z.digest = crc32.Update(z.digest, crc32.IEEETable, data) + hdr.Extra = data + } + + var s string + if flg&flagName != 0 { + if s, err = z.readString(); err != nil { + return hdr, err + } + hdr.Name = s + } + + if flg&flagComment != 0 { + if s, err = z.readString(); err != nil { + return hdr, err + } + hdr.Comment = s + } + + if flg&flagHdrCrc != 0 { + if _, err = io.ReadFull(z.r, z.buf[:2]); err != nil { + return hdr, noEOF(err) + } + digest := le.Uint16(z.buf[:2]) + if digest != uint16(z.digest) { + return hdr, ErrHeader + } + } + + z.digest = 0 + if z.decompressor == nil { + z.decompressor = flate.NewReader(z.r) + } else { + z.decompressor.(flate.Resetter).Reset(z.r, nil) + } + return hdr, nil +} + +// Read implements io.Reader, reading uncompressed bytes from its underlying Reader. +func (z *Reader) Read(p []byte) (n int, err error) { + if z.err != nil { + return 0, z.err + } + + for n == 0 { + n, z.err = z.decompressor.Read(p) + z.digest = crc32.Update(z.digest, crc32.IEEETable, p[:n]) + z.size += uint32(n) + if z.err != io.EOF { + // In the normal case we return here. + return n, z.err + } + + // Finished file; check checksum and size. + if _, err := io.ReadFull(z.r, z.buf[:8]); err != nil { + z.err = noEOF(err) + return n, z.err + } + digest := le.Uint32(z.buf[:4]) + size := le.Uint32(z.buf[4:8]) + if digest != z.digest || size != z.size { + z.err = ErrChecksum + return n, z.err + } + z.digest, z.size = 0, 0 + + // File is ok; check if there is another. + if !z.multistream { + return n, io.EOF + } + z.err = nil // Remove io.EOF + + if _, z.err = z.readHeader(); z.err != nil { + return n, z.err + } + } + + return n, nil +} + +// Support the io.WriteTo interface for io.Copy and friends. +func (z *Reader) WriteTo(w io.Writer) (int64, error) { + total := int64(0) + crcWriter := crc32.NewIEEE() + for { + if z.err != nil { + if z.err == io.EOF { + return total, nil + } + return total, z.err + } + + // We write both to output and digest. 
+ mw := io.MultiWriter(w, crcWriter) + n, err := z.decompressor.(io.WriterTo).WriteTo(mw) + total += n + z.size += uint32(n) + if err != nil { + z.err = err + return total, z.err + } + + // Finished file; check checksum + size. + if _, err := io.ReadFull(z.r, z.buf[0:8]); err != nil { + if err == io.EOF { + err = io.ErrUnexpectedEOF + } + z.err = err + return total, err + } + z.digest = crcWriter.Sum32() + digest := le.Uint32(z.buf[:4]) + size := le.Uint32(z.buf[4:8]) + if digest != z.digest || size != z.size { + z.err = ErrChecksum + return total, z.err + } + z.digest, z.size = 0, 0 + + // File is ok; check if there is another. + if !z.multistream { + return total, nil + } + crcWriter.Reset() + z.err = nil // Remove io.EOF + + if _, z.err = z.readHeader(); z.err != nil { + if z.err == io.EOF { + return total, nil + } + return total, z.err + } + } +} + +// Close closes the Reader. It does not close the underlying io.Reader. +// In order for the GZIP checksum to be verified, the reader must be +// fully consumed until the io.EOF. +func (z *Reader) Close() error { return z.decompressor.Close() } diff --git a/vendor/github.com/klauspost/compress/gzip/gzip.go b/vendor/github.com/klauspost/compress/gzip/gzip.go new file mode 100644 index 000000000..26203851b --- /dev/null +++ b/vendor/github.com/klauspost/compress/gzip/gzip.go @@ -0,0 +1,269 @@ +// Copyright 2010 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package gzip + +import ( + "errors" + "fmt" + "hash/crc32" + "io" + + "github.com/klauspost/compress/flate" +) + +// These constants are copied from the flate package, so that code that imports +// "compress/gzip" does not also have to import "compress/flate". +const ( + NoCompression = flate.NoCompression + BestSpeed = flate.BestSpeed + BestCompression = flate.BestCompression + DefaultCompression = flate.DefaultCompression + ConstantCompression = flate.ConstantCompression + HuffmanOnly = flate.HuffmanOnly + + // StatelessCompression will do compression but without maintaining any state + // between Write calls. + // There will be no memory kept between Write calls, + // but compression and speed will be suboptimal. + // Because of this, the size of actual Write calls will affect output size. + StatelessCompression = -3 +) + +// A Writer is an io.WriteCloser. +// Writes to a Writer are compressed and written to w. +type Writer struct { + Header // written at first call to Write, Flush, or Close + w io.Writer + level int + err error + compressor *flate.Writer + digest uint32 // CRC-32, IEEE polynomial (section 8) + size uint32 // Uncompressed size (section 2.3.1) + wroteHeader bool + closed bool + buf [10]byte +} + +// NewWriter returns a new Writer. +// Writes to the returned writer are compressed and written to w. +// +// It is the caller's responsibility to call Close on the WriteCloser when done. +// Writes may be buffered and not flushed until Close. +// +// Callers that wish to set the fields in Writer.Header must do so before +// the first call to Write, Flush, or Close. +func NewWriter(w io.Writer) *Writer { + z, _ := NewWriterLevel(w, DefaultCompression) + return z +} + +// NewWriterLevel is like NewWriter but specifies the compression level instead +// of assuming DefaultCompression. +// +// The compression level can be DefaultCompression, NoCompression, or any +// integer value between BestSpeed and BestCompression inclusive. The error +// returned will be nil if the level is valid. 
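Editorial aside, not part of the vendored patch: the level constants above, including the package-specific StatelessCompression mode, are passed directly to NewWriterLevel. A minimal sketch of how they might be exercised follows; the sample payload and the size comparison are assumptions for illustration only.

package main

import (
	"bytes"
	"fmt"
	"log"

	"github.com/klauspost/compress/gzip"
)

func main() {
	// Illustrative input; any reasonably repetitive payload will do.
	payload := bytes.Repeat([]byte("example flow record "), 1024)

	for _, level := range []int{gzip.BestSpeed, gzip.BestCompression, gzip.StatelessCompression} {
		var buf bytes.Buffer
		zw, err := gzip.NewWriterLevel(&buf, level)
		if err != nil {
			log.Fatal(err)
		}
		if _, err := zw.Write(payload); err != nil {
			log.Fatal(err)
		}
		if err := zw.Close(); err != nil {
			log.Fatal(err)
		}
		fmt.Printf("level %d: %d -> %d bytes\n", level, len(payload), buf.Len())
	}
}

With StatelessCompression, each Write call is compressed as an independent block, so feeding the writer many small Writes would enlarge the output; the single large Write above sidesteps that.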
+func NewWriterLevel(w io.Writer, level int) (*Writer, error) { + if level < StatelessCompression || level > BestCompression { + return nil, fmt.Errorf("gzip: invalid compression level: %d", level) + } + z := new(Writer) + z.init(w, level) + return z, nil +} + +func (z *Writer) init(w io.Writer, level int) { + compressor := z.compressor + if level != StatelessCompression { + if compressor != nil { + compressor.Reset(w) + } + } + + *z = Writer{ + Header: Header{ + OS: 255, // unknown + }, + w: w, + level: level, + compressor: compressor, + } +} + +// Reset discards the Writer z's state and makes it equivalent to the +// result of its original state from NewWriter or NewWriterLevel, but +// writing to w instead. This permits reusing a Writer rather than +// allocating a new one. +func (z *Writer) Reset(w io.Writer) { + z.init(w, z.level) +} + +// writeBytes writes a length-prefixed byte slice to z.w. +func (z *Writer) writeBytes(b []byte) error { + if len(b) > 0xffff { + return errors.New("gzip.Write: Extra data is too large") + } + le.PutUint16(z.buf[:2], uint16(len(b))) + _, err := z.w.Write(z.buf[:2]) + if err != nil { + return err + } + _, err = z.w.Write(b) + return err +} + +// writeString writes a UTF-8 string s in GZIP's format to z.w. +// GZIP (RFC 1952) specifies that strings are NUL-terminated ISO 8859-1 (Latin-1). +func (z *Writer) writeString(s string) (err error) { + // GZIP stores Latin-1 strings; error if non-Latin-1; convert if non-ASCII. + needconv := false + for _, v := range s { + if v == 0 || v > 0xff { + return errors.New("gzip.Write: non-Latin-1 header string") + } + if v > 0x7f { + needconv = true + } + } + if needconv { + b := make([]byte, 0, len(s)) + for _, v := range s { + b = append(b, byte(v)) + } + _, err = z.w.Write(b) + } else { + _, err = io.WriteString(z.w, s) + } + if err != nil { + return err + } + // GZIP strings are NUL-terminated. + z.buf[0] = 0 + _, err = z.w.Write(z.buf[:1]) + return err +} + +// Write writes a compressed form of p to the underlying io.Writer. The +// compressed bytes are not necessarily flushed until the Writer is closed. +func (z *Writer) Write(p []byte) (int, error) { + if z.err != nil { + return 0, z.err + } + var n int + // Write the GZIP header lazily. + if !z.wroteHeader { + z.wroteHeader = true + z.buf[0] = gzipID1 + z.buf[1] = gzipID2 + z.buf[2] = gzipDeflate + z.buf[3] = 0 + if z.Extra != nil { + z.buf[3] |= 0x04 + } + if z.Name != "" { + z.buf[3] |= 0x08 + } + if z.Comment != "" { + z.buf[3] |= 0x10 + } + le.PutUint32(z.buf[4:8], uint32(z.ModTime.Unix())) + if z.level == BestCompression { + z.buf[8] = 2 + } else if z.level == BestSpeed { + z.buf[8] = 4 + } else { + z.buf[8] = 0 + } + z.buf[9] = z.OS + n, z.err = z.w.Write(z.buf[:10]) + if z.err != nil { + return n, z.err + } + if z.Extra != nil { + z.err = z.writeBytes(z.Extra) + if z.err != nil { + return n, z.err + } + } + if z.Name != "" { + z.err = z.writeString(z.Name) + if z.err != nil { + return n, z.err + } + } + if z.Comment != "" { + z.err = z.writeString(z.Comment) + if z.err != nil { + return n, z.err + } + } + + if z.compressor == nil && z.level != StatelessCompression { + z.compressor, _ = flate.NewWriter(z.w, z.level) + } + } + z.size += uint32(len(p)) + z.digest = crc32.Update(z.digest, crc32.IEEETable, p) + if z.level == StatelessCompression { + return len(p), flate.StatelessDeflate(z.w, p, false, nil) + } + n, z.err = z.compressor.Write(p) + return n, z.err +} + +// Flush flushes any pending compressed data to the underlying writer. 
+// +// It is useful mainly in compressed network protocols, to ensure that +// a remote reader has enough data to reconstruct a packet. Flush does +// not return until the data has been written. If the underlying +// writer returns an error, Flush returns that error. +// +// In the terminology of the zlib library, Flush is equivalent to Z_SYNC_FLUSH. +func (z *Writer) Flush() error { + if z.err != nil { + return z.err + } + if z.closed || z.level == StatelessCompression { + return nil + } + if !z.wroteHeader { + z.Write(nil) + if z.err != nil { + return z.err + } + } + z.err = z.compressor.Flush() + return z.err +} + +// Close closes the Writer, flushing any unwritten data to the underlying +// io.Writer, but does not close the underlying io.Writer. +func (z *Writer) Close() error { + if z.err != nil { + return z.err + } + if z.closed { + return nil + } + z.closed = true + if !z.wroteHeader { + z.Write(nil) + if z.err != nil { + return z.err + } + } + if z.level == StatelessCompression { + z.err = flate.StatelessDeflate(z.w, nil, true, nil) + } else { + z.err = z.compressor.Close() + } + if z.err != nil { + return z.err + } + le.PutUint32(z.buf[:4], z.digest) + le.PutUint32(z.buf[4:8], z.size) + _, z.err = z.w.Write(z.buf[:8]) + return z.err +} diff --git a/vendor/github.com/klauspost/compress/huff0/decompress_amd64.go b/vendor/github.com/klauspost/compress/huff0/decompress_amd64.go index 671e630a8..9f3e9f79e 100644 --- a/vendor/github.com/klauspost/compress/huff0/decompress_amd64.go +++ b/vendor/github.com/klauspost/compress/huff0/decompress_amd64.go @@ -27,10 +27,7 @@ func decompress4x_8b_main_loop_amd64(ctx *decompress4xContext) const fallback8BitSize = 800 type decompress4xContext struct { - pbr0 *bitReaderShifted - pbr1 *bitReaderShifted - pbr2 *bitReaderShifted - pbr3 *bitReaderShifted + pbr *[4]bitReaderShifted peekBits uint8 out *byte dstEvery int @@ -89,10 +86,7 @@ func (d *Decoder) Decompress4X(dst, src []byte) ([]byte, error) { if len(out) > 4*4 && !(br[0].off < 4 || br[1].off < 4 || br[2].off < 4 || br[3].off < 4) { ctx := decompress4xContext{ - pbr0: &br[0], - pbr1: &br[1], - pbr2: &br[2], - pbr3: &br[3], + pbr: &br, peekBits: uint8((64 - d.actualTableLog) & 63), // see: bitReaderShifted.peekBitsFast() out: &out[0], dstEvery: dstEvery, diff --git a/vendor/github.com/klauspost/compress/huff0/decompress_amd64.s b/vendor/github.com/klauspost/compress/huff0/decompress_amd64.s index 6c65c6e2b..dd1a5aecd 100644 --- a/vendor/github.com/klauspost/compress/huff0/decompress_amd64.s +++ b/vendor/github.com/klauspost/compress/huff0/decompress_amd64.s @@ -4,45 +4,40 @@ // +build amd64,!appengine,!noasm,gc // func decompress4x_main_loop_amd64(ctx *decompress4xContext) -TEXT ·decompress4x_main_loop_amd64(SB), $8-8 +TEXT ·decompress4x_main_loop_amd64(SB), $0-8 XORQ DX, DX // Preload values MOVQ ctx+0(FP), AX - MOVBQZX 32(AX), SI - MOVQ 40(AX), DI - MOVQ DI, BX - MOVQ 72(AX), CX - MOVQ CX, (SP) - MOVQ 48(AX), R8 - MOVQ 56(AX), R9 - MOVQ (AX), R10 - MOVQ 8(AX), R11 - MOVQ 16(AX), R12 - MOVQ 24(AX), R13 + MOVBQZX 8(AX), DI + MOVQ 16(AX), SI + MOVQ 48(AX), BX + MOVQ 24(AX), R9 + MOVQ 32(AX), R10 + MOVQ (AX), R11 // Main loop main_loop: - MOVQ BX, DI - CMPQ DI, (SP) + MOVQ SI, R8 + CMPQ R8, BX SETGE DL // br0.fillFast32() - MOVQ 32(R10), R14 - MOVBQZX 40(R10), R15 - CMPQ R15, $0x20 + MOVQ 32(R11), R12 + MOVBQZX 40(R11), R13 + CMPQ R13, $0x20 JBE skip_fill0 - MOVQ 24(R10), AX - SUBQ $0x20, R15 + MOVQ 24(R11), AX + SUBQ $0x20, R13 SUBQ $0x04, AX - MOVQ (R10), BP + MOVQ (R11), R14 // b.value |= 
uint64(low) << (b.bitsRead & 63) - MOVL (AX)(BP*1), BP - MOVQ R15, CX - SHLQ CL, BP - MOVQ AX, 24(R10) - ORQ BP, R14 + MOVL (AX)(R14*1), R14 + MOVQ R13, CX + SHLQ CL, R14 + MOVQ AX, 24(R11) + ORQ R14, R12 // exhausted = exhausted || (br0.off < 4) CMPQ AX, $0x04 @@ -51,57 +46,57 @@ main_loop: skip_fill0: // val0 := br0.peekTopBits(peekBits) - MOVQ R14, BP - MOVQ SI, CX - SHRQ CL, BP + MOVQ R12, R14 + MOVQ DI, CX + SHRQ CL, R14 // v0 := table[val0&mask] - MOVW (R9)(BP*2), CX + MOVW (R10)(R14*2), CX // br0.advance(uint8(v0.entry) MOVB CH, AL - SHLQ CL, R14 - ADDB CL, R15 + SHLQ CL, R12 + ADDB CL, R13 // val1 := br0.peekTopBits(peekBits) - MOVQ SI, CX - MOVQ R14, BP - SHRQ CL, BP + MOVQ DI, CX + MOVQ R12, R14 + SHRQ CL, R14 // v1 := table[val1&mask] - MOVW (R9)(BP*2), CX + MOVW (R10)(R14*2), CX // br0.advance(uint8(v1.entry)) MOVB CH, AH - SHLQ CL, R14 - ADDB CL, R15 + SHLQ CL, R12 + ADDB CL, R13 // these two writes get coalesced // out[id * dstEvery + 0] = uint8(v0.entry >> 8) // out[id * dstEvery + 1] = uint8(v1.entry >> 8) - MOVW AX, (DI) + MOVW AX, (R8) - // update the bitrader reader structure - MOVQ R14, 32(R10) - MOVB R15, 40(R10) - ADDQ R8, DI + // update the bitreader structure + MOVQ R12, 32(R11) + MOVB R13, 40(R11) + ADDQ R9, R8 // br1.fillFast32() - MOVQ 32(R11), R14 - MOVBQZX 40(R11), R15 - CMPQ R15, $0x20 + MOVQ 80(R11), R12 + MOVBQZX 88(R11), R13 + CMPQ R13, $0x20 JBE skip_fill1 - MOVQ 24(R11), AX - SUBQ $0x20, R15 + MOVQ 72(R11), AX + SUBQ $0x20, R13 SUBQ $0x04, AX - MOVQ (R11), BP + MOVQ 48(R11), R14 // b.value |= uint64(low) << (b.bitsRead & 63) - MOVL (AX)(BP*1), BP - MOVQ R15, CX - SHLQ CL, BP - MOVQ AX, 24(R11) - ORQ BP, R14 + MOVL (AX)(R14*1), R14 + MOVQ R13, CX + SHLQ CL, R14 + MOVQ AX, 72(R11) + ORQ R14, R12 // exhausted = exhausted || (br1.off < 4) CMPQ AX, $0x04 @@ -110,57 +105,57 @@ skip_fill0: skip_fill1: // val0 := br1.peekTopBits(peekBits) - MOVQ R14, BP - MOVQ SI, CX - SHRQ CL, BP + MOVQ R12, R14 + MOVQ DI, CX + SHRQ CL, R14 // v0 := table[val0&mask] - MOVW (R9)(BP*2), CX + MOVW (R10)(R14*2), CX // br1.advance(uint8(v0.entry) MOVB CH, AL - SHLQ CL, R14 - ADDB CL, R15 + SHLQ CL, R12 + ADDB CL, R13 // val1 := br1.peekTopBits(peekBits) - MOVQ SI, CX - MOVQ R14, BP - SHRQ CL, BP + MOVQ DI, CX + MOVQ R12, R14 + SHRQ CL, R14 // v1 := table[val1&mask] - MOVW (R9)(BP*2), CX + MOVW (R10)(R14*2), CX // br1.advance(uint8(v1.entry)) MOVB CH, AH - SHLQ CL, R14 - ADDB CL, R15 + SHLQ CL, R12 + ADDB CL, R13 // these two writes get coalesced // out[id * dstEvery + 0] = uint8(v0.entry >> 8) // out[id * dstEvery + 1] = uint8(v1.entry >> 8) - MOVW AX, (DI) + MOVW AX, (R8) - // update the bitrader reader structure - MOVQ R14, 32(R11) - MOVB R15, 40(R11) - ADDQ R8, DI + // update the bitreader structure + MOVQ R12, 80(R11) + MOVB R13, 88(R11) + ADDQ R9, R8 // br2.fillFast32() - MOVQ 32(R12), R14 - MOVBQZX 40(R12), R15 - CMPQ R15, $0x20 + MOVQ 128(R11), R12 + MOVBQZX 136(R11), R13 + CMPQ R13, $0x20 JBE skip_fill2 - MOVQ 24(R12), AX - SUBQ $0x20, R15 + MOVQ 120(R11), AX + SUBQ $0x20, R13 SUBQ $0x04, AX - MOVQ (R12), BP + MOVQ 96(R11), R14 // b.value |= uint64(low) << (b.bitsRead & 63) - MOVL (AX)(BP*1), BP - MOVQ R15, CX - SHLQ CL, BP - MOVQ AX, 24(R12) - ORQ BP, R14 + MOVL (AX)(R14*1), R14 + MOVQ R13, CX + SHLQ CL, R14 + MOVQ AX, 120(R11) + ORQ R14, R12 // exhausted = exhausted || (br2.off < 4) CMPQ AX, $0x04 @@ -169,57 +164,57 @@ skip_fill1: skip_fill2: // val0 := br2.peekTopBits(peekBits) - MOVQ R14, BP - MOVQ SI, CX - SHRQ CL, BP + MOVQ R12, R14 + MOVQ DI, CX + SHRQ CL, R14 // v0 := 
table[val0&mask] - MOVW (R9)(BP*2), CX + MOVW (R10)(R14*2), CX // br2.advance(uint8(v0.entry) MOVB CH, AL - SHLQ CL, R14 - ADDB CL, R15 + SHLQ CL, R12 + ADDB CL, R13 // val1 := br2.peekTopBits(peekBits) - MOVQ SI, CX - MOVQ R14, BP - SHRQ CL, BP + MOVQ DI, CX + MOVQ R12, R14 + SHRQ CL, R14 // v1 := table[val1&mask] - MOVW (R9)(BP*2), CX + MOVW (R10)(R14*2), CX // br2.advance(uint8(v1.entry)) MOVB CH, AH - SHLQ CL, R14 - ADDB CL, R15 + SHLQ CL, R12 + ADDB CL, R13 // these two writes get coalesced // out[id * dstEvery + 0] = uint8(v0.entry >> 8) // out[id * dstEvery + 1] = uint8(v1.entry >> 8) - MOVW AX, (DI) + MOVW AX, (R8) - // update the bitrader reader structure - MOVQ R14, 32(R12) - MOVB R15, 40(R12) - ADDQ R8, DI + // update the bitreader structure + MOVQ R12, 128(R11) + MOVB R13, 136(R11) + ADDQ R9, R8 // br3.fillFast32() - MOVQ 32(R13), R14 - MOVBQZX 40(R13), R15 - CMPQ R15, $0x20 + MOVQ 176(R11), R12 + MOVBQZX 184(R11), R13 + CMPQ R13, $0x20 JBE skip_fill3 - MOVQ 24(R13), AX - SUBQ $0x20, R15 + MOVQ 168(R11), AX + SUBQ $0x20, R13 SUBQ $0x04, AX - MOVQ (R13), BP + MOVQ 144(R11), R14 // b.value |= uint64(low) << (b.bitsRead & 63) - MOVL (AX)(BP*1), BP - MOVQ R15, CX - SHLQ CL, BP - MOVQ AX, 24(R13) - ORQ BP, R14 + MOVL (AX)(R14*1), R14 + MOVQ R13, CX + SHLQ CL, R14 + MOVQ AX, 168(R11) + ORQ R14, R12 // exhausted = exhausted || (br3.off < 4) CMPQ AX, $0x04 @@ -228,149 +223,142 @@ skip_fill2: skip_fill3: // val0 := br3.peekTopBits(peekBits) - MOVQ R14, BP - MOVQ SI, CX - SHRQ CL, BP + MOVQ R12, R14 + MOVQ DI, CX + SHRQ CL, R14 // v0 := table[val0&mask] - MOVW (R9)(BP*2), CX + MOVW (R10)(R14*2), CX // br3.advance(uint8(v0.entry) MOVB CH, AL - SHLQ CL, R14 - ADDB CL, R15 + SHLQ CL, R12 + ADDB CL, R13 // val1 := br3.peekTopBits(peekBits) - MOVQ SI, CX - MOVQ R14, BP - SHRQ CL, BP + MOVQ DI, CX + MOVQ R12, R14 + SHRQ CL, R14 // v1 := table[val1&mask] - MOVW (R9)(BP*2), CX + MOVW (R10)(R14*2), CX // br3.advance(uint8(v1.entry)) MOVB CH, AH - SHLQ CL, R14 - ADDB CL, R15 + SHLQ CL, R12 + ADDB CL, R13 // these two writes get coalesced // out[id * dstEvery + 0] = uint8(v0.entry >> 8) // out[id * dstEvery + 1] = uint8(v1.entry >> 8) - MOVW AX, (DI) + MOVW AX, (R8) - // update the bitrader reader structure - MOVQ R14, 32(R13) - MOVB R15, 40(R13) - ADDQ $0x02, BX + // update the bitreader structure + MOVQ R12, 176(R11) + MOVB R13, 184(R11) + ADDQ $0x02, SI TESTB DL, DL JZ main_loop MOVQ ctx+0(FP), AX - MOVQ 40(AX), CX - MOVQ BX, DX - SUBQ CX, DX - SHLQ $0x02, DX - MOVQ DX, 64(AX) + SUBQ 16(AX), SI + SHLQ $0x02, SI + MOVQ SI, 40(AX) RET // func decompress4x_8b_main_loop_amd64(ctx *decompress4xContext) -TEXT ·decompress4x_8b_main_loop_amd64(SB), $16-8 +TEXT ·decompress4x_8b_main_loop_amd64(SB), $0-8 XORQ DX, DX // Preload values MOVQ ctx+0(FP), CX - MOVBQZX 32(CX), BX - MOVQ 40(CX), SI - MOVQ SI, (SP) - MOVQ 72(CX), DX - MOVQ DX, 8(SP) - MOVQ 48(CX), DI - MOVQ 56(CX), R8 - MOVQ (CX), R9 - MOVQ 8(CX), R10 - MOVQ 16(CX), R11 - MOVQ 24(CX), R12 + MOVBQZX 8(CX), DI + MOVQ 16(CX), BX + MOVQ 48(CX), SI + MOVQ 24(CX), R9 + MOVQ 32(CX), R10 + MOVQ (CX), R11 // Main loop main_loop: - MOVQ (SP), SI - CMPQ SI, 8(SP) + MOVQ BX, R8 + CMPQ R8, SI SETGE DL - // br1000.fillFast32() - MOVQ 32(R9), R13 - MOVBQZX 40(R9), R14 - CMPQ R14, $0x20 - JBE skip_fill1000 - MOVQ 24(R9), R15 - SUBQ $0x20, R14 - SUBQ $0x04, R15 - MOVQ (R9), BP + // br0.fillFast32() + MOVQ 32(R11), R12 + MOVBQZX 40(R11), R13 + CMPQ R13, $0x20 + JBE skip_fill0 + MOVQ 24(R11), R14 + SUBQ $0x20, R13 + SUBQ $0x04, R14 + MOVQ (R11), R15 // b.value |= 
uint64(low) << (b.bitsRead & 63) - MOVL (R15)(BP*1), BP - MOVQ R14, CX - SHLQ CL, BP - MOVQ R15, 24(R9) - ORQ BP, R13 - - // exhausted = exhausted || (br1000.off < 4) - CMPQ R15, $0x04 + MOVL (R14)(R15*1), R15 + MOVQ R13, CX + SHLQ CL, R15 + MOVQ R14, 24(R11) + ORQ R15, R12 + + // exhausted = exhausted || (br0.off < 4) + CMPQ R14, $0x04 SETLT AL ORB AL, DL -skip_fill1000: +skip_fill0: // val0 := br0.peekTopBits(peekBits) - MOVQ R13, R15 - MOVQ BX, CX - SHRQ CL, R15 + MOVQ R12, R14 + MOVQ DI, CX + SHRQ CL, R14 // v0 := table[val0&mask] - MOVW (R8)(R15*2), CX + MOVW (R10)(R14*2), CX // br0.advance(uint8(v0.entry) MOVB CH, AL - SHLQ CL, R13 - ADDB CL, R14 + SHLQ CL, R12 + ADDB CL, R13 // val1 := br0.peekTopBits(peekBits) - MOVQ R13, R15 - MOVQ BX, CX - SHRQ CL, R15 + MOVQ R12, R14 + MOVQ DI, CX + SHRQ CL, R14 // v1 := table[val0&mask] - MOVW (R8)(R15*2), CX + MOVW (R10)(R14*2), CX // br0.advance(uint8(v1.entry) MOVB CH, AH - SHLQ CL, R13 - ADDB CL, R14 + SHLQ CL, R12 + ADDB CL, R13 BSWAPL AX // val2 := br0.peekTopBits(peekBits) - MOVQ R13, R15 - MOVQ BX, CX - SHRQ CL, R15 + MOVQ R12, R14 + MOVQ DI, CX + SHRQ CL, R14 // v2 := table[val0&mask] - MOVW (R8)(R15*2), CX + MOVW (R10)(R14*2), CX // br0.advance(uint8(v2.entry) MOVB CH, AH - SHLQ CL, R13 - ADDB CL, R14 + SHLQ CL, R12 + ADDB CL, R13 // val3 := br0.peekTopBits(peekBits) - MOVQ R13, R15 - MOVQ BX, CX - SHRQ CL, R15 + MOVQ R12, R14 + MOVQ DI, CX + SHRQ CL, R14 // v3 := table[val0&mask] - MOVW (R8)(R15*2), CX + MOVW (R10)(R14*2), CX // br0.advance(uint8(v3.entry) MOVB CH, AL - SHLQ CL, R13 - ADDB CL, R14 + SHLQ CL, R12 + ADDB CL, R13 BSWAPL AX // these four writes get coalesced @@ -378,88 +366,88 @@ skip_fill1000: // out[id * dstEvery + 1] = uint8(v1.entry >> 8) // out[id * dstEvery + 3] = uint8(v2.entry >> 8) // out[id * dstEvery + 4] = uint8(v3.entry >> 8) - MOVL AX, (SI) - - // update the bitreader reader structure - MOVQ R13, 32(R9) - MOVB R14, 40(R9) - ADDQ DI, SI - - // br1001.fillFast32() - MOVQ 32(R10), R13 - MOVBQZX 40(R10), R14 - CMPQ R14, $0x20 - JBE skip_fill1001 - MOVQ 24(R10), R15 - SUBQ $0x20, R14 - SUBQ $0x04, R15 - MOVQ (R10), BP + MOVL AX, (R8) + + // update the bitreader structure + MOVQ R12, 32(R11) + MOVB R13, 40(R11) + ADDQ R9, R8 + + // br1.fillFast32() + MOVQ 80(R11), R12 + MOVBQZX 88(R11), R13 + CMPQ R13, $0x20 + JBE skip_fill1 + MOVQ 72(R11), R14 + SUBQ $0x20, R13 + SUBQ $0x04, R14 + MOVQ 48(R11), R15 // b.value |= uint64(low) << (b.bitsRead & 63) - MOVL (R15)(BP*1), BP - MOVQ R14, CX - SHLQ CL, BP - MOVQ R15, 24(R10) - ORQ BP, R13 - - // exhausted = exhausted || (br1001.off < 4) - CMPQ R15, $0x04 + MOVL (R14)(R15*1), R15 + MOVQ R13, CX + SHLQ CL, R15 + MOVQ R14, 72(R11) + ORQ R15, R12 + + // exhausted = exhausted || (br1.off < 4) + CMPQ R14, $0x04 SETLT AL ORB AL, DL -skip_fill1001: +skip_fill1: // val0 := br1.peekTopBits(peekBits) - MOVQ R13, R15 - MOVQ BX, CX - SHRQ CL, R15 + MOVQ R12, R14 + MOVQ DI, CX + SHRQ CL, R14 // v0 := table[val0&mask] - MOVW (R8)(R15*2), CX + MOVW (R10)(R14*2), CX // br1.advance(uint8(v0.entry) MOVB CH, AL - SHLQ CL, R13 - ADDB CL, R14 + SHLQ CL, R12 + ADDB CL, R13 // val1 := br1.peekTopBits(peekBits) - MOVQ R13, R15 - MOVQ BX, CX - SHRQ CL, R15 + MOVQ R12, R14 + MOVQ DI, CX + SHRQ CL, R14 // v1 := table[val0&mask] - MOVW (R8)(R15*2), CX + MOVW (R10)(R14*2), CX // br1.advance(uint8(v1.entry) MOVB CH, AH - SHLQ CL, R13 - ADDB CL, R14 + SHLQ CL, R12 + ADDB CL, R13 BSWAPL AX // val2 := br1.peekTopBits(peekBits) - MOVQ R13, R15 - MOVQ BX, CX - SHRQ CL, R15 + MOVQ R12, R14 + MOVQ DI, CX + 
SHRQ CL, R14 // v2 := table[val0&mask] - MOVW (R8)(R15*2), CX + MOVW (R10)(R14*2), CX // br1.advance(uint8(v2.entry) MOVB CH, AH - SHLQ CL, R13 - ADDB CL, R14 + SHLQ CL, R12 + ADDB CL, R13 // val3 := br1.peekTopBits(peekBits) - MOVQ R13, R15 - MOVQ BX, CX - SHRQ CL, R15 + MOVQ R12, R14 + MOVQ DI, CX + SHRQ CL, R14 // v3 := table[val0&mask] - MOVW (R8)(R15*2), CX + MOVW (R10)(R14*2), CX // br1.advance(uint8(v3.entry) MOVB CH, AL - SHLQ CL, R13 - ADDB CL, R14 + SHLQ CL, R12 + ADDB CL, R13 BSWAPL AX // these four writes get coalesced @@ -467,88 +455,88 @@ skip_fill1001: // out[id * dstEvery + 1] = uint8(v1.entry >> 8) // out[id * dstEvery + 3] = uint8(v2.entry >> 8) // out[id * dstEvery + 4] = uint8(v3.entry >> 8) - MOVL AX, (SI) - - // update the bitreader reader structure - MOVQ R13, 32(R10) - MOVB R14, 40(R10) - ADDQ DI, SI - - // br1002.fillFast32() - MOVQ 32(R11), R13 - MOVBQZX 40(R11), R14 - CMPQ R14, $0x20 - JBE skip_fill1002 - MOVQ 24(R11), R15 - SUBQ $0x20, R14 - SUBQ $0x04, R15 - MOVQ (R11), BP + MOVL AX, (R8) + + // update the bitreader structure + MOVQ R12, 80(R11) + MOVB R13, 88(R11) + ADDQ R9, R8 + + // br2.fillFast32() + MOVQ 128(R11), R12 + MOVBQZX 136(R11), R13 + CMPQ R13, $0x20 + JBE skip_fill2 + MOVQ 120(R11), R14 + SUBQ $0x20, R13 + SUBQ $0x04, R14 + MOVQ 96(R11), R15 // b.value |= uint64(low) << (b.bitsRead & 63) - MOVL (R15)(BP*1), BP - MOVQ R14, CX - SHLQ CL, BP - MOVQ R15, 24(R11) - ORQ BP, R13 - - // exhausted = exhausted || (br1002.off < 4) - CMPQ R15, $0x04 + MOVL (R14)(R15*1), R15 + MOVQ R13, CX + SHLQ CL, R15 + MOVQ R14, 120(R11) + ORQ R15, R12 + + // exhausted = exhausted || (br2.off < 4) + CMPQ R14, $0x04 SETLT AL ORB AL, DL -skip_fill1002: +skip_fill2: // val0 := br2.peekTopBits(peekBits) - MOVQ R13, R15 - MOVQ BX, CX - SHRQ CL, R15 + MOVQ R12, R14 + MOVQ DI, CX + SHRQ CL, R14 // v0 := table[val0&mask] - MOVW (R8)(R15*2), CX + MOVW (R10)(R14*2), CX // br2.advance(uint8(v0.entry) MOVB CH, AL - SHLQ CL, R13 - ADDB CL, R14 + SHLQ CL, R12 + ADDB CL, R13 // val1 := br2.peekTopBits(peekBits) - MOVQ R13, R15 - MOVQ BX, CX - SHRQ CL, R15 + MOVQ R12, R14 + MOVQ DI, CX + SHRQ CL, R14 // v1 := table[val0&mask] - MOVW (R8)(R15*2), CX + MOVW (R10)(R14*2), CX // br2.advance(uint8(v1.entry) MOVB CH, AH - SHLQ CL, R13 - ADDB CL, R14 + SHLQ CL, R12 + ADDB CL, R13 BSWAPL AX // val2 := br2.peekTopBits(peekBits) - MOVQ R13, R15 - MOVQ BX, CX - SHRQ CL, R15 + MOVQ R12, R14 + MOVQ DI, CX + SHRQ CL, R14 // v2 := table[val0&mask] - MOVW (R8)(R15*2), CX + MOVW (R10)(R14*2), CX // br2.advance(uint8(v2.entry) MOVB CH, AH - SHLQ CL, R13 - ADDB CL, R14 + SHLQ CL, R12 + ADDB CL, R13 // val3 := br2.peekTopBits(peekBits) - MOVQ R13, R15 - MOVQ BX, CX - SHRQ CL, R15 + MOVQ R12, R14 + MOVQ DI, CX + SHRQ CL, R14 // v3 := table[val0&mask] - MOVW (R8)(R15*2), CX + MOVW (R10)(R14*2), CX // br2.advance(uint8(v3.entry) MOVB CH, AL - SHLQ CL, R13 - ADDB CL, R14 + SHLQ CL, R12 + ADDB CL, R13 BSWAPL AX // these four writes get coalesced @@ -556,88 +544,88 @@ skip_fill1002: // out[id * dstEvery + 1] = uint8(v1.entry >> 8) // out[id * dstEvery + 3] = uint8(v2.entry >> 8) // out[id * dstEvery + 4] = uint8(v3.entry >> 8) - MOVL AX, (SI) - - // update the bitreader reader structure - MOVQ R13, 32(R11) - MOVB R14, 40(R11) - ADDQ DI, SI - - // br1003.fillFast32() - MOVQ 32(R12), R13 - MOVBQZX 40(R12), R14 - CMPQ R14, $0x20 - JBE skip_fill1003 - MOVQ 24(R12), R15 - SUBQ $0x20, R14 - SUBQ $0x04, R15 - MOVQ (R12), BP + MOVL AX, (R8) + + // update the bitreader structure + MOVQ R12, 128(R11) + MOVB R13, 136(R11) + 
ADDQ R9, R8 + + // br3.fillFast32() + MOVQ 176(R11), R12 + MOVBQZX 184(R11), R13 + CMPQ R13, $0x20 + JBE skip_fill3 + MOVQ 168(R11), R14 + SUBQ $0x20, R13 + SUBQ $0x04, R14 + MOVQ 144(R11), R15 // b.value |= uint64(low) << (b.bitsRead & 63) - MOVL (R15)(BP*1), BP - MOVQ R14, CX - SHLQ CL, BP - MOVQ R15, 24(R12) - ORQ BP, R13 - - // exhausted = exhausted || (br1003.off < 4) - CMPQ R15, $0x04 + MOVL (R14)(R15*1), R15 + MOVQ R13, CX + SHLQ CL, R15 + MOVQ R14, 168(R11) + ORQ R15, R12 + + // exhausted = exhausted || (br3.off < 4) + CMPQ R14, $0x04 SETLT AL ORB AL, DL -skip_fill1003: +skip_fill3: // val0 := br3.peekTopBits(peekBits) - MOVQ R13, R15 - MOVQ BX, CX - SHRQ CL, R15 + MOVQ R12, R14 + MOVQ DI, CX + SHRQ CL, R14 // v0 := table[val0&mask] - MOVW (R8)(R15*2), CX + MOVW (R10)(R14*2), CX // br3.advance(uint8(v0.entry) MOVB CH, AL - SHLQ CL, R13 - ADDB CL, R14 + SHLQ CL, R12 + ADDB CL, R13 // val1 := br3.peekTopBits(peekBits) - MOVQ R13, R15 - MOVQ BX, CX - SHRQ CL, R15 + MOVQ R12, R14 + MOVQ DI, CX + SHRQ CL, R14 // v1 := table[val0&mask] - MOVW (R8)(R15*2), CX + MOVW (R10)(R14*2), CX // br3.advance(uint8(v1.entry) MOVB CH, AH - SHLQ CL, R13 - ADDB CL, R14 + SHLQ CL, R12 + ADDB CL, R13 BSWAPL AX // val2 := br3.peekTopBits(peekBits) - MOVQ R13, R15 - MOVQ BX, CX - SHRQ CL, R15 + MOVQ R12, R14 + MOVQ DI, CX + SHRQ CL, R14 // v2 := table[val0&mask] - MOVW (R8)(R15*2), CX + MOVW (R10)(R14*2), CX // br3.advance(uint8(v2.entry) MOVB CH, AH - SHLQ CL, R13 - ADDB CL, R14 + SHLQ CL, R12 + ADDB CL, R13 // val3 := br3.peekTopBits(peekBits) - MOVQ R13, R15 - MOVQ BX, CX - SHRQ CL, R15 + MOVQ R12, R14 + MOVQ DI, CX + SHRQ CL, R14 // v3 := table[val0&mask] - MOVW (R8)(R15*2), CX + MOVW (R10)(R14*2), CX // br3.advance(uint8(v3.entry) MOVB CH, AL - SHLQ CL, R13 - ADDB CL, R14 + SHLQ CL, R12 + ADDB CL, R13 BSWAPL AX // these four writes get coalesced @@ -645,20 +633,18 @@ skip_fill1003: // out[id * dstEvery + 1] = uint8(v1.entry >> 8) // out[id * dstEvery + 3] = uint8(v2.entry >> 8) // out[id * dstEvery + 4] = uint8(v3.entry >> 8) - MOVL AX, (SI) + MOVL AX, (R8) - // update the bitreader reader structure - MOVQ R13, 32(R12) - MOVB R14, 40(R12) - ADDQ $0x04, (SP) + // update the bitreader structure + MOVQ R12, 176(R11) + MOVB R13, 184(R11) + ADDQ $0x04, BX TESTB DL, DL JZ main_loop MOVQ ctx+0(FP), AX - MOVQ 40(AX), CX - MOVQ (SP), DX - SUBQ CX, DX - SHLQ $0x02, DX - MOVQ DX, 64(AX) + SUBQ 16(AX), BX + SHLQ $0x02, BX + MOVQ BX, 40(AX) RET // func decompress1x_main_loop_amd64(ctx *decompress1xContext) @@ -750,10 +736,8 @@ loop_condition: // Update ctx structure MOVQ ctx+0(FP), AX - MOVQ DX, CX - MOVQ 16(AX), DX - SUBQ DX, CX - MOVQ CX, 40(AX) + SUBQ 16(AX), DX + MOVQ DX, 40(AX) MOVQ (AX), AX MOVQ R9, 24(AX) MOVQ R10, 32(AX) @@ -847,10 +831,8 @@ loop_condition: // Update ctx structure MOVQ ctx+0(FP), AX - MOVQ DX, CX - MOVQ 16(AX), DX - SUBQ DX, CX - MOVQ CX, 40(AX) + SUBQ 16(AX), DX + MOVQ DX, 40(AX) MOVQ (AX), AX MOVQ R9, 24(AX) MOVQ R10, 32(AX) diff --git a/vendor/github.com/klauspost/compress/s2/decode.go b/vendor/github.com/klauspost/compress/s2/decode.go index b5fa4d3f0..27c0f3c2c 100644 --- a/vendor/github.com/klauspost/compress/s2/decode.go +++ b/vendor/github.com/klauspost/compress/s2/decode.go @@ -11,6 +11,7 @@ import ( "fmt" "io" "io/ioutil" + "math" "runtime" "sync" ) @@ -719,7 +720,11 @@ func (r *Reader) Skip(n int64) error { // decoded[i:j] contains decoded bytes that have not yet been passed on. 
left := int64(r.j - r.i) if left >= n { - r.i += int(n) + tmp := int64(r.i) + n + if tmp > math.MaxInt32 { + return errors.New("s2: internal overflow in skip") + } + r.i = int(tmp) return nil } n -= int64(r.j - r.i) diff --git a/vendor/github.com/klauspost/compress/s2/index.go b/vendor/github.com/klauspost/compress/s2/index.go index 7b24a0060..dd9ecfe71 100644 --- a/vendor/github.com/klauspost/compress/s2/index.go +++ b/vendor/github.com/klauspost/compress/s2/index.go @@ -533,3 +533,66 @@ func (i *Index) JSON() []byte { b, _ := json.MarshalIndent(x, "", " ") return b } + +// RemoveIndexHeaders will trim all headers and trailers from a given index. +// This is expected to save 20 bytes. +// These can be restored using RestoreIndexHeaders. +// This removes a layer of security, but is the most compact representation. +// Returns nil if headers contains errors. +// The returned slice references the provided slice. +func RemoveIndexHeaders(b []byte) []byte { + const save = 4 + len(S2IndexHeader) + len(S2IndexTrailer) + 4 + if len(b) <= save { + return nil + } + if b[0] != ChunkTypeIndex { + return nil + } + chunkLen := int(b[1]) | int(b[2])<<8 | int(b[3])<<16 + b = b[4:] + + // Validate we have enough... + if len(b) < chunkLen { + return nil + } + b = b[:chunkLen] + + if !bytes.Equal(b[:len(S2IndexHeader)], []byte(S2IndexHeader)) { + return nil + } + b = b[len(S2IndexHeader):] + if !bytes.HasSuffix(b, []byte(S2IndexTrailer)) { + return nil + } + b = bytes.TrimSuffix(b, []byte(S2IndexTrailer)) + + if len(b) < 4 { + return nil + } + return b[:len(b)-4] +} + +// RestoreIndexHeaders will index restore headers removed by RemoveIndexHeaders. +// No error checking is performed on the input. +// If a 0 length slice is sent, it is returned without modification. +func RestoreIndexHeaders(in []byte) []byte { + if len(in) == 0 { + return in + } + b := make([]byte, 0, 4+len(S2IndexHeader)+len(in)+len(S2IndexTrailer)+4) + b = append(b, ChunkTypeIndex, 0, 0, 0) + b = append(b, []byte(S2IndexHeader)...) + b = append(b, in...) + + var tmp [4]byte + binary.LittleEndian.PutUint32(tmp[:], uint32(len(b)+4+len(S2IndexTrailer))) + b = append(b, tmp[:4]...) + // Trailer + b = append(b, []byte(S2IndexTrailer)...) + + chunkLen := len(b) - skippableFrameHeader + b[1] = uint8(chunkLen >> 0) + b[2] = uint8(chunkLen >> 8) + b[3] = uint8(chunkLen >> 16) + return b +} diff --git a/vendor/github.com/klauspost/compress/zstd/bytebuf.go b/vendor/github.com/klauspost/compress/zstd/bytebuf.go index 4493baa75..2ad02070d 100644 --- a/vendor/github.com/klauspost/compress/zstd/bytebuf.go +++ b/vendor/github.com/klauspost/compress/zstd/bytebuf.go @@ -23,7 +23,7 @@ type byteBuffer interface { readByte() (byte, error) // Skip n bytes. 
- skipN(n int) error + skipN(n int64) error } // in-memory buffer @@ -62,9 +62,12 @@ func (b *byteBuf) readByte() (byte, error) { return r, nil } -func (b *byteBuf) skipN(n int) error { +func (b *byteBuf) skipN(n int64) error { bb := *b - if len(bb) < n { + if n < 0 { + return fmt.Errorf("negative skip (%d) requested", n) + } + if int64(len(bb)) < n { return io.ErrUnexpectedEOF } *b = bb[n:] @@ -120,9 +123,9 @@ func (r *readerWrapper) readByte() (byte, error) { return r.tmp[0], nil } -func (r *readerWrapper) skipN(n int) error { - n2, err := io.CopyN(ioutil.Discard, r.r, int64(n)) - if n2 != int64(n) { +func (r *readerWrapper) skipN(n int64) error { + n2, err := io.CopyN(ioutil.Discard, r.r, n) + if n2 != n { err = io.ErrUnexpectedEOF } return err diff --git a/vendor/github.com/klauspost/compress/zstd/decoder.go b/vendor/github.com/klauspost/compress/zstd/decoder.go index 286c8f9d7..d212f4737 100644 --- a/vendor/github.com/klauspost/compress/zstd/decoder.go +++ b/vendor/github.com/klauspost/compress/zstd/decoder.go @@ -348,6 +348,9 @@ func (d *Decoder) DecodeAll(input, dst []byte) ([]byte, error) { frame.history.setDict(&dict) } if frame.WindowSize > d.o.maxWindowSize { + if debugDecoder { + println("window size exceeded:", frame.WindowSize, ">", d.o.maxWindowSize) + } return dst, ErrWindowSizeExceeded } if frame.FrameContentSize != fcsUnknown { diff --git a/vendor/github.com/klauspost/compress/zstd/encoder.go b/vendor/github.com/klauspost/compress/zstd/encoder.go index e6b1d01cf..7aaaedb23 100644 --- a/vendor/github.com/klauspost/compress/zstd/encoder.go +++ b/vendor/github.com/klauspost/compress/zstd/encoder.go @@ -528,8 +528,8 @@ func (e *Encoder) EncodeAll(src, dst []byte) []byte { // If a non-single block is needed the encoder will reset again. e.encoders <- enc }() - // Use single segments when above minimum window and below 1MB. - single := len(src) < 1<<20 && len(src) > MinWindowSize + // Use single segments when above minimum window and below window size. + single := len(src) <= e.o.windowSize && len(src) > MinWindowSize if e.o.single != nil { single = *e.o.single } diff --git a/vendor/github.com/klauspost/compress/zstd/encoder_options.go b/vendor/github.com/klauspost/compress/zstd/encoder_options.go index 44d8dbd19..a7c5e1aac 100644 --- a/vendor/github.com/klauspost/compress/zstd/encoder_options.go +++ b/vendor/github.com/klauspost/compress/zstd/encoder_options.go @@ -283,7 +283,7 @@ func WithNoEntropyCompression(b bool) EOption { // a decoder is allowed to reject a compressed frame which requests a memory size beyond decoder's authorized range. // For broader compatibility, decoders are recommended to support memory sizes of at least 8 MB. // This is only a recommendation, each decoder is free to support higher or lower limits, depending on local limitations. -// If this is not specified, block encodes will automatically choose this based on the input size. +// If this is not specified, block encodes will automatically choose this based on the input size and the window size. // This setting has no effect on streamed encodes. 
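Editorial aside, not part of the vendored patch: the EncodeAll change above switches the single-segment heuristic from a fixed 1 MB cutoff to the configured window size, and WithSingleSegment still overrides the heuristic entirely. A small sketch of forcing the behavior explicitly; the nil writer is the documented EncodeAll-only pattern, and the zero-filled sample input is an assumption.

package main

import (
	"fmt"
	"log"

	"github.com/klauspost/compress/zstd"
)

func main() {
	// A nil writer is fine when the encoder is only used via EncodeAll.
	enc, err := zstd.NewWriter(nil, zstd.WithSingleSegment(true))
	if err != nil {
		log.Fatal(err)
	}
	defer enc.Close()

	src := make([]byte, 4<<20) // 4 MiB of zeros, just as sample input
	dst := enc.EncodeAll(src, nil)
	fmt.Printf("compressed %d -> %d bytes (single-segment frame forced)\n", len(src), len(dst))
}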
func WithSingleSegment(b bool) EOption { return func(o *encoderOptions) error { diff --git a/vendor/github.com/klauspost/compress/zstd/framedec.go b/vendor/github.com/klauspost/compress/zstd/framedec.go index fa0a633f3..9568a4ba3 100644 --- a/vendor/github.com/klauspost/compress/zstd/framedec.go +++ b/vendor/github.com/klauspost/compress/zstd/framedec.go @@ -106,7 +106,7 @@ func (d *frameDec) reset(br byteBuffer) error { } n := uint32(b[0]) | (uint32(b[1]) << 8) | (uint32(b[2]) << 16) | (uint32(b[3]) << 24) println("Skipping frame with", n, "bytes.") - err = br.skipN(int(n)) + err = br.skipN(int64(n)) if err != nil { if debugDecoder { println("Reading discarded frame", err) @@ -231,20 +231,27 @@ func (d *frameDec) reset(br byteBuffer) error { d.crc.Reset() } + if d.WindowSize > d.o.maxWindowSize { + if debugDecoder { + printf("window size %d > max %d\n", d.WindowSize, d.o.maxWindowSize) + } + return ErrWindowSizeExceeded + } + if d.WindowSize == 0 && d.SingleSegment { // We may not need window in this case. d.WindowSize = d.FrameContentSize if d.WindowSize < MinWindowSize { d.WindowSize = MinWindowSize } - } - - if d.WindowSize > uint64(d.o.maxWindowSize) { - if debugDecoder { - printf("window size %d > max %d\n", d.WindowSize, d.o.maxWindowSize) + if d.WindowSize > d.o.maxDecodedSize { + if debugDecoder { + printf("window size %d > max %d\n", d.WindowSize, d.o.maxWindowSize) + } + return ErrDecoderSizeExceeded } - return ErrWindowSizeExceeded } + // The minimum Window_Size is 1 KB. if d.WindowSize < MinWindowSize { if debugDecoder { diff --git a/vendor/github.com/klauspost/compress/zstd/fse_decoder_amd64.go b/vendor/github.com/klauspost/compress/zstd/fse_decoder_amd64.go index e74df436c..c881d28d8 100644 --- a/vendor/github.com/klauspost/compress/zstd/fse_decoder_amd64.go +++ b/vendor/github.com/klauspost/compress/zstd/fse_decoder_amd64.go @@ -34,8 +34,8 @@ const ( // buildDtable will build the decoding table. func (s *fseDecoder) buildDtable() error { ctx := buildDtableAsmContext{ - stateTable: (*uint16)(&s.stateTable[0]), - norm: (*int16)(&s.norm[0]), + stateTable: &s.stateTable[0], + norm: &s.norm[0], dt: (*uint64)(&s.dt[0]), } code := buildDtable_asm(s, &ctx) diff --git a/vendor/github.com/klauspost/compress/zstd/seqdec_amd64.go b/vendor/github.com/klauspost/compress/zstd/seqdec_amd64.go index 847b322ae..7598c1018 100644 --- a/vendor/github.com/klauspost/compress/zstd/seqdec_amd64.go +++ b/vendor/github.com/klauspost/compress/zstd/seqdec_amd64.go @@ -55,16 +55,22 @@ func (s *sequenceDecs) decodeSyncSimple(hist []byte) (bool, error) { if s.maxSyncLen == 0 && cap(s.out)-len(s.out) < maxCompressedBlockSize { return false, nil } - useSafe := false - if s.maxSyncLen == 0 && cap(s.out)-len(s.out) < maxCompressedBlockSizeAlloc { - useSafe = true - } - if s.maxSyncLen > 0 && cap(s.out)-len(s.out)-compressedBlockOverAlloc < int(s.maxSyncLen) { - useSafe = true - } - if cap(s.literals) < len(s.literals)+compressedBlockOverAlloc { - useSafe = true - } + + // FIXME: Using unsafe memory copies leads to rare, random crashes + // with fuzz testing. It is therefore disabled for now. 
+ const useSafe = true + /* + useSafe := false + if s.maxSyncLen == 0 && cap(s.out)-len(s.out) < maxCompressedBlockSizeAlloc { + useSafe = true + } + if s.maxSyncLen > 0 && cap(s.out)-len(s.out)-compressedBlockOverAlloc < int(s.maxSyncLen) { + useSafe = true + } + if cap(s.literals) < len(s.literals)+compressedBlockOverAlloc { + useSafe = true + } + */ br := s.br diff --git a/vendor/github.com/klauspost/compress/zstd/seqdec_amd64.s b/vendor/github.com/klauspost/compress/zstd/seqdec_amd64.s index 212c6cac3..27e76774c 100644 --- a/vendor/github.com/klauspost/compress/zstd/seqdec_amd64.s +++ b/vendor/github.com/klauspost/compress/zstd/seqdec_amd64.s @@ -52,34 +52,46 @@ sequenceDecs_decode_amd64_fill_byte_by_byte: sequenceDecs_decode_amd64_fill_end: // Update offset - MOVQ R9, AX - MOVQ BX, CX - MOVQ DX, R15 - SHLQ CL, R15 - MOVB AH, CL - ADDQ CX, BX - NEGL CX - SHRQ CL, R15 - SHRQ $0x20, AX - TESTQ CX, CX - CMOVQEQ CX, R15 - ADDQ R15, AX - MOVQ AX, 16(R10) + MOVQ R9, AX + MOVQ BX, CX + MOVQ DX, R15 + SHLQ CL, R15 + MOVB AH, CL + SHRQ $0x20, AX + TESTQ CX, CX + JZ sequenceDecs_decode_amd64_of_update_zero + ADDQ CX, BX + CMPQ BX, $0x40 + JA sequenceDecs_decode_amd64_of_update_zero + CMPQ CX, $0x40 + JAE sequenceDecs_decode_amd64_of_update_zero + NEGQ CX + SHRQ CL, R15 + ADDQ R15, AX + +sequenceDecs_decode_amd64_of_update_zero: + MOVQ AX, 16(R10) // Update match length - MOVQ R8, AX - MOVQ BX, CX - MOVQ DX, R15 - SHLQ CL, R15 - MOVB AH, CL - ADDQ CX, BX - NEGL CX - SHRQ CL, R15 - SHRQ $0x20, AX - TESTQ CX, CX - CMOVQEQ CX, R15 - ADDQ R15, AX - MOVQ AX, 8(R10) + MOVQ R8, AX + MOVQ BX, CX + MOVQ DX, R15 + SHLQ CL, R15 + MOVB AH, CL + SHRQ $0x20, AX + TESTQ CX, CX + JZ sequenceDecs_decode_amd64_ml_update_zero + ADDQ CX, BX + CMPQ BX, $0x40 + JA sequenceDecs_decode_amd64_ml_update_zero + CMPQ CX, $0x40 + JAE sequenceDecs_decode_amd64_ml_update_zero + NEGQ CX + SHRQ CL, R15 + ADDQ R15, AX + +sequenceDecs_decode_amd64_ml_update_zero: + MOVQ AX, 8(R10) // Fill bitreader to have enough for the remaining CMPQ SI, $0x08 @@ -107,19 +119,25 @@ sequenceDecs_decode_amd64_fill_2_byte_by_byte: sequenceDecs_decode_amd64_fill_2_end: // Update literal length - MOVQ DI, AX - MOVQ BX, CX - MOVQ DX, R15 - SHLQ CL, R15 - MOVB AH, CL - ADDQ CX, BX - NEGL CX - SHRQ CL, R15 - SHRQ $0x20, AX - TESTQ CX, CX - CMOVQEQ CX, R15 - ADDQ R15, AX - MOVQ AX, (R10) + MOVQ DI, AX + MOVQ BX, CX + MOVQ DX, R15 + SHLQ CL, R15 + MOVB AH, CL + SHRQ $0x20, AX + TESTQ CX, CX + JZ sequenceDecs_decode_amd64_ll_update_zero + ADDQ CX, BX + CMPQ BX, $0x40 + JA sequenceDecs_decode_amd64_ll_update_zero + CMPQ CX, $0x40 + JAE sequenceDecs_decode_amd64_ll_update_zero + NEGQ CX + SHRQ CL, R15 + ADDQ R15, AX + +sequenceDecs_decode_amd64_ll_update_zero: + MOVQ AX, (R10) // Fill bitreader for state updates MOVQ R14, (SP) @@ -134,18 +152,17 @@ sequenceDecs_decode_amd64_fill_2_end: MOVBQZX DI, R14 SHRQ $0x10, DI MOVWQZX DI, DI - CMPQ R14, $0x00 - JZ sequenceDecs_decode_amd64_llState_updateState_skip_zero - MOVQ BX, CX - ADDQ R14, BX + LEAQ (BX)(R14*1), CX MOVQ DX, R15 - SHLQ CL, R15 - MOVQ R14, CX - NEGQ CX - SHRQ CL, R15 + MOVQ CX, BX + ROLQ CL, R15 + MOVL $0x00000001, BP + MOVB R14, CL + SHLL CL, BP + DECL BP + ANDQ BP, R15 ADDQ R15, DI -sequenceDecs_decode_amd64_llState_updateState_skip_zero: // Load ctx.llTable MOVQ ctx+16(FP), CX MOVQ (CX), CX @@ -155,18 +172,17 @@ sequenceDecs_decode_amd64_llState_updateState_skip_zero: MOVBQZX R8, R14 SHRQ $0x10, R8 MOVWQZX R8, R8 - CMPQ R14, $0x00 - JZ sequenceDecs_decode_amd64_mlState_updateState_skip_zero - 
MOVQ BX, CX - ADDQ R14, BX + LEAQ (BX)(R14*1), CX MOVQ DX, R15 - SHLQ CL, R15 - MOVQ R14, CX - NEGQ CX - SHRQ CL, R15 + MOVQ CX, BX + ROLQ CL, R15 + MOVL $0x00000001, BP + MOVB R14, CL + SHLL CL, BP + DECL BP + ANDQ BP, R15 ADDQ R15, R8 -sequenceDecs_decode_amd64_mlState_updateState_skip_zero: // Load ctx.mlTable MOVQ ctx+16(FP), CX MOVQ 24(CX), CX @@ -176,18 +192,17 @@ sequenceDecs_decode_amd64_mlState_updateState_skip_zero: MOVBQZX R9, R14 SHRQ $0x10, R9 MOVWQZX R9, R9 - CMPQ R14, $0x00 - JZ sequenceDecs_decode_amd64_ofState_updateState_skip_zero - MOVQ BX, CX - ADDQ R14, BX + LEAQ (BX)(R14*1), CX MOVQ DX, R15 - SHLQ CL, R15 - MOVQ R14, CX - NEGQ CX - SHRQ CL, R15 + MOVQ CX, BX + ROLQ CL, R15 + MOVL $0x00000001, BP + MOVB R14, CL + SHLL CL, BP + DECL BP + ANDQ BP, R15 ADDQ R15, R9 -sequenceDecs_decode_amd64_ofState_updateState_skip_zero: // Load ctx.ofTable MOVQ ctx+16(FP), CX MOVQ 48(CX), CX @@ -201,7 +216,7 @@ sequenceDecs_decode_amd64_skip_update: MOVQ R12, R13 MOVQ R11, R12 MOVQ CX, R11 - JMP sequenceDecs_decode_amd64_adjust_end + JMP sequenceDecs_decode_amd64_after_adjust sequenceDecs_decode_amd64_adjust_offsetB_1_or_0: CMPQ (R10), $0x00000000 @@ -213,7 +228,7 @@ sequenceDecs_decode_amd64_adjust_offset_maybezero: TESTQ CX, CX JNZ sequenceDecs_decode_amd64_adjust_offset_nonzero MOVQ R11, CX - JMP sequenceDecs_decode_amd64_adjust_end + JMP sequenceDecs_decode_amd64_after_adjust sequenceDecs_decode_amd64_adjust_offset_nonzero: CMPQ CX, $0x01 @@ -250,7 +265,7 @@ sequenceDecs_decode_amd64_adjust_temp_valid: MOVQ AX, R11 MOVQ AX, CX -sequenceDecs_decode_amd64_adjust_end: +sequenceDecs_decode_amd64_after_adjust: MOVQ CX, 16(R10) // Check values @@ -359,49 +374,67 @@ sequenceDecs_decode_56_amd64_fill_byte_by_byte: sequenceDecs_decode_56_amd64_fill_end: // Update offset - MOVQ R9, AX - MOVQ BX, CX - MOVQ DX, R15 - SHLQ CL, R15 - MOVB AH, CL - ADDQ CX, BX - NEGL CX - SHRQ CL, R15 - SHRQ $0x20, AX - TESTQ CX, CX - CMOVQEQ CX, R15 - ADDQ R15, AX - MOVQ AX, 16(R10) + MOVQ R9, AX + MOVQ BX, CX + MOVQ DX, R15 + SHLQ CL, R15 + MOVB AH, CL + SHRQ $0x20, AX + TESTQ CX, CX + JZ sequenceDecs_decode_56_amd64_of_update_zero + ADDQ CX, BX + CMPQ BX, $0x40 + JA sequenceDecs_decode_56_amd64_of_update_zero + CMPQ CX, $0x40 + JAE sequenceDecs_decode_56_amd64_of_update_zero + NEGQ CX + SHRQ CL, R15 + ADDQ R15, AX + +sequenceDecs_decode_56_amd64_of_update_zero: + MOVQ AX, 16(R10) // Update match length - MOVQ R8, AX - MOVQ BX, CX - MOVQ DX, R15 - SHLQ CL, R15 - MOVB AH, CL - ADDQ CX, BX - NEGL CX - SHRQ CL, R15 - SHRQ $0x20, AX - TESTQ CX, CX - CMOVQEQ CX, R15 - ADDQ R15, AX - MOVQ AX, 8(R10) + MOVQ R8, AX + MOVQ BX, CX + MOVQ DX, R15 + SHLQ CL, R15 + MOVB AH, CL + SHRQ $0x20, AX + TESTQ CX, CX + JZ sequenceDecs_decode_56_amd64_ml_update_zero + ADDQ CX, BX + CMPQ BX, $0x40 + JA sequenceDecs_decode_56_amd64_ml_update_zero + CMPQ CX, $0x40 + JAE sequenceDecs_decode_56_amd64_ml_update_zero + NEGQ CX + SHRQ CL, R15 + ADDQ R15, AX + +sequenceDecs_decode_56_amd64_ml_update_zero: + MOVQ AX, 8(R10) // Update literal length - MOVQ DI, AX - MOVQ BX, CX - MOVQ DX, R15 - SHLQ CL, R15 - MOVB AH, CL - ADDQ CX, BX - NEGL CX - SHRQ CL, R15 - SHRQ $0x20, AX - TESTQ CX, CX - CMOVQEQ CX, R15 - ADDQ R15, AX - MOVQ AX, (R10) + MOVQ DI, AX + MOVQ BX, CX + MOVQ DX, R15 + SHLQ CL, R15 + MOVB AH, CL + SHRQ $0x20, AX + TESTQ CX, CX + JZ sequenceDecs_decode_56_amd64_ll_update_zero + ADDQ CX, BX + CMPQ BX, $0x40 + JA sequenceDecs_decode_56_amd64_ll_update_zero + CMPQ CX, $0x40 + JAE sequenceDecs_decode_56_amd64_ll_update_zero + NEGQ CX + 
SHRQ CL, R15 + ADDQ R15, AX + +sequenceDecs_decode_56_amd64_ll_update_zero: + MOVQ AX, (R10) // Fill bitreader for state updates MOVQ R14, (SP) @@ -416,18 +449,17 @@ sequenceDecs_decode_56_amd64_fill_end: MOVBQZX DI, R14 SHRQ $0x10, DI MOVWQZX DI, DI - CMPQ R14, $0x00 - JZ sequenceDecs_decode_56_amd64_llState_updateState_skip_zero - MOVQ BX, CX - ADDQ R14, BX + LEAQ (BX)(R14*1), CX MOVQ DX, R15 - SHLQ CL, R15 - MOVQ R14, CX - NEGQ CX - SHRQ CL, R15 + MOVQ CX, BX + ROLQ CL, R15 + MOVL $0x00000001, BP + MOVB R14, CL + SHLL CL, BP + DECL BP + ANDQ BP, R15 ADDQ R15, DI -sequenceDecs_decode_56_amd64_llState_updateState_skip_zero: // Load ctx.llTable MOVQ ctx+16(FP), CX MOVQ (CX), CX @@ -437,18 +469,17 @@ sequenceDecs_decode_56_amd64_llState_updateState_skip_zero: MOVBQZX R8, R14 SHRQ $0x10, R8 MOVWQZX R8, R8 - CMPQ R14, $0x00 - JZ sequenceDecs_decode_56_amd64_mlState_updateState_skip_zero - MOVQ BX, CX - ADDQ R14, BX + LEAQ (BX)(R14*1), CX MOVQ DX, R15 - SHLQ CL, R15 - MOVQ R14, CX - NEGQ CX - SHRQ CL, R15 + MOVQ CX, BX + ROLQ CL, R15 + MOVL $0x00000001, BP + MOVB R14, CL + SHLL CL, BP + DECL BP + ANDQ BP, R15 ADDQ R15, R8 -sequenceDecs_decode_56_amd64_mlState_updateState_skip_zero: // Load ctx.mlTable MOVQ ctx+16(FP), CX MOVQ 24(CX), CX @@ -458,18 +489,17 @@ sequenceDecs_decode_56_amd64_mlState_updateState_skip_zero: MOVBQZX R9, R14 SHRQ $0x10, R9 MOVWQZX R9, R9 - CMPQ R14, $0x00 - JZ sequenceDecs_decode_56_amd64_ofState_updateState_skip_zero - MOVQ BX, CX - ADDQ R14, BX + LEAQ (BX)(R14*1), CX MOVQ DX, R15 - SHLQ CL, R15 - MOVQ R14, CX - NEGQ CX - SHRQ CL, R15 + MOVQ CX, BX + ROLQ CL, R15 + MOVL $0x00000001, BP + MOVB R14, CL + SHLL CL, BP + DECL BP + ANDQ BP, R15 ADDQ R15, R9 -sequenceDecs_decode_56_amd64_ofState_updateState_skip_zero: // Load ctx.ofTable MOVQ ctx+16(FP), CX MOVQ 48(CX), CX @@ -483,7 +513,7 @@ sequenceDecs_decode_56_amd64_skip_update: MOVQ R12, R13 MOVQ R11, R12 MOVQ CX, R11 - JMP sequenceDecs_decode_56_amd64_adjust_end + JMP sequenceDecs_decode_56_amd64_after_adjust sequenceDecs_decode_56_amd64_adjust_offsetB_1_or_0: CMPQ (R10), $0x00000000 @@ -495,7 +525,7 @@ sequenceDecs_decode_56_amd64_adjust_offset_maybezero: TESTQ CX, CX JNZ sequenceDecs_decode_56_amd64_adjust_offset_nonzero MOVQ R11, CX - JMP sequenceDecs_decode_56_amd64_adjust_end + JMP sequenceDecs_decode_56_amd64_after_adjust sequenceDecs_decode_56_amd64_adjust_offset_nonzero: CMPQ CX, $0x01 @@ -532,7 +562,7 @@ sequenceDecs_decode_56_amd64_adjust_temp_valid: MOVQ AX, R11 MOVQ AX, CX -sequenceDecs_decode_56_amd64_adjust_end: +sequenceDecs_decode_56_amd64_after_adjust: MOVQ CX, 16(R10) // Check values @@ -763,7 +793,7 @@ sequenceDecs_decode_bmi2_skip_update: MOVQ R11, R12 MOVQ R10, R11 MOVQ CX, R10 - JMP sequenceDecs_decode_bmi2_adjust_end + JMP sequenceDecs_decode_bmi2_after_adjust sequenceDecs_decode_bmi2_adjust_offsetB_1_or_0: CMPQ (R9), $0x00000000 @@ -775,7 +805,7 @@ sequenceDecs_decode_bmi2_adjust_offset_maybezero: TESTQ CX, CX JNZ sequenceDecs_decode_bmi2_adjust_offset_nonzero MOVQ R10, CX - JMP sequenceDecs_decode_bmi2_adjust_end + JMP sequenceDecs_decode_bmi2_after_adjust sequenceDecs_decode_bmi2_adjust_offset_nonzero: CMPQ CX, $0x01 @@ -812,7 +842,7 @@ sequenceDecs_decode_bmi2_adjust_temp_valid: MOVQ R13, R10 MOVQ R13, CX -sequenceDecs_decode_bmi2_adjust_end: +sequenceDecs_decode_bmi2_after_adjust: MOVQ CX, 16(R9) // Check values @@ -1018,7 +1048,7 @@ sequenceDecs_decode_56_bmi2_skip_update: MOVQ R11, R12 MOVQ R10, R11 MOVQ CX, R10 - JMP sequenceDecs_decode_56_bmi2_adjust_end + JMP 
sequenceDecs_decode_56_bmi2_after_adjust sequenceDecs_decode_56_bmi2_adjust_offsetB_1_or_0: CMPQ (R9), $0x00000000 @@ -1030,7 +1060,7 @@ sequenceDecs_decode_56_bmi2_adjust_offset_maybezero: TESTQ CX, CX JNZ sequenceDecs_decode_56_bmi2_adjust_offset_nonzero MOVQ R10, CX - JMP sequenceDecs_decode_56_bmi2_adjust_end + JMP sequenceDecs_decode_56_bmi2_after_adjust sequenceDecs_decode_56_bmi2_adjust_offset_nonzero: CMPQ CX, $0x01 @@ -1067,7 +1097,7 @@ sequenceDecs_decode_56_bmi2_adjust_temp_valid: MOVQ R13, R10 MOVQ R13, CX -sequenceDecs_decode_56_bmi2_adjust_end: +sequenceDecs_decode_56_bmi2_after_adjust: MOVQ CX, 16(R9) // Check values @@ -1181,52 +1211,65 @@ check_offset: JG error_match_off_too_big // Copy match from history - MOVQ R12, R11 - SUBQ DI, R11 - JLS copy_match - MOVQ R9, R14 - SUBQ R11, R14 - CMPQ R13, R11 - JGE copy_all_from_history - XORQ R11, R11 - TESTQ $0x00000001, R13 - JZ copy_4_word - MOVB (R14)(R11*1), R12 - MOVB R12, (BX)(R11*1) - ADDQ $0x01, R11 - -copy_4_word: - TESTQ $0x00000002, R13 - JZ copy_4_dword - MOVW (R14)(R11*1), R12 - MOVW R12, (BX)(R11*1) - ADDQ $0x02, R11 - -copy_4_dword: - TESTQ $0x00000004, R13 - JZ copy_4_qword - MOVL (R14)(R11*1), R12 - MOVL R12, (BX)(R11*1) - ADDQ $0x04, R11 - -copy_4_qword: - TESTQ $0x00000008, R13 - JZ copy_4_test - MOVQ (R14)(R11*1), R12 - MOVQ R12, (BX)(R11*1) - ADDQ $0x08, R11 - JMP copy_4_test - -copy_4: - MOVUPS (R14)(R11*1), X0 - MOVUPS X0, (BX)(R11*1) - ADDQ $0x10, R11 + MOVQ R12, R11 + SUBQ DI, R11 + JLS copy_match + MOVQ R9, R14 + SUBQ R11, R14 + CMPQ R13, R11 + JG copy_all_from_history + MOVQ R13, R11 + SUBQ $0x10, R11 + JB copy_4_small + +copy_4_loop: + MOVUPS (R14), X0 + MOVUPS X0, (BX) + ADDQ $0x10, R14 + ADDQ $0x10, BX + SUBQ $0x10, R11 + JAE copy_4_loop + LEAQ 16(R14)(R11*1), R14 + LEAQ 16(BX)(R11*1), BX + MOVUPS -16(R14), X0 + MOVUPS X0, -16(BX) + JMP copy_4_end + +copy_4_small: + CMPQ R13, $0x03 + JE copy_4_move_3 + CMPQ R13, $0x08 + JB copy_4_move_4through7 + JMP copy_4_move_8through16 + +copy_4_move_3: + MOVW (R14), R11 + MOVB 2(R14), R12 + MOVW R11, (BX) + MOVB R12, 2(BX) + ADDQ R13, R14 + ADDQ R13, BX + JMP copy_4_end + +copy_4_move_4through7: + MOVL (R14), R11 + MOVL -4(R14)(R13*1), R12 + MOVL R11, (BX) + MOVL R12, -4(BX)(R13*1) + ADDQ R13, R14 + ADDQ R13, BX + JMP copy_4_end + +copy_4_move_8through16: + MOVQ (R14), R11 + MOVQ -8(R14)(R13*1), R12 + MOVQ R11, (BX) + MOVQ R12, -8(BX)(R13*1) + ADDQ R13, R14 + ADDQ R13, BX -copy_4_test: - CMPQ R11, R13 - JB copy_4 +copy_4_end: ADDQ R13, DI - ADDQ R13, BX ADDQ $0x18, AX INCQ DX CMPQ DX, CX @@ -1234,53 +1277,74 @@ copy_4_test: JMP loop_finished copy_all_from_history: - XORQ R15, R15 - TESTQ $0x00000001, R11 - JZ copy_5_word - MOVB (R14)(R15*1), BP - MOVB BP, (BX)(R15*1) - ADDQ $0x01, R15 - -copy_5_word: - TESTQ $0x00000002, R11 - JZ copy_5_dword - MOVW (R14)(R15*1), BP - MOVW BP, (BX)(R15*1) - ADDQ $0x02, R15 - -copy_5_dword: - TESTQ $0x00000004, R11 - JZ copy_5_qword - MOVL (R14)(R15*1), BP - MOVL BP, (BX)(R15*1) - ADDQ $0x04, R15 - -copy_5_qword: - TESTQ $0x00000008, R11 - JZ copy_5_test - MOVQ (R14)(R15*1), BP - MOVQ BP, (BX)(R15*1) - ADDQ $0x08, R15 - JMP copy_5_test - -copy_5: - MOVUPS (R14)(R15*1), X0 - MOVUPS X0, (BX)(R15*1) - ADDQ $0x10, R15 - -copy_5_test: - CMPQ R15, R11 - JB copy_5 + MOVQ R11, R15 + SUBQ $0x10, R15 + JB copy_5_small + +copy_5_loop: + MOVUPS (R14), X0 + MOVUPS X0, (BX) + ADDQ $0x10, R14 + ADDQ $0x10, BX + SUBQ $0x10, R15 + JAE copy_5_loop + LEAQ 16(R14)(R15*1), R14 + LEAQ 16(BX)(R15*1), BX + MOVUPS -16(R14), X0 + MOVUPS X0, -16(BX) + JMP 
copy_5_end + +copy_5_small: + CMPQ R11, $0x03 + JE copy_5_move_3 + JB copy_5_move_1or2 + CMPQ R11, $0x08 + JB copy_5_move_4through7 + JMP copy_5_move_8through16 + +copy_5_move_1or2: + MOVB (R14), R15 + MOVB -1(R14)(R11*1), BP + MOVB R15, (BX) + MOVB BP, -1(BX)(R11*1) + ADDQ R11, R14 ADDQ R11, BX + JMP copy_5_end + +copy_5_move_3: + MOVW (R14), R15 + MOVB 2(R14), BP + MOVW R15, (BX) + MOVB BP, 2(BX) + ADDQ R11, R14 + ADDQ R11, BX + JMP copy_5_end + +copy_5_move_4through7: + MOVL (R14), R15 + MOVL -4(R14)(R11*1), BP + MOVL R15, (BX) + MOVL BP, -4(BX)(R11*1) + ADDQ R11, R14 + ADDQ R11, BX + JMP copy_5_end + +copy_5_move_8through16: + MOVQ (R14), R15 + MOVQ -8(R14)(R11*1), BP + MOVQ R15, (BX) + MOVQ BP, -8(BX)(R11*1) + ADDQ R11, R14 + ADDQ R11, BX + +copy_5_end: ADDQ R11, DI SUBQ R11, R13 // Copy match from the current buffer copy_match: - TESTQ R13, R13 - JZ handle_loop - MOVQ BX, R11 - SUBQ R12, R11 + MOVQ BX, R11 + SUBQ R12, R11 // ml <= mo CMPQ R13, R12 @@ -1382,45 +1446,67 @@ main_loop: // Copy literals TESTQ R11, R11 JZ check_offset - XORQ R14, R14 - TESTQ $0x00000001, R11 - JZ copy_1_word - MOVB (SI)(R14*1), R15 - MOVB R15, (BX)(R14*1) - ADDQ $0x01, R14 - -copy_1_word: - TESTQ $0x00000002, R11 - JZ copy_1_dword - MOVW (SI)(R14*1), R15 - MOVW R15, (BX)(R14*1) - ADDQ $0x02, R14 - -copy_1_dword: - TESTQ $0x00000004, R11 - JZ copy_1_qword - MOVL (SI)(R14*1), R15 - MOVL R15, (BX)(R14*1) - ADDQ $0x04, R14 - -copy_1_qword: - TESTQ $0x00000008, R11 - JZ copy_1_test - MOVQ (SI)(R14*1), R15 - MOVQ R15, (BX)(R14*1) - ADDQ $0x08, R14 - JMP copy_1_test + MOVQ R11, R14 + SUBQ $0x10, R14 + JB copy_1_small + +copy_1_loop: + MOVUPS (SI), X0 + MOVUPS X0, (BX) + ADDQ $0x10, SI + ADDQ $0x10, BX + SUBQ $0x10, R14 + JAE copy_1_loop + LEAQ 16(SI)(R14*1), SI + LEAQ 16(BX)(R14*1), BX + MOVUPS -16(SI), X0 + MOVUPS X0, -16(BX) + JMP copy_1_end + +copy_1_small: + CMPQ R11, $0x03 + JE copy_1_move_3 + JB copy_1_move_1or2 + CMPQ R11, $0x08 + JB copy_1_move_4through7 + JMP copy_1_move_8through16 + +copy_1_move_1or2: + MOVB (SI), R14 + MOVB -1(SI)(R11*1), R15 + MOVB R14, (BX) + MOVB R15, -1(BX)(R11*1) + ADDQ R11, SI + ADDQ R11, BX + JMP copy_1_end -copy_1: - MOVUPS (SI)(R14*1), X0 - MOVUPS X0, (BX)(R14*1) - ADDQ $0x10, R14 +copy_1_move_3: + MOVW (SI), R14 + MOVB 2(SI), R15 + MOVW R14, (BX) + MOVB R15, 2(BX) + ADDQ R11, SI + ADDQ R11, BX + JMP copy_1_end -copy_1_test: - CMPQ R14, R11 - JB copy_1 +copy_1_move_4through7: + MOVL (SI), R14 + MOVL -4(SI)(R11*1), R15 + MOVL R14, (BX) + MOVL R15, -4(BX)(R11*1) ADDQ R11, SI ADDQ R11, BX + JMP copy_1_end + +copy_1_move_8through16: + MOVQ (SI), R14 + MOVQ -8(SI)(R11*1), R15 + MOVQ R14, (BX) + MOVQ R15, -8(BX)(R11*1) + ADDQ R11, SI + ADDQ R11, BX + +copy_1_end: ADDQ R11, DI // Malformed input if seq.mo > t+len(hist) || seq.mo > s.windowSize) @@ -1432,52 +1518,65 @@ check_offset: JG error_match_off_too_big // Copy match from history - MOVQ R12, R11 - SUBQ DI, R11 - JLS copy_match - MOVQ R9, R14 - SUBQ R11, R14 - CMPQ R13, R11 - JGE copy_all_from_history - XORQ R11, R11 - TESTQ $0x00000001, R13 - JZ copy_4_word - MOVB (R14)(R11*1), R12 - MOVB R12, (BX)(R11*1) - ADDQ $0x01, R11 - -copy_4_word: - TESTQ $0x00000002, R13 - JZ copy_4_dword - MOVW (R14)(R11*1), R12 - MOVW R12, (BX)(R11*1) - ADDQ $0x02, R11 - -copy_4_dword: - TESTQ $0x00000004, R13 - JZ copy_4_qword - MOVL (R14)(R11*1), R12 - MOVL R12, (BX)(R11*1) - ADDQ $0x04, R11 - -copy_4_qword: - TESTQ $0x00000008, R13 - JZ copy_4_test - MOVQ (R14)(R11*1), R12 - MOVQ R12, (BX)(R11*1) - ADDQ $0x08, R11 - JMP copy_4_test - -copy_4: - 
MOVUPS (R14)(R11*1), X0 - MOVUPS X0, (BX)(R11*1) - ADDQ $0x10, R11 + MOVQ R12, R11 + SUBQ DI, R11 + JLS copy_match + MOVQ R9, R14 + SUBQ R11, R14 + CMPQ R13, R11 + JG copy_all_from_history + MOVQ R13, R11 + SUBQ $0x10, R11 + JB copy_4_small + +copy_4_loop: + MOVUPS (R14), X0 + MOVUPS X0, (BX) + ADDQ $0x10, R14 + ADDQ $0x10, BX + SUBQ $0x10, R11 + JAE copy_4_loop + LEAQ 16(R14)(R11*1), R14 + LEAQ 16(BX)(R11*1), BX + MOVUPS -16(R14), X0 + MOVUPS X0, -16(BX) + JMP copy_4_end + +copy_4_small: + CMPQ R13, $0x03 + JE copy_4_move_3 + CMPQ R13, $0x08 + JB copy_4_move_4through7 + JMP copy_4_move_8through16 + +copy_4_move_3: + MOVW (R14), R11 + MOVB 2(R14), R12 + MOVW R11, (BX) + MOVB R12, 2(BX) + ADDQ R13, R14 + ADDQ R13, BX + JMP copy_4_end + +copy_4_move_4through7: + MOVL (R14), R11 + MOVL -4(R14)(R13*1), R12 + MOVL R11, (BX) + MOVL R12, -4(BX)(R13*1) + ADDQ R13, R14 + ADDQ R13, BX + JMP copy_4_end + +copy_4_move_8through16: + MOVQ (R14), R11 + MOVQ -8(R14)(R13*1), R12 + MOVQ R11, (BX) + MOVQ R12, -8(BX)(R13*1) + ADDQ R13, R14 + ADDQ R13, BX -copy_4_test: - CMPQ R11, R13 - JB copy_4 +copy_4_end: ADDQ R13, DI - ADDQ R13, BX ADDQ $0x18, AX INCQ DX CMPQ DX, CX @@ -1485,99 +1584,143 @@ copy_4_test: JMP loop_finished copy_all_from_history: - XORQ R15, R15 - TESTQ $0x00000001, R11 - JZ copy_5_word - MOVB (R14)(R15*1), BP - MOVB BP, (BX)(R15*1) - ADDQ $0x01, R15 - -copy_5_word: - TESTQ $0x00000002, R11 - JZ copy_5_dword - MOVW (R14)(R15*1), BP - MOVW BP, (BX)(R15*1) - ADDQ $0x02, R15 - -copy_5_dword: - TESTQ $0x00000004, R11 - JZ copy_5_qword - MOVL (R14)(R15*1), BP - MOVL BP, (BX)(R15*1) - ADDQ $0x04, R15 - -copy_5_qword: - TESTQ $0x00000008, R11 - JZ copy_5_test - MOVQ (R14)(R15*1), BP - MOVQ BP, (BX)(R15*1) - ADDQ $0x08, R15 - JMP copy_5_test - -copy_5: - MOVUPS (R14)(R15*1), X0 - MOVUPS X0, (BX)(R15*1) - ADDQ $0x10, R15 - -copy_5_test: - CMPQ R15, R11 - JB copy_5 + MOVQ R11, R15 + SUBQ $0x10, R15 + JB copy_5_small + +copy_5_loop: + MOVUPS (R14), X0 + MOVUPS X0, (BX) + ADDQ $0x10, R14 + ADDQ $0x10, BX + SUBQ $0x10, R15 + JAE copy_5_loop + LEAQ 16(R14)(R15*1), R14 + LEAQ 16(BX)(R15*1), BX + MOVUPS -16(R14), X0 + MOVUPS X0, -16(BX) + JMP copy_5_end + +copy_5_small: + CMPQ R11, $0x03 + JE copy_5_move_3 + JB copy_5_move_1or2 + CMPQ R11, $0x08 + JB copy_5_move_4through7 + JMP copy_5_move_8through16 + +copy_5_move_1or2: + MOVB (R14), R15 + MOVB -1(R14)(R11*1), BP + MOVB R15, (BX) + MOVB BP, -1(BX)(R11*1) + ADDQ R11, R14 + ADDQ R11, BX + JMP copy_5_end + +copy_5_move_3: + MOVW (R14), R15 + MOVB 2(R14), BP + MOVW R15, (BX) + MOVB BP, 2(BX) + ADDQ R11, R14 ADDQ R11, BX + JMP copy_5_end + +copy_5_move_4through7: + MOVL (R14), R15 + MOVL -4(R14)(R11*1), BP + MOVL R15, (BX) + MOVL BP, -4(BX)(R11*1) + ADDQ R11, R14 + ADDQ R11, BX + JMP copy_5_end + +copy_5_move_8through16: + MOVQ (R14), R15 + MOVQ -8(R14)(R11*1), BP + MOVQ R15, (BX) + MOVQ BP, -8(BX)(R11*1) + ADDQ R11, R14 + ADDQ R11, BX + +copy_5_end: ADDQ R11, DI SUBQ R11, R13 // Copy match from the current buffer copy_match: - TESTQ R13, R13 - JZ handle_loop - MOVQ BX, R11 - SUBQ R12, R11 + MOVQ BX, R11 + SUBQ R12, R11 // ml <= mo CMPQ R13, R12 JA copy_overlapping_match // Copy non-overlapping match - ADDQ R13, DI - XORQ R12, R12 - TESTQ $0x00000001, R13 - JZ copy_2_word - MOVB (R11)(R12*1), R14 - MOVB R14, (BX)(R12*1) - ADDQ $0x01, R12 - -copy_2_word: - TESTQ $0x00000002, R13 - JZ copy_2_dword - MOVW (R11)(R12*1), R14 - MOVW R14, (BX)(R12*1) - ADDQ $0x02, R12 - -copy_2_dword: - TESTQ $0x00000004, R13 - JZ copy_2_qword - MOVL (R11)(R12*1), R14 - MOVL R14, 
(BX)(R12*1) - ADDQ $0x04, R12 - -copy_2_qword: - TESTQ $0x00000008, R13 - JZ copy_2_test - MOVQ (R11)(R12*1), R14 - MOVQ R14, (BX)(R12*1) - ADDQ $0x08, R12 - JMP copy_2_test + ADDQ R13, DI + MOVQ R13, R12 + SUBQ $0x10, R12 + JB copy_2_small -copy_2: - MOVUPS (R11)(R12*1), X0 - MOVUPS X0, (BX)(R12*1) - ADDQ $0x10, R12 +copy_2_loop: + MOVUPS (R11), X0 + MOVUPS X0, (BX) + ADDQ $0x10, R11 + ADDQ $0x10, BX + SUBQ $0x10, R12 + JAE copy_2_loop + LEAQ 16(R11)(R12*1), R11 + LEAQ 16(BX)(R12*1), BX + MOVUPS -16(R11), X0 + MOVUPS X0, -16(BX) + JMP copy_2_end + +copy_2_small: + CMPQ R13, $0x03 + JE copy_2_move_3 + JB copy_2_move_1or2 + CMPQ R13, $0x08 + JB copy_2_move_4through7 + JMP copy_2_move_8through16 + +copy_2_move_1or2: + MOVB (R11), R12 + MOVB -1(R11)(R13*1), R14 + MOVB R12, (BX) + MOVB R14, -1(BX)(R13*1) + ADDQ R13, R11 + ADDQ R13, BX + JMP copy_2_end -copy_2_test: - CMPQ R12, R13 - JB copy_2 +copy_2_move_3: + MOVW (R11), R12 + MOVB 2(R11), R14 + MOVW R12, (BX) + MOVB R14, 2(BX) + ADDQ R13, R11 + ADDQ R13, BX + JMP copy_2_end + +copy_2_move_4through7: + MOVL (R11), R12 + MOVL -4(R11)(R13*1), R14 + MOVL R12, (BX) + MOVL R14, -4(BX)(R13*1) + ADDQ R13, R11 + ADDQ R13, BX + JMP copy_2_end + +copy_2_move_8through16: + MOVQ (R11), R12 + MOVQ -8(R11)(R13*1), R14 + MOVQ R12, (BX) + MOVQ R14, -8(BX)(R13*1) + ADDQ R13, R11 ADDQ R13, BX - JMP handle_loop + +copy_2_end: + JMP handle_loop // Copy overlapping match copy_overlapping_match: @@ -1642,6 +1785,10 @@ TEXT ·sequenceDecs_decodeSync_amd64(SB), $64-32 MOVQ 72(AX), DI MOVQ 80(AX), R8 MOVQ 88(AX), R9 + XORQ CX, CX + MOVQ CX, 8(SP) + MOVQ CX, 16(SP) + MOVQ CX, 24(SP) MOVQ 112(AX), R10 MOVQ 128(AX), CX MOVQ CX, 32(SP) @@ -1691,34 +1838,46 @@ sequenceDecs_decodeSync_amd64_fill_byte_by_byte: sequenceDecs_decodeSync_amd64_fill_end: // Update offset - MOVQ R9, AX - MOVQ BX, CX - MOVQ DX, R14 - SHLQ CL, R14 - MOVB AH, CL - ADDQ CX, BX - NEGL CX - SHRQ CL, R14 - SHRQ $0x20, AX - TESTQ CX, CX - CMOVQEQ CX, R14 - ADDQ R14, AX - MOVQ AX, 8(SP) + MOVQ R9, AX + MOVQ BX, CX + MOVQ DX, R14 + SHLQ CL, R14 + MOVB AH, CL + SHRQ $0x20, AX + TESTQ CX, CX + JZ sequenceDecs_decodeSync_amd64_of_update_zero + ADDQ CX, BX + CMPQ BX, $0x40 + JA sequenceDecs_decodeSync_amd64_of_update_zero + CMPQ CX, $0x40 + JAE sequenceDecs_decodeSync_amd64_of_update_zero + NEGQ CX + SHRQ CL, R14 + ADDQ R14, AX + +sequenceDecs_decodeSync_amd64_of_update_zero: + MOVQ AX, 8(SP) // Update match length - MOVQ R8, AX - MOVQ BX, CX - MOVQ DX, R14 - SHLQ CL, R14 - MOVB AH, CL - ADDQ CX, BX - NEGL CX - SHRQ CL, R14 - SHRQ $0x20, AX - TESTQ CX, CX - CMOVQEQ CX, R14 - ADDQ R14, AX - MOVQ AX, 16(SP) + MOVQ R8, AX + MOVQ BX, CX + MOVQ DX, R14 + SHLQ CL, R14 + MOVB AH, CL + SHRQ $0x20, AX + TESTQ CX, CX + JZ sequenceDecs_decodeSync_amd64_ml_update_zero + ADDQ CX, BX + CMPQ BX, $0x40 + JA sequenceDecs_decodeSync_amd64_ml_update_zero + CMPQ CX, $0x40 + JAE sequenceDecs_decodeSync_amd64_ml_update_zero + NEGQ CX + SHRQ CL, R14 + ADDQ R14, AX + +sequenceDecs_decodeSync_amd64_ml_update_zero: + MOVQ AX, 16(SP) // Fill bitreader to have enough for the remaining CMPQ SI, $0x08 @@ -1746,19 +1905,25 @@ sequenceDecs_decodeSync_amd64_fill_2_byte_by_byte: sequenceDecs_decodeSync_amd64_fill_2_end: // Update literal length - MOVQ DI, AX - MOVQ BX, CX - MOVQ DX, R14 - SHLQ CL, R14 - MOVB AH, CL - ADDQ CX, BX - NEGL CX - SHRQ CL, R14 - SHRQ $0x20, AX - TESTQ CX, CX - CMOVQEQ CX, R14 - ADDQ R14, AX - MOVQ AX, 24(SP) + MOVQ DI, AX + MOVQ BX, CX + MOVQ DX, R14 + SHLQ CL, R14 + MOVB AH, CL + SHRQ $0x20, AX + TESTQ CX, CX + JZ 
sequenceDecs_decodeSync_amd64_ll_update_zero + ADDQ CX, BX + CMPQ BX, $0x40 + JA sequenceDecs_decodeSync_amd64_ll_update_zero + CMPQ CX, $0x40 + JAE sequenceDecs_decodeSync_amd64_ll_update_zero + NEGQ CX + SHRQ CL, R14 + ADDQ R14, AX + +sequenceDecs_decodeSync_amd64_ll_update_zero: + MOVQ AX, 24(SP) // Fill bitreader for state updates MOVQ R13, (SP) @@ -1773,18 +1938,17 @@ sequenceDecs_decodeSync_amd64_fill_2_end: MOVBQZX DI, R13 SHRQ $0x10, DI MOVWQZX DI, DI - CMPQ R13, $0x00 - JZ sequenceDecs_decodeSync_amd64_llState_updateState_skip_zero - MOVQ BX, CX - ADDQ R13, BX + LEAQ (BX)(R13*1), CX MOVQ DX, R14 - SHLQ CL, R14 - MOVQ R13, CX - NEGQ CX - SHRQ CL, R14 + MOVQ CX, BX + ROLQ CL, R14 + MOVL $0x00000001, R15 + MOVB R13, CL + SHLL CL, R15 + DECL R15 + ANDQ R15, R14 ADDQ R14, DI -sequenceDecs_decodeSync_amd64_llState_updateState_skip_zero: // Load ctx.llTable MOVQ ctx+16(FP), CX MOVQ (CX), CX @@ -1794,18 +1958,17 @@ sequenceDecs_decodeSync_amd64_llState_updateState_skip_zero: MOVBQZX R8, R13 SHRQ $0x10, R8 MOVWQZX R8, R8 - CMPQ R13, $0x00 - JZ sequenceDecs_decodeSync_amd64_mlState_updateState_skip_zero - MOVQ BX, CX - ADDQ R13, BX + LEAQ (BX)(R13*1), CX MOVQ DX, R14 - SHLQ CL, R14 - MOVQ R13, CX - NEGQ CX - SHRQ CL, R14 + MOVQ CX, BX + ROLQ CL, R14 + MOVL $0x00000001, R15 + MOVB R13, CL + SHLL CL, R15 + DECL R15 + ANDQ R15, R14 ADDQ R14, R8 -sequenceDecs_decodeSync_amd64_mlState_updateState_skip_zero: // Load ctx.mlTable MOVQ ctx+16(FP), CX MOVQ 24(CX), CX @@ -1815,18 +1978,17 @@ sequenceDecs_decodeSync_amd64_mlState_updateState_skip_zero: MOVBQZX R9, R13 SHRQ $0x10, R9 MOVWQZX R9, R9 - CMPQ R13, $0x00 - JZ sequenceDecs_decodeSync_amd64_ofState_updateState_skip_zero - MOVQ BX, CX - ADDQ R13, BX + LEAQ (BX)(R13*1), CX MOVQ DX, R14 - SHLQ CL, R14 - MOVQ R13, CX - NEGQ CX - SHRQ CL, R14 + MOVQ CX, BX + ROLQ CL, R14 + MOVL $0x00000001, R15 + MOVB R13, CL + SHLL CL, R15 + DECL R15 + ANDQ R15, R14 ADDQ R14, R9 -sequenceDecs_decodeSync_amd64_ofState_updateState_skip_zero: // Load ctx.ofTable MOVQ ctx+16(FP), CX MOVQ 48(CX), CX @@ -1841,7 +2003,7 @@ sequenceDecs_decodeSync_amd64_skip_update: MOVUPS 144(CX), X0 MOVQ R13, 144(CX) MOVUPS X0, 152(CX) - JMP sequenceDecs_decodeSync_amd64_adjust_end + JMP sequenceDecs_decodeSync_amd64_after_adjust sequenceDecs_decodeSync_amd64_adjust_offsetB_1_or_0: CMPQ 24(SP), $0x00000000 @@ -1853,7 +2015,7 @@ sequenceDecs_decodeSync_amd64_adjust_offset_maybezero: TESTQ R13, R13 JNZ sequenceDecs_decodeSync_amd64_adjust_offset_nonzero MOVQ 144(CX), R13 - JMP sequenceDecs_decodeSync_amd64_adjust_end + JMP sequenceDecs_decodeSync_amd64_after_adjust sequenceDecs_decodeSync_amd64_adjust_offset_nonzero: MOVQ R13, AX @@ -1862,8 +2024,7 @@ sequenceDecs_decodeSync_amd64_adjust_offset_nonzero: CMPQ R13, $0x03 CMOVQEQ R14, AX CMOVQEQ R15, R14 - LEAQ 144(CX), R15 - ADDQ (R15)(AX*8), R14 + ADDQ 144(CX)(AX*8), R14 JNZ sequenceDecs_decodeSync_amd64_adjust_temp_valid MOVQ $0x00000001, R14 @@ -1879,7 +2040,7 @@ sequenceDecs_decodeSync_amd64_adjust_skip: MOVQ R14, 144(CX) MOVQ R14, R13 -sequenceDecs_decodeSync_amd64_adjust_end: +sequenceDecs_decodeSync_amd64_after_adjust: MOVQ R13, 8(SP) // Check values @@ -1934,103 +2095,137 @@ check_offset: JG error_match_off_too_big // Copy match from history - MOVQ CX, AX - SUBQ R12, AX - JLS copy_match - MOVQ 48(SP), R14 - SUBQ AX, R14 - CMPQ R13, AX - JGE copy_all_from_history - XORQ AX, AX - TESTQ $0x00000001, R13 - JZ copy_4_word - MOVB (R14)(AX*1), CL - MOVB CL, (R10)(AX*1) - ADDQ $0x01, AX - -copy_4_word: - TESTQ $0x00000002, R13 - JZ 
copy_4_dword - MOVW (R14)(AX*1), CX - MOVW CX, (R10)(AX*1) - ADDQ $0x02, AX - -copy_4_dword: - TESTQ $0x00000004, R13 - JZ copy_4_qword - MOVL (R14)(AX*1), CX - MOVL CX, (R10)(AX*1) - ADDQ $0x04, AX - -copy_4_qword: - TESTQ $0x00000008, R13 - JZ copy_4_test - MOVQ (R14)(AX*1), CX - MOVQ CX, (R10)(AX*1) - ADDQ $0x08, AX - JMP copy_4_test - -copy_4: - MOVUPS (R14)(AX*1), X0 - MOVUPS X0, (R10)(AX*1) - ADDQ $0x10, AX + MOVQ CX, AX + SUBQ R12, AX + JLS copy_match + MOVQ 48(SP), R14 + SUBQ AX, R14 + CMPQ R13, AX + JG copy_all_from_history + MOVQ R13, AX + SUBQ $0x10, AX + JB copy_4_small -copy_4_test: - CMPQ AX, R13 - JB copy_4 - ADDQ R13, R12 +copy_4_loop: + MOVUPS (R14), X0 + MOVUPS X0, (R10) + ADDQ $0x10, R14 + ADDQ $0x10, R10 + SUBQ $0x10, AX + JAE copy_4_loop + LEAQ 16(R14)(AX*1), R14 + LEAQ 16(R10)(AX*1), R10 + MOVUPS -16(R14), X0 + MOVUPS X0, -16(R10) + JMP copy_4_end + +copy_4_small: + CMPQ R13, $0x03 + JE copy_4_move_3 + CMPQ R13, $0x08 + JB copy_4_move_4through7 + JMP copy_4_move_8through16 + +copy_4_move_3: + MOVW (R14), AX + MOVB 2(R14), CL + MOVW AX, (R10) + MOVB CL, 2(R10) + ADDQ R13, R14 + ADDQ R13, R10 + JMP copy_4_end + +copy_4_move_4through7: + MOVL (R14), AX + MOVL -4(R14)(R13*1), CX + MOVL AX, (R10) + MOVL CX, -4(R10)(R13*1) + ADDQ R13, R14 ADDQ R13, R10 + JMP copy_4_end + +copy_4_move_8through16: + MOVQ (R14), AX + MOVQ -8(R14)(R13*1), CX + MOVQ AX, (R10) + MOVQ CX, -8(R10)(R13*1) + ADDQ R13, R14 + ADDQ R13, R10 + +copy_4_end: + ADDQ R13, R12 JMP handle_loop JMP loop_finished copy_all_from_history: - XORQ R15, R15 - TESTQ $0x00000001, AX - JZ copy_5_word - MOVB (R14)(R15*1), BP - MOVB BP, (R10)(R15*1) - ADDQ $0x01, R15 - -copy_5_word: - TESTQ $0x00000002, AX - JZ copy_5_dword - MOVW (R14)(R15*1), BP - MOVW BP, (R10)(R15*1) - ADDQ $0x02, R15 - -copy_5_dword: - TESTQ $0x00000004, AX - JZ copy_5_qword - MOVL (R14)(R15*1), BP - MOVL BP, (R10)(R15*1) - ADDQ $0x04, R15 - -copy_5_qword: - TESTQ $0x00000008, AX - JZ copy_5_test - MOVQ (R14)(R15*1), BP - MOVQ BP, (R10)(R15*1) - ADDQ $0x08, R15 - JMP copy_5_test - -copy_5: - MOVUPS (R14)(R15*1), X0 - MOVUPS X0, (R10)(R15*1) - ADDQ $0x10, R15 - -copy_5_test: - CMPQ R15, AX - JB copy_5 + MOVQ AX, R15 + SUBQ $0x10, R15 + JB copy_5_small + +copy_5_loop: + MOVUPS (R14), X0 + MOVUPS X0, (R10) + ADDQ $0x10, R14 + ADDQ $0x10, R10 + SUBQ $0x10, R15 + JAE copy_5_loop + LEAQ 16(R14)(R15*1), R14 + LEAQ 16(R10)(R15*1), R10 + MOVUPS -16(R14), X0 + MOVUPS X0, -16(R10) + JMP copy_5_end + +copy_5_small: + CMPQ AX, $0x03 + JE copy_5_move_3 + JB copy_5_move_1or2 + CMPQ AX, $0x08 + JB copy_5_move_4through7 + JMP copy_5_move_8through16 + +copy_5_move_1or2: + MOVB (R14), R15 + MOVB -1(R14)(AX*1), BP + MOVB R15, (R10) + MOVB BP, -1(R10)(AX*1) + ADDQ AX, R14 + ADDQ AX, R10 + JMP copy_5_end + +copy_5_move_3: + MOVW (R14), R15 + MOVB 2(R14), BP + MOVW R15, (R10) + MOVB BP, 2(R10) + ADDQ AX, R14 + ADDQ AX, R10 + JMP copy_5_end + +copy_5_move_4through7: + MOVL (R14), R15 + MOVL -4(R14)(AX*1), BP + MOVL R15, (R10) + MOVL BP, -4(R10)(AX*1) + ADDQ AX, R14 ADDQ AX, R10 + JMP copy_5_end + +copy_5_move_8through16: + MOVQ (R14), R15 + MOVQ -8(R14)(AX*1), BP + MOVQ R15, (R10) + MOVQ BP, -8(R10)(AX*1) + ADDQ AX, R14 + ADDQ AX, R10 + +copy_5_end: ADDQ AX, R12 SUBQ AX, R13 // Copy match from the current buffer copy_match: - TESTQ R13, R13 - JZ handle_loop - MOVQ R10, AX - SUBQ CX, AX + MOVQ R10, AX + SUBQ CX, AX // ml <= mo CMPQ R13, CX @@ -2142,6 +2337,10 @@ TEXT ·sequenceDecs_decodeSync_bmi2(SB), $64-32 MOVQ 72(CX), SI MOVQ 80(CX), DI MOVQ 88(CX), R8 + XORQ R9, R9 + MOVQ 
R9, 8(SP) + MOVQ R9, 16(SP) + MOVQ R9, 24(SP) MOVQ 112(CX), R9 MOVQ 128(CX), R10 MOVQ R10, 32(SP) @@ -2314,7 +2513,7 @@ sequenceDecs_decodeSync_bmi2_skip_update: MOVUPS 144(CX), X0 MOVQ R13, 144(CX) MOVUPS X0, 152(CX) - JMP sequenceDecs_decodeSync_bmi2_adjust_end + JMP sequenceDecs_decodeSync_bmi2_after_adjust sequenceDecs_decodeSync_bmi2_adjust_offsetB_1_or_0: CMPQ 24(SP), $0x00000000 @@ -2326,7 +2525,7 @@ sequenceDecs_decodeSync_bmi2_adjust_offset_maybezero: TESTQ R13, R13 JNZ sequenceDecs_decodeSync_bmi2_adjust_offset_nonzero MOVQ 144(CX), R13 - JMP sequenceDecs_decodeSync_bmi2_adjust_end + JMP sequenceDecs_decodeSync_bmi2_after_adjust sequenceDecs_decodeSync_bmi2_adjust_offset_nonzero: MOVQ R13, R12 @@ -2335,8 +2534,7 @@ sequenceDecs_decodeSync_bmi2_adjust_offset_nonzero: CMPQ R13, $0x03 CMOVQEQ R14, R12 CMOVQEQ R15, R14 - LEAQ 144(CX), R15 - ADDQ (R15)(R12*8), R14 + ADDQ 144(CX)(R12*8), R14 JNZ sequenceDecs_decodeSync_bmi2_adjust_temp_valid MOVQ $0x00000001, R14 @@ -2352,7 +2550,7 @@ sequenceDecs_decodeSync_bmi2_adjust_skip: MOVQ R14, 144(CX) MOVQ R14, R13 -sequenceDecs_decodeSync_bmi2_adjust_end: +sequenceDecs_decodeSync_bmi2_after_adjust: MOVQ R13, 8(SP) // Check values @@ -2407,103 +2605,137 @@ check_offset: JG error_match_off_too_big // Copy match from history - MOVQ R12, CX - SUBQ R11, CX - JLS copy_match - MOVQ 48(SP), R14 - SUBQ CX, R14 - CMPQ R13, CX - JGE copy_all_from_history - XORQ CX, CX - TESTQ $0x00000001, R13 - JZ copy_4_word - MOVB (R14)(CX*1), R12 - MOVB R12, (R9)(CX*1) - ADDQ $0x01, CX - -copy_4_word: - TESTQ $0x00000002, R13 - JZ copy_4_dword - MOVW (R14)(CX*1), R12 - MOVW R12, (R9)(CX*1) - ADDQ $0x02, CX - -copy_4_dword: - TESTQ $0x00000004, R13 - JZ copy_4_qword - MOVL (R14)(CX*1), R12 - MOVL R12, (R9)(CX*1) - ADDQ $0x04, CX - -copy_4_qword: - TESTQ $0x00000008, R13 - JZ copy_4_test - MOVQ (R14)(CX*1), R12 - MOVQ R12, (R9)(CX*1) - ADDQ $0x08, CX - JMP copy_4_test - -copy_4: - MOVUPS (R14)(CX*1), X0 - MOVUPS X0, (R9)(CX*1) - ADDQ $0x10, CX + MOVQ R12, CX + SUBQ R11, CX + JLS copy_match + MOVQ 48(SP), R14 + SUBQ CX, R14 + CMPQ R13, CX + JG copy_all_from_history + MOVQ R13, CX + SUBQ $0x10, CX + JB copy_4_small + +copy_4_loop: + MOVUPS (R14), X0 + MOVUPS X0, (R9) + ADDQ $0x10, R14 + ADDQ $0x10, R9 + SUBQ $0x10, CX + JAE copy_4_loop + LEAQ 16(R14)(CX*1), R14 + LEAQ 16(R9)(CX*1), R9 + MOVUPS -16(R14), X0 + MOVUPS X0, -16(R9) + JMP copy_4_end + +copy_4_small: + CMPQ R13, $0x03 + JE copy_4_move_3 + CMPQ R13, $0x08 + JB copy_4_move_4through7 + JMP copy_4_move_8through16 + +copy_4_move_3: + MOVW (R14), CX + MOVB 2(R14), R12 + MOVW CX, (R9) + MOVB R12, 2(R9) + ADDQ R13, R14 + ADDQ R13, R9 + JMP copy_4_end + +copy_4_move_4through7: + MOVL (R14), CX + MOVL -4(R14)(R13*1), R12 + MOVL CX, (R9) + MOVL R12, -4(R9)(R13*1) + ADDQ R13, R14 + ADDQ R13, R9 + JMP copy_4_end + +copy_4_move_8through16: + MOVQ (R14), CX + MOVQ -8(R14)(R13*1), R12 + MOVQ CX, (R9) + MOVQ R12, -8(R9)(R13*1) + ADDQ R13, R14 + ADDQ R13, R9 -copy_4_test: - CMPQ CX, R13 - JB copy_4 +copy_4_end: ADDQ R13, R11 - ADDQ R13, R9 JMP handle_loop JMP loop_finished copy_all_from_history: - XORQ R15, R15 - TESTQ $0x00000001, CX - JZ copy_5_word - MOVB (R14)(R15*1), BP - MOVB BP, (R9)(R15*1) - ADDQ $0x01, R15 - -copy_5_word: - TESTQ $0x00000002, CX - JZ copy_5_dword - MOVW (R14)(R15*1), BP - MOVW BP, (R9)(R15*1) - ADDQ $0x02, R15 - -copy_5_dword: - TESTQ $0x00000004, CX - JZ copy_5_qword - MOVL (R14)(R15*1), BP - MOVL BP, (R9)(R15*1) - ADDQ $0x04, R15 - -copy_5_qword: - TESTQ $0x00000008, CX - JZ copy_5_test - MOVQ 
(R14)(R15*1), BP - MOVQ BP, (R9)(R15*1) - ADDQ $0x08, R15 - JMP copy_5_test - -copy_5: - MOVUPS (R14)(R15*1), X0 - MOVUPS X0, (R9)(R15*1) - ADDQ $0x10, R15 - -copy_5_test: - CMPQ R15, CX - JB copy_5 + MOVQ CX, R15 + SUBQ $0x10, R15 + JB copy_5_small + +copy_5_loop: + MOVUPS (R14), X0 + MOVUPS X0, (R9) + ADDQ $0x10, R14 + ADDQ $0x10, R9 + SUBQ $0x10, R15 + JAE copy_5_loop + LEAQ 16(R14)(R15*1), R14 + LEAQ 16(R9)(R15*1), R9 + MOVUPS -16(R14), X0 + MOVUPS X0, -16(R9) + JMP copy_5_end + +copy_5_small: + CMPQ CX, $0x03 + JE copy_5_move_3 + JB copy_5_move_1or2 + CMPQ CX, $0x08 + JB copy_5_move_4through7 + JMP copy_5_move_8through16 + +copy_5_move_1or2: + MOVB (R14), R15 + MOVB -1(R14)(CX*1), BP + MOVB R15, (R9) + MOVB BP, -1(R9)(CX*1) + ADDQ CX, R14 + ADDQ CX, R9 + JMP copy_5_end + +copy_5_move_3: + MOVW (R14), R15 + MOVB 2(R14), BP + MOVW R15, (R9) + MOVB BP, 2(R9) + ADDQ CX, R14 ADDQ CX, R9 + JMP copy_5_end + +copy_5_move_4through7: + MOVL (R14), R15 + MOVL -4(R14)(CX*1), BP + MOVL R15, (R9) + MOVL BP, -4(R9)(CX*1) + ADDQ CX, R14 + ADDQ CX, R9 + JMP copy_5_end + +copy_5_move_8through16: + MOVQ (R14), R15 + MOVQ -8(R14)(CX*1), BP + MOVQ R15, (R9) + MOVQ BP, -8(R9)(CX*1) + ADDQ CX, R14 + ADDQ CX, R9 + +copy_5_end: ADDQ CX, R11 SUBQ CX, R13 // Copy match from the current buffer copy_match: - TESTQ R13, R13 - JZ handle_loop - MOVQ R9, CX - SUBQ R12, CX + MOVQ R9, CX + SUBQ R12, CX // ml <= mo CMPQ R13, R12 @@ -2615,6 +2847,10 @@ TEXT ·sequenceDecs_decodeSync_safe_amd64(SB), $64-32 MOVQ 72(AX), DI MOVQ 80(AX), R8 MOVQ 88(AX), R9 + XORQ CX, CX + MOVQ CX, 8(SP) + MOVQ CX, 16(SP) + MOVQ CX, 24(SP) MOVQ 112(AX), R10 MOVQ 128(AX), CX MOVQ CX, 32(SP) @@ -2664,34 +2900,46 @@ sequenceDecs_decodeSync_safe_amd64_fill_byte_by_byte: sequenceDecs_decodeSync_safe_amd64_fill_end: // Update offset - MOVQ R9, AX - MOVQ BX, CX - MOVQ DX, R14 - SHLQ CL, R14 - MOVB AH, CL - ADDQ CX, BX - NEGL CX - SHRQ CL, R14 - SHRQ $0x20, AX - TESTQ CX, CX - CMOVQEQ CX, R14 - ADDQ R14, AX - MOVQ AX, 8(SP) + MOVQ R9, AX + MOVQ BX, CX + MOVQ DX, R14 + SHLQ CL, R14 + MOVB AH, CL + SHRQ $0x20, AX + TESTQ CX, CX + JZ sequenceDecs_decodeSync_safe_amd64_of_update_zero + ADDQ CX, BX + CMPQ BX, $0x40 + JA sequenceDecs_decodeSync_safe_amd64_of_update_zero + CMPQ CX, $0x40 + JAE sequenceDecs_decodeSync_safe_amd64_of_update_zero + NEGQ CX + SHRQ CL, R14 + ADDQ R14, AX + +sequenceDecs_decodeSync_safe_amd64_of_update_zero: + MOVQ AX, 8(SP) // Update match length - MOVQ R8, AX - MOVQ BX, CX - MOVQ DX, R14 - SHLQ CL, R14 - MOVB AH, CL - ADDQ CX, BX - NEGL CX - SHRQ CL, R14 - SHRQ $0x20, AX - TESTQ CX, CX - CMOVQEQ CX, R14 - ADDQ R14, AX - MOVQ AX, 16(SP) + MOVQ R8, AX + MOVQ BX, CX + MOVQ DX, R14 + SHLQ CL, R14 + MOVB AH, CL + SHRQ $0x20, AX + TESTQ CX, CX + JZ sequenceDecs_decodeSync_safe_amd64_ml_update_zero + ADDQ CX, BX + CMPQ BX, $0x40 + JA sequenceDecs_decodeSync_safe_amd64_ml_update_zero + CMPQ CX, $0x40 + JAE sequenceDecs_decodeSync_safe_amd64_ml_update_zero + NEGQ CX + SHRQ CL, R14 + ADDQ R14, AX + +sequenceDecs_decodeSync_safe_amd64_ml_update_zero: + MOVQ AX, 16(SP) // Fill bitreader to have enough for the remaining CMPQ SI, $0x08 @@ -2719,19 +2967,25 @@ sequenceDecs_decodeSync_safe_amd64_fill_2_byte_by_byte: sequenceDecs_decodeSync_safe_amd64_fill_2_end: // Update literal length - MOVQ DI, AX - MOVQ BX, CX - MOVQ DX, R14 - SHLQ CL, R14 - MOVB AH, CL - ADDQ CX, BX - NEGL CX - SHRQ CL, R14 - SHRQ $0x20, AX - TESTQ CX, CX - CMOVQEQ CX, R14 - ADDQ R14, AX - MOVQ AX, 24(SP) + MOVQ DI, AX + MOVQ BX, CX + MOVQ DX, R14 + SHLQ CL, R14 + MOVB AH, 
CL + SHRQ $0x20, AX + TESTQ CX, CX + JZ sequenceDecs_decodeSync_safe_amd64_ll_update_zero + ADDQ CX, BX + CMPQ BX, $0x40 + JA sequenceDecs_decodeSync_safe_amd64_ll_update_zero + CMPQ CX, $0x40 + JAE sequenceDecs_decodeSync_safe_amd64_ll_update_zero + NEGQ CX + SHRQ CL, R14 + ADDQ R14, AX + +sequenceDecs_decodeSync_safe_amd64_ll_update_zero: + MOVQ AX, 24(SP) // Fill bitreader for state updates MOVQ R13, (SP) @@ -2746,18 +3000,17 @@ sequenceDecs_decodeSync_safe_amd64_fill_2_end: MOVBQZX DI, R13 SHRQ $0x10, DI MOVWQZX DI, DI - CMPQ R13, $0x00 - JZ sequenceDecs_decodeSync_safe_amd64_llState_updateState_skip_zero - MOVQ BX, CX - ADDQ R13, BX + LEAQ (BX)(R13*1), CX MOVQ DX, R14 - SHLQ CL, R14 - MOVQ R13, CX - NEGQ CX - SHRQ CL, R14 + MOVQ CX, BX + ROLQ CL, R14 + MOVL $0x00000001, R15 + MOVB R13, CL + SHLL CL, R15 + DECL R15 + ANDQ R15, R14 ADDQ R14, DI -sequenceDecs_decodeSync_safe_amd64_llState_updateState_skip_zero: // Load ctx.llTable MOVQ ctx+16(FP), CX MOVQ (CX), CX @@ -2767,18 +3020,17 @@ sequenceDecs_decodeSync_safe_amd64_llState_updateState_skip_zero: MOVBQZX R8, R13 SHRQ $0x10, R8 MOVWQZX R8, R8 - CMPQ R13, $0x00 - JZ sequenceDecs_decodeSync_safe_amd64_mlState_updateState_skip_zero - MOVQ BX, CX - ADDQ R13, BX + LEAQ (BX)(R13*1), CX MOVQ DX, R14 - SHLQ CL, R14 - MOVQ R13, CX - NEGQ CX - SHRQ CL, R14 + MOVQ CX, BX + ROLQ CL, R14 + MOVL $0x00000001, R15 + MOVB R13, CL + SHLL CL, R15 + DECL R15 + ANDQ R15, R14 ADDQ R14, R8 -sequenceDecs_decodeSync_safe_amd64_mlState_updateState_skip_zero: // Load ctx.mlTable MOVQ ctx+16(FP), CX MOVQ 24(CX), CX @@ -2788,18 +3040,17 @@ sequenceDecs_decodeSync_safe_amd64_mlState_updateState_skip_zero: MOVBQZX R9, R13 SHRQ $0x10, R9 MOVWQZX R9, R9 - CMPQ R13, $0x00 - JZ sequenceDecs_decodeSync_safe_amd64_ofState_updateState_skip_zero - MOVQ BX, CX - ADDQ R13, BX + LEAQ (BX)(R13*1), CX MOVQ DX, R14 - SHLQ CL, R14 - MOVQ R13, CX - NEGQ CX - SHRQ CL, R14 + MOVQ CX, BX + ROLQ CL, R14 + MOVL $0x00000001, R15 + MOVB R13, CL + SHLL CL, R15 + DECL R15 + ANDQ R15, R14 ADDQ R14, R9 -sequenceDecs_decodeSync_safe_amd64_ofState_updateState_skip_zero: // Load ctx.ofTable MOVQ ctx+16(FP), CX MOVQ 48(CX), CX @@ -2814,7 +3065,7 @@ sequenceDecs_decodeSync_safe_amd64_skip_update: MOVUPS 144(CX), X0 MOVQ R13, 144(CX) MOVUPS X0, 152(CX) - JMP sequenceDecs_decodeSync_safe_amd64_adjust_end + JMP sequenceDecs_decodeSync_safe_amd64_after_adjust sequenceDecs_decodeSync_safe_amd64_adjust_offsetB_1_or_0: CMPQ 24(SP), $0x00000000 @@ -2826,7 +3077,7 @@ sequenceDecs_decodeSync_safe_amd64_adjust_offset_maybezero: TESTQ R13, R13 JNZ sequenceDecs_decodeSync_safe_amd64_adjust_offset_nonzero MOVQ 144(CX), R13 - JMP sequenceDecs_decodeSync_safe_amd64_adjust_end + JMP sequenceDecs_decodeSync_safe_amd64_after_adjust sequenceDecs_decodeSync_safe_amd64_adjust_offset_nonzero: MOVQ R13, AX @@ -2835,8 +3086,7 @@ sequenceDecs_decodeSync_safe_amd64_adjust_offset_nonzero: CMPQ R13, $0x03 CMOVQEQ R14, AX CMOVQEQ R15, R14 - LEAQ 144(CX), R15 - ADDQ (R15)(AX*8), R14 + ADDQ 144(CX)(AX*8), R14 JNZ sequenceDecs_decodeSync_safe_amd64_adjust_temp_valid MOVQ $0x00000001, R14 @@ -2852,7 +3102,7 @@ sequenceDecs_decodeSync_safe_amd64_adjust_skip: MOVQ R14, 144(CX) MOVQ R14, R13 -sequenceDecs_decodeSync_safe_amd64_adjust_end: +sequenceDecs_decodeSync_safe_amd64_after_adjust: MOVQ R13, 8(SP) // Check values @@ -2885,45 +3135,67 @@ sequenceDecs_decodeSync_safe_amd64_match_len_ofs_ok: // Copy literals TESTQ AX, AX JZ check_offset - XORQ R14, R14 - TESTQ $0x00000001, AX - JZ copy_1_word - MOVB (R11)(R14*1), R15 - MOVB 
R15, (R10)(R14*1) - ADDQ $0x01, R14 - -copy_1_word: - TESTQ $0x00000002, AX - JZ copy_1_dword - MOVW (R11)(R14*1), R15 - MOVW R15, (R10)(R14*1) - ADDQ $0x02, R14 - -copy_1_dword: - TESTQ $0x00000004, AX - JZ copy_1_qword - MOVL (R11)(R14*1), R15 - MOVL R15, (R10)(R14*1) - ADDQ $0x04, R14 - -copy_1_qword: - TESTQ $0x00000008, AX - JZ copy_1_test - MOVQ (R11)(R14*1), R15 - MOVQ R15, (R10)(R14*1) - ADDQ $0x08, R14 - JMP copy_1_test + MOVQ AX, R14 + SUBQ $0x10, R14 + JB copy_1_small -copy_1: - MOVUPS (R11)(R14*1), X0 - MOVUPS X0, (R10)(R14*1) - ADDQ $0x10, R14 +copy_1_loop: + MOVUPS (R11), X0 + MOVUPS X0, (R10) + ADDQ $0x10, R11 + ADDQ $0x10, R10 + SUBQ $0x10, R14 + JAE copy_1_loop + LEAQ 16(R11)(R14*1), R11 + LEAQ 16(R10)(R14*1), R10 + MOVUPS -16(R11), X0 + MOVUPS X0, -16(R10) + JMP copy_1_end + +copy_1_small: + CMPQ AX, $0x03 + JE copy_1_move_3 + JB copy_1_move_1or2 + CMPQ AX, $0x08 + JB copy_1_move_4through7 + JMP copy_1_move_8through16 + +copy_1_move_1or2: + MOVB (R11), R14 + MOVB -1(R11)(AX*1), R15 + MOVB R14, (R10) + MOVB R15, -1(R10)(AX*1) + ADDQ AX, R11 + ADDQ AX, R10 + JMP copy_1_end + +copy_1_move_3: + MOVW (R11), R14 + MOVB 2(R11), R15 + MOVW R14, (R10) + MOVB R15, 2(R10) + ADDQ AX, R11 + ADDQ AX, R10 + JMP copy_1_end + +copy_1_move_4through7: + MOVL (R11), R14 + MOVL -4(R11)(AX*1), R15 + MOVL R14, (R10) + MOVL R15, -4(R10)(AX*1) + ADDQ AX, R11 + ADDQ AX, R10 + JMP copy_1_end -copy_1_test: - CMPQ R14, AX - JB copy_1 +copy_1_move_8through16: + MOVQ (R11), R14 + MOVQ -8(R11)(AX*1), R15 + MOVQ R14, (R10) + MOVQ R15, -8(R10)(AX*1) ADDQ AX, R11 ADDQ AX, R10 + +copy_1_end: ADDQ AX, R12 // Malformed input if seq.mo > t+len(hist) || seq.mo > s.windowSize) @@ -2936,149 +3208,206 @@ check_offset: JG error_match_off_too_big // Copy match from history - MOVQ CX, AX - SUBQ R12, AX - JLS copy_match - MOVQ 48(SP), R14 - SUBQ AX, R14 - CMPQ R13, AX - JGE copy_all_from_history - XORQ AX, AX - TESTQ $0x00000001, R13 - JZ copy_4_word - MOVB (R14)(AX*1), CL - MOVB CL, (R10)(AX*1) - ADDQ $0x01, AX - -copy_4_word: - TESTQ $0x00000002, R13 - JZ copy_4_dword - MOVW (R14)(AX*1), CX - MOVW CX, (R10)(AX*1) - ADDQ $0x02, AX - -copy_4_dword: - TESTQ $0x00000004, R13 - JZ copy_4_qword - MOVL (R14)(AX*1), CX - MOVL CX, (R10)(AX*1) - ADDQ $0x04, AX - -copy_4_qword: - TESTQ $0x00000008, R13 - JZ copy_4_test - MOVQ (R14)(AX*1), CX - MOVQ CX, (R10)(AX*1) - ADDQ $0x08, AX - JMP copy_4_test - -copy_4: - MOVUPS (R14)(AX*1), X0 - MOVUPS X0, (R10)(AX*1) - ADDQ $0x10, AX + MOVQ CX, AX + SUBQ R12, AX + JLS copy_match + MOVQ 48(SP), R14 + SUBQ AX, R14 + CMPQ R13, AX + JG copy_all_from_history + MOVQ R13, AX + SUBQ $0x10, AX + JB copy_4_small -copy_4_test: - CMPQ AX, R13 - JB copy_4 - ADDQ R13, R12 +copy_4_loop: + MOVUPS (R14), X0 + MOVUPS X0, (R10) + ADDQ $0x10, R14 + ADDQ $0x10, R10 + SUBQ $0x10, AX + JAE copy_4_loop + LEAQ 16(R14)(AX*1), R14 + LEAQ 16(R10)(AX*1), R10 + MOVUPS -16(R14), X0 + MOVUPS X0, -16(R10) + JMP copy_4_end + +copy_4_small: + CMPQ R13, $0x03 + JE copy_4_move_3 + CMPQ R13, $0x08 + JB copy_4_move_4through7 + JMP copy_4_move_8through16 + +copy_4_move_3: + MOVW (R14), AX + MOVB 2(R14), CL + MOVW AX, (R10) + MOVB CL, 2(R10) + ADDQ R13, R14 + ADDQ R13, R10 + JMP copy_4_end + +copy_4_move_4through7: + MOVL (R14), AX + MOVL -4(R14)(R13*1), CX + MOVL AX, (R10) + MOVL CX, -4(R10)(R13*1) + ADDQ R13, R14 ADDQ R13, R10 + JMP copy_4_end + +copy_4_move_8through16: + MOVQ (R14), AX + MOVQ -8(R14)(R13*1), CX + MOVQ AX, (R10) + MOVQ CX, -8(R10)(R13*1) + ADDQ R13, R14 + ADDQ R13, R10 + +copy_4_end: + ADDQ R13, R12 JMP 
handle_loop JMP loop_finished copy_all_from_history: - XORQ R15, R15 - TESTQ $0x00000001, AX - JZ copy_5_word - MOVB (R14)(R15*1), BP - MOVB BP, (R10)(R15*1) - ADDQ $0x01, R15 - -copy_5_word: - TESTQ $0x00000002, AX - JZ copy_5_dword - MOVW (R14)(R15*1), BP - MOVW BP, (R10)(R15*1) - ADDQ $0x02, R15 - -copy_5_dword: - TESTQ $0x00000004, AX - JZ copy_5_qword - MOVL (R14)(R15*1), BP - MOVL BP, (R10)(R15*1) - ADDQ $0x04, R15 - -copy_5_qword: - TESTQ $0x00000008, AX - JZ copy_5_test - MOVQ (R14)(R15*1), BP - MOVQ BP, (R10)(R15*1) - ADDQ $0x08, R15 - JMP copy_5_test - -copy_5: - MOVUPS (R14)(R15*1), X0 - MOVUPS X0, (R10)(R15*1) - ADDQ $0x10, R15 - -copy_5_test: - CMPQ R15, AX - JB copy_5 + MOVQ AX, R15 + SUBQ $0x10, R15 + JB copy_5_small + +copy_5_loop: + MOVUPS (R14), X0 + MOVUPS X0, (R10) + ADDQ $0x10, R14 + ADDQ $0x10, R10 + SUBQ $0x10, R15 + JAE copy_5_loop + LEAQ 16(R14)(R15*1), R14 + LEAQ 16(R10)(R15*1), R10 + MOVUPS -16(R14), X0 + MOVUPS X0, -16(R10) + JMP copy_5_end + +copy_5_small: + CMPQ AX, $0x03 + JE copy_5_move_3 + JB copy_5_move_1or2 + CMPQ AX, $0x08 + JB copy_5_move_4through7 + JMP copy_5_move_8through16 + +copy_5_move_1or2: + MOVB (R14), R15 + MOVB -1(R14)(AX*1), BP + MOVB R15, (R10) + MOVB BP, -1(R10)(AX*1) + ADDQ AX, R14 + ADDQ AX, R10 + JMP copy_5_end + +copy_5_move_3: + MOVW (R14), R15 + MOVB 2(R14), BP + MOVW R15, (R10) + MOVB BP, 2(R10) + ADDQ AX, R14 ADDQ AX, R10 + JMP copy_5_end + +copy_5_move_4through7: + MOVL (R14), R15 + MOVL -4(R14)(AX*1), BP + MOVL R15, (R10) + MOVL BP, -4(R10)(AX*1) + ADDQ AX, R14 + ADDQ AX, R10 + JMP copy_5_end + +copy_5_move_8through16: + MOVQ (R14), R15 + MOVQ -8(R14)(AX*1), BP + MOVQ R15, (R10) + MOVQ BP, -8(R10)(AX*1) + ADDQ AX, R14 + ADDQ AX, R10 + +copy_5_end: ADDQ AX, R12 SUBQ AX, R13 // Copy match from the current buffer copy_match: - TESTQ R13, R13 - JZ handle_loop - MOVQ R10, AX - SUBQ CX, AX + MOVQ R10, AX + SUBQ CX, AX // ml <= mo CMPQ R13, CX JA copy_overlapping_match // Copy non-overlapping match - ADDQ R13, R12 - XORQ CX, CX - TESTQ $0x00000001, R13 - JZ copy_2_word - MOVB (AX)(CX*1), R14 - MOVB R14, (R10)(CX*1) - ADDQ $0x01, CX - -copy_2_word: - TESTQ $0x00000002, R13 - JZ copy_2_dword - MOVW (AX)(CX*1), R14 - MOVW R14, (R10)(CX*1) - ADDQ $0x02, CX - -copy_2_dword: - TESTQ $0x00000004, R13 - JZ copy_2_qword - MOVL (AX)(CX*1), R14 - MOVL R14, (R10)(CX*1) - ADDQ $0x04, CX - -copy_2_qword: - TESTQ $0x00000008, R13 - JZ copy_2_test - MOVQ (AX)(CX*1), R14 - MOVQ R14, (R10)(CX*1) - ADDQ $0x08, CX - JMP copy_2_test - -copy_2: - MOVUPS (AX)(CX*1), X0 - MOVUPS X0, (R10)(CX*1) - ADDQ $0x10, CX + ADDQ R13, R12 + MOVQ R13, CX + SUBQ $0x10, CX + JB copy_2_small -copy_2_test: - CMPQ CX, R13 - JB copy_2 +copy_2_loop: + MOVUPS (AX), X0 + MOVUPS X0, (R10) + ADDQ $0x10, AX + ADDQ $0x10, R10 + SUBQ $0x10, CX + JAE copy_2_loop + LEAQ 16(AX)(CX*1), AX + LEAQ 16(R10)(CX*1), R10 + MOVUPS -16(AX), X0 + MOVUPS X0, -16(R10) + JMP copy_2_end + +copy_2_small: + CMPQ R13, $0x03 + JE copy_2_move_3 + JB copy_2_move_1or2 + CMPQ R13, $0x08 + JB copy_2_move_4through7 + JMP copy_2_move_8through16 + +copy_2_move_1or2: + MOVB (AX), CL + MOVB -1(AX)(R13*1), R14 + MOVB CL, (R10) + MOVB R14, -1(R10)(R13*1) + ADDQ R13, AX ADDQ R13, R10 - JMP handle_loop + JMP copy_2_end + +copy_2_move_3: + MOVW (AX), CX + MOVB 2(AX), R14 + MOVW CX, (R10) + MOVB R14, 2(R10) + ADDQ R13, AX + ADDQ R13, R10 + JMP copy_2_end + +copy_2_move_4through7: + MOVL (AX), CX + MOVL -4(AX)(R13*1), R14 + MOVL CX, (R10) + MOVL R14, -4(R10)(R13*1) + ADDQ R13, AX + ADDQ R13, R10 + JMP copy_2_end + 
+copy_2_move_8through16: + MOVQ (AX), CX + MOVQ -8(AX)(R13*1), R14 + MOVQ CX, (R10) + MOVQ R14, -8(R10)(R13*1) + ADDQ R13, AX + ADDQ R13, R10 + +copy_2_end: + JMP handle_loop // Copy overlapping match copy_overlapping_match: @@ -3172,6 +3501,10 @@ TEXT ·sequenceDecs_decodeSync_safe_bmi2(SB), $64-32 MOVQ 72(CX), SI MOVQ 80(CX), DI MOVQ 88(CX), R8 + XORQ R9, R9 + MOVQ R9, 8(SP) + MOVQ R9, 16(SP) + MOVQ R9, 24(SP) MOVQ 112(CX), R9 MOVQ 128(CX), R10 MOVQ R10, 32(SP) @@ -3344,7 +3677,7 @@ sequenceDecs_decodeSync_safe_bmi2_skip_update: MOVUPS 144(CX), X0 MOVQ R13, 144(CX) MOVUPS X0, 152(CX) - JMP sequenceDecs_decodeSync_safe_bmi2_adjust_end + JMP sequenceDecs_decodeSync_safe_bmi2_after_adjust sequenceDecs_decodeSync_safe_bmi2_adjust_offsetB_1_or_0: CMPQ 24(SP), $0x00000000 @@ -3356,7 +3689,7 @@ sequenceDecs_decodeSync_safe_bmi2_adjust_offset_maybezero: TESTQ R13, R13 JNZ sequenceDecs_decodeSync_safe_bmi2_adjust_offset_nonzero MOVQ 144(CX), R13 - JMP sequenceDecs_decodeSync_safe_bmi2_adjust_end + JMP sequenceDecs_decodeSync_safe_bmi2_after_adjust sequenceDecs_decodeSync_safe_bmi2_adjust_offset_nonzero: MOVQ R13, R12 @@ -3365,8 +3698,7 @@ sequenceDecs_decodeSync_safe_bmi2_adjust_offset_nonzero: CMPQ R13, $0x03 CMOVQEQ R14, R12 CMOVQEQ R15, R14 - LEAQ 144(CX), R15 - ADDQ (R15)(R12*8), R14 + ADDQ 144(CX)(R12*8), R14 JNZ sequenceDecs_decodeSync_safe_bmi2_adjust_temp_valid MOVQ $0x00000001, R14 @@ -3382,7 +3714,7 @@ sequenceDecs_decodeSync_safe_bmi2_adjust_skip: MOVQ R14, 144(CX) MOVQ R14, R13 -sequenceDecs_decodeSync_safe_bmi2_adjust_end: +sequenceDecs_decodeSync_safe_bmi2_after_adjust: MOVQ R13, 8(SP) // Check values @@ -3415,45 +3747,67 @@ sequenceDecs_decodeSync_safe_bmi2_match_len_ofs_ok: // Copy literals TESTQ CX, CX JZ check_offset - XORQ R14, R14 - TESTQ $0x00000001, CX - JZ copy_1_word - MOVB (R10)(R14*1), R15 - MOVB R15, (R9)(R14*1) - ADDQ $0x01, R14 - -copy_1_word: - TESTQ $0x00000002, CX - JZ copy_1_dword - MOVW (R10)(R14*1), R15 - MOVW R15, (R9)(R14*1) - ADDQ $0x02, R14 - -copy_1_dword: - TESTQ $0x00000004, CX - JZ copy_1_qword - MOVL (R10)(R14*1), R15 - MOVL R15, (R9)(R14*1) - ADDQ $0x04, R14 - -copy_1_qword: - TESTQ $0x00000008, CX - JZ copy_1_test - MOVQ (R10)(R14*1), R15 - MOVQ R15, (R9)(R14*1) - ADDQ $0x08, R14 - JMP copy_1_test + MOVQ CX, R14 + SUBQ $0x10, R14 + JB copy_1_small + +copy_1_loop: + MOVUPS (R10), X0 + MOVUPS X0, (R9) + ADDQ $0x10, R10 + ADDQ $0x10, R9 + SUBQ $0x10, R14 + JAE copy_1_loop + LEAQ 16(R10)(R14*1), R10 + LEAQ 16(R9)(R14*1), R9 + MOVUPS -16(R10), X0 + MOVUPS X0, -16(R9) + JMP copy_1_end + +copy_1_small: + CMPQ CX, $0x03 + JE copy_1_move_3 + JB copy_1_move_1or2 + CMPQ CX, $0x08 + JB copy_1_move_4through7 + JMP copy_1_move_8through16 + +copy_1_move_1or2: + MOVB (R10), R14 + MOVB -1(R10)(CX*1), R15 + MOVB R14, (R9) + MOVB R15, -1(R9)(CX*1) + ADDQ CX, R10 + ADDQ CX, R9 + JMP copy_1_end -copy_1: - MOVUPS (R10)(R14*1), X0 - MOVUPS X0, (R9)(R14*1) - ADDQ $0x10, R14 +copy_1_move_3: + MOVW (R10), R14 + MOVB 2(R10), R15 + MOVW R14, (R9) + MOVB R15, 2(R9) + ADDQ CX, R10 + ADDQ CX, R9 + JMP copy_1_end + +copy_1_move_4through7: + MOVL (R10), R14 + MOVL -4(R10)(CX*1), R15 + MOVL R14, (R9) + MOVL R15, -4(R9)(CX*1) + ADDQ CX, R10 + ADDQ CX, R9 + JMP copy_1_end -copy_1_test: - CMPQ R14, CX - JB copy_1 +copy_1_move_8through16: + MOVQ (R10), R14 + MOVQ -8(R10)(CX*1), R15 + MOVQ R14, (R9) + MOVQ R15, -8(R9)(CX*1) ADDQ CX, R10 ADDQ CX, R9 + +copy_1_end: ADDQ CX, R11 // Malformed input if seq.mo > t+len(hist) || seq.mo > s.windowSize) @@ -3466,149 +3820,206 @@ check_offset: JG 
error_match_off_too_big // Copy match from history - MOVQ R12, CX - SUBQ R11, CX - JLS copy_match - MOVQ 48(SP), R14 - SUBQ CX, R14 - CMPQ R13, CX - JGE copy_all_from_history - XORQ CX, CX - TESTQ $0x00000001, R13 - JZ copy_4_word - MOVB (R14)(CX*1), R12 - MOVB R12, (R9)(CX*1) - ADDQ $0x01, CX - -copy_4_word: - TESTQ $0x00000002, R13 - JZ copy_4_dword - MOVW (R14)(CX*1), R12 - MOVW R12, (R9)(CX*1) - ADDQ $0x02, CX - -copy_4_dword: - TESTQ $0x00000004, R13 - JZ copy_4_qword - MOVL (R14)(CX*1), R12 - MOVL R12, (R9)(CX*1) - ADDQ $0x04, CX - -copy_4_qword: - TESTQ $0x00000008, R13 - JZ copy_4_test - MOVQ (R14)(CX*1), R12 - MOVQ R12, (R9)(CX*1) - ADDQ $0x08, CX - JMP copy_4_test - -copy_4: - MOVUPS (R14)(CX*1), X0 - MOVUPS X0, (R9)(CX*1) - ADDQ $0x10, CX + MOVQ R12, CX + SUBQ R11, CX + JLS copy_match + MOVQ 48(SP), R14 + SUBQ CX, R14 + CMPQ R13, CX + JG copy_all_from_history + MOVQ R13, CX + SUBQ $0x10, CX + JB copy_4_small + +copy_4_loop: + MOVUPS (R14), X0 + MOVUPS X0, (R9) + ADDQ $0x10, R14 + ADDQ $0x10, R9 + SUBQ $0x10, CX + JAE copy_4_loop + LEAQ 16(R14)(CX*1), R14 + LEAQ 16(R9)(CX*1), R9 + MOVUPS -16(R14), X0 + MOVUPS X0, -16(R9) + JMP copy_4_end + +copy_4_small: + CMPQ R13, $0x03 + JE copy_4_move_3 + CMPQ R13, $0x08 + JB copy_4_move_4through7 + JMP copy_4_move_8through16 + +copy_4_move_3: + MOVW (R14), CX + MOVB 2(R14), R12 + MOVW CX, (R9) + MOVB R12, 2(R9) + ADDQ R13, R14 + ADDQ R13, R9 + JMP copy_4_end + +copy_4_move_4through7: + MOVL (R14), CX + MOVL -4(R14)(R13*1), R12 + MOVL CX, (R9) + MOVL R12, -4(R9)(R13*1) + ADDQ R13, R14 + ADDQ R13, R9 + JMP copy_4_end + +copy_4_move_8through16: + MOVQ (R14), CX + MOVQ -8(R14)(R13*1), R12 + MOVQ CX, (R9) + MOVQ R12, -8(R9)(R13*1) + ADDQ R13, R14 + ADDQ R13, R9 -copy_4_test: - CMPQ CX, R13 - JB copy_4 +copy_4_end: ADDQ R13, R11 - ADDQ R13, R9 JMP handle_loop JMP loop_finished copy_all_from_history: - XORQ R15, R15 - TESTQ $0x00000001, CX - JZ copy_5_word - MOVB (R14)(R15*1), BP - MOVB BP, (R9)(R15*1) - ADDQ $0x01, R15 - -copy_5_word: - TESTQ $0x00000002, CX - JZ copy_5_dword - MOVW (R14)(R15*1), BP - MOVW BP, (R9)(R15*1) - ADDQ $0x02, R15 - -copy_5_dword: - TESTQ $0x00000004, CX - JZ copy_5_qword - MOVL (R14)(R15*1), BP - MOVL BP, (R9)(R15*1) - ADDQ $0x04, R15 - -copy_5_qword: - TESTQ $0x00000008, CX - JZ copy_5_test - MOVQ (R14)(R15*1), BP - MOVQ BP, (R9)(R15*1) - ADDQ $0x08, R15 - JMP copy_5_test - -copy_5: - MOVUPS (R14)(R15*1), X0 - MOVUPS X0, (R9)(R15*1) - ADDQ $0x10, R15 - -copy_5_test: - CMPQ R15, CX - JB copy_5 + MOVQ CX, R15 + SUBQ $0x10, R15 + JB copy_5_small + +copy_5_loop: + MOVUPS (R14), X0 + MOVUPS X0, (R9) + ADDQ $0x10, R14 + ADDQ $0x10, R9 + SUBQ $0x10, R15 + JAE copy_5_loop + LEAQ 16(R14)(R15*1), R14 + LEAQ 16(R9)(R15*1), R9 + MOVUPS -16(R14), X0 + MOVUPS X0, -16(R9) + JMP copy_5_end + +copy_5_small: + CMPQ CX, $0x03 + JE copy_5_move_3 + JB copy_5_move_1or2 + CMPQ CX, $0x08 + JB copy_5_move_4through7 + JMP copy_5_move_8through16 + +copy_5_move_1or2: + MOVB (R14), R15 + MOVB -1(R14)(CX*1), BP + MOVB R15, (R9) + MOVB BP, -1(R9)(CX*1) + ADDQ CX, R14 + ADDQ CX, R9 + JMP copy_5_end + +copy_5_move_3: + MOVW (R14), R15 + MOVB 2(R14), BP + MOVW R15, (R9) + MOVB BP, 2(R9) + ADDQ CX, R14 + ADDQ CX, R9 + JMP copy_5_end + +copy_5_move_4through7: + MOVL (R14), R15 + MOVL -4(R14)(CX*1), BP + MOVL R15, (R9) + MOVL BP, -4(R9)(CX*1) + ADDQ CX, R14 ADDQ CX, R9 + JMP copy_5_end + +copy_5_move_8through16: + MOVQ (R14), R15 + MOVQ -8(R14)(CX*1), BP + MOVQ R15, (R9) + MOVQ BP, -8(R9)(CX*1) + ADDQ CX, R14 + ADDQ CX, R9 + +copy_5_end: ADDQ CX, R11 SUBQ 
CX, R13 // Copy match from the current buffer copy_match: - TESTQ R13, R13 - JZ handle_loop - MOVQ R9, CX - SUBQ R12, CX + MOVQ R9, CX + SUBQ R12, CX // ml <= mo CMPQ R13, R12 JA copy_overlapping_match // Copy non-overlapping match - ADDQ R13, R11 - XORQ R12, R12 - TESTQ $0x00000001, R13 - JZ copy_2_word - MOVB (CX)(R12*1), R14 - MOVB R14, (R9)(R12*1) - ADDQ $0x01, R12 - -copy_2_word: - TESTQ $0x00000002, R13 - JZ copy_2_dword - MOVW (CX)(R12*1), R14 - MOVW R14, (R9)(R12*1) - ADDQ $0x02, R12 - -copy_2_dword: - TESTQ $0x00000004, R13 - JZ copy_2_qword - MOVL (CX)(R12*1), R14 - MOVL R14, (R9)(R12*1) - ADDQ $0x04, R12 - -copy_2_qword: - TESTQ $0x00000008, R13 - JZ copy_2_test - MOVQ (CX)(R12*1), R14 - MOVQ R14, (R9)(R12*1) - ADDQ $0x08, R12 - JMP copy_2_test - -copy_2: - MOVUPS (CX)(R12*1), X0 - MOVUPS X0, (R9)(R12*1) - ADDQ $0x10, R12 + ADDQ R13, R11 + MOVQ R13, R12 + SUBQ $0x10, R12 + JB copy_2_small -copy_2_test: - CMPQ R12, R13 - JB copy_2 +copy_2_loop: + MOVUPS (CX), X0 + MOVUPS X0, (R9) + ADDQ $0x10, CX + ADDQ $0x10, R9 + SUBQ $0x10, R12 + JAE copy_2_loop + LEAQ 16(CX)(R12*1), CX + LEAQ 16(R9)(R12*1), R9 + MOVUPS -16(CX), X0 + MOVUPS X0, -16(R9) + JMP copy_2_end + +copy_2_small: + CMPQ R13, $0x03 + JE copy_2_move_3 + JB copy_2_move_1or2 + CMPQ R13, $0x08 + JB copy_2_move_4through7 + JMP copy_2_move_8through16 + +copy_2_move_1or2: + MOVB (CX), R12 + MOVB -1(CX)(R13*1), R14 + MOVB R12, (R9) + MOVB R14, -1(R9)(R13*1) + ADDQ R13, CX ADDQ R13, R9 - JMP handle_loop + JMP copy_2_end + +copy_2_move_3: + MOVW (CX), R12 + MOVB 2(CX), R14 + MOVW R12, (R9) + MOVB R14, 2(R9) + ADDQ R13, CX + ADDQ R13, R9 + JMP copy_2_end + +copy_2_move_4through7: + MOVL (CX), R12 + MOVL -4(CX)(R13*1), R14 + MOVL R12, (R9) + MOVL R14, -4(R9)(R13*1) + ADDQ R13, CX + ADDQ R13, R9 + JMP copy_2_end + +copy_2_move_8through16: + MOVQ (CX), R12 + MOVQ -8(CX)(R13*1), R14 + MOVQ R12, (R9) + MOVQ R14, -8(R9)(R13*1) + ADDQ R13, CX + ADDQ R13, R9 + +copy_2_end: + JMP handle_loop // Copy overlapping match copy_overlapping_match: diff --git a/vendor/github.com/segmentio/kafka-go/compress/gzip/gzip.go b/vendor/github.com/segmentio/kafka-go/compress/gzip/gzip.go index 64da3129d..ad5009c39 100644 --- a/vendor/github.com/segmentio/kafka-go/compress/gzip/gzip.go +++ b/vendor/github.com/segmentio/kafka-go/compress/gzip/gzip.go @@ -1,9 +1,10 @@ package gzip import ( - "compress/gzip" "io" "sync" + + "github.com/klauspost/compress/gzip" ) var ( diff --git a/vendor/github.com/segmentio/kafka-go/compress/snappy/snappy.go b/vendor/github.com/segmentio/kafka-go/compress/snappy/snappy.go index a726ecebc..5bc6194f1 100644 --- a/vendor/github.com/segmentio/kafka-go/compress/snappy/snappy.go +++ b/vendor/github.com/segmentio/kafka-go/compress/snappy/snappy.go @@ -4,6 +4,7 @@ import ( "io" "sync" + "github.com/klauspost/compress/s2" "github.com/klauspost/compress/snappy" ) @@ -16,6 +17,16 @@ const ( Unframed ) +// Compression level. +type Compression int + +const ( + DefaultCompression Compression = iota + FasterCompression + BetterCompression + BestCompression +) + var ( readerPool sync.Pool writerPool sync.Pool @@ -28,6 +39,9 @@ type Codec struct { // // Default to Framed. Framing Framing + + // Compression level. + Compression Compression } // Code implements the compress.Codec interface. 
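Note on the rewritten copy paths above (vendored klauspost/compress zstd seqdec_amd64.s): the per-byte/word/dword/qword test chains are replaced by 16-byte MOVUPS loops, with short tails handled by a pair of possibly overlapping 4- or 8-byte loads and stores. A minimal Go sketch of that tail strategy, purely illustrative — the function name and fallback below are ours; the vendored code implements this in hand-written amd64 assembly, not Go:

package main

import (
	"encoding/binary"
	"fmt"
)

// copyShort mirrors the tail handling in the new assembly: lengths 8..16 are
// done with two possibly overlapping 8-byte loads/stores, 4..7 with two
// 4-byte ones, and anything shorter falls back to a plain byte copy here.
func copyShort(dst, src []byte, n int) {
	switch {
	case n >= 8:
		binary.LittleEndian.PutUint64(dst, binary.LittleEndian.Uint64(src))
		binary.LittleEndian.PutUint64(dst[n-8:], binary.LittleEndian.Uint64(src[n-8:]))
	case n >= 4:
		binary.LittleEndian.PutUint32(dst, binary.LittleEndian.Uint32(src))
		binary.LittleEndian.PutUint32(dst[n-4:], binary.LittleEndian.Uint32(src[n-4:]))
	default:
		copy(dst[:n], src[:n])
	}
}

func main() {
	src := []byte("0123456789abcdef")
	dst := make([]byte, len(src))
	copyShort(dst, src, 11)
	fmt.Printf("%q\n", dst[:11]) // "0123456789a"
}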
@@ -56,12 +70,19 @@ func (c *Codec) NewWriter(w io.Writer) io.WriteCloser { if x != nil { x.Reset(w) } else { - x = &xerialWriter{ - writer: w, - encode: snappy.Encode, - } + x = &xerialWriter{writer: w} } x.framed = c.Framing == Framed + switch c.Compression { + case FasterCompression: + x.encode = s2.EncodeSnappy + case BetterCompression: + x.encode = s2.EncodeSnappyBetter + case BestCompression: + x.encode = s2.EncodeSnappyBest + default: + x.encode = snappy.Encode // aka. s2.EncodeSnappyBetter + } return &writer{xerialWriter: x} } diff --git a/vendor/github.com/segmentio/kafka-go/conn.go b/vendor/github.com/segmentio/kafka-go/conn.go index e32dc2163..aa84f5a49 100644 --- a/vendor/github.com/segmentio/kafka-go/conn.go +++ b/vendor/github.com/segmentio/kafka-go/conn.go @@ -971,48 +971,101 @@ func (c *Conn) ReadPartitions(topics ...string) (partitions []Partition, err err topics = nil } } + metadataVersion, err := c.negotiateVersion(metadata, v1, v6) + if err != nil { + return nil, err + } err = c.readOperation( func(deadline time.Time, id int32) error { - return c.writeRequest(metadata, v1, id, topicMetadataRequestV1(topics)) + switch metadataVersion { + case v6: + return c.writeRequest(metadata, v6, id, topicMetadataRequestV6{Topics: topics, AllowAutoTopicCreation: true}) + default: + return c.writeRequest(metadata, v1, id, topicMetadataRequestV1(topics)) + } }, func(deadline time.Time, size int) error { - var res metadataResponseV1 + partitions, err = c.readPartitionsResponse(metadataVersion, size) + return err + }, + ) + return +} - if err := c.readResponse(size, &res); err != nil { - return err - } +func (c *Conn) readPartitionsResponse(metadataVersion apiVersion, size int) ([]Partition, error) { + switch metadataVersion { + case v6: + var res metadataResponseV6 + if err := c.readResponse(size, &res); err != nil { + return nil, err + } + brokers := readBrokerMetadata(res.Brokers) + return c.readTopicMetadatav6(brokers, res.Topics) + default: + var res metadataResponseV1 + if err := c.readResponse(size, &res); err != nil { + return nil, err + } + brokers := readBrokerMetadata(res.Brokers) + return c.readTopicMetadatav1(brokers, res.Topics) + } +} - brokers := make(map[int32]Broker, len(res.Brokers)) - for _, b := range res.Brokers { - brokers[b.NodeID] = Broker{ - Host: b.Host, - Port: int(b.Port), - ID: int(b.NodeID), - Rack: b.Rack, - } - } +func readBrokerMetadata(brokerMetadata []brokerMetadataV1) map[int32]Broker { + brokers := make(map[int32]Broker, len(brokerMetadata)) + for _, b := range brokerMetadata { + brokers[b.NodeID] = Broker{ + Host: b.Host, + Port: int(b.Port), + ID: int(b.NodeID), + Rack: b.Rack, + } + } + return brokers +} - for _, t := range res.Topics { - if t.TopicErrorCode != 0 && (c.topic == "" || t.TopicName == c.topic) { - // We only report errors if they happened for the topic of - // the connection, otherwise the topic will simply have no - // partitions in the result set. 
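A minimal usage sketch of the Compression knob added to the snappy Codec above; the payload and buffer are ours, and leaving Compression at its zero value (DefaultCompression) keeps the previous snappy.Encode path:

package main

import (
	"bytes"
	"fmt"

	"github.com/segmentio/kafka-go/compress/snappy"
)

func main() {
	// BestCompression routes encoding through s2.EncodeSnappyBest, as wired
	// up in the NewWriter switch shown in the diff above.
	codec := &snappy.Codec{Compression: snappy.BestCompression}

	var buf bytes.Buffer
	w := codec.NewWriter(&buf)
	if _, err := w.Write(bytes.Repeat([]byte("kafka "), 1024)); err != nil {
		panic(err)
	}
	if err := w.Close(); err != nil {
		panic(err)
	}
	fmt.Println("compressed size:", buf.Len())
}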
- return Error(t.TopicErrorCode) - } - for _, p := range t.Partitions { - partitions = append(partitions, Partition{ - Topic: t.TopicName, - Leader: brokers[p.Leader], - Replicas: makeBrokers(brokers, p.Replicas...), - Isr: makeBrokers(brokers, p.Isr...), - ID: int(p.PartitionID), - }) - } - } - return nil - }, - ) +func (c *Conn) readTopicMetadatav1(brokers map[int32]Broker, topicMetadata []topicMetadataV1) (partitions []Partition, err error) { + for _, t := range topicMetadata { + if t.TopicErrorCode != 0 && (c.topic == "" || t.TopicName == c.topic) { + // We only report errors if they happened for the topic of + // the connection, otherwise the topic will simply have no + // partitions in the result set. + return nil, Error(t.TopicErrorCode) + } + for _, p := range t.Partitions { + partitions = append(partitions, Partition{ + Topic: t.TopicName, + Leader: brokers[p.Leader], + Replicas: makeBrokers(brokers, p.Replicas...), + Isr: makeBrokers(brokers, p.Isr...), + ID: int(p.PartitionID), + OfflineReplicas: []Broker{}, + }) + } + } + return +} + +func (c *Conn) readTopicMetadatav6(brokers map[int32]Broker, topicMetadata []topicMetadataV6) (partitions []Partition, err error) { + for _, t := range topicMetadata { + if t.TopicErrorCode != 0 && (c.topic == "" || t.TopicName == c.topic) { + // We only report errors if they happened for the topic of + // the connection, otherwise the topic will simply have no + // partitions in the result set. + return nil, Error(t.TopicErrorCode) + } + for _, p := range t.Partitions { + partitions = append(partitions, Partition{ + Topic: t.TopicName, + Leader: brokers[p.Leader], + Replicas: makeBrokers(brokers, p.Replicas...), + Isr: makeBrokers(brokers, p.Isr...), + ID: int(p.PartitionID), + OfflineReplicas: makeBrokers(brokers, p.OfflineReplicas...), + }) + } + } return } diff --git a/vendor/github.com/segmentio/kafka-go/kafka.go b/vendor/github.com/segmentio/kafka-go/kafka.go index ec139ac91..d2d36e413 100644 --- a/vendor/github.com/segmentio/kafka-go/kafka.go +++ b/vendor/github.com/segmentio/kafka-go/kafka.go @@ -47,6 +47,9 @@ type Partition struct { Replicas []Broker Isr []Broker + // Available only with metadata API level >= 6: + OfflineReplicas []Broker + // An error that may have occurred while attempting to read the partition // metadata. // diff --git a/vendor/github.com/segmentio/kafka-go/metadata.go b/vendor/github.com/segmentio/kafka-go/metadata.go index 4b1309f85..429a6a260 100644 --- a/vendor/github.com/segmentio/kafka-go/metadata.go +++ b/vendor/github.com/segmentio/kafka-go/metadata.go @@ -199,3 +199,89 @@ func (p partitionMetadataV1) writeTo(wb *writeBuffer) { wb.writeInt32Array(p.Replicas) wb.writeInt32Array(p.Isr) } + +type topicMetadataRequestV6 struct { + Topics []string + AllowAutoTopicCreation bool +} + +func (r topicMetadataRequestV6) size() int32 { + return sizeofStringArray([]string(r.Topics)) + 1 +} + +func (r topicMetadataRequestV6) writeTo(wb *writeBuffer) { + // communicate nil-ness to the broker by passing -1 as the array length. + // for this particular request, the broker interpets a zero length array + // as a request for no topics whereas a nil array is for all topics. 
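The conn.go and kafka.go changes above negotiate metadata v6 when the broker supports it and surface OfflineReplicas on each Partition. A minimal sketch of reading them through the existing Conn API; the broker address and topic name are placeholders:

package main

import (
	"fmt"
	"log"

	"github.com/segmentio/kafka-go"
)

func main() {
	conn, err := kafka.Dial("tcp", "localhost:9092") // placeholder broker
	if err != nil {
		log.Fatal(err)
	}
	defer conn.Close()

	partitions, err := conn.ReadPartitions("my-topic") // placeholder topic
	if err != nil {
		log.Fatal(err)
	}
	for _, p := range partitions {
		// OfflineReplicas is populated only when the broker speaks metadata v6+.
		fmt.Printf("partition %d: leader=%d offline=%d\n",
			p.ID, p.Leader.ID, len(p.OfflineReplicas))
	}
}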
+	if r.Topics == nil {
+		wb.writeArrayLen(-1)
+	} else {
+		wb.writeStringArray([]string(r.Topics))
+	}
+	wb.writeBool(r.AllowAutoTopicCreation)
+}
+
+type metadataResponseV6 struct {
+	ThrottleTimeMs int32
+	Brokers        []brokerMetadataV1
+	ClusterId      string
+	ControllerID   int32
+	Topics         []topicMetadataV6
+}
+
+func (r metadataResponseV6) size() int32 {
+	n1 := sizeofArray(len(r.Brokers), func(i int) int32 { return r.Brokers[i].size() })
+	n2 := sizeofNullableString(&r.ClusterId)
+	n3 := sizeofArray(len(r.Topics), func(i int) int32 { return r.Topics[i].size() })
+	return 4 + 4 + n1 + n2 + n3
+}
+
+func (r metadataResponseV6) writeTo(wb *writeBuffer) {
+	wb.writeInt32(r.ThrottleTimeMs)
+	wb.writeArray(len(r.Brokers), func(i int) { r.Brokers[i].writeTo(wb) })
+	wb.writeString(r.ClusterId)
+	wb.writeInt32(r.ControllerID)
+	wb.writeArray(len(r.Topics), func(i int) { r.Topics[i].writeTo(wb) })
+}
+
+type topicMetadataV6 struct {
+	TopicErrorCode int16
+	TopicName      string
+	Internal       bool
+	Partitions     []partitionMetadataV6
+}
+
+func (t topicMetadataV6) size() int32 {
+	return 2 + 1 +
+		sizeofString(t.TopicName) +
+		sizeofArray(len(t.Partitions), func(i int) int32 { return t.Partitions[i].size() })
+}
+
+func (t topicMetadataV6) writeTo(wb *writeBuffer) {
+	wb.writeInt16(t.TopicErrorCode)
+	wb.writeString(t.TopicName)
+	wb.writeBool(t.Internal)
+	wb.writeArray(len(t.Partitions), func(i int) { t.Partitions[i].writeTo(wb) })
+}
+
+type partitionMetadataV6 struct {
+	PartitionErrorCode int16
+	PartitionID        int32
+	Leader             int32
+	Replicas           []int32
+	Isr                []int32
+	OfflineReplicas    []int32
+}
+
+func (p partitionMetadataV6) size() int32 {
+	return 2 + 4 + 4 + sizeofInt32Array(p.Replicas) + sizeofInt32Array(p.Isr) + sizeofInt32Array(p.OfflineReplicas)
+}
+
+func (p partitionMetadataV6) writeTo(wb *writeBuffer) {
+	wb.writeInt16(p.PartitionErrorCode)
+	wb.writeInt32(p.PartitionID)
+	wb.writeInt32(p.Leader)
+	wb.writeInt32Array(p.Replicas)
+	wb.writeInt32Array(p.Isr)
+	wb.writeInt32Array(p.OfflineReplicas)
+}
diff --git a/vendor/github.com/segmentio/kafka-go/offsetdelete.go b/vendor/github.com/segmentio/kafka-go/offsetdelete.go
new file mode 100644
index 000000000..ea526eb25
--- /dev/null
+++ b/vendor/github.com/segmentio/kafka-go/offsetdelete.go
@@ -0,0 +1,106 @@
+package kafka
+
+import (
+	"context"
+	"fmt"
+	"net"
+	"time"
+
+	"github.com/segmentio/kafka-go/protocol/offsetdelete"
+)
+
+// OffsetDelete deletes the offset for a consumer group on a particular topic
+// for a particular partition.
+type OffsetDelete struct {
+	Topic     string
+	Partition int
+}
+
+// OffsetDeleteRequest represents a request sent to a kafka broker to delete
+// the offsets for a partition on a given topic associated with a consumer group.
+type OffsetDeleteRequest struct {
+	// Address of the kafka broker to send the request to.
+	Addr net.Addr
+
+	// ID of the consumer group to delete the offsets for.
+	GroupID string
+
+	// Set of topic partitions to delete offsets for.
+	Topics map[string][]int
+}
+
+// OffsetDeleteResponse represents a response from a kafka broker to a delete
+// offset request.
+type OffsetDeleteResponse struct {
+	// An error that may have occurred while attempting to delete an offset
+	Error error
+
+	// The amount of time that the broker throttled the request.
+	Throttle time.Duration
+
+	// Set of topic partitions that the kafka broker has additional info (error?)
+	// for.
+	Topics map[string][]OffsetDeletePartition
+}
+
+// OffsetDeletePartition represents the state of a status of a partition in response
+// to deleting offsets.
+type OffsetDeletePartition struct {
+	// ID of the partition.
+	Partition int
+
+	// An error that may have occurred while attempting to delete an offset for
+	// this partition.
+	Error error
+}
+
+// OffsetDelete sends a delete offset request to a kafka broker and returns the
+// response.
+func (c *Client) OffsetDelete(ctx context.Context, req *OffsetDeleteRequest) (*OffsetDeleteResponse, error) {
+	topics := make([]offsetdelete.RequestTopic, 0, len(req.Topics))
+
+	for topicName, partitionIndexes := range req.Topics {
+		partitions := make([]offsetdelete.RequestPartition, len(partitionIndexes))
+
+		for i, c := range partitionIndexes {
+			partitions[i] = offsetdelete.RequestPartition{
+				PartitionIndex: int32(c),
+			}
+		}
+
+		topics = append(topics, offsetdelete.RequestTopic{
+			Name:       topicName,
+			Partitions: partitions,
+		})
+	}
+
+	m, err := c.roundTrip(ctx, req.Addr, &offsetdelete.Request{
+		GroupID: req.GroupID,
+		Topics:  topics,
+	})
+	if err != nil {
+		return nil, fmt.Errorf("kafka.(*Client).OffsetDelete: %w", err)
+	}
+	r := m.(*offsetdelete.Response)
+
+	res := &OffsetDeleteResponse{
+		Error:    makeError(r.ErrorCode, ""),
+		Throttle: makeDuration(r.ThrottleTimeMs),
+		Topics:   make(map[string][]OffsetDeletePartition, len(r.Topics)),
+	}
+
+	for _, topic := range r.Topics {
+		partitions := make([]OffsetDeletePartition, len(topic.Partitions))
+
+		for i, p := range topic.Partitions {
+			partitions[i] = OffsetDeletePartition{
+				Partition: int(p.PartitionIndex),
+				Error:     makeError(p.ErrorCode, ""),
+			}
+		}
+
+		res.Topics[topic.Name] = partitions
+	}
+
+	return res, nil
+}
diff --git a/vendor/github.com/segmentio/kafka-go/protocol.go b/vendor/github.com/segmentio/kafka-go/protocol.go
index 829fabf54..37208abf1 100644
--- a/vendor/github.com/segmentio/kafka-go/protocol.go
+++ b/vendor/github.com/segmentio/kafka-go/protocol.go
@@ -107,10 +107,11 @@ const (
 	v2  = 2
 	v3  = 3
 	v5  = 5
+	v6  = 6
 	v7  = 7
 	v10 = 10
 
-	// Unused protocol versions: v4, v6, v8, v9.
+	// Unused protocol versions: v4, v8, v9.
 )
 
 var apiKeyStrings = [...]string{
diff --git a/vendor/github.com/segmentio/kafka-go/protocol/offsetdelete/offsetdelete.go b/vendor/github.com/segmentio/kafka-go/protocol/offsetdelete/offsetdelete.go
new file mode 100644
index 000000000..bda619f3c
--- /dev/null
+++ b/vendor/github.com/segmentio/kafka-go/protocol/offsetdelete/offsetdelete.go
@@ -0,0 +1,47 @@
+package offsetdelete
+
+import "github.com/segmentio/kafka-go/protocol"
+
+func init() {
+	protocol.Register(&Request{}, &Response{})
+}
+
+type Request struct {
+	GroupID string         `kafka:"min=v0,max=v0"`
+	Topics  []RequestTopic `kafka:"min=v0,max=v0"`
+}
+
+func (r *Request) ApiKey() protocol.ApiKey { return protocol.OffsetDelete }
+
+func (r *Request) Group() string { return r.GroupID }
+
+type RequestTopic struct {
+	Name       string             `kafka:"min=v0,max=v0"`
+	Partitions []RequestPartition `kafka:"min=v0,max=v0"`
+}
+
+type RequestPartition struct {
+	PartitionIndex int32 `kafka:"min=v0,max=v0"`
+}
+
+var (
+	_ protocol.GroupMessage = (*Request)(nil)
+)
+
+type Response struct {
+	ErrorCode      int16           `kafka:"min=v0,max=v0"`
+	ThrottleTimeMs int32           `kafka:"min=v0,max=v0"`
+	Topics         []ResponseTopic `kafka:"min=v0,max=v0"`
+}
+
+func (r *Response) ApiKey() protocol.ApiKey { return protocol.OffsetDelete }
+
+type ResponseTopic struct {
+	Name       string              `kafka:"min=v0,max=v0"`
+	Partitions []ResponsePartition `kafka:"min=v0,max=v0"`
+}
+
+type ResponsePartition struct {
+	PartitionIndex int32 `kafka:"min=v0,max=v0"`
+	ErrorCode      int16 `kafka:"min=v0,max=v0"`
+}
diff --git a/vendor/github.com/segmentio/kafka-go/reader.go b/vendor/github.com/segmentio/kafka-go/reader.go
index facaf7090..8e46c338f 100644
--- a/vendor/github.com/segmentio/kafka-go/reader.go
+++ b/vendor/github.com/segmentio/kafka-go/reader.go
@@ -277,7 +277,7 @@ func (r *Reader) commitLoop(ctx context.Context, gen *Generation) {
 		l.Printf("stopped commit for group %s\n", r.config.GroupID)
 	})
 
-	if r.config.CommitInterval == 0 {
+	if r.useSyncCommits() {
 		r.commitLoopImmediate(ctx, gen)
 	} else {
 		r.commitLoopInterval(ctx, gen)
@@ -398,6 +398,11 @@ type ReaderConfig struct {
 	// Default: 10s
 	MaxWait time.Duration
 
+	// ReadBatchTimeout amount of time to wait to fetch message from kafka messages batch.
+	//
+	// Default: 10s
+	ReadBatchTimeout time.Duration
+
 	// ReadLagInterval sets the frequency at which the reader lag is updated.
 	// Setting this field to a negative value disables lag reporting.
 	ReadLagInterval time.Duration
@@ -649,6 +654,10 @@ func NewReader(config ReaderConfig) *Reader {
 		config.MaxWait = 10 * time.Second
 	}
 
+	if config.ReadBatchTimeout == 0 {
+		config.ReadBatchTimeout = 10 * time.Second
+	}
+
 	if config.ReadLagInterval == 0 {
 		config.ReadLagInterval = 1 * time.Minute
 	}
@@ -1052,12 +1061,16 @@ func (r *Reader) SetOffsetAt(ctx context.Context, t time.Time) error {
 	}
 	r.mutex.Unlock()
 
+	if len(r.config.Brokers) < 1 {
+		return errors.New("no brokers in config")
+	}
+	var conn *Conn
+	var err error
 	for _, broker := range r.config.Brokers {
-		conn, err := r.config.Dialer.DialLeader(ctx, "tcp", broker, r.config.Topic, r.config.Partition)
+		conn, err = r.config.Dialer.DialLeader(ctx, "tcp", broker, r.config.Topic, r.config.Partition)
 		if err != nil {
 			continue
 		}
-
 		deadline, _ := ctx.Deadline()
 		conn.SetDeadline(deadline)
 		offset, err := conn.ReadOffset(t)
@@ -1068,7 +1081,7 @@ func (r *Reader) SetOffsetAt(ctx context.Context, t time.Time) error {
 		return r.SetOffset(offset)
 	}
 
-	return fmt.Errorf("error setting offset for timestamp %+v", t)
+	return fmt.Errorf("error dialing all brokers, one of the errors: %w", err)
 }
 
 // Stats returns a snapshot of the reader stats since the last time the method
@@ -1181,22 +1194,23 @@ func (r *Reader) start(offsetsByPartition map[topicPartition]int64) {
 			defer join.Done()
 
 			(&reader{
-				dialer:          r.config.Dialer,
-				logger:          r.config.Logger,
-				errorLogger:     r.config.ErrorLogger,
-				brokers:         r.config.Brokers,
-				topic:           key.topic,
-				partition:       int(key.partition),
-				minBytes:        r.config.MinBytes,
-				maxBytes:        r.config.MaxBytes,
-				maxWait:         r.config.MaxWait,
-				backoffDelayMin: r.config.ReadBackoffMin,
-				backoffDelayMax: r.config.ReadBackoffMax,
-				version:         r.version,
-				msgs:            r.msgs,
-				stats:           r.stats,
-				isolationLevel:  r.config.IsolationLevel,
-				maxAttempts:     r.config.MaxAttempts,
+				dialer:           r.config.Dialer,
+				logger:           r.config.Logger,
+				errorLogger:      r.config.ErrorLogger,
+				brokers:          r.config.Brokers,
+				topic:            key.topic,
+				partition:        int(key.partition),
+				minBytes:         r.config.MinBytes,
+				maxBytes:         r.config.MaxBytes,
+				maxWait:          r.config.MaxWait,
+				readBatchTimeout: r.config.ReadBatchTimeout,
+				backoffDelayMin:  r.config.ReadBackoffMin,
+				backoffDelayMax:  r.config.ReadBackoffMax,
+				version:          r.version,
+				msgs:             r.msgs,
+				stats:            r.stats,
+				isolationLevel:   r.config.IsolationLevel,
+				maxAttempts:      r.config.MaxAttempts,
 
 				// backwards-compatibility flags
 				offsetOutOfRangeError: r.config.OffsetOutOfRangeError,
@@ -1209,22 +1223,23 @@
 // used as an way to asynchronously fetch messages while the main program reads
 // them using the high level reader API.
 type reader struct {
-	dialer          *Dialer
-	logger          Logger
-	errorLogger     Logger
-	brokers         []string
-	topic           string
-	partition       int
-	minBytes        int
-	maxBytes        int
-	maxWait         time.Duration
-	backoffDelayMin time.Duration
-	backoffDelayMax time.Duration
-	version         int64
-	msgs            chan<- readerMessage
-	stats           *readerStats
-	isolationLevel  IsolationLevel
-	maxAttempts     int
+	dialer           *Dialer
+	logger           Logger
+	errorLogger      Logger
+	brokers          []string
+	topic            string
+	partition        int
+	minBytes         int
+	maxBytes         int
+	maxWait          time.Duration
+	readBatchTimeout time.Duration
+	backoffDelayMin  time.Duration
+	backoffDelayMax  time.Duration
+	version          int64
+	msgs             chan<- readerMessage
+	stats            *readerStats
+	isolationLevel   IsolationLevel
+	maxAttempts      int
 
 	offsetOutOfRangeError bool
 }
@@ -1492,15 +1507,8 @@ func (r *reader) read(ctx context.Context, offset int64, conn *Conn) (int64, err
 	var size int64
 	var bytes int64
 
-	const safetyTimeout = 10 * time.Second
-	deadline := time.Now().Add(safetyTimeout)
-	conn.SetReadDeadline(deadline)
-
 	for {
-		if now := time.Now(); deadline.Sub(now) < (safetyTimeout / 2) {
-			deadline = now.Add(safetyTimeout)
-			conn.SetReadDeadline(deadline)
-		}
+		conn.SetReadDeadline(time.Now().Add(r.readBatchTimeout))
 
 		if msg, err = batch.ReadMessage(); err != nil {
 			batch.Close()
diff --git a/vendor/github.com/segmentio/kafka-go/writer.go b/vendor/github.com/segmentio/kafka-go/writer.go
index 8d48e95cd..dc9c77f78 100644
--- a/vendor/github.com/segmentio/kafka-go/writer.go
+++ b/vendor/github.com/segmentio/kafka-go/writer.go
@@ -100,6 +100,18 @@ type Writer struct {
 	// The default is to try at most 10 times.
 	MaxAttempts int
 
+	// WriteBackoffMin optionally sets the smallest amount of time the writer waits before
+	// it attempts to write a batch of messages
+	//
+	// Default: 100ms
+	WriteBackoffMin time.Duration
+
+	// WriteBackoffMax optionally sets the maximum amount of time the writer waits before
+	// it attempts to write a batch of messages
+	//
+	// Default: 1s
+	WriteBackoffMax time.Duration
+
 	// Limit on how many messages will be buffered before being sent to a
 	// partition.
 	//
@@ -343,17 +355,19 @@ type WriterStats struct {
 	BatchTime  DurationStats `metric:"kafka.writer.batch.seconds"`
 	WriteTime  DurationStats `metric:"kafka.writer.write.seconds"`
 	WaitTime   DurationStats `metric:"kafka.writer.wait.seconds"`
-	Retries    SummaryStats  `metric:"kafka.writer.retries.count"`
+	Retries    int64         `metric:"kafka.writer.retries.count" type:"counter"`
 	BatchSize  SummaryStats  `metric:"kafka.writer.batch.size"`
 	BatchBytes SummaryStats  `metric:"kafka.writer.batch.bytes"`
 
-	MaxAttempts  int64         `metric:"kafka.writer.attempts.max" type:"gauge"`
-	MaxBatchSize int64         `metric:"kafka.writer.batch.max" type:"gauge"`
-	BatchTimeout time.Duration `metric:"kafka.writer.batch.timeout" type:"gauge"`
-	ReadTimeout  time.Duration `metric:"kafka.writer.read.timeout" type:"gauge"`
-	WriteTimeout time.Duration `metric:"kafka.writer.write.timeout" type:"gauge"`
-	RequiredAcks int64         `metric:"kafka.writer.acks.required" type:"gauge"`
-	Async        bool          `metric:"kafka.writer.async" type:"gauge"`
+	MaxAttempts     int64         `metric:"kafka.writer.attempts.max" type:"gauge"`
+	WriteBackoffMin time.Duration `metric:"kafka.writer.backoff.min" type:"gauge"`
+	WriteBackoffMax time.Duration `metric:"kafka.writer.backoff.max" type:"gauge"`
+	MaxBatchSize    int64         `metric:"kafka.writer.batch.max" type:"gauge"`
+	BatchTimeout    time.Duration `metric:"kafka.writer.batch.timeout" type:"gauge"`
+	ReadTimeout     time.Duration `metric:"kafka.writer.read.timeout" type:"gauge"`
+	WriteTimeout    time.Duration `metric:"kafka.writer.write.timeout" type:"gauge"`
+	RequiredAcks    int64         `metric:"kafka.writer.acks.required" type:"gauge"`
+	Async           bool          `metric:"kafka.writer.async" type:"gauge"`
 
 	Topic string `tag:"topic"`
 
@@ -390,7 +404,7 @@ type writerStats struct {
 	batchTime      summary
 	writeTime      summary
 	waitTime       summary
-	retries        summary
+	retries        counter
 	batchSize      summary
 	batchSizeBytes summary
 }
@@ -779,6 +793,20 @@ func (w *Writer) maxAttempts() int {
 	return 10
 }
 
+func (w *Writer) writeBackoffMin() time.Duration {
+	if w.WriteBackoffMin > 0 {
+		return w.WriteBackoffMin
+	}
+	return 100 * time.Millisecond
+}
+
+func (w *Writer) writeBackoffMax() time.Duration {
+	if w.WriteBackoffMax > 0 {
+		return w.WriteBackoffMax
+	}
+	return 1 * time.Second
+}
+
 func (w *Writer) batchSize() int {
 	if w.BatchSize > 0 {
 		return w.BatchSize
@@ -849,26 +877,28 @@ func (w *Writer) stats() *writerStats {
 func (w *Writer) Stats() WriterStats {
 	stats := w.stats()
 	return WriterStats{
-		Dials:        stats.dials.snapshot(),
-		Writes:       stats.writes.snapshot(),
-		Messages:     stats.messages.snapshot(),
-		Bytes:        stats.bytes.snapshot(),
-		Errors:       stats.errors.snapshot(),
-		DialTime:     stats.dialTime.snapshotDuration(),
-		BatchTime:    stats.batchTime.snapshotDuration(),
-		WriteTime:    stats.writeTime.snapshotDuration(),
-		WaitTime:     stats.waitTime.snapshotDuration(),
-		Retries:      stats.retries.snapshot(),
-		BatchSize:    stats.batchSize.snapshot(),
-		BatchBytes:   stats.batchSizeBytes.snapshot(),
-		MaxAttempts:  int64(w.MaxAttempts),
-		MaxBatchSize: int64(w.BatchSize),
-		BatchTimeout: w.BatchTimeout,
-		ReadTimeout:  w.ReadTimeout,
-		WriteTimeout: w.WriteTimeout,
-		RequiredAcks: int64(w.RequiredAcks),
-		Async:        w.Async,
-		Topic:        w.Topic,
+		Dials:           stats.dials.snapshot(),
+		Writes:          stats.writes.snapshot(),
+		Messages:        stats.messages.snapshot(),
+		Bytes:           stats.bytes.snapshot(),
+		Errors:          stats.errors.snapshot(),
+		DialTime:        stats.dialTime.snapshotDuration(),
+		BatchTime:       stats.batchTime.snapshotDuration(),
+		WriteTime:       stats.writeTime.snapshotDuration(),
+		WaitTime:        stats.waitTime.snapshotDuration(),
+		Retries:         stats.retries.snapshot(),
+		BatchSize:       stats.batchSize.snapshot(),
+		BatchBytes:      stats.batchSizeBytes.snapshot(),
+		MaxAttempts:     int64(w.MaxAttempts),
+		WriteBackoffMin: w.WriteBackoffMin,
+		WriteBackoffMax: w.WriteBackoffMax,
+		MaxBatchSize:    int64(w.BatchSize),
+		BatchTimeout:    w.BatchTimeout,
+		ReadTimeout:     w.ReadTimeout,
+		WriteTimeout:    w.WriteTimeout,
+		RequiredAcks:    int64(w.RequiredAcks),
+		Async:           w.Async,
+		Topic:           w.Topic,
 	}
 }
 
@@ -1086,7 +1116,7 @@ func (ptw *partitionWriter) writeBatch(batch *writeBatch) {
 			// guarantees to abort, but may be better to avoid long wait times
 			// on close.
 			//
-			delay := backoff(attempt, 100*time.Millisecond, 1*time.Second)
+			delay := backoff(attempt, ptw.w.writeBackoffMin(), ptw.w.writeBackoffMax())
 			ptw.w.withLogger(func(log Logger) {
 				log.Printf("backing off %s writing %d messages to %s (partition: %d)", delay, len(batch.msgs), key.topic, key.partition)
 			})
diff --git a/vendor/modules.txt b/vendor/modules.txt
index 2d7cba3ed..cd533f1f3 100644
--- a/vendor/modules.txt
+++ b/vendor/modules.txt
@@ -120,10 +120,12 @@ github.com/jpillora/backoff
 # github.com/json-iterator/go v1.1.12
 ## explicit; go 1.12
 github.com/json-iterator/go
-# github.com/klauspost/compress v1.15.7
+# github.com/klauspost/compress v1.15.9
 ## explicit; go 1.16
 github.com/klauspost/compress
+github.com/klauspost/compress/flate
 github.com/klauspost/compress/fse
+github.com/klauspost/compress/gzip
 github.com/klauspost/compress/huff0
 github.com/klauspost/compress/internal/cpuinfo
 github.com/klauspost/compress/internal/snapref
@@ -198,8 +200,6 @@ github.com/netsampler/goflow2/utils
 # github.com/pelletier/go-toml v1.9.4
 ## explicit; go 1.12
 github.com/pelletier/go-toml
-# github.com/pierrec/lz4 v2.6.1+incompatible
-## explicit
 # github.com/pierrec/lz4/v4 v4.1.15
 ## explicit; go 1.14
 github.com/pierrec/lz4/v4
@@ -245,7 +245,7 @@ github.com/prometheus/prometheus/tsdb/errors
 github.com/prometheus/prometheus/tsdb/fileutil
 github.com/prometheus/prometheus/tsdb/tsdbutil
 github.com/prometheus/prometheus/util/strutil
-# github.com/segmentio/kafka-go v0.4.35
+# github.com/segmentio/kafka-go v0.4.38
 ## explicit; go 1.15
 github.com/segmentio/kafka-go
 github.com/segmentio/kafka-go/compress
@@ -279,6 +279,7 @@ github.com/segmentio/kafka-go/protocol/listgroups
 github.com/segmentio/kafka-go/protocol/listoffsets
 github.com/segmentio/kafka-go/protocol/metadata
 github.com/segmentio/kafka-go/protocol/offsetcommit
+github.com/segmentio/kafka-go/protocol/offsetdelete
 github.com/segmentio/kafka-go/protocol/offsetfetch
 github.com/segmentio/kafka-go/protocol/produce
 github.com/segmentio/kafka-go/protocol/saslauthenticate
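
Usage notes on the API surface pulled in by this bump follow. They are illustrative sketches against what the diff shows, not part of the vendored change itself.

The first hunk in this excerpt (its file header falls outside the excerpt, but the xerialWriter, Framing, and s2 references suggest the compress/snappy codec) lets the snappy codec choose between the s2 snappy-compatible encoders. A minimal sketch, assuming the Codec type and the FasterCompression/BetterCompression/BestCompression constants shown in the hunk are exported from github.com/segmentio/kafka-go/compress/snappy:

package main

import (
	"bytes"
	"log"

	"github.com/segmentio/kafka-go/compress/snappy"
)

func main() {
	// Pick the fastest snappy-compatible encoder; leaving Compression at its
	// zero value keeps the previous default behavior per the diff.
	codec := &snappy.Codec{Compression: snappy.FasterCompression}

	var buf bytes.Buffer
	w := codec.NewWriter(&buf)
	if _, err := w.Write([]byte("some payload")); err != nil {
		log.Fatal(err)
	}
	if err := w.Close(); err != nil {
		log.Fatal(err)
	}
	log.Printf("compressed to %d bytes", buf.Len())
}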
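
The conn.go and metadata.go hunks negotiate metadata API v6 when the broker supports it, which populates the new Partition.OfflineReplicas field returned by ReadPartitions. A sketch of reading it; the broker address and topic name are placeholders:

package main

import (
	"context"
	"log"

	kafka "github.com/segmentio/kafka-go"
)

func main() {
	conn, err := kafka.DialContext(context.Background(), "tcp", "localhost:9092")
	if err != nil {
		log.Fatal(err)
	}
	defer conn.Close()

	partitions, err := conn.ReadPartitions("example-topic")
	if err != nil {
		log.Fatal(err)
	}
	for _, p := range partitions {
		// OfflineReplicas is only filled in when the broker negotiates
		// metadata v6 or newer; against older brokers it stays empty.
		log.Printf("partition %d: leader %d, %d offline replicas",
			p.ID, p.Leader.ID, len(p.OfflineReplicas))
	}
}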
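
The new offsetdelete.go adds Client.OffsetDelete, which deletes committed offsets for a consumer group. A minimal sketch based on the request/response types in the diff; the broker address, group, topic, and partitions are placeholders:

package main

import (
	"context"
	"log"

	kafka "github.com/segmentio/kafka-go"
)

func main() {
	client := &kafka.Client{Addr: kafka.TCP("localhost:9092")}

	resp, err := client.OffsetDelete(context.Background(), &kafka.OffsetDeleteRequest{
		Addr:    client.Addr,
		GroupID: "example-group",
		Topics: map[string][]int{
			"example-topic": {0, 1, 2},
		},
	})
	if err != nil {
		log.Fatal(err)
	}
	if resp.Error != nil {
		log.Fatal(resp.Error)
	}
	// Per-partition errors come back in resp.Topics.
	for topic, partitions := range resp.Topics {
		for _, p := range partitions {
			log.Printf("topic %s partition %d: err=%v", topic, p.Partition, p.Error)
		}
	}
}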
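
The reader.go hunks replace the hard-coded 10s safetyTimeout in (*reader).read with a configurable ReaderConfig.ReadBatchTimeout (still defaulting to 10s), and SetOffsetAt now reports the last dial error instead of a generic message. A sketch of setting the new field; brokers, topic, and group are placeholders:

package main

import (
	"context"
	"log"
	"time"

	kafka "github.com/segmentio/kafka-go"
)

func main() {
	r := kafka.NewReader(kafka.ReaderConfig{
		Brokers: []string{"localhost:9092"},
		Topic:   "example-topic",
		GroupID: "example-group",
		MaxWait: 5 * time.Second,
		// New in this version: used as the connection read deadline while
		// reading each message out of a fetched batch.
		ReadBatchTimeout: 15 * time.Second,
	})
	defer r.Close()

	msg, err := r.ReadMessage(context.Background())
	if err != nil {
		log.Fatal(err)
	}
	log.Printf("offset %d: %s", msg.Offset, string(msg.Value))
}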
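
The writer.go hunks make the batch-retry backoff configurable through Writer.WriteBackoffMin and Writer.WriteBackoffMax (previously fixed at 100ms and 1s), and change WriterStats.Retries from a SummaryStats value to a plain int64 counter, which matters to callers that scrape Stats(). A sketch; broker address and topic are placeholders:

package main

import (
	"context"
	"log"
	"time"

	kafka "github.com/segmentio/kafka-go"
)

func main() {
	w := &kafka.Writer{
		Addr:  kafka.TCP("localhost:9092"),
		Topic: "example-topic",
		// New in this version: bounds for the retry backoff applied when a
		// batch write fails.
		WriteBackoffMin: 250 * time.Millisecond,
		WriteBackoffMax: 5 * time.Second,
	}
	defer w.Close()

	if err := w.WriteMessages(context.Background(), kafka.Message{Value: []byte("hello")}); err != nil {
		log.Fatal(err)
	}

	// Retries is now an int64 counter rather than a summary.
	log.Printf("retries so far: %d", w.Stats().Retries)
}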