diff --git a/block_test.go b/block_test.go index 7da8f82..65ef667 100644 --- a/block_test.go +++ b/block_test.go @@ -28,7 +28,7 @@ import ( func Test_genericBlock_BlockDigest(t *testing.T) { content := "foo" - digest := "sha1:0BEEC7B5EA3F0FDBC95D0DD47F3C5BC275DA8A33" + digest := "sha1:0beec7b5ea3f0fdbc95d0dd47f3c5bc275da8a33" tests := []blockDigestTest{ { @@ -57,7 +57,7 @@ func Test_genericBlock_BlockDigest(t *testing.T) { func Test_genericBlock_Cache(t *testing.T) { content := "foo" - digest := "sha1:0BEEC7B5EA3F0FDBC95D0DD47F3C5BC275DA8A33" + digest := "sha1:0beec7b5ea3f0fdbc95d0dd47f3c5bc275da8a33" tests := []cacheTest{ { @@ -126,7 +126,7 @@ func Test_genericBlock_IsCached(t *testing.T) { func Test_genericBlock_RawBytes(t *testing.T) { content := "foo" - digest := "sha1:0BEEC7B5EA3F0FDBC95D0DD47F3C5BC275DA8A33" + digest := "sha1:0beec7b5ea3f0fdbc95d0dd47f3c5bc275da8a33" tests := []rawBytesTest{ { @@ -158,7 +158,7 @@ func Test_genericBlock_RawBytes(t *testing.T) { func Test_warcfieldsBlock_BlockDigest(t *testing.T) { content := "foo: bar\r\ncontent-type:bb\r\n" - digest := "sha1:A1D43D400C5985BEE035C4E5A2E08F3D57989596" + digest := "sha1:a1d43d400c5985bee035c4e5a2e08f3d57989596" tests := []blockDigestTest{ { @@ -191,7 +191,7 @@ func Test_warcfieldsBlock_BlockDigest(t *testing.T) { func Test_warcfieldsBlock_Cache(t *testing.T) { content := "foo: bar\r\ncontent-type:bb\r\n" - digest := "sha1:A1D43D400C5985BEE035C4E5A2E08F3D57989596" + digest := "sha1:a1d43d400c5985bee035c4e5a2e08f3d57989596" tests := []cacheTest{ { @@ -272,7 +272,7 @@ func Test_warcfieldsBlock_IsCached(t *testing.T) { func Test_warcfieldsBlock_RawBytes(t *testing.T) { content := "foo: bar\r\ncontent-type:bb\r\n" - digest := "sha1:A1D43D400C5985BEE035C4E5A2E08F3D57989596" + digest := "sha1:a1d43d400c5985bee035c4e5a2e08f3d57989596" tests := []rawBytesTest{ { @@ -313,8 +313,8 @@ func Test_httpRequestBlock_BlockDigest(t *testing.T) { "Referer: http://example.com/foo.html\n" + "Connection: close\n" + "User-Agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36\n\n" - digest := "sha1:A3781FF1FC3FB52318F623E22C85D63D74C12932" - payloadDigest := "sha1:DA39A3EE5E6B4B0D3255BFEF95601890AFD80709" + digest := "sha1:a3781ff1fc3fb52318f623e22c85d63d74c12932" + payloadDigest := "sha1:da39a3ee5e6b4b0d3255bfef95601890afd80709" tests := []blockDigestTest{ { @@ -354,7 +354,7 @@ func Test_httpRequestBlock_Cache(t *testing.T) { "Referer: http://example.com/foo.html\n" + "Connection: close\n" + "User-Agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36\n\n" - digest := "sha1:A3781FF1FC3FB52318F623E22C85D63D74C12932" + digest := "sha1:a3781ff1fc3fb52318f623e22c85d63d74c12932" tests := []cacheTest{ { @@ -443,7 +443,7 @@ func Test_httpRequestBlock_RawBytes(t *testing.T) { "Referer: http://example.com/foo.html\n" + "Connection: close\n" + "User-Agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36\n\n" - digest := "sha1:A3781FF1FC3FB52318F623E22C85D63D74C12932" + digest := "sha1:a3781ff1fc3fb52318f623e22c85d63d74c12932" tests := []rawBytesTest{ { @@ -482,8 +482,8 @@ func Test_httpResponseBlock_BlockDigest(t *testing.T) { content := "HTTP/1.1 200 OK\nDate: Tue, 19 Sep 2016 17:18:40 GMT\nServer: Apache/2.0.54 (Ubuntu)\n" + "Last-Modified: Mon, 16 Jun 2013 22:28:51 GMT\nETag: \"3e45-67e-2ed02ec0\"\nAccept-Ranges: bytes\n" + "Content-Length: 19\nConnection: close\nContent-Type: text/plain\n\nThis is the content" - digest := "sha1:B285747AD7CC57AA74BCE2E30B453C8D1CB71BA4" - payloadDigest := "sha1:C37FFB221569C553A2476C22C7DAD429F3492977" + digest := "sha1:b285747ad7cc57aa74bce2e30b453c8d1cb71ba4" + payloadDigest := "sha1:c37ffb221569c553a2476c22c7dad429f3492977" tests := []blockDigestTest{ { @@ -520,7 +520,7 @@ func Test_httpResponseBlock_Cache(t *testing.T) { content := "HTTP/1.1 200 OK\nDate: Tue, 19 Sep 2016 17:18:40 GMT\nServer: Apache/2.0.54 (Ubuntu)\n" + "Last-Modified: Mon, 16 Jun 2013 22:28:51 GMT\nETag: \"3e45-67e-2ed02ec0\"\nAccept-Ranges: bytes\n" + "Content-Length: 19\nConnection: close\nContent-Type: text/plain\n\nThis is the content" - digest := "sha1:B285747AD7CC57AA74BCE2E30B453C8D1CB71BA4" + digest := "sha1:b285747ad7cc57aa74bce2e30b453c8d1cb71ba4" tests := []cacheTest{ { @@ -603,7 +603,7 @@ func Test_httpResponseBlock_RawBytes(t *testing.T) { content := "HTTP/1.1 200 OK\nDate: Tue, 19 Sep 2016 17:18:40 GMT\nServer: Apache/2.0.54 (Ubuntu)\n" + "Last-Modified: Mon, 16 Jun 2013 22:28:51 GMT\nETag: \"3e45-67e-2ed02ec0\"\nAccept-Ranges: bytes\n" + "Content-Length: 19\nConnection: close\nContent-Type: text/plain\n\nThis is the content" - digest := "sha1:B285747AD7CC57AA74BCE2E30B453C8D1CB71BA4" + digest := "sha1:b285747ad7cc57aa74bce2e30b453c8d1cb71ba4" tests := []rawBytesTest{ { diff --git a/digest.go b/digest.go index ee11b46..cb91cd1 100644 --- a/digest.go +++ b/digest.go @@ -17,6 +17,7 @@ package gowarc import ( + "bytes" "crypto/md5" "crypto/sha1" "crypto/sha256" @@ -36,7 +37,7 @@ func (d digestEncoding) encode(digest *digest) string { dig := digest.Sum(nil) switch d { case Base16: - return strings.ToUpper(hex.EncodeToString(dig)) + return strings.ToLower(hex.EncodeToString(dig)) case Base32: return base32.StdEncoding.EncodeToString(dig) case Base64: @@ -46,6 +47,19 @@ func (d digestEncoding) encode(digest *digest) string { } } +func (d digestEncoding) decode(s string) ([]byte, error) { + switch d { + case Base16: + return hex.DecodeString(s) + case Base32: + return base32.StdEncoding.DecodeString(s) + case Base64: + return base64.StdEncoding.DecodeString(s) + default: + return []byte(s), nil + } +} + const ( unknown digestEncoding = 0 Base16 digestEncoding = 1 @@ -85,6 +99,22 @@ func detectEncoding(algorithm, digest string, defaultEncoding digestEncoding) di return defaultEncoding } +// normalizeAlgorithmName normalizes the algorithm name to the format used in WARC digest-fields. +func normalizeAlgorithmName(algorithm string) string { + algorithm = strings.ToLower(algorithm) + + switch algorithm { + case "sha-1": + return "sha1" + case "sha-256": + return "sha256" + case "sha-512": + return "sha512" + default: + return algorithm + } +} + // digest is a utility for parsing, creation and validation of WARC block and payload digests. // // Typical usage is to create a digest from a WARC record's WARC-Block-Digest or WARC-Payload-Digest fields. @@ -124,7 +154,11 @@ func (d *digest) format() string { // digest. func (d *digest) validate() error { computed := d.encoding.encode(d) - if d.hash != computed { + dig, err := d.encoding.decode(d.hash) + if err != nil { + return err + } + if !bytes.Equal(dig, d.Sum(nil)) { return fmt.Errorf("wrong digest: expected %s:%s, computed: %s:%s", d.name, d.hash, d.name, computed) } return nil @@ -144,17 +178,16 @@ func (d *digest) updateDigest() { func newDigest(digestString string, defaultEncoding digestEncoding) (*digest, error) { t := strings.SplitN(digestString, ":", 2) algorithm := t[0] - algorithm = strings.ToLower(algorithm) - if algorithm == "" { - return nil, fmt.Errorf("missing algorithm") - } + algorithm = normalizeAlgorithmName(algorithm) var hash string if len(t) > 1 { hash = t[1] } encoding := detectEncoding(algorithm, hash, defaultEncoding) - if encoding < Base64 { - // base16 and base32 encodings are case insensitive. + switch encoding { + case Base16: + hash = strings.ToLower(hash) + case Base32: hash = strings.ToUpper(hash) } diff --git a/digest_test.go b/digest_test.go index 4023f57..cfc0061 100644 --- a/digest_test.go +++ b/digest_test.go @@ -31,22 +31,25 @@ func Test_newDigest(t *testing.T) { wantDigest string wantErr bool }{ - {"md5", "md5", "Some content", Base16, "md5", "md5:B53227DA4280F0E18270F21DD77C91D0", false}, - {"md5 with base16 digest", "md5:12345", "Some content", Base16, "md5", "md5:B53227DA4280F0E18270F21DD77C91D0", false}, + {"md5", "md5", "Some content", Base16, "md5", "md5:b53227da4280f0e18270f21dd77c91d0", false}, + {"md5 with base16 digest", "md5:12345", "Some content", Base16, "md5", "md5:b53227da4280f0e18270f21dd77c91d0", false}, {"md5 with base32 digest", "md5:12345", "Some content", Base32, "md5", "md5:WUZCPWSCQDYODATQ6IO5O7ER2A======", false}, {"md5 with base64 digest", "md5:12345", "Some content", Base64, "md5", "md5:tTIn2kKA8OGCcPId13yR0A==", false}, - {"sha1", "sha1", "Some content", Base16, "sha1", "sha1:9F1A6ECF74E9F9B1AE52E8EB581D420E63E8453A", false}, - {"sha1 with base16 digest", "sha1:12345", "Some content", Base16, "sha1", "sha1:9F1A6ECF74E9F9B1AE52E8EB581D420E63E8453A", false}, + {"sha1", "sha1", "Some content", Base16, "sha1", "sha1:9f1a6ecf74e9f9b1ae52e8eb581d420e63e8453a", false}, + {"sha1 with base16 digest", "sha1:12345", "Some content", Base16, "sha1", "sha1:9f1a6ecf74e9f9b1ae52e8eb581d420e63e8453a", false}, + {"sha-1 with base16 digest", "sha-1:12345", "Some content", Base16, "sha1", "sha1:9f1a6ecf74e9f9b1ae52e8eb581d420e63e8453a", false}, {"sha1 with base32 digest", "sha1:12345", "Some content", Base32, "sha1", "sha1:T4NG5T3U5H43DLSS5DVVQHKCBZR6QRJ2", false}, {"sha1 with base64 digest", "sha1:12345", "Some content", Base64, "sha1", "sha1:nxpuz3Tp+bGuUujrWB1CDmPoRTo=", false}, - {"sha256", "sha256", "Some content", Base16, "sha256", "sha256:9C6609FC5111405EA3F5BB3D1F6B5A5EFD19A0CEC53D85893FD96D265439CD5B", false}, - {"sha256 with base16 digest", "sha256:12345", "Some content", Base16, "sha256", "sha256:9C6609FC5111405EA3F5BB3D1F6B5A5EFD19A0CEC53D85893FD96D265439CD5B", false}, + {"sha256", "sha256", "Some content", Base16, "sha256", "sha256:9c6609fc5111405ea3f5bb3d1f6b5a5efd19a0cec53d85893fd96d265439cd5b", false}, + {"sha-256", "sha256", "Some content", Base16, "sha256", "sha256:9c6609fc5111405ea3f5bb3d1f6b5a5efd19a0cec53d85893fd96d265439cd5b", false}, + {"sha256 with base16 digest", "sha256:12345", "Some content", Base16, "sha256", "sha256:9c6609fc5111405ea3f5bb3d1f6b5a5efd19a0cec53d85893fd96d265439cd5b", false}, {"sha256 with base32 digest", "sha256:12345", "Some content", Base32, "sha256", "sha256:TRTAT7CRCFAF5I7VXM6R6222L36RTIGOYU6YLCJ73FWSMVBZZVNQ====", false}, {"sha256 with base64 digest", "sha256:12345", "Some content", Base64, "sha256", "sha256:nGYJ/FERQF6j9bs9H2taXv0ZoM7FPYWJP9ltJlQ5zVs=", false}, - {"sha512", "sha512", "Some content", Base16, "sha512", "sha512:B20D977718ED67F2BF7620EE2D982FD850C4883EC8D048440FE7B6A86CF6322FD791C47B0C7469DBEEF3E339032E1ABC4BCEBE5EFC104BC19A117BFEF4478605", false}, - {"sha512 with base16 digest", "sha512:12345", "Some content", Base16, "sha512", "sha512:B20D977718ED67F2BF7620EE2D982FD850C4883EC8D048440FE7B6A86CF6322FD791C47B0C7469DBEEF3E339032E1ABC4BCEBE5EFC104BC19A117BFEF4478605", false}, + {"sha512", "sha512", "Some content", Base16, "sha512", "sha512:b20d977718ed67f2bf7620ee2d982fd850c4883ec8d048440fe7b6a86cf6322fd791c47b0c7469dbeef3e339032e1abc4bcebe5efc104bc19a117bfef4478605", false}, + {"sha512 with base16 digest", "sha512:12345", "Some content", Base16, "sha512", "sha512:b20d977718ed67f2bf7620ee2d982fd850c4883ec8d048440fe7b6a86cf6322fd791c47b0c7469dbeef3e339032e1abc4bcebe5efc104bc19a117bfef4478605", false}, {"sha512 with base32 digest", "sha512:12345", "Some content", Base32, "sha512", "sha512:WIGZO5YY5VT7FP3WEDXC3GBP3BIMJCB6ZDIEQRAP463KQ3HWGIX5PEOEPMGHI2O353Z6GOIDFYNLYS6OXZPPYECLYGNBC6766RDYMBI=", false}, {"sha512 with base64 digest", "sha512:12345", "Some content", Base64, "sha512", "sha512:sg2XdxjtZ/K/diDuLZgv2FDEiD7I0EhED+e2qGz2Mi/XkcR7DHRp2+7z4zkDLhq8S86+XvwQS8GaEXv+9EeGBQ==", false}, + {"sha-512 with base64 digest", "sha512:12345", "Some content", Base64, "sha512", "sha512:sg2XdxjtZ/K/diDuLZgv2FDEiD7I0EhED+e2qGz2Mi/XkcR7DHRp2+7z4zkDLhq8S86+XvwQS8GaEXv+9EeGBQ==", false}, {"unknown algorithm", "mysecret:12345", "Some content", Base16, "mysecret", "mysecret:123", true}, {"unknown algorithm with digest", "mysecret:12345", "Some content", Base16, "mysecret", "mysecret:123", true}, } @@ -81,35 +84,40 @@ func Test_digest_validate(t *testing.T) { wantValid bool }{ {"md5", "Some content", "md5", false}, - {"md5 with base16 digest", "Some content", "md5:B53227DA4280F0E18270F21DD77C91D0", true}, + {"md5 with base16 digest", "Some content", "md5:b53227da4280f0e18270f21dd77c91d0", true}, {"md5 with base32 digest", "Some content", "md5:WUZCPWSCQDYODATQ6IO5O7ER2A======", true}, {"md5 with base64 digest", "Some content", "md5:tTIn2kKA8OGCcPId13yR0A==", true}, {"md5 with wrong digest", "Some content", "md5:123", false}, {"sha1", "Some content", "sha1", false}, - {"sha1 with base16 digest", "Some content", "sha1:9F1A6ECF74E9F9B1AE52E8EB581D420E63E8453A", true}, + {"sha1 with base16 digest", "Some content", "sha1:9f1a6ecf74e9f9b1ae52e8eb581d420e63e8453a", true}, + {"SHA-1 with base16 digest", "Some content", "SHA-1:9f1a6ecf74e9f9b1ae52e8eb581d420e63e8453a", true}, {"sha1 with base32 digest", "Some content", "sha1:T4NG5T3U5H43DLSS5DVVQHKCBZR6QRJ2", true}, {"sha1 with base64 digest", "Some content", "sha1:nxpuz3Tp+bGuUujrWB1CDmPoRTo=", true}, {"sha1 with wrong digest", "Some content", "sha1:123", false}, {"sha256", "Some content", "sha256", false}, - {"sha256 with base16 digest", "Some content", "sha256:9C6609FC5111405EA3F5BB3D1F6B5A5EFD19A0CEC53D85893FD96D265439CD5B", true}, + {"sha256 with base16 digest", "Some content", "sha256:9c6609fc5111405ea3f5bb3d1f6b5a5efd19a0cec53d85893fd96d265439cd5b", true}, + {"SHA-256 with base16 digest", "Some content", "SHA-256:9c6609fc5111405ea3f5bb3d1f6b5a5efd19a0cec53d85893fd96d265439cd5b", true}, {"sha256 with base32 digest", "Some content", "sha256:TRTAT7CRCFAF5I7VXM6R6222L36RTIGOYU6YLCJ73FWSMVBZZVNQ====", true}, {"sha256 with base64 digest", "Some content", "sha256:nGYJ/FERQF6j9bs9H2taXv0ZoM7FPYWJP9ltJlQ5zVs=", true}, {"sha256 with wrong digest", "Some content", "sha256:123", false}, {"sha512", "Some content", "sha512", false}, - {"sha512 with base16 digest", "Some content", "sha512:B20D977718ED67F2BF7620EE2D982FD850C4883EC8D048440FE7B6A86CF6322FD791C47B0C7469DBEEF3E339032E1ABC4BCEBE5EFC104BC19A117BFEF4478605", true}, + {"sha512 with base16 digest", "Some content", "sha512:b20d977718ed67f2bf7620ee2d982fd850c4883ec8d048440fe7b6a86cf6322fd791c47b0c7469dbeef3e339032e1abc4bcebe5efc104bc19a117bfef4478605", true}, {"sha512 with base32 digest", "Some content", "sha512:WIGZO5YY5VT7FP3WEDXC3GBP3BIMJCB6ZDIEQRAP463KQ3HWGIX5PEOEPMGHI2O353Z6GOIDFYNLYS6OXZPPYECLYGNBC6766RDYMBI=", true}, {"sha512 with base64 digest", "Some content", "sha512:sg2XdxjtZ/K/diDuLZgv2FDEiD7I0EhED+e2qGz2Mi/XkcR7DHRp2+7z4zkDLhq8S86+XvwQS8GaEXv+9EeGBQ==", true}, {"sha512 with wrong digest", "Some content", "sha512:123", false}, - {"lovercase base16 encoding", "Some content", "sha1:9f1a6ecf74e9f9b1ae52e8eb581d420e63e8453a", true}, + {"uppercase base16 encoding", "Some content", "sha1:9F1A6ECF74E9F9B1AE52E8EB581D420E63E8453A", true}, {"lovercase base32 encoding", "Some content", "sha1:t4ng5t3u5h43dlss5dvvqhkcbzr6qrj2", true}, {"lovercase base64 encoding", "Some content", "sha1:nxpuz3tp+bguuujrwb1cdmporto=", false}, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - d, _ := newDigest(tt.digestString, unknown) - assert := assert.New(t) - _, err := d.Write([]byte(tt.input)) + + d, err := newDigest(tt.digestString, unknown) + assert.NoError(err) + assert.NotNil(d) + + _, err = d.Write([]byte(tt.input)) assert.NoError(err) err = d.validate() @@ -117,6 +125,7 @@ func Test_digest_validate(t *testing.T) { assert.Error(err) } else { assert.NoError(err) + //assert.Equal(tt.digestString, d.format()) } }) } diff --git a/example_test.go b/example_test.go index 6387f36..e1541f3 100644 --- a/example_test.go +++ b/example_test.go @@ -51,7 +51,7 @@ func ExampleUnmarshaler() { "WARC-Filename: temp-20170306040353.warc.gz\r\n" + "WARC-Type: warcinfo\r\n" + "Content-Type: application/warc-fields\r\n" + - "Warc-Block-Digest: sha1:AF4D582B4FFC017D07A947D841E392A821F754F3\r\n" + + "Warc-Block-Digest: sha1:af4d582b4ffc017d07a947d841e392a821f754f3\r\n" + "Content-Length: 34\r\n" + "\r\n" + "format: WARC File Format 1.1\r\n" + @@ -68,7 +68,7 @@ func ExampleUnmarshaler() { // Output: Offset: 2, WARC record: version: WARC/1.1, type: warcinfo, id: urn:uuid:e9a0cecc-0221-11e7-adb1-0242ac120008 // gowarc: Validation errors: // 1: gowarc: record was found 2 bytes after expected offset - // 2: block: wrong digest: expected sha1:AF4D582B4FFC017D07A947D841E392A821F754F3, computed: sha1:8A936F9FD60D664CF95B1FFB40F1C4093E65BB40 + // 2: block: wrong digest: expected sha1:af4d582b4ffc017d07a947d841e392a821f754f3, computed: sha1:8a936f9fd60d664cf95b1ffb40f1c4093e65bb40 } func ExampleNewWarcFileWriter() { diff --git a/record_test.go b/record_test.go index e3a4a79..4d4a483 100644 --- a/record_test.go +++ b/record_test.go @@ -43,8 +43,8 @@ func Test_warcRecord_ToRevisitRecord(t *testing.T) { &nameValue{Name: WarcDate, Value: "2017-03-06T04:03:53Z"}, &nameValue{Name: WarcRecordID, Value: ""}, &nameValue{Name: ContentType, Value: "application/http;msgtype=response"}, - &nameValue{Name: WarcBlockDigest, Value: "sha1:B285747AD7CC57AA74BCE2E30B453C8D1CB71BA4"}, - &nameValue{Name: WarcPayloadDigest, Value: "sha1:C37FFB221569C553A2476C22C7DAD429F3492977"}, + &nameValue{Name: WarcBlockDigest, Value: "sha1:b285747ad7cc57aa74bce2e30b453c8d1cb71ba4"}, + &nameValue{Name: WarcPayloadDigest, Value: "sha1:c37ffb221569c553a2476c22c7dad429f3492977"}, &nameValue{Name: ContentLength, Value: "257"}, }, "HTTP/1.1 200 OK\nDate: Tue, 19 Sep 2016 17:18:40 GMT\nServer: Apache/2.0.54 (Ubuntu)\n"+ @@ -56,8 +56,8 @@ func Test_warcRecord_ToRevisitRecord(t *testing.T) { &nameValue{Name: WarcDate, Value: "2017-03-06T04:03:53Z"}, &nameValue{Name: WarcRecordID, Value: ""}, &nameValue{Name: WarcType, Value: "revisit"}, - &nameValue{Name: WarcBlockDigest, Value: "sha1:BF9D96D3F3F230CE8E2C6A3E5E1D51A81016B55E"}, - &nameValue{Name: WarcPayloadDigest, Value: "sha1:C37FFB221569C553A2476C22C7DAD429F3492977"}, + &nameValue{Name: WarcBlockDigest, Value: "sha1:bf9d96d3f3f230ce8e2c6a3e5e1d51a81016b55e"}, + &nameValue{Name: WarcPayloadDigest, Value: "sha1:c37ffb221569c553a2476c22c7dad429f3492977"}, &nameValue{Name: ContentType, Value: "application/http;msgtype=response"}, &nameValue{Name: ContentLength, Value: "238"}, &nameValue{Name: WarcProfile, Value: ProfileServerNotModifiedV1_1}, @@ -89,7 +89,7 @@ func Test_warcRecord_ToRevisitRecord(t *testing.T) { &nameValue{Name: WarcDate, Value: "2017-03-06T04:03:53Z"}, &nameValue{Name: WarcRecordID, Value: ""}, &nameValue{Name: WarcType, Value: "revisit"}, - &nameValue{Name: WarcBlockDigest, Value: "sha1:BF9D96D3F3F230CE8E2C6A3E5E1D51A81016B55E"}, + &nameValue{Name: WarcBlockDigest, Value: "sha1:bf9d96d3f3f230ce8e2c6a3e5e1d51a81016b55e"}, &nameValue{Name: ContentType, Value: "application/http;msgtype=response"}, &nameValue{Name: ContentLength, Value: "238"}, &nameValue{Name: WarcProfile, Value: ProfileServerNotModifiedV1_1}, @@ -109,8 +109,8 @@ func Test_warcRecord_ToRevisitRecord(t *testing.T) { &nameValue{Name: WarcDate, Value: "2017-03-06T04:03:53Z"}, &nameValue{Name: WarcRecordID, Value: ""}, &nameValue{Name: ContentType, Value: "application/http;msgtype=response"}, - &nameValue{Name: WarcBlockDigest, Value: "sha1:B285747AD7CC57AA74BCE2E30B453C8D1CB71BA4"}, - &nameValue{Name: WarcPayloadDigest, Value: "sha1:C37FFB221569C553A2476C22C7DAD429F3492977"}, + &nameValue{Name: WarcBlockDigest, Value: "sha1:b285747ad7cc57aa74bce2e30b453c8d1cb71ba4"}, + &nameValue{Name: WarcPayloadDigest, Value: "sha1:c37ffb221569c553a2476c22c7dad429f3492977"}, &nameValue{Name: ContentLength, Value: "257"}, }, "HTTP/1.1 200 OK\nDate: Tue, 19 Sep 2016 17:18:40 GMT\nServer: Apache/2.0.54 (Ubuntu)\n"+ @@ -122,8 +122,8 @@ func Test_warcRecord_ToRevisitRecord(t *testing.T) { &nameValue{Name: WarcDate, Value: "2017-03-06T04:03:53Z"}, &nameValue{Name: WarcRecordID, Value: ""}, &nameValue{Name: WarcType, Value: "revisit"}, - &nameValue{Name: WarcBlockDigest, Value: "sha1:BF9D96D3F3F230CE8E2C6A3E5E1D51A81016B55E"}, - &nameValue{Name: WarcPayloadDigest, Value: "sha1:C37FFB221569C553A2476C22C7DAD429F3492977"}, + &nameValue{Name: WarcBlockDigest, Value: "sha1:bf9d96d3f3f230ce8e2c6a3e5e1d51a81016b55e"}, + &nameValue{Name: WarcPayloadDigest, Value: "sha1:c37ffb221569c553a2476c22c7dad429f3492977"}, &nameValue{Name: ContentType, Value: "application/http;msgtype=response"}, &nameValue{Name: ContentLength, Value: "238"}, &nameValue{Name: WarcProfile, Value: ProfileIdenticalPayloadDigestV1_1}, @@ -160,7 +160,7 @@ func Test_warcRecord_ToRevisitRecord(t *testing.T) { &nameValue{Name: WarcDate, Value: "2017-03-06T04:03:53Z"}, &nameValue{Name: WarcRecordID, Value: ""}, &nameValue{Name: ContentType, Value: "text/plain"}, - &nameValue{Name: WarcBlockDigest, Value: "sha1:C37FFB221569C553A2476C22C7DAD429F3492977"}, + &nameValue{Name: WarcBlockDigest, Value: "sha1:c37ffb221569c553a2476c22c7dad429f3492977"}, &nameValue{Name: ContentLength, Value: "19"}, }, "This is the content"), @@ -170,8 +170,8 @@ func Test_warcRecord_ToRevisitRecord(t *testing.T) { &nameValue{Name: WarcDate, Value: "2017-03-06T04:03:53Z"}, &nameValue{Name: WarcRecordID, Value: ""}, &nameValue{Name: WarcType, Value: "revisit"}, - &nameValue{Name: WarcBlockDigest, Value: "sha1:DA39A3EE5E6B4B0D3255BFEF95601890AFD80709"}, - &nameValue{Name: WarcPayloadDigest, Value: "sha1:C37FFB221569C553A2476C22C7DAD429F3492977"}, + &nameValue{Name: WarcBlockDigest, Value: "sha1:da39a3ee5e6b4b0d3255bfef95601890afd80709"}, + &nameValue{Name: WarcPayloadDigest, Value: "sha1:c37ffb221569c553a2476c22c7dad429f3492977"}, &nameValue{Name: ContentType, Value: "text/plain"}, &nameValue{Name: ContentLength, Value: "0"}, &nameValue{Name: WarcProfile, Value: ProfileIdenticalPayloadDigestV1_1}, @@ -189,8 +189,8 @@ func Test_warcRecord_ToRevisitRecord(t *testing.T) { &nameValue{Name: WarcDate, Value: "2017-03-06T04:03:53Z"}, &nameValue{Name: WarcRecordID, Value: ""}, &nameValue{Name: ContentType, Value: "text/plain"}, - &nameValue{Name: WarcBlockDigest, Value: "sha1:C37FFB221569C553A2476C22C7DAD429F3492977"}, - &nameValue{Name: WarcPayloadDigest, Value: "sha1:C37FFB221569C553A2476C22C7DAD429F3492977"}, + &nameValue{Name: WarcBlockDigest, Value: "sha1:c37ffb221569c553a2476c22c7dad429f3492977"}, + &nameValue{Name: WarcPayloadDigest, Value: "sha1:c37ffb221569c553a2476c22c7dad429f3492977"}, &nameValue{Name: ContentLength, Value: "19"}, }, "This is the content"), @@ -200,8 +200,8 @@ func Test_warcRecord_ToRevisitRecord(t *testing.T) { &nameValue{Name: WarcDate, Value: "2017-03-06T04:03:53Z"}, &nameValue{Name: WarcRecordID, Value: ""}, &nameValue{Name: WarcType, Value: "revisit"}, - &nameValue{Name: WarcBlockDigest, Value: "sha1:DA39A3EE5E6B4B0D3255BFEF95601890AFD80709"}, - &nameValue{Name: WarcPayloadDigest, Value: "sha1:C37FFB221569C553A2476C22C7DAD429F3492977"}, + &nameValue{Name: WarcBlockDigest, Value: "sha1:da39a3ee5e6b4b0d3255bfef95601890afd80709"}, + &nameValue{Name: WarcPayloadDigest, Value: "sha1:c37ffb221569c553a2476c22c7dad429f3492977"}, &nameValue{Name: ContentType, Value: "text/plain"}, &nameValue{Name: ContentLength, Value: "0"}, &nameValue{Name: WarcProfile, Value: ProfileIdenticalPayloadDigestV1_1}, @@ -283,8 +283,8 @@ func Test_warcRecord_Merge(t *testing.T) { &nameValue{Name: WarcRecordID, Value: ""}, &nameValue{Name: ContentType, Value: "application/http;msgtype=response"}, &nameValue{Name: ContentLength, Value: "238"}, - &nameValue{Name: WarcBlockDigest, Value: "sha1:BF9D96D3F3F230CE8E2C6A3E5E1D51A81016B55E"}, - &nameValue{Name: WarcPayloadDigest, Value: "sha1:C37FFB221569C553A2476C22C7DAD429F3492977"}, + &nameValue{Name: WarcBlockDigest, Value: "sha1:bf9d96d3f3f230ce8e2c6a3e5e1d51a81016b55e"}, + &nameValue{Name: WarcPayloadDigest, Value: "sha1:c37ffb221569c553a2476c22c7dad429f3492977"}, &nameValue{Name: WarcProfile, Value: ProfileServerNotModifiedV1_1}, &nameValue{Name: WarcRefersTo, Value: ""}, &nameValue{Name: WarcRefersToTargetURI, Value: "http://example.com"}, @@ -313,8 +313,8 @@ func Test_warcRecord_Merge(t *testing.T) { &nameValue{Name: WarcDate, Value: "2017-03-06T04:03:53Z"}, &nameValue{Name: WarcRecordID, Value: ""}, &nameValue{Name: WarcType, Value: "response"}, - &nameValue{Name: WarcBlockDigest, Value: "sha1:6E9D6B234FEEBBF1AB618707217E577C3B83448A"}, - &nameValue{Name: WarcPayloadDigest, Value: "sha1:C37FFB221569C553A2476C22C7DAD429F3492977"}, + &nameValue{Name: WarcBlockDigest, Value: "sha1:6e9d6b234feebbf1ab618707217e577c3b83448a"}, + &nameValue{Name: WarcPayloadDigest, Value: "sha1:c37ffb221569c553a2476c22c7dad429f3492977"}, &nameValue{Name: ContentType, Value: "application/http;msgtype=response"}, &nameValue{Name: ContentLength, Value: "257"}, }, @@ -343,8 +343,8 @@ func Test_warcRecord_Merge(t *testing.T) { &nameValue{Name: WarcRecordID, Value: ""}, &nameValue{Name: ContentType, Value: "application/http;msgtype=response"}, &nameValue{Name: ContentLength, Value: "238"}, - &nameValue{Name: WarcBlockDigest, Value: "sha1:BF9D96D3F3F230CE8E2C6A3E5E1D51A81016B55E"}, - &nameValue{Name: WarcPayloadDigest, Value: "sha1:C37FFB221569C553A2476C22C7DAD429F3492977"}, + &nameValue{Name: WarcBlockDigest, Value: "sha1:bf9d96d3f3f230ce8e2c6a3e5e1d51a81016b55e"}, + &nameValue{Name: WarcPayloadDigest, Value: "sha1:c37ffb221569c553a2476c22c7dad429f3492977"}, &nameValue{Name: WarcProfile, Value: ProfileIdenticalPayloadDigestV1_1}, &nameValue{Name: WarcRefersTo, Value: ""}, &nameValue{Name: WarcRefersToTargetURI, Value: "http://example.com"}, @@ -373,8 +373,8 @@ func Test_warcRecord_Merge(t *testing.T) { &nameValue{Name: WarcDate, Value: "2017-03-06T04:03:53Z"}, &nameValue{Name: WarcRecordID, Value: ""}, &nameValue{Name: WarcType, Value: "response"}, - &nameValue{Name: WarcBlockDigest, Value: "sha1:6E9D6B234FEEBBF1AB618707217E577C3B83448A"}, - &nameValue{Name: WarcPayloadDigest, Value: "sha1:C37FFB221569C553A2476C22C7DAD429F3492977"}, + &nameValue{Name: WarcBlockDigest, Value: "sha1:6e9d6b234feebbf1ab618707217e577c3b83448a"}, + &nameValue{Name: WarcPayloadDigest, Value: "sha1:c37ffb221569c553a2476c22c7dad429f3492977"}, &nameValue{Name: ContentType, Value: "application/http;msgtype=response"}, &nameValue{Name: ContentLength, Value: "257"}, }, @@ -403,8 +403,8 @@ func Test_warcRecord_Merge(t *testing.T) { &nameValue{Name: WarcRecordID, Value: ""}, &nameValue{Name: ContentType, Value: "application/http;msgtype=response"}, &nameValue{Name: ContentLength, Value: "237"}, - &nameValue{Name: WarcBlockDigest, Value: "sha1:D1EA0889024BD99516D23CA2AD5E30E850977C84"}, - &nameValue{Name: WarcPayloadDigest, Value: "sha1:C37FFB221569C553A2476C22C7DAD429F3492977"}, + &nameValue{Name: WarcBlockDigest, Value: "sha1:d1ea0889024bd99516d23ca2ad5e30e850977c84"}, + &nameValue{Name: WarcPayloadDigest, Value: "sha1:c37ffb221569c553a2476c22c7dad429f3492977"}, &nameValue{Name: WarcProfile, Value: ProfileIdenticalPayloadDigestV1_1}, &nameValue{Name: WarcRefersTo, Value: ""}, &nameValue{Name: WarcRefersToTargetURI, Value: "http://example.com"}, @@ -420,7 +420,7 @@ func Test_warcRecord_Merge(t *testing.T) { &nameValue{Name: WarcDate, Value: "2016-09-19T18:03:53Z"}, &nameValue{Name: WarcRecordID, Value: ""}, &nameValue{Name: ContentType, Value: "application/http;msgtype=response"}, - &nameValue{Name: WarcBlockDigest, Value: "sha1:6E9D6B234FEEBBF1AB618707217E577C3B83448A"}, + &nameValue{Name: WarcBlockDigest, Value: "sha1:6e9d6b234feebbf1ab618707217e577c3b83448a"}, &nameValue{Name: ContentLength, Value: "236"}, }, "HTTP/1.1 200 OK\nDate: Tue, 19 Sep 2016 17:18:40 GMT\nServer: Apache/2.0.54 (Ubuntu)\n"+ @@ -433,8 +433,8 @@ func Test_warcRecord_Merge(t *testing.T) { &nameValue{Name: WarcDate, Value: "2017-03-06T04:03:53Z"}, &nameValue{Name: WarcRecordID, Value: ""}, &nameValue{Name: WarcType, Value: "response"}, - &nameValue{Name: WarcBlockDigest, Value: "sha1:6E9D6B234FEEBBF1AB618707217E577C3B83448A"}, - &nameValue{Name: WarcPayloadDigest, Value: "sha1:C37FFB221569C553A2476C22C7DAD429F3492977"}, + &nameValue{Name: WarcBlockDigest, Value: "sha1:6e9d6b234feebbf1ab618707217e577c3b83448a"}, + &nameValue{Name: WarcPayloadDigest, Value: "sha1:c37ffb221569c553a2476c22c7dad429f3492977"}, &nameValue{Name: ContentType, Value: "application/http;msgtype=response"}, &nameValue{Name: ContentLength, Value: "256"}, }, diff --git a/warcfile_test.go b/warcfile_test.go index 220b034..d8512e8 100644 --- a/warcfile_test.go +++ b/warcfile_test.go @@ -176,7 +176,7 @@ func TestWarcFileWriter_Write_warcinfo_compressed(t *testing.T) { defer func() { assert.NoError(os.RemoveAll(testdir)) }() // Write two records sequentially - compressedWarcinfoSize := int64(254) + compressedWarcinfoSize := int64(252) res := w.Write(createTestRecord()) assert.NoError(res[0].Err) assert.Equalf(uncompressedRecordWithWarcInfoIdSize, res[0].BytesWritten, "Expected size from writer %d, but was %d", uncompressedRecordWithWarcInfoIdSize, res[0].BytesWritten)