Skip to content

Commit

Permalink
(doc) Add gzip's ExtraField public apis for accessing SubField(s). (#608
Browse files Browse the repository at this point in the history
)

Improve javadoc, particularly the important immutability part when using
the iterator.

Co-authored-by: Danny Deschenes <[email protected]>
  • Loading branch information
ddeschenes-1 and ddeschenes-1 authored Nov 18, 2024
1 parent f49db00 commit 55cb3b2
Show file tree
Hide file tree
Showing 2 changed files with 88 additions and 19 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -21,33 +21,44 @@

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import java.util.Objects;

import org.apache.commons.compress.compressors.gzip.ExtraField.SubField;

/**
* If the {@code FLG.FEXTRA} bit is set, an "extra field" is present in the header, with total length XLEN bytes. It consists of a series of subfields, each of
* the form:
* If the {@code FLG.FEXTRA} bit is set, an "extra field" is present in the
* header, with total length XLEN bytes.
*
* <pre>
* +---+---+=================================+
* | XLEN |...XLEN bytes of "extra field"...| (more-->)
* +---+---+=================================+
* </pre>
*
* This class represents the extra field payload (excluding the XLEN 2 bytes).
* The ExtraField payload consists of a series of subfields, each of the form:
*
* <pre>
* +---+---+---+---+==================================+
* |SI1|SI2| LEN |... LEN bytes of subfield data ...|
* +---+---+---+---+==================================+
* </pre>
*
* This class does not expose the internal subfields list to prevent adding subfields without total extra length validation. However a copy of the list is
* available.
* This class does not expose the internal subfields list to prevent adding
* subfields without total extra length validation. The class is iterable, but
* its iterator is read-only: it does not support removing elements.
*
* @see <a href="https://datatracker.ietf.org/doc/html/rfc1952">RFC 1952 GZIP File Format Specification</a>
* @since 1.28.0
*/
public class ExtraField implements Iterable<SubField> {

/**
* If the {@code FLG.FEXTRA} bit is set, an "extra field" is present in the header, with total length XLEN bytes. It consists of a series of subfields, each
* of the form:
* If the {@code FLG.FEXTRA} bit is set, an "extra field" is present in the header, with total length XLEN bytes.
* It consists of a series of subfields, each of the form:
*
* <pre>
* +---+---+---+---+==================================+
Expand Down Expand Up @@ -185,13 +196,14 @@ public SubField getSubFieldAt(final int index) {
}

/**
* Returns an iterator over the SubField elements in this extra field in proper sequence.
* Returns an immutable iterator over the SubField elements in this extra field
* in the order they were added.
*
* @return an iterator over the SubField elements in this extra field in proper sequence.
* @return an immutable iterator over the SubField elements, in the order they were added.
*/
@Override
public Iterator<SubField> iterator() {
return subFields.iterator();
return Collections.unmodifiableList(subFields).iterator();
}

byte[] toByteArray() {
Expand All @@ -211,4 +223,50 @@ byte[] toByteArray() {
return ba;
}

/**
* Tests whether this extra field has no subfields.
*
* @return {@code true} if there are no subfields, {@code false} otherwise.
*/
public boolean isEmpty() {
return subFields.isEmpty();
}

/**
 * Empties this extra field: every subfield is discarded and the tracked
 * encoded size is reset to zero.
 */
public void clear() {
    // Reset the running byte total together with the list so both stay in sync.
    totalSize = 0;
    subFields.clear();
}

/**
* Returns the size in bytes of the encoded extra field. This does not include
* the 2-byte XLEN length prefix that precedes it when embedded in the gzip header.
* For N subfields, the total is the sum of all subfield payload bytes + 4N
* (each subfield is preceded by its SI1, SI2 and 2-byte LEN header).
*
* @return the byte count of this extra field payload when encoded.
*/
public int getEncodedSize() {
return totalSize;
}

/**
* Returns the number of subfields currently in this extra field.
*
* @return the count of subfields contained in this instance.
*/
public int getSize() {
return subFields.size();
}

/**
 * Finds the first subfield whose id equals the given id.
 * <p>
 * Subfields are examined in list order and the search stops at the first match.
 * </p>
 *
 * @param subfieldId the subfield id to look for; a {@code null} id never matches.
 * @return the first matching SubField, or {@code null} if no subfield has that id.
 */
public SubField findFirstSubField(final String subfieldId) {
    for (final SubField subField : subFields) {
        // Compare from the subfield's side so a null argument simply yields no match.
        if (subField.getId().equals(subfieldId)) {
            return subField;
        }
    }
    return null;
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@
import static org.junit.jupiter.api.Assertions.assertArrayEquals;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.junit.jupiter.api.Assertions.assertSame;
import static org.junit.jupiter.api.Assertions.assertTrue;
import static org.junit.jupiter.api.Assertions.assertThrows;
import static org.junit.jupiter.api.Assertions.fail;
import static org.junit.jupiter.api.Assumptions.assumeTrue;
Expand All @@ -32,8 +34,8 @@
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.concurrent.atomic.AtomicInteger;

import org.apache.commons.compress.compressors.gzip.ExtraField.SubField;
import org.junit.jupiter.api.Test;
Expand Down Expand Up @@ -94,11 +96,12 @@ public void testChineseFileNameUTF8() throws IOException {
/**
* Tests the gzip extra header containing subfields.
*
* @throws IOException When the test fails.
* @throws IOException When the test has issues with the underlying file system or unexpected gzip operations.
*/
@ParameterizedTest
// @formatter:off
@CsvSource({
"0, 42, false",
"1, , true",
"1, 0, false",
"1, 65531, false",
Expand All @@ -108,7 +111,8 @@ public void testChineseFileNameUTF8() throws IOException {
"2, 32763, false"
})
// @formatter:on
public void testExtraSubfields(final int subFieldCount, final Integer payloadSize, final boolean shouldFail) throws IOException {
public void testExtraSubfields(final int subFieldCount, final Integer payloadSize, final boolean shouldFail)
throws IOException {
final Path tempSourceFile = Files.createTempFile("test_gzip_extra_", ".txt");
final Path targetFile = Files.createTempFile("test_gzip_extra_", ".txt.gz");
Files.write(tempSourceFile, "Hello World!".getBytes(StandardCharsets.ISO_8859_1));
Expand All @@ -128,28 +132,35 @@ public void testExtraSubfields(final int subFieldCount, final Integer payloadSiz
break;
}
}
assertEquals(shouldFail, failed, "appending subfield " + (shouldFail ? "succes" : "failure") + " was not expected.");
assertEquals(shouldFail, failed, "add subfield " + (shouldFail ? "succes" : "failure") + " was not expected.");
if (shouldFail) {
return;
}
if (subFieldCount > 0) {
assertThrows(UnsupportedOperationException.class, () -> extra.iterator().remove());
}
parameters.setExtraField(extra);
try (OutputStream fos = Files.newOutputStream(targetFile);
GzipCompressorOutputStream gos = new GzipCompressorOutputStream(fos, parameters)) {
Files.copy(tempSourceFile, gos);
}
try (GzipCompressorInputStream gis = new GzipCompressorInputStream(Files.newInputStream(targetFile))) {
final ExtraField extra2 = gis.getMetaData().getExtraField();
assertEquals(subFieldCount == 0, extra2.isEmpty());
assertEquals(subFieldCount, extra2.getSize());
assertEquals(4 * subFieldCount + subFieldCount * payloadSize, extra2.getEncodedSize());
ArrayList<SubField> listCopy = new ArrayList<>();
extra2.forEach(listCopy::add);
assertEquals(subFieldCount, listCopy.size());
for (int i = 0; i < subFieldCount; i++) {
final SubField sf = extra2.getSubFieldAt(i);
assertSame(sf, listCopy.get(i));
assertSame(sf, extra2.findFirstSubField("z" + i));
assertEquals("z" + i, sf.getId()); // id was saved/loaded correctly
assertArrayEquals(payloads[i], sf.getPayload(), "field " + i + " has wrong payload");
}
final AtomicInteger i = new AtomicInteger();
gis.getMetaData().getExtraField().forEach(sf -> {
assertEquals("z" + i, sf.getId()); // id was saved/loaded correctly
assertArrayEquals(payloads[i.intValue()], sf.getPayload(), "field " + i + " has wrong payload");
i.incrementAndGet();
});
extra2.clear();
assertTrue(extra2.isEmpty());
}
}

Expand Down

0 comments on commit 55cb3b2

Please sign in to comment.