fordfrog · dmitrygusev · Mar 14, 2015
diff --git a/.gitignore b/.gitignore
@@ -1,2 +1,5 @@
 target/
 nbactions.xml
+.classpath
+.project
+.settings
diff --git a/src/main/java/com/fordfrog/xml2csv/Convertor.java b/src/main/java/com/fordfrog/xml2csv/Convertor.java
@@ -28,15 +28,21 @@
 import java.nio.file.Files;
 import java.nio.file.Path;
 import java.util.ArrayList;
+import java.util.Enumeration;
 import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
 import java.util.Map.Entry;
 import java.util.regex.Pattern;
+import java.util.zip.ZipEntry;
+import java.util.zip.ZipFile;
+
 import javax.xml.stream.XMLInputFactory;
 import javax.xml.stream.XMLStreamException;
 import javax.xml.stream.XMLStreamReader;
 
+import org.apache.commons.io.input.BOMInputStream;
+
 /**
  * XML to CSV convertor.
  *
@@ -62,11 +68,24 @@ public static void convert(final Path inputFile, final Path outputFile,
             final String[] columns, final Filters filters,
             final Remappings remappings, final char separator,
             final boolean trim, final boolean join, final String itemName) {
-        try (final InputStream inputStream = Files.newInputStream(inputFile);
-                final Writer writer = Files.newBufferedWriter(
-                        outputFile, Charset.forName("UtF-8"))) {
-            convert(inputStream, writer, columns, filters, remappings, separator,
-                    trim, join, itemName);
+        try (final Writer writer = Files.newBufferedWriter(outputFile, Charset.forName("UTF-8"))) {
+            if (inputFile.toString().endsWith(".zip")) {
+                try (ZipFile zipFile = new ZipFile(inputFile.toFile())) {
+                    Enumeration<? extends ZipEntry> entries = zipFile.entries();
+                    while (entries.hasMoreElements()) {
+                        ZipEntry entry = entries.nextElement();
+                        try (final InputStream inputStream = new BOMInputStream(zipFile.getInputStream(entry))) {
+                            convert(inputStream, writer, columns, filters, remappings, separator,
+                                    trim, join, itemName);
+                        }
+                    }
+                }
+            } else {
+                try (final InputStream inputStream = Files.newInputStream(inputFile)) {
+                    convert(inputStream, writer, columns, filters, remappings, separator,
+                            trim, join, itemName);
+                }
+            }
         } catch (final IOException ex) {
             throw new RuntimeException("IO operation failed", ex);
         }

diff --git a/src/main/java/org/apache/commons/io/ByteOrderMark.java b/src/main/java/org/apache/commons/io/ByteOrderMark.java
@@ -0,0 +1,184 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.io;
+
+import java.io.Serializable;
+
+/**
+ * Byte Order Mark (BOM) representation - see {@link org.apache.commons.io.input.BOMInputStream}.
+ * 
+ * @see org.apache.commons.io.input.BOMInputStream
+ * @see <a href="http://en.wikipedia.org/wiki/Byte_order_mark">Wikipedia: Byte Order Mark</a>
+ * @see <a href="http://www.w3.org/TR/2006/REC-xml-20060816/#sec-guessing">W3C: Autodetection of Character Encodings
+ *      (Non-Normative)</a>
+ * @version $Id: ByteOrderMark.java 1347571 2012-06-07 11:13:53Z sebb $
+ * @since 2.0
+ */
+public class ByteOrderMark implements Serializable {
+
+    private static final long serialVersionUID = 1L;
+
+    /** UTF-8 BOM */
+    public static final ByteOrderMark UTF_8    = new ByteOrderMark("UTF-8",    0xEF, 0xBB, 0xBF);
+
+    /** UTF-16BE BOM (Big-Endian) */
+    public static final ByteOrderMark UTF_16BE = new ByteOrderMark("UTF-16BE", 0xFE, 0xFF);
+
+    /** UTF-16LE BOM (Little-Endian) */
+    public static final ByteOrderMark UTF_16LE = new ByteOrderMark("UTF-16LE", 0xFF, 0xFE);
+
+    /**
+     * UTF-32BE BOM (Big-Endian)
+     * @since 2.2
+     */
+    public static final ByteOrderMark UTF_32BE = new ByteOrderMark("UTF-32BE", 0x00, 0x00, 0xFE, 0xFF);
+
+    /**
+     * UTF-32LE BOM (Little-Endian)
+     * @since 2.2
+     */
+    public static final ByteOrderMark UTF_32LE = new ByteOrderMark("UTF-32LE", 0xFF, 0xFE, 0x00, 0x00);
+
+    private final String charsetName;
+    private final int[] bytes; // one BOM byte per element, held as an int (e.g. 0xEF)
+
+    /**
+     * Construct a new BOM.
+     *
+     * @param charsetName The name of the charset the BOM represents
+     * @param bytes The BOM's bytes (copied, so later changes to the array have no effect)
+     * @throws IllegalArgumentException if the charsetName is null or
+     * zero length
+     * @throws IllegalArgumentException if the bytes are null or zero
+     * length
+     */
+    public ByteOrderMark(String charsetName, int... bytes) {
+        if (charsetName == null || charsetName.length() == 0) {
+            throw new IllegalArgumentException("No charsetName specified");
+        }
+        if (bytes == null || bytes.length == 0) {
+            throw new IllegalArgumentException("No bytes specified");
+        }
+        this.charsetName = charsetName;
+        this.bytes = new int[bytes.length];
+        System.arraycopy(bytes, 0, this.bytes, 0, bytes.length);
+    }
+
+    /**
+     * Return the name of the {@link java.nio.charset.Charset} the BOM represents.
+     *
+     * @return the character set name
+     */
+    public String getCharsetName() {
+        return charsetName;
+    }
+
+    /**
+     * Return the length of the BOM's bytes.
+     *
+     * @return the length of the BOM's bytes
+     */
+    public int length() {
+        return bytes.length;
+    }
+
+    /**
+     * Return the byte at the specified position, as the int value given to the constructor.
+     *
+     * @param pos The position
+     * @return The specified byte
+     */
+    public int get(int pos) {
+        return bytes[pos];
+    }
+
+    /**
+     * Return a copy of the BOM's bytes, each narrowed to a {@code byte}.
+     *
+     * @return a copy of the BOM's bytes
+     */
+    public byte[] getBytes() {
+        byte[] copy = new byte[bytes.length];
+        for (int i = 0; i < bytes.length; i++) {
+            copy[i] = (byte)bytes[i];
+        }
+        return copy;
+    }
+
+    /**
+     * Indicates if this BOM's bytes equal another's; the charset name is not compared.
+     *
+     * @param obj The object to compare to
+     * @return true if obj is a ByteOrderMark whose bytes are equal, otherwise
+     * false
+     */
+    @Override
+    public boolean equals(Object obj) {
+        if (!(obj instanceof ByteOrderMark)) {
+            return false;
+        }
+        ByteOrderMark bom = (ByteOrderMark)obj;
+        if (bytes.length != bom.length()) {
+            return false;
+        }
+        for (int i = 0; i < bytes.length; i++) {
+            if (bytes[i] != bom.get(i)) {
+                return false;
+            }
+        }
+        return true;
+    }
+
+    /**
+     * Return the hashcode for this BOM: the runtime class's hash code plus the sum of the bytes.
+     *
+     * @return the hashcode for this BOM.
+     * @see java.lang.Object#hashCode()
+     */
+    @Override
+    public int hashCode() {
+        int hashCode = getClass().hashCode();
+        for (int b : bytes) {
+            hashCode += b;
+        }
+        return hashCode;
+    }
+
+    /**
+     * Provide a String representation of the BOM.
+     *
+     * @return the simple class name, charset name and hex bytes, e.g. {@code ByteOrderMark[UTF-8: 0xEF,0xBB,0xBF]}
+     */
+    @Override
+    public String toString() {
+        StringBuilder builder = new StringBuilder();
+        builder.append(getClass().getSimpleName());
+        builder.append('[');
+        builder.append(charsetName);
+        builder.append(": ");
+        for (int i = 0; i < bytes.length; i++) {
+            if (i > 0) {
+                builder.append(",");
+            }
+            builder.append("0x");
+            builder.append(Integer.toHexString(0xFF & bytes[i]).toUpperCase());
+        }
+        builder.append(']');
+        return builder.toString();
+    }
+
+}