Skip to content

Commit

Permalink
Merge github pull apache#110 for TIKA-1893
Browse files Browse the repository at this point in the history
  • Loading branch information
Gagravarr committed May 2, 2016
2 parents 9f09a55 + 0cdf17d commit 585ab9b
Show file tree
Hide file tree
Showing 8 changed files with 377 additions and 2 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -4705,7 +4705,14 @@
<mime-type type="image/g3fax">
<glob pattern="*.g3"/>
</mime-type>

<!-- Apple ICNS icon files: detected by the ASCII string "icns" at offset 0, or by the *.icns file name pattern. -->
<mime-type type="image/icns">
<_comment>Apple Icon Image Format</_comment>
<magic priority="50">
<match value="icns" type="string" offset="0">
</match>
</magic>
<glob pattern="*.icns"/>
</mime-type>
<mime-type type="image/gif">
<acronym>GIF</acronym>
<_comment>Graphics Interchange Format</_comment>
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,120 @@
/*
* Copyright 2016 The Apache Software Foundation.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.tika.parser.image;

import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Set;
import org.apache.poi.util.IOUtils;
import org.apache.tika.exception.TikaException;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.mime.MediaType;
import org.apache.tika.parser.ParseContext;
import org.apache.tika.parser.AbstractParser;
import static org.apache.tika.parser.image.ICNSType.findIconType;
import org.apache.tika.sax.XHTMLContentHandler;
import org.xml.sax.ContentHandler;
import org.xml.sax.SAXException;

/**
*
* @author Manisha Kampasi
*/
//Create a basic parser class to parse ICNS files
public class ICNSParser extends AbstractParser {

//private static final long serialVersionUID = 261736541253892772L;
private static final Set<MediaType> SUPPORTED_TYPES = Collections.singleton(MediaType.image("icns"));
public static final String ICNS_MIME_TYPE = "image/icns";

public Set<MediaType> getSupportedTypes(ParseContext context) {
return SUPPORTED_TYPES;
}

public void parse(
InputStream stream, ContentHandler handler,
Metadata metadata, ParseContext context)
throws IOException, SAXException, TikaException {
byte[] header = new byte[4];
IOUtils.readFully(stream, header, 0, 4); // Extract magic byte
if (header[0] == (byte) 'i' && header[1] == (byte) 'c'
&& header[2] == (byte) 'n' && header[3] == (byte) 's') {
// Good, signature found
} else {
throw new TikaException("ICNS magic signature invalid");
}
IOUtils.readFully(stream, header, 0, 4); //Extract image size/length of bytes in file
int image_length = java.nio.ByteBuffer.wrap(header).getInt();
byte[] full_file = new byte[image_length];
IOUtils.readFully(stream, full_file);
ArrayList<ICNSType> icons = new ArrayList<>();
ArrayList<ICNSType> icon_masks = new ArrayList<>();
byte[] tempByteArray = new byte[4];
for (int offset = 0; offset < image_length - 8;) {
//Read the ResType/OSTYpe identifier for sub-icon
tempByteArray[0] = full_file[offset];
tempByteArray[1] = full_file[offset + 1];
tempByteArray[2] = full_file[offset + 2];
tempByteArray[3] = full_file[offset + 3];
ICNSType icnstype = findIconType(tempByteArray);

if (icnstype == null) {
//exit out of loop
//No more icons left
offset = image_length - 8;
} else if (icnstype.hasMask() == true) {
icon_masks.add(findIconType(tempByteArray));
} else {
icons.add(findIconType(tempByteArray));

}
//Read the sub-icon length
tempByteArray[0] = full_file[offset + 4];
tempByteArray[1] = full_file[offset + 5];
tempByteArray[2] = full_file[offset + 6];
tempByteArray[3] = full_file[offset + 7];
int icon_length = java.nio.ByteBuffer.wrap(tempByteArray).getInt();
offset = offset + icon_length;
}
String icon_details = "", iconmask_details = "", bitsPerPixel,dimensions;
for (ICNSType icon : icons) {
bitsPerPixel = (icon.getBitsPerPixel()!=0)?icon.getBitsPerPixel() + " bpp":"JPEG 2000 or PNG format";
dimensions = (!icon.hasRetinaDisplay())?(icon.getHeight() + "x" + icon.getWidth()):(icon.getHeight() + "x" + icon.getWidth() + "@2X");
icon_details = icon_details + ", " + dimensions + " (" + bitsPerPixel + ")";
}
for (ICNSType icon : icon_masks) {
iconmask_details = iconmask_details + ", " + icon.getHeight() + "x" + icon.getWidth() + " (" + icon.getBitsPerPixel() + " bpp" + ")";
}

metadata.set(Metadata.CONTENT_TYPE, ICNS_MIME_TYPE);
if (!icon_details.equals("")) {
metadata.set("Icon count", String.valueOf(icons.size()));
icon_details = icon_details.substring(2);
metadata.set("Icon details", icon_details);
}
if (!iconmask_details.equals("")) {
metadata.set("Masked icon count", String.valueOf(icon_masks.size()));
iconmask_details = iconmask_details.substring(2);
metadata.set("Masked icon details", iconmask_details);
}
XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata);
xhtml.startDocument();
xhtml.endDocument();
}

}
173 changes: 173 additions & 0 deletions tika-parsers/src/main/java/org/apache/tika/parser/image/ICNSType.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,173 @@
/*
* Copyright 2016 The Apache Software Foundation.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.tika.parser.image;

import java.io.UnsupportedEncodingException;

/**
 * Catalogue of the ICNS element types this package recognises.
 * <p>
 * Each constant pairs a four-character element identifier (stored as a
 * big-endian {@code int}) with the fixed properties of that element: pixel
 * dimensions, bit depth, whether it carries mask data and whether it is a
 * retina ("@2X") variant. A bit depth of {@code 0} is used for the element
 * types whose payload is an embedded JPEG 2000 or PNG image.
 * Instances are immutable and only obtainable through the constants or
 * {@link #findIconType(byte[])}.
 *
 * @author Manisha Kampasi
 */
public class ICNSType {

    private final int type;
    private final int width;
    private final int height;
    private final int bitsPerPixel;
    private final boolean hasMask;
    private final boolean hasRetinaDisplay;

    /** @return the four-character element identifier packed big-endian into an int */
    public int getType() {
        return type;
    }

    /** @return the icon width in pixels */
    public int getWidth() {
        return width;
    }

    /** @return the icon height in pixels */
    public int getHeight() {
        return height;
    }

    /** @return the bit depth, or {@code 0} for embedded JPEG 2000 / PNG elements */
    public int getBitsPerPixel() {
        return bitsPerPixel;
    }

    /** @return {@code true} if this element type is, or includes, a mask */
    public boolean hasMask() {
        return hasMask;
    }

    /** @return {@code true} if this element type is a retina ("@2X") variant */
    public boolean hasRetinaDisplay() {
        return hasRetinaDisplay;
    }

    /**
     * Packs four bytes into an {@code int}, most significant byte first.
     *
     * @param bytes exactly four bytes
     * @return the big-endian integer value
     * @throws IllegalArgumentException if {@code bytes} is {@code null} or not four bytes long
     */
    public static int converttoInt(byte[] bytes) {
        // Reject null here as well, so callers get the documented exception
        // instead of a NullPointerException.
        if (bytes == null || bytes.length != 4) {
            throw new IllegalArgumentException("Cannot convert to integer");
        }
        return ((0xff & bytes[0]) << 24)
                | ((0xff & bytes[1]) << 16)
                | ((0xff & bytes[2]) << 8)
                | (0xff & bytes[3]);
    }

    private ICNSType(String type, int width, int height, int bitsPerPixel, boolean hasMask, boolean hasRetinaDisplay) {
        // StandardCharsets avoids the checked UnsupportedEncodingException of the
        // name-based overload, which previously had to be swallowed.
        this.type = converttoInt(type.getBytes(java.nio.charset.StandardCharsets.US_ASCII));
        this.width = width;
        this.height = height;
        this.bitsPerPixel = bitsPerPixel;
        this.hasMask = hasMask;
        this.hasRetinaDisplay = hasRetinaDisplay;
    }

    public static final ICNSType ICNS_32x32_1BIT_IMAGE
            = new ICNSType("ICON", 32, 32, 1, false, false);
    public static final ICNSType ICNS_16x12_1BIT_IMAGE_AND_MASK
            = new ICNSType("icm#", 16, 12, 1, true, false);
    public static final ICNSType ICNS_16x12_4BIT_IMAGE
            = new ICNSType("icm4", 16, 12, 4, false, false);
    public static final ICNSType ICNS_16x12_8BIT_IMAGE
            = new ICNSType("icm8", 16, 12, 8, false, false);

    public static final ICNSType ICNS_16x16_8BIT_MASK
            = new ICNSType("s8mk", 16, 16, 8, true, false);
    public static final ICNSType ICNS_16x16_1BIT_IMAGE_AND_MASK
            = new ICNSType("ics#", 16, 16, 1, true, false);
    public static final ICNSType ICNS_16x16_4BIT_IMAGE
            = new ICNSType("ics4", 16, 16, 4, false, false);
    public static final ICNSType ICNS_16x16_8BIT_IMAGE
            = new ICNSType("ics8", 16, 16, 8, false, false);
    public static final ICNSType ICNS_16x16_24BIT_IMAGE
            = new ICNSType("is32", 16, 16, 24, false, false);

    public static final ICNSType ICNS_32x32_8BIT_MASK
            = new ICNSType("l8mk", 32, 32, 8, true, false);
    public static final ICNSType ICNS_32x32_1BIT_IMAGE_AND_MASK
            = new ICNSType("ICN#", 32, 32, 1, true, false);
    public static final ICNSType ICNS_32x32_4BIT_IMAGE
            = new ICNSType("icl4", 32, 32, 4, false, false);
    public static final ICNSType ICNS_32x32_8BIT_IMAGE
            = new ICNSType("icl8", 32, 32, 8, false, false);
    public static final ICNSType ICNS_32x32_24BIT_IMAGE
            = new ICNSType("il32", 32, 32, 24, false, false);

    public static final ICNSType ICNS_48x48_8BIT_MASK
            = new ICNSType("h8mk", 48, 48, 8, true, false);
    public static final ICNSType ICNS_48x48_1BIT_IMAGE_AND_MASK
            = new ICNSType("ich#", 48, 48, 1, true, false);
    public static final ICNSType ICNS_48x48_4BIT_IMAGE
            = new ICNSType("ich4", 48, 48, 4, false, false);
    public static final ICNSType ICNS_48x48_8BIT_IMAGE
            = new ICNSType("ich8", 48, 48, 8, false, false);
    public static final ICNSType ICNS_48x48_24BIT_IMAGE
            = new ICNSType("ih32", 48, 48, 24, false, false);
    public static final ICNSType ICNS_128x128_8BIT_MASK
            = new ICNSType("t8mk", 128, 128, 8, true, false);
    public static final ICNSType ICNS_128x128_24BIT_IMAGE
            = new ICNSType("it32", 128, 128, 24, false, false);

    public static final ICNSType ICNS_16x16_JPEG_PNG_IMAGE
            = new ICNSType("icp4", 16, 16, 0, false, false);
    public static final ICNSType ICNS_32x32_JPEG_PNG_IMAGE
            = new ICNSType("icp5", 32, 32, 0, false, false);
    public static final ICNSType ICNS_64x64_JPEG_PNG_IMAGE
            = new ICNSType("icp6", 64, 64, 0, false, false);
    public static final ICNSType ICNS_128x128_JPEG_PNG_IMAGE
            = new ICNSType("icp7", 128, 128, 0, false, false);
    public static final ICNSType ICNS_256x256_JPEG_PNG_IMAGE
            = new ICNSType("ic08", 256, 256, 0, false, false);
    public static final ICNSType ICNS_512x512_JPEG_PNG_IMAGE
            = new ICNSType("ic09", 512, 512, 0, false, false);
    public static final ICNSType ICNS_1024x1024_2X_JPEG_PNG_IMAGE
            = new ICNSType("ic10", 1024, 1024, 0, false, true);
    public static final ICNSType ICNS_16x16_2X_JPEG_PNG_IMAGE
            = new ICNSType("ic11", 16, 16, 0, false, true);
    public static final ICNSType ICNS_32x32_2X_JPEG_PNG_IMAGE
            = new ICNSType("ic12", 32, 32, 0, false, true);
    public static final ICNSType ICNS_128x128_2X_JPEG_PNG_IMAGE
            = new ICNSType("ic13", 128, 128, 0, false, true);
    public static final ICNSType ICNS_256x256_2X_JPEG_PNG_IMAGE
            = new ICNSType("ic14", 256, 256, 0, false, true);

    // Must stay below the constants: static initialisers run in textual order.
    private static final ICNSType[] allImageTypes
            = {
                ICNS_32x32_1BIT_IMAGE, ICNS_16x12_1BIT_IMAGE_AND_MASK, ICNS_16x12_4BIT_IMAGE, ICNS_16x12_8BIT_IMAGE,
                ICNS_16x16_1BIT_IMAGE_AND_MASK, ICNS_16x16_4BIT_IMAGE, ICNS_16x16_8BIT_IMAGE, ICNS_16x16_24BIT_IMAGE,
                ICNS_32x32_1BIT_IMAGE_AND_MASK, ICNS_32x32_4BIT_IMAGE, ICNS_32x32_8BIT_IMAGE, ICNS_32x32_24BIT_IMAGE,
                ICNS_48x48_1BIT_IMAGE_AND_MASK, ICNS_48x48_4BIT_IMAGE, ICNS_48x48_8BIT_IMAGE, ICNS_48x48_24BIT_IMAGE,
                ICNS_128x128_24BIT_IMAGE, ICNS_16x16_8BIT_MASK,
                ICNS_32x32_8BIT_MASK, ICNS_48x48_8BIT_MASK, ICNS_128x128_8BIT_MASK,
                ICNS_16x16_JPEG_PNG_IMAGE, ICNS_32x32_JPEG_PNG_IMAGE, ICNS_64x64_JPEG_PNG_IMAGE, ICNS_128x128_JPEG_PNG_IMAGE, ICNS_256x256_JPEG_PNG_IMAGE,
                ICNS_512x512_JPEG_PNG_IMAGE, ICNS_1024x1024_2X_JPEG_PNG_IMAGE, ICNS_16x16_2X_JPEG_PNG_IMAGE, ICNS_32x32_2X_JPEG_PNG_IMAGE,
                ICNS_128x128_2X_JPEG_PNG_IMAGE, ICNS_256x256_2X_JPEG_PNG_IMAGE
            };

    /**
     * Looks up the element type whose identifier matches the given bytes.
     *
     * @param bytes the four identifier bytes as they appear in the file
     * @return the matching constant, or {@code null} if the identifier is not known
     * @throws IllegalArgumentException if {@code bytes} is {@code null} or not four bytes long
     */
    public static ICNSType findIconType(byte[] bytes) {
        int type = converttoInt(bytes);
        for (ICNSType candidate : allImageTypes) {
            if (candidate.getType() == type) {
                return candidate;
            }
        }
        return null;
    }

}
Original file line number Diff line number Diff line change
Expand Up @@ -67,4 +67,5 @@ org.apache.tika.parser.isatab.ISArchiveParser
org.apache.tika.parser.geoinfo.GeographicInformationParser
org.apache.tika.parser.geo.topic.GeoParser
org.apache.tika.parser.external.CompositeExternalParser
org.apache.tika.parser.journal.JournalParser
org.apache.tika.parser.journal.JournalParser
org.apache.tika.parser.image.ICNSParser
Original file line number Diff line number Diff line change
Expand Up @@ -422,6 +422,15 @@ public void testBpgDetection() throws Exception {
assertTypeByName("image/x-bpg", "x.bpg");
}

/**
 * Checks that the ICNS sample resources are identified as image/icns.
 * NOTE(review): assertType / assertTypeByData / assertTypeByName are defined
 * outside this view; presumably they detect by content plus name, content
 * only, and name only respectively - confirm against the helper definitions.
 */
@Test
public void testIcnsDetection() throws Exception {
assertType("image/icns", "testICNS.icns");
assertTypeByData("image/icns", "testICNS_basic.icns");
assertTypeByData("image/icns", "testICNS.icns");
assertTypeByName("image/icns", "testICNS.icns");
}


@Test
public void testTiffDetection() throws Exception {
assertType("image/tiff", "testTIFF.tif");
Expand Down
Loading

0 comments on commit 585ab9b

Please sign in to comment.