Skip to content

Commit

Permalink
create ShapeFileHandler from file to avoid Zip/File-InputStream
Browse files Browse the repository at this point in the history
  • Loading branch information
jo-pol committed Sep 3, 2024
1 parent db5150d commit 86f6798
Show file tree
Hide file tree
Showing 4 changed files with 62 additions and 168 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -100,71 +100,25 @@ public IngestServiceShapefileHelper(File zippedShapefile, File rezipFolder){
//this.processFile(zippedShapefile, rezipFolder);

}

/**
 * Opens a {@link FileInputStream} on the given file.
 *
 * @param fileObject the file to open; may be null
 * @return the opened stream, or null when fileObject is null or the
 *         stream could not be created (the failure is logged as severe)
 */
private FileInputStream getFileInputStream(File fileObject){
    if (fileObject != null) {
        try {
            return new FileInputStream(fileObject);
        } catch (FileNotFoundException notFound) {
            logger.severe("Failed to create FileInputStream from File: " + fileObject.getAbsolutePath());
        }
    }
    // Either there was no file object, or opening it failed.
    return null;
}

/**
 * Closes the given stream if there is one.
 * A failure to close is logged at info level and otherwise ignored.
 *
 * @param fis the stream to close; may be null, in which case nothing happens
 */
private void closeFileInputStream(FileInputStream fis){
    if (fis != null) {
        try {
            fis.close();
        } catch (IOException closeFailure) {
            logger.info("Failed to close FileInputStream");
        }
    }
}


public boolean processFile() {

if ((!isValidFile(this.zippedShapefile))||(!isValidFolder(this.rezipFolder))){
return false;
}

// (1) Use the ShapefileHandler to the .zip for a shapefile
//
FileInputStream shpfileInputStream = this.getFileInputStream(zippedShapefile);
if (shpfileInputStream==null){
return false;
}

this.shpHandler = new ShapefileHandler(shpfileInputStream);
if (!shpHandler.containsShapefile()){
logger.severe("Shapefile was incorrectly detected upon Ingest (FileUtil) and passed here");
return false;
}

this.closeFileInputStream(shpfileInputStream);

// (2) Rezip the shapefile pieces
logger.info("rezipFolder: " + rezipFolder.getAbsolutePath());
shpfileInputStream = this.getFileInputStream(zippedShapefile);
if (shpfileInputStream==null){
return false;
}

boolean rezipSuccess;
try {
rezipSuccess = shpHandler.rezipShapefileSets(shpfileInputStream, rezipFolder);
this.shpHandler = new ShapefileHandler(zippedShapefile);
if (!shpHandler.containsShapefile()){
logger.severe("Shapefile was incorrectly detected upon Ingest (FileUtil) and passed here");
return false;
}
logger.info("rezipFolder: " + rezipFolder.getAbsolutePath());
return shpHandler.rezipShapefileSets(rezipFolder);
} catch (IOException ex) {
logger.severe("Shapefile was not correctly unpacked/repacked");
logger.severe("shpHandler message: " + shpHandler.errorMessage);
return false;
}

this.closeFileInputStream(shpfileInputStream);

return rezipSuccess;

// return createDataFiles(rezipFolder);

}
Expand Down
2 changes: 1 addition & 1 deletion src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java
Original file line number Diff line number Diff line change
Expand Up @@ -510,7 +510,7 @@ public static String determineFileType(File f, String fileName) throws IOExcepti
// Check for shapefile extensions as described here: http://en.wikipedia.org/wiki/Shapefile
//logger.info("Checking for shapefile");

ShapefileHandler shp_handler = new ShapefileHandler(new FileInputStream(f));
ShapefileHandler shp_handler = new ShapefileHandler(f);
if (shp_handler.containsShapefile()){
// logger.info("------- shapefile FOUND ----------");
fileType = ShapefileHandler.SHAPEFILE_FILE_TYPE; //"application/zipped-shapefile";
Expand Down
144 changes: 42 additions & 102 deletions src/main/java/edu/harvard/iq/dataverse/util/ShapefileHandler.java
Original file line number Diff line number Diff line change
@@ -1,22 +1,22 @@
package edu.harvard.iq.dataverse.util;

import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.nio.file.Path;
import java.nio.file.StandardCopyOption;
import java.text.MessageFormat;
import java.util.Date;
import java.util.ArrayList;
import java.util.List;
import java.util.zip.ZipEntry;
import java.util.zip.ZipInputStream;
import java.util.zip.ZipException;
import java.util.zip.ZipFile;
import java.util.HashMap;
import java.util.*;

import java.nio.file.Files;
import static java.nio.file.StandardCopyOption.REPLACE_EXISTING;
import java.util.logging.Level;
import static java.text.MessageFormat.format;

import java.util.logging.Logger;
import org.apache.commons.io.FileUtils;

Expand All @@ -42,11 +42,10 @@
* "shape1.pdf", "README.md", "shape_notes.txt"
*
* Code Example:
* FileInputStream shp_file_input_stream = new FileInputStream(new File("zipped_shapefile.zip"))
* ShapefileHandler shp_handler = new ShapefileHandler(shp_file_input_stream);
* ShapefileHandler shp_handler = new ShapefileHandler(new File("zipped_shapefile.zip"));
* if (shp_handler.containsShapefile()){
* File rezip_folder = new File("~/folder_for_rezipping");
* boolean rezip_success = shp_handler.rezipShapefileSets(shp_file_input_stream, rezip_folder );
* boolean rezip_success = shp_handler.rezipShapefileSets(rezip_folder );
* if (!rezip_success){
* // rezip failed, should be an error message (String) available
* System.out.println(shp_handler.errorMessage);
Expand All @@ -73,7 +72,8 @@ public class ShapefileHandler{
public final static String SHP_XML_EXTENSION = "shp.xml";
public final static String BLANK_EXTENSION = "__PLACEHOLDER-FOR-BLANK-EXTENSION__";
public final static List<String> SHAPEFILE_ALL_EXTENSIONS = Arrays.asList("shp", "shx", "dbf", "prj", "sbn", "sbx", "fbn", "fbx", "ain", "aih", "ixs", "mxs", "atx", "cpg", "qpj", "qmd", SHP_XML_EXTENSION);

private final File zipFile;

public boolean DEBUG = false;

private boolean zipFileProcessed = false;
Expand All @@ -97,9 +97,6 @@ public class ShapefileHandler{

private List<File> finalRezippedFiles = new ArrayList<>();

private String outputFolder = "unzipped";
private String rezippedFolder = "rezipped";

// Debug helper
private void msg(String s){
//logger.info(s);
Expand All @@ -115,40 +112,28 @@ private void msgt(String s){
}

/*
Constructor, start with filename
*/
public ShapefileHandler(String filename){

if (filename==null){
this.addErrorMessage("The filename was null");
return;
}

FileInputStream zip_file_stream;
try {
zip_file_stream = new FileInputStream(new File(filename));
} catch (FileNotFoundException ex) {
this.addErrorMessage("The file was not found");
Constructor, start with File
*/
public ShapefileHandler(File zip_file) throws IOException {
zipFile = zip_file;
if (zip_file == null) {
this.addErrorMessage("The file was null");
return;
}

this.examineZipfile(zip_file_stream);

}


/*
Constructor, start with FileInputStream
*/
public ShapefileHandler(FileInputStream zip_file_stream){

if (zip_file_stream==null){
this.addErrorMessage("The zip_file_stream was null");
return;
try (var zip_file_object = new ZipFile(zip_file)) {
this.examineZipfile(zip_file_object);
}
catch (FileNotFoundException ex) {
// Back when this constructor took a FileInputStream as its argument:
// - FileUtil.determineFileType threw this exception before it could call the constructor with a FileInputStream.
// - IngestServiceShapefileHelper.processFile won't call this constructor if the file is not valid, hence does not exist.
// If the file had disappeared in the meantime, the old code would have produced a slightly different error message.
logger.severe("File not found: " + zip_file.getAbsolutePath());
throw ex;
}
this.examineZipfile(zip_file_stream);
}

/**
 * Returns the handler's list of final rezipped files.
 *
 * @return the list held in the finalRezippedFiles field itself, not a copy
 */
public List<File> getFinalRezippedFiles(){
    return finalRezippedFiles;
}
Expand Down Expand Up @@ -290,26 +275,19 @@ inside the uploaded zip file (issue #6873). To achieve this, we recreate
subfolders in the FileMetadata of the newly created DataFiles.
(-- L.A. 09/2020)
*/
private boolean unzipFilesToDirectory(FileInputStream zipfile_input_stream, File target_directory){
private boolean unzipFilesToDirectory(ZipFile zipfileInput, File target_directory){
logger.fine("unzipFilesToDirectory: " + target_directory.getAbsolutePath() );

if (zipfile_input_stream== null){
this.addErrorMessage("unzipFilesToDirectory. The zipfile_input_stream is null.");
return false;
}
if (!target_directory.isDirectory()){
this.addErrorMessage("This directory does not exist: " + target_directory.getAbsolutePath());
return false;
}

List<String> unzippedFileNames = new ArrayList<>();
List<String> unzippedFileNames = new ArrayList<>();

ZipInputStream zipStream = new ZipInputStream(zipfile_input_stream);

ZipEntry origEntry;
byte[] buffer = new byte[2048];
try {
while((origEntry = zipStream.getNextEntry())!=null){
for(var origEntry : Collections.list(zipfileInput.entries())){

String zentryFileName = origEntry.getName();
logger.fine("\nOriginal entry name: " + origEntry);
Expand Down Expand Up @@ -359,15 +337,9 @@ private boolean unzipFilesToDirectory(FileInputStream zipfile_input_stream, File
unzippedFileNames.add(outpath);
}
logger.fine("Write zip file: " + outpath);
FileOutputStream fileOutputStream;
long fsize = 0;
fileOutputStream = new FileOutputStream(outpath);
int len;// = 0;
while ((len = zipStream.read(buffer)) > 0){
fileOutputStream.write(buffer, 0, len);
fsize+=len;
} // end while
fileOutputStream.close();
try(var inputStream = zipfileInput.getInputStream(origEntry)) {
Files.copy(inputStream, Path.of(outpath), StandardCopyOption.REPLACE_EXISTING);
}
} // end outer while
} catch (IOException ex) {
for (StackTraceElement el : ex.getStackTrace()){
Expand All @@ -376,19 +348,13 @@ private boolean unzipFilesToDirectory(FileInputStream zipfile_input_stream, File
this.addErrorMessage("Failed to open ZipInputStream entry" + ex.getMessage());
return false;
}

try {
zipStream.close();
} catch (IOException ex) {
Logger.getLogger(ShapefileHandler.class.getName()).log(Level.SEVERE, null, ex);
}
return true;
return true;
}
/*
Rezip the shapefile(s) into a given directory
Assumes that the zip file has already been checked!
*/
public boolean rezipShapefileSets(FileInputStream zipfile_input_stream, File rezippedFolder) throws IOException{
public boolean rezipShapefileSets(File rezippedFolder) throws IOException{
logger.fine("rezipShapefileSets");
//msgt("rezipShapefileSets");
if (!this.zipFileProcessed){
Expand All @@ -399,10 +365,6 @@ public boolean rezipShapefileSets(FileInputStream zipfile_input_stream, File rez
this.addErrorMessage("There are no shapefiles here!");
return false;
}
if (zipfile_input_stream== null){
this.addErrorMessage("The zipfile_input_stream is null.");
return false;
}
if (rezippedFolder == null){
this.addErrorMessage("The rezippedFolder is null.");
return false;
Expand Down Expand Up @@ -432,9 +394,11 @@ public boolean rezipShapefileSets(FileInputStream zipfile_input_stream, File rez


// Unzip files!
if (!this.unzipFilesToDirectory(zipfile_input_stream, dir_for_unzipping)){
this.addErrorMessage("Failed to unzip files.");
return false;
try(var zipfileObject = new ZipFile(zipFile)) {
if (!this.unzipFilesToDirectory(zipfileObject, dir_for_unzipping)) {
this.addErrorMessage("Failed to unzip files.");
return false;
}
}
// Redistribute files!
String target_dirname = rezippedFolder.getAbsolutePath();
Expand Down Expand Up @@ -680,27 +644,17 @@ private boolean isFileToSkip(String fname){
/**************************************
* Iterate through the zip file contents.
* Does it contain any shapefiles?
*
* @param FileInputStream zip_file_stream
*/
private boolean examineZipfile(FileInputStream zip_file_stream){
private boolean examineZipfile(ZipFile zip_file){
// msgt("examineZipfile");

if (zip_file_stream==null){
this.addErrorMessage("The zip file stream was null");
return false;
}

// Clear out file lists
this.filesListInDir.clear();
this.filesizeHash.clear();
this.fileGroups.clear();

try{
ZipInputStream zipStream = new ZipInputStream(zip_file_stream);
ZipEntry entry;

while((entry = zipStream.getNextEntry())!=null){
for(var entry : Collections.list(zip_file.entries())){

String zentryFileName = entry.getName();
//msg("zip entry: " + entry.getName());
Expand Down Expand Up @@ -738,8 +692,6 @@ private boolean examineZipfile(FileInputStream zip_file_stream){
this.filesizeHash.put(unzipFilePath, entry.getSize());
}
} // end while

zipStream.close();

if (this.filesListInDir.isEmpty()){
errorMessage = "No files in zipStream";
Expand All @@ -749,23 +701,11 @@ private boolean examineZipfile(FileInputStream zip_file_stream){
this.zipFileProcessed = true;
return true;

}catch(ZipException ex){
this.addErrorMessage("ZipException");
msgt("ZipException");
return false;

}catch(IOException ex){
//ex.printStackTrace();
this.addErrorMessage("IOException File name");
msgt("IOException");
return false;
}catch(IllegalArgumentException ex){
this.addErrorMessage("IllegalArgumentException when parsing zipfile");
msgt("IllegalArgumentException when parsing zipfile");
return false;

}finally{

}

} // end examineFile
Expand Down
Loading

0 comments on commit 86f6798

Please sign in to comment.