Catch formatter exception

skoulouzis · May 22, 2017 · cc5058b
1 parent 7afa64c
commit cc5058b
Show file tree

Hide file tree

Showing 2 changed files with 143 additions and 136 deletions.
diff --git a/...ication/src/main/java/eu/edisonproject/classification/prepare/controller/DataPrepare.java b/...ication/src/main/java/eu/edisonproject/classification/prepare/controller/DataPrepare.java
@@ -59,152 +59,152 @@
  */
 public class DataPrepare implements IDataPrepare {
 
-  private String inputFolder;
-  private String outputFolder;
-  private LinkedList<DocumentObject> documentObjectList;
-  private DocumentObject documentObject;
-  private String charArraySetPath;
+    private String inputFolder;
+    private String outputFolder;
+    private LinkedList<DocumentObject> documentObjectList;
+    private DocumentObject documentObject;
+    private String charArraySetPath;
 //    private CharArraySet stopWordArraySet;
 //    private ReaderFile fileReader;
 //    private static final int maxNumberOfAvroPerFile = 10;
-  private final StopWord cleanStopWord;
-  private final StanfordLemmatizer cleanLemmatisation;
-
-  public DataPrepare(String inputFolder, String outputFolder, String stopWordsPath) {
-    this.inputFolder = inputFolder;
-    this.outputFolder = outputFolder;
-    documentObjectList = new LinkedList<>();
-    CharArraySet stopWordArraySet = new CharArraySet(ConfigHelper.loadStopWords(stopWordsPath), true);
-    cleanStopWord = new StopWord(stopWordArraySet);
-    cleanLemmatisation = new StanfordLemmatizer();
-  }
-
-  @Override
-  public void execute() {
-    File file = new File(inputFolder);
-    Document davro;
-    DocumentAvroSerializer dAvroSerializer = null;
-    if (file.isDirectory()) {
-      File[] filesInDir = file.listFiles();
+    private final StopWord cleanStopWord;
+    private final StanfordLemmatizer cleanLemmatisation;
+
+    public DataPrepare(String inputFolder, String outputFolder, String stopWordsPath) {
+        this.inputFolder = inputFolder;
+        this.outputFolder = outputFolder;
+        documentObjectList = new LinkedList<>();
+        CharArraySet stopWordArraySet = new CharArraySet(ConfigHelper.loadStopWords(stopWordsPath), true);
+        cleanStopWord = new StopWord(stopWordArraySet);
+        cleanLemmatisation = new StanfordLemmatizer();
+    }
+
+    @Override
+    public void execute() {
+        File file = new File(inputFolder);
+        Document davro;
+        DocumentAvroSerializer dAvroSerializer = null;
+        if (file.isDirectory()) {
+            File[] filesInDir = file.listFiles();
 //            Arrays.sort(filesInDir);
 
 //            LocalDate date = getCreationDate(file);
-      for (File f : filesInDir) {
-        if (f.isFile() && FilenameUtils.getExtension(f.getName()).endsWith("txt")) {
-          LocalDate date = getCreationDate(f);
-          documentObject = new DocumentObject();
-          documentObject.setDate(date);
-          ReaderFile rf = new ReaderFile(f.getAbsolutePath());
-          String contents = rf.readFile();
-          cleanStopWord.setDescription(contents);
-          String cleanCont = cleanStopWord.execute().toLowerCase();
-          cleanLemmatisation.setDescription(cleanCont);
-          cleanCont = cleanLemmatisation.execute();
-          documentObject.setDescription(cleanCont);
-          documentObject.setDocumentId(FilenameUtils.removeExtension(f.getName()));
-          documentObject.setTitle(f.getParentFile().getName());
+            for (File f : filesInDir) {
+                if (f.isFile() && FilenameUtils.getExtension(f.getName()).endsWith("txt")) {
+                    LocalDate date = getCreationDate(f);
+                    documentObject = new DocumentObject();
+                    documentObject.setDate(date);
+                    ReaderFile rf = new ReaderFile(f.getAbsolutePath());
+                    String contents = rf.readFile();
+                    cleanStopWord.setDescription(contents);
+                    String cleanCont = cleanStopWord.execute().toLowerCase();
+                    cleanLemmatisation.setDescription(cleanCont);
+                    cleanCont = cleanLemmatisation.execute();
+                    documentObject.setDescription(cleanCont);
+                    documentObject.setDocumentId(FilenameUtils.removeExtension(f.getName()));
+                    documentObject.setTitle(f.getParentFile().getName());
 //                extract(this.getDocumentObject(), f.getPath());
 //                documentObject.setDescription(documentObject.getDescription().toLowerCase());
 //                clean(this.getDocumentObject().getDescription());
-          if (documentObject.getDescription().equals("")) {
-            continue;
-          }
-          documentObjectList.add(this.getDocumentObject());
-
-          davro = new Document();
-          davro.setDocumentId(documentObject.getDocumentId());
-          davro.setTitle(documentObject.getTitle());
-          davro.setDate(documentObject.getDate().toString());
-          davro.setDescription(documentObject.getDescription());
-
-          if (dAvroSerializer == null) {
-            dAvroSerializer = new DocumentAvroSerializer(outputFolder
-                    + File.separator + documentObject.getTitle().replaceAll(" ", "_")
-                    + date + ".avro", davro.getSchema());
-          }
-          Logger.getLogger(Text2Avro.class.getName()).log(Level.INFO, "Adding :{0} to: {1}{2}{3}{4}.avro", new Object[]{documentObject.getDocumentId(), outputFolder, File.separator, documentObject.getTitle().replaceAll(" ", "_"), date});
-          dAvroSerializer.serialize(davro);
+                    if (documentObject.getDescription().equals("")) {
+                        continue;
+                    }
+                    documentObjectList.add(this.getDocumentObject());
+
+                    davro = new Document();
+                    davro.setDocumentId(documentObject.getDocumentId());
+                    davro.setTitle(documentObject.getTitle());
+                    davro.setDate(documentObject.getDate().toString());
+                    davro.setDescription(documentObject.getDescription());
+
+                    if (dAvroSerializer == null) {
+                        dAvroSerializer = new DocumentAvroSerializer(outputFolder
+                                + File.separator + documentObject.getTitle().replaceAll(" ", "_")
+                                + date + ".avro", davro.getSchema());
+                    }
+                    Logger.getLogger(Text2Avro.class.getName()).log(Level.INFO, "Adding :{0} to: {1}{2}{3}{4}.avro", new Object[]{documentObject.getDocumentId(), outputFolder, File.separator, documentObject.getTitle().replaceAll(" ", "_"), date});
+                    dAvroSerializer.serialize(davro);
+                }
+
+            }
+
+            if (dAvroSerializer != null) {
+                dAvroSerializer.close();
+                dAvroSerializer = null;
+            }
         }
 
-      }
-
-      if (dAvroSerializer != null) {
-        dAvroSerializer.close();
-        dAvroSerializer = null;
-      }
     }
 
-  }
-
-  @Override
-  public void extract(DocumentObject jp, String filePath) {
-    Extractor extractorTitle = new Title();
-    extractorTitle.setJp(jp);
-    extractorTitle.setFilePath(filePath);
-    extractorTitle.readFromFile();
-    extractorTitle.extract();
-
-    Extractor extractorDate = new Date();
-    extractorDate.setJp(extractorTitle.getJp());
-    extractorDate.extract();
-
-    Extractor extractorText = new Text();
-    extractorText.setJp(extractorDate.getJp());
-    extractorText.extract();
-  }
+    @Override
+    public void extract(DocumentObject jp, String filePath) {
+        Extractor extractorTitle = new Title();
+        extractorTitle.setJp(jp);
+        extractorTitle.setFilePath(filePath);
+        extractorTitle.readFromFile();
+        extractorTitle.extract();
+
+        Extractor extractorDate = new Date();
+        extractorDate.setJp(extractorTitle.getJp());
+        extractorDate.extract();
+
+        Extractor extractorText = new Text();
+        extractorText.setJp(extractorDate.getJp());
+        extractorText.extract();
+    }
 
-  @Override
-  public void clean(String description) {
-    //System.out.println("DESCRIZIONE"+description);
+    @Override
+    public void clean(String description) {
+        //System.out.println("DESCRIZIONE"+description);
 //        Cleaner cleanStopWord = new StopWord(this.getStopWordArraySet());
-    cleanStopWord.setDescription(description);
-    documentObject.setDescription(cleanStopWord.execute());
-    //System.out.println(documentObject.getDescription());
-    Cleaner cleanStanfordLemmatizer = new StanfordLemmatizer();
-    cleanStanfordLemmatizer.setDescription(documentObject.getDescription());
-    documentObject.setDescription(cleanStanfordLemmatizer.execute());
+        cleanStopWord.setDescription(description);
+        documentObject.setDescription(cleanStopWord.execute());
+        //System.out.println(documentObject.getDescription());
+        Cleaner cleanStanfordLemmatizer = new StanfordLemmatizer();
+        cleanStanfordLemmatizer.setDescription(documentObject.getDescription());
+        documentObject.setDescription(cleanStanfordLemmatizer.execute());
 
-  }
+    }
 
-  public String getInputFolder() {
-    return inputFolder;
-  }
+    public String getInputFolder() {
+        return inputFolder;
+    }
 
-  public void setInputFolder(String inputFolder) {
-    this.inputFolder = inputFolder;
-  }
+    public void setInputFolder(String inputFolder) {
+        this.inputFolder = inputFolder;
+    }
 
-  public String getOutputFolder() {
-    return outputFolder;
-  }
+    public String getOutputFolder() {
+        return outputFolder;
+    }
 
-  public void setOutputFolder(String outputFolder) {
-    this.outputFolder = outputFolder;
-  }
+    public void setOutputFolder(String outputFolder) {
+        this.outputFolder = outputFolder;
+    }
 
-  public LinkedList<DocumentObject> getJobPostList() {
-    return documentObjectList;
-  }
+    public LinkedList<DocumentObject> getJobPostList() {
+        return documentObjectList;
+    }
 
-  public void setDocumentObjectList(LinkedList<DocumentObject> jdocumentObjectList) {
-    this.documentObjectList = jdocumentObjectList;
-  }
+    public void setDocumentObjectList(LinkedList<DocumentObject> jdocumentObjectList) {
+        this.documentObjectList = jdocumentObjectList;
+    }
 
-  public DocumentObject getDocumentObject() {
-    return documentObject;
-  }
+    public DocumentObject getDocumentObject() {
+        return documentObject;
+    }
 
-  public void setJDocumentObject(DocumentObject jobPost) {
-    this.documentObject = jobPost;
-  }
+    public void setJDocumentObject(DocumentObject jobPost) {
+        this.documentObject = jobPost;
+    }
 
-  public String getCharArraySetPath() {
-    return charArraySetPath;
-  }
+    public String getCharArraySetPath() {
+        return charArraySetPath;
+    }
 
-  public void setCharArraySetPath(String charArraySetPath) {
-    this.charArraySetPath = charArraySetPath;
-  }
+    public void setCharArraySetPath(String charArraySetPath) {
+        this.charArraySetPath = charArraySetPath;
+    }
 
 //    public CharArraySet getStopWordArraySet() {
 //        return stopWordArraySet;
@@ -213,23 +213,29 @@ public void setCharArraySetPath(String charArraySetPath) {
 //    public void setStopWordArraySet(CharArraySet charArraySet) {
 //        this.stopWordArraySet = charArraySet;
 //    }
-  private LocalDate getCreationDate(File file) {
-    Path p = Paths.get(file.getAbsolutePath());
-    BasicFileAttributes attr = null;
-    try {
-      attr = Files.readAttributes(p, BasicFileAttributes.class);
-    } catch (IOException ex) {
-      Logger.getLogger(Text2Avro.class.getName()).log(Level.SEVERE, null, ex);
-    }
-    FileTime ct = attr.creationTime();
-    DateTimeFormatter formatter;
+    private LocalDate getCreationDate(File file) {
+        Path p = Paths.get(file.getAbsolutePath());
+        BasicFileAttributes attr = null;
+        try {
+            attr = Files.readAttributes(p, BasicFileAttributes.class);
+        } catch (IOException ex) {
+            Logger.getLogger(Text2Avro.class.getName()).log(Level.SEVERE, null, ex);
+        }
+        FileTime ct = attr.creationTime();
+        DateTimeFormatter formatter;
 
-//        formatter = DateTimeFormat.forPattern("yyyy-MM-dd'T'HH:mm:ss.SSSSSS'Z'");
+//        
 //        LocalDate.parse("2016-09-18T11:40:03.750522Z", formatter);
-    formatter = DateTimeFormat.forPattern("yyyy-MM-dd'T'HH:mm:ss'Z'");
-    LocalDate date = LocalDate.parse(ct.toString(), formatter);
-    Logger.getLogger(Text2Avro.class.getName()).log(Level.INFO, "CreationDate: {0}", date);
-    return date;
-  }
+        formatter = DateTimeFormat.forPattern("yyyy-MM-dd'T'HH:mm:ss'Z'");
+        LocalDate date = null;
+        try {
+            date = LocalDate.parse(ct.toString(), formatter);
+        } catch (java.lang.IllegalArgumentException ex) {
+            formatter = DateTimeFormat.forPattern("yyyy-MM-dd'T'HH:mm:ss.SSSSSS'Z'");
+            date = LocalDate.parse(ct.toString(), formatter);
+        }
+        Logger.getLogger(Text2Avro.class.getName()).log(Level.INFO, "CreationDate: {0}", date);
+        return date;
+    }
 
 }
diff --git a/rest/nb-configuration.xml b/rest/nb-configuration.xml
@@ -14,5 +14,6 @@ That way multiple projects can share the same settings (useful for formatting ru
 Any value defined here will override the pom.xml file value but is only applicable to the current project.
 -->
         <netbeans.hint.jdkPlatform>JDK_1.7</netbeans.hint.jdkPlatform>
+        <org-netbeans-modules-whitelist.whitelist-oracle>false</org-netbeans-modules-whitelist.whitelist-oracle>
     </properties>
 </project-shared-configuration>