From 3f6ee6137cca9dab4f000625f5f23c9063b5e919 Mon Sep 17 00:00:00 2001 From: j-dimension Date: Sun, 28 Jan 2024 23:39:07 +0100 Subject: [PATCH] OCR for PDFs in scan inbox. issue #2294 --- .../jlawyer/client/JKanzleiGUI.java | 6 +- .../jlawyer/client/desktop/DesktopPanel.java | 6 +- .../editors/documents/EditScanPanel.form | 13 + .../editors/documents/EditScanPanel.java | 72 +- .../documents/ScannerDocumentsTimerTask.java | 33 +- .../editors/documents/ScannerPanel.java | 60 +- .../client/events/ScannerStatusEvent.java | 20 +- .../client/modulebar/ModuleButton.java | 2 +- .../baseline_font_download_green_48dp.png | Bin 0 -> 648 bytes .../baseline_font_download_off_black_48dp.png | Bin 0 -> 749 bytes .../baseline_font_download_off_red_48dp.png | Bin 0 -> 760 bytes .../baseline_font_download_red_48dp.png | Bin 0 -> 647 bytes .../baseline_font_download_yellow_48dp.png | Bin 0 -> 608 bytes .../services/IntegrationServiceRemote.java | 10 +- .../services/SingletonServiceRemote.java | 7 +- .../jdimension/jlawyer/pojo/FileMetadata.java | 765 +++++++++++++++++ .../jlawyer/services/IntegrationService.java | 232 ++++-- .../jlawyer/services/SingletonService.java | 172 ++-- .../services/SingletonServiceLocal.java | 7 +- .../jlawyer/async/SearchIndexProcessor.java | 74 ++ .../org/jlawyer/utils/ocr/OcrRequest.java | 695 ++++++++++++++++ .../java/org/jlawyer/utils/ocr/OcrUtils.java | 782 ++++++++++++++++++ .../jlawyer/timer/TransientTimer.java | 2 +- 23 files changed, 2770 insertions(+), 188 deletions(-) create mode 100644 j-lawyer-client/src/icons16/material/baseline_font_download_green_48dp.png create mode 100644 j-lawyer-client/src/icons16/material/baseline_font_download_off_black_48dp.png create mode 100644 j-lawyer-client/src/icons16/material/baseline_font_download_off_red_48dp.png create mode 100644 j-lawyer-client/src/icons16/material/baseline_font_download_red_48dp.png create mode 100644 j-lawyer-client/src/icons16/material/baseline_font_download_yellow_48dp.png create mode 
100644 j-lawyer-server-entities/src/java/com/jdimension/jlawyer/pojo/FileMetadata.java create mode 100644 j-lawyer-server/j-lawyer-server-ejb/src/java/org/jlawyer/utils/ocr/OcrRequest.java create mode 100644 j-lawyer-server/j-lawyer-server-ejb/src/java/org/jlawyer/utils/ocr/OcrUtils.java diff --git a/j-lawyer-client/src/com/jdimension/jlawyer/client/JKanzleiGUI.java b/j-lawyer-client/src/com/jdimension/jlawyer/client/JKanzleiGUI.java index 6f9677c81..32f3c2763 100755 --- a/j-lawyer-client/src/com/jdimension/jlawyer/client/JKanzleiGUI.java +++ b/j-lawyer-client/src/com/jdimension/jlawyer/client/JKanzleiGUI.java @@ -1022,9 +1022,9 @@ public void onEvent(Event e) { } } else if (e instanceof ScannerStatusEvent) { this.lblScanStatus.setIcon(new javax.swing.ImageIcon(getClass().getResource("/icons/scanner.png"))); - this.lblScanStatus.setText("" + ((ScannerStatusEvent) e).getFileNames().size()); - this.lblScanStatus.setToolTipText(((ScannerStatusEvent) e).getFileNames().size() + " " + java.util.ResourceBundle.getBundle("com/jdimension/jlawyer/client/editors/EditorsRegistry").getString("status.scansfound")); - if (((ScannerStatusEvent) e).getFileNames().size() > 0) { + this.lblScanStatus.setText("" + ((ScannerStatusEvent) e).getFileMetadata().size()); + this.lblScanStatus.setToolTipText(((ScannerStatusEvent) e).getFileMetadata().size() + " " + java.util.ResourceBundle.getBundle("com/jdimension/jlawyer/client/editors/EditorsRegistry").getString("status.scansfound")); + if (((ScannerStatusEvent) e).getFileMetadata().size() > 0) { this.lblScanStatus.setEnabled(true); } else { this.lblScanStatus.setEnabled(false); diff --git a/j-lawyer-client/src/com/jdimension/jlawyer/client/desktop/DesktopPanel.java b/j-lawyer-client/src/com/jdimension/jlawyer/client/desktop/DesktopPanel.java index 4dbbec4a2..3a80b4d2c 100644 --- a/j-lawyer-client/src/com/jdimension/jlawyer/client/desktop/DesktopPanel.java +++ b/j-lawyer-client/src/com/jdimension/jlawyer/client/desktop/DesktopPanel.java @@ 
-1709,9 +1709,9 @@ public void onEvent(Event e) { this.revalidate(); this.repaint(); } else if (e instanceof ScannerStatusEvent) { - this.lblScans.setText("" + ((ScannerStatusEvent) e).getFileNames().size()); - this.lblScans.setToolTipText(((ScannerStatusEvent) e).getFileNames().size() + " " + java.util.ResourceBundle.getBundle("com/jdimension/jlawyer/client/editors/EditorsRegistry").getString("status.scansfound")); - if(!((ScannerStatusEvent) e).getFileNames().isEmpty()) + this.lblScans.setText("" + ((ScannerStatusEvent) e).getFileMetadata().size()); + this.lblScans.setToolTipText(((ScannerStatusEvent) e).getFileMetadata().size() + " " + java.util.ResourceBundle.getBundle("com/jdimension/jlawyer/client/editors/EditorsRegistry").getString("status.scansfound")); + if(!((ScannerStatusEvent) e).getFileMetadata().isEmpty()) this.lblScans.setEnabled(true); else this.lblScans.setEnabled(false); diff --git a/j-lawyer-client/src/com/jdimension/jlawyer/client/editors/documents/EditScanPanel.form b/j-lawyer-client/src/com/jdimension/jlawyer/client/editors/documents/EditScanPanel.form index 6b3941fe7..1a5d043a0 100644 --- a/j-lawyer-client/src/com/jdimension/jlawyer/client/editors/documents/EditScanPanel.form +++ b/j-lawyer-client/src/com/jdimension/jlawyer/client/editors/documents/EditScanPanel.form @@ -26,6 +26,8 @@ + + @@ -42,6 +44,7 @@ + @@ -96,5 +99,15 @@ + + + + + + + + + + diff --git a/j-lawyer-client/src/com/jdimension/jlawyer/client/editors/documents/EditScanPanel.java b/j-lawyer-client/src/com/jdimension/jlawyer/client/editors/documents/EditScanPanel.java index 397ccbaeb..ecd2d71db 100644 --- a/j-lawyer-client/src/com/jdimension/jlawyer/client/editors/documents/EditScanPanel.java +++ b/j-lawyer-client/src/com/jdimension/jlawyer/client/editors/documents/EditScanPanel.java @@ -664,8 +664,14 @@ package com.jdimension.jlawyer.client.editors.documents; import com.jdimension.jlawyer.client.mail.SaveToCaseExecutor; +import 
com.jdimension.jlawyer.client.settings.ClientSettings; +import com.jdimension.jlawyer.pojo.FileMetadata; +import com.jdimension.jlawyer.services.IntegrationServiceRemote; +import com.jdimension.jlawyer.services.JLawyerServiceLocator; import java.awt.Color; +import java.util.ArrayList; import java.util.List; +import org.apache.log4j.Logger; /** * @@ -673,10 +679,14 @@ */ public class EditScanPanel extends javax.swing.JPanel { + private static final Logger log = Logger.getLogger(EditScanPanel.class.getName()); + private SaveToCaseExecutor executor = null; private String openedFromEditorClass = null; + private ArrayList noOcrFiles = new ArrayList<>(); + /** * Creates new form EditScanPanel * @@ -707,6 +717,33 @@ public void setDetails(List selectedDocuments, SaveToCaseExecutor execut } } + this.cmdOcr.setEnabled(false); + this.cmdOcr.setToolTipText(null); + this.noOcrFiles.clear(); + try { + + ClientSettings settings = ClientSettings.getInstance(); + JLawyerServiceLocator locator = JLawyerServiceLocator.getInstance(settings.getLookupProperties()); + IntegrationServiceRemote isr = locator.lookupIntegrationServiceRemote(); + + for (String f : selectedDocuments) { + if (f.toLowerCase().endsWith(".pdf")) { + FileMetadata meta = isr.getObservedFileMetadata(f); + if (meta.getOcrStatus()==FileMetadata.OCRSTATUS_WITHOUTOCR) { + noOcrFiles.add(meta); + } + } + } + if (!noOcrFiles.isEmpty()) { + this.cmdOcr.setEnabled(true); + this.cmdOcr.setToolTipText(noOcrFiles.size() + " PDF-Dokumente sind nicht durchsuchbar - Klick für OCR/Texterkennung"); + } + + } catch (Exception ex) { + log.error(ex); + + } + } /** @@ -722,6 +759,7 @@ private void initComponents() { cmdDeleteScan = new javax.swing.JButton(); cmdRenameScan = new javax.swing.JButton(); cmdSplitPdf = new javax.swing.JButton(); + cmdOcr = new javax.swing.JButton(); lblDescription.setIcon(new javax.swing.ImageIcon(getClass().getResource("/icons/folder_documents.png"))); // NOI18N java.util.ResourceBundle bundle = 
java.util.ResourceBundle.getBundle("com/jdimension/jlawyer/client/editors/addresses/CaseForContactEntryPanel"); // NOI18N @@ -753,6 +791,13 @@ public void actionPerformed(java.awt.event.ActionEvent evt) { } }); + cmdOcr.setIcon(new javax.swing.ImageIcon(getClass().getResource("/icons16/material/baseline_font_download_off_red_48dp.png"))); // NOI18N + cmdOcr.addActionListener(new java.awt.event.ActionListener() { + public void actionPerformed(java.awt.event.ActionEvent evt) { + cmdOcrActionPerformed(evt); + } + }); + javax.swing.GroupLayout layout = new javax.swing.GroupLayout(this); this.setLayout(layout); layout.setHorizontalGroup( @@ -766,7 +811,9 @@ public void actionPerformed(java.awt.event.ActionEvent evt) { .addPreferredGap(javax.swing.LayoutStyle.ComponentPlacement.RELATED) .addComponent(cmdRenameScan) .addPreferredGap(javax.swing.LayoutStyle.ComponentPlacement.RELATED) - .addComponent(cmdSplitPdf))) + .addComponent(cmdSplitPdf) + .addPreferredGap(javax.swing.LayoutStyle.ComponentPlacement.RELATED) + .addComponent(cmdOcr))) .addContainerGap(115, Short.MAX_VALUE)) ); layout.setVerticalGroup( @@ -778,7 +825,8 @@ public void actionPerformed(java.awt.event.ActionEvent evt) { .addGroup(layout.createParallelGroup(javax.swing.GroupLayout.Alignment.LEADING) .addComponent(cmdDeleteScan) .addComponent(cmdRenameScan) - .addComponent(cmdSplitPdf)) + .addComponent(cmdSplitPdf) + .addComponent(cmdOcr)) .addContainerGap(javax.swing.GroupLayout.DEFAULT_SIZE, Short.MAX_VALUE)) ); }// //GEN-END:initComponents @@ -807,8 +855,28 @@ private void cmdSplitPdfActionPerformed(java.awt.event.ActionEvent evt) {//GEN-F this.executor.splitPdfCallback(); }//GEN-LAST:event_cmdSplitPdfActionPerformed + private void cmdOcrActionPerformed(java.awt.event.ActionEvent evt) {//GEN-FIRST:event_cmdOcrActionPerformed + try { + + ClientSettings settings = ClientSettings.getInstance(); + JLawyerServiceLocator locator = JLawyerServiceLocator.getInstance(settings.getLookupProperties()); + 
IntegrationServiceRemote isr = locator.lookupIntegrationServiceRemote(); + + for (FileMetadata meta : this.noOcrFiles) { + isr.performOcrForObservedFile(meta.getFileName()); + } + + } catch (Exception ex) { + log.error(ex); + + } + + + }//GEN-LAST:event_cmdOcrActionPerformed + // Variables declaration - do not modify//GEN-BEGIN:variables private javax.swing.JButton cmdDeleteScan; + private javax.swing.JButton cmdOcr; private javax.swing.JButton cmdRenameScan; private javax.swing.JButton cmdSplitPdf; private javax.swing.JLabel lblDescription; diff --git a/j-lawyer-client/src/com/jdimension/jlawyer/client/editors/documents/ScannerDocumentsTimerTask.java b/j-lawyer-client/src/com/jdimension/jlawyer/client/editors/documents/ScannerDocumentsTimerTask.java index 9fd30abfe..c67be135f 100644 --- a/j-lawyer-client/src/com/jdimension/jlawyer/client/editors/documents/ScannerDocumentsTimerTask.java +++ b/j-lawyer-client/src/com/jdimension/jlawyer/client/editors/documents/ScannerDocumentsTimerTask.java @@ -666,12 +666,11 @@ import com.jdimension.jlawyer.client.events.EventBroker; import com.jdimension.jlawyer.client.events.ScannerStatusEvent; import com.jdimension.jlawyer.client.settings.ClientSettings; +import com.jdimension.jlawyer.pojo.FileMetadata; import com.jdimension.jlawyer.services.JLawyerServiceLocator; -import java.io.File; -import java.util.ArrayList; -import java.util.Collections; import java.util.Date; import java.util.HashMap; +import java.util.Map; import org.apache.log4j.Logger; /** @@ -681,7 +680,7 @@ public class ScannerDocumentsTimerTask extends java.util.TimerTask { private static final Logger log = Logger.getLogger(ScannerDocumentsTimerTask.class.getName()); - private static ArrayList lastFiles = new ArrayList(); + private static HashMap lastFiles = new HashMap<>(); private boolean bypassCache=false; /** @@ -700,18 +699,12 @@ public void run() { try { ClientSettings settings = ClientSettings.getInstance(); JLawyerServiceLocator locator = 
JLawyerServiceLocator.getInstance(settings.getLookupProperties()); - HashMap files = locator.lookupSingletonServiceRemote().getObservedFiles(this.bypassCache); + HashMap currentFiles = locator.lookupSingletonServiceRemote().getObservedFiles(this.bypassCache); - ArrayList currentFiles = new ArrayList<>(); - for (File f : files.keySet()) { - currentFiles.add(f.getName()); - } - Collections.sort(currentFiles); - - if (!currentFiles.equals(lastFiles)) { + if (!areKeySetsDifferent(lastFiles, currentFiles)) { EventBroker eb = EventBroker.getInstance(); - eb.publishEvent(new ScannerStatusEvent(files)); + eb.publishEvent(new ScannerStatusEvent(currentFiles)); } lastFiles = currentFiles; @@ -721,5 +714,19 @@ public void run() { } } } + + private static boolean areKeySetsDifferent(Map map1, Map map2) { + if (map1.size() != map2.size()) { + return true; // Different number of keys + } + + for (FileMetadata key : map1.keySet()) { + if (!map2.containsKey(key)) { + return true; // Key present in map1 but not in map2 + } + } + + return false; // Key sets are the same + } } diff --git a/j-lawyer-client/src/com/jdimension/jlawyer/client/editors/documents/ScannerPanel.java b/j-lawyer-client/src/com/jdimension/jlawyer/client/editors/documents/ScannerPanel.java index a5f1bdc46..9a221c72f 100755 --- a/j-lawyer-client/src/com/jdimension/jlawyer/client/editors/documents/ScannerPanel.java +++ b/j-lawyer-client/src/com/jdimension/jlawyer/client/editors/documents/ScannerPanel.java @@ -690,6 +690,7 @@ import com.jdimension.jlawyer.client.utils.ThreadUtils; import com.jdimension.jlawyer.persistence.ArchiveFileBean; import com.jdimension.jlawyer.persistence.CaseFolder; +import com.jdimension.jlawyer.pojo.FileMetadata; import com.jdimension.jlawyer.services.ArchiveFileServiceRemote; import com.jdimension.jlawyer.services.IntegrationServiceRemote; import com.jdimension.jlawyer.services.JLawyerServiceLocator; @@ -731,12 +732,12 @@ public class ScannerPanel extends javax.swing.JPanel implements 
ThemeableEditor, private static final Logger log = Logger.getLogger(ScannerPanel.class.getName()); private Image backgroundImage = null; - private HashMap lastEventFileNames = new HashMap<>(); + private HashMap lastEventFileMetadata = new HashMap<>(); @Override public void notifyStatusBarReady() { EventBroker eb = EventBroker.getInstance(); - eb.publishEvent(new ScannerStatusEvent(this.lastEventFileNames)); + eb.publishEvent(new ScannerStatusEvent(this.lastEventFileMetadata)); } @Override @@ -749,19 +750,19 @@ public void onEvent(Event e) { if (e instanceof ScannerStatusEvent) { int selectedRow = tblDirContent.getSelectedRow(); - HashMap fileNames = ((ScannerStatusEvent) e).getFileNames(); + HashMap fileMetadata = ((ScannerStatusEvent) e).getFileMetadata(); SimpleDateFormat df = new SimpleDateFormat("dd.MM.yyyy, HH:mm", Locale.GERMAN); - String[] colNames = new String[]{"geändert", "Dateiname"}; + String[] colNames = new String[]{"geändert", "OCR", "Dateiname"}; DefaultTableModel model = new DefaultTableModel(colNames, 0) { @Override public boolean isCellEditable(int i, int i0) { return false; } }; - if (fileNames != null) { - for (File f : fileNames.keySet()) { - Date lastModified = fileNames.get(f); - Object[] row = new Object[]{df.format(lastModified), f.getName()}; + if (fileMetadata != null) { + for (FileMetadata f : fileMetadata.keySet()) { + Date lastModified = fileMetadata.get(f); + Object[] row = new Object[]{df.format(lastModified), f, f.getFileName()}; model.addRow(row); } } @@ -783,7 +784,7 @@ public boolean isCellEditable(int i, int i0) { } catch (Throwable t) { log.error("Error re-selecting table row", t); } - this.lastEventFileNames = fileNames; + this.lastEventFileMetadata = fileMetadata; } } @@ -815,6 +816,19 @@ public Component getTableCellRendererComponent(JTable table, Object value, boole JLabel label = (JLabel) super.getTableCellRendererComponent(table, value, isSelected, hasFocus, row, column); if (column == 1) { + FileMetadata 
fm=(FileMetadata)value; + label.setIcon(null); + label.setText(""); + if(fm.getOcrStatus()==FileMetadata.OCRSTATUS_NOTSUPPORTED) { + // no icon + } else if(fm.getOcrStatus()==FileMetadata.OCRSTATUS_PROCESSING) { + label.setIcon(new javax.swing.ImageIcon(getClass().getResource("/icons16/material/baseline_font_download_yellow_48dp.png"))); + } else if(fm.getOcrStatus()==FileMetadata.OCRSTATUS_WITHOCR) { + label.setIcon(new javax.swing.ImageIcon(getClass().getResource("/icons16/material/baseline_font_download_green_48dp.png"))); + } else if(fm.getOcrStatus()==FileMetadata.OCRSTATUS_WITHOUTOCR) { + label.setIcon(new javax.swing.ImageIcon(getClass().getResource("/icons16/material/baseline_font_download_off_red_48dp.png"))); + } + } else if (column == 2) { String sValue = (String) value; FileUtils fu = FileUtils.getInstance(); Icon icon = fu.getFileTypeIcon(sValue); @@ -867,7 +881,7 @@ public void refreshList() { JLawyerServiceLocator locator = JLawyerServiceLocator.getInstance(settings.getLookupProperties()); IntegrationServiceRemote is = locator.lookupIntegrationServiceRemote(); - HashMap observedDirContent = is.getObservedDirectoryContent(); + HashMap observedDirContent = is.getObservedDirectoryContent(); EventBroker eb = EventBroker.getInstance(); eb.publishEvent(new ScannerStatusEvent(observedDirContent)); @@ -1106,20 +1120,24 @@ private void selectDirContent() { this.pnlActionsChild.removeAll(); int[] selRow = this.tblDirContent.getSelectedRows(); if (selRow.length > 0) { + + log.info(selRow.length + " documents selected"); ArrayList fileNames = new ArrayList<>(); for (int r : selRow) { - String fileName = this.tblDirContent.getValueAt(r, 1).toString(); + String fileName = this.tblDirContent.getValueAt(r, 2).toString(); fileNames.add(fileName); } ArrayList actionPanelEntries = new ArrayList<>(); int i = 0; + log.info("creating EditScanPanel"); EditScanPanel dsp = new EditScanPanel(this.getClass().getName()); dsp.setDetails(fileNames, this); 
actionPanelEntries.add(dsp); + log.info("creating SaveScanToCasePanel"); // empty case reference - will trigger a search SaveScanToCasePanel sp = new SaveScanToCasePanel(this.getClass().getName()); sp.setBackground(sp.getBackground().brighter()); @@ -1134,13 +1152,17 @@ private void selectDirContent() { actionPanelEntries.add(sp); try { + log.info("querying last changed"); ClientSettings settings = ClientSettings.getInstance(); JLawyerServiceLocator locator = JLawyerServiceLocator.getInstance(settings.getLookupProperties()); ArchiveFileServiceRemote fileService = locator.lookupArchiveFileServiceRemote(); ArrayList caseCandidates = new ArrayList<>(); + log.info("querying last changed for current user"); List myNewList = fileService.getLastChanged(settings.getConfiguration(ClientSettings.CONF_LASTUSER, ""), true, 10); + log.info("querying last changed for other users"); List othersNewList = fileService.getLastChanged(settings.getConfiguration(ClientSettings.CONF_LASTUSER, ""), false, 10); + log.info("building suggestion list 1"); for (ArchiveFileBean af : myNewList) { if (!caseCandidates.contains(af.getFileNumber())) { caseCandidates.add(af.getFileNumber()); @@ -1155,6 +1177,7 @@ private void selectDirContent() { } } + log.info("building suggestion list 2"); for (ArchiveFileBean af : othersNewList) { if (!caseCandidates.contains(af.getFileNumber())) { caseCandidates.add(af.getFileNumber()); @@ -1175,16 +1198,19 @@ private void selectDirContent() { } + log.info("updating UI (sidebar)"); this.pnlActionsChild.setLayout(new GridLayout(actionPanelEntries.size(), 1)); for (Component o : actionPanelEntries) { this.pnlActionsChild.add(o); } + log.info("displaying preview"); // display document preview if (fileNames.size() == 1) { final String fFileName = fileNames.get(0); new Thread(() -> { try { + log.info("loading preview as bytes"); int splitLocation = splitTop.getDividerLocation(); ThreadUtils.setVisible(pnlPreview, false); ThreadUtils.removeAll(pnlPreview); @@ -1198,6 
+1224,7 @@ private void selectDirContent() { IntegrationServiceRemote is = locator.lookupIntegrationServiceRemote(); byte[] data = is.getObservedFile(fFileName); + log.info("rendering preview"); JComponent preview = DocumentViewerFactory.getDocumentViewer(null, fFileName, true, new ScanPreviewProvider(is, fFileName), data, pnlPreview.getWidth(), pnlPreview.getHeight()); ThreadUtils.setVisible(pnlPreview, false); ThreadUtils.remove(pnlPreview, loading); @@ -1205,6 +1232,7 @@ private void selectDirContent() { ThreadUtils.addComponent(pnlPreview, preview, BorderLayout.CENTER); ThreadUtils.setVisible(pnlPreview, true); ThreadUtils.setSplitDividerLocation(splitTop, splitLocation); + log.info("preview done"); } catch (Exception ex) { log.error(ex); clearPreview(); @@ -1250,7 +1278,7 @@ private void tblDirContentMouseClicked(java.awt.event.MouseEvent evt) {//GEN-FIR int selRow = this.tblDirContent.getSelectedRow(); if (selRow > -1) { - String fileName = this.tblDirContent.getValueAt(selRow, 1).toString(); + String fileName = this.tblDirContent.getValueAt(selRow, 2).toString(); try { ClientSettings settings = ClientSettings.getInstance(); @@ -1376,7 +1404,7 @@ public boolean saveToCaseCallback(String caseId, boolean withAttachments, boolea ArrayList fileNames = new ArrayList<>(); for (int r : selRow) { - String fileName = this.tblDirContent.getValueAt(r, 1).toString(); + String fileName = this.tblDirContent.getValueAt(r, 2).toString(); fileNames.add(fileName); } @@ -1463,7 +1491,7 @@ public boolean renameCallback() { } for (int r : selRow) { - String fileName = this.tblDirContent.getValueAt(r, 1).toString(); + String fileName = this.tblDirContent.getValueAt(r, 2).toString(); String toFileName = FileUtils.getNewFileName(fileName, false); if (toFileName == null) { // user cancelled @@ -1513,7 +1541,7 @@ public boolean removeCallback() { } for (int r : selRow) { - String fileName = this.tblDirContent.getValueAt(r, 1).toString(); + String fileName = 
this.tblDirContent.getValueAt(r, 2).toString(); try { @@ -1544,7 +1572,7 @@ public boolean splitPdfCallback() { int[] selRow = this.tblDirContent.getSelectedRows(); if (selRow.length == 1) { - String fileName = this.tblDirContent.getValueAt(selRow[0], 1).toString(); + String fileName = this.tblDirContent.getValueAt(selRow[0], 2).toString(); if(!fileName.toLowerCase().endsWith(".pdf")) return false; diff --git a/j-lawyer-client/src/com/jdimension/jlawyer/client/events/ScannerStatusEvent.java b/j-lawyer-client/src/com/jdimension/jlawyer/client/events/ScannerStatusEvent.java index 7e7f41b4c..9490978a9 100644 --- a/j-lawyer-client/src/com/jdimension/jlawyer/client/events/ScannerStatusEvent.java +++ b/j-lawyer-client/src/com/jdimension/jlawyer/client/events/ScannerStatusEvent.java @@ -663,7 +663,7 @@ */ package com.jdimension.jlawyer.client.events; -import java.io.File; +import com.jdimension.jlawyer.pojo.FileMetadata; import java.util.Date; import java.util.HashMap; @@ -673,11 +673,11 @@ */ public class ScannerStatusEvent extends Event { - private HashMap fileNames=null; + private HashMap fileMetadata=null; - public ScannerStatusEvent(HashMap fileNames) { + public ScannerStatusEvent(HashMap fileNames) { super(Event.TYPE_SCANNERSTATUS); - this.fileNames=fileNames; + this.fileMetadata=fileNames; } @@ -689,17 +689,17 @@ public boolean isUiUpdateTrigger() { } /** - * @return the fileNames + * @return the fileMetadata */ - public HashMap getFileNames() { - return fileNames; + public HashMap getFileMetadata() { + return fileMetadata; } /** - * @param fileNames the fileNames to set + * @param metadata the fileMetadata to set */ - public void setFileNames(HashMap fileNames) { - this.fileNames = fileNames; + public void setFileMetadata(HashMap metadata) { + this.fileMetadata = metadata; } diff --git a/j-lawyer-client/src/com/jdimension/jlawyer/client/modulebar/ModuleButton.java b/j-lawyer-client/src/com/jdimension/jlawyer/client/modulebar/ModuleButton.java index 
d73beec61..618c97742 100644 --- a/j-lawyer-client/src/com/jdimension/jlawyer/client/modulebar/ModuleButton.java +++ b/j-lawyer-client/src/com/jdimension/jlawyer/client/modulebar/ModuleButton.java @@ -977,7 +977,7 @@ public void onEvent(Event e) { } if (e instanceof ScannerStatusEvent) { - updateIndicator(((ScannerStatusEvent) e).getFileNames().size()); + updateIndicator(((ScannerStatusEvent) e).getFileMetadata().size()); } else if (e instanceof MailingStatusEvent) { updateIndicator(((MailingStatusEvent) e).getMailingList().size()); } else if (e instanceof EmailStatusEvent) { diff --git a/j-lawyer-client/src/icons16/material/baseline_font_download_green_48dp.png b/j-lawyer-client/src/icons16/material/baseline_font_download_green_48dp.png new file mode 100644 index 0000000000000000000000000000000000000000..3453831b938baa794f3a05781ded1cb69712b4e9 GIT binary patch literal 648 zcmV;30(bq1P)v8Hv}uY8L?Ratpqu)AU%W__6~~WER$3z*PMWFoj5*ZQLc;qF3XV#bU_ahUuFt2;==8XeES6Vv$}yP@08mrY#`~Uo*){-J z_-QTx&=i@yIC4UIv}54vGS|mrdcQ75G}c zRZu=eo9(>^-|Dfb3{&5dRvIFelp>){R4lDI@=tR*+r94Id+zCBU2(cC2Yuhp?|07U zd^ul!2l&q-003Ut2C7E^Fm6ZTX+Af9`(^>g%5Q`I92l;pv6gvsm6w)nm1oZEl>gq4K%~MC`Cl|5;YJslD;Dj8@(X$Q4LU0&* z9A;2ViGGq>y|r|1BQ#|wG&DMGL_(eXuHq}!iu&7YduxM|+q^Y4cjGJGxp#F;Q#w?~ zdR~ArRV=BxyuO!6KM@pDqMzmV#+tQG2>^)u%iL03L#tV2lu|+nV<0@?QjZ%m7+dLj z=+lP$J|a0>#GdCc<|jJ^An^qyx!`$z^|mxJCkh162K_#!L>U0^0Knu$S=d)r&)FPO z2F6AxAw(!DtzhyZF=oH^B%2T{g+l&87qQwi8A}oah*(^HlWfh~DYx8pfohR#45b#w f=MsxlU@rdvm3=mOBS`KI00000NkvXXu0mjf(0)#p literal 0 HcmV?d00001 diff --git a/j-lawyer-client/src/icons16/material/baseline_font_download_off_red_48dp.png b/j-lawyer-client/src/icons16/material/baseline_font_download_off_red_48dp.png new file mode 100644 index 0000000000000000000000000000000000000000..9dfd9aedd96741f572163ab971ebfb86166aa4bf GIT binary patch literal 760 zcmVB8BSJvqz0YYE%vygCUZTa3S(TQ=vtHlAstZWw)iY-L|`(@c`8#Ezrbwo9|>k zc{A_(9pFEU0DwS9mC9Hg0CVaM7wRhX*4+YDHI05st$oZ;`V9bh1E;e;cGSOrEie1} 
zq_8)ls;P&yM_<(&_KLfOhSDkd%2)r%G^LB2%4jm8Wux78M_0RO>ykBf%pD54f~kbG zTcD%TU~Mp(8Ua8@0VT%}y=pzsc5$EO{IZ(GL^9s@ZdD#WXR2xyc|p$rFaU%QQVKd8 zkM_@{QvnXZ#!CVmA>@JG;b|9bo~WiybcTX0&sKg-JN=^ur+?J=Il1;@C%4>|0wKg} zcXVA_zy7#qY2j>+F*V0nreBWu$_4^}ju5VU0}5bG#e}V9%xS)9ucb1TJMV$<|r_ znZcMeo(~ED8B4@p1eIjTmMRGxh(!hzO26D(oOKPxr15-E8Aew+=L45#l!ROs@TI)` zL?Fa`qq^qKd`6j8C`I?*^gp=ggDt_-R%p>sCKK_cg|sqs!f5KaTT^$taIY{ZcPQZM zbsQfk9Mhr>-^3z524vesTgMg$gmC$wqMA154h1%iDFJ{>X0i3rkz@V&qDE;M2m2)XU{g&N~^1_J;IrL>XM6uA|olXf&UA1@coi_2 h#x+WJdu^z#egVfo@27duhRw7O?Y{uxTgWqWgflr-egO@7nnF*QZEc}m&8;F);?_%8 zGg+3k03B`E)WQHPM=*7^V+RlbDwX+t8^tH_2vDx~(7#*v`HdynT2wIoABoc%cd?%P8Vwe8Miwu5QlsoaU5Hpiz_MV!6DsT^1$$KrTGbq{D*)AiBZ^@J*C>c=9-RY6ivCguz82P-(ZF4=kno14?d6i1DaPu;Qv1ww(YoMROOqQj<1i_th{{qS~ zicjJ`V01g_E%m#dxB#OlF(k_*9j_7!9R<>ru=P?lHWTBJnkivbXNf zuP8u%1Qj;w_Mo|S1@BJf0BE&nu3f}cx}Qkq8CrmuIl_@B@p6!A`n1fKsW2fZ8n8wK u&7JMo3sa+3C6dTfVM6IFOKLAvQ9l64*tXDeK--=G0000 getObservedDirectoryContent(); + HashMap getObservedDirectoryContent(); boolean removeObservedFile(String fileName); @@ -718,5 +718,11 @@ public interface IntegrationServiceRemote { void renameEmailTemplate(String oldName, String newName) throws Exception; void duplicateEmailTemplate(String templateName, String duplicateName) throws Exception; + + FileMetadata getObservedFileMetadata(String fileName) throws Exception; + + List getObservedFilesMetadata(List fileNames) throws Exception; + + boolean performOcrForObservedFile(String fileName) throws Exception; } diff --git a/j-lawyer-server-api/src/com/jdimension/jlawyer/services/SingletonServiceRemote.java b/j-lawyer-server-api/src/com/jdimension/jlawyer/services/SingletonServiceRemote.java index 52a722290..7e6f253b9 100644 --- a/j-lawyer-server-api/src/com/jdimension/jlawyer/services/SingletonServiceRemote.java +++ b/j-lawyer-server-api/src/com/jdimension/jlawyer/services/SingletonServiceRemote.java @@ -683,6 +683,7 @@ You should also get your employer (if you work as a programmer) or school, import 
com.jdimension.jlawyer.persistence.EpostQueueBean; import com.jdimension.jlawyer.persistence.FaxQueueBean; +import com.jdimension.jlawyer.pojo.FileMetadata; import java.io.File; import java.util.ArrayList; import java.util.Date; @@ -700,13 +701,13 @@ public interface SingletonServiceRemote { void setSystemStatus(int status); - HashMap getObservedFiles(); + HashMap getObservedFiles(); - HashMap getObservedFiles(boolean bypassCache); + HashMap getObservedFiles(boolean bypassCache); void updateObservedFiles(); - void setObservedFiles(HashMap fileNames); + void setObservedFiles(HashMap fileNames); FaxQueueBean getFailedFax(); EpostQueueBean getFailedLetter(); diff --git a/j-lawyer-server-entities/src/java/com/jdimension/jlawyer/pojo/FileMetadata.java b/j-lawyer-server-entities/src/java/com/jdimension/jlawyer/pojo/FileMetadata.java new file mode 100644 index 000000000..afff90e03 --- /dev/null +++ b/j-lawyer-server-entities/src/java/com/jdimension/jlawyer/pojo/FileMetadata.java @@ -0,0 +1,765 @@ +/* GNU AFFERO GENERAL PUBLIC LICENSE + Version 3, 19 November 2007 + + Copyright (C) 2007 Free Software Foundation, Inc. + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + Preamble + + The GNU Affero General Public License is a free, copyleft license for +software and other kinds of works, specifically designed to ensure +cooperation with the community in the case of network server software. + + The licenses for most software and other practical works are designed +to take away your freedom to share and change the works. By contrast, +our General Public Licenses are intended to guarantee your freedom to +share and change all versions of a program--to make sure it remains free +software for all its users. + + When we speak of free software, we are referring to freedom, not +price. 
Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +them if you wish), that you receive source code or can get it if you +want it, that you can change the software or use pieces of it in new +free programs, and that you know you can do these things. + + Developers that use our General Public Licenses protect your rights +with two steps: (1) assert copyright on the software, and (2) offer +you this License which gives you legal permission to copy, distribute +and/or modify the software. + + A secondary benefit of defending all users' freedom is that +improvements made in alternate versions of the program, if they +receive widespread use, become available for other developers to +incorporate. Many developers of free software are heartened and +encouraged by the resulting cooperation. However, in the case of +software used on network servers, this result may fail to come about. +The GNU General Public License permits making a modified version and +letting the public access it on a server without ever releasing its +source code to the public. + + The GNU Affero General Public License is designed specifically to +ensure that, in such cases, the modified source code becomes available +to the community. It requires the operator of a network server to +provide the source code of the modified version running there to the +users of that server. Therefore, public use of a modified version, on +a publicly accessible server, gives the public access to the source +code of the modified version. + + An older license, called the Affero General Public License and +published by Affero, was designed to accomplish similar goals. This is +a different license, not a version of the Affero GPL, but Affero has +released a new version of the Affero GPL which permits relicensing under +this license. + + The precise terms and conditions for copying, distribution and +modification follow. 
+ + TERMS AND CONDITIONS + + 0. Definitions. + + "This License" refers to version 3 of the GNU Affero General Public License. + + "Copyright" also means copyright-like laws that apply to other kinds of +works, such as semiconductor masks. + + "The Program" refers to any copyrightable work licensed under this +License. Each licensee is addressed as "you". "Licensees" and +"recipients" may be individuals or organizations. + + To "modify" a work means to copy from or adapt all or part of the work +in a fashion requiring copyright permission, other than the making of an +exact copy. The resulting work is called a "modified version" of the +earlier work or a work "based on" the earlier work. + + A "covered work" means either the unmodified Program or a work based +on the Program. + + To "propagate" a work means to do anything with it that, without +permission, would make you directly or secondarily liable for +infringement under applicable copyright law, except executing it on a +computer or modifying a private copy. Propagation includes copying, +distribution (with or without modification), making available to the +public, and in some countries other activities as well. + + To "convey" a work means any kind of propagation that enables other +parties to make or receive copies. Mere interaction with a user through +a computer network, with no transfer of a copy, is not conveying. + + An interactive user interface displays "Appropriate Legal Notices" +to the extent that it includes a convenient and prominently visible +feature that (1) displays an appropriate copyright notice, and (2) +tells the user that there is no warranty for the work (except to the +extent that warranties are provided), that licensees may convey the +work under this License, and how to view a copy of this License. If +the interface presents a list of user commands or options, such as a +menu, a prominent item in the list meets this criterion. + + 1. Source Code. 
+ + The "source code" for a work means the preferred form of the work +for making modifications to it. "Object code" means any non-source +form of a work. + + A "Standard Interface" means an interface that either is an official +standard defined by a recognized standards body, or, in the case of +interfaces specified for a particular programming language, one that +is widely used among developers working in that language. + + The "System Libraries" of an executable work include anything, other +than the work as a whole, that (a) is included in the normal form of +packaging a Major Component, but which is not part of that Major +Component, and (b) serves only to enable use of the work with that +Major Component, or to implement a Standard Interface for which an +implementation is available to the public in source code form. A +"Major Component", in this context, means a major essential component +(kernel, window system, and so on) of the specific operating system +(if any) on which the executable work runs, or a compiler used to +produce the work, or an object code interpreter used to run it. + + The "Corresponding Source" for a work in object code form means all +the source code needed to generate, install, and (for an executable +work) run the object code and to modify the work, including scripts to +control those activities. However, it does not include the work's +System Libraries, or general-purpose tools or generally available free +programs which are used unmodified in performing those activities but +which are not part of the work. For example, Corresponding Source +includes interface definition files associated with source files for +the work, and the source code for shared libraries and dynamically +linked subprograms that the work is specifically designed to require, +such as by intimate data communication or control flow between those +subprograms and other parts of the work. 
+ + The Corresponding Source need not include anything that users +can regenerate automatically from other parts of the Corresponding +Source. + + The Corresponding Source for a work in source code form is that +same work. + + 2. Basic Permissions. + + All rights granted under this License are granted for the term of +copyright on the Program, and are irrevocable provided the stated +conditions are met. This License explicitly affirms your unlimited +permission to run the unmodified Program. The output from running a +covered work is covered by this License only if the output, given its +content, constitutes a covered work. This License acknowledges your +rights of fair use or other equivalent, as provided by copyright law. + + You may make, run and propagate covered works that you do not +convey, without conditions so long as your license otherwise remains +in force. You may convey covered works to others for the sole purpose +of having them make modifications exclusively for you, or provide you +with facilities for running those works, provided that you comply with +the terms of this License in conveying all material for which you do +not control copyright. Those thus making or running the covered works +for you must do so exclusively on your behalf, under your direction +and control, on terms that prohibit them from making any copies of +your copyrighted material outside their relationship with you. + + Conveying under any other circumstances is permitted solely under +the conditions stated below. Sublicensing is not allowed; section 10 +makes it unnecessary. + + 3. Protecting Users' Legal Rights From Anti-Circumvention Law. + + No covered work shall be deemed part of an effective technological +measure under any applicable law fulfilling obligations under article +11 of the WIPO copyright treaty adopted on 20 December 1996, or +similar laws prohibiting or restricting circumvention of such +measures. 
+ + When you convey a covered work, you waive any legal power to forbid +circumvention of technological measures to the extent such circumvention +is effected by exercising rights under this License with respect to +the covered work, and you disclaim any intention to limit operation or +modification of the work as a means of enforcing, against the work's +users, your or third parties' legal rights to forbid circumvention of +technological measures. + + 4. Conveying Verbatim Copies. + + You may convey verbatim copies of the Program's source code as you +receive it, in any medium, provided that you conspicuously and +appropriately publish on each copy an appropriate copyright notice; +keep intact all notices stating that this License and any +non-permissive terms added in accord with section 7 apply to the code; +keep intact all notices of the absence of any warranty; and give all +recipients a copy of this License along with the Program. + + You may charge any price or no price for each copy that you convey, +and you may offer support or warranty protection for a fee. + + 5. Conveying Modified Source Versions. + + You may convey a work based on the Program, or the modifications to +produce it from the Program, in the form of source code under the +terms of section 4, provided that you also meet all of these conditions: + + a) The work must carry prominent notices stating that you modified + it, and giving a relevant date. + + b) The work must carry prominent notices stating that it is + released under this License and any conditions added under section + 7. This requirement modifies the requirement in section 4 to + "keep intact all notices". + + c) You must license the entire work, as a whole, under this + License to anyone who comes into possession of a copy. This + License will therefore apply, along with any applicable section 7 + additional terms, to the whole of the work, and all its parts, + regardless of how they are packaged. 
This License gives no + permission to license the work in any other way, but it does not + invalidate such permission if you have separately received it. + + d) If the work has interactive user interfaces, each must display + Appropriate Legal Notices; however, if the Program has interactive + interfaces that do not display Appropriate Legal Notices, your + work need not make them do so. + + A compilation of a covered work with other separate and independent +works, which are not by their nature extensions of the covered work, +and which are not combined with it such as to form a larger program, +in or on a volume of a storage or distribution medium, is called an +"aggregate" if the compilation and its resulting copyright are not +used to limit the access or legal rights of the compilation's users +beyond what the individual works permit. Inclusion of a covered work +in an aggregate does not cause this License to apply to the other +parts of the aggregate. + + 6. Conveying Non-Source Forms. + + You may convey a covered work in object code form under the terms +of sections 4 and 5, provided that you also convey the +machine-readable Corresponding Source under the terms of this License, +in one of these ways: + + a) Convey the object code in, or embodied in, a physical product + (including a physical distribution medium), accompanied by the + Corresponding Source fixed on a durable physical medium + customarily used for software interchange. 
+ + b) Convey the object code in, or embodied in, a physical product + (including a physical distribution medium), accompanied by a + written offer, valid for at least three years and valid for as + long as you offer spare parts or customer support for that product + model, to give anyone who possesses the object code either (1) a + copy of the Corresponding Source for all the software in the + product that is covered by this License, on a durable physical + medium customarily used for software interchange, for a price no + more than your reasonable cost of physically performing this + conveying of source, or (2) access to copy the + Corresponding Source from a network server at no charge. + + c) Convey individual copies of the object code with a copy of the + written offer to provide the Corresponding Source. This + alternative is allowed only occasionally and noncommercially, and + only if you received the object code with such an offer, in accord + with subsection 6b. + + d) Convey the object code by offering access from a designated + place (gratis or for a charge), and offer equivalent access to the + Corresponding Source in the same way through the same place at no + further charge. You need not require recipients to copy the + Corresponding Source along with the object code. If the place to + copy the object code is a network server, the Corresponding Source + may be on a different server (operated by you or a third party) + that supports equivalent copying facilities, provided you maintain + clear directions next to the object code saying where to find the + Corresponding Source. Regardless of what server hosts the + Corresponding Source, you remain obligated to ensure that it is + available for as long as needed to satisfy these requirements. 
+ + e) Convey the object code using peer-to-peer transmission, provided + you inform other peers where the object code and Corresponding + Source of the work are being offered to the general public at no + charge under subsection 6d. + + A separable portion of the object code, whose source code is excluded +from the Corresponding Source as a System Library, need not be +included in conveying the object code work. + + A "User Product" is either (1) a "consumer product", which means any +tangible personal property which is normally used for personal, family, +or household purposes, or (2) anything designed or sold for incorporation +into a dwelling. In determining whether a product is a consumer product, +doubtful cases shall be resolved in favor of coverage. For a particular +product received by a particular user, "normally used" refers to a +typical or common use of that class of product, regardless of the status +of the particular user or of the way in which the particular user +actually uses, or expects or is expected to use, the product. A product +is a consumer product regardless of whether the product has substantial +commercial, industrial or non-consumer uses, unless such uses represent +the only significant mode of use of the product. + + "Installation Information" for a User Product means any methods, +procedures, authorization keys, or other information required to install +and execute modified versions of a covered work in that User Product from +a modified version of its Corresponding Source. The information must +suffice to ensure that the continued functioning of the modified object +code is in no case prevented or interfered with solely because +modification has been made. 
+ + If you convey an object code work under this section in, or with, or +specifically for use in, a User Product, and the conveying occurs as +part of a transaction in which the right of possession and use of the +User Product is transferred to the recipient in perpetuity or for a +fixed term (regardless of how the transaction is characterized), the +Corresponding Source conveyed under this section must be accompanied +by the Installation Information. But this requirement does not apply +if neither you nor any third party retains the ability to install +modified object code on the User Product (for example, the work has +been installed in ROM). + + The requirement to provide Installation Information does not include a +requirement to continue to provide support service, warranty, or updates +for a work that has been modified or installed by the recipient, or for +the User Product in which it has been modified or installed. Access to a +network may be denied when the modification itself materially and +adversely affects the operation of the network or violates the rules and +protocols for communication across the network. + + Corresponding Source conveyed, and Installation Information provided, +in accord with this section must be in a format that is publicly +documented (and with an implementation available to the public in +source code form), and must require no special password or key for +unpacking, reading or copying. + + 7. Additional Terms. + + "Additional permissions" are terms that supplement the terms of this +License by making exceptions from one or more of its conditions. +Additional permissions that are applicable to the entire Program shall +be treated as though they were included in this License, to the extent +that they are valid under applicable law. 
If additional permissions +apply only to part of the Program, that part may be used separately +under those permissions, but the entire Program remains governed by +this License without regard to the additional permissions. + + When you convey a copy of a covered work, you may at your option +remove any additional permissions from that copy, or from any part of +it. (Additional permissions may be written to require their own +removal in certain cases when you modify the work.) You may place +additional permissions on material, added by you to a covered work, +for which you have or can give appropriate copyright permission. + + Notwithstanding any other provision of this License, for material you +add to a covered work, you may (if authorized by the copyright holders of +that material) supplement the terms of this License with terms: + + a) Disclaiming warranty or limiting liability differently from the + terms of sections 15 and 16 of this License; or + + b) Requiring preservation of specified reasonable legal notices or + author attributions in that material or in the Appropriate Legal + Notices displayed by works containing it; or + + c) Prohibiting misrepresentation of the origin of that material, or + requiring that modified versions of such material be marked in + reasonable ways as different from the original version; or + + d) Limiting the use for publicity purposes of names of licensors or + authors of the material; or + + e) Declining to grant rights under trademark law for use of some + trade names, trademarks, or service marks; or + + f) Requiring indemnification of licensors and authors of that + material by anyone who conveys the material (or modified versions of + it) with contractual assumptions of liability to the recipient, for + any liability that these contractual assumptions directly impose on + those licensors and authors. + + All other non-permissive additional terms are considered "further +restrictions" within the meaning of section 10. 
If the Program as you +received it, or any part of it, contains a notice stating that it is +governed by this License along with a term that is a further +restriction, you may remove that term. If a license document contains +a further restriction but permits relicensing or conveying under this +License, you may add to a covered work material governed by the terms +of that license document, provided that the further restriction does +not survive such relicensing or conveying. + + If you add terms to a covered work in accord with this section, you +must place, in the relevant source files, a statement of the +additional terms that apply to those files, or a notice indicating +where to find the applicable terms. + + Additional terms, permissive or non-permissive, may be stated in the +form of a separately written license, or stated as exceptions; +the above requirements apply either way. + + 8. Termination. + + You may not propagate or modify a covered work except as expressly +provided under this License. Any attempt otherwise to propagate or +modify it is void, and will automatically terminate your rights under +this License (including any patent licenses granted under the third +paragraph of section 11). + + However, if you cease all violation of this License, then your +license from a particular copyright holder is reinstated (a) +provisionally, unless and until the copyright holder explicitly and +finally terminates your license, and (b) permanently, if the copyright +holder fails to notify you of the violation by some reasonable means +prior to 60 days after the cessation. + + Moreover, your license from a particular copyright holder is +reinstated permanently if the copyright holder notifies you of the +violation by some reasonable means, this is the first time you have +received notice of violation of this License (for any work) from that +copyright holder, and you cure the violation prior to 30 days after +your receipt of the notice. 
+ + Termination of your rights under this section does not terminate the +licenses of parties who have received copies or rights from you under +this License. If your rights have been terminated and not permanently +reinstated, you do not qualify to receive new licenses for the same +material under section 10. + + 9. Acceptance Not Required for Having Copies. + + You are not required to accept this License in order to receive or +run a copy of the Program. Ancillary propagation of a covered work +occurring solely as a consequence of using peer-to-peer transmission +to receive a copy likewise does not require acceptance. However, +nothing other than this License grants you permission to propagate or +modify any covered work. These actions infringe copyright if you do +not accept this License. Therefore, by modifying or propagating a +covered work, you indicate your acceptance of this License to do so. + + 10. Automatic Licensing of Downstream Recipients. + + Each time you convey a covered work, the recipient automatically +receives a license from the original licensors, to run, modify and +propagate that work, subject to this License. You are not responsible +for enforcing compliance by third parties with this License. + + An "entity transaction" is a transaction transferring control of an +organization, or substantially all assets of one, or subdividing an +organization, or merging organizations. If propagation of a covered +work results from an entity transaction, each party to that +transaction who receives a copy of the work also receives whatever +licenses to the work the party's predecessor in interest had or could +give under the previous paragraph, plus a right to possession of the +Corresponding Source of the work from the predecessor in interest, if +the predecessor has it or can get it with reasonable efforts. + + You may not impose any further restrictions on the exercise of the +rights granted or affirmed under this License. 
For example, you may +not impose a license fee, royalty, or other charge for exercise of +rights granted under this License, and you may not initiate litigation +(including a cross-claim or counterclaim in a lawsuit) alleging that +any patent claim is infringed by making, using, selling, offering for +sale, or importing the Program or any portion of it. + + 11. Patents. + + A "contributor" is a copyright holder who authorizes use under this +License of the Program or a work on which the Program is based. The +work thus licensed is called the contributor's "contributor version". + + A contributor's "essential patent claims" are all patent claims +owned or controlled by the contributor, whether already acquired or +hereafter acquired, that would be infringed by some manner, permitted +by this License, of making, using, or selling its contributor version, +but do not include claims that would be infringed only as a +consequence of further modification of the contributor version. For +purposes of this definition, "control" includes the right to grant +patent sublicenses in a manner consistent with the requirements of +this License. + + Each contributor grants you a non-exclusive, worldwide, royalty-free +patent license under the contributor's essential patent claims, to +make, use, sell, offer for sale, import and otherwise run, modify and +propagate the contents of its contributor version. + + In the following three paragraphs, a "patent license" is any express +agreement or commitment, however denominated, not to enforce a patent +(such as an express permission to practice a patent or covenant not to +sue for patent infringement). To "grant" such a patent license to a +party means to make such an agreement or commitment not to enforce a +patent against the party. 
+ + If you convey a covered work, knowingly relying on a patent license, +and the Corresponding Source of the work is not available for anyone +to copy, free of charge and under the terms of this License, through a +publicly available network server or other readily accessible means, +then you must either (1) cause the Corresponding Source to be so +available, or (2) arrange to deprive yourself of the benefit of the +patent license for this particular work, or (3) arrange, in a manner +consistent with the requirements of this License, to extend the patent +license to downstream recipients. "Knowingly relying" means you have +actual knowledge that, but for the patent license, your conveying the +covered work in a country, or your recipient's use of the covered work +in a country, would infringe one or more identifiable patents in that +country that you have reason to believe are valid. + + If, pursuant to or in connection with a single transaction or +arrangement, you convey, or propagate by procuring conveyance of, a +covered work, and grant a patent license to some of the parties +receiving the covered work authorizing them to use, propagate, modify +or convey a specific copy of the covered work, then the patent license +you grant is automatically extended to all recipients of the covered +work and works based on it. + + A patent license is "discriminatory" if it does not include within +the scope of its coverage, prohibits the exercise of, or is +conditioned on the non-exercise of one or more of the rights that are +specifically granted under this License. 
You may not convey a covered +work if you are a party to an arrangement with a third party that is +in the business of distributing software, under which you make payment +to the third party based on the extent of your activity of conveying +the work, and under which the third party grants, to any of the +parties who would receive the covered work from you, a discriminatory +patent license (a) in connection with copies of the covered work +conveyed by you (or copies made from those copies), or (b) primarily +for and in connection with specific products or compilations that +contain the covered work, unless you entered into that arrangement, +or that patent license was granted, prior to 28 March 2007. + + Nothing in this License shall be construed as excluding or limiting +any implied license or other defenses to infringement that may +otherwise be available to you under applicable patent law. + + 12. No Surrender of Others' Freedom. + + If conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot convey a +covered work so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you may +not convey it at all. For example, if you agree to terms that obligate you +to collect a royalty for further conveying from those to whom you convey +the Program, the only way you could satisfy both those terms and this +License would be to refrain entirely from conveying the Program. + + 13. Remote Network Interaction; Use with the GNU General Public License. 
+ + Notwithstanding any other provision of this License, if you modify the +Program, your modified version must prominently offer all users +interacting with it remotely through a computer network (if your version +supports such interaction) an opportunity to receive the Corresponding +Source of your version by providing access to the Corresponding Source +from a network server at no charge, through some standard or customary +means of facilitating copying of software. This Corresponding Source +shall include the Corresponding Source for any work covered by version 3 +of the GNU General Public License that is incorporated pursuant to the +following paragraph. + + Notwithstanding any other provision of this License, you have +permission to link or combine any covered work with a work licensed +under version 3 of the GNU General Public License into a single +combined work, and to convey the resulting work. The terms of this +License will continue to apply to the part which is the covered work, +but the work with which it is combined will remain governed by version +3 of the GNU General Public License. + + 14. Revised Versions of this License. + + The Free Software Foundation may publish revised and/or new versions of +the GNU Affero General Public License from time to time. Such new versions +will be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + + Each version is given a distinguishing version number. If the +Program specifies that a certain numbered version of the GNU Affero General +Public License "or any later version" applies to it, you have the +option of following the terms and conditions either of that numbered +version or of any later version published by the Free Software +Foundation. If the Program does not specify a version number of the +GNU Affero General Public License, you may choose any version ever published +by the Free Software Foundation. 
+ + If the Program specifies that a proxy can decide which future +versions of the GNU Affero General Public License can be used, that proxy's +public statement of acceptance of a version permanently authorizes you +to choose that version for the Program. + + Later license versions may give you additional or different +permissions. However, no additional obligations are imposed on any +author or copyright holder as a result of your choosing to follow a +later version. + + 15. Disclaimer of Warranty. + + THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY +APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT +HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY +OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, +THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM +IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF +ALL NECESSARY SERVICING, REPAIR OR CORRECTION. + + 16. Limitation of Liability. + + IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS +THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY +GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE +USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF +DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD +PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), +EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF +SUCH DAMAGES. + + 17. Interpretation of Sections 15 and 16. 
+ + If the disclaimer of warranty and limitation of liability provided +above cannot be given local legal effect according to their terms, +reviewing courts shall apply local law that most closely approximates +an absolute waiver of all civil liability in connection with the +Program, unless a warranty or assumption of liability accompanies a +copy of the Program in return for a fee. + + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Programs + + If you develop a new program, and you want it to be of the greatest +possible use to the public, the best way to achieve this is to make it +free software which everyone can redistribute and change under these terms. + + To do so, attach the following notices to the program. It is safest +to attach them to the start of each source file to most effectively +state the exclusion of warranty; and each file should have at least +the "copyright" line and a pointer to where the full notice is found. + + <one line to give the program's name and a brief idea of what it does.> + Copyright (C) <year> <name of author> + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <https://www.gnu.org/licenses/>. + +Also add information on how to contact you by electronic and paper mail. + + If your software can interact with users remotely through a computer +network, you should also make sure that it provides a way for users to +get its source. For example, if your program is a web application, its +interface could display a "Source" link that leads users to an archive +of the code. 
There are many ways you could offer source, and different +solutions will be better for different programs; see section 13 for the +specific requirements. + + You should also get your employer (if you work as a programmer) or school, +if any, to sign a "copyright disclaimer" for the program, if necessary. +For more information on this, and how to apply and follow the GNU AGPL, see +. + */ +package com.jdimension.jlawyer.pojo; + +import java.io.Serializable; +import java.util.Objects; + +/** + * + * @author jens + */ +public class FileMetadata implements Serializable { + + public static final int OCRSTATUS_WITHOCR=10; + public static final int OCRSTATUS_WITHOUTOCR=20; + public static final int OCRSTATUS_NOTSUPPORTED=30; + public static final int OCRSTATUS_PROCESSING=40; + + protected static long serialVersionUID = 1L; + + private String fileName=null; + private int ocrStatus=OCRSTATUS_NOTSUPPORTED; + private long fileSize=0; + + public FileMetadata() { + + } + + @Override + public int hashCode() { + int hash = 7; + hash = 83 * hash + Objects.hashCode(this.fileName); + hash = 83 * hash + this.ocrStatus; + hash = 83 * hash + (int) (this.fileSize ^ (this.fileSize >>> 32)); + return hash; + } + + @Override + public boolean equals(Object obj) { + if (this == obj) { + return true; + } + if (obj == null) { + return false; + } + if (getClass() != obj.getClass()) { + return false; + } + final FileMetadata other = (FileMetadata) obj; + if (this.ocrStatus != other.ocrStatus) { + return false; + } + if (this.fileSize != other.fileSize) { + return false; + } + return Objects.equals(this.fileName, other.fileName); + } + + /** + * @return the fileName + */ + public String getFileName() { + return fileName; + } + + /** + * @param fileName the fileName to set + */ + public void setFileName(String fileName) { + this.fileName = fileName; + } + + /** + * @return the fileSize + */ + public long getFileSize() { + return fileSize; + } + + /** + * @param fileSize the fileSize to set + */ + 
public void setFileSize(long fileSize) { + this.fileSize = fileSize; + } + + /** + * @return the ocrStatus + */ + public int getOcrStatus() { + return ocrStatus; + } + + /** + * @param ocrStatus the ocrStatus to set + */ + public void setOcrStatus(int ocrStatus) { + this.ocrStatus = ocrStatus; + } + + + + + +} diff --git a/j-lawyer-server/j-lawyer-server-ejb/src/java/com/jdimension/jlawyer/services/IntegrationService.java b/j-lawyer-server/j-lawyer-server-ejb/src/java/com/jdimension/jlawyer/services/IntegrationService.java index 11763f3b2..47c70dabd 100644 --- a/j-lawyer-server/j-lawyer-server-ejb/src/java/com/jdimension/jlawyer/services/IntegrationService.java +++ b/j-lawyer-server/j-lawyer-server-ejb/src/java/com/jdimension/jlawyer/services/IntegrationService.java @@ -671,6 +671,7 @@ import com.jdimension.jlawyer.persistence.IntegrationHookFacadeLocal; import com.jdimension.jlawyer.persistence.ServerSettingsBean; import com.jdimension.jlawyer.persistence.ServerSettingsBeanFacadeLocal; +import com.jdimension.jlawyer.pojo.FileMetadata; import com.jdimension.jlawyer.server.utils.ServerFileUtils; import com.jdimension.jlawyer.storage.VirtualFile; import java.io.BufferedReader; @@ -694,12 +695,19 @@ import java.util.Date; import java.util.HashMap; import java.util.List; +import javax.annotation.Resource; import javax.annotation.security.DeclareRoles; import javax.annotation.security.RolesAllowed; import javax.ejb.EJB; import javax.ejb.Stateless; +import javax.inject.Inject; +import javax.jms.JMSConnectionFactory; +import javax.jms.JMSContext; +import javax.jms.ObjectMessage; import org.apache.log4j.Logger; import org.apache.tika.Tika; +import org.jlawyer.utils.ocr.OcrRequest; +import org.jlawyer.utils.ocr.OcrUtils; /** * @@ -721,9 +729,16 @@ public class IntegrationService implements IntegrationServiceRemote, Integration @EJB private CustomHooksServiceLocal hookService; + @Inject + @JMSConnectionFactory("java:/JmsXA") + private JMSContext jmsContext; + + 
@Resource(lookup = "java:/jms/queue/searchIndexProcessorQueue") + private javax.jms.Queue searchIndexQueue; + @Override @RolesAllowed(value = {"readArchiveFileRole"}) - public HashMap getObservedDirectoryContent() { + public HashMap getObservedDirectoryContent() { ServerSettingsBean obs = this.settingsFacade.find("jlawyer.server.observe.directory"); if (obs == null) { @@ -748,32 +763,30 @@ public HashMap getObservedDirectoryContent() { return new HashMap(); } - HashMap fileObjects = new HashMap<>(); + HashMap fileObjects = new HashMap<>(); File files[] = scanDirectory.listFiles(); if (files != null) { for (File f : files) { - if (!f.isDirectory()) { - - if ((System.currentTimeMillis() - f.lastModified()) > 5000l) { - String name = f.getName(); - fileObjects.put(f, new Date(f.lastModified())); - } else { - - long size = f.length(); - try { - Thread.sleep(300); - } catch (Throwable t) { - - } - if (size != f.length()) { - // skip file - still copying... - } else { - String name = f.getName(); - fileObjects.put(f, new Date(f.lastModified())); + if (!f.isDirectory() && !f.getName().endsWith(".metadata")) { + try { + // file might still be copying - skip if last modified is less than 2.5s in the past + if ((System.currentTimeMillis() - f.lastModified()) > 2500l) { + + if (!OcrUtils.hasMetadata(f)) { + FileMetadata newMetadata = OcrUtils.generateMetadata(f); + if (newMetadata.getOcrStatus() == FileMetadata.OCRSTATUS_PROCESSING) { + // send request to perform OCR + OcrRequest req = new OcrRequest(f.getAbsolutePath()); + this.publishOcrRequest(req); + } + } + + fileObjects.put(OcrUtils.getMetadata(f), new Date(f.lastModified())); } + } catch (Exception ex) { + log.error("unable to get metadata for observed file " + f.getAbsolutePath(), ex); } - } } } else { @@ -782,6 +795,16 @@ public HashMap getObservedDirectoryContent() { return fileObjects; } + private void publishOcrRequest(OcrRequest req) { + try { + ObjectMessage msg = this.jmsContext.createObjectMessage(req); + 
jmsContext.createProducer().send(searchIndexQueue, msg); + + } catch (Exception ex) { + log.error("could not publish OCR request", ex); + } + } + @Override @RolesAllowed(value = {"readArchiveFileRole"}) public boolean removeObservedFile(String fileName) { @@ -809,6 +832,9 @@ public boolean removeObservedFile(String fileName) { String name = f.getName(); if (name.equals(fileName)) { f.delete(); + File metadataFile=new File(f.getAbsolutePath() + ".metadata"); + if(metadataFile.exists()) + metadataFile.delete(); return true; } } @@ -861,10 +887,11 @@ public byte[] getObservedFile(String fileName) throws Exception { @Override @RolesAllowed(value = {"writeArchiveFileRole"}) public String assignObservedFile(String fileName, String archiveFileId, String renameTo) throws Exception { - - if(fileName==null || "".equals(fileName)) + + if (fileName == null || "".equals(fileName)) { throw new Exception("Dokumentname darf nicht leer sein!"); - + } + ServerSettingsBean obs = this.settingsFacade.find("jlawyer.server.observe.directory"); if (obs == null) { log.error("directory observation is switched off"); @@ -893,7 +920,7 @@ public String assignObservedFile(String fileName, String archiveFileId, String r String name = f.getName(); if (name.equals(fileName)) { byte[] data = ServerFileUtils.readFile(f); - ArchiveFileDocumentsBean d=this.archiveFileService.addDocument(archiveFileId, renameTo, data, "", null); + ArchiveFileDocumentsBean d = this.archiveFileService.addDocument(archiveFileId, renameTo, data, "", null); return d.getId(); } } @@ -943,7 +970,7 @@ public void saveEmailTemplate(EmailTemplate template, boolean replace) throws Ex fw.write(template.toXML()); } } - + @Override @RolesAllowed(value = {"loginRole"}) public void deleteEmailTemplate(String fileName) throws Exception { @@ -979,7 +1006,7 @@ public EmailTemplate getEmailTemplate(String fileName) throws Exception { try (FileReader fr = new FileReader(f)) { char[] buffer = new char[1024]; int len = 0; - + while ((len = 
fr.read(buffer)) > -1) { sb.append(buffer, 0, len); } @@ -1002,10 +1029,7 @@ public String getObservedFilePreview(String fileName) throws Exception { Tika tika = new Tika(); String result = null; try { - try (Reader r = tika.parse(new ByteArrayInputStream(data)); - BufferedReader br = new BufferedReader(r); - StringWriter sw = new StringWriter(); - BufferedWriter bw = new BufferedWriter(sw)) { + try (Reader r = tika.parse(new ByteArrayInputStream(data)); BufferedReader br = new BufferedReader(r); StringWriter sw = new StringWriter(); BufferedWriter bw = new BufferedWriter(sw)) { char[] buffer = new char[1024]; int bytesRead = -1; while ((bytesRead = br.read(buffer)) > -1) { @@ -1049,10 +1073,10 @@ public boolean validateExternalStorageLocation(String location) throws Exception @Override @RolesAllowed(value = {"loginRole"}) public String[] getHookTypes() { - HookType[] types=HookType.values(); - String[] typeNames=new String[types.length]; - for(int i=0;i getObservedFilesMetadata(List fileNames) throws Exception { + List metadataList = new ArrayList<>(); + for (String f : fileNames) { + FileMetadata meta = this.getObservedFileMetadata(f); + if (meta != null) { + metadataList.add(meta); + } + } + return metadataList; + } + + @Override + @RolesAllowed(value = {"loginRole"}) + public boolean performOcrForObservedFile(String fileName) throws Exception { + ServerSettingsBean obs = this.settingsFacade.find("jlawyer.server.observe.directory"); + if (obs == null) { + log.error("directory observation is switched off"); + return false; + } + + String scanDir = obs.getSettingValue(); + if (scanDir == null) { + log.error("directory observation is switched off"); + return false; + } + + File scanDirectory = new File(scanDir); + if (!scanDirectory.exists() && scanDirectory.isDirectory()) { + log.error("observed directory does not exist / is not a directory"); + return false; + } + + File files[] = scanDirectory.listFiles(); + for (File f : files) { + if (!f.isDirectory()) { + 
String name = f.getName(); + if (name.equals(fileName)) { + + OcrRequest req = new OcrRequest(f.getAbsolutePath()); + this.publishOcrRequest(req); + + return true; + } + } + } + + return false; } - - } diff --git a/j-lawyer-server/j-lawyer-server-ejb/src/java/com/jdimension/jlawyer/services/SingletonService.java b/j-lawyer-server/j-lawyer-server-ejb/src/java/com/jdimension/jlawyer/services/SingletonService.java index 482ea7ff1..c9f81e67a 100644 --- a/j-lawyer-server/j-lawyer-server-ejb/src/java/com/jdimension/jlawyer/services/SingletonService.java +++ b/j-lawyer-server/j-lawyer-server-ejb/src/java/com/jdimension/jlawyer/services/SingletonService.java @@ -682,6 +682,7 @@ You should also get your employer (if you work as a programmer) or school, import com.jdimension.jlawyer.persistence.FaxQueueBean; import com.jdimension.jlawyer.persistence.ServerSettingsBean; import com.jdimension.jlawyer.persistence.ServerSettingsBeanFacadeLocal; +import com.jdimension.jlawyer.pojo.FileMetadata; import com.jdimension.jlawyer.pojo.JobStatus; import com.jdimension.jlawyer.server.constants.MonitoringConstants; import java.io.File; @@ -689,11 +690,18 @@ You should also get your employer (if you work as a programmer) or school, import java.util.Collection; import java.util.Date; import java.util.HashMap; +import javax.annotation.Resource; import javax.annotation.security.PermitAll; import javax.annotation.security.RolesAllowed; import javax.ejb.Singleton; +import javax.inject.Inject; +import javax.jms.JMSConnectionFactory; +import javax.jms.JMSContext; +import javax.jms.ObjectMessage; import javax.naming.InitialContext; import org.apache.log4j.Logger; +import org.jlawyer.utils.ocr.OcrRequest; +import org.jlawyer.utils.ocr.OcrUtils; /** * @@ -701,19 +709,26 @@ You should also get your employer (if you work as a programmer) or school, */ @Singleton public class SingletonService implements SingletonServiceRemote, SingletonServiceLocal { - + private static final Logger log = 
Logger.getLogger(SingletonService.class.getName()); private int systemStatus = MonitoringConstants.LEVEL_NORMAL; - private HashMap observedFileNames = new HashMap<>(); + private HashMap observedFileNames = new HashMap<>(); private FaxQueueBean failedFax = null; protected EpostQueueBean failedLetter = null; private ArrayList faxQueue = new ArrayList<>(); protected ArrayList epostQueue = new ArrayList<>(); private HashMap jobStatus = new HashMap<>(); - - private long latestInstantMessageReceived=-1; - private long latestInstantMessageStatusUpdated=-1; + + private long latestInstantMessageReceived = -1; + private long latestInstantMessageStatusUpdated = -1; + + @Inject + @JMSConnectionFactory("java:/JmsXA") + private JMSContext jmsContext; + + @Resource(lookup = "java:/jms/queue/searchIndexProcessorQueue") + private javax.jms.Queue searchIndexQueue; @Override @RolesAllowed(value = {"loginRole"}) @@ -729,82 +744,93 @@ public void setSystemStatus(int status) { @Override @RolesAllowed(value = {"loginRole"}) - public HashMap getObservedFiles() { + public HashMap getObservedFiles() { return this.getObservedFiles(false); } - + @Override @PermitAll public void updateObservedFiles() { ServerSettingsBean mode = null; - try { - InitialContext ic = new InitialContext(); - ServerSettingsBeanFacadeLocal settings = (ServerSettingsBeanFacadeLocal) ic.lookup("java:global/j-lawyer-server/j-lawyer-server-ejb/ServerSettingsBeanFacade!com.jdimension.jlawyer.persistence.ServerSettingsBeanFacadeLocal"); - mode = settings.find("jlawyer.server.observe.directory"); - if (mode == null || "".equals(mode.getSettingValue())) { - log.info("directory observation is switched off"); - return; - } - } catch (Throwable ex) { - log.error("Error getting server setting for directory observation", ex); - return; - } - - String scanDir = mode.getSettingValue(); - if (scanDir == null || "".equals(scanDir)) { + try { + InitialContext ic = new InitialContext(); + ServerSettingsBeanFacadeLocal settings = 
(ServerSettingsBeanFacadeLocal) ic.lookup("java:global/j-lawyer-server/j-lawyer-server-ejb/ServerSettingsBeanFacade!com.jdimension.jlawyer.persistence.ServerSettingsBeanFacadeLocal"); + mode = settings.find("jlawyer.server.observe.directory"); + if (mode == null || "".equals(mode.getSettingValue())) { log.info("directory observation is switched off"); return; } + } catch (Throwable ex) { + log.error("Error getting server setting for directory observation", ex); + return; + } - File scanDirectory = new File(scanDir); - if (!scanDirectory.exists()) { - log.error("observed directory does not exist"); - return; - } + String scanDir = mode.getSettingValue(); + if (scanDir == null || "".equals(scanDir)) { + log.info("directory observation is switched off"); + return; + } - if (!scanDirectory.isDirectory()) { - log.error("observed directory is not a directory"); - return; - } + File scanDirectory = new File(scanDir); + if (!scanDirectory.exists()) { + log.error("observed directory does not exist"); + return; + } - HashMap fileObjects = new HashMap<>(); - File files[] = scanDirectory.listFiles(); - if (files != null) { - for (File f : files) { - if (!f.isDirectory()) { - // file might still be copying - skip if last modified is less than 3s in the past - if ((System.currentTimeMillis() - f.lastModified()) > 5000l) { - fileObjects.put(f, new Date(f.lastModified())); - } else { - - long size = f.length(); - try { - Thread.sleep(300); - } catch (Throwable t) { + if (!scanDirectory.isDirectory()) { + log.error("observed directory is not a directory"); + return; + } - } - if (size != f.length()) { - // skip file - still copying... 
- } else { - fileObjects.put(f, new Date(f.lastModified())); + HashMap fileObjects = new HashMap<>(); + File files[] = scanDirectory.listFiles(); + if (files != null) { + for (File f : files) { + if (!f.isDirectory() && !f.getName().endsWith(".metadata")) { + try { + // file might still be copying - skip if last modified is less than 2.5s in the past + + if ((System.currentTimeMillis() - f.lastModified()) > 2500l) { + + if (!OcrUtils.hasMetadata(f)) { + FileMetadata newMetadata = OcrUtils.generateMetadata(f); + if (newMetadata.getOcrStatus() == FileMetadata.OCRSTATUS_PROCESSING) { + // send request to perform OCR + OcrRequest req = new OcrRequest(f.getAbsolutePath()); + this.publishOcrRequest(req); + } } + fileObjects.put(OcrUtils.getMetadata(f), new Date(f.lastModified())); } + + } catch (Exception ex) { + log.error("unable to get metadata for observed file " + f.getAbsolutePath(), ex); } } - - } else { - log.error("observed directory returns null for #listFiles"); } - - this.setObservedFiles(fileObjects); - + } else { + log.error("observed directory returns null for #listFiles"); + } + + this.setObservedFiles(fileObjects); + + } + + private void publishOcrRequest(OcrRequest req) { + try { + ObjectMessage msg = this.jmsContext.createObjectMessage(req); + jmsContext.createProducer().send(searchIndexQueue, msg); + + } catch (Exception ex) { + log.error("could not publish OCR request", ex); + } } @Override @RolesAllowed(value = {"loginRole"}) - public HashMap getObservedFiles(boolean bypassCache) { + public HashMap getObservedFiles(boolean bypassCache) { if (bypassCache) { this.updateObservedFiles(); } @@ -813,7 +839,7 @@ public HashMap getObservedFiles(boolean bypassCache) { @Override @PermitAll - public void setObservedFiles(HashMap fileNames) { + public void setObservedFiles(HashMap fileNames) { this.observedFileNames = fileNames; } @@ -822,7 +848,7 @@ public void setObservedFiles(HashMap fileNames) { public FaxQueueBean getFailedFax() { return this.failedFax; } - 
+ @Override @RolesAllowed(value = {"loginRole"}) public ArrayList getFaxQueue() { @@ -848,7 +874,7 @@ public long getLatestInstantMessageReceived() { @Override @PermitAll public void setLatestInstantMessageReceived(long timestamp) { - this.latestInstantMessageReceived=timestamp; + this.latestInstantMessageReceived = timestamp; } @Override @@ -905,29 +931,31 @@ public JobStatus getJobStatus(String jobId) { @PermitAll public void updateJobStatus(JobStatus jobStatus) { this.purgeOldJobs(); - if(jobStatus.getId()!=null) + if (jobStatus.getId() != null) { this.jobStatus.put(jobStatus.getId(), jobStatus); + } } - + /* * purges any jobs that are older than two days - */ + */ private void purgeOldJobs() { - ArrayList removedKeys=new ArrayList<>(); - for(String jobId: this.jobStatus.keySet()) { - JobStatus s=this.jobStatus.get(jobId); - if(s!=null) { - Date d=s.getLastUpdated(); - if(d==null) { + ArrayList removedKeys = new ArrayList<>(); + for (String jobId : this.jobStatus.keySet()) { + JobStatus s = this.jobStatus.get(jobId); + if (s != null) { + Date d = s.getLastUpdated(); + if (d == null) { removedKeys.add(jobId); } else { - if((System.currentTimeMillis()-d.getTime())>2l*24l*60l*60l*1000l) + if ((System.currentTimeMillis() - d.getTime()) > 2l * 24l * 60l * 60l * 1000l) { removedKeys.add(jobId); + } } - + } } - for(String id: removedKeys) { + for (String id : removedKeys) { this.jobStatus.remove(id); } } diff --git a/j-lawyer-server/j-lawyer-server-ejb/src/java/com/jdimension/jlawyer/services/SingletonServiceLocal.java b/j-lawyer-server/j-lawyer-server-ejb/src/java/com/jdimension/jlawyer/services/SingletonServiceLocal.java index 1b6d4f76c..25049d2af 100644 --- a/j-lawyer-server/j-lawyer-server-ejb/src/java/com/jdimension/jlawyer/services/SingletonServiceLocal.java +++ b/j-lawyer-server/j-lawyer-server-ejb/src/java/com/jdimension/jlawyer/services/SingletonServiceLocal.java @@ -683,6 +683,7 @@ You should also get your employer (if you work as a programmer) or school, 
import com.jdimension.jlawyer.persistence.EpostQueueBean; import com.jdimension.jlawyer.persistence.FaxQueueBean; +import com.jdimension.jlawyer.pojo.FileMetadata; import com.jdimension.jlawyer.pojo.JobStatus; import java.io.File; import java.util.ArrayList; @@ -702,13 +703,13 @@ public interface SingletonServiceLocal { void setSystemStatus(int status); - HashMap getObservedFiles(); + HashMap getObservedFiles(); - HashMap getObservedFiles(boolean bypassCache); + HashMap getObservedFiles(boolean bypassCache); void updateObservedFiles(); - void setObservedFiles(HashMap fileNames); + void setObservedFiles(HashMap fileNames); FaxQueueBean getFailedFax(); EpostQueueBean getFailedLetter(); diff --git a/j-lawyer-server/j-lawyer-server-ejb/src/java/org/jlawyer/async/SearchIndexProcessor.java b/j-lawyer-server/j-lawyer-server-ejb/src/java/org/jlawyer/async/SearchIndexProcessor.java index 183bfcbd0..055cf84fb 100644 --- a/j-lawyer-server/j-lawyer-server-ejb/src/java/org/jlawyer/async/SearchIndexProcessor.java +++ b/j-lawyer-server/j-lawyer-server-ejb/src/java/org/jlawyer/async/SearchIndexProcessor.java @@ -666,14 +666,27 @@ import com.jdimension.jlawyer.documents.PreviewGenerator; import com.jdimension.jlawyer.persistence.ArchiveFileDocumentsBean; import com.jdimension.jlawyer.persistence.ArchiveFileDocumentsBeanFacadeLocal; +import com.jdimension.jlawyer.pojo.FileMetadata; +import com.jdimension.jlawyer.server.utils.ServerFileUtils; +import java.io.BufferedReader; +import java.io.BufferedWriter; +import java.io.ByteArrayInputStream; +import java.io.File; +import java.io.Reader; +import java.io.StringWriter; import java.util.List; +import java.util.concurrent.TimeUnit; import javax.ejb.*; import javax.jms.Message; import javax.jms.MessageListener; import javax.jms.ObjectMessage; import org.apache.log4j.Logger; +import org.apache.tika.Tika; +import org.jboss.ejb3.annotation.TransactionTimeout; import org.jlawyer.search.SearchAPI; import org.jlawyer.search.SearchIndexRequest; 
+import org.jlawyer.utils.ocr.OcrRequest; +import org.jlawyer.utils.ocr.OcrUtils; /** * @@ -696,6 +709,7 @@ public SearchIndexProcessor() { } @Override + @TransactionTimeout(value = 15, unit = TimeUnit.MINUTES) public void onMessage(Message message) { try { @@ -730,6 +744,66 @@ public void onMessage(Message message) { } } } + } else if (o instanceof OcrRequest) { + // Get the parent directory of the original file + File f=new File(((OcrRequest)o).getAbsolutePath()); + + byte[] data = ServerFileUtils.readFile(f); + if (data == null) { + log.error("Error checking for OCR information - content data for file is null"); + OcrUtils.updateOcrStatus(f, FileMetadata.OCRSTATUS_WITHOUTOCR); + return; + } + + Tika tika = new Tika(); + String result = ""; + try { + try (Reader r = tika.parse(new ByteArrayInputStream(data)); BufferedReader br = new BufferedReader(r); StringWriter sw = new StringWriter(); BufferedWriter bw = new BufferedWriter(sw)) { + char[] buffer = new char[1024]; + int bytesRead = -1; + while ((bytesRead = br.read(buffer)) > -1) { + bw.write(buffer, 0, bytesRead); + } + result = sw.toString(); + } + + } catch (Throwable t) { + log.error("Error checking for OCR information - text extraction failed", t); + OcrUtils.updateOcrStatus(f, FileMetadata.OCRSTATUS_WITHOUTOCR); + return; + } + + if(result.length()>0) { + // no OCR required + OcrUtils.updateOcrStatus(f, FileMetadata.OCRSTATUS_WITHOCR); + return; + } + + + String tmpDir = System.getProperty("java.io.tmpdir"); + if (!tmpDir.endsWith(System.getProperty("file.separator"))) { + tmpDir = tmpDir + System.getProperty("file.separator"); + } + + // Add the "OCR" prefix to the original file name + String tmpFileName = tmpDir+System.currentTimeMillis(); + + + // Create a new File instance with the modified file name in the same directory + File outputFile = new File(tmpFileName); + + OcrUtils.performOcr(f, outputFile); + + if(!outputFile.exists()) { + log.error("OCR failed for file " + f.getAbsolutePath()); + 
OcrUtils.updateOcrStatus(f, FileMetadata.OCRSTATUS_WITHOUTOCR); + } else { + byte[] ocrFile = ServerFileUtils.readFile(outputFile); + ServerFileUtils.writeFile(f, ocrFile); + outputFile.delete(); + OcrUtils.updateOcrStatus(f, FileMetadata.OCRSTATUS_WITHOCR); + } + } } diff --git a/j-lawyer-server/j-lawyer-server-ejb/src/java/org/jlawyer/utils/ocr/OcrRequest.java b/j-lawyer-server/j-lawyer-server-ejb/src/java/org/jlawyer/utils/ocr/OcrRequest.java new file mode 100644 index 000000000..8448be361 --- /dev/null +++ b/j-lawyer-server/j-lawyer-server-ejb/src/java/org/jlawyer/utils/ocr/OcrRequest.java @@ -0,0 +1,695 @@ +/* + * GNU AFFERO GENERAL PUBLIC LICENSE + * Version 3, 19 November 2007 + * + * Copyright (C) 2007 Free Software Foundation, Inc. + * Everyone is permitted to copy and distribute verbatim copies + * of this license document, but changing it is not allowed. + * + * Preamble + * + * The GNU Affero General Public License is a free, copyleft license for + * software and other kinds of works, specifically designed to ensure + * cooperation with the community in the case of network server software. + * + * The licenses for most software and other practical works are designed + * to take away your freedom to share and change the works. By contrast, + * our General Public Licenses are intended to guarantee your freedom to + * share and change all versions of a program--to make sure it remains free + * software for all its users. + * + * When we speak of free software, we are referring to freedom, not + * price. Our General Public Licenses are designed to make sure that you + * have the freedom to distribute copies of free software (and charge for + * them if you wish), that you receive source code or can get it if you + * want it, that you can change the software or use pieces of it in new + * free programs, and that you know you can do these things. 
+ * + * Developers that use our General Public Licenses protect your rights + * with two steps: (1) assert copyright on the software, and (2) offer + * you this License which gives you legal permission to copy, distribute + * and/or modify the software. + * + * A secondary benefit of defending all users' freedom is that + * improvements made in alternate versions of the program, if they + * receive widespread use, become available for other developers to + * incorporate. Many developers of free software are heartened and + * encouraged by the resulting cooperation. However, in the case of + * software used on network servers, this result may fail to come about. + * The GNU General Public License permits making a modified version and + * letting the public access it on a server without ever releasing its + * source code to the public. + * + * The GNU Affero General Public License is designed specifically to + * ensure that, in such cases, the modified source code becomes available + * to the community. It requires the operator of a network server to + * provide the source code of the modified version running there to the + * users of that server. Therefore, public use of a modified version, on + * a publicly accessible server, gives the public access to the source + * code of the modified version. + * + * An older license, called the Affero General Public License and + * published by Affero, was designed to accomplish similar goals. This is + * a different license, not a version of the Affero GPL, but Affero has + * released a new version of the Affero GPL which permits relicensing under + * this license. + * + * The precise terms and conditions for copying, distribution and + * modification follow. + * + * TERMS AND CONDITIONS + * + * 0. Definitions. + * + * "This License" refers to version 3 of the GNU Affero General Public License. + * + * "Copyright" also means copyright-like laws that apply to other kinds of + * works, such as semiconductor masks. 
+ * + * "The Program" refers to any copyrightable work licensed under this + * License. Each licensee is addressed as "you". "Licensees" and + * "recipients" may be individuals or organizations. + * + * To "modify" a work means to copy from or adapt all or part of the work + * in a fashion requiring copyright permission, other than the making of an + * exact copy. The resulting work is called a "modified version" of the + * earlier work or a work "based on" the earlier work. + * + * A "covered work" means either the unmodified Program or a work based + * on the Program. + * + * To "propagate" a work means to do anything with it that, without + * permission, would make you directly or secondarily liable for + * infringement under applicable copyright law, except executing it on a + * computer or modifying a private copy. Propagation includes copying, + * distribution (with or without modification), making available to the + * public, and in some countries other activities as well. + * + * To "convey" a work means any kind of propagation that enables other + * parties to make or receive copies. Mere interaction with a user through + * a computer network, with no transfer of a copy, is not conveying. + * + * An interactive user interface displays "Appropriate Legal Notices" + * to the extent that it includes a convenient and prominently visible + * feature that (1) displays an appropriate copyright notice, and (2) + * tells the user that there is no warranty for the work (except to the + * extent that warranties are provided), that licensees may convey the + * work under this License, and how to view a copy of this License. If + * the interface presents a list of user commands or options, such as a + * menu, a prominent item in the list meets this criterion. + * + * 1. Source Code. + * + * The "source code" for a work means the preferred form of the work + * for making modifications to it. "Object code" means any non-source + * form of a work. 
+ * + * A "Standard Interface" means an interface that either is an official + * standard defined by a recognized standards body, or, in the case of + * interfaces specified for a particular programming language, one that + * is widely used among developers working in that language. + * + * The "System Libraries" of an executable work include anything, other + * than the work as a whole, that (a) is included in the normal form of + * packaging a Major Component, but which is not part of that Major + * Component, and (b) serves only to enable use of the work with that + * Major Component, or to implement a Standard Interface for which an + * implementation is available to the public in source code form. A + * "Major Component", in this context, means a major essential component + * (kernel, window system, and so on) of the specific operating system + * (if any) on which the executable work runs, or a compiler used to + * produce the work, or an object code interpreter used to run it. + * + * The "Corresponding Source" for a work in object code form means all + * the source code needed to generate, install, and (for an executable + * work) run the object code and to modify the work, including scripts to + * control those activities. However, it does not include the work's + * System Libraries, or general-purpose tools or generally available free + * programs which are used unmodified in performing those activities but + * which are not part of the work. For example, Corresponding Source + * includes interface definition files associated with source files for + * the work, and the source code for shared libraries and dynamically + * linked subprograms that the work is specifically designed to require, + * such as by intimate data communication or control flow between those + * subprograms and other parts of the work. + * + * The Corresponding Source need not include anything that users + * can regenerate automatically from other parts of the Corresponding + * Source. 
+ * + * The Corresponding Source for a work in source code form is that + * same work. + * + * 2. Basic Permissions. + * + * All rights granted under this License are granted for the term of + * copyright on the Program, and are irrevocable provided the stated + * conditions are met. This License explicitly affirms your unlimited + * permission to run the unmodified Program. The output from running a + * covered work is covered by this License only if the output, given its + * content, constitutes a covered work. This License acknowledges your + * rights of fair use or other equivalent, as provided by copyright law. + * + * You may make, run and propagate covered works that you do not + * convey, without conditions so long as your license otherwise remains + * in force. You may convey covered works to others for the sole purpose + * of having them make modifications exclusively for you, or provide you + * with facilities for running those works, provided that you comply with + * the terms of this License in conveying all material for which you do + * not control copyright. Those thus making or running the covered works + * for you must do so exclusively on your behalf, under your direction + * and control, on terms that prohibit them from making any copies of + * your copyrighted material outside their relationship with you. + * + * Conveying under any other circumstances is permitted solely under + * the conditions stated below. Sublicensing is not allowed; section 10 + * makes it unnecessary. + * + * 3. Protecting Users' Legal Rights From Anti-Circumvention Law. + * + * No covered work shall be deemed part of an effective technological + * measure under any applicable law fulfilling obligations under article + * 11 of the WIPO copyright treaty adopted on 20 December 1996, or + * similar laws prohibiting or restricting circumvention of such + * measures. 
+ * + * When you convey a covered work, you waive any legal power to forbid + * circumvention of technological measures to the extent such circumvention + * is effected by exercising rights under this License with respect to + * the covered work, and you disclaim any intention to limit operation or + * modification of the work as a means of enforcing, against the work's + * users, your or third parties' legal rights to forbid circumvention of + * technological measures. + * + * 4. Conveying Verbatim Copies. + * + * You may convey verbatim copies of the Program's source code as you + * receive it, in any medium, provided that you conspicuously and + * appropriately publish on each copy an appropriate copyright notice; + * keep intact all notices stating that this License and any + * non-permissive terms added in accord with section 7 apply to the code; + * keep intact all notices of the absence of any warranty; and give all + * recipients a copy of this License along with the Program. + * + * You may charge any price or no price for each copy that you convey, + * and you may offer support or warranty protection for a fee. + * + * 5. Conveying Modified Source Versions. + * + * You may convey a work based on the Program, or the modifications to + * produce it from the Program, in the form of source code under the + * terms of section 4, provided that you also meet all of these conditions: + * + * a) The work must carry prominent notices stating that you modified + * it, and giving a relevant date. + * + * b) The work must carry prominent notices stating that it is + * released under this License and any conditions added under section + * 7. This requirement modifies the requirement in section 4 to + * "keep intact all notices". + * + * c) You must license the entire work, as a whole, under this + * License to anyone who comes into possession of a copy. 
This + * License will therefore apply, along with any applicable section 7 + * additional terms, to the whole of the work, and all its parts, + * regardless of how they are packaged. This License gives no + * permission to license the work in any other way, but it does not + * invalidate such permission if you have separately received it. + * + * d) If the work has interactive user interfaces, each must display + * Appropriate Legal Notices; however, if the Program has interactive + * interfaces that do not display Appropriate Legal Notices, your + * work need not make them do so. + * + * A compilation of a covered work with other separate and independent + * works, which are not by their nature extensions of the covered work, + * and which are not combined with it such as to form a larger program, + * in or on a volume of a storage or distribution medium, is called an + * "aggregate" if the compilation and its resulting copyright are not + * used to limit the access or legal rights of the compilation's users + * beyond what the individual works permit. Inclusion of a covered work + * in an aggregate does not cause this License to apply to the other + * parts of the aggregate. + * + * 6. Conveying Non-Source Forms. + * + * You may convey a covered work in object code form under the terms + * of sections 4 and 5, provided that you also convey the + * machine-readable Corresponding Source under the terms of this License, + * in one of these ways: + * + * a) Convey the object code in, or embodied in, a physical product + * (including a physical distribution medium), accompanied by the + * Corresponding Source fixed on a durable physical medium + * customarily used for software interchange. 
+ * + * b) Convey the object code in, or embodied in, a physical product + * (including a physical distribution medium), accompanied by a + * written offer, valid for at least three years and valid for as + * long as you offer spare parts or customer support for that product + * model, to give anyone who possesses the object code either (1) a + * copy of the Corresponding Source for all the software in the + * product that is covered by this License, on a durable physical + * medium customarily used for software interchange, for a price no + * more than your reasonable cost of physically performing this + * conveying of source, or (2) access to copy the + * Corresponding Source from a network server at no charge. + * + * c) Convey individual copies of the object code with a copy of the + * written offer to provide the Corresponding Source. This + * alternative is allowed only occasionally and noncommercially, and + * only if you received the object code with such an offer, in accord + * with subsection 6b. + * + * d) Convey the object code by offering access from a designated + * place (gratis or for a charge), and offer equivalent access to the + * Corresponding Source in the same way through the same place at no + * further charge. You need not require recipients to copy the + * Corresponding Source along with the object code. If the place to + * copy the object code is a network server, the Corresponding Source + * may be on a different server (operated by you or a third party) + * that supports equivalent copying facilities, provided you maintain + * clear directions next to the object code saying where to find the + * Corresponding Source. Regardless of what server hosts the + * Corresponding Source, you remain obligated to ensure that it is + * available for as long as needed to satisfy these requirements. 
+ * + * e) Convey the object code using peer-to-peer transmission, provided + * you inform other peers where the object code and Corresponding + * Source of the work are being offered to the general public at no + * charge under subsection 6d. + * + * A separable portion of the object code, whose source code is excluded + * from the Corresponding Source as a System Library, need not be + * included in conveying the object code work. + * + * A "User Product" is either (1) a "consumer product", which means any + * tangible personal property which is normally used for personal, family, + * or household purposes, or (2) anything designed or sold for incorporation + * into a dwelling. In determining whether a product is a consumer product, + * doubtful cases shall be resolved in favor of coverage. For a particular + * product received by a particular user, "normally used" refers to a + * typical or common use of that class of product, regardless of the status + * of the particular user or of the way in which the particular user + * actually uses, or expects or is expected to use, the product. A product + * is a consumer product regardless of whether the product has substantial + * commercial, industrial or non-consumer uses, unless such uses represent + * the only significant mode of use of the product. + * + * "Installation Information" for a User Product means any methods, + * procedures, authorization keys, or other information required to install + * and execute modified versions of a covered work in that User Product from + * a modified version of its Corresponding Source. The information must + * suffice to ensure that the continued functioning of the modified object + * code is in no case prevented or interfered with solely because + * modification has been made. 
+ * + * If you convey an object code work under this section in, or with, or + * specifically for use in, a User Product, and the conveying occurs as + * part of a transaction in which the right of possession and use of the + * User Product is transferred to the recipient in perpetuity or for a + * fixed term (regardless of how the transaction is characterized), the + * Corresponding Source conveyed under this section must be accompanied + * by the Installation Information. But this requirement does not apply + * if neither you nor any third party retains the ability to install + * modified object code on the User Product (for example, the work has + * been installed in ROM). + * + * The requirement to provide Installation Information does not include a + * requirement to continue to provide support service, warranty, or updates + * for a work that has been modified or installed by the recipient, or for + * the User Product in which it has been modified or installed. Access to a + * network may be denied when the modification itself materially and + * adversely affects the operation of the network or violates the rules and + * protocols for communication across the network. + * + * Corresponding Source conveyed, and Installation Information provided, + * in accord with this section must be in a format that is publicly + * documented (and with an implementation available to the public in + * source code form), and must require no special password or key for + * unpacking, reading or copying. + * + * 7. Additional Terms. + * + * "Additional permissions" are terms that supplement the terms of this + * License by making exceptions from one or more of its conditions. + * Additional permissions that are applicable to the entire Program shall + * be treated as though they were included in this License, to the extent + * that they are valid under applicable law. 
If additional permissions + * apply only to part of the Program, that part may be used separately + * under those permissions, but the entire Program remains governed by + * this License without regard to the additional permissions. + * + * When you convey a copy of a covered work, you may at your option + * remove any additional permissions from that copy, or from any part of + * it. (Additional permissions may be written to require their own + * removal in certain cases when you modify the work.) You may place + * additional permissions on material, added by you to a covered work, + * for which you have or can give appropriate copyright permission. + * + * Notwithstanding any other provision of this License, for material you + * add to a covered work, you may (if authorized by the copyright holders of + * that material) supplement the terms of this License with terms: + * + * a) Disclaiming warranty or limiting liability differently from the + * terms of sections 15 and 16 of this License; or + * + * b) Requiring preservation of specified reasonable legal notices or + * author attributions in that material or in the Appropriate Legal + * Notices displayed by works containing it; or + * + * c) Prohibiting misrepresentation of the origin of that material, or + * requiring that modified versions of such material be marked in + * reasonable ways as different from the original version; or + * + * d) Limiting the use for publicity purposes of names of licensors or + * authors of the material; or + * + * e) Declining to grant rights under trademark law for use of some + * trade names, trademarks, or service marks; or + * + * f) Requiring indemnification of licensors and authors of that + * material by anyone who conveys the material (or modified versions of + * it) with contractual assumptions of liability to the recipient, for + * any liability that these contractual assumptions directly impose on + * those licensors and authors. 
+ * + * All other non-permissive additional terms are considered "further + * restrictions" within the meaning of section 10. If the Program as you + * received it, or any part of it, contains a notice stating that it is + * governed by this License along with a term that is a further + * restriction, you may remove that term. If a license document contains + * a further restriction but permits relicensing or conveying under this + * License, you may add to a covered work material governed by the terms + * of that license document, provided that the further restriction does + * not survive such relicensing or conveying. + * + * If you add terms to a covered work in accord with this section, you + * must place, in the relevant source files, a statement of the + * additional terms that apply to those files, or a notice indicating + * where to find the applicable terms. + * + * Additional terms, permissive or non-permissive, may be stated in the + * form of a separately written license, or stated as exceptions; + * the above requirements apply either way. + * + * 8. Termination. + * + * You may not propagate or modify a covered work except as expressly + * provided under this License. Any attempt otherwise to propagate or + * modify it is void, and will automatically terminate your rights under + * this License (including any patent licenses granted under the third + * paragraph of section 11). + * + * However, if you cease all violation of this License, then your + * license from a particular copyright holder is reinstated (a) + * provisionally, unless and until the copyright holder explicitly and + * finally terminates your license, and (b) permanently, if the copyright + * holder fails to notify you of the violation by some reasonable means + * prior to 60 days after the cessation. 
+ * + * Moreover, your license from a particular copyright holder is + * reinstated permanently if the copyright holder notifies you of the + * violation by some reasonable means, this is the first time you have + * received notice of violation of this License (for any work) from that + * copyright holder, and you cure the violation prior to 30 days after + * your receipt of the notice. + * + * Termination of your rights under this section does not terminate the + * licenses of parties who have received copies or rights from you under + * this License. If your rights have been terminated and not permanently + * reinstated, you do not qualify to receive new licenses for the same + * material under section 10. + * + * 9. Acceptance Not Required for Having Copies. + * + * You are not required to accept this License in order to receive or + * run a copy of the Program. Ancillary propagation of a covered work + * occurring solely as a consequence of using peer-to-peer transmission + * to receive a copy likewise does not require acceptance. However, + * nothing other than this License grants you permission to propagate or + * modify any covered work. These actions infringe copyright if you do + * not accept this License. Therefore, by modifying or propagating a + * covered work, you indicate your acceptance of this License to do so. + * + * 10. Automatic Licensing of Downstream Recipients. + * + * Each time you convey a covered work, the recipient automatically + * receives a license from the original licensors, to run, modify and + * propagate that work, subject to this License. You are not responsible + * for enforcing compliance by third parties with this License. + * + * An "entity transaction" is a transaction transferring control of an + * organization, or substantially all assets of one, or subdividing an + * organization, or merging organizations. 
If propagation of a covered + * work results from an entity transaction, each party to that + * transaction who receives a copy of the work also receives whatever + * licenses to the work the party's predecessor in interest had or could + * give under the previous paragraph, plus a right to possession of the + * Corresponding Source of the work from the predecessor in interest, if + * the predecessor has it or can get it with reasonable efforts. + * + * You may not impose any further restrictions on the exercise of the + * rights granted or affirmed under this License. For example, you may + * not impose a license fee, royalty, or other charge for exercise of + * rights granted under this License, and you may not initiate litigation + * (including a cross-claim or counterclaim in a lawsuit) alleging that + * any patent claim is infringed by making, using, selling, offering for + * sale, or importing the Program or any portion of it. + * + * 11. Patents. + * + * A "contributor" is a copyright holder who authorizes use under this + * License of the Program or a work on which the Program is based. The + * work thus licensed is called the contributor's "contributor version". + * + * A contributor's "essential patent claims" are all patent claims + * owned or controlled by the contributor, whether already acquired or + * hereafter acquired, that would be infringed by some manner, permitted + * by this License, of making, using, or selling its contributor version, + * but do not include claims that would be infringed only as a + * consequence of further modification of the contributor version. For + * purposes of this definition, "control" includes the right to grant + * patent sublicenses in a manner consistent with the requirements of + * this License. 
+ * + * Each contributor grants you a non-exclusive, worldwide, royalty-free + * patent license under the contributor's essential patent claims, to + * make, use, sell, offer for sale, import and otherwise run, modify and + * propagate the contents of its contributor version. + * + * In the following three paragraphs, a "patent license" is any express + * agreement or commitment, however denominated, not to enforce a patent + * (such as an express permission to practice a patent or covenant not to + * sue for patent infringement). To "grant" such a patent license to a + * party means to make such an agreement or commitment not to enforce a + * patent against the party. + * + * If you convey a covered work, knowingly relying on a patent license, + * and the Corresponding Source of the work is not available for anyone + * to copy, free of charge and under the terms of this License, through a + * publicly available network server or other readily accessible means, + * then you must either (1) cause the Corresponding Source to be so + * available, or (2) arrange to deprive yourself of the benefit of the + * patent license for this particular work, or (3) arrange, in a manner + * consistent with the requirements of this License, to extend the patent + * license to downstream recipients. "Knowingly relying" means you have + * actual knowledge that, but for the patent license, your conveying the + * covered work in a country, or your recipient's use of the covered work + * in a country, would infringe one or more identifiable patents in that + * country that you have reason to believe are valid. 
+ * + * If, pursuant to or in connection with a single transaction or + * arrangement, you convey, or propagate by procuring conveyance of, a + * covered work, and grant a patent license to some of the parties + * receiving the covered work authorizing them to use, propagate, modify + * or convey a specific copy of the covered work, then the patent license + * you grant is automatically extended to all recipients of the covered + * work and works based on it. + * + * A patent license is "discriminatory" if it does not include within + * the scope of its coverage, prohibits the exercise of, or is + * conditioned on the non-exercise of one or more of the rights that are + * specifically granted under this License. You may not convey a covered + * work if you are a party to an arrangement with a third party that is + * in the business of distributing software, under which you make payment + * to the third party based on the extent of your activity of conveying + * the work, and under which the third party grants, to any of the + * parties who would receive the covered work from you, a discriminatory + * patent license (a) in connection with copies of the covered work + * conveyed by you (or copies made from those copies), or (b) primarily + * for and in connection with specific products or compilations that + * contain the covered work, unless you entered into that arrangement, + * or that patent license was granted, prior to 28 March 2007. + * + * Nothing in this License shall be construed as excluding or limiting + * any implied license or other defenses to infringement that may + * otherwise be available to you under applicable patent law. + * + * 12. No Surrender of Others' Freedom. + * + * If conditions are imposed on you (whether by court order, agreement or + * otherwise) that contradict the conditions of this License, they do not + * excuse you from the conditions of this License. 
If you cannot convey a + * covered work so as to satisfy simultaneously your obligations under this + * License and any other pertinent obligations, then as a consequence you may + * not convey it at all. For example, if you agree to terms that obligate you + * to collect a royalty for further conveying from those to whom you convey + * the Program, the only way you could satisfy both those terms and this + * License would be to refrain entirely from conveying the Program. + * + * 13. Remote Network Interaction; Use with the GNU General Public License. + * + * Notwithstanding any other provision of this License, if you modify the + * Program, your modified version must prominently offer all users + * interacting with it remotely through a computer network (if your version + * supports such interaction) an opportunity to receive the Corresponding + * Source of your version by providing access to the Corresponding Source + * from a network server at no charge, through some standard or customary + * means of facilitating copying of software. This Corresponding Source + * shall include the Corresponding Source for any work covered by version 3 + * of the GNU General Public License that is incorporated pursuant to the + * following paragraph. + * + * Notwithstanding any other provision of this License, you have + * permission to link or combine any covered work with a work licensed + * under version 3 of the GNU General Public License into a single + * combined work, and to convey the resulting work. The terms of this + * License will continue to apply to the part which is the covered work, + * but the work with which it is combined will remain governed by version + * 3 of the GNU General Public License. + * + * 14. Revised Versions of this License. + * + * The Free Software Foundation may publish revised and/or new versions of + * the GNU Affero General Public License from time to time. 
Such new versions + * will be similar in spirit to the present version, but may differ in detail to + * address new problems or concerns. + * + * Each version is given a distinguishing version number. If the + * Program specifies that a certain numbered version of the GNU Affero General + * Public License "or any later version" applies to it, you have the + * option of following the terms and conditions either of that numbered + * version or of any later version published by the Free Software + * Foundation. If the Program does not specify a version number of the + * GNU Affero General Public License, you may choose any version ever published + * by the Free Software Foundation. + * + * If the Program specifies that a proxy can decide which future + * versions of the GNU Affero General Public License can be used, that proxy's + * public statement of acceptance of a version permanently authorizes you + * to choose that version for the Program. + * + * Later license versions may give you additional or different + * permissions. However, no additional obligations are imposed on any + * author or copyright holder as a result of your choosing to follow a + * later version. + * + * 15. Disclaimer of Warranty. + * + * THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY + * APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT + * HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY + * OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM + * IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF + * ALL NECESSARY SERVICING, REPAIR OR CORRECTION. + * + * 16. Limitation of Liability. 
+ * + * IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING + * WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS + * THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY + * GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE + * USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF + * DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD + * PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), + * EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGES. + * + * 17. Interpretation of Sections 15 and 16. + * + * If the disclaimer of warranty and limitation of liability provided + * above cannot be given local legal effect according to their terms, + * reviewing courts shall apply local law that most closely approximates + * an absolute waiver of all civil liability in connection with the + * Program, unless a warranty or assumption of liability accompanies a + * copy of the Program in return for a fee. + * + * END OF TERMS AND CONDITIONS + * + * How to Apply These Terms to Your New Programs + * + * If you develop a new program, and you want it to be of the greatest + * possible use to the public, the best way to achieve this is to make it + * free software which everyone can redistribute and change under these terms. + * + * To do so, attach the following notices to the program. It is safest + * to attach them to the start of each source file to most effectively + * state the exclusion of warranty; and each file should have at least + * the "copyright" line and a pointer to where the full notice is found. 
+ * + * <one line to give the program's name and a brief idea of what it does.> + * Copyright (C) <year> <name of author> + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see <https://www.gnu.org/licenses/>. + * + * Also add information on how to contact you by electronic and paper mail. + * + * If your software can interact with users remotely through a computer + * network, you should also make sure that it provides a way for users to + * get its source. For example, if your program is a web application, its + * interface could display a "Source" link that leads users to an archive + * of the code. There are many ways you could offer source, and different + * solutions will be better for different programs; see section 13 for the + * specific requirements. + * + * You should also get your employer (if you work as a programmer) or school, + * if any, to sign a "copyright disclaimer" for the program, if necessary. + * For more information on this, and how to apply and follow the GNU AGPL, see + * <https://www.gnu.org/licenses/>.
+ */ +package org.jlawyer.utils.ocr; + +import org.jlawyer.search.*; + +/** + * Holds the absolute path of a single file; presumably a file queued for OCR (inferred from the class and package name - confirm). NOTE(review): why this extends SearchHit is not visible here. + * @author jens + */ +public class OcrRequest extends SearchHit { + + private String absolutePath; + /** Creates a request that stores the given path as its absolutePath; the value is not validated. */ + public OcrRequest(String fileAbsolutePath) { + this.absolutePath=fileAbsolutePath; + + } + + /** + * @return the absolute path currently stored in this request (whatever was last passed to the constructor or setter) + */ + public String getAbsolutePath() { + return absolutePath; + } + + /** + * @param absolutePath the absolute path to store, replacing the current value + */ + public void setAbsolutePath(String absolutePath) { + this.absolutePath = absolutePath; + } + +} diff --git a/j-lawyer-server/j-lawyer-server-ejb/src/java/org/jlawyer/utils/ocr/OcrUtils.java b/j-lawyer-server/j-lawyer-server-ejb/src/java/org/jlawyer/utils/ocr/OcrUtils.java new file mode 100644 index 000000000..29c7410c0 --- /dev/null +++ b/j-lawyer-server/j-lawyer-server-ejb/src/java/org/jlawyer/utils/ocr/OcrUtils.java @@ -0,0 +1,782 @@ +/* + GNU AFFERO GENERAL PUBLIC LICENSE + Version 3, 19 November 2007 + + Copyright (C) 2007 Free Software Foundation, Inc. + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + Preamble + + The GNU Affero General Public License is a free, copyleft license for +software and other kinds of works, specifically designed to ensure +cooperation with the community in the case of network server software. + + The licenses for most software and other practical works are designed +to take away your freedom to share and change the works. By contrast, +our General Public Licenses are intended to guarantee your freedom to +share and change all versions of a program--to make sure it remains free +software for all its users. + + When we speak of free software, we are referring to freedom, not +price.
Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +them if you wish), that you receive source code or can get it if you +want it, that you can change the software or use pieces of it in new +free programs, and that you know you can do these things. + + Developers that use our General Public Licenses protect your rights +with two steps: (1) assert copyright on the software, and (2) offer +you this License which gives you legal permission to copy, distribute +and/or modify the software. + + A secondary benefit of defending all users' freedom is that +improvements made in alternate versions of the program, if they +receive widespread use, become available for other developers to +incorporate. Many developers of free software are heartened and +encouraged by the resulting cooperation. However, in the case of +software used on network servers, this result may fail to come about. +The GNU General Public License permits making a modified version and +letting the public access it on a server without ever releasing its +source code to the public. + + The GNU Affero General Public License is designed specifically to +ensure that, in such cases, the modified source code becomes available +to the community. It requires the operator of a network server to +provide the source code of the modified version running there to the +users of that server. Therefore, public use of a modified version, on +a publicly accessible server, gives the public access to the source +code of the modified version. + + An older license, called the Affero General Public License and +published by Affero, was designed to accomplish similar goals. This is +a different license, not a version of the Affero GPL, but Affero has +released a new version of the Affero GPL which permits relicensing under +this license. + + The precise terms and conditions for copying, distribution and +modification follow. 
+ + TERMS AND CONDITIONS + + 0. Definitions. + + "This License" refers to version 3 of the GNU Affero General Public License. + + "Copyright" also means copyright-like laws that apply to other kinds of +works, such as semiconductor masks. + + "The Program" refers to any copyrightable work licensed under this +License. Each licensee is addressed as "you". "Licensees" and +"recipients" may be individuals or organizations. + + To "modify" a work means to copy from or adapt all or part of the work +in a fashion requiring copyright permission, other than the making of an +exact copy. The resulting work is called a "modified version" of the +earlier work or a work "based on" the earlier work. + + A "covered work" means either the unmodified Program or a work based +on the Program. + + To "propagate" a work means to do anything with it that, without +permission, would make you directly or secondarily liable for +infringement under applicable copyright law, except executing it on a +computer or modifying a private copy. Propagation includes copying, +distribution (with or without modification), making available to the +public, and in some countries other activities as well. + + To "convey" a work means any kind of propagation that enables other +parties to make or receive copies. Mere interaction with a user through +a computer network, with no transfer of a copy, is not conveying. + + An interactive user interface displays "Appropriate Legal Notices" +to the extent that it includes a convenient and prominently visible +feature that (1) displays an appropriate copyright notice, and (2) +tells the user that there is no warranty for the work (except to the +extent that warranties are provided), that licensees may convey the +work under this License, and how to view a copy of this License. If +the interface presents a list of user commands or options, such as a +menu, a prominent item in the list meets this criterion. + + 1. Source Code. 
+ + The "source code" for a work means the preferred form of the work +for making modifications to it. "Object code" means any non-source +form of a work. + + A "Standard Interface" means an interface that either is an official +standard defined by a recognized standards body, or, in the case of +interfaces specified for a particular programming language, one that +is widely used among developers working in that language. + + The "System Libraries" of an executable work include anything, other +than the work as a whole, that (a) is included in the normal form of +packaging a Major Component, but which is not part of that Major +Component, and (b) serves only to enable use of the work with that +Major Component, or to implement a Standard Interface for which an +implementation is available to the public in source code form. A +"Major Component", in this context, means a major essential component +(kernel, window system, and so on) of the specific operating system +(if any) on which the executable work runs, or a compiler used to +produce the work, or an object code interpreter used to run it. + + The "Corresponding Source" for a work in object code form means all +the source code needed to generate, install, and (for an executable +work) run the object code and to modify the work, including scripts to +control those activities. However, it does not include the work's +System Libraries, or general-purpose tools or generally available free +programs which are used unmodified in performing those activities but +which are not part of the work. For example, Corresponding Source +includes interface definition files associated with source files for +the work, and the source code for shared libraries and dynamically +linked subprograms that the work is specifically designed to require, +such as by intimate data communication or control flow between those +subprograms and other parts of the work. 
+ + The Corresponding Source need not include anything that users +can regenerate automatically from other parts of the Corresponding +Source. + + The Corresponding Source for a work in source code form is that +same work. + + 2. Basic Permissions. + + All rights granted under this License are granted for the term of +copyright on the Program, and are irrevocable provided the stated +conditions are met. This License explicitly affirms your unlimited +permission to run the unmodified Program. The output from running a +covered work is covered by this License only if the output, given its +content, constitutes a covered work. This License acknowledges your +rights of fair use or other equivalent, as provided by copyright law. + + You may make, run and propagate covered works that you do not +convey, without conditions so long as your license otherwise remains +in force. You may convey covered works to others for the sole purpose +of having them make modifications exclusively for you, or provide you +with facilities for running those works, provided that you comply with +the terms of this License in conveying all material for which you do +not control copyright. Those thus making or running the covered works +for you must do so exclusively on your behalf, under your direction +and control, on terms that prohibit them from making any copies of +your copyrighted material outside their relationship with you. + + Conveying under any other circumstances is permitted solely under +the conditions stated below. Sublicensing is not allowed; section 10 +makes it unnecessary. + + 3. Protecting Users' Legal Rights From Anti-Circumvention Law. + + No covered work shall be deemed part of an effective technological +measure under any applicable law fulfilling obligations under article +11 of the WIPO copyright treaty adopted on 20 December 1996, or +similar laws prohibiting or restricting circumvention of such +measures. 
+ + When you convey a covered work, you waive any legal power to forbid +circumvention of technological measures to the extent such circumvention +is effected by exercising rights under this License with respect to +the covered work, and you disclaim any intention to limit operation or +modification of the work as a means of enforcing, against the work's +users, your or third parties' legal rights to forbid circumvention of +technological measures. + + 4. Conveying Verbatim Copies. + + You may convey verbatim copies of the Program's source code as you +receive it, in any medium, provided that you conspicuously and +appropriately publish on each copy an appropriate copyright notice; +keep intact all notices stating that this License and any +non-permissive terms added in accord with section 7 apply to the code; +keep intact all notices of the absence of any warranty; and give all +recipients a copy of this License along with the Program. + + You may charge any price or no price for each copy that you convey, +and you may offer support or warranty protection for a fee. + + 5. Conveying Modified Source Versions. + + You may convey a work based on the Program, or the modifications to +produce it from the Program, in the form of source code under the +terms of section 4, provided that you also meet all of these conditions: + + a) The work must carry prominent notices stating that you modified + it, and giving a relevant date. + + b) The work must carry prominent notices stating that it is + released under this License and any conditions added under section + 7. This requirement modifies the requirement in section 4 to + "keep intact all notices". + + c) You must license the entire work, as a whole, under this + License to anyone who comes into possession of a copy. This + License will therefore apply, along with any applicable section 7 + additional terms, to the whole of the work, and all its parts, + regardless of how they are packaged. 
This License gives no + permission to license the work in any other way, but it does not + invalidate such permission if you have separately received it. + + d) If the work has interactive user interfaces, each must display + Appropriate Legal Notices; however, if the Program has interactive + interfaces that do not display Appropriate Legal Notices, your + work need not make them do so. + + A compilation of a covered work with other separate and independent +works, which are not by their nature extensions of the covered work, +and which are not combined with it such as to form a larger program, +in or on a volume of a storage or distribution medium, is called an +"aggregate" if the compilation and its resulting copyright are not +used to limit the access or legal rights of the compilation's users +beyond what the individual works permit. Inclusion of a covered work +in an aggregate does not cause this License to apply to the other +parts of the aggregate. + + 6. Conveying Non-Source Forms. + + You may convey a covered work in object code form under the terms +of sections 4 and 5, provided that you also convey the +machine-readable Corresponding Source under the terms of this License, +in one of these ways: + + a) Convey the object code in, or embodied in, a physical product + (including a physical distribution medium), accompanied by the + Corresponding Source fixed on a durable physical medium + customarily used for software interchange. 
+ + b) Convey the object code in, or embodied in, a physical product + (including a physical distribution medium), accompanied by a + written offer, valid for at least three years and valid for as + long as you offer spare parts or customer support for that product + model, to give anyone who possesses the object code either (1) a + copy of the Corresponding Source for all the software in the + product that is covered by this License, on a durable physical + medium customarily used for software interchange, for a price no + more than your reasonable cost of physically performing this + conveying of source, or (2) access to copy the + Corresponding Source from a network server at no charge. + + c) Convey individual copies of the object code with a copy of the + written offer to provide the Corresponding Source. This + alternative is allowed only occasionally and noncommercially, and + only if you received the object code with such an offer, in accord + with subsection 6b. + + d) Convey the object code by offering access from a designated + place (gratis or for a charge), and offer equivalent access to the + Corresponding Source in the same way through the same place at no + further charge. You need not require recipients to copy the + Corresponding Source along with the object code. If the place to + copy the object code is a network server, the Corresponding Source + may be on a different server (operated by you or a third party) + that supports equivalent copying facilities, provided you maintain + clear directions next to the object code saying where to find the + Corresponding Source. Regardless of what server hosts the + Corresponding Source, you remain obligated to ensure that it is + available for as long as needed to satisfy these requirements. 
+ + e) Convey the object code using peer-to-peer transmission, provided + you inform other peers where the object code and Corresponding + Source of the work are being offered to the general public at no + charge under subsection 6d. + + A separable portion of the object code, whose source code is excluded +from the Corresponding Source as a System Library, need not be +included in conveying the object code work. + + A "User Product" is either (1) a "consumer product", which means any +tangible personal property which is normally used for personal, family, +or household purposes, or (2) anything designed or sold for incorporation +into a dwelling. In determining whether a product is a consumer product, +doubtful cases shall be resolved in favor of coverage. For a particular +product received by a particular user, "normally used" refers to a +typical or common use of that class of product, regardless of the status +of the particular user or of the way in which the particular user +actually uses, or expects or is expected to use, the product. A product +is a consumer product regardless of whether the product has substantial +commercial, industrial or non-consumer uses, unless such uses represent +the only significant mode of use of the product. + + "Installation Information" for a User Product means any methods, +procedures, authorization keys, or other information required to install +and execute modified versions of a covered work in that User Product from +a modified version of its Corresponding Source. The information must +suffice to ensure that the continued functioning of the modified object +code is in no case prevented or interfered with solely because +modification has been made. 
+ + If you convey an object code work under this section in, or with, or +specifically for use in, a User Product, and the conveying occurs as +part of a transaction in which the right of possession and use of the +User Product is transferred to the recipient in perpetuity or for a +fixed term (regardless of how the transaction is characterized), the +Corresponding Source conveyed under this section must be accompanied +by the Installation Information. But this requirement does not apply +if neither you nor any third party retains the ability to install +modified object code on the User Product (for example, the work has +been installed in ROM). + + The requirement to provide Installation Information does not include a +requirement to continue to provide support service, warranty, or updates +for a work that has been modified or installed by the recipient, or for +the User Product in which it has been modified or installed. Access to a +network may be denied when the modification itself materially and +adversely affects the operation of the network or violates the rules and +protocols for communication across the network. + + Corresponding Source conveyed, and Installation Information provided, +in accord with this section must be in a format that is publicly +documented (and with an implementation available to the public in +source code form), and must require no special password or key for +unpacking, reading or copying. + + 7. Additional Terms. + + "Additional permissions" are terms that supplement the terms of this +License by making exceptions from one or more of its conditions. +Additional permissions that are applicable to the entire Program shall +be treated as though they were included in this License, to the extent +that they are valid under applicable law. 
If additional permissions +apply only to part of the Program, that part may be used separately +under those permissions, but the entire Program remains governed by +this License without regard to the additional permissions. + + When you convey a copy of a covered work, you may at your option +remove any additional permissions from that copy, or from any part of +it. (Additional permissions may be written to require their own +removal in certain cases when you modify the work.) You may place +additional permissions on material, added by you to a covered work, +for which you have or can give appropriate copyright permission. + + Notwithstanding any other provision of this License, for material you +add to a covered work, you may (if authorized by the copyright holders of +that material) supplement the terms of this License with terms: + + a) Disclaiming warranty or limiting liability differently from the + terms of sections 15 and 16 of this License; or + + b) Requiring preservation of specified reasonable legal notices or + author attributions in that material or in the Appropriate Legal + Notices displayed by works containing it; or + + c) Prohibiting misrepresentation of the origin of that material, or + requiring that modified versions of such material be marked in + reasonable ways as different from the original version; or + + d) Limiting the use for publicity purposes of names of licensors or + authors of the material; or + + e) Declining to grant rights under trademark law for use of some + trade names, trademarks, or service marks; or + + f) Requiring indemnification of licensors and authors of that + material by anyone who conveys the material (or modified versions of + it) with contractual assumptions of liability to the recipient, for + any liability that these contractual assumptions directly impose on + those licensors and authors. + + All other non-permissive additional terms are considered "further +restrictions" within the meaning of section 10. 
If the Program as you +received it, or any part of it, contains a notice stating that it is +governed by this License along with a term that is a further +restriction, you may remove that term. If a license document contains +a further restriction but permits relicensing or conveying under this +License, you may add to a covered work material governed by the terms +of that license document, provided that the further restriction does +not survive such relicensing or conveying. + + If you add terms to a covered work in accord with this section, you +must place, in the relevant source files, a statement of the +additional terms that apply to those files, or a notice indicating +where to find the applicable terms. + + Additional terms, permissive or non-permissive, may be stated in the +form of a separately written license, or stated as exceptions; +the above requirements apply either way. + + 8. Termination. + + You may not propagate or modify a covered work except as expressly +provided under this License. Any attempt otherwise to propagate or +modify it is void, and will automatically terminate your rights under +this License (including any patent licenses granted under the third +paragraph of section 11). + + However, if you cease all violation of this License, then your +license from a particular copyright holder is reinstated (a) +provisionally, unless and until the copyright holder explicitly and +finally terminates your license, and (b) permanently, if the copyright +holder fails to notify you of the violation by some reasonable means +prior to 60 days after the cessation. + + Moreover, your license from a particular copyright holder is +reinstated permanently if the copyright holder notifies you of the +violation by some reasonable means, this is the first time you have +received notice of violation of this License (for any work) from that +copyright holder, and you cure the violation prior to 30 days after +your receipt of the notice. 
+ + Termination of your rights under this section does not terminate the +licenses of parties who have received copies or rights from you under +this License. If your rights have been terminated and not permanently +reinstated, you do not qualify to receive new licenses for the same +material under section 10. + + 9. Acceptance Not Required for Having Copies. + + You are not required to accept this License in order to receive or +run a copy of the Program. Ancillary propagation of a covered work +occurring solely as a consequence of using peer-to-peer transmission +to receive a copy likewise does not require acceptance. However, +nothing other than this License grants you permission to propagate or +modify any covered work. These actions infringe copyright if you do +not accept this License. Therefore, by modifying or propagating a +covered work, you indicate your acceptance of this License to do so. + + 10. Automatic Licensing of Downstream Recipients. + + Each time you convey a covered work, the recipient automatically +receives a license from the original licensors, to run, modify and +propagate that work, subject to this License. You are not responsible +for enforcing compliance by third parties with this License. + + An "entity transaction" is a transaction transferring control of an +organization, or substantially all assets of one, or subdividing an +organization, or merging organizations. If propagation of a covered +work results from an entity transaction, each party to that +transaction who receives a copy of the work also receives whatever +licenses to the work the party's predecessor in interest had or could +give under the previous paragraph, plus a right to possession of the +Corresponding Source of the work from the predecessor in interest, if +the predecessor has it or can get it with reasonable efforts. + + You may not impose any further restrictions on the exercise of the +rights granted or affirmed under this License. 
For example, you may +not impose a license fee, royalty, or other charge for exercise of +rights granted under this License, and you may not initiate litigation +(including a cross-claim or counterclaim in a lawsuit) alleging that +any patent claim is infringed by making, using, selling, offering for +sale, or importing the Program or any portion of it. + + 11. Patents. + + A "contributor" is a copyright holder who authorizes use under this +License of the Program or a work on which the Program is based. The +work thus licensed is called the contributor's "contributor version". + + A contributor's "essential patent claims" are all patent claims +owned or controlled by the contributor, whether already acquired or +hereafter acquired, that would be infringed by some manner, permitted +by this License, of making, using, or selling its contributor version, +but do not include claims that would be infringed only as a +consequence of further modification of the contributor version. For +purposes of this definition, "control" includes the right to grant +patent sublicenses in a manner consistent with the requirements of +this License. + + Each contributor grants you a non-exclusive, worldwide, royalty-free +patent license under the contributor's essential patent claims, to +make, use, sell, offer for sale, import and otherwise run, modify and +propagate the contents of its contributor version. + + In the following three paragraphs, a "patent license" is any express +agreement or commitment, however denominated, not to enforce a patent +(such as an express permission to practice a patent or covenant not to +sue for patent infringement). To "grant" such a patent license to a +party means to make such an agreement or commitment not to enforce a +patent against the party. 
+ + If you convey a covered work, knowingly relying on a patent license, +and the Corresponding Source of the work is not available for anyone +to copy, free of charge and under the terms of this License, through a +publicly available network server or other readily accessible means, +then you must either (1) cause the Corresponding Source to be so +available, or (2) arrange to deprive yourself of the benefit of the +patent license for this particular work, or (3) arrange, in a manner +consistent with the requirements of this License, to extend the patent +license to downstream recipients. "Knowingly relying" means you have +actual knowledge that, but for the patent license, your conveying the +covered work in a country, or your recipient's use of the covered work +in a country, would infringe one or more identifiable patents in that +country that you have reason to believe are valid. + + If, pursuant to or in connection with a single transaction or +arrangement, you convey, or propagate by procuring conveyance of, a +covered work, and grant a patent license to some of the parties +receiving the covered work authorizing them to use, propagate, modify +or convey a specific copy of the covered work, then the patent license +you grant is automatically extended to all recipients of the covered +work and works based on it. + + A patent license is "discriminatory" if it does not include within +the scope of its coverage, prohibits the exercise of, or is +conditioned on the non-exercise of one or more of the rights that are +specifically granted under this License. 
You may not convey a covered +work if you are a party to an arrangement with a third party that is +in the business of distributing software, under which you make payment +to the third party based on the extent of your activity of conveying +the work, and under which the third party grants, to any of the +parties who would receive the covered work from you, a discriminatory +patent license (a) in connection with copies of the covered work +conveyed by you (or copies made from those copies), or (b) primarily +for and in connection with specific products or compilations that +contain the covered work, unless you entered into that arrangement, +or that patent license was granted, prior to 28 March 2007. + + Nothing in this License shall be construed as excluding or limiting +any implied license or other defenses to infringement that may +otherwise be available to you under applicable patent law. + + 12. No Surrender of Others' Freedom. + + If conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot convey a +covered work so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you may +not convey it at all. For example, if you agree to terms that obligate you +to collect a royalty for further conveying from those to whom you convey +the Program, the only way you could satisfy both those terms and this +License would be to refrain entirely from conveying the Program. + + 13. Remote Network Interaction; Use with the GNU General Public License. 
+ + Notwithstanding any other provision of this License, if you modify the +Program, your modified version must prominently offer all users +interacting with it remotely through a computer network (if your version +supports such interaction) an opportunity to receive the Corresponding +Source of your version by providing access to the Corresponding Source +from a network server at no charge, through some standard or customary +means of facilitating copying of software. This Corresponding Source +shall include the Corresponding Source for any work covered by version 3 +of the GNU General Public License that is incorporated pursuant to the +following paragraph. + + Notwithstanding any other provision of this License, you have +permission to link or combine any covered work with a work licensed +under version 3 of the GNU General Public License into a single +combined work, and to convey the resulting work. The terms of this +License will continue to apply to the part which is the covered work, +but the work with which it is combined will remain governed by version +3 of the GNU General Public License. + + 14. Revised Versions of this License. + + The Free Software Foundation may publish revised and/or new versions of +the GNU Affero General Public License from time to time. Such new versions +will be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + + Each version is given a distinguishing version number. If the +Program specifies that a certain numbered version of the GNU Affero General +Public License "or any later version" applies to it, you have the +option of following the terms and conditions either of that numbered +version or of any later version published by the Free Software +Foundation. If the Program does not specify a version number of the +GNU Affero General Public License, you may choose any version ever published +by the Free Software Foundation. 
+ + If the Program specifies that a proxy can decide which future +versions of the GNU Affero General Public License can be used, that proxy's +public statement of acceptance of a version permanently authorizes you +to choose that version for the Program. + + Later license versions may give you additional or different +permissions. However, no additional obligations are imposed on any +author or copyright holder as a result of your choosing to follow a +later version. + + 15. Disclaimer of Warranty. + + THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY +APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT +HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY +OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, +THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM +IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF +ALL NECESSARY SERVICING, REPAIR OR CORRECTION. + + 16. Limitation of Liability. + + IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS +THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY +GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE +USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF +DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD +PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), +EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF +SUCH DAMAGES. + + 17. Interpretation of Sections 15 and 16. 
+ + If the disclaimer of warranty and limitation of liability provided +above cannot be given local legal effect according to their terms, +reviewing courts shall apply local law that most closely approximates +an absolute waiver of all civil liability in connection with the +Program, unless a warranty or assumption of liability accompanies a +copy of the Program in return for a fee. + + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Programs + + If you develop a new program, and you want it to be of the greatest +possible use to the public, the best way to achieve this is to make it +free software which everyone can redistribute and change under these terms. + + To do so, attach the following notices to the program. It is safest +to attach them to the start of each source file to most effectively +state the exclusion of warranty; and each file should have at least +the "copyright" line and a pointer to where the full notice is found. + + + Copyright (C) + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see . + +Also add information on how to contact you by electronic and paper mail. + + If your software can interact with users remotely through a computer +network, you should also make sure that it provides a way for users to +get its source. For example, if your program is a web application, its +interface could display a "Source" link that leads users to an archive +of the code. 
There are many ways you could offer source, and different +solutions will be better for different programs; see section 13 for the +specific requirements. + + You should also get your employer (if you work as a programmer) or school, +if any, to sign a "copyright disclaimer" for the program, if necessary. +For more information on this, and how to apply and follow the GNU AGPL, see +. + */ +package org.jlawyer.utils.ocr; + +import com.jdimension.jlawyer.pojo.FileMetadata; +import java.io.BufferedReader; +import java.io.File; +import java.io.FileReader; +import java.io.FileWriter; +import java.io.IOException; +import java.io.InputStream; +import java.io.InputStreamReader; +import java.util.Properties; +import org.apache.log4j.Logger; + +/** + * + * @author jens + */ +public class OcrUtils { + + private static final Logger log=Logger.getLogger(OcrUtils.class.getName()); + + private static final String METAPROPERTIES_KEY_FILENAME = "file.name"; + private static final String METAPROPERTIES_KEY_FILESIZE = "file.size"; + private static final String METAPROPERTIES_KEY_OCRSTATUS = "file.ocrstatus"; + + + public static void performOcr(File inputFile, File outputFile) throws Exception { + String[] cmd = new String[]{ + "ocrmypdf", + "--skip-text", + inputFile.getAbsolutePath(), + outputFile.getAbsolutePath() + }; + + log.info("OCR command line arguments for file " + inputFile.getAbsolutePath() + ": "); + for (String str : cmd) { + log.info(" " + str); + } + + int exitCode = 0; + ProcessBuilder processBuilder = new ProcessBuilder(cmd); + processBuilder.redirectErrorStream(true); // Redirect standard error to standard output + + log.info("OCR output:"); + try { + Process process = processBuilder.start(); + + // Read standard output and standard error streams + InputStream inputStream = process.getInputStream(); + BufferedReader reader = new BufferedReader(new InputStreamReader(inputStream)); + + String line; + while ((line = reader.readLine()) != null) { + log.info(" " + line); 
+ } + + // Wait for the process to finish and get the exit code + exitCode = process.waitFor(); + log.info("OCR exit code for file " + inputFile.getAbsolutePath() + ": " + exitCode); + + } catch (IOException | InterruptedException ex) { + log.error("OCR failed", ex); + exitCode = -1; // Set a custom exit code to indicate an error + } + + } + + public static boolean hasMetadata(File f) { + File metadata = new File(f.getAbsolutePath() + ".metadata"); + return metadata.exists(); + } + + public static void updateOcrStatus(File f, int newStatus) throws Exception { + Properties metaProperties = new Properties(); + FileReader fr = new FileReader(f.getAbsolutePath() + ".metadata"); + metaProperties.load(fr); + fr.close(); + metaProperties.put(METAPROPERTIES_KEY_OCRSTATUS, "" + newStatus); + + FileWriter fw = new FileWriter(f.getAbsolutePath() + ".metadata"); + metaProperties.store(fw, null); + fw.close(); + + } + + public static FileMetadata generateMetadata(File f) throws Exception { + File metadata = new File(f.getAbsolutePath() + ".metadata"); + if (!metadata.exists()) { + Properties metaProperties = new Properties(); + metaProperties.put(METAPROPERTIES_KEY_FILENAME, f.getName()); + metaProperties.put(METAPROPERTIES_KEY_FILESIZE, "" + f.length()); + metaProperties.put(METAPROPERTIES_KEY_OCRSTATUS, "" + FileMetadata.OCRSTATUS_NOTSUPPORTED); + if (f.getName().toLowerCase().endsWith(".pdf")) { + metaProperties.put(METAPROPERTIES_KEY_OCRSTATUS, "" + FileMetadata.OCRSTATUS_PROCESSING); + } + FileWriter fw = new FileWriter(metadata); + metaProperties.store(fw, null); + fw.close(); + } + return getMetadata(f); + + } + + public static FileMetadata getMetadata(File f) throws Exception { + FileMetadata metadata = null; + if (hasMetadata(f)) { + Properties metaProperties = new Properties(); + FileReader fr = new FileReader(f.getAbsolutePath() + ".metadata"); + metaProperties.load(fr); + fr.close(); + metadata = new FileMetadata(); + metadata.setFileName(f.getName()); + 
metadata.setFileSize(f.length()); + metadata.setOcrStatus(Integer.parseInt(metaProperties.get(METAPROPERTIES_KEY_OCRSTATUS).toString())); + } + return metadata; + } + +} diff --git a/j-lawyer-server/j-lawyer-server-war/src/java/com/jdimension/jlawyer/timer/TransientTimer.java b/j-lawyer-server/j-lawyer-server-war/src/java/com/jdimension/jlawyer/timer/TransientTimer.java index 90fc512a2..b6f4e2b1a 100755 --- a/j-lawyer-server/j-lawyer-server-war/src/java/com/jdimension/jlawyer/timer/TransientTimer.java +++ b/j-lawyer-server/j-lawyer-server-war/src/java/com/jdimension/jlawyer/timer/TransientTimer.java @@ -715,7 +715,7 @@ public void start() { timerObserver = new Timer(); // start after 20s and run every 12s - timerObserver.schedule(new DirectoryObserverTask(), 20000l, 12000l); + timerObserver.schedule(new DirectoryObserverTask(), 20000l, 10000l); }