Skip to content

Commit

Permalink
Added hdtCat
Browse files Browse the repository at this point in the history
  • Loading branch information
D063520 committed Jul 13, 2018
1 parent 6a76c03 commit 9b7d2c8
Show file tree
Hide file tree
Showing 66 changed files with 449,652 additions and 40 deletions.
5 changes: 4 additions & 1 deletion README
Original file line number Diff line number Diff line change
Expand Up @@ -46,9 +46,12 @@ Authors
Mario Arias <mario.arias@gmail.com>
Javier D. Fernandez <[email protected]>
Miguel A. Martinez-Prieto <[email protected]>
Dennis Diefenbach <[email protected]>
Jose Gimenez Garcia <[email protected]>

[contributors](https://github.com/WDAqua/Trill/graphs/contributors)

Acknowledgements
================

RDF/HDT is a project developed by the Insight Centre for Data Analytics (www.insight-centre.org), University of Valladolid (www.uva.es), University of Chile (www.uchile.cl). Funded by Science Foundation Ireland: Grant No. SFI/08/CE/I1380, Lion-II; the Spanish Ministry of Economy and Competitiveness (TIN2009-14009-C02-02); and Chilean Fondecyt's 1110287 and 1-110066.
RDF/HDT is a project developed by the Insight Centre for Data Analytics (www.insight-centre.org), University of Valladolid (www.uva.es), University of Chile (www.uchile.cl), University of Saint-Etienne (www.univ-st-etienne.fr). Funded by Science Foundation Ireland: Grant No. SFI/08/CE/I1380, Lion-II; the Spanish Ministry of Economy and Competitiveness (TIN2009-14009-C02-02); Chilean Fondecyt's 1110287 and 1-110066; and the European Union's Horizon 2020 research and innovation program under the Marie Sklodowska-Curie grant agreement No 642795.
18 changes: 17 additions & 1 deletion hdt-api/src/main/java/org/rdfhdt/hdt/hdt/HDTManager.java
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
package org.rdfhdt.hdt.hdt;
package org.rdfhdt.hdt.hdt;

import java.io.IOException;
import java.io.InputStream;
Expand Down Expand Up @@ -225,6 +225,20 @@ public static TripleWriter getHDTWriter(String outFile, String baseURI, HDTOptio
return HDTManager.getInstance().doGetHDTWriter(outFile, baseURI, hdtFormat);
}

/**
 * Creates an HDT from two existing HDT files by joining their triples.
 * The call is forwarded to {@code doHDTCat} of the manager returned by {@code getInstance()}.
 *
 * @param location where the new HDT file is stored
 * @param hdtFileName1 file name of the first input HDT
 * @param hdtFileName2 file name of the second input HDT
 * @param hdtFormat parameters to tune the generated HDT
 * @param listener listener notified of progress; can be null if no notifications are needed
 * @return the HDT returned by the manager implementation
 * @throws IOException if the manager implementation throws it
 */
public static HDT catHDT(String location, String hdtFileName1, String hdtFileName2, HDTOptions hdtFormat, ProgressListener listener) throws IOException {
    final HDTManager manager = HDTManager.getInstance();
    final HDT joined = manager.doHDTCat(location, hdtFileName1, hdtFileName2, hdtFormat, listener);
    return joined;
}

// Abstract methods for the current implementation
protected abstract HDTOptions doReadOptions(String file) throws IOException;
protected abstract HDT doLoadHDT(String hdtFileName, ProgressListener listener) throws IOException;
Expand All @@ -238,4 +252,6 @@ public static TripleWriter getHDTWriter(String outFile, String baseURI, HDTOptio
protected abstract HDT doGenerateHDT(Iterator<TripleString> iterator, String baseURI, HDTOptions hdtFormat, ProgressListener listener) throws IOException;
protected abstract TripleWriter doGetHDTWriter(OutputStream out, String baseURI, HDTOptions hdtFormat) throws IOException;
protected abstract TripleWriter doGetHDTWriter(String outFile, String baseURI, HDTOptions hdtFormat) throws IOException;
/** Implementation hook behind {@link #catHDT(String, String, String, HDTOptions, ProgressListener)}; it receives that method's arguments unchanged. */
protected abstract HDT doHDTCat(String location, String hdtFileName1, String hdtFileName2, HDTOptions hdtFormat, ProgressListener listener) throws IOException;

}
27 changes: 27 additions & 0 deletions hdt-java-cli/README
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
# HDT-java-cli

This module contains command-line tools for manipulating HDT files, namely:

- rdf2hdt: compresses an RDF file (N-Triples, Turtle and other popular formats) to HDT

./bin/rdf2hdt.sh file.nt file.hdt

- hdt2rdf: decompresses an HDT file to RDF in N-Triples format

./bin/hdt2rdf.sh file.hdt file.nt

- hdtSearch: quickly searches for triple patterns in an HDT file

./bin/hdtSearch.sh file.hdt

- hdtCat: merges two HDT files into one HDT file

./bin/hdtCat.sh file.hdt file2.hdt fileCat.hdt

- hdtInfo: shows some statistics about the HDT file

./bin/hdtInfo.sh file.hdt

- hdtVerify: checks if the HDT file is correct

./bin/hdtVerify.sh file.hdt
Empty file modified hdt-java-cli/bin/hdt2rdf.sh
100644 → 100755
Empty file.
8 changes: 8 additions & 0 deletions hdt-java-cli/bin/hdtCat.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
#!/bin/bash
#
# Runs the hdtCat tool (org.rdfhdt.hdt.tools.HDTCat) through Maven's exec:java goal.
# Usage: hdtCat.sh <input HDT1> <input HDT2> <output HDT>

# Source javaenv.sh from the directory this script lives in.
# The path is quoted so the script also works when that directory contains spaces.
source "$(dirname "$0")/javaenv.sh"

# JVM options for the Maven-launched process (6 GB maximum heap).
export MAVEN_OPTS="-Xmx6g"
# exec.args takes a single string, so all script arguments are joined with spaces.
mvn exec:java -Dexec.mainClass="org.rdfhdt.hdt.tools.HDTCat" -Dexec.args="$*"

# Hand Maven's exit status back to the caller.
exit $?
Empty file modified hdt-java-cli/bin/hdtInfo.sh
100644 → 100755
Empty file.
Empty file modified hdt-java-cli/bin/hdtSearch.sh
100644 → 100755
Empty file.
Empty file modified hdt-java-cli/bin/hdtVerify.sh
100644 → 100755
Empty file.
Empty file modified hdt-java-cli/bin/javaenv.sh
100644 → 100755
Empty file.
Empty file modified hdt-java-cli/bin/rdf2hdt.sh
100644 → 100755
Empty file.
155 changes: 155 additions & 0 deletions hdt-java-cli/src/main/java/org/rdfhdt/hdt/tools/HDTCat.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,155 @@
/**
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*
* Contacting the authors:
* Dennis Diefenbach: [email protected]
*/

package org.rdfhdt.hdt.tools;

import com.beust.jcommander.JCommander;
import com.beust.jcommander.Parameter;
import com.beust.jcommander.internal.Lists;

import org.rdfhdt.hdt.exceptions.ParserException;
import org.rdfhdt.hdt.hdt.HDT;
import org.rdfhdt.hdt.hdt.HDTManager;
import org.rdfhdt.hdt.hdt.HDTVersion;
import org.rdfhdt.hdt.listener.ProgressListener;
import org.rdfhdt.hdt.options.HDTSpecification;
import org.rdfhdt.hdt.util.StopWatch;

import java.io.File;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.List;

/**
 * Command-line tool that concatenates two HDT files into a single HDT file.
 *
 * <p>Usage: {@code hdtCat [options] <input HDT1> <input HDT2> <output HDT>}.
 * While running, a working directory named after the output file with a
 * {@code _tmp} suffix is created and passed to {@link HDTManager#catHDT}; it is
 * removed again once the result has been saved and closed.
 *
 * @author Dennis Diefenbach
 *
 */
public class HDTCat implements ProgressListener {

    /** File name of the first input HDT. */
    public String hdtInput1;
    /** File name of the second input HDT. */
    public String hdtInput2;
    /** File name the concatenated HDT is saved to. */
    public String hdtOutput;

    @Parameter(description = "<input HDT1> <input HDT2> <output HDT>")
    public List<String> parameters = Lists.newArrayList();

    @Parameter(names = "-options", description = "HDT Conversion options (override those of config file)")
    public String options;

    @Parameter(names = "-config", description = "Conversion config file")
    public String configFile;

    @Parameter(names = "-index", description = "Generate also external indices to solve all queries")
    public boolean generateIndex;

    @Parameter(names = "-version", description = "Prints the HDT version number")
    public static boolean showVersion;

    @Parameter(names = "-quiet", description = "Do not show progress of the conversion")
    public boolean quiet;

    /**
     * Concatenates {@link #hdtInput1} and {@link #hdtInput2}, saves the result to
     * {@link #hdtOutput} and, if {@link #generateIndex} is set, also generates the index.
     *
     * <p>The temporary working directory is cleaned up after the HDT has been closed,
     * whether or not the operation succeeded.
     *
     * @throws ParserException declared for callers; see the invoked HDT APIs
     * @throws IOException if the working directory cannot be created or an HDT operation fails
     */
    public void execute() throws ParserException, IOException {

        HDTSpecification spec;
        if (configFile != null) {
            spec = new HDTSpecification(configFile);
        } else {
            spec = new HDTSpecification();
        }
        // Command-line options are applied on top of the config file.
        if (options != null) {
            spec.setOptions(options);
        }

        File file = new File(hdtOutput);
        File theDir = new File(file.getAbsolutePath() + "_tmp");
        // mkdirs() returns false both on failure and when the directory already exists;
        // only the former is an error.
        if (!theDir.mkdirs() && !theDir.isDirectory()) {
            throw new IOException("Could not create temporary directory: " + theDir.getAbsolutePath());
        }
        String location = theDir.getAbsolutePath() + "/";

        HDT hdt = null;
        try {
            hdt = HDTManager.catHDT(location, hdtInput1, hdtInput2, spec, this);

            // Show Basic stats
            if (!quiet) {
                System.out.println("Total Triples: " + hdt.getTriples().getNumberOfElements());
                System.out.println("Different subjects: " + hdt.getDictionary().getNsubjects());
                System.out.println("Different predicates: " + hdt.getDictionary().getNpredicates());
                System.out.println("Different objects: " + hdt.getDictionary().getNobjects());
                System.out.println("Common Subject/Object:" + hdt.getDictionary().getNshared());
            }

            // Dump to HDT file
            StopWatch sw = new StopWatch();
            hdt.saveToHDT(hdtOutput, this);
            System.out.println("HDT saved to file in: " + sw.stopAndShow());

            // Generate index and dump it to .hdt.index file
            sw.reset();
            if (generateIndex) {
                hdt = HDTManager.indexedHDT(hdt, this);
                System.out.println("Index generated and saved in: " + sw.stopAndShow());
            }
        } finally {
            try {
                if (hdt != null) {
                    hdt.close();
                }
            } finally {
                // Remove the working files only after the HDT is closed, so they are
                // never deleted while still in use and never left behind on failure.
                deleteTemporaryFiles(theDir, location);
            }
        }
    }

    /**
     * Best-effort removal of the working files and directory. Failures are reported
     * on standard error instead of being thrown, so that they neither fail a run whose
     * output was already written nor hide an earlier exception.
     *
     * @param theDir the temporary working directory
     * @param location path prefix (directory path ending in a separator) of the working files
     */
    private static void deleteTemporaryFiles(File theDir, String location) {
        String[] workingFiles = {"dictionary", "triples"};
        for (String name : workingFiles) {
            try {
                Files.deleteIfExists(Paths.get(location + name));
            } catch (IOException e) {
                System.err.println("Warning: could not delete temporary file " + location + name + ": " + e);
            }
        }
        if (theDir.exists() && !theDir.delete()) {
            System.err.println("Warning: could not delete temporary directory " + theDir.getAbsolutePath());
        }
    }

    /* (non-Javadoc)
     * @see hdt.ProgressListener#notifyProgress(float, java.lang.String)
     */
    @Override
    public void notifyProgress(float level, String message) {
        if (!quiet) {
            // Carriage returns keep the progress report on a single console line.
            System.out.print("\r" + message + "\t" + Float.toString(level) + " \r");
        }
    }

    /**
     * Entry point: parses the command line and runs the concatenation.
     * Exits with status 0 after printing the version, or with status 1 after printing
     * the usage when the three positional parameters are not supplied.
     *
     * @param args command-line arguments
     * @throws Throwable any error raised while parsing the arguments or executing
     */
    public static void main(String[] args) throws Throwable {
        HDTCat hdtCat = new HDTCat();
        System.out.println("Welcome to hdtCat!");
        System.out.println("This tool was developed by Dennis Diefenbach and José M. Giménez-García");
        System.out.println("NOTE: this tool is not working under WINDOWS! This is a well-known BUG!");
        JCommander com = new JCommander(hdtCat, args);
        com.setProgramName("hdtCat");

        if (hdtCat.parameters.size() == 3) {
            hdtCat.hdtInput1 = hdtCat.parameters.get(0);
            hdtCat.hdtInput2 = hdtCat.parameters.get(1);
            hdtCat.hdtOutput = hdtCat.parameters.get(2);
        } else if (showVersion) {
            System.out.println(HDTVersion.get_version_string("."));
            System.exit(0);
        } else {
            com.usage();
            System.exit(1);
        }

        System.out.println("Cat " + hdtCat.hdtInput1 + " and " + hdtCat.hdtInput2 + " to " + hdtCat.hdtOutput);

        hdtCat.execute();
    }
}
Loading

0 comments on commit 9b7d2c8

Please sign in to comment.