-
Notifications
You must be signed in to change notification settings - Fork 19
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #77 from dice-group/develop
Lastest developments from Develop
- Loading branch information
Showing
401 changed files
with
290,466 additions
and
11,704 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
# Image for the Squirrel frontier component.
FROM openjdk:8u151-jdk

# Install netcat. The package lists are removed in the same layer so they do
# not end up in the image, and recommended packages are skipped.
RUN apt-get update \
    && apt-get install -y --no-install-recommends netcat \
    && rm -rf /var/lib/apt/lists/*

# Application jar and its default Spring configuration.
COPY ./squirrel.frontier/target/squirrel.frontier.jar /data/squirrel/squirrel.jar
COPY ./spring-config/default-config.xml /data/squirrel/default-config.xml
WORKDIR /data/squirrel

#ADD entrypoint.sh /entrypoint.sh
#RUN chmod +x /entrypoint.sh

VOLUME ["/var/squirrel/data"]

# Exec form: java is started directly (no /bin/sh wrapper), so it receives the
# stop signal sent by `docker stop`. The classpath contains the jar and the
# working directory (which holds default-config.xml).
CMD ["java", "-cp", "squirrel.jar:.", "org.hobbit.core.run.ComponentStarter", "org.aksw.simba.squirrel.components.FrontierComponent"]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
# Image for the Squirrel worker component.
FROM openjdk:8u151-jdk

# Install netcat. The package lists are removed in the same layer so they do
# not end up in the image, and recommended packages are skipped.
RUN apt-get update \
    && apt-get install -y --no-install-recommends netcat \
    && rm -rf /var/lib/apt/lists/*

# Application jar and its default Spring configuration.
COPY ./squirrel.worker/target/squirrel.worker.jar /data/squirrel/squirrel.jar
COPY ./spring-config/default-config.xml /data/squirrel/default-config.xml
WORKDIR /data/squirrel

#ADD entrypoint.sh /entrypoint.sh
#RUN chmod +x /entrypoint.sh

VOLUME ["/var/squirrel/data"]

# Exec form: java is started directly (no /bin/sh wrapper), so it receives the
# stop signal sent by `docker stop`. The classpath contains the jar and the
# working directory (which holds default-config.xml).
CMD ["java", "-cp", "squirrel.jar:.", "org.aksw.simba.squirrel.components.WorkerComponentStarter"]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,10 +1,18 @@ | ||
# Build and run helpers for Squirrel.
# None of these targets produce a file of the same name, so mark them phony;
# otherwise a stray file or directory called e.g. "build" would make the
# target appear up to date.
.PHONY: default build dockerize start restart clean

default: build

# Bring the sparql compose stack down, then build and install all Maven
# modules (tests and javadoc generation are skipped).
build:
	docker-compose -f docker-compose-sparql.yml down
	mvn clean install -U -DskipTests -Dmaven.javadoc.skip=true

# Build one image per component from its dedicated Dockerfile.
dockerize:
	docker build -f Dockerfile.frontier -t squirrel.frontier .
	docker build -f Dockerfile.worker -t squirrel.worker .

# Rebuild the images, then start the sparql compose stack in the foreground.
start: dockerize
	docker-compose -f docker-compose-sparql.yml up

# Restart the sparql compose stack without rebuilding anything.
restart:
	docker-compose -f docker-compose-sparql.yml down
	docker-compose -f docker-compose-sparql.yml up

# Remove worker output directories and the sparql host's databases.
clean:
	rm -rf data/worker* && rm -rf deployment/scenarios/1/worker* && rm -rf data/sparqlhost/sparqlhost_data/databases
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,9 +1,57 @@ | ||
# Squirrel | ||
Squirrel searches and collects Linked Data | ||
|
||
# Running with docker | ||
## Running with docker | ||
|
||
### Using the Makefile... | ||
|
||
``` | ||
$ make build dockerize | ||
$ docker-compose build | ||
$ docker-compose up | ||
``` | ||
|
||
![Squirrel logo](https://hobbitdata.informatik.uni-leipzig.de/squirrel/squirrel-logo.png) | ||
======= | ||
### ... or do it manually | ||
|
||
1. ``mvn clean package shade:shade -U -DskipTests`` | ||
1. If you have a new version of squirrel, e.g. version 0.3.0, you **can** execute ``mvn install:install-file -DgroupId=org.aksw.simba -DartifactId=squirrel -Dpackaging=jar -Dversion=0.3.0 -Dfile="target\original-squirrel.jar" -DgeneratePom=true -DlocalRepositoryPath=repository`` | ||
1. If you want to use the Web-Components, have a look at the Dependencies section in this file | ||
1. ``docker build -t squirrel .`` | ||
1. execute a `.yml` file with ``docker-compose -f <file> up``/ ``down`` | ||
|
||
#### There are currently 3 yml-options | ||
|
||
All yml files in the root folder crawl real, existing data portals with the help of [HtmlScraper](https://github.com/dice-group/Squirrel/wiki/HtmlScraper_how_to) | ||
- `docker-compose.yml`: file-sink based, without web | ||
- `docker-compose-sparql.yml`: sparql-sink based (_JENA_), without web | ||
- `docker-compose-sparql-web.yml`: sparql-sink based (_JENA_), with web including the visualization of crawled graph! | ||
|
||
--- | ||
|
||
## Dependencies | ||
|
||
### Using a Sparql-Host | ||
|
||
You can use a sparql-based triple store to store the crawled data. If you want to use it, you have to do the following: | ||
|
||
So far, the necessary datasets in the database are not created automatically, so you have to create them by hand: | ||
1. Run Squirrel as explained above | ||
2. Enter *localhost:3030* in your browser's address line | ||
3. Go to *manage datasets* | ||
4. Click *add new dataset* | ||
5. For *Dataset name* paste *contentset* | ||
6. For *Dataset type* select *Persistent – dataset will persist across Fuseki restarts* | ||
7. Go to step 4 again and do the same, **but this time with *"Metadata"* as *"Dataset name"*** | ||
|
||
### Further dependencies | ||
|
||
The [Squirrel-Webservice](https://github.com/phhei/Squirrel-Webservice) and the [SquirrelWebObject](https://github.com/phhei/SquirrelWebObject) are now included in this project. As a result, this project is now a multi-module Maven project. For that reason, there are 2 pom.xml files in the root layer: | ||
- `pom.xml`: this is the module bundle pom xml. If you execute ``mvn clean package``, this file will be used. As a consequence, all submodules including _squirrel_ will be compiled and packaged | ||
- `squirrel-pom.xml`: the pom for the _squirrel_ | ||
|
||
If you want to run the squirrel with the **Webservice**, make sure that you already have the current Webservice image (Docker). If not, execute | ||
1. ``mvn clean package`` _(only necessary if you want to compile each subproject (module) for itself)_ | ||
1. (``SquirrelWebObject\install.bat``) | ||
1. ``SquirrelWebService\buildImage.bat`` |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
124 changes: 124 additions & 0 deletions
124
deployment/docker-compose-simulation-scenario1-with-web.yml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,124 @@ | ||
# Simulation scenario 1 with the web front end: a frontier, one worker, a
# deduplicator and three simulated HTTP nodes, backed by RethinkDB, RabbitMQ
# and a Jena Fuseki SPARQL host.
version: '2'

services:
  rethinkdb:
    image: rethinkdb:2.3.5
    # volumes:
    #   - ./scenarios/1/rethinkdb:/data
    ports:
      - "28015:28015"

  # message bus
  rabbit:
    image: rabbitmq:management
    container_name: rabbit
    hostname: rabbit
    ports:
      - "15672:15672"
      # Forwarding the port for testing
      - "5672:5672"

  sparqlHost:
    image: stain/jena-fuseki
    container_name: sparqlHost
    ports:
      - "3030:3030"
    volumes:
      - ./scenarios/1/data/jena:/fuseki
    environment:
      # NOTE(review): hard-coded admin password; acceptable only for a local
      # simulation setup - confirm this file is never used for a public host.
      - ADMIN_PASSWORD=pw123
      - JVM_ARGS=-Xmx2g

  frontier:
    image: squirrel
    container_name: frontier
    environment:
      HOBBIT_RABBIT_HOST: rabbit
      SEED_FILE: /var/squirrel/data/seeds.txt
      RDB_HOST_NAME: rethinkdb
      RDB_PORT: 28015
      SERVICE_PRECONDITION: "rethinkdb:28015 rabbit:5672"
      COMMUNICATION_WITH_WEBSERVICE: "true"
      VISUALIZATION_OF_CRAWLED_GRAPH: "true"
    volumes:
      - ./scenarios/1:/var/squirrel/data
    command: java -cp squirrel.jar org.hobbit.core.run.ComponentStarter org.aksw.simba.squirrel.components.FrontierComponent

  web:
    image: squirrel/webimage:latest
    container_name: web
    ports:
      - "8080:8080"

  worker1:
    image: squirrel
    container_name: worker1
    environment:
      HOBBIT_RABBIT_HOST: rabbit
      OUTPUT_FOLDER: /var/squirrel/data
      SERVICE_PRECONDITION: "rethinkdb:28015 rabbit:5672"
      DEDUPLICATION_ACTIVE: "true"
      SPARQL_HOST_NAME: sparqlHost
      SPARQL_HOST_PORT: 3030
      RDB_HOST_NAME: rethinkdb
      RDB_PORT: 28015
    volumes:
      - ./scenarios/1/worker1:/var/squirrel/data
    command: java -cp squirrel.jar org.hobbit.core.run.ComponentStarter org.aksw.simba.squirrel.components.WorkerComponent

  deduplicator:
    image: squirrel
    container_name: deduplicator
    environment:
      DEDUPLICATION_ACTIVE: "true"
      HOBBIT_RABBIT_HOST: rabbit
      RDB_HOST_NAME: rethinkdb
      RDB_PORT: 28015
      SPARQL_HOST_NAME: sparqlHost
      SPARQL_HOST_PORT: 3030
      OUTPUT_FOLDER: /var/squirrel/data
      SERVICE_PRECONDITION: "rethinkdb:28015 rabbit:5672"
    volumes:
      - ./scenarios/1/deduplicator:/var/squirrel/data
    command: java -cp squirrel.jar org.hobbit.core.run.ComponentStarter org.aksw.simba.squirrel.components.DeduplicatorComponent

  # Simulated data nodes: each one runs SimpleHttpServerComponent with its
  # own resource model file from the shared scenario folder.
  SimulationNodeA:
    image: squirrel
    container_name: NodeA
    environment:
      RESOURCE_MODEL: /var/squirrel/data/nodeA.ttl
      RESOURCE_MODEL_LANG: N3
      PORT: 80
      DUMP_FILE_NAME: dump.gz
      USE_DEREF: "true"
      SERVICE_PRECONDITION: "rethinkdb:28015 rabbit:5672"
    volumes:
      - ./scenarios/1:/var/squirrel/data
    command: java -cp squirrel.jar org.hobbit.core.run.ComponentStarter org.aksw.simba.squirrel.components.SimpleHttpServerComponent

  SimulationNodeB:
    image: squirrel
    container_name: NodeB
    environment:
      RESOURCE_MODEL: /var/squirrel/data/nodeB.ttl
      RESOURCE_MODEL_LANG: N3
      PORT: 80
      USE_DEREF: "true"
      SERVICE_PRECONDITION: "rethinkdb:28015 rabbit:5672"
    volumes:
      - ./scenarios/1:/var/squirrel/data
    command: java -cp squirrel.jar org.hobbit.core.run.ComponentStarter org.aksw.simba.squirrel.components.SimpleHttpServerComponent

  SimulationNodeC:
    image: squirrel
    container_name: NodeC
    environment:
      RESOURCE_MODEL: /var/squirrel/data/nodeC.ttl
      RESOURCE_MODEL_LANG: N3
      PORT: 80
      DUMP_FILE_NAME: dump.gz
      USE_DEREF: "false"
      SERVICE_PRECONDITION: "rethinkdb:28015 rabbit:5672"
    volumes:
      - ./scenarios/1:/var/squirrel/data
    command: java -cp squirrel.jar org.hobbit.core.run.ComponentStarter org.aksw.simba.squirrel.components.SimpleHttpServerComponent
Oops, something went wrong.