Skip to content

Commit

Permalink
Metadata logstash (#16)
Browse files Browse the repository at this point in the history
* Adding new metadata pipeline
  • Loading branch information
PascalLike authored Mar 17, 2023
1 parent 790b315 commit 2b2177f
Show file tree
Hide file tree
Showing 7 changed files with 268 additions and 190 deletions.
44 changes: 22 additions & 22 deletions ES/add_data.sh
Original file line number Diff line number Diff line change
Expand Up @@ -50,28 +50,28 @@ done
python3 /load_es_data.py /in/masked.geojson fid

# Waiting for geopackage_pusher container to create the file
while ! test -f "/in/activity_level_ldn.geojson"; do
sleep 10
echo "Still waiting"
done
python3 /load_es_data.py /in/activity_level_ldn.geojson GSS_CODE

# Waiting for geopackage_pusher container to create the file
while ! test -f "/in/cardivasular_disease_ldn.geojson"; do
sleep 10
echo "Still waiting"
done
python3 /load_es_data.py /in/cardivasular_disease_ldn.geojson GSS_CODE

# Waiting for geopackage_pusher container to create the file
while ! test -f "/in/tweet_count_sample.geojson"; do
sleep 10
echo "Still waiting"
done
python3 /load_es_data.py /in/tweet_count_sample.geojson id

echo "Pushing EC metadata"
python3 /load_es_data.py /in/metadata/ec_catalog.geojson id
# while ! test -f "/in/activity_level_ldn.geojson"; do
# sleep 10
# echo "Still waiting"
# done
# python3 /load_es_data.py /in/activity_level_ldn.geojson GSS_CODE

# # Waiting for geopackage_pusher container to create the file
# while ! test -f "/in/cardivasular_disease_ldn.geojson"; do
# sleep 10
# echo "Still waiting"
# done
# python3 /load_es_data.py /in/cardivasular_disease_ldn.geojson GSS_CODE

# # Waiting for geopackage_pusher container to create the file
# while ! test -f "/in/tweet_count_sample.geojson"; do
# sleep 10
# echo "Still waiting"
# done
# python3 /load_es_data.py /in/tweet_count_sample.geojson id

# echo "Pushing EC metadata"
# python3 /load_es_data.py /in/metadata/ec_catalog.geojson id

echo "Loading igot data"
python3 /load_es_data.py /in/a0000000a.geojson OBJECTID
Expand Down
2 changes: 2 additions & 0 deletions docker-compose-local.yml
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,8 @@ services:
- elastic_search
depends_on:
- elastic_search
volumes:
- ./logstash/data:/in

geopackage_pusher:
build: ./gdal
Expand Down
2 changes: 2 additions & 0 deletions docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,8 @@ services:
- elastic_search
depends_on:
- elastic_search
volumes:
- ./logstash/data:/in

networks:
- default
Expand Down
4 changes: 2 additions & 2 deletions logstash/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
FROM docker.elastic.co/logstash/logstash:7.5.1
FROM docker.elastic.co/logstash/logstash:8.4.1

LABEL maintainer="[email protected]"

COPY masked_metadata.conf /usr/share/logstash/pipeline/masked_metadata.conf
COPY ec_metadata_harvester.conf /usr/share/logstash/pipeline/ec_metadata_harvester.conf
157 changes: 157 additions & 0 deletions logstash/ec_metadata_harvester.conf
Original file line number Diff line number Diff line change
@@ -0,0 +1,157 @@
# EC metadata harvester pipeline.
# Reads pipe-delimited rows from /in/metadata.csv, reshapes every row into a
# nested record (properties / geometry / temporal) and sends it to the
# "ec_catalog" Elasticsearch index, keyed by the row's "id" column.

# ---------------------------------------------------------------------------
# Input: the CSV file
# ---------------------------------------------------------------------------
input {
  file {
    path => "/in/metadata.csv"
    start_position => "beginning"
    # Read position is not persisted, so the file is read from the start on each run.
    sincedb_path => "/dev/null"
  }
}

# ---------------------------------------------------------------------------
# Filter: CSV row -> nested record
# ---------------------------------------------------------------------------
filter {
  # Split each line on "|" into the named columns.
  # NOTE(review): with skip_header and explicit columns, the header row is only
  # skipped when it matches these column names exactly -- confirm against the CSV.
  csv {
    separator => "|"
    skip_header => "true"
    columns =>["id", "title", "description", "license", "contactPoint_name", "contactPoint_institution", "contactPoint_email", "publisher", "keywords", "themes", "bbox", "geometry_coordinates", "temporal_interval", "features_endpoint", "tiles_endpoint", "wms_endpoint"]
  }

  # Fields written for every row. "[properties][formats]" appears twice;
  # presumably so that the field ends up holding both values -- TODO confirm.
  mutate {
    add_field => [ "[properties][id]", "%{id}",
                   "recordCreated", "%{@timestamp}",
                   "recordUpdated", "%{@timestamp}",
                   "[properties][created]", "%{@timestamp}",
                   "[properties][updated]", "%{@timestamp}",
                   "[properties][type]", "Feature",
                   "[properties][title]", "%{title}",
                   "[properties][description]", "%{description}",
                   "[properties][language]", "en",
                   "[properties][publisher]", "%{publisher}",
                   "[properties][formats]", "HTML",
                   "[properties][formats]", "GeoJSON",
                   "[properties][contactPoint][institution]", "%{contactPoint_institution}"
                 ]
  }

  # Association slot 0: features endpoint, only when the column has a value.
  if [features_endpoint] {
    mutate {
      add_field => ["[properties][associations][0][href]", "%{features_endpoint}"]
      add_field => ["[properties][associations][0][rel]", "item"]
      add_field => ["[properties][associations][0][type]", "application/geo+json"]
      add_field => ["[properties][associations][0][title]", "%{title} - OGC API Features"]
    }
  }

  # Association slot 1: tiles endpoint, only when the column has a value.
  # NOTE(review): the type is the same "application/geo+json" as the features
  # association -- confirm this is the intended media type for tiles.
  if [tiles_endpoint] {
    mutate {
      add_field => ["[properties][associations][1][href]", "%{tiles_endpoint}"]
      add_field => ["[properties][associations][1][rel]", "item"]
      add_field => ["[properties][associations][1][type]", "application/geo+json"]
      add_field => ["[properties][associations][1][title]", "%{title} - OGC API Tiles"]
    }
  }

  # Association slot 2: WMS endpoint, only when the column has a value.
  if [wms_endpoint] {
    mutate {
      add_field => ["[properties][associations][2][href]", "%{wms_endpoint}"]
      add_field => ["[properties][associations][2][rel]", "item"]
      add_field => ["[properties][associations][2][type]", "OGC:WMS"]
      add_field => ["[properties][associations][2][title]", "%{title} - WMS"]
    }
  }

  # Optional contact details: copied only when present in the row.
  if [contactPoint_name] {
    mutate { add_field => ["[properties][contactPoint][name]", "%{contactPoint_name}"] }
  }

  if [contactPoint_email] {
    mutate { add_field => ["[properties][contactPoint][email]", "%{contactPoint_email}"] }
  }

  # Keywords arrive as one ";"-separated string. split, convert and merge are
  # kept in three separate mutate blocks so they execute in this written order
  # (a single mutate applies its operations in its own fixed order).
  if [keywords] {
    mutate {
      split => { "keywords" => ";" }
    }
    mutate {
      convert => { "keywords" => "string" }
    }
    mutate {
      merge => ["[properties][keywords]", "keywords"]
    }
  }

  # Themes: same three-step handling as keywords, stored under themes.concepts.
  if [themes] {
    mutate {
      split => { "themes" => ";" }
    }
    mutate {
      convert => { "themes" => "string" }
    }
    mutate {
      merge => ["[properties][themes][concepts]", "themes"]
    }
  }

  # The bbox column is parsed as JSON into the spatial extent.
  json {
    source => "bbox"
    target => "[properties][extent][spatial][bbox]"
  }

  # Constant reference-system identifiers and the geometry type.
  mutate {
    add_field => { "[properties][extent][spatial][crs]" => "http://www.opengis.net/def/crs/OGC/1.3/CRS84" }
    add_field => { "[geometry][type]" => "Polygon" }
    add_field => { "[temporal][trs]" => "http://www.opengis.net/def/uom/ISO-8601/0/Gregorian" }
  }

  # The geometry_coordinates column is parsed as JSON into the geometry.
  json {
    source => "geometry_coordinates"
    target => "[geometry][coordinates]"
  }

  # The temporal interval is optional; parsed as JSON when present.
  if [temporal_interval] {
    json {
      source => "temporal_interval"
      target => "[temporal][interval]"
    }
  }

  # Drop the flat CSV columns and the listed bookkeeping fields so that only the
  # nested structure (plus the top-level "id") is left on the event.
  # NOTE(review): "license" is removed here without being copied into the
  # record anywhere above -- confirm that is intended.
  mutate {
    remove_field => [
      "title",
      "description",
      "license",
      "contactPoint_name",
      "contactPoint_institution",
      "contactPoint_email",
      "publisher",
      "keywords",
      "themes",
      "bbox",
      "geom",
      "geometry_coordinates",
      "temporal_interval",
      "features_endpoint",
      "tiles_endpoint",
      "wms_endpoint",
      "message",
      "event",
      "original",
      "log",
      "host"]
  }
}

# ---------------------------------------------------------------------------
# Output: Elasticsearch, one document per CSV row
# ---------------------------------------------------------------------------
output {
  elasticsearch {
    hosts => ["elastic:9200"]
    #user => "elastic"
    #password => "elastic12pr2"
    index => "ec_catalog"
    #template_overwrite => true
    #template_name => "metadata_template"
    # ilm_enabled => false
    # manage_template => false
    # The CSV "id" column is used as the document id.
    document_id => "%{[id]}"
    # NOTE(review): doc_as_upsert is documented for update mode and no "action"
    # is configured in this block -- confirm whether an update action was intended.
    doc_as_upsert => true
  }
}
Loading

0 comments on commit 2b2177f

Please sign in to comment.