From e1d520d34a7eef3aff9ad20ded786c85572b0007 Mon Sep 17 00:00:00 2001 From: Eric Larson Date: Tue, 19 Nov 2024 10:39:52 -0600 Subject: [PATCH] WIP: documentation --- docs/geonames_solr.md | 100 +++++++++++++++--- ..._geonames.rake => geoportal_geonames.rake} | 40 +++---- 2 files changed, 106 insertions(+), 34 deletions(-) rename lib/tasks/geoportal/{import_geonames.rake => geoportal_geonames.rake} (98%) diff --git a/docs/geonames_solr.md b/docs/geonames_solr.md index b88e9764b..094877bb0 100644 --- a/docs/geonames_solr.md +++ b/docs/geonames_solr.md @@ -1,11 +1,16 @@ -# navigate to the solr directory -ewlarson@beanburrito-5 geoportal % cd tmp/solr +# Solr Geonames Setup -# create the collection -ewlarson@beanburrito-5 geoportal % bin/solr create -c geonames +## Install Steps -# add the field types -ewlarson@beanburrito-5 geoportal % curl -X POST -H 'Content-type:application/json' http://localhost:8983/solr/geonames/schema -d '{ +### Navigate to the Solr Wrapper installation directory +`cd tmp/solr ` + +### Create the collection +`bin/solr create -c geonames` + +### Add the field types +``` +curl -X POST -H 'Content-type:application/json' http://localhost:8983/solr/geonames/schema -d '{ "add-field-type":{ "name":"tag", "class":"solr.TextField", @@ -37,29 +42,96 @@ ewlarson@beanburrito-5 geoportal % curl -X POST -H 'Content-type:application/jso "add-copy-field":{"source":"name", "dest":["name_tag"]} }' +``` + +### Add the request handler -# add the request handler -ewlarson@beanburrito-5 solr % curl -X POST -H 'Content-type:application/json' http://localhost:8983/solr/geonames/config -d '{ +``` +curl -X POST -H 'Content-type:application/json' http://localhost:8983/solr/geonames/config -d '{ "add-requesthandler" : { "name": "/tag", "class":"solr.TaggerRequestHandler", "defaults":{"field":"name_tag"} } }' +``` -# Tagger example for Minneapolis -ewlarson@beanburrito-5 geoportal % curl -X POST \ - 'http://localhost:8983/solr/geonames/tag?overlaps=NO_SUB&tagsLimit=5000&fl=geonameid,name,countrycode&wt=json&indent=on' \ +### Tagger example for Minneapolis + +``` +curl -X POST \ + 'http://localhost:8983/solr/geonames/tag?overlaps=NO_SUB&tagsLimit=5000&fl=geonameid_i,name,countrycode&wt=json&indent=on' \ -H 'Content-Type:text/plain' -d 'Minneapolis' +``` + +### Lat/Long search example for Minneapolis -# Lat/Long search example for Minneapolis +``` curl -v -X POST \ 'http://localhost:8983/solr/geonames/select?q=*:*&fq=%7B!bbox%20sfield=location_p%7D&pt=44.9135128,-93.2802394&d=1' +``` + +### Lat/Long search example for Minneapolis sorted by distance descending -# Lat/Long search example for Minneapolis sorted by distance descending +``` curl -v -X POST \ 'http://localhost:8983/solr/geonames/select?q=*:*&fq=%7B!bbox%20sfield=location_p%7D&pt=44.9135128,-93.2802394&d=1&sort=geodist(location_p,44.9135128,-93.2802394)%20desc' +``` + +### Lat/Long search example for Minneapolis sorted by distance ascending and including distance + +``` +curl -v -X POST \ + 'http://localhost:8983/solr/geonames/select?q=*:*&fq=%7B!bbox%20sfield=location_p%7D&pt=44.9135128,-93.2802394&d=1&sort=geodist(location_p,44.9135128,-93.2802394)%20asc&fl=*,score,geodist:geodist(location_p,44.9135128,-93.2802394)' +``` + +### Tagger example for Minneapolis + +``` +curl -v -X POST \ + 'http://localhost:8983/solr/geonames/tag?overlaps=NO_SUB&tagsLimit=5000&fl=geonameid_i,name,countrycode&wt=json&indent=on' -H 'Content-Type:text/plain' -d 'Minneapolis' +``` + +### Lat/Long search example for Minneapolis sorted by distance ascending and including distance -# Lat/Long search example for Minneapolis sorted by distance ascending and including distance +``` curl -v -X POST \ 'http://localhost:8983/solr/geonames/select?q=*:*&fq=%7B!bbox%20sfield=location_p%7D&pt=44.9135128,-93.2802394&d=1&sort=geodist(location_p,44.9135128,-93.2802394)%20asc&fl=*,score,geodist:geodist(location_p,44.9135128,-93.2802394)' +``` + +### Combine tagger and geospatial search to sort the tagged geonames for "Minneapolis" + +```ruby +require 'net/http' +require 'json' +require 'uri' + +# Step 1: Tag the text to get geoname IDs +uri = URI('http://localhost:8983/solr/geonames/tag?overlaps=NO_SUB&tagsLimit=5000&fl=geonameid_i&wt=json') +request = Net::HTTP::Post.new(uri) +request['Content-Type'] = 'text/plain' +request.body = 'Minneapolis' + +response = Net::HTTP.start(uri.hostname, uri.port) do |http| + http.request(request) +end + +# Parse the JSON response to extract geoname IDs +tagged_response = JSON.parse(response.body) +geoname_ids = tagged_response['response']['docs'].map { |doc| doc['geonameid_i'] }.join(' OR ') + +# Step 2: Use the geoname IDs to perform a geospatial query with sorting +uri = URI("http://localhost:8983/solr/geonames/select?q=geonameid_i:(#{geoname_ids})&fq=%7B!bbox%20sfield=location_p%7D&pt=44.9135128,-93.2802394&d=1&sort=geodist(location_p,44.9135128,-93.2802394)%20asc&fl=*,score,geodist:geodist(location_p,44.9135128,-93.2802394)") + +# Troubleshooting +uri = URI("http://localhost:8983/solr/geonames/select?q=geonameid_i:(#{geoname_ids})&sort=geodist(location_p,44.9135128,-93.2802394)%20asc&fl=*,score,geodist:geodist(location_p,44.9135128,-93.2802394)&wt=json") + +request = Net::HTTP::Post.new(uri) + +response = Net::HTTP.start(uri.hostname, uri.port) do |http| + http.request(request) +end + +# Output the response +puts response.body +``` \ No newline at end of file diff --git a/lib/tasks/geoportal/import_geonames.rake b/lib/tasks/geoportal/geoportal_geonames.rake similarity index 98% rename from lib/tasks/geoportal/import_geonames.rake rename to lib/tasks/geoportal/geoportal_geonames.rake index 64f63b6e4..ea2400289 100644 --- a/lib/tasks/geoportal/import_geonames.rake +++ b/lib/tasks/geoportal/geoportal_geonames.rake @@ -46,7 +46,7 @@ namespace :geoportal do puts "Download and extraction completed successfully." end - desc "Import allCountries.txt data into the Geonames table" + desc "Import US.txt data into the Geonames table" task import: :environment do file_path = Rails.root.join('db', 'geonames', 'US.txt') @@ -112,24 +112,6 @@ namespace :geoportal do puts "Geonames import completed successfully." end - desc "Import Geoname entries into Solr" - task reindex_solr: :environment do - # Define the path to the CSV file - csv_file_path = Rails.root.join('db', 'geonames', 'geonames_export.csv') - - # Define the Solr update URL - solr_url = "http://localhost:8983/solr/geonames/update?commit=true" - - # Execute the curl command to update Solr - begin - puts "Updating Solr with data from #{csv_file_path}..." - system("curl '#{solr_url}' --data-binary @#{csv_file_path} -H 'Content-type:application/csv'") - puts "Geonames import to Solr completed successfully." - rescue StandardError => e - puts "Error updating Solr: #{e.message}" - end - end - desc "Export Geoname table to a CSV file using PostgreSQL COPY" task export: :environment do file_path = Rails.root.join('db', 'geonames', 'geonames_export.csv') @@ -140,7 +122,7 @@ namespace :geoportal do connection.execute <<-SQL COPY ( SELECT - geonameid, + geonameid AS geonameid_i, name, asciiname AS asciiname_s, alternatenames AS alternatenames_s, @@ -170,5 +152,23 @@ namespace :geoportal do puts "Error exporting Geoname table: #{e.message}" end end + + desc "Import Geoname entries into Solr" + task reindex_solr: :environment do + # Define the path to the CSV file + csv_file_path = Rails.root.join('db', 'geonames', 'geonames_export.csv') + + # Define the Solr update URL + solr_url = "http://localhost:8983/solr/geonames/update?commit=true" + + # Execute the curl command to update Solr + begin + puts "Updating Solr with data from #{csv_file_path}..." + system("curl '#{solr_url}' --data-binary @#{csv_file_path} -H 'Content-type:application/csv'") + puts "Geonames import to Solr completed successfully." + rescue StandardError => e + puts "Error updating Solr: #{e.message}" + end + end end end