Skip to content

Commit

Permalink
Fixed create_browser to use positional parameters including tests
Browse files Browse the repository at this point in the history
  • Loading branch information
saumier committed Nov 19, 2024
1 parent b74e17a commit e68e39d
Show file tree
Hide file tree
Showing 4 changed files with 58 additions and 8 deletions.
1 change: 1 addition & 0 deletions Gemfile
Original file line number Diff line number Diff line change
Expand Up @@ -6,5 +6,6 @@ gem 'nokogiri'
gem 'open-uri'
gem 'rake'
gem 'ferrum'
gem 'mocha'


4 changes: 4 additions & 0 deletions Gemfile.lock
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,8 @@ GEM
logger (1.6.0)
matrix (0.4.2)
minitest (5.21.2)
mocha (2.5.0)
ruby2_keywords (>= 0.0.5)
multi_json (1.15.0)
net-http-persistent (4.0.2)
connection_pool (~> 2.2)
Expand Down Expand Up @@ -165,6 +167,7 @@ GEM
rdf (~> 3.3)
rexml (~> 3.2)
rexml (3.2.6)
ruby2_keywords (0.0.5)
scanf (1.0.0)
shacl (0.4.1)
json-ld (~> 3.3)
Expand Down Expand Up @@ -225,6 +228,7 @@ DEPENDENCIES
ferrum
linkeddata
minitest
mocha
nokogiri
open-uri
rake
Expand Down
26 changes: 18 additions & 8 deletions src/lib/headless_browser.rb
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
require 'ferrum'
require 'json'
require 'linkeddata'
require 'rbconfig'

module HeadlessBrowser
def self.fetch_json_ld_objects(entity_urls, base_url, headers, sparql_paths, browser: nil, graph: nil)
Expand All @@ -16,15 +17,18 @@ def self.fetch_json_ld_objects(entity_urls, base_url, headers, sparql_paths, bro
graph
end

def self.create_browser(headers: nil)
# MacOS "/Applications/Google Chrome.app/Contents/MacOS/Google Chrome"
# Linux "/usr/bin/google-chrome-stable"
browser = Ferrum::Browser.new(browser_path: "/usr/bin/google-chrome-stable", headless: true, pending_connection_errors: false, process_timeout: 60, xvfb: true, browser_options: { 'no-sandbox': nil })
# Builds a headless Ferrum browser using the Chrome binary for the current OS.
#
# headers - optional value handed to browser.headers.set; skipped when nil.
#
# Returns the object produced by Ferrum::Browser.new.
def self.create_browser(headers = nil)
  chrome_binary =
    running_on_macos? ? "/Applications/Google Chrome.app/Contents/MacOS/Google Chrome" : "/usr/bin/google-chrome-stable"

  launch_options = {
    browser_path: chrome_binary,
    headless: true,
    pending_connection_errors: false,
    process_timeout: 60,
    xvfb: true,
    browser_options: { 'no-sandbox': nil }
  }

  # tap keeps the browser as the return value while optionally applying headers.
  Ferrum::Browser.new(**launch_options).tap do |new_browser|
    new_browser.headers.set(headers) if headers
  end
end

def self.process_entity_url(entity_url, browser, graph, add_url_sparql_file: nil)
def self.process_entity_url(entity_url, browser, graph, add_url_sparql_file = nil)
puts "Processing #{entity_url} in headless mode"
browser.go_to(entity_url)
sleep 15
Expand All @@ -37,13 +41,15 @@ def self.process_entity_url(entity_url, browser, graph, add_url_sparql_file: nil
puts "Error processing #{entity_url} in headless mode: #{e.message}"
end

def self.process_json_ld_script(script, entity_url, graph, add_url_sparql_file)
def self.process_json_ld_script(script, entity_url, graph, add_url_sparql_file = nil)
# Parse the JSON-LD string into a JSON object
json_ld = string_to_json(script.text)
# Convert the JSON-LD object to an RDF graph
loaded_graph = RDF::Graph.new << JSON::LD::API.toRdf(json_ld)
# sparql_file_with_url = add_url_sparql_file.gsub("subject_url", entity_url)
# loaded_graph.query(SPARQL.parse(sparql_file_with_url, update: true))
if add_url_sparql_file
sparql_file_with_url = add_url_sparql_file.gsub("subject_url", entity_url)
loaded_graph.query(SPARQL.parse(sparql_file_with_url, update: true))
end
graph << loaded_graph
rescue JSON::ParserError => e
puts "Error parsing JSON-LD: #{e.message}"
Expand All @@ -61,4 +67,8 @@ def self.fetch_entity_urls_headless(url, headers, browser: nil)
sleep 15
browser.body
end

# Reports whether RbConfig identifies the host operating system as macOS.
#
# Returns true when RbConfig::CONFIG['host_os'] contains "darwin" or "mac os",
# false otherwise.
def self.running_on_macos?
  # match? yields a strict true/false, as a predicate method should; the
  # previous =~ leaked an Integer match offset or nil to callers.
  RbConfig::CONFIG['host_os'].match?(/darwin|mac os/)
end
end
35 changes: 35 additions & 0 deletions tests/headless_browser_test.rb
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
require 'minitest/autorun'
require 'mocha/minitest'
require 'ferrum'
require 'linkeddata'
require_relative '../src/lib/headless_browser'
Expand All @@ -18,6 +19,40 @@ def test_string_to_json_with_newlines
assert_equal expected, actual
end

# create_browser without arguments returns whatever Ferrum::Browser.new produces.
def test_create_browser
  # Swap the real constructor for a mock so Ferrum::Browser.new is never actually run.
  Ferrum::Browser.stubs(:new).returns(mock_browser)

  created = HeadlessBrowser.create_browser

  assert_instance_of Mocha::Mock, created
end

# create_browser given headers must pass them to browser.headers.set.
def test_create_browser_with_headers
  request_headers = { "User-Agent" => "Chrome" }
  fake_browser = mock_browser

  # Without this expectation the test could not fail: the only header assertion
  # below reads a value the mock is stubbed to return. Mocha checks at teardown
  # that set was called exactly once with these headers.
  fake_browser.headers.expects(:set).with(request_headers).once

  # Stub the Ferrum::Browser.new method
  Ferrum::Browser.stubs(:new).returns(fake_browser)

  # Call the method with headers
  browser = HeadlessBrowser.create_browser(request_headers)

  # Assertions
  assert_instance_of Mocha::Mock, browser
  # Stub-backed sanity check retained from the original test.
  assert_equal "Chrome", browser.headers["User-Agent"]
end

private

# Builds a Mocha stand-in for a Ferrum browser: headers.set is accepted and
# headers[...] always answers "Chrome".
def mock_browser
  headers_double = mock('headers').tap do |fake_headers|
    fake_headers.stubs(:set)
    fake_headers.stubs(:[]).returns("Chrome")
  end

  mock('browser').tap do |fake_browser|
    fake_browser.stubs(:headers).returns(headers_double)
  end
end
end

0 comments on commit e68e39d

Please sign in to comment.