From e68e39dc216a664dc47b1209cff781cc0487836a Mon Sep 17 00:00:00 2001 From: Gregory Saumier-Finch Date: Tue, 19 Nov 2024 11:42:23 -0500 Subject: [PATCH] Fixed create_browser to use positional parameters including tests --- Gemfile | 1 + Gemfile.lock | 4 ++++ src/lib/headless_browser.rb | 26 +++++++++++++++++-------- tests/headless_browser_test.rb | 35 ++++++++++++++++++++++++++++++++++ 4 files changed, 58 insertions(+), 8 deletions(-) diff --git a/Gemfile b/Gemfile index 5d4d2cb..0c2f0c8 100644 --- a/Gemfile +++ b/Gemfile @@ -6,5 +6,6 @@ gem 'nokogiri' gem 'open-uri' gem 'rake' gem 'ferrum' +gem 'mocha' diff --git a/Gemfile.lock b/Gemfile.lock index 4e63ccd..03a296f 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -75,6 +75,8 @@ GEM logger (1.6.0) matrix (0.4.2) minitest (5.21.2) + mocha (2.5.0) + ruby2_keywords (>= 0.0.5) multi_json (1.15.0) net-http-persistent (4.0.2) connection_pool (~> 2.2) @@ -165,6 +167,7 @@ GEM rdf (~> 3.3) rexml (~> 3.2) rexml (3.2.6) + ruby2_keywords (0.0.5) scanf (1.0.0) shacl (0.4.1) json-ld (~> 3.3) @@ -225,6 +228,7 @@ DEPENDENCIES ferrum linkeddata minitest + mocha nokogiri open-uri rake diff --git a/src/lib/headless_browser.rb b/src/lib/headless_browser.rb index 9c7d72d..59b8bbe 100644 --- a/src/lib/headless_browser.rb +++ b/src/lib/headless_browser.rb @@ -1,6 +1,7 @@ require 'ferrum' require 'json' require 'linkeddata' +require 'rbconfig' module HeadlessBrowser def self.fetch_json_ld_objects(entity_urls, base_url, headers, sparql_paths, browser: nil, graph: nil) @@ -16,15 +17,18 @@ def self.fetch_json_ld_objects(entity_urls, base_url, headers, sparql_paths, bro graph end - def self.create_browser(headers: nil) - # MacOS "/Applications/Google Chrome.app/Contents/MacOS/Google Chrome" - # Linux "/usr/bin/google-chrome-stable" - browser = Ferrum::Browser.new(browser_path: "/usr/bin/google-chrome-stable", headless: true, pending_connection_errors: false, process_timeout: 60, xvfb: true, browser_options: { 'no-sandbox': nil }) + def self.create_browser(headers = nil) + browser_path = if running_on_macos? + "/Applications/Google Chrome.app/Contents/MacOS/Google Chrome" + else + "/usr/bin/google-chrome-stable" + end + browser = Ferrum::Browser.new(browser_path: browser_path, headless: true, pending_connection_errors: false, process_timeout: 60, xvfb: true, browser_options: { 'no-sandbox': nil }) browser.headers.set(headers) if headers browser end - def self.process_entity_url(entity_url, browser, graph, add_url_sparql_file: nil) + def self.process_entity_url(entity_url, browser, graph, add_url_sparql_file = nil) puts "Processing #{entity_url} in headless mode" browser.go_to(entity_url) sleep 15 @@ -37,13 +41,15 @@ def self.process_entity_url(entity_url, browser, graph, add_url_sparql_file: nil puts "Error processing #{entity_url} in headless mode: #{e.message}" end - def self.process_json_ld_script(script, entity_url, graph, add_url_sparql_file) + def self.process_json_ld_script(script, entity_url, graph, add_url_sparql_file = nil) # Parse the JSON-LD string into a JSON object json_ld = string_to_json(script.text) # Convert the JSON-LD object to an RDF graph loaded_graph = RDF::Graph.new << JSON::LD::API.toRdf(json_ld) - # sparql_file_with_url = add_url_sparql_file.gsub("subject_url", entity_url) - # loaded_graph.query(SPARQL.parse(sparql_file_with_url, update: true)) + if add_url_sparql_file + sparql_file_with_url = add_url_sparql_file.gsub("subject_url", entity_url) + loaded_graph.query(SPARQL.parse(sparql_file_with_url, update: true)) + end graph << loaded_graph rescue JSON::ParserError => e puts "Error parsing JSON-LD: #{e.message}" @@ -61,4 +67,8 @@ def self.fetch_entity_urls_headless(url, headers, browser: nil) sleep 15 browser.body end + + def self.running_on_macos? + RbConfig::CONFIG['host_os'] =~ /darwin|mac os/ + end end \ No newline at end of file diff --git a/tests/headless_browser_test.rb b/tests/headless_browser_test.rb index 6839054..9cdc53e 100644 --- a/tests/headless_browser_test.rb +++ b/tests/headless_browser_test.rb @@ -1,4 +1,5 @@ require 'minitest/autorun' +require 'mocha/minitest' require 'ferrum' require 'linkeddata' require_relative '../src/lib/headless_browser' @@ -18,6 +19,40 @@ def test_string_to_json_with_newlines assert_equal expected, actual end + # Test create_browser method + def test_create_browser + # Stub the Ferrum::Browser.new method + Ferrum::Browser.stubs(:new).returns(mock_browser) + # Call the method + browser = HeadlessBrowser.create_browser + # Assertions + assert_instance_of Mocha::Mock, browser + end + + def test_create_browser_with_headers + # Stub the Ferrum::Browser.new method + Ferrum::Browser.stubs(:new).returns(mock_browser) + + # Call the method with headers + browser = HeadlessBrowser.create_browser({"User-Agent" => "Chrome"}) + + # Assertions + assert_instance_of Mocha::Mock, browser + assert_equal "Chrome", browser.headers["User-Agent"] + end + + private + + def mock_browser + headers_mock = mock('headers') + headers_mock.stubs(:set) + headers_mock.stubs(:[]).returns("Chrome") + + browser = mock('browser') + browser.stubs(:headers).returns(headers_mock) + + browser + end end \ No newline at end of file