diff --git a/.github/workflows/push.yml b/.github/workflows/push.yml index b7e9a7f..8e2ecb5 100644 --- a/.github/workflows/push.yml +++ b/.github/workflows/push.yml @@ -58,7 +58,10 @@ jobs: - uses: ruby/setup-ruby@v1 with: bundler-cache: true - ruby-version: "3.3" + # Setting this later than 3.1 requires use of anonymous positional + # arguments forwarding, which breaks all versions earlier than 3.2 + # in our test matrix + ruby-version: "3.1" - name: Install dependencies run: bundle install - name: Run the tests diff --git a/.gitignore b/.gitignore index a95f483..8390f69 100644 --- a/.gitignore +++ b/.gitignore @@ -29,3 +29,4 @@ csvlint-earl.ttl .byebug_history gemfiles/*.lock +/util/csv_testing.csv diff --git a/.standard.yml b/.standard.yml new file mode 100644 index 0000000..72b2693 --- /dev/null +++ b/.standard.yml @@ -0,0 +1 @@ +ruby_version: 3.1 diff --git a/csvlint.gemspec b/csvlint.gemspec index d20f5b0..d947e0d 100644 --- a/csvlint.gemspec +++ b/csvlint.gemspec @@ -1,4 +1,4 @@ -lib = File.expand_path("../lib", __FILE__) +lib = File.expand_path("lib", __dir__) $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib) require "csvlint/version" @@ -18,35 +18,35 @@ Gem::Specification.new do |spec| spec.required_ruby_version = [">= 2.5", "< 3.4"] - spec.add_dependency "csv" - spec.add_dependency "rainbow" - spec.add_dependency "open_uri_redirections" spec.add_dependency "activesupport" spec.add_dependency "addressable" - spec.add_dependency "typhoeus" + spec.add_dependency "csv" spec.add_dependency "escape_utils" - spec.add_dependency "uri_template" - spec.add_dependency "thor" - spec.add_dependency "rack" spec.add_dependency "net-http-persistent" + spec.add_dependency "open_uri_redirections" + spec.add_dependency "rack" + spec.add_dependency "rainbow" + spec.add_dependency "thor" + spec.add_dependency "typhoeus" + spec.add_dependency "uri_template" + spec.add_development_dependency "appraisal" + spec.add_development_dependency "aruba" 
spec.add_development_dependency "bundler", ">= 1.3" - spec.add_development_dependency "rake" - spec.add_development_dependency "cucumber" - spec.add_development_dependency "simplecov" - spec.add_development_dependency "simplecov-rcov" - spec.add_development_dependency "spork" - spec.add_development_dependency "webmock" - spec.add_development_dependency "rspec" - spec.add_development_dependency "rspec-pride" - spec.add_development_dependency "rspec-expectations" - spec.add_development_dependency "coveralls" spec.add_development_dependency "byebug" + spec.add_development_dependency "coveralls" + spec.add_development_dependency "cucumber" spec.add_development_dependency "github_changelog_generator" - spec.add_development_dependency "aruba" + spec.add_development_dependency "henry" + spec.add_development_dependency "rake" spec.add_development_dependency "rdf", "< 4.0" spec.add_development_dependency "rdf-turtle" - spec.add_development_dependency "henry" + spec.add_development_dependency "rspec" + spec.add_development_dependency "rspec-expectations" + spec.add_development_dependency "rspec-pride" + spec.add_development_dependency "simplecov" + spec.add_development_dependency "simplecov-rcov" + spec.add_development_dependency "spork" spec.add_development_dependency "standardrb" - spec.add_development_dependency "appraisal" + spec.add_development_dependency "webmock" end diff --git a/spec/validator_spec.rb b/spec/validator_spec.rb index 6af2e85..06c3380 100644 --- a/spec/validator_spec.rb +++ b/spec/validator_spec.rb @@ -9,7 +9,8 @@ end it "should validate from a URL" do - stub_request(:get, "http://example.com/example.csv").to_return(status: 200, headers: {"Content-Type" => "text/csv"}, body: File.read(File.join(File.dirname(__FILE__), "..", "features", "fixtures", "valid.csv"))) + stub_request(:get, "http://example.com/example.csv").to_return(status: 200, + headers: {"Content-Type" => "text/csv"}, body: File.read(File.join(File.dirname(__FILE__), "..", "features", 
"fixtures", "valid.csv"))) validator = Csvlint::Validator.new("http://example.com/example.csv") expect(validator.valid?).to eql(true) @@ -19,7 +20,8 @@ end it "should validate from a file path" do - validator = Csvlint::Validator.new(File.new(File.join(File.dirname(__FILE__), "..", "features", "fixtures", "valid.csv"))) + validator = Csvlint::Validator.new(File.new(File.join(File.dirname(__FILE__), "..", "features", "fixtures", + "valid.csv"))) expect(validator.valid?).to eql(true) expect(validator.instance_variable_get(:@expected_columns)).to eql(3) @@ -28,7 +30,8 @@ end it "should validate from a file path including whitespace" do - validator = Csvlint::Validator.new(File.new(File.join(File.dirname(__FILE__), "..", "features", "fixtures", "white space in filename.csv"))) + validator = Csvlint::Validator.new(File.new(File.join(File.dirname(__FILE__), "..", "features", "fixtures", + "white space in filename.csv"))) expect(validator.valid?).to eql(true) end @@ -40,7 +43,7 @@ end context "validation with multiple lines: " do - # TODO multiple lines permits testing of warnings + # TODO: multiple lines permits testing of warnings # TODO need more assertions in each test IE @formats # TODO the phrasing of col_counts if only consulting specs might be confusing # TODO ^-> col_counts and data.size should be equivalent, but only data is populated outside of if row.nil? 
@@ -53,7 +56,7 @@ validator = Csvlint::Validator.new(data) expect(validator.valid?).to eql(true) - # TODO would be beneficial to know how formats functions WRT to headers - check_format.feature:17 returns 3 rows total + # TODO: would be beneficial to know how formats functions WRT to headers - check_format.feature:17 returns 3 rows total # TODO in its formats object but is provided with 5 rows (with one nil row) [uses validation_warnings_steps.rb] expect(validator.instance_variable_get(:@expected_columns)).to eql(3) expect(validator.instance_variable_get(:@col_counts).count).to eql(4) @@ -221,7 +224,7 @@ expect(validator.errors.first.type).to eql(:unclosed_quote) end - # TODO stray quotes is not covered in any spec in this library + # TODO: stray quotes is not covered in any spec in this library # it "checks for stray quotes" do # stream = "\"a\",“b“,\"c\"" "\r\n" # validator = Csvlint::Validator.new(stream) @@ -241,7 +244,7 @@ end it "returns line break errors if incorrectly specified" do - # TODO the logic for catching this error message is very esoteric + # TODO: the logic for catching this error message is very esoteric stream = "\"a\",\"b\",\"c\"\n" validator = Csvlint::Validator.new(StringIO.new(stream), {"lineTerminator" => "\r\n"}) expect(validator.valid?).to eql(false) @@ -255,7 +258,7 @@ data = StringIO.new("minimum, minimum") validator = Csvlint::Validator.new(data) validator.reset - expect(validator.validate_header(["minimum", "minimum"])).to eql(true) + expect(validator.validate_header(%w[minimum minimum])).to eql(true) expect(validator.warnings.size).to eql(1) expect(validator.warnings.first.type).to eql(:duplicate_column_name) expect(validator.warnings.first.category).to eql(:schema) @@ -338,7 +341,7 @@ validator = Csvlint::Validator.new("http://example.com/example.csv") - rows.each_with_index do |row, i| + rows.each_with_index do |row, _i| validator.build_formats(row) end @@ -354,7 +357,7 @@ validator = 
Csvlint::Validator.new("http://example.com/example.csv") - rows.each_with_index do |row, i| + rows.each_with_index do |row, _i| validator.build_formats(row) end @@ -415,7 +418,7 @@ end end - # TODO the below tests are all the remaining tests from validator_spec.rb, annotations indicate their status HOWEVER these tests may be best refactored into client specs + # TODO: the below tests are all the remaining tests from validator_spec.rb, annotations indicate their status HOWEVER these tests may be best refactored into client specs context "when detecting headers" do it "should default to expecting a header" do validator = Csvlint::Validator.new("http://example.com/example.csv") @@ -436,21 +439,24 @@ end it "should look in content-type for header=absent" do - stub_request(:get, "http://example.com/example.csv").to_return(status: 200, headers: {"Content-Type" => "text/csv; header=absent"}, body: File.read(File.join(File.dirname(__FILE__), "..", "features", "fixtures", "valid.csv"))) + stub_request(:get, "http://example.com/example.csv").to_return(status: 200, + headers: {"Content-Type" => "text/csv; header=absent"}, body: File.read(File.join(File.dirname(__FILE__), "..", "features", "fixtures", "valid.csv"))) validator = Csvlint::Validator.new("http://example.com/example.csv") expect(validator.header?).to eql(false) expect(validator.errors.size).to eql(0) end it "should look in content-type for header=present" do - stub_request(:get, "http://example.com/example.csv").to_return(status: 200, headers: {"Content-Type" => "text/csv; header=present"}, body: File.read(File.join(File.dirname(__FILE__), "..", "features", "fixtures", "valid.csv"))) + stub_request(:get, "http://example.com/example.csv").to_return(status: 200, + headers: {"Content-Type" => "text/csv; header=present"}, body: File.read(File.join(File.dirname(__FILE__), "..", "features", "fixtures", "valid.csv"))) validator = Csvlint::Validator.new("http://example.com/example.csv") expect(validator.header?).to 
eql(true) expect(validator.errors.size).to eql(0) end it "assume header present if not specified in content type" do - stub_request(:get, "http://example.com/example.csv").to_return(status: 200, headers: {"Content-Type" => "text/csv"}, body: File.read(File.join(File.dirname(__FILE__), "..", "features", "fixtures", "valid.csv"))) + stub_request(:get, "http://example.com/example.csv").to_return(status: 200, + headers: {"Content-Type" => "text/csv"}, body: File.read(File.join(File.dirname(__FILE__), "..", "features", "fixtures", "valid.csv"))) validator = Csvlint::Validator.new("http://example.com/example.csv") expect(validator.header?).to eql(true) expect(validator.errors.size).to eql(0) @@ -459,7 +465,8 @@ end it "give wrong content type error if content type is wrong" do - stub_request(:get, "http://example.com/example.csv").to_return(status: 200, headers: {"Content-Type" => "text/html"}, body: File.read(File.join(File.dirname(__FILE__), "..", "features", "fixtures", "valid.csv"))) + stub_request(:get, "http://example.com/example.csv").to_return(status: 200, + headers: {"Content-Type" => "text/html"}, body: File.read(File.join(File.dirname(__FILE__), "..", "features", "fixtures", "valid.csv"))) validator = Csvlint::Validator.new("http://example.com/example.csv") expect(validator.header?).to eql(true) expect(validator.errors.size).to eql(1) @@ -504,22 +511,26 @@ end it "should not be an error if we have assumed a header, there is no dialect and content-type doesn't declare header, as we assume header=present" do - stub_request(:get, "http://example.com/example.csv").to_return(status: 200, headers: {"Content-Type" => "text/csv"}, body: File.read(File.join(File.dirname(__FILE__), "..", "features", "fixtures", "valid.csv"))) + stub_request(:get, "http://example.com/example.csv").to_return(status: 200, + headers: {"Content-Type" => "text/csv"}, body: File.read(File.join(File.dirname(__FILE__), "..", "features", "fixtures", "valid.csv"))) validator = 
Csvlint::Validator.new("http://example.com/example.csv") expect(validator.valid?).to eql(true) end it "should be valid if we have a dialect and the data is from the web" do - stub_request(:get, "http://example.com/example.csv").to_return(status: 200, headers: {"Content-Type" => "text/csv"}, body: File.read(File.join(File.dirname(__FILE__), "..", "features", "fixtures", "valid.csv"))) + stub_request(:get, "http://example.com/example.csv").to_return(status: 200, + headers: {"Content-Type" => "text/csv"}, body: File.read(File.join(File.dirname(__FILE__), "..", "features", "fixtures", "valid.csv"))) # header defaults to true in csv dialect, so this is valid validator = Csvlint::Validator.new("http://example.com/example.csv", {}) expect(validator.valid?).to eql(true) - stub_request(:get, "http://example.com/example.csv").to_return(status: 200, headers: {"Content-Type" => "text/csv"}, body: File.read(File.join(File.dirname(__FILE__), "..", "features", "fixtures", "valid.csv"))) + stub_request(:get, "http://example.com/example.csv").to_return(status: 200, + headers: {"Content-Type" => "text/csv"}, body: File.read(File.join(File.dirname(__FILE__), "..", "features", "fixtures", "valid.csv"))) validator = Csvlint::Validator.new("http://example.com/example.csv", {"header" => true}) expect(validator.valid?).to eql(true) - stub_request(:get, "http://example.com/example.csv").to_return(status: 200, headers: {"Content-Type" => "text/csv"}, body: File.read(File.join(File.dirname(__FILE__), "..", "features", "fixtures", "valid.csv"))) + stub_request(:get, "http://example.com/example.csv").to_return(status: 200, + headers: {"Content-Type" => "text/csv"}, body: File.read(File.join(File.dirname(__FILE__), "..", "features", "fixtures", "valid.csv"))) validator = Csvlint::Validator.new("http://example.com/example.csv", {"header" => false}) expect(validator.valid?).to eql(true) end @@ -527,7 +538,10 @@ context "accessing metadata" do before :all do - stub_request(:get, 
"http://example.com/crlf.csv").to_return(status: 200, body: File.read(File.join(File.dirname(__FILE__), "..", "features", "fixtures", "windows-line-endings.csv"))) + stub_request(:get, "http://example.com/crlf.csv").to_return(status: 200, + body: File.read(File.join( + File.dirname(__FILE__), "..", "features", "fixtures", "windows-line-endings.csv" + ))) stub_request(:get, "http://example.com/crlf.csv-metadata.json").to_return(status: 404) end @@ -540,20 +554,24 @@ it "should give access to the complete CSV data file" do stub_request(:get, "http://example.com/example.csv").to_return(status: 200, headers: {"Content-Type" => "text/csv; header=present"}, - body: File.read(File.join(File.dirname(__FILE__), "..", "features", "fixtures", "valid.csv"))) + body: File.read(File.join( + File.dirname(__FILE__), "..", "features", "fixtures", "valid.csv" + ))) validator = Csvlint::Validator.new("http://example.com/example.csv") expect(validator.valid?).to eql(true) data = validator.data expect(data.count).to eql 3 - expect(data[0]).to eql ["Foo", "Bar", "Baz"] - expect(data[2]).to eql ["3", "2", "1"] + expect(data[0]).to eql %w[Foo Bar Baz] + expect(data[2]).to eql %w[3 2 1] end it "should count the total number of rows read" do stub_request(:get, "http://example.com/example.csv").to_return(status: 200, headers: {"Content-Type" => "text/csv; header=present"}, - body: File.read(File.join(File.dirname(__FILE__), "..", "features", "fixtures", "valid.csv"))) + body: File.read(File.join( + File.dirname(__FILE__), "..", "features", "fixtures", "valid.csv" + ))) validator = Csvlint::Validator.new("http://example.com/example.csv") expect(validator.row_count).to eq(3) end @@ -561,12 +579,14 @@ it "should limit number of lines read" do stub_request(:get, "http://example.com/example.csv").to_return(status: 200, headers: {"Content-Type" => "text/csv; header=present"}, - body: File.read(File.join(File.dirname(__FILE__), "..", "features", "fixtures", "valid.csv"))) + body: 
File.read(File.join(
+          File.dirname(__FILE__), "..", "features", "fixtures", "valid.csv"
+        )))
       validator = Csvlint::Validator.new("http://example.com/example.csv", {}, nil, limit_lines: 2)
       expect(validator.valid?).to eql(true)
       data = validator.data
       expect(data.count).to eql 2
-      expect(data[0]).to eql ["Foo", "Bar", "Baz"]
+      expect(data[0]).to eql %w[Foo Bar Baz]
     end
 
     context "with a lambda" do
diff --git a/util/csv_testing.rb b/util/csv_testing.rb
new file mode 100644
index 0000000..84828d3
--- /dev/null
+++ b/util/csv_testing.rb
@@ -0,0 +1,145 @@
+# frozen_string_literal: true
+
+# Ad hoc comparison of how Ruby's CSV parser and Csvlint::Validator react to
+# different line-ending combinations. Writes one report row per scenario to
+# csv_testing.csv in the same directory as this script.
+
+require "bundler/inline"
+require "stringio"
+
+# Resolved against this file rather than the working directory, so the
+# script can be launched from anywhere in the repository.
+project_root = File.expand_path("..", __dir__)
+
+gemfile(true) do
+  source "https://rubygems.org"
+  gem "csv"
+  gem "csvlint", path: project_root
+  gem "pry"
+end
+
+module Ct
+  puts "Ruby: #{RUBY_VERSION}"
+  # puts "Csvlint: #{Csvlint::VERSION}"
+
+  # Kept beside this script so it matches the /util/csv_testing.csv entry in
+  # .gitignore whatever directory the script is run from.
+  OUTPUT_PATH = File.join(__dir__, "csv_testing.csv")
+
+  # One scenario: a small CSV whose rows end in main_eol, with test_eol
+  # placed at the position named by loc.
+  class Test
+    EOL_MAP = {
+      "\r" => "CR",
+      "\n" => "LF",
+      "\r\n" => "CRLF"
+    }.freeze
+
+    # Values accepted for loc, in the order they are reported.
+    LOCATIONS = [
+      "final row ending",
+      "extra blank row at end",
+      "blank row between populated rows"
+    ].freeze
+
+    def initialize(main_eol, test_eol, loc)
+      @main_eol = main_eol
+      @test_eol = test_eol
+      @loc = loc
+      @csv = set_csv
+    end
+
+    # "success", or "ErrorClass: message" when CSV.parse raises.
+    def csv_parse = @csv_parse ||= run_csv_parse
+
+    # "valid", the validator's error types joined with "; ", or
+    # "VALIDATION ERROR: ..." when building the validator raises.
+    def lint = @lint ||= run_lint
+
+    # Row count reported by the validator; nil when the validator could not
+    # be built or does not expose row_count.
+    def row_count
+      lint # @v is only assigned by run_lint, so make sure it has run
+      return unless @v.respond_to?(:row_count)
+
+      @v.row_count
+    end
+
+    # One report row, keyed by column name.
+    def result = {
+      main_eol: EOL_MAP[main_eol],
+      test_eol: EOL_MAP[test_eol],
+      loc: loc,
+      csv: csv.inspect,
+      csvlint: lint,
+      csvlint_row_ct: row_count,
+      csv_parse: csv_parse
+    }
+
+    private
+
+    attr_reader :main_eol, :test_eol, :loc, :csv
+
+    # Builds the CSV text for this scenario.
+    def set_csv
+      case loc
+      when "final row ending"
+        "obj,note#{main_eol}val,val#{main_eol}val,val#{test_eol}"
+      when "extra blank row at end"
+        "obj,note#{main_eol}val,val#{main_eol}val,val#{main_eol}#{test_eol}"
+      when "blank row between populated rows"
+        "obj,note#{main_eol}val,val#{main_eol}#{test_eol}val,val#{main_eol}"
+      else
+        # Fail loudly on a typo instead of validating a nil document.
+        raise ArgumentError, "unknown loc: #{loc.inspect}"
+      end
+    end
+
+    def run_csv_parse
+      CSV.parse(csv, headers: true, nil_value: "")
+    rescue => e
+      "#{e.class}: #{e.message}"
+    else
+      "success"
+    end
+
+    def run_lint
+      @v = Csvlint::Validator.new(StringIO.new(csv))
+    rescue => e
+      @v = nil
+      "VALIDATION ERROR: #{e}"
+    else
+      return "valid" if @v.errors.empty?
+
+      @v.errors.map(&:type).join("; ")
+    end
+  end
+
+  module_function
+
+  # The three scenarios for one main/test line-ending pair.
+  def base_test_configs(main, test)
+    Test::LOCATIONS.map { |loc| Test.new(main, test, loc) }
+  end
+
+  # Every [main_eol, test_eol] pairing of CR, LF and CRLF.
+  def perms
+    [
+      ["\r", "\r"],
+      ["\r", "\n"],
+      ["\r", "\r\n"],
+      ["\n", "\n"],
+      ["\n", "\r"],
+      ["\n", "\r\n"],
+      ["\r\n", "\r\n"],
+      ["\r\n", "\r"],
+      ["\r\n", "\n"]
+    ]
+  end
+
+  tr = perms.flat_map { |pair| base_test_configs(*pair) }.map(&:result)
+
+  headers = tr.first.keys
+
+  CSV.open(OUTPUT_PATH, "w", write_headers: true, headers: headers) do |csv|
+    tr.each { |result| csv << result.values_at(*headers) }
+  end
+end