Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add automated CSV EOL test utility #5

Merged
merged 4 commits into from
Dec 11, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion .github/workflows/push.yml
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,10 @@ jobs:
- uses: ruby/setup-ruby@v1
with:
bundler-cache: true
ruby-version: "3.3"
# Setting this to anything later than 3.1 requires the use of anonymous
# positional argument forwarding, which breaks every Ruby version earlier
# than 3.2 in our test matrix
ruby-version: "3.1"
- name: Install dependencies
run: bundle install
- name: Run the tests
Expand Down
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -29,3 +29,4 @@ csvlint-earl.ttl
.byebug_history

gemfiles/*.lock
/util/csv_testing.csv
1 change: 1 addition & 0 deletions .standard.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
ruby_version: 3.1
42 changes: 21 additions & 21 deletions csvlint.gemspec
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
lib = File.expand_path("../lib", __FILE__)
lib = File.expand_path("lib", __dir__)
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
require "csvlint/version"

Expand All @@ -18,35 +18,35 @@ Gem::Specification.new do |spec|

spec.required_ruby_version = [">= 2.5", "< 3.4"]

spec.add_dependency "csv"
spec.add_dependency "rainbow"
spec.add_dependency "open_uri_redirections"
spec.add_dependency "activesupport"
spec.add_dependency "addressable"
spec.add_dependency "typhoeus"
spec.add_dependency "csv"
spec.add_dependency "escape_utils"
spec.add_dependency "uri_template"
spec.add_dependency "thor"
spec.add_dependency "rack"
spec.add_dependency "net-http-persistent"
spec.add_dependency "open_uri_redirections"
spec.add_dependency "rack"
spec.add_dependency "rainbow"
spec.add_dependency "thor"
spec.add_dependency "typhoeus"
spec.add_dependency "uri_template"

spec.add_development_dependency "appraisal"
spec.add_development_dependency "aruba"
spec.add_development_dependency "bundler", ">= 1.3"
spec.add_development_dependency "rake"
spec.add_development_dependency "cucumber"
spec.add_development_dependency "simplecov"
spec.add_development_dependency "simplecov-rcov"
spec.add_development_dependency "spork"
spec.add_development_dependency "webmock"
spec.add_development_dependency "rspec"
spec.add_development_dependency "rspec-pride"
spec.add_development_dependency "rspec-expectations"
spec.add_development_dependency "coveralls"
spec.add_development_dependency "byebug"
spec.add_development_dependency "coveralls"
spec.add_development_dependency "cucumber"
spec.add_development_dependency "github_changelog_generator"
spec.add_development_dependency "aruba"
spec.add_development_dependency "henry"
spec.add_development_dependency "rake"
spec.add_development_dependency "rdf", "< 4.0"
spec.add_development_dependency "rdf-turtle"
spec.add_development_dependency "henry"
spec.add_development_dependency "rspec"
spec.add_development_dependency "rspec-expectations"
spec.add_development_dependency "rspec-pride"
spec.add_development_dependency "simplecov"
spec.add_development_dependency "simplecov-rcov"
spec.add_development_dependency "spork"
spec.add_development_dependency "standardrb"
spec.add_development_dependency "appraisal"
spec.add_development_dependency "webmock"
end
72 changes: 46 additions & 26 deletions spec/validator_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,8 @@
end

it "should validate from a URL" do
stub_request(:get, "http://example.com/example.csv").to_return(status: 200, headers: {"Content-Type" => "text/csv"}, body: File.read(File.join(File.dirname(__FILE__), "..", "features", "fixtures", "valid.csv")))
stub_request(:get, "http://example.com/example.csv").to_return(status: 200,
headers: {"Content-Type" => "text/csv"}, body: File.read(File.join(File.dirname(__FILE__), "..", "features", "fixtures", "valid.csv")))
validator = Csvlint::Validator.new("http://example.com/example.csv")

expect(validator.valid?).to eql(true)
Expand All @@ -19,7 +20,8 @@
end

it "should validate from a file path" do
validator = Csvlint::Validator.new(File.new(File.join(File.dirname(__FILE__), "..", "features", "fixtures", "valid.csv")))
validator = Csvlint::Validator.new(File.new(File.join(File.dirname(__FILE__), "..", "features", "fixtures",
"valid.csv")))

expect(validator.valid?).to eql(true)
expect(validator.instance_variable_get(:@expected_columns)).to eql(3)
Expand All @@ -28,7 +30,8 @@
end

it "should validate from a file path including whitespace" do
validator = Csvlint::Validator.new(File.new(File.join(File.dirname(__FILE__), "..", "features", "fixtures", "white space in filename.csv")))
validator = Csvlint::Validator.new(File.new(File.join(File.dirname(__FILE__), "..", "features", "fixtures",
"white space in filename.csv")))

expect(validator.valid?).to eql(true)
end
Expand All @@ -40,7 +43,7 @@
end

context "validation with multiple lines: " do
# TODO multiple lines permits testing of warnings
# TODO: multiple lines permits testing of warnings
# TODO need more assertions in each test IE @formats
# TODO the phrasing of col_counts if only consulting specs might be confusing
# TODO ^-> col_counts and data.size should be equivalent, but only data is populated outside of if row.nil?
Expand All @@ -53,7 +56,7 @@
validator = Csvlint::Validator.new(data)

expect(validator.valid?).to eql(true)
# TODO would be beneficial to know how formats functions WRT headers - check_format.feature:17 returns 3 rows total
# TODO: would be beneficial to know how formats functions WRT headers - check_format.feature:17 returns 3 rows total
# TODO in its formats object but is provided with 5 rows (with one nil row) [uses validation_warnings_steps.rb]
expect(validator.instance_variable_get(:@expected_columns)).to eql(3)
expect(validator.instance_variable_get(:@col_counts).count).to eql(4)
Expand Down Expand Up @@ -221,7 +224,7 @@
expect(validator.errors.first.type).to eql(:unclosed_quote)
end

# TODO stray quotes is not covered in any spec in this library
# TODO: stray quotes is not covered in any spec in this library
# it "checks for stray quotes" do
# stream = "\"a\",“b“,\"c\"" "\r\n"
# validator = Csvlint::Validator.new(stream)
Expand All @@ -241,7 +244,7 @@
end

it "returns line break errors if incorrectly specified" do
# TODO the logic for catching this error message is very esoteric
# TODO: the logic for catching this error message is very esoteric
stream = "\"a\",\"b\",\"c\"\n"
validator = Csvlint::Validator.new(StringIO.new(stream), {"lineTerminator" => "\r\n"})
expect(validator.valid?).to eql(false)
Expand All @@ -255,7 +258,7 @@
data = StringIO.new("minimum, minimum")
validator = Csvlint::Validator.new(data)
validator.reset
expect(validator.validate_header(["minimum", "minimum"])).to eql(true)
expect(validator.validate_header(%w[minimum minimum])).to eql(true)
expect(validator.warnings.size).to eql(1)
expect(validator.warnings.first.type).to eql(:duplicate_column_name)
expect(validator.warnings.first.category).to eql(:schema)
Expand Down Expand Up @@ -338,7 +341,7 @@

validator = Csvlint::Validator.new("http://example.com/example.csv")

rows.each_with_index do |row, i|
rows.each_with_index do |row, _i|
validator.build_formats(row)
end

Expand All @@ -354,7 +357,7 @@

validator = Csvlint::Validator.new("http://example.com/example.csv")

rows.each_with_index do |row, i|
rows.each_with_index do |row, _i|
validator.build_formats(row)
end

Expand Down Expand Up @@ -415,7 +418,7 @@
end
end

# TODO the below tests are all the remaining tests from validator_spec.rb, annotations indicate their status HOWEVER these tests may be best refactored into client specs
# TODO: the below tests are all the remaining tests from validator_spec.rb, annotations indicate their status HOWEVER these tests may be best refactored into client specs
context "when detecting headers" do
it "should default to expecting a header" do
validator = Csvlint::Validator.new("http://example.com/example.csv")
Expand All @@ -436,21 +439,24 @@
end

it "should look in content-type for header=absent" do
stub_request(:get, "http://example.com/example.csv").to_return(status: 200, headers: {"Content-Type" => "text/csv; header=absent"}, body: File.read(File.join(File.dirname(__FILE__), "..", "features", "fixtures", "valid.csv")))
stub_request(:get, "http://example.com/example.csv").to_return(status: 200,
headers: {"Content-Type" => "text/csv; header=absent"}, body: File.read(File.join(File.dirname(__FILE__), "..", "features", "fixtures", "valid.csv")))
validator = Csvlint::Validator.new("http://example.com/example.csv")
expect(validator.header?).to eql(false)
expect(validator.errors.size).to eql(0)
end

it "should look in content-type for header=present" do
stub_request(:get, "http://example.com/example.csv").to_return(status: 200, headers: {"Content-Type" => "text/csv; header=present"}, body: File.read(File.join(File.dirname(__FILE__), "..", "features", "fixtures", "valid.csv")))
stub_request(:get, "http://example.com/example.csv").to_return(status: 200,
headers: {"Content-Type" => "text/csv; header=present"}, body: File.read(File.join(File.dirname(__FILE__), "..", "features", "fixtures", "valid.csv")))
validator = Csvlint::Validator.new("http://example.com/example.csv")
expect(validator.header?).to eql(true)
expect(validator.errors.size).to eql(0)
end

it "assume header present if not specified in content type" do
stub_request(:get, "http://example.com/example.csv").to_return(status: 200, headers: {"Content-Type" => "text/csv"}, body: File.read(File.join(File.dirname(__FILE__), "..", "features", "fixtures", "valid.csv")))
stub_request(:get, "http://example.com/example.csv").to_return(status: 200,
headers: {"Content-Type" => "text/csv"}, body: File.read(File.join(File.dirname(__FILE__), "..", "features", "fixtures", "valid.csv")))
validator = Csvlint::Validator.new("http://example.com/example.csv")
expect(validator.header?).to eql(true)
expect(validator.errors.size).to eql(0)
Expand All @@ -459,7 +465,8 @@
end

it "give wrong content type error if content type is wrong" do
stub_request(:get, "http://example.com/example.csv").to_return(status: 200, headers: {"Content-Type" => "text/html"}, body: File.read(File.join(File.dirname(__FILE__), "..", "features", "fixtures", "valid.csv")))
stub_request(:get, "http://example.com/example.csv").to_return(status: 200,
headers: {"Content-Type" => "text/html"}, body: File.read(File.join(File.dirname(__FILE__), "..", "features", "fixtures", "valid.csv")))
validator = Csvlint::Validator.new("http://example.com/example.csv")
expect(validator.header?).to eql(true)
expect(validator.errors.size).to eql(1)
Expand Down Expand Up @@ -504,30 +511,37 @@
end

it "should not be an error if we have assumed a header, there is no dialect and content-type doesn't declare header, as we assume header=present" do
stub_request(:get, "http://example.com/example.csv").to_return(status: 200, headers: {"Content-Type" => "text/csv"}, body: File.read(File.join(File.dirname(__FILE__), "..", "features", "fixtures", "valid.csv")))
stub_request(:get, "http://example.com/example.csv").to_return(status: 200,
headers: {"Content-Type" => "text/csv"}, body: File.read(File.join(File.dirname(__FILE__), "..", "features", "fixtures", "valid.csv")))
validator = Csvlint::Validator.new("http://example.com/example.csv")
expect(validator.valid?).to eql(true)
end

it "should be valid if we have a dialect and the data is from the web" do
stub_request(:get, "http://example.com/example.csv").to_return(status: 200, headers: {"Content-Type" => "text/csv"}, body: File.read(File.join(File.dirname(__FILE__), "..", "features", "fixtures", "valid.csv")))
stub_request(:get, "http://example.com/example.csv").to_return(status: 200,
headers: {"Content-Type" => "text/csv"}, body: File.read(File.join(File.dirname(__FILE__), "..", "features", "fixtures", "valid.csv")))
# header defaults to true in csv dialect, so this is valid
validator = Csvlint::Validator.new("http://example.com/example.csv", {})
expect(validator.valid?).to eql(true)

stub_request(:get, "http://example.com/example.csv").to_return(status: 200, headers: {"Content-Type" => "text/csv"}, body: File.read(File.join(File.dirname(__FILE__), "..", "features", "fixtures", "valid.csv")))
stub_request(:get, "http://example.com/example.csv").to_return(status: 200,
headers: {"Content-Type" => "text/csv"}, body: File.read(File.join(File.dirname(__FILE__), "..", "features", "fixtures", "valid.csv")))
validator = Csvlint::Validator.new("http://example.com/example.csv", {"header" => true})
expect(validator.valid?).to eql(true)

stub_request(:get, "http://example.com/example.csv").to_return(status: 200, headers: {"Content-Type" => "text/csv"}, body: File.read(File.join(File.dirname(__FILE__), "..", "features", "fixtures", "valid.csv")))
stub_request(:get, "http://example.com/example.csv").to_return(status: 200,
headers: {"Content-Type" => "text/csv"}, body: File.read(File.join(File.dirname(__FILE__), "..", "features", "fixtures", "valid.csv")))
validator = Csvlint::Validator.new("http://example.com/example.csv", {"header" => false})
expect(validator.valid?).to eql(true)
end
end

context "accessing metadata" do
before :all do
stub_request(:get, "http://example.com/crlf.csv").to_return(status: 200, body: File.read(File.join(File.dirname(__FILE__), "..", "features", "fixtures", "windows-line-endings.csv")))
stub_request(:get, "http://example.com/crlf.csv").to_return(status: 200,
body: File.read(File.join(
File.dirname(__FILE__), "..", "features", "fixtures", "windows-line-endings.csv"
)))
stub_request(:get, "http://example.com/crlf.csv-metadata.json").to_return(status: 404)
end

Expand All @@ -540,33 +554,39 @@
it "should give access to the complete CSV data file" do
stub_request(:get, "http://example.com/example.csv").to_return(status: 200,
headers: {"Content-Type" => "text/csv; header=present"},
body: File.read(File.join(File.dirname(__FILE__), "..", "features", "fixtures", "valid.csv")))
body: File.read(File.join(
File.dirname(__FILE__), "..", "features", "fixtures", "valid.csv"
)))
validator = Csvlint::Validator.new("http://example.com/example.csv")
expect(validator.valid?).to eql(true)
data = validator.data

expect(data.count).to eql 3
expect(data[0]).to eql ["Foo", "Bar", "Baz"]
expect(data[2]).to eql ["3", "2", "1"]
expect(data[0]).to eql %w[Foo Bar Baz]
expect(data[2]).to eql %w[3 2 1]
end

it "should count the total number of rows read" do
stub_request(:get, "http://example.com/example.csv").to_return(status: 200,
headers: {"Content-Type" => "text/csv; header=present"},
body: File.read(File.join(File.dirname(__FILE__), "..", "features", "fixtures", "valid.csv")))
body: File.read(File.join(
File.dirname(__FILE__), "..", "features", "fixtures", "valid.csv"
)))
validator = Csvlint::Validator.new("http://example.com/example.csv")
expect(validator.row_count).to eq(3)
end

it "should limit number of lines read" do
stub_request(:get, "http://example.com/example.csv").to_return(status: 200,
headers: {"Content-Type" => "text/csv; header=present"},
body: File.read(File.join(File.dirname(__FILE__), "..", "features", "fixtures", "valid.csv")))
body: File.read(File.join(
File.dirname(__FILE__), "..", "features", "fixtures", "valid.csv"
)))
validator = Csvlint::Validator.new("http://example.com/example.csv", {}, nil, limit_lines: 2)
expect(validator.valid?).to eql(true)
data = validator.data
expect(data.count).to eql 2
expect(data[0]).to eql ["Foo", "Bar", "Baz"]
expect(data[0]).to eql %w[Foo Bar Baz]
end

context "with a lambda" do
Expand Down
Loading
Loading