Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix row_count always being 1 when \r used as lineTerminator #6

Merged
merged 1 commit into from
Dec 12, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
61 changes: 55 additions & 6 deletions lib/csvlint/validate.rb
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,7 @@ def initialize(source, dialect = {}, schema = nil, options = {})
@formats = []
@schema = schema
@dialect = dialect
@orig_sep = $/
@csv_header = true
@headers = {}
@lambda = options[:lambda]
Expand Down Expand Up @@ -107,10 +108,17 @@ def validate

def validate_stream
@current_line = 1
# StringIO.each_line splits on the value of $/ (input record separator),
# which defaults to \n. This is why CSVs with \r EOL character don't get
# lines counted properly?
set_sep(@source)

@source.each_line do |line|
break if line_limit_reached?
parse_line(line)
end

reset_sep
validate_line(@leading, @current_line) unless @leading == ""
end

Expand All @@ -131,10 +139,13 @@ def validate_url
request.on_body do |chunk|
chunk.force_encoding(Encoding::UTF_8) if chunk.encoding == Encoding::ASCII_8BIT
io = StringIO.new(chunk)
set_sep(io)

io.each_line do |line|
break if line_limit_reached?
parse_line(line)
end
reset_sep
end
request.run
# Validate the last line too
Expand All @@ -144,7 +155,7 @@ def validate_url
def parse_line(line)
line = @leading + line
# Check if the last line is a line break - in which case it's a full line
if line[-1, 1].include?("\n")
if ["\n", "\r"].include?(line[-1, 1])
# If the number of quotes is odd, the linebreak is inside some quotes
if line.count(@dialect["quoteChar"]).odd?
@leading = line
Expand Down Expand Up @@ -304,7 +315,7 @@ def header?
end

def report_line_breaks(line_no = nil)
return unless @input[-1, 1].include?("\n") # Return straight away if there's no newline character - i.e. we're on the last line
return unless ["\r", "\n"].include?(@input[-1, 1]) # Return straight away if there's no newline character - i.e. we're on the last line
line_break = get_line_break(@input)
@line_breaks << line_break
unless line_breaks_reported?
Expand Down Expand Up @@ -518,6 +529,43 @@ def locate_schema

private

def set_sep(source)
$/ = determine_sep(source)
end

def determine_sep(source)
return explicitly_set_sep if explicitly_set_sep

src_str = case source
when File
File.read(source.path)
when IO
source.read
when StringIO
source.string
when Tempfile
source.read
else
raise "Unhandled source class: #{source.class}"
end
src_str.include?("\n") ? "\n" : "\r"
end

def explicitly_set_sep
return unless @dialect
return unless @dialect.key?("lineTerminator")

sep = @dialect["lineTerminator"]
return unless sep.is_a?(String)
return if sep.empty?

sep
end

def reset_sep
$/ = @orig_sep
end

def parse_extension(source)
case source
when File
Expand Down Expand Up @@ -589,11 +637,12 @@ def line_limit_reached?
end

def get_line_break(line)
eol = line.chars.last(2)
if eol.first == "\r"
"\r\n"
eol = line.chars.last(2).join
case eol
when "\r\n"
eol
else
"\n"
eol[-1]
end
end

Expand Down
58 changes: 58 additions & 0 deletions spec/validator_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -608,6 +608,64 @@
end
end

describe "#row_count" do
let(:validator) { Csvlint::Validator.new(StringIO.new(csv), opts) }
let(:opts) { {} }
let(:result) { validator.row_count }

context 'with \r\n line terminators and final EOL' do
let(:csv) do
%("Foo","Bar","Baz"\r\n"1","2","3"\r\n"3","2","1"\r\n)
end

it "should count the total number of rows read" do
expect(validator.row_count).to eq(3)
end
end

context 'with \r\n line terminators and without final EOL' do
let(:csv) do
%("Foo","Bar","Baz"\r\n"1","2","3"\r\n"3","2","1")
end

it "should count the total number of rows read" do
expect(validator.row_count).to eq(3)
end
end

context 'with \n line terminators and final EOL' do
let(:csv) do
%("Foo","Bar","Baz"\n"1","2","3"\n"3","2","1"\n)
end

it "should count the total number of rows read" do
expect(validator.row_count).to eq(3)
end
end

context 'with \r line terminators and final EOL' do
let(:csv) do
%("Foo","Bar","Baz"\r"1","2","3"\r"3","2","1"\r)
end

it "should count the total number of rows read" do
expect(validator.row_count).to eq(3)
end
end

context 'with \r line terminators and final EOL and lineTerminator ' \
"specified" do
let(:csv) do
%("Foo","Bar","Baz"\r"1","2","3"\r"3","2","1"\r)
end
let(:opts) { {"lineTerminator" => "\r"} }

it "should count the total number of rows read" do
expect(validator.row_count).to eq(3)
end
end
end

# Commented out because there is currently no way to mock redirects with Typhoeus and WebMock - see https://github.com/bblimke/webmock/issues/237
# it "should follow redirects to SSL" do
# stub_request(:get, "http://example.com/redirect").to_return(:status => 301, :headers=>{"Location" => "https://example.com/example.csv"})
Expand Down
Loading