Skip to content

Commit

Permalink
Improved column separator detection by ignoring quoted sections (#276)
Browse files Browse the repository at this point in the history
* Improving auto detection. Column separator detection: Count only non-quoted occurrences of the delimiter
  • Loading branch information
nicastelo authored Jul 10, 2024
1 parent a343e02 commit f8048f0
Show file tree
Hide file tree
Showing 4 changed files with 31 additions and 1 deletion.
7 changes: 6 additions & 1 deletion lib/smarter_csv/auto_detection.rb
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,12 @@ def guess_column_separator(filehandle, options)
count.times do
line = readline_with_counts(filehandle, options)
delimiters.each do |d|
candidates[d] += line.scan(d).count
escaped_quote = Regexp.escape(options[:quote_char])

# Count only non-quoted occurrences of the delimiter
non_quoted_text = line.split(/#{escaped_quote}[^#{escaped_quote}]*#{escaped_quote}/).join

candidates[d] += non_quoted_text.scan(d).count
end
rescue EOFError # short files
break
Expand Down
18 changes: 18 additions & 0 deletions spec/features/formating/column_separator_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,14 @@
end.to raise_exception SmarterCSV::NoColSepDetected
end

# The fixture's fields are quoted, joined by ':' and each contain a ',' inside
# the quotes. Parsing must therefore yield five columns and three data rows.
# NOTE(review): `options` comes from an enclosing `let` not visible here —
# presumably it requests automatic column-separator detection; confirm.
it 'does not detect separators that are between quotes' do
  csv_path = "#{fixture_path}/separator_chars_between_quotes.csv"
  rows = SmarterCSV.process(csv_path, options)

  first_row_keys = rows.first.keys
  expect(first_row_keys.size).to eq 5
  expect(rows.size).to eq 3
end

context 'when auto is given as a string' do
let(:options) do
{
Expand Down Expand Up @@ -148,6 +156,16 @@
end.to raise_exception SmarterCSV::NoColSepDetected
end

# Same scenario as the header-bearing fixture, but this file has no header
# line, so the five column names are supplied via `user_provided_headers`.
# The fixture's quoted fields are joined by ':' and contain ',' inside the
# quotes; parsing must yield five columns and three rows.
# NOTE(review): `options` comes from an enclosing `let` not visible here —
# presumably it requests automatic column-separator detection; confirm.
it 'does not detect separators that are between quotes' do
  csv_path = "#{fixture_path}/separator_chars_between_quotes_no_headers.csv"
  parse_options = options.merge(user_provided_headers: %w[Name Age Job Department Project])

  rows = SmarterCSV.process(csv_path, parse_options)

  first_row_keys = rows.first.keys
  expect(first_row_keys.size).to eq 5
  expect(rows.size).to eq 3
end

context 'when auto is given as a string' do
let(:options) do
{
Expand Down
4 changes: 4 additions & 0 deletions spec/fixtures/separator_chars_between_quotes.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
"name, info":"age, years":"job, title":"department, info":"project, code"
"John, Doe":"35, years":"Senior, Developer":"Engineering, Dept":"Code, 1234"
"Jane, Smith":"29, years":"Project, Manager":"Product, Development":"Code,5678"
"Emily, Jones":"42, years":"CTO,":"Technology,Dept":"Code,9012"
3 changes: 3 additions & 0 deletions spec/fixtures/separator_chars_between_quotes_no_headers.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
"John, Doe":"35, years":"Senior, Developer":"Engineering, Dept":"Code, 1234"
"Jane, Smith":"29, years":"Project, Manager":"Product, Development":"Code,5678"
"Emily, Jones":"42, years":"CTO,":"Technology,Dept":"Code,9012"

0 comments on commit f8048f0

Please sign in to comment.