diff --git a/lib/smarter_csv/auto_detection.rb b/lib/smarter_csv/auto_detection.rb index 36f226ba..31564f26 100644 --- a/lib/smarter_csv/auto_detection.rb +++ b/lib/smarter_csv/auto_detection.rb @@ -19,7 +19,12 @@ def guess_column_separator(filehandle, options) count.times do line = readline_with_counts(filehandle, options) delimiters.each do |d| - candidates[d] += line.scan(d).count + escaped_quote = Regexp.escape(options[:quote_char]) + + # Count only non-quoted occurrences of the delimiter + non_quoted_text = line.split(/#{escaped_quote}[^#{escaped_quote}]*#{escaped_quote}/).join + + candidates[d] += non_quoted_text.scan(d).count end rescue EOFError # short files break diff --git a/spec/features/formating/column_separator_spec.rb b/spec/features/formating/column_separator_spec.rb index c729c7b0..7e98926f 100644 --- a/spec/features/formating/column_separator_spec.rb +++ b/spec/features/formating/column_separator_spec.rb @@ -87,6 +87,14 @@ end.to raise_exception SmarterCSV::NoColSepDetected end + it 'does not detect separators that are between quotes' do + data = SmarterCSV.process("#{fixture_path}/separator_chars_between_quotes.csv", options) + + + expect(data.first.keys.size).to eq 5 + expect(data.size).to eq 3 + end + context 'when auto is given as a string' do let(:options) do { @@ -148,6 +156,16 @@ end.to raise_exception SmarterCSV::NoColSepDetected end + it 'does not detect separators that are between quotes' do + data = SmarterCSV.process( + "#{fixture_path}/separator_chars_between_quotes_no_headers.csv", + options.merge(user_provided_headers: %w[Name Age Job Department Project]) + ) + + expect(data.first.keys.size).to eq 5 + expect(data.size).to eq 3 + end + context 'when auto is given as a string' do let(:options) do { diff --git a/spec/fixtures/separator_chars_between_quotes.csv b/spec/fixtures/separator_chars_between_quotes.csv new file mode 100644 index 00000000..06c00c38 --- /dev/null +++ b/spec/fixtures/separator_chars_between_quotes.csv @@ -0,0 +1,4 @@ +"name, info":"age, years":"job, title":"department, info":"project, code" +"John, Doe":"35, years":"Senior, Developer":"Engineering, Dept":"Code, 1234" +"Jane, Smith":"29, years":"Project, Manager":"Product, Development":"Code,5678" +"Emily, Jones":"42, years":"CTO,":"Technology,Dept":"Code,9012" diff --git a/spec/fixtures/separator_chars_between_quotes_no_headers.csv b/spec/fixtures/separator_chars_between_quotes_no_headers.csv new file mode 100644 index 00000000..7fef5849 --- /dev/null +++ b/spec/fixtures/separator_chars_between_quotes_no_headers.csv @@ -0,0 +1,3 @@ +"John, Doe":"35, years":"Senior, Developer":"Engineering, Dept":"Code, 1234" +"Jane, Smith":"29, years":"Project, Manager":"Product, Development":"Code,5678" +"Emily, Jones":"42, years":"CTO,":"Technology,Dept":"Code,9012"