Skip to content

Commit

Permalink
added utf-8 handling
Browse files (browse the repository at this point in the history)
  • Loading branch information
Rickard Lundin committed Sep 12, 2022
1 parent ad73841 commit ff854cc
Showing 1 changed file with 12 additions and 2 deletions.
14 changes: 12 additions & 2 deletions impl/ColumnBuilder.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ import (
"io"
"os"
"strconv"
"strings"
"sync"
"time"
)
Expand Down Expand Up @@ -80,6 +81,7 @@ type FixedSizeTable struct {
HasHeader bool
HasFooter bool
CalcHash bool
SourceEncoding string
ConsumeLineFunc func(line string, fstc *FixedSizeTableChunk)
CustomParams interface{}
CustomColumnBuilders map[arrow.Type]func(fixedField *FixedField, builder *array.RecordBuilder, columnsize int, fieldNr int, columnsizeCap int) *ColumnBuilder
Expand Down Expand Up @@ -420,9 +422,17 @@ func (fstc *FixedSizeTableChunk) process(lfHeader bool, lfFooter bool) int {
}

re := bytes.NewReader(bbb)
decodingReader := transform.NewReader(re, charmap.ISO8859_1.NewDecoder()) // lines := []string{}
var scanner bufio.Scanner

scanner := bufio.NewScanner(decodingReader)
switch strings.ToLower(fstc.FixedSizeTable.SourceEncoding) {
case "iso8859-1":
scanner = *bufio.NewScanner(transform.NewReader(re, charmap.ISO8859_1.NewDecoder()))

case "utf-8":
scanner = *bufio.NewScanner(re)
default:
scanner = *bufio.NewScanner(re)
}
lineCnt := 0
for scanner.Scan() {
line := scanner.Text()
Expand Down

0 comments on commit ff854cc

Please sign in to comment.