Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

csv: convert datetimes with time zones to local date, mostly (WIP) #1583

Draft
wants to merge 1 commit into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 22 additions & 12 deletions hledger-lib/Hledger/Read/CsvReader.hs
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ import qualified Data.Text.Encoding as T
import qualified Data.Text.IO as T
import qualified Data.Text.Lazy as TL
import qualified Data.Text.Lazy.Builder as TB
import Data.Time.Calendar (Day)
import Data.Time (UTCTime, Day, localDay, utcToLocalTime, getCurrentTimeZone, LocalTime)
import Data.Time.Format (parseTimeM, defaultTimeLocale)
import Safe (atMay, headMay, lastMay, readDef, readMay)
import System.Directory (doesFileExist)
Expand All @@ -78,6 +78,7 @@ import Text.Printf (printf)
import Hledger.Data
import Hledger.Utils
import Hledger.Read.Common (aliasesFromOpts, Reader(..),InputOpts(..), amountp, statusp, genericSourcePos, journalFinalise )
import Data.Time.LocalTime (TimeZone)

--- ** doctest setup
-- $setup
Expand Down Expand Up @@ -741,6 +742,7 @@ readJournalFromCsv mrulesfile csvfile csvdata =
-- let (headerlines, datalines) = identifyHeaderLines records
-- mfieldnames = lastMay headerlines

tz <- getCurrentTimeZone
let
-- convert CSV records to transactions
txns = dbg7 "csv txns" $ snd $ mapAccumL
Expand All @@ -750,7 +752,7 @@ readJournalFromCsv mrulesfile csvfile csvdata =
line' = (mkPos . (+1) . unPos) line
pos' = SourcePos name line' col
in
(pos, transactionFromCsvRecord pos' rules r)
(pos, transactionFromCsvRecord pos' rules tz r)
)
(initialPos parsecfilename) records

Expand Down Expand Up @@ -874,8 +876,8 @@ hledgerField = getEffectiveAssignment
hledgerFieldValue :: CsvRules -> CsvRecord -> HledgerFieldName -> Maybe Text
hledgerFieldValue rules record = fmap (renderTemplate rules record) . hledgerField rules record

transactionFromCsvRecord :: SourcePos -> CsvRules -> CsvRecord -> Transaction
transactionFromCsvRecord sourcepos rules record = t
transactionFromCsvRecord :: SourcePos -> CsvRules -> TimeZone -> CsvRecord -> Transaction
transactionFromCsvRecord sourcepos rules tz record = t
where
----------------------------------------------------------------------
-- 1. Define some helpers:
Expand All @@ -884,7 +886,7 @@ transactionFromCsvRecord sourcepos rules record = t
-- ruleval = csvRuleValue rules record :: DirectiveName -> Maybe String
field = hledgerField rules record :: HledgerFieldName -> Maybe FieldTemplate
fieldval = hledgerFieldValue rules record :: HledgerFieldName -> Maybe Text
parsedate = parseDateWithCustomOrDefaultFormats (rule "date-format")
parsedate = parseDateWithCustomOrDefaultFormats tz (rule "date-format")
mkdateerror datefield datevalue mdateformat = T.unpack $ T.unlines
["error: could not parse \""<>datevalue<>"\" as a date using date format "
<>maybe "\"YYYY/M/D\", \"YYYY-M-D\" or \"YYYY.M.D\"" (T.pack . show) mdateformat
Expand Down Expand Up @@ -1269,16 +1271,24 @@ csvFieldValue rules record fieldname = do
fieldvalue <- T.strip <$> atMay record (fieldindex-1)
return fieldvalue

-- | Parse the date string using the specified date-format, or if unspecified
-- the "simple date" formats (YYYY/MM/DD, YYYY-MM-DD, YYYY.MM.DD, leading
-- zeroes optional).
parseDateWithCustomOrDefaultFormats :: Maybe DateFormat -> Text -> Maybe Day
parseDateWithCustomOrDefaultFormats mformat s = asum $ map parsewith formats
-- | Parse a date from a date/datetime string using the specified strptime format,
-- or else try all the "simple date" formats (YYYY/MM/DD, YYYY-MM-DD, YYYY.MM.DD
-- with optional leading zeroes).
--
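-- If the string includes a time and a time zone, the date is converted to the
-- provided local time zone before being returned, so it can be a day earlier
-- or later than the one written in the string.
-- NOTE(review): the string is parsed as a UTCTime, so a value with no time zone
-- appears to be read as UTC (midnight UTC for a plain date); in time zones
-- west of UTC that would yield the previous day - confirm this is intended.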
parseDateWithCustomOrDefaultFormats :: TimeZone -> Maybe DateFormat -> Text -> Maybe Day
parseDateWithCustomOrDefaultFormats tz mformat s = do
ut <- asum $ map parsewith formats :: Maybe UTCTime
let lt = utcToLocalTime tz ut :: LocalTime
let ld = localDay lt :: Day
return ld
where
parsewith = flip (parseTimeM True defaultTimeLocale) (T.unpack s)
formats = map T.unpack $ maybe
["%Y/%-m/%-d"
,"%Y-%-m-%-d"
["%Y-%-m-%-d"
,"%Y/%-m/%-d"
,"%Y.%-m.%-d"
-- ,"%-m/%-d/%Y"
-- ,parseTime defaultTimeLocale "%Y/%m/%e" (take 5 s ++ "0" ++ drop 5 s)
Expand Down
8 changes: 3 additions & 5 deletions hledger/hledger.m4.md
Original file line number Diff line number Diff line change
Expand Up @@ -3865,11 +3865,9 @@ date-format %-m/%-d/%Y %l:%M %p some other junk
For the supported strptime syntax, see:\
<https://hackage.haskell.org/package/time/docs/Data-Time-Format.html#v:formatTime>

Note that although you can parse date-times which include a time zone,
that time zone is ignored; it will not change the date that is parsed.
This means when reading CSV data with times not in your local time zone,
dates can be "off by one".

Note: date-times which include a time zone different from your local time zone
will usually be converted to the correct date in your local time zone. However, the
conversion uses your current local time zone offset, so around daylight saving time
changes it is still possible for the parsed date to be "off by one".

### `decimal-mark`

Expand Down