Skip to content

Commit

Permalink
* use line-columns for error-reporting
Browse files Browse the repository at this point in the history
- continue using offsets unless there's an error
  - if there's an error calculate the appropriate line/column using
    megaparsec's machinery
- still not very fine positions

related to #17
  • Loading branch information
odanoburu committed Aug 27, 2019
1 parent 51acede commit 1765b88
Show file tree
Hide file tree
Showing 4 changed files with 77 additions and 46 deletions.
7 changes: 3 additions & 4 deletions src/Data.hs
Original file line number Diff line number Diff line change
Expand Up @@ -119,10 +119,9 @@ data SourceError
SourcePosition
WNError deriving (Show)

toSourceError :: Synset a -> WNError -> SourceError
toSourceError Synset{sourcePosition, lexicographerFileId}
= SourceError (lexicographerFileIdToText lexicographerFileId)
sourcePosition
attachOffset :: Synset a -> WNError -> (Int, WNError)
attachOffset Synset{sourcePosition = SourcePosition (offset,_)}
= (,) offset

type WNValidation a = Validation (NonEmpty WNError) a
type SourceValidation a = Validation (NonEmpty SourceError) a
Expand Down
43 changes: 25 additions & 18 deletions src/Lib.hs
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,11 @@ module Lib
) where

import Data ( Synset(..), Unvalidated, Validated
, synsetToTriples )
, synsetToTriples, Validation(..), SourceValidation
, singleton )
import Parse (parseLexicographer)
import Validate ( Validation(..), makeIndex
, validateSynsets, SourceValidation)
import Validate ( makeIndex
, validateSynsets )
----------------------------------
import Control.Monad (unless,(>>))
import Control.Monad.Reader (ReaderT(..), ask, liftIO)
Expand All @@ -24,9 +25,9 @@ import Data.Functor(void)
import Data.List (partition)
import Data.List.NonEmpty (NonEmpty(..))
import qualified Data.List.NonEmpty as NE
import Data.Maybe (fromMaybe)
import Data.Map.Strict (Map)
import qualified Data.Map.Strict as M
import Data.Maybe (maybe)
import Data.RDF.Encoder.NQuads (encodeRDFGraph)
import Data.RDF.ToRDF (runRDFGen)
import Data.RDF.Types (RDFGraph(..), IRI(..))
Expand All @@ -35,11 +36,12 @@ import Data.String (fromString)
import Data.Text (Text)
import qualified Data.Text as T
import qualified Data.Text.IO as TIO
import Data.Text.Prettyprint.Doc (Pretty(..))
import Data.Text.Prettyprint.Doc.Render.Text (putDoc)
import qualified Data.Text.Read as TR
import System.Directory (doesDirectoryExist)
import System.FilePath ((</>), normalise,equalFilePath)
import Data.Text.Prettyprint.Doc (Pretty(..))
import Data.Text.Prettyprint.Doc.Render.Text (putDoc)
import Text.Megaparsec (PosState)

data Config = Config
{ lexnamesToId :: Map Text Int
Expand Down Expand Up @@ -85,7 +87,8 @@ readConfig configurationDir = do

type App = ReaderT Config IO

parseLexicographerFile :: FilePath -> App (SourceValidation (NonEmpty (Synset Unvalidated)))
parseLexicographerFile :: FilePath
-> App (SourceValidation (NonEmpty (Synset Unvalidated), PosState Text))
parseLexicographerFile filePath = do
Config{relationRDFNames} <- ask
liftIO $ do
Expand All @@ -96,24 +99,26 @@ parseLexicographerFile filePath = do
parseLexicographerFiles :: NonEmpty FilePath
-> App (SourceValidation (NonEmpty (Synset Validated)))
parseLexicographerFiles filePaths = do
lexFilesSynsetsOrErrors <- mapM parseLexicographerFile filePaths
case sequenceA lexFilesSynsetsOrErrors of
Success lexFilesSynsets ->
let synsets = sconcat lexFilesSynsets
parseResults <- mapM parseLexicographerFile filePaths
case sequenceA parseResults of
Success results ->
let (lexFilesSynsets, posStates) = NE.unzip results
synsets = sconcat lexFilesSynsets
index = makeIndex synsets
in return $ validateSynsets index synsets
in return . validateSynsets index $ NE.zip lexFilesSynsets posStates
Failure sourceErrors -> return $ Failure sourceErrors

prettyPrintList :: Pretty a => NonEmpty a -> IO ()
prettyPrintList = mapM_ (putDoc . pretty)

validateSynsetsNoParseErrors :: NonEmpty (NonEmpty (Synset Unvalidated))
-> Maybe (NonEmpty (Synset Unvalidated))
validateSynsetsNoParseErrors :: NonEmpty (NonEmpty (Synset Unvalidated), PosState Text)
-> Maybe (NonEmpty (Synset Unvalidated), PosState Text)
-> IO ()
validateSynsetsNoParseErrors indexSynsets maybeSynsetsToValidate =
let synsets = sconcat indexSynsets
validateSynsetsNoParseErrors results maybeSynsetsToValidate =
let lexFilesSynsets = NE.map fst results
synsets = sconcat lexFilesSynsets
index = makeIndex synsets
in case validateSynsets index (fromMaybe synsets maybeSynsetsToValidate) of
in case validateSynsets index (maybe results singleton maybeSynsetsToValidate) of
Success _ -> return ()
Failure errors -> prettyPrintList errors

Expand Down Expand Up @@ -151,7 +156,9 @@ synsetsToTriples relationsMap baseIRI synsets outputFile =
. toLazyByteString
. encodeRDFGraph . RDFGraph Nothing $ DL.toList synsetsTriples
where
synsetsTriples = foldMap (\synset -> runRDFGen (synsetToTriples relationsMap synset) baseIRI) synsets
synsetsTriples = foldMap (\synset
-> runRDFGen (synsetToTriples relationsMap synset) baseIRI)
synsets

lexicographerFilesToTriples :: IRI -> FilePath -> App ()
lexicographerFilesToTriples baseIRI outputFile = do
Expand Down
30 changes: 20 additions & 10 deletions src/Parse.hs
Original file line number Diff line number Diff line change
Expand Up @@ -31,22 +31,32 @@ type Parser = ParsecT Void Text (Reader (LexicographerFileId, Map Text Text))

parseLexicographer :: Map Text Text
-> String -> Text
-> SourceValidation (NonEmpty (Synset Unvalidated))
-> SourceValidation (NonEmpty (Synset Unvalidated), PosState Text)
parseLexicographer relationsMap fileName inputText =
case runReader (runParserT lexicographerFile fileName inputText)
(lexFileId, relationsMap) of
Right rawSynsets
-> case partitionEithers (NE.toList rawSynsets) of
([], synsetsToValidate) -> Success $ NE.fromList synsetsToValidate
(parseErrors, _) -> Failure . NE.map toSourceError $ NE.fromList parseErrors
-> case partitionEithers $ NE.toList rawSynsets of
([], synsetsToValidate) -> Success (NE.fromList synsetsToValidate, posState)
(x:parseErrors, _) -> Failure . liftErrors $ x:|parseErrors
Left ParseErrorBundle{bundleErrors} ->
Failure . NE.map toSourceError $ bundleErrors
Failure . liftErrors $ bundleErrors
where
toSourceError parseError
= let errorPos = errorOffset parseError
in SourceError (T.pack fileName)
(SourcePosition (errorPos, errorPos +1))
(ParseError $ parseErrorTextPretty parseError)
posState =
PosState
{ pstateInput = inputText
, pstateOffset = 0
, pstateSourcePos = initialPos fileName
, pstateTabWidth = defaultTabWidth
, pstateLinePrefix = ""
}
liftErrors parseErrors
= let (errors, _) = attachSourcePos errorOffset parseErrors posState
in NE.map toSourceError errors
toSourceError (parseError, SourcePos{sourceName, sourceLine, sourceColumn})
= SourceError (T.pack sourceName)
(SourcePosition (unPos sourceLine, unPos sourceColumn))
(ParseError $ parseErrorTextPretty parseError)
lexFileId :: LexicographerFileId
lexFileId =
fromMaybe (error $ "Couldn't parse first line of "
Expand Down
43 changes: 29 additions & 14 deletions src/Validate.hs
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,10 @@ import qualified Data.List.NonEmpty as NE
import Data.Maybe (fromJust, mapMaybe)
import qualified Data.Text as T
import Data.GenericTrie (Trie, fromListWith', member, lookup, foldWithKey, empty, insert)
import Data.Semigroup (sconcat)
import Data.Text (Text)
import Prelude hiding (lookup)
import Text.Megaparsec (PosState, SourcePos(..),unPos,attachSourcePos)

-- when to change LexicographerFile : Text to LexicographerFileId :
-- Int in wordsenses etc.? is changing it really necessary?
Expand All @@ -27,13 +30,14 @@ makeIndex synsets = fromListWith' (<>) keyValuePairs
in
(headSenseKey, singleton $ Right synset) : map (\wordSense -> (wordSenseKey wordSense, singleton $ Left headSenseKey)) wordSenses

checkIndexNoDuplicates :: Trie String (NonEmpty (Either String (Synset Unvalidated))) -> SourceValidation (Index (Synset Unvalidated))
checkIndexNoDuplicates :: Trie String (NonEmpty (Either String (Synset Unvalidated)))
-> Validation (NonEmpty (Int, WNError)) (Index (Synset Unvalidated))
checkIndexNoDuplicates index = foldWithKey go (Success empty) index
where
go key (value :| []) noDuplicatesTrie
= Success (insert key value) <*> noDuplicatesTrie
go key values noDuplicatesTrie
= case map (\synset -> toSourceError synset . DuplicateWordSense $ takeWhile (/= '\t') key)
= case map (\synset -> attachOffset synset . DuplicateWordSense $ takeWhile (/= '\t') key)
$ duplicatesSynsets values of
[] -> noDuplicatesTrie
x:duplicateErrors -> Failure (x :| duplicateErrors) <*> noDuplicatesTrie
Expand All @@ -58,14 +62,13 @@ senseKey (LexicographerFileId (pos, lexname)) (WordSenseForm wordForm) (Lexical

---
-- checks
checkSynset :: Index a -> Synset Unvalidated -> SourceValidation (Synset Validated)
checkSynset index Synset{lexicographerFileId, wordSenses, relations, definition
checkSynset :: Index a -> Synset Unvalidated -> Validation (NonEmpty (Int, WNError)) (Synset Validated)
checkSynset index unvalidatedSynset@Synset{lexicographerFileId, wordSenses, relations, definition
, examples, frames, sourcePosition} =
case result of
Success synset -> Success synset
Failure errors -> Failure $ NE.map (SourceError lexfileName sourcePosition) errors
Failure errors -> Failure $ NE.map (attachOffset unvalidatedSynset) errors
where
lexfileName = lexicographerFileIdToText lexicographerFileId
result = Synset
<$> Success sourcePosition
<*> Success lexicographerFileId
Expand Down Expand Up @@ -164,17 +167,25 @@ checkWordSensePointersTargets index = traverse checkWordPointer
where
targetSenseKey = senseKey lexFileId wordForm lexicalId

validateSynsets :: Trie String (NonEmpty (Either String (Synset Unvalidated)))
-> NonEmpty (Synset Unvalidated)
validateLexFileSynsets :: Trie String (NonEmpty (Either String (Synset Unvalidated)))
-> (NonEmpty (Synset Unvalidated), PosState Text)
-> SourceValidation (NonEmpty (Synset Validated))
validateSynsets indexWithDuplicates (firstSynset:|synsets) =
checkSynsetsOrder . checkSynsets $ checkIndexNoDuplicates indexWithDuplicates
validateLexFileSynsets indexWithDuplicates (firstSynset:|synsets, posState) =
bimap liftErrors id . checkSynsetsOrder . checkSynsets $ checkIndexNoDuplicates indexWithDuplicates
where
liftErrors parseErrors
= let (errors, _) = attachSourcePos fst parseErrors posState
in NE.map toSourceError errors
toSourceError ((_, wnError), SourcePos{sourceName, sourceLine, sourceColumn})
= SourceError (T.pack sourceName)
(SourcePosition (unPos sourceLine, unPos sourceColumn))
wnError
checkSynsetsOrder (Success validatedSynsets)
= bimap (NE.map toError) NE.fromList . validateSorted $ NE.toList validatedSynsets
= bimap (NE.map toError)
NE.fromList . validateSorted $ NE.toList validatedSynsets
checkSynsetsOrder (Failure es) = Failure es
toError unsortedSynsetSequences@(synset:|_)
= toSourceError synset $ UnsortedSynsets (unsortedSynsetSequences :| [])
= attachOffset synset $ UnsortedSynsets (unsortedSynsetSequences :| [])
checkSynsets (Failure errors) = Failure errors
checkSynsets (Success index)
= (:|)
Expand All @@ -184,5 +195,9 @@ validateSynsets indexWithDuplicates (firstSynset:|synsets) =
checkSynset' = checkSynset index
go synset result = (:) <$> checkSynset' synset <*> result

---

validateSynsets :: Trie String (NonEmpty (Either String (Synset Unvalidated)))
-> NonEmpty (NonEmpty (Synset Unvalidated), PosState Text)
-> SourceValidation (NonEmpty (Synset Validated))
validateSynsets indexWithDuplicates lexFileSynsetsPosStates =
bimap id sconcat . sequenceA
$ NE.map (validateLexFileSynsets indexWithDuplicates) lexFileSynsetsPosStates

0 comments on commit 1765b88

Please sign in to comment.