diff --git a/src/Data.hs b/src/Data.hs index 088a2af..1d6cd50 100644 --- a/src/Data.hs +++ b/src/Data.hs @@ -5,6 +5,7 @@ module Data where import Control.Monad.Trans.Reader (ask) +import Data.Bifunctor (Bifunctor(..)) import qualified Data.DList as DL import qualified Data.List.NonEmpty as NE import Data.List.NonEmpty(NonEmpty(..)) @@ -17,9 +18,13 @@ import Data.RDF.Types (Subject(..), Predicate(..), Object(..), IRI(..), Triple(..),Literal(..),LiteralType(..)) import Data.Text (Text) import qualified Data.Text as T -import Data.Text.Prettyprint.Doc (Pretty(..),Doc,dot,colon,(<+>)) +import Data.Text.Prettyprint.Doc ( Pretty(..),Doc,dot,colon,(<+>), nest + , line, indent, align, vsep, hsep) +singleton :: a -> NonEmpty a +singleton x = x :| [] + data WNPOS = A | S | R | N | V deriving (Show,Eq,Enum,Ord) newtype LexicographerFileId = LexicographerFileId (WNPOS, Text) deriving (Show,Eq,Ord) @@ -74,7 +79,53 @@ data Synset a = Synset instance Ord (Synset Validated) where Synset{wordSenses = (headWord:|_)} <= Synset{wordSenses = (headWord2:|_)} = headWord <= headWord2 - + +---- validation +data Validation e a = Failure e | Success a deriving (Show,Eq) + +instance Functor (Validation e) where + fmap _ (Failure e) = Failure e + fmap f (Success a) = Success (f a) + +instance Bifunctor Validation where + bimap f _ (Failure e) = Failure (f e) + bimap _ g (Success a) = Success (g a) + +instance Semigroup e => Applicative (Validation e) where + -- pure :: a -> Validation e a + pure = Success + --(<*>) :: Validation e (a -> b) -> Validation e a -> Validation e b + Success f <*> Success a = Success (f a) + Success _ <*> Failure e = Failure e + Failure e <*> Success _ = Failure e + Failure e <*> Failure e' = Failure (e <> e') + +data WNError + = ParseError String + | DuplicateWordSense String + | DuplicateSynsetWords (NonEmpty Text) + | DuplicateWordRelation (NonEmpty WordPointer) + | DuplicateSynsetRelation (NonEmpty SynsetRelation) + | MissingSynsetRelationTarget SynsetRelation + | MissingWordRelationTarget WordPointer + | UnsortedSynsets (NonEmpty (NonEmpty (Synset Validated))) + | UnsortedWordSenses (NonEmpty (NonEmpty Text)) + | UnsortedSynsetRelations (NonEmpty (NonEmpty SynsetRelation)) + | UnsortedWordPointers (NonEmpty (NonEmpty WordPointer)) + deriving (Show) + +data SourceError + = SourceError Text -- ^ name of source file + SourcePosition + WNError deriving (Show) + +toSourceError :: Synset a -> WNError -> SourceError +toSourceError Synset{sourcePosition, lexicographerFileId} + = SourceError (lexicographerFileIdToText lexicographerFileId) + sourcePosition + +type WNValidation a = Validation (NonEmpty WNError) a +type SourceValidation a = Validation (NonEmpty SourceError) a --- Pretty instances instance Pretty WNPOS where @@ -119,6 +170,56 @@ instance Pretty WNWord where instance Pretty (Synset a) where pretty Synset{wordSenses = wordSense:|_} = pretty wordSense +prettyMissingTarget :: Text -> Text -> Doc ann -> Doc ann +prettyMissingTarget relationType relationName target + = "error: Missing" + <+> pretty relationType + <+> pretty relationName + <+> "target" <+> target + +prettyUnordered :: Pretty a => Text -> NonEmpty (NonEmpty a) -> Doc ann +prettyUnordered what sequences + = "warning: Unsorted" <+> pretty what <> line + <> (indent 2 . align . vsep . map prettyUnorderedSequence $ NE.toList sequences) + where + prettyUnorderedSequence (x:|xs) = + pretty x <+> "should come after" <+> hsep (map pretty xs) + +prettyDuplicate :: Pretty a => Text -> NonEmpty a -> Doc ann +prettyDuplicate what duplicates + = "error: Duplicate" + <+> pretty what + <+> pretty (NE.head duplicates) + +instance Pretty WNError where + pretty (ParseError errorString) = pretty errorString + pretty (DuplicateWordSense sensekey) + = prettyDuplicate "wordsense" (singleton sensekey) + pretty (DuplicateSynsetWords synsetWords) + = prettyDuplicate "synset words" synsetWords + pretty (DuplicateWordRelation wordPointers) + = prettyDuplicate "word pointer" wordPointers + pretty (DuplicateSynsetRelation synsetRelations) + = prettyDuplicate "synset relation" synsetRelations + pretty (MissingSynsetRelationTarget (SynsetRelation relationName target)) + = prettyMissingTarget "synset relation" relationName $ pretty target + pretty (MissingWordRelationTarget (WordPointer pointerName target)) + = prettyMissingTarget "word pointer" pointerName $ pretty target + pretty (UnsortedSynsets sequences) + = prettyUnordered "synsets" sequences + pretty (UnsortedSynsetRelations sequences) + = prettyUnordered "synset relations" sequences + pretty (UnsortedWordSenses sequences) + = prettyUnordered "synset word senses" sequences + pretty (UnsortedWordPointers sequences) + = prettyUnordered "word pointers" sequences + +instance Pretty SourceError where + pretty (SourceError lexicographerFileId (SourcePosition (beg, end)) wnError) + = pretty lexicographerFileId + <> colon <> pretty beg <> colon <> pretty end <> colon + <+> nest 2 (pretty wnError) <> line + --- -- to RDF instances @@ -231,4 +332,3 @@ synsetToTriples relationsMap Synset{lexicographerFileId, wordSenses, definition, , map (Triple wordSenseIri framePredicate) frameLiterals , zipWith (Triple wordSenseIri) pointersPredicates targetWordSenseObjs ] - diff --git a/src/Parse.hs b/src/Parse.hs index 9db0bcc..b7e29c3 100644 --- a/src/Parse.hs +++ b/src/Parse.hs @@ -1,7 +1,6 @@ module Parse (parseLexicographer) where import Data -import Validate (SourceError(..), WNError(..), SourceValidation, Validation(..)) import Control.Applicative hiding (some,many) import qualified Control.Applicative.Combinators.NonEmpty as NC diff --git a/src/Validate.hs b/src/Validate.hs index a4d405b..a939671 100644 --- a/src/Validate.hs +++ b/src/Validate.hs @@ -1,30 +1,19 @@ module Validate ( validateSynsets , makeIndex - , Validation(..) - , SourceValidation - , SourceError(..) - , WNError (..) ) where import Data -import Data.Bifunctor (Bifunctor(..)) +import Data.Bifunctor (bimap) import Data.List hiding (insert, lookup) import Data.List.NonEmpty(NonEmpty(..)) import qualified Data.List.NonEmpty as NE import Data.Maybe (fromJust, mapMaybe) -import Data.Text (Text) import qualified Data.Text as T import Data.GenericTrie (Trie, fromListWith', member, lookup, foldWithKey, empty, insert) -import Data.Text.Prettyprint.Doc - ( Pretty(..),Doc,(<+>), colon, align, hsep, nest - , line, indent, vsep) import Prelude hiding (lookup) -singleton :: a -> NonEmpty a -singleton x = x :| [] - -- when to change LexicographerFile : Text to LexicographerFileId : -- Int in wordsenses etc.? is changing it really necessary? type Index a = Trie String (Either String a) -- Left is a reference to another key @@ -65,104 +54,7 @@ senseKey :: LexicographerFileId -> WordSenseForm -> LexicalId -> String senseKey (LexicographerFileId (pos, lexname)) (WordSenseForm wordForm) (LexicalId lexicalId) = intercalate "\t" [T.unpack wordForm, show pos ++ T.unpack lexname, show lexicalId] ----- validation -data Validation e a = Failure e | Success a deriving (Show,Eq) - -instance Functor (Validation e) where - fmap _ (Failure e) = Failure e - fmap f (Success a) = Success (f a) - -instance Bifunctor Validation where - bimap f _ (Failure e) = Failure (f e) - bimap _ g (Success a) = Success (g a) - -instance Semigroup e => Applicative (Validation e) where - -- pure :: a -> Validation e a - pure = Success - --(<*>) :: Validation e (a -> b) -> Validation e a -> Validation e b - Success f <*> Success a = Success (f a) - Success _ <*> Failure e = Failure e - Failure e <*> Success _ = Failure e - Failure e <*> Failure e' = Failure (e <> e') - -data WNError - = ParseError String - | DuplicateWordSense String - | DuplicateSynsetWords (NonEmpty Text) - | DuplicateWordRelation (NonEmpty WordPointer) - | DuplicateSynsetRelation (NonEmpty SynsetRelation) - | MissingSynsetRelationTarget SynsetRelation - | MissingWordRelationTarget WordPointer - | UnsortedSynsets (NonEmpty (NonEmpty (Synset Validated))) - | UnsortedWordSenses (NonEmpty (NonEmpty Text)) - | UnsortedSynsetRelations (NonEmpty (NonEmpty SynsetRelation)) - | UnsortedWordPointers (NonEmpty (NonEmpty WordPointer)) - deriving (Show) - -data SourceError - = SourceError Text -- ^ name of source file - SourcePosition - WNError deriving (Show) - -toSourceError :: Synset a -> WNError -> SourceError -toSourceError Synset{sourcePosition, lexicographerFileId} - = SourceError (lexicographerFileIdToText lexicographerFileId) - sourcePosition - -type WNValidation a = Validation (NonEmpty WNError) a -type SourceValidation a = Validation (NonEmpty SourceError) a - ---- --- Pretty instances -prettyMissingTarget :: Text -> Text -> Doc ann -> Doc ann -prettyMissingTarget relationType relationName target - = "error: Missing" - <+> pretty relationType - <+> pretty relationName - <+> "target" <+> target - -prettyUnordered :: Pretty a => Text -> NonEmpty (NonEmpty a) -> Doc ann -prettyUnordered what sequences - = "warning: Unsorted" <+> pretty what <> line - <> (indent 2 . align . vsep . map prettyUnorderedSequence $ NE.toList sequences) - where - prettyUnorderedSequence (x:|xs) = - pretty x <+> "should come after" <+> hsep (map pretty xs) - -prettyDuplicate :: Pretty a => Text -> NonEmpty a -> Doc ann -prettyDuplicate what duplicates - = "error: Duplicate" - <+> pretty what - <+> pretty (NE.head duplicates) - -instance Pretty WNError where - pretty (ParseError errorString) = pretty errorString - pretty (DuplicateWordSense sensekey) - = prettyDuplicate "wordsense" (singleton sensekey) - pretty (DuplicateSynsetWords synsetWords) - = prettyDuplicate "synset words" synsetWords - pretty (DuplicateWordRelation wordPointers) - = prettyDuplicate "word pointer" wordPointers - pretty (DuplicateSynsetRelation synsetRelations) - = prettyDuplicate "synset relation" synsetRelations - pretty (MissingSynsetRelationTarget (SynsetRelation relationName target)) - = prettyMissingTarget "synset relation" relationName $ pretty target - pretty (MissingWordRelationTarget (WordPointer pointerName target)) - = prettyMissingTarget "word pointer" pointerName $ pretty target - pretty (UnsortedSynsets sequences) - = prettyUnordered "synsets" sequences - pretty (UnsortedSynsetRelations sequences) - = prettyUnordered "synset relations" sequences - pretty (UnsortedWordSenses sequences) - = prettyUnordered "synset word senses" sequences - pretty (UnsortedWordPointers sequences) - = prettyUnordered "word pointers" sequences -instance Pretty SourceError where - pretty (SourceError lexicographerFileId (SourcePosition (beg, end)) wnError) - = pretty lexicographerFileId - <> colon <> pretty beg <> colon <> pretty end <> colon - <+> nest 2 (pretty wnError) <> line --- -- checks