Skip to content

Commit

Permalink
* refactoring
Browse files Browse the repository at this point in the history
- put data declaration and instances in Data.hs
  • Loading branch information
odanoburu committed Aug 26, 2019
1 parent 306c36d commit 51acede
Show file tree
Hide file tree
Showing 3 changed files with 104 additions and 113 deletions.
106 changes: 103 additions & 3 deletions src/Data.hs
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
module Data where

import Control.Monad.Trans.Reader (ask)
import Data.Bifunctor (Bifunctor(..))
import qualified Data.DList as DL
import qualified Data.List.NonEmpty as NE
import Data.List.NonEmpty(NonEmpty(..))
Expand All @@ -17,9 +18,13 @@ import Data.RDF.Types (Subject(..), Predicate(..), Object(..),
IRI(..), Triple(..),Literal(..),LiteralType(..))
import Data.Text (Text)
import qualified Data.Text as T
import Data.Text.Prettyprint.Doc (Pretty(..),Doc,dot,colon,(<+>))
import Data.Text.Prettyprint.Doc ( Pretty(..),Doc,dot,colon,(<+>), nest
, line, indent, align, vsep, hsep)


singleton :: a -> NonEmpty a
singleton x = x :| []

data WNPOS = A | S | R | N | V deriving (Show,Eq,Enum,Ord)

newtype LexicographerFileId = LexicographerFileId (WNPOS, Text) deriving (Show,Eq,Ord)
Expand Down Expand Up @@ -74,7 +79,53 @@ data Synset a = Synset
instance Ord (Synset Validated) where
Synset{wordSenses = (headWord:|_)} <= Synset{wordSenses = (headWord2:|_)}
= headWord <= headWord2


---- validation
data Validation e a = Failure e | Success a deriving (Show,Eq)

instance Functor (Validation e) where
fmap _ (Failure e) = Failure e
fmap f (Success a) = Success (f a)

instance Bifunctor Validation where
bimap f _ (Failure e) = Failure (f e)
bimap _ g (Success a) = Success (g a)

instance Semigroup e => Applicative (Validation e) where
-- pure :: a -> Validation e a
pure = Success
--(<*>) :: Validation e (a -> b) -> Validation e a -> Validation e b
Success f <*> Success a = Success (f a)
Success _ <*> Failure e = Failure e
Failure e <*> Success _ = Failure e
Failure e <*> Failure e' = Failure (e <> e')

data WNError
= ParseError String
| DuplicateWordSense String
| DuplicateSynsetWords (NonEmpty Text)
| DuplicateWordRelation (NonEmpty WordPointer)
| DuplicateSynsetRelation (NonEmpty SynsetRelation)
| MissingSynsetRelationTarget SynsetRelation
| MissingWordRelationTarget WordPointer
| UnsortedSynsets (NonEmpty (NonEmpty (Synset Validated)))
| UnsortedWordSenses (NonEmpty (NonEmpty Text))
| UnsortedSynsetRelations (NonEmpty (NonEmpty SynsetRelation))
| UnsortedWordPointers (NonEmpty (NonEmpty WordPointer))
deriving (Show)

data SourceError
= SourceError Text -- ^ name of source file
SourcePosition
WNError deriving (Show)

toSourceError :: Synset a -> WNError -> SourceError
toSourceError Synset{sourcePosition, lexicographerFileId}
= SourceError (lexicographerFileIdToText lexicographerFileId)
sourcePosition

type WNValidation a = Validation (NonEmpty WNError) a
type SourceValidation a = Validation (NonEmpty SourceError) a

--- Pretty instances
instance Pretty WNPOS where
Expand Down Expand Up @@ -119,6 +170,56 @@ instance Pretty WNWord where
instance Pretty (Synset a) where
pretty Synset{wordSenses = wordSense:|_} = pretty wordSense

prettyMissingTarget :: Text -> Text -> Doc ann -> Doc ann
prettyMissingTarget relationType relationName target
= "error: Missing"
<+> pretty relationType
<+> pretty relationName
<+> "target" <+> target

prettyUnordered :: Pretty a => Text -> NonEmpty (NonEmpty a) -> Doc ann
prettyUnordered what sequences
= "warning: Unsorted" <+> pretty what <> line
<> (indent 2 . align . vsep . map prettyUnorderedSequence $ NE.toList sequences)
where
prettyUnorderedSequence (x:|xs) =
pretty x <+> "should come after" <+> hsep (map pretty xs)

prettyDuplicate :: Pretty a => Text -> NonEmpty a -> Doc ann
prettyDuplicate what duplicates
= "error: Duplicate"
<+> pretty what
<+> pretty (NE.head duplicates)

instance Pretty WNError where
pretty (ParseError errorString) = pretty errorString
pretty (DuplicateWordSense sensekey)
= prettyDuplicate "wordsense" (singleton sensekey)
pretty (DuplicateSynsetWords synsetWords)
= prettyDuplicate "synset words" synsetWords
pretty (DuplicateWordRelation wordPointers)
= prettyDuplicate "word pointer" wordPointers
pretty (DuplicateSynsetRelation synsetRelations)
= prettyDuplicate "synset relation" synsetRelations
pretty (MissingSynsetRelationTarget (SynsetRelation relationName target))
= prettyMissingTarget "synset relation" relationName $ pretty target
pretty (MissingWordRelationTarget (WordPointer pointerName target))
= prettyMissingTarget "word pointer" pointerName $ pretty target
pretty (UnsortedSynsets sequences)
= prettyUnordered "synsets" sequences
pretty (UnsortedSynsetRelations sequences)
= prettyUnordered "synset relations" sequences
pretty (UnsortedWordSenses sequences)
= prettyUnordered "synset word senses" sequences
pretty (UnsortedWordPointers sequences)
= prettyUnordered "word pointers" sequences

instance Pretty SourceError where
pretty (SourceError lexicographerFileId (SourcePosition (beg, end)) wnError)
= pretty lexicographerFileId
<> colon <> pretty beg <> colon <> pretty end <> colon
<+> nest 2 (pretty wnError) <> line

---
-- to RDF instances

Expand Down Expand Up @@ -231,4 +332,3 @@ synsetToTriples relationsMap Synset{lexicographerFileId, wordSenses, definition,
, map (Triple wordSenseIri framePredicate) frameLiterals
, zipWith (Triple wordSenseIri) pointersPredicates targetWordSenseObjs
]

1 change: 0 additions & 1 deletion src/Parse.hs
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
module Parse (parseLexicographer) where

import Data
import Validate (SourceError(..), WNError(..), SourceValidation, Validation(..))

import Control.Applicative hiding (some,many)
import qualified Control.Applicative.Combinators.NonEmpty as NC
Expand Down
110 changes: 1 addition & 109 deletions src/Validate.hs
Original file line number Diff line number Diff line change
@@ -1,30 +1,19 @@
module Validate
( validateSynsets
, makeIndex
, Validation(..)
, SourceValidation
, SourceError(..)
, WNError (..)
) where

import Data

import Data.Bifunctor (Bifunctor(..))
import Data.Bifunctor (bimap)
import Data.List hiding (insert, lookup)
import Data.List.NonEmpty(NonEmpty(..))
import qualified Data.List.NonEmpty as NE
import Data.Maybe (fromJust, mapMaybe)
import Data.Text (Text)
import qualified Data.Text as T
import Data.GenericTrie (Trie, fromListWith', member, lookup, foldWithKey, empty, insert)
import Data.Text.Prettyprint.Doc
( Pretty(..),Doc,(<+>), colon, align, hsep, nest
, line, indent, vsep)
import Prelude hiding (lookup)

singleton :: a -> NonEmpty a
singleton x = x :| []

-- when to change LexicographerFile : Text to LexicographerFileId :
-- Int in wordsenses etc.? is changing it really necessary?
type Index a = Trie String (Either String a) -- Left is a reference to another key
Expand Down Expand Up @@ -65,104 +54,7 @@ senseKey :: LexicographerFileId -> WordSenseForm -> LexicalId -> String
senseKey (LexicographerFileId (pos, lexname)) (WordSenseForm wordForm) (LexicalId lexicalId) =
intercalate "\t" [T.unpack wordForm, show pos ++ T.unpack lexname, show lexicalId]

---- validation
data Validation e a = Failure e | Success a deriving (Show,Eq)

instance Functor (Validation e) where
fmap _ (Failure e) = Failure e
fmap f (Success a) = Success (f a)

instance Bifunctor Validation where
bimap f _ (Failure e) = Failure (f e)
bimap _ g (Success a) = Success (g a)

instance Semigroup e => Applicative (Validation e) where
-- pure :: a -> Validation e a
pure = Success
--(<*>) :: Validation e (a -> b) -> Validation e a -> Validation e b
Success f <*> Success a = Success (f a)
Success _ <*> Failure e = Failure e
Failure e <*> Success _ = Failure e
Failure e <*> Failure e' = Failure (e <> e')

data WNError
= ParseError String
| DuplicateWordSense String
| DuplicateSynsetWords (NonEmpty Text)
| DuplicateWordRelation (NonEmpty WordPointer)
| DuplicateSynsetRelation (NonEmpty SynsetRelation)
| MissingSynsetRelationTarget SynsetRelation
| MissingWordRelationTarget WordPointer
| UnsortedSynsets (NonEmpty (NonEmpty (Synset Validated)))
| UnsortedWordSenses (NonEmpty (NonEmpty Text))
| UnsortedSynsetRelations (NonEmpty (NonEmpty SynsetRelation))
| UnsortedWordPointers (NonEmpty (NonEmpty WordPointer))
deriving (Show)

data SourceError
= SourceError Text -- ^ name of source file
SourcePosition
WNError deriving (Show)

toSourceError :: Synset a -> WNError -> SourceError
toSourceError Synset{sourcePosition, lexicographerFileId}
= SourceError (lexicographerFileIdToText lexicographerFileId)
sourcePosition

type WNValidation a = Validation (NonEmpty WNError) a
type SourceValidation a = Validation (NonEmpty SourceError) a

---
-- Pretty instances
prettyMissingTarget :: Text -> Text -> Doc ann -> Doc ann
prettyMissingTarget relationType relationName target
= "error: Missing"
<+> pretty relationType
<+> pretty relationName
<+> "target" <+> target

prettyUnordered :: Pretty a => Text -> NonEmpty (NonEmpty a) -> Doc ann
prettyUnordered what sequences
= "warning: Unsorted" <+> pretty what <> line
<> (indent 2 . align . vsep . map prettyUnorderedSequence $ NE.toList sequences)
where
prettyUnorderedSequence (x:|xs) =
pretty x <+> "should come after" <+> hsep (map pretty xs)

prettyDuplicate :: Pretty a => Text -> NonEmpty a -> Doc ann
prettyDuplicate what duplicates
= "error: Duplicate"
<+> pretty what
<+> pretty (NE.head duplicates)

instance Pretty WNError where
pretty (ParseError errorString) = pretty errorString
pretty (DuplicateWordSense sensekey)
= prettyDuplicate "wordsense" (singleton sensekey)
pretty (DuplicateSynsetWords synsetWords)
= prettyDuplicate "synset words" synsetWords
pretty (DuplicateWordRelation wordPointers)
= prettyDuplicate "word pointer" wordPointers
pretty (DuplicateSynsetRelation synsetRelations)
= prettyDuplicate "synset relation" synsetRelations
pretty (MissingSynsetRelationTarget (SynsetRelation relationName target))
= prettyMissingTarget "synset relation" relationName $ pretty target
pretty (MissingWordRelationTarget (WordPointer pointerName target))
= prettyMissingTarget "word pointer" pointerName $ pretty target
pretty (UnsortedSynsets sequences)
= prettyUnordered "synsets" sequences
pretty (UnsortedSynsetRelations sequences)
= prettyUnordered "synset relations" sequences
pretty (UnsortedWordSenses sequences)
= prettyUnordered "synset word senses" sequences
pretty (UnsortedWordPointers sequences)
= prettyUnordered "word pointers" sequences

instance Pretty SourceError where
pretty (SourceError lexicographerFileId (SourcePosition (beg, end)) wnError)
= pretty lexicographerFileId
<> colon <> pretty beg <> colon <> pretty end <> colon
<+> nest 2 (pretty wnError) <> line

---
-- checks
Expand Down

0 comments on commit 51acede

Please sign in to comment.