Skip to content

Commit

Permalink
Merge pull request #29 from own-pt/fast-validation
Browse files Browse the repository at this point in the history
faster validation
  • Loading branch information
odanoburu authored Sep 5, 2019
2 parents 56507d3 + 4526f5c commit 426c2d3
Show file tree
Hide file tree
Showing 5 changed files with 181 additions and 90 deletions.
16 changes: 9 additions & 7 deletions package.yaml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
name: wntext
name: mill
version: 0.1.0.0
github: "own-pt/wntext"
github: "own-pt/mill"
license: Apache-2.0
author: "bruno cuconato"
maintainer: "[email protected]"
Expand All @@ -17,13 +17,14 @@ extra-source-files:
# To avoid duplicated efforts in documentation and dealing with the
# complications of embedding Haddock markup inside cabal files, it is
# common to point users to the README.md file.
description: Please see the README on GitHub at <https://github.com/own-pt/wntext#readme>
description: Please see the README on GitHub at <https://github.com/own-pt/mill#readme>


default-extensions:
- OverloadedStrings
- NamedFieldPuns
- DuplicateRecordFields
- TupleSections

dependencies:
- aeson == 1.4.*
Expand All @@ -34,13 +35,14 @@ dependencies:
- directory == 1.*
- dlist == 0.8.*
- filepath == 1.*
- generic-trie > 0.2.0 && < 1.0
- list-tries == 0.6.*
- megaparsec == 7.*
- mtl == 2.*
- optparse-applicative >= 0.14 && < 1
- parser-combinators == 1.*
- prettyprinter == 1.*
- rdf == 0.1.*
- shake == 0.17.*
- text == 1.2.*
- transformers == 0.5.*

Expand All @@ -56,15 +58,15 @@ executables:
- -rtsopts
- -with-rtsopts=-N
dependencies:
- wntext
- mill

tests:
wntext-test:
mill-test:
main: Spec.hs
source-dirs: test
ghc-options:
- -threaded
- -rtsopts
- -with-rtsopts=-N
dependencies:
- wntext
- mill
51 changes: 35 additions & 16 deletions src/Data.hs
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,14 @@
{-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE DeriveAnyClass #-}
{-# LANGUAGE DerivingStrategies #-}

module Data where

import Data.Aeson (ToJSON(..), genericToEncoding, defaultOptions)
import Data.Bifunctor (Bifunctor(..))
import Data.Binary (Binary)
import Data.List.NonEmpty(NonEmpty(..))
import qualified Data.List.NonEmpty as NE
import Data.RDF.ToRDF (ToObject(..))
Expand All @@ -21,7 +24,7 @@ import Text.Printf (printf)
singleton :: a -> NonEmpty a
singleton x = x :| []

data WNObj = SynsetObj | WordObj deriving (Eq,Enum)
data WNObj = SynsetObj | WordObj deriving (Binary,Eq,Enum,Generic)

instance Show WNObj where
show SynsetObj = "synset"
Expand All @@ -34,7 +37,7 @@ readWNObj input = case input of
_ -> error . T.unpack
$ T.intercalate " " ["Can't parse", input, "as WordNet object name (one of synset or word)"]

data WNPOS = A | S | R | N | V deriving (Eq,Enum,Ord,Show)
data WNPOS = A | S | R | N | V deriving (Binary,Eq,Enum,Generic,Ord,Show)

readShortWNPOS :: Text -> WNPOS
readShortWNPOS "n" = N
Expand All @@ -52,7 +55,9 @@ readLongWNPOS "adj" = Just A
readLongWNPOS "adv" = Just R
readLongWNPOS _ = Nothing

newtype LexicographerFileId = LexicographerFileId (WNPOS, Text) deriving (Eq,Ord,Show)
newtype LexicographerFileId = LexicographerFileId (WNPOS, Text)
deriving (Eq,Generic,Ord,Show)
deriving anyclass (Binary)

synsetType :: WNPOS -> Int
synsetType N = 1
Expand Down Expand Up @@ -83,18 +88,24 @@ instance ToObject LexicographerFileId where
= pure . LiteralObject
$ Literal (lexicographerFileIdToText lexicographerFileId) LiteralUntyped

newtype WordSenseForm = WordSenseForm Text deriving (Show,Eq,Ord,Pretty,ToObject,Generic)
newtype WordSenseForm = WordSenseForm Text
deriving (Eq,Ord,Generic,Show)
deriving newtype (Binary,Pretty, ToObject)

instance ToJSON WordSenseForm where
toEncoding = genericToEncoding defaultOptions

newtype LexicalId = LexicalId Int deriving (Show,Eq,Ord,Pretty)
newtype LexicalId = LexicalId Int
deriving (Eq,Generic,Ord,Show)
deriving newtype (Binary,Pretty)

newtype WordSenseIdentifier =
WordSenseIdentifier ( LexicographerFileId
, WordSenseForm
, LexicalId
) deriving (Eq,Ord,Show)
)
deriving (Eq,Generic,Ord,Show)
deriving anyclass (Binary)

makeWordSenseIdentifier :: LexicographerFileId -> WordSenseForm -> LexicalId
-> WordSenseIdentifier
Expand All @@ -105,17 +116,19 @@ newtype SynsetIdentifier =
SynsetIdentifier ( LexicographerFileId
, WordSenseForm
, LexicalId
) deriving (Show,Eq,Ord)
)
deriving (Eq,Generic,Ord,Show)
deriving anyclass (Binary)

type PointerName = Text
type RelationName = Text
data WordPointer = WordPointer PointerName WordSenseIdentifier
deriving (Show,Eq,Ord)
deriving (Binary,Eq,Generic,Ord,Show)
data SynsetRelation = SynsetRelation RelationName SynsetIdentifier
deriving (Show,Eq,Ord)
deriving (Binary,Eq,Generic,Ord,Show)
type FrameIdentifier = Int
data WNWord = WNWord WordSenseIdentifier [FrameIdentifier] [WordPointer]
deriving (Eq,Ord,Show)
deriving (Binary,Eq,Generic,Ord,Show)

senseKey :: Int -> Int -> Maybe SynsetRelation -> WNWord -> String
senseKey lexFileNum synsetTypeNum maybeHeadRelation
Expand All @@ -133,10 +146,12 @@ senseKey lexFileNum synsetTypeNum maybeHeadRelation
_ -> ("", "")


newtype SourcePosition = SourcePosition (Int, Int) deriving (Show,Eq,Ord)
newtype SourcePosition = SourcePosition (Int, Int)
deriving (Eq,Generic,Ord,Show)
deriving anyclass (Binary)

-- synsets can be
data Unvalidated
data Unvalidated deriving (Binary,Generic)
data Validated

data Synset a = Synset
Expand All @@ -147,14 +162,14 @@ data Synset a = Synset
, examples :: [Text]
, frames :: [Int]
, relations :: [SynsetRelation] -- [] use NonEmpty if not for a relationless adjectives?
} deriving (Show,Eq)
} deriving (Binary,Eq,Generic,Show)

instance Ord (Synset Validated) where
Synset{wordSenses = (headWord:|_)} <= Synset{wordSenses = (headWord2:|_)}
= headWord <= headWord2

---- validation
data Validation e a = Failure e | Success a deriving (Show,Eq)
data Validation e a = Failure e | Success a deriving (Binary,Eq,Generic,Show)

instance Functor (Validation e) where
fmap _ (Failure e) = Failure e
Expand All @@ -173,6 +188,10 @@ instance Semigroup e => Applicative (Validation e) where
Failure e <*> Success _ = Failure e
Failure e <*> Failure e' = Failure (e <> e')

validate :: (a -> Validation e b) -> Validation e a -> Validation e b
validate f (Success a) = f a
validate _ (Failure e) = Failure e

data WNError
= ParseError String
| DuplicateWordSense String
Expand All @@ -185,12 +204,12 @@ data WNError
| UnsortedWordSenses (NonEmpty (NonEmpty Text))
| UnsortedSynsetRelations (NonEmpty (NonEmpty SynsetRelation))
| UnsortedWordPointers (NonEmpty (NonEmpty WordPointer))
deriving (Show)
deriving (Binary,Generic,Show)

data SourceError
= SourceError Text -- ^ name of source file
SourcePosition
WNError deriving (Show)
WNError deriving (Binary,Generic,Show)

toSourceError :: Synset a -> WNError -> SourceError
toSourceError Synset{sourcePosition, lexicographerFileId}
Expand Down
Loading

0 comments on commit 426c2d3

Please sign in to comment.