Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Haskell! #33

Open
wants to merge 14 commits into
base: master
Choose a base branch
from
8 changes: 8 additions & 0 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,12 @@ python:

sudo: false

addons:
apt:
packages:
- cabal-install
- ghc

install:
# Coveralls 4.0 doesn't support Python 3.2
- if [ "$TRAVIS_PYTHON_VERSION" == "3.2" ]; then travis_retry pip install coverage==3.7.1; fi
Expand All @@ -25,6 +31,8 @@ script:
- grunt
# Test Ruby:
- rake test
# Test Haskell
- cabal configure --enable-tests && cabal build && cabal test

after_success:
- coverage report
Expand Down
41 changes: 41 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,47 @@ wordfilter.addWords(['zebra','elephant'])
wordfilter.blacklisted('this string has zebra in it') # True
```

Or with Haskell:
Clone this repo and then `cabal install` (or `stack build`)

```haskell
module MightBeNaughty where

import System.IO
import Wordfilter

-- functions without trailing ' use Darius' wordlist
checkInput :: IO String -> IO ()
checkInput = do
input <- getLine
ok <- blacklisted input
printLn $ if ok then "cool :)" else "not cool >:("

lessThanOriginalList :: String -> IO [String]
lessThanOriginalList toRemove1 toRemove2 = removeWord toRemove1 >>=
removeWord' toRemove2
-- ~~~important ^ ~~~

-- functions with a trailing ' need an IO [String] wordlist
getSomeOtherList :: IO [String]
getSomeOtherList = ...

otherListAndMore :: [String] -> IO Bool
otherListAndMore otherWords toCheck = getSomeOtherList >>=
addWords' otherWords >>=
blacklisted' toCheck

-- clearList is just an empty IO [String] for compatability/convenience(?)

-- blacklist is original
checkInputParticular :: String -> String -> [String] -> IO Bool
checkInputParticular toTest toRemove toAdd = blacklist >>=
removeWord' toRemove >>=
addWords' toAdd >>=
blacklisted' toTest
```


## Documentation
This is a word filter adapted from code that I use in a lot of my twitter bots. It is based on [a list of words that I've hand-picked](https://github.com/dariusk/wordfilter/blob/master/lib/badwords.json) for exclusion from my bots: essentially, it's a list of things that I would not say myself. Generally speaking, they are "words of oppression", aka racist/sexist/ableist things that I would not say.

Expand Down
2 changes: 2 additions & 0 deletions Setup.hs
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
import Distribution.Simple
main = defaultMain
78 changes: 78 additions & 0 deletions lib/Wordfilter.hs
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
-- |
-- Module: Wordfilter
-- License: MIT
-- Portability: portable
--
-- Haskell port of Darius Kazemi's Wordfilter

module Wordfilter
(
-- Immutability changes some of the functionality:
-- addWords and removeWord return changed copies of
-- the list instead of changing the list itself. To
-- address this, we export "raw" and "convenience"
-- versions of those functions. The "raw" versions
-- (marked with a ') take an IO [String] wordlist,
-- while the "convenience" versions "bake in" the
-- original blacklist. Similarly, clearList is
-- just an empty list, which can be passed to
-- the "raw" functions to build up a fresh list.
-- Examples:
--
-- blacklisted "foo" // IO False
-- clearList >>= addWords ["foo", "bar"] >>= blacklisted' "foo" // IO True
--
-- real blacklist
blacklist
-- empty "blacklist"
, clearList
-- "convenience" functions
, blacklisted
, addWords
, removeWord
-- "raw" functions
, blacklisted'
, addWords'
, removeWord'
) where


import Data.Aeson
import Data.Bits ((.|.))
import qualified Data.ByteString.Lazy as B
import Data.Maybe (maybeToList)
import Data.List (intersperse)
import Text.Regex.PCRE

import Paths_wordfilter (getDataFileName)

blacklist :: IO [String]
blacklist = getDataFileName "lib/badwords.json" >>=
B.readFile >>=
(return . concat . maybeToList . decode)

clearList :: IO [String]
clearList = return []

blacklisted' :: String -> [String] -> IO Bool
blacklisted' _ [] = return False
blacklisted' s bl = return $ matchTest re s where
re = makeRegexOpts (defaultCompOpt .|. compCaseless)
defaultExecOpt
(concat $ intersperse "|" bl)

blacklisted :: String -> IO Bool
blacklisted s = blacklist >>= (blacklisted' s)

addWords' :: [String] -> [String] -> IO [String]
addWords' ws bl = return $ bl ++ ws

addWords :: [String] -> IO [String]
addWords ws = blacklist >>= addWords' ws

removeWord' :: String -> [String] -> IO [String]
removeWord' w bl = return $ filter (not . (== w)) bl

removeWord :: String -> IO [String]
removeWord w = blacklist >>= (removeWord' w)

66 changes: 66 additions & 0 deletions stack.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
# This file was automatically generated by 'stack init'
#
# Some commonly used options have been documented as comments in this file.
# For advanced use and comprehensive documentation of the format, please see:
# http://docs.haskellstack.org/en/stable/yaml_configuration/

# Resolver to choose a 'specific' stackage snapshot or a compiler version.
# A snapshot resolver dictates the compiler version and the set of packages
# to be used for project dependencies. For example:
#
# resolver: lts-3.5
# resolver: nightly-2015-09-21
# resolver: ghc-7.10.2
# resolver: ghcjs-0.1.0_ghc-7.10.2
# resolver:
# name: custom-snapshot
# location: "./custom-snapshot.yaml"
resolver: lts-7.8

# User packages to be built.
# Various formats can be used as shown in the example below.
#
# packages:
# - some-directory
# - https://example.com/foo/bar/baz-0.0.2.tar.gz
# - location:
# git: https://github.com/commercialhaskell/stack.git
# commit: e7b331f14bcffb8367cd58fbfc8b40ec7642100a
# - location: https://github.com/commercialhaskell/stack/commit/e7b331f14bcffb8367cd58fbfc8b40ec7642100a
# extra-dep: true
# subdirs:
# - auto-update
# - wai
#
# A package marked 'extra-dep: true' will only be built if demanded by a
# non-dependency (i.e. a user package), and its test suites and benchmarks
# will not be run. This is useful for tweaking upstream packages.
packages:
- '.'
# Dependency packages to be pulled from upstream that are not in the resolver
# (e.g., acme-missiles-0.3)
extra-deps: []

# Override default flag values for local packages and extra-deps
flags: {}

# Extra package databases containing global packages
extra-package-dbs: []

# Control whether we use the GHC we find on the path
# system-ghc: true
#
# Require a specific version of stack, using version ranges
# require-stack-version: -any # Default
# require-stack-version: ">=1.2"
#
# Override the architecture used by stack, especially useful on Windows
# arch: i386
# arch: x86_64
#
# Extra directories used by stack for building
# extra-include-dirs: [/path/to/dir]
# extra-lib-dirs: [/path/to/dir]
#
# Allow a newer minor version of GHC than the snapshot specifies
# compiler-check: newer-minor
25 changes: 25 additions & 0 deletions test/Wordfilter_Test.hs
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
module Main where

import Wordfilter (blacklisted', addWords', removeWord')
import Test.HUnit

blacklistedTests = TestList [testEmptyFalse, testContained, testNotContained]
testEmptyFalse = TestCase (do r <- blacklisted' "foo" []
assertEqual "always false on empty list" False r)
testContained = TestCase (do r <- blacklisted' "i am foo" ["bar", "foo"]
assertEqual "should match" True r)
testNotContained = TestCase (do r <- blacklisted' "quux i am" ["bar", "foo"]
assertEqual "should not match" False r)


addWordsTest = TestList [testAdd]
testAdd = TestCase (do r <- addWords' ["foo"] ["bar", "baz"]
assertEqual "add words to list" ["bar", "baz", "foo"] r)

removeWordTests = TestList [testPresent, testAbsent]
testPresent = TestCase (do r <- removeWord' "foo" ["foo", "bar"]
assertEqual "remove word from list" ["bar"] r)
testAbsent = TestCase (do r <- removeWord' "foo" ["bar", "baz"]
assertEqual "don't remove absent word" ["bar", "baz"] r)

main = runTestTT $ TestList [blacklistedTests, addWordsTest, removeWordTests]
34 changes: 34 additions & 0 deletions wordfilter.cabal
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
-- Initial wordfilter.cabal generated by cabal init. For further
-- documentation, see http://haskell.org/cabal/users-guide/

name: wordfilter
version: 0.1.0.5
synopsis: Word filter
-- description:
homepage: https://github.com/dariusk/wordfilter
license: MIT
license-file: LICENSE-MIT
author: Sam Raker
maintainer: [email protected]
-- copyright:
category: Language
build-type: Simple
extra-source-files: README.md
cabal-version: >=1.10
data-files: lib/badwords.json

Test-Suite test-wordfilter
type: exitcode-stdio-1.0
main-is: test/Wordfilter_Test.hs
build-depends: base, HUnit, wordfilter
default-language: Haskell2010

library
exposed-modules: Wordfilter, Paths_wordfilter
-- other-extensions:
build-depends: base >=4.8 && <4.10,
aeson, bytestring,
filepath,
regex-pcre-builtin
hs-source-dirs: lib
default-language: Haskell2010