diff --git a/.circleci/config.yml b/.circleci/config.yml new file mode 100644 index 0000000..dd98f9e --- /dev/null +++ b/.circleci/config.yml @@ -0,0 +1,21 @@ +version: 2 + +jobs: + build: + docker: + - image: swipl:stable + + steps: + - run: + # TODO Build custom image to improve build time + name: Install Deps + command: | + apt update -y + apt install git make -y + + - checkout + + - run: + name: Run tests + command: | + make test diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..b25c15b --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +*~ diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 0000000..08dd184 --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,44 @@ +# Changelog + +All notable changes to this project will be documented in this file. + +The format is based on [Keep a Changelog][keep-a-change-log], and this project +adheres to [Semantic Versioning][semantic-versioning]. + +[keep-a-change-log]: https://keepachangelog.com/en/1.0.0/ +[semantic-versioning]: https://semver.org/spec/v2.0.0.html + +## [unreleased] + +## [1.0.0] + +### Added + +- Support for numbers by [@Annipoo](https://github.com/Anniepoo) #34 +- Support for strings #37 +- Code of Conduct #23 + +### Changed + +- Spaces are now tagged with `space` instead of `spc` #41 +- Tokenization of numbers and strings is enabled by default #40 +- Options are now processed by a more conventional means #39 +- The location for the pack's home is updated + +## [0.1.2] + +Prior to changelog. + +## [0.1.1] + +Prior to changelog. + +## [0.1.0] + +Prior to changelog. + +[unreleased]: https://github.com/shonfeder/tokenize/compare/v1.0.0...HEAD +[1.0.0]: https://github.com/shonfeder/tokenize/compare/v0.1.2...v1.0.0 +[0.1.2]: https://github.com/shonfeder/tokenize/compare/v0.1.1...v0.1.2 +[0.1.1]: https://github.com/shonfeder/tokenize/compare/v0.1.0...v0.1.1 +[0.1.0]: https://github.com/shonfeder/tokenize/releases/tag/v0.1.0 diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 87eda1c..d1ae63f 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -5,20 +5,74 @@ reports, etc. ## Code of Conduct -Please review and accept to our [code of conduct](CODE_OF_CONDUCT.md) prior to +Please review and accept our [code of conduct](CODE_OF_CONDUCT.md) prior to engaging in the project. +## Overall direction and aims + +Consult the [`design_notes.md`](design_notes.md) to see the latest codified +design philosophy and principles. + ## Setting up Development -TODO +1. Install swi-prolog's [swipl](http://www.swi-prolog.org/download/stable). + - Optionally, you may wish to use [swivm](https://github.com/fnogatz/swivm) to + manage multiple installed versions of swi-prolog. +2. Hack on the source code in `[./prolog](./prolog)`. +3. Run and explore your changes by loading the file in `swipl` (or using your + editors IDE capabilities): + - Example in swipl + + ```prolog + # in ~/oss/tokenize on git:develop x [22:45:02] + $ cd ./prolog + + # in ~/oss/tokenize/prolog on git:develop x [22:45:04] + $ swipl + Welcome to SWI-Prolog (threaded, 64 bits, version 8.0.2) + SWI-Prolog comes with ABSOLUTELY NO WARRANTY. This is free software. + Please run ?- license. for legal details. + + For online help and background, visit http://www.swi-prolog.org + For built-in help, use ?- help(Topic). or ?- apropos(Word). + + % lod the tokenize module + ?- [tokenize]. + true. + + % experiment + ?- tokenize("Foo bar baz", Tokens). + Tokens = [word(foo), space(' '), word(bar), space(' '), word(baz)]. 
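+
+      % you can also pass an options list (illustrative query; cased(true)
+      % preserves the source casing)
+      ?- tokenize("Foo bar baz", Tokens, [cased(true)]).
+      Tokens = [word('Foo'), space(' '), word(bar), space(' '), word(baz)].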
+ + % reload the module when you make changes to the source code + ?- make. + % Updating index for library /usr/local/Cellar/swi-prolog/8.0.2/libexec/lib/swipl/library/ + true. + + % finished + ?- halt. + ``` + +Please ask here or in `##prolog` on [freenode](https://freenode.net/) if you +need any help! :) ## Running tests Tests are located in the [`./test`](./test) directory. To run the test suite, -simply execute the test file: +simply execute make test: ```sh -$ ./test/test.pl +$ make test +% PL-Unit: tokenize .. done +% All 2 tests passed +``` + +If inside the swipl repl, make sure to load the test file and query run_tests. + +```prolog +?- [test/test]. +?- run_tests. % PL-Unit: tokenize .. done % All 2 tests passed +true. ``` diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..044b64f --- /dev/null +++ b/Makefile @@ -0,0 +1,19 @@ +.PHONY: all test clean + +version := $(shell swipl -q -s pack -g 'version(V),writeln(V)' -t halt) +packfile = quickcheck-$(version).tgz + +SWIPL := swipl + +all: test + +version: + echo $(version) + +check: test + +install: + echo "(none)" + +test: + @$(SWIPL) -s test/test.pl -g 'run_tests,halt(0)' -t 'halt(1)' diff --git a/README.md b/README.md index 82ec7d1..47ac380 100644 --- a/README.md +++ b/README.md @@ -1,23 +1,30 @@ -# Synopsis +# `pack(tokenize) :-` + +A modest tokenization library for SWI-Prolog, seeking a balance between +simplicity and flexibility. + +[![CircleCI](https://circleci.com/gh/shonfeder/tokenize.svg?style=svg)](https://circleci.com/gh/shonfeder/tokenize) + +## Synopsis ```prolog ?- tokenize(`\tExample Text.`, Tokens). -Tokens = [cntrl('\t'), word(example), spc(' '), spc(' '), word(text), punct('.')] +Tokens = [cntrl('\t'), word(example), space(' '), space(' '), word(text), punct('.')] ?- tokenize(`\tExample Text.`, Tokens, [cntrl(false), pack(true), cased(true)]). -Tokens = [word('Example', 1), spc(' ', 2), word('Text', 1), punct('.', 1)] +Tokens = [word('Example', 1), space(' ', 2), word('Text', 1), punct('.', 1)] ?- tokenize(`\tExample Text.`, Tokens), untokenize(Tokens, Text), format('~s~n', [Text]). example text. -Tokens = [cntrl('\t'), word(example), spc(' '), spc(' '), word(text), punct('.')], -Text = [9, 101, 120, 97, 109, 112, 108, 101, 32|...] +Tokens = [cntrl('\t'), word(example), space(' '), space(' '), word(text), punct('.')], +Text = [9, 101, 120, 97, 109, 112, 108, 101, 32|...] ``` -# Description +## Description Module `tokenize` aims to provide a straightforward tool for tokenizing text into a simple format. It is the result of a learning exercise, and it is far from perfect. If there is sufficient interest from myself or anyone else, I'll try to improve it. -It is packaged as an SWI-Prolog pack, available [here](http://www.swi-prolog.org/pack/list?p=tokenize). Install it into your SWI-Prolog system with the query +It is packaged as an SWI-Prolog pack, available [here](http://www.swi-prolog.org/pack/list?p=tokenize). Install it into your SWI-Prolog system with the query ```prolog ?- pack_install(tokenize). @@ -25,6 +32,6 @@ It is packaged as an SWI-Prolog pack, available [here](http://www.swi-prolog.org Please [visit the wiki](https://github.com/aBathologist/tokenize/wiki/tokenize.pl-options-and-examples) for more detailed instructions and examples, including a full list of options supported. -# Contributing +## Contributing See [CONTRIBUTING.md](./CONTRIBUTING.md). 
diff --git a/comment-wip/README.md b/comment-wip/README.md new file mode 100644 index 0000000..c1c3fd9 --- /dev/null +++ b/comment-wip/README.md @@ -0,0 +1,4 @@ +WIP code towards tokenization of comments. + +It was extracted here because it's not ready for release, but we want to keep it +available for the author to resume work on it. diff --git a/comment-wip/comment.pl b/comment-wip/comment.pl new file mode 100644 index 0000000..cea7fd6 --- /dev/null +++ b/comment-wip/comment.pl @@ -0,0 +1,115 @@ +:- module(comment, + [comment//2, + comment_rec//2, + comment_token//3, + comment_token_rec//3]). + +/** Tokenizing comments +This module defines matchers for comments used by the tokenize module. (Note +that we will use matcher as a name for dcg rules that match parts of the codes +list). + +@author Stefan Israelsson Tampe +@license LGPL v2 or later + +Interface Note: +Start and End matchers is a matcher (dcg rule) that is either evaluated with no +extra argument (--> call(StartMatcher)) and it will just match it's token or it +can have an extra argument producing the codes matched by the matcher e.g. used +as --> call(StartMatcher,MatchedCodes). The matchers match start and end codes +of the comment, the 2matcher type will represent these kinds of dcg rules or +matchers 2 is because they support two kinds of arguments to the dcg rules. +For examples +see: + + @see tests/test_comments.pl + +The matchers predicates exported and defined are: + + comment(+Start:2matcher,+End:2matcher) + - anonymously match a non recursive comment + + comment_rec(+Start:2matcher,+End:2matcher,2matcher) + - anonymously match a recursive comment + + coment_token(+Start:2matcher,+End:2matcher,-Matched:list(codes)) + - match an unrecursive comment outputs the matched sequence used + for building a resulting comment token + + coment_token_rec(+Start:2matcher,+End:2matcher,-Matched:list(codes)) + - match an recursive comment outputs the matched sequence used + for building a resulting comment token +*/ + + + +%% comment(+Start:2matcher,+End:2matcher) +% non recursive non tokenizing matcher + +comment_body(E) --> call(E),!. +comment_body(E) --> [_],comment_body(E). + +comment(S,E) --> + call(S), + comment_body(E). + +%% comment_token(+Start:2matcher,+End:2matcher,-Matched:list(codes)) +% non recursive tokenizing matcher + +comment_body_token(E,Text) --> + call(E,HE),!, + {append(HE,[],Text)}. + +comment_body_token(E,[X|L]) --> + [X], + comment_body_token(E,L). + +comment_token(S,E,Text) --> + call(S,HS), + {append(HS,T,Text)}, + comment_body_token(E,T). + +%% comment_token_rec(+Start:2matcher,+End:2matcher,-Matched:list(codes)) +% recursive tokenizing matcher + +% Use this as the initial continuation, will just tidy up the matched result +% by ending the list with []. +comment_body_rec_start(_,_,[]). + +comment_body_token_rec(_,E,Cont,Text) --> + call(E,HE),!, + {append(HE,T,Text)}, + call(Cont,T). + +comment_body_token_rec(S,E,Cont,Text) --> + call(S,HS),!, + {append(HS,T,Text)}, + comment_body_token_rec(S,E,comment_body_token_rec(S,E,Cont),T). + +comment_body_token_rec(S,E,Cont,[X|L]) --> + [X], + comment_body_token_rec(S,E,Cont,L). + +comment_token_rec(S,E,Text) --> + call(S,HS), + {append(HS,T,Text)}, + comment_body_token_rec(S,E,comment_body_rec_start,T). + +%% comment_rec(+Start:2matcher,+End:2matcher) +% recursive non tokenizing matcher + +comment_body_rec(_,E) --> + call(E),!. + +comment_body_rec(S,E) --> + call(S),!, + comment_body_rec(S,E), + comment_body_rec(S,E). 
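+
+% In the clause above, a nested start delimiter recurses: the first recursive
+% call consumes the inner comment through its end delimiter, and the second
+% resumes the body of the enclosing comment.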
+ +comment_body_rec(S,E) --> + [_], + comment_body_rec(S,E). + +comment_rec(S,E) --> + call(S), + comment_body_rec(S,E). diff --git a/comment-wip/test_comments.pl b/comment-wip/test_comments.pl new file mode 100644 index 0000000..aa7f907 --- /dev/null +++ b/comment-wip/test_comments.pl @@ -0,0 +1,104 @@ +:- dynamic user:file_search_path/2. +:- multifile user:file_search_path/2. + +% Add the package source files relative to the current file location +:- prolog_load_context(directory, Dir), + atom_concat(Dir, '/../prolog', PackageDir), + asserta(user:file_search_path(package, PackageDir)). + +:- use_module(package(comment)). +:- begin_tests(tokenize_comment). + +id(X) --> {atom_codes(X,XX)},XX. +id(X,XX) --> {atom_codes(X,XX)},XX. + +mytest(Tok,S,U) :- + atom_codes(S,SS), + call_dcg(Tok,SS,U). + +test_comment(S) :- + mytest(comment(id('<'),id('>')),S,[]). + +test_comment_rec(S) :- + mytest(comment_rec(id('<'),id('>')),S,[]). + +test_comment_token(S,T) :- + mytest(comment_token(id('<'),id('>'),TT),S,[]), + atom_codes(T,TT). + +test_comment_token_rec(S,T) :- + mytest(comment_token_rec(id('<'),id('>'),TT),S,[]), + atom_codes(T,TT). + +start(AA) :- + ( + catch(b_getval(a,[N,A]),_,N=0) -> + true; + N=0 + ), + NN is N + 1, + ( + N == 0 -> + AA = _; + AA = A + ), + b_setval(a,[NN,AA]). + +end(A) :- + b_getval(a,[N,A]), + NN is N - 1, + b_setval(a,[NN,A]). + +left(A) --> + {atom_codes(A,AA)}, + AA, + {start(B)}, + [B]. + +left(A,C) --> + {atom_codes(A,AA)}, + AA, + {start(B)}, + [B], + {append(AA,[B],C)}. + +right(A) --> + {end(B)}, + [B], + {atom_codes(A,AA)}, + AA. + +right(A,C) --> + {end(B)}, + [B], + {atom_codes(A,AA)}, + AA, + {append([B],AA,C)}. + +test_adapt(S,T) :- + mytest(comment_token_rec(left('<'),right('>'),TT),S,[]), + atom_codes(T,TT). + + +:- multifile test/2. + +test('Test comment',[true(test_comment(''))]) :- true. +test('Test comment_rec',[true(test_comment_rec('>'))]) :- true. +test('Test comment_token',[true(A == B)]) :- + A='', + test_comment_token(A,B). + +test('Test comment_token_rec',[true(A == B)]) :- + A='>', + test_comment_token(A,B). + +test('Test comment_token_rec advanced 1',[true(A == B)]) :- + A='<1 alla2> <1 balla2> 1>1>', + test_adapt(A,B). + +test('Test comment_token_rec advanced 2',[true(A == B)]) :- + A='<2 alla1> <2 balla1> 2>2>', + test_adapt(A,B). + + +:- end_tests(tokenize_comment). diff --git a/design_notes.md b/design_notes.md new file mode 100644 index 0000000..e84fade --- /dev/null +++ b/design_notes.md @@ -0,0 +1,45 @@ +# Design Notes + +Initially extracted from conversation with +[@Annieppo](https://github.com/Anniepoo) and [@nicoabie](https://github.com/nicoabie) in +##prolog on [freenode](https://freenode.net/). + +The library started as a very simple and lightweight set of predicates for a +common, but very limited, form of lexing. As we extend it, we aim to maintain a +modest scope in order to achieve a sweet spot between ease of use and powerful +flexibility. + +## Scope and Aims + +`tokenize` does not aspire to become an industrial strength lexer generator. We +aim to serve most users needs between raw input and a structured form ready for +parsing by a DCG. + +If a user is parsing a language with keywords such as `class`, `module`, etc., +and wants to distinguish these from variable names, `tokenize` isn't going to +give you this out of the box. But, it should provide an easy means of achieving +this result through a subsequent lexing pass. 
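+
+For example, such a pass could be a plain walk over the token list (an
+illustrative sketch only; `keyword/1` and `keywordize/2` are not part of this
+library):
+
+```prolog
+keyword(class).
+keyword(module).
+
+% Promote word tokens that name keywords; leave all other tokens untouched.
+keywordize([], []).
+keywordize([word(W)|Ts], [keyword(W)|Rs]) :- keyword(W), !, keywordize(Ts, Rs).
+keywordize([T|Ts], [T|Rs]) :- keywordize(Ts, Rs).
+```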
+ +## Some Model Users + +* somebody making a computer language + * needs to be able to distinguish keywords, variables and literals + * needs to be able to identify comments +* somebody making a parser for an interactive fiction game + * needs to handle stuff like "William O. N'mutu-O'Connell went to the market" +* somebody wanting to analyze human texts + * wanting to do some analysis on New York Times articles, they want to first + process the articles into meaningful tokens + +## Design Rules + +* We don't parse. +* Every token generated is callable (i.e., an atom or compound). + * Example of an possible compound token: `space(' ')`. + * Example of a possible atom token: `escape`. + tokenization need to return tokens represented with the same arity) +* Users should be able to determine the kind of token by unification. +* Users should be able to clearly see and specify the precedence for tokenizaton + * E.g., given `"-12.3"`, `numbers, punctuation` should yield `[pnct('-'), + number(12), pnct('.'), number(3)]` while `punctuation, numbers` should yield + `[number(-12.3)]`. diff --git a/pack.pl b/pack.pl index 174019f..68438aa 100644 --- a/pack.pl +++ b/pack.pl @@ -1,10 +1,10 @@ name(tokenize). -title('A nascent tokenization library'). +title('A simple tokenization library'). -version('0.1.2'). -download('https://github.com/aBathologist/tokenize/release/*.zip'). +version('1.0.0'). +download('https://github.com/shonfeder/tokenize/release/*.zip'). author('Shon Feder', 'shon.feder@gmail.com'). packager('Shon Feder', 'shon.feder@gmail.com'). maintainer('Shon Feder', 'shon.feder@gmail.com'). -home('https://github.com/aBathologist/tokenize'). +home('https://github.com/shonfeder/tokenize'). diff --git a/prolog/tokenize.pl b/prolog/tokenize.pl index a177bf9..6923d64 100644 --- a/prolog/tokenize.pl +++ b/prolog/tokenize.pl @@ -25,6 +25,12 @@ */ +:- use_module(library(dcg/basics), [eos//0, number//1]). +:- use_module(tokenize_opts). + +% Ensure we interpret back ticks as enclosing code lists in this module. +:- set_prolog_flag(back_quotes, codes). + %% tokenize(+Text:list(code), -Tokens:list(term)) is semidet. % % @see tokenize/3 is called with an empty list of options: thus, with defaults. @@ -47,23 +53,33 @@ % * a word (contiguous alpha-numeric chars): `word(W)` % * a punctuation mark (determined by `char_type(C, punct)`): `punct(P)` % * a control character (determined by `char_typ(C, cntrl)`): `cntrl(C)` -% * a space ( == ` `): `spc(S)`. +% * a space ( == ` `): `space(S)`. % -% Valid options are: +% Valid options are: % -% * cased(+bool) : Determines whether tokens perserve cases of the source text. -% * spaces(+bool) : Determines whether spaces are represted as tokens or discarded. -% * cntrl(+bool) : Determines whether control characters are represented as tokens or discarded. -% * punct(+bool) : Determines whether punctuation characters are represented as tokens or discarded. -% * to(+on_of([strings,atoms,chars,codes])) : Determines the representation format used for the tokens. +% * cased(+bool) : Determines whether tokens perserve cases of the source +% text. +% * spaces(+bool) : Determines whether spaces are represted as tokens or +% discarded. +% * cntrl(+bool) : Determines whether control characters are represented as +% tokens or discarded. +% * punct(+bool) : Determines whether punctuation characters are represented +% as tokens or discarded. % * pack(+bool) : Determines whether tokens are packed or repeated. 
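+% * numbers(+bool) : Determines whether numeric text is tokenized as
+%   `number(N)` terms (`true` by default).
+% * strings(+bool) : Determines whether double-quoted text is tokenized as
+%   `string(S)` terms (`true` by default).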
+% * to(+one_of([strings,atoms,chars,codes])) : Determines the representation +% format used for the tokens. -% TODO is it possible to achieve the proper semidet without the cut? +% TODO is it possible to achieve the proper semidet without the cut? +% Annie sez some parses are ambiguous, not even sure the cut should be +% there -tokenize(Text, Tokens, Options) :- +tokenize(Text, ProcessedTokens, Options) :- must_be(nonvar, Text), string_codes(Text, Codes), - phrase(process_options, [Options-Codes], [Options-Tokens]), + process_options(Options, PreOpts, TokenOpts, PostOpts), + preprocess(PreOpts, Codes, ProcessedCodes), + phrase(tokens(TokenOpts, Tokens), ProcessedCodes), + postprocess(PostOpts, Tokens, ProcessedTokens), !. %% untokenize(+Tokens:list(term), -Untokens:list(codes)) is semidet. @@ -112,104 +128,62 @@ read_file_to_codes(File, Codes, [encoding(utf8)]), tokenize(Codes, Tokens, Options). -% PROCESSING OPTIONS -% -% NOTE: This way of processing options is probably stupid. -% I will correct/improve/rewrite it if there is ever a good -% reason to. But for now, it works. -% -% TODO: Throw exception if invalid options are passed in. -% At the moment it just fails. - -%% Dispatches dcgs by option-list functors, with default values. -process_options --> - opt(cased, false), - non_opt(tokenize_text), - opt(spaces, true), - opt(cntrl, true), - opt(punct, true), - opt(to, atoms), - opt(pack, false). - -%% opt(+OptionFunctor:atom, DefaultValue:nonvar) -% -% If dcg functor is identical to the option name with 'opt_' prefixed, -% then the dcg functor can be omitted. - -opt(Opt, Default) --> - { atom_concat('opt_', Opt, Opt_DCG) }, - opt(Opt, Default, Opt_DCG). - -%% opt(+OptionFunctor:atom, +DefaultValue:nonvar, +DCGFunctor:atom). -opt(Opt, Default, DCG) --> - state(Opts-Text0, Text0), - { - pad(Opt, Selection, Opt_Selection), - option(Opt_Selection, Opts, Default), - DCG_Selection =.. [DCG, Selection] - }, - DCG_Selection, - state(Text1, Opts-Text1). -%% This ugly bit should be dispensed with... -opt(Opt, Default, _) --> - state(Opts-_), - { - var(Default), \+ option(Opt, Opts), - writeln("Unknown options passed to opt//3: "), - write(Opt) - }. - -%% non_opt(+DCG). -% -% Non optional dcg to dispatch. Passes the object of concern -% without the options list, then recovers option list. - -non_opt(DCG) --> - state(Opts-Text0, Text0), - DCG, - state(Text1, Opts-Text1). - -state(S0), [S0] --> [S0]. -state(S0, S1), [S1] --> [S0]. - -%% Dispatching options: - -opt_cased(true) --> []. -opt_cased(false) --> state(Text, LowerCodes), - { - text_to_string(Text, Str), - string_lower(Str, LowerStr), - string_codes(LowerStr, LowerCodes) - }. - -tokenize_text --> state(Text, Tokenized), - { phrase(tokens(Tokenized), Text) }. - -opt_spaces(true) --> []. -opt_spaces(false) --> state(T0, T1), - { exclude( =(spc(_)), T0, T1) }. - -opt_cntrl(true) --> []. -opt_cntrl(false) --> state(T0, T1), - { exclude( =(cntrl(_)), T0, T1) }. - -opt_punct(true) --> []. -opt_punct(false) --> state(T0, T1), - { exclude( =(punct(_)), T0, T1) }. -opt_to(codes) --> []. -opt_to(Type) --> state(CodeTokens, Tokens), - { maplist(token_to(Type), CodeTokens, Tokens) }. +/*********************************** +* {PRE,POST}-PROCESSING HELPERS * +***********************************/ -opt_pack(false) --> []. -opt_pack(true) --> state(T0, T1), - { phrase(pack_tokens(T1), T0) }. - - - -%% POST PROCESSING - -%% Convert tokens to alternative representations. 
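+
+% preprocess/3 applies code-level transformations (currently just case folding)
+% to the input before tokenization; postprocess/3 then filters, converts, and
+% optionally packs the tokens that tokenization produced.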
+preprocess(PreOpts, Codes, ProcessedCodes) :- + preopts_data(cased, PreOpts, Cased), + DCG_Rules = ( + preprocess_case(Cased) + ), + phrase(process_dcg_rules(DCG_Rules, ProcessedCodes), Codes). + +postprocess(PostOpts, Tokens, ProcessedTokens) :- + postopts_data(spaces, PostOpts, Spaces), + postopts_data(cntrl, PostOpts, Cntrl), + postopts_data(punct, PostOpts, Punct), + postopts_data(to, PostOpts, To), + postopts_data(pack, PostOpts, Pack), + DCG_Rules = ( + keep_token(space(_), Spaces), + keep_token(cntrl(_), Cntrl), + keep_token(punct(_), Punct), + convert_token(To) + ), + phrase(process_dcg_rules(DCG_Rules, PrePackedTokens), Tokens), + (Pack + -> phrase(pack_tokens(ProcessedTokens), PrePackedTokens) + ; ProcessedTokens = PrePackedTokens + ). + + +/*********************************** +* POSTPROCESSING HELPERS * +***********************************/ + +% Process a stream through a pipeline of DCG rules +process_dcg_rules(_, []) --> eos, !. +process_dcg_rules(DCG_Rules, []) --> DCG_Rules, eos, !. +process_dcg_rules(DCG_Rules, [C|Cs]) --> + DCG_Rules, + [C], + process_dcg_rules(DCG_Rules, Cs). + +preprocess_case(true), [C] --> [C]. +preprocess_case(false), [CodeOut] --> [CodeIn], + { to_lower(CodeIn, CodeOut) }. + +keep_token(_, true), [T] --> [T]. +keep_token(Token, false) --> [Token]. +keep_token(Token, false), [T] --> [T], {T \= Token}. + +convert_token(Type), [Converted] --> [Token], + {token_to(Type, Token, Converted)}. + +% Convert tokens to alternative representations. +token_to(_, number(X), number(X)) :- !. token_to(Type, Token, Converted) :- ( Type == strings -> Conversion = inverse(string_codes) ; Type == atoms -> Conversion = inverse(atom_codes) @@ -218,9 +192,7 @@ ), call_into_term(Conversion, Token, Converted). - -%% Packing repeating tokens -% +% Packing repeating tokens pack_tokens([T]) --> pack_token(T). pack_tokens([T|Ts]) --> pack_token(T), pack_tokens(Ts). @@ -228,37 +200,73 @@ pack(X, Count) --> [X], pack(X, 1, Count). -pack(_, Total, Total) --> call(eos). +pack(_, Total, Total) --> eos. pack(X, Total, Total), [Y] --> [Y], { Y \= X }. pack(X, Count, Total) --> [X], { succ(Count, NewCount) }, pack(X, NewCount, Total). +/************************** +* TOKENIZATION * +**************************/ + +tokenize_text --> state(Text, Tokenized), + { phrase(tokens(Tokenized), Text) }. + % PARSING -tokens([T]) --> token(T), call(eos), !. -tokens([T|Ts]) --> token(T), tokens(Ts). +tokens(Opts, [T]) --> token(Opts, T), eos, !. +tokens(Opts, [T|Ts]) --> token(Opts, T), tokens(Opts, Ts). % NOTE for debugging % tokens(_) --> {length(L, 200)}, L, {format(L)}, halt, !. -token(word(W)) --> word(W), call(eos), !. -token(word(W)),` ` --> word(W), ` `. -token(word(W)), C --> word(W), (punct(C) ; cntrl(C) ; nasciis(C)). -token(spc(S)) --> spc(S). -token(punct(P)) --> punct(P). -token(cntrl(C)) --> cntrl(C). -token(other(O)) --> nasciis(O). +token(Opts, string(S)) --> + { tokenopts_data(strings, Opts, true) }, + string(S). + +token(Opts, number(N)) --> + { tokenopts_data(numbers, Opts, true) }, + number(N), !. +token(_Opts, word(W)) --> word(W), eos, !. +token(_Opts, word(W)),` ` --> word(W), ` `. +token(_Opts, word(W)), C --> word(W), (punct(C) ; cntrl(C) ; nasciis(C)). -spc(` `) --> ` `. +token(_Opts, space(S)) --> space(S). +token(_Opts, punct(P)) --> punct(P). +token(_Opts, cntrl(C)) --> cntrl(C). +token(_Opts, other(O)) --> nasciis(O). + +space(` `) --> ` `. sep --> ' '. -sep --> call(eos), !. +sep --> eos, !. word(W) --> csyms(W). 
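+
+% word//1 matches one or more `csym` codes (letters, digits, or the underscore).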
+% TODO Make open and close brackets configurable +string(S) --> string(`"`, `"`, S). +string(OpenBracket, CloseBracket, S) --> string_start(OpenBracket, CloseBracket, S). + +% A string starts when we encounter an OpenBracket +string_start(OpenBracket, CloseBracket, Cs) --> + OpenBracket, string_content(OpenBracket, CloseBracket, Cs). + +% String content is everything up until we hit a CloseBracket +string_content(_OpenBracket, CloseBracket, []) --> CloseBracket, !. +% String content includes a bracket following an escape, but not the escape +string_content(OpenBracket, CloseBracket, [C|Cs]) --> + escape, (CloseBracket | OpenBracket), + {[C] = CloseBracket}, + string_content(OpenBracket, CloseBracket, Cs). +% String content includes any character that isn't a CloseBracket or an escape. +string_content(OpenBracket, CloseBracket, [C|Cs]) --> + [C], + {[C] \= CloseBracket}, + string_content(OpenBracket, CloseBracket, Cs). + csyms([L]) --> csym(L). csyms([L|Ls]) --> csym(L), csyms(Ls). @@ -266,7 +274,7 @@ % non ascii's -nasciis([C]) --> nascii(C), (call(eos), !). +nasciis([C]) --> nascii(C), eos, !. nasciis([C]),[D] --> nascii(C), [D], {D < 127}. nasciis([C|Cs]) --> nascii(C), nasciis(Cs). @@ -275,6 +283,9 @@ ' ' --> space. ' ' --> space, ' '. +escape --> `\\`. + +% Any ... --> []. ... --> [_], ... . @@ -283,8 +294,6 @@ punct([P]) --> [P], {code_type(P, punct)}. cntrl([C]) --> [C], {code_type(C, cntrl)}. -eos([], []). - %% move to general module codes_to_lower([], []). diff --git a/prolog/tokenize_opts.pl b/prolog/tokenize_opts.pl new file mode 100644 index 0000000..688077e --- /dev/null +++ b/prolog/tokenize_opts.pl @@ -0,0 +1,40 @@ +:- module(tokenize_opts, + [process_options/4, + preopts_data/3, + tokenopts_data/3, + postopts_data/3]). + +:- use_module(library(record)). + +% pre-processing options +:- record preopts( + cased:boolean=false + ). + +% tokenization options +:- record tokenopts( + numbers:boolean=true, + strings:boolean=true + ). + +% post-processing options +:- record postopts( + spaces:boolean=true, + cntrl:boolean=true, + punct:boolean=true, + to:oneof([strings,atoms,chars,codes])=atoms, + pack:boolean=false + ). + +%% process_options(+Options:list(term), -PreOpts:term, -PostOpts:term) is semidet. +% +process_options(Options, PreOpts, TokenOpts, PostOpts) :- + make_preopts(Options, PreOpts, Rest0), + make_postopts(Rest0, PostOpts, Rest1), + make_tokenopts(Rest1, TokenOpts, InvalidOpts), + throw_on_invalid_options(InvalidOpts). + +throw_on_invalid_options(InvalidOpts) :- + InvalidOpts \= [] + -> throw(invalid_options_given(InvalidOpts)) + ; true. diff --git a/test/test.pl b/test/test.pl index 49b1857..9e17e36 100755 --- a/test/test.pl +++ b/test/test.pl @@ -1,18 +1,3 @@ -#!/usr/bin/env swipl -/** Unit tests for the tokenize library - * - * To run these tests, execute this file - * - * ./test/test.pl - */ - -:- initialization(main, main). - -main(_Argv) :- - run_tests. - -:- begin_tests(tokenize). - :- dynamic user:file_search_path/2. :- multifile user:file_search_path/2. @@ -22,21 +7,149 @@ asserta(user:file_search_path(package, PackageDir)). :- use_module(package(tokenize)). +:- use_module(package(tokenize_opts)). -% TESTS START HERE +:- begin_tests(tokenize). test('Hello, Tokenize!', [true(Actual == Expected)] ) :- tokenize("Hello, Tokenize!", Actual), - Expected = [word(hello),punct(','),spc(' '),word(tokenize),punct(!)]. + Expected = [word(hello),punct(','),space(' '),word(tokenize),punct(!)]. 
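+
+% Illustrative test of the pack(true) option (cf. the README example): runs of
+% identical tokens collapse into Token(Value, Count) pairs.
+test('Packs repeated tokens',
+     [true(Actual == Expected)]
+    ) :-
+    tokenize("aa  bb", Actual, [pack(true)]),
+    Expected = [word(aa, 1), space(' ', 2), word(bb, 1)].
+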
test('Goodbye, Tokenize!', [true(Actual == Expected)] ) :- - Tokens = [word('Goodbye'),punct(','),spc(' '),word('Tokenize'),punct('!')], + Tokens = [word('Goodbye'),punct(','),space(' '),word('Tokenize'),punct('!')], untokenize(Tokens, Codes), string_codes(Actual, Codes), Expected = "Goodbye, Tokenize!". + +% OPTION PROCESSING + +test('process_options/4 throws on invalid options') :- + catch( + process_options([invalid(true)], _, _, _), + invalid_options_given([invalid(true)]), + true + ). + +test('process_options/4 sets valid options in opt records') :- + Options = [ + cased(false), % non-default preopt + strings(false), % non-default tokenopt + spaces(false) % non-default postopt + ], + process_options(Options, PreOpts, TokenOpts, PostOpts), + % Fetch the options that were set + preopts_data(cased, PreOpts, Cased), + tokenopts_data(strings, TokenOpts, Strings), + postopts_data(spaces, PostOpts, Spaces), + % These compounds are just ensure informative output on failure + assertion(cased:Cased == cased:false), + assertion(strings:Strings == strings:false), + assertion(spaces:Spaces == spaces:false). + +% NUMBERS + +test('tokenize 7.0', + [true(Actual == Expected)] + ) :- + tokenize("7.0", Actual), + Expected = [number(7.0)]. + +test('untokenize 6.3', + [true(Actual == Expected)] + ) :- + untokenize([number(6.3)], Actual), + Expected = `6.3`. + +test('tokenize number in other stuff', + [true(Actual == Expected)] + ) :- + tokenize("hi 7.0 x", Actual), + Expected = [word(hi), space(' '), number(7.0), space(' '), word(x)]. + +test('untokenize 6.3 in other stuff', + [true(Actual == Expected)] + ) :- + untokenize([word(hi), number(6.3)], Actual), + Expected = `hi6.3`. + +test('can disable number tokens', + [true(Actual == Expected)] + ) :- + tokenize("hi 7.0 x", Actual, [numbers(false)]), + Expected = [word(hi), space(' '), word('7'), punct('.'), word('0'), space(' '), word(x)]. + + +% STRINGS + +test('Tokenizing the empty strings', + [true(Actual == Expected)] + ) :- + tokenize(`""`, Actual), + Expected = [string('')]. + +test('Untokenizing an empty string', + [true(Actual == Expected)] + ) :- + untokenize([string('')], Actual), + Expected = `""`. + +test('Tokenizing a string with just two escapes', + [true(Actual == Expected)] + ) :- + tokenize(`"\\\\"`, Actual), + Expected = [string('\\\\')]. + +test('Untokenizing a string with just two characters', + [true(Actual == Expected)] + ) :- + untokenize([string('aa')], Actual), + Expected = `"aa"`. + +test('Extracts a string', + [true(Actual == Expected)] + ) :- + tokenize(`"a string"`, Actual), + Expected = [string('a string')]. + +test('Extracts a string among other stuff', + [true(Actual == Expected)] + ) :- + tokenize(`Some other "a string" stuff`, Actual), + Expected = [word(some),space(' '),word(other),space(' '),string('a string'),space(' '),word(stuff)]. + +test('Extracts a string that includes escaped brackets', + [true(Actual == Expected)] + ) :- + tokenize(`"a \\"string\\""`, Actual), + Expected = [string('a "string"')]. + +test('Tokenization preserves escaped characters', + [true(Actual == Expected)] + ) :- + tokenize(`"\\tLine text\\n"`, Actual), + Expected = [string('\\tline text\\n')]. + +test('Extracts a string that includes a doubly nested string', + [true(Actual == Expected)] + ) :- + tokenize(`"a \\"sub \\\\"string\\\\"\\""`, Actual), + Expected = [string('a "sub \\"string\\""')]. 
+ +test('can disable string tokens', [true(Actual == Expected)] ) :- + tokenize(`some "string".`, Actual, [strings(false)]), + Expected = [word(some), space(' '), punct('"'), word(string), punct('"'), punct('.')]. + +test('Untokenizes string things', [true(Actual == Expected)] ) :- + untokenize([string('some string')], Actual), + Expected = `"some string"`. + :- end_tests(tokenize).