R. Muller
Getting Started with OCaml Part 2: Structured Types & Recursive Functions
- Structured Types
- Recursive Functions
- Functions are Values Part 1
In the previous section, we learned the basics of OCaml, the basic data types, operators etc. In this section we're going to explore OCaml's built-in structured types, ways to make new types and how to process data of unbounded size using recursive functions.
# let pair = (1 + 2, Char.chr 65);;
val pair : int * char = (3, 'A') (* int * char is -product- type *)
# fst pair;; (* fst : 'a * 'b -> 'a is pervasive *)
- : int = 3
# snd pair;;
- : char = 'A'
# let (n, a) = pair; (* matching pattern (n, a) against pair *)
val n : int = 3
val a : char = 'A'
# type point = {x : int; y : int};; (* x and y are field labels *)
type point = {x : int; y : int}
# let p = {x = 2 * 2; y = 8};;
val p : point = {x = 4; y = 8}
# p.x;;
- : int = 4
# let {x; y} = p;;
val x : int = 4
val y : int = 8
Sum types are especially important in programming languages. They're sometimes called union types and sometimes called variants.
# type fruit = Apple | Lemon | Tomato | Lime;;
type fruit = Apple | Lemon | Tomato | Lime
# let t = Tomato;;
val t : fruit = Tomato
We can think of Apple
, Lemon
, Tomato
and Lime
as a complete enumeration of the constants of type fruit
. OCaml has a powerful match form for processing values of sum type.
(* isCitrus : fruit -> bool
*)
let isCitrus fruit =
match fruit with
| Apple -> false
| Lemon -> true
| Tomato -> false
| Lime -> true
let isCitrus fruit = (* slightly more succinct *)
match fruit with
| Lemon | Lime -> true
| _ -> false
type herb = Basil | Celery | Banana
type food = Fruit of fruit | Herb of herb
The symbols Fruit
and Herb
are constructors. We can think of the Fruit
constructor as a function from fruit
to food
— if you apply it to a value of type fruit, it returns a value of type food.
# let aBite = Fruit Tomato;;
val aBite : food = Fruit Tomato
(* tomLikes : food -> bool Tom likes Apples and Celery
*)
let tomLikes food =
match food with
| Fruit fruit -> (match fruit with | Apple -> true | _ -> false)
| Herb herb -> (match herb with | Celery -> true | _ -> false)
# Apple :: [];;
- : fruit list = [Apple]
The operator ::
, called "cons", is a built-in list constructor that accepts a list element and a list and returns a new list with one new element. When we type ordinary user-friendly list notation:
[Lime; Apple]
OCaml views it as a sequence of conses ending in the empty list []
called "nil".
# Lime :: Apple :: [];;
- : fruit list = [Lime; Apple]
# let fruits = [Lime; Apple];;
val fruits : fruit list = [Lime; Apple]
# let head :: tail = fruits;; (* matching a cons *)
val head : fruit = Lime
val tail : fruit list = [Apple] (* the tail of a list is a list! *)
# List.length fruits;;
- : int = 2
Lists are polymorphic — they can hold values of any type as long as all the values in the list are of the same type. What if we want a list of floats and ints together? Must make a combining type:
type number = Float of float | Int of int
# let numbers = [Float 3.14; Int 343; Float 0.5];;
val numbers : number list = [Float 3.14; Int 343; Float 0.5]
The type list is defined in OCaml as a recursive sum of products (!). Roughly speaking:
type 'a list = [] | :: of 'a * 'a list
In this notation, the symbol 'a
is a type variable standing for any type. The larger "definition" is really just a slogan suggesting how lists work, the actual type is built-in as a special case because it is so ubiquitous.
# let foods = [Fruit Lemon; Herb Banana; Fruit Lime; Fruit Apple];;
val foods : food list = [Fruit Lemon; Herb Banana; Fruit Lime; Fruit Apple]
(* countFruits : food list -> int
*)
let rec countFruits foods =
match foods with
| [] -> 0
| (Fruit _) :: foods -> 1 + countFruits foods (* NB: recycling the variable name foods! *)
| _ :: foods -> countFruits foods
Key Idea
when writing a function that processes a value of a recursive type, one can often solve a major sub-problem by applying the function recursively to those parts of the value that are defined recursively in the type.
(* append : 'a list -> 'a list -> 'a list
For a call (append xs ys), this function definition is linear in (List.length xs).
*)
let rec append xs ys =
match xs with
| [] -> ys
| x :: xs -> x :: append xs ys
(* reverse : 'a list -> 'a list
For a call a (reverse xs), this function is QUADRATIC in (List.length xs). Why? It calls
a linear function a linear number of times. Not good!
*)
let rec reverse xs =
match xs with
| [] -> []
| x :: xs -> append (reverse xs) [x]
(* A more efficient, tail-recursive version. This version is LINEAR in (List.length xs)
*)
let reverse xs =
let rec loop xs answer =
match xs with
| [] -> answer
| x :: xs -> loop xs (x :: answer)
in
loop xs []
(* copy : 'a -> int -> 'a list
*)
let rec copy item n =
match n = 0 with
| true -> []
| false -> item :: copy item (n - 1)
(* nth : int -> 'a list -> 'a
*)
let rec nth n xs =
match (n = 0, xs) with
| (true, x :: _) ->
| (true, []) -> failwith "nth: not enough elements"
| (false, _ :: xs) -> nth (n - 1) xs
Trees
type 'a tree = Empty | Node of {info : a'; left : 'a tree; right; 'a tree}
A 'a tree
is a binary tree carrying information of polymorphic type 'a
. Since the left
and right fields are defined recursively, processing 'a tree
s will usually involve some sort of recursive calls on those fields. Note that there is no null, in OCaml — the base case of the recursive definition is explicitly defined as Empty
.
type sym = A | B | C | D | E
let t : sym tree = Node {info = A; A
left = Node {info = B; / \
left = Empty; B C
right = Node {info = D; left = Empty; Right = Empty}} \
right = Node {info = C; left = Empty; Right = Empty}} D
(* preorder : 'a tree -> 'a list
*)
let rec preorder tree =
match tree with
| Empty -> []
| Node{info; left; right} -> info :: (preorder left) @ (preorder right)
(* breadthFirst : 'a tree -> 'a list
*)
let breadFirst tree =
let rec loop forest =
match forest with
| [] -> []
| Empty :: forest -> loop forest
| (Node{info; left; right}) :: forest ->
info :: loop (left :: right :: forest)
in
loop [tree]
Abstract Syntax Trees
type value = int
type ast = Literal of int
| Plus of {left : ast; right : ast}
| Times of {left : ast; right : ast}
let fmt = Printf.sprintf
(* format : ast -> string
*)
let rec format ast =
match ast with
| Literal i -> string_of_int i
| Plus{left; right} -> fmt "(%s + %s)" (format left) (format right)
| Times{left; right} -> fmt "(%s * %s)" (format left) (format right)
(* eval : ast -> value
*)
let rec eval ast =
match ast with
| Literal i -> i
| Plus {left; right} -> (eval left) + (eval right)
| Times {left; right} -> (eval left) * (eval right)
To be continued.