-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathtypes.nw
1347 lines (1213 loc) · 46.1 KB
/
types.nw
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
% -*- mode: Noweb; noweb-code-mode: c++-mode; c-basic-offset: 8; -*-
%% \begin{flushright}
%% Mathematicians are like Frenchmen: \\
%% whatever you say to them they translate into their own language, \\
%% and forthwith it is something entirely different.\\
%% {\em Johann Wolfgang von Goethe} % (1749 - 1832)
%% \end{flushright}
%%
%% The setting we use for representing individuals is a polymorphically
%% typed higher-order logic (see \cite{lloyd02logic}). This provides a
%% variety of data types for representing individuals. The following is a
%% summary of the relevant parts of the logic. Some formal definitions
%% from \cite[\S 3]{lloyd02logic} are reproduced here.
%% %% One can think of all the data type classes, defined here and in
%% %% AlkemyBase, as the set of type constructors, and the instances
%% %% defined by the read function of all the nullary type constructors
%% %% as the set of constants.
%%
%% \section{Overview of Higher-order Logic}
%% \label{subsec:logic overview}
%% In brief, the logic is based on the simple theory of types
%% \cite{church} with several extensions, the
%% most significant of which is the polymorphic extension to the type
%% system. The alphabet of the logic consists of four sets, a set
%% $\mathfrak{T}$ of type constructors, a set $\mathfrak{P}$ of
%% parameters (type variables), a set $\mathfrak{C}$ of constants, and a
%% set $\mathfrak{V}$ of variables. Always included in the set
%% $\mathfrak{T}$ are type constructors {\bf 1} and $\Omega$ both of
%% arity 0. {\bf 1} is the type of some distinguished singleton set and
%% $\Omega$ is the type of the booleans. The types of the logic are built
%% up from the set of type constructors and the set of parameters using
%% the symbols $\rightarrow$ and $\times$. The former is used to
%% construct function types, the latter, product types. A type is closed
%% if it contains no type variables. The set of all closed types is
%% denoted by $\mathfrak{S}^c$.
%%
%% $\mathfrak{C}$ is the set of constants of various types. The two
%% constants $\top$ (true) and $\bot$ (false) are always included in
%% it. One can distinguish between two kinds of constants, \emph{data
%% constructors} and \emph{functions}. In a knowledge representation
%% context, data constructors are used to represent individuals. In a
%% programming language context, data constructors are used to
%% construct data values whereas functions are used to compute on data
%% values. Functions have definitions, data constructors do not. In the
%% semantics of the logic, which is a Henkin model
%% \cite{henkin50completeness}, the data constructors are used to
%% construct models \cite{lloyd02logic}.
%%
%% The terms of the logic are the terms of the typed $\lambda$-calculus,
%% which are formed in the usual way by abstraction and application (or
%% juxtaposition) from constants and variables. Products of terms can
%% also be formed using a tuple-forming notation $(\dots)$.
%% % A formula is a term of type $\Omega$. A predicate is a function
%% % whose co-domain has type $\Omega$.
%% The set of all terms is denoted by $\mathfrak{L}$ (for language).
%%
%% \section{Basic Terms}
%% \label{subsec:basic terms}
%% A suitable class of closed terms called basic terms are used to
%% represent individuals in our application domains. Before giving the
%% definition of basic terms, we need a few more concepts. These will be
%% developed in stages as needed. First, we identify a subset of terms
%% called normal terms that are suitable for knowledge representation. We
%% point out the undesirable non-uniqueness of normal terms and describe
%% a way to solve this problem. Finally, we define basic terms as the
%% distinguished representatives of equivalence classes of normal terms.
%%
%% For reasons that will become clear, we need the concept of a
%% default term for each type. For example, we can use 0 as the default
%% term for integers, false for the booleans, etc. The set of default
%% terms is denoted by $\mathfrak{D}$.
%%
%% \paragraph{Normal Terms}
%% We now give the definition of normal terms.
%% \begin{definition}
%% The set of {\em normal terms}, $\mathfrak{N}$, is defined inductively
%% as follows.
%%
%% \begin{enumerate}
%% \item If $C$ is a data constructor having signature
%% $\sigma_1 \rightarrow \cdots \rightarrow \sigma_n \rightarrow (T
%% \; a_1 \ldots a_k)$ and $t_1, \ldots, t_n \in \mathfrak{N}$ $(n
%% \geq 0$) such that $C \; t_1 \ldots t_n \in \mathfrak{L}$, then
%% $C \; t_1 \ldots t_n \in \mathfrak{N}$.
%%
%% \item If $t_1, \ldots, t_n \in \mathfrak{N}$,
%% $s_1, \ldots, s_n \in \mathfrak{N}$ $(n \geq 0$), $s_0 \in
%% \mathfrak{D}$ and \[ \lambda x.\mathit{if} \; x = t_1 \;
%% \mathit{then} \; s_1 \; \mathit{else} \; \ldots \; \mathit{if}
%% \; x = t_n \; \mathit{then} \; s_n \; \mathit{else} \; s_0 \in
%% \mathfrak{L}, \] then \[ \lambda x.\mathit{if} \; x = t_1 \;
%% \mathit{then} \; s_1 \; \mathit{else} \; \ldots \; \mathit{if}
%% \; x = t_n \; \mathit{then} \; s_n \; \mathit{else} \; s_0 \in
%% \mathfrak{N}. \]
%%
%% \item If $t_1, \ldots, t_n \in \mathfrak{N}$ $(n \geq 0$) and
%% $(t_1, \ldots, t_n) \in \mathfrak{L}$, then $(t_1, \ldots, t_n)
%% \in \mathfrak{N}$.
%% \end{enumerate}
%% \end{definition}
%%
%% Normal terms formed from Part 1 of the definition through application
%% are called \emph{normal structures} and always have a type of the form
%% $T \alpha_{1} \dots \alpha_{n}$. Thus, we obtain atomic
%% individuals like natural numbers, integers, etc, from data
%% constructors with 0 arity, and more complicated structures like lists,
%% trees from data constructors with non-zero arities.
%%
%% Normal terms formed from Part 2 of the definition through lambda
%% abstraction are called \emph{normal abstractions}. They always have a
%% type of the form $\beta \rightarrow \gamma$. This class of normal
%% terms are essentially a form of finite lookup tables, where the value
%% for element $t_{n}$ is $s_{n}$. Now we see why default terms are
%% needed. In a finite lookup table, only values for a (small) set of
%% elements are usually defined. To implement a complete lookup table
%% defined for all elements of a certain type, we can simply define all
%% elements that are not explicitly specified as having the default
%% value. The lambda function definition is mathematically very neat,
%% allowing one to model sets, multisets and similar data types
%% intensionally.
%%
%% To see how this can be done, consider the set
%% $\{1,2\}$. In higher-order logic, sets can be viewed as predicates.
%% For the given set, we can represent it intensionally using the term
%% \begin{gather}
%% \lambda x. \text{if} \; x = 1 \; \text{then} \; \top \; \text{else} \;
%% \text{if} \; x = 2 \; \text{then} \; \top \; \text{else} \; \bot.
%% \label{f0}
%% \end{gather}
%% Similarly, a multiset with 4 occurrences of A and 23 occurrences of B
%% can be represented intensionally as the term
%% \[ \lambda x. \text{if} \; x = A \; \text{then} \; 4 \; \text{else} \;
%% \text{if} \; x = B \; \text{then} \; 23 \; \text{else} \; 0. \]
%%
%% Part 3 of the definition simply states that one can form a tuple from
%% normal terms to obtain another normal term. Terms formed in this way
%% are called \emph{normal tuples} and they always have a type of the form
%% $\alpha_{1} \times \dots \times \alpha_{n}$.
%%
%% One problem with the use of normal terms as the knowledge
%% representation language is that normal abstractions are not unique,
%% \ie there are syntactically different normal terms that mean the same
%% thing. Going back to the example $\{1,2\}$ above, we can see that the
%% following two function definitions, though syntactically different,
%% are semantically equivalent to the one given earlier.
%% \begin{gather}
%% \lambda x. \text{if} \; x = 2 \; \text{then} \; \top \; \text{else} \;
%% \text{if} \; x = 1 \; \text{then} \; \top \; \text{else} \; \bot
%% \label{f1} \\
%% \lambda x. \text{if} \; x = 2 \; \text{then} \; \top \; \text{else} \;
%% \text{if} \; x = 1 \; \text{then} \; \top \; \text{else} \; \text{if}
%% \; x = 3 \; \text{then} \; \bot \; \text{else} \; \bot \label{f2}
%% \end{gather}
%% To understand the way to fix this undesirable property of normal
%% abstractions, we need to go no further than the two illustrative
%% function definitions above. Function definition ~\ref{f1} is different
%% from function ~\ref{f0} only in the order of the subterms of the form
%% $x = y$. Function definition ~\ref{f2}, in addition to the difference
%% in order, contains redundant declarations. The intuitive idea is to
%% devise a \emph{regularise} procedure that, when given
%% syntactically different but semantically equivalent normal terms of a
%% particular type, is able to \emph{reduce}, through syntactic
%% manipulation, the different terms into identical ones. For example,
%% given function ~\ref{f1}, it will try to re-order the subterms. Given
%% function ~\ref{f2}, it will first remove redundant declarations and
%% then re-order the subterms.
%%
%% To devise such a procedure, we need to have more mathematical
%% tools. Firstly, we need to define a strict total order $<$ on the
%% normal terms, which can be used to order the elements of normal
%% abstractions. Secondly, we need to define an equivalence relation
%% $\equiv$ on normal terms of a certain type, which can be used to
%% identify and remove redundant elements of a function
%% definition. Obviously, the equivalence relation needs to be defined in
%% a way such that normal terms of a certain type denoting the same
%% individual belong to the same equivalence class. We can then pick out
%% one representative term from each equivalence class and use these as
%% the knowledge representative language. Given a normal term, it is the
%% unique representative term of the equivalence class in which the term
%% belongs that our \emph{regularise} procedure will return. The set
%% of all such unique representative terms is called the basic terms, the
%% formal definition of which is given in the following paragraph. The
%% reader is referred to Section 3 of \cite{lloyd02logic} for a detailed
%% formalisation of the ideas presented in this section.
%%
%% \paragraph{Basic Terms}
%% \label{par:basic terms}
%% \begin{definition}
%% The set of {\em basic terms}, $\mathfrak{B}$, is defined inductively
%% as follows.
%% \begin{enumerate}
%% \item If $C$ is a data constructor having signature
%% $\sigma_1 \rightarrow \cdots \rightarrow \sigma_n \rightarrow (T
%% \; a_1 \ldots a_k)$ and $t_1, \ldots, t_n \in \mathfrak{B}$ $(n
%% \geq 0$) such that $C \; t_1 \ldots t_n \in \mathfrak{L}$, then
%% $C \; t_1 \ldots t_n \in \mathfrak{B}$.
%%
%% \item If $t_1, \ldots, t_n \in \mathfrak{B}$,
%% $s_1, \ldots, s_n \in \mathfrak{B}$, $t_1 < \ldots < t_n$, $s_i
%% \not\in \mathfrak{D}$, for $1 \leq i \leq n$ $(n \geq 0)$, $s_0
%% \in \mathfrak{D}$ and \[ \lambda x.\mathit{if} \; x = t_1 \;
%% \mathit{then} \; s_1 \; \mathit{else} \; \ldots \; \mathit{if}
%% \; x = t_n \; \mathit{then} \; s_n \; \mathit{else} \; s_0 \in
%% \mathfrak{L}, \] then \[ \lambda x.\mathit{if} \; x = t_1 \;
%% \mathit{then} \; s_1 \; \mathit{else} \; \ldots \; \mathit{if}
%% \; x = t_n \; \mathit{then} \; s_n \; \mathit{else} \; s_0 \in
%% \mathfrak{B}. \]
%%
%% \item If $t_1, \ldots, t_n \in \mathfrak{B}$ $(n \geq 0$) and
%% $(t_1, \ldots, t_n) \in \mathfrak{L}$, then $(t_1, \ldots, t_n)
%% \in \mathfrak{B}$.
%% \end{enumerate}
%% \end{definition}
%%
%% The basic terms from Part 1 of the definition are called {\em basic
%% structures}, those from Part 2 are called {\em basic abstractions},
%% and those from Part 3 are called {\em basic tuples}.
\section{Types}
\begin{comment}
Types are defined inductively in the logic, thus lending itself nicely
to the use of composite pattern \cite[p.163]{gamma95patterns} for its
implementation.
% Figure ~\ref{fig:typeDiagram} shows the structure of the classes involved.
% \begin{figure}[!htbp]
% \begin{center}
% \includegraphics[width=13cm]{FIG/typeDiagram.eps}
% \caption{The structure of the type classes.}
% \label{fig:typeDiagram}
% \end{center}
% \end{figure}
We differentiate between atomic and composite types.
Atomic types are obtained from type constructors with arity 0.
Examples of these include $int$, $float$, $nat$, $char$, $string$, etc.
(Note that $string$ is a nullary type constructor in this case.
Strings in general can also be constructed from $List \; char$.)
They are the base types, and occupy the leaf nodes of a composite type
structure.
Everything else are composite types.
Examples of composite types include types obtained from type
constructors of non-zero arity like $List \; \alpha$, $Btree \; \alpha$,
$Graph \; \alpha \; \beta$, etc; function types like $set \; \alpha$
(this is equivalent to $\alpha \rightarrow \varOmega$) and
$multiset \; \alpha$ ($\alpha \rightarrow nat$); and product types
obtained from the tuple-forming operator.
The following is an outline of the data types module.
We first give the abstract classes, followed by the actual data types.
\end{comment}
<<types.h>>=
#ifndef _DATATYPE_H_
#define _DATATYPE_H_
#include <set>
#include <vector>
#include <string>
#include <assert.h>
#include <iostream>
using namespace std;
#define dcast dynamic_cast
#define unint unsigned int
extern const string underscore, alpha, Parameter, Tuple, Arrow,
gBool, gInt, gFloat, gChar, gString;
<<type::function declarations>>
<<type::type>>
<<type::composite types>>
<<type::parameters>>
<<type::tuples>>
<<type::algebraic types>>
<<type::abstractions>>
<<type::synonyms>>
#endif
@
<<types.cc>>=
#include "types.h"
#include <stdlib.h>
<<type::functions>>
<<type::composite types::implementation>>
<<type::parameters::implementation>>
<<type::tuples::implementation>>
<<type::algebraic types::implementation>>
<<type::abstractions::implementation>>
@
\begin{comment}
The top-level \dstruct{type} structure contains as members those
variables and functions that are common to all types.
Every type obviously has a name.
The functions \func{setAlpha} and \func{addAlpha} are used to
configure subtypes; they are defined only for composite types like
tuples and list. (See Comment \ref{com:composite types} for details.)
\end{comment}
<<type::type>>=
class type {
public:
int count;
type() { count = 0; }
type(string n) : tag(n) { count = 0; }
virtual ~type() {}
virtual void setAlpha(type * x, unsigned int y) {}
virtual void addAlpha(type * x) {}
virtual type * getAlpha(unsigned int x) { return NULL; }
virtual int alphaCount() { return 0; }
virtual bool isComposite() { return false; }
virtual bool isTuple() { return false; }
virtual bool isAbstract() { return false; }
virtual bool isParameter() { return false; }
virtual bool isSynonym() { return false; }
virtual bool isUdefined() { return false; }
virtual string getName() { return tag; }
virtual string & getTag() { return tag; }
virtual type * clone() { count++; return this; }
virtual void deccount() { count--; }
virtual void getParameters(set<string> & ret) {}
virtual void renameParameters() {}
virtual void renameParameter(string name) {}
protected:
string tag;
};
@
\begin{comment}
We use reference counting for the memory management of the base types.
The variable \dstruct{count} keeps track of the number of references to
a type.
Deallocation of a type structure is done using the function
\func{delete-type} defined as follows.
\end{comment}
<<type::type>>=
void delete_type(type * x);
@ %def delete_type
<<type::functions>>=
void delete_type(type * x) {
// if (x->isComposite() || x->isParameter()) assert(x->count == 0);
if (x->count == 0) delete x; else x->deccount();
}
@ %def delete_type
\begin{comment}\label{com:composite types}
The following is the class declaration for composite types.
The member \dstruct{alpha} stores the sub-types in the composite structure.
It serves different purposes for different kinds of composite types.
\end{comment}
<<type::composite types>>=
class type_composite : public type {
protected:
vector<type *> alpha;
public:
virtual ~type_composite();
bool isComposite() { return true; }
virtual void deccount();
virtual void setAlpha(type * x, unsigned int y);
virtual void addAlpha(type * x) { alpha.push_back(x); }
virtual type * getAlpha(unsigned int x);
virtual int alphaCount() { return alpha.size(); }
virtual string getName();
virtual type * clone() { assert(false); }
virtual void getParameters(set<string> & x);
virtual void renameParameters();
virtual void renameParameter(string name);
};
@
<<type::composite types::implementation>>=
type_composite::~type_composite() {
for (unsigned int i=0; i!=alpha.size(); i++) delete_type(alpha[i]);
}
<<type::composite types::implementation>>=
void type_composite::deccount() {
count--;
for (unsigned int i=0; i!=alpha.size(); i++) alpha[i]->deccount();
}
<<type::composite types::implementation>>=
void type_composite::setAlpha(type * x, unsigned int y)
{ assert(y < alpha.size()); alpha[y] = x; }
type * type_composite::getAlpha(unsigned int x)
{ assert(x < alpha.size()); return alpha[x]; }
string type_composite::getName() { assert(false); return ""; }
@
\begin{comment}
The following functions are used during unification and type-checking.
The first one collects in a set all the parameters in a type.
This is used in the unification algorithm.
The second and third functions are used to rename parameters during
instantiation and type checking.
\end{comment}
<<type::composite types::implementation>>=
void type_composite::getParameters(set<string> & ret) {
for (unsigned int i=0; i!=alpha.size(); i++)
alpha[i]->getParameters(ret);
}
@ %def getParameters
<<type::composite types::implementation>>=
void type_composite::renameParameters() {
set<string> ps;
getParameters(ps);
set<string>::iterator p = ps.begin();
while (p != ps.end()) { renameParameter(*p); inc_counter(); p++; }
}
void type_composite::renameParameter(string name) {
for (unsigned int i=0; i!=alpha.size(); i++)
alpha[i]->renameParameter(name);
}
@ %def renameParameters renameParameter
\begin{comment}
Parameters are type variables.
\end{comment}
<<type::parameters>>=
class type_parameter : public type {
public: type_parameter();
type_parameter(string x) { tag = Parameter; vname = x; }
type * clone() { return new type_parameter(vname); }
bool isParameter() { return true; }
string getName() { return tag + underscore + vname; }
void getParameters(set<string> & ret);
void renameParameters();
void renameParameter(string name);
private:
string vname;
};
extern string newParameterName();
@
\begin{comment}
When we create a new type parameter, a distinct name of the form
\dstruct{alpha\_i} where \dstruct{i} is a number will be assigned to
the parameter.
\end{comment}
<<type::parameters::implementation>>=
#include "global.h"
static int parameterCount = 0;
type_parameter::type_parameter() {
tag = Parameter; vname = newParameterName(); }
@
\begin{comment}
New parameter names are created using this next function.
The variable \dstruct{parameterCount} is used here as the index
for new parameter names.
This function can be replaced with {\tt newVar} in terms.nw.
\end{comment}
<<type::parameters::implementation>>=
string newParameterName() {
string vname = alpha + numtostr(parameterCount++);
return vname;
}
<<type::parameters::implementation>>=
void type_parameter::getParameters(set<string> & ret) {
string temp = tag + underscore + vname;
ret.insert(temp);
}
<<type::parameters::implementation>>=
void type_parameter::renameParameters()
{ string temp = tag+underscore+vname; renameParameter(temp); inc_counter(); }
@
\begin{comment}
If a parameter has been indexed, we will first remove its index and
then attach a new one.
The function \func{rfind} returns \dstruct{npos} if an underscore
cannot be found in \dstruct{vname}.
(Search proceeds from the end of \dstruct{vname}.)
\end{comment}
<<type::parameters::implementation>>=
void type_parameter::renameParameter(string name) {
string tname = tag + underscore + vname;
if (tname != name) return;
char temp[10]; sprintf(temp, "_%d", get_counter_value());
unint i = vname.rfind(underscore);
if (i >= 0 && i < vname.size()) vname.erase(i, vname.size()-i);
string temp2(temp); vname = vname + temp2;
}
@
\begin{comment}
Some times, parameters need to be renamed to avoid name capture.
We use a global counter for this purpose.
\end{comment}
<<type::function declarations>>=
void inc_counter();
int get_counter_value();
@
<<type::functions>>=
static int counter = 0;
void inc_counter() { counter++; }
int get_counter_value() { return counter; }
@ %def inc_counter get_counter_value
\begin{comment}
Users can define type synonyms of the form $t_1 = t_2$, where $t_1$ is
an identifier and $t_2$ the actual type.
These are handled using the following class.
The identifier $t_1$ is stored in \dstruct{tname}; the actual type
$t_2$ is stored in \dstruct{actual}.
\end{comment}
<<type::synonyms>>=
class type_synonym : public type {
public:
type_synonym(string name, type * ac)
{ tag = name; tname = name; actual = ac; }
~type_synonym() { delete_type(actual); }
type * clone() {
// assert(actual); count++; actual->count++; return this; }
assert(actual);return new type_synonym(tname,actual->clone());}
void deccount() { assert(false); }
bool isSynonym() { return true; }
type * getActual() { return actual; }
string getName() { return actual->getName(); }
private:
type * actual;
string tname;
};
@
\begin{comment}
We support the following base types: boolean, integer, float point
number and string.
Natural number is not supported because we can always use integer in
its place.
\end{comment}
\begin{comment}
The following is used to create product types.
\end{comment}
<<type::tuples>>=
class type_tuple : public type_composite {
public:
type_tuple() { tag = Tuple; }
type * clone();
bool isTuple() { return true; }
string getName();
};
@
<<type::tuples::implementation>>=
type * type_tuple::clone() {
type_tuple * ret = new type_tuple;
for (int i=0; i!=alphaCount(); i++)
ret->addAlpha(alpha[i]->clone());
return ret;
}
@
<<type::tuples::implementation>>=
string type_tuple::getName() {
string ret = "( ";
for (unsigned int i=0; i!=alpha.size()-1; i++)
ret = ret + alpha[i]->getName() + " * ";
ret = ret + alpha[alpha.size()-1]->getName() + ")";
return ret;
}
@
\begin{comment}
This is used for the construction of function types.
It is worth mentioning that sets and multisets have function types.
Function types of particular interest here are those for
transformations.
The variable \dstruct{rank} is used to record the rank of
transformations.
This value can be calculated using \func{compRank}.
The functions \func{getSource} and \func{getTarget} returns the source
and target of a transformation.
The function \func{getArg} returns the $n$-th argument.
\end{comment}
<<type::abstractions>>=
class type_abs : public type_composite {
public:
int rank;
type_abs() { tag = Arrow; rank = -5; }
type_abs(type * source, type * target) {
tag = Arrow; rank = -5;
addAlpha(source); addAlpha(target);
}
bool isAbstract() { return true; }
type * clone();
type * getArg(int n);
type * getSource();
type * getTarget();
string getName();
int compRank();
};
@ %def getArg getSource getTarget
<<type::abstractions::implementation>>=
type * type_abs::clone() {
type_abs * ret = new type_abs(alpha[0]->clone(), alpha[1]->clone());
ret->rank = rank;
return ret;
}
@
<<type::abstractions::implementation>>=
string type_abs::getName() {
string ret;
if (alpha[0]->isComposite())
ret = "(" + alpha[0]->getName() + ") -> ";
else ret = alpha[0]->getName() + " -> ";
if (alpha[1]->isComposite())
ret = ret + "(" + alpha[1]->getName() + ")";
else ret = ret + alpha[1]->getName();
return ret;
}
@
<<type::abstractions::implementation>>=
type * type_abs::getArg(int n) {
assert(n < rank);
type * p = this;
int temp = 0;
while (temp != n) { p = p->getAlpha(1); temp++; }
return p->getAlpha(0);
}
@ %def getArg
<<type::abstractions::implementation>>=
type * type_abs::getSource() {
assert(rank != -5);
type * p = this;
for (int i=0; i!=rank; i++) p = p->getAlpha(1);
assert(p->getAlpha(0)); return p->getAlpha(0);
}
@ %def getSource
<<type::abstractions::implementation>>=
type * type_abs::getTarget() {
assert(rank != -5);
type * p = this;
for (int i=0; i!=rank; i++) p = p->getAlpha(1);
return p->getAlpha(1);
}
@ %def getTarget
\begin{comment}
This function computes the rank of a transformation.
We inspect the spine of the type and count the number of predicate
types appearing in it.
\end{comment}
<<type::abstractions::implementation>>=
int type_abs::compRank() {
if (alpha[1]->isAbstract() && alpha[0]->isAbstract() &&
alpha[0]->getAlpha(1)->getTag() == gBool) {
type_abs * t = dcast<type_abs *>(alpha[1]);
return 1 + t->compRank();
}
return 0;
}
@ %def compRank
\begin{comment}
Algebraic types are supported using the following classes.
The class \dstruct{type\_udefined} supports nullary type constructors;
the class \dstruct{type\_alg} supports non-nullary type constructors.
Perhaps it makes sense to combine the two in one type.
\end{comment}
<<type::algebraic types>>=
class type_udefined : public type {
const vector<string> values;
public:
type_udefined(string & tname, const vector<string> &vals)
: type(tname), values(vals) {}
type_udefined(string & tname) : type(tname) {}
bool isUdefined() { return true; }
// type * clone() { count++; return this; }
const vector<string> & getValues() { return values; }
};
@
<<type::algebraic types>>=
class type_alg : public type_composite {
public:
type_alg(string tid) { tag = tid; }
type_alg(string tid, vector<type *> x) {
tag = tid;
for (unsigned int i=0; i!=x.size(); i++)
addAlpha(x[i]->clone());
}
type_alg(string tid, type_tuple * x) {
tag = tid;
for (int i=0; i!=x->alphaCount(); i++)
addAlpha(x->getAlpha(i)->clone());
}
type * clone() { return new type_alg(tag, alpha); }
string getName();
};
@
<<type::algebraic types::implementation>>=
string type_alg::getName() {
string ret = "(" + tag;
for (unint i=0; i!=alpha.size()-1; i++)
ret = ret + " " + alpha[i]->getName();
ret = ret + " " + alpha[alpha.size()-1]->getName() + ")";
return ret;
}
@
\subsection{Unification}
\begin{comment}
We now discuss type unification.
The type unification algorithm given here is adapted from the one
given in \cite[Chap.5]{peyton-jones87}.
\end{comment}
<<unification.h>>=
#ifndef _UNIFICATION_H_
#define _UNIFICATION_H_
#include "terms.h"
#include "types.h"
#include <vector>
#include <utility>
struct term_type { term * first; type * second; };
extern bool unify(vector<pair<string,type *> > &eqns,type *tvn,type *t);
extern type * apply_subst(vector<pair<string, type *> > & eqns, type * x);
extern type * wellTyped(term * t);
extern pair<type *, vector<term_type> > mywellTyped(term * t);
extern type * get_type_from_syn(type * in);
#endif
@ %def term_type
<<unification.cc>>=
#include <iostream>
#include <utility>
#include <vector>
#include <string>
#include "types.h"
#include "unification.h"
using namespace std;
bool unify_verbose = false; // set this to see the unification process
<<unification body>>
<<type checking>>
@
\begin{comment}
The function \func{getBinding} returns the binding for parameter $x$ in
a type substitution $\theta$.
\end{comment}
<<unification body>>=
type * getBinding(vector<pair<string, type *> > & eqns, type * x) {
assert(x->isParameter());
string vname = x->getName();
for (unsigned int i=0; i!=eqns.size(); i++)
if (eqns[i].first == vname) return eqns[i].second;
return x;
}
@ %def getBinding
\begin{comment}
Given a type substitution $\theta$ and a type $t$ with parameters,
\func{apply\_subst} computes $t\theta$.
\end{comment}
<<unification body>>=
type * apply_subst(vector<pair<string, type *> > & eqns, type * t) {
if (t->isParameter())
return getBinding(eqns, t)->clone();
type * ret = t->clone();
for (int i=0; i!=ret->alphaCount(); i++) {
type * temp = apply_subst(eqns, ret->getAlpha(i));
delete_type(ret->getAlpha(i));
ret->setAlpha(temp, i);
}
return ret;
}
@ %def apply_subst
\begin{comment}
This function extends a substitution $\theta$ with an additional
equation $x = t$.
If $t$ is $x$, then the extension succeeds trivially.
Otherwise, unless $x$ appears in $t$, the extension succeeds.
\end{comment}
<<unification body>>=
bool extend(vector<pair<string, type *> > & eqns, type * x, type * t) {
assert(x->isParameter());
<<delete eqns of the form x = x>>
<<if x appears in t, return false>>
<<apply (x,t) to each eqn in eqns, extend eqns and return true>>
}
@ %def extend
<<delete eqns of the form x = x>>=
if (t->isParameter())
if (x->getName() == t->getName()) return true;
@
<<if x appears in t, return false>>=
// case of t not a parameter
set<string> parameters;
t->getParameters(parameters);
// set<string>::iterator p = parameters.begin();
// cout << "parameters : ";
// while (p != parameters.end()) { cout << *p << " "; p++; }
if (parameters.find(x->getName()) != parameters.end())
return false;
@
<<apply (x,t) to each eqn in eqns, extend eqns and return true>>=
for (unsigned int i=0; i!=eqns.size(); i++) {
type * temp = eqns[i].second;
eqns[i].second = apply_subst(eqns, temp);
delete_type(temp);
}
pair<string, type *> eqn(x->getName(), t->clone());
eqns.push_back(eqn);
return true;
@
\begin{comment}
This function extracts the actual type of a synonym.
We may need to go through several redirections to get to the actual type.
\end{comment}
<<unification body>>=
type * get_type_from_syn(type * in) {
type * ret = in;
while (ret->isSynonym())
ret = dcast<type_synonym *>(ret)->getActual();
return ret;
}
@ %def get_type_from_syn
\begin{comment}
This function returns whether two types \dstruct{tvn} and \dstruct{t}
are unifiable.
If one of the two, say \dstruct{tvn}, is a parameter, we will try
extending \dstruct{eqns} with the equation (\dstruct{tvn} $ = $ \dstruct{t}).
Otherwise, we compare the tags and try to recursively unify the subtypes
if the tags match.
\end{comment}
<<unification body>>=
bool unify(vector<pair<string,type *> > &eqns, type * tvn, type * t) {
<<unify::verbose 1>>
if (tvn->isSynonym()) tvn = get_type_from_syn(tvn);
if (t->isSynonym()) t = get_type_from_syn(t);
<<unify::verbose 2>>
bool ret = false;
if (tvn->isParameter()) {
type * phitvn = getBinding(eqns, tvn)->clone();
type * phit = apply_subst(eqns, t);
// if phitvn == tvn
if (phitvn->isParameter()) {
if (tvn->getName() == phitvn->getName()) {
ret = extend(eqns, tvn, phit);
delete_type(phit); delete_type(phitvn);
if (unify_verbose) cerr << ret << endl;
return ret;
}
} else {
ret = unify(eqns, phitvn, phit);
delete_type(phit); delete_type(phitvn);
if (unify_verbose) cerr << ret << endl;
return ret;
}
}
// switch place
if (tvn->isParameter() == false && t->isParameter())
return unify(eqns, t, tvn);
<<unify::case of both non-parameters>>
return true;
}
@ %def unify
<<unify::case of both non-parameters>>=
if (tvn->isParameter() == false && t->isParameter() == false) {
if (tvn->getTag() != t->getTag()) return false;
if (tvn->getTag() == Tuple && t->getTag() == Tuple)
if (tvn->alphaCount() != t->alphaCount()) {
if (unify_verbose) cerr << false << endl;
return false;
}
// unify each component
if (tvn->alphaCount() != t->alphaCount()) {
cerr << "Error in unification. Argument counts don't match.\n";
cerr << "tvn = " << tvn->getName() << endl;
cerr << " t = " << t->getName() << endl;
assert(false);
}
for (int i=0; i!=tvn->alphaCount(); i++) {
bool r = unify(eqns,tvn->getAlpha(i),t->getAlpha(i));
if (r == false) return false;
}
}
@
\begin{comment}
We print out some information to help debugging.
\end{comment}
<<unify::verbose 1>>=
if (unify_verbose)
cerr << "Unifying " << tvn->getName() << " and " << t->getName() <<endl;
@
<<unify::verbose 2>>=
if (unify_verbose) cerr << "After transformation:\n";
<<unify::verbose 1>>
@
\subsection{Type Checking}
\begin{comment}
The type-checking procedure implements the following algorithm.
For more details on type checking and type inference, see, for example,
\cite[Chap. 11]{jmitchell96foundations}.
\begin{align*}
\hspace*{-1em}\it{WT}(C) &= \alpha \;\;\; \text{where $\alpha$ is the declared
signature of $C$} \\
\hspace*{-1em}\it{WT}(x) &= \begin{cases}
\alpha \;\;\;\text{ if $\it{WT}(x) = \alpha$ has been
established before;} \\
a \;\;\;\; \text{ otherwise; here, $a$ is a fresh parameter.} \\
\end{cases}\\
\hspace*{-1em}\it{WT}((t_1,\ldots,t_n)) &= \it{WT}(t_1) \times \cdots \times \it{WT}(t_n)\\
\hspace*{-1em}\it{WT}(\lambda x.t) &=
\begin{cases}
\alpha \rightarrow \beta \;\;\;\text{ if $\it{WT}(t) = \beta$
and $x$ is free with relative type $\alpha$ in $t$.} \\
a \rightarrow \beta \;\;\;\text{ where $a$ is a parameter otherwise.}\\
\end{cases} \\
\hspace*{-1em}\it{WT}((s\,t)) &=
\beta\theta \;\;\;\text{ if $\it{WT}(s) = \alpha \rightarrow \beta$,
$\it{WT}(t) = \gamma$, and
$\alpha$ and $\gamma$ are unifiable using $\theta$.}
\end{align*}
The input term is not well-typed if any one of the $\it{WT}$ calls on
its subterms fails.
\end{comment}
<<type checking actual>>=
type * wellTyped2(term * t, vector<var_name> bvars, int scope) {
type * ret = NULL;
<<wellTyped2::case of t a constant>>
<<wellTyped2::case of t a variable>>
<<wellTyped2::case of t an application>>
<<wellTyped2::case of t an abstraction>>
<<wellTyped2::case of t a modal term>>
<<wellTyped2::case of t a tuple>>
return ret;
}
@ %def wellTyped2
\begin{comment}
We first look at some data structures.
The vector \dstruct{term\_types} is used to store the inferred type
for each subterm of the input term.
The structure \dstruct{var\_name} is used to handle variables; see
Comment \ref{com:type checking:variable} for more details.
\end{comment}
<<type checking variables>>=
vector<term_type> term_types;
struct var_name { int vname; string pname; };
@