-
Notifications
You must be signed in to change notification settings - Fork 0
/
csvfoc.h
112 lines (100 loc) · 3.85 KB
/
csvfoc.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
/* It's all too easy to start over-complicating this: quotations, for example. There you would need to check the last 2 characters of every word, not just the last one, e.g. "stop!", and that adds new layers. */
#include<stdio.h>
#include<stdlib.h>
#include<string.h>
/*
 * Buffer-size constants. CBUF, WABUF and LBUF take very small values when DBG
 * is defined at compile time and larger values otherwise.
 * NOTE(review): presumably these are the initial size / growth increment for
 * the character, word-array and line buffers respectively, and the tiny DBG
 * values exist to exercise the reallocation paths -- confirm at the use sites.
 */
#ifdef DBG
#define CBUF 2
#define WABUF 1
#define LBUF 2
#else
#define CBUF 12
#define WABUF 20
#define LBUF 32
#endif
/* NOTE(review): GBUF and MAXVSZ are not used anywhere in this header; their
 * meaning has to be confirmed in the code that includes it. */
#define GBUF 2
#define MAXVSZ 36
#define MAXISZ 5 // max size of the integer (presumably counted in characters -- TODO confirm)
/* boolean: textual alias for unsigned char. This is a macro, not a typedef. */
#define boolean unsigned char
/*
 * CONDREALLOC(x, b, c, a, t): conditionally grow the heap array `a`.
 *
 *   x  index (or count) about to be used
 *   b  current capacity of `a` in elements; an lvalue, increased by `c`
 *      when growth happens
 *   c  number of elements to add when growing
 *   a  pointer lvalue holding the array; receives the reallocated block
 *   t  element type of `a`
 *
 * Growth is triggered when x >= b - 1, i.e. as soon as the last slot of the
 * current capacity is reached. If realloc() fails, a message is written to
 * stderr and the program exits: the previous code assigned the NULL result
 * straight to `a`, which leaked the old block and left a NULL pointer for the
 * next access.
 *
 * The macro expands to a single statement and must be invoked with a
 * terminating semicolon; it is therefore safe as the body of an unbraced
 * if/else. (The earlier definition had a stray ';' after the parameter list,
 * which made the expansion begin with an empty statement and so escape any
 * such guard.) Arguments are evaluated more than once and must be free of
 * side effects. Needs <stdio.h> and <stdlib.h>.
 */
#define CONDREALLOC(x, b, c, a, t) \
    do { \
        if ((x) >= ((b) - 1)) { \
            void *condrealloc_new_; \
            (b) += (c); \
            condrealloc_new_ = realloc((a), (b) * sizeof(t)); \
            if (condrealloc_new_ == NULL) { \
                fprintf(stderr, "CONDREALLOC: out of memory\n"); \
                exit(EXIT_FAILURE); \
            } \
            (a) = condrealloc_new_; \
        } \
    } while (0)
/*
 * CONDREALLOC2(x, b, c, a): conditionally grow an array of oca_t and
 * initialise the newly added entries.
 *
 *   x  index (or count) about to be used
 *   b  current capacity of `a` in elements; an lvalue, increased by `c`
 *      when growth happens
 *   c  number of entries to add; also used as the initial capacity (ocibf)
 *      of each new entry's index array
 *   a  pointer lvalue holding the oca_t array; receives the reallocated block
 *
 * Growth is triggered when x >= b - 1. Every new entry gets uo = NULL,
 * uosz = 0, ocisz = 0, ocibf = c and a freshly malloc'ed ocinda of c ints.
 * On allocation failure a message is written to stderr and the program exits.
 *
 * Changes from the earlier definition:
 *   - the initialisation loop now goes through the `a` argument; it used to
 *     name a variable `aoca` directly, so it only worked when the caller's
 *     array happened to be called that;
 *   - the loop index k9 is local to the expansion; it used to be declared in
 *     the caller's scope, so the macro could be used only once per scope;
 *   - uosz is initialised (it was left indeterminate);
 *   - the stray ';' after the parameter list is gone and the body is wrapped
 *     in do { } while (0), so the macro is one statement and must be invoked
 *     with a terminating semicolon.
 *
 * Arguments are evaluated more than once and must be free of side effects.
 * Needs <stdio.h> and <stdlib.h>, and oca_t must be declared before use.
 */
#define CONDREALLOC2(x, b, c, a) \
    do { \
        if ((x) >= ((b) - 1)) { \
            void *oca_new_; \
            int k9; \
            (b) += (c); \
            oca_new_ = realloc((a), (b) * sizeof(oca_t)); \
            if (oca_new_ == NULL) { \
                fprintf(stderr, "CONDREALLOC2: out of memory\n"); \
                exit(EXIT_FAILURE); \
            } \
            (a) = oca_new_; \
            for (k9 = (b) - (c); k9 < (b); ++k9) { \
                (a)[k9].uo = NULL; \
                (a)[k9].uosz = 0; \
                (a)[k9].ocisz = 0; \
                (a)[k9].ocibf = (c); \
                (a)[k9].ocinda = malloc((a)[k9].ocibf * sizeof(int)); \
                if ((a)[k9].ocinda == NULL) { \
                    fprintf(stderr, "CONDREALLOC2: out of memory\n"); \
                    exit(EXIT_FAILURE); \
                } \
            } \
        } \
    } while (0)
/* boole: one-byte truth value. Same underlying type (unsigned char) as the
 * `boolean` macro defined earlier in this header. */
typedef unsigned char boole;
/*
 * t_t: the type inferred for a word. GETLCTYPE picks an initial value from
 * the first character, IWMODTYPEIF revises it for each following character
 * and SETCPTYPE adjusts it for closing punctuation.
 */
typedef enum
{
    STRG = 0, /* unknown type, so it defaults to plain string */
    NUMS = 1, /* NUMberS: a numeric sequence that may contain special symbols
               * (float, date, time ...); currency is not included */
    PNI  = 2, /* positive or negative integer */
    STCP = 3, /* string with closing punctuation attached: comma, full stop,
               * semicolon, '!', '?', etc. */
    SCST = 4, /* string with a starting capital */
    SCCP = 5, /* string with a starting capital AND closing punctuation */
    ALLC = 6  /* string in all caps */
} t_t;
/* w_c: word container -- one word plus what is known about it. */
typedef struct
{
    char *w;          /* the word itself */
    t_t t;            /* inferred type of the word (see t_t) */
    unsigned int lp1; /* length of the word plus one, i.e. including the
                       * terminating null char */
} w_c;
/* aw_c: array-of-words container. */
typedef struct
{
    w_c **aw;        /* array of pointers to w_c */
    unsigned int ab; /* NOTE(review): presumably the allocated capacity
                      * ("buffer") of aw -- confirm against the code that
                      * fills it */
    unsigned int al; /* NOTE(review): presumably the number of words in use
                      * ("length") -- confirm likewise */
} aw_c;
/* aaw_c: array of array-of-words containers. */
typedef struct
{
    size_t numl; /* number of lines, i.e. rows */
    aw_c **aaw;  /* array of pointers to aw_c */
} aaw_c;
/* Checking each character can be compute-intensive, so I've offloaded it to MACROS */
/*
 * GETLCTYPE(c, typ): GET Leading Char TYPE.
 *
 * Gives `typ` a first guess based on the first character `c` of a word:
 *   '+', '-' or a digit  -> PNI
 *   '.'                  -> NUMS
 *   'A'..'Z'             -> SCST
 * Any other character leaves `typ` untouched. This is how a quantity of some
 * sort is inferred; currency is not handled.
 *
 * The character literals are the ASCII codes the earlier definition spelled
 * in hex (0x2B, 0x2D, 0x2E, 0x30-0x39, 0x41-0x5A).
 *
 * The macro expands to a single statement and must be invoked with a
 * terminating semicolon. The earlier definition had a stray ';' after the
 * parameter list, so its expansion began with an empty statement and an
 * unbraced `if (cond) GETLCTYPE(...);` ran the classification regardless of
 * cond. `c` is evaluated more than once and must be free of side effects.
 */
#define GETLCTYPE(c, typ) \
    do { \
        if (((c) == '+') || ((c) == '-') || (((c) >= '0') && ((c) <= '9'))) \
            (typ) = PNI; \
        else if ((c) == '.') \
            (typ) = NUMS; \
        else if (((c) >= 'A') && ((c) <= 'Z')) \
            (typ) = SCST; \
    } while (0)
/*
 * IWMODTYPEIF(c, typ): InWord MODify TYPE.
 *
 * Revises `typ` for a character `c` found inside a word:
 *   typ == NUMS and c is neither '.' nor a digit -> STRG
 *   typ == PNI  and c is '.'                      -> NUMS
 *   typ == PNI  and c is not a digit              -> STRG
 * Every other combination leaves `typ` untouched.
 *
 * The macro expands to a single statement and must be invoked with a
 * terminating semicolon. Two defects of the earlier definition are fixed:
 * a stray ';' after the parameter list made the expansion begin with an empty
 * statement (so an unbraced `if (cond) IWMODTYPEIF(...);` always ran), and
 * `c` was used without parentheses in two conditions, so an expression
 * argument such as `x ? '.' : '5'` was mis-parsed. `c` and `typ` are
 * evaluated more than once and must be free of side effects.
 */
#define IWMODTYPEIF(c, typ) \
    do { \
        if ((typ) == NUMS) { \
            if (((c) != '.') && (((c) < '0') || ((c) > '9'))) \
                (typ) = STRG; \
        } else if ((typ) == PNI) { \
            if ((c) == '.') \
                (typ) = NUMS; \
            else if (((c) < '0') || ((c) > '9')) \
                (typ) = STRG; \
        } \
    } while (0)
/*
 * SETCPTYPE(oc, typ): SET Closing Punctuation TYPE, based on the previous
 * character `oc` (old c), not the current one.
 *
 * When `oc` is one of  ! ) , . ; ? ] }  (ASCII 0x21 0x29 0x2C 0x2E 0x3B 0x3F
 * 0x5D 0x7D), a STRG becomes STCP and a SCST becomes SCCP. Any other `typ`,
 * or any other `oc`, leaves `typ` untouched.
 *
 * The macro expands to a single statement and must be invoked with a
 * terminating semicolon. The earlier definition had a stray ';' after the
 * parameter list, so its expansion began with an empty statement and an
 * unbraced `if (cond) SETCPTYPE(...);` ran regardless of cond. `oc` must be
 * of integer (character) type; `typ` is evaluated more than once and must be
 * free of side effects.
 */
#define SETCPTYPE(oc, typ) \
    do { \
        switch (oc) { \
        case '!': case ')': case ',': case '.': \
        case ';': case '?': case ']': case '}': \
            if ((typ) == STRG) \
                (typ) = STCP; \
            else if ((typ) == SCST) \
                (typ) = SCCP; \
            break; \
        default: \
            break; \
        } \
    } while (0)
/* oca_t: OCcurrence Array type -- one unique string and the indices at which
 * it occurs. */
typedef struct
{
    char *uo;    /* the unique occurrence this entry refers to */
    int uosz;    /* size of uo, which is a string */
    int *ocinda; /* indices corresponding to the unique occurrence uo */
    int ocisz;   /* occurrence index array size */
    int ocibf;   /* occurrence index array buffer; CONDREALLOC2 allocates
                  * ocinda with this many ints */
} oca_t;