-
Notifications
You must be signed in to change notification settings - Fork 41
/
Copy pathjsonsl.h
1004 lines (883 loc) · 30.3 KB
/
jsonsl.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
/**
* JSON Simple/Stacked/Stateful Lexer.
* - Does not buffer data
* - Maintains state
* - Callback oriented
* - Lightweight and fast. One source file and one header file
*
* Copyright (C) 2012-2015 Mark Nunberg
* See included LICENSE file for license details.
*/
#ifndef JSONSL_H_
#define JSONSL_H_
#include <stdio.h>
#include <stdlib.h>
#include <stddef.h>
#include <string.h>
#include <sys/types.h>
#include <wchar.h>
#ifdef __cplusplus
extern "C" {
#endif /* __cplusplus */
#ifdef JSONSL_USE_WCHAR
typedef jsonsl_char_t wchar_t;
typedef jsonsl_uchar_t unsigned wchar_t;
#else
typedef char jsonsl_char_t;
typedef unsigned char jsonsl_uchar_t;
#endif /* JSONSL_USE_WCHAR */
#ifdef JSONSL_PARSE_NAN
#define JSONSL__NAN_PROXY JSONSL_SPECIALf_NAN
#define JSONSL__INF_PROXY JSONSL_SPECIALf_INF
#else
#define JSONSL__NAN_PROXY 0
#define JSONSL__INF_PROXY 0
#endif
/* Stolen from http-parser.h, and possibly others */
#if defined(_WIN32) && !defined(__MINGW32__) && (!defined(_MSC_VER) || _MSC_VER<1600)
typedef __int8 int8_t;
typedef unsigned __int8 uint8_t;
typedef __int16 int16_t;
typedef unsigned __int16 uint16_t;
typedef __int32 int32_t;
typedef unsigned __int32 uint32_t;
typedef __int64 int64_t;
typedef unsigned __int64 uint64_t;
#if !defined(_MSC_VER) || _MSC_VER<1400
typedef unsigned int size_t;
typedef int ssize_t;
#endif
#else
#include <stdint.h>
#endif
#if (!defined(JSONSL_STATE_GENERIC)) && (!defined(JSONSL_STATE_USER_FIELDS))
#define JSONSL_STATE_GENERIC
#endif /* !defined JSONSL_STATE_GENERIC */
#ifdef JSONSL_STATE_GENERIC
#define JSONSL_STATE_USER_FIELDS
#endif /* JSONSL_STATE_GENERIC */
/* Additional fields for component object */
#ifndef JSONSL_JPR_COMPONENT_USER_FIELDS
#define JSONSL_JPR_COMPONENT_USER_FIELDS
#endif
#ifndef JSONSL_API
/**
* We require a /DJSONSL_DLL so that users already using this as a static
* or embedded library don't get confused
*/
#if defined(_WIN32) && defined(JSONSL_DLL)
#define JSONSL_API __declspec(dllexport)
#else
#define JSONSL_API
#endif /* _WIN32 */
#endif /* !JSONSL_API */
#ifndef JSONSL_INLINE
#if defined(_MSC_VER)
#define JSONSL_INLINE __inline
#elif defined(__GNUC__)
#define JSONSL_INLINE __inline__
#else
#define JSONSL_INLINE inline
#endif /* _MSC_VER or __GNUC__ */
#endif /* JSONSL_INLINE */
#define JSONSL_MAX_LEVELS 512
struct jsonsl_st;
typedef struct jsonsl_st *jsonsl_t;
typedef struct jsonsl_jpr_st* jsonsl_jpr_t;
/**
* This flag is true when AND'd against a type whose value
* must be in "quoutes" i.e. T_HKEY and T_STRING
*/
#define JSONSL_Tf_STRINGY 0xffff00
/**
* Constant representing the special JSON types.
* The values are special and aid in speed (the OBJECT and LIST
* values are the char literals of their openings).
*
* Their actual value is a character which attempts to resemble
* some mnemonic reference to the actual type.
*
* If new types are added, they must fit into the ASCII printable
* range (so they should be AND'd with 0x7f and yield something
* meaningful)
*/
#define JSONSL_XTYPE \
X(STRING, '"'|JSONSL_Tf_STRINGY) \
X(HKEY, '#'|JSONSL_Tf_STRINGY) \
X(OBJECT, '{') \
X(LIST, '[') \
X(SPECIAL, '^') \
X(UESCAPE, 'u')
typedef enum {
#define X(o, c) \
JSONSL_T_##o = c,
JSONSL_XTYPE
JSONSL_T_UNKNOWN = '?',
/* Abstract 'root' object */
JSONSL_T_ROOT = 0
#undef X
} jsonsl_type_t;
/**
* Subtypes for T_SPECIAL. We define them as flags
* because more than one type can be applied to a
* given object.
*/
#define JSONSL_XSPECIAL \
X(NONE, 0) \
X(SIGNED, 1<<0) \
X(UNSIGNED, 1<<1) \
X(TRUE, 1<<2) \
X(FALSE, 1<<3) \
X(NULL, 1<<4) \
X(FLOAT, 1<<5) \
X(EXPONENT, 1<<6) \
X(NONASCII, 1<<7) \
X(NAN, 1<<8) \
X(INF, 1<<9)
typedef enum {
#define X(o,b) \
JSONSL_SPECIALf_##o = b,
JSONSL_XSPECIAL
#undef X
/* Handy flags for checking */
JSONSL_SPECIALf_UNKNOWN = 1 << 10,
/** @private Private */
JSONSL_SPECIALf_ZERO = 1 << 11 | JSONSL_SPECIALf_UNSIGNED,
/** @private */
JSONSL_SPECIALf_DASH = 1 << 12,
/** @private */
JSONSL_SPECIALf_POS_INF = (JSONSL_SPECIALf_INF),
JSONSL_SPECIALf_NEG_INF = (JSONSL_SPECIALf_INF|JSONSL_SPECIALf_SIGNED),
/** Type is numeric */
JSONSL_SPECIALf_NUMERIC = (JSONSL_SPECIALf_SIGNED| JSONSL_SPECIALf_UNSIGNED),
/** Type is a boolean */
JSONSL_SPECIALf_BOOLEAN = (JSONSL_SPECIALf_TRUE|JSONSL_SPECIALf_FALSE),
/** Type is an "extended", not integral type (but numeric) */
JSONSL_SPECIALf_NUMNOINT =
(JSONSL_SPECIALf_FLOAT|JSONSL_SPECIALf_EXPONENT|JSONSL_SPECIALf_NAN
|JSONSL_SPECIALf_INF)
} jsonsl_special_t;
/**
* These are the various types of stack (or other) events
* which will trigger a callback.
* Like the type constants, this are also mnemonic
*/
#define JSONSL_XACTION \
X(PUSH, '+') \
X(POP, '-') \
X(UESCAPE, 'U') \
X(ERROR, '!')
typedef enum {
#define X(a,c) \
JSONSL_ACTION_##a = c,
JSONSL_XACTION
JSONSL_ACTION_UNKNOWN = '?'
#undef X
} jsonsl_action_t;
/**
* Various errors which may be thrown while parsing JSON
*/
#define JSONSL_XERR \
/* Trailing garbage characters */ \
X(GARBAGE_TRAILING) \
/* We were expecting a 'special' (numeric, true, false, null) */ \
X(SPECIAL_EXPECTED) \
/* The 'special' value was incomplete */ \
X(SPECIAL_INCOMPLETE) \
/* Found a stray token */ \
X(STRAY_TOKEN) \
/* We were expecting a token before this one */ \
X(MISSING_TOKEN) \
/* Cannot insert because the container is not ready */ \
X(CANT_INSERT) \
/* Found a '\' outside a string */ \
X(ESCAPE_OUTSIDE_STRING) \
/* Found a ':' outside of a hash */ \
X(KEY_OUTSIDE_OBJECT) \
/* found a string outside of a container */ \
X(STRING_OUTSIDE_CONTAINER) \
/* Found a null byte in middle of string */ \
X(FOUND_NULL_BYTE) \
/* Current level exceeds limit specified in constructor */ \
X(LEVELS_EXCEEDED) \
/* Got a } as a result of an opening [ or vice versa */ \
X(BRACKET_MISMATCH) \
/* We expected a key, but got something else instead */ \
X(HKEY_EXPECTED) \
/* We got an illegal control character (bad whitespace or something) */ \
X(WEIRD_WHITESPACE) \
/* Found a \u-escape, but there were less than 4 following hex digits */ \
X(UESCAPE_TOOSHORT) \
/* Invalid two-character escape */ \
X(ESCAPE_INVALID) \
/* Trailing comma */ \
X(TRAILING_COMMA) \
/* An invalid number was passed in a numeric field */ \
X(INVALID_NUMBER) \
/* Value is missing for object */ \
X(VALUE_EXPECTED) \
/* The following are for JPR Stuff */ \
\
/* Found a literal '%' but it was only followed by a single valid hex digit */ \
X(PERCENT_BADHEX) \
/* jsonpointer URI is malformed '/' */ \
X(JPR_BADPATH) \
/* Duplicate slash */ \
X(JPR_DUPSLASH) \
/* No leading root */ \
X(JPR_NOROOT) \
/* Allocation failure */ \
X(ENOMEM) \
/* Invalid unicode codepoint detected (in case of escapes) */ \
X(INVALID_CODEPOINT)
typedef enum {
JSONSL_ERROR_SUCCESS = 0,
#define X(e) \
JSONSL_ERROR_##e,
JSONSL_XERR
#undef X
JSONSL_ERROR_GENERIC
} jsonsl_error_t;
/**
* A state is a single level of the stack.
* Non-private data (i.e. the 'data' field, see the STATE_GENERIC section)
* will remain in tact until the item is popped.
*
* As a result, it means a parent state object may be accessed from a child
* object, (the parents fields will all be valid). This allows a user to create
* an ad-hoc hierarchy on top of the JSON one.
*
*/
struct jsonsl_state_st {
/**
* The JSON object type
*/
unsigned type;
/** If this element is special, then its extended type is here */
unsigned special_flags;
/**
* The position (in terms of number of bytes since the first call to
* jsonsl_feed()) at which the state was first pushed. This includes
* opening tokens, if applicable.
*
* @note For strings (i.e. type & JSONSL_Tf_STRINGY is nonzero) this will
* be the position of the first quote.
*
* @see jsonsl_st::pos which contains the _current_ position and can be
* used during a POP callback to get the length of the element.
*/
size_t pos_begin;
/**FIXME: This is redundant as the same information can be derived from
* jsonsl_st::pos at pop-time */
size_t pos_cur;
/**
* Level of recursion into nesting. This is mainly a convenience
* variable, as this can technically be deduced from the lexer's
* level parameter (though the logic is not that simple)
*/
unsigned int level;
/**
* how many elements in the object/list.
* For objects (hashes), an element is either
* a key or a value. Thus for one complete pair,
* nelem will be 2.
*
* For special types, this will hold the sum of the digits.
* This only holds true for values which are simple signed/unsigned
* numbers. Otherwise a special flag is set, and extra handling is not
* performed.
*/
uint64_t nelem;
/*TODO: merge this and special_flags into a union */
/**
* Useful for an opening nest, this will prevent a callback from being
* invoked on this item or any of its children
*/
int ignore_callback;
/**
* Counter which is incremented each time an escape ('\') is encountered.
* This is used internally for non-string types and should only be
* inspected by the user if the state actually represents a string
* type.
*/
unsigned int nescapes;
/**
* Put anything you want here. if JSONSL_STATE_USER_FIELDS is here, then
* the macro expansion happens here.
*
* You can use these fields to store hierarchical or 'tagging' information
* for specific objects.
*
* See the documentation above for the lifetime of the state object (i.e.
* if the private data points to allocated memory, it should be freed
* when the object is popped, as the state object will be re-used)
*/
#ifndef JSONSL_STATE_GENERIC
JSONSL_STATE_USER_FIELDS
#else
/**
* Otherwise, this is a simple void * pointer for anything you want
*/
void *data;
#endif /* JSONSL_STATE_USER_FIELDS */
};
/**Gets the number of elements in the list.
* @param st The state. Must be of type JSONSL_T_LIST
* @return number of elements in the list
*/
#define JSONSL_LIST_SIZE(st) ((st)->nelem)
/**Gets the number of key-value pairs in an object
* @param st The state. Must be of type JSONSL_T_OBJECT
* @return the number of key-value pairs in the object
*/
#define JSONSL_OBJECT_SIZE(st) ((st)->nelem / 2)
/**Gets the numeric value.
* @param st The state. Must be of type JSONSL_T_SPECIAL and
* special_flags must have the JSONSL_SPECIALf_NUMERIC flag
* set.
* @return the numeric value of the state.
*/
#define JSONSL_NUMERIC_VALUE(st) ((st)->nelem)
/*
* So now we need some special structure for keeping the
* JPR info in sync. Preferrably all in a single block
* of memory (there's no need for separate allocations.
* So we will define a 'table' with the following layout
*
* Level nPosbl JPR1_last JPR2_last JPR3_last
*
* 0 1 NOMATCH POSSIBLE POSSIBLE
* 1 0 NOMATCH NOMATCH COMPLETE
* [ table ends here because no further path is possible]
*
* Where the JPR..n corresponds to the number of JPRs
* requested, and nPosble is a quick flag to determine
*
* the number of possibilities. In the future this might
* be made into a proper 'jump' table,
*
* Since we always mark JPRs from the higher levels descending
* into the lower ones, a prospective child match would first
* look at the parent table to check the possibilities, and then
* see which ones were possible..
*
* Thus, the size of this blob would be (and these are all ints here)
* nLevels * nJPR * 2.
*
* the 'Width' of the table would be nJPR*2, and the 'height' would be
* nlevels
*/
/**
* This is called when a stack change ocurs.
*
* @param jsn The lexer
* @param action The type of action, this can be PUSH or POP
* @param state A pointer to the stack currently affected by the action
* @param at A pointer to the position of the input buffer which triggered
* this action.
*/
typedef void (*jsonsl_stack_callback)(
jsonsl_t jsn,
jsonsl_action_t action,
struct jsonsl_state_st* state,
const jsonsl_char_t *at);
/**
* This is called when an error is encountered.
* Sometimes it's possible to 'erase' characters (by replacing them
* with whitespace). If you think you have corrected the error, you
* can return a true value, in which case the parser will backtrack
* and try again.
*
* @param jsn The lexer
* @param error The error which was thrown
* @param state the current state
* @param a pointer to the position of the input buffer which triggered
* the error. Note that this is not const, this is because you have the
* possibility of modifying the character in an attempt to correct the
* error
*
* @return zero to bail, nonzero to try again (this only makes sense if
* the input buffer has been modified by this callback)
*/
typedef int (*jsonsl_error_callback)(
jsonsl_t jsn,
jsonsl_error_t error,
struct jsonsl_state_st* state,
jsonsl_char_t *at);
struct jsonsl_st {
/** Public, read-only */
/** This is the current level of the stack */
unsigned int level;
/** Flag set to indicate we should stop processing */
unsigned int stopfl;
/**
* This is the current position, relative to the beginning
* of the stream.
*/
size_t pos;
/** This is the 'bytes' variable passed to feed() */
const jsonsl_char_t *base;
/** Callback invoked for PUSH actions */
jsonsl_stack_callback action_callback_PUSH;
/** Callback invoked for POP actions */
jsonsl_stack_callback action_callback_POP;
/** Default callback for any action, if neither PUSH or POP callbacks are defined */
jsonsl_stack_callback action_callback;
/**
* Do not invoke callbacks for objects deeper than this level.
* NOTE: This field establishes the lower bound for ignored callbacks,
* and is thus misnamed. `min_ignore_level` would actually make more
* sense, but we don't want to break API.
*/
unsigned int max_callback_level;
/** The error callback. Invoked when an error happens. Should not be NULL */
jsonsl_error_callback error_callback;
/* these are boolean flags you can modify. You will be called
* about notification for each of these types if the corresponding
* variable is true.
*/
/**
* @name Callback Booleans.
* These determine whether a callback is to be invoked for certain types of objects
* @{*/
/** Boolean flag to enable or disable the invokcation for events on this type*/
int call_SPECIAL;
int call_OBJECT;
int call_LIST;
int call_STRING;
int call_HKEY;
/*@}*/
/**
* @name u-Escape handling
* Special handling for the \\u-f00d type sequences. These are meant
* to be translated back into the corresponding octet(s).
* A special callback (if set) is invoked with *at=='u'. An application
* may wish to temporarily suspend parsing and handle the 'u-' sequence
* internally (or not).
*/
/*@{*/
/** Callback to be invoked for a u-escape */
jsonsl_stack_callback action_callback_UESCAPE;
/** Boolean flag, whether to invoke the callback */
int call_UESCAPE;
/** Boolean flag, whether we should return after encountering a u-escape:
* the callback is invoked and then we return if this is true
*/
int return_UESCAPE;
/*@}*/
struct {
int allow_trailing_comma;
} options;
/** Put anything here */
void *data;
/*@{*/
/** Private */
int in_escape;
char expecting;
char tok_last;
int can_insert;
unsigned int levels_max;
#ifndef JSONSL_NO_JPR
size_t jpr_count;
jsonsl_jpr_t *jprs;
/* Root pointer for JPR matching information */
size_t *jpr_root;
#endif /* JSONSL_NO_JPR */
/*@}*/
/**
* This is the stack. Its upper bound is levels_max, or the
* nlevels argument passed to jsonsl_new. If you modify this structure,
* make sure that this member is last.
*/
struct jsonsl_state_st stack[1];
};
/**
* Creates a new lexer object, with capacity for recursion up to nlevels
*
* @param nlevels maximum recursion depth
*/
JSONSL_API
jsonsl_t jsonsl_new(int nlevels);
/**
* Feeds data into the lexer.
*
* @param jsn the lexer object
* @param bytes new data to be fed
* @param nbytes size of new data
*/
JSONSL_API
void jsonsl_feed(jsonsl_t jsn, const jsonsl_char_t *bytes, size_t nbytes);
/**
* Resets the internal parser state. This does not free the parser
* but does clean it internally, so that the next time feed() is called,
* it will be treated as a new stream
*
* @param jsn the lexer
*/
JSONSL_API
void jsonsl_reset(jsonsl_t jsn);
/**
* Frees the lexer, cleaning any allocated memory taken
*
* @param jsn the lexer
*/
JSONSL_API
void jsonsl_destroy(jsonsl_t jsn);
/**
* Gets the 'parent' element, given the current one
*
* @param jsn the lexer
* @param cur the current nest, which should be a struct jsonsl_nest_st
*/
static JSONSL_INLINE
struct jsonsl_state_st *jsonsl_last_state(const jsonsl_t jsn,
const struct jsonsl_state_st *state)
{
/* Don't complain about overriding array bounds */
if (state->level > 1) {
return jsn->stack + state->level - 1;
} else {
return NULL;
}
}
/**
* Gets the state of the last fully consumed child of this parent. This is
* only valid in the parent's POP callback.
*
* @param the lexer
* @return A pointer to the child.
*/
static JSONSL_INLINE
struct jsonsl_state_st *jsonsl_last_child(const jsonsl_t jsn,
const struct jsonsl_state_st *parent)
{
return jsn->stack + (parent->level + 1);
}
/**Call to instruct the parser to stop parsing and return. This is valid
* only from within a callback */
static JSONSL_INLINE
void jsonsl_stop(jsonsl_t jsn)
{
jsn->stopfl = 1;
}
/**
* This enables receiving callbacks on all events. Doesn't do
* anything special but helps avoid some boilerplate.
* This does not touch the UESCAPE callbacks or flags.
*/
static JSONSL_INLINE
void jsonsl_enable_all_callbacks(jsonsl_t jsn)
{
jsn->call_HKEY = 1;
jsn->call_STRING = 1;
jsn->call_OBJECT = 1;
jsn->call_SPECIAL = 1;
jsn->call_LIST = 1;
}
/**
* A macro which returns true if the current state object can
* have children. This means a list type or an object type.
*/
#define JSONSL_STATE_IS_CONTAINER(state) \
(state->type == JSONSL_T_OBJECT || state->type == JSONSL_T_LIST)
/**
* These two functions, dump a string representation
* of the error or type, respectively. They will never
* return NULL
*/
JSONSL_API
const char* jsonsl_strerror(jsonsl_error_t err);
JSONSL_API
const char* jsonsl_strtype(jsonsl_type_t jt);
/**
* Dumps global metrics to the screen. This is a noop unless
* jsonsl was compiled with JSONSL_USE_METRICS
*/
JSONSL_API
void jsonsl_dump_global_metrics(void);
/* This macro just here for editors to do code folding */
#ifndef JSONSL_NO_JPR
/**
* @name JSON Pointer API
*
* JSONPointer API. This isn't really related to the lexer (at least not yet)
* JSONPointer provides an extremely simple specification for providing
* locations within JSON objects. We will extend it a bit and allow for
* providing 'wildcard' characters by which to be able to 'query' the stream.
*
* See http://tools.ietf.org/html/draft-pbryan-zyp-json-pointer-00
*
* Currently I'm implementing the 'single query' API which can only use a single
* query component. In the future I will integrate my yet-to-be-published
* Boyer-Moore-esque prefix searching implementation, in order to allow
* multiple paths to be merged into one for quick and efficient searching.
*
*
* JPR (as we'll refer to it within the source) can be used by splitting
* the components into mutliple sections, and incrementally 'track' each
* component. When JSONSL delivers a 'pop' callback for a string, or a 'push'
* callback for an object, we will check to see whether the index matching
* the component corresponding to the current level contains a match
* for our path.
*
* In order to do this properly, a structure must be maintained within the
* parent indicating whether its children are possible matches. This flag
* will be 'inherited' by call children which may conform to the match
* specification, and discarded by all which do not (thereby eliminating
* their children from inheriting it).
*
* A successful match is a complete one. One can provide multiple paths with
* multiple levels of matches e.g.
* /foo/bar/baz/^/blah
*
* @{
*/
/** The wildcard character */
#ifndef JSONSL_PATH_WILDCARD_CHAR
#define JSONSL_PATH_WILDCARD_CHAR '^'
#endif /* WILDCARD_CHAR */
#define JSONSL_XMATCH \
X(COMPLETE,1) \
X(POSSIBLE,0) \
X(NOMATCH,-1) \
X(TYPE_MISMATCH, -2)
typedef enum {
#define X(T,v) \
JSONSL_MATCH_##T = v,
JSONSL_XMATCH
#undef X
JSONSL_MATCH_UNKNOWN
} jsonsl_jpr_match_t;
typedef enum {
JSONSL_PATH_STRING = 1,
JSONSL_PATH_WILDCARD,
JSONSL_PATH_NUMERIC,
JSONSL_PATH_ROOT,
/* Special */
JSONSL_PATH_INVALID = -1,
JSONSL_PATH_NONE = 0
} jsonsl_jpr_type_t;
struct jsonsl_jpr_component_st {
/** The string the component points to */
char *pstr;
/** if this is a numeric type, the number is 'cached' here */
unsigned long idx;
/** The length of the string */
size_t len;
/** The type of component (NUMERIC or STRING) */
jsonsl_jpr_type_t ptype;
/** Set this to true to enforce type checking between dict keys and array
* indices. jsonsl_jpr_match() will return TYPE_MISMATCH if it detects
* that an array index is actually a child of a dictionary. */
short is_arridx;
/* Extra fields (for more advanced searches. Default is empty) */
JSONSL_JPR_COMPONENT_USER_FIELDS
};
struct jsonsl_jpr_st {
/** Path components */
struct jsonsl_jpr_component_st *components;
size_t ncomponents;
/**Type of the match to be expected. If nonzero, will be compared against
* the actual type */
unsigned match_type;
/** Base of allocated string for components */
char *basestr;
/** The original match string. Useful for returning to the user */
char *orig;
size_t norig;
};
/**
* Create a new JPR object.
*
* @param path the JSONPointer path specification.
* @param errp a pointer to a jsonsl_error_t. If this function returns NULL,
* then more details will be in this variable.
*
* @return a new jsonsl_jpr_t object, or NULL on error.
*/
JSONSL_API
jsonsl_jpr_t jsonsl_jpr_new(const char *path, jsonsl_error_t *errp);
/**
* Destroy a JPR object
*/
JSONSL_API
void jsonsl_jpr_destroy(jsonsl_jpr_t jpr);
/**
* Match a JSON object against a type and specific level
*
* @param jpr the JPR object
* @param parent_type the type of the parent (should be T_LIST or T_OBJECT)
* @param parent_level the level of the parent
* @param key the 'key' of the child. If the parent is an array, this should be
* empty.
* @param nkey - the length of the key. If the parent is an array (T_LIST), then
* this should be the current index.
*
* NOTE: The key of the child means any kind of associative data related to the
* element. Thus: <<< { "foo" : [ >>,
* the opening array's key is "foo".
*
* @return a status constant. This indicates whether a match was excluded, possible,
* or successful.
*/
JSONSL_API
jsonsl_jpr_match_t jsonsl_jpr_match(jsonsl_jpr_t jpr,
unsigned int parent_type,
unsigned int parent_level,
const char *key, size_t nkey);
/**
* Alternate matching algorithm. This matching algorithm does not use
* JSONPointer but relies on a more structured searching mechanism. It
* assumes that there is a clear distinction between array indices and
* object keys. In this case, the jsonsl_path_component_st::ptype should
* be set to @ref JSONSL_PATH_NUMERIC for an array index (the
* jsonsl_path_comonent_st::is_arridx field will be removed in a future
* version).
*
* @param jpr The path
* @param parent The parent structure. Can be NULL if this is the root object
* @param child The child structure. Should not be NULL
* @param key Object key, if an object
* @param nkey Length of object key
* @return Status constant if successful
*
* @note
* For successful matching, both the key and the path itself should be normalized
* to contain 'proper' utf8 sequences rather than utf16 '\uXXXX' escapes. This
* should currently be done in the application. Another version of this function
* may use a temporary buffer in such circumstances (allocated by the application).
*
* Since this function also checks the state of the child, it should only
* be called on PUSH callbacks, and not POP callbacks
*/
JSONSL_API
jsonsl_jpr_match_t
jsonsl_path_match(jsonsl_jpr_t jpr,
const struct jsonsl_state_st *parent,
const struct jsonsl_state_st *child,
const char *key, size_t nkey);
/**
* Associate a set of JPR objects with a lexer instance.
* This should be called before the lexer has been fed any data (and
* behavior is undefined if you don't adhere to this).
*
* After using this function, you may subsequently call match_state() on
* given states (presumably from within the callbacks).
*
* Note that currently the first JPR is the quickest and comes
* pre-allocated with the state structure. Further JPR objects
* are chained.
*
* @param jsn The lexer
* @param jprs An array of jsonsl_jpr_t objects
* @param njprs How many elements in the jprs array.
*/
JSONSL_API
void jsonsl_jpr_match_state_init(jsonsl_t jsn,
jsonsl_jpr_t *jprs,
size_t njprs);
/**
* This follows the same semantics as the normal match,
* except we infer parent and type information from the relevant state objects.
* The match status (for all possible JPR objects) is set in the *out parameter.
*
* If a match has succeeded, then its JPR object will be returned. In all other
* instances, NULL is returned;
*
* @param jpr The jsonsl_jpr_t handle
* @param state The jsonsl_state_st which is a candidate
* @param key The hash key (if applicable, can be NULL if parent is list)
* @param nkey Length of hash key (if applicable, can be zero if parent is list)
* @param out A pointer to a jsonsl_jpr_match_t. This will be populated with
* the match result
*
* @return If a match was completed in full, then the JPR object containing
* the matching path will be returned. Otherwise, the return is NULL (note, this
* does not mean matching has failed, it can still be part of the match: check
* the out parameter).
*/
JSONSL_API
jsonsl_jpr_t jsonsl_jpr_match_state(jsonsl_t jsn,
struct jsonsl_state_st *state,
const char *key,
size_t nkey,
jsonsl_jpr_match_t *out);
/**
* Cleanup any memory allocated and any states set by
* match_state_init() and match_state()
* @param jsn The lexer
*/
JSONSL_API
void jsonsl_jpr_match_state_cleanup(jsonsl_t jsn);
/**
* Return a string representation of the match result returned by match()
*/
JSONSL_API
const char *jsonsl_strmatchtype(jsonsl_jpr_match_t match);
/* @}*/
/**
* Utility function to convert escape sequences into their original form.
*
* The decoders I've sampled do not seem to specify a standard behavior of what
* to escape/unescape.
*
* RFC 4627 Mandates only that the quoute, backslash, and ASCII control
* characters (0x00-0x1f) be escaped. It is often common for applications
* to escape a '/' - however this may also be desired behavior. the JSON
* spec is not clear on this, and therefore jsonsl leaves it up to you.
*
* Additionally, sometimes you may wish to _normalize_ JSON. This is specifically
* true when dealing with 'u-escapes' which can be expressed perfectly fine
* as utf8. One use case for normalization is JPR string comparison, in which
* case two effectively equivalent strings may not match because one is using
* u-escapes and the other proper utf8. To normalize u-escapes only, pass in
* an empty `toEscape` table, enabling only the `u` index.
*
* @param in The input string.
* @param out An allocated output (should be the same size as in)
* @param len the size of the buffer
* @param toEscape - A sparse array of characters to unescape. Characters
* which are not present in this array, e.g. toEscape['c'] == 0 will be
* ignored and passed to the output in their original form.
* @param oflags If not null, and a \uXXXX escape expands to a non-ascii byte,
* then this variable will have the SPECIALf_NONASCII flag on.
*
* @param err A pointer to an error variable. If an error ocurrs, it will be
* set in this variable
* @param errat If not null and an error occurs, this will be set to point
* to the position within the string at which the offending character was
* encountered.
*
* @return The effective size of the output buffer.
*
* @note
* This function now encodes the UTF8 equivalents of utf16 escapes (i.e.
* 'u-escapes'). Previously this would encode the escapes as utf16 literals,
* which while still correct in some sense was confusing for many (especially
* considering that the inputs were variations of char).
*
* @note
* The output buffer will never be larger than the input buffer, since
* standard escape sequences (i.e. '\t') occupy two bytes in the source
* but only one byte (when unescaped) in the output. Likewise u-escapes
* (i.e. \uXXXX) will occupy six bytes in the source, but at the most
* two bytes when escaped.
*/
JSONSL_API
size_t jsonsl_util_unescape_ex(const char *in,
char *out,
size_t len,
const int toEscape[128],
unsigned *oflags,
jsonsl_error_t *err,
const char **errat);
/**
* Convenience macro to avoid passing too many parameters
*/
#define jsonsl_util_unescape(in, out, len, toEscape, err) \
jsonsl_util_unescape_ex(in, out, len, toEscape, NULL, err, NULL)
#endif /* JSONSL_NO_JPR */
#ifdef __cplusplus