-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathJsonReader.h
417 lines (358 loc) · 20.5 KB
/
JsonReader.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
#pragma once
#include <codecvt>
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <fstream>
#include <functional>
#include <locale>
#include <map>
#include <queue>
#include <set>
#include <string>
#include <vector>
#ifdef _MSC_VER
#define USE_WINAPI
#endif
// Reads JSON data and notifies its elements and their values to the client.
// The client subscribes callbacks to event types (object begin/end, array begin/end, array item, key/value pair)
// for a given element name or path, and then calls readFile() or readBuffer().
class JsonReader
{
public:
    // Converts the character encoding of a string.
    // This class is public so it can be used externally as JsonReader::TextConverter (note that it is not
    // thread-safe).
    class TextConverter
    {
    public:
        TextConverter();
        ~TextConverter();

        // Not copyable: the converter keeps raw buffers (m_bufferNarrow / m_bufferWide) and has a user-declared
        // destructor, so an implicit member-wise copy would leave two objects sharing the same pointers.
        // On the non-WinAPI build the std::wstring_convert member already makes the type non-copyable; deleting
        // the operations explicitly gives every build the same behaviour.
        TextConverter(const TextConverter&) = delete;
        TextConverter& operator=(const TextConverter&) = delete;

        // These functions receive a string and its length and return the converted string (null terminated).
        // If not null, the output length is stored in the 'lenOut' argument (not including the null terminator).
        // The term 'MultiByte' here refers to a non-Unicode charset, where characters can be encoded with one
        // or more bytes according to a locale such as ISO-8859-1 or GB18030.
        // NOTE(review): the returned pointers presumably refer to the internal buffers below and stay valid
        // only until the next conversion on the same object - confirm against the implementation.
        const char* WideToUtf8(const wchar_t* bufferWide, const size_t lenWide, size_t* lenOut = nullptr);
        const wchar_t* Utf8ToWide(const char* bufferUtf8, const size_t lenUtf8, size_t* lenOut = nullptr);
        void Utf8ToWide(const std::string stringUtf8, std::wstring& stringWide);
        const char* MultiByteToUtf8(const char* bufferMB, const size_t lenMB, size_t* lenOut = nullptr);
        const char* Utf8ToMultiByte(const char* bufferUtf8, const size_t lenUtf8, size_t* lenOut = nullptr);
        void Utf8ToMultiByte(const std::string stringUtf8, std::string& stringMB);

        // Encodes a 4-byte Unicode code point into UTF-8. The encoded length is stored in 'lenOut'.
        const char* CodePointToUtf8(const uint32_t codePoint, size_t& lenOut);

    protected:
        char* m_bufferNarrow;   // Holds the last decoded narrow string.
        size_t m_lenMaxNarrow;  // Maximum length of the narrow string.
        wchar_t* m_bufferWide;  // Holds the last decoded wide string.
        size_t m_lenMaxWide;    // Maximum length of the wide string.
#ifndef USE_WINAPI
        // NOTE: The <codecvt> header has been deprecated in C++17.
        // Used here until a standard replacement is available.
        std::wstring_convert<std::codecvt_utf8<wchar_t>> m_codecvt;
        // Auxiliary variables, made members for convenience.
        std::string m_str;
        std::wstring m_wstr;
#endif
    };

protected:
    // Represents a string. The content is stored internally as UTF-8.
    struct STR
    {
        STR();
        STR(const wchar_t* source, bool useLocale = false);
        ~STR() { release(); }

        // Not copyable: the destructor releases 'str', so a member-wise copy would make two objects release
        // the same pointer.
        STR(const STR&) = delete;
        STR& operator=(const STR&) = delete;

        // Methods
        void resize(size_t newCapacity);      // Allocates more memory for the string.
        void setLength(size_t newLength);     // Sets the current length of the string. It does not allocate memory.
        void clear();                         // Clears the string.
        void release();                       // Frees the memory allocated for the string.
        const char* toNarrow();               // It may return the string encoded as multibyte or UTF-8.
        const char* toUtf8() const { return str; }  // Returns the internal string as UTF-8.
        const wchar_t* toWide()               // Returns the internal string as a wide character string.
        {
            // Not const: the conversion goes through the converter's non-const Utf8ToWide().
            return converter.Utf8ToWide(str, length);
        }
        void copy(const char* source, size_t sourceLen,  // Sets the content of the internal string.
                  bool checkCapacity = false, bool checkEncoding = false);

        // Variables
        char* str;        // The string is internally stored as UTF-8.
        size_t length;    // Current length of the string.
        size_t capacity;  // Number of allocated bytes to store the string.
        bool isAscii;     // True if the string contains ASCII characters exclusively.
        bool isQuoted;    // True if the string was enclosed between quotation marks in the JSON structure.
        bool useLocale;   // If true, the method 'toNarrow()' returns the string as multibyte according to the locale.

    private:
        static const int CAPACITY_DEFAULT = 1024;  // Default number of bytes available.
        TextConverter converter;                   // Used to convert character encodings.
    };

    // Stores the value of the current array item when an array is being parsed.
    // The item only points to the value; it never releases it.
    struct ARRAY_ITEM
    {
        // Registers 'itemValue' as the value of the current item and marks the item as holding a value.
        void setValue(STR* itemValue)
        {
            value = itemValue;
            isValue = true;
        }
        // Returns the registered value, or nullptr if the item is not marked as holding a value.
        STR* getValue() const { return isValue ? value : nullptr; }
        // Forgets the registered value.
        void clear()
        {
            value = nullptr;
            isValue = false;
        }
        // Default initializers keep the item in the 'cleared' state from construction, so reading 'isValue'
        // (e.g. through JsonReader::isArrayItemValue()) before the first setValue()/clear() is well defined.
        STR* value = nullptr;
        bool isValue = false;  // True if the value is of type string, number, boolean or null.
    } m_arrayItem;

    // Encapsulates the JSON input source, which can be a file or a buffer.
    class JsonInput
    {
    public:
        JsonInput();
        virtual ~JsonInput();

        // If 'isFile' is true, 'source' is assumed to be the full path of a JSON file.
        // Otherwise, it's assumed to be a buffer and makes an internal copy.
        void init(const char* source, bool isFile);
        // Releases the internal buffer and closes the file if open.
        void clear();
        // Looks for the first valid character to process and returns 'true' if found.
        bool findFirstChar();
        // Returns the next character to process.
        // If 'verbatim' is true, returns exactly the next character (for example, while reading strings or
        // numbers). Otherwise, discards irrelevant characters such as spaces or new lines between elements.
        char getNextChar(bool verbatim = false);
        // Returns the current character read (the one at the buffer's current index; no bounds check is made).
        char getCurrentChar() const { return m_buffer[m_idx]; }
        // Called when an escape sequence is found.
        void readEscapeSequence(STR& text);
        // Moves the buffer's index one position back.
        void goToPreviousChar();
        // Moves the buffer's index forward until a quotation mark is found.
        void goToNextQuote();
        // Returns true if no more data can be read from the source.
        bool isEOF() const { return m_isEOF; }
        // Returns the current absolute position in the input source.
        size_t getPosition() const { return m_position; }

        // Methods related to progress notification.
        // Sets the progress increment and the callback to be notified.
        void setProgressParams(int step, std::function<void(int)> progressCallback);
        // Returns the progress as the percentage of the number of bytes read so far.
        double getProgress();
        // Notifies the progress when it goes beyond the increment.
        void notifyProgress();
        // Notifies that the parsing has finished.
        void notifyProgressEnd();

    protected:
        bool openFile(const char* fileFullPath);
        bool setBuffer(const char* buffer);
        void fillBuffer();
        void getEscapedCodePoint(STR& text);
        char charToHex(char input);

    protected:
        char* m_buffer;             // Buffer containing all or part of the input source.
        size_t m_bufferLen;         // Number of bytes available in the buffer.
        size_t m_maxLen;            // Total number of bytes to read from the input source.
        size_t m_idx;               // Index of the current character in the buffer.
        size_t m_position;          // Absolute character position of the input data.
        std::ifstream m_file;       // The input file, in case we are reading from a file.
        bool m_isEOF;               // True when the end has been reached.
        TextConverter m_converter;  // Used to decode Unicode code points.
        // Used to notify the progress.
        int m_progressStep;                           // Increment of the percentage.
        size_t m_progressNext;                        // Next progress threshold to be notified.
        std::function<void(int)> m_progressCallback;  // Callback to notify the progress.
    };

    // Wrappers of the client's callback target (function, lambda expression, etc.) that will receive the events.
    // A specialized class is used depending on the number of arguments sent.
    // -> Callback's base class.
    class Callback
    {
    public:
        Callback() = default;
        virtual ~Callback() = default;
        virtual void notify(STR* value) = 0;  // Executes a client's callback, optionally passing one string.
    };

    // -> Callback without arguments.
    class Callback0 : public Callback
    {
    public:
        // Stores a copy of 'callback'. Taking it by const reference accepts everything the previous non-const
        // reference did, plus const objects and temporaries.
        Callback0(const std::function<void()>& callback) : m_func(callback) {}
        // Invokes the stored callable; the value is ignored because the client's callback takes no arguments.
        void notify(STR*) override { m_func(); }

    protected:
        std::function<void()> m_func;
    };

    // -> Callback receiving one string.
    class Callback1 : public Callback
    {
    public:
        Callback1(std::function<void(const char*)>& callback);
        Callback1(std::function<void(const wchar_t*)>& callback);
        void notify(STR* value) override;

    protected:
        bool m_narrow;  // True if the string is to be sent as a narrow string (multibyte or UTF-8).
        std::function<void(const char*)> m_funcNarrow;    // The argument is passed as a narrow string.
        std::function<void(const wchar_t*)> m_funcWide;   // The argument is passed as a wide string.
    };

    // Notifies the client about one type of event (new object found, new array found, etc.).
    class Publisher
    {
        // Orders C strings by content (strcmp) rather than by pointer value, so map look-ups match on the text.
        struct comparer
        {
            bool operator()(char const* str1, char const* str2) const { return std::strcmp(str1, str2) < 0; }
        };
        // Map used to retrieve the callback to notify when a specified element is found.
        // The key is the name or path of the element.
        using CALLBACK_MAP = std::map<const char*, Callback*, comparer>;

    public:
        Publisher();
        // Subscribes a callback to one type of event related to a specific element.
        void subscribe(const wchar_t* element, Callback* callback);
        void subscribe(const char* elementUtf8, Callback* callback);
        // Unsubscribes all callbacks related to one event type.
        void unsubscribe();
        // Looks for any callbacks associated to the name or path of the current element.
        void notify(char* path, size_t namePos, size_t nameLen, size_t pathLen, STR* value = nullptr);
        // Returns the name of the current element.
        void getCurrentElementName(STR& elemName);

    protected:
        // Finds a callback associated to the element described by 'nameOrPath' and, if found, calls it passing
        // 'value'.
        void notify(CALLBACK_MAP* map, char* nameOrPath, STR* value);

    protected:
        CALLBACK_MAP m_callbacksName;       // Callbacks associated to element names.
        CALLBACK_MAP m_callbacksPath;       // Callbacks associated to element paths.
        Callback* m_callbackAll;            // Callback used to notify an event on all elements that apply.
        size_t m_numSubscribersByName;      // Number of callbacks subscribed using the element's name.
        size_t m_numSubscribersByPath;      // Number of callbacks subscribed using the element's path.
        char* m_name;                       // Name of the current element being notified.
        size_t m_nameLen;                   // Length of the name of the current element being notified.
        CALLBACK_MAP::iterator m_iterator;  // Auxiliary variable made member for convenience.
    };

public:
    // Main class declarations.
    JsonReader();
    ~JsonReader();

    // Methods to process the JSON structure from a file or a buffer.
    bool readFile(const char* fileFullPath);  // File contents must be encoded in UTF-8.
    bool readBuffer(const char* buffer);      // The buffer must be null terminated and encoded in UTF-8.

    // Methods to subscribe to event types related to specific JSON elements (object found, array found, etc.).
    // The argument 'element' determines the JSON element by its name or its path.
    // Paths are built by appending the names and the opening curly and square brackets found from the root up to
    // the element (without quotes).
    // Additionally, object and array paths end with an opening curly and square bracket respectively.
    // For example, '{users[{id' targets the key 'id' contained in the object '{users[{' in the array '{users[' in
    // the root object '{'. To receive notifications about the array 'users' use the name 'users' or the path
    // '{users['.
    // However, paths must be used in case elements with the same name are found in different contexts in the JSON
    // data.
    // Use NULL to subscribe to an event type for all elements (e.g. to get notified when any object is found).
    // The argument 'callback' is a callable object that will be triggered when the event on the target element
    // happens.

    // Notifies that the definition of a new object has started.
    void onObjectBegin(const wchar_t* element, std::function<void()> callback);
    // Notifies that the definition of the current object has finished.
    void onObjectEnd(const wchar_t* element, std::function<void()> callback);
    // Notifies that the definition of a new array has started.
    void onArrayBegin(const wchar_t* element, std::function<void()> callback);
    // Notifies that the definition of the current array has finished.
    void onArrayEnd(const wchar_t* element, std::function<void()> callback);
    // Notifies that an array's item has been found, passing the item's value (if applicable) as an argument.
    // This argument is a pointer to a string if the item is of type string, number or boolean. Otherwise it is
    // NULL.
    void onArrayItem(const wchar_t* element, std::function<void(const char* value)> callback);     // Value as narrow.
    void onArrayItem(const wchar_t* element, std::function<void(const wchar_t* value)> callback);  // Value as wide.
    // Notifies that a pair (key/value) has been found, passing the key's value as an argument.
    // This argument is a pointer to a string if the value is of type string, number or boolean. Otherwise it is
    // NULL.
    void onPair(const wchar_t* element, std::function<void(const char* value)> callback);     // Value as narrow.
    void onPair(const wchar_t* element, std::function<void(const wchar_t* value)> callback);  // Value as wide.

    // Same functions passing the 'element' argument encoded in UTF-8.
    void onObjectBegin(const char* elementUtf8, std::function<void()> callback);
    void onObjectEnd(const char* elementUtf8, std::function<void()> callback);
    void onArrayBegin(const char* elementUtf8, std::function<void()> callback);
    void onArrayEnd(const char* elementUtf8, std::function<void()> callback);
    void onArrayItem(const char* elementUtf8, std::function<void(const char*)> callback);
    void onArrayItem(const char* elementUtf8, std::function<void(const wchar_t*)> callback);
    void onPair(const char* elementUtf8, std::function<void(const char*)> callback);
    void onPair(const char* elementUtf8, std::function<void(const wchar_t*)> callback);

    // Methods related to progress notification.
    // The callback 'progressCallback' will execute whenever the percentage of bytes read so far increments by
    // 'step'. The step must range from 1 to 99 and it is a target value (the actual percentage is passed to the
    // callback). To disable progress notification, set 'progressCallback' to null.
    void onProgress(int step, std::function<void(int progress)> progressCallback);
    // Returns the progress as the percentage of the current number of bytes read.
    double getProgress() { return m_input.getProgress(); }

    // Methods that return a list of unique paths of all elements found in a JSON text.
    // These may help to find out the exact element's path in order to subscribe to its events.
    bool getPathsFromFile(const char* fileFullPath, std::set<std::wstring>& paths);
    bool getPathsFromBuffer(const char* buffer, std::set<std::wstring>& paths);

    // Methods that provide additional information from within the callback functions.
    // Return the current element's path.
    std::string getCurrentElementPath();
    std::wstring getCurrentElementPathWide();
    void getCurrentElementPath(std::string& elementPath);
    void getCurrentElementPathWide(std::wstring& elementPath);
    // Return the current element's name.
    std::string getCurrentElementName();
    std::wstring getCurrentElementNameWide();
    void getCurrentElementName(std::string& elementName);
    void getCurrentElementNameWide(std::wstring& elementName);

    // Returns true if the value passed to the callback function was originally quoted.
    // Used to distinguish strings from booleans and numbers (e.g. 123 and "123" are both passed as "123").
    bool isValueQuoted() const { return m_elemValue.isQuoted; }
    // Returns true if the current path contains only ASCII characters.
    bool isPathAscii() const { return m_path.isAscii; }
    // Returns true if the current array item is of type string, number, boolean or null.
    bool isArrayItemValue() const { return m_arrayItem.isValue; }

    // Method to receive narrow strings in a non-Unicode multibyte encoding such as ISO-8859-1 or GB18030.
    // This conversion, which is turned off by default, does not apply when values are notified as wide char
    // strings. If 'useLocale' is true, 'locale' specifies the locale to use for encoding (make sure the locale is
    // installed).
    void useLocale(bool useLocale, const char* locale = nullptr);

    // Methods related to process cancellation.
    void cancel() { m_cancel = true; }               // Stops reading further data.
    bool isCancelled() const { return m_cancel; }    // Returns true if the reading has been cancelled.

    // Methods to get a description when an error is found.
    std::string getErrorDescription() const { return m_errDescription; }
    // Same description as a wide string. Each byte of the narrow description is widened individually (no
    // multi-byte decoding is performed). The byte goes through 'unsigned char' first so that values >= 0x80 are
    // not sign-extended on platforms where 'char' is signed.
    std::wstring getErrorDescriptionWide() const
    {
        std::wstring wide;
        wide.reserve(m_errDescription.size());
        for (const char ch : m_errDescription)
        {
            wide.push_back(static_cast<wchar_t>(static_cast<unsigned char>(ch)));
        }
        return wide;
    }

protected:
    // Reads a file or buffer containing the JSON data encoded in UTF-8.
    // If 'isFile' is true, 'source' is the full path of the input file. Otherwise, it's a pointer to a UTF-8
    // buffer. The optional argument 'pathList' returns a list of unique paths of all the elements found.
    bool read(const char* source, bool isFile, std::set<std::wstring>* pathList = nullptr);

    // Methods to reset the state.
    void clear();
    void clearStrings();
    void unsubscribe();  // Removes all callbacks.

    // Methods used for parsing.
    void parseValue(size_t pathLen, ARRAY_ITEM* arrayItem = nullptr);
    void parseObject(size_t pathLen, size_t namePos, size_t elemNameLen);
    void parseArray(size_t pathLen, size_t namePos, size_t elemNameLen);
    void parseString(STR& text);
    void parseNumber(STR& number);
    bool parseTrue();
    bool parseFalse();
    bool parseNull();
    bool isNumericCharacter(char ch);         // True if 'ch' may be part of a number (digit, decimal, sign...).
    void updateCurrentPath(size_t& pathLen);  // Updates the length of the current path according to the context.

    // Notifies an event.
    // The argument 'publisher' determines the type of event (new object, new array...).
    // The arguments 'namePos', 'nameLen' and 'pathLen' describe the element that raised the event.
    // If applicable, the argument 'value' contains the string to be sent to the client. Otherwise it is NULL.
    void notify(Publisher* publisher, size_t namePos, size_t nameLen, size_t pathLen, STR* value = nullptr);

    // Throws a runtime exception from a variable argument list ('format' followed by its arguments).
    [[noreturn]] static void throwException(const char* format, ...);

protected:
    // Represents the JSON input stream.
    JsonInput m_input;

    // Publishers used to notify one type of event (new object, new array, etc.) to their subscribed callbacks.
    Publisher m_onObjectBegin;
    Publisher m_onObjectEnd;
    Publisher m_onArrayBegin;
    Publisher m_onArrayEnd;
    Publisher m_onArrayItem;
    Publisher m_onPair;
    Publisher* m_currentPublisher;  // Points to the publisher that is currently sending a notification.

    // Strings used during the parsing of the JSON data.
    STR m_elemName;         // Stores the name of the last element found.
    STR m_elemValue;        // Stores the value of the last value (string, number or boolean) found.
    STR m_path;             // Stores the path of the current element being parsed.
    STR m_currentElemName;  // Auxiliary string that stores the name of the current element being notified.
                            // It is only updated when the client requests the current element's name.

    // Flags
    bool m_useLocale;       // If true, UTF-8 strings are notified as non-Unicode multibyte strings.
    bool m_notifyProgress;  // If true, the progress is notified.
    bool m_cancel;          // If true, the parsing is interrupted.

    // Stores the description of the last error.
    std::string m_errDescription;
};