-
Notifications
You must be signed in to change notification settings - Fork 3
/
iterators.h
202 lines (174 loc) · 6.22 KB
/
iterators.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
// Copyright 2007 Google Inc. All Rights Reserved.
//
// This program is free software; you can redistribute it and/or
// modify it under the terms of the GNU General Public License
// as published by the Free Software Foundation; either version 2
// of the License, or (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
//
// Author: [email protected] (Stephen Chen)
#ifndef TOOLS_TAGS_ITERATORS_H__
#define TOOLS_TAGS_ITERATORS_H__
#include "strutil.h"
#include "tagsutil.h"
// CharacterIterator is the interface for forward-only, read-only
// traversal of a sequence of characters. It is used as a generic
// input source for SExpression::ParseSexp. The sequence exposed by an
// implementation consists of zero or more non-\0 characters followed
// by a \0, and IsDone() is true only while the cursor sits on that \0.
//
// Implementations that walk c-strings and files are provided below.
//
// Sample usage (the operators are defined on the object, so the
// pointer has to be dereferenced before they are applied):
//
//   CharacterIterator* c_iter = new ...;
//   for (; !c_iter->IsDone(); ++(*c_iter)) {
//     char c = **c_iter;
//     ...
//   }
//   delete c_iter;
class CharacterIterator {
 public:
  // Virtual so implementations can be destroyed through this interface.
  virtual ~CharacterIterator() {}

  // Yields the character currently under the cursor.
  virtual const char operator*() const = 0;

  // Moves the cursor forward by one character.
  virtual void operator++() = 0;

  // True once no more characters can be read. Applying ++ when
  // IsDone() holds has undefined effects. IsDone() should be
  // synonymous with *char_iter == '\0'.
  virtual bool IsDone() const = 0;

 protected:
  // Only implementations may construct the interface.
  CharacterIterator() {}
};
// Simple class to support iterating over the characters of a c-str.
// The string is not copied: it must stay alive and unmodified for as
// long as the iterator is in use.
class StringCharacterIterator : public CharacterIterator {
 public:
  // Creates a new iterator over the given c-str. A NULL pointer is
  // treated as the empty string, so the iterator starts out done.
  explicit StringCharacterIterator(const char* str)
      : CharacterIterator(),
        cursor_(str != NULL ? str : "") {}

  // Returns the character under the cursor ('\0' at the end).
  virtual const char operator*() const { return *cursor_; }

  // Advances to the next character. Must not be called once IsDone().
  virtual void operator++() { ++cursor_; }

  // True when the cursor has reached the terminating '\0'. The test is
  // on the pointed-to character, not on the pointer itself.
  virtual bool IsDone() const { return *cursor_ == '\0'; }

 private:
  // Points to the next character to read; never NULL.
  const char* cursor_;
};
// Abstract class that iterates over the characters read from a file
// object. Implementing classes can set the file object to take
// input from different sources.
class FileObjectCharacterIterator : public CharacterIterator {
public:
// Closes the file when we're done.
virtual ~FileObjectCharacterIterator() { fclose(file_); }
virtual const char operator*() const {
return (current_char_ == EOF) ? '\0' : current_char_;
}
virtual void operator++() { LoadNextChar(); }
virtual bool IsDone() const { return current_char_ == EOF; }
protected:
// Implementing classes should call Init() in their constructors
// and pass in the file object to be read.
explicit FileObjectCharacterIterator() : CharacterIterator() {}
void Init(FILE* file) {
file_ = file;
LoadNextChar();
}
private:
// Gets the next char from file, checking to see there was not a
// read error.
inline void LoadNextChar() {
current_char_ = fgetc(file_);
// Signal when there is a file error
CHECK(!(current_char_ == EOF && ferror(file_)));
}
// FILE object we're reading from
FILE* file_;
// Always contains the next character to return
int current_char_;
};
// Iterates over the characters of a file.
class FileCharacterIterator : public FileObjectCharacterIterator {
public:
// Creates a new iterator reading from the specified file.
explicit FileCharacterIterator(string filename)
: FileObjectCharacterIterator() {
FILE * file = fopen(filename.c_str(), "r");
CHECK(file != NULL) << "Could not open file " << filename;
Init(file);
}
virtual ~FileCharacterIterator() { }
};
// gunzips a file stream and reads the characters
class GzippedFileCharacterIterator : public FileObjectCharacterIterator {
public:
// Creates a new iterator reading from the specified file.
explicit GzippedFileCharacterIterator(string filename)
: FileObjectCharacterIterator() {
//Check file existance
FILE * file = fopen(filename.c_str(), "r");
CHECK(file != NULL);
fclose(file);
// Pipe in the output from "gunzip -c FILENAME".
string cmd;
cmd.append("gunzip -c ");
cmd.append(filename.c_str());
Init(popen(cmd.c_str(), "r"));
}
virtual ~GzippedFileCharacterIterator() { }
};
// Advances *psexp until it points at the first non-whitespace
// character of the sequence. T only needs to support unary * (yielding
// a character) and prefix ++, so this works for character pointers as
// well as for CharacterIterator objects.
template <typename T>
static inline void SkipWhitespace(T* psexp) {
  for (; ascii_isspace(**psexp); ++(*psexp)) {
    // Nothing to do here: the loop header does the advancing.
  }
}
// FileReader reads objects of type T from a file sequentially. T must
// provide a static ParseFromCharIterator(CharacterIterator*) that
// returns a T*. Sample usage:
//
//   FileReader<T> f("/path/to/file");
//   while (!f.IsDone()) {
//     T* item = f.GetNext();
//     ...
//   }
//
// NOTE: the reader owns its character iterator and deletes it on
// destruction; copying a FileReader would delete it twice, so don't.
template<class T>
class FileReader {
 public:
  // Creates new reader reading from the specified (uncompressed) file.
  FileReader(string filename)
      : pchar_iter(OpenIterator(filename, false)) {}

  // Creates new reader reading from the specified file, optionally
  // enabling a gunzip filter on the input.
  FileReader(string filename, bool enable_gunzip)
      : pchar_iter(OpenIterator(filename, enable_gunzip)) {}

  // Deletes the underlying iterator when we're done.
  ~FileReader() { delete pchar_iter; }

  // Parses and returns the next object found in the file.
  T* GetNext() { return T::ParseFromCharIterator(pchar_iter); }

  // Returns true if there is nothing more to read from the file.
  bool IsDone() {
    // Whitespace is consumed first so that trailing blanks before EOF
    // are not mistaken for another object.
    SkipWhitespace(pchar_iter);
    return pchar_iter->IsDone();
  }

 private:
  // Builds the iterator matching the requested input mode.
  static CharacterIterator* OpenIterator(const string& filename,
                                         bool enable_gunzip) {
    if (enable_gunzip)
      return new GzippedFileCharacterIterator(filename);
    return new FileCharacterIterator(filename);
  }

  // This is the underlying iterator used to get all the characters in
  // the file.
  CharacterIterator* pchar_iter;
};
#endif // TOOLS_TAGS_ITERATORS_H__