This repository has been archived by the owner on Jul 3, 2021. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathparse.rb
319 lines (272 loc) · 8.32 KB
/
parse.rb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
#!/usr/bin/env ruby
# This file is called rdparse.rb because it implements a Recursive
# Descent Parser. Read more about the theory on e.g.
# http://en.wikipedia.org/wiki/Recursive_descent_parser
# 2010-02-11 New version of this file for the 2010 instance of TDP007
# which handles false return values during parsing, and has an easy way
# of turning on and off debug messages.
# 2014-02-16 New version that handles { false } blocks and :empty tokens.
require 'logger'
class Rule
# A rule is created through the rule method of the Parser class, like this:
# rule :term do
# match(:term, '*', :dice) {|a, _, b| a * b }
# match(:term, '/', :dice) {|a, _, b| a / b }
# match(:dice)
# end
Match = Struct.new :pattern, :block
def initialize(name, parser)
@logger = parser.logger
# The name of the expressions this rule matches
@name = name
# We need the parser to recursively parse sub-expressions occurring
# within the pattern of the match objects associated with this rule
@parser = parser
@matches = []
# Left-recursive matches
@lrmatches = []
end
# Add a matching expression to this rule, as in this example:
# match(:term, '*', :dice) {|a, _, b| a * b }
# The arguments to 'match' describe the constituents of this expression.
def match(*pattern, &block)
match = Match.new(pattern, block)
# If the pattern is left-recursive, then add it to the left-recursive set
if pattern[0] == @name
pattern.shift
@lrmatches << match
else
@matches << match
end
end
def parse
# Try non-left-recursive matches first, to avoid infinite recursion
match_result = try_matches(@matches)
return nil if match_result.nil?
loop do
result = try_matches(@lrmatches, match_result)
return match_result if result.nil?
match_result = result
end
end
private
# Try out all matching patterns of this rule
def try_matches(matches, pre_result = nil)
match_result = nil
# Begin at the current position in the input string of the parser
start = @parser.pos
matches.each do |match|
# pre_result is a previously available result from evaluating expressions
result = pre_result.nil? ? [] : [pre_result]
# We iterate through the parts of the pattern, which may be e.g.
# [:expr,'*',:term]
match.pattern.each_with_index do |token,index|
# If this "token" is a compound term, add the result of
# parsing it to the "result" array
if @parser.rules[token]
result << @parser.rules[token].parse
if result.last.nil?
result = nil
break
end
@logger.debug("Matched '#{@name} = #{match.pattern[index..-1].inspect}'")
else
# Otherwise, we consume the token as part of applying this rule
nt = @parser.expect(token)
if nt
result << nt
if @lrmatches.include?(match.pattern) then
pattern = [@name]+match.pattern
else
pattern = match.pattern
end
@logger.debug("Matched token '#{nt}' as part of rule '#{@name} <= #{pattern.inspect}'")
else
result = nil
break
end
end
end
if result
if match.block
match_result = match.block.call(*result)
else
match_result = result[0]
end
@logger.debug("'#{@parser.string[[email protected]]}' matched '#{@name}' and generated '#{match_result.inspect}'") unless match_result.nil?
break
else
# If this rule did not match the current token list, move
# back to the scan position of the last match
@parser.pos = start
end
end
return match_result
end
end
class Parser
attr_accessor :pos
attr_reader :rules, :string, :logger
class ParseError < RuntimeError
end
def initialize(language_name, &block)
@logger = Logger.new(STDOUT)
@lex_tokens = []
@rules = {}
@start = nil
@language_name = language_name
instance_eval(&block)
end
# Tokenize the string into small pieces
def tokenize(string)
@tokens = []
@string = string.clone
until string.empty?
# Unless any of the valid tokens of our language are the prefix of
# 'string', we fail with an exception
raise ParseError, "unable to lex '#{string}" unless @lex_tokens.any? do |tok|
match = tok.pattern.match(string)
# The regular expression of a token has matched the beginning of 'string'
if match
@logger.debug("Token #{match[0]} consumed")
# Also, evaluate this expression by using the block
# associated with the token
@tokens << tok.block.call(match.to_s) if tok.block
# consume the match and proceed with the rest of the string
string = match.post_match
true
else
# this token pattern did not match, try the next
false
end # if
end # raise
end # until
end
def parse(string)
# First, split the string according to the "token" instructions given.
# Afterwards @tokens contains all tokens that are to be parsed.
tokenize(string)
# These variables are used to match if the total number of tokens
# are consumed by the parser
@pos = 0
@max_pos = 0
@expected = []
# Parse (and evaluate) the tokens received
result = @start.parse
# If there are unparsed extra tokens, signal error
if @pos != @tokens.size
p "TOKENS #{@tokens}"
raise ParseError, "Parse error. expected: '#{@expected.join(', ')}', found '#{@tokens[@max_pos]}'"
end
return result
end
def next_token
@pos += 1
return @tokens[@pos - 1]
end
# Return the next token in the queue
def expect(tok)
return tok if tok == :empty
t = next_token
if @pos - 1 > @max_pos
@max_pos = @pos - 1
@expected = []
end
return t if tok === t
@expected << tok if @max_pos == @pos - 1 && [email protected]?(tok)
return nil
end
def to_s
"Parser for #{@language_name}"
end
private
LexToken = Struct.new(:pattern, :block)
def token(pattern, &block)
@lex_tokens << LexToken.new(Regexp.new('\\A' + pattern.source), block)
end
def start(name, &block)
rule(name, &block)
@start = @rules[name]
end
def rule(name,&block)
@current_rule = Rule.new(name, self)
@rules[name] = @current_rule
instance_eval &block
@current_rule = nil
end
def match(*pattern, &block)
@current_rule.send(:match, *pattern, &block)
end
end
##############################################################################
#
# This part defines the dice language
#
##############################################################################
class DiceRoller
def DiceRoller.roll(times, sides)
(1..times).inject(0) {|sum, _| sum + rand(sides) + 1 }
end
def initialize
@diceParser = Parser.new("dice roller") do
token(/\s+/)
token(/\d+/) {|m| m.to_i }
token(/./) {|m| m }
start :expr do
match(:expr, '+', :term) {|a, _, b| a + b }
match(:expr, '-', :term) {|a, _, b| a - b }
match(:term)
end
rule :term do
match(:term, '*', :dice) {|a, _, b| a * b }
match(:term, '/', :dice) {|a, _, b| a / b }
match(:dice)
end
rule :dice do
match(:atom, 'd', :sides) {|a, _, b| DiceRoller.roll(a, b) }
match('d', :sides) {|_, b| DiceRoller.roll(1, b) }
match(:atom)
end
rule :sides do
match('%') { 100 }
match(:atom)
end
rule :atom do
# Match the result of evaluating an integer expression, which
# should be an Integer
match(Integer)
match('(', :expr, ')') {|_, a, _| a }
end
end
end
def done(str)
["quit","exit","bye",""].include?(str.chomp)
end
def roll
print "[diceroller] "
str = gets
if done(str) then
puts "Bye."
else
puts "=> #{@diceParser.parse str}"
roll
end
end
def log(state = true)
if state
@diceParser.logger.level = Logger::DEBUG
else
@diceParser.logger.level = Logger::WARN
end
end
end
# Examples of use
# irb(main):1696:0> DiceRoller.new.roll
# [diceroller] 1+3
# => 4
# [diceroller] 1+d4
# => 2
# [diceroller] 1+d4
# => 3
# [diceroller] (2+8*d20)*3d6
# => 306