-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathhex1tobin.py
executable file
·179 lines (163 loc) · 6.33 KB
/
hex1tobin.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
#!/usr/bin/env python
#
# Copyright (C) 2019 Mark Jenkins <[email protected]>
# This file is part of knightpies
#
# knightpies is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# knightpies is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with knightpies. If not, see <http://www.gnu.org/licenses/>.
from __future__ import generators # for yield keyword in python 2.2
from string import hexdigits
from pythoncompat import write_byte, open_ascii, COMPAT_FALSE, COMPAT_TRUE
# Tokenizer states. Only their distinctness matters; STATE_EOF is terminal
# and therefore has no entry in TRANSITIONS.
(STATE_MAIN,
 STATE_DECLARE,
 STATE_REF,
 STATE_COMMENT,
 STATE_EOF) = range(5)

# Kinds of token the tokenizer can emit.
(TOK_HEX,
 TOK_LABEL,
 TOK_REF) = range(3)

# TRANSITIONS[state][character] -> (next state, token type or None)
# Within each state the None key is the fallback used for any character
# that has no entry of its own.
TRANSITIONS = {}

# outside of any construct: look for comment, label and reference markers,
# everything else is a candidate hex digit
TRANSITIONS[STATE_MAIN] = {
    ';': (STATE_COMMENT, None),
    '#': (STATE_COMMENT, None),
    ':': (STATE_DECLARE, None),
    '@': (STATE_REF, None),
    None: (STATE_MAIN, TOK_HEX),
}

# the single character after ':' is the label being declared
TRANSITIONS[STATE_DECLARE] = {
    None: (STATE_MAIN, TOK_LABEL),
}

# the single character after '@' is the label being referenced
TRANSITIONS[STATE_REF] = {
    None: (STATE_MAIN, TOK_REF),
}

# a comment swallows everything up to and including the end of the line
TRANSITIONS[STATE_COMMENT] = {
    '\n': (STATE_MAIN, None),
    None: (STATE_COMMENT, None),
}
# Confirming a relevant fact of ASCII encoding which stage1_assembler-1 relies
# on: almost all of the characters from 'G' up to, but not including,
# 'a' (decimal 97) do not have their 6th bit set (2**5==32),
# so you can mask with 0xdf==int('11011111', 2) to do a lower to
# upper case conversion of a-f
#
# the exception is decimal 64+32==96, which is backtick "`"
#
# stage1_assembler-1 handles both lower and uppercase a-f/A-F by masking with
# 0xdf==int('11011111', 2) and checking against decimal 70 [ord('F')];
# backtick is caught up in this and ends up acting like '9'
# Import-time sanity checks of the ASCII facts the tokenizer depends on.
# 0xdf is every bit except 2**5 == 32, i.e. the lower -> upper case mask.
if __debug__:
    assert 71 == ord('G')
    assert 95 == ord('_')
    # nothing from 'G' through '_' may fold down into the 'A'-'F' range
    for i in range(ord('G'), ord('_') + 1):
        assert ord('F') < (i & 0xdf)
    # ...but backtick, the very next character, does
    assert 96 == ord("`")
    assert ord('F') < ord("`")
    assert (ord("`") & 0xdf) <= ord('F')

# Subtracting this from the code of an upper case hex letter gives the
# value of that hex digit, e.g. ord('A') - 55 == 10.
UPPER_HEX_TO_DECIMAL = ord('A') - 0xA
assert 55 == UPPER_HEX_TO_DECIMAL
def get_next_token_and_state(c, state):
    """Classify one input character and advance the tokenizer state.

    c is the character just read; an empty string signals end of input.
    state is the current STATE_* value and must not be STATE_EOF.

    Returns a pair (token, next_state) in which token is itself a pair
    (token_type, text).  token_type is None when the character produces
    no token.  At end of input the result is ((None, None), STATE_EOF).
    """
    assert state != STATE_EOF

    # an empty read means there is nothing left in the input
    if len(c) == 0:
        return (None, None), STATE_EOF

    state_table = TRANSITIONS[state]
    next_state, token_type = state_table.get(c, state_table[None])

    # Only hex candidates need a closer look.  Characters that produce no
    # token (token_type None) and label/reference characters are passed
    # through with the type the transition table assigned.
    if token_type != TOK_HEX:
        return (token_type, c), next_state

    if c in hexdigits:
        return (TOK_HEX, c), next_state

    # Replicate a quirk of stage1_assembler-1: it folds lower case onto
    # upper case by masking out the 6th bit (2**5==32), compares the
    # result against 'F' (ascii decimal 70) and subtracts 55 to turn
    # A-F/a-f into a number.  Backtick "`" (ascii decimal 96) slips
    # through that test and ends up being treated like '9'.
    case_mask = int('11011111', 2)
    if ord(c) > ord('F') and (ord(c) & case_mask) <= ord('F'):
        # the only character in ascii that meets these conditions
        assert c == '`'
        folded = ord(c) & case_mask
        nybble = (folded - UPPER_HEX_TO_DECIMAL) & 0xF
        digit = hex(nybble)[2:]
        assert len(digit) == 1  # the 0xF mask assures this
        assert digit == '9'  # what "`" will encode to
        return (TOK_HEX, digit), next_state

    # any other character in the main state is simply ignored
    return (None, c), next_state
def read_char_and_get_next_token_and_state(fileobj, state):
    """Read one character from fileobj and tokenize it.

    state is the current STATE_* value and must not be STATE_EOF.
    Returns whatever get_next_token_and_state returns for the character
    read: a (token, next_state) pair.
    """
    assert state != STATE_EOF
    return get_next_token_and_state(fileobj.read(1), state)
def tokenize_file(fileobj):
    """Generate the (token_type, text) tokens found in fileobj.

    Characters are read one at a time from the current position of
    fileobj until end of input.  Characters that produce no token
    (token_type None) are skipped.
    """
    current_state = STATE_MAIN
    while current_state != STATE_EOF:
        token, current_state = read_char_and_get_next_token_and_state(
            fileobj, current_state)
        if current_state == STATE_EOF:
            # end of input, there is no token to report
            break
        if token[0] is not None:
            yield token
def get_label_table(input_file):
    """First pass: map each declared label to its output address.

    input_file is rewound to the start and tokenized in full.
    Returns a dictionary from label character to the number of output
    bytes that precede the label's declaration.
    """
    input_file.seek(0)
    labels = {}
    nybbles_seen = 0     # hex digits so far, two of them make one byte
    reference_bytes = 0  # bytes taken up by label references so far
    for token_type, c in tokenize_file(input_file):
        if token_type == TOK_HEX:
            nybbles_seen += 1
        elif token_type == TOK_REF:
            # every reference is emitted as a two byte relative address
            reference_bytes += 2
        elif token_type == TOK_LABEL:
            # only completed hex digit pairs count towards the address
            labels[c] = nybbles_seen // 2 + reference_bytes
    return labels
def int_bytes_from_hex1_fd(input_file):
    """Second pass: generate the output bytes as integers 0..255.

    The label table is built first with get_label_table, then input_file
    is rewound and tokenized again.  Every pair of hex digits yields one
    byte.  Every label reference yields two bytes, high byte first,
    holding the label's address relative to the address just past the
    reference.

    A reference to a label that was never declared raises KeyError.
    """
    labels = get_label_table(input_file)
    input_file.seek(0)  # start again for a second pass
    address = 0
    high_nybble = None  # value of the first digit of an unfinished pair
    for token_type, c in tokenize_file(input_file):
        if token_type == TOK_HEX:
            digit_value = int(c, 16)
            if high_nybble is None:
                high_nybble = digit_value
            else:
                yield (high_nybble << 4) + digit_value
                high_nybble = None
                address += 1
        elif token_type == TOK_REF:
            # the offset is measured from the end of the two byte reference
            address += 2
            offset = labels[c] - address
            yield (offset >> 8) & 0xFF
            yield offset & 0xFF
def write_binary_filefd_from_hex1_filefd(input_file, output_file):
    """Assemble the hex1 source in input_file into output_file.

    Both arguments are already opened file objects.  Each byte value
    produced by int_bytes_from_hex1_fd is handed to write_byte together
    with output_file.  Neither file is closed here.
    """
    byte_values = int_bytes_from_hex1_fd(input_file)
    for byte_value in byte_values:
        write_byte(output_file, byte_value)
def write_binary_file_from_hex1_file(input_filename, output_filename):
    """Assemble the hex1 file input_filename into output_filename.

    The input is opened with open_ascii and the output is opened for
    binary writing.  Both files are closed again before returning, also
    when the conversion raises (for example KeyError for a reference to
    an undeclared label) or when the output file cannot be opened.
    """
    input_file = open_ascii(input_filename)
    # try/finally rather than "with" to keep working on old interpreters
    try:
        output_file = open(output_filename, 'wb')  # binary output
        try:
            write_binary_filefd_from_hex1_filefd(input_file, output_file)
        finally:
            output_file.close()
    finally:
        input_file.close()
if __name__ == "__main__":
    # command line use: hex1tobin.py INPUT_HEX1_FILE OUTPUT_BINARY_FILE
    # only the first two arguments are looked at
    import sys
    write_binary_file_from_hex1_file(*sys.argv[1:3])