/*
* PTLsim: Cycle Accurate x86-64 Simulator
* 64-bit low level functions
*
* Copyright 2003-2008 Matt T. Yourst <[email protected]>
*/
.text
.intel_syntax
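# Intel operand order with % register prefixes; the memcpy at the
# end of this file temporarily switches to .att_syntax.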
#define __ASM_ONLY__
#include <ptlhwdef.h>
.section .ctors
.global __CTOR_LIST__
__CTOR_LIST__:
.previous
.extern ctx
.extern stack_min_addr
.extern x87state
.extern saved_cs
.extern saved_ss
.extern saved_ds
.extern saved_es
.extern saved_fs
.extern saved_gs
.global ptlsim_preinit_entry
ptlsim_preinit_entry:
#
# We may be a 64-bit process running in a 32-bit address space,
# which means argc, argv, etc. will be in 32-bit format and must
# be converted before any initialization routines run.
#
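# Snapshot the x87/SSE state and segment selectors so they can be
# restored later when switching back to native mode: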
fxsave x87state
mov word ptr [saved_cs],%cs
mov word ptr [saved_ss],%ss
mov word ptr [saved_ds],%ds
mov word ptr [saved_es],%es
mov word ptr [saved_fs],%fs
mov word ptr [saved_gs],%gs
mov byte ptr [running_in_sim_mode],1
mov %rdi,%rsp # origrsp
mov %rsi,offset main # next_init_func
call ptlsim_preinit
test byte ptr [inside_ptlsim],1
jnz 1f
# Not inside PTLsim: just call main() directly (no stack switch)
# Put argc, argv, etc. into arg registers
mov %rdi,[%rsp + 0*8] # argc
lea %rsi,[%rsp + 1*8] # argv[]
call main #
int3 # (main never returns)
1:
mov %rbp,%rax # save new stack pointer returned by ptlsim_preinit
/*
 * Give the user thread a really big stack by touching memory
 * below the grows-down stack object. We have to do this now,
 * since PTLsim has no concept of grow-down, auto-allocated
 * stacks and will throw erroneous page faults unless the stack
 * pages are already visible to mqueryall().
 */
xor %eax,%eax
mov %rsi,[stack_min_addr] # lowest address we must make resident
1:
sub %rsp,4096 # step down one page at a time
mov [%rsp],%rax # touch the page to force allocation
cmp %rsp,%rsi
jae 1b # loop until we reach stack_min_addr
mov %rsp,%rbp # update to new stack pointer
# Put argc, argv, etc. into arg registers
mov %rdi,[%rsp + 0*8] # argc
lea %rsi,[%rsp + 1*8] # argv[]
call main
/*
* struct ThreadState {
* ThreadState* self;
* void* stack;
* ...
* };
*/
#define ThreadState_self 8*0
#define ThreadState_rsp 8*1
#define ThreadState_simcall 8*2
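# (Byte offsets of the 8-byte fields above, in declaration order.)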
.extern ctx
.extern save_context_switch_to_sim
.extern x87state
.extern basetls
.extern running_in_sim_mode
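#
# save_context_switch_to_sim_lowlevel:
# Save the complete user-visible register state into the global ctx
# structure, switch onto PTLsim's private per-thread stack, and hand
# off to save_context_switch_to_sim().
#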
.global save_context_switch_to_sim_lowlevel
save_context_switch_to_sim_lowlevel:
mov byte ptr [running_in_sim_mode],1
mov [ctx + 8*REG_rax + 8*0],%rax
mov [ctx + 8*REG_rax + 8*1],%rcx
mov [ctx + 8*REG_rax + 8*2],%rdx
mov [ctx + 8*REG_rax + 8*3],%rbx
mov [ctx + 8*REG_rax + 8*4],%rsp
mov [ctx + 8*REG_rax + 8*5],%rbp
mov [ctx + 8*REG_rax + 8*6],%rsi
mov [ctx + 8*REG_rax + 8*7],%rdi
mov [ctx + 8*REG_rax + 8*8],%r8
mov [ctx + 8*REG_rax + 8*9],%r9
mov [ctx + 8*REG_rax + 8*10],%r10
mov [ctx + 8*REG_rax + 8*11],%r11
mov [ctx + 8*REG_rax + 8*12],%r12
mov [ctx + 8*REG_rax + 8*13],%r13
mov [ctx + 8*REG_rax + 8*14],%r14
mov [ctx + 8*REG_rax + 8*15],%r15
mov %rax,[%rsp] # Get return %rip (if we got here through a CALL insn)
mov [ctx + 8*REG_rip],%rax # Save %rip
mov %rsp,[basetls + ThreadState_rsp] # Switch to private thread stack
pushfq # Save rflags
pop qword ptr [ctx + 8*REG_flags + 8] # Put flags into structure
# (skip tr0/tr1/tr2)
mov qword ptr [ctx + 8*REG_zero],0 # Save %zero
and %rsp,-16 # align stack to 16 bytes
fxsave x87state
mov word ptr [saved_cs],%cs
mov word ptr [saved_ss],%ss
mov word ptr [saved_ds],%ds
mov word ptr [saved_es],%es
mov word ptr [saved_fs],%fs
mov word ptr [saved_gs],%gs
sub %rsp,64 # reserve 64 bytes of scratch space
and %rsp,-16 # re-align to 16 bytes before calling into C++
call save_context_switch_to_sim
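# save_context_switch_to_sim() is not expected to return here; control
# resumes via switch_to_native_restore_context_lowlevel below.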
.data
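# Scratch slots for the final indirect jump back into user code:
# a 64-bit absolute target for 64-to-64 switches, and a 48-bit far
# pointer (32-bit offset plus 16-bit code selector) for the far jmp
# that drops back into 32-bit compatibility mode.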
switch_to_native_restore_context_temp_64_to_64:
.quad 0
switch_to_native_restore_context_temp_64_to_32:
.long 0
.word 0x23 # selector for 32-bit x86 code
switch_to_native_restore_context_temp_64_or_32_func:
.quad 0
.previous
# extern "C" void switch_to_native_restore_context_lowlevel(const UserContext& ctx, int switch_64_to_32);
# %rdi = ctx
# %rsi = switch_64_to_32
.global switch_to_native_restore_context_lowlevel
switch_to_native_restore_context_lowlevel:
# Calling convention:
# %rdi = pointer to state to restore
# %rsi = set if switching from 64-bit to 32-bit mode
fxrstor x87state
mov %rcx,offset switch_to_native_restore_context_64_to_64
mov %rdx,offset switch_to_native_restore_context_64_to_32
test %rsi,1 # Switching back to a 32-bit user thread?
cmovnz %rcx,%rdx
# Only overwrite seg regs if going back to 32-bit (behavior specific to Intel x86-64 implementation)
jz 1f
mov %fs,word ptr [saved_fs]
mov %gs,word ptr [saved_gs]
1:
mov [switch_to_native_restore_context_temp_64_or_32_func],%rcx
mov %rax,[%rdi + 8*REG_rip] # Load %rip
mov [switch_to_native_restore_context_temp_64_to_64],%rax # Save %rip for the final 64-bit jump
mov [switch_to_native_restore_context_temp_64_to_32],%eax # Save low 32 bits of %rip for the far jump
lea %rsp,[%rdi + 8*REG_flags] # Load address of flags
popfq # Restore flags
mov %rsp,[%rdi + 8*REG_rsp] # Restore user %rsp; now on user stack
mov %rax,[%rdi + 8*REG_rax + 8*0]
mov %rcx,[%rdi + 8*REG_rax + 8*1]
mov %rdx,[%rdi + 8*REG_rax + 8*2]
mov %rbx,[%rdi + 8*REG_rax + 8*3]
# mov %rsp,[%rdi + 8*REG_rax + 8*4] # (already done)
mov %rbp,[%rdi + 8*REG_rax + 8*5]
mov %rsi,[%rdi + 8*REG_rax + 8*6]
# mov %rdi,[%rdi + 8*REG_rax + 8*7] # (done at very end)
mov %r8,[%rdi + 8*REG_rax + 8*8]
mov %r9,[%rdi + 8*REG_rax + 8*9]
mov %r10,[%rdi + 8*REG_rax + 8*10]
mov %r11,[%rdi + 8*REG_rax + 8*11]
mov %r12,[%rdi + 8*REG_rax + 8*12]
mov %r13,[%rdi + 8*REG_rax + 8*13]
mov %r14,[%rdi + 8*REG_rax + 8*14]
mov %r15,[%rdi + 8*REG_rax + 8*15]
mov %rdi,[%rdi + 8*REG_rdi] # Restore %rdi
jmp qword ptr [switch_to_native_restore_context_temp_64_or_32_func]
switch_to_native_restore_context_64_to_32:
mov byte ptr [running_in_sim_mode],0 # store must be atomic
jmp fword ptr [switch_to_native_restore_context_temp_64_to_32]
switch_to_native_restore_context_64_to_64:
mov byte ptr [running_in_sim_mode],0 # store must be atomic
jmp qword ptr [switch_to_native_restore_context_temp_64_to_64]
.global inside_sim_escape_code_template_64bit
.global inside_sim_escape_code_template_64bit_end
inside_sim_escape_code_template_64bit:
# Arguments are already passed in registers: rdi rsi rdx rcx r8 r9
# Undocumented x86 escape opcode used to make PTL calls
.byte 0x0f
.byte 0x37
ret
inside_sim_escape_code_template_64bit_end:
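/*
 * A minimal usage sketch (the names below are hypothetical, not from
 * this file): the bytes between the two labels above are copied or
 * mapped somewhere callable and invoked as an ordinary function.
 * Natively the undocumented opcode would normally raise #UD; under
 * PTLsim the decoder recognizes 0F 37 and performs the PTL call,
 * reading up to six arguments from the registers listed above and
 * returning the result in %rax:
 *
 *   typedef unsigned long long W64;
 *   typedef W64 (*ptlcall_func_t)(W64 op, W64 arg1, W64 arg2);
 *   ptlcall_func_t ptlcall = (ptlcall_func_t)template_copy; // hypothetical copy of the template
 *   W64 rc = ptlcall(op, arg1, arg2);
 */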
/*
* from arch/x86_64/lib/memcpy.S
* memcpy - Copy a memory block.
*
* Input:
* rdi destination
* rsi source
* rdx count
*
* Output:
* rax original destination
*/
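#
# Structure: a 64-byte unrolled loop (four interleaved load/store
# register pairs per iteration), then an 8-byte word loop, then a
# final byte loop for the remaining 0..7 bytes.
#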
.att_syntax
.globl __memcpy
.globl memcpy
.p2align 4
__memcpy:
memcpy:
pushq %rbx
movq %rdi,%rax
movl %edx,%ecx
shrl $6,%ecx # number of 64-byte blocks
jz .Lhandle_tail
.p2align 4
.Lloop_64:
decl %ecx
movq (%rsi),%r11
movq 8(%rsi),%r8
movq %r11,(%rdi)
movq %r8,1*8(%rdi)
movq 2*8(%rsi),%r9
movq 3*8(%rsi),%r10
movq %r9,2*8(%rdi)
movq %r10,3*8(%rdi)
movq 4*8(%rsi),%r11
movq 5*8(%rsi),%r8
movq %r11,4*8(%rdi)
movq %r8,5*8(%rdi)
movq 6*8(%rsi),%r9
movq 7*8(%rsi),%r10
movq %r9,6*8(%rdi)
movq %r10,7*8(%rdi)
leaq 64(%rsi),%rsi
leaq 64(%rdi),%rdi
jnz .Lloop_64
.Lhandle_tail:
movl %edx,%ecx
andl $63,%ecx # bytes remaining after the 64-byte blocks
shrl $3,%ecx # number of 8-byte words
jz .Lhandle_7
.p2align 4
.Lloop_8:
decl %ecx
movq (%rsi),%r8
movq %r8,(%rdi)
leaq 8(%rdi),%rdi
leaq 8(%rsi),%rsi
jnz .Lloop_8
.Lhandle_7:
movl %edx,%ecx
andl $7,%ecx # final 0..7 bytes
jz .Lende
.p2align 4
.Lloop_1:
movb (%rsi),%r8b
movb %r8b,(%rdi)
incq %rdi
incq %rsi
decl %ecx
jnz .Lloop_1
.Lende:
popq %rbx
ret
.Lfinal:
.intel_syntax