- use rustc_abi::{Align, Endian, HasDataLayout, Size};
+ use rustc_abi::{Align, BackendRepr, Endian, HasDataLayout, Primitive, Size, TyAndLayout};
+ use rustc_codegen_ssa::MemFlags;
use rustc_codegen_ssa::common::IntPredicate;
use rustc_codegen_ssa::mir::operand::OperandRef;
- use rustc_codegen_ssa::traits::{BaseTypeCodegenMethods, BuilderMethods, ConstCodegenMethods};
+ use rustc_codegen_ssa::traits::{
+     BaseTypeCodegenMethods, BuilderMethods, ConstCodegenMethods, LayoutTypeCodegenMethods,
+ };
use rustc_middle::ty::Ty;
use rustc_middle::ty::layout::{HasTyCtxt, LayoutOf};
@@ -303,6 +306,298 @@ fn emit_s390x_va_arg<'ll, 'tcx>(
    bx.load(val_type, val_addr, layout.align.abi)
}

+ fn emit_x86_64_sysv64_va_arg<'ll, 'tcx>(
+     bx: &mut Builder<'_, 'll, 'tcx>,
+     list: OperandRef<'tcx, &'ll Value>,
+     target_ty: Ty<'tcx>,
+ ) -> &'ll Value {
+     let dl = bx.cx.data_layout();
+
+     // Implementation of the systemv x86_64 ABI calling convention for va_args, see
+     // https://gitlab.com/x86-psABIs/x86-64-ABI (section 3.5.7). This implementation is heavily
+     // based on the one in clang.
+
+     // We're able to take some shortcuts because the return type of `va_arg` must implement the
+     // `VaArgSafe` trait. Currently, only pointers, f64, i32, u32, i64 and u64 implement this trait.
+
+     // typedef struct __va_list_tag {
+     //     unsigned int gp_offset;
+     //     unsigned int fp_offset;
+     //     void *overflow_arg_area;
+     //     void *reg_save_area;
+     // } va_list[1];
+     let va_list_addr = list.immediate();
+
+     // Peel off any newtype wrappers.
+     let layout = {
+         let mut layout = bx.cx.layout_of(target_ty);
+
+         while let Some((_, inner)) = layout.non_1zst_field(bx.cx) {
+             layout = inner;
+         }
+
+         layout
+     };
+
+     // AMD64-ABI 3.5.7p5: Step 1. Determine whether type may be passed
+     // in the registers. If not go to step 7.
+
+     // AMD64-ABI 3.5.7p5: Step 2. Compute num_gp to hold the number of
+     // general purpose registers needed to pass type and num_fp to hold
+     // the number of floating point registers needed.
+
+     let mut num_gp_registers = 0;
+     let mut num_fp_registers = 0;
+
+     let mut registers_for_primitive = |p| match p {
+         Primitive::Int(integer, _is_signed) => {
+             num_gp_registers += integer.size().bytes().div_ceil(8) as u32;
+         }
+         Primitive::Float(float) => {
+             num_fp_registers += float.size().bytes().div_ceil(16) as u32;
+         }
+         Primitive::Pointer(_) => {
+             num_gp_registers += 1;
+         }
+     };
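+     // Each eightbyte of an integer needs one general purpose register (so an i128
+     // needs two), while any float fits in a single 16-byte SSE slot of the register
+     // save area and therefore needs one floating point register.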
+
+     match layout.layout.backend_repr() {
+         BackendRepr::Scalar(scalar) => {
+             registers_for_primitive(scalar.primitive());
+         }
+         BackendRepr::ScalarPair(scalar1, scalar2) => {
+             registers_for_primitive(scalar1.primitive());
+             registers_for_primitive(scalar2.primitive());
+         }
+         BackendRepr::SimdVector { .. } => {
+             // Because no instance of VaArgSafe uses a non-scalar `BackendRepr`.
+             unreachable!(
+                 "No x86-64 SysV va_arg implementation for {:?}",
+                 layout.layout.backend_repr()
+             )
+         }
+         BackendRepr::Memory { .. } => {
+             let mem_addr = x86_64_sysv64_va_arg_from_memory(bx, va_list_addr, layout);
+             return bx.load(layout.llvm_type(bx), mem_addr, layout.align.abi);
+         }
+     };
+
+     // AMD64-ABI 3.5.7p5: Step 3. Verify whether arguments fit into
+     // registers. In the case: l->gp_offset > 48 - num_gp * 8 or
+     // l->fp_offset > 176 - num_fp * 16 go to step 7.
+
+     let unsigned_int_offset = 4;
+     let ptr_offset = 8;
+     let gp_offset_ptr = va_list_addr;
+     let fp_offset_ptr = bx.inbounds_ptradd(va_list_addr, bx.cx.const_usize(unsigned_int_offset));
+
+     let gp_offset_v = bx.load(bx.type_i32(), gp_offset_ptr, Align::from_bytes(8).unwrap());
+     let fp_offset_v = bx.load(bx.type_i32(), fp_offset_ptr, Align::from_bytes(4).unwrap());
+
+     let mut use_regs = bx.const_bool(false);
+
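+     // The register save area holds the six general purpose argument registers
+     // (6 * 8 = 48 bytes) followed by the eight SSE argument registers
+     // (8 * 16 = 128 bytes), so `gp_offset` ranges over 0..=48 and `fp_offset`
+     // over 48..=176.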
+     if num_gp_registers > 0 {
+         let max_offset_val = 48u32 - num_gp_registers * 8;
+         let fits_in_gp = bx.icmp(IntPredicate::IntULE, gp_offset_v, bx.const_u32(max_offset_val));
+         use_regs = fits_in_gp;
+     }
+
+     if num_fp_registers > 0 {
+         let max_offset_val = 176u32 - num_fp_registers * 16;
+         let fits_in_fp = bx.icmp(IntPredicate::IntULE, fp_offset_v, bx.const_u32(max_offset_val));
+         use_regs = if num_gp_registers > 0 { bx.and(use_regs, fits_in_fp) } else { fits_in_fp };
+     }
+
+     let in_reg = bx.append_sibling_block("va_arg.in_reg");
+     let in_mem = bx.append_sibling_block("va_arg.in_mem");
+     let end = bx.append_sibling_block("va_arg.end");
+
+     bx.cond_br(use_regs, in_reg, in_mem);
+
+     // Emit code to load the value if it was passed in a register.
+     bx.switch_to_block(in_reg);
+
+     // AMD64-ABI 3.5.7p5: Step 4. Fetch type from l->reg_save_area with
+     // an offset of l->gp_offset and/or l->fp_offset. This may require
+     // copying to a temporary location in case the parameter is passed
+     // in different register classes or requires an alignment greater
+     // than 8 for general purpose registers and 16 for XMM registers.
+     //
+     // FIXME(llvm): This really results in shameful code when we end up needing to
+     // collect arguments from different places; often what should result in a
+     // simple assembling of a structure from scattered addresses has many more
+     // loads than necessary. Can we clean this up?
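+     // `reg_save_area` is the fourth field of `__va_list_tag`, at byte offset
+     // 2 * 4 (the two unsigned ints) + 8 (the overflow_arg_area pointer) = 16.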
+     let reg_save_area_ptr =
+         bx.inbounds_ptradd(va_list_addr, bx.cx.const_usize(2 * unsigned_int_offset + ptr_offset));
+     let reg_save_area_v = bx.load(bx.type_ptr(), reg_save_area_ptr, dl.pointer_align.abi);
+
+     let reg_addr = match layout.layout.backend_repr() {
+         BackendRepr::Scalar(scalar) => match scalar.primitive() {
+             Primitive::Int(_, _) | Primitive::Pointer(_) => {
+                 let reg_addr = bx.inbounds_ptradd(reg_save_area_v, gp_offset_v);
+
+                 // Copy into a temporary if the type is more aligned than the register save area.
+                 copy_to_temporary_if_more_aligned(bx, reg_addr, layout)
+             }
+             Primitive::Float(_) => bx.inbounds_ptradd(reg_save_area_v, fp_offset_v),
+         },
+         BackendRepr::ScalarPair(scalar1, scalar2) => {
+             let ty_lo = bx.cx().scalar_pair_element_backend_type(layout, 0, false);
+             let ty_hi = bx.cx().scalar_pair_element_backend_type(layout, 1, false);
+
+             let align_lo = layout.field(bx.cx, 0).layout.align().abi;
+             let align_hi = layout.field(bx.cx, 1).layout.align().abi;
+
+             match (scalar1.primitive(), scalar2.primitive()) {
+                 (Primitive::Float(_), Primitive::Float(_)) => {
+                     // SSE registers are spaced 16 bytes apart in the register save
+                     // area, we need to collect the two eightbytes together.
+                     // The ABI isn't explicit about this, but it seems reasonable
+                     // to assume that the slots are 16-byte aligned, since the stack is
+                     // naturally 16-byte aligned and the prologue is expected to store
+                     // all the SSE registers to the RSA.
+                     let reg_lo_addr = bx.inbounds_ptradd(reg_save_area_v, fp_offset_v);
+                     let reg_hi_addr = bx.inbounds_ptradd(reg_lo_addr, bx.const_i32(16));
+
+                     let align = layout.layout.align().abi;
+                     let tmp = bx.alloca(layout.layout.size(), align);
+
+                     let reg_lo = bx.load(ty_lo, reg_lo_addr, align_lo);
+                     let reg_hi = bx.load(ty_hi, reg_hi_addr, align_hi);
+
+                     let offset = scalar1.size(bx.cx).align_to(align_hi).bytes();
+                     let field0 = tmp;
+                     let field1 = bx.inbounds_ptradd(tmp, bx.const_u32(offset as u32));
+
+                     bx.store(reg_lo, field0, align);
+                     bx.store(reg_hi, field1, align);
+
+                     tmp
+                 }
+                 (Primitive::Float(_), _) | (_, Primitive::Float(_)) => {
+                     let gp_addr = bx.inbounds_ptradd(reg_save_area_v, gp_offset_v);
+                     let fp_addr = bx.inbounds_ptradd(reg_save_area_v, fp_offset_v);
+
+                     let (reg_lo_addr, reg_hi_addr) = match scalar1.primitive() {
+                         Primitive::Float(_) => (fp_addr, gp_addr),
+                         Primitive::Int(_, _) | Primitive::Pointer(_) => (gp_addr, fp_addr),
+                     };
+
+                     let tmp = bx.alloca(layout.layout.size(), layout.layout.align().abi);
+
+                     let reg_lo = bx.load(ty_lo, reg_lo_addr, align_lo);
+                     let reg_hi = bx.load(ty_hi, reg_hi_addr, align_hi);
+
+                     let offset = scalar1.size(bx.cx).align_to(align_hi).bytes();
+                     let field0 = tmp;
+                     let field1 = bx.inbounds_ptradd(tmp, bx.const_u32(offset as u32));
+
+                     bx.store(reg_lo, field0, align_lo);
+                     bx.store(reg_hi, field1, align_hi);
+
+                     tmp
+                 }
+                 (_, _) => {
+                     // Two integer/pointer values are just contiguous in memory.
+                     let reg_addr = bx.inbounds_ptradd(reg_save_area_v, gp_offset_v);
+
+                     // Copy into a temporary if the type is more aligned than the register save area.
+                     copy_to_temporary_if_more_aligned(bx, reg_addr, layout)
+                 }
+             }
+         }
+         BackendRepr::SimdVector { .. } => {
+             unreachable!("panics in the previous match on `backend_repr`")
+         }
+         BackendRepr::Memory { .. } => {
+             unreachable!("early returns in the previous match on `backend_repr`")
+         }
+     };
+
+     // AMD64-ABI 3.5.7p5: Step 5. Set:
+     // l->gp_offset = l->gp_offset + num_gp * 8
+     if num_gp_registers > 0 {
+         let offset = bx.const_u32(num_gp_registers * 8);
+         let sum = bx.add(gp_offset_v, offset);
+         bx.store(sum, gp_offset_ptr, Align::from_bytes(8).unwrap());
+     }
+
+     // l->fp_offset = l->fp_offset + num_fp * 16.
+     if num_fp_registers > 0 {
+         let offset = bx.const_u32(num_fp_registers * 16);
+         let sum = bx.add(fp_offset_v, offset);
+         bx.store(sum, fp_offset_ptr, Align::from_bytes(4).unwrap());
+     }
+
+     bx.br(end);
+
+     bx.switch_to_block(in_mem);
+     let mem_addr = x86_64_sysv64_va_arg_from_memory(bx, va_list_addr, layout);
+     bx.br(end);
+
+     bx.switch_to_block(end);
+
+     let val_type = layout.llvm_type(bx);
+     let val_addr = bx.phi(bx.type_ptr(), &[reg_addr, mem_addr], &[in_reg, in_mem]);
+
+     bx.load(val_type, val_addr, layout.align.abi)
+ }
+
+ /// Copy into a temporary if the type is more aligned than the register save area.
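+ /// The general purpose entries of the register save area are only guaranteed to be
+ /// 8-byte aligned, so a type with a stricter alignment requirement cannot be read
+ /// from it in place.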
+ fn copy_to_temporary_if_more_aligned<'ll, 'tcx>(
+     bx: &mut Builder<'_, 'll, 'tcx>,
+     reg_addr: &'ll Value,
+     layout: TyAndLayout<'tcx, Ty<'tcx>>,
+ ) -> &'ll Value {
+     if layout.layout.align.abi.bytes() > 8 {
+         let tmp = bx.alloca(layout.layout.size(), layout.layout.align().abi);
+         bx.memcpy(
+             tmp,
+             layout.layout.align.abi,
+             reg_addr,
+             Align::from_bytes(8).unwrap(),
+             bx.const_u32(layout.layout.size().bytes() as u32),
+             MemFlags::empty(),
+         );
+         tmp
+     } else {
+         reg_addr
+     }
+ }
+
+ fn x86_64_sysv64_va_arg_from_memory<'ll, 'tcx>(
+     bx: &mut Builder<'_, 'll, 'tcx>,
+     va_list_addr: &'ll Value,
+     layout: TyAndLayout<'tcx, Ty<'tcx>>,
+ ) -> &'ll Value {
+     let dl = bx.cx.data_layout();
+
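+     // `overflow_arg_area` is the third field of `__va_list_tag`, at byte offset 8.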
+     let overflow_arg_area_ptr = bx.inbounds_ptradd(va_list_addr, bx.const_usize(8));
+
+     let overflow_arg_area_v = bx.load(bx.type_ptr(), overflow_arg_area_ptr, dl.pointer_align.abi);
+     // AMD64-ABI 3.5.7p5: Step 7. Align l->overflow_arg_area upwards to a 16
+     // byte boundary if alignment needed by type exceeds 8 byte boundary.
+     // It isn't stated explicitly in the standard, but in practice we use
+     // alignment greater than 16 where necessary.
+     if layout.layout.align.abi.bytes() > 8 {
+         unreachable!("all instances of VaArgSafe have an alignment <= 8");
+     }
+
+     // AMD64-ABI 3.5.7p5: Step 8. Fetch type from l->overflow_arg_area.
+     let mem_addr = overflow_arg_area_v;
+
+     // AMD64-ABI 3.5.7p5: Step 9. Set l->overflow_arg_area to:
+     // l->overflow_arg_area + sizeof(type).
+     // AMD64-ABI 3.5.7p5: Step 10. Align l->overflow_arg_area upwards to
+     // an 8 byte boundary.
+     let size_in_bytes = layout.layout.size().bytes();
+     let offset = bx.const_i32(size_in_bytes.next_multiple_of(8) as i32);
+     let overflow_arg_area = bx.inbounds_ptradd(overflow_arg_area_v, offset);
+     bx.store(overflow_arg_area, overflow_arg_area_ptr, dl.pointer_align.abi);
+
+     mem_addr
+ }
+
fn emit_xtensa_va_arg<'ll, 'tcx>(
    bx: &mut Builder<'_, 'll, 'tcx>,
    list: OperandRef<'tcx, &'ll Value>,
@@ -334,8 +629,7 @@ fn emit_xtensa_va_arg<'ll, 'tcx>(
    // (*va).va_ndx
    let va_reg_offset = 4;
    let va_ndx_offset = va_reg_offset + 4;
-     let offset_ptr =
-         bx.inbounds_gep(bx.type_i8(), va_list_addr, &[bx.cx.const_usize(va_ndx_offset)]);
+     let offset_ptr = bx.inbounds_ptradd(va_list_addr, bx.cx.const_usize(va_ndx_offset));

    let offset = bx.load(bx.type_i32(), offset_ptr, bx.tcx().data_layout.i32_align.abi);
    let offset = round_up_to_alignment(bx, offset, layout.align.abi);
@@ -356,11 +650,10 @@ fn emit_xtensa_va_arg<'ll, 'tcx>(
    bx.store(offset_next, offset_ptr, bx.tcx().data_layout.pointer_align.abi);

    // (*va).va_reg
-     let regsave_area_ptr =
-         bx.inbounds_gep(bx.type_i8(), va_list_addr, &[bx.cx.const_usize(va_reg_offset)]);
+     let regsave_area_ptr = bx.inbounds_ptradd(va_list_addr, bx.cx.const_usize(va_reg_offset));
    let regsave_area =
        bx.load(bx.type_ptr(), regsave_area_ptr, bx.tcx().data_layout.pointer_align.abi);
-     let regsave_value_ptr = bx.inbounds_gep(bx.type_i8(), regsave_area, &[offset]);
+     let regsave_value_ptr = bx.inbounds_ptradd(regsave_area, offset);
    bx.br(end);

    bx.switch_to_block(from_stack);
@@ -381,9 +674,9 @@ fn emit_xtensa_va_arg<'ll, 'tcx>(
    bx.store(offset_next_corrected, offset_ptr, bx.tcx().data_layout.pointer_align.abi);

    // let stack_value_ptr = unsafe { (*va).va_stk.byte_add(offset_corrected) };
-     let stack_area_ptr = bx.inbounds_gep(bx.type_i8(), va_list_addr, &[bx.cx.const_usize(0)]);
+     let stack_area_ptr = bx.inbounds_ptradd(va_list_addr, bx.cx.const_usize(0));
    let stack_area = bx.load(bx.type_ptr(), stack_area_ptr, bx.tcx().data_layout.pointer_align.abi);
-     let stack_value_ptr = bx.inbounds_gep(bx.type_i8(), stack_area, &[offset_corrected]);
+     let stack_value_ptr = bx.inbounds_ptradd(stack_area, offset_corrected);
    bx.br(end);

    bx.switch_to_block(end);
@@ -449,6 +742,8 @@ pub(super) fn emit_va_arg<'ll, 'tcx>(
                AllowHigherAlign::No,
            )
        }
+         // This includes `target.is_like_darwin`, which on x86_64 targets is like sysv64.
+         "x86_64" => emit_x86_64_sysv64_va_arg(bx, addr, target_ty),
        "xtensa" => emit_xtensa_va_arg(bx, addr, target_ty),
        // For all other architecture/OS combinations fall back to using
        // the LLVM va_arg instruction.