Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Arrays may be indexed as pointers #9

Open
langston-barrett opened this issue Aug 11, 2022 · 0 comments
Open

Arrays may be indexed as pointers #9

langston-barrett opened this issue Aug 11, 2022 · 0 comments
Labels
precision Having to do with possibly improving analysis precision

Comments

@langston-barrett
Copy link
Collaborator

Consider this C program:

char get4(char *buf) { return buf[4]; }

void indirect() {
  char tp_indir_buf[8];
  printf("c = '%c'\n", get4(tp_indir_buf));
}

which produces this LLVM program:

; Function Attrs: noinline nounwind optnone uwtable
define dso_local signext i8 @get4(i8*) #0 {
  %2 = alloca i8*, align 8
  store i8* %0, i8** %2, align 8
  %3 = load i8*, i8** %2, align 8
  %4 = getelementptr inbounds i8, i8* %3, i64 4
  %5 = load i8, i8* %4, align 1
  ret i8 %5
}

; Function Attrs: noinline nounwind optnone uwtable
define dso_local void @indirect() #0 {
  %1 = alloca [8 x i8], align 1
  %2 = getelementptr inbounds [8 x i8], [8 x i8]* %1, i32 0, i32 0
  %3 = call signext i8 @get4(i8* %2)
  %4 = sext i8 %3 to i32
  %5 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([10 x i8], [10 x i8]* @.str, i32 0, i32 0), i32 %4)
  ret void
}

The array-typed allocation indirect:%1 will be indexed by the getelementptr in get4 as if it were a pointer. However, our rules for deciding which array suballocations to create don't account for this kind of indexing:

pointer_index(?region, ?type, ?finalIdx) :-
( ( getelementptr_instruction_index(?insn, 0, ?indexOp)
, constant_to_int(?indexOp, ?index)
, getelementptr_instruction_base_type(?insn, ?declaredType)
, _alloc_region(?region)
)
; ( getelementptr_constant_expression_index(?expr, 0, ?indexOp)
, constant_to_int(?indexOp, ?index)
, _getelementptr_constant_expression_base_type(?expr, ?declaredType)
, global_region(?region)
)
),
?index >= 0,
// The first index of a GEP always a pointer type.
pointer_type_has_component(?declaredType, ?declaredElemType),
( ( type_compatible(?type, ?declaredElemType)
, type_has_size(?type, ?size)
)
; // This case is not redundant with the previous in the case; there may be
// types ?type where there is no pointer_type_has_component(_, ?type).
// Conversely, there may be types that are not compatible, but such that
// pointer types to them are compatible (i8 and i64 vs. i8* and i64*).
( pointer_type_has_component(?ptrType, ?type)
, type_compatible(?ptrType, ?declaredType)
, type_has_size(?type, ?size)
)
),
// Adjust the index... (this calculation should match what's done for GEP)
?size > 0,
type_has_size(?declaredElemType, ?declaredSize),
// Soufflé can still divide by zero here if it reorders clauses, so we take max
?finalIdx = (?index * ?declaredSize) / max(?size, 1),
// Check that the type sizes divide evenly...
(?finalIdx * ?size) = (?index * ?declaredSize).
.decl pointer_offset(?region: Region, ?type: Type, ?offset: SubregionOffset)
// Still need [0] for unsized types, e.g., opaque structs.
pointer_offset(?region, ?type, 0) :-
pointer_index(?region, ?type, 0).
pointer_offset(?region, ?type, ?index * ?size) :-
pointer_index(?region, ?type, ?index),
type_has_size(?type, ?size).
// Find all statically possible array indices
.decl array_indices__no_typecomp(?region: Region, ?type: ArrayType, ?index: ArrayIndex)
array_indices__no_typecomp(?region, ?declaredType, as(?constantIndex, ArrayIndex)) :-
( ( getelementptr_instruction_index(?insn, ?index, ?indexOp)
, getelementptr_instruction_interm_type(?insn, ?index, ?declaredType)
, _alloc_region(?region)
)
; ( getelementptr_constant_expression_index(?expr, ?index, ?indexOp)
, _getelementptr_constant_expression_interm_type(?expr, ?index, ?declaredType)
, global_region(?region)
)
),
constant_to_int(?indexOp, ?constantIndex),
array_type(?declaredType).
// Same thing, but also consider compatible array types
.decl array_indices(?region: Region, ?type: ArrayType, ?index: ArrayIndex)
array_indices(?region, ?type, ?constantIndex) :-
array_type(?type),
type_contains_pointer(?type),
type_compatible(?type, ?declaredType),
array_indices__no_typecomp(?region, ?declaredType, ?constantIndex).

In particular, pointer_index is separate from array_index, so array allocations don't have numbered suballocations at indices that are used to index into pointers. This is an issue for precision, because the GEP rules will produce points-to facts for the imprecise [*]-suballocation whereas they could in principle use suballocations at particular indices. The fix would be to add indices to array_index that correspond to pointer-indexing GEPs at a compatible type:

diff --git a/datalog/points-to/allocations-subobjects.dl b/datalog/points-to/allocations-subobjects.dl
index 2f58634..49f3163 100644
--- a/datalog/points-to/allocations-subobjects.dl
+++ b/datalog/points-to/allocations-subobjects.dl
@@ -41,14 +41,18 @@ _alloc_region(?r) :- global_region(?r).
 _alloc_region(?r) :- heap_region(?r).
 _alloc_region(?r) :- stack_region(?r).
 
+.decl _array_or_pointer_index(?region: Region, ?type: Type, ?index: ArrayIndex)
 .decl pointer_index(?region: Region, ?type: Type, ?index: ArrayIndex)
 
 pointer_index(?region, ?type, 0) :-
   _alloc_region(?region),
   type(?type).
 
+pointer_index(?region, ?type, ?idx) :-
+  _array_or_pointer_index(?region, ?type, ?idx).
+
 // TODO(lb): Use `gep_zero_index_offset` here.
-pointer_index(?region, ?type, ?finalIdx) :-
+_array_or_pointer_index(?region, ?type, ?finalIdx) :-
   ( ( getelementptr_instruction_index(?insn, 0, ?indexOp)
     , constant_to_int(?indexOp, ?index)
     , getelementptr_instruction_base_type(?insn, ?declaredType)
@@ -120,6 +124,11 @@ array_indices(?region, ?type, ?constantIndex) :-
   type_compatible(?type, ?declaredType),
   array_indices__no_typecomp(?region, ?declaredType, ?constantIndex).
 
+// Arrays may be indexed as pointers
+array_indices(?region, ?type, ?index) :-
+  array_type_has_component(?type, ?elemType),
+  _array_or_pointer_index(?region, ?elemType, ?index).
+
 .decl array_offset(?region: Region, ?type: Type, ?offset: SubregionOffset)
 array_offset(?region, ?type, ?index * ?size) :-
   array_indices(?region, ?type, ?index),
@@ -212,7 +221,7 @@ not_array_index(?component) :- path_component_at_any_index(?component).
 .type ArraySubregion
   = ArrayIndexSubregion
   | AnyArrayIndexSubregion
-  
+
 .type ArrayIndexSubregion <: symbol
 .type AnyArrayIndexSubregion <: symbol
@langston-barrett langston-barrett added the precision Having to do with possibly improving analysis precision label Aug 11, 2022
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
precision Having to do with possibly improving analysis precision
Projects
None yet
Development

No branches or pull requests

1 participant