Skip to content

Commit 6873a4c

Browse files
authored
Support opaque pointers
2 parents 43cfa8c + 385150a commit 6873a4c

File tree

18 files changed

+160
-226
lines changed

18 files changed

+160
-226
lines changed

.github/workflows/ci.yml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ jobs:
2323
matrix:
2424
image:
2525
- { name: 'ubuntu', tag: '20.04' }
26-
llvm: ['13', '14']
26+
llvm: ['14']
2727

2828
runs-on: ubuntu-20.04
2929
container:
@@ -94,7 +94,7 @@ jobs:
9494
fail-fast: false
9595
matrix:
9696
os: ['macos-11']
97-
llvm: ['13', '14']
97+
llvm: ['14']
9898

9999
runs-on: ${{ matrix.os }}
100100

@@ -217,7 +217,7 @@ jobs:
217217
runs-on: ubuntu-latest
218218
strategy:
219219
matrix:
220-
llvm: ["13", "14"]
220+
llvm: ["14"]
221221
ubuntu: ["20.04"]
222222
steps:
223223
- uses: actions/checkout@v2

bin/lift/Lift.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -240,6 +240,7 @@ int main(int argc, char *argv[]) {
240240
// Make sure `--address` and `--entry_address` are in-bounds for the target
241241
// architecture's address size.
242242
llvm::LLVMContext context;
243+
context.enableOpaquePointers();
243244
auto arch = remill::Arch::Get(context, FLAGS_os, FLAGS_arch);
244245
const uint64_t addr_mask = ~0ULL >> (64UL - arch->address_size);
245246
if (FLAGS_address != (FLAGS_address & addr_mask)) {
@@ -259,7 +260,6 @@ int main(int argc, char *argv[]) {
259260

260261
std::unique_ptr<llvm::Module> module(remill::LoadArchSemantics(arch.get()));
261262

262-
const auto state_ptr_type = arch->StatePointerType();
263263
const auto mem_ptr_type = arch->MemoryPointerType();
264264

265265
Memory memory = UnhexlifyInputBytes(addr_mask);
@@ -343,10 +343,10 @@ int main(int argc, char *argv[]) {
343343
<< "Invalid register name '" << reg_name.str()
344344
<< "' used in output slice list '" << FLAGS_slice_outputs << "'";
345345

346-
arg_types.push_back(llvm::PointerType::get(reg->type, 0));
346+
arg_types.push_back(llvm::PointerType::get(context, 0));
347347
}
348348

349-
const auto state_type = state_ptr_type->getPointerElementType();
349+
const auto state_type = llvm::PointerType::get(context, 0);
350350
const auto func_type =
351351
llvm::FunctionType::get(mem_ptr_type, arg_types, false);
352352
const auto func = llvm::Function::Create(

include/remill/Arch/Arch.h

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -125,7 +125,8 @@ struct Register {
125125
// The directly enclosed registers.
126126
std::vector<const Register *> children;
127127

128-
void CompteGEPAccessors(const llvm::DataLayout &dl, llvm::Type *state_type);
128+
void ComputeGEPAccessors(const llvm::DataLayout &dl,
129+
llvm::StructType *state_type);
129130
};
130131

131132
class Arch {
@@ -159,6 +160,10 @@ class Arch {
159160
// Return the type of a lifted function.
160161
llvm::FunctionType *LiftedFunctionType(void) const;
161162

163+
// Returns the type of the register window. If the architecture doesn't have a register window, a
164+
// null pointer will be returned.
165+
llvm::StructType *RegisterWindowType() const;
166+
162167
// Apply `cb` to every register.
163168
void ForEachRegister(std::function<void(const Register *)> cb) const;
164169

include/remill/Arch/Runtime/Types.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@
3434

3535
struct State;
3636
struct Memory;
37+
struct RegisterWindow;
3738

3839
// Address in the source architecture type. We don't use a `uintptr_t` because
3940
// that might be specific to the destination architecture type.

include/remill/BC/Compat/PointerType.h

Lines changed: 0 additions & 35 deletions
This file was deleted.

include/remill/BC/InstructionLifter.h

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ class LLVMContext;
3030
class IntegerType;
3131
class BasicBlock;
3232
class Value;
33+
class Type;
3334
} // namespace llvm
3435

3536
namespace remill {
@@ -78,8 +79,9 @@ class InstructionLifter {
7879
bool is_delayed = false);
7980

8081
// Load the address of a register.
81-
llvm::Value *LoadRegAddress(llvm::BasicBlock *block, llvm::Value *state_ptr,
82-
std::string_view reg_name) const;
82+
std::pair<llvm::Value *, llvm::Type *>
83+
LoadRegAddress(llvm::BasicBlock *block, llvm::Value *state_ptr,
84+
std::string_view reg_name) const;
8385

8486
// Load the value of a register.
8587
llvm::Value *LoadRegValue(llvm::BasicBlock *block, llvm::Value *state_ptr,

include/remill/BC/Util.h

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -83,13 +83,15 @@ llvm::CallInst *AddTerminatingTailCall(llvm::BasicBlock *source_block,
8383

8484
// Find a local variable defined in the entry block of the function. We use
8585
// this to find register variables.
86-
llvm::Value *FindVarInFunction(llvm::BasicBlock *block, std::string_view name,
87-
bool allow_failure = false);
86+
std::pair<llvm::Value *, llvm::Type *>
87+
FindVarInFunction(llvm::BasicBlock *block, std::string_view name,
88+
bool allow_failure = false);
8889

8990
// Find a local variable defined in the entry block of the function. We use
9091
// this to find register variables.
91-
llvm::Value *FindVarInFunction(llvm::Function *func, std::string_view name,
92-
bool allow_failure = false);
92+
std::pair<llvm::Value *, llvm::Type *>
93+
FindVarInFunction(llvm::Function *func, std::string_view name,
94+
bool allow_failure = false);
9395

9496
// Find the machine state pointer. The machine state pointer is, by convention,
9597
// passed as the first argument to every lifted function.

lib/Arch/Arch.cpp

Lines changed: 47 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -237,7 +237,7 @@ llvm::StructType *Arch::StateStructType(void) const {
237237
llvm::PointerType *Arch::StatePointerType(void) const {
238238
CHECK(impl)
239239
<< "Have you not run `PrepareModule` on a loaded semantics module?";
240-
return llvm::PointerType::get(impl->state_type, 0);
240+
return llvm::PointerType::get(*context, 0);
241241
}
242242

243243
// Return the type of an address, i.e. `addr_t` in the semantics.
@@ -259,6 +259,12 @@ llvm::FunctionType *Arch::LiftedFunctionType(void) const {
259259
return impl->lifted_function_type;
260260
}
261261

262+
llvm::StructType *Arch::RegisterWindowType(void) const {
263+
CHECK(impl)
264+
<< "Have you not run `PrepareModule` on a loaded semantics module?";
265+
return impl->register_window_type;
266+
}
267+
262268
// Return information about the register at offset `offset` in the `State`
263269
// structure.
264270
const Register *Arch::RegisterAtStateOffset(uint64_t offset) const {
@@ -386,11 +392,11 @@ namespace {
386392

387393
// These variables must always be defined within any lifted function.
388394
static bool BlockHasSpecialVars(llvm::Function *basic_block) {
389-
return FindVarInFunction(basic_block, kStateVariableName, true) &&
390-
FindVarInFunction(basic_block, kMemoryVariableName, true) &&
391-
FindVarInFunction(basic_block, kPCVariableName, true) &&
392-
FindVarInFunction(basic_block, kNextPCVariableName, true) &&
393-
FindVarInFunction(basic_block, kBranchTakenVariableName, true);
395+
return FindVarInFunction(basic_block, kStateVariableName, true).first &&
396+
FindVarInFunction(basic_block, kMemoryVariableName, true).first &&
397+
FindVarInFunction(basic_block, kPCVariableName, true).first &&
398+
FindVarInFunction(basic_block, kNextPCVariableName, true).first &&
399+
FindVarInFunction(basic_block, kBranchTakenVariableName, true).first;
394400
}
395401

396402
// Add attributes to llvm::Argument in a way portable across LLVMs
@@ -444,10 +450,9 @@ namespace {
444450

445451
// Compute the total offset of a GEP chain.
446452
static uint64_t TotalOffset(const llvm::DataLayout &dl, llvm::Value *base,
447-
llvm::Type *state_ptr_type) {
453+
llvm::StructType *state_type) {
448454
uint64_t total_offset = 0;
449-
const auto state_size =
450-
dl.getTypeAllocSize(state_ptr_type->getPointerElementType());
455+
const auto state_size = dl.getTypeAllocSize(state_type);
451456
while (base) {
452457
if (auto gep = llvm::dyn_cast<llvm::GEPOperator>(base); gep) {
453458
llvm::APInt accumulated_offset(dl.getPointerSizeInBits(0), 0, false);
@@ -468,7 +473,7 @@ static uint64_t TotalOffset(const llvm::DataLayout &dl, llvm::Value *base,
468473
} else if (auto pti = llvm::dyn_cast<llvm::PtrToIntOperator>(base); pti) {
469474
base = pti->getOperand(0);
470475

471-
} else if (base->getType() == state_ptr_type) {
476+
} else if (base->getType()->isPointerTy()) {
472477
break;
473478

474479
} else {
@@ -482,27 +487,21 @@ static uint64_t TotalOffset(const llvm::DataLayout &dl, llvm::Value *base,
482487

483488
static llvm::Value *
484489
FinishAddressOf(llvm::IRBuilder<> &ir, const llvm::DataLayout &dl,
485-
llvm::Type *state_ptr_type, size_t state_size,
486-
const Register *reg, unsigned addr_space,
487-
llvm::Value *gep) {
490+
llvm::StructType *state_type, size_t state_size,
491+
const Register *reg, unsigned addr_space, llvm::Value *gep) {
488492

489493

490-
auto gep_offset = TotalOffset(dl, gep, state_ptr_type);
491-
auto gep_type_at_offset = gep->getType()->getPointerElementType();
494+
auto gep_offset = TotalOffset(dl, gep, state_type);
492495

493496
CHECK_LT(gep_offset, state_size);
494497

495498
const auto index_type = reg->gep_index_list[0]->getType();
496-
const auto goal_ptr_type = llvm::PointerType::get(reg->type, addr_space);
499+
const auto goal_ptr_type = llvm::PointerType::get(ir.getContext(), addr_space);
497500

498501
// Best case: we've found a value field in the structure that
499502
// is located at the correct byte offset.
500503
if (gep_offset == reg->offset) {
501-
if (gep_type_at_offset == reg->type) {
502-
return gep;
503-
504-
} else if (auto const_gep = llvm::dyn_cast<llvm::Constant>(gep);
505-
const_gep) {
504+
if (auto const_gep = llvm::dyn_cast<llvm::Constant>(gep); const_gep) {
506505
return llvm::ConstantExpr::getBitCast(const_gep, goal_ptr_type);
507506

508507
} else {
@@ -539,22 +538,26 @@ FinishAddressOf(llvm::IRBuilder<> &ir, const llvm::DataLayout &dl,
539538

540539
if (auto const_gep = llvm::dyn_cast<llvm::Constant>(gep); const_gep) {
541540
const_gep = llvm::ConstantExpr::getBitCast(
542-
const_gep, llvm::PointerType::get(byte_type, addr_space));
541+
const_gep, llvm::PointerType::get(ir.getContext(), addr_space));
543542
const_gep = llvm::ConstantExpr::getGetElementPtr(byte_type, const_gep,
544543
elem_indexes);
545544
return llvm::ConstantExpr::getBitCast(const_gep, goal_ptr_type);
546545

547546
} else {
548-
gep = ir.CreateBitCast(gep, llvm::PointerType::get(byte_type, addr_space));
547+
gep = ir.CreateBitCast(gep, llvm::PointerType::get(ir.getContext(), addr_space));
549548
gep = ir.CreateGEP(byte_type, gep, elem_indexes);
550549
return ir.CreateBitCast(gep, goal_ptr_type);
551550
}
552551
}
553552

554553
} // namespace
555554

556-
void Register::CompteGEPAccessors(const llvm::DataLayout &dl,
557-
llvm::Type *state_type) {
555+
void Register::ComputeGEPAccessors(const llvm::DataLayout &dl,
556+
llvm::StructType *state_type) {
557+
if (!state_type) {
558+
state_type = arch->state_type;
559+
}
560+
558561
if (gep_type_at_offset || !state_type) {
559562
return;
560563
}
@@ -585,15 +588,13 @@ llvm::Value *Register::AddressOf(llvm::Value *state_ptr,
585588
CHECK_NOTNULL(state_ptr_type);
586589
const auto addr_space = state_ptr_type->getAddressSpace();
587590

588-
const auto state_type =
589-
llvm::dyn_cast<llvm::StructType>(state_ptr_type->getPointerElementType());
590-
CHECK_NOTNULL(state_type);
591+
const auto state_type = arch->state_type;
591592

592593
const auto module = ir.GetInsertBlock()->getParent()->getParent();
593594
const auto &dl = module->getDataLayout();
594595

595596
if (!gep_type_at_offset) {
596-
const_cast<Register *>(this)->CompteGEPAccessors(dl, state_type);
597+
const_cast<Register *>(this)->ComputeGEPAccessors(dl, state_type);
597598
}
598599

599600
llvm::Value *gep = nullptr;
@@ -607,7 +608,7 @@ llvm::Value *Register::AddressOf(llvm::Value *state_ptr,
607608

608609
auto state_size = dl.getTypeAllocSize(state_type);
609610
auto ret = FinishAddressOf(
610-
ir, dl, state_ptr_type, state_size, this, addr_space, gep);
611+
ir, dl, state_type, state_size, this, addr_space, gep);
611612

612613
// Add the metadata to `inst`.
613614
if (auto inst = llvm::dyn_cast<llvm::Instruction>(ret); inst) {
@@ -722,7 +723,7 @@ void Arch::InitializeEmptyLiftedFunction(llvm::Function *func) const {
722723
// `FinishLiftedFunctionInitialization`.
723724

724725
ir.CreateStore(state,
725-
ir.CreateAlloca(llvm::PointerType::get(impl->state_type, 0),
726+
ir.CreateAlloca(llvm::PointerType::get(context, 0),
726727
nullptr, "STATE"));
727728
ir.CreateStore(memory,
728729
ir.CreateAlloca(impl->memory_type, nullptr, "MEMORY"));
@@ -757,7 +758,7 @@ const Register *Arch::AddRegister(const char *reg_name_, llvm::Type *val_type,
757758
auto reg_impl = new Register(reg_name, offset, dl.getTypeAllocSize(val_type),
758759
val_type, parent_reg, impl.get());
759760

760-
reg_impl->CompteGEPAccessors(dl, impl->state_type);
761+
reg_impl->ComputeGEPAccessors(dl, impl->state_type);
761762

762763
reg = reg_impl;
763764
impl->registers.emplace_back(reg_impl);
@@ -803,10 +804,20 @@ void Arch::InitFromSemanticsModule(llvm::Module *module) const {
803804
const auto &dl = module->getDataLayout();
804805
const auto basic_block = module->getFunction("__remill_jump");
805806
CHECK_NOTNULL(basic_block);
806-
const auto state_ptr_type =
807-
NthArgument(basic_block, kStatePointerArgNum)->getType();
808-
const auto state_type =
809-
llvm::dyn_cast<llvm::StructType>(state_ptr_type->getPointerElementType());
807+
808+
const auto *state_global = module->getGlobalVariable("__remill_state");
809+
CHECK_NOTNULL(state_global);
810+
auto *state_type = llvm::dyn_cast<llvm::StructType>(state_global->getValueType());
811+
CHECK_NOTNULL(state_type);
812+
813+
const auto *register_window_global =
814+
module->getGlobalVariable("__remill_register_window");
815+
if (register_window_global) {
816+
auto *register_window_type = llvm::dyn_cast<llvm::StructType>(
817+
register_window_global->getValueType());
818+
CHECK_NOTNULL(register_window_type);
819+
impl->register_window_type = register_window_type;
820+
}
810821

811822
impl->state_type = state_type;
812823
impl->reg_by_offset.resize(dl.getTypeAllocSize(state_type));

lib/Arch/Arch.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,9 @@ class ArchImpl {
4343
// Lifted function type.
4444
llvm::FunctionType *lifted_function_type{nullptr};
4545

46+
// Register window type.
47+
llvm::StructType *register_window_type{nullptr};
48+
4649
// Metadata type ID for remill registers.
4750
unsigned reg_md_id{0};
4851

lib/Arch/Runtime/Intrinsics.cpp

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,10 +27,22 @@
2727
// addresses taken, and so this prevents dead argument elimination.
2828
extern "C" void __remill_mark_as_used(const void *);
2929

30+
extern State __remill_state;
31+
32+
#if defined(REMILL_ON_SPARC32) || defined(REMILL_ON_SPARC64)
33+
extern RegisterWindow __remill_register_window;
34+
#endif
35+
3036
// This is just a hack to make sure all these functions appear in the bitcode
3137
// file!
3238
[[gnu::used]] extern "C" void __remill_intrinsics(void) {
3339

40+
USED(__remill_state);
41+
42+
#if defined(REMILL_ON_SPARC32) || defined(REMILL_ON_SPARC64)
43+
USED(__remill_register_window);
44+
#endif
45+
3446
USED(__remill_read_memory_8);
3547
USED(__remill_read_memory_16);
3648
USED(__remill_read_memory_32);

0 commit comments

Comments
 (0)