Skip to content

Commit c0f90b9

Browse files
authored
Add Context Structure to Affect State Dependent Liftings (#617)
* add empty contexts * add include * make function const * add helper for uniform mappings * expose cache clearing for operand lifter * decoding context documentation: * move virtual inheritance down * remove unused var names * add type alias * remove underscores * make sure we have poetry * check version in CI * try specify python3 * newer poetry install script * fail fast * try use pythons pip * upgrade pip? * install directly * update in linux too
1 parent 854c73e commit c0f90b9

File tree

20 files changed

+221
-61
lines changed

20 files changed

+221
-61
lines changed

.github/workflows/ci.yml

Lines changed: 12 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,10 @@ jobs:
3737
with:
3838
fetch-depth: 0
3939
- uses: ./.github/actions/prepare_git_user
40+
- name: Get Poetry
41+
shell: bash
42+
run: |
43+
python3 -m pip install poetry
4044
- name: Build with build script
4145
shell: bash
4246
run: |
@@ -49,7 +53,6 @@ jobs:
4953
export VCPKG_ROOT=$(pwd)/../lifting-bits-downloads/vcpkg_${{ matrix.image.name }}-${{ matrix.image.tag }}_llvm-${{ matrix.llvm }}_amd64
5054
export INSTALL_DIR=$(pwd)/remill-preset-install
5155
./scripts/build-preset.sh release
52-
5356
- name: Install Python Test Deps
5457
shell: bash
5558
run: |
@@ -109,6 +112,14 @@ jobs:
109112
with:
110113
fetch-depth: 0
111114
- uses: ./.github/actions/prepare_git_user
115+
- name: Get Poetry
116+
shell: bash
117+
run: |
118+
python3 -m pip install poetry
119+
- name: Install Python Test Deps
120+
shell: bash
121+
run: |
122+
python3 -m pip install --user ./scripts/diff_tester_export_insns
112123
- name: Build with build script
113124
shell: bash
114125
run: |
@@ -121,10 +132,6 @@ jobs:
121132
export VCPKG_ROOT=$(pwd)/../lifting-bits-downloads/vcpkg_${{ matrix.os}}_llvm-${{ matrix.llvm }}_xcode-13.0_amd64
122133
export INSTALL_DIR=$(pwd)/remill-preset-install
123134
./scripts/build-preset.sh release
124-
- name: Install Python Test Deps
125-
shell: bash
126-
run: |
127-
pip3 install --user ./scripts/diff_tester_export_insns
128135
- name: Run tests
129136
shell: bash
130137
working-directory: remill-build

include/remill/Arch/Arch.h

Lines changed: 18 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@
3030
#include <llvm/IR/IRBuilder.h>
3131
#include <remill/BC/InstructionLifter.h>
3232
#include <remill/BC/IntrinsicTable.h>
33+
#include <remill/Arch/Context.h>
3334

3435
#pragma clang diagnostic pop
3536

@@ -170,6 +171,9 @@ class Arch {
170171

171172
virtual ~Arch(void);
172173

174+
175+
virtual DecodingContext CreateInitialContext(void) const = 0;
176+
173177
// Factory method for loading the correct architecture class for a given
174178
// operating system and architecture class.
175179
static auto Get(llvm::LLVMContext &context, std::string_view os,
@@ -281,14 +285,23 @@ class Arch {
281285
// walk up, one byte at a time, to `MaxInstructionSize(false)`
282286
// bytes being passed to the decoder, until you successfully decode
283287
// or ultimately fail.
284-
virtual bool DecodeInstruction(uint64_t address, std::string_view instr_bytes,
285-
Instruction &inst) const = 0;
288+
289+
// The decoder takes contextual information in the form of a `DecodingContext`, passed by value. It returns an optional `ContextMap`:
290+
// a function that maps a successor address to the new context (the old context, updated) to use when decoding at that successor.
291+
292+
using DecodingResult = std::optional<DecodingContext::ContextMap>;
293+
294+
virtual DecodingResult
295+
DecodeInstruction(uint64_t address, std::string_view instr_bytes,
296+
Instruction &inst, DecodingContext context) const = 0;
286297

287298
// Decode an instruction that is within a delay slot.
288-
bool DecodeDelayedInstruction(uint64_t address, std::string_view instr_bytes,
289-
Instruction &inst) const {
299+
DecodingResult
300+
DecodeDelayedInstruction(uint64_t address, std::string_view instr_bytes,
301+
Instruction &inst, DecodingContext context) const {
290302
inst.in_delay_slot = true;
291-
return this->DecodeInstruction(address, instr_bytes, inst);
303+
return this->DecodeInstruction(address, instr_bytes, inst,
304+
std::move(context));
292305
}
293306

294307
// Minimum alignment of an instruction for this particular architecture.

include/remill/Arch/ArchBase.h

Lines changed: 25 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
#pragma once
1818

1919
#include <remill/Arch/Arch.h>
20+
#include <remill/Arch/Context.h>
2021

2122
#include <memory>
2223
#include <unordered_map>
@@ -31,13 +32,9 @@ namespace remill {
3132

3233
struct Register;
3334

35+
3436
// Internal base architecture for all Remill-internal architectures.
3537
class ArchBase : public remill::Arch {
36-
protected:
37-
virtual bool ArchDecodeInstruction(uint64_t address,
38-
std::string_view instr_bytes,
39-
Instruction &inst) const = 0;
40-
4138
public:
4239
using ArchPtr = std::unique_ptr<const Arch>;
4340

@@ -73,12 +70,6 @@ class ArchBase : public remill::Arch {
7370

7471
unsigned RegMdID(void) const final;
7572

76-
virtual bool DecodeInstruction(uint64_t address, std::string_view instr_bytes,
77-
Instruction &inst) const override;
78-
79-
OperandLifter::OpLifterPtr
80-
DefaultLifter(const remill::IntrinsicTable &intrinsics) const override;
81-
8273
// Get the state pointer and various other types from the `llvm::LLVMContext`
8374
// associated with `module`.
8475
//
@@ -114,4 +105,27 @@ class ArchBase : public remill::Arch {
114105
mutable std::unique_ptr<IntrinsicTable> instrinsics{nullptr};
115106
};
116107

108+
// Shared default implementation of the context-aware decoding interface for
// architectures that only need to provide `ArchDecodeInstruction`.
//
// Inherits *virtually* from `ArchBase`, so a most-derived architecture class
// that combines this with another `ArchBase`-derived class must initialize
// `ArchBase` itself in its own constructor.
class DefaultContextAndLifter : virtual public remill::ArchBase {
109+
public:
110+
// Produce the context used to start decoding.
virtual DecodingContext CreateInitialContext(void) const override;
111+
112+
// Decode one instruction at `address` from `instr_bytes` into `inst`.
// Returns a `ContextMap` (successor address -> `DecodingContext`) when
// decoding succeeds and `std::nullopt` when it fails.
virtual std::optional<DecodingContext::ContextMap>
113+
DecodeInstruction(uint64_t address, std::string_view instr_bytes,
114+
Instruction &inst, DecodingContext context) const override;
115+
116+
117+
// Create an operand lifter bound to this architecture and `intrinsics`.
OperandLifter::OpLifterPtr
118+
DefaultLifter(const remill::IntrinsicTable &intrinsics) const override;
119+
120+
121+
// Forwards its arguments to the `ArchBase` constructor.
DefaultContextAndLifter(llvm::LLVMContext *context_, OSName os_name_,
122+
ArchName arch_name_);
123+
124+
protected:
125+
// Architecture-specific decoder hook that concrete architectures must
// implement; `DecodeInstruction` calls it and turns its boolean result
// into a `DecodingResult`.
virtual bool ArchDecodeInstruction(uint64_t address,
126+
std::string_view instr_bytes,
127+
Instruction &inst) const = 0;
128+
};
129+
130+
117131
} // namespace remill

include/remill/Arch/Context.h

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
/*
2+
* Copyright (c) 2022 Trail of Bits, Inc.
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
18+
#pragma once
19+
20+
21+
#include <functional>
22+
#include <string_view>
23+
#include <unordered_map>
24+
25+
namespace remill {
26+
27+
/// A decoding context is contextual information about the state of the program that affects decoding, e.g. the Thumb mode register on ARM.
28+
/// We allow clients to interpose on a context for resolution.
29+
30+
/// The decoder returns a function of successor address -> DecodingContext. The decoder defines a relation on the
31+
/// previous context and the successor address that produces a new decoding context.
32+
/// This definition of returned contexts allows us to cleanly handle situations like indirect jumps on ARM.
33+
// NOTE(review): this class uses `std::string` and `uint64_t`, but the include
// list visible in this header is only <functional>, <string_view> and
// <unordered_map>. Consider including <string> and <cstdint> directly rather
// than relying on transitive includes.
class DecodingContext {
34+
35+
private:
36+
/// Context values keyed by name, each stored as a 64-bit integer.
std::unordered_map<std::string, uint64_t> context_value;
37+
38+
public:
39+
/// Function from a 64-bit value (the successor address) to the
/// `DecodingContext` associated with it.
using ContextMap = std::function<DecodingContext(uint64_t)>;
40+
41+
/// Creates a context with no values set.
DecodingContext() = default;
42+
43+
/// Creates a context from an existing name -> value map.
/// NOTE(review): this single-argument constructor is not `explicit`, so a
/// map converts implicitly to a `DecodingContext` -- confirm that is intended.
DecodingContext(std::unordered_map<std::string, uint64_t> context_value);
44+
45+
46+
/// Returns the value stored under `context_reg`. Behavior for a missing key
/// is defined by the implementation, which is not shown here.
uint64_t GetContextValue(const std::string &context_reg) const;
47+
/// Const member that returns a `DecodingContext` by value; presumably a copy
/// of this context with `creg` set to `value` -- TODO confirm against the
/// implementation.
DecodingContext PutContextReg(std::string creg, uint64_t value) const;
48+
49+
/// Builds a `ContextMap` from a single context; presumably the result maps
/// every successor to `cst` -- TODO confirm against the implementation.
static ContextMap UniformContextMapping(DecodingContext cst);
50+
};
51+
52+
} // namespace remill

include/remill/Arch/Instruction.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -352,7 +352,7 @@ class Instruction {
352352
Operand &EmplaceOperand(const Operand::Address &op);
353353

354354

355-
const InstructionLifter::LifterPtr &GetLifter();
355+
const InstructionLifter::LifterPtr &GetLifter() const;
356356

357357
void SetLifter(InstructionLifter::LifterPtr lifter);
358358

include/remill/BC/InstructionLifter.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,8 @@ class OperandLifter {
6767
std::string_view reg_name) const = 0;
6868

6969
virtual llvm::Type *GetMemoryType() = 0;
70+
71+
virtual void ClearCache(void) const = 0;
7072
};
7173

7274
// Wraps the process of lifting an instruction into a block. This resolves
@@ -108,7 +110,7 @@ class InstructionLifter : public OperandLifter {
108110
std::string_view reg_name) const override final;
109111

110112
// Clear out the cache of the current register values/addresses loaded.
111-
void ClearCache(void) const;
113+
void ClearCache(void) const override;
112114

113115

114116
virtual llvm::Type *GetMemoryType() override final;

lib/Arch/AArch32/Arch.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,8 @@ namespace remill {
5050
AArch32Arch::AArch32Arch(llvm::LLVMContext *context_, OSName os_name_,
5151
ArchName arch_name_)
5252
: ArchBase(context_, os_name_, arch_name_),
53-
AArch32ArchBase(context_, os_name_, arch_name_) {}
53+
AArch32ArchBase(context_, os_name_, arch_name_),
54+
DefaultContextAndLifter(context_, os_name_, arch_name_) {}
5455

5556
AArch32Arch::~AArch32Arch(void) {}
5657

lib/Arch/AArch32/Arch.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,8 @@
1919
#include <remill/Arch/AArch32/AArch32Base.h>
2020

2121
namespace remill {
22-
class AArch32Arch final : public AArch32ArchBase {
22+
class AArch32Arch final : public AArch32ArchBase,
23+
public DefaultContextAndLifter {
2324
public:
2425
AArch32Arch(llvm::LLVMContext *context_, OSName os_name_,
2526
ArchName arch_name_);

lib/Arch/AArch64/Arch.cpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -106,7 +106,7 @@ Instruction::Category InstCategory(const aarch64::InstData &inst) {
106106
}
107107
}
108108

109-
class AArch64Arch final : public ArchBase {
109+
class AArch64Arch final : public DefaultContextAndLifter {
110110
public:
111111
AArch64Arch(llvm::LLVMContext *context_, OSName os_name_,
112112
ArchName arch_name_);
@@ -148,7 +148,8 @@ class AArch64Arch final : public ArchBase {
148148

149149
AArch64Arch::AArch64Arch(llvm::LLVMContext *context_, OSName os_name_,
150150
ArchName arch_name_)
151-
: ArchBase(context_, os_name_, arch_name_) {}
151+
: ArchBase(context_, os_name_, arch_name_),
152+
DefaultContextAndLifter(context_, os_name_, arch_name_) {}
152153

153154
AArch64Arch::~AArch64Arch(void) {}
154155

lib/Arch/Arch.cpp

Lines changed: 24 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -850,16 +850,34 @@ const IntrinsicTable *ArchBase::GetInstrinsicTable(void) const {
850850
return this->instrinsics.get();
851851
}
852852

853-
OperandLifter::OpLifterPtr
854-
ArchBase::DefaultLifter(const remill::IntrinsicTable &intrinsics) const {
855-
return std::make_shared<InstructionLifter>(this, intrinsics);
853+
854+
// Default initial context: a default-constructed `DecodingContext`.
DecodingContext DefaultContextAndLifter::CreateInitialContext(void) const {
855+
return DecodingContext();
856856
}
857857

858-
bool ArchBase::DecodeInstruction(uint64_t address, std::string_view instr_bytes,
859-
Instruction &inst) const {
858+
// Default context-aware decode: attaches a fresh `InstructionLifter` to `inst`,
// then delegates to `ArchDecodeInstruction`. On success it returns a
// `ContextMap`; on failure it returns `std::nullopt`.
//
// NOTE(review): `context` is never read in this function, so any values a
// caller placed in it are not carried forward to successors. Confirm that this
// is intended for every architecture using this default.
Arch::DecodingResult DefaultContextAndLifter::DecodeInstruction(
859+
uint64_t address, std::string_view instr_bytes, Instruction &inst,
860+
DecodingContext context) const {
860861
inst.SetLifter(std::make_unique<remill::InstructionLifter>(
861862
this, this->GetInstrinsicTable()));
862-
return this->ArchDecodeInstruction(address, instr_bytes, inst);
863+
if (this->ArchDecodeInstruction(address, instr_bytes, inst)) {
864+
// Every successor address maps to a default-constructed context.
return [](uint64_t) -> DecodingContext { return DecodingContext(); };
865+
}
866+
867+
// The architecture-specific decoder rejected the bytes.
return std::nullopt;
863868
}
864869

870+
871+
// Returns a shared `InstructionLifter` constructed from this architecture and
// the given intrinsic table.
OperandLifter::OpLifterPtr DefaultContextAndLifter::DefaultLifter(
872+
const remill::IntrinsicTable &intrinsics) const {
873+
return std::make_shared<InstructionLifter>(this, intrinsics);
874+
}
875+
876+
877+
// Forwards all arguments to the `ArchBase` constructor.
//
// NOTE(review): `ArchBase` is declared as a virtual base of this class, and
// under C++ rules a virtual base is initialized by the most-derived class.
// The concrete architectures in this change (`AArch32Arch`, `AArch64Arch`)
// name `ArchBase(...)` in their own initializer lists accordingly.
DefaultContextAndLifter::DefaultContextAndLifter(llvm::LLVMContext *context_,
878+
OSName os_name_,
879+
ArchName arch_name_)
880+
: ArchBase(context_, os_name_, arch_name_) {}
881+
882+
865883
} // namespace remill

0 commit comments

Comments
 (0)