Skip to content

Commit 4cbea5e

Browse files
authored
Cranelift: Translate ir::UserExternalNameRefs into callers when inlining (#11389)
This was an entity that we forgot to translate from the callee into the caller. Note that we do not use the `EntityMap` offset approach for these entities because `ir::Function` hash-conses them.
1 parent d8d88f1 commit 4cbea5e

File tree

4 files changed: 225 additions & 10 deletions

cranelift/codegen/src/inline.rs

Lines changed: 60 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -132,8 +132,15 @@ pub(crate) fn do_inlining(
132132
func_ref,
133133
} => {
134134
let args = cursor.func.dfg.inst_args(inst);
135+
trace!(
136+
"considering call site for inlining: {inst}: {}",
137+
cursor.func.dfg.display_inst(inst),
138+
);
135139
match inliner.inline(&cursor.func, inst, opcode, func_ref, args) {
136-
InlineCommand::KeepCall => continue,
140+
InlineCommand::KeepCall => {
141+
trace!(" --> keeping call");
142+
continue;
143+
}
137144
InlineCommand::Inline(callee) => {
138145
inline_one(
139146
&mut allocs,
@@ -157,8 +164,15 @@ pub(crate) fn do_inlining(
157164
exception,
158165
} => {
159166
let args = cursor.func.dfg.inst_args(inst);
167+
trace!(
168+
"considering call site for inlining: {inst}: {}",
169+
cursor.func.dfg.display_inst(inst),
170+
);
160171
match inliner.inline(&cursor.func, inst, opcode, func_ref, args) {
161-
InlineCommand::KeepCall => continue,
172+
InlineCommand::KeepCall => {
173+
trace!(" --> keeping call");
174+
continue;
175+
}
162176
InlineCommand::Inline(callee) => {
163177
inline_one(
164178
&mut allocs,
@@ -195,8 +209,18 @@ struct InliningAllocs {
195209
values: SecondaryMap<ir::Value, PackedOption<ir::Value>>,
196210

197211
/// Map from callee constant to inlined caller constant.
212+
///
213+
/// Not in `EntityMap` because these are hash-consed inside the
214+
/// `ir::Function`.
198215
constants: SecondaryMap<ir::Constant, PackedOption<ir::Constant>>,
199216

217+
/// Map from callee to inlined caller external name refs.
218+
///
219+
/// Not in `EntityMap` because these are hash-consed inside the
220+
/// `ir::Function`.
221+
user_external_name_refs:
222+
SecondaryMap<ir::UserExternalNameRef, PackedOption<ir::UserExternalNameRef>>,
223+
200224
/// The set of _caller_ inlined call instructions that need exception table
201225
/// fixups at the end of inlining.
202226
///
@@ -220,6 +244,7 @@ impl InliningAllocs {
220244
let InliningAllocs {
221245
values,
222246
constants,
247+
user_external_name_refs,
223248
calls_needing_exception_table_fixup,
224249
} = self;
225250

@@ -229,6 +254,9 @@ impl InliningAllocs {
229254
constants.clear();
230255
constants.resize(callee.dfg.constants.len());
231256

257+
user_external_name_refs.clear();
258+
user_external_name_refs.resize(callee.params.user_named_funcs().len());
259+
232260
// Note: We do not reserve capacity for
233261
// `calls_needing_exception_table_fixup` because it is a sparse set and
234262
// we don't know how large it needs to be ahead of time.
@@ -1205,7 +1233,8 @@ fn create_entities(
12051233
entity_map.block_offset = Some(create_blocks(allocs, func, callee));
12061234
entity_map.global_value_offset = Some(create_global_values(func, callee));
12071235
entity_map.sig_ref_offset = Some(create_sig_refs(func, callee));
1208-
entity_map.func_ref_offset = Some(create_func_refs(func, callee, &entity_map));
1236+
create_user_external_name_refs(allocs, func, callee);
1237+
entity_map.func_ref_offset = Some(create_func_refs(allocs, func, callee, &entity_map));
12091238
entity_map.stack_slot_offset = Some(create_stack_slots(func, callee));
12101239
entity_map.dynamic_type_offset = Some(create_dynamic_types(func, callee, &entity_map));
12111240
entity_map.dynamic_stack_slot_offset =
@@ -1309,8 +1338,24 @@ fn create_sig_refs(func: &mut ir::Function, callee: &ir::Function) -> u32 {
13091338
offset
13101339
}
13111340

1341+
fn create_user_external_name_refs(
1342+
allocs: &mut InliningAllocs,
1343+
func: &mut ir::Function,
1344+
callee: &ir::Function,
1345+
) {
1346+
for (callee_named_func_ref, name) in callee.params.user_named_funcs().iter() {
1347+
let caller_named_func_ref = func.declare_imported_user_function(name.clone());
1348+
allocs.user_external_name_refs[callee_named_func_ref] = Some(caller_named_func_ref).into();
1349+
}
1350+
}
1351+
13121352
/// Translate `ir::FuncRef`s from the callee into the caller.
1313-
fn create_func_refs(func: &mut ir::Function, callee: &ir::Function, entity_map: &EntityMap) -> u32 {
1353+
fn create_func_refs(
1354+
allocs: &InliningAllocs,
1355+
func: &mut ir::Function,
1356+
callee: &ir::Function,
1357+
entity_map: &EntityMap,
1358+
) -> u32 {
13141359
let offset = func.dfg.ext_funcs.len();
13151360
let offset = u32::try_from(offset).unwrap();
13161361

@@ -1322,7 +1367,17 @@ fn create_func_refs(func: &mut ir::Function, callee: &ir::Function, entity_map:
13221367
} in callee.dfg.ext_funcs.values()
13231368
{
13241369
func.dfg.ext_funcs.push(ir::ExtFuncData {
1325-
name: name.clone(),
1370+
name: match name {
1371+
ir::ExternalName::User(name_ref) => {
1372+
ir::ExternalName::User(allocs.user_external_name_refs[*name_ref].expect(
1373+
"should have translated all `ir::UserExternalNameRef`s before translating \
1374+
`ir::FuncRef`s",
1375+
))
1376+
}
1377+
ir::ExternalName::TestCase(_)
1378+
| ir::ExternalName::LibCall(_)
1379+
| ir::ExternalName::KnownSymbol(_) => name.clone(),
1380+
},
13261381
signature: entity_map.inlined_sig_ref(*signature),
13271382
colocated: *colocated,
13281383
});
Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
test inline precise-output
2+
target x86_64
3+
4+
function %callee() -> i32 {
5+
sig0 = () -> i32
6+
fn0 = colocated u0:36 sig0
7+
fn1 = colocated u0:42 sig0
8+
block0:
9+
v0 = call fn0()
10+
v1 = call fn1()
11+
v2 = iadd v0, v1
12+
return v2
13+
}
14+
15+
; (no functions inlined into %callee)
16+
17+
function %caller(i32) -> i32 {
18+
sig0 = (i32) -> i32
19+
sig1 = () -> i32
20+
fn0 = colocated u0:1234 sig0
21+
fn1 = colocated u0:36 sig0
22+
fn2 = %callee sig1
23+
block0(v0: i32):
24+
v1 = call fn0(v0)
25+
v2 = call fn2()
26+
v3 = call fn1(v1)
27+
return v3
28+
}
29+
30+
; function %caller(i32) -> i32 fast {
31+
; sig0 = (i32) -> i32 fast
32+
; sig1 = () -> i32 fast
33+
; sig2 = () -> i32 fast
34+
; fn0 = colocated u0:1234 sig0
35+
; fn1 = colocated u0:36 sig0
36+
; fn2 = %callee sig1
37+
; fn3 = colocated u0:36 sig2
38+
; fn4 = colocated u0:42 sig2
39+
;
40+
; block0(v0: i32):
41+
; v1 = call fn0(v0)
42+
; jump block1
43+
;
44+
; block1:
45+
; v5 = call fn3()
46+
; v6 = call fn4()
47+
; v7 = iadd v5, v6
48+
; jump block2(v7)
49+
;
50+
; block2(v4: i32):
51+
; v2 -> v4
52+
; v3 = call fn1(v1)
53+
; return v3
54+
; }
55+

crates/wasmtime/src/compile.rs

Lines changed: 14 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -840,6 +840,10 @@ the use case.
840840
None
841841
}
842842
}));
843+
log::trace!(
844+
"call graph edges for {output_index:?} = {:?}: {calls:?}",
845+
output.key
846+
);
843847
Ok(())
844848
}
845849
})?;
@@ -865,6 +869,7 @@ the use case.
865869
&mut layer_outputs,
866870
|output: &mut CompileOutput<'_>| {
867871
debug_assert_eq!(output.key.kind(), CompileKind::WasmFunction);
872+
log::trace!("processing inlining for {:?}", output.key);
868873

869874
let caller_translation = output.translation.unwrap();
870875
let caller_module = output.key.module();
@@ -885,11 +890,15 @@ the use case.
885890
{
886891
(caller_module, def_func, Some(caller_needs_gc_heap))
887892
} else {
888-
let (def_module, def_func) =
889-
caller_translation.known_imported_functions[callee].expect(
890-
"a direct call to an imported function must have a \
891-
statically-known import",
892-
);
893+
let (def_module, def_func) = caller_translation
894+
.known_imported_functions[callee]
895+
.unwrap_or_else(|| {
896+
panic!(
897+
"a direct call to an imported function must have a \
898+
statically-known definition, but direct call to imported \
899+
function {callee:?} has no statically-known definition",
900+
)
901+
});
893902
(def_module, def_func, None)
894903
};
895904

Lines changed: 96 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,96 @@
1+
;;! target = "x86_64"
2+
;;! test = "optimize"
3+
;;! filter = "wasm[2]--function"
4+
;;! flags = "-C inlining=y"
5+
6+
(component
7+
(core module $A
8+
(func (export "f0") (result i32)
9+
(i32.const 100)
10+
)
11+
(func (export "f1") (result i32)
12+
(i32.const 101)
13+
)
14+
)
15+
16+
(core module $B
17+
(import "a" "f0" (func $f0 (result i32)))
18+
(import "a" "f1" (func $f1 (result i32)))
19+
(func (export "f2") (result i32)
20+
(i32.add (call $f0) (call $f1))
21+
)
22+
)
23+
24+
(core module $C
25+
(import "b" "f2" (func $f2 (result i32)))
26+
(func (export "f3") (result i32)
27+
(i32.add (i32.const 100) (call $f2))
28+
)
29+
)
30+
31+
(core instance $a (instantiate $A))
32+
(core instance $b (instantiate $B (with "a" (instance $a))))
33+
(core instance $c (instantiate $C (with "b" (instance $b))))
34+
35+
(func (export "f") (result u32)
36+
(canon lift (core func $c "f3"))
37+
)
38+
)
39+
40+
;; function u0:1(i64 vmctx, i64) -> i32 tail {
41+
;; gv0 = vmctx
42+
;; gv1 = load.i64 notrap aligned readonly gv0+8
43+
;; gv2 = load.i64 notrap aligned gv1+16
44+
;; gv3 = vmctx
45+
;; gv4 = vmctx
46+
;; gv5 = load.i64 notrap aligned readonly gv4+8
47+
;; gv6 = load.i64 notrap aligned gv5+16
48+
;; gv7 = vmctx
49+
;; gv8 = vmctx
50+
;; gv9 = load.i64 notrap aligned readonly gv8+8
51+
;; gv10 = load.i64 notrap aligned gv9+16
52+
;; gv11 = vmctx
53+
;; gv12 = load.i64 notrap aligned readonly gv11+8
54+
;; gv13 = load.i64 notrap aligned gv12+16
55+
;; sig0 = (i64 vmctx, i64) -> i32 tail
56+
;; sig1 = (i64 vmctx, i64) -> i32 tail
57+
;; sig2 = (i64 vmctx, i64) -> i32 tail
58+
;; fn0 = colocated u0:0 sig0
59+
;; fn1 = colocated u0:0 sig1
60+
;; fn2 = colocated u0:1 sig2
61+
;; stack_limit = gv2
62+
;;
63+
;; block0(v0: i64, v1: i64):
64+
;; @00c3 jump block2
65+
;;
66+
;; block2:
67+
;; jump block4
68+
;;
69+
;; block4:
70+
;; jump block5
71+
;;
72+
;; block5:
73+
;; jump block6
74+
;;
75+
;; block6:
76+
;; jump block7
77+
;;
78+
;; block7:
79+
;; jump block8
80+
;;
81+
;; block8:
82+
;; jump block9
83+
;;
84+
;; block9:
85+
;; jump block3
86+
;;
87+
;; block3:
88+
;; jump block10
89+
;;
90+
;; block10:
91+
;; @00c6 jump block1
92+
;;
93+
;; block1:
94+
;; v26 = iconst.i32 301
95+
;; @00c6 return v26 ; v26 = 301
96+
;; }

0 commit comments

Comments
 (0)