@@ -506,6 +506,24 @@ macro benchmarkable(args...)
506506 end
507507end
508508
"""
    PerfInterface(; setup, start, stop, read, teardown)

A bundle of callable hooks for hardware-counter collection around a benchmark
run. Every hook defaults to a no-op (`Returns(nothing)`), except `read`, whose
default reports `(-1, -1)` to signal that no instruction/branch counts are
available. A loaded extension (e.g. a LinuxPerf backend) may supply real
implementations in place of these defaults.
"""
struct PerfInterface
    setup::Function     # invoked with no arguments to produce the perf handle
    start::Function     # invoked with the handle to begin counting
    stop::Function      # invoked with the handle to stop counting
    read::Function      # invoked with the handle; yields an (instructions, branches) pair
    teardown::Function  # invoked with the handle to release its resources

    # Keyword-only inner constructor: with no arguments, builds an interface
    # whose hooks all do nothing, so it is safe when no perf backend exists.
    function PerfInterface(;
        setup=Returns(nothing),
        start=Returns(nothing),
        stop=Returns(nothing),
        read=Returns((-1, -1)),
        teardown=Returns(nothing),
    )
        new(setup, start, stop, read, teardown)
    end
end
526+
509527# `eval` an expression that forcibly defines the specified benchmark at
510528# top-level in order to allow transfer of locally-scoped variables into
511529# benchmark scope.
@@ -553,6 +571,8 @@ function generate_benchmark_definition(
553571 end
554572 )
555573 end
574+ ext = Base. get_extension (BenchmarkTools, :BenchmarkToolsLinuxPerfExt )
575+ LinuxPerf = isnothing (ext) ? PerfInterface () : ext. interface ()
556576 return Core. eval (
557577 eval_module,
558578 quote
@@ -563,17 +583,42 @@ function generate_benchmark_definition(
563583 $ (Expr (:tuple , quote_vars... )), __params:: $BenchmarkTools.Parameters
564584 )
565585 $ (setup)
586+ __perf_bench = $ (LinuxPerf. setup)()
587+ __gcdiff = nothing
588+ __return_val = nothing
589+ __sample_time:: Int64 = 0
590+ __sample_instructions:: Int64 = 0
591+ __sample_branches:: Int64 = 0
566592 __evals = __params. evals
567- __gc_start = Base. gc_num ()
568- __start_time = time_ns ()
569- __return_val = $ (invocation)
570- for __iter in 2 : __evals
571- $ (invocation)
593+ try
594+ __gc_start = Base. gc_num ()
595+ $ (LinuxPerf. start)(__perf_bench)
596+ __start_time = time_ns ()
597+ __return_val = $ (invocation)
598+ for __iter in 2 : __evals
599+ $ (invocation)
600+ end
601+ __sample_time = time_ns () - __start_time
602+ $ (LinuxPerf. stop)(__perf_bench)
603+ __gcdiff = Base. GC_Diff (Base. gc_num (), __gc_start)
604+ __sample_instructions, __sample_branches = $ (LinuxPerf. read)(
605+ __perf_bench
606+ )
607+ finally
608+ $ (LinuxPerf. teardown)(__perf_bench)
609+ $ (teardown)
572610 end
573- __sample_time = time_ns () - __start_time
574- __gcdiff = Base. GC_Diff (Base. gc_num (), __gc_start)
575- $ (teardown)
576611 __time = max ((__sample_time / __evals) - __params. overhead, 0.001 )
612+ __instructions = if (__sample_instructions == - 1 )
613+ NaN
614+ else
615+ max ((__sample_instructions / __evals) - __params. insts_overhead, 0.0 )
616+ end
617+ __branches = if (__sample_branches == - 1 )
618+ NaN
619+ else
620+ max ((__sample_branches / __evals) - 0.0 , 0.0 )
621+ end
577622 __gctime = max ((__gcdiff. total_time / __evals) - __params. overhead, 0.0 )
578623 __memory = Int (Base. fld (__gcdiff. allocd, __evals))
579624 __allocs = Int (
@@ -585,7 +630,9 @@ function generate_benchmark_definition(
585630 __evals,
586631 ),
587632 )
588- return __time, __gctime, __memory, __allocs, __return_val
633+ return __time,
634+ __instructions, __branches, __gctime, __memory, __allocs,
635+ __return_val
589636 end
590637 $ BenchmarkTools. Benchmark ($ (samplefunc), $ (quote_vals), $ (params))
591638 end ,
0 commit comments