Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ version = "0.1.17"
BitTwiddlingConvenienceFunctions = "62783981-4cbd-42fc-bca8-16325de8dc4b"
IfElse = "615f187c-cbe4-4ef1-ba3b-2fcf58d6d173"
Libdl = "8f399da3-3557-5675-b5ff-fb832c97cbdb"
Preferences = "21216c6a-2e73-6563-6e65-726566657250"
Static = "aedffcd0-7271-4cad-89d0-dc628f76c6d3"

[compat]
Expand Down
14 changes: 11 additions & 3 deletions src/HostCPUFeatures.jl
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ end
using Libdl, Static
using Static: Zero, One, lt, gt
using IfElse: ifelse
using Preferences

using BitTwiddlingConvenienceFunctions: prevpow2, nextpow2, intlog2

Expand Down Expand Up @@ -37,19 +38,26 @@ unwrap(::StaticSymbol{S}) where {S} = S

@noinline function redefine()
@debug "Defining CPU name."
define_cpu_name()
redefine_cpu_name()

reset_features!()
reset_extra_features!()
end
const BASELINE_CPU_NAME = get_cpu_name()
const allow_eval = @load_preference("allow_runtime_invalidation", false)
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why not check whether --trim is enabled in JLOptions?

Otherwise, everyone who uses it with --trim will have to track down this culprit themselves.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The main thinking is that this preference is usable by more than just JuliaC.jl, since some users may wish to opt in to an error or warning on invalidation storms from this package. JuliaC.jl can set this preference automatically, so the end-user experience is the same.

Also, technically, checking JLOptions at pre-compilation time won't detect --trim properly, but something like JuliaLang/JuliaC.jl#31 would work.


function __init__()
ccall(:jl_generating_output, Cint, ()) == 1 && return
if Sys.ARCH === :x86_64 || Sys.ARCH === :i686
target = Base.unsafe_string(Base.JLOptions().cpu_target)
occursin("native", target) || return make_generic(target)
if !occursin("native", target)
make_generic(target)
return nothing
end
end
if BASELINE_CPU_NAME != Sys.CPU_NAME::String
redefine()
end
BASELINE_CPU_NAME == Sys.CPU_NAME::String || redefine()
return nothing
end

Expand Down
19 changes: 14 additions & 5 deletions src/cpu_info.jl
Original file line number Diff line number Diff line change
Expand Up @@ -48,8 +48,12 @@ function reset_features!()
for ext ∈ features
feature, has = process_feature(ext)
if _has_feature(feature) ≠ has
@debug "Defining $(has ? "presence" : "absense") of feature $feature."
set_feature(feature, has)
if allow_eval
@debug "Defining $(has ? "presence" : "absense") of feature $feature."
set_feature(feature, has)
else
@warn "Runtime invalidation was disabled, but the CPU info is out-of-date.\nWill continue with incorrect CPU feature flag: $ext."
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Aside from this change, I expect that feature_string does not always return the same set of features, which will leave certain features stuck in the wrong state just because LLVM does not report them for that CPU.

For reference, this is the info I've collected from real and virtual machines with:

@ccall jl_get_cpu_name()::String
@ccall jl_get_cpu_features()::String
* "broadwell"
  "+prfchw,+avx,+aes,+sahf,+pclmul,+crc32,+sse4.1,+xsave,+sse4.2,+invpcid,+64bit,+cmov,+movbe,+rtm,+adx,+avx2,+bmi,+sse,+xsaveopt,+rdrnd,+cx8,+sse3,+fsgsbase,+lzcnt,+ssse3,+cx16,+bmi2,+fma,+popcnt,+f16c,+mmx,+sse2,+rdseed,+fxsr,-cldemote,-xop,-xsaves,-avx512fp16,-usermsr,-sm4,-avx512ifma,-avx512pf,-tsxldtrk,-ptwrite,-widekl,-sm3,-xsavec,-avx10.1-512,-avx512vpopcntdq,-avx512vp2intersect,-avx512cd,-avxvnniint8,-avx512er,-amx-int8,-kl,-avx10.1-256,-sha512,-avxvnni,-hreset,-movdiri,-serialize,-vpclmulqdq,-avx512vl,-uintr,-clflushopt,-raoint,-cmpccxadd,-amx-tile,-gfni,-avxvnniint16,-amx-fp16,-avx512f,-amx-bf16,-avx512bf16,-avx512vnni,-avx512bw,-pku,-clzero,-mwaitx,-lwp,-sha,-movdir64b,-wbnoinvd,-enqcmd,-prefetchwt1,-avxneconvert,-tbm,-pconfig,-amx-complex,-avxifma,-avx512bitalg,-rdpru,-clwb,-avx512vbmi2,-prefetchi,-rdpid,-fma4,-avx512vbmi,-shstk,-vaes,-waitpkg,-sgx,-avx512dq,-sse4a"
* "cascadelake"
  "+prfchw,+avx,+aes,+sahf,+pclmul,+crc32,+xsaves,+sse4.1,+xsave,+sse4.2,+invpcid,+64bit,+xsavec,+cmov,+avx512cd,+movbe,+evex512,+adx,+avx2,+avx512vl,+clflushopt,+bmi,+sse,+xsaveopt,+rdrnd,+avx512f,+avx512vnni,+cx8,+avx512bw,+sse3,+pku,+fsgsbase,+lzcnt,+ssse3,+cx16,+bmi2,+fma,+popcnt,+f16c,+clwb,+mmx,+sse2,+rdseed,+fxsr,+avx512dq,-cldemote,-xop,-avx512fp16,-usermsr,-sm4,-avx512ifma,-avx512pf,-tsxldtrk,-ptwrite,-widekl,-sm3,-avx10.1-512,-avx512vpopcntdq,-avx512vp2intersect,-avxvnniint8,-avx512er,-amx-int8,-kl,-avx10.1-256,-avxvnni,-rtm,-hreset,-movdiri,-serialize,-sha512,-vpclmulqdq,-uintr,-raoint,-cmpccxadd,-amx-tile,-gfni,-avxvnniint16,-amx-fp16,-amx-bf16,-avx512bf16,-clzero,-mwaitx,-lwp,-sha,-movdir64b,-wbnoinvd,-enqcmd,-prefetchwt1,-avxneconvert,-tbm,-pconfig,-amx-complex,-avxifma,-avx512bitalg,-rdpru,-avx512vbmi2,-prefetchi,-rdpid,-fma4,-avx512vbmi,-shstk,-vaes,-waitpkg,-sgx,-sse4a"
* "skylake-avx512"
  "+cx16,+sahf,+crc32,+prfchw,+bmi2,+fsgsbase,+popcnt,+aes,+xsaves,+clwb,+avx512f,+xsavec,+pku,+mmx,+rdseed,+avx512bw,+clflushopt,+xsave,+64bit,+avx512vl,+invpcid,+avx512cd,+avx,+cx8,+fma,+bmi,+rdrnd,+sse4.1,+sse4.2,+avx2,+fxsr,+sse,+lzcnt,+pclmul,+f16c,+ssse3,+cmov,+movbe,+xsaveopt,+avx512dq,+sse2,+adx,+sse3,-avx512pf,-tsxldtrk,-tbm,-avx512ifma,-sha,-fma4,-vpclmulqdq,-cldemote,-avx512bf16,-amx-tile,-raoint,-uintr,-gfni,-ptwrite,-avx512bitalg,-movdiri,-widekl,-avx512er,-avxvnni,-avx512fp16,-avx512vnni,-amx-bf16,-avxvnniint8,-avx512vpopcntdq,-pconfig,-cmpccxadd,-clzero,-amx-fp16,-lwp,-rdpid,-xop,-waitpkg,-prefetchi,-kl,-movdir64b,-sse4a,-avxneconvert,-avx512vbmi2,-serialize,-hreset,-vaes,-amx-int8,-rtm,-enqcmd,-mwaitx,-wbnoinvd,-rdpru,-avxifma,-sgx,-prefetchwt1,-avx512vbmi,-shstk,-avx512vp2intersect"
* "skylake-avx512"
  "+prfchw,+avx,+aes,+sahf,+pclmul,+crc32,+sse4.1,+xsave,+sse4.2,+64bit,+cmov,+movbe,+sse,+rdrnd,+cx8,+sse3,+fsgsbase,+ssse3,+cx16,+fma,+popcnt,+f16c,+mmx,+sse2,+fxsr,-cldemote,-xop,-xsaves,-avx512fp16,-usermsr,-sm4,-avx512ifma,-avx512pf,-tsxldtrk,-ptwrite,-widekl,-sm3,-invpcid,-xsavec,-avx10.1-512,-avx512vpopcntdq,-avx512vp2intersect,-avx512cd,-avxvnniint8,-avx512er,-amx-int8,-kl,-avx10.1-256,-sha512,-avxvnni,-rtm,-adx,-avx2,-hreset,-movdiri,-serialize,-vpclmulqdq,-avx512vl,-uintr,-clflushopt,-raoint,-cmpccxadd,-bmi,-amx-tile,-gfni,-avxvnniint16,-amx-fp16,-xsaveopt,-avx512f,-amx-bf16,-avx512bf16,-avx512vnni,-avx512bw,-pku,-clzero,-mwaitx,-lwp,-lzcnt,-sha,-movdir64b,-wbnoinvd,-enqcmd,-prefetchwt1,-avxneconvert,-tbm,-pconfig,-amx-complex,-bmi2,-avxifma,-avx512bitalg,-rdpru,-clwb,-rdseed,-avx512vbmi2,-prefetchi,-rdpid,-fma4,-avx512vbmi,-shstk,-vaes,-waitpkg,-sgx,-avx512dq,-sse4a"
* "icelake-server"
  "+prfchw,+avx,+aes,+sahf,+pclmul,+crc32,+sse4.1,+xsave,+sse4.2,+64bit,+cmov,+movbe,+sse,+rdrnd,+cx8,+sse3,+fsgsbase,+ssse3,+cx16,+fma,+popcnt,+f16c,+mmx,+sse2,+fxsr,-cldemote,-xop,-xsaves,-avx512fp16,-usermsr,-sm4,-avx512ifma,-avx512pf,-tsxldtrk,-ptwrite,-widekl,-sm3,-invpcid,-xsavec,-avx10.1-512,-avx512vpopcntdq,-avx512vp2intersect,-avx512cd,-avxvnniint8,-avx512er,-amx-int8,-kl,-avx10.1-256,-sha512,-avxvnni,-rtm,-adx,-avx2,-hreset,-movdiri,-serialize,-vpclmulqdq,-avx512vl,-uintr,-clflushopt,-raoint,-cmpccxadd,-bmi,-amx-tile,-gfni,-avxvnniint16,-amx-fp16,-xsaveopt,-avx512f,-amx-bf16,-avx512bf16,-avx512vnni,-avx512bw,-pku,-clzero,-mwaitx,-lwp,-lzcnt,-sha,-movdir64b,-wbnoinvd,-enqcmd,-prefetchwt1,-avxneconvert,-tbm,-pconfig,-amx-complex,-bmi2,-avxifma,-avx512bitalg,-rdpru,-clwb,-rdseed,-avx512vbmi2,-prefetchi,-rdpid,-fma4,-avx512vbmi,-shstk,-vaes,-waitpkg,-sgx,-avx512dq,-sse4a"  
* "sandybridge"
  "+avx,+aes,+sahf,+pclmul,+crc32,+sse4.1,+xsave,+sse4.2,+64bit,+cmov,+sse,+xsaveopt,+cx8,+sse3,+ssse3,+cx16,+popcnt,+mmx,+sse2,+fxsr,-prfchw,-cldemote,-xop,-xsaves,-avx512fp16,-usermsr,-sm4,-avx512ifma,-avx512pf,-tsxldtrk,-ptwrite,-widekl,-sm3,-invpcid,-xsavec,-avx10.1-512,-avx512vpopcntdq,-avx512vp2intersect,-avx512cd,-movbe,-avxvnniint8,-avx512er,-amx-int8,-kl,-avx10.1-256,-sha512,-avxvnni,-rtm,-adx,-avx2,-hreset,-movdiri,-serialize,-vpclmulqdq,-avx512vl,-uintr,-clflushopt,-raoint,-cmpccxadd,-bmi,-amx-tile,-gfni,-avxvnniint16,-amx-fp16,-rdrnd,-avx512f,-amx-bf16,-avx512bf16,-avx512vnni,-avx512bw,-pku,-fsgsbase,-clzero,-mwaitx,-lwp,-lzcnt,-sha,-movdir64b,-wbnoinvd,-enqcmd,-prefetchwt1,-avxneconvert,-tbm,-pconfig,-amx-complex,-bmi2,-fma,-avxifma,-f16c,-avx512bitalg,-rdpru,-clwb,-rdseed,-avx512vbmi2,-prefetchi,-rdpid,-fma4,-avx512vbmi,-shstk,-vaes,-waitpkg,-sgx,-avx512dq,-sse4a"

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That's a good point. We probably need to tighten that up.

I'm curious to hear @gbaraldi's input on this.

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is just a huge mess. Some VMs hide features, for example.

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

To be clear, the mess I mean is the situation as a whole. I think this is mergeable without too many issues; I wish LLVM exposed something nicer, but it currently doesn't.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

end
end
end
Libc.free(features_cstring)
Expand All @@ -58,8 +62,13 @@ end
register_size(::Type{T}) where {T} = register_size()
register_size(::Type{T}) where {T<:Union{Signed,Unsigned}} = simd_integer_register_size()

function define_cpu_name()
function redefine_cpu_name()
cpu = QuoteNode(Symbol(get_cpu_name()))
@eval cpu_name() = Val{$cpu}()
if allow_eval
@eval cpu_name() = Val{$cpu}()
else
@warn "Runtime invalidation was disabled, but the CPU info is out-of-date.\nWill continue with incorrect CPU name (from build time)."
end
end
define_cpu_name()
cpu = QuoteNode(Symbol(get_cpu_name()))
@eval cpu_name() = Val{$cpu}()
14 changes: 12 additions & 2 deletions src/cpu_info_aarch64.jl
Original file line number Diff line number Diff line change
Expand Up @@ -39,10 +39,20 @@ end

function reset_extra_features!()
drs = _dynamic_register_size()
register_size() ≠ drs && _set_sve_vector_width!(drs)
if register_size() ≠ drs
if allow_eval
_set_sve_vector_width!(drs)
else
@warn "Runtime invalidation was disabled, but the CPU info is out-of-date.\nWill continue with incorrect CPU register size."
end
end
hassve = _has_aarch64_sve()
if hassve ≠ has_feature(Val(:aarch64_sve_cpuid))
@eval has_feature(::Val{:aarch64_sve_cpuid}) = $(Expr(:call, hassve ? :True : :False))
if allow_eval
@eval has_feature(::Val{:aarch64_sve_cpuid}) = $(Expr(:call, hassve ? :True : :False))
else
@warn "Runtime invalidation was disabled, but the CPU info is out-of-date.\nWill continue with incorrect CPU feature flag: :aarch64_sve_cpuid."
end
end
end

Expand Down
16 changes: 12 additions & 4 deletions src/cpu_info_x86.jl
Original file line number Diff line number Diff line change
Expand Up @@ -32,14 +32,22 @@ fast_int64_to_double() = has_feature(Val(:x86_64_avx512dq))

fast_half() = False()

@noinline function setfeaturefalse(s)
@inline function setfeaturefalse(s)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

why the inlining switch?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This was to improve inferrability - has_feature(Val(s)) is only inferrable if the literal value of s is available in the function, which is true in all of the callers (the argument is always a literal Symbol)

if has_feature(Val(s)) === True()
@eval has_feature(::Val{$(QuoteNode(s))}) = False()
if allow_eval
@eval has_feature(::Val{$(QuoteNode(s))}) = False()
else
@warn "Runtime invalidation was disabled, but the CPU info is out-of-date.\nWill continue with incorrect CPU feature flag: $s."
end
end
end
@noinline function setfeaturetrue(s)
@inline function setfeaturetrue(s)
if has_feature(Val(s)) === False()
@eval has_feature(::Val{$(QuoteNode(s))}) = True()
if allow_eval
@eval has_feature(::Val{$(QuoteNode(s))}) = True()
else
@warn "Runtime invalidation was disabled, but the CPU info is out-of-date.\nWill continue with incorrect CPU feature flag: $s."
end
end
end

Expand Down
Loading