Skip to content
Open
Show file tree
Hide file tree
Changes from 20 commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
6f554ef
Added erf(x) Float64/Float32 Julia implementation
AhmedYKadah Mar 31, 2025
7f4fd2d
changed erf to _erf, got rid of unnecessary branch
AhmedYKadah Mar 31, 2025
e784c9f
fixed syntax error in ccall
AhmedYKadah Mar 31, 2025
da16cb1
fixed syntax error in ccall 2
AhmedYKadah Mar 31, 2025
0a755b6
NaN edge case for erf(x)
AhmedYKadah Mar 31, 2025
6efcec8
added test cases for erf(x)
AhmedYKadah Mar 31, 2025
0fc6d4d
Merge branch 'master' into erf(x)-implementation
AhmedYKadah Aug 2, 2025
3cee8ce
cleaned up erf(Float64)
AhmedYKadah Aug 2, 2025
b819c58
added erf(x::Float32) implementation
AhmedYKadah Sep 13, 2025
57bbaf2
added NaN edge case to erf(x::Float32)
AhmedYKadah Sep 13, 2025
5ad8278
Merge branch 'master' into erf(x)-implementation
AhmedYKadah Sep 13, 2025
26b3b1f
reversed NaN check
AhmedYKadah Sep 13, 2025
693f788
cleaned up float32 polynomials, added accuracy of float32, removed un…
AhmedYKadah Sep 14, 2025
642c1de
Explicit NaN case for erf(Float64)
AhmedYKadah Sep 17, 2025
2911312
Added NaN case to erf(Float32)
AhmedYKadah Sep 17, 2025
bba5f9b
Removed unneccessary indent
AhmedYKadah Sep 17, 2025
c2a37a1
Fixed wrong NaN type
AhmedYKadah Sep 17, 2025
1d45644
added test for NaN for erf
AhmedYKadah Sep 19, 2025
6fd39f1
Merge branch 'master' into erf(x)-implementation
AhmedYKadah Nov 14, 2025
57877e0
added erfc tests
AhmedYKadah Nov 14, 2025
57bd581
added Inf erfc test
AhmedYKadah Nov 14, 2025
a0a5379
Changed interval edges to 16 bit versions for Float64
AhmedYKadah Nov 14, 2025
a9079c8
missed condition
AhmedYKadah Nov 14, 2025
462a3cf
docstring fix
AhmedYKadah Nov 14, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
166 changes: 161 additions & 5 deletions src/erf.jl
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,6 @@ for f in (:erf, :erfc)
@eval begin
$f(x::Number) = $internalf(float(x))

$internalf(x::Float64) = ccall(($libopenlibmf, libopenlibm), Float64, (Float64,), x)
$internalf(x::Float32) = ccall(($libopenlibmf0, libopenlibm), Float32, (Float32,), x)
$internalf(x::Float16) = Float16($internalf(Float32(x)))

$internalf(z::Complex{Float64}) = Complex{Float64}(ccall(($openspecfunf, libopenspecfun), Complex{Float64}, (Complex{Float64}, Float64), z, zero(Float64)))
$internalf(z::Complex{Float32}) = Complex{Float32}(ccall(($openspecfunf, libopenspecfun), Complex{Float64}, (Complex{Float64}, Float64), Complex{Float64}(z), Float64(eps(Float32))))
Expand All @@ -28,6 +25,10 @@ for f in (:erf, :erfc)
end
end

_erfc(x::Float64) = ccall((:erfc, libopenlibm), Float64, (Float64,), x)
_erfc(x::Float32) = ccall((:erfcf, libopenlibm), Float32, (Float32,), x)
_erfc(x::Float16) = Float16(_erfc(Float32(x)))

for f in (:erfcx, :erfi, :dawson, :faddeeva)
internalf = Symbol(:_, f)
openspecfunfsym = Symbol(:Faddeeva_, f === :dawson ? :Dawson : f === :faddeeva ? :w : f)
Expand Down Expand Up @@ -96,10 +97,165 @@ See also:
[`erfinv(x)`](@ref erfinv), [`erfcinv(x)`](@ref erfcinv).

# Implementation by
- `Float32`/`Float64`: C standard math library
[libm](https://en.wikipedia.org/wiki/C_mathematical_functions#libm).
- `Float32`: polynomial approximations of erf
- `Float64`: Julia implementation of https://github.com/ARM-software/optimized-routines/blob/master/math/erf.c
- `BigFloat`: C library for multiple-precision floating-point [MPFR](https://www.mpfr.org/)
"""

# Fast erf implementation using a mix of
# rational and polynomial approximations.
# Highest measured error is 1.01 ULPs at 0x1.39956ac43382fp+0.
function _erf(x::Float64)

# # top 32 bits
ix::UInt32=reinterpret(UInt64,x)>>32
# # top 32, without sign bit
ia::UInt32=ix & 0x7fffffff

if (ia < 0x3feb0000)
# a = |x| < 0.84375.

x2 = x * x

if (ia < 0x3fe00000)
## a < 0.5 - Use polynomial approximation.

# Minimax approximation of erf of the form x*P(x^2) approximately on the interval [0;0.5]
PA=(0.12837916709551256, -0.3761263890318287, 0.11283791670896592, -0.026866170630075903, 0.005223977428649761, -0.0008548312229989974, 0.00012054654502151287, -1.4906315067498891e-5, 1.6126444349070824e-6, -1.3074259056679966e-7)

r= fma(x,evalpoly(x2,PA),x) ## This fma is crucial for accuracy.
return r
else
## 0.5 <= a < 0.84375 - Use rational approximation.

# Rational approximation on [0x1p-28, 0.84375]
NA=(0x1.06eba8214db68p-3, -0x1.4cd7d691cb913p-2, -0x1.d2a51dbd7194fp-6,-0x1.7a291236668e4p-8, -0x1.8ead6120016acp-16)
DA=(1,0x1.97779cddadc09p-2, 0x1.0a54c5536cebap-4, 0x1.4d022c4d36b0fp-8,0x1.15dc9221c1a1p-13, -0x1.09c4342a2612p-18)

P=evalpoly(x2,NA)
Q=evalpoly(x2,DA)

return fma(P / Q, x, x)
end
elseif (ia < 0x3ff40000)
## 0.84375 <= |x| < 1.25.

# Rational approximation on [0.84375, 1.25]
NB=( -0x1.359b8bef77538p-9, 0x1.a8d00ad92b34dp-2, -0x1.7d240fbb8c3f1p-2, 0x1.45fca805120e4p-2, -0x1.c63983d3e28ecp-4, 0x1.22a36599795ebp-5, -0x1.1bf380a96073fp-9 )
DB=( 1, 0x1.b3e6618eee323p-4, 0x1.14af092eb6f33p-1, 0x1.2635cd99fe9a7p-4, 0x1.02660e763351fp-3, 0x1.bedc26b51dd1cp-7, 0x1.88b545735151dp-7 )

C = 0x1.b0ac16p-1

a = abs(x) - 1.0

P=evalpoly(a,NB)
Q=evalpoly(a,DB)

r= C + P / Q
return copysign(r,x)

elseif (ia < 0x40000000)
## 1.25 <= |x| < 2.0.
a = abs(x)
a = a - 1.25

# erfc polynomial approximation
# Generated using Sollya::remez(f(c*x+d), deg, [(a-d)/c;(b-d)/c], 1, 1e-16), [|D ...|] with deg=15 a=1.25 b=2 c=1 d=1.25
PC=( 0x1.3bcd133aa0ffcp-4, -0x1.e4652fadcb702p-3, 0x1.2ebf3dcca0446p-2, -0x1.571d01c62d66p-3, 0x1.93a9a8f5b3413p-8, 0x1.8281cbcc2cd52p-5, -0x1.5cffd86b4de16p-6, -0x1.db4ccf595053ep-9, 0x1.757cbf8684edap-8, -0x1.ce7dfd2a9e56ap-11, -0x1.99ee3bc5a3263p-11, 0x1.3c57cf9213f5fp-12, 0x1.60692996bf254p-14, -0x1.6e44cb7c1fa2ap-14, 0x1.9d4484ac482b2p-16, -0x1.578c9e375d37p-19)

# Obtains erfc of |x|
r=1.0-evalpoly(a,PC)
return copysign(r,x)

elseif (ia < 0x400a0000)
## 2 <= |x| < 3.25.
a = abs(x)
a = fma(0.5, a, -1.0)

# Generated using Sollya::remez(f(c*x+d), deg, [(a-d)/c;(b-d)/c], 1, 1e-16), [|D ...|] with deg=17 a=2 b=3.25 c=2 d=2
PD=( 0x1.328f5ec350e5p-8, -0x1.529b9e8cf8e99p-5, 0x1.529b9e8cd9e71p-3, -0x1.8b0ae3a023bf2p-2, 0x1.1a2c592599d82p-1, -0x1.ace732477e494p-2, -0x1.e1a06a27920ffp-6, 0x1.bae92a6d27af6p-2, -0x1.a15470fcf5ce7p-2, 0x1.bafe45d18e213p-6, 0x1.0d950680d199ap-2, -0x1.8c9481e8f22e3p-3, -0x1.158450ed5c899p-4, 0x1.c01f2973b44p-3, -0x1.73ed2827546a7p-3, 0x1.47733687d1ff7p-4, -0x1.2dec70d00b8e1p-6, 0x1.a947ab83cd4fp-10 )

# Obtains erfc of |x|
r=1.0-evalpoly(a,PD)
return copysign(r,x)

elseif (ia < 0x40100000)
## 3.25 <= |x| < 4.0.
a = abs(x)
a = a - 3.25

# Generated using Sollya::remez(f(c*x+d), deg, [(a-d)/c;(b-d)/c], 1, 1e-16), [|D ...|] with deg=13 a=3.25 b=4 c=1 d=3.25
PE=( 0x1.20c13035539e4p-18, -0x1.e9b5e8d16df7ep-16, 0x1.8de3cd4733bf9p-14, -0x1.9aa48beb8382fp-13, 0x1.2c7d713370a9fp-12, -0x1.490b12110b9e2p-12, 0x1.1459c5d989d23p-12, -0x1.64b28e9f1269p-13, 0x1.57c76d9d05cf8p-14, -0x1.bf271d9951cf8p-16, 0x1.db7ea4d4535c9p-19, 0x1.91c2e102d5e49p-20, -0x1.e9f0826c2149ep-21, 0x1.60eebaea236e1p-23 )

r=1.0-evalpoly(a,PE)
return copysign(r,x)

elseif (ia < 0x4017a000)
## 4 <= |x| < 5.90625.
a = abs(x)
a = fma(0.5, a, -2.0)

# Generated using Sollya::remez(f(c*x+d), deg, [(a-d)/c;(b-d)/c], 1, 1e-16), [|D ...|] with deg=16 a=4 b=5.90625 c=2 d=4
PF=( 0x1.08ddd130d1fa6p-26, -0x1.10b146f59ff06p-22, 0x1.10b135328b7b2p-19, -0x1.6039988e7575fp-17, 0x1.497d365e19367p-15, -0x1.da48d9afac83ep-14, 0x1.1024c9b1fbb48p-12, -0x1.fc962e7066272p-12, 0x1.87297282d4651p-11, -0x1.f057b255f8c59p-11, 0x1.0228d0eee063p-10, -0x1.b1b21b84ec41cp-11, 0x1.1ead8ae9e1253p-11, -0x1.1e708fba37fccp-12, 0x1.9559363991edap-14, -0x1.68c827b783d9cp-16, 0x1.2ec4adeccf4a2p-19 )

r=1.0-evalpoly(a,PF)
return copysign(r,x)
elseif isnan(x)
return NaN
else
return copysign(1.0,x)
end
end

# Fast erf implementation using
# polynomial approximations of erf and erfc.
# Highest measured error is 1.12 ULPs at x = 1.232469
function _erf(x::Float32)
xabs=abs(x)

if (xabs< 0.5)
# range [0;0.5]
# # erf approximation using erf(x)=x+x*P(x^2) with degree 6
# Sollya::remez(erf(sqrt(x))/sqrt(x)-1,6,[1e-32;0.5],1,1e-32);

p=(0.12837917f0, -0.37612638f0, 0.11283784f0, -0.026865287f0, 0.005218856f0, -0.0008394848f0)
return copysign(fma(evalpoly(x^2,p),x,x),x)

elseif(xabs<1.25)
# range [0.5;1.25]
# # erfc approximation with degree 11
# Sollya::remez(erfc(x+0.5),11,[0;1.25-0.5],1,1e-32);

p=(0.47950011f0, -0.8787826f0, 0.4393913f0, 0.14646415f0, -0.18308467f0, -0.007286422f0, 0.04987047f0, -0.0048868246f0, -0.011067663f0, 0.003422347f0, 0.00073027064f0, -0.0003758171f0)
return copysign(1f0-evalpoly(xabs-0.5f0,p),x)

elseif(xabs<2.5)
# range [1.25;2.5]
# erfc approximation with degree 13
# Sollya::remez(erfc(x+1.25),13,[1.25-1.25;2.5-1.25],1,1e-32);

p=(0.077099875f0, -0.23652112f0, 0.2956514f0, -0.16753574f0, 0.006158593f0, 0.04718712f0, -0.021331023f0, -0.0035262543f0, 0.005461831f0, -0.00047858673f0, -0.0012763853f0, 0.00073386944f0, -0.00017831658f0, 1.7451624f-5)
return copysign(1f0-evalpoly(xabs-1.25f0,p),x)

elseif (xabs<4.0)
# range [2.5;4.0]
# erfc approximation with degree 13
# Sollya::remez(erfc(x+2.5),13,[0;4-2.5],1,1e-32);

p=(0.00040695202f0, -0.002178284f0, 0.0054457085f0, -0.008350053f0, 0.008622011f0, -0.006115167f0, 0.0027899458f0, -0.000519395f0, -0.00030461047f0, 0.00031068458f0, -0.00013866898f0, 3.6909692f-5, -5.682889f-6, 3.929763f-7)
return copysign(1f0-evalpoly(xabs-2.5f0,p),x)

elseif isnan(x)
return NaN32
else
# range [4.0,inf)
return copysign(1f0, x)
end
end

_erf(x::Float16)=Float16(_erf(Float32(x)))
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

if you wanted to do a Float16 impl, it should be easier than the others. Specifically, the domain is only to 2, and the accuracy required is much reduced.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

100% could wait for a followup PR.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm thinking that too to be honest.
this and the poli regen.



function erf end
"""
erf(x, y)
Expand Down
38 changes: 37 additions & 1 deletion test/erf.jl
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,46 @@
@testset "real argument" begin
for T in (Float16, Float32, Float64)
@test @inferred(erf(T(1))) isa T
@test erf(T(0.25)) T(0.27632639016823696) rtol=2*eps(T)
@test erf(T(0.75)) T(0.7111556336535151) rtol=2*eps(T)
@test erf(T(1)) T(0.84270079294971486934) rtol=2*eps(T)
@test erf(T(1.5)) T(0.9661051464753108) rtol=2*eps(T)
@test erf(T(2.5)) T(0.9995930479825551) rtol=2*eps(T)
@test erf(T(3.5)) T(0.9999992569016276) rtol=2*eps(T)
@test erf(T(4.5)) T(0.9999999998033839) rtol=2*eps(T)
@test erf(T(6)) T(1.0) rtol=2*eps(T)

@test erf(T(-0.25)) T(-0.27632639016823696) rtol=2*eps(T)
@test erf(T(-0.75)) T(-0.7111556336535151) rtol=2*eps(T)
@test erf(T(-1)) T(-0.84270079294971486934) rtol=2*eps(T)
@test erf(T(-1.5)) T(-0.9661051464753108) rtol=2*eps(T)
@test erf(T(-2.5)) T(-0.9995930479825551) rtol=2*eps(T)
@test erf(T(-3.5)) T(-0.9999992569016276) rtol=2*eps(T)
@test erf(T(-4.5)) T(-0.9999999998033839) rtol=2*eps(T)
@test erf(T(-6)) T(-1.0) rtol=2*eps(T)

@test isnan(erf(T(NaN)))

@test @inferred(erfc(T(1))) isa T
@test erfc(T(1)) T(0.15729920705028513066) rtol=2*eps(T)
@test erfc(T(0.25)) T(0.7236736098317631) rtol=2*eps(T)
@test erfc(T(0.75)) T(0.28884436634648486) rtol=2*eps(T)
@test erfc(T(1.0)) T(0.15729920705028513) rtol=2*eps(T)
@test erfc(T(2.0)) T(0.004677734981047266) rtol=2*eps(T)
@test erfc(T(3.0)) T(2.209049699858544e-5) rtol=2*eps(T)
@test erfc(T(4.0)) T(1.541725790028002e-8) rtol=2*eps(T)
@test erfc(T(5.0)) T(1.537459794428035e-12) rtol=2*eps(T)
@test erfc(T(6.0)) T(2.1519736712498913e-17) rtol=2*eps(T)

@test erfc(T(-0.25)) T(1.276326390168237) rtol=2*eps(T)
@test erfc(T(-0.75)) T(1.7111556336535152) rtol=2*eps(T)
@test erfc(T(-1.0)) T(1.8427007929497148) rtol=2*eps(T)
@test erfc(T(-1.5)) T(1.9661051464753108) rtol=2*eps(T)
@test erfc(T(-2.5)) T(1.999593047982555) rtol=2*eps(T)
@test erfc(T(-3.5)) T(1.9999992569016276) rtol=2*eps(T)
@test erfc(T(-4.5)) T(1.999999999803384) rtol=2*eps(T)
@test erfc(T(-6.0)) T(2) rtol=2*eps(T)

@test isnan(erfc(T(NaN)))

@test @inferred(erfcx(T(1))) isa T
@test erfcx(T(1)) T(0.42758357615580700442) rtol=2*eps(T)
Expand Down