Skip to content

Commit 84ba78a

Browse files
QuantizeLinear(opset 23): align type constraints with spec (separate T1/T2/T3)
1 parent 22e6cd6 commit 84ba78a

File tree

1 file changed

+32
-11
lines changed

1 file changed

+32
-11
lines changed

onnxruntime/core/providers/cpu/quantization/quantize_linear.cc

Lines changed: 32 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -601,6 +601,25 @@ Status DequantizeLinear<T>::Compute(OpKernelContext* ctx) const {
601601
.TypeConstraint("T3", DataTypeImpl::GetTensorType<T>()), \
602602
QuantizeLinear<T>);
603603

604+
// Opset 23 — type-constraint names match the operator schema: T1=x, T2=y_scale, T3=y/y_zero_point.
// Registers the CPU QuantizeLinear<T> kernel for versions 23 through 23 (opset 23 only).
// T is bound to T3; T1 and T2 each accept float and MLFloat16 tensors.
605+
#define REGISTER_QUANTIZELINEAR_OPSET23(T) \
606+
ONNX_CPU_OPERATOR_VERSIONED_TYPED_KERNEL( \
607+
QuantizeLinear, \
608+
23, \
609+
23, \
610+
T, \
611+
KernelDefBuilder() \
612+
/* T1: x */ \
613+
.TypeConstraint("T1", {DataTypeImpl::GetTensorType<float>(), \
614+
DataTypeImpl::GetTensorType<MLFloat16>()}) \
615+
/* T2: y_scale */ \
616+
.TypeConstraint("T2", {DataTypeImpl::GetTensorType<float>(), \
617+
DataTypeImpl::GetTensorType<MLFloat16>()}) \
618+
/* T3: y / y_zero_point == output type */ \
619+
.TypeConstraint("T3", DataTypeImpl::GetTensorType<T>()), \
620+
QuantizeLinear<T>)
621+
622+
604623
#define REGISTER_QUANTIZELINEAR_VERSIONED(T, start_version, end_version) \
605624
ONNX_CPU_OPERATOR_VERSIONED_TYPED_KERNEL( \
606625
QuantizeLinear, \
@@ -648,20 +667,22 @@ REGISTER_QUANTIZELINEAR(Float8E5M2)
648667
REGISTER_QUANTIZELINEAR(Float8E5M2FNUZ)
649668
#endif
650669

651-
// Opset 23 added support for float4e2m1.
652-
REGISTER_QUANTIZELINEAR_VERSIONED(int8_t, 23, 23)
653-
REGISTER_QUANTIZELINEAR_VERSIONED(uint8_t, 23, 23)
654-
REGISTER_QUANTIZELINEAR_VERSIONED(int16_t, 23, 23)
655-
REGISTER_QUANTIZELINEAR_VERSIONED(uint16_t, 23, 23)
656-
REGISTER_QUANTIZELINEAR_VERSIONED(Int4x2, 23, 23)
657-
REGISTER_QUANTIZELINEAR_VERSIONED(UInt4x2, 23, 23)
670+
671+
// Opset 23: one kernel registration per supported quantized type (the type bound to T3
// by REGISTER_QUANTIZELINEAR_OPSET23). The Float8 registrations below are compiled only
// when DISABLE_FLOAT8_TYPES is not defined.
672+
REGISTER_QUANTIZELINEAR_OPSET23(int8_t)
673+
REGISTER_QUANTIZELINEAR_OPSET23(uint8_t)
674+
REGISTER_QUANTIZELINEAR_OPSET23(int16_t)
675+
REGISTER_QUANTIZELINEAR_OPSET23(uint16_t)
676+
REGISTER_QUANTIZELINEAR_OPSET23(Int4x2)
677+
REGISTER_QUANTIZELINEAR_OPSET23(UInt4x2)
658678
#if !defined(DISABLE_FLOAT8_TYPES)
659-
REGISTER_QUANTIZELINEAR_VERSIONED(Float8E4M3FN, 23, 23)
660-
REGISTER_QUANTIZELINEAR_VERSIONED(Float8E4M3FNUZ, 23, 23)
661-
REGISTER_QUANTIZELINEAR_VERSIONED(Float8E5M2, 23, 23)
662-
REGISTER_QUANTIZELINEAR_VERSIONED(Float8E5M2FNUZ, 23, 23)
679+
REGISTER_QUANTIZELINEAR_OPSET23(Float8E4M3FN)
680+
REGISTER_QUANTIZELINEAR_OPSET23(Float8E4M3FNUZ)
681+
REGISTER_QUANTIZELINEAR_OPSET23(Float8E5M2)
682+
REGISTER_QUANTIZELINEAR_OPSET23(Float8E5M2FNUZ)
663683
#endif
664684

685+
665686
// Opset 21 added 16-bit and 4-bit int support to Q ops.
666687
// TODO(adrianlizarraga): Support int4 and block quantization.
667688
REGISTER_QUANTIZELINEAR_VERSIONED(int8_t, 21, 22)

0 commit comments

Comments
 (0)