diff --git a/onnxruntime/python/tools/quantization/registry.py b/onnxruntime/python/tools/quantization/registry.py index 319c5aa468f7e..e7274b14fe0f5 100644 --- a/onnxruntime/python/tools/quantization/registry.py +++ b/onnxruntime/python/tools/quantization/registry.py @@ -87,6 +87,7 @@ "LayerNormalization": QDQNormalization, "BatchNormalization": QDQNormalization, "TopK": QDQDirect8BitOp, + "CumSum": QDQDirect8BitOp, } diff --git a/onnxruntime/python/tools/quantization/static_quantize_runner.py b/onnxruntime/python/tools/quantization/static_quantize_runner.py index d222ba5b59ac1..53ad2d8bfcf15 100644 --- a/onnxruntime/python/tools/quantization/static_quantize_runner.py +++ b/onnxruntime/python/tools/quantization/static_quantize_runner.py @@ -20,7 +20,7 @@ def __init__(self, model_path): name2tensors = [] for data_dir in data_dirs: name2tensor = {} - data_paths = [os.path.join(data_dir, a) for a in sorted(os.listdir(data_dir))] + data_paths = [os.path.join(data_dir, f"input_{input_idx}.pb") for input_idx in range(len(model_inputs))] data_ndarrays = [self.read_onnx_pb_data(data_path) for data_path in data_paths] for model_input, data_ndarray in zip(model_inputs, data_ndarrays, strict=False): name2tensor[model_input.name] = data_ndarray