This repository was archived by the owner on Jun 4, 2025. It is now read-only.

Commit 92bfd81

Use INT8 input for quantized models (#201)
* Added a call to the function that skips input quantization if the model is quantized
* Pass the model path as a string
1 parent 56e2d04 · commit 92bfd81

1 file changed: +6 −0

utils/neuralmagic/utils.py

Lines changed: 6 additions & 0 deletions
```diff
@@ -7,6 +7,7 @@
 import torch
 import yaml
 from sparseml.pytorch.optim import ScheduledModifierManager
+from sparseml.pytorch.sparsification.quantization import skip_onnx_input_quantize
 from sparseml.pytorch.utils import ModuleExporter, download_framework_model_by_recipe_type
 from sparseml.onnx.utils import override_model_input_shape
 from sparsezoo import Model
@@ -214,6 +215,11 @@ def neuralmagic_onnx_export(

     saved_model_path = save_dir / onnx_file_name

+    try:
+        skip_onnx_input_quantize(str(saved_model_path), str(saved_model_path))
+    except Exception:
+        pass
+
     # set model input shape to a static shape (graph is still dynamic compatible)
     # for performance with deepsparse engine + extractable shape for analysis
     sample_data_shape = list(sample_data.shape)
```
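
For context, a minimal sketch of the pattern this commit introduces, applied to a standalone export. The file path and the printed dtype codes are illustrative assumptions; only the `(input_path, output_path)` call and the try/except guard are taken from the diff above.

```python
# Sketch of the commit's pattern, under stated assumptions (not the repo's code).
import onnx
from sparseml.pytorch.sparsification.quantization import skip_onnx_input_quantize

model_path = "model.onnx"  # hypothetical path to a quantized (QAT) ONNX export

# A QAT export typically takes FP32 input and quantizes it inside the graph.
print(onnx.load(model_path).graph.input[0].type.tensor_type.elem_type)  # 1 = FLOAT

try:
    # Rewrite the graph in place so it accepts integer input directly,
    # folding away the leading input-quantization step.
    skip_onnx_input_quantize(model_path, model_path)
except Exception:
    # As in the commit: the call fails when there is no input quantization
    # to skip (e.g. a dense FP32 export), in which case the model is left as-is.
    pass

# After the rewrite the graph input is an integer tensor (assumed here).
print(onnx.load(model_path).graph.input[0].type.tensor_type.elem_type)  # e.g. 2 = UINT8
```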
