Merge branch 'master' into py3.8

p-wysocki · web-flow · commit 627511683cc0 · 2025-01-10T10:11:02.000+01:00
diff --git a/.github/workflows/cpp_gapi-demos.yml b/.github/workflows/cpp_gapi-demos.yml
@@ -40,7 +40,7 @@ jobs:
           rm -rf cache/opencv/.git/  # Minimize cache
           mkdir cache/opencv/build
           cd cache/opencv/build
-          cmake -DCMAKE_BUILD_TYPE=Release -DWITH_INF_ENGINE=y -DOpenVINO_DIR=$GITHUB_WORKSPACE/ov/runtime/cmake/ -DCMAKE_CXX_COMPILER_LAUNCHER=ccache -DCMAKE_CXX_LINKER_LAUNCHER=ccache -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_C_LINKER_LAUNCHER=ccache -DBUILD_TESTS=y -DVIDEOIO_ENABLE_PLUGINS=y -DBUILD_PERF_TESTS=n -DBUILD_EXAMPLES=n -DBUILD_opencv_apps=y -DWITH_OPENCL=n -DWITH_OPENCLAMDBLAS=n -DWITH_GSTREAMER=n -DWITH_V4L=ON -DWITH_LIBV4L=ON -DWITH_OPENCLAMDFFT=n -DWITH_VA=n -DWITH_VA_INTEL=n -DWITH_PROTOBUF=n -DBUILD_PROTOBUF=n -DBUILD_JAVA=n -DBUILD_opencv_java_bindings_generator=n -DBUILD_opencv_python2=n -DBUILD_opencv_python3=n -DWITH_IMGCODEC_HDR=y -DWITH_IMGCODEC_SUNRASTER=y -DWITH_IMGCODEC_PXM=y -DWITH_IMGCODEC_PFM=y -DWITH_PNG=y -DWITH_TIFF=n -DWITH_WEBP=n -DWITH_OPENJPEG=n -DWITH_JASPER=n -DWITH_OPENEXR=n -DBUILD_opencv_dnn=n -DBUILD_opencv_features2d=n -DBUILD_opencv_flann=n -DWITH_TBB=n -DBUILD_INFO_SKIP_EXTRA_MODULES=n -DBUILD_JASPER=n -DBUILD_PNG=n -DBUILD_OPENEXR=n -DBUILD_WEBP=n -DBUILD_ZLIB=n -DWITH_CUDA=n -DWITH_EIGEN=n -DWITH_GPHOTO2=n -DOPENCV_GAPI_GSTREAMER=n -DWITH_LAPACK=n -DWITH_MATLAB=n -DWITH_MFX=n -DWITH_QUIRC=n -DWITH_VTK=n -DINSTALL_PDB=n -DINSTALL_TESTS=n -DINSTALL_C_EXAMPLES=n -DINSTALL_PYTHON_EXAMPLES=n -DOPENCV_GENERATE_SETUPVARS=n -DWITH_1394=n -DWITH_FFMPEG=y -DWITH_GTK_2_X=y -DBUILD_JPEG=y -DWITH_IPP=y -DENABLE_CONFIG_VERIFICATION=y -DBUILD_LIST=core,gapi,highgui,imgcodecs,imgproc,videoio,video ..
+          cmake -DCMAKE_BUILD_TYPE=Release -DWITH_INF_ENGINE=y -DOpenVINO_DIR=$GITHUB_WORKSPACE/ov/runtime/cmake/ -DCMAKE_CXX_COMPILER_LAUNCHER=ccache -DCMAKE_CXX_LINKER_LAUNCHER=ccache -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_C_LINKER_LAUNCHER=ccache -DBUILD_TESTS=y -DVIDEOIO_ENABLE_PLUGINS=y -DBUILD_PERF_TESTS=n -DBUILD_EXAMPLES=n -DBUILD_opencv_apps=y -DWITH_OPENCL=n -DWITH_OPENCLAMDBLAS=n -DWITH_GSTREAMER=n -DWITH_V4L=ON -DWITH_LIBV4L=ON -DWITH_OPENCLAMDFFT=n -DWITH_VA=n -DWITH_VA_INTEL=n -DWITH_PROTOBUF=n -DBUILD_PROTOBUF=n -DBUILD_JAVA=n -DBUILD_opencv_java_bindings_generator=n -DBUILD_opencv_python2=n -DBUILD_opencv_python3=n -DWITH_IMGCODEC_HDR=y -DWITH_IMGCODEC_SUNRASTER=y -DWITH_IMGCODEC_PXM=y -DWITH_IMGCODEC_PFM=y -DWITH_PNG=y -DWITH_TIFF=n -DWITH_WEBP=n -DWITH_OPENJPEG=n -DWITH_JASPER=n -DWITH_OPENEXR=n -DBUILD_opencv_dnn=n -DBUILD_opencv_features2d=n -DBUILD_opencv_flann=n -DWITH_TBB=n -DBUILD_INFO_SKIP_EXTRA_MODULES=n -DBUILD_JASPER=n -DBUILD_PNG=n -DBUILD_OPENEXR=n -DBUILD_WEBP=n -DBUILD_ZLIB=n -DWITH_CUDA=n -DWITH_EIGEN=n -DWITH_GPHOTO2=n -DOPENCV_GAPI_GSTREAMER=n -DWITH_LAPACK=n -DWITH_MATLAB=n -DWITH_MFX=n -DWITH_QUIRC=n -DWITH_VTK=n -DINSTALL_PDB=n -DINSTALL_TESTS=n -DINSTALL_C_EXAMPLES=n -DINSTALL_PYTHON_EXAMPLES=n -DOPENCV_GENERATE_SETUPVARS=n -DWITH_1394=n -DWITH_FFMPEG=y -DWITH_GTK_2_X=y -DBUILD_JPEG=y -DWITH_IPP=y -DWITH_AVIF=n -DENABLE_CONFIG_VERIFICATION=y -DBUILD_LIST=core,gapi,highgui,imgcodecs,imgproc,videoio,video ..
           cmake --build . -j $((`nproc`*2+2))
       - name: build_demos.sh
         run: |
diff --git a/Jenkinsfile b/Jenkinsfile
@@ -0,0 +1,22 @@
+#!groovy
+
+// Declares the build parameters of this job. All four are optional: the two
+// string parameters default to empty, which their descriptions say means
+// "determined automatically".
+properties([
+    parameters([
+        // Disabled by default.
+        booleanParam(defaultValue: false,
+                     description: 'Cancel the rest of parallel stages if one of them fails and return status immediately',
+                     name: 'failFast'),
+        // Enabled by default.
+        booleanParam(defaultValue: true,
+                     description: 'Whether to propagate commit status to GitHub',
+                     name: 'propagateStatus'),
+        string(defaultValue: '',
+               description: 'Pipeline shared library version (branch/tag/commit). Determined automatically if empty',
+               name: 'library_version'),
+        string(defaultValue: '',
+               description: 'Docker tag to take images with. Determined automatically if empty',
+               name: 'docker_tag')
+    ])
+])
+
+// NOTE(review): neither loadOpenVinoLibrary nor entrypoint is defined in this
+// file; presumably both come from the pipeline shared library that
+// 'library_version' refers to - confirm. This script is handed over as `this`.
+loadOpenVinoLibrary {
+    entrypoint(this)
+}
diff --git a/demos/common/cpp/utils/src/config_factory.cpp b/demos/common/cpp/utils/src/config_factory.cpp
@@ -48,7 +48,7 @@ ModelConfig ConfigFactory::getUserConfig(const std::string& flags_d,
             if (flags_nthreads != 0)
                 config.compiledModelConfig.emplace(ov::inference_num_threads.name(), flags_nthreads);
 
-            config.compiledModelConfig.emplace(ov::affinity.name(), ov::Affinity::NONE);
+            config.compiledModelConfig.emplace(ov::hint::enable_cpu_pinning.name(), false);
 
             ov::streams::Num nstreams =
                 deviceNstreams.count(device) > 0 ? ov::streams::Num(deviceNstreams[device]) : ov::streams::AUTO;
diff --git a/demos/multi_channel_common/cpp/graph.hpp b/demos/multi_channel_common/cpp/graph.hpp
@@ -29,7 +29,7 @@ static inline size_t roundUp(size_t enumerator, size_t denominator) {
 
 static inline std::queue<ov::InferRequest> compile(std::shared_ptr<ov::Model>&& model, const std::string& modelPath,
         const std::string& device, size_t performanceHintNumRequests, ov::Core& core) {
-    core.set_property("CPU", ov::affinity(ov::Affinity::NONE));
+    core.set_property("CPU", ov::hint::enable_cpu_pinning(false));
     ov::CompiledModel compiled = core.compile_model(model, device, {
         {ov::hint::performance_mode(ov::hint::PerformanceMode::THROUGHPUT)},
         {ov::hint::num_requests(performanceHintNumRequests)}});
diff --git a/demos/security_barrier_camera_demo/cpp/main.cpp b/demos/security_barrier_camera_demo/cpp/main.cpp
@@ -718,7 +718,7 @@ int main(int argc, char* argv[]) {
                 if (FLAGS_nthreads != 0) {
                     core.set_property("CPU", ov::inference_num_threads(FLAGS_nthreads));
                 }
-                core.set_property("CPU", ov::affinity(ov::Affinity::NONE));
+                core.set_property("CPU", ov::hint::enable_cpu_pinning(false));
                 core.set_property("CPU", ov::streams::num((device_nstreams.count("CPU") > 0 ? ov::streams::Num(device_nstreams["CPU"]) : ov::streams::AUTO)));
 
                 device_nstreams["CPU"] = core.get_property("CPU", ov::streams::num);
diff --git a/demos/social_distance_demo/cpp/main.cpp b/demos/social_distance_demo/cpp/main.cpp
@@ -695,7 +695,7 @@ int main(int argc, char* argv[]) {
                 if (FLAGS_nthreads != 0) {
                     core.set_property("CPU", ov::inference_num_threads(FLAGS_nthreads));
                 }
-                core.set_property("CPU", ov::affinity(ov::Affinity::NONE));
+                core.set_property("CPU", ov::hint::enable_cpu_pinning(false));
                 core.set_property("CPU", ov::streams::num((deviceNStreams.count("CPU") > 0 ? ov::streams::Num(deviceNStreams["CPU"]) : ov::streams::AUTO)));
                 deviceNStreams["CPU"] = core.get_property("CPU", ov::streams::num);
             }
diff --git a/tools/accuracy_checker/accuracy_checker/evaluators/custom_evaluators/whisper_evaluator.py b/tools/accuracy_checker/accuracy_checker/evaluators/custom_evaluators/whisper_evaluator.py
@@ -0,0 +1,186 @@
+"""
+Copyright (c) 2024 Intel Corporation
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+import re
+
+from ...representation import CharacterRecognitionPrediction
+from ...utils import UnsupportedPackage, extract_image_representations
+from .base_custom_evaluator import BaseCustomEvaluator
+
+try:
+    from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor
+except ImportError as import_err:
+    AutoModelForSpeechSeq2Seq = UnsupportedPackage("transformers", import_err.msg)
+    AutoProcessor = UnsupportedPackage("transformers", import_err.msg)
+
+try:
+    from transformers.pipelines.automatic_speech_recognition import AutomaticSpeechRecognitionPipeline
+except ImportError as import_err:
+    AutomaticSpeechRecognitionPipeline = UnsupportedPackage("transformers", import_err.msg)
+
+try:
+    import inflect
+except ImportError as import_err:
+    inflect = UnsupportedPackage("inflect", import_err.msg)
+
+
+class WhisperEvaluator(BaseCustomEvaluator):
+    """Custom evaluator that drives a Whisper speech-recognition pipeline over a dataset.
+
+    The inference backend is one of the pipeline classes defined in this module,
+    selected by name through the ``pipeline_class`` config entry.
+    """
+
+    # Names accepted for ``pipeline_class``; from_configs resolves them via globals().
+    VALID_PIPELINE_CLASSES = [
+        "GenAIWhisperPipeline",
+        "HFWhisperPipeline",
+        "OptimumWhisperPipeline"
+    ]
+
+    def __init__(self, dataset_config, pipe, orig_config):
+        """Store the pipeline and initialise the base evaluator.
+
+        Args:
+            dataset_config: dataset section of the evaluation config.
+            pipe: pipeline object whose ``predict`` method is called by ``_process``.
+            orig_config: forwarded unchanged to the base class.
+        """
+        # NOTE(review): the second base-class argument is passed as None; presumably
+        # it is the launcher slot, which this evaluator does not use - confirm.
+        super().__init__(dataset_config, None, orig_config)
+        self.pipe = pipe
+        # adapter_type is only set when the pipeline exposes an ``adapter`` attribute.
+        if hasattr(self.pipe, "adapter"):
+            self.adapter_type = self.pipe.adapter.__provider__
+
+    @classmethod
+    def from_configs(cls, config, delayed_model_loading=False, orig_config=None):
+        """Build an evaluator from a configuration dictionary.
+
+        Args:
+            config: dict with ``datasets``, ``pipeline_class`` and ``launchers`` keys.
+                It is modified in place: the first launcher's ``device`` (if any)
+                is copied to ``config["_device"]`` for the pipeline to read.
+            delayed_model_loading: accepted for interface compatibility; not used here.
+            orig_config: forwarded to the constructor.
+
+        Returns:
+            A new evaluator wrapping an instance of the requested pipeline class.
+
+        Raises:
+            ValueError: if ``pipeline_class`` is not in ``VALID_PIPELINE_CLASSES``.
+        """
+        dataset_config = config["datasets"]
+        pipeline_class_name = config["pipeline_class"]
+        if 'device' in config['launchers'][0]:
+            config["_device"] = config['launchers'][0]['device']
+
+        if pipeline_class_name not in cls.VALID_PIPELINE_CLASSES:
+            raise ValueError(f"Invalid pipeline class name: {pipeline_class_name}. "
+                             f"Must be one of {cls.VALID_PIPELINE_CLASSES}")
+
+        # The name was validated above, so the lookup is limited to the known classes.
+        pipeline_class = globals()[pipeline_class_name]
+        pipe = pipeline_class(config)
+        return cls(dataset_config, pipe, orig_config)
+
+    def _process(self, output_callback, calculate_metrics, progress_reporter, metric_config, csv_file):
+        """Run the pipeline over every batch of ``self.dataset``.
+
+        For each batch the inputs are preprocessed, passed to ``self.pipe.predict``,
+        scored through ``_get_metrics_result`` and reported through
+        ``_update_progress``. When ``output_callback`` is given it is called once
+        per batch with the first raw prediction, or with ``None`` when the
+        pipeline returned no raw predictions.
+        """
+        for batch_id, (batch_input_ids, batch_annotation, batch_inputs, batch_identifiers) in enumerate(self.dataset):
+            batch_inputs = self.preprocessor.process(batch_inputs, batch_annotation)
+            batch_inputs_extr, batch_meta = extract_image_representations(batch_inputs)
+
+            batch_raw_prediction, batch_prediction = self.pipe.predict(
+                batch_identifiers, batch_inputs_extr, batch_meta
+            )
+            metrics_result = self._get_metrics_result(batch_input_ids, batch_annotation, batch_prediction,
+                                                      calculate_metrics)
+            if output_callback:
+                # The pipelines in this module return an empty raw-prediction list,
+                # so indexing it unconditionally would raise IndexError.
+                raw_prediction = batch_raw_prediction[0] if batch_raw_prediction else None
+                output_callback(raw_prediction, metrics_result=metrics_result,
+                                element_identifiers=batch_identifiers, dataset_indices=batch_input_ids)
+            self._update_progress(progress_reporter, metric_config, batch_id, len(batch_prediction), csv_file)
+
+    def release(self):
+        """Nothing to release: this evaluator performs no clean-up."""
+        pass
+
+
+def normalize_transcription(engine, text):
+    """Bring a transcription to a canonical upper-case form for comparison.
+
+    Args:
+        engine: object providing ``number_to_words(token)``.
+        text: raw transcription string.
+
+    Returns:
+        The text with all-digit tokens spelled out, every character that is
+        neither a word character nor whitespace removed, letters upper-cased
+        and whitespace collapsed to single spaces.
+    """
+    # Spell out tokens that consist of digits only; other tokens pass through unchanged.
+    words = []
+    for word in text.split():
+        words.append(engine.number_to_words(word) if word.isdigit() else word)
+    # Both alternatives are replaced with the empty string: the first matches an
+    # apostrophe between two word characters, the second any other punctuation.
+    stripped = re.sub(r"\b'\b|[^\w\s]", "", " ".join(words))
+    # split()/join drops leading and trailing blanks and squeezes repeated ones.
+    return " ".join(stripped.upper().split())
+
+
+class WhisperPipeline:
+    """Base class for the Whisper inference backends used by the evaluator.
+
+    Subclasses implement ``_initialize_pipeline`` to build the backend object and
+    ``_get_predictions`` to transcribe one input with it.
+    """
+
+    def __init__(self, config):
+        """Create the number-to-words engine and the backend pipeline from ``config``."""
+        self.engine = inflect.engine()
+        self.pipeline = self._initialize_pipeline(config)
+
+    def _initialize_pipeline(self, config):
+        """Build and return the backend pipeline object; must be overridden."""
+        raise NotImplementedError
+
+    def _get_predictions(self, data, identifiers, input_meta):
+        """Return the raw transcription for one input; must be overridden."""
+        raise NotImplementedError
+
+    def predict(self, identifiers, input_data, input_meta, encoder_callback=None):
+        """Transcribe every item of a batch.
+
+        Args:
+            identifiers: sequence of sample identifiers, one per item of ``input_data``.
+            input_data: sequence of per-sample inputs.
+            input_meta: sequence of per-sample metadata, one per item of ``input_data``.
+            encoder_callback: accepted for interface compatibility; not used.
+
+        Returns:
+            A tuple ``(raw_outputs, predictions)``: ``raw_outputs`` is always an
+            empty list and ``predictions`` holds one
+            ``CharacterRecognitionPrediction`` per input, carrying that input's
+            identifier and normalized transcription.
+        """
+        outputs = []
+        for idx, data in enumerate(input_data):
+            # The subclasses in this module read element 0 of the identifiers and
+            # metadata they receive, so hand them a one-element slice for this item.
+            transcription = self._get_predictions(data, identifiers[idx:idx + 1], input_meta[idx:idx + 1])
+            prediction_text = normalize_transcription(self.engine, transcription)
+            # Pair each item with its own identifier and text; the first item's
+            # values used to be repeated for every item of the batch.
+            outputs.append(CharacterRecognitionPrediction(identifiers[idx], prediction_text))
+        return [], outputs
+
+
+class GenAIWhisperPipeline(WhisperPipeline):
+    """Whisper backend built on ``openvino_genai.WhisperPipeline``."""
+
+    def _initialize_pipeline(self, config):
+        """Create the GenAI pipeline from ``config["_models"][0]`` on ``config["_device"]``.
+
+        The device defaults to ``"CPU"``. A missing ``_models`` entry yields
+        ``None``, which is passed on as the string ``"None"``.
+        """
+        try:
+            import openvino_genai as ov_genai  # pylint: disable=C0415
+        except ImportError as import_error:
+            # NOTE(review): raise_error is expected to raise, otherwise ov_genai
+            # would be unbound below - confirm against UnsupportedPackage.
+            UnsupportedPackage("openvino_genai", import_error.msg).raise_error(self.__class__.__name__)
+
+        model_path = str(config.get("_models", [None])[0])
+        target_device = config.get("_device", "CPU")
+        return ov_genai.WhisperPipeline(model_path, device=target_device)
+
+    def _get_predictions(self, data, identifiers, input_meta):
+        """Transcribe ``data[0]`` and return the first text of the generation result."""
+        generation_result = self.pipeline.generate(data[0], return_timestamps=True)
+        return generation_result.texts[0]
+
+
+class HFWhisperPipeline(WhisperPipeline):
+    """Whisper backend built on the Hugging Face ``transformers`` PyTorch model."""
+
+    def _initialize_pipeline(self, config):
+        """Load the model named by ``config["model_id"]`` and wrap it in an ASR pipeline.
+
+        The model is always placed on the CPU and loaded in float32.
+        """
+        try:
+            import torch  # pylint: disable=C0415
+        except ImportError as import_error:
+            # NOTE(review): raise_error is expected to raise, otherwise torch
+            # would be unbound below - confirm against UnsupportedPackage.
+            UnsupportedPackage("torch", import_error.msg).raise_error(self.__class__.__name__)
+
+        model_id = config.get("model_id")
+        device = "cpu"
+        # The model never leaves the CPU, so keep full precision. float16 used to be
+        # selected whenever CUDA was merely available, producing a half-precision
+        # CPU model on GPU-equipped hosts.
+        torch_dtype = torch.float32
+        model = AutoModelForSpeechSeq2Seq.from_pretrained(
+            model_id, torch_dtype=torch_dtype, low_cpu_mem_usage=True
+        ).to(device)
+
+        processor = AutoProcessor.from_pretrained(model_id)
+
+        pipeline = AutomaticSpeechRecognitionPipeline(
+            model=model,
+            tokenizer=processor.tokenizer,
+            feature_extractor=processor.feature_extractor,
+            torch_dtype=torch_dtype,
+            device=device,
+        )
+        return pipeline
+
+    def _get_predictions(self, data, identifiers, input_meta):
+        """Transcribe ``data[0]`` and return the ``"text"`` field of the pipeline output.
+
+        The sampling rate is read from ``input_meta[0]["sample_rate"]`` and the
+        sample path is set to ``identifiers[0]``.
+        """
+        sampling_rate = input_meta[0].get("sample_rate")
+        sample = {"path": identifiers[0], "array": data[0], "sampling_rate": sampling_rate}
+        return self.pipeline(sample, return_timestamps=True)["text"]
+
+
+class OptimumWhisperPipeline(WhisperPipeline):
+    """Whisper backend built on ``optimum.intel`` OpenVINO models."""
+
+    def _initialize_pipeline(self, config):
+        """Load model and processor from ``config["_models"][0]`` and build an ASR pipeline.
+
+        The model is moved to ``config["_device"]``, which defaults to ``"CPU"``.
+        """
+        try:
+            from optimum.intel.openvino import OVModelForSpeechSeq2Seq  # pylint: disable=C0415
+        except ImportError as import_error:
+            # NOTE(review): raise_error is expected to raise, otherwise the model
+            # class would be unbound below - confirm against UnsupportedPackage.
+            UnsupportedPackage("optimum.intel.openvino", import_error.msg).raise_error(self.__class__.__name__)
+
+        target_device = config.get("_device", "CPU")
+        model_path = str(config.get("_models", [None])[0])
+        speech_model = OVModelForSpeechSeq2Seq.from_pretrained(model_path).to(target_device)
+        speech_processor = AutoProcessor.from_pretrained(model_path)
+
+        return AutomaticSpeechRecognitionPipeline(
+            model=speech_model,
+            tokenizer=speech_processor.tokenizer,
+            feature_extractor=speech_processor.feature_extractor
+        )
+
+    def _get_predictions(self, data, identifiers, input_meta):
+        """Transcribe ``data[0]`` and return the ``"text"`` field of the pipeline output.
+
+        The sampling rate is read from ``input_meta[0]["sample_rate"]`` and the
+        sample path is set to ``identifiers[0]``.
+        """
+        rate = input_meta[0].get("sample_rate")
+        audio_sample = {"path": identifiers[0], "array": data[0], "sampling_rate": rate}
+        recognized = self.pipeline(audio_sample, return_timestamps=True)
+        return recognized["text"]
diff --git a/tools/accuracy_checker/requirements-extra.in b/tools/accuracy_checker/requirements-extra.in
@@ -48,3 +48,6 @@ lmdb>=1.2.1
 
 # pandas datasets support
 pandas>=1.1.5,<2.1
+
+# word-based representations of numbers
+inflect>=7.4.0
diff --git a/tools/accuracy_checker/requirements-test.in b/tools/accuracy_checker/requirements-test.in
@@ -7,3 +7,4 @@ pytest-mock~=2.0
 # will not include atomicwrites and thus will not work on Windows.
 # So as a workaround, make the atomicwrites dependency unconditional.
 atomicwrites
+datasets
diff --git a/tools/accuracy_checker/tests/test_whisper_evaluator.py b/tools/accuracy_checker/tests/test_whisper_evaluator.py
@@ -0,0 +1,92 @@
+"""
+Copyright (c) 2024-2025 Intel Corporation
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+import os
+from pathlib import Path
+from unittest.mock import MagicMock, patch
+
+import pytest
+from accuracy_checker.evaluators.custom_evaluators.whisper_evaluator import (
+    GenAIWhisperPipeline, HFWhisperPipeline, OptimumWhisperPipeline,
+    WhisperEvaluator)
+from datasets import load_dataset
+
+AutoProcessor = pytest.importorskip("transformers", reason="transformers is not available").AutoProcessor
+AutoTokenizer = pytest.importorskip("transformers", reason="transformers is not available").AutoTokenizer
+export_tokenizer = pytest.importorskip("optimum.exporters.openvino.convert", reason="optimum.exporters.openvino.convert is not available").export_tokenizer
+OVModelForSpeechSeq2Seq = pytest.importorskip("optimum.intel.openvino", reason="optimum.intel.openvino is not available").OVModelForSpeechSeq2Seq
+
+
+model_id = "openai/whisper-tiny"
+model_dir = Path("/tmp/whisper-tiny")
+
+def setup_module(module):
+    """Fetch one LibriSpeech validation sample and expose it through module globals.
+
+    Sets ``input_data`` (list with the audio array), ``input_meta`` (list with a
+    dict holding ``sample_rate``) and ``identifiers`` (list with the sample id).
+    """
+    global input_data, input_meta, identifiers
+
+    # Streaming mode: only the first sample is pulled from the dataset iterator.
+    stream = load_dataset("openslr/librispeech_asr", "clean", split="validation", streaming=True, trust_remote_code=True)
+    first_sample = next(iter(stream))
+    audio = first_sample["audio"]
+    input_data = [audio["array"]]
+    input_meta = [{"sample_rate": audio["sampling_rate"]}]
+    identifiers = [first_sample["id"]]
+
+def teardown_module(module):
+    """Remove the exported model directory created by the conversion test."""
+    import shutil  # pylint: disable=C0415
+
+    # rmtree also removes nested directories. The previous cleanup unlinked only
+    # the files and then called rmdir(), which raises OSError as soon as the
+    # directory contains a sub-directory.
+    if model_dir.exists():
+        shutil.rmtree(model_dir)
+
+# The marker registers this test's outcome with pytest-dependency. Without it the
+# tests that declare depends=["test_optimum_convert_model_to_ir"] never find a
+# recorded result and are skipped.
+@pytest.mark.dependency()
+def test_optimum_convert_model_to_ir():
+    """Export the Whisper model, tokenizer and processor to ``model_dir``.
+
+    The saved files are the input of the tests that load a pipeline from
+    ``model_dir``.
+    """
+    tokenizer = AutoTokenizer.from_pretrained(model_id)
+    processor = AutoProcessor.from_pretrained(model_id)
+    base_model = OVModelForSpeechSeq2Seq.from_pretrained(model_id)
+
+    model_dir.mkdir(parents=True, exist_ok=True)
+    base_model.save_pretrained(model_dir)
+    tokenizer.save_pretrained(model_dir)
+    processor.save_pretrained(model_dir)
+    export_tokenizer(tokenizer, model_dir)
+
+    assert base_model.__class__.__module__.startswith('optimum.intel.openvino')
+
+class TestWhisperEvaluator:
+    """Checks that each Whisper pipeline returns a string for the sample set up at module level."""
+
+    @staticmethod
+    def _transcribe_sample(pipeline):
+        """Wrap ``pipeline`` in an evaluator and transcribe the module-level sample."""
+        evaluator = WhisperEvaluator(None, pipeline, None)
+        return evaluator.pipe._get_predictions(input_data, identifiers, input_meta)
+
+    def test_hf_whisper_pipeline(self):
+        transcription = self._transcribe_sample(HFWhisperPipeline({"model_id": model_id}))
+        assert isinstance(transcription, str)
+
+    @pytest.mark.dependency(depends=["test_optimum_convert_model_to_ir"])
+    def test_genai_whisper_pipeline(self):
+        # Loads the model files from model_dir.
+        settings = {"_models": [model_dir], "_device": "CPU"}
+        transcription = self._transcribe_sample(GenAIWhisperPipeline(settings))
+        assert isinstance(transcription, str)
+
+    @pytest.mark.dependency(depends=["test_optimum_convert_model_to_ir"])
+    def test_optimum_whisper_pipeline(self):
+        # Loads the model files from model_dir.
+        settings = {"_models": [model_dir], "_device": "CPU"}
+        transcription = self._transcribe_sample(OptimumWhisperPipeline(settings))
+        assert isinstance(transcription, str)

Original file line number	Diff line number	Diff line change
`@@ -718,7 +718,7 @@ int main(int argc, char* argv[]) {`
`718`	`718`	`if (FLAGS_nthreads != 0) {`
`719`	`719`	`core.set_property("CPU", ov::inference_num_threads(FLAGS_nthreads));`
`720`	`720`	`}`
`721`		`- core.set_property("CPU", ov::affinity(ov::Affinity::NONE));`
	`721`	`+ core.set_property("CPU", ov::hint::enable_cpu_pinning(false));`
`722`	`722`	`core.set_property("CPU", ov::streams::num((device_nstreams.count("CPU") > 0 ? ov::streams::Num(device_nstreams["CPU"]) : ov::streams::AUTO)));`
`723`	`723`
`724`	`724`	`device_nstreams["CPU"] = core.get_property("CPU", ov::streams::num);`
Original file line number	Diff line number	Diff line change
`@@ -695,7 +695,7 @@ int main(int argc, char* argv[]) {`
`695`	`695`	`if (FLAGS_nthreads != 0) {`
`696`	`696`	`core.set_property("CPU", ov::inference_num_threads(FLAGS_nthreads));`
`697`	`697`	`}`
`698`		`- core.set_property("CPU", ov::affinity(ov::Affinity::NONE));`
	`698`	`+ core.set_property("CPU", ov::hint::enable_cpu_pinning(false));`
`699`	`699`	`core.set_property("CPU", ov::streams::num((deviceNStreams.count("CPU") > 0 ? ov::streams::Num(deviceNStreams["CPU"]) : ov::streams::AUTO)));`
`700`	`700`	`deviceNStreams["CPU"] = core.get_property("CPU", ov::streams::num);`
`701`	`701`	`}`