@@ -438,7 +438,8 @@ class Tokenizer::TokenizerImpl {
         set_state_if_necessary(infer_request_guard, tokenization_params);
         size_t batch_size = 1;
         infer_request_guard.get().set_input_tensor(ov::Tensor{ov::element::string, {batch_size}, &prompt});
-        infer_request_guard.get().infer();
+        infer_request_guard.get().start_async();
+        infer_request_guard.get().wait();
 
         return get_copied_results(
             infer_request_guard.get().get_tensor("input_ids"),
@@ -456,7 +457,8 @@ class Tokenizer::TokenizerImpl {
         set_state_if_necessary(infer_request_guard, tokenization_params);
         infer_request_guard.get().set_input_tensor(ov::Tensor{ov::element::string, {prompts.size()}, prompts.data()});
         auto size_ = infer_request_guard.get().get_input_tensor().get_shape();
-        infer_request_guard.get().infer();
+        infer_request_guard.get().start_async();
+        infer_request_guard.get().wait();
 
         unpadded = get_copied_results(
             infer_request_guard.get().get_tensor("input_ids"),
@@ -483,7 +485,8 @@ class Tokenizer::TokenizerImpl {
         set_state_if_necessary(infer_request_guard, detokenization_params);
         size_t batch_size = 1;
         infer_request_guard.get().set_input_tensor(ov::Tensor{ov::element::i64, {batch_size, tokens.size()}, tokens.data()});
-        infer_request_guard.get().infer();
+        infer_request_guard.get().start_async();
+        infer_request_guard.get().wait();
         return infer_request_guard.get().get_output_tensor().data<std::string>()[0];
     }
 
@@ -495,7 +498,8 @@ class Tokenizer::TokenizerImpl {
         CircularBufferQueueElementGuard<ov::InferRequest> infer_request_guard(this->m_ireq_queue_detokenizer.get());
         set_state_if_necessary(infer_request_guard, detokenization_params);
         infer_request_guard.get().set_input_tensor(tokens);
-        infer_request_guard.get().infer();
+        infer_request_guard.get().start_async();
+        infer_request_guard.get().wait();
 
         auto res = infer_request_guard.get().get_output_tensor();
         auto res_data = res.data<std::string>();
@@ -523,7 +527,8 @@ class Tokenizer::TokenizerImpl {
         CircularBufferQueueElementGuard<ov::InferRequest> infer_request_guard(this->m_ireq_queue_detokenizer.get());
         set_state_if_necessary(infer_request_guard, detokenization_params);
         infer_request_guard.get().set_input_tensor(tokens);
-        infer_request_guard.get().infer();
+        infer_request_guard.get().start_async();
+        infer_request_guard.get().wait();
         auto res = infer_request_guard.get().get_output_tensor();
         auto res_data = res.data<std::string>();
         return std::vector<std::string>(res_data, res_data + res.get_shape()[0]);
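
For context, every blocking `infer()` call in these hunks is replaced by `start_async()` followed by `wait()` on the same `ov::InferRequest`. A minimal sketch of that pattern in isolation is below; the model path and device name are placeholder assumptions, not part of this change.

```cpp
// Minimal sketch of the infer() -> start_async()/wait() pattern applied in
// this diff. "tokenizer.xml" and "CPU" are placeholders, not from this PR.
#include <openvino/openvino.hpp>

int main() {
    ov::Core core;
    ov::CompiledModel compiled = core.compile_model("tokenizer.xml", "CPU");
    ov::InferRequest request = compiled.create_infer_request();

    // Before: request.infer();  // blocks the calling thread until done
    // After:  launch the request asynchronously, then explicitly wait.
    // For a single request the result is identical, but splitting the call
    // lets the runtime schedule the work without holding the caller inside
    // a synchronous infer() invocation.
    request.start_async();
    request.wait();

    ov::Tensor output = request.get_output_tensor();
    (void)output;  // use the output tensor as needed
    return 0;
}
```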