4 changes: 4 additions & 0 deletions samples/js/text_generation/benchmark_genai.js
@@ -90,6 +90,10 @@ async function main() {
pipe = await LLMPipeline(modelsPath, device, { schedulerConfig: schedulerConfig });
}

const inputData = await pipe.getTokenizer().encode(prompt);
const promptTokenSize = inputData.input_ids.shape[1];
console.log(`Prompt token size: ${promptTokenSize}`);

for (let i = 0; i < numWarmup; i++) {
await pipe.generate(prompt, config);
}
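
For context, a minimal sketch (assuming the same pipe object as above) of what the new encode binding returns — both fields are openvino-node Tensors of shape [batch_size, sequence_length]:

const { input_ids, attention_mask } = await pipe.getTokenizer().encode(prompt);
console.log(input_ids.shape);      // e.g. [1, 12]
console.log(attention_mask.shape); // matches input_ids.shape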
1 change: 1 addition & 0 deletions src/js/include/addon.hpp
@@ -12,6 +12,7 @@ struct AddonData {
Napi::FunctionReference tokenizer;
Napi::FunctionReference perf_metrics;
Napi::FunctionReference chat_history;
Napi::ObjectReference openvino_addon;
};

void init_class(Napi::Env env,
14 changes: 14 additions & 0 deletions src/js/include/helper.hpp
@@ -37,9 +37,13 @@ ov::AnyMap js_to_cpp<ov::AnyMap>(const Napi::Env& env, const Napi::Value& value)
/** @brief A template specialization for TargetType std::string */
template <>
std::string js_to_cpp<std::string>(const Napi::Env& env, const Napi::Value& value);
template <>
int64_t js_to_cpp<int64_t>(const Napi::Env& env, const Napi::Value& value);
/** @brief A template specialization for TargetType std::vector<std::string> */
template <>
std::vector<std::string> js_to_cpp<std::vector<std::string>>(const Napi::Env& env, const Napi::Value& value);
template <>
std::vector<int64_t> js_to_cpp<std::vector<int64_t>>(const Napi::Env& env, const Napi::Value& value);
/** @brief A template specialization for TargetType GenerateInputs */
template <>
GenerateInputs js_to_cpp<GenerateInputs>(const Napi::Env& env, const Napi::Value& value);
@@ -58,6 +62,8 @@ ov::genai::StructuredOutputConfig::Tag js_to_cpp<ov::genai::StructuredOutputConf
/** @brief A template specialization for TargetType ov::genai::StructuredOutputConfig::StructuralTag */
template <>
ov::genai::StructuredOutputConfig::StructuralTag js_to_cpp<ov::genai::StructuredOutputConfig::StructuralTag>(const Napi::Env& env, const Napi::Value& value);
template <>
ov::Tensor js_to_cpp<ov::Tensor>(const Napi::Env& env, const Napi::Value& value);
/**
* @brief Unwraps a C++ object from a JavaScript wrapper.
* @tparam TargetType The C++ class type to extract.
@@ -110,6 +116,12 @@ Napi::Value cpp_to_js<std::vector<size_t>, Napi::Value>(const Napi::Env& env, co

template <>
Napi::Value cpp_to_js<ov::genai::JsonContainer, Napi::Value>(const Napi::Env& env, const ov::genai::JsonContainer& json_container);

template <>
Napi::Value cpp_to_js<ov::Tensor, Napi::Value>(const Napi::Env& env, const ov::Tensor& tensor);

template <>
Napi::Value cpp_to_js<ov::genai::TokenizedInputs, Napi::Value>(const Napi::Env& env, const ov::genai::TokenizedInputs& tokenized_inputs);
/**
* @brief Template function to convert C++ map into Javascript Object. Map key must be std::string.
* @tparam MapElementType C++ data type of map elements.
@@ -130,3 +142,5 @@ bool is_chat_history(const Napi::Env& env, const Napi::Value& value);
std::string json_stringify(const Napi::Env& env, const Napi::Value& value);

Napi::Value json_parse(const Napi::Env& env, const std::string& value);

Napi::Function get_prototype_from_ov_addon(const Napi::Env& env, const std::string& ctor_name);
6 changes: 6 additions & 0 deletions src/js/include/tokenizer.hpp
@@ -15,6 +15,12 @@ class TokenizerWrapper : public Napi::ObjectWrap<TokenizerWrapper> {
Napi::Value get_eos_token_id(const Napi::CallbackInfo& info);
Napi::Value get_pad_token(const Napi::CallbackInfo& info);
Napi::Value get_pad_token_id(const Napi::CallbackInfo& info);
Napi::Value get_chat_template(const Napi::CallbackInfo& info);
Napi::Value get_original_chat_template(const Napi::CallbackInfo& info);
Napi::Value set_chat_template(const Napi::CallbackInfo& info);
Napi::Value supports_paired_input(const Napi::CallbackInfo& info);
Napi::Value encode(const Napi::CallbackInfo& info);
Napi::Value decode(const Napi::CallbackInfo& info);
private:
ov::genai::Tokenizer _tokenizer;
};
6 changes: 4 additions & 2 deletions src/js/lib/addon.ts
@@ -2,6 +2,7 @@ import { createRequire } from "module";
import { platform } from "node:os";
import { join, dirname, resolve } from "node:path";
import type { ChatHistory as IChatHistory } from "./chatHistory.js";
import { addon as ovAddon } from "openvino-node";

export type EmbeddingResult = Float32Array | Int8Array | Uint8Array;
export type EmbeddingResults = Float32Array[] | Int8Array[] | Uint8Array[];
@@ -60,6 +61,7 @@ interface OpenVINOGenAIAddon {
TextEmbeddingPipeline: TextEmbeddingPipelineWrapper;
LLMPipeline: any;
ChatHistory: IChatHistory;
setOpenvinoAddon: (ovAddon: any) => void;
}

// We need to use delayed import to get an updated Path if required
@@ -78,7 +80,7 @@ function getGenAIAddon(): OpenVINOGenAIAddon {
}

const addon = getGenAIAddon();
addon.setOpenvinoAddon(ovAddon);

export const { ChatHistory } = addon;
export const { TextEmbeddingPipeline, LLMPipeline, ChatHistory } = addon;
export type ChatHistory = IChatHistory;
export default addon;
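
The wiring above matters because the GenAI native addon needs the openvino-node addon's constructors (held in the new openvino_addon reference) to hand back real openvino-node objects. A minimal sketch of the observable effect — the package name and the instanceof check are illustrative assumptions, not part of this change:

import { addon as ov } from "openvino-node";
import { LLMPipeline } from "openvino-genai-node"; // package name assumed

const pipe = await LLMPipeline("./model_dir", "CPU");
const inputs = await pipe.getTokenizer().encode("hello");
// Tensors produced by the GenAI addon share openvino-node's Tensor prototype,
// so they interoperate with openvino-node APIs directly:
console.log(inputs.input_ids instanceof ov.Tensor); // expected: true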
1 change: 1 addition & 0 deletions src/js/lib/index.ts
@@ -40,3 +40,4 @@ export const { LLMPipeline, TextEmbeddingPipeline } = PipelineFactory;
export { DecodedResults } from "./pipelines/llmPipeline.js";
export * from "./utils.js";
export * from "./addon.js";
export * from "./tokenizer.js";
22 changes: 3 additions & 19 deletions src/js/lib/pipelines/llmPipeline.ts
@@ -1,30 +1,14 @@
import util from "node:util";
import addon, { ChatHistory } from "../addon.js";
import { ChatHistory, LLMPipeline as LLMPipelineWrap } from "../addon.js";
import { GenerationConfig, StreamingStatus, LLMPipelineProperties } from "../utils.js";
import { Tokenizer } from "../tokenizer.js";

export type ResolveFunction = (arg: { value: string; done: boolean }) => void;
export type Options = {
disableStreamer?: boolean;
max_new_tokens?: number;
};

interface Tokenizer {
/** Applies a chat template to format chat history into a prompt string. */
applyChatTemplate(
chatHistory: Record<string, any>[] | ChatHistory,
addGenerationPrompt: boolean,
chatTemplate?: string,
tools?: Record<string, any>[],
extraContext?: Record<string, any>,
): string;
getBosToken(): string;
getBosTokenId(): number;
getEosToken(): string;
getEosTokenId(): number;
getPadToken(): string;
getPadTokenId(): number;
}

/** Structure with raw performance metrics for each generation before any statistics are calculated. */
export type RawMetrics = {
/** Durations for each generate call in milliseconds. */
@@ -167,7 +151,7 @@ export class LLMPipeline {
async init() {
if (this.isInitialized) throw new Error("LLMPipeline is already initialized");

this.pipeline = new addon.LLMPipeline();
this.pipeline = new LLMPipelineWrap();

const initPromise = util.promisify(this.pipeline.init.bind(this.pipeline));
const result = await initPromise(this.modelPath, this.device, this.properties);
5 changes: 3 additions & 2 deletions src/js/lib/pipelines/textEmbeddingPipeline.ts
@@ -1,9 +1,10 @@
import util from "node:util";
import addon, {
import {
TextEmbeddingPipelineWrapper,
EmbeddingResult,
EmbeddingResults,
TextEmbeddingConfig,
TextEmbeddingPipeline as TextEmbeddingPipelineWrap,
} from "../addon.js";

export class TextEmbeddingPipeline {
@@ -29,7 +30,7 @@ export class TextEmbeddingPipeline {
async init() {
if (this.pipeline) throw new Error("TextEmbeddingPipeline is already initialized");

this.pipeline = new addon.TextEmbeddingPipeline();
this.pipeline = new TextEmbeddingPipelineWrap();

const initPromise = util.promisify(this.pipeline.init.bind(this.pipeline));
await initPromise(this.modelPath, this.device, this.config, this.ovProperties);
186 changes: 186 additions & 0 deletions src/js/lib/tokenizer.ts
@@ -0,0 +1,186 @@
// Copyright (C) 2025 Intel Corporation
// SPDX-License-Identifier: Apache-2.0

import { Tensor } from "openvino-node";
import { ChatHistory } from "./addon.js";

/**
* TokenizedInputs contains input_ids and attention_mask tensors.
* This is the result of encoding prompts using the Tokenizer.
*/
export interface TokenizedInputs {
/** Tensor containing token IDs for the encoded input */
input_ids: Tensor;
/** Tensor containing attention mask (1 for real tokens, 0 for padding) */
attention_mask: Tensor;
}

/**
* Options for encode method.
*/
export interface EncodeOptions {
/**
* Whether to add special tokens like BOS, EOS, PAD.
* @defaultValue true
*/
addSpecialTokens?: boolean;

/**
* Whether to pad the sequence to the maximum length.
* @defaultValue false
*/
padToMaxLength?: boolean;

/**
* Maximum length of the sequence.
* If undefined, the value will be taken from the IR.
*/
maxLength?: number;

/**
* Side to pad the sequence, can be 'left' or 'right'.
* If undefined, the value will be taken from the IR.
*/
paddingSide?: "left" | "right";
}

/**
* The Tokenizer class is used to encode prompts and decode resulting tokens.
*
* Chat template is initialized from sources in the following order, overriding the previous value:
* 1. chat_template entry from tokenizer_config.json
* 2. chat_template entry from processor_config.json
* 3. chat_template entry from chat_template.json
* 4. chat_template entry from rt_info section of openvino.Model
 * 5. If the template is known not to be supported by GenAI, it's replaced with a simplified supported version.
*/
export interface Tokenizer {
/**
* Applies a chat template to format chat history into a prompt string.
* @param chatHistory - chat history as an array of message objects or ChatHistory instance
* @param addGenerationPrompt - whether to add a generation prompt at the end
* @param chatTemplate - optional custom chat template to use instead of the default
* @param tools - optional array of tool definitions for function calling
* @param extraContext - optional extra context object for custom template variables
* @returns formatted prompt string
*/
applyChatTemplate(
chatHistory: Record<string, any>[] | ChatHistory,
addGenerationPrompt: boolean,
chatTemplate?: string,
tools?: Record<string, any>[],
extraContext?: Record<string, any>,
): string;
Comment on lines 108 to 114 — Copilot AI, Nov 20, 2025:

Replace generic 'any' types with more specific types. Consider using 'ChatMessage[]' for the chatHistory parameter, 'ToolDefinition[]' for tools, and 'ExtraContext' for extraContext, which are exported from './chatHistory.js'.

/**
* Encodes a single prompt or a list of prompts into tokenized inputs.
* @param prompts - single prompt string or array of prompts
* @param options - encoding options
* @returns TokenizedInputs object containing input_ids and attention_mask tensors.
*/
encode(prompts: string | string[], options?: EncodeOptions): TokenizedInputs;

/**
* Encodes two lists of prompts into tokenized inputs (for paired input).
 * The two lists must contain the same number of strings, or one of them may contain a single string.
* In the latter case, the single-string input will be broadcast into the shape of the other input,
* which is more efficient than repeating the string in pairs.
* @param prompts1 - first list of prompts to encode
* @param prompts2 - second list of prompts to encode
* @param options - encoding options
* @returns TokenizedInputs object containing input_ids and attention_mask tensors.
*/
encode(prompts1: string[], prompts2: string[], options?: EncodeOptions): TokenizedInputs;

/**
* Encodes a list of paired prompts into tokenized inputs.
 * Input format is the same as for HF paired input: [[prompt_1, prompt_2], ...].
* @param prompts - list of paired prompts to encode
* @param options - encoding options
* @returns TokenizedInputs object containing input_ids and attention_mask tensors.
*/
encode(prompts: [string, string][], options?: EncodeOptions): TokenizedInputs;
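
  // Illustrative only (not in the source) — the overloads above admit calls like:
  //   tokenizer.encode("What is OpenVINO?");                        // single prompt
  //   tokenizer.encode(["q1", "q2"], { addSpecialTokens: false });  // batch + options
  //   tokenizer.encode(["query"], ["doc A", "doc B"]);              // broadcast pairing
  //   tokenizer.encode([["q1", "doc A"], ["q2", "doc B"]]);         // HF-style pairs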

// TODO: move decode options to another interface
/**
* Decode a sequence of token IDs into a string prompt.
* @param tokens - sequence of token IDs to decode
* @param skipSpecialTokens - whether to skip special tokens. Default is true.
* @returns decoded string.
*/
decode(tokens: number[], skipSpecialTokens?: boolean): string;

/**
* Decode a batch of token sequences (as Tensor or array of arrays) into a list of string prompts.
* @param tokens - tensor containing token IDs or batch of token ID sequences
* @param skipSpecialTokens - whether to skip special tokens. Default is true.
* @returns list of decoded strings.
*/
decode(tokens: Tensor | number[][], skipSpecialTokens?: boolean): string[];

/**
* Returns the BOS (Beginning of Sequence) token string.
* @returns BOS token string
*/
getBosToken(): string;

/**
* Returns the BOS (Beginning of Sequence) token ID.
* @returns BOS token ID
*/
getBosTokenId(): number;

/**
* Returns the EOS (End of Sequence) token string.
* @returns EOS token string
*/
getEosToken(): string;

/**
* Returns the EOS (End of Sequence) token ID.
* @returns EOS token ID
*/
getEosTokenId(): number;

/**
* Returns the PAD (Padding) token string.
* @returns PAD token string
*/
getPadToken(): string;

/**
* Returns the PAD (Padding) token ID.
* @returns PAD token ID
*/
getPadTokenId(): number;

/**
* Returns the current chat template string.
* @returns current chat template string
*/
getChatTemplate(): string;

/**
* Returns the original chat template from the tokenizer configuration.
* @returns original chat template string
*/
getOriginalChatTemplate(): string;

/**
* Override a chat template read from tokenizer_config.json.
* @param chatTemplate - custom chat template string to use
*/
setChatTemplate(chatTemplate: string): void;

/**
* Returns true if the tokenizer supports paired input, false otherwise.
* @returns whether the tokenizer supports paired input
*/
supportsPairedInput(): boolean;

/**
* The current chat template string.
* Can be used to get or set the chat template.
*/
chatTemplate: string;
}
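
Taken together, a minimal end-to-end sketch of the interface above (illustrative: the package name and model path are assumptions, and error handling is omitted):

import { LLMPipeline } from "openvino-genai-node"; // package name assumed

const pipe = await LLMPipeline("./TinyLlama-1.1B-ov", "CPU");
const tokenizer = await pipe.getTokenizer();

// Encode, then round-trip back to text.
const inputs = tokenizer.encode("Hello, OpenVINO!");
const ids = Array.from(inputs.input_ids.data, Number); // i64 tensor data -> number[]
console.log(tokenizer.decode(ids)); // "Hello, OpenVINO!" (special tokens skipped by default)

// Chat-template helpers added in this PR.
const prompt = tokenizer.applyChatTemplate(
  [{ role: "user", content: "Hi!" }],
  true, // addGenerationPrompt
);
console.log(tokenizer.supportsPairedInput(), prompt.length);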
25 changes: 25 additions & 0 deletions src/js/src/addon.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,28 @@ void init_class(Napi::Env env,
exports.Set(class_name, prototype);
}

Napi::Value init_ov_addon(const Napi::CallbackInfo& info) {
Contributor — Suggested change:
Napi::Value init_ov_addon(const Napi::CallbackInfo& info) {
void init_ov_addon(const Napi::CallbackInfo& info) {

Contributor Author: This function is used as a JavaScript function, so we usually return a Napi::Value even if nothing is returned. I understand the confusion because it is called like init_class, but they have different usages. To be clearer, I will rename this function to set_ov_node.

Contributor: I believe there are functions that return Napi::Value even though they are void, but I do not think we should repeat that pattern. Here pop_back is used as a JS function and it's void:
void ChatHistoryWrap::pop_back(const Napi::CallbackInfo& info) {

Contributor Author: Agreed; it has been updated. Let's use this approach in the next changes.

Napi::Env env = info.Env();
if (info.Length() < 1) {
Napi::TypeError::New(env, "setOpenvinoAddon expects one argument").ThrowAsJavaScriptException();
return env.Undefined();
}
if (info[0].IsUndefined() || info[0].IsNull() || !info[0].IsObject()) {
Napi::TypeError::New(env, "Passed addon must be an object").ThrowAsJavaScriptException();
return env.Undefined();
}

auto addon_data = env.GetInstanceData<AddonData>();
if (!addon_data) {
Napi::TypeError::New(env, "Addon data is not initialized").ThrowAsJavaScriptException();
return env.Undefined();
}

auto ov_addon = info[0].As<Napi::Object>();
addon_data->openvino_addon = Napi::Persistent(ov_addon);
return env.Undefined();
}

// Define the addon initialization function
Napi::Object init_module(Napi::Env env, Napi::Object exports) {
auto addon_data = new AddonData();
@@ -31,6 +53,9 @@ Napi::Object init_module(Napi::Env env, Napi::Object exports) {
init_class(env, exports, "PerfMetrics", &PerfMetricsWrapper::get_class, addon_data->perf_metrics);
init_class(env, exports, "ChatHistory", &ChatHistoryWrap::get_class, addon_data->chat_history);

// Expose a helper to set the openvino-node addon from JS (useful for ESM)
exports.Set("setOpenvinoAddon", Napi::Function::New(env, init_ov_addon));

return exports;
}
