diff --git a/.github/workflows/coverity.yml b/.github/workflows/coverity.yml
index c4a19e7090..465fc3f448 100644
--- a/.github/workflows/coverity.yml
+++ b/.github/workflows/coverity.yml
@@ -43,7 +43,7 @@ jobs:
with:
platform: ubuntu22
commit_packages_to_provide: wheels
- revision: fcf7c2964cf460ecfcb039f748d1f4028626d58c
+ revision: 17efa42572fcac28b14cde43c8af1ca79fc2f800
# Set specific revision and uncomment to use OV from its PR build:
# branch_name: master
# event_name: pull_request
diff --git a/.github/workflows/linux.yml b/.github/workflows/linux.yml
index a4ad20710b..15c89b987c 100644
--- a/.github/workflows/linux.yml
+++ b/.github/workflows/linux.yml
@@ -93,7 +93,7 @@ jobs:
with:
platform: ubuntu22
commit_packages_to_provide: wheels,developer_package.tar.gz,openvino_node_npm_package.tar.gz
- revision: fcf7c2964cf460ecfcb039f748d1f4028626d58c
+ revision: 17efa42572fcac28b14cde43c8af1ca79fc2f800
# Set specific revision and uncomment to use OV from its PR build:
# branch_name: master
# event_name: pull_request
diff --git a/.github/workflows/mac.yml b/.github/workflows/mac.yml
index 86dedec156..127dd4b57c 100644
--- a/.github/workflows/mac.yml
+++ b/.github/workflows/mac.yml
@@ -85,7 +85,7 @@ jobs:
platform: macos_14_7
arch: 'arm64'
commit_packages_to_provide: wheels,developer_package.tar.gz,openvino_node_npm_package.tar.gz
- revision: fcf7c2964cf460ecfcb039f748d1f4028626d58c
+ revision: 17efa42572fcac28b14cde43c8af1ca79fc2f800
# Set specific revision and uncomment to use OV from its PR build:
# branch_name: master
# event_name: pull_request
diff --git a/.github/workflows/manylinux_2_28.yml b/.github/workflows/manylinux_2_28.yml
index 1be636623b..e39b44d5af 100644
--- a/.github/workflows/manylinux_2_28.yml
+++ b/.github/workflows/manylinux_2_28.yml
@@ -93,7 +93,7 @@ jobs:
with:
platform: almalinux8
commit_packages_to_provide: wheels,developer_package.tar.gz,openvino_node_npm_package.tar.gz
- revision: fcf7c2964cf460ecfcb039f748d1f4028626d58c
+ revision: 17efa42572fcac28b14cde43c8af1ca79fc2f800
- name: Clone docker tag from OpenVINO repo
uses: actions/checkout@93cb6efe18208431cddfb8368fd83d5badbf9bfd # v5.0.1
diff --git a/.github/workflows/windows.yml b/.github/workflows/windows.yml
index 93a03be433..6607fc795a 100644
--- a/.github/workflows/windows.yml
+++ b/.github/workflows/windows.yml
@@ -89,7 +89,7 @@ jobs:
with:
platform: windows
commit_packages_to_provide: wheels,openvino_node_npm_package.zip
- revision: fcf7c2964cf460ecfcb039f748d1f4028626d58c
+ revision: 17efa42572fcac28b14cde43c8af1ca79fc2f800
# Set specific revision and uncomment to use OV from its PR build:
# branch_name: master
# event_name: pull_request
diff --git a/samples/js/text_generation/benchmark_genai.js b/samples/js/text_generation/benchmark_genai.js
index 1ae77a323f..7aa0d5136d 100644
--- a/samples/js/text_generation/benchmark_genai.js
+++ b/samples/js/text_generation/benchmark_genai.js
@@ -90,6 +90,10 @@ async function main() {
pipe = await LLMPipeline(modelsPath, device, { schedulerConfig: schedulerConfig });
}
+ const inputData = await pipe.getTokenizer().encode(prompt);
+ const promptTokenSize = inputData.input_ids.getShape()[1];
+ console.log(`Prompt token size: ${promptTokenSize}`);
+
for (let i = 0; i < numWarmup; i++) {
await pipe.generate(prompt, config);
}
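
For reference, the same `encode()`/`getShape()` calls can report token counts for a whole batch of prompts; a minimal sketch (not part of the sample, helper name hypothetical):

```js
// Hypothetical helper: token counts for several prompts at once.
// `tokenizer` is the object returned by pipe.getTokenizer() above.
function promptTokenSizes(tokenizer, prompts) {
  const { input_ids } = tokenizer.encode(prompts);
  // Shape is [batch_size, padded_sequence_length]
  const [batchSize, seqLen] = input_ids.getShape();
  return { batchSize, seqLen };
}
```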
diff --git a/site/docs/bindings/node-js.md b/site/docs/bindings/node-js.md
index 1e7d59fb13..7abaef3504 100644
--- a/site/docs/bindings/node-js.md
+++ b/site/docs/bindings/node-js.md
@@ -24,6 +24,12 @@ Node.js bindings currently support:
- Structured output
- ReAct agent support
- `TextEmbeddingPipeline`: Generate text embeddings for semantic search and RAG applications
+- `Tokenizer`: Fast tokenization/detokenization and chat prompt formatting
+ - Encode strings into token ID and attention mask tensors
+ - Decode token sequences
+ - Apply chat templates
+ - Access special tokens (BOS/EOS/PAD)
+ - Paired input support
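
A minimal usage sketch of the new `Tokenizer` surface (the model directory path is a placeholder; see the tokenization guide for details):

```js
import { Tokenizer } from 'openvino-genai-node';

const tokenizer = new Tokenizer('path/to/model_dir'); // placeholder path

const tokens = tokenizer.encode('The Sun is yellow because');
console.log(tokens.input_ids.getShape());      // e.g. [1, 6]
console.log(tokens.attention_mask.getShape()); // same shape as input_ids

// Decoding a Tensor returns an array of strings (one per sequence in the batch)
console.log(tokenizer.decode(tokens.input_ids)[0]);

console.log(tokenizer.getBosToken(), tokenizer.getEosTokenId());
```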
## Installation
diff --git a/site/docs/guides/tokenization.mdx b/site/docs/guides/tokenization.mdx
index 7d2c9d6c62..7405523a92 100644
--- a/site/docs/guides/tokenization.mdx
+++ b/site/docs/guides/tokenization.mdx
@@ -34,6 +34,20 @@ It can be initialized from the path, in-memory IR representation or obtained fro
auto tokenizer = pipe.get_tokenizer();
```
+
+ ```js
+ import { LLMPipeline, Tokenizer } from 'openvino-genai-node';
+
+ let tokenizer;
+
+ // Initialize from the path
+ tokenizer = new Tokenizer(models_path);
+
+ // Or get tokenizer instance from LLMPipeline
+ const pipe = await LLMPipeline(models_path, "CPU");
+ tokenizer = pipe.getTokenizer();
+ ```
+
`Tokenizer` has `encode()` and `decode()` methods which support the following arguments: `add_special_tokens`, `skip_special_tokens`, `pad_to_max_length`, `max_length`.
@@ -51,6 +65,11 @@ It can be initialized from the path, in-memory IR representation or obtained fro
auto tokens = tokenizer.encode("The Sun is yellow because", ov::genai::add_special_tokens(false));
```
+
+ ```js
+ const tokens = tokenizer.encode("The Sun is yellow because", { add_special_tokens: false });
+ ```
+
The `encode()` method returns a [`TokenizedInputs`](https://docs.openvino.ai/2025/api/genai_api/_autosummary/openvino_genai.TokenizedInputs.html) object containing `input_ids` and `attention_mask`, both stored as `ov::Tensor`.
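
In the Node.js bindings the same structure is returned with `input_ids` and `attention_mask` exposed as openvino-node `Tensor` objects; a short sketch of inspecting it (assuming a tokenizer created as shown above):

```js
const tokens = tokenizer.encode(["The Sun is yellow because", "The"]);

console.log(tokens.input_ids.getShape());      // [2, <longest sequence length>]
console.log(tokens.attention_mask.getShape()); // same shape as input_ids

// Raw token IDs are exposed as a typed array by openvino-node
const ids = tokens.input_ids.getData();
console.log(ids.length);
```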
@@ -121,4 +140,40 @@ If `pad_to_max_length` is set to true, then instead of padding to the longest se
// out_shape: [1, 128]
```
+
+ ```js
+ import { Tokenizer } from 'openvino-genai-node';
+
+ const tokenizer = new Tokenizer(models_path);
+ const prompts = ["The Sun is yellow because", "The"];
+ let tokens;
+
+ // Since the prompts are shorter than the maximum length (taken from the IR), max_length does not affect the shape.
+ // The resulting shape is determined by the length of the longest token sequence.
+ // Equivalent of HuggingFace hf_tokenizer.encode(prompt, padding="longest", truncation=True)
+ tokens = tokenizer.encode(["The Sun is yellow because", "The"]);
+ // or is equivalent to
+ tokens = tokenizer.encode(["The Sun is yellow because", "The"], { pad_to_max_length: false });
+ console.log(tokens.input_ids.getShape());
+ // out_shape: [2, 6]
+
+ // The resulting tokens tensor will be padded to 1024; sequences that exceed this length will be truncated.
+ // Equivalent of HuggingFace hf_tokenizer.encode(prompt, padding="max_length", truncation=True, max_length=1024)
+ tokens = tokenizer.encode([
+ "The Sun is yellow because",
+ "The",
+ "The longest string ever".repeat(2000),
+ ], {
+ pad_to_max_length: true,
+ max_length: 1024,
+ });
+ console.log(tokens.input_ids.getShape());
+ // out_shape: [3, 1024]
+
+ // Truncation and padding are also applied to single string prompts.
+ tokens = tokenizer.encode("The Sun is yellow because", { pad_to_max_length: true, max_length: 128 });
+ console.log(tokens.input_ids.getShape());
+ // out_shape: [1, 128]
+ ```
+
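
The decode direction mirrors this; a short JS sketch of batch decoding a Tensor versus decoding a plain array of token IDs (assuming the tokenizer created earlier):

```js
const tokens = tokenizer.encode(["The Sun is yellow because", "The"]);

// Decoding a Tensor performs batch decoding and returns an array of strings
const texts = tokenizer.decode(tokens.input_ids);
console.log(texts.length); // 2

// A plain array of token IDs decodes into a single string
const single = tokenizer.decode([1n, 2n, 3n]);
console.log(typeof single); // "string"
```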
diff --git a/src/js/eslint.config.cjs b/src/js/eslint.config.cjs
index b69cf72f62..a8c41a70d2 100644
--- a/src/js/eslint.config.cjs
+++ b/src/js/eslint.config.cjs
@@ -53,6 +53,13 @@ module.exports = defineConfig([
"json_schema",
"structured_output_config",
"structural_tags_config",
+ "skip_special_tokens",
+ "add_special_tokens",
+ "pad_to_max_length",
+ "max_length",
+ "padding_side",
+ "add_second_input",
+ "number_of_inputs",
],
},
],
diff --git a/src/js/include/addon.hpp b/src/js/include/addon.hpp
index f2a23b026c..28371ba822 100644
--- a/src/js/include/addon.hpp
+++ b/src/js/include/addon.hpp
@@ -12,6 +12,7 @@ struct AddonData {
Napi::FunctionReference tokenizer;
Napi::FunctionReference perf_metrics;
Napi::FunctionReference chat_history;
+ Napi::ObjectReference openvino_addon;
};
void init_class(Napi::Env env,
diff --git a/src/js/include/helper.hpp b/src/js/include/helper.hpp
index a28f7c071c..55370d91e3 100644
--- a/src/js/include/helper.hpp
+++ b/src/js/include/helper.hpp
@@ -37,9 +37,13 @@ ov::AnyMap js_to_cpp<ov::AnyMap>(const Napi::Env& env, const Napi::Value& value)
/** @brief A template specialization for TargetType std::string */
template <>
std::string js_to_cpp<std::string>(const Napi::Env& env, const Napi::Value& value);
+template <>
+int64_t js_to_cpp<int64_t>(const Napi::Env& env, const Napi::Value& value);
/** @brief A template specialization for TargetType std::vector<std::string> */
template <>
std::vector<std::string> js_to_cpp<std::vector<std::string>>(const Napi::Env& env, const Napi::Value& value);
+template <>
+std::vector<int64_t> js_to_cpp<std::vector<int64_t>>(const Napi::Env& env, const Napi::Value& value);
/** @brief A template specialization for TargetType GenerateInputs */
template <>
GenerateInputs js_to_cpp<GenerateInputs>(const Napi::Env& env, const Napi::Value& value);
@@ -58,6 +62,8 @@ ov::genai::StructuredOutputConfig::Tag js_to_cpp
ov::genai::StructuredOutputConfig::StructuralTag js_to_cpp<ov::genai::StructuredOutputConfig::StructuralTag>(const Napi::Env& env, const Napi::Value& value);
+template <>
+ov::Tensor js_to_cpp<ov::Tensor>(const Napi::Env& env, const Napi::Value& value);
/**
* @brief Unwraps a C++ object from a JavaScript wrapper.
* @tparam TargetType The C++ class type to extract.
@@ -110,6 +116,12 @@ Napi::Value cpp_to_js, Napi::Value>(const Napi::Env& env, co
template <>
Napi::Value cpp_to_js<ov::genai::JsonContainer, Napi::Value>(const Napi::Env& env, const ov::genai::JsonContainer& json_container);
+
+template <>
+Napi::Value cpp_to_js<ov::Tensor, Napi::Value>(const Napi::Env& env, const ov::Tensor& tensor);
+
+template <>
+Napi::Value cpp_to_js<ov::genai::TokenizedInputs, Napi::Value>(const Napi::Env& env, const ov::genai::TokenizedInputs& tokenized_inputs);
/**
* @brief Template function to convert C++ map into Javascript Object. Map key must be std::string.
* @tparam MapElementType C++ data type of map elements.
@@ -130,3 +142,5 @@ bool is_chat_history(const Napi::Env& env, const Napi::Value& value);
std::string json_stringify(const Napi::Env& env, const Napi::Value& value);
Napi::Value json_parse(const Napi::Env& env, const std::string& value);
+
+Napi::Function get_prototype_from_ov_addon(const Napi::Env& env, const std::string& ctor_name);
diff --git a/src/js/include/tokenizer.hpp b/src/js/include/tokenizer.hpp
index a7e6f3ef42..c1d0386525 100644
--- a/src/js/include/tokenizer.hpp
+++ b/src/js/include/tokenizer.hpp
@@ -15,6 +15,12 @@ class TokenizerWrapper : public Napi::ObjectWrap<TokenizerWrapper> {
Napi::Value get_eos_token_id(const Napi::CallbackInfo& info);
Napi::Value get_pad_token(const Napi::CallbackInfo& info);
Napi::Value get_pad_token_id(const Napi::CallbackInfo& info);
+ Napi::Value get_chat_template(const Napi::CallbackInfo& info);
+ Napi::Value get_original_chat_template(const Napi::CallbackInfo& info);
+ Napi::Value set_chat_template(const Napi::CallbackInfo& info);
+ Napi::Value supports_paired_input(const Napi::CallbackInfo& info);
+ Napi::Value encode(const Napi::CallbackInfo& info);
+ Napi::Value decode(const Napi::CallbackInfo& info);
private:
ov::genai::Tokenizer _tokenizer;
};
diff --git a/src/js/lib/addon.ts b/src/js/lib/addon.ts
index f8b9abee61..b6023e5a09 100644
--- a/src/js/lib/addon.ts
+++ b/src/js/lib/addon.ts
@@ -2,6 +2,8 @@ import { createRequire } from "module";
import { platform } from "node:os";
import { join, dirname, resolve } from "node:path";
import type { ChatHistory as IChatHistory } from "./chatHistory.js";
+import type { Tokenizer as ITokenizer } from "./tokenizer.js";
+import { addon as ovAddon } from "openvino-node";
export type EmbeddingResult = Float32Array | Int8Array | Uint8Array;
export type EmbeddingResults = Float32Array[] | Int8Array[] | Uint8Array[];
@@ -60,6 +62,8 @@ interface OpenVINOGenAIAddon {
TextEmbeddingPipeline: TextEmbeddingPipelineWrapper;
LLMPipeline: any;
ChatHistory: IChatHistory;
+ Tokenizer: ITokenizer;
+ setOpenvinoAddon: (ovAddon: any) => void;
}
// We need to use delayed import to get an updated Path if required
@@ -78,7 +82,8 @@ function getGenAIAddon(): OpenVINOGenAIAddon {
}
const addon = getGenAIAddon();
+addon.setOpenvinoAddon(ovAddon);
-export const { ChatHistory } = addon;
+export const { TextEmbeddingPipeline, LLMPipeline, ChatHistory, Tokenizer } = addon;
export type ChatHistory = IChatHistory;
-export default addon;
+export type Tokenizer = ITokenizer;
diff --git a/src/js/lib/index.ts b/src/js/lib/index.ts
index cd673a6797..ad8e49168f 100644
--- a/src/js/lib/index.ts
+++ b/src/js/lib/index.ts
@@ -40,3 +40,5 @@ export const { LLMPipeline, TextEmbeddingPipeline } = PipelineFactory;
export { DecodedResults } from "./pipelines/llmPipeline.js";
export * from "./utils.js";
export * from "./addon.js";
+export type { TokenizedInputs, EncodeOptions, DecodeOptions } from "./tokenizer.js";
+export type { ChatMessage, ExtraContext, ToolDefinition } from "./chatHistory.js";
diff --git a/src/js/lib/pipelines/llmPipeline.ts b/src/js/lib/pipelines/llmPipeline.ts
index b03146e0d2..01fe1aa5f3 100644
--- a/src/js/lib/pipelines/llmPipeline.ts
+++ b/src/js/lib/pipelines/llmPipeline.ts
@@ -1,6 +1,7 @@
import util from "node:util";
-import addon, { ChatHistory } from "../addon.js";
+import { ChatHistory, LLMPipeline as LLMPipelineWrap } from "../addon.js";
import { GenerationConfig, StreamingStatus, LLMPipelineProperties } from "../utils.js";
+import { Tokenizer } from "../tokenizer.js";
export type ResolveFunction = (arg: { value: string; done: boolean }) => void;
export type Options = {
@@ -8,23 +9,6 @@ export type Options = {
max_new_tokens?: number;
};
-interface Tokenizer {
- /** Applies a chat template to format chat history into a prompt string. */
- applyChatTemplate(
- chatHistory: Record<string, any>[] | ChatHistory,
- addGenerationPrompt: boolean,
- chatTemplate?: string,
- tools?: Record<string, any>[],
- extraContext?: Record<string, any>,
- ): string;
- getBosToken(): string;
- getBosTokenId(): number;
- getEosToken(): string;
- getEosTokenId(): number;
- getPadToken(): string;
- getPadTokenId(): number;
-}
-
/** Structure with raw performance metrics for each generation before any statistics are calculated. */
export type RawMetrics = {
/** Durations for each generate call in milliseconds. */
@@ -167,7 +151,7 @@ export class LLMPipeline {
async init() {
if (this.isInitialized) throw new Error("LLMPipeline is already initialized");
- this.pipeline = new addon.LLMPipeline();
+ this.pipeline = new LLMPipelineWrap();
const initPromise = util.promisify(this.pipeline.init.bind(this.pipeline));
const result = await initPromise(this.modelPath, this.device, this.properties);
diff --git a/src/js/lib/pipelines/textEmbeddingPipeline.ts b/src/js/lib/pipelines/textEmbeddingPipeline.ts
index 5517b7ad97..44c4d45fa1 100644
--- a/src/js/lib/pipelines/textEmbeddingPipeline.ts
+++ b/src/js/lib/pipelines/textEmbeddingPipeline.ts
@@ -1,9 +1,10 @@
import util from "node:util";
-import addon, {
+import {
TextEmbeddingPipelineWrapper,
EmbeddingResult,
EmbeddingResults,
TextEmbeddingConfig,
+ TextEmbeddingPipeline as TextEmbeddingPipelineWrap,
} from "../addon.js";
export class TextEmbeddingPipeline {
@@ -29,7 +30,7 @@ export class TextEmbeddingPipeline {
async init() {
if (this.pipeline) throw new Error("TextEmbeddingPipeline is already initialized");
- this.pipeline = new addon.TextEmbeddingPipeline();
+ this.pipeline = new TextEmbeddingPipelineWrap();
const initPromise = util.promisify(this.pipeline.init.bind(this.pipeline));
await initPromise(this.modelPath, this.device, this.config, this.ovProperties);
diff --git a/src/js/lib/tokenizer.ts b/src/js/lib/tokenizer.ts
new file mode 100644
index 0000000000..37e1fe5ec9
--- /dev/null
+++ b/src/js/lib/tokenizer.ts
@@ -0,0 +1,228 @@
+/* eslint-disable @typescript-eslint/no-misused-new */
+// Copyright (C) 2025 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+
+import type { Tensor } from "openvino-node";
+import type { ChatHistory } from "./chatHistory.js";
+
+/**
+ * TokenizedInputs contains input_ids, attention_mask and (optionally) token_type_ids tensors.
+ * token_type_ids is returned if the tokenizer supports paired input, otherwise the field is undefined.
+ * This is the result of encoding prompts using the Tokenizer.
+ */
+export interface TokenizedInputs {
+ /** Tensor containing token IDs of the encoded input */
+ input_ids: Tensor;
+ /** Tensor containing attention mask (1 for real tokens, 0 for padding) */
+ attention_mask: Tensor;
+ /**
+ * Optional tensor with token type IDs (segment ids) for paired inputs.
+ * Present only if the model/tokenizer supports paired input.
+ */
+ token_type_ids?: Tensor;
+}
+
+/**
+ * Options for encode method.
+ */
+export interface EncodeOptions {
+ /**
+ * Whether to add special tokens like BOS, EOS, PAD.
+ * @defaultValue true
+ */
+ add_special_tokens?: boolean;
+
+ /**
+ * Whether to pad the sequence to the maximum length.
+ * @defaultValue false
+ */
+ pad_to_max_length?: boolean;
+
+ /**
+ * Maximum length of the sequence.
+ * If undefined, the value will be taken from the IR.
+ */
+ max_length?: number;
+
+ /**
+ * Side to pad the sequence, can be 'left' or 'right'.
+ * If undefined, the value will be taken from the IR.
+ */
+ padding_side?: "left" | "right";
+}
+
+/**
+ * Options for decode method.
+ */
+export interface DecodeOptions {
+ /**
+ * Whether to skip special tokens like BOS, EOS, PAD during detokenization.
+ * @defaultValue true
+ */
+ skip_special_tokens?: boolean;
+}
+
+/**
+ * The Tokenizer class is used to encode prompts and decode resulting tokens.
+ *
+ * Chat template is initialized from sources in the following order, overriding the previous value:
+ * 1. chat_template entry from tokenizer_config.json
+ * 2. chat_template entry from processor_config.json
+ * 3. chat_template entry from chat_template.json
+ * 4. chat_template entry from rt_info section of openvino.Model
+ * 5. If the template is known not to be supported by GenAI, it's replaced with a simplified supported version.
+ */
+export interface Tokenizer {
+ /**
+ * Load tokenizer and detokenizer IRs by path.
+ * @param tokenizerPath Path to a directory containing tokenizer/detokenizer XML/BIN files.
+ * @param properties Optional OpenVINO compilation properties.
+ */
+ new (tokenizerPath: string, properties?: Record<string, any>): Tokenizer;
+
+ /**
+ * Create tokenizer from already loaded IR contents.
+ * @param tokenizerModel Tokenizer XML string.
+ * @param tokenizerWeights Tokenizer weights tensor.
+ * @param detokenizerModel Detokenizer XML string.
+ * @param detokenizerWeights Detokenizer weights tensor.
+ * @param properties Optional OpenVINO compilation properties.
+ */
+ new (
+ tokenizerModel: string,
+ tokenizerWeights: Tensor,
+ detokenizerModel: string,
+ detokenizerWeights: Tensor,
+ properties?: Record<string, any>,
+ ): Tokenizer;
+
+ /**
+ * Applies a chat template to format chat history into a prompt string.
+ * @param chatHistory - chat history as an array of message objects or ChatHistory instance
+ * @param addGenerationPrompt - whether to add a generation prompt at the end
+ * @param chatTemplate - optional custom chat template to use instead of the default
+ * @param tools - optional array of tool definitions for function calling
+ * @param extraContext - optional extra context object for custom template variables
+ * @returns formatted prompt string
+ */
+ applyChatTemplate(
+ chatHistory: Record<string, any>[] | ChatHistory,
+ addGenerationPrompt: boolean,
+ chatTemplate?: string,
+ tools?: Record<string, any>[],
+ extraContext?: Record<string, any>,
+ ): string;
+
+ /**
+ * Encodes a single prompt or a list of prompts into tokenized inputs.
+ * @param prompts - single prompt string or array of prompts
+ * @param options - encoding options
+ * @returns TokenizedInputs object containing input_ids, attention_mask and optional token_type_ids tensors.
+ */
+ encode(prompts: string | string[], options?: EncodeOptions): TokenizedInputs;
+
+ /**
+ * Encodes two lists of prompts into tokenized inputs (for paired input).
+ * The number of strings must be the same, or one of the inputs can contain one string.
+ * In the latter case, the single-string input will be broadcast into the shape of the other input,
+ * which is more efficient than repeating the string in pairs.
+ * @param prompts1 - first list of prompts to encode
+ * @param prompts2 - second list of prompts to encode
+ * @param options - encoding options
+ * @returns TokenizedInputs object containing input_ids, attention_mask and optional token_type_ids tensors.
+ */
+ encode(prompts1: string[], prompts2: string[], options?: EncodeOptions): TokenizedInputs;
+
+ /**
+ * Encodes a list of paired prompts into tokenized inputs.
+ * The input format is the same as HF paired input: [[prompt_1, prompt_2], ...].
+ * @param prompts - list of paired prompts to encode
+ * @param options - encoding options
+ * @returns TokenizedInputs object containing input_ids, attention_mask and optional token_type_ids tensors.
+ */
+ encode(prompts: [string, string][], options?: EncodeOptions): TokenizedInputs;
+
+ /**
+ * Decode a sequence of token IDs into a string prompt.
+ *
+ * @param tokens - sequence of token IDs to decode
+ * @param options - decoding options
+ * @returns decoded string.
+ */
+ decode(tokens: number[] | bigint[], options?: DecodeOptions): string;
+
+ /**
+ * Decode a batch of token sequences (as Tensor or array of arrays) into a list of string prompts.
+ *
+ * @param tokens - tensor containing token IDs or batch of token ID sequences
+ * @param options - decoding options
+ * @returns list of decoded strings.
+ */
+ decode(tokens: Tensor | number[][] | bigint[][], options?: DecodeOptions): string[];
+
+ /**
+ * Returns the BOS (Beginning of Sequence) token string.
+ * @returns BOS token string
+ */
+ getBosToken(): string;
+
+ /**
+ * Returns the BOS (Beginning of Sequence) token ID.
+ * @returns BOS token ID
+ */
+ getBosTokenId(): bigint;
+
+ /**
+ * Returns the EOS (End of Sequence) token string.
+ * @returns EOS token string
+ */
+ getEosToken(): string;
+
+ /**
+ * Returns the EOS (End of Sequence) token ID.
+ * @returns EOS token ID
+ */
+ getEosTokenId(): bigint;
+
+ /**
+ * Returns the PAD (Padding) token string.
+ * @returns PAD token string
+ */
+ getPadToken(): string;
+
+ /**
+ * Returns the PAD (Padding) token ID.
+ * @returns PAD token ID
+ */
+ getPadTokenId(): bigint;
+
+ /**
+ * Returns the current chat template string.
+ * @returns current chat template string
+ */
+ getChatTemplate(): string;
+
+ /**
+ * Returns the original chat template from the tokenizer configuration.
+ * @returns original chat template string
+ */
+ getOriginalChatTemplate(): string;
+
+ /**
+ * Override a chat template read from tokenizer_config.json.
+ * @param chatTemplate - custom chat template string to use
+ */
+ setChatTemplate(chatTemplate: string): void;
+
+ /**
+ * Returns true if the tokenizer supports paired input, false otherwise.
+ * @returns whether the tokenizer supports paired input
+ */
+ supportsPairedInput(): boolean;
+
+ /**
+ * The current chat template string.
+ * Can be used to get or set the chat template.
+ */
+ chatTemplate: string;
+}
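
For orientation, a small JavaScript usage sketch of this interface (the model directory is a placeholder; the paired-input call assumes a tokenizer converted with a second input, as exercised in the skipped tests below):

```js
import { Tokenizer } from "openvino-genai-node";

const tokenizer = new Tokenizer("path/to/model_dir"); // placeholder path

// Chat template handling
const customTemplate = "{% for message in messages %}{{ message['content'] }}\n{% endfor %}";
const prompt = tokenizer.applyChatTemplate([{ role: "user", content: "hi" }], false, customTemplate);
console.log(prompt);

// Encoding with options, then decoding back
const tokens = tokenizer.encode("hi", { add_special_tokens: false });
console.log(tokenizer.decode(tokens.input_ids));

// Paired input is available only when the tokenizer supports it
if (tokenizer.supportsPairedInput()) {
  const pairs = tokenizer.encode(["question 1", "question 2"], ["answer 1", "answer 2"]);
  console.log(pairs.token_type_ids?.getShape());
}
```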
diff --git a/src/js/src/addon.cpp b/src/js/src/addon.cpp
index 210673da93..72cb3b6b16 100644
--- a/src/js/src/addon.cpp
+++ b/src/js/src/addon.cpp
@@ -20,6 +20,27 @@ void init_class(Napi::Env env,
exports.Set(class_name, prototype);
}
+void set_ov_addon(const Napi::CallbackInfo& info) {
+ Napi::Env env = info.Env();
+ if (info.Length() < 1) {
+ Napi::TypeError::New(env, "setOpenvinoAddon expects one argument").ThrowAsJavaScriptException();
+ return;
+ }
+ if (info[0].IsUndefined() || info[0].IsNull() || !info[0].IsObject()) {
+ Napi::TypeError::New(env, "Passed addon must be an object").ThrowAsJavaScriptException();
+ return;
+ }
+
+ auto addon_data = env.GetInstanceData<AddonData>();
+ if (!addon_data) {
+ Napi::TypeError::New(env, "Addon data is not initialized").ThrowAsJavaScriptException();
+ return;
+ }
+
+ auto ov_addon = info[0].As<Napi::Object>();
+ addon_data->openvino_addon = Napi::Persistent(ov_addon);
+}
+
// Define the addon initialization function
Napi::Object init_module(Napi::Env env, Napi::Object exports) {
auto addon_data = new AddonData();
@@ -31,6 +52,9 @@ Napi::Object init_module(Napi::Env env, Napi::Object exports) {
init_class(env, exports, "PerfMetrics", &PerfMetricsWrapper::get_class, addon_data->perf_metrics);
init_class(env, exports, "ChatHistory", &ChatHistoryWrap::get_class, addon_data->chat_history);
+ // Expose a helper to set the openvino-node addon from JS (useful for ESM)
+ exports.Set("setOpenvinoAddon", Napi::Function::New(env, set_ov_addon));
+
return exports;
}
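
On the JS side this hook is invoked once at module load (see `src/js/lib/addon.ts` above); a sketch of the handshake, with the native binary path shown only for illustration:

```js
import { createRequire } from "module";
import { addon as ovAddon } from "openvino-node";

const require = createRequire(import.meta.url);
// Illustrative path; the real resolution logic lives in getGenAIAddon()
const genai = require("./bin/genai_node_addon.node");

// Hand over the openvino-node addon so the GenAI bindings can reuse its
// Tensor constructor when converting ov::Tensor values to and from JS
genai.setOpenvinoAddon(ovAddon);
```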
diff --git a/src/js/src/helper.cpp b/src/js/src/helper.cpp
index b282ba0636..00ddf0e2c9 100644
--- a/src/js/src/helper.cpp
+++ b/src/js/src/helper.cpp
@@ -102,6 +102,18 @@ std::string js_to_cpp(const Napi::Env& env, const Napi::Value& valu
return value.As<Napi::String>().Utf8Value();
}
+template <>
+int64_t js_to_cpp<int64_t>(const Napi::Env& env, const Napi::Value& value) {
+ OPENVINO_ASSERT(value.IsNumber() || value.IsBigInt(), "Passed argument must be of type Number or BigInt.");
+ if (value.IsNumber()) {
+ return value.As<Napi::Number>().Int64Value();
+ }
+ bool lossless;
+ auto result = value.As<Napi::BigInt>().Int64Value(&lossless);
+ OPENVINO_ASSERT(lossless, "BigInt value is too large to fit in int64_t without precision loss.");
+ return result;
+}
+
template <>
std::vector<std::string> js_to_cpp<std::vector<std::string>>(const Napi::Env& env, const Napi::Value& value) {
if (value.IsArray()) {
@@ -123,6 +135,20 @@ std::vector js_to_cpp>(const Napi::Env& en
}
}
+template <>
+std::vector<int64_t> js_to_cpp<std::vector<int64_t>>(const Napi::Env& env, const Napi::Value& value) {
+ OPENVINO_ASSERT(value.IsArray(), "Passed argument must be of type Array.");
+ auto array = value.As<Napi::Array>();
+ size_t arrayLength = array.Length();
+
+ std::vector<int64_t> vector;
+ vector.reserve(arrayLength);
+ for (uint32_t i = 0; i < arrayLength; ++i) {
+ vector.push_back(js_to_cpp<int64_t>(env, array[i]));
+ }
+ return vector;
+}
+
template <>
ov::genai::JsonContainer js_to_cpp<ov::genai::JsonContainer>(const Napi::Env& env, const Napi::Value& value) {
OPENVINO_ASSERT(value.IsObject() || value.IsArray(), "JsonContainer must be a JS object or an array but got " + std::string(value.ToString().Utf8Value()));
@@ -292,6 +318,23 @@ ov::genai::StructuredOutputConfig js_to_cpp(c
return config;
}
+template <>
+ov::Tensor js_to_cpp<ov::Tensor>(const Napi::Env& env, const Napi::Value& value) {
+ OPENVINO_ASSERT(value.IsObject(), "Passed argument must be an object.");
+
+ auto tensor_wrap = value.As<Napi::Object>();
+ auto tensor_prototype = get_prototype_from_ov_addon(env, "Tensor");
+ OPENVINO_ASSERT(tensor_wrap.InstanceOf(tensor_prototype), "Passed argument is not of type Tensor");
+
+ auto native_tensor_func = tensor_wrap.Get("__getExternalTensor").As<Napi::Function>();
+ Napi::Value native_tensor_value = native_tensor_func.Call(tensor_wrap, {});
+ OPENVINO_ASSERT(native_tensor_value.IsExternal(), "__getExternalTensor() did not return an External object.");
+
+ auto external = native_tensor_value.As<Napi::External<ov::Tensor>>();
+ auto tensor_ptr = external.Data();
+ return *tensor_ptr;
+}
+
template <>
ov::genai::PerfMetrics& unwrap<ov::genai::PerfMetrics>(const Napi::Env& env, const Napi::Value& value) {
const auto obj = value.As<Napi::Object>();
@@ -417,6 +460,38 @@ Napi::Value cpp_to_js(const Napi::Env& en
return json_parse(env, json_container.to_json_string());
}
+template <>
+Napi::Value cpp_to_js<ov::Tensor, Napi::Value>(const Napi::Env& env, const ov::Tensor& tensor) {
+ try {
+ auto prototype = get_prototype_from_ov_addon(env, "Tensor");
+
+ auto external = Napi::External<ov::Tensor>::New(env, new ov::Tensor(tensor),
+ [](Napi::Env /*env*/, ov::Tensor* external_tensor) {
+ delete external_tensor;
+ });
+ auto tensor_wrap = prototype.New({ external });
+
+ return tensor_wrap;
+ } catch (const ov::Exception& e) {
+ Napi::Error::New(env, std::string("Cannot create Tensor wrapper: ") + e.what()).ThrowAsJavaScriptException();
+ return env.Undefined();
+ }
+}
+
+template <>
+Napi::Value cpp_to_js<ov::genai::TokenizedInputs, Napi::Value>(const Napi::Env& env, const ov::genai::TokenizedInputs& tokenized_inputs) {
+ auto js_object = Napi::Object::New(env);
+
+ js_object.Set("input_ids", cpp_to_js<ov::Tensor, Napi::Value>(env, tokenized_inputs.input_ids));
+ js_object.Set("attention_mask", cpp_to_js<ov::Tensor, Napi::Value>(env, tokenized_inputs.attention_mask));
+ // token_type_ids is optional and present only for paired inputs
+ if (tokenized_inputs.token_type_ids.has_value()) {
+ js_object.Set("token_type_ids", cpp_to_js<ov::Tensor, Napi::Value>(env, tokenized_inputs.token_type_ids.value()));
+ }
+
+ return js_object;
+}
+
bool is_napi_value_int(const Napi::Env& env, const Napi::Value& num) {
return env.Global().Get("Number").ToObject().Get("isInteger").As<Napi::Function>().Call({num}).ToBoolean().Value();
}
@@ -449,3 +524,16 @@ Napi::Value json_parse(const Napi::Env& env, const std::string& value) {
.As<Napi::Function>()
.Call({ Napi::String::New(env, value) });
}
+
+Napi::Function get_prototype_from_ov_addon(const Napi::Env& env, const std::string& ctor_name) {
+ auto addon_data = env.GetInstanceData<AddonData>();
+ OPENVINO_ASSERT(!addon_data->openvino_addon.IsEmpty(), "Addon data is not initialized");
+ Napi::Value ov_addon = addon_data->openvino_addon.Value();
+ OPENVINO_ASSERT(!ov_addon.IsUndefined() && !ov_addon.IsNull() && ov_addon.IsObject(), "OV addon value is not an object");
+ Napi::Object addon_obj = ov_addon.As<Napi::Object>();
+ OPENVINO_ASSERT(addon_obj.Has(ctor_name), std::string("OV addon does not export '") + ctor_name + "' class");
+ Napi::Value ctor_val = addon_obj.Get(ctor_name);
+ OPENVINO_ASSERT(ctor_val.IsFunction(), ctor_name + std::string(" is not a prototype"));
+
+ return ctor_val.As<Napi::Function>();
+}
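
The practical effect of routing `ov::Tensor` conversion through the openvino-node prototype is that encode results interoperate directly with openvino-node; a small JS sketch (assuming a `tokenizer` instance):

```js
import { addon as ovAddon } from "openvino-node";

const tokens = tokenizer.encode("The Sun is yellow because");

// input_ids is constructed via the openvino-node Tensor constructor,
// so it can be used anywhere an openvino-node Tensor is expected
console.log(tokens.input_ids instanceof ovAddon.Tensor);
console.log(tokens.input_ids.getShape());
console.log(tokens.input_ids.getData().length);
```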
diff --git a/src/js/src/tokenizer.cpp b/src/js/src/tokenizer.cpp
index 1cf9c822d7..a8f1c72380 100644
--- a/src/js/src/tokenizer.cpp
+++ b/src/js/src/tokenizer.cpp
@@ -2,7 +2,51 @@
#include "include/helper.hpp"
#include "include/tokenizer.hpp"
-TokenizerWrapper::TokenizerWrapper(const Napi::CallbackInfo& info) : Napi::ObjectWrap<TokenizerWrapper>(info) {};
+TokenizerWrapper::TokenizerWrapper(const Napi::CallbackInfo& info) : Napi::ObjectWrap<TokenizerWrapper>(info) {
+ if (info.Length() == 0) {
+ return;
+ }
+
+ auto env = info.Env();
+ try {
+ if (info.Length() == 1 || info.Length() == 2) {
+ OPENVINO_ASSERT(info[0].IsString(), "Tokenizer constructor expects 'tokenizerPath' to be a string");
+ const auto tokenizer_path = js_to_cpp<std::string>(env, info[0]);
+ ov::AnyMap properties;
+ if (info.Length() == 2) {
+ properties = js_to_cpp<ov::AnyMap>(env, info[1]);
+ }
+ this->_tokenizer = ov::genai::Tokenizer(tokenizer_path, properties);
+ return;
+ }
+
+ OPENVINO_ASSERT(info.Length() == 4 || info.Length() == 5,
+ "Tokenizer constructor expects 1-2 arguments (path[, properties]) or 4-5 arguments (models, tensors[, properties])");
+ OPENVINO_ASSERT(info[0].IsString(), "The argument 'tokenizerModel' must be a string");
+ OPENVINO_ASSERT(info[1].IsObject(), "The argument 'tokenizerWeights' must be an OpenVINO Tensor");
+ OPENVINO_ASSERT(info[2].IsString(), "The argument 'detokenizerModel' must be a string");
+ OPENVINO_ASSERT(info[3].IsObject(), "The argument 'detokenizerWeights' must be an OpenVINO Tensor");
+
+ const auto tokenizer_model = js_to_cpp<std::string>(env, info[0]);
+ const auto tokenizer_weights = js_to_cpp<ov::Tensor>(env, info[1]);
+ const auto detokenizer_model = js_to_cpp<std::string>(env, info[2]);
+ const auto detokenizer_weights = js_to_cpp<ov::Tensor>(env, info[3]);
+ ov::AnyMap properties;
+ if (info.Length() == 5) {
+ properties = js_to_cpp<ov::AnyMap>(env, info[4]);
+ }
+
+ this->_tokenizer = ov::genai::Tokenizer(
+ tokenizer_model,
+ tokenizer_weights,
+ detokenizer_model,
+ detokenizer_weights,
+ properties
+ );
+ } catch (const std::exception& err) {
+ Napi::Error::New(env, err.what()).ThrowAsJavaScriptException();
+ }
+}
Napi::Function TokenizerWrapper::get_class(Napi::Env env) {
return DefineClass(env,
@@ -15,6 +59,12 @@ Napi::Function TokenizerWrapper::get_class(Napi::Env env) {
InstanceMethod("getEosTokenId", &TokenizerWrapper::get_eos_token_id),
InstanceMethod("getPadToken", &TokenizerWrapper::get_pad_token),
InstanceMethod("getPadTokenId", &TokenizerWrapper::get_pad_token_id),
+ InstanceMethod("getChatTemplate", &TokenizerWrapper::get_chat_template),
+ InstanceMethod("getOriginalChatTemplate", &TokenizerWrapper::get_original_chat_template),
+ InstanceMethod("setChatTemplate", &TokenizerWrapper::set_chat_template),
+ InstanceMethod("supportsPairedInput", &TokenizerWrapper::supports_paired_input),
+ InstanceMethod("decode", &TokenizerWrapper::decode),
+ InstanceMethod("encode", &TokenizerWrapper::encode),
}
);
}
@@ -76,7 +126,7 @@ Napi::Value TokenizerWrapper::get_bos_token(const Napi::CallbackInfo& info) {
Napi::Value TokenizerWrapper::get_bos_token_id(const Napi::CallbackInfo& info) {
try {
- return Napi::Number::New(info.Env(), this->_tokenizer.get_bos_token_id());
+ return Napi::BigInt::New(info.Env(), this->_tokenizer.get_bos_token_id());
} catch (std::exception& err) {
Napi::Error::New(info.Env(), err.what()).ThrowAsJavaScriptException();
return info.Env().Undefined();
@@ -94,7 +144,7 @@ Napi::Value TokenizerWrapper::get_eos_token(const Napi::CallbackInfo& info) {
Napi::Value TokenizerWrapper::get_eos_token_id(const Napi::CallbackInfo& info) {
try {
- return Napi::Number::New(info.Env(), this->_tokenizer.get_eos_token_id());
+ return Napi::BigInt::New(info.Env(), this->_tokenizer.get_eos_token_id());
} catch (std::exception& err) {
Napi::Error::New(info.Env(), err.what()).ThrowAsJavaScriptException();
return info.Env().Undefined();
@@ -112,7 +162,148 @@ Napi::Value TokenizerWrapper::get_pad_token(const Napi::CallbackInfo& info) {
Napi::Value TokenizerWrapper::get_pad_token_id(const Napi::CallbackInfo& info) {
try {
- return Napi::Number::New(info.Env(), this->_tokenizer.get_pad_token_id());
+ return Napi::BigInt::New(info.Env(), this->_tokenizer.get_pad_token_id());
+ } catch (std::exception& err) {
+ Napi::Error::New(info.Env(), err.what()).ThrowAsJavaScriptException();
+ return info.Env().Undefined();
+ }
+}
+
+Napi::Value TokenizerWrapper::encode(const Napi::CallbackInfo& info) {
+ auto env = info.Env();
+ try {
+ OPENVINO_ASSERT(info.Length() >= 1, "Tokenizer.encode requires at least one argument: text or prompts");
+
+ // Parse encoding options from the last argument if it's an object
+ ov::AnyMap tokenization_params;
+ auto count_text_args = info.Length();
+
+ if (info[count_text_args - 1].IsObject() && !info[count_text_args - 1].IsArray()) {
+ tokenization_params = js_to_cpp<ov::AnyMap>(env, info[count_text_args - 1]);
+ count_text_args--;
+ }
+
+ ov::genai::TokenizedInputs result;
+
+ // Handle different input types
+ if (info[0].IsString()) {
+ // Single string
+ auto text = js_to_cpp<std::string>(env, info[0]);
+ result = this->_tokenizer.encode(text, tokenization_params);
+ } else if (count_text_args == 1 && info[0].IsArray()) {
+ auto arr = info[0].As<Napi::Array>();
+
+ // Check if it's array of pairs [[str, str], ...]
+ if (arr.Length() > 0 && arr.Get(uint32_t(0)).IsArray()) {
+ // Array of pairs
+ std::vector<std::pair<std::string, std::string>> paired_prompts;
+ for (uint32_t i = 0; i < arr.Length(); ++i) {
+ OPENVINO_ASSERT(arr.Get(i).IsArray(), "Each pair must be an array");
+ auto pair = arr.Get(i).As<Napi::Array>();
+ OPENVINO_ASSERT(pair.Length() == 2, "Each pair must contain exactly 2 strings");
+ paired_prompts.emplace_back(
+ js_to_cpp<std::string>(env, pair.Get(uint32_t(0))),
+ js_to_cpp<std::string>(env, pair.Get(uint32_t(1)))
+ );
+ }
+ result = this->_tokenizer.encode(paired_prompts, tokenization_params);
+ } else {
+ // Regular array of strings
+ auto prompts = js_to_cpp<std::vector<std::string>>(env, info[0]);
+ result = this->_tokenizer.encode(prompts, tokenization_params);
+ }
+ } else if (count_text_args == 2 && info[0].IsArray() && info[1].IsArray()) {
+ // Two arrays (paired input: prompts_1, prompts_2)
+ auto prompts1 = js_to_cpp<std::vector<std::string>>(env, info[0]);
+ auto prompts2 = js_to_cpp<std::vector<std::string>>(env, info[1]);
+ result = this->_tokenizer.encode(prompts1, prompts2, tokenization_params);
+ } else {
+ OPENVINO_THROW("Unsupported input type for encode. Expected: string, string[], [string, string][], or two string arrays");
+ }
+
+ return cpp_to_js<ov::genai::TokenizedInputs, Napi::Value>(env, result);
+ } catch (std::exception& err) {
+ Napi::Error::New(env, err.what()).ThrowAsJavaScriptException();
+ return env.Undefined();
+ }
+}
+
+Napi::Value TokenizerWrapper::decode(const Napi::CallbackInfo& info) {
+ auto env = info.Env();
+ try {
+ OPENVINO_ASSERT(info.Length() >= 1, "Tokenizer.decode requires at least one argument: tokens");
+
+ ov::AnyMap detokenization_params;
+ if (info.Length() >= 2) {
+ const auto& options_candidate = info[1];
+ detokenization_params = js_to_cpp<ov::AnyMap>(env, options_candidate);
+ }
+
+ // Handle different input types
+ if (info[0].IsArray()) {
+ auto arr = info[0].As<Napi::Array>();
+
+ // Check if it's a 2D array (batch of sequences)
+ if (arr.Length() > 0 && arr.Get(uint32_t(0)).IsArray()) {
+ // Batch decoding: number[][] | bigint[][]
+ std::vector<std::vector<int64_t>> batch_tokens;
+ for (uint32_t i = 0; i < arr.Length(); ++i) {
+ batch_tokens.push_back(js_to_cpp<std::vector<int64_t>>(env, arr.Get(i)));
+ }
+ auto result = this->_tokenizer.decode(batch_tokens, detokenization_params);
+ return cpp_to_js<std::vector<std::string>, Napi::Value>(env, result);
+ } else {
+ // Single sequence: number[] | bigint[]
+ auto tokens = js_to_cpp<std::vector<int64_t>>(env, info[0]);
+ auto result = this->_tokenizer.decode(tokens, detokenization_params);
+ return Napi::String::New(env, result);
+ }
+ } else {
+ // Tensor input
+ auto tensor = js_to_cpp<ov::Tensor>(env, info[0]);
+ auto result = this->_tokenizer.decode(tensor, detokenization_params);
+ return cpp_to_js<std::vector<std::string>, Napi::Value>(env, result);
+ }
+ } catch (std::exception& err) {
+ Napi::Error::New(env, err.what()).ThrowAsJavaScriptException();
+ return env.Undefined();
+ }
+}
+
+Napi::Value TokenizerWrapper::get_chat_template(const Napi::CallbackInfo& info) {
+ try {
+ return Napi::String::New(info.Env(), this->_tokenizer.get_chat_template());
+ } catch (std::exception& err) {
+ Napi::Error::New(info.Env(), err.what()).ThrowAsJavaScriptException();
+ return info.Env().Undefined();
+ }
+}
+
+Napi::Value TokenizerWrapper::get_original_chat_template(const Napi::CallbackInfo& info) {
+ try {
+ return Napi::String::New(info.Env(), this->_tokenizer.get_original_chat_template());
+ } catch (std::exception& err) {
+ Napi::Error::New(info.Env(), err.what()).ThrowAsJavaScriptException();
+ return info.Env().Undefined();
+ }
+}
+
+Napi::Value TokenizerWrapper::set_chat_template(const Napi::CallbackInfo& info) {
+ try {
+ OPENVINO_ASSERT(info.Length() >= 1, "Tokenizer.setChatTemplate requires one argument: chatTemplate");
+ OPENVINO_ASSERT(info[0].IsString(), "The argument 'chatTemplate' must be a string");
+
+ this->_tokenizer.set_chat_template(js_to_cpp<std::string>(info.Env(), info[0]));
+ return info.Env().Undefined();
+ } catch (std::exception& err) {
+ Napi::Error::New(info.Env(), err.what()).ThrowAsJavaScriptException();
+ return info.Env().Undefined();
+ }
+}
+
+Napi::Value TokenizerWrapper::supports_paired_input(const Napi::CallbackInfo& info) {
+ try {
+ return Napi::Boolean::New(info.Env(), this->_tokenizer.supports_paired_input());
} catch (std::exception& err) {
Napi::Error::New(info.Env(), err.what()).ThrowAsJavaScriptException();
return info.Env().Undefined();
diff --git a/src/js/tests/bindings.test.js b/src/js/tests/bindings.test.js
index 8c1ac78760..0119f25a0f 100644
--- a/src/js/tests/bindings.test.js
+++ b/src/js/tests/bindings.test.js
@@ -1,4 +1,4 @@
-import addon from "../dist/addon.js";
+import { LLMPipeline } from "../dist/addon.js";
import assert from "node:assert";
import { describe, it, before, after } from "node:test";
@@ -10,7 +10,7 @@ describe("bindings", () => {
let pipeline = null;
before((_, done) => {
- pipeline = new addon.LLMPipeline();
+ pipeline = new LLMPipeline();
pipeline.init(MODEL_PATH, "CPU", {}, (err) => {
if (err) {
diff --git a/src/js/tests/tokenizer.test.js b/src/js/tests/tokenizer.test.js
index dc470f2014..dba1474540 100644
--- a/src/js/tests/tokenizer.test.js
+++ b/src/js/tests/tokenizer.test.js
@@ -1,12 +1,48 @@
-import { LLMPipeline, ChatHistory } from "../dist/index.js";
+import { LLMPipeline, ChatHistory, Tokenizer } from "../dist/index.js";
import assert from "node:assert/strict";
import { describe, it, before, after } from "node:test";
import { models } from "./models.js";
+import fs from "node:fs/promises";
+import { join } from "node:path";
+import { addon as ovAddon } from "openvino-node";
const MODEL_PATH = process.env.MODEL_PATH || `./tests/models/${models.LLM.split("/")[1]}`;
-describe("tokenizer", async () => {
+describe("tokenizer constructors", () => {
+ it("tokenizer constructors with one argument", () => {
+ const tokenizer = new Tokenizer(MODEL_PATH);
+
+ assert.ok(tokenizer);
+ });
+
+ it("tokenizer constructors with multiple arguments", async () => {
+ const tokenizerName = join(MODEL_PATH, "openvino_tokenizer");
+ const detokenizerName = join(MODEL_PATH, "openvino_detokenizer");
+ const tokenizerModel = await fs.readFile(`${tokenizerName}.xml`, "utf8");
+ const tokenizerWeights = await fs.readFile(`${tokenizerName}.bin`);
+ const detokenizerModel = await fs.readFile(`${detokenizerName}.xml`, "utf8");
+ const detokenizerWeights = await fs.readFile(`${detokenizerName}.bin`);
+
+ const tokenizerTensor = new ovAddon.Tensor("u8", [tokenizerWeights.length], tokenizerWeights);
+ const detokenizerTensor = new ovAddon.Tensor(
+ "u8",
+ [detokenizerWeights.length],
+ detokenizerWeights,
+ );
+
+ const tokenizer = new Tokenizer(
+ tokenizerModel,
+ tokenizerTensor,
+ detokenizerModel,
+ detokenizerTensor,
+ );
+
+ assert.ok(tokenizer);
+ });
+});
+
+describe("tokenizer functions", async () => {
let pipeline = null;
let tokenizer = null;
@@ -21,7 +57,7 @@ describe("tokenizer", async () => {
await pipeline.finishChat();
});
- it("applyChatTemplate return string", async () => {
+ it("applyChatTemplate return string", () => {
const template = tokenizer.applyChatTemplate(
[
{
@@ -34,7 +70,7 @@ describe("tokenizer", async () => {
assert.strictEqual(typeof template, "string");
});
- it("applyChatTemplate with chat history", async () => {
+ it("applyChatTemplate with chat history", () => {
const chatHistory = new ChatHistory([
{
role: "user",
@@ -45,7 +81,7 @@ describe("tokenizer", async () => {
assert.strictEqual(typeof template, "string");
});
- it("applyChatTemplate with true addGenerationPrompt", async () => {
+ it("applyChatTemplate with true addGenerationPrompt", () => {
const template = tokenizer.applyChatTemplate(
[
{
@@ -58,7 +94,7 @@ describe("tokenizer", async () => {
assert.ok(template.includes("assistant"));
});
- it("applyChatTemplate with missed addGenerationPrompt", async () => {
+ it("applyChatTemplate with missed addGenerationPrompt", () => {
assert.throws(() =>
tokenizer.applyChatTemplate([
{
@@ -69,11 +105,11 @@ describe("tokenizer", async () => {
);
});
- it("applyChatTemplate with incorrect type of history", async () => {
+ it("applyChatTemplate with incorrect type of history", () => {
assert.throws(() => tokenizer.applyChatTemplate("prompt", false));
});
- it("applyChatTemplate with unknown property", async () => {
+ it("applyChatTemplate with unknown property", () => {
const testValue = "1234567890";
const template = tokenizer.applyChatTemplate(
[
@@ -88,7 +124,7 @@ describe("tokenizer", async () => {
assert.ok(!template.includes(testValue));
});
- it("applyChatTemplate use custom chatTemplate", async () => {
+ it("applyChatTemplate use custom chatTemplate", () => {
const prompt = "continue: 1 2 3";
const chatTemplate = `{% for message in messages %}
{{ message['content'] }}
@@ -106,7 +142,7 @@ describe("tokenizer", async () => {
assert.strictEqual(template, `${prompt}\n`);
});
- it("applyChatTemplate use tools", async () => {
+ it("applyChatTemplate use tools", () => {
const prompt = "question";
const chatHistory = [
{
@@ -124,7 +160,7 @@ describe("tokenizer", async () => {
assert.strictEqual(templatedHistory, expected);
});
- it("applyChatTemplate use tool from chat history", async () => {
+ it("applyChatTemplate use tool from chat history", () => {
const prompt = "question";
const chatHistory = new ChatHistory();
chatHistory.push({ role: "user", content: prompt });
@@ -139,7 +175,7 @@ describe("tokenizer", async () => {
assert.strictEqual(templatedHistory, expected);
});
- it("applyChatTemplate use extra_context", async () => {
+ it("applyChatTemplate use extra_context", () => {
const prompt = "question";
const chatHistory = [
{
@@ -165,7 +201,7 @@ describe("tokenizer", async () => {
assert.strictEqual(templatedHistory, expected);
});
- it("applyChatTemplate use extra_context from chat history", async () => {
+ it("applyChatTemplate use extra_context from chat history", () => {
const prompt = "question";
const chatHistory = new ChatHistory();
chatHistory.push({ role: "user", content: prompt });
@@ -181,33 +217,183 @@ describe("tokenizer", async () => {
assert.strictEqual(templatedHistory, expected);
});
- it("getBosToken return string", async () => {
+ it("getBosToken return string", () => {
const token = tokenizer.getBosToken();
assert.strictEqual(typeof token, "string");
});
- it("getBosTokenId return number", async () => {
+ it("getBosTokenId return number", () => {
const token = tokenizer.getBosTokenId();
- assert.strictEqual(typeof token, "number");
+ assert.strictEqual(typeof token, "bigint");
});
- it("getEosToken return string", async () => {
+ it("getEosToken return string", () => {
const token = tokenizer.getEosToken();
assert.strictEqual(typeof token, "string");
});
- it("getEosTokenId return number", async () => {
+ it("getEosTokenId return number", () => {
const token = tokenizer.getEosTokenId();
- assert.strictEqual(typeof token, "number");
+ assert.strictEqual(typeof token, "bigint");
});
- it("getPadToken return string", async () => {
+ it("getPadToken return string", () => {
const token = tokenizer.getPadToken();
assert.strictEqual(typeof token, "string");
});
- it("getPadTokenId return number", async () => {
+ it("getPadTokenId return number", () => {
const token = tokenizer.getPadTokenId();
- assert.strictEqual(typeof token, "number");
+ assert.strictEqual(typeof token, "bigint");
+ });
+
+ it("setChatTemplate updates template", () => {
+ const originalTemplate = tokenizer.getChatTemplate();
+ assert.strictEqual(typeof originalTemplate, "string");
+
+ const customTemplate = "Custom template: {{ messages }}";
+ tokenizer.setChatTemplate(customTemplate);
+
+ const updatedTemplate = tokenizer.getChatTemplate();
+ assert.strictEqual(updatedTemplate, customTemplate);
+
+ // Restore original template
+ tokenizer.setChatTemplate(originalTemplate);
+ });
+
+ // Fix getOriginalChatTemplate issue CVS-176638
+ it.skip("getOriginalChatTemplate returns the original string", () => {
+ const originalTemplate = tokenizer.getChatTemplate();
+ tokenizer.setChatTemplate("Custom template: {{ messages }}");
+
+ const template = tokenizer.getOriginalChatTemplate();
+ assert.strictEqual(template, originalTemplate);
+
+ // Restore original template
+ tokenizer.setChatTemplate(originalTemplate);
+ });
+
+ it("encode single string returns TokenizedInputs", () => {
+ const text = "Hello world";
+ const result = tokenizer.encode(text);
+
+ assert.ok(result.input_ids, "Should have input_ids");
+ assert.ok(result.attention_mask, "Should have attention_mask");
+ assert.strictEqual(typeof result.input_ids, "object");
+ assert.strictEqual(typeof result.attention_mask, "object");
+ });
+
+ it("encode with options", () => {
+ const text = "Hello world";
+ const result = tokenizer.encode(text, {
+ add_special_tokens: false,
+ pad_to_max_length: true,
+ max_length: 1000,
+ padding_side: "left",
+ });
+ // const padTokenId = tokenizer.getPadTokenId();
+
+ assert.ok(result.input_ids);
+ assert.strictEqual(
+ result.input_ids.getShape()[1],
+ 1000,
+ "input_ids should be padded to maxLength",
+ );
+ // Uncomment after fixing padding issue CVS-176636
+ // assert.strictEqual(
+ // result.input_ids.getData()[0],
+ // padTokenId,
+ // "input_ids should be left padded",
+ // );
+ });
+
+ it("encode array of strings", () => {
+ const texts = ["Hello", "World"];
+ const result = tokenizer.encode(texts);
+
+ assert.strictEqual(result.input_ids.getShape()[0], texts.length);
+ assert.strictEqual(result.attention_mask.getShape()[0], 2);
+ });
+
+ it("decode array of token IDs to string", () => {
+ const tokenIds = [1, 2, 3];
+ const decoded = tokenizer.decode(tokenIds);
+
+ assert.strictEqual(typeof decoded, "string");
+ });
+
+ // Fix skip_special_tokens functionality CVS-176639
+ it.skip("decode with skip_special_tokens option", () => {
+ const eos = tokenizer.getEosToken();
+ const eosId = tokenizer.getEosTokenId();
+ const tokenIds = [10n, 20n, 30n, eosId];
+ const decoded1 = tokenizer.decode(tokenIds, { skip_special_tokens: true });
+ const decoded2 = tokenizer.decode(tokenIds, { skip_special_tokens: false });
+
+ assert.strictEqual(typeof decoded1, "string");
+ assert.strictEqual(typeof decoded2, "string");
+ assert.strictEqual(decoded2, decoded1 + eos);
+ });
+
+ it("decode batch of token sequences", () => {
+ const batchTokens = [
+ [1, 2, 3],
+ [4, 5, 6],
+ ];
+ const decoded = tokenizer.decode(batchTokens);
+
+ assert.strictEqual(decoded.length, 2);
+ });
+
+ it("encode and decode round trip", () => {
+ const originalText = "Hello world";
+ const encoded = tokenizer.encode(originalText);
+ const decodedText = tokenizer.decode(encoded.input_ids);
+
+ assert.deepEqual(decodedText, [originalText]);
+ });
+});
+
+// Add model with paired input support CVS-176639
+describe.skip("tokenizer with paired input", () => {
+ let tokenizer = null;
+
+ before(async () => {
+ tokenizer = new Tokenizer(MODEL_PATH, { add_second_input: true, number_of_inputs: 2 });
+ });
+
+ it("supportsPairedInput return boolean", () => {
+ const result = tokenizer.supportsPairedInput();
+
+ assert.strictEqual(result, true);
+ });
+
+ it("encode paired prompts (two arrays)", () => {
+ const prompts1 = ["Question 1", "Question 2"];
+ const prompts2 = ["Answer 1", "Answer 2"];
+ const result = tokenizer.encode(prompts1, prompts2);
+
+ assert.strictEqual(result.input_ids.getShape()[0], prompts1.length);
+ assert.strictEqual(result.attention_mask.getShape()[0], prompts1.length);
+ });
+
+ it("encode paired prompts (array of pairs)", () => {
+ const pairs = [
+ ["Question 1", "Answer 1"],
+ ["Question 2", "Answer 2"],
+ ];
+ const result = tokenizer.encode(pairs);
+
+ assert.strictEqual(result.input_ids.getShape()[0], pairs.length);
+ assert.strictEqual(result.attention_mask.getShape()[0], pairs.length);
+ });
+
+ it("encode paired prompts broadcasting second array", () => {
+ const prompts1 = ["Question 1", "Question 2", "Question 3"]; // batch size 3
+ const prompts2 = ["Single answer"]; // will be broadcast
+ const result = tokenizer.encode(prompts1, prompts2);
+
+ assert.strictEqual(result.input_ids.getShape()[0], prompts1.length);
+ assert.strictEqual(result.attention_mask.getShape()[0], prompts1.length);
});
});