From 884f0ecf6573ba4f3180bf859cc49c33c5f089b1 Mon Sep 17 00:00:00 2001 From: Kirill Suvorov Date: Thu, 13 Nov 2025 21:32:25 +0100 Subject: [PATCH 01/10] [JS] Update tokenizer methods Signed-off-by: Kirill Suvorov --- samples/js/text_generation/benchmark_genai.js | 4 + src/js/include/addon.hpp | 1 + src/js/include/helper.hpp | 14 ++ src/js/include/tokenizer.hpp | 6 + src/js/lib/addon.ts | 6 +- src/js/lib/index.ts | 1 + src/js/lib/pipelines/llmPipeline.ts | 22 +-- src/js/lib/pipelines/textEmbeddingPipeline.ts | 5 +- src/js/lib/tokenizer.ts | 186 ++++++++++++++++++ src/js/src/addon.cpp | 25 +++ src/js/src/helper.cpp | 81 ++++++++ src/js/src/tokenizer.cpp | 160 +++++++++++++++ src/js/tests/bindings.test.js | 4 +- src/js/tests/tokenizer.test.js | 181 +++++++++++++++-- 14 files changed, 653 insertions(+), 43 deletions(-) create mode 100644 src/js/lib/tokenizer.ts diff --git a/samples/js/text_generation/benchmark_genai.js b/samples/js/text_generation/benchmark_genai.js index 1ae77a323f..972b5765b1 100644 --- a/samples/js/text_generation/benchmark_genai.js +++ b/samples/js/text_generation/benchmark_genai.js @@ -90,6 +90,10 @@ async function main() { pipe = await LLMPipeline(modelsPath, device, { schedulerConfig: schedulerConfig }); } + const inputData = await pipe.getTokenizer().encode(prompt); + const promptTokenSize = inputData.input_ids.shape[1]; + console.log(`Prompt token size: ${promptTokenSize}`); + for (let i = 0; i < numWarmup; i++) { await pipe.generate(prompt, config); } diff --git a/src/js/include/addon.hpp b/src/js/include/addon.hpp index f2a23b026c..28371ba822 100644 --- a/src/js/include/addon.hpp +++ b/src/js/include/addon.hpp @@ -12,6 +12,7 @@ struct AddonData { Napi::FunctionReference tokenizer; Napi::FunctionReference perf_metrics; Napi::FunctionReference chat_history; + Napi::ObjectReference openvino_addon; }; void init_class(Napi::Env env, diff --git a/src/js/include/helper.hpp b/src/js/include/helper.hpp index a28f7c071c..55370d91e3 100644 --- a/src/js/include/helper.hpp +++ b/src/js/include/helper.hpp @@ -37,9 +37,13 @@ ov::AnyMap js_to_cpp(const Napi::Env& env, const Napi::Value& value) /** @brief A template specialization for TargetType std::string */ template <> std::string js_to_cpp(const Napi::Env& env, const Napi::Value& value); +template <> +int64_t js_to_cpp(const Napi::Env& env, const Napi::Value& value); /** @brief A template specialization for TargetType std::vector */ template <> std::vector js_to_cpp>(const Napi::Env& env, const Napi::Value& value); +template <> +std::vector js_to_cpp>(const Napi::Env& env, const Napi::Value& value); /** @brief A template specialization for TargetType GenerateInputs */ template <> GenerateInputs js_to_cpp(const Napi::Env& env, const Napi::Value& value); @@ -58,6 +62,8 @@ ov::genai::StructuredOutputConfig::Tag js_to_cpp ov::genai::StructuredOutputConfig::StructuralTag js_to_cpp(const Napi::Env& env, const Napi::Value& value); +template <> +ov::Tensor js_to_cpp(const Napi::Env& env, const Napi::Value& value); /** * @brief Unwraps a C++ object from a JavaScript wrapper. * @tparam TargetType The C++ class type to extract. @@ -110,6 +116,12 @@ Napi::Value cpp_to_js, Napi::Value>(const Napi::Env& env, co template <> Napi::Value cpp_to_js(const Napi::Env& env, const ov::genai::JsonContainer& json_container); + +template <> +Napi::Value cpp_to_js(const Napi::Env& env, const ov::Tensor& tensor); + +template <> +Napi::Value cpp_to_js(const Napi::Env& env, const ov::genai::TokenizedInputs& tokenized_inputs); /** * @brief Template function to convert C++ map into Javascript Object. Map key must be std::string. * @tparam MapElementType C++ data type of map elements. @@ -130,3 +142,5 @@ bool is_chat_history(const Napi::Env& env, const Napi::Value& value); std::string json_stringify(const Napi::Env& env, const Napi::Value& value); Napi::Value json_parse(const Napi::Env& env, const std::string& value); + +Napi::Function get_prototype_from_ov_addon(const Napi::Env& env, const std::string& ctor_name); diff --git a/src/js/include/tokenizer.hpp b/src/js/include/tokenizer.hpp index a7e6f3ef42..c1d0386525 100644 --- a/src/js/include/tokenizer.hpp +++ b/src/js/include/tokenizer.hpp @@ -15,6 +15,12 @@ class TokenizerWrapper : public Napi::ObjectWrap { Napi::Value get_eos_token_id(const Napi::CallbackInfo& info); Napi::Value get_pad_token(const Napi::CallbackInfo& info); Napi::Value get_pad_token_id(const Napi::CallbackInfo& info); + Napi::Value get_chat_template(const Napi::CallbackInfo& info); + Napi::Value get_original_chat_template(const Napi::CallbackInfo& info); + Napi::Value set_chat_template(const Napi::CallbackInfo& info); + Napi::Value supports_paired_input(const Napi::CallbackInfo& info); + Napi::Value encode(const Napi::CallbackInfo& info); + Napi::Value decode(const Napi::CallbackInfo& info); private: ov::genai::Tokenizer _tokenizer; }; diff --git a/src/js/lib/addon.ts b/src/js/lib/addon.ts index f8b9abee61..5f8a9bc625 100644 --- a/src/js/lib/addon.ts +++ b/src/js/lib/addon.ts @@ -2,6 +2,7 @@ import { createRequire } from "module"; import { platform } from "node:os"; import { join, dirname, resolve } from "node:path"; import type { ChatHistory as IChatHistory } from "./chatHistory.js"; +import { addon as ovAddon } from "openvino-node"; export type EmbeddingResult = Float32Array | Int8Array | Uint8Array; export type EmbeddingResults = Float32Array[] | Int8Array[] | Uint8Array[]; @@ -60,6 +61,7 @@ interface OpenVINOGenAIAddon { TextEmbeddingPipeline: TextEmbeddingPipelineWrapper; LLMPipeline: any; ChatHistory: IChatHistory; + setOpenvinoAddon: (ovAddon: any) => void; } // We need to use delayed import to get an updated Path if required @@ -78,7 +80,7 @@ function getGenAIAddon(): OpenVINOGenAIAddon { } const addon = getGenAIAddon(); +addon.setOpenvinoAddon(ovAddon); -export const { ChatHistory } = addon; +export const { TextEmbeddingPipeline, LLMPipeline, ChatHistory } = addon; export type ChatHistory = IChatHistory; -export default addon; diff --git a/src/js/lib/index.ts b/src/js/lib/index.ts index cd673a6797..89ee0e1f71 100644 --- a/src/js/lib/index.ts +++ b/src/js/lib/index.ts @@ -40,3 +40,4 @@ export const { LLMPipeline, TextEmbeddingPipeline } = PipelineFactory; export { DecodedResults } from "./pipelines/llmPipeline.js"; export * from "./utils.js"; export * from "./addon.js"; +export * from "./tokenizer.js"; diff --git a/src/js/lib/pipelines/llmPipeline.ts b/src/js/lib/pipelines/llmPipeline.ts index b03146e0d2..01fe1aa5f3 100644 --- a/src/js/lib/pipelines/llmPipeline.ts +++ b/src/js/lib/pipelines/llmPipeline.ts @@ -1,6 +1,7 @@ import util from "node:util"; -import addon, { ChatHistory } from "../addon.js"; +import { ChatHistory, LLMPipeline as LLMPipelineWrap } from "../addon.js"; import { GenerationConfig, StreamingStatus, LLMPipelineProperties } from "../utils.js"; +import { Tokenizer } from "../tokenizer.js"; export type ResolveFunction = (arg: { value: string; done: boolean }) => void; export type Options = { @@ -8,23 +9,6 @@ export type Options = { max_new_tokens?: number; }; -interface Tokenizer { - /** Applies a chat template to format chat history into a prompt string. */ - applyChatTemplate( - chatHistory: Record[] | ChatHistory, - addGenerationPrompt: boolean, - chatTemplate?: string, - tools?: Record[], - extraContext?: Record, - ): string; - getBosToken(): string; - getBosTokenId(): number; - getEosToken(): string; - getEosTokenId(): number; - getPadToken(): string; - getPadTokenId(): number; -} - /** Structure with raw performance metrics for each generation before any statistics are calculated. */ export type RawMetrics = { /** Durations for each generate call in milliseconds. */ @@ -167,7 +151,7 @@ export class LLMPipeline { async init() { if (this.isInitialized) throw new Error("LLMPipeline is already initialized"); - this.pipeline = new addon.LLMPipeline(); + this.pipeline = new LLMPipelineWrap(); const initPromise = util.promisify(this.pipeline.init.bind(this.pipeline)); const result = await initPromise(this.modelPath, this.device, this.properties); diff --git a/src/js/lib/pipelines/textEmbeddingPipeline.ts b/src/js/lib/pipelines/textEmbeddingPipeline.ts index 5517b7ad97..44c4d45fa1 100644 --- a/src/js/lib/pipelines/textEmbeddingPipeline.ts +++ b/src/js/lib/pipelines/textEmbeddingPipeline.ts @@ -1,9 +1,10 @@ import util from "node:util"; -import addon, { +import { TextEmbeddingPipelineWrapper, EmbeddingResult, EmbeddingResults, TextEmbeddingConfig, + TextEmbeddingPipeline as TextEmbeddingPipelineWrap, } from "../addon.js"; export class TextEmbeddingPipeline { @@ -29,7 +30,7 @@ export class TextEmbeddingPipeline { async init() { if (this.pipeline) throw new Error("TextEmbeddingPipeline is already initialized"); - this.pipeline = new addon.TextEmbeddingPipeline(); + this.pipeline = new TextEmbeddingPipelineWrap(); const initPromise = util.promisify(this.pipeline.init.bind(this.pipeline)); await initPromise(this.modelPath, this.device, this.config, this.ovProperties); diff --git a/src/js/lib/tokenizer.ts b/src/js/lib/tokenizer.ts new file mode 100644 index 0000000000..d321aa2b27 --- /dev/null +++ b/src/js/lib/tokenizer.ts @@ -0,0 +1,186 @@ +// Copyright (C) 2025 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +import { Tensor } from "openvino-node"; +import { ChatHistory } from "./addon.js"; + +/** + * TokenizedInputs contains input_ids and attention_mask tensors. + * This is the result of encoding prompts using the Tokenizer. + */ +export interface TokenizedInputs { + /** Tensor containing token IDs for the encoded input */ + input_ids: Tensor; + /** Tensor containing attention mask (1 for real tokens, 0 for padding) */ + attention_mask: Tensor; +} + +/** + * Options for encode method. + */ +export interface EncodeOptions { + /** + * Whether to add special tokens like BOS, EOS, PAD. + * @defaultValue true + */ + addSpecialTokens?: boolean; + + /** + * Whether to pad the sequence to the maximum length. + * @defaultValue false + */ + padToMaxLength?: boolean; + + /** + * Maximum length of the sequence. + * If undefined, the value will be taken from the IR. + */ + maxLength?: number; + + /** + * Side to pad the sequence, can be 'left' or 'right'. + * If undefined, the value will be taken from the IR. + */ + paddingSide?: "left" | "right"; +} + +/** + * The Tokenizer class is used to encode prompts and decode resulting tokens. + * + * Chat template is initialized from sources in the following order, overriding the previous value: + * 1. chat_template entry from tokenizer_config.json + * 2. chat_template entry from processor_config.json + * 3. chat_template entry from chat_template.json + * 4. chat_template entry from rt_info section of openvino.Model + * 5. If the template is known to be not supported by GenAI, it's replaced with a simplified supported version. + */ +export interface Tokenizer { + /** + * Applies a chat template to format chat history into a prompt string. + * @param chatHistory - chat history as an array of message objects or ChatHistory instance + * @param addGenerationPrompt - whether to add a generation prompt at the end + * @param chatTemplate - optional custom chat template to use instead of the default + * @param tools - optional array of tool definitions for function calling + * @param extraContext - optional extra context object for custom template variables + * @returns formatted prompt string + */ + applyChatTemplate( + chatHistory: Record[] | ChatHistory, + addGenerationPrompt: boolean, + chatTemplate?: string, + tools?: Record[], + extraContext?: Record, + ): string; + + /** + * Encodes a single prompt or a list of prompts into tokenized inputs. + * @param prompts - single prompt string or array of prompts + * @param options - encoding options + * @returns TokenizedInputs object containing input_ids and attention_mask tensors. + */ + encode(prompts: string | string[], options?: EncodeOptions): TokenizedInputs; + + /** + * Encodes two lists of prompts into tokenized inputs (for paired input). + * The number of strings must be the same, or one of the inputs can contain one string. + * In the latter case, the single-string input will be broadcast into the shape of the other input, + * which is more efficient than repeating the string in pairs. + * @param prompts1 - first list of prompts to encode + * @param prompts2 - second list of prompts to encode + * @param options - encoding options + * @returns TokenizedInputs object containing input_ids and attention_mask tensors. + */ + encode(prompts1: string[], prompts2: string[], options?: EncodeOptions): TokenizedInputs; + + /** + * Encodes a list of paired prompts into tokenized inputs. + * Input format is same as for HF paired input [[prompt_1, prompt_2], ...]. + * @param prompts - list of paired prompts to encode + * @param options - encoding options + * @returns TokenizedInputs object containing input_ids and attention_mask tensors. + */ + encode(prompts: [string, string][], options?: EncodeOptions): TokenizedInputs; + + // TODO: move decode options to another interface + /** + * Decode a sequence of token IDs into a string prompt. + * @param tokens - sequence of token IDs to decode + * @param skipSpecialTokens - whether to skip special tokens. Default is true. + * @returns decoded string. + */ + decode(tokens: number[], skipSpecialTokens?: boolean): string; + + /** + * Decode a batch of token sequences (as Tensor or array of arrays) into a list of string prompts. + * @param tokens - tensor containing token IDs or batch of token ID sequences + * @param skipSpecialTokens - whether to skip special tokens. Default is true. + * @returns list of decoded strings. + */ + decode(tokens: Tensor | number[][], skipSpecialTokens?: boolean): string[]; + + /** + * Returns the BOS (Beginning of Sequence) token string. + * @returns BOS token string + */ + getBosToken(): string; + + /** + * Returns the BOS (Beginning of Sequence) token ID. + * @returns BOS token ID + */ + getBosTokenId(): number; + + /** + * Returns the EOS (End of Sequence) token string. + * @returns EOS token string + */ + getEosToken(): string; + + /** + * Returns the EOS (End of Sequence) token ID. + * @returns EOS token ID + */ + getEosTokenId(): number; + + /** + * Returns the PAD (Padding) token string. + * @returns PAD token string + */ + getPadToken(): string; + + /** + * Returns the PAD (Padding) token ID. + * @returns PAD token ID + */ + getPadTokenId(): number; + + /** + * Returns the current chat template string. + * @returns current chat template string + */ + getChatTemplate(): string; + + /** + * Returns the original chat template from the tokenizer configuration. + * @returns original chat template string + */ + getOriginalChatTemplate(): string; + + /** + * Override a chat template read from tokenizer_config.json. + * @param chatTemplate - custom chat template string to use + */ + setChatTemplate(chatTemplate: string): void; + + /** + * Returns true if the tokenizer supports paired input, false otherwise. + * @returns whether the tokenizer supports paired input + */ + supportsPairedInput(): boolean; + + /** + * The current chat template string. + * Can be used to get or set the chat template. + */ + chatTemplate: string; +} diff --git a/src/js/src/addon.cpp b/src/js/src/addon.cpp index 210673da93..15f60597da 100644 --- a/src/js/src/addon.cpp +++ b/src/js/src/addon.cpp @@ -20,6 +20,28 @@ void init_class(Napi::Env env, exports.Set(class_name, prototype); } +Napi::Value init_ov_addon(const Napi::CallbackInfo& info) { + Napi::Env env = info.Env(); + if (info.Length() < 1) { + Napi::TypeError::New(env, "setOpenvinoAddon expects one argument").ThrowAsJavaScriptException(); + return env.Undefined(); + } + if (info[0].IsUndefined() || info[0].IsNull() || !info[0].IsObject()) { + Napi::TypeError::New(env, "Passed addon must be an object").ThrowAsJavaScriptException(); + return env.Undefined(); + } + + auto addon_data = env.GetInstanceData(); + if (!addon_data) { + Napi::TypeError::New(env, "Addon data is not initialized").ThrowAsJavaScriptException(); + return env.Undefined(); + } + + auto ov_addon = info[0].As(); + addon_data->openvino_addon = Napi::Persistent(ov_addon); + return env.Undefined(); +} + // Define the addon initialization function Napi::Object init_module(Napi::Env env, Napi::Object exports) { auto addon_data = new AddonData(); @@ -31,6 +53,9 @@ Napi::Object init_module(Napi::Env env, Napi::Object exports) { init_class(env, exports, "PerfMetrics", &PerfMetricsWrapper::get_class, addon_data->perf_metrics); init_class(env, exports, "ChatHistory", &ChatHistoryWrap::get_class, addon_data->chat_history); + // Expose a helper to set the openvino-node addon from JS (useful for ESM) + exports.Set("setOpenvinoAddon", Napi::Function::New(env, init_ov_addon)); + return exports; } diff --git a/src/js/src/helper.cpp b/src/js/src/helper.cpp index b282ba0636..851dfb372e 100644 --- a/src/js/src/helper.cpp +++ b/src/js/src/helper.cpp @@ -102,6 +102,12 @@ std::string js_to_cpp(const Napi::Env& env, const Napi::Value& valu return value.As().Utf8Value(); } +template <> +int64_t js_to_cpp(const Napi::Env& env, const Napi::Value& value) { + OPENVINO_ASSERT(value.IsNumber(), "Passed argument must be of type Number."); + return value.As().Int64Value(); +} + template <> std::vector js_to_cpp>(const Napi::Env& env, const Napi::Value& value) { if (value.IsArray()) { @@ -123,6 +129,23 @@ std::vector js_to_cpp>(const Napi::Env& en } } +template <> +std::vector js_to_cpp>(const Napi::Env& env, const Napi::Value& value) { + if (value.IsArray()) { + auto array = value.As(); + size_t arrayLength = array.Length(); + + std::vector vector; + vector.reserve(arrayLength); + for (uint32_t i = 0; i < arrayLength; ++i) { + vector.push_back(js_to_cpp(env, array[i])); + } + return vector; + } else { + OPENVINO_THROW("Passed argument must be of type Array."); + } +} + template <> ov::genai::JsonContainer js_to_cpp(const Napi::Env& env, const Napi::Value& value) { OPENVINO_ASSERT(value.IsObject() || value.IsArray(), "JsonContainer must be a JS object or an array but got " + std::string(value.ToString().Utf8Value())); @@ -292,6 +315,23 @@ ov::genai::StructuredOutputConfig js_to_cpp(c return config; } +template <> +ov::Tensor js_to_cpp(const Napi::Env& env, const Napi::Value& value) { + OPENVINO_ASSERT(value.IsObject(), "Passed argument must be an object."); + + auto tensor_wrap = value.As(); + auto tensor_prototype = get_prototype_from_ov_addon(env, "Tensor"); + OPENVINO_ASSERT(tensor_wrap.InstanceOf(tensor_prototype), "Passed argument is not of type Tensor"); + + auto native_tensor_func = tensor_wrap.Get("__getExternalTensor").As(); + Napi::Value native_tensor_value = native_tensor_func.Call(tensor_wrap, {}); + OPENVINO_ASSERT(native_tensor_value.IsExternal(), "__getExternalTensor() did not return an External object."); + + auto external = native_tensor_value.As>(); + auto tensor_ptr = external.Data(); + return *tensor_ptr; +} + template <> ov::genai::PerfMetrics& unwrap(const Napi::Env& env, const Napi::Value& value) { const auto obj = value.As(); @@ -417,6 +457,34 @@ Napi::Value cpp_to_js(const Napi::Env& en return json_parse(env, json_container.to_json_string()); } +template <> +Napi::Value cpp_to_js(const Napi::Env& env, const ov::Tensor& tensor) { + try { + auto prototype = get_prototype_from_ov_addon(env, "Tensor"); + + auto external = Napi::External::New(env, new ov::Tensor(tensor), + [](Napi::Env /*env*/, ov::Tensor* external_tensor) { + delete external_tensor; + }); + auto tensor_wrap = prototype.New({ external }); + + return tensor_wrap; + } catch (const ov::Exception& e) { + Napi::Error::New(env, std::string("Cannot create Tensor wrapper: ") + e.what()).ThrowAsJavaScriptException(); + return env.Undefined(); + } +} + +template <> +Napi::Value cpp_to_js(const Napi::Env& env, const ov::genai::TokenizedInputs& tokenized_inputs) { + auto js_object = Napi::Object::New(env); + + js_object.Set("input_ids", cpp_to_js(env, tokenized_inputs.input_ids)); + js_object.Set("attention_mask", cpp_to_js(env, tokenized_inputs.attention_mask)); + + return js_object; +} + bool is_napi_value_int(const Napi::Env& env, const Napi::Value& num) { return env.Global().Get("Number").ToObject().Get("isInteger").As().Call({num}).ToBoolean().Value(); } @@ -449,3 +517,16 @@ Napi::Value json_parse(const Napi::Env& env, const std::string& value) { .As() .Call({ Napi::String::New(env, value) }); } + +Napi::Function get_prototype_from_ov_addon(const Napi::Env& env, const std::string& ctor_name) { + auto addon_data = env.GetInstanceData(); + OPENVINO_ASSERT(!addon_data->openvino_addon.IsEmpty(), "Addon data is not initialized"); + Napi::Value ov_addon = addon_data->openvino_addon.Value(); + OPENVINO_ASSERT(!ov_addon.IsUndefined() && !ov_addon.IsNull() && ov_addon.IsObject(), "OV addon value is not an object"); + Napi::Object addon_obj = ov_addon.As(); + OPENVINO_ASSERT(addon_obj.Has(ctor_name), std::string("OV addon does not export '") + ctor_name + "' class"); + Napi::Value ctor_val = addon_obj.Get(ctor_name); + OPENVINO_ASSERT(ctor_val.IsFunction(), ctor_name + std::string(" is not a prototype")); + + return ctor_val.As(); +} diff --git a/src/js/src/tokenizer.cpp b/src/js/src/tokenizer.cpp index 1cf9c822d7..311af11f1b 100644 --- a/src/js/src/tokenizer.cpp +++ b/src/js/src/tokenizer.cpp @@ -15,6 +15,12 @@ Napi::Function TokenizerWrapper::get_class(Napi::Env env) { InstanceMethod("getEosTokenId", &TokenizerWrapper::get_eos_token_id), InstanceMethod("getPadToken", &TokenizerWrapper::get_pad_token), InstanceMethod("getPadTokenId", &TokenizerWrapper::get_pad_token_id), + InstanceMethod("getChatTemplate", &TokenizerWrapper::get_chat_template), + InstanceMethod("getOriginalChatTemplate", &TokenizerWrapper::get_original_chat_template), + InstanceMethod("setChatTemplate", &TokenizerWrapper::set_chat_template), + InstanceMethod("supportsPairedInput", &TokenizerWrapper::supports_paired_input), + InstanceMethod("decode", &TokenizerWrapper::decode), + InstanceMethod("encode", &TokenizerWrapper::encode), } ); } @@ -118,3 +124,157 @@ Napi::Value TokenizerWrapper::get_pad_token_id(const Napi::CallbackInfo& info) { return info.Env().Undefined(); } } + +Napi::Value TokenizerWrapper::encode(const Napi::CallbackInfo& info) { + auto env = info.Env(); + try { + OPENVINO_ASSERT(info.Length() >= 1, "Tokenizer.encode requires at least one argument: text or prompts"); + + // Parse encoding options from the last argument if it's an object + ov::AnyMap tokenization_params; + size_t last_text_arg_idx = info.Length() - 1; + + if (info[last_text_arg_idx].IsObject() && !info[last_text_arg_idx].IsArray()) { + auto options = info[last_text_arg_idx].As(); + + if (options.Has("addSpecialTokens")) { + tokenization_params["add_special_tokens"] = options.Get("addSpecialTokens").ToBoolean().Value(); + } + if (options.Has("padToMaxLength")) { + tokenization_params["pad_to_max_length"] = options.Get("padToMaxLength").ToBoolean().Value(); + } + if (options.Has("maxLength")) { + tokenization_params["max_length"] = static_cast(options.Get("maxLength").ToNumber().Int64Value()); + } + if (options.Has("paddingSide")) { + tokenization_params["padding_side"] = options.Get("paddingSide").ToString().Utf8Value(); + } + + last_text_arg_idx--; + } + + ov::genai::TokenizedInputs result; + + // Handle different input types + if (info[0].IsString()) { + // Single string + auto text = js_to_cpp(env, info[0]); + result = this->_tokenizer.encode(text, tokenization_params); + } else if (info[0].IsArray()) { + auto arr = info[0].As(); + + // Check if it's array of pairs [[str, str], ...] + if (arr.Length() > 0 && arr.Get(uint32_t(0)).IsArray()) { + // Array of pairs + std::vector> paired_prompts; + for (uint32_t i = 0; i < arr.Length(); ++i) { + OPENVINO_ASSERT(arr.Get(i).IsArray(), "Each pair must be an array"); + auto pair = arr.Get(i).As(); + OPENVINO_ASSERT(pair.Length() == 2, "Each pair must contain exactly 2 strings"); + paired_prompts.emplace_back( + js_to_cpp(env, pair.Get(uint32_t(0))), + js_to_cpp(env, pair.Get(uint32_t(1))) + ); + } + result = this->_tokenizer.encode(paired_prompts, tokenization_params); + } else { + // Regular array of strings + auto prompts = js_to_cpp>(env, info[0]); + result = this->_tokenizer.encode(prompts, tokenization_params); + } + } else if (last_text_arg_idx >= 1 && info[0].IsArray() && info[1].IsArray()) { + // Two arrays (paired input: prompts_1, prompts_2) + auto prompts1 = js_to_cpp>(env, info[0]); + auto prompts2 = js_to_cpp>(env, info[1]); + result = this->_tokenizer.encode(prompts1, prompts2, tokenization_params); + } else { + OPENVINO_THROW("Unsupported input type for encode. Expected: string, string[], [string, string][], or two string arrays"); + } + + return cpp_to_js(env, result); + } catch (std::exception& err) { + Napi::Error::New(env, err.what()).ThrowAsJavaScriptException(); + return env.Undefined(); + } +} + +Napi::Value TokenizerWrapper::decode(const Napi::CallbackInfo& info) { + auto env = info.Env(); + try { + OPENVINO_ASSERT(info.Length() >= 1, "Tokenizer.decode requires at least one argument: tokens"); + + ov::AnyMap detokenization_params; + if (info.Length() >= 2 && info[1].IsBoolean()) { + detokenization_params["skip_special_tokens"] = info[1].ToBoolean().Value(); + } + + // Handle different input types + if (info[0].IsArray()) { + auto arr = info[0].As(); + + // Check if it's a 2D array (batch of sequences) + if (arr.Length() > 0 && arr.Get(uint32_t(0)).IsArray()) { + // Batch decoding: number[][] + std::vector> batch_tokens; + for (uint32_t i = 0; i < arr.Length(); ++i) { + batch_tokens.push_back(js_to_cpp>(env, arr.Get(i))); + } + auto result = this->_tokenizer.decode(batch_tokens, detokenization_params); + return cpp_to_js, Napi::Value>(env, result); + } else { + // Single sequence: number[] + auto tokens = js_to_cpp>(env, info[0]); + auto result = this->_tokenizer.decode(tokens, detokenization_params); + return Napi::String::New(env, result); + } + } else { + // Tensor input + auto tensor = js_to_cpp(env, info[0]); + auto result = this->_tokenizer.decode(tensor, detokenization_params); + return cpp_to_js, Napi::Value>(env, result); + } + } catch (std::exception& err) { + Napi::Error::New(env, err.what()).ThrowAsJavaScriptException(); + return env.Undefined(); + } +} + +Napi::Value TokenizerWrapper::get_chat_template(const Napi::CallbackInfo& info) { + try { + return Napi::String::New(info.Env(), this->_tokenizer.get_chat_template()); + } catch (std::exception& err) { + Napi::Error::New(info.Env(), err.what()).ThrowAsJavaScriptException(); + return info.Env().Undefined(); + } +} + +Napi::Value TokenizerWrapper::get_original_chat_template(const Napi::CallbackInfo& info) { + try { + return Napi::String::New(info.Env(), this->_tokenizer.get_original_chat_template()); + } catch (std::exception& err) { + Napi::Error::New(info.Env(), err.what()).ThrowAsJavaScriptException(); + return info.Env().Undefined(); + } +} + +Napi::Value TokenizerWrapper::set_chat_template(const Napi::CallbackInfo& info) { + try { + OPENVINO_ASSERT(info.Length() >= 1, "Tokenizer.setChatTemplate requires one argument: chatTemplate"); + OPENVINO_ASSERT(info[0].IsString(), "The argument 'chatTemplate' must be a string"); + + this->_tokenizer.set_chat_template(js_to_cpp(info.Env(), info[0])); + return info.Env().Undefined(); + } catch (std::exception& err) { + Napi::Error::New(info.Env(), err.what()).ThrowAsJavaScriptException(); + return info.Env().Undefined(); + } +} + +Napi::Value TokenizerWrapper::supports_paired_input(const Napi::CallbackInfo& info) { + try { + return Napi::Boolean::New(info.Env(), this->_tokenizer.supports_paired_input()); + } catch (std::exception& err) { + Napi::Error::New(info.Env(), err.what()).ThrowAsJavaScriptException(); + return info.Env().Undefined(); + } +} diff --git a/src/js/tests/bindings.test.js b/src/js/tests/bindings.test.js index 8c1ac78760..0119f25a0f 100644 --- a/src/js/tests/bindings.test.js +++ b/src/js/tests/bindings.test.js @@ -1,4 +1,4 @@ -import addon from "../dist/addon.js"; +import { LLMPipeline } from "../dist/addon.js"; import assert from "node:assert"; import { describe, it, before, after } from "node:test"; @@ -10,7 +10,7 @@ describe("bindings", () => { let pipeline = null; before((_, done) => { - pipeline = new addon.LLMPipeline(); + pipeline = new LLMPipeline(); pipeline.init(MODEL_PATH, "CPU", {}, (err) => { if (err) { diff --git a/src/js/tests/tokenizer.test.js b/src/js/tests/tokenizer.test.js index dc470f2014..502d641f8c 100644 --- a/src/js/tests/tokenizer.test.js +++ b/src/js/tests/tokenizer.test.js @@ -4,7 +4,7 @@ import assert from "node:assert/strict"; import { describe, it, before, after } from "node:test"; import { models } from "./models.js"; -const MODEL_PATH = process.env.MODEL_PATH || `./tests/models/${models.LLM.split("/")[1]}`; +const MODEL_PATH = process.env.MODEL_PATH || `./tests/models/${models.InstructLLM.split("/")[1]}`; describe("tokenizer", async () => { let pipeline = null; @@ -21,7 +21,7 @@ describe("tokenizer", async () => { await pipeline.finishChat(); }); - it("applyChatTemplate return string", async () => { + it("applyChatTemplate return string", () => { const template = tokenizer.applyChatTemplate( [ { @@ -34,7 +34,7 @@ describe("tokenizer", async () => { assert.strictEqual(typeof template, "string"); }); - it("applyChatTemplate with chat history", async () => { + it("applyChatTemplate with chat history", () => { const chatHistory = new ChatHistory([ { role: "user", @@ -45,7 +45,7 @@ describe("tokenizer", async () => { assert.strictEqual(typeof template, "string"); }); - it("applyChatTemplate with true addGenerationPrompt", async () => { + it("applyChatTemplate with true addGenerationPrompt", () => { const template = tokenizer.applyChatTemplate( [ { @@ -58,7 +58,7 @@ describe("tokenizer", async () => { assert.ok(template.includes("assistant")); }); - it("applyChatTemplate with missed addGenerationPrompt", async () => { + it("applyChatTemplate with missed addGenerationPrompt", () => { assert.throws(() => tokenizer.applyChatTemplate([ { @@ -69,11 +69,11 @@ describe("tokenizer", async () => { ); }); - it("applyChatTemplate with incorrect type of history", async () => { + it("applyChatTemplate with incorrect type of history", () => { assert.throws(() => tokenizer.applyChatTemplate("prompt", false)); }); - it("applyChatTemplate with unknown property", async () => { + it("applyChatTemplate with unknown property", () => { const testValue = "1234567890"; const template = tokenizer.applyChatTemplate( [ @@ -88,7 +88,7 @@ describe("tokenizer", async () => { assert.ok(!template.includes(testValue)); }); - it("applyChatTemplate use custom chatTemplate", async () => { + it("applyChatTemplate use custom chatTemplate", () => { const prompt = "continue: 1 2 3"; const chatTemplate = `{% for message in messages %} {{ message['content'] }} @@ -106,7 +106,7 @@ describe("tokenizer", async () => { assert.strictEqual(template, `${prompt}\n`); }); - it("applyChatTemplate use tools", async () => { + it("applyChatTemplate use tools", () => { const prompt = "question"; const chatHistory = [ { @@ -124,7 +124,7 @@ describe("tokenizer", async () => { assert.strictEqual(templatedHistory, expected); }); - it("applyChatTemplate use tool from chat history", async () => { + it("applyChatTemplate use tool from chat history", () => { const prompt = "question"; const chatHistory = new ChatHistory(); chatHistory.push({ role: "user", content: prompt }); @@ -139,7 +139,7 @@ describe("tokenizer", async () => { assert.strictEqual(templatedHistory, expected); }); - it("applyChatTemplate use extra_context", async () => { + it("applyChatTemplate use extra_context", () => { const prompt = "question"; const chatHistory = [ { @@ -165,7 +165,7 @@ describe("tokenizer", async () => { assert.strictEqual(templatedHistory, expected); }); - it("applyChatTemplate use extra_context from chat history", async () => { + it("applyChatTemplate use extra_context from chat history", () => { const prompt = "question"; const chatHistory = new ChatHistory(); chatHistory.push({ role: "user", content: prompt }); @@ -181,33 +181,178 @@ describe("tokenizer", async () => { assert.strictEqual(templatedHistory, expected); }); - it("getBosToken return string", async () => { + it("getBosToken return string", () => { const token = tokenizer.getBosToken(); assert.strictEqual(typeof token, "string"); }); - it("getBosTokenId return number", async () => { + it("getBosTokenId return number", () => { const token = tokenizer.getBosTokenId(); assert.strictEqual(typeof token, "number"); }); - it("getEosToken return string", async () => { + it("getEosToken return string", () => { const token = tokenizer.getEosToken(); assert.strictEqual(typeof token, "string"); }); - it("getEosTokenId return number", async () => { + it("getEosTokenId return number", () => { const token = tokenizer.getEosTokenId(); assert.strictEqual(typeof token, "number"); }); - it("getPadToken return string", async () => { + it("getPadToken return string", () => { const token = tokenizer.getPadToken(); assert.strictEqual(typeof token, "string"); }); - it("getPadTokenId return number", async () => { + it("getPadTokenId return number", () => { const token = tokenizer.getPadTokenId(); assert.strictEqual(typeof token, "number"); }); + + it("getChatTemplate return string", () => { + const template = tokenizer.getChatTemplate(); + assert.strictEqual(typeof template, "string"); + }); + + it("setChatTemplate updates template", () => { + const originalTemplate = tokenizer.getChatTemplate(); + const customTemplate = "Custom template: {{ messages }}"; + + tokenizer.setChatTemplate(customTemplate); + const updatedTemplate = tokenizer.getChatTemplate(); + assert.strictEqual(updatedTemplate, customTemplate); + + // Restore original template + tokenizer.setChatTemplate(originalTemplate); + }); + + it("getOriginalChatTemplate return the original string", (testContext) => { + testContext.skip("Invalid test"); + return; + // eslint-disable-next-line no-unreachable + const originalTemplate = tokenizer.getChatTemplate(); + tokenizer.setChatTemplate("Custom template: {{ messages }}"); + + const template = tokenizer.getOriginalChatTemplate(); + assert.strictEqual(template, originalTemplate); + + // Restore original template + tokenizer.setChatTemplate(originalTemplate); + }); + + it("supportsPairedInput return boolean", () => { + const result = tokenizer.supportsPairedInput(); + assert.strictEqual(typeof result, "boolean"); + }); + + it("encode single string returns TokenizedInputs", () => { + const text = "Hello world"; + const result = tokenizer.encode(text); + + assert.ok(result.input_ids, "Should have input_ids"); + assert.ok(result.attention_mask, "Should have attention_mask"); + assert.strictEqual(typeof result.input_ids, "object"); + assert.strictEqual(typeof result.attention_mask, "object"); + }); + + it("encode with options", (testContext) => { + testContext.skip("Invalid test"); + return; + // eslint-disable-next-line no-unreachable + const text = "Hello world"; + const result = tokenizer.encode(text, { + addSpecialTokens: false, + padToMaxLength: true, + maxLength: 1000, + paddingSide: "left", + }); + const padTokenId = tokenizer.getPadTokenId(); + + assert.ok(result.input_ids); + assert.strictEqual( + result.input_ids.getShape()[1], + 1000, + "input_ids should be padded to maxLength", + ); + assert.strictEqual( + result.input_ids.getData()[0], + padTokenId, + "input_ids should be left padded", + ); + }); + + it("encode array of strings", () => { + const texts = ["Hello", "World"]; + const result = tokenizer.encode(texts); + + assert.strictEqual(result.input_ids.getShape()[0], texts.length); + assert.strictEqual(result.attention_mask.getShape()[0], 2); + }); + + it("encode paired prompts (two arrays)", (testContext) => { + if (!tokenizer.supportsPairedInput()) { + testContext.skip(); + return; + } + const prompts1 = ["Question 1", "Question 2"]; + const prompts2 = ["Answer 1", "Answer 2"]; + const result = tokenizer.encode(prompts1, prompts2); + + assert.strictEqual(result.input_ids.getShape()[0], prompts1.length); + assert.strictEqual(result.attention_mask.getShape()[0], prompts1.length); + }); + + it("encode paired prompts (array of pairs)", (testContext) => { + if (!tokenizer.supportsPairedInput()) { + testContext.skip(); + return; + } + const pairs = [ + ["Question 1", "Answer 1"], + ["Question 2", "Answer 2"], + ]; + const result = tokenizer.encode(pairs); + + assert.strictEqual(result.input_ids.getSize(), pairs.length); + assert.strictEqual(result.attention_mask.getSize(), pairs.length); + }); + + it("decode array of token IDs to string", () => { + const tokenIds = [1, 2, 3]; + const decoded = tokenizer.decode(tokenIds); + + assert.strictEqual(typeof decoded, "string"); + }); + + it("decode with skipSpecialTokens parameter", () => { + const eos = tokenizer.getEosToken(); + const eosId = tokenizer.getEosTokenId(); + const tokenIds = [1, 2, 3, eosId]; + const decoded1 = tokenizer.decode(tokenIds, true); + const decoded2 = tokenizer.decode(tokenIds, false); + + assert.strictEqual(typeof decoded1, "string"); + assert.strictEqual(typeof decoded2, "string"); + assert.strictEqual(decoded2, decoded1 + eos); + }); + + it("decode batch of token sequences", () => { + const batchTokens = [ + [1, 2, 3], + [4, 5, 6], + ]; + const decoded = tokenizer.decode(batchTokens); + + assert.strictEqual(decoded.length, 2); + }); + + it("encode and decode round trip", () => { + const originalText = "Hello world"; + const encoded = tokenizer.encode(originalText); + const decodedText = tokenizer.decode(encoded.input_ids); + + assert.deepEqual(decodedText, [originalText]); + }); }); From fd3bbd7a6007611ced43a73c816400a54587364b Mon Sep 17 00:00:00 2001 From: Kirill Suvorov Date: Fri, 14 Nov 2025 14:57:19 +0100 Subject: [PATCH 02/10] Fix after code review --- src/js/eslint.config.cjs | 5 +++++ src/js/lib/tokenizer.ts | 30 +++++++++++++++++++++--------- src/js/src/addon.cpp | 4 ++-- src/js/src/helper.cpp | 21 +++++++++------------ src/js/src/tokenizer.cpp | 31 +++++++++---------------------- 5 files changed, 46 insertions(+), 45 deletions(-) diff --git a/src/js/eslint.config.cjs b/src/js/eslint.config.cjs index b69cf72f62..6b81924b6d 100644 --- a/src/js/eslint.config.cjs +++ b/src/js/eslint.config.cjs @@ -53,6 +53,11 @@ module.exports = defineConfig([ "json_schema", "structured_output_config", "structural_tags_config", + "skip_special_tokens", + "add_special_tokens", + "pad_to_max_length", + "max_length", + "padding_side", ], }, ], diff --git a/src/js/lib/tokenizer.ts b/src/js/lib/tokenizer.ts index d321aa2b27..b9b7747e24 100644 --- a/src/js/lib/tokenizer.ts +++ b/src/js/lib/tokenizer.ts @@ -23,25 +23,36 @@ export interface EncodeOptions { * Whether to add special tokens like BOS, EOS, PAD. * @defaultValue true */ - addSpecialTokens?: boolean; + add_special_tokens?: boolean; /** * Whether to pad the sequence to the maximum length. * @defaultValue false */ - padToMaxLength?: boolean; + pad_to_max_length?: boolean; /** * Maximum length of the sequence. * If undefined, the value will be taken from the IR. */ - maxLength?: number; + max_length?: number; /** * Side to pad the sequence, can be 'left' or 'right'. * If undefined, the value will be taken from the IR. */ - paddingSide?: "left" | "right"; + padding_side?: "left" | "right"; +} + +/** + * Options for decode method. + */ +export interface DecodeOptions { + /** + * Whether to skip special tokens like BOS, EOS, PAD during detokenization. + * @defaultValue true + */ + skip_special_tokens?: boolean; } /** @@ -101,22 +112,23 @@ export interface Tokenizer { */ encode(prompts: [string, string][], options?: EncodeOptions): TokenizedInputs; - // TODO: move decode options to another interface /** * Decode a sequence of token IDs into a string prompt. + * * @param tokens - sequence of token IDs to decode - * @param skipSpecialTokens - whether to skip special tokens. Default is true. + * @param options - decoding options * @returns decoded string. */ - decode(tokens: number[], skipSpecialTokens?: boolean): string; + decode(tokens: number[], options?: DecodeOptions): string; /** * Decode a batch of token sequences (as Tensor or array of arrays) into a list of string prompts. + * * @param tokens - tensor containing token IDs or batch of token ID sequences - * @param skipSpecialTokens - whether to skip special tokens. Default is true. + * @param options - decoding options * @returns list of decoded strings. */ - decode(tokens: Tensor | number[][], skipSpecialTokens?: boolean): string[]; + decode(tokens: Tensor | number[][], options?: DecodeOptions): string[]; /** * Returns the BOS (Beginning of Sequence) token string. diff --git a/src/js/src/addon.cpp b/src/js/src/addon.cpp index 15f60597da..082f6c8532 100644 --- a/src/js/src/addon.cpp +++ b/src/js/src/addon.cpp @@ -20,7 +20,7 @@ void init_class(Napi::Env env, exports.Set(class_name, prototype); } -Napi::Value init_ov_addon(const Napi::CallbackInfo& info) { +Napi::Value set_ov_addon(const Napi::CallbackInfo& info) { Napi::Env env = info.Env(); if (info.Length() < 1) { Napi::TypeError::New(env, "setOpenvinoAddon expects one argument").ThrowAsJavaScriptException(); @@ -54,7 +54,7 @@ Napi::Object init_module(Napi::Env env, Napi::Object exports) { init_class(env, exports, "ChatHistory", &ChatHistoryWrap::get_class, addon_data->chat_history); // Expose a helper to set the openvino-node addon from JS (useful for ESM) - exports.Set("setOpenvinoAddon", Napi::Function::New(env, init_ov_addon)); + exports.Set("setOpenvinoAddon", Napi::Function::New(env, set_ov_addon)); return exports; } diff --git a/src/js/src/helper.cpp b/src/js/src/helper.cpp index 851dfb372e..939db03728 100644 --- a/src/js/src/helper.cpp +++ b/src/js/src/helper.cpp @@ -131,19 +131,16 @@ std::vector js_to_cpp>(const Napi::Env& en template <> std::vector js_to_cpp>(const Napi::Env& env, const Napi::Value& value) { - if (value.IsArray()) { - auto array = value.As(); - size_t arrayLength = array.Length(); - - std::vector vector; - vector.reserve(arrayLength); - for (uint32_t i = 0; i < arrayLength; ++i) { - vector.push_back(js_to_cpp(env, array[i])); - } - return vector; - } else { - OPENVINO_THROW("Passed argument must be of type Array."); + OPENVINO_ASSERT(value.IsArray(), "Passed argument must be of type Array."); + auto array = value.As(); + size_t arrayLength = array.Length(); + + std::vector vector; + vector.reserve(arrayLength); + for (uint32_t i = 0; i < arrayLength; ++i) { + vector.push_back(js_to_cpp(env, array[i])); } + return vector; } template <> diff --git a/src/js/src/tokenizer.cpp b/src/js/src/tokenizer.cpp index 311af11f1b..e68895c914 100644 --- a/src/js/src/tokenizer.cpp +++ b/src/js/src/tokenizer.cpp @@ -132,25 +132,11 @@ Napi::Value TokenizerWrapper::encode(const Napi::CallbackInfo& info) { // Parse encoding options from the last argument if it's an object ov::AnyMap tokenization_params; - size_t last_text_arg_idx = info.Length() - 1; + auto count_text_args = info.Length(); - if (info[last_text_arg_idx].IsObject() && !info[last_text_arg_idx].IsArray()) { - auto options = info[last_text_arg_idx].As(); - - if (options.Has("addSpecialTokens")) { - tokenization_params["add_special_tokens"] = options.Get("addSpecialTokens").ToBoolean().Value(); - } - if (options.Has("padToMaxLength")) { - tokenization_params["pad_to_max_length"] = options.Get("padToMaxLength").ToBoolean().Value(); - } - if (options.Has("maxLength")) { - tokenization_params["max_length"] = static_cast(options.Get("maxLength").ToNumber().Int64Value()); - } - if (options.Has("paddingSide")) { - tokenization_params["padding_side"] = options.Get("paddingSide").ToString().Utf8Value(); - } - - last_text_arg_idx--; + if (info[count_text_args - 1].IsObject() && !info[count_text_args - 1].IsArray()) { + tokenization_params = js_to_cpp(env, info[count_text_args - 1]); + count_text_args--; } ov::genai::TokenizedInputs result; @@ -160,7 +146,7 @@ Napi::Value TokenizerWrapper::encode(const Napi::CallbackInfo& info) { // Single string auto text = js_to_cpp(env, info[0]); result = this->_tokenizer.encode(text, tokenization_params); - } else if (info[0].IsArray()) { + } else if (count_text_args == 1 && info[0].IsArray()) { auto arr = info[0].As(); // Check if it's array of pairs [[str, str], ...] @@ -182,7 +168,7 @@ Napi::Value TokenizerWrapper::encode(const Napi::CallbackInfo& info) { auto prompts = js_to_cpp>(env, info[0]); result = this->_tokenizer.encode(prompts, tokenization_params); } - } else if (last_text_arg_idx >= 1 && info[0].IsArray() && info[1].IsArray()) { + } else if (count_text_args == 2 && info[0].IsArray() && info[1].IsArray()) { // Two arrays (paired input: prompts_1, prompts_2) auto prompts1 = js_to_cpp>(env, info[0]); auto prompts2 = js_to_cpp>(env, info[1]); @@ -204,8 +190,9 @@ Napi::Value TokenizerWrapper::decode(const Napi::CallbackInfo& info) { OPENVINO_ASSERT(info.Length() >= 1, "Tokenizer.decode requires at least one argument: tokens"); ov::AnyMap detokenization_params; - if (info.Length() >= 2 && info[1].IsBoolean()) { - detokenization_params["skip_special_tokens"] = info[1].ToBoolean().Value(); + if (info.Length() >= 2) { + const auto& options_candidate = info[1]; + detokenization_params = js_to_cpp(env, options_candidate); } // Handle different input types From d886ad19a84e3d16903df6580a5c1eca2ab17f06 Mon Sep 17 00:00:00 2001 From: Kirill Suvorov Date: Fri, 14 Nov 2025 20:11:40 +0100 Subject: [PATCH 03/10] Add constructor, use bigint for tokenID and fix tests --- samples/js/text_generation/benchmark_genai.js | 2 +- site/docs/guides/tokenization.mdx | 53 ++++++ src/js/eslint.config.cjs | 2 + src/js/lib/addon.ts | 5 +- src/js/lib/index.ts | 3 +- src/js/lib/tokenizer.ts | 38 ++++- src/js/src/helper.cpp | 10 +- src/js/src/tokenizer.cpp | 56 ++++++- src/js/tests/tokenizer.test.js | 155 +++++++++++------- 9 files changed, 248 insertions(+), 76 deletions(-) diff --git a/samples/js/text_generation/benchmark_genai.js b/samples/js/text_generation/benchmark_genai.js index 972b5765b1..7aa0d5136d 100644 --- a/samples/js/text_generation/benchmark_genai.js +++ b/samples/js/text_generation/benchmark_genai.js @@ -91,7 +91,7 @@ async function main() { } const inputData = await pipe.getTokenizer().encode(prompt); - const promptTokenSize = inputData.input_ids.shape[1]; + const promptTokenSize = inputData.input_ids.getShape()[1]; console.log(`Prompt token size: ${promptTokenSize}`); for (let i = 0; i < numWarmup; i++) { diff --git a/site/docs/guides/tokenization.mdx b/site/docs/guides/tokenization.mdx index 7d2c9d6c62..1726c72606 100644 --- a/site/docs/guides/tokenization.mdx +++ b/site/docs/guides/tokenization.mdx @@ -34,6 +34,18 @@ It can be initialized from the path, in-memory IR representation or obtained fro auto tokenzier = pipe.get_tokenizer(); ``` + + ```js + import { LLMPipeline, Tokenizer } from 'openvino-genai-node'; + + // Initialize from the path + const tokenizer = new Tokenizer(models_path); + + // Or get tokenizer instance from LLMPipeline + const pipe = await LLMPipeline(models_path, "CPU"); + const tokenzier = pipe.getTokenizer(); + ``` + `Tokenizer` has `encode()` and `decode()` methods which support the following arguments: `add_special_tokens`, `skip_special_tokens`, `pad_to_max_length`, `max_length`. @@ -51,6 +63,11 @@ It can be initialized from the path, in-memory IR representation or obtained fro auto tokens = tokenizer.encode("The Sun is yellow because", ov::genai::add_special_tokens(false)); ``` + + ```js + const tokens = tokenizer.encode("The Sun is yellow because", { add_special_tokens: false }); + ``` + The `encode()` method returns a [`TokenizedInputs`](https://docs.openvino.ai/2025/api/genai_api/_autosummary/openvino_genai.TokenizedInputs.html) object containing `input_ids` and `attention_mask`, both stored as `ov::Tensor`. @@ -121,4 +138,40 @@ If `pad_to_max_length` is set to true, then instead of padding to the longest se // out_shape: [1, 128] ``` + + ```js + import { Tokenizer } from 'openvino-genai-node'; + + const tokenizer = new Tokenizer(models_path); + const prompts = ["The Sun is yellow because", "The"]; + let tokens; + + // Since prompt is definitely shorter than maximal length (which is taken from IR) will not affect shape. + // Resulting shape is defined by length of the longest tokens sequence. + // Equivalent of HuggingFace hf_tokenizer.encode(prompt, padding="longest", truncation=True) + tokens = tokenizer.encode(["The Sun is yellow because", "The"]); + // or is equivalent to + tokens = tokenizer.encode(["The Sun is yellow because", "The"], { pad_to_max_length: false }); + console.log(tokens.input_ids.shape); + // out_shape: [2, 6] + + // Resulting tokens tensor will be padded to 1024, sequences which exceed this length will be truncated. + // Equivalent of HuggingFace hf_tokenizer.encode(prompt, padding="max_length", truncation=True, max_length=1024) + tokens = tokenizer.encode([ + "The Sun is yellow because", + "The", + "The longest string ever" * 2000, + ], { + pad_to_max_length: true, + max_length: 1024, + }); + console.log(tokens.input_ids.shape); + // out_shape: [3, 1024] + + // For single string prompts truncation and padding are also applied. + tokens = tokenizer.encode("The Sun is yellow because", { pad_to_max_length: true, max_length: 128 }); + console.log(tokens.input_ids.shape); + // out_shape: [1, 128] + ``` + diff --git a/src/js/eslint.config.cjs b/src/js/eslint.config.cjs index 6b81924b6d..a8c41a70d2 100644 --- a/src/js/eslint.config.cjs +++ b/src/js/eslint.config.cjs @@ -58,6 +58,8 @@ module.exports = defineConfig([ "pad_to_max_length", "max_length", "padding_side", + "add_second_input", + "number_of_inputs", ], }, ], diff --git a/src/js/lib/addon.ts b/src/js/lib/addon.ts index 5f8a9bc625..b6023e5a09 100644 --- a/src/js/lib/addon.ts +++ b/src/js/lib/addon.ts @@ -2,6 +2,7 @@ import { createRequire } from "module"; import { platform } from "node:os"; import { join, dirname, resolve } from "node:path"; import type { ChatHistory as IChatHistory } from "./chatHistory.js"; +import type { Tokenizer as ITokenizer } from "./tokenizer.js"; import { addon as ovAddon } from "openvino-node"; export type EmbeddingResult = Float32Array | Int8Array | Uint8Array; @@ -61,6 +62,7 @@ interface OpenVINOGenAIAddon { TextEmbeddingPipeline: TextEmbeddingPipelineWrapper; LLMPipeline: any; ChatHistory: IChatHistory; + Tokenizer: ITokenizer; setOpenvinoAddon: (ovAddon: any) => void; } @@ -82,5 +84,6 @@ function getGenAIAddon(): OpenVINOGenAIAddon { const addon = getGenAIAddon(); addon.setOpenvinoAddon(ovAddon); -export const { TextEmbeddingPipeline, LLMPipeline, ChatHistory } = addon; +export const { TextEmbeddingPipeline, LLMPipeline, ChatHistory, Tokenizer } = addon; export type ChatHistory = IChatHistory; +export type Tokenizer = ITokenizer; diff --git a/src/js/lib/index.ts b/src/js/lib/index.ts index 89ee0e1f71..ad8e49168f 100644 --- a/src/js/lib/index.ts +++ b/src/js/lib/index.ts @@ -40,4 +40,5 @@ export const { LLMPipeline, TextEmbeddingPipeline } = PipelineFactory; export { DecodedResults } from "./pipelines/llmPipeline.js"; export * from "./utils.js"; export * from "./addon.js"; -export * from "./tokenizer.js"; +export type { TokenizedInputs, EncodeOptions, DecodeOptions } from "./tokenizer.js"; +export type { ChatMessage, ExtraContext, ToolDefinition } from "./chatHistory.js"; diff --git a/src/js/lib/tokenizer.ts b/src/js/lib/tokenizer.ts index b9b7747e24..eefa10e618 100644 --- a/src/js/lib/tokenizer.ts +++ b/src/js/lib/tokenizer.ts @@ -1,8 +1,9 @@ +/* eslint-disable @typescript-eslint/no-misused-new */ // Copyright (C) 2025 Intel Corporation // SPDX-License-Identifier: Apache-2.0 -import { Tensor } from "openvino-node"; -import { ChatHistory } from "./addon.js"; +import type { Tensor } from "openvino-node"; +import type { ChatHistory } from "./chatHistory.js"; /** * TokenizedInputs contains input_ids and attention_mask tensors. @@ -66,6 +67,29 @@ export interface DecodeOptions { * 5. If the template is known to be not supported by GenAI, it's replaced with a simplified supported version. */ export interface Tokenizer { + /** + * Load tokenizer and detokenizer IRs by path. + * @param tokenizerPath Path to a directory containing tokenizer/detokenizer XML/BIN files. + * @param properties Optional OpenVINO compilation properties. + */ + new (tokenizerPath: string, properties?: Record): Tokenizer; + + /** + * Create tokenizer from already loaded IR contents. + * @param tokenizerModel Tokenizer XML string. + * @param tokenizerWeights Tokenizer weights tensor. + * @param detokenizerModel Detokenizer XML string. + * @param detokenizerWeights Detokenizer weights tensor. + * @param properties Optional OpenVINO compilation properties. + */ + new ( + tokenizerModel: string, + tokenizerWeights: Tensor, + detokenizerModel: string, + detokenizerWeights: Tensor, + properties?: Record, + ): Tokenizer; + /** * Applies a chat template to format chat history into a prompt string. * @param chatHistory - chat history as an array of message objects or ChatHistory instance @@ -119,7 +143,7 @@ export interface Tokenizer { * @param options - decoding options * @returns decoded string. */ - decode(tokens: number[], options?: DecodeOptions): string; + decode(tokens: number[] | bigint[], options?: DecodeOptions): string; /** * Decode a batch of token sequences (as Tensor or array of arrays) into a list of string prompts. @@ -128,7 +152,7 @@ export interface Tokenizer { * @param options - decoding options * @returns list of decoded strings. */ - decode(tokens: Tensor | number[][], options?: DecodeOptions): string[]; + decode(tokens: Tensor | number[][] | bigint[][], options?: DecodeOptions): string[]; /** * Returns the BOS (Beginning of Sequence) token string. @@ -140,7 +164,7 @@ export interface Tokenizer { * Returns the BOS (Beginning of Sequence) token ID. * @returns BOS token ID */ - getBosTokenId(): number; + getBosTokenId(): bigint; /** * Returns the EOS (End of Sequence) token string. @@ -152,7 +176,7 @@ export interface Tokenizer { * Returns the EOS (End of Sequence) token ID. * @returns EOS token ID */ - getEosTokenId(): number; + getEosTokenId(): bigint; /** * Returns the PAD (Padding) token string. @@ -164,7 +188,7 @@ export interface Tokenizer { * Returns the PAD (Padding) token ID. * @returns PAD token ID */ - getPadTokenId(): number; + getPadTokenId(): bigint; /** * Returns the current chat template string. diff --git a/src/js/src/helper.cpp b/src/js/src/helper.cpp index 939db03728..2e9ceefbb2 100644 --- a/src/js/src/helper.cpp +++ b/src/js/src/helper.cpp @@ -104,8 +104,14 @@ std::string js_to_cpp(const Napi::Env& env, const Napi::Value& valu template <> int64_t js_to_cpp(const Napi::Env& env, const Napi::Value& value) { - OPENVINO_ASSERT(value.IsNumber(), "Passed argument must be of type Number."); - return value.As().Int64Value(); + OPENVINO_ASSERT(value.IsNumber() || value.IsBigInt(), "Passed argument must be of type Number."); + if (value.IsNumber()) { + return value.As().Int64Value(); + } + bool lossless; + auto result = value.As().Int64Value(&lossless); + OPENVINO_ASSERT(lossless, "BigInt value is too large to fit in int64_t without precision loss."); + return result; } template <> diff --git a/src/js/src/tokenizer.cpp b/src/js/src/tokenizer.cpp index e68895c914..a8f1c72380 100644 --- a/src/js/src/tokenizer.cpp +++ b/src/js/src/tokenizer.cpp @@ -2,7 +2,51 @@ #include "include/helper.hpp" #include "include/tokenizer.hpp" -TokenizerWrapper::TokenizerWrapper(const Napi::CallbackInfo& info) : Napi::ObjectWrap(info) {}; +TokenizerWrapper::TokenizerWrapper(const Napi::CallbackInfo& info) : Napi::ObjectWrap(info) { + if (info.Length() == 0) { + return; + } + + auto env = info.Env(); + try { + if (info.Length() == 1 || info.Length() == 2) { + OPENVINO_ASSERT(info[0].IsString(), "Tokenizer constructor expects 'tokenizerPath' to be a string"); + const auto tokenizer_path = js_to_cpp(env, info[0]); + ov::AnyMap properties; + if (info.Length() == 2) { + properties = js_to_cpp(env, info[1]); + } + this->_tokenizer = ov::genai::Tokenizer(tokenizer_path, properties); + return; + } + + OPENVINO_ASSERT(info.Length() == 4 || info.Length() == 5, + "Tokenizer constructor expects 1-2 arguments (path[, properties]) or 4-5 arguments (models, tensors[, properties])"); + OPENVINO_ASSERT(info[0].IsString(), "The argument 'tokenizerModel' must be a string"); + OPENVINO_ASSERT(info[1].IsObject(), "The argument 'tokenizerWeights' must be an OpenVINO Tensor"); + OPENVINO_ASSERT(info[2].IsString(), "The argument 'detokenizerModel' must be a string"); + OPENVINO_ASSERT(info[3].IsObject(), "The argument 'detokenizerWeights' must be an OpenVINO Tensor"); + + const auto tokenizer_model = js_to_cpp(env, info[0]); + const auto tokenizer_weights = js_to_cpp(env, info[1]); + const auto detokenizer_model = js_to_cpp(env, info[2]); + const auto detokenizer_weights = js_to_cpp(env, info[3]); + ov::AnyMap properties; + if (info.Length() == 5) { + properties = js_to_cpp(env, info[4]); + } + + this->_tokenizer = ov::genai::Tokenizer( + tokenizer_model, + tokenizer_weights, + detokenizer_model, + detokenizer_weights, + properties + ); + } catch (const std::exception& err) { + Napi::Error::New(env, err.what()).ThrowAsJavaScriptException(); + } +} Napi::Function TokenizerWrapper::get_class(Napi::Env env) { return DefineClass(env, @@ -82,7 +126,7 @@ Napi::Value TokenizerWrapper::get_bos_token(const Napi::CallbackInfo& info) { Napi::Value TokenizerWrapper::get_bos_token_id(const Napi::CallbackInfo& info) { try { - return Napi::Number::New(info.Env(), this->_tokenizer.get_bos_token_id()); + return Napi::BigInt::New(info.Env(), this->_tokenizer.get_bos_token_id()); } catch (std::exception& err) { Napi::Error::New(info.Env(), err.what()).ThrowAsJavaScriptException(); return info.Env().Undefined(); @@ -100,7 +144,7 @@ Napi::Value TokenizerWrapper::get_eos_token(const Napi::CallbackInfo& info) { Napi::Value TokenizerWrapper::get_eos_token_id(const Napi::CallbackInfo& info) { try { - return Napi::Number::New(info.Env(), this->_tokenizer.get_eos_token_id()); + return Napi::BigInt::New(info.Env(), this->_tokenizer.get_eos_token_id()); } catch (std::exception& err) { Napi::Error::New(info.Env(), err.what()).ThrowAsJavaScriptException(); return info.Env().Undefined(); @@ -118,7 +162,7 @@ Napi::Value TokenizerWrapper::get_pad_token(const Napi::CallbackInfo& info) { Napi::Value TokenizerWrapper::get_pad_token_id(const Napi::CallbackInfo& info) { try { - return Napi::Number::New(info.Env(), this->_tokenizer.get_pad_token_id()); + return Napi::BigInt::New(info.Env(), this->_tokenizer.get_pad_token_id()); } catch (std::exception& err) { Napi::Error::New(info.Env(), err.what()).ThrowAsJavaScriptException(); return info.Env().Undefined(); @@ -201,7 +245,7 @@ Napi::Value TokenizerWrapper::decode(const Napi::CallbackInfo& info) { // Check if it's a 2D array (batch of sequences) if (arr.Length() > 0 && arr.Get(uint32_t(0)).IsArray()) { - // Batch decoding: number[][] + // Batch decoding: number[][] | bigint[][] std::vector> batch_tokens; for (uint32_t i = 0; i < arr.Length(); ++i) { batch_tokens.push_back(js_to_cpp>(env, arr.Get(i))); @@ -209,7 +253,7 @@ Napi::Value TokenizerWrapper::decode(const Napi::CallbackInfo& info) { auto result = this->_tokenizer.decode(batch_tokens, detokenization_params); return cpp_to_js, Napi::Value>(env, result); } else { - // Single sequence: number[] + // Single sequence: number[] | bigint[] auto tokens = js_to_cpp>(env, info[0]); auto result = this->_tokenizer.decode(tokens, detokenization_params); return Napi::String::New(env, result); diff --git a/src/js/tests/tokenizer.test.js b/src/js/tests/tokenizer.test.js index 502d641f8c..a06f5f6117 100644 --- a/src/js/tests/tokenizer.test.js +++ b/src/js/tests/tokenizer.test.js @@ -1,12 +1,48 @@ -import { LLMPipeline, ChatHistory } from "../dist/index.js"; +import { LLMPipeline, ChatHistory, Tokenizer } from "../dist/index.js"; import assert from "node:assert/strict"; import { describe, it, before, after } from "node:test"; import { models } from "./models.js"; +import fs from "node:fs/promises"; +import { join } from "node:path"; +import { addon as ovAddon } from "openvino-node"; -const MODEL_PATH = process.env.MODEL_PATH || `./tests/models/${models.InstructLLM.split("/")[1]}`; +const MODEL_PATH = process.env.MODEL_PATH || `./tests/models/${models.LLM.split("/")[1]}`; -describe("tokenizer", async () => { +describe("tokenizer constructors", () => { + it("tokenizer constructors with one argument", () => { + const tokenizer = new Tokenizer(MODEL_PATH); + + assert.ok(tokenizer); + }); + + it("tokenizer constructors with multiple arguments", async () => { + const tokenizerName = join(MODEL_PATH, "openvino_tokenizer"); + const detokenizerName = join(MODEL_PATH, "openvino_detokenizer"); + const tokenizerModel = await fs.readFile(`${tokenizerName}.xml`, "utf8"); + const tokenizerWeights = await fs.readFile(`${tokenizerName}.bin`); + const detokenizerModel = await fs.readFile(`${detokenizerName}.xml`, "utf8"); + const detokenizerWeights = await fs.readFile(`${detokenizerName}.bin`); + + const tokenizerTensor = new ovAddon.Tensor("u8", [tokenizerWeights.length], tokenizerWeights); + const detokenizerTensor = new ovAddon.Tensor( + "u8", + [detokenizerWeights.length], + detokenizerWeights, + ); + + const tokenizer = new Tokenizer( + tokenizerModel, + tokenizerTensor, + detokenizerModel, + detokenizerTensor, + ); + + assert.ok(tokenizer); + }); +}); + +describe("tokenizer functions", async () => { let pipeline = null; let tokenizer = null; @@ -188,7 +224,7 @@ describe("tokenizer", async () => { it("getBosTokenId return number", () => { const token = tokenizer.getBosTokenId(); - assert.strictEqual(typeof token, "number"); + assert.strictEqual(typeof token, "bigint"); }); it("getEosToken return string", () => { @@ -198,7 +234,7 @@ describe("tokenizer", async () => { it("getEosTokenId return number", () => { const token = tokenizer.getEosTokenId(); - assert.strictEqual(typeof token, "number"); + assert.strictEqual(typeof token, "bigint"); }); it("getPadToken return string", () => { @@ -208,7 +244,7 @@ describe("tokenizer", async () => { it("getPadTokenId return number", () => { const token = tokenizer.getPadTokenId(); - assert.strictEqual(typeof token, "number"); + assert.strictEqual(typeof token, "bigint"); }); it("getChatTemplate return string", () => { @@ -218,9 +254,11 @@ describe("tokenizer", async () => { it("setChatTemplate updates template", () => { const originalTemplate = tokenizer.getChatTemplate(); - const customTemplate = "Custom template: {{ messages }}"; + assert.strictEqual(typeof originalTemplate, "string"); + const customTemplate = "Custom template: {{ messages }}"; tokenizer.setChatTemplate(customTemplate); + const updatedTemplate = tokenizer.getChatTemplate(); assert.strictEqual(updatedTemplate, customTemplate); @@ -242,11 +280,6 @@ describe("tokenizer", async () => { tokenizer.setChatTemplate(originalTemplate); }); - it("supportsPairedInput return boolean", () => { - const result = tokenizer.supportsPairedInput(); - assert.strictEqual(typeof result, "boolean"); - }); - it("encode single string returns TokenizedInputs", () => { const text = "Hello world"; const result = tokenizer.encode(text); @@ -257,18 +290,15 @@ describe("tokenizer", async () => { assert.strictEqual(typeof result.attention_mask, "object"); }); - it("encode with options", (testContext) => { - testContext.skip("Invalid test"); - return; - // eslint-disable-next-line no-unreachable + it("encode with options", () => { const text = "Hello world"; const result = tokenizer.encode(text, { - addSpecialTokens: false, - padToMaxLength: true, - maxLength: 1000, - paddingSide: "left", + add_special_tokens: false, + pad_to_max_length: true, + max_length: 1000, + padding_side: "left", }); - const padTokenId = tokenizer.getPadTokenId(); + // const padTokenId = tokenizer.getPadTokenId(); assert.ok(result.input_ids); assert.strictEqual( @@ -276,11 +306,12 @@ describe("tokenizer", async () => { 1000, "input_ids should be padded to maxLength", ); - assert.strictEqual( - result.input_ids.getData()[0], - padTokenId, - "input_ids should be left padded", - ); + // TODO Uncomment after fixing padding issue + // assert.strictEqual( + // result.input_ids.getData()[0], + // padTokenId, + // "input_ids should be left padded", + // ); }); it("encode array of strings", () => { @@ -291,34 +322,6 @@ describe("tokenizer", async () => { assert.strictEqual(result.attention_mask.getShape()[0], 2); }); - it("encode paired prompts (two arrays)", (testContext) => { - if (!tokenizer.supportsPairedInput()) { - testContext.skip(); - return; - } - const prompts1 = ["Question 1", "Question 2"]; - const prompts2 = ["Answer 1", "Answer 2"]; - const result = tokenizer.encode(prompts1, prompts2); - - assert.strictEqual(result.input_ids.getShape()[0], prompts1.length); - assert.strictEqual(result.attention_mask.getShape()[0], prompts1.length); - }); - - it("encode paired prompts (array of pairs)", (testContext) => { - if (!tokenizer.supportsPairedInput()) { - testContext.skip(); - return; - } - const pairs = [ - ["Question 1", "Answer 1"], - ["Question 2", "Answer 2"], - ]; - const result = tokenizer.encode(pairs); - - assert.strictEqual(result.input_ids.getSize(), pairs.length); - assert.strictEqual(result.attention_mask.getSize(), pairs.length); - }); - it("decode array of token IDs to string", () => { const tokenIds = [1, 2, 3]; const decoded = tokenizer.decode(tokenIds); @@ -326,12 +329,13 @@ describe("tokenizer", async () => { assert.strictEqual(typeof decoded, "string"); }); - it("decode with skipSpecialTokens parameter", () => { + // TODO Fix skip_special_tokens functionality + it.skip("decode with skip_special_tokens option", () => { const eos = tokenizer.getEosToken(); const eosId = tokenizer.getEosTokenId(); - const tokenIds = [1, 2, 3, eosId]; - const decoded1 = tokenizer.decode(tokenIds, true); - const decoded2 = tokenizer.decode(tokenIds, false); + const tokenIds = [10n, 20n, 30n, eosId]; + const decoded1 = tokenizer.decode(tokenIds, { skip_special_tokens: true }); + const decoded2 = tokenizer.decode(tokenIds, { skip_special_tokens: false }); assert.strictEqual(typeof decoded1, "string"); assert.strictEqual(typeof decoded2, "string"); @@ -356,3 +360,38 @@ describe("tokenizer", async () => { assert.deepEqual(decodedText, [originalText]); }); }); + +// TODO Add model with paired input support +describe.skip("tokenizer with paired input", () => { + let tokenizer = null; + + before(async () => { + tokenizer = new Tokenizer(MODEL_PATH, { add_second_input: true, number_of_inputs: 2 }); + }); + + it("supportsPairedInput return boolean", () => { + const result = tokenizer.supportsPairedInput(); + + assert.strictEqual(result, true); + }); + + it("encode paired prompts (two arrays)", () => { + const prompts1 = ["Question 1", "Question 2"]; + const prompts2 = ["Answer 1", "Answer 2"]; + const result = tokenizer.encode(prompts1, prompts2); + + assert.strictEqual(result.input_ids.getShape()[0], prompts1.length); + assert.strictEqual(result.attention_mask.getShape()[0], prompts1.length); + }); + + it("encode paired prompts (array of pairs)", () => { + const pairs = [ + ["Question 1", "Answer 1"], + ["Question 2", "Answer 2"], + ]; + const result = tokenizer.encode(pairs); + + assert.strictEqual(result.input_ids.getShape()[0], pairs.length); + assert.strictEqual(result.attention_mask.getShape()[0], pairs.length); + }); +}); From f3e48bf7a51c94efc12483d408c1770c20e688fc Mon Sep 17 00:00:00 2001 From: Kirill Suvorov Date: Fri, 14 Nov 2025 20:41:42 +0100 Subject: [PATCH 04/10] Update docs --- site/docs/bindings/node-js.md | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/site/docs/bindings/node-js.md b/site/docs/bindings/node-js.md index 1e7d59fb13..7abaef3504 100644 --- a/site/docs/bindings/node-js.md +++ b/site/docs/bindings/node-js.md @@ -24,6 +24,12 @@ Node.js bindings currently support: - Structured output - ReAct agent support - `TextEmbeddingPipeline`: Generate text embeddings for semantic search and RAG applications +- `Tokenizer`: Fast tokenization / detokenization and chat prompt formatting + - Encode strings into token id and attention mask tensors + - Decode token sequences + - Apply chat template + - Access special tokens (BOS/EOS/PAD) + - Supports paired input ## Installation From 9b6daa37bae79cf78372af045673bbeb1c250344 Mon Sep 17 00:00:00 2001 From: Kirill Suvorov Date: Fri, 14 Nov 2025 21:23:59 +0100 Subject: [PATCH 05/10] Add token_type_ids --- src/js/lib/tokenizer.ts | 16 +++++++++++----- src/js/src/helper.cpp | 4 ++++ src/js/tests/tokenizer.test.js | 15 +++++++++++---- 3 files changed, 26 insertions(+), 9 deletions(-) diff --git a/src/js/lib/tokenizer.ts b/src/js/lib/tokenizer.ts index eefa10e618..81395f7a09 100644 --- a/src/js/lib/tokenizer.ts +++ b/src/js/lib/tokenizer.ts @@ -6,14 +6,20 @@ import type { Tensor } from "openvino-node"; import type { ChatHistory } from "./chatHistory.js"; /** - * TokenizedInputs contains input_ids and attention_mask tensors. + * TokenizedInputs contains input_ids, attention_mask and (optionally) token_type_ids tensors. + * token_type_ids is returned if the tokenizer supports paired input, otherwise the field is undefined. * This is the result of encoding prompts using the Tokenizer. */ export interface TokenizedInputs { - /** Tensor containing token IDs for the encoded input */ + /** Tensor containing token IDs of the encoded input */ input_ids: Tensor; /** Tensor containing attention mask (1 for real tokens, 0 for padding) */ attention_mask: Tensor; + /** + * Optional tensor with token type IDs (segment ids) for paired inputs. + * Present only if the model/tokenizer supports paired input. + */ + token_type_ids?: Tensor; } /** @@ -111,7 +117,7 @@ export interface Tokenizer { * Encodes a single prompt or a list of prompts into tokenized inputs. * @param prompts - single prompt string or array of prompts * @param options - encoding options - * @returns TokenizedInputs object containing input_ids and attention_mask tensors. + * @returns TokenizedInputs object containing input_ids, attention_mask and optional token_type_ids tensors. */ encode(prompts: string | string[], options?: EncodeOptions): TokenizedInputs; @@ -123,7 +129,7 @@ export interface Tokenizer { * @param prompts1 - first list of prompts to encode * @param prompts2 - second list of prompts to encode * @param options - encoding options - * @returns TokenizedInputs object containing input_ids and attention_mask tensors. + * @returns TokenizedInputs object containing input_ids, attention_mask and optional token_type_ids tensors. */ encode(prompts1: string[], prompts2: string[], options?: EncodeOptions): TokenizedInputs; @@ -132,7 +138,7 @@ export interface Tokenizer { * Input format is same as for HF paired input [[prompt_1, prompt_2], ...]. * @param prompts - list of paired prompts to encode * @param options - encoding options - * @returns TokenizedInputs object containing input_ids and attention_mask tensors. + * @returns TokenizedInputs object containing input_ids, attention_mask and optional token_type_ids tensors. */ encode(prompts: [string, string][], options?: EncodeOptions): TokenizedInputs; diff --git a/src/js/src/helper.cpp b/src/js/src/helper.cpp index 2e9ceefbb2..80fcea8400 100644 --- a/src/js/src/helper.cpp +++ b/src/js/src/helper.cpp @@ -484,6 +484,10 @@ Napi::Value cpp_to_js(const Napi::Env& js_object.Set("input_ids", cpp_to_js(env, tokenized_inputs.input_ids)); js_object.Set("attention_mask", cpp_to_js(env, tokenized_inputs.attention_mask)); + // token_type_ids is optional and present only for paired inputs + if (tokenized_inputs.token_type_ids.has_value()) { + js_object.Set("token_type_ids", cpp_to_js(env, tokenized_inputs.token_type_ids.value())); + } return js_object; } diff --git a/src/js/tests/tokenizer.test.js b/src/js/tests/tokenizer.test.js index a06f5f6117..1a2051ecf6 100644 --- a/src/js/tests/tokenizer.test.js +++ b/src/js/tests/tokenizer.test.js @@ -266,10 +266,8 @@ describe("tokenizer functions", async () => { tokenizer.setChatTemplate(originalTemplate); }); - it("getOriginalChatTemplate return the original string", (testContext) => { - testContext.skip("Invalid test"); - return; - // eslint-disable-next-line no-unreachable + // TODO Fix getOriginalChatTemplate + it.skip("getOriginalChatTemplate return the original string", () => { const originalTemplate = tokenizer.getChatTemplate(); tokenizer.setChatTemplate("Custom template: {{ messages }}"); @@ -394,4 +392,13 @@ describe.skip("tokenizer with paired input", () => { assert.strictEqual(result.input_ids.getShape()[0], pairs.length); assert.strictEqual(result.attention_mask.getShape()[0], pairs.length); }); + + it("encode paired prompts broadcasting second array", () => { + const prompts1 = ["Question 1", "Question 2", "Question 3"]; // batch size 3 + const prompts2 = ["Single answer"]; // will be broadcast + const result = tokenizer.encode(prompts1, prompts2); + + assert.strictEqual(result.input_ids.getShape()[0], prompts1.length); + assert.strictEqual(result.attention_mask.getShape()[0], prompts1.length); + }); }); From 03e6a6e65df7994b257681a483ec66b264638023 Mon Sep 17 00:00:00 2001 From: Kirill Suvorov Date: Fri, 14 Nov 2025 22:00:23 +0100 Subject: [PATCH 06/10] Create issues for found problems --- src/js/tests/tokenizer.test.js | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/js/tests/tokenizer.test.js b/src/js/tests/tokenizer.test.js index 1a2051ecf6..c7a9bb8193 100644 --- a/src/js/tests/tokenizer.test.js +++ b/src/js/tests/tokenizer.test.js @@ -266,7 +266,7 @@ describe("tokenizer functions", async () => { tokenizer.setChatTemplate(originalTemplate); }); - // TODO Fix getOriginalChatTemplate + // Fix getOriginalChatTemplate issue CVS-176638 it.skip("getOriginalChatTemplate return the original string", () => { const originalTemplate = tokenizer.getChatTemplate(); tokenizer.setChatTemplate("Custom template: {{ messages }}"); @@ -304,7 +304,7 @@ describe("tokenizer functions", async () => { 1000, "input_ids should be padded to maxLength", ); - // TODO Uncomment after fixing padding issue + // Uncomment after fixing padding issue CVS-176636 // assert.strictEqual( // result.input_ids.getData()[0], // padTokenId, @@ -327,8 +327,8 @@ describe("tokenizer functions", async () => { assert.strictEqual(typeof decoded, "string"); }); - // TODO Fix skip_special_tokens functionality - it.skip("decode with skip_special_tokens option", () => { + // Change model to fix skip_special_tokens functionality CVS-176639 + it("decode with skip_special_tokens option", () => { const eos = tokenizer.getEosToken(); const eosId = tokenizer.getEosTokenId(); const tokenIds = [10n, 20n, 30n, eosId]; @@ -359,7 +359,7 @@ describe("tokenizer functions", async () => { }); }); -// TODO Add model with paired input support +// Add model with paired input support CVS-176639 describe.skip("tokenizer with paired input", () => { let tokenizer = null; From d85604b3af911387de7e04f2508ccf5bc8199dfd Mon Sep 17 00:00:00 2001 From: Kirill Suvorov Date: Fri, 14 Nov 2025 22:07:22 +0100 Subject: [PATCH 07/10] Apply suggestions from code review Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- site/docs/guides/tokenization.mdx | 2 +- src/js/lib/tokenizer.ts | 4 ++-- src/js/tests/tokenizer.test.js | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/site/docs/guides/tokenization.mdx b/site/docs/guides/tokenization.mdx index 1726c72606..8c6dacd952 100644 --- a/site/docs/guides/tokenization.mdx +++ b/site/docs/guides/tokenization.mdx @@ -43,7 +43,7 @@ It can be initialized from the path, in-memory IR representation or obtained fro // Or get tokenizer instance from LLMPipeline const pipe = await LLMPipeline(models_path, "CPU"); - const tokenzier = pipe.getTokenizer(); + const tokenizer = pipe.getTokenizer(); ``` diff --git a/src/js/lib/tokenizer.ts b/src/js/lib/tokenizer.ts index 81395f7a09..af664db206 100644 --- a/src/js/lib/tokenizer.ts +++ b/src/js/lib/tokenizer.ts @@ -129,7 +129,7 @@ export interface Tokenizer { * @param prompts1 - first list of prompts to encode * @param prompts2 - second list of prompts to encode * @param options - encoding options - * @returns TokenizedInputs object containing input_ids, attention_mask and optional token_type_ids tensors. + * @returns TokenizedInputs object containing input_ids, attention_mask and optional token_type_ids tensors. */ encode(prompts1: string[], prompts2: string[], options?: EncodeOptions): TokenizedInputs; @@ -138,7 +138,7 @@ export interface Tokenizer { * Input format is same as for HF paired input [[prompt_1, prompt_2], ...]. * @param prompts - list of paired prompts to encode * @param options - encoding options - * @returns TokenizedInputs object containing input_ids, attention_mask and optional token_type_ids tensors. + * @returns TokenizedInputs object containing input_ids, attention_mask and optional token_type_ids tensors. */ encode(prompts: [string, string][], options?: EncodeOptions): TokenizedInputs; diff --git a/src/js/tests/tokenizer.test.js b/src/js/tests/tokenizer.test.js index c7a9bb8193..0346366377 100644 --- a/src/js/tests/tokenizer.test.js +++ b/src/js/tests/tokenizer.test.js @@ -267,7 +267,7 @@ describe("tokenizer functions", async () => { }); // Fix getOriginalChatTemplate issue CVS-176638 - it.skip("getOriginalChatTemplate return the original string", () => { + it.skip("getOriginalChatTemplate returns the original string", () => { const originalTemplate = tokenizer.getChatTemplate(); tokenizer.setChatTemplate("Custom template: {{ messages }}"); From 3e22f65a7716493cf3422195e93fe886404362ef Mon Sep 17 00:00:00 2001 From: Kirill Suvorov Date: Fri, 14 Nov 2025 22:23:15 +0100 Subject: [PATCH 08/10] Small fixes --- site/docs/guides/tokenization.mdx | 14 ++++++++------ src/js/lib/tokenizer.ts | 2 +- 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/site/docs/guides/tokenization.mdx b/site/docs/guides/tokenization.mdx index 8c6dacd952..7405523a92 100644 --- a/site/docs/guides/tokenization.mdx +++ b/site/docs/guides/tokenization.mdx @@ -38,12 +38,14 @@ It can be initialized from the path, in-memory IR representation or obtained fro ```js import { LLMPipeline, Tokenizer } from 'openvino-genai-node'; + let tokenizer; + // Initialize from the path - const tokenizer = new Tokenizer(models_path); + tokenizer = new Tokenizer(models_path); // Or get tokenizer instance from LLMPipeline const pipe = await LLMPipeline(models_path, "CPU"); - const tokenizer = pipe.getTokenizer(); + tokenizer = pipe.getTokenizer(); ``` @@ -152,7 +154,7 @@ If `pad_to_max_length` is set to true, then instead of padding to the longest se tokens = tokenizer.encode(["The Sun is yellow because", "The"]); // or is equivalent to tokens = tokenizer.encode(["The Sun is yellow because", "The"], { pad_to_max_length: false }); - console.log(tokens.input_ids.shape); + console.log(tokens.input_ids.getShape()); // out_shape: [2, 6] // Resulting tokens tensor will be padded to 1024, sequences which exceed this length will be truncated. @@ -160,17 +162,17 @@ If `pad_to_max_length` is set to true, then instead of padding to the longest se tokens = tokenizer.encode([ "The Sun is yellow because", "The", - "The longest string ever" * 2000, + "The longest string ever".repeat(2000), ], { pad_to_max_length: true, max_length: 1024, }); - console.log(tokens.input_ids.shape); + console.log(tokens.input_ids.getShape()); // out_shape: [3, 1024] // For single string prompts truncation and padding are also applied. tokens = tokenizer.encode("The Sun is yellow because", { pad_to_max_length: true, max_length: 128 }); - console.log(tokens.input_ids.shape); + console.log(tokens.input_ids.getShape()); // out_shape: [1, 128] ``` diff --git a/src/js/lib/tokenizer.ts b/src/js/lib/tokenizer.ts index af664db206..ec3e4e2fd6 100644 --- a/src/js/lib/tokenizer.ts +++ b/src/js/lib/tokenizer.ts @@ -117,7 +117,7 @@ export interface Tokenizer { * Encodes a single prompt or a list of prompts into tokenized inputs. * @param prompts - single prompt string or array of prompts * @param options - encoding options - * @returns TokenizedInputs object containing input_ids, attention_mask and optional token_type_ids tensors. + * @returns TokenizedInputs object containing input_ids, attention_mask and optional token_type_ids tensors. */ encode(prompts: string | string[], options?: EncodeOptions): TokenizedInputs; From 4815ef83316ac930c9b1b45f6e5bb8bc82a7c6b3 Mon Sep 17 00:00:00 2001 From: Kirill Suvorov Date: Thu, 27 Nov 2025 11:19:42 +0100 Subject: [PATCH 09/10] Fix review comments --- src/js/lib/tokenizer.ts | 6 +++--- src/js/src/addon.cpp | 10 +++++----- src/js/src/helper.cpp | 2 +- src/js/tests/tokenizer.test.js | 9 ++------- 4 files changed, 11 insertions(+), 16 deletions(-) diff --git a/src/js/lib/tokenizer.ts b/src/js/lib/tokenizer.ts index ec3e4e2fd6..37e1fe5ec9 100644 --- a/src/js/lib/tokenizer.ts +++ b/src/js/lib/tokenizer.ts @@ -106,11 +106,11 @@ export interface Tokenizer { * @returns formatted prompt string */ applyChatTemplate( - chatHistory: Record[] | ChatHistory, + chatHistory: Record[] | ChatHistory, addGenerationPrompt: boolean, chatTemplate?: string, - tools?: Record[], - extraContext?: Record, + tools?: Record[], + extraContext?: Record, ): string; /** diff --git a/src/js/src/addon.cpp b/src/js/src/addon.cpp index 082f6c8532..3dbc4da9d9 100644 --- a/src/js/src/addon.cpp +++ b/src/js/src/addon.cpp @@ -20,26 +20,26 @@ void init_class(Napi::Env env, exports.Set(class_name, prototype); } -Napi::Value set_ov_addon(const Napi::CallbackInfo& info) { +void set_ov_addon(const Napi::CallbackInfo& info) { Napi::Env env = info.Env(); if (info.Length() < 1) { Napi::TypeError::New(env, "setOpenvinoAddon expects one argument").ThrowAsJavaScriptException(); - return env.Undefined(); + return; } if (info[0].IsUndefined() || info[0].IsNull() || !info[0].IsObject()) { Napi::TypeError::New(env, "Passed addon must be an object").ThrowAsJavaScriptException(); - return env.Undefined(); + return; } auto addon_data = env.GetInstanceData(); if (!addon_data) { Napi::TypeError::New(env, "Addon data is not initialized").ThrowAsJavaScriptException(); - return env.Undefined(); + return; } auto ov_addon = info[0].As(); addon_data->openvino_addon = Napi::Persistent(ov_addon); - return env.Undefined(); + return; } // Define the addon initialization function diff --git a/src/js/src/helper.cpp b/src/js/src/helper.cpp index 80fcea8400..00ddf0e2c9 100644 --- a/src/js/src/helper.cpp +++ b/src/js/src/helper.cpp @@ -104,7 +104,7 @@ std::string js_to_cpp(const Napi::Env& env, const Napi::Value& valu template <> int64_t js_to_cpp(const Napi::Env& env, const Napi::Value& value) { - OPENVINO_ASSERT(value.IsNumber() || value.IsBigInt(), "Passed argument must be of type Number."); + OPENVINO_ASSERT(value.IsNumber() || value.IsBigInt(), "Passed argument must be of type Number or BigInt."); if (value.IsNumber()) { return value.As().Int64Value(); } diff --git a/src/js/tests/tokenizer.test.js b/src/js/tests/tokenizer.test.js index 0346366377..dba1474540 100644 --- a/src/js/tests/tokenizer.test.js +++ b/src/js/tests/tokenizer.test.js @@ -247,11 +247,6 @@ describe("tokenizer functions", async () => { assert.strictEqual(typeof token, "bigint"); }); - it("getChatTemplate return string", () => { - const template = tokenizer.getChatTemplate(); - assert.strictEqual(typeof template, "string"); - }); - it("setChatTemplate updates template", () => { const originalTemplate = tokenizer.getChatTemplate(); assert.strictEqual(typeof originalTemplate, "string"); @@ -327,8 +322,8 @@ describe("tokenizer functions", async () => { assert.strictEqual(typeof decoded, "string"); }); - // Change model to fix skip_special_tokens functionality CVS-176639 - it("decode with skip_special_tokens option", () => { + // Fix skip_special_tokens functionality CVS-176639 + it.skip("decode with skip_special_tokens option", () => { const eos = tokenizer.getEosToken(); const eosId = tokenizer.getEosTokenId(); const tokenIds = [10n, 20n, 30n, eosId]; From 77cf065baf18a104c82195d50ec85a15ada79a05 Mon Sep 17 00:00:00 2001 From: Kirill Suvorov Date: Thu, 27 Nov 2025 12:20:56 +0100 Subject: [PATCH 10/10] Update pinned OV commit --- .github/workflows/coverity.yml | 2 +- .github/workflows/linux.yml | 2 +- .github/workflows/mac.yml | 2 +- .github/workflows/manylinux_2_28.yml | 2 +- .github/workflows/windows.yml | 2 +- src/js/src/addon.cpp | 1 - 6 files changed, 5 insertions(+), 6 deletions(-) diff --git a/.github/workflows/coverity.yml b/.github/workflows/coverity.yml index c4a19e7090..465fc3f448 100644 --- a/.github/workflows/coverity.yml +++ b/.github/workflows/coverity.yml @@ -43,7 +43,7 @@ jobs: with: platform: ubuntu22 commit_packages_to_provide: wheels - revision: fcf7c2964cf460ecfcb039f748d1f4028626d58c + revision: 17efa42572fcac28b14cde43c8af1ca79fc2f800 # Set specific revision and uncomment to use OV from its PR build: # branch_name: master # event_name: pull_request diff --git a/.github/workflows/linux.yml b/.github/workflows/linux.yml index a4ad20710b..15c89b987c 100644 --- a/.github/workflows/linux.yml +++ b/.github/workflows/linux.yml @@ -93,7 +93,7 @@ jobs: with: platform: ubuntu22 commit_packages_to_provide: wheels,developer_package.tar.gz,openvino_node_npm_package.tar.gz - revision: fcf7c2964cf460ecfcb039f748d1f4028626d58c + revision: 17efa42572fcac28b14cde43c8af1ca79fc2f800 # Set specific revision and uncomment to use OV from its PR build: # branch_name: master # event_name: pull_request diff --git a/.github/workflows/mac.yml b/.github/workflows/mac.yml index 86dedec156..127dd4b57c 100644 --- a/.github/workflows/mac.yml +++ b/.github/workflows/mac.yml @@ -85,7 +85,7 @@ jobs: platform: macos_14_7 arch: 'arm64' commit_packages_to_provide: wheels,developer_package.tar.gz,openvino_node_npm_package.tar.gz - revision: fcf7c2964cf460ecfcb039f748d1f4028626d58c + revision: 17efa42572fcac28b14cde43c8af1ca79fc2f800 # Set specific revision and uncomment to use OV from its PR build: # branch_name: master # event_name: pull_request diff --git a/.github/workflows/manylinux_2_28.yml b/.github/workflows/manylinux_2_28.yml index 1be636623b..e39b44d5af 100644 --- a/.github/workflows/manylinux_2_28.yml +++ b/.github/workflows/manylinux_2_28.yml @@ -93,7 +93,7 @@ jobs: with: platform: almalinux8 commit_packages_to_provide: wheels,developer_package.tar.gz,openvino_node_npm_package.tar.gz - revision: fcf7c2964cf460ecfcb039f748d1f4028626d58c + revision: 17efa42572fcac28b14cde43c8af1ca79fc2f800 - name: Clone docker tag from OpenVINO repo uses: actions/checkout@93cb6efe18208431cddfb8368fd83d5badbf9bfd # v5.0.1 diff --git a/.github/workflows/windows.yml b/.github/workflows/windows.yml index 93a03be433..6607fc795a 100644 --- a/.github/workflows/windows.yml +++ b/.github/workflows/windows.yml @@ -89,7 +89,7 @@ jobs: with: platform: windows commit_packages_to_provide: wheels,openvino_node_npm_package.zip - revision: fcf7c2964cf460ecfcb039f748d1f4028626d58c + revision: 17efa42572fcac28b14cde43c8af1ca79fc2f800 # Set specific revision and uncomment to use OV from its PR build: # branch_name: master # event_name: pull_request diff --git a/src/js/src/addon.cpp b/src/js/src/addon.cpp index 3dbc4da9d9..72cb3b6b16 100644 --- a/src/js/src/addon.cpp +++ b/src/js/src/addon.cpp @@ -39,7 +39,6 @@ void set_ov_addon(const Napi::CallbackInfo& info) { auto ov_addon = info[0].As(); addon_data->openvino_addon = Napi::Persistent(ov_addon); - return; } // Define the addon initialization function