diff --git a/.github/workflows/coverity.yml b/.github/workflows/coverity.yml
index c4a19e7090..465fc3f448 100644
--- a/.github/workflows/coverity.yml
+++ b/.github/workflows/coverity.yml
@@ -43,7 +43,7 @@ jobs:
     with:
       platform: ubuntu22
       commit_packages_to_provide: wheels
-      revision: fcf7c2964cf460ecfcb039f748d1f4028626d58c
+      revision: 17efa42572fcac28b14cde43c8af1ca79fc2f800
       # Set specific revision and uncomment to use OV from its PR build:
       # branch_name: master
       # event_name: pull_request
diff --git a/.github/workflows/linux.yml b/.github/workflows/linux.yml
index a4ad20710b..15c89b987c 100644
--- a/.github/workflows/linux.yml
+++ b/.github/workflows/linux.yml
@@ -93,7 +93,7 @@ jobs:
     with:
       platform: ubuntu22
       commit_packages_to_provide: wheels,developer_package.tar.gz,openvino_node_npm_package.tar.gz
-      revision: fcf7c2964cf460ecfcb039f748d1f4028626d58c
+      revision: 17efa42572fcac28b14cde43c8af1ca79fc2f800
       # Set specific revision and uncomment to use OV from its PR build:
       # branch_name: master
       # event_name: pull_request
diff --git a/.github/workflows/mac.yml b/.github/workflows/mac.yml
index 86dedec156..127dd4b57c 100644
--- a/.github/workflows/mac.yml
+++ b/.github/workflows/mac.yml
@@ -85,7 +85,7 @@
       platform: macos_14_7
       arch: 'arm64'
       commit_packages_to_provide: wheels,developer_package.tar.gz,openvino_node_npm_package.tar.gz
-      revision: fcf7c2964cf460ecfcb039f748d1f4028626d58c
+      revision: 17efa42572fcac28b14cde43c8af1ca79fc2f800
       # Set specific revision and uncomment to use OV from its PR build:
       # branch_name: master
       # event_name: pull_request
diff --git a/.github/workflows/manylinux_2_28.yml b/.github/workflows/manylinux_2_28.yml
index 1be636623b..e39b44d5af 100644
--- a/.github/workflows/manylinux_2_28.yml
+++ b/.github/workflows/manylinux_2_28.yml
@@ -93,7 +93,7 @@ jobs:
     with:
       platform: almalinux8
       commit_packages_to_provide: wheels,developer_package.tar.gz,openvino_node_npm_package.tar.gz
-      revision: fcf7c2964cf460ecfcb039f748d1f4028626d58c
+      revision: 17efa42572fcac28b14cde43c8af1ca79fc2f800

     - name: Clone docker tag from OpenVINO repo
       uses: actions/checkout@93cb6efe18208431cddfb8368fd83d5badbf9bfd # v5.0.1
diff --git a/.github/workflows/windows.yml b/.github/workflows/windows.yml
index 93a03be433..6607fc795a 100644
--- a/.github/workflows/windows.yml
+++ b/.github/workflows/windows.yml
@@ -89,7 +89,7 @@ jobs:
     with:
       platform: windows
       commit_packages_to_provide: wheels,openvino_node_npm_package.zip
-      revision: fcf7c2964cf460ecfcb039f748d1f4028626d58c
+      revision: 17efa42572fcac28b14cde43c8af1ca79fc2f800
       # Set specific revision and uncomment to use OV from its PR build:
       # branch_name: master
       # event_name: pull_request
diff --git a/samples/js/text_generation/benchmark_genai.js b/samples/js/text_generation/benchmark_genai.js
index 1ae77a323f..7aa0d5136d 100644
--- a/samples/js/text_generation/benchmark_genai.js
+++ b/samples/js/text_generation/benchmark_genai.js
@@ -90,6 +90,10 @@ async function main() {
     pipe = await LLMPipeline(modelsPath, device, { schedulerConfig: schedulerConfig });
   }

+  const inputData = await pipe.getTokenizer().encode(prompt);
+  const promptTokenSize = inputData.input_ids.getShape()[1];
+  console.log(`Prompt token size: ${promptTokenSize}`);
+
   for (let i = 0; i < numWarmup; i++) {
     await pipe.generate(prompt, config);
   }
diff --git a/site/docs/bindings/node-js.md b/site/docs/bindings/node-js.md
index 1e7d59fb13..7abaef3504 100644
--- a/site/docs/bindings/node-js.md
+++ b/site/docs/bindings/node-js.md
@@ -24,6 +24,12 @@ Node.js bindings currently support:
 - Structured output
 - ReAct agent support
 - `TextEmbeddingPipeline`: Generate text embeddings for semantic search and RAG applications
+- `Tokenizer`: Fast tokenization / detokenization and chat prompt formatting
+  - Encode strings into token ID and attention mask tensors
+  - Decode token sequences
+  - Apply chat template
+  - Access special tokens (BOS/EOS/PAD)
+  - Supports paired input

 ## Installation
diff --git a/site/docs/guides/tokenization.mdx b/site/docs/guides/tokenization.mdx
index 7d2c9d6c62..7405523a92 100644
--- a/site/docs/guides/tokenization.mdx
+++ b/site/docs/guides/tokenization.mdx
@@ -34,6 +34,20 @@ It can be initialized from the path, in-memory IR representation or obtained fro
     auto tokenzier = pipe.get_tokenizer();
     ```
+
+    ```js
+    import { LLMPipeline, Tokenizer } from 'openvino-genai-node';
+
+    let tokenizer;
+
+    // Initialize from the path
+    tokenizer = new Tokenizer(models_path);
+
+    // Or get the tokenizer instance from LLMPipeline
+    const pipe = await LLMPipeline(models_path, "CPU");
+    tokenizer = pipe.getTokenizer();
+    ```
+

 `Tokenizer` has `encode()` and `decode()` methods which support the following arguments: `add_special_tokens`, `skip_special_tokens`, `pad_to_max_length`, `max_length`.
@@ -51,6 +65,11 @@ It can be initialized from the path, in-memory IR representation or obtained fro
     auto tokens = tokenizer.encode("The Sun is yellow because", ov::genai::add_special_tokens(false));
     ```
+
+    ```js
+    const tokens = tokenizer.encode("The Sun is yellow because", { add_special_tokens: false });
+    ```
+

 The `encode()` method returns a [`TokenizedInputs`](https://docs.openvino.ai/2025/api/genai_api/_autosummary/openvino_genai.TokenizedInputs.html) object containing `input_ids` and `attention_mask`, both stored as `ov::Tensor`.
@@ -121,4 +140,40 @@ If `pad_to_max_length` is set to true, then instead of padding to the longest se
     // out_shape: [1, 128]
     ```
+
+    ```js
+    import { Tokenizer } from 'openvino-genai-node';
+
+    const tokenizer = new Tokenizer(models_path);
+    const prompts = ["The Sun is yellow because", "The"];
+    let tokens;
+
+    // Since the prompts are definitely shorter than the maximal length (taken from the IR), that length does not affect the shape.
+    // The resulting shape is defined by the length of the longest token sequence.
+    // Equivalent of HuggingFace hf_tokenizer.encode(prompt, padding="longest", truncation=True)
+    tokens = tokenizer.encode(["The Sun is yellow because", "The"]);
+    // which is equivalent to
+    tokens = tokenizer.encode(["The Sun is yellow because", "The"], { pad_to_max_length: false });
+    console.log(tokens.input_ids.getShape());
+    // out_shape: [2, 6]
+
+    // The resulting tokens tensor will be padded to 1024; sequences which exceed this length will be truncated.
+    // Equivalent of HuggingFace hf_tokenizer.encode(prompt, padding="max_length", truncation=True, max_length=1024)
+    tokens = tokenizer.encode([
+      "The Sun is yellow because",
+      "The",
+      "The longest string ever".repeat(2000),
+    ], {
+      pad_to_max_length: true,
+      max_length: 1024,
+    });
+    console.log(tokens.input_ids.getShape());
+    // out_shape: [3, 1024]
+
+    // For single-string prompts, truncation and padding are also applied.
+ tokens = tokenizer.encode("The Sun is yellow because", { pad_to_max_length: true, max_length: 128 }); + console.log(tokens.input_ids.getShape()); + // out_shape: [1, 128] + ``` + diff --git a/src/js/eslint.config.cjs b/src/js/eslint.config.cjs index b69cf72f62..a8c41a70d2 100644 --- a/src/js/eslint.config.cjs +++ b/src/js/eslint.config.cjs @@ -53,6 +53,13 @@ module.exports = defineConfig([ "json_schema", "structured_output_config", "structural_tags_config", + "skip_special_tokens", + "add_special_tokens", + "pad_to_max_length", + "max_length", + "padding_side", + "add_second_input", + "number_of_inputs", ], }, ], diff --git a/src/js/include/addon.hpp b/src/js/include/addon.hpp index f2a23b026c..28371ba822 100644 --- a/src/js/include/addon.hpp +++ b/src/js/include/addon.hpp @@ -12,6 +12,7 @@ struct AddonData { Napi::FunctionReference tokenizer; Napi::FunctionReference perf_metrics; Napi::FunctionReference chat_history; + Napi::ObjectReference openvino_addon; }; void init_class(Napi::Env env, diff --git a/src/js/include/helper.hpp b/src/js/include/helper.hpp index a28f7c071c..55370d91e3 100644 --- a/src/js/include/helper.hpp +++ b/src/js/include/helper.hpp @@ -37,9 +37,13 @@ ov::AnyMap js_to_cpp(const Napi::Env& env, const Napi::Value& value) /** @brief A template specialization for TargetType std::string */ template <> std::string js_to_cpp(const Napi::Env& env, const Napi::Value& value); +template <> +int64_t js_to_cpp(const Napi::Env& env, const Napi::Value& value); /** @brief A template specialization for TargetType std::vector */ template <> std::vector js_to_cpp>(const Napi::Env& env, const Napi::Value& value); +template <> +std::vector js_to_cpp>(const Napi::Env& env, const Napi::Value& value); /** @brief A template specialization for TargetType GenerateInputs */ template <> GenerateInputs js_to_cpp(const Napi::Env& env, const Napi::Value& value); @@ -58,6 +62,8 @@ ov::genai::StructuredOutputConfig::Tag js_to_cpp ov::genai::StructuredOutputConfig::StructuralTag js_to_cpp(const Napi::Env& env, const Napi::Value& value); +template <> +ov::Tensor js_to_cpp(const Napi::Env& env, const Napi::Value& value); /** * @brief Unwraps a C++ object from a JavaScript wrapper. * @tparam TargetType The C++ class type to extract. @@ -110,6 +116,12 @@ Napi::Value cpp_to_js, Napi::Value>(const Napi::Env& env, co template <> Napi::Value cpp_to_js(const Napi::Env& env, const ov::genai::JsonContainer& json_container); + +template <> +Napi::Value cpp_to_js(const Napi::Env& env, const ov::Tensor& tensor); + +template <> +Napi::Value cpp_to_js(const Napi::Env& env, const ov::genai::TokenizedInputs& tokenized_inputs); /** * @brief Template function to convert C++ map into Javascript Object. Map key must be std::string. * @tparam MapElementType C++ data type of map elements. 
@@ -130,3 +142,5 @@ bool is_chat_history(const Napi::Env& env, const Napi::Value& value); std::string json_stringify(const Napi::Env& env, const Napi::Value& value); Napi::Value json_parse(const Napi::Env& env, const std::string& value); + +Napi::Function get_prototype_from_ov_addon(const Napi::Env& env, const std::string& ctor_name); diff --git a/src/js/include/tokenizer.hpp b/src/js/include/tokenizer.hpp index a7e6f3ef42..c1d0386525 100644 --- a/src/js/include/tokenizer.hpp +++ b/src/js/include/tokenizer.hpp @@ -15,6 +15,12 @@ class TokenizerWrapper : public Napi::ObjectWrap { Napi::Value get_eos_token_id(const Napi::CallbackInfo& info); Napi::Value get_pad_token(const Napi::CallbackInfo& info); Napi::Value get_pad_token_id(const Napi::CallbackInfo& info); + Napi::Value get_chat_template(const Napi::CallbackInfo& info); + Napi::Value get_original_chat_template(const Napi::CallbackInfo& info); + Napi::Value set_chat_template(const Napi::CallbackInfo& info); + Napi::Value supports_paired_input(const Napi::CallbackInfo& info); + Napi::Value encode(const Napi::CallbackInfo& info); + Napi::Value decode(const Napi::CallbackInfo& info); private: ov::genai::Tokenizer _tokenizer; }; diff --git a/src/js/lib/addon.ts b/src/js/lib/addon.ts index f8b9abee61..b6023e5a09 100644 --- a/src/js/lib/addon.ts +++ b/src/js/lib/addon.ts @@ -2,6 +2,8 @@ import { createRequire } from "module"; import { platform } from "node:os"; import { join, dirname, resolve } from "node:path"; import type { ChatHistory as IChatHistory } from "./chatHistory.js"; +import type { Tokenizer as ITokenizer } from "./tokenizer.js"; +import { addon as ovAddon } from "openvino-node"; export type EmbeddingResult = Float32Array | Int8Array | Uint8Array; export type EmbeddingResults = Float32Array[] | Int8Array[] | Uint8Array[]; @@ -60,6 +62,8 @@ interface OpenVINOGenAIAddon { TextEmbeddingPipeline: TextEmbeddingPipelineWrapper; LLMPipeline: any; ChatHistory: IChatHistory; + Tokenizer: ITokenizer; + setOpenvinoAddon: (ovAddon: any) => void; } // We need to use delayed import to get an updated Path if required @@ -78,7 +82,8 @@ function getGenAIAddon(): OpenVINOGenAIAddon { } const addon = getGenAIAddon(); +addon.setOpenvinoAddon(ovAddon); -export const { ChatHistory } = addon; +export const { TextEmbeddingPipeline, LLMPipeline, ChatHistory, Tokenizer } = addon; export type ChatHistory = IChatHistory; -export default addon; +export type Tokenizer = ITokenizer; diff --git a/src/js/lib/index.ts b/src/js/lib/index.ts index cd673a6797..ad8e49168f 100644 --- a/src/js/lib/index.ts +++ b/src/js/lib/index.ts @@ -40,3 +40,5 @@ export const { LLMPipeline, TextEmbeddingPipeline } = PipelineFactory; export { DecodedResults } from "./pipelines/llmPipeline.js"; export * from "./utils.js"; export * from "./addon.js"; +export type { TokenizedInputs, EncodeOptions, DecodeOptions } from "./tokenizer.js"; +export type { ChatMessage, ExtraContext, ToolDefinition } from "./chatHistory.js"; diff --git a/src/js/lib/pipelines/llmPipeline.ts b/src/js/lib/pipelines/llmPipeline.ts index b03146e0d2..01fe1aa5f3 100644 --- a/src/js/lib/pipelines/llmPipeline.ts +++ b/src/js/lib/pipelines/llmPipeline.ts @@ -1,6 +1,7 @@ import util from "node:util"; -import addon, { ChatHistory } from "../addon.js"; +import { ChatHistory, LLMPipeline as LLMPipelineWrap } from "../addon.js"; import { GenerationConfig, StreamingStatus, LLMPipelineProperties } from "../utils.js"; +import { Tokenizer } from "../tokenizer.js"; export type ResolveFunction = (arg: { value: string; done: boolean 
}) => void; export type Options = { @@ -8,23 +9,6 @@ export type Options = { max_new_tokens?: number; }; -interface Tokenizer { - /** Applies a chat template to format chat history into a prompt string. */ - applyChatTemplate( - chatHistory: Record[] | ChatHistory, - addGenerationPrompt: boolean, - chatTemplate?: string, - tools?: Record[], - extraContext?: Record, - ): string; - getBosToken(): string; - getBosTokenId(): number; - getEosToken(): string; - getEosTokenId(): number; - getPadToken(): string; - getPadTokenId(): number; -} - /** Structure with raw performance metrics for each generation before any statistics are calculated. */ export type RawMetrics = { /** Durations for each generate call in milliseconds. */ @@ -167,7 +151,7 @@ export class LLMPipeline { async init() { if (this.isInitialized) throw new Error("LLMPipeline is already initialized"); - this.pipeline = new addon.LLMPipeline(); + this.pipeline = new LLMPipelineWrap(); const initPromise = util.promisify(this.pipeline.init.bind(this.pipeline)); const result = await initPromise(this.modelPath, this.device, this.properties); diff --git a/src/js/lib/pipelines/textEmbeddingPipeline.ts b/src/js/lib/pipelines/textEmbeddingPipeline.ts index 5517b7ad97..44c4d45fa1 100644 --- a/src/js/lib/pipelines/textEmbeddingPipeline.ts +++ b/src/js/lib/pipelines/textEmbeddingPipeline.ts @@ -1,9 +1,10 @@ import util from "node:util"; -import addon, { +import { TextEmbeddingPipelineWrapper, EmbeddingResult, EmbeddingResults, TextEmbeddingConfig, + TextEmbeddingPipeline as TextEmbeddingPipelineWrap, } from "../addon.js"; export class TextEmbeddingPipeline { @@ -29,7 +30,7 @@ export class TextEmbeddingPipeline { async init() { if (this.pipeline) throw new Error("TextEmbeddingPipeline is already initialized"); - this.pipeline = new addon.TextEmbeddingPipeline(); + this.pipeline = new TextEmbeddingPipelineWrap(); const initPromise = util.promisify(this.pipeline.init.bind(this.pipeline)); await initPromise(this.modelPath, this.device, this.config, this.ovProperties); diff --git a/src/js/lib/tokenizer.ts b/src/js/lib/tokenizer.ts new file mode 100644 index 0000000000..37e1fe5ec9 --- /dev/null +++ b/src/js/lib/tokenizer.ts @@ -0,0 +1,228 @@ +/* eslint-disable @typescript-eslint/no-misused-new */ +// Copyright (C) 2025 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +import type { Tensor } from "openvino-node"; +import type { ChatHistory } from "./chatHistory.js"; + +/** + * TokenizedInputs contains input_ids, attention_mask and (optionally) token_type_ids tensors. + * token_type_ids is returned if the tokenizer supports paired input, otherwise the field is undefined. + * This is the result of encoding prompts using the Tokenizer. + */ +export interface TokenizedInputs { + /** Tensor containing token IDs of the encoded input */ + input_ids: Tensor; + /** Tensor containing attention mask (1 for real tokens, 0 for padding) */ + attention_mask: Tensor; + /** + * Optional tensor with token type IDs (segment ids) for paired inputs. + * Present only if the model/tokenizer supports paired input. + */ + token_type_ids?: Tensor; +} + +/** + * Options for encode method. + */ +export interface EncodeOptions { + /** + * Whether to add special tokens like BOS, EOS, PAD. + * @defaultValue true + */ + add_special_tokens?: boolean; + + /** + * Whether to pad the sequence to the maximum length. + * @defaultValue false + */ + pad_to_max_length?: boolean; + + /** + * Maximum length of the sequence. + * If undefined, the value will be taken from the IR. 
+ */ + max_length?: number; + + /** + * Side to pad the sequence, can be 'left' or 'right'. + * If undefined, the value will be taken from the IR. + */ + padding_side?: "left" | "right"; +} + +/** + * Options for decode method. + */ +export interface DecodeOptions { + /** + * Whether to skip special tokens like BOS, EOS, PAD during detokenization. + * @defaultValue true + */ + skip_special_tokens?: boolean; +} + +/** + * The Tokenizer class is used to encode prompts and decode resulting tokens. + * + * Chat template is initialized from sources in the following order, overriding the previous value: + * 1. chat_template entry from tokenizer_config.json + * 2. chat_template entry from processor_config.json + * 3. chat_template entry from chat_template.json + * 4. chat_template entry from rt_info section of openvino.Model + * 5. If the template is known to be not supported by GenAI, it's replaced with a simplified supported version. + */ +export interface Tokenizer { + /** + * Load tokenizer and detokenizer IRs by path. + * @param tokenizerPath Path to a directory containing tokenizer/detokenizer XML/BIN files. + * @param properties Optional OpenVINO compilation properties. + */ + new (tokenizerPath: string, properties?: Record): Tokenizer; + + /** + * Create tokenizer from already loaded IR contents. + * @param tokenizerModel Tokenizer XML string. + * @param tokenizerWeights Tokenizer weights tensor. + * @param detokenizerModel Detokenizer XML string. + * @param detokenizerWeights Detokenizer weights tensor. + * @param properties Optional OpenVINO compilation properties. + */ + new ( + tokenizerModel: string, + tokenizerWeights: Tensor, + detokenizerModel: string, + detokenizerWeights: Tensor, + properties?: Record, + ): Tokenizer; + + /** + * Applies a chat template to format chat history into a prompt string. + * @param chatHistory - chat history as an array of message objects or ChatHistory instance + * @param addGenerationPrompt - whether to add a generation prompt at the end + * @param chatTemplate - optional custom chat template to use instead of the default + * @param tools - optional array of tool definitions for function calling + * @param extraContext - optional extra context object for custom template variables + * @returns formatted prompt string + */ + applyChatTemplate( + chatHistory: Record[] | ChatHistory, + addGenerationPrompt: boolean, + chatTemplate?: string, + tools?: Record[], + extraContext?: Record, + ): string; + + /** + * Encodes a single prompt or a list of prompts into tokenized inputs. + * @param prompts - single prompt string or array of prompts + * @param options - encoding options + * @returns TokenizedInputs object containing input_ids, attention_mask and optional token_type_ids tensors. + */ + encode(prompts: string | string[], options?: EncodeOptions): TokenizedInputs; + + /** + * Encodes two lists of prompts into tokenized inputs (for paired input). + * The number of strings must be the same, or one of the inputs can contain one string. + * In the latter case, the single-string input will be broadcast into the shape of the other input, + * which is more efficient than repeating the string in pairs. + * @param prompts1 - first list of prompts to encode + * @param prompts2 - second list of prompts to encode + * @param options - encoding options + * @returns TokenizedInputs object containing input_ids, attention_mask and optional token_type_ids tensors. 
+ */ + encode(prompts1: string[], prompts2: string[], options?: EncodeOptions): TokenizedInputs; + + /** + * Encodes a list of paired prompts into tokenized inputs. + * Input format is same as for HF paired input [[prompt_1, prompt_2], ...]. + * @param prompts - list of paired prompts to encode + * @param options - encoding options + * @returns TokenizedInputs object containing input_ids, attention_mask and optional token_type_ids tensors. + */ + encode(prompts: [string, string][], options?: EncodeOptions): TokenizedInputs; + + /** + * Decode a sequence of token IDs into a string prompt. + * + * @param tokens - sequence of token IDs to decode + * @param options - decoding options + * @returns decoded string. + */ + decode(tokens: number[] | bigint[], options?: DecodeOptions): string; + + /** + * Decode a batch of token sequences (as Tensor or array of arrays) into a list of string prompts. + * + * @param tokens - tensor containing token IDs or batch of token ID sequences + * @param options - decoding options + * @returns list of decoded strings. + */ + decode(tokens: Tensor | number[][] | bigint[][], options?: DecodeOptions): string[]; + + /** + * Returns the BOS (Beginning of Sequence) token string. + * @returns BOS token string + */ + getBosToken(): string; + + /** + * Returns the BOS (Beginning of Sequence) token ID. + * @returns BOS token ID + */ + getBosTokenId(): bigint; + + /** + * Returns the EOS (End of Sequence) token string. + * @returns EOS token string + */ + getEosToken(): string; + + /** + * Returns the EOS (End of Sequence) token ID. + * @returns EOS token ID + */ + getEosTokenId(): bigint; + + /** + * Returns the PAD (Padding) token string. + * @returns PAD token string + */ + getPadToken(): string; + + /** + * Returns the PAD (Padding) token ID. + * @returns PAD token ID + */ + getPadTokenId(): bigint; + + /** + * Returns the current chat template string. + * @returns current chat template string + */ + getChatTemplate(): string; + + /** + * Returns the original chat template from the tokenizer configuration. + * @returns original chat template string + */ + getOriginalChatTemplate(): string; + + /** + * Override a chat template read from tokenizer_config.json. + * @param chatTemplate - custom chat template string to use + */ + setChatTemplate(chatTemplate: string): void; + + /** + * Returns true if the tokenizer supports paired input, false otherwise. + * @returns whether the tokenizer supports paired input + */ + supportsPairedInput(): boolean; + + /** + * The current chat template string. + * Can be used to get or set the chat template. 
+ */ + chatTemplate: string; +} diff --git a/src/js/src/addon.cpp b/src/js/src/addon.cpp index 210673da93..72cb3b6b16 100644 --- a/src/js/src/addon.cpp +++ b/src/js/src/addon.cpp @@ -20,6 +20,27 @@ void init_class(Napi::Env env, exports.Set(class_name, prototype); } +void set_ov_addon(const Napi::CallbackInfo& info) { + Napi::Env env = info.Env(); + if (info.Length() < 1) { + Napi::TypeError::New(env, "setOpenvinoAddon expects one argument").ThrowAsJavaScriptException(); + return; + } + if (info[0].IsUndefined() || info[0].IsNull() || !info[0].IsObject()) { + Napi::TypeError::New(env, "Passed addon must be an object").ThrowAsJavaScriptException(); + return; + } + + auto addon_data = env.GetInstanceData(); + if (!addon_data) { + Napi::TypeError::New(env, "Addon data is not initialized").ThrowAsJavaScriptException(); + return; + } + + auto ov_addon = info[0].As(); + addon_data->openvino_addon = Napi::Persistent(ov_addon); +} + // Define the addon initialization function Napi::Object init_module(Napi::Env env, Napi::Object exports) { auto addon_data = new AddonData(); @@ -31,6 +52,9 @@ Napi::Object init_module(Napi::Env env, Napi::Object exports) { init_class(env, exports, "PerfMetrics", &PerfMetricsWrapper::get_class, addon_data->perf_metrics); init_class(env, exports, "ChatHistory", &ChatHistoryWrap::get_class, addon_data->chat_history); + // Expose a helper to set the openvino-node addon from JS (useful for ESM) + exports.Set("setOpenvinoAddon", Napi::Function::New(env, set_ov_addon)); + return exports; } diff --git a/src/js/src/helper.cpp b/src/js/src/helper.cpp index b282ba0636..00ddf0e2c9 100644 --- a/src/js/src/helper.cpp +++ b/src/js/src/helper.cpp @@ -102,6 +102,18 @@ std::string js_to_cpp(const Napi::Env& env, const Napi::Value& valu return value.As().Utf8Value(); } +template <> +int64_t js_to_cpp(const Napi::Env& env, const Napi::Value& value) { + OPENVINO_ASSERT(value.IsNumber() || value.IsBigInt(), "Passed argument must be of type Number or BigInt."); + if (value.IsNumber()) { + return value.As().Int64Value(); + } + bool lossless; + auto result = value.As().Int64Value(&lossless); + OPENVINO_ASSERT(lossless, "BigInt value is too large to fit in int64_t without precision loss."); + return result; +} + template <> std::vector js_to_cpp>(const Napi::Env& env, const Napi::Value& value) { if (value.IsArray()) { @@ -123,6 +135,20 @@ std::vector js_to_cpp>(const Napi::Env& en } } +template <> +std::vector js_to_cpp>(const Napi::Env& env, const Napi::Value& value) { + OPENVINO_ASSERT(value.IsArray(), "Passed argument must be of type Array."); + auto array = value.As(); + size_t arrayLength = array.Length(); + + std::vector vector; + vector.reserve(arrayLength); + for (uint32_t i = 0; i < arrayLength; ++i) { + vector.push_back(js_to_cpp(env, array[i])); + } + return vector; +} + template <> ov::genai::JsonContainer js_to_cpp(const Napi::Env& env, const Napi::Value& value) { OPENVINO_ASSERT(value.IsObject() || value.IsArray(), "JsonContainer must be a JS object or an array but got " + std::string(value.ToString().Utf8Value())); @@ -292,6 +318,23 @@ ov::genai::StructuredOutputConfig js_to_cpp(c return config; } +template <> +ov::Tensor js_to_cpp(const Napi::Env& env, const Napi::Value& value) { + OPENVINO_ASSERT(value.IsObject(), "Passed argument must be an object."); + + auto tensor_wrap = value.As(); + auto tensor_prototype = get_prototype_from_ov_addon(env, "Tensor"); + OPENVINO_ASSERT(tensor_wrap.InstanceOf(tensor_prototype), "Passed argument is not of type Tensor"); + + auto 
native_tensor_func = tensor_wrap.Get("__getExternalTensor").As(); + Napi::Value native_tensor_value = native_tensor_func.Call(tensor_wrap, {}); + OPENVINO_ASSERT(native_tensor_value.IsExternal(), "__getExternalTensor() did not return an External object."); + + auto external = native_tensor_value.As>(); + auto tensor_ptr = external.Data(); + return *tensor_ptr; +} + template <> ov::genai::PerfMetrics& unwrap(const Napi::Env& env, const Napi::Value& value) { const auto obj = value.As(); @@ -417,6 +460,38 @@ Napi::Value cpp_to_js(const Napi::Env& en return json_parse(env, json_container.to_json_string()); } +template <> +Napi::Value cpp_to_js(const Napi::Env& env, const ov::Tensor& tensor) { + try { + auto prototype = get_prototype_from_ov_addon(env, "Tensor"); + + auto external = Napi::External::New(env, new ov::Tensor(tensor), + [](Napi::Env /*env*/, ov::Tensor* external_tensor) { + delete external_tensor; + }); + auto tensor_wrap = prototype.New({ external }); + + return tensor_wrap; + } catch (const ov::Exception& e) { + Napi::Error::New(env, std::string("Cannot create Tensor wrapper: ") + e.what()).ThrowAsJavaScriptException(); + return env.Undefined(); + } +} + +template <> +Napi::Value cpp_to_js(const Napi::Env& env, const ov::genai::TokenizedInputs& tokenized_inputs) { + auto js_object = Napi::Object::New(env); + + js_object.Set("input_ids", cpp_to_js(env, tokenized_inputs.input_ids)); + js_object.Set("attention_mask", cpp_to_js(env, tokenized_inputs.attention_mask)); + // token_type_ids is optional and present only for paired inputs + if (tokenized_inputs.token_type_ids.has_value()) { + js_object.Set("token_type_ids", cpp_to_js(env, tokenized_inputs.token_type_ids.value())); + } + + return js_object; +} + bool is_napi_value_int(const Napi::Env& env, const Napi::Value& num) { return env.Global().Get("Number").ToObject().Get("isInteger").As().Call({num}).ToBoolean().Value(); } @@ -449,3 +524,16 @@ Napi::Value json_parse(const Napi::Env& env, const std::string& value) { .As() .Call({ Napi::String::New(env, value) }); } + +Napi::Function get_prototype_from_ov_addon(const Napi::Env& env, const std::string& ctor_name) { + auto addon_data = env.GetInstanceData(); + OPENVINO_ASSERT(!addon_data->openvino_addon.IsEmpty(), "Addon data is not initialized"); + Napi::Value ov_addon = addon_data->openvino_addon.Value(); + OPENVINO_ASSERT(!ov_addon.IsUndefined() && !ov_addon.IsNull() && ov_addon.IsObject(), "OV addon value is not an object"); + Napi::Object addon_obj = ov_addon.As(); + OPENVINO_ASSERT(addon_obj.Has(ctor_name), std::string("OV addon does not export '") + ctor_name + "' class"); + Napi::Value ctor_val = addon_obj.Get(ctor_name); + OPENVINO_ASSERT(ctor_val.IsFunction(), ctor_name + std::string(" is not a prototype")); + + return ctor_val.As(); +} diff --git a/src/js/src/tokenizer.cpp b/src/js/src/tokenizer.cpp index 1cf9c822d7..a8f1c72380 100644 --- a/src/js/src/tokenizer.cpp +++ b/src/js/src/tokenizer.cpp @@ -2,7 +2,51 @@ #include "include/helper.hpp" #include "include/tokenizer.hpp" -TokenizerWrapper::TokenizerWrapper(const Napi::CallbackInfo& info) : Napi::ObjectWrap(info) {}; +TokenizerWrapper::TokenizerWrapper(const Napi::CallbackInfo& info) : Napi::ObjectWrap(info) { + if (info.Length() == 0) { + return; + } + + auto env = info.Env(); + try { + if (info.Length() == 1 || info.Length() == 2) { + OPENVINO_ASSERT(info[0].IsString(), "Tokenizer constructor expects 'tokenizerPath' to be a string"); + const auto tokenizer_path = js_to_cpp(env, info[0]); + ov::AnyMap properties; + if 
(info.Length() == 2) { + properties = js_to_cpp(env, info[1]); + } + this->_tokenizer = ov::genai::Tokenizer(tokenizer_path, properties); + return; + } + + OPENVINO_ASSERT(info.Length() == 4 || info.Length() == 5, + "Tokenizer constructor expects 1-2 arguments (path[, properties]) or 4-5 arguments (models, tensors[, properties])"); + OPENVINO_ASSERT(info[0].IsString(), "The argument 'tokenizerModel' must be a string"); + OPENVINO_ASSERT(info[1].IsObject(), "The argument 'tokenizerWeights' must be an OpenVINO Tensor"); + OPENVINO_ASSERT(info[2].IsString(), "The argument 'detokenizerModel' must be a string"); + OPENVINO_ASSERT(info[3].IsObject(), "The argument 'detokenizerWeights' must be an OpenVINO Tensor"); + + const auto tokenizer_model = js_to_cpp(env, info[0]); + const auto tokenizer_weights = js_to_cpp(env, info[1]); + const auto detokenizer_model = js_to_cpp(env, info[2]); + const auto detokenizer_weights = js_to_cpp(env, info[3]); + ov::AnyMap properties; + if (info.Length() == 5) { + properties = js_to_cpp(env, info[4]); + } + + this->_tokenizer = ov::genai::Tokenizer( + tokenizer_model, + tokenizer_weights, + detokenizer_model, + detokenizer_weights, + properties + ); + } catch (const std::exception& err) { + Napi::Error::New(env, err.what()).ThrowAsJavaScriptException(); + } +} Napi::Function TokenizerWrapper::get_class(Napi::Env env) { return DefineClass(env, @@ -15,6 +59,12 @@ Napi::Function TokenizerWrapper::get_class(Napi::Env env) { InstanceMethod("getEosTokenId", &TokenizerWrapper::get_eos_token_id), InstanceMethod("getPadToken", &TokenizerWrapper::get_pad_token), InstanceMethod("getPadTokenId", &TokenizerWrapper::get_pad_token_id), + InstanceMethod("getChatTemplate", &TokenizerWrapper::get_chat_template), + InstanceMethod("getOriginalChatTemplate", &TokenizerWrapper::get_original_chat_template), + InstanceMethod("setChatTemplate", &TokenizerWrapper::set_chat_template), + InstanceMethod("supportsPairedInput", &TokenizerWrapper::supports_paired_input), + InstanceMethod("decode", &TokenizerWrapper::decode), + InstanceMethod("encode", &TokenizerWrapper::encode), } ); } @@ -76,7 +126,7 @@ Napi::Value TokenizerWrapper::get_bos_token(const Napi::CallbackInfo& info) { Napi::Value TokenizerWrapper::get_bos_token_id(const Napi::CallbackInfo& info) { try { - return Napi::Number::New(info.Env(), this->_tokenizer.get_bos_token_id()); + return Napi::BigInt::New(info.Env(), this->_tokenizer.get_bos_token_id()); } catch (std::exception& err) { Napi::Error::New(info.Env(), err.what()).ThrowAsJavaScriptException(); return info.Env().Undefined(); @@ -94,7 +144,7 @@ Napi::Value TokenizerWrapper::get_eos_token(const Napi::CallbackInfo& info) { Napi::Value TokenizerWrapper::get_eos_token_id(const Napi::CallbackInfo& info) { try { - return Napi::Number::New(info.Env(), this->_tokenizer.get_eos_token_id()); + return Napi::BigInt::New(info.Env(), this->_tokenizer.get_eos_token_id()); } catch (std::exception& err) { Napi::Error::New(info.Env(), err.what()).ThrowAsJavaScriptException(); return info.Env().Undefined(); @@ -112,7 +162,148 @@ Napi::Value TokenizerWrapper::get_pad_token(const Napi::CallbackInfo& info) { Napi::Value TokenizerWrapper::get_pad_token_id(const Napi::CallbackInfo& info) { try { - return Napi::Number::New(info.Env(), this->_tokenizer.get_pad_token_id()); + return Napi::BigInt::New(info.Env(), this->_tokenizer.get_pad_token_id()); + } catch (std::exception& err) { + Napi::Error::New(info.Env(), err.what()).ThrowAsJavaScriptException(); + return info.Env().Undefined(); + } +} + 
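+// encode() dispatches on the JavaScript arguments: a single string, an array of strings,
+// an array of [string, string] pairs, or two string arrays for paired input. A trailing
+// non-array object, if present, is parsed as EncodeOptions and forwarded as an ov::AnyMap.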
+Napi::Value TokenizerWrapper::encode(const Napi::CallbackInfo& info) { + auto env = info.Env(); + try { + OPENVINO_ASSERT(info.Length() >= 1, "Tokenizer.encode requires at least one argument: text or prompts"); + + // Parse encoding options from the last argument if it's an object + ov::AnyMap tokenization_params; + auto count_text_args = info.Length(); + + if (info[count_text_args - 1].IsObject() && !info[count_text_args - 1].IsArray()) { + tokenization_params = js_to_cpp(env, info[count_text_args - 1]); + count_text_args--; + } + + ov::genai::TokenizedInputs result; + + // Handle different input types + if (info[0].IsString()) { + // Single string + auto text = js_to_cpp(env, info[0]); + result = this->_tokenizer.encode(text, tokenization_params); + } else if (count_text_args == 1 && info[0].IsArray()) { + auto arr = info[0].As(); + + // Check if it's array of pairs [[str, str], ...] + if (arr.Length() > 0 && arr.Get(uint32_t(0)).IsArray()) { + // Array of pairs + std::vector> paired_prompts; + for (uint32_t i = 0; i < arr.Length(); ++i) { + OPENVINO_ASSERT(arr.Get(i).IsArray(), "Each pair must be an array"); + auto pair = arr.Get(i).As(); + OPENVINO_ASSERT(pair.Length() == 2, "Each pair must contain exactly 2 strings"); + paired_prompts.emplace_back( + js_to_cpp(env, pair.Get(uint32_t(0))), + js_to_cpp(env, pair.Get(uint32_t(1))) + ); + } + result = this->_tokenizer.encode(paired_prompts, tokenization_params); + } else { + // Regular array of strings + auto prompts = js_to_cpp>(env, info[0]); + result = this->_tokenizer.encode(prompts, tokenization_params); + } + } else if (count_text_args == 2 && info[0].IsArray() && info[1].IsArray()) { + // Two arrays (paired input: prompts_1, prompts_2) + auto prompts1 = js_to_cpp>(env, info[0]); + auto prompts2 = js_to_cpp>(env, info[1]); + result = this->_tokenizer.encode(prompts1, prompts2, tokenization_params); + } else { + OPENVINO_THROW("Unsupported input type for encode. 
Expected: string, string[], [string, string][], or two string arrays"); + } + + return cpp_to_js(env, result); + } catch (std::exception& err) { + Napi::Error::New(env, err.what()).ThrowAsJavaScriptException(); + return env.Undefined(); + } +} + +Napi::Value TokenizerWrapper::decode(const Napi::CallbackInfo& info) { + auto env = info.Env(); + try { + OPENVINO_ASSERT(info.Length() >= 1, "Tokenizer.decode requires at least one argument: tokens"); + + ov::AnyMap detokenization_params; + if (info.Length() >= 2) { + const auto& options_candidate = info[1]; + detokenization_params = js_to_cpp(env, options_candidate); + } + + // Handle different input types + if (info[0].IsArray()) { + auto arr = info[0].As(); + + // Check if it's a 2D array (batch of sequences) + if (arr.Length() > 0 && arr.Get(uint32_t(0)).IsArray()) { + // Batch decoding: number[][] | bigint[][] + std::vector> batch_tokens; + for (uint32_t i = 0; i < arr.Length(); ++i) { + batch_tokens.push_back(js_to_cpp>(env, arr.Get(i))); + } + auto result = this->_tokenizer.decode(batch_tokens, detokenization_params); + return cpp_to_js, Napi::Value>(env, result); + } else { + // Single sequence: number[] | bigint[] + auto tokens = js_to_cpp>(env, info[0]); + auto result = this->_tokenizer.decode(tokens, detokenization_params); + return Napi::String::New(env, result); + } + } else { + // Tensor input + auto tensor = js_to_cpp(env, info[0]); + auto result = this->_tokenizer.decode(tensor, detokenization_params); + return cpp_to_js, Napi::Value>(env, result); + } + } catch (std::exception& err) { + Napi::Error::New(env, err.what()).ThrowAsJavaScriptException(); + return env.Undefined(); + } +} + +Napi::Value TokenizerWrapper::get_chat_template(const Napi::CallbackInfo& info) { + try { + return Napi::String::New(info.Env(), this->_tokenizer.get_chat_template()); + } catch (std::exception& err) { + Napi::Error::New(info.Env(), err.what()).ThrowAsJavaScriptException(); + return info.Env().Undefined(); + } +} + +Napi::Value TokenizerWrapper::get_original_chat_template(const Napi::CallbackInfo& info) { + try { + return Napi::String::New(info.Env(), this->_tokenizer.get_original_chat_template()); + } catch (std::exception& err) { + Napi::Error::New(info.Env(), err.what()).ThrowAsJavaScriptException(); + return info.Env().Undefined(); + } +} + +Napi::Value TokenizerWrapper::set_chat_template(const Napi::CallbackInfo& info) { + try { + OPENVINO_ASSERT(info.Length() >= 1, "Tokenizer.setChatTemplate requires one argument: chatTemplate"); + OPENVINO_ASSERT(info[0].IsString(), "The argument 'chatTemplate' must be a string"); + + this->_tokenizer.set_chat_template(js_to_cpp(info.Env(), info[0])); + return info.Env().Undefined(); + } catch (std::exception& err) { + Napi::Error::New(info.Env(), err.what()).ThrowAsJavaScriptException(); + return info.Env().Undefined(); + } +} + +Napi::Value TokenizerWrapper::supports_paired_input(const Napi::CallbackInfo& info) { + try { + return Napi::Boolean::New(info.Env(), this->_tokenizer.supports_paired_input()); } catch (std::exception& err) { Napi::Error::New(info.Env(), err.what()).ThrowAsJavaScriptException(); return info.Env().Undefined(); diff --git a/src/js/tests/bindings.test.js b/src/js/tests/bindings.test.js index 8c1ac78760..0119f25a0f 100644 --- a/src/js/tests/bindings.test.js +++ b/src/js/tests/bindings.test.js @@ -1,4 +1,4 @@ -import addon from "../dist/addon.js"; +import { LLMPipeline } from "../dist/addon.js"; import assert from "node:assert"; import { describe, it, before, after } from "node:test"; @@ 
-10,7 +10,7 @@ describe("bindings", () => { let pipeline = null; before((_, done) => { - pipeline = new addon.LLMPipeline(); + pipeline = new LLMPipeline(); pipeline.init(MODEL_PATH, "CPU", {}, (err) => { if (err) { diff --git a/src/js/tests/tokenizer.test.js b/src/js/tests/tokenizer.test.js index dc470f2014..dba1474540 100644 --- a/src/js/tests/tokenizer.test.js +++ b/src/js/tests/tokenizer.test.js @@ -1,12 +1,48 @@ -import { LLMPipeline, ChatHistory } from "../dist/index.js"; +import { LLMPipeline, ChatHistory, Tokenizer } from "../dist/index.js"; import assert from "node:assert/strict"; import { describe, it, before, after } from "node:test"; import { models } from "./models.js"; +import fs from "node:fs/promises"; +import { join } from "node:path"; +import { addon as ovAddon } from "openvino-node"; const MODEL_PATH = process.env.MODEL_PATH || `./tests/models/${models.LLM.split("/")[1]}`; -describe("tokenizer", async () => { +describe("tokenizer constructors", () => { + it("tokenizer constructors with one argument", () => { + const tokenizer = new Tokenizer(MODEL_PATH); + + assert.ok(tokenizer); + }); + + it("tokenizer constructors with multiple arguments", async () => { + const tokenizerName = join(MODEL_PATH, "openvino_tokenizer"); + const detokenizerName = join(MODEL_PATH, "openvino_detokenizer"); + const tokenizerModel = await fs.readFile(`${tokenizerName}.xml`, "utf8"); + const tokenizerWeights = await fs.readFile(`${tokenizerName}.bin`); + const detokenizerModel = await fs.readFile(`${detokenizerName}.xml`, "utf8"); + const detokenizerWeights = await fs.readFile(`${detokenizerName}.bin`); + + const tokenizerTensor = new ovAddon.Tensor("u8", [tokenizerWeights.length], tokenizerWeights); + const detokenizerTensor = new ovAddon.Tensor( + "u8", + [detokenizerWeights.length], + detokenizerWeights, + ); + + const tokenizer = new Tokenizer( + tokenizerModel, + tokenizerTensor, + detokenizerModel, + detokenizerTensor, + ); + + assert.ok(tokenizer); + }); +}); + +describe("tokenizer functions", async () => { let pipeline = null; let tokenizer = null; @@ -21,7 +57,7 @@ describe("tokenizer", async () => { await pipeline.finishChat(); }); - it("applyChatTemplate return string", async () => { + it("applyChatTemplate return string", () => { const template = tokenizer.applyChatTemplate( [ { @@ -34,7 +70,7 @@ describe("tokenizer", async () => { assert.strictEqual(typeof template, "string"); }); - it("applyChatTemplate with chat history", async () => { + it("applyChatTemplate with chat history", () => { const chatHistory = new ChatHistory([ { role: "user", @@ -45,7 +81,7 @@ describe("tokenizer", async () => { assert.strictEqual(typeof template, "string"); }); - it("applyChatTemplate with true addGenerationPrompt", async () => { + it("applyChatTemplate with true addGenerationPrompt", () => { const template = tokenizer.applyChatTemplate( [ { @@ -58,7 +94,7 @@ describe("tokenizer", async () => { assert.ok(template.includes("assistant")); }); - it("applyChatTemplate with missed addGenerationPrompt", async () => { + it("applyChatTemplate with missed addGenerationPrompt", () => { assert.throws(() => tokenizer.applyChatTemplate([ { @@ -69,11 +105,11 @@ describe("tokenizer", async () => { ); }); - it("applyChatTemplate with incorrect type of history", async () => { + it("applyChatTemplate with incorrect type of history", () => { assert.throws(() => tokenizer.applyChatTemplate("prompt", false)); }); - it("applyChatTemplate with unknown property", async () => { + it("applyChatTemplate with unknown 
property", () => { const testValue = "1234567890"; const template = tokenizer.applyChatTemplate( [ @@ -88,7 +124,7 @@ describe("tokenizer", async () => { assert.ok(!template.includes(testValue)); }); - it("applyChatTemplate use custom chatTemplate", async () => { + it("applyChatTemplate use custom chatTemplate", () => { const prompt = "continue: 1 2 3"; const chatTemplate = `{% for message in messages %} {{ message['content'] }} @@ -106,7 +142,7 @@ describe("tokenizer", async () => { assert.strictEqual(template, `${prompt}\n`); }); - it("applyChatTemplate use tools", async () => { + it("applyChatTemplate use tools", () => { const prompt = "question"; const chatHistory = [ { @@ -124,7 +160,7 @@ describe("tokenizer", async () => { assert.strictEqual(templatedHistory, expected); }); - it("applyChatTemplate use tool from chat history", async () => { + it("applyChatTemplate use tool from chat history", () => { const prompt = "question"; const chatHistory = new ChatHistory(); chatHistory.push({ role: "user", content: prompt }); @@ -139,7 +175,7 @@ describe("tokenizer", async () => { assert.strictEqual(templatedHistory, expected); }); - it("applyChatTemplate use extra_context", async () => { + it("applyChatTemplate use extra_context", () => { const prompt = "question"; const chatHistory = [ { @@ -165,7 +201,7 @@ describe("tokenizer", async () => { assert.strictEqual(templatedHistory, expected); }); - it("applyChatTemplate use extra_context from chat history", async () => { + it("applyChatTemplate use extra_context from chat history", () => { const prompt = "question"; const chatHistory = new ChatHistory(); chatHistory.push({ role: "user", content: prompt }); @@ -181,33 +217,183 @@ describe("tokenizer", async () => { assert.strictEqual(templatedHistory, expected); }); - it("getBosToken return string", async () => { + it("getBosToken return string", () => { const token = tokenizer.getBosToken(); assert.strictEqual(typeof token, "string"); }); - it("getBosTokenId return number", async () => { + it("getBosTokenId return number", () => { const token = tokenizer.getBosTokenId(); - assert.strictEqual(typeof token, "number"); + assert.strictEqual(typeof token, "bigint"); }); - it("getEosToken return string", async () => { + it("getEosToken return string", () => { const token = tokenizer.getEosToken(); assert.strictEqual(typeof token, "string"); }); - it("getEosTokenId return number", async () => { + it("getEosTokenId return number", () => { const token = tokenizer.getEosTokenId(); - assert.strictEqual(typeof token, "number"); + assert.strictEqual(typeof token, "bigint"); }); - it("getPadToken return string", async () => { + it("getPadToken return string", () => { const token = tokenizer.getPadToken(); assert.strictEqual(typeof token, "string"); }); - it("getPadTokenId return number", async () => { + it("getPadTokenId return number", () => { const token = tokenizer.getPadTokenId(); - assert.strictEqual(typeof token, "number"); + assert.strictEqual(typeof token, "bigint"); + }); + + it("setChatTemplate updates template", () => { + const originalTemplate = tokenizer.getChatTemplate(); + assert.strictEqual(typeof originalTemplate, "string"); + + const customTemplate = "Custom template: {{ messages }}"; + tokenizer.setChatTemplate(customTemplate); + + const updatedTemplate = tokenizer.getChatTemplate(); + assert.strictEqual(updatedTemplate, customTemplate); + + // Restore original template + tokenizer.setChatTemplate(originalTemplate); + }); + + // Fix getOriginalChatTemplate issue CVS-176638 + 
it.skip("getOriginalChatTemplate returns the original string", () => { + const originalTemplate = tokenizer.getChatTemplate(); + tokenizer.setChatTemplate("Custom template: {{ messages }}"); + + const template = tokenizer.getOriginalChatTemplate(); + assert.strictEqual(template, originalTemplate); + + // Restore original template + tokenizer.setChatTemplate(originalTemplate); + }); + + it("encode single string returns TokenizedInputs", () => { + const text = "Hello world"; + const result = tokenizer.encode(text); + + assert.ok(result.input_ids, "Should have input_ids"); + assert.ok(result.attention_mask, "Should have attention_mask"); + assert.strictEqual(typeof result.input_ids, "object"); + assert.strictEqual(typeof result.attention_mask, "object"); + }); + + it("encode with options", () => { + const text = "Hello world"; + const result = tokenizer.encode(text, { + add_special_tokens: false, + pad_to_max_length: true, + max_length: 1000, + padding_side: "left", + }); + // const padTokenId = tokenizer.getPadTokenId(); + + assert.ok(result.input_ids); + assert.strictEqual( + result.input_ids.getShape()[1], + 1000, + "input_ids should be padded to maxLength", + ); + // Uncomment after fixing padding issue CVS-176636 + // assert.strictEqual( + // result.input_ids.getData()[0], + // padTokenId, + // "input_ids should be left padded", + // ); + }); + + it("encode array of strings", () => { + const texts = ["Hello", "World"]; + const result = tokenizer.encode(texts); + + assert.strictEqual(result.input_ids.getShape()[0], texts.length); + assert.strictEqual(result.attention_mask.getShape()[0], 2); + }); + + it("decode array of token IDs to string", () => { + const tokenIds = [1, 2, 3]; + const decoded = tokenizer.decode(tokenIds); + + assert.strictEqual(typeof decoded, "string"); + }); + + // Fix skip_special_tokens functionality CVS-176639 + it.skip("decode with skip_special_tokens option", () => { + const eos = tokenizer.getEosToken(); + const eosId = tokenizer.getEosTokenId(); + const tokenIds = [10n, 20n, 30n, eosId]; + const decoded1 = tokenizer.decode(tokenIds, { skip_special_tokens: true }); + const decoded2 = tokenizer.decode(tokenIds, { skip_special_tokens: false }); + + assert.strictEqual(typeof decoded1, "string"); + assert.strictEqual(typeof decoded2, "string"); + assert.strictEqual(decoded2, decoded1 + eos); + }); + + it("decode batch of token sequences", () => { + const batchTokens = [ + [1, 2, 3], + [4, 5, 6], + ]; + const decoded = tokenizer.decode(batchTokens); + + assert.strictEqual(decoded.length, 2); + }); + + it("encode and decode round trip", () => { + const originalText = "Hello world"; + const encoded = tokenizer.encode(originalText); + const decodedText = tokenizer.decode(encoded.input_ids); + + assert.deepEqual(decodedText, [originalText]); + }); +}); + +// Add model with paired input support CVS-176639 +describe.skip("tokenizer with paired input", () => { + let tokenizer = null; + + before(async () => { + tokenizer = new Tokenizer(MODEL_PATH, { add_second_input: true, number_of_inputs: 2 }); + }); + + it("supportsPairedInput return boolean", () => { + const result = tokenizer.supportsPairedInput(); + + assert.strictEqual(result, true); + }); + + it("encode paired prompts (two arrays)", () => { + const prompts1 = ["Question 1", "Question 2"]; + const prompts2 = ["Answer 1", "Answer 2"]; + const result = tokenizer.encode(prompts1, prompts2); + + assert.strictEqual(result.input_ids.getShape()[0], prompts1.length); + assert.strictEqual(result.attention_mask.getShape()[0], 
prompts1.length); + }); + + it("encode paired prompts (array of pairs)", () => { + const pairs = [ + ["Question 1", "Answer 1"], + ["Question 2", "Answer 2"], + ]; + const result = tokenizer.encode(pairs); + + assert.strictEqual(result.input_ids.getShape()[0], pairs.length); + assert.strictEqual(result.attention_mask.getShape()[0], pairs.length); + }); + + it("encode paired prompts broadcasting second array", () => { + const prompts1 = ["Question 1", "Question 2", "Question 3"]; // batch size 3 + const prompts2 = ["Single answer"]; // will be broadcast + const result = tokenizer.encode(prompts1, prompts2); + + assert.strictEqual(result.input_ids.getShape()[0], prompts1.length); + assert.strictEqual(result.attention_mask.getShape()[0], prompts1.length); }); });
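A minimal end-to-end sketch of the `Tokenizer` surface this PR exposes (constructor from a path, `encode`, `decode`, `applyChatTemplate`). It assumes a package built from this branch and a locally exported model; `MODEL_PATH` below is a hypothetical placeholder, not a path shipped with the repository, and the printed shape is illustrative.

```js
// Sketch only: exercises the new openvino-genai-node Tokenizer bindings.
import { Tokenizer } from "openvino-genai-node";

// Hypothetical path to an exported OpenVINO model directory containing
// openvino_tokenizer.xml/bin and openvino_detokenizer.xml/bin.
const MODEL_PATH = "./tests/models/my-exported-model";

const tokenizer = new Tokenizer(MODEL_PATH);

// Encode a batch of prompts, padding every sequence to a fixed length.
const tokens = tokenizer.encode(["The Sun is yellow because", "The"], {
  add_special_tokens: true,
  pad_to_max_length: true,
  max_length: 64,
});
console.log(tokens.input_ids.getShape()); // e.g. [2, 64]

// Decode the whole batch back to strings (special tokens are skipped by default).
console.log(tokenizer.decode(tokens.input_ids));

// Format a chat history into a model-ready prompt string.
const prompt = tokenizer.applyChatTemplate(
  [{ role: "user", content: "Why is the Sun yellow?" }],
  true,
);
console.log(prompt);
```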