|
| 1 | +use encoding_rs::{Encoding, UTF_8, UTF_16BE, UTF_16LE}; |
| 2 | +use rquickjs::JsLifetime; |
| 3 | +use rquickjs::class::Trace; |
| 4 | +use std::ptr; |
| 5 | +use std::ptr::NonNull; |
| 6 | + |
| 7 | +// Native functions for the encoding implementation |
| 8 | +#[rquickjs::module(rename = "camelCase")] |
| 9 | +pub mod native_module { |
| 10 | + use encoding_rs::Encoding; |
| 11 | + use rquickjs::prelude::*; |
| 12 | + use rquickjs::{Ctx, TypedArray}; |
| 13 | + |
| 14 | + #[rquickjs::function] |
| 15 | + pub fn supports_encoding(encoding: String) -> bool { |
| 16 | + Encoding::for_label(encoding.as_bytes()).is_some() |
| 17 | + } |
| 18 | + |
| 19 | + #[rquickjs::function] |
| 20 | + pub fn decode( |
| 21 | + bytes: TypedArray<'_, u8>, |
| 22 | + encoding: String, |
| 23 | + stream: bool, |
| 24 | + fatal: bool, |
| 25 | + ignore_bom: bool, |
| 26 | + ) -> List<(Option<String>, Option<String>)> { |
| 27 | + let bytes = bytes |
| 28 | + .as_bytes() |
| 29 | + .expect("the UInt8Array passed to decode is detached"); |
| 30 | + match super::decode_impl(bytes, encoding, stream, fatal, ignore_bom) { |
| 31 | + Ok(result) => List((Some(result), None)), |
| 32 | + Err(error) => List((None, Some(error))), |
| 33 | + } |
| 34 | + } |
| 35 | + |
| 36 | + #[rquickjs::function] |
| 37 | + pub fn encode(string: String, ctx: Ctx<'_>) -> TypedArray<'_, u8> { |
| 38 | + TypedArray::new_copy(ctx, super::encode_impl(&string)) |
| 39 | + .expect("failed to create UInt8Array from string") |
| 40 | + } |
| 41 | + |
| 42 | + #[rquickjs::function] |
| 43 | + pub fn encode_into(string: String, target: TypedArray<'_, u8>) -> super::EncodeIntoResult { |
| 44 | + let raw = target |
| 45 | + .as_raw() |
| 46 | + .expect("the UInt8Array passed to encodeInto is detached"); |
| 47 | + super::encode_into_impl(&string, raw.len, raw.ptr) |
| 48 | + } |
| 49 | +} |
| 50 | + |
| 51 | +#[rquickjs::class] |
| 52 | +#[derive(Trace, JsLifetime)] |
| 53 | +pub struct EncodeIntoResult { |
| 54 | + pub read: usize, |
| 55 | + pub written: usize, |
| 56 | +} |
| 57 | + |
/// Returns the UTF-8 bytes of `string`, borrowed from the string itself (no copy is made).
fn encode_impl(string: &str) -> &[u8] {
    str::as_bytes(string)
}
| 61 | + |
| 62 | +fn encode_into_impl(string: &str, target_len: usize, target: NonNull<u8>) -> EncodeIntoResult { |
| 63 | + let mut bytes_to_copy = 0; |
| 64 | + let mut chars_copied = 0; |
| 65 | + for (idx, _) in string.char_indices() { |
| 66 | + if idx <= target_len { |
| 67 | + bytes_to_copy = idx; |
| 68 | + chars_copied += 1; |
| 69 | + } else { |
| 70 | + break; |
| 71 | + } |
| 72 | + } |
| 73 | + unsafe { ptr::copy_nonoverlapping(string.as_ptr(), target.as_ptr(), bytes_to_copy) } |
| 74 | + |
| 75 | + EncodeIntoResult { |
| 76 | + read: chars_copied, |
| 77 | + written: bytes_to_copy, |
| 78 | + } |
| 79 | +} |
| 80 | + |
| 81 | +fn decode_impl( |
| 82 | + bytes: &[u8], |
| 83 | + encoding: String, |
| 84 | + _stream: bool, |
| 85 | + fatal: bool, |
| 86 | + ignore_bom: bool, |
| 87 | +) -> Result<String, String> { |
| 88 | + let encoding = Encoding::for_label(encoding.as_bytes()) |
| 89 | + .ok_or_else(|| format!("Unsupported encoding: {encoding}"))?; |
| 90 | + |
| 91 | + // TODO: we are not implementing streaming yet. to do so, TextDecoder should keep a native |
| 92 | + // decoding state with a String and `new_decoder` variants should be used with `decoder.decode_to_string` variants. |
| 93 | + |
| 94 | + match (ignore_bom, fatal) { |
| 95 | + (false, false) => { |
| 96 | + let (result, _replaced) = encoding.decode_with_bom_removal(bytes); |
| 97 | + Ok(result.to_string()) |
| 98 | + } |
| 99 | + (false, true) => { |
| 100 | + let without_bom = if encoding == UTF_8 && bytes.starts_with(b"\xEF\xBB\xBF") { |
| 101 | + &bytes[3..] |
| 102 | + } else if (encoding == UTF_16LE && bytes.starts_with(b"\xFF\xFE")) |
| 103 | + || (encoding == UTF_16BE && bytes.starts_with(b"\xFE\xFF")) |
| 104 | + { |
| 105 | + &bytes[2..] |
| 106 | + } else { |
| 107 | + bytes |
| 108 | + }; |
| 109 | + let result = encoding |
| 110 | + .decode_without_bom_handling_and_without_replacement(without_bom) |
| 111 | + .ok_or_else(|| "Malformed input".to_string())?; |
| 112 | + Ok(result.to_string()) |
| 113 | + } |
| 114 | + (true, false) => { |
| 115 | + let (result, _replaced) = encoding.decode_without_bom_handling(bytes); |
| 116 | + Ok(result.to_string()) |
| 117 | + } |
| 118 | + (true, true) => { |
| 119 | + let result = encoding |
| 120 | + .decode_without_bom_handling_and_without_replacement(bytes) |
| 121 | + .ok_or_else(|| "Malformed input".to_string())?; |
| 122 | + Ok(result.to_string()) |
| 123 | + } |
| 124 | + } |
| 125 | +} |
| 126 | + |
| 127 | +// JS functions for the Encoding API implementation |
| 128 | +pub const ENCODING_JS: &str = include_str!("encoding.js"); |
| 129 | + |
/// JavaScript snippet that imports the built-in encoding module and publishes its four
/// classes (`TextDecoder`, `TextEncoder`, `TextDecoderStream`, `TextEncoderStream`) on
/// `globalThis`.
pub const WIRE_JS: &str = r#"
    import * as __wasm_rquickjs_encoding from '__wasm_rquickjs_builtin/encoding';
    globalThis.TextDecoder = __wasm_rquickjs_encoding.TextDecoder;
    globalThis.TextEncoder = __wasm_rquickjs_encoding.TextEncoder;
    globalThis.TextDecoderStream = __wasm_rquickjs_encoding.TextDecoderStream;
    globalThis.TextEncoderStream = __wasm_rquickjs_encoding.TextEncoderStream;
    "#;
0 commit comments