Skip to content

Commit 533ccb3

Browse files
authored
Merge pull request #26 from golemcloud/encoding-api
Implemented the Encoding API
2 parents d627966 + c664d87 commit 533ccb3

File tree

8 files changed

+365
-2
lines changed

8 files changed

+365
-2
lines changed

compile-all-examples.sh

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ cargo run --package wasm-rquickjs-cli --bin wasm-rquickjs -- generate-wrapper-cr
1717
cargo run --package wasm-rquickjs-cli --bin wasm-rquickjs -- generate-wrapper-crate --wit examples/timeout/wit --js examples/timeout/src/timeout.js --output tmp/timeout
1818
cargo run --package wasm-rquickjs-cli --bin wasm-rquickjs -- generate-wrapper-crate --wit examples/streams/wit --js examples/streams/src/streams.js --output tmp/streams
1919
cargo run --package wasm-rquickjs-cli --bin wasm-rquickjs -- generate-wrapper-crate --wit examples/fetch/wit --js examples/fetch/src/fetch.js --output tmp/fetch
20+
cargo run --package wasm-rquickjs-cli --bin wasm-rquickjs -- generate-wrapper-crate --wit examples/encoding/wit --js examples/encoding/src/encoding.js --output tmp/encoding
2021

2122
# Generate .d.ts files for all examples
2223
cargo run --package wasm-rquickjs-cli --bin wasm-rquickjs -- generate-dts --wit examples/export-from-inner-package/wit --output tmp/export-from-inner-package/dts
@@ -33,6 +34,7 @@ cargo run --package wasm-rquickjs-cli --bin wasm-rquickjs -- generate-dts --wit
3334
cargo run --package wasm-rquickjs-cli --bin wasm-rquickjs -- generate-dts --wit examples/timeout/wit --output tmp/timeout/dts
3435
cargo run --package wasm-rquickjs-cli --bin wasm-rquickjs -- generate-dts --wit examples/streams/wit --output tmp/streams/dts
3536
cargo run --package wasm-rquickjs-cli --bin wasm-rquickjs -- generate-dts --wit examples/fetch/wit --output tmp/fetch/dts
37+
cargo run --package wasm-rquickjs-cli --bin wasm-rquickjs -- generate-dts --wit examples/encoding/wit --output tmp/encoding/dts
3638

3739
# All generated crates can be compiled
3840
pushd tmp/export-from-inner-package

crates/wasm-rquickjs/skeleton/Cargo.toml_

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ logging = ["dep:log", "dep:wasi-logger"]
2222
# Core dependencies
2323
rquickjs = { version = "0.9.0", default-features = false, features = ["futures", "bindgen", "loader", "macro"] }
2424

25+
encoding_rs = "0.8.35"
2526
futures = { version = "0.3.31", features = [] }
2627
futures-concurrency = "7.6.3"
2728
pin-project = "1.1.10"
Lines changed: 136 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,136 @@
1+
import * as encodingNative from '__wasm_rquickjs_builtin/encoding_native'
2+
import * as streams from '__wasm_rquickjs_builtin/streams';
3+
4+
/**
 * Turns byte sequences into strings for a given encoding label.
 *
 * The actual decoding work is delegated to the native `encoding_native`
 * module; this class only normalizes its inputs and surfaces native
 * failures as JavaScript exceptions.
 */
export class TextDecoder {
    /**
     * @param {string} [label] encoding label; any falsy value selects 'utf-8'
     * @param {{fatal?: boolean, ignoreBOM?: boolean}} [options] decoder flags
     * @throws {RangeError} if the native module does not support the label
     */
    constructor(label, options) {
        const resolvedLabel = label || 'utf-8';
        const supported = encodingNative.supports_encoding(resolvedLabel);
        if (!supported) {
            throw new RangeError(resolvedLabel + ' is not supported');
        }

        this._label = resolvedLabel;
        this._fatal = Boolean(options && options.fatal);
        this._ignoreBOM = Boolean(options && options.ignoreBOM);
    }

    /** The label this decoder was constructed with (after the 'utf-8' fallback). */
    get encoding() {
        return this._label;
    }

    /** Whether the decoder was constructed with a truthy `fatal` option. */
    get fatal() {
        return this._fatal;
    }

    /** Whether the decoder was constructed with a truthy `ignoreBOM` option. */
    get ignoreBOM() {
        return this._ignoreBOM;
    }

    /**
     * Decodes `buffer` into a string.
     *
     * @param {Uint8Array|ArrayBuffer|ArrayBufferView|number[]} [buffer]
     *        input bytes; anything else is treated as an empty input
     * @param {{stream?: boolean}} [options] forwarded to the native decoder
     * @returns {string} the decoded text
     * @throws {TypeError} carrying the message reported by the native decoder
     */
    decode(buffer, options) {
        // Normalize every accepted input shape to a Uint8Array; the checks run
        // in the same order as the accepted types are listed above.
        const bytes =
            buffer instanceof Uint8Array
                ? buffer
                : buffer instanceof ArrayBuffer
                    ? new Uint8Array(buffer)
                    : ArrayBuffer.isView(buffer) && buffer.buffer instanceof ArrayBuffer
                        ? new Uint8Array(buffer.buffer, buffer.byteOffset, buffer.byteLength)
                        : Array.isArray(buffer)
                            ? new Uint8Array(buffer)
                            : new Uint8Array(0);
        const streaming = Boolean(options && options.stream);

        // The native call returns a [result, error] pair instead of throwing.
        const outcome = encodingNative.decode(bytes, this._label, streaming, this._fatal, this._ignoreBOM);
        const [text, failure] = outcome;
        if (failure === undefined) {
            return text;
        }
        throw new TypeError(failure);
    }
}
51+
52+
/**
 * Encodes strings as UTF-8 bytes by delegating to the native
 * `encoding_native` module.
 */
export class TextEncoder {
    constructor() {
    }

    /** Always 'utf-8'; this encoder has no other output encoding. */
    get encoding() {
        return 'utf-8';
    }

    /**
     * Encodes `string` into a new byte array.
     *
     * @param {string} [string=''] text to encode; an omitted argument encodes
     *        the empty string, other values are converted to a string first
     * @returns {Uint8Array} the bytes produced by the native `encode` function
     */
    encode(string = '') {
        // Coerce up front so the native function always receives a JS string.
        return encodingNative.encode(`${string}`);
    }

    /**
     * Encodes `string` directly into an existing byte array.
     *
     * @param {string} string text to encode (converted to a string first)
     * @param {Uint8Array} uint8Array destination the bytes are written into
     * @returns {{read: number, written: number}} the result object produced by
     *          the native `encode_into` function
     */
    encodeInto(string, uint8Array) {
        // The native function takes (string, target); the destination array
        // must be forwarded, otherwise the call has nothing to write into.
        return encodingNative.encode_into(`${string}`, uint8Array);
    }
}
68+
69+
/**
 * A TransformStream that runs every incoming chunk through a `TextDecoder`
 * and enqueues the decoded text.
 */
export class TextDecoderStream extends streams.TransformStream {
    /**
     * @param {string} [label] encoding label; any falsy value selects 'utf-8'
     * @param {{fatal?: boolean, ignoreBOM?: boolean}} [options] passed through
     *        to the underlying TextDecoder
     * @throws {RangeError} if the native module does not support the label
     */
    constructor(label, options) {
        const resolvedLabel = label || 'utf-8';
        const failFast = Boolean(options && options.fatal);
        const supported = encodingNative.supports_encoding(resolvedLabel);
        if (!supported) {
            throw new RangeError(resolvedLabel + ' is not supported');
        }

        // The decoder lives in the constructor's closure because `this` is not
        // available until super() has returned.
        let decoder;
        const transformer = {
            start(ctl) {
                decoder = new TextDecoder(resolvedLabel, options);
            },
            transform(chunk, ctl) {
                if (!failFast) {
                    ctl.enqueue(decoder.decode(chunk));
                    return;
                }
                // In fatal mode a decoding failure is reported through the
                // controller instead of escaping from transform().
                try {
                    ctl.enqueue(decoder.decode(chunk));
                } catch (e) {
                    ctl.error(e);
                }
            },
            flush() {
                decoder = null;
            },
        };
        super(transformer);

        this._label = resolvedLabel;
        this._fatal = failFast;
        this._ignoreBOM = Boolean(options && options.ignoreBOM);
    }

    /** The label this stream was constructed with (after the 'utf-8' fallback). */
    get encoding() {
        return this._label;
    }

    /** Whether the stream was constructed with a truthy `fatal` option. */
    get fatal() {
        return this._fatal;
    }

    /** Whether the stream was constructed with a truthy `ignoreBOM` option. */
    get ignoreBOM() {
        return this._ignoreBOM;
    }
}
116+
117+
/**
 * A TransformStream that encodes every incoming chunk with a `TextEncoder`
 * and enqueues the resulting bytes.
 */
export class TextEncoderStream extends streams.TransformStream {
    /**
     * Both parameters are accepted but not used by this implementation.
     */
    constructor(label, options) {
        // Kept in the constructor's closure because `this` is not available
        // until super() has returned.
        let encoder;
        const transformer = {
            start(ctl) {
                encoder = new TextEncoder();
            },
            transform(chunk, ctl) {
                const encoded = encoder.encode(chunk);
                ctl.enqueue(encoded);
            },
            flush() {
                encoder = null;
            },
        };
        super(transformer);
    }

    /** Always 'utf-8'. */
    get encoding() {
        return 'utf-8';
    }
}
Lines changed: 137 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,137 @@
1+
use encoding_rs::{Encoding, UTF_8, UTF_16BE, UTF_16LE};
2+
use rquickjs::JsLifetime;
3+
use rquickjs::class::Trace;
4+
use std::ptr;
5+
use std::ptr::NonNull;
6+
7+
// Native functions for the encoding implementation
8+
#[rquickjs::module(rename = "camelCase")]
9+
pub mod native_module {
10+
use encoding_rs::Encoding;
11+
use rquickjs::prelude::*;
12+
use rquickjs::{Ctx, TypedArray};
13+
14+
#[rquickjs::function]
15+
pub fn supports_encoding(encoding: String) -> bool {
16+
Encoding::for_label(encoding.as_bytes()).is_some()
17+
}
18+
19+
#[rquickjs::function]
20+
pub fn decode(
21+
bytes: TypedArray<'_, u8>,
22+
encoding: String,
23+
stream: bool,
24+
fatal: bool,
25+
ignore_bom: bool,
26+
) -> List<(Option<String>, Option<String>)> {
27+
let bytes = bytes
28+
.as_bytes()
29+
.expect("the UInt8Array passed to decode is detached");
30+
match super::decode_impl(bytes, encoding, stream, fatal, ignore_bom) {
31+
Ok(result) => List((Some(result), None)),
32+
Err(error) => List((None, Some(error))),
33+
}
34+
}
35+
36+
#[rquickjs::function]
37+
pub fn encode(string: String, ctx: Ctx<'_>) -> TypedArray<'_, u8> {
38+
TypedArray::new_copy(ctx, super::encode_impl(&string))
39+
.expect("failed to create UInt8Array from string")
40+
}
41+
42+
#[rquickjs::function]
43+
pub fn encode_into(string: String, target: TypedArray<'_, u8>) -> super::EncodeIntoResult {
44+
let raw = target
45+
.as_raw()
46+
.expect("the UInt8Array passed to encodeInto is detached");
47+
super::encode_into_impl(&string, raw.len, raw.ptr)
48+
}
49+
}
50+
51+
#[rquickjs::class]
52+
#[derive(Trace, JsLifetime)]
53+
pub struct EncodeIntoResult {
54+
pub read: usize,
55+
pub written: usize,
56+
}
57+
58+
/// Returns the UTF-8 bytes of `string`, borrowed from the string itself.
fn encode_impl(string: &str) -> &[u8] {
    str::as_bytes(string)
}
61+
62+
fn encode_into_impl(string: &str, target_len: usize, target: NonNull<u8>) -> EncodeIntoResult {
63+
let mut bytes_to_copy = 0;
64+
let mut chars_copied = 0;
65+
for (idx, _) in string.char_indices() {
66+
if idx <= target_len {
67+
bytes_to_copy = idx;
68+
chars_copied += 1;
69+
} else {
70+
break;
71+
}
72+
}
73+
unsafe { ptr::copy_nonoverlapping(string.as_ptr(), target.as_ptr(), bytes_to_copy) }
74+
75+
EncodeIntoResult {
76+
read: chars_copied,
77+
written: bytes_to_copy,
78+
}
79+
}
80+
81+
fn decode_impl(
82+
bytes: &[u8],
83+
encoding: String,
84+
_stream: bool,
85+
fatal: bool,
86+
ignore_bom: bool,
87+
) -> Result<String, String> {
88+
let encoding = Encoding::for_label(encoding.as_bytes())
89+
.ok_or_else(|| format!("Unsupported encoding: {encoding}"))?;
90+
91+
// TODO: we are not implementing streaming yet. to do so, TextDecoder should keep a native
92+
// decoding state with a String and `new_decoder` variants should be used with `decoder.decode_to_string` variants.
93+
94+
match (ignore_bom, fatal) {
95+
(false, false) => {
96+
let (result, _replaced) = encoding.decode_with_bom_removal(bytes);
97+
Ok(result.to_string())
98+
}
99+
(false, true) => {
100+
let without_bom = if encoding == UTF_8 && bytes.starts_with(b"\xEF\xBB\xBF") {
101+
&bytes[3..]
102+
} else if (encoding == UTF_16LE && bytes.starts_with(b"\xFF\xFE"))
103+
|| (encoding == UTF_16BE && bytes.starts_with(b"\xFE\xFF"))
104+
{
105+
&bytes[2..]
106+
} else {
107+
bytes
108+
};
109+
let result = encoding
110+
.decode_without_bom_handling_and_without_replacement(without_bom)
111+
.ok_or_else(|| "Malformed input".to_string())?;
112+
Ok(result.to_string())
113+
}
114+
(true, false) => {
115+
let (result, _replaced) = encoding.decode_without_bom_handling(bytes);
116+
Ok(result.to_string())
117+
}
118+
(true, true) => {
119+
let result = encoding
120+
.decode_without_bom_handling_and_without_replacement(bytes)
121+
.ok_or_else(|| "Malformed input".to_string())?;
122+
Ok(result.to_string())
123+
}
124+
}
125+
}
126+
127+
// JS functions for the Encoding API implementation
128+
/// Contents of the sibling `encoding.js` file, embedded at compile time via `include_str!`.
pub const ENCODING_JS: &str = include_str!("encoding.js");
129+
130+
// JS code wiring the encoding module into the global context
131+
/// JavaScript snippet that imports `__wasm_rquickjs_builtin/encoding` and assigns its
/// `TextDecoder`, `TextEncoder`, `TextDecoderStream` and `TextEncoderStream` exports to
/// `globalThis`.
pub const WIRE_JS: &str = r#"
132+
import * as __wasm_rquickjs_encoding from '__wasm_rquickjs_builtin/encoding';
133+
globalThis.TextDecoder = __wasm_rquickjs_encoding.TextDecoder;
134+
globalThis.TextEncoder = __wasm_rquickjs_encoding.TextEncoder;
135+
globalThis.TextDecoderStream = __wasm_rquickjs_encoding.TextDecoderStream;
136+
globalThis.TextEncoderStream = __wasm_rquickjs_encoding.TextEncoderStream;
137+
"#;

crates/wasm-rquickjs/skeleton/src/builtin/http.rs

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,6 @@ pub mod native_module {
77

88
use futures::channel::mpsc::{UnboundedReceiver, UnboundedSender};
99
use futures::SinkExt;
10-
use futures_concurrency::future::Race;
1110
use futures_concurrency::stream::IntoStream;
1211
use reqwest::header::{HeaderName, HeaderValue};
1312
use reqwest::{

crates/wasm-rquickjs/skeleton/src/builtin/mod.rs

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
use std::fmt::Write;
22

33
mod console;
4+
mod encoding;
45

56
#[cfg(feature = "http")]
67
mod http;
@@ -27,6 +28,8 @@ pub fn add_module_resolvers(
2728
.with_module("__wasm_rquickjs_builtin/http")
2829
.with_module("__wasm_rquickjs_builtin/streams_native")
2930
.with_module("__wasm_rquickjs_builtin/streams")
31+
.with_module("__wasm_rquickjs_builtin/encoding_native")
32+
.with_module("__wasm_rquickjs_builtin/encoding")
3033
}
3134

3235
pub fn module_loader() -> (
@@ -50,12 +53,17 @@ pub fn module_loader() -> (
5053
.with_module(
5154
"__wasm_rquickjs_builtin/streams_native",
5255
streams::js_native_module,
56+
)
57+
.with_module(
58+
"__wasm_rquickjs_builtin/encoding_native",
59+
encoding::js_native_module,
5360
),
5461
rquickjs::loader::BuiltinLoader::default()
5562
.with_module("__wasm_rquickjs_builtin/console", console::CONSOLE_JS)
5663
.with_module("__wasm_rquickjs_builtin/timeout", timeout::TIMEOUT_JS)
5764
.with_module("__wasm_rquickjs_builtin/http", http::HTTP_JS)
58-
.with_module("__wasm_rquickjs_builtin/streams", streams::STREAMS_JS),
65+
.with_module("__wasm_rquickjs_builtin/streams", streams::STREAMS_JS)
66+
.with_module("__wasm_rquickjs_builtin/encoding", encoding::ENCODING_JS),
5967
)
6068
}
6169

@@ -65,5 +73,6 @@ pub fn wire_builtins() -> String {
6573
writeln!(result, "{}", timeout::WIRE_JS).unwrap();
6674
writeln!(result, "{}", http::WIRE_JS).unwrap();
6775
writeln!(result, "{}", streams::WIRE_JS).unwrap();
76+
writeln!(result, "{}", encoding::WIRE_JS).unwrap();
6877
result
6978
}

0 commit comments

Comments
 (0)