Skip to content

Commit 77e978f

Browse files
bengl, sabrenner, uurien
committed
new wasm rewriter w/ orchestrion (#5494)
Co-authored-by: Sam Brenner <[email protected]> Co-authored-by: Ugaitz Urien <[email protected]>
1 parent 25cbd62 commit 77e978f

File tree

33 files changed

+499
-286
lines changed

33 files changed

+499
-286
lines changed

.npmignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
!packages/*/lib/**/*
44
!packages/*/src/**/*
55
!packages/*/index.js
6+
!packages/datadog-instrumentations/orchestrion.yml
67
!scripts/preinstall.js
78
!vendor/**/*
89
!LICENSE

LICENSE-3rdparty.csv

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,10 +2,10 @@ Component,Origin,License,Copyright
22
require,@datadog/libdatadog,Apache license 2.0,Copyright 2024 Datadog Inc.
33
require,@datadog/native-appsec,Apache license 2.0,Copyright 2018 Datadog Inc.
44
require,@datadog/native-metrics,Apache license 2.0,Copyright 2018 Datadog Inc.
5-
require,@datadog/wasm-js-rewriter,Apache license 2.0,Copyright 2018 Datadog Inc.
65
require,@datadog/native-iast-taint-tracking,Apache license 2.0,Copyright 2018 Datadog Inc.
76
require,@datadog/pprof,Apache license 2.0,Copyright 2019 Google Inc.
87
require,@datadog/sketches-js,Apache license 2.0,Copyright 2020 Datadog Inc.
8+
require,@datadog/wasm-js-rewriter,Apache license 2.0,Copyright 2018 Datadog Inc.
99
require,@opentelemetry/api,Apache license 2.0,Copyright OpenTelemetry Authors
1010
require,@opentelemetry/core,Apache license 2.0,Copyright OpenTelemetry Authors
1111
require,@isaacs/ttlcache,ISC,Copyright (c) 2022-2023 - Isaac Z. Schlueter and Contributors

benchmark/sirun/runall.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@ fi
3838
TOTAL_CPU_CORES=$(nproc 2>/dev/null || echo "24")
3939
export CPU_AFFINITY="${CPU_START_ID:-$TOTAL_CPU_CORES}" # Benchmarking Platform convention
4040

41-
nvm use $MAJOR_VERSION # provided by each benchmark stage
41+
nvm install $MAJOR_VERSION # provided by each benchmark stage
4242
export VERSION=`nvm current`
4343
export ENABLE_AFFINITY=true
4444
echo "using Node.js ${VERSION}"

integration-tests/helpers/index.js

Lines changed: 19 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,9 @@ const id = require('../../packages/dd-trace/src/id')
1515

1616
const hookFile = 'dd-trace/loader-hook.mjs'
1717

18+
// This is set by the setShouldKill function
19+
let shouldKill
20+
1821
async function runAndCheckOutput (filename, cwd, expectedOut) {
1922
const proc = spawn('node', [filename], { cwd, stdio: 'pipe' })
2023
const pid = proc.pid
@@ -26,9 +29,11 @@ async function runAndCheckOutput (filename, cwd, expectedOut) {
2629
})
2730
proc.stderr.pipe(process.stdout)
2831
proc.on('exit', () => resolve(out.toString('utf8')))
29-
setTimeout(() => {
30-
if (proc.exitCode === null) proc.kill()
31-
}, 1000) // TODO this introduces flakiness. find a better way to end the process.
32+
if (shouldKill) {
33+
setTimeout(() => {
34+
if (proc.exitCode === null) proc.kill()
35+
}, 1000) // TODO this introduces flakiness. find a better way to end the process.
36+
}
3237
})
3338
if (typeof expectedOut === 'function') {
3439
expectedOut(out)
@@ -339,6 +344,15 @@ function sandboxCwd () {
339344
return sandbox.folder
340345
}
341346

347+
function setShouldKill (value) {
348+
before(() => {
349+
shouldKill = value
350+
})
351+
after(() => {
352+
shouldKill = true
353+
})
354+
}
355+
342356
function assertObjectContains (actual, expected) {
343357
for (const [key, val] of Object.entries(expected)) {
344358
if (val !== null && typeof val === 'object') {
@@ -372,5 +386,6 @@ module.exports = {
372386
spawnPluginIntegrationTestProc,
373387
useEnv,
374388
useSandbox,
375-
sandboxCwd
389+
sandboxCwd,
390+
setShouldKill
376391
}

integration-tests/init.spec.js

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,8 @@ const {
33
runAndCheckWithTelemetry: testFile,
44
useEnv,
55
useSandbox,
6-
sandboxCwd
6+
sandboxCwd,
7+
setShouldKill
78
} = require('./helpers')
89
const path = require('path')
910
const fs = require('fs')
@@ -159,6 +160,7 @@ function stubTracerIfNeeded () {
159160
}
160161

161162
describe('init.js', () => {
163+
setShouldKill(false)
162164
useSandbox()
163165
stubTracerIfNeeded()
164166

@@ -173,6 +175,7 @@ if (
173175
semver.satisfies(process.versions.node, '>=14.13.1')
174176
) {
175177
describe('initialize.mjs', () => {
178+
setShouldKill(false)
176179
useSandbox()
177180
stubTracerIfNeeded()
178181

package.json

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -90,12 +90,12 @@
9090
"@datadog/native-metrics": "^3.1.1",
9191
"@datadog/pprof": "5.7.1",
9292
"@datadog/sketches-js": "^2.1.0",
93-
"@datadog/wasm-js-rewriter": "3.1.0",
93+
"@datadog/wasm-js-rewriter": "4.0.0",
9494
"@isaacs/ttlcache": "^1.4.1",
9595
"@opentelemetry/api": ">=1.0.0 <1.9.0",
9696
"@opentelemetry/core": "^1.14.0",
9797
"crypto-randomuuid": "^1.0.0",
98-
"dc-polyfill": "0.1.6",
98+
"dc-polyfill": "0.1.8",
9999
"ignore": "^5.2.4",
100100
"import-in-the-middle": "1.13.1",
101101
"istanbul-lib-coverage": "3.2.0",
packages/datadog-instrumentations/orchestrion.yml

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
version: 1
2+
dc_module: dc-polyfill
3+
instrumentations:
4+
- module_name: "@langchain/core"
5+
version_range: ">=0.1.0"
6+
file_path: dist/runnables/base.js
7+
function_query:
8+
name: invoke
9+
type: method
10+
kind: async
11+
class: RunnableSequence
12+
operator: tracePromise
13+
channel_name: "RunnableSequence_invoke"
14+
- module_name: "@langchain/core"
15+
version_range: ">=0.1.0"
16+
file_path: dist/runnables/base.js
17+
function_query:
18+
name: batch
19+
type: method
20+
kind: async
21+
class: RunnableSequence
22+
operator: tracePromise
23+
channel_name: "RunnableSequence_batch"
24+
- module_name: "@langchain/core"
25+
version_range: ">=0.1.0"
26+
file_path: dist/language_models/chat_models.js
27+
function_query:
28+
name: generate
29+
type: method
30+
kind: async
31+
class: BaseChatModel
32+
operator: tracePromise
33+
channel_name: "BaseChatModel_generate"
34+
- module_name: "@langchain/core"
35+
version_range: ">=0.1.0"
36+
file_path: dist/language_models/llms.js
37+
function_query:
38+
name: generate
39+
type: method
40+
kind: async
41+
operator: tracePromise
42+
channel_name: "BaseLLM_generate"
43+
- module_name: "@langchain/core"
44+
version_range: ">=0.1.0"
45+
file_path: dist/embeddings.js
46+
function_query:
47+
name: constructor
48+
type: method
49+
kind: sync
50+
class: Embeddings
51+
operator: traceSync
52+
channel_name: "Embeddings_constructor"
Lines changed: 49 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -1,31 +1,24 @@
11
'use strict'
2-
32
const { addHook } = require('./helpers/instrument')
3+
44
const shimmer = require('../../datadog-shimmer')
55

66
const tracingChannel = require('dc-polyfill').tracingChannel
77

8-
const invokeTracingChannel = tracingChannel('apm:langchain:invoke')
9-
10-
function wrapLangChainPromise (fn, type, namespace = []) {
11-
return function () {
12-
if (!invokeTracingChannel.start.hasSubscribers) {
13-
return fn.apply(this, arguments)
14-
}
15-
16-
// Runnable interfaces have an `lc_namespace` property
17-
const ns = this.lc_namespace || namespace
18-
const resource = [...ns, this.constructor.name].join('.')
19-
20-
const ctx = {
21-
args: arguments,
22-
instance: this,
23-
type,
24-
resource
8+
function wrap (obj, name, channelName, namespace) {
9+
const channel = tracingChannel(channelName)
10+
shimmer.wrap(obj, name, function (original) {
11+
return function () {
12+
if (!channel.start.hasSubscribers) {
13+
return original.apply(this, arguments)
14+
}
15+
const ctx = { self: this, arguments }
16+
if (namespace) {
17+
ctx.namespace = namespace
18+
}
19+
return channel.tracePromise(original, ctx, this, ...arguments)
2520
}
26-
27-
return invokeTracingChannel.tracePromise(fn, ctx, this, ...arguments)
28-
}
21+
})
2922
}
3023

3124
// langchain compiles into ESM and CommonJS, with ESM being the default and landing in the `.js` files
@@ -35,9 +28,10 @@ const extensions = ['js', 'cjs']
3528

3629
for (const extension of extensions) {
3730
addHook({ name: '@langchain/core', file: `dist/runnables/base.${extension}`, versions: ['>=0.1'] }, exports => {
38-
const RunnableSequence = exports.RunnableSequence
39-
shimmer.wrap(RunnableSequence.prototype, 'invoke', invoke => wrapLangChainPromise(invoke, 'chain'))
40-
shimmer.wrap(RunnableSequence.prototype, 'batch', batch => wrapLangChainPromise(batch, 'chain'))
31+
if (extension === 'cjs') {
32+
wrap(exports.RunnableSequence.prototype, 'invoke', 'orchestrion:@langchain/core:RunnableSequence_invoke')
33+
wrap(exports.RunnableSequence.prototype, 'batch', 'orchestrion:@langchain/core:RunnableSequence_batch')
34+
}
4135
return exports
4236
})
4337

@@ -46,51 +40,53 @@ for (const extension of extensions) {
4640
file: `dist/language_models/chat_models.${extension}`,
4741
versions: ['>=0.1']
4842
}, exports => {
49-
const BaseChatModel = exports.BaseChatModel
50-
shimmer.wrap(
51-
BaseChatModel.prototype,
52-
'generate',
53-
generate => wrapLangChainPromise(generate, 'chat_model')
54-
)
43+
if (extension === 'cjs') {
44+
wrap(exports.BaseChatModel.prototype, 'generate', 'orchestrion:@langchain/core:BaseChatModel_generate')
45+
}
5546
return exports
5647
})
5748

5849
addHook({ name: '@langchain/core', file: `dist/language_models/llms.${extension}`, versions: ['>=0.1'] }, exports => {
59-
const BaseLLM = exports.BaseLLM
60-
shimmer.wrap(BaseLLM.prototype, 'generate', generate => wrapLangChainPromise(generate, 'llm'))
50+
if (extension === 'cjs') {
51+
wrap(exports.BaseLLM.prototype, 'generate', 'orchestrion:@langchain/core:BaseLLM_generate')
52+
}
6153
return exports
6254
})
6355

6456
addHook({ name: '@langchain/core', file: `dist/embeddings.${extension}`, versions: ['>=0.1'] }, exports => {
65-
// we cannot patch the prototype of the Embeddings class directly
66-
// this is because the "abstract class Embeddings" is transpiled from TypeScript to not include abstract functions
67-
// thus, we patch the exported class directly instead instead.
57+
if (extension === 'cjs') {
58+
shimmer.wrap(exports, 'Embeddings', Embeddings => {
59+
return class extends Embeddings {
60+
constructor (...args) {
61+
super(...args)
62+
63+
const namespace = ['langchain', 'embeddings']
6864

69-
shimmer.wrap(exports, 'Embeddings', Embeddings => {
70-
return class extends Embeddings {
71-
constructor (...args) {
72-
super(...args)
65+
if (this.constructor.name === 'OpenAIEmbeddings') {
66+
namespace.push('openai')
67+
}
7368

69+
wrap(this, 'embedQuery', 'apm:@langchain/core:Embeddings_embedQuery', namespace)
70+
wrap(this, 'embedDocuments', 'apm:@langchain/core:Embeddings_embedDocuments', namespace)
71+
}
72+
}
73+
})
74+
} else {
75+
const channel = tracingChannel('orchestrion:@langchain/core:Embeddings_constructor')
76+
channel.subscribe({
77+
end (ctx) {
78+
const { self } = ctx
7479
const namespace = ['langchain', 'embeddings']
7580

76-
// when originally implemented, we only wrapped OpenAI embeddings
77-
// these embeddings had the resource name of `langchain.embeddings.openai.OpenAIEmbeddings`
78-
// we need to make sure `openai` is appended to the resource name until a new tracer major version
79-
if (this.constructor.name === 'OpenAIEmbeddings') {
81+
if (self.constructor.name === 'OpenAIEmbeddings') {
8082
namespace.push('openai')
8183
}
8284

83-
shimmer.wrap(this, 'embedQuery', embedQuery => wrapLangChainPromise(embedQuery, 'embedding', namespace))
84-
shimmer.wrap(this, 'embedDocuments',
85-
embedDocuments => wrapLangChainPromise(embedDocuments, 'embedding', namespace))
85+
wrap(self, 'embedQuery', 'apm:@langchain/core:Embeddings_embedQuery', namespace)
86+
wrap(self, 'embedDocuments', 'apm:@langchain/core:Embeddings_embedDocuments', namespace)
8687
}
87-
88-
static [Symbol.hasInstance] (instance) {
89-
return instance instanceof Embeddings
90-
}
91-
}
92-
})
93-
88+
})
89+
}
9490
return exports
9591
})
9692
}
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
const path = require('path')
2+
const fs = require('fs')
3+
4+
// TODO this needs to be inlined to prevent issues in bundling
5+
module.exports = fs.readFileSync(path.join(__dirname, '../../orchestrion.yml'), 'utf8')
Lines changed: 18 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,21 +1,27 @@
11
'use strict'
22

3-
const LangChainTracingPlugin = require('./tracing')
4-
const LangChainLLMObsPlugin = require('../../dd-trace/src/llmobs/plugins/langchain')
3+
const langChainTracingPlugins = require('./tracing')
4+
const langChainLLMObsPlugins = require('../../dd-trace/src/llmobs/plugins/langchain')
5+
// const LangChainLLMObsPlugin = require('../../dd-trace/src/llmobs/plugins/langchain')
56
const CompositePlugin = require('../../dd-trace/src/plugins/composite')
67

8+
const plugins = {}
9+
10+
// ordering here is important - the llm observability plugin must come first
11+
// so that we can add annotations associated with the span before it finishes.
12+
// however, because the tracing plugin uses `bindStart` vs the llmobs' `start`,
13+
// the span is guaranteed to be created in the tracing plugin before the llmobs one is called
14+
for (const Plugin of langChainLLMObsPlugins) {
15+
plugins[Plugin.id] = Plugin
16+
}
17+
18+
for (const Plugin of langChainTracingPlugins) {
19+
plugins[Plugin.id] = Plugin
20+
}
21+
722
class LangChainPlugin extends CompositePlugin {
823
static get id () { return 'langchain' }
9-
static get plugins () {
10-
return {
11-
// ordering here is important - the llm observability plugin must come first
12-
// so that we can add annotations associated with the span before it finishes.
13-
// however, because the tracing plugin uses `bindStart` vs the llmobs' `start`,
14-
// the span is guaranteed to be created in the tracing plugin before the llmobs one is called
15-
llmobs: LangChainLLMObsPlugin,
16-
tracing: LangChainTracingPlugin
17-
}
18-
}
24+
static get plugins () { return plugins }
1925
}
2026

2127
module.exports = LangChainPlugin

0 commit comments

Comments
 (0)