Skip to content

Commit 194af3b

Browse files
bdbch and Copilot authored
feat: fix MarkdownManager to handle mixed Markdown and HTML parsing (#7154)
* feat: enhance MarkdownManager to handle mixed Markdown and HTML parsing
* fix: correct parsing of mixed inline HTML within Markdown content
* Update demos/vite.config.ts

  Co-authored-by: Copilot <[email protected]>
* Update packages/markdown/src/MarkdownManager.ts

  Co-authored-by: Copilot <[email protected]>
* Update packages/markdown/src/MarkdownManager.ts

  Co-authored-by: Copilot <[email protected]>
* Update tests/cypress/integration/markdown/mixed-html.spec.ts

  Co-authored-by: Copilot <[email protected]>
* fix: escape regex special characters in HTML tag matching

  Sanitize tagName before using it in RegExp constructor to prevent regex compilation errors or unintended matching behavior when tag names contain special characters.

---------

Co-authored-by: Copilot <[email protected]>
1 parent 6cdba33 commit 194af3b

File tree

4 files changed

+155
-3
lines changed

4 files changed

+155
-3
lines changed

.changeset/inline-html-fix.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
---
2+
"@tiptap/markdown": patch
3+
---
4+
5+
Fix parsing of mixed inline HTML within Markdown content so that inline HTML fragments are parsed correctly.

demos/vite.config.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -317,6 +317,7 @@ export default defineConfig({
317317
return () => {
318318
viteDevServer.middlewares.use(async (req, res, next) => {
319319
if (req?.originalUrl?.startsWith('/preview')) {
320+
// @ts-expect-error - req.url is not typed but exists at runtime in Vite middleware
320321
req.url = '/preview/index.html'
321322
}
322323

packages/markdown/src/MarkdownManager.ts

Lines changed: 77 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -375,21 +375,95 @@ export class MarkdownManager {
375375
}
376376
}
377377

378+
/**
 * Backslash-escape every regular-expression metacharacter in `str`, so the
 * result can be interpolated into a `RegExp` source and match literally.
 *
 * @param str - Text to escape.
 * @returns `str` with each of `. * + ? ^ $ { } ( ) | [ ] \` prefixed by a backslash.
 */
private escapeRegex(str: string): string {
  const metaCharacters = /[.*+?^${}()|[\]\\]/g

  // Prefix each matched metacharacter with a single backslash.
  return str.replace(metaCharacters, matched => `\\${matched}`)
}
384+
378385
/**
379386
* Parse inline tokens (bold, italic, links, etc.) into text nodes with marks.
380387
* This is the complex part that handles mark nesting and boundaries.
381388
*/
382389
private parseInlineTokens(tokens: MarkdownToken[]): JSONContent[] {
383390
const result: JSONContent[] = []
384391

385-
// Process tokens sequentially
386-
tokens.forEach(token => {
392+
// Process tokens sequentially using an index so we can lookahead and
393+
// merge split inline HTML fragments like: text / <em> / inner / </em> / text
394+
for (let i = 0; i < tokens.length; i += 1) {
395+
const token = tokens[i]
396+
387397
if (token.type === 'text') {
388398
// Create text node
389399
result.push({
390400
type: 'text',
391401
text: token.text || '',
392402
})
403+
} else if (token.type === 'html') {
404+
// Handle possible split inline HTML by attempting to detect an
405+
// opening tag and searching forward for a matching closing tag.
406+
const raw = (token.raw ?? token.text ?? '').toString()
407+
408+
// Quick checks for opening vs. closing tag
409+
const isClosing = /^<\/[\s]*[\w-]+/i.test(raw)
410+
const openMatch = raw.match(/^<[\s]*([\w-]+)(\s|>|\/|$)/i)
411+
412+
if (!isClosing && openMatch && !/\/>$/.test(raw)) {
413+
// Try to find the corresponding closing html token for this tag
414+
const tagName = openMatch[1]
415+
const escapedTagName = this.escapeRegex(tagName)
416+
const closingRegex = new RegExp(`^<\\/\\s*${escapedTagName}\\b`, 'i')
417+
let foundIndex = -1
418+
419+
// Collect intermediate raw parts to reconstruct full HTML fragment
420+
const parts: string[] = [raw]
421+
for (let j = i + 1; j < tokens.length; j += 1) {
422+
const t = tokens[j]
423+
const tRaw = (t.raw ?? t.text ?? '').toString()
424+
parts.push(tRaw)
425+
if (t.type === 'html' && closingRegex.test(tRaw)) {
426+
foundIndex = j
427+
break
428+
}
429+
}
430+
431+
if (foundIndex !== -1) {
432+
// Merge opening + inner + closing into one html fragment and parse
433+
const mergedRaw = parts.join('')
434+
const mergedToken = {
435+
type: 'html',
436+
raw: mergedRaw,
437+
text: mergedRaw,
438+
block: false,
439+
} as unknown as MarkdownToken
440+
441+
const parsed = this.parseHTMLToken(mergedToken)
442+
if (parsed) {
443+
const normalized = this.normalizeParseResult(parsed as any)
444+
if (Array.isArray(normalized)) {
445+
result.push(...normalized)
446+
} else if (normalized) {
447+
result.push(normalized)
448+
}
449+
}
450+
451+
// Advance i to the closing token
452+
i = foundIndex
453+
continue
454+
}
455+
}
456+
457+
// Fallback: single html token parse
458+
const parsedSingle = this.parseHTMLToken(token)
459+
if (parsedSingle) {
460+
const normalized = this.normalizeParseResult(parsedSingle as any)
461+
if (Array.isArray(normalized)) {
462+
result.push(...normalized)
463+
} else if (normalized) {
464+
result.push(normalized)
465+
}
466+
}
393467
} else if (token.type) {
394468
// Handle inline marks (bold, italic, etc.)
395469
const markHandler = this.getHandlerForToken(token.type)
@@ -415,7 +489,7 @@ export class MarkdownManager {
415489
result.push(...this.parseInlineTokens(token.tokens))
416490
}
417491
}
418-
})
492+
}
419493

420494
return result
421495
}
tests/cypress/integration/markdown/mixed-html.spec.ts

Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,72 @@
1+
import { Document } from '@tiptap/extension-document'
2+
import { Heading } from '@tiptap/extension-heading'
3+
import { Italic } from '@tiptap/extension-italic'
4+
import { Paragraph } from '@tiptap/extension-paragraph'
5+
import { Text } from '@tiptap/extension-text'
6+
import { MarkdownManager } from '@tiptap/markdown'
7+
8+
/**
 * Checks that inline HTML embedded in Markdown (`<em>…</em>`) is parsed into
 * text nodes carrying the `italic` mark, alone and next to Markdown emphasis.
 */
describe('MarkdownManager Mixed Markdown + HTML', () => {
  const basicExtensions = [Document, Paragraph, Text, Heading, Italic]
  let manager: MarkdownManager

  // Predicate shared by all cases: is this mark the italic mark?
  const isItalic = (mark: any) => mark.type === 'italic'

  // A new manager is constructed before every test case.
  beforeEach(() => {
    manager = new MarkdownManager({ extensions: basicExtensions })
  })

  it('parses heading with inline HTML <em> as italic', () => {
    const doc = manager.parse('## hello <em>world</em>')

    expect(doc.type).to.equal('doc')
    expect(doc.content).to.be.an('array')

    const heading = doc.content[0]
    expect(heading.type).to.equal('heading')

    // Gather text nodes: direct text children plus the children of any
    // non-text child (one level deep).
    const textNodes: any[] = []
    heading.content.forEach((child: any) => {
      if (child.type === 'text') {
        textNodes.push(child)
      } else {
        textNodes.push(...(child.content || []))
      }
    })

    const worldNode = textNodes.find((candidate: any) => candidate.text && candidate.text.includes('world'))

    // Function-call assertions keep the "no-unused-expressions" lint rule quiet.
    expect(worldNode).to.not.equal(undefined)
    expect(worldNode.marks).to.be.an('array')
    expect(worldNode.marks.some(isItalic)).to.equal(true)
  })

  it('parses standalone inline HTML <em>world</em> as italic', () => {
    const doc = manager.parse('<em>world</em>')

    expect(doc.type).to.equal('doc')

    // The test expects the inline HTML to be wrapped in a paragraph.
    const paragraph = doc.content[0]
    expect(paragraph.type).to.equal('paragraph')

    const textNode = paragraph.content[0]
    expect(textNode.text).to.equal('world')
    expect(textNode.marks).to.be.an('array')
    expect((textNode.marks || []).some(isItalic)).to.equal(true)
  })

  it('parses markdown italic next to HTML italic correctly', () => {
    const doc = manager.parse('*a* <em>b</em> *c*')

    expect(doc.type).to.equal('doc')

    const para = doc.content[0]
    expect(para.type).to.equal('paragraph')

    // Reduce each inline node to its text and (possibly empty) mark list.
    const runs = para.content.map((inline: any) => ({ text: inline.text, marks: inline.marks || [] }))
    const trimmedText = (run: any) => (run.text || '').trim()
    const texts = runs.map(trimmedText)
    const letters = ['a', 'b', 'c']

    // First make sure all three letters are present as separate runs ...
    for (const letter of letters) {
      expect(texts).to.include(letter)
    }

    // ... then verify each of those runs is italic.
    for (const letter of letters) {
      const node = runs.find(run => trimmedText(run) === letter)

      expect(node).to.not.equal(undefined)
      expect(((node as any).marks || []).some(isItalic)).to.equal(true)
    }
  })
})

0 commit comments

Comments
 (0)