Skip to content

Commit c779ba9

Browse files
committed
feat(docql): add frontmatter and table extraction for markdown
Implements $.frontmatter queries to extract YAML frontmatter and adds support for extracting and filtering markdown tables in DocQL. Updates result formatting to display frontmatter data.
1 parent 1b1baa4 commit c779ba9

File tree

5 files changed

+257
-2
lines changed

5 files changed

+257
-2
lines changed

mpp-core/src/commonMain/kotlin/cc/unitmesh/agent/tool/impl/docql/DocQLResultFormatter.kt

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -679,6 +679,16 @@ object DocQLResultFormatter {
679679
}
680680
}
681681

682+
is DocQLResult.Frontmatter -> {
683+
buildString {
684+
appendLine("Frontmatter:")
685+
appendLine()
686+
result.data.forEach { (key, value) ->
687+
appendLine(" $key: $value")
688+
}
689+
}
690+
}
691+
682692
is DocQLResult.Empty -> {
683693
"No results found."
684694
}

mpp-core/src/commonMain/kotlin/cc/unitmesh/devins/document/docql/BaseDocQLExecutor.kt

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@ abstract class BaseDocQLExecutor(
3535
"entities" -> executeEntitiesQuery(query.nodes.drop(2))
3636
"content" -> executeContentQuery(query.nodes.drop(2))
3737
"code" -> executeCodeQuery(query.nodes.drop(2))
38+
"frontmatter" -> executeFrontmatterQuery(query.nodes.drop(2))
3839
"files" -> DocQLResult.Error("$.files queries must be executed via DocumentRegistry.queryDocuments()")
3940
"structure" -> executeStructureQuery(query.nodes.drop(2))
4041
else -> DocQLResult.Error("Unknown context '${contextNode.name}'")
@@ -168,6 +169,11 @@ abstract class BaseDocQLExecutor(
168169
*/
169170
protected abstract suspend fun executeCodeQuery(nodes: List<DocQLNode>): DocQLResult
170171

172+
/**
 * Handles a `$.frontmatter` query. Every concrete executor provides its own implementation.
 *
 * @param nodes the query nodes left after the dispatcher drops the first two
 * @return the outcome of the query as a [DocQLResult]
 */
protected abstract suspend fun executeFrontmatterQuery(nodes: List<DocQLNode>): DocQLResult
176+
171177
/**
172178
* Flatten TOC tree into a list
173179
*/

mpp-core/src/commonMain/kotlin/cc/unitmesh/devins/document/docql/CodeDocQLExecutor.kt

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -106,6 +106,15 @@ class CodeDocQLExecutor(
106106
}
107107
}
108108

109+
/**
 * Frontmatter query for code files.
 *
 * Code files are treated as having no YAML frontmatter, so the query nodes are
 * ignored and the result is always [DocQLResult.Empty].
 *
 * @param nodes remaining query nodes (unused)
 * @return always [DocQLResult.Empty]
 */
override suspend fun executeFrontmatterQuery(nodes: List<DocQLNode>): DocQLResult =
    DocQLResult.Empty
117+
109118
/**
110119
* Execute $.code.classes[*] or $.code.classes[filter]
111120
*/

mpp-core/src/commonMain/kotlin/cc/unitmesh/devins/document/docql/DocQLResult.kt

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,12 @@ sealed class DocQLResult {
7979
val fileCount: Int
8080
) : DocQLResult()
8181

82+
/**
 * Result produced by `$.frontmatter` queries.
 *
 * @property data the parsed YAML frontmatter entries, keyed by field name
 */
data class Frontmatter(
    val data: Map<String, Any>
) : DocQLResult()
87+
8288
/**
8389
* Empty result
8490
*/
@@ -113,6 +119,7 @@ sealed class DocQLResult {
113119
is DocQLResult.Tables -> result.totalCount == 0
114120
is DocQLResult.Files -> result.items.isEmpty()
115121
is DocQLResult.Structure -> result.paths.isEmpty()
122+
is DocQLResult.Frontmatter -> result.data.isEmpty()
116123
is DocQLResult.Error -> true
117124
}
118125
}

mpp-core/src/commonMain/kotlin/cc/unitmesh/devins/document/docql/MarkdownDocQLExecutor.kt

Lines changed: 225 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
package cc.unitmesh.devins.document.docql
22

33
import cc.unitmesh.devins.document.*
4+
import cc.unitmesh.yaml.YamlUtils
45

56
/**
67
* Code block extracted from document
@@ -76,6 +77,28 @@ class MarkdownDocQLExecutor(
7677
return DocQLResult.Empty
7778
}
7879

80+
/**
 * Runs a `$.frontmatter` query against the loaded markdown document.
 *
 * The remaining query nodes are not inspected; the whole frontmatter map is returned.
 *
 * @param nodes remaining query nodes (unused)
 * @return [DocQLResult.Error] when no document or parser service is available,
 *   [DocQLResult.Frontmatter] when a frontmatter block was extracted,
 *   otherwise [DocQLResult.Empty]
 */
override suspend fun executeFrontmatterQuery(nodes: List<DocQLNode>): DocQLResult {
    if (documentFile == null || parserService == null) {
        return DocQLResult.Error("No document loaded")
    }

    val rawText = parserService.getDocumentContent()
    if (rawText.isNullOrEmpty()) return DocQLResult.Empty

    // No frontmatter block (or one that could not be extracted) is reported as Empty.
    return extractFrontmatter(rawText)
        ?.let { parsed -> DocQLResult.Frontmatter(parsed) }
        ?: DocQLResult.Empty
}
101+
79102
/**
80103
* Execute all chunks query: $.content.chunks() or $.content.all()
81104
*/
@@ -371,9 +394,209 @@ class MarkdownDocQLExecutor(
371394

372395
/**
 * Execute table query: $.content.table[*]
 * Extracts tables from markdown content and narrows them with each query node in order.
 *
 * Supports:
 * - $.content.table[*] - All tables
 * - $.content.table[0] - First table
 * - $.content.table[?(@.headers~="Name")] - Filter by header content
 *
 * @param nodes the array-access nodes that follow `table` in the query
 * @return [DocQLResult.Error] when no document is loaded or a node is not an array access,
 *   [DocQLResult.Tables] keyed by the document path when at least one table remains,
 *   otherwise [DocQLResult.Empty]
 */
private fun executeTableQuery(nodes: List<DocQLNode>): DocQLResult {
    if (documentFile == null || parserService == null) {
        return DocQLResult.Error("No document loaded")
    }

    val content = parserService.getDocumentContent()
    if (content.isNullOrEmpty()) {
        return DocQLResult.Empty
    }

    var tables = extractTables(content)

    for (node in nodes) {
        tables = when (node) {
            // [*] keeps every table.
            is DocQLNode.ArrayAccess.All -> tables

            // getOrNull covers both ends of the range: an index past the end AND a
            // negative index yield no table instead of throwing on the list access.
            is DocQLNode.ArrayAccess.Index -> listOfNotNull(tables.getOrNull(node.index))

            is DocQLNode.ArrayAccess.Filter -> filterTables(tables, node.condition)

            else -> return DocQLResult.Error("Invalid operation for table query")
        }
    }

    return if (tables.isNotEmpty()) {
        DocQLResult.Tables(mapOf(documentFile.path to tables))
    } else {
        DocQLResult.Empty
    }
}
448+
449+
/**
 * Extract tables from markdown content.
 * Parses markdown pipe tables: | Header 1 | Header 2 |
 *
 * A table is a run of at least two consecutive lines that, once trimmed, both start
 * and end with `|`. The first line supplies the headers. If the second line is a
 * delimiter row (e.g. `|---|:--:|`) it is skipped; every other line becomes a data row.
 * A header followed only by a delimiter row therefore yields a table with no rows.
 *
 * @param content the full markdown text
 * @return the tables in document order; each location carries the 1-based line of its header
 */
private fun extractTables(content: String): List<TableBlock> {
    // Built once per call rather than once per table. The dash is placed last in the
    // character class so it is always a literal: written as `\s-|` it sits between a
    // class escape and another character, which JavaScript regex rejects in Unicode
    // mode, and this file is in commonMain.
    val separatorPattern = Regex("\\|[:\\s|-]+\\|")

    fun isPipeRow(line: String): Boolean {
        val trimmed = line.trim()
        return trimmed.startsWith("|") && trimmed.endsWith("|")
    }

    val lines = content.lines()
    val tables = mutableListOf<TableBlock>()
    var i = 0

    while (i < lines.size) {
        if (!isPipeRow(lines[i])) {
            i++
            continue
        }

        // Consume the whole run of pipe-delimited lines.
        val startIndex = i
        while (i < lines.size && isPipeRow(lines[i])) {
            i++
        }
        val tableLines = lines.subList(startIndex, i)

        // A single pipe line on its own is not treated as a table.
        if (tableLines.size < 2) continue

        val headers = parseTableRow(tableLines[0])

        // Requiring a dash keeps an all-blank row such as `|  |  |` from being
        // mistaken for the delimiter row and silently dropped.
        val secondLine = tableLines[1].trim()
        val hasSeparator = '-' in secondLine && secondLine.matches(separatorPattern)

        val rows = tableLines
            .drop(if (hasSeparator) 2 else 1)
            .map { parseTableRow(it) }

        val startLine = startIndex + 1
        tables.add(
            TableBlock(
                headers = headers,
                rows = rows,
                location = Location(
                    anchor = "#table-$startLine",
                    line = startLine
                )
            )
        )
    }

    return tables
}
507+
508+
/**
 * Splits one pipe-table line into its trimmed cell texts.
 *
 * Surrounding whitespace and at most one leading and one trailing `|` are removed
 * before the line is split on the remaining `|` characters.
 *
 * @param line a single table line, e.g. `| a | b |`
 * @return the cell contents in order, each trimmed
 */
private fun parseTableRow(line: String): List<String> {
    val withoutEdges = line.trim()
        .removePrefix("|")
        .removeSuffix("|")
    val cells = withoutEdges.split('|')
    return cells.map { cell -> cell.trim() }
}
518+
519+
/**
 * Keeps only the tables that satisfy [condition].
 *
 * Supported properties:
 * - `rowCount` / `columnCount` for equality and ordering comparisons
 * - `headers` for the contains comparison (case-insensitive match on any header)
 *
 * Any other property, or any other condition type, matches no table.
 *
 * @param tables the candidate tables
 * @param condition the filter to apply
 * @return the tables for which the condition holds, in their original order
 */
private fun filterTables(tables: List<TableBlock>, condition: FilterCondition): List<TableBlock> {
    // Resolves a countable property by name; null means the property is not supported.
    fun sizeOf(table: TableBlock, property: String?): Int? = when (property) {
        "rowCount" -> table.rows.size
        "columnCount" -> table.headers.size
        else -> null
    }

    return tables.filter { table ->
        when (condition) {
            is FilterCondition.Equals ->
                sizeOf(table, condition.property)?.let { it.toString() == condition.value } ?: false

            is FilterCondition.Contains ->
                if (condition.property == "headers") {
                    table.headers.any { header -> header.contains(condition.value, ignoreCase = true) }
                } else {
                    false
                }

            is FilterCondition.GreaterThan ->
                sizeOf(table, condition.property)?.let { it > condition.value } ?: false

            is FilterCondition.GreaterThanOrEquals ->
                sizeOf(table, condition.property)?.let { it >= condition.value } ?: false

            is FilterCondition.LessThan ->
                sizeOf(table, condition.property)?.let { it < condition.value } ?: false

            is FilterCondition.LessThanOrEquals ->
                sizeOf(table, condition.property)?.let { it <= condition.value } ?: false

            else -> false
        }
    }
}
576+
577+
/**
 * Extract frontmatter from markdown content.
 * Frontmatter is YAML content between --- delimiters at the start of the file.
 *
 * Example:
 * ---
 * title: My Document
 * author: John Doe
 * tags: [markdown, documentation]
 * ---
 *
 * Only a block that opens on the very first line is recognised; `---` horizontal
 * rules further down the document are never treated as frontmatter. The closing
 * delimiter may be followed by a line break or by the end of the input, and both
 * LF and CRLF line endings are accepted.
 *
 * @param content the full markdown text
 * @return the parsed YAML mapping, or null when there is no frontmatter block or the
 *   YAML cannot be parsed
 */
private fun extractFrontmatter(content: String): Map<String, Any>? {
    // Deliberately no RegexOption.MULTILINE: with it `^` matches at every line start,
    // so find() could pick up a pair of `---` rules anywhere in the document.
    val frontmatterRegex = Regex("^---[ \\t]*\\r?\\n([\\s\\S]*?)\\r?\\n---[ \\t]*(?:\\r?\\n|\$)")
    val match = frontmatterRegex.find(content) ?: return null

    val yamlContent = match.groupValues[1]
    return try {
        YamlUtils.load(yamlContent)
    } catch (e: Exception) {
        // Best effort: malformed YAML is reported as "no frontmatter" rather than failing the query.
        null
    }
}
379601
}
602+

0 commit comments

Comments
 (0)