|
1 | 1 | package cc.unitmesh.devins.document.docql |
2 | 2 |
|
3 | 3 | import cc.unitmesh.devins.document.* |
| 4 | +import cc.unitmesh.yaml.YamlUtils |
4 | 5 |
|
5 | 6 | /** |
6 | 7 | * Code block extracted from document |
@@ -76,6 +77,28 @@ class MarkdownDocQLExecutor( |
76 | 77 | return DocQLResult.Empty |
77 | 78 | } |
78 | 79 |
|
/**
 * Handles the `$.frontmatter` query.
 *
 * Reads the loaded document's content and parses the YAML frontmatter found in it.
 *
 * @param nodes remaining query nodes; not consulted by this query
 * @return [DocQLResult.Error] when no document is loaded, [DocQLResult.Empty] when the
 * document has no content or no parseable frontmatter, otherwise [DocQLResult.Frontmatter]
 */
override suspend fun executeFrontmatterQuery(nodes: List<DocQLNode>): DocQLResult {
    if (documentFile == null || parserService == null) {
        return DocQLResult.Error("No document loaded")
    }

    val content = parserService.getDocumentContent()
    if (content.isNullOrEmpty()) return DocQLResult.Empty

    // A missing or unparseable frontmatter block is reported as an empty result.
    return extractFrontmatter(content)
        ?.let { parsed -> DocQLResult.Frontmatter(parsed) }
        ?: DocQLResult.Empty
}
| 101 | + |
79 | 102 | /** |
80 | 103 | * Execute all chunks query: $.content.chunks() or $.content.all() |
81 | 104 | */ |
@@ -371,9 +394,209 @@ class MarkdownDocQLExecutor( |
371 | 394 |
|
/**
 * Execute table query: $.content.table[*]
 * Extracts tables from markdown content and narrows them with the given access nodes.
 *
 * Supports:
 * - $.content.table[*] - All tables
 * - $.content.table[0] - First table
 * - $.content.table[?(@.headers~="Name")] - Filter by header content
 *
 * Nodes are applied in order, each one operating on the result of the previous one.
 *
 * @param nodes array-access nodes following `table` in the query
 * @return [DocQLResult.Error] when no document is loaded or a node is not an array access,
 * [DocQLResult.Empty] when the document has no content or no table survives the nodes,
 * otherwise [DocQLResult.Tables] keyed by the document path
 */
private fun executeTableQuery(nodes: List<DocQLNode>): DocQLResult {
    if (documentFile == null || parserService == null) {
        return DocQLResult.Error("No document loaded")
    }

    val content = parserService.getDocumentContent()
    if (content.isNullOrEmpty()) {
        return DocQLResult.Empty
    }

    var tables = extractTables(content)

    for (node in nodes) {
        tables = when (node) {
            // [*] keeps every table.
            is DocQLNode.ArrayAccess.All -> tables

            // getOrNull covers both too-large and negative indices; the previous
            // upper-bound-only check let a negative index throw.
            is DocQLNode.ArrayAccess.Index -> listOfNotNull(tables.getOrNull(node.index))

            is DocQLNode.ArrayAccess.Filter -> filterTables(tables, node.condition)

            else -> return DocQLResult.Error("Invalid operation for table query")
        }
    }

    return if (tables.isNotEmpty()) {
        DocQLResult.Tables(mapOf(documentFile.path to tables))
    } else {
        DocQLResult.Empty
    }
}
| 448 | + |
/**
 * Extract tables from markdown content.
 * Parses markdown pipe tables: | Header 1 | Header 2 |
 *
 * A table is a run of at least two consecutive lines that, once trimmed, start and end
 * with `|`. The first line supplies the headers. If the second line is a separator row
 * (e.g. `|---|:---:|`) it is skipped; every remaining line becomes a data row. A table
 * consisting of only a header and a separator is returned with no rows.
 *
 * @param content full markdown text
 * @return tables in document order; each location carries the 1-based line number of the
 * header row and an anchor of the form `#table-<line>`
 */
private fun extractTables(content: String): List<TableBlock> {
    // Built once per call. The hyphen sits last in the character class so it is always
    // read as a literal rather than as a range operator.
    val separatorRegex = Regex("""\|[\s:|-]+\|""")

    fun isTableLine(text: String): Boolean =
        text.trim().let { it.startsWith("|") && it.endsWith("|") }

    val tables = mutableListOf<TableBlock>()
    val lines = content.lines()
    var i = 0

    while (i < lines.size) {
        if (!isTableLine(lines[i])) {
            i++
            continue
        }

        // Consume the whole run of pipe-delimited lines.
        val startIndex = i
        while (i < lines.size && isTableLine(lines[i])) {
            i++
        }
        val tableLines = lines.subList(startIndex, i)

        // A lone pipe-delimited line is not treated as a table.
        if (tableLines.size < 2) continue

        // Check for the separator regardless of table length, so a header + separator
        // table does not report the separator as a data row.
        val hasSeparator = tableLines[1].trim().matches(separatorRegex)
        val dataRows = tableLines.drop(if (hasSeparator) 2 else 1)

        val startLine = startIndex + 1 // reported line numbers are 1-based
        tables.add(
            TableBlock(
                headers = parseTableRow(tableLines[0]),
                rows = dataRows.map { parseTableRow(it) },
                location = Location(
                    anchor = "#table-$startLine",
                    line = startLine
                )
            )
        )
    }

    return tables
}
| 507 | + |
/**
 * Parse a table row into cells.
 *
 * The line is trimmed, one leading and one trailing `|` are removed, and the remainder is
 * split on unescaped `|` characters. A backslash-escaped pipe (`\|`) stays inside its cell
 * and is emitted as a plain `|`. Each cell is trimmed.
 *
 * @param line a single pipe-table line, e.g. `| a | b |`
 * @return the trimmed cell texts in order; always contains at least one element
 */
private fun parseTableRow(line: String): List<String> {
    val inner = line.trim()
        .removePrefix("|")
        .removeSuffix("|")

    val cells = mutableListOf<String>()
    val current = StringBuilder()
    var pos = 0

    while (pos < inner.length) {
        val ch = inner[pos]
        when {
            // Escaped pipe: part of the cell content, not a column boundary.
            ch == '\\' && pos + 1 < inner.length && inner[pos + 1] == '|' -> {
                current.append('|')
                pos += 2
            }

            ch == '|' -> {
                cells.add(current.toString().trim())
                current.clear()
                pos++
            }

            else -> {
                current.append(ch)
                pos++
            }
        }
    }
    cells.add(current.toString().trim())

    return cells
}
| 518 | + |
/**
 * Keeps only the tables that satisfy [condition].
 *
 * Supported properties:
 * - `rowCount` / `columnCount` with equality and the four ordering comparisons
 * - `headers` with the contains comparison (case-insensitive, any header may match)
 *
 * Any other property or condition type rejects every table.
 */
private fun filterTables(tables: List<TableBlock>, condition: FilterCondition): List<TableBlock> {
    // Resolves a numeric table property; null means the property is not supported.
    fun dimension(table: TableBlock, property: String?): Int? = when (property) {
        "rowCount" -> table.rows.size
        "columnCount" -> table.headers.size
        else -> null
    }

    return tables.filter { table ->
        when (condition) {
            is FilterCondition.Equals ->
                dimension(table, condition.property)
                    ?.let { size -> size.toString() == condition.value }
                    ?: false

            is FilterCondition.Contains ->
                condition.property == "headers" &&
                    table.headers.any { header -> header.contains(condition.value, ignoreCase = true) }

            is FilterCondition.GreaterThan ->
                dimension(table, condition.property)
                    ?.let { size -> size > condition.value }
                    ?: false

            is FilterCondition.GreaterThanOrEquals ->
                dimension(table, condition.property)
                    ?.let { size -> size >= condition.value }
                    ?: false

            is FilterCondition.LessThan ->
                dimension(table, condition.property)
                    ?.let { size -> size < condition.value }
                    ?: false

            is FilterCondition.LessThanOrEquals ->
                dimension(table, condition.property)
                    ?.let { size -> size <= condition.value }
                    ?: false

            else -> false
        }
    }
}
| 576 | + |
/**
 * Extract frontmatter from markdown content.
 * Frontmatter is YAML content between --- delimiters at the start of the file.
 *
 * Example:
 * ---
 * title: My Document
 * author: John Doe
 * tags: [markdown, documentation]
 * ---
 *
 * Only a block that opens on the very first line is recognised; `---` horizontal rules
 * later in the document are ignored. LF and CRLF line endings are both accepted, and the
 * closing delimiter may be the last line of the file without a trailing newline.
 *
 * @param content full markdown text
 * @return the parsed YAML mapping, or null when there is no frontmatter block or its
 * content cannot be parsed
 */
private fun extractFrontmatter(content: String): Map<String, Any>? {
    // No MULTILINE option: `^` must anchor to the start of the input only, otherwise any
    // pair of `---` lines in the body would be picked up as frontmatter.
    val frontmatterRegex = Regex("""^---[ \t]*\r?\n([\s\S]*?)\r?\n---[ \t]*(?:\r?\n|$)""")
    val match = frontmatterRegex.find(content) ?: return null

    val yamlContent = match.groupValues[1]
    return try {
        YamlUtils.load(yamlContent)
    } catch (e: Exception) {
        // Best effort: invalid YAML is treated the same as absent frontmatter.
        null
    }
}
379 | 601 | } |
| 602 | + |
0 commit comments