Skip to content

Commit a6b3daa

Browse files
committed
Also include "input index" in SourceSpan
The existing line/column indexes in `SourceSpan` are useful for some cases, e.g. editors that are line-based. But for other cases, it's useful to be able to get the index within the original input string. An example: If the input string is "foo\n\nbar", the "bar" paragraph has the following `SourceSpan`: line 2 (third line), column 0, length 3. With this change, it now also includes the input index: 5 ("b" is the character at index 5 in the string). That means it's possible to use e.g. `substring` directly on the input instead of having to split the input text into lines first.
1 parent 6e93f85 commit a6b3daa

File tree

17 files changed

+425
-262
lines changed

17 files changed

+425
-262
lines changed

commonmark-ext-autolink/src/main/java/org/commonmark/ext/autolink/internal/AutolinkPostProcessor.java

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -61,8 +61,7 @@ private static Text createTextNode(String literal, Span span, SourceSpan sourceS
6161
String text = literal.substring(beginIndex, endIndex);
6262
Text textNode = new Text(text);
6363
if (sourceSpan != null) {
64-
int length = endIndex - beginIndex;
65-
textNode.addSourceSpan(SourceSpan.of(sourceSpan.getLineIndex(), beginIndex, length));
64+
textNode.addSourceSpan(sourceSpan.subSpan(beginIndex, endIndex));
6665
}
6766
return textNode;
6867
}

commonmark-ext-autolink/src/test/java/org/commonmark/ext/autolink/AutolinkTest.java

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -71,43 +71,43 @@ public void sourceSpans() {
7171

7272
Paragraph paragraph = (Paragraph) document.getFirstChild();
7373
Text abc = (Text) paragraph.getFirstChild();
74-
assertEquals(List.of(SourceSpan.of(0, 0, 3)),
74+
assertEquals(List.of(SourceSpan.of(0, 0, 0, 3)),
7575
abc.getSourceSpans());
7676

7777
assertTrue(abc.getNext() instanceof SoftLineBreak);
7878

7979
Link one = (Link) abc.getNext().getNext();
8080
assertEquals("http://example.com/one", one.getDestination());
81-
assertEquals(List.of(SourceSpan.of(1, 0, 22)),
81+
assertEquals(List.of(SourceSpan.of(1, 0, 4, 22)),
8282
one.getSourceSpans());
8383

8484
assertTrue(one.getNext() instanceof SoftLineBreak);
8585

8686
Text def = (Text) one.getNext().getNext();
8787
assertEquals("def ", def.getLiteral());
88-
assertEquals(List.of(SourceSpan.of(2, 0, 4)),
88+
assertEquals(List.of(SourceSpan.of(2, 0, 27, 4)),
8989
def.getSourceSpans());
9090

9191
Link two = (Link) def.getNext();
9292
assertEquals("http://example.com/two", two.getDestination());
93-
assertEquals(List.of(SourceSpan.of(2, 4, 22)),
93+
assertEquals(List.of(SourceSpan.of(2, 4, 31, 22)),
9494
two.getSourceSpans());
9595

9696
assertTrue(two.getNext() instanceof SoftLineBreak);
9797

9898
Text ghi = (Text) two.getNext().getNext();
9999
assertEquals("ghi ", ghi.getLiteral());
100-
assertEquals(List.of(SourceSpan.of(3, 0, 4)),
100+
assertEquals(List.of(SourceSpan.of(3, 0, 54, 4)),
101101
ghi.getSourceSpans());
102102

103103
Link three = (Link) ghi.getNext();
104104
assertEquals("http://example.com/three", three.getDestination());
105-
assertEquals(List.of(SourceSpan.of(3, 4, 24)),
105+
assertEquals(List.of(SourceSpan.of(3, 4, 58, 24)),
106106
three.getSourceSpans());
107107

108108
Text jkl = (Text) three.getNext();
109109
assertEquals(" jkl", jkl.getLiteral());
110-
assertEquals(List.of(SourceSpan.of(3, 28, 4)),
110+
assertEquals(List.of(SourceSpan.of(3, 28, 82, 4)),
111111
jkl.getSourceSpans());
112112
}
113113

commonmark-ext-footnotes/src/test/java/org/commonmark/ext/footnotes/FootnotesTest.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -287,10 +287,10 @@ public void testSourcePositions() {
287287

288288
var doc = parser.parse("Test [^foo]\n\n[^foo]: /url\n");
289289
var ref = find(doc, FootnoteReference.class);
290-
assertEquals(ref.getSourceSpans(), List.of(SourceSpan.of(0, 5, 6)));
290+
assertEquals(ref.getSourceSpans(), List.of(SourceSpan.of(0, 5, 5, 6)));
291291

292292
var def = find(doc, FootnoteDefinition.class);
293-
assertEquals(def.getSourceSpans(), List.of(SourceSpan.of(2, 0, 12)));
293+
assertEquals(def.getSourceSpans(), List.of(SourceSpan.of(2, 0, 13, 12)));
294294
}
295295

296296
private static <T> T find(Node parent, Class<T> nodeClass) {

commonmark-ext-gfm-strikethrough/src/test/java/org/commonmark/ext/gfm/strikethrough/StrikethroughTest.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -117,7 +117,7 @@ public void sourceSpans() {
117117
Node document = parser.parse("hey ~~there~~\n");
118118
Paragraph block = (Paragraph) document.getFirstChild();
119119
Node strikethrough = block.getLastChild();
120-
assertEquals(List.of(SourceSpan.of(0, 4, 9)),
120+
assertEquals(List.of(SourceSpan.of(0, 4, 4, 9)),
121121
strikethrough.getSourceSpans());
122122
}
123123

commonmark-ext-gfm-tables/src/test/java/org/commonmark/ext/gfm/tables/TablesTest.java

Lines changed: 20 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -791,45 +791,45 @@ public void sourceSpans() {
791791
Node document = parser.parse("Abc|Def\n---|---\n|1|2\n 3|four|\n|||\n");
792792

793793
TableBlock block = (TableBlock) document.getFirstChild();
794-
assertEquals(List.of(SourceSpan.of(0, 0, 7), SourceSpan.of(1, 0, 7),
795-
SourceSpan.of(2, 0, 4), SourceSpan.of(3, 0, 8), SourceSpan.of(4, 0, 3)),
794+
assertEquals(List.of(SourceSpan.of(0, 0, 0, 7), SourceSpan.of(1, 0, 8, 7),
795+
SourceSpan.of(2, 0, 16, 4), SourceSpan.of(3, 0, 21, 8), SourceSpan.of(4, 0, 30, 3)),
796796
block.getSourceSpans());
797797

798798
TableHead head = (TableHead) block.getFirstChild();
799-
assertEquals(List.of(SourceSpan.of(0, 0, 7)), head.getSourceSpans());
799+
assertEquals(List.of(SourceSpan.of(0, 0, 0, 7)), head.getSourceSpans());
800800

801801
TableRow headRow = (TableRow) head.getFirstChild();
802-
assertEquals(List.of(SourceSpan.of(0, 0, 7)), headRow.getSourceSpans());
802+
assertEquals(List.of(SourceSpan.of(0, 0, 0, 7)), headRow.getSourceSpans());
803803
TableCell headRowCell1 = (TableCell) headRow.getFirstChild();
804804
TableCell headRowCell2 = (TableCell) headRow.getLastChild();
805-
assertEquals(List.of(SourceSpan.of(0, 0, 3)), headRowCell1.getSourceSpans());
806-
assertEquals(List.of(SourceSpan.of(0, 0, 3)), headRowCell1.getFirstChild().getSourceSpans());
807-
assertEquals(List.of(SourceSpan.of(0, 4, 3)), headRowCell2.getSourceSpans());
808-
assertEquals(List.of(SourceSpan.of(0, 4, 3)), headRowCell2.getFirstChild().getSourceSpans());
805+
assertEquals(List.of(SourceSpan.of(0, 0, 0, 3)), headRowCell1.getSourceSpans());
806+
assertEquals(List.of(SourceSpan.of(0, 0, 0, 3)), headRowCell1.getFirstChild().getSourceSpans());
807+
assertEquals(List.of(SourceSpan.of(0, 4, 4, 3)), headRowCell2.getSourceSpans());
808+
assertEquals(List.of(SourceSpan.of(0, 4, 4, 3)), headRowCell2.getFirstChild().getSourceSpans());
809809

810810
TableBody body = (TableBody) block.getLastChild();
811-
assertEquals(List.of(SourceSpan.of(2, 0, 4), SourceSpan.of(3, 0, 8), SourceSpan.of(4, 0, 3)), body.getSourceSpans());
811+
assertEquals(List.of(SourceSpan.of(2, 0, 16, 4), SourceSpan.of(3, 0, 21, 8), SourceSpan.of(4, 0, 30, 3)), body.getSourceSpans());
812812

813813
TableRow bodyRow1 = (TableRow) body.getFirstChild();
814-
assertEquals(List.of(SourceSpan.of(2, 0, 4)), bodyRow1.getSourceSpans());
814+
assertEquals(List.of(SourceSpan.of(2, 0, 16, 4)), bodyRow1.getSourceSpans());
815815
TableCell bodyRow1Cell1 = (TableCell) bodyRow1.getFirstChild();
816816
TableCell bodyRow1Cell2 = (TableCell) bodyRow1.getLastChild();
817-
assertEquals(List.of(SourceSpan.of(2, 1, 1)), bodyRow1Cell1.getSourceSpans());
818-
assertEquals(List.of(SourceSpan.of(2, 1, 1)), bodyRow1Cell1.getFirstChild().getSourceSpans());
819-
assertEquals(List.of(SourceSpan.of(2, 3, 1)), bodyRow1Cell2.getSourceSpans());
820-
assertEquals(List.of(SourceSpan.of(2, 3, 1)), bodyRow1Cell2.getFirstChild().getSourceSpans());
817+
assertEquals(List.of(SourceSpan.of(2, 1, 17, 1)), bodyRow1Cell1.getSourceSpans());
818+
assertEquals(List.of(SourceSpan.of(2, 1, 17, 1)), bodyRow1Cell1.getFirstChild().getSourceSpans());
819+
assertEquals(List.of(SourceSpan.of(2, 3, 19, 1)), bodyRow1Cell2.getSourceSpans());
820+
assertEquals(List.of(SourceSpan.of(2, 3, 19, 1)), bodyRow1Cell2.getFirstChild().getSourceSpans());
821821

822822
TableRow bodyRow2 = (TableRow) body.getFirstChild().getNext();
823-
assertEquals(List.of(SourceSpan.of(3, 0, 8)), bodyRow2.getSourceSpans());
823+
assertEquals(List.of(SourceSpan.of(3, 0, 21, 8)), bodyRow2.getSourceSpans());
824824
TableCell bodyRow2Cell1 = (TableCell) bodyRow2.getFirstChild();
825825
TableCell bodyRow2Cell2 = (TableCell) bodyRow2.getLastChild();
826-
assertEquals(List.of(SourceSpan.of(3, 1, 1)), bodyRow2Cell1.getSourceSpans());
827-
assertEquals(List.of(SourceSpan.of(3, 1, 1)), bodyRow2Cell1.getFirstChild().getSourceSpans());
828-
assertEquals(List.of(SourceSpan.of(3, 3, 4)), bodyRow2Cell2.getSourceSpans());
829-
assertEquals(List.of(SourceSpan.of(3, 3, 4)), bodyRow2Cell2.getFirstChild().getSourceSpans());
826+
assertEquals(List.of(SourceSpan.of(3, 1, 22, 1)), bodyRow2Cell1.getSourceSpans());
827+
assertEquals(List.of(SourceSpan.of(3, 1, 22, 1)), bodyRow2Cell1.getFirstChild().getSourceSpans());
828+
assertEquals(List.of(SourceSpan.of(3, 3, 24, 4)), bodyRow2Cell2.getSourceSpans());
829+
assertEquals(List.of(SourceSpan.of(3, 3, 24, 4)), bodyRow2Cell2.getFirstChild().getSourceSpans());
830830

831831
TableRow bodyRow3 = (TableRow) body.getLastChild();
832-
assertEquals(List.of(SourceSpan.of(4, 0, 3)), bodyRow3.getSourceSpans());
832+
assertEquals(List.of(SourceSpan.of(4, 0, 30, 3)), bodyRow3.getSourceSpans());
833833
TableCell bodyRow3Cell1 = (TableCell) bodyRow3.getFirstChild();
834834
TableCell bodyRow3Cell2 = (TableCell) bodyRow3.getLastChild();
835835
assertEquals(List.of(), bodyRow3Cell1.getSourceSpans());

commonmark-ext-image-attributes/src/test/java/org/commonmark/ext/image/attributes/ImageAttributesTest.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -131,7 +131,7 @@ public void sourceSpans() {
131131
Node document = parser.parse("x{height=3 width=4}\n");
132132
Paragraph block = (Paragraph) document.getFirstChild();
133133
Node text = block.getFirstChild();
134-
assertEquals(List.of(SourceSpan.of(0, 0, 19)),
134+
assertEquals(List.of(SourceSpan.of(0, 0, 0, 19)),
135135
text.getSourceSpans());
136136
}
137137

commonmark-ext-ins/src/test/java/org/commonmark/ext/ins/InsTest.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -102,7 +102,7 @@ public void sourceSpans() {
102102
Node document = parser.parse("hey ++there++\n");
103103
Paragraph block = (Paragraph) document.getFirstChild();
104104
Node ins = block.getLastChild();
105-
assertEquals(List.of(SourceSpan.of(0, 4, 9)),
105+
assertEquals(List.of(SourceSpan.of(0, 4, 4, 9)),
106106
ins.getSourceSpans());
107107
}
108108

commonmark/src/main/java/org/commonmark/internal/DocumentParser.java

Lines changed: 20 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
package org.commonmark.internal;
22

3+
import org.commonmark.internal.util.LineReader;
34
import org.commonmark.internal.util.Parsing;
45
import org.commonmark.node.*;
56
import org.commonmark.parser.IncludeSourceSpans;
@@ -127,7 +128,7 @@ public Document parse(String input) {
127128
int lineBreak;
128129
while ((lineBreak = Characters.findLineBreak(input, lineStart)) != -1) {
129130
String line = input.substring(lineStart, lineBreak);
130-
parseLine(line);
131+
parseLine(line, lineStart);
131132
if (lineBreak + 1 < input.length() && input.charAt(lineBreak) == '\r' && input.charAt(lineBreak + 1) == '\n') {
132133
lineStart = lineBreak + 2;
133134
} else {
@@ -136,23 +137,23 @@ public Document parse(String input) {
136137
}
137138
if (!input.isEmpty() && (lineStart == 0 || lineStart < input.length())) {
138139
String line = input.substring(lineStart);
139-
parseLine(line);
140+
parseLine(line, lineStart);
140141
}
141142

142143
return finalizeAndProcess();
143144
}
144145

145146
public Document parse(Reader input) throws IOException {
146-
BufferedReader bufferedReader;
147-
if (input instanceof BufferedReader) {
148-
bufferedReader = (BufferedReader) input;
149-
} else {
150-
bufferedReader = new BufferedReader(input);
151-
}
152-
147+
var lineReader = new LineReader(input);
148+
int inputIndex = 0;
153149
String line;
154-
while ((line = bufferedReader.readLine()) != null) {
155-
parseLine(line);
150+
while ((line = lineReader.readLine()) != null) {
151+
parseLine(line, inputIndex);
152+
inputIndex += line.length();
153+
var eol = lineReader.getLineTerminator();
154+
if (eol != null) {
155+
inputIndex += eol.length();
156+
}
156157
}
157158

158159
return finalizeAndProcess();
@@ -197,8 +198,8 @@ public BlockParser getActiveBlockParser() {
197198
* Analyze a line of text and update the document appropriately. We parse markdown text by calling this on each
198199
* line of input, then finalizing the document.
199200
*/
200-
private void parseLine(String ln) {
201-
setLine(ln);
201+
private void parseLine(String ln, int inputIndex) {
202+
setLine(ln, inputIndex);
202203

203204
// For each containing block, try to parse the associated line start.
204205
// The document will always match, so we can skip the first block parser and start at 1 matches
@@ -322,7 +323,7 @@ private void parseLine(String ln) {
322323
}
323324
}
324325

325-
private void setLine(String ln) {
326+
private void setLine(String ln, int inputIndex) {
326327
lineIndex++;
327328
index = 0;
328329
column = 0;
@@ -331,7 +332,7 @@ private void setLine(String ln) {
331332
String lineContent = prepareLine(ln);
332333
SourceSpan sourceSpan = null;
333334
if (includeSourceSpans != IncludeSourceSpans.NONE) {
334-
sourceSpan = SourceSpan.of(lineIndex, 0, lineContent.length());
335+
sourceSpan = SourceSpan.of(lineIndex, 0, inputIndex, lineContent.length());
335336
}
336337
this.line = SourceLine.of(lineContent, sourceSpan);
337338
}
@@ -430,10 +431,9 @@ private void addLine() {
430431
content = line.getContent().subSequence(index, line.getContent().length());
431432
}
432433
SourceSpan sourceSpan = null;
433-
if (includeSourceSpans == IncludeSourceSpans.BLOCKS_AND_INLINES) {
434-
// Note that if we're in a partially-consumed tab, the length here corresponds to the content but not to the
435-
// actual source length. That sounds like a problem, but I haven't found a test case where it matters (yet).
436-
sourceSpan = SourceSpan.of(lineIndex, index, content.length());
434+
if (includeSourceSpans == IncludeSourceSpans.BLOCKS_AND_INLINES && index < line.getSourceSpan().getLength()) {
435+
// Note that if we're in a partially-consumed tab the length of the source span and the content don't match.
436+
sourceSpan = line.getSourceSpan().subSpan(index);
437437
}
438438
getActiveBlockParser().addLine(SourceLine.of(content, sourceSpan));
439439
addSourceSpans();
@@ -449,7 +449,7 @@ private void addSourceSpans() {
449449
int blockIndex = Math.min(openBlockParser.sourceIndex, index);
450450
int length = line.getContent().length() - blockIndex;
451451
if (length != 0) {
452-
openBlockParser.blockParser.addSourceSpan(SourceSpan.of(lineIndex, blockIndex, length));
452+
openBlockParser.blockParser.addSourceSpan(line.getSourceSpan().subSpan(blockIndex));
453453
}
454454
}
455455
}

commonmark/src/main/java/org/commonmark/node/SourceSpan.java

Lines changed: 65 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -27,39 +27,97 @@ public class SourceSpan {
2727

2828
private final int lineIndex;
2929
private final int columnIndex;
30+
private final int inputIndex;
3031
private final int length;
3132

33+
public static SourceSpan of(int line, int col, int input, int length) {
34+
return new SourceSpan(line, col, input, length);
35+
}
36+
37+
/**
38+
* @deprecated Use {@link #of(int, int, int, int)} instead to also specify input index. Using the deprecated one
39+
* will set {@link #inputIndex} to 0.
40+
*/
41+
@Deprecated
3242
public static SourceSpan of(int lineIndex, int columnIndex, int length) {
33-
return new SourceSpan(lineIndex, columnIndex, length);
43+
return of(lineIndex, columnIndex, 0, length);
3444
}
3545

36-
private SourceSpan(int lineIndex, int columnIndex, int length) {
46+
private SourceSpan(int lineIndex, int columnIndex, int inputIndex, int length) {
47+
if (lineIndex < 0) {
48+
throw new IllegalArgumentException("lineIndex " + lineIndex + " must be >= 0");
49+
}
50+
if (columnIndex < 0) {
51+
throw new IllegalArgumentException("columnIndex " + columnIndex + " must be >= 0");
52+
}
53+
if (inputIndex < 0) {
54+
throw new IllegalArgumentException("inputIndex " + inputIndex + " must be >= 0");
55+
}
56+
if (length < 0) {
57+
throw new IllegalArgumentException("length " + length + " must be >= 0");
58+
}
3759
this.lineIndex = lineIndex;
3860
this.columnIndex = columnIndex;
61+
this.inputIndex = inputIndex;
3962
this.length = length;
4063
}
4164

4265
/**
43-
* @return 0-based index of line in source
66+
* @return 0-based line index, e.g. 0 for first line, 1 for the second line, etc
4467
*/
4568
public int getLineIndex() {
4669
return lineIndex;
4770
}
4871

4972
/**
50-
* @return 0-based index of column (character on line) in source
73+
* @return 0-based index of column (character on line) in source, e.g. 0 for the first character of a line, 1 for
74+
* the second character, etc
5175
*/
5276
public int getColumnIndex() {
5377
return columnIndex;
5478
}
5579

80+
/**
81+
* @return 0-based index in whole input
82+
* @since 0.24.0
83+
*/
84+
public int getInputIndex() {
85+
return inputIndex;
86+
}
87+
5688
/**
5789
* @return length of the span in characters
5890
*/
5991
public int getLength() {
6092
return length;
6193
}
6294

95+
public SourceSpan subSpan(int beginIndex) {
96+
return subSpan(beginIndex, length);
97+
}
98+
99+
public SourceSpan subSpan(int beginIndex, int endIndex) {
100+
if (beginIndex < 0) {
101+
throw new IndexOutOfBoundsException("beginIndex " + beginIndex + " + must be >= 0");
102+
}
103+
if (beginIndex > length) {
104+
throw new IndexOutOfBoundsException("beginIndex " + beginIndex + " must be <= length " + length);
105+
}
106+
if (endIndex < 0) {
107+
throw new IndexOutOfBoundsException("endIndex " + endIndex + " + must be >= 0");
108+
}
109+
if (endIndex > length) {
110+
throw new IndexOutOfBoundsException("endIndex " + endIndex + " must be <= length " + length);
111+
}
112+
if (beginIndex > endIndex) {
113+
throw new IndexOutOfBoundsException("beginIndex " + beginIndex + " must be <= endIndex " + endIndex);
114+
}
115+
if (beginIndex == 0 && endIndex == length) {
116+
return this;
117+
}
118+
return new SourceSpan(lineIndex, columnIndex + beginIndex, inputIndex + beginIndex, endIndex - beginIndex);
119+
}
120+
63121
@Override
64122
public boolean equals(Object o) {
65123
if (this == o) {
@@ -71,19 +129,21 @@ public boolean equals(Object o) {
71129
SourceSpan that = (SourceSpan) o;
72130
return lineIndex == that.lineIndex &&
73131
columnIndex == that.columnIndex &&
132+
inputIndex == that.inputIndex &&
74133
length == that.length;
75134
}
76135

77136
@Override
78137
public int hashCode() {
79-
return Objects.hash(lineIndex, columnIndex, length);
138+
return Objects.hash(lineIndex, columnIndex, inputIndex, length);
80139
}
81140

82141
@Override
83142
public String toString() {
84143
return "SourceSpan{" +
85144
"line=" + lineIndex +
86145
", column=" + columnIndex +
146+
", input=" + inputIndex +
87147
", length=" + length +
88148
"}";
89149
}

0 commit comments

Comments
 (0)