Skip to content

Commit 6e93f85

Browse files
committed
Add LineReader for reading lines with terminators
1 parent a08d760 commit 6e93f85

File tree

2 files changed

+277
-0
lines changed

2 files changed

+277
-0
lines changed
Lines changed: 149 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,149 @@
1+
package org.commonmark.internal.util;
2+
3+
import java.io.Closeable;
4+
import java.io.IOException;
5+
import java.io.Reader;
6+
7+
/**
 * Reads lines from a reader like {@link java.io.BufferedReader} but also returns the line terminators.
 * <p>
 * Line terminators can be either a line feed {@code "\n"}, carriage return {@code "\r"}, or a carriage return followed
 * by a line feed {@code "\r\n"}. Call {@link #getLineTerminator()} after {@link #readLine()} to obtain the
 * corresponding line terminator. If a stream has a line at the end without a terminator, {@link #getLineTerminator()}
 * returns {@code null}.
 * <p>
 * Once {@link #close()} has been called, {@link #readLine()} throws an {@link IOException}.
 */
public class LineReader implements Closeable {

    // Same as java.io.BufferedReader
    static final int CHAR_BUFFER_SIZE = 8192;
    // Initial capacity of the builder used when a line spans more than one buffer fill.
    static final int EXPECTED_LINE_LENGTH = 80;

    // Both are set to null by close(); a null reader marks this instance as closed.
    private Reader reader;
    private char[] cbuf;

    // Index of the next unread char in cbuf.
    private int position = 0;
    // Number of valid chars in cbuf, as reported by the last successful read.
    private int limit = 0;

    private String lineTerminator = null;

    /**
     * Create a line reader on top of the given reader.
     *
     * @param reader the reader to read characters from; it is closed by {@link #close()}
     */
    public LineReader(Reader reader) {
        this.reader = reader;
        this.cbuf = new char[CHAR_BUFFER_SIZE];
    }

    /**
     * Read a line of text.
     *
     * @return the line, or {@code null} when the end of the stream has been reached and no more lines can be read
     * @throws IOException if the underlying reader fails or this reader has been closed
     */
    public String readLine() throws IOException {
        ensureOpen();

        // Only allocated when a line does not fit into what is currently buffered.
        StringBuilder sb = null;
        // True when the previous buffer ended in a CR and we still need to look for a following LF.
        boolean cr = false;

        while (true) {
            if (position >= limit) {
                fill();
            }

            if (cr) {
                // We saw a CR before, check if we have CR LF or just CR.
                // (sb is always non-null here, it was created when cr was set.)
                if (position < limit && cbuf[position] == '\n') {
                    position++;
                    return line(sb.toString(), "\r\n");
                } else {
                    return line(sb.toString(), "\r");
                }
            }

            if (position >= limit) {
                // End of stream, return either the last line without terminator or null for end.
                return line(sb != null ? sb.toString() : null, null);
            }

            int start = position;
            int i = position;
            for (; i < limit; i++) {
                char c = cbuf[i];
                if (c == '\n') {
                    position = i + 1;
                    return line(finish(sb, start, i), "\n");
                } else if (c == '\r') {
                    if (i + 1 < limit) {
                        // We know what the next character is, so we can check now whether we have
                        // a CR LF or just a CR and return.
                        if (cbuf[i + 1] == '\n') {
                            position = i + 2;
                            return line(finish(sb, start, i), "\r\n");
                        } else {
                            position = i + 1;
                            return line(finish(sb, start, i), "\r");
                        }
                    } else {
                        // We don't know what the next character is yet, check on next iteration.
                        cr = true;
                        position = i + 1;
                        break;
                    }
                }
            }

            // No terminator in the buffered data: mark everything scanned as consumed.
            // (When we broke out on a CR, position is already past i and stays there.)
            if (position < i) {
                position = i;
            }

            // Haven't found a finished line yet, copy the data from the buffer so that we can fill
            // the buffer again.
            if (sb == null) {
                sb = new StringBuilder(EXPECTED_LINE_LENGTH);
            }
            sb.append(cbuf, start, i - start);
        }
    }

    /**
     * Return the line terminator of the last read line from {@link #readLine()}.
     *
     * @return {@code "\n"}, {@code "\r"}, {@code "\r\n"}, or {@code null}
     */
    public String getLineTerminator() {
        return lineTerminator;
    }

    /**
     * Close the underlying reader and release the buffer. Calling this more than once has no effect.
     *
     * @throws IOException if closing the underlying reader fails
     */
    @Override
    public void close() throws IOException {
        if (reader == null) {
            return;
        }
        try {
            reader.close();
        } finally {
            // Drop the references even if close() threw, so that later calls are no-ops.
            reader = null;
            cbuf = null;
        }
    }

    /**
     * Fail with an {@link IOException} instead of a {@link NullPointerException} when used after {@link #close()}.
     */
    private void ensureOpen() throws IOException {
        if (reader == null) {
            throw new IOException("Stream closed");
        }
    }

    /**
     * Refill the buffer from the reader. Reads that return 0 chars are retried. At end of stream
     * {@code position} and {@code limit} are left untouched, so {@code position >= limit} still holds afterwards.
     */
    private void fill() throws IOException {
        int read;
        do {
            read = reader.read(cbuf, 0, cbuf.length);
        } while (read == 0);
        if (read > 0) {
            limit = read;
            position = 0;
        }
    }

    /**
     * Remember the terminator for {@link #getLineTerminator()} and pass the line through.
     */
    private String line(String line, String lineTerminator) {
        this.lineTerminator = lineTerminator;
        return line;
    }

    /**
     * Build the final line from what was collected in {@code sb} (if anything) plus {@code cbuf[start, end)}.
     */
    private String finish(StringBuilder sb, int start, int end) {
        int len = end - start;
        if (sb == null) {
            return new String(cbuf, start, len);
        } else {
            return sb.append(cbuf, start, len).toString();
        }
    }
}
Lines changed: 128 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,128 @@
1+
package org.commonmark.internal.util;
2+
3+
import org.junit.Test;
4+
5+
import java.io.*;
6+
import java.nio.charset.StandardCharsets;
7+
import java.util.ArrayList;
8+
import java.util.Arrays;
9+
import java.util.Objects;
10+
11+
import static java.util.stream.Collectors.joining;
12+
import static org.commonmark.internal.util.LineReader.CHAR_BUFFER_SIZE;
13+
import static org.junit.Assert.*;
14+
15+
public class LineReaderTest {
16+
17+
@Test
18+
public void testReadLine() throws IOException {
19+
assertLines();
20+
21+
assertLines("", "\n");
22+
assertLines("foo", "\n", "bar", "\n");
23+
assertLines("foo", "\n", "bar", null);
24+
assertLines("", "\n", "", "\n");
25+
assertLines(repeat("a", CHAR_BUFFER_SIZE - 1), "\n");
26+
assertLines(repeat("a", CHAR_BUFFER_SIZE), "\n");
27+
assertLines(repeat("a", CHAR_BUFFER_SIZE) + "b", "\n");
28+
29+
assertLines("", "\r\n");
30+
assertLines("foo", "\r\n", "bar", "\r\n");
31+
assertLines("foo", "\r\n", "bar", null);
32+
assertLines("", "\r\n", "", "\r\n");
33+
assertLines(repeat("a", CHAR_BUFFER_SIZE - 2), "\r\n");
34+
assertLines(repeat("a", CHAR_BUFFER_SIZE - 1), "\r\n");
35+
assertLines(repeat("a", CHAR_BUFFER_SIZE), "\r\n");
36+
assertLines(repeat("a", CHAR_BUFFER_SIZE) + "b", "\r\n");
37+
38+
assertLines("", "\r");
39+
assertLines("foo", "\r", "bar", "\r");
40+
assertLines("foo", "\r", "bar", null);
41+
assertLines("", "\r", "", "\r");
42+
assertLines(repeat("a", CHAR_BUFFER_SIZE - 1), "\r");
43+
assertLines(repeat("a", CHAR_BUFFER_SIZE), "\r");
44+
assertLines(repeat("a", CHAR_BUFFER_SIZE) + "b", "\r");
45+
46+
assertLines("", "\n", "", "\r", "", "\r\n", "", "\n");
47+
assertLines("what", "\r", "are", "\r", "", "\r", "you", "\r\n", "", "\r\n", "even", "\n", "doing", null);
48+
}
49+
50+
@Test
51+
public void testClose() throws IOException {
52+
var reader = new InputStreamReader(new ByteArrayInputStream("test".getBytes(StandardCharsets.UTF_8)));
53+
var lineReader = new LineReader(reader);
54+
lineReader.close();
55+
lineReader.close();
56+
try {
57+
reader.read();
58+
fail("Expected read to throw after closing reader");
59+
} catch (IOException e) {
60+
// Expected
61+
}
62+
}
63+
64+
private void assertLines(String... s) throws IOException {
65+
assertTrue("Expected parts needs to be even (pairs of content and terminator)", s.length % 2 == 0);
66+
var input = Arrays.stream(s).filter(Objects::nonNull).collect(joining(""));
67+
68+
assertLines(new StringReader(input), s);
69+
assertLines(new SlowStringReader(input), s);
70+
}
71+
72+
private static void assertLines(Reader reader, String... expectedParts) throws IOException {
73+
try (var lineReader = new LineReader(reader)) {
74+
var lines = new ArrayList<>();
75+
String line;
76+
while ((line = lineReader.readLine()) != null) {
77+
lines.add(line);
78+
lines.add(lineReader.getLineTerminator());
79+
}
80+
assertNull(lineReader.getLineTerminator());
81+
assertEquals(Arrays.asList(expectedParts), lines);
82+
}
83+
}
84+
85+
private static String repeat(String s, int count) {
86+
StringBuilder sb = new StringBuilder(s.length() * count);
87+
for (int i = 0; i < count; i++) {
88+
sb.append(s);
89+
}
90+
return sb.toString();
91+
}
92+
93+
/**
94+
* Reader that only reads 0 or 1 chars at a time to test the corner cases.
95+
*/
96+
private static class SlowStringReader extends Reader {
97+
98+
private final String s;
99+
private int position = 0;
100+
private boolean empty = false;
101+
102+
private SlowStringReader(String s) {
103+
this.s = s;
104+
}
105+
106+
@Override
107+
public int read(char[] cbuf, int off, int len) throws IOException {
108+
Objects.checkFromIndexSize(off, len, cbuf.length);
109+
if (len == 0) {
110+
return 0;
111+
}
112+
empty = !empty;
113+
if (empty) {
114+
// Return 0 every other time to test handling of 0.
115+
return 0;
116+
}
117+
if (position >= s.length()) {
118+
return -1;
119+
}
120+
cbuf[off] = s.charAt(position++);
121+
return 1;
122+
}
123+
124+
@Override
125+
public void close() throws IOException {
126+
}
127+
}
128+
}

0 commit comments

Comments
 (0)