Skip to content

Commit a536d90

Browse files
fix nested lists
1 parent 23e6644 commit a536d90

30 files changed

+783
-151
lines changed

commonmark.go

Lines changed: 26 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -2,11 +2,11 @@ package md
22

33
import (
44
"fmt"
5+
"unicode"
56

67
"regexp"
78
"strconv"
89
"strings"
9-
"unicode"
1010
"unicode/utf8"
1111

1212
"github.com/JohannesKaufmann/html-to-markdown/escape"
@@ -23,7 +23,7 @@ var commonmark = []Rule{
2323

2424
// we have a nested list, where the ul/ol is inside a list item
2525
// -> based on work done by @requilence from @anytypeio
26-
if parent.Is("li") && parent.Children().Last().IsSelection(selec) {
26+
if (parent.Is("li") || parent.Is("ul") || parent.Is("ol")) && parent.Children().Last().IsSelection(selec) {
2727
// add a line break prefix if the parent's text node doesn't have it.
2828
// that makes sure that every list item is on its own line
2929
lastContentTextNode := strings.TrimRight(parent.Nodes[0].FirstChild.Data, " \t")
@@ -49,21 +49,28 @@ var commonmark = []Rule{
4949
return nil
5050
}
5151

52-
parent := selec.Parent()
53-
index := selec.Index()
54-
55-
var prefix string
56-
if parent.Is("ol") {
57-
prefix = strconv.Itoa(index+1) + ". "
58-
} else {
59-
prefix = opt.BulletListMarker + " "
60-
}
6152
// remove leading newlines
6253
content = leadingNewlinesR.ReplaceAllString(content, "")
6354
// replace trailing newlines with just a single one
6455
content = trailingNewlinesR.ReplaceAllString(content, "\n")
65-
// indent
66-
content = indentR.ReplaceAllString(content, "\n ")
56+
// remove leading spaces
57+
content = strings.TrimLeft(content, " ")
58+
59+
prefix := selec.AttrOr(attrListPrefix, "")
60+
61+
// `prefixCount` is not necessarily the length of the empty string `prefix`
62+
// but how much space is reserved for the prefixes of the siblings.
63+
prefixCount, previousPrefixCounts := countListParents(opt, selec)
64+
65+
// if the prefix is not needed, balance it by adding the usual prefix spaces
66+
if prefix == "" {
67+
prefix = strings.Repeat(" ", prefixCount)
68+
}
69+
// indent the prefix so that the nested lists are represented
70+
indent := strings.Repeat(" ", previousPrefixCounts)
71+
prefix = indent + prefix
72+
73+
content = IndentMultiLineListItem(opt, content, prefixCount+previousPrefixCounts)
6774

6875
return String(prefix + content + "\n")
6976
},
@@ -82,6 +89,12 @@ var commonmark = []Rule{
8289
text = multipleSpacesR.ReplaceAllString(text, " ")
8390

8491
text = escape.MarkdownCharacters(text)
92+
93+
// if it's inside a list, trim the spaces to not mess up the indentation
94+
if IndexWithText(selec) == 0 && (selec.Parent().Is("li") || selec.Parent().Is("ol") || selec.Parent().Is("ul")) {
95+
text = strings.Trim(text, ` `)
96+
}
97+
8598
return &text
8699
},
87100
},

from.go

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -93,6 +93,10 @@ func validateOptions(opt Options) error {
9393
return nil
9494
}
9595

96+
var (
97+
attrListPrefix = "data-converter-list-prefix"
98+
)
99+
96100
// NewConverter initializes a new converter and holds all the rules.
97101
// - `domain` is used for links and images to convert relative urls ("/image.png") to absolute urls.
98102
// - CommonMark is the default set of rules. Set enableCommonmark to false if you want
@@ -111,6 +115,13 @@ func NewConverter(domain string, enableCommonmark bool, options *Options) *Conve
111115
s.SetAttr("data-index", strconv.Itoa(i+1))
112116
})
113117
})
118+
conv.before = append(conv.before, func(selec *goquery.Selection) {
119+
selec.Find("li").Each(func(i int, s *goquery.Selection) {
120+
prefix := getListPrefix(options, s)
121+
122+
s.SetAttr(attrListPrefix, prefix)
123+
})
124+
})
114125
conv.after = append(conv.after, func(markdown string) string {
115126
markdown = strings.TrimSpace(markdown)
116127
markdown = multipleNewLinesRegex.ReplaceAllString(markdown, "\n\n")

testdata/TestCommonmark/link/output.inlined.golden

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,8 +15,8 @@
1515
Second Text](http://multi.org/)
1616

1717
- [First Text\
18-
\
19-
Second Text](http://list.org/)
18+
\
19+
Second Text](http://list.org/)
2020

2121
[GitHub](https://github.com "GitHub")
2222

testdata/TestCommonmark/link/output.referenced_collapsed.golden

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,8 +15,8 @@
1515
Second Text][]
1616

1717
- [First Text\
18-
\
19-
Second Text][]
18+
\
19+
Second Text][]
2020

2121
[GitHub][]
2222

testdata/TestCommonmark/link/output.referenced_full.golden

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,8 +15,8 @@
1515
Second Text][7]
1616

1717
- [First Text\
18-
\
19-
Second Text][8]
18+
\
19+
Second Text][8]
2020

2121
[GitHub][9]
2222

testdata/TestCommonmark/link/output.referenced_shortcut.golden

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,8 +15,8 @@
1515
Second Text]
1616

1717
- [First Text\
18-
\
19-
Second Text]
18+
\
19+
Second Text]
2020

2121
[GitHub]
2222

testdata/TestCommonmark/link/output.relative.golden

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,8 +15,8 @@
1515
Second Text](http://multi.org/)
1616

1717
- [First Text\
18-
\
19-
Second Text](http://list.org/)
18+
\
19+
Second Text](http://list.org/)
2020

2121
[GitHub](https://github.com "GitHub")
2222

testdata/TestCommonmark/list/goldmark.golden

Lines changed: 41 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -55,15 +55,41 @@
5555
<p>15</p>
5656
</li>
5757
<li>
58-
<p>First Thing</p>
58+
<p>16</p>
59+
</li>
60+
<li>
61+
<p>17</p>
62+
</li>
63+
<li>
64+
<p>18</p>
65+
</li>
66+
<li>
67+
<p>19</p>
68+
</li>
69+
<li>
70+
<p>20</p>
71+
</li>
72+
<li>
73+
<p><img src="http://example.com/example.png" alt=""></p>
74+
</li>
75+
<li>
76+
<p>22</p>
77+
</li>
78+
</ol>
79+
<ul>
80+
<li>Link:<a href="https://example.com">example</a> works</li>
81+
<li>Link:
82+
<a href="https://example.com">example</a>
83+
works</li>
84+
</ul>
85+
<ol>
86+
<li>First Thing
5987
<ul>
6088
<li>Some Thing</li>
6189
<li>Another Thing</li>
6290
</ul>
6391
</li>
64-
<li>
65-
<p>Second Thing</p>
66-
</li>
92+
<li>Second Thing</li>
6793
</ol>
6894
<ul>
6995
<li>
@@ -92,3 +118,14 @@
92118
<p>- Not List</p>
93119
<p>1. Not List 1. Not List
94120
1. Not List</p>
121+
<ol>
122+
<li>
123+
<p>A paragraph
124+
with two lines.</p>
125+
<pre><code>indented code
126+
</code></pre>
127+
<blockquote>
128+
<p>A block quote.</p>
129+
</blockquote>
130+
</li>
131+
</ol>

testdata/TestCommonmark/list/input.html

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,9 +31,29 @@
3131
<li>13</li>
3232
<li>14</li>
3333
<li>15</li>
34+
<li></li>
35+
<li> </li>
36+
<li> <p> </p> </li>
37+
<li>16</li>
38+
<li>17</li>
39+
<li>18</li>
40+
<li>19</li>
41+
<li>20</li>
42+
<li><img src="/example.png" /></li>
43+
<li>22</li>
3444
</ol>
3545

3646

47+
<!--list with link inside-->
48+
<ul>
49+
<li>Link: <a href="https://example.com" target="_blank">example</a> works</li>
50+
<li>
51+
Link:
52+
<a href="https://example.com" target="_blank">example</a>
53+
works
54+
</li>
55+
</ul>
56+
3757
<!--ol with a ul inside-->
3858
<ol>
3959
<li>
@@ -78,3 +98,18 @@
7898

7999
<p>1. Not List 1. Not List
80100
1. Not List</p>
101+
102+
103+
<!--with other whitespace aware elements-->
104+
<ol>
105+
<li>
106+
<p>A paragraph
107+
with two lines.</p>
108+
109+
<pre><code>indented code</code></pre>
110+
111+
<blockquote>
112+
<p>A block quote.</p>
113+
</blockquote>
114+
</li>
115+
</ol>

testdata/TestCommonmark/list/output.asterisks.golden

Lines changed: 39 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -4,25 +4,38 @@
44
1. First Thing
55
2. Second Thing
66

7-
1. 1
8-
2. 2
9-
3. 3
10-
4. 4
11-
5. 5
12-
6. 6
13-
7. 7
14-
8. 8
15-
9. 9
7+
01. 1
8+
02. 2
9+
03. 3
10+
04. 4
11+
05. 5
12+
06. 6
13+
07. 7
14+
08. 8
15+
09. 9
1616
10. 10
1717
11. 11
1818
12. 12
1919
13. 13
2020
14. 14
2121
15. 15
22+
19. 16
23+
20. 17
24+
21. 18
25+
22. 19
26+
23. 20
27+
24. ![](http://example.com/example.png)
28+
25. 22
29+
30+
* Link:[example](https://example.com) works
31+
* Link:
32+
[example](https://example.com)
33+
works
34+
2235

2336
1. First Thing
24-
* Some Thing
25-
* Another Thing
37+
* Some Thing
38+
* Another Thing
2639
2. Second Thing
2740

2841
* foo
@@ -32,13 +45,25 @@
3245
* Ending with
3346
* A space
3447

35-
* Indent First Thing
48+
* Indent First Thing
3649

37-
Second Thing
50+
Second Thing
3851

3952
* Third Thing
4053

4154
\- Not List
4255

4356
1\. Not List 1. Not List
44-
1\. Not List
57+
1\. Not List
58+
59+
1. A paragraph
60+
with two lines.
61+
62+
63+
```
64+
indented code
65+
```
66+
67+
68+
69+
> A block quote.

0 commit comments

Comments
 (0)