@@ -23,7 +23,6 @@ import (
2323 safe "github.com/google/safehtml"
2424 "github.com/google/safehtml/legacyconversions"
2525 "github.com/google/safehtml/template"
26- "golang.org/x/net/html"
2726 "golang.org/x/pkgsite/internal/log"
2827)
2928
@@ -53,10 +52,7 @@ const (
5352 rfcRx = `RFC\s+(\d{3,5})(,?\s+[Ss]ection\s+(\d+(\.\d+)*))?`
5453)
5554
56- var (
57- matchRx = regexp .MustCompile (urlRx + `|` + rfcRx )
58- badAnchorRx = regexp .MustCompile (`[^a-zA-Z0-9]` )
59- )
55+ var matchRx = regexp .MustCompile (urlRx + `|` + rfcRx )
6056
6157type link struct {
6258 Class string
@@ -65,19 +61,23 @@ type link struct {
6561}
6662
6763type heading struct {
68- ID safe.Identifier
64+ ID safe.Identifier // if empty, the title is not linked
6965 Title safe.HTML
7066}
7167
7268var (
7369 // tocTemplate expects a []heading.
7470 tocTemplate = template .Must (template .New ("toc" ).Parse (`<div role="navigation" aria-label="Table of Contents">
7571 <ul class="Documentation-toc{{if gt (len .) 5}} Documentation-toc-columns{{end}}">
76- {{range . - }}
72+ {{- range .}}
7773 <li class="Documentation-tocItem">
78- <a href="#{{.ID}}">{{.Title}}</a>
74+ {{- if .ID.String -}}
75+ <a href="#{{.ID}}">{{.Title}}</a>
76+ {{- else -}}
77+ {{.Title}}
78+ {{- end -}}
7979 </li>
80- {{end - }}
80+ {{- end }}
8181 </ul>
8282</div>
8383` ))
8888
8989 paraTemplate = template .Must (template .New ("para" ).Parse ("<p>{{.}}\n </p>" ))
9090
91- headingTemplate = template .Must (template .New ("heading" ).Parse (
92- `<h4 id="{{.ID}}">{{.Title}} <a class="Documentation-idLink" href="#{{.ID}}" aria-label="Go to {{.Title}}">¶</a></h4>` ))
91+ // expects a heading
92+ headingTemplate = template .Must (template .New ("heading" ).Parse (`
93+ {{- if .ID.String -}}
94+ <h4 id="{{.ID}}">{{.Title}} <a class="Documentation-idLink" href="#{{.ID}}" aria-label="Go to {{.Title}}">¶</a></h4>
95+ {{- else -}}
96+ <h4>{{.Title}}</h4>
97+ {{- end}}` ))
9398
9499 linkTemplate = template .Must (template .New ("link" ).Parse (
95100 `<a{{with .Class}}class="{{.}}" {{end}} href="{{.Href}}">{{.Text}}</a>` ))
@@ -119,57 +124,14 @@ func (r *Renderer) formatDocHTML(text string, decl ast.Decl, extractLinks bool)
119124 if extractLinks {
120125 r .removeLinks (doc )
121126 }
122-
123- h := r .blocksToHTML (doc .Content , decl , true , extractLinks )
124- if headings := extractHeadings ( h ); len (headings ) > 0 {
125- h = safe .HTMLConcat (ExecuteToHTML (tocTemplate , headings ), h )
127+ hscope := newHeadingScope ( headingIDSuffix ( decl ))
128+ h := r .blocksToHTML (doc .Content , true , hscope )
129+ if len (hscope . headings ) > 0 {
130+ h = safe .HTMLConcat (ExecuteToHTML (tocTemplate , hscope . headings ), h )
126131 }
127-
128132 return h
129133}
130134
131- func extractHeadings (h safe.HTML ) []heading {
132- var headings []heading
133-
134- doc , err := html .Parse (strings .NewReader (h .String ()))
135- if err != nil {
136- return nil
137- }
138-
139- var f func (* html.Node )
140- f = func (n * html.Node ) {
141- for _ , a := range n .Attr {
142- if a .Key == "id" && strings .HasPrefix (a .Val , "hdr-" ) {
143- if tn := firstTextNode (n ); tn != nil {
144- title := strings .TrimSpace (tn .Data )
145- hdrName := strings .TrimPrefix (a .Val , "hdr-" )
146- id := safe .IdentifierFromConstantPrefix ("hdr" , hdrName )
147- headings = append (headings , heading {id , safe .HTMLEscaped (title )})
148- }
149- break
150- }
151- }
152- for c := n .FirstChild ; c != nil ; c = c .NextSibling {
153- f (c )
154- }
155- }
156- f (doc )
157-
158- return headings
159- }
160-
161- func firstTextNode (n * html.Node ) * html.Node {
162- if n .Type == html .TextNode {
163- return n
164- }
165- for c := n .FirstChild ; c != nil ; c = c .NextSibling {
166- if r := firstTextNode (c ); r != nil {
167- return r
168- }
169- }
170- return nil
171- }
172-
173135// removeLinks removes the "Links" section from doc.
174136// Pkgsite has a convention where a "Links" heading in a doc comment provides links
175137// that are rendered in a separate place in the UI.
@@ -263,13 +225,13 @@ func parseLink(line string) *Link {
263225 }
264226}
265227
266- func (r * Renderer ) blocksToHTML (bs []comment.Block , decl ast. Decl , useParagraph , extractLinks bool ) safe.HTML {
228+ func (r * Renderer ) blocksToHTML (bs []comment.Block , useParagraph bool , hscope * headingScope ) safe.HTML {
267229 return concatHTML (bs , func (b comment.Block ) safe.HTML {
268- return r .blockToHTML (b , decl , useParagraph , extractLinks )
230+ return r .blockToHTML (b , useParagraph , hscope )
269231 })
270232}
271233
272- func (r * Renderer ) blockToHTML (b comment.Block , decl ast. Decl , useParagraph , extractLinks bool ) safe.HTML {
234+ func (r * Renderer ) blockToHTML (b comment.Block , useParagraph bool , hscope * headingScope ) safe.HTML {
273235 switch b := b .(type ) {
274236 case * comment.Paragraph :
275237 th := r .textsToHTML (b .Text )
@@ -282,7 +244,7 @@ func (r *Renderer) blockToHTML(b comment.Block, decl ast.Decl, useParagraph, ext
282244 return ExecuteToHTML (codeTemplate , b .Text )
283245
284246 case * comment.Heading :
285- return ExecuteToHTML (headingTemplate , r .newHeading (b , decl ))
247+ return ExecuteToHTML (headingTemplate , r .newHeading (b , hscope ))
286248
287249 case * comment.List :
288250 var items []safe.HTML
@@ -291,7 +253,7 @@ func (r *Renderer) blockToHTML(b comment.Block, decl ast.Decl, useParagraph, ext
291253 items = append (items , ExecuteToHTML (listItemTemplate , struct {
292254 Number string
293255 Content safe.HTML
294- }{item .Number , r .blocksToHTML (item .Content , decl , useParagraph , false )}))
256+ }{item .Number , r .blocksToHTML (item .Content , useParagraph , hscope )}))
295257 }
296258 t := oListTemplate
297259 if b .Items [0 ].Number == "" {
@@ -303,53 +265,143 @@ func (r *Renderer) blockToHTML(b comment.Block, decl ast.Decl, useParagraph, ext
303265 }
304266}
305267
306- // headingIDs keeps track of used heading ids to prevent duplicates.
307- type headingIDs map [safe. Identifier ] bool
268+ /*
269+ We want to convert headings in doc comments to unique and stable HTML IDs.
308270
309- // headingID returns a unique identifier for a *comment.Heading & ast.Decl pair.
310- func (r * Renderer ) headingID (h * comment.Heading , decl ast.Decl ) safe.Identifier {
311- s := textsToString (h .Text )
312- hdrTitle := badAnchorRx .ReplaceAllString (s , "_" )
313- id := safe .IdentifierFromConstantPrefix ("hdr" , hdrTitle )
314- if ! r .headingIDs [id ] {
315- r .headingIDs [id ] = true
316- return id
317- }
271+ The IDs in an HTML page should be unique, so URLs with fragments can link to
272+ them. The headings in the doc comments of a Go package (which corresponds to an
273+ HTML page for us) need not be unique. The same heading can be used in the doc
274+ comment of several symbols, and in the package comment as well.
318275
319- // The id is not unique. Attempt to generate an identifier using the decl.
320- for _ , v := range generateAnchorPoints (decl ) {
321- if v .Kind == "field" {
322- continue
323- }
324- hdrTitle = badAnchorRx .ReplaceAllString (v .ID .String ()+ "_" + s , "_" )
325- if v .Kind == "constant" || v .Kind == "variable" {
326- if specs := decl .(* ast.GenDecl ).Specs ; len (specs ) > 1 {
327- // Grouped consts and vars cannot be deterministically identified,
328- // so treat them as a single section. e.g. "hdr-constant-Title"
329- hdrTitle = badAnchorRx .ReplaceAllString (v .Kind + "_" + s , "_" )
330- }
331- }
332- if v .Kind != "method" {
333- // Continue iterating until we find a higher precedence kind.
334- break
335- }
276+ If uniqueness were the only criterion, we could just append a number. But we
277+ also want the IDs to be stable. The page for the latest version of a package
278+ has no version in the URL, so that a saved link will always reference the latest
279+ version. We do the same for other links on the page, like links to symbols. We
280+ want to do so for heading links too. That means that a heading link, which uses
281+ the ID as a fragment, may visit pages with different content at different times.
282+ We want that saved link to return to the same heading every time, if the heading
283+ still exists.
284+
285+ If we numbered the IDs from top to bottom, a heading ID would change whenever headings
286+ were added above it. Instead, we try to construct a unique suffix from the declaration that the
287+ doc comment refers to. (The suffix is empty for the package comment.) For ungrouped declarations,
288+ we use the symbol name, which must be unique in the package. For grouped declarations, like
289+
290+ const (
291+ A = 1
292+ B = 2
293+ )
294+
295+ or
296+
297+ var x, Y int
298+
299+ there is no good way to pick a suffix, so we don't link headings at all. Evidence suggests
300+ that such headings are very rare.
301+
302+ After constructing the unique suffix, we may still have duplicate IDs because the same doc comment
303+ can repeat headings. So we append a number that is unique for the given suffix.
304+
305+ This approach will always produce unique, stable IDs when it produces IDs at all.
306+ */
307+
308+ // A headingScope is a collection of headings that arise from the documentation of
309+ // a single element, either the package itself or a symbol within it.
310+ // A headingScope ensures that heading IDs are unique, and keeps a list of headings as they appear in the scope.
311+ type headingScope struct {
312+ suffix string // ID suffix; unique for the package; empty for package doc
313+ createIDs bool // create IDs for the headings
314+ headings []heading // headings for this scope, in order
315+ ids map [string ]int // count of seen IDs
316+ }
317+
318+ func newHeadingScope (suffix string , createIDs bool ) * headingScope {
319+ return & headingScope {
320+ suffix : badAnchorRx .ReplaceAllString (suffix , "_" ),
321+ createIDs : createIDs ,
322+ ids : map [string ]int {},
336323 }
324+ }
337325
338- for {
339- id = safe .IdentifierFromConstantPrefix ("hdr" , hdrTitle )
340- if ! r .headingIDs [id ] {
341- r .headingIDs [id ] = true
342- break
343- }
344- // The id is still not unique. Append _ until unique.
345- hdrTitle = hdrTitle + "_"
326+ // newHeading constructs a heading from the comment.Heading and remembers it.
327+ func (r * Renderer ) newHeading (h * comment.Heading , hs * headingScope ) heading {
328+ return hs .addHeading (h , r .textsToHTML (h .Text ))
329+ }
330+
331+ // addHeading constructs a heading and adds it to hs.
332+ func (hs * headingScope ) addHeading (ch * comment.Heading , html safe.HTML ) heading {
333+ h := heading {Title : html }
334+ if hs .createIDs {
335+ h .ID = safe .IdentifierFromConstantPrefix ("hdr" , hs .newHeadingID (ch ))
346336 }
337+ hs .headings = append (hs .headings , h )
338+ return h
339+ }
347340
341+ var badAnchorRx = regexp .MustCompile (`[^a-zA-Z0-9-]` )
342+
343+ // newHeadingID constructs a unique heading ID from the argument, which is the text
344+ // of a heading.
345+ func (hs * headingScope ) newHeadingID (ch * comment.Heading ) string {
346+ // Start with the heading's default ID.
347+ // We can only construct a [safe.Identifier] by adding a prefix, so remove the
348+ // default's prefix.
349+ id := strings .TrimPrefix (ch .DefaultID (), "hdr-" )
350+ if hs .suffix != "" {
351+ id += "-" + hs .suffix
352+ }
353+ n := hs .ids [id ]
354+ hs .ids [id ]++
355+ if n > 0 {
356+ id += "-" + strconv .Itoa (n )
357+ }
348358 return id
349359}
350360
351- func (r * Renderer ) newHeading (h * comment.Heading , decl ast.Decl ) heading {
352- return heading {r .headingID (h , decl ), r .textsToHTML (h .Text )}
361+ // headingIDSuffix returns a unique string from the decl, to be
362+ // used as the suffix in heading IDs.
363+ // If a unique string can be created, the second return value is true.
364+ // If not (the declaration is a group), headingIDSuffix returns "", false.
365+ func headingIDSuffix (decl ast.Decl ) (string , bool ) {
366+ if decl == nil {
367+ return "" , true
368+ }
369+ switch decl := decl .(type ) {
370+ case * ast.FuncDecl :
371+ // functionName, or recvTypeName_methodName
372+ if decl .Recv == nil || len (decl .Recv .List ) == 0 {
373+ return decl .Name .Name , true
374+ }
375+ return recvTypeString (decl .Recv .List [0 ].Type ) + "_" + decl .Name .Name , true
376+ case * ast.GenDecl :
377+ // We can only pick a stable name when there is a single decl.
378+ if len (decl .Specs ) == 1 {
379+ switch spec := decl .Specs [0 ].(type ) {
380+ case * ast.TypeSpec :
381+ return spec .Name .Name , true
382+ case * ast.ValueSpec :
383+ if len (spec .Names ) == 1 {
384+ return spec .Names [0 ].Name , true
385+ }
386+ }
387+ }
388+ }
389+ return "" , false
390+ }
391+
392+ // recvTypeString returns a string for the symbol in a type expression
393+ // for a method receiver.
394+ func recvTypeString (t ast.Expr ) string {
395+ switch t := t .(type ) {
396+ case * ast.Ident :
397+ return t .Name
398+ case * ast.IndexExpr :
399+ return t .X .(* ast.Ident ).Name
400+ case * ast.StarExpr :
401+ return recvTypeString (t .X )
402+ default : // should never happen
403+ return "unknown"
404+ }
353405}
354406
355407func (r * Renderer ) textsToHTML (ts []comment.Text ) safe.HTML {
0 commit comments