Skip to content

Commit b8f0807

Browse files
Language.Nix.Identifier: correctly implement Nix escape sequences
This code previously assumed that escape sequences in strings work the same way as in Haskell, which is not generally the case. This commit adds a custom implementation of the relevant parsing and rendering logic, which even simplifies some things. Escape sequences are a little weird in Nix, since only very few characters _have_ to be escaped, and some of them only in certain contexts (e.g. `$`, which only needs escaping when it is directly followed by `{`). I've linked the best reference (the lexer) in the code, but the manual is also helpful: https://nix.dev/manual/nix/2.30/language/string-literals.html
1 parent e89ec9a commit b8f0807

File tree

3 files changed

+66
-10
lines changed

3 files changed

+66
-10
lines changed

language-nix/CHANGELOG.md

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,4 +10,13 @@
1010
`quote`, `needsQuoting` and `Pretty` will take this list into account
1111
and quote such identifiers. However, `HasParser` will _not_ reject them
1212
even if they are unquoted.
13-
* Add an hspec/QuickCheck based test suite.
13+
* Resolved discrepancies between `Language.Nix.Identifier` and Nix w.r.t.
14+
quoting and escaping:
15+
16+
- Fixed missing escaping of some Nix syntax elements, e.g. in the case of
17+
`ident # "${foo}"`.
18+
- Pretty printing `Identifier`s will no longer produce escape sequences
18+
that Haskell supports but Nix doesn't.
20+
- Parsing `Identifier`s won't interpret escape sequences that Nix wouldn't
21+
understand.
22+
* Added an hspec/QuickCheck based test suite.

language-nix/src/Language/Nix/Identifier.hs

Lines changed: 44 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -87,6 +87,11 @@ instance Pretty Identifier where
8787

8888
-- | Note that this parser is more lenient than Nix w.r.t. simple identifiers,
8989
-- since it will accept 'nixKeywords'.
90+
--
91+
-- Naturally, it does not support string interpolation, but does not reject
92+
-- strings that contain it. E.g. the string literal @"hello ${world}"@
93+
-- will contain @${world}@ verbatim after parsing. Do not rely on this
94+
-- behavior, as it may be changed in the future.
9095
instance HasParser Identifier where
9196
parser = parseQuotedIdentifier <|> parseSimpleIdentifier
9297

@@ -108,17 +113,24 @@ parseQuotedIdentifier :: CharParser st tok m Identifier
108113
parseQuotedIdentifier = Identifier <$> qstring
109114
where
110115
qstring :: CharParser st tok m String
111-
qstring = do txt <- between (P.char '"') (P.char '"') (many qtext)
112-
return (read ('"' : concat txt ++ ['"']))
116+
qstring = between (P.char '"') (P.char '"') (many qtext)
113117

114-
qtext :: CharParser st tok m String
115-
qtext = quotedPair <|> many1 (P.noneOf "\\\"")
118+
qtext :: CharParser st tok m Char
119+
qtext = quotedPair <|> P.noneOf "\\\""
116120

117-
quotedPair :: CharParser st tok m String
121+
quotedPair :: CharParser st tok m Char
118122
quotedPair = do
119-
c1 <- P.char '\\'
120-
c2 <- anyChar
121-
return [c1,c2]
123+
_ <- P.char '\\'
124+
c <- anyChar
125+
-- See https://github.com/NixOS/nix/blob/2d83bc6b83763290e9bbf556209927ba469956aa/src/libexpr/lexer.l#L54-L60
126+
return $ case c of
127+
'n' -> '\n'
128+
't' -> '\t'
129+
'r' -> '\r'
130+
-- Note that this handles actual escapes like \" and \\ and
131+
-- bogus cases like \f which Nix doesn't fail on (despite not
132+
-- supporting it), but simply maps to plain f
133+
_ -> c
122134

123135
-- | Checks whether a given string needs quoting when interpreted as an
124136
-- 'Identifier'.
@@ -142,5 +154,28 @@ nixKeywords =
142154
-- abc
143155
-- >>> putStrLn (quote "abc.def")
144156
-- "abc.def"
157+
-- >>> putStrLn (quote "$foo")
158+
-- "$foo"
159+
-- >>> putStrLn (quote "${foo}")
160+
-- "\${foo}"
145161
quote :: String -> String
146-
quote s = if needsQuoting s then show s else s
162+
quote s = if needsQuoting s then '"' : quote' s else s
163+
where
164+
quote' (c1:c2:cs) = escapeChar c1 (Just c2) ++ quote' (c2:cs)
165+
quote' (c:cs) = escapeChar c Nothing ++ quote' cs
166+
quote' "" = "\""
167+
168+
escapeChar :: Char -> Maybe Char -> String
169+
escapeChar c1 c2 =
170+
case c1 of
171+
-- supported escape sequences, see quotedPair above
172+
-- N.B. technically, we only need to escape \r (since Nix converts raw \r to \n),
173+
-- but it's nicer to escape what we can.
174+
'\n' -> "\\n"
175+
'\t' -> "\\t"
176+
'\r' -> "\\r"
177+
-- syntactically significant in doubly quoted strings
178+
'\\' -> "\\\\"
179+
'"' -> "\\\""
180+
'$' | c2 == Just '{' -> "\\$"
181+
_ -> [c1]

language-nix/test/hspec.hs

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,18 @@ main = hspec $ do
2626
identProperty $ \i -> parseM "Identifier" (prettyShow i) == Just (i :: Identifier)
2727
it "can parse the result of quote" $
2828
stringIdentProperty $ \str -> parseM "Identifier" (quote str) == Just (ident # str)
29+
it "parses redundant escape sequences" $
30+
forM_
31+
[ ("\"\\f\"", "f")
32+
, ("\"echo \\$var\"", "echo $var")
33+
, ("\"\\h\\e\\l\\l\\o\\ \\w\\or\\l\\d\"", "hello world")
34+
-- \t and \n don't need to be escaped, though it's advisable
35+
, ("\"only\\ttechnically\nredundant\"", "only\ttechnically\nredundant")
36+
]
37+
$ \(i, e) -> do
38+
let e' = Just (ident # e)
39+
parseM "Identifier" i `shouldBe` e'
40+
parseM "Identifier" ("\"" ++ e ++ "\"") `shouldBe` e'
2941

3042
describe "nixKeywords" $ do
3143
it "are quoted" $ forM_ nixKeywords $ \str -> do

0 commit comments

Comments
 (0)