Merge branch 'main' into transformer-overhaul

kkozik-amplify · kkozik-amplify · commit aba419feaa0f · 2025-07-04T11:30:46.000+02:00
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -7,7 +7,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.
 
 ## \[Unreleased\]
 
-- Nothing yet.
+- Fix issue parsing interpolations and escaped interpolations in a single string. ([#239](https://github.com/amplify-education/python-hcl2/pull/239))
 
 ## \[7.2.1\] - 2025-05-16
 
diff --git a/hcl2/dict_transformer.py b/hcl2/dict_transformer.py
@@ -247,7 +247,8 @@ def heredoc_template(self, args: List) -> str:
             raise RuntimeError(f"Invalid Heredoc token: {args[0]}")
 
         trim_chars = "\n\t "
-        return f'"{match.group(2).rstrip(trim_chars)}"'
+        result = match.group(2).rstrip(trim_chars)
+        return f'"{result}"'
 
     def heredoc_template_trim(self, args: List) -> str:
         # See https://github.com/hashicorp/hcl2/blob/master/hcl/hclsyntax/spec.md#template-expressions
@@ -301,12 +302,17 @@ def for_object_expr(self, args: List) -> str:
         # e.g. f"{2 + 2} {{2 + 2}}" == "4 {2 + 2}"
         return f"{{{for_expr}}}"
 
-    def string_with_interpolation(self, args: List) -> str:
-        return '"' + ("".join(args)) + '"'
+    def string(self, args: List) -> str:
+        return '"' + "".join(args) + '"'
 
-    def interpolation_maybe_nested(self, args: List) -> str:
-        # return "".join(args)
-        return "${" + ("".join(args)) + "}"
+    def string_part(self, args: List) -> str:
+        value = self.to_tf_inline(args[0])
+        if value.startswith('"') and value.endswith('"'):
+            value = value[1:-1]
+        return value
+
+    def interpolation(self, args: List) -> str:
+        return '"${' + str(args[0]) + '}"'
 
     def strip_new_line_tokens(self, args: List) -> List:
         """
diff --git a/hcl2/hcl2.lark b/hcl2/hcl2.lark
@@ -1,7 +1,7 @@
 start : body
 body : (new_line_or_comment? (attribute | block))* new_line_or_comment?
 attribute : identifier EQ expression
-block : identifier (identifier | STRING_LIT | string_with_interpolation)* new_line_or_comment? "{" body "}"
+block : identifier (identifier | string)* new_line_or_comment? "{" body "}"
 new_line_or_comment: ( NL_OR_COMMENT )+
 NL_OR_COMMENT: /\n[ \t]*/ | /#.*\n/ | /\/\/.*\n/ | /\/\*(.|\n)*?(\*\/)/
 
@@ -44,8 +44,7 @@ COLON : ":"
 expr_term : LPAR new_line_or_comment? expression new_line_or_comment? RPAR
             | float_lit
             | int_lit
-            | STRING_LIT
-            | string_with_interpolation
+            | string
             | tuple
             | object
             | function_call
@@ -60,11 +59,13 @@ expr_term : LPAR new_line_or_comment? expression new_line_or_comment? RPAR
             | for_tuple_expr
             | for_object_expr
 
-STRING_LIT : "\"" STRING_CHARS? "\""
-STRING_CHARS : /(?:(?!\${)([^"\\]|\\.|\$\$))+/ // any character except '"', including escaped $$
-string_with_interpolation: "\"" (STRING_CHARS)* interpolation_maybe_nested (STRING_CHARS | interpolation_maybe_nested)* "\""
-interpolation_maybe_nested: "${" expression "}"
-
+string: "\"" string_part* "\""
+string_part: STRING_CHARS
+           | ESCAPED_INTERPOLATION
+           | interpolation
+interpolation: "${" expression "}"
+ESCAPED_INTERPOLATION.2: /\$\$\{[^}]*\}/
+STRING_CHARS.1: /(?:(?!\$\$\{)(?!\$\{)[^"\\]|\\.|(?:\$(?!\$?\{)))+/
 
 int_lit : NEGATIVE_DECIMAL? DECIMAL+ | NEGATIVE_DECIMAL+
 !float_lit: (NEGATIVE_DECIMAL? DECIMAL+ | NEGATIVE_DECIMAL+) "." DECIMAL+ (EXP_MARK)?
@@ -77,7 +78,7 @@ EQ : /[ \t]*=(?!=|>)/
 tuple : "[" (new_line_or_comment* expression new_line_or_comment* ",")* (new_line_or_comment* expression)? new_line_or_comment* "]"
 object : "{" new_line_or_comment? (new_line_or_comment* (object_elem | (object_elem COMMA)) new_line_or_comment*)* "}"
 object_elem : object_elem_key ( EQ | COLON ) expression
-object_elem_key : float_lit | int_lit | identifier | STRING_LIT | object_elem_key_dot_accessor | object_elem_key_expression | string_with_interpolation
+object_elem_key : float_lit | int_lit | identifier | string | object_elem_key_dot_accessor | object_elem_key_expression
 object_elem_key_expression : LPAR expression RPAR
 object_elem_key_dot_accessor : identifier (DOT identifier)+
 
diff --git a/hcl2/reconstructor.py b/hcl2/reconstructor.py
@@ -1,7 +1,6 @@
 """A reconstructor for HCL2 implemented using Lark's experimental reconstruction functionality"""
 
 import re
-import json
 from typing import List, Dict, Callable, Optional, Union, Any, Tuple
 
 from lark import Lark, Tree
@@ -137,7 +136,7 @@ def _is_equals_sign(self, terminal) -> bool:
         )
 
     # pylint: disable=too-many-branches, too-many-return-statements
-    def _should_add_space(self, rule, current_terminal):
+    def _should_add_space(self, rule, current_terminal, is_block_label: bool = False):
         """
         This method documents the situations in which we add space around
         certain tokens while reconstructing the generated HCL.
@@ -155,6 +154,7 @@ def _should_add_space(self, rule, current_terminal):
 
         This should be sufficient to make a spacing decision.
         """
+
         # we don't need to add multiple spaces
         if self._last_char_space:
             return False
@@ -166,6 +166,14 @@ def _should_add_space(self, rule, current_terminal):
         if self._is_equals_sign(current_terminal):
             return True
 
+        if is_block_label and isinstance(rule, Token) and rule.value == "string":
+            if (
+                current_terminal == self._last_terminal == Terminal("DBLQUOTE")
+                or current_terminal == Terminal("DBLQUOTE")
+                and self._last_terminal == Terminal("NAME")
+            ):
+                return True
+
         # if we're in a ternary or binary operator, add space around the operator
         if (
             isinstance(rule, Token)
@@ -235,7 +243,7 @@ def _should_add_space(self, rule, current_terminal):
                 return True
 
             # always add space between string literals
-            if current_terminal == Terminal("STRING_LIT"):
+            if current_terminal == Terminal("STRING_CHARS"):
                 return True
 
         # if we just opened a block, add a space, unless the block is empty
@@ -257,7 +265,7 @@ def _should_add_space(self, rule, current_terminal):
             # preceded by a space if they're following a comma in a tuple or
             # function arg
             if current_terminal in [
-                Terminal("STRING_LIT"),
+                Terminal("DBLQUOTE"),
                 Terminal("DECIMAL"),
                 Terminal("NAME"),
                 Terminal("NEGATIVE_DECIMAL"),
@@ -267,13 +275,15 @@ def _should_add_space(self, rule, current_terminal):
         # the catch-all case, we're not sure, so don't add a space
         return False
 
-    def _reconstruct(self, tree):
+    def _reconstruct(self, tree, is_block_label=False):
         unreduced_tree = self.match_tree(tree, tree.data)
         res = self.write_tokens.transform(unreduced_tree)
         for item in res:
             # any time we encounter a child tree, we recurse
             if isinstance(item, Tree):
-                yield from self._reconstruct(item)
+                yield from self._reconstruct(
+                    item, (unreduced_tree.data == "block" and item.data != "body")
+                )
 
             # every leaf should be a tuple, which contains information about
             # which terminal the leaf represents
@@ -309,7 +319,7 @@ def _reconstruct(self, tree):
                     self._deferred_item = None
 
                 # potentially add a space before the next token
-                if self._should_add_space(rule, terminal):
+                if self._should_add_space(rule, terminal, is_block_label):
                     yield " "
                     self._last_char_space = True
 
@@ -353,21 +363,21 @@ def _name_to_identifier(name: str) -> Tree:
 
     @staticmethod
     def _escape_interpolated_str(interp_s: str) -> str:
-        if interp_s.strip().startswith('<<-') or interp_s.strip().startswith('<<'):
+        if interp_s.strip().startswith("<<-") or interp_s.strip().startswith("<<"):
             # For heredoc strings, preserve their format exactly
             return reverse_quotes_within_interpolation(interp_s)
         # Escape backslashes first (very important to do this first)
-        escaped = interp_s.replace('\\', '\\\\')
+        escaped = interp_s.replace("\\", "\\\\")
         # Escape quotes
         escaped = escaped.replace('"', '\\"')
         # Escape control characters
-        escaped = escaped.replace('\n', '\\n')
-        escaped = escaped.replace('\r', '\\r')
-        escaped = escaped.replace('\t', '\\t')
-        escaped = escaped.replace('\b', '\\b')
-        escaped = escaped.replace('\f', '\\f')
+        escaped = escaped.replace("\n", "\\n")
+        escaped = escaped.replace("\r", "\\r")
+        escaped = escaped.replace("\t", "\\t")
+        escaped = escaped.replace("\b", "\\b")
+        escaped = escaped.replace("\f", "\\f")
         # find each interpolation within the string and remove the backslashes
-        interp_s = reverse_quotes_within_interpolation(f'"{escaped}"')
+        interp_s = reverse_quotes_within_interpolation(f"{escaped}")
         return interp_s
 
     @staticmethod
@@ -420,6 +430,48 @@ def _newline(self, level: int, count: int = 1) -> Tree:
             [Token("NL_OR_COMMENT", f"\n{'  ' * level}") for _ in range(count)],
         )
 
+    def _build_string_rule(self, string: str, level: int = 0) -> Tree:
+        # The grammar in hcl2.lark defines a string as any number of string parts;
+        #   each string part is either an interpolation expression, an escaped interpolation,
+        #   or a run of regular string characters.
+        # This method builds the hcl2 string rule from an arbitrary string,
+        #   splitting the string into individual parts and building a lark tree out of them.
+        #
+        result = []
+
+        pattern = re.compile(r"(\${1,2}\{(?:[^{}]|\{[^{}]*})*})")
+        parts = re.split(pattern, string)
+        # e.g. 'aaa$${bbb}ccc${"ddd-${eee}"}' -> ['aaa', '$${bbb}', 'ccc', '${"ddd-${eee}"}']
+
+        if parts[-1] == "":
+            parts.pop()
+        if len(parts) > 0 and parts[0] == "":
+            parts.pop(0)
+
+        for part in parts:
+            if part.startswith("$${") and part.endswith("}"):
+                result.append(Token("ESCAPED_INTERPOLATION", part))
+
+            # unwrap interpolation expression and recurse into it
+            elif part.startswith("${") and part.endswith("}"):
+                part = part[2:-1]
+                if part.startswith('"') and part.endswith('"'):
+                    part = part[1:-1]
+                    part = self._transform_value_to_expr_term(part, level)
+                else:
+                    part = Tree(
+                        Token("RULE", "expr_term"),
+                        [Tree(Token("RULE", "identifier"), [Token("NAME", part)])],
+                    )
+
+                result.append(Tree(Token("RULE", "interpolation"), [part]))
+
+            else:
+                result.append(Token("STRING_CHARS", part))
+
+        result = [Tree(Token("RULE", "string_part"), [element]) for element in result]
+        return Tree(Token("RULE", "string"), result)
+
     def _is_block(self, value: Any) -> bool:
         if isinstance(value, dict):
             block_body = value
@@ -485,8 +537,8 @@ def _transform_dict_to_body(self, hcl_dict: dict, level: int) -> Tree:
                     block_labels, block_body_dict = self._calculate_block_labels(
                         block_v
                     )
-                    block_label_tokens = [
-                        Token("STRING_LIT", f'"{block_label}"')
+                    block_label_trees = [
+                        self._build_string_rule(block_label, level)
                         for block_label in block_labels
                     ]
                     block_body = self._transform_dict_to_body(
@@ -496,7 +548,7 @@ def _transform_dict_to_body(self, hcl_dict: dict, level: int) -> Tree:
                     # create our actual block to add to our own body
                     block = Tree(
                         Token("RULE", "block"),
-                        [identifier_name] + block_label_tokens + [block_body],
+                        [identifier_name] + block_label_trees + [block_body],
                     )
                     children.append(block)
                     # add empty line after block
@@ -675,10 +727,10 @@ def _transform_value_to_expr_term(self, value, level) -> Union[Token, Tree]:
                 parsed_value = attribute.children[2]
                 return parsed_value
 
-            # otherwise it's just a string.
+            # otherwise it's a string
             return Tree(
                 Token("RULE", "expr_term"),
-                [Token("STRING_LIT", self._escape_interpolated_str(value))],
+                [self._build_string_rule(self._escape_interpolated_str(value), level)],
             )
 
         # otherwise, we don't know the type
diff --git a/test/helpers/terraform-config-json/string_interpolations.json b/test/helpers/terraform-config-json/string_interpolations.json
@@ -1 +1,13 @@
-{"locals": [{"simple_interpolation": "prefix:${var.foo}-suffix", "embedded_interpolation": "(long substring without interpolation); ${module.special_constants.aws_accounts[\"aaa-${local.foo}-${local.bar}\"]}/us-west-2/key_foo", "deeply_nested_interpolation": "prefix1-${\"prefix2-${\"prefix3-${local.foo}\"}\"}", "escaped_interpolation": "prefix:$${aws:username}-suffix"}]}
+{
+ "locals": [
+  {
+   "simple_interpolation": "prefix:${var.foo}-suffix",
+   "embedded_interpolation": "(long substring without interpolation); ${module.special_constants.aws_accounts[\"aaa-${local.foo}-${local.bar}\"]}/us-west-2/key_foo",
+   "deeply_nested_interpolation": "prefix1-${\"prefix2-${\"prefix3-$${foo:bar}\"}\"}",
+   "escaped_interpolation": "prefix:$${aws:username}-suffix",
+   "simple_and_escaped": "${\"bar\"}$${baz:bat}",
+   "simple_and_escaped_reversed": "$${baz:bat}${\"bar\"}",
+   "nested_escaped": "bar-${\"$${baz:bat}\"}"
+  }
+ ]
+}
diff --git a/test/helpers/terraform-config/string_interpolations.tf b/test/helpers/terraform-config/string_interpolations.tf
@@ -1,6 +1,9 @@
 locals {
   simple_interpolation = "prefix:${var.foo}-suffix"
   embedded_interpolation = "(long substring without interpolation); ${module.special_constants.aws_accounts["aaa-${local.foo}-${local.bar}"]}/us-west-2/key_foo"
-  deeply_nested_interpolation = "prefix1-${"prefix2-${"prefix3-${local.foo}"}"}"
+  deeply_nested_interpolation = "prefix1-${"prefix2-${"prefix3-$${foo:bar}"}"}"
   escaped_interpolation = "prefix:$${aws:username}-suffix"
+  simple_and_escaped = "${"bar"}$${baz:bat}"
+  simple_and_escaped_reversed = "$${baz:bat}${"bar"}"
+  nested_escaped = "bar-${"$${baz:bat}"}"
 }
diff --git a/test/unit/test_builder.py b/test/unit/test_builder.py
@@ -73,8 +73,11 @@ def test_locals_embedded_interpolation_tf(self):
             "simple_interpolation": "prefix:${var.foo}-suffix",
             "embedded_interpolation": "(long substring without interpolation); "
             '${module.special_constants.aws_accounts["aaa-${local.foo}-${local.bar}"]}/us-west-2/key_foo',
-            "deeply_nested_interpolation": 'prefix1-${"prefix2-${"prefix3-${local.foo}"}"}',
+            "deeply_nested_interpolation": 'prefix1-${"prefix2-${"prefix3-$${foo:bar}"}"}',
             "escaped_interpolation": "prefix:$${aws:username}-suffix",
+            "simple_and_escaped": '${"bar"}$${baz:bat}',
+            "simple_and_escaped_reversed": '$${baz:bat}${"bar"}',
+            "nested_escaped": 'bar-${"$${baz:bat}"}',
         }
 
         builder.block("locals", **attributes)