11"""A reconstructor for HCL2 implemented using Lark's experimental reconstruction functionality"""
22
33import re
4- import json
54from typing import List , Dict , Callable , Optional , Union , Any , Tuple
65
76from lark import Lark , Tree
@@ -137,7 +136,7 @@ def _is_equals_sign(self, terminal) -> bool:
137136 )
138137
139138 # pylint: disable=too-many-branches, too-many-return-statements
140- def _should_add_space (self , rule , current_terminal ):
139+ def _should_add_space (self , rule , current_terminal , is_block_label : bool = False ):
141140 """
142141 This method documents the situations in which we add space around
143142 certain tokens while reconstructing the generated HCL.
@@ -155,6 +154,7 @@ def _should_add_space(self, rule, current_terminal):
155154
156155 This should be sufficient to make a spacing decision.
157156 """
157+
158158 # we don't need to add multiple spaces
159159 if self ._last_char_space :
160160 return False
@@ -166,6 +166,14 @@ def _should_add_space(self, rule, current_terminal):
166166 if self ._is_equals_sign (current_terminal ):
167167 return True
168168
169+ if is_block_label and isinstance (rule , Token ) and rule .value == "string" :
170+ if (
171+ current_terminal == self ._last_terminal == Terminal ("DBLQUOTE" )
172+ or current_terminal == Terminal ("DBLQUOTE" )
173+ and self ._last_terminal == Terminal ("NAME" )
174+ ):
175+ return True
176+
169177 # if we're in a ternary or binary operator, add space around the operator
170178 if (
171179 isinstance (rule , Token )
@@ -235,7 +243,7 @@ def _should_add_space(self, rule, current_terminal):
235243 return True
236244
237245 # always add space between string literals
238- if current_terminal == Terminal ("STRING_LIT " ):
246+ if current_terminal == Terminal ("STRING_CHARS " ):
239247 return True
240248
241249 # if we just opened a block, add a space, unless the block is empty
@@ -257,7 +265,7 @@ def _should_add_space(self, rule, current_terminal):
257265 # preceded by a space if they're following a comma in a tuple or
258266 # function arg
259267 if current_terminal in [
260- Terminal ("STRING_LIT " ),
268+ Terminal ("DBLQUOTE " ),
261269 Terminal ("DECIMAL" ),
262270 Terminal ("NAME" ),
263271 Terminal ("NEGATIVE_DECIMAL" ),
@@ -267,13 +275,15 @@ def _should_add_space(self, rule, current_terminal):
267275 # the catch-all case, we're not sure, so don't add a space
268276 return False
269277
270- def _reconstruct (self , tree ):
278+ def _reconstruct (self , tree , is_block_label = False ):
271279 unreduced_tree = self .match_tree (tree , tree .data )
272280 res = self .write_tokens .transform (unreduced_tree )
273281 for item in res :
274282 # any time we encounter a child tree, we recurse
275283 if isinstance (item , Tree ):
276- yield from self ._reconstruct (item )
284+ yield from self ._reconstruct (
285+ item , (unreduced_tree .data == "block" and item .data != "body" )
286+ )
277287
278288 # every leaf should be a tuple, which contains information about
279289 # which terminal the leaf represents
@@ -309,7 +319,7 @@ def _reconstruct(self, tree):
309319 self ._deferred_item = None
310320
311321 # potentially add a space before the next token
312- if self ._should_add_space (rule , terminal ):
322+ if self ._should_add_space (rule , terminal , is_block_label ):
313323 yield " "
314324 self ._last_char_space = True
315325
@@ -353,21 +363,21 @@ def _name_to_identifier(name: str) -> Tree:
353363
@staticmethod
def _escape_interpolated_str(interp_s: str) -> str:
    """
    Escape a raw string value so it can be placed inside an HCL string.

    Heredoc values (anything that, once stripped, starts with "<<") are
    not escaped; they are only passed through
    reverse_quotes_within_interpolation. Every other value has its
    backslashes, double quotes and the control characters newline,
    carriage return, tab, backspace and form feed replaced by their
    backslash escapes before being passed through the same helper.

    The surrounding double quotes are NOT added here.
    """
    # "<<-" also begins with "<<", so a single prefix test covers both
    # heredoc forms
    if interp_s.strip().startswith("<<"):
        # For heredoc strings, preserve their format exactly
        return reverse_quotes_within_interpolation(interp_s)

    # Backslashes must be escaped first; otherwise the backslashes
    # introduced by the replacements below would be doubled as well
    escaped = interp_s.replace("\\", "\\\\")
    # Escape quotes
    escaped = escaped.replace('"', '\\"')
    # Escape control characters
    for raw_char, escape_seq in (
        ("\n", "\\n"),
        ("\r", "\\r"),
        ("\t", "\\t"),
        ("\b", "\\b"),
        ("\f", "\\f"),
    ):
        escaped = escaped.replace(raw_char, escape_seq)

    # find each interpolation within the string and remove the backslashes
    return reverse_quotes_within_interpolation(escaped)
372382
373383 @staticmethod
@@ -420,6 +430,48 @@ def _newline(self, level: int, count: int = 1) -> Tree:
420430 [Token ("NL_OR_COMMENT" , f"\n { ' ' * level } " ) for _ in range (count )],
421431 )
422432
def _build_string_rule(self, string: str, level: int = 0) -> Tree:
    """
    Build a lark ``string`` rule tree out of an arbitrary string.

    The grammar in hcl2.lark defines a string as any number of string
    parts, where each part is either an interpolation expression, an
    escaped interpolation or a run of regular characters. This method
    splits ``string`` into such parts and wraps each one in a
    ``string_part`` tree:

    - ``$${...}`` becomes an ESCAPED_INTERPOLATION token (kept verbatim);
    - ``${...}`` becomes an ``interpolation`` tree. If the content is
      wrapped in double quotes, the quotes are removed and the rest is
      converted with ``self._transform_value_to_expr_term``; otherwise
      the content is used as the NAME of an identifier;
    - anything else becomes a STRING_CHARS token.

    ``level`` is only forwarded to ``_transform_value_to_expr_term``.
    An empty ``string`` yields a ``string`` tree without children.
    """
    pattern = re.compile(r"(\${1,2}\{(?:[^{}]|\{[^{}]*})*})")
    # e.g. 'aaa$${bbb}ccc${"ddd-${eee}"}' -> ['aaa', '$${bbb}', 'ccc', '${"ddd-${eee}"}']
    #
    # re.split with a capturing group yields an empty string at either
    # end when the input starts/ends with a match AND between two
    # adjacent matches (e.g. '${a}${b}'). All of them are dropped so that
    # no empty STRING_CHARS token ends up in the tree.
    parts = [part for part in re.split(pattern, string) if part]

    elements = []
    for part in parts:
        if part.startswith("$${") and part.endswith("}"):
            elements.append(Token("ESCAPED_INTERPOLATION", part))

        # unwrap interpolation expression and recurse into it
        elif part.startswith("${") and part.endswith("}"):
            inner = part[2:-1]
            if inner.startswith('"') and inner.endswith('"'):
                expr = self._transform_value_to_expr_term(inner[1:-1], level)
            else:
                expr = Tree(
                    Token("RULE", "expr_term"),
                    [Tree(Token("RULE", "identifier"), [Token("NAME", inner)])],
                )
            elements.append(Tree(Token("RULE", "interpolation"), [expr]))

        else:
            elements.append(Token("STRING_CHARS", part))

    string_parts = [
        Tree(Token("RULE", "string_part"), [element]) for element in elements
    ]
    return Tree(Token("RULE", "string"), string_parts)
474+
423475 def _is_block (self , value : Any ) -> bool :
424476 if isinstance (value , dict ):
425477 block_body = value
@@ -485,8 +537,8 @@ def _transform_dict_to_body(self, hcl_dict: dict, level: int) -> Tree:
485537 block_labels , block_body_dict = self ._calculate_block_labels (
486538 block_v
487539 )
488- block_label_tokens = [
489- Token ( "STRING_LIT" , f'" { block_label } "' )
540+ block_label_trees = [
541+ self . _build_string_rule ( block_label , level )
490542 for block_label in block_labels
491543 ]
492544 block_body = self ._transform_dict_to_body (
@@ -496,7 +548,7 @@ def _transform_dict_to_body(self, hcl_dict: dict, level: int) -> Tree:
496548 # create our actual block to add to our own body
497549 block = Tree (
498550 Token ("RULE" , "block" ),
499- [identifier_name ] + block_label_tokens + [block_body ],
551+ [identifier_name ] + block_label_trees + [block_body ],
500552 )
501553 children .append (block )
502554 # add empty line after block
@@ -675,10 +727,10 @@ def _transform_value_to_expr_term(self, value, level) -> Union[Token, Tree]:
675727 parsed_value = attribute .children [2 ]
676728 return parsed_value
677729
678- # otherwise it's just a string.
730+ # otherwise it's a string
679731 return Tree (
680732 Token ("RULE" , "expr_term" ),
681- [Token ( "STRING_LIT" , self ._escape_interpolated_str (value ))],
733+ [self . _build_string_rule ( self ._escape_interpolated_str (value ), level )],
682734 )
683735
684736 # otherwise, we don't know the type
0 commit comments