33
44use std:: hash:: Hash ;
55
6+ use typed_arena:: Arena ;
7+
68use crate :: diff:: changes:: { insert_deep_unchanged, ChangeKind , ChangeMap } ;
79use crate :: diff:: lcs_diff;
810use crate :: hash:: DftHashSet ;
9- use crate :: parse:: syntax:: Syntax ;
11+ use crate :: parse:: syntax:: { init_all_info , AtomKind , Syntax , SyntaxInfo } ;
1012
1113const TINY_TREE_THRESHOLD : u32 = 10 ;
1214const MOSTLY_UNCHANGED_MIN_COMMON_CHILDREN : usize = 4 ;
@@ -16,10 +18,30 @@ const MOSTLY_UNCHANGED_MIN_COMMON_CHILDREN: usize = 4;
1618pub ( crate ) fn mark_unchanged < ' a > (
1719 lhs_nodes : & [ & ' a Syntax < ' a > ] ,
1820 rhs_nodes : & [ & ' a Syntax < ' a > ] ,
21+ arena : & ' a Arena < Syntax < ' a > > ,
1922 change_map : & mut ChangeMap < ' a > ,
2023) -> Vec < ( Vec < & ' a Syntax < ' a > > , Vec < & ' a Syntax < ' a > > ) > {
2124 let ( _, lhs_nodes, rhs_nodes) = shrink_unchanged_at_ends ( lhs_nodes, rhs_nodes, change_map) ;
2225
26+ let lhs_symbol_ids = symbol_atom_ids ( & lhs_nodes) ;
27+ let rhs_symbol_ids = symbol_atom_ids ( & rhs_nodes) ;
28+
29+ let lhs_nodes = mark_novel_symbols (
30+ & lhs_nodes,
31+ & lhs_symbol_ids,
32+ & rhs_symbol_ids,
33+ arena,
34+ change_map,
35+ ) ;
36+ let rhs_nodes = mark_novel_symbols (
37+ & rhs_nodes,
38+ & rhs_symbol_ids,
39+ & lhs_symbol_ids,
40+ arena,
41+ change_map,
42+ ) ;
43+ init_all_info ( & lhs_nodes, & rhs_nodes) ;
44+
2345 let mut nodes_to_diff = vec ! [ ] ;
2446 for ( lhs_nodes, rhs_nodes) in split_mostly_unchanged_toplevel ( & lhs_nodes, & rhs_nodes) {
2547 let ( _, lhs_nodes, rhs_nodes) =
@@ -30,6 +52,97 @@ pub(crate) fn mark_unchanged<'a>(
3052 nodes_to_diff
3153}
3254
55+ /// For every node in `nodes` (including descendants), discard any
56+ /// atom that only has its ID on this side and mark it as novel.
57+ fn mark_novel_symbols < ' a > (
58+ nodes : & [ & ' a Syntax < ' a > ] ,
59+ symbol_ids_this_side : & DftHashSet < u32 > ,
60+ symbols_ids_other_side : & DftHashSet < u32 > ,
61+ arena : & ' a Arena < Syntax < ' a > > ,
62+ change_map : & mut ChangeMap < ' a > ,
63+ ) -> Vec < & ' a Syntax < ' a > > {
64+ let mut result = vec ! [ ] ;
65+
66+ for node in nodes {
67+ match node {
68+ Syntax :: Atom { .. } => {
69+ if symbol_ids_this_side. contains ( & node. content_id ( ) )
70+ && !symbols_ids_other_side. contains ( & node. content_id ( ) )
71+ {
72+ change_map. insert ( node, ChangeKind :: Novel ) ;
73+ } else {
74+ result. push ( * node) ;
75+ }
76+ }
77+ Syntax :: List {
78+ children,
79+ open_content,
80+ close_content,
81+ open_position,
82+ close_position,
83+ num_descendants,
84+ info,
85+ } => {
86+ let children = mark_novel_symbols (
87+ children,
88+ symbol_ids_this_side,
89+ symbols_ids_other_side,
90+ arena,
91+ change_map,
92+ ) ;
93+
94+ let new_info = SyntaxInfo :: default ( ) ;
95+ new_info. unique_id . set ( info. unique_id . get ( ) ) ;
96+
97+ let new_list = arena. alloc ( Syntax :: List {
98+ info : new_info,
99+ open_position : open_position. clone ( ) ,
100+ open_content : open_content. clone ( ) ,
101+ children,
102+ close_position : close_position. clone ( ) ,
103+ close_content : close_content. clone ( ) ,
104+ num_descendants : * num_descendants,
105+ } ) ;
106+ result. push ( new_list) ;
107+ }
108+ }
109+ }
110+
111+ result
112+ }
113+
114+ /// The IDs of all atoms that are symbols.
115+ pub ( crate ) fn symbol_atom_ids < ' a > ( nodes : & [ & ' a Syntax < ' a > ] ) -> DftHashSet < u32 > {
116+ let mut ids = DftHashSet :: default ( ) ;
117+ symbol_atom_ids_ ( nodes, & mut ids) ;
118+ ids
119+ }
120+
121+ fn symbol_atom_ids_ < ' a > ( nodes : & [ & ' a Syntax < ' a > ] , ids : & mut DftHashSet < u32 > ) {
122+ for node in nodes {
123+ match node {
124+ Syntax :: List { children, .. } => symbol_atom_ids_ ( children, ids) ,
125+ Syntax :: Atom { kind, .. } => match kind {
126+ AtomKind :: Comment | AtomKind :: String ( _) => {
127+ // We don't want to consider comments and strings
128+ // as symbol atoms, because we match up similar
129+ // but non-identical comments and strings during
130+ // diffing.
131+ //
132+ // A string being unique to one side isn't
133+ // sufficient to discard it.
134+ }
135+ AtomKind :: Normal
136+ | AtomKind :: Type
137+ | AtomKind :: Keyword
138+ | AtomKind :: TreeSitterError => {
139+ ids. insert ( node. content_id ( ) ) ;
140+ }
141+ } ,
142+ }
143+ }
144+ }
145+
33146#[ derive( Debug ) ]
34147enum ChangeState {
35148 UnchangedDelimiter ,
0 commit comments