Skip to content

Commit fe58eb5

Browse files
committed
WIP skip symbols unique to one side for perf
1 parent 43274db commit fe58eb5

File tree

3 files changed

+122
-6
lines changed

3 files changed

+122
-6
lines changed

src/diff/unchanged.rs

Lines changed: 114 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,10 +3,12 @@
33
44
use std::hash::Hash;
55

6+
use typed_arena::Arena;
7+
68
use crate::diff::changes::{insert_deep_unchanged, ChangeKind, ChangeMap};
79
use crate::diff::lcs_diff;
810
use crate::hash::DftHashSet;
9-
use crate::parse::syntax::Syntax;
11+
use crate::parse::syntax::{init_all_info, AtomKind, Syntax, SyntaxInfo};
1012

1113
const TINY_TREE_THRESHOLD: u32 = 10;
1214
const MOSTLY_UNCHANGED_MIN_COMMON_CHILDREN: usize = 4;
@@ -16,10 +18,30 @@ const MOSTLY_UNCHANGED_MIN_COMMON_CHILDREN: usize = 4;
1618
pub(crate) fn mark_unchanged<'a>(
1719
lhs_nodes: &[&'a Syntax<'a>],
1820
rhs_nodes: &[&'a Syntax<'a>],
21+
arena: &'a Arena<Syntax<'a>>,
1922
change_map: &mut ChangeMap<'a>,
2023
) -> Vec<(Vec<&'a Syntax<'a>>, Vec<&'a Syntax<'a>>)> {
2124
let (_, lhs_nodes, rhs_nodes) = shrink_unchanged_at_ends(lhs_nodes, rhs_nodes, change_map);
2225

26+
let lhs_symbol_ids = symbol_atom_ids(&lhs_nodes);
27+
let rhs_symbol_ids = symbol_atom_ids(&rhs_nodes);
28+
29+
let lhs_nodes = mark_novel_symbols(
30+
&lhs_nodes,
31+
&lhs_symbol_ids,
32+
&rhs_symbol_ids,
33+
arena,
34+
change_map,
35+
);
36+
let rhs_nodes = mark_novel_symbols(
37+
&rhs_nodes,
38+
&rhs_symbol_ids,
39+
&lhs_symbol_ids,
40+
arena,
41+
change_map,
42+
);
43+
init_all_info(&lhs_nodes, &rhs_nodes);
44+
2345
let mut nodes_to_diff = vec![];
2446
for (lhs_nodes, rhs_nodes) in split_mostly_unchanged_toplevel(&lhs_nodes, &rhs_nodes) {
2547
let (_, lhs_nodes, rhs_nodes) =
@@ -30,6 +52,97 @@ pub(crate) fn mark_unchanged<'a>(
3052
nodes_to_diff
3153
}
3254

55+
/// For every node in `nodes` (including descendants), discard any
56+
/// atom that only has its ID on this side and mark it as novel.
57+
fn mark_novel_symbols<'a>(
58+
nodes: &[&'a Syntax<'a>],
59+
symbol_ids_this_side: &DftHashSet<u32>,
60+
symbols_ids_other_side: &DftHashSet<u32>,
61+
arena: &'a Arena<Syntax<'a>>,
62+
change_map: &mut ChangeMap<'a>,
63+
) -> Vec<&'a Syntax<'a>> {
64+
let mut result = vec![];
65+
66+
for node in nodes {
67+
match node {
68+
Syntax::Atom { .. } => {
69+
if symbol_ids_this_side.contains(&node.content_id())
70+
&& !symbols_ids_other_side.contains(&node.content_id())
71+
{
72+
change_map.insert(node, ChangeKind::Novel);
73+
} else {
74+
result.push(*node);
75+
}
76+
}
77+
Syntax::List {
78+
children,
79+
open_content,
80+
close_content,
81+
open_position,
82+
close_position,
83+
num_descendants,
84+
info,
85+
} => {
86+
let children = mark_novel_symbols(
87+
children,
88+
symbol_ids_this_side,
89+
symbols_ids_other_side,
90+
arena,
91+
change_map,
92+
);
93+
94+
let new_info = SyntaxInfo::default();
95+
new_info.unique_id.set(info.unique_id.get());
96+
97+
let new_list = arena.alloc(Syntax::List {
98+
info: new_info,
99+
open_position: open_position.clone(),
100+
open_content: open_content.clone(),
101+
children,
102+
close_position: close_position.clone(),
103+
close_content: close_content.clone(),
104+
num_descendants: *num_descendants,
105+
});
106+
result.push(new_list);
107+
}
108+
}
109+
}
110+
111+
result
112+
}
113+
114+
/// The IDs of all atoms that are symbols.
115+
pub(crate) fn symbol_atom_ids<'a>(nodes: &[&'a Syntax<'a>]) -> DftHashSet<u32> {
116+
let mut ids = DftHashSet::default();
117+
symbol_atom_ids_(nodes, &mut ids);
118+
ids
119+
}
120+
121+
fn symbol_atom_ids_<'a>(nodes: &[&'a Syntax<'a>], ids: &mut DftHashSet<u32>) {
122+
for node in nodes {
123+
match node {
124+
Syntax::List { children, .. } => symbol_atom_ids_(children, ids),
125+
Syntax::Atom { kind, .. } => match kind {
126+
AtomKind::Comment | AtomKind::String(_) => {
127+
// We don't want to consider comments and strings
128+
// as symbol atoms, because we match up similar
129+
// but non-identical comments and strings during
130+
// diffing.
131+
//
132+
// A string being unique to one side isn't
133+
// sufficient to discard it.
134+
}
135+
AtomKind::Normal
136+
| AtomKind::Type
137+
| AtomKind::Keyword
138+
| AtomKind::TreeSitterError => {
139+
ids.insert(node.content_id());
140+
}
141+
},
142+
}
143+
}
144+
}
145+
33146
#[derive(Debug)]
34147
enum ChangeState {
35148
UnchangedDelimiter,

src/main.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -674,7 +674,7 @@ fn diff_file_content(
674674
let possibly_changed = if env::var("DFT_DBG_KEEP_UNCHANGED").is_ok() {
675675
vec![(lhs.clone(), rhs.clone())]
676676
} else {
677-
unchanged::mark_unchanged(&lhs, &rhs, &mut change_map)
677+
unchanged::mark_unchanged(&lhs, &rhs, &arena, &mut change_map)
678678
};
679679

680680
let mut exceeded_graph_limit = false;

src/parse/syntax.rs

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,7 @@ pub(crate) struct SyntaxInfo<'a> {
6363
num_ancestors: Cell<u32>,
6464
pub(crate) num_after: Cell<usize>,
6565
/// A number that uniquely identifies this syntax node.
66-
unique_id: Cell<SyntaxId>,
66+
pub(crate) unique_id: Cell<SyntaxId>,
6767
/// A number that uniquely identifies the content of this syntax
6868
/// node. This may be the same as nodes on the other side of the
6969
/// diff, or nodes at different positions.
@@ -497,9 +497,12 @@ fn init_info_on_side<'a>(roots: &[&'a Syntax<'a>], next_id: &mut SyntaxId) {
497497

498498
fn set_unique_id(nodes: &[&Syntax], next_id: &mut SyntaxId) {
499499
for node in nodes {
500-
node.info().unique_id.set(*next_id);
501-
*next_id = NonZeroU32::new(u32::from(*next_id) + 1)
502-
.expect("Should not have more than u32::MAX nodes");
500+
if node.info().unique_id.get() == NonZeroU32::new(u32::MAX).unwrap() {
501+
node.info().unique_id.set(*next_id);
502+
*next_id = NonZeroU32::new(u32::from(*next_id) + 1)
503+
.expect("Should not have more than u32::MAX nodes");
504+
}
505+
503506
if let List { children, .. } = node {
504507
set_unique_id(children, next_id);
505508
}

0 commit comments

Comments
 (0)