Skip to content

Commit c2585db

Browse files
committed
feat: upon evaluation changes, rewire derivations for relevant suggestions
Lost derivations are handled by marking them as outdated. Signed-off-by: Raito Bezarius <[email protected]>
1 parent e360103 commit c2585db

File tree

3 files changed

+138
-2
lines changed

3 files changed

+138
-2
lines changed

src/website/shared/channels.py

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
from dataclasses import dataclass
22

3-
from pgpubsub.channel import TriggerChannel
3+
from pgpubsub.channel import Channel, TriggerChannel
44

55
from shared.models import NixDerivation
66
from shared.models.cve import Container
@@ -46,3 +46,12 @@ class CVEDerivationClusterProposalChannel(TriggerChannel):
4646
# We don't need to lock notifications.
4747
# If we are caching twice the same proposal, we will just replace it.
4848
lock_notifications = False
49+
50+
51+
@dataclass
52+
class NixEvaluationCompleteChannel(Channel):
53+
evaluation_id: int
54+
# We do not want to want to perform twice attribute path tracking.
55+
# It's expensive and the second time it's the identity mapping we are constructing.
56+
# We may revisit this if needed.
57+
lock_notifications = True

src/website/shared/listeners/nix_evaluation.py

Lines changed: 124 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,13 +13,18 @@
1313
from django.conf import settings
1414
from django.db.models import Avg
1515

16-
from shared.channels import NixEvaluationChannel
16+
from shared.channels import NixEvaluationChannel, NixEvaluationCompleteChannel
1717
from shared.evaluation import (
1818
SyncBatchAttributeIngester,
1919
parse_evaluation_result,
2020
)
2121
from shared.git import GitRepo
22+
from shared.listeners.cache_suggestions import cache_new_suggestions
2223
from shared.models import NixDerivation, NixEvaluation
24+
from shared.models.linkage import (
25+
CVEDerivationClusterProposal,
26+
DerivationClusterProposalLink,
27+
)
2328

2429
logger = logging.getLogger(__name__)
2530

@@ -240,6 +245,14 @@ async def evaluation_entrypoint(
240245
state=NixEvaluation.EvaluationState.COMPLETED,
241246
elapsed=elapsed,
242247
)
248+
249+
# Notify that we have a new evaluation ready and
250+
# any listeners should now proceed to an global update of old derivations
251+
# via attribute path.
252+
pgpubsub.notify(
253+
"shared.channels.NixEvaluationCompleteChannel",
254+
model_id=evaluation.pk,
255+
)
243256
except Exception as e:
244257
elapsed = time.time() - start
245258
logger.exception(
@@ -279,3 +292,113 @@ def run_evaluation_job(old: NixEvaluation, new: NixEvaluation) -> None:
279292
new,
280293
)
281294
)
295+
296+
297+
def rewire_new_derivations_following_attribute_paths(
298+
proposals: list[CVEDerivationClusterProposal], evaluation: NixEvaluation
299+
) -> None:
300+
"""
301+
This takes a list of proposals which have derivations from an older Nix channel attached to it.
302+
The passed evaluation is supposed to be the result of a newer Nix channel evaluation.
303+
304+
The game is to update all the M2M links from that proposal to the newer derivations.
305+
How to do this? Attribute paths.
306+
307+
Attribute paths are supposed to be constant and can relate from a channel to another.
308+
This is what Hydra does to track what happens to a package build time history, etc.
309+
310+
This falls short whenever we will rename a package, move it to another attribute path, e.g. promoting a GNOME variant
311+
to the top-level space of packages.
312+
313+
In those situations, this is not a big deal, this should not change intrinsically that we made the suggestion
314+
for that GNOME variant based on intrinsic parameters of said package and therefore, we will lose its trace in
315+
this new evaluation and will just "lose" it from this suggestion.
316+
317+
Once we obtain the ability to upgrade existing suggestions, we may re-attach it.
318+
Nonetheless, we should return the list of lost derivations by rewiring with the new evaluation.
319+
"""
320+
321+
# Ideally, we would settle this in a single UPDATE statement which looks like this:
322+
# UPDATE SET derivation_id = nnd.id FROM derivation_cluster_proposal_link JOIN nixderivations AS ond ON ond.id = derivation_id JOIN nixderivations AS nnd ON nnd.attribute_path = ond.attribute_path WHERE proposal_id IN eligible_proposals AND nnd.parent_evaluation_id = new_evaluation_id;
323+
# But this is a complicated one and it does not handle the situation where the right JOIN has missing items, i.e. lost derivations.
324+
# Let's do slowly and we will see the impact in production.
325+
# In general, we do not expect that size of proposal (:= nr of derivations in proposal) to be large and number of proposals should stay low.
326+
# If we end up do one query for _all_ proposals, we are therefore looking at O(size of all proposals merged) in terms of query complexity.
327+
328+
current_links = {
329+
link.derivation.attribute: link
330+
for link in DerivationClusterProposalLink.objects.select_related("derivation")
331+
.filter(proposal__in=proposals)
332+
.iterator()
333+
}
334+
attribute_paths = list(current_links.keys())
335+
new_derivations = {
336+
d.attribute: d
337+
for d in NixDerivation.objects.filter(
338+
attribute__in=attribute_paths, parent_evaluation=evaluation
339+
)
340+
.values_list("attribute", "id")
341+
.iterator()
342+
}
343+
updates = []
344+
# This loop is O(size of all proposals merged) which is ≤ O(sum of size of all proposals).
345+
# Depending on the situation, we may have many suggestions that shares the same derivations because they are suggestions
346+
# for different CVE that ends up affecting the same package set.
347+
# In that scenario, we are reduced to O(max(size of the largest proposal)).
348+
# In the other scenario, where we have suggestions that affects uniformly all of nixpkgs, we are reduced to O(sum of all sizes of all proposals).
349+
# The reality is probably between those two extremes.
350+
for apath in attribute_paths:
351+
current_link = current_links[apath]
352+
# This is a lost derivation.
353+
if apath not in new_derivations:
354+
logger.warning(
355+
"We lost the trace of '%s' following the channel update in '%s', marking that proposal's derivation as outdated."
356+
)
357+
current_link.outdated = True
358+
updates.append(current_link)
359+
# This is a known derivation!
360+
else:
361+
current_link.derivation = new_derivations[apath]
362+
updates.append(current_link)
363+
364+
DerivationClusterProposalLink.objects.bulk_update(
365+
updates, fields=("outdated", "derivation"), batch_size=1_000
366+
)
367+
368+
# TODO: how to handle the activity log here?
369+
# Bulk updates may not trigger anything and we would need special rendering here.
370+
# To inform that the services updated N derivations and could not deal with M derivations (outdated ones).
371+
372+
373+
@pgpubsub.listener(NixEvaluationCompleteChannel)
374+
def run_attribute_tracking_job(evaluation_id: int) -> None:
375+
# Our objective is to update:
376+
# - pending suggestions
377+
# - accepted suggestions
378+
# TODO: in the future, we should not update "mitigated" issues so we can easily revisit _which_ derivation was the last one.
379+
try:
380+
evaluation = NixEvaluation.objects.get(evaluation_id)
381+
except NixEvaluation.DoesNotExist:
382+
logger.warning(
383+
"Evaluation ID '%d' disappeared when we were updating the attributes it induced; this might be normal if we are recovering from a very old state",
384+
evaluation_id,
385+
)
386+
return
387+
388+
# At this point, we have a known full evaluation.
389+
# We would like to select all pending or accepted proposals.
390+
eligible_proposals = list(
391+
CVEDerivationClusterProposal.objects.filter(
392+
status__in=(
393+
CVEDerivationClusterProposal.Status.PENDING,
394+
CVEDerivationClusterProposal.Status.ACCEPTED,
395+
)
396+
)
397+
)
398+
rewire_new_derivations_following_attribute_paths(eligible_proposals, evaluation)
399+
400+
for proposal in eligible_proposals:
401+
# Once a proposal is rewired, we need to recache it.
402+
# TODO(Raito): performance-wise, we could do a more complicated recalculation if we had better guarantees on the schema of derivations inside of it.
403+
# We do not, so we will not until we prove there's a concern about performance.
404+
cache_new_suggestions(proposal)

src/website/shared/models/linkage.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,10 @@ class DerivationClusterProposalLink(models.Model):
6565

6666
derivation = models.ForeignKey(NixDerivation, on_delete=models.CASCADE)
6767

68+
# Whether this M2M is obsolete with regards to the existence of a younger NixEvaluation containing
69+
# a potentially newer derivation.
70+
outdated = models.BooleanField()
71+
6872
# TODO: how to design the integrity here?
6973
# we probably want to add a fancy check here.
7074
provenance_flags = models.IntegerField()

0 commit comments

Comments
 (0)