|
13 | 13 | from django.conf import settings |
14 | 14 | from django.db.models import Avg |
15 | 15 |
|
16 | | -from shared.channels import NixEvaluationChannel |
| 16 | +from shared.channels import NixEvaluationChannel, NixEvaluationCompleteChannel |
17 | 17 | from shared.evaluation import ( |
18 | 18 | SyncBatchAttributeIngester, |
19 | 19 | parse_evaluation_result, |
20 | 20 | ) |
21 | 21 | from shared.git import GitRepo |
| 22 | +from shared.listeners.cache_suggestions import cache_new_suggestions |
22 | 23 | from shared.models import NixDerivation, NixEvaluation |
| 24 | +from shared.models.linkage import ( |
| 25 | + CVEDerivationClusterProposal, |
| 26 | + DerivationClusterProposalLink, |
| 27 | +) |
23 | 28 |
|
24 | 29 | logger = logging.getLogger(__name__) |
25 | 30 |
|
@@ -240,6 +245,14 @@ async def evaluation_entrypoint( |
240 | 245 | state=NixEvaluation.EvaluationState.COMPLETED, |
241 | 246 | elapsed=elapsed, |
242 | 247 | ) |
| 248 | + |
| 249 | + # Notify that we have a new evaluation ready and |
| 250 | + # any listeners should now proceed to a global update of old derivations |
| 251 | + # via attribute path. |
| 252 | + pgpubsub.notify( |
| 253 | + "shared.channels.NixEvaluationCompleteChannel", |
| 254 | + model_id=evaluation.pk, |
| 255 | + ) |
243 | 256 | except Exception as e: |
244 | 257 | elapsed = time.time() - start |
245 | 258 | logger.exception( |
@@ -279,3 +292,113 @@ def run_evaluation_job(old: NixEvaluation, new: NixEvaluation) -> None: |
279 | 292 | new, |
280 | 293 | ) |
281 | 294 | ) |
| 295 | + |
| 296 | + |
def rewire_new_derivations_following_attribute_paths(
    proposals: list[CVEDerivationClusterProposal], evaluation: NixEvaluation
) -> None:
    """
    Re-point the derivation M2M links of `proposals` (which reference derivations
    from an older Nix channel) at the derivations produced by `evaluation`, a
    newer Nix channel evaluation.

    The matching key is the attribute path: attribute paths are supposed to be
    stable across channels — this is what Hydra relies on to track a package's
    build history over time.

    This falls short whenever a package is renamed or moved to another attribute
    path, e.g. promoting a GNOME variant to the top-level package namespace.

    In those situations, this is not a big deal: the suggestion was made based on
    intrinsic parameters of said package, so we simply lose its trace in the new
    evaluation and mark the link as outdated instead of rewiring it.

    Once we obtain the ability to upgrade existing suggestions, we may re-attach
    lost derivations. Until then, outdated links record what was lost.
    TODO: consider returning the list of lost derivations to the caller.
    """

    # Ideally, we would settle this in a single UPDATE statement which looks like this:
    # UPDATE SET derivation_id = nnd.id FROM derivation_cluster_proposal_link JOIN nixderivations AS ond ON ond.id = derivation_id JOIN nixderivations AS nnd ON nnd.attribute_path = ond.attribute_path WHERE proposal_id IN eligible_proposals AND nnd.parent_evaluation_id = new_evaluation_id;
    # But this is a complicated one and it does not handle the situation where the right JOIN has missing items, i.e. lost derivations.
    # Let's do it slowly and we will see the impact in production.
    # In general, we do not expect the size of a proposal (:= nr of derivations in the proposal) to be large, and the number of proposals should stay low.
    # If we end up doing one query for _all_ proposals, we are therefore looking at O(size of all proposals merged) in terms of query complexity.

    # Group links by attribute path. Several proposals may link derivations that
    # share the same attribute path (different CVEs affecting the same package
    # set), so a plain dict keyed by attribute would silently drop all but one
    # link per path — we must keep every link.
    links_by_attribute: dict[str, list[DerivationClusterProposalLink]] = {}
    for link in (
        DerivationClusterProposalLink.objects.select_related("derivation")
        .filter(proposal__in=proposals)
        .iterator()
    ):
        links_by_attribute.setdefault(link.derivation.attribute, []).append(link)

    attribute_paths = list(links_by_attribute.keys())

    # values_list() yields plain (attribute, id) tuples, not model instances;
    # we only need the primary key to rewire the FK column directly.
    new_derivation_ids = {
        attribute: derivation_id
        for attribute, derivation_id in NixDerivation.objects.filter(
            attribute__in=attribute_paths, parent_evaluation=evaluation
        )
        .values_list("attribute", "id")
        .iterator()
    }

    updates = []
    # This loop is O(size of all proposals merged) which is ≤ O(sum of sizes of all proposals).
    # Depending on the situation, we may have many suggestions sharing the same derivations because they are suggestions
    # for different CVEs that end up affecting the same package set.
    # In that scenario, we are reduced to O(max(size of the largest proposal)).
    # In the other scenario, where we have suggestions that affect uniformly all of nixpkgs, we are reduced to O(sum of all sizes of all proposals).
    # The reality is probably between those two extremes.
    for apath, links in links_by_attribute.items():
        new_id = new_derivation_ids.get(apath)
        for current_link in links:
            if new_id is None:
                # This is a lost derivation.
                logger.warning(
                    "We lost the trace of '%s' following the channel update in '%s', marking that proposal's derivation as outdated.",
                    apath,
                    evaluation,
                )
                current_link.outdated = True
            else:
                # This is a known derivation! Assign the FK id directly — we
                # never fetched the full NixDerivation row.
                current_link.derivation_id = new_id
            updates.append(current_link)

    DerivationClusterProposalLink.objects.bulk_update(
        updates, fields=("outdated", "derivation"), batch_size=1_000
    )

    # TODO: how to handle the activity log here?
    # Bulk updates may not trigger anything and we would need special rendering here.
    # To inform that the services updated N derivations and could not deal with M derivations (outdated ones).
| 371 | + |
| 372 | + |
@pgpubsub.listener(NixEvaluationCompleteChannel)
def run_attribute_tracking_job(evaluation_id: int) -> None:
    """
    Listener fired when an evaluation completes: rewire pending and accepted
    suggestions onto the new evaluation's derivations, then recache them.
    """
    # Our objective is to update:
    # - pending suggestions
    # - accepted suggestions
    # TODO: in the future, we should not update "mitigated" issues so we can easily revisit _which_ derivation was the last one.
    try:
        # `.get()` requires a keyword lookup — a bare positional argument is a
        # TypeError in Django's QuerySet API.
        evaluation = NixEvaluation.objects.get(pk=evaluation_id)
    except NixEvaluation.DoesNotExist:
        logger.warning(
            "Evaluation ID '%d' disappeared when we were updating the attributes it induced; this might be normal if we are recovering from a very old state",
            evaluation_id,
        )
        return

    # At this point, we have a known full evaluation.
    # We would like to select all pending or accepted proposals.
    eligible_proposals = list(
        CVEDerivationClusterProposal.objects.filter(
            status__in=(
                CVEDerivationClusterProposal.Status.PENDING,
                CVEDerivationClusterProposal.Status.ACCEPTED,
            )
        )
    )
    rewire_new_derivations_following_attribute_paths(eligible_proposals, evaluation)

    for proposal in eligible_proposals:
        # Once a proposal is rewired, we need to recache it.
        # TODO(Raito): performance-wise, we could do a more complicated recalculation if we had better guarantees on the schema of derivations inside of it.
        # We do not, so we will not until we prove there's a concern about performance.
        cache_new_suggestions(proposal)
0 commit comments