-
-
Notifications
You must be signed in to change notification settings - Fork 285
Add support for Reference Fix Commits improver #2163
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
ca2fc64
374de5e
14d5b4f
5f06678
b08c66c
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,74 @@ | ||
| # | ||
| # Copyright (c) nexB Inc. and others. All rights reserved. | ||
| # VulnerableCode is a trademark of nexB Inc. | ||
| # SPDX-License-Identifier: Apache-2.0 | ||
| # See http://www.apache.org/licenses/LICENSE-2.0 for the license text. | ||
| # See https://github.com/aboutcode-org/vulnerablecode for support or download. | ||
| # See https://aboutcode.org for more information about nexB OSS projects. | ||
| # | ||
|
|
||
| from aboutcode.pipeline import LoopProgress | ||
| from packageurl.contrib.purl2url import purl2url | ||
| from packageurl.contrib.url2purl import url2purl | ||
|
|
||
| from aboutcode.federated import get_core_purl | ||
| from vulnerabilities.models import AdvisoryV2 | ||
| from vulnerabilities.models import PackageCommitPatch | ||
| from vulnerabilities.pipelines import VulnerableCodeBaseImporterPipelineV2 | ||
| from vulnerabilities.pipes.advisory import VCS_URLS_SUPPORTED_TYPES | ||
| from vulnerabilities.utils import is_commit | ||
|
|
||
|
|
||
| class CollectReferencesFixCommitsPipeline(VulnerableCodeBaseImporterPipelineV2): | ||
| """ | ||
| Improver pipeline to scout References/Patch and create PackageCommitPatch entries. | ||
| """ | ||
|
|
||
| pipeline_id = "collect_ref_fix_commits_v2" | ||
|
|
||
| @classmethod | ||
| def steps(cls): | ||
| return (cls.collect_and_store_fix_commits,) | ||
|
|
||
| def get_vcs_commit(self, url): | ||
| """Extracts and VCS URL and commit hash from URL. | ||
| >> get_vcs_commit('https://github.com/aboutcode-org/vulnerablecode/commit/98e516011d6e096e25247b82fc5f196bbeecff10') | ||
| ('https://github.com/aboutcode-org/vulnerablecode', '98e516011d6e096e25247b82fc5f196bbeecff10') | ||
| >> get_vcs_commit('https://github.com/aboutcode-org/vulnerablecode/pull/1974') | ||
| None | ||
| """ | ||
| purl = url2purl(url) | ||
| if not purl or purl.type not in VCS_URLS_SUPPORTED_TYPES: | ||
| return None | ||
|
|
||
| version = getattr(purl, "version", None) | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why getattr, purl.version should always be a valid call ? |
||
| if not version or not is_commit(version): | ||
| return None | ||
|
|
||
| vcs_url = purl2url(get_core_purl(purl).to_string()) | ||
| return (vcs_url, version) if vcs_url else None | ||
|
|
||
| def collect_and_store_fix_commits(self): | ||
| impacted_packages_advisories = ( | ||
| AdvisoryV2.objects.filter(impacted_packages__isnull=False) | ||
| .prefetch_related("references", "patches", "impacted_packages") | ||
| .distinct() | ||
| ) | ||
|
|
||
| progress = LoopProgress( | ||
| total_iterations=impacted_packages_advisories.count(), logger=self.log | ||
| ) | ||
| for adv in progress.iter(impacted_packages_advisories.paginated(per_page=500)): | ||
| urls = {r.url for r in adv.references.all()} | {p.patch_url for p in adv.patches.all()} | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. refs = adv.references.values_list("url", flat=True) Fetch only needed columns ? |
||
| impacted_packages = list(adv.impacted_packages.all()) | ||
|
|
||
| for url in urls: | ||
| vcs_data = self.get_vcs_commit(url) | ||
| if not vcs_data: | ||
| continue | ||
|
|
||
| vcs_url, commit_hash = vcs_data | ||
| package_commit_obj, _ = PackageCommitPatch.objects.get_or_create( | ||
| vcs_url=vcs_url, commit_hash=commit_hash | ||
| ) | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can we handle this at bulk level, and avoid get or create and addition in a loop ? |
||
| package_commit_obj.fixed_in_impacts.add(*impacted_packages) | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,82 @@ | ||
| # | ||
| # Copyright (c) nexB Inc. and others. All rights reserved. | ||
| # VulnerableCode is a trademark of nexB Inc. | ||
| # SPDX-License-Identifier: Apache-2.0 | ||
| # See http://www.apache.org/licenses/LICENSE-2.0 for the license text. | ||
| # See https://github.com/aboutcode-org/vulnerablecode for support or download. | ||
| # See https://aboutcode.org for more information about nexB OSS projects. | ||
|
|
||
| from datetime import datetime | ||
|
|
||
| import pytest | ||
|
|
||
| from vulnerabilities.models import AdvisoryReference | ||
| from vulnerabilities.models import AdvisoryV2 | ||
| from vulnerabilities.models import ImpactedPackage | ||
| from vulnerabilities.models import PackageCommitPatch | ||
| from vulnerabilities.models import PackageV2 | ||
| from vulnerabilities.pipelines.v2_improvers.reference_collect_commits import ( | ||
| CollectReferencesFixCommitsPipeline, | ||
| ) | ||
|
|
||
|
|
||
| @pytest.mark.django_db | ||
| def test_collect_fix_commits_pipeline_creates_entry(): | ||
| advisory = AdvisoryV2.objects.create( | ||
| advisory_id="CVE-2025-1000", | ||
| datasource_id="test-ds", | ||
| avid="test-ds/CVE-2025-1000", | ||
| url="https://example.com/advisory/CVE-2025-1000", | ||
| unique_content_id="11111", | ||
| date_collected=datetime.now(), | ||
| ) | ||
| package = PackageV2.objects.create( | ||
| type="foo", | ||
| name="testpkg", | ||
| version="1.0", | ||
| ) | ||
| reference = AdvisoryReference.objects.create( | ||
| url="https://github.com/test/testpkg/commit/6bd301819f8f69331a55ae2336c8b111fc933f3d" | ||
| ) | ||
| impact = ImpactedPackage.objects.create(advisory=advisory) | ||
| impact.affecting_packages.add(package) | ||
| advisory.references.add(reference) | ||
|
|
||
| pipeline = CollectReferencesFixCommitsPipeline() | ||
| pipeline.collect_and_store_fix_commits() | ||
|
|
||
| package_commit_patch = PackageCommitPatch.objects.all() | ||
|
|
||
| assert package_commit_patch.count() == 1 | ||
| fix = package_commit_patch.first() | ||
| assert fix.commit_hash == "6bd301819f8f69331a55ae2336c8b111fc933f3d" | ||
| assert fix.vcs_url == "https://github.com/test/testpkg" | ||
| assert impact.fixed_by_package_commit_patches.count() == 1 | ||
|
|
||
|
|
||
| @pytest.mark.django_db | ||
| def test_collect_fix_commits_pipeline_skips_non_commit_urls(): | ||
| advisory = AdvisoryV2.objects.create( | ||
| advisory_id="CVE-2025-2000", | ||
| datasource_id="test-ds", | ||
| avid="test-ds/CVE-2025-2000", | ||
| url="https://example.com/advisory/CVE-2025-2000", | ||
| unique_content_id="11111", | ||
| date_collected=datetime.now(), | ||
| ) | ||
| package = PackageV2.objects.create( | ||
| type="pypi", | ||
| name="otherpkg", | ||
| version="2.0", | ||
| ) | ||
| impact = ImpactedPackage.objects.create(advisory=advisory) | ||
| impact.affecting_packages.add(package) | ||
|
|
||
| reference = AdvisoryReference.objects.create( | ||
| url="https://github.com/test/testpkg/issues/12" | ||
| ) # invalid reference 1 | ||
| advisory.references.add(reference) | ||
|
|
||
| pipeline = CollectReferencesFixCommitsPipeline() | ||
| pipeline.collect_and_store_fix_commits() | ||
| assert PackageCommitPatch.objects.count() == 0 |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
What's the reason to do
purl.type not in VCS_URLS_SUPPORTED_TYPES:When we are already doing url2purl(purl) to ensure we get a package. Also put it in try except block with a logger.