Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
80 changes: 76 additions & 4 deletions cycode/cli/apps/scan/code_scanner.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,12 +29,15 @@
generate_unique_scan_id,
is_cycodeignore_allowed_by_scan_config,
set_issue_detected_by_scan_results,
should_use_presigned_upload,
)
from cycode.cyclient.models import ZippedFileScanResult
from cycode.logger import get_logger

if TYPE_CHECKING:
from cycode.cli.files_collector.models.in_memory_zip import InMemoryZip
from cycode.cli.printers.console_printer import ConsolePrinter
from cycode.cli.utils.progress_bar import BaseProgressBar
from cycode.cyclient.scan_client import ScanClient

start_scan_time = time.time()
Expand Down Expand Up @@ -106,7 +109,10 @@ def _should_use_sync_flow(command_scan_type: str, scan_type: str, sync_option: b


def _get_scan_documents_thread_func(
ctx: typer.Context, is_git_diff: bool, is_commit_range: bool, scan_parameters: dict
ctx: typer.Context,
is_git_diff: bool,
is_commit_range: bool,
scan_parameters: dict,
) -> Callable[[list[Document]], tuple[str, CliError, LocalScanResult]]:
cycode_client = ctx.obj['client']
scan_type = ctx.obj['scan_type']
Expand Down Expand Up @@ -180,6 +186,36 @@ def _scan_batch_thread_func(batch: list[Document]) -> tuple[str, CliError, Local
return _scan_batch_thread_func


def _run_presigned_upload_scan(
    scan_batch_thread_func: Callable,
    scan_type: str,
    documents_to_scan: list[Document],
    progress_bar: 'BaseProgressBar',
    printer: 'ConsolePrinter',
) -> tuple[dict, list]:
    """Run a scan for scan types that upload their ZIP via a presigned link.

    Probes whether all documents fit into a single ZIP within the scan type's
    size limit. If they do, batching is skipped and everything is scanned as
    one upload; otherwise a warning is printed and the regular batched scan
    runs instead.

    Returns the ``(errors, local_scan_results)`` tuple produced by
    ``run_parallel_batched_scan``.
    """
    try:
        # Try to zip all documents as a single batch; ZipTooLargeError raised if it exceeds the scan type's limit
        # NOTE(review): this probe ZIP is discarded; the scan itself zips the documents again.
        zip_documents(scan_type, documents_to_scan)
        # It fits: skip batching and upload everything as one ZIP
        return run_parallel_batched_scan(
            scan_batch_thread_func,
            scan_type,
            documents_to_scan,
            progress_bar=progress_bar,
            skip_batching=True,
        )
    except custom_exceptions.ZipTooLargeError:
        printer.print_warning(
            'The scan is too large to upload as a single file. This may result in corrupted scan results.'
        )
        # Fall back to the default behavior: split documents into batches.
        return run_parallel_batched_scan(
            scan_batch_thread_func,
            scan_type,
            documents_to_scan,
            progress_bar=progress_bar,
        )


def scan_documents(
ctx: typer.Context,
documents_to_scan: list[Document],
Expand All @@ -203,9 +239,15 @@ def scan_documents(
return

scan_batch_thread_func = _get_scan_documents_thread_func(ctx, is_git_diff, is_commit_range, scan_parameters)
errors, local_scan_results = run_parallel_batched_scan(
scan_batch_thread_func, scan_type, documents_to_scan, progress_bar=progress_bar
)

if should_use_presigned_upload(scan_type):
errors, local_scan_results = _run_presigned_upload_scan(
scan_batch_thread_func, scan_type, documents_to_scan, progress_bar, printer
)
else:
errors, local_scan_results = run_parallel_batched_scan(
scan_batch_thread_func, scan_type, documents_to_scan, progress_bar=progress_bar
)

try_set_aggregation_report_url_if_needed(ctx, scan_parameters, ctx.obj['client'], scan_type)

Expand All @@ -217,6 +259,31 @@ def scan_documents(
print_local_scan_results(ctx, local_scan_results, errors)


def _perform_scan_v4_async(
    cycode_client: 'ScanClient',
    zipped_documents: 'InMemoryZip',
    scan_type: str,
    scan_parameters: dict,
    is_git_diff: bool,
    is_commit_range: bool,
) -> ZippedFileScanResult:
    """Run an async scan through the v4 presigned-upload flow and poll for its results.

    Flow: request a presigned upload link, push the ZIP to it, trigger the
    scan by upload id, then poll until the scan results are available.
    """
    link = cycode_client.get_upload_link(scan_type)
    logger.debug('Got upload link, %s', {'upload_id': link.upload_id})

    cycode_client.upload_to_presigned_post(link.url, link.presigned_post_fields, zipped_documents)
    logger.debug('Uploaded zip to presigned URL')

    scan_initialization = cycode_client.scan_repository_from_upload_id(
        scan_type, link.upload_id, scan_parameters, is_git_diff, is_commit_range
    )
    logger.debug(
        'Presigned upload scan request triggered, %s',
        {'scan_id': scan_initialization.scan_id, 'upload_id': link.upload_id},
    )

    return poll_scan_results(cycode_client, scan_initialization.scan_id, scan_type, scan_parameters)


def _perform_scan_async(
cycode_client: 'ScanClient',
zipped_documents: 'InMemoryZip',
Expand Down Expand Up @@ -262,6 +329,11 @@ def _perform_scan(
# it does not support commit range scans; should_use_sync_flow handles it
return _perform_scan_sync(cycode_client, zipped_documents, scan_type, scan_parameters, is_git_diff)

if should_use_presigned_upload(scan_type):
return _perform_scan_v4_async(
cycode_client, zipped_documents, scan_type, scan_parameters, is_git_diff, is_commit_range
)

return _perform_scan_async(cycode_client, zipped_documents, scan_type, scan_parameters, is_commit_range)


Expand Down
59 changes: 51 additions & 8 deletions cycode/cli/apps/scan/commit_range_scanner.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@
generate_unique_scan_id,
is_cycodeignore_allowed_by_scan_config,
set_issue_detected_by_scan_results,
should_use_presigned_upload,
)
from cycode.cyclient.models import ZippedFileScanResult
from cycode.logger import get_logger
Expand Down Expand Up @@ -86,6 +87,38 @@ def _perform_commit_range_scan_async(
return poll_scan_results(cycode_client, scan_async_result.scan_id, scan_type, scan_parameters, timeout)


def _perform_commit_range_scan_v4_async(
    cycode_client: 'ScanClient',
    from_commit_zipped_documents: 'InMemoryZip',
    to_commit_zipped_documents: 'InMemoryZip',
    scan_type: str,
    scan_parameters: dict,
    timeout: Optional[int] = None,
) -> ZippedFileScanResult:
    """Run a commit-range scan through the v4 presigned-upload flow and poll for results.

    Each commit snapshot is uploaded via its own presigned link ("from" first,
    then "to"); a single commit-range scan is then triggered from the two
    upload ids.
    """
    upload_ids: list = []
    for zipped, link_log_msg, uploaded_log_msg in (
        (from_commit_zipped_documents, 'Got from-commit upload link, %s', 'Uploaded from-commit zip'),
        (to_commit_zipped_documents, 'Got to-commit upload link, %s', 'Uploaded to-commit zip'),
    ):
        link = cycode_client.get_upload_link(scan_type)
        logger.debug(link_log_msg, {'upload_id': link.upload_id})

        cycode_client.upload_to_presigned_post(link.url, link.presigned_post_fields, zipped)
        logger.debug(uploaded_log_msg)

        upload_ids.append(link.upload_id)

    from_upload_id, to_upload_id = upload_ids
    scan_initialization = cycode_client.commit_range_scan_from_upload_ids(
        scan_type, from_upload_id, to_upload_id, scan_parameters
    )
    logger.debug('V4 commit range scan request triggered, %s', {'scan_id': scan_initialization.scan_id})

    return poll_scan_results(cycode_client, scan_initialization.scan_id, scan_type, scan_parameters, timeout)


def _scan_commit_range_documents(
ctx: typer.Context,
from_documents_to_scan: list[Document],
Expand Down Expand Up @@ -118,14 +151,24 @@ def _scan_commit_range_documents(
# for SAST it is files with diff between from_commit and to_commit
to_commit_zipped_documents = zip_documents(scan_type, to_documents_to_scan)

scan_result = _perform_commit_range_scan_async(
cycode_client,
from_commit_zipped_documents,
to_commit_zipped_documents,
scan_type,
scan_parameters,
timeout,
)
if should_use_presigned_upload(scan_type):
scan_result = _perform_commit_range_scan_v4_async(
cycode_client,
from_commit_zipped_documents,
to_commit_zipped_documents,
scan_type,
scan_parameters,
timeout,
)
else:
scan_result = _perform_commit_range_scan_async(
cycode_client,
from_commit_zipped_documents,
to_commit_zipped_documents,
scan_type,
scan_parameters,
timeout,
)
enrich_scan_result_with_data_from_detection_rules(cycode_client, scan_result)

progress_bar.update(ScanProgressBarSection.SCAN)
Expand Down
7 changes: 5 additions & 2 deletions cycode/cli/consts.py
Original file line number Diff line number Diff line change
Expand Up @@ -192,15 +192,18 @@
# 5MB in bytes (in decimal)
FILE_MAX_SIZE_LIMIT_IN_BYTES = 5000000

PRESIGNED_LINK_UPLOADED_ZIP_MAX_SIZE_LIMIT_IN_BYTES = 5 * 1024 * 1024 * 1024 # 5 GB (S3 presigned POST limit)
PRESIGNED_UPLOAD_SCAN_TYPES = {SAST_SCAN_TYPE}

DEFAULT_ZIP_MAX_SIZE_LIMIT_IN_BYTES = 20 * 1024 * 1024
ZIP_MAX_SIZE_LIMIT_IN_BYTES = {
SCA_SCAN_TYPE: 200 * 1024 * 1024,
SAST_SCAN_TYPE: 50 * 1024 * 1024,
SAST_SCAN_TYPE: PRESIGNED_LINK_UPLOADED_ZIP_MAX_SIZE_LIMIT_IN_BYTES,
}

# scan in batches
DEFAULT_SCAN_BATCH_MAX_SIZE_IN_BYTES = 9 * 1024 * 1024
SCAN_BATCH_MAX_SIZE_IN_BYTES = {SAST_SCAN_TYPE: 50 * 1024 * 1024}
SCAN_BATCH_MAX_SIZE_IN_BYTES = {SAST_SCAN_TYPE: PRESIGNED_LINK_UPLOADED_ZIP_MAX_SIZE_LIMIT_IN_BYTES}
SCAN_BATCH_MAX_SIZE_IN_BYTES_ENV_VAR_NAME = 'SCAN_BATCH_MAX_SIZE_IN_BYTES'

DEFAULT_SCAN_BATCH_MAX_FILES_COUNT = 1000
Expand Down
6 changes: 5 additions & 1 deletion cycode/cli/files_collector/zip_documents.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,11 @@ def _validate_zip_file_size(scan_type: str, zip_file_size: int) -> None:
raise custom_exceptions.ZipTooLargeError(max_size_limit)


def zip_documents(scan_type: str, documents: list[Document], zip_file: Optional[InMemoryZip] = None) -> InMemoryZip:
def zip_documents(
scan_type: str,
documents: list[Document],
zip_file: Optional[InMemoryZip] = None,
) -> InMemoryZip:
if zip_file is None:
zip_file = InMemoryZip()

Expand Down
6 changes: 5 additions & 1 deletion cycode/cli/utils/scan_batch.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,9 +111,13 @@ def run_parallel_batched_scan(
scan_type: str,
documents: list[Document],
progress_bar: 'BaseProgressBar',
skip_batching: bool = False,
) -> tuple[dict[str, 'CliError'], list['LocalScanResult']]:
# batching is disabled for SCA; requested by Mor
batches = [documents] if scan_type == consts.SCA_SCAN_TYPE else split_documents_into_batches(scan_type, documents)
if scan_type == consts.SCA_SCAN_TYPE or skip_batching:
batches = [documents]
else:
batches = split_documents_into_batches(scan_type, documents)

progress_bar.set_section_length(ScanProgressBarSection.SCAN, len(batches)) # * 3
# TODO(MarshalX): we should multiply the count of batches in SCAN section because each batch has 3 steps:
Expand Down
5 changes: 5 additions & 0 deletions cycode/cli/utils/scan_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@

import typer

from cycode.cli import consts
from cycode.cli.cli_types import SeverityOption

if TYPE_CHECKING:
Expand All @@ -31,6 +32,10 @@ def is_cycodeignore_allowed_by_scan_config(ctx: typer.Context) -> bool:
return scan_config.is_cycode_ignore_allowed if scan_config else True


def should_use_presigned_upload(scan_type: str) -> bool:
    """Return True when this scan type uploads its ZIP via a presigned link (v4 flow)."""
    presigned_scan_types = consts.PRESIGNED_UPLOAD_SCAN_TYPES
    return scan_type in presigned_scan_types


def generate_unique_scan_id() -> UUID:
if 'PYTEST_TEST_UNIQUE_ID' in os.environ:
return UUID(os.environ['PYTEST_TEST_UNIQUE_ID'])
Expand Down
20 changes: 20 additions & 0 deletions cycode/cyclient/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,26 @@ def build_dto(self, data: dict[str, Any], **_) -> 'ScanResult':
return ScanResult(**data)


@dataclass
class UploadLinkResponse:
    """Response of the v4 upload-link endpoint: where and how to upload a scan ZIP."""

    # Server-generated identifier used afterwards to trigger the scan for this upload.
    upload_id: str
    # Presigned POST target URL (object storage, not a Cycode service).
    url: str
    # Form fields that must accompany the presigned POST request.
    presigned_post_fields: dict[str, str]


class UploadLinkResponseSchema(Schema):
    """Deserializes the upload-link endpoint JSON into an ``UploadLinkResponse`` DTO."""

    class Meta:
        # Ignore unexpected keys instead of failing validation.
        unknown = EXCLUDE

    upload_id = fields.String()
    url = fields.String()
    presigned_post_fields = fields.Dict(keys=fields.String(), values=fields.String())

    @post_load
    def build_dto(self, data: dict[str, Any], **_) -> 'UploadLinkResponse':
        return UploadLinkResponse(**data)


class ScanInitializationResponse(Schema):
def __init__(self, scan_id: Optional[str] = None, err: Optional[str] = None) -> None:
super().__init__()
Expand Down
61 changes: 61 additions & 0 deletions cycode/cyclient/scan_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
from typing import TYPE_CHECKING, Optional, Union
from uuid import UUID

import requests
from requests import Response

from cycode.cli import consts
Expand All @@ -25,6 +26,7 @@ def __init__(
self.scan_config = scan_config

self._SCAN_SERVICE_CLI_CONTROLLER_PATH = 'api/v1/cli-scan'
self._SCAN_SERVICE_V4_CLI_CONTROLLER_PATH = 'api/v4/scans/cli'
self._DETECTIONS_SERVICE_CLI_CONTROLLER_PATH = 'api/v1/detections/cli'
self._POLICIES_SERVICE_CONTROLLER_PATH_V3 = 'api/v3/policies'

Expand Down Expand Up @@ -56,6 +58,10 @@ def get_scan_aggregation_report_url(self, aggregation_id: str, scan_type: str) -
)
return models.ScanReportUrlResponseSchema().build_dto(response.json())

def get_scan_service_v4_url_path(self, scan_type: str) -> str:
    """Return the base URL path of the v4 CLI scan controller for the given scan type."""
    service_name = self.scan_config.get_service_name(scan_type)
    return '/'.join((service_name, self._SCAN_SERVICE_V4_CLI_CONTROLLER_PATH))

def get_zipped_file_scan_async_url_path(self, scan_type: str, should_use_sync_flow: bool = False) -> str:
async_scan_type = self.scan_config.get_async_scan_type(scan_type)
async_entity_type = self.scan_config.get_async_entity_type(scan_type)
Expand Down Expand Up @@ -123,6 +129,40 @@ def zipped_file_scan_async(
)
return models.ScanInitializationResponseSchema().load(response.json())

def get_upload_link(self, scan_type: str) -> models.UploadLinkResponse:
    """Request a presigned upload link for a new scan of the given type."""
    entity_type = self.scan_config.get_async_scan_type(scan_type)
    raw_response = self.scan_cycode_client.get(
        url_path=f'{self.get_scan_service_v4_url_path(scan_type)}/{entity_type}/upload-link',
        hide_response_content_log=self._hide_response_log,
    )
    return models.UploadLinkResponseSchema().load(raw_response.json())

def upload_to_presigned_post(self, url: str, fields: dict[str, str], zip_file: 'InMemoryZip') -> None:
    """Upload the zipped documents to the presigned POST URL; raises on HTTP error.

    The presigned-post fields are sent as plain form fields, with the ZIP
    content added last under the "file" key.
    """
    form_data = {field_name: (None, field_value) for field_name, field_value in fields.items()}
    form_data['file'] = (None, zip_file.read())
    # We are not using Cycode client, as we are calling aws S3.
    upload_response = requests.post(url, files=form_data, timeout=self.scan_cycode_client.timeout)
    upload_response.raise_for_status()

def scan_repository_from_upload_id(
    self,
    scan_type: str,
    upload_id: str,
    scan_parameters: dict,
    is_git_diff: bool = False,
    is_commit_range: bool = False,
) -> models.ScanInitializationResponse:
    """Trigger a v4 repository scan for a ZIP that was already uploaded via presigned POST."""
    entity_type = self.scan_config.get_async_scan_type(scan_type)
    request_body = {
        'upload_id': upload_id,
        'is_git_diff': is_git_diff,
        'is_commit_range': is_commit_range,
        # scan_parameters are serialized into a JSON string, matching the v4 API contract
        'scan_parameters': json.dumps(scan_parameters),
    }
    raw_response = self.scan_cycode_client.post(
        url_path=f'{self.get_scan_service_v4_url_path(scan_type)}/{entity_type}/repository',
        body=request_body,
    )
    return models.ScanInitializationResponseSchema().load(raw_response.json())

def commit_range_scan_async(
self,
from_commit_zip_file: InMemoryZip,
Expand Down Expand Up @@ -161,6 +201,27 @@ def commit_range_scan_async(
)
return models.ScanInitializationResponseSchema().load(response.json())

def commit_range_scan_from_upload_ids(
    self,
    scan_type: str,
    from_commit_upload_id: str,
    to_commit_upload_id: str,
    scan_parameters: dict,
    is_git_diff: bool = False,
) -> models.ScanInitializationResponse:
    """Trigger a v4 commit-range scan from two previously uploaded commit snapshots."""
    entity_type = self.scan_config.get_async_scan_type(scan_type)
    request_body = {
        'from_commit_upload_id': from_commit_upload_id,
        'to_commit_upload_id': to_commit_upload_id,
        'is_git_diff': is_git_diff,
        # scan_parameters are serialized into a JSON string, matching the v4 API contract
        'scan_parameters': json.dumps(scan_parameters),
    }
    raw_response = self.scan_cycode_client.post(
        url_path=f'{self.get_scan_service_v4_url_path(scan_type)}/{entity_type}/commit-range',
        body=request_body,
    )
    return models.ScanInitializationResponseSchema().load(raw_response.json())

def get_scan_details_path(self, scan_type: str, scan_id: str) -> str:
return f'{self.get_scan_service_url_path(scan_type)}/{scan_id}'

Expand Down