Skip to content

Commit e454271

Browse files
committed
Simplify worker argument handling and remove parallel processing
- Change benchmark command to take commit_range as positional argument - Make repo-path optional with --repo-path flag, defaults to temporary clone - Remove parallel processing options (--max-workers, --batch-size, --local-checkout) - Simplify command processing by removing unused parallel execution paths - Clean up imports and remove multiprocessing dependencies - Add trailing newline to runs.py and minor formatting cleanup
1 parent 5a6392d commit e454271

File tree

6 files changed

+51
-429
lines changed

6 files changed

+51
-429
lines changed

backend/app/routers/runs.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,4 +41,6 @@ async def get_runs(
4141
timestamp=run.timestamp,
4242
)
4343
for run in runs
44-
]
44+
]
45+
46+

worker/src/memory_tracker_worker/args.py

Lines changed: 5 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -60,14 +60,13 @@ def parse_args():
6060
help='Run memory benchmarks on CPython commits'
6161
)
6262
benchmark_parser.add_argument(
63-
'repo_path',
64-
nargs='?',
65-
type=Path,
66-
help='Path to CPython repository (optional, will clone if not provided)'
63+
'commit_range',
64+
help='Git commit range to benchmark (e.g., HEAD~5..HEAD, HEAD^, commit_sha)'
6765
)
6866
benchmark_parser.add_argument(
69-
'commit_range',
70-
help='Git commit range to benchmark (e.g., HEAD~5..HEAD)'
67+
'--repo-path', '-r',
68+
type=Path,
69+
help='Path to CPython repository. If not provided, will clone CPython to a temporary directory.'
7170
)
7271
benchmark_parser.add_argument(
7372
'--output-dir', '-o',
@@ -106,18 +105,6 @@ def parse_args():
106105
action='store_true',
107106
help='Force overwrite existing output directories for commits'
108107
)
109-
benchmark_parser.add_argument(
110-
'--max-workers', '-j',
111-
type=int,
112-
default=1,
113-
help='Maximum number of parallel workers. Creates temporary repo copies for each worker to avoid conflicts. (default: 1 for sequential processing)'
114-
)
115-
benchmark_parser.add_argument(
116-
'--batch-size', '-b',
117-
type=int,
118-
default=None,
119-
help='Number of commits to process in each parallel batch. Useful for memory management with large commit ranges. (default: same as max-workers)'
120-
)
121108
benchmark_parser.add_argument(
122109
'--auth-token',
123110
help='Authentication token for uploading results to server. Can also be set via MEMORY_TRACKER_TOKEN environment variable.'
@@ -127,11 +114,6 @@ def parse_args():
127114
default='http://localhost:8000',
128115
help='Base URL for the memory tracker API (default: http://localhost:8000)'
129116
)
130-
benchmark_parser.add_argument(
131-
'--local-checkout',
132-
action='store_true',
133-
help='Use local checkout for building. Runs git clean -fxd, configures once, and runs make for each commit. Incompatible with parallel processing (-j > 1).'
134-
)
135117
benchmark_parser.set_defaults(func=benchmark_command)
136118

137119
return parser.parse_args()

worker/src/memory_tracker_worker/benchmarks/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -199,6 +199,7 @@ def list_environments(server_url: str = "http://localhost:8000") -> list:
199199
raise ValueError(f"Failed to fetch environments: {e}")
200200

201201

202+
202203
def validate_binary_and_environment(binary_id: str, environment_id: str, server_url: str = "http://localhost:8000") -> None:
203204
"""Validate that binary and environment exist on the server before running benchmarks."""
204205
logger.info(f"Validating binary_id: {binary_id} and environment_id: {environment_id}")

worker/src/memory_tracker_worker/commands.py

Lines changed: 23 additions & 82 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,6 @@
33
import os
44
import sys
55
import tempfile
6-
from multiprocessing import cpu_count
76
from pathlib import Path
87
import git
98

@@ -14,7 +13,7 @@
1413
check_build_environment,
1514
get_commits_to_process
1615
)
17-
from .processing import process_commit, process_commits_in_parallel, process_commits_local_checkout
16+
from .processing import process_commits
1817

1918
logger = logging.getLogger(__name__)
2019

@@ -148,33 +147,16 @@ def benchmark_command(args):
148147
logger.error(f"Pre-flight validation failed: {e}")
149148
sys.exit(1)
150149

150+
# Use the provided commit range directly
151+
commit_range = args.commit_range
152+
151153
# Get commits to process
152154
try:
153-
commits = get_commits_to_process(repo, args.commit_range)
155+
commits = get_commits_to_process(repo, commit_range)
154156
except ValueError as e:
155157
logger.error(f"Failed to get commits: {e}")
156158
sys.exit(1)
157159

158-
# Validate local-checkout compatibility
159-
if hasattr(args, 'local_checkout') and args.local_checkout and args.max_workers > 1:
160-
logger.error("--local-checkout is incompatible with parallel processing (-j > 1)")
161-
sys.exit(1)
162-
163-
# Validate and set defaults for parallel processing arguments
164-
if args.max_workers < 1:
165-
logger.error(f"Invalid max-workers value: {args.max_workers}. Must be >= 1")
166-
sys.exit(1)
167-
168-
if args.batch_size is None:
169-
args.batch_size = args.max_workers
170-
elif args.batch_size < 1:
171-
logger.error(f"Invalid batch-size value: {args.batch_size}. Must be >= 1")
172-
sys.exit(1)
173-
174-
# Warn if using more workers than available CPUs
175-
available_cpus = cpu_count()
176-
if args.max_workers > available_cpus:
177-
logger.warning(f"Using {args.max_workers} workers on a system with {available_cpus} CPUs. This may reduce performance.")
178160

179161
# Get authentication token from CLI or environment variable
180162
auth_token = args.auth_token or os.getenv('MEMORY_TRACKER_TOKEN')
@@ -189,71 +171,30 @@ def benchmark_command(args):
189171
logger.info(f"Output directory: {args.output_dir}")
190172
logger.info(f"Configure flags: {args.configure_flags}")
191173
logger.info(f"Make flags: {args.make_flags}")
192-
logger.info(f"Max workers: {args.max_workers}")
193-
logger.info(f"Batch size: {args.batch_size}")
194-
logger.info(f"Local checkout mode: {getattr(args, 'local_checkout', False)}")
195174
logger.info(f"Number of commits to process: {len(commits)}")
196175
if len(commits) > 0:
197176
logger.info("Commits to process:")
198177
for commit in commits:
199178
logger.info(f" {commit.hexsha[:8]} - {commit.message.splitlines()[0]}")
200179

201-
# Process commits (parallel, sequential, or local checkout mode)
202-
if args.max_workers > 1:
203-
logger.info(f"Using parallel processing with {args.max_workers} workers and batch size {args.batch_size}")
204-
results = process_commits_in_parallel(
205-
commits,
206-
repo_path,
207-
args.output_dir,
208-
args.configure_flags,
209-
args.make_flags,
210-
args.verbose,
211-
args.binary_id,
212-
args.environment_id,
213-
args.force,
214-
args.max_workers,
215-
args.batch_size,
216-
auth_token,
217-
args.api_base
218-
)
219-
errors = [(commit, error) for commit, error in results if error is not None]
220-
elif getattr(args, 'local_checkout', False):
221-
logger.info("Using local checkout mode")
222-
errors = []
223-
error = process_commits_local_checkout(
224-
commits,
225-
repo_path,
226-
args.output_dir,
227-
args.configure_flags,
228-
args.make_flags,
229-
args.verbose,
230-
args.binary_id,
231-
args.environment_id,
232-
args.force,
233-
auth_token,
234-
args.api_base
235-
)
236-
if error:
237-
errors.append((None, error))
238-
else:
239-
logger.info("Using sequential processing")
240-
errors = []
241-
for commit in commits:
242-
error = process_commit(
243-
commit,
244-
repo_path,
245-
args.output_dir,
246-
args.configure_flags,
247-
args.make_flags,
248-
args.verbose,
249-
args.binary_id,
250-
args.environment_id,
251-
args.force,
252-
auth_token,
253-
args.api_base
254-
)
255-
if error:
256-
errors.append((commit, error))
180+
# Process commits using incremental mode (previously local checkout)
181+
logger.info("Processing commits using incremental mode")
182+
errors = []
183+
error = process_commits(
184+
commits,
185+
repo_path,
186+
args.output_dir,
187+
args.configure_flags,
188+
args.make_flags,
189+
args.verbose,
190+
args.binary_id,
191+
args.environment_id,
192+
args.force,
193+
auth_token,
194+
args.api_base
195+
)
196+
if error:
197+
errors.append((None, error))
257198

258199
# Print final status
259200
if errors:

0 commit comments

Comments
 (0)