From a85f62e375f577a8e8b0ed3276995ba1263c644e Mon Sep 17 00:00:00 2001
From: Molly He
Date: Fri, 10 Apr 2026 16:24:25 -0700
Subject: [PATCH 1/2] Add mlflowconfig to eval

---
 .../sagemaker/train/evaluate/pipeline_templates.py | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/sagemaker-train/src/sagemaker/train/evaluate/pipeline_templates.py b/sagemaker-train/src/sagemaker/train/evaluate/pipeline_templates.py
index bc46a23a3a..aa457ac0a1 100644
--- a/sagemaker-train/src/sagemaker/train/evaluate/pipeline_templates.py
+++ b/sagemaker-train/src/sagemaker/train/evaluate/pipeline_templates.py
@@ -326,6 +326,10 @@ LLMAJ_TEMPLATE_BASE_MODEL_ONLY = """{
   "Version": "2020-12-01",
   "Metadata": {},
+  "MlflowConfig": {
+    "MlflowResourceArn": "{{ mlflow_resource_arn }}"{% if mlflow_experiment_name %},
+    "MlflowExperimentName": "{{ mlflow_experiment_name }}"{% endif %}
+  },
   "Parameters": [],
   "Steps": [
     {
@@ -457,6 +461,10 @@ DETERMINISTIC_TEMPLATE_BASE_MODEL_ONLY = """{
   "Version": "2020-12-01",
   "Metadata": {},
+  "MlflowConfig": {
+    "MlflowResourceArn": "{{ mlflow_resource_arn }}"{% if mlflow_experiment_name %},
+    "MlflowExperimentName": "{{ mlflow_experiment_name }}"{% endif %}
+  },
   "Parameters": [],
   "Steps": [
     {
@@ -843,6 +851,10 @@ CUSTOM_SCORER_TEMPLATE_BASE_MODEL_ONLY = """{
   "Version": "2020-12-01",
   "Metadata": {},
+  "MlflowConfig": {
+    "MlflowResourceArn": "{{ mlflow_resource_arn }}"{% if mlflow_experiment_name %},
+    "MlflowExperimentName": "{{ mlflow_experiment_name }}"{% endif %}
+  },
   "Parameters": [],
   "Steps": [
     {

From 65d1b419a725b30fb1c4201c4e4008176d5d4ae1 Mon Sep 17 00:00:00 2001
From: Molly He
Date: Fri, 10 Apr 2026 17:03:03 -0700
Subject: [PATCH 2/2] Update unit and integ test

---
 .../tests/integ/train/test_benchmark_evaluator.py |  2 +-
 .../train/evaluate/test_pipeline_templates.py     | 15 +++++++++------
 2 files changed, 10 insertions(+), 7 deletions(-)

diff --git a/sagemaker-train/tests/integ/train/test_benchmark_evaluator.py b/sagemaker-train/tests/integ/train/test_benchmark_evaluator.py
index b39de3e5e6..454fdd67c7 100644
--- a/sagemaker-train/tests/integ/train/test_benchmark_evaluator.py
+++ b/sagemaker-train/tests/integ/train/test_benchmark_evaluator.py
@@ -307,7 +307,7 @@ def test_benchmark_evaluation_base_model_only(self):
             benchmark=Benchmark.MMLU,
             model=BASE_MODEL_ONLY_CONFIG["base_model_id"],
             s3_output_path=BASE_MODEL_ONLY_CONFIG["s3_output_path"],
-            # mlflow_resource_arn=BASE_MODEL_ONLY_CONFIG["mlflow_tracking_server_arn"],
+            mlflow_resource_arn=BASE_MODEL_ONLY_CONFIG["mlflow_tracking_server_arn"],
             base_eval_name="integ-test-base-model-only",
             # Note: model_package_group not needed for JumpStart models
         )
diff --git a/sagemaker-train/tests/unit/train/evaluate/test_pipeline_templates.py b/sagemaker-train/tests/unit/train/evaluate/test_pipeline_templates.py
index 36092fbeb3..99a796623c 100644
--- a/sagemaker-train/tests/unit/train/evaluate/test_pipeline_templates.py
+++ b/sagemaker-train/tests/unit/train/evaluate/test_pipeline_templates.py
@@ -317,8 +317,9 @@ def test_deterministic_base_model_only_with_all_params(self):
 
         base_model_step = pipeline_def["Steps"][0]
 
-        # Verify MLflow config is not present in BASE_MODEL_ONLY template
-        assert "MlflowConfig" not in pipeline_def
+        # Verify MLflow config is present in BASE_MODEL_ONLY template
+        assert "MlflowConfig" in pipeline_def
+        assert pipeline_def["MlflowConfig"]["MlflowResourceArn"] == BASE_CONTEXT["mlflow_resource_arn"]
 
         # Verify KMS key
         assert base_model_step["Arguments"]["OutputDataConfig"]["KmsKeyId"] == context["kms_key_id"]
@@ -403,8 +404,9 @@ def test_custom_scorer_base_model_only_minimal(self):
 
         pipeline_def = json.loads(rendered)
 
-        # Verify MLflow config is not present in BASE_MODEL_ONLY template
-        assert "MlflowConfig" not in pipeline_def
+        # Verify MLflow config is present in BASE_MODEL_ONLY template
+        assert "MlflowConfig" in pipeline_def
+        assert pipeline_def["MlflowConfig"]["MlflowResourceArn"] == BASE_CONTEXT["mlflow_resource_arn"]
 
         # Should have only 1 step
         assert len(pipeline_def["Steps"]) == 1
@@ -574,8 +576,9 @@ def test_llmaj_base_model_only_minimal(self):
 
         pipeline_def = json.loads(rendered)
 
-        # Verify MLflow config is not present in BASE_MODEL_ONLY template
-        assert "MlflowConfig" not in pipeline_def
+        # Verify MLflow config is present in BASE_MODEL_ONLY template
+        assert "MlflowConfig" in pipeline_def
+        assert pipeline_def["MlflowConfig"]["MlflowResourceArn"] == BASE_CONTEXT["mlflow_resource_arn"]
 
         # Should have 2 steps: EvaluateBaseInferenceModel and EvaluateBaseModelMetrics
         assert len(pipeline_def["Steps"]) == 2