From d8ae816a32e1a8cb931c8048ae83a06bb0d997a4 Mon Sep 17 00:00:00 2001 From: Chris Butler Date: Mon, 9 Mar 2026 10:05:51 +0900 Subject: [PATCH 1/4] feat: add PCR measurement and attestation infrastructure Add PCR measurement extraction script, update initdata gzipper and default toml template for attestation, and configure trustee and secret template values for PCR stash support. Co-authored-by: Beraldo Leal Co-Authored-By: Claude Opus 4.6 (1M context) --- ansible/init-data-gzipper.yaml | 39 +++++++--- ansible/initdata-default.toml.tpl | 39 +++++++--- overrides/values-trustee.yaml | 7 +- scripts/get-pcr.sh | 122 ++++++++++++++++++++++++++++++ values-secret.yaml.template | 95 +++++++++++++++++++---- 5 files changed, 265 insertions(+), 37 deletions(-) create mode 100755 scripts/get-pcr.sh diff --git a/ansible/init-data-gzipper.yaml b/ansible/init-data-gzipper.yaml index 20459b84..c5a161ca 100644 --- a/ansible/init-data-gzipper.yaml +++ b/ansible/init-data-gzipper.yaml @@ -1,4 +1,4 @@ -- name: Gzip initdata +- name: Gzip initdata and register init data become: false connection: local hosts: localhost @@ -7,6 +7,7 @@ kubeconfig: "{{ lookup('env', 'KUBECONFIG') }}" cluster_platform: "{{ global.clusterPlatform | default('none') | lower }}" hub_domain: "{{ global.hubClusterDomain | default('none') | lower}}" + security_policy_flavour: "{{ global.coco.securityPolicyFlavour | default('insecure') }}" template_src: "initdata-default.toml.tpl" tasks: - name: Create temporary working directory @@ -36,7 +37,6 @@ - name: Define temp file paths ansible.builtin.set_fact: rendered_path: "{{ tmpdir.path }}/rendered.toml" - gz_path: "{{ tmpdir.path }}/rendered.toml.gz" - name: Render template to temp file ansible.builtin.template: @@ -45,15 +45,33 @@ mode: "0600" - - name: Gzip the rendered content + - name: Gzip and base64 encode the rendered content ansible.builtin.shell: | - gzip -c "{{ rendered_path }}" > "{{ gz_path }}" - changed_when: true + set -o pipefail + cat "{{ 
rendered_path }}" | gzip | base64 -w0 + register: initdata_encoded + changed_when: false + + # This block runs a shell script that calculates a hash value (PCR8_HASH) derived from the contents of 'initdata.toml'. + # The script performs the following steps: + # 1. hash=$(sha256sum initdata.toml | cut -d' ' -f1): Computes the sha256 hash of 'initdata.toml' and assigns it to $hash. + # 2. initial_pcr=0000000000000000000000000000000000000000000000000000000000000000: Initializes a string of zeros as the initial PCR value. + # 3. PCR8_HASH=$(echo -n "$initial_pcr$hash" | xxd -r -p | sha256sum | cut -d' ' -f1): + # Concatenates initial_pcr and $hash, converts from hex to binary, + # computes its sha256 hash, and stores the result as PCR8_HASH. + # 4. echo $PCR8_HASH: Outputs the PCR hash value. + # The important part: The 'register: pcr8_hash' registers the **stdout of the command**, + # which is the value output by 'echo $PCR8_HASH', as 'pcr8_hash.stdout' in Ansible. + # It does NOT register an environment variable, but rather the value actually printed by 'echo'. 
+ - name: Register init data pcr into a var + ansible.builtin.shell: | + set -o pipefail + hash=$(sha256sum "{{ rendered_path }}" | cut -d' ' -f1) + initial_pcr=0000000000000000000000000000000000000000000000000000000000000000 + PCR8_HASH=$(echo -n "$initial_pcr$hash" | xxd -r -p | sha256sum | cut -d' ' -f1) && echo $PCR8_HASH + register: pcr8_hash + changed_when: false - - name: Read gzip as base64 - ansible.builtin.slurp: - path: "{{ gz_path }}" - register: gz_slurped - name: Create/update ConfigMap with gzipped+base64 content kubernetes.core.k8s: @@ -66,4 +84,5 @@ name: "initdata" namespace: "imperative" data: - INITDATA: "{{ gz_slurped.content }}" + INITDATA: "{{ initdata_encoded.stdout }}" + PCR8_HASH: "{{ pcr8_hash.stdout }}" diff --git a/ansible/initdata-default.toml.tpl b/ansible/initdata-default.toml.tpl index 9cadbc1c..3fd1ecc3 100644 --- a/ansible/initdata-default.toml.tpl +++ b/ansible/initdata-default.toml.tpl @@ -1,4 +1,4 @@ -algorithm = "sha384" +algorithm = "sha256" version = "0.1.0" [data] @@ -9,9 +9,7 @@ url = "https://kbs.{{ hub_domain }}" [token_configs.kbs] url = "https://kbs.{{ hub_domain }}" -cert = """ -{{ trustee_cert }} -""" +cert = """{{ trustee_cert }}""" ''' "cdh.toml" = ''' @@ -21,14 +19,20 @@ credentials = [] [kbc] name = "cc_kbc" url = "https://kbs.{{ hub_domain }}" -kbs_cert = """ -{{ trustee_cert }} -""" +kbs_cert = """{{ trustee_cert }}""" + + +[image] +image_security_policy_uri = 'kbs:///default/security-policy/{{ security_policy_flavour }}' ''' "policy.rego" = ''' package agent_policy +import future.keywords.in +import future.keywords.if +import future.keywords.every + default AddARPNeighborsRequest := true default AddSwapRequest := true default CloseStdinRequest := true @@ -36,7 +40,6 @@ default CopyFileRequest := true default CreateContainerRequest := true default CreateSandboxRequest := true default DestroySandboxRequest := true -default ExecProcessRequest := false default GetMetricsRequest := true default GetOOMEventRequest 
:= true default GuestDetailsRequest := true @@ -52,7 +55,6 @@ default RemoveStaleVirtiofsShareMountsRequest := true default ReseedRandomDevRequest := true default ResumeContainerRequest := true default SetGuestDateTimeRequest := true -default SetPolicyRequest := true default SignalProcessRequest := true default StartContainerRequest := true default StartTracingRequest := true @@ -64,5 +66,20 @@ default UpdateEphemeralMountsRequest := true default UpdateInterfaceRequest := true default UpdateRoutesRequest := true default WaitProcessRequest := true -default WriteStreamRequest := true -''' \ No newline at end of file +default ExecProcessRequest := false +default SetPolicyRequest := true +default WriteStreamRequest := false + +ExecProcessRequest if { + input_command = concat(" ", input.process.Args) + some allowed_command in policy_data.allowed_commands + input_command == allowed_command +} + +policy_data := { + "allowed_commands": [ + "curl http://127.0.0.1:8006/cdh/resource/default/attestation-status/status", + "curl http://127.0.0.1:8006/cdh/resource/default/attestation-status/random" + ] +} +''' diff --git a/overrides/values-trustee.yaml b/overrides/values-trustee.yaml index ee42e416..03dd120a 100644 --- a/overrides/values-trustee.yaml +++ b/overrides/values-trustee.yaml @@ -6,4 +6,9 @@ kbs: - name: "kbsres1" # name is the name of the k8s secret that will be presented to trustee and accessible via the CDH key: "secret/data/hub/kbsres1" # this is the path to the secret in vault. - name: "passphrase" - key: "secret/data/hub/passphrase" \ No newline at end of file + key: "secret/data/hub/passphrase" +# Override the default values for the coco pattern this is because when testing against a branch strange stuff happens +# FIXME: Don't commit this to main +global: + coco: + secured: true # true or false. If true, the cluster will be secured. If false, the cluster will be insecure. 
\ No newline at end of file diff --git a/scripts/get-pcr.sh b/scripts/get-pcr.sh new file mode 100755 index 00000000..c432aa42 --- /dev/null +++ b/scripts/get-pcr.sh @@ -0,0 +1,122 @@ +#!/usr/bin/env bash +set -e + +# Script to retrieve the sandboxed container operator CSV for the current clusterGroup +# using the pull secret for authentication if needed. + +# 1. Locate pull secret +PULL_SECRET_PATH="${HOME}/pull-secret.json" +if [ ! -f "$PULL_SECRET_PATH" ]; then + if [ -n "${PULL_SECRET}" ]; then + PULL_SECRET_PATH="${PULL_SECRET}" + if [ ! -f "$PULL_SECRET_PATH" ]; then + echo "ERROR: Pull secret file not found at path specified in PULL_SECRET: $PULL_SECRET_PATH" + exit 1 + fi + else + echo "ERROR: Pull secret not found at ~/pull-secret.json" + echo "Please either place your pull secret at ~/pull-secret.json or set the PULL_SECRET environment variable" + exit 1 + fi +fi + +echo "Using pull secret: $PULL_SECRET_PATH" + +# 2. Check for required tools +if ! command -v yq &> /dev/null; then + echo "ERROR: yq is required but not installed" + echo "Please install yq: https://github.com/mikefarah/yq#install" + exit 1 +fi + +if ! command -v skopeo &> /dev/null; then + echo "ERROR: skopeo is required but not installed" + echo "Please install skopeo: https://github.com/containers/skopeo/blob/main/install.md" + exit 1 +fi + +if ! command -v podman &> /dev/null; then + echo "ERROR: podman is required but not installed" + echo "Please install podman: https://podman.io/docs/installation" + exit 1 +fi + +# 3. Check values-global.yaml exists +if [ ! -f "values-global.yaml" ]; then + echo "ERROR: values-global.yaml not found in current directory" + echo "Please run this script from the root directory of the project" + exit 1 +fi + +# 4. 
Get the active clusterGroupName from values-global.yaml +CLUSTER_GROUP_NAME=$(yq eval '.main.clusterGroupName' values-global.yaml) + +if [ -z "$CLUSTER_GROUP_NAME" ] || [ "$CLUSTER_GROUP_NAME" == "null" ]; then + echo "ERROR: Could not determine clusterGroupName from values-global.yaml" + echo "Expected: main.clusterGroupName to be set" + exit 1 +fi + +echo "Active clusterGroup: $CLUSTER_GROUP_NAME" + +# 5. Locate the values file for the active clusterGroup +VALUES_FILE="values-${CLUSTER_GROUP_NAME}.yaml" + +if [ ! -f "$VALUES_FILE" ]; then + echo "ERROR: Values file for clusterGroup not found: $VALUES_FILE" + exit 1 +fi + +# 6. Get the sandboxed container operator CSV from the clusterGroup values +SANDBOX_CSV=$(yq eval '.clusterGroup.subscriptions.sandbox.csv' "$VALUES_FILE") + +if [ -z "$SANDBOX_CSV" ] || [ "$SANDBOX_CSV" == "null" ]; then + echo "WARNING: No sandboxed container operator CSV found in $VALUES_FILE" + echo "The subscription clusterGroup.subscriptions.sandbox.csv is not defined" + exit 0 +fi + +# Extract version from CSV (e.g., "sandboxed-containers-operator.v1.11.0" -> "1.11.0") +# Remove everything up to and including ".v" +SANDBOX_VERSION="${SANDBOX_CSV##*.v}" + +echo "Sandboxed container operator CSV: $SANDBOX_CSV" +echo "Version: $SANDBOX_VERSION" +# alternatively, use the operator-version tag. 
+# OSC_VERSION=1.11.1 +VERITY_IMAGE=registry.redhat.io/openshift-sandboxed-containers/osc-dm-verity-image + +TAG=$(skopeo inspect --authfile $PULL_SECRET_PATH docker://${VERITY_IMAGE}:${SANDBOX_VERSION} | jq -r .Digest) + +IMAGE=${VERITY_IMAGE}@${TAG} + +echo "IMAGE: $IMAGE" + +curl -L https://tuf-default.apps.rosa.rekor-prod.2jng.p3.openshiftapps.com/targets/rekor.pub -o rekor.pub +curl -L https://security.access.redhat.com/data/63405576.txt -o cosign-pub-key.pem +# export REGISTRY_AUTH_FILE=${PULL_SECRET_PATH} +# echo "REGISTRY_AUTH_FILE: $REGISTRY_AUTH_FILE" +# export SIGSTORE_REKOR_PUBLIC_KEY=${PWD}/rekor.pub +# echo "SIGSTORE_REKOR_PUBLIC_KEY: $SIGSTORE_REKOR_PUBLIC_KEY" +# cosign verify --key cosign-pub-key.pem --output json --rekor-url=https://rekor-server-default.apps.rosa.rekor-prod.2jng.p3.openshiftapps.com $IMAGE > cosign_verify.log + + +# Ensure output directory exists +mkdir -p ~/.coco-pattern + +# Clean up any existing measurement files +rm -f ~/.coco-pattern/measurements-raw.json ~/.coco-pattern/measurements.json + +# Download the measurements using podman cp (works on macOS with remote podman) +podman pull --authfile $PULL_SECRET_PATH $IMAGE + +cid=$(podman create --entrypoint /bin/true $IMAGE) +echo "CID: ${cid}" +podman cp $cid:/image/measurements.json ~/.coco-pattern/measurements-raw.json +podman rm $cid + +# Trim leading "0x" from all measurement values +jq 'walk(if type == "string" and startswith("0x") then .[2:] else . 
end)' \ + ~/.coco-pattern/measurements-raw.json > ~/.coco-pattern/measurements.json + +echo "Measurements saved to ~/.coco-pattern/measurements.json (0x prefixes removed)" \ No newline at end of file diff --git a/values-secret.yaml.template b/values-secret.yaml.template index fe410d42..4ed9d158 100644 --- a/values-secret.yaml.template +++ b/values-secret.yaml.template @@ -6,21 +6,91 @@ version: "2.0" # automatically generated inside the vault this should not really matter) secrets: - - name: 'sshKey' + + + # SSH keys for podvm debug access (optional). + # To enable: set global.coco.enableSSHDebug=true in values-global.yaml, + # run COCO_ENABLE_SSH_DEBUG=true ./scripts/gen-secrets.sh, + # then uncomment the block below. + #- name: sshKey + # vaultPrefixes: + # - global + # fields: + # - name: id_rsa.pub + # path: ~/.coco-pattern/id_rsa.pub + # - name: id_rsa + # path: ~/.coco-pattern/id_rsa + + - name: securityPolicyConfig vaultPrefixes: - - global + - hub fields: - - name: id_rsa.pub - path: ~/.coco-pattern/id_rsa.pub - - name: id_rsa - path: ~/.coco-pattern/id_rsa + # Accept all images without verification (INSECURE - dev/testing only) + - name: insecure + value: | + { + "default": [{"type": "insecureAcceptAnything"}], + "transports": {} + } + # Reject all images (useful for testing policy enforcement) + - name: reject + value: | + { + "default": [{"type": "reject"}], + "transports": {} + } + # Only accept signed images (production) + # Edit the transports section to add your signed images. + # Each image needs a corresponding cosign public key in cosign-keys secret. + # The keys must line up with the keys below + - name: signed + value: | + { + "default": [{"type": "reject"}], + "transports": { + "docker": { + "registry.example.com/my-image": [ + { + "type": "sigstoreSigned", + "keyPath": "kbs:///default/cosign-keys/key-0" + } + ] + } + } + } + + # Cosign public keys for image signature verification + # Required when using the "signed" policy above. 
+ # Add your cosign public key files here. + # Generate a cosign key pair: cosign generate-key-pair + #- name: cosign-keys + # vaultPrefixes: + # - hub + # fields: + # - name: key-0 + # path: ~/.coco-pattern/cosign-key-0.pub - - name: 'securityPolicyConfig' + + # PCR measurements for attestation. + # Required: run ./scripts/get-pcr.sh before deploying. + - name: pcrStash vaultPrefixes: - hub fields: - - name: osc - path: ~/.coco-pattern/security-policy-config.json + - name: json + path: ~/.coco-pattern/measurements.json + + + - name: attestationStatus + vaultPrefixes: + - hub + fields: + - name: status + value: 'attested' + - name: random + value: '' + onMissingValue: generate + vaultPolicy: validatedPatternDefaultPolicy - name: kbsPublicKey vaultPrefixes: @@ -29,12 +99,6 @@ secrets: - name: publicKey path: ~/.coco-pattern/kbsPublicKey - - name: kbsPrivateKey - vaultPrefixes: - - global - fields: - - name: privateKey - path: ~/.coco-pattern/kbsPrivateKey - name: kbsres1 vaultPrefixes: @@ -61,3 +125,4 @@ secrets: value: '' onMissingValue: generate vaultPolicy: validatedPatternDefaultPolicy + From 18c0b08bf06e72c74f88c8e26e1623d031b44b28 Mon Sep 17 00:00:00 2001 From: Chris Butler Date: Mon, 9 Mar 2026 10:06:35 +0900 Subject: [PATCH 2/4] feat: update RHDP tooling and wrapper scripts Update RHDP cluster definition tooling, wrapper script improvements, gen-secrets simplification, and letsencrypt chart version bump. 
Co-authored-by: Beraldo Leal Co-Authored-By: Claude Opus 4.6 (1M context) --- charts/all/letsencrypt/Chart.yaml | 6 ++- rhdp/requirements.txt | 3 +- rhdp/rhdp-cluster-define.py | 34 ++++++++++++-- rhdp/wrapper.sh | 76 ++++++++++++++++++++++++++++--- scripts/gen-secrets.sh | 45 +++++++----------- 5 files changed, 122 insertions(+), 42 deletions(-) diff --git a/charts/all/letsencrypt/Chart.yaml b/charts/all/letsencrypt/Chart.yaml index c4d83704..e3203347 100644 --- a/charts/all/letsencrypt/Chart.yaml +++ b/charts/all/letsencrypt/Chart.yaml @@ -1,6 +1,10 @@ apiVersion: v2 name: letsencrypt -description: A Helm chart to add letsencrypt support to Validated Patterns. +description: >- + DEPRECATED: This chart is unsupported and will be removed in a future release. + Trustee 1.0 uses cert-manager for certificate management, making Let's Encrypt + integration unnecessary. A Helm chart to add letsencrypt support to Validated Patterns. +deprecated: true type: application diff --git a/rhdp/requirements.txt b/rhdp/requirements.txt index e02a4479..3a98310e 100644 --- a/rhdp/requirements.txt +++ b/rhdp/requirements.txt @@ -1,3 +1,4 @@ typer rich -Jinja2 \ No newline at end of file +Jinja2 +typing_extensions \ No newline at end of file diff --git a/rhdp/rhdp-cluster-define.py b/rhdp/rhdp-cluster-define.py index 522c0bb3..b65cb4b3 100644 --- a/rhdp/rhdp-cluster-define.py +++ b/rhdp/rhdp-cluster-define.py @@ -13,8 +13,22 @@ from typing_extensions import Annotated -def get_default_cluster_configs() -> List[Dict]: - """Get default cluster configurations""" +def get_default_cluster_configs(prefix: str = "") -> List[Dict]: + """Get default cluster configurations + + Args: + prefix: Optional prefix to add to cluster name and directory + """ + if prefix: + return [ + { + "name": f"coco-{prefix}", + "directory": f"openshift-install-{prefix}", + "cluster_network_cidr": "10.128.0.0/14", + "machine_network_cidr": "10.0.0.0/16", + "service_network_cidr": "172.30.0.0/16", + } + ] return [ { 
"name": "coco", @@ -135,6 +149,9 @@ def run( multicluster: Annotated[ bool, typer.Option("--multicluster", help="Deploy hub and spoke clusters") ] = False, + prefix: Annotated[ + str, typer.Option("--prefix", help="Prefix for cluster name and directory") + ] = "", ): """ Region flag requires an azure region key which can be (authoritatively) @@ -142,16 +159,25 @@ def run( Use --multicluster flag to deploy both hub (coco-hub) and spoke (coco-spoke) clusters. + + Use --prefix to add a prefix to cluster name and install directory, enabling + multiple cluster deployments (e.g., --prefix cluster1 creates coco-cluster1 + in openshift-install-cluster1). """ validate_dir() # Choose cluster configurations based on multicluster flag if multicluster: + if prefix: + rprint("WARNING: --prefix is ignored when using --multicluster") cluster_configs = get_multicluster_configs() rprint("Setting up multicluster deployment (hub and spoke)") else: - cluster_configs = get_default_cluster_configs() - rprint("Setting up single cluster deployment") + cluster_configs = get_default_cluster_configs(prefix) + if prefix: + rprint(f"Setting up single cluster deployment with prefix: {prefix}") + else: + rprint("Setting up single cluster deployment") cleanup(pathlib.Path.cwd(), cluster_configs) setup_install( diff --git a/rhdp/wrapper.sh b/rhdp/wrapper.sh index 5fbf6994..18763936 100755 --- a/rhdp/wrapper.sh +++ b/rhdp/wrapper.sh @@ -14,13 +14,56 @@ get_python_cmd() { fi } -if [ "$#" -ne 1 ]; then - echo "Error: Exactly one argument is required." 
- echo "Usage: $0 {azure-region-code}" +# Parse arguments +AZUREREGION="" +PREFIX="" + +while [[ $# -gt 0 ]]; do + case $1 in + --prefix) + PREFIX="$2" + shift 2 + ;; + --prefix=*) + PREFIX="${1#*=}" + shift + ;; + -*) + echo "Error: Unknown option $1" + echo "Usage: $0 [--prefix ] {azure-region-code}" + echo "Example: $0 eastasia" + echo "Example: $0 --prefix cluster1 eastasia" + exit 1 + ;; + *) + if [ -z "$AZUREREGION" ]; then + AZUREREGION="$1" + else + echo "Error: Too many positional arguments." + echo "Usage: $0 [--prefix ] {azure-region-code}" + exit 1 + fi + shift + ;; + esac +done + +if [ -z "$AZUREREGION" ]; then + echo "Error: Azure region is required." + echo "Usage: $0 [--prefix ] {azure-region-code}" echo "Example: $0 eastasia" + echo "Example: $0 --prefix cluster1 eastasia" exit 1 fi -AZUREREGION=$1 + +# Set install directory based on prefix +if [ -n "$PREFIX" ]; then + INSTALL_DIR="openshift-install-${PREFIX}" + echo "Using prefix: $PREFIX" + echo "Install directory: $INSTALL_DIR" +else + INSTALL_DIR="openshift-install" +fi echo "---------------------" echo "Validating configuration" @@ -40,6 +83,17 @@ if ! command -v yq &> /dev/null; then exit 1 fi +# Check if podman is available and running +if ! command -v podman &> /dev/null; then + echo "ERROR: podman is required but not installed" + exit 1 +fi + +if ! 
podman info &> /dev/null; then + echo "ERROR: podman is installed but not responding" + exit 1 +fi + # Extract clusterGroupName from values-global.yaml using yq CLUSTER_GROUP_NAME=$(yq eval '.main.clusterGroupName' values-global.yaml) @@ -113,7 +167,11 @@ echo "---------------------" echo "defining cluster" echo "---------------------" PYTHON_CMD=$(get_python_cmd) -$PYTHON_CMD rhdp/rhdp-cluster-define.py ${AZUREREGION} +if [ -n "$PREFIX" ]; then + $PYTHON_CMD rhdp/rhdp-cluster-define.py --prefix "${PREFIX}" ${AZUREREGION} +else + $PYTHON_CMD rhdp/rhdp-cluster-define.py ${AZUREREGION} +fi echo "---------------------" echo "cluster defined" echo "---------------------" @@ -121,19 +179,23 @@ sleep 10 echo "---------------------" echo "openshift-install" echo "---------------------" -openshift-install create cluster --dir=./openshift-install +openshift-install create cluster --dir=./${INSTALL_DIR} echo "openshift-install done" echo "---------------------" echo "setting up secrets" bash ./scripts/gen-secrets.sh +echo "---------------------" +echo "retrieving PCR measurements" +echo "---------------------" +bash ./scripts/get-pcr.sh sleep 60 echo "---------------------" echo "pattern install" echo "---------------------" -export KUBECONFIG="$(pwd)/openshift-install/auth/kubeconfig" +export KUBECONFIG="$(pwd)/${INSTALL_DIR}/auth/kubeconfig" ./pattern.sh make install diff --git a/scripts/gen-secrets.sh b/scripts/gen-secrets.sh index 25c4713a..c487bcac 100755 --- a/scripts/gen-secrets.sh +++ b/scripts/gen-secrets.sh @@ -1,49 +1,36 @@ #!/usr/bin/env bash echo "Creating secrets as required" -echo +echo COCO_SECRETS_DIR="${HOME}/.coco-pattern" -SECURITY_POLICY_FILE="${COCO_SECRETS_DIR}/security-policy-config.json" -SSH_KEY_FILE="${COCO_SECRETS_DIR}/id_rsa" KBS_PRIVATE_KEY="${COCO_SECRETS_DIR}/kbsPrivateKey" KBS_PUBLIC_KEY="${COCO_SECRETS_DIR}/kbsPublicKey" -SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && 
pwd)" VALUES_FILE="${HOME}/values-secret-coco-pattern.yaml" mkdir -p ${COCO_SECRETS_DIR} -if [ ! -f "${SECURITY_POLICY_FILE}" ]; then -echo "Creating security policy" -cat > ${SECURITY_POLICY_FILE} < ${KBS_PRIVATE_KEY} - openssl pkey -in "${KBS_PRIVATE_KEY}" -pubout -out "${KBS_PUBLIC_KEY}" -fi - -if [ ! -f "${SSH_KEY_FILE}" ]; then - echo "Creating ssh keys" - rm -f "${SSH_KEY_FILE}.pub" - ssh-keygen -f "${SSH_KEY_FILE}" -N "" + echo "Creating kbs keys" + rm -f "${KBS_PUBLIC_KEY}" + openssl genpkey -algorithm ed25519 >${KBS_PRIVATE_KEY} + openssl pkey -in "${KBS_PRIVATE_KEY}" -pubout -out "${KBS_PUBLIC_KEY}" fi - ## Copy a sample values file if this stuff doesn't exist if [ ! -f "${VALUES_FILE}" ]; then - echo "No values file was found copying template.. please review before deploying" - cp "${SCRIPT_DIR}/../values-secret.yaml.template" "${VALUES_FILE}" -fi \ No newline at end of file + echo "No values file was found copying template.. please review before deploying" + cp "${SCRIPT_DIR}/../values-secret.yaml.template" "${VALUES_FILE}" +fi From 8ad7686bf621ac9e6c2ceebfddc5343ec61687e1 Mon Sep 17 00:00:00 2001 From: Chris Butler Date: Mon, 9 Mar 2026 10:06:44 +0900 Subject: [PATCH 3/4] feat: update values files to use released charts Update global, simple, spoke, and trusted-hub values files to align with released chart versions and configuration. Co-authored-by: Beraldo Leal Co-Authored-By: Claude Opus 4.6 (1M context) --- values-global.yaml | 7 +++++++ values-simple.yaml | 34 ++++++++++++---------------------- values-spoke.yaml | 18 ++++++++++-------- values-trusted-hub.yaml | 28 ++++++++++++++++------------ 4 files changed, 45 insertions(+), 42 deletions(-) diff --git a/values-global.yaml b/values-global.yaml index e91c7f0c..d7c202a3 100644 --- a/values-global.yaml +++ b/values-global.yaml @@ -11,6 +11,12 @@ global: # This defines whether or not to use upstream resources for CoCo. 
# Defines whether or not the hub cluster can be used for confidential containers coco: + securityPolicyFlavour: "insecure" # insecure, signed or reject is expected. + secured: true # true or false. If true, the cluster will be secured. If false, the cluster will be insecure. + # Enable SSH key injection into podvm for debugging. Do not enable in production. + # Also requires: COCO_ENABLE_SSH_DEBUG=true ./scripts/gen-secrets.sh + # and uncommenting the sshKey block in values-secret.yaml.template. + enableSSHDebug: false azure: defaultVMFlavour: "Standard_DC2as_v5" VMFlavours: "Standard_DC2as_v5,Standard_DC4as_v5,Standard_DC8as_v5,Standard_DC16as_v5" @@ -24,6 +30,7 @@ main: clusterGroupChartVersion: 0.9.* # Common secret store configuration used across multiple charts +# Warning do not rely on this. it does not consistently apply. secretStore: name: vault-backend kind: ClusterSecretStore diff --git a/values-simple.yaml b/values-simple.yaml index a6405a2d..fed2b1d3 100644 --- a/values-simple.yaml +++ b/values-simple.yaml @@ -3,6 +3,8 @@ clusterGroup: name: simple isHubCluster: true + # Override health check for Subscriptions to treat UpgradePending as healthy + # Only applies to pinned CSV subscriptions (sandbox and trustee) namespaces: - open-cluster-management - vault @@ -12,7 +14,6 @@ clusterGroup: - hello-openshift - cert-manager-operator - cert-manager - - letsencrypt - kbs-access - encrypted-storage subscriptions: @@ -26,14 +27,14 @@ clusterGroup: source: redhat-operators channel: stable installPlanApproval: Manual - csv: sandboxed-containers-operator.v1.10.1 + csv: sandboxed-containers-operator.v1.11.0 trustee: name: trustee-operator namespace: trustee-operator-system source: redhat-operators channel: stable installPlanApproval: Manual - csv: trustee-operator.v0.4.1 + csv: trustee-operator.v1.0.0 cert-manager: name: openshift-cert-manager-operator namespace: cert-manager-operator @@ -72,39 +73,28 @@ clusterGroup: project: golang-external-secrets chart: 
golang-external-secrets chartVersion: 0.1.* - trustee: name: trustee namespace: trustee-operator-system #upstream config project: trustee chart: trustee - chartVersion: 0.1.* - # Use the override file to specify the list of secrets accessible to trustee from the ESO backend (today by default, Vault). - extraValueFiles: - - '$patternref/overrides/values-trustee.yaml' + chartVersion: 0.2.* sandbox: name: sandbox namespace: openshift-sandboxed-containers-operator #upstream config project: sandbox chart: sandboxed-containers - chartVersion: 0.0.* + chartVersion: 0.2.* sandbox-policies: name: sandbox-policies namespace: openshift-sandboxed-containers-operator #upstream config chart: sandboxed-policies - chartVersion: 0.0.* - - # Letsencrypt is not required anymore for trustee. - # It's only here if you need it for your needs. - letsencrypt: - name: letsencrypt - namespace: letsencrypt - project: hub - path: charts/all/letsencrypt - # Default to 'safe' for ARO + chartVersion: 0.1.* overrides: - - name: letsencrypt.enabled - value: false + - name: global.coco.azure.tags + value: "key1=value1,key2=value2" + - name: global.coco.azure.rootVolumeSize + value: "20" hello-openshift: name: hello-openshift namespace: hello-openshift @@ -117,7 +107,6 @@ clusterGroup: project: workloads path: charts/coco-supported/kbs-access - imperative: # NOTE: We *must* use lists and not hashes. 
As hashes lose ordering once parsed by helm # The default schedule is every 10 minutes: imperative.schedule @@ -125,6 +114,7 @@ clusterGroup: # imagePullPolicy is set to always: imperative.imagePullPolicy # For additional overrides that apply to the jobs, please refer to # https://validatedpatterns.io/imperative-actions/#additional-job-customizations + image: ghcr.io/butler54/imperative-container:latest jobs: - name: install-deps playbook: ansible/install-deps.yaml diff --git a/values-spoke.yaml b/values-spoke.yaml index 270e4cfd..c604a5a6 100644 --- a/values-spoke.yaml +++ b/values-spoke.yaml @@ -17,7 +17,7 @@ clusterGroup: source: redhat-operators channel: stable installPlanApproval: Manual - csv: sandboxed-containers-operator.v1.10.1 + csv: sandboxed-containers-operator.v1.11.0 cert-manager: name: openshift-cert-manager-operator namespace: cert-manager-operator @@ -49,7 +49,14 @@ clusterGroup: namespace: openshift-sandboxed-containers-operator #upstream config project: sandbox chart: sandboxed-containers - chartVersion: 0.0.* + chartVersion: 0.2.* + overrides: + - name: global.secretStore.backend + value: vault + - name: secretStore.name + value: vault-backend + - name: secretStore.kind + value: ClusterSecretStore hello-openshift: name: hello-openshift @@ -64,12 +71,7 @@ clusterGroup: path: charts/coco-supported/kbs-access imperative: - # NOTE: We *must* use lists and not hashes. 
As hashes lose ordering once parsed by helm - # The default schedule is every 10 minutes: imperative.schedule - # Total timeout of all jobs is 1h: imperative.activeDeadlineSeconds - # imagePullPolicy is set to always: imperative.imagePullPolicy - # For additional overrides that apply to the jobs, please refer to - # https://validatedpatterns.io/imperative-actions/#additional-job-customizations + image: ghcr.io/butler54/imperative-container:latest jobs: - name: install-deps playbook: ansible/install-deps.yaml diff --git a/values-trusted-hub.yaml b/values-trusted-hub.yaml index be5a2da6..c3531ba1 100644 --- a/values-trusted-hub.yaml +++ b/values-trusted-hub.yaml @@ -22,7 +22,7 @@ clusterGroup: source: redhat-operators channel: stable installPlanApproval: Manual - csv: trustee-operator.v0.4.1 + csv: trustee-operator.v1.0.0 cert-manager: name: openshift-cert-manager-operator namespace: cert-manager-operator @@ -67,30 +67,34 @@ clusterGroup: namespace: trustee-operator-system #upstream config project: trustee chart: trustee - chartVersion: 0.1.* - # Use the override file to specify the list of secrets accessible to trustee from the ESO backend (today by default, Vault). - extraValueFiles: - - '$patternref/overrides/values-trustee.yaml' + chartVersion: 0.2.* + overrides: + - name: global.coco.secured + value: "true" sandbox-policies: name: sandbox-policies namespace: openshift-sandboxed-containers-operator #upstream config chart: sandboxed-policies - chartVersion: 0.0.* + chartVersion: 0.1.* + overrides: + - name: global.coco.azure.tags + value: "key1=value1,key2=value2" + - name: global.coco.azure.rootVolumeSize + value: "20" imperative: - # NOTE: We *must* use lists and not hashes. 
As hashes lose ordering once parsed by helm - # The default schedule is every 10 minutes: imperative.schedule - # Total timeout of all jobs is 1h: imperative.activeDeadlineSeconds - # imagePullPolicy is set to always: imperative.imagePullPolicy - # For additional overrides that apply to the jobs, please refer to - # https://validatedpatterns.io/imperative-actions/#additional-job-customizations + image: ghcr.io/butler54/imperative-container:latest jobs: - name: install-deps playbook: ansible/install-deps.yaml verbosity: -vvv timeout: 3600 + - name: configure-azure-dns + playbook: ansible/configure-issuer.yaml + verbosity: -vvv + timeout: 3600 - name: init-data-gzipper playbook: ansible/init-data-gzipper.yaml verbosity: -vvv From 3108ec06e68f08ab88994d6ef6c579d7e00c6981 Mon Sep 17 00:00:00 2001 From: Chris Butler Date: Mon, 9 Mar 2026 10:07:22 +0900 Subject: [PATCH 4/4] docs: update README and add AGENTS.md Update README with current deployment instructions and add AGENTS.md for AI coding assistant guidelines. Co-Authored-By: Claude Opus 4.6 (1M context) --- AGENTS.md | 150 ++++++++++++++++++++++++++++++++++ README.md | 236 +++++++++++++++++++----------------------------------- 2 files changed, 234 insertions(+), 152 deletions(-) create mode 100644 AGENTS.md diff --git a/AGENTS.md b/AGENTS.md new file mode 100644 index 00000000..d88316da --- /dev/null +++ b/AGENTS.md @@ -0,0 +1,150 @@ +# CoCo Pattern — AI Coding Assistant Guidance + +This is a [Validated Pattern](https://validatedpatterns.io/) for deploying confidential containers (CoCo) on OpenShift. +This file provides rules and context for any AI coding assistant working in this repository. + +## Critical Rules + +- **DO NOT** edit anything under `/common/`. It is a read-only Git subtree from the upstream validated patterns framework. +- **DO NOT** commit secrets, credentials, or private keys. `values-secret.yaml.template` is a template only. +- **DO NOT** use Kustomize. This project uses Helm exclusively. 
+- **DO NOT** create charts with `apiVersion: v1`. Use `apiVersion: v2` (Helm 3+). +- **DO NOT** place cloud-provider-specific logic in chart templates. Use `/overrides/` via `sharedValueFiles` instead. +- **DO NOT** hardcode secrets in templates. Use External Secrets Operator with vault paths (see `charts/hub/trustee/templates/dynamic-eso.yaml` for reference). + +## Feature Development Precedence Order + +Use the **first** approach that fits your requirement: + +1. **Helm charts** — Declarative Kubernetes resources in `/charts/`, deployed by ArgoCD. Preferred for installing operators, configuring CRDs, and creating Kubernetes resources. +2. **ACM policies** — Red Hat Advanced Cluster Management policies for propagating configuration from hub to spoke clusters and enforcing multi-cluster governance. Reference: `charts/hub/sandbox-policies/templates/`. +3. **Imperative framework (Ansible)** — Playbooks in `/ansible/`, executed as Kubernetes Jobs on a 10-minute schedule. **Must be idempotent.** Use for API calls, runtime data lookups, and multi-step orchestration that cannot be expressed declaratively. Register playbooks in `clusterGroup.imperative.jobs` as an ordered list. +4. **Out-of-band scripts** — `/scripts/` or `/rhdp/`. Last resort for one-time setup or local development tooling. These are not managed by GitOps. 
+ +## Project Structure + +```text +├── ansible/ # Ansible playbooks (imperative jobs) +├── charts/ +│ ├── all/ +│ │ └── letsencrypt/ # Shared across cluster groups +│ ├── coco-supported/ +│ │ ├── baremetal/ # Bare-metal TDX configuration +│ │ ├── hello-openshift/ # Sample workloads +│ │ ├── kbs-access/ # KBS access verification workload +│ │ └── sandbox/ # Sandboxed containers runtime +│ └── hub/ +│ ├── lvm-storage/ # LVM storage for bare-metal +│ ├── sandbox-policies/ # ACM policies (hub → spoke) +│ └── trustee/ # Trustee / KBS +├── common/ # READ-ONLY — upstream framework subtree +├── overrides/ # Cloud-provider value overrides +│ ├── values-AWS.yaml +│ ├── values-Azure.yaml +│ └── values-IBMCloud.yaml +├── rhdp/ # Red Hat Demo Platform tooling +├── scripts/ # Utility scripts +├── values-global.yaml # Global configuration +├── values-simple.yaml # Cluster group: simple +├── values-baremetal.yaml # Cluster group: baremetal +├── values-trusted-hub.yaml # Cluster group: trusted-hub +├── values-untrusted-spoke.yaml # Cluster group: untrusted-spoke +└── values-secret.yaml.template # Secrets template (never commit filled-in copy) +``` + +## Companion Chart Repositories + +Several charts in this repository have companion repositories for independent versioning and reuse. Develop and test in this repository first (charts deploy via `path:`), then sync changes to the companion repository. + +| Local Path | Companion Repository | Purpose | +|---|---|---| +| `charts/hub/trustee/` | `trustee-chart` | Trustee / KBS on hub | +| `charts/hub/sandbox-policies/` | `sandboxed-policies-chart` | ACM policies hub → spoke | +| `charts/coco-supported/sandbox/` | `sandboxed-containers-chart` | Sandboxed runtime on spoke | + +Large features may require coordinated changes across multiple companion repos. References are org-agnostic — contributors should fork all relevant repos as needed. + +## Cluster Groups + +Set via `main.clusterGroupName` in `values-global.yaml`. 
+
+| Cluster Group | Values File | Role | Description |
+|---|---|---|---|
+| `simple` | `values-simple.yaml` | Hub (single cluster) | All components on one cluster |
+| `baremetal` | `values-baremetal.yaml` | Hub (single cluster) | TDX + LVM storage on bare metal |
+| `trusted-hub` | `values-trusted-hub.yaml` | Multi-cluster hub | Trustee + ACM policies |
+| `untrusted-spoke` | `values-untrusted-spoke.yaml` | Multi-cluster spoke | Sandbox runtime + workloads |
+
+## Values File Hierarchy
+
+Merge order (last wins):
+
+1. Chart defaults (`charts/<cluster-group>/<chart>/values.yaml`)
+2. `values-global.yaml`
+3. `values-<cluster-group>.yaml`
+4. `/overrides/values-{{ clusterPlatform }}.yaml` (via `sharedValueFiles`)
+5. `values-secret.yaml` (runtime only, never committed)
+
+Key conventions:
+
+- Global settings go under the `global:` key in `values-global.yaml`.
+- Subscriptions go under `clusterGroup.subscriptions:` in the cluster group values file.
+- Applications go under `clusterGroup.applications:` in the cluster group values file.
+- Local charts use `path:` (e.g., `path: charts/hub/trustee`). Shared framework charts use `chart:` + `chartVersion:`.
+- Imperative jobs go under `clusterGroup.imperative.jobs:` as an **ordered list** (not a hash — hashes lose ordering in Helm).
+
+## Helm Chart Conventions
+
+- Use `apiVersion: v2`. Place charts in `charts/<cluster-group>/<chart>/`.
+- Use ArgoCD sync-wave annotations to control deployment ordering.
+- Use `ExternalSecret` resources to pull secrets from vault. Reference: `charts/hub/trustee/templates/dynamic-eso.yaml`.
+- Use `.Values.global.clusterPlatform` for platform-conditional logic only when overrides files are insufficient.
+- Reference patterns:
+  - ESO integration: `charts/hub/trustee/templates/dynamic-eso.yaml`
+  - Template helpers: `charts/coco-supported/hello-openshift/templates/_helpers.tpl`
+
+## Ansible Playbook Conventions
+
+- Place playbooks in `/ansible/`. They **must be idempotent**.
+- Use `connection: local`, `hosts: localhost`, `become: false`.
+- Use `kubernetes.core.k8s` and `kubernetes.core.k8s_info` modules for cluster interaction.
+- Register playbooks in the cluster group values file under `clusterGroup.imperative.jobs` with `name`, `playbook`, `verbosity`, and `timeout` fields.
+
+## Git Workflow
+
+- **Fork-first**: ArgoCD reconciles against your fork. Clone and push to your own fork.
+- **Conventional commits**: Enforced by commitlint (`@commitlint/config-conventional`).
+- **Branch-based deployment**: The branch of your local checkout determines the ArgoCD deployment target.
+- **Changes require commit + push** to take effect — ArgoCD watches the remote.
+
+## Commands Reference
+
+All commands run via `./pattern.sh make <command>`:
+
+| Command | Purpose |
+|---|---|
+| `install` | Install the pattern and load secrets |
+| `show` | Render the starting template without installing |
+| `preview-all` | Preview all applications across cluster groups |
+| `validate-schema` | Validate values files against JSON schema |
+| `validate-cluster` | Validate cluster prerequisites |
+| `super-linter` | Run super-linter locally |
+| `load-secrets` | Load secrets into the configured backend |
+| `uninstall` | Uninstall the pattern |
+
+See the readme for secrets backend configuration, RHDP environment variables, and additional maintenance commands.
+
+## Validation and CI
+
+CI runs the following checks on pull requests:
+
+- **JSON schema validation** — values files validated against `common/clustergroup` schema
+- **Super Linter** — multi-language linting
+- **Conventional PR title lint** — PR titles must follow conventional commit format
+
+Run locally before pushing:
+
+```bash
+./pattern.sh make preview-all
+./pattern.sh make validate-schema
+```
diff --git a/README.md b/README.md
index 8afd14a7..32ebe80b 100644
--- a/README.md
+++ b/README.md
@@ -1,199 +1,131 @@
 # coco-pattern
 
-This is a validated pattern for deploying confidential containers on OpenShift.
+Validated pattern for deploying confidential containers on OpenShift using the [Validated Patterns](https://validatedpatterns.io/) framework. -There are two topologies for deploying this pattern: +Confidential containers use hardware-backed Trusted Execution Environments (TEEs) to isolate workloads from cluster and hypervisor administrators. This pattern deploys and configures the Red Hat CoCo stack — including the sandboxed containers operator, Trustee (Key Broker Service), and peer-pod infrastructure — on Azure. -1. *Default* using a single cluster. This breaks the RACI expected in a remote attestation architecture, however, makes it easier to test. This uses the `simple` `clusterGroup`. -2. A more secure operating model that has two clusters: - - One in a "trusted" zone where the remote attestation, KMS and Key Broker infrastructure are deployed. This is also the Advanced Cluster Manager Hub cluster. It uses the `trusted-hub` `clusterGroup`. - - A second where a subset of workloads are deployed in confidential containers. It uses the `spoke` `clusterGroup` +## Topologies -The current version of this application the confidential containers assumes deployment to Azure. +The pattern provides two deployment topologies: -On the cluster where confidential workloads are deployed two sample applications are deployed: +1. **Single cluster** (`simple` clusterGroup) — deploys all components (Trustee, Vault, ACM, sandboxed containers, workloads) in one cluster. This breaks the RACI separation expected in a remote attestation architecture but simplifies testing and demonstrations. -1. Sample hello world applications to allow users to experiment with the policies for CoCo and the KBS (trustee). -2. A sample application `kbs-access` which presents secrets obtained from trustee to a web service. This is designed to allow users to test locked down environments. +2. 
**Multi-cluster** (`trusted-hub` + `spoke` clusterGroups) — separates the trusted zone from the untrusted workload zone: + - **Hub** (`trusted-hub`): Runs Trustee (KBS + attestation service), HashiCorp Vault, ACM, and cert-manager. This cluster is the trust anchor. + - **Spoke** (`spoke`): Runs the sandboxed containers operator and confidential workloads. The spoke is imported into ACM and managed from the hub. -Future work includes: +The topology is controlled by the `main.clusterGroupName` field in `values-global.yaml`. -1. ~~Supporting a multiple cluster deployment~~ Done -2. Supporting multiple infrastructure providers - Work in Progress. -3. Supporting air-gapped deployments - Work in Progress. -4. Supporting a more sophisticated workload such as confidential AI inference with protected GPUs. +Currently supports Azure via peer-pods. Peer-pods provision confidential VMs (`Standard_DCas_v5` family) directly on the Azure hypervisor rather than nesting VMs inside worker nodes. -## Current constraints and assumptions +## Current version (4.*) -- Only currently is known to work with `azure` as the provider of confidential vms via peer-pods. -- Below version 3.1, if not using ARO you must either provide your own CA signed certs, or use let's encrypt. -- Must be on 4.16.14 or later. +Breaking change from v3. 
This is the first version using GA (Generally Available) releases of the CoCo stack: -## Major versions +- **OpenShift Sandboxed Containers 1.11+** (requires OCP 4.17+) +- **Red Hat Build of Trustee 1.0** (first GA release; all prior versions were Technology Preview) +- External chart repositories for [Trustee](https://github.com/validatedpatterns/trustee-chart), [sandboxed-containers](https://github.com/validatedpatterns/sandboxed-containers-chart), and [sandboxed-policies](https://github.com/validatedpatterns/sandboxed-policies-chart) +- Self-signed certificates via cert-manager (Let's Encrypt no longer required) +- Multi-cluster support via ACM -### `3.*` +### Previous versions -Version `3.*` of the pattern is currently constrained to support the general availability releases of coco. +All previous versions used pre-GA (Technology Preview) releases of Trustee: -- (OpenShift Sandboxed Containers Operator) `1.10.*` and above -- Trustee `0.4.*` +| Version | Trustee | OSC | Min OCP | +|---------|---------|-----|---------| +| **3.*** | 0.4.* (Tech Preview) | 1.10.* | 4.16+ | +| **2.*** | 0.3.* (Tech Preview) | 1.9.* | 4.16+ | +| **1.0.0** | 0.2.0 (Tech Preview) | 1.8.1 | 4.16+ | -This limits support to OpenShift 4.16 and higher. +## Setup -The pattern has been tested on Azure for two installation methods: +### Prerequisites -1. Installing onto an ARO cluster -2. Self managed OpenShift install using the `openshift-install` CLI. +- OpenShift 4.17+ cluster on Azure (self-managed via `openshift-install` or ARO) +- Azure `Standard_DCas_v5` VM quota in your target region (these are confidential computing VMs and are not available in all regions). See the note below for more details. 
+- Azure DNS hosting the cluster's DNS zone +- Tools on your workstation: `podman`, `yq`, `jq`, `skopeo` +- OpenShift pull secret saved at `~/pull-secret.json` (download from [console.redhat.com](https://console.redhat.com/openshift/downloads)) +- Fork the repository — ArgoCD reconciles cluster state against your fork, so changes must be pushed to your remote -#### Known limitations +### Secrets and PCR setup -[Additional configuration](https://issues.redhat.com/browse/KATA-4107) is required to pull secrets from authenticated registries. +These scripts generate the cryptographic material and attestation measurements needed by Trustee and the peer-pod VMs. Run them once before your first deployment. -### `2.*` +1. `bash scripts/gen-secrets.sh` — generates KBS key pairs, attestation policy seeds, and copies `values-secret.yaml.template` to `~/values-secret-coco-pattern.yaml` +2. `bash scripts/get-pcr.sh` — retrieves PCR measurements from the peer-pod VM image and stores them at `~/.coco-pattern/measurements.json` (requires `podman`, `skopeo`, and `~/pull-secret.json`) +3. Review and customise `~/values-secret-coco-pattern.yaml` — this file is loaded into Vault and provides secrets to the pattern -Version `2.*` of the pattern is currently constrained to support: +> **Note:** `gen-secrets.sh` will not overwrite existing secrets. Delete `~/.coco-pattern/` if you need to regenerate. -- (OpenShift Sandboxed Containers Operator) `1.9.*` -- Trustee `0.3.*` +### Single cluster deployment -This limits support to OpenShift 4.16 and higher. +1. Set `main.clusterGroupName: simple` in `values-global.yaml` +2. Ensure your Azure configuration is populated in `values-global.yaml` (see `global.azure.*` fields) +3. `./pattern.sh make install` +4. Wait for the cluster to reboot all nodes (the sandboxed containers operator triggers a MachineConfig update). Monitor progress in the ArgoCD UI. 
-The pattern has been tested on Azure for two installation methods: +### Multi-cluster deployment -1. Installing onto an ARO cluster -2. Self managed OpenShift install using the `openshift-install` CLI. +1. Set `main.clusterGroupName: trusted-hub` in `values-global.yaml` +2. Deploy the hub cluster: `./pattern.sh make install` +3. Wait for ACM (`MultiClusterHub`) to reach `Running` state on the hub +4. Provision a second OpenShift 4.17+ cluster on Azure for the spoke +5. Import the spoke into ACM with label `clusterGroup=spoke` + (see [importing a cluster](https://validatedpatterns.io/learn/importing-a-cluster/)) +6. ACM will automatically deploy the `spoke` clusterGroup applications (sandboxed containers, workloads) to the imported cluster -### `1.0.0` +## Sample applications -1.0.0 supports OpenShift Sandboxed containers version `1.8.1` along with Trustee version `0.2.0`. +Two sample applications are deployed on the cluster running confidential workloads (the single cluster in `simple` mode, or the spoke in multi-cluster mode): -The pattern has been tested on Azure for one installation method: +- **hello-openshift**: Three pods demonstrating CoCo security boundaries: + - `standard` — a regular Kubernetes pod (no confidential computing) + - `secure` — a confidential container with a strict policy; `oc exec` is denied even for `kubeadmin` + - `insecure-policy` — a confidential container with a relaxed policy allowing `oc exec` (useful for testing the Confidential Data Hub) -1. Self managed OpenShift install using the `openshift-install` CLI -2. Installing on top of an existing Azure Red Hat OpenShift (ARO) cluster + Each confidential pod runs on its own `Standard_DC2as_v5` Azure VM (visible in the Azure portal). Pods use `runtimeClassName: kata-remote`. -## Changing deployment topoloiges +- **kbs-access**: A web service that retrieves and presents secrets obtained from the Trustee Key Broker Service (KBS) via the Confidential Data Hub (CDH). 
Useful for verifying end-to-end attestation and secret delivery in locked-down environments. -**Today the demo has two deployment topologies** -The most important change is what `clusterGroup` is deployed to your main or 'hub' cluster. +## Confidential computing virtual machine availability on Microsoft Azure -You can change between behaviour by configuring [`global.main.clusterGroupName`](https://validatedpatterns.io/learn/values-files/) key in the `values-global.yaml` file. +Confidential computing VM availability on Azure varies by region. Not all regions offer the required VM families, and available sizes differ between regions. Before deploying, verify the following: -- `values-simple.yaml`: or the `simple` cluster group is the default for the pattern. It deploys everything in one cluster. --`values-trusted-hub`: or the `trusted-hub` cluster group can be configured as the main cluster group. A second cluster should be deployed with the `spoke` cluster group. Follow [instructions here](https://validatedpatterns.io/learn/importing-a-cluster/) to add the second cluster. +1. **Check regional availability.** Confirm that your target Azure region supports confidential computing VMs. Microsoft's [products available by region](https://azure.microsoft.com/en-us/explore/global-infrastructure/products-by-region/) page lists which services and VM families are offered in each region. -## Setup instructions +2. **Check your subscription quota.** Even in supported regions, your subscription may have zero default quota for confidential VM sizes. Go to **Azure Portal > Subscriptions > Usage + quotas** and filter for the DCas/DCads/ECas/ECads families. Request a quota increase if needed. -### Default single cluster setup with `values-simple.yaml` +3. **Select a VM size.** The pattern defaults to `Standard_DC2as_v5` but supports a configurable list of sizes. 
The following VM families are relevant for confidential containers on Azure: -The instructions here presume you have a cluster. See further down for provisioning instructions for a cluster. +| VM Family | CPU | Architecture | Notes | +|-----------|-----|--------------|-------| +| `Standard_DC2as_v5` | AMD SEV-SNP | AMD EPYC (Genoa) | Default for this pattern. Smallest CoCo-capable size. | +| `Standard_DC4as_v5` | AMD SEV-SNP | AMD EPYC (Genoa) | More vCPUs/memory for larger workloads. | +| `Standard_DC2ads_v5` | AMD SEV-SNP | AMD EPYC (Genoa) | Same as DC2as_v5 with a local temp disk. | +| `Standard_DC2es_v5` | Intel TDX | Intel Xeon (Sapphire Rapids) | Intel-based confidential VMs. Regional availability is more limited than AMD. | -#### Fork and Clone the GitHub repository +The available sizes can be configured via the `global.coco.azure.VMFlavours` field in `values-global.yaml` and the sandbox-policies chart overrides. The default VM flavour is set in `global.coco.azure.defaultVMFlavour`. -1. Following [standard validated patterns workflow](https://validatedpatterns.io/learn/workflow/) fork the repository and clone to your development environment which has `podman` and `git` -2. If using a particular version (e.g. `1.0.0`) checkout the correct tag. +### RHDP deployment (Red Hat Demo Platform) -> [!TIP] -> Forking is essential as the validated pattern uses ArgoCD to reconcile it's state against your remote (forked) repository. +For Red Hat associates and partners, the pattern includes wrapper scripts that automate cluster provisioning and deployment using RHDP Azure Open Environments. -#### Configuring required secrets / parameters - -The secrets here secure Trustee and the peer-pod vms. Mostly they are for demonstration purposes. -This only has to be done once. - -1. Run `sh scripts/gen-secrets.sh` - -> [!NOTE] -> Once generated this script will not override secrets. Be careful when doing multiple tests. 
- -#### Configuring let's encrypt (deprecated) - -> [!IMPORTANT] -> Ensure you have password login available to the cluster. Let's encrypt will replace the API certificate in addition to the certificates to user with routes. - -Trustee (guest agents) requires that Trustee uses a Mozilla trusted CA issued certificate, or a specific certificate which is known in advance. Today the pattern uses specific self signed certs. Let's encrypt was an option for getting a trusted certificate onto OpenShift's routes, and therefore Trustee. Ths functionality will be removed at a later date. - -If you need a Let's Encrypt certificate to be issued the `letsencrypt` application configuration needs to be changed as below. - -```yaml - --- - # Default configuration, safe for ARO - letsencrypt: - name: letsencrypt - namespace: letsencrypt - project: hub - path: charts/all/letsencrypt - # Default to 'safe' for ARO - overrides: - - name: letsencrypt.enabled - value: false - --- - # Explicitly correct configuration for enabling let's encrypt - letsencrypt: - name: letsencrypt - namespace: letsencrypt - project: hub - path: charts/all/letsencrypt - overrides: - - name: letsencrypt.enabled - value: true -``` - -> [!WARNING] -> Configuration changes are only effective once committed and pushed to your remote repository. - -#### Installing onto a cluster - -Once you configuration is pushed (if required) `./pattern.sh make install` to provision a cluster. - -> [!TIP] -> The branch and default origin you have checked-out in your local repository is used to determine what ArgoCD and the patterns operator should reconcile against. Typical choices are to use the main for your fork. - -## Cluster setup (if not already setup) - -### Single cluster install on an OCP cluster on azure using Red Hat Demo Platform - -Red Hat a demo platform. This allows easy access for Red Hat associates and partners to ephemeral cloud resources. The pattern is known to work with this setup. - -1. 
Get the [openshift installer](https://console.redhat.com/openshift/downloads) - 1. **NOTE: openshift installer must be updated regularly if you want to automatically provision the latest versions of OCP** -2. Get access to an [Azure Subscription Based Blank Open Environment](https://catalog.demo.redhat.com/catalog?category=Open_Environments&search=azure&item=babylon-catalog-prod%2Fazure-gpte.open-environment-azure-subscription.prod). -3. Import the required azure environmental variables (see code block below) -4. Ensure certificates are configured (via let's encrypt or do so manually) -5. Run the wrapper install script - 1. `bash ./rhdp/wrapper.sh azure-region-code` - 2. Where azure region code is `eastasia`, `useast2` etc. -6. You *should* be done - 1. You *may* need to recreate the hello world peer-pods depending on timeouts. +Required environment variables (provided by your RHDP environment): ```shell - export GUID= - export CLIENT_ID= - export PASSWORD= - export TENANT= - export SUBSCRIPTION= - export RESOURCEGROUP= +export GUID= +export CLIENT_ID= +export PASSWORD= +export TENANT= +export SUBSCRIPTION= +export RESOURCEGROUP= ``` -### Single cluster install on plain old azure *not* using Red Hat Demo Platform - -> [!TIP] -> Don't use the default node sizes.. increase the node sizes such as below - -1. Login to console.redhat.com -2. Get the openshift installer -3. Login to azure locally. -4. `openshift-install create install-config` - 1. Select azure - 2. For Red Hatter's and partners using RHDP make sure you select the same region for your account that you selected in RHDP -5. Change worker machine type e.g. change `type: Standard_D4s_v5` to `type: Standard_D8s_v5` or similar based on your needs. -6. `mkdir ./ocp-install && mv openshift-install.yaml ./ocp-install` -7. `openshift-install create cluster --dir=./ocp-install` -8. Once installed: - 1. Login to `oc` - 2. Configure Let's Encrypt (if required) - 3. 
`./pattern.sh make install` +Deployment commands: -### Multi cluster setup +- Single cluster: `bash rhdp/wrapper.sh ` (e.g. `bash rhdp/wrapper.sh eastasia`) +- Multi-cluster: `bash rhdp/wrapper-multicluster.sh ` -TBD +The wrapper scripts handle cluster provisioning via `openshift-install`, secret generation, PCR retrieval, and pattern installation.