Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
33 commits
Select commit Hold shift + click to select a range
409f754
Add History CRD and related types for scheduling history management
SoWieMarkus Mar 12, 2026
97892a8
Add note that we keep the name of the flag to avoid breaking changes
SoWieMarkus Mar 12, 2026
5d24cc4
Remove explanation controller from configs
SoWieMarkus Mar 12, 2026
57b7cd1
Remove explanation controller
SoWieMarkus Mar 12, 2026
1483969
Unify scheduling intent types
SoWieMarkus Mar 12, 2026
326b2cd
Add histories resource to ClusterRole and update event permissions
SoWieMarkus Mar 12, 2026
e6f8124
Refactor history types to include CurrentDecision structure and enhan…
SoWieMarkus Mar 12, 2026
f09e933
Implement HistoryManager for decision tracking in scheduling controllers
SoWieMarkus Mar 12, 2026
d6a7ba6
Merge branch 'main' into refactor-decision-crd-v2
SoWieMarkus Mar 12, 2026
ca94a11
Refactor clean up tasks to clean up history crd
SoWieMarkus Mar 12, 2026
f1aa74d
Refactor explanation formatting in TestGenerateExplanation for clarity
SoWieMarkus Mar 12, 2026
ea4f99e
Lint fix
SoWieMarkus Mar 12, 2026
36b9195
Add AvailabilityZone field to HistorySpec and update related logic
SoWieMarkus Mar 13, 2026
3dd85ca
Add host list cap to explanation generation and enhance tests for lar…
SoWieMarkus Mar 13, 2026
328a2ef
Refactor UpsertFromGoroutine test to use WaitGroup for synchronizatio…
SoWieMarkus Mar 13, 2026
45977be
Refactor comments in DecisionsCleanup functions to clarify history en…
SoWieMarkus Mar 13, 2026
c2abd19
Add nil check for decision parameter in Upsert method
SoWieMarkus Mar 13, 2026
e82b766
Add MaxItems validation to OrderedHosts in SchedulingHistoryEntry
SoWieMarkus Mar 13, 2026
0bcae9d
Add orderedHosts to required fields in History spec
SoWieMarkus Mar 13, 2026
a790c7b
Refactor decision upsert logic to remove goroutine and ensure proper …
SoWieMarkus Mar 13, 2026
47ea72b
lint fix
SoWieMarkus Mar 13, 2026
5f9b2f2
Update ResourceID format to include pod namespace in decision spec
SoWieMarkus Mar 13, 2026
572adbf
Make OrderedHosts field optional in SchedulingHistoryEntry
SoWieMarkus Mar 13, 2026
8fa4dbb
Fix ResourceID format in decision spec and history deletion to use do…
SoWieMarkus Mar 13, 2026
64c5f5e
Refactor history upsert logic to use context and avoid goroutine for …
SoWieMarkus Mar 13, 2026
24f31f1
Handle nil OrderedHosts in Upsert method to prevent potential nil poi…
SoWieMarkus Mar 13, 2026
45d827b
Remove magic number
SoWieMarkus Mar 13, 2026
d9404b8
Refactor Upsert method to handle OrderedHosts more efficiently and us…
SoWieMarkus Mar 13, 2026
1e4a0e8
Refactor Upsert method to simplify OrderedHosts length check by remov…
SoWieMarkus Mar 13, 2026
d69b026
Refactor Upsert method to improve history management and handle concu…
SoWieMarkus Mar 13, 2026
a3736b6
Refactor Upsert method to add first attempt tracking for retry logic …
SoWieMarkus Mar 13, 2026
b797d04
Refactor ProcessNewMachine method to use context in Upsert call for h…
SoWieMarkus Mar 13, 2026
a3044e6
Refactor history validation to enforce max items limit for OrderedHos…
SoWieMarkus Mar 13, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 7 additions & 8 deletions api/external/nova/messages.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ import (
"log/slog"
"strings"

"github.com/cobaltcore-dev/cortex/api/v1alpha1"
"github.com/cobaltcore-dev/cortex/internal/scheduling/lib"
)

Expand Down Expand Up @@ -129,23 +130,21 @@ func (req ExternalSchedulerRequest) GetHypervisorType() (HypervisorType, error)
return "", errors.New("hypervisor type not specified in flavor extra specs")
}

type RequestIntent string

const (
// LiveMigrationIntent indicates that the request is intended for live migration.
LiveMigrationIntent RequestIntent = "live_migration"
LiveMigrationIntent v1alpha1.SchedulingIntent = "live_migration"
// RebuildIntent indicates that the request is intended for rebuilding a VM.
RebuildIntent RequestIntent = "rebuild"
RebuildIntent v1alpha1.SchedulingIntent = "rebuild"
// ResizeIntent indicates that the request is intended for resizing a VM.
ResizeIntent RequestIntent = "resize"
ResizeIntent v1alpha1.SchedulingIntent = "resize"
// EvacuateIntent indicates that the request is intended for evacuating a VM.
EvacuateIntent RequestIntent = "evacuate"
EvacuateIntent v1alpha1.SchedulingIntent = "evacuate"
// CreateIntent indicates that the request is intended for creating a new VM.
CreateIntent RequestIntent = "create"
CreateIntent v1alpha1.SchedulingIntent = "create"
)

// GetIntent analyzes the request spec and determines the intent of the scheduling request.
func (req ExternalSchedulerRequest) GetIntent() (RequestIntent, error) {
func (req ExternalSchedulerRequest) GetIntent() (v1alpha1.SchedulingIntent, error) {
str, err := req.Spec.Data.GetSchedulerHintStr("_nova_check_type")
if err != nil {
return "", err
Expand Down
4 changes: 3 additions & 1 deletion api/external/nova/messages_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,15 @@ package api
import (
"encoding/json"
"testing"

"github.com/cobaltcore-dev/cortex/api/v1alpha1"
)

func TestGetIntent(t *testing.T) {
tests := []struct {
name string
schedulerHints map[string]any
expectedIntent RequestIntent
expectedIntent v1alpha1.SchedulingIntent
expectError bool
}{
{
Expand Down
123 changes: 123 additions & 0 deletions api/v1alpha1/history_types.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,123 @@
// Copyright SAP SE
// SPDX-License-Identifier: Apache-2.0

package v1alpha1

import (
corev1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

// SchedulingIntent defines the intent of a scheduling decision.
// It is a string type, so intent values are declared as string constants.
type SchedulingIntent string

// Other intents can be defined by the operators.
const (
// SchedulingIntentUnknown is used as the default intent if the operator
// does not specify one.
SchedulingIntentUnknown SchedulingIntent = "Unknown"
)

// SchedulingHistoryEntry is a single entry in the history of scheduling
// decisions. It records when the decision was made, which pipeline and intent
// it belonged to, the top-ranked hosts, and whether it succeeded.
type SchedulingHistoryEntry struct {
// The timestamp of when the decision was made.
Timestamp metav1.Time `json:"timestamp"`
// The pipeline that was used for the decision.
PipelineRef corev1.ObjectReference `json:"pipelineRef"`
// The intent of the decision (e.g., initial scheduling, rescheduling, etc.).
Intent SchedulingIntent `json:"intent"`
// The top hosts ordered by score for the decision (limited to 3).
// This is not a complete list of all candidates — only the highest-ranked
// hosts are retained to keep the history compact.
// +kubebuilder:validation:Optional
// +kubebuilder:validation:MaxItems=3
OrderedHosts []string `json:"orderedHosts,omitempty"`
// Whether the scheduling decision was successful.
// +kubebuilder:validation:Optional
Successful bool `json:"successful"`
}

// HistorySpec identifies the resource whose scheduling history is tracked:
// its scheduling domain, its resource ID and, optionally, its availability zone.
type HistorySpec struct {
// The scheduling domain this history object belongs to.
SchedulingDomain SchedulingDomain `json:"schedulingDomain"`
// The resource ID this history belongs to (e.g., the UUID of a nova instance).
ResourceID string `json:"resourceID"`
// The availability zone of the resource, if known. Only set for scheduling
// domains that provide AZ information (e.g., Nova).
// +kubebuilder:validation:Optional
AvailabilityZone *string `json:"availabilityZone,omitempty"`
}

// CurrentDecision holds the full context of the most recent scheduling
// decision. When a new decision arrives the previous CurrentDecision is
// compacted into a SchedulingHistoryEntry and appended to History.
//
// Compared to a SchedulingHistoryEntry it additionally carries the selected
// target host and a human-readable explanation.
type CurrentDecision struct {
// The timestamp of when the decision was made.
Timestamp metav1.Time `json:"timestamp"`
// The pipeline that was used for the decision.
PipelineRef corev1.ObjectReference `json:"pipelineRef"`
// The intent of the decision (e.g., initial scheduling, rescheduling, etc.).
Intent SchedulingIntent `json:"intent"`
// Whether the scheduling decision was successful.
Successful bool `json:"successful"`
// The target host selected for the resource. nil when no host was found.
// +kubebuilder:validation:Optional
TargetHost *string `json:"targetHost,omitempty"`
// A human-readable explanation of the scheduling decision.
// +kubebuilder:validation:Optional
Explanation string `json:"explanation,omitempty"`
// The top hosts ordered by score (limited to 3).
// +kubebuilder:validation:Optional
// +kubebuilder:validation:MaxItems=3
OrderedHosts []string `json:"orderedHosts,omitempty"`
}

// HistoryStatus holds the observed scheduling decisions for a resource: the
// most recent decision with full context, the compacted past decisions, and
// the conditions of the history object.
type HistoryStatus struct {
// NOTE(review): Current is a non-pointer struct, and encoding/json's
// omitempty never omits struct values, so a zero CurrentDecision is still
// serialized — confirm this is intended. Also, no validation marker on
// History enforces the "last 10" limit; presumably the writer trims the
// slice — verify against the code that updates this status.

// Current represents the latest scheduling decision with full context.
// +kubebuilder:validation:Optional
Current CurrentDecision `json:"current,omitempty"`
// History of past scheduling decisions (limited to last 10).
// +kubebuilder:validation:Optional
History []SchedulingHistoryEntry `json:"history,omitempty"`

// Conditions represent the latest available observations of the history's state.
// +kubebuilder:validation:Optional
Conditions []metav1.Condition `json:"conditions,omitempty" patchStrategy:"merge" patchMergeKey:"type"`
}

// +kubebuilder:object:root=true
// +kubebuilder:subresource:status
// +kubebuilder:resource:scope=Cluster
// +kubebuilder:printcolumn:name="Domain",type="string",JSONPath=".spec.schedulingDomain"
// +kubebuilder:printcolumn:name="Resource ID",type="string",JSONPath=".spec.resourceID"
// +kubebuilder:printcolumn:name="AZ",type="string",JSONPath=".spec.availabilityZone"
// +kubebuilder:printcolumn:name="Target Host",type="string",JSONPath=".status.current.targetHost"
// +kubebuilder:printcolumn:name="Status",type="string",JSONPath=".status.conditions[?(@.type=='Ready')].reason"
// +kubebuilder:printcolumn:name="Created",type="date",JSONPath=".metadata.creationTimestamp"

// History is the Schema for the history API.
// It is a cluster-scoped resource with a status subresource. Spec identifies
// the resource the history belongs to, and Status holds the most recent
// scheduling decision together with the past decisions.
type History struct {
metav1.TypeMeta `json:",inline"`

// Standard object metadata.
// +optional
metav1.ObjectMeta `json:"metadata,omitempty"`

// Spec defines the desired state of History.
// +required
Spec HistorySpec `json:"spec"`
// Status defines the observed state of History.
// +optional
Status HistoryStatus `json:"status,omitempty"`
}

// +kubebuilder:object:root=true

// HistoryList contains a list of History objects.
type HistoryList struct {
metav1.TypeMeta `json:",inline"`
metav1.ListMeta `json:"metadata,omitempty"`
// Items holds the History objects in the list.
Items []History `json:"items"`
}

// init registers the History and HistoryList types with SchemeBuilder.
func init() {
SchemeBuilder.Register(&History{}, &HistoryList{})
}
3 changes: 3 additions & 0 deletions api/v1alpha1/pipeline_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,9 @@ type PipelineSpec struct {

// If this pipeline should create decision objects.
// When this is false, the pipeline will still process requests.
// NOTE: This flag is intentionally kept as "createDecisions" to avoid
// breaking changes. It will be renamed when the deprecated Decision CRD
// is fully replaced in a future refactoring.
// +kubebuilder:default=false
CreateDecisions bool `json:"createDecisions,omitempty"`

Expand Down
158 changes: 158 additions & 0 deletions api/v1alpha1/zz_generated.deepcopy.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

14 changes: 0 additions & 14 deletions cmd/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,6 @@ import (
"github.com/cobaltcore-dev/cortex/internal/knowledge/extractor"
"github.com/cobaltcore-dev/cortex/internal/knowledge/kpis"
"github.com/cobaltcore-dev/cortex/internal/scheduling/cinder"
"github.com/cobaltcore-dev/cortex/internal/scheduling/explanation"
schedulinglib "github.com/cobaltcore-dev/cortex/internal/scheduling/lib"
"github.com/cobaltcore-dev/cortex/internal/scheduling/machines"
"github.com/cobaltcore-dev/cortex/internal/scheduling/manila"
Expand Down Expand Up @@ -443,19 +442,6 @@ func main() {
os.Exit(1)
}
}
if slices.Contains(mainConfig.EnabledControllers, "explanation-controller") {
// Setup a controller which will reconcile the history and explanation for
// decision resources.
explanationControllerConfig := conf.GetConfigOrDie[explanation.ControllerConfig]()
explanationController := &explanation.Controller{
Client: multiclusterClient,
Config: explanationControllerConfig,
}
if err := explanationController.SetupWithManager(mgr, multiclusterClient); err != nil {
setupLog.Error(err, "unable to create controller", "controller", "ExplanationController")
os.Exit(1)
}
}
if slices.Contains(mainConfig.EnabledControllers, "reservations-controller") {
monitor := reservationscontroller.NewControllerMonitor(multiclusterClient)
metrics.Registry.MustRegister(&monitor)
Expand Down
Loading