From 706faa8d088bb0467770d364b374f060398e9b25 Mon Sep 17 00:00:00 2001 From: Serge Logvinov Date: Wed, 12 Nov 2025 04:31:43 +0700 Subject: [PATCH] feat: enhance ha-group handling Add the group.topology.proxmox.sinextra.dev/ label to improve support for node selector and affinity rules. Signed-off-by: Serge Logvinov --- README.md | 3 +- pkg/proxmox/instances.go | 84 ++++++++++++++++++++++------------- pkg/proxmox/instances_test.go | 10 +++-- pkg/proxmox/labels.go | 4 +- pkg/proxmoxpool/pool.go | 21 ++++++--- test/cluster/cluster.go | 11 +++++ 6 files changed, 90 insertions(+), 43 deletions(-) diff --git a/README.md b/README.md index 018e57f..737e9dc 100644 --- a/README.md +++ b/README.md @@ -54,7 +54,8 @@ metadata: # Proxmox specific labels topology.proxmox.sinextra.dev/region: cluster-1 topology.proxmox.sinextra.dev/zone: pve-node-1 - topology.proxmox.sinextra.dev/ha-group: default + # HA group labels - the same idea as node-role + group.topology.proxmox.sinextra.dev/${HAGroup}: "" name: worker-1 spec: diff --git a/pkg/proxmox/instances.go b/pkg/proxmox/instances.go index bd0155c..5978ac0 100644 --- a/pkg/proxmox/instances.go +++ b/pkg/proxmox/instances.go @@ -150,9 +150,16 @@ func (i *instances) InstanceShutdown(ctx context.Context, node *v1.Node) (bool, vmID, region, err := provider.ParseProviderID(node.Spec.ProviderID) if err != nil { - klog.ErrorS(err, "instances.InstanceShutdown() failed to parse providerID", "providerID", node.Spec.ProviderID) + if i.provider == providerconfig.ProviderDefault { + klog.ErrorS(err, "instances.InstanceShutdown() failed to parse providerID", "providerID", node.Spec.ProviderID) + } - return false, nil + vmID, region, err = i.parseProviderIDFromNode(node) + if err != nil { + klog.ErrorS(err, "instances.InstanceShutdown() failed to parse providerID from node", "node", klog.KObj(node)) + + return false, nil + } } px, err := i.c.pxpool.GetProxmoxCluster(region) @@ -239,16 +246,29 @@ func (i *instances) InstanceMetadata(ctx context.Context, node *v1.Node) (*cloud AdditionalLabels: labels, } - if i.zoneAsHAGroup { - haGroup, err := i.c.pxpool.GetNodeGroup(ctx, info.Region, info.Node) - if err != nil { + haGroups, err := i.c.pxpool.GetNodeHAGroups(ctx, info.Region, info.Node) + if err != nil { + if !errors.Is(err, proxmoxpool.ErrHAGroupNotFound) { klog.ErrorS(err, "instances.InstanceMetadata() failed to get HA group for the node", "node", klog.KRef("", node.Name), "region", info.Region) return nil, err } + } - metadata.Zone = haGroup - labels[LabelTopologyHAGroup] = haGroup + for _, g := range haGroups { + labels[LabelTopologyHAGroupPrefix+g] = "" + } + + if i.zoneAsHAGroup { + if len(haGroups) == 0 { + err := fmt.Errorf("cannot set zone as HA-Group") + klog.ErrorS(err, "instances.InstanceMetadata() no HA groups found for the node", "node", klog.KRef("", node.Name)) + + return nil, err + } + + metadata.Zone = haGroups[0] + labels[LabelTopologyZone] = haGroups[0] } if !hasUninitializedTaint(node) { @@ -284,31 +304,17 @@ func (i *instances) getInstanceInfo(ctx context.Context, node *v1.Node) (*instan vmID, region, err = provider.ParseProviderID(providerID) if err != nil { if i.provider == providerconfig.ProviderDefault { - klog.V(4).InfoS("instances.getInstanceInfo() failed to parse providerID", "node", klog.KObj(node), "providerID", providerID) + klog.ErrorS(err, "instances.getInstanceInfo() failed to parse providerID", "node", klog.KObj(node), "providerID", providerID) } - // ProviderID parsing failed, probably cluster is running with kubernetes distribution - if node.Annotations[AnnotationProxmoxInstanceID] != "" { - region = node.Labels[LabelTopologyRegion] - if region == "" { - region = node.Labels[v1.LabelTopologyRegion] - } - - vmID, err = strconv.Atoi(node.Annotations[AnnotationProxmoxInstanceID]) - if err != nil { - return nil, fmt.Errorf("instances.getInstanceInfo() parse annotation error: %v", err) - } - - if _, err := i.c.pxpool.GetProxmoxCluster(region); err == nil { - providerID = provider.GetProviderIDFromID(region, vmID) - - klog.V(4).InfoS("instances.getInstanceInfo() set providerID", "node", klog.KObj(node), "providerID", providerID) - } + vmID, region, err = i.parseProviderIDFromNode(node) + if err != nil { + klog.ErrorS(err, "instances.getInstanceInfo() failed to parse providerID from node", "node", klog.KObj(node)) } } if vmID == 0 || region == "" { - klog.V(4).InfoS("instances.getInstanceInfo() trying find node", "node", klog.KObj(node), "providerID", providerID) + klog.V(4).InfoS("instances.getInstanceInfo() trying to find node in cluster", "node", klog.KObj(node), "providerID", providerID) mc := metrics.NewMetricContext("findVmByNode") @@ -325,10 +331,6 @@ func (i *instances) getInstanceInfo(ctx context.Context, node *v1.Node) (*instan return nil, err } } - - providerID = provider.GetProviderIDFromID(region, vmID) - - klog.V(4).InfoS("instances.getInstanceInfo() set providerID", "node", klog.KObj(node), "providerID", providerID) } px, err := i.c.pxpool.GetProxmoxCluster(region) @@ -379,3 +381,25 @@ func (i *instances) getInstanceInfo(ctx context.Context, node *v1.Node) (*instan return info, nil } + +func (i *instances) parseProviderIDFromNode(node *v1.Node) (vmID int, region string, err error) { + if node.Annotations[AnnotationProxmoxInstanceID] != "" { + region = node.Labels[LabelTopologyRegion] + if region == "" { + region = node.Labels[v1.LabelTopologyRegion] + } + + vmID, err = strconv.Atoi(node.Annotations[AnnotationProxmoxInstanceID]) + if err != nil { + return 0, "", fmt.Errorf("instances.getProviderIDFromNode() parse annotation error: %v", err) + } + + if _, err := i.c.pxpool.GetProxmoxCluster(region); err != nil { + return 0, "", fmt.Errorf("instances.getProviderIDFromNode() get cluster error: %v", err) + } + + return vmID, region, nil + } + + return 0, "", fmt.Errorf("instances.getProviderIDFromNode() no annotation found") +} diff --git a/pkg/proxmox/instances_test.go b/pkg/proxmox/instances_test.go index eadff4c..dd2972e 100644 --- a/pkg/proxmox/instances_test.go +++ b/pkg/proxmox/instances_test.go @@ -665,8 +665,9 @@ func (ts *configuredTestSuite) TestInstanceMetadata() { Region: "cluster-1", Zone: "pve-1", AdditionalLabels: map[string]string{ - "topology.proxmox.sinextra.dev/region": "cluster-1", - "topology.proxmox.sinextra.dev/zone": "pve-1", + "group.topology.proxmox.sinextra.dev/rnd": "", + "topology.proxmox.sinextra.dev/region": "cluster-1", + "topology.proxmox.sinextra.dev/zone": "pve-1", }, }, }, @@ -717,8 +718,9 @@ func (ts *configuredTestSuite) TestInstanceMetadata() { Region: "cluster-1", Zone: "pve-1", AdditionalLabels: map[string]string{ - "topology.proxmox.sinextra.dev/region": "cluster-1", - "topology.proxmox.sinextra.dev/zone": "pve-1", + "group.topology.proxmox.sinextra.dev/rnd": "", + "topology.proxmox.sinextra.dev/region": "cluster-1", + "topology.proxmox.sinextra.dev/zone": "pve-1", }, }, }, diff --git a/pkg/proxmox/labels.go b/pkg/proxmox/labels.go index 7794a3a..aa11b8a 100644 --- a/pkg/proxmox/labels.go +++ b/pkg/proxmox/labels.go @@ -23,6 +23,6 @@ const ( // LabelTopologyZone is the label used to store the Proxmox zone name. LabelTopologyZone = "topology." + Group + "/zone" - // LabelTopologyHAGroup is the label used to store the Proxmox HA group name. - LabelTopologyHAGroup = "topology." + Group + "/ha-group" + // LabelTopologyHAGroupPrefix is the prefix for labels used to store Proxmox HA group information. + LabelTopologyHAGroupPrefix = "group.topology." + Group + "/" ) diff --git a/pkg/proxmoxpool/pool.go b/pkg/proxmoxpool/pool.go index 315e544..d74e17a 100644 --- a/pkg/proxmoxpool/pool.go +++ b/pkg/proxmoxpool/pool.go @@ -25,6 +25,7 @@ import ( "fmt" "net/http" "os" + "slices" "strings" proxmox "github.com/luthermonson/go-proxmox" @@ -192,16 +193,18 @@ func (c *ProxmoxPool) DeleteVMByIDInRegion(ctx context.Context, region string, v return px.DeleteVMByID(ctx, vm.Node, int(vm.VMID)) } -// GetNodeGroup returns a Proxmox node ha-group in a given region. -func (c *ProxmoxPool) GetNodeGroup(ctx context.Context, region string, node string) (string, error) { +// GetNodeHAGroups returns a Proxmox node ha-group in a given region for the node. +func (c *ProxmoxPool) GetNodeHAGroups(ctx context.Context, region string, node string) ([]string, error) { + groups := []string{} + px, err := c.GetProxmoxCluster(region) if err != nil { - return "", err + return nil, err } haGroups, err := px.GetHAGroupList(ctx) if err != nil { - return "", fmt.Errorf("error get ha-groups %v", err) + return nil, fmt.Errorf("error get ha-groups %v", err) } for _, g := range haGroups { @@ -211,12 +214,18 @@ func (c *ProxmoxPool) GetNodeGroup(ctx context.Context, region string, node stri for n := range strings.SplitSeq(g.Nodes, ",") { if node == strings.Split(n, ":")[0] { - return g.Group, nil + groups = append(groups, g.Group) } } } - return "", ErrHAGroupNotFound + if len(groups) > 0 { + slices.Sort(groups) + + return groups, nil + } + + return nil, ErrHAGroupNotFound } // FindVMByNode find a VM by kubernetes node resource in all Proxmox clusters. diff --git a/test/cluster/cluster.go b/test/cluster/cluster.go index 8044af8..76824b9 100644 --- a/test/cluster/cluster.go +++ b/test/cluster/cluster.go @@ -22,6 +22,8 @@ import ( "github.com/jarcoal/httpmock" "github.com/luthermonson/go-proxmox" + + goproxmox "github.com/sergelogvinov/go-proxmox" ) // SetupMockResponders sets up the HTTP mock responders for Proxmox API calls. @@ -38,6 +40,15 @@ func SetupMockResponders() { "data": proxmox.NodeStatuses{{Name: "pve-1"}, {Name: "pve-2"}, {Name: "pve-3"}, {Name: "pve-4"}}, }) }) + httpmock.RegisterResponder(http.MethodGet, `=~/cluster/ha/groups`, + func(_ *http.Request) (*http.Response, error) { + return httpmock.NewJsonResponse(200, map[string]any{ + "data": []goproxmox.HAGroup{ + {Group: "rnd", Type: "group", Nodes: "pve-1,pve-2"}, + {Group: "dev", Type: "group", Nodes: "pve-4"}, + }, + }) + }) httpmock.RegisterResponder(http.MethodGet, "https://127.0.0.2:8006/api2/json/cluster/resources", func(_ *http.Request) (*http.Response, error) {