feat: enhance ha-group handling

Add the group.topology.proxmox.sinextra.dev/ label to improve support for node selector and affinity rules.

Signed-off-by: Serge Logvinov <serge.logvinov@sinextra.dev>
This commit is contained in:
Serge Logvinov
2025-11-12 04:31:43 +07:00
committed by Serge
parent 0a31716c17
commit 706faa8d08
6 changed files with 90 additions and 43 deletions

View File

@@ -54,7 +54,8 @@ metadata:
# Proxmox specific labels
topology.proxmox.sinextra.dev/region: cluster-1
topology.proxmox.sinextra.dev/zone: pve-node-1
topology.proxmox.sinextra.dev/ha-group: default
# HA group labels - the same idea as node-role
group.topology.proxmox.sinextra.dev/${HAGroup}: ""
name: worker-1
spec:

View File

@@ -150,9 +150,16 @@ func (i *instances) InstanceShutdown(ctx context.Context, node *v1.Node) (bool,
vmID, region, err := provider.ParseProviderID(node.Spec.ProviderID)
if err != nil {
klog.ErrorS(err, "instances.InstanceShutdown() failed to parse providerID", "providerID", node.Spec.ProviderID)
if i.provider == providerconfig.ProviderDefault {
klog.ErrorS(err, "instances.InstanceShutdown() failed to parse providerID", "providerID", node.Spec.ProviderID)
}
return false, nil
vmID, region, err = i.parseProviderIDFromNode(node)
if err != nil {
klog.ErrorS(err, "instances.InstanceShutdown() failed to parse providerID from node", "node", klog.KObj(node))
return false, nil
}
}
px, err := i.c.pxpool.GetProxmoxCluster(region)
@@ -239,16 +246,29 @@ func (i *instances) InstanceMetadata(ctx context.Context, node *v1.Node) (*cloud
AdditionalLabels: labels,
}
if i.zoneAsHAGroup {
haGroup, err := i.c.pxpool.GetNodeGroup(ctx, info.Region, info.Node)
if err != nil {
haGroups, err := i.c.pxpool.GetNodeHAGroups(ctx, info.Region, info.Node)
if err != nil {
if !errors.Is(err, proxmoxpool.ErrHAGroupNotFound) {
klog.ErrorS(err, "instances.InstanceMetadata() failed to get HA group for the node", "node", klog.KRef("", node.Name), "region", info.Region)
return nil, err
}
}
metadata.Zone = haGroup
labels[LabelTopologyHAGroup] = haGroup
for _, g := range haGroups {
labels[LabelTopologyHAGroupPrefix+g] = ""
}
if i.zoneAsHAGroup {
if len(haGroups) == 0 {
err := fmt.Errorf("cannot set zone as HA-Group")
klog.ErrorS(err, "instances.InstanceMetadata() no HA groups found for the node", "node", klog.KRef("", node.Name))
return nil, err
}
metadata.Zone = haGroups[0]
labels[LabelTopologyZone] = haGroups[0]
}
if !hasUninitializedTaint(node) {
@@ -284,31 +304,17 @@ func (i *instances) getInstanceInfo(ctx context.Context, node *v1.Node) (*instan
vmID, region, err = provider.ParseProviderID(providerID)
if err != nil {
if i.provider == providerconfig.ProviderDefault {
klog.V(4).InfoS("instances.getInstanceInfo() failed to parse providerID", "node", klog.KObj(node), "providerID", providerID)
klog.ErrorS(err, "instances.getInstanceInfo() failed to parse providerID", "node", klog.KObj(node), "providerID", providerID)
}
// ProviderID parsing failed, probably cluster is running with kubernetes distribution
if node.Annotations[AnnotationProxmoxInstanceID] != "" {
region = node.Labels[LabelTopologyRegion]
if region == "" {
region = node.Labels[v1.LabelTopologyRegion]
}
vmID, err = strconv.Atoi(node.Annotations[AnnotationProxmoxInstanceID])
if err != nil {
return nil, fmt.Errorf("instances.getInstanceInfo() parse annotation error: %v", err)
}
if _, err := i.c.pxpool.GetProxmoxCluster(region); err == nil {
providerID = provider.GetProviderIDFromID(region, vmID)
klog.V(4).InfoS("instances.getInstanceInfo() set providerID", "node", klog.KObj(node), "providerID", providerID)
}
vmID, region, err = i.parseProviderIDFromNode(node)
if err != nil {
klog.ErrorS(err, "instances.getInstanceInfo() failed to parse providerID from node", "node", klog.KObj(node))
}
}
if vmID == 0 || region == "" {
klog.V(4).InfoS("instances.getInstanceInfo() trying find node", "node", klog.KObj(node), "providerID", providerID)
klog.V(4).InfoS("instances.getInstanceInfo() trying to find node in cluster", "node", klog.KObj(node), "providerID", providerID)
mc := metrics.NewMetricContext("findVmByNode")
@@ -325,10 +331,6 @@ func (i *instances) getInstanceInfo(ctx context.Context, node *v1.Node) (*instan
return nil, err
}
}
providerID = provider.GetProviderIDFromID(region, vmID)
klog.V(4).InfoS("instances.getInstanceInfo() set providerID", "node", klog.KObj(node), "providerID", providerID)
}
px, err := i.c.pxpool.GetProxmoxCluster(region)
@@ -379,3 +381,25 @@ func (i *instances) getInstanceInfo(ctx context.Context, node *v1.Node) (*instan
return info, nil
}
func (i *instances) parseProviderIDFromNode(node *v1.Node) (vmID int, region string, err error) {
if node.Annotations[AnnotationProxmoxInstanceID] != "" {
region = node.Labels[LabelTopologyRegion]
if region == "" {
region = node.Labels[v1.LabelTopologyRegion]
}
vmID, err = strconv.Atoi(node.Annotations[AnnotationProxmoxInstanceID])
if err != nil {
return 0, "", fmt.Errorf("instances.getProviderIDFromNode() parse annotation error: %v", err)
}
if _, err := i.c.pxpool.GetProxmoxCluster(region); err != nil {
return 0, "", fmt.Errorf("instances.getProviderIDFromNode() get cluster error: %v", err)
}
return vmID, region, nil
}
return 0, "", fmt.Errorf("instances.getProviderIDFromNode() no annotation found")
}

View File

@@ -665,8 +665,9 @@ func (ts *configuredTestSuite) TestInstanceMetadata() {
Region: "cluster-1",
Zone: "pve-1",
AdditionalLabels: map[string]string{
"topology.proxmox.sinextra.dev/region": "cluster-1",
"topology.proxmox.sinextra.dev/zone": "pve-1",
"group.topology.proxmox.sinextra.dev/rnd": "",
"topology.proxmox.sinextra.dev/region": "cluster-1",
"topology.proxmox.sinextra.dev/zone": "pve-1",
},
},
},
@@ -717,8 +718,9 @@ func (ts *configuredTestSuite) TestInstanceMetadata() {
Region: "cluster-1",
Zone: "pve-1",
AdditionalLabels: map[string]string{
"topology.proxmox.sinextra.dev/region": "cluster-1",
"topology.proxmox.sinextra.dev/zone": "pve-1",
"group.topology.proxmox.sinextra.dev/rnd": "",
"topology.proxmox.sinextra.dev/region": "cluster-1",
"topology.proxmox.sinextra.dev/zone": "pve-1",
},
},
},

View File

@@ -23,6 +23,6 @@ const (
// LabelTopologyZone is the label used to store the Proxmox zone name.
LabelTopologyZone = "topology." + Group + "/zone"
// LabelTopologyHAGroup is the label used to store the Proxmox HA group name.
LabelTopologyHAGroup = "topology." + Group + "/ha-group"
// LabelTopologyHAGroupPrefix is the prefix for labels used to store Proxmox HA group information.
LabelTopologyHAGroupPrefix = "group.topology." + Group + "/"
)

View File

@@ -25,6 +25,7 @@ import (
"fmt"
"net/http"
"os"
"slices"
"strings"
proxmox "github.com/luthermonson/go-proxmox"
@@ -192,16 +193,18 @@ func (c *ProxmoxPool) DeleteVMByIDInRegion(ctx context.Context, region string, v
return px.DeleteVMByID(ctx, vm.Node, int(vm.VMID))
}
// GetNodeGroup returns a Proxmox node ha-group in a given region.
func (c *ProxmoxPool) GetNodeGroup(ctx context.Context, region string, node string) (string, error) {
// GetNodeHAGroups returns a Proxmox node ha-group in a given region for the node.
func (c *ProxmoxPool) GetNodeHAGroups(ctx context.Context, region string, node string) ([]string, error) {
groups := []string{}
px, err := c.GetProxmoxCluster(region)
if err != nil {
return "", err
return nil, err
}
haGroups, err := px.GetHAGroupList(ctx)
if err != nil {
return "", fmt.Errorf("error get ha-groups %v", err)
return nil, fmt.Errorf("error get ha-groups %v", err)
}
for _, g := range haGroups {
@@ -211,12 +214,18 @@ func (c *ProxmoxPool) GetNodeGroup(ctx context.Context, region string, node stri
for n := range strings.SplitSeq(g.Nodes, ",") {
if node == strings.Split(n, ":")[0] {
return g.Group, nil
groups = append(groups, g.Group)
}
}
}
return "", ErrHAGroupNotFound
if len(groups) > 0 {
slices.Sort(groups)
return groups, nil
}
return nil, ErrHAGroupNotFound
}
// FindVMByNode find a VM by kubernetes node resource in all Proxmox clusters.

View File

@@ -22,6 +22,8 @@ import (
"github.com/jarcoal/httpmock"
"github.com/luthermonson/go-proxmox"
goproxmox "github.com/sergelogvinov/go-proxmox"
)
// SetupMockResponders sets up the HTTP mock responders for Proxmox API calls.
@@ -38,6 +40,15 @@ func SetupMockResponders() {
"data": proxmox.NodeStatuses{{Name: "pve-1"}, {Name: "pve-2"}, {Name: "pve-3"}, {Name: "pve-4"}},
})
})
httpmock.RegisterResponder(http.MethodGet, `=~/cluster/ha/groups`,
func(_ *http.Request) (*http.Response, error) {
return httpmock.NewJsonResponse(200, map[string]any{
"data": []goproxmox.HAGroup{
{Group: "rnd", Type: "group", Nodes: "pve-1,pve-2"},
{Group: "dev", Type: "group", Nodes: "pve-4"},
},
})
})
httpmock.RegisterResponder(http.MethodGet, "https://127.0.0.2:8006/api2/json/cluster/resources",
func(_ *http.Request) (*http.Response, error) {