Revert "Revert "Modify nodes to register directly with the master.""

This reverts commit c53786ab31.
This commit is contained in:
CJ Cullen
2015-05-20 14:21:03 -07:00
parent a76bdd9710
commit bf646abf8d
12 changed files with 314 additions and 876 deletions

View File

@@ -17,6 +17,7 @@ limitations under the License.
package cloudprovider
import (
"errors"
"net"
"strings"
@@ -86,6 +87,8 @@ type Instances interface {
Release(name string) error
}
var InstanceNotFound = errors.New("instance not found")
// Zone represents the location of a particular machine.
type Zone struct {
FailureDomain string

View File

@@ -444,7 +444,10 @@ func (gce *GCECloud) getInstanceByName(name string) (*compute.Instance, error) {
name = canonicalizeInstanceName(name)
res, err := gce.service.Instances.Get(gce.projectID, gce.zone, name).Do()
if err != nil {
glog.Errorf("Failed to retrieve TargetInstance resource for instance:%s", name)
glog.Errorf("Failed to retrieve TargetInstance resource for instance: %s", name)
if apiErr, ok := err.(*googleapi.Error); ok && apiErr.Code == http.StatusNotFound {
return nil, cloudprovider.InstanceNotFound
}
return nil, err
}
return res, nil

View File

@@ -20,14 +20,9 @@ import (
"errors"
"fmt"
"net"
"strings"
"sync"
"sync/atomic"
"time"
"github.com/GoogleCloudPlatform/kubernetes/pkg/api"
apierrors "github.com/GoogleCloudPlatform/kubernetes/pkg/api/errors"
"github.com/GoogleCloudPlatform/kubernetes/pkg/api/resource"
"github.com/GoogleCloudPlatform/kubernetes/pkg/client"
"github.com/GoogleCloudPlatform/kubernetes/pkg/client/record"
"github.com/GoogleCloudPlatform/kubernetes/pkg/cloudprovider"
@@ -57,7 +52,6 @@ type NodeController struct {
cloud cloudprovider.Interface
matchRE string
staticResources *api.NodeResources
nodes []string
kubeClient client.Interface
recorder record.EventRecorder
registerRetryCount int
@@ -100,7 +94,6 @@ type NodeController struct {
func NewNodeController(
cloud cloudprovider.Interface,
matchRE string,
nodes []string,
staticResources *api.NodeResources,
kubeClient client.Interface,
registerRetryCount int,
@@ -125,7 +118,6 @@ func NewNodeController(
return &NodeController{
cloud: cloud,
matchRE: matchRE,
nodes: nodes,
staticResources: staticResources,
kubeClient: kubeClient,
recorder: recorder,
@@ -144,9 +136,9 @@ func NewNodeController(
}
// Generates num pod CIDRs that could be assigned to nodes.
func (nc *NodeController) generateCIDRs(num int) util.StringSet {
func generateCIDRs(clusterCIDR *net.IPNet, num int) util.StringSet {
res := util.NewStringSet()
cidrIP := nc.clusterCIDR.IP.To4()
cidrIP := clusterCIDR.IP.To4()
for i := 0; i < num; i++ {
// TODO: Make the CIDRs configurable.
b1 := byte(i >> 8)
@@ -156,37 +148,46 @@ func (nc *NodeController) generateCIDRs(num int) util.StringSet {
return res
}
// For each node from newNodes, finds its current spec in registeredNodes.
// If it is not there, it gets a new valid CIDR assigned.
func (nc *NodeController) reconcilePodCIDRs(newNodes, registeredNodes *api.NodeList) *api.NodeList {
registeredCIDRs := make(map[string]string)
availableCIDRs := nc.generateCIDRs(len(newNodes.Items) + len(registeredNodes.Items))
for _, node := range registeredNodes.Items {
registeredCIDRs[node.Name] = node.Spec.PodCIDR
availableCIDRs.Delete(node.Spec.PodCIDR)
}
for i, node := range newNodes.Items {
podCIDR, registered := registeredCIDRs[node.Name]
if !registered {
podCIDR, _ = availableCIDRs.PopAny()
// reconcilePodCIDRs looks at each node and assigns it a valid CIDR
// if it doesn't currently have one.
func (nc *NodeController) reconcilePodCIDRs(nodes *api.NodeList) {
glog.V(4).Infof("Reconciling pods cidrs for %d nodes", len(nodes.Items))
// TODO(roberthbailey): This seems inefficient. Why re-calculate CIDRs
// on each sync period?
availableCIDRs := generateCIDRs(nc.clusterCIDR, len(nodes.Items))
for _, node := range nodes.Items {
if node.Spec.PodCIDR != "" {
glog.V(4).Infof("CIDR %s is already being used by node %s", node.Spec.PodCIDR, node.Name)
availableCIDRs.Delete(node.Spec.PodCIDR)
}
}
for _, node := range nodes.Items {
if node.Spec.PodCIDR == "" {
podCIDR, found := availableCIDRs.PopAny()
if !found {
glog.Errorf("No available CIDR for node %s", node.Name)
continue
}
glog.V(4).Infof("Assigning node %s CIDR %s", node.Name, podCIDR)
node.Spec.PodCIDR = podCIDR
if err := nc.configureNodeCIDR(&node); err != nil {
glog.Errorf("Error configuring node %s: %s", node.Name, err)
// The newly assigned CIDR was not properly configured, so don't save it in the API server.
continue
}
if _, err := nc.kubeClient.Nodes().Update(&node); err != nil {
glog.Errorf("Unable to assign node %s CIDR %s: %v", node.Name, podCIDR, err)
}
}
newNodes.Items[i].Spec.PodCIDR = podCIDR
}
return newNodes
}
func (nc *NodeController) configureNodeCIDR(node *api.Node) {
func (nc *NodeController) configureNodeCIDR(node *api.Node) error {
instances, ok := nc.cloud.Instances()
if !ok {
glog.Errorf("Error configuring node %s: CloudProvider does not support Instances()", node.Name)
return
}
err := instances.Configure(node.Name, &node.Spec)
if err != nil {
glog.Errorf("Error configuring node %s: %s", node.Name, err)
// The newly assigned CIDR was not properly configured, so don't save it in the API server.
node.Spec.PodCIDR = ""
return fmt.Errorf("error configuring node %s: CloudProvider does not support Instances()", node.Name)
}
return instances.Configure(node.Name, &node.Spec)
}
func (nc *NodeController) unassignNodeCIDR(nodeName string) {
@@ -195,59 +196,14 @@ func (nc *NodeController) unassignNodeCIDR(nodeName string) {
glog.Errorf("Error deconfiguring node %s: CloudProvider does not support Instances()", nodeName)
return
}
err := instances.Release(nodeName)
if err != nil {
if err := instances.Release(nodeName); err != nil {
glog.Errorf("Error deconfiguring node %s: %s", nodeName, err)
}
}
// Run creates initial node list and start syncing instances from cloudprovider, if any.
// It also starts syncing or monitoring cluster node status.
// 1. registerNodes() is called only once to register all initial nodes (from cloudprovider
// or from command line flag). To make cluster bootstrap faster, node controller populates
// node addresses.
// 2. syncCloudNodes() is called periodically (if enabled) to sync instances from cloudprovider.
// Node created here will only have specs.
// 3. monitorNodeStatus() is called periodically to incorporate the results of node status
// pushed from kubelet to master.
// Run starts an asynchronous loop that monitors the status of cluster nodes.
func (nc *NodeController) Run(period time.Duration, syncNodeList bool) {
// Register initial set of nodes with their status set.
var nodes *api.NodeList
var err error
if nc.isRunningCloudProvider() {
if syncNodeList {
if nodes, err = nc.getCloudNodesWithSpec(); err != nil {
glog.Errorf("Error loading initial node from cloudprovider: %v", err)
}
} else {
nodes = &api.NodeList{}
}
} else {
if nodes, err = nc.getStaticNodesWithSpec(); err != nil {
glog.Errorf("Error loading initial static nodes: %v", err)
}
}
if nodes, err = nc.populateAddresses(nodes); err != nil {
glog.Errorf("Error getting nodes ips: %v", err)
}
if nc.isRunningCloudProvider() && nc.allocateNodeCIDRs {
nc.reconcilePodCIDRs(nodes, &api.NodeList{})
}
if err := nc.registerNodes(nodes, nc.registerRetryCount, period); err != nil {
glog.Errorf("Error registering node list %+v: %v", nodes, err)
}
// Start syncing node list from cloudprovider.
if syncNodeList && nc.isRunningCloudProvider() {
go util.Forever(func() {
if err := nc.syncCloudNodes(); err != nil {
glog.Errorf("Error syncing cloud: %v", err)
}
}, period)
}
// Start monitoring node status.
// Incorporate the results of node status pushed from kubelet to master.
go util.Forever(func() {
if err := nc.monitorNodeStatus(); err != nil {
glog.Errorf("Error monitoring node status: %v", err)
@@ -255,165 +211,6 @@ func (nc *NodeController) Run(period time.Duration, syncNodeList bool) {
}, nc.nodeMonitorPeriod)
}
// registerNodes creates every distinct node of the given list in the API
// server. Names are canonicalized first and duplicates are registered once.
// Each node is handled in its own goroutine, which attempts the creation up to
// retryCount times and sleeps retryInterval after every failed attempt. A node
// that already exists counts as registered. ErrRegistration is returned when
// at least one node could not be registered; an empty list is a no-op.
func (nc *NodeController) registerNodes(nodes *api.NodeList, retryCount int, retryInterval time.Duration) error {
	if len(nodes.Items) == 0 {
		return nil
	}
	nodes = nc.canonicalizeName(nodes)

	var (
		pending    sync.WaitGroup
		registered int32
	)
	seen := util.NewStringSet()
	for idx := range nodes.Items {
		candidate := &nodes.Items[idx]
		if seen.Has(candidate.Name) {
			// Already scheduled for registration under this name.
			continue
		}
		seen.Insert(candidate.Name)
		pending.Add(1)
		go func(n *api.Node) {
			defer pending.Done()
			for attempt := 0; attempt < retryCount; attempt++ {
				if nc.isRunningCloudProvider() && nc.allocateNodeCIDRs {
					nc.configureNodeCIDR(n)
				}
				_, err := nc.kubeClient.Nodes().Create(n)
				if err == nil || apierrors.IsAlreadyExists(err) {
					glog.Infof("Registered node in registry: %v", n.Name)
					atomic.AddInt32(&registered, 1)
					return
				}
				glog.Errorf("Error registering node %v (retries left: %v): %v", n.Name, retryCount-attempt-1, err)
				time.Sleep(retryInterval)
			}
			glog.Errorf("Unable to register node %v", n.Name)
		}(candidate)
	}
	pending.Wait()

	// Success means every distinct name made it into the registry.
	if atomic.LoadInt32(&registered) == int32(seen.Len()) {
		return nil
	}
	return ErrRegistration
}
// syncCloudNodes synchronizes the list of instances from cloudprovider to master server.
//
// Instances reported by the cloud provider that are missing from the API
// server are created after their addresses have been looked up. Nodes known to
// the API server that the cloud provider no longer reports are deleted
// together with their pods. Creations and deletions each run in one goroutine
// per node. Per-node failures are only logged; the returned error is non-nil
// only when listing cloud instances or registered nodes fails.
func (nc *NodeController) syncCloudNodes() error {
	matches, err := nc.getCloudNodesWithSpec()
	if err != nil {
		return err
	}
	nodes, err := nc.kubeClient.Nodes().List(labels.Everything(), fields.Everything())
	if err != nil {
		return err
	}

	// Index the registered nodes by name. No goroutine exists yet, so the map
	// can be filled without taking the lock.
	nodeMap := make(map[string]*api.Node, len(nodes.Items))
	for i := range nodes.Items {
		nodeMap[nodes.Items[i].Name] = &nodes.Items[i]
	}
	var nodeMapLock sync.Mutex

	if nc.allocateNodeCIDRs {
		nc.reconcilePodCIDRs(matches, nodes)
	}

	var wg sync.WaitGroup
	wg.Add(len(matches.Items))
	// Create nodes which have been created in cloud, but not in kubernetes cluster
	// Skip nodes if we hit an error while trying to get their addresses.
	for i := range matches.Items {
		go func(node *api.Node) {
			defer wg.Done()
			nodeMapLock.Lock()
			_, ok := nodeMap[node.Name]
			nodeMapLock.Unlock()
			if !ok {
				glog.V(3).Infof("Querying addresses for new node: %s", node.Name)
				nodeList := &api.NodeList{}
				nodeList.Items = []api.Node{*node}
				// Errors are kept local to this goroutine: writing to the
				// enclosing function's err from several goroutines is a data race.
				if _, err := nc.populateAddresses(nodeList); err != nil {
					glog.Errorf("Error fetching addresses for new node %s: %v", node.Name, err)
					return
				}
				node.Status.Addresses = nodeList.Items[0].Status.Addresses
				if nc.allocateNodeCIDRs {
					nc.configureNodeCIDR(node)
				}
				glog.Infof("Create node in registry: %s", node.Name)
				if _, err := nc.kubeClient.Nodes().Create(node); err != nil {
					glog.Errorf("Create node %s error: %v", node.Name, err)
				}
			}
			// Whatever is left in nodeMap afterwards exists only in the
			// registry and is removed below.
			nodeMapLock.Lock()
			delete(nodeMap, node.Name)
			nodeMapLock.Unlock()
		}(&matches.Items[i])
	}
	wg.Wait()

	wg.Add(len(nodeMap))
	// Delete nodes which have been deleted from cloud, but not from kubernetes cluster.
	for nodeID := range nodeMap {
		go func(nodeID string) {
			defer wg.Done()
			if nc.allocateNodeCIDRs {
				nc.unassignNodeCIDR(nodeID)
			}
			glog.Infof("Delete node from registry: %s", nodeID)
			if err := nc.kubeClient.Nodes().Delete(nodeID); err != nil {
				glog.Errorf("Delete node %s error: %v", nodeID, err)
			}
			if err := nc.deletePods(nodeID); err != nil {
				glog.Errorf("Unable to delete pods from node %s: %v", nodeID, err)
			}
		}(nodeID)
	}
	wg.Wait()
	return nil
}
// populateAddresses fills in Status.Addresses for every node of the list, in
// place, and returns the same list.
//
// With a cloud provider the addresses come from its Instances interface;
// ErrCloudInstance is returned if the provider does not offer one. Without a
// cloud provider the node name is used directly when it parses as an IP, and is
// resolved through nc.lookupIP otherwise (first result wins). A failure for an
// individual node is logged and leaves that node's addresses untouched.
func (nc *NodeController) populateAddresses(nodes *api.NodeList) (*api.NodeList, error) {
	if !nc.isRunningCloudProvider() {
		for idx := range nodes.Items {
			target := &nodes.Items[idx]
			if ip := net.ParseIP(target.Name); ip != nil {
				legacy := api.NodeAddress{Type: api.NodeLegacyHostIP, Address: ip.String()}
				target.Status.Addresses = []api.NodeAddress{legacy}
				continue
			}
			resolved, err := nc.lookupIP(target.Name)
			switch {
			case err != nil:
				glog.Errorf("Can't get ip address of node %s: %v", target.Name, err)
			case len(resolved) == 0:
				glog.Errorf("No ip address for node %v", target.Name)
			default:
				legacy := api.NodeAddress{Type: api.NodeLegacyHostIP, Address: resolved[0].String()}
				target.Status.Addresses = []api.NodeAddress{legacy}
			}
		}
		return nodes, nil
	}

	instances, ok := nc.cloud.Instances()
	if !ok {
		return nodes, ErrCloudInstance
	}
	for idx := range nodes.Items {
		target := &nodes.Items[idx]
		found, err := instances.NodeAddresses(target.Name)
		if err != nil {
			glog.Errorf("error getting instance addresses for %s: %v", target.Name, err)
			continue
		}
		target.Status.Addresses = found
	}
	return nodes, nil
}
func (nc *NodeController) recordNodeEvent(node *api.Node, event string) {
ref := &api.ObjectReference{
Kind: "Node",
@@ -567,6 +364,9 @@ func (nc *NodeController) monitorNodeStatus() error {
if err != nil {
return err
}
if nc.allocateNodeCIDRs {
nc.reconcilePodCIDRs(nodes)
}
for i := range nodes.Items {
var gracePeriod time.Duration
var lastReadyCondition api.NodeCondition
@@ -595,10 +395,12 @@ func (nc *NodeController) monitorNodeStatus() error {
if lastReadyCondition.Status == api.ConditionFalse &&
nc.now().After(nc.nodeStatusMap[node.Name].readyTransitionTimestamp.Add(nc.podEvictionTimeout)) {
// Node stays in not ready for at least 'podEvictionTimeout' - evict all pods on the unhealthy node.
// Makes sure we are not removing pods from to many nodes in the same time.
// Makes sure we are not removing pods from too many nodes in the same time.
glog.Infof("Evicting pods: %v is later than %v + %v", nc.now(), nc.nodeStatusMap[node.Name].readyTransitionTimestamp, nc.podEvictionTimeout)
if nc.deletingPodsRateLimiter.CanAccept() {
nc.deletePods(node.Name)
if err := nc.deletePods(node.Name); err != nil {
glog.Errorf("Unable to delete pods from node %s: %v", node.Name, err)
}
}
}
if lastReadyCondition.Status == api.ConditionUnknown &&
@@ -607,7 +409,9 @@ func (nc *NodeController) monitorNodeStatus() error {
// need to subtract monitoring grace period in order to get the real 'podEvictionTimeout'.
glog.Infof("Evicting pods2: %v is later than %v + %v", nc.now(), nc.nodeStatusMap[node.Name].readyTransitionTimestamp, nc.podEvictionTimeout-gracePeriod)
if nc.deletingPodsRateLimiter.CanAccept() {
nc.deletePods(node.Name)
if err := nc.deletePods(node.Name); err != nil {
glog.Errorf("Unable to delete pods from node %s: %v", node.Name, err)
}
}
}
@@ -621,71 +425,33 @@ func (nc *NodeController) monitorNodeStatus() error {
if readyCondition.Status == api.ConditionUnknown && lastReadyCondition.Status != api.ConditionUnknown {
nc.recordNodeEvent(node, "unknown")
}
// Check with the cloud provider to see if the node still exists. If it
// doesn't, delete the node and all pods scheduled on the node.
if readyCondition.Status != api.ConditionTrue && nc.cloud != nil {
instances, ok := nc.cloud.Instances()
if !ok {
glog.Errorf("%v", ErrCloudInstance)
continue
}
if _, err := instances.ExternalID(node.Name); err != nil && err == cloudprovider.InstanceNotFound {
if nc.allocateNodeCIDRs {
nc.unassignNodeCIDR(node.Name)
}
if err := nc.kubeClient.Nodes().Delete(node.Name); err != nil {
glog.Errorf("Unable to delete node %s: %v", node.Name, err)
continue
}
if err := nc.deletePods(node.Name); err != nil {
glog.Errorf("Unable to delete pods from node %s: %v", node.Name, err)
}
}
}
}
}
return nil
}
// getStaticNodesWithSpec constructs and returns api.NodeList for static nodes.
// One node is produced per entry of nc.nodes, with the entry used both as the
// node name and as its ExternalID, and with the statically configured capacity.
// The capacity is left unset when no static resources were configured. The
// method only constructs spec fields for nodes; the returned error is always nil.
func (nc *NodeController) getStaticNodesWithSpec() (*api.NodeList, error) {
	// staticResources may be nil (NewNodeController accepts a nil pointer), so
	// it must not be dereferenced unconditionally.
	var capacity api.ResourceList
	if nc.staticResources != nil {
		capacity = nc.staticResources.Capacity
	}
	result := &api.NodeList{}
	for _, nodeID := range nc.nodes {
		node := api.Node{
			ObjectMeta: api.ObjectMeta{Name: nodeID},
			Spec: api.NodeSpec{
				ExternalID: nodeID,
			},
			Status: api.NodeStatus{
				Capacity: capacity,
			},
		}
		result.Items = append(result.Items, node)
	}
	return result, nil
}
// getCloudNodesWithSpec constructs and returns api.NodeList from cloudprovider. If error
// occurs, an empty (never nil) NodeList will be returned with a non-nil error info, so
// callers that only log the error can keep using the list. The method only constructs
// spec fields for nodes.
//
// Instances are selected with nc.matchRE. Capacity comes from the cloud
// provider and falls back to nc.staticResources when the provider reports
// none. A failure to look up an instance's external ID is logged and leaves
// ExternalID empty for that node.
func (nc *NodeController) getCloudNodesWithSpec() (*api.NodeList, error) {
	result := &api.NodeList{}
	instances, ok := nc.cloud.Instances()
	if !ok {
		return result, ErrCloudInstance
	}
	matches, err := instances.List(nc.matchRE)
	if err != nil {
		return result, err
	}
	for i := range matches {
		node := api.Node{}
		node.Name = matches[i]
		resources, err := instances.GetNodeResources(matches[i])
		if err != nil {
			// Discard the partially built list: the contract is "empty list
			// plus error", and a nil list would be dereferenced by callers.
			return &api.NodeList{}, err
		}
		if resources == nil {
			resources = nc.staticResources
		}
		if resources != nil {
			node.Status.Capacity = resources.Capacity
			if node.Status.Capacity != nil {
				// NOTE: Capacity is the same map value as resources.Capacity,
				// so this write is also visible through resources.
				node.Status.Capacity[api.ResourcePods] = *resource.NewQuantity(0, resource.DecimalSI)
			}
		}
		instanceID, err := instances.ExternalID(node.Name)
		if err != nil {
			glog.Errorf("Error getting instance id for %s: %v", node.Name, err)
		} else {
			node.Spec.ExternalID = instanceID
		}
		result.Items = append(result.Items, node)
	}
	return result, nil
}
// deletePods will delete all pods from master running on given node.
func (nc *NodeController) deletePods(nodeID string) error {
glog.V(2).Infof("Delete all pods from %v", nodeID)
@@ -708,19 +474,6 @@ func (nc *NodeController) deletePods(nodeID string) error {
return nil
}
// isRunningCloudProvider reports whether the controller has both a cloud
// provider and a non-empty instance-matching expression configured.
func (nc *NodeController) isRunningCloudProvider() bool {
	if nc.cloud == nil {
		return false
	}
	return len(nc.matchRE) > 0
}
// canonicalizeName lowercases the name of every node in the list. The list is
// modified in place and returned for convenience.
func (nc *NodeController) canonicalizeName(nodes *api.NodeList) *api.NodeList {
	for idx := range nodes.Items {
		item := &nodes.Items[idx]
		item.Name = strings.ToLower(item.Name)
	}
	return nodes
}
// getCondition returns a condition object for the specific condition
// type, nil if the condition is not set.
func (nc *NodeController) getCondition(status *api.NodeStatus, conditionType api.NodeConditionType) *api.NodeCondition {

View File

@@ -19,7 +19,6 @@ package nodecontroller
import (
"errors"
"fmt"
"reflect"
"sort"
"sync"
"testing"
@@ -30,7 +29,6 @@ import (
"github.com/GoogleCloudPlatform/kubernetes/pkg/api/resource"
"github.com/GoogleCloudPlatform/kubernetes/pkg/client"
"github.com/GoogleCloudPlatform/kubernetes/pkg/client/testclient"
fake_cloud "github.com/GoogleCloudPlatform/kubernetes/pkg/cloudprovider/fake"
"github.com/GoogleCloudPlatform/kubernetes/pkg/fields"
"github.com/GoogleCloudPlatform/kubernetes/pkg/labels"
"github.com/GoogleCloudPlatform/kubernetes/pkg/util"
@@ -142,506 +140,6 @@ func (m *FakeNodeHandler) Watch(label labels.Selector, field fields.Selector, re
return nil, nil
}
// TestRegisterNodes drives registerNodes with a table of machine lists and
// fake node handlers. For each case it checks whether an error was expected,
// how many requests the fake handler counted, and how many nodes it recorded
// as created.
// NOTE(review): the cases assume a CreateHook returning false makes the fake
// Create call fail — confirm against FakeNodeHandler.
func TestRegisterNodes(t *testing.T) {
table := []struct {
fakeNodeHandler *FakeNodeHandler
machines []string
retryCount int
expectedRequestCount int
expectedCreateCount int
expectedFail bool
}{
{
// Register two nodes normally.
machines: []string{"node0", "node1"},
fakeNodeHandler: &FakeNodeHandler{
CreateHook: func(fake *FakeNodeHandler, node *api.Node) bool { return true },
},
retryCount: 1,
expectedRequestCount: 2,
expectedCreateCount: 2,
expectedFail: false,
},
{
// Canonicalize node names.
// The hook rejects the upper-case name, so this case only succeeds if
// the name was lowercased before Create is called.
machines: []string{"NODE0", "node1"},
fakeNodeHandler: &FakeNodeHandler{
CreateHook: func(fake *FakeNodeHandler, node *api.Node) bool {
if node.Name == "NODE0" {
return false
}
return true
},
},
retryCount: 1,
expectedRequestCount: 2,
expectedCreateCount: 2,
expectedFail: false,
},
{
// No machine to register.
machines: []string{},
fakeNodeHandler: &FakeNodeHandler{
CreateHook: func(fake *FakeNodeHandler, node *api.Node) bool { return true },
},
retryCount: 1,
expectedRequestCount: 0,
expectedCreateCount: 0,
expectedFail: false,
},
{
// Fail the first two requests.
machines: []string{"node0", "node1"},
fakeNodeHandler: &FakeNodeHandler{
CreateHook: func(fake *FakeNodeHandler, node *api.Node) bool {
if fake.RequestCount == 0 || fake.RequestCount == 1 {
return false
}
return true
},
},
retryCount: 10,
expectedRequestCount: 4,
expectedCreateCount: 2,
expectedFail: false,
},
{
// One node already exists
machines: []string{"node0", "node1"},
fakeNodeHandler: &FakeNodeHandler{
Existing: []*api.Node{
{
ObjectMeta: api.ObjectMeta{
Name: "node1",
},
},
},
},
retryCount: 10,
expectedRequestCount: 2,
expectedCreateCount: 1,
expectedFail: false,
},
{
// The first node always fails.
machines: []string{"node0", "node1"},
fakeNodeHandler: &FakeNodeHandler{
CreateHook: func(fake *FakeNodeHandler, node *api.Node) bool {
if node.Name == "node0" {
return false
}
return true
},
},
retryCount: 2,
expectedRequestCount: 3, // 2 for node0, 1 for node1
expectedCreateCount: 1,
expectedFail: true,
},
}
for _, item := range table {
// Build the node list handed to registerNodes from the machine names.
nodes := api.NodeList{}
for _, machine := range item.machines {
nodes.Items = append(nodes.Items, *newNode(machine))
}
nodeController := NewNodeController(nil, "", item.machines, &api.NodeResources{}, item.fakeNodeHandler, 10, time.Minute,
util.NewFakeRateLimiter(), testNodeMonitorGracePeriod, testNodeStartupGracePeriod, testNodeMonitorPeriod, nil, false)
// The retry count under test is passed directly here; the 10 given to the
// constructor above is not what registerNodes receives.
err := nodeController.registerNodes(&nodes, item.retryCount, time.Millisecond)
if !item.expectedFail && err != nil {
t.Errorf("unexpected error: %v", err)
}
if item.expectedFail && err == nil {
t.Errorf("unexpected non-error")
}
if item.fakeNodeHandler.RequestCount != item.expectedRequestCount {
t.Errorf("expected %v calls, but got %v.", item.expectedRequestCount, item.fakeNodeHandler.RequestCount)
}
if len(item.fakeNodeHandler.CreatedNodes) != item.expectedCreateCount {
t.Errorf("expected %v nodes, but got %v.", item.expectedCreateCount, item.fakeNodeHandler.CreatedNodes)
}
}
}
// TestCreateGetStaticNodesWithSpec checks that getStaticNodesWithSpec yields
// one node per configured machine, named after the machine, with the machine
// name as ExternalID and the static capacity attached.
func TestCreateGetStaticNodesWithSpec(t *testing.T) {
	// capacity returns a freshly built resource list so that expectations never
	// share a map with the controller's configuration.
	capacity := func() api.ResourceList {
		return api.ResourceList{
			api.ResourceName(api.ResourceCPU):    resource.MustParse("10"),
			api.ResourceName(api.ResourceMemory): resource.MustParse("10G"),
		}
	}
	// staticNode is the node expected for a single machine name.
	staticNode := func(name string) api.Node {
		return api.Node{
			ObjectMeta: api.ObjectMeta{Name: name},
			Spec:       api.NodeSpec{ExternalID: name},
			Status:     api.NodeStatus{Capacity: capacity()},
		}
	}

	cases := []struct {
		machines      []string
		expectedNodes *api.NodeList
	}{
		{
			machines:      []string{},
			expectedNodes: &api.NodeList{},
		},
		{
			machines:      []string{"node0"},
			expectedNodes: &api.NodeList{Items: []api.Node{staticNode("node0")}},
		},
		{
			machines:      []string{"node0", "node1"},
			expectedNodes: &api.NodeList{Items: []api.Node{staticNode("node0"), staticNode("node1")}},
		},
	}

	resources := api.NodeResources{Capacity: capacity()}
	for _, tc := range cases {
		nodeController := NewNodeController(nil, "", tc.machines, &resources, nil, 10, time.Minute,
			util.NewFakeRateLimiter(), testNodeMonitorGracePeriod, testNodeStartupGracePeriod, testNodeMonitorPeriod, nil, false)
		got, err := nodeController.getStaticNodesWithSpec()
		if err != nil {
			t.Errorf("unexpected error: %v", err)
		}
		if !reflect.DeepEqual(tc.expectedNodes, got) {
			t.Errorf("expected node list %+v, got %+v", tc.expectedNodes, got)
		}
	}
}
// TestCreateGetCloudNodesWithSpec checks that getCloudNodesWithSpec turns the
// machines of a fake cloud into a node list carrying the cloud's capacity.
func TestCreateGetCloudNodesWithSpec(t *testing.T) {
// The same resourceList value is used both as the fake cloud's capacity and
// in the expected nodes below.
// NOTE(review): presumably the fake returns NodeResources as-is, so a
// capacity entry added by the code under test shows up on both sides of the
// DeepEqual comparison — confirm against FakeCloud.
resourceList := api.ResourceList{
api.ResourceCPU: *resource.NewMilliQuantity(1000, resource.DecimalSI),
api.ResourceMemory: *resource.NewQuantity(3000, resource.DecimalSI),
}
table := []struct {
fakeCloud *fake_cloud.FakeCloud
machines []string
expectedNodes *api.NodeList
}{
{
// No machines in the cloud: an empty list is expected.
fakeCloud: &fake_cloud.FakeCloud{},
expectedNodes: &api.NodeList{},
},
{
// A single machine.
fakeCloud: &fake_cloud.FakeCloud{
Machines: []string{"node0"},
NodeResources: &api.NodeResources{Capacity: resourceList},
},
expectedNodes: &api.NodeList{
Items: []api.Node{
{
ObjectMeta: api.ObjectMeta{Name: "node0"},
Status: api.NodeStatus{Capacity: resourceList},
},
},
},
},
{
// Two machines.
fakeCloud: &fake_cloud.FakeCloud{
Machines: []string{"node0", "node1"},
NodeResources: &api.NodeResources{Capacity: resourceList},
},
expectedNodes: &api.NodeList{
Items: []api.Node{
{
ObjectMeta: api.ObjectMeta{Name: "node0"},
Status: api.NodeStatus{Capacity: resourceList},
},
{
ObjectMeta: api.ObjectMeta{Name: "node1"},
Status: api.NodeStatus{Capacity: resourceList},
},
},
},
},
}
for _, item := range table {
// ".*" is the instance-matching expression handed to the controller.
nodeController := NewNodeController(item.fakeCloud, ".*", nil, &api.NodeResources{}, nil, 10, time.Minute,
util.NewFakeRateLimiter(), testNodeMonitorGracePeriod, testNodeStartupGracePeriod, testNodeMonitorPeriod, nil, false)
nodes, err := nodeController.getCloudNodesWithSpec()
if err != nil {
t.Errorf("unexpected error: %v", err)
}
if !reflect.DeepEqual(item.expectedNodes, nodes) {
t.Errorf("expected node list %+v, got %+v", item.expectedNodes, nodes)
}
}
}
// TestSyncCloudNodes runs syncCloudNodes against a fake cloud and a fake node
// handler. For each case it checks the number of requests the handler counted
// and the names, external IDs and addresses of created nodes, plus the names
// of deleted nodes.
func TestSyncCloudNodes(t *testing.T) {
table := []struct {
fakeNodeHandler *FakeNodeHandler
fakeCloud *fake_cloud.FakeCloud
matchRE string
expectedRequestCount int
expectedNameCreated []string
expectedExtIDCreated []string
expectedAddrsCreated []string
expectedDeleted []string
}{
{
// 1 existing node, 1 cloud nodes: do nothing.
fakeNodeHandler: &FakeNodeHandler{
Existing: []*api.Node{newNode("node0")},
},
fakeCloud: &fake_cloud.FakeCloud{
Machines: []string{"node0"},
ExtID: map[string]string{
"node0": "ext-node0",
"node1": "ext-node1",
},
Addresses: []api.NodeAddress{{Type: api.NodeLegacyHostIP, Address: "1.2.3.4"}},
},
matchRE: ".*",
expectedRequestCount: 1, // List
expectedNameCreated: []string{},
expectedExtIDCreated: []string{},
expectedAddrsCreated: []string{},
expectedDeleted: []string{},
},
{
// 1 existing node, 2 cloud nodes: create 1.
fakeNodeHandler: &FakeNodeHandler{
Existing: []*api.Node{newNode("node0")},
},
fakeCloud: &fake_cloud.FakeCloud{
Machines: []string{"node0", "node1"},
ExtID: map[string]string{
"node0": "ext-node0",
"node1": "ext-node1",
},
Addresses: []api.NodeAddress{{Type: api.NodeLegacyHostIP, Address: "1.2.3.4"}},
},
matchRE: ".*",
expectedRequestCount: 2, // List + Create
expectedNameCreated: []string{"node1"},
expectedExtIDCreated: []string{"ext-node1"},
expectedAddrsCreated: []string{"1.2.3.4"},
expectedDeleted: []string{},
},
{
// 2 existing nodes, 1 cloud node: delete 1.
fakeNodeHandler: &FakeNodeHandler{
Existing: []*api.Node{newNode("node0"), newNode("node1")},
},
fakeCloud: &fake_cloud.FakeCloud{
Machines: []string{"node0"},
ExtID: map[string]string{
"node0": "ext-node0",
"node1": "ext-node1",
},
Addresses: []api.NodeAddress{{Type: api.NodeLegacyHostIP, Address: "1.2.3.4"}},
},
matchRE: ".*",
expectedRequestCount: 2, // List + Delete
expectedNameCreated: []string{},
expectedExtIDCreated: []string{},
expectedAddrsCreated: []string{},
expectedDeleted: []string{"node1"},
},
{
// 1 existing node, 3 cloud nodes but only 2 match regex: create 1.
// "fake" does not match matchRE, so only node1 is expected to be created.
fakeNodeHandler: &FakeNodeHandler{
Existing: []*api.Node{newNode("node0")},
},
fakeCloud: &fake_cloud.FakeCloud{
Machines: []string{"node0", "node1", "fake"},
ExtID: map[string]string{
"node0": "ext-node0",
"node1": "ext-node1",
"fake": "ext-fake",
},
Addresses: []api.NodeAddress{{Type: api.NodeLegacyHostIP, Address: "1.2.3.4"}},
},
matchRE: "node[0-9]+",
expectedRequestCount: 2, // List + Create
expectedNameCreated: []string{"node1"},
expectedExtIDCreated: []string{"ext-node1"},
expectedAddrsCreated: []string{"1.2.3.4"},
expectedDeleted: []string{},
},
}
for _, item := range table {
// Cases that did not set up a fake client get an empty one.
if item.fakeNodeHandler.Fake == nil {
item.fakeNodeHandler.Fake = testclient.NewSimpleFake()
}
nodeController := NewNodeController(item.fakeCloud, item.matchRE, nil, &api.NodeResources{}, item.fakeNodeHandler, 10, time.Minute,
util.NewFakeRateLimiter(), testNodeMonitorGracePeriod, testNodeStartupGracePeriod, testNodeMonitorPeriod, nil, false)
if err := nodeController.syncCloudNodes(); err != nil {
t.Errorf("unexpected error: %v", err)
}
if item.fakeNodeHandler.RequestCount != item.expectedRequestCount {
t.Errorf("expected %v call, but got %v.", item.expectedRequestCount, item.fakeNodeHandler.RequestCount)
}
// The sorted* helpers are used so the comparisons do not depend on the
// order in which the handler recorded nodes.
nodes := sortedNodeNames(item.fakeNodeHandler.CreatedNodes)
if !reflect.DeepEqual(item.expectedNameCreated, nodes) {
t.Errorf("expected node list %+v, got %+v", item.expectedNameCreated, nodes)
}
nodeExtIDs := sortedNodeExternalIDs(item.fakeNodeHandler.CreatedNodes)
if !reflect.DeepEqual(item.expectedExtIDCreated, nodeExtIDs) {
t.Errorf("expected node external id list %+v, got %+v", item.expectedExtIDCreated, nodeExtIDs)
}
nodeAddrs := sortedNodeAddresses(item.fakeNodeHandler.CreatedNodes)
if !reflect.DeepEqual(item.expectedAddrsCreated, nodeAddrs) {
t.Errorf("expected node address list %+v, got %+v", item.expectedAddrsCreated, nodeAddrs)
}
nodes = sortedNodeNames(item.fakeNodeHandler.DeletedNodes)
if !reflect.DeepEqual(item.expectedDeleted, nodes) {
t.Errorf("expected node list %+v, got %+v", item.expectedDeleted, nodes)
}
}
}
// TestSyncCloudNodesEvictPods checks that syncCloudNodes removes the pods of
// nodes it deletes. For each case it compares the request count, the deleted
// node names and the actions recorded by the fake client.
func TestSyncCloudNodesEvictPods(t *testing.T) {
table := []struct {
fakeNodeHandler *FakeNodeHandler
fakeCloud *fake_cloud.FakeCloud
matchRE string
expectedRequestCount int
expectedDeleted []string
expectedActions []testclient.FakeAction
}{
{
// No node to delete: do nothing.
fakeNodeHandler: &FakeNodeHandler{
Existing: []*api.Node{newNode("node0"), newNode("node1")},
Fake: testclient.NewSimpleFake(&api.PodList{Items: []api.Pod{*newPod("pod0", "node0"), *newPod("pod1", "node1")}}),
},
fakeCloud: &fake_cloud.FakeCloud{
Machines: []string{"node0", "node1"},
},
matchRE: ".*",
expectedRequestCount: 1, // List
expectedDeleted: []string{},
expectedActions: nil,
},
{
// Delete node1, and pod0 is running on it.
fakeNodeHandler: &FakeNodeHandler{
Existing: []*api.Node{newNode("node0"), newNode("node1")},
Fake: testclient.NewSimpleFake(&api.PodList{Items: []api.Pod{*newPod("pod0", "node1")}}),
},
fakeCloud: &fake_cloud.FakeCloud{
Machines: []string{"node0"},
},
matchRE: ".*",
expectedRequestCount: 2, // List + Delete
expectedDeleted: []string{"node1"},
expectedActions: []testclient.FakeAction{{Action: "list-pods"}, {Action: "delete-pod", Value: "pod0"}},
},
{
// Delete node1, but pod0 is running on node0.
// Pods are still listed, but none is expected to be deleted.
fakeNodeHandler: &FakeNodeHandler{
Existing: []*api.Node{newNode("node0"), newNode("node1")},
Fake: testclient.NewSimpleFake(&api.PodList{Items: []api.Pod{*newPod("pod0", "node0")}}),
},
fakeCloud: &fake_cloud.FakeCloud{
Machines: []string{"node0"},
},
matchRE: ".*",
expectedRequestCount: 2, // List + Delete
expectedDeleted: []string{"node1"},
expectedActions: []testclient.FakeAction{{Action: "list-pods"}},
},
}
for _, item := range table {
// Cases that did not set up a fake client get an empty one.
if item.fakeNodeHandler.Fake == nil {
item.fakeNodeHandler.Fake = testclient.NewSimpleFake()
}
nodeController := NewNodeController(item.fakeCloud, item.matchRE, nil, &api.NodeResources{}, item.fakeNodeHandler, 10, time.Minute,
util.NewFakeRateLimiter(), testNodeMonitorGracePeriod, testNodeStartupGracePeriod, testNodeMonitorPeriod, nil, false)
if err := nodeController.syncCloudNodes(); err != nil {
t.Errorf("unexpected error: %v", err)
}
if item.fakeNodeHandler.RequestCount != item.expectedRequestCount {
t.Errorf("expected %v call, but got %v.", item.expectedRequestCount, item.fakeNodeHandler.RequestCount)
}
nodes := sortedNodeNames(item.fakeNodeHandler.DeletedNodes)
if !reflect.DeepEqual(item.expectedDeleted, nodes) {
t.Errorf("expected node list %+v, got %+v", item.expectedDeleted, nodes)
}
// This is a direct comparison of the recorded actions; no waiting is
// involved despite the wording of the failure message.
if !reflect.DeepEqual(item.expectedActions, item.fakeNodeHandler.Actions) {
t.Errorf("time out waiting for deleting pods, expected %+v, got %+v", item.expectedActions, item.fakeNodeHandler.Actions)
}
}
}
// TestPopulateNodeAddresses checks that populateAddresses copies the cloud
// provider's addresses onto every node, and that a cloud-side lookup failure
// leaves the addresses unset without making populateAddresses itself fail.
func TestPopulateNodeAddresses(t *testing.T) {
	legacyAddr := api.NodeAddress{Type: api.NodeLegacyHostIP, Address: "1.2.3.4"}
	// twoNodes builds a fresh two-node list for each case.
	twoNodes := func() *api.NodeList {
		return &api.NodeList{Items: []api.Node{*newNode("node0"), *newNode("node1")}}
	}

	cases := []struct {
		nodes             *api.NodeList
		fakeCloud         *fake_cloud.FakeCloud
		expectedFail      bool
		expectedAddresses []api.NodeAddress
	}{
		{
			nodes:             twoNodes(),
			fakeCloud:         &fake_cloud.FakeCloud{Addresses: []api.NodeAddress{legacyAddr}},
			expectedAddresses: []api.NodeAddress{legacyAddr},
		},
		{
			nodes:             twoNodes(),
			fakeCloud:         &fake_cloud.FakeCloud{Err: ErrQueryIPAddress},
			expectedAddresses: nil,
		},
	}

	for _, tc := range cases {
		nodeController := NewNodeController(tc.fakeCloud, ".*", nil, nil, nil, 10, time.Minute,
			util.NewFakeRateLimiter(), testNodeMonitorGracePeriod, testNodeStartupGracePeriod, testNodeMonitorPeriod, nil, false)
		result, err := nodeController.populateAddresses(tc.nodes)
		// In case of IP querying error, we should continue.
		if err != nil {
			t.Errorf("unexpected error: %v", err)
		}
		for idx := range result.Items {
			got := result.Items[idx].Status.Addresses
			if !reflect.DeepEqual(tc.expectedAddresses, got) {
				t.Errorf("expect HostIP %s, got %s", tc.expectedAddresses, got)
			}
		}
	}
}
func TestMonitorNodeStatusEvictPods(t *testing.T) {
fakeNow := util.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC)
evictionTimeout := 10 * time.Minute
@@ -826,7 +324,7 @@ func TestMonitorNodeStatusEvictPods(t *testing.T) {
}
for _, item := range table {
nodeController := NewNodeController(nil, "", []string{"node0"}, nil, item.fakeNodeHandler, 10,
nodeController := NewNodeController(nil, "", nil, item.fakeNodeHandler, 10,
evictionTimeout, util.NewFakeRateLimiter(), testNodeMonitorGracePeriod,
testNodeStartupGracePeriod, testNodeMonitorPeriod, nil, false)
nodeController.now = func() util.Time { return fakeNow }
@@ -1029,7 +527,7 @@ func TestMonitorNodeStatusUpdateStatus(t *testing.T) {
}
for _, item := range table {
nodeController := NewNodeController(nil, "", []string{"node0"}, nil, item.fakeNodeHandler, 10, 5*time.Minute, util.NewFakeRateLimiter(),
nodeController := NewNodeController(nil, "", nil, item.fakeNodeHandler, 10, 5*time.Minute, util.NewFakeRateLimiter(),
testNodeMonitorGracePeriod, testNodeStartupGracePeriod, testNodeMonitorPeriod, nil, false)
nodeController.now = func() util.Time { return fakeNow }
if err := nodeController.monitorNodeStatus(); err != nil {