mirror of
				https://github.com/optim-enterprises-bv/kubernetes.git
				synced 2025-11-04 04:08:16 +00:00 
			
		
		
		
	
		
			
				
	
	
		
			333 lines
		
	
	
		
			13 KiB
		
	
	
	
		
			Go
		
	
	
	
	
	
			
		
		
	
	
			333 lines
		
	
	
		
			13 KiB
		
	
	
	
		
			Go
		
	
	
	
	
	
/*
 | 
						|
Copyright 2016 The Kubernetes Authors.
 | 
						|
 | 
						|
Licensed under the Apache License, Version 2.0 (the "License");
 | 
						|
you may not use this file except in compliance with the License.
 | 
						|
You may obtain a copy of the License at
 | 
						|
 | 
						|
    http://www.apache.org/licenses/LICENSE-2.0
 | 
						|
 | 
						|
Unless required by applicable law or agreed to in writing, software
 | 
						|
distributed under the License is distributed on an "AS IS" BASIS,
 | 
						|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 | 
						|
See the License for the specific language governing permissions and
 | 
						|
limitations under the License.
 | 
						|
*/
 | 
						|
 | 
						|
package kubelet
 | 
						|
 | 
						|
import (
 | 
						|
	"fmt"
 | 
						|
	"io"
 | 
						|
	"io/ioutil"
 | 
						|
	"strings"
 | 
						|
 | 
						|
	"github.com/golang/glog"
 | 
						|
	"k8s.io/kubernetes/pkg/api/v1"
 | 
						|
	"k8s.io/kubernetes/pkg/apis/componentconfig"
 | 
						|
	"k8s.io/kubernetes/pkg/kubelet/network"
 | 
						|
	"k8s.io/kubernetes/pkg/util/bandwidth"
 | 
						|
	utiliptables "k8s.io/kubernetes/pkg/util/iptables"
 | 
						|
	"k8s.io/kubernetes/pkg/util/sets"
 | 
						|
)
 | 
						|
 | 
						|
const (
 | 
						|
	// the mark-for-masquerade chain
 | 
						|
	// TODO: clean up this logic in kube-proxy
 | 
						|
	KubeMarkMasqChain utiliptables.Chain = "KUBE-MARK-MASQ"
 | 
						|
 | 
						|
	// the mark-for-drop chain
 | 
						|
	KubeMarkDropChain utiliptables.Chain = "KUBE-MARK-DROP"
 | 
						|
 | 
						|
	// kubernetes postrouting rules
 | 
						|
	KubePostroutingChain utiliptables.Chain = "KUBE-POSTROUTING"
 | 
						|
 | 
						|
	// kubernetes firewall rules
 | 
						|
	KubeFirewallChain utiliptables.Chain = "KUBE-FIREWALL"
 | 
						|
)
 | 
						|
 | 
						|
// effectiveHairpinMode determines the effective hairpin mode given the
 | 
						|
// configured mode, container runtime, and whether cbr0 should be configured.
 | 
						|
func effectiveHairpinMode(hairpinMode componentconfig.HairpinMode, containerRuntime string, networkPlugin string) (componentconfig.HairpinMode, error) {
 | 
						|
	// The hairpin mode setting doesn't matter if:
 | 
						|
	// - We're not using a bridge network. This is hard to check because we might
 | 
						|
	//   be using a plugin.
 | 
						|
	// - It's set to hairpin-veth for a container runtime that doesn't know how
 | 
						|
	//   to set the hairpin flag on the veth's of containers. Currently the
 | 
						|
	//   docker runtime is the only one that understands this.
 | 
						|
	// - It's set to "none".
 | 
						|
	if hairpinMode == componentconfig.PromiscuousBridge || hairpinMode == componentconfig.HairpinVeth {
 | 
						|
		// Only on docker.
 | 
						|
		if containerRuntime != "docker" {
 | 
						|
			glog.Warningf("Hairpin mode set to %q but container runtime is %q, ignoring", hairpinMode, containerRuntime)
 | 
						|
			return componentconfig.HairpinNone, nil
 | 
						|
		}
 | 
						|
		if hairpinMode == componentconfig.PromiscuousBridge && networkPlugin != "kubenet" {
 | 
						|
			// This is not a valid combination, since promiscuous-bridge only works on kubenet. Users might be using the
 | 
						|
			// default values (from before the hairpin-mode flag existed) and we
 | 
						|
			// should keep the old behavior.
 | 
						|
			glog.Warningf("Hairpin mode set to %q but kubenet is not enabled, falling back to %q", hairpinMode, componentconfig.HairpinVeth)
 | 
						|
			return componentconfig.HairpinVeth, nil
 | 
						|
		}
 | 
						|
	} else if hairpinMode != componentconfig.HairpinNone {
 | 
						|
		return "", fmt.Errorf("unknown value: %q", hairpinMode)
 | 
						|
	}
 | 
						|
	return hairpinMode, nil
 | 
						|
}
 | 
						|
 | 
						|
// providerRequiresNetworkingConfiguration returns whether the cloud provider
 | 
						|
// requires special networking configuration.
 | 
						|
func (kl *Kubelet) providerRequiresNetworkingConfiguration() bool {
 | 
						|
	// TODO: We should have a mechanism to say whether native cloud provider
 | 
						|
	// is used or whether we are using overlay networking. We should return
 | 
						|
	// true for cloud providers if they implement Routes() interface and
 | 
						|
	// we are not using overlay networking.
 | 
						|
	if kl.cloud == nil || kl.cloud.ProviderName() != "gce" {
 | 
						|
		return false
 | 
						|
	}
 | 
						|
	_, supported := kl.cloud.Routes()
 | 
						|
	return supported
 | 
						|
}
 | 
						|
 | 
						|
// Returns the list of DNS servers and DNS search domains.
 | 
						|
func (kl *Kubelet) parseResolvConf(reader io.Reader) (nameservers []string, searches []string, err error) {
 | 
						|
	var scrubber dnsScrubber
 | 
						|
	if kl.cloud != nil {
 | 
						|
		scrubber = kl.cloud
 | 
						|
	}
 | 
						|
	return parseResolvConf(reader, scrubber)
 | 
						|
}
 | 
						|
 | 
						|
// dnsScrubber post-processes the nameservers and search domains parsed from
// a resolv.conf file. It is declared as a narrow interface so that tests can
// supply their own implementation.
type dnsScrubber interface {
	ScrubDNS(nameservers, searches []string) (nsOut, srchOut []string)
}

// parseResolvConf reads a resolv.conf file from the given reader and parses
// it into nameservers and searches, possibly returning an error.
//
// Parsing rules:
//   - Blank lines and lines whose first non-space character is '#' are
//     skipped.
//   - "nameserver <addr>" lines accumulate. Only the first field after the
//     keyword is taken, so trailing text on the line is not mistaken for
//     additional servers; a bare "nameserver" line contributes nothing.
//   - "search a b c" lines overrule each other: the last one wins.
//   - Any other keyword is ignored.
//
// Both returned slices are non-nil (possibly empty) on success, unless the
// scrubber returns otherwise. A non-nil dnsScrubber is given the parsed
// values and its results are returned in their place; this allows cloud
// providers to post-process dns names.
// TODO: move to utility package
func parseResolvConf(reader io.Reader, dnsScrubber dnsScrubber) (nameservers []string, searches []string, err error) {
	file, err := ioutil.ReadAll(reader)
	if err != nil {
		return nil, nil, err
	}

	// Start from empty, non-nil slices so callers always get a usable value.
	nameservers = []string{}
	searches = []string{}

	for _, line := range strings.Split(string(file), "\n") {
		trimmed := strings.TrimSpace(line)
		if strings.HasPrefix(trimmed, "#") {
			continue
		}
		fields := strings.Fields(trimmed)
		if len(fields) == 0 {
			continue
		}
		switch fields[0] {
		case "nameserver":
			// One address per keyword: ignore anything after the address.
			if len(fields) >= 2 {
				nameservers = append(nameservers, fields[1])
			}
		case "search":
			// Last search line replaces any earlier one.
			searches = fields[1:]
		}
	}

	// Give the cloud-provider a chance to post-process DNS settings.
	if dnsScrubber != nil {
		nameservers, searches = dnsScrubber.ScrubDNS(nameservers, searches)
	}
	return nameservers, searches, nil
}
 | 
						|
 | 
						|
// cleanupBandwidthLimits updates the status of bandwidth-limited containers
 | 
						|
// and ensures that only the appropriate CIDRs are active on the node.
 | 
						|
func (kl *Kubelet) cleanupBandwidthLimits(allPods []*v1.Pod) error {
 | 
						|
	if kl.shaper == nil {
 | 
						|
		return nil
 | 
						|
	}
 | 
						|
	currentCIDRs, err := kl.shaper.GetCIDRs()
 | 
						|
	if err != nil {
 | 
						|
		return err
 | 
						|
	}
 | 
						|
	possibleCIDRs := sets.String{}
 | 
						|
	for ix := range allPods {
 | 
						|
		pod := allPods[ix]
 | 
						|
		ingress, egress, err := bandwidth.ExtractPodBandwidthResources(pod.Annotations)
 | 
						|
		if err != nil {
 | 
						|
			return err
 | 
						|
		}
 | 
						|
		if ingress == nil && egress == nil {
 | 
						|
			glog.V(8).Infof("Not a bandwidth limited container...")
 | 
						|
			continue
 | 
						|
		}
 | 
						|
		status, found := kl.statusManager.GetPodStatus(pod.UID)
 | 
						|
		if !found {
 | 
						|
			// TODO(random-liu): Cleanup status get functions. (issue #20477)
 | 
						|
			s, err := kl.containerRuntime.GetPodStatus(pod.UID, pod.Name, pod.Namespace)
 | 
						|
			if err != nil {
 | 
						|
				return err
 | 
						|
			}
 | 
						|
			status = kl.generateAPIPodStatus(pod, s)
 | 
						|
		}
 | 
						|
		if status.Phase == v1.PodRunning {
 | 
						|
			possibleCIDRs.Insert(fmt.Sprintf("%s/32", status.PodIP))
 | 
						|
		}
 | 
						|
	}
 | 
						|
	for _, cidr := range currentCIDRs {
 | 
						|
		if !possibleCIDRs.Has(cidr) {
 | 
						|
			glog.V(2).Infof("Removing CIDR: %s (%v)", cidr, possibleCIDRs)
 | 
						|
			if err := kl.shaper.Reset(cidr); err != nil {
 | 
						|
				return err
 | 
						|
			}
 | 
						|
		}
 | 
						|
	}
 | 
						|
	return nil
 | 
						|
}
 | 
						|
 | 
						|
// syncNetworkStatus updates the network state
 | 
						|
func (kl *Kubelet) syncNetworkStatus() {
 | 
						|
	// For cri integration, network state will be updated in updateRuntimeUp,
 | 
						|
	// we'll get runtime network status through cri directly.
 | 
						|
	// TODO: Remove this once we completely switch to cri integration.
 | 
						|
	if kl.networkPlugin != nil {
 | 
						|
		kl.runtimeState.setNetworkState(kl.networkPlugin.Status())
 | 
						|
	}
 | 
						|
}
 | 
						|
 | 
						|
// updatePodCIDR updates the pod CIDR in the runtime state if it is different
 | 
						|
// from the current CIDR.
 | 
						|
func (kl *Kubelet) updatePodCIDR(cidr string) {
 | 
						|
	podCIDR := kl.runtimeState.podCIDR()
 | 
						|
 | 
						|
	if podCIDR == cidr {
 | 
						|
		return
 | 
						|
	}
 | 
						|
 | 
						|
	// kubelet -> network plugin
 | 
						|
	// cri runtime shims are responsible for their own network plugins
 | 
						|
	if kl.networkPlugin != nil {
 | 
						|
		details := make(map[string]interface{})
 | 
						|
		details[network.NET_PLUGIN_EVENT_POD_CIDR_CHANGE_DETAIL_CIDR] = cidr
 | 
						|
		kl.networkPlugin.Event(network.NET_PLUGIN_EVENT_POD_CIDR_CHANGE, details)
 | 
						|
	}
 | 
						|
 | 
						|
	// kubelet -> generic runtime -> runtime shim -> network plugin
 | 
						|
	// docker/rkt non-cri implementations have a passthrough UpdatePodCIDR
 | 
						|
	if err := kl.GetRuntime().UpdatePodCIDR(cidr); err != nil {
 | 
						|
		glog.Errorf("Failed to update pod CIDR: %v", err)
 | 
						|
		return
 | 
						|
	}
 | 
						|
 | 
						|
	glog.Infof("Setting Pod CIDR: %v -> %v", podCIDR, cidr)
 | 
						|
	kl.runtimeState.setPodCIDR(cidr)
 | 
						|
}
 | 
						|
 | 
						|
// shapingEnabled returns whether traffic shaping is enabled.
 | 
						|
func (kl *Kubelet) shapingEnabled() bool {
 | 
						|
	// Disable shaping if a network plugin is defined and supports shaping
 | 
						|
	if kl.networkPlugin != nil && kl.networkPlugin.Capabilities().Has(network.NET_PLUGIN_CAPABILITY_SHAPING) {
 | 
						|
		return false
 | 
						|
	}
 | 
						|
	// This is not strictly true but we need to figure out how to handle
 | 
						|
	// bandwidth shaping anyway. If the kubelet doesn't have a networkPlugin,
 | 
						|
	// it could mean:
 | 
						|
	// a. the kubelet is responsible for bandwidth shaping
 | 
						|
	// b. the kubelet is using cri, and the cri has a network plugin
 | 
						|
	// Today, the only plugin that understands bandwidth shaping is kubenet, and
 | 
						|
	// it doesn't support bandwidth shaping when invoked through cri, so it
 | 
						|
	// effectively boils down to letting the kubelet decide how to handle
 | 
						|
	// shaping annotations. The combination of (cri + network plugin that
 | 
						|
	// handles bandwidth shaping) may not work because of this.
 | 
						|
	return true
 | 
						|
}
 | 
						|
 | 
						|
// syncNetworkUtil ensures the network utility are present on host.
 | 
						|
// Network util includes:
 | 
						|
// 1. 	In nat table, KUBE-MARK-DROP rule to mark connections for dropping
 | 
						|
// 	Marked connection will be drop on INPUT/OUTPUT Chain in filter table
 | 
						|
// 2. 	In nat table, KUBE-MARK-MASQ rule to mark connections for SNAT
 | 
						|
// 	Marked connection will get SNAT on POSTROUTING Chain in nat table
 | 
						|
func (kl *Kubelet) syncNetworkUtil() {
 | 
						|
	if kl.iptablesMasqueradeBit < 0 || kl.iptablesMasqueradeBit > 31 {
 | 
						|
		glog.Errorf("invalid iptables-masquerade-bit %v not in [0, 31]", kl.iptablesMasqueradeBit)
 | 
						|
		return
 | 
						|
	}
 | 
						|
 | 
						|
	if kl.iptablesDropBit < 0 || kl.iptablesDropBit > 31 {
 | 
						|
		glog.Errorf("invalid iptables-drop-bit %v not in [0, 31]", kl.iptablesDropBit)
 | 
						|
		return
 | 
						|
	}
 | 
						|
 | 
						|
	if kl.iptablesDropBit == kl.iptablesMasqueradeBit {
 | 
						|
		glog.Errorf("iptables-masquerade-bit %v and iptables-drop-bit %v must be different", kl.iptablesMasqueradeBit, kl.iptablesDropBit)
 | 
						|
		return
 | 
						|
	}
 | 
						|
 | 
						|
	// Setup KUBE-MARK-DROP rules
 | 
						|
	dropMark := getIPTablesMark(kl.iptablesDropBit)
 | 
						|
	if _, err := kl.iptClient.EnsureChain(utiliptables.TableNAT, KubeMarkDropChain); err != nil {
 | 
						|
		glog.Errorf("Failed to ensure that %s chain %s exists: %v", utiliptables.TableNAT, KubeMarkDropChain, err)
 | 
						|
		return
 | 
						|
	}
 | 
						|
	if _, err := kl.iptClient.EnsureRule(utiliptables.Append, utiliptables.TableNAT, KubeMarkDropChain, "-j", "MARK", "--set-xmark", dropMark); err != nil {
 | 
						|
		glog.Errorf("Failed to ensure marking rule for %v: %v", KubeMarkDropChain, err)
 | 
						|
		return
 | 
						|
	}
 | 
						|
	if _, err := kl.iptClient.EnsureChain(utiliptables.TableFilter, KubeFirewallChain); err != nil {
 | 
						|
		glog.Errorf("Failed to ensure that %s chain %s exists: %v", utiliptables.TableFilter, KubeFirewallChain, err)
 | 
						|
		return
 | 
						|
	}
 | 
						|
	if _, err := kl.iptClient.EnsureRule(utiliptables.Append, utiliptables.TableFilter, KubeFirewallChain,
 | 
						|
		"-m", "comment", "--comment", "kubernetes firewall for dropping marked packets",
 | 
						|
		"-m", "mark", "--mark", dropMark,
 | 
						|
		"-j", "DROP"); err != nil {
 | 
						|
		glog.Errorf("Failed to ensure rule to drop packet marked by %v in %v chain %v: %v", KubeMarkDropChain, utiliptables.TableFilter, KubeFirewallChain, err)
 | 
						|
		return
 | 
						|
	}
 | 
						|
	if _, err := kl.iptClient.EnsureRule(utiliptables.Prepend, utiliptables.TableFilter, utiliptables.ChainOutput, "-j", string(KubeFirewallChain)); err != nil {
 | 
						|
		glog.Errorf("Failed to ensure that %s chain %s jumps to %s: %v", utiliptables.TableFilter, utiliptables.ChainOutput, KubeFirewallChain, err)
 | 
						|
		return
 | 
						|
	}
 | 
						|
	if _, err := kl.iptClient.EnsureRule(utiliptables.Prepend, utiliptables.TableFilter, utiliptables.ChainInput, "-j", string(KubeFirewallChain)); err != nil {
 | 
						|
		glog.Errorf("Failed to ensure that %s chain %s jumps to %s: %v", utiliptables.TableFilter, utiliptables.ChainInput, KubeFirewallChain, err)
 | 
						|
		return
 | 
						|
	}
 | 
						|
 | 
						|
	// Setup KUBE-MARK-MASQ rules
 | 
						|
	masqueradeMark := getIPTablesMark(kl.iptablesMasqueradeBit)
 | 
						|
	if _, err := kl.iptClient.EnsureChain(utiliptables.TableNAT, KubeMarkMasqChain); err != nil {
 | 
						|
		glog.Errorf("Failed to ensure that %s chain %s exists: %v", utiliptables.TableNAT, KubeMarkMasqChain, err)
 | 
						|
		return
 | 
						|
	}
 | 
						|
	if _, err := kl.iptClient.EnsureChain(utiliptables.TableNAT, KubePostroutingChain); err != nil {
 | 
						|
		glog.Errorf("Failed to ensure that %s chain %s exists: %v", utiliptables.TableNAT, KubePostroutingChain, err)
 | 
						|
		return
 | 
						|
	}
 | 
						|
	if _, err := kl.iptClient.EnsureRule(utiliptables.Append, utiliptables.TableNAT, KubeMarkMasqChain, "-j", "MARK", "--set-xmark", masqueradeMark); err != nil {
 | 
						|
		glog.Errorf("Failed to ensure marking rule for %v: %v", KubeMarkMasqChain, err)
 | 
						|
		return
 | 
						|
	}
 | 
						|
	if _, err := kl.iptClient.EnsureRule(utiliptables.Prepend, utiliptables.TableNAT, utiliptables.ChainPostrouting,
 | 
						|
		"-m", "comment", "--comment", "kubernetes postrouting rules", "-j", string(KubePostroutingChain)); err != nil {
 | 
						|
		glog.Errorf("Failed to ensure that %s chain %s jumps to %s: %v", utiliptables.TableNAT, utiliptables.ChainPostrouting, KubePostroutingChain, err)
 | 
						|
		return
 | 
						|
	}
 | 
						|
	if _, err := kl.iptClient.EnsureRule(utiliptables.Append, utiliptables.TableNAT, KubePostroutingChain,
 | 
						|
		"-m", "comment", "--comment", "kubernetes service traffic requiring SNAT",
 | 
						|
		"-m", "mark", "--mark", masqueradeMark, "-j", "MASQUERADE"); err != nil {
 | 
						|
		glog.Errorf("Failed to ensure SNAT rule for packets marked by %v in %v chain %v: %v", KubeMarkMasqChain, utiliptables.TableNAT, KubePostroutingChain, err)
 | 
						|
		return
 | 
						|
	}
 | 
						|
}
 | 
						|
 | 
						|
// getIPTablesMark returns the fwmark for the given bit, formatted as
// "value/mask" where both value and mask are the single bit 1<<bit printed
// as a zero-padded hexadecimal number (e.g. bit 14 yields
// "0x00004000/0x00004000").
//
// bit is expected to be in [0, 31]. The shift is done on a uint32 rather
// than an int so that bit 31 does not overflow into a negative number on
// platforms where int is 32 bits wide.
func getIPTablesMark(bit int) string {
	value := uint32(1) << uint(bit)
	return fmt.Sprintf("%#08x/%#08x", value, value)
}
 |