mirror of
				https://github.com/optim-enterprises-bv/kubernetes.git
				synced 2025-11-03 19:58:17 +00:00 
			
		
		
		
	avoid updating nf_conntrack-related settings, by default, when running k8s on mesos
This commit is contained in:
		@@ -22,6 +22,16 @@ It is **strongly** recommended that all of the Kubernetes-Mesos executors are de
 | 
				
			|||||||
Not following the above steps prior to upgrading the scheduler can result in a cluster wherein pods will never again be scheduled upon one or more nodes.
 | 
					Not following the above steps prior to upgrading the scheduler can result in a cluster wherein pods will never again be scheduled upon one or more nodes.
 | 
				
			||||||
This issue is being tracked here: https://github.com/mesosphere/kubernetes-mesos/issues/572.
 | 
					This issue is being tracked here: https://github.com/mesosphere/kubernetes-mesos/issues/572.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					### Netfilter Connection Tracking
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					The scheduler offers flags to tweak connection tracking for kube-proxy instances that are launched on slave nodes:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					- conntrack-max (do **NOT** set this to a non-zero value if the Mesos slave process is running in a non-root network namespace)
 | 
				
			||||||
 | 
					- conntrack-tcp-timeout-established
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					By default, both of these flags are set to 0 when running Kubernetes-Mesos, which leaves the slave's existing connection tracking settings as-is.
 | 
				
			||||||
 | 
					Setting either of these flags to non-zero values may impact connection tracking for the entire slave.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
### Port Specifications
 | 
					### Port Specifications
 | 
				
			||||||
 | 
					
 | 
				
			||||||
In order for pods (replicated, or otherwise) to be scheduled on the cluster, it is strongly recommended that:
 | 
					In order for pods (replicated, or otherwise) to be scheduled on the cluster, it is strongly recommended that:
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -23,6 +23,7 @@ import (
 | 
				
			|||||||
	"os"
 | 
						"os"
 | 
				
			||||||
	"os/signal"
 | 
						"os/signal"
 | 
				
			||||||
	"path"
 | 
						"path"
 | 
				
			||||||
 | 
						"strconv"
 | 
				
			||||||
	"strings"
 | 
						"strings"
 | 
				
			||||||
	"syscall"
 | 
						"syscall"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@@ -70,6 +71,8 @@ type MinionServer struct {
 | 
				
			|||||||
	proxyLogV                      int
 | 
						proxyLogV                      int
 | 
				
			||||||
	proxyBindall                   bool
 | 
						proxyBindall                   bool
 | 
				
			||||||
	proxyMode                      string
 | 
						proxyMode                      string
 | 
				
			||||||
 | 
						conntrackMax                   int
 | 
				
			||||||
 | 
						conntrackTCPTimeoutEstablished int
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
// NewMinionServer creates the MinionServer struct with default values to be used by hyperkube
 | 
					// NewMinionServer creates the MinionServer struct with default values to be used by hyperkube
 | 
				
			||||||
@@ -139,11 +142,8 @@ func (ms *MinionServer) launchProxyServer() {
 | 
				
			|||||||
		"--logtostderr=true",
 | 
							"--logtostderr=true",
 | 
				
			||||||
		"--resource-container=" + path.Join("/", ms.mesosCgroup, "kube-proxy"),
 | 
							"--resource-container=" + path.Join("/", ms.mesosCgroup, "kube-proxy"),
 | 
				
			||||||
		"--proxy-mode=" + ms.proxyMode,
 | 
							"--proxy-mode=" + ms.proxyMode,
 | 
				
			||||||
		// TODO(jdef) this is a temporary hack to fix failing smoke tests. a following PR
 | 
							"--conntrack-max=" + strconv.Itoa(ms.conntrackMax),
 | 
				
			||||||
		// will more properly fix the smoke tests as well as make these flags configurable
 | 
							"--conntrack-tcp-timeout-established=" + strconv.Itoa(ms.conntrackTCPTimeoutEstablished),
 | 
				
			||||||
		// at the framework level (as opposed to hardcoded here)
 | 
					 | 
				
			||||||
		"--conntrack-max=0",
 | 
					 | 
				
			||||||
		"--conntrack-tcp-timeout-established=0",
 | 
					 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	if ms.clientConfig.Host != "" {
 | 
						if ms.clientConfig.Host != "" {
 | 
				
			||||||
@@ -351,4 +351,6 @@ func (ms *MinionServer) AddMinionFlags(fs *pflag.FlagSet) {
 | 
				
			|||||||
	fs.IntVar(&ms.proxyLogV, "proxy-logv", ms.proxyLogV, "Log verbosity of the child kube-proxy.")
 | 
						fs.IntVar(&ms.proxyLogV, "proxy-logv", ms.proxyLogV, "Log verbosity of the child kube-proxy.")
 | 
				
			||||||
	fs.BoolVar(&ms.proxyBindall, "proxy-bindall", ms.proxyBindall, "When true will cause kube-proxy to bind to 0.0.0.0.")
 | 
						fs.BoolVar(&ms.proxyBindall, "proxy-bindall", ms.proxyBindall, "When true will cause kube-proxy to bind to 0.0.0.0.")
 | 
				
			||||||
	fs.StringVar(&ms.proxyMode, "proxy-mode", ms.proxyMode, "Which proxy mode to use: 'userspace' (older) or 'iptables' (faster). If the iptables proxy is selected, regardless of how, but the system's kernel or iptables versions are insufficient, this always falls back to the userspace proxy.")
 | 
						fs.StringVar(&ms.proxyMode, "proxy-mode", ms.proxyMode, "Which proxy mode to use: 'userspace' (older) or 'iptables' (faster). If the iptables proxy is selected, regardless of how, but the system's kernel or iptables versions are insufficient, this always falls back to the userspace proxy.")
 | 
				
			||||||
 | 
						fs.IntVar(&ms.conntrackMax, "conntrack-max", ms.conntrackMax, "Maximum number of NAT connections to track on agent nodes (0 to leave as-is)")
 | 
				
			||||||
 | 
						fs.IntVar(&ms.conntrackTCPTimeoutEstablished, "conntrack-tcp-timeout-established", ms.conntrackTCPTimeoutEstablished, "Idle timeout for established TCP connections on agent nodes (0 to leave as-is)")
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -163,6 +163,8 @@ type SchedulerServer struct {
 | 
				
			|||||||
	containPodResources            bool
 | 
						containPodResources            bool
 | 
				
			||||||
	nodeRelistPeriod               time.Duration
 | 
						nodeRelistPeriod               time.Duration
 | 
				
			||||||
	sandboxOverlay                 string
 | 
						sandboxOverlay                 string
 | 
				
			||||||
 | 
						conntrackMax                   int
 | 
				
			||||||
 | 
						conntrackTCPTimeoutEstablished int
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	executable  string // path to the binary running this service
 | 
						executable  string // path to the binary running this service
 | 
				
			||||||
	client      *client.Client
 | 
						client      *client.Client
 | 
				
			||||||
@@ -216,6 +218,12 @@ func NewSchedulerServer() *SchedulerServer {
 | 
				
			|||||||
		kubeletEnableDebuggingHandlers: true,
 | 
							kubeletEnableDebuggingHandlers: true,
 | 
				
			||||||
		containPodResources:            true,
 | 
							containPodResources:            true,
 | 
				
			||||||
		nodeRelistPeriod:               defaultNodeRelistPeriod,
 | 
							nodeRelistPeriod:               defaultNodeRelistPeriod,
 | 
				
			||||||
 | 
							conntrackTCPTimeoutEstablished: 0, // non-zero values may require hand-tuning other sysctl's on the host; do so with caution
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
							// non-zero values can trigger failures when updating /sys/module/nf_conntrack/parameters/hashsize
 | 
				
			||||||
 | 
							// when kube-proxy is running in a non-root netns (i.e. not init_net); setting this to a non-zero value will
 | 
				
			||||||
 | 
							// impact connection tracking for the entire host on which kube-proxy is running. xref (k8s#19182)
 | 
				
			||||||
 | 
							conntrackMax: 0,
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
	// cache this for later use. also useful in case the original binary gets deleted, e.g.
 | 
						// cache this for later use. also useful in case the original binary gets deleted, e.g.
 | 
				
			||||||
	// during upgrades, development deployments, etc.
 | 
						// during upgrades, development deployments, etc.
 | 
				
			||||||
@@ -294,6 +302,8 @@ func (s *SchedulerServer) addCoreFlags(fs *pflag.FlagSet) {
 | 
				
			|||||||
	fs.DurationVar(&s.kubeletSyncFrequency, "kubelet-sync-frequency", s.kubeletSyncFrequency, "Max period between synchronizing running containers and config")
 | 
						fs.DurationVar(&s.kubeletSyncFrequency, "kubelet-sync-frequency", s.kubeletSyncFrequency, "Max period between synchronizing running containers and config")
 | 
				
			||||||
	fs.StringVar(&s.kubeletNetworkPluginName, "kubelet-network-plugin", s.kubeletNetworkPluginName, "<Warning: Alpha feature> The name of the network plugin to be invoked for various events in kubelet/pod lifecycle")
 | 
						fs.StringVar(&s.kubeletNetworkPluginName, "kubelet-network-plugin", s.kubeletNetworkPluginName, "<Warning: Alpha feature> The name of the network plugin to be invoked for various events in kubelet/pod lifecycle")
 | 
				
			||||||
	fs.BoolVar(&s.kubeletEnableDebuggingHandlers, "kubelet-enable-debugging-handlers", s.kubeletEnableDebuggingHandlers, "Enables kubelet endpoints for log collection and local running of containers and commands")
 | 
						fs.BoolVar(&s.kubeletEnableDebuggingHandlers, "kubelet-enable-debugging-handlers", s.kubeletEnableDebuggingHandlers, "Enables kubelet endpoints for log collection and local running of containers and commands")
 | 
				
			||||||
 | 
						fs.IntVar(&s.conntrackMax, "conntrack-max", s.conntrackMax, "Maximum number of NAT connections to track on agent nodes (0 to leave as-is)")
 | 
				
			||||||
 | 
						fs.IntVar(&s.conntrackTCPTimeoutEstablished, "conntrack-tcp-timeout-established", s.conntrackTCPTimeoutEstablished, "Idle timeout for established TCP connections on agent nodes (0 to leave as-is)")
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	//TODO(jdef) support this flag once we have a better handle on mesos-dns and k8s DNS integration
 | 
						//TODO(jdef) support this flag once we have a better handle on mesos-dns and k8s DNS integration
 | 
				
			||||||
	//fs.StringVar(&s.HADomain, "ha-domain", s.HADomain, "Domain of the HA scheduler service, only used in HA mode. If specified may be used to construct artifact download URIs.")
 | 
						//fs.StringVar(&s.HADomain, "ha-domain", s.HADomain, "Domain of the HA scheduler service, only used in HA mode. If specified may be used to construct artifact download URIs.")
 | 
				
			||||||
@@ -413,6 +423,8 @@ func (s *SchedulerServer) prepareExecutorInfo(hks hyperkube.Interface) (*mesos.E
 | 
				
			|||||||
	ci.Arguments = append(ci.Arguments, fmt.Sprintf("--sync-frequency=%v", s.kubeletSyncFrequency))
 | 
						ci.Arguments = append(ci.Arguments, fmt.Sprintf("--sync-frequency=%v", s.kubeletSyncFrequency))
 | 
				
			||||||
	ci.Arguments = append(ci.Arguments, fmt.Sprintf("--contain-pod-resources=%t", s.containPodResources))
 | 
						ci.Arguments = append(ci.Arguments, fmt.Sprintf("--contain-pod-resources=%t", s.containPodResources))
 | 
				
			||||||
	ci.Arguments = append(ci.Arguments, fmt.Sprintf("--enable-debugging-handlers=%t", s.kubeletEnableDebuggingHandlers))
 | 
						ci.Arguments = append(ci.Arguments, fmt.Sprintf("--enable-debugging-handlers=%t", s.kubeletEnableDebuggingHandlers))
 | 
				
			||||||
 | 
						ci.Arguments = append(ci.Arguments, fmt.Sprintf("--conntrack-max=%d", s.conntrackMax))
 | 
				
			||||||
 | 
						ci.Arguments = append(ci.Arguments, fmt.Sprintf("--conntrack-tcp-timeout-established=%d", s.conntrackTCPTimeoutEstablished))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	if s.authPath != "" {
 | 
						if s.authPath != "" {
 | 
				
			||||||
		//TODO(jdef) should probably support non-local files, e.g. hdfs:///some/config/file
 | 
							//TODO(jdef) should probably support non-local files, e.g. hdfs:///some/config/file
 | 
				
			||||||
 
 | 
				
			|||||||
		Reference in New Issue
	
	Block a user