mirror of
				https://github.com/optim-enterprises-bv/kubernetes.git
				synced 2025-11-03 19:58:17 +00:00 
			
		
		
		
	AWS: Fix race in security-group read/create
We need to find the ID for a named security group, or create a new one. We do this by listing the security groups, and then doing a create if we cannot find one. This is a race though; against another thread if the AWS API were consistent, but generally because the AWS API is actually eventually consistent. We wrap it in a retry loop.
This commit is contained in:
		@@ -50,6 +50,12 @@ const ProviderName = "aws"
 | 
			
		||||
// The tag name we use to differentiate multiple logically independent clusters running in the same AZ
 | 
			
		||||
const TagNameKubernetesCluster = "KubernetesCluster"
 | 
			
		||||
 | 
			
		||||
// We sometimes read to see if something exists; then try to create it if we didn't find it
 | 
			
		||||
// This can fail once in a consistent system if done in parallel
 | 
			
		||||
// In an eventually consistent system, it could fail unboundedly
 | 
			
		||||
// MaxReadThenCreateRetries sets the maxiumum number of attempts we will make
 | 
			
		||||
const MaxReadThenCreateRetries = 30
 | 
			
		||||
 | 
			
		||||
// Abstraction over AWS, to allow mocking/other implementations
 | 
			
		||||
type AWSServices interface {
 | 
			
		||||
	Compute(region string) (EC2, error)
 | 
			
		||||
@@ -1656,37 +1662,54 @@ func (s *AWSCloud) removeSecurityGroupIngress(securityGroupId string, removePerm
 | 
			
		||||
// Makes sure the security group exists
 | 
			
		||||
// Returns the security group id or error
 | 
			
		||||
func (s *AWSCloud) ensureSecurityGroup(name string, description string, vpcID string) (string, error) {
 | 
			
		||||
	request := &ec2.DescribeSecurityGroupsInput{}
 | 
			
		||||
	filters := []*ec2.Filter{
 | 
			
		||||
		newEc2Filter("group-name", name),
 | 
			
		||||
		newEc2Filter("vpc-id", vpcID),
 | 
			
		||||
	}
 | 
			
		||||
	request.Filters = s.addFilters(filters)
 | 
			
		||||
	groupID := ""
 | 
			
		||||
	attempt := 0
 | 
			
		||||
	for {
 | 
			
		||||
		attempt++
 | 
			
		||||
 | 
			
		||||
	securityGroups, err := s.ec2.DescribeSecurityGroups(request)
 | 
			
		||||
	if err != nil {
 | 
			
		||||
		return "", err
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	if len(securityGroups) >= 1 {
 | 
			
		||||
		if len(securityGroups) > 1 {
 | 
			
		||||
			glog.Warning("Found multiple security groups with name:", name)
 | 
			
		||||
		request := &ec2.DescribeSecurityGroupsInput{}
 | 
			
		||||
		filters := []*ec2.Filter{
 | 
			
		||||
			newEc2Filter("group-name", name),
 | 
			
		||||
			newEc2Filter("vpc-id", vpcID),
 | 
			
		||||
		}
 | 
			
		||||
		request.Filters = s.addFilters(filters)
 | 
			
		||||
 | 
			
		||||
		securityGroups, err := s.ec2.DescribeSecurityGroups(request)
 | 
			
		||||
		if err != nil {
 | 
			
		||||
			return "", err
 | 
			
		||||
		}
 | 
			
		||||
 | 
			
		||||
		if len(securityGroups) >= 1 {
 | 
			
		||||
			if len(securityGroups) > 1 {
 | 
			
		||||
				glog.Warning("Found multiple security groups with name:", name)
 | 
			
		||||
			}
 | 
			
		||||
			return orEmpty(securityGroups[0].GroupID), nil
 | 
			
		||||
		}
 | 
			
		||||
 | 
			
		||||
		createRequest := &ec2.CreateSecurityGroupInput{}
 | 
			
		||||
		createRequest.VPCID = &vpcID
 | 
			
		||||
		createRequest.GroupName = &name
 | 
			
		||||
		createRequest.Description = &description
 | 
			
		||||
 | 
			
		||||
		createResponse, err := s.ec2.CreateSecurityGroup(createRequest)
 | 
			
		||||
		if err != nil {
 | 
			
		||||
			ignore := false
 | 
			
		||||
			if awsError, ok := err.(awserr.Error); ok {
 | 
			
		||||
				if awsError.Code() == "InvalidGroup.Duplicate" && attempt < MaxReadThenCreateRetries {
 | 
			
		||||
					glog.V(2).Infof("Got InvalidGroup.Duplicate while creating security group (race?); will retry")
 | 
			
		||||
					ignore = true
 | 
			
		||||
				}
 | 
			
		||||
			}
 | 
			
		||||
			if !ignore {
 | 
			
		||||
				glog.Error("error creating security group: ", err)
 | 
			
		||||
				return "", err
 | 
			
		||||
			}
 | 
			
		||||
			time.Sleep(1 * time.Second)
 | 
			
		||||
		} else {
 | 
			
		||||
			groupID = orEmpty(createResponse.GroupID)
 | 
			
		||||
			break
 | 
			
		||||
		}
 | 
			
		||||
		return orEmpty(securityGroups[0].GroupID), nil
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	createRequest := &ec2.CreateSecurityGroupInput{}
 | 
			
		||||
	createRequest.VPCID = &vpcID
 | 
			
		||||
	createRequest.GroupName = &name
 | 
			
		||||
	createRequest.Description = &description
 | 
			
		||||
 | 
			
		||||
	createResponse, err := s.ec2.CreateSecurityGroup(createRequest)
 | 
			
		||||
	if err != nil {
 | 
			
		||||
		glog.Error("error creating security group: ", err)
 | 
			
		||||
		return "", err
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	groupID := orEmpty(createResponse.GroupID)
 | 
			
		||||
	if groupID == "" {
 | 
			
		||||
		return "", fmt.Errorf("created security group, but id was not returned: %s", name)
 | 
			
		||||
	}
 | 
			
		||||
@@ -1702,7 +1725,7 @@ func (s *AWSCloud) ensureSecurityGroup(name string, description string, vpcID st
 | 
			
		||||
	tagRequest := &ec2.CreateTagsInput{}
 | 
			
		||||
	tagRequest.Resources = []*string{&groupID}
 | 
			
		||||
	tagRequest.Tags = tags
 | 
			
		||||
	_, err = s.createTags(tagRequest)
 | 
			
		||||
	_, err := s.createTags(tagRequest)
 | 
			
		||||
	if err != nil {
 | 
			
		||||
		// Not clear how to recover fully from this; we're OK because we don't match on tags, but that is a little odd
 | 
			
		||||
		return "", fmt.Errorf("error tagging security group: %v", err)
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user