Add exclude filtering to regex and prefix matches (#182)

This commit is contained in:
Jay Gabriels
2022-02-05 13:14:54 -08:00
committed by GitHub
parent b3f8f62571
commit f925b5bb06
10 changed files with 337 additions and 101 deletions

View File

@@ -7,6 +7,7 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/)
## [1.7.7] - Unreleased
### Added
- Filtering repos by topics for gitlab; thanks @dschafhauser
- Exclude filtering for prefix and regex; thanks @dschafhauser
### Changed
### Deprecated
### Removed

View File

@@ -77,11 +77,21 @@ func cloneFunc(cmd *cobra.Command, argz []string) {
os.Setenv("GHORG_MATCH_PREFIX", prefix)
}
if cmd.Flags().Changed("exclude-match-prefix") {
prefix := cmd.Flag("exclude-match-prefix").Value.String()
os.Setenv("GHORG_EXCLUDE_MATCH_PREFIX", prefix)
}
if cmd.Flags().Changed("match-regex") {
regex := cmd.Flag("match-regex").Value.String()
os.Setenv("GHORG_MATCH_REGEX", regex)
}
if cmd.Flags().Changed("exclude-match-regex") {
regex := cmd.Flag("exclude-match-regex").Value.String()
os.Setenv("GHORG_EXCLUDE_MATCH_REGEX", regex)
}
if cmd.Flags().Changed("skip-archived") {
os.Setenv("GHORG_SKIP_ARCHIVED", "true")
}
@@ -287,13 +297,13 @@ func readGhorgIgnore() ([]string, error) {
return lines, scanner.Err()
}
func filterByRegex(repos []scm.Repo) []scm.Repo {
func filterByRegexMatch(repos []scm.Repo) []scm.Repo {
filteredRepos := []scm.Repo{}
regex := fmt.Sprint(os.Getenv("GHORG_MATCH_REGEX"))
for i, r := range repos {
re := regexp.MustCompile(regex)
match := re.FindString(getAppNameFromURL(r.URL))
match := re.FindString(r.Name)
if match != "" {
filteredRepos = append(filteredRepos, repos[i])
}
@@ -302,6 +312,59 @@ func filterByRegex(repos []scm.Repo) []scm.Repo {
return filteredRepos
}
// filterByExcludeRegexMatch returns the repos whose Name is NOT matched by the
// regular expression stored in GHORG_EXCLUDE_MATCH_REGEX. Input order is
// preserved and a new slice is always returned.
//
// When the variable is empty nothing is excluded. The pattern is compiled a
// single time before the loop; regexp.MustCompile panics if it is invalid.
func filterByExcludeRegexMatch(repos []scm.Repo) []scm.Repo {
	filteredRepos := []scm.Repo{}

	pattern := os.Getenv("GHORG_EXCLUDE_MATCH_REGEX")
	if pattern == "" {
		// An empty pattern would match every name; treat it as "no filter".
		return append(filteredRepos, repos...)
	}

	// Loop invariant: compile once instead of once per repo.
	re := regexp.MustCompile(pattern)

	for i := range repos {
		// MatchString reports whether the pattern matched at all. Comparing
		// FindString's result with "" cannot tell "no match" apart from a
		// successful empty match (e.g. patterns such as `x*`).
		if re.MatchString(repos[i].Name) {
			continue
		}
		filteredRepos = append(filteredRepos, repos[i])
	}

	return filteredRepos
}
// filterByMatchPrefix returns the repos whose Name starts with at least one of
// the comma separated prefixes in GHORG_MATCH_PREFIX. The comparison is case
// insensitive, input order is preserved, and each repo appears at most once in
// the result even when several prefixes match it.
//
// Empty list entries (for example from a trailing comma) are ignored; if no
// usable prefix remains, every repo is returned.
func filterByMatchPrefix(repos []scm.Repo) []scm.Repo {
	filteredRepos := []scm.Repo{}

	// Parse and lower-case the prefix list once rather than once per repo.
	var prefixes []string
	for _, p := range strings.Split(os.Getenv("GHORG_MATCH_PREFIX"), ",") {
		p = strings.ToLower(strings.TrimSpace(p))
		if p == "" {
			// An empty prefix matches every name, which would defeat the filter.
			continue
		}
		prefixes = append(prefixes, p)
	}

	if len(prefixes) == 0 {
		return append(filteredRepos, repos...)
	}

	for i := range repos {
		name := strings.ToLower(repos[i].Name)
		for _, p := range prefixes {
			if strings.HasPrefix(name, p) {
				filteredRepos = append(filteredRepos, repos[i])
				// Stop at the first hit so a repo matching several prefixes
				// (e.g. "te,test") is not added more than once.
				break
			}
		}
	}

	return filteredRepos
}
// filterByExcludeMatchPrefix returns the repos whose Name does NOT start with
// any of the comma separated prefixes in GHORG_EXCLUDE_MATCH_PREFIX. The
// comparison is case insensitive and input order is preserved.
//
// Empty list entries (for example from a trailing comma) are ignored, because
// an empty prefix matches every name and would exclude all repos.
func filterByExcludeMatchPrefix(repos []scm.Repo) []scm.Repo {
	filteredRepos := []scm.Repo{}

	// Parse and lower-case the prefix list once rather than once per repo.
	var prefixes []string
	for _, p := range strings.Split(os.Getenv("GHORG_EXCLUDE_MATCH_PREFIX"), ",") {
		p = strings.ToLower(strings.TrimSpace(p))
		if p == "" {
			continue
		}
		prefixes = append(prefixes, p)
	}

	for i := range repos {
		name := strings.ToLower(repos[i].Name)

		exclude := false
		for _, p := range prefixes {
			if strings.HasPrefix(name, p) {
				exclude = true
				// One matching prefix is enough; no need to test the rest.
				break
			}
		}

		if !exclude {
			filteredRepos = append(filteredRepos, repos[i])
		}
	}

	return filteredRepos
}
// exclude wikis from repo count
func getRepoCountOnly(targets []scm.Repo) int {
count := 0
@@ -325,12 +388,27 @@ func printDryRun(repos []scm.Repo) {
// CloneAllRepos clones all repos
func CloneAllRepos(git git.Gitter, cloneTargets []scm.Repo) {
// resc, errc, infoc := make(chan string), make(chan error), make(chan error)
// Filter repos that have attributes that don't need specific scm api calls
if os.Getenv("GHORG_MATCH_REGEX") != "" {
colorlog.PrintInfo("Filtering repos down by regex that match the provided...")
colorlog.PrintInfo("Filtering repos down by including regex matches...")
fmt.Println("")
cloneTargets = filterByRegex(cloneTargets)
cloneTargets = filterByRegexMatch(cloneTargets)
}
if os.Getenv("GHORG_EXCLUDE_MATCH_REGEX") != "" {
colorlog.PrintInfo("Filtering repos down by excluding regex matches...")
fmt.Println("")
cloneTargets = filterByExcludeRegexMatch(cloneTargets)
}
if os.Getenv("GHORG_MATCH_PREFIX") != "" {
colorlog.PrintInfo("Filtering repos down by including prefix matches...")
fmt.Println("")
cloneTargets = filterByMatchPrefix(cloneTargets)
}
if os.Getenv("GHORG_EXCLUDE_MATCH_PREFIX") != "" {
colorlog.PrintInfo("Filtering repos down by excluding prefix matches...")
fmt.Println("")
cloneTargets = filterByExcludeMatchPrefix(cloneTargets)
}
// filter repos down based on ghorgignore if one exists
@@ -394,20 +472,15 @@ func CloneAllRepos(git git.Gitter, cloneTargets []scm.Repo) {
var cloneCount, pulledCount int
for _, target := range cloneTargets {
appName := getAppNameFromURL(target.URL)
branch := target.CloneBranch
repo := target
for i := range cloneTargets {
repo := cloneTargets[i]
repoSlug := getAppNameFromURL(repo.URL)
limit.Execute(func() {
path := appName
if repo.Path != "" && os.Getenv("GHORG_PRESERVE_DIRECTORY_STRUCTURE") == "true" {
path = repo.Path
}
repo.HostPath = filepath.Join(os.Getenv("GHORG_ABSOLUTE_PATH_TO_CLONE_TO"), parentFolder, configs.GetCorrectFilePathSeparator(), path)
repo.HostPath = filepath.Join(os.Getenv("GHORG_ABSOLUTE_PATH_TO_CLONE_TO"), parentFolder, configs.GetCorrectFilePathSeparator(), repoSlug)
if repo.IsWiki {
if !strings.HasSuffix(repo.HostPath, ".wiki") {
@@ -416,7 +489,7 @@ func CloneAllRepos(git git.Gitter, cloneTargets []scm.Repo) {
}
if os.Getenv("GHORG_BACKUP") == "true" {
repo.HostPath = filepath.Join(os.Getenv("GHORG_ABSOLUTE_PATH_TO_CLONE_TO"), parentFolder+"_backup", configs.GetCorrectFilePathSeparator(), path)
repo.HostPath = filepath.Join(os.Getenv("GHORG_ABSOLUTE_PATH_TO_CLONE_TO"), parentFolder+"_backup", configs.GetCorrectFilePathSeparator(), repoSlug)
}
action := "cloning"
@@ -521,7 +594,7 @@ func CloneAllRepos(git git.Gitter, cloneTargets []scm.Repo) {
if os.Getenv("GHORG_BRANCH") != "" {
err := git.Checkout(repo)
if err != nil {
e := fmt.Sprintf("Could not checkout out %s, branch may not exist, no changes made Repo: %s Error: %v", branch, repo.URL, err)
e := fmt.Sprintf("Could not checkout out %s, branch may not exist, no changes made Repo: %s Error: %v", repo.CloneBranch, repo.URL, err)
cloneInfos = append(cloneInfos, e)
return
}
@@ -551,7 +624,7 @@ func CloneAllRepos(git git.Gitter, cloneTargets []scm.Repo) {
}
}
colorlog.PrintSuccess("Success " + action + " repo: " + repo.URL + " -> branch: " + branch)
colorlog.PrintSuccess("Success " + action + " repo: " + repo.URL + " -> branch: " + repo.CloneBranch)
})
}
@@ -614,6 +687,15 @@ func PrintConfigs() {
if os.Getenv("GHORG_MATCH_REGEX") != "" {
colorlog.PrintInfo("* Regex Match : " + os.Getenv("GHORG_MATCH_REGEX"))
}
if os.Getenv("GHORG_EXCLUDE_MATCH_REGEX") != "" {
colorlog.PrintInfo("* Exclude Regex : " + os.Getenv("GHORG_EXCLUDE_MATCH_REGEX"))
}
if os.Getenv("GHORG_MATCH_PREFIX") != "" {
colorlog.PrintInfo("* Prefix Match: " + os.Getenv("GHORG_MATCH_PREFIX"))
}
if os.Getenv("GHORG_EXCLUDE_MATCH_PREFIX") != "" {
colorlog.PrintInfo("* Exclude Prefix: " + os.Getenv("GHORG_EXCLUDE_MATCH_PREFIX"))
}
if os.Getenv("GHORG_OUTPUT_DIR") != "" {
colorlog.PrintInfo("* Output Dir : " + parentFolder)
}

View File

@@ -4,6 +4,7 @@ import (
"io/ioutil"
"log"
"os"
"strings"
"testing"
"github.com/gabrie30/ghorg/scm"
@@ -57,7 +58,7 @@ func NewMockGit() MockGitClient {
}
func (g MockGitClient) Clone(repo scm.Repo) error {
_, err := ioutil.TempDir(os.Getenv("GHORG_ABSOLUTE_PATH_TO_CLONE_TO"), "ghorg_test_repo")
_, err := ioutil.TempDir(os.Getenv("GHORG_ABSOLUTE_PATH_TO_CLONE_TO"), repo.Name)
if err != nil {
log.Fatal(err)
}
@@ -93,7 +94,8 @@ func (g MockGitClient) FetchAll(repo scm.Repo) error {
}
func TestInitialClone(t *testing.T) {
dir, err := ioutil.TempDir(".", "ghorg_tests")
defer UnsetEnv("GHORG_")()
dir, err := ioutil.TempDir("", "ghorg_test_initial")
if err != nil {
log.Fatal(err)
}
@@ -111,9 +113,214 @@ func TestInitialClone(t *testing.T) {
mockGit := NewMockGit()
CloneAllRepos(mockGit, testRepos)
got, _ := ioutil.ReadDir(dir)
got, _ := os.ReadDir(dir)
expected := len(testRepos)
if len(got) != expected {
t.Errorf("Wrong number of repos in clone, expected: %v, got: %v", expected, got)
}
}
// TestMatchPrefix sets GHORG_MATCH_PREFIX to "test" and checks that
// CloneAllRepos leaves exactly three entries in the clone directory, one for
// each repo whose name begins with that prefix.
func TestMatchPrefix(t *testing.T) {
	// Start from a clean GHORG_ environment and restore it when the test ends.
	defer UnsetEnv("GHORG_")()

	dir, err := ioutil.TempDir("", "ghorg_test_match_prefix")
	if err != nil {
		// t.Fatal (unlike log.Fatal) lets the deferred cleanups run.
		t.Fatal(err)
	}
	defer os.RemoveAll(dir)

	os.Setenv("GHORG_ABSOLUTE_PATH_TO_CLONE_TO", dir)
	os.Setenv("GHORG_CONCURRENCY", "1")
	os.Setenv("GHORG_MATCH_PREFIX", "test")

	var testRepos = []scm.Repo{
		{
			Name: "testRepoOne",
		},
		{
			Name: "testRepoTwo",
		},
		{
			Name: "testRepoThree",
		},
		{
			Name: "nottestRepoTwo",
		},
		{
			Name: "nottestRepoThree",
		},
	}

	mockGit := NewMockGit()
	CloneAllRepos(mockGit, testRepos)

	got, err := os.ReadDir(dir)
	if err != nil {
		t.Fatal(err)
	}

	expected := 3
	if len(got) != expected {
		t.Errorf("Wrong number of repos in clone, expected: %v, got: %v", expected, len(got))
	}
}
// TestExcludeMatchPrefix sets GHORG_EXCLUDE_MATCH_PREFIX to "test" and checks
// that CloneAllRepos leaves exactly two entries in the clone directory, one
// for each repo whose name does not begin with that prefix.
func TestExcludeMatchPrefix(t *testing.T) {
	// Start from a clean GHORG_ environment and restore it when the test ends.
	defer UnsetEnv("GHORG_")()

	dir, err := ioutil.TempDir("", "ghorg_test_exclude_match_prefix")
	if err != nil {
		// t.Fatal (unlike log.Fatal) lets the deferred cleanups run.
		t.Fatal(err)
	}
	defer os.RemoveAll(dir)

	os.Setenv("GHORG_ABSOLUTE_PATH_TO_CLONE_TO", dir)
	os.Setenv("GHORG_CONCURRENCY", "1")
	os.Setenv("GHORG_EXCLUDE_MATCH_PREFIX", "test")

	var testRepos = []scm.Repo{
		{
			Name: "testRepoOne",
		},
		{
			Name: "testRepoTwo",
		},
		{
			Name: "testRepoThree",
		},
		{
			Name: "nottestRepoTwo",
		},
		{
			Name: "nottestRepoThree",
		},
	}

	mockGit := NewMockGit()
	CloneAllRepos(mockGit, testRepos)

	got, err := os.ReadDir(dir)
	if err != nil {
		t.Fatal(err)
	}

	expected := 2
	if len(got) != expected {
		// Report the count, not the raw directory entries.
		t.Errorf("Wrong number of repos in clone, expected: %v, got: %v", expected, len(got))
	}
}
// TestMatchRegex sets GHORG_MATCH_REGEX to "^test-" and checks that
// CloneAllRepos leaves exactly three entries in the clone directory, one for
// each repo whose name matches the pattern.
func TestMatchRegex(t *testing.T) {
	// Start from a clean GHORG_ environment and restore it when the test ends.
	defer UnsetEnv("GHORG_")()

	dir, err := ioutil.TempDir("", "ghorg_test_match_regex")
	if err != nil {
		// t.Fatal (unlike log.Fatal) lets the deferred cleanups run.
		t.Fatal(err)
	}
	defer os.RemoveAll(dir)

	os.Setenv("GHORG_ABSOLUTE_PATH_TO_CLONE_TO", dir)
	os.Setenv("GHORG_CONCURRENCY", "1")
	os.Setenv("GHORG_MATCH_REGEX", "^test-")

	var testRepos = []scm.Repo{
		{
			Name: "test-RepoOne",
		},
		{
			Name: "test-RepoTwo",
		},
		{
			Name: "test-RepoThree",
		},
		{
			Name: "nottestRepoTwo",
		},
		{
			Name: "nottestRepoThree",
		},
	}

	mockGit := NewMockGit()
	CloneAllRepos(mockGit, testRepos)

	got, err := os.ReadDir(dir)
	if err != nil {
		t.Fatal(err)
	}

	expected := 3
	if len(got) != expected {
		// Report the count, not the raw directory entries.
		t.Errorf("Wrong number of repos in clone, expected: %v, got: %v", expected, len(got))
	}
}
// TestExcludeMatchRegex sets GHORG_EXCLUDE_MATCH_REGEX to "^test-" and checks
// that CloneAllRepos leaves exactly two entries in the clone directory, one
// for each repo whose name does not match the pattern.
func TestExcludeMatchRegex(t *testing.T) {
	// Start from a clean GHORG_ environment and restore it when the test ends.
	defer UnsetEnv("GHORG_")()

	testDescriptor := "ghorg_test_exclude_match_regex"
	dir, err := ioutil.TempDir("", testDescriptor)
	if err != nil {
		// t.Fatal (unlike log.Fatal) lets the deferred cleanups run.
		t.Fatal(err)
	}
	defer os.RemoveAll(dir)

	os.Setenv("GHORG_ABSOLUTE_PATH_TO_CLONE_TO", dir)
	os.Setenv("GHORG_CONCURRENCY", "1")
	os.Setenv("GHORG_OUTPUT_DIR", testDescriptor)
	os.Setenv("GHORG_EXCLUDE_MATCH_REGEX", "^test-")

	var testRepos = []scm.Repo{
		{
			Name: "test-RepoOne",
		},
		{
			Name: "test-RepoTwo",
		},
		{
			Name: "test-RepoThree",
		},
		{
			Name: "nottestRepoTwo",
		},
		{
			Name: "nottestRepoThree",
		},
	}

	mockGit := NewMockGit()
	CloneAllRepos(mockGit, testRepos)

	got, err := os.ReadDir(dir)
	if err != nil {
		t.Fatal(err)
	}

	expected := 2
	if len(got) != expected {
		// Report the count, not the raw directory entries.
		t.Errorf("Wrong number of repos in clone, expected: %v, got: %v", expected, len(got))
	}
}
// UnsetEnv removes every environment variable whose "NAME=value" entry begins
// with prefix and hands back a restore function. Calling restore puts the
// removed variables back with their earlier values, resets any whose value was
// changed in the meantime, and unsets variables with the prefix that were
// created after UnsetEnv ran. Typical use: defer UnsetEnv("PREFIX_")().
//
// The process environment is shared state, so results may be unpredictable
// for tests that run with t.Parallel.
func UnsetEnv(prefix string) (restore func()) {
	// snapshot collects the current name/value pairs of all prefixed envars.
	snapshot := func() map[string]string {
		vars := map[string]string{}
		for _, entry := range os.Environ() {
			if strings.HasPrefix(entry, prefix) {
				pair := strings.SplitN(entry, "=", 2)
				vars[pair[0]] = pair[1]
			}
		}
		return vars
	}

	saved := snapshot()
	for name := range saved {
		os.Unsetenv(name)
	}

	return func() {
		current := snapshot()

		for name, value := range current {
			previous, existed := saved[name]
			switch {
			case !existed:
				// Added after the snapshot was taken: remove it again.
				os.Unsetenv(name)
			case value != previous:
				// Present before but modified since: roll the value back.
				os.Setenv(name, previous)
			}
		}

		// Anything saved that is missing now was removed and must come back.
		for name, previous := range saved {
			if _, present := current[name]; !present {
				os.Setenv(name, previous)
			}
		}
	}
}

View File

@@ -38,7 +38,9 @@ var (
cloneInfos []string
targetCloneSource string
matchPrefix string
excludeMatchPrefix string
matchRegex string
excludeMatchRegex string
config string
)
@@ -163,7 +165,6 @@ func InitConfig() {
getOrSetDefaults("GHORG_INSECURE_GITLAB_CLIENT")
getOrSetDefaults("GHORG_BACKUP")
getOrSetDefaults("GHORG_CONCURRENCY")
getOrSetDefaults("GHORG_MATCH_PREFIX")
// Optionally set
getOrSetDefaults("GHORG_GITHUB_TOKEN")
getOrSetDefaults("GHORG_COLOR")
@@ -176,6 +177,9 @@ func InitConfig() {
getOrSetDefaults("GHORG_PRESERVE_DIRECTORY_STRUCTURE")
getOrSetDefaults("GHORG_OUTPUT_DIR")
getOrSetDefaults("GHORG_MATCH_REGEX")
getOrSetDefaults("GHORG_EXCLUDE_MATCH_REGEX")
getOrSetDefaults("GHORG_MATCH_PREFIX")
getOrSetDefaults("GHORG_EXCLUDE_MATCH_PREFIX")
if os.Getenv("GHORG_DEBUG") != "" {
viper.Debug()
@@ -216,8 +220,10 @@ func init() {
cloneCmd.Flags().StringVarP(&concurrency, "concurrency", "", "", "GHORG_CONCURRENCY - max goroutines to spin up while cloning (default 25)")
cloneCmd.Flags().StringVarP(&topics, "topics", "", "", "GHORG_TOPICS - comma separated list of github/gitea topics to filter for")
cloneCmd.Flags().StringVarP(&outputDir, "output-dir", "", "", "GHORG_OUTPUT_DIR - name of directory repos will be cloned into (default name of org/repo being cloned")
cloneCmd.Flags().StringVarP(&matchPrefix, "match-prefix", "", "", "GHORG_MATCH_PREFIX - only clone repos with matching prefix, can be a comma separated list (default \"\")")
cloneCmd.Flags().StringVarP(&matchPrefix, "match-prefix", "", "", "GHORG_MATCH_PREFIX - only clone repos with matching prefix, can be a comma separated list")
cloneCmd.Flags().StringVarP(&excludeMatchPrefix, "exclude-match-prefix", "", "", "GHORG_EXCLUDE_MATCH_PREFIX - exclude cloning repos with matching prefix, can be a comma separated list")
cloneCmd.Flags().StringVarP(&matchRegex, "match-regex", "", "", "GHORG_MATCH_REGEX - only clone repos that match name to regex provided")
cloneCmd.Flags().StringVarP(&excludeMatchRegex, "exclude-match-regex", "", "", "GHORG_EXCLUDE_MATCH_REGEX - exclude cloning repos that match name to regex provided")
rootCmd.AddCommand(lsCmd, versionCmd, cloneCmd)
}

View File

@@ -119,15 +119,21 @@ GHORG_CONCURRENCY: 25
GHORG_OUTPUT_DIR:
# Only clone repos with matching prefix, can be a comma separated list
# default: ""
# flag (--match-prefix) eg: --match-prefix=backend
GHORG_MATCH_PREFIX:
# Exclude cloning repos with matching prefix, can be a comma separated list
# flag (--exclude-match-prefix) eg: --exclude-match-prefix=backend
GHORG_EXCLUDE_MATCH_PREFIX:
# Only clone repos that match name to regex provided
# default: match all
# flag (--match-regex) eg: --match-regex=^foo
GHORG_MATCH_REGEX:
# Exclude cloning repos that match name to regex provided
# flag (--exclude-match-regex) eg: --exclude-match-regex=^foo
GHORG_EXCLUDE_MATCH_REGEX:
# Only clones new repos and does not perform a git clean on existing repos. Useful if you don't want to lose changes made to repos in the org/user directory.
# flag (--no-clean)
GHORG_NO_CLEAN: false

View File

@@ -2,7 +2,6 @@ package scm
import (
"os"
"strings"
"github.com/gabrie30/ghorg/colorlog"
"github.com/ktrysmt/go-bitbucket"
@@ -75,21 +74,8 @@ func (_ Bitbucket) filter(resp interface{}) (repoData []Repo, err error) {
colorlog.PrintError("WARNING: Filtering by topics is not supported for Bitbucket SCM")
}
if os.Getenv("GHORG_MATCH_PREFIX") != "" {
repoName := strings.ToLower(clone["name"].(string))
foundPrefix := false
pfs := strings.Split(os.Getenv("GHORG_MATCH_PREFIX"), ",")
for _, p := range pfs {
if strings.HasPrefix(repoName, strings.ToLower(p)) {
foundPrefix = true
}
}
if foundPrefix == false {
continue
}
}
r := Repo{}
r.Name = clone["name"].(string)
if os.Getenv("GHORG_BRANCH") == "" {
var defaultBranch string

View File

@@ -121,13 +121,13 @@ func (c Gitea) filter(rps []*gitea.Repository) (repoData []Repo, err error) {
for _, rp := range rps {
if os.Getenv("GHORG_SKIP_ARCHIVED") == "true" {
if rp.Archived == true {
if rp.Archived {
continue
}
}
if os.Getenv("GHORG_SKIP_FORKS") == "true" {
if rp.Fork == true {
if rp.Fork {
continue
}
}
@@ -142,22 +142,9 @@ func (c Gitea) filter(rps []*gitea.Repository) (repoData []Repo, err error) {
}
}
if os.Getenv("GHORG_MATCH_PREFIX") != "" {
repoName := strings.ToLower(rp.Name)
foundPrefix := false
pfs := strings.Split(os.Getenv("GHORG_MATCH_PREFIX"), ",")
for _, p := range pfs {
if strings.HasPrefix(repoName, strings.ToLower(p)) {
foundPrefix = true
}
}
if foundPrefix == false {
continue
}
}
r := Repo{}
r.Path = rp.FullName
r.Name = rp.Name
if os.Getenv("GHORG_BRANCH") == "" {
defaultBranch := rp.DefaultBranch

View File

@@ -139,13 +139,13 @@ func (c Github) filter(allRepos []*github.Repository) []Repo {
for _, ghRepo := range allRepos {
if os.Getenv("GHORG_SKIP_ARCHIVED") == "true" {
if *ghRepo.Archived == true {
if *ghRepo.Archived {
continue
}
}
if os.Getenv("GHORG_SKIP_FORKS") == "true" {
if *ghRepo.Fork == true {
if *ghRepo.Fork {
continue
}
}
@@ -154,22 +154,10 @@ func (c Github) filter(allRepos []*github.Repository) []Repo {
continue
}
if os.Getenv("GHORG_MATCH_PREFIX") != "" {
repoName := strings.ToLower(*ghRepo.Name)
foundPrefix := false
pfs := strings.Split(os.Getenv("GHORG_MATCH_PREFIX"), ",")
for _, p := range pfs {
if strings.HasPrefix(repoName, strings.ToLower(p)) {
foundPrefix = true
}
}
if foundPrefix == false {
continue
}
}
r := Repo{}
r.Name = *ghRepo.Name
if os.Getenv("GHORG_BRANCH") == "" {
defaultBranch := ghRepo.GetDefaultBranch()
if defaultBranch == "" {

View File

@@ -133,19 +133,4 @@ func TestGetOrgRepos(t *testing.T) {
}
os.Setenv("GHORG_TOPICS", "")
})
t.Run("Find all repos with specific prefix", func(tt *testing.T) {
os.Setenv("GHORG_MATCH_PREFIX", "tp-")
resp, err := github.GetOrgRepos("testorg")
if err != nil {
t.Fatal(err)
}
want := 3
got := len(resp)
if want != got {
tt.Errorf("Expected %v repo, got: %v", want, got)
}
os.Setenv("GHORG_MATCH_PREFIX", "")
})
}

View File

@@ -222,7 +222,7 @@ func (c Gitlab) filter(ps []*gitlab.Project) []Repo {
for _, p := range ps {
if os.Getenv("GHORG_SKIP_ARCHIVED") == "true" {
if p.Archived == true {
if p.Archived {
continue
}
}
@@ -237,22 +237,10 @@ func (c Gitlab) filter(ps []*gitlab.Project) []Repo {
continue
}
if os.Getenv("GHORG_MATCH_PREFIX") != "" {
repoName := strings.ToLower(p.Name)
foundPrefix := false
pfs := strings.Split(os.Getenv("GHORG_MATCH_PREFIX"), ",")
for _, p := range pfs {
if strings.HasPrefix(repoName, strings.ToLower(p)) {
foundPrefix = true
}
}
if foundPrefix == false {
continue
}
}
r := Repo{}
r.Name = p.Name
if os.Getenv("GHORG_BRANCH") == "" {
defaultBranch := p.DefaultBranch
if defaultBranch == "" {