From 230725234c2e6814d34fb2a56500b73aea7db876 Mon Sep 17 00:00:00 2001 From: gabrie30 Date: Sun, 22 Sep 2024 15:49:21 -0700 Subject: [PATCH] Add/ghorg prune untouched (#459) --- CHANGELOG.md | 1 + cmd/clone.go | 94 ++++++++++- cmd/clone_test.go | 12 ++ cmd/root.go | 10 ++ git/git.go | 51 ++++++ sample-conf.yaml | 12 ++ scripts/github_cloud_integration_tests.sh | 11 ++ scripts/local-gitlab/integration-tests.sh | 196 +++++++++++----------- scripts/local-gitlab/seed.sh | 10 ++ 9 files changed, 298 insertions(+), 99 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index da8f56e..fe5f2b5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,7 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/) ### Added - Reclone name and description to reclone output - GHORG_PRESERVE_SCM_HOSTNAME, note that this feature changes the directory struture that gitlab all-users and all-groups clone into; thanks @rrrix +- GHORG_PRUNE_UNTOUCHED, to prune repos that users make no changes in; thanks @MaxG87 ### Changed ### Deprecated ### Removed diff --git a/cmd/clone.go b/cmd/clone.go index df9a44b..2c1602e 100644 --- a/cmd/clone.go +++ b/cmd/clone.go @@ -187,6 +187,14 @@ func cloneFunc(cmd *cobra.Command, argz []string) { os.Setenv("GHORG_PRUNE_NO_CONFIRM", "true") } + if cmd.Flags().Changed("prune-untouched") { + os.Setenv("GHORG_PRUNE_UNTOUCHED", "true") + } + + if cmd.Flags().Changed("prune-untouched-no-confirm") { + os.Setenv("GHORG_PRUNE_UNTOUCHED_NO_CONFIRM", "true") + } + if cmd.Flags().Changed("fetch-all") { os.Setenv("GHORG_FETCH_ALL", "true") } @@ -663,6 +671,7 @@ func CloneAllRepos(git git.Gitter, cloneTargets []scm.Repo) { // maps in go are not safe for concurrent use var mutex = &sync.RWMutex{} + var untouchedReposToPrune []string for i := range cloneTargets { repo := cloneTargets[i] @@ -738,8 +747,61 @@ func CloneAllRepos(git git.Gitter, cloneTargets []scm.Repo) { repo.HostPath = filepath.Join(outputDirAbsolutePath, repoSlug, 
repo.GitLabSnippetInfo.Title+"-"+repo.GitLabSnippetInfo.ID) } - action := "cloning" repoWillBePulled := repoExistsLocally(repo) + + // Repos are considered untouched if they have no local branches at all (see 2), or if 1, 3 and 4 all hold + // 1. There are no new branches; ghorg only clones one branch, so if there are more then the user has done something in the repo + // 2. There are no branches locally, which means the repo is empty or all commits have been removed + // 3. There are no commits on the default branch locally that are not on the remote + // 4. There are no uncommitted changes locally (git status --short is empty) + if os.Getenv("GHORG_PRUNE_UNTOUCHED") == "true" && repoWillBePulled { + git.FetchCloneBranch(repo) + + branches, err := git.Branch(repo) + if err != nil { + colorlog.PrintError(fmt.Sprintf("Failed to list local branches for repository %s: %v", repo.Name, err)) + return + } + + // Delete if it has no branches + if branches == "" { + untouchedReposToPrune = append(untouchedReposToPrune, repo.HostPath) + return + } + + if len(strings.Split(strings.TrimSpace(branches), "\n")) > 1 { + return + } + + status, err := git.ShortStatus(repo) + if err != nil { + colorlog.PrintError(fmt.Sprintf("Failed to get short status for repository %s: %v", repo.Name, err)) + return + } + + if status != "" { + return + } + + // Check for new commits on the branch that exist locally but not on the remote + commits, err := git.RevListCompare(repo, "HEAD", "@{u}") + if err != nil { + colorlog.PrintError(fmt.Sprintf("Failed to get commit differences for repository %s. The repository may be empty or does not have a .git directory. 
Error: %v", repo.Name, err)) + return + } + if commits != "" { + return + } + + untouchedReposToPrune = append(untouchedReposToPrune, repo.HostPath) + } + + // Don't clone any new repos when prune untouched is active + if os.Getenv("GHORG_PRUNE_UNTOUCHED") == "true" { + return + } + + action := "cloning" if repoWillBePulled { // prevents git from asking for user for credentials, needs to be unset so creds aren't stored err := git.SetOriginWithCredentials(repo) @@ -918,9 +980,30 @@ func CloneAllRepos(git git.Gitter, cloneTargets []scm.Repo) { } limit.WaitAndClose() + var untouchedPrunes int + + if os.Getenv("GHORG_PRUNE_UNTOUCHED") == "true" && len(untouchedReposToPrune) > 0 { + if os.Getenv("GHORG_PRUNE_UNTOUCHED_NO_CONFIRM") != "true" { + colorlog.PrintSuccess(fmt.Sprintf("PLEASE CONFIRM: The following %d untouched repositories will be deleted. Press enter to confirm: ", len(untouchedReposToPrune))) + for _, repoPath := range untouchedReposToPrune { + colorlog.PrintInfo(fmt.Sprintf("- %s", repoPath)) + } + fmt.Scanln() + } + + for _, repoPath := range untouchedReposToPrune { + err := os.RemoveAll(repoPath) + if err != nil { + colorlog.PrintError(fmt.Sprintf("Failed to prune repository at %s: %v", repoPath, err)) + } else { + untouchedPrunes++ + colorlog.PrintSuccess(fmt.Sprintf("Successfully deleted %s", repoPath)) + } + } + } printRemainingMessages() - printCloneStatsMessage(cloneCount, pulledCount, updateRemoteCount, newCommits) + printCloneStatsMessage(cloneCount, pulledCount, updateRemoteCount, newCommits, untouchedPrunes) if hasCollisions { fmt.Println("") @@ -1230,7 +1313,7 @@ func pruneRepos(cloneTargets []scm.Repo) int { return count } -func printCloneStatsMessage(cloneCount, pulledCount, updateRemoteCount, newCommits int) { +func printCloneStatsMessage(cloneCount, pulledCount, updateRemoteCount, newCommits, untouchedPrunes int) { if updateRemoteCount > 0 { colorlog.PrintSuccess(fmt.Sprintf("New clones: %v, existing resources pulled: %v, total new commits: 
%v, remotes updated: %v", cloneCount, pulledCount, newCommits, updateRemoteCount)) return @@ -1241,6 +1324,11 @@ func printCloneStatsMessage(cloneCount, pulledCount, updateRemoteCount, newCommi return } + if untouchedPrunes > 0 { + colorlog.PrintSuccess(fmt.Sprintf("New clones: %v, existing resources pulled: %v, total prunes: %v", cloneCount, pulledCount, untouchedPrunes)) + return + } + colorlog.PrintSuccess(fmt.Sprintf("New clones: %v, existing resources pulled: %v", cloneCount, pulledCount)) } diff --git a/cmd/clone_test.go b/cmd/clone_test.go index 95161e5..3520112 100644 --- a/cmd/clone_test.go +++ b/cmd/clone_test.go @@ -105,6 +105,18 @@ func (g MockGitClient) RepoCommitCount(repo scm.Repo) (int, error) { return 0, nil } +func (g MockGitClient) Branch(repo scm.Repo) (string, error) { + return "", nil +} + +func (g MockGitClient) RevListCompare(repo scm.Repo, ref1 string, ref2 string) (string, error) { + return "", nil +} + +func (g MockGitClient) ShortStatus(repo scm.Repo) (string, error) { + return "", nil +} + func TestInitialClone(t *testing.T) { defer UnsetEnv("GHORG_")() dir, err := os.MkdirTemp("", "ghorg_test_initial") diff --git a/cmd/root.go b/cmd/root.go index 2590fff..25efe96 100644 --- a/cmd/root.go +++ b/cmd/root.go @@ -70,6 +70,8 @@ var ( noDirSize bool ghorgStatsEnabled bool ghorgPreserveScmHostname bool + ghorgPruneUntouched bool + ghorgPruneUntouchedNoConfirm bool args []string cloneErrors []string cloneInfos []string @@ -173,6 +175,10 @@ func getOrSetDefaults(envVar string) { os.Setenv(envVar, "false") case "GHORG_PRUNE_NO_CONFIRM": os.Setenv(envVar, "false") + case "GHORG_PRUNE_UNTOUCHED": + os.Setenv(envVar, "false") + case "GHORG_PRUNE_UNTOUCHED_NO_CONFIRM": + os.Setenv(envVar, "false") case "GHORG_INSECURE_GITLAB_CLIENT": os.Setenv(envVar, "false") case "GHORG_INSECURE_GITEA_CLIENT": @@ -275,6 +281,8 @@ func InitConfig() { getOrSetDefaults("GHORG_FETCH_ALL") getOrSetDefaults("GHORG_PRUNE") getOrSetDefaults("GHORG_PRUNE_NO_CONFIRM") + 
getOrSetDefaults("GHORG_PRUNE_UNTOUCHED") + getOrSetDefaults("GHORG_PRUNE_UNTOUCHED_NO_CONFIRM") getOrSetDefaults("GHORG_DRY_RUN") getOrSetDefaults("GHORG_GITHUB_USER_OPTION") getOrSetDefaults("GHORG_CLONE_WIKI") @@ -364,6 +372,8 @@ func init() { cloneCmd.Flags().BoolVar(&includeSubmodules, "include-submodules", false, "GHORG_INCLUDE_SUBMODULES - Include submodules in all clone and pull operations.") cloneCmd.Flags().BoolVar(&ghorgStatsEnabled, "stats-enabled", false, "GHORG_STATS_ENABLED - Creates a CSV in the GHORG_ABSOLUTE_PATH_TO_CLONE_TO called _ghorg_stats.csv with info about each clone. This allows you to track clone data over time such as number of commits and size in megabytes of the clone directory.") cloneCmd.Flags().BoolVar(&ghorgPreserveScmHostname, "preserve-scm-hostname", false, "GHORG_PRESERVE_SCM_HOSTNAME - Appends the scm hostname to the GHORG_ABSOLUTE_PATH_TO_CLONE_TO which will organize your clones into specific folders by the scm provider. e.g. /github.com/kuberentes") + cloneCmd.Flags().BoolVar(&ghorgPruneUntouched, "prune-untouched", false, "GHORG_PRUNE_UNTOUCHED - Prune repositories that don't have any local changes, see sample-conf.yaml for more details") + cloneCmd.Flags().BoolVar(&ghorgPruneUntouchedNoConfirm, "prune-untouched-no-confirm", false, "GHORG_PRUNE_UNTOUCHED_NO_CONFIRM - Automatically delete repos without showing an interactive confirmation prompt.") cloneCmd.Flags().StringVarP(&baseURL, "base-url", "", "", "GHORG_SCM_BASE_URL - Change SCM base url, for on self hosted instances (currently gitlab, gitea and github (use format of https://git.mydomain.com/api/v3))") cloneCmd.Flags().StringVarP(&concurrency, "concurrency", "", "", "GHORG_CONCURRENCY - Max goroutines to spin up while cloning (default 25)") cloneCmd.Flags().StringVarP(&cloneDepth, "clone-depth", "", "", "GHORG_CLONE_DEPTH - Create a shallow clone with a history truncated to the specified number of commits") diff --git a/git/git.go b/git/git.go index 729619b..4767f2e 
100644 --- a/git/git.go +++ b/git/git.go @@ -19,6 +19,9 @@ type Gitter interface { SetOriginWithCredentials(scm.Repo) error Clean(scm.Repo) error Checkout(scm.Repo) error + RevListCompare(scm.Repo, string, string) (string, error) + ShortStatus(scm.Repo) (string, error) + Branch(scm.Repo) (string, error) UpdateRemote(scm.Repo) error FetchAll(scm.Repo) error FetchCloneBranch(scm.Repo) error @@ -182,6 +185,35 @@ func (g GitClient) FetchAll(repo scm.Repo) error { return cmd.Run() } +func (g GitClient) Branch(repo scm.Repo) (string, error) { + args := []string{"branch"} + + cmd := exec.Command("git", args...) + cmd.Dir = repo.HostPath + if os.Getenv("GHORG_DEBUG") != "" { + if err := printDebugCmd(cmd, repo); err != nil { + return "", err + } + } + + output, err := cmd.Output() + if err != nil { + return "", err + } + + return strings.TrimSpace(string(output)), nil +} + +// RevListCompare returns the list of commits in the local branch that are not in the remote branch. +func (g GitClient) RevListCompare(repo scm.Repo, localBranch string, remoteBranch string) (string, error) { + cmd := exec.Command("git", "-C", repo.HostPath, "rev-list", localBranch, "^"+remoteBranch) + output, err := cmd.CombinedOutput() + if err != nil { + return "", err + } + return strings.TrimSpace(string(output)), nil +} + func (g GitClient) FetchCloneBranch(repo scm.Repo) error { args := []string{"fetch", "origin", repo.CloneBranch} @@ -198,6 +230,25 @@ func (g GitClient) FetchCloneBranch(repo scm.Repo) error { return cmd.Run() } +func (g GitClient) ShortStatus(repo scm.Repo) (string, error) { + args := []string{"status", "--short"} + + cmd := exec.Command("git", args...) 
+ cmd.Dir = repo.HostPath + if os.Getenv("GHORG_DEBUG") != "" { + if err := printDebugCmd(cmd, repo); err != nil { + return "", err + } + } + + output, err := cmd.Output() + if err != nil { + return "", err + } + + return strings.TrimSpace(string(output)), nil +} + func (g GitClient) RepoCommitCount(repo scm.Repo) (int, error) { args := []string{"rev-list", "--count", repo.CloneBranch} cmd := exec.Command("git", args...) diff --git a/sample-conf.yaml b/sample-conf.yaml index 8f5092d..6f063b2 100644 --- a/sample-conf.yaml +++ b/sample-conf.yaml @@ -62,6 +62,18 @@ GHORG_PRUNE: false # flag (--prune-no-confirm) GHORG_PRUNE_NO_CONFIRM: false +# Prune repositories that are considered untouched. A repository is considered untouched if it has no local branches at all (see 2), or if 1, 3 and 4 all hold: +# 1. There are no new branches; ghorg only clones one branch, so if there are more then the user has done something in the repo +# 2. There are no branches locally, which means the repo is empty or all commits have been removed +# 3. There are no commits on the default branch locally that are not on the remote +# 4. There are no uncommitted changes locally (git status --short is empty) +# flag (--prune-untouched) +GHORG_PRUNE_UNTOUCHED: false + +# Automatically delete repos without showing an interactive confirmation prompt. 
+# flag (--prune-untouched-no-confirm) +GHORG_PRUNE_UNTOUCHED_NO_CONFIRM: false + # Color output (enabled, disabled) # flag( --color) eg: --color=enabled eg: --color=disabled GHORG_COLOR: disabled diff --git a/scripts/github_cloud_integration_tests.sh b/scripts/github_cloud_integration_tests.sh index bf7cc58..fa5c91f 100755 --- a/scripts/github_cloud_integration_tests.sh +++ b/scripts/github_cloud_integration_tests.sh @@ -36,6 +36,17 @@ else exit 1 fi +# clone an org preserving scm hostname +ghorg clone $GITHUB_ORG --token=$GITHUB_TOKEN --preserve-scm-hostname --prune-untouched --prune-untouched-no-confirm + +if [ -z "$(ls -A $HOME/ghorg/github.com/$GITHUB_ORG)" ] +then + echo "Pass: github org clone preserving scm hostname prune untouched" +else + echo "Fail: github org clone preserving scm hostnamey prune untouched" + exit 1 +fi + # clone an org with no config file to a specific path ghorg clone $GITHUB_ORG --token=$GITHUB_TOKEN --path=/tmp --output-dir=testing_output_dir diff --git a/scripts/local-gitlab/integration-tests.sh b/scripts/local-gitlab/integration-tests.sh index 39e8b20..65cd32a 100755 --- a/scripts/local-gitlab/integration-tests.sh +++ b/scripts/local-gitlab/integration-tests.sh @@ -368,83 +368,83 @@ echo "CLONE AND TEST ALL-GROUPS, OUTPUT DIR, WIKI" exit 1 fi -# ########### CLONE AND TEST ALL-GROUPS, OUTPUT DIR, WIKI, SNIPPETS ############ -# ghorg clone all-groups --scm=gitlab --base-url="${GITLAB_URL}" --token="${TOKEN}" --clone-wiki --clone-snippets --output-dir=local-gitlab-v15-repos-flat-wiki-snippets -# ghorg clone all-groups --scm=gitlab --base-url="${GITLAB_URL}" --token="${TOKEN}" --clone-wiki --clone-snippets --output-dir=local-gitlab-v15-repos-flat-wiki-snippets +########### CLONE AND TEST ALL-GROUPS, OUTPUT DIR, WIKI, SNIPPETS ############ +ghorg clone all-groups --scm=gitlab --base-url="${GITLAB_URL}" --token="${TOKEN}" --clone-wiki --clone-snippets --output-dir=local-gitlab-v15-repos-flat-wiki-snippets +ghorg clone all-groups 
--scm=gitlab --base-url="${GITLAB_URL}" --token="${TOKEN}" --clone-wiki --clone-snippets --output-dir=local-gitlab-v15-repos-flat-wiki-snippets -# GOT=$( ghorg ls local-gitlab-v15-repos-flat-wiki-snippets | grep -o 'local-gitlab-v15-repos-flat-wiki-snippets.*') -# WANT=$(cat <