Add ci subcommands and simplify scheduled workflows

This commit is contained in:
Julien Bisconti
2026-02-28 01:44:56 +01:00
parent c14a071c8d
commit 9a5331398d
3 changed files with 420 additions and 159 deletions

View File

@@ -5,9 +5,14 @@ on:
- cron: "0 2 * * 6"
workflow_dispatch:
concurrency:
group: broken-links-${{ github.ref }}
cancel-in-progress: false
jobs:
check-links:
runs-on: ubuntu-latest
timeout-minutes: 30
permissions:
contents: read
issues: write
@@ -24,22 +29,7 @@ jobs:
- name: Run Link Check
id: link_check
run: |
set +e
./awesome-docker check > link_check_output.txt 2>&1
exit_code=$?
set -e
has_errors=false
if [ "$exit_code" -ne 0 ]; then
has_errors=true
fi
if grep -qi "broken links" link_check_output.txt; then
has_errors=true
fi
echo "has_errors=$has_errors" >> "$GITHUB_OUTPUT"
echo "check_exit_code=$exit_code" >> "$GITHUB_OUTPUT"
run: ./awesome-docker ci broken-links --issue-file broken_links_issue.md --github-output "$GITHUB_OUTPUT"
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
@@ -49,10 +39,7 @@ jobs:
with:
script: |
const fs = require('fs');
const output = fs.readFileSync('link_check_output.txt', 'utf8');
const exitCode = '${{ steps.link_check.outputs.check_exit_code }}';
const issueBody = `# Broken Links Detected\n\nThe weekly link check found broken links or the checker failed to execute cleanly.\n\nChecker exit code: ${exitCode}\n\n\`\`\`\n${output}\n\`\`\`\n\n## Action Required\n\n- Update the URL if the resource moved\n- Remove the entry if permanently unavailable\n- Add to \`config/exclude.yaml\` if a known false positive\n- Investigate checker failures when exit code is non-zero\n\n---\n*Auto-generated by broken_links.yml*`;
const issueBody = fs.readFileSync('broken_links_issue.md', 'utf8');
const issues = await github.rest.issues.listForRepo({
owner: context.repo.owner,

View File

@@ -5,9 +5,14 @@ on:
- cron: "0 9 * * 1"
workflow_dispatch:
concurrency:
group: health-report-${{ github.ref }}
cancel-in-progress: false
jobs:
health-check:
runs-on: ubuntu-latest
timeout-minutes: 30
permissions:
contents: read
issues: write
@@ -22,22 +27,12 @@ jobs:
- name: Build
run: go build -o awesome-docker ./cmd/awesome-docker
- name: Run Health Scoring
run: ./awesome-docker health
continue-on-error: true
- name: Run Health + Report
id: report
run: ./awesome-docker ci health-report --issue-file health_report.txt --github-output "$GITHUB_OUTPUT"
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
- name: Generate Report
id: report
run: |
./awesome-docker report > health_report.txt 2>&1 || true
if [ -s health_report.txt ]; then
echo "has_report=true" >> "$GITHUB_OUTPUT"
else
echo "has_report=false" >> "$GITHUB_OUTPUT"
fi
- name: Create/Update Issue with Health Report
if: steps.report.outputs.has_report == 'true'
uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # ratchet:actions/github-script@v8
@@ -45,8 +40,7 @@ jobs:
script: |
const fs = require('fs');
const report = fs.readFileSync('health_report.txt', 'utf8');
const issueBody = report + '\n\n---\n*Auto-generated weekly by health_report.yml*';
const issueBody = report;
const issues = await github.rest.issues.listForRepo({
owner: context.repo.owner,

View File

@@ -4,6 +4,8 @@ import (
"context"
"fmt"
"os"
"strconv"
"strings"
"github.com/spf13/cobra"
"github.com/veggiemonk/awesome-docker/internal/builder"
@@ -23,6 +25,15 @@ const (
version = "0.1.0"
)
// checkSummary collects the outcome of a single link-check run so that
// different commands can render it (console output, CI issue body, outputs).
type checkSummary struct {
// ExternalTotal is the number of external (non-GitHub) URLs that were checked.
ExternalTotal int
// GitHubTotal is the number of GitHub repository URLs found in the README.
GitHubTotal int
// Broken holds every external link result whose OK flag was false.
Broken []checker.LinkResult
// Redirected holds every external link result flagged as redirected.
Redirected []checker.LinkResult
// GitHubErrors holds the errors returned by the GitHub repository check.
GitHubErrors []error
// GitHubSkipped is true when the GitHub repository check did not run
// (PR mode, or GITHUB_TOKEN not set).
GitHubSkipped bool
}
func main() {
root := &cobra.Command{
Use: "awesome-docker",
@@ -37,6 +48,7 @@ func main() {
buildCmd(),
reportCmd(),
validateCmd(),
ciCmd(),
)
if err := root.Execute(); err != nil {
@@ -70,6 +82,205 @@ func collectURLs(sections []parser.Section, urls *[]string) {
}
}
// runLinkChecks parses the README, checks every external link for
// reachability and, unless prMode is set or GITHUB_TOKEN is empty, also runs
// the GitHub repository check.
//
// Broken and redirected links as well as GitHub checker errors are reported
// through the returned summary. The error result is non-nil only when the
// README cannot be parsed or the exclude list cannot be loaded.
func runLinkChecks(prMode bool) (checkSummary, error) {
	readme, err := parseReadme()
	if err != nil {
		return checkSummary{}, fmt.Errorf("parse: %w", err)
	}

	var links []string
	collectURLs(readme.Sections, &links)

	excluded, err := cache.LoadExcludeList(excludePath)
	if err != nil {
		return checkSummary{}, fmt.Errorf("load exclude list: %w", err)
	}

	repoLinks, externalLinks := checker.PartitionLinks(links)

	var summary checkSummary
	summary.ExternalTotal = len(externalLinks)
	summary.GitHubTotal = len(repoLinks)

	// A single result may be both broken and redirected, so the two checks
	// are independent.
	for _, res := range checker.CheckLinks(externalLinks, 10, excluded) {
		if !res.OK {
			summary.Broken = append(summary.Broken, res)
		}
		if res.Redirected {
			summary.Redirected = append(summary.Redirected, res)
		}
	}

	// The GitHub check needs a token and is never run in PR mode.
	token := os.Getenv("GITHUB_TOKEN")
	if prMode || token == "" {
		summary.GitHubSkipped = true
		return summary, nil
	}

	gh := checker.NewGitHubChecker(token)
	_, repoErrs := gh.CheckRepos(context.Background(), repoLinks, 50)
	summary.GitHubErrors = repoErrs
	return summary, nil
}
// runHealth fetches metadata for every GitHub repository linked from the
// README, scores the results and merges them into the health cache stored at
// healthCachePath.
//
// It requires GITHUB_TOKEN to be set. Per-repository errors are printed and
// tolerated as long as at least one repository could be fetched.
func runHealth(ctx context.Context) error {
	token := os.Getenv("GITHUB_TOKEN")
	if token == "" {
		return fmt.Errorf("GITHUB_TOKEN environment variable is required")
	}

	readme, err := parseReadme()
	if err != nil {
		return fmt.Errorf("parse: %w", err)
	}

	var links []string
	collectURLs(readme.Sections, &links)
	repoLinks, _ := checker.PartitionLinks(links)

	fmt.Printf("Scoring %d GitHub repositories...\n", len(repoLinks))
	gh := checker.NewGitHubChecker(token)
	infos, fetchErrs := gh.CheckRepos(ctx, repoLinks, 50)
	for _, fetchErr := range fetchErrs {
		fmt.Printf(" error: %v\n", fetchErr)
	}

	switch {
	case len(infos) > 0:
		// At least one repository was fetched; continue with scoring.
	case len(fetchErrs) > 0:
		return fmt.Errorf("failed to fetch GitHub metadata for all repositories (%d errors); check network/DNS and GITHUB_TOKEN", len(fetchErrs))
	default:
		return fmt.Errorf("no GitHub repositories found in README")
	}

	fresh := scorer.ToCacheEntries(scorer.ScoreAll(infos))

	store, err := cache.LoadHealthCache(healthCachePath)
	if err != nil {
		return fmt.Errorf("load cache: %w", err)
	}
	store.Merge(fresh)
	if saveErr := cache.SaveHealthCache(healthCachePath, store); saveErr != nil {
		return fmt.Errorf("save cache: %w", saveErr)
	}

	fmt.Printf("Cache updated: %d entries in %s\n", len(store.Entries), healthCachePath)
	return nil
}
// scoredFromCache loads the health cache from healthCachePath and converts
// every cached entry into a scorer.ScoredEntry.
//
// It returns an error when the cache cannot be loaded or contains no entries.
func scoredFromCache() ([]scorer.ScoredEntry, error) {
	store, err := cache.LoadHealthCache(healthCachePath)
	if err != nil {
		return nil, fmt.Errorf("load cache: %w", err)
	}

	total := len(store.Entries)
	if total == 0 {
		return nil, fmt.Errorf("no cache data, run 'health' first")
	}

	out := make([]scorer.ScoredEntry, 0, total)
	for _, cached := range store.Entries {
		var entry scorer.ScoredEntry
		entry.URL = cached.URL
		entry.Name = cached.Name
		entry.Status = scorer.Status(cached.Status)
		entry.Stars = cached.Stars
		entry.HasLicense = cached.HasLicense
		entry.LastPush = cached.LastPush
		out = append(out, entry)
	}
	return out, nil
}
// markdownReportFromCache renders the report produced by
// scorer.GenerateReport from the entries currently stored in the health
// cache. Any error from reading the cache is returned unchanged.
func markdownReportFromCache() (string, error) {
	var report string

	entries, err := scoredFromCache()
	if err != nil {
		return report, err
	}

	report = scorer.GenerateReport(entries)
	return report, nil
}
// writeGitHubOutput appends one key/value pair to the GitHub output file at
// path (typically $GITHUB_OUTPUT). The file is created if it does not exist.
//
// An empty path is a no-op so callers can pass an unset flag straight
// through. Single-line values are written as "key=value". Values containing
// a line break are written with the "key<<DELIMITER" multiline form so they
// can neither corrupt the file nor inject additional outputs.
//
// It returns an error when the file cannot be opened, written, or closed.
func writeGitHubOutput(path, key, value string) (err error) {
	if path == "" {
		return nil
	}

	f, err := os.OpenFile(path, os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0o644)
	if err != nil {
		return fmt.Errorf("open github output file: %w", err)
	}
	// Close can surface a deferred write failure; report it unless an
	// earlier error is already being returned.
	defer func() {
		if closeErr := f.Close(); closeErr != nil && err == nil {
			err = fmt.Errorf("close github output file: %w", closeErr)
		}
	}()

	if strings.ContainsAny(value, "\r\n") {
		// Pick a delimiter that does not occur anywhere in the value so the
		// block cannot be terminated early.
		delim := "ghadelimiter"
		for strings.Contains(value, delim) {
			delim += "_"
		}
		_, err = fmt.Fprintf(f, "%s<<%s\n%s\n%s\n", key, delim, value, delim)
	} else {
		_, err = fmt.Fprintf(f, "%s=%s\n", key, value)
	}
	if err != nil {
		return fmt.Errorf("write github output: %w", err)
	}
	return nil
}
// sanitizeOutputValue flattens v onto a single line: every line feed and
// carriage return becomes a space, then leading and trailing whitespace is
// trimmed.
func sanitizeOutputValue(v string) string {
	flattened := strings.NewReplacer("\n", " ", "\r", " ").Replace(v)
	return strings.TrimSpace(flattened)
}
// buildBrokenLinksIssueBody renders the markdown body of the broken-links
// issue.
//
// When runErr is non-nil only the failure is reported. Otherwise the body
// lists the broken/redirected/GitHub-error counts followed by one section per
// non-empty category (broken links, GitHub API errors). The "Action Required"
// section and the footer are always appended.
func buildBrokenLinksIssueBody(summary checkSummary, runErr error) string {
	var body strings.Builder
	body.WriteString("# Broken Links Detected\n\n")

	if runErr == nil {
		brokenCount := len(summary.Broken)
		apiErrCount := len(summary.GitHubErrors)

		fmt.Fprintf(&body, "- Broken links: %d\n", brokenCount)
		fmt.Fprintf(&body, "- Redirected links: %d\n", len(summary.Redirected))
		fmt.Fprintf(&body, "- GitHub API errors: %d\n\n", apiErrCount)

		if brokenCount > 0 {
			body.WriteString("## Broken Links\n\n")
			for _, link := range summary.Broken {
				fmt.Fprintf(&body, "- `%s` -> `%d %s`\n", link.URL, link.StatusCode, strings.TrimSpace(link.Error))
			}
			body.WriteString("\n")
		}

		if apiErrCount > 0 {
			body.WriteString("## GitHub API Errors\n\n")
			for _, apiErr := range summary.GitHubErrors {
				fmt.Fprintf(&body, "- `%s`\n", apiErr)
			}
			body.WriteString("\n")
		}
	} else {
		body.WriteString("The link checker failed to execute cleanly.\n\n")
		body.WriteString("## Failure\n\n")
		fmt.Fprintf(&body, "- %s\n\n", runErr)
	}

	// Static trailer shared by both the failure and the result variants.
	trailer := []string{
		"## Action Required\n\n",
		"- Update the URL if the resource moved\n",
		"- Remove the entry if permanently unavailable\n",
		"- Add to `config/exclude.yaml` if a known false positive\n",
		"- Investigate GitHub API/auth failures when present\n\n",
		"---\n",
		"*Auto-generated by awesome-docker ci broken-links*\n",
	}
	for _, line := range trailer {
		body.WriteString(line)
	}
	return body.String()
}
// buildHealthReportIssueBody renders the markdown body of the health-report
// issue.
//
// When healthErr is non-nil a warning and the error text are placed before
// the report. The report is followed by a newline if it does not already end
// with one, then by a horizontal rule and the auto-generated footer.
func buildHealthReportIssueBody(report string, healthErr error) string {
	var out strings.Builder

	if healthErr != nil {
		out.WriteString("WARNING: health refresh failed in this run; showing latest cached report.\n\n")
		out.WriteString(fmt.Sprintf("Error: `%s`\n\n", healthErr))
	}

	out.WriteString(report)
	if endsWithNewline := strings.HasSuffix(report, "\n"); !endsWithNewline {
		out.WriteString("\n")
	}

	for _, line := range []string{
		"\n---\n",
		"*Auto-generated weekly by awesome-docker ci health-report*\n",
	} {
		out.WriteString(line)
	}
	return out.String()
}
func lintCmd() *cobra.Command {
var fix bool
cmd := &cobra.Command{
@@ -113,71 +324,45 @@ func checkCmd() *cobra.Command {
Use: "check",
Short: "Check links for reachability",
RunE: func(cmd *cobra.Command, args []string) error {
doc, err := parseReadme()
summary, err := runLinkChecks(prMode)
if err != nil {
return fmt.Errorf("parse: %w", err)
return err
}
var urls []string
collectURLs(doc.Sections, &urls)
exclude, err := cache.LoadExcludeList(excludePath)
if err != nil {
return fmt.Errorf("load exclude list: %w", err)
}
ghURLs, extURLs := checker.PartitionLinks(urls)
fmt.Printf("Checking %d external links...\n", len(extURLs))
results := checker.CheckLinks(extURLs, 10, exclude)
var broken []checker.LinkResult
var redirected []checker.LinkResult
for _, r := range results {
if !r.OK {
broken = append(broken, r)
}
if r.Redirected {
redirected = append(redirected, r)
}
}
var ghErrs []error
fmt.Printf("Checking %d external links...\n", summary.ExternalTotal)
if !prMode {
token := os.Getenv("GITHUB_TOKEN")
if token != "" {
fmt.Printf("Checking %d GitHub repositories...\n", len(ghURLs))
gc := checker.NewGitHubChecker(token)
_, errs := gc.CheckRepos(context.Background(), ghURLs, 50)
ghErrs = errs
for _, e := range ghErrs {
fmt.Printf(" GitHub error: %v\n", e)
}
} else {
if summary.GitHubSkipped {
fmt.Println("GITHUB_TOKEN not set, skipping GitHub repo checks")
} else {
fmt.Printf("Checking %d GitHub repositories...\n", summary.GitHubTotal)
}
}
if len(redirected) > 0 {
fmt.Printf("\n%d redirected links (consider updating):\n", len(redirected))
for _, r := range redirected {
for _, e := range summary.GitHubErrors {
fmt.Printf(" GitHub error: %v\n", e)
}
if len(summary.Redirected) > 0 {
fmt.Printf("\n%d redirected links (consider updating):\n", len(summary.Redirected))
for _, r := range summary.Redirected {
fmt.Printf(" %s -> %s\n", r.URL, r.RedirectURL)
}
}
if len(broken) > 0 {
fmt.Printf("\n%d broken links:\n", len(broken))
for _, r := range broken {
if len(summary.Broken) > 0 {
fmt.Printf("\n%d broken links:\n", len(summary.Broken))
for _, r := range summary.Broken {
fmt.Printf(" %s -> %d %s\n", r.URL, r.StatusCode, r.Error)
}
}
if len(broken) > 0 && len(ghErrs) > 0 {
return fmt.Errorf("found %d broken links and %d GitHub API errors", len(broken), len(ghErrs))
if len(summary.Broken) > 0 && len(summary.GitHubErrors) > 0 {
return fmt.Errorf("found %d broken links and %d GitHub API errors", len(summary.Broken), len(summary.GitHubErrors))
}
if len(broken) > 0 {
return fmt.Errorf("found %d broken links", len(broken))
if len(summary.Broken) > 0 {
return fmt.Errorf("found %d broken links", len(summary.Broken))
}
if len(ghErrs) > 0 {
return fmt.Errorf("github checks failed with %d errors", len(ghErrs))
if len(summary.GitHubErrors) > 0 {
return fmt.Errorf("github checks failed with %d errors", len(summary.GitHubErrors))
}
fmt.Println("All links OK")
@@ -193,47 +378,7 @@ func healthCmd() *cobra.Command {
Use: "health",
Short: "Score repository health and update cache",
RunE: func(cmd *cobra.Command, args []string) error {
token := os.Getenv("GITHUB_TOKEN")
if token == "" {
return fmt.Errorf("GITHUB_TOKEN environment variable is required")
}
doc, err := parseReadme()
if err != nil {
return fmt.Errorf("parse: %w", err)
}
var urls []string
collectURLs(doc.Sections, &urls)
ghURLs, _ := checker.PartitionLinks(urls)
fmt.Printf("Scoring %d GitHub repositories...\n", len(ghURLs))
gc := checker.NewGitHubChecker(token)
infos, errs := gc.CheckRepos(context.Background(), ghURLs, 50)
for _, e := range errs {
fmt.Printf(" error: %v\n", e)
}
if len(infos) == 0 {
if len(errs) > 0 {
return fmt.Errorf("failed to fetch GitHub metadata for all repositories (%d errors); check network/DNS and GITHUB_TOKEN", len(errs))
}
return fmt.Errorf("no GitHub repositories found in README")
}
scored := scorer.ScoreAll(infos)
cacheEntries := scorer.ToCacheEntries(scored)
hc, err := cache.LoadHealthCache(healthCachePath)
if err != nil {
return fmt.Errorf("load cache: %w", err)
}
hc.Merge(cacheEntries)
if err := cache.SaveHealthCache(healthCachePath, hc); err != nil {
return fmt.Errorf("save cache: %w", err)
}
fmt.Printf("Cache updated: %d entries in %s\n", len(hc.Entries), healthCachePath)
return nil
return runHealth(context.Background())
},
}
}
@@ -258,24 +403,9 @@ func reportCmd() *cobra.Command {
Use: "report",
Short: "Generate health report from cache",
RunE: func(cmd *cobra.Command, args []string) error {
hc, err := cache.LoadHealthCache(healthCachePath)
scored, err := scoredFromCache()
if err != nil {
return fmt.Errorf("load cache: %w", err)
}
if len(hc.Entries) == 0 {
return fmt.Errorf("no cache data, run 'health' first")
}
var scored []scorer.ScoredEntry
for _, e := range hc.Entries {
scored = append(scored, scorer.ScoredEntry{
URL: e.URL,
Name: e.Name,
Status: scorer.Status(e.Status),
Stars: e.Stars,
HasLicense: e.HasLicense,
LastPush: e.LastPush,
})
return err
}
if jsonOutput {
@@ -319,28 +449,17 @@ func validateCmd() *cobra.Command {
fmt.Printf("Lint OK: %d warnings\n", result.Warnings)
fmt.Println("\n=== Checking links (PR mode) ===")
var urls []string
collectURLs(doc.Sections, &urls)
exclude, err := cache.LoadExcludeList(excludePath)
summary, err := runLinkChecks(true)
if err != nil {
return fmt.Errorf("load exclude list: %w", err)
return err
}
_, extURLs := checker.PartitionLinks(urls)
fmt.Printf("Checking %d external links...\n", len(extURLs))
results := checker.CheckLinks(extURLs, 10, exclude)
var broken []checker.LinkResult
for _, r := range results {
if !r.OK {
broken = append(broken, r)
}
}
if len(broken) > 0 {
fmt.Printf("\n%d broken links:\n", len(broken))
for _, r := range broken {
fmt.Printf("Checking %d external links...\n", summary.ExternalTotal)
if len(summary.Broken) > 0 {
fmt.Printf("\n%d broken links:\n", len(summary.Broken))
for _, r := range summary.Broken {
fmt.Printf(" %s -> %d %s\n", r.URL, r.StatusCode, r.Error)
}
return fmt.Errorf("found %d broken links", len(broken))
return fmt.Errorf("found %d broken links", len(summary.Broken))
}
fmt.Println("\nValidation passed")
@@ -348,3 +467,164 @@ func validateCmd() *cobra.Command {
},
}
}
// ciCmd returns the "ci" parent command, which groups the CI-oriented
// subcommands "broken-links" and "health-report".
func ciCmd() *cobra.Command {
	subcommands := []*cobra.Command{
		ciBrokenLinksCmd(),
		ciHealthReportCmd(),
	}

	ci := &cobra.Command{Use: "ci", Short: "CI-oriented helper commands"}
	ci.AddCommand(subcommands...)
	return ci
}
// ciBrokenLinksCmd returns the "ci broken-links" command.
//
// The command runs the full link check (external links plus GitHub
// repositories), writes a markdown issue body to --issue-file when anything
// went wrong, and appends the outputs has_errors, check_exit_code,
// broken_count, github_error_count and (on a run failure) run_error to the
// file given by --github-output.
//
// check_exit_code is 0 when nothing was found, 1 when broken links or GitHub
// errors were found, and 2 when the check itself failed to run. The command
// returns an error for those conditions only when --strict is set.
func ciBrokenLinksCmd() *cobra.Command {
	var (
		issueFile    string
		githubOutput string
		strict       bool
	)

	run := func(cmd *cobra.Command, args []string) error {
		summary, runErr := runLinkChecks(false)
		brokenCount := len(summary.Broken)
		ghErrCount := len(summary.GitHubErrors)
		hasErrors := runErr != nil || brokenCount > 0 || ghErrCount > 0

		var exitCode int
		switch {
		case runErr != nil:
			exitCode = 2
		case hasErrors:
			exitCode = 1
		}

		if hasErrors && issueFile != "" {
			body := buildBrokenLinksIssueBody(summary, runErr)
			if err := os.WriteFile(issueFile, []byte(body), 0o644); err != nil {
				return fmt.Errorf("write issue file: %w", err)
			}
		}

		// Outputs are written in this order; the first failure aborts.
		outputs := [][2]string{
			{"has_errors", strconv.FormatBool(hasErrors)},
			{"check_exit_code", strconv.Itoa(exitCode)},
			{"broken_count", strconv.Itoa(brokenCount)},
			{"github_error_count", strconv.Itoa(ghErrCount)},
		}
		if runErr != nil {
			outputs = append(outputs, [2]string{"run_error", sanitizeOutputValue(runErr.Error())})
		}
		for _, kv := range outputs {
			if err := writeGitHubOutput(githubOutput, kv[0], kv[1]); err != nil {
				return err
			}
		}

		if runErr != nil {
			fmt.Printf("CI broken-links run error: %v\n", runErr)
		}
		if hasErrors {
			fmt.Printf("CI broken-links found %d broken links and %d GitHub errors\n", brokenCount, ghErrCount)
		} else {
			fmt.Println("CI broken-links found no errors")
		}

		// Without --strict the findings are reported through outputs only.
		if !strict {
			return nil
		}
		if runErr != nil {
			return runErr
		}
		if hasErrors {
			return fmt.Errorf("found %d broken links and %d GitHub API errors", brokenCount, ghErrCount)
		}
		return nil
	}

	cmd := &cobra.Command{
		Use:   "broken-links",
		Short: "Run link checks and emit CI outputs/artifacts",
		RunE:  run,
	}

	flags := cmd.Flags()
	flags.StringVar(&issueFile, "issue-file", "broken_links_issue.md", "Path to write issue markdown body")
	flags.StringVar(&githubOutput, "github-output", "", "Path to GitHub output file (typically $GITHUB_OUTPUT)")
	flags.BoolVar(&strict, "strict", false, "Return non-zero when errors are found")
	return cmd
}
// ciHealthReportCmd returns the "ci health-report" command.
//
// The command refreshes the health cache, renders the markdown report from
// whatever the cache then contains, and writes the issue body to --issue-file
// when a non-empty report is available. A failed refresh does not prevent the
// report from being rendered from previously cached data.
//
// The outputs has_report, health_ok, report_ok and has_errors, plus
// health_error / report_error when the respective step failed, are appended
// to the file given by --github-output. The command returns the health or
// report error only when --strict is set.
func ciHealthReportCmd() *cobra.Command {
	var (
		issueFile    string
		githubOutput string
		strict       bool
	)

	run := func(cmd *cobra.Command, args []string) error {
		healthErr := runHealth(context.Background())
		report, reportErr := markdownReportFromCache()

		healthOK, reportOK := healthErr == nil, reportErr == nil
		hasReport := reportOK && strings.TrimSpace(report) != ""
		hasErrors := !(healthOK && reportOK)

		if issueFile != "" && hasReport {
			body := buildHealthReportIssueBody(report, healthErr)
			if err := os.WriteFile(issueFile, []byte(body), 0o644); err != nil {
				return fmt.Errorf("write issue file: %w", err)
			}
		}

		// Outputs are written in this order; the first failure aborts.
		outputs := [][2]string{
			{"has_report", strconv.FormatBool(hasReport)},
			{"health_ok", strconv.FormatBool(healthOK)},
			{"report_ok", strconv.FormatBool(reportOK)},
			{"has_errors", strconv.FormatBool(hasErrors)},
		}
		if healthErr != nil {
			outputs = append(outputs, [2]string{"health_error", sanitizeOutputValue(healthErr.Error())})
		}
		if reportErr != nil {
			outputs = append(outputs, [2]string{"report_error", sanitizeOutputValue(reportErr.Error())})
		}
		for _, kv := range outputs {
			if err := writeGitHubOutput(githubOutput, kv[0], kv[1]); err != nil {
				return err
			}
		}

		if healthErr != nil {
			fmt.Printf("CI health-report health error: %v\n", healthErr)
		}
		if reportErr != nil {
			fmt.Printf("CI health-report report error: %v\n", reportErr)
		}
		if hasReport {
			fmt.Println("CI health-report generated report artifact")
		} else {
			fmt.Println("CI health-report has no report artifact")
		}

		// Without --strict failures are reported through outputs only.
		if !strict {
			return nil
		}
		if healthErr != nil {
			return healthErr
		}
		if reportErr != nil {
			return reportErr
		}
		return nil
	}

	cmd := &cobra.Command{
		Use:   "health-report",
		Short: "Refresh health cache, render report, and emit CI outputs/artifacts",
		RunE:  run,
	}

	flags := cmd.Flags()
	flags.StringVar(&issueFile, "issue-file", "health_report.txt", "Path to write health issue markdown body")
	flags.StringVar(&githubOutput, "github-output", "", "Path to GitHub output file (typically $GITHUB_OUTPUT)")
	flags.BoolVar(&strict, "strict", false, "Return non-zero when health/report fails")
	return cmd
}