985 lines
35 KiB
Go
985 lines
35 KiB
Go
package main
|
|
|
|
import (
|
|
"bufio"
|
|
"bytes"
|
|
"context"
|
|
"crypto/sha256"
|
|
"encoding/base64"
|
|
"encoding/csv"
|
|
"encoding/hex"
|
|
"encoding/json"
|
|
"flag"
|
|
"fmt"
|
|
"log"
|
|
"math"
|
|
"net/http"
|
|
"os"
|
|
"os/signal"
|
|
"path/filepath"
|
|
"regexp"
|
|
"strings"
|
|
"sync"
|
|
"syscall"
|
|
"time"
|
|
|
|
"github.com/google/go-github/v61/github"
|
|
"golang.org/x/oauth2"
|
|
"golang.org/x/time/rate"
|
|
)
|
|
|
|
// Configuration flags.
//
// Every flag is registered on the default flag set when the package is
// initialised; the pointers are only dereferenced after flag.Parse has run
// in main.
var (
	// Credentials and output location.
	githubToken = flag.String("token", "", "GitHub Personal Access Token (or set GITHUB_TOKEN env)")
	outputDir   = flag.String("output", "./output", "Output directory for findings")

	// Repository selection and concurrency for the search-driven mode.
	workers   = flag.Int("workers", 20, "Number of concurrent workers")
	minStars  = flag.Int("min-stars", 0, "Minimum stars filter")
	sinceDays = flag.Int("since-days", 7, "Only scan repos updated in last X days (repeats)")

	verbose = flag.Bool("verbose", false, "Verbose output")

	// Which surfaces of a repository are inspected.
	scanPRs      = flag.Bool("scan-prs", true, "Scan pull requests")
	scanIssues   = flag.Bool("scan-issues", true, "Scan issues and comments")
	scanCommits  = flag.Bool("scan-commits", true, "Scan commit history")
	entropyCheck = flag.Bool("entropy", true, "Enable entropy checking")

	// Loop behaviour.
	continuous   = flag.Bool("continuous", true, "Run continuously (loop forever)")
	sleepMinutes = flag.Int("sleep-minutes", 60, "Sleep between continuous cycles")

	scanAdvisories = flag.Bool("scan-advisories", false, "Check repository for GitHub Security Advisories (requires additional API calls)")
	hideObfuscated = flag.Bool("hide-obfuscated", true, "Hide obfuscated emails (e.g., user[at]example[dot]com)")

	// Single/multiple repo scanning.
	//
	// -repo is a plain string flag, so when it is given more than once only
	// the last value is kept; the help text says so instead of promising
	// repetition. Use -repo-list for several repositories.
	repoURLs     = flag.String("repo", "", "Single repository URL to scan (if repeated, the last value wins; use -repo-list for several)")
	repoListFile = flag.String("repo-list", "", "File containing repository URLs (one per line)")
)
|
|
|
|
// Core types
|
|
type Reaper struct {
|
|
client *github.Client
|
|
patterns []*SecretPattern
|
|
results chan *Finding
|
|
advisories chan *Advisory
|
|
limiter *rate.Limiter
|
|
ctx context.Context
|
|
cancel context.CancelFunc
|
|
wg sync.WaitGroup
|
|
stats *ScanStats
|
|
csvWriter *csv.Writer
|
|
jsonFile *os.File
|
|
advFile *os.File
|
|
mu sync.Mutex
|
|
scannedRepos map[string]bool
|
|
emailCache map[string]map[string]bool // repo -> email -> found
|
|
}
|
|
|
|
// SecretPattern is one detection rule.
type SecretPattern struct {
	// Name labels the kind of secret; Severity is the label copied onto
	// findings produced by this rule.
	Name, Severity string

	// Regex locates candidates. When it has a non-empty first capture group
	// the scanners report that group instead of the whole match.
	Regex *regexp.Regexp

	// Entropy marks rules whose matches are additionally filtered through
	// hasHighEntropy when entropy checking is enabled.
	Entropy bool
}
|
|
|
|
// Finding is one detected secret as it is written to the JSONL and CSV
// outputs. Field order is kept stable because it determines the key order
// of the serialised JSON.
type Finding struct {
	ID         string `json:"id"` // first 16 hex chars of a SHA-256 over repo, location and secret
	Repository string `json:"repository"`
	FilePath   string `json:"file_path"`   // file path, or a location label such as "issue"
	LineNumber int    `json:"line_number"` // 1-based; left at 0 by scanText
	SecretType string `json:"secret_type"`

	// SecretValue holds the output of maskSecret, not the raw match.
	SecretValue string `json:"secret_value"`

	// Context is the surrounding text; note that it is stored unmasked.
	Context string `json:"context"`

	URL       string    `json:"url"`
	Branch    string    `json:"branch"`
	Timestamp time.Time `json:"timestamp"`
	Severity  string    `json:"severity"`
}
|
|
|
|
// Advisory is one security advisory attached to a repository, in the shape
// written to advisories.jsonl. Field order is kept stable because it
// determines the key order of the serialised JSON.
type Advisory struct {
	ID         string `json:"id"`
	Repository string `json:"repository"`

	// Identifiers of the advisory itself; CVEID is omitted when empty.
	GHSAID string `json:"ghsa_id"`
	CVEID  string `json:"cve_id,omitempty"`

	Summary  string `json:"summary"`
	Severity string `json:"severity"`

	PublishedAt time.Time `json:"published_at"`
	UpdatedAt   time.Time `json:"updated_at"`

	Permalink string `json:"permalink"`

	// VulnerableManifest is the manifest path reported with the alert;
	// omitted when empty.
	VulnerableManifest string `json:"vulnerable_manifest,omitempty"`
}
|
|
|
|
// ScanStats accumulates counters for the lifetime of the process.
type ScanStats struct {
	// Running totals; they are never reset between cycles.
	ReposScanned    int
	FilesScanned    int
	FindingsFound   int
	AdvisoriesFound int
	RateLimitHits   int

	// StartTime is set once when the Reaper is built; CycleStart is
	// refreshed at the beginning of every scan cycle.
	StartTime, CycleStart time.Time

	// mu is taken around updates of the counters above.
	mu sync.Mutex
}
|
|
|
|
func main() {
|
|
flag.Parse()
|
|
|
|
token := *githubToken
|
|
if token == "" {
|
|
token = os.Getenv("GITHUB_TOKEN")
|
|
}
|
|
if token == "" {
|
|
log.Fatal("GitHub token required. Use -token flag or GITHUB_TOKEN env variable")
|
|
}
|
|
|
|
if err := os.MkdirAll(*outputDir, 0755); err != nil {
|
|
log.Fatalf("Failed to create output directory: %v", err)
|
|
}
|
|
|
|
reaper := NewReaper(token)
|
|
|
|
sigChan := make(chan os.Signal, 1)
|
|
signal.Notify(sigChan, syscall.SIGINT, syscall.SIGTERM)
|
|
go func() {
|
|
<-sigChan
|
|
fmt.Println("\nShutting down REAPER gracefully...")
|
|
reaper.cancel()
|
|
}()
|
|
|
|
fmt.Println(`
|
|
============================================================
|
|
██████╗ ███████╗ █████╗ ██████╗ ███████╗██████╗
|
|
██╔══██╗██╔════╝██╔══██╗██╔══██╗██╔════╝██╔══██╗
|
|
██████╔╝█████╗ ███████║██████╔╝█████╗ ██████╔╝
|
|
██╔══██╗██╔══╝ ██╔══██║██╔═══╝ ██╔══╝ ██╔══██╗
|
|
██║ ██║███████╗██║ ██║██║ ███████╗██║ ██║
|
|
╚═╝ ╚═╝╚══════╝╚═╝ ╚═╝╚═╝ ╚══════╝╚═╝ ╚═╝
|
|
============================================================
|
|
REAPER - GitHub Secret Harvester & Advisory Scanner
|
|
============================================================
|
|
`)
|
|
|
|
// Determine mode: single-shot or continuous
|
|
targetRepos := getTargetRepos()
|
|
if len(targetRepos) > 0 {
|
|
fmt.Printf("[!] Single-shot mode: scanning %d specific repository(s)\n", len(targetRepos))
|
|
fmt.Printf("[+] Output directory: %s\n", *outputDir)
|
|
fmt.Printf("[+] Scan commits: %v\n", *scanCommits)
|
|
fmt.Printf("[+] Scan advisories: %v\n", *scanAdvisories)
|
|
fmt.Printf("[+] Hide obfuscated emails: %v\n", *hideObfuscated)
|
|
|
|
for _, repoURL := range targetRepos {
|
|
repo, err := reaper.getRepoFromURL(repoURL)
|
|
if err != nil {
|
|
log.Printf("Failed to parse %s: %v", repoURL, err)
|
|
continue
|
|
}
|
|
reaper.scanRepository(repo)
|
|
reaper.stats.mu.Lock()
|
|
reaper.stats.ReposScanned++
|
|
reaper.stats.mu.Unlock()
|
|
}
|
|
reaper.printFinalStats()
|
|
return
|
|
}
|
|
|
|
// Normal continuous mode
|
|
fmt.Printf("[+] Starting REAPER with %d workers\n", *workers)
|
|
fmt.Printf("[+] Output directory: %s\n", *outputDir)
|
|
fmt.Printf("[+] Continuous mode: %v (sleep %d min between cycles)\n", *continuous, *sleepMinutes)
|
|
fmt.Printf("[+] Scanning repos updated in last %d days\n", *sinceDays)
|
|
fmt.Printf("[+] Scan commits: %v\n", *scanCommits)
|
|
fmt.Printf("[+] Scan advisories: %v\n", *scanAdvisories)
|
|
fmt.Printf("[+] Hide obfuscated emails: %v\n", *hideObfuscated)
|
|
|
|
reaper.RunForever()
|
|
}
|
|
|
|
func getTargetRepos() []string {
|
|
var repos []string
|
|
|
|
flag.Visit(func(f *flag.Flag) {
|
|
if f.Name == "repo" && f.Value.String() != "" {
|
|
repos = append(repos, f.Value.String())
|
|
}
|
|
})
|
|
|
|
if *repoListFile != "" {
|
|
file, err := os.Open(*repoListFile)
|
|
if err == nil {
|
|
defer file.Close()
|
|
scanner := bufio.NewScanner(file)
|
|
for scanner.Scan() {
|
|
line := strings.TrimSpace(scanner.Text())
|
|
if line != "" && !strings.HasPrefix(line, "#") {
|
|
repos = append(repos, line)
|
|
}
|
|
}
|
|
} else {
|
|
log.Printf("Warning: could not open repo list file %s: %v", *repoListFile, err)
|
|
}
|
|
}
|
|
|
|
return repos
|
|
}
|
|
|
|
func (r *Reaper) getRepoFromURL(rawURL string) (*github.Repository, error) {
|
|
rawURL = strings.TrimSuffix(rawURL, ".git")
|
|
parts := strings.Split(strings.TrimPrefix(rawURL, "https://github.com/"), "/")
|
|
if len(parts) < 2 {
|
|
return nil, fmt.Errorf("invalid repository URL: %s", rawURL)
|
|
}
|
|
owner, name := parts[0], parts[1]
|
|
|
|
repo, _, err := r.client.Repositories.Get(r.ctx, owner, name)
|
|
return repo, err
|
|
}
|
|
|
|
func NewReaper(token string) *Reaper {
|
|
ctx, cancel := context.WithCancel(context.Background())
|
|
|
|
ts := oauth2.StaticTokenSource(&oauth2.Token{AccessToken: token})
|
|
tc := oauth2.NewClient(ctx, ts)
|
|
client := github.NewClient(tc)
|
|
|
|
jsonFile, _ := os.OpenFile(filepath.Join(*outputDir, "reaper_findings.jsonl"), os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0644)
|
|
advFile, _ := os.OpenFile(filepath.Join(*outputDir, "advisories.jsonl"), os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0644)
|
|
|
|
timestamp := time.Now().Format("20060102_150405")
|
|
csvFile, _ := os.Create(filepath.Join(*outputDir, fmt.Sprintf("reaper_findings_%s.csv", timestamp)))
|
|
csvWriter := csv.NewWriter(csvFile)
|
|
csvWriter.Write([]string{"Timestamp", "Repository", "File", "Line", "Type", "Secret", "URL", "Severity"})
|
|
|
|
scannedRepos := make(map[string]bool)
|
|
scannedFile, err := os.OpenFile(filepath.Join(*outputDir, "scanned_repos.txt"), os.O_RDWR|os.O_CREATE, 0644)
|
|
if err == nil {
|
|
scanner := bufio.NewScanner(scannedFile)
|
|
for scanner.Scan() {
|
|
scannedRepos[scanner.Text()] = true
|
|
}
|
|
scannedFile.Close()
|
|
}
|
|
|
|
reaper := &Reaper{
|
|
client: client,
|
|
patterns: GetAllPatterns(),
|
|
results: make(chan *Finding, 10000),
|
|
advisories: make(chan *Advisory, 1000),
|
|
limiter: rate.NewLimiter(rate.Limit(30), 100),
|
|
ctx: ctx,
|
|
cancel: cancel,
|
|
stats: &ScanStats{StartTime: time.Now()},
|
|
csvWriter: csvWriter,
|
|
jsonFile: jsonFile,
|
|
advFile: advFile,
|
|
scannedRepos: scannedRepos,
|
|
emailCache: make(map[string]map[string]bool),
|
|
}
|
|
|
|
go reaper.processResults()
|
|
go reaper.processAdvisories()
|
|
|
|
return reaper
|
|
}
|
|
|
|
// isObfuscatedEmail reports whether email looks like an obfuscated,
// placeholder or no-reply address rather than a real mailbox.
//
// Three checks run in order: a substring match of the lower-cased address
// against a list of markers, a bracketed or parenthesised host part, and a
// list of placeholder domains directly after the "@".
func isObfuscatedEmail(email string) bool {
	lowered := strings.ToLower(email)

	hasAny := func(haystack string, needles []string) bool {
		for _, needle := range needles {
			if strings.Contains(haystack, needle) {
				return true
			}
		}
		return false
	}

	// The markers are compared with the lower-cased address, so the
	// upper-case entries can never match on their own; their lower-case
	// twins in the same list cover those spellings.
	markers := []string{
		"[at]", "[@]", "{at}", "{@}",
		" at ", "(at)", "[dot]", "{dot}", "(dot)",
		" dot ", " DOT ", " AT ",
		" user@", "@domain", "example.com",
		"replace@", "change@", "obfuscated",
		"noreply", "no-reply", "do-not-reply",
	}
	if hasAny(lowered, markers) {
		return true
	}

	// Host part wrapped in brackets or parentheses. These two checks look
	// at the address as given, not the lower-cased copy.
	bracketedHost := strings.Contains(email, "@[") && strings.Contains(email, "]")
	parenthesisedHost := strings.Contains(email, "@(") && strings.Contains(email, ")")
	if bracketedHost || parenthesisedHost {
		return true
	}

	// Placeholder domains immediately following the "@".
	var placeholders []string
	for _, domain := range []string{"example.com", "domain.com", "test.com", "localhost", "invalid"} {
		placeholders = append(placeholders, "@"+domain)
	}
	return hasAny(lowered, placeholders)
}
|
|
|
|
// isGitHubNoReply reports whether email contains one of GitHub's no-reply
// host names. The match is a case-sensitive substring test.
func isGitHubNoReply(email string) bool {
	// The second host contains the first, so it can never be the only one
	// to match; it is kept so the list documents both forms.
	for _, host := range [...]string{"noreply.github.com", "users.noreply.github.com"} {
		if strings.Contains(email, host) {
			return true
		}
	}
	return false
}
|
|
|
|
func (r *Reaper) isDuplicateEmail(repoName, email string) bool {
|
|
r.mu.Lock()
|
|
defer r.mu.Unlock()
|
|
|
|
if _, exists := r.emailCache[repoName]; !exists {
|
|
r.emailCache[repoName] = make(map[string]bool)
|
|
}
|
|
|
|
if r.emailCache[repoName][email] {
|
|
return true
|
|
}
|
|
|
|
r.emailCache[repoName][email] = true
|
|
return false
|
|
}
|
|
|
|
func GetAllPatterns() []*SecretPattern {
|
|
return []*SecretPattern{
|
|
{Name: "AWS Access Key", Regex: regexp.MustCompile(`AKIA[0-9A-Z]{16}`), Severity: "CRITICAL", Entropy: false},
|
|
{Name: "AWS Secret Key", Regex: regexp.MustCompile(`(?i)(?:aws_secret_access_key|AWS_SECRET_ACCESS_KEY)\s*[:=\s]+['"]?([A-Za-z0-9/+=]{40})['"]?`), Severity: "CRITICAL", Entropy: true},
|
|
{Name: "Google API Key", Regex: regexp.MustCompile(`AIza[0-9A-Za-z\-_]{35}`), Severity: "CRITICAL", Entropy: false},
|
|
{Name: "GitHub Token", Regex: regexp.MustCompile(`ghp_[A-Za-z0-9]{36}|github_pat_[A-Za-z0-9]{22}_[A-Za-z0-9]{59}`), Severity: "CRITICAL", Entropy: false},
|
|
{Name: "Slack Token", Regex: regexp.MustCompile(`xox[baprs]-[0-9]{10,13}-[0-9]{10,13}-[a-zA-Z0-9]{24}`), Severity: "HIGH", Entropy: false},
|
|
{Name: "Discord Bot Token", Regex: regexp.MustCompile(`[MNO][a-zA-Z\d_-]{23,25}\.[a-zA-Z\d_-]{6}\.[a-zA-Z\d_-]{27}`), Severity: "CRITICAL", Entropy: false},
|
|
{Name: "Stripe Secret Key", Regex: regexp.MustCompile(`sk_live_[A-Za-z0-9]{24}`), Severity: "CRITICAL", Entropy: false},
|
|
{Name: "Stripe Publishable Key", Regex: regexp.MustCompile(`pk_live_[A-Za-z0-9]{24}`), Severity: "HIGH", Entropy: false},
|
|
{Name: "JWT Token", Regex: regexp.MustCompile(`eyJ[A-Za-z0-9-_=]+\.[A-Za-z0-9-_=]+\.?[A-Za-z0-9-_.+/=]*`), Severity: "HIGH", Entropy: true},
|
|
{Name: "PostgreSQL URL", Regex: regexp.MustCompile(`postgresql://[^/\s]+:[^/\s]+@[^/\s]+/\w+`), Severity: "CRITICAL", Entropy: false},
|
|
{Name: "MySQL URL", Regex: regexp.MustCompile(`mysql://[^/\s]+:[^/\s]+@[^/\s]+/\w+`), Severity: "CRITICAL", Entropy: false},
|
|
{Name: "MongoDB URL", Regex: regexp.MustCompile(`mongodb(?:\+srv)?://[^/\s]+:[^/\s]+@[^/\s]+/\w+`), Severity: "CRITICAL", Entropy: false},
|
|
{Name: "Redis URL", Regex: regexp.MustCompile(`redis://(?:[^:@]+:[^@]+@)?[^:]+:[0-9]+`), Severity: "HIGH", Entropy: false},
|
|
{Name: "RSA Private Key", Regex: regexp.MustCompile(`-----BEGIN RSA PRIVATE KEY-----`), Severity: "CRITICAL", Entropy: false},
|
|
{Name: "SSH Private Key", Regex: regexp.MustCompile(`-----BEGIN OPENSSH PRIVATE KEY-----`), Severity: "CRITICAL", Entropy: false},
|
|
{Name: "EC Private Key", Regex: regexp.MustCompile(`-----BEGIN EC PRIVATE KEY-----`), Severity: "CRITICAL", Entropy: false},
|
|
{Name: "Email Address", Regex: regexp.MustCompile(`[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}`), Severity: "MEDIUM", Entropy: false},
|
|
{Name: "Generic Password", Regex: regexp.MustCompile(`(?i)(?:password|passwd|pwd)\s*[:=\s]+['"]?([^'"\s]{8,50})['"]?`), Severity: "HIGH", Entropy: false},
|
|
{Name: "Generic API Key", Regex: regexp.MustCompile(`(?i)(?:api[_-]?key|apikey|api_token|token)\s*[:=\s]+['"]?([A-Za-z0-9]{20,50})['"]?`), Severity: "HIGH", Entropy: true},
|
|
{Name: "Azure Connection String", Regex: regexp.MustCompile(`DefaultEndpointsProtocol=https;AccountName=[^;]+;AccountKey=[^;]+`), Severity: "CRITICAL", Entropy: false},
|
|
{Name: "Twilio API Key", Regex: regexp.MustCompile(`SK[0-9a-fA-F]{32}`), Severity: "HIGH", Entropy: false},
|
|
{Name: "SendGrid API Key", Regex: regexp.MustCompile(`SG\.[a-zA-Z0-9_-]{22}\.[a-zA-Z0-9_-]{43}`), Severity: "HIGH", Entropy: false},
|
|
{Name: "Heroku API Key", Regex: regexp.MustCompile(`[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}`), Severity: "HIGH", Entropy: true},
|
|
{Name: "OpenAI API Key", Regex: regexp.MustCompile(`sk-[A-Za-z0-9]{48}`), Severity: "CRITICAL", Entropy: true},
|
|
{Name: "Telegram Bot Token", Regex: regexp.MustCompile(`[0-9]{8,10}:[A-Za-z0-9_-]{35}`), Severity: "CRITICAL", Entropy: false},
|
|
{Name: "GitHub App Token", Regex: regexp.MustCompile(`ghu_[A-Za-z0-9]{36}`), Severity: "CRITICAL", Entropy: false},
|
|
{Name: "GitLab Token", Regex: regexp.MustCompile(`glpat-[A-Za-z0-9-_]{20}`), Severity: "HIGH", Entropy: false},
|
|
{Name: "Docker Hub Token", Regex: regexp.MustCompile(`dckr_pat_[A-Za-z0-9-_]{32}`), Severity: "HIGH", Entropy: false},
|
|
{Name: "NPM Token", Regex: regexp.MustCompile(`npm_[A-Za-z0-9]{36}`), Severity: "HIGH", Entropy: false},
|
|
{Name: "Pulumi API Key", Regex: regexp.MustCompile(`pul-[a-f0-9]{40}`), Severity: "HIGH", Entropy: false},
|
|
{Name: "DigitalOcean Token", Regex: regexp.MustCompile(`dops_v1_[a-zA-Z0-9]{64}`), Severity: "HIGH", Entropy: false},
|
|
{Name: "Alibaba Cloud Key", Regex: regexp.MustCompile(`LTAI[A-Za-z0-9]{16,20}`), Severity: "HIGH", Entropy: false},
|
|
}
|
|
}
|
|
|
|
func (r *Reaper) RunForever() {
|
|
for {
|
|
select {
|
|
case <-r.ctx.Done():
|
|
r.printFinalStats()
|
|
return
|
|
default:
|
|
}
|
|
|
|
fmt.Printf("\nStarting new scan cycle at %s\n", time.Now().Format("15:04:05"))
|
|
r.stats.CycleStart = time.Now()
|
|
|
|
if err := r.scanCycle(); err != nil {
|
|
log.Printf("Scan cycle error: %v", err)
|
|
}
|
|
|
|
cycleDuration := time.Since(r.stats.CycleStart)
|
|
r.stats.mu.Lock()
|
|
fmt.Printf("\nCycle complete: %d repos, %d findings, %d advisories in %s\n",
|
|
r.stats.ReposScanned, r.stats.FindingsFound, r.stats.AdvisoriesFound, cycleDuration.Round(time.Second))
|
|
r.stats.mu.Unlock()
|
|
|
|
if !*continuous {
|
|
break
|
|
}
|
|
|
|
fmt.Printf("Sleeping for %d minutes before next cycle...\n", *sleepMinutes)
|
|
sleepTimer := time.NewTimer(time.Duration(*sleepMinutes) * time.Minute)
|
|
select {
|
|
case <-sleepTimer.C:
|
|
case <-r.ctx.Done():
|
|
sleepTimer.Stop()
|
|
return
|
|
}
|
|
}
|
|
r.printFinalStats()
|
|
}
|
|
|
|
func (r *Reaper) scanCycle() error {
|
|
query := r.buildSearchQuery()
|
|
fmt.Printf("[+] Search query: %s\n", query)
|
|
|
|
opts := &github.SearchOptions{
|
|
Sort: "updated",
|
|
Order: "desc",
|
|
ListOptions: github.ListOptions{PerPage: 100},
|
|
}
|
|
|
|
for {
|
|
select {
|
|
case <-r.ctx.Done():
|
|
return nil
|
|
default:
|
|
}
|
|
|
|
if err := r.limiter.Wait(r.ctx); err != nil {
|
|
return err
|
|
}
|
|
|
|
result, resp, err := r.client.Search.Repositories(r.ctx, query, opts)
|
|
if err != nil {
|
|
if strings.Contains(err.Error(), "rate limit") {
|
|
r.stats.RateLimitHits++
|
|
time.Sleep(60 * time.Second)
|
|
continue
|
|
}
|
|
return fmt.Errorf("search failed: %w", err)
|
|
}
|
|
|
|
if len(result.Repositories) == 0 {
|
|
fmt.Println("[+] No new repositories found in this cycle.")
|
|
break
|
|
}
|
|
|
|
fmt.Printf("[+] Found %d repositories (page %d)\n", len(result.Repositories), opts.Page)
|
|
|
|
repoChan := make(chan *github.Repository, len(result.Repositories))
|
|
var workerWg sync.WaitGroup
|
|
|
|
for i := 0; i < *workers; i++ {
|
|
workerWg.Add(1)
|
|
go r.repoWorker(repoChan, &workerWg)
|
|
}
|
|
|
|
newRepos := 0
|
|
for _, repo := range result.Repositories {
|
|
if repo.GetPrivate() {
|
|
continue
|
|
}
|
|
if *minStars > 0 && repo.GetStargazersCount() < *minStars {
|
|
continue
|
|
}
|
|
|
|
r.mu.Lock()
|
|
if r.scannedRepos[repo.GetFullName()] {
|
|
r.mu.Unlock()
|
|
continue
|
|
}
|
|
r.scannedRepos[repo.GetFullName()] = true
|
|
r.mu.Unlock()
|
|
|
|
repoChan <- repo
|
|
newRepos++
|
|
}
|
|
|
|
close(repoChan)
|
|
workerWg.Wait()
|
|
|
|
fmt.Printf("[+] Cycle progress: %d new repos scanned, total findings: %d, total advisories: %d\n",
|
|
newRepos, r.stats.FindingsFound, r.stats.AdvisoriesFound)
|
|
|
|
if resp.NextPage == 0 {
|
|
break
|
|
}
|
|
opts.Page = resp.NextPage
|
|
}
|
|
|
|
r.saveScannedList()
|
|
return nil
|
|
}
|
|
|
|
func (r *Reaper) repoWorker(repoChan <-chan *github.Repository, wg *sync.WaitGroup) {
|
|
defer wg.Done()
|
|
|
|
for repo := range repoChan {
|
|
select {
|
|
case <-r.ctx.Done():
|
|
return
|
|
default:
|
|
}
|
|
|
|
r.scanRepository(repo)
|
|
r.stats.mu.Lock()
|
|
r.stats.ReposScanned++
|
|
r.stats.mu.Unlock()
|
|
}
|
|
}
|
|
|
|
func (r *Reaper) scanRepository(repo *github.Repository) {
|
|
repoName := repo.GetFullName()
|
|
if repo.GetArchived() {
|
|
return
|
|
}
|
|
|
|
defaultBranch := repo.GetDefaultBranch()
|
|
r.scanBranch(repoName, defaultBranch)
|
|
|
|
if *scanPRs {
|
|
r.scanPullRequests(repoName)
|
|
}
|
|
if *scanIssues {
|
|
r.scanIssues(repoName)
|
|
}
|
|
if *scanCommits {
|
|
r.scanCommitHistory(repoName, defaultBranch)
|
|
}
|
|
if *scanAdvisories {
|
|
r.fetchAdvisories(repoName, getOwner(repoName), getRepoName(repoName))
|
|
}
|
|
}
|
|
|
|
func (r *Reaper) scanBranch(repoName, branch string) {
|
|
opts := &github.RepositoryContentGetOptions{Ref: branch}
|
|
_, contents, _, err := r.client.Repositories.GetContents(
|
|
r.ctx, getOwner(repoName), getRepoName(repoName), "/", opts)
|
|
if err != nil {
|
|
return
|
|
}
|
|
r.processContents(repoName, branch, contents, opts.Ref)
|
|
}
|
|
|
|
func (r *Reaper) processContents(repoName, branch string, contents []*github.RepositoryContent, ref string) {
|
|
for _, content := range contents {
|
|
select {
|
|
case <-r.ctx.Done():
|
|
return
|
|
default:
|
|
}
|
|
if content == nil {
|
|
continue
|
|
}
|
|
if *content.Type == "dir" {
|
|
_, dirContents, _, err := r.client.Repositories.GetContents(
|
|
r.ctx, getOwner(repoName), getRepoName(repoName), content.GetPath(),
|
|
&github.RepositoryContentGetOptions{Ref: ref})
|
|
if err == nil {
|
|
r.processContents(repoName, branch, dirContents, ref)
|
|
}
|
|
} else if *content.Type == "file" {
|
|
r.scanFile(repoName, branch, content)
|
|
}
|
|
}
|
|
}
|
|
|
|
func (r *Reaper) scanFile(repoName, branch string, file *github.RepositoryContent) {
|
|
ext := strings.ToLower(filepath.Ext(file.GetName()))
|
|
skipExts := map[string]bool{
|
|
".jpg": true, ".jpeg": true, ".png": true, ".gif": true, ".ico": true,
|
|
".mp4": true, ".mp3": true, ".zip": true, ".tar": true, ".gz": true,
|
|
".exe": true, ".dll": true, ".so": true, ".bin": true,
|
|
}
|
|
if skipExts[ext] {
|
|
return
|
|
}
|
|
|
|
content, err := file.GetContent()
|
|
if err != nil {
|
|
return
|
|
}
|
|
decoded, err := base64.StdEncoding.DecodeString(content)
|
|
if err != nil {
|
|
decoded = []byte(content)
|
|
}
|
|
|
|
r.stats.mu.Lock()
|
|
r.stats.FilesScanned++
|
|
r.stats.mu.Unlock()
|
|
|
|
lines := strings.Split(string(decoded), "\n")
|
|
for i, line := range lines {
|
|
for _, pattern := range r.patterns {
|
|
matches := pattern.Regex.FindAllStringSubmatch(line, -1)
|
|
if len(matches) == 0 {
|
|
continue
|
|
}
|
|
for _, match := range matches {
|
|
secret := match[0]
|
|
if len(match) > 1 && match[1] != "" {
|
|
secret = match[1]
|
|
}
|
|
if *entropyCheck && pattern.Entropy && !hasHighEntropy(secret) {
|
|
continue
|
|
}
|
|
|
|
// Special handling for Email Addresses
|
|
if pattern.Name == "Email Address" {
|
|
// Skip obfuscated emails if flag is set
|
|
if *hideObfuscated && isObfuscatedEmail(secret) {
|
|
continue
|
|
}
|
|
// Skip GitHub no-reply emails
|
|
if isGitHubNoReply(secret) {
|
|
continue
|
|
}
|
|
// Deduplicate emails per repository
|
|
if r.isDuplicateEmail(repoName, secret) {
|
|
continue
|
|
}
|
|
}
|
|
|
|
hash := sha256.Sum256([]byte(repoName + file.GetPath() + secret))
|
|
id := hex.EncodeToString(hash[:])[:16]
|
|
|
|
finding := &Finding{
|
|
ID: id,
|
|
Repository: repoName,
|
|
FilePath: file.GetPath(),
|
|
LineNumber: i + 1,
|
|
SecretType: pattern.Name,
|
|
SecretValue: maskSecret(secret),
|
|
Context: getContext(lines, i, 2),
|
|
URL: file.GetHTMLURL(),
|
|
Branch: branch,
|
|
Timestamp: time.Now(),
|
|
Severity: pattern.Severity,
|
|
}
|
|
r.results <- finding
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
func (r *Reaper) scanPullRequests(repoName string) {
|
|
opts := &github.PullRequestListOptions{State: "all", ListOptions: github.ListOptions{PerPage: 50}}
|
|
for {
|
|
select {
|
|
case <-r.ctx.Done():
|
|
return
|
|
default:
|
|
}
|
|
if err := r.limiter.Wait(r.ctx); err != nil {
|
|
return
|
|
}
|
|
prs, resp, err := r.client.PullRequests.List(r.ctx, getOwner(repoName), getRepoName(repoName), opts)
|
|
if err != nil {
|
|
return
|
|
}
|
|
for _, pr := range prs {
|
|
r.scanText(repoName, "pull_request", pr.GetTitle(), pr.GetHTMLURL())
|
|
r.scanText(repoName, "pull_request", pr.GetBody(), pr.GetHTMLURL())
|
|
}
|
|
if resp.NextPage == 0 {
|
|
break
|
|
}
|
|
opts.Page = resp.NextPage
|
|
}
|
|
}
|
|
|
|
func (r *Reaper) scanIssues(repoName string) {
|
|
opts := &github.IssueListByRepoOptions{State: "all", ListOptions: github.ListOptions{PerPage: 50}}
|
|
for {
|
|
select {
|
|
case <-r.ctx.Done():
|
|
return
|
|
default:
|
|
}
|
|
if err := r.limiter.Wait(r.ctx); err != nil {
|
|
return
|
|
}
|
|
issues, resp, err := r.client.Issues.ListByRepo(r.ctx, getOwner(repoName), getRepoName(repoName), opts)
|
|
if err != nil {
|
|
return
|
|
}
|
|
for _, issue := range issues {
|
|
r.scanText(repoName, "issue", issue.GetTitle(), issue.GetHTMLURL())
|
|
r.scanText(repoName, "issue", issue.GetBody(), issue.GetHTMLURL())
|
|
}
|
|
if resp.NextPage == 0 {
|
|
break
|
|
}
|
|
opts.Page = resp.NextPage
|
|
}
|
|
}
|
|
|
|
func (r *Reaper) scanCommitHistory(repoName, branch string) {
|
|
opts := &github.CommitsListOptions{
|
|
SHA: branch,
|
|
ListOptions: github.ListOptions{PerPage: 100},
|
|
}
|
|
for {
|
|
select {
|
|
case <-r.ctx.Done():
|
|
return
|
|
default:
|
|
}
|
|
if err := r.limiter.Wait(r.ctx); err != nil {
|
|
return
|
|
}
|
|
commits, resp, err := r.client.Repositories.ListCommits(r.ctx, getOwner(repoName), getRepoName(repoName), opts)
|
|
if err != nil {
|
|
return
|
|
}
|
|
for _, commit := range commits {
|
|
if commit.Commit != nil {
|
|
r.scanText(repoName, "commit_message", commit.Commit.GetMessage(), commit.GetHTMLURL())
|
|
}
|
|
}
|
|
if resp.NextPage == 0 {
|
|
break
|
|
}
|
|
opts.Page = resp.NextPage
|
|
}
|
|
}
|
|
|
|
func (r *Reaper) scanText(repoName, location, text, url string) {
|
|
if text == "" {
|
|
return
|
|
}
|
|
for _, pattern := range r.patterns {
|
|
matches := pattern.Regex.FindAllStringSubmatch(text, -1)
|
|
for _, match := range matches {
|
|
secret := match[0]
|
|
if len(match) > 1 && match[1] != "" {
|
|
secret = match[1]
|
|
}
|
|
if *entropyCheck && pattern.Entropy && !hasHighEntropy(secret) {
|
|
continue
|
|
}
|
|
|
|
// Special handling for Email Addresses
|
|
if pattern.Name == "Email Address" {
|
|
if *hideObfuscated && isObfuscatedEmail(secret) {
|
|
continue
|
|
}
|
|
if isGitHubNoReply(secret) {
|
|
continue
|
|
}
|
|
if r.isDuplicateEmail(repoName, secret) {
|
|
continue
|
|
}
|
|
}
|
|
|
|
hash := sha256.Sum256([]byte(repoName + location + secret))
|
|
id := hex.EncodeToString(hash[:])[:16]
|
|
finding := &Finding{
|
|
ID: id,
|
|
Repository: repoName,
|
|
FilePath: location,
|
|
SecretType: pattern.Name,
|
|
SecretValue: maskSecret(secret),
|
|
Context: text,
|
|
URL: url,
|
|
Timestamp: time.Now(),
|
|
Severity: pattern.Severity,
|
|
}
|
|
r.results <- finding
|
|
}
|
|
}
|
|
}
|
|
|
|
func (r *Reaper) fetchAdvisories(repoName, owner, repo string) {
|
|
query := fmt.Sprintf(`{
|
|
repository(owner: "%s", name: "%s") {
|
|
vulnerabilityAlerts(first: 100) {
|
|
nodes {
|
|
securityAdvisory {
|
|
ghsaId
|
|
cveId
|
|
summary
|
|
severity
|
|
publishedAt
|
|
updatedAt
|
|
permalink
|
|
}
|
|
vulnerableManifestPath
|
|
}
|
|
}
|
|
}
|
|
}`, owner, repo)
|
|
|
|
body := struct {
|
|
Query string `json:"query"`
|
|
}{Query: query}
|
|
jsonBody, _ := json.Marshal(body)
|
|
|
|
req, err := http.NewRequestWithContext(r.ctx, "POST", "https://api.github.com/graphql", bytes.NewReader(jsonBody))
|
|
if err != nil {
|
|
return
|
|
}
|
|
req.Header.Set("Authorization", "Bearer "+os.Getenv("GITHUB_TOKEN"))
|
|
req.Header.Set("Content-Type", "application/json")
|
|
|
|
resp, err := http.DefaultClient.Do(req)
|
|
if err != nil {
|
|
return
|
|
}
|
|
defer resp.Body.Close()
|
|
|
|
var result struct {
|
|
Data struct {
|
|
Repository struct {
|
|
VulnerabilityAlerts struct {
|
|
Nodes []struct {
|
|
SecurityAdvisory struct {
|
|
GhsaId string `json:"ghsaId"`
|
|
CveId string `json:"cveId"`
|
|
Summary string `json:"summary"`
|
|
Severity string `json:"severity"`
|
|
PublishedAt time.Time `json:"publishedAt"`
|
|
UpdatedAt time.Time `json:"updatedAt"`
|
|
Permalink string `json:"permalink"`
|
|
} `json:"securityAdvisory"`
|
|
VulnerableManifestPath string `json:"vulnerableManifestPath"`
|
|
} `json:"nodes"`
|
|
} `json:"vulnerabilityAlerts"`
|
|
} `json:"repository"`
|
|
} `json:"data"`
|
|
}
|
|
|
|
if err := json.NewDecoder(resp.Body).Decode(&result); err != nil {
|
|
return
|
|
}
|
|
|
|
for _, node := range result.Data.Repository.VulnerabilityAlerts.Nodes {
|
|
adv := &Advisory{
|
|
Repository: repoName,
|
|
GHSAID: node.SecurityAdvisory.GhsaId,
|
|
CVEID: node.SecurityAdvisory.CveId,
|
|
Summary: node.SecurityAdvisory.Summary,
|
|
Severity: node.SecurityAdvisory.Severity,
|
|
PublishedAt: node.SecurityAdvisory.PublishedAt,
|
|
UpdatedAt: node.SecurityAdvisory.UpdatedAt,
|
|
Permalink: node.SecurityAdvisory.Permalink,
|
|
VulnerableManifest: node.VulnerableManifestPath,
|
|
}
|
|
r.advisories <- adv
|
|
}
|
|
}
|
|
|
|
func (r *Reaper) processResults() {
|
|
for finding := range r.results {
|
|
r.stats.mu.Lock()
|
|
r.stats.FindingsFound++
|
|
currentFindings := r.stats.FindingsFound
|
|
r.stats.mu.Unlock()
|
|
|
|
record := []string{
|
|
finding.Timestamp.Format(time.RFC3339),
|
|
finding.Repository,
|
|
finding.FilePath,
|
|
fmt.Sprintf("%d", finding.LineNumber),
|
|
finding.SecretType,
|
|
finding.SecretValue,
|
|
finding.URL,
|
|
finding.Severity,
|
|
}
|
|
r.csvWriter.Write(record)
|
|
r.csvWriter.Flush()
|
|
|
|
jsonData, _ := json.Marshal(finding)
|
|
r.jsonFile.Write(append(jsonData, '\n'))
|
|
r.jsonFile.Sync()
|
|
|
|
if *verbose {
|
|
fmt.Printf("\n[SECRET] #%d: %s [%s] in %s - %s\n", currentFindings, finding.SecretType, finding.Severity, finding.Repository, finding.FilePath)
|
|
} else {
|
|
fmt.Printf(".")
|
|
}
|
|
}
|
|
}
|
|
|
|
func (r *Reaper) processAdvisories() {
|
|
for advisory := range r.advisories {
|
|
r.stats.mu.Lock()
|
|
r.stats.AdvisoriesFound++
|
|
r.stats.mu.Unlock()
|
|
|
|
jsonData, _ := json.Marshal(advisory)
|
|
r.advFile.Write(append(jsonData, '\n'))
|
|
r.advFile.Sync()
|
|
|
|
if *verbose {
|
|
fmt.Printf("\n[ADVISORY] %s [%s] in %s - %s\n", advisory.GHSAID, advisory.Severity, advisory.Repository, advisory.Summary)
|
|
}
|
|
}
|
|
}
|
|
|
|
func (r *Reaper) buildSearchQuery() string {
|
|
query := "a is:public"
|
|
if *sinceDays > 0 {
|
|
since := time.Now().AddDate(0, 0, -*sinceDays).Format("2006-01-02")
|
|
query += fmt.Sprintf(" pushed:>%s", since)
|
|
}
|
|
return query
|
|
}
|
|
|
|
func (r *Reaper) saveScannedList() {
|
|
r.mu.Lock()
|
|
defer r.mu.Unlock()
|
|
file, err := os.Create(filepath.Join(*outputDir, "scanned_repos.txt"))
|
|
if err != nil {
|
|
return
|
|
}
|
|
defer file.Close()
|
|
for repo := range r.scannedRepos {
|
|
file.WriteString(repo + "\n")
|
|
}
|
|
}
|
|
|
|
func (r *Reaper) printFinalStats() {
|
|
duration := time.Since(r.stats.StartTime)
|
|
fmt.Println("\n" + strings.Repeat("=", 60))
|
|
fmt.Println("REAPER FINAL STATISTICS")
|
|
fmt.Println(strings.Repeat("=", 60))
|
|
fmt.Printf("Total runtime: %s\n", duration.Round(time.Second))
|
|
fmt.Printf("Repositories: %d\n", r.stats.ReposScanned)
|
|
fmt.Printf("Files scanned: %d\n", r.stats.FilesScanned)
|
|
fmt.Printf("Secrets found: %d\n", r.stats.FindingsFound)
|
|
fmt.Printf("Advisories found: %d\n", r.stats.AdvisoriesFound)
|
|
fmt.Printf("Rate limit hits: %d\n", r.stats.RateLimitHits)
|
|
fmt.Printf("Output directory: %s\n", *outputDir)
|
|
fmt.Println(strings.Repeat("=", 60))
|
|
fmt.Println("\nDISCLAIMER: This tool is for educational and authorized testing only.")
|
|
fmt.Println("Use responsibly and in compliance with GitHub's Terms of Service.")
|
|
}
|
|
|
|
// getOwner returns the part of repoName before the first "/". A name
// without a slash is returned unchanged; an empty name yields "".
func getOwner(repoName string) string {
	owner, _, _ := strings.Cut(repoName, "/")
	return owner
}
|
|
|
|
// getRepoName returns the second "/"-separated segment of repoName, i.e.
// the repository part of "owner/name". Anything after a further slash is
// dropped. A name without any slash is returned unchanged.
func getRepoName(repoName string) string {
	_, afterOwner, hasSlash := strings.Cut(repoName, "/")
	if !hasSlash {
		return repoName
	}
	name, _, _ := strings.Cut(afterOwner, "/")
	return name
}
|
|
|
|
// maskSecret returns a redacted form of secret for output. Values of
// twelve characters or fewer are replaced entirely by "***MASKED***";
// longer values keep their first six and last six characters with "..."
// in between.
//
// Lengths and cut points are measured in runes, not bytes, so a value
// containing multi-byte UTF-8 characters is never cut in the middle of a
// character. For pure ASCII input this is identical to cutting by byte.
func maskSecret(secret string) string {
	runes := []rune(secret)
	if len(runes) <= 12 {
		return "***MASKED***"
	}
	return string(runes[:6]) + "..." + string(runes[len(runes)-6:])
}
|
|
|
|
// getContext returns the lines around index lineNum (0-based), taking up
// to contextLines lines before and after it, joined with "\n". The window
// is clipped to the bounds of lines.
//
// It returns "" when the clipped window is empty, which covers an empty
// slice, a lineNum outside the slice and a negative contextLines; none of
// these panic.
func getContext(lines []string, lineNum, contextLines int) string {
	start := lineNum - contextLines
	if start < 0 {
		start = 0
	}

	end := lineNum + contextLines + 1
	if end > len(lines) {
		end = len(lines)
	}

	// After clipping, start can still lie at or beyond end (for example
	// when lineNum is past the last line); slicing would panic then.
	if start >= end {
		return ""
	}
	return strings.Join(lines[start:end], "\n")
}
|
|
|
|
// hasHighEntropy reports whether s has a Shannon entropy above 4.5 bits
// per character. Strings shorter than eight characters are never
// considered high-entropy.
//
// Both the length check and the probabilities are based on the number of
// characters (runes) in s, so that the per-character frequencies sum to
// one for non-ASCII input as well. For pure ASCII input this is identical
// to counting bytes.
func hasHighEntropy(s string) bool {
	counts := make(map[rune]int)
	total := 0
	for _, char := range s {
		counts[char]++
		total++
	}
	if total < 8 {
		return false
	}

	var entropy float64
	for _, count := range counts {
		p := float64(count) / float64(total)
		entropy -= p * math.Log2(p)
	}
	return entropy > 4.5
}
|