Upload files to "/"
This commit is contained in:
@@ -0,0 +1,984 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"bytes"
|
||||
"context"
|
||||
"crypto/sha256"
|
||||
"encoding/base64"
|
||||
"encoding/csv"
|
||||
"encoding/hex"
|
||||
"encoding/json"
|
||||
"flag"
|
||||
"fmt"
|
||||
"log"
|
||||
"math"
|
||||
"net/http"
|
||||
"os"
|
||||
"os/signal"
|
||||
"path/filepath"
|
||||
"regexp"
|
||||
"strings"
|
||||
"sync"
|
||||
"syscall"
|
||||
"time"
|
||||
|
||||
"github.com/google/go-github/v61/github"
|
||||
"golang.org/x/oauth2"
|
||||
"golang.org/x/time/rate"
|
||||
)
|
||||
|
||||
// Configuration flags
|
||||
var (
|
||||
githubToken = flag.String("token", "", "GitHub Personal Access Token (or set GITHUB_TOKEN env)")
|
||||
outputDir = flag.String("output", "./output", "Output directory for findings")
|
||||
workers = flag.Int("workers", 20, "Number of concurrent workers")
|
||||
minStars = flag.Int("min-stars", 0, "Minimum stars filter")
|
||||
sinceDays = flag.Int("since-days", 7, "Only scan repos updated in last X days (repeats)")
|
||||
verbose = flag.Bool("verbose", false, "Verbose output")
|
||||
scanPRs = flag.Bool("scan-prs", true, "Scan pull requests")
|
||||
scanIssues = flag.Bool("scan-issues", true, "Scan issues and comments")
|
||||
scanCommits = flag.Bool("scan-commits", true, "Scan commit history")
|
||||
entropyCheck = flag.Bool("entropy", true, "Enable entropy checking")
|
||||
continuous = flag.Bool("continuous", true, "Run continuously (loop forever)")
|
||||
sleepMinutes = flag.Int("sleep-minutes", 60, "Sleep between continuous cycles")
|
||||
scanAdvisories = flag.Bool("scan-advisories", false, "Check repository for GitHub Security Advisories (requires additional API calls)")
|
||||
hideObfuscated = flag.Bool("hide-obfuscated", true, "Hide obfuscated emails (e.g., user[at]example[dot]com)")
|
||||
|
||||
// Single/multiple repo scanning
|
||||
repoURLs = flag.String("repo", "", "Single repository URL to scan (can be used multiple times)")
|
||||
repoListFile = flag.String("repo-list", "", "File containing repository URLs (one per line)")
|
||||
)
|
||||
|
||||
// Core types
|
||||
type Reaper struct {
|
||||
client *github.Client
|
||||
patterns []*SecretPattern
|
||||
results chan *Finding
|
||||
advisories chan *Advisory
|
||||
limiter *rate.Limiter
|
||||
ctx context.Context
|
||||
cancel context.CancelFunc
|
||||
wg sync.WaitGroup
|
||||
stats *ScanStats
|
||||
csvWriter *csv.Writer
|
||||
jsonFile *os.File
|
||||
advFile *os.File
|
||||
mu sync.Mutex
|
||||
scannedRepos map[string]bool
|
||||
emailCache map[string]map[string]bool // repo -> email -> found
|
||||
}
|
||||
|
||||
type SecretPattern struct {
|
||||
Name string
|
||||
Regex *regexp.Regexp
|
||||
Severity string
|
||||
Entropy bool
|
||||
}
|
||||
|
||||
type Finding struct {
|
||||
ID string `json:"id"`
|
||||
Repository string `json:"repository"`
|
||||
FilePath string `json:"file_path"`
|
||||
LineNumber int `json:"line_number"`
|
||||
SecretType string `json:"secret_type"`
|
||||
SecretValue string `json:"secret_value"`
|
||||
Context string `json:"context"`
|
||||
URL string `json:"url"`
|
||||
Branch string `json:"branch"`
|
||||
Timestamp time.Time `json:"timestamp"`
|
||||
Severity string `json:"severity"`
|
||||
}
|
||||
|
||||
type Advisory struct {
|
||||
ID string `json:"id"`
|
||||
Repository string `json:"repository"`
|
||||
GHSAID string `json:"ghsa_id"`
|
||||
CVEID string `json:"cve_id,omitempty"`
|
||||
Summary string `json:"summary"`
|
||||
Severity string `json:"severity"`
|
||||
PublishedAt time.Time `json:"published_at"`
|
||||
UpdatedAt time.Time `json:"updated_at"`
|
||||
Permalink string `json:"permalink"`
|
||||
VulnerableManifest string `json:"vulnerable_manifest,omitempty"`
|
||||
}
|
||||
|
||||
type ScanStats struct {
|
||||
ReposScanned int
|
||||
FilesScanned int
|
||||
FindingsFound int
|
||||
AdvisoriesFound int
|
||||
StartTime time.Time
|
||||
CycleStart time.Time
|
||||
RateLimitHits int
|
||||
mu sync.Mutex
|
||||
}
|
||||
|
||||
func main() {
|
||||
flag.Parse()
|
||||
|
||||
token := *githubToken
|
||||
if token == "" {
|
||||
token = os.Getenv("GITHUB_TOKEN")
|
||||
}
|
||||
if token == "" {
|
||||
log.Fatal("GitHub token required. Use -token flag or GITHUB_TOKEN env variable")
|
||||
}
|
||||
|
||||
if err := os.MkdirAll(*outputDir, 0755); err != nil {
|
||||
log.Fatalf("Failed to create output directory: %v", err)
|
||||
}
|
||||
|
||||
reaper := NewReaper(token)
|
||||
|
||||
sigChan := make(chan os.Signal, 1)
|
||||
signal.Notify(sigChan, syscall.SIGINT, syscall.SIGTERM)
|
||||
go func() {
|
||||
<-sigChan
|
||||
fmt.Println("\nShutting down REAPER gracefully...")
|
||||
reaper.cancel()
|
||||
}()
|
||||
|
||||
fmt.Println(`
|
||||
============================================================
|
||||
██████╗ ███████╗ █████╗ ██████╗ ███████╗██████╗
|
||||
██╔══██╗██╔════╝██╔══██╗██╔══██╗██╔════╝██╔══██╗
|
||||
██████╔╝█████╗ ███████║██████╔╝█████╗ ██████╔╝
|
||||
██╔══██╗██╔══╝ ██╔══██║██╔═══╝ ██╔══╝ ██╔══██╗
|
||||
██║ ██║███████╗██║ ██║██║ ███████╗██║ ██║
|
||||
╚═╝ ╚═╝╚══════╝╚═╝ ╚═╝╚═╝ ╚══════╝╚═╝ ╚═╝
|
||||
============================================================
|
||||
REAPER - GitHub Secret Harvester & Advisory Scanner
|
||||
============================================================
|
||||
`)
|
||||
|
||||
// Determine mode: single-shot or continuous
|
||||
targetRepos := getTargetRepos()
|
||||
if len(targetRepos) > 0 {
|
||||
fmt.Printf("[!] Single-shot mode: scanning %d specific repository(s)\n", len(targetRepos))
|
||||
fmt.Printf("[+] Output directory: %s\n", *outputDir)
|
||||
fmt.Printf("[+] Scan commits: %v\n", *scanCommits)
|
||||
fmt.Printf("[+] Scan advisories: %v\n", *scanAdvisories)
|
||||
fmt.Printf("[+] Hide obfuscated emails: %v\n", *hideObfuscated)
|
||||
|
||||
for _, repoURL := range targetRepos {
|
||||
repo, err := reaper.getRepoFromURL(repoURL)
|
||||
if err != nil {
|
||||
log.Printf("Failed to parse %s: %v", repoURL, err)
|
||||
continue
|
||||
}
|
||||
reaper.scanRepository(repo)
|
||||
reaper.stats.mu.Lock()
|
||||
reaper.stats.ReposScanned++
|
||||
reaper.stats.mu.Unlock()
|
||||
}
|
||||
reaper.printFinalStats()
|
||||
return
|
||||
}
|
||||
|
||||
// Normal continuous mode
|
||||
fmt.Printf("[+] Starting REAPER with %d workers\n", *workers)
|
||||
fmt.Printf("[+] Output directory: %s\n", *outputDir)
|
||||
fmt.Printf("[+] Continuous mode: %v (sleep %d min between cycles)\n", *continuous, *sleepMinutes)
|
||||
fmt.Printf("[+] Scanning repos updated in last %d days\n", *sinceDays)
|
||||
fmt.Printf("[+] Scan commits: %v\n", *scanCommits)
|
||||
fmt.Printf("[+] Scan advisories: %v\n", *scanAdvisories)
|
||||
fmt.Printf("[+] Hide obfuscated emails: %v\n", *hideObfuscated)
|
||||
|
||||
reaper.RunForever()
|
||||
}
|
||||
|
||||
func getTargetRepos() []string {
|
||||
var repos []string
|
||||
|
||||
flag.Visit(func(f *flag.Flag) {
|
||||
if f.Name == "repo" && f.Value.String() != "" {
|
||||
repos = append(repos, f.Value.String())
|
||||
}
|
||||
})
|
||||
|
||||
if *repoListFile != "" {
|
||||
file, err := os.Open(*repoListFile)
|
||||
if err == nil {
|
||||
defer file.Close()
|
||||
scanner := bufio.NewScanner(file)
|
||||
for scanner.Scan() {
|
||||
line := strings.TrimSpace(scanner.Text())
|
||||
if line != "" && !strings.HasPrefix(line, "#") {
|
||||
repos = append(repos, line)
|
||||
}
|
||||
}
|
||||
} else {
|
||||
log.Printf("Warning: could not open repo list file %s: %v", *repoListFile, err)
|
||||
}
|
||||
}
|
||||
|
||||
return repos
|
||||
}
|
||||
|
||||
func (r *Reaper) getRepoFromURL(rawURL string) (*github.Repository, error) {
|
||||
rawURL = strings.TrimSuffix(rawURL, ".git")
|
||||
parts := strings.Split(strings.TrimPrefix(rawURL, "https://github.com/"), "/")
|
||||
if len(parts) < 2 {
|
||||
return nil, fmt.Errorf("invalid repository URL: %s", rawURL)
|
||||
}
|
||||
owner, name := parts[0], parts[1]
|
||||
|
||||
repo, _, err := r.client.Repositories.Get(r.ctx, owner, name)
|
||||
return repo, err
|
||||
}
|
||||
|
||||
func NewReaper(token string) *Reaper {
|
||||
ctx, cancel := context.WithCancel(context.Background())
|
||||
|
||||
ts := oauth2.StaticTokenSource(&oauth2.Token{AccessToken: token})
|
||||
tc := oauth2.NewClient(ctx, ts)
|
||||
client := github.NewClient(tc)
|
||||
|
||||
jsonFile, _ := os.OpenFile(filepath.Join(*outputDir, "reaper_findings.jsonl"), os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0644)
|
||||
advFile, _ := os.OpenFile(filepath.Join(*outputDir, "advisories.jsonl"), os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0644)
|
||||
|
||||
timestamp := time.Now().Format("20060102_150405")
|
||||
csvFile, _ := os.Create(filepath.Join(*outputDir, fmt.Sprintf("reaper_findings_%s.csv", timestamp)))
|
||||
csvWriter := csv.NewWriter(csvFile)
|
||||
csvWriter.Write([]string{"Timestamp", "Repository", "File", "Line", "Type", "Secret", "URL", "Severity"})
|
||||
|
||||
scannedRepos := make(map[string]bool)
|
||||
scannedFile, err := os.OpenFile(filepath.Join(*outputDir, "scanned_repos.txt"), os.O_RDWR|os.O_CREATE, 0644)
|
||||
if err == nil {
|
||||
scanner := bufio.NewScanner(scannedFile)
|
||||
for scanner.Scan() {
|
||||
scannedRepos[scanner.Text()] = true
|
||||
}
|
||||
scannedFile.Close()
|
||||
}
|
||||
|
||||
reaper := &Reaper{
|
||||
client: client,
|
||||
patterns: GetAllPatterns(),
|
||||
results: make(chan *Finding, 10000),
|
||||
advisories: make(chan *Advisory, 1000),
|
||||
limiter: rate.NewLimiter(rate.Limit(30), 100),
|
||||
ctx: ctx,
|
||||
cancel: cancel,
|
||||
stats: &ScanStats{StartTime: time.Now()},
|
||||
csvWriter: csvWriter,
|
||||
jsonFile: jsonFile,
|
||||
advFile: advFile,
|
||||
scannedRepos: scannedRepos,
|
||||
emailCache: make(map[string]map[string]bool),
|
||||
}
|
||||
|
||||
go reaper.processResults()
|
||||
go reaper.processAdvisories()
|
||||
|
||||
return reaper
|
||||
}
|
||||
|
||||
// isObfuscatedEmail checks if an email address is obfuscated
|
||||
func isObfuscatedEmail(email string) bool {
|
||||
obfuscatedPatterns := []string{
|
||||
"[at]", "[@]", "{at}", "{@}",
|
||||
" at ", "(at)", "[dot]", "{dot}", "(dot)",
|
||||
" dot ", " DOT ", " AT ",
|
||||
" user@", "@domain", "example.com",
|
||||
"replace@", "change@", "obfuscated",
|
||||
"noreply", "no-reply", "do-not-reply",
|
||||
}
|
||||
emailLower := strings.ToLower(email)
|
||||
for _, pattern := range obfuscatedPatterns {
|
||||
if strings.Contains(emailLower, pattern) {
|
||||
return true
|
||||
}
|
||||
}
|
||||
// Check for invalid domain patterns
|
||||
if strings.Contains(email, "@[") && strings.Contains(email, "]") {
|
||||
return true
|
||||
}
|
||||
if strings.Contains(email, "@(") && strings.Contains(email, ")") {
|
||||
return true
|
||||
}
|
||||
// Check for placeholder domains
|
||||
placeholderDomains := []string{"example.com", "domain.com", "test.com", "localhost", "invalid"}
|
||||
for _, domain := range placeholderDomains {
|
||||
if strings.Contains(emailLower, "@"+domain) {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// isGitHubNoReply checks for GitHub's no-reply emails
|
||||
func isGitHubNoReply(email string) bool {
|
||||
return strings.Contains(email, "noreply.github.com") || strings.Contains(email, "users.noreply.github.com")
|
||||
}
|
||||
|
||||
func (r *Reaper) isDuplicateEmail(repoName, email string) bool {
|
||||
r.mu.Lock()
|
||||
defer r.mu.Unlock()
|
||||
|
||||
if _, exists := r.emailCache[repoName]; !exists {
|
||||
r.emailCache[repoName] = make(map[string]bool)
|
||||
}
|
||||
|
||||
if r.emailCache[repoName][email] {
|
||||
return true
|
||||
}
|
||||
|
||||
r.emailCache[repoName][email] = true
|
||||
return false
|
||||
}
|
||||
|
||||
func GetAllPatterns() []*SecretPattern {
|
||||
return []*SecretPattern{
|
||||
{Name: "AWS Access Key", Regex: regexp.MustCompile(`AKIA[0-9A-Z]{16}`), Severity: "CRITICAL", Entropy: false},
|
||||
{Name: "AWS Secret Key", Regex: regexp.MustCompile(`(?i)(?:aws_secret_access_key|AWS_SECRET_ACCESS_KEY)\s*[:=\s]+['"]?([A-Za-z0-9/+=]{40})['"]?`), Severity: "CRITICAL", Entropy: true},
|
||||
{Name: "Google API Key", Regex: regexp.MustCompile(`AIza[0-9A-Za-z\-_]{35}`), Severity: "CRITICAL", Entropy: false},
|
||||
{Name: "GitHub Token", Regex: regexp.MustCompile(`ghp_[A-Za-z0-9]{36}|github_pat_[A-Za-z0-9]{22}_[A-Za-z0-9]{59}`), Severity: "CRITICAL", Entropy: false},
|
||||
{Name: "Slack Token", Regex: regexp.MustCompile(`xox[baprs]-[0-9]{10,13}-[0-9]{10,13}-[a-zA-Z0-9]{24}`), Severity: "HIGH", Entropy: false},
|
||||
{Name: "Discord Bot Token", Regex: regexp.MustCompile(`[MNO][a-zA-Z\d_-]{23,25}\.[a-zA-Z\d_-]{6}\.[a-zA-Z\d_-]{27}`), Severity: "CRITICAL", Entropy: false},
|
||||
{Name: "Stripe Secret Key", Regex: regexp.MustCompile(`sk_live_[A-Za-z0-9]{24}`), Severity: "CRITICAL", Entropy: false},
|
||||
{Name: "Stripe Publishable Key", Regex: regexp.MustCompile(`pk_live_[A-Za-z0-9]{24}`), Severity: "HIGH", Entropy: false},
|
||||
{Name: "JWT Token", Regex: regexp.MustCompile(`eyJ[A-Za-z0-9-_=]+\.[A-Za-z0-9-_=]+\.?[A-Za-z0-9-_.+/=]*`), Severity: "HIGH", Entropy: true},
|
||||
{Name: "PostgreSQL URL", Regex: regexp.MustCompile(`postgresql://[^/\s]+:[^/\s]+@[^/\s]+/\w+`), Severity: "CRITICAL", Entropy: false},
|
||||
{Name: "MySQL URL", Regex: regexp.MustCompile(`mysql://[^/\s]+:[^/\s]+@[^/\s]+/\w+`), Severity: "CRITICAL", Entropy: false},
|
||||
{Name: "MongoDB URL", Regex: regexp.MustCompile(`mongodb(?:\+srv)?://[^/\s]+:[^/\s]+@[^/\s]+/\w+`), Severity: "CRITICAL", Entropy: false},
|
||||
{Name: "Redis URL", Regex: regexp.MustCompile(`redis://(?:[^:@]+:[^@]+@)?[^:]+:[0-9]+`), Severity: "HIGH", Entropy: false},
|
||||
{Name: "RSA Private Key", Regex: regexp.MustCompile(`-----BEGIN RSA PRIVATE KEY-----`), Severity: "CRITICAL", Entropy: false},
|
||||
{Name: "SSH Private Key", Regex: regexp.MustCompile(`-----BEGIN OPENSSH PRIVATE KEY-----`), Severity: "CRITICAL", Entropy: false},
|
||||
{Name: "EC Private Key", Regex: regexp.MustCompile(`-----BEGIN EC PRIVATE KEY-----`), Severity: "CRITICAL", Entropy: false},
|
||||
{Name: "Email Address", Regex: regexp.MustCompile(`[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}`), Severity: "MEDIUM", Entropy: false},
|
||||
{Name: "Generic Password", Regex: regexp.MustCompile(`(?i)(?:password|passwd|pwd)\s*[:=\s]+['"]?([^'"\s]{8,50})['"]?`), Severity: "HIGH", Entropy: false},
|
||||
{Name: "Generic API Key", Regex: regexp.MustCompile(`(?i)(?:api[_-]?key|apikey|api_token|token)\s*[:=\s]+['"]?([A-Za-z0-9]{20,50})['"]?`), Severity: "HIGH", Entropy: true},
|
||||
{Name: "Azure Connection String", Regex: regexp.MustCompile(`DefaultEndpointsProtocol=https;AccountName=[^;]+;AccountKey=[^;]+`), Severity: "CRITICAL", Entropy: false},
|
||||
{Name: "Twilio API Key", Regex: regexp.MustCompile(`SK[0-9a-fA-F]{32}`), Severity: "HIGH", Entropy: false},
|
||||
{Name: "SendGrid API Key", Regex: regexp.MustCompile(`SG\.[a-zA-Z0-9_-]{22}\.[a-zA-Z0-9_-]{43}`), Severity: "HIGH", Entropy: false},
|
||||
{Name: "Heroku API Key", Regex: regexp.MustCompile(`[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}`), Severity: "HIGH", Entropy: true},
|
||||
{Name: "OpenAI API Key", Regex: regexp.MustCompile(`sk-[A-Za-z0-9]{48}`), Severity: "CRITICAL", Entropy: true},
|
||||
{Name: "Telegram Bot Token", Regex: regexp.MustCompile(`[0-9]{8,10}:[A-Za-z0-9_-]{35}`), Severity: "CRITICAL", Entropy: false},
|
||||
{Name: "GitHub App Token", Regex: regexp.MustCompile(`ghu_[A-Za-z0-9]{36}`), Severity: "CRITICAL", Entropy: false},
|
||||
{Name: "GitLab Token", Regex: regexp.MustCompile(`glpat-[A-Za-z0-9-_]{20}`), Severity: "HIGH", Entropy: false},
|
||||
{Name: "Docker Hub Token", Regex: regexp.MustCompile(`dckr_pat_[A-Za-z0-9-_]{32}`), Severity: "HIGH", Entropy: false},
|
||||
{Name: "NPM Token", Regex: regexp.MustCompile(`npm_[A-Za-z0-9]{36}`), Severity: "HIGH", Entropy: false},
|
||||
{Name: "Pulumi API Key", Regex: regexp.MustCompile(`pul-[a-f0-9]{40}`), Severity: "HIGH", Entropy: false},
|
||||
{Name: "DigitalOcean Token", Regex: regexp.MustCompile(`dops_v1_[a-zA-Z0-9]{64}`), Severity: "HIGH", Entropy: false},
|
||||
{Name: "Alibaba Cloud Key", Regex: regexp.MustCompile(`LTAI[A-Za-z0-9]{16,20}`), Severity: "HIGH", Entropy: false},
|
||||
}
|
||||
}
|
||||
|
||||
func (r *Reaper) RunForever() {
|
||||
for {
|
||||
select {
|
||||
case <-r.ctx.Done():
|
||||
r.printFinalStats()
|
||||
return
|
||||
default:
|
||||
}
|
||||
|
||||
fmt.Printf("\nStarting new scan cycle at %s\n", time.Now().Format("15:04:05"))
|
||||
r.stats.CycleStart = time.Now()
|
||||
|
||||
if err := r.scanCycle(); err != nil {
|
||||
log.Printf("Scan cycle error: %v", err)
|
||||
}
|
||||
|
||||
cycleDuration := time.Since(r.stats.CycleStart)
|
||||
r.stats.mu.Lock()
|
||||
fmt.Printf("\nCycle complete: %d repos, %d findings, %d advisories in %s\n",
|
||||
r.stats.ReposScanned, r.stats.FindingsFound, r.stats.AdvisoriesFound, cycleDuration.Round(time.Second))
|
||||
r.stats.mu.Unlock()
|
||||
|
||||
if !*continuous {
|
||||
break
|
||||
}
|
||||
|
||||
fmt.Printf("Sleeping for %d minutes before next cycle...\n", *sleepMinutes)
|
||||
sleepTimer := time.NewTimer(time.Duration(*sleepMinutes) * time.Minute)
|
||||
select {
|
||||
case <-sleepTimer.C:
|
||||
case <-r.ctx.Done():
|
||||
sleepTimer.Stop()
|
||||
return
|
||||
}
|
||||
}
|
||||
r.printFinalStats()
|
||||
}
|
||||
|
||||
func (r *Reaper) scanCycle() error {
|
||||
query := r.buildSearchQuery()
|
||||
fmt.Printf("[+] Search query: %s\n", query)
|
||||
|
||||
opts := &github.SearchOptions{
|
||||
Sort: "updated",
|
||||
Order: "desc",
|
||||
ListOptions: github.ListOptions{PerPage: 100},
|
||||
}
|
||||
|
||||
for {
|
||||
select {
|
||||
case <-r.ctx.Done():
|
||||
return nil
|
||||
default:
|
||||
}
|
||||
|
||||
if err := r.limiter.Wait(r.ctx); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
result, resp, err := r.client.Search.Repositories(r.ctx, query, opts)
|
||||
if err != nil {
|
||||
if strings.Contains(err.Error(), "rate limit") {
|
||||
r.stats.RateLimitHits++
|
||||
time.Sleep(60 * time.Second)
|
||||
continue
|
||||
}
|
||||
return fmt.Errorf("search failed: %w", err)
|
||||
}
|
||||
|
||||
if len(result.Repositories) == 0 {
|
||||
fmt.Println("[+] No new repositories found in this cycle.")
|
||||
break
|
||||
}
|
||||
|
||||
fmt.Printf("[+] Found %d repositories (page %d)\n", len(result.Repositories), opts.Page)
|
||||
|
||||
repoChan := make(chan *github.Repository, len(result.Repositories))
|
||||
var workerWg sync.WaitGroup
|
||||
|
||||
for i := 0; i < *workers; i++ {
|
||||
workerWg.Add(1)
|
||||
go r.repoWorker(repoChan, &workerWg)
|
||||
}
|
||||
|
||||
newRepos := 0
|
||||
for _, repo := range result.Repositories {
|
||||
if repo.GetPrivate() {
|
||||
continue
|
||||
}
|
||||
if *minStars > 0 && repo.GetStargazersCount() < *minStars {
|
||||
continue
|
||||
}
|
||||
|
||||
r.mu.Lock()
|
||||
if r.scannedRepos[repo.GetFullName()] {
|
||||
r.mu.Unlock()
|
||||
continue
|
||||
}
|
||||
r.scannedRepos[repo.GetFullName()] = true
|
||||
r.mu.Unlock()
|
||||
|
||||
repoChan <- repo
|
||||
newRepos++
|
||||
}
|
||||
|
||||
close(repoChan)
|
||||
workerWg.Wait()
|
||||
|
||||
fmt.Printf("[+] Cycle progress: %d new repos scanned, total findings: %d, total advisories: %d\n",
|
||||
newRepos, r.stats.FindingsFound, r.stats.AdvisoriesFound)
|
||||
|
||||
if resp.NextPage == 0 {
|
||||
break
|
||||
}
|
||||
opts.Page = resp.NextPage
|
||||
}
|
||||
|
||||
r.saveScannedList()
|
||||
return nil
|
||||
}
|
||||
|
||||
func (r *Reaper) repoWorker(repoChan <-chan *github.Repository, wg *sync.WaitGroup) {
|
||||
defer wg.Done()
|
||||
|
||||
for repo := range repoChan {
|
||||
select {
|
||||
case <-r.ctx.Done():
|
||||
return
|
||||
default:
|
||||
}
|
||||
|
||||
r.scanRepository(repo)
|
||||
r.stats.mu.Lock()
|
||||
r.stats.ReposScanned++
|
||||
r.stats.mu.Unlock()
|
||||
}
|
||||
}
|
||||
|
||||
func (r *Reaper) scanRepository(repo *github.Repository) {
|
||||
repoName := repo.GetFullName()
|
||||
if repo.GetArchived() {
|
||||
return
|
||||
}
|
||||
|
||||
defaultBranch := repo.GetDefaultBranch()
|
||||
r.scanBranch(repoName, defaultBranch)
|
||||
|
||||
if *scanPRs {
|
||||
r.scanPullRequests(repoName)
|
||||
}
|
||||
if *scanIssues {
|
||||
r.scanIssues(repoName)
|
||||
}
|
||||
if *scanCommits {
|
||||
r.scanCommitHistory(repoName, defaultBranch)
|
||||
}
|
||||
if *scanAdvisories {
|
||||
r.fetchAdvisories(repoName, getOwner(repoName), getRepoName(repoName))
|
||||
}
|
||||
}
|
||||
|
||||
func (r *Reaper) scanBranch(repoName, branch string) {
|
||||
opts := &github.RepositoryContentGetOptions{Ref: branch}
|
||||
_, contents, _, err := r.client.Repositories.GetContents(
|
||||
r.ctx, getOwner(repoName), getRepoName(repoName), "/", opts)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
r.processContents(repoName, branch, contents, opts.Ref)
|
||||
}
|
||||
|
||||
func (r *Reaper) processContents(repoName, branch string, contents []*github.RepositoryContent, ref string) {
|
||||
for _, content := range contents {
|
||||
select {
|
||||
case <-r.ctx.Done():
|
||||
return
|
||||
default:
|
||||
}
|
||||
if content == nil {
|
||||
continue
|
||||
}
|
||||
if *content.Type == "dir" {
|
||||
_, dirContents, _, err := r.client.Repositories.GetContents(
|
||||
r.ctx, getOwner(repoName), getRepoName(repoName), content.GetPath(),
|
||||
&github.RepositoryContentGetOptions{Ref: ref})
|
||||
if err == nil {
|
||||
r.processContents(repoName, branch, dirContents, ref)
|
||||
}
|
||||
} else if *content.Type == "file" {
|
||||
r.scanFile(repoName, branch, content)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (r *Reaper) scanFile(repoName, branch string, file *github.RepositoryContent) {
|
||||
ext := strings.ToLower(filepath.Ext(file.GetName()))
|
||||
skipExts := map[string]bool{
|
||||
".jpg": true, ".jpeg": true, ".png": true, ".gif": true, ".ico": true,
|
||||
".mp4": true, ".mp3": true, ".zip": true, ".tar": true, ".gz": true,
|
||||
".exe": true, ".dll": true, ".so": true, ".bin": true,
|
||||
}
|
||||
if skipExts[ext] {
|
||||
return
|
||||
}
|
||||
|
||||
content, err := file.GetContent()
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
decoded, err := base64.StdEncoding.DecodeString(content)
|
||||
if err != nil {
|
||||
decoded = []byte(content)
|
||||
}
|
||||
|
||||
r.stats.mu.Lock()
|
||||
r.stats.FilesScanned++
|
||||
r.stats.mu.Unlock()
|
||||
|
||||
lines := strings.Split(string(decoded), "\n")
|
||||
for i, line := range lines {
|
||||
for _, pattern := range r.patterns {
|
||||
matches := pattern.Regex.FindAllStringSubmatch(line, -1)
|
||||
if len(matches) == 0 {
|
||||
continue
|
||||
}
|
||||
for _, match := range matches {
|
||||
secret := match[0]
|
||||
if len(match) > 1 && match[1] != "" {
|
||||
secret = match[1]
|
||||
}
|
||||
if *entropyCheck && pattern.Entropy && !hasHighEntropy(secret) {
|
||||
continue
|
||||
}
|
||||
|
||||
// Special handling for Email Addresses
|
||||
if pattern.Name == "Email Address" {
|
||||
// Skip obfuscated emails if flag is set
|
||||
if *hideObfuscated && isObfuscatedEmail(secret) {
|
||||
continue
|
||||
}
|
||||
// Skip GitHub no-reply emails
|
||||
if isGitHubNoReply(secret) {
|
||||
continue
|
||||
}
|
||||
// Deduplicate emails per repository
|
||||
if r.isDuplicateEmail(repoName, secret) {
|
||||
continue
|
||||
}
|
||||
}
|
||||
|
||||
hash := sha256.Sum256([]byte(repoName + file.GetPath() + secret))
|
||||
id := hex.EncodeToString(hash[:])[:16]
|
||||
|
||||
finding := &Finding{
|
||||
ID: id,
|
||||
Repository: repoName,
|
||||
FilePath: file.GetPath(),
|
||||
LineNumber: i + 1,
|
||||
SecretType: pattern.Name,
|
||||
SecretValue: maskSecret(secret),
|
||||
Context: getContext(lines, i, 2),
|
||||
URL: file.GetHTMLURL(),
|
||||
Branch: branch,
|
||||
Timestamp: time.Now(),
|
||||
Severity: pattern.Severity,
|
||||
}
|
||||
r.results <- finding
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (r *Reaper) scanPullRequests(repoName string) {
|
||||
opts := &github.PullRequestListOptions{State: "all", ListOptions: github.ListOptions{PerPage: 50}}
|
||||
for {
|
||||
select {
|
||||
case <-r.ctx.Done():
|
||||
return
|
||||
default:
|
||||
}
|
||||
if err := r.limiter.Wait(r.ctx); err != nil {
|
||||
return
|
||||
}
|
||||
prs, resp, err := r.client.PullRequests.List(r.ctx, getOwner(repoName), getRepoName(repoName), opts)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
for _, pr := range prs {
|
||||
r.scanText(repoName, "pull_request", pr.GetTitle(), pr.GetHTMLURL())
|
||||
r.scanText(repoName, "pull_request", pr.GetBody(), pr.GetHTMLURL())
|
||||
}
|
||||
if resp.NextPage == 0 {
|
||||
break
|
||||
}
|
||||
opts.Page = resp.NextPage
|
||||
}
|
||||
}
|
||||
|
||||
func (r *Reaper) scanIssues(repoName string) {
|
||||
opts := &github.IssueListByRepoOptions{State: "all", ListOptions: github.ListOptions{PerPage: 50}}
|
||||
for {
|
||||
select {
|
||||
case <-r.ctx.Done():
|
||||
return
|
||||
default:
|
||||
}
|
||||
if err := r.limiter.Wait(r.ctx); err != nil {
|
||||
return
|
||||
}
|
||||
issues, resp, err := r.client.Issues.ListByRepo(r.ctx, getOwner(repoName), getRepoName(repoName), opts)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
for _, issue := range issues {
|
||||
r.scanText(repoName, "issue", issue.GetTitle(), issue.GetHTMLURL())
|
||||
r.scanText(repoName, "issue", issue.GetBody(), issue.GetHTMLURL())
|
||||
}
|
||||
if resp.NextPage == 0 {
|
||||
break
|
||||
}
|
||||
opts.Page = resp.NextPage
|
||||
}
|
||||
}
|
||||
|
||||
func (r *Reaper) scanCommitHistory(repoName, branch string) {
|
||||
opts := &github.CommitsListOptions{
|
||||
SHA: branch,
|
||||
ListOptions: github.ListOptions{PerPage: 100},
|
||||
}
|
||||
for {
|
||||
select {
|
||||
case <-r.ctx.Done():
|
||||
return
|
||||
default:
|
||||
}
|
||||
if err := r.limiter.Wait(r.ctx); err != nil {
|
||||
return
|
||||
}
|
||||
commits, resp, err := r.client.Repositories.ListCommits(r.ctx, getOwner(repoName), getRepoName(repoName), opts)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
for _, commit := range commits {
|
||||
if commit.Commit != nil {
|
||||
r.scanText(repoName, "commit_message", commit.Commit.GetMessage(), commit.GetHTMLURL())
|
||||
}
|
||||
}
|
||||
if resp.NextPage == 0 {
|
||||
break
|
||||
}
|
||||
opts.Page = resp.NextPage
|
||||
}
|
||||
}
|
||||
|
||||
func (r *Reaper) scanText(repoName, location, text, url string) {
|
||||
if text == "" {
|
||||
return
|
||||
}
|
||||
for _, pattern := range r.patterns {
|
||||
matches := pattern.Regex.FindAllStringSubmatch(text, -1)
|
||||
for _, match := range matches {
|
||||
secret := match[0]
|
||||
if len(match) > 1 && match[1] != "" {
|
||||
secret = match[1]
|
||||
}
|
||||
if *entropyCheck && pattern.Entropy && !hasHighEntropy(secret) {
|
||||
continue
|
||||
}
|
||||
|
||||
// Special handling for Email Addresses
|
||||
if pattern.Name == "Email Address" {
|
||||
if *hideObfuscated && isObfuscatedEmail(secret) {
|
||||
continue
|
||||
}
|
||||
if isGitHubNoReply(secret) {
|
||||
continue
|
||||
}
|
||||
if r.isDuplicateEmail(repoName, secret) {
|
||||
continue
|
||||
}
|
||||
}
|
||||
|
||||
hash := sha256.Sum256([]byte(repoName + location + secret))
|
||||
id := hex.EncodeToString(hash[:])[:16]
|
||||
finding := &Finding{
|
||||
ID: id,
|
||||
Repository: repoName,
|
||||
FilePath: location,
|
||||
SecretType: pattern.Name,
|
||||
SecretValue: maskSecret(secret),
|
||||
Context: text,
|
||||
URL: url,
|
||||
Timestamp: time.Now(),
|
||||
Severity: pattern.Severity,
|
||||
}
|
||||
r.results <- finding
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (r *Reaper) fetchAdvisories(repoName, owner, repo string) {
|
||||
query := fmt.Sprintf(`{
|
||||
repository(owner: "%s", name: "%s") {
|
||||
vulnerabilityAlerts(first: 100) {
|
||||
nodes {
|
||||
securityAdvisory {
|
||||
ghsaId
|
||||
cveId
|
||||
summary
|
||||
severity
|
||||
publishedAt
|
||||
updatedAt
|
||||
permalink
|
||||
}
|
||||
vulnerableManifestPath
|
||||
}
|
||||
}
|
||||
}
|
||||
}`, owner, repo)
|
||||
|
||||
body := struct {
|
||||
Query string `json:"query"`
|
||||
}{Query: query}
|
||||
jsonBody, _ := json.Marshal(body)
|
||||
|
||||
req, err := http.NewRequestWithContext(r.ctx, "POST", "https://api.github.com/graphql", bytes.NewReader(jsonBody))
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
req.Header.Set("Authorization", "Bearer "+os.Getenv("GITHUB_TOKEN"))
|
||||
req.Header.Set("Content-Type", "application/json")
|
||||
|
||||
resp, err := http.DefaultClient.Do(req)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
var result struct {
|
||||
Data struct {
|
||||
Repository struct {
|
||||
VulnerabilityAlerts struct {
|
||||
Nodes []struct {
|
||||
SecurityAdvisory struct {
|
||||
GhsaId string `json:"ghsaId"`
|
||||
CveId string `json:"cveId"`
|
||||
Summary string `json:"summary"`
|
||||
Severity string `json:"severity"`
|
||||
PublishedAt time.Time `json:"publishedAt"`
|
||||
UpdatedAt time.Time `json:"updatedAt"`
|
||||
Permalink string `json:"permalink"`
|
||||
} `json:"securityAdvisory"`
|
||||
VulnerableManifestPath string `json:"vulnerableManifestPath"`
|
||||
} `json:"nodes"`
|
||||
} `json:"vulnerabilityAlerts"`
|
||||
} `json:"repository"`
|
||||
} `json:"data"`
|
||||
}
|
||||
|
||||
if err := json.NewDecoder(resp.Body).Decode(&result); err != nil {
|
||||
return
|
||||
}
|
||||
|
||||
for _, node := range result.Data.Repository.VulnerabilityAlerts.Nodes {
|
||||
adv := &Advisory{
|
||||
Repository: repoName,
|
||||
GHSAID: node.SecurityAdvisory.GhsaId,
|
||||
CVEID: node.SecurityAdvisory.CveId,
|
||||
Summary: node.SecurityAdvisory.Summary,
|
||||
Severity: node.SecurityAdvisory.Severity,
|
||||
PublishedAt: node.SecurityAdvisory.PublishedAt,
|
||||
UpdatedAt: node.SecurityAdvisory.UpdatedAt,
|
||||
Permalink: node.SecurityAdvisory.Permalink,
|
||||
VulnerableManifest: node.VulnerableManifestPath,
|
||||
}
|
||||
r.advisories <- adv
|
||||
}
|
||||
}
|
||||
|
||||
func (r *Reaper) processResults() {
|
||||
for finding := range r.results {
|
||||
r.stats.mu.Lock()
|
||||
r.stats.FindingsFound++
|
||||
currentFindings := r.stats.FindingsFound
|
||||
r.stats.mu.Unlock()
|
||||
|
||||
record := []string{
|
||||
finding.Timestamp.Format(time.RFC3339),
|
||||
finding.Repository,
|
||||
finding.FilePath,
|
||||
fmt.Sprintf("%d", finding.LineNumber),
|
||||
finding.SecretType,
|
||||
finding.SecretValue,
|
||||
finding.URL,
|
||||
finding.Severity,
|
||||
}
|
||||
r.csvWriter.Write(record)
|
||||
r.csvWriter.Flush()
|
||||
|
||||
jsonData, _ := json.Marshal(finding)
|
||||
r.jsonFile.Write(append(jsonData, '\n'))
|
||||
r.jsonFile.Sync()
|
||||
|
||||
if *verbose {
|
||||
fmt.Printf("\n[SECRET] #%d: %s [%s] in %s - %s\n", currentFindings, finding.SecretType, finding.Severity, finding.Repository, finding.FilePath)
|
||||
} else {
|
||||
fmt.Printf(".")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (r *Reaper) processAdvisories() {
|
||||
for advisory := range r.advisories {
|
||||
r.stats.mu.Lock()
|
||||
r.stats.AdvisoriesFound++
|
||||
r.stats.mu.Unlock()
|
||||
|
||||
jsonData, _ := json.Marshal(advisory)
|
||||
r.advFile.Write(append(jsonData, '\n'))
|
||||
r.advFile.Sync()
|
||||
|
||||
if *verbose {
|
||||
fmt.Printf("\n[ADVISORY] %s [%s] in %s - %s\n", advisory.GHSAID, advisory.Severity, advisory.Repository, advisory.Summary)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (r *Reaper) buildSearchQuery() string {
|
||||
query := "a is:public"
|
||||
if *sinceDays > 0 {
|
||||
since := time.Now().AddDate(0, 0, -*sinceDays).Format("2006-01-02")
|
||||
query += fmt.Sprintf(" pushed:>%s", since)
|
||||
}
|
||||
return query
|
||||
}
|
||||
|
||||
func (r *Reaper) saveScannedList() {
|
||||
r.mu.Lock()
|
||||
defer r.mu.Unlock()
|
||||
file, err := os.Create(filepath.Join(*outputDir, "scanned_repos.txt"))
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
defer file.Close()
|
||||
for repo := range r.scannedRepos {
|
||||
file.WriteString(repo + "\n")
|
||||
}
|
||||
}
|
||||
|
||||
func (r *Reaper) printFinalStats() {
|
||||
duration := time.Since(r.stats.StartTime)
|
||||
fmt.Println("\n" + strings.Repeat("=", 60))
|
||||
fmt.Println("REAPER FINAL STATISTICS")
|
||||
fmt.Println(strings.Repeat("=", 60))
|
||||
fmt.Printf("Total runtime: %s\n", duration.Round(time.Second))
|
||||
fmt.Printf("Repositories: %d\n", r.stats.ReposScanned)
|
||||
fmt.Printf("Files scanned: %d\n", r.stats.FilesScanned)
|
||||
fmt.Printf("Secrets found: %d\n", r.stats.FindingsFound)
|
||||
fmt.Printf("Advisories found: %d\n", r.stats.AdvisoriesFound)
|
||||
fmt.Printf("Rate limit hits: %d\n", r.stats.RateLimitHits)
|
||||
fmt.Printf("Output directory: %s\n", *outputDir)
|
||||
fmt.Println(strings.Repeat("=", 60))
|
||||
fmt.Println("\nDISCLAIMER: This tool is for educational and authorized testing only.")
|
||||
fmt.Println("Use responsibly and in compliance with GitHub's Terms of Service.")
|
||||
}
|
||||
|
||||
func getOwner(repoName string) string {
|
||||
parts := strings.Split(repoName, "/")
|
||||
if len(parts) > 0 {
|
||||
return parts[0]
|
||||
}
|
||||
return ""
|
||||
}
|
||||
|
||||
func getRepoName(repoName string) string {
|
||||
parts := strings.Split(repoName, "/")
|
||||
if len(parts) > 1 {
|
||||
return parts[1]
|
||||
}
|
||||
return repoName
|
||||
}
|
||||
|
||||
func maskSecret(secret string) string {
|
||||
if len(secret) <= 12 {
|
||||
return "***MASKED***"
|
||||
}
|
||||
return secret[:6] + "..." + secret[len(secret)-6:]
|
||||
}
|
||||
|
||||
func getContext(lines []string, lineNum, contextLines int) string {
|
||||
start := lineNum - contextLines
|
||||
if start < 0 {
|
||||
start = 0
|
||||
}
|
||||
end := lineNum + contextLines + 1
|
||||
if end > len(lines) {
|
||||
end = len(lines)
|
||||
}
|
||||
return strings.Join(lines[start:end], "\n")
|
||||
}
|
||||
|
||||
func hasHighEntropy(s string) bool {
|
||||
if len(s) < 8 {
|
||||
return false
|
||||
}
|
||||
freq := make(map[rune]float64)
|
||||
for _, char := range s {
|
||||
freq[char]++
|
||||
}
|
||||
var entropy float64
|
||||
for _, f := range freq {
|
||||
p := f / float64(len(s))
|
||||
entropy -= p * math.Log2(p)
|
||||
}
|
||||
return entropy > 4.5
|
||||
}
|
||||
Reference in New Issue
Block a user