Files
REAPER/reaper.go
T
2026-06-19 21:22:17 +00:00

985 lines
35 KiB
Go

package main
import (
"bufio"
"bytes"
"context"
"crypto/sha256"
"encoding/base64"
"encoding/csv"
"encoding/hex"
"encoding/json"
"flag"
"fmt"
"log"
"math"
"net/http"
"os"
"os/signal"
"path/filepath"
"regexp"
"strings"
"sync"
"syscall"
"time"
"github.com/google/go-github/v61/github"
"golang.org/x/oauth2"
"golang.org/x/time/rate"
)
// Configuration flags. Each variable is a pointer whose value is filled in
// when flag.Parse runs at the start of main.
var (
	githubToken    = flag.String("token", "", "GitHub Personal Access Token (or set GITHUB_TOKEN env)")
	outputDir      = flag.String("output", "./output", "Output directory for findings")
	workers        = flag.Int("workers", 20, "Number of concurrent workers")
	minStars       = flag.Int("min-stars", 0, "Minimum stars filter")
	sinceDays      = flag.Int("since-days", 7, "Only scan repos updated in last X days (repeats)")
	verbose        = flag.Bool("verbose", false, "Verbose output")
	scanPRs        = flag.Bool("scan-prs", true, "Scan pull requests")
	scanIssues     = flag.Bool("scan-issues", true, "Scan issues and comments")
	scanCommits    = flag.Bool("scan-commits", true, "Scan commit history")
	entropyCheck   = flag.Bool("entropy", true, "Enable entropy checking")
	continuous     = flag.Bool("continuous", true, "Run continuously (loop forever)")
	sleepMinutes   = flag.Int("sleep-minutes", 60, "Sleep between continuous cycles")
	scanAdvisories = flag.Bool("scan-advisories", false, "Check repository for GitHub Security Advisories (requires additional API calls)")
	hideObfuscated = flag.Bool("hide-obfuscated", true, "Hide obfuscated emails (e.g., user[at]example[dot]com)")

	// Explicit repository targets. A flag.String keeps only the last value
	// supplied on the command line, so the help text must not promise that
	// -repo can be repeated; -repo-list is the way to name several targets.
	repoURLs     = flag.String("repo", "", "Repository URL to scan (use -repo-list to scan several)")
	repoListFile = flag.String("repo-list", "", "File containing repository URLs (one per line)")
)
// Core types
// Reaper bundles the GitHub client, the detection patterns, the output sinks
// and the shared bookkeeping used by every scan routine in this file.
type Reaper struct {
// client is the authenticated GitHub REST client built in NewReaper.
client *github.Client
// patterns is the detection table returned by GetAllPatterns.
patterns []*SecretPattern
// results carries findings to the processResults goroutine.
results chan *Finding
// advisories carries advisories to the processAdvisories goroutine.
advisories chan *Advisory
// limiter is waited on before the search and list API calls.
limiter *rate.Limiter
// ctx is cancelled through cancel (main wires this to SIGINT/SIGTERM).
ctx context.Context
cancel context.CancelFunc
// NOTE(review): wg is not referenced anywhere in the code visible here.
wg sync.WaitGroup
stats *ScanStats
// csvWriter, jsonFile and advFile are the output sinks opened in NewReaper.
csvWriter *csv.Writer
jsonFile *os.File
advFile *os.File
// mu guards scannedRepos and emailCache.
mu sync.Mutex
// scannedRepos holds the full names of repositories already queued for scanning.
scannedRepos map[string]bool
emailCache map[string]map[string]bool // repo -> email -> already reported
}
// SecretPattern describes one detection rule.
type SecretPattern struct {
// Name is copied into Finding.SecretType.
Name string
// Regex locates candidates; when its first capture group matches a
// non-empty string that group is reported, otherwise the whole match is.
Regex *regexp.Regexp
// Severity is copied into Finding.Severity.
Severity string
// Entropy requests the hasHighEntropy check (only when -entropy is enabled).
Entropy bool
}
// Finding is one reported match, serialized to the JSONL and CSV outputs.
type Finding struct {
// ID is the first 16 hex characters of a SHA-256 over repository, location and secret.
ID string `json:"id"`
Repository string `json:"repository"`
// FilePath is a file path for file scans, or a location label such as
// "issue" for text scans.
FilePath string `json:"file_path"`
// LineNumber is 1-based for file scans; text scans leave it at zero.
LineNumber int `json:"line_number"`
SecretType string `json:"secret_type"`
// SecretValue is the output of maskSecret, not the raw match.
SecretValue string `json:"secret_value"`
// Context is the surrounding lines (file scans) or the full scanned text (text scans).
Context string `json:"context"`
URL string `json:"url"`
Branch string `json:"branch"`
Timestamp time.Time `json:"timestamp"`
Severity string `json:"severity"`
}
// Advisory is one security advisory attached to a repository, serialized to
// the advisories JSONL output.
type Advisory struct {
ID string `json:"id"`
// Repository is the "owner/name" the advisory was reported for.
Repository string `json:"repository"`
GHSAID string `json:"ghsa_id"`
// CVEID is omitted from JSON when empty.
CVEID string `json:"cve_id,omitempty"`
Summary string `json:"summary"`
Severity string `json:"severity"`
PublishedAt time.Time `json:"published_at"`
UpdatedAt time.Time `json:"updated_at"`
Permalink string `json:"permalink"`
// VulnerableManifest is omitted from JSON when empty.
VulnerableManifest string `json:"vulnerable_manifest,omitempty"`
}
// ScanStats accumulates counters for the lifetime of the process.
type ScanStats struct {
ReposScanned int
FilesScanned int
FindingsFound int
AdvisoriesFound int
// StartTime is set once in NewReaper; CycleStart is reset at the start of
// every cycle in RunForever.
StartTime time.Time
CycleStart time.Time
RateLimitHits int
// mu protects the counters above when they are touched from several goroutines.
mu sync.Mutex
}
// main parses flags, resolves the API token, prepares the output directory
// and then either scans an explicit list of repositories once or hands
// control to RunForever.
func main() {
flag.Parse()
// The -token flag wins; GITHUB_TOKEN is the fallback.
token := *githubToken
if token == "" {
token = os.Getenv("GITHUB_TOKEN")
}
if token == "" {
log.Fatal("GitHub token required. Use -token flag or GITHUB_TOKEN env variable")
}
if err := os.MkdirAll(*outputDir, 0755); err != nil {
log.Fatalf("Failed to create output directory: %v", err)
}
reaper := NewReaper(token)
// Cancel the shared context on SIGINT/SIGTERM so scan loops can stop.
sigChan := make(chan os.Signal, 1)
signal.Notify(sigChan, syscall.SIGINT, syscall.SIGTERM)
go func() {
<-sigChan
fmt.Println("\nShutting down REAPER gracefully...")
reaper.cancel()
}()
fmt.Println(`
============================================================
██████╗ ███████╗ █████╗ ██████╗ ███████╗██████╗
██╔══██╗██╔════╝██╔══██╗██╔══██╗██╔════╝██╔══██╗
██████╔╝█████╗ ███████║██████╔╝█████╗ ██████╔╝
██╔══██╗██╔══╝ ██╔══██║██╔═══╝ ██╔══╝ ██╔══██╗
██║ ██║███████╗██║ ██║██║ ███████╗██║ ██║
╚═╝ ╚═╝╚══════╝╚═╝ ╚═╝╚═╝ ╚══════╝╚═╝ ╚═╝
============================================================
REAPER - GitHub Secret Harvester & Advisory Scanner
============================================================
`)
// Determine mode: single-shot or continuous
targetRepos := getTargetRepos()
if len(targetRepos) > 0 {
fmt.Printf("[!] Single-shot mode: scanning %d specific repository(s)\n", len(targetRepos))
fmt.Printf("[+] Output directory: %s\n", *outputDir)
fmt.Printf("[+] Scan commits: %v\n", *scanCommits)
fmt.Printf("[+] Scan advisories: %v\n", *scanAdvisories)
fmt.Printf("[+] Hide obfuscated emails: %v\n", *hideObfuscated)
// Repositories are scanned sequentially here; -workers is not used in this mode.
for _, repoURL := range targetRepos {
repo, err := reaper.getRepoFromURL(repoURL)
if err != nil {
log.Printf("Failed to parse %s: %v", repoURL, err)
continue
}
reaper.scanRepository(repo)
reaper.stats.mu.Lock()
reaper.stats.ReposScanned++
reaper.stats.mu.Unlock()
}
// NOTE(review): findings are written by goroutines started in NewReaper and
// nothing here waits for them, so items still queued when main returns may
// not reach the output files or the statistics below.
reaper.printFinalStats()
return
}
// Normal continuous mode
fmt.Printf("[+] Starting REAPER with %d workers\n", *workers)
fmt.Printf("[+] Output directory: %s\n", *outputDir)
fmt.Printf("[+] Continuous mode: %v (sleep %d min between cycles)\n", *continuous, *sleepMinutes)
fmt.Printf("[+] Scanning repos updated in last %d days\n", *sinceDays)
fmt.Printf("[+] Scan commits: %v\n", *scanCommits)
fmt.Printf("[+] Scan advisories: %v\n", *scanAdvisories)
fmt.Printf("[+] Hide obfuscated emails: %v\n", *hideObfuscated)
reaper.RunForever()
}
func getTargetRepos() []string {
var repos []string
flag.Visit(func(f *flag.Flag) {
if f.Name == "repo" && f.Value.String() != "" {
repos = append(repos, f.Value.String())
}
})
if *repoListFile != "" {
file, err := os.Open(*repoListFile)
if err == nil {
defer file.Close()
scanner := bufio.NewScanner(file)
for scanner.Scan() {
line := strings.TrimSpace(scanner.Text())
if line != "" && !strings.HasPrefix(line, "#") {
repos = append(repos, line)
}
}
} else {
log.Printf("Warning: could not open repo list file %s: %v", *repoListFile, err)
}
}
return repos
}
// getRepoFromURL resolves a repository reference to its GitHub metadata.
//
// rawURL may be a full https/http URL, an ssh-style "git@github.com:owner/repo"
// reference, a bare "github.com/owner/repo" or just "owner/repo"; a trailing
// slash and a ".git" suffix are tolerated. An error is returned when no owner
// and name can be extracted or when the API lookup fails.
func (r *Reaper) getRepoFromURL(rawURL string) (*github.Repository, error) {
	owner, name, err := splitRepoReference(rawURL)
	if err != nil {
		return nil, err
	}
	repo, _, err := r.client.Repositories.Get(r.ctx, owner, name)
	if err != nil {
		return nil, fmt.Errorf("fetching %s/%s: %w", owner, name, err)
	}
	return repo, nil
}

// splitRepoReference extracts the owner and repository name from a
// repository reference without touching the network.
func splitRepoReference(rawURL string) (owner, name string, err error) {
	ref := strings.TrimSpace(rawURL)
	// Peel off scheme/host decorations one at a time; each is optional.
	for _, prefix := range []string{"https://", "http://", "git@", "www.", "github.com/", "github.com:"} {
		ref = strings.TrimPrefix(ref, prefix)
	}
	// Drop trailing slashes before ".git" so "owner/repo.git/" is handled too.
	ref = strings.TrimSuffix(strings.TrimRight(ref, "/"), ".git")

	parts := strings.Split(ref, "/")
	if len(parts) < 2 || parts[0] == "" || parts[1] == "" {
		return "", "", fmt.Errorf("invalid repository URL: %s", rawURL)
	}
	return parts[0], parts[1], nil
}
// NewReaper builds a Reaper authenticated with token, opens the output files
// inside the -output directory, loads the list of previously scanned
// repositories and starts the two goroutines that persist findings and
// advisories.
//
// The process exits through log.Fatalf when an output file cannot be opened,
// because every later write would otherwise fail without any visible sign.
// A missing or unreadable scanned-repository list is only a warning: the
// scan then starts with an empty history.
func NewReaper(token string) *Reaper {
	ctx, cancel := context.WithCancel(context.Background())
	ts := oauth2.StaticTokenSource(&oauth2.Token{AccessToken: token})
	client := github.NewClient(oauth2.NewClient(ctx, ts))

	const appendMode = os.O_APPEND | os.O_CREATE | os.O_WRONLY
	jsonFile, err := os.OpenFile(filepath.Join(*outputDir, "reaper_findings.jsonl"), appendMode, 0644)
	if err != nil {
		log.Fatalf("Failed to open findings file: %v", err)
	}
	advFile, err := os.OpenFile(filepath.Join(*outputDir, "advisories.jsonl"), appendMode, 0644)
	if err != nil {
		log.Fatalf("Failed to open advisories file: %v", err)
	}

	// Each run gets its own timestamped CSV; the JSONL files are appended to.
	timestamp := time.Now().Format("20060102_150405")
	csvFile, err := os.Create(filepath.Join(*outputDir, fmt.Sprintf("reaper_findings_%s.csv", timestamp)))
	if err != nil {
		log.Fatalf("Failed to create CSV file: %v", err)
	}
	csvWriter := csv.NewWriter(csvFile)
	if err := csvWriter.Write([]string{"Timestamp", "Repository", "File", "Line", "Type", "Secret", "URL", "Severity"}); err != nil {
		log.Fatalf("Failed to write CSV header: %v", err)
	}
	// Flush right away so the header exists even when nothing is found.
	csvWriter.Flush()
	if err := csvWriter.Error(); err != nil {
		log.Fatalf("Failed to write CSV header: %v", err)
	}

	scannedRepos := make(map[string]bool)
	scannedFile, err := os.OpenFile(filepath.Join(*outputDir, "scanned_repos.txt"), os.O_RDWR|os.O_CREATE, 0644)
	if err != nil {
		log.Printf("Warning: could not open scanned repo list: %v", err)
	} else {
		scanner := bufio.NewScanner(scannedFile)
		for scanner.Scan() {
			if name := strings.TrimSpace(scanner.Text()); name != "" {
				scannedRepos[name] = true
			}
		}
		if err := scanner.Err(); err != nil {
			log.Printf("Warning: scanned repo list only partially read: %v", err)
		}
		scannedFile.Close()
	}

	reaper := &Reaper{
		client:       client,
		patterns:     GetAllPatterns(),
		results:      make(chan *Finding, 10000),
		advisories:   make(chan *Advisory, 1000),
		limiter:      rate.NewLimiter(rate.Limit(30), 100),
		ctx:          ctx,
		cancel:       cancel,
		stats:        &ScanStats{StartTime: time.Now()},
		csvWriter:    csvWriter,
		jsonFile:     jsonFile,
		advFile:      advFile,
		scannedRepos: scannedRepos,
		emailCache:   make(map[string]map[string]bool),
	}
	go reaper.processResults()
	go reaper.processAdvisories()
	return reaper
}
// isObfuscatedEmail reports whether email looks like a disguised, placeholder
// or automated address rather than a real mailbox. Marker and domain checks
// are case-insensitive; the bracket checks run on the address as given.
func isObfuscatedEmail(email string) bool {
	lowered := strings.ToLower(email)

	// Textual disguises ("[at]", " dot "), template fragments and
	// automated-sender names.
	markers := []string{
		"[at]", "[@]", "{at}", "{@}",
		" at ", "(at)", "[dot]", "{dot}", "(dot)",
		" dot ", " DOT ", " AT ",
		" user@", "@domain", "example.com",
		"replace@", "change@", "obfuscated",
		"noreply", "no-reply", "do-not-reply",
	}
	for i := range markers {
		if strings.Contains(lowered, markers[i]) {
			return true
		}
	}

	// A domain wrapped in brackets or parentheses is not a deliverable host.
	bracketed := strings.Contains(email, "@[") && strings.Contains(email, "]")
	parenthesised := strings.Contains(email, "@(") && strings.Contains(email, ")")
	if bracketed || parenthesised {
		return true
	}

	// Well-known placeholder hosts directly after the '@'.
	for _, host := range []string{"example.com", "domain.com", "test.com", "localhost", "invalid"} {
		if strings.Contains(lowered, "@"+host) {
			return true
		}
	}
	return false
}
// isGitHubNoReply reports whether email contains one of GitHub's no-reply
// host names. The comparison is case-sensitive.
func isGitHubNoReply(email string) bool {
	for _, host := range [...]string{"noreply.github.com", "users.noreply.github.com"} {
		if strings.Contains(email, host) {
			return true
		}
	}
	return false
}
// isDuplicateEmail reports whether email was already seen for repoName and,
// when it was not, records it so that the next call returns true. Access to
// the cache is serialized through r.mu.
func (r *Reaper) isDuplicateEmail(repoName, email string) bool {
	r.mu.Lock()
	defer r.mu.Unlock()

	seen, ok := r.emailCache[repoName]
	if !ok {
		seen = make(map[string]bool)
		r.emailCache[repoName] = seen
	}
	alreadyReported := seen[email]
	if !alreadyReported {
		seen[email] = true
	}
	return alreadyReported
}
// GetAllPatterns returns the full detection table. Every call compiles the
// expressions again and returns a fresh slice, so callers should keep the
// result instead of calling this repeatedly. Rules whose Entropy field is true are
// additionally filtered by hasHighEntropy when the -entropy flag is enabled.
// Where an expression contains a capture group, the scanners report that
// group rather than the whole match.
func GetAllPatterns() []*SecretPattern {
return []*SecretPattern{
{Name: "AWS Access Key", Regex: regexp.MustCompile(`AKIA[0-9A-Z]{16}`), Severity: "CRITICAL", Entropy: false},
{Name: "AWS Secret Key", Regex: regexp.MustCompile(`(?i)(?:aws_secret_access_key|AWS_SECRET_ACCESS_KEY)\s*[:=\s]+['"]?([A-Za-z0-9/+=]{40})['"]?`), Severity: "CRITICAL", Entropy: true},
{Name: "Google API Key", Regex: regexp.MustCompile(`AIza[0-9A-Za-z\-_]{35}`), Severity: "CRITICAL", Entropy: false},
{Name: "GitHub Token", Regex: regexp.MustCompile(`ghp_[A-Za-z0-9]{36}|github_pat_[A-Za-z0-9]{22}_[A-Za-z0-9]{59}`), Severity: "CRITICAL", Entropy: false},
{Name: "Slack Token", Regex: regexp.MustCompile(`xox[baprs]-[0-9]{10,13}-[0-9]{10,13}-[a-zA-Z0-9]{24}`), Severity: "HIGH", Entropy: false},
{Name: "Discord Bot Token", Regex: regexp.MustCompile(`[MNO][a-zA-Z\d_-]{23,25}\.[a-zA-Z\d_-]{6}\.[a-zA-Z\d_-]{27}`), Severity: "CRITICAL", Entropy: false},
{Name: "Stripe Secret Key", Regex: regexp.MustCompile(`sk_live_[A-Za-z0-9]{24}`), Severity: "CRITICAL", Entropy: false},
{Name: "Stripe Publishable Key", Regex: regexp.MustCompile(`pk_live_[A-Za-z0-9]{24}`), Severity: "HIGH", Entropy: false},
{Name: "JWT Token", Regex: regexp.MustCompile(`eyJ[A-Za-z0-9-_=]+\.[A-Za-z0-9-_=]+\.?[A-Za-z0-9-_.+/=]*`), Severity: "HIGH", Entropy: true},
{Name: "PostgreSQL URL", Regex: regexp.MustCompile(`postgresql://[^/\s]+:[^/\s]+@[^/\s]+/\w+`), Severity: "CRITICAL", Entropy: false},
{Name: "MySQL URL", Regex: regexp.MustCompile(`mysql://[^/\s]+:[^/\s]+@[^/\s]+/\w+`), Severity: "CRITICAL", Entropy: false},
{Name: "MongoDB URL", Regex: regexp.MustCompile(`mongodb(?:\+srv)?://[^/\s]+:[^/\s]+@[^/\s]+/\w+`), Severity: "CRITICAL", Entropy: false},
{Name: "Redis URL", Regex: regexp.MustCompile(`redis://(?:[^:@]+:[^@]+@)?[^:]+:[0-9]+`), Severity: "HIGH", Entropy: false},
{Name: "RSA Private Key", Regex: regexp.MustCompile(`-----BEGIN RSA PRIVATE KEY-----`), Severity: "CRITICAL", Entropy: false},
{Name: "SSH Private Key", Regex: regexp.MustCompile(`-----BEGIN OPENSSH PRIVATE KEY-----`), Severity: "CRITICAL", Entropy: false},
{Name: "EC Private Key", Regex: regexp.MustCompile(`-----BEGIN EC PRIVATE KEY-----`), Severity: "CRITICAL", Entropy: false},
{Name: "Email Address", Regex: regexp.MustCompile(`[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}`), Severity: "MEDIUM", Entropy: false},
{Name: "Generic Password", Regex: regexp.MustCompile(`(?i)(?:password|passwd|pwd)\s*[:=\s]+['"]?([^'"\s]{8,50})['"]?`), Severity: "HIGH", Entropy: false},
{Name: "Generic API Key", Regex: regexp.MustCompile(`(?i)(?:api[_-]?key|apikey|api_token|token)\s*[:=\s]+['"]?([A-Za-z0-9]{20,50})['"]?`), Severity: "HIGH", Entropy: true},
{Name: "Azure Connection String", Regex: regexp.MustCompile(`DefaultEndpointsProtocol=https;AccountName=[^;]+;AccountKey=[^;]+`), Severity: "CRITICAL", Entropy: false},
{Name: "Twilio API Key", Regex: regexp.MustCompile(`SK[0-9a-fA-F]{32}`), Severity: "HIGH", Entropy: false},
{Name: "SendGrid API Key", Regex: regexp.MustCompile(`SG\.[a-zA-Z0-9_-]{22}\.[a-zA-Z0-9_-]{43}`), Severity: "HIGH", Entropy: false},
{Name: "Heroku API Key", Regex: regexp.MustCompile(`[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}`), Severity: "HIGH", Entropy: true},
{Name: "OpenAI API Key", Regex: regexp.MustCompile(`sk-[A-Za-z0-9]{48}`), Severity: "CRITICAL", Entropy: true},
{Name: "Telegram Bot Token", Regex: regexp.MustCompile(`[0-9]{8,10}:[A-Za-z0-9_-]{35}`), Severity: "CRITICAL", Entropy: false},
{Name: "GitHub App Token", Regex: regexp.MustCompile(`ghu_[A-Za-z0-9]{36}`), Severity: "CRITICAL", Entropy: false},
{Name: "GitLab Token", Regex: regexp.MustCompile(`glpat-[A-Za-z0-9-_]{20}`), Severity: "HIGH", Entropy: false},
{Name: "Docker Hub Token", Regex: regexp.MustCompile(`dckr_pat_[A-Za-z0-9-_]{32}`), Severity: "HIGH", Entropy: false},
{Name: "NPM Token", Regex: regexp.MustCompile(`npm_[A-Za-z0-9]{36}`), Severity: "HIGH", Entropy: false},
{Name: "Pulumi API Key", Regex: regexp.MustCompile(`pul-[a-f0-9]{40}`), Severity: "HIGH", Entropy: false},
{Name: "DigitalOcean Token", Regex: regexp.MustCompile(`dops_v1_[a-zA-Z0-9]{64}`), Severity: "HIGH", Entropy: false},
{Name: "Alibaba Cloud Key", Regex: regexp.MustCompile(`LTAI[A-Za-z0-9]{16,20}`), Severity: "HIGH", Entropy: false},
}
}
// RunForever runs scan cycles until the context is cancelled, or runs a
// single cycle when -continuous is false, sleeping -sleep-minutes between
// cycles. The final statistics are printed exactly once on every exit path,
// including cancellation that arrives during the sleep.
func (r *Reaper) RunForever() {
	defer r.printFinalStats()

	for {
		if r.ctx.Err() != nil {
			return
		}

		cycleStart := time.Now()
		fmt.Printf("\nStarting new scan cycle at %s\n", cycleStart.Format("15:04:05"))
		r.stats.mu.Lock()
		r.stats.CycleStart = cycleStart
		r.stats.mu.Unlock()

		if err := r.scanCycle(); err != nil {
			log.Printf("Scan cycle error: %v", err)
		}

		// Snapshot the counters under the lock, then print without holding it.
		r.stats.mu.Lock()
		repos, findings, advisories := r.stats.ReposScanned, r.stats.FindingsFound, r.stats.AdvisoriesFound
		r.stats.mu.Unlock()
		fmt.Printf("\nCycle complete: %d repos, %d findings, %d advisories in %s\n",
			repos, findings, advisories, time.Since(cycleStart).Round(time.Second))

		if !*continuous {
			return
		}

		fmt.Printf("Sleeping for %d minutes before next cycle...\n", *sleepMinutes)
		sleepTimer := time.NewTimer(time.Duration(*sleepMinutes) * time.Minute)
		select {
		case <-sleepTimer.C:
		case <-r.ctx.Done():
			sleepTimer.Stop()
			return
		}
	}
}
// scanCycle runs one full pass: it pages through the repository search,
// skips private, under-starred and already-seen repositories, fans the rest
// out to a pool of repoWorker goroutines and finally persists the
// seen-repository list.
//
// It returns nil when the context is cancelled between pages or during a
// rate-limit back-off, and an error when the limiter or the search fails.
func (r *Reaper) scanCycle() error {
	query := r.buildSearchQuery()
	fmt.Printf("[+] Search query: %s\n", query)
	opts := &github.SearchOptions{
		Sort:        "updated",
		Order:       "desc",
		ListOptions: github.ListOptions{PerPage: 100},
	}

	// With no worker at all, repositories would be marked as scanned without
	// ever being looked at.
	workerCount := *workers
	if workerCount < 1 {
		workerCount = 1
	}

	for {
		if r.ctx.Err() != nil {
			return nil
		}
		if err := r.limiter.Wait(r.ctx); err != nil {
			return err
		}
		result, resp, err := r.client.Search.Repositories(r.ctx, query, opts)
		if err != nil {
			if strings.Contains(err.Error(), "rate limit") {
				r.stats.mu.Lock()
				r.stats.RateLimitHits++
				r.stats.mu.Unlock()
				// Back off, but wake up immediately on shutdown.
				backoff := time.NewTimer(60 * time.Second)
				select {
				case <-backoff.C:
				case <-r.ctx.Done():
					backoff.Stop()
					return nil
				}
				continue
			}
			return fmt.Errorf("search failed: %w", err)
		}
		if len(result.Repositories) == 0 {
			fmt.Println("[+] No new repositories found in this cycle.")
			break
		}
		fmt.Printf("[+] Found %d repositories (page %d)\n", len(result.Repositories), opts.Page)

		// The channel holds a whole page, so the producer below never blocks.
		repoChan := make(chan *github.Repository, len(result.Repositories))
		var workerWg sync.WaitGroup
		for i := 0; i < workerCount; i++ {
			workerWg.Add(1)
			go r.repoWorker(repoChan, &workerWg)
		}

		newRepos := 0
		for _, repo := range result.Repositories {
			if repo.GetPrivate() {
				continue
			}
			if *minStars > 0 && repo.GetStargazersCount() < *minStars {
				continue
			}
			fullName := repo.GetFullName()
			r.mu.Lock()
			alreadySeen := r.scannedRepos[fullName]
			if !alreadySeen {
				r.scannedRepos[fullName] = true
			}
			r.mu.Unlock()
			if alreadySeen {
				continue
			}
			repoChan <- repo
			newRepos++
		}
		close(repoChan)
		workerWg.Wait()

		// The counters are updated by other goroutines; read them under the lock.
		r.stats.mu.Lock()
		findings, advisories := r.stats.FindingsFound, r.stats.AdvisoriesFound
		r.stats.mu.Unlock()
		fmt.Printf("[+] Cycle progress: %d new repos scanned, total findings: %d, total advisories: %d\n",
			newRepos, findings, advisories)

		if resp.NextPage == 0 {
			break
		}
		opts.Page = resp.NextPage
	}

	r.saveScannedList()
	return nil
}
// repoWorker consumes repositories from repoChan until the channel is closed
// or the context is cancelled, scanning each one and bumping the
// ReposScanned counter afterwards. wg is released when the worker exits.
func (r *Reaper) repoWorker(repoChan <-chan *github.Repository, wg *sync.WaitGroup) {
	defer wg.Done()

	for {
		next, open := <-repoChan
		if !open {
			return
		}
		// Stop before starting new work once shutdown was requested.
		if r.ctx.Err() != nil {
			return
		}
		r.scanRepository(next)

		r.stats.mu.Lock()
		r.stats.ReposScanned++
		r.stats.mu.Unlock()
	}
}
// scanRepository runs every enabled scan stage against repo. Archived
// repositories are skipped entirely. The default branch contents are always
// scanned; pull requests, issues, commit messages and advisories follow in
// that order when their flags are set.
func (r *Reaper) scanRepository(repo *github.Repository) {
	fullName := repo.GetFullName()
	if repo.GetArchived() {
		return
	}
	branch := repo.GetDefaultBranch()

	stages := []struct {
		enabled bool
		run     func()
	}{
		{true, func() { r.scanBranch(fullName, branch) }},
		{*scanPRs, func() { r.scanPullRequests(fullName) }},
		{*scanIssues, func() { r.scanIssues(fullName) }},
		{*scanCommits, func() { r.scanCommitHistory(fullName, branch) }},
		{*scanAdvisories, func() { r.fetchAdvisories(fullName, getOwner(fullName), getRepoName(fullName)) }},
	}
	for _, stage := range stages {
		if stage.enabled {
			stage.run()
		}
	}
}
// scanBranch lists the root directory of repoName at branch and hands the
// entries to processContents. Like the other API calls in this file it waits
// on the shared rate limiter first; a limiter or listing failure ends the
// scan of this branch without reporting an error.
func (r *Reaper) scanBranch(repoName, branch string) {
	if err := r.limiter.Wait(r.ctx); err != nil {
		return
	}
	opts := &github.RepositoryContentGetOptions{Ref: branch}
	_, contents, _, err := r.client.Repositories.GetContents(
		r.ctx, getOwner(repoName), getRepoName(repoName), "/", opts)
	if err != nil {
		return
	}
	r.processContents(repoName, branch, contents, opts.Ref)
}
// processContents walks a directory listing: files are passed to scanFile
// and sub-directories are listed (at ref) and walked recursively. The walk
// stops as soon as the context is cancelled. Directories that cannot be
// listed are skipped.
//
// The entry type is read through the nil-safe accessor so that an entry
// without a Type cannot cause a nil-pointer panic, and every directory
// listing waits on the shared rate limiter.
func (r *Reaper) processContents(repoName, branch string, contents []*github.RepositoryContent, ref string) {
	for _, content := range contents {
		if r.ctx.Err() != nil {
			return
		}
		if content == nil {
			continue
		}
		switch content.GetType() {
		case "dir":
			if err := r.limiter.Wait(r.ctx); err != nil {
				return
			}
			_, dirContents, _, err := r.client.Repositories.GetContents(
				r.ctx, getOwner(repoName), getRepoName(repoName), content.GetPath(),
				&github.RepositoryContentGetOptions{Ref: ref})
			if err != nil {
				continue
			}
			r.processContents(repoName, branch, dirContents, ref)
		case "file":
			r.scanFile(repoName, branch, content)
		}
	}
}
// scanFile matches every pattern against each line of file and queues one
// Finding per accepted match. Files with a media, archive or binary
// extension are skipped, as are files whose content cannot be obtained.
//
// Matches are dropped when the pattern requests the entropy check and the
// value fails it. Email matches are also dropped when they are obfuscated
// (with -hide-obfuscated), are GitHub no-reply addresses, or were already
// reported for this repository.
func (r *Reaper) scanFile(repoName, branch string, file *github.RepositoryContent) {
	switch strings.ToLower(filepath.Ext(file.GetName())) {
	case ".jpg", ".jpeg", ".png", ".gif", ".ico",
		".mp4", ".mp3", ".zip", ".tar", ".gz",
		".exe", ".dll", ".so", ".bin":
		return
	}

	raw, err := file.GetContent()
	if err != nil {
		return
	}
	// The content is used base64-decoded when it decodes cleanly, otherwise as-is.
	body := []byte(raw)
	if plain, decodeErr := base64.StdEncoding.DecodeString(raw); decodeErr == nil {
		body = plain
	}

	r.stats.mu.Lock()
	r.stats.FilesScanned++
	r.stats.mu.Unlock()

	path := file.GetPath()
	lines := strings.Split(string(body), "\n")
	for idx := range lines {
		for _, pattern := range r.patterns {
			for _, groups := range pattern.Regex.FindAllStringSubmatch(lines[idx], -1) {
				// Prefer the first capture group when it matched something.
				secret := groups[0]
				if len(groups) > 1 && groups[1] != "" {
					secret = groups[1]
				}
				if *entropyCheck && pattern.Entropy && !hasHighEntropy(secret) {
					continue
				}
				// Short-circuit order matters: the duplicate check records the
				// address, so it must only run for addresses that pass the
				// first two filters.
				if pattern.Name == "Email Address" &&
					((*hideObfuscated && isObfuscatedEmail(secret)) ||
						isGitHubNoReply(secret) ||
						r.isDuplicateEmail(repoName, secret)) {
					continue
				}

				digest := sha256.Sum256([]byte(repoName + path + secret))
				r.results <- &Finding{
					ID:          hex.EncodeToString(digest[:])[:16],
					Repository:  repoName,
					FilePath:    path,
					LineNumber:  idx + 1,
					SecretType:  pattern.Name,
					SecretValue: maskSecret(secret),
					Context:     getContext(lines, idx, 2),
					URL:         file.GetHTMLURL(),
					Branch:      branch,
					Timestamp:   time.Now(),
					Severity:    pattern.Severity,
				}
			}
		}
	}
}
// scanPullRequests pages through all pull requests of repoName (any state,
// 50 per page) and scans each title and body. It stops on cancellation, on a
// limiter error or on the first API error.
func (r *Reaper) scanPullRequests(repoName string) {
	owner, name := getOwner(repoName), getRepoName(repoName)
	listOpts := &github.PullRequestListOptions{State: "all", ListOptions: github.ListOptions{PerPage: 50}}

	for r.ctx.Err() == nil {
		if r.limiter.Wait(r.ctx) != nil {
			return
		}
		page, resp, err := r.client.PullRequests.List(r.ctx, owner, name, listOpts)
		if err != nil {
			return
		}
		for _, pr := range page {
			for _, text := range []string{pr.GetTitle(), pr.GetBody()} {
				r.scanText(repoName, "pull_request", text, pr.GetHTMLURL())
			}
		}
		if resp.NextPage == 0 {
			return
		}
		listOpts.Page = resp.NextPage
	}
}
// scanIssues pages through all issues of repoName (any state, 50 per page)
// and scans each title and body. It stops on cancellation, on a limiter
// error or on the first API error.
func (r *Reaper) scanIssues(repoName string) {
	owner, name := getOwner(repoName), getRepoName(repoName)
	listOpts := &github.IssueListByRepoOptions{State: "all", ListOptions: github.ListOptions{PerPage: 50}}

	for r.ctx.Err() == nil {
		if r.limiter.Wait(r.ctx) != nil {
			return
		}
		page, resp, err := r.client.Issues.ListByRepo(r.ctx, owner, name, listOpts)
		if err != nil {
			return
		}
		for _, issue := range page {
			for _, text := range []string{issue.GetTitle(), issue.GetBody()} {
				r.scanText(repoName, "issue", text, issue.GetHTMLURL())
			}
		}
		if resp.NextPage == 0 {
			return
		}
		listOpts.Page = resp.NextPage
	}
}
// scanCommitHistory pages through the commits reachable from branch (100 per
// page) and scans every commit message. Entries without commit details are
// skipped. It stops on cancellation, on a limiter error or on the first API
// error.
func (r *Reaper) scanCommitHistory(repoName, branch string) {
	owner, name := getOwner(repoName), getRepoName(repoName)
	listOpts := &github.CommitsListOptions{
		SHA:         branch,
		ListOptions: github.ListOptions{PerPage: 100},
	}

	for r.ctx.Err() == nil {
		if r.limiter.Wait(r.ctx) != nil {
			return
		}
		page, resp, err := r.client.Repositories.ListCommits(r.ctx, owner, name, listOpts)
		if err != nil {
			return
		}
		for _, entry := range page {
			if entry.Commit == nil {
				continue
			}
			r.scanText(repoName, "commit_message", entry.Commit.GetMessage(), entry.GetHTMLURL())
		}
		if resp.NextPage == 0 {
			return
		}
		listOpts.Page = resp.NextPage
	}
}
// scanText matches every pattern against a free-form text (a title, body or
// commit message) and queues one Finding per accepted match. location labels
// the kind of text and is stored as the finding's FilePath; url links back
// to the source. Empty text is ignored. The same entropy and email filters
// as for file scans apply.
func (r *Reaper) scanText(repoName, location, text, url string) {
	if len(text) == 0 {
		return
	}
	for _, pattern := range r.patterns {
		for _, groups := range pattern.Regex.FindAllStringSubmatch(text, -1) {
			// Prefer the first capture group when it matched something.
			secret := groups[0]
			if len(groups) > 1 && groups[1] != "" {
				secret = groups[1]
			}
			if *entropyCheck && pattern.Entropy && !hasHighEntropy(secret) {
				continue
			}
			// Short-circuit order matters: the duplicate check records the
			// address, so it must only run for addresses that pass the first
			// two filters.
			if pattern.Name == "Email Address" &&
				((*hideObfuscated && isObfuscatedEmail(secret)) ||
					isGitHubNoReply(secret) ||
					r.isDuplicateEmail(repoName, secret)) {
				continue
			}

			digest := sha256.Sum256([]byte(repoName + location + secret))
			r.results <- &Finding{
				ID:          hex.EncodeToString(digest[:])[:16],
				Repository:  repoName,
				FilePath:    location,
				SecretType:  pattern.Name,
				SecretValue: maskSecret(secret),
				Context:     text,
				URL:         url,
				Timestamp:   time.Now(),
				Severity:    pattern.Severity,
			}
		}
	}
}
// fetchAdvisories queries the GitHub GraphQL API for up to 100 vulnerability
// alerts of owner/repo and queues one Advisory per alert, labelled with
// repoName.
//
// Changes from the earlier version:
//   - owner and repo travel as GraphQL variables instead of being pasted into
//     the query text, so unusual characters cannot alter the query;
//   - the token is resolved the same way as in main (-token first, then
//     GITHUB_TOKEN), so advisories also work when only the flag is given;
//   - the request waits on the shared rate limiter, non-200 responses are
//     ignored instead of being decoded, and queuing stops on cancellation.
//
// All failures end the call silently, as before; advisories are best effort.
func (r *Reaper) fetchAdvisories(repoName, owner, repo string) {
	const query = `query($owner: String!, $name: String!) {
  repository(owner: $owner, name: $name) {
    vulnerabilityAlerts(first: 100) {
      nodes {
        securityAdvisory {
          ghsaId
          cveId
          summary
          severity
          publishedAt
          updatedAt
          permalink
        }
        vulnerableManifestPath
      }
    }
  }
}`
	payload := struct {
		Query     string            `json:"query"`
		Variables map[string]string `json:"variables"`
	}{
		Query:     query,
		Variables: map[string]string{"owner": owner, "name": repo},
	}
	jsonBody, err := json.Marshal(payload)
	if err != nil {
		return
	}

	if err := r.limiter.Wait(r.ctx); err != nil {
		return
	}
	req, err := http.NewRequestWithContext(r.ctx, "POST", "https://api.github.com/graphql", bytes.NewReader(jsonBody))
	if err != nil {
		return
	}
	token := *githubToken
	if token == "" {
		token = os.Getenv("GITHUB_TOKEN")
	}
	req.Header.Set("Authorization", "Bearer "+token)
	req.Header.Set("Content-Type", "application/json")

	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		return
	}
	defer resp.Body.Close()
	if resp.StatusCode != http.StatusOK {
		return
	}

	var result struct {
		Data struct {
			Repository struct {
				VulnerabilityAlerts struct {
					Nodes []struct {
						SecurityAdvisory struct {
							GhsaId      string    `json:"ghsaId"`
							CveId       string    `json:"cveId"`
							Summary     string    `json:"summary"`
							Severity    string    `json:"severity"`
							PublishedAt time.Time `json:"publishedAt"`
							UpdatedAt   time.Time `json:"updatedAt"`
							Permalink   string    `json:"permalink"`
						} `json:"securityAdvisory"`
						VulnerableManifestPath string `json:"vulnerableManifestPath"`
					} `json:"nodes"`
				} `json:"vulnerabilityAlerts"`
			} `json:"repository"`
		} `json:"data"`
	}
	if err := json.NewDecoder(resp.Body).Decode(&result); err != nil {
		return
	}

	for _, node := range result.Data.Repository.VulnerabilityAlerts.Nodes {
		adv := &Advisory{
			Repository:         repoName,
			GHSAID:             node.SecurityAdvisory.GhsaId,
			CVEID:              node.SecurityAdvisory.CveId,
			Summary:            node.SecurityAdvisory.Summary,
			Severity:           node.SecurityAdvisory.Severity,
			PublishedAt:        node.SecurityAdvisory.PublishedAt,
			UpdatedAt:          node.SecurityAdvisory.UpdatedAt,
			Permalink:          node.SecurityAdvisory.Permalink,
			VulnerableManifest: node.VulnerableManifestPath,
		}
		// Do not block on a full queue once shutdown was requested.
		select {
		case r.advisories <- adv:
		case <-r.ctx.Done():
			return
		}
	}
}
// processResults drains the results channel for the lifetime of the process.
// Each finding is counted, appended to the CSV file (flushed immediately) and
// to the JSONL file (synced immediately), and announced on stdout: a full
// line with -verbose, a single dot otherwise.
//
// Write failures are logged instead of being dropped silently; processing
// continues with the next finding.
func (r *Reaper) processResults() {
	for finding := range r.results {
		r.stats.mu.Lock()
		r.stats.FindingsFound++
		currentFindings := r.stats.FindingsFound
		r.stats.mu.Unlock()

		record := []string{
			finding.Timestamp.Format(time.RFC3339),
			finding.Repository,
			finding.FilePath,
			fmt.Sprintf("%d", finding.LineNumber),
			finding.SecretType,
			finding.SecretValue,
			finding.URL,
			finding.Severity,
		}
		if err := r.csvWriter.Write(record); err != nil {
			log.Printf("Failed to write CSV record: %v", err)
		}
		// csv.Writer buffers; underlying I/O errors only surface after Flush.
		r.csvWriter.Flush()
		if err := r.csvWriter.Error(); err != nil {
			log.Printf("Failed to flush CSV output: %v", err)
		}

		jsonData, err := json.Marshal(finding)
		if err != nil {
			log.Printf("Failed to encode finding %s: %v", finding.ID, err)
		} else if _, err := r.jsonFile.Write(append(jsonData, '\n')); err != nil {
			log.Printf("Failed to write finding %s: %v", finding.ID, err)
		} else if err := r.jsonFile.Sync(); err != nil {
			log.Printf("Failed to sync findings file: %v", err)
		}

		if *verbose {
			fmt.Printf("\n[SECRET] #%d: %s [%s] in %s - %s\n", currentFindings, finding.SecretType, finding.Severity, finding.Repository, finding.FilePath)
		} else {
			fmt.Printf(".")
		}
	}
}
// processAdvisories drains the advisories channel for the lifetime of the
// process. Each advisory is counted, appended to the advisories JSONL file
// (synced immediately) and, with -verbose, announced on stdout.
//
// Encoding and write failures are logged instead of being dropped silently;
// processing continues with the next advisory.
func (r *Reaper) processAdvisories() {
	for advisory := range r.advisories {
		r.stats.mu.Lock()
		r.stats.AdvisoriesFound++
		r.stats.mu.Unlock()

		jsonData, err := json.Marshal(advisory)
		if err != nil {
			log.Printf("Failed to encode advisory %s: %v", advisory.GHSAID, err)
		} else if _, err := r.advFile.Write(append(jsonData, '\n')); err != nil {
			log.Printf("Failed to write advisory %s: %v", advisory.GHSAID, err)
		} else if err := r.advFile.Sync(); err != nil {
			log.Printf("Failed to sync advisories file: %v", err)
		}

		if *verbose {
			fmt.Printf("\n[ADVISORY] %s [%s] in %s - %s\n", advisory.GHSAID, advisory.Severity, advisory.Repository, advisory.Summary)
		}
	}
}
// buildSearchQuery assembles the repository search expression: the fixed
// base term plus, when -since-days is positive, a "pushed:>" qualifier dated
// that many days before today.
func (r *Reaper) buildSearchQuery() string {
	terms := []string{"a is:public"}
	if days := *sinceDays; days > 0 {
		cutoff := time.Now().AddDate(0, 0, -days)
		terms = append(terms, "pushed:>"+cutoff.Format("2006-01-02"))
	}
	return strings.Join(terms, " ")
}
// saveScannedList rewrites scanned_repos.txt in the output directory with
// one repository name per line, holding r.mu for the duration. The order of
// the lines follows map iteration and is therefore not stable between runs.
//
// Failures are logged rather than ignored: losing this file means already
// processed repositories are scanned again after a restart.
func (r *Reaper) saveScannedList() {
	r.mu.Lock()
	defer r.mu.Unlock()

	path := filepath.Join(*outputDir, "scanned_repos.txt")
	file, err := os.Create(path)
	if err != nil {
		log.Printf("Warning: could not save scanned repo list %s: %v", path, err)
		return
	}

	// Buffer the many small writes. bufio.Writer remembers the first write
	// error and reports it from Flush.
	w := bufio.NewWriter(file)
	for repo := range r.scannedRepos {
		w.WriteString(repo)
		w.WriteByte('\n')
	}
	if err := w.Flush(); err != nil {
		log.Printf("Warning: could not write scanned repo list %s: %v", path, err)
	}
	if err := file.Close(); err != nil {
		log.Printf("Warning: could not close scanned repo list %s: %v", path, err)
	}
}
// printFinalStats prints the run summary and the usage disclaimer to stdout.
//
// The counters are copied while holding stats.mu because the result
// processors may still be updating them; printing happens after the lock is
// released. Callers must not hold stats.mu.
func (r *Reaper) printFinalStats() {
	r.stats.mu.Lock()
	started := r.stats.StartTime
	repos := r.stats.ReposScanned
	files := r.stats.FilesScanned
	findings := r.stats.FindingsFound
	advisories := r.stats.AdvisoriesFound
	rateLimitHits := r.stats.RateLimitHits
	r.stats.mu.Unlock()

	duration := time.Since(started)
	fmt.Println("\n" + strings.Repeat("=", 60))
	fmt.Println("REAPER FINAL STATISTICS")
	fmt.Println(strings.Repeat("=", 60))
	fmt.Printf("Total runtime: %s\n", duration.Round(time.Second))
	fmt.Printf("Repositories: %d\n", repos)
	fmt.Printf("Files scanned: %d\n", files)
	fmt.Printf("Secrets found: %d\n", findings)
	fmt.Printf("Advisories found: %d\n", advisories)
	fmt.Printf("Rate limit hits: %d\n", rateLimitHits)
	fmt.Printf("Output directory: %s\n", *outputDir)
	fmt.Println(strings.Repeat("=", 60))
	fmt.Println("\nDISCLAIMER: This tool is for educational and authorized testing only.")
	fmt.Println("Use responsibly and in compliance with GitHub's Terms of Service.")
}
// getOwner returns the part of repoName before the first '/'. A name without
// a slash is returned unchanged.
func getOwner(repoName string) string {
	owner, _, _ := strings.Cut(repoName, "/")
	return owner
}
// getRepoName returns the segment between the first and second '/' of
// repoName. A name without a slash is returned unchanged.
func getRepoName(repoName string) string {
	_, rest, hasSlash := strings.Cut(repoName, "/")
	if !hasSlash {
		return repoName
	}
	name, _, _ := strings.Cut(rest, "/")
	return name
}
// maskSecret returns a redacted form of secret for reports: values of twelve
// characters or fewer become "***MASKED***", longer values keep their first
// and last six characters around "...".
//
// Lengths and cut points are counted in runes, not bytes, so a value that
// contains multi-byte characters is never cut in the middle of a character
// (which would produce invalid UTF-8 in the output files).
//
// NOTE(review): a 13-character value still exposes 12 of its characters;
// consider a higher threshold if short secrets matter.
func maskSecret(secret string) string {
	runes := []rune(secret)
	if len(runes) <= 12 {
		return "***MASKED***"
	}
	return string(runes[:6]) + "..." + string(runes[len(runes)-6:])
}
// getContext returns the line at index lineNum (0-based) together with up to
// contextLines lines before and after it, joined with newlines.
//
// The window is clamped to the bounds of lines, so an out-of-range lineNum
// or a negative contextLines yields a shorter (possibly empty) result
// instead of a slice-bounds panic.
func getContext(lines []string, lineNum, contextLines int) string {
	if contextLines < 0 {
		contextLines = 0
	}

	start := lineNum - contextLines
	if start < 0 {
		start = 0
	}
	if start > len(lines) {
		start = len(lines)
	}

	end := lineNum + contextLines + 1
	if end > len(lines) {
		end = len(lines)
	}
	if end < start {
		end = start
	}
	return strings.Join(lines[start:end], "\n")
}
// hasHighEntropy reports whether the Shannon entropy of s, measured over its
// characters, exceeds 4.5 bits per character. Strings shorter than eight
// bytes are rejected outright.
//
// Frequencies are divided by the number of runes rather than the byte
// length, so the probabilities add up to one for non-ASCII input as well.
// Results for pure ASCII strings are unchanged.
func hasHighEntropy(s string) bool {
	if len(s) < 8 {
		return false
	}

	counts := make(map[rune]int)
	total := 0
	for _, char := range s {
		counts[char]++
		total++
	}

	var entropy float64
	for _, count := range counts {
		p := float64(count) / float64(total)
		entropy -= p * math.Log2(p)
	}
	return entropy > 4.5
}