diff --git a/README.md b/README.md index 10c2e02..699e060 100755 --- a/README.md +++ b/README.md @@ -97,9 +97,8 @@ Env: dev **Medium** ``` /swagger-ui.html /swagger.json /api-docs -/openapi.json /robots.txt /.well-known/security.txt -/web.config /.htaccess /Dockerfile -/docker-compose.yml +/openapi.json /web.config /.htaccess +/Dockerfile /docker-compose.yml ``` @@ -144,15 +143,29 @@ Access settings via the **gear icon** in the popup: ## Testing -A test environment is included to verify the extension works correctly: +A dynamic test server is included to verify the extension works correctly: ```bash cd test/ -./start-server-macos.command # macOS (opens browser automatically) +python3 server.py # Recommended: Dynamic server +``` + +Alternative (static server, limited functionality): +```bash +./start-server-macos.command # macOS ./start-server.sh # Linux/other ``` -This starts a local server on port 9000 with fake sensitive files and debug endpoints. +The dynamic server (`server.py`) serves different content based on debug params/headers: +- **Normal request**: Clean production page (no sensitive data) +- **With debug params**: Debug page with exposed credentials, stack traces, etc. + +This mimics real-world behavior where debug endpoints only expose sensitive data when triggered. + +Test URLs: +- `http://localhost:9000/` — Normal page +- `http://localhost:9000/?debug=1` — Debug mode triggered +- `http://localhost:9000/.env` — Sensitive path ## Technical Details @@ -163,6 +176,16 @@ This starts a local server on port 9000 with fake sensitive files and debug endp ## Changelog +### v2.0.6 +- **Reduced false positives on dynamic sites** (login pages, news sites, etc.) + - Redirect detection: Filters paths that redirect to catch-all destinations + - Natural variance measurement: Detects highly dynamic sites + - Smart mode now requires clear evidence (debug indicators or status changes) + - Debug indicators must be NEW (not present in original response) +- **Fixed path detection on non-standard ports** (e.g., localhost:9000) +- **New dynamic test server** (`test/server.py`) that mimics real-world behavior +- Improved soft-404 detection (content length comparison) + ### v2.0.0 - Complete rewrite with Manifest V3 - Multi-factor detection engine diff --git a/background.js b/background.js index e27da57..ea4cb92 100644 --- a/background.js +++ b/background.js @@ -1,6 +1,9 @@ /** - * debugHunter v2.0.0 - Background Service Worker + * debugHunter v2.0.6 - Background Service Worker * Multi-factor detection with configurable comparison strategies + * - Added redirect detection to filter false positives on paths + * - Added natural variance measurement to filter false positives on dynamic sites + * - Require variance check for all detections without debug indicators */ import { stringSimilarity } from './similarity.js'; @@ -180,6 +183,7 @@ const debugHeaders = [ async function getSettings() { const result = await chrome.storage.sync.get([ + 'enabled', 'detectionMode', 'requireDebugIndicators', 'detectStatusChanges', @@ -195,6 +199,7 @@ async function getSettings() { ]); return { + enabled: result.enabled !== false, // Enabled by default detectionMode: result.detectionMode || 'smart', requireDebugIndicators: result.requireDebugIndicators !== false, detectStatusChanges: result.detectStatusChanges !== false, @@ -293,6 +298,11 @@ function containsDebugIndicators(text) { return { found: false, level: null }; } +function getLevelPriority(level) { + const priorities = { critical: 4, high: 3, medium: 2, low: 1 }; + return priorities[level] || 0; +} + function extractInterestingHeaders(response) { const found = {}; for (const header of debugHeaders) { @@ -323,7 +333,7 @@ function compareHeaders(original, modified) { // MULTI-FACTOR COMPARISON // ============================================================================ -async function analyzeResponseDifference(originalResponse, modifiedResponse, originalText, modifiedText, settings) { +async function analyzeResponseDifference(originalResponse, modifiedResponse, originalText, modifiedText, settings, naturalVariance = null) { const result = { isDifferent: false, confidence: 0, @@ -331,6 +341,7 @@ async function analyzeResponseDifference(originalResponse, modifiedResponse, ori severity: 'low', debugIndicators: null, headerChanges: [], + requiresVarianceCheck: false, // Flag to trigger control request verification }; // 1. Status code change detection @@ -360,15 +371,28 @@ async function analyzeResponseDifference(originalResponse, modifiedResponse, ori } } - // 3. Content length difference + // 3. Content length difference (variance-aware) const lengthDiff = Math.abs(modifiedText.length - originalText.length); - if (lengthDiff >= settings.minLengthDiff) { + // If we know the site's natural variance, only count if difference EXCEEDS natural variance + const isLengthWithinVariance = naturalVariance && lengthDiff <= naturalVariance.lengthDiff * 1.2; + + if (!isLengthWithinVariance && lengthDiff >= settings.minLengthDiff) { result.reasons.push(`Content length diff: ${lengthDiff} bytes`); result.confidence += Math.min(lengthDiff / 100, 25); } - // 4. Debug indicator detection - const debugCheck = containsDebugIndicators(modifiedText); + // 4. Debug indicator detection - only count if NEW (not present in original) + const debugCheckModified = containsDebugIndicators(modifiedText); + const debugCheckOriginal = containsDebugIndicators(originalText); + + // Only consider debug indicators that are NEW (caused by the param/header) + // If the same level of indicator exists in original, it's not caused by our test + const debugCheck = { + found: debugCheckModified.found && (!debugCheckOriginal.found || + getLevelPriority(debugCheckModified.level) > getLevelPriority(debugCheckOriginal.level)), + level: debugCheckModified.level, + }; + if (debugCheck.found) { result.debugIndicators = debugCheck; result.reasons.push(`Debug indicators found: ${debugCheck.level}`); @@ -379,7 +403,7 @@ async function analyzeResponseDifference(originalResponse, modifiedResponse, ori else if (debugCheck.level === 'medium' && !['critical', 'high'].includes(result.severity)) result.severity = 'medium'; } - // 5. Similarity check (after filtering dynamic content) + // 5. Similarity check (after filtering dynamic content, variance-aware) let originalFiltered = originalText; let modifiedFiltered = modifiedText; @@ -389,11 +413,25 @@ async function analyzeResponseDifference(originalResponse, modifiedResponse, ori } const similarity = stringSimilarity.compareTwoStrings(originalFiltered, modifiedFiltered); - if (similarity < settings.similarityThreshold) { + + // If we know the site's natural variance, only count if similarity is WORSE than natural variance + // E.g., if site naturally has 92% similarity between requests, only flag if this request is < 90% + const isSimilarityWithinVariance = naturalVariance && similarity >= naturalVariance.similarity - 0.02; + + if (!isSimilarityWithinVariance && similarity < settings.similarityThreshold) { result.reasons.push(`Similarity: ${(similarity * 100).toFixed(1)}%`); result.confidence += (1 - similarity) * 30; } + // If we have confidence but NO debug indicators, always verify with variance check + // This prevents false positives on dynamic sites (login pages, news sites, etc.) + if (!naturalVariance && !debugCheck.found && result.confidence > 0) { + result.requiresVarianceCheck = true; + } + + // Only critical/high/medium indicators count as significant (low like "Warning:" can appear in normal pages) + const hasSignificantDebugIndicators = debugCheck.found && ['critical', 'high', 'medium'].includes(debugCheck.level); + // Determine if response is different based on mode switch (settings.detectionMode) { case 'aggressive': @@ -413,12 +451,26 @@ async function analyzeResponseDifference(originalResponse, modifiedResponse, ori case 'smart': default: - // Multi-factor: needs significant confidence - // If debug indicators found, lower threshold - if (settings.requireDebugIndicators) { - result.isDifferent = result.confidence >= 40 && debugCheck.found; + // Smart mode: require clear evidence to avoid false positives on dynamic sites + const hasStatusBypass = originalResponse.status === 403 && modifiedResponse.status === 200; + const hasServerError = modifiedResponse.status >= 500; + // Check if content is significantly different (not just dynamic variation) + const isSignificantlyDifferent = similarity < 0.70; + // Debug indicators in modified response (even if also in original) + const hasAnyDebugIndicators = debugCheckModified.found && ['critical', 'high', 'medium'].includes(debugCheckModified.level); + + if (hasStatusBypass || hasServerError) { + // Clear signal - status change is strong evidence + result.isDifferent = true; + } else if (hasSignificantDebugIndicators) { + // NEW debug indicators found - report + result.isDifferent = result.confidence >= 40; + } else if (hasAnyDebugIndicators && isSignificantlyDifferent) { + // Debug indicators exist AND content is very different - likely more debug info triggered + result.isDifferent = true; } else { - result.isDifferent = result.confidence >= 50; + // No clear evidence - don't report to avoid FPs on dynamic sites + result.isDifferent = false; } break; } @@ -595,6 +647,65 @@ async function getUrlBaseline(url) { } } +// ============================================================================ +// NATURAL VARIANCE MEASUREMENT (for dynamic sites) +// ============================================================================ + +const varianceCache = new Map(); + +async function measureNaturalVariance(url, baselineText, settings, useRandomParam = false) { + // Cache key includes whether we're measuring with params + const cacheKey = useRandomParam ? `${url}#withParam` : url; + + // Check cache first (valid for 2 minutes) + if (varianceCache.has(cacheKey)) { + const cached = varianceCache.get(cacheKey); + if (Date.now() - cached.timestamp < 120000) { + return cached.variance; + } + } + + try { + // For params, measure variance by adding a random param to see how the site responds + // This catches sites that return different content when ANY query param is present + let controlUrl = url; + if (useRandomParam) { + const randomParam = `_rnd${Math.random().toString(36).substring(7)}`; + const urlObj = new URL(url); + urlObj.searchParams.set(randomParam, '1'); + controlUrl = urlObj.href; + } + + const controlResponse = await rateLimitedFetch(controlUrl); + const controlText = await controlResponse.text(); + + // Filter dynamic content before comparison + let baselineFiltered = baselineText; + let controlFiltered = controlText; + + if (settings.filterDynamicContent) { + baselineFiltered = filterDynamicContent(baselineText, settings.dynamicPatterns); + controlFiltered = filterDynamicContent(controlText, settings.dynamicPatterns); + } + + // Calculate natural variance between baseline and control + const naturalSimilarity = stringSimilarity.compareTwoStrings(baselineFiltered, controlFiltered); + const naturalLengthDiff = Math.abs(controlText.length - baselineText.length); + + const variance = { + similarity: naturalSimilarity, + lengthDiff: naturalLengthDiff, + // Site is "highly dynamic" if requests differ significantly + isHighlyDynamic: naturalSimilarity < 0.95, + }; + + varianceCache.set(cacheKey, { variance, timestamp: Date.now() }); + return variance; + } catch (e) { + return null; + } +} + // ============================================================================ // PARAMETER CHECKING (uses cached baseline) // ============================================================================ @@ -607,7 +718,6 @@ function appendParam(url, param) { async function checkParams(url, baseline = null) { const settings = await getSettings(); - const allParams = [...debugParams.high, ...debugParams.medium]; try { // Use provided baseline or fetch new one @@ -622,6 +732,9 @@ async function checkParams(url, baseline = null) { ...debugParams.medium.map(p => ({ ...p, confidence: 'medium' })), ]; + // Track if we've measured variance for this URL (lazy - only when needed) + let measuredVariance = null; + for (const param of sortedParams) { const modifiedUrl = appendParam(url, param); @@ -629,12 +742,31 @@ async function checkParams(url, baseline = null) { const modifiedResponse = await rateLimitedFetch(modifiedUrl); const modifiedText = await modifiedResponse.text(); - const analysis = await analyzeResponseDifference( + // First analysis without variance + let analysis = await analyzeResponseDifference( baseline.mockResponse, modifiedResponse, baseline.text, modifiedText, - settings + settings, + measuredVariance ); + // If flagged but needs variance verification (no debug indicators found) + if (analysis.isDifferent && analysis.requiresVarianceCheck && !measuredVariance) { + // Measure variance with a random param to see how site responds to ANY query param + // This catches sites that return different content when params are present (vs absent) + measuredVariance = await measureNaturalVariance(url, baseline.text, settings, true); + + if (measuredVariance) { + // Re-analyze with variance knowledge - always re-check, not just for highly dynamic sites + analysis = await analyzeResponseDifference( + baseline.mockResponse, modifiedResponse, + baseline.text, modifiedText, + settings, + measuredVariance + ); + } + } + if (analysis.isDifferent) { await addFinding('params', { url: modifiedUrl, @@ -670,6 +802,9 @@ async function checkHeaders(url, baseline = null) { } if (!baseline) return; + // Track if we've measured variance for this URL (lazy - only when needed) + let measuredVariance = null; + for (const header of customHeaders) { try { const headers = new Headers(); @@ -678,12 +813,30 @@ async function checkHeaders(url, baseline = null) { const modifiedResponse = await rateLimitedFetch(url, { headers }); const modifiedText = await modifiedResponse.text(); - const analysis = await analyzeResponseDifference( + // First analysis without variance + let analysis = await analyzeResponseDifference( baseline.mockResponse, modifiedResponse, baseline.text, modifiedText, - settings + settings, + measuredVariance ); + // If flagged but needs variance verification (no debug indicators found) + if (analysis.isDifferent && analysis.requiresVarianceCheck && !measuredVariance) { + // Measure natural variance with a control request + measuredVariance = await measureNaturalVariance(url, baseline.text, settings); + + if (measuredVariance) { + // Re-analyze with variance knowledge - always re-check, not just for highly dynamic sites + analysis = await analyzeResponseDifference( + baseline.mockResponse, modifiedResponse, + baseline.text, modifiedText, + settings, + measuredVariance + ); + } + } + if (analysis.isDifferent) { await addFinding('headers', { url, @@ -710,6 +863,24 @@ async function checkHeaders(url, baseline = null) { // Cache for domain baselines and soft-404 fingerprints const domainCache = new Map(); +// Normalize redirect URL for comparison (resolves relative URLs, removes trailing slashes) +function normalizeRedirectUrl(location, baseUrl) { + try { + const resolved = new URL(location, baseUrl); + // Return pathname without trailing slash for consistent comparison + return resolved.pathname.replace(/\/$/, '') || '/'; + } catch (e) { + return location; + } +} + +// Check if a redirect is just URL normalization (trailing slash, case change) +function isNormalizationRedirect(originalPath, redirectPath) { + const normalizedOriginal = originalPath.replace(/\/$/, '').toLowerCase(); + const normalizedRedirect = redirectPath.replace(/\/$/, '').toLowerCase(); + return normalizedOriginal === normalizedRedirect; +} + async function getDomainBaseline(baseUrl) { if (domainCache.has(baseUrl)) { const cached = domainCache.get(baseUrl); @@ -723,18 +894,32 @@ async function getDomainBaseline(baseUrl) { const baseResponse = await rateLimitedFetch(baseUrl); const baseText = await baseResponse.text(); - // Get soft-404 fingerprint (request a random non-existent path) + // Get soft-404 fingerprint and catch-all redirect (request a random non-existent path) const randomPath = `/${Math.random().toString(36).substring(7)}-${Date.now()}`; let soft404Fingerprint = null; let soft404Length = 0; + let catchAllRedirect = null; try { - const soft404Response = await rateLimitedFetch(baseUrl + randomPath); - const soft404Text = await soft404Response.text(); + // Use redirect: 'manual' to detect catch-all redirects + const soft404Response = await rateLimitedFetch(baseUrl + randomPath, { redirect: 'manual' }); + + // Check if the random path redirects somewhere (catch-all redirect pattern) + if (soft404Response.status >= 300 && soft404Response.status < 400) { + const location = soft404Response.headers.get('location'); + if (location) { + // Normalize the redirect URL for comparison + catchAllRedirect = normalizeRedirectUrl(location, baseUrl); + } + } + + // For fingerprinting, follow the redirect to get actual content + const finalResponse = await rateLimitedFetch(baseUrl + randomPath); + const soft404Text = await finalResponse.text(); soft404Length = soft404Text.length; // Create a fingerprint based on content structure, not exact content soft404Fingerprint = { - status: soft404Response.status, + status: finalResponse.status, length: soft404Text.length, hasTitle: /
This is a normal production page with no sensitive information.
+{params_str}
+ {headers_str}
+ +DB_HOST=localhost +DB_NAME=production_db +DB_PASSWORD=super_secret_password_123! +API_KEY=sk-1234567890abcdef1234567890abcdef +AWS_SECRET_ACCESS_KEY=wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY +SECRET_KEY=my-super-secret-application-key ++
+PHP Version: PHP/8.2.0 +Server: Apache/2.4.52 +Document Root: /var/www/html +Server User: www-data +Debug Mode: true ++
+Fatal error: Uncaught Exception in /var/www/html/app/core.php:142
+Stack trace:
+#0 /var/www/html/app/core.php(142): Database->connect()
+#1 /var/www/html/app/bootstrap.php(28): Application->init()
+#2 /var/www/html/index.php(5): require_once('/var/www/html/...')
+#3 {{main}}
+
+