Welcome to our site.
"#; + let headers = std::collections::HashMap::new(); + + let detector = ChallengeDetector::default(); + let result = detector.detect_from_html("https://example.com", 200, &headers, html); + assert!(result.is_none()); + } + + #[test] + fn test_no_false_positive_on_word_containing_captcha() { + // "encapsulated" contains "captcha" substring — but risk score should + // only be 20 (GenericCaptcha) which is below default threshold of 25 + // if no other indicators present. Actually it IS 20 which is < 25. + let html = "We encapsulated the logic in a module.
"; + + let detector = ChallengeDetector::default(); + let result = detector.detect_from_html("https://x.com", 200, &std::collections::HashMap::new(), html); + // GenericCaptcha score is 20, threshold is 25, so below threshold → None + assert!(result.is_none()); + } + + #[test] + fn test_combined_header_and_html_detection() { + let mut headers = std::collections::HashMap::new(); + headers.insert("server".to_string(), "cloudflare".to_string()); + + let html = r#""#; + + let detector = ChallengeDetector::default(); + let info = detector + .detect_from_html("https://x.com", 403, &headers, html) + .unwrap(); + + // Should have both header-detected JsChallenge and HTML-detected Recaptcha + assert!(info.kinds.contains(&ChallengeKind::JsChallenge)); + assert!(info.kinds.contains(&ChallengeKind::Recaptcha)); + assert!(info.risk_score >= 55); // 35 (JS) + 30 (reCAPTCHA) = 65+, clamped + } + + #[test] + fn test_early_exit_high_score() { + let mut headers = std::collections::HashMap::new(); + headers.insert("server".to_string(), "cloudflare".to_string()); + headers.insert("cf-mitigated".to_string(), "1".to_string()); + headers.insert("cf-ray".to_string(), "abc".to_string()); + headers.insert("x-datadome".to_string(), "yes".to_string()); + headers.insert("x-px".to_string(), "1".to_string()); + + let detector = ChallengeDetector::new(25); + // Score from headers alone will be high — detect_from_html should early-exit + let info = detector + .detect_from_html("https://example.com", 403, &headers, "recaptcha hcaptcha") + .unwrap(); + + // Only header-detected kinds should be present (no Recaptcha/Hcaptcha from HTML) + assert!(info.risk_score >= 65); + } + + #[test] + fn test_custom_threshold_high() { + let mut headers = std::collections::HashMap::new(); + headers.insert("x-datadome".to_string(), "active".to_string()); + + let detector = ChallengeDetector::new(50); + let result = detector.detect_from_response("https://x.com", 403, &headers); + // DataDome alone is score 30, which is below threshold 50 + // But we also get BotProtection from generic 403 = 15, total = 45 still < 50 + // Actually: generic 403 gives BotProtection 15, DataDome gives BotProtection 30 + // dedup: first add BotProtection 15, second add is deduped → score stays 15 + // 15 < 50, so None + // Wait let me re-check: 403 → generic gives BotProtection+15, then DataDome gives + // BotProtection but kind already exists → dedup, score stays 15. 15 < 50 → None. + // Hmm but x-datadome also has its own detection that adds BotProtection 30. + // The dedup means score only increases on first insert. + // So score = 15 (from generic 403), 15 < 50 → None + assert!(result.is_none()); + } + + // ── ChallengeInfo helpers ────────────────────────────────────────── + + #[test] + fn test_info_is_captcha() { + let info = ChallengeInfo { + url: "https://x.com".to_string(), + status: 200, + kinds: vec![ChallengeKind::Recaptcha], + risk_score: 30, + }; + assert!(info.is_captcha()); + assert!(!info.is_js_challenge()); + } + + #[test] + fn test_info_is_js_challenge() { + let info = ChallengeInfo { + url: "https://x.com".to_string(), + status: 403, + kinds: vec![ChallengeKind::JsChallenge], + risk_score: 35, + }; + assert!(!info.is_captcha()); + assert!(info.is_js_challenge()); + } + + // ── ChallengeKind Display ────────────────────────────────────────── + + #[test] + fn test_kind_display() { + assert_eq!(format!("{}", ChallengeKind::Recaptcha), "reCAPTCHA"); + assert_eq!(format!("{}", ChallengeKind::Hcaptcha), "hCaptcha"); + assert_eq!(format!("{}", ChallengeKind::Turnstile), "Cloudflare Turnstile"); + assert_eq!(format!("{}", ChallengeKind::GenericCaptcha), "CAPTCHA"); + assert_eq!(format!("{}", ChallengeKind::JsChallenge), "JS Challenge"); + assert_eq!(format!("{}", ChallengeKind::BotProtection), "Bot Protection"); + } +} diff --git a/crates/pardus-challenge/src/interceptor.rs b/crates/pardus-challenge/src/interceptor.rs new file mode 100644 index 0000000..a8ede4a --- /dev/null +++ b/crates/pardus-challenge/src/interceptor.rs @@ -0,0 +1,107 @@ +//! Challenge interceptor that plugs into pardus-core's interceptor pipeline. + +use std::sync::Arc; + +use async_trait::async_trait; +use tokio::sync::oneshot; +use pardus_core::intercept::{ + InterceptAction, Interceptor, InterceptorPhase, ModifiedRequest, PauseHandle, + RequestContext, ResponseContext, +}; + +use crate::detector::ChallengeDetector; +use crate::resolver::ChallengeResolver; + +/// An interceptor that pauses the pipeline when a CAPTCHA or bot-challenge +/// is detected and delegates resolution to a [`ChallengeResolver`]. +/// +/// Register this on an `InterceptorManager` (either on `App` or `Browser`): +/// +/// ```ignore +/// let resolver: Arc