@@ -103,6 +103,57 @@ fn resolve_case(plan: &TextSearchPlan) -> CaseSensitivity {
103103 }
104104}
105105
106+ fn plan_has_regex ( plan : & TextSearchPlan ) -> bool {
107+ plan. required_terms
108+ . iter ( )
109+ . chain ( plan. excluded_terms . iter ( ) )
110+ . any ( |term| matches ! ( term, ContentPredicate :: Regex ( _) ) )
111+ }
112+
113+ fn request_has_regex ( request : & TextSearchRequest ) -> bool {
114+ request. plans . iter ( ) . any ( plan_has_regex)
115+ }
116+
/// Row/file budgets for the phase-1 search query, derived per request by
/// `compute_search_budgets`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
struct SearchBudgets {
    // Maximum ranked rows to fetch; clamped to the per-mode fetch-limit cap.
    fetch_limit: i64,
    // Maximum candidate files; `fetch_limit` further clamped to `FILE_LIMIT_CAP`.
    file_limit: i64,
    // Per-plan row cap (default or regex variant) used inside the search CTEs.
    plan_row_limit: i64,
}
123+
124+ fn compute_search_budgets ( request : & TextSearchRequest ) -> SearchBudgets {
125+ let page_index = u64:: from ( request. page ) ;
126+ let page_size = u64:: from ( request. page_size . max ( 1 ) ) ;
127+ let ( sample_factor, fetch_limit_cap, plan_row_limit) = if request_has_regex ( request) {
128+ (
129+ u64:: from ( REGEX_FILE_SAMPLE_FACTOR . max ( 1 ) ) ,
130+ REGEX_FETCH_LIMIT_CAP ,
131+ REGEX_PLAN_ROW_LIMIT ,
132+ )
133+ } else {
134+ (
135+ u64:: from ( FILE_SAMPLE_FACTOR . max ( 1 ) ) ,
136+ DEFAULT_FETCH_LIMIT_CAP ,
137+ DEFAULT_PLAN_ROW_LIMIT ,
138+ )
139+ } ;
140+ let base_limit = page_index
141+ . saturating_add ( 1 )
142+ . saturating_mul ( page_size)
143+ . saturating_mul ( sample_factor) ;
144+ let minimum = page_size. saturating_mul ( sample_factor) ;
145+ let fetch_limit_u64 = base_limit. max ( minimum) . saturating_add ( 1 ) ;
146+ let mut fetch_limit = fetch_limit_u64. min ( i64:: MAX as u64 ) as i64 ;
147+ fetch_limit = fetch_limit. min ( fetch_limit_cap) ;
148+ let file_limit = fetch_limit. min ( FILE_LIMIT_CAP ) ;
149+
150+ SearchBudgets {
151+ fetch_limit,
152+ file_limit,
153+ plan_row_limit,
154+ }
155+ }
156+
106157fn push_search_ctes < ' a > (
107158 qb : & mut QueryBuilder < ' a , Postgres > ,
108159 request : & ' a TextSearchRequest ,
@@ -151,7 +202,7 @@ fn push_search_ctes<'a>(
151202
152203 let case_mode = resolve_case ( plan) ;
153204 let highlight_case_sensitive = matches ! ( case_mode, CaseSensitivity :: Yes ) ;
154- let prefer_repo_first = !plan. repos . is_empty ( ) ;
205+ let seed_repo_first = ! plan_has_regex ( plan ) && !plan. repos . is_empty ( ) ;
155206
156207 qb. push ( "(" ) ;
157208 qb. push (
@@ -174,7 +225,7 @@ fn push_search_ctes<'a>(
174225 " ,
175226 ) ;
176227 qb. push_bind ( plan. include_historical ) ;
177- if prefer_repo_first {
228+ if seed_repo_first {
178229 qb. push (
179230 " AS include_historical
180231 FROM (
@@ -288,27 +339,27 @@ fn push_search_ctes<'a>(
288339
289340 qb. push ( " WHERE TRUE" ) ;
290341
291- if !prefer_repo_first && !plan. repos . is_empty ( ) {
342+ if !seed_repo_first && !plan. repos . is_empty ( ) {
292343 qb. push ( " AND files.repository = ANY(" ) ;
293344 qb. push_bind ( & plan. repos ) ;
294345 qb. push ( ")" ) ;
295346 }
296347
297- if !prefer_repo_first && !plan. excluded_repos . is_empty ( ) {
348+ if !seed_repo_first && !plan. excluded_repos . is_empty ( ) {
298349 qb. push ( " AND NOT (files.repository = ANY(" ) ;
299350 qb. push_bind ( & plan. excluded_repos ) ;
300351 qb. push ( "))" ) ;
301352 }
302353
303- if !prefer_repo_first && !plan. file_globs . is_empty ( ) {
354+ if !seed_repo_first && !plan. file_globs . is_empty ( ) {
304355 for pattern in & plan. file_globs {
305356 qb. push ( " AND files.file_path ILIKE " ) ;
306357 qb. push_bind ( pattern) ;
307358 qb. push ( " ESCAPE '\\ '" ) ;
308359 }
309360 }
310361
311- if !prefer_repo_first && !plan. excluded_file_globs . is_empty ( ) {
362+ if !seed_repo_first && !plan. excluded_file_globs . is_empty ( ) {
312363 for pattern in & plan. excluded_file_globs {
313364 qb. push ( " AND files.file_path NOT ILIKE " ) ;
314365 qb. push_bind ( pattern) ;
@@ -1692,19 +1743,11 @@ ORDER BY idx
16921743 ) ) ;
16931744 }
16941745
1695- let page_index = u64:: from ( request. page ) ;
1696- let page_size = u64:: from ( request. page_size . max ( 1 ) ) ;
1697- let sample_factor = u64:: from ( FILE_SAMPLE_FACTOR . max ( 1 ) ) ;
1698- let base_limit = page_index
1699- . saturating_add ( 1 )
1700- . saturating_mul ( page_size)
1701- . saturating_mul ( sample_factor) ;
1702- let minimum = page_size. saturating_mul ( sample_factor) ;
1703- let fetch_limit_u64 = base_limit. max ( minimum) . saturating_add ( 1 ) ;
1704- let mut fetch_limit = fetch_limit_u64. min ( i64:: MAX as u64 ) as i64 ;
1705- fetch_limit = fetch_limit. min ( 5000 ) ;
1706- let file_limit = fetch_limit. min ( 25000 ) ;
1707- let plan_row_limit: i64 = 5000 ;
1746+ let SearchBudgets {
1747+ fetch_limit,
1748+ file_limit,
1749+ plan_row_limit,
1750+ } = compute_search_budgets ( request) ;
17081751
17091752 let needs_live_branch_filter = request
17101753 . plans
@@ -1967,8 +2010,8 @@ ORDER BY idx
19672010 let chunk_start_line: i32 = best_row. start_line . try_into ( ) . unwrap_or ( i32:: MAX ) ;
19682011 let best_match_line =
19692012 chunk_start_line. saturating_add ( best_row. match_line_number - 1 ) ;
1970- let best_start_line = chunk_start_line
1971- . saturating_add ( best_row. snippet_start_line_number - 1 ) ;
2013+ let best_start_line =
2014+ chunk_start_line . saturating_add ( best_row. snippet_start_line_number - 1 ) ;
19722015 let best_end_line = snippet_end_line ( & best_row. content_text , best_start_line) ;
19732016 let best_match_spans = normalize_literal_match_spans (
19742017 & best_row. content_text ,
@@ -2745,6 +2788,12 @@ impl PostgresDb {
27452788}
27462789
27472790const FILE_SAMPLE_FACTOR : u32 = 6 ;
// File sample multiplier for regex searches; deliberately smaller than
// `FILE_SAMPLE_FACTOR` so regex plans fetch fewer candidate rows.
const REGEX_FILE_SAMPLE_FACTOR: u32 = 2;
// Upper bound on the phase-1 fetch limit for non-regex searches.
const DEFAULT_FETCH_LIMIT_CAP: i64 = 5000;
// Tighter phase-1 fetch cap applied when the request contains a regex.
const REGEX_FETCH_LIMIT_CAP: i64 = 1000;
// Upper bound on the number of candidate files considered in phase 1.
const FILE_LIMIT_CAP: i64 = 25000;
// Per-plan row cap for non-regex searches.
const DEFAULT_PLAN_ROW_LIMIT: i64 = 5000;
// Tighter per-plan row cap applied when the request contains a regex.
const REGEX_PLAN_ROW_LIMIT: i64 = 1000;
27482797const INSERT_BATCH_SIZE : usize = 1000 ;
27492798
27502799#[ derive( sqlx:: FromRow ) ]
@@ -3034,7 +3083,9 @@ fn merge_overlapping_snippets(mut snippets: Vec<SearchSnippet>) -> Vec<SearchSni
30343083 merged
30353084}
30363085
3037- fn build_snippet_line_map ( snippet : & SearchSnippet ) -> BTreeMap < i32 , ( String , Vec < SearchMatchSpan > ) > {
3086+ fn build_snippet_line_map (
3087+ snippet : & SearchSnippet ,
3088+ ) -> BTreeMap < i32 , ( String , Vec < SearchMatchSpan > ) > {
30383089 let mut map = BTreeMap :: new ( ) ;
30393090 for ( idx, ( line, spans) ) in split_snippet_lines ( snippet) . into_iter ( ) . enumerate ( ) {
30403091 let line_number = snippet. start_line . saturating_add ( idx as i32 ) ;
@@ -3132,19 +3183,11 @@ mod tests {
31323183 use super :: * ;
31333184
31343185 fn build_phase1_sql ( request : & TextSearchRequest ) -> String {
3135- let page_index = u64:: from ( request. page ) ;
3136- let page_size = u64:: from ( request. page_size . max ( 1 ) ) ;
3137- let sample_factor = u64:: from ( FILE_SAMPLE_FACTOR . max ( 1 ) ) ;
3138- let base_limit = page_index
3139- . saturating_add ( 1 )
3140- . saturating_mul ( page_size)
3141- . saturating_mul ( sample_factor) ;
3142- let minimum = page_size. saturating_mul ( sample_factor) ;
3143- let fetch_limit_u64 = base_limit. max ( minimum) . saturating_add ( 1 ) ;
3144- let mut fetch_limit = fetch_limit_u64. min ( i64:: MAX as u64 ) as i64 ;
3145- fetch_limit = fetch_limit. min ( 5000 ) ;
3146- let file_limit = fetch_limit. min ( 25000 ) ;
3147- let plan_row_limit: i64 = 5000 ;
3186+ let SearchBudgets {
3187+ fetch_limit,
3188+ file_limit,
3189+ plan_row_limit,
3190+ } = compute_search_budgets ( request) ;
31483191
31493192 let needs_live_branch_filter = request
31503193 . plans
@@ -3203,11 +3246,13 @@ mod tests {
32033246 assert_eq ! ( lines. len( ) , 5 ) ;
32043247 assert_eq ! ( merged_snippet. match_spans. len( ) , 2 ) ;
32053248 assert_eq ! (
3206- & merged_snippet. content_text[ merged_snippet. match_spans[ 0 ] . start..merged_snippet. match_spans[ 0 ] . end] ,
3249+ & merged_snippet. content_text
3250+ [ merged_snippet. match_spans[ 0 ] . start..merged_snippet. match_spans[ 0 ] . end] ,
32073251 "hit_a"
32083252 ) ;
32093253 assert_eq ! (
3210- & merged_snippet. content_text[ merged_snippet. match_spans[ 1 ] . start..merged_snippet. match_spans[ 1 ] . end] ,
3254+ & merged_snippet. content_text
3255+ [ merged_snippet. match_spans[ 1 ] . start..merged_snippet. match_spans[ 1 ] . end] ,
32113256 "hit_b"
32123257 ) ;
32133258 }
@@ -3237,7 +3282,10 @@ mod tests {
32373282 let lines: Vec < & str > = merged_snippet. content_text . split ( '\n' ) . collect ( ) ;
32383283 assert_eq ! ( lines. len( ) , 5 ) ;
32393284 assert_eq ! ( lines[ 2 ] , "hit_b" ) ;
3240- assert_eq ! ( merged_snippet. match_spans, vec![ SearchMatchSpan { start: 14 , end: 19 } ] ) ;
3285+ assert_eq ! (
3286+ merged_snippet. match_spans,
3287+ vec![ SearchMatchSpan { start: 14 , end: 19 } ]
3288+ ) ;
32413289 }
32423290
32433291 #[ test]
@@ -3261,11 +3309,13 @@ mod tests {
32613309 let merged_snippet = & merged[ 0 ] ;
32623310
32633311 assert_eq ! (
3264- & merged_snippet. content_text[ merged_snippet. match_spans[ 0 ] . start..merged_snippet. match_spans[ 0 ] . end] ,
3312+ & merged_snippet. content_text
3313+ [ merged_snippet. match_spans[ 0 ] . start..merged_snippet. match_spans[ 0 ] . end] ,
32653314 "failed for block"
32663315 ) ;
32673316 assert_eq ! (
3268- & merged_snippet. content_text[ merged_snippet. match_spans[ 1 ] . start..merged_snippet. match_spans[ 1 ] . end] ,
3317+ & merged_snippet. content_text
3318+ [ merged_snippet. match_spans[ 1 ] . start..merged_snippet. match_spans[ 1 ] . end] ,
32693319 "write"
32703320 ) ;
32713321 }
@@ -3290,8 +3340,7 @@ mod tests {
32903340 let text = r#"pg_fatal("seek failed for block %u", blockno);"# ;
32913341 let original = vec ! [ SearchMatchSpan { start: 17 , end: 33 } ] ;
32923342
3293- let normalized =
3294- normalize_literal_match_spans ( text, & original, "failed for block" , true ) ;
3343+ let normalized = normalize_literal_match_spans ( text, & original, "failed for block" , true ) ;
32953344
32963345 let expected_start = text. find ( "failed for block" ) . expect ( "phrase should exist" ) ;
32973346 assert_eq ! (
@@ -3306,8 +3355,7 @@ mod tests {
33063355 #[ test]
33073356 fn normalize_literal_match_spans_preserves_regex_patterns ( ) {
33083357 let original = vec ! [ SearchMatchSpan { start: 5 , end: 11 } ] ;
3309- let normalized =
3310- normalize_literal_match_spans ( "abcde failed" , & original, "fail.*" , true ) ;
3358+ let normalized = normalize_literal_match_spans ( "abcde failed" , & original, "fail.*" , true ) ;
33113359 assert_eq ! ( normalized, original) ;
33123360 }
33133361
@@ -3325,6 +3373,56 @@ mod tests {
33253373 let sql = build_phase1_sql ( & request) ;
33263374 assert ! ( !sql. contains( "INTERSECT" ) ) ;
33273375 }
3376+
3377+ #[ test]
3378+ fn plain_repo_filtered_search_seeds_from_files ( ) {
3379+ let request = TextSearchRequest :: from_query_str ( "repo:pointer polly" ) . unwrap ( ) ;
3380+ let sql = build_phase1_sql ( & request) ;
3381+
3382+ assert ! ( sql. contains( "FROM\n files f_seed" ) ) ;
3383+ assert ! ( sql. contains( "f_seed.repository = ANY(" ) ) ;
3384+ }
3385+
3386+ #[ test]
3387+ fn regex_repo_filtered_search_seeds_from_chunks ( ) {
3388+ let request =
3389+ TextSearchRequest :: from_query_str ( "repo:pointer regex:\" unsafe\\ \\ s*\\ \\ {\" " ) . unwrap ( ) ;
3390+ let sql = build_phase1_sql ( & request) ;
3391+
3392+ assert ! ( sql. contains( "FROM\n chunks c" ) ) ;
3393+ assert ! ( !sql. contains( "f_seed.repository = ANY(" ) ) ;
3394+ assert ! ( sql. contains( "files.repository = ANY(" ) ) ;
3395+ }
3396+
3397+ #[ test]
3398+ fn regex_search_uses_smaller_phase1_budgets ( ) {
3399+ let request = TextSearchRequest :: from_query_str ( "regex:\" foo.*bar\" " ) . unwrap ( ) ;
3400+ let budgets = compute_search_budgets ( & request) ;
3401+
3402+ assert_eq ! (
3403+ budgets,
3404+ SearchBudgets {
3405+ fetch_limit: 51 ,
3406+ file_limit: 51 ,
3407+ plan_row_limit: REGEX_PLAN_ROW_LIMIT ,
3408+ }
3409+ ) ;
3410+ }
3411+
3412+ #[ test]
3413+ fn plain_search_keeps_default_phase1_budgets ( ) {
3414+ let request = TextSearchRequest :: from_query_str ( "polly" ) . unwrap ( ) ;
3415+ let budgets = compute_search_budgets ( & request) ;
3416+
3417+ assert_eq ! (
3418+ budgets,
3419+ SearchBudgets {
3420+ fetch_limit: 151 ,
3421+ file_limit: 151 ,
3422+ plan_row_limit: DEFAULT_PLAN_ROW_LIMIT ,
3423+ }
3424+ ) ;
3425+ }
33283426}
33293427
33303428fn build_search_stats ( rows : & [ RankedFileRow ] ) -> SearchResultsStats {
0 commit comments