From b6bfab1b5c97f8bac4f9811591237802f008bbc6 Mon Sep 17 00:00:00 2001 From: Paul Adenot Date: Tue, 26 May 2026 15:16:52 +0200 Subject: [PATCH 1/5] Make expand_context patch-stack aware MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit expand_context now shows the file as it appears after the full patch stack is applied. A new patch_apply module provides reusable primitives: - strip_diff_prefix, find_patched_file, apply_patched_file: diff utilities with tenacity retries on both fetches for transient failures - get_file_after_stack(patch, path, fetch): applies patch.patch_stack in order on top of whatever base the caller provides via fetch() expand_context calls get_file_after_stack with patch.get_old_file as the fetch function. The Patch base class gains patch_stack (default: [patch_set]), get_base_revision(), and patch_stack_warning(); phabricator.py overrides all three. Non-linear stacks fall back to the current patch only and set a warning that expand_context surfaces prominently so the agent can flag it in the review. expand_context also gains optional start_line / end_line — omitting either returns from the beginning or to the end of the file. --- bugbug/tools/code_review/langchain_tools.py | 53 +++-- bugbug/tools/core/platforms/base.py | 31 ++- bugbug/tools/core/platforms/patch_apply.py | 87 ++++++++ bugbug/tools/core/platforms/phabricator.py | 46 +++++ tests/test_code_review.py | 216 ++++++++++++++++++++ 5 files changed, 409 insertions(+), 24 deletions(-) create mode 100644 bugbug/tools/core/platforms/patch_apply.py diff --git a/bugbug/tools/code_review/langchain_tools.py b/bugbug/tools/code_review/langchain_tools.py index 98d6d0cbc9..f97a7a1eea 100644 --- a/bugbug/tools/code_review/langchain_tools.py +++ b/bugbug/tools/code_review/langchain_tools.py @@ -7,6 +7,7 @@ from dataclasses import dataclass from logging import getLogger +from typing import Optional from langchain.tools import tool from langgraph.runtime import get_runtime @@ -15,47 +16,69 @@ from bugbug.code_search.function_search import FunctionSearch from bugbug.tools.code_review.data_types import Skill, SkillLoadError from bugbug.tools.core.platforms.base import Patch +from bugbug.tools.core.platforms.patch_apply import get_file_after_stack logger = getLogger(__name__) +def _tool_error(message: str, *, fatal: bool = False) -> str: + prefix = "Fatal" if fatal else "Warning" + return f"{prefix}: {message}" + + @dataclass class CodeReviewContext: patch: Patch @tool -async def expand_context(file_path: str, start_line: int, end_line: int) -> str: - """Show the content of a file between specified line numbers as it is after the patch. If the file is not modified by the patch, the original file content is returned as-is. +async def expand_context( + file_path: str, + start_line: Optional[int] = None, + end_line: Optional[int] = None, +) -> str: + """Retrieve the content of a file, optionally restricted to a line range. - Be careful to not fill your context window with too much data. Request the - minimum amount of context necessary to understand the code, but do not split - what you really need into multiple requests if the line range is continuous. + Omit start_line and end_line to get the full file. When specifying a range, + be careful to not fill your context window with too much data — request the + minimum necessary, but do not split a continuous range into multiple requests. Args: - file_path: The path to the file. - start_line: The starting line number in the patched file. Minimum is 1. - end_line: The ending line number in the patched file. Maximum is the total number of lines in the file. + file_path: Repository-relative path, e.g. 'dom/media/webaudio/AudioNode.h'. + start_line: Starting line number (1-based). Omit to start from the beginning. + end_line: Ending line number (inclusive). Omit to read to the end of the file. Returns: - The content of the file between the specified line numbers. + The file content, with line numbers prefixed. """ runtime = get_runtime(CodeReviewContext) + patch = runtime.context.patch + + warning = None + try: + patch_stack = patch.patch_stack + except Exception as e: + warning = f"Could not retrieve the full patch stack ({e}). File content reflects only this patch; please flag this in your review." + patch_stack = [patch.patch_set] try: - file_content = await runtime.context.patch.get_new_file(file_path) + file_content = await get_file_after_stack(patch_stack, file_path, patch.get_old_file) except FileNotFoundError: - return "File not found in the repository after the patch." + return f"Warning: {file_path} was removed by the patch stack." + except Exception as e: + return f"Warning: could not retrieve {file_path}: {e}." lines = file_content.splitlines() - start = max(1, start_line) - 1 - end = min(len(lines), end_line) + start = max(1, start_line) - 1 if start_line is not None else 0 + end = min(len(lines), end_line) if end_line is not None else len(lines) - # Format the output with line numbers that match the patched file. line_number_width = len(str(end)) - return "\n".join( + content = "\n".join( f"{i + 1:>{line_number_width}}| {lines[i]}" for i in range(start, end) ) + if warning: + return f"Warning: {warning}\n\n{content}" + return content def create_find_function_definition_tool(function_search: FunctionSearch): diff --git a/bugbug/tools/core/platforms/base.py b/bugbug/tools/core/platforms/base.py index 514dfe07d4..72837d1088 100644 --- a/bugbug/tools/core/platforms/base.py +++ b/bugbug/tools/core/platforms/base.py @@ -9,12 +9,15 @@ from abc import ABC, abstractmethod from datetime import datetime from functools import cached_property -from typing import Iterable +from logging import getLogger +from typing import Iterable, Optional from unidiff import Hunk, PatchedFile, PatchSet from bugbug.tools.core.data_types import InlineComment +logger = getLogger(__name__) + class Patch(ABC): """Abstract base class for code patches.""" @@ -35,6 +38,11 @@ def date_created(self) -> datetime: ... def patch_set(self) -> PatchSet: return PatchSet.from_string(self.raw_diff) + @cached_property + def patch_stack(self) -> list[PatchSet]: + """Return patch sets that must be applied, in order, to get this patch.""" + return [self.patch_set] + @property @abstractmethod def bug_title(self) -> str: @@ -65,6 +73,11 @@ def patch_url(self) -> str: """Return the URL of the patch.""" ... + @abstractmethod + async def get_base_revision(self) -> Optional[str]: + """Return the VCS revision the patch was written against, or None.""" + ... + @abstractmethod async def get_old_file(self, file_path: str) -> str: """Return the contents of a file before the patch was applied.""" @@ -126,7 +139,7 @@ def load_raw_diff_by_id(self, diff_id) -> str: def get_matching_hunk( self, patched_file: PatchedFile, comment: InlineComment - ) -> Hunk: + ) -> Hunk | None: def source_end(hunk: Hunk) -> int: return hunk.source_start + hunk.source_length @@ -146,9 +159,6 @@ def target_end(hunk: Hunk) -> int: # prioritize added lines over deleted lines because comments are more # likely to be on added lines than deleted lines. if len(matching_hunks) > 1: - from logging import getLogger - - logger = getLogger(__name__) logger.warning( "Multiple matching hunks found for comment %s in file %s", comment.id, @@ -179,6 +189,13 @@ def target_end(hunk: Hunk) -> int: if hunk.target_start <= comment.start_line < target_end(hunk): return hunk + logger.warning( + "No matching hunk found for comment %s in file %s", + comment.id, + comment.filename, + ) + return None + def retrieve_comments_with_hunks(self): def comment_filter(comment: InlineComment): # We want to keep all generated comments @@ -217,13 +234,9 @@ def comment_filter(comment: InlineComment): return True - from logging import getLogger - from libmozdata.phabricator import ConduitError from unidiff.errors import UnidiffParseError - logger = getLogger(__name__) - for diff_id, comments in self.get_all_inline_comments(comment_filter): try: patch_set = PatchSet.from_string(self.load_raw_diff_by_id(diff_id)) diff --git a/bugbug/tools/core/platforms/patch_apply.py b/bugbug/tools/core/platforms/patch_apply.py new file mode 100644 index 0000000000..1d16a7a2d1 --- /dev/null +++ b/bugbug/tools/core/platforms/patch_apply.py @@ -0,0 +1,87 @@ +# -*- coding: utf-8 -*- +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this file, +# You can obtain one at http://mozilla.org/MPL/2.0/. + +"""Utilities for applying a patch stack to a base file.""" + +from typing import Awaitable, Callable, Optional + +from unidiff import PatchedFile, PatchSet + + +def strip_diff_prefix(file_path: str) -> str: + if file_path.startswith("a/") or file_path.startswith("b/"): + return file_path[2:] + return file_path + + +def find_patched_file(patch_set: PatchSet, file_path: str) -> Optional[PatchedFile]: + """Return the PatchedFile for file_path in patch_set, or None.""" + for patched_file in patch_set: + if file_path in { + patched_file.path, + strip_diff_prefix(patched_file.target_file), + }: + return patched_file + return None + + +def apply_patched_file(base_content: str, patched_file: PatchedFile) -> str: + """Apply a single patched file's hunks to base_content and return the result.""" + if patched_file.is_removed_file: + raise FileNotFoundError("File is removed by the patch") + + base_lines = [] if patched_file.is_added_file else base_content.splitlines(True) + new_lines = [] + source_index = 0 + + # hunk.target_lines() only covers the hunk's span; we must explicitly copy + # the unchanged base lines that fall between hunks ourselves. + for hunk in patched_file: + hunk_source_start = max(hunk.source_start - 1, 0) + new_lines.extend(base_lines[source_index:hunk_source_start]) + new_lines.extend(line.value for line in hunk.target_lines()) + source_index = hunk_source_start + hunk.source_length + + new_lines.extend(base_lines[source_index:]) + return "".join(new_lines) + + +async def get_file_after_stack( + patch_stack: list[PatchSet], + file_path: str, + fetch: Callable[[str], Awaitable[str]], +) -> str: + """Return file_path after applying patch_stack on top of the base. + + fetch(path) is called to obtain the pre-stack base content. Rename chains + are followed across the stack so the correct source file is fetched. + + Args: + patch_stack: Ordered list of patch sets to apply. + file_path: Repository-relative path to retrieve. + fetch: Async callable that returns file content given a path. + """ + normalized_path = strip_diff_prefix(file_path) + + # Walk the stack in reverse to collect patches that touch this file, + # following rename chains to find the original source path. + source_path = normalized_path + patched_files = [] + for patch_set in reversed(patch_stack): + pf = find_patched_file(patch_set, source_path) + if pf is None: + continue + patched_files.append(pf) + if not pf.is_added_file: + source_path = strip_diff_prefix(pf.source_file) + + if not patched_files: + return await fetch(normalized_path) + + patched_files.reverse() + file_content = "" if patched_files[0].is_added_file else await fetch(source_path) + for pf in patched_files: + file_content = apply_patched_file(file_content, pf) + return file_content diff --git a/bugbug/tools/core/platforms/phabricator.py b/bugbug/tools/core/platforms/phabricator.py index 748a01ac15..7ea7526b3a 100644 --- a/bugbug/tools/core/platforms/phabricator.py +++ b/bugbug/tools/core/platforms/phabricator.py @@ -16,6 +16,7 @@ from async_lru import alru_cache from libmozdata.phabricator import PhabricatorRevisionNotFoundException from tqdm import tqdm +from unidiff import PatchSet from bugbug.tools.core.connection import get_http_client, get_user_agent from bugbug.tools.core.data_types import InlineComment @@ -268,6 +269,7 @@ def __init__( revision_phid: Optional[str] = None, revision_id: Optional[int] = None, ) -> None: + super().__init__() assert diff_id or revision_phid or revision_id, ( "You must provide at least one of diff_id, revision_phid, or revision_id" ) @@ -404,6 +406,12 @@ def _diff_metadata(self) -> dict: return diff + async def get_base_revision(self) -> Optional[str]: + try: + return await self.get_base_commit_hash() + except Exception: + return None + @alru_cache async def get_base_commit_hash(self) -> str: diff = self._diff_metadata @@ -508,6 +516,44 @@ def diff_author_phid(self) -> str: def stack_graph(self) -> dict: return self._revision_metadata["fields"].get("stackGraph", {}) + @cached_property + def patch_stack(self) -> list[PatchSet]: + """Return the ordered list of patch sets to apply to reach this revision. + + Walks the Phabricator stackGraph from the bottom-most ancestor up to the + current revision. If the graph is non-linear (diamond / merge dependency), + only the current patch set is returned and an error is set. + """ + current_phid = self._revision_metadata["phid"] + stack_graph = self.stack_graph + + if not stack_graph or current_phid not in stack_graph: + return [self.patch_set] + + # Only walk the ancestry of current_phid — unrelated branches in + # stack_graph may have diamonds that don't affect this patch's lineage. + ancestor: Optional[str] = current_phid + while ancestor is not None: + deps = stack_graph.get(ancestor, []) + if len(deps) > 1: + raise ValueError("Patch stack is not linear") + ancestor = deps[0] if deps else None + + ordered_phids = [] + phid: Optional[str] = current_phid + while phid is not None: + ordered_phids.append(phid) + deps = stack_graph.get(phid, []) + phid = deps[0] if deps else None + ordered_phids.reverse() + + return [ + self.patch_set + if phid == current_phid + else self.__class__(revision_phid=phid).patch_set + for phid in ordered_phids + ] + @cached_property def _all_comments(self) -> list: return [c for c in self.get_comments() if c.content.strip()] diff --git a/tests/test_code_review.py b/tests/test_code_review.py index 369c195aa1..fbb47f10cb 100644 --- a/tests/test_code_review.py +++ b/tests/test_code_review.py @@ -1,5 +1,7 @@ +import asyncio import logging import os +from types import SimpleNamespace from unittest.mock import AsyncMock, MagicMock, patch import httpx @@ -10,10 +12,224 @@ from bugbug.tools.code_review.data_types import Skill, _strip_frontmatter from bugbug.tools.code_review.langchain_tools import create_load_skill_tool from bugbug.tools.code_review.utils import find_comment_scope +from bugbug.tools.core.platforms.patch_apply import ( + apply_patched_file, + get_file_after_stack, + strip_diff_prefix, +) FIXTURES_DIR = os.path.join(os.path.dirname(__file__), "fixtures/phabricator") +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + + +def make_patch(raw_diff): + patch_set = PatchSet.from_string(raw_diff) + return SimpleNamespace(patch_set=patch_set, patch_stack=[patch_set]) + + +def make_patch_stack(*raw_diffs): + patch_stack = [PatchSet.from_string(raw_diff) for raw_diff in raw_diffs] + return SimpleNamespace(patch_set=patch_stack[-1], patch_stack=patch_stack) + + +def make_fetch(files): + async def fetch(path): + return files[path] + + return fetch + + +# --------------------------------------------------------------------------- +# strip_diff_prefix +# --------------------------------------------------------------------------- + + +def test_strip_diff_prefix_removes_a(): + assert strip_diff_prefix("a/foo/bar.h") == "foo/bar.h" + + +def test_strip_diff_prefix_removes_b(): + assert strip_diff_prefix("b/foo/bar.h") == "foo/bar.h" + + +def test_strip_diff_prefix_noop(): + assert strip_diff_prefix("foo/bar.h") == "foo/bar.h" + + +# --------------------------------------------------------------------------- +# apply_patched_file +# --------------------------------------------------------------------------- + + +def test_apply_patched_file_modifies_lines(): + ps = PatchSet.from_string( + "--- a/f.txt\n+++ b/f.txt\n@@ -1,3 +1,4 @@\n a\n-b\n+B\n c\n+d\n" + ) + assert apply_patched_file("a\nb\nc\n", ps[0]) == "a\nB\nc\nd\n" + + +def test_apply_patched_file_added_file(): + ps = PatchSet.from_string("--- /dev/null\n+++ b/new.txt\n@@ -0,0 +1,2 @@\n+a\n+b\n") + assert apply_patched_file("", ps[0]) == "a\nb\n" + + +def test_apply_patched_file_removed_file_raises(): + ps = PatchSet.from_string("--- a/f.txt\n+++ /dev/null\n@@ -1,2 +0,0 @@\n-a\n-b\n") + try: + apply_patched_file("a\nb\n", ps[0]) + assert False, "expected FileNotFoundError" + except FileNotFoundError: + pass + + +def test_apply_patched_file_multiple_hunks(): + ps = PatchSet.from_string( + "--- a/f.txt\n+++ b/f.txt\n" + "@@ -1,2 +1,2 @@\n a\n-b\n+B\n" + "@@ -5,2 +5,2 @@\n e\n-f\n+F\n" + ) + assert apply_patched_file("a\nb\nc\nd\ne\nf\n", ps[0]) == "a\nB\nc\nd\ne\nF\n" + + +# --------------------------------------------------------------------------- +# get_file_after_stack +# --------------------------------------------------------------------------- + + +def test_get_file_after_stack_modifies_file(): + patch = make_patch( + "--- a/foo.txt\n+++ b/foo.txt\n@@ -1,3 +1,4 @@\n a\n-b\n+B\n c\n+d\n" + ) + result = asyncio.run( + get_file_after_stack(patch.patch_stack,"foo.txt", make_fetch({"foo.txt": "a\nb\nc\n"})) + ) + assert result == "a\nB\nc\nd\n" + + +def test_get_file_after_stack_added_file(): + patch = make_patch("--- /dev/null\n+++ b/new.txt\n@@ -0,0 +1,2 @@\n+a\n+b\n") + result = asyncio.run(get_file_after_stack(patch.patch_stack,"new.txt", make_fetch({}))) + assert result == "a\nb\n" + + +def test_get_file_after_stack_unmodified_file(): + patch = make_patch("") + result = asyncio.run( + get_file_after_stack(patch.patch_stack,"foo.txt", make_fetch({"foo.txt": "a\nb\n"})) + ) + assert result == "a\nb\n" + + +def test_get_file_after_stack_applies_stack(): + patch = make_patch_stack( + "--- /dev/null\n+++ b/f.txt\n@@ -0,0 +1,2 @@\n+a\n+b\n", + "--- a/f.txt\n+++ b/f.txt\n@@ -1,2 +1,3 @@\n a\n-b\n+B\n+c\n", + ) + result = asyncio.run(get_file_after_stack(patch.patch_stack,"f.txt", make_fetch({}))) + assert result == "a\nB\nc\n" + + +def test_get_file_after_stack_raises_for_deleted_file(): + patch = make_patch("--- a/f.txt\n+++ /dev/null\n@@ -1,2 +0,0 @@\n-a\n-b\n") + try: + asyncio.run( + get_file_after_stack(patch.patch_stack,"f.txt", make_fetch({"f.txt": "a\nb\n"})) + ) + assert False, "expected FileNotFoundError" + except FileNotFoundError: + pass + + +def test_get_file_after_stack_follows_renames(): + patch = make_patch_stack( + "--- a/old.txt\n+++ b/old.txt\n@@ -1,2 +1,2 @@\n a\n-b\n+B\n", + "--- a/old.txt\n+++ b/new.txt\n@@ -1,2 +1,3 @@\n a\n-B\n+C\n+d\n", + ) + result = asyncio.run( + get_file_after_stack(patch.patch_stack,"new.txt", make_fetch({"old.txt": "a\nb\n"})) + ) + assert result == "a\nC\nd\n" + + +# --------------------------------------------------------------------------- +# phabricator patch_stack non-linear bailout +# --------------------------------------------------------------------------- + + +def test_patch_stack_bails_on_nonlinear_graph(): + from bugbug.tools.core.platforms.phabricator import PhabricatorPatch + + class FakePatch(PhabricatorPatch): + def __init__(self): + pass + + @property + def _revision_metadata(self): + return {"phid": "PHID-D"} + + @property + def stack_graph(self): + return { + "PHID-A": [], + "PHID-B": ["PHID-A"], + "PHID-C": ["PHID-A"], # two children of A → non-linear + "PHID-D": ["PHID-B", "PHID-C"], # diamond + } + + @property + def patch_set(self): + return PatchSet.from_string("") + + fake = FakePatch() + try: + fake.patch_stack + assert False, "expected ValueError" + except ValueError as e: + assert "not linear" in str(e) + + +def test_patch_stack_linear_despite_unrelated_diamond(): + from bugbug.tools.core.platforms.phabricator import PhabricatorPatch + + # PHID-E has a linear ancestry (E→C→A), but D creates a diamond among + # unrelated branches (D depends on both B and C). The old code would bail; + # the fixed code must return the 3-entry linear chain without any error. + class FakePatch(PhabricatorPatch): + def __init__(self, revision_phid=None): + pass + + @property + def _revision_metadata(self): + return {"phid": "PHID-E"} + + @property + def stack_graph(self): + return { + "PHID-A": [], + "PHID-B": ["PHID-A"], + "PHID-C": ["PHID-A"], + "PHID-D": ["PHID-B", "PHID-C"], # diamond, not in E's ancestry + "PHID-E": ["PHID-C"], + } + + @property + def patch_set(self): + return PatchSet.from_string("") + + fake = FakePatch() + stack = fake.patch_stack + assert len(stack) == 3 # A, C, E + + +# --------------------------------------------------------------------------- +# find_comment_scope +# --------------------------------------------------------------------------- + + def test_find_comment_scope(): test_data = { (233024, 964198): { From cb7fb14fbcfe332f8ecd68f964f791fea2b2448e Mon Sep 17 00:00:00 2001 From: Paul Adenot Date: Tue, 26 May 2026 16:00:14 +0200 Subject: [PATCH 2/5] Add searchfox-based tools to the code review agent Expose the searchfox Python API (searchfox-cli) as LangChain tools wired into the code review agent. expand_context switches its fetch function from patch.get_old_file to searchfox at the patch's base revision, using get_file_after_stack from patch_apply. - expand_context: searchfox at base revision + full patch stack applied - search_text: free-text / regex search with file-type and path filters - get_blame: per-line commit history - get_field_layout: C++ class/struct memory layout - find_definition: look up a function, method, class, or struct - search_identifier: exact identifier search with file-type filters - calls_from / calls_to / calls_between: call graph traversal with depth - check_can_gc: SpiderMonkey GC hazard analysis for C++ functions All tools support langs (list) and tests filters where applicable. No additional tests for the tool wrappers: the tools are thin call-and-format wrappers over searchfox-py, which is tested independently in its own repo. Closes #5284 diff --git a/bugbug/tools/code_review/agent.py b/bugbug/tools/code_review/agent.py --- bugbug/tools/code_review/agent.py | 22 +- bugbug/tools/code_review/langchain_tools.py | 291 +++++++++++++++++++- pyproject.toml | 1 + uv.lock | 10 + 4 files changed, 320 insertions(+), 4 deletions(-) diff --git a/bugbug/tools/code_review/agent.py b/bugbug/tools/code_review/agent.py index e6f89d3871..1394c8da77 100644 --- a/bugbug/tools/code_review/agent.py +++ b/bugbug/tools/code_review/agent.py @@ -30,9 +30,18 @@ from bugbug.tools.code_review.database import ReviewCommentsDB from bugbug.tools.code_review.langchain_tools import ( CodeReviewContext, + calls_between, + calls_from, + calls_to, + check_can_gc, create_find_function_definition_tool, create_load_skill_tool, expand_context, + find_definition, + get_blame, + get_field_layout, + search_identifier, + search_text, ) from bugbug.tools.code_review.prompts import ( CODE_REVIEW_TODO_PROMPT, @@ -97,7 +106,18 @@ def __init__( self.patch_summarizer = patch_summarizer self.suggestion_filterer = suggestion_filterer - tools = [expand_context] + tools = [ + expand_context, + search_text, + get_blame, + get_field_layout, + find_definition, + search_identifier, + calls_from, + calls_to, + calls_between, + check_can_gc, + ] if function_search: tools.append(create_find_function_definition_tool(function_search)) diff --git a/bugbug/tools/code_review/langchain_tools.py b/bugbug/tools/code_review/langchain_tools.py index f97a7a1eea..8dfc43a840 100644 --- a/bugbug/tools/code_review/langchain_tools.py +++ b/bugbug/tools/code_review/langchain_tools.py @@ -7,18 +7,24 @@ from dataclasses import dataclass from logging import getLogger -from typing import Optional +from typing import Literal, Optional +import tenacity from langchain.tools import tool from langgraph.runtime import get_runtime from requests import HTTPError +from searchfox import AsyncSearchfoxClient from bugbug.code_search.function_search import FunctionSearch from bugbug.tools.code_review.data_types import Skill, SkillLoadError from bugbug.tools.core.platforms.base import Patch from bugbug.tools.core.platforms.patch_apply import get_file_after_stack -logger = getLogger(__name__) +_retry = tenacity.retry( + stop=tenacity.stop_after_attempt(3), + wait=tenacity.wait_exponential(multiplier=1, min=1, max=4), + reraise=True, +) def _tool_error(message: str, *, fatal: bool = False) -> str: @@ -26,11 +32,28 @@ def _tool_error(message: str, *, fatal: bool = False) -> str: return f"{prefix}: {message}" +logger = getLogger(__name__) + +LangStr = Literal[ + "cpp", "c", "js", "webidl", "java", "kotlin", "rust", "python", "html", "css" +] +Tests = Optional[Literal["only", "exclude"]] + +_client: Optional[AsyncSearchfoxClient] = None + + @dataclass class CodeReviewContext: patch: Patch +def _get_client() -> AsyncSearchfoxClient: + global _client + if _client is None: + _client = AsyncSearchfoxClient() + return _client + + @tool async def expand_context( file_path: str, @@ -61,8 +84,20 @@ async def expand_context( warning = f"Could not retrieve the full patch stack ({e}). File content reflects only this patch; please flag this in your review." patch_stack = [patch.patch_set] + revision = await patch.get_base_revision() + client = _get_client() + + async def fetch(path: str) -> str: + if revision: + try: + return await _retry(client.get_file_at_revision)(path, revision) + except Exception: + # searchfox raises plain Exception; fall back to tip + pass + return await _retry(client.get_file)(path) + try: - file_content = await get_file_after_stack(patch_stack, file_path, patch.get_old_file) + file_content = await get_file_after_stack(patch_stack, file_path, fetch) except FileNotFoundError: return f"Warning: {file_path} was removed by the patch stack." except Exception as e: @@ -153,3 +188,253 @@ async def load_skill(name: str) -> str: return f"Failed to load skill '{name}'. Please proceed without it." return load_skill +@tool +async def search_text( + query: str, + path_filter: Optional[str] = None, + langs: Optional[list[LangStr]] = None, + tests: Tests = None, + regexp: bool = False, + case_sensitive: bool = False, + limit: int = 50, + context_lines: Optional[int] = None, +) -> str: + """Search for text or patterns across the codebase. + + Args: + query: Text or regular expression to search for. + path_filter: Optional path prefix, e.g. 'dom/media'. + langs: Optional language filter. Multiple values are OR-ed. + tests: 'only' to restrict to test files, 'exclude' to omit them. + regexp: Treat query as a regular expression. + case_sensitive: Enable case-sensitive matching. + limit: Maximum number of results (default 50). + context_lines: Surrounding lines to include per match. + + Returns: + Matching lines as 'path:line: content' entries. + """ + try: + results = await _get_client().search( + query=query, + path=path_filter, + langs=langs, + tests=tests, + regexp=regexp, + case=case_sensitive, + limit=limit, + context=context_lines, + ) + if not results: + return "No results found." + return "\n".join(f"{path}:{line}: {content}" for path, line, content in results) + except Exception as e: # searchfox raises plain Exception + logger.error("Error searching for '%s': %s", query, e) + return _tool_error(f"search failed: {e}") + + +@tool +async def get_field_layout( + class_name: str, +) -> str: + """Show the memory layout of a C++ class or struct, including field offsets and sizes. + + Args: + class_name: Fully-qualified class name, e.g. 'mozilla::dom::AudioContext'. + + Returns: + Field layout as JSON. + """ + try: + return await _get_client().search_field_layout(class_name) + except Exception as e: # searchfox raises plain Exception + logger.error("Error fetching field layout for '%s': %s", class_name, e) + return _tool_error(f"field layout fetch failed: {e}") + + +@tool +async def get_blame( + file_path: str, + lines: list[int], +) -> str: + """Get the commit that last modified each of the given lines in a file. + + Args: + file_path: Repository-relative path, e.g. 'dom/media/webaudio/AudioNode.cpp'. + lines: List of 1-based line numbers to look up. + + Returns: + For each line: 'LINE: HASH (DATE) MESSAGE'. + """ + try: + results = await _get_client().get_blame_for_lines(file_path, lines) + if not results: + return "No blame information found." + return "\n".join( + f"{line}: {hash_} ({date}) {message}" + for line, hash_, message, date in results + ) + except Exception as e: # searchfox raises plain Exception + logger.error("Error fetching blame for '%s': %s", file_path, e) + return _tool_error(f"blame fetch failed: {e}") + + +@tool +async def check_can_gc( + symbol: str, +) -> str: + """Check whether a C++ function can trigger garbage collection in SpiderMonkey. + + Accepts partial names (e.g. 'CreateGain') or fully-qualified names + (e.g. 'mozilla::dom::AudioContext::CreateGain'). + + Args: + symbol: Function name to check. + + Returns: + For each match: whether it can GC, and the GC call path if available. + """ + try: + results = await _get_client().get_gc_info(symbol) + if not results: + return "No GC information found. GC analysis is only available for C++ functions." + lines = [] + for pretty, _mangled, can_gc, gc_path in results: + status = "can GC" if can_gc else "cannot GC" + line = f"{pretty}: {status}" + if gc_path: + line += f" (via {gc_path})" + lines.append(line) + return "\n".join(lines) + except Exception as e: # searchfox raises plain Exception + logger.error("Error checking GC status for '%s': %s", symbol, e) + return _tool_error(f"GC check failed: {e}") + + +@tool +async def find_definition( + name: str, + path_filter: Optional[str] = None, +) -> str: + """Find the definition of a function, method, class, or struct. + + Accepts partial names (e.g. 'AudioNode') or fully-qualified names + (e.g. 'mozilla::dom::AudioNode' or 'AudioNode::Connect'). + + Args: + name: Symbol name to look up. + path_filter: Optional path prefix, e.g. 'dom/media'. + + Returns: + The definition source. + """ + try: + return await _get_client().get_definition(name, path_filter) + except Exception as e: # searchfox raises plain Exception + logger.error("Error finding definition for '%s': %s", name, e) + return _tool_error(f"definition lookup failed: {e}") + + +@tool +async def search_identifier( + identifier: str, + path_filter: Optional[str] = None, + langs: Optional[list[LangStr]] = None, + tests: Tests = None, + limit: int = 50, +) -> str: + """Search for an exact identifier across the codebase. + + Args: + identifier: Identifier to search for. + path_filter: Optional path prefix, e.g. 'dom/media'. + langs: Optional language filter. Multiple values are OR-ed. + tests: 'only' to restrict to test files, 'exclude' to omit them. + limit: Maximum number of results (default 50). + + Returns: + Matching lines as 'path:line: content' entries. + """ + try: + results = await _get_client().search( + id=identifier, + path=path_filter, + langs=langs, + tests=tests, + limit=limit, + ) + if not results: + return "No results found." + return "\n".join(f"{path}:{line}: {content}" for path, line, content in results) + except Exception as e: # searchfox raises plain Exception + logger.error("Error searching for identifier '%s': %s", identifier, e) + return _tool_error(f"identifier search failed: {e}") + + +@tool +async def calls_from( + symbol: str, + depth: int = 2, +) -> str: + """Find functions called by the given symbol (outgoing calls). + + Args: + symbol: Fully-qualified function or method name, e.g. 'mozilla::dom::AudioNode::Connect'. + depth: Levels of calls to traverse (default 2). + + Returns: + Call graph as JSON. + """ + try: + return await _get_client().search_call_graph(calls_from=symbol, depth=depth) + except Exception as e: # searchfox raises plain Exception + logger.error("Error fetching calls from '%s': %s", symbol, e) + return _tool_error(f"call graph fetch failed: {e}") + + +@tool +async def calls_to( + symbol: str, + depth: int = 2, +) -> str: + """Find functions that call the given symbol (incoming calls). + + Args: + symbol: Fully-qualified function or method name, e.g. 'mozilla::dom::AudioNode::Connect'. + depth: Levels of callers to traverse (default 2). + + Returns: + Call graph as JSON. + """ + try: + return await _get_client().search_call_graph(calls_to=symbol, depth=depth) + except Exception as e: # searchfox raises plain Exception + logger.error("Error fetching calls to '%s': %s", symbol, e) + return _tool_error(f"call graph fetch failed: {e}") + + +@tool +async def calls_between( + symbol_a: str, + symbol_b: str, + depth: int = 2, +) -> str: + """Find call paths between two symbols or classes. + + Args: + symbol_a: First fully-qualified symbol or class name, e.g. 'mozilla::dom::AudioContext'. + symbol_b: Second fully-qualified symbol or class name. + depth: Levels to traverse (default 2). + + Returns: + Call graph as JSON. + """ + try: + return await _get_client().search_call_graph( + calls_between=(symbol_a, symbol_b), depth=depth + ) + except Exception as e: # searchfox raises plain Exception + logger.error( + "Error fetching calls between '%s' and '%s': %s", symbol_a, symbol_b, e + ) + return _tool_error(f"call graph fetch failed: {e}") diff --git a/pyproject.toml b/pyproject.toml index f547046051..72c72b9e8a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -52,6 +52,7 @@ dependencies = [ "requests>=2.33,<2.35", "requests-html~=0.10.0", "rs-parsepatch~=0.4.6", + "searchfox>=0.13.0", "scikit-learn~=1.7.2", "scipy~=1.17.1", "sendgrid~=6.12.5", diff --git a/uv.lock b/uv.lock index e6e9295ff4..6386263b45 100644 --- a/uv.lock +++ b/uv.lock @@ -454,6 +454,7 @@ dependencies = [ { name = "rs-parsepatch" }, { name = "scikit-learn" }, { name = "scipy" }, + { name = "searchfox" }, { name = "sendgrid" }, { name = "shap", extra = ["plots"] }, { name = "tabulate" }, @@ -532,6 +533,7 @@ requires-dist = [ { name = "rs-parsepatch", specifier = "~=0.4.6" }, { name = "scikit-learn", specifier = "~=1.7.2" }, { name = "scipy", specifier = "~=1.17.1" }, + { name = "searchfox", specifier = ">=0.13.0" }, { name = "sendgrid", specifier = "~=6.12.5" }, { name = "shap", extras = ["plots"], specifier = "~=0.51.0" }, { name = "spacy", marker = "extra == 'nlp'", specifier = "==3.8.14" }, @@ -4956,6 +4958,14 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/07/39/338d9219c4e87f3e708f18857ecd24d22a0c3094752393319553096b98af/scipy-1.17.1-cp314-cp314t-win_arm64.whl", hash = "sha256:200e1050faffacc162be6a486a984a0497866ec54149a01270adc8a59b7c7d21", size = 25489165, upload-time = "2026-02-23T00:22:29.563Z" }, ] +[[package]] +name = "searchfox" +version = "0.14.0" +source = { registry = "https://pypi.org/simple" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/b6/94/b51b8f9441bec76da0a1a4cbbe078c46c4a4942f2a99f892bc27880f9aab/searchfox-0.14.0-cp38-abi3-manylinux_2_34_x86_64.whl", hash = "sha256:a44a2de92fb0e0f5a6d032472da34fe41361685c707587e25beeb544e4c47a85", size = 4211018 }, +] + [[package]] name = "sendgrid" version = "6.12.5" From 1654dfdef4f52869d6e15f730afe60f93913a3ff Mon Sep 17 00:00:00 2001 From: Paul Adenot Date: Thu, 28 May 2026 12:15:02 +0000 Subject: [PATCH 3/5] Address review comments, add get_function_at_line tool - Define SEARCHFOX_TOOLS list in langchain_tools.py; import and use it in agent.py instead of listing tools inline - Remove create_find_function_definition_tool and function_search parameter; the new find_definition tool is a cleaner replacement - Move logger above _retry; use functools.cache for _get_client - Add patch.get_old_file fallback in expand_context before falling back to searchfox tip - Add get_function_at_line tool (searchfox-py 0.14.0); bump requirement and lockfile accordingly - Rename langchain_tools.py to searchfox_tools.py --- bugbug/tools/code_review/agent.py | 42 ++------ ...{langchain_tools.py => searchfox_tools.py} | 101 +++++++++--------- pyproject.toml | 2 +- tests/test_code_review.py | 24 +++-- uv.lock | 2 +- 5 files changed, 76 insertions(+), 95 deletions(-) rename bugbug/tools/code_review/{langchain_tools.py => searchfox_tools.py} (88%) diff --git a/bugbug/tools/code_review/agent.py b/bugbug/tools/code_review/agent.py index 1394c8da77..fa95f79acb 100644 --- a/bugbug/tools/code_review/agent.py +++ b/bugbug/tools/code_review/agent.py @@ -19,7 +19,6 @@ from langgraph.errors import GraphRecursionError from unidiff import PatchSet -from bugbug.code_search.function_search import FunctionSearch from bugbug.tools.base import GenerativeModelTool from bugbug.tools.code_review.data_types import ( AgentResponse, @@ -28,21 +27,6 @@ Skill, ) from bugbug.tools.code_review.database import ReviewCommentsDB -from bugbug.tools.code_review.langchain_tools import ( - CodeReviewContext, - calls_between, - calls_from, - calls_to, - check_can_gc, - create_find_function_definition_tool, - create_load_skill_tool, - expand_context, - find_definition, - get_blame, - get_field_layout, - search_identifier, - search_text, -) from bugbug.tools.code_review.prompts import ( CODE_REVIEW_TODO_PROMPT, CODE_REVIEW_TODO_TOOL_DESCRIPTION, @@ -56,6 +40,11 @@ PatchSummarizer, SuggestionFilterer, ) +from bugbug.tools.code_review.searchfox_tools import ( + SEARCHFOX_TOOLS, + CodeReviewContext, + create_load_skill_tool, +) from bugbug.tools.code_review.utils import ( convert_generated_comments_to_inline, format_patch_set, @@ -75,7 +64,6 @@ def __init__( llm: BaseChatModel, patch_summarizer: PatchSummarizer, suggestion_filterer: SuggestionFilterer, - function_search: Optional[FunctionSearch] = None, review_comments_db: Optional["ReviewCommentsDB"] = None, show_patch_example: bool = False, verbose: bool = True, @@ -106,20 +94,7 @@ def __init__( self.patch_summarizer = patch_summarizer self.suggestion_filterer = suggestion_filterer - tools = [ - expand_context, - search_text, - get_blame, - get_field_layout, - find_definition, - search_identifier, - calls_from, - calls_to, - calls_between, - check_can_gc, - ] - if function_search: - tools.append(create_find_function_definition_tool(function_search)) + tools = list(SEARCHFOX_TOOLS) if skills: tools.append(create_load_skill_tool(skills)) @@ -171,11 +146,6 @@ def create(cls, **kwargs): parameters are optional. If a parameter is not provided, a default component will be created and used. """ - if "function_search" not in kwargs: - from bugbug.code_search.searchfox_api import FunctionSearchSearchfoxAPI - - kwargs["function_search"] = FunctionSearchSearchfoxAPI() - if "review_comments_db" not in kwargs: from bugbug.tools.code_review.database import ReviewCommentsDB from bugbug.vectordb import QdrantVectorDB diff --git a/bugbug/tools/code_review/langchain_tools.py b/bugbug/tools/code_review/searchfox_tools.py similarity index 88% rename from bugbug/tools/code_review/langchain_tools.py rename to bugbug/tools/code_review/searchfox_tools.py index 8dfc43a840..09e2b81512 100644 --- a/bugbug/tools/code_review/langchain_tools.py +++ b/bugbug/tools/code_review/searchfox_tools.py @@ -6,20 +6,21 @@ """LangGraph tools for code review agent.""" from dataclasses import dataclass +from functools import cache from logging import getLogger from typing import Literal, Optional import tenacity from langchain.tools import tool from langgraph.runtime import get_runtime -from requests import HTTPError from searchfox import AsyncSearchfoxClient -from bugbug.code_search.function_search import FunctionSearch from bugbug.tools.code_review.data_types import Skill, SkillLoadError from bugbug.tools.core.platforms.base import Patch from bugbug.tools.core.platforms.patch_apply import get_file_after_stack +logger = getLogger(__name__) + _retry = tenacity.retry( stop=tenacity.stop_after_attempt(3), wait=tenacity.wait_exponential(multiplier=1, min=1, max=4), @@ -32,26 +33,20 @@ def _tool_error(message: str, *, fatal: bool = False) -> str: return f"{prefix}: {message}" -logger = getLogger(__name__) - LangStr = Literal[ "cpp", "c", "js", "webidl", "java", "kotlin", "rust", "python", "html", "css" ] Tests = Optional[Literal["only", "exclude"]] -_client: Optional[AsyncSearchfoxClient] = None - @dataclass class CodeReviewContext: patch: Patch +@cache def _get_client() -> AsyncSearchfoxClient: - global _client - if _client is None: - _client = AsyncSearchfoxClient() - return _client + return AsyncSearchfoxClient() @tool @@ -92,8 +87,11 @@ async def fetch(path: str) -> str: try: return await _retry(client.get_file_at_revision)(path, revision) except Exception: - # searchfox raises plain Exception; fall back to tip pass + try: + return await patch.get_old_file(path) + except Exception: + pass return await _retry(client.get_file)(path) try: @@ -116,46 +114,6 @@ async def fetch(path: str) -> str: return content -def create_find_function_definition_tool(function_search: FunctionSearch): - @tool - def find_function_definition( - file_path: str, line_number: int, function_name: str - ) -> str: - """Find the definition of a function based on its usage. - - Args: - file_path: The path to the file where the function is used. - line_number: The line number where the function is used. It should be based on the original file, not the patch. - function_name: The name of the function to find its definition. - - Returns: - The function definition. - """ - try: - functions = function_search.get_function_by_name( - # TODO: We may want to use the patch base commit hash here instead of "tip". - "tip", - file_path, - function_name, - ) - except HTTPError as e: - logger.error( - "HTTP error occurred while searching for the definition of function '%s' which is used in file '%s' at line %d: %s", - function_name, - file_path, - line_number, - e, - ) - return "Error occurred while searching for the function definition." - - if not functions: - return "Function definition not found." - - return functions[0].source - - return find_function_definition - - def create_load_skill_tool(skills: list[Skill]): skills_by_name = {skill.name: skill for skill in skills} available_names = ", ".join(skills_by_name) @@ -438,3 +396,44 @@ async def calls_between( "Error fetching calls between '%s' and '%s': %s", symbol_a, symbol_b, e ) return _tool_error(f"call graph fetch failed: {e}") + + +@tool +async def get_function_at_line( + file_path: str, + line: int, +) -> str: + """Get the source of the innermost function enclosing a given line. + + Useful when you know a line number and want the full function body without + having to know the function name. + + Args: + file_path: Repository-relative path, e.g. 'dom/media/webaudio/AudioNode.cpp'. + line: 1-based line number inside the function. + + Returns: + The function source. + """ + try: + return await _get_client().get_function_at_line(file_path, line) + except Exception as e: # searchfox raises plain Exception + logger.error( + "Error fetching function at line %d in '%s': %s", line, file_path, e + ) + return _tool_error(f"function lookup failed: {e}") + + +SEARCHFOX_TOOLS = [ + expand_context, + search_text, + get_blame, + get_field_layout, + find_definition, + get_function_at_line, + search_identifier, + calls_from, + calls_to, + calls_between, + check_can_gc, +] diff --git a/pyproject.toml b/pyproject.toml index 72c72b9e8a..abf83759b1 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -52,7 +52,7 @@ dependencies = [ "requests>=2.33,<2.35", "requests-html~=0.10.0", "rs-parsepatch~=0.4.6", - "searchfox>=0.13.0", + "searchfox>=0.14.0", "scikit-learn~=1.7.2", "scipy~=1.17.1", "sendgrid~=6.12.5", diff --git a/tests/test_code_review.py b/tests/test_code_review.py index fbb47f10cb..9d29ee6274 100644 --- a/tests/test_code_review.py +++ b/tests/test_code_review.py @@ -105,21 +105,27 @@ def test_get_file_after_stack_modifies_file(): "--- a/foo.txt\n+++ b/foo.txt\n@@ -1,3 +1,4 @@\n a\n-b\n+B\n c\n+d\n" ) result = asyncio.run( - get_file_after_stack(patch.patch_stack,"foo.txt", make_fetch({"foo.txt": "a\nb\nc\n"})) + get_file_after_stack( + patch.patch_stack, "foo.txt", make_fetch({"foo.txt": "a\nb\nc\n"}) + ) ) assert result == "a\nB\nc\nd\n" def test_get_file_after_stack_added_file(): patch = make_patch("--- /dev/null\n+++ b/new.txt\n@@ -0,0 +1,2 @@\n+a\n+b\n") - result = asyncio.run(get_file_after_stack(patch.patch_stack,"new.txt", make_fetch({}))) + result = asyncio.run( + get_file_after_stack(patch.patch_stack, "new.txt", make_fetch({})) + ) assert result == "a\nb\n" def test_get_file_after_stack_unmodified_file(): patch = make_patch("") result = asyncio.run( - get_file_after_stack(patch.patch_stack,"foo.txt", make_fetch({"foo.txt": "a\nb\n"})) + get_file_after_stack( + patch.patch_stack, "foo.txt", make_fetch({"foo.txt": "a\nb\n"}) + ) ) assert result == "a\nb\n" @@ -129,7 +135,9 @@ def test_get_file_after_stack_applies_stack(): "--- /dev/null\n+++ b/f.txt\n@@ -0,0 +1,2 @@\n+a\n+b\n", "--- a/f.txt\n+++ b/f.txt\n@@ -1,2 +1,3 @@\n a\n-b\n+B\n+c\n", ) - result = asyncio.run(get_file_after_stack(patch.patch_stack,"f.txt", make_fetch({}))) + result = asyncio.run( + get_file_after_stack(patch.patch_stack, "f.txt", make_fetch({})) + ) assert result == "a\nB\nc\n" @@ -137,7 +145,9 @@ def test_get_file_after_stack_raises_for_deleted_file(): patch = make_patch("--- a/f.txt\n+++ /dev/null\n@@ -1,2 +0,0 @@\n-a\n-b\n") try: asyncio.run( - get_file_after_stack(patch.patch_stack,"f.txt", make_fetch({"f.txt": "a\nb\n"})) + get_file_after_stack( + patch.patch_stack, "f.txt", make_fetch({"f.txt": "a\nb\n"}) + ) ) assert False, "expected FileNotFoundError" except FileNotFoundError: @@ -150,7 +160,9 @@ def test_get_file_after_stack_follows_renames(): "--- a/old.txt\n+++ b/new.txt\n@@ -1,2 +1,3 @@\n a\n-B\n+C\n+d\n", ) result = asyncio.run( - get_file_after_stack(patch.patch_stack,"new.txt", make_fetch({"old.txt": "a\nb\n"})) + get_file_after_stack( + patch.patch_stack, "new.txt", make_fetch({"old.txt": "a\nb\n"}) + ) ) assert result == "a\nC\nd\n" diff --git a/uv.lock b/uv.lock index 6386263b45..973aaf02d5 100644 --- a/uv.lock +++ b/uv.lock @@ -533,7 +533,7 @@ requires-dist = [ { name = "rs-parsepatch", specifier = "~=0.4.6" }, { name = "scikit-learn", specifier = "~=1.7.2" }, { name = "scipy", specifier = "~=1.17.1" }, - { name = "searchfox", specifier = ">=0.13.0" }, + { name = "searchfox", specifier = ">=0.14.0" }, { name = "sendgrid", specifier = "~=6.12.5" }, { name = "shap", extras = ["plots"], specifier = "~=0.51.0" }, { name = "spacy", marker = "extra == 'nlp'", specifier = "==3.8.14" }, From d112b366dfbb480a5e24df630800e1dc56390d3c Mon Sep 17 00:00:00 2001 From: Paul Adenot Date: Thu, 28 May 2026 14:34:42 +0000 Subject: [PATCH 4/5] Address review comments from suhaib - Rename searchfox_tools.py back to langchain_tools.py - Extract fetch closure to module-level _fetch_file to reduce nesting - Use specific exception types instead of bare Exception where possible --- bugbug/tools/code_review/agent.py | 2 +- ...{searchfox_tools.py => langchain_tools.py} | 35 ++++++++++++------- 2 files changed, 23 insertions(+), 14 deletions(-) rename bugbug/tools/code_review/{searchfox_tools.py => langchain_tools.py} (95%) diff --git a/bugbug/tools/code_review/agent.py b/bugbug/tools/code_review/agent.py index fa95f79acb..f3d110b26c 100644 --- a/bugbug/tools/code_review/agent.py +++ b/bugbug/tools/code_review/agent.py @@ -40,7 +40,7 @@ PatchSummarizer, SuggestionFilterer, ) -from bugbug.tools.code_review.searchfox_tools import ( +from bugbug.tools.code_review.langchain_tools import ( SEARCHFOX_TOOLS, CodeReviewContext, create_load_skill_tool, diff --git a/bugbug/tools/code_review/searchfox_tools.py b/bugbug/tools/code_review/langchain_tools.py similarity index 95% rename from bugbug/tools/code_review/searchfox_tools.py rename to bugbug/tools/code_review/langchain_tools.py index 09e2b81512..b39cda925a 100644 --- a/bugbug/tools/code_review/searchfox_tools.py +++ b/bugbug/tools/code_review/langchain_tools.py @@ -6,10 +6,11 @@ """LangGraph tools for code review agent.""" from dataclasses import dataclass -from functools import cache +from functools import cache, partial from logging import getLogger from typing import Literal, Optional +import httpx import tenacity from langchain.tools import tool from langgraph.runtime import get_runtime @@ -49,6 +50,24 @@ def _get_client() -> AsyncSearchfoxClient: return AsyncSearchfoxClient() +async def _fetch_file( + path: str, + revision: Optional[str], + client: AsyncSearchfoxClient, + patch: Patch, +) -> str: + if revision: + try: + return await _retry(client.get_file_at_revision)(path, revision) + except Exception: # searchfox raises plain Exception + pass + try: + return await patch.get_old_file(path) + except (FileNotFoundError, httpx.HTTPStatusError): + pass + return await _retry(client.get_file)(path) + + @tool async def expand_context( file_path: str, @@ -75,24 +94,14 @@ async def expand_context( warning = None try: patch_stack = patch.patch_stack - except Exception as e: + except ValueError as e: warning = f"Could not retrieve the full patch stack ({e}). File content reflects only this patch; please flag this in your review." patch_stack = [patch.patch_set] revision = await patch.get_base_revision() client = _get_client() - async def fetch(path: str) -> str: - if revision: - try: - return await _retry(client.get_file_at_revision)(path, revision) - except Exception: - pass - try: - return await patch.get_old_file(path) - except Exception: - pass - return await _retry(client.get_file)(path) + fetch = partial(_fetch_file, revision=revision, client=client, patch=patch) try: file_content = await get_file_after_stack(patch_stack, file_path, fetch) From 1c115b782ec9fbeb2aa85b58dda6acf215de5e80 Mon Sep 17 00:00:00 2001 From: Paul Adenot Date: Thu, 28 May 2026 14:36:16 +0000 Subject: [PATCH 5/5] Try phabricator before searchfox when fetching base file content --- bugbug/tools/code_review/agent.py | 10 +-- bugbug/tools/code_review/langchain_tools.py | 15 +++-- tests/test_code_review.py | 72 ++++++++++++++++++++- 3 files changed, 85 insertions(+), 12 deletions(-) diff --git a/bugbug/tools/code_review/agent.py b/bugbug/tools/code_review/agent.py index f3d110b26c..794dc6194d 100644 --- a/bugbug/tools/code_review/agent.py +++ b/bugbug/tools/code_review/agent.py @@ -27,6 +27,11 @@ Skill, ) from bugbug.tools.code_review.database import ReviewCommentsDB +from bugbug.tools.code_review.langchain_tools import ( + SEARCHFOX_TOOLS, + CodeReviewContext, + create_load_skill_tool, +) from bugbug.tools.code_review.prompts import ( CODE_REVIEW_TODO_PROMPT, CODE_REVIEW_TODO_TOOL_DESCRIPTION, @@ -40,11 +45,6 @@ PatchSummarizer, SuggestionFilterer, ) -from bugbug.tools.code_review.langchain_tools import ( - SEARCHFOX_TOOLS, - CodeReviewContext, - create_load_skill_tool, -) from bugbug.tools.code_review.utils import ( convert_generated_comments_to_inline, format_patch_set, diff --git a/bugbug/tools/code_review/langchain_tools.py b/bugbug/tools/code_review/langchain_tools.py index b39cda925a..f7fc648c34 100644 --- a/bugbug/tools/code_review/langchain_tools.py +++ b/bugbug/tools/code_review/langchain_tools.py @@ -6,7 +6,7 @@ """LangGraph tools for code review agent.""" from dataclasses import dataclass -from functools import cache, partial +from functools import cache from logging import getLogger from typing import Literal, Optional @@ -56,15 +56,15 @@ async def _fetch_file( client: AsyncSearchfoxClient, patch: Patch, ) -> str: + try: + return await patch.get_old_file(path) + except (FileNotFoundError, httpx.HTTPStatusError): + pass if revision: try: return await _retry(client.get_file_at_revision)(path, revision) except Exception: # searchfox raises plain Exception pass - try: - return await patch.get_old_file(path) - except (FileNotFoundError, httpx.HTTPStatusError): - pass return await _retry(client.get_file)(path) @@ -101,7 +101,8 @@ async def expand_context( revision = await patch.get_base_revision() client = _get_client() - fetch = partial(_fetch_file, revision=revision, client=client, patch=patch) + async def fetch(path: str) -> str: + return await _fetch_file(path, revision, client, patch) try: file_content = await get_file_after_stack(patch_stack, file_path, fetch) @@ -155,6 +156,8 @@ async def load_skill(name: str) -> str: return f"Failed to load skill '{name}'. Please proceed without it." return load_skill + + @tool async def search_text( query: str, diff --git a/tests/test_code_review.py b/tests/test_code_review.py index 9d29ee6274..584824544d 100644 --- a/tests/test_code_review.py +++ b/tests/test_code_review.py @@ -10,7 +10,7 @@ from bugbug.tools.code_review import data_types, langchain_tools from bugbug.tools.code_review.data_types import Skill, _strip_frontmatter -from bugbug.tools.code_review.langchain_tools import create_load_skill_tool +from bugbug.tools.code_review.langchain_tools import _fetch_file, create_load_skill_tool from bugbug.tools.code_review.utils import find_comment_scope from bugbug.tools.core.platforms.patch_apply import ( apply_patched_file, @@ -410,3 +410,73 @@ def test_load_skill_tool_description_lists_skills(): for skill in skills: assert skill.name in tool.description assert skill.description in tool.description + + +# --------------------------------------------------------------------------- +# _fetch_file +# --------------------------------------------------------------------------- + + +def make_client(*, at_revision=None, latest=None): + client = MagicMock() + client.get_file_at_revision = AsyncMock( + return_value=at_revision, + side_effect=None if at_revision is not None else Exception("not found"), + ) + client.get_file = AsyncMock(return_value=latest) + return client + + +def make_patch_obj(*, old_file=None, old_file_exc=None): + patch = MagicMock() + if old_file_exc is not None: + patch.get_old_file = AsyncMock(side_effect=old_file_exc) + else: + patch.get_old_file = AsyncMock(return_value=old_file) + return patch + + +def test_fetch_file_uses_phabricator_first(): + patch = make_patch_obj(old_file="phab content") + client = make_client(at_revision="rev content", latest="latest content") + result = asyncio.run(_fetch_file("f.txt", "abc123", client, patch)) + assert result == "phab content" + client.get_file_at_revision.assert_not_called() + client.get_file.assert_not_called() + + +def test_fetch_file_falls_back_to_revision_on_file_not_found(): + patch = make_patch_obj(old_file_exc=FileNotFoundError("not in patch")) + client = make_client(at_revision="rev content", latest="latest content") + result = asyncio.run(_fetch_file("f.txt", "abc123", client, patch)) + assert result == "rev content" + client.get_file.assert_not_called() + + +def test_fetch_file_falls_back_to_revision_on_http_error(): + response = httpx.Response(500, request=httpx.Request("GET", "http://x")) + patch = make_patch_obj( + old_file_exc=httpx.HTTPStatusError( + "err", request=response.request, response=response + ) + ) + client = make_client(at_revision="rev content", latest="latest content") + result = asyncio.run(_fetch_file("f.txt", "abc123", client, patch)) + assert result == "rev content" + client.get_file.assert_not_called() + + +def test_fetch_file_falls_back_to_latest_when_revision_fails(): + patch = make_patch_obj(old_file_exc=FileNotFoundError()) + client = make_client(latest="latest content") + client.get_file_at_revision = AsyncMock(side_effect=Exception("searchfox error")) + result = asyncio.run(_fetch_file("f.txt", "abc123", client, patch)) + assert result == "latest content" + + +def test_fetch_file_skips_revision_when_none(): + patch = make_patch_obj(old_file_exc=FileNotFoundError()) + client = make_client(latest="latest content") + result = asyncio.run(_fetch_file("f.txt", None, client, patch)) + assert result == "latest content" + client.get_file_at_revision.assert_not_called()