From b4c3bcbc0417d3df5b22212969fcc7e318211823 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Wed, 25 Mar 2026 07:13:47 +0000
Subject: [PATCH 1/2] Initial plan


From 5fd3b0e0cbac6f04f0ff0415a78580559e199316 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Wed, 25 Mar 2026 07:22:16 +0000
Subject: [PATCH 2/2] feat: support multiple compliance documents (PDFs, URLs,
 text files) for PR review

Co-authored-by: JayPat2003 <87596889+JayPat2003@users.noreply.github.com>
Agent-Logs-Url: https://github.com/JayPat2003/github-architect-code-reviewer/sessions/5b791906-a427-47fb-81ce-7446faa63057
---
 README.md         | 50 +++++++++++++++++++++++++----------
 src/cli.py        | 50 +++++++++++++++++++++++------------
 src/doc_loader.py | 41 ++++++++++++++++++++++++-----
 src/reporter.py   |  6 ++++-
 src/reviewer.py   | 66 ++++++++++++++++++++++++++++++-----------------
 src/types.py      | 10 ++++---
 6 files changed, 156 insertions(+), 67 deletions(-)

diff --git a/README.md b/README.md
index fea37f3..02af405 100644
--- a/README.md
+++ b/README.md
@@ -43,8 +43,8 @@ This tool automates the architecture review step using **GitHub Copilot AI**.
 When a developer opens a Pull Request, this tool:
 
 1. **Reads** the code changes from GitHub automatically.
-2. **Reads** your organisation's architecture document (PDF, web page, or text file).
-3. **Sends both** to GitHub Copilot AI and asks it to check for violations.
+2. **Reads** one or more of your organisation's compliance documents (PDFs, web pages, or text/Markdown files).
+3. **Sends everything** to GitHub Copilot AI and asks it to check for violations across all documents.
 4. **Produces** a structured report listing every issue found, with severity levels and suggested fixes.
 
 The result is an instant, consistent, and repeatable architecture review — every single time a Pull Request is raised.
@@ -312,7 +312,7 @@ To generate a token:
 
 ## Running the Tool
 
-### Basic command
+### Basic command (single compliance document)
 
 ```bash
 python -m src.cli review \
@@ -322,7 +322,23 @@ python -m src.cli review \
   --doc    path/to/architecture.pdf
 ```
 
-### With a URL as the architecture document
+### Multiple compliance documents
+
+Repeat `--doc` for each additional document — any combination of PDFs, URLs, and text/Markdown files is supported:
+
+```bash
+python -m src.cli review \
+  --owner  your-org \
+  --repo   your-repo \
+  --pr     42 \
+  --doc    docs/architecture-standards.pdf \
+  --doc    https://your-intranet.com/security-policy \
+  --doc    docs/company-guidelines.md
+```
+
+The AI will check the PR diff against **all** supplied documents and report every change that violates any of them.
+
+### With a URL as the compliance document
 
 ```bash
 python -m src.cli review \
@@ -355,13 +371,13 @@ python -m src.cli review --help
 
 The generated JSON report contains the following sections:
 
-| Field            | Description                                              |
-|------------------|----------------------------------------------------------|
-| `meta`           | PR details: owner, repo, number, title, and review date |
-| `passed`         | `true` if no errors were found, `false` otherwise        |
-| `summary`        | A paragraph written by the AI summarising the review     |
-| `comments`       | A list of specific issues found (see below)              |
-| `files_reviewed` | Every file that was part of the Pull Request             |
+| Field              | Description                                                         |
+|--------------------|---------------------------------------------------------------------|
+| `meta`             | PR details: owner, repo, number, title, review date, and the list of compliance documents used |
+| `passed`           | `true` if no errors were found, `false` otherwise                   |
+| `summary`          | A paragraph written by the AI summarising the review                |
+| `comments`         | A list of specific issues found (see below)                         |
+| `files_reviewed`   | Every file that was part of the Pull Request                        |
 
 ### Comment severity levels
 
@@ -380,7 +396,11 @@ The generated JSON report contains the following sections:
     "repo": "payment-service",
     "pr_number": 42,
     "pr_title": "Add Stripe payment integration",
-    "reviewed_at": "20240615_143022"
+    "reviewed_at": "20240615_143022",
+    "compliance_docs": [
+      { "source": "docs/architecture-standards.pdf", "doc_type": "pdf" },
+      { "source": "https://your-intranet.com/security-policy", "doc_type": "url" }
+    ]
   },
   "passed": false,
   "summary": "The PR introduces a payment module but contains a hardcoded API key in config.py, which directly violates the organisation's secrets management policy.",
@@ -439,9 +459,11 @@ jobs:
             --owner  ${{ github.repository_owner }} \
             --repo   ${{ github.event.repository.name }} \
             --pr     ${{ github.event.pull_request.number }} \
-            --doc    docs/architecture-standards.pdf
+            --doc    docs/architecture-standards.pdf \
+            --doc    docs/security-policy.md \
+            --doc    https://your-intranet.com/company-guidelines
 ```
 
-When this workflow is added to a repository, **every Pull Request will be automatically reviewed** against the architecture document. If violations are found, the check will fail and the merge will be blocked until the issues are resolved.
+When this workflow is added to a repository, **every Pull Request will be automatically reviewed** against all configured compliance documents. If violations are found, the check will fail and the merge will be blocked until the issues are resolved.
 
 ---
\ No newline at end of file
diff --git a/src/cli.py b/src/cli.py
index c714e1f..ac372ff 100644
--- a/src/cli.py
+++ b/src/cli.py
@@ -14,22 +14,31 @@
     # 1. Create a .env file with your token:
     #       GITHUB_TOKEN=ghp_...
     #
-    # 2. Basic usage:
+    # 2. Single compliance document:
     #       python -m src.cli review \
     #           --owner  <github-owner> \
     #           --repo   <repo-name>   \
     #           --pr     <pr-number>   \
     #           --doc    path/to/architecture.pdf
     #
-    # 3. Override the output directory:
+    # 3. Multiple compliance documents (PDFs, URLs, text files):
+    #       python -m src.cli review \
+    #           --owner  <github-owner> \
+    #           --repo   <repo-name>   \
+    #           --pr     <pr-number>   \
+    #           --doc    path/to/architecture.pdf \
+    #           --doc    https://your-intranet.com/security-policy \
+    #           --doc    docs/company-guidelines.md
+    #
+    # 4. Override the output directory:
     #       python -m src.cli review ... --output ./my-reports
     #
-    # 4. Show help:
+    # 5. Show help:
     #       python -m src.cli --help
     #       python -m src.cli review --help
 
 Pipeline triggered by this file:
-    fetch_pull_request()  →  load_document()  →  run_review()  →  save_report()
+    fetch_pull_request()  →  load_documents()  →  run_review()  →  save_report()
 """
 
 import os
@@ -46,7 +55,7 @@
 
 
 @click.group()
-@click.version_option("1.0.0")
+@click.version_option("1.1.0")
 def main() -> None:
     """AI-powered code review against architecture principles using GitHub Copilot."""
 
@@ -55,19 +64,24 @@ def main() -> None:
 @click.option("--owner",     required=True,                                      help="GitHub repository owner")
 @click.option("--repo",      required=True,                                      help="GitHub repository name")
 @click.option("--pr",        "pr_number", required=True, type=int,               help="Pull request number")
-@click.option("--doc",       "doc_path",  required=True,                         help="Path or URL to the architecture document (PDF, URL, or text file)")
+@click.option("--doc",       "doc_paths", required=True, multiple=True,
+              help=(
+                  "Path or URL to a compliance / architecture document "
+                  "(PDF, URL, or text file). "
+                  "Repeat this option to supply multiple documents."
+              ))
 @click.option("--output",    "output_dir",
               default=os.getenv("REPORT_OUTPUT_DIR", "./reports"),
               show_default=True,                                                  help="Directory for the generated report")
-def review(owner: str, repo: str, pr_number: int, doc_path: str, output_dir: str) -> None:
+def review(owner: str, repo: str, pr_number: int, doc_paths: tuple, output_dir: str) -> None:
     """
-    Review a pull request against an architecture document.
+    Review a pull request against one or more compliance documents.
 
     Steps performed:
         1. Validate that GITHUB_TOKEN is present in the environment.
         2. Fetch the PR diff from GitHub via github_client.py.
-        3. Load and parse the architecture document via doc_loader.py.
-        4. Send the diff + document to the Copilot API via reviewer.py.
+        3. Load and parse every compliance document via doc_loader.py.
+        4. Send the diff + all documents to the Copilot API via reviewer.py.
         5. Persist the structured report to disk via reporter.py.
         6. Exit 0 if no errors were found, exit 1 otherwise (CI-friendly).
 
@@ -75,7 +89,8 @@ def review(owner: str, repo: str, pr_number: int, doc_path: str, output_dir: str
         owner      : GitHub org or user who owns the repository.
         repo       : Repository name.
         pr_number  : PR number to review (--pr flag).
-        doc_path   : Local file path or HTTPS URL of the architecture doc.
+        doc_paths  : One or more local file paths or HTTPS URLs of compliance
+                     documents. Supply --doc multiple times for multiple sources.
         output_dir : Directory where the report file will be written.
     """
     # ── 1. Guard: token must exist before any network call ────────────────────
@@ -85,11 +100,12 @@ def review(owner: str, repo: str, pr_number: int, doc_path: str, output_dir: str
         sys.exit(1)
 
     console.print(f"[bold cyan]Reviewing PR #{pr_number}[/] in [green]{owner}/{repo}[/]")
-    console.print(f"Architecture doc: [yellow]{doc_path}[/]")
+    for i, doc in enumerate(doc_paths, start=1):
+        console.print(f"Compliance doc {i}: [yellow]{doc}[/]")
 
     # Lazy imports keep CLI startup fast (heavy deps load only when 'review' runs)
     from src.github_client import fetch_pull_request
-    from src.doc_loader import load_document
+    from src.doc_loader import load_documents
     from src.reviewer import run_review
     from src.reporter import save_report
 
@@ -97,13 +113,13 @@ def review(owner: str, repo: str, pr_number: int, doc_path: str, output_dir: str
     with console.status("Fetching pull request…"):
         pr = fetch_pull_request(owner, repo, pr_number, github_token)
 
-    # ── 3. Load architecture document ─────────────────────────────────────────
-    with console.status("Loading architecture document…"):
-        arch_doc = load_document(doc_path)
+    # ── 3. Load compliance documents ──────────────────────────────────────────
+    with console.status(f"Loading {len(doc_paths)} compliance document(s)…"):
+        docs = load_documents(list(doc_paths))
 
     # ── 4. Run Copilot review ─────────────────────────────────────────────────
     with console.status("Running Copilot review…"):
-        result = run_review(pr, arch_doc)
+        result = run_review(pr, docs)
 
     # ── 5. Save report ────────────────────────────────────────────────────────
     Path(output_dir).mkdir(parents=True, exist_ok=True)
diff --git a/src/doc_loader.py b/src/doc_loader.py
index b600d5f..b71b384 100644
--- a/src/doc_loader.py
+++ b/src/doc_loader.py
@@ -2,20 +2,22 @@
 doc_loader.py — Architecture document loader.
 
 Purpose:
-    Accepts a file path or URL pointing to an architecture document,
-    extracts its plain-text content, and returns an ArchitectureDoc object.
-    Supports three input types:
+    Accepts one or more file paths or URLs pointing to architecture /
+    compliance documents, extracts their plain-text content, and returns
+    the results as ArchitectureDoc objects.
+    Supports three input types per source:
         - 'pdf'  : Local PDF file  (parsed via pdfminer.six)
         - 'url'  : HTTPS web page  (scraped via requests + BeautifulSoup)
-        - 'text' : Plain .txt file (read directly)
+        - 'text' : Plain .txt / .md file (read directly)
 
 How it fits in the pipeline:
-    cli.py  ──calls──>  load_document()  ──returns──>  ArchitectureDoc
+    cli.py  ──calls──>  load_documents()  ──returns──>  List[ArchitectureDoc]
 """
 
 import re
 from io import StringIO
 from pathlib import Path
+from typing import List
 
 import requests
 from bs4 import BeautifulSoup
@@ -46,7 +48,7 @@ def _load_url(url: str) -> str:
 
 
 def _load_text(path: str) -> str:
-    """Read a plain .txt file directly."""
+    """Read a plain text or markdown file directly."""
     return Path(path).read_text(encoding="utf-8").strip()
 
 
@@ -60,7 +62,7 @@ def load_document(source: str) -> ArchitectureDoc:
         3. Anything else                        → plain text loader.
 
     Args:
-        source: File path (PDF or .txt) or HTTPS URL.
+        source: File path (PDF, .txt, .md) or HTTPS URL.
 
     Returns:
         ArchitectureDoc with source, extracted content, and doc_type set.
@@ -84,3 +86,28 @@ def load_document(source: str) -> ArchitectureDoc:
         doc_type = "text"
 
     return ArchitectureDoc(source=source, content=content, doc_type=doc_type)
+
+
+def load_documents(sources: List[str]) -> List[ArchitectureDoc]:
+    """
+    Load one or more compliance / architecture documents.
+
+    Each source is processed independently via :func:`load_document`, so
+    any mix of PDFs, URLs, and plain-text files is valid.
+
+    Args:
+        sources: Non-empty list of file paths or URLs.
+
+    Returns:
+        List of ArchitectureDoc objects in the same order as *sources*.
+
+    Raises:
+        ValueError: If *sources* is empty.
+        Any exception raised by :func:`load_document` for a source is
+        propagated unchanged, so a single bad source aborts the whole load
+        instead of silently yielding an incomplete document set.
+    """
+    if not sources:
+        raise ValueError("At least one compliance document source must be provided.")
+    return [load_document(s) for s in sources]
+
diff --git a/src/reporter.py b/src/reporter.py
index e9078f5..3cbdb49 100644
--- a/src/reporter.py
+++ b/src/reporter.py
@@ -58,6 +58,10 @@ def save_report(result: ReviewResult, output_dir: str) -> str:
             "pr_number": pr.number,
             "pr_title":  pr.title,
             "reviewed_at": timestamp,
+            "compliance_docs": [
+                {"source": doc.source, "doc_type": doc.doc_type}
+                for doc in result.compliance_docs
+            ],
         },
         "passed":   result.passed,
         "summary":  result.summary,
@@ -86,4 +90,4 @@ def save_report(result: ReviewResult, output_dir: str) -> str:
     report_path.write_text(json.dumps(payload, indent=2, ensure_ascii=False), encoding="utf-8")
 
     # 4. Return path as string for cli.py to display
-    return str(report_path)
\ No newline at end of file
+    return str(report_path)
diff --git a/src/reviewer.py b/src/reviewer.py
index 4ff7cd4..031022f 100644
--- a/src/reviewer.py
+++ b/src/reviewer.py
@@ -2,15 +2,15 @@
 reviewer.py — GitHub Copilot API integration for code review.
 
 Purpose:
-    Builds a structured prompt from the PR diff and architecture document,
-    sends it to the GitHub Copilot chat completions API via the openai SDK,
-    parses the JSON response into ReviewComment objects, and returns a
-    ReviewResult.
+    Builds a structured prompt from the PR diff and one or more compliance /
+    architecture documents, sends it to the GitHub Copilot chat completions
+    API via the openai SDK, parses the JSON response into ReviewComment
+    objects, and returns a ReviewResult.
 
 How it fits in the pipeline:
-    cli.py  ──calls──>  run_review(pr, arch_doc)  ──returns──>  ReviewResult
-                                                                      │
-                                                               reporter.py uses it
+    cli.py  ──calls──>  run_review(pr, docs)  ──returns──>  ReviewResult
+                                                                  │
+                                                           reporter.py uses it
 
 Environment variables required:
     GITHUB_TOKEN : Used as the Bearer token for Copilot API auth.
@@ -27,18 +27,18 @@
 
 # ── Prompt builder ────────────────────────────────────────────────────────────
 
-def _build_prompt(pr: PullRequest, arch_doc: ArchitectureDoc) -> str:
+def _build_prompt(pr: PullRequest, docs: List[ArchitectureDoc]) -> str:
     """
     Construct the user message sent to the Copilot model.
 
     Includes:
         - PR title and description
-        - Architecture document content
+        - All compliance / architecture document contents (clearly labelled)
         - Unified diffs of every changed file
 
     Args:
-        pr      : Fetched PullRequest object.
-        arch_doc: Loaded ArchitectureDoc object.
+        pr  : Fetched PullRequest object.
+        docs: One or more loaded ArchitectureDoc objects.
 
     Returns:
         A single formatted string ready to send as the user message.
@@ -53,10 +53,19 @@ def _build_prompt(pr: PullRequest, arch_doc: ArchitectureDoc) -> str:
 
     diffs = "\n\n".join(diff_sections)
 
-    return f"""You are a senior software architect reviewing a Pull Request for compliance with the project's architecture principles.
+    # Build a clearly labelled section for every compliance document
+    doc_sections = []
+    for i, doc in enumerate(docs, start=1):
+        doc_sections.append(
+            f"### Compliance Document {i}: {doc.source} [{doc.doc_type}]\n"
+            f"{doc.content}"
+        )
+    docs_block = "\n\n".join(doc_sections)
+
+    return f"""You are a senior software architect reviewing a Pull Request for compliance with the project's architecture principles and company policies.
 
-## Architecture Document
-{arch_doc.content}
+## Compliance & Architecture Documents
+{docs_block}
 
 ## Pull Request
 **Title:** {pr.title}
@@ -66,7 +75,7 @@ def _build_prompt(pr: PullRequest, arch_doc: ArchitectureDoc) -> str:
 {diffs}
 
 ## Your Task
-Review every changed file against the architecture document.
+Review every changed file against ALL of the compliance and architecture documents listed above.
 Return a JSON object with this exact shape:
 {{
   "summary": "<one paragraph overall assessment>",
@@ -87,7 +96,7 @@ def _build_prompt(pr: PullRequest, arch_doc: ArchitectureDoc) -> str:
 
 # ── Response parser ───────────────────────────────────────────────────────────
 
-def _parse_response(raw: str, pr: PullRequest) -> ReviewResult:
+def _parse_response(raw: str, pr: PullRequest, docs: List[ArchitectureDoc]) -> ReviewResult:
     """
     Parse the raw JSON string returned by the Copilot model.
 
@@ -95,8 +104,9 @@ def _parse_response(raw: str, pr: PullRequest) -> ReviewResult:
     so the pipeline never crashes due to a malformed LLM response.
 
     Args:
-        raw: Raw string content from the model's message.
-        pr : The original PullRequest (attached to the result).
+        raw : Raw string content from the model's message.
+        pr  : The original PullRequest (attached to the result).
+        docs: The compliance documents used in this review.
 
     Returns:
         A fully-populated ReviewResult.
@@ -118,6 +128,7 @@ def _parse_response(raw: str, pr: PullRequest) -> ReviewResult:
             comments=comments,
             summary=data.get("summary", ""),
             passed=data.get("passed", True),
+            compliance_docs=docs,
         )
     except (json.JSONDecodeError, KeyError) as exc:
         # Graceful fallback — surface the raw response as an error comment
@@ -134,23 +145,29 @@ def _parse_response(raw: str, pr: PullRequest) -> ReviewResult:
             ],
             summary="Review could not be parsed.",
             passed=False,
+            compliance_docs=docs,
         )
 
 
 # ── Public entry point ────────────────────────────────────────────────────────
 
-def run_review(pr: PullRequest, arch_doc: ArchitectureDoc) -> ReviewResult:
+def run_review(pr: PullRequest, docs: List[ArchitectureDoc]) -> ReviewResult:
     """
-    Orchestrate the full Copilot review for one PR.
+    Orchestrate the full Copilot review for one PR against one or more
+    compliance / architecture documents.
 
     Uses GitHub Models API (models.inference.ai.azure.com) which accepts
     a GitHub PAT directly — no token exchange needed.
 
     Steps:
         1. Initialise the OpenAI client pointed at GitHub Models API.
-        2. Build the prompt from the PR diff and architecture document.
+        2. Build the prompt from the PR diff and all compliance documents.
         3. Send a chat completion request to gpt-4o.
         4. Parse and return the structured ReviewResult.
+
+    Args:
+        pr  : The PullRequest to review.
+        docs: One or more ArchitectureDoc objects to check compliance against.
     """
     # Explicitly remove any OPENAI_BASE_URL or OPENAI_API_KEY that could
     # redirect the client to the internal Copilot endpoint
@@ -164,7 +181,7 @@ def run_review(pr: PullRequest, arch_doc: ArchitectureDoc) -> ReviewResult:
     )
 
     # 2. Build prompt
-    prompt = _build_prompt(pr, arch_doc)
+    prompt = _build_prompt(pr, docs)
 
     # 3. Call the API
     response = client.chat.completions.create(
@@ -174,7 +191,8 @@ def run_review(pr: PullRequest, arch_doc: ArchitectureDoc) -> ReviewResult:
                 "role": "system",
                 "content": (
                     "You are an expert software architect. "
-                    "You review code changes for compliance with architecture principles. "
+                    "You review code changes for compliance with architecture principles "
+                    "and company policies. "
                     "Always respond with valid JSON only."
                 ),
             },
@@ -187,4 +205,4 @@ def run_review(pr: PullRequest, arch_doc: ArchitectureDoc) -> ReviewResult:
     raw = response.choices[0].message.content or ""
 
     # 4. Parse and return
-    return _parse_response(raw, pr)
+    return _parse_response(raw, pr, docs)
diff --git a/src/types.py b/src/types.py
index e4e531e..e0324ff 100644
--- a/src/types.py
+++ b/src/types.py
@@ -115,12 +115,14 @@ class ReviewResult:
     """
     Complete output of one review run.
     Fields:
-        pr      : The PullRequest reviewed.
-        comments: All ReviewComments raised.
-        summary : High-level summary paragraph.
-        passed  : False if any 'error' severity comment exists.
+        pr           : The PullRequest reviewed.
+        comments     : All ReviewComments raised.
+        summary      : High-level summary paragraph.
+        passed       : False if any 'error' severity comment exists.
+        compliance_docs : Compliance documents used in this review.
     """
     pr: PullRequest
     comments: List[ReviewComment] = field(default_factory=list)
     summary: str = ""
     passed: bool = True
+    compliance_docs: List[ArchitectureDoc] = field(default_factory=list)