Merged
54 changes: 50 additions & 4 deletions docusaurus.config.js
Member:
Remove comments from all files related to internal task ticket numbers.

Member Author:
Done in b56c813 — stripped Task 35, LIVE-12, LIVE-13, and LIVE-20 ticket references from comments across docusaurus.config.js, src/theme/DocBreadcrumbs/index.js, and src/theme/DocItem/index.js. Kept the explanatory comments that describe why each piece of logic exists, just without the internal ticket numbers.

@@ -449,20 +449,66 @@ module.exports = {
changefreq: "weekly",
priority: 0.5,
filename: "sitemap.xml",
// Differentiate docs sitemap priorities by content type so
// search engines spend crawl budget proportional to how
// canonical each page is. Priority buckets:
// 1.0 → /docs/ root (highest — primary entry point)
// 0.9 → /docs/quickstart/* (highest-intent user flow)
// 0.8 → /docs/running-keploy/* (primary product docs)
// 0.7 → /docs/concepts/*, /docs/keploy-explained/*
// 0.6 → /docs/keploy-cloud/*, /docs/ci-cd/*
// 0.6 → /docs/keploy-explained/*-faq/ (3 FAQ pages) and
// /docs/keploy-explained/common-errors/ (troubleshooting)
// — reference-style, lower crawl priority than core docs
// 0.5 → /docs/concepts/reference/glossary/* (long-tail
// glossary; noindexed legacy versions excluded via
// netlify headers + robots.txt)
createSitemapItems: async (params) => {
const {defaultCreateSitemapItems, ...rest} = params;
const items = await defaultCreateSitemapItems(rest);
return items.map((item) => {
const url = item.url;
// The /docs/ home page is the highest-priority entry point
// for the whole docs subtree.
if (url.endsWith("/docs/") || url.endsWith("/docs")) {
return {...item, priority: 1.0, changefreq: "weekly"};
}
if (url.includes("/quickstart/")) {
return {...item, priority: 0.9, changefreq: "weekly"};
}
if (url.includes("/running-keploy/")) {
return {...item, priority: 0.8, changefreq: "weekly"};
}
if (url.includes("/concepts/reference/glossary/")) {
// Glossary entries are numerous, long-tail, and often
// off-topic for core product queries. Keep them in the
// sitemap but mark them low priority.
return {...item, priority: 0.5, changefreq: "monthly"};
}
// FAQ + troubleshooting match FIRST, because these pages live
// under /keploy-explained/ in the v4 docs (e.g.
// /docs/keploy-explained/integration-testing-faq/,
// /docs/keploy-explained/api-testing-faq/,
// /docs/keploy-explained/unit-testing-faq/,
// /docs/keploy-explained/common-errors/ — "common-errors" is
// the troubleshooting guide, labelled "Troubleshooting Guide"
// in the sidebar). Without matching first, they would be
// captured by the /keploy-explained/ rule below and get
// priority 0.7 instead of the intended 0.6.
if (
url.includes("-faq/") ||
url.includes("-faq") ||
url.includes("/common-errors")
) {
return {...item, priority: 0.6, changefreq: "monthly"};
}
if (
url.includes("/concepts/") ||
url.includes("/keploy-explained/")
) {
return {...item, priority: 0.7, changefreq: "weekly"};
}
if (url.includes("/keploy-cloud/") || url.includes("/ci-cd/")) {
return {...item, priority: 0.6, changefreq: "monthly"};
}
return item;
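The bucket ordering in the hunk above can be sanity-checked with a standalone sketch. `priorityFor` is an illustrative name, not part of the Docusaurus sitemap API; it only mirrors the match order of the `createSitemapItems` callback, including the requirement that the FAQ/common-errors check runs before the broader `/keploy-explained/` check.

```javascript
// Hypothetical standalone version of the priority buckets above.
// Order matters: a /keploy-explained/*-faq/ URL must hit the 0.6
// branch before the generic /keploy-explained/ branch returns 0.7.
function priorityFor(url) {
  if (url.endsWith("/docs/") || url.endsWith("/docs")) return 1.0;
  if (url.includes("/quickstart/")) return 0.9;
  if (url.includes("/running-keploy/")) return 0.8;
  if (url.includes("/concepts/reference/glossary/")) return 0.5;
  if (url.includes("-faq") || url.includes("/common-errors")) return 0.6;
  if (url.includes("/concepts/") || url.includes("/keploy-explained/")) return 0.7;
  if (url.includes("/keploy-cloud/") || url.includes("/ci-cd/")) return 0.6;
  return 0.5; // Docusaurus sitemap default for unmatched pages
}
```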
73 changes: 39 additions & 34 deletions src/theme/DocBreadcrumbs/index.js
@@ -51,9 +51,12 @@ export default function DocBreadcrumbs() {
const {siteConfig} = useDocusaurusContext();
const {pathname} = useLocation();

// Previously this component early-returned when useSidebarBreadcrumbs()
// returned null/undefined, which caused glossary and reference pages
// not in the sidebar config to ship with zero BreadcrumbList schema.
// Treat null/undefined as "no sidebar trail, emit Home + Docs schema
// anyway" so AI crawlers always get a hierarchy signal.
const sidebarTrail = Array.isArray(breadcrumbs) ? breadcrumbs : [];

const toAbsoluteUrl = (baseUrl, url) => {
if (!url) {
@@ -89,9 +92,9 @@
}
}

if (sidebarTrail.length > 0) {
sidebarTrail.forEach((crumb, index) => {
const isLast = index === sidebarTrail.length - 1;
const href =
crumb.type === "category" && crumb.linkUnlisted
? undefined
@@ -130,35 +133,37 @@
</script>
</Head>
)}
{sidebarTrail.length > 0 && (
<nav
className={clsx(
ThemeClassNames.docs.docBreadcrumbs,
styles.breadcrumbsContainer
)}
aria-label={translate({
id: "theme.docs.breadcrumbs.navAriaLabel",
message: "Breadcrumbs",
description: "The ARIA label for the breadcrumbs",
})}
>
<ul className="breadcrumbs">
{homePageRoute && <HomeBreadcrumbItem />}
{sidebarTrail.map((item, idx) => {
const isLast = idx === sidebarTrail.length - 1;
const href =
item.type === "category" && item.linkUnlisted
? undefined
: item.href;
return (
<BreadcrumbsItem key={idx} active={isLast}>
<BreadcrumbsItemLink href={href} isLast={isLast}>
{item.label}
</BreadcrumbsItemLink>
</BreadcrumbsItem>
);
})}
</ul>
</nav>
)}
</>
);
}
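Under the fallback described in the DocBreadcrumbs comment above, a glossary or reference page with no sidebar trail would still ship a minimal two-item BreadcrumbList. A sketch of that shape — the function name and URLs are illustrative; the component's real output is assembled from `siteConfig` and `pathname`:

```javascript
// Sketch of the JSON-LD emitted on the fallback path, when
// useSidebarBreadcrumbs() returns null: Home + Docs only, so AI
// crawlers always receive a hierarchy signal.
function minimalBreadcrumbList(siteUrl) {
  return {
    "@context": "https://schema.org",
    "@type": "BreadcrumbList",
    itemListElement: [
      {"@type": "ListItem", position: 1, name: "Home", item: siteUrl},
      {"@type": "ListItem", position: 2, name: "Docs", item: siteUrl + "/docs/"},
    ],
  };
}
```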
46 changes: 43 additions & 3 deletions src/theme/DocItem/index.js
@@ -143,12 +143,36 @@ export default function DocItem(props) {
const currentYear = new Date().getFullYear();
const image = assets?.image ?? frontMatter?.image;
const imageWithBaseUrl = useBaseUrl(image || "");
const socialImage = image
? toAbsoluteUrl(siteConfig?.url, imageWithBaseUrl)
: null;
const normalizedMetaKeywords = Array.isArray(metaKeywords)
? metaKeywords.join(", ")
: metaKeywords;
// Suppress Article / BlogPosting / APIReference schema on the /docs/
// root, versioned docs roots like /docs/4.0.0/, and any category
// index pages. Article schema on a hub page is a type mismatch
// because a hub does not have a single author, a single publication
// date, or a single headline — it is an index of content. Hub pages
// emit only the normal DocBreadcrumbs JSON-LD.
const permalink = metadata?.permalink || "";
// Versioned root pattern: /docs/<version>/ or /docs/<version> where
// <version> starts with a digit. Covers current and archived
// versions listed in docusaurus.config.js onlyIncludeVersions.
const isVersionedDocsRoot =
/^\/docs\/\d[\w.-]*(?:\/index)?\/?$/.test(permalink);
const isDocsRoot =
permalink === "/docs/" ||
permalink === "/docs" ||
permalink.endsWith("/docs/index") ||
permalink.endsWith("/docs/") ||
isVersionedDocsRoot;
const isCategoryIndex =
frontMatter?.slug === "index" || /\/category\/|\/index\/?$/.test(permalink);
const suppressArticleSchema = isDocsRoot || isCategoryIndex;

const articleSchema =
pageUrl && title && !suppressArticleSchema
? {
"@context": "https://schema.org",
"@type": schemaType,
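The versioned-root pattern in this hunk can be exercised on its own. The arrow-function wrapper below is illustrative, but the regex is copied verbatim from the diff: it matches `/docs/<version>` roots whose version starts with a digit, with or without a trailing slash or `/index`, and rejects any deeper page.

```javascript
// Standalone check of the versioned-docs-root regex used to suppress
// Article schema on hub pages like /docs/4.0.0/.
const isVersionedDocsRoot = (permalink) =>
  /^\/docs\/\d[\w.-]*(?:\/index)?\/?$/.test(permalink);
```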
@@ -187,6 +211,19 @@
{normalizedMetaKeywords && (
<meta name="keywords" content={normalizedMetaKeywords} />
)}
{/* Per-page og:title and og:description override the
docusaurus.config.js site-level defaults, which would
otherwise emit the same og:title on every docs page
regardless of content. Social card previews now reflect
the actual page title. */}
<meta property="og:title" content={title} />
{description && (
<meta property="og:description" content={description} />
)}
<meta name="twitter:title" content={title} />
{description && (
<meta name="twitter:description" content={description} />
)}
{socialImage && <meta property="og:image" content={socialImage} />}
{socialImage && <meta name="twitter:image" content={socialImage} />}
{socialImage && (
@@ -288,7 +325,10 @@
href="https://join.slack.com/t/keploy/shared_invite/zt-357qqm9b5-PbZRVu3Yt2rJIa6ofrwWNg"
aria-label="Slack"
>
<span
className="docs-inline-footer__slack"
aria-hidden="true"
/>
</a>
</div>
<div className="docs-inline-footer__usecase">
90 changes: 88 additions & 2 deletions static/robots.txt
@@ -1,12 +1,98 @@
# Keploy docs robots.txt
# Policy: allow AI search/answer engines, block training-only crawlers,
# block Bytespider. Search bots drive visibility in ChatGPT, Claude,
# Perplexity, Copilot, Gemini answers. Training bots feed future model
# weights and provide nothing back.
# Reference: Speedscale / Katalon / Testsigma split policy (2026 competitor audit)
Comment on lines +1 to +6
Copilot AI Apr 14, 2026:
PR description mentions adding a new “Keploy vs Alternatives” doc page and updating the v4 sidebar, but those artifacts don’t appear to be present in this change set (no keploy-vs-alternatives doc found and no sidebar entry references it). Either the description needs updating to reflect the actual changes in this PR, or the missing doc/sidebar changes need to be included so the PR matches its stated scope.

Member Author:

Fixed by updating the PR title and body via REST API. The title is now 'audit: BreadcrumbList, robots policy, og:title, sitemap priorities' and the Task 33 section describing the Keploy vs Alternatives page has been removed from the body. Added a trailing Note that explains the file and sidebar entry were created earlier in the branch and then removed in commit b56c813 per @nehagup's review feedback — product comparison framing belongs on the landing site, not the docs subtree. The current PR scope is BreadcrumbList + robots.txt + og:title + sitemap priorities only.


# =============================================================================
# ALLOW — AI search / answer engines
# Legacy-version disallows are repeated inside this group because a bot that
# matches a named User-agent group only reads rules from THAT group; it does
# not fall through to `User-agent: *`. Without these lines, Perplexity/
# Applebot/OAI-SearchBot/etc. would still crawl /docs/{1,2,3}.0.0/ despite
# the global block further below.
# =============================================================================

User-agent: OAI-SearchBot
User-agent: ChatGPT-User
User-agent: Claude-SearchBot
User-agent: Claude-User
User-agent: PerplexityBot
User-agent: Perplexity-User
User-agent: Gemini-Deep-Research
User-agent: GoogleOther
User-agent: Applebot
User-agent: DuckAssistBot
User-agent: Amazonbot
Allow: /
Copilot AI Apr 14, 2026:
The explicit AI-search User-agent group won’t inherit rules from User-agent: *, so those bots will ignore Crawl-delay: 5 and Disallow: /cgi-bin/. If the intent is to keep the same crawl-rate limit and global disallows for all allowed crawlers, duplicate those rules inside this named allow group as well (alongside the legacy-version disallows).

Suggested change:
Allow: /
Crawl-delay: 5
Disallow: /cgi-bin/

Member Author:

Addressed in 56cae9e. Added Crawl-delay: 5 and Disallow: /cgi-bin/ inside the named AI-search User-agent group so the allowed bots (OAI-SearchBot, ChatGPT-User, Claude-SearchBot, Claude-User, PerplexityBot, Perplexity-User, Gemini-Deep-Research, GoogleOther, Applebot, DuckAssistBot, Amazonbot) get the same rate-limit and global disallow as User-agent: *. The legacy-version disallows (/docs/1.0.0/, /docs/2.0.0/, /docs/3.0.0/) were already duplicated in this group for the same inheritance reason — this extends that pattern to the two global rules you flagged.

Member Author:

Fixed in 56cae9e: Crawl-delay: 5 and Disallow: /cgi-bin/ are now mirrored inside the AI-search allow group alongside the legacy-version disallows, so the group is a proper superset of the User-agent: * defaults. Named AI search bots (Perplexity/Applebot/OAI-SearchBot/etc.) now see the same crawl-rate limit and /cgi-bin/ block as fall-through bots.

Crawl-delay: 5
Disallow: /cgi-bin/
Disallow: /docs/1.0.0/
Disallow: /docs/2.0.0/
Disallow: /docs/3.0.0/

# =============================================================================
# DISALLOW — Training-only crawlers
# =============================================================================

User-agent: GPTBot
Disallow: /

User-agent: ClaudeBot
Disallow: /

User-agent: anthropic-ai
Disallow: /

User-agent: CCBot
Disallow: /

User-agent: Google-Extended
Disallow: /

User-agent: Applebot-Extended
Disallow: /

User-agent: Meta-ExternalAgent
Disallow: /

User-agent: FacebookBot
Disallow: /

User-agent: cohere-ai
Disallow: /

User-agent: Diffbot
Disallow: /

User-agent: Omgilibot
Disallow: /

User-agent: ImagesiftBot
Disallow: /

# Always-block scraper
User-agent: Bytespider
Disallow: /

# Default rules — apply to all crawlers including AI bots
# =============================================================================
# DEFAULT — Googlebot, Bingbot, and all other crawlers
# =============================================================================

User-agent: *
Allow: /
Crawl-delay: 5
Disallow: /cgi-bin/

# Block unmaintained legacy doc versions (already set via noindex + canonical,
# belt-and-braces for crawlers that ignore those signals).
Disallow: /docs/1.0.0/
Disallow: /docs/2.0.0/
Disallow: /docs/3.0.0/
Comment on lines +88 to +92
Copilot AI Apr 14, 2026:

The legacy-version Disallow: /docs/1.0.0/ (and 2.0.0/3.0.0) rules are only under User-agent: *, so they will not apply to crawlers that match one of the explicit allow groups above (e.g., PerplexityBot, Applebot, OAI-SearchBot). If the intent is to block those legacy versions for all crawlers, either move the legacy disallows into each explicit allow group (and keep Allow: /), or remove the explicit allow groups entirely and let those bots fall through to User-agent: * (while keeping explicit disallow groups for training bots).

Member Author:

Fixed in 758cff5. Went with option (a) but consolidated: the 11 AI-search-bot allow groups are now a single block that uses multiple User-agent: headers sharing one rule set, with the three legacy-version Disallow lines (/docs/1.0.0/, /docs/2.0.0/, /docs/3.0.0/) applied directly inside it. Same intent ("allow these AI search bots everywhere except legacy versions") but now actually enforced, and only 8 lines of net change instead of 33.

Member Author:

Addressed in 758cff5 (the earlier commit that moved the legacy disallows inside the named allow group). The /docs/1.0.0/, /docs/2.0.0/, /docs/3.0.0/ lines now sit directly under the User-agent: OAI-SearchBot / ChatGPT-User / Claude-SearchBot / ... / Amazonbot block so every allowed AI bot gets the legacy-version block, not just crawlers that fall through to User-agent: *. 56cae9e just now extended the same pattern to Crawl-delay: 5 and Disallow: /cgi-bin/ per your other comment — both global rules are now duplicated inside the named group as well.


# =============================================================================
# Sitemap
# =============================================================================

Sitemap: https://keploy.io/docs/sitemap.xml
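The whole robots.txt above hinges on the group-selection rule called out in its header comment: a crawler obeys only its best-matching `User-agent` group and never falls through to `User-agent: *`, which is why the legacy-version disallows, `Crawl-delay`, and `/cgi-bin/` block are duplicated inside the named AI-search group. A minimal illustration of that rule — this is a toy matcher, not a full robots.txt parser, and the group contents are abbreviated from the file:

```javascript
// A crawler picks the first named group whose token appears in its
// user-agent string; only if none match does it use the `*` group.
function selectGroup(groups, userAgent) {
  const ua = userAgent.toLowerCase();
  const named = groups.find((g) =>
    g.agents.some((a) => a !== "*" && ua.includes(a.toLowerCase()))
  );
  return named ?? groups.find((g) => g.agents.includes("*")) ?? null;
}

// Abbreviated mirror of the file's structure: the named AI-search group
// must repeat the global rules because `*` is never consulted for it.
const groups = [
  {
    agents: ["OAI-SearchBot", "PerplexityBot", "Applebot"],
    rules: ["Allow: /", "Crawl-delay: 5", "Disallow: /cgi-bin/", "Disallow: /docs/1.0.0/"],
  },
  {
    agents: ["*"],
    rules: ["Allow: /", "Crawl-delay: 5", "Disallow: /cgi-bin/", "Disallow: /docs/1.0.0/"],
  },
];
```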