diff --git a/src/libraries/libraries.ts b/src/libraries/libraries.ts index 5de8e78b..abf1e863 100644 --- a/src/libraries/libraries.ts +++ b/src/libraries/libraries.ts @@ -30,6 +30,10 @@ export const query: LibrarySlim = { scarfId: '53afb586-3934-4624-a37a-e680c1528e17', ogImage: 'https://github.com/tanstack/query/raw/main/media/repo-header.png', defaultDocs: 'framework/react/overview', + sitemap: { + includeLandingPage: true, + includeTopLevelDocsPages: true, + }, installPath: 'framework/$framework/installation', legacyPackages: ['react-query'], handleRedirects: (href) => { @@ -217,6 +221,10 @@ export const router: LibrarySlim = { scarfId: '3d14fff2-f326-4929-b5e1-6ecf953d24f4', ogImage: 'https://github.com/tanstack/router/raw/main/media/header.png', docsRoot: 'docs/router', + sitemap: { + includeLandingPage: true, + includeTopLevelDocsPages: true, + }, legacyPackages: ['react-location'], hideCodesandboxUrl: true, handleRedirects: (href) => { @@ -282,6 +290,10 @@ export const start: LibrarySlim = { scarfId: 'b6e2134f-e805-401d-95c3-2a7765d49a3d', docsRoot: 'docs/start', defaultDocs: 'framework/react/overview', + sitemap: { + includeLandingPage: true, + includeTopLevelDocsPages: true, + }, installPath: 'framework/$framework/build-from-scratch', embedEditor: 'codesandbox', showNetlifyUrl: true, @@ -323,6 +335,10 @@ export const table: LibrarySlim = { scarfId: 'dc8b39e1-3fe9-4f3a-8e56-d4e2cf420a9e', ogImage: 'https://github.com/tanstack/table/raw/main/media/repo-header.png', defaultDocs: 'introduction', + sitemap: { + includeLandingPage: true, + includeTopLevelDocsPages: true, + }, corePackageName: '@tanstack/table-core', legacyPackages: ['react-table'], handleRedirects: (href) => { @@ -392,6 +408,10 @@ export const form: LibrarySlim = { availableVersions: ['v1'], scarfId: '72ec4452-5d77-427c-b44a-57515d2d83aa', ogImage: 'https://github.com/tanstack/form/raw/main/media/repo-header.png', + sitemap: { + includeLandingPage: true, + includeTopLevelDocsPages: true, + }, } export const virtual: LibrarySlim = { @@ -556,6 +576,9 @@ export const db: LibrarySlim = { scarfId: '302d0fef-cb3f-43c6-b45c-f055b9745edb', ogImage: 'https://github.com/tanstack/db/raw/main/media/repo-header.png', defaultDocs: 'overview', + sitemap: { + includeLandingPage: true, + }, } export const ai: LibrarySlim = { diff --git a/src/libraries/types.ts b/src/libraries/types.ts index b478bf66..b6debb7b 100644 --- a/src/libraries/types.ts +++ b/src/libraries/types.ts @@ -79,6 +79,10 @@ export type LibrarySlim = { * Defaults to true. */ visible?: boolean + sitemap?: { + includeLandingPage?: boolean + includeTopLevelDocsPages?: boolean + } } // Extended library type - adds React node content for landing pages diff --git a/src/routeTree.gen.ts b/src/routeTree.gen.ts index df2a9641..1e4d66cb 100644 --- a/src/routeTree.gen.ts +++ b/src/routeTree.gen.ts @@ -14,7 +14,9 @@ import { Route as TermsRouteImport } from './routes/terms' import { Route as TenetsRouteImport } from './routes/tenets' import { Route as SupportRouteImport } from './routes/support' import { Route as SponsorsEmbedRouteImport } from './routes/sponsors-embed' +import { Route as SitemapDotxmlRouteImport } from './routes/sitemap[.]xml' import { Route as RssDotxmlRouteImport } from './routes/rss[.]xml' +import { Route as RobotsDottxtRouteImport } from './routes/robots[.]txt' import { Route as PrivacyRouteImport } from './routes/privacy' import { Route as PartnersEmbedRouteImport } from './routes/partners-embed' import { Route as PartnersRouteImport } from './routes/partners' @@ -144,11 +146,21 @@ const SponsorsEmbedRoute = SponsorsEmbedRouteImport.update({ path: '/sponsors-embed', getParentRoute: () => rootRouteImport, } as any) +const SitemapDotxmlRoute = SitemapDotxmlRouteImport.update({ + id: '/sitemap.xml', + path: '/sitemap.xml', + getParentRoute: () => rootRouteImport, +} as any) const RssDotxmlRoute = RssDotxmlRouteImport.update({ id: '/rss.xml', path: '/rss.xml', getParentRoute: () => rootRouteImport, } as any) +const RobotsDottxtRoute = RobotsDottxtRouteImport.update({ + id: '/robots.txt', + path: '/robots.txt', + getParentRoute: () => rootRouteImport, +} as any) const PrivacyRoute = PrivacyRouteImport.update({ id: '/privacy', path: '/privacy', @@ -710,7 +722,9 @@ export interface FileRoutesByFullPath { '/partners': typeof PartnersRoute '/partners-embed': typeof PartnersEmbedRoute '/privacy': typeof PrivacyRoute + '/robots.txt': typeof RobotsDottxtRoute '/rss.xml': typeof RssDotxmlRoute + '/sitemap.xml': typeof SitemapDotxmlRoute '/sponsors-embed': typeof SponsorsEmbedRoute '/support': typeof SupportRoute '/tenets': typeof TenetsRoute @@ -816,7 +830,9 @@ export interface FileRoutesByTo { '/partners': typeof PartnersRoute '/partners-embed': typeof PartnersEmbedRoute '/privacy': typeof PrivacyRoute + '/robots.txt': typeof RobotsDottxtRoute '/rss.xml': typeof RssDotxmlRoute + '/sitemap.xml': typeof SitemapDotxmlRoute '/sponsors-embed': typeof SponsorsEmbedRoute '/support': typeof SupportRoute '/tenets': typeof TenetsRoute @@ -925,7 +941,9 @@ export interface FileRoutesById { '/partners': typeof PartnersRoute '/partners-embed': typeof PartnersEmbedRoute '/privacy': typeof PrivacyRoute + '/robots.txt': typeof RobotsDottxtRoute '/rss.xml': typeof RssDotxmlRoute + '/sitemap.xml': typeof SitemapDotxmlRoute '/sponsors-embed': typeof SponsorsEmbedRoute '/support': typeof SupportRoute '/tenets': typeof TenetsRoute @@ -1038,7 +1056,9 @@ export interface FileRouteTypes { | '/partners' | '/partners-embed' | '/privacy' + | '/robots.txt' | '/rss.xml' + | '/sitemap.xml' | '/sponsors-embed' | '/support' | '/tenets' @@ -1144,7 +1164,9 @@ export interface FileRouteTypes { | '/partners' | '/partners-embed' | '/privacy' + | '/robots.txt' | '/rss.xml' + | '/sitemap.xml' | '/sponsors-embed' | '/support' | '/tenets' @@ -1252,7 +1274,9 @@ export interface FileRouteTypes { | '/partners' | '/partners-embed' | '/privacy' + | '/robots.txt' | '/rss.xml' + | '/sitemap.xml' | '/sponsors-embed' | '/support' | '/tenets' @@ -1364,7 +1388,9 @@ export interface RootRouteChildren { PartnersRoute: typeof PartnersRoute PartnersEmbedRoute: typeof PartnersEmbedRoute PrivacyRoute: typeof PrivacyRoute + RobotsDottxtRoute: typeof RobotsDottxtRoute RssDotxmlRoute: typeof RssDotxmlRoute + SitemapDotxmlRoute: typeof SitemapDotxmlRoute SponsorsEmbedRoute: typeof SponsorsEmbedRoute SupportRoute: typeof SupportRoute TenetsRoute: typeof TenetsRoute @@ -1448,6 +1474,13 @@ declare module '@tanstack/react-router' { preLoaderRoute: typeof SponsorsEmbedRouteImport parentRoute: typeof rootRouteImport } + '/sitemap.xml': { + id: '/sitemap.xml' + path: '/sitemap.xml' + fullPath: '/sitemap.xml' + preLoaderRoute: typeof SitemapDotxmlRouteImport + parentRoute: typeof rootRouteImport + } '/rss.xml': { id: '/rss.xml' path: '/rss.xml' @@ -1455,6 +1488,13 @@ declare module '@tanstack/react-router' { preLoaderRoute: typeof RssDotxmlRouteImport parentRoute: typeof rootRouteImport } + '/robots.txt': { + id: '/robots.txt' + path: '/robots.txt' + fullPath: '/robots.txt' + preLoaderRoute: typeof RobotsDottxtRouteImport + parentRoute: typeof rootRouteImport + } '/privacy': { id: '/privacy' path: '/privacy' @@ -2379,7 +2419,9 @@ const rootRouteChildren: RootRouteChildren = { PartnersRoute: PartnersRoute, PartnersEmbedRoute: PartnersEmbedRoute, PrivacyRoute: PrivacyRoute, + RobotsDottxtRoute: RobotsDottxtRoute, RssDotxmlRoute: RssDotxmlRoute, + SitemapDotxmlRoute: SitemapDotxmlRoute, SponsorsEmbedRoute: SponsorsEmbedRoute, SupportRoute: SupportRoute, TenetsRoute: TenetsRoute, diff --git a/src/routes/__root.tsx b/src/routes/__root.tsx index 0eac7762..9c19d09c 100644 --- a/src/routes/__root.tsx +++ b/src/routes/__root.tsx @@ -9,7 +9,12 @@ import { } from '@tanstack/react-router' import { QueryClient } from '@tanstack/react-query' import appCss from '~/styles/app.css?url' -import { seo } from '~/utils/seo' +import { + canonicalUrl, + getCanonicalPath, + seo, + shouldIndexPath, +} from '~/utils/seo' import ogImage from '~/images/og.png' const LazyRouterDevtools = React.lazy(() => import('@tanstack/react-router-devtools').then((m) => ({ @@ -155,6 +160,12 @@ function ShellComponent({ children }: { children: React.ReactNode }) { select: (s) => s.resolvedLocation?.pathname.startsWith('/router'), }) + const canonicalPath = useRouterState({ + select: (s) => s.resolvedLocation?.pathname || '/', + }) + + const preferredCanonicalPath = getCanonicalPath(canonicalPath) + const showDevtools = canShowLoading && isRouterPage const hideNavbar = useMatches({ @@ -166,6 +177,12 @@ function ShellComponent({ children }: { children: React.ReactNode }) { return ( + {preferredCanonicalPath ? ( + + ) : null} + {!shouldIndexPath(canonicalPath) ? ( + + ) : null} {hasBaseParent ? : null} diff --git a/src/routes/robots[.]txt.ts b/src/routes/robots[.]txt.ts new file mode 100644 index 00000000..59228206 --- /dev/null +++ b/src/routes/robots[.]txt.ts @@ -0,0 +1,25 @@ +import { createFileRoute } from '@tanstack/react-router' +import { setResponseHeader } from '@tanstack/react-start/server' +import { generateRobotsTxt, getSiteOrigin } from '~/utils/sitemap' + +export const Route = createFileRoute('/robots.txt')({ + server: { + handlers: { + GET: async ({ request }: { request: Request }) => { + const content = generateRobotsTxt(getSiteOrigin(request)) + + setResponseHeader('Content-Type', 'text/plain; charset=utf-8') + setResponseHeader( + 'Cache-Control', + 'public, max-age=300, must-revalidate', + ) + setResponseHeader( + 'CDN-Cache-Control', + 'max-age=3600, stale-while-revalidate=3600', + ) + + return new Response(content) + }, + }, + }, +}) diff --git a/src/routes/showcase/index.tsx b/src/routes/showcase/index.tsx index 006b4d59..0ba89521 100644 --- a/src/routes/showcase/index.tsx +++ b/src/routes/showcase/index.tsx @@ -16,6 +16,17 @@ const searchSchema = v.object({ export const PAGE_SIZE_OPTIONS = [24, 48, 96, 192] as const +function hasNonCanonicalSearch(search: v.InferOutput) { + return Boolean( + search.page > 1 || + search.pageSize !== PAGE_SIZE_OPTIONS[0] || + search.libraryIds?.length || + search.useCases?.length || + search.hasSourceCode || + search.q, + ) +} + export const Route = createFileRoute('/showcase/')({ validateSearch: searchSchema, loaderDeps: ({ search }) => ({ @@ -41,13 +52,18 @@ export const Route = createFileRoute('/showcase/')({ }, }), ) + + return { + hasNonCanonicalSearch: hasNonCanonicalSearch(deps), + } }, component: ShowcaseGallery, - head: () => ({ + head: ({ loaderData }) => ({ meta: seo({ title: 'Showcase | TanStack', description: 'Discover projects built with TanStack libraries. See how developers are using TanStack Query, Router, Table, Form, and more in production.', + noindex: loaderData?.hasNonCanonicalSearch, }), }), }) diff --git a/src/routes/sitemap[.]xml.ts b/src/routes/sitemap[.]xml.ts new file mode 100644 index 00000000..ae6b02d0 --- /dev/null +++ b/src/routes/sitemap[.]xml.ts @@ -0,0 +1,25 @@ +import { createFileRoute } from '@tanstack/react-router' +import { setResponseHeader } from '@tanstack/react-start/server' +import { generateSitemapXml, getSiteOrigin } from '~/utils/sitemap' + +export const Route = createFileRoute('/sitemap.xml')({ + server: { + handlers: { + GET: async ({ request }: { request: Request }) => { + const content = await generateSitemapXml(getSiteOrigin(request)) + + setResponseHeader('Content-Type', 'application/xml; charset=utf-8') + setResponseHeader( + 'Cache-Control', + 'public, max-age=300, must-revalidate', + ) + setResponseHeader( + 'CDN-Cache-Control', + 'max-age=3600, stale-while-revalidate=3600', + ) + + return new Response(content) + }, + }, + }, +}) diff --git a/src/utils/seo.ts b/src/utils/seo.ts index 39440353..31b8daef 100644 --- a/src/utils/seo.ts +++ b/src/utils/seo.ts @@ -1,16 +1,78 @@ +import { env } from '~/utils/env' +import { findLibrary } from '~/libraries' + +const DEFAULT_SITE_URL = 'https://tanstack.com' +const NON_INDEXABLE_PATH_PREFIXES = ['/account', '/admin', '/login'] as const + +function trimTrailingSlash(value: string) { + return value.replace(/\/$/, '') +} + +function normalizePath(path: string) { + if (!path || path === '/') { + return '/' + } + + const normalizedPath = path.startsWith('/') ? path : `/${path}` + + return normalizedPath.replace(/\/$/, '') +} + +export function getCanonicalPath(path: string) { + const normalizedPath = normalizePath(path) + + if ( + NON_INDEXABLE_PATH_PREFIXES.some( + (prefix) => + normalizedPath === prefix || normalizedPath.startsWith(`${prefix}/`), + ) + ) { + return null + } + + const pathSegments = normalizedPath.split('/').filter(Boolean) + + if (pathSegments.length >= 2) { + const [libraryId, version, ...rest] = pathSegments + const library = findLibrary(libraryId) + + if (library && version !== 'latest') { + return normalizePath(`/${library.id}/latest/${rest.join('/')}`) + } + } + + return normalizedPath +} + +export function shouldIndexPath(path: string) { + return getCanonicalPath(path) !== null +} + +export function canonicalUrl(path: string) { + const origin = trimTrailingSlash( + env.URL || + (import.meta.env.SSR ? env.SITE_URL : undefined) || + DEFAULT_SITE_URL, + ) + + return `${origin}${normalizePath(path)}` +} + +type SeoOptions = { + title: string + description?: string + image?: string + keywords?: string + noindex?: boolean +} + export const seo = ({ title, description, keywords, image, noindex, -}: { - title: string - description?: string - image?: string - keywords?: string - noindex?: boolean -}) => { +}: SeoOptions) => { const tags = [ { title }, { name: 'description', content: description }, diff --git a/src/utils/sitemap.ts b/src/utils/sitemap.ts new file mode 100644 index 00000000..6fcd145e --- /dev/null +++ b/src/utils/sitemap.ts @@ -0,0 +1,183 @@ +import { getBranch, libraries } from '~/libraries' +import type { LibrarySlim } from '~/libraries/types' +import { getPublishedPosts } from '~/utils/blog' +import { fetchRepoDirectoryContents } from '~/utils/docs' +import type { GitHubFileNode } from '~/utils/documents.server' +import { env } from '~/utils/env' + +export type SitemapEntry = { + path: string + lastModified?: string +} + +const MAX_DOCS_SITEMAP_DEPTH = 3 + +const HIGH_VALUE_NON_DOC_PAGES = [ + '/', + '/blog', + '/libraries', + '/learn', + '/showcase', + '/support', + '/workshops', + '/paid-support', +] as const satisfies ReadonlyArray + +function trimTrailingSlash(url: string) { + return url.replace(/\/$/, '') +} + +function escapeXml(value: string) { + return value + .replace(/&/g, '&') + .replace(//g, '>') + .replace(/"/g, '"') + .replace(/'/g, ''') +} + +function asLastModified(value: string) { + return new Date(`${value}T12:00:00.000Z`).toISOString() +} + +function getLibraryEntries(): Array { + return libraries.flatMap((library) => { + if ( + library.visible === false || + !library.latestVersion || + library.sitemap?.includeLandingPage !== true + ) { + return [] + } + + const basePath = `/${library.id}/latest` + return [{ path: basePath }] + }) +} + +function flattenDocsTree(nodes: Array): Array { + return nodes.flatMap((node) => [ + node, + ...(node.children ? flattenDocsTree(node.children) : []), + ]) +} + +function toDocsSlug(filePath: string, docsRoot: string) { + const docsPrefix = `${docsRoot}/` + + if (!filePath.startsWith(docsPrefix) || !filePath.endsWith('.md')) { + return null + } + + const slug = filePath.slice(docsPrefix.length, -'.md'.length) + + if (!slug || slug.endsWith('/index')) { + return null + } + + return slug +} + +function isTopLevelDocsSlug(slug: string) { + const segments = slug.split('/') + + return segments.length <= MAX_DOCS_SITEMAP_DEPTH +} + +function isDefined(value: T | null): value is T { + return value !== null +} + +async function getLibraryDocsEntries( + library: LibrarySlim, +): Promise> { + if ( + library.visible === false || + !library.latestVersion || + library.sitemap?.includeTopLevelDocsPages !== true + ) { + return [] + } + + const docsRoot = library.docsRoot || 'docs' + const branch = getBranch(library, 'latest') + const docsTree = await fetchRepoDirectoryContents({ + data: { + repo: library.repo, + branch, + startingPath: docsRoot, + }, + }).catch(() => []) + + return flattenDocsTree(docsTree) + .filter((node) => node.type === 'file') + .map((node) => toDocsSlug(node.path, docsRoot)) + .filter(isDefined) + .filter(isTopLevelDocsSlug) + .map((slug) => ({ + path: `/${library.id}/latest/docs/${slug}`, + })) +} + +function getBlogEntries(): Array { + return getPublishedPosts().map((post) => ({ + path: `/blog/${post.slug}`, + lastModified: asLastModified(post.published), + })) +} + +export function getSiteOrigin(request: Request) { + return trimTrailingSlash(env.SITE_URL || new URL(request.url).origin) +} + +export async function getSitemapEntries(): Promise> { + const docsEntries = await Promise.all( + libraries.map((library) => getLibraryDocsEntries(library)), + ) + + const entries = [ + ...HIGH_VALUE_NON_DOC_PAGES.map((path) => ({ path })), + ...getLibraryEntries(), + ...docsEntries.flat(), + ...getBlogEntries(), + ] + + return Array.from( + new Map(entries.map((entry) => [entry.path, entry])).values(), + ) +} + +export async function generateSitemapXml(origin: string) { + const urls = (await getSitemapEntries()) + .map((entry) => { + const loc = `${origin}${entry.path}` + + return [ + ' ', + ` ${escapeXml(loc)}`, + entry.lastModified + ? ` ${entry.lastModified}` + : '', + ' ', + ] + .filter(Boolean) + .join('\n') + }) + .join('\n') + + return ` + +${urls} +` +} + +export function generateRobotsTxt(origin: string) { + return [ + 'User-agent: *', + 'Allow: /', + 'Disallow: /api/', + 'Disallow: /oauth/', + '', + `Sitemap: ${origin}/sitemap.xml`, + ].join('\n') +}