feat(seo): add sitemap generation and improve seo metadata

- Add sitemap generator script that scans markdown files and creates a multilingual sitemap
- Update build script to include sitemap generation
- Add robots.txt and llms.txt files for crawlers
- Enhance SEO metadata with better structured data and hreflang tags
- Fix stage-0 URL in README
This commit is contained in:
sanbuphy
2026-03-03 16:39:07 +08:00
parent 0cde334b1d
commit 0ca1a53306
7 changed files with 1595 additions and 20 deletions
+223
View File
@@ -0,0 +1,223 @@
#!/usr/bin/env node
/**
* Sitemap Generator for Easy-Vibe
* Generates sitemap.xml for all pages in the documentation
*/
import fs from 'fs'
import path from 'path'
import { fileURLToPath } from 'url'
// ES modules have no built-in __filename/__dirname, so derive them from import.meta.url.
const __filename = fileURLToPath(import.meta.url)
const __dirname = path.dirname(__filename)

// Documentation root, and the static-assets folder inside it that receives sitemap.xml.
const docsDir = path.resolve(__dirname, '..', 'docs')
const publicDir = path.join(docsDir, 'public')

// Supported locales (each one is a sub-directory name under the docs root).
const locales = [
  'zh-cn',
  'en',
  'zh-tw',
  'ja-jp',
  'ko-kr',
  'es-es',
  'fr-fr',
  'de-de',
  'ar-sa',
  'vi-vn'
]
/**
 * Resolve the site's base URL from the deployment environment.
 *
 * Checked in order: VERCEL_URL and EDGEONE_URL (both are prefixed with
 * `https://`), then SITE_URL (used as given), and finally the GitHub Pages
 * default. Trailing slashes are stripped so that callers can append
 * `/<locale>/...` without producing `//` in the resulting URL.
 *
 * NOTE(review): VERCEL_URL takes precedence over an explicitly configured
 * SITE_URL; confirm that this ordering is intended.
 *
 * @returns {string} Base URL without a trailing slash.
 */
const getBaseUrl = () => {
  const stripTrailingSlashes = (url) => url.replace(/\/+$/, '')
  const { VERCEL_URL, EDGEONE_URL, SITE_URL } = process.env

  if (VERCEL_URL) {
    return stripTrailingSlashes(`https://${VERCEL_URL}`)
  }
  if (EDGEONE_URL) {
    return stripTrailingSlashes(`https://${EDGEONE_URL}`)
  }
  if (SITE_URL) {
    return stripTrailingSlashes(SITE_URL)
  }
  return 'https://datawhalechina.github.io/easy-vibe'
}
// Resolved once at load time; every generated URL is prefixed with it.
const siteUrl = getBaseUrl()
// Directory names that never contain documentation pages.
const SKIPPED_DIRECTORIES = new Set(['.vitepress', 'node_modules', 'dist', 'public'])

/**
 * Recursively collect every markdown file below `dir`.
 *
 * Returned paths are relative to the directory of the outermost call and
 * always use `/` as separator, regardless of platform, because they are later
 * turned into URL paths and compared against '/'-based patterns.
 *
 * @param {string} dir - Directory to scan.
 * @param {string} [basePath=''] - Relative prefix accumulated during recursion.
 * @returns {string[]} Relative paths of all `.md` files found.
 */
function scanMarkdownFiles(dir, basePath = '') {
  const files = []
  const entries = fs.readdirSync(dir, { withFileTypes: true })

  for (const entry of entries) {
    // path.posix keeps the relative path URL-friendly on Windows as well.
    const relativePath = path.posix.join(basePath, entry.name)

    if (entry.isDirectory()) {
      if (SKIPPED_DIRECTORIES.has(entry.name)) {
        continue
      }
      files.push(...scanMarkdownFiles(path.join(dir, entry.name), relativePath))
    } else if (entry.isFile() && entry.name.endsWith('.md')) {
      files.push(relativePath)
    }
  }

  return files
}
/**
 * Convert a markdown path (relative to a locale directory) into the page's
 * absolute URL for the given locale.
 *
 * @param {string} mdPath - Relative markdown path, e.g. `guide/intro.md`.
 * @param {string} locale - Locale directory name placed right after the site URL.
 * @returns {string} URL of the form `<siteUrl>/<locale>/<page>/`.
 */
function mdPathToUrl(mdPath, locale) {
  const indexSuffix = '/index'
  const withoutExtension = mdPath.replace(/\.md$/, '')

  // An index page is addressed by its directory alone.
  let pagePath = withoutExtension
  if (withoutExtension === 'index') {
    pagePath = ''
  } else if (withoutExtension.endsWith(indexSuffix)) {
    pagePath = withoutExtension.slice(0, -indexSuffix.length)
  }

  // The locale root already ends in '/', so only non-empty paths get one appended.
  const trailingSlash = pagePath ? '/' : ''
  return `${siteUrl}/${locale}/${pagePath}${trailingSlash}`
}
/**
 * Render the sitemap XML document for the given URL entries.
 *
 * Every entry receives the same `lastmod` timestamp (taken once when the
 * function is called) and a fixed `weekly` change frequency.
 *
 * @param {Array<{loc: string, priority: number, alternates: Array<{hreflang: string, href: string}>}>} urls
 *   Entries to emit; `alternates` become `xhtml:link` hreflang elements.
 * @returns {string} The complete sitemap XML, terminated by a newline.
 */
function generateSitemap(urls) {
  const timestamp = new Date().toISOString()

  // One hreflang alternate link inside a <url> element.
  const renderAlternate = ({ hreflang, href }) =>
    ` <xhtml:link rel="alternate" hreflang="${hreflang}" href="${escapeXml(href)}"/>\n`

  // One complete <url> element.
  const renderEntry = (entry) =>
    [
      ' <url>\n',
      ` <loc>${escapeXml(entry.loc)}</loc>\n`,
      ` <lastmod>${timestamp}</lastmod>\n`,
      ` <changefreq>weekly</changefreq>\n`,
      ` <priority>${entry.priority.toFixed(1)}</priority>\n`,
      ...Array.from(entry.alternates, (alternate) => renderAlternate(alternate)),
      ' </url>\n'
    ].join('')

  const parts = [
    '<?xml version="1.0" encoding="UTF-8"?>\n',
    '<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9"\n',
    ' xmlns:xhtml="http://www.w3.org/1999/xhtml">\n',
    ...Array.from(urls, (entry) => renderEntry(entry)),
    '</urlset>\n'
  ]
  return parts.join('')
}
/**
 * Escape the five XML special characters so the text can be placed in
 * element content or attribute values.
 *
 * @param {string} str - Raw text.
 * @returns {string} Text with `&`, `<`, `>`, `"` and `'` replaced by entities.
 */
function escapeXml(str) {
  const entities = {
    '&': '&amp;',
    '<': '&lt;',
    '>': '&gt;',
    '"': '&quot;',
    "'": '&apos;'
  }
  // A single pass never re-escapes the '&' of an entity it has just produced.
  return str.replace(/[&<>"']/g, (character) => entities[character])
}
/**
 * Entry point: build the multilingual sitemap and write it to
 * `<publicDir>/sitemap.xml`.
 *
 * The markdown files under `docs/zh-cn` serve as the reference page list; when
 * that directory is missing, the markdown files directly inside the docs root
 * are used instead. For every reference page, each locale that has a file at
 * the same relative path contributes an hreflang alternate. The zh-cn URL is
 * the entry's `loc` when it exists, otherwise the first available alternate.
 * A home-page entry is always placed first.
 */
function main() {
  console.log('🔍 Scanning documentation files...')

  // Use the zh-cn content as the baseline list of pages.
  const zhCnDir = path.join(docsDir, 'zh-cn')
  const baseFiles = fs.existsSync(zhCnDir)
    ? scanMarkdownFiles(zhCnDir)
    : // No zh-cn directory: keep only files directly in the docs root.
      // path.dirname is used so the check does not depend on the path separator.
      scanMarkdownFiles(docsDir).filter((file) => path.dirname(file) === '.')

  console.log(`📄 Found ${baseFiles.length} base pages`)

  const allUrls = []
  for (const baseFile of baseFiles) {
    // The root index.md is the home page, which is added separately below.
    if (baseFile === 'index.md') continue

    const alternates = []
    let loc = ''
    for (const locale of locales) {
      // Only advertise a translation that actually exists on disk.
      if (!fs.existsSync(path.join(docsDir, locale, baseFile))) continue

      const url = mdPathToUrl(baseFile, locale)
      alternates.push({ hreflang: getHreflangCode(locale), href: url })

      // zh-cn is the primary version of each page.
      if (locale === 'zh-cn') {
        loc = url
      }
    }

    // Skip pages that exist in none of the known locales.
    if (alternates.length === 0) continue

    allUrls.push({
      // Without a zh-cn version, fall back to the first available locale.
      loc: loc || alternates[0].href,
      priority: getPriority(baseFile),
      alternates
    })
  }

  // Home page: listed first with the highest priority.
  // NOTE(review): alternates are emitted for every locale without checking
  // that the locale's home page exists - confirm all locales are deployed.
  const homeAlternates = locales.map((locale) => ({
    hreflang: getHreflangCode(locale),
    href: `${siteUrl}/${locale}/`
  }))
  allUrls.unshift({
    loc: `${siteUrl}/zh-cn/`,
    priority: 1.0,
    alternates: homeAlternates
  })

  console.log(`🌐 Generating sitemap with ${allUrls.length} URLs...`)
  const sitemapXml = generateSitemap(allUrls)

  // Make sure the output directory exists so a fresh checkout does not fail.
  fs.mkdirSync(publicDir, { recursive: true })
  const outputPath = path.join(publicDir, 'sitemap.xml')
  fs.writeFileSync(outputPath, sitemapXml, 'utf-8')

  console.log(`✅ Sitemap generated at ${outputPath}`)
  console.log(`📊 Statistics:`)
  console.log(` - Total URLs: ${allUrls.length}`)
  console.log(` - Locales: ${locales.length}`)
  console.log(` - Site URL: ${siteUrl}`)
}
/**
 * Choose the sitemap priority of a page from markers in its path.
 *
 * @param {string} filePath - Relative markdown path of the page.
 * @returns {number} Priority of the first matching tier, or 0.6 when none matches.
 */
function getPriority(filePath) {
  // Tiers are checked top to bottom; the first marker found in the path wins.
  const tiers = [
    { markers: ['stage-0', 'stage-1'], priority: 0.9 },
    { markers: ['stage-2', 'stage-3'], priority: 0.8 },
    { markers: ['appendix'], priority: 0.7 }
  ]

  for (const { markers, priority } of tiers) {
    if (markers.some((marker) => filePath.includes(marker))) {
      return priority
    }
  }
  return 0.6
}
/**
 * Map a locale directory name to the language code used in hreflang attributes.
 *
 * A Map is used instead of a plain object so that names inherited from
 * Object.prototype (e.g. `constructor`) are not mistaken for known locales.
 *
 * @param {string} locale - Locale directory name, e.g. `zh-cn`.
 * @returns {string} The mapped hreflang code, or `locale` itself when unmapped.
 */
function getHreflangCode(locale) {
  const hreflangByLocale = new Map([
    ['zh-cn', 'zh-CN'],
    ['en', 'en'],
    ['zh-tw', 'zh-TW'],
    ['ja-jp', 'ja'],
    ['ko-kr', 'ko'],
    ['es-es', 'es'],
    ['fr-fr', 'fr'],
    ['de-de', 'de'],
    ['ar-sa', 'ar'],
    ['vi-vn', 'vi']
  ])
  return hreflangByLocale.has(locale) ? hreflangByLocale.get(locale) : locale
}
// Run the generator as soon as this script is loaded.
main()