feat(seo): add sitemap generation and improve seo metadata
- Add sitemap generator script that scans markdown files and creates a multilingual sitemap
- Update build script to include sitemap generation
- Add robots.txt and llms.txt files for crawlers
- Enhance SEO metadata with better structured data and hreflang tags
- Fix stage-0 URL in README
This commit is contained in:
@@ -0,0 +1,223 @@
|
||||
#!/usr/bin/env node
|
||||
/**
|
||||
* Sitemap Generator for Easy-Vibe
|
||||
* Generates sitemap.xml for all pages in the documentation
|
||||
*/
|
||||
|
||||
import fs from 'fs'
|
||||
import path from 'path'
|
||||
import { fileURLToPath } from 'url'
|
||||
|
||||
// ES modules do not define __filename / __dirname, so derive them from import.meta.url.
const __filename = fileURLToPath(import.meta.url)
const __dirname = path.dirname(__filename)

// Documentation root and the directory the sitemap is written to,
// both resolved relative to the location of this script.
const docsDir = path.resolve(__dirname, '../docs')
const publicDir = path.resolve(__dirname, '../docs/public')

// Supported locales. Each entry is used both as a sub-directory name under
// docsDir and as the locale segment of the generated URLs.
const locales = ['zh-cn', 'en', 'zh-tw', 'ja-jp', 'ko-kr', 'es-es', 'fr-fr', 'de-de', 'ar-sa', 'vi-vn']
|
||||
|
||||
/**
 * Resolve the site's base URL from the deployment environment.
 *
 * Sources are checked in this order:
 *   1. VERCEL_URL  - prefixed with "https://"
 *   2. EDGEONE_URL - prefixed with "https://"
 *   3. SITE_URL    - used as given
 *   4. the hard-coded GitHub Pages address
 *
 * Trailing slashes are stripped because the URL is later joined as
 * `${siteUrl}/${locale}/...`; keeping one would put "//" into every
 * generated URL.
 *
 * @returns {string} Base URL without a trailing slash.
 */
const getBaseUrl = () => {
  const { VERCEL_URL, EDGEONE_URL, SITE_URL } = process.env

  let baseUrl = 'https://datawhalechina.github.io/easy-vibe'
  if (VERCEL_URL) {
    baseUrl = `https://${VERCEL_URL}`
  } else if (EDGEONE_URL) {
    baseUrl = `https://${EDGEONE_URL}`
  } else if (SITE_URL) {
    baseUrl = SITE_URL
  }

  return baseUrl.replace(/\/+$/, '')
}
|
||||
|
||||
// Base URL of the site, resolved once when the module loads and used as the prefix of every generated URL.
const siteUrl = getBaseUrl()
|
||||
|
||||
/**
 * Recursively collect every Markdown file below a directory.
 *
 * Returned paths always use "/" as the separator, whatever the platform:
 * they are later converted into URLs and checked for "/" by callers, so a
 * platform-specific separator would leak backslashes into the sitemap.
 *
 * @param {string} dir - Directory to scan.
 * @param {string} [basePath=''] - Prefix for the returned paths; used by the recursion.
 * @returns {string[]} Paths of all `.md` files found, relative to the initial `dir`.
 */
function scanMarkdownFiles(dir, basePath = '') {
  // Directories that are never descended into.
  const skippedDirs = new Set(['.vitepress', 'node_modules', 'dist', 'public'])
  const files = []

  for (const entry of fs.readdirSync(dir, { withFileTypes: true })) {
    const relativePath = path.posix.join(basePath, entry.name)

    if (entry.isDirectory()) {
      if (skippedDirs.has(entry.name)) continue
      // The on-disk path keeps the native separator; only the reported path is POSIX-style.
      files.push(...scanMarkdownFiles(path.join(dir, entry.name), relativePath))
    } else if (entry.isFile() && entry.name.endsWith('.md')) {
      files.push(relativePath)
    }
  }

  return files
}
|
||||
|
||||
/**
 * Convert a Markdown path (relative to a locale directory) into the absolute
 * URL of the corresponding page.
 *
 * - The `.md` extension is removed.
 * - `index.md` maps to its containing directory.
 * - Backslash separators are normalised to "/" so paths produced on Windows
 *   yield valid URLs.
 * - Each path segment is percent-encoded, so spaces and non-ASCII file names
 *   produce well-formed URLs.
 *
 * @param {string} mdPath - Markdown path, e.g. `guide/intro.md`.
 * @param {string} locale - Locale segment placed directly after the site URL.
 * @returns {string} Absolute URL, always ending in "/".
 */
function mdPathToUrl(mdPath, locale) {
  const indexSuffix = '/index'
  let urlPath = mdPath.replace(/\\/g, '/').replace(/\.md$/, '')

  if (urlPath === 'index') {
    urlPath = ''
  } else if (urlPath.endsWith(indexSuffix)) {
    urlPath = urlPath.slice(0, -indexSuffix.length)
  }

  const encodedPath = urlPath
    .split('/')
    .map((segment) => encodeURIComponent(segment))
    .join('/')

  return `${siteUrl}/${locale}/${encodedPath}${encodedPath ? '/' : ''}`
}
|
||||
|
||||
/**
 * Render the sitemap XML document.
 *
 * Every entry becomes one `<url>` element containing `<loc>`, `<lastmod>`
 * (the generation time, identical for all entries), a fixed weekly
 * `<changefreq>`, `<priority>` with one decimal, and one `<xhtml:link>` per
 * language alternate. All interpolated text, including the hreflang
 * attribute, goes through escapeXml.
 *
 * @param {Array<{loc: string, priority: number, alternates?: Array<{hreflang: string, href: string}>}>} urls
 *   Entries to render, in output order.
 * @returns {string} The complete XML document, terminated by a newline.
 */
function generateSitemap(urls) {
  const now = new Date().toISOString()

  const lines = [
    '<?xml version="1.0" encoding="UTF-8"?>',
    '<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9"',
    ' xmlns:xhtml="http://www.w3.org/1999/xhtml">'
  ]

  for (const { loc, priority, alternates = [] } of urls) {
    lines.push(
      ' <url>',
      ` <loc>${escapeXml(loc)}</loc>`,
      ` <lastmod>${now}</lastmod>`,
      ` <changefreq>weekly</changefreq>`,
      ` <priority>${priority.toFixed(1)}</priority>`
    )

    // hreflang alternates
    for (const { hreflang, href } of alternates) {
      lines.push(` <xhtml:link rel="alternate" hreflang="${escapeXml(hreflang)}" href="${escapeXml(href)}"/>`)
    }

    lines.push(' </url>')
  }

  lines.push('</urlset>')
  return `${lines.join('\n')}\n`
}
|
||||
|
||||
/**
 * Escape the five characters that have special meaning in XML text and
 * attribute values: & < > " '
 *
 * The replacement happens in a single pass, so the "&" introduced by one
 * substitution is never escaped a second time.
 *
 * Entity names are stored without their "&" and ";" delimiters so that tools
 * which decode HTML entities in source text cannot turn a replacement into a
 * no-op.
 *
 * @param {string} str - Text to escape; other values are converted with String().
 * @returns {string} Text that is safe to embed in XML.
 */
function escapeXml(str) {
  const entityNames = {
    '&': 'amp',
    '<': 'lt',
    '>': 'gt',
    '"': 'quot',
    "'": 'apos'
  }

  return String(str).replace(/[&<>"']/g, (char) => `&${entityNames[char]};`)
}
|
||||
|
||||
/**
 * Entry point: build the sitemap for the documentation and write it to
 * `sitemap.xml` inside publicDir.
 *
 * Pages are enumerated from the `zh-cn` directory; if it does not exist, the
 * Markdown files located directly in docsDir are used instead. For each page,
 * every locale that has a file at the same relative path contributes one
 * hreflang alternate. The zh-cn URL is the entry's `<loc>` when present,
 * otherwise the first available alternate. Pages with no locale file are
 * omitted. A home-page entry listing all locales is placed first.
 */
function main() {
  console.log('🔍 Scanning documentation files...')

  // The zh-cn tree is the reference list of pages.
  const zhCnDir = path.join(docsDir, 'zh-cn')
  const baseFiles = fs.existsSync(zhCnDir)
    ? scanMarkdownFiles(zhCnDir)
    // Without a zh-cn directory, keep only files directly in the docs root
    // (either separator may appear depending on the platform).
    : scanMarkdownFiles(docsDir).filter((file) => !/[\\/]/.test(file))

  console.log(`📄 Found ${baseFiles.length} base pages`)

  const allUrls = []

  for (const baseFile of baseFiles) {
    // The root index.md is covered by the dedicated home-page entry below.
    if (baseFile === 'index.md') continue

    let loc = ''
    const alternates = []

    for (const locale of locales) {
      // Only locales that actually contain this page get an alternate.
      if (!fs.existsSync(path.join(docsDir, locale, baseFile))) continue

      const href = mdPathToUrl(baseFile, locale)
      alternates.push({ hreflang: getHreflangCode(locale), href })

      // zh-cn is the primary version of every page.
      if (locale === 'zh-cn') loc = href
    }

    if (alternates.length === 0) continue

    allUrls.push({
      // Fall back to the first available translation when zh-cn is missing.
      loc: loc || alternates[0].href,
      priority: getPriority(baseFile),
      alternates
    })
  }

  // Home page: listed first, with an alternate for every configured locale.
  allUrls.unshift({
    loc: `${siteUrl}/zh-cn/`,
    priority: 1.0,
    alternates: locales.map((locale) => ({
      hreflang: getHreflangCode(locale),
      href: `${siteUrl}/${locale}/`
    }))
  })

  console.log(`🌐 Generating sitemap with ${allUrls.length} URLs...`)

  const sitemapXml = generateSitemap(allUrls)
  const outputPath = path.join(publicDir, 'sitemap.xml')

  // Create the output directory if needed so the write cannot fail with ENOENT.
  fs.mkdirSync(publicDir, { recursive: true })
  fs.writeFileSync(outputPath, sitemapXml, 'utf-8')

  console.log(`✅ Sitemap generated at ${outputPath}`)
  console.log(`📊 Statistics:`)
  console.log(` - Total URLs: ${allUrls.length}`)
  console.log(` - Locales: ${locales.length}`)
  console.log(` - Site URL: ${siteUrl}`)
}
|
||||
|
||||
/**
 * Choose the sitemap priority of a page from markers in its path.
 *
 * Rules are evaluated in order and the first match wins:
 *   stage-0 / stage-1 -> 0.9
 *   stage-2 / stage-3 -> 0.8
 *   appendix          -> 0.7
 *   anything else     -> 0.6
 *
 * @param {string} filePath - Markdown path of the page.
 * @returns {number} Priority between 0.6 and 0.9.
 */
function getPriority(filePath) {
  const rules = [
    [['stage-0', 'stage-1'], 0.9],
    [['stage-2', 'stage-3'], 0.8],
    [['appendix'], 0.7]
  ]

  for (const [markers, priority] of rules) {
    if (markers.some((marker) => filePath.includes(marker))) return priority
  }

  return 0.6
}
|
||||
|
||||
/**
 * Translate a locale directory name into the code used in hreflang attributes.
 *
 * Locales without an entry in the table are returned unchanged. The lookup
 * checks own properties only, so names that exist on Object.prototype (for
 * example "constructor") are returned as strings rather than as the inherited
 * member.
 *
 * @param {string} locale - Locale directory name, e.g. `zh-cn`.
 * @returns {string} The hreflang code, or `locale` itself when unmapped.
 */
function getHreflangCode(locale) {
  const hreflangByLocale = {
    'zh-cn': 'zh-CN',
    'en': 'en',
    'zh-tw': 'zh-TW',
    'ja-jp': 'ja',
    'ko-kr': 'ko',
    'es-es': 'es',
    'fr-fr': 'fr',
    'de-de': 'de',
    'ar-sa': 'ar',
    'vi-vn': 'vi'
  }

  return Object.hasOwn(hreflangByLocale, locale) ? hreflangByLocale[locale] : locale
}
|
||||
|
||||
// Run the generator as soon as the module is loaded.
main()
|
||||
Reference in New Issue
Block a user