feat(sitemap): improve sitemap generation with git last modified dates

Add git last modified date tracking for sitemap URLs to provide more accurate lastmod timestamps
Only write sitemap file when content actually changes to avoid unnecessary updates
This commit is contained in:
sanbuphy
2026-03-03 18:05:08 +08:00
parent 0ca1a53306
commit 5728ae16b3
+59 -9
View File
@@ -7,6 +7,7 @@
import fs from 'fs' import fs from 'fs'
import path from 'path' import path from 'path'
import { fileURLToPath } from 'url' import { fileURLToPath } from 'url'
import { execSync } from 'child_process'
const __filename = fileURLToPath(import.meta.url) const __filename = fileURLToPath(import.meta.url)
const __dirname = path.dirname(__filename) const __dirname = path.dirname(__filename)
@@ -72,10 +73,38 @@ function mdPathToUrl(mdPath, locale) {
return `${siteUrl}/${locale}/${urlPath}${urlPath ? '/' : ''}` return `${siteUrl}/${locale}/${urlPath}${urlPath ? '/' : ''}`
} }
// 生成 sitemap XML function getGitLastModified(filePath) {
function generateSitemap(urls) { try {
const now = new Date().toISOString() const result = execSync(`git log -1 --format="%aI" -- "${filePath}"`, {
cwd: path.resolve(__dirname, '..'),
encoding: 'utf-8',
stdio: ['pipe', 'pipe', 'pipe']
}).trim()
if (result) {
return result
}
} catch {
// 文件可能是新文件或不在 git 中
}
const stats = fs.statSync(filePath)
return stats.mtime.toISOString()
}
function getLatestModTime(files) {
let latest = '1970-01-01T00:00:00.000Z'
for (const file of files) {
const fullPath = path.join(docsDir, file.locale, file.relativePath)
if (fs.existsSync(fullPath)) {
const modTime = getGitLastModified(fullPath)
if (modTime > latest) {
latest = modTime
}
}
}
return latest
}
function generateSitemap(urls) {
let xml = '<?xml version="1.0" encoding="UTF-8"?>\n' let xml = '<?xml version="1.0" encoding="UTF-8"?>\n'
xml += '<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9"\n' xml += '<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9"\n'
xml += ' xmlns:xhtml="http://www.w3.org/1999/xhtml">\n' xml += ' xmlns:xhtml="http://www.w3.org/1999/xhtml">\n'
@@ -83,11 +112,10 @@ function generateSitemap(urls) {
for (const urlInfo of urls) { for (const urlInfo of urls) {
xml += ' <url>\n' xml += ' <url>\n'
xml += ` <loc>${escapeXml(urlInfo.loc)}</loc>\n` xml += ` <loc>${escapeXml(urlInfo.loc)}</loc>\n`
xml += ` <lastmod>${now}</lastmod>\n` xml += ` <lastmod>${urlInfo.lastmod}</lastmod>\n`
xml += ` <changefreq>weekly</changefreq>\n` xml += ` <changefreq>weekly</changefreq>\n`
xml += ` <priority>${urlInfo.priority.toFixed(1)}</priority>\n` xml += ` <priority>${urlInfo.priority.toFixed(1)}</priority>\n`
// 添加 hreflang alternates
for (const alternate of urlInfo.alternates) { for (const alternate of urlInfo.alternates) {
xml += ` <xhtml:link rel="alternate" hreflang="${alternate.hreflang}" href="${escapeXml(alternate.href)}"/>\n` xml += ` <xhtml:link rel="alternate" hreflang="${alternate.hreflang}" href="${escapeXml(alternate.href)}"/>\n`
} }
@@ -136,7 +164,8 @@ function main() {
const urlInfo = { const urlInfo = {
loc: '', loc: '',
priority: getPriority(baseFile), priority: getPriority(baseFile),
alternates: [] alternates: [],
sourceFiles: []
} }
// 为每个语言版本生成 alternate // 为每个语言版本生成 alternate
@@ -151,6 +180,7 @@ function main() {
hreflang: getHreflangCode(locale), hreflang: getHreflangCode(locale),
href: url href: url
}) })
urlInfo.sourceFiles.push({ locale, relativePath: baseFile })
// 设置主要语言版本为 zh-cn // 设置主要语言版本为 zh-cn
if (locale === 'zh-cn') { if (locale === 'zh-cn') {
@@ -165,31 +195,51 @@ function main() {
if (!urlInfo.loc) { if (!urlInfo.loc) {
urlInfo.loc = urlInfo.alternates[0].href urlInfo.loc = urlInfo.alternates[0].href
} }
urlInfo.lastmod = getLatestModTime(urlInfo.sourceFiles)
allUrls.push(urlInfo) allUrls.push(urlInfo)
} }
} }
// 添加首页 // 添加首页
const homeAlternates = [] const homeAlternates = []
const homeSourceFiles = []
for (const locale of locales) { for (const locale of locales) {
homeAlternates.push({ homeAlternates.push({
hreflang: getHreflangCode(locale), hreflang: getHreflangCode(locale),
href: `${siteUrl}/${locale}/` href: `${siteUrl}/${locale}/`
}) })
const indexPath = path.join(docsDir, locale, 'index.md')
if (fs.existsSync(indexPath)) {
homeSourceFiles.push({ locale, relativePath: 'index.md' })
}
} }
allUrls.unshift({ allUrls.unshift({
loc: `${siteUrl}/zh-cn/`, loc: `${siteUrl}/zh-cn/`,
priority: 1.0, priority: 1.0,
alternates: homeAlternates alternates: homeAlternates,
sourceFiles: homeSourceFiles,
lastmod: getLatestModTime(homeSourceFiles)
}) })
console.log(`🌐 Generating sitemap with ${allUrls.length} URLs...`) console.log(`🌐 Generating sitemap with ${allUrls.length} URLs...`)
const sitemapXml = generateSitemap(allUrls) const sitemapXml = generateSitemap(allUrls)
const outputPath = path.join(publicDir, 'sitemap.xml') const outputPath = path.join(publicDir, 'sitemap.xml')
fs.writeFileSync(outputPath, sitemapXml, 'utf-8')
console.log(`✅ Sitemap generated at ${outputPath}`) let shouldWrite = true
if (fs.existsSync(outputPath)) {
const existingXml = fs.readFileSync(outputPath, 'utf-8')
if (existingXml === sitemapXml) {
shouldWrite = false
console.log(`⏭️ Sitemap unchanged, skipping write`)
}
}
if (shouldWrite) {
fs.writeFileSync(outputPath, sitemapXml, 'utf-8')
console.log(`✅ Sitemap generated at ${outputPath}`)
}
console.log(`📊 Statistics:`) console.log(`📊 Statistics:`)
console.log(` - Total URLs: ${allUrls.length}`) console.log(` - Total URLs: ${allUrls.length}`)
console.log(` - Locales: ${locales.length}`) console.log(` - Locales: ${locales.length}`)