sitemap.rs 6.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223
  1. // -*- coding: utf-8 -*-
  2. //
  3. // Simple CMS
  4. //
  5. // Copyright (C) 2011-2024 Michael Büsch <m@bues.ch>
  6. //
  7. // Licensed under the Apache License version 2.0
  8. // or the MIT license, at your option.
  9. // SPDX-License-Identifier: Apache-2.0 OR MIT
  10. use crate::{
  11. comm::{CmsComm, CommGetPage, CommPage, CommSubPages},
  12. config::CmsConfig,
  13. };
  14. use anyhow as ah;
  15. use chrono::prelude::*;
  16. use cms_ident::{CheckedIdent, UrlComp};
  17. use std::{fmt::Write as _, sync::Arc, write as wr, writeln as ln};
  /// Recursion limit for the page tree walk in `do_build_elems`.
  /// Pages at this depth or deeper are not added to the site map.
  const MAX_DEPTH: usize = 64;
  /// Initial capacity of the element vector allocated by `SiteMap::build`.
  const DEFAULT_ELEMS_ALLOC: usize = 256;
  /// Initial capacity, in bytes, of the output string in `SiteMap::get_xml` (16 KiB).
  const DEFAULT_HTML_ALLOC: usize = 16 * 1024;
  /// Replace the XML special characters `&`, `'`, `"`, `>` and `<` in `s`
  /// with their predefined entities and return the result.
  ///
  /// A string that contains none of these characters is returned as-is.
  fn xml_escape(mut s: String) -> String {
      // The ampersand must come first. Otherwise the `&` of the entities
      // inserted by the later replacements would be escaped a second time.
      const ENTITIES: [(char, &str); 5] = [
          ('&', "&amp;"),
          ('\'', "&apos;"),
          ('"', "&quot;"),
          ('>', "&gt;"),
          ('<', "&lt;"),
      ];
      for &(raw, entity) in ENTITIES.iter() {
          // Only build a new string if there is something to replace.
          if s.contains(raw) {
              s = s.replace(raw, entity);
          }
      }
      s
  }
  41. pub struct SiteMapContext<'a> {
  42. pub comm: &'a mut CmsComm,
  43. pub config: Arc<CmsConfig>,
  44. pub root: &'a CheckedIdent,
  45. pub protocol: &'a str,
  46. }
  /// One `<url>` entry of the site map.
  ///
  /// All values are stored unescaped. `SiteMap::get_xml` escapes them and
  /// leaves out the tag of every field that is an empty string.
  struct SiteMapElem {
      /// Content of the `<loc>` tag: the full URL of the page.
      loc: String,
      /// Content of the `<lastmod>` tag.
      lastmod: String,
      /// Content of the `<changefreq>` tag.
      changefreq: String,
      /// Content of the `<priority>` tag.
      priority: String,
  }
  53. async fn do_build_elems(
  54. ctx: &mut SiteMapContext<'_>,
  55. elems: &mut Vec<SiteMapElem>,
  56. ident: &CheckedIdent,
  57. stamp: DateTime<Utc>,
  58. nav_stop: bool,
  59. depth: usize,
  60. ) -> ah::Result<()> {
  61. if depth >= MAX_DEPTH {
  62. return Ok(());
  63. }
  64. let loc = ident.url(UrlComp {
  65. protocol: Some(ctx.protocol),
  66. domain: Some(ctx.config.domain()),
  67. base: Some(ctx.config.url_base()),
  68. });
  69. let lastmod;
  70. let changefreq;
  71. let priority;
  72. if depth == 1 {
  73. // Main groups
  74. lastmod = String::new();
  75. changefreq = "monthly".to_string();
  76. priority = "0.3".to_string();
  77. } else {
  78. // Pages, main page and sub groups
  79. lastmod = stamp.format("%Y-%m-%dT%H:%M:%SZ").to_string();
  80. changefreq = String::new();
  81. priority = "0.7".to_string();
  82. }
  83. elems.push(SiteMapElem {
  84. loc,
  85. lastmod,
  86. changefreq,
  87. priority,
  88. });
  89. if !nav_stop {
  90. let Ok(CommSubPages {
  91. mut names,
  92. nav_stops,
  93. stamps,
  94. ..
  95. }) = ctx.comm.get_db_sub_pages(ident).await
  96. else {
  97. return Ok(());
  98. };
  99. names.sort_unstable();
  100. for i in 0..names.len() {
  101. let sub_ident = ident.clone_append(&names[i]).into_checked()?;
  102. Box::pin(do_build_elems(
  103. ctx,
  104. elems,
  105. &sub_ident,
  106. stamps[i],
  107. nav_stops[i],
  108. depth + 1,
  109. ))
  110. .await?;
  111. }
  112. }
  113. Ok(())
  114. }
  115. async fn build_elems(
  116. ctx: &mut SiteMapContext<'_>,
  117. elems: &mut Vec<SiteMapElem>,
  118. ident: &CheckedIdent,
  119. ) -> ah::Result<()> {
  120. let Ok(CommPage { stamp, .. }) = ctx
  121. .comm
  122. .get_db_page(CommGetPage {
  123. path: ident.clone(),
  124. get_stamp: true,
  125. ..Default::default()
  126. })
  127. .await
  128. else {
  129. return Ok(());
  130. };
  131. do_build_elems(ctx, elems, ident, stamp.unwrap_or_default(), false, 0).await
  132. }
  133. async fn build_user_elems(
  134. ctx: &mut SiteMapContext<'_>,
  135. elems: &mut Vec<SiteMapElem>,
  136. ) -> ah::Result<()> {
  137. let user_site_map = ctx.comm.get_db_string("site-map").await?;
  138. for line in user_site_map.lines() {
  139. let line = line.trim();
  140. if line.is_empty() || line.starts_with('#') {
  141. continue;
  142. }
  143. let mut line = line.split_whitespace();
  144. let Some(loc) = line.next() else {
  145. continue;
  146. };
  147. let loc = format!("{}://{}/{}", ctx.protocol, ctx.config.domain(), loc);
  148. let priority = line.next().unwrap_or("0.7");
  149. let changefreq = line.next().unwrap_or("always");
  150. elems.push(SiteMapElem {
  151. loc,
  152. lastmod: String::new(),
  153. changefreq: changefreq.to_string(),
  154. priority: priority.to_string(),
  155. });
  156. }
  157. Ok(())
  158. }
  159. /// Site map generator.
  160. /// Specification: https://www.sitemaps.org/protocol.html
  161. pub struct SiteMap {
  162. elems: Vec<SiteMapElem>,
  163. }
  164. impl SiteMap {
  165. pub async fn build(mut ctx: SiteMapContext<'_>) -> ah::Result<Self> {
  166. let mut elems = Vec::with_capacity(DEFAULT_ELEMS_ALLOC);
  167. let root = ctx.root.clone();
  168. build_elems(&mut ctx, &mut elems, &root).await?;
  169. build_user_elems(&mut ctx, &mut elems).await?;
  170. Ok(Self { elems })
  171. }
  172. #[rustfmt::skip]
  173. pub fn get_xml(&self) -> ah::Result<String> {
  174. let mut b = String::with_capacity(DEFAULT_HTML_ALLOC);
  175. ln!(b, r#"<?xml version="1.0" encoding="UTF-8"?>"#)?;
  176. wr!(b, r#"<urlset xmlns="https://www.sitemaps.org/schemas/sitemap/0.9" "#)?;
  177. wr!(b, r#"xmlns:xsi="https://www.w3.org/2001/XMLSchema-instance" "#)?;
  178. wr!(b, r#"xsi:schemaLocation="https://www.sitemaps.org/schemas/sitemap/0.9 "#)?;
  179. ln!(b, r#"https://www.sitemaps.org/schemas/sitemap/0.9/sitemap.xsd">"#)?;
  180. for elem in &self.elems {
  181. let loc = xml_escape(elem.loc.clone());
  182. let lastmod = xml_escape(elem.lastmod.clone());
  183. let changefreq = xml_escape(elem.changefreq.clone());
  184. let priority = xml_escape(elem.priority.clone());
  185. ln!(b, r#"<url>"#)?;
  186. if !loc.is_empty() {
  187. ln!(b, r#"<loc>{loc}</loc>"#)?;
  188. }
  189. if !lastmod.is_empty() {
  190. ln!(b, r#"<lastmod>{lastmod}</lastmod>"#)?;
  191. }
  192. if !changefreq.is_empty() {
  193. ln!(b, r#"<changefreq>{changefreq}</changefreq>"#)?;
  194. }
  195. if !priority.is_empty() {
  196. ln!(b, r#"<priority>{priority}</priority>"#)?;
  197. }
  198. ln!(b, r#"</url>"#)?;
  199. }
  200. wr!(b, r#"</urlset>"#)?;
  201. Ok(b)
  202. }
  203. }
  204. // vim: ts=4 sw=4 expandtab