lib.rs 22 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732
  1. // -*- coding: utf-8 -*-
  2. //
  3. // Copyright (C) 2024 Michael Büsch <m@bues.ch>
  4. // Copyright (C) 2020 Marco Lochen
  5. //
  6. // This program is free software: you can redistribute it and/or modify
  7. // it under the terms of the GNU General Public License as published by
  8. // the Free Software Foundation, either version 2 of the License, or
  9. // (at your option) any later version.
  10. //
  11. // This program is distributed in the hope that it will be useful,
  12. // but WITHOUT ANY WARRANTY; without even the implied warranty of
  13. // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  14. // GNU General Public License for more details.
  15. //
  16. // You should have received a copy of the GNU General Public License
  17. // along with this program. If not, see <https://www.gnu.org/licenses/>.
  18. //
  19. // SPDX-License-Identifier: GPL-2.0-or-later
  20. #![forbid(unsafe_code)]
  21. mod error;
  22. use crate::error::Error;
  23. use anyhow::{self as ah, format_err as err, Context as _};
  24. use chrono::{DateTime, Utc};
  25. use rusqlite::{Connection, OpenFlags, Row};
  26. use sha2::{Digest as _, Sha256};
  27. use std::{
  28. path::{Path, PathBuf},
  29. sync::{Arc, Mutex},
  30. time::{Duration, Instant},
  31. };
  32. use tokio::task::spawn_blocking;
  /// Debug switch exported by this crate; it is currently always `true`.
  pub const DEBUG: bool = true;

  /// Time budget (10 seconds) used both as the SQLite busy timeout and as the
  /// deadline for retrying operations on a busy database.
  const TIMEOUT: Duration = Duration::from_secs(10);
  /// Returns the installation prefix.
  ///
  /// The prefix is taken from the `FEEDREADER_PREFIX` environment variable as it
  /// was set at *compile time*; if it was not set, `/` is used.
  pub fn get_prefix() -> PathBuf {
      match option_env!("FEEDREADER_PREFIX") {
          Some(prefix) => PathBuf::from(prefix),
          None => PathBuf::from("/"),
      }
  }
  38. pub fn get_varlib() -> PathBuf {
  39. get_prefix().join("var/lib/feedreader")
  40. }
  41. fn sql_to_dt(timestamp: i64) -> DateTime<Utc> {
  42. DateTime::<Utc>::from_timestamp(timestamp, 0).unwrap_or_else(Utc::now)
  43. }
  44. fn dt_to_sql(dt: &DateTime<Utc>) -> i64 {
  45. dt.timestamp()
  46. }
  47. #[derive(Clone, Debug)]
  48. pub struct Feed {
  49. pub feed_id: Option<i64>,
  50. pub href: String,
  51. pub title: String,
  52. pub last_retrieval: DateTime<Utc>,
  53. pub next_retrieval: DateTime<Utc>,
  54. pub last_activity: DateTime<Utc>,
  55. pub disabled: bool,
  56. pub updated_items: i64,
  57. }
  58. impl Feed {
  59. fn from_sql_row(row: &Row<'_>) -> rusqlite::Result<Self> {
  60. Ok(Self {
  61. feed_id: Some(row.get(0)?),
  62. href: row.get(1)?,
  63. title: row.get(2)?,
  64. last_retrieval: sql_to_dt(row.get(3)?),
  65. next_retrieval: sql_to_dt(row.get(4)?),
  66. last_activity: sql_to_dt(row.get(5)?),
  67. disabled: row.get(6)?,
  68. updated_items: row.get(7)?,
  69. })
  70. }
  71. }
  72. #[derive(Clone, Debug)]
  73. pub struct Item {
  74. pub item_id: Option<String>,
  75. pub feed_id: Option<i64>,
  76. pub retrieved: DateTime<Utc>,
  77. pub seen: bool,
  78. pub author: String,
  79. pub title: String,
  80. pub feed_item_id: String,
  81. pub link: String,
  82. pub published: DateTime<Utc>,
  83. pub summary: String,
  84. }
  85. impl Item {
  86. fn from_sql_row(row: &Row<'_>) -> rusqlite::Result<Self> {
  87. Ok(Self {
  88. item_id: Some(row.get(0)?),
  89. feed_id: Some(row.get(1)?),
  90. retrieved: sql_to_dt(row.get(2)?),
  91. seen: row.get(3)?,
  92. author: row.get(4)?,
  93. title: row.get(5)?,
  94. feed_item_id: row.get(6)?,
  95. link: row.get(7)?,
  96. published: sql_to_dt(row.get(8)?),
  97. summary: row.get(9)?,
  98. })
  99. }
  100. fn from_sql_row_extended(row: &Row<'_>) -> rusqlite::Result<(Self, ItemExt)> {
  101. let count: i64 = row.get(10)?;
  102. let max_seen: bool = row.get(11)?;
  103. let sum_seen: i64 = row.get(12)?;
  104. Ok((
  105. Self::from_sql_row(row)?,
  106. ItemExt {
  107. count,
  108. any_seen: max_seen,
  109. all_seen: sum_seen == count,
  110. },
  111. ))
  112. }
  113. pub async fn make_id(&self) -> String {
  114. let mut h = Sha256::new();
  115. h.update(&self.feed_item_id);
  116. h.update(&self.author);
  117. h.update(&self.title);
  118. h.update(&self.link);
  119. h.update(format!("{}", dt_to_sql(&self.published)));
  120. h.update(&self.summary);
  121. hex::encode(h.finalize())
  122. }
  123. }
  /// Aggregate information about a group of item rows, as returned alongside
  /// each item by `DbConn::get_feed_items`.
  #[derive(Debug, Clone)]
  pub struct ItemExt {
      /// Number of rows in the group.
      pub count: i64,
      /// Whether at least one row of the group is marked as seen.
      pub any_seen: bool,
      /// Whether every row of the group is marked as seen.
      pub all_seen: bool,
  }
  /// Result of `DbConn::check_item_exists`.
  #[derive(Debug, Clone, Copy, PartialEq, Eq)]
  pub enum ItemStatus {
      /// Neither the `item_id` nor the `feed_item_id` is stored yet.
      New,
      /// The `feed_item_id` is already stored, but not under this `item_id`.
      Updated,
      /// A row with this `item_id` is already stored.
      Exists,
  }
  136. async fn transaction<F, R>(conn: Arc<Mutex<Connection>>, mut f: F) -> ah::Result<R>
  137. where
  138. F: FnMut(rusqlite::Transaction) -> Result<R, Error> + Send + 'static,
  139. R: Send + 'static,
  140. {
  141. spawn_blocking(move || {
  142. let timeout = Instant::now() + TIMEOUT;
  143. loop {
  144. let mut conn = conn.lock().expect("Mutex poisoned");
  145. let trans = conn.transaction()?;
  146. match f(trans) {
  147. Ok(r) => {
  148. break Ok(r);
  149. }
  150. Err(Error::Sql(
  151. e @ rusqlite::Error::SqliteFailure(
  152. rusqlite::ffi::Error {
  153. code: rusqlite::ffi::ErrorCode::DatabaseBusy,
  154. ..
  155. },
  156. ..,
  157. ),
  158. )) => {
  159. drop(conn); // unlock
  160. if Instant::now() >= timeout {
  161. break Err(e.into());
  162. }
  163. std::thread::sleep(Duration::from_millis(20));
  164. }
  165. Err(e) => {
  166. break Err(e.into());
  167. }
  168. }
  169. }
  170. })
  171. .await?
  172. }
  /// An open database connection.
  ///
  /// The SQLite connection is kept behind `Arc<Mutex<..>>` so that it can be
  /// handed to closures running on the blocking thread pool.
  173. pub struct DbConn {
  174. conn: Arc<Mutex<Connection>>,
  175. }
  176. impl DbConn {
  177. async fn new(path: &Path) -> ah::Result<Self> {
  178. let path = path.to_path_buf();
  179. let conn = spawn_blocking(move || -> ah::Result<Connection> {
  180. let timeout = Instant::now() + TIMEOUT;
  181. loop {
  182. let conn = match Connection::open_with_flags(
  183. &path,
  184. OpenFlags::SQLITE_OPEN_READ_WRITE
  185. | OpenFlags::SQLITE_OPEN_CREATE
  186. | OpenFlags::SQLITE_OPEN_NO_MUTEX,
  187. ) {
  188. Ok(conn) => conn,
  189. Err(
  190. e @ rusqlite::Error::SqliteFailure(
  191. rusqlite::ffi::Error {
  192. code: rusqlite::ffi::ErrorCode::DatabaseBusy,
  193. ..
  194. },
  195. ..,
  196. ),
  197. ) => {
  198. if Instant::now() >= timeout {
  199. break Err(e.into());
  200. }
  201. std::thread::sleep(Duration::from_millis(20));
  202. continue;
  203. }
  204. Err(e) => {
  205. break Err(e.into());
  206. }
  207. };
  208. conn.busy_timeout(TIMEOUT)?;
  209. conn.set_prepared_statement_cache_capacity(64);
  210. break Ok(conn);
  211. }
  212. })
  213. .await?
  214. .context("Open SQLite database")?;
  215. Ok(Self {
  216. conn: Arc::new(Mutex::new(conn)),
  217. })
  218. }
  219. #[rustfmt::skip]
  220. pub async fn init(&mut self) -> ah::Result<()> {
  221. transaction(Arc::clone(&self.conn), move |t| {
  222. t.execute(
  223. "\
  224. CREATE TABLE IF NOT EXISTS feeds (\
  225. feed_id INTEGER PRIMARY KEY, \
  226. href VARCHAR, \
  227. title VARCHAR, \
  228. last_retrieval TIMESTAMP, \
  229. next_retrieval TIMESTAMP, \
  230. last_activity TIMESTAMP, \
  231. disabled BOOLEAN, \
  232. updated_items INTEGER\
  233. )",
  234. [],
  235. )?;
  236. t.execute(
  237. "\
  238. CREATE TABLE IF NOT EXISTS items (\
  239. item_id VARCHAR PRIMARY KEY, \
  240. feed_id INTEGER, \
  241. retrieved TIMESTAMP, \
  242. seen BOOLEAN, \
  243. author VARCHAR, \
  244. title VARCHAR, \
  245. feed_item_id VARCHAR, \
  246. link VARCHAR, \
  247. published TIMESTAMP, \
  248. summary VARCHAR, \
  249. FOREIGN KEY(feed_id) REFERENCES feeds(feed_id)\
  250. )",
  251. [],
  252. )?;
  253. t.execute("CREATE INDEX IF NOT EXISTS feed_id ON feeds(feed_id)", [])?;
  254. t.execute("CREATE INDEX IF NOT EXISTS item_id ON items(item_id)", [])?;
  255. // Remove legacy table.
  256. t.execute("DROP TABLE IF EXISTS enclosures", [])?;
  257. // Remove dangling items.
  258. t.execute(
  259. "\
  260. DELETE FROM items \
  261. WHERE feed_id NOT IN (\
  262. SELECT feed_id FROM feeds\
  263. )\
  264. ",
  265. []
  266. )?;
  267. t.commit()?;
  268. Ok(())
  269. })
  270. .await
  271. }
  272. pub async fn vacuum(&mut self) -> ah::Result<()> {
  273. spawn_blocking({
  274. let conn = Arc::clone(&self.conn);
  275. move || {
  276. let conn = conn.lock().expect("Mutex poisoned");
  277. conn.execute("VACUUM", [])?;
  278. Ok(())
  279. }
  280. })
  281. .await?
  282. }
  283. pub async fn update_feed(
  284. &mut self,
  285. feed: &Feed,
  286. items: &[Item],
  287. gc_thres: Option<DateTime<Utc>>,
  288. ) -> ah::Result<()> {
  289. let feed = feed.clone();
  290. let items = items.to_vec();
  291. transaction(Arc::clone(&self.conn), move |t| {
  292. let Some(feed_id) = feed.feed_id else {
  293. return Err(Error::Ah(err!("update_feed(): Invalid feed. No feed_id.")));
  294. };
  295. t.prepare_cached(
  296. "\
  297. UPDATE feeds SET \
  298. href = ?, \
  299. title = ?, \
  300. last_retrieval = ?, \
  301. next_retrieval = ?, \
  302. last_activity = ?, \
  303. disabled = ?, \
  304. updated_items = ? \
  305. WHERE feed_id = ?\
  306. ",
  307. )?
  308. .execute((
  309. &feed.href,
  310. &feed.title,
  311. dt_to_sql(&feed.last_retrieval),
  312. dt_to_sql(&feed.next_retrieval),
  313. dt_to_sql(&feed.last_activity),
  314. feed.disabled,
  315. feed.updated_items,
  316. feed_id,
  317. ))?;
  318. for item in &items {
  319. let Some(item_id) = &item.item_id else {
  320. return Err(Error::Ah(err!("update_feed(): Invalid item. No item_id.")));
  321. };
  322. if item.feed_id.is_some() && item.feed_id != Some(feed_id) {
  323. return Err(Error::Ah(err!(
  324. "update_feed(): Invalid item. Invalid feed_id."
  325. )));
  326. }
  327. t.prepare_cached(
  328. "\
  329. INSERT INTO items \
  330. VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)\
  331. ",
  332. )?
  333. .execute((
  334. item_id,
  335. feed_id,
  336. dt_to_sql(&item.retrieved),
  337. item.seen,
  338. &item.author,
  339. &item.title,
  340. &item.feed_item_id,
  341. &item.link,
  342. dt_to_sql(&item.published),
  343. &item.summary,
  344. ))?;
  345. }
  346. if let Some(gc_thres) = gc_thres.as_ref() {
  347. t.prepare_cached(
  348. "\
  349. DELETE FROM items \
  350. WHERE \
  351. feed_id = ? AND \
  352. published < ? AND \
  353. seen = TRUE\
  354. ",
  355. )?
  356. .execute((feed_id, dt_to_sql(gc_thres)))?;
  357. }
  358. t.commit()?;
  359. Ok(())
  360. })
  361. .await
  362. }
  363. pub async fn add_feed(&mut self, href: &str) -> ah::Result<()> {
  364. let href = href.to_string();
  365. transaction(Arc::clone(&self.conn), move |t| {
  366. t.prepare_cached(
  367. "\
  368. INSERT INTO feeds \
  369. VALUES (?, ?, ?, ?, ?, ?, ?, ?)\
  370. ",
  371. )?
  372. .execute((
  373. None::<i64>,
  374. &href,
  375. "[New feed] Updating...",
  376. 0,
  377. 0,
  378. 0,
  379. false,
  380. 0,
  381. ))?;
  382. t.commit()?;
  383. Ok(())
  384. })
  385. .await
  386. }
  387. pub async fn delete_feeds(&mut self, feed_ids: &[i64]) -> ah::Result<()> {
  388. if !feed_ids.is_empty() {
  389. let feed_ids = feed_ids.to_vec();
  390. transaction(Arc::clone(&self.conn), move |t| {
  391. for feed_id in &feed_ids {
  392. t.prepare_cached(
  393. "\
  394. DELETE FROM items \
  395. WHERE feed_id = ?\
  396. ",
  397. )?
  398. .execute([feed_id])?;
  399. t.prepare_cached(
  400. "\
  401. DELETE FROM feeds \
  402. WHERE feed_id = ?\
  403. ",
  404. )?
  405. .execute([feed_id])?;
  406. }
  407. t.commit()?;
  408. Ok(())
  409. })
  410. .await
  411. } else {
  412. Ok(())
  413. }
  414. }
  415. pub async fn get_feeds_due(&mut self) -> ah::Result<Vec<Feed>> {
  416. let now = Utc::now();
  417. transaction(Arc::clone(&self.conn), move |t| {
  418. let feeds: Vec<Feed> = t
  419. .prepare_cached(
  420. "\
  421. SELECT * FROM feeds \
  422. WHERE \
  423. next_retrieval < ? AND \
  424. disabled == FALSE\
  425. ",
  426. )?
  427. .query_map([dt_to_sql(&now)], Feed::from_sql_row)?
  428. .map(|f| f.unwrap())
  429. .collect();
  430. t.finish()?;
  431. Ok(feeds)
  432. })
  433. .await
  434. }
  435. pub async fn get_next_due_time(&mut self) -> ah::Result<DateTime<Utc>> {
  436. transaction(Arc::clone(&self.conn), move |t| {
  437. let next_retrieval = t
  438. .prepare_cached(
  439. "\
  440. SELECT min(next_retrieval) FROM feeds \
  441. WHERE disabled == FALSE\
  442. ",
  443. )?
  444. .query([])?
  445. .next()?
  446. .unwrap()
  447. .get(0)?;
  448. t.finish()?;
  449. Ok(sql_to_dt(next_retrieval))
  450. })
  451. .await
  452. }
  453. pub async fn get_feeds(&mut self, active_feed_id: Option<i64>) -> ah::Result<Vec<Feed>> {
  454. transaction(Arc::clone(&self.conn), move |t| {
  455. if let Some(active_feed_id) = active_feed_id {
  456. t.prepare_cached(
  457. "\
  458. UPDATE feeds \
  459. SET updated_items = 0 \
  460. WHERE feed_id = ?\
  461. ",
  462. )?
  463. .execute([active_feed_id])?;
  464. }
  465. let feeds: Vec<Feed> = t
  466. .prepare_cached(
  467. "\
  468. SELECT * FROM feeds \
  469. ORDER BY last_activity DESC\
  470. ",
  471. )?
  472. .query_map([], Feed::from_sql_row)?
  473. .map(|f| f.unwrap())
  474. .collect();
  475. if active_feed_id.is_some() {
  476. t.commit()?;
  477. } else {
  478. t.finish()?;
  479. }
  480. Ok(feeds)
  481. })
  482. .await
  483. }
  484. pub async fn get_feed_items(&mut self, feed_id: i64) -> ah::Result<Vec<(Item, ItemExt)>> {
  485. transaction(Arc::clone(&self.conn), move |t| {
  486. let items: Vec<(Item, ItemExt)> = t
  487. .prepare_cached(
  488. "\
  489. SELECT \
  490. item_id, \
  491. feed_id, \
  492. max(retrieved), \
  493. seen, \
  494. author, \
  495. title, \
  496. feed_item_id, \
  497. link, \
  498. published, \
  499. summary, \
  500. count() as count, \
  501. max(seen) as any_seen, \
  502. sum(seen) as sum_seen \
  503. FROM items \
  504. WHERE feed_id = ? \
  505. GROUP BY feed_item_id \
  506. ORDER BY published DESC\
  507. ",
  508. )?
  509. .query_map([feed_id], Item::from_sql_row_extended)?
  510. .map(|i| i.unwrap())
  511. .collect();
  512. t.prepare_cached(
  513. "\
  514. UPDATE items \
  515. SET seen = TRUE \
  516. WHERE feed_id = ?\
  517. ",
  518. )?
  519. .execute([feed_id])?;
  520. t.commit()?;
  521. Ok(items)
  522. })
  523. .await
  524. }
  525. pub async fn get_feed_items_by_item_id(
  526. &mut self,
  527. feed_id: i64,
  528. item_id: &str,
  529. ) -> ah::Result<Vec<Item>> {
  530. let item_id = item_id.to_string();
  531. transaction(Arc::clone(&self.conn), move |t| {
  532. let items: Vec<Item> = t
  533. .prepare_cached(
  534. "\
  535. SELECT * FROM items \
  536. WHERE \
  537. feed_id = ? AND \
  538. feed_item_id IN (\
  539. SELECT feed_item_id FROM items \
  540. WHERE item_id = ?\
  541. ) \
  542. ORDER BY retrieved DESC\
  543. ",
  544. )?
  545. .query_map((feed_id, &item_id), Item::from_sql_row)?
  546. .map(|i| i.unwrap())
  547. .collect();
  548. t.prepare_cached(
  549. "\
  550. UPDATE items \
  551. SET seen = TRUE \
  552. WHERE feed_id = ?\
  553. ",
  554. )?
  555. .execute([feed_id])?;
  556. t.commit()?;
  557. Ok(items)
  558. })
  559. .await
  560. }
  561. pub async fn set_seen(&mut self, feed_id: Option<i64>) -> ah::Result<()> {
  562. transaction(Arc::clone(&self.conn), move |t| {
  563. if let Some(feed_id) = feed_id {
  564. t.prepare_cached(
  565. "\
  566. UPDATE items \
  567. SET seen = TRUE \
  568. WHERE feed_id = ?\
  569. ",
  570. )?
  571. .execute([feed_id])?;
  572. t.prepare_cached(
  573. "\
  574. UPDATE feeds \
  575. SET updated_items = 0 \
  576. WHERE feed_id = ?\
  577. ",
  578. )?
  579. .execute([feed_id])?;
  580. } else {
  581. t.prepare_cached(
  582. "\
  583. UPDATE items \
  584. SET seen = TRUE \
  585. ",
  586. )?
  587. .execute([])?;
  588. t.prepare_cached(
  589. "\
  590. UPDATE feeds \
  591. SET updated_items = 0 \
  592. ",
  593. )?
  594. .execute([])?;
  595. }
  596. t.commit()?;
  597. Ok(())
  598. })
  599. .await
  600. }
  601. pub async fn check_item_exists(&mut self, item: &Item) -> ah::Result<ItemStatus> {
  602. if let Some(item_id) = item.item_id.as_ref() {
  603. let item_id = item_id.clone();
  604. let feed_item_id = item.feed_item_id.clone();
  605. transaction(Arc::clone(&self.conn), move |t| {
  606. let feed_item_id_count: Vec<i64> = t
  607. .prepare_cached(
  608. "\
  609. SELECT count(feed_item_id) \
  610. FROM items \
  611. WHERE feed_item_id = ?\
  612. ",
  613. )?
  614. .query_map([&feed_item_id], |row| row.get(0))?
  615. .map(|c| c.unwrap())
  616. .collect();
  617. let item_id_count: Vec<i64> = t
  618. .prepare_cached(
  619. "\
  620. SELECT count(item_id) \
  621. FROM items \
  622. WHERE item_id = ?\
  623. ",
  624. )?
  625. .query_map([&item_id], |row| row.get(0))?
  626. .map(|c| c.unwrap())
  627. .collect();
  628. let feed_item_id_count = *feed_item_id_count.first().unwrap_or(&0);
  629. let item_id_count = *item_id_count.first().unwrap_or(&0);
  630. let status = if item_id_count == 0 && feed_item_id_count == 0 {
  631. ItemStatus::New
  632. } else if item_id_count == 0 {
  633. ItemStatus::Updated
  634. } else {
  635. ItemStatus::Exists
  636. };
  637. t.finish()?;
  638. Ok(status)
  639. })
  640. .await
  641. } else {
  642. Err(err!("check_item_exists(): Invalid item. No item_id."))
  643. }
  644. }
  645. }
  646. pub struct Db {
  647. path: PathBuf,
  648. }
  649. impl Db {
  650. pub async fn new(name: &str) -> ah::Result<Self> {
  651. if !name
  652. .chars()
  653. .all(|c| c.is_ascii_alphanumeric() || c == '-' || c == '_')
  654. {
  655. return Err(err!("Invalid name"));
  656. }
  657. let path = get_varlib().join(format!("{name}.db"));
  658. Ok(Self { path })
  659. }
  660. pub async fn open(&self) -> ah::Result<DbConn> {
  661. DbConn::new(&self.path).await
  662. }
  663. }
  664. // vim: ts=4 sw=4 expandtab