# -*- coding: utf-8 -*-
# vim: ai ts=4 sts=4 et sw=4
# export.py: Akregator archive to newsbeuter sqlite3 converter (Python 2)
import metakit as mk
import urlparse as up
import sqlite3 as sq3
from ConfigParser import ConfigParser
import sys
# Running article counter, shared by all feeds; also used as the rss_item primary key
idxarticle = 0

# Get configuration from file
cparse = ConfigParser()
cparse.read("export.cfg")
path = cparse.get("settings", "akregator")
sq3file = cparse.get("settings", "output")
indexfile = path + "archiveindex.mk4"
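
# A minimal export.cfg might look like this (the [settings] section and both
# keys are what the code above expects; the paths are only illustrative, and
# the akregator path needs a trailing slash because filenames are appended
# to it directly):
#
#   [settings]
#   akregator = /home/user/.kde/share/apps/akregator/Archive/
#   output = /home/user/.newsbeuter/cache.db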
def readfeed(file, feedurl):
    # Open a Metakit file (one file per feed) and convert every stored article
    global idxarticle
    blogdb = mk.storage(file, 0)    # 0 = open read-only
    blogvw = blogdb.getas(blogdb.description())
    for i in blogvw:
        writedb_article(i, feedurl)
        idxarticle = idxarticle + 1
    return
def getfeed(feed):
    # Map one entry from the Akregator index to its per-feed metakit archive
    global idxarticle
    sys.stdout.write("Reading " + feed.url + ": ")
    writedb_feed(feed)
    # Akregator names each archive after the feed URL with ':' and '/' escaped
    file = feed.url.replace(":", "_").replace("/", "_")
    file = path + file + ".mk4"
    readfeed(file, feed.url)
    print idxarticle
    return
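
# For illustration, the mangling above turns a feed URL into a filename like:
#   http://example.com/feed.rss  ->  path + "http___example.com_feed.rss.mk4"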
def initdb():
    # Initialize sqlite3 tables to receive data; the schema mirrors newsbeuter's cache.db
    print "Deleting old tables"
    sqdb.execute("PRAGMA synchronous = OFF")    # speed up bulk inserts
    sqdb.execute("drop table if exists google_replay")
    sqdb.execute("drop table if exists rss_feed")
    sqdb.execute("drop table if exists rss_item")
    print "Creating new tables"
    sqdb.execute("CREATE TABLE google_replay ( id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL, guid VARCHAR(64) NOT NULL, state INTEGER NOT NULL, ts INTEGER NOT NULL )")
    sqdb.execute('CREATE TABLE rss_feed ( rssurl VARCHAR(1024) PRIMARY KEY NOT NULL, url VARCHAR(1024) NOT NULL, title VARCHAR(1024) NOT NULL , lastmodified INTEGER(11) NOT NULL DEFAULT 0, is_rtl INTEGER(1) NOT NULL DEFAULT 0, etag VARCHAR(128) NOT NULL DEFAULT "" )')
    sqdb.execute('CREATE TABLE rss_item ( id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL, guid VARCHAR(64) NOT NULL, title VARCHAR(1024) NOT NULL, author VARCHAR(1024) NOT NULL, url VARCHAR(1024) NOT NULL, feedurl VARCHAR(1024) NOT NULL, pubDate INTEGER NOT NULL, content VARCHAR(65535) NOT NULL, unread INTEGER(1) NOT NULL , enclosure_url VARCHAR(1024), enclosure_type VARCHAR(1024), enqueued INTEGER(1) NOT NULL DEFAULT 0, flags VARCHAR(52), deleted INTEGER(1) NOT NULL DEFAULT 0, base VARCHAR(128) NOT NULL DEFAULT "")')
    return
def writedb_feed(feed):
    # Store one feed record; no feed title seems to be saved in the mk4
    # archive, so fall back to the site hostname as the title
    parsed = up.urlparse(feed.url)
    insert = (feed.url, parsed.scheme + "://" + parsed.netloc, feed.lastfetch, parsed.netloc)
    sqdb.execute("insert into rss_feed (rssurl, url, lastmodified, title) values(?, ?, ?, ?)", insert)
    return
def writedb_article(article, feedurl):
    # Store an article in sqlite3; imported articles are marked read (unread = 0)
    global idxarticle
    if not len(article.title):
        print "Skipped " + article.guid + " with empty title"
        return
    # Prefer the full text of the article over the summary
    if len(article.content):
        content = article.content
    else:
        content = article.description
    insert = (idxarticle, article.guid, article.title.decode("utf-8"),
              article.authorName.decode("utf-8"), article.link, feedurl,
              article.pubDate, content.decode("utf-8"), 0)
    sqdb.execute("insert into rss_item (id, guid, title, author, url, feedurl, pubDate, content, unread) values (?,?,?,?,?,?,?,?,?)", insert)
    return
print "Starting conversion:"
db = mk.storage(indexfile, 0)
vw = db.getas(db.description())
print "Opening up database files"
sqdb = sq3.connect(sq3file)
initdb()
for i in vw:
    getfeed(i)
print "Imported " + str(idxarticle) + " articles"
print "Creating indexes:"
sqdb.execute("CREATE INDEX idx_deleted ON rss_item(deleted)")
sqdb.execute("CREATE INDEX idx_feedurl ON rss_item(feedurl)")
sqdb.execute("CREATE INDEX idx_guid ON rss_item(guid)")
sqdb.execute("CREATE INDEX idx_lastmodified ON rss_feed(lastmodified)")
sqdb.execute("CREATE INDEX idx_rssurl ON rss_feed(rssurl)")
# Commit pending inserts before VACUUM, which cannot run inside a transaction
sqdb.commit()
sqdb.execute("vacuum")
print "Vacuuming complete, now closing database."
sqdb.close()
print "Finished!"
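
# A quick sanity check of the result (illustrative sqlite3 CLI session; the
# database path comes from the "output" setting in export.cfg):
#
#   $ sqlite3 cache.db 'select count(*) from rss_item'
#   $ sqlite3 cache.db 'select rssurl, title from rss_feed limit 5'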