gnusrss.py 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471
  1. #!/usr/bin/env python3
  2. # -*- coding: utf-8 -*-
  3. import configparser
  4. import urllib.parse
  5. import pycurl
  6. import os.path
  7. import sqlite3
  8. import feedparser
  9. import argparse
  10. from os import listdir
  11. from sys import argv
  12. from xml.dom import minidom
  13. from io import BytesIO
  14. class Database:
  15. """Manage the database."""
  16. def __init__(self, database='gnusrss.db'):
  17. """
  18. Connect to the database.
  19. database -- string containig the filepath of the db
  20. (default: gnusrss.db)
  21. """
  22. self.connection = sqlite3.connect(database)
  23. def create_tables(self):
  24. """Create table and columns."""
  25. current = self.connection.cursor()
  26. current.execute('DROP TABLE IF EXISTS items')
  27. current.execute('CREATE TABLE items(id INTEGER PRIMARY KEY,'
  28. 'feed TEXT, post TEXT, posted INTEGER, url '
  29. 'TEXT, lastbuild TIMESTAMP, guid TEXT)')
  30. def insert_data(self, param):
  31. """
  32. Insert all the article's information to the table.
  33. Keyword arguments:
  34. param -- list containing all the values
  35. """
  36. self.connection.execute('INSERT INTO items(feed, post, posted'
  37. ', url, lastbuild, guid) VALUES(?, ?,'
  38. '?, ?, ?, ?)',(param))
  39. self.connection.commit()
  40. def select(self, param):
  41. """
  42. Return a select.
  43. Keyword arguments:
  44. param -- string containing a sql select
  45. """
  46. current = self.connection.cursor()
  47. current.execute(param)
  48. rows = current.fetchall()
  49. return rows
  50. def close(self):
  51. """Close the database."""
  52. self.connection.close()
  53. def rss(feed, post_format):
  54. """
  55. Request the feed, parse it and return requested values on a list
  56. of lists.
  57. Keyword arguments:
  58. feed -- string containing the url or the filepath of the feed
  59. post_format -- string containing RSS keywords surrounded by {}
  60. """
  61. foo = []
  62. xml = feedparser.parse(feed)
  63. keys = list(xml.entries[0].keys())
  64. lastbuild = xml.feed.updated
  65. rss_link = xml.feed.link
  66. for item in xml['items']:
  67. values = {}
  68. for i in keys:
  69. if i in post_format:
  70. values[i] = item[i]
  71. post = post_format.format(**values)
  72. foo.append([rss_link, post, item['link'], lastbuild,
  73. item['guid']])
  74. return foo
  75. def post(article, gs_node, username, password):
  76. """
  77. Post the articles to GNU Social.
  78. Keyword arguments:
  79. article -- list containing a most of what is necessary on the
  80. insert
  81. gs_node -- string containing the url of the GNU Social node
  82. username -- string containing the user of GNU Social
  83. password -- string containing the password of GNU Social
  84. """
  85. msg = article[1].split()
  86. api = (gs_node + '/api/''statuses/update.xml')
  87. # Check for twitter images and call post_image if required
  88. for word in msg:
  89. if 'pic.twitter.com/' in word:
  90. image = post_image(word, gs_node, username, password)
  91. index = msg.index(word)
  92. msg[index] = image
  93. msg = ' '.join(msg)
  94. buffer = BytesIO()
  95. post_data = {'status': msg, 'source': 'gnusrss'}
  96. postfields = urllib.parse.urlencode(post_data)
  97. curl = pycurl.Curl()
  98. curl.setopt(pycurl.URL, api)
  99. curl.setopt(pycurl.USERPWD, username + ':' + password)
  100. curl.setopt(pycurl.VERBOSE, False)
  101. curl.setopt(curl.POSTFIELDS, postfields)
  102. curl.setopt(pycurl.WRITEDATA, buffer)
  103. curl.perform()
  104. curl.close
  105. def post_image(picture, gs_node, username, password):
  106. """
  107. Upload a picture to GNU Social hosting and return a string with the
  108. new url.
  109. Keyword arguments:
  110. picture -- string containing the twitter url of a picture
  111. gs_node -- string containing the url of the GNU Social node
  112. username -- string containing the user of GNU Social
  113. password -- string containing the password of GNU Social
  114. """
  115. html = urllib.request.urlopen('https://' + picture).read().decode(
  116. 'utf-8').splitlines()
  117. api = gs_node + '/api/statusnet/media/upload'
  118. # Search the hardcoded tag name of the picture
  119. for tag in html:
  120. if 'data-image-url' in tag:
  121. pic = tag.split('"')[1]
  122. break
  123. buffer = BytesIO()
  124. # Pick the image and put it in the buffer
  125. curl = pycurl.Curl()
  126. curl.setopt(pycurl.URL, pic)
  127. curl.setopt(pycurl.VERBOSE, False)
  128. curl.setopt(pycurl.WRITEDATA, buffer)
  129. curl.perform()
  130. pic = buffer.getvalue()
  131. buffer = BytesIO()
  132. # Upload the buffer's image
  133. curl.setopt(pycurl.URL, api)
  134. curl.setopt(pycurl.USERPWD, username + ':' + password)
  135. curl.setopt(curl.HTTPPOST,
  136. [('media', (curl.FORM_BUFFER, 'useless.jpg',
  137. curl.FORM_BUFFERPTR, pic))])
  138. curl.setopt(pycurl.WRITEDATA, buffer)
  139. curl.perform()
  140. curl.close()
  141. buffer = buffer.getvalue().decode()
  142. xmldoc = minidom.parseString(buffer)
  143. item = xmldoc.getElementsByTagName('rsp')
  144. url = item.item(0).getElementsByTagName(
  145. 'mediaurl')[0].firstChild.data
  146. return url
  147. def shortener(post):
  148. """
  149. Return a shortened url.
  150. Keyword argument:
  151. post -- string containing a url to be shortened
  152. """
  153. api = ('http://qttr.at/yourls-api.php?format=xml&action=shorturl'
  154. '&signature=b6afeec983&url=' + post)
  155. buffer = BytesIO()
  156. curl = pycurl.Curl()
  157. curl.setopt(pycurl.URL, api)
  158. curl.setopt(pycurl.VERBOSE, False)
  159. curl.setopt(pycurl.WRITEDATA, buffer)
  160. curl.perform()
  161. buffer = buffer.getvalue().decode('utf-8')
  162. xmldoc = minidom.parseString(buffer)
  163. item = xmldoc.getElementsByTagName('result')
  164. url = item.item(0).getElementsByTagName('shorturl')[0].\
  165. firstChild.data
  166. return url
  167. def compare(feeds):
  168. """
  169. Compare the picked feed to the saved on the database and return
  170. list of lists if new.
  171. Keyword argument:
  172. feeds -- list of lists containing all actual feeds on the RSS file
  173. """
  174. db = Database()
  175. old = db.select('select guid from items;')
  176. new_feed = []
  177. posted = []
  178. # make the list accesible
  179. for x in old:
  180. posted.append(x[0])
  181. for feed in feeds:
  182. if feed[4] not in posted:
  183. new_feed.append(feed)
  184. return new_feed
  185. def get_config(name, option):
  186. """
  187. Parse config file and return it on a list.
  188. Keyword arguments:
  189. name -- string containing the config's name
  190. option -- string containin the section of the config to be parsed
  191. """
  192. config = []
  193. parser = configparser.SafeConfigParser()
  194. parser.read(name)
  195. for name, value in parser.items(option):
  196. config.append(value)
  197. return config
  198. def create_config(config_name):
  199. """
  200. Create config file.
  201. Keyword argument:
  202. config_name -- string containing the config's name to be created
  203. """
  204. print('Hi! Now we\'ll create de config file!')
  205. feed = input('Please introduce the feed\'s url: ')
  206. username = input('Please introduce your username '
  207. '(user@server.com): ')
  208. password = input('Please introduce your password: ')
  209. shorten = input('Do you need to shorten the urls that you '
  210. 'post? Please take in account \nthat you '
  211. 'should only use it if your node only has 140'
  212. ' characters. \nAnswer with "yes" or just press '
  213. 'enter if you don\'t want to use it: ')
  214. fallback_feed = input('Please introduce your feed\'s fallback'
  215. 'url. If you don\'t want or have one,\n'
  216. 'just press enter: ')
  217. print('Now we\'re gona fetch the feed. Please wait...')
  218. feed_file = feedparser.parse(feed)
  219. keys = list(feed_file.entries[0].keys())
  220. print('Done! The tags are: ')
  221. for tag in keys:
  222. print('\t' + tag)
  223. post_format = input('The XML has been parsed. Choose wich '
  224. 'format you want:\nPlease put the tags '
  225. 'inside the square brackets\nEx: {title}'
  226. ' - {link} by @{author}: ')
  227. config = configparser.ConfigParser()
  228. config['feeds'] = {}
  229. config['feeds']['feed'] = feed
  230. config['feeds']['user'] = username
  231. config['feeds']['password'] = password
  232. config['feeds']['shorten'] = shorten
  233. config['feeds']['fallback_feed'] = fallback_feed
  234. config['feeds']['format'] = post_format
  235. with open(config_name + '.ini', 'w') as configfile:
  236. config.write(configfile)
  237. def parse_options():
  238. """Parse command line options of this program."""
  239. parser = argparse.ArgumentParser(description='Post feeds to GNU '
  240. 'Social', prog='gnusrss')
  241. parser.add_argument('-c', '--create-config', metavar='file_name',
  242. dest='create_config', help='creates a config '
  243. 'file')
  244. parser.add_argument('-C', '--create-db', dest='create_database',
  245. action='store_true',help='creates the database')
  246. parser.add_argument('-p', '--post', metavar='config_file',
  247. dest='post', help='posts feeds')
  248. parser.add_argument('-P', '--post-all', dest='post_all',
  249. action='store_true', help='posts all feeds')
  250. parser.add_argument('-k', '--populate-database', metavar='file_name',
  251. dest='populate_database', help='fetch the RSS and'
  252. ' save it in the database')
  253. args = parser.parse_args()
  254. if args.create_database:
  255. if os.path.exists('gnusrss.db'):
  256. overwrite = input('The database already exists. Are you '
  257. 'sure you want to overwrite it? (y/n)')
  258. if overwrite == 'y':
  259. db = Database()
  260. db.create_tables()
  261. db.close
  262. print('Database created!')
  263. else:
  264. db = Database()
  265. db.create_tables()
  266. db.close()
  267. print('Database created!')
  268. if args.create_config:
  269. db = Database()
  270. create_config(args.create_config)
  271. config = get_config(args.create_config + '.ini', 'feeds')
  272. feed = config[0]
  273. post_format = config[5]
  274. posts = rss(feed, post_format)
  275. for article in posts:
  276. if config[3] is 'yes':
  277. shortened = shortener(article[2])
  278. article[2] = shortened
  279. db.insert_data([article[0], article[1], 1, article[2],
  280. article[3], article[4]])
  281. db.close
  282. elif args.post:
  283. config = get_config(args.post, 'feeds')
  284. feed = config[0]
  285. fallback_feed = config[4]
  286. gs_node = 'https://' + config[1].split('@')[1]
  287. username = config[1].split('@')[0]
  288. password = config[2]
  289. post_format = config[5]
  290. try:
  291. posts = rss(feed, post_format)
  292. except:
  293. posts = rss(fallback_feed, post_format)
  294. new = compare(posts)
  295. if new:
  296. to_post = new[0]
  297. db = Database()
  298. if config[3] == 'yes':
  299. shortened = shortener(to_post[2])
  300. temp = to_post[1].split()
  301. try:
  302. temp[temp.index(to_post[2])] = shortened
  303. except:
  304. print('There\'s not url in the message. Please'
  305. ' fix the config.')
  306. import sys
  307. sys.exit()
  308. to_post[1] = ' '.join(temp)
  309. to_post[2] = shortened
  310. post(to_post, gs_node, username, password)
  311. db.insert_data([to_post[0], to_post[1], 1, to_post[2],
  312. to_post[3], to_post[4]])
  313. db.close()
  314. elif args.post_all:
  315. for config in listdir('.'):
  316. if config.endswith('.ini'):
  317. config = get_config(config, 'feeds')
  318. feed = config[0]
  319. fallback_feed = config[4]
  320. gs_node = 'https://' + config[1].split('@')[1]
  321. username = config[1].split('@')[0]
  322. password = config[2]
  323. post_format = config[5]
  324. try:
  325. posts = rss(feed, post_format)
  326. except:
  327. posts = rss(fallback_feed, post_format)
  328. new = compare(posts)
  329. if new:
  330. to_post = new[0]
  331. db = Database()
  332. if config[3] == 'yes':
  333. shortened = shortener(to_post[2])
  334. temp = to_post[1].split()
  335. try:
  336. temp[temp.index(to_post[2])] = shortened
  337. except:
  338. print('There\'s not url in the message. Please'
  339. ' fix the config.')
  340. import sys
  341. sys.exit()
  342. to_post[1] = ' '.join(temp)
  343. to_post[2] = shortened
  344. post(to_post, gs_node, username, password)
  345. db.insert_data([to_post[0], to_post[1], 1,
  346. to_post[2], to_post[3], to_post[4]])
  347. db.close()
  348. elif args.populate_database:
  349. config = get_config(args.populate_database, 'feeds')
  350. feed = config[0]
  351. fallback_feed = config[4]
  352. post_format = config[5]
  353. try:
  354. posts = rss(feed, post_format)
  355. except:
  356. posts = rss(fallback_feed, post_format)
  357. new = compare(posts)
  358. if new:
  359. db = Database()
  360. for n in new:
  361. if config[3] == 'yes':
  362. shortened = shortener(n[2])
  363. temp = n[1].split()
  364. try:
  365. temp[temp.index(n[2])] = shortened
  366. except:
  367. print('There\'s not url in the message. Please'
  368. ' fix the config.')
  369. import sys
  370. sys.exit()
  371. n[1] = ' '.join(temp)
  372. n[2] = shortened
  373. db.insert_data([n[0], n[1], 1, n[2], n[3], n[4]])
  374. db.close()
  375. if len(argv) == 1:
  376. parser.print_help()
  377. if __name__ == "__main__":
  378. parse_options()