#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import argparse
import configparser
import os.path
import sqlite3
import urllib.parse
import urllib.request
from io import BytesIO
from os import listdir
from sys import argv
from xml.dom import minidom

import feedparser
import pycurl
  14. class Database:
  15. """Manage the database."""
  16. def __init__(self, database='gnusrss.db'):
  17. """
  18. Connect to the database.
  19. database -- string containig the filepath of the db
  20. (default: gnusrss.db)
  21. """
  22. self.connection = sqlite3.connect(database)
  23. def create_tables(self):
  24. """Create table and columns."""
  25. current = self.connection.cursor()
  26. current.execute('DROP TABLE IF EXISTS items')
  27. current.execute('CREATE TABLE items(id INTEGER PRIMARY KEY,'
  28. 'feed TEXT, post TEXT, posted INTEGER, url '
  29. 'TEXT, lastbuild TIMESTAMP, guid TEXT)')
  30. def insert_data(self, param):
  31. """
  32. Insert all the article's information to the table.
  33. Keyword arguments:
  34. param -- list containing all the values
  35. """
  36. self.connection.execute('INSERT INTO items(feed, post, posted'
  37. ', url, lastbuild, guid) VALUES(?, ?,'
  38. '?, ?, ?, ?)',(param))
  39. self.connection.commit()
  40. def select(self, param):
  41. """
  42. Return a select.
  43. Keyword arguments:
  44. param -- string containing a sql select
  45. """
  46. current = self.connection.cursor()
  47. current.execute(param)
  48. rows = current.fetchall()
  49. return rows
  50. def close(self):
  51. """Close the database."""
  52. self.connection.close()
  53. def rss(feed, post_format):
  54. """
  55. Request the feed, parse it and return requested values on a list
  56. of lists.
  57. Keyword arguments:
  58. feed -- string containing the url or the filepath of the feed
  59. post_format -- string containing RSS keywords surrounded by {}
  60. """
  61. foo = []
  62. xml = feedparser.parse(feed)
  63. keys = list(xml.entries[0].keys())
  64. lastbuild = xml.feed.updated
  65. rss_link = xml.feed.link
  66. for item in xml['items']:
  67. values = {}
  68. for i in keys:
  69. if i in post_format:
  70. values[i] = item[i]
  71. post = post_format.format(**values)
  72. foo.append([rss_link, post, item['link'], lastbuild,
  73. item['guid']])
  74. return foo
  75. def post(article, gs_node, username, password):
  76. """
  77. Post the articles to GNU Social.
  78. Keyword arguments:
  79. article -- list containing a most of what is necessary on the
  80. insert
  81. gs_node -- string containing the url of the GNU Social node
  82. username -- string containing the user of GNU Social
  83. password -- string containing the password of GNU Social
  84. """
  85. msg = article[1].split()
  86. api = (gs_node + '/api/''statuses/update.xml')
  87. # Check for twitter images and call post_image if required
  88. for word in msg:
  89. if 'pic.twitter.com/' in word:
  90. image = post_image(word, gs_node, username, password)
  91. if image is not None:
  92. index = msg.index(word)
  93. msg[index] = image
  94. else:
  95. pass
  96. msg = ' '.join(msg)
  97. buffer = BytesIO()
  98. post_data = {'status': msg, 'source': 'gnusrss'}
  99. postfields = urllib.parse.urlencode(post_data)
  100. curl = pycurl.Curl()
  101. curl.setopt(pycurl.URL, api)
  102. curl.setopt(pycurl.USERPWD, username + ':' + password)
  103. curl.setopt(pycurl.VERBOSE, False)
  104. curl.setopt(curl.POSTFIELDS, postfields)
  105. curl.setopt(pycurl.WRITEDATA, buffer)
  106. curl.perform()
  107. curl.close
  108. def post_image(picture, gs_node, username, password):
  109. """
  110. Upload a picture to GNU Social hosting and return a string with the
  111. new url.
  112. Keyword arguments:
  113. picture -- string containing the twitter url of a picture
  114. gs_node -- string containing the url of the GNU Social node
  115. username -- string containing the user of GNU Social
  116. password -- string containing the password of GNU Social
  117. """
  118. html = urllib.request.urlopen('https://' + picture).read().decode(
  119. 'utf-8').splitlines()
  120. api = gs_node + '/api/statusnet/media/upload'
  121. pic = ""
  122. # Search the hardcoded tag name of the picture
  123. for tag in html:
  124. if 'data-image-url' in tag:
  125. pic = tag.split('"')[1]
  126. break
  127. # If there's a video instead of a picture, just exit
  128. if not pic:
  129. return None
  130. buffer = BytesIO()
  131. # Pick the image and put it in the buffer
  132. curl = pycurl.Curl()
  133. curl.setopt(pycurl.URL, pic)
  134. curl.setopt(pycurl.VERBOSE, False)
  135. curl.setopt(pycurl.WRITEDATA, buffer)
  136. curl.perform()
  137. pic = buffer.getvalue()
  138. buffer = BytesIO()
  139. # Upload the buffer's image
  140. curl.setopt(pycurl.URL, api)
  141. curl.setopt(pycurl.USERPWD, username + ':' + password)
  142. curl.setopt(curl.HTTPPOST,
  143. [('media', (curl.FORM_BUFFER, 'useless.jpg',
  144. curl.FORM_BUFFERPTR, pic))])
  145. curl.setopt(pycurl.WRITEDATA, buffer)
  146. curl.perform()
  147. curl.close()
  148. buffer = buffer.getvalue().decode()
  149. xmldoc = minidom.parseString(buffer)
  150. item = xmldoc.getElementsByTagName('rsp')
  151. url = item.item(0).getElementsByTagName(
  152. 'mediaurl')[0].firstChild.data
  153. return url
  154. def shortener(post):
  155. """
  156. Return a shortened url.
  157. Keyword argument:
  158. post -- string containing a url to be shortened
  159. """
  160. api = ('http://qttr.at/yourls-api.php?format=xml&action=shorturl'
  161. '&signature=b6afeec983&url=' + post)
  162. buffer = BytesIO()
  163. curl = pycurl.Curl()
  164. curl.setopt(pycurl.URL, api)
  165. curl.setopt(pycurl.VERBOSE, False)
  166. curl.setopt(pycurl.WRITEDATA, buffer)
  167. curl.perform()
  168. buffer = buffer.getvalue().decode('utf-8')
  169. xmldoc = minidom.parseString(buffer)
  170. item = xmldoc.getElementsByTagName('result')
  171. url = item.item(0).getElementsByTagName('shorturl')[0].\
  172. firstChild.data
  173. return url
  174. def compare(feeds):
  175. """
  176. Compare the picked feed to the saved on the database and return
  177. list of lists if new.
  178. Keyword argument:
  179. feeds -- list of lists containing all actual feeds on the RSS file
  180. """
  181. db = Database()
  182. old = db.select('select guid from items;')
  183. new_feed = []
  184. posted = []
  185. # make the list accesible
  186. for x in old:
  187. posted.append(x[0])
  188. for feed in feeds:
  189. if feed[4] not in posted:
  190. new_feed.append(feed)
  191. return new_feed
  192. def get_config(name, option):
  193. """
  194. Parse config file and return it on a list.
  195. Keyword arguments:
  196. name -- string containing the config's name
  197. option -- string containin the section of the config to be parsed
  198. """
  199. config = []
  200. parser = configparser.SafeConfigParser()
  201. parser.read(name)
  202. for name, value in parser.items(option):
  203. config.append(value)
  204. return config
  205. def create_config(config_name):
  206. """
  207. Create config file.
  208. Keyword argument:
  209. config_name -- string containing the config's name to be created
  210. """
  211. print('Hi! Now we\'ll create de config file!')
  212. feed = input('Please introduce the feed\'s url: ')
  213. username = input('Please introduce your username '
  214. '(user@server.com): ')
  215. password = input('Please introduce your password: ')
  216. shorten = input('Do you need to shorten the urls that you '
  217. 'post? Please take in account \nthat you '
  218. 'should only use it if your node only has 140'
  219. ' characters. \nAnswer with "yes" or just press '
  220. 'enter if you don\'t want to use it: ')
  221. fallback_feed = input('Please introduce your feed\'s fallback'
  222. 'url. If you don\'t want or have one,\n'
  223. 'just press enter: ')
  224. print('Now we\'re gona fetch the feed. Please wait...')
  225. feed_file = feedparser.parse(feed)
  226. keys = list(feed_file.entries[0].keys())
  227. print('Done! The tags are: ')
  228. for tag in keys:
  229. print('\t' + tag)
  230. post_format = input('The XML has been parsed. Choose wich '
  231. 'format you want:\nPlease put the tags '
  232. 'inside the square brackets\nEx: {title}'
  233. ' - {link} by @{author}: ')
  234. config = configparser.ConfigParser()
  235. config['feeds'] = {}
  236. config['feeds']['feed'] = feed
  237. config['feeds']['user'] = username
  238. config['feeds']['password'] = password
  239. config['feeds']['shorten'] = shorten
  240. config['feeds']['fallback_feed'] = fallback_feed
  241. config['feeds']['format'] = post_format
  242. with open(config_name + '.ini', 'w') as configfile:
  243. config.write(configfile)
  244. def parse_options():
  245. """Parse command line options of this program."""
  246. parser = argparse.ArgumentParser(description='Post feeds to GNU '
  247. 'Social', prog='gnusrss')
  248. parser.add_argument('-c', '--create-config', metavar='file_name',
  249. dest='create_config', help='creates a config '
  250. 'file')
  251. parser.add_argument('-C', '--create-db', dest='create_database',
  252. action='store_true',help='creates the database')
  253. parser.add_argument('-p', '--post', metavar='config_file',
  254. dest='post', help='posts feeds')
  255. parser.add_argument('-P', '--post-all', dest='post_all',
  256. action='store_true', help='posts all feeds')
  257. parser.add_argument('-k', '--populate-database', metavar='file_name',
  258. dest='populate_database', help='fetch the RSS and'
  259. ' save it in the database')
  260. args = parser.parse_args()
  261. if args.create_database:
  262. if os.path.exists('gnusrss.db'):
  263. overwrite = input('The database already exists. Are you '
  264. 'sure you want to overwrite it? (y/n)')
  265. if overwrite == 'y':
  266. db = Database()
  267. db.create_tables()
  268. db.close
  269. print('Database created!')
  270. else:
  271. db = Database()
  272. db.create_tables()
  273. db.close()
  274. print('Database created!')
  275. if args.create_config:
  276. db = Database()
  277. create_config(args.create_config)
  278. config = get_config(args.create_config + '.ini', 'feeds')
  279. feed = config[0]
  280. post_format = config[5]
  281. posts = rss(feed, post_format)
  282. for article in posts:
  283. if config[3] is 'yes':
  284. shortened = shortener(article[2])
  285. article[2] = shortened
  286. db.insert_data([article[0], article[1], 1, article[2],
  287. article[3], article[4]])
  288. db.close
  289. elif args.post:
  290. config = get_config(args.post, 'feeds')
  291. feed = config[0]
  292. fallback_feed = config[4]
  293. gs_node = 'https://' + config[1].split('@')[1]
  294. username = config[1].split('@')[0]
  295. password = config[2]
  296. post_format = config[5]
  297. try:
  298. posts = rss(feed, post_format)
  299. except:
  300. posts = rss(fallback_feed, post_format)
  301. new = compare(posts)
  302. if new:
  303. to_post = new[0]
  304. db = Database()
  305. if config[3] == 'yes':
  306. shortened = shortener(to_post[2])
  307. temp = to_post[1].split()
  308. try:
  309. temp[temp.index(to_post[2])] = shortened
  310. except:
  311. print('There\'s not url in the message. Please'
  312. ' fix the config.')
  313. import sys
  314. sys.exit()
  315. to_post[1] = ' '.join(temp)
  316. to_post[2] = shortened
  317. post(to_post, gs_node, username, password)
  318. db.insert_data([to_post[0], to_post[1], 1, to_post[2],
  319. to_post[3], to_post[4]])
  320. db.close()
  321. elif args.post_all:
  322. for config in listdir('.'):
  323. if config.endswith('.ini'):
  324. config = get_config(config, 'feeds')
  325. feed = config[0]
  326. fallback_feed = config[4]
  327. gs_node = 'https://' + config[1].split('@')[1]
  328. username = config[1].split('@')[0]
  329. password = config[2]
  330. post_format = config[5]
  331. try:
  332. posts = rss(feed, post_format)
  333. except:
  334. posts = rss(fallback_feed, post_format)
  335. new = compare(posts)
  336. if new:
  337. to_post = new[0]
  338. db = Database()
  339. if config[3] == 'yes':
  340. shortened = shortener(to_post[2])
  341. temp = to_post[1].split()
  342. try:
  343. temp[temp.index(to_post[2])] = shortened
  344. except:
  345. print('There\'s not url in the message. Please'
  346. ' fix the config.')
  347. import sys
  348. sys.exit()
  349. to_post[1] = ' '.join(temp)
  350. to_post[2] = shortened
  351. post(to_post, gs_node, username, password)
  352. db.insert_data([to_post[0], to_post[1], 1,
  353. to_post[2], to_post[3], to_post[4]])
  354. db.close()
  355. elif args.populate_database:
  356. config = get_config(args.populate_database, 'feeds')
  357. feed = config[0]
  358. fallback_feed = config[4]
  359. post_format = config[5]
  360. try:
  361. posts = rss(feed, post_format)
  362. except:
  363. posts = rss(fallback_feed, post_format)
  364. new = compare(posts)
  365. if new:
  366. db = Database()
  367. for n in new:
  368. if config[3] == 'yes':
  369. shortened = shortener(n[2])
  370. temp = n[1].split()
  371. try:
  372. temp[temp.index(n[2])] = shortened
  373. except:
  374. print('There\'s not url in the message. Please'
  375. ' fix the config.')
  376. import sys
  377. sys.exit()
  378. n[1] = ' '.join(temp)
  379. n[2] = shortened
  380. db.insert_data([n[0], n[1], 1, n[2], n[3], n[4]])
  381. db.close()
  382. if len(argv) == 1:
  383. parser.print_help()
  384. if __name__ == "__main__":
  385. parse_options()