#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import argparse
import configparser
import os.path
import sqlite3
import urllib.parse
import urllib.request
from io import BytesIO
from os import listdir
from sys import argv
from xml.dom import minidom

import feedparser
import pycurl
class Database:
    """
    Manage the SQLite database that records the processed feed items.

    The object can be used as a context manager, in which case the
    connection is closed when the ``with`` block is left.
    """

    def __init__(self, database='gnusrss.db'):
        """
        Connect to the database.

        Keyword arguments:
        database -- string containing the filepath of the db
                    (default: gnusrss.db)
        """
        self.connection = sqlite3.connect(database)

    def __enter__(self):
        """Return the instance itself so it can be bound by ``with``."""
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Close the connection; exceptions are never suppressed."""
        self.close()
        return False

    def create_tables(self):
        """Create the items table, dropping any previous one first."""
        current = self.connection.cursor()
        try:
            current.execute('DROP TABLE IF EXISTS items')
            current.execute('CREATE TABLE items(id INTEGER PRIMARY KEY,'
                            'feed TEXT, post TEXT, posted INTEGER, url '
                            'TEXT, lastbuild TIMESTAMP, guid TEXT)')
        finally:
            current.close()
        # Make the schema change durable regardless of the connection's
        # transaction mode.
        self.connection.commit()

    def insert_data(self, param):
        """
        Insert all the article's information to the table.

        Keyword arguments:
        param -- list containing the six values, in this order: feed,
                 post, posted, url, lastbuild, guid
        """
        self.connection.execute('INSERT INTO items(feed, post, posted'
                                ', url, lastbuild, guid) VALUES(?, ?,'
                                '?, ?, ?, ?)', param)
        self.connection.commit()

    def select(self, param, args=()):
        """
        Run a select and return all its rows as a list of tuples.

        Keyword arguments:
        param -- string containing a sql select
        args -- sequence of values bound to the ``?`` placeholders of
                the select (default: no values)
        """
        current = self.connection.cursor()
        try:
            current.execute(param, args)
            return current.fetchall()
        finally:
            current.close()

    def close(self):
        """Close the database."""
        self.connection.close()
def rss(feed, post_format):
    """
    Request the feed, parse it and return requested values on a list
    of lists.

    Every inner list holds, in this order: the feed's link, the
    formatted post, the article's link, the feed's last build date and
    the article's guid.

    Keyword arguments:
    feed -- string containing the url or the filepath of the feed
    post_format -- string containing RSS keywords surrounded by {}

    Raises IndexError when the feed has no entries, and KeyError when
    post_format references a keyword that an entry does not have.
    """
    xml = feedparser.parse(feed)
    if not xml.entries:
        # Keep failing loudly on an empty result: callers rely on the
        # exception to retry with their fallback feed.
        raise IndexError('no entries found in feed: {}'.format(feed))

    # Both channel elements are optional in real feeds, so their absence
    # must not abort the whole run.
    lastbuild = xml.feed.get('updated')
    rss_link = xml.feed.get('link', feed)

    articles = []
    for item in xml.entries:
        # Look at this entry's own keys; entries of one feed do not
        # always carry the same set of fields.
        values = {key: item[key] for key in item.keys()
                  if key in post_format}
        post = post_format.format(**values)
        link = item['link']
        # Entries without a guid are identified by their link instead.
        guid = item.get('guid', link)
        articles.append([rss_link, post, link, lastbuild, guid])
    return articles
def post(article, gs_node, username, password):
    """
    Post the articles to GNU Social.

    Keyword arguments:
    article -- list containing a most of what is necessary on the
               insert; only article[1], the text of the post, is used
    gs_node -- string containing the url of the GNU Social node
    username -- string containing the user of GNU Social
    password -- string containing the password of GNU Social
    """
    words = article[1].split()
    api = gs_node + '/api/statuses/update.xml'

    # Check for twitter images and call post_image if required
    for index, word in enumerate(words):
        if 'pic.twitter.com/' in word:
            image = post_image(word, gs_node, username, password)
            # None means no picture could be extracted; keep the
            # original word in that case.
            if image is not None:
                words[index] = image

    msg = ' '.join(words)
    # The response body is captured only to keep it off stdout.
    buffer = BytesIO()
    postfields = urllib.parse.urlencode({'status': msg,
                                         'source': 'gnusrss'})

    curl = pycurl.Curl()
    try:
        curl.setopt(pycurl.URL, api)
        curl.setopt(pycurl.USERPWD, username + ':' + password)
        curl.setopt(pycurl.VERBOSE, False)
        curl.setopt(pycurl.POSTFIELDS, postfields)
        curl.setopt(pycurl.WRITEDATA, buffer)
        curl.perform()
    finally:
        # Release the handle even when the transfer fails.
        curl.close()
def post_image(picture, gs_node, username, password):
    """
    Upload a picture to GNU Social hosting and return a string with the
    new url, or None when no picture is found on the page.

    Keyword arguments:
    picture -- string containing the twitter url of a picture
               (without the scheme)
    gs_node -- string containing the url of the GNU Social node
    username -- string containing the user of GNU Social
    password -- string containing the password of GNU Social
    """
    with urllib.request.urlopen('https://' + picture) as response:
        html = response.read().decode('utf-8').splitlines()
    api = gs_node + '/api/statusnet/media/upload'

    # Search the hardcoded tag name of the picture
    pic = ''
    for tag in html:
        if 'data-image-url' in tag:
            pic = tag.split('"')[1]
            break

    # If there's a video instead of a picture, just exit
    if not pic:
        return None

    curl = pycurl.Curl()
    try:
        # Pick the image and put it in the buffer
        download = BytesIO()
        curl.setopt(pycurl.URL, pic)
        curl.setopt(pycurl.VERBOSE, False)
        curl.setopt(pycurl.WRITEDATA, download)
        curl.perform()
        image = download.getvalue()

        # Upload the buffer's image, reusing the same handle
        answer = BytesIO()
        curl.setopt(pycurl.URL, api)
        curl.setopt(pycurl.USERPWD, username + ':' + password)
        curl.setopt(curl.HTTPPOST,
                    [('media', (curl.FORM_BUFFER, 'useless.jpg',
                                curl.FORM_BUFFERPTR, image))])
        curl.setopt(pycurl.WRITEDATA, answer)
        curl.perform()
    finally:
        # Release the handle even when one of the transfers fails.
        curl.close()

    # The new url is read from <rsp><mediaurl>...</mediaurl></rsp>.
    xmldoc = minidom.parseString(answer.getvalue().decode())
    item = xmldoc.getElementsByTagName('rsp')
    url = item.item(0).getElementsByTagName(
        'mediaurl')[0].firstChild.data
    return url
def shortener(post):
    """
    Return a shortened url.

    Keyword argument:
    post -- string containing a url to be shortened
    """
    # NOTE(review): the API signature is hard-coded in the source;
    # consider moving it to the configuration file.
    # The url is percent-encoded so that its own '&', '?' or '#'
    # characters are not taken as part of the API request.
    api = ('http://qttr.at/yourls-api.php?format=xml&action=shorturl'
           '&signature=b6afeec983&url=' + urllib.parse.quote(post, safe=''))

    buffer = BytesIO()
    curl = pycurl.Curl()
    try:
        curl.setopt(pycurl.URL, api)
        curl.setopt(pycurl.VERBOSE, False)
        curl.setopt(pycurl.WRITEDATA, buffer)
        curl.perform()
    finally:
        # Release the handle even when the transfer fails.
        curl.close()

    # The short url is read from <result><shorturl>...</shorturl></result>.
    xmldoc = minidom.parseString(buffer.getvalue().decode('utf-8'))
    item = xmldoc.getElementsByTagName('result')
    url = item.item(0).getElementsByTagName('shorturl')[0].firstChild.data
    return url
def compare(feeds):
    """
    Compare the picked feed to the saved on the database and return
    list of lists if new.

    Keyword argument:
    feeds -- list of lists containing all actual feeds on the RSS file;
             the guid of each one is expected at index 4
    """
    db = Database()
    try:
        # Each row is a one-column tuple; a set keeps the lookups cheap.
        posted = {row[0] for row in db.select('select guid from items;')}
    finally:
        db.close()
    return [feed for feed in feeds if feed[4] not in posted]
def get_config(name, option):
    """
    Parse config file and return it on a list.

    The list holds only the values of the section, in the order in
    which they appear in the file.

    Keyword arguments:
    name -- string containing the config's name
    option -- string containing the section of the config to be parsed

    Raises configparser.NoSectionError when the file cannot be read or
    does not have the requested section.
    """
    # SafeConfigParser was a deprecated alias of ConfigParser and no
    # longer exists in Python 3.12.
    parser = configparser.ConfigParser()
    parser.read(name)
    return [value for _, value in parser.items(option)]
def create_config(config_name):
    """
    Create config file.

    Ask interactively for every setting, fetch the feed to show which
    tags can be used on the post format and write the answers to
    '<config_name>.ini' under the 'feeds' section.

    Keyword argument:
    config_name -- string containing the config's name to be created
                   (without the '.ini' extension)
    """
    print('Hi! Now we\'ll create de config file!')
    feed = input('Please introduce the feed\'s url: ')
    username = input('Please introduce your username '
                     '(user@server.com): ')
    # NOTE(review): the password is echoed while typed and stored in
    # plain text in the config file.
    password = input('Please introduce your password: ')
    shorten = input('Do you need to shorten the urls that you '
                    'post? Please take in account \nthat you '
                    'should only use it if your node only has 140'
                    ' characters. \nAnswer with "yes" or just press '
                    'enter if you don\'t want to use it: ')
    fallback_feed = input('Please introduce your feed\'s fallback '
                          'url. If you don\'t want or have one,\n'
                          'just press enter: ')

    print('Now we\'re gona fetch the feed. Please wait...')
    feed_file = feedparser.parse(feed)
    if feed_file.entries:
        print('Done! The tags are: ')
        for tag in feed_file.entries[0].keys():
            print('\t' + tag)
    else:
        # Do not throw away the answers already given just because the
        # feed is empty or unreachable right now.
        print('No entries were found in the feed, so there are no '
              'tags to show.')

    post_format = input('The XML has been parsed. Choose wich '
                        'format you want:\nPlease put the tags '
                        'inside curly braces\nEx: {title}'
                        ' - {link} by @{author}: ')

    # The order of the keys matters: the values are later read back by
    # their position in the section.
    config = configparser.ConfigParser()
    config['feeds'] = {}
    config['feeds']['feed'] = feed
    config['feeds']['user'] = username
    config['feeds']['password'] = password
    config['feeds']['shorten'] = shorten
    config['feeds']['fallback_feed'] = fallback_feed
    config['feeds']['format'] = post_format
    with open(config_name + '.ini', 'w') as configfile:
        config.write(configfile)
def _fetch_posts(feed, fallback_feed, post_format):
    """Return the articles of feed, or of fallback_feed if feed fails."""
    try:
        return rss(feed, post_format)
    except Exception:
        return rss(fallback_feed, post_format)


def _shorten_article(article):
    """Swap the article's link for a short one, in its text and its url."""
    shortened = shortener(article[2])
    words = article[1].split()
    try:
        words[words.index(article[2])] = shortened
    except ValueError:
        print('There\'s not url in the message. Please'
              ' fix the config.')
        # This is a failure, so do not report success to the shell.
        raise SystemExit(1)
    article[1] = ' '.join(words)
    article[2] = shortened


def _store_article(db, article):
    """Save the article on the database, flagged as already posted."""
    db.insert_data([article[0], article[1], 1, article[2],
                    article[3], article[4]])


def _post_newest(config):
    """Post the first new article of a parsed config and save it."""
    feed = config[0]
    fallback_feed = config[4]
    account = config[1].split('@')
    gs_node = 'https://' + account[1]
    username = account[0]
    password = config[2]
    post_format = config[5]

    new = compare(_fetch_posts(feed, fallback_feed, post_format))
    if not new:
        return
    to_post = new[0]
    db = Database()
    try:
        if config[3] == 'yes':
            _shorten_article(to_post)
        post(to_post, gs_node, username, password)
        _store_article(db, to_post)
    finally:
        db.close()


def _populate_database(config):
    """Save every new article of a parsed config without posting it."""
    new = compare(_fetch_posts(config[0], config[4], config[5]))
    if not new:
        return
    db = Database()
    try:
        for article in new:
            if config[3] == 'yes':
                _shorten_article(article)
            _store_article(db, article)
    finally:
        db.close()


def _create_database():
    """Create the tables on the database, erasing the previous ones."""
    db = Database()
    try:
        db.create_tables()
    finally:
        db.close()
    print('Database created!')


def parse_options():
    """
    Parse command line options of this program.

    The parsed config is a list whose positions are: 0 feed, 1 user
    (user@server), 2 password, 3 shorten, 4 fallback feed, 5 format.
    """
    parser = argparse.ArgumentParser(description='Post feeds to GNU '
                                     'Social', prog='gnusrss')
    parser.add_argument('-c', '--create-config', metavar='file_name',
                        dest='create_config', help='creates a config '
                        'file')
    parser.add_argument('-C', '--create-db', dest='create_database',
                        action='store_true', help='creates the database')
    parser.add_argument('-p', '--post', metavar='config_file',
                        dest='post', help='posts feeds')
    parser.add_argument('-P', '--post-all', dest='post_all',
                        action='store_true', help='posts all feeds')
    parser.add_argument('-k', '--populate-database', metavar='file_name',
                        dest='populate_database', help='fetch the RSS and'
                        ' save it in the database')
    args = parser.parse_args()

    # Creating the database can be combined with any of the other
    # options, so it is handled on its own.
    if args.create_database:
        confirmed = True
        if os.path.exists('gnusrss.db'):
            overwrite = input('The database already exists. Are you '
                              'sure you want to overwrite it? (y/n)')
            confirmed = overwrite == 'y'
        if confirmed:
            _create_database()

    if args.create_config:
        create_config(args.create_config)
        config = get_config(args.create_config + '.ini', 'feeds')
        posts = rss(config[0], config[5])
        # Everything already on the feed is saved as posted, so only
        # later articles get published.
        db = Database()
        try:
            for article in posts:
                if config[3] == 'yes':
                    article[2] = shortener(article[2])
                _store_article(db, article)
        finally:
            db.close()
    elif args.post:
        _post_newest(get_config(args.post, 'feeds'))
    elif args.post_all:
        # Every .ini file of the working directory is taken as a config.
        for file_name in listdir('.'):
            if file_name.endswith('.ini'):
                _post_newest(get_config(file_name, 'feeds'))
    elif args.populate_database:
        _populate_database(get_config(args.populate_database, 'feeds'))

    # Called without any argument: show how to use the program.
    if len(argv) == 1:
        parser.print_help()
# Run the command line interface only when executed as a script, not
# when the module is imported.
if __name__ == '__main__':
    parse_options()