# import-lastfm-bio.py (1.8 KB)

  1. #!/usr/bin/env python
  2. import psycopg2 as ordbms
  3. import urllib, urllib2
  4. import xml.etree.cElementTree as ElementTree
  5. class ImportLastfmBio:
  6. def __init__(self):
  7. self.conn = ordbms.connect ("dbname='librefm'")
  8. self.cursor = self.conn.cursor()
  9. def importAll(self):
  10. """Imports descriptions for all artists who don't currently have one"""
  11. self.cursor.execute("SELECT * FROM artist WHERE bio_summary IS NULL AND bio_content IS NULL")
  12. for artist in self.cursor.fetchall():
  13. name = artist[0]
  14. url = "http://ws.audioscrobbler.com/2.0/artist/%s/info.xml" % urllib.quote(name)
  15. print "\nFetching %s..." % name
  16. try:
  17. xml = urllib2.urlopen(url)
  18. self.parse(xml, name, "http://www.last.fm/music/%s" % urllib.quote(name))
  19. except urllib2.HTTPError:
  20. print "Failed."
  21. def parse(self, xml, name, source):
  22. for event, elem in ElementTree.iterparse(xml):
  23. if elem.tag == "bio":
  24. for bio_e in elem.getchildren():
  25. if bio_e.tag == "summary":
  26. summary = bio_e.text
  27. elif bio_e.tag == "content":
  28. content = bio_e.text
  29. if summary:
  30. summary.strip()
  31. summary = self.fixUrls(summary)
  32. if content:
  33. content.strip()
  34. content = self.fixUrls(content)
  35. if summary != None or content != None:
  36. self.cursor.execute("UPDATE artist SET bio_summary = %s, bio_content = %s, bio_source = %s WHERE name = %s", (summary, content, source, name))
  37. self.conn.commit()
  38. print "Imported!"
  39. else:
  40. print "No Bio"
  41. def fixUrls(self, text):
  42. text.replace("http://www.last.fm/tag/", "/tag/")
  43. text.replace("http://last.fm/tag/", "/tag/")
  44. text.replace("http://www.last.fm/music/", "/artist/")
  45. text.replace("http://last.fm/music/", "/artist/")
  46. return text
  47. if __name__ == '__main__':
  48. importer = ImportLastfmBio()
  49. importer.importAll()