jamendo-rewrite.py 2.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104
  1. #!/usr/bin/env python
  2. # Jamendo database dumps can be fetched from: http://img.jamendo.com/data/dbdump_artistalbumtrack.xml.gz
  3. import xml.etree.cElementTree as ElementTree
  4. import sys, gzip, time, os, os.path, urllib, threading
  5. class JamendoRewrite:
  6. def __init__(self, path):
  7. self.music_path = path
  8. def parse(self, dump):
  9. for event, elem in ElementTree.iterparse(dump):
  10. if elem.tag == "artist":
  11. artist = self.proc_artist(elem)
  12. self.make_rules(artist)
  13. def proc_artist(self, elem):
  14. artist = {}
  15. artist["albums"] = []
  16. for artist_e in elem.getchildren():
  17. if artist_e.tag == "name":
  18. artist["name"] = artist_e.text
  19. if artist_e.tag == "Albums":
  20. for album_e in artist_e.getchildren():
  21. artist["albums"].append(self.proc_album(album_e))
  22. return artist
  23. def proc_album(self, elem):
  24. album = {}
  25. album["tracks"] = []
  26. album["name"] = None
  27. for album_e in elem.getchildren():
  28. if album_e.tag == "name":
  29. album["name"] = album_e.text
  30. if album_e.tag == "Tracks":
  31. for track_e in album_e.getchildren():
  32. album["tracks"].append(self.proc_track(track_e))
  33. return album
  34. def proc_track(self, elem):
  35. track = {}
  36. track["id"] = None
  37. track["name"] = None
  38. track["license"] = None
  39. for track_e in elem.getchildren():
  40. if track_e.tag == "id":
  41. track["id"] = int(track_e.text)
  42. if track_e.tag == "name":
  43. track["name"] = track_e.text
  44. if track_e.tag == "license":
  45. track["license"] = track_e.text
  46. return track
  47. def make_rules(self, artist):
  48. for album in artist["albums"]:
  49. for track in album["tracks"]:
  50. if track["id"] and track["name"] and album["name"] and artist["name"] and self.free_license(track["license"]):
  51. filename = "%s-%s-%s" % (artist["name"].replace("/", ""), album["name"].replace("/", ""), track["name"].replace("/", " "))
  52. filename = filename.encode("utf-8")
  53. rule = "rewrite %d\.(...).? %s/%s.$1 redirect;" % (track['id'], self.music_path, urllib.quote(filename))
  54. print rule
  55. def free_license(self, license):
  56. return ("http://creativecommons.org/licenses/by-sa" in license or "http://creativecommons.org/licenses/by/" in license or "http://artlibre.org/licence.php/lal.html" in license)
  57. if __name__ == "__main__":
  58. if len(sys.argv) != 3:
  59. print "Usage: download-jamendo.py <database dump> /path/to/music_files/"
  60. sys.exit(1)
  61. if sys.argv[1][-2:] == "gz":
  62. dump = gzip.open(sys.argv[1], "r")
  63. else:
  64. dump = open(sys.argv[1], "r")
  65. rewriter = JamendoRewrite(sys.argv[2])
  66. rewriter.parse(dump)