import-jamendo.py 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411
  1. #!/usr/bin/env python
  2. import xml.etree.cElementTree as ElementTree
  3. import sys, gzip, time
  4. import psycopg2 as ordbms
  5. genremap = {
  6. 0 : "Blues",
  7. 1 : "Classic Rock",
  8. 2 : "Country",
  9. 3 : "Dance",
  10. 4 : "Disco",
  11. 5 : "Funk",
  12. 6 : "Grunge",
  13. 7 : "Hip-Hop",
  14. 8 : "Jazz",
  15. 9 : "Metal",
  16. 10 : "New Age",
  17. 11 : "Oldies",
  18. 12 : "Other",
  19. 13 : "Pop",
  20. 14 : "R&B",
  21. 15 : "Rap",
  22. 16 : "Reggae",
  23. 17 : "Rock",
  24. 18 : "Techno",
  25. 19 : "Industrial",
  26. 20 : "Alternative",
  27. 21 : "Ska",
  28. 22 : "Death Metal",
  29. 23 : "Pranks",
  30. 24 : "Soundtrack",
  31. 25 : "Euro-Techno",
  32. 26 : "Ambient",
  33. 27 : "Trip-Hop",
  34. 28 : "Vocal",
  35. 29 : "Jazz+Funk",
  36. 30 : "Fusion",
  37. 31 : "Trance",
  38. 32 : "Classical",
  39. 33 : "Instrumental",
  40. 34 : "Acid",
  41. 35 : "House",
  42. 36 : "Game",
  43. 37 : "Sound Clip",
  44. 38 : "Gospel",
  45. 39 : "Noise",
  46. 40 : "Alternative Rock",
  47. 41 : "Bass",
  48. 42 : "Soul",
  49. 43 : "Punk",
  50. 44 : "Space",
  51. 45 : "Meditative",
  52. 46 : "Instrumental Pop",
  53. 47 : "Instrumental Rock",
  54. 48 : "Ethnic",
  55. 49 : "Gothic",
  56. 50 : "Darkwave",
  57. 51 : "Techno-Industrial",
  58. 52 : "Electronic",
  59. 53 : "Pop-Folk",
  60. 54 : "Eurodance",
  61. 55 : "Dream",
  62. 56 : "Southern Rock",
  63. 57 : "Comedy",
  64. 58 : "Cult",
  65. 59 : "Gangsta",
  66. 60 : "Top 40",
  67. 61 : "Christian Rap",
  68. 62 : "Pop/Funk",
  69. 63 : "Jungle",
  70. 64 : "Native American",
  71. 65 : "Cabaret",
  72. 66 : "New Wave",
  73. 67 : "Psychadelic",
  74. 68 : "Rave",
  75. 69 : "Showtunes",
  76. 70 : "Trailer",
  77. 71 : "Lo-Fi",
  78. 72 : "Tribal",
  79. 73 : "Acid Punk",
  80. 74 : "Acid Jazz",
  81. 75 : "Polka",
  82. 76 : "Retro",
  83. 77 : "Musical",
  84. 78 : "Rock & Roll",
  85. 79 : "Hard Rock",
  86. 80 : "Folk",
  87. 81 : "Folk-Rock",
  88. 82 : "National Folk",
  89. 83 : "Swing",
  90. 84 : "Fast Fusion",
  91. 85 : "Bebop",
  92. 86 : "Latin",
  93. 87 : "Revival",
  94. 88 : "Celtic",
  95. 89 : "Bluegrass",
  96. 90 : "Avantgarde",
  97. 91 : "Gothic Rock",
  98. 92 : "Progressive Rock",
  99. 93 : "Psychedelic Rock",
  100. 94 : "Symphonic Rock",
  101. 95 : "Slow Rock",
  102. 96 : "Big Band",
  103. 97 : "Chorus",
  104. 98 : "Easy Listening",
  105. 99 : "Acoustic",
  106. 100 : "Humour",
  107. 101 : "Speech",
  108. 102 : "Chanson",
  109. 103 : "Opera",
  110. 104 : "Chamber Music",
  111. 105 : "Sonata",
  112. 106 : "Symphony",
  113. 107 : "Booty Bass",
  114. 108 : "Primus",
  115. 109 : "Porn Groove",
  116. 110 : "Satire",
  117. 111 : "Slow Jam",
  118. 112 : "Club",
  119. 113 : "Tango",
  120. 114 : "Samba",
  121. 115 : "Folklore",
  122. 116 : "Ballad",
  123. 117 : "Power Ballad",
  124. 118 : "Rhythmic Soul",
  125. 119 : "Freestyle",
  126. 120 : "Duet",
  127. 121 : "Punk Rock",
  128. 122 : "Drum Solo",
  129. 123 : "A capella",
  130. 124 : "Euro-House",
  131. 125 : "Dance Hall",
  132. }
  133. class JamendoImport:
  134. def __init__(self, hostname, username, password, database):
  135. self.conn = ordbms.connect ("dbname='librefm' user='librefm'")
  136. self.cursor = self.conn.cursor ()
  137. def parse(self, dump):
  138. for event, elem in ElementTree.iterparse(dump):
  139. if elem.tag == "artist":
  140. artist = self.proc_artist(elem)
  141. if self.artist_exists(artist["name"]):
  142. try:
  143. self.cursor.execute("UPDATE Artist SET image_small = %s, homepage = %s, mbid = %s WHERE name = %s", (artist["image"], artist["url"], artist["mbid"], artist["name"]))
  144. self.conn.commit()
  145. except Exception, e:
  146. self.conn.rollback()
  147. print 'ua', e
  148. else:
  149. try:
  150. self.cursor.execute("INSERT INTO Artist (name, image_small, mbid, homepage) VALUES (%s, %s, %s, %s)", (artist["name"], artist["image"], artist["mbid"], artist["url"]))
  151. self.conn.commit()
  152. except Exception, e:
  153. self.conn.rollback()
  154. print 'ia', e
  155. for album in artist["albums"]:
  156. if self.album_exists(artist["name"], album["name"]):
  157. try:
  158. self.cursor.execute("UPDATE Album SET albumurl = %s, image = %s, artwork_license = %s, mbid = %s, releasedate = %s, downloadurl = %s WHERE name = %s AND artist_name = %s",
  159. (album["url"], album["image"], album["license_artwork"], album["mbid"], album["releasedate"], album["downloadurl"],
  160. album["name"], artist["name"]))
  161. self.conn.commit()
  162. except Exception, e:
  163. self.conn.rollback()
  164. print 'ub', e
  165. else:
  166. try:
  167. self.cursor.execute("INSERT INTO Album (name, artist_name, albumurl, image, artwork_license, mbid, releasedate, downloadurl) VALUES (%s, %s, %s, %s, %s, %s, %s, %s)",
  168. (album["name"], artist["name"], album["url"], album["image"], album["license_artwork"], album["mbid"], album["releasedate"], album["downloadurl"]))
  169. self.conn.commit()
  170. except Exception, e:
  171. self.conn.rollback()
  172. print 'ib', e
  173. for tag in album["tags"]:
  174. if not self.tag_exists(tag, artist["name"], album["name"]):
  175. try:
  176. self.cursor.execute("INSERT INTO Tags (tag, artist, album) VALUES (%s, %s, %s)",
  177. (tag, artist["name"], album["name"]))
  178. self.conn.commit()
  179. except Exception, e:
  180. self.conn.rollback()
  181. print 'ig', e
  182. for track in album["tracks"]:
  183. if "http://creativecommons.org/licenses/by-sa" not in track["license"] and not "http://creativecommons.org/licenses/by/" in track["license"] and not "http://artlibre.org/licence.php/lal.html" in track["license"]:
  184. streamable = 0
  185. else:
  186. streamable = 1
  187. try:
  188. duration = int(track["duration"])
  189. except:
  190. duration = None
  191. otherid = "jm:"
  192. try:
  193. otherid += str(int(track["id"]))
  194. except:
  195. otherid += "unknown"
  196. if self.track_exists(artist["name"], album["name"], track["name"]):
  197. try:
  198. self.cursor.execute("UPDATE Track SET downloadurl = %s, streamurl = %s, mbid = %s, license = %s, duration = %s, otherid = %s, streamable = %s WHERE name = %s AND artist_name = %s AND album_name = %s", (track["downloadurl"], track["streamurl"], track["mbid"], track["license"], duration, otherid, streamable, track["name"], artist["name"], album["name"]))
  199. self.conn.commit()
  200. except Exception, e:
  201. self.conn.rollback()
  202. print 'ut', e
  203. else:
  204. try:
  205. self.cursor.execute("INSERT INTO Track (name, artist_name, album_name, mbid, downloadurl, streamurl, license, duration, otherid, streamable) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s)", (track["name"], artist["name"], album["name"], track["mbid"], track["downloadurl"], track["streamurl"], track["license"], duration, otherid, streamable))
  206. self.conn.commit()
  207. except Exception, e:
  208. self.conn.rollback()
  209. print 'it', e
  210. for tag in track["tags"]:
  211. if not self.tag_exists(tag, artist["name"], album["name"], track["name"]):
  212. try:
  213. self.cursor.execute("INSERT INTO Tags (tag, artist, album, track) VALUES (%s, %s, %s, %s)",
  214. (tag, artist["name"], album["name"], track["name"]))
  215. self.conn.commit()
  216. except Exception, e:
  217. self.conn.rollback()
  218. print 'ig2', e
  219. def close(self):
  220. self.cursor.close()
  221. self.conn.commit()
  222. self.conn.close()
  223. def proc_artist(self, elem):
  224. artist = {}
  225. artist["albums"] = []
  226. artist["image"] = None
  227. for artist_e in elem.getchildren():
  228. if artist_e.tag == "name":
  229. artist["name"] = artist_e.text
  230. if artist_e.tag == "id":
  231. artist["id"] = int(artist_e.text)
  232. if artist_e.tag == "image":
  233. artist["image"] = artist_e.text
  234. if artist_e.tag == "mbgid":
  235. if artist_e.text is None or len(artist_e.text) == 36:
  236. artist["mbid"] = artist_e.text
  237. else:
  238. print "Artist mbgid wrong length (%d): %s" % (len(artist_e.text),artist_e.text)
  239. if artist_e.tag == "url":
  240. artist["url"] = artist_e.text
  241. if artist_e.tag == "Albums":
  242. for album_e in artist_e.getchildren():
  243. artist["albums"].append(self.proc_album(album_e))
  244. return artist
  245. def proc_album(self, elem):
  246. album = {}
  247. album["tracks"] = []
  248. album["tags"] = []
  249. album["name"] = None
  250. for album_e in elem.getchildren():
  251. if album_e.tag == "name":
  252. album["name"] = album_e.text
  253. if album_e.tag == "id":
  254. album["id"] = int(album_e.text)
  255. album["url"] = "jamendo://album/%d" % album["id"]
  256. album["downloadurl"] = "jamendo://album/download/%d" % album["id"]
  257. album["image"] = "jamendo://album/art/%d" % album["id"]
  258. if album_e.tag == "id3genre":
  259. genre = genremap[int(album_e.text)]
  260. album["tags"].append(genre)
  261. if album_e.tag == "mbgid":
  262. if album_e.text is None or len(album_e.text) == 36:
  263. album["mbid"] = album_e.text
  264. else:
  265. print "Album mbgid wrong length (%d): %s" % (len(album_e.text),album_e.text)
  266. if album_e.tag == "license_artwork":
  267. album["license_artwork"] = album_e.text
  268. if album_e.tag == "releasedate":
  269. album["releasedate"] = time.mktime(time.strptime(album_e.text, "%Y-%m-%dT%H:%M:%S+01:00"))
  270. if album_e.tag == "Tracks":
  271. for track_e in album_e.getchildren():
  272. album["tracks"].append(self.proc_track(track_e))
  273. return album
  274. def proc_track(self, elem):
  275. track = {}
  276. track["tags"] = []
  277. track["mbid"] = None
  278. track["downloadurl"] = None
  279. for track_e in elem.getchildren():
  280. if track_e.tag == "id":
  281. track["id"] = int(track_e.text)
  282. track["streamurl"] = "jamendo://track/stream/%d" % track["id"]
  283. if track_e.tag == "name":
  284. track["name"] = track_e.text
  285. if track_e.tag == "id3genre":
  286. genre = genremap[int(track_e.text)]
  287. track["tags"].append(genre)
  288. if track_e.tag == "license":
  289. track["license"] = track_e.text
  290. if track_e.tag == "duration":
  291. track["duration"] = track_e.text
  292. if track_e.tag == "mbgid":
  293. if track_e.text is None or len(track_e.text) == 36:
  294. track["mbid"] = track_e.text
  295. else:
  296. print "Track mbgid wrong length (%d): %s" % (len(track_e.text),track_e.text)
  297. if track_e.tag == "Tags":
  298. for tag_e in track_e.getchildren():
  299. track["tags"].append(self.proc_tag(tag_e))
  300. return track
  301. def proc_tag(self, elem):
  302. for track_e in elem.getchildren():
  303. if track_e.tag == "idstr":
  304. return track_e.text
  305. def artist_exists(self, artist):
  306. try:
  307. self.cursor.execute("SELECT name FROM Artist WHERE name = %s ", (artist,))
  308. return self.cursor.rowcount != 0
  309. except:
  310. return False
  311. def album_exists(self, artist, album):
  312. try:
  313. self.cursor.execute("SELECT name FROM Album WHERE artist_name = %s AND name = %s", (artist, album))
  314. return self.cursor.rowcount != 0
  315. except:
  316. return False
  317. def track_exists(self, artist, album, track):
  318. try:
  319. self.cursor.execute("SELECT name FROM Track WHERE artist_name = %s AND album_name = %s AND name = %s", (artist, album, track))
  320. return self.cursor.rowcount != 0
  321. except:
  322. return False
  323. def tag_exists(self, tag, artist, album, track=None):
  324. try:
  325. if track:
  326. self.cursor.execute("SELECT tag FROM Tags WHERE tag = %s AND artist = %s AND album = %s AND track = %s", (tag, artist, album, track))
  327. else:
  328. self.cursor.execute("SELECT tag FROM Tags WHERE tag = %s AND artist = %s AND album = %s AND track = ''", (tag, artist, album))
  329. return self.cursor.rowcount != 0
  330. except:
  331. return False
  332. if __name__ == "__main__":
  333. if len(sys.argv) != 6:
  334. print "Usage: import-jamendo.py <database dump> <mysql hostname> <mysql username> <mysql password> <mysql database>"
  335. sys.exit(1)
  336. if sys.argv[1][-2:] == "gz":
  337. dump = gzip.open(sys.argv[1], "r")
  338. else:
  339. dump = open(sys.argv[1], "r")
  340. importer = JamendoImport(sys.argv[2], sys.argv[3], sys.argv[4], sys.argv[5])
  341. importer.parse(dump)
  342. importer.close()