scrape.py 3.8 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495
  1. import os, music_tag
  2. from uuid import uuid4
  3. from data import config, database
  4. def scrape():
  5. files_skipped = 0
  6. updated_titles = 0
  7. updated_albums = 0
  8. updated_artists = 0
  9. updated_genres = 0
  10. print('Scraping existing files for missing metadata')
  11. for uuid in database['music']:
  12. track = database['music'][uuid]
  13. if track['title'] and track['album'] and track['artists'] and track['genre'] and (database['albums'][track['album']]['artist'] if track['album'] else True):
  14. files_skipped += 1
  15. continue
  16. print('Scraping %s' % uuid)
  17. path = os.path.join(config['files']['audio']['path'], track['path'])
  18. file = music_tag.load_file(path)
  19. if not track['title'] and file['title'].value:
  20. track['title'] = file['title'].value
  21. updated_titles += 1
  22. if not track['album'] and file['album'].value:
  23. for uuid in database['albums']:
  24. album = database['albums'][uuid]
  25. if file['album'].value == album['name']:
  26. track['album'] = uuid
  27. break
  28. else:
  29. uuid = uuid4()
  30. album_artist = None
  31. if file['albumartist']:
  32. for uuid in database['artists']:
  33. artist = database['artists'][uuid]
  34. if file['albumartist'] == artist['name']:
  35. album_artist = uuid
  36. break
  37. else:
  38. uuid = uuid4()
  39. database['artists'][uuid] = {
  40. 'name': file['albumartist'].value
  41. }
  42. album_artist = uuid
  43. database['albums'][uuid] = {
  44. 'name': file['album'].value,
  45. 'artist': album_artist,
  46. 'cover': None
  47. }
  48. track['album'] = uuid
  49. updated_albums += 1
  50. if not track['artists'] and file['artist'].value:
  51. artists = []
  52. for delimeter in config['delimeters']:
  53. split_str = file['artist'].value.split(delimeter)
  54. if len(split_str) > 1:
  55. artists = split_str
  56. break
  57. else:
  58. artists = [file['artist'].value]
  59. for artist in artists:
  60. artist = artist.strip()
  61. for uuid in database['artists']:
  62. db_artist = database['artists'][uuid]
  63. if artist == db_artist['name']:
  64. track['artists'].append(uuid)
  65. break
  66. else:
  67. uuid = uuid4()
  68. database['artists'][uuid] = {
  69. 'name': artist
  70. }
  71. track['artists'].append(uuid)
  72. track['artists'] = list(set(track['artists']))
  73. updated_artists += 1
  74. if not track['genre'] and file['genre'].value:
  75. for uuid in database['genres']:
  76. genre = database['genres'][uuid]
  77. if file['genre'].value == genre['name']:
  78. track['genre'] = uuid
  79. break
  80. else:
  81. uuid = uuid4()
  82. database['genres'][uuid] = {
  83. 'name': file['genre'].value
  84. }
  85. track['genre'] = uuid
  86. updated_genres += 1
  87. print('Skipped %s files' % files_skipped)
  88. print('%s files had there titles updated' % updated_titles)
  89. print('%s files had there associated albums updated' % updated_albums)
  90. print('%s files had there contributing artists updated' % updated_artists)
  91. print('%s files had there genres updated' % updated_artists)