model.py 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239
  1. """model.py: This is a QAbstractTableModel that holds a list of Metadata objects created from books in an OPDS feed"""
  2. __author__ = "Steinar Bang"
  3. __copyright__ = "Steinar Bang, 2015-2022"
  4. __credits__ = ["Steinar Bang"]
  5. __license__ = "GPL v3"
  6. import datetime
  7. from PyQt5.Qt import Qt, QAbstractTableModel, QCoreApplication
  8. from calibre.ebooks.metadata.book.base import Metadata
  9. from calibre.gui2 import error_dialog
  10. from calibre.web.feeds import feedparser
  11. import urllib.parse
  12. import urllib.request
  13. import json
  14. import re
  15. class OpdsBooksModel(QAbstractTableModel):
  16. column_headers = [_('Title'), _('Author(s)'), _('Updated')]
  17. booktableColumnCount = 3
  18. filterBooksThatAreNewspapers = False
  19. filterBooksThatAreAlreadyInLibrary = False
  20. def __init__(self, parent, books = [], db = None):
  21. QAbstractTableModel.__init__(self, parent)
  22. self.db = db
  23. self.books = self.makeMetadataFromParsedOpds(books)
  24. self.filterBooks()
  25. def headerData(self, section, orientation, role):
  26. if role != Qt.DisplayRole:
  27. return None
  28. if orientation == Qt.Vertical:
  29. return section + 1
  30. if section >= len(self.column_headers):
  31. return None
  32. return self.column_headers[section]
  33. def rowCount(self, parent):
  34. return len(self.filteredBooks)
  35. def columnCount(self, parent):
  36. return self.booktableColumnCount
  37. def data(self, index, role):
  38. row, col = index.row(), index.column()
  39. if row >= len(self.filteredBooks):
  40. return None
  41. opdsBook = self.filteredBooks[row]
  42. if role == Qt.UserRole:
  43. # Return the Metadata object underlying each row
  44. return opdsBook
  45. if role != Qt.DisplayRole:
  46. return None
  47. if col >= self.booktableColumnCount:
  48. return None
  49. if col == 0:
  50. return opdsBook.title
  51. if col == 1:
  52. return u' & '.join(opdsBook.author)
  53. if col == 2:
  54. if opdsBook.timestamp is not None:
  55. return opdsBook.timestamp.strftime("%Y-%m-%d %H:%M:%S")
  56. return opdsBook.timestamp
  57. return None
  58. def downloadOpdsRootCatalog(self, gui, opdsUrl, displayDialogOnErrors):
  59. feed = feedparser.parse(opdsUrl)
  60. if 'bozo_exception' in feed:
  61. exception = feed['bozo_exception']
  62. message = 'Failed opening the OPDS URL ' + opdsUrl + ': '
  63. reason = ''
  64. if hasattr(exception, 'reason') :
  65. reason = str(exception.reason)
  66. error_dialog(gui, _('Failed opening the OPDS URL'), message, reason, displayDialogOnErrors)
  67. return (None, {})
  68. if 'server' in feed.headers:
  69. self.serverHeader = feed.headers['server']
  70. else:
  71. self.serverHeader = "none"
  72. print("serverHeader: %s" % self.serverHeader)
  73. print("feed.entries: %s" % feed.entries)
  74. catalogEntries = {}
  75. firstTitle = None
  76. for entry in feed.entries:
  77. title = entry.get('title', 'No title')
  78. if firstTitle is None:
  79. firstTitle = title
  80. links = entry.get('links', [])
  81. firstLink = next(iter(links), None)
  82. if firstLink is not None:
  83. print("firstLink: %s" % firstLink)
  84. catalogEntries[title] = firstLink.href
  85. return (firstTitle, catalogEntries)
  86. def downloadOpdsCatalog(self, gui, opdsCatalogUrl):
  87. print("downloading catalog: %s" % opdsCatalogUrl)
  88. opdsCatalogFeed = feedparser.parse(opdsCatalogUrl)
  89. self.books = self.makeMetadataFromParsedOpds(opdsCatalogFeed.entries)
  90. self.filterBooks()
  91. QCoreApplication.processEvents()
  92. nextUrl = self.findNextUrl(opdsCatalogFeed.feed)
  93. while nextUrl is not None:
  94. nextFeed = feedparser.parse(nextUrl)
  95. self.books = self.books + self.makeMetadataFromParsedOpds(nextFeed.entries)
  96. self.filterBooks()
  97. QCoreApplication.processEvents()
  98. nextUrl = self.findNextUrl(nextFeed.feed)
  99. def isCalibreOpdsServer(self):
  100. return self.serverHeader.startswith('calibre')
  101. def setFilterBooksThatAreAlreadyInLibrary(self, value):
  102. if value != self.filterBooksThatAreAlreadyInLibrary:
  103. self.filterBooksThatAreAlreadyInLibrary = value
  104. self.filterBooks()
  105. def setFilterBooksThatAreNewspapers(self, value):
  106. if value != self.filterBooksThatAreNewspapers:
  107. self.filterBooksThatAreNewspapers = value
  108. self.filterBooks()
  109. def filterBooks(self):
  110. self.beginResetModel()
  111. self.filteredBooks = []
  112. for book in self.books:
  113. if (not self.isFilteredNews(book)) and (not self.isFilteredAlreadyInLibrary(book)):
  114. self.filteredBooks.append(book)
  115. self.endResetModel()
  116. def isFilteredNews(self, book):
  117. if self.filterBooksThatAreNewspapers:
  118. if u'News' in book.tags:
  119. return True
  120. return False
  121. def isFilteredAlreadyInLibrary(self, book):
  122. if self.filterBooksThatAreAlreadyInLibrary:
  123. return self.db.has_book(book)
  124. return False
  125. def makeMetadataFromParsedOpds(self, books):
  126. metadatalist = []
  127. for book in books:
  128. metadata = self.opdsToMetadata(book)
  129. metadatalist.append(metadata)
  130. return metadatalist
  131. def opdsToMetadata(self, opdsBookStructure):
  132. authors = opdsBookStructure.author.replace(u'& ', u'&') if 'author' in opdsBookStructure else ''
  133. metadata = Metadata(opdsBookStructure.title, authors.split(u'&'))
  134. metadata.uuid = opdsBookStructure.id.replace('urn:uuid:', '', 1) if 'id' in opdsBookStructure else ''
  135. try:
  136. rawTimestamp = opdsBookStructure.updated
  137. except AttributeError:
  138. rawTimestamp = "1980-01-01T00:00:00+00:00"
  139. parsableTimestamp = re.sub('((\.[0-9]+)?\+0[0-9]:00|Z)$', '', rawTimestamp)
  140. metadata.timestamp = datetime.datetime.strptime(parsableTimestamp, '%Y-%m-%dT%H:%M:%S')
  141. tags = []
  142. summary = opdsBookStructure.get(u'summary', u'')
  143. summarylines = summary.splitlines()
  144. for summaryline in summarylines:
  145. if summaryline.startswith(u'TAGS: '):
  146. tagsline = summaryline.replace(u'TAGS: ', u'')
  147. tagsline = tagsline.replace(u'<br />',u'')
  148. tagsline = tagsline.replace(u', ', u',')
  149. tags = tagsline.split(u',')
  150. metadata.tags = tags
  151. bookDownloadUrls = []
  152. links = opdsBookStructure.get('links', [])
  153. for link in links:
  154. url = link.get('href', '')
  155. bookType = link.get('type', '')
  156. # Skip covers and thumbnails
  157. if not bookType.startswith('image/'):
  158. if bookType == 'application/epub+zip':
  159. # EPUB books are preferred and always put at the head of the list if found
  160. bookDownloadUrls.insert(0, url)
  161. else:
  162. # Formats other than EPUB (eg. AZW), are appended as they are found
  163. bookDownloadUrls.append(url)
  164. metadata.links = bookDownloadUrls
  165. return metadata
  166. def findNextUrl(self, feed):
  167. for link in feed.links:
  168. if link.rel == u'next':
  169. return link.href
  170. return None
  171. def downloadMetadataUsingCalibreRestApi(self, opdsUrl):
  172. # The "updated" values on the book metadata, in the OPDS returned
  173. # by calibre, are unrelated to the books they are returned with:
  174. # the "updated" value is the same value for all books metadata,
  175. # and this value is the last modified date of the entire calibre
  176. # database.
  177. #
  178. # It is therefore necessary to use the calibre REST API to get
  179. # a meaningful timestamp for the books
  180. # Get the base of the web server, from the OPDS URL
  181. parsedOpdsUrl = urllib.parse.urlparse(opdsUrl)
  182. # GET the search URL twice: the first time is to get the total number
  183. # of books in the other calibre. The second GET gets arguments
  184. # to retrieve all book ids in the other calibre.
  185. parsedCalibreRestSearchUrl = urllib.parse.ParseResult(parsedOpdsUrl.scheme, parsedOpdsUrl.netloc, '/ajax/search', '', '', '')
  186. calibreRestSearchUrl = parsedCalibreRestSearchUrl.geturl()
  187. calibreRestSearchResponse = urllib.request.urlopen(calibreRestSearchUrl)
  188. calibreRestSearchJsonResponse = json.load(calibreRestSearchResponse)
  189. getAllIdsArgument = 'num=' + str(calibreRestSearchJsonResponse['total_num']) + '&offset=0'
  190. parsedCalibreRestSearchUrl = urllib.parse.ParseResult(parsedOpdsUrl.scheme, parsedOpdsUrl.netloc, '/ajax/search', '', getAllIdsArgument, '').geturl()
  191. calibreRestSearchResponse = urllib.request.urlopen(parsedCalibreRestSearchUrl)
  192. calibreRestSearchJsonResponse = json.load(calibreRestSearchResponse)
  193. bookIds = list(map(str, calibreRestSearchJsonResponse['book_ids']))
  194. # Get the metadata for all books by adding the list of
  195. # all IDs as a GET argument
  196. bookIdsGetArgument = 'ids=' + ','.join(bookIds)
  197. parsedCalibreRestBooksUrl = urllib.parse.ParseResult(parsedOpdsUrl.scheme, parsedOpdsUrl.netloc, '/ajax/books', '', bookIdsGetArgument, '')
  198. calibreRestBooksResponse = urllib.request.urlopen(parsedCalibreRestBooksUrl.geturl())
  199. booksDictionary = json.load(calibreRestBooksResponse)
  200. self.updateTimestampInMetadata(bookIds, booksDictionary)
  201. def updateTimestampInMetadata(self, bookIds, booksDictionary):
  202. bookMetadataById = {}
  203. for bookId in bookIds:
  204. bookMetadata = booksDictionary[bookId]
  205. uuid = bookMetadata['uuid']
  206. bookMetadataById[uuid] = bookMetadata
  207. for book in self.books:
  208. bookMetadata = bookMetadataById[book.uuid]
  209. rawTimestamp = bookMetadata['timestamp']
  210. parsableTimestamp = re.sub('(\.[0-9]+)?\+00:00$', '', rawTimestamp)
  211. timestamp = datetime.datetime.strptime(parsableTimestamp, '%Y-%m-%dT%H:%M:%S')
  212. book.timestamp = timestamp
  213. self.filterBooks()