#!/usr/bin/env python3
# find_packages.py
# Find the list of (binary) packages containing at least one manpage
import gzip
import json
import os
import sys
  6. DIST = "testing"
  7. ARCH = "amd64"
  8. manpages = {}
  9. # Find all manpages from the list of files in Contents-*.gz
  10. with open (DIST + '/' + ARCH + '/' + 'Contents-' + ARCH, 'rt', encoding='UTF-8') as f:
  11. # Search man files in the list of files
  12. # Example line: "usr/share/man/man8/useradd.8.gz admin/passwd"
  13. # Example line: "usr/share/man/it/man8/useradd.8.gz admin/passwd"
  14. for line in f:
  15. line = line.strip (' \t\n')
  16. if line.startswith ('usr/share/man/'):
  17. # Split line at ' ' and return only non-empty strings
  18. line = list (filter (None, line.split (' ')))
  19. if len (line) != 2:
  20. print ('Bad line!')
  21. exit()
  22. """
  23. usr/share/man/<lang>/man8/useradd.8.gz admin/passwd
  24. | | | | | | |_____ distro_package
  25. | | | | | |___________ distro_section
  26. | | | | |______________ page_compression
  27. | | | |________________ page_section_variant
  28. | | |________________________ page_name
  29. | |__________________________ page_section
  30. |__________________________________________________ page_path
  31. """
  32. page_path, page_full_name = line[0].rsplit ('/', 1)
  33. # Skip pages in /usr/share/man
  34. # They should not be here, they should be in /usr/share/man/man*
  35. if page_path.lower () == 'usr/share/man':
  36. continue
  37. page_section = page_path[-1]
  38. distro_section, distro_package = line[1].rsplit ('/', 1)
  39. page_name, page_section_variant, page_compression = page_full_name.rsplit ('.', 2)
  40. # Detect language
  41. page_language = 'en' if page_path.startswith ('usr/share/man/man') \
  42. else page_path.split ('/', 4)[3]
  43. # Check if this is a link to another manpage
  44. # For example pyrogenesis.6 is a link to 0ad.6
  45. # TODO this doesn't take into account languages!!!
  46. file_location = 'man/man' + page_section + '/' + page_full_name
  47. # Extract package name
  48. # /home/user/man/man6/0ad.6.gz => 0ad.6
  49. page_link = os.path.realpath (file_location).rsplit ('.', 1)[0].rsplit ('/', 1)[-1] \
  50. if os.path.islink (file_location) else None
  51. # Store our manpages. A package can have multiple pages
  52. if distro_package not in manpages.keys ():
  53. manpages[distro_package] = []
  54. manpages[distro_package].append ({
  55. 'file': line[0],
  56. 'link_to': page_link,
  57. 'path': page_path,
  58. 'identifier': page_name + '.' + page_section_variant,
  59. 'full_name': page_full_name,
  60. 'name': page_name,
  61. 'section': page_section,
  62. 'section_varinat': page_section_variant,
  63. 'compression': page_compression,
  64. 'language': page_language
  65. })
  66. packages = []
  67. # Extract list of packages that contain at least one manpage
  68. with open (DIST + '/' + ARCH + '/' + 'Packages', 'rt', encoding='UTF-8') as f:
  69. package = {
  70. 'name': None,
  71. 'version': None,
  72. 'architecture': None,
  73. 'section': None,
  74. 'filename': None,
  75. 'deb': None
  76. }
  77. for line in f:
  78. line = line.strip (' \t\n')
  79. if line.startswith ('Package:'):
  80. package['name'] = line.split (' ', 2)[1].strip ()
  81. if line.startswith ('Version:'):
  82. package['version'] = line.split (' ', 2)[1].strip ()
  83. if line.startswith ('Architecture:'):
  84. package['architecture'] = line.split (' ', 2)[1].strip ()
  85. if line.startswith ('Section:'):
  86. package['section'] = line.split (' ', 2)[1].strip ()
  87. if line.startswith ('Filename:'):
  88. package['filename'] = line.split (' ', 2)[1].strip ()
  89. package['deb'] = os.path.basename (package['filename'])
  90. # Empty line defines the end of a package metadata
  91. if line == "":
  92. # Does this package have manpages?
  93. if package['name'] in manpages.keys ():
  94. # Make sure package has all properties
  95. if None not in package.keys ():
  96. # Add manpages belonging to this package
  97. package['manpages'] = manpages[package['name']]
  98. packages.append (package.copy ())
  99. # Reset package dictionary
  100. for key in package.keys ():
  101. package[key] = None
  102. # The packages to download from the mirror
  103. with open ('packages.url', 'wt') as fp:
  104. for package in packages:
  105. fp.write ("http://mi.mirror.garr.it/mirrors/debian/" + package['filename'] + "\n")
  106. # Dump list of packages with their manpages
  107. with open ('manpages.json', 'wt') as fp:
  108. fp.write (json.dumps (packages, sort_keys=True, indent=4))