check-relative-doc-links.py 2.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109
  1. #!/usr/bin/env python
  2. import os
  3. import sys
  4. import re
  5. SOURCE_ROOT = os.path.abspath(os.path.dirname(os.path.dirname(__file__)))
  6. DOCS_DIR = os.path.join(SOURCE_ROOT, 'docs')
  7. def main():
  8. os.chdir(SOURCE_ROOT)
  9. filepaths = []
  10. totalDirs = 0
  11. try:
  12. for root, dirs, files in os.walk(DOCS_DIR):
  13. totalDirs += len(dirs)
  14. for f in files:
  15. if f.endswith('.md'):
  16. filepaths.append(os.path.join(root, f))
  17. except KeyboardInterrupt:
  18. print('Keyboard interruption. Please try again.')
  19. return
  20. totalBrokenLinks = 0
  21. for path in filepaths:
  22. totalBrokenLinks += getBrokenLinks(path)
  23. print('Parsed through ' + str(len(filepaths)) +
  24. ' files within docs directory and its ' +
  25. str(totalDirs) + ' subdirectories.')
  26. print('Found ' + str(totalBrokenLinks) + ' broken relative links.')
  27. def getBrokenLinks(filepath):
  28. currentDir = os.path.dirname(filepath)
  29. brokenLinks = []
  30. try:
  31. f = open(filepath, 'r')
  32. lines = f.readlines()
  33. except KeyboardInterrupt:
  34. print('Keyboard interruption whle parsing. Please try again.')
  35. finally:
  36. f.close()
  37. regexLink = re.compile('\[(.*?)\]\((?P<links>(.*?))\)')
  38. links = []
  39. for line in lines:
  40. matchLinks = regexLink.search(line)
  41. if matchLinks:
  42. relativeLink = matchLinks.group('links')
  43. if not str(relativeLink).startswith('http'):
  44. links.append(relativeLink)
  45. for link in links:
  46. sections = link.split('#')
  47. if len(sections) > 1:
  48. if str(link).startswith('#'):
  49. if not checkSections(sections, lines):
  50. brokenLinks.append(link)
  51. else:
  52. tempFile = os.path.join(currentDir, sections[0])
  53. if os.path.isfile(tempFile):
  54. try:
  55. newFile = open(tempFile, 'r')
  56. newLines = newFile.readlines()
  57. except KeyboardInterrupt:
  58. print('Keyboard interruption whle parsing. Please try again.')
  59. finally:
  60. newFile.close()
  61. if not checkSections(sections, newLines):
  62. brokenLinks.append(link)
  63. else:
  64. brokenLinks.append(link)
  65. else:
  66. if not os.path.isfile(os.path.join(currentDir, link)):
  67. brokenLinks.append(link)
  68. print_errors(filepath, brokenLinks)
  69. return len(brokenLinks)
  70. def checkSections(sections, lines):
  71. sectionHeader = sections[1].replace('-', '')
  72. regexSectionTitle = re.compile('# (?P<header>.*)')
  73. for line in lines:
  74. matchHeader = regexSectionTitle.search(line)
  75. if matchHeader:
  76. matchHeader = filter(str.isalnum, str(matchHeader.group('header')))
  77. if matchHeader.lower() == sectionHeader:
  78. return True
  79. return False
  80. def print_errors(filepath, brokenLink):
  81. if brokenLink:
  82. print "File Location: " + filepath
  83. for link in brokenLink:
  84. print "\tBroken links: " + link
  85. if __name__ == '__main__':
  86. sys.exit(main())