relations.py 1.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354
  1. #!/usr/bin/env python3
  2. # This is a small module to find relations across nodes (which node links to
  3. # which). It's used in /explore to build the graph.
  4. import json
  5. import rdflib
  6. import xml.etree.ElementTree as ET
  7. g = rdflib.graph.Graph()
  8. g.parse ('dokk.ttl', format='turtle')
  9. # Parse nodes from Turtle into a dictionary
  10. nodes = {}
  11. for s, p, o in g:
  12. s, p, o = str (s), str (p), str (o)
  13. if s not in nodes.keys ():
  14. nodes[s] = {}
  15. if p == 'http://schema.org/headline':
  16. nodes[s]['headline'] = o
  17. if p == 'http://schema.org/image':
  18. if len (o) > 0:
  19. nodes[s]['image'] = 'https://archive.dokk.org/images/' + o
  20. if p == 'http://schema.org/disambiguatingDescription':
  21. nodes[s]['description'] = o
  22. if p == 'http://schema.org/articleBody':
  23. nodes[s]['file'] = o
  24. # Now parse all DOCBOOK files and find which nodes links to which
  25. for key, node in nodes.items ():
  26. tree = ET.parse ('docbook/' + node['file'][:-5] + '.dbk')
  27. root = tree.getroot ()
  28. nodes[key]['links'] = []
  29. for link in root.iter ('ulink'):
  30. url = 'https://dokk.org/' + link.attrib['url']
  31. if url in nodes.keys ():
  32. nodes[key]['links'].append (url)
  33. # Write out dictionary to JSON
  34. with open ('dokk.json', 'w', encoding='UTF-8') as f:
  35. json.dump (nodes, f)