123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354 |
#!/usr/bin/env python3
# This is a small module to find relations across nodes (which node links to
# which). It's used in /explore to build the graph.
#
# Inputs:  dokk.ttl (Turtle graph) and the DocBook files under docbook/.
# Output:  dokk.json, a dictionary keyed by node URI. Each value may contain
#          'headline', 'image', 'description' and 'file', and always contains
#          'links' (a list of URIs of other known nodes that it links to).

import json
import rdflib
import xml.etree.ElementTree as ET

# Predicates whose object is copied unchanged into the node, and the key it is
# stored under.
PLAIN_FIELDS = {
    'http://schema.org/headline': 'headline',
    'http://schema.org/disambiguatingDescription': 'description',
    'http://schema.org/articleBody': 'file',
}

g = rdflib.graph.Graph()
g.parse ('dokk.ttl', format='turtle')

# Parse nodes from Turtle into a dictionary
nodes = {}

for s, p, o in g:
    s, p, o = str (s), str (p), str (o)

    # Every subject gets an entry, even when none of its predicates is one we
    # keep.
    node = nodes.setdefault (s, {})

    if p in PLAIN_FIELDS:
        node[PLAIN_FIELDS[p]] = o
    elif p == 'http://schema.org/image':
        # An empty image value is ignored rather than turned into a bare
        # directory URL.
        if o:
            node['image'] = 'https://archive.dokk.org/images/' + o

# Now parse all DocBook files and find which nodes link to which
for key, node in nodes.items ():
    node['links'] = []

    # A subject with no articleBody triple has no 'file' entry and therefore
    # no DocBook document to scan; it keeps an empty list of links.
    if 'file' not in node:
        continue

    # The last five characters of the articleBody value are replaced by
    # '.dbk' (presumably a five-character file extension -- TODO confirm).
    tree = ET.parse ('docbook/' + node['file'][:-5] + '.dbk')
    root = tree.getroot ()

    for link in root.iter ('ulink'):
        target = link.attrib.get ('url')

        # Skip <ulink> elements that carry no url attribute
        if target is None:
            continue

        url = 'https://dokk.org/' + target

        # Only links that point at another known node are recorded
        if url in nodes:
            node['links'].append (url)

# Write out dictionary to JSON
with open ('dokk.json', 'w', encoding='UTF-8') as f:
    json.dump (nodes, f)
|