123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149 |
#!/usr/bin/env python3
# This script generates an RDF graph from the list of packages and manpages.
- import gzip
- import json
- import os
- from rdflib import BNode, Graph, Literal, Namespace, URIRef
- from rdflib.namespace import RDF
# Distribution name (presumably the Debian release the data comes from;
# confirm against the script that produces manpages.json)
DIST = 'testing'

# Namespaces used for the nodes and predicates of the graph
MANPAGE = Namespace('urn:manpage:')
MANPAGE_TERMS = Namespace('urn:manpage:terms:')
SCHEMA = Namespace('http://schema.org/')

# The graph that collects every triple, with a prefix bound for each
# namespace above
mangraph = Graph()
for _prefix, _namespace in (('manpage', MANPAGE),
                            ('manpage-terms', MANPAGE_TERMS),
                            ('schema', SCHEMA)):
    mangraph.bind(_prefix, _namespace)
# Add sections to the graph
def add_section_info(number, description):
    """Describe one manual section in ``mangraph``.

    Four triples are added about the node ``MANPAGE['section' + number]``:
    its type (``MANPAGE_TERMS.Section``), a name of the form
    "Section <number>", the section number itself and an English
    description.

    number -- section number as a string, e.g. '1'
    description -- short text describing what the section contains
    """
    section = MANPAGE['section' + number]

    statements = (
        (RDF.type, MANPAGE_TERMS.Section),
        (SCHEMA.name, Literal('Section ' + number)),
        (MANPAGE_TERMS.section_number, Literal(number)),
        (SCHEMA.disambiguatingDescription, Literal(description, lang='en')),
    )
    for predicate, value in statements:
        mangraph.add((section, predicate, value))
# Register the eight manual sections, in numerical order
for _number, _description in (
    ('1', 'Executable programs or shell commands'),
    ('2', 'System calls (functions provided by the kernel)'),
    ('3', 'Library calls (functions within program libraries)'),
    ('4', 'Special files'),
    ('5', 'File formats and conventions'),
    ('6', 'Games'),
    ('7', 'Miscellaneous'),
    ('8', 'System administration commands'),
):
    add_section_info(_number, _description)
def _description_from_name_text(name_text):
    """Return the description part of a NAME section, or '' if none is found.

    name_text -- the indented body of the NAME section, for example
                 'ls - list directory contents'
    """
    # Collapse line breaks and indentation first, so that a multi-line NAME
    # section yields a clean one-line description and a separator that
    # straddles a line break is still recognised.
    name_text = ' '.join(name_text.split())

    # There can be different kinds of separators. Usually it's '—' that
    # splits name and description, but '--', '-' and ',' also occur.
    # They are tried in this order.
    for separator in (' — ', ' -- ', ' - ', ','):
        if separator in name_text:
            return name_text.split(separator, 1)[1].strip()

    # Tough luck! No description for this page :(
    return ''


def _read_description(page_txt):
    """Extract the description from the NAME section of a text manpage.

    page_txt -- path of the plain-text rendering of the page

    The NAME section starts after a line that is exactly 'NAME' and consists
    of the following lines that begin with a space. It ends at the first
    line that does not, or at the end of the file. Returns '' when the file
    has no NAME section or no separator is found in it.
    """
    name_lines = []
    name_section_found = False

    # The separators searched for include a non-ASCII dash, so decode
    # explicitly instead of relying on the locale; undecodable bytes are
    # replaced rather than aborting the whole run.
    with open(page_txt, 'rt', encoding='utf-8', errors='replace') as f:
        for line in f:
            if name_section_found:
                if not line.startswith(' '):
                    # NAME section terminated
                    break
                name_lines.append(line)
            elif line == 'NAME\n':
                name_section_found = True

    # Parsing happens after the loop so that a NAME section running up to
    # the end of the file is handled like any other.
    return _description_from_name_text(''.join(name_lines))


with open('manpages.json', 'rt', encoding='utf-8') as fp:
    packages = json.load(fp)

# Loop packages
for package in packages:

    # DIST is 'testing', so this is the same URI as the previously
    # hard-coded 'debian-testing/amd64/' prefix
    rdfpackage = URIRef(MANPAGE + 'debian-' + DIST + '/amd64/' + package['name'])

    mangraph.add((rdfpackage, RDF.type, MANPAGE_TERMS.Package))
    mangraph.add((rdfpackage, SCHEMA.name, Literal(package['name'])))
    mangraph.add((rdfpackage, SCHEMA.version, Literal(package['version'])))
    mangraph.add((rdfpackage,
                  MANPAGE_TERMS.architecture,
                  Literal(package['architecture'])))
    mangraph.add((rdfpackage, MANPAGE_TERMS.filename, Literal(package['deb'])))

    # Loop manpages for this package
    for page in package['manpages']:

        # A page that is just a link to another page only contributes an
        # alternate name to the page it points to.
        if page['link_to'] is not None:
            mangraph.add((URIRef(MANPAGE + page['link_to']),
                          SCHEMA.alternateName,
                          Literal(page['identifier'])))
            continue

        subject = URIRef(MANPAGE + page['identifier'])

        mangraph.add((subject, RDF.type, SCHEMA.TextDigitalDocument))
        mangraph.add((subject, SCHEMA.name, Literal(page['name'])))
        mangraph.add((subject, SCHEMA.identifier, Literal(page['identifier'])))

        # Sections are 1..8 but can have variants such as "1p".
        # The section node is addressed the same way add_section_info
        # creates it.
        mangraph.add((subject,
                      MANPAGE_TERMS.section,
                      MANPAGE['section' + page['section']]))

        # This script used to read only the misspelled key
        # 'section_varinat'. The correct spelling is preferred, the old one
        # is still accepted, and a KeyError is raised when both are missing.
        if 'section_variant' in page:
            section_variant = page['section_variant']
        else:
            section_variant = page['section_varinat']
        mangraph.add((subject,
                      MANPAGE_TERMS.section_variant,
                      Literal(section_variant)))

        # Link to package
        mangraph.add((subject, SCHEMA.isPartOf, rdfpackage))

        # Extract description from manpage
        # Example: ls - list directory contents
        page_txt = 'man/man' + page['section'] + '/' + page['identifier'] + '.txt'

        if os.path.isfile(page_txt):
            mangraph.add((subject,
                          SCHEMA.disambiguatingDescription,
                          Literal(_read_description(page_txt))))
        else:
            print('Warning: file not found ', page_txt)

mangraph.serialize('manpages.ttl', 'turtle')
|