mkpage.pl 9.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364
  1. #!/usr/bin/env perl
  2. #-------------------------------------------------------------------------------
  3. # Automatically generate certain parts of the website (the index, page
  4. # containing all of the posts, etc) and add boiler plate to other written posts
  5. # like TOC generation, general metadata and the navigation menu.
  6. #
  7. # TODO:
  8. # - Add proper error handling
  9. #-------------------------------------------------------------------------------
  10. use v5.40;
  11. use open qw(:std :encoding(UTF-8));
  12. use Unicode::UTF8 qw(decode_utf8);
  13. use Getopt::Long qw(:config no_ignore_case bundling);
  14. use File::Find;
  15. use File::Basename;
  16. use Time::Piece;
  17. # Collapses multiline headings into a single line
  18. sub collapse_headings {
  19. my @lines = @_;
  20. my @collapsed;
  21. my $heading;
  22. my $found = 0;
  23. foreach (@lines) {
  24. $found = 1 if /<h\d>/;
  25. if ($found) {
  26. $heading .= $_
  27. } else {
  28. push(@collapsed, $_);
  29. }
  30. if (/<\/h\d>/) {
  31. $heading =~ s/\n/ /;
  32. push(@collapsed, $heading);
  33. $heading = '';
  34. $found = 0;
  35. }
  36. }
  37. return @collapsed;
  38. }
  39. #-------------------------------------------------------------------------------
  40. # Generates the TOC for the document.
  41. #
  42. # $1: [string] - Array containing each line of the document
  43. # returns: table of contents, title, document with ids added to the headings
  44. #
  45. # Since the '<ol>' element can't handle proper subsection numbering without
  46. # help from CSS, and we want to support text-only browsers as much as we can,
  47. # we need to generate the TOC already with the section numbering. This can be
  48. # done by deriving the outline from the order of appearance of the heading
  49. # elements which must conform with the HTML specification. Which is:
  50. #
  51. # A HTML document can't have it's headings nested more than a single step at a
  52. # time, but when moving out of the tree, it's jump can be of any lenght up to
  53. # heading 1. So, as we move through the headings, we compare each level with
  54. # previous one, while keeping track of the subsections numbering inside an
  55. # array. And, since all headings are children from the '<h1>' element (which
  56. # also usually — and in my documents, always — serves as the title), we skip
  57. # adding it to the TOC.
  58. #
  59. # Also, so that the TOC can reference the sections, we add IDs to the headings
  60. # and return them as a modified array.
  61. #-------------------------------------------------------------------------------
  62. sub gentoc {
  63. my $file = shift;
  64. my $md = shift;
  65. my @lines = @_;
  66. local *err_msg = sub { return "$file: Improper heading at line @_"; };
  67. my @counters;
  68. my $title;
  69. my $toc;
  70. my @md_idxs;
  71. my $md_title_idx;
  72. my $last_level = 0;
  73. my $heading_regex = '\s*<h(\d)>(.*)<\/h[\d]';
  74. if ($md) {
  75. $heading_regex = '\s*(#+)\s(.*)'
  76. }
  77. # Assemble the outline.
  78. # TODO: Catch unclosed heading tags
  79. for my $i (0..$#lines) {
  80. # Skip lines without headings
  81. next unless $lines[$i] =~ /$heading_regex/;
  82. my $level = $1;
  83. my $heading = $2;
  84. # Count the '#' if parsing markdown
  85. $level = ($level =~ tr/#//) if $md;
  86. die err_msg($i) if $level > 6;
  87. # Don't add the <h1> heading to the TOC, but set it as the title
  88. if ($last_level == 0) {
  89. if ($level == 1) {
  90. $md_title_idx = $i if $md;
  91. $title = $heading;
  92. $last_level = 1;
  93. next;
  94. } else {
  95. die err_msg($i);
  96. }
  97. }
  98. # The id is a modified heading
  99. my $id = $heading =~ s/'|"//gr =~ s/\s/-/gr;
  100. # Reassemble the line with the id
  101. if ($md) {
  102. push(@md_idxs, $i);
  103. } else {
  104. $lines[$i] = "<h$level id=$id>$heading</h$level>";
  105. }
  106. die err_msg($i) if $level == 1;
  107. # Assemble the TOC as we travers through the headings
  108. # Later, if needed, we convert it to Markdown.
  109. if ($level > $last_level) { # start subsection
  110. # Nesting in steps larger than one is disallowed in the HTML spec
  111. die err_msg($i) if $level - $last_level != 1;
  112. push(@counters, 1);
  113. $toc .= "<ul>\n" unless $md;
  114. } elsif ($level < $last_level) { # end subsection
  115. $toc .= "</li>\n";
  116. # Close sections as we traverse up the tree
  117. for (1..$last_level - $level) {
  118. pop(@counters);
  119. $toc .= "\t" x @counters."</ul></li>\n" unless $md;
  120. }
  121. $counters[-1]++ if @counters;
  122. } else { # same subsection
  123. $counters[-1]++;
  124. $toc .= "</li>\n" unless $md;
  125. }
  126. # Add a item to the list
  127. my $section = join('.', @counters);
  128. my $indent = "\t" x ($md ? @counters - 1 : @counters);
  129. if ($md) {
  130. # Add a Markdown item
  131. $toc .= $indent."[$section $heading](#$id)\n";
  132. } else {
  133. # Add an HTML item
  134. $toc .= $indent."<li><a href=#$id>$section $heading</a>";
  135. }
  136. $last_level = $level;
  137. }
  138. # Close remaining sections
  139. if ($md) {
  140. # Add anchors with IDs above each heading
  141. my $shift = 0;
  142. foreach (@md_idxs) {
  143. $_ += $shift++;
  144. $lines[$_] =~ /#+\s(.*)/;
  145. my $id = $1 =~ s/\s/-/gr;
  146. splice(@lines, $_, 0, "<a id=\"$id\"></a>\n");
  147. }
  148. # Add TOC after the first h1 element
  149. splice(@lines, $md_title_idx + 1, 0, "\n$toc");
  150. chomp $toc;
  151. } else {
  152. for (0..$#counters) {
  153. pop(@counters);
  154. $toc .= "</li>\n";
  155. $toc .= "\t" x @counters."</ul>";
  156. }
  157. }
  158. die "Missing <h1> heading at file: $file" unless $title;
  159. return ($toc, $title, join('', @lines));
  160. }
  161. # Assembles the complete page with all the passed data and metadata.
  162. # $1: string - Table of contents
  163. # $2: string - Title of the document
  164. # $3: string - Date the document was last modified
  165. # $4: [string] - Array with document's lines.
  166. sub mkpage {
  167. my ($toc, $title, $contents, $date) = @_;
  168. my $toc_indent = "\t" x 5;
  169. my $content_indent = "\t" x 3;
  170. # Document may have not TOC
  171. if ($toc) {
  172. # Add indent
  173. $toc =~ s/^/$toc_indent/mg;
  174. $toc = "<nav>\n$toc\n</nav>";
  175. } else {
  176. $toc = '';
  177. }
  178. # Indent the contents
  179. $contents =~ s/^/$content_indent/mg;
  180. return <<~"TEMPLATE";
  181. <!doctype html>
  182. <html lang="en">
  183. <head>
  184. <meta charset="utf-8">
  185. <meta name="author" content="Henrique F. T. Paone">
  186. <link rel="stylesheet" href="stylesheet.css">
  187. <title>$title</title>
  188. </head>
  189. <body>
  190. <header>
  191. <nav>
  192. </nav>
  193. </header>
  194. <main>
  195. <header>
  196. <p>$date</p>
  197. $toc
  198. </header>$contents
  199. </main>
  200. </body>
  201. </html>
  202. TEMPLATE
  203. }
  204. #-------------------------------------------------------------------------------
  205. # Generates the index page
  206. #
  207. # The index page is a site map with all of the written posts organized by themes,
  208. # which are derived from the directory structure of the './posts' folder. The only
  209. # part that is not generate is the beginning of the page, that is, the h1 tag and
  210. # whatever preamble, if any, that should come before the map.
  211. #
  212. # To generate the main contents, which we call index, we traverse the posts
  213. # folder, building a parent-to-children relational hash map. Then we traverse this
  214. # map recursively with the local 'walk' function to partially assemble the index.
  215. # Then we add the missing ul tags that should surround the li tags.
  216. #-------------------------------------------------------------------------------
  217. sub mkindex {
  218. my $domain = shift;
  219. # TODO: Specify the posts directory via command-line flag
  220. my $posts_dir = 'posts';
  221. my @index;
  222. # Get all the posts and put them in a directory to children hash
  223. my %dirs_to_files;
  224. my $root;
  225. find({
  226. wanted => sub {
  227. my $dir = decode_utf8(basename($File::Find::dir));
  228. $_ = decode_utf8($_);
  229. return if $_ eq '.' or $dir eq 'posts';
  230. $root = $dir unless ($root);
  231. push(@{$dirs_to_files{$dir}}, $_);
  232. },
  233. }, "./$posts_dir");
  234. # Traverse the dirs_to_files hash, adding the 'h' and 'li' tags
  235. my $level = 2;
  236. local *walk = sub {
  237. my $parent = shift;
  238. my $once = 1;
  239. my @children = @{$dirs_to_files{$parent}};
  240. for (0..$#children) {
  241. my $child = $children[$_];
  242. push(@index, "<h$level>".ucfirst($parent)."</h$level>\n"), $once = 0 if ($once);
  243. if ($dirs_to_files{$child}) {
  244. $level++;
  245. walk($child);
  246. $level--;
  247. } else {
  248. $child =~ s/.post//;
  249. my $item = $child;
  250. $item = $child =~
  251. s/.html//r =~
  252. s/_|-/ /gr =~
  253. # capitalize every word
  254. s/([\w']+)/\u\L$1/gr;
  255. push(@index, "\t<li><a href=\"$domain/$child\">$item</a></li>\n");
  256. }
  257. }
  258. };
  259. walk($root);
  260. # Surround the 'li' tags with 'ul' tags
  261. my @index_with_ul;
  262. my $li_found = 0;
  263. foreach (@index) {
  264. if ($li_found) {
  265. if (/<h[1-6]>/) {
  266. push(@index_with_ul, "</ul>\n");
  267. $li_found = 0;
  268. }
  269. } elsif (/<li>/) {
  270. push(@index_with_ul, "<ul>\n");
  271. $li_found = 1;
  272. }
  273. push(@index_with_ul, $_);
  274. }
  275. # Add remaining 'ul' tag if needed
  276. push(@index_with_ul, "</ul>\n") if $li_found;
  277. return @index_with_ul;
  278. }
  279. # Parse command line options
  280. my $domain = "https://localhost";
  281. my $port = '';
  282. my $date = '';
  283. my $toc_only = '';
  284. my $md = 0;
  285. GetOptions(
  286. 'd|domain=s' => \$domain,
  287. 'p|port=s' => \$port,
  288. 'D|date=s' => \$date,
  289. 't|toc-only' => \$toc_only,
  290. 'm|markdown' => \$md
  291. );
  292. $domain = "$domain:$port" if $port;
  293. # Get the file's contents by opening it or via STDIN
  294. my @lines;
  295. my $file = $ARGV[0];
  296. if ($file) {
  297. open(my $contents, "<", $file) or die "Could not open $_ $!\n";
  298. push(@lines, <$contents>);
  299. } elsif (-p STDIN){
  300. $file = "Reading input file's contents from STDIN";
  301. push(@lines, $_) foreach (<STDIN>);
  302. } else {
  303. die 'No input supplied.';
  304. }
  305. @lines = collapse_headings(@lines);
  306. # Check if we are generating the index
  307. my ($is_index, $idx) = (0) x 2;
  308. foreach (@lines) {
  309. if (/^<index\/>$/) {
  310. $is_index = 1;
  311. splice(@lines, $idx, 1);
  312. last;
  313. }
  314. $idx++;
  315. }
  316. push(@lines, "\n", mkindex($domain)) if $is_index;
  317. my ($toc, $title, $contents) = gentoc($file, $md, @lines);
  318. if ($toc_only) {
  319. print "$toc\n";
  320. exit 0;
  321. }
  322. print $md ? $contents : mkpage($toc, $title, $contents, $date);