mkpage.pl 7.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271
  1. #!/usr/bin/env perl
=begin comment

Automatically generate certain parts of the website (the index, the page
containing all of the posts, etc.) and add boilerplate to the other written
posts, such as the generated TOC, general metadata and the navigation menu.

TODO:
- Add proper error handling

=end comment

=cut
  10. use v5.40;
  11. use open qw<:std :encoding(UTF-8)>;
  12. use Getopt::Long;
  13. use File::Find;
  14. use File::Basename;
  15. use Time::Piece;
=begin comment

Since the '<ol>' element can't handle proper subsection numbering without
help from CSS, and we want to support text-only browsers as much as we can,
we need to generate the TOC with the section numbering already in place. This
can be done by deriving the outline from the order of appearance of the
heading elements, which must conform to the HTML specification. That is:

An HTML document can't have its headings nested more than a single step at a
time, but when moving out of the tree, the jump can be of any length, up to
heading 1. So, as we move through the headings, we compare each level with
the previous one, while keeping track of the subsection numbering inside an
array. And, since all headings are children of the '<h1>' element (which
also usually — and in my documents, always — serves as the title), we skip
adding it to the TOC.

Also, so that the TOC can reference the sections, we add IDs to the headings
and return the document with the modified headings.

=end comment

=cut
  # Generates the TOC for the document.
  # $1: string - Name of the file, used only in error messages
  # $2..: [string] - Each line of the document
  # returns: table of contents, title, document with ids added to the headings
  # dies: when a heading is out of order, its level is outside 1-6, or the
  #       document has no (non-empty) <h1>
  sub gentoc {
      my ($file, @lines) = @_;

      # Editors count lines from 1, array indexes from 0.
      my sub err_msg {
          my ($index) = @_;
          return "$file: Improper heading at line " . ($index + 1);
      }

      my $last_level = 0;
      my @counters;
      my $toc = '';
      my $title;

      # Assemble the outline.
      # TODO: Catch unclosed heading tags
      for my $i (0 .. $#lines) {
          # Skip lines without headings
          next unless $lines[$i] =~ m{<h([0-9])>(.*)</h[0-9]>};
          my ($level, $heading) = ($1, $2);
          die err_msg($i) if $level < 1 || $level > 6;

          # Don't add the <h1> heading to the TOC, but set it as the title
          if ($last_level == 0) {
              die err_msg($i) if $level != 1;
              $title = $heading;
              $last_level = 1;
              next;
          }

          # Only the first heading may be an <h1>
          die err_msg($i) if $level == 1;

          # The id is the heading without inline tags and quotes, with
          # whitespace turned into underscores
          my $id = $heading =~ s/<[^>]*>//gr =~ s/['"]//gr =~ s/\s/_/gr;

          # Swap only the heading element itself, so the line ending and any
          # text around the heading survive
          $lines[$i] =~ s{<h[0-9]>.*</h[0-9]>}{<h$level id="$id">$heading</h$level>};

          if ($level > $last_level) { # start subsection
              # Nesting in steps larger than one is disallowed in the HTML spec
              die err_msg($i) if $level - $last_level != 1;
              push(@counters, 1);
              $toc .= "\n" . "\t" x (@counters - 1) . "<ul>\n";
          } elsif ($level < $last_level) { # end subsection
              $toc .= "</li>\n";
              # Close sections as we traverse up the tree
              for (1 .. $last_level - $level) {
                  pop(@counters);
                  $toc .= "\t" x @counters . "</ul></li>\n";
              }
              $counters[-1]++ if @counters;
          } else { # same subsection
              $counters[-1]++;
              $toc .= "</li>\n";
          }

          # Add an item to the list
          my $tabs = "\t" x @counters;
          my $number = join('.', @counters);
          $toc .= qq{$tabs<li><a href="#$id">$number $heading</a>};
          $last_level = $level;
      }

      # Close remaining sections
      while (@counters) {
          pop(@counters);
          $toc .= "</li>\n";
          $toc .= "\t" x @counters . "</ul>";
      }

      # A title of '0' is a valid title, so test for presence, not truth
      die "Missing <h1> heading at file: $file"
          unless defined $title && length $title;
      return ($toc, $title, join('', @lines));
  }
  # Indents every line of an HTML fragment, leaving alone blank lines and lines
  # that start inside a <pre> element, where whitespace is significant.
  # $1: string - HTML fragment
  # $2: string - Indentation to prepend
  # returns: the indented fragment
  sub _indent_html {
      my ($html, $indent) = @_;
      my $in_pre = 0;
      my @indented;
      for my $line (split /^/m, $html) {
          my $verbatim = $in_pre || $line !~ /\S/;
          # The last <pre> or </pre> tag on the line decides the state in
          # which the next line starts
          while ($line =~ m{<(/?)pre\b}gi) {
              $in_pre = $1 ? 0 : 1;
          }
          push(@indented, ($verbatim ? '' : $indent) . $line);
      }
      return join('', @indented);
  }

  # Assembles the complete page with all the passed data and metadata.
  # $1: string - Table of contents
  # $2: string - Title of the document
  # $3: string - Contents of the document, as a single string
  # $4: string - Date the document was last modified
  # returns: the complete HTML page as a single string
  sub mkpage {
      my ($toc, $title, $contents, $date) = @_;
      my $toc_indent = "\t" x 5;
      my $content_indent = "\t" x 3;

      # The title comes from the <h1> heading and may carry inline markup,
      # but <title> only holds text
      my $plain_title = $title =~ s/<[^>]*>//gr;

      # Add indentation
      $toc = _indent_html($toc, $toc_indent);
      $contents = _indent_html($contents, $content_indent);

      return <<~"TEMPLATE";
      <!doctype html>
      <html lang="en">
      <head>
      <meta charset="utf-8">
      <meta name="author" content="Henrique F. T. Paone">
      <link rel="stylesheet" href="stylesheet.css">
      <title>$plain_title</title>
      </head>
      <body>
      <header>
      <nav>
      </nav>
      </header>
      <main>
      <header>
      <p>$date</p>
      <nav>
      $toc
      </nav>
      </header>$contents
      </main>
      </body>
      </html>
      TEMPLATE
  }
=begin comment

The index page is a site map with all of the written posts organized by themes,
which are derived from the directory structure of the './posts' folder. The only
part that is not generated is the beginning of the page, that is, the h1 tag and
whatever preamble, if any, should come before the map.

To generate the main contents, which we call the index, we traverse the posts
folder, building a parent-to-children relational hash map. Then we traverse this
map recursively to partially assemble the index: one heading per directory and
one li tag per post. Finally, we add the missing ul tags that should surround
the li tags.

=end comment

=cut
  # Emits the heading of one directory, the links to the posts it holds and,
  # after them, the same for each of its sub-directories.
  # $1: hashref - { children => { dir path => [entry names] },
  #                 is_dir   => { path => 1 } }
  # $2: string - Path of the directory to emit
  # $3: number - Heading level to use for this directory
  # $4: string - Domain prepended to every post link
  # returns: list of 'h' and 'li' lines; empty for a directory without entries
  sub _index_entries {
      my ($tree, $dir, $level, $domain) = @_;
      my @names = sort @{ $tree->{children}{$dir} // [] };
      return unless @names;

      my @posts   = grep { !$tree->{is_dir}{"$dir/$_"} } @names;
      my @subdirs = grep {  $tree->{is_dir}{"$dir/$_"} } @names;

      my @entries = ("<h$level>" . ucfirst(basename($dir)) . "</h$level>\n");

      # Posts go first so they stay under their own directory's heading
      for my $post (@posts) {
          # 'some-post.post.html' is linked as 'some-post.html'
          my $href = $post =~ s/\.post(?=\.html\z)//r;
          my $label = $href =~
              # remove the extension,
              s/\.html\z//r =~
              # replace '_' and '-' with spaces and
              s/[_-]/ /gr =~
              # capitalize every word
              s/([\w']+)/\u\L$1/gr;
          push(@entries, "\t<li><a href=\"$domain/$href\">$label</a></li>\n");
      }

      for my $subdir (@subdirs) {
          push(@entries, _index_entries($tree, "$dir/$subdir", $level + 1, $domain));
      }
      return @entries;
  }

  # Surrounds every run of 'li' lines with 'ul' tags.
  # $1..: [string] - 'h' and 'li' lines
  # returns: the same lines with the 'ul' lines added
  sub _wrap_list_items {
      my @wrapped;
      my $in_list = 0;
      for my $line (@_) {
          if ($in_list) {
              if ($line =~ /<h[1-6]>/) {
                  push(@wrapped, "</ul>\n");
                  $in_list = 0;
              }
          } elsif ($line =~ /<li>/) {
              push(@wrapped, "<ul>\n");
              $in_list = 1;
          }
          push(@wrapped, $line);
      }
      # Add remaining 'ul' tag if needed
      push(@wrapped, "</ul>\n") if $in_list;
      return @wrapped;
  }

  # Builds the site map out of the directory structure of the posts folder.
  # Every directory directly inside the posts folder is a theme and gets an
  # 'h2' heading; nested directories get deeper headings. Files placed directly
  # in the posts folder and empty directories are not listed.
  # $1: string - Domain prepended to every post link
  # returns: list of HTML lines ('h', 'ul' and 'li' tags); empty without posts
  sub mkindex {
      my $domain = shift;
      # TODO: Specify the posts directory via command-line flag
      my $posts_dir = 'posts';
      my $top = "./$posts_dir";

      # Map every directory, by its full path so that directories sharing a
      # name don't get merged, to the names of its entries
      my %tree = (children => {}, is_dir => {});
      my @themes;
      find({
          wanted => sub {
              return if $_ eq '.';
              my $path = $File::Find::name;
              # find() has changed into $File::Find::dir, so the bare name
              # can be tested
              my $is_dir = -d $_;
              $tree{is_dir}{$path} = 1 if $is_dir;
              if ($File::Find::dir eq $top) {
                  push(@themes, $path) if $is_dir;
                  return;
              }
              push(@{ $tree{children}{$File::Find::dir} }, $_);
          },
      }, $top);

      # Sorted, so the output doesn't depend on the order of the file system
      my @index = map { _index_entries(\%tree, $_, 2, $domain) } sort @themes;
      return _wrap_list_items(@index);
  }
  # Entry point: reads the document named on the command line, adds the TOC and
  # the page boilerplate (plus the generated index when the document holds an
  # '<index/>' line) and prints the resulting page to standard output.

  # Parse command line options
  my $usage = "Usage: $0 [-d|--domain URL] FILE\n";
  my $domain = "https://localhost:8000";
  GetOptions('d|domain:s' => \$domain) or die $usage;

  # Open the file and put each line in an array
  my $file = $ARGV[0];
  die $usage unless defined $file;
  open(my $contents, "<", $file) or die "Could not open $file: $!\n";
  my @lines = <$contents>;
  close($contents);

  # Check if we are generating the index; the marker line itself is dropped
  my $is_index = 0;
  for my $idx (0 .. $#lines) {
      next unless $lines[$idx] =~ /^<index\/>$/;
      $is_index = 1;
      splice(@lines, $idx, 1);
      last;
  }

  # The date shown on the page is the last modification time of the file
  my $t = localtime((stat $file)[9]);
  my $date = sprintf("%s %s, %s", $t->fullmonth, $t->mday, $t->year);

  # The generated index goes after the hand-written part of the page
  push(@lines, "\n", mkindex($domain)) if $is_index;

  print mkpage(gentoc($file, @lines), $date);