123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271 |
- #!/usr/bin/env perl
- =begin comment
- Automatically generate certain parts of the website (the index, the page
- containing all of the posts, etc.) and add boilerplate to the other written
- posts, like TOC generation, general metadata and the navigation menu.
- TODO:
- - Add proper error handling
- =end comment
- =cut
- use v5.40;
- use open qw<:std :encoding(UTF-8)>;
- use Getopt::Long;
- use File::Find;
- use File::Basename;
- use Time::Piece;
- =begin comment
- Since the '<ol>' element can't handle proper subsection numbering without
- help from CSS, and we want to support text-only browsers as much as we can,
- we need to generate the TOC already with the section numbering. This can be
- done by deriving the outline from the order of appearance of the heading
- elements, which must conform to the HTML specification. That is:
- An HTML document can't have its headings nested more than a single step at a
- time, but when moving out of the tree, its jump can be of any length up to
- heading 1. So, as we move through the headings, we compare each level with
- the previous one, while keeping track of the subsection numbering inside an
- array. And, since all headings are children of the '<h1>' element (which
- also usually — and in my documents, always — serves as the title), we skip
- adding it to the TOC.
- Also, so that the TOC can reference the sections, we add IDs to the headings
- and return the modified document.
- =end comment
- =cut
# Generates the TOC for the document and gives every section heading an id.
# $1: string   - Name of the file, only used in error messages
# $2: [string] - Array containing each line of the document
# returns: table of contents, title, document (the lines joined into a single
#          string) with ids added to the headings
# dies: on an invalid outline (first heading is not an <h1>, a second <h1>,
#       a level outside 1-6, nesting deeper by more than one step) or when
#       the document has no <h1> at all
sub gentoc {
    my ($file, @lines) = @_;

    # Array indices are zero-based; report the line number an editor shows.
    my $err_msg = sub {
        my $index = shift;
        return "$file: Improper heading at line " . ($index + 1);
    };

    my $last_level = 0;
    my @counters;    # current section number, one entry per nesting depth
    my $toc = '';
    my $title;

    # Assemble the outline.
    # TODO: Catch unclosed heading tags
    for my $i (0 .. $#lines) {
        # Skip lines without headings (/a keeps \d to ASCII digits)
        next unless $lines[$i] =~ m{<h(\d)>(.*)</h\d>}a;
        my ($level, $heading) = ($1, $2);

        die $err_msg->($i) if $level < 1 || $level > 6;

        # Don't add the <h1> heading to the TOC, but set it as the title
        if ($last_level == 0) {
            die $err_msg->($i) if $level != 1;
            $title      = $heading;
            $last_level = 1;
            next;
        }

        # The <h1> is the title, so there can only be one of them
        die $err_msg->($i) if $level == 1;

        # The id is the heading without inline markup and quotes, and with
        # every whitespace character turned into an underscore.
        my $id = $heading =~ s/<[^>]*>//gr =~ s/['"]//gr =~ s/\s/_/gr;

        # Add the id to the opening tag in place, so the rest of the line
        # (including its newline) is kept. The value is quoted because
        # characters such as '>' or '=' are not allowed in unquoted values.
        $lines[$i] =~ s{<h$level>}{<h$level id="$id">};

        if ($level > $last_level) {    # start subsection
            # Nesting in steps larger than one is disallowed in the HTML spec
            die $err_msg->($i) if $level - $last_level != 1;
            push(@counters, 1);
            $toc .= "\n" . "\t" x (@counters - 1) . "<ul>\n";
        }
        elsif ($level < $last_level) {    # end subsection
            $toc .= "</li>\n";
            # Close sections as we traverse up the tree
            for (1 .. $last_level - $level) {
                pop(@counters);
                $toc .= "\t" x @counters . "</ul></li>\n";
            }
            $counters[-1]++ if @counters;
        }
        else {    # same subsection
            $counters[-1]++;
            $toc .= "</li>\n";
        }

        # Add an item to the list; its <li> is closed by whatever comes next
        $toc .= "\t" x @counters
            . qq{<li><a href="#$id">}
            . join('.', @counters)
            . ' ' . $heading . '</a>';

        $last_level = $level;
    }

    # Close remaining sections
    for (0 .. $#counters) {
        pop(@counters);
        $toc .= "</li>\n";
        $toc .= "\t" x @counters . "</ul>";
    }

    # A plain truth test would also reject the perfectly valid title "0"
    die "Missing <h1> heading at file: $file"
        unless defined $title && length $title;

    return ($toc, $title, join('', @lines));
}
# Assembles the complete page with all the passed data and metadata.
# $1: string - Table of contents
# $2: string - Title of the document (inline markup is dropped for <title>)
# $3: string - Contents of the document, as a single string
# $4: string - Date the document was last modified
# returns: string with the complete HTML page
sub mkpage {
    my ($toc, $title, $contents, $date) = @_;
    my $toc_indent     = "\t" x 5;
    my $content_indent = "\t" x 3;

    # The title comes from the <h1>, which may carry inline markup, but
    # <title> only holds text: tags in it would be displayed literally.
    my $plain_title = $title =~ s/<[^>]*>//gr;

    # Add indentation.
    # NOTE(review): this also indents lines inside <pre> blocks, where the
    # extra tabs are rendered; confirm whether posts rely on <pre>.
    $toc      =~ s/^/$toc_indent/mg;
    $contents =~ s/^/$content_indent/mg;

    # '<<~' strips the terminator's indentation from every line before the
    # variables are interpolated, so $toc and $contents keep the indentation
    # given above: one level below the inner <nav> and <main> respectively.
    return <<~"TEMPLATE";
    <!doctype html>
    <html lang="en">
    	<head>
    		<meta charset="utf-8">
    		<meta name="author" content="Henrique F. T. Paone">
    		<link rel="stylesheet" href="stylesheet.css">
    		<title>$plain_title</title>
    	</head>
    	<body>
    		<header>
    			<nav>
    			</nav>
    		</header>
    		<main>
    			<header>
    				<p>$date</p>
    				<nav>
    $toc
    				</nav>
    			</header>
    $contents
    		</main>
    	</body>
    </html>
    TEMPLATE
}
- =begin comment
- The index page is a site map with all of the written posts organized by themes,
- which are derived from the directory structure of the './posts' folder. The only
- part that is not generated is the beginning of the page, that is, the h1 tag and
- whatever preamble, if any, that should come before the map.
- To generate the main contents, which we call index, we traverse the posts
- folder, building a parent-to-children relational hash map. Then we traverse this
- map recursively with the local 'walk' function to partially assemble the index.
- Then we add the missing ul tags that should surround the li tags.
- =end comment
- =cut
# Builds the site map that goes into the index page.
# $1: string - Domain (URL prefix) that the post links are made from
# returns: list of HTML lines: one heading per non-empty directory below the
#          posts directory (<h2> for the top-level ones, one level deeper for
#          each nesting step) followed by a <ul> with that directory's own
#          posts. Returns an empty list when there is nothing to list.
# Entries placed directly in the posts directory are not listed.
sub mkindex {
    my $domain = shift;
    # TODO: Specify the posts directory via command-line flag
    my $posts_dir = 'posts';
    my $top       = "./$posts_dir";
    return unless -d $top;

    # Get all the posts and put them in a directory to children hash. Both
    # hashes are keyed by path, so equally named directories can't collide.
    my %children_of;    # directory path => names of its entries
    my %is_dir;         # entry path     => true for directories
    find({
        wanted => sub {
            return if $_ eq '.';
            $is_dir{$File::Find::name} = 1 if -d $_;
            push(@{ $children_of{$File::Find::dir} }, $_);
        },
    }, $top);

    # Traverse the hash, adding the 'h' and 'li' tags. The entries are
    # sorted, posts first, so that the output doesn't depend on the order the
    # file system returns them in and a directory's own posts always come
    # right below its heading instead of below a sub-theme's one.
    my @index;
    my $walk;
    $walk = sub {
        my ($path, $level) = @_;
        my @names = sort @{ $children_of{$path} // [] };
        return unless @names;    # nothing to show for an empty directory

        push(@index, "<h$level>" . ucfirst(basename($path)) . "</h$level>\n");

        for my $post (grep { !$is_dir{"$path/$_"} } @names) {
            # 'name.post.html' is published as 'name.html'
            my $target = $post =~ s/\.post(\.html)\z/$1/r;
            my $item = $target =~
                # remove the '.html' extension
                s/\.html\z//r =~
                # replace '_' and '-' with spaces and
                s/[_-]/ /gr =~
                # capitalize every word
                s/([\w']+)/\u\L$1/gr;
            push(@index, "\t<li><a href=\"$domain/$target\">$item</a></li>\n");
        }

        for my $subdir (grep { $is_dir{"$path/$_"} } @names) {
            $walk->("$path/$subdir", $level + 1);
        }
    };
    for my $theme (sort @{ $children_of{$top} // [] }) {
        $walk->("$top/$theme", 2) if $is_dir{"$top/$theme"};
    }
    undef $walk;    # break the closure's reference to itself

    # Surround the 'li' tags with 'ul' tags
    my @index_with_ul;
    my $li_found = 0;
    for my $line (@index) {
        if ($li_found) {
            if ($line =~ m{\A<h\d+>}) {
                push(@index_with_ul, "</ul>\n");
                $li_found = 0;
            }
        }
        elsif ($line =~ m{\A\t<li>}) {
            push(@index_with_ul, "<ul>\n");
            $li_found = 1;
        }
        push(@index_with_ul, $line);
    }
    # Add remaining 'ul' tag if needed
    push(@index_with_ul, "</ul>\n") if $li_found;
    return @index_with_ul;
}
# Entry point: reads the document named on the command line, expands the
# '<index/>' marker (if any) into the generated site map, and prints the
# finished page to standard output.

# Parse command line options
my $usage  = "Usage: $0 [-d domain] file\n";
my $domain = "https://localhost:8000";
GetOptions('d|domain:s' => \$domain) or die $usage;

# Open the file and put each line in an array
my $file = $ARGV[0] // die $usage;
open(my $contents, "<", $file) or die "Could not open $file: $!\n";
my @lines = <$contents>;
close($contents);

# Check if we are generating the index: a line consisting of '<index/>' is
# replaced, where it stands, by the generated site map. Only the first marker
# is expanded.
for my $idx (0 .. $#lines) {
    next unless $lines[$idx] =~ /^<index\/>$/;
    splice(@lines, $idx, 1, "\n", mkindex($domain));
    last;
}

# The page shows the date the source file was last modified
my $t    = localtime((stat $file)[9]);
my $date = sprintf("%s %s, %s", $t->fullmonth, $t->mday, $t->year);

# Posts and the index are assembled in the same way from here on
print mkpage(gentoc($file, @lines), $date);
|