|
- #!/usr/bin/env perl
- #-------------------------------------------------------------------------------
- # Automatically generate certain parts of the website (the index, page
- # containing all of the posts, etc) and add boiler plate to other written posts
- # like TOC generation, general metadata and the navigation menu.
- #
- # TODO:
- # - Add proper error handling
- #-------------------------------------------------------------------------------
- use v5.40;
- use open qw(:std :encoding(UTF-8));
- use Unicode::UTF8 qw(decode_utf8);
- use Getopt::Long qw(:config no_ignore_case bundling);
- use File::Find;
- use File::Basename;
- use Time::Piece;
# Collapses multi-line headings into a single line.
#
# @_:      [string] - Lines of the document, each normally ending in "\n"
# returns: the same lines, except that every run of lines going from the one
#          holding an opening '<hN>' tag up to the one holding a closing
#          '</hN>' tag is merged into a single element
#
# Every newline inside a merged heading becomes a space, while the newline that
# terminates the heading's last line is kept, so the result is still one proper
# line. Only bare '<hN>' opening tags (no attributes) are recognized.
sub collapse_headings {
    my @lines = @_;
    my @collapsed;
    my $heading    = '';
    my $in_heading = 0;

    for my $line (@lines) {
        $in_heading = 1 if $line =~ /<h\d>/;

        # Ordinary line: pass it through untouched. This also covers a stray
        # closing tag that has no opening tag before it.
        if (!$in_heading) {
            push(@collapsed, $line);
            next;
        }

        $heading .= $line;
        next unless $line =~ /<\/h\d>/;

        # The heading is complete: flatten the interior newlines but keep the
        # final line terminator.
        my $eol = ($heading =~ s/\n\z//) ? "\n" : '';
        $heading =~ tr/\n/ /;
        push(@collapsed, $heading . $eol);

        $heading    = '';
        $in_heading = 0;
    }

    # A heading that was never closed is handed back as it was read instead of
    # being silently dropped from the document.
    push(@collapsed, $heading) if $in_heading;

    return @collapsed;
}
#-------------------------------------------------------------------------------
# Generates the TOC for the document.
#
# $1: string   - Name of the file, only used in error messages
# $2: boolean  - True when the document is Markdown, false when it is HTML
# $3: [string] - Array containing each line of the document
# returns: table of contents, title, document (as a single string) with ids
#          added to the headings
# dies: when the first heading is not a level 1 heading, when a second level 1
#       heading appears, when a level is outside 1-6, when the nesting grows by
#       more than one level at once, or when there is no level 1 heading at all
#
# Since the '<ol>' element can't handle proper subsection numbering without
# help from CSS, and we want to support text-only browsers as much as we can,
# we need to generate the TOC already with the section numbering. This can be
# done by deriving the outline from the order of appearance of the heading
# elements, which must conform with the HTML specification. Which is:
#
# An HTML document can't have its headings nested more than a single step at a
# time, but when moving out of the tree, its jump can be of any length up to
# heading 1. So, as we move through the headings, we compare each level with
# the previous one, while keeping track of the subsection numbering inside an
# array. And, since all headings are children of the '<h1>' element (which
# also usually — and in my documents, always — serves as the title), we skip
# adding it to the TOC.
#
# Also, so that the TOC can reference the sections, we add IDs to the headings:
# HTML headings get an 'id' attribute, Markdown headings get an '<a id>' anchor
# line placed right above them. In Markdown mode the TOC is also inserted into
# the returned document, right after the title line.
#
# NOTE: in Markdown mode any line that starts with '#' followed by whitespace is
# taken as a heading, including such lines inside code blocks.
#-------------------------------------------------------------------------------
sub gentoc {
    my ($file, $md, @lines) = @_;

    # Builds the error message for a heading that breaks the outline rules.
    # The number is the 1-based position of the line inside @lines.
    my $improper = sub {
        my ($idx) = @_;
        return "$file: Improper heading at line " . ($idx + 1);
    };

    # Capture 1 is the level (a digit, or a run of '#'), capture 2 the text.
    my $heading_re = $md ? qr/^\s*(#+)\s(.*)/ : qr/<h(\d)>(.*)<\/h\d/;

    my @counters;         # current section number, one entry per nesting depth
    my $title;
    my $toc = '';
    my @md_anchors;       # [line index, id] of each Markdown heading below h1
    my $md_title_idx;
    my $last_level = 0;   # 0 until the title has been seen

    # Assemble the outline.
    # TODO: Catch unclosed heading tags
    for my $i (0 .. $#lines) {
        # Skip lines without headings
        next unless $lines[$i] =~ $heading_re;
        my ($level, $heading) = ($1, $2);
        # Count the '#' if parsing markdown
        $level = length $level if $md;

        die $improper->($i) if $level < 1 || $level > 6;

        # Don't add the <h1> heading to the TOC, but set it as the title
        if ($last_level == 0) {
            die $improper->($i) if $level != 1;
            $md_title_idx = $i if $md;
            $title        = $heading;
            $last_level   = 1;
            next;
        }
        # Only one <h1> is allowed
        die $improper->($i) if $level == 1;

        # The id is the heading without quotes and with dashes for whitespace
        my $id = $heading =~ s/['"]//gr =~ s/\s/-/gr;
        if ($md) {
            # Remember the id so the anchor matches the TOC link exactly
            push(@md_anchors, [$i, $id]);
        } else {
            # Add the id in place, keeping the rest of the line as it is
            $lines[$i] =~ s/<h$level>/<h$level id=$id>/;
        }

        # Assemble the TOC as we traverse through the headings
        if ($level > $last_level) {                 # start subsection
            # Nesting in steps larger than one is disallowed in the HTML spec
            die $improper->($i) if $level - $last_level != 1;
            push(@counters, 1);
            $toc .= "<ul>\n" unless $md;
        } elsif ($level < $last_level) {            # end subsection
            $toc .= "</li>\n" unless $md;
            # Close sections as we traverse up the tree
            for (1 .. $last_level - $level) {
                pop(@counters);
                $toc .= ("\t" x @counters) . "</ul></li>\n" unless $md;
            }
            $counters[-1]++ if @counters;
        } else {                                    # same subsection
            $counters[-1]++;
            $toc .= "</li>\n" unless $md;
        }

        # Add an item to the list
        my $section = join('.', @counters);
        if ($md) {
            $toc .= ("\t" x (@counters - 1)) . "[$section $heading](#$id)\n";
        } else {
            # The '<li>' is left open: it is closed by the next heading or by
            # the final clean up below.
            $toc .= ("\t" x @counters) . "<li><a href=#$id>$section $heading</a>";
        }

        $last_level = $level;
    }

    die "Missing <h1> heading at file: $file"
        unless defined $title && length $title;

    if ($md) {
        # Add anchors with IDs above each heading. Every insertion pushes the
        # following headings one line down.
        my $inserted = 0;
        for my $anchor (@md_anchors) {
            my ($idx, $id) = @$anchor;
            splice(@lines, $idx + $inserted++, 0, "<a id=\"$id\"></a>\n");
        }
        # Add TOC after the first h1 element
        splice(@lines, $md_title_idx + 1, 0, "\n$toc");
        chomp $toc;
    } else {
        # Close remaining sections
        while (@counters) {
            pop(@counters);
            $toc .= "</li>\n" . ("\t" x @counters) . "</ul>";
        }
    }

    return ($toc, $title, join('', @lines));
}
# Wraps a document in the site's HTML page template.
# $1: string - Table of contents; when false no '<nav>' block is emitted
# $2: string - Title of the document, placed in the '<title>' element
# $3: string - Contents of the document, as a single string
# $4: string - Date the document was last modified
# returns: the complete page as one string
sub mkpage {
    my ($toc, $title, $contents, $date) = @_;

    # The TOC is optional; every one of its lines is pushed five tabs in and
    # the whole thing goes inside its own '<nav>' element.
    my $nav = '';
    if ($toc) {
        my $pad = "\t" x 5;
        $nav = "<nav>\n" . ($toc =~ s/^/$pad/mgr) . "\n</nav>";
    }

    # Every line of the contents is pushed three tabs in.
    my $pad  = "\t" x 3;
    my $body = $contents =~ s/^/$pad/mgr;

    return <<~"TEMPLATE";
        <!doctype html>
        <html lang="en">
        <head>
        <meta charset="utf-8">
        <meta name="author" content="Henrique F. T. Paone">
        <link rel="stylesheet" href="stylesheet.css">
        <title>$title</title>
        </head>
        <body>
        <header>
        <nav>
        </nav>
        </header>
        <main>
        <header>
        <p>$date</p>
        $nav
        </header>$body
        </main>
        </body>
        </html>
        TEMPLATE
}
#-------------------------------------------------------------------------------
# Generates the index page
#
# $1: string - Domain (with port, if any) prefixed to every post link
# returns: the lines of the index, as a list of strings
#
# The index page is a site map with all of the written posts organized by themes,
# which are derived from the directory structure of the './posts' folder. The only
# part that is not generated is the beginning of the page, that is, the h1 tag and
# whatever preamble, if any, that should come before the map.
#
# To generate the main contents, which we call index, we traverse the posts
# folder, building a parent-to-children relational hash map. Then we traverse this
# map recursively, starting at every directory found directly inside the posts
# folder, to partially assemble the index. Then we add the missing ul tags that
# should surround the li tags.
#
# Directories are identified by their base name only, so two directories with the
# same name in different places share one entry. Files placed directly inside the
# posts folder are not listed.
#-------------------------------------------------------------------------------
sub mkindex {
    my $domain = shift;
    # TODO: Specify the posts directory via command-line flag
    my $posts_dir = 'posts';
    my $top       = "./$posts_dir";

    # Get all the posts and put them in a directory to children hash
    my %children_of;
    my @roots;      # top-level directories, in the order they were visited
    my %is_root;
    find({
        wanted => sub {
            # Work on copies: $_ and $File::Find::dir belong to File::Find
            my $dir_path = $File::Find::dir;
            my $parent   = decode_utf8(basename($dir_path));
            my $entry    = decode_utf8($_);
            return if $entry eq '.' or $parent eq $posts_dir;
            push(@roots, $parent)
                if dirname($dir_path) eq $top && !$is_root{$parent}++;
            push(@{ $children_of{$parent} }, $entry);
        },
    }, $top);

    # Traverse the children hash, adding the 'h' and 'li' tags
    my @index;
    my $walk;
    $walk = sub {
        my ($parent, $level) = @_;
        my @children = @{ $children_of{$parent} // [] };
        return unless @children;

        push(@index, "<h$level>" . ucfirst($parent) . "</h$level>\n");
        for my $child (@children) {
            # A child with children of its own is a directory
            if ($children_of{$child}) {
                $walk->($child, $level + 1);
                next;
            }
            # Drop the '.post' extension component to get the published name
            my $href = $child =~ s/\.post(?=\.|\z)//r;
            my $item = $href =~
                s/\.html//r =~
                s/[_-]/ /gr =~
                # capitalize every word
                s/([\w']+)/\u\L$1/gr;
            push(@index, "\t<li><a href=\"$domain/$href\">$item</a></li>\n");
        }
    };
    $walk->($_, 2) for @roots;
    undef $walk;    # break the closure's reference to itself

    # Surround the 'li' tags with 'ul' tags
    my @index_with_ul;
    my $in_list = 0;
    for my $line (@index) {
        if ($in_list && $line =~ /<h[1-6]>/) {
            push(@index_with_ul, "</ul>\n");
            $in_list = 0;
        } elsif (!$in_list && $line =~ /<li>/) {
            push(@index_with_ul, "<ul>\n");
            $in_list = 1;
        }
        push(@index_with_ul, $line);
    }
    # Add remaining 'ul' tag if needed
    push(@index_with_ul, "</ul>\n") if $in_list;
    return @index_with_ul;
}
# Parse command line options
my $domain   = "https://localhost";
my $port     = '';
my $date     = '';
my $toc_only = '';
my $md       = 0;
GetOptions(
    'd|domain=s' => \$domain,
    'p|port=s'   => \$port,
    'D|date=s'   => \$date,
    't|toc-only' => \$toc_only,
    'm|markdown' => \$md
) or die "Invalid command line options.\n";
$domain = "$domain:$port" if $port;

# Get the file's contents by opening it or via STDIN
my @lines;
my $file = $ARGV[0];
if (defined $file && length $file) {
    open(my $fh, '<', $file) or die "Could not open $file: $!\n";
    @lines = <$fh>;
    close($fh);
} elsif (!-t STDIN) {
    # Anything that is not a terminal will do: a pipe or a redirected file.
    # From here on $file is only used as a label in error messages.
    $file  = "Reading input file's contents from STDIN";
    @lines = <STDIN>;
} else {
    die 'No input supplied.';
}
@lines = collapse_headings(@lines);

# Check if we are generating the index: the first line holding only the
# '<index/>' marker is removed and the generated index goes at the end of the
# document.
my ($marker_idx) = grep { $lines[$_] =~ /^<index\/>$/ } 0 .. $#lines;
if (defined $marker_idx) {
    splice(@lines, $marker_idx, 1);
    push(@lines, "\n", mkindex($domain));
}

my ($toc, $title, $contents) = gentoc($file, $md, @lines);
if ($toc_only) {
    # A document without sections has no TOC
    my $toc_text = $toc // '';
    print "$toc_text\n";
    exit 0;
}
# Markdown documents are printed as they are; HTML ones get the page template
my $output = $md ? $contents : mkpage($toc, $title, $contents, $date);
print $output;
|