#!/usr/bin/perl -w

# Version string printed by --version and embedded in the "Generated by" footer line.
our $ppversion = "1.1.1";

########################################################################
#
# portspage
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# This is a script for generating CRUX port listings.
# Distributed under the terms of the GPL license.
#
# Changelog:
# 1.1.1
#   - Allow user to request Markdown output instead of HTML
# 1.1
#   - Limit recursion to depth 1 when searching for Pkgfiles
#   - Treat additional args as ports to be inserted into an existing index
#   - Read the stylesheet from an external file
# 1.0.5
#   - Added a table row containing the signify public key
# 1.0.4
#   - Added --date-from-pkgfile (patch from Mikhail Kolesnik)
# 1.0.3
#   - Fixed a problem with tabs in Pkgfile
# 1.0.2
#   - Might as well make it XHTML 1.1
# 1.0.1
#   - Output is now valid XHTML 1.0 Strict
#
########################################################################

use strict;
use Cwd qw(cwd getcwd);

# Default settings; parse_args() overrides them from the command line.
our %options = (
    title              => "CRUX ports",
    timestamp_accuracy => 1,
    date_from_file     => 0,
    # Invoking the script under a name ending in "-md" selects Markdown output.
    o_fmt              => ($0 =~ /-md$/) ? "markdown" : "HTML",
);

# Preamble copied to the output (with the title substituted) in HTML mode.
our $stylepage = "/usr/share/portspage/style.html";

# Port names given on the command line after DIRECTORY.
our @updates;

# Ports that main() will try to emit rows for.
our @ports;

# Names of ports for which printrow() has already emitted a row.
our %printed;

# Maps (row count % 2) to the CSS class used for table striping.
our %parity = (
    0 => "even",
    1 => "odd",
);

# Print the usage summary to STDERR and terminate the program.
#
#   $ok - exit status handed to exit(): parse_args() passes 0 for --help and
#         1 for a bad argument. Defaults to 0 when omitted.
#
# Does not return.
sub print_usage {
  my $ok = shift // 0;
  print STDERR <<EOT;
Usage: portspage [OPTION]... [DIRECTORY [port1...portN]]

  --title=TITLE               set the page title
  --header=FILE               name of file to insert before port listing
  --footer=FILE               name of file to insert after port listing
  --timestamp-accuracy=LEVEL  0 = no timestamp, 1 = date only, 2 = date and time
                              default is 1
  --date-from-file            take date from newest file instead of directory
  --date-from-pkgfile         take date from Pkgfile instead of directory
  --version                   output version information and exit
  --help                      display this help and exit
  [DIRECTORY]                 specify a collection other than \$PWD
  [port1...portN]             individual ports to overwrite (or insert)
                              in an existing index

Report bugs to <jmcquah\@disroot dot org>.
EOT
  exit $ok;
}

# Consume @ARGV and record the result in %options and @updates.
#
# Recognised --options may appear anywhere on the command line. The first
# non-option argument must be an existing directory and becomes
# $options{directory}; every later non-option argument is queued in @updates
# as a port name. --version and --help terminate the program, as does any
# unrecognised option or a DIRECTORY that does not exist (via print_usage).
sub parse_args {
  # Test definedness, not truth: an argument of "0" or "" must not end the
  # loop and silently discard everything that follows it.
  while (defined(my $arg = shift @ARGV)) {
    if ($arg =~ /^--header=(.*)$/) {
      $options{header} = $1;
    }
    elsif ($arg =~ /^--footer=(.*)$/) {
      $options{footer} = $1;
    }
    elsif ($arg =~ /^--title=(.*)$/) {
      $options{title} = $1;
    }
    elsif ($arg =~ /^--timestamp-accuracy=(0|1|2)$/) {
      $options{timestamp_accuracy} = $1;
    }
    elsif ($arg eq "--date-from-file") {
      $options{date_from_file} = 1;
    }
    elsif ($arg eq "--date-from-pkgfile") {
      $options{date_from_pkgfile} = 1;
    }
    elsif ($arg eq "--version") {
      print "$ppversion\n";
      exit 0;
    }
    elsif ($arg eq "--help") {
      print_usage(0);
    }
    elsif ($arg =~ /^--/) {
      # An unknown or malformed option (e.g. --timestamp-accuracy=3) must not
      # be mistaken for a port name: a non-empty @updates would switch the
      # whole run into quick-update mode.
      print STDERR "portspage: unrecognized option '$arg'\n";
      print_usage(1);
    }
    elsif (! defined $options{directory}) {
      (-d $arg) or print_usage(1);
      $options{directory} = $arg;
    }
    else {
      push @updates, $arg;
    }
  }
}

# Program entry point: parse the command line and write the complete port
# index to STDOUT, as HTML or Markdown according to $options{o_fmt}.
#
# Two modes:
#   * No port arguments: every */Pkgfile below the collection directory gets
#     a freshly generated row.
#   * Port arguments (@updates): an existing index (index.htm* or README*.md in
#     the directory portspage was started from) is merged with fresh rows for
#     the named ports. Rows of the old index are copied through, re-striped,
#     and stripped of their date column when no timestamps were requested.
#     If no old index can be read, only the named ports are listed.
#
# Dies when the collection directory, the style page, or a requested
# header/footer file cannot be opened.
#
# Returns 0, which the caller hands to exit().
sub main {
  parse_args();

  # Save the old working directory, in case quick updates to an existing index are requested
  # (old index is assumed to be in the directory where portspage was called).
  my $old_pwd = getcwd();

  (! $options{directory}) or chdir $options{directory}
      or die "Cannot chdir to $options{directory}! Check your user permissions.";

  if (@updates) { # individual ports passed as args.
                  # Discard any that are invalid.
      # Without a DIRECTORY argument the ports are looked up in the current
      # directory; name it "." rather than interpolating undef.
      my $where = $options{directory} // ".";
      foreach my $port (@updates) {
        if (-f "$port/Pkgfile") {
          push @ports, $port;
        } else {
          print STDERR "$where/$port is not a valid port, ignoring.\n";
        }
      }
  } else {
      foreach my $file (glob("*/Pkgfile")) {
        my $port = (split /\//, $file)[-2];
        push @ports, $port;
      }
  }

  if ($options{o_fmt} eq "markdown") {
      print "# $options{title}\n\n";
  } else {
      # Copy the style page, replacing the text of <title> and <h1>..<h3>
      # elements with the requested page title.
      open(my $fS, '<', $stylepage) or die "style page missing! please reinstall portspage.";
      while (<$fS>) {
         if (m/<(title|h[1-3])>/) {
             s/(title|h[1-3])>[^<]*</$1>$options{title}</;
         }
         print;
      }
      close($fS);
  }

  if ($options{header}) { # Try the path requested by the user
    # Three-argument open: the user-supplied name is never parsed for a mode.
    open(my $hH, '<', $options{header}) or die "Couldn't open header file";
    while (<$hH>) {
      print "  " . $_;
    }
    close($hH);
  }

  my $count = 0;
  my $firstrun = 0;
  if (@updates) { # when an existing index only needs a quick update
    my @queue = sort @ports;
    my %followI; my $oI; my $col_checked=0; my $oline; my $oname; my $fname; my @oldIdx;
    if ($options{o_fmt} eq "HTML") {
        @oldIdx = glob("$old_pwd/index.htm*");
    } else {
        @oldIdx = glob("$old_pwd/README*.md");
    }
    # An old index that exists but cannot be read is treated like a missing
    # one, instead of reading from an unopened handle later on.
    if (($#oldIdx >= 0) and (! open($oI, '<', $oldIdx[0]))) {
        print STDERR "Cannot read $oldIdx[0] ($!), generating a fresh index.\n";
        @oldIdx = ();
    }
    if ($#oldIdx >= 0) {
        # check how many columns the existing index has, and modify our options accordingly
        # (overriding the command-line option if the old index cannot accommodate that column)
        while (($options{timestamp_accuracy}>0) and ($col_checked==0) and ($oline = <$oI>)) {
            if (($options{o_fmt} eq "HTML") and ($oline =~ m/class="header".*Port/)) {
                ($oline =~ m/Last modified/) or $options{timestamp_accuracy} = 0;
                $col_checked = 1;
            }
            if (($options{o_fmt} eq "markdown") and ($oline =~ m/^\| Port \| Version \|/)) {
                ($oline =~ m/Last modified/) or $options{timestamp_accuracy} = 0;
                $col_checked = 1;
            }
        }
    } else {
        $firstrun = 1;
    }
    tablehead();

    HROW: while (my $p = shift @queue) {
        if ($firstrun == 1) {
            $count = printrow($count,$p);
            next HROW;
        }
        # Shift entries from the old index until we find a successor to the current arg
        while ( (! $followI{$p}) and ($oline=<$oI>) ) {
            chomp($oline);
            next if (($options{o_fmt} eq "HTML") and ($oline !~ m/^[[:space:]]*<tr class="(odd|even)"/));
            next if (($options{o_fmt} eq "markdown") and ($oline =~ m/^\|[-:\s]+\|/));
            next if (($options{o_fmt} eq "markdown") and ($oline !~ m/^\|/));
            # Arriving here means the current row of the table must refer to a port.
            # Let's extract the name with a pattern match.
            $oname = $oline;
            if ($options{o_fmt} eq "HTML") {
                $oname =~ s/.*a href="(http|https|ftp):[^>]*">([^<]*)<.*/$2/ ;
            } else {
                $oname =~ s/^..\[([^\]]+)\].*$/$1/ ;
            }
            if ($oname lt $p) {
                # Old row sorts before the current arg: copy it through.
                $count++;
                $oline =~ s/class="(even|odd)"/class="$parity{($count % 2)}"/ if ($options{o_fmt} eq "HTML");
                if ($options{timestamp_accuracy}==0) {
                    ($options{o_fmt} eq "HTML") ?
                      $oline =~ s/<td>[0-9]{4}-[0-1][0-9]-[0-3][0-9].*<\/td>// :
                      $oline =~ s/\| [0-9]{4}-[0-1][0-9]-[0-3][0-9].*\|/\|/ ;
                }
                print "$oline\n";
            } elsif ($oname eq $p) {
                # Old row describes the same port: replace it with a fresh one.
                $count = printrow($count, $p);
            } else {
                # Old row sorts after the current arg: emit the arg (a no-op if
                # it was already printed) and keep the old row to print after it.
                $count = printrow($count, $p);
                $followI{$p} = "$oline\n";
            }
            # Before breaking out of the loop, append all the packages from the queue that
            # are lexicographically earlier than the next entry in the old index.
            # In the event of equality, the command-line arg takes precedence.
            while (($queue[0]) and ($queue[0] le $oname)) {
                $p = shift @queue;
                $count = printrow($count, $p);
                $followI{$p} = "$oline\n" if ($p lt $oname);
            }
            if ($followI{$p}) {
                $count++;
                $followI{$p} =~ s/class="(even|odd)"/class="$parity{($count % 2)}"/ if ($options{o_fmt} eq "HTML");
                # Drop the date column here as well, so this row matches the
                # other rows copied from the old index when no timestamps
                # were requested.
                if ($options{timestamp_accuracy}==0) {
                    ($options{o_fmt} eq "HTML") ?
                      $followI{$p} =~ s/<td>[0-9]{4}-[0-1][0-9]-[0-3][0-9].*<\/td>// :
                      $followI{$p} =~ s/\| [0-9]{4}-[0-1][0-9]-[0-3][0-9].*\|/\|/ ;
                }
                print $followI{$p};
            }
        }
        # Either the old index had a successor to the current arg, or all remaining args
        # should be appended at the end of the table.
        if (! $followI{$p}) {
            $count = printrow($count, $p);
            while ($p = shift @queue) {
                $count = printrow($count, $p);
            }
        }
        # Args still remaining in the queue means that the old index hasn't been exhausted.
        # Decide whether to:
        # - print the next row of the old index.
        # - save it in the followI array where it will be printed later.
        # If the old index has greater timestamp accuracy than requested for this run,
        # remember to delete that column before printing.
        # NOTE(review): $followI{$p} has already been printed by the inner loop
        # above, so this block looks like it no longer affects the output - confirm.
        if (@queue) {
            $fname = $followI{$p};
            ($options{o_fmt} eq "HTML") ?
                $fname =~ s/.*a href="(http|https|ftp):[^>]*">([^<]*)<.*/$2/ :
                $fname =~ s/^..\[([^\]]+)\].*$/$1/ ;
            if ($queue[0] gt $fname) {
                $followI{$p} =~ s/class="(even|odd)"/class="$parity{($count % 2)}"/
                    if ($options{o_fmt} eq "HTML");
                if ($options{timestamp_accuracy}==0) {
                    # "=~" binds tighter than "?:", so each branch needs its
                    # own binding; otherwise the substitution runs on $_.
                    ($options{o_fmt} eq "HTML") ?
                        $followI{$p} =~ s/<td>[0-9]{4}-[0-1][0-9]-[0-3][0-9].*<\/td>// :
                        $followI{$p} =~ s/\| [0-9]{4}-[0-1][0-9]-[0-3][0-9].*\|/\|/ ;
                }
            } else {
                $followI{$queue[0]} = $followI{$p};
            }
        }
        # Shift another port from the queue
    }
    # Now append the tail of the old index, deleting columns if needed.
    while (($firstrun == 0) and ($oline = <$oI>)) {
        if (($options{o_fmt} eq "HTML") and ($oline =~ m/class="(even|odd)"/)) {
            $count++;
            $oline =~ s/class="(even|odd)"/class="$parity{($count % 2)}"/;
            ($options{timestamp_accuracy} > 0) or $oline =~
                     s/<td>[0-9]{4}-[0-1][0-9]-[0-3][0-9].*<\/td>//;
            print $oline;
        }
        if (($options{o_fmt} eq "markdown") and ($oline =~ m/^\| /)) {
            $count++;
            ($options{timestamp_accuracy} > 0) or $oline =~
                     s/\| [0-9]{4}-[0-1][0-9]-[0-3][0-9].*\|/\|/ ;
            print $oline;
        }
    }
    ($firstrun == 1) or close($oI);
  }
  else { # No individual ports specified, just process the entire collection
    tablehead();
    foreach my $port (@ports) {
        $count = printrow($count, $port);
    }
  }

  # Append the footer
  if ($options{o_fmt} eq "HTML") {
      print "  </table>\n";
      print "  <p><strong>$count ports</strong></p>\n";
  } else {
      print "\n**$count ports**\n\n";
  }

  if ($options{footer}) { # Try the path requested by the user
      open(my $fH, '<', $options{footer}) or die "Couldn't open footer file";
      while (<$fH>) {
          print "  " . $_;
      }
      close($fH);
  }

  if ($options{o_fmt} eq "HTML") {
      print "  <p><em>Generated by portspage $ppversion on " . isotime() . ".</em></p>\n  </body>\n</html>";
  } else { # Markdown footer
      print "  *Generated by portspage $ppversion on " . isotime() . "*\n";
  }

  return 0;
}

# Print the start of the port table to STDOUT.
#
# In HTML mode this opens the <table>. If /etc/ports/<name>.pub is readable,
# where <name> is the last component of the current working directory, every
# line of it that does not contain "untrusted comment" is printed as the
# signify public key. Finally the column header row is printed; the
# "Last modified" column is left out when $options{timestamp_accuracy} is 0.
sub tablehead {
    print "  <table width=\"100%\" cellspacing=\"0\">\n" if ($options{o_fmt} eq "HTML");
    my $CWD = getcwd() // "";
    # The last path component is undefined when the cwd is "/".
    my $repo = (split /\//, $CWD)[-1] // "";
    my $pubkey = "/etc/ports/".$repo.".pub";
    # The key row spans the whole table, which has 3 columns without timestamps.
    my $columns = ($options{timestamp_accuracy} == 0) ? 3 : 4;
    # open() already fails on a missing file, so no separate -e test is needed.
    if (open(my $kH, '<', $pubkey)) {
      while (my $line = <$kH>) {
        chomp $line;
        next if ($line =~ /untrusted comment/);
        if ($options{o_fmt} eq "HTML") {
          print "  <tr class=\"header\"><td colspan=\"$columns\">\n";
          print "  <strong>Signify public key:</strong> $line\n";
          print "  </td></tr>\n";
        } else {
          print "**Signify public key:** `$line`\n\n";
        }
      }
      close($kH);
    }
    if ($options{o_fmt} eq "HTML") {
      print "   <tr class=\"header\"><td><strong>Port</strong></td>";
      print "<td><strong>Version</strong></td><td><strong>Description</strong></td>";
      ($options{timestamp_accuracy} == 0) or print "<td><strong>Last modified</strong></td>";
      print "</tr>\n";
    } else { # Markdown header
      print "| Port | Version | Description |";
      ($options{timestamp_accuracy} == 0) or print " Last modified |";
      print "\n|------|---------|-------------|";
      ($options{timestamp_accuracy} == 0) or print "---------------|";
      print "\n";
    }
}

# Print one table row (HTML or Markdown) for a port to STDOUT.
#
#   $count - number of rows printed so far
#   $p     - port name; "$p/Pkgfile" is read relative to the current directory
#
# Returns $count + 1 when a row was printed. Returns $count unchanged when
# the port was already printed during this run or its Pkgfile cannot be opened.
sub printrow {
    my ($count, $p) = @_;
    my ($url, $version, $release, $pver, $desc, $date);

    (! $printed{$p}) or return $count;
    open(my $pF, '<', "$p/Pkgfile") or return $count;
    while (my $line = <$pF>) {
      if ($line =~ /^#\s*URL:\s*(.*)$/) {
        $url = $1;
        # "&" has to be escaped first; otherwise the ampersands introduced by
        # &lt; and &gt; would themselves be escaped into &amp;lt; / &amp;gt;.
        $url =~ s/&/&amp;/g;
        $url =~ s/</&lt;/g;
        $url =~ s/>/&gt;/g;
      } elsif ($line =~ /^#\s*Description:\s*(.*)$/) {
        $desc = $1;
      } elsif ($line =~ /^version=(.*)$/) {
        $version = $1;
      } elsif ($line =~ /^release=(.*)$/) {
        $release = $1;
      }
    }
    close ($pF);
    # A Pkgfile without version= or release= yields an empty component
    # instead of an "uninitialized value" warning.
    $pver = ($version // "") ."-". ($release // "");
    if ($options{timestamp_accuracy} > 0) {
      if ($options{date_from_file}) {
        my @dates;
        foreach my $file (glob($p."/*")) {
          push (@dates, (stat($file))[9]);
        }
        # mtimes are numbers: sort numerically so the newest one really is last.
        $date = (sort { $a <=> $b } @dates)[-1];
      } elsif ($options{date_from_pkgfile}) {
        $date = (stat("$p/Pkgfile"))[9];
      } else {
        $date = (stat("$p"))[9];
      }
    }

    # NOTE(review): without a DIRECTORY argument the version link becomes
    # "/$p/", as before (minus the warning) - confirm that is the intended target.
    my $dir = $options{directory} // "";

    $count++;
    if ($options{o_fmt} eq "HTML") {
      print "   <tr class=\"$parity{($count % 2)}\"><td>";
      if ($url) {
        print "<a href=\"$url\">$p</a></td>";
      } else {
        print "$p</td>";
      }
      print "<td><a href=\"$dir/$p/\">$pver</a></td>";
      if ($desc) {
        print "<td>$desc</td>";
      } else {
        print "<td></td>";
      }
      print "<td>" . isotime($date, $options{timestamp_accuracy}) . "</td>" if ($date);
      print "</tr>\n";
    } else { # generate Markdown row instead
      print "| ";
      if ($url) {
        print "[$p]($url) | ";
      } else {
        print "$p | ";
      }
      print "[$pver]($dir/$p/) | ";
      print(defined $desc ? $desc : "");
      print " | " . isotime($date, $options{timestamp_accuracy}) if ($date);
      print " |\n";
    }

    $printed{$p}=1;
    return $count;
}

# Format an epoch time as an ISO-style UTC timestamp.
#
#   first argument  - seconds since the epoch; any false value means "now"
#   second argument - 1 for "YYYY-MM-DD"; any other value (false ones
#                     default to 2) gives "YYYY-MM-DD HH:MM:SS UTC"
sub isotime {
  my ($when, $level) = @_;
  $when  ||= time;
  $level ||= 2;

  my ($sec, $min, $hour, $mday, $mon, $yr) = gmtime($when);
  my $stamp = sprintf("%d-%02d-%02d", $yr + 1900, $mon + 1, $mday);

  return $stamp if ($level == 1);
  return sprintf("%s %02d:%02d:%02d UTC", $stamp, $hour, $min, $sec);
}

# Run the program; the value returned by main() becomes the process exit status.
exit(main());

# End of file
