#!/usr/bin/perl

use strict;
use warnings;
use Cwd ();
use POSIX ();

my %opts;
my @logs;
my $selected_log;
my $host = {};
my %months = (
    Jan => 1, Feb => 2, Mar => 3, Apr => 4, May => 5, Jun => 6,
    Jul => 7, Aug => 8, Sep => 9, Oct => 10, Nov => 11, Dec => 12,
);
my $now = time();
my $columns;
my $line_count = 0;

get_opts();
get_logs();
select_log();

if ($opts{action} eq "list") {
    show_list();
}
elsif ($opts{action} eq "detailed_list") {
    show_detailed_list();
}
elsif ($opts{action} eq "info") {
    show_info();
}
elsif ($opts{action} eq "stats") {
    show_stats();
}
elsif ($opts{action} eq "graph") {
    show_graph();
}
elsif ($opts{action} eq "print") {
    print "$selected_log->{pfile}\n";
}
else {
    if (!-t STDOUT) {
        system "cat", $selected_log->{pfile};
    }
    elsif ($ENV{PAGER}) {
        system "$ENV{PAGER} $selected_log->{pfile}";
    }
    else {
        system "less", "-RIMS", $selected_log->{pfile};
    }
}

sub get_rotations {
    my ($file) = @_;
    my $dir = dirname($file);
    opendir my $dh, $dir or return ();
    my $array = [];
    for my $name (readdir $dh) {
        next if $name =~ /^\.\.?$/;
        my $dfile = "$dir/$name";
        if ($dfile =~ /^\Q$file\E.(.*?)(\.gz)?$/) {
            my $r = {};
            $r->{file} = $dfile;
            $r->{name} = $1;
            push @$array, $r;
        }
    }
    closedir $dh;
    $array = natural_sort($array, "file");
    return $array;
}

# Sorts files in a way that treats numbers as numbers, so if you have
# files file1, file2, file10 it wouldn't sort it like file1, file10, file2.
sub natural_sort {
    my ($array, $key) = @_;
    my @array2;
    for my $entry (@$array) {
        my $value = $entry->{$key};
        $value =~ s/(\d+)/sprintf("%05d", $1)/ge;
        push @array2, [$value, $entry];
    }
    @array2 = sort {$a->[0] cmp $b->[0]} @array2;
    my @array3;
    for my $entry (@array2) {
        push @array3, $entry->[1];
    }
    return \@array3;
}

sub show_graph {
    my @entries;
    loop_accesses(\&add_access_to_graph, \@entries);

    my $max = 0;
    for my $entry (@entries) {
        $entry->{total} ||= 0;
        if ($entry->{total} > $max) {
            $max = $entry->{total};
        }
    }
    $columns = `tput cols`;
    my $prev;
    for my $entry (@entries) {
        show_graph_entry($entry, $prev, $max);
        $prev = $entry;
    }
}

sub add_access_to_graph {
    my ($a, $entries) = @_;
    my $entry = $entries->[-1];
    if (!$entry) {
        $entry = {};
        $entry->{start} = $a->{date};
        $entry->{count} = 1;
        push @$entries, $entry;
    }
    elsif ($a->{date} >= $entry->{start} + $opts{interval}) {
        while (1) {
            my $entry2 = {};
            $entry2->{count} = $entry->{count} + 1;
            $entry2->{start} = $entry->{start} + $opts{interval};
            $entry = $entry2;
            push @$entries, $entry;
            if ($a->{date} < $entry->{start} + $opts{interval}) {
                last;
            }
        }
    }
    if ($a->{status} =~ /^(\d)/) {
        $entry->{$1}++;
    }
    $entry->{total}++;
}

sub show_graph_entry {
    my ($entry, $prev, $max) = @_;
    my ($sec, $min, $hour, $mday, $mon, $year, $wday, $yday, $isdst) = localtime($entry->{start});
    $mon += 1;
    my $hour2;
    if ($hour == 0) {
        $hour2 = 12;
    }
    elsif ($hour > 12) {
        $hour2 = $hour - 12;
    }
    else {
        $hour2 = $hour;
    }
    $entry->{monday} = "$mon/$mday";
    my $offset = 16;
    if ($opts{interval} >= 24 * 60 * 60) {
        printf "%-5s: ", "$mon/$mday";
    }
    elsif ($opts{interval} >= 60 * 60) {
        if (!$prev || $prev->{monday} ne $entry->{monday}) {
            printf "%-5s %2d: ", "$mon/$mday", $hour2;
        }
        else {
            printf "      %2d: ", $hour2;
        }
    }
    else {
        $offset += 3;
        if (!$prev || $prev->{monday} ne $entry->{monday}) {
            printf "%-5s %2d:%02d: ", "$mon/$mday", $hour2, $min;
        }
        else {
            printf "      %2d:%02d: ", $hour2, $min;
        }
    }
    my $size = int(($entry->{total} / $max) * ($columns - $offset));
    my $bar = "";
    my $bar2 = "";
    if ($entry->{total} && !$size) {
        $bar2 = "$entry->{total}";
    }
    elsif ($entry->{total}) {
        $bar = "#" x $size;
        my $status = 1;
        my $sum = 0;
        my $loc = 0;
        for my $status (1 .. 5) {
            my $amount = $entry->{$status} or next;
            my $loc2 = int(($sum + $amount) / $entry->{total} * $size);
            my $size2 = $loc2 - $loc;
            if ($size2) {
                my $color = $status == 1 ? 244                            # grey
                          : $status == 2 ? 16 + (0 * 36) + (4 * 6) + (0)  # green
                          : $status == 3 ? 16 + (5 * 36) + (4 * 6) + (0)  # yellow
                          : $status == 4 ? 16 + (5 * 36) + (1 * 6) + (0)  # orange
                                         : 1;                             # red
                my $part = substr($bar, $loc, $size2);
                $bar2 .= "\e[38;5;${color}m$part";
            }
            $loc = $loc2;
            $sum += $amount;
        }
        $bar2 .= "\e[0m $entry->{total}";
    }
    print "$bar2\n";
}

sub show_stats {
    my $stats = {};
    loop_accesses(\&process_access_for_stats, $stats);

    my $file = $selected_log->{pfile};
    my $size = -s $file;
    $size = human_readable($size);
    print "$file\n";
    print "size $size\n";
    my $count = $stats->{count} || 0;
    print "$count requests\n";

    my $from = $stats->{from};
    my $fromstr = datestr($from);
    my $to = $stats->{to};
    my $tostr = datestr($to);
    my $time_diff = time_diff_str2($from, $to);
    if ($time_diff) {
        print "spanning $time_diff\n";
    }

    print "from [$fromstr] to [$tostr]\n";
    if (defined $stats->{bytes}) {
        my $bytes = human_readable($stats->{bytes});
        print "total response size $bytes\n";
    }
    if ($stats->{mintime}) {
        my $mintime = int($stats->{mintime} / 1000);
        print "min serving time $mintime ms\n";
    }
    if ($stats->{maxtime}) {
        my $maxtime = int($stats->{maxtime} / 1000);
        print "max serving time $maxtime ms\n";
    }
    if ($stats->{sumtime}) {
        $stats->{avgtime} = $stats->{sumtime} / $stats->{count};
        $stats->{stddevtime} = sqrt(($stats->{sumtime2} - (($stats->{sumtime} ** 2) / $stats->{count})) / $stats->{count});
        my $avgtime = int($stats->{avgtime} / 1000);
        my $stddevtime = int($stats->{stddevtime} / 1000);
        print "average serving time $avgtime ms ± $stddevtime ms\n";
    }
    print "\n";

    my @methods = sort {
        $stats->{methods}{$b} <=> $stats->{methods}{$a}
    } keys %{$stats->{methods}};
    print "methods:\n";
    print join " ", map "$stats->{methods}{$_} $_", @methods;
    print "\n\n";

    my @status = sort {
        $stats->{status}{$b} <=> $stats->{status}{$a}
    } keys %{$stats->{status}};
    print "status:\n";
    print join " ", map "$stats->{status}{$_} $_", @status;
    print "\n\n";

    my $locations = scalar(keys %{$stats->{locations}});
    my @locations = sort {
        $stats->{locations}{$b} <=> $stats->{locations}{$a}
    } keys %{$stats->{locations}};
    @locations = splice @locations, 0, 5;
    print "most requested locations (of $locations):\n";
    for my $loc (@locations) {
        my $count = $stats->{locations}{$loc};
        print "$count $loc\n";
    }
    print "\n";

    @locations = sort {
        $stats->{lreqtimes}{$b} <=> $stats->{lreqtimes}{$a}
    } keys %{$stats->{lreqtimes}};
    @locations = splice @locations, 0, 5;
    print "slowest request locations (in seconds):\n";
    for my $loc (@locations) {
        my $time = $stats->{lreqtimes}{$loc};
        $time = $time / (1000 * 1000);
        $time = sprintf "%.1f", $time;
        print "$time $loc\n";
    }
    print "\n";

    my @types = sort {
        $stats->{types}{$b} <=> $stats->{types}{$a}
    } keys %{$stats->{types}};
    print "types:\n";
    print join " ", map "$stats->{types}{$_} $_", @types;
    print "\n\n";

    my $ips = scalar(keys %{$stats->{ips}});
    my @ips = sort {
        $stats->{ips}{$b} <=> $stats->{ips}{$a}
    } keys %{$stats->{ips}};
    @ips = splice @ips, 0, 5;
    print "most active ips (of $ips):\n";
    print join " ", map "$stats->{ips}{$_} $_", @ips;
    print "\n";
}

sub process_access_for_stats {
    my ($a, $stats) = @_;
    $stats->{count}++;
    if (!$stats->{from}) {
        $stats->{from} = $a->{date};
    }
    my $loc = process_location($a->{loc});
    $stats->{to} = $a->{date};
    $stats->{locations}{$loc}++;
    $stats->{methods}{$a->{method}}++;
    $stats->{status}{$a->{status}}++;
    if (defined $a->{bytes}) {
        $stats->{bytes} += $a->{bytes};
    }
    $stats->{ips}{$a->{ip}}++;
    $stats->{types}{$a->{type}}++;
    if ($a->{reqtime}) {
        if (!$stats->{initmintime}) {
            $stats->{mintime} = $a->{reqtime};
            $stats->{maxtime} = $a->{reqtime};
            $stats->{initmintime} = 1;
        }
        if ($a->{reqtime} < $stats->{mintime}) {
            $stats->{mintime} = $a->{reqtime};
        }
        if ($a->{reqtime} > $stats->{maxtime}) {
            $stats->{maxtime} = $a->{reqtime};
        }
        $stats->{sumtime} += $a->{reqtime};
        $stats->{sumtime2} += $a->{reqtime} ** 2;
        my $lreqtime = $stats->{lreqtimes}{$loc};
        if (!$lreqtime || $lreqtime < $a->{reqtime}) {
            $stats->{lreqtimes}{$loc} = $a->{reqtime};
        }
    }
}

# Make locations that look like /json-api/order/123456789/purchase/card/ become
# /json-api/order/*/purchase/card/ so they can be combined for stats like
# "most requested locations" or "slowest request locations"
sub process_location {
    my ($loc) = @_;
    # this is special case for the access logs im looking at
    $loc =~ s{/(idev_magic_revision)/[^/]+/+}{/$1/*/};
    # this might be more generally applicable
    $loc =~ s/\d{4,}/*/g;
    return $loc;
}

# for dates that look like 09/Sep/2015:11:01:24 -0500
sub parse_date {
    my ($str) = @_;
    return if !$str;
    $str =~ m{(\d+) / (\w+) / (\d+) : (\d+) : (\d+) : (\d+) \s+ (\S+)}x
        or die "Can't parse date \"$str\"\n";
    my $day = $1;
    my $monthstr = $2;
    my $year = $3;
    my $hour = $4;
    my $min = $5;
    my $sec = $6;
    my $tz = $7;
    my $month = $months{$monthstr} or die "Unknown month $monthstr\n";
    $month -= 1;
    $year -= 1900;
    my $time = POSIX::mktime($sec, $min, $hour, $day, $month, $year, -1, -1, -1);
    return $time;
}

sub parse_access_line {
    my ($line, $log) = @_;

    # If its the first line being parsed, we use the log's format if there is one,
    # if not, based on the content of the line, we could use a default format string,
    # or parse the line a different way.
    if ($line_count == 1) {
        if ($selected_log->{format} && !$selected_log->{captures}) {
            $selected_log->{captures} = parse_log_format($selected_log->{format});
        }
        elsif ($line =~ m{^(\S+) \s+ (\S+) \s+ (\S+) \s+ \[([^\]]+)\] \s+}x) {
            # For lines like:
            # 51.222.253.12 - - [20/Oct/2022:23:37:06 -0400] "GET /post/5345/thumb/blah.jpg HTTP/1.1" 200 108940 "-" "Mozilla/5.0 (compatible; AhrefsBot/7.0; +http://ahrefs.com/robot/)"
            my $default_format = '%a %l %u %t "%r" %s %B "%{Referer}i" "%{User-Agent}i"';
            $selected_log->{captures} = parse_log_format($default_format);
        }
    }
    if ($log->{captures}) {
        return parse_access_line_format($line, $log);
    }
    else {
        return parse_access_line_different($line);
    }
}

sub parse_access_line_format {
    my ($line, $log) = @_;
    if ($line =~ /^\s*$/) {
        return undef;
    }
    my $a = {};
    my $captures = $log->{captures};
    for my $c (@$captures) {
        my $regex = $c->{regex};
        my $letter = $c->{letter};
        if ($line !~ /\G$regex/gc) {
            # The log entry doesnt match the format, however some of it might have
            # matched, so we can use that. If it didn't get at least a datestr,
            # then report the error.
            if (!$a->{datestr}) {
                warn "Unable to parse: $line\n";
            }
            last;
        }
        my $value = $1;
        if (!$letter) {
            # we only matched text, nothing to capture
        }
        elsif ($letter eq "a") {
            $a->{ip} = $value;
        }
        elsif ($letter eq "A") {
            $a->{local_ip} = $value;
        }
        elsif ($letter eq "B") {
            $a->{bytes} = $value;
        }
        elsif ($letter eq "D") {
            $a->{reqtime} = $value;
        }
        elsif ($letter eq "e") {
            my $name = lc $c->{curly};
            $name =~ s/-/_/g;
            $a->{$name} = $value;
        }
        elsif ($letter eq "f") {
            $a->{file} = $value;
        }
        elsif ($letter eq "h") {
            $a->{host} = $value;
            if (!$a->{ip}) {
                $a->{ip} = $a->{host};
            }
        }
        elsif ($letter eq "i") {
            my $name = lc $c->{curly};
            $name =~ s/-/_/g;
            $a->{$name} = $value;
        }
        elsif ($letter eq "I") {
            $a->{req_bytes} = $value;
        }
        elsif ($letter eq "k") {
            $a->{keepalives} = $value;
        }
        elsif ($letter eq "l") {
            $a->{ident} = $value;
        }
        elsif ($letter eq "L") {
            $a->{errid} = $value;
        }
        elsif ($letter eq "m") {
            $a->{method} = $value;
        }
        elsif ($letter eq "O") {
            # bytes with headers
            $a->{byteswh} = $value;
        }
        elsif ($letter eq "p") {
            if ($c->{curly} && $c->{curly} eq "local") {
                $a->{local_port} = $value;
            }
            elsif ($c->{curly} && $c->{curly} eq "remote") {
                $a->{remote_port} = $value;
            }
        }
        elsif ($letter eq "P") {
            if ($c->{curly} && $c->{curly} eq "tid") {
                $a->{tid} = $value;
            }
            else {
                $a->{pid} = $value;
            }
        }
        elsif ($letter eq "q") {
            $a->{query} = $value;
        }
        elsif ($letter eq "r") {
            $a->{request} = $value;
        }
        elsif ($letter eq "R") {
            $a->{handler} = $value;
        }
        elsif ($letter eq "s") {
            if ($c->{angle} && $c->{angle} eq ">") {
                $a->{final_status} = $value;
                if (!$a->{status}) {
                    $a->{status} = $value;
                }
            }
            else {
                $a->{status} = $value;
            }
        }
        elsif ($letter eq "t") {
            if ($value =~ /^\[([^]]*)\]/) {
                $a->{datestr} = $1;
            }
            else {
                $a->{datestr} = $value;
            }
        }
        elsif ($letter eq "u") {
            $a->{user} = $value;
        }
        elsif ($letter eq "U") {
            if ($c->{angle} && $c->{angle} eq ">") {
                $a->{url_final} = $value;
            }
            else {
                $a->{url} = $value;
            }
        }
        elsif ($letter eq "v") {
            $a->{vhost} = $value;
        }
        elsif ($letter eq "V") {
            $a->{server_name} = $value;
        }
    }
    process_access($a);
    return $a;
}

sub parse_log_format {
    my ($format) = @_;
    my @captures;
    my $prev;
    while (1) {
        my $c = {};
        if ($format =~ /\G\s+/gc) {
            $c->{regex} = qr{\s+};
            $c->{whitespace} = 1;
        }
        elsif ($format =~ /\G([^%\\\s]+)/gc) {
            $c->{regex} = quotemeta($1);
        }
        elsif ($format =~ /\G\\t/gc) {
            $c->{regex} = qr{\s+};
            $c->{whitespace} = 1;
        }
        elsif ($format =~ /\G%t/gc) {
            $c->{letter} = "t";
            $c->{regex} = qr{\[([^\]]*)\]};
        }
        elsif ($format =~ /\G%r/gc) {
            $c->{letter} ="r";
            if ($prev && $prev->{regex} =~ /"$/) {
                $c->{regex} = qr{((?:[^"]*|\\")*)};
            }
            else {
                $c->{regex} = qr{(\S*)};
            }
        }
        elsif ($format =~ /\G%(\{([^\}]*)\}|([<>]))?(\w)/gc) {
            $c->{curly} = $2;
            $c->{angle} = $3;
            $c->{letter} = $4;
            if ($prev && $prev->{regex} =~ /"$/) {
                $c->{regex} = qr{((?:[^"]*|\\")*)};
            }
            else {
                $c->{regex} = qr{(\S*)};
            }
        }
        else {
            last;
        }
        if ($prev && $prev->{whitespace} && $c->{whitespace}) {
            next;
        }
        push @captures, $c;
        $prev = $c;
    }
    if (!@captures) {
        die "Unable to parse format: $format\n";
    }
    return \@captures;
}

sub parse_access_line_different {
    my ($line) = @_;
    # For lines like:
    # unique_id:"Y0gPiJCuUIKvl11H6djHxgAAAUM" remote_host:"184.94.203.3"      client_IP:"184.94.203.3"        X-Real-IP:"92.205.104.221"      ident:"-"       user:"-"        time:"[13/Oct/2022:08:15:52 -0500]"     req:"GET /foo.cgi?asdf=436 HTTP/1.1" initial_status:200      final_status:200        resp_size:35    referer:"-"     ua:"Foo-Client/1.0"     reqtime_usec:158951     method:"GET"    url_orig:"/foo.cgi"       url_final:"/foo.cgi"      query_string:"?asdf=436"   local_ip:"10.255.1.3"   local_port:443  handler:"cgi-script"    file:"/var/www/foo/foo.cgi"    errid:- pid:4243        tid:139832986691328     req_size:1250   resp_with_headers:588   keep_alives:0   server_name:foo.bar.baz  vhost:qwe.foo.bar.baz
    my $a = {};
    my %key_trans = (
        "time" => "datestr",
        "client_ip" => "ip",
        "ua" => "user_agent",
        "req" => "request",
        "initial_status" => "status",
        "final_status" => "status",
        "reqtime_usec" => "reqtime",
    );
    while ($line =~ m{\G\s*([\w-]+):("([^"]|\\")*"|\S*)}gc) {
        my $key = $1;
        my $value = $2;
        $value =~ s/^"|"$//g;
        $value =~ s/^\[|\]$//g;
        $key = lc $key;
        $key =~ s/-/_/g;
        if ($key_trans{$key}) {
            $key = $key_trans{$key};
        }
        $a->{$key} = $value;
    }
    if (!$a->{datestr}) {
        warn "Unable to parse: $line\n";
    }
    process_access($a);
    return $a;
}

sub process_access {
    my ($a) = @_;
    delete $a->{ident} if !$a->{ident} || $a->{ident} eq "-";
    delete $a->{user} if !$a->{user} || $a->{user} eq "-";
    delete $a->{user_agent} if !$a->{user_agent} || $a->{user_agent} eq "-";
    $a->{bytes} = 0 if $a->{bytes} && $a->{bytes} eq "-";
    delete $a->{referer} if $a->{referer} && $a->{referer} eq "-";
    $a->{request} ||= "";
    if ($a->{request} =~ /^ (\S+) \s+ (\S+) \s+ (\S+) $/x) {
        $a->{method} = $1;
        $a->{uri} = $2;
        $a->{protocol} = $3;
        $a->{loc} = $a->{uri};
        $a->{loc} =~ s/\?.*//;
        delete $a->{request};
    }
    else {
        $a->{method} = $a->{uri} = $a->{protocol} = $a->{loc} = "";
    }
    if ($a->{loc} =~ /\.(jpe?g|gif|png|ico)$/i) {
        $a->{type} = "image";
    }
    elsif ($a->{loc} =~ /\.(cgi)$/i) {
        $a->{type} = "cgi";
    }
    elsif ($a->{loc} =~ /\.(php)$/i) {
        $a->{type} = "php";
    }
    elsif ($a->{loc} =~ /\.(html?)$/i) {
        $a->{type} = "html";
    }
    elsif ($a->{loc} =~ /\.(js)$/i) {
        $a->{type} = "js";
    }
    elsif ($a->{loc} =~ /\.(css)$/i) {
        $a->{type} = "css";
    }
    elsif ($a->{loc} =~ m{/[^/\.]*$}i) {
        $a->{type} = "script";
    }
    else {
        $a->{type} = "other";
    }
    $a->{date} = parse_date($a->{datestr});
    $a->{datestr} = datestr($a->{date});
}

sub datestr {
    my ($date) = @_;
    return "" if !$date;
    my $datestr = POSIX::strftime("%a %b %d, %Y %I:%M:%S %p", localtime($date));
    return $datestr;
}

# returns a string like "17 hours 5 minutes 2 seconds"
sub time_diff_str2 {
    my ($date1, $date2) = @_;
    return "" if !$date1 || !$date2;
    my $obj = time_diff_obj($date1, $date2);

    my $str = "";
    for my $key ("year", "month", "day", "hour", "minute", "second") {
        if ($obj->{$key}) {
            if ($str) {
                $str .= " ";
            }
            $str .= "$obj->{$key} $key";
            if ($obj->{$key} != 1) {
                $str .= "s";
            }
        }
    }

    if ($date1 == $date2) {
        $str = "0 seconds";
    }
    return $str;
}

# returns a string like "17 hours ago" or "in 5 seconds"
sub time_diff_str {
    my ($date1, $date2) = @_;
    return "" if !$date1 || !$date2;
    my $obj = time_diff_obj($date1, $date2);

    my $str = "";
    for my $key ("year", "month", "day", "hour", "minute", "second") {
        if ($obj->{$key}) {
            $str .= "$obj->{$key} $key";
            if ($obj->{$key} != 1) {
                $str .= "s";
            }
            last;
        }
    }

    if ($date1 == $date2) {
        $str = "0 seconds";
    }
    if ($date1 <= $date2) {
        $str .= " ago";
    }
    else {
        $str = "in $str";
    }
    return $str;
}

sub time_diff_obj {
    my ($date1, $date2) = @_;
    my $from = $date1 < $date2 ? $date1 : $date2;
    my $to = $date1 < $date2 ? $date2 : $date1;
    my ($fsec, $fmin, $fhour, $fmday, $fmon, $fyear, $fwday, $fyday, $fisdst) = localtime($from);
    my ($tsec, $tmin, $thour, $tmday, $tmon, $tyear, $twday, $tyday, $tisdst) = localtime($to);

    my $year_diff = 0;
    my $mon_diff;
    if ($tyear > $fyear) {
        $mon_diff = 12 - $fmon + $tmon + 12 * ($tyear - $fyear - 1);
        $year_diff = int($mon_diff / 12);
        $mon_diff = $mon_diff % 12;
    }
    else {
        $mon_diff = $tmon - $fmon;
    }

    my %obj;
    if ($year_diff) {
        $obj{year} = $year_diff;
    }

    my $inner = $from;
    if ($mon_diff) {
        if ($tmday < $fmday) {
            if ($mon_diff > 1) {
                $obj{month} = $mon_diff - 1;
                $inner = POSIX::mktime($fsec, $fmin, $fhour, $fmday, $fmon + $mon_diff - 1, $fyear + $year_diff, 0, 0, -1);
            }
        }
        else {
            $obj{month} = $mon_diff;
            $inner = POSIX::mktime($fsec, $fmin, $fhour, $fmday, $fmon + $mon_diff, $fyear + $year_diff, 0, 0, -1);
        }
    }

    # $inner is less than a month away from the $to date at this point, so you can focus on only days, hours, etc at this point
    my $remaining_time = $to - $inner;
    my $day_diff = int($remaining_time / (24 * 60 * 60));
    if ($day_diff) {
        $obj{day} = $day_diff;
    }
    $remaining_time -= $day_diff * 24 * 60 * 60;

    my $hour_diff = int($remaining_time / (60 * 60));
    if ($hour_diff) {
        $obj{hour} = $hour_diff;
    }
    $remaining_time -= $hour_diff * 60 * 60;

    my $minute_diff = int($remaining_time / 60);
    if ($minute_diff) {
        $obj{minute} = $minute_diff;
    }
    $remaining_time -= $minute_diff * 60;

    if ($remaining_time) {
        $obj{second} = $remaining_time;
    }

    return \%obj;
}

sub show_info {
    loop_accesses(\&show_access_info);
}

sub show_access_info {
    my ($a) = @_;
    print "[$a->{datestr}] from IP $a->{ip}:\n";
    if ($opts{verbose}) {
        delete $a->{datestr};
        delete $a->{date};
        delete $a->{ip};
        for my $key (sort keys %$a) {
            printf "    %12s %s\n", $key, $a->{$key};
        }
        print "\n";
        return;
    }
    if ($a->{user_agent}) {
        print "agent $a->{user_agent}\n";
    }
    if ($a->{referer}) {
        print "referer $a->{referer}\n";
    }
    print "status $a->{status}\n";
    if (defined $a->{bytes}) {
        print "bytes " . human_readable($a->{bytes}) . "\n";
    }
    if ($a->{reqtime}) {
        my $reqtime = int($a->{reqtime} / 1000);
        print "time $reqtime ms\n";
    }
    print "$a->{method} $a->{uri}\n";
    print "\n";
}

sub loop_accesses {
    my ($func, $arg) = @_;
    my $cmd = "";
    my $file = $selected_log->{pfile};
    if ($file =~ /\.gz$/) {
        $cmd = "gzip -dc $file";
    }
    if ($opts{max}) {
        if ($opts{offset}) {
            my $amount = $opts{offset} + $opts{max};
            if ($cmd) {
                $cmd .= "| tail -n $amount | head -n $opts{max}";
            }
            else {
                $cmd = "tail -n $amount $file | head -n $opts{max}";
            }
        }
        else {
            if ($cmd) {
                $cmd .= "| tail -n $opts{max}";
            }
            else {
                $cmd = "tail -n $opts{max} $file";
            }
        }
    }
    my $fh;
    if ($cmd) {
        open $fh, "-|", $cmd or die "Can't open $file: $!\n";
    }
    else {
        open $fh, "<", $file or die "Can't open $file: $!\n";
    }
    my $filter = $opts{filter};
    if ($filter) {
        $filter = qr{$filter}i;
    }
    $line_count = 0;

    while (my $line = <$fh>) {
        if ($filter && $line !~ $filter) {
            next;
        }
        $line_count++;
        chomp $line;
        my $a = parse_access_line($line, $selected_log);
        if (!$a || !$a->{date}) {
            # couldn't parse the line
            next;
        }
        $func->($a, $arg);
    }
    close $fh;
}

sub show_list {
    for my $log (@logs) {
        my $selected = $log->{selected} ? "*" : " ";
        my $size = "-";
        my $updated = "";
        if (-e $log->{file}) {
            $size = human_readable(-s $log->{file});
            my $mtime = (stat($log->{file}))[9];
            $updated = time_diff_str($mtime, $now);
        }
        my $file = $log->{file};
        my $line = sprintf "%s %-60s %-10s %s", $selected, $file, $size, $updated;
        print "$line\n";
    }
}

sub show_detailed_list {
    for my $log (@logs) {
        my $size = "-";
        my $updated = "";
        if (-e $log->{file}) {
            $size = human_readable(-s $log->{file});
            my $mtime = (stat($log->{file}))[9];
            $updated = datestr($mtime) . " (" . time_diff_str($mtime, $now) . ")";
        }
        my $vhost = $log->{vhost};
        print "$log->{file}\n";
        if ($log->{selected}) {
            print "    selected\n";
        }
        print "    size $size\n";

        if ($updated) {
            print "    updated $updated\n";
        }
        if ($vhost) {
            if ($vhost->{file}) {
                print "    config $vhost->{file}\n";
            }
            if ($vhost->{docroot}) {
                print "    docroot $vhost->{docroot}\n";
            }
            if ($vhost->{absdocroot}) {
                print "    absdocroot $vhost->{absdocroot}\n";
            }
        }
        if ($log->{fname}) {
            print "    fname $log->{fname}\n";
        }
        if ($log->{format}) {
            print "    format $log->{format}\n";
        }
        my $rotations = get_rotations($log->{file});
        for my $r (@$rotations) {
            my $size = human_readable(-s $r->{file});
            my $mtime = (stat($r->{file}))[9];
            my $updated = datestr($mtime) . " (" . time_diff_str($mtime, $now) . ")";
            print "    rotation $r->{name} $size $updated\n";
        }
    }
}

sub human_readable {
    my ($size) = @_;
    $size ||= 0;
    my @power = ("B", "KB", "MB", "GB", "TB", "PB", "EB", "ZB", "YB");
    my $i = 0;
    my $abs_size = abs $size;
    for ($i = 0; $i < @power; $i++) {
        last if $abs_size < 1024;
        $abs_size /= 1024;
    }
    my $str = sprintf("%.1f %s", $abs_size, $power[$i]);
    $str =~ s/\.0//;
    $str = "-$str" if $size < 0;
    return $str;
}

sub select_log {
    my $name = $opts{name};
    if ($name) {
        my $regex = qr/^$name/;
        for my $log (@logs) {
            if ($log->{name} eq $name || $log->{file} eq $name || $log->{name} =~ $regex) {
                select_log_rotation($log);
                last;
            }
        }
        # If the name doesn't match one of the discovered log files,
        # treat it as a filename.
        if (!$selected_log) {
            my $log = add_log($name);
            select_log_rotation($log);
        }
        return;
    }

    if (@logs) {
        my $cwd = Cwd::cwd();
        $cwd .= "/" if $cwd !~ m{/$};
        for my $log (@logs) {
            my $docroot = "";
            if ($log->{vhost} && $log->{vhost}{absdocroot}) {
                $docroot = $log->{vhost}{absdocroot} . "/";
            }
            $log->{begcmp} = begcmp($cwd, $docroot);
        }
        my @logs_sorted = sort {
            return $b->{begcmp} <=> $a->{begcmp} || $a->{name} cmp $b->{name};
        } @logs;

        select_log_rotation($logs_sorted[0]);
    }
}

sub select_log_rotation {
    my ($log) = @_;
    $selected_log = $log;
    $log->{selected} = 1;
    my $file = $log->{file};
    my $rot = $opts{rotation};

    my $pfile; # pfile for processed file (includes rotation part)
    if (!$rot) {
        $pfile = $file;
    }
    elsif (-e "$file.$rot.gz") {
        $pfile = "$file.$rot.gz";
    }
    elsif (-e "$file.$rot") {
        $pfile = "$file.$rot";
    }
    elsif (-e "$file-$rot.gz") {
        $pfile = "$file-$rot.gz";
    }
    elsif (-e "$file-$rot") {
        $pfile = "$file-$rot";
    }
    elsif ($rot =~ /^\d+$/) {
        my $rotations = get_rotations($file);
        my $r = $rotations->[- $rot];
        if ($r) {
            $pfile = $r->{file};
        }
    }
    if (!$pfile) {
        die "Unable to find log file.\n";
    }
    $log->{pfile} = $pfile;
}

sub add_log {
    my ($file) = @_;
    my $log = {};
    $log->{file} = $file;
    $log->{name} = basename($file);
    push @logs, $log;
    return $log;
}

sub select_log_by_cwd {
    my $cwd = Cwd::cwd();
    $cwd =~ s{/+$}{};
    for my $log (@logs) {
        next if !$log || !$log->{vhost} || !$log->{vhost}{absdocroot};
        my $docroot = $log->{vhost}{absdocroot};
        next if !$docroot;
        $docroot =~ s/\/+$//;
        $docroot =~ s/\/public_html$//;
        if ($cwd =~ /^$docroot(\/|$)/) {
            $log->{selected} = 1;
            $selected_log = $log;
            return;
        }
    }
}

sub get_logs {
    get_logs_from_apache_conf();
    my $dir = "$ENV{HOME}/access-logs";
    if (-e $dir) {
        get_logs_from_dir($dir);
    }
    process_log_list();
    @logs = sort {
        return $a->{file} cmp $b->{file};
    } @logs;
}

# Returns the number of characters that are the same from the beginning
# between two strings.
#     abc and def is 0
#     abc and abcdef is 3
sub begcmp {
    my ($a, $b) = @_;
    my $count = 0;
    my $min = length($a) < length($b) ? length($a) : length($b);
    my $i = 0;
    for ($i = 0; $i < $min; $i++) {
        if (substr($a, $i, 1) ne substr($b, $i, 1)) {
            return $i;
        }
    }
    return $i;
}

sub get_logs_from_dir {
    my ($dir) = @_;
    opendir my $dh, $dir or return;
    for my $dfile (sort readdir $dh) {
        next if $dfile =~ /^\.\.?$/;
        $dfile = "$dir/$dfile";
        add_log($dfile);
    }
    closedir $dh;
}

sub get_logs_from_apache_conf {
    my @files = `find /etc/apache2 /etc/httpd 2>/dev/null`;
    my $vhost = $host;
    my %vhosts;
    for my $file (sort @files) {
        chomp $file;
        next if $file =~ /\bavailable\b/;
        open my $fh, $file or next;
        while (my $line = <$fh>) {
            chomp $line;
            if ($line =~ /(.*)\\$/) {
                $line = $1;
                my $line2 = <$fh>;
                $line .= $line2;
                redo;
            }
            if ($line =~ /^\s*<VirtualHost(|\s+([^>]*))>/ims) {
                $vhost = {vname => $2};
            }
            elsif ($line =~ /^\s*<\/VirtualHost>/ims) {
                $vhost->{name} = vhost_name($vhost);
                delete $vhost->{vname};
                delete $vhost->{sname};
                # The first <VirtualHost> is used if multiple identical named ones exist
                if ($vhosts{$vhost->{name}}) {
                    $vhost->{invalid} = 1;
                }
                else {
                    $vhosts{$vhost->{name}} = $vhost;
                }
                $vhost = $host;
            }
            elsif ($line =~ /^\s*ServerName\s+(\S+)/ims) {
                $vhost->{sname} = $1;
            }
            elsif ($line =~ /^\s*DocumentRoot\s+(.+)/ims) {
                $vhost->{docroot} = unquote($1);
                $vhost->{absdocroot} = Cwd::abs_path($vhost->{docroot});
            }
            elsif ($line =~ /^\s*ServerRoot\s+(\S+)/ims) {
                $vhost->{servroot} = unquote($1);
            }
            elsif ($line =~ /^\s*CustomLog\s+("[^"]*"|\S+)(\s+("(\\"|[^"])*"|\S+))?/ims) {
                my $log_file = unquote($1);
                my $arg2 = unquote($3);
                $log_file = env_replace($log_file);
                my $log = add_log($log_file);
                $log->{vhost} = $vhost;
                $vhost->{file} = $file;
                if ($arg2) {
                    if ($arg2 =~ /%/) {
                        $log->{format} = $arg2;
                    }
                    else {
                        $log->{fname} = $arg2;
                    }
                }
            }
            elsif ($line =~ /^\s*LogFormat\s+("(\\"|[^"])*"|\S+)(\s+(\S+))?/ims) {
                my $format = unquote($1);
                my $fname = unquote($4);
                if ($fname) {
                    $host->{fnames}{$fname} = $format;
                }
                else {
                    $vhost->{format} = $format;
                }
            }
        }
        close $fh;
    }
}

# The list is processed to remove duplicates and remove logs for
# overridden virtualhosts.
sub process_log_list {
    my %logs;
    my @logs_new;
    for my $log (@logs) {
        my $vhost = $log->{vhost};
        if ($vhost && $vhost->{invalid}) {
            next;
        }
        if ($host->{servroot} && $log->{file} !~ /^\//) {
            $log->{file} = "$host->{servroot}/$log->{file}";
        }
        if ($log->{fname} && $host->{fnames}{$log->{fname}}) {
            $log->{format} = $host->{fnames}{$log->{fname}};
        }
        if ($logs{$log->{file}}) {
            next;
        }
        $logs{$log->{file}} = $log;
        push @logs_new, $log;
    }
    @logs = @logs_new;
}

sub env_replace {
    my ($file) = @_;
    $ENV{APACHE_LOG_DIR} ||= "/var/log/apache2";
    $file =~ s/\$\{(\w+)\}/$ENV{$1}/ge;
    return $file;
}

sub basename {
    my ($file) = @_;
    $file =~ /([^\/]+)$/;
    my $name = $1;
    return $name;
}

sub dirname {
    my ($file) = @_;
    $file =~ /^((.*)\/)/;
    my $dir = $2 || ".";
    return $dir;
}

sub unquote {
    my ($file) = @_;
    return $file if !$file;
    if ($file =~ /^"((\\"|[^"])*)"/) {
        $file = $1;
    }
    elsif ($file =~ /^'((\\'|[^'])*)'/) {
        $file = $1;
    }
    $file =~ s/\\(["'])/$1/g;
    return $file;
}

sub vhost_name {
    my ($vhost) = @_;
    my @name;
    if ($vhost->{sname}) {
        push @name, $vhost->{sname};
    }
    if ($vhost->{vname}) {
        push @name, $vhost->{vname};
    }
    my $name = join " ", @name;
    return $name;
}

sub get_opts {
    my @args;
    while (my $arg = shift @ARGV) {
        if ($arg =~ /^--?$/) {
            push @args, @ARGV;
            last;
        }
        elsif ($arg eq "-m") {
            my $max = shift @ARGV;
            if (!defined $max || $max !~ /^\d+$/) {
                die "-m option requires a numerical argument.\n";
            }
            $opts{max} = $max;
        }
        elsif ($arg eq "-o") {
            my $offset = shift @ARGV;
            if (!defined $offset || $offset !~ /^\d+$/) {
                die "-o option requires a numerical argument.\n";
            }
            $opts{offset} = $offset;
        }
        elsif ($arg eq "-f") {
            my $filter = shift @ARGV;
            $opts{filter} = $filter;
            $opts{action} ||= "info";
        }
        elsif ($arg eq "-g") {
            $opts{action} = "graph";
            my $interval = $ARGV[0];
            if (defined $interval && $interval !~ /^-/) {
                shift @ARGV;
            }
            else {
                $interval = undef;
            }
            if (!$interval || $interval eq "h") {
                $opts{interval} = 60 * 60;
            }
            elsif ($interval eq "d") {
                $opts{interval} = 24 * 60 * 60;
            }
            elsif ($interval =~ /^\d+$/) {
                $opts{interval} = $interval;
            }
            else {
                die "-g argument must be h, d, or a number.\n";
            }
        }
        elsif ($arg =~ /^(--?help|-h)$/) {
            usage();
        }
        elsif ($arg eq "-s") {
            $opts{action} = "stats";
        }
        elsif ($arg eq "-i") {
            $opts{action} = "info";
        }
        elsif ($arg eq "-I") {
            $opts{action} = "info";
            $opts{verbose} = 1;
        }
        elsif ($arg eq "-l") {
            $opts{action} = "list";
        }
        elsif ($arg eq "-L") {
            $opts{action} = "detailed_list";
        }
        elsif ($arg eq "-p") {
            $opts{action} = "print";
        }
        elsif ($arg eq "-r") {
            $opts{rotation} = shift @ARGV;
        }
        elsif ($arg =~ /^-/) {
            die "Invalid argument '$arg'\n";
        }
        else {
            push @args, $arg;
        }
    }
    if (@args > 1) {
        die "Too many arguments\n";
    }
    $opts{name} = shift @args;
    $opts{action} ||= "less";
}

sub usage {
    print <<EOUSAGE;
Usage: alog [<options>] [<name>]

-f <regex>        filter based on regex
-g [<interval>]   graph errors
-h                show this help text
-i                show info spread vertically
-I                show info verbosely
-l                list available logs
-L                list available logs with details
-m <n>            process a maximum of n accesses, starting from the end
-o <n>            process accesses starting at an offset from the end
-p                print log path
-r <n>            rotation number
-s                show statistics

<name>            name of the log you are trying to access (regex),
                  if name contains a "/", name is treated as a file name,
                  default is the access log for the cwd.

By default, this command will open the log in \$PAGER or less(1)
EOUSAGE
    exit;
}

__END__

=head1 NAME

alog - An Apache access log viewer

=head1 SYNOPSIS

    alog [<options>] [<name>]

=head1 OPTIONS

    -f <regex>        filter based on regex
    -g [<interval>]   graph errors
    -h                show this help text
    -i                show info spread vertically
    -I                show info verbosely
    -l                list available logs
    -L                list available logs with details
    -m <n>            process a maximum of n accesses, starting from the end
    -o <n>            process accesses starting at an offset from the end
    -p                print log path
    -r <n>            rotation number
    -s                show statistics

    <name>            name of the log you are trying to access (regex),
                      if name contains a "/", name is treated as a file name,
                      default is the access log for the cwd.

By default, this command will open the log in \$PAGER or less(1)

=head1 DESCRIPTION

This program will show the Apache access log associated with the
directory you are currently inside of.

Many people set up web servers with each website inside their own
directory in $HOME or /var/www. While working on these sites, for
example /var/www/coolsite.com/, you can run `alog` with no arguments
and it will show the access log for that site inside of less(1).

If you define the $PAGER environment variable, `alog` will use that
program instead of less(1).

If you want to view another site's access log, provide `alog` with an
expression that partially matches the name of that website's log
after the `alog` command. For example, `alog foo`.

To see a list of all the access logs on the server use `alog -l`.
More detailed information, such as what rotations exist for each
log, use `alog -L`.

To specify an older rotation of an access log, use the -r option.
For example `alog -r 2`, might show the /var/log/httpd/foo.access_log.2.gz
file. If that rotation doesn't exist, it will choose the 2nd in the list
shown when you use the -L option.

The way it determines which access log to show is by parsing Apache
config files in either /etc/httpd or /etc/apache2. A CustomLog line
tells where the access log is, a DocRoot line tells which directory
that access log is for, a LogFormat line tells what format the
access log uses.

The -p option will show the path the selected access log file.

The -f option will filter based on a given regex for the -i, -s, or -g option.

The -s option will show statistics about the access log file such
as how many requests there were, their time frame, and most active
URIs.

The -i option will show the data fields of the access log entry on
their own line, so you don't have to scroll right to see the part
you are interested in.

The -I option will show all the fields we have for the entry on it's
own line.

The -m option limits the maximum number of accesses shown with the -i, -s, or the
-g option, starting from the end of the log (most recent).

The -o option sets an offset to the accesses shown with the -i option, so
"elog -i -m 1" shows the last access, "elog -i -m 1 -o 1" shows the second
to last access.

The -g option will show a graph of the number of accesses in hourly
intervals. If provided an argument, it can be h for hourly, d for daily,
or a number of seconds.

=head1 METACPAN

L<https://metacpan.org/pod/App::Elog>

=head1 AUTHOR

Jacob Gelbman E<lt>gelbman@gmail.comE<gt>

=head1 COPYRIGHT AND LICENSE

Copyright (C) 2017 by Jacob Gelbman

This library is free software; you can redistribute it and/or modify
it under the same terms as Perl itself, either Perl version 5.18.2 or,
at your option, any later version of Perl 5 you may have available.

=cut

