#!/usr/bin/perl
# This program is free software. It comes without any warranty, to the extent
# permitted by applicable law. You can redistribute it and/or modify it under
# the terms of the Do What The Fuck You Want To Public License, Version 2, as
# published by Sam Hocevar. See COPYING for the license text, or
# http://sam.zoy.org/wtfpl/ for more information about the WTFPL.

use strict;
use warnings;
use IPC::Open2;
use CGI;
use CGI::Carp qw(fatalsToBrowser);
use LWP::UserAgent;
use File::Temp;
use HTML::TreeBuilder;

require 5.008_000; # for scalar ref -> filehandle magic

my $q = CGI->new;
my %handlers = (
    "application/postscript" => \&handle_ps,
    "text/html"              => \&handle_html,
    "default"                => \&handle_default,
);

my $url = $q->param('url');
unless($url) {
    fail(400, "Supply a 'url' parameter please.");
}
unless($url =~ m{^[a-zA-Z]+://}) {
    $url = "http://$url";
}
my $ua = LWP::UserAgent->new;
$ua->timeout(10);

my $response = $ua->get($url);
unless($response->is_success) {
    fail($response->code, $response->message);
}

my $content_type = $q->param("force_ct") || $response->header('Content-Type');
$content_type =~ s/;.*//;
if(my $handler = $handlers{$content_type}) {
    $handler->($response);
} else {
    $handlers{default}->($response);
}

sub handle_default {
    my $response = shift;
    print $q->header(-type => $response->header('Content-Type'));
    print $response->content;
}

sub handle_html {
    my $response = shift;
    print $q->header(-type => $response->header('Content-Type'));
    my($url) = $response->base;
    $url =~ s{/[^/]+$}{/};
    my($server) = $url =~ m{(http://[^/]+)/};
    my $tree = HTML::TreeBuilder->new_from_content($response->content);
    for (@{ $tree->extract_links(qw/a img/) }) {
        my ($target, $element, $attr, $tag) = @$_;
        my $new_target = $q->url() . "?url=";
        if($target =~ m|^/|) {
            $new_target .= $server;
        }
        elsif ($target !~ m|^http://|) {
            $new_target .= $url;
        }
        $new_target .= $target;
        $element->attr($attr, $new_target);
    }
    print $tree->as_HTML;
    $tree->delete;
}

sub handle_ps {
    my $response = shift;

    my $ps_temp = File::Temp->new( SUFFIX => '.ps' );
    print { $ps_temp } $response->content;

    my $timeout = 10;

    eval {
        local $SIG{ALRM} = sub { die "alarm\n" };
        alarm $timeout;

        open my $pdf_fh, "-|", "ps2pdf", $ps_temp->filename, "-"
            or fail(500, $!);

        print $q->header(-type => 'application/pdf');
        print while <$pdf_fh>;

        close $pdf_fh;

        alarm 0;
    };
    if ($@) {
        die unless $@ eq "alarm\n";
        fail(500, "ps2pdf took more than $timeout seconds to run; aborted.");
    }
}

sub fail {
    my ($code, $message) = @_;
    print $q->header(-type => 'text/html', -status => $code);
    print <<"HTML";
<html>
    <head>
        <title>ps2pdf: error $code</title>
    </head>
    <body>
        <h1>$code: $message</h1>
        <ul>
            <li><a href="?url=http://web.comlab.ox.ac.uk/">comlab</a></li>
        </ul>
    </body>
</html>
HTML
    exit 0;
}
