#!/usr/bin/perl -w

use strict;
use LWP::UserAgent;
use HTML::Entities;
use HTML::Parser ();
use Text::Iconv;
use Encode qw(encode decode);
use Data::Dumper;

use locale;

our ( @P, @Div, $This_Uri, $This_Image_Retrieved );

# urlize($uri)
#
# Turn a link found in a page into an absolute URI.
#   * links that already start with http:// or https:// are returned as-is;
#   * links starting with "/" are resolved against http://www.flickr.com;
#   * anything else is appended to $This_Uri (the page currently being
#     parsed), separated by a "/".
#
# Returns the resulting URI string.
sub urlize($) {
  my $uri = shift;

  # Already absolute: nothing to do.
  return $uri if $uri =~ m{\Ahttps?://};

  # Site-relative link.
  return "http://www.flickr.com$uri" if $uri =~ m{\A/};

  # Page-relative link.  This has to be string interpolation: a bare
  # $This_Uri/$uri is a numeric division and never yields a URI.
  return "$This_Uri/$uri";
}

# start($tagname, $attr)
#
# HTML::Parser start-tag handler.
#   <p>   : remembers the paragraph's class on @P (only when it has one).
#   <div> : remembers the div's id -- or, failing that, its class -- on @Div
#           (only when it has one of the two).
#   <a>   : depending on the innermost remembered paragraph/div, follows the
#           link: photo links go to zoom_image(), the paginator's "Next"
#           link goes to get_page().
# Every other tag is ignored.
sub start($%) {
    my ( $tag, $attrs ) = @_;

    if ( $tag eq 'p' ) {
        push @P, $attrs->{class} if defined $attrs->{class};
        return;
    }

    if ( $tag eq 'div' ) {
        # The id wins over the class when both are present.
        my $label = defined $attrs->{id} ? $attrs->{id} : $attrs->{class};
        push @Div, $label if defined $label;
        return;
    }

    return unless $tag eq 'a';

    my $href = $attrs->{href};

    # Innermost remembered paragraph class / div label; '' when the stack is
    # empty, which matches none of the names tested below.
    my $para = @P   ? $P[-1]   : '';
    my $div  = @Div ? $Div[-1] : '';

    if ( $para eq 'Photo' or $para eq 'PoolList' ) {
        # One paragraph may hold several links to the same photo; stop
        # following them once one has been reported as retrieved.
        return unless defined $href;
        return if $This_Image_Retrieved;
        # FIX: find a better solution
        $This_Image_Retrieved = zoom_image($href);
        return;
    }

    if ( $div eq 'setThumbs' ) {
        zoom_image($href) if defined $href;
        return;
    }

    if ( $div eq 'Paginator' ) {
        my $class = $attrs->{class};
        if ( defined $class and $class eq 'Next' and defined $href ) {
            get_page( urlize($href) );
        }
    }
    return;
}

# end($tagname)
#
# HTML::Parser end-tag handler (registered with the "tagname" argspec only).
#   </p>   : drops the innermost entry of @P and, if that entry was a true
#            value, clears $This_Image_Retrieved so the next paragraph's
#            link is followed again.
#   </div> : drops the innermost entry of @Div.
#
# NOTE(review): entries are popped on every closing tag, whereas start()
# only pushes for tags that carry a class/id, so the stacks can get out of
# step with the real nesting of the document.
sub end($%) {
    my ($tag) = @_;

    if ( $tag eq 'p' ) {
        my $closed_class = pop @P;
        $This_Image_Retrieved = 0 if $closed_class;
    }

    if ( $tag eq 'div' ) {
        pop @Div;
    }
}

# get_page($uri)
#
# Fetch an index page and run it through HTML::Parser with the start()/end()
# handlers, which download the photos it links to and follow the
# paginator's "Next" link (by calling get_page() again, recursively).
#
# Returns 1 when the page could not be fetched (after printing a warning);
# otherwise returns whatever $p->eof returns.
sub get_page($) {
    my $uri = shift;

    # urlize() resolves relative links against $This_Uri.  The start handler
    # can call get_page() recursively in the middle of a parse, so use
    # `local`: when the nested call returns, the outer page's URI is put
    # back instead of staying clobbered by the last page visited.
    local $This_Uri = $uri;

    my $ua = LWP::UserAgent->new;
    $ua->timeout(10);
    $ua->env_proxy;

    my $res = $ua->get($uri);
    unless ( $res->is_success ) {
        # Report the failure like get_image()/zoom_image() do, rather than
        # silently skipping the page.
        warn "Unable to fetch $uri: " . $res->status_line;
        return 1;
    }
    my $page = $res->content;

    # Create parser object
    my $p = HTML::Parser->new(
        api_version     => 3,
        start_h         => [ \&start, "tagname, attr" ],
        end_h           => [ \&end,   "tagname" ],
        marked_sections => 1,
    );
    $p->utf8_mode(1);
    $p->parse($page);
    return $p->eof;
}

# _filename_from_title($title)
#
# Derive a local "*.jpg" file name from a photo title: Latin-1 accented
# letters are reduced to their ASCII base letter, whitespace becomes "_",
# and every character outside [a-zA-Z0-9_.-] is dropped.  If that name is
# already taken in the current directory, a ".NNN" counter is inserted
# before the extension.
#
# Returns the file name, or '' when the title is false or nothing usable is
# left of it.
sub _filename_from_title {
    my ($title) = @_;
    return '' unless $title;

    my $name = $title;
    $name =~ s/\xA0/ /g;                # no-break space

    # The accented letters are written as code points so that the mapping
    # does not depend on the encoding this source file is saved in.
    $name =~ tr/\xC0-\xC6/A/;           # A grave .. A ring, AE
    $name =~ tr/\xC7/C/;                # C cedilla
    $name =~ tr/\xD0/D/;                # capital eth
    $name =~ tr/\xC8-\xCB/E/;
    $name =~ tr/\xCC-\xCF/I/;
    $name =~ tr/\xD1/N/;                # N tilde
    $name =~ tr/\xD2-\xD6\xD8/O/;       # O grave .. O diaeresis, O slash
    $name =~ tr/\xD9-\xDC/U/;
    $name =~ tr/\xDD/Y/;                # Y acute
    $name =~ tr/\xE0-\xE6/a/;           # a grave .. a ring, ae
    $name =~ tr/\xE7/c/;                # c cedilla
    $name =~ tr/\xE8-\xEB/e/;
    $name =~ tr/\xEC-\xEF/i/;
    $name =~ tr/\xF1/n/;                # n tilde
    $name =~ tr/\xF2-\xF6\xF8/o/;       # o grave .. o diaeresis, o slash
    $name =~ tr/\xF9-\xFC/u/;
    $name =~ tr/\xFD\xFF/y/;            # y acute, y diaeresis
    $name =~ s/\xDF/ss/g;               # sharp s
    $name =~ s/\xF0/th/g;               # small eth

    return '' unless $name =~ /\S/;

    $name =~ s/\s/_/g;
    $name =~ s/[^a-zA-Z0-9_\-.]//g;
    return '' unless length $name;

    # Do not overwrite an earlier download with the same title.
    if ( -f "$name.jpg" ) {
        my $findex = 1;
        $findex++ while -f sprintf( "%s.%03d.jpg", $name, $findex );
        $name = sprintf "%s.%03d", $name, $findex;
    }
    return "$name.jpg";
}

# download_image($img, $title)
#
# Download the image at URI $img with wget.  When a file name can be derived
# from $title (which may be undef) the image is saved under that name;
# otherwise wget picks the name itself and is run with -c.
#
# A warning is printed when wget cannot be run or exits with a non-zero
# status.  Always returns 1: zoom_image() hands this value back to its
# caller as the "image retrieved" flag.
sub download_image($$) {
    my ( $img, $title ) = @_;

    my $filename = _filename_from_title($title);

    # $img is scraped from a remote page, so it must never reach a shell:
    # the list form of system() runs wget directly, and "--" keeps a URI
    # starting with "-" from being read as an option.
    my @cmd = ( 'wget', '--quiet' );
    if ( length $filename ) {
        push @cmd, '-O', $filename;
    }
    else {
        # FIX remove ?v=0
        push @cmd, '-c';
    }
    push @cmd, '--', $img;

    if ( system(@cmd) != 0 ) {
        if ( $? == -1 ) {
            warn "Unable to run wget: $!";
        }
        else {
            warn "wget failed for $img (status " . ( $? >> 8 ) . ")";
        }
    }
    return 1;
}

# get_image($uri, $title)
#
# Fetch the page at $uri (three attempts at most) and hand every image it
# references whose URI ends in _o.jpg, _b.jpg or _m.jpg to download_image(),
# together with $title.
#
# Returns 1 when at least one such image was found, 0 (after a warning)
# when the page could not be fetched or contained no such image.
sub get_image($$) {
    my ( $uri, $title ) = @_;

    my $agent = LWP::UserAgent->new;
    $agent->timeout(10);
    $agent->env_proxy;

    # One initial request plus up to two retries.
    my $response;
    for my $attempt ( 1 .. 3 ) {
        $response = $agent->get($uri);
        last if $response->is_success;
    }
    if ( not $response->is_success ) {
        warn "Unable to fetch $uri";
        return 0;
    }

    my $hits = 0;
    for my $line ( split /\n/, decode( "utf8", $response->content ) ) {
        # At most one <img> is considered per line.
        next unless $line =~ /img src="([^"]+)"/;
        my $candidate = $1;

        # FIX: find a better method, use image id or something
        next unless $candidate =~ /_[obm][.]jpg$/;

        $hits++;
        download_image( $candidate, $title );
    }

    if ( not $hits ) {
        warn "No image found in $uri";
        return 0;
    }
    return 1;
}

# zoom_image($uri)
#
# Download the photo shown on the photo page $uri (absolute, or relative as
# understood by urlize()).
#
# The page is scanned for its <h1> title, for a link to /photo_zoom and for
# the main image inside the "photoImgDiv" element.  If the photo is
# zoomable, the zoom page is asked which sizes exist and the largest one is
# fetched through get_image(); otherwise the main image is downloaded
# directly.
#
# Returns a true value when an image was handed to the downloader, and 0
# (after a warning) on every failure -- including a failed fetch of the
# photo page, so that the caller does not record the image as retrieved.
sub zoom_image($) {
    my $uri = shift;

    my $ua = LWP::UserAgent->new;
    $ua->timeout(10);
    $ua->env_proxy;

    my $res = $ua->get( urlize($uri) );
    unless ( $res->is_success ) {
        warn "unable to fetch $uri";
        return 0;
    }

    my ( $title, $zoomable, $main_uri );
    foreach my $row ( split /\n/, $res->content ) {
        if ( $row =~ /<h1[^>]*>([^<]+)/ ) {
            my $string = $1;
            $title = decode_entities( decode( "utf8", $string, Encode::FB_QUIET ) );
        }
        if ( $row =~ /<a\s+href="\/photo_zoom/ ) {
            $zoomable = 1;
        }
        if ( $row =~ /class="photoImgDiv">\s*<img\s+src="(http:\/\/[^"]+)"/ ) {
            $main_uri = $1;
        }
    }

    # remove trailing slash
    $uri =~ s:/*$::;

    # find image id: the path component before "/in/<context>", or else the
    # last path component
    my ($image_id) = $uri =~ /([^\/]+)\/in\/\w+-[^\/]+$/;
    ($image_id) = $uri =~ /([^\/]+)$/ unless defined $image_id;
    unless ( defined $image_id ) {
        # Only reached when nothing is left of $uri after stripping slashes.
        warn "Image link $uri isn't absolute, do download.";
        return 0;
    }

    if ( $zoomable ) {
        my $zoom_uri = sprintf 'http://flickr.com/photo_zoom.gne?id=%s', $image_id;

        # FIX: KLUDGE
        # Ask the zoom page which sizes it links to.  The page is fetched
        # with the user agent rather than through a shell pipeline, because
        # $image_id comes from a scraped link and must not reach a shell.
        my %available;
        my $probe = $ua->get("$zoom_uri&size=m");
        if ( $probe->is_success ) {
            foreach my $row ( split /\n/, $probe->content ) {
                next unless index( $row, '<a href="photo_zoom' ) >= 0;
                # Greedy .* : the last size=... on the line is the one kept.
                next unless $row =~ /^.*size=([a-z]+)/;
                $available{$1} = 1;
            }
        }

        # Sizes are listed smallest first, so the last one that is
        # available wins; 'm' is used when none could be determined.
        my $final_size = 'm';
        foreach my $size ( qw( t s m l o ) ) {
            $final_size = $size if $available{$size};
        }
        return get_image( "$zoom_uri&size=$final_size", $title );
    }

    if ( $main_uri ) {
        download_image( $main_uri, $title );
        return 1;
    }

    warn "No zoomable image and no main image found for $uri";
    return 0;
}

# UTF-8 -> ISO-8859-1 converter.  Nothing in the code visible here refers to
# it after this line.
my $converter = Text::Iconv->new( "utf8", "iso88591" );

# Every command-line argument is the URI of an index page to crawl.
for my $start_page (@ARGV) {
    get_page($start_page);
}