#!/usr/bin/perl -w
#
# Look up title/author information for a list of ISBNs by fetching the
# product page from several online bookstores and scraping it with a
# small HTML::Parser subclass per store.
#
# Input : file "isbn.3" in the current directory, one ISBN per line.
# Output: for each ISBN and each bookstore, the title and authors found,
#         printed to STDOUT.

use strict;
use warnings;

# ----------------------------------------------------------------------
# BookInfoParser - state and helpers shared by the per-store parsers.
#
# Every store parser keeps the same four pieces of state in the object:
#   authors - array ref of author names collected so far
#   state   - '' or the name of the field currently being collected
#   tagtext - text accumulated since the last reset
#   title   - the title, once found (absent until then)
# ----------------------------------------------------------------------
package BookInfoParser;

use HTML::Parser;
our @ISA = ('HTML::Parser');

# Constructor. Called with no arguments so that HTML::Parser dispatches
# to the start/end/text/comment *methods* of the subclass.
sub new {
    my $type  = shift;
    my $class = ref($type) || $type;

    my $self = $class->SUPER::new();
    $self->{'authors'} = [];
    $self->{'state'}   = '';
    $self->{'tagtext'} = '';
    return $self;
}

# Text handler: append every text chunk to the running buffer.
sub text {
    my ($self, $text) = @_;
    $self->{'tagtext'} .= $text;
    return;
}

# Comment handler: ignored unless a store parser overrides it.
sub comment {
    return;
}

# Returns the list (title, author1, author2, ...). The title is undef
# when the page did not yield one.
sub getbookinfo {
    my ($self) = @_;
    return ($self->{'title'}, @{ $self->{'authors'} });
}

# Plain function: strip leading and trailing whitespace.
sub trim_text {
    my ($text) = @_;
    $text =~ s/^\s+//;
    $text =~ s/\s+$//;
    return $text;
}

# Plain function: turn &nbsp; entities into spaces, trim, and collapse
# every internal run of whitespace to a single space.
#
# NOTE(review): the file as received matched the two characters
# space + ';' here, which looks like an entity-decoded remnant of
# '&nbsp;' - confirm against the original source.
sub tidy_text {
    my ($text) = @_;
    $text =~ s/&nbsp;/ /g;
    $text = trim_text($text);
    $text =~ s/\s+/ /g;
    return $text;
}

# ----------------------------------------------------------------------
# bnParser - Barnes & Noble product page.
#
# An HTML comment containing just "title" or "author" announces the
# field that follows; the field ends at the next <br> (or, for authors,
# at the closing </a>).
# ----------------------------------------------------------------------
package bnParser;

our @ISA = ('BookInfoParser');

# Record the current text buffer as an author, whitespace-trimmed.
sub _store_author {
    my ($self) = @_;
    push @{ $self->{'authors'} },
        BookInfoParser::trim_text($self->{'tagtext'});
    return;
}

# Start-tag handler.
sub start {
    my ($self, $tag, $attr, $attrseq, $origtext) = @_;

    # A link restarts text collection so only the link text is kept.
    if ($tag eq 'a') {
        $self->{'tagtext'} = '';
    }

    # <br> terminates whichever field is being collected.
    if ($tag eq 'br') {
        if ($self->{'state'} eq 'getauthor') {
            $self->_store_author();
        }
        elsif ($self->{'state'} eq 'gettitle') {
            $self->{'title'} = $self->{'tagtext'};
        }
        $self->{'tagtext'} = '';
        $self->{'state'}   = '';
    }
    return;
}

# End-tag handler: </a> closes an author link.
sub end {
    my ($self, $tag, $origtext) = @_;

    if ($tag eq 'a') {
        if ($self->{'state'} eq 'getauthor') {
            $self->_store_author();
        }
        $self->{'tagtext'} = '';
        $self->{'state'}   = '';
    }
    return;
}

# Comment handler: marker comments select the field to collect next.
sub comment {
    my ($self, $comment) = @_;

    if ($comment =~ /^\s*title\s*$/) {
        $self->{'state'} = 'gettitle';
    }
    if ($comment =~ /^\s*author\s*$/) {
        $self->{'state'} = 'getauthor';
    }
    $self->{'tagtext'} = '';
    return;
}

# Returns (title, authors...) with the title whitespace-normalised.
# The title stays undef when none was found.
sub getbookinfo {
    my ($self) = @_;

    if (defined $self->{'title'}) {
        $self->{'title'} = BookInfoParser::tidy_text($self->{'title'});
    }
    return ($self->{'title'}, @{ $self->{'authors'} });
}

# ----------------------------------------------------------------------
# amazonParser - Amazon product page.
#
# The title is the first <b>/<a> element closed after the buy-box form
# opens; authors are links to /exec/obidos/Author=... that appear before
# the customer-review-form link.
# ----------------------------------------------------------------------
package amazonParser;

our @ISA = ('BookInfoParser');

# Start-tag handler.
sub start {
    my ($self, $tag, $attr, $attrseq, $origtext) = @_;

    if ($tag eq 'form') {
        my $action = $attr->{'action'};
        if (defined $action
            && $action =~ m{^/exec/obidos/handle-buy-box=[0-9]+/book-glance})
        {
            $self->{'state'}   = 'gettitle';
            $self->{'tagtext'} = '';
        }
    }

    if ($tag eq 'a' && defined $attr->{'href'}) {
        my $href = $attr->{'href'};

        # Once the review-form link has been seen, author links are
        # no longer collected.
        if ($href =~ m{^/exec/obidos/ts/book-customer-review-form/}) {
            $self->{'pastreviewsection'} = 1;
        }
        if ($href =~ m{^/exec/obidos/Author=}
            && !defined $self->{'pastreviewsection'})
        {
            $self->{'state'}   = 'getauthor';
            $self->{'tagtext'} = '';
        }
    }

    if ($tag eq 'b') {
        $self->{'tagtext'} = '';
    }
    return;
}

# End-tag handler: </b> or </a> completes the pending field.
sub end {
    my ($self, $tag, $origtext) = @_;

    return unless $tag eq 'b' || $tag eq 'a';

    if ($self->{'state'} eq 'gettitle') {
        $self->{'title'} = $self->{'tagtext'};
        $self->{'state'} = '';
    }
    elsif ($self->{'state'} eq 'getauthor' && defined $self->{'title'}) {
        # Authors are only accepted after the title has been found.
        push @{ $self->{'authors'} }, $self->{'tagtext'};
        $self->{'state'} = '';
    }
    return;
}

# ----------------------------------------------------------------------
# bordersParser - Borders product page.
#
# The title is the content of <big>...</big>. A <b> element whose text
# starts with "Author...:" or "Editor...:" introduces a '/'-separated
# list of names that runs up to the next <br>.
# ----------------------------------------------------------------------
package bordersParser;

our @ISA = ('BookInfoParser');

# Start-tag handler.
sub start {
    my ($self, $tag, $attr, $attrseq, $origtext) = @_;

    if ($tag eq 'big' || $tag eq 'b') {
        $self->{'tagtext'} = '';
    }

    if ($tag eq 'br') {
        my $state = $self->{'state'};

        # Authors and editors are both stored in the authors list.
        if ($state eq 'getauthor' || $state eq 'geteditor') {
            my @names = split m{/}, $self->{'tagtext'};

            # split yields nothing for an empty (or all-'/') buffer;
            # fall back to the raw buffer in that case.
            @names = ($self->{'tagtext'}) unless @names;

            push @{ $self->{'authors'} },
                map { BookInfoParser::tidy_text($_) } @names;
        }
        $self->{'tagtext'} = '';
        $self->{'state'}   = '';
    }
    return;
}

# End-tag handler.
sub end {
    my ($self, $tag, $origtext) = @_;

    if ($tag eq 'big') {
        $self->{'title'} = $self->{'tagtext'};
    }

    if ($tag eq 'b') {
        if ($self->{'tagtext'} =~ /^Author.*\:/) {
            $self->{'state'}   = 'getauthor';
            $self->{'tagtext'} = '';
        }
        elsif ($self->{'tagtext'} =~ /^Editor.*\:/) {
            $self->{'state'}   = 'geteditor';
            $self->{'tagtext'} = '';
        }
    }
    return;
}

# ----------------------------------------------------------------------
# Driver
# ----------------------------------------------------------------------
package main;

use LWP::UserAgent;
use HTTP::Request;

# Bookstore key => sprintf template for the product URL (%s = ISBN).
# The parser class for a store is named "<key>Parser".
my %bookstores = (
    'borders', 'http://search.borders.com/fcgi-bin/db2www/search/search.d2w/Details?code=%s&mediaType=Book&searchType=ISBNUPC&prodID=',
    'amazon',  'http://www.amazon.com/exec/obidos/ISBN=%s',
    'bn',      'http://shop.barnesandnoble.com/bookSearch/isbnInquiry.asp?isbn=%s',
);

my $ua = LWP::UserAgent->new;
$ua->agent("Flutterby.ComLibraryInfo/0.1 " . $ua->agent);

my $isbn_file = 'isbn.3';
open my $isbn_fh, '<', $isbn_file
    or die "Cannot open $isbn_file: $!\n";

while (my $isbn = <$isbn_fh>) {
    chomp $isbn;
    next unless length $isbn;    # skip blank lines

    # Sorted so the stores are queried in the same order on every run.
    foreach my $bookstore (sort keys %bookstores) {
        my $parser_class = $bookstore . 'Parser';
        my $parser       = $parser_class->new();

        print "Trying $bookstore for $isbn\n";

        my $url = sprintf $bookstores{$bookstore}, $isbn;
        my $res = $ua->request(HTTP::Request->new(GET => $url));

        unless ($res->is_success) {
            warn "Request to $bookstore for $isbn failed: ",
                $res->status_line, "\n";
            next;
        }

        # Parse the body only (not the status line and headers), then
        # signal end-of-document so buffered trailing text is flushed.
        $parser->parse($res->content);
        $parser->eof;

        my ($title, @authors) = $parser->getbookinfo();
        $title = '' unless defined $title;

        print "Title: $title\n";
        print " authors @authors\n";
    }
}

close $isbn_fh;