#!/usr/bin/perl -w
#
# Small Perl/Tk front end that downloads the chapter PDFs of books from
# MIT CogNet (one book per ISBN) and can merge the chapters of a book
# into a single PDF with Ghostscript.

use strict;
use warnings;

use Tk;
use LWP::Simple;

# Path of the Ghostscript executable used by the "Merge PDFs" button.
my $GSBASE = "C:\\gs\\gs8.14\\bin\\gswin32.exe";
# ON UNICES :
# my $GSBASE = "gs";

# Options passed to Ghostscript before the output file and the inputs.
my @GSOPTIONS = qw(-dBATCH -dNOPAUSE -dFirstPage=2 -q -sDEVICE=pdfwrite);

my $baseReq = "http://cognet.mit.edu/library/books/view?isbn=";
my $basePdf = "http://cognet.mit.edu/library/books/mitpress/";

# State describing the book currently being processed.  It is filled in
# by setTitleAndFilesNames and read by mkTableOfContents / downloadPdfs.
my $title           = "";    # Will be the directory name
my @fileNames       = ();    # Names of the pdf files (without ".pdf")
my @contentsEntries = ();    # Link text belonging to each pdf file

# Text shown in the status line at the bottom of the window.
my $info = "";

my $BGC  = '#DDDDFF';
my $FGC  = '#EE1111';
my $ABGC = '#6666FF';
my $AFGC = 'white';

# Colours shared by every button.
my %BUTTON_STYLE = (
    -bg               => $BGC,
    -fg               => $FGC,
    -activebackground => $ABGC,
    -activeforeground => $AFGC,
);

my $MW = MainWindow->new(-bg => $BGC, -fg => $FGC);
$MW->Label(
    -text => 'Retrieve books from COGNET',
    -bg   => $BGC,
    -fg   => $FGC,
)->pack(-side => 'top');

my $Top = $MW->Frame(-bg => $BGC);
my $Bot = $MW->Frame(-bg => $BGC);

# Status line; it follows $info through -textvariable.
$MW->Label(
    -textvariable => \$info,
    -bg           => $BGC,
    -relief       => 'ridge',
)->pack(-side => 'bottom', -fill => 'x');

###
$Top->Button(
    -text    => 'Load list of ISBNs',
    -command => \&open_file,
    %BUTTON_STYLE,
)->pack(-side => 'left');
$Top->Button(
    -text    => 'Download Books',
    -command => \&download_files,
    %BUTTON_STYLE,
)->pack(-side => 'left');
$Top->Button(
    -text    => 'Merge PDFs',
    -command => \&open_toc,
    %BUTTON_STYLE,
)->pack(-side => 'left');
$Top->pack(-side => 'top', -fill => 'x');

###
my $Entries = $MW->Scrolled(
    'Text',
    -bg         => $BGC,
    -fg         => $FGC,
    -height     => '10',
    -width      => '50',
    -scrollbars => 'e',    # east
)->pack(-side => 'top', -fill => 'x');

###
$Bot->Button(
    -text    => 'Help',
    -command => \&show_help,
    %BUTTON_STYLE,
)->pack(-side => 'left');
$Bot->Button(
    -text    => 'Quit',
    -command => sub { exit },
    %BUTTON_STYLE,
)->pack(-side => 'right');
$Bot->Button(
    -text    => 'Clear',
    -command => sub { $Entries->delete("1.0", "end"); $info = ""; },
    %BUTTON_STYLE,
)->pack(-side => 'right');
$Bot->pack(-side => 'bottom', -fill => 'x');

MainLoop;

# Ask for a text file and load its lines into the text widget, replacing
# whatever was there.  The outcome is reported in the status line.
sub open_file {
    my $fn = $Top->getOpenFile();

    # Nothing was selected (dialog cancelled): leave the window untouched.
    return unless defined $fn and length $fn;

    $info = "Loading '$fn'...";
    $Entries->delete("1.0", "end");

    my $fh;
    unless (open($fh, '<', $fn)) {
        $info = "Error: cannot open file '$fn': $!";
        return;
    }

    my $n = 0;
    while (my $line = <$fh>) {
        $Entries->insert("end", $line);
        $n++;
    }
    close($fh);

    $info = "'$fn' : $n lines read";
    return;
}

# Ask for a table-of-contents file (as written by mkTableOfContents) and
# merge the chapter PDFs it lists into "<title>.pdf" in the same
# directory by running Ghostscript.
#
# Expected file layout: line 1 is the title, line 2 is skipped, and the
# first whitespace-separated word of every further line is a PDF file
# name relative to the directory of the table-of-contents file.
sub open_toc {
    my $fn = $Top->getOpenFile();
    return unless defined $fn and length $fn;    # dialog cancelled

    # Directory part of $fn; accepts both kinds of path separator.
    my @parts = split /[\\\/]/, $fn;
    pop @parts;
    my $dir = @parts ? join('/', @parts) : '.';

    my $fh;
    unless (open($fh, '<', $fn)) {
        $info = "Error: cannot open file '$fn': $!";
        return;
    }
    my @lines = <$fh>;
    close($fh);

    # chomp (not chop) so that a last line without a newline stays whole.
    chomp(@lines);
    s/\r$// for @lines;

    my $book_title = shift @lines;
    shift @lines;    # separator line after the title

    unless (defined $book_title and length $book_title) {
        $info = "Error: '$fn' does not start with a title line";
        return;
    }

    my $output = "$dir/$book_title.pdf";

    # Argument list instead of one shell string: the title and the file
    # names come from a file and must never be interpreted by a shell.
    my @cmd = ($GSBASE, @GSOPTIONS, "-sOutputFile=$output");

    my $chapters = 0;
    foreach my $line (@lines) {
        my $chap = (split /\s+/, $line)[0];

        # Blank lines (or lines starting with whitespace) name no file.
        next unless defined $chap and length $chap;

        my $path = "$dir/$chap";
        next unless -f $path;
        push @cmd, $path;
        $chapters++;
    }

    unless ($chapters) {
        $info = "Error: none of the PDF files listed in '$fn' were found";
        return;
    }

    $info = "Merging $chapters file(s) into '$output'...";
    $MW->update;

    my $rc = system(@cmd);
    if ($rc == 0) {
        $info = "Merged $chapters file(s) into '$output'";
    }
    elsif ($rc == -1) {
        $info = "Error: cannot run '$GSBASE': $!";
    }
    else {
        $info = "Error: Ghostscript failed (exit status " . ($rc >> 8) . ")";
    }
    return;
}

# Download every book whose ISBN is listed (one per line) in the text
# widget.  For each book a directory named after its title is created
# in the current directory and filled with a table of contents and the
# chapter PDFs.
sub download_files {
    my @isbns = split /\n/, $Entries->get("1.0", "end");

    my $done = 0;
    my @problems;

    foreach my $isbn (@isbns) {
        $isbn =~ s/^\s+//;
        $isbn =~ s/\s+$//;
        next if $isbn eq '';    # blank line

        $info = "Fetching book page for ISBN $isbn";
        $MW->update;            # repaint the status line while we block

        my $page = makeRequest($isbn);
        setTitleAndFilesNames($page);

        # No title means the page could not be fetched or parsed.  Going
        # on would write into a directory with an empty name.
        if ($title eq '') {
            push @problems, "$isbn (no book page or title)";
            next;
        }

        $info = "Processing ISBN $isbn ($title)";
        $MW->update;

        unless (-d $title or mkdir $title) {
            push @problems, "$isbn (cannot create directory '$title': $!)";
            next;
        }
        unless (mkTableOfContents()) {
            push @problems, "$isbn (cannot write table of contents)";
            next;
        }

        my $failed = downloadPdfs($isbn);
        push @problems, "$isbn ($failed file(s) not downloaded)" if $failed;
        $done++;
    }

    $info = "$done book(s) processed";
    $info .= "; problems: " . join(", ", @problems) if @problems;
    return;
}

# Fetch the CogNet page describing the book with the given ISBN.
# Returns the page content, or undef when the request fails.
sub makeRequest {
    my ($isbn) = @_;
    my $reqStr = $baseReq . $isbn;
    return get($reqStr);
}

# Write "$title/tableOfContents.txt": the title, an empty line, then one
# "<file>.pdf<TAB><entry>" line per chapter.
# Returns 1 on success and 0 (with the reason in the status line) when
# the file cannot be written.
sub mkTableOfContents {
    my $path = "$title/tableOfContents.txt";

    my $fh;
    unless (open($fh, '>', $path)) {
        $info = "Error: cannot write '$path': $!";
        return 0;
    }

    print $fh "$title\n\n";
    foreach my $i (0 .. $#fileNames) {
        my $entry = $contentsEntries[$i];
        $entry = '' unless defined $entry;
        print $fh "$fileNames[$i].pdf\t$entry\n";
    }

    # Buffered write errors only show up when the handle is closed.
    unless (close($fh)) {
        $info = "Error: cannot write '$path': $!";
        return 0;
    }
    return 1;
}

# Parse the HTML of a book page (first argument, may be undef) and fill
# in $title, @fileNames and @contentsEntries.
#
# The title is the content="..." value on the line matching
# "meta description".  Every line containing "&part=" contributes one
# chapter: the text after the last "&part=" up to the next double quote
# is the file name, and the text between the following '">' and "</a>"
# is its table-of-contents entry.
#
# All three variables are reset first, so an unusable page leaves
# $title empty instead of keeping the previous book's title.
sub setTitleAndFilesNames {
    my ($html) = @_;

    $title           = "";
    @fileNames       = ();
    @contentsEntries = ();

    return unless defined $html;

    foreach my $line (split /\n/, $html) {
        if ($line =~ /meta\s+description/) {
            $line =~ s/^.*content="//;
            $line =~ s/".*$//;
            $line =~ s/:/--/g;

            # The title becomes a directory name: neutralise characters
            # that are path separators or not allowed in file names.
            my $name = $line;
            $name =~ tr{\\/*?"<>|}{_};
            $name =~ s/^\s+//;
            $name =~ s/\s+$//;
            $title = $name;
        }

        if ($line =~ /&part=/) {
            $line =~ s/^.*&part=//;

            my $part = $line;
            $part =~ s/".*$//;

            $line =~ s/^.*">//;
            $line =~ s/<\/a>.*$//;

            # The part name is used in local paths; ignore names that
            # are empty or would leave the book directory.
            next if $part eq '' or $part =~ /[\\\/]/;

            push @fileNames,       $part;
            push @contentsEntries, $line;
        }
    }
    return;
}

# Download every file named in @fileNames for the given ISBN into the
# directory "$title".  Returns the number of files that could not be
# fetched or stored.
sub downloadPdfs {
    my ($isbn) = @_;

    my $failed = 0;
    foreach my $name (@fileNames) {
        # $basePdf already ends in "/", so the URL contains "//"; this
        # is kept exactly as the original request was built.
        my $reqStr = "$basePdf/$isbn/cache/$name.pdf";
        my $status = getstore($reqStr, "$title/$name.pdf");
        $failed++ unless is_success($status);
    }
    return $failed;
}

# Replace the contents of the text widget with the usage notes.
sub show_help {
    $Entries->delete("1.0", "end");
    $Entries->insert("end", <<'END_HELP');
This script allows you to download books from
MIT COGNET.
If you want to use the pdf merging functionality,
you have to adjust the variable $GSBASE.

Here is what the buttons do.

* Load list of ISBNs
  Load new-line separated text file with the
  ISBNs of the books to be downloaded.
  Alternatively, you can type the ISBNs by hand.

* Download books
  Download the books corresponding to the ISBNs.

* Merge PDFs
  Select a `table of contents' file (you'll find
  it in the directory of the download book) and
  merge the corresponding PDF files into one big
  file. Note that the result file will be
  printable (unlike the original cognet files).

* Clear
  Clear the text window.

* Quit
  Quit the program.

(c) ansgar.endress@m4x.org
END_HELP
    return;
}