#! /usr/bin/perl -w
# This script shall be run regularly:
# it queries the 'entrez' nucleotide database for a defined string (like 16S)
# and appends the number of items to a text file; errors are sent by email
# to the 'maintainer'.
# 2004-02-18 Felipe Wettstein

use LWP::Simple;
use strict;
use warnings;
use Mail::Mailer;

# Maintainer's email address; notified whenever no count could be parsed.
my $maintainer = 'maps@smpa.com';

# Address that is queried; the query string is appended to it.
my $adress = 'http://www.ncbi.nlm.nih.gov:80/entrez/query.fcgi?cmd=Search&db=nucleotide&term=';

# Query string.
my $query_string = '((16s[Title] AND rRNA[Title]) OR (16s[Title] AND ribosomal[Title] AND RNA[Title]))';

# Data are appended to this file. It might be simpler to use a full path -
# who knows in the end where the script is run.
# my $data = '/home/felipe/cgi-bin/entrez_daten.txt';
my $data = 'entrez_daten.txt';

# Append one tab-separated record ("<timestamp>\t<value>\n") to a file.
#   $file  - path of the file to append to
#   $stamp - timestamp text for the first column
#   $value - item count, or the literal word 'error', for the second column
# Dies with the file name and the OS error if the file cannot be opened,
# written or closed (buffered write errors only surface at close).
sub append_record {
    my ($file, $stamp, $value) = @_;

    open(my $fh, '>>', $file)
        or die("cannot open file '$file': $!");
    print {$fh} "$stamp\t$value\n"
        or die("cannot write to file '$file': $!");
    close($fh)
        or die("cannot close file '$file': $!");

    return;
}

# Every whitespace character in the query must be replaced with '+'.
# NOTE(review): only whitespace is encoded here; brackets and parentheses
# are sent as-is.
$query_string =~ s/\s/+/g;

# The whole website is retrieved and copied into $doc, then the number of
# items is parsed. get() returns undef when the request fails.
my $doc = get($adress . $query_string);
# For offline tests: my $doc = "Items 1-20 of 134530";

# The line of interest contains
#   Items 1-20 of 134530
my $time = localtime();

# The defined() guard routes a failed fetch into the error branch without
# an "uninitialized value" warning from the pattern match.
if (defined $doc && $doc =~ /Items\s\d+\-\d+\s+of\s(\d+)\s*\<\/div/) {
    # Count found: append it to file $data.
    append_record($data, $time, $1);
}
else {
    # String not found. Record the failure first, so the data file still
    # gets its 'error' row even if sending the mail fails afterwards.
    append_record($data, $time, 'error');

    # Send mail to the maintainer.
    my %headers = (
        'To'      => $maintainer,
        'Subject' => 'Cannot reach server',
    );
    my $body = "It seams as if today at ($time), no correct string could be retriefed from ($adress).\n";

    my $mailer = Mail::Mailer->new('sendmail');
    $mailer->open(\%headers);
    print {$mailer} $body;
    $mailer->close;
}

exit 0;