RFLP planner, source

#!/usr/bin/perl -w

# DESCRIPTION
# this program finds restriction enzymes that cut different homologue DNA sequences to generate a polymorph unique pattern of each sequence.

# HOW IT WORKS
     # align all sequences (clustal)
     # trimm all sequences to length of shortest sequence (corresponding alignment)
     # search for mutations
     # search for restriction enzymes with recognition site in mutations -> 'interesting enzymes'
     # digest all sequences with 'interesting enzymes'
     # draw image of simulated electrophoresis gel:
         # for every sequence one gel, for every restriction enzyme one lane, molecular weight marker is added

# AUTHOR AND FEEDBACK
# felipe wettstein
# felipew at users.sourceforge.net 

# FEATURES
# automatic reverse complement of reversed sequences
# sequences can be trimmed to the same lenght or not, but for the gelimage the original sequences are used
# marker bands can be labelled (markmarker)
# nice printout: sheet with fragment sizes of each sequence, file with alignment, one image for every sequence
# option to label images (with sequence name)
# option to define manually restriction enzymes with whom the sequence should be cut (EnzymeNAME or restriction^site)
# chose between lots of different input or output formats
     # i.e. abuse the program to cut primer sequences of a bunch of sequences:
         # 1. in one sequence cut primer sequence manually
         # 2. import all seq (fasta)
         # 3. choose: trimm = 1, aligned_format = 'fasta'
# automatic remove of bad aligning sequences
# chose range of searched restriction enzymes (4, 6, .. -bp cutter)

# KEYWORDS
# RFLP, restriction fragment lenght polymorphism, find mutations, biodiversity, simulation of gel electrophoresis, homologue sequences, PCR, restriction enzyme

# BUGS
# das 'trimmen' des alignments ist nicht sauber. die sequenzen werden nicht nur auf die kŸrzeste zurŸckgeschnitten,
# sondern auch wenn die sequenzen nicht gleich anfangen, bis zur base halt die allen gleich ist. sauber wŠre es ein
# 'und' zu machen auf die existenz eines buchstabens.

# LIMITATION
# first sequence should align well to the other, it is not removed

# TRY THIS
# animate sequence of images (needs X-Windows and ImageMagick)
     # $> animate -delay 25 imagepath/*.png
# make an anim gif with 'convert' (ImageMagick)
     # $> convert -delay 40 progis/perl/bioperl/phylo/*.png testo.gif


# HISTORY
# 2002-10-24 fw
     # align correct reverse complementary sequences
# 2002-10-28 fw
     # works for sequences with different length
     # site of interest is finally selected correct
     # nice printout 
# 2002-10-29 fw
     # try to print some gel images (does not work)
# 2002-11-21 fw
     # only one representative enzyme is chosen out of a bunch of isoshizoenzymes, and it works!
# 2002-12-03 fw
     # redundances are removed out of the list of interesting enzymes
     # draw an image for every sequence
# 2003-07-24 fw
     # clean up code, remarks, ..
# 2003-07-29 fw
     # clean code, a lot of main program added to subs, trimming of alignment works, removing of bad sequences works
     # make extra file with fragment sizes
# 2003-12-3 fw
     # possible to draw the gel image for sequences digested with desired enzymes, independent from polymorphisms
     # clean code, make it faster (more list-refs)

# TODO
# add option to mark primers on gel (red / blue dyes)
# add option to remove primers, primers could be added with an extra file, programm could be used to remove primers:
         # first do alignement, add primers to alignement, make one strand of primers (fw------back), trimm sequences
         # mark primers in gel image, decide for fragment size calculation with or without primers
         # the option should be to choose betwen remove primers, trim ends to shortest seq, or do nothing
# make some more variables local ($sequence, alignment, $enzyme, $enz, ..)

# MANUALS OF USED MODULES
# man Bio::Tools::RestrictionEnzyme
# man Bio::Root::IO
# man Bio::Align::AlignI
# man Bio::AlignIO
# man Bio::Seq
# man Bio::Tools::Run::Alignment::Clustalw


use  strict;

# alignment
use  Bio::SeqIO;
use  Bio::AlignIO;
use  Bio::Align::AlignI;
use  Bio::Tools::Run::Alignment::Clustalw;

# restriction
use  Bio::Tools::RestrictionEnzyme;

# gel drawing
use  GD;
use  Bio::Tools::Gel;

# colored print
use  Term::ANSIColor;  # only for one single red nucleotide:)


###########################################################################
###################################################### IMPORTANT PARAMETERS

###########################################################################

# path to directory containing the sequences file, images will be safed in this folder
#my @dir = qw (Users felipe Documents forschung sequenzen diox);
my  @dir  =  qw (Users felipe Desktop);
my  $separator  =  '/';  # for windoofs choose '\'

# filename of unaligned sequences
my  $filename  =  'sample.txt';
my  $unaligned_format  =  'fasta';

# output name of fragment text file
my  $output=  "fragments.txt";

# desired enzymes
my  @all_enz_interest;  # all enzymes of interst of a sequence

# if @all_enz_interest is defined now, THE PROGRAM WILL NOT SEARCH FOR POLYMORPHISM
# comment if you wish to >SEARCH FOR< enzymes that produce polymorphisms
#@all_enz_interest = qw (Bsh1236I BsuRI  MboI TaqI  TruI);

# it is also possible to add an interesting cutting site (will be added, but only if @all_enz_interest)
my  @interest_site  =  qw (T^TAA);  

# choose if output and calculations should be based on trimmed sequences (same size as shortest sequence)
my  $trimm  =1;  # yes = 1, no trimm = 0

# restriction enzyme category (size recognition site, usually 4 or 6)
my  $enz_range=4;

# label image, the sequence name is written on each image
my  $label_image  =  1;  # label = 1, nameless = 0

###########################################################################
#################################################### UNIMPORTANT PARAMETERS

###########################################################################
# filename of aligned sequences
my  $aligned_format  =  'clustalw';  # alignment format for output and safed file
# alternative formats  (man Bio::AlignIO)
#               fasta       FASTA format
#               pfam        pfam format
#               selex       selex (hmmer) format
#               clustalw    clustalw (.aln) format
#               msf         msf (GCG) format
#               nexus       Swofford et al NEXUS format
#               pfam        Pfam sequence alignment format
#               phylip      Felsenstein's PHYLIP format
my  $alignfilename  =  $filename;  # filename for aligned (and chopped) sequences
     $alignfilename  =~  s/\.txt//;
     $alignfilename  =  $alignfilename."_align_$aligned_format.txt";

# alignment parameters
my  @params  =  ('ktuple'  =>  3,  'matrix'  =>  'BLOSUM',  'quiet'  =>  1);
# see clustalw manual for more information

# supposed minimal similarity [%] of homologue sequences, important to detect reverse complement
# sequences, a value below 80 is bad
my  $similarity  =  70;

# default settings for molecular part of gel
# molecular weight marker
my  @marker  =  (20,40,60,80,100,120,140,160,180,200,220,240,260,280,300,400,500,1000);

# important bands of the molecular weight marker (labeled and marked in different color)
my  @markmarker  =(100,200,300,400,500);

# dilate rate: how far the gel runs
my  $dilate  =  80;

# default settings for image view, colornames from colors defind below
my  $bgcolor  =    'black';  # gel color
my  $bandcolor  =  'grey';  # color of bands
my  $markercolor  =  'white';  # color of markmarker
my  $height  =     400;  # height of gel
my  $lane_width  =     30;  # width of each lane

# image format of gelimages
my  $image_format  =  "png";  # options: png, jpeg

# define colors (netscape colors and names)
my  %color_defs  =  (
     'black'  =>   [   0,   0  ,  0],
     'grey'  =>    [144,144,144],
     'white'  =>   [255,255,255]);

# alternative colors
#   'black' =>  [  0,  0 , 0],
#   'silver' => [192,192,192],
#   'grey' =>   [144,144,144],
#   'red' =>    [255,  0,  0],
#   'white' =>  [255,255,255]

###########################################################################
########################################################## GLOBAL VARIABLES

###########################################################################

# if similarity is set < 50 set to 85
int  ($similarity  /  50)  or  $similarity  =  85;  

my  ($aln,$factory,$cons,$length,@not_equal);
     # $aln:         alignment object
     # $factory:     settings for clustal alignment
     # $cons:        match line (asteriks where all nucleotides equal, space where unequal)
     # $length:      length of alignment, ie consensus sting
     # @not_equal:   spots where nucleotides differ

# path to sequence files
my  $unaligned_file  =  $separator.join($separator,@dir).$separator.$filename;
my  $aligned_file  =  $separator.join($separator,@dir).$separator.$alignfilename;

# push sequences in Bio::Seq array of unaligned sequences. sequences in fasta format
my  (@seq_array,  $sequence);
my  $stream  =  Bio::SeqIO->new(  '-file'=>  $unaligned_file  ,'-format'  =>  $unaligned_format);
while  ($sequence  =  $stream->next_seq()  )  {
     push  (@seq_array,  $sequence);
}

# make a copy of sequence array, used when drawing the gel
my  @seq_array_copy  =  @seq_array;

###########################################################################
################################################################ ALIGNEMENT

###########################################################################

# do not proceed if clustal is not present
# wieso das nicht mehr geht ist mir auch nicht klar ######!!!
#Bio::Tools::Run::Alignment::Clustalw->exists_clustal() || die "Clustalw program is not found. Was it installed?\n";

print  "align ";
# align: pass reference, instead of values. @seq_array is an array of Bio::Seq objects. \@seq-array is the reference
# get number of sequences involved, percent identity of alignement
my  ($number,$ident)  =  alignment  (\@seq_array);  # number of sequences and percent identity of alignment
printf  "- percent identity: %g\n",$ident;

# it is supposed that reverse complement sequences, decrease the alignment similarity dramatical:
     # compare identity to supposed sequences identity
if  ($ident<$similarity)  {

     print  "some sequences might be reverse complement.\n";
     &find_bad_seq  ();  # search for bad aligned sequences, and reverse complement or kick out

     #realign after removing bad sequence
     print  "align ";
     @seq_array  =  $aln->each_seq();  # get sequence array from modified alignment
     ($number,$ident)  =  alignment  (\@seq_array);  # number of sequences and percent identity of alignment
     printf  "- percent identity: %g\n",$ident;
}

# $cons, a global variable, is a string where asteriks indicate nucleotides present in all sequences
# = matchline, [*] indicate match, [whitespace] indicate no match
$cons=$aln->match_line();

# length of consensus string
$length=length($cons);


# reduce alignment to consensus (remove leading and tailing gaps)
if  ($trimm)  {
     trimm_alignment  ();
}
# put all sequences back in the original @seq_array to take care of direction changes and dots inserted
@seq_array  =  $aln->each_seq();  # get sequence array from modified alignment

# print alignement
print  "\n";

# alignment output to screen
# my $strout = Bio::AlignIO->newFh('-format' => $aligned_format);
# print $strout $aln;

# alignment output to disk
my  $stroutfile  =  Bio::AlignIO->newFh(-file  =>  ">$aligned_file",  '-format'  =>  $aligned_format);
print  $stroutfile  $aln;

`open $aligned_file`;


###########################################################################
############################################################### RESTRICTION

###########################################################################

# a random enzyme object, used to get list of available enzymes
my  $enzyme  =  Bio::Tools::RestrictionEnzyme->new(-name=>'EcoRI');  # enzyme object, is later just an enzyme in a list

# find isoshizomers, 
my  %isoshizo;  # hash: key= restriction site, value= space separated enzyme names
my  $ref_enzyme_list  =  find_isoshizo  ();  # list of enzyme objects, representative for a group of isoshizomer

# if the interesting enzymes have not been defined in the beginning of program
# they will searched now
if  (!  @all_enz_interest)  {
     # find spots of unequal nucleotides, (= mutations)
     find_spots  ();
     
     # make list of available enzymes with recognition site in $enz_range-> redundant list of enzymes of interest
     # while checking each site of difference, the list of fitting enzymes is printed
     # remove redundancy in enzyme list
     printheader  ();  # print header for enzyme output
     @all_enz_interest  =  remove_redundance  (check_all_sequences  ());
     
     print  "interesting enzymes: ".join  (" ",@all_enz_interest)."\n\n";
     # make enzyme objects of enzyme names
     foreach  (@all_enz_interest)  {
         $_  =  Bio::Tools::RestrictionEnzyme->new(-name=>$_);
     }
     
}  else  {
     # first: add enzymes defined through their recognition site
     add_enzyme();
     
     # remove duplicate names
     my  %redundant  =  map  {$_  =>  1}  @all_enz_interest;
     @all_enz_interest  =  keys  %redundant;
     
     # make enzyme objects of enzyme names, if present in list
     make_object  ();
     
     # make a custom enzyme, and add to enzymes of interest
     make_custom_enzyme  ();

     # print interesting enzymes
     print  "interesting enzymes:\t";
     foreach  (@all_enz_interest)  {
         print  $_->name()." ";
     }
     
     print  "\n\n";
}  # end else


###########################################################################
############################################################### GEL DRAWING

###########################################################################

# for every sequence one image (seq_name.png), each image with one lane for every enzyme

# globale variable containing lines of the written text
my  @text_lines  =  time()."\n\nENZYME\t\tFRAGMENTS\n-------------------------";  # text file will be holding information about fragments generated with each enzyme on each sequence

# make a template gel, with loading slots, enzyme names and marker lanes
# it is a GD Png Data Format Image
my  $template  =  make_template();

print  "safed images:";

# take one sequence after another (the origignal sequences)
foreach  $sequence  (@seq_array_copy)  {

     # read the template
     my  $gelimage=  GD::Image->newFromPngData($template);
     
     #initialize variables
     my  $lane  =  1;  #lane counter, 1 = first lane = first enzyme
     my  $sequence_id  =  $sequence->id;
     
     push  (@text_lines,  "\n$sequence_id\n");

     foreach  $enzyme  (@all_enz_interest)  {  # one enzyme after another

         my  @bands  =  $enzyme->cut_seq($sequence);
         my  $line  ='';
         
         foreach(@bands)  {
             $line  =  $line."\t".length  $_;  
         }

         push  (@text_lines,  $enzyme->name."\t$line\n");

         # draw fragments of ($sequence cut with $enzyme) run (migrated with $dilate) in $lane, with $bandcolor in $gelimage
         # ($lane,$color,@bands)
         draw_bands  (\$gelimage,$lane,$color_defs{$bandcolor},fragments_on_gel(\@bands));

         $lane++;  # next lane
     }

     # safe image (and label it if desired)
     safe_image  (\$gelimage,$sequence_id,$markercolor);
}

safe_textfile  (\@text_lines);

print  "\n";
exit;




###########################################################################
###################################################################### SUBS

###########################################################################

# subs for alignment

# clustal alignment
sub  alignment  {
     
     my  $seq_array_ref  =  shift  @_;  # read sequence array reference
     $factory  =  Bio::Tools::Run::Alignment::Clustalw->new(@params);  # construct
     
     # do alignment
     $aln  =  $factory->align($seq_array_ref);  # actual alignement, all sequences

     return  ($aln->no_sequences,$aln->percentage_identity);  # number of sequences and percent identity of alignment
}

# find bad sequences, reverse complement or remove them from alignment
sub  find_bad_seq  {

     my  $sequence;  # a sequence object of alignment
     my  $aln2;  # temporary alignment
     my  $i;  # counter
     
     for  ($i=2;  $i<=$number;  $i++)  {  # compare first sequence to the others
     
         $aln2  =  $aln->select_noncont(1,  $i);  # new temporary alignment

         if  (($aln2->percentage_identity)  <  $similarity)  {
         
             # if alignment between first and other sequence is bad, complement the other
             $sequence  =  $aln->get_seq_by_pos($i);
             $aln->remove_seq($sequence);
             
             $sequence=$sequence->revcom();  # reverse complement wrong sequence
             # da gibt es wohl einen bioperl fehler im modul:
             # Use of uninitialized value in multiplication (*) at /Library/Perl/Bio/LocatableSeq.pm line 393.
             # - melden !!!!!!!
             $aln  =  $factory->profile_align($aln,$sequence);  

             # supposing that the new sequences always are added to the end, they should be number = $number
             $aln2  =  $aln->select_noncont(1,$number);  # new temporary alignment, might be a problem that first seq also has an id (is compared to itself)
             $ident=  $aln2->percentage_identity;  # identity of alignment between first and other seq

             if  ($ident  <  $similarity)  {
                 # sequence does not seem to align well in any direction, it will be removed
                 $aln->remove_seq($aln->get_seq_by_pos($number));
                 print  "\tthe sequence '".$sequence->id()."' is too diffenrent from the others, it was removed from alignement.\n";
             }  else  {
                 print  "\tthe sequence '".$sequence->id."' was reversed.\n";
             }
         }
     }
}


# reduce alignment to consensus (remove leading and tailing gaps)
sub  trimm_alignment  {

     my  ($start,$end)  =  ($cons,$cons);  # start and endpoint of common sequence in alignment
     $start  =~  s/^\s*//g;
     $start  =  $length  -  length($start)+1;
     $end  =~  s/\s*$//g;
     $end  =  length($end);

     # cut not interesting starting and leading gaps from alignment
     $aln  =  $aln->slice($start,  $end);

     # calculate new matchline (consensuns string)
     $cons=$aln->match_line();

     # length of consensus string
     $length=length($cons);
}

###########################################################################
# subs for restriction enzymes

# find isoshizomers, store them in a hash: key= restriction site, value= space separated enz names, return a list of representative isoshizomer
sub  find_isoshizo  {

     my  ($temp_site,  $temp_name,@temp_enz_list);  #temporary variables: enzyme- site and -name, enzyme_list
     my  @enzyme_list  =  $enzyme->available_list($enz_range);  # list of all described enzymes in '$enzyme_range' (4, 6, .. bp-cutter)

     foreach  $enzyme  (@enzyme_list)  {
     $enzyme  =  Bio::Tools::RestrictionEnzyme->new(-NAME=>$enzyme);  # enzyme name is converted to enzyme object
         $temp_site  =  $enzyme->site();
         $temp_name  =  $enzyme->name;
         if  ($isoshizo{$temp_site})  {  # restriction site known: add alternative enzyme name
             $isoshizo{$temp_site}  =  $isoshizo{$temp_site}." ".$temp_name;
         }  else  {  #restriction site unknown: add to hash
             $isoshizo{$temp_site}  =  $temp_name;
             # because i do not know how to send objects around from variable to variable!!!!!!!!!!!!!!:
             # make an array of enzymenames
             # each site representet only once, enzyme name = first name of value of %isoshizo
             push  (@temp_enz_list,$temp_name);
         }
     }
     # convert @temp_enz_list in list of enzyme objects
     for  (my  $i=0;  $i  <  @temp_enz_list;  $i++)  {
         $temp_enz_list[$i]  =  Bio::Tools::RestrictionEnzyme->new(-NAME=>$temp_enz_list[$i]);
     }
     \@temp_enz_list;
}

# find spots were sequences are not equal (mutations), store in @not_equal (global variable)
sub  find_spots  {

     my  $temp;
     do  {
         $cons=~s/^\*+//;
         $temp=length($cons);
         push  (@not_equal,  $length-$temp+1);
         $cons=~s/^\s//;
     }  until  ($cons  eq  '');
     
     # last number is not a gap, is end of file: pop it!
     pop  (@not_equal);
     print  "\@not_equal:\t".join('.',  @not_equal);
}

# choose spot around the mutated nucleotide, depending on size of restrictioin recognition site
sub  chose_hotsite  {

     my  ($i,$spot)  =  @_;
     my  ($start,$stop);  #$start: where the subsequence starts, begine of site of interest, $stop: where the site of interest stops
     my  ($site,$sitea,$siteb,  $spotn);  # $sitea, $siteb: left, right of spot

     # IS THE FOLLOWING LINE NEEDED?
     if  ($spot<2){$spot=2;}
     # extract dna sequence which is interesting for restriction enzymes
     # dots (deletions) are taken into account

     $start  =  $spot-$enz_range+1;
     if  ($start<1)  {$start=1;}

     $stop  =  $spot+$enz_range-1;
     if  ($stop>$length)  {$stop=$length;}

     do  {
         # left
         $sitea  =  $seq_array[$i]->subseq($start,$spot-1);
         $start--;
         if  ($start<1)  {$start=1;}
     }  until  ((($sitea=~tr/[acgtnACGTN]//)==($enz_range-1))||($start==1));
         
     do  {
         #right from spot
         $siteb  =  $seq_array[$i]->subseq($spot+1,$stop);
         $stop++;
         if  ($stop>$length)  {$stop=$length;}
     }  until  ((($siteb=~tr/[acgtnACGTN]//)==($enz_range-1))||($stop==$length));

     # the spot can be a '.' or a nucleotide,
     # in case of a '.' it will be removed
     $spotn   =  $seq_array[$i]->subseq($spot,$spot);
     
     # remove dots
     $sitea=~s/\.//g;
     $spotn=~s/\.//g;
     $siteb=~s/\.//g;
     
     return  ($sitea,$spotn,$siteb);
}

# print output header
sub  printheader  {

     print  "\n\nlooking for enzymes...\n";
     print  "range of interest: $enz_range -bp cutters\n";
     print  "\n   spot\t(SI";
     print  color  'red bold';
     print  "T";
     print  color  'reset';
     print  "E):\t\tEnzyME (CLEA^VAGE)\n";
     print  "-----------------------------------------------------";
}

# check all sequences for restriction enzymes that cut in mutation, output results
sub  check_all_sequences  {

     my  ($site,$sitea,$siteb);  # $sitea, $siteb: left, right of spot
     #$site: DNA sequence that is searched for restriction enzyme recognition site
     
     my  ($spot,$spotn);  #$spot: where they are not equal, $spotn: nucleotide in spot
     
     my  @all_enz_interest;  # all enzymes that cut in $site

     for  (my  $i=0;$i<$number;$i++)  {

         # print sequence name
         print  "\n".  $seq_array[$i]->display_id()  ."\n";
         
         # check all spots of mutation in this particular sequence
         foreach  $spot  (@not_equal)  {
         
             # select whole site around the hotspot
             ($sitea,$spotn,$siteb)  =  chose_hotsite($i,$spot);
             $site=$sitea.$spotn.$siteb;
         
             # look what enzymes cut this site (=interesting enzymes)
             my  $ref_enz_interest  =  find_enzymes  ($site);
         
             # print intersting enzymes
             if  (@$ref_enz_interest)  {
                 push  (@all_enz_interest,  @$ref_enz_interest);  #collect all used enzymes in list
                 printlist  ($spot,  $sitea,$spotn,$siteb,$ref_enz_interest);
             }
         }
         print  "\n";
     }
     
     # return reference to @all_enz_interest
     \@all_enz_interest;
}

# find enzymes that cut in defined sequence
sub  find_enzymes  {

     my  ($site)  =  shift  @_;
     my  @enz_interest;    # list of enzymes that cut site of interest
     my  $string;
     my  $i=0;

     foreach  $enzyme  (@$ref_enzyme_list)  {
         $string=  $enzyme->string();
         if  ($site=~/$string/ig)  {
             push  (@enz_interest,$enzyme);
         }
         $i++;
     }
     \@enz_interest;
}

# print list of fitting enzymes
sub  printlist  {

     my  ($spot,  $sitea,$spotn,$siteb,$ref_enz_interest)=@_;
     print  " in $spot\t($sitea";
     print  color  'bold red';
     if  ($spotn)  {print  $spotn;}
     else  {print  ".";}
     print  color  'reset';
     print  "$siteb):\t";
     # print enzymename of isoshizomers and (-site)
     foreach  $enzyme  (@$ref_enz_interest)  {
         $enzyme  =  $enzyme->site();
         print  "$isoshizo{$enzyme} ($enzyme)\ ";
     }
     print  "\n";
     return
}

# remove redundance in list
# might be done faster, what i do not like is the conversion objects to realname enzymes
# but i do not manage to return objects
sub  remove_redundance  {

     my  $ref_all_enz_interest  =  shift  @_;
     # put object list in hash, key= enzyme object, value= enzyme name
     # redundant enzymes are eliminated in hash
     my  %redundant  =  map  {$_  =>  $_->name()}  @$ref_all_enz_interest;
     return  values  %redundant;
}

# add enzymes defined through their recognition site, to list of interesting enzymes, 
sub  add_enzyme  {
     my  $i  =  @interest_site;  # counter
     print  "added:\t\t";
     while  ($i)  {
         my  $enz  =  shift  @interest_site;
         if  (  $isoshizo  {$enz}  )  {  # if this site is known, choose enzyme from list
             my  $first  =  $isoshizo  {$enz};
             $first  =~  s/^(\w+)\s.*$/$1/m;
            push (@all_enz_interest, $first);
             print  "$first ($enz) ";
         }  else  {
             # return unknown restriction site to array, will be added later as new enzyme
             push  (@interest_site,  $enz);
         }  # end else
         $i--;
     }  # end while $i
     print  "\n";
}

# make enzyme objects of enzyme names, if present in list
sub  make_object  {
     my  $i  =  @all_enz_interest;  # counter
     print  "unknown enzyme:\t";
     while  ($i)  {
         my  $enz  =  shift  @all_enz_interest;
         if  ($enzyme->is_available($enz)  )  {
             push  (@all_enz_interest,  Bio::Tools::RestrictionEnzyme->new(-name=>$enz));
         }  else  {  print  $enz;  }
         $i--;
     }  # end while
     print  "\n";
}

sub  make_custom_enzyme  {
     my  $i  =  @interest_site;  # counter
     while  ($i)  {
         my  $site  =  shift  @interest_site;
         my  $enz_name  =  "CE".$i;
         push  (@all_enz_interest,  Bio::Tools::RestrictionEnzyme->new(-name=>$enz_name."--".$site,  -make  =>'custom'));
         # add line for isoshizomers to prevent error in output
         $isoshizo{$site}  =  "CE".$i;
         print  "unknown site:\t$site,\tcustom enzyme:\t$enz_name ($site)\n";
         $i--;
     }
}

###########################################################################
#subs for geldrawing

# make a gel-image-template
sub  make_template  {
     my  $lanes  =  @all_enz_interest  +1;  # number of lanes
     my  $width=  ($lanes+2)*($lane_width+3);  # width of gel
     my  $template=  new  GD::Image($width,$height);
     
     # define background (background = first color allocated)
     my  ($r,  $g,  $b)  =  map  {  ${$color_defs{$bgcolor}}[$_]  }  (0..2);  # extract backgroundcolor
     $template->colorAllocate($r,  $g,  $b);
     
     # load other colors (dani)
     foreach  my  $color  (keys  %color_defs)  {
         ($r,  $g,  $b)  =  map  {  ${$color_defs{$color}}[$_]  }  (0..2);
         $color_defs{$color}  =  $template->colorAllocate($r,  $g,  $b);
     }
     
     # load marker: run fragments @marker, draw bands on $gelimage
     draw_bands  (\$template,$lanes,$color_defs{$bandcolor},fragments_on_gel(\@marker));
     
     #label extra defined bands of marker ($markmarker)
     markmarker  (\$template,$lanes,$color_defs{$markercolor},@markmarker);
     
     # label lanes
     my  $lane  =1;
     foreach  (@all_enz_interest)  {
         label  (\$template,$lane,$_->name,$color_defs{$bandcolor});
         $lane++;
     }
     
     # convert image template in png format
     $template  =  $template->$image_format;
     
     $template;
}

# run fragments on gel
sub  fragments_on_gel  {

     my  $ref_fragments  =  shift  @_;
     my  $gel  =  Bio::Tools::Gel->new(-seq=>$ref_fragments,-dilate=>$dilate);
     my  %bands  =  $gel->bands;
     \%bands;  # return ref to hash (seq_id => distance)
}

# draw bands on gel
sub  draw_bands  {

     my  ($ref_gelimage,$lane,$color,$hashref_bands)  =  @_;

     my  $lane_left  =  $lane*(3+$lane_width);
     my  $lane_right  =  $lane_left+$lane_width;
     
     # draw loading hole
     $$ref_gelimage->rectangle($lane_left,5,$lane_right,8,$color);
     
     # draw bands
     foreach  my  $band  (values  %$hashref_bands)  {
         $band  =  int($band);
         $$ref_gelimage->line($lane_left,$band+8,$lane_right,$band+8,$color);
     }
}

# label lanes with corresponding enzyme name
sub  label  {

     my  ($ref_gelimage,$lane,$enz,$color)  =  @_;
     my  $lane_left  =  $lane*(3+$lane_width);
     $$ref_gelimage->string(gdTinyFont,$lane_left+2,12,$enz,$color);
     # there are font sizes: gdGiantFont, gdLargeFont, gdMediumBoldFont, gdSmallFont, gdTinyFont
}

# mark some special bands of the marker and labelled
sub  markmarker  {

     my  ($ref_gelimage,$lane,$color,@cuts)  =  @_;

     my  $gel  =  Bio::Tools::Gel->new(-seq=>\@cuts,-dilate=>$dilate);
     my  %bands  =  $gel->bands;
     foreach  my  $band  (keys  %bands){
     }
     
     my  $lane_left  =  $lane*(3+$lane_width);
     my  $lane_right  =  $lane_left+$lane_width;
     
     # make bands bold (ad a pixel in running direction)
     foreach  my  $band  (keys  %bands){
         $band  =  int($band);
         # draw bold band
         $$ref_gelimage->rectangle($lane_left,$bands{$band}+8,$lane_right,$bands{$band}+9,$color);
         # label bold band
         $$ref_gelimage->string(gdTinyFont,$lane_left+$lane_width+4,$bands{$band}+4,$band,$color);
         }
}

# safe image and open it (mac os x only)
sub  safe_image  {

     my  ($ref_gelimage,$name,$color)  =  @_;

     # label image if desired
     if  ($label_image)  {
         $$ref_gelimage->string(gdTinyFont,5,$height-10,$name,$color_defs{$color});
     }

     # safe file as PNG
     my  $image  =  $$ref_gelimage->$image_format;
     
     #write data in filehandle BLA, name = $name
     $name=  $name.".$image_format";
     open  (BLA,  ">$name")  ||  die  "Cannot safe $name: $!";
         binmode  BLA;
         print  BLA  $image;
     close  BLA;
     
     print  "\t$name";
     `open '$name'`;  # Mac OS X only, I think
}

# safe textfile with all intersting enzymes, and corresponding fragment sizes of each sequence
# endnote: list of isoshizomers of involfed enzymes
sub  safe_textfile  {

     my  $ref_lines  =  shift  ;
     push  (@$ref_lines,"\nSITE\t\tISOSHIZOMER\n---------------------------\n");
     
     foreach  (@all_enz_interest)  {
         push  (@$ref_lines,$_->site."\t\t".$isoshizo{$_->site()}."\n");
     }

     open  (BLA,  ">$output")  ||  die  "Cannot safe $output: $!";
         print  BLA  @$ref_lines;
     close  BLA;

     `open $output`;  #  Mac OS X only, I thought (different ;-)
}