#!/usr/bin/perl -w #use strict; #VARIABLES TAKEN FROM COMMAND LINE my $sequence; #sequence to be analyzed my $offset; #user supplied first coordinate, defaults to one my $motif_output_sort_mode; #0 for sort by coordinate, 1 for sort by motif chomp for @ARGV; ($sequence, $motif_output_sort_mode, $offset) = @ARGV; $offset = $offset || 1; $offset--; # AWR change to Upper Case for ALL characters $sequence=uc($sequence); #VARIABLES GENERATED INTERNALLY my $process_id = $$; my $motif_list_file_name = "$process_id" . "_" . "motif.txt"; my $wiggle_plot_file_name = "$process_id" . "_" . "wiggle.txt"; # Output PID to grab needed files from PHP print "$process_id"; $sequence =~ tr/A-Z//cd; #removes ALL non-uppercase letters from input sequence #Reference: Rhoads AR and Friedberg F. 1997. Sequence motifs for calmodulin recognition. FASEB J., 11:331-340. my %motifs = ( '1-5-8-14' => '[FILVW].{3}[FAILVW].{2}[FAILVW].{5}[FILVW]', 'BASIC_1-5-8-14' => '[RK][RK][RK][FILVW].{6}[FAILVW].{5}[FILVW]', '1-8-14' => '[FILVW].{6}[FAILVW].{5}[FILVW]', '1-14' => '[FILVW].{12}[FILVW]', '1-5-10' => '[FILVW].{3}[FAILVW].{4}[FILVW]', 'BASIC_1-5-10' => '[RK][RK][RK][FAILVW].{3}[FILV].{4}[FILVW]', '1-10' => '[FILVW].{8}[FILVW]', '1-16' => '[FILVW].{14}[FILVW]', '1-12' => '[FILVW].{10}[FILVW]', 'IQ' => '[FILV]Q.{3}[RK]G.{3}[RK].{2}[FILVWY]', 'IQ-LIKE' => '[FILV]Q.{3}[RK].{8}', 'IQ-2A' => '[IVL]Q.{3}R.{4}[VL][KR].{1}W', 'IQ-2B' => '[IL]Q.{2}C.{4}K.{1}R.{1}W', 'IQ-UNCONVENTIONAL' => '[IVL]Q.{3}R.{4}[RK].{2}[FILVWY]'); my %hydropho = ( A => 1.8, R => -4.5, N => -3.5, D => -3.5, C => 2.5, E => -3.5, Q => -3.5, G => -0.4, H => -3.2, I => 4.5, L => 3.8, K => -3.9, M => 1.9, F => 2.8, P => -1.6, S => -0.8, T => -0.7, W => -0.9, Y => -1.3, V => 4.2); my %charge = ( A => 0, R => 1, N => 0, D => -1, C => 0, E => -1, Q => 0, G => 0, H => 0, I => 0, L => 0, K => 1, M => 0, F => 0, P => 0, S => 0, T => 0, W => 0, Y => 0, V => 0); open(MOTIF,">$motif_list_file_name"); open(WIGGLE,">$wiggle_plot_file_name"); print MOTIF "Amino Acid: $sequence\n"; my $found_index = 1; my @collected_matches; my @found_motif_list; foreach my $named_motif (keys %motifs) { #iterate through all of the motifs in %motifs try to find them in SEQ my $motif = $motifs{$named_motif}; my $matches = FindMotif($sequence, $motif); my @matches = @$matches; foreach my $match (@matches) { push @collected_matches, { type => $named_motif, matched_seq => $match->{hit}, start => $match->{start}, end => $match->{end} }; $found_index++; } } foreach my $hit (@collected_matches) { #determine the coordinates of each motif found in the previous loop my $match = $hit->{matched_seq}; my $start = $hit->{start}; my $end = $hit->{end}; #my ($start,$end) = FindCoordinatesOfMatch($sequence, $match); push @found_motif_list, { type => $hit->{type}, matched_seq => $match, start => $start, end => $end }; } my %wiggle; my @init_array = (0) x length($sequence); foreach my $motif (keys %motifs) { @{$wiggle{$motif}} = @init_array; } @{$wiggle{total}} = @init_array; @{$wiggle{is_hit}} = @init_array; @{$wiggle{charge}} = @init_array; @{$wiggle{hydro}} = @init_array; my @wiggle = @init_array; my @seq_wiggle = split(//,$sequence); my @sorted_motifs; if ($motif_output_sort_mode == 0) { @sorted_motifs = sort {$a->{start} <=> $b->{start}} @found_motif_list; } elsif ($motif_output_sort_mode == 1) { @sorted_motifs = sort {$a->{type} cmp $b->{type} || $a->{start} <=> $b->{start}} @found_motif_list; } foreach my $hit (@sorted_motifs) { my %data = %$hit; my $matched_seq = $data{matched_seq}; my $type = $data{type}; my $start = $data{start}; my $end = $data{end}; my $display_start = $start + $offset; my $display_end = $end + $offset; for (my $i = $start; $i <= $end; $i++) { @{$wiggle{$type}}[$i - 1]++; @{$wiggle{total}}[$i - 1]++; } print MOTIF "$type\t$matched_seq\t$display_start-$display_end\n"; } my $index = 0; foreach my $AA (@seq_wiggle) { @{$wiggle{charge}}[$index] = $charge{$AA}; @{$wiggle{hydro}}[$index] = $hydropho{$AA}; $index++; } my $window_size = 10; my $count_thresh = 2 * $window_size; my $hydro_thresh = 2.5 * $window_size; my $charge_thresh = 1; my $window_start = 0; my $window_end = $window_size - 1; my $motif_window_count = 0; my $charge_window_count = 0; my $hydro_window_count = 0; for (my $i = $window_start; $i <= $window_end; $i++) { $motif_window_count += @{$wiggle{total}}[$i]; $charge_window_count += @{$wiggle{charge}}[$i]; $hydro_window_count += @{$wiggle{hydro}}[$i]; } if (IsHit($motif_window_count,$charge_window_count,$hydro_window_count)) { for (my $i = $window_start; $i <= $window_end; $i++) { @{$wiggle{is_hit}}[$i] = 1; } } $motif_window_count -= @{$wiggle{total}}[$window_start]; $charge_window_count -= @{$wiggle{charge}}[$window_start]; $hydro_window_count -= @{$wiggle{hydro}}[$window_start]; $window_start++; $window_end++; while ($window_end < length($sequence)) { $motif_window_count += @{$wiggle{total}}[$window_end]; $charge_window_count += @{$wiggle{charge}}[$window_end]; $hydro_window_count += @{$wiggle{hydro}}[$window_end]; if (IsHit($motif_window_count,$charge_window_count,$hydro_window_count)) { for (my $i = $window_start; $i <= $window_end; $i++) { @{$wiggle{is_hit}}[$i] = 1; } } $motif_window_count -= @{$wiggle{total}}[$window_start]; $charge_window_count -= @{$wiggle{charge}}[$window_start]; $hydro_window_count -= @{$wiggle{hydro}}[$window_start]; $window_start++; $window_end++; } my $wig_index = 0; print WIGGLE "coord\tamino acid\ttotal motif count\tcharge\thydro\tis_hit"; foreach my $motif (sort {$a cmp $b} keys %motifs) { print WIGGLE "\t$motif"; } print WIGGLE "\n"; foreach my $i (0...$#seq_wiggle) { my $display_index = $wig_index + $offset + 1; print WIGGLE "$display_index\t$seq_wiggle[$wig_index]\t"; print WIGGLE "@{$wiggle{total}}[$wig_index]\t@{$wiggle{charge}}[$wig_index]\t@{$wiggle{hydro}}[$wig_index]\t@{$wiggle{is_hit}}[$wig_index]"; foreach my $motif (sort {$a cmp $b} keys %motifs) { print WIGGLE "\t@{$wiggle{$motif}}[$wig_index]" } print WIGGLE "\n"; $wig_index++; } sub FindMotif { my ($seq, $motif) = @_; my @hits; while ($seq =~ /(?=($motif))/g) { push @hits, {hit => $1, start => $-[1] + 1, end => $+[1]}; } return \@hits; } sub FindCoordinatesOfMatch { my ($seq, $match) = @_; my ($start,$end); $seq =~ /$match/; return ($-[0],$+[0]); } sub IsHit { my ($count,$charge,$hydro) = @_; if ($count >= $count_thresh && $charge >= $charge_thresh && $hydro <= $hydro_thresh) { return 1; } else { return 0; } }