#!/usr/local/bin/perl -w # # subt_back_find_noise_from_cel_v3.pl # a module for use in analysis of .CEL files # tdesantis 011108 # # separate subtraction for any number of sectors # # sample code line to be written in program which calls this module: # %celvalues_back_subtracted = subt_back_find_noise_from_cel(@array_to_be_passed_to_sub_back_mod); # array to be passed in: # $horz_div = $_[0]; # $vert_div = $_[1]; # $back_thresh = $_[2]; # reference_to_celvalues_hash = $_[3]; ###################### sub subt_back_find_noise_from_cel { local $total_number_of_background_cells_used_for_noise_calc = 0; local $background_value_calculated = 0; local $counter = 0; $horz_div = $_[0]; $vert_div = $_[1]; $back_thresh = $_[2]; $hashref = $_[3]; %celvalues = %$hashref; # dereference the referenced hash that was passed in $horz_span_per_sector = int (256 / $horz_div); # some divide unequally ... $vert_span_per_sector = int (256 / $vert_div); # $total_sectors = $horz_div * $vert_div; ### sector layout example for 4 x 4 ### # 0 1 2 3 # 4 5 6 7 # 8 9 10 11 # 12 13 14 15 ####################################### $sum_of_variations = 0; # this grows as noise cells are identified for ($sector=0; $sector<$total_sectors; $sector++) { # print "background subtraction for sector $sector of $total_sectors ... \t"; $horz_indent = $sector % $horz_div; # sectors in from left edge $vert_indent = int ($sector / $horz_div); # sectors down from top edge # print "horz_indent: $horz_indent , vert_indent: $vert_indent\n"; $top_left_cell_in_sector_x_coord = ($horz_indent * $horz_span_per_sector); # spans can be fractions so int is important $top_left_cell_in_sector_y_coord = ($vert_indent * $vert_span_per_sector); # print "top_left_cell_in_sector_x_coord: $top_left_cell_in_sector_x_coord\t"; # print "top_left_cell_in_sector_y_coord: $top_left_cell_in_sector_y_coord\t"; $bottom_right_cell_in_sector_x_coord = ($top_left_cell_in_sector_x_coord + $horz_span_per_sector); $bottom_right_cell_in_sector_y_coord = ($top_left_cell_in_sector_y_coord + $vert_span_per_sector); if ($horz_indent == $horz_div - 1) { # condition met on any right edge sectors $bottom_right_cell_in_sector_x_coord = 256; # overrides calculated value which could be off when 256 isn't easily divisible # print "bottom_right_cell_in_sector_x_coord: $bottom_right_cell_in_sector_x_coord\n"; } if ($vert_indent == $vert_div - 1) { # condition met on any bottom edge sectors $bottom_right_cell_in_sector_y_coord = 256; # overrides calculated value which could be off when 256 isn't easily divisible # print "bottom_right_cell_in_sector_y_coord: $bottom_right_cell_in_sector_y_coord\n"; } $background_value_calculated = 0; # switch will be set to 1 after background is calculated $#all_inten_for_sector = -1; # empties array # $#all_inten_for_sector_back_sub = -1; # for debugging FOR1: for ($x=$top_left_cell_in_sector_x_coord; $x<$bottom_right_cell_in_sector_x_coord; $x++) { FOR2: for ($y=$top_left_cell_in_sector_y_coord; $y<$bottom_right_cell_in_sector_y_coord; $y++) { # print "$x $y $celvalues{$x}{$y}\n"; if ($background_value_calculated == 0) { # we are gathering intensities if (exists $celvalues{$x}{$y}{mean}) { # when span reaches-off of chip, it won't exist such as 257,4 push (@all_inten_for_sector,$celvalues{$x}{$y}{mean}); $counter++; } } if ($background_value_calculated == 1) { # the entire FOR1 loop is being recycled # for the purpose of accessing each cell in the sector # we are now subtracting background from each intensity $celmean_back_subtracted{$x}{$y} = $celvalues{$x}{$y}{mean} - $ave_back_for_sector; if ( $celmean_back_subtracted{$x}{$y} < 0 ) { $celmean_back_subtracted{$x}{$y} = 0.01 ; # print STDERR "$x $y $celvalues{$x}{$y}{mean} minus $ave_back_for_sector equals $celmean_back_subtracted{$x}{$y}\n"; } # print STDERR "$x $y $celvalues{$x}{$y}{mean} minus $ave_back_for_sector equals $celmean_back_subtracted{$x}{$y}\n"; # if I make it zero, I may have to divide by zero later # next line only useful for debugging # push (@all_inten_for_sector_back_sub,$celvalues_back_subtracted{$x}{$y}); ### also, pick-up values in this sector for noise calculations if ($celvalues{$x}{$y}{mean} <= $thresh_of_mean_for_noise_calc) { $variation = $celvalues{$x}{$y}{stdv} / (sqrt $celvalues{$x}{$y}{npixels}); $sum_of_variations = $sum_of_variations + $variation; # print STDERR "$celvalues{$x}{$y}{mean} $celvalues{$x}{$y}{stdv} $celvalues{$x}{$y}{npixels} $variation\n"; $total_number_of_background_cells_used_for_noise_calc++; } ################# } } } goto ENDING if ($background_value_calculated == 1); # this condition is meet only when exiting the FOR1 loop for the second time # some call this "spaghetti coding" :< # print "@all_inten_for_sector\n"; @all_inten_for_sector = sort sortNumerical @all_inten_for_sector; # print "@all_inten_for_sector\n"; # print "$counter\n"; # sleep 2; $cells_in_sector = @all_inten_for_sector; # returns number of elements in the array $cut_index = int ($cells_in_sector * $back_thresh / 100); # print "cells_in_sector: $cells_in_sector cuttoff index for percentage of $back_thresh : $cut_index\n"; ### now obtain the background slice for averaging and subtraction @background_intensities_to_be_averaged = @all_inten_for_sector[0..$cut_index]; # print "@background_intensities_to_be_averaged\n"; ### remember the highest intensity of the background cells for noise calc... @background_intensities_to_be_averaged = sort sortNumerical @background_intensities_to_be_averaged; $thresh_of_mean_for_noise_calc = $background_intensities_to_be_averaged[$#background_intensities_to_be_averaged]; # last element of the array ### $ave_back_for_sector = find_average(@background_intensities_to_be_averaged); # print STDERR "ave_back_for_sector: $ave_back_for_sector max: $thresh_of_mean_for_noise_calc\n"; $background_value_calculated = 1; goto FOR1 if ($background_value_calculated == 1); ENDING: # print "@all_inten_for_sector_back_sub\n"; } ##do math as on page 474 of GeneChip Analysis Suite 3.3 User Guide my $noise = $sum_of_variations / $total_number_of_background_cells_used_for_noise_calc; # notice NOT "my" print STDERR "noise: $noise $sum_of_variations / $total_number_of_background_cells_used_for_noise_calc \n"; my $cel_mean_back_subtract_ref = \%celmean_back_subtracted; return ($cel_mean_back_subtract_ref, $noise); } # end sub ############## sub sortNumerical(){ # sub from "Mastering Perl" p229 $a <=> $b; } ############## sub find_average() { local $intensity; local $n = 0; local $t = 0; local $average; foreach $intensity (@_) { $n++; $t = $t + $intensity; } $average = $t / $n; return $average; } ############## 1;