# clust.awk -- cluster consecutive overlapping segments whose end positions
# lie within `shift` of each other, and represent each cluster by the
# segment that has the maximal score among them.
# Output has the same format as the input file computed by HomoStRscan.
#
# Command line:
#   awk -f clust.awk -v shift=5 input-file > output
#
# Input fields as used by this script:
#   $1 -> pos5   (kept only to be echoed with the representative segment)
#   $2 -> pos3   (end position; compared against the +/- shift window)
#   $3 -> scr    (score; the highest score in a cluster wins)
#
# Notes:
#   * If `shift` is not given with -v it is an uninitialized awk variable
#     and evaluates to 0, so only identical end positions are clustered.
#   * The window is measured from the end position of the cluster's current
#     representative, which moves whenever a higher-scoring segment
#     replaces it.
#   * On equal scores the earlier segment is kept (strict `>` comparison).

# Skip blank / whitespace-only lines so they neither open nor close a cluster.
NF == 0 { next }

# main: fold each record into the current cluster or start a new one.
{
    if (!have_cluster) {
        # First data record opens the first cluster.
        pos5 = $1
        pos3 = $2
        scr = $3
        have_cluster = 1
    } else if (($2 < pos3 - shift) || ($2 > pos3 + shift)) {
        # End position is outside the window: flush the finished cluster
        # and start a new one with this record.
        print pos5, "", pos3, "", scr
        pos5 = $1
        pos3 = $2
        scr = $3
    } else if ($3 > scr) {
        # Same cluster, better score: this record becomes the representative.
        pos5 = $1
        pos3 = $2
        scr = $3
    }
}

# Flush the last cluster; print nothing at all when there was no data.
END {
    if (have_cluster)
        print pos5, "", pos3, "", scr
}