src/algo/blast/core/blast_stat.c File Reference


Detailed Description

Functions to calculate BLAST probabilities etc.

Detailed Contents:

Definition in file blast_stat.c.

#include <algo/blast/core/blast_stat.h>
#include <algo/blast/core/ncbi_math.h>
#include "blast_psi_priv.h"

Include dependency graph for blast_stat.c:

Go to the source code of this file.

Classes

struct  MatrixInfo
 Used to temporarily store matrix values for retrieval. More...
struct  BLAST_LetterProb
 Records probability of letter appearing in sequence. More...
struct  Blast_ResComp
 Intermediate structure to store the composition of a sequence. More...
struct  SRombergCbackArgs
 Internal data structure used by Romberg integration callbacks. More...

Defines

#define BLAST_SCORE_RANGE_MAX   (BLAST_SCORE_MAX - BLAST_SCORE_MIN)
 maximum allowed range of BLAST scores.
#define BLAST_KARLIN_K_SUMLIMIT_DEFAULT   0.0001
 K_SUMLIMIT_DEFAULT == sumlimit used in BlastKarlinLHtoK().
#define BLAST_KARLIN_LAMBDA_ACCURACY_DEFAULT   (1.e-5)
 LAMBDA_ACCURACY_DEFAULT == accuracy to which Lambda should be calc'd.
#define BLAST_KARLIN_LAMBDA_ITER_DEFAULT   17
 LAMBDA_ITER_DEFAULT == no. of iterations in LambdaBis = ln(accuracy)/ln(2)
#define BLAST_KARLIN_LAMBDA0_DEFAULT   0.5
 Initial guess for the value of Lambda in BlastKarlinLambdaNR.
#define BLAST_KARLIN_K_ITER_MAX   100
 upper limit on iterations for BlastKarlinLHtoK
#define BLAST_NUM_STAT_VALUES   8
 Number of statistical parameters in each row of the precomputed tables.
#define BLOSUM45_VALUES_MAX   14
 Number of different combinations supported for BLOSUM45.
#define BLOSUM50_VALUES_MAX   16
 Number of different combinations supported for BLOSUM50.
#define BLOSUM62_VALUES_MAX   12
 Number of different combinations supported for BLOSUM62.
#define BLOSUM80_VALUES_MAX   10
 Number of different combinations supported for BLOSUM80.
#define BLOSUM90_VALUES_MAX   8
 Number of different combinations supported for BLOSUM90.
#define PAM250_VALUES_MAX   16
 Number of different combinations supported for PAM250.
#define PAM30_VALUES_MAX   7
 Number of different combinations supported for PAM30.
#define PAM70_VALUES_MAX   7
 Number of different combinations supported for PAM70.
#define STD_AMINO_ACID_FREQS   Robinson_prob
 points to the standard amino acid frequencies to use.

Typedefs

typedef double array_of_8 [8]
 Holds values (gap-opening, extension, etc.) for a matrix.
typedef Int1 CompressedReverseLookup [BLASTAA_SIZE+1][BLASTAA_SIZE+1]
 2-D array mapping compressed letters to sets of ordinary protein letters

Functions

static SBlastScoreMatrix * SBlastScoreMatrixFree (SBlastScoreMatrix *matrix)
 Deallocates SBlastScoreMatrix structure.
static SBlastScoreMatrix * SBlastScoreMatrixNew (size_t ncols, size_t nrows)
 Allocates a new SBlastScoreMatrix structure of the specified dimensions.
SPsiBlastScoreMatrix * SPsiBlastScoreMatrixFree (SPsiBlastScoreMatrix *matrix)
 Deallocates a SPsiBlastScoreMatrix structure.
SPsiBlastScoreMatrix * SPsiBlastScoreMatrixNew (size_t ncols)
 Allocates a new SPsiBlastScoreMatrix structure of dimensions ncols by BLASTAA_SIZE.
BlastScoreBlk * BlastScoreBlkNew (Uint1 alphabet, Int4 number_of_contexts)
 Allocates and initializes BlastScoreBlk.
Blast_ScoreFreq * Blast_ScoreFreqFree (Blast_ScoreFreq *sfp)
 Deallocates the score frequencies structure.
Blast_KarlinBlk * Blast_KarlinBlkFree (Blast_KarlinBlk *kbp)
 Deallocates the KarlinBlk.
BlastScoreBlk * BlastScoreBlkFree (BlastScoreBlk *sbp)
 Deallocates BlastScoreBlk as well as all associated structures.
Int2 BLAST_ScoreSetAmbigRes (BlastScoreBlk *sbp, char ambiguous_res)
 Set the ambiguous residue (e.g, 'N', 'X') in the BlastScoreBlk*.
Int2 BlastScoreBlkNuclMatrixCreate (BlastScoreBlk *sbp)
 Fill in the matrix for blastn using the penalty and rewards. The query sequence alphabet is blastna; the subject sequence alphabet is ncbi2na.
static Int2 BlastScoreBlkProteinMatrixRead (BlastScoreBlk *sbp, FILE *fp)
 Read in the matrix from the FILE *fp.
static Int2 BlastScoreBlkMaxScoreSet (BlastScoreBlk *sbp)
 Sets maximum and minimum scores on the BlastScoreBlk for a given matrix.
static Int2 BlastScoreBlkProteinMatrixLoad (BlastScoreBlk *sbp)
 Sets the sbp->matrix->data field based on the sbp->name field, using the matrices in the toolkit (util/tables/raw_scoremat.h).
Int2 Blast_ScoreBlkMatrixFill (BlastScoreBlk *sbp, GET_MATRIX_PATH get_path)
 This function fills in the BlastScoreBlk structure.
Blast_ResFreq * Blast_ResFreqFree (Blast_ResFreq *rfp)
 Deallocates Blast_ResFreq and prob0 element.
Blast_ResFreq * Blast_ResFreqNew (const BlastScoreBlk *sbp)
 Allocates a new Blast_ResFreq structure and fills in the prob element based upon the contents of sbp.
static Int2 Blast_ResFreqNormalize (const BlastScoreBlk *sbp, Blast_ResFreq *rfp, double norm)
 Normalizes all the residue frequencies and then normalizes them to "norm".
Int2 Blast_GetStdAlphabet (Uint1 alphabet_code, Uint1 *residues, Uint4 residues_size)
 Fills a buffer with the 'standard' alphabet (given by STD_AMINO_ACID_FREQS[index].ch).
Int2 Blast_ResFreqStdComp (const BlastScoreBlk *sbp, Blast_ResFreq *rfp)
 Calculates residues frequencies given a standard distribution.
static Blast_ResComp * BlastResCompDestruct (Blast_ResComp *rcp)
 Deallocates Blast_ResComp structure and associated arrays.
static Blast_ResComp * BlastResCompNew (const BlastScoreBlk *sbp)
 Allocates the Blast_ResComp* for a given alphabet.
static Int2 BlastResCompStr (const BlastScoreBlk *sbp, Blast_ResComp *rcp, char *str, Int4 length)
 Store the composition of a (query) string.
static Int2 Blast_ResFreqClr (const BlastScoreBlk *sbp, Blast_ResFreq *rfp)
 Sets prob elements of Blast_ResFreq to zero.
static Int2 Blast_ResFreqResComp (const BlastScoreBlk *sbp, Blast_ResFreq *rfp, const Blast_ResComp *rcp)
 Calculate the residue frequencies associated with the provided ResComp. This function takes into account the composition of a given sequence (expressed through rcp) rather than just doing it for a standard distribution.
static Int2 Blast_ResFreqString (const BlastScoreBlk *sbp, Blast_ResFreq *rfp, char *string, Int4 length)
 Fills in residue frequencies for a given sequence.
static Int2 BlastScoreChk (Int4 lo, Int4 hi)
 Check that the lo and hi score are within the allowed ranges.
Blast_ScoreFreq * Blast_ScoreFreqNew (Int4 score_min, Int4 score_max)
 Creates a new structure to keep track of score frequencies for a scoring system.
static Int2 BlastScoreFreqCalc (const BlastScoreBlk *sbp, Blast_ScoreFreq *sfp, Blast_ResFreq *rfp1, Blast_ResFreq *rfp2)
 Calculates the score frequencies.
static double BlastKarlinLHtoK (Blast_ScoreFreq *sfp, double lambda, double H)
 The following procedure computes K.
static double NlmKarlinLambdaNR (double *probs, Int4 d, Int4 low, Int4 high, double lambda0, double tolx, Int4 itmax, Int4 maxNewton, Int4 *itn)
 Find positive solution to.
double Blast_KarlinLambdaNR (Blast_ScoreFreq *sfp, double initialLambdaGuess)
 Calculates the parameter Lambda given an initial guess for its value.
static double BlastKarlinLtoH (Blast_ScoreFreq *sfp, double lambda)
 Calculate H, the relative entropy of the p's and q's.
Int2 Blast_KarlinBlkUngappedCalc (Blast_KarlinBlk *kbp, Blast_ScoreFreq *sfp)
 Computes the parameters lambda, H, and K for use in calculating the statistical significance of high-scoring segments or subalignments (see comment on blast_stat.c for more details).
Int2 Blast_ScoreBlkKbpUngappedCalc (EBlastProgramType program, BlastScoreBlk *sbp, Uint1 *query, const BlastQueryInfo *query_info, Blast_Message **blast_message)
 Calculate and fill the ungapped Karlin-Altschul parameters in the BlastScoreBlk structure (fields kbp_std, kbp_psi, and kbp of that structure).
Int2 Blast_ScoreBlkKbpIdealCalc (BlastScoreBlk *sbp)
 Calculates the Karlin-Altschul parameters assuming standard residue compositions for the query and subject sequences.
Blast_KarlinBlk * Blast_KarlinBlkNew (void)
 Callocs a Blast_KarlinBlk.
Int2 Blast_KarlinBlkCopy (Blast_KarlinBlk *kbp_to, Blast_KarlinBlk *kbp_from)
 Copies contents of one Karlin block to another.
static MatrixInfo * MatrixInfoDestruct (MatrixInfo *matrix_info)
 Deallocates MatrixInfo as well as name string.
static MatrixInfo * MatrixInfoNew (const char *name, array_of_8 *values, Int4 *prefs, Int4 max_number)
 Allocates New MatrixInfo*.
static ListNode * BlastMatrixValuesDestruct (ListNode *vnp)
 Free linked list of MatrixValues and all associated data.
static ListNode * BlastLoadMatrixValues (void)
 Loads all the matrix values, returns a ListNode* chain that contains MatrixInfo*'s.
static Int2 Blast_GetMatrixValues (const char *matrix, Int4 **open, Int4 **extension, double **lambda, double **K, double **H, double **alpha, double **beta, Int4 **pref_flags)
 Obtains arrays of the allowed opening and extension penalties for gapped BLAST for the given matrix.
void BLAST_GetAlphaBeta (const char *matrixName, double *alpha, double *beta, Boolean gapped, Int4 gap_open, Int4 gap_extend, const Blast_KarlinBlk *kbp_ungapped)
 Extract the alpha and beta settings for this matrixName, and these gap open and gap extension costs.
static Int2 s_SplitArrayOf8 (const array_of_8 *input, const array_of_8 **normal, const array_of_8 **non_affine, Boolean *split)
 Splits an ArrayOf8 into two arrays of supported gap costs.
static Int2 s_AdjustGapParametersByGcd (array_of_8 *normal, array_of_8 *linear, int size, Int4 *gap_existence_max, Int4 *gap_extend_max, int divisor)
 Adjust Lambda and H if reward and penalty have a non-1 gcd.
static Int2 s_GetNuclValuesArray (Int4 reward, Int4 penalty, Int4 *array_size, array_of_8 **normal, array_of_8 **non_affine, Int4 *gap_open_max, Int4 *gap_extend_max, Boolean *round_down, Blast_Message **error_return)
 Returns the array of values corresponding to the given match/mismatch scores, the number of supported gap cost combinations and thresholds for the gap costs, beyond which the ungapped statistics can be applied.
Int2 BLAST_GetProteinGapExistenceExtendParams (const char *matrixName, Int4 *gap_existence, Int4 *gap_extension)
 Extract the recommended gap existence and extension values.
Int2 BLAST_GetNucleotideGapExistenceExtendParams (Int4 reward, Int4 penalty, Int4 *gap_existence, Int4 *gap_extension)
 Extract the recommended gap existence and extension values.
Boolean BLAST_CheckRewardPenaltyScores (Int4 reward, Int4 penalty)
 Check the validity of the reward and penalty scores.
static Int2 BlastKarlinReportAllowedValues (const char *matrix_name, Blast_Message **error_return)
 Fills in error_return with strings describing the allowed values.
Int2 Blast_KarlinBlkGappedCalc (Blast_KarlinBlk *kbp, Int4 gap_open, Int4 gap_extend, const char *matrix_name, Blast_Message **error_return)
 Fills in lambda, H, and K values, as calculated by Stephen Altschul in Methods in Enzy.
Int2 Blast_KarlinBlkGappedLoadFromTables (Blast_KarlinBlk *kbp, Int4 gap_open, Int4 gap_extend, const char *matrix_name)
 Attempts to fill KarlinBlk for given gap opening, extensions etc.
char * BLAST_PrintMatrixMessage (const char *matrix_name)
 Prints a message about the allowed matrices; BlastKarlinBlkGappedFill should return 1 before this is called.
char * BLAST_PrintAllowedValues (const char *matrix_name, Int4 gap_open, Int4 gap_extend)
 Prints a message about the allowed open etc. values for the given matrix; BlastKarlinBlkGappedFill should return 2 before this is called.
Int2 Blast_KarlinBlkNuclGappedCalc (Blast_KarlinBlk *kbp, Int4 gap_open, Int4 gap_extend, Int4 reward, Int4 penalty, Blast_KarlinBlk *kbp_ungap, Boolean *round_down, Blast_Message **error_return)
 Retrieves Karlin-Altschul parameters from precomputed tables, given the substitution and gap scores.
static double s_GetUngappedBeta (Int4 reward, Int4 penalty)
 Returns the beta statistical parameter value, given the nucleotide substitution scores.
Int2 Blast_GetNuclAlphaBeta (Int4 reward, Int4 penalty, Int4 gap_open, Int4 gap_extend, Blast_KarlinBlk *kbp, Boolean gapped_calculation, double *alpha, double *beta)
 Extract the alpha and beta settings for these substitution and gap scores.
static Int4 BlastKarlinEtoS_simple (double E, const Blast_KarlinBlk *kbp, Int8 searchsp)
 Calculates score from expect value and search space.
double BLAST_GapDecayDivisor (double decayrate, unsigned nsegs)
 Compute a divisor used to weight the evalue of a collection of "nsegs" distinct alignments.
Int2 BLAST_Cutoffs (Int4 *S, double *E, Blast_KarlinBlk *kbp, Int8 searchsp, Boolean dodecay, double gap_decay_rate)
 Calculate the cutoff score from the expected number of HSPs or vice versa.
double BLAST_KarlinStoE_simple (Int4 S, Blast_KarlinBlk *kbp, Int8 searchsp)
 Calculates the Expect value based upon the search space and some Karlin-Altschul parameters.
double BLAST_KarlinPtoE (double p)
 Convert a P-value to an E-value.
double BLAST_KarlinEtoP (double x)
 Convert an E-value to a P-value.
static double s_OuterIntegralCback (double x, void *vp)
 Callback for the Romberg integration function.
static double s_InnerIntegralCback (double s, void *vp)
 Callback for the Romberg integration function.
static double s_BlastSumPCalc (int r, double s)
 Evaluate the following double integral, where r = number of segments.
static double s_BlastSumP (Int4 r, double s)
 Estimate the Sum P-value by calculation or interpolation, as appropriate.
double BLAST_SmallGapSumE (Int4 starting_points, Int2 num, double xsum, Int4 query_length, Int4 subject_length, Int8 searchsp_eff, double weight_divisor)
 Calculates the e-value for alignments with "small" gaps (typically under fifty residues/basepairs) following ideas of Stephen Altschul's.
double BLAST_UnevenGapSumE (Int4 query_start_points, Int4 subject_start_points, Int2 num, double xsum, Int4 query_length, Int4 subject_length, Int8 searchsp_eff, double weight_divisor)
 Calculates the e-value of a collection of multiple distinct alignments with asymmetric gaps between the alignments.
double BLAST_LargeGapSumE (Int2 num, double xsum, Int4 query_length, Int4 subject_length, Int8 searchsp_eff, double weight_divisor)
 Calculates the e-value of a collection of distinct alignments with arbitrarily large gaps between the alignments.
void Blast_FillResidueProbability (const Uint1 *sequence, Int4 length, double *resProb)
 Given a sequence of 'length' amino acid residues, compute the probability of each residue and put that in the array resProb. Excludes ambiguity characters.
static double RPSfindUngappedLambda (const char *matrixName)
 Gets the ungapped lambda calculated for the matrix in question given standard residue composition for both query and subject sequences.
static void RPSFillScores (Int4 **matrix, Int4 matrixLength, double *queryProbArray, double *scoreArray, Blast_ScoreFreq *return_sfp, Int4 range, Int4 alphabet_size)
 The routine RPSFillScores computes the probability of each score weighted by the probability of each query residue, fills those probabilities into scoreArray, and puts scoreArray as a field in the structure that is returned. For indexing convenience, the field storing scoreArray points to the entry for score 0, so that referring to the -k index corresponds to score -k. FIXME: This can be replaced by _PSIComputeScoreProbabilities??
Int4 ** RPSRescalePssm (double scalingFactor, Int4 rps_query_length, const Uint1 *rps_query_seq, Int4 db_seq_length, Int4 **posMatrix, BlastScoreBlk *sbp)
 Rescale the PSSM, using composition-based statistics, for use with RPS BLAST.
static void s_BuildCompressedTranslation (const char *trans_string, Uint1 *table, Int4 compressed_alphabet_size, CompressedReverseLookup rev_table)
 parse the string defining the conversion between the ordinary protein alphabet and a compressed alphabet
static Int2 s_GetCompressedProbs (BlastScoreBlk *sbp, double *compressed_prob, Int4 compressed_alphabet_size, CompressedReverseLookup rev_table)
 Calculate conditional probability of each letter in each group.
static Int2 s_BuildCompressedScoreMatrix (BlastScoreBlk *sbp, SCompressedAlphabet *new_alphabet, double matrix_scale_factor, CompressedReverseLookup rev_table)
 Compute a (non-square) score matrix for a compressed alphabet.
SCompressedAlphabet * SCompressedAlphabetNew (BlastScoreBlk *sbp, Int4 compressed_alphabet_size, double matrix_scale_factor)
 Allocate a new compressed alphabet and score matrix.
SCompressedAlphabet * SCompressedAlphabetFree (SCompressedAlphabet *alphabet)
 Free a compressed alphabet and score matrix.
Int4 BLAST_ComputeLengthAdjustment (double K, double logK, double alpha_d_lambda, double beta, Int4 query_length, Int8 db_length, Int4 db_num_seqs, Int4 *length_adjustment)
 Computes the adjustment to the lengths of the query and database sequences that is used to compensate for edge effects when computing evalues.

Variables

static char const rcsid []
static array_of_8 blosum45_values [14]
 Supported values (gap-existence, extension, etc.) for the BLOSUM45 matrix.
static Int4 blosum45_prefs [14]
 Quality values for BLOSUM45 matrix, each element corresponds to same element number in array blosum45_values.
static array_of_8 blosum50_values [16]
 Supported values (gap-existence, extension, etc.) for the BLOSUM50 matrix.
static Int4 blosum50_prefs [16]
 Quality values for BLOSUM50 matrix, each element corresponds to same element number in array blosum50_values.
static array_of_8 blosum62_values [12]
 Supported values (gap-existence, extension, etc.) for the BLOSUM62 matrix.
static Int4 blosum62_prefs [12]
 Quality values for BLOSUM62 matrix, each element corresponds to same element number in array blosum62_values.
static array_of_8 blosum80_values [10]
 Supported values (gap-existence, extension, etc.) for the BLOSUM80 matrix.
static Int4 blosum80_prefs [10]
 Quality values for BLOSUM80 matrix, each element corresponds to same element number in array blosum80_values.
static array_of_8 blosum90_values [8]
 Supported values (gap-existence, extension, etc.) for the BLOSUM90 matrix.
static Int4 blosum90_prefs [8]
 Quality values for BLOSUM90 matrix, each element corresponds to same element number in array blosum90_values.
static array_of_8 pam250_values [16]
 Supported values (gap-existence, extension, etc.) for the PAM250 matrix.
static Int4 pam250_prefs [16]
 Quality values for PAM250 matrix, each element corresponds to same element number in array pam250_values.
static array_of_8 pam30_values [7]
 Supported values (gap-existence, extension, etc.) for the PAM30 matrix.
static Int4 pam30_prefs [7]
 Quality values for PAM30 matrix, each element corresponds to same element number in array pam30_values.
static array_of_8 pam70_values [7]
 Supported values (gap-existence, extension, etc.) for the PAM70 matrix.
static Int4 pam70_prefs [7]
 Quality values for PAM70 matrix, each element corresponds to same element number in array pam70_values.
static const array_of_8 blastn_values_1_5 []
 Karlin-Altschul parameter values for substitution scores 1 and -5.
static const array_of_8 blastn_values_1_4 []
 Karlin-Altschul parameter values for substitution scores 1 and -4.
static const array_of_8 blastn_values_2_7 []
 Karlin-Altschul parameter values for substitution scores 2 and -7.
static const array_of_8 blastn_values_1_3 []
 Karlin-Altschul parameter values for substitution scores 1 and -3.
static const array_of_8 blastn_values_2_5 []
 Karlin-Altschul parameter values for substitution scores 2 and -5.
static const array_of_8 blastn_values_1_2 []
 Karlin-Altschul parameter values for substitution scores 1 and -2.
static const array_of_8 blastn_values_2_3 []
 Karlin-Altschul parameter values for substitution scores 2 and -3.
static const array_of_8 blastn_values_3_4 []
 Karlin-Altschul parameter values for substitution scores 3 and -4.
static const array_of_8 blastn_values_4_5 []
 Karlin-Altschul parameter values for substitution scores 4 and -5.
static const array_of_8 blastn_values_1_1 []
 Karlin-Altschul parameter values for substitution scores 1 and -1.
static const array_of_8 blastn_values_3_2 []
 Karlin-Altschul parameter values for substitution scores 3 and -2.
static const array_of_8 blastn_values_5_4 []
 Karlin-Altschul parameter values for substitution scores 5 and -4.
static BLAST_LetterProb Robinson_prob []
 amino acid background frequencies from Robinson and Robinson
static BLAST_LetterProb nt_prob []
 nucleotide probabilities (25% each letter)
static const char * s_alphabet10 = "IJLMV AST BDENZ KQR G FY P H C W"
 23-to-10 letter compressed alphabet.
static const char * s_alphabet15 = "ST IJV LM KR EQZ A G BD P N F Y H C W"
 23-to-15 letter compressed alphabet.


Define Documentation

#define BLAST_KARLIN_K_ITER_MAX   100
 

upper limit on iterations for BlastKarlinLHtoK

Definition at line 76 of file blast_stat.c.

#define BLAST_KARLIN_K_SUMLIMIT_DEFAULT   0.0001
 

K_SUMLIMIT_DEFAULT == sumlimit used in BlastKarlinLHtoK().

Definition at line 68 of file blast_stat.c.

#define BLAST_KARLIN_LAMBDA0_DEFAULT   0.5
 

Initial guess for the value of Lambda in BlastKarlinLambdaNR.

Definition at line 74 of file blast_stat.c.

Referenced by Blast_KarlinBlkUngappedCalc().

#define BLAST_KARLIN_LAMBDA_ACCURACY_DEFAULT   (1.e-5)
 

LAMBDA_ACCURACY_DEFAULT == accuracy to which Lambda should be calc'd.

Definition at line 70 of file blast_stat.c.

#define BLAST_KARLIN_LAMBDA_ITER_DEFAULT   17
 

LAMBDA_ITER_DEFAULT == no. of iterations in LambdaBis = ln(accuracy)/ln(2)

Definition at line 72 of file blast_stat.c.

#define BLAST_NUM_STAT_VALUES   8
 

Number of statistical parameters in each row of the precomputed tables.

Definition at line 79 of file blast_stat.c.

#define BLAST_SCORE_RANGE_MAX   (BLAST_SCORE_MAX - BLAST_SCORE_MIN)
 

maximum allowed range of BLAST scores.

Definition at line 60 of file blast_stat.c.

Referenced by BlastScoreChk(), and RPSRescalePssm().

#define BLOSUM45_VALUES_MAX   14
 

Number of different combinations supported for BLOSUM45.

Definition at line 167 of file blast_stat.c.

Referenced by BlastLoadMatrixValues().

#define BLOSUM50_VALUES_MAX   16
 

Number of different combinations supported for BLOSUM50.

Definition at line 203 of file blast_stat.c.

Referenced by BlastLoadMatrixValues().

#define BLOSUM62_VALUES_MAX   12
 

Number of different combinations supported for BLOSUM62.

Definition at line 242 of file blast_stat.c.

Referenced by BlastLoadMatrixValues().

#define BLOSUM80_VALUES_MAX   10
 

Number of different combinations supported for BLOSUM80.

Definition at line 274 of file blast_stat.c.

Referenced by BlastLoadMatrixValues().

#define BLOSUM90_VALUES_MAX   8
 

Number of different combinations supported for BLOSUM90.

Definition at line 301 of file blast_stat.c.

Referenced by BlastLoadMatrixValues().

#define PAM250_VALUES_MAX   16
 

Number of different combinations supported for PAM250.

Definition at line 324 of file blast_stat.c.

Referenced by BlastLoadMatrixValues().

#define PAM30_VALUES_MAX   7
 

Number of different combinations supported for PAM30.

Definition at line 363 of file blast_stat.c.

Referenced by BlastLoadMatrixValues().

#define PAM70_VALUES_MAX   7
 

Number of different combinations supported for PAM70.

Definition at line 385 of file blast_stat.c.

Referenced by BlastLoadMatrixValues().

#define STD_AMINO_ACID_FREQS   Robinson_prob
 

points to the standard amino acid frequencies to use.

Definition at line 1465 of file blast_stat.c.

Referenced by Blast_GetStdAlphabet(), and Blast_ResFreqStdComp().


Typedef Documentation

typedef double array_of_8[8]
 

Holds values (gap-opening, extension, etc.) for a matrix.

Definition at line 82 of file blast_stat.c.

typedef Int1 CompressedReverseLookup[BLASTAA_SIZE+1][BLASTAA_SIZE+1]
 

2-D array mapping compressed letters to sets of ordinary protein letters

Definition at line 4277 of file blast_stat.c.


Function Documentation

Boolean BLAST_CheckRewardPenaltyScores Int4  reward,
Int4  penalty
 

Check the validity of the reward and penalty scores.

Only to be used with blastn searches.

Parameters:
reward match score [in]
penalty mismatch score [in]
Returns:
TRUE on success

Definition at line 3098 of file blast_stat.c.

References FALSE, s_GetNuclValuesArray(), and sfree.

Referenced by BlastScoringOptionsValidate().

Int4 BLAST_ComputeLengthAdjustment double  K,
double  logK,
double  alpha_d_lambda,
double  beta,
Int4  query_length,
Int8  db_length,
Int4  db_num_seqs,
Int4 *  length_adjustment
 

Computes the adjustment to the lengths of the query and database sequences that is used to compensate for edge effects when computing evalues.

The length adjustment is an integer-valued approximation to the fixed point of the function

f(ell) = beta + (alpha/lambda) * (log K + log((m - ell)*(n - N ell)))

where m is the query length, n is the length of the database, and N is the number of sequences in the database. The values beta, alpha, lambda and K are statistical Karlin-Altschul parameters.

The value of the length adjustment computed by this routine, A, will always be an integer smaller than the fixed point of f(ell). Usually, it will be the largest such integer. However, the computed length adjustment, A, will also be so small that

K * (m - A) * (n - N * A) > MAX(m,n).

Moreover, an iterative method is used to compute A, and under unusual circumstances the iterative method may not converge.

Parameters:
K the statistical parameter K
logK the natural logarithm of K
alpha_d_lambda the ratio of the statistical parameters alpha and lambda (for ungapped alignments, the value 1/H should be used)
beta the statistical parameter beta (for ungapped alignments, beta == 0)
query_length the length of the query sequence
db_length the length of the database
db_num_seqs the number of sequences in the database
length_adjustment the computed value of the length adjustment [out]
Returns:
0 if length_adjustment is known to be the largest integer less than the fixed point of f(ell); 1 otherwise.

Definition at line 4553 of file blast_stat.c.

References FALSE, MAX, and TRUE.

Referenced by BLAST_CalcEffLengths().

Int2 BLAST_Cutoffs Int4 *  S,
double *  E,
Blast_KarlinBlk *  kbp,
Int8  searchsp,
Boolean  dodecay,
double  gap_decay_rate
 

Calculate the cutoff score from the expected number of HSPs or vice versa.

Parameters:
S The calculated score [in] [out]
E The calculated e-value [in] [out]
kbp The Karlin-Altschul statistical parameters [in]
searchsp The effective search space [in]
dodecay Use gap decay feature? [in]
gap_decay_rate Gap decay rate to use, if dodecay is set [in]

Definition at line 3606 of file blast_stat.c.

References BLAST_GapDecayDivisor(), BLAST_KarlinStoE_simple(), BlastKarlinEtoS_simple(), FALSE, Blast_KarlinBlk::H, Blast_KarlinBlk::K, Blast_KarlinBlk::Lambda, and TRUE.

Referenced by BlastHitSavingParametersUpdate(), BlastInitialWordParametersUpdate(), and BOOST_AUTO_TEST_CASE().

void Blast_FillResidueProbability const Uint1 *  sequence,
Int4  length,
double *  resProb
 

Given a sequence of 'length' amino acid residues, compute the probability of each residue and put that in the array resProb. Excludes ambiguity characters.

Parameters:
sequence the sequence to be computed upon [in]
length the length of the sequence [in]
resProb the object to be filled in [in|out]

Definition at line 4090 of file blast_stat.c.

References AMINOACID_TO_NCBISTDAA, and BLASTAA_SIZE.

Referenced by RPSRescalePssm().

double BLAST_GapDecayDivisor double  decayrate,
unsigned  nsegs
 

Compute a divisor used to weight the evalue of a collection of "nsegs" distinct alignments.

These divisors are used to compensate for the effect of choosing the best among multiple collections of alignments. See

Stephen F. Altschul. Evaluating the statistical significance of multiple distinct local alignments. In Suhai, editor, Theoretical and Computational Methods in Genome Research, pages 1-14. Plenum Press, New York, 1997.

The "decayrate" parameter of this routine is a value in the interval (0,1). Typical values of decayrate are .1 and .5.

Parameters:
decayrate adjusts for (multiple) tests of number of HSP sum groups [in]
nsegs the number of HSPs in the sum group [in]
Returns:
divisor used to compensate for multiple tests

Definition at line 3595 of file blast_stat.c.

References BLAST_Powi().

Referenced by BLAST_Cutoffs(), Blast_HSPListGetEvalues(), and s_SumHSPEvalue().

void BLAST_GetAlphaBeta const char *  matrixName,
double *  alpha,
double *  beta,
Boolean  gapped,
Int4  gap_open,
Int4  gap_extend,
const Blast_KarlinBlk *  kbp_ungapped
 

Extract the alpha and beta settings for this matrixName, and these gap open and gap extension costs.

Parameters:
matrixName name of the matrix used [in]
alpha Karlin-Altschul parameter to be set [out]
beta Karlin-Altschul parameter to be set [out]
gapped TRUE if a gapped search [in]
gap_open existence cost of a gap [in]
gap_extend extension cost of a gap [in]
kbp_ungapped Karlin block with ungapped values of the parameters [in]

Definition at line 2738 of file blast_stat.c.

References Blast_GetMatrixValues(), and BLAST_MATRIX_BEST.

Referenced by BLAST_CalcEffLengths().

static Int2 Blast_GetMatrixValues const char *  matrix,
Int4 **  open,
Int4 **  extension,
double **  lambda,
double **  K,
double **  H,
double **  alpha,
double **  beta,
Int4 **  pref_flags
[static]
 

Obtains arrays of the allowed opening and extension penalties for gapped BLAST for the given matrix.

Also obtains arrays of Lambda, K, and H. Any of these fields that are not required should be set to NULL. The Int2 return value is the length of the arrays.

Parameters:
matrix name of the matrix [in]
open gap existence parameter [in|out]
extension cost to extend a gap by one letter [in|out]
lambda Karlin-Altschul parameter [in|out]
K Karlin-Altschul parameter [in|out]
H Karlin-Altschul parameter [in|out]
alpha Karlin-Altschul parameter [in|out]
beta Karlin-Altschul parameter [in|out]
pref_flags describes preferred values [in|out]
Returns:
maximum number of values (length of arrays).

Definition at line 2657 of file blast_stat.c.

References BlastLoadMatrixValues(), FALSE, MatrixInfo::max_number_values, MatrixInfo::name, ListNode::next, MatrixInfo::prefs, ListNode::ptr, strcasecmp, TRUE, and MatrixInfo::values.

Referenced by BLAST_GetAlphaBeta(), BLAST_GetProteinGapExistenceExtendParams(), and RPSfindUngappedLambda().

Int2 Blast_GetNuclAlphaBeta Int4  reward,
Int4  penalty,
Int4  gap_open,
Int4  gap_extend,
Blast_KarlinBlk *  kbp,
Boolean  gapped_calculation,
double *  alpha,
double *  beta
 

Extract the alpha and beta settings for these substitution and gap scores.

If substitution or gap costs are not found in the tables, assume an ungapped search. Then alpha is computed using the formula Alpha = Lambda/H, and beta is equal to 0 except for some special cases.

Parameters:
reward Match reward score [in]
penalty Mismatch penalty score [in]
gap_open Gap opening (existence) cost [in]
gap_extend Gap extension cost [in]
kbp Karlin block containing already computed Lambda, K and H parameters.
gapped_calculation Is this a gapped search? [in]
alpha Alpha parameter for this scoring system [out]
beta Beta parameter for this scoring system [out]

Definition at line 3481 of file blast_stat.c.

References ASSERT, FALSE, s_GetNuclValuesArray(), and TRUE.

Referenced by BLAST_CalcEffLengths().

Int2 BLAST_GetNucleotideGapExistenceExtendParams Int4  reward,
Int4  penalty,
Int4 gap_existence,
Int4 gap_extension
 

Extract the recommended gap existence and extension values.

Only to be used with blastn searches.

Parameters:
reward match score [in]
penalty mismatch score [in]
gap_existence returns recommended existence cost [in|out]
gap_extension returns recommended extension cost [in|out]
Returns:
zero on success

Definition at line 3046 of file blast_stat.c.

References FALSE, s_GetNuclValuesArray(), sfree, and TRUE.

Referenced by BOOST_AUTO_UNIT_TEST().

Int2 BLAST_GetProteinGapExistenceExtendParams const char *  matrixName,
Int4 gap_existence,
Int4 gap_extension
 

Extract the recommended gap existence and extension values.

Only to be used with protein matrices.

Parameters:
matrixName name of the matrix [in]
gap_existence returns recommended existence cost [in|out]
gap_extension returns recommended extension cost [in|out]
Returns:
zero on success

Definition at line 3018 of file blast_stat.c.

References Blast_GetMatrixValues(), and BLAST_MATRIX_BEST.

Referenced by BOOST_AUTO_UNIT_TEST(), and CGenericSearchArgs::ExtractAlgorithmOptions().

Int2 Blast_GetStdAlphabet Uint1  alphabet_code,
Uint1 residues,
Uint4  residues_size
 

Fills a buffer with the 'standard' alphabet (given by STD_AMINO_ACID_FREQS[index].ch).

< points to the standard amino acid frequencies to use.

< points to the standard amino acid frequencies to use.

< points to the standard amino acid frequencies to use.

< points to the standard amino acid frequencies to use.

Definition at line 1510 of file blast_stat.c.

References AMINOACID_TO_NCBISTDAA, BLASTAA_SEQ_CODE, DIM, and STD_AMINO_ACID_FREQS.

Referenced by _PSIComputeScoreProbabilities(), and Blast_ResFreqStdComp().

Int2 Blast_KarlinBlkCopy Blast_KarlinBlk kbp_to,
Blast_KarlinBlk kbp_from
 

Copies contents of one Karlin block to another.

Both must be allocated before this function is called.

Parameters:
kbp_to Karlin block to copy values to [in] [out]
kbp_from Karlin block to copy values from [in]
Returns:
0 on success; -1 if either argument is NULL on input.

Definition at line 2522 of file blast_stat.c.

References Blast_KarlinBlk::H, Blast_KarlinBlk::K, Blast_KarlinBlk::Lambda, Blast_KarlinBlk::logK, and Blast_KarlinBlk::paramC.

Referenced by Blast_ScoreBlkKbpGappedCalc(), Blast_ScoreBlkKbpUngappedCalc(), CBlastAncillaryData::do_copy(), s_InitializeKarlinBlk(), and s_RestoreSearch().

Blast_KarlinBlk* Blast_KarlinBlkFree Blast_KarlinBlk kbp  ) 
 

Deallocates the KarlinBlk.

Parameters:
kbp KarlinBlk to be deallocated [in]
Returns:
NULL

Definition at line 850 of file blast_stat.c.

References sfree.

Referenced by Blast_ScoreBlkKbpUngappedCalc(), BlastScoreBlkFree(), BOOST_AUTO_UNIT_TEST(), MakeSomeInvalidKBP(), MakeSomeValidKBP(), s_SavedParametersFree(), SPsiBlastScoreMatrixFree(), and CBlastAncillaryData::~CBlastAncillaryData().

Int2 Blast_KarlinBlkGappedCalc Blast_KarlinBlk kbp,
Int4  gap_open,
Int4  gap_extend,
const char *  matrix_name,
Blast_Message **  error_return
 

Fills in lambda, H, and K values, as calculated by Stephen Altschul in Methods in Enzymology (vol 266, page 474).

Parameters:
kbp object to be filled in [in|out]
gap_open cost of gap existence [in]
gap_extend cost to extend a gap one letter [in]
matrix_name name of the matrix to be used [in]
error_return filled in with error message if needed [out]
Returns:
zero on success

Definition at line 3171 of file blast_stat.c.

References Blast_KarlinBlkGappedLoadFromTables(), Blast_MessageWrite(), BlastKarlinReportAllowedValues(), BlastLoadMatrixValues(), BlastMatrixValuesDestruct(), eBlastSevError, kBlastMessageNoContext, MatrixInfo::name, ListNode::next, and ListNode::ptr.

Referenced by Blast_ScoreBlkKbpGappedCalc().

Int2 Blast_KarlinBlkGappedLoadFromTables Blast_KarlinBlk kbp,
Int4  gap_open,
Int4  gap_extend,
const char *  matrix_name
 

Attempts to fill KarlinBlk for given gap opening, extensions etc.

Parameters:
kbp object to be filled in [in|out]
gap_open gap existence cost [in]
gap_extend gap extension cost [in]
matrix_name name of the matrix used [in]
Returns:
-1 if matrix_name is NULL; 1 if matrix not found; 2 if matrix found, but open, extend etc. values not supported.

Definition at line 3221 of file blast_stat.c.

References BLAST_Nint(), BlastLoadMatrixValues(), FALSE, MatrixInfo::max_number_values, MatrixInfo::name, ListNode::next, ListNode::ptr, strcasecmp, TRUE, and MatrixInfo::values.

Referenced by Blast_KarlinBlkGappedCalc(), BlastScoringOptionsValidate(), and CMultiAligner::x_ComputeTree().

Blast_KarlinBlk* Blast_KarlinBlkNew void   ) 
 

Callocs a Blast_KarlinBlk.

Returns:
pointer to the Blast_KarlinBlk

Definition at line 2512 of file blast_stat.c.

Referenced by Blast_ScoreBlkKbpGappedCalc(), Blast_ScoreBlkKbpIdealCalc(), Blast_ScoreBlkKbpUngappedCalc(), CBlastAncillaryData::CBlastAncillaryData(), CBlastAncillaryData::do_copy(), MakeSomeInvalidKBP(), MakeSomeValidKBP(), s_FillScoreBlkWithBadKbp(), s_InitializeKarlinBlk(), s_PHIScoreBlkFill(), s_RecordInitialSearch(), and SPsiBlastScoreMatrixNew().

Int2 Blast_KarlinBlkNuclGappedCalc Blast_KarlinBlk kbp,
Int4  gap_open,
Int4  gap_extend,
Int4  reward,
Int4  penalty,
Blast_KarlinBlk kbp_ungap,
Boolean round_down,
Blast_Message **  error_return
 

Retrieves Karlin-Altschul parameters from precomputed tables, given the substitution and gap scores.

Gap cost values greater than any of those listed in the tables ("greater" meaning that both values are greater than or equal, and at least one is strictly greater) are treated as infinite, and parameter values are copied from the ungapped Karlin block.

Parameters:
kbp Allocated Karlin block to fill [in] [out]
gap_open Gap opening (existence) cost [in]
gap_extend Gap extension cost [in]
reward Match reward score [in]
penalty Mismatch penalty score [in]
kbp_ungap Karlin block with ungapped Karlin-Altschul parameters [in]
round_down specifies that the score should be rounded down to nearest even score in some cases [in|out]
error_return Pointer to error message. [in] [out]

Definition at line 3368 of file blast_stat.c.

References ASSERT, Blast_KarlinBlk::H, Blast_KarlinBlk::K, Blast_KarlinBlk::Lambda, Blast_KarlinBlk::logK, s_GetNuclValuesArray(), and sfree.

Referenced by Blast_ScoreBlkKbpGappedCalc().

Int2 Blast_KarlinBlkUngappedCalc Blast_KarlinBlk kbp,
Blast_ScoreFreq sfp
 

Computes the parameters lambda, H, and K for use in calculating the statistical significance of high-scoring segments or subalignments (see comment on blast_stat.c for more details).

< Initial guess for the value of Lambda in BlastKarlinLambdaNR

Definition at line 2346 of file blast_stat.c.

References BLAST_KARLIN_LAMBDA0_DEFAULT, Blast_KarlinLambdaNR(), BlastKarlinLHtoK(), BlastKarlinLtoH(), Blast_KarlinBlk::H, Blast_KarlinBlk::K, Blast_KarlinBlk::Lambda, and Blast_KarlinBlk::logK.

Referenced by _PSIUpdateLambdaK(), Blast_ScoreBlkKbpIdealCalc(), and Blast_ScoreBlkKbpUngappedCalc().

double BLAST_KarlinEtoP double  x  ) 
 

Convert an E-value to a P-value.

E-values and P-values may either represent statistics of a database search or represent statistics on the two sequences being compared. If given a database E-value, this routine will return a database P-value; if given a pairwise E-value, it will return a pairwise P-value.

In the context of a database search, the available E-value is typically a database E-value, whereas the desired P-value is a pairwise P-value. When this is the case, the E-value should be divided by the effective length of the database and multiplied by the effective length of the subject, before BLAST_KarlinEtoP is called.

Parameters:
x the expect value to be converted [in]
Returns:
the corresponding p-value.

Definition at line 3705 of file blast_stat.c.

References BLAST_Expm1().

Referenced by s_AdjustEvaluesForComposition().

double Blast_KarlinLambdaNR Blast_ScoreFreq sfp,
double  initialLambdaGuess
 

Calculates the parameter Lambda given an initial guess for its value.

< LAMBDA_ACCURACY_DEFAULT == accuracy to which Lambda should be calc'd

< LAMBDA_ITER_DEFAULT == no. of iterations in LambdaBis = ln(accuracy)/ln(2)

Definition at line 2214 of file blast_stat.c.

References BLAST_Gcd(), BlastScoreChk(), Blast_ScoreFreq::obs_max, Blast_ScoreFreq::obs_min, Blast_ScoreFreq::score_avg, and Blast_ScoreFreq::sprob.

Referenced by Blast_KarlinBlkUngappedCalc(), and RPSRescalePssm().

double BLAST_KarlinPtoE double  p  ) 
 

Convert a P-value to an E-value.

P-values and E-values may either represent statistics of a database search or represent statistics on the two sequences being compared. If given a database P-value, this routine will return a database E-value; if given a pairwise P-value, it will return a pairwise E-value.

In the context of a database search, the available P-value is often a pairwise P-value, whereas the desired E-value is a database E-value. When this is the case, the value returned by this routine should be multiplied by the effective length of the database and divided by the effective length of the subject.

Parameters:
p the P-value to be converted [in]
Returns:
the corresponding expect value.

Definition at line 3691 of file blast_stat.c.

References BLAST_Log1p(), INT4_MAX, and INT4_MIN.

Referenced by BLAST_LargeGapSumE(), BLAST_SmallGapSumE(), BLAST_UnevenGapSumE(), and s_AdjustEvaluesForComposition().

double BLAST_KarlinStoE_simple Int4  S,
Blast_KarlinBlk kbp,
Int8  searchsp
 

Calculates the Expect value based upon the search space and some Karlin-Altschul parameters.

It is "simple" as it does not use sum-statistics.

Parameters:
S the score of the alignment. [in]
kbp the Karlin-Altschul parameters. [in]
searchsp total search space to be used [in]
Returns:
the expect value

Definition at line 3673 of file blast_stat.c.

References Blast_KarlinBlk::H, H, Blast_KarlinBlk::K, Blast_KarlinBlk::Lambda, and Blast_KarlinBlk::logK.

Referenced by BLAST_Cutoffs(), Blast_HSPListGetEvalues(), and CScoreBuilder::GetBlastEValue().

double BLAST_LargeGapSumE Int2  num,
double  xsum,
Int4  query_length,
Int4  subject_length,
Int8  searchsp_eff,
double  weight_divisor
 

Calculates the e-value of a collection of distinct alignments with arbitrarily large gaps between the alignments.

Parameters:
num number of distinct alignments in the collection [in]
xsum the sum of the scores of these alignments each individually normalized using an appropriate value of Lambda and logK [in]
query_length effective length of query sequence [in]
subject_length effective length of subject sequence [in]
searchsp_eff effective size of the search space [in]
weight_divisor a divisor used to weight the e-value when multiple collections of alignments are being considered by the calling routine [in]
Returns:
sum expect value.

Definition at line 4045 of file blast_stat.c.

References BLAST_KarlinPtoE(), BLAST_LnFactorial(), INT4_MAX, and s_BlastSumP().

char* BLAST_PrintAllowedValues const char *  matrix,
Int4  gap_open,
Int4  gap_extend
 

Prints a message about the allowed open etc. values for the given matrix. BlastKarlinBlkGappedFill should return 2 before this is called.

Parameters:
matrix name of the matrix [in]
gap_open gap existence cost [in]
gap_extend cost to extend a gap by one [in]
Returns:
message

Definition at line 3319 of file blast_stat.c.

References BLAST_Nint(), BlastLoadMatrixValues(), FALSE, MatrixInfo::max_number_values, MatrixInfo::name, ListNode::next, ListNode::ptr, strcasecmp, TRUE, and MatrixInfo::values.

Referenced by BlastScoringOptionsValidate().

char* BLAST_PrintMatrixMessage const char *  matrix  ) 
 

Prints a message about the allowed matrices. BlastKarlinBlkGappedFill should return 1 before this is called.

Parameters:
matrix the matrix to print a message about [in]
Returns:
the message

Definition at line 3292 of file blast_stat.c.

References BlastLoadMatrixValues(), BlastMatrixValuesDestruct(), MatrixInfo::name, ListNode::next, and ListNode::ptr.

Referenced by BlastScoringOptionsValidate().

static Int2 Blast_ResFreqClr const BlastScoreBlk sbp,
Blast_ResFreq rfp
[static]
 

Sets prob elements of Blast_ResFreq to zero.

Parameters:
sbp needed for alphabet information [in]
rfp contains elements to be zeroed [in|out]
Returns:
zero on success.

Definition at line 1671 of file blast_stat.c.

References BlastScoreBlk::alphabet_size, BlastScoreBlk::alphabet_start, and Blast_ResFreq::prob.

Referenced by Blast_ResFreqResComp().

Blast_ResFreq* Blast_ResFreqFree Blast_ResFreq rfp  ) 
 

Deallocates Blast_ResFreq and prob0 element.

Parameters:
rfp the Blast_ResFreq to be deallocated.

Definition at line 1336 of file blast_stat.c.

References Blast_ResFreq::prob0, and sfree.

Referenced by BLAST_GetStandardAaProbabilities(), Blast_ResFreqNew(), Blast_ScoreBlkKbpIdealCalc(), and Blast_ScoreBlkKbpUngappedCalc().

Blast_ResFreq* Blast_ResFreqNew const BlastScoreBlk sbp  ) 
 

Allocates a new Blast_ResFreq structure and fills in the prob element based upon the contents of sbp.

Parameters:
sbp The BlastScoreBlk* used to init prob [in]

Definition at line 1355 of file blast_stat.c.

References BlastScoreBlk::alphabet_code, Blast_ResFreq::alphabet_code, BlastScoreBlk::alphabet_size, BlastScoreBlk::alphabet_start, Blast_ResFreqFree(), Blast_ResFreq::prob, and Blast_ResFreq::prob0.

Referenced by BLAST_GetStandardAaProbabilities(), Blast_ScoreBlkKbpIdealCalc(), Blast_ScoreBlkKbpUngappedCalc(), s_GetCompressedProbs(), SPHIPatternSearchBlkNew(), and CMultiAligner::x_AssignDefaultResFreqs().

static Int2 Blast_ResFreqNormalize const BlastScoreBlk sbp,
Blast_ResFreq rfp,
double  norm
[static]
 

Normalizes all the residue frequencies so that they sum to "norm".

If "norm" is one, then they will all sum to one.

Parameters:
sbp needed for alphabet information [in]
rfp array of residue frequencies to be normalized [in|out]
norm value to normalize to [in]
Returns:
zero on success, 1 otherwise

Definition at line 1482 of file blast_stat.c.

References BlastScoreBlk::alphabet_size, BlastScoreBlk::alphabet_start, and Blast_ResFreq::prob.

static Int2 Blast_ResFreqResComp const BlastScoreBlk sbp,
Blast_ResFreq rfp,
const Blast_ResComp rcp
[static]
 

Calculate the residue frequencies associated with the provided ResComp. This function takes into account the composition of a given sequence (expressed through rcp) rather than just doing it for a standard distribution.

Parameters:
sbp contains alphabet information [in]
rfp object to be filled in [in|out]
rcp object with composition information [in]
Returns:
zero on success, 1 on failure

Definition at line 1691 of file blast_stat.c.

References Blast_ResComp::alphabet_code, Blast_ResFreq::alphabet_code, BlastScoreBlk::alphabet_size, BlastScoreBlk::alphabet_start, Blast_ResFreqClr(), Blast_ResComp::comp, and Blast_ResFreq::prob.

Referenced by Blast_ResFreqString().

Int2 Blast_ResFreqStdComp const BlastScoreBlk sbp,
Blast_ResFreq rfp
 

Calculates residue frequencies given a standard distribution.

< points to the standard amino acid frequencies to use.

< points to the standard amino acid frequencies to use.

< points to the standard amino acid frequencies to use.

< points to the standard amino acid frequencies to use.

Definition at line 1534 of file blast_stat.c.

References BlastScoreBlk::alphabet_code, Blast_GetStdAlphabet(), DIM, Blast_ResFreq::prob, BlastScoreBlk::protein_alphabet, sfree, STD_AMINO_ACID_FREQS, and TRUE.

Referenced by BLAST_GetStandardAaProbabilities(), Blast_ScoreBlkKbpIdealCalc(), Blast_ScoreBlkKbpUngappedCalc(), s_GetCompressedProbs(), SPHIPatternSearchBlkNew(), and CMultiAligner::x_AssignDefaultResFreqs().

static Int2 Blast_ResFreqString const BlastScoreBlk sbp,
Blast_ResFreq rfp,
char *  string,
Int4  length
[static]
 

Fills in residue frequencies for a given sequence.

Parameters:
sbp needed for alphabet information [in]
rfp object to be populated [in|out]
string sequence for calculation [in]
length length of above sequence [in]

Definition at line 1725 of file blast_stat.c.

References Blast_ResFreqResComp(), BlastResCompDestruct(), BlastResCompNew(), and BlastResCompStr().

Referenced by Blast_ScoreBlkKbpUngappedCalc().

Int2 Blast_ScoreBlkKbpIdealCalc BlastScoreBlk sbp  ) 
 

Calculates the Karlin-Altschul parameters assuming standard residue compositions for the query and subject sequences.

It populates the kbp_ideal field of its sbp argument. This is used if the query is translated and the calculated (real) Karlin parameters are bad, as they're calculated for non-coding regions.

Parameters:
sbp ScoreBlk used to calculate "ideal" values. [in|out]
Returns:
0 on success, 1 on failure

Definition at line 2483 of file blast_stat.c.

References Blast_KarlinBlkNew(), Blast_KarlinBlkUngappedCalc(), Blast_ResFreqFree(), Blast_ResFreqNew(), Blast_ResFreqStdComp(), Blast_ScoreFreqFree(), Blast_ScoreFreqNew(), BlastScoreFreqCalc(), BlastScoreBlk::hiscore, BlastScoreBlk::kbp_ideal, and BlastScoreBlk::loscore.

Referenced by Blast_ScoreBlkKbpUngappedCalc(), and s_PHIScoreBlkFill().

Int2 Blast_ScoreBlkKbpUngappedCalc EBlastProgramType  program,
BlastScoreBlk sbp,
Uint1 query,
const BlastQueryInfo query_info,
Blast_Message **  blast_message
 

Calculate and fill the ungapped Karlin-Altschul parameters in the BlastScoreBlk structure (fields kbp_std, kbp_psi, and kbp of that structure).

Parameters:
program BLAST program type, needed to decide whether to substitute ideal values. [in]
sbp Scoring block to work with [in] [out]
query Buffer containing (concatenated) query sequence [in]
query_info Information about offsets of concatenated queries [in]
blast_message returns queries that could not be processed [out]
Returns:
0 if ungapped Karlin-Altschul parameters could be calculated for all of the query sequence's contexts; 1 if any of the contexts failed (but all others will be populated).

Definition at line 2384 of file blast_stat.c.

References ASSERT, Blast_KarlinBlkCopy(), Blast_KarlinBlkFree(), Blast_KarlinBlkNew(), Blast_KarlinBlkUngappedCalc(), Blast_MessageWrite(), Blast_QueryIsPssm(), Blast_QueryIsTranslated(), Blast_ResFreqFree(), Blast_ResFreqNew(), Blast_ResFreqStdComp(), Blast_ResFreqString(), Blast_ScoreBlkKbpIdealCalc(), Blast_ScoreFreqFree(), Blast_ScoreFreqNew(), BlastScoreFreqCalc(), BlastQueryInfo::contexts, eBlastSevWarning, eBlastTypeBlastx, eBlastTypeRpsTblastn, eBlastTypeTblastx, FALSE, BlastQueryInfo::first_context, BlastScoreBlk::hiscore, BlastContextInfo::is_valid, kBlastMessageNoContext, BlastScoreBlk::kbp, BlastScoreBlk::kbp_ideal, BlastScoreBlk::kbp_psi, BlastScoreBlk::kbp_std, Blast_KarlinBlk::Lambda, BlastScoreBlk::loscore, BlastContextInfo::query_length, BlastContextInfo::query_offset, BlastScoreBlk::sfp, and TRUE.

Referenced by BlastSetup_ScoreBlkInit(), and LinkHspTestFixture::setupScoreBlk().

Int2 Blast_ScoreBlkMatrixFill BlastScoreBlk sbp,
GET_MATRIX_PATH  get_path
 

This function fills in the BlastScoreBlk structure.

Tasks are: read in the matrix; set maxscore.

Parameters:
sbp Scoring block [in] [out]
get_path pointer to function that will return path to matrix. Only called if built-in matrix not found [in]

Definition at line 1275 of file blast_stat.c.

References BlastScoreBlk::alphabet_code, BLASTNA_SEQ_CODE, BlastScoreBlkMaxScoreSet(), BlastScoreBlkNuclMatrixCreate(), BlastScoreBlkProteinMatrixLoad(), BlastScoreBlkProteinMatrixRead(), FALSE, fp, BlastScoreBlk::name, BlastScoreBlk::read_in_matrix, sfree, and TRUE.

Referenced by Blast_ScoreBlkMatrixInit(), and s_PHIScoreBlkFill().

Blast_ScoreFreq* Blast_ScoreFreqFree Blast_ScoreFreq sfp  ) 
 

Deallocates the score frequencies structure.

Parameters:
sfp the structure to deallocate [in]
Returns:
NULL

Definition at line 835 of file blast_stat.c.

References sfree, and Blast_ScoreFreq::sprob0.

Referenced by _PSIUpdateLambdaK(), Blast_ScoreBlkKbpIdealCalc(), Blast_ScoreBlkKbpUngappedCalc(), Blast_ScoreFreqNew(), and BlastScoreBlkFree().

Blast_ScoreFreq* Blast_ScoreFreqNew Int4  score_min,
Int4  score_max
 

Creates a new structure to keep track of score frequencies for a scoring system.

Parameters:
score_min Minimum score [in]
score_max Maximum score [in]
Returns:
allocated and initialized pointer to Blast_ScoreFreq

Definition at line 1760 of file blast_stat.c.

References Blast_ScoreFreqFree(), BlastScoreChk(), Blast_ScoreFreq::obs_max, Blast_ScoreFreq::obs_min, Blast_ScoreFreq::score_avg, Blast_ScoreFreq::score_max, Blast_ScoreFreq::score_min, Blast_ScoreFreq::sprob, and Blast_ScoreFreq::sprob0.

Referenced by Blast_ScoreBlkKbpIdealCalc(), Blast_ScoreBlkKbpUngappedCalc(), and s_PHIScoreBlkFill().

Int2 BLAST_ScoreSetAmbigRes BlastScoreBlk sbp,
char  ambiguous_res
 

Set the ambiguous residue (e.g., 'N', 'X') in the BlastScoreBlk*.

Convert from ncbieaa to sbp->alphabet_code (i.e., ncbistdaa) first.

Parameters:
sbp the object to be modified [in|out]
ambiguous_res the residue to be set on the BlastScoreBlk
Returns:
zero on success, others on error

Definition at line 900 of file blast_stat.c.

References BlastScoreBlk::ambig_occupy, BlastScoreBlk::ambig_size, BlastScoreBlk::ambiguous_res, and sfree.

Referenced by Blast_ScoreBlkMatrixInit().

double BLAST_SmallGapSumE Int4  start_points,
Int2  num,
double  xsum,
Int4  query_length,
Int4  subject_length,
Int8  searchsp_eff,
double  weight_divisor
 

Calculates the e-value for alignments with "small" gaps (typically under fifty residues/basepairs) following ideas of Stephen Altschul's.

Parameters:
start_points the number of starting points permitted between adjacent alignments; max_overlap + max_gap + 1 [in]
num the number of distinct alignments in this collection [in]
xsum the sum of the scores of these alignments each individually normalized using an appropriate value of Lambda and logK [in]
query_length effective len of the query seq [in]
subject_length effective len of the subject seq [in]
searchsp_eff effective size of the search space [in]
weight_divisor a divisor used to weight the e-value when multiple collections of alignments are being considered by the calling routine [in]
Returns:
the expect value

Definition at line 3933 of file blast_stat.c.

References BLAST_KarlinPtoE(), BLAST_LnFactorial(), INT4_MAX, and s_BlastSumP().

double BLAST_UnevenGapSumE Int4  query_start_points,
Int4  subject_start_points,
Int2  num,
double  xsum,
Int4  query_length,
Int4  subject_length,
Int8  searchsp_eff,
double  weight_divisor
 

Calculates the e-value of a collection of multiple distinct alignments with asymmetric gaps between the alignments.

The gaps in one (protein) sequence are typically small (like in BLAST_SmallGapSumE) and the gaps in the other (translated DNA) sequence are possibly large (up to 4000 bp). This routine is used for linking HSPs representing exons in the DNA sequence that are separated by introns.

Parameters:
query_start_points the number of starting points in the query sequence permitted between adjacent alignments [in]
subject_start_points the number of starting points in the subject sequence permitted between adjacent alignments [in]
num The number of distinct alignments in this collection [in]
xsum The sum of the scores of these alignments, each normalized using an appropriate value of Lambda and logK [in]
query_length The effective len of the query seq [in]
subject_length The effective len of the database seq [in]
searchsp_eff effective size of the search space [in]
weight_divisor A divisor used to weight the e-value when multiple collections of alignments are being considered by the calling routine [in]
Returns:
Resulting e-value of a combined set.

Definition at line 4005 of file blast_stat.c.

References BLAST_KarlinPtoE(), BLAST_LnFactorial(), INT4_MAX, and s_BlastSumP().

Referenced by s_SumHSPEvalue().

static Int4 BlastKarlinEtoS_simple double  E,
const Blast_KarlinBlk kbp,
Int8  searchsp
[static]
 

Calculates score from expect value and search space.

Parameters:
E expect value [in]
kbp contains Karlin-Altschul parameters [in]
searchsp query times database size [in]
Returns:
score

Definition at line 3556 of file blast_stat.c.

References BLAST_SCORE_MIN, Blast_KarlinBlk::H, H, Blast_KarlinBlk::K, Blast_KarlinBlk::Lambda, and MAX.

Referenced by BLAST_Cutoffs().

static double BlastKarlinLHtoK Blast_ScoreFreq sfp,
double  lambda,
double  H
[static]
 

The following procedure computes K.

The input includes Lambda, H, and an array of probabilities for each score. There are distinct closed forms for three cases: (1) high score is 1 and low score is -1; (2) high score is 1 and low score is not -1; (3) low score is -1 and high score is not 1.

Otherwise, in most cases the value is computed as: -exp(-2.0*outerSum) / ((H/lambda)*(exp(-lambda) - 1)). The last term (exp(-lambda) - 1) can be computed in two different ways depending on whether lambda is small or not. outerSum is a sum of the terms innerSum/j, where j is denoted by iterCounter in the code. The sum is truncated when the new term innerSum/j is sufficiently small. innerSum is a weighted sum of the probabilities of achieving a total score i in a gapless alignment of exactly j characters, which we denote by P(i,j). innerSum(j) has two parts: Sum over i < 0 of P(i,j)exp(-i * lambda) + Sum over i >= 0 of P(i,j). The terms P(i,j) are computed by dynamic programming. An earlier version was flawed in that it ignored the special case 1 and tried to replace the tail of the computation of outerSum by a geometric series, but the base of the geometric series was not accurately estimated in some cases.

Parameters:
sfp object holding scoring frequency information [in]
lambda a Karlin-Altschul parameter [in]
H a Karlin-Altschul parameter [in]
Returns:
K, another Karlin-Altschul parameter
< K_SUMLIMIT_DEFAULT == sumlimit used in BlastKarlinLHtoK()

< upper limit on iterations for BlastKarlinLHtoK

Definition at line 1894 of file blast_stat.c.

References BLAST_Gcd(), Blast_ScoreFreq::obs_max, Blast_ScoreFreq::obs_min, Blast_ScoreFreq::score_avg, and Blast_ScoreFreq::sprob.

Referenced by Blast_KarlinBlkUngappedCalc().

static double BlastKarlinLtoH Blast_ScoreFreq sfp,
double  lambda
[static]
 

Calculate H, the relative entropy of the p's and q's.

Parameters:
sfp object containing scoring frequency information [in]
lambda a Karlin-Altschul parameter [in]
Returns:
H, a Karlin-Altschul parameter

Definition at line 2254 of file blast_stat.c.

References BlastScoreChk(), H, Blast_ScoreFreq::obs_max, Blast_ScoreFreq::obs_min, and Blast_ScoreFreq::sprob.

Referenced by Blast_KarlinBlkUngappedCalc().

static Int2 BlastKarlinReportAllowedValues const char *  matrix_name,
Blast_Message **  error_return
[static]
 

Fills in error_return with strings describing the allowed values.

Parameters:
matrix_name name of the matrix [in]
error_return object to be filled in [in|out]
Returns:
zero on success.

Definition at line 3122 of file blast_stat.c.

References Blast_MessageWrite(), BLAST_Nint(), BlastLoadMatrixValues(), eBlastSevError, FALSE, kBlastMessageNoContext, MatrixInfo::max_number_values, MatrixInfo::name, ListNode::next, ListNode::ptr, strcasecmp, TRUE, and MatrixInfo::values.

Referenced by Blast_KarlinBlkGappedCalc().

static ListNode* BlastLoadMatrixValues void   )  [static]
 

Loads all the matrix values, returns a ListNode* chain that contains MatrixInfo*'s.

Returns:
list of MatrixInfos.
< Number of different combinations supported for BLOSUM80.

< Number of different combinations supported for BLOSUM62.

< Number of different combinations supported for BLOSUM50.

< Number of different combinations supported for BLOSUM45.

< Number of different combinations supported for PAM250.

< Number of different combinations supported for BLOSUM90.

< Number of different combinations supported for PAM30.

< Number of different combinations supported for PAM70.

Definition at line 2603 of file blast_stat.c.

References blosum45_prefs, blosum45_values, BLOSUM45_VALUES_MAX, blosum50_prefs, blosum50_values, BLOSUM50_VALUES_MAX, blosum62_prefs, blosum62_values, BLOSUM62_VALUES_MAX, blosum80_prefs, blosum80_values, BLOSUM80_VALUES_MAX, blosum90_prefs, blosum90_values, BLOSUM90_VALUES_MAX, ListNodeAddPointer(), MatrixInfoNew(), pam250_prefs, pam250_values, PAM250_VALUES_MAX, pam30_values, PAM30_VALUES_MAX, and PAM70_VALUES_MAX.

Referenced by Blast_GetMatrixValues(), Blast_KarlinBlkGappedCalc(), Blast_KarlinBlkGappedLoadFromTables(), BLAST_PrintAllowedValues(), BLAST_PrintMatrixMessage(), and BlastKarlinReportAllowedValues().

static ListNode* BlastMatrixValuesDestruct ListNode vnp  )  [static]
 

Free linked list of MatrixValues and all associated data.

Parameters:
vnp linked list of MatrixValues [in]
Returns:
NULL pointer

Definition at line 2580 of file blast_stat.c.

References ListNodeFree(), MatrixInfoDestruct(), ListNode::next, and ListNode::ptr.

Referenced by Blast_KarlinBlkGappedCalc(), and BLAST_PrintMatrixMessage().

static Blast_ResComp* BlastResCompDestruct Blast_ResComp rcp  )  [static]
 

Deallocates Blast_ResComp structure and associated arrays.

Parameters:
rcp the object to be freed [in|out]
Returns:
NULL

Definition at line 1581 of file blast_stat.c.

References Blast_ResComp::comp0, and sfree.

Referenced by Blast_ResFreqString(), and BlastResCompNew().

static Blast_ResComp* BlastResCompNew const BlastScoreBlk sbp  )  [static]
 

Allocates the Blast_ResComp* for a given alphabet.

Only the alphabets ncbistdaa and ncbi4na should be used by BLAST.

Parameters:
sbp contains alphabet code and size.
Returns:
pointer to Blast_ResComp, correctly initialized.

Definition at line 1599 of file blast_stat.c.

References BlastScoreBlk::alphabet_code, Blast_ResComp::alphabet_code, BlastScoreBlk::alphabet_size, BlastScoreBlk::alphabet_start, BlastResCompDestruct(), Blast_ResComp::comp, and Blast_ResComp::comp0.

Referenced by Blast_ResFreqString().

static Int2 BlastResCompStr const BlastScoreBlk sbp,
Blast_ResComp rcp,
char *  str,
Int4  length
[static]
 

Store the composition of a (query) string.

Parameters:
sbp needed for alphabet information [in]
rcp object to be filled in [in|out]
str sequence to have composition calculated for [in]
length length of sequence [in]
Returns:
zero on success, 1 otherwise.

Definition at line 1631 of file blast_stat.c.

References BlastScoreBlk::alphabet_code, Blast_ResComp::alphabet_code, BlastScoreBlk::alphabet_size, Blast_ResComp::comp, Blast_ResComp::comp0, and BlastScoreBlk::protein_alphabet.

Referenced by Blast_ResFreqString().

BlastScoreBlk* BlastScoreBlkFree BlastScoreBlk sbp  ) 
 

Deallocates BlastScoreBlk as well as all associated structures.

Parameters:
sbp BlastScoreBlk to be deallocated [in]
Returns:
NULL pointer.

Definition at line 859 of file blast_stat.c.

References Blast_KarlinBlkFree(), Blast_ScoreFreqFree(), BlastScoreBlk::kbp_gap_psi, BlastScoreBlk::kbp_gap_std, BlastScoreBlk::kbp_psi, BlastScoreBlk::kbp_std, BlastScoreBlk::number_of_contexts, and BlastScoreBlk::sfp.

Referenced by BlastScoreBlkNew(), BOOST_AUTO_TEST_CASE(), BOOST_AUTO_UNIT_TEST(), LinkHspTestFixture::freeStructures(), TestFixture::TearDownLookupTable(), CBlastTracebackSearch::x_Init(), CPssmEngine::x_InitializeScoreBlock(), CBl2Seq::x_ResetQueryDs(), AalookupTestFixture::~AalookupTestFixture(), AascanTestFixture::~AascanTestFixture(), and CScoreBuilder::~CScoreBuilder().

static Int2 BlastScoreBlkMaxScoreSet BlastScoreBlk sbp  )  [static]
 

Sets maximum and minimum scores on the BlastScoreBlk for a given matrix.

Parameters:
sbp the BlastScoreBlk on which loscore and hiscore will be set [in|out]
Returns:
zero on success

Definition at line 1177 of file blast_stat.c.

References BlastScoreBlk::alphabet_size, BLAST_SCORE_MAX, BLAST_SCORE_MIN, SBlastScoreMatrix::data, BlastScoreBlk::hiscore, BlastScoreBlk::loscore, and BlastScoreBlk::matrix.

Referenced by Blast_ScoreBlkMatrixFill().

BlastScoreBlk* BlastScoreBlkNew Uint1  alphabet,
Int4  number_of_contexts
 

Allocates and initializes BlastScoreBlk.

Parameters:
alphabet either BLASTAA_SEQ_CODE or BLASTNA_SEQ_CODE [in]
number_of_contexts how many strands or sequences [in]
Returns:
BlastScoreBlk*

Definition at line 784 of file blast_stat.c.

References BlastScoreBlk::alphabet_code, BlastScoreBlk::alphabet_size, BLASTAA_SEQ_CODE, BLASTAA_SIZE, BLASTNA_SEQ_CODE, BLASTNA_SIZE, BlastScoreBlkFree(), FALSE, BlastScoreBlk::kbp_gap_psi, BlastScoreBlk::kbp_gap_std, BlastScoreBlk::kbp_psi, BlastScoreBlk::kbp_std, BlastScoreBlk::matrix, BlastScoreBlk::number_of_contexts, BlastScoreBlk::protein_alphabet, SBlastScoreMatrixNew(), BlastScoreBlk::scale_factor, BlastScoreBlk::sfp, and TRUE.

Referenced by AascanTestFixture::AascanTestFixture(), BlastSetup_ScoreBlkInit(), BOOST_AUTO_TEST_CASE(), AalookupTestFixture::FillLookupTable(), s_FillScoreBlkWithBadKbp(), LinkHspTestFixture::setupScoreBlk(), and CMultiAligner::x_AssignDefaultResFreqs().

Int2 BlastScoreBlkNuclMatrixCreate BlastScoreBlk sbp  ) 
 

Fill in the matrix for blastn using the penalty and reward. The query sequence alphabet is blastna, the subject sequence is ncbi2na.

The alphabet blastna is defined in blast_stat.h and the first four elements of blastna are identical to ncbi2na.

Parameters:
sbp the BlastScoreBlk on which reward, penalty, and matrix will be set [in|out]
Returns:
zero on success.

Definition at line 948 of file blast_stat.c.

References BlastScoreBlk::alphabet_size, ASSERT, BLASTNA_SIZE, BLASTNA_TO_NCBI4NA, SBlastScoreMatrix::data, BlastScoreBlk::matrix, SBlastScoreMatrix::ncols, SBlastScoreMatrix::nrows, BlastScoreBlk::penalty, and BlastScoreBlk::reward.

Referenced by Blast_ScoreBlkMatrixFill().

static Int2 BlastScoreBlkProteinMatrixLoad BlastScoreBlk sbp  )  [static]
 

Sets sbp->matrix->data field using sbp->name field using the matrices in the toolkit (util/tables/raw_scoremat.h).

Parameters:
sbp the object containing matrix and name [in|out]
Returns:
0 on success, 1 if matrix could not be loaded

Definition at line 1216 of file blast_stat.c.

References BlastScoreBlk::alphabet_size, ASSERT, BLAST_SCORE_MIN, BLASTAA_SIZE, SBlastScoreMatrix::data, BlastScoreBlk::matrix, BlastScoreBlk::name, NCBISM_GetStandardMatrix(), SBlastScoreMatrix::ncols, and SBlastScoreMatrix::nrows.

Referenced by Blast_ScoreBlkMatrixFill().

static Int2 BlastScoreBlkProteinMatrixRead BlastScoreBlk sbp,
FILE *  fp
[static]
 

Read in the matrix from the FILE *fp.

This function ASSUMES that the matrices are in the ncbistdaa alphabet.

Parameters:
sbp the BlastScoreBlk with the matrix to be populated [in|out]
fp the file pointer to read from [in]
Returns:
zero on success

Definition at line 1028 of file blast_stat.c.

References BlastScoreBlk::alphabet_code, BlastScoreBlk::alphabet_size, ASSERT, BLAST_SCORE_MIN, BLASTAA_SIZE, BLASTNA_SEQ_CODE, buf, SBlastScoreMatrix::data, BlastScoreBlk::matrix, SBlastScoreMatrix::ncols, and SBlastScoreMatrix::nrows.

Referenced by Blast_ScoreBlkMatrixFill().

static Int2 BlastScoreChk Int4  lo,
Int4  hi
[static]
 

Check that the lo and hi score are within the allowed ranges.

Parameters:
lo the lowest permitted value [in]
hi the highest permitted value [in]
Returns:
zero on success, 1 otherwise
Uses BLAST_SCORE_RANGE_MAX, the maximum allowed range of BLAST scores.

Definition at line 1747 of file blast_stat.c.

References BLAST_SCORE_MAX, and BLAST_SCORE_RANGE_MAX.

Referenced by Blast_KarlinLambdaNR(), Blast_ScoreFreqNew(), and BlastKarlinLtoH().

static Int2 BlastScoreFreqCalc const BlastScoreBlk sbp,
Blast_ScoreFreq sfp,
Blast_ResFreq rfp1,
Blast_ResFreq rfp2
[static]
 

Calculates the score frequencies.

Parameters:
sbp object with scoring information [in]
sfp object to hold frequency information [in|out]
rfp1 letter frequencies for first sequence (query) [in]
rfp2 letter frequencies for second sequence (database) [in]
Returns:
zero on success

Definition at line 1798 of file blast_stat.c.

References BlastScoreBlk::alphabet_size, BlastScoreBlk::alphabet_start, SBlastScoreMatrix::data, BlastScoreBlk::hiscore, BlastScoreBlk::loscore, BlastScoreBlk::matrix, Blast_ResFreq::prob, Blast_ScoreFreq::score_max, Blast_ScoreFreq::score_min, and Blast_ScoreFreq::sprob.

Referenced by Blast_ScoreBlkKbpIdealCalc(), and Blast_ScoreBlkKbpUngappedCalc().

static MatrixInfo* MatrixInfoDestruct MatrixInfo matrix_info  )  [static]
 

Deallocates MatrixInfo as well as name string.

Parameters:
matrix_info the object to be deallocated [in]
Returns:
NULL pointer

Definition at line 2541 of file blast_stat.c.

References MatrixInfo::name, and sfree.

Referenced by BlastMatrixValuesDestruct().

static MatrixInfo* MatrixInfoNew const char *  name,
array_of_8 values,
Int4 prefs,
Int4  max_number
[static]
 

Allocates New MatrixInfo*.

Parameters:
name name of matrix [in]
values array contains information about a matrix [in]
prefs contains information on which values are preferred [in]
max_number size of those arrays [in]
Returns:
pointer to the allocated MatrixInfo

Definition at line 2561 of file blast_stat.c.

References MatrixInfo::max_number_values, MatrixInfo::name, MatrixInfo::prefs, strdup, and MatrixInfo::values.

Referenced by BlastLoadMatrixValues().

static double NlmKarlinLambdaNR double *  probs,
Int4  d,
Int4  low,
Int4  high,
double  lambda0,
double  tolx,
Int4  itmax,
Int4  maxNewton,
Int4 itn
[static]
 

Find the positive solution to the equation

sum_{i=low}^{high} exp(i lambda) * probs[i] = 1.

Note that this solution does not exist unless the average score is negative and the largest score that occurs with nonzero probability is positive.

Parameters:
probs probabilities of a score occurring
d the gcd of the possible scores. This equals 1 if the scores are not a lattice
low the lowest possible score that occurs with nonzero probability
high the highest possible score that occurs with nonzero probability.
lambda0 an initial guess for lambda
tolx the tolerance to which lambda must be computed
itmax the maximum number of times the function may be evaluated
maxNewton the maximum permissible number of Newton iterations; after that the computation will proceed by bisection.
*itn the number of iterations needed to compute Lambda, or itmax if Lambda could not be computed.
Let phi(lambda) = sum_{i=low}^{high} exp(i lambda) * probs[i] - 1. Then phi(lambda) may be written

phi(lambda) = exp(u lambda) f( exp(-lambda) )

where f(x) is a polynomial that has exactly two zeros, one at x = 1 and one at x = exp(-lambda). It is simpler to solve this problem in x = exp(-lambda) than it is to solve it in lambda, because we know that for x, a solution lies in [0,1], and because Newton's method is generally more stable and efficient for polynomials than it is for exponentials.

For the most part, this function is a standard safeguarded Newton iteration: define an interval of uncertainty [a,b] with f(a) > 0 and f(b) < 0 (except for the initial value b = 1, where f(b) = 0); evaluate the function and use the sign of that value to shrink the interval of uncertainty; compute a Newton step; and if the Newton step suggests a point outside the interval of uncertainty or fails to decrease the function sufficiently, then bisect. There are three further details needed to understand the algorithm:

1) If y is the unique solution in [0,1], then f is positive to the left of y, and negative to the right. Therefore, we may determine whether the Newton step -f(x)/f'(x) is moving toward, or away from, y by examining the sign of f'(x). If f'(x) >= 0, we bisect instead of taking the Newton step. 2) There is a neighborhood around x = 1 for which f'(x) >= 0, so (1) prevents convergence to x = 1 (and for a similar reason prevents convergence to x = 0, if the function is incorrectly called with probs[high] == 0). 3) Conditions like fabs(p) < lambda_tolerance * x * (1-x) are used in convergence criteria because these values translate to a bound on the relative error in lambda. This is proved in the "Blast Scoring Parameters" document that accompanies the BLAST code.

The iteration on f(x) is robust and doesn't overflow; defining a robust safeguarded Newton iteration on phi(lambda) that cannot converge to lambda = 0 and that is protected against overflow is more difficult. So (despite the length of this comment) the Newton iteration on f(x) is the simpler solution.

Definition at line 2138 of file blast_stat.c.

References f(), and g().

static void RPSFillScores Int4 **  matrix,
Int4  matrixLength,
double *  queryProbArray,
double *  scoreArray,
Blast_ScoreFreq return_sfp,
Int4  range,
Int4  alphabet_size
[static]
 

The routine RPSFillScores computes the probability of each score, weighted by the probability of each query residue, and fills those probabilities into scoreArray. It also stores scoreArray as a field in the structure that is returned. For indexing convenience, the field storing scoreArray points to the entry for score 0, so that referring to the -k index corresponds to score -k. FIXME: This can be replaced by _PSIComputeScoreProbabilities??

Parameters:
matrix a position-specific score matrix with matrixLength positions [in]
matrixLength number of positions in the pssm (arg above) [in]
queryProbArray an array containing the probability of occurrence of each residue in the query [in]
scoreArray an array of probabilities for each score that is to be used as a field in return_sfp
return_sfp a structure to be filled in and returned [in|out]
range the size of scoreArray and is an upper bound on the difference between maximum score and minimum score in the matrix [in]
alphabet_size Number of letters in the alphabet of the input score matrix [in]

Definition at line 4161 of file blast_stat.c.

References AMINOACID_TO_NCBISTDAA, and BLAST_SCORE_MIN.

Referenced by RPSRescalePssm().

static double RPSfindUngappedLambda const char *  matrixName  )  [static]
 

Gets the ungapped lambda calculated for the matrix in question given standard residue composition for both query and subject sequences.

Parameters:
matrixName name of amino acid substitution matrix [in]
Returns:
lambda ungapped or 0.0 if matrix is not supported

Definition at line 4123 of file blast_stat.c.

References Blast_GetMatrixValues(), and sfree.

Referenced by RPSRescalePssm(), and s_BuildCompressedScoreMatrix().

Int4** RPSRescalePssm double  scalingFactor,
Int4  rps_query_length,
const Uint1 rps_query_seq,
Int4  db_seq_length,
Int4 **  posMatrix,
BlastScoreBlk sbp
 

Rescale the PSSM, using composition-based statistics, for use with RPS BLAST.

Uses BLAST_SCORE_RANGE_MAX, the maximum allowed range of BLAST scores.

Definition at line 4205 of file blast_stat.c.

References _PSIAllocateMatrix(), AMINOACID_TO_NCBISTDAA, ASSERT, Blast_FillResidueProbability(), Blast_KarlinLambdaNR(), BLAST_SCORE_MIN, BLAST_SCORE_RANGE_MAX, BlastScoreBlk::name, SBlastScoreMatrix::nrows, BlastScoreBlk::psi_matrix, SPsiBlastScoreMatrix::pssm, RPSFillScores(), RPSfindUngappedLambda(), and sfree.

Referenced by s_RPSComputeTraceback().

static Int2 s_AdjustGapParametersByGcd array_of_8 normal,
array_of_8 linear,
int  size,
Int4 gap_existence_max,
Int4 gap_extend_max,
int  divisor
[static]
 

Adjust Lambda and H if reward and penalty have a non-1 gcd.

The two arrays (normal and linear) should be filled in with values already.

Parameters:
normal the values for normal (e.g, "affine") gap costs [in|out]
linear specialized values used for megablast [in|out]
size Number of supported combinations for this match/mismatch pair [out]
gap_existence_max start of infinite regime for gap existence [in|out]
gap_extend_max start of infinite regime for gap extension [in|out]
divisor divisor for gap costs [out]

Definition at line 2830 of file blast_stat.c.

Referenced by s_GetNuclValuesArray().

static double s_BlastSumP Int4  r,
double  s
[static]
 

Estimate the Sum P-value by calculation or interpolation, as appropriate.

Approx. 2-1/2 digits accuracy minimum throughout the range of r, s.

Parameters:
r number of segments [in]
s total score (in nats), adjusted by -r*log(KN) [in]
Returns:
p-value

Definition at line 3873 of file blast_stat.c.

References BLAST_Expm1(), BLAST_LnGammaInt(), DIM, and s_BlastSumPCalc().

Referenced by BLAST_LargeGapSumE(), BLAST_SmallGapSumE(), and BLAST_UnevenGapSumE().

static double s_BlastSumPCalc int  r,
double  s
[static]
 

Evaluate the following double integral, where r = number of segments and s = the adjusted score in nats:

$$\mathrm{Prob}(r,s) = \frac{r^{\,r-2}}{(r-1)!\,(r-2)!} \int_{s}^{\infty} e^{-y} \int_{0}^{\infty} x^{\,r-2} \exp\!\left(-e^{\,x - y/r}\right) dx\, dy$$

Parameters:
r number of segments
s adjusted score in nats
Returns:
P value

Definition at line 3785 of file blast_stat.c.

References BLAST_Expm1(), BLAST_LnGammaInt(), BLAST_RombergIntegrate(), and s_InnerIntegralCback().

Referenced by s_BlastSumP().

static Int2 s_BuildCompressedScoreMatrix BlastScoreBlk sbp,
SCompressedAlphabet new_alphabet,
double  matrix_scale_factor,
CompressedReverseLookup  rev_table
[static]
 

Compute a (non-square) score matrix for a compressed alphabet.

Parameters:
sbp Structure containing alphabet and scoring information [in]
new_alphabet Structure defining the new alphabet, including the final score matrix [in|out]
matrix_scale_factor Score matrix entries are scaled by this value [in]
rev_table A (one-to-many) mapping from compressed letter to protein letter. The list of protein letters in each row of the table ends with a negative value [in]

Definition at line 4386 of file blast_stat.c.

References _PSIMatrixFrequencyRatiosFree(), _PSIMatrixFrequencyRatiosNew(), BLAST_Nint(), BLAST_SCORE_MIN, SCompressedAlphabet::compressed_alphabet_size, SFreqRatios::data, SBlastScoreMatrix::data, SCompressedAlphabet::matrix, BlastScoreBlk::name, RPSfindUngappedLambda(), s_GetCompressedProbs(), and SBlastScoreMatrixNew().

Referenced by SCompressedAlphabetNew().

static void s_BuildCompressedTranslation const char *  trans_string,
Uint1 table,
Int4  compressed_alphabet_size,
CompressedReverseLookup  rev_table
[static]
 

Parse the string defining the conversion between the ordinary protein alphabet and a compressed alphabet.

Parameters:
trans_string The alphabet mapping [in]
table A map from protein letter to compressed letter. Protein letters that have no compressed equivalent will translate to value alphabet_size [out]
compressed_alphabet_size The anticipated size of the compressed alphabet [in]
rev_table A (one-to-many) mapping from compressed letter to protein letter. The list of protein letters in each row of the table ends with a negative value [out]

Definition at line 4291 of file blast_stat.c.

Referenced by SCompressedAlphabetNew().

static Int2 s_GetCompressedProbs BlastScoreBlk sbp,
double *  compressed_prob,
Int4  compressed_alphabet_size,
CompressedReverseLookup  rev_table
[static]
 

Calculate conditional probability of each letter in each group.

Parameters:
sbp Structure containing alphabet information [in]
compressed_prob Array containing final probabilities [out]
compressed_alphabet_size size of the alphabet [in]
rev_table A (one-to-many) mapping from compressed letter to protein letter. The list of protein letters in each row of the table ends with a negative value [in]

Definition at line 4329 of file blast_stat.c.

References Blast_ResFreqNew(), Blast_ResFreqStdComp(), letter(), and Blast_ResFreq::prob.

Referenced by s_BuildCompressedScoreMatrix().

static Int2 s_GetNuclValuesArray Int4  reward,
Int4  penalty,
Int4 array_size,
array_of_8 **  normal,
array_of_8 **  non_affine,
Int4 gap_open_max,
Int4 gap_extend_max,
Boolean round_down,
Blast_Message **  error_return
[static]
 

Returns the array of values corresponding to the given match/mismatch scores, the number of supported gap cost combinations and thresholds for the gap costs, beyond which the ungapped statistics can be applied.

Parameters:
reward Match reward score [in]
penalty Mismatch penalty score [in]
array_size Number of supported combinations for this match/mismatch pair [out]
normal the values for normal (e.g, "affine") gap costs [out]
non_affine specialized values used for megablast [out]
gap_open_max Gap opening cost threshold for infinite gap costs [out]
gap_extend_max Gap extension cost threshold for infinite gap costs [out]
round_down if set to TRUE only even scores should be used for calculation of expect value or bit scores [out]
error_return Pointer to error message [out]
Returns:
zero on success, other values if error

Definition at line 2882 of file blast_stat.c.

References BLAST_Gcd(), Blast_MessageWrite(), BlastMemDup(), eBlastSevError, FALSE, kBlastMessageNoContext, s_AdjustGapParametersByGcd(), s_SplitArrayOf8(), and TRUE.

Referenced by BLAST_CheckRewardPenaltyScores(), Blast_GetNuclAlphaBeta(), BLAST_GetNucleotideGapExistenceExtendParams(), and Blast_KarlinBlkNuclGappedCalc().

static double s_GetUngappedBeta Int4  reward,
Int4  penalty
[static]
 

Returns the beta statistical parameter value, given the nucleotide substitution scores.

Parameters:
reward Match reward score [in]
penalty Mismatch penalty score [in]
Returns:
The value of the beta parameter.

Definition at line 3471 of file blast_stat.c.

static double s_InnerIntegralCback double  s,
void *  vp
[static]
 

Callback for the Romberg integration function.

This is the first of the double integrals that BlastSumPCalc calculates. This is the integral described in the paragraph after eqn. 4 of Karlin and Altschul, PNAS USA, 90, 5873-5877 (1993).

Parameters:
s variable to integrate over [in]
vp pointer to parameters [in]
Returns:
value of integrand

Definition at line 3758 of file blast_stat.c.

References SRombergCbackArgs::adj1, SRombergCbackArgs::adj2, BLAST_RombergIntegrate(), SRombergCbackArgs::epsilon, SRombergCbackArgs::num_hsps, s_OuterIntegralCback(), and SRombergCbackArgs::sdvir.

Referenced by s_BlastSumPCalc().

static double s_OuterIntegralCback double  x,
void *  vp
[static]
 

Callback for the Romberg integration function.

This is the second of the double integrals that BlastSumPCalc calculates. This is eqn. 4 of Karlin and Altschul, PNAS USA, 90, 5873-5877 (1993).

Parameters:
x variable to integrate over [in]
vp pointer to parameters [in]
Returns:
value of integrand

Definition at line 3732 of file blast_stat.c.

References SRombergCbackArgs::adj2, SRombergCbackArgs::num_hsps_minus_2, and SRombergCbackArgs::sdvir.

Referenced by s_InnerIntegralCback().

static Int2 s_SplitArrayOf8 const array_of_8 input,
const array_of_8 **  normal,
const array_of_8 **  non_affine,
Boolean split
[static]
 

Splits an ArrayOf8 into two arrays of supported gap costs.

One is for non-affine (megablast linear values) and the other is for standard (typically affine) values.

Parameters:
input the array to be split [in]
normal the standard (typically affine) values [out]
non_affine the megablast (linear) values [out]
split Boolean specifying whether the non-affine values are populated [out]
Returns:
0 on success, -1 on error

Definition at line 2796 of file blast_stat.c.

References FALSE, and TRUE.

Referenced by s_GetNuclValuesArray().

static SBlastScoreMatrix* SBlastScoreMatrixFree SBlastScoreMatrix matrix  )  [static]
 

Deallocates SBlastScoreMatrix structure.

Parameters:
matrix structure to deallocate [in]
Returns:
NULL

Definition at line 690 of file blast_stat.c.

References _PSIDeallocateMatrix(), SBlastScoreMatrix::data, SBlastScoreMatrix::ncols, and sfree.

Referenced by SBlastScoreMatrixNew(), SCompressedAlphabetFree(), and SPsiBlastScoreMatrixFree().

static SBlastScoreMatrix* SBlastScoreMatrixNew size_t  ncols,
size_t  nrows
[static]
 

Allocates a new SBlastScoreMatrix structure of the specified dimensions.

Parameters:
ncols number of columns [in]
nrows number of rows [in]
Returns:
NULL in case of memory allocation failure, else new SBlastScoreMatrix structure

Definition at line 712 of file blast_stat.c.

References _PSIAllocateMatrix(), SBlastScoreMatrix::data, SBlastScoreMatrix::ncols, SBlastScoreMatrix::nrows, and SBlastScoreMatrixFree().

Referenced by BlastScoreBlkNew(), s_BuildCompressedScoreMatrix(), and SPsiBlastScoreMatrixNew().

SCompressedAlphabet* SCompressedAlphabetFree SCompressedAlphabet alphabet  ) 
 

Free a compressed alphabet and score matrix.

Parameters:
alphabet The compressed alphabet structure
Returns:
Always NULL

Definition at line 4503 of file blast_stat.c.

References SCompressedAlphabet::compress_table, SCompressedAlphabet::matrix, SBlastScoreMatrixFree(), and sfree.

Referenced by SCompressedAlphabetNew().

SCompressedAlphabet* SCompressedAlphabetNew BlastScoreBlk sbp,
Int4  compressed_alphabet_size,
double  scale_factor
 

Allocate a new compressed alphabet and score matrix.

Parameters:
sbp Current score matrix information [in]
compressed_alphabet_size Desired size of compressed alphabet (current choices are limited to 10 or 15) [in]
scale_factor Score matrix entries are scaled by this value [in]
Returns:
the new alphabet, or NULL on failure

Definition at line 4467 of file blast_stat.c.

References ASSERT, BLASTAA_SIZE, SCompressedAlphabet::compress_table, SCompressedAlphabet::compressed_alphabet_size, s_BuildCompressedScoreMatrix(), s_BuildCompressedTranslation(), and SCompressedAlphabetFree().

Referenced by BlastCompressedAaLookupTableNew().

SPsiBlastScoreMatrix* SPsiBlastScoreMatrixFree SPsiBlastScoreMatrix matrix  ) 
 

Deallocates a SPsiBlastScoreMatrix structure.

Parameters:
matrix structure to deallocate [in]
Returns:
NULL

Definition at line 731 of file blast_stat.c.

References _PSIDeallocateMatrix(), Blast_KarlinBlkFree(), SPsiBlastScoreMatrix::freq_ratios, SPsiBlastScoreMatrix::kbp, SBlastScoreMatrix::ncols, SPsiBlastScoreMatrix::pssm, SBlastScoreMatrixFree(), and sfree.

Referenced by SPsiBlastScoreMatrixNew().

SPsiBlastScoreMatrix* SPsiBlastScoreMatrixNew size_t  ncols  ) 
 

Allocates a new SPsiBlastScoreMatrix structure of dimensions ncols by BLASTAA_SIZE.

Parameters:
ncols number of columns (i.e.: query length) [in]
Returns:
NULL in case of memory allocation failure, else new SPsiBlastScoreMatrix structure

Definition at line 750 of file blast_stat.c.

References _PSIAllocateMatrix(), Blast_KarlinBlkNew(), BLASTAA_SIZE, SPsiBlastScoreMatrix::freq_ratios, SPsiBlastScoreMatrix::kbp, SPsiBlastScoreMatrix::pssm, SBlastScoreMatrixNew(), and SPsiBlastScoreMatrixFree().


Variable Documentation

const array_of_8 blastn_values_1_1[] [static]
 

Initial value:

 {
    { 3,  2, 1.09,  0.31, 0.55, 2.0,  -2, 99 },
    { 2,  2, 1.07,  0.27, 0.49, 2.2,  -3, 97 }, 
    { 1,  2, 1.02,  0.21, 0.36, 2.8,  -6, 92 }, 
    { 0,  2, 0.80, 0.064, 0.17, 4.8, -16, 72 },
    { 4,  1, 1.08,  0.28, 0.54, 2.0,  -2, 98 }, 
    { 3,  1, 1.06,  0.25, 0.46, 2.3,  -4, 96 }, 
    { 2,  1, 0.99,  0.17, 0.30, 3.3, -10, 90 }
}
Karlin-Altschul parameter values for substitution scores 1 and -1.

Definition at line 664 of file blast_stat.c.

const array_of_8 blastn_values_1_2[] [static]
 

Initial value:

 {
    { 0, 0, 1.28, 0.46, 0.85, 1.5, -2, 96 },
    { 2, 2, 1.33, 0.62,  1.1, 1.2,  0, 99 },
    { 1, 2, 1.30, 0.52, 0.93, 1.4, -2, 97 }, 
    { 0, 2, 1.19, 0.34, 0.66, 1.8, -3, 89 },
    { 3, 1, 1.32, 0.57,  1.0, 1.3, -1, 99 }, 
    { 2, 1, 1.29, 0.49, 0.92, 1.4, -1, 96 }, 
    { 1, 1, 1.14, 0.26, 0.52, 2.2, -5, 85 }
}
Karlin-Altschul parameter values for substitution scores 1 and -2.

Definition at line 618 of file blast_stat.c.

const array_of_8 blastn_values_1_3[] [static]
 

Initial value:

 {
    { 0, 0, 1.374, 0.711, 1.31, 1.05,  0, 100 },
    { 2, 2,  1.37,  0.70,  1.2,  1.1,  0,  99 },
    { 1, 2,  1.35,  0.64,  1.1,  1.2, -1,  98 },
    { 0, 2,  1.25,  0.42, 0.83,  1.5, -2,  91 },
    { 2, 1,  1.34,  0.60,  1.1,  1.2, -1,  97 },
    { 1, 1,  1.21,  0.34, 0.71,  1.7, -2,  88 }
}
Karlin-Altschul parameter values for substitution scores 1 and -3.

Definition at line 596 of file blast_stat.c.

const array_of_8 blastn_values_1_4[] [static]
 

Initial value:

 {
    { 0, 0, 1.383, 0.738, 1.36, 1.02,  0, 100 },
    { 1, 2,  1.36,  0.67,  1.2,  1.1,  0,  98 }, 
    { 0, 2,  1.26,  0.43, 0.90,  1.4, -1,  91 },
    { 2, 1,  1.35,  0.61,  1.1,  1.2, -1,  98 },
    { 1, 1,  1.22,  0.35, 0.72,  1.7, -3,  88 }
}
Karlin-Altschul parameter values for substitution scores 1 and -4.

Definition at line 575 of file blast_stat.c.

const array_of_8 blastn_values_1_5[] [static]
 

Initial value:

 {
    { 0, 0, 1.39, 0.747, 1.38, 1.00,  0, 100 },
    { 3, 3, 1.39, 0.747, 1.38, 1.00,  0, 100 }
}
Karlin-Altschul parameter values for substitution scores 1 and -5.

Definition at line 569 of file blast_stat.c.

const array_of_8 blastn_values_2_3[] [static]
 

Initial value:

 {
    { 0, 0,  0.55, 0.21, 0.46,  1.2, -5, 87 },
    { 4, 4,  0.63, 0.42, 0.84, 0.75, -2, 99 },
    { 2, 4, 0.615, 0.37, 0.72, 0.85, -3, 97 },
    { 0, 4,  0.55, 0.21, 0.46,  1.2, -5, 87 },
    { 3, 3, 0.615, 0.37, 0.68,  0.9, -3, 97 },
    { 6, 2,  0.63, 0.42, 0.84, 0.75, -2, 99 },
    { 5, 2, 0.625, 0.41, 0.78,  0.8, -2, 99 },
    { 4, 2,  0.61, 0.35, 0.68,  0.9, -3, 96 },
    { 2, 2, 0.515, 0.14, 0.33, 1.55, -9, 81 }
}
Karlin-Altschul parameter values for substitution scores 2 and -3.

These parameters can only be applied to even scores. Any odd score must be rounded down to the nearest even number before calculating the e-value.

Definition at line 632 of file blast_stat.c.

const array_of_8 blastn_values_2_5[] [static]
 

Initial value:

 {
    { 0, 0, 0.675, 0.65,  1.1,  0.6, -1, 99 },
    { 2, 4,  0.67, 0.59,  1.1,  0.6, -1, 98 },
    { 0, 4,  0.62, 0.39, 0.78,  0.8, -2, 91 },
    { 4, 2,  0.67, 0.61,  1.0, 0.65, -2, 98 },
    { 2, 2,  0.56, 0.32, 0.59, 0.95, -4, 82 }
}
Karlin-Altschul parameter values for substitution scores 2 and -5.

These parameters can only be applied to even scores. Any odd score must be rounded down to the nearest even number before calculating the e-value.

Definition at line 609 of file blast_stat.c.

const array_of_8 blastn_values_2_7[] [static]
 

Initial value:

 {
    { 0, 0,  0.69, 0.73, 1.34, 0.515,  0, 100 },
    { 2, 4,  0.68, 0.67,  1.2,  0.55,  0,  99 }, 
    { 0, 4,  0.63, 0.43, 0.90,   0.7, -1,  91 },
    { 4, 2, 0.675, 0.62,  1.1,   0.6, -1,  98 },
    { 2, 2,  0.61, 0.35, 0.72,   1.7, -3,  88 }
}
Karlin-Altschul parameter values for substitution scores 2 and -7.

These parameters can only be applied to even scores. Any odd score must be rounded down to the nearest even number before calculating the e-value.

Definition at line 587 of file blast_stat.c.

const array_of_8 blastn_values_3_2[] [static]
 

Initial value:

 {
    {  5,  5, 0.208, 0.030, 0.072, 2.9, -47, 77}
}
Karlin-Altschul parameter values for substitution scores 3 and -2.

Definition at line 675 of file blast_stat.c.

const array_of_8 blastn_values_3_4[] [static]
 

Initial value:

 {
    { 6, 3, 0.389, 0.25, 0.56, 0.7, -5, 95},
    { 5, 3, 0.375, 0.21, 0.47, 0.8, -6, 92},
    { 4, 3, 0.351, 0.14, 0.35, 1.0, -9, 86},
    { 6, 2, 0.362, 0.16, 0.45, 0.8, -4, 88},
    { 5, 2, 0.330, 0.092, 0.28, 1.2, -13, 81},
    { 4, 2, 0.281, 0.046, 0.16, 1.8, -23, 69}
}
Karlin-Altschul parameter values for substitution scores 3 and -4.

Definition at line 645 of file blast_stat.c.

const array_of_8 blastn_values_4_5[] [static]
 

Initial value:

 {
    { 0, 0, 0.22, 0.061, 0.22, 1.0, -15, 74 },
    { 6, 5, 0.28,  0.21, 0.47, 0.6 , -7, 93 },
    { 5, 5, 0.27,  0.17, 0.39, 0.7,  -9, 90 },
    { 4, 5, 0.25,  0.10, 0.31, 0.8, -10, 83 },
    { 3, 5, 0.23, 0.065, 0.25, 0.9, -11, 76 }
}
Karlin-Altschul parameter values for substitution scores 4 and -5.

Definition at line 655 of file blast_stat.c.

const array_of_8 blastn_values_5_4[] [static]
 

Initial value:

 {
    { 10, 6, 0.163, 0.068, 0.16, 1.0, -19, 85 },
    {  8, 6, 0.146, 0.039, 0.11, 1.3, -29, 76 }
}
Karlin-Altschul parameter values for substitution scores 5 and -4.

Definition at line 680 of file blast_stat.c.

Int4 blosum45_prefs[14] [static]
 

Initial value:

 {
BLAST_MATRIX_NOMINAL,
BLAST_MATRIX_NOMINAL,
BLAST_MATRIX_NOMINAL,
BLAST_MATRIX_NOMINAL,
BLAST_MATRIX_NOMINAL,
BLAST_MATRIX_NOMINAL,
BLAST_MATRIX_NOMINAL,
BLAST_MATRIX_BEST,
BLAST_MATRIX_NOMINAL,
BLAST_MATRIX_NOMINAL,
BLAST_MATRIX_NOMINAL,
BLAST_MATRIX_NOMINAL,
BLAST_MATRIX_NOMINAL,
BLAST_MATRIX_NOMINAL
}
Quality values for the BLOSUM45 matrix; each element corresponds to the element at the same index in the array blosum45_values.

Definition at line 185 of file blast_stat.c.

Referenced by BlastLoadMatrixValues().

array_of_8 blosum45_values[14] [static]
 

Initial value:

 {
    {(double) INT2_MAX, (double) INT2_MAX, (double) INT2_MAX, 0.2291, 0.0924, 0.2514, 0.9113, -5.7},
    {13, 3, (double) INT2_MAX, 0.207, 0.049, 0.14, 1.5, -22},
    {12, 3, (double) INT2_MAX, 0.199, 0.039, 0.11, 1.8, -34},
    {11, 3, (double) INT2_MAX, 0.190, 0.031, 0.095, 2.0, -38},
    {10, 3, (double) INT2_MAX, 0.179, 0.023, 0.075, 2.4, -51},
    {16, 2, (double) INT2_MAX, 0.210, 0.051, 0.14, 1.5, -24},
    {15, 2, (double) INT2_MAX, 0.203, 0.041, 0.12, 1.7, -31},
    {14, 2, (double) INT2_MAX, 0.195, 0.032, 0.10, 1.9, -36},
    {13, 2, (double) INT2_MAX, 0.185, 0.024, 0.084, 2.2, -45},
    {12, 2, (double) INT2_MAX, 0.171, 0.016, 0.061, 2.8, -65},
    {19, 1, (double) INT2_MAX, 0.205, 0.040, 0.11, 1.9, -43},
    {18, 1, (double) INT2_MAX, 0.198, 0.032, 0.10, 2.0, -43},
    {17, 1, (double) INT2_MAX, 0.189, 0.024, 0.079, 2.4, -57},
    {16, 1, (double) INT2_MAX, 0.176, 0.016, 0.063, 2.8, -67},
}
Supported values (gap-existence, extension, etc.) for BLOSUM45.

Definition at line 168 of file blast_stat.c.

Referenced by BlastLoadMatrixValues().

Int4 blosum50_prefs[16] [static]
 

Initial value:

 {
BLAST_MATRIX_NOMINAL,
BLAST_MATRIX_NOMINAL,
BLAST_MATRIX_NOMINAL,
BLAST_MATRIX_NOMINAL,
BLAST_MATRIX_NOMINAL,
BLAST_MATRIX_NOMINAL,
BLAST_MATRIX_NOMINAL,
BLAST_MATRIX_NOMINAL,
BLAST_MATRIX_NOMINAL,
BLAST_MATRIX_BEST,
BLAST_MATRIX_NOMINAL,
BLAST_MATRIX_NOMINAL,
BLAST_MATRIX_NOMINAL,
BLAST_MATRIX_NOMINAL,
BLAST_MATRIX_NOMINAL,
BLAST_MATRIX_NOMINAL
}
Quality values for the BLOSUM50 matrix; each element corresponds to the element at the same index in the array blosum50_values.

Definition at line 223 of file blast_stat.c.

Referenced by BlastLoadMatrixValues().

array_of_8 blosum50_values[16] [static]
 

Initial value:

 {
    {(double) INT2_MAX, (double) INT2_MAX, (double) INT2_MAX, 0.2318, 0.112, 0.3362, 0.6895, -4.0},
    {13, 3, (double) INT2_MAX, 0.212, 0.063, 0.19, 1.1, -16},
    {12, 3, (double) INT2_MAX, 0.206, 0.055, 0.17, 1.2, -18},
    {11, 3, (double) INT2_MAX, 0.197, 0.042, 0.14, 1.4, -25},
    {10, 3, (double) INT2_MAX, 0.186, 0.031, 0.11, 1.7, -34},
    {9, 3, (double) INT2_MAX, 0.172, 0.022, 0.082, 2.1, -48},
    {16, 2, (double) INT2_MAX, 0.215, 0.066, 0.20, 1.05, -15},
    {15, 2, (double) INT2_MAX, 0.210, 0.058, 0.17, 1.2, -20},
    {14, 2, (double) INT2_MAX, 0.202, 0.045, 0.14, 1.4, -27},
    {13, 2, (double) INT2_MAX, 0.193, 0.035, 0.12, 1.6, -32},
    {12, 2, (double) INT2_MAX, 0.181, 0.025, 0.095, 1.9, -41},
    {19, 1, (double) INT2_MAX, 0.212, 0.057, 0.18, 1.2, -21},
    {18, 1, (double) INT2_MAX, 0.207, 0.050, 0.15, 1.4, -28},
    {17, 1, (double) INT2_MAX, 0.198, 0.037, 0.12, 1.6, -33},
    {16, 1, (double) INT2_MAX, 0.186, 0.025, 0.10, 1.9, -42},
    {15, 1, (double) INT2_MAX, 0.171, 0.015, 0.063, 2.7, -76},
}
Supported values (gap-existence, extension, etc.) for BLOSUM50.

Definition at line 204 of file blast_stat.c.

Referenced by BlastLoadMatrixValues().

Int4 blosum62_prefs[12] [static]
 

Initial value:

 {
    BLAST_MATRIX_NOMINAL,
    BLAST_MATRIX_NOMINAL,
    BLAST_MATRIX_NOMINAL,
    BLAST_MATRIX_NOMINAL,
    BLAST_MATRIX_NOMINAL,
    BLAST_MATRIX_NOMINAL,
    BLAST_MATRIX_NOMINAL,
    BLAST_MATRIX_NOMINAL,
    BLAST_MATRIX_NOMINAL,
    BLAST_MATRIX_BEST,
    BLAST_MATRIX_NOMINAL,
    BLAST_MATRIX_NOMINAL,
}
Quality values for the BLOSUM62 matrix; each element corresponds to the element at the same index in the array blosum62_values.

Definition at line 258 of file blast_stat.c.

Referenced by BlastLoadMatrixValues().

array_of_8 blosum62_values[12] [static]
 

Initial value:

 {
    {(double) INT2_MAX, (double) INT2_MAX, (double) INT2_MAX, 0.3176, 0.134, 0.4012, 0.7916, -3.2},
    {11, 2, (double) INT2_MAX, 0.297, 0.082, 0.27, 1.1, -10},
    {10, 2, (double) INT2_MAX, 0.291, 0.075, 0.23, 1.3, -15},
    {9, 2, (double) INT2_MAX, 0.279, 0.058, 0.19, 1.5, -19},
    {8, 2, (double) INT2_MAX, 0.264, 0.045, 0.15, 1.8, -26},
    {7, 2, (double) INT2_MAX, 0.239, 0.027, 0.10, 2.5, -46},
    {6, 2, (double) INT2_MAX, 0.201, 0.012, 0.061, 3.3, -58},
    {13, 1, (double) INT2_MAX, 0.292, 0.071, 0.23, 1.2, -11},
    {12, 1, (double) INT2_MAX, 0.283, 0.059, 0.19, 1.5, -19},
    {11, 1, (double) INT2_MAX, 0.267, 0.041, 0.14, 1.9, -30},
    {10, 1, (double) INT2_MAX, 0.243, 0.024, 0.10, 2.5, -44},
    {9, 1, (double) INT2_MAX, 0.206, 0.010, 0.052, 4.0, -87},
}
Supported values (gap-existence, extension, etc.) for BLOSUM62.

Definition at line 243 of file blast_stat.c.

Referenced by BlastLoadMatrixValues().

Int4 blosum80_prefs[10] [static]
 

Initial value:

 {
    BLAST_MATRIX_NOMINAL,
    BLAST_MATRIX_NOMINAL,
    BLAST_MATRIX_NOMINAL,
    BLAST_MATRIX_NOMINAL,
    BLAST_MATRIX_NOMINAL,
    BLAST_MATRIX_NOMINAL,
    BLAST_MATRIX_NOMINAL,
    BLAST_MATRIX_NOMINAL,
    BLAST_MATRIX_BEST,
    BLAST_MATRIX_NOMINAL
}
Quality values for the BLOSUM80 matrix; each element corresponds to the element at the same index in the array blosum80_values.

Definition at line 288 of file blast_stat.c.

Referenced by BlastLoadMatrixValues().

array_of_8 blosum80_values[10] [static]
 

Initial value:

 {
    {(double) INT2_MAX, (double) INT2_MAX, (double) INT2_MAX, 0.3430, 0.177, 0.6568, 0.5222, -1.6},
    {25, 2, (double) INT2_MAX, 0.342, 0.17, 0.66, 0.52, -1.6},
    {13, 2, (double) INT2_MAX, 0.336, 0.15, 0.57, 0.59, -3},
    {9, 2, (double) INT2_MAX, 0.319, 0.11, 0.42, 0.76, -6},
    {8, 2, (double) INT2_MAX, 0.308, 0.090, 0.35, 0.89, -9},
    {7, 2, (double) INT2_MAX, 0.293, 0.070, 0.27, 1.1, -14},
    {6, 2, (double) INT2_MAX, 0.268, 0.045, 0.19, 1.4, -19},
    {11, 1, (double) INT2_MAX, 0.314, 0.095, 0.35, 0.90, -9},
    {10, 1, (double) INT2_MAX, 0.299, 0.071, 0.27, 1.1, -14},
    {9, 1, (double) INT2_MAX, 0.279, 0.048, 0.20, 1.4, -19},
}
Supported values (gap-existence, extension, etc.) for BLOSUM80.

Definition at line 275 of file blast_stat.c.

Referenced by BlastLoadMatrixValues().

Int4 blosum90_prefs[8] [static]
 

Initial value:

 {
   BLAST_MATRIX_NOMINAL,
   BLAST_MATRIX_NOMINAL,
   BLAST_MATRIX_NOMINAL,
   BLAST_MATRIX_NOMINAL,
   BLAST_MATRIX_NOMINAL,
   BLAST_MATRIX_NOMINAL,
   BLAST_MATRIX_BEST,
   BLAST_MATRIX_NOMINAL
}
Quality values for the BLOSUM90 matrix; each element corresponds to the element at the same index in the array blosum90_values.

Definition at line 313 of file blast_stat.c.

Referenced by BlastLoadMatrixValues().

array_of_8 blosum90_values[8] [static]
 

Initial value:

 {
    {(double) INT2_MAX, (double) INT2_MAX, (double) INT2_MAX, 0.3346, 0.190, 0.7547, 0.4434, -1.4},
    {9, 2, (double) INT2_MAX, 0.310, 0.12, 0.46, 0.67, -6},
    {8, 2, (double) INT2_MAX, 0.300, 0.099, 0.39, 0.76, -7},
    {7, 2, (double) INT2_MAX, 0.283, 0.072, 0.30, 0.93, -11},
    {6, 2, (double) INT2_MAX, 0.259, 0.048, 0.22, 1.2, -16},
    {11, 1, (double) INT2_MAX, 0.302, 0.093, 0.39, 0.78, -8},
    {10, 1, (double) INT2_MAX, 0.290, 0.075, 0.28, 1.04, -15},
    {9, 1, (double) INT2_MAX, 0.265, 0.044, 0.20, 1.3, -19},
}
Supported values (gap-existence, extension, etc.) for BLOSUM90.

Definition at line 302 of file blast_stat.c.

Referenced by BlastLoadMatrixValues().

BLAST_LetterProb nt_prob[] [static]
 

Initial value:

 {
      { 'A', 25.00 },
      { 'C', 25.00 },
      { 'G', 25.00 },
      { 'T', 25.00 }
   }
nucleotide probabilities (25% each letter)

Definition at line 1467 of file blast_stat.c.

Int4 pam250_prefs[16] [static]
 

Initial value:

 {
BLAST_MATRIX_NOMINAL,
BLAST_MATRIX_NOMINAL,
BLAST_MATRIX_NOMINAL,
BLAST_MATRIX_NOMINAL,
BLAST_MATRIX_NOMINAL,
BLAST_MATRIX_NOMINAL,
BLAST_MATRIX_NOMINAL,
BLAST_MATRIX_NOMINAL,
BLAST_MATRIX_NOMINAL,
BLAST_MATRIX_BEST,
BLAST_MATRIX_NOMINAL,
BLAST_MATRIX_NOMINAL,
BLAST_MATRIX_NOMINAL,
BLAST_MATRIX_NOMINAL,
BLAST_MATRIX_NOMINAL,
BLAST_MATRIX_NOMINAL
}
Quality values for the PAM250 matrix; each element corresponds to the element at the same index in the array pam250_values.

Definition at line 344 of file blast_stat.c.

Referenced by BlastLoadMatrixValues().

array_of_8 pam250_values[16] [static]
 

Initial value:

 {
    {(double) INT2_MAX, (double) INT2_MAX, (double) INT2_MAX, 0.2252, 0.0868, 0.2223, 0.98, -5.0},
    {15, 3, (double) INT2_MAX, 0.205, 0.049, 0.13, 1.6, -23},
    {14, 3, (double) INT2_MAX, 0.200, 0.043, 0.12, 1.7, -26},
    {13, 3, (double) INT2_MAX, 0.194, 0.036, 0.10, 1.9, -31},
    {12, 3, (double) INT2_MAX, 0.186, 0.029, 0.085, 2.2, -41},
    {11, 3, (double) INT2_MAX, 0.174, 0.020, 0.070, 2.5, -48},
    {17, 2, (double) INT2_MAX, 0.204, 0.047, 0.12, 1.7, -28},
    {16, 2, (double) INT2_MAX, 0.198, 0.038, 0.11, 1.8, -29},
    {15, 2, (double) INT2_MAX, 0.191, 0.031, 0.087, 2.2, -44},
    {14, 2, (double) INT2_MAX, 0.182, 0.024, 0.073, 2.5, -53},
    {13, 2, (double) INT2_MAX, 0.171, 0.017, 0.059, 2.9, -64},
    {21, 1, (double) INT2_MAX, 0.205, 0.045, 0.11, 1.8, -34},
    {20, 1, (double) INT2_MAX, 0.199, 0.037, 0.10, 1.9, -35},
    {19, 1, (double) INT2_MAX, 0.192, 0.029, 0.083, 2.3, -52},
    {18, 1, (double) INT2_MAX, 0.183, 0.021, 0.070, 2.6, -60},
    {17, 1, (double) INT2_MAX, 0.171, 0.014, 0.052, 3.3, -86},
}
Supported values (gap-existence, extension, etc.) for PAM250.

Definition at line 325 of file blast_stat.c.

Referenced by BlastLoadMatrixValues().

Int4 pam30_prefs[7] [static]
 

Initial value:

 {
BLAST_MATRIX_NOMINAL,
BLAST_MATRIX_NOMINAL,
BLAST_MATRIX_NOMINAL,
BLAST_MATRIX_NOMINAL,
BLAST_MATRIX_NOMINAL,
BLAST_MATRIX_BEST,
BLAST_MATRIX_NOMINAL,
}
Quality values for the PAM30 matrix; each element corresponds to the element at the same index in the array pam30_values.

Definition at line 374 of file blast_stat.c.

array_of_8 pam30_values[7] [static]
 

Initial value:

 {
    {(double) INT2_MAX, (double) INT2_MAX, (double) INT2_MAX, 0.3400, 0.283, 1.754, 0.1938, -0.3},
    {7, 2, (double) INT2_MAX, 0.305, 0.15, 0.87, 0.35, -3},
    {6, 2, (double) INT2_MAX, 0.287, 0.11, 0.68, 0.42, -4},
    {5, 2, (double) INT2_MAX, 0.264, 0.079, 0.45, 0.59, -7},
    {10, 1, (double) INT2_MAX, 0.309, 0.15, 0.88, 0.35, -3},
    {9, 1, (double) INT2_MAX, 0.294, 0.11, 0.61, 0.48, -6},
    {8, 1, (double) INT2_MAX, 0.270, 0.072, 0.40, 0.68, -10},
}
Supported values (gap-existence, extension, etc.) for PAM30.

Definition at line 364 of file blast_stat.c.

Referenced by BlastLoadMatrixValues().

Int4 pam70_prefs[7] [static]
 

Initial value:

 {
BLAST_MATRIX_NOMINAL,
BLAST_MATRIX_NOMINAL,
BLAST_MATRIX_NOMINAL,
BLAST_MATRIX_NOMINAL,
BLAST_MATRIX_NOMINAL,
BLAST_MATRIX_BEST,
BLAST_MATRIX_NOMINAL
}
Quality values for the PAM70 matrix; each element corresponds to the element at the same index in the array pam70_values.

Definition at line 396 of file blast_stat.c.

array_of_8 pam70_values[7] [static]
 

Initial value:

 {
    {(double) INT2_MAX, (double) INT2_MAX, (double) INT2_MAX, 0.3345, 0.229, 1.029, 0.3250,   -0.7},
    {8, 2, (double) INT2_MAX, 0.301, 0.12, 0.54, 0.56, -5},
    {7, 2, (double) INT2_MAX, 0.286, 0.093, 0.43, 0.67, -7},
    {6, 2, (double) INT2_MAX, 0.264, 0.064, 0.29, 0.90, -12},
    {11, 1, (double) INT2_MAX, 0.305, 0.12, 0.52, 0.59, -6},
    {10, 1, (double) INT2_MAX, 0.291, 0.091, 0.41, 0.71, -9},
    {9, 1, (double) INT2_MAX, 0.270, 0.060, 0.28, 0.97, -14},
}
Supported values (gap-existence, extension, etc.) for PAM70.

Definition at line 386 of file blast_stat.c.

char const rcsid[] [static]
 

Initial value:

 
    "$Id: blast_stat.c 144802 2008-11-03 20:57:20Z camacho $"

Definition at line 52 of file blast_stat.c.

BLAST_LetterProb Robinson_prob[] [static]
 

Initial value:

 {
      { 'A', 78.05 },
      { 'C', 19.25 },
      { 'D', 53.64 },
      { 'E', 62.95 },
      { 'F', 38.56 },
      { 'G', 73.77 },
      { 'H', 21.99 },
      { 'I', 51.42 },
      { 'K', 57.44 },
      { 'L', 90.19 },
      { 'M', 22.43 },
      { 'N', 44.87 },
      { 'P', 52.03 },
      { 'Q', 42.64 },
      { 'R', 51.29 },
      { 'S', 71.20 },
      { 'T', 58.41 },
      { 'V', 64.41 },
      { 'W', 13.30 },
      { 'Y', 32.16 }
   }
amino acid background frequencies from Robinson and Robinson

Definition at line 1442 of file blast_stat.c.

const char* s_alphabet10 = "IJLMV AST BDENZ KQR G FY P H C W" [static]
 

23-to-10 letter compressed alphabet.

Based on SE-V(10)

Definition at line 4462 of file blast_stat.c.

const char* s_alphabet15 = "ST IJV LM KR EQZ A G BD P N F Y H C W" [static]
 

23-to-15 letter compressed alphabet.

Based on SE_B(14)

Definition at line 4464 of file blast_stat.c.


Generated on Wed Mar 11 22:44:42 2009 for NCBI C++ ToolKit by  doxygen 1.4.6
Modified on Wed Mar 11 23:16:10 2009 by modify_doxy.py rev. 117643