src/algo/blast/core/blast_stat.c File Reference

Detailed Description

Functions to calculate BLAST probabilities etc.

Detailed Contents:

allocate and deallocate structures used by BLAST to calculate probabilities etc.

calculate residue frequencies for query and "average" database.

read in matrix or load it from memory.

calculate sum-p from a collection of HSP's, for both the case of a "small" gap and a "large" gap, when given a total score and the number of HSP's.

calculate expect values for p-values.

calculate pseudo-scores from p-values.

Definition in file blast_stat.c.

#include <algo/blast/core/blast_stat.h>
#include <algo/blast/core/ncbi_math.h>
#include "blast_psi_priv.h"

Include dependency graph for blast_stat.c:

Go to the source code of this file.

Classes

struct MatrixInfo

Used to temporarily store matrix values for retrieval. More...

struct BLAST_LetterProb

Records probability of letter appearing in sequence. More...

struct Blast_ResComp

Intermediate structure to store the composition of a sequence. More...

struct SRombergCbackArgs

Internal data structure used by Romberg integration callbacks. More...

Defines

#define BLAST_SCORE_RANGE_MAX   (BLAST_SCORE_MAX - BLAST_SCORE_MIN)

maximum allowed range of BLAST scores.

#define BLAST_KARLIN_K_SUMLIMIT_DEFAULT   0.0001

K_SUMLIMIT_DEFAULT == sumlimit used in BlastKarlinLHtoK().

#define BLAST_KARLIN_LAMBDA_ACCURACY_DEFAULT   (1.e-5)

LAMBDA_ACCURACY_DEFAULT == accuracy to which Lambda should be calc'd.

#define BLAST_KARLIN_LAMBDA_ITER_DEFAULT   17

LAMBDA_ITER_DEFAULT == no. of iterations in LambdaBis = ln(accuracy)/ln(2).

#define BLAST_KARLIN_LAMBDA0_DEFAULT   0.5

Initial guess for the value of Lambda in BlastKarlinLambdaNR.

#define BLAST_KARLIN_K_ITER_MAX   100

upper limit on iterations for BlastKarlinLHtoK

#define BLAST_NUM_STAT_VALUES   8

Number of statistical parameters in each row of the precomputed tables.

#define BLOSUM45_VALUES_MAX   14

Number of different combinations supported for BLOSUM45.

#define BLOSUM50_VALUES_MAX   16

Number of different combinations supported for BLOSUM50.

#define BLOSUM62_VALUES_MAX   12

Number of different combinations supported for BLOSUM62.

#define BLOSUM80_VALUES_MAX   10

Number of different combinations supported for BLOSUM80.

#define BLOSUM90_VALUES_MAX   8

Number of different combinations supported for BLOSUM90.

#define PAM250_VALUES_MAX   16

Number of different combinations supported for PAM250.

#define PAM30_VALUES_MAX   7

Number of different combinations supported for PAM30.

#define PAM70_VALUES_MAX   7

Number of different combinations supported for PAM70.

#define STD_AMINO_ACID_FREQS   Robinson_prob

points to the standard amino acid frequencies to use.

Typedefs

typedef double array_of_8 [8]

Holds values (gap-opening, extension, etc.) for a matrix.

typedef Int1 CompressedReverseLookup [BLASTAA_SIZE+1][BLASTAA_SIZE+1]

2-D array mapping compressed letters to sets of ordinary protein letters

Functions

static SBlastScoreMatrix * SBlastScoreMatrixFree (SBlastScoreMatrix *matrix)

Deallocates SBlastScoreMatrix structure.

static SBlastScoreMatrix * SBlastScoreMatrixNew (size_t ncols, size_t nrows)

Allocates a new SBlastScoreMatrix structure of the specified dimensions.

SPsiBlastScoreMatrix * SPsiBlastScoreMatrixFree (SPsiBlastScoreMatrix *matrix)

Deallocates a SPsiBlastScoreMatrix structure.

SPsiBlastScoreMatrix * SPsiBlastScoreMatrixNew (size_t ncols)

Allocates a new SPsiBlastScoreMatrix structure of dimensions ncols by BLASTAA_SIZE.

BlastScoreBlk * BlastScoreBlkNew (Uint1 alphabet, Int4 number_of_contexts)

Allocates and initializes BlastScoreBlk.

Blast_ScoreFreq * Blast_ScoreFreqFree (Blast_ScoreFreq *sfp)

Deallocates the score frequencies structure.

Blast_KarlinBlk * Blast_KarlinBlkFree (Blast_KarlinBlk *kbp)

Deallocates the KarlinBlk.

BlastScoreBlk * BlastScoreBlkFree (BlastScoreBlk *sbp)

Deallocates BlastScoreBlk as well as all associated structures.

Int2 BLAST_ScoreSetAmbigRes (BlastScoreBlk *sbp, char ambiguous_res)

Set the ambiguous residue (e.g., 'N', 'X') in the BlastScoreBlk*.

Int2 BlastScoreBlkNuclMatrixCreate (BlastScoreBlk *sbp)

Fill in the matrix for blastn using the penalty and rewards. The query sequence alphabet is blastna, the subject sequence is ncbi2na.

static Int2 BlastScoreBlkProteinMatrixRead (BlastScoreBlk *sbp, FILE *fp)

Read in the matrix from the FILE *fp.

static Int2 BlastScoreBlkMaxScoreSet (BlastScoreBlk *sbp)

Sets maximum and minimum scores on the BlastScoreBlk for a given matrix.

static Int2 BlastScoreBlkProteinMatrixLoad (BlastScoreBlk *sbp)

Sets sbp->matrix->data field using sbp->name field using the matrices in the toolkit (util/tables/raw_scoremat.h).

Int2 Blast_ScoreBlkMatrixFill (BlastScoreBlk *sbp, GET_MATRIX_PATH get_path)

This function fills in the BlastScoreBlk structure.

Blast_ResFreq * Blast_ResFreqFree (Blast_ResFreq *rfp)

Deallocates Blast_ResFreq and prob0 element.

Blast_ResFreq * Blast_ResFreqNew (const BlastScoreBlk *sbp)

Allocates a new Blast_ResFreq structure and fills in the prob element based upon the contents of sbp.

static Int2 Blast_ResFreqNormalize (const BlastScoreBlk *sbp, Blast_ResFreq *rfp, double norm)

Normalizes all the residue frequencies and then normalizes them to "norm".

Int2 Blast_GetStdAlphabet (Uint1 alphabet_code, Uint1 *residues, Uint4 residues_size)

Fills a buffer with the 'standard' alphabet (given by STD_AMINO_ACID_FREQS[index].ch).

Int2 Blast_ResFreqStdComp (const BlastScoreBlk *sbp, Blast_ResFreq *rfp)

Calculates residues frequencies given a standard distribution.

static Blast_ResComp * BlastResCompDestruct (Blast_ResComp *rcp)

Deallocates Blast_ResComp structure and associated arrays.

static Blast_ResComp * BlastResCompNew (const BlastScoreBlk *sbp)

Allocates the Blast_ResComp* for a given alphabet.

static Int2 BlastResCompStr (const BlastScoreBlk *sbp, Blast_ResComp *rcp, char *str, Int4 length)

Store the composition of a (query) string.

static Int2 Blast_ResFreqClr (const BlastScoreBlk *sbp, Blast_ResFreq *rfp)

Sets prob elements of Blast_ResFreq to zero.

static Int2 Blast_ResFreqResComp (const BlastScoreBlk *sbp, Blast_ResFreq *rfp, const Blast_ResComp *rcp)

Calculate the residue frequencies associated with the provided ResComp. This function takes into account the composition of a given sequence (expressed through rcp) rather than just doing it for a standard distribution.

static Int2 Blast_ResFreqString (const BlastScoreBlk *sbp, Blast_ResFreq *rfp, char *string, Int4 length)

Fills in residue frequencies for a given sequence.

static Int2 BlastScoreChk (Int4 lo, Int4 hi)

Check that the lo and hi score are within the allowed ranges.

Blast_ScoreFreq * Blast_ScoreFreqNew (Int4 score_min, Int4 score_max)

Creates a new structure to keep track of score frequencies for a scoring system.

static Int2 BlastScoreFreqCalc (const BlastScoreBlk *sbp, Blast_ScoreFreq *sfp, Blast_ResFreq *rfp1, Blast_ResFreq *rfp2)

Calculates the score frequencies.

static double BlastKarlinLHtoK (Blast_ScoreFreq *sfp, double lambda, double H)

The following procedure computes K.

static double NlmKarlinLambdaNR (double *probs, Int4 d, Int4 low, Int4 high, double lambda0, double tolx, Int4 itmax, Int4 maxNewton, Int4 *itn)

Find positive solution to.

double Blast_KarlinLambdaNR (Blast_ScoreFreq *sfp, double initialLambdaGuess)

Calculates the parameter Lambda given an initial guess for its value.

static double BlastKarlinLtoH (Blast_ScoreFreq *sfp, double lambda)

Calculate H, the relative entropy of the p's and q's.

Int2 Blast_KarlinBlkUngappedCalc (Blast_KarlinBlk *kbp, Blast_ScoreFreq *sfp)

Computes the parameters lambda, H, and K for use in calculating the statistical significance of high-scoring segments or subalignments (see comment on blast_stat.c for more details).

Int2 Blast_ScoreBlkKbpUngappedCalc (EBlastProgramType program, BlastScoreBlk *sbp, Uint1 *query, const BlastQueryInfo *query_info, Blast_Message **blast_message)

Calculate and fill the ungapped Karlin-Altschul parameters in the BlastScoreBlk structure (fields kbp_std, kbp_psi, and kbp of that structure).

Int2 Blast_ScoreBlkKbpIdealCalc (BlastScoreBlk *sbp)

Calculates the Karlin-Altschul parameters assuming standard residue compositions for the query and subject sequences.

Blast_KarlinBlk * Blast_KarlinBlkNew (void)

Callocs a Blast_KarlinBlk.

Int2 Blast_KarlinBlkCopy (Blast_KarlinBlk *kbp_to, Blast_KarlinBlk *kbp_from)

Copies contents of one Karlin block to another.

static MatrixInfo * MatrixInfoDestruct (MatrixInfo *matrix_info)

Deallocates MatrixInfo as well as name string.

static MatrixInfo * MatrixInfoNew (const char *name, array_of_8 *values, Int4 *prefs, Int4 max_number)

Allocates New MatrixInfo*.

static ListNode * BlastMatrixValuesDestruct (ListNode *vnp)

Free linked list of MatrixValues and all associated data.

static ListNode * BlastLoadMatrixValues (void)

Loads all the matrix values, returns a ListNode* chain that contains MatrixInfo*'s.

static Int2 Blast_GetMatrixValues (const char *matrix, Int4 **open, Int4 **extension, double **lambda, double **K, double **H, double **alpha, double **beta, Int4 **pref_flags)

Obtains arrays of the allowed opening and extension penalties for gapped BLAST for the given matrix.

void BLAST_GetAlphaBeta (const char *matrixName, double *alpha, double *beta, Boolean gapped, Int4 gap_open, Int4 gap_extend, const Blast_KarlinBlk *kbp_ungapped)

Extract the alpha and beta settings for this matrixName, and these gap open and gap extension costs.

static Int2 s_SplitArrayOf8 (const array_of_8 *input, const array_of_8 **normal, const array_of_8 **non_affine, Boolean *split)

Splits an ArrayOf8 into two arrays of supported gap costs.

static Int2 s_AdjustGapParametersByGcd (array_of_8 *normal, array_of_8 *linear, int size, Int4 *gap_existence_max, Int4 *gap_extend_max, int divisor)

Adjust Lambda and H if reward and penalty have a non-1 gcd.

static Int2 s_GetNuclValuesArray (Int4 reward, Int4 penalty, Int4 *array_size, array_of_8 **normal, array_of_8 **non_affine, Int4 *gap_open_max, Int4 *gap_extend_max, Boolean *round_down, Blast_Message **error_return)

Returns the array of values corresponding to the given match/mismatch scores, the number of supported gap cost combinations and thresholds for the gap costs, beyond which the ungapped statistics can be applied.

Int2 BLAST_GetProteinGapExistenceExtendParams (const char *matrixName, Int4 *gap_existence, Int4 *gap_extension)

Extract the recommended gap existence and extension values.

Int2 BLAST_GetNucleotideGapExistenceExtendParams (Int4 reward, Int4 penalty, Int4 *gap_existence, Int4 *gap_extension)

Extract the recommended gap existence and extension values.

Boolean BLAST_CheckRewardPenaltyScores (Int4 reward, Int4 penalty)

Check the validity of the reward and penalty scores.

static Int2 BlastKarlinReportAllowedValues (const char *matrix_name, Blast_Message **error_return)

Fills in error_return with strings describing the allowed values.

Int2 Blast_KarlinBlkGappedCalc (Blast_KarlinBlk *kbp, Int4 gap_open, Int4 gap_extend, const char *matrix_name, Blast_Message **error_return)

Fills in lambda, H, and K values, as calculated by Stephen Altschul in Methods in Enzy.

Int2 Blast_KarlinBlkGappedLoadFromTables (Blast_KarlinBlk *kbp, Int4 gap_open, Int4 gap_extend, const char *matrix_name)

Attempts to fill KarlinBlk for given gap opening, extensions etc.

char * BLAST_PrintMatrixMessage (const char *matrix_name)

Prints a message about the allowed matrices, BlastKarlinBlkGappedFill should return 1 before this is called.

char * BLAST_PrintAllowedValues (const char *matrix_name, Int4 gap_open, Int4 gap_extend)

Prints a message about the allowed open etc values for the given matrix, BlastKarlinBlkGappedFill should return 2 before this is called.

Int2 Blast_KarlinBlkNuclGappedCalc (Blast_KarlinBlk *kbp, Int4 gap_open, Int4 gap_extend, Int4 reward, Int4 penalty, Blast_KarlinBlk *kbp_ungap, Boolean *round_down, Blast_Message **error_return)

Retrieves Karlin-Altschul parameters from precomputed tables, given the substitution and gap scores.

static double s_GetUngappedBeta (Int4 reward, Int4 penalty)

Returns the beta statistical parameter value, given the nucleotide substitution scores.

Int2 Blast_GetNuclAlphaBeta (Int4 reward, Int4 penalty, Int4 gap_open, Int4 gap_extend, Blast_KarlinBlk *kbp, Boolean gapped_calculation, double *alpha, double *beta)

Extract the alpha and beta settings for these substitution and gap scores.

static Int4 BlastKarlinEtoS_simple (double E, const Blast_KarlinBlk *kbp, Int8 searchsp)

Calculates score from expect value and search space.

double BLAST_GapDecayDivisor (double decayrate, unsigned nsegs)

Compute a divisor used to weight the evalue of a collection of "nsegs" distinct alignments.

Int2 BLAST_Cutoffs (Int4 *S, double *E, Blast_KarlinBlk *kbp, Int8 searchsp, Boolean dodecay, double gap_decay_rate)

Calculate the cutoff score from the expected number of HSPs or vice versa.

double BLAST_KarlinStoE_simple (Int4 S, Blast_KarlinBlk *kbp, Int8 searchsp)

Calculates the Expect value based upon the search space and some Karlin-Altschul parameters.

double BLAST_KarlinPtoE (double p)

Convert a P-value to an E-value.

double BLAST_KarlinEtoP (double x)

Convert an E-value to a P-value.

static double s_OuterIntegralCback (double x, void *vp)

Callback for the Romberg integration function.

static double s_InnerIntegralCback (double s, void *vp)

Callback for the Romberg integration function.

static double s_BlastSumPCalc (int r, double s)

Evaluate the following double integral, where r = number of segments.

static double s_BlastSumP (Int4 r, double s)

Estimate the Sum P-value by calculation or interpolation, as appropriate.

double BLAST_SmallGapSumE (Int4 starting_points, Int2 num, double xsum, Int4 query_length, Int4 subject_length, Int8 searchsp_eff, double weight_divisor)

Calculates the e-value for alignments with "small" gaps (typically under fifty residues/basepairs) following ideas of Stephen Altschul's.

double BLAST_UnevenGapSumE (Int4 query_start_points, Int4 subject_start_points, Int2 num, double xsum, Int4 query_length, Int4 subject_length, Int8 searchsp_eff, double weight_divisor)

Calculates the e-value of a collection of multiple distinct alignments with asymmetric gaps between the alignments.

double BLAST_LargeGapSumE (Int2 num, double xsum, Int4 query_length, Int4 subject_length, Int8 searchsp_eff, double weight_divisor)

Calculates the e-value of a collection of distinct alignments with arbitrarily large gaps between the alignments.

void Blast_FillResidueProbability (const Uint1 *sequence, Int4 length, double *resProb)

Given a sequence of 'length' amino acid residues, compute the probability of each residue and put that in the array resProb. Excludes ambiguity characters.

static double RPSfindUngappedLambda (const char *matrixName)

Gets the ungapped lambda calculated for the matrix in question given standard residue composition for both query and subject sequences.

static void RPSFillScores (Int4 **matrix, Int4 matrixLength, double *queryProbArray, double *scoreArray, Blast_ScoreFreq *return_sfp, Int4 range, Int4 alphabet_size)

The routine RPSFillScores computes the probability of each score weighted by the probability of each query residue, fills those probabilities into scoreArray, and puts scoreArray as a field in the structure that is returned. For indexing convenience, the field storing scoreArray points to the entry for score 0, so that referring to the -k index corresponds to score -k. FIXME: This can be replaced by _PSIComputeScoreProbabilities??

Int4 ** RPSRescalePssm (double scalingFactor, Int4 rps_query_length, const Uint1 *rps_query_seq, Int4 db_seq_length, Int4 **posMatrix, BlastScoreBlk *sbp)

Rescale the PSSM, using composition-based statistics, for use with RPS BLAST.

static void s_BuildCompressedTranslation (const char *trans_string, Uint1 *table, Int4 compressed_alphabet_size, CompressedReverseLookup rev_table)

parse the string defining the conversion between the ordinary protein alphabet and a compressed alphabet

static Int2 s_GetCompressedProbs (BlastScoreBlk *sbp, double *compressed_prob, Int4 compressed_alphabet_size, CompressedReverseLookup rev_table)

Calculate conditional probability of each letter in each group.

static Int2 s_BuildCompressedScoreMatrix (BlastScoreBlk *sbp, SCompressedAlphabet *new_alphabet, double matrix_scale_factor, CompressedReverseLookup rev_table)

Compute a (non-square) score matrix for a compressed alphabet.

SCompressedAlphabet * SCompressedAlphabetNew (BlastScoreBlk *sbp, Int4 compressed_alphabet_size, double matrix_scale_factor)

Allocate a new compressed alphabet and score matrix.

SCompressedAlphabet * SCompressedAlphabetFree (SCompressedAlphabet *alphabet)

Free a compressed alphabet and score matrix.

Int4 BLAST_ComputeLengthAdjustment (double K, double logK, double alpha_d_lambda, double beta, Int4 query_length, Int8 db_length, Int4 db_num_seqs, Int4 *length_adjustment)

Computes the adjustment to the lengths of the query and database sequences that is used to compensate for edge effects when computing evalues.

Variables

static char const rcsid []

static array_of_8 blosum45_values [14]

Supported values (gap-existence, extension, etc.) for BLOSUM45.

static Int4 blosum45_prefs [14]

Quality values for BLOSUM45 matrix, each element corresponds to same element number in array blosum45_values.

static array_of_8 blosum50_values [16]

Supported values (gap-existence, extension, etc.) for BLOSUM50.

static Int4 blosum50_prefs [16]

Quality values for BLOSUM50 matrix, each element corresponds to same element number in array blosum50_values.

static array_of_8 blosum62_values [12]

Supported values (gap-existence, extension, etc.) for BLOSUM62.

static Int4 blosum62_prefs [12]

Quality values for BLOSUM62 matrix, each element corresponds to same element number in array blosum62_values.

static array_of_8 blosum80_values [10]

Supported values (gap-existence, extension, etc.) for BLOSUM80.

static Int4 blosum80_prefs [10]

Quality values for BLOSUM80 matrix, each element corresponds to same element number in array blosum80_values.

static array_of_8 blosum90_values [8]

Supported values (gap-existence, extension, etc.) for BLOSUM90.

static Int4 blosum90_prefs [8]

Quality values for BLOSUM90 matrix, each element corresponds to same element number in array blosum90_values.

static array_of_8 pam250_values [16]

Supported values (gap-existence, extension, etc.) for PAM250.

static Int4 pam250_prefs [16]

Quality values for PAM250 matrix, each element corresponds to same element number in array pam250_values.

static array_of_8 pam30_values [7]

Supported values (gap-existence, extension, etc.) for PAM30.

static Int4 pam30_prefs [7]

Quality values for PAM30 matrix, each element corresponds to same element number in array pam30_values.

static array_of_8 pam70_values [7]

Supported values (gap-existence, extension, etc.) for PAM70.

static Int4 pam70_prefs [7]

Quality values for PAM70 matrix, each element corresponds to same element number in array pam70_values.

static const array_of_8 blastn_values_1_5 []

Karlin-Altschul parameter values for substitution scores 1 and -5.

static const array_of_8 blastn_values_1_4 []

Karlin-Altschul parameter values for substitution scores 1 and -4.

static const array_of_8 blastn_values_2_7 []

Karlin-Altschul parameter values for substitution scores 2 and -7.

static const array_of_8 blastn_values_1_3 []

Karlin-Altschul parameter values for substitution scores 1 and -3.

static const array_of_8 blastn_values_2_5 []

Karlin-Altschul parameter values for substitution scores 2 and -5.

static const array_of_8 blastn_values_1_2 []

Karlin-Altschul parameter values for substitution scores 1 and -2.

static const array_of_8 blastn_values_2_3 []

Karlin-Altschul parameter values for substitution scores 2 and -3.

static const array_of_8 blastn_values_3_4 []

Karlin-Altschul parameter values for substitution scores 3 and -4.

static const array_of_8 blastn_values_4_5 []

Karlin-Altschul parameter values for substitution scores 4 and -5.

static const array_of_8 blastn_values_1_1 []

Karlin-Altschul parameter values for substitution scores 1 and -1.

static const array_of_8 blastn_values_3_2 []

Karlin-Altschul parameter values for substitution scores 3 and -2.

static const array_of_8 blastn_values_5_4 []

Karlin-Altschul parameter values for substitution scores 5 and -4.

static BLAST_LetterProb Robinson_prob []

amino acid background frequencies from Robinson and Robinson

static BLAST_LetterProb nt_prob []

nucleotide probabilities (25% each letter)

static const char * s_alphabet10 = "IJLMV AST BDENZ KQR G FY P H C W"

23-to-10 letter compressed alphabet.

static const char * s_alphabet15 = "ST IJV LM KR EQZ A G BD P N F Y H C W"

23-to-15 letter compressed alphabet.

Define Documentation

#define BLAST_KARLIN_K_ITER_MAX 100

upper limit on iterations for BlastKarlinLHtoK

Definition at line 76 of file blast_stat.c.

#define BLAST_KARLIN_K_SUMLIMIT_DEFAULT 0.0001

K_SUMLIMIT_DEFAULT == sumlimit used in BlastKarlinLHtoK().

Definition at line 68 of file blast_stat.c.

#define BLAST_KARLIN_LAMBDA0_DEFAULT 0.5

Initial guess for the value of Lambda in BlastKarlinLambdaNR.

Definition at line 74 of file blast_stat.c.
Referenced by Blast_KarlinBlkUngappedCalc().

#define BLAST_KARLIN_LAMBDA_ACCURACY_DEFAULT (1.e-5)

LAMBDA_ACCURACY_DEFAULT == accuracy to which Lambda should be calc'd.

Definition at line 70 of file blast_stat.c.

#define BLAST_KARLIN_LAMBDA_ITER_DEFAULT 17

LAMBDA_ITER_DEFAULT == no. of iterations in LambdaBis = ln(accuracy)/ln(2).
Definition at line 72 of file blast_stat.c.

#define BLAST_NUM_STAT_VALUES 8

Number of statistical parameters in each row of the precomputed tables.

Definition at line 79 of file blast_stat.c.

#define BLAST_SCORE_RANGE_MAX (BLAST_SCORE_MAX - BLAST_SCORE_MIN)

maximum allowed range of BLAST scores.

Definition at line 60 of file blast_stat.c.
Referenced by BlastScoreChk(), and RPSRescalePssm().

#define BLOSUM45_VALUES_MAX 14

Number of different combinations supported for BLOSUM45.

Definition at line 167 of file blast_stat.c.
Referenced by BlastLoadMatrixValues().

#define BLOSUM50_VALUES_MAX 16

Number of different combinations supported for BLOSUM50.

Definition at line 203 of file blast_stat.c.
Referenced by BlastLoadMatrixValues().

#define BLOSUM62_VALUES_MAX 12

Number of different combinations supported for BLOSUM62.

Definition at line 242 of file blast_stat.c.
Referenced by BlastLoadMatrixValues().

#define BLOSUM80_VALUES_MAX 10

Number of different combinations supported for BLOSUM80.

Definition at line 274 of file blast_stat.c.
Referenced by BlastLoadMatrixValues().

#define BLOSUM90_VALUES_MAX 8

Number of different combinations supported for BLOSUM90.

Definition at line 301 of file blast_stat.c.
Referenced by BlastLoadMatrixValues().

#define PAM250_VALUES_MAX 16

Number of different combinations supported for PAM250.

Definition at line 324 of file blast_stat.c.
Referenced by BlastLoadMatrixValues().

#define PAM30_VALUES_MAX 7

Number of different combinations supported for PAM30.

Definition at line 363 of file blast_stat.c.
Referenced by BlastLoadMatrixValues().

#define PAM70_VALUES_MAX 7

Number of different combinations supported for PAM70.

Definition at line 385 of file blast_stat.c.
Referenced by BlastLoadMatrixValues().

#define STD_AMINO_ACID_FREQS Robinson_prob

points to the standard amino acid frequencies to use.

Definition at line 1465 of file blast_stat.c.
Referenced by Blast_GetStdAlphabet(), and Blast_ResFreqStdComp().

Typedef Documentation

typedef double array_of_8[8]

Holds values (gap-opening, extension, etc.) for a matrix.
Definition at line 82 of file blast_stat.c.

typedef Int1 CompressedReverseLookup[BLASTAA_SIZE+1][BLASTAA_SIZE+1]

2-D array mapping compressed letters to sets of ordinary protein letters

Definition at line 4277 of file blast_stat.c.

Function Documentation

Boolean BLAST_CheckRewardPenaltyScores ( Int4 reward,

Int4 penalty

)

Check the validity of the reward and penalty scores.
Only to be used with blastn searches.
Parameters:

reward match score [in]

penalty mismatch score [in]

Returns:
TRUE on success

Definition at line 3098 of file blast_stat.c.
References FALSE, s_GetNuclValuesArray(), and sfree.
Referenced by BlastScoringOptionsValidate().

Int4 BLAST_ComputeLengthAdjustment ( double K,

double logK,

double alpha_d_lambda,

double beta,

Int4 query_length,

Int8 db_length,

Int4 db_num_seqs,

Int4 * length_adjustment

)

Computes the adjustment to the lengths of the query and database sequences that is used to compensate for edge effects when computing evalues.
The length adjustment is an integer-valued approximation to the fixed point of the function
f(ell) = beta + (alpha/lambda) * (log K + log((m - ell)*(n - N ell)))
where m is the query length, n is the length of the database, and N is the number of sequences in the database. The values beta, alpha, lambda and K are statistical, Karlin-Altschul parameters.
The value of the length adjustment computed by this routine, A, will always be an integer smaller than the fixed point of f(ell). Usually, it will be the largest such integer. However, the computed length adjustment, A, will also be so small that
K * (m - A) * (n - N * A) > MAX(m,n).
Moreover, an iterative method is used to compute A, and under unusual circumstances the iterative method may not converge.

Parameters:

K the statistical parameter K

logK the natural logarithm of K

alpha_d_lambda the ratio of the statistical parameters alpha and lambda (for ungapped alignments, the value 1/H should be used)

beta the statistical parameter beta (for ungapped alignments, beta == 0)

query_length the length of the query sequence

db_length the length of the database

db_num_seqs the number of sequences in the database

length_adjustment the computed value of the length adjustment [out]

Returns:
0 if length_adjustment is known to be the largest integer less than the fixed point of f(ell); 1 otherwise.

Definition at line 4553 of file blast_stat.c.
References FALSE, MAX, and TRUE.
Referenced by BLAST_CalcEffLengths().

Int2 BLAST_Cutoffs ( Int4 * S,

double * E,

Blast_KarlinBlk * kbp,

Int8 searchsp,

Boolean dodecay,

double gap_decay_rate

)

Calculate the cutoff score from the expected number of HSPs or vice versa.

Parameters:

S The calculated score [in] [out]

E The calculated e-value [in] [out]

kbp The Karlin-Altschul statistical parameters [in]

searchsp The effective search space [in]

dodecay Use gap decay feature? [in]

gap_decay_rate Gap decay rate to use, if dodecay is set [in]

Definition at line 3606 of file blast_stat.c.
References BLAST_GapDecayDivisor(), BLAST_KarlinStoE_simple(), BlastKarlinEtoS_simple(), FALSE, Blast_KarlinBlk::H, Blast_KarlinBlk::K, Blast_KarlinBlk::Lambda, and TRUE.
Referenced by BlastHitSavingParametersUpdate(), BlastInitialWordParametersUpdate(), and BOOST_AUTO_TEST_CASE().

void Blast_FillResidueProbability ( const Uint1 * sequence,

Int4 length,

double * resProb

)

Given a sequence of 'length' amino acid residues, compute the probability of each residue and put that in the array resProb. Excludes ambiguity characters.

Parameters:

sequence the sequence to be computed upon [in]

length the length of the sequence [in]

resProb the object to be filled in [in|out]

Definition at line 4090 of file blast_stat.c.
References AMINOACID_TO_NCBISTDAA, and BLASTAA_SIZE.
Referenced by RPSRescalePssm().

double BLAST_GapDecayDivisor ( double decayrate,

unsigned nsegs

)

Compute a divisor used to weight the evalue of a collection of "nsegs" distinct alignments.
These divisors are used to compensate for the effect of choosing the best among multiple collections of alignments. See
Stephen F. Altschul. Evaluating the statistical significance of multiple distinct local alignments. In Suhai, editor, Theoretical and Computational Methods in Genome Research, pages 1-14. Plenum Press, New York, 1997.
The "decayrate" parameter of this routine is a value in the interval (0,1). Typical values of decayrate are .1 and .5.
Parameters:

decayrate adjusts for (multiple) tests of number of HSP sum groups [in]

nsegs the number of HSPs in the sum group [in]

Returns:
divisor used to compensate for multiple tests

Definition at line 3595 of file blast_stat.c.
References BLAST_Powi().
Referenced by BLAST_Cutoffs(), Blast_HSPListGetEvalues(), and s_SumHSPEvalue().

void BLAST_GetAlphaBeta ( const char * matrixName,

double * alpha,

double * beta,

Boolean gapped,

Int4 gap_open,

Int4 gap_extend,

const Blast_KarlinBlk * kbp_ungapped

)

Extract the alpha and beta settings for this matrixName, and these gap open and gap extension costs.

Parameters:

matrixName name of the matrix used [in]

alpha Karlin-Altschul parameter to be set [out]

beta Karlin-Altschul parameter to be set [out]

gapped TRUE if a gapped search [in]

gap_open existence cost of a gap [in]

gap_extend extension cost of a gap [in]

kbp_ungapped Karlin block with ungapped values of the parameters [in]

Definition at line 2738 of file blast_stat.c.
References Blast_GetMatrixValues(), and BLAST_MATRIX_BEST.
Referenced by BLAST_CalcEffLengths().

static Int2 Blast_GetMatrixValues ( const char * matrix,

Int4 ** open,

Int4 ** extension,

double ** lambda,

double ** K,

double ** H,

double ** alpha,

double ** beta,

Int4 ** pref_flags

) [static]

Obtains arrays of the allowed opening and extension penalties for gapped BLAST for the given matrix.
Also obtains arrays of Lambda, K, and H. Any of these fields that are not required should be set to NULL. The Int2 return value is the length of the arrays.
Parameters:

matrix name of the matrix [in]

open gap existence parameter [in|out]

extension cost to extend a gap by one letter [in|out]

lambda Karlin-Altschul parameter [in|out]

K Karlin-Altschul parameter [in|out]

H Karlin-Altschul parameter [in|out]

alpha Karlin-Altschul parameter [in|out]

beta Karlin-Altschul parameter [in|out]

pref_flags describes preferred values [in|out]

Returns:
maximum number of values (length of arrays).

Definition at line 2657 of file blast_stat.c.
References BlastLoadMatrixValues(), FALSE, MatrixInfo::max_number_values, MatrixInfo::name, ListNode::next, MatrixInfo::prefs, ListNode::ptr, strcasecmp, TRUE, and MatrixInfo::values.
Referenced by BLAST_GetAlphaBeta(), BLAST_GetProteinGapExistenceExtendParams(), and RPSfindUngappedLambda().

Int2 Blast_GetNuclAlphaBeta ( Int4 reward,

Int4 penalty,

Int4 gap_open,

Int4 gap_extend,

Blast_KarlinBlk * kbp,

Boolean gapped_calculation,

double * alpha,

double * beta

)

Extract the alpha and beta settings for these substitution and gap scores.
If substitution or gap costs are not found in the tables, assume an ungapped search. Then alpha is computed using the formula Alpha = Lambda/H, and beta is equal to 0 except for some special cases.
Parameters:

reward Match reward score [in]

penalty Mismatch penalty score [in]

gap_open Gap opening (existence) cost [in]

gap_extend Gap extension cost [in]

kbp Karlin block containing already computed Lambda, K and H parameters.

gapped_calculation Is this a gapped search? [in]

alpha Alpha parameter for this scoring system [out]

beta Beta parameter for this scoring system [out]

Definition at line 3481 of file blast_stat.c.
References ASSERT, FALSE, s_GetNuclValuesArray(), and TRUE.
Referenced by BLAST_CalcEffLengths().

Int2 BLAST_GetNucleotideGapExistenceExtendParams ( Int4 reward,

Int4 penalty,

Int4 * gap_existence,

Int4 * gap_extension

)

Extract the recommended gap existence and extension values.
Only to be used with blastn searches.
Parameters:

reward match score [in]

penalty mismatch score [in]

gap_existence returns recommended existence cost [in|out]

gap_extension returns recommended extension cost [in|out]

Returns:
zero on success

Definition at line 3046 of file blast_stat.c.
References FALSE, s_GetNuclValuesArray(), sfree, and TRUE.
Referenced by BOOST_AUTO_UNIT_TEST().

Int2 BLAST_GetProteinGapExistenceExtendParams ( const char * matrixName,

Int4 * gap_existence,

Int4 * gap_extension

)

Extract the recommended gap existence and extension values.
Only to be used with protein matrices.
Parameters:

matrixName name of the matrix [in]

gap_existence returns recommended existence cost [in|out]

gap_extension returns recommended extension cost [in|out]

Returns:
zero on success

Definition at line 3018 of file blast_stat.c.
References Blast_GetMatrixValues(), and BLAST_MATRIX_BEST.
Referenced by BOOST_AUTO_UNIT_TEST(), and CGenericSearchArgs::ExtractAlgorithmOptions().

Int2 Blast_GetStdAlphabet ( Uint1 alphabet_code,

Uint1 * residues,

Uint4 residues_size

)

Fills a buffer with the 'standard' alphabet (given by STD_AMINO_ACID_FREQS[index].ch).
< points to the standard amino acid frequencies to use.
< points to the standard amino acid frequencies to use.
< points to the standard amino acid frequencies to use.
< points to the standard amino acid frequencies to use.
Definition at line 1510 of file blast_stat.c.
References AMINOACID_TO_NCBISTDAA, BLASTAA_SEQ_CODE, DIM, and STD_AMINO_ACID_FREQS.
Referenced by _PSIComputeScoreProbabilities(), and Blast_ResFreqStdComp().

Int2 Blast_KarlinBlkCopy ( Blast_KarlinBlk * kbp_to,

Blast_KarlinBlk * kbp_from

)

Copies contents of one Karlin block to another.
Both must be allocated before this function is called.
Parameters:

kbp_to Karlin block to copy values to [in] [out]

kbp_from Karlin block to copy values from [in]

Returns:
0 on success; -1 if either argument is NULL on input.

Definition at line 2522 of file blast_stat.c.
References Blast_KarlinBlk::H, Blast_KarlinBlk::K, Blast_KarlinBlk::Lambda, Blast_KarlinBlk::logK, and Blast_KarlinBlk::paramC.
Referenced by Blast_ScoreBlkKbpGappedCalc(), Blast_ScoreBlkKbpUngappedCalc(), CBlastAncillaryData::do_copy(), s_InitializeKarlinBlk(), and s_RestoreSearch().

Blast_KarlinBlk* Blast_KarlinBlkFree ( Blast_KarlinBlk * kbp )

Deallocates the KarlinBlk.

Parameters:

kbp KarlinBlk to be deallocated [in]

Returns:
NULL

Definition at line 850 of file blast_stat.c.
References sfree.
Referenced by Blast_ScoreBlkKbpUngappedCalc(), BlastScoreBlkFree(), BOOST_AUTO_UNIT_TEST(), MakeSomeInvalidKBP(), MakeSomeValidKBP(), s_SavedParametersFree(), SPsiBlastScoreMatrixFree(), and CBlastAncillaryData::~CBlastAncillaryData().

Int2 Blast_KarlinBlkGappedCalc ( Blast_KarlinBlk * kbp,

Int4 gap_open,

Int4 gap_extend,

const char * matrix_name,

Blast_Message ** error_return

)

Fills in lambda, H, and K values, as calculated by Stephen Altschul in Methods in Enzymology (vol 266, page 474).
Parameters:

kbp object to be filled in [in|out]

gap_open cost of gap existence [in]

gap_extend cost to extend a gap one letter [in]

matrix_name name of the matrix to be used [in]

error_return filled in with error message if needed [out]

Returns:
zero on success

Definition at line 3171 of file blast_stat.c.
References Blast_KarlinBlkGappedLoadFromTables(), Blast_MessageWrite(), BlastKarlinReportAllowedValues(), BlastLoadMatrixValues(), BlastMatrixValuesDestruct(), eBlastSevError, kBlastMessageNoContext, MatrixInfo::name, ListNode::next, and ListNode::ptr.
Referenced by Blast_ScoreBlkKbpGappedCalc().

Int2 Blast_KarlinBlkGappedLoadFromTables ( Blast_KarlinBlk * kbp,

Int4 gap_open,

Int4 gap_extend,

const char * matrix_name

)

Attempts to fill KarlinBlk for given gap opening, extensions etc.

Parameters:

kbp object to be filled in [in|out]

gap_open gap existence cost [in]

gap_extend gap extension cost [in]

matrix_name name of the matrix used [in]

Returns:
-1 if matrix_name is NULL; 1 if matrix not found; 2 if matrix found, but open, extend etc. values not supported.

Definition at line 3221 of file blast_stat.c.
References BLAST_Nint(), BlastLoadMatrixValues(), FALSE, MatrixInfo::max_number_values, MatrixInfo::name, ListNode::next, ListNode::ptr, strcasecmp, TRUE, and MatrixInfo::values.
Referenced by Blast_KarlinBlkGappedCalc(), BlastScoringOptionsValidate(), and CMultiAligner::x_ComputeTree().

Blast_KarlinBlk* Blast_KarlinBlkNew ( void )

Callocs a Blast_KarlinBlk.

Returns:
pointer to the Blast_KarlinBlk

Definition at line 2512 of file blast_stat.c.
Referenced by Blast_ScoreBlkKbpGappedCalc(), Blast_ScoreBlkKbpIdealCalc(), Blast_ScoreBlkKbpUngappedCalc(), CBlastAncillaryData::CBlastAncillaryData(), CBlastAncillaryData::do_copy(), MakeSomeInvalidKBP(), MakeSomeValidKBP(), s_FillScoreBlkWithBadKbp(), s_InitializeKarlinBlk(), s_PHIScoreBlkFill(), s_RecordInitialSearch(), and SPsiBlastScoreMatrixNew().

Int2 Blast_KarlinBlkNuclGappedCalc ( Blast_KarlinBlk * kbp,

Int4 gap_open,

Int4 gap_extend,

Int4 reward,

Int4 penalty,

Blast_KarlinBlk * kbp_ungap,

Boolean * round_down,

Blast_Message ** error_return

)

Retrieves Karlin-Altschul parameters from precomputed tables, given the substitution and gap scores.
Gap cost values greater than any of those listed in the tables ("greater" meaning that both values are greater than or equal, and at least one is strictly greater) are treated as infinite, and parameter values are copied from the ungapped Karlin block.
Parameters:

kbp Allocated Karlin block to fill [in] [out]

gap_open Gap opening (existence) cost [in]

gap_extend Gap extension cost [in]

reward Match reward score [in]

penalty Mismatch penalty score [in]

kbp_ungap Karlin block with ungapped Karlin-Altschul parameters [in]

round_down specifies that the score should be rounded down to nearest even score in some cases [in|out]

error_return Pointer to error message. [in] [out]

Definition at line 3368 of file blast_stat.c.
References ASSERT, Blast_KarlinBlk::H, Blast_KarlinBlk::K, Blast_KarlinBlk::Lambda, Blast_KarlinBlk::logK, s_GetNuclValuesArray(), and sfree.
Referenced by Blast_ScoreBlkKbpGappedCalc().

Int2 Blast_KarlinBlkUngappedCalc ( Blast_KarlinBlk * kbp,

Blast_ScoreFreq * sfp

)

Computes the parameters lambda, H, and K for use in calculating the statistical significance of high-scoring segments or subalignments (see comment on blast_stat.c for more details).
< Initial guess for the value of Lambda in BlastKarlinLambdaNR
Definition at line 2346 of file blast_stat.c.
References BLAST_KARLIN_LAMBDA0_DEFAULT, Blast_KarlinLambdaNR(), BlastKarlinLHtoK(), BlastKarlinLtoH(), Blast_KarlinBlk::H, Blast_KarlinBlk::K, Blast_KarlinBlk::Lambda, and Blast_KarlinBlk::logK.
Referenced by _PSIUpdateLambdaK(), Blast_ScoreBlkKbpIdealCalc(), and Blast_ScoreBlkKbpUngappedCalc().

double BLAST_KarlinEtoP ( double x )

Convert an E-value to a P-value.
E-values and P-values may either represent statistics of a database search or represent statistics on the two sequences being compared. If given a database E-value, this routine will return a database P-value; if given a pairwise E-value, it will return a pairwise P-value.
In the context of a database search, the available E-value is typically a database E-value, whereas the desired P-value is a pairwise P-value. When this is the case, the E-value should be divided by the effective length of the database and multiplied by the effective length of the subject, before BLAST_KarlinEtoP is called.

Parameters:

x the expect value to be converted [in]

Returns:
the corresponding p-value.

Definition at line 3705 of file blast_stat.c.
References BLAST_Expm1().
Referenced by s_AdjustEvaluesForComposition().

double Blast_KarlinLambdaNR ( Blast_ScoreFreq * sfp,

double initialLambdaGuess

)

Calculates the parameter Lambda given an initial guess for its value.
< LAMBDA_ACCURACY_DEFAULT == accuracy to which Lambda should be calc'd
< LAMBDA_ITER_DEFAULT == no. of iterations in LambdaBis = ln(accuracy)/ln(2)
Definition at line 2214 of file blast_stat.c.
References BLAST_Gcd(), BlastScoreChk(), Blast_ScoreFreq::obs_max, Blast_ScoreFreq::obs_min, Blast_ScoreFreq::score_avg, and Blast_ScoreFreq::sprob.
Referenced by Blast_KarlinBlkUngappedCalc(), and RPSRescalePssm().

double BLAST_KarlinPtoE ( double p )

Convert a P-value to an E-value.
P-values and E-values may either represent statistics of a database search or represent statistics on the two sequences being compared. If given a database P-value, this routine will return a database E-value; if given a pairwise P-value, it will return a pairwise E-value.
In the context of a database search, the available P-value is often a pairwise P-value, whereas the desired E-value is a database E-value. When this is the case, the value returned by this routine should be multiplied by the effective length of the database and divided by the effective length of the subject.

Parameters:

p the P-value to be converted [in]

Returns:
the corresponding expect value.

Definition at line 3691 of file blast_stat.c.
References BLAST_Log1p(), INT4_MAX, and INT4_MIN.
Referenced by BLAST_LargeGapSumE(), BLAST_SmallGapSumE(), BLAST_UnevenGapSumE(), and s_AdjustEvaluesForComposition().

double BLAST_KarlinStoE_simple ( Int4 S,

Blast_KarlinBlk * kbp,

Int8 searchsp

)

Calculates the Expect value based upon the search space and some Karlin-Altschul parameters.
It is "simple" as it does not use sum-statistics.
Parameters:

S the score of the alignment. [in]

kbp the Karlin-Altschul parameters. [in]

searchsp total search space to be used [in]

Returns:
the expect value

Definition at line 3673 of file blast_stat.c.
References Blast_KarlinBlk::H, H, Blast_KarlinBlk::K, Blast_KarlinBlk::Lambda, and Blast_KarlinBlk::logK.
Referenced by BLAST_Cutoffs(), Blast_HSPListGetEvalues(), and CScoreBuilder::GetBlastEValue().

double BLAST_LargeGapSumE ( Int2 num,

double xsum,

Int4 query_length,

Int4 subject_length,

Int8 searchsp_eff,

double weight_divisor

)

Calculates the e-value of a collection of distinct alignments with arbitrarily large gaps between the alignments.

Parameters:

num number of distinct alignments in the collection [in]

xsum the sum of the scores of these alignments each individually normalized using an appropriate value of Lambda and logK [in]

query_length effective length of query sequence [in]

subject_length effective length of subject sequence [in]

searchsp_eff effective size of the search space [in]

weight_divisor a divisor used to weight the e-value when multiple collections of alignments are being considered by the calling routine [in]

Returns:
sum expect value.

Definition at line 4045 of file blast_stat.c.
References BLAST_KarlinPtoE(), BLAST_LnFactorial(), INT4_MAX, and s_BlastSumP().

char* BLAST_PrintAllowedValues ( const char * matrix,

Int4 gap_open,

Int4 gap_extend

)

Prints a message about the allowed open etc. values for the given matrix; BlastKarlinBlkGappedFill should return 2 before this is called.

Parameters:

matrix name of the matrix [in]

gap_open gap existence cost [in]

gap_extend cost to extend a gap by one [in]

Returns:
message

Definition at line 3319 of file blast_stat.c.
References BLAST_Nint(), BlastLoadMatrixValues(), FALSE, MatrixInfo::max_number_values, MatrixInfo::name, ListNode::next, ListNode::ptr, strcasecmp, TRUE, and MatrixInfo::values.
Referenced by BlastScoringOptionsValidate().

char* BLAST_PrintMatrixMessage ( const char * matrix )

Prints a message about the allowed matrices; BlastKarlinBlkGappedFill should return 1 before this is called.

Parameters:

matrix the matrix to print a message about [in]

Returns:
the message

Definition at line 3292 of file blast_stat.c.
References BlastLoadMatrixValues(), BlastMatrixValuesDestruct(), MatrixInfo::name, ListNode::next, and ListNode::ptr.
Referenced by BlastScoringOptionsValidate().

static Int2 Blast_ResFreqClr ( const BlastScoreBlk * sbp,

Blast_ResFreq * rfp

) [static]

Sets prob elements of Blast_ResFreq to zero.

Parameters:

sbp needed for alphabet information [in]

rfp contains elements to be zeroed [in|out]

Returns:
zero on success.

Definition at line 1671 of file blast_stat.c.
References BlastScoreBlk::alphabet_size, BlastScoreBlk::alphabet_start, and Blast_ResFreq::prob.
Referenced by Blast_ResFreqResComp().

Blast_ResFreq* Blast_ResFreqFree ( Blast_ResFreq * rfp )

Deallocates Blast_ResFreq and prob0 element.

Parameters:

rfp the Blast_ResFreq to be deallocated.

Definition at line 1336 of file blast_stat.c.
References Blast_ResFreq::prob0, and sfree.
Referenced by BLAST_GetStandardAaProbabilities(), Blast_ResFreqNew(), Blast_ScoreBlkKbpIdealCalc(), and Blast_ScoreBlkKbpUngappedCalc().

Blast_ResFreq* Blast_ResFreqNew ( const BlastScoreBlk * sbp )

Allocates a new Blast_ResFreq structure and fills in the prob element based upon the contents of sbp.

Parameters:

sbp The BlastScoreBlk* used to init prob [in]

Definition at line 1355 of file blast_stat.c.
References BlastScoreBlk::alphabet_code, Blast_ResFreq::alphabet_code, BlastScoreBlk::alphabet_size, BlastScoreBlk::alphabet_start, Blast_ResFreqFree(), Blast_ResFreq::prob, and Blast_ResFreq::prob0.
Referenced by BLAST_GetStandardAaProbabilities(), Blast_ScoreBlkKbpIdealCalc(), Blast_ScoreBlkKbpUngappedCalc(), s_GetCompressedProbs(), SPHIPatternSearchBlkNew(), and CMultiAligner::x_AssignDefaultResFreqs().

static Int2 Blast_ResFreqNormalize ( const BlastScoreBlk * sbp,

Blast_ResFreq * rfp,

double norm

) [static]

Normalizes all the residue frequencies and then normalizes them to "norm".
If "norm" is one, then they will all sum to one.
Parameters:

sbp needed for alphabet information [in]

rfp array of residue frequencies to be normalized [in|out]

norm value to normalize to [in]

Returns:
zero on success, 1 otherwise

Definition at line 1482 of file blast_stat.c.
References BlastScoreBlk::alphabet_size, BlastScoreBlk::alphabet_start, and Blast_ResFreq::prob.

static Int2 Blast_ResFreqResComp ( const BlastScoreBlk * sbp,

Blast_ResFreq * rfp,

const Blast_ResComp * rcp

) [static]

Calculate the residue frequencies associated with the provided ResComp. This function takes into account the composition of a given sequence (expressed through rcp) rather than just doing it for a standard distribution.

Parameters:

sbp contains alphabet information [in]

rfp object to be filled in [in|out]

rcp object with composition information [in]

Returns:
zero on success, 1 on failure

Definition at line 1691 of file blast_stat.c.
References Blast_ResComp::alphabet_code, Blast_ResFreq::alphabet_code, BlastScoreBlk::alphabet_size, BlastScoreBlk::alphabet_start, Blast_ResFreqClr(), Blast_ResComp::comp, and Blast_ResFreq::prob.
Referenced by Blast_ResFreqString().

Int2 Blast_ResFreqStdComp ( const BlastScoreBlk * sbp,

Blast_ResFreq * rfp

)

Calculates residue frequencies given a standard distribution.
< points to the standard amino acid frequencies to use.
< points to the standard amino acid frequencies to use.
< points to the standard amino acid frequencies to use.
< points to the standard amino acid frequencies to use.
Definition at line 1534 of file blast_stat.c.
References BlastScoreBlk::alphabet_code, Blast_GetStdAlphabet(), DIM, Blast_ResFreq::prob, BlastScoreBlk::protein_alphabet, sfree, STD_AMINO_ACID_FREQS, and TRUE.
Referenced by BLAST_GetStandardAaProbabilities(), Blast_ScoreBlkKbpIdealCalc(), Blast_ScoreBlkKbpUngappedCalc(), s_GetCompressedProbs(), SPHIPatternSearchBlkNew(), and CMultiAligner::x_AssignDefaultResFreqs().

static Int2 Blast_ResFreqString ( const BlastScoreBlk * sbp,

Blast_ResFreq * rfp,

char * string,

Int4 length

) [static]

Fills in residue frequencies for a given sequence.

Parameters:

sbp needed for alphabet information [in]

rfp object to be populated [in|out]

string sequence for calculation [in]

length length of above sequence [in]

Definition at line 1725 of file blast_stat.c.
References Blast_ResFreqResComp(), BlastResCompDestruct(), BlastResCompNew(), and BlastResCompStr().
Referenced by Blast_ScoreBlkKbpUngappedCalc().

Int2 Blast_ScoreBlkKbpIdealCalc ( BlastScoreBlk * sbp )

Calculates the Karlin-Altschul parameters assuming standard residue compositions for the query and subject sequences.
It populates the kbp_ideal field of its sbp argument. This is used if the query is translated and the calculated (real) Karlin parameters are bad, as they're calculated for non-coding regions.
Parameters:

sbp ScoreBlk used to calculate "ideal" values. [in|out]

Returns:
0 on success, 1 on failure

Definition at line 2483 of file blast_stat.c.
References Blast_KarlinBlkNew(), Blast_KarlinBlkUngappedCalc(), Blast_ResFreqFree(), Blast_ResFreqNew(), Blast_ResFreqStdComp(), Blast_ScoreFreqFree(), Blast_ScoreFreqNew(), BlastScoreFreqCalc(), BlastScoreBlk::hiscore, BlastScoreBlk::kbp_ideal, and BlastScoreBlk::loscore.
Referenced by Blast_ScoreBlkKbpUngappedCalc(), and s_PHIScoreBlkFill().

Int2 Blast_ScoreBlkKbpUngappedCalc ( EBlastProgramType program,

BlastScoreBlk * sbp,

Uint1 * query,

const BlastQueryInfo * query_info,

Blast_Message ** blast_message

)

Calculate and fill the ungapped Karlin-Altschul parameters in the BlastScoreBlk structure (fields kbp_std, kbp_psi, and kbp of that structure).

Parameters:

program BLAST program type, needed to decide whether to substitute ideal values. [in]

sbp Scoring block to work with [in] [out]

query Buffer containing (concatenated) query sequence [in]

query_info Information about offsets of concatenated queries [in]

blast_message returns queries that could not be processed [out]

Returns:
0 if ungapped Karlin-Altschul parameters could be calculated for all of the query sequence's contexts; 1 if any of the contexts failed (but all others will be populated).

Definition at line 2384 of file blast_stat.c.
References ASSERT, Blast_KarlinBlkCopy(), Blast_KarlinBlkFree(), Blast_KarlinBlkNew(), Blast_KarlinBlkUngappedCalc(), Blast_MessageWrite(), Blast_QueryIsPssm(), Blast_QueryIsTranslated(), Blast_ResFreqFree(), Blast_ResFreqNew(), Blast_ResFreqStdComp(), Blast_ResFreqString(), Blast_ScoreBlkKbpIdealCalc(), Blast_ScoreFreqFree(), Blast_ScoreFreqNew(), BlastScoreFreqCalc(), BlastQueryInfo::contexts, eBlastSevWarning, eBlastTypeBlastx, eBlastTypeRpsTblastn, eBlastTypeTblastx, FALSE, BlastQueryInfo::first_context, BlastScoreBlk::hiscore, BlastContextInfo::is_valid, kBlastMessageNoContext, BlastScoreBlk::kbp, BlastScoreBlk::kbp_ideal, BlastScoreBlk::kbp_psi, BlastScoreBlk::kbp_std, Blast_KarlinBlk::Lambda, BlastScoreBlk::loscore, BlastContextInfo::query_length, BlastContextInfo::query_offset, BlastScoreBlk::sfp, and TRUE.
Referenced by BlastSetup_ScoreBlkInit(), and LinkHspTestFixture::setupScoreBlk().

Int2 Blast_ScoreBlkMatrixFill ( BlastScoreBlk * sbp,

GET_MATRIX_PATH get_path

)

This function fills in the BlastScoreBlk structure.
Tasks are: -read in the matrix -set maxscore
Parameters:

sbp Scoring block [in] [out]

get_path pointer to function that will return path to matrix. Only called if built-in matrix not found [in]

Definition at line 1275 of file blast_stat.c.
References BlastScoreBlk::alphabet_code, BLASTNA_SEQ_CODE, BlastScoreBlkMaxScoreSet(), BlastScoreBlkNuclMatrixCreate(), BlastScoreBlkProteinMatrixLoad(), BlastScoreBlkProteinMatrixRead(), FALSE, fp, BlastScoreBlk::name, BlastScoreBlk::read_in_matrix, sfree, and TRUE.
Referenced by Blast_ScoreBlkMatrixInit(), and s_PHIScoreBlkFill().

Blast_ScoreFreq* Blast_ScoreFreqFree ( Blast_ScoreFreq * sfp )

Deallocates the score frequencies structure.

Parameters:

sfp the structure to deallocate [in]

Returns:
NULL

Definition at line 835 of file blast_stat.c.
References sfree, and Blast_ScoreFreq::sprob0.
Referenced by _PSIUpdateLambdaK(), Blast_ScoreBlkKbpIdealCalc(), Blast_ScoreBlkKbpUngappedCalc(), Blast_ScoreFreqNew(), and BlastScoreBlkFree().

Blast_ScoreFreq* Blast_ScoreFreqNew ( Int4 score_min,

Int4 score_max

)

Creates a new structure to keep track of score frequencies for a scoring system.

Parameters:

score_min Minimum score [in]

score_max Maximum score [in]

Returns:
allocated and initialized pointer to Blast_ScoreFreq

Definition at line 1760 of file blast_stat.c.
References Blast_ScoreFreqFree(), BlastScoreChk(), Blast_ScoreFreq::obs_max, Blast_ScoreFreq::obs_min, Blast_ScoreFreq::score_avg, Blast_ScoreFreq::score_max, Blast_ScoreFreq::score_min, Blast_ScoreFreq::sprob, and Blast_ScoreFreq::sprob0.
Referenced by Blast_ScoreBlkKbpIdealCalc(), Blast_ScoreBlkKbpUngappedCalc(), and s_PHIScoreBlkFill().

Int2 BLAST_ScoreSetAmbigRes ( BlastScoreBlk * sbp,

char ambiguous_res

)

Set the ambiguous residue (e.g., 'N', 'X') in the BlastScoreBlk*.
Convert from ncbieaa to sbp->alphabet_code (i.e., ncbistdaa) first.

Parameters:

sbp the object to be modified [in|out]

ambiguous_res the residue to be set on the BlastScoreBlk

Returns:
zero on success, others on error

Definition at line 900 of file blast_stat.c.
References BlastScoreBlk::ambig_occupy, BlastScoreBlk::ambig_size, BlastScoreBlk::ambiguous_res, and sfree.
Referenced by Blast_ScoreBlkMatrixInit().

double BLAST_SmallGapSumE ( Int4 start_points,

Int2 num,

double xsum,

Int4 query_length,

Int4 subject_length,

Int8 searchsp_eff,

double weight_divisor

)

Calculates the e-value for alignments with "small" gaps (typically under fifty residues/basepairs) following ideas of Stephen Altschul's.

Parameters:

start_points the number of starting points permitted between adjacent alignments; max_overlap + max_gap + 1 [in]

num the number of distinct alignments in this collection [in]

xsum the sum of the scores of these alignments each individually normalized using an appropriate value of Lambda and logK [in]

query_length effective len of the query seq [in]

subject_length effective len of the subject seq [in]

searchsp_eff effective size of the search space [in]

weight_divisor a divisor used to weight the e-value when multiple collections of alignments are being considered by the calling routine [in]

Returns:
the expect value

Definition at line 3933 of file blast_stat.c.
References BLAST_KarlinPtoE(), BLAST_LnFactorial(), INT4_MAX, and s_BlastSumP().

double BLAST_UnevenGapSumE ( Int4 query_start_points,

Int4 subject_start_points,

Int2 num,

double xsum,

Int4 query_length,

Int4 subject_length,

Int8 searchsp_eff,

double weight_divisor

)

Calculates the e-value of a collection of multiple distinct alignments with asymmetric gaps between the alignments.
The gaps in one (protein) sequence are typically small (as in BLAST_SmallGapSumE) and the gaps in the other (translated DNA) sequence are possibly large (up to 4000 bp). This routine is used for linking HSPs representing exons in the DNA sequence that are separated by introns.
Parameters:

query_start_points the number of starting points in the query sequence permitted between adjacent alignments [in]

subject_start_points the number of starting points in the subject sequence permitted between adjacent alignments [in]

num The number of distinct alignments in this collection [in]

xsum The sum of the scores of these alignments, each normalized using an appropriate value of Lambda and logK [in]

query_length The effective len of the query seq [in]

subject_length The effective len of the database seq [in]

searchsp_eff effective size of the search space [in]

weight_divisor A divisor used to weight the e-value when multiple collections of alignments are being considered by the calling routine [in]

Returns:
Resulting e-value of a combined set.

Definition at line 4005 of file blast_stat.c.
References BLAST_KarlinPtoE(), BLAST_LnFactorial(), INT4_MAX, and s_BlastSumP().
Referenced by s_SumHSPEvalue().

static Int4 BlastKarlinEtoS_simple ( double E,

const Blast_KarlinBlk * kbp,

Int8 searchsp

) [static]

Calculates score from expect value and search space.

Parameters:

E expect value [in]

kbp contains Karlin-Altschul parameters [in]

searchsp query times database size [in]

Returns:
score

Definition at line 3556 of file blast_stat.c.
References BLAST_SCORE_MIN, Blast_KarlinBlk::H, H, Blast_KarlinBlk::K, Blast_KarlinBlk::Lambda, and MAX.
Referenced by BLAST_Cutoffs().

static double BlastKarlinLHtoK ( Blast_ScoreFreq * sfp,

double lambda,

double H

) [static]

The following procedure computes K.
The input includes Lambda, H, and an array of probabilities for each score. There are distinct closed forms for three cases: 1. high score is 1, low score is -1; 2. high score is 1, low score is not -1; 3. low score is -1, high score is not 1.
Otherwise, in most cases the value is computed as: -exp(-2.0*outerSum) / ((H/lambda)*(exp(-lambda) - 1)). The last term (exp(-lambda) - 1) can be computed in two different ways depending on whether lambda is small or not. outerSum is a sum of the terms innerSum/j, where j is denoted by iterCounter in the code. The sum is truncated when the new term innerSum/j is sufficiently small. innerSum is a weighted sum of the probabilities of achieving a total score i in a gapless alignment of exactly j characters, which we denote by P(i,j). innerSum(j) has two parts: Sum over i < 0 P(i,j)exp(-i * lambda) + Sum over i >=0 P(i,j). The terms P(i,j) are computed by dynamic programming. An earlier version was flawed in that it ignored the special case 1 and tried to replace the tail of the computation of outerSum by a geometric series, but the base of the geometric series was not accurately estimated in some cases.

Parameters:

sfp object holding scoring frequency information [in]

lambda a Karlin-Altschul parameter [in]

H a Karlin-Altschul parameter [in]

Returns:
K, another Karlin-Altschul parameter
< K_SUMLIMIT_DEFAULT == sumlimit used in BlastKarlinLHtoK()
< upper limit on iterations for BlastKarlinLHtoK
Definition at line 1894 of file blast_stat.c.
References BLAST_Gcd(), Blast_ScoreFreq::obs_max, Blast_ScoreFreq::obs_min, Blast_ScoreFreq::score_avg, and Blast_ScoreFreq::sprob.
Referenced by Blast_KarlinBlkUngappedCalc().

static double BlastKarlinLtoH ( Blast_ScoreFreq * sfp,

double lambda

) [static]

Calculate H, the relative entropy of the p's and q's.

Parameters:

sfp object containing scoring frequency information [in]

lambda a Karlin-Altschul parameter [in]

Returns:
H, a Karlin-Altschul parameter

Definition at line 2254 of file blast_stat.c.
References BlastScoreChk(), H, Blast_ScoreFreq::obs_max, Blast_ScoreFreq::obs_min, and Blast_ScoreFreq::sprob.
Referenced by Blast_KarlinBlkUngappedCalc().

static Int2 BlastKarlinReportAllowedValues ( const char * matrix_name,

Blast_Message ** error_return

) [static]

Fills in error_return with strings describing the allowed values.

Parameters:

matrix_name name of the matrix [in]

error_return object to be filled in [in|out]

Returns:
zero on success.

Definition at line 3122 of file blast_stat.c.
References Blast_MessageWrite(), BLAST_Nint(), BlastLoadMatrixValues(), eBlastSevError, FALSE, kBlastMessageNoContext, MatrixInfo::max_number_values, MatrixInfo::name, ListNode::next, ListNode::ptr, strcasecmp, TRUE, and MatrixInfo::values.
Referenced by Blast_KarlinBlkGappedCalc().

static ListNode* BlastLoadMatrixValues ( void ) [static]

Loads all the matrix values, returns a ListNode* chain that contains MatrixInfo*'s.

Returns:
list of MatrixInfos.
< Number of different combinations supported for BLOSUM80.
< Number of different combinations supported for BLOSUM62.
< Number of different combinations supported for BLOSUM50.
< Number of different combinations supported for BLOSUM45.
< Number of different combinations supported for PAM250.
< Number of different combinations supported for BLOSUM90.
< Number of different combinations supported for PAM30.
< Number of different combinations supported for PAM70.
Definition at line 2603 of file blast_stat.c.
References blosum45_prefs, blosum45_values, BLOSUM45_VALUES_MAX, blosum50_prefs, blosum50_values, BLOSUM50_VALUES_MAX, blosum62_prefs, blosum62_values, BLOSUM62_VALUES_MAX, blosum80_prefs, blosum80_values, BLOSUM80_VALUES_MAX, blosum90_prefs, blosum90_values, BLOSUM90_VALUES_MAX, ListNodeAddPointer(), MatrixInfoNew(), pam250_prefs, pam250_values, PAM250_VALUES_MAX, pam30_values, PAM30_VALUES_MAX, and PAM70_VALUES_MAX.
Referenced by Blast_GetMatrixValues(), Blast_KarlinBlkGappedCalc(), Blast_KarlinBlkGappedLoadFromTables(), BLAST_PrintAllowedValues(), BLAST_PrintMatrixMessage(), and BlastKarlinReportAllowedValues().

static ListNode* BlastMatrixValuesDestruct ( ListNode * vnp ) [static]

Free linked list of MatrixValues and all associated data.

Parameters:

vnp linked list of MatrixValues [in]

Returns:
NULL pointer

Definition at line 2580 of file blast_stat.c.
References ListNodeFree(), MatrixInfoDestruct(), ListNode::next, and ListNode::ptr.
Referenced by Blast_KarlinBlkGappedCalc(), and BLAST_PrintMatrixMessage().

static Blast_ResComp* BlastResCompDestruct ( Blast_ResComp * rcp ) [static]

Deallocates Blast_ResComp structure and associated arrays.

Parameters:

rcp the object to be freed [in|out]

Returns:
NULL

Definition at line 1581 of file blast_stat.c.
References Blast_ResComp::comp0, and sfree.
Referenced by Blast_ResFreqString(), and BlastResCompNew().

static Blast_ResComp* BlastResCompNew ( const BlastScoreBlk * sbp ) [static]

Allocates the Blast_ResComp* for a given alphabet.
Only the alphabets ncbistdaa and ncbi4na should be used by BLAST.
Parameters:

sbp contains alphabet code and size.

Returns:
pointer to Blast_ResComp, correctly initialized.

Definition at line 1599 of file blast_stat.c.
References BlastScoreBlk::alphabet_code, Blast_ResComp::alphabet_code, BlastScoreBlk::alphabet_size, BlastScoreBlk::alphabet_start, BlastResCompDestruct(), Blast_ResComp::comp, and Blast_ResComp::comp0.
Referenced by Blast_ResFreqString().

static Int2 BlastResCompStr ( const BlastScoreBlk * sbp,

Blast_ResComp * rcp,

char * str,

Int4 length

) [static]

Store the composition of a (query) string.

Parameters:

sbp needed for alphabet information [in]

rcp object to be filled in [in|out]

str sequence to have composition calculated for [in]

length length of sequence [in]

Returns:
zero on success, 1 otherwise.

Definition at line 1631 of file blast_stat.c.
References BlastScoreBlk::alphabet_code, Blast_ResComp::alphabet_code, BlastScoreBlk::alphabet_size, Blast_ResComp::comp, Blast_ResComp::comp0, and BlastScoreBlk::protein_alphabet.
Referenced by Blast_ResFreqString().

BlastScoreBlk* BlastScoreBlkFree ( BlastScoreBlk * sbp )

Deallocates BlastScoreBlk as well as all associated structures.

Parameters:

sbp BlastScoreBlk to be deallocated [in]

Returns:
NULL pointer.

Definition at line 859 of file blast_stat.c.
References Blast_KarlinBlkFree(), Blast_ScoreFreqFree(), BlastScoreBlk::kbp_gap_psi, BlastScoreBlk::kbp_gap_std, BlastScoreBlk::kbp_psi, BlastScoreBlk::kbp_std, BlastScoreBlk::number_of_contexts, and BlastScoreBlk::sfp.
Referenced by BlastScoreBlkNew(), BOOST_AUTO_TEST_CASE(), BOOST_AUTO_UNIT_TEST(), LinkHspTestFixture::freeStructures(), TestFixture::TearDownLookupTable(), CBlastTracebackSearch::x_Init(), CPssmEngine::x_InitializeScoreBlock(), CBl2Seq::x_ResetQueryDs(), AalookupTestFixture::~AalookupTestFixture(), AascanTestFixture::~AascanTestFixture(), and CScoreBuilder::~CScoreBuilder().

static Int2 BlastScoreBlkMaxScoreSet ( BlastScoreBlk * sbp ) [static]

Sets maximum and minimum scores on the BlastScoreBlk for a given matrix.

Parameters:

sbp the BlastScoreBlk on which loscore and hiscore will be set [in|out]

Returns:
zero on success

Definition at line 1177 of file blast_stat.c.
References BlastScoreBlk::alphabet_size, BLAST_SCORE_MAX, BLAST_SCORE_MIN, SBlastScoreMatrix::data, BlastScoreBlk::hiscore, BlastScoreBlk::loscore, and BlastScoreBlk::matrix.
Referenced by Blast_ScoreBlkMatrixFill().

BlastScoreBlk* BlastScoreBlkNew ( Uint1 alphabet,

Int4 number_of_contexts

)

Allocates and initializes BlastScoreBlk.

Parameters:

alphabet either BLASTAA_SEQ_CODE or BLASTNA_SEQ_CODE [in]

number_of_contexts how many strands or sequences [in]

Returns:
BlastScoreBlk*

Definition at line 784 of file blast_stat.c.
References BlastScoreBlk::alphabet_code, BlastScoreBlk::alphabet_size, BLASTAA_SEQ_CODE, BLASTAA_SIZE, BLASTNA_SEQ_CODE, BLASTNA_SIZE, BlastScoreBlkFree(), FALSE, BlastScoreBlk::kbp_gap_psi, BlastScoreBlk::kbp_gap_std, BlastScoreBlk::kbp_psi, BlastScoreBlk::kbp_std, BlastScoreBlk::matrix, BlastScoreBlk::number_of_contexts, BlastScoreBlk::protein_alphabet, SBlastScoreMatrixNew(), BlastScoreBlk::scale_factor, BlastScoreBlk::sfp, and TRUE.
Referenced by AascanTestFixture::AascanTestFixture(), BlastSetup_ScoreBlkInit(), BOOST_AUTO_TEST_CASE(), AalookupTestFixture::FillLookupTable(), s_FillScoreBlkWithBadKbp(), LinkHspTestFixture::setupScoreBlk(), and CMultiAligner::x_AssignDefaultResFreqs().

Int2 BlastScoreBlkNuclMatrixCreate ( BlastScoreBlk * sbp )

Fill in the matrix for blastn using the penalty and reward. The query sequence alphabet is blastna, the subject sequence is ncbi2na.
The alphabet blastna is defined in blast_stat.h and the first four elements of blastna are identical to ncbi2na.
Parameters:

sbp the BlastScoreBlk on which reward, penalty, and matrix will be set [in|out]

Returns:
zero on success.

Definition at line 948 of file blast_stat.c.
References BlastScoreBlk::alphabet_size, ASSERT, BLASTNA_SIZE, BLASTNA_TO_NCBI4NA, SBlastScoreMatrix::data, BlastScoreBlk::matrix, SBlastScoreMatrix::ncols, SBlastScoreMatrix::nrows, BlastScoreBlk::penalty, and BlastScoreBlk::reward.
Referenced by Blast_ScoreBlkMatrixFill().

static Int2 BlastScoreBlkProteinMatrixLoad ( BlastScoreBlk * sbp ) [static]

Sets sbp->matrix->data field using sbp->name field using the matrices in the toolkit (util/tables/raw_scoremat.h).

Parameters:

sbp the object containing matrix and name [in|out]

Returns:
0 on success, 1 if matrix could not be loaded

Definition at line 1216 of file blast_stat.c.
References BlastScoreBlk::alphabet_size, ASSERT, BLAST_SCORE_MIN, BLASTAA_SIZE, SBlastScoreMatrix::data, BlastScoreBlk::matrix, BlastScoreBlk::name, NCBISM_GetStandardMatrix(), SBlastScoreMatrix::ncols, and SBlastScoreMatrix::nrows.
Referenced by Blast_ScoreBlkMatrixFill().

static Int2 BlastScoreBlkProteinMatrixRead ( BlastScoreBlk * sbp,

FILE * fp

) [static]

Read in the matrix from the FILE *fp.
This function ASSUMES that the matrices are in the ncbistdaa alphabet.
Parameters:

sbp the BlastScoreBlk with the matrix to be populated [in|out]

fp the file pointer to read from [in]

Returns:
zero on success

Definition at line 1028 of file blast_stat.c.
References BlastScoreBlk::alphabet_code, BlastScoreBlk::alphabet_size, ASSERT, BLAST_SCORE_MIN, BLASTAA_SIZE, BLASTNA_SEQ_CODE, buf, SBlastScoreMatrix::data, BlastScoreBlk::matrix, SBlastScoreMatrix::ncols, and SBlastScoreMatrix::nrows.
Referenced by Blast_ScoreBlkMatrixFill().

static Int2 BlastScoreChk ( Int4 lo,

Int4 hi

) [static]

Check that the lo and hi score are within the allowed ranges.

Parameters:

lo the lowest permitted value [in]

hi the highest permitted value [in]

Returns:
zero on success, 1 otherwise
Uses BLAST_SCORE_RANGE_MAX, the maximum allowed range of BLAST scores.
Definition at line 1747 of file blast_stat.c.
References BLAST_SCORE_MAX, and BLAST_SCORE_RANGE_MAX.
Referenced by Blast_KarlinLambdaNR(), Blast_ScoreFreqNew(), and BlastKarlinLtoH().

static Int2 BlastScoreFreqCalc ( const BlastScoreBlk * sbp,

Blast_ScoreFreq * sfp,

Blast_ResFreq * rfp1,

Blast_ResFreq * rfp2

) [static]

Calculates the score frequencies.

Parameters:

sbp object with scoring information [in]

sfp object to hold frequency information [in|out]

rfp1 letter frequencies for first sequence (query) [in]

rfp2 letter frequencies for second sequence (database) [in]

Returns:
zero on success

Definition at line 1798 of file blast_stat.c.
References BlastScoreBlk::alphabet_size, BlastScoreBlk::alphabet_start, SBlastScoreMatrix::data, BlastScoreBlk::hiscore, BlastScoreBlk::loscore, BlastScoreBlk::matrix, Blast_ResFreq::prob, Blast_ScoreFreq::score_max, Blast_ScoreFreq::score_min, and Blast_ScoreFreq::sprob.
Referenced by Blast_ScoreBlkKbpIdealCalc(), and Blast_ScoreBlkKbpUngappedCalc().

static MatrixInfo* MatrixInfoDestruct ( MatrixInfo * matrix_info ) [static]

Deallocates MatrixInfo as well as name string.

Parameters:

matrix_info the object to be deallocated [in]

Returns:
NULL pointer

Definition at line 2541 of file blast_stat.c.
References MatrixInfo::name, and sfree.
Referenced by BlastMatrixValuesDestruct().

static MatrixInfo* MatrixInfoNew ( const char * name,

array_of_8 * values,

Int4 * prefs,

Int4 max_number

) [static]

Allocates New MatrixInfo*.

Parameters:

name name of matrix [in]

values array contains information about a matrix [in]

prefs contains information on which values are preferred [in]

max_number size of those arrays [in]

Returns:
pointer to the allocated MatrixInfo

Definition at line 2561 of file blast_stat.c.
References MatrixInfo::max_number_values, MatrixInfo::name, MatrixInfo::prefs, strdup, and MatrixInfo::values.
Referenced by BlastLoadMatrixValues().

static double NlmKarlinLambdaNR ( double * probs,

Int4 d,

Int4 low,

Int4 high,

double lambda0,

double tolx,

Int4 itmax,

Int4 maxNewton,

Int4 * itn

) [static]

Find the positive solution to the equation
sum_{i=low}^{high} exp(i lambda) * probs[i] = 1.
Note that this solution does not exist unless the average score is negative and the largest score that occurs with nonzero probability is positive.

Parameters:

probs probabilities of a score occurring

d the gcd of the possible scores. This equals 1 if the scores are not a lattice

low the lowest possible score that occurs with nonzero probability

high the highest possible score that occurs with nonzero probability.

lambda0 an initial guess for lambda

tolx the tolerance to which lambda must be computed

itmax the maximum number of times the function may be evaluated

maxNewton the maximum permissible number of Newton iterations; after that the computation will proceed by bisection.

*itn the number of iterations needed to compute Lambda, or itmax if Lambda could not be computed.

Let phi(lambda) = sum_{i=low}^{high} exp(i lambda) * probs[i] - 1. Then phi(lambda) may be written
phi(lambda) = exp(u lambda) f( exp(-lambda) )
where f(x) is a polynomial that has exactly two zeros, one at x = 1 and one at x = exp(-lambda). It is simpler to solve this problem in x = exp(-lambda) than it is to solve it in lambda, because we know that for x, a solution lies in [0,1], and because Newton's method is generally more stable and efficient for polynomials than it is for exponentials.
For the most part, this function is a standard safeguarded Newton iteration: define an interval of uncertainty [a,b] with f(a) > 0 and f(b) < 0 (except for the initial value b = 1, where f(b) = 0); evaluate the function and use the sign of that value to shrink the interval of uncertainty; compute a Newton step; and if the Newton step suggests a point outside the interval of uncertainty or fails to decrease the function sufficiently, then bisect. There are three further details needed to understand the algorithm:
1) If y is the unique solution in [0,1], then f is positive to the left of y, and negative to the right. Therefore, we may determine whether the Newton step -f(x)/f'(x) is moving toward, or away from, y by examining the sign of f'(x). If f'(x) >= 0, we bisect instead of taking the Newton step. 2) There is a neighborhood around x = 1 for which f'(x) >= 0, so (1) prevents convergence to x = 1 (and for a similar reason prevents convergence to x = 0, if the function is incorrectly called with probs[high] == 0). 3) Conditions like fabs(p) < lambda_tolerance * x * (1-x) are used in convergence criteria because these values translate to a bound on the relative error in lambda. This is proved in the "Blast Scoring Parameters" document that accompanies the BLAST code.
The iteration on f(x) is robust and doesn't overflow; defining a robust safeguarded Newton iteration on phi(lambda) that cannot converge to lambda = 0 and that is protected against overflow is more difficult. So (despite the length of this comment) the Newton iteration on f(x) is the simpler solution.
Definition at line 2138 of file blast_stat.c.
References f(), and g().

static void RPSFillScores ( Int4 ** matrix,

Int4 matrixLength,

double * queryProbArray,

double * scoreArray,

Blast_ScoreFreq * return_sfp,

Int4 range,

Int4 alphabet_size

) [static]

The routine RPSFillScores computes the probability of each score, weighted by the probability of each query residue, and fills those probabilities into scoreArray. It then stores scoreArray as a field in the structure that is returned. For indexing convenience, the field storing scoreArray points to the entry for score 0, so that referring to the -k index corresponds to score -k. FIXME: This can be replaced by _PSIComputeScoreProbabilities??

Parameters:

matrix a position-specific score matrix with matrixLength positions [in]

matrixLength number of positions in the pssm (arg above) [in]

queryProbArray an array containing the probability of occurrence of each residue in the query [in]

scoreArray an array of probabilities for each score that is to be used as a field in return_sfp

return_sfp a structure to be filled in and returned [in|out]

range the size of scoreArray and is an upper bound on the difference between maximum score and minimum score in the matrix [in]

alphabet_size Number of letters in the alphabet of the input score matrix [in]

Definition at line 4161 of file blast_stat.c.
References AMINOACID_TO_NCBISTDAA, and BLAST_SCORE_MIN.
Referenced by RPSRescalePssm().

static double RPSfindUngappedLambda ( const char * matrixName ) [static]

Gets the ungapped lambda calculated for the matrix in question given standard residue composition for both query and subject sequences.

Parameters:

matrixName name of amino acid substitution matrix [in]

Returns:
lambda ungapped or 0.0 if matrix is not supported

Definition at line 4123 of file blast_stat.c.
References Blast_GetMatrixValues(), and sfree.
Referenced by RPSRescalePssm(), and s_BuildCompressedScoreMatrix().

Int4** RPSRescalePssm ( double scalingFactor,

Int4 rps_query_length,

const Uint1 * rps_query_seq,

Int4 db_seq_length,

Int4 ** posMatrix,

BlastScoreBlk * sbp

)

Rescale the PSSM, using composition-based statistics, for use with RPS BLAST.
Uses BLAST_SCORE_RANGE_MAX, the maximum allowed range of BLAST scores.
Definition at line 4205 of file blast_stat.c.
References _PSIAllocateMatrix(), AMINOACID_TO_NCBISTDAA, ASSERT, Blast_FillResidueProbability(), Blast_KarlinLambdaNR(), BLAST_SCORE_MIN, BLAST_SCORE_RANGE_MAX, BlastScoreBlk::name, SBlastScoreMatrix::nrows, BlastScoreBlk::psi_matrix, SPsiBlastScoreMatrix::pssm, RPSFillScores(), RPSfindUngappedLambda(), and sfree.
Referenced by s_RPSComputeTraceback().

static Int2 s_AdjustGapParametersByGcd ( array_of_8 * normal,

array_of_8 * linear,

int size,

Int4 * gap_existence_max,

Int4 * gap_extend_max,

int divisor

) [static]

Adjust Lambda and H if reward and penalty have a non-1 gcd.
The two arrays (normal and linear) should already be filled in with values.
Parameters:

normal the values for normal (e.g, "affine") gap costs [in|out]

linear specialized values used for megablast [in|out]

size Number of supported combinations for this match/mismatch pair [in]

gap_existence_max start of infinite regime for gap existence [in|out]

gap_extend_max start of infinite regime for gap extension [in|out]

divisor divisor for gap costs [in]

Definition at line 2830 of file blast_stat.c.
Referenced by s_GetNuclValuesArray().

static double s_BlastSumP ( Int4 r,

double s

) [static]

Estimate the Sum P-value by calculation or interpolation, as appropriate.
Approx. 2-1/2 digits accuracy minimum throughout the range of r, s.
Parameters:

r number of segments [in]

s total score (in nats), adjusted by -r*log(KN) [in]

Returns:
p-value

Definition at line 3873 of file blast_stat.c.
References BLAST_Expm1(), BLAST_LnGammaInt(), DIM, and s_BlastSumPCalc().
Referenced by BLAST_LargeGapSumE(), BLAST_SmallGapSumE(), and BLAST_UnevenGapSumE().

static double s_BlastSumPCalc ( int r,

double s

) [static]

Evaluate the following double integral, where r = number of segments and s = the adjusted score in nats:
$$\mathrm{Prob}(r,s) = \frac{r^{\,r-2}}{(r-1)!\,(r-2)!} \int_{s}^{\infty} e^{-y} \int_{0}^{\infty} x^{\,r-2} \exp\!\left(-e^{\,x - y/r}\right) \, dx \, dy$$
Parameters:

r number of segments

s adjusted score in nats

Returns:
P value

Definition at line 3785 of file blast_stat.c.
References BLAST_Expm1(), BLAST_LnGammaInt(), BLAST_RombergIntegrate(), and s_InnerIntegralCback().
Referenced by s_BlastSumP().

static Int2 s_BuildCompressedScoreMatrix ( BlastScoreBlk * sbp,

SCompressedAlphabet * new_alphabet,

double matrix_scale_factor,

CompressedReverseLookup rev_table

) [static]

Compute a (non-square) score matrix for a compressed alphabet.

Parameters:

sbp Structure containing alphabet and scoring information [in]

new_alphabet Structure defining the new alphabet, including the final score matrix [in|out]

matrix_scale_factor Score matrix entries are scaled by this value [in]

rev_table A (one-to-many) mapping from compressed letter to protein letter. The list of protein letters in each row of the table ends with a negative value [in]

Definition at line 4386 of file blast_stat.c.
References _PSIMatrixFrequencyRatiosFree(), _PSIMatrixFrequencyRatiosNew(), BLAST_Nint(), BLAST_SCORE_MIN, SCompressedAlphabet::compressed_alphabet_size, SFreqRatios::data, SBlastScoreMatrix::data, SCompressedAlphabet::matrix, BlastScoreBlk::name, RPSfindUngappedLambda(), s_GetCompressedProbs(), and SBlastScoreMatrixNew().
Referenced by SCompressedAlphabetNew().

static void s_BuildCompressedTranslation ( const char * trans_string,

Uint1 * table,

Int4 compressed_alphabet_size,

CompressedReverseLookup rev_table

) [static]

parse the string defining the conversion between the ordinary protein alphabet and a compressed alphabet

Parameters:

trans_string The alphabet mapping [in]

table A map from protein letter to compressed letter. Protein letters that have no compressed equivalent will translate to value alphabet_size [out]

compressed_alphabet_size The anticipated size of the compressed alphabet [in]

rev_table A (one-to-many) mapping from compressed letter to protein letter. The list of protein letters in each row of the table ends with a negative value [out]

Definition at line 4291 of file blast_stat.c.
Referenced by SCompressedAlphabetNew().

static Int2 s_GetCompressedProbs ( BlastScoreBlk * sbp,

double * compressed_prob,

Int4 compressed_alphabet_size,

CompressedReverseLookup rev_table

) [static]

Calculate conditional probability of each letter in each group.

Parameters:

sbp Structure containing alphabet information [in]

compressed_prob Array containing final probabilities [out]

compressed_alphabet_size size of the alphabet [in]

rev_table A (one-to-many) mapping from compressed letter to protein letter. The list of protein letters in each row of the table ends with a negative value [in]

Definition at line 4329 of file blast_stat.c.
References Blast_ResFreqNew(), Blast_ResFreqStdComp(), letter(), and Blast_ResFreq::prob.
Referenced by s_BuildCompressedScoreMatrix().

static Int2 s_GetNuclValuesArray ( Int4 reward,

Int4 penalty,

Int4 * array_size,

array_of_8 ** normal,

array_of_8 ** non_affine,

Int4 * gap_open_max,

Int4 * gap_extend_max,

Boolean * round_down,

Blast_Message ** error_return

) [static]

Returns the array of values corresponding to the given match/mismatch scores, the number of supported gap cost combinations and thresholds for the gap costs, beyond which the ungapped statistics can be applied.

Parameters:

reward Match reward score [in]

penalty Mismatch penalty score [in]

array_size Number of supported combinations for this match/mismatch pair [out]

normal the values for normal (e.g, "affine") gap costs [out]

non_affine specialized values used for megablast [out]

gap_open_max Gap opening cost threshold for infinite gap costs [out]

gap_extend_max Gap extension cost threshold for infinite gap costs [out]

round_down if set to TRUE only even scores should be used for calculation of expect value or bit scores [out]

error_return Pointer to error message [out]

Returns:
zero on success, other values if error

Definition at line 2882 of file blast_stat.c.
References BLAST_Gcd(), Blast_MessageWrite(), BlastMemDup(), eBlastSevError, FALSE, kBlastMessageNoContext, s_AdjustGapParametersByGcd(), s_SplitArrayOf8(), and TRUE.
Referenced by BLAST_CheckRewardPenaltyScores(), Blast_GetNuclAlphaBeta(), BLAST_GetNucleotideGapExistenceExtendParams(), and Blast_KarlinBlkNuclGappedCalc().

static double s_GetUngappedBeta ( Int4 reward,

Int4 penalty

) [static]

Returns the beta statistical parameter value, given the nucleotide substitution scores.

Parameters:

reward Match reward score [in]

penalty Mismatch penalty score [in]

Returns:
The value of the beta parameter.

Definition at line 3471 of file blast_stat.c.

static double s_InnerIntegralCback ( double s,

void * vp

) [static]

Callback for the Romberg integration function.
This is the first of the double integrals that BlastSumPCalc calculates. This is the integral described in the paragraph after eqn. 4 of Karlin and Altschul, PNAS USA, 90, 5873-5877 (1993).

Parameters:

s variable to integrate over [in]

vp pointer to parameters [in]

Returns:
value of integrand

Definition at line 3758 of file blast_stat.c.
References SRombergCbackArgs::adj1, SRombergCbackArgs::adj2, BLAST_RombergIntegrate(), SRombergCbackArgs::epsilon, SRombergCbackArgs::num_hsps, s_OuterIntegralCback(), and SRombergCbackArgs::sdvir.
Referenced by s_BlastSumPCalc().

static double s_OuterIntegralCback ( double x,

void * vp

) [static]

Callback for the Romberg integration function.
This is the second of the double integrals that BlastSumPCalc calculates. This is eqn. 4 of Karlin and Altschul, PNAS USA, 90, 5873-5877 (1993).

Parameters:

x variable to integrate over [in]

vp pointer to parameters [in]

Returns:
value of integrand

Definition at line 3732 of file blast_stat.c.
References SRombergCbackArgs::adj2, SRombergCbackArgs::num_hsps_minus_2, and SRombergCbackArgs::sdvir.
Referenced by s_InnerIntegralCback().

static Int2 s_SplitArrayOf8 ( const array_of_8 * input,

const array_of_8 ** normal,

const array_of_8 ** non_affine,

Boolean * split

) [static]

Splits an ArrayOf8 into two arrays of supported gap costs.
One is for non-affine (megablast linear values) and the other is for standard (typically affine) values.
Parameters:

input the array to be split [in]

normal the standard (typically affine) values [out]

non_affine the megablast (linear) values [out]

split Boolean specifying whether the non-affine values are populated [out]

Returns:
0 on success, -1 on error

Definition at line 2796 of file blast_stat.c.
References FALSE, and TRUE.
Referenced by s_GetNuclValuesArray().

static SBlastScoreMatrix* SBlastScoreMatrixFree ( SBlastScoreMatrix * matrix ) [static]

Deallocates SBlastScoreMatrix structure.

Parameters:

matrix structure to deallocate [in]

Returns:
NULL

Definition at line 690 of file blast_stat.c.
References _PSIDeallocateMatrix(), SBlastScoreMatrix::data, SBlastScoreMatrix::ncols, and sfree.
Referenced by SBlastScoreMatrixNew(), SCompressedAlphabetFree(), and SPsiBlastScoreMatrixFree().

static SBlastScoreMatrix* SBlastScoreMatrixNew ( size_t ncols,

size_t nrows

) [static]

Allocates a new SBlastScoreMatrix structure of the specified dimensions.

Parameters:

ncols number of columns [in]

nrows number of rows [in]

Returns:
NULL in case of memory allocation failure, else new SBlastScoreMatrix structure

Definition at line 712 of file blast_stat.c.
References _PSIAllocateMatrix(), SBlastScoreMatrix::data, SBlastScoreMatrix::ncols, SBlastScoreMatrix::nrows, and SBlastScoreMatrixFree().
Referenced by BlastScoreBlkNew(), s_BuildCompressedScoreMatrix(), and SPsiBlastScoreMatrixNew().

SCompressedAlphabet* SCompressedAlphabetFree ( SCompressedAlphabet * alphabet )

Free a compressed alphabet and score matrix.

Parameters:

alphabet The compressed alphabet structure

Returns:
Always NULL

Definition at line 4503 of file blast_stat.c.
References SCompressedAlphabet::compress_table, SCompressedAlphabet::matrix, SBlastScoreMatrixFree(), and sfree.
Referenced by SCompressedAlphabetNew().

SCompressedAlphabet* SCompressedAlphabetNew ( BlastScoreBlk * sbp,

Int4 compressed_alphabet_size,

double scale_factor

)

Allocate a new compressed alphabet and score matrix.

Parameters:

sbp Current score matrix information [in]

compressed_alphabet_size Desired size of compressed alphabet (current choices are limited to 10 or 15) [in]

scale_factor Score matrix entries are scaled by this value [in]

Returns:
the new alphabet, or NULL on failure

Definition at line 4467 of file blast_stat.c.
References ASSERT, BLASTAA_SIZE, SCompressedAlphabet::compress_table, SCompressedAlphabet::compressed_alphabet_size, s_BuildCompressedScoreMatrix(), s_BuildCompressedTranslation(), and SCompressedAlphabetFree().
Referenced by BlastCompressedAaLookupTableNew().

SPsiBlastScoreMatrix* SPsiBlastScoreMatrixFree ( SPsiBlastScoreMatrix * matrix )

Deallocates a SPsiBlastScoreMatrix structure.

Parameters:

matrix structure to deallocate [in]

Returns:
NULL

Definition at line 731 of file blast_stat.c.
References _PSIDeallocateMatrix(), Blast_KarlinBlkFree(), SPsiBlastScoreMatrix::freq_ratios, SPsiBlastScoreMatrix::kbp, SBlastScoreMatrix::ncols, SPsiBlastScoreMatrix::pssm, SBlastScoreMatrixFree(), and sfree.
Referenced by SPsiBlastScoreMatrixNew().

SPsiBlastScoreMatrix* SPsiBlastScoreMatrixNew ( size_t ncols )

Allocates a new SPsiBlastScoreMatrix structure of dimensions ncols by BLASTAA_SIZE.

Parameters:

ncols number of columns (i.e.: query length) [in]

Returns:
NULL in case of memory allocation failure, else new SPsiBlastScoreMatrix structure

Definition at line 750 of file blast_stat.c.
References _PSIAllocateMatrix(), Blast_KarlinBlkNew(), BLASTAA_SIZE, SPsiBlastScoreMatrix::freq_ratios, SPsiBlastScoreMatrix::kbp, SPsiBlastScoreMatrix::pssm, SBlastScoreMatrixNew(), and SPsiBlastScoreMatrixFree().

Variable Documentation

const array_of_8 blastn_values_1_1[] [static]

Initial value:
{ { 3, 2, 1.09, 0.31, 0.55, 2.0, -2, 99 }, { 2, 2, 1.07, 0.27, 0.49, 2.2, -3, 97 }, { 1, 2, 1.02, 0.21, 0.36, 2.8, -6, 92 }, { 0, 2, 0.80, 0.064, 0.17, 4.8, -16, 72 }, { 4, 1, 1.08, 0.28, 0.54, 2.0, -2, 98 }, { 3, 1, 1.06, 0.25, 0.46, 2.3, -4, 96 }, { 2, 1, 0.99, 0.17, 0.30, 3.3, -10, 90 } }
Karlin-Altschul parameter values for substitution scores 1 and -1.

Definition at line 664 of file blast_stat.c.

const array_of_8 blastn_values_1_2[] [static]

Initial value:
{ { 0, 0, 1.28, 0.46, 0.85, 1.5, -2, 96 }, { 2, 2, 1.33, 0.62, 1.1, 1.2, 0, 99 }, { 1, 2, 1.30, 0.52, 0.93, 1.4, -2, 97 }, { 0, 2, 1.19, 0.34, 0.66, 1.8, -3, 89 }, { 3, 1, 1.32, 0.57, 1.0, 1.3, -1, 99 }, { 2, 1, 1.29, 0.49, 0.92, 1.4, -1, 96 }, { 1, 1, 1.14, 0.26, 0.52, 2.2, -5, 85 } }
Karlin-Altschul parameter values for substitution scores 1 and -2.

Definition at line 618 of file blast_stat.c.

const array_of_8 blastn_values_1_3[] [static]

Initial value:
{ { 0, 0, 1.374, 0.711, 1.31, 1.05, 0, 100 }, { 2, 2, 1.37, 0.70, 1.2, 1.1, 0, 99 }, { 1, 2, 1.35, 0.64, 1.1, 1.2, -1, 98 }, { 0, 2, 1.25, 0.42, 0.83, 1.5, -2, 91 }, { 2, 1, 1.34, 0.60, 1.1, 1.2, -1, 97 }, { 1, 1, 1.21, 0.34, 0.71, 1.7, -2, 88 } }
Karlin-Altschul parameter values for substitution scores 1 and -3.

Definition at line 596 of file blast_stat.c.

const array_of_8 blastn_values_1_4[] [static]

Initial value:
{ { 0, 0, 1.383, 0.738, 1.36, 1.02, 0, 100 }, { 1, 2, 1.36, 0.67, 1.2, 1.1, 0, 98 }, { 0, 2, 1.26, 0.43, 0.90, 1.4, -1, 91 }, { 2, 1, 1.35, 0.61, 1.1, 1.2, -1, 98 }, { 1, 1, 1.22, 0.35, 0.72, 1.7, -3, 88 } }
Karlin-Altschul parameter values for substitution scores 1 and -4.

Definition at line 575 of file blast_stat.c.

const array_of_8 blastn_values_1_5[] [static]

Initial value:
{ { 0, 0, 1.39, 0.747, 1.38, 1.00, 0, 100 }, { 3, 3, 1.39, 0.747, 1.38, 1.00, 0, 100 } }
Karlin-Altschul parameter values for substitution scores 1 and -5.

Definition at line 569 of file blast_stat.c.

const array_of_8 blastn_values_2_3[] [static]

Initial value:
{ { 0, 0, 0.55, 0.21, 0.46, 1.2, -5, 87 }, { 4, 4, 0.63, 0.42, 0.84, 0.75, -2, 99 }, { 2, 4, 0.615, 0.37, 0.72, 0.85, -3, 97 }, { 0, 4, 0.55, 0.21, 0.46, 1.2, -5, 87 }, { 3, 3, 0.615, 0.37, 0.68, 0.9, -3, 97 }, { 6, 2, 0.63, 0.42, 0.84, 0.75, -2, 99 }, { 5, 2, 0.625, 0.41, 0.78, 0.8, -2, 99 }, { 4, 2, 0.61, 0.35, 0.68, 0.9, -3, 96 }, { 2, 2, 0.515, 0.14, 0.33, 1.55, -9, 81 } }
Karlin-Altschul parameter values for substitution scores 2 and -3.
These parameters can only be applied to even scores. Any odd score must be rounded down to the nearest even number before calculating the e-value.
Definition at line 632 of file blast_stat.c.

const array_of_8 blastn_values_2_5[] [static]

Initial value:
{ { 0, 0, 0.675, 0.65, 1.1, 0.6, -1, 99 }, { 2, 4, 0.67, 0.59, 1.1, 0.6, -1, 98 }, { 0, 4, 0.62, 0.39, 0.78, 0.8, -2, 91 }, { 4, 2, 0.67, 0.61, 1.0, 0.65, -2, 98 }, { 2, 2, 0.56, 0.32, 0.59, 0.95, -4, 82 } }
Karlin-Altschul parameter values for substitution scores 2 and -5.
These parameters can only be applied to even scores. Any odd score must be rounded down to the nearest even number before calculating the e-value.
Definition at line 609 of file blast_stat.c.

const array_of_8 blastn_values_2_7[] [static]

Initial value:
{ { 0, 0, 0.69, 0.73, 1.34, 0.515, 0, 100 }, { 2, 4, 0.68, 0.67, 1.2, 0.55, 0, 99 }, { 0, 4, 0.63, 0.43, 0.90, 0.7, -1, 91 }, { 4, 2, 0.675, 0.62, 1.1, 0.6, -1, 98 }, { 2, 2, 0.61, 0.35, 0.72, 1.7, -3, 88 } }
Karlin-Altschul parameter values for substitution scores 2 and -7.
These parameters can only be applied to even scores. Any odd score must be rounded down to the nearest even number before calculating the e-value.
Definition at line 587 of file blast_stat.c.

const array_of_8 blastn_values_3_2[] [static]

Initial value:
{ { 5, 5, 0.208, 0.030, 0.072, 2.9, -47, 77} }
Karlin-Altschul parameter values for substitution scores 3 and -2.

Definition at line 675 of file blast_stat.c.

const array_of_8 blastn_values_3_4[] [static]

Initial value:
{ { 6, 3, 0.389, 0.25, 0.56, 0.7, -5, 95}, { 5, 3, 0.375, 0.21, 0.47, 0.8, -6, 92}, { 4, 3, 0.351, 0.14, 0.35, 1.0, -9, 86}, { 6, 2, 0.362, 0.16, 0.45, 0.8, -4, 88}, { 5, 2, 0.330, 0.092, 0.28, 1.2, -13, 81}, { 4, 2, 0.281, 0.046, 0.16, 1.8, -23, 69} }
Karlin-Altschul parameter values for substitution scores 3 and -4.

Definition at line 645 of file blast_stat.c.

const array_of_8 blastn_values_4_5[] [static]

Initial value:
{ { 0, 0, 0.22, 0.061, 0.22, 1.0, -15, 74 }, { 6, 5, 0.28, 0.21, 0.47, 0.6 , -7, 93 }, { 5, 5, 0.27, 0.17, 0.39, 0.7, -9, 90 }, { 4, 5, 0.25, 0.10, 0.31, 0.8, -10, 83 }, { 3, 5, 0.23, 0.065, 0.25, 0.9, -11, 76 } }
Karlin-Altschul parameter values for substitution scores 4 and -5.

Definition at line 655 of file blast_stat.c.

const array_of_8 blastn_values_5_4[] [static]

Initial value:
{ { 10, 6, 0.163, 0.068, 0.16, 1.0, -19, 85 }, { 8, 6, 0.146, 0.039, 0.11, 1.3, -29, 76 } }
Karlin-Altschul parameter values for substitution scores 5 and -4.

Definition at line 680 of file blast_stat.c.

Int4 blosum45_prefs[14] [static]

Initial value:
{ BLAST_MATRIX_NOMINAL, BLAST_MATRIX_NOMINAL, BLAST_MATRIX_NOMINAL, BLAST_MATRIX_NOMINAL, BLAST_MATRIX_NOMINAL, BLAST_MATRIX_NOMINAL, BLAST_MATRIX_NOMINAL, BLAST_MATRIX_BEST, BLAST_MATRIX_NOMINAL, BLAST_MATRIX_NOMINAL, BLAST_MATRIX_NOMINAL, BLAST_MATRIX_NOMINAL, BLAST_MATRIX_NOMINAL, BLAST_MATRIX_NOMINAL }
Quality values for BLOSUM45 matrix, each element corresponds to same element number in array blosum45_values.

Definition at line 185 of file blast_stat.c.
Referenced by BlastLoadMatrixValues().

array_of_8 blosum45_values[14] [static]

Initial value:
{ {(double) INT2_MAX, (double) INT2_MAX, (double) INT2_MAX, 0.2291, 0.0924, 0.2514, 0.9113, -5.7}, {13, 3, (double) INT2_MAX, 0.207, 0.049, 0.14, 1.5, -22}, {12, 3, (double) INT2_MAX, 0.199, 0.039, 0.11, 1.8, -34}, {11, 3, (double) INT2_MAX, 0.190, 0.031, 0.095, 2.0, -38}, {10, 3, (double) INT2_MAX, 0.179, 0.023, 0.075, 2.4, -51}, {16, 2, (double) INT2_MAX, 0.210, 0.051, 0.14, 1.5, -24}, {15, 2, (double) INT2_MAX, 0.203, 0.041, 0.12, 1.7, -31}, {14, 2, (double) INT2_MAX, 0.195, 0.032, 0.10, 1.9, -36}, {13, 2, (double) INT2_MAX, 0.185, 0.024, 0.084, 2.2, -45}, {12, 2, (double) INT2_MAX, 0.171, 0.016, 0.061, 2.8, -65}, {19, 1, (double) INT2_MAX, 0.205, 0.040, 0.11, 1.9, -43}, {18, 1, (double) INT2_MAX, 0.198, 0.032, 0.10, 2.0, -43}, {17, 1, (double) INT2_MAX, 0.189, 0.024, 0.079, 2.4, -57}, {16, 1, (double) INT2_MAX, 0.176, 0.016, 0.063, 2.8, -67}, }
Supported values (gap-existence, extension, etc.) for BLOSUM45.
Definition at line 168 of file blast_stat.c.
Referenced by BlastLoadMatrixValues().

Int4 blosum50_prefs[16] [static]

Initial value:
{ BLAST_MATRIX_NOMINAL, BLAST_MATRIX_NOMINAL, BLAST_MATRIX_NOMINAL, BLAST_MATRIX_NOMINAL, BLAST_MATRIX_NOMINAL, BLAST_MATRIX_NOMINAL, BLAST_MATRIX_NOMINAL, BLAST_MATRIX_NOMINAL, BLAST_MATRIX_NOMINAL, BLAST_MATRIX_BEST, BLAST_MATRIX_NOMINAL, BLAST_MATRIX_NOMINAL, BLAST_MATRIX_NOMINAL, BLAST_MATRIX_NOMINAL, BLAST_MATRIX_NOMINAL, BLAST_MATRIX_NOMINAL }
Quality values for BLOSUM50 matrix, each element corresponds to same element number in array blosum50_values.

Definition at line 223 of file blast_stat.c.
Referenced by BlastLoadMatrixValues().

array_of_8 blosum50_values[16] [static]

Initial value:
{ {(double) INT2_MAX, (double) INT2_MAX, (double) INT2_MAX, 0.2318, 0.112, 0.3362, 0.6895, -4.0}, {13, 3, (double) INT2_MAX, 0.212, 0.063, 0.19, 1.1, -16}, {12, 3, (double) INT2_MAX, 0.206, 0.055, 0.17, 1.2, -18}, {11, 3, (double) INT2_MAX, 0.197, 0.042, 0.14, 1.4, -25}, {10, 3, (double) INT2_MAX, 0.186, 0.031, 0.11, 1.7, -34}, {9, 3, (double) INT2_MAX, 0.172, 0.022, 0.082, 2.1, -48}, {16, 2, (double) INT2_MAX, 0.215, 0.066, 0.20, 1.05, -15}, {15, 2, (double) INT2_MAX, 0.210, 0.058, 0.17, 1.2, -20}, {14, 2, (double) INT2_MAX, 0.202, 0.045, 0.14, 1.4, -27}, {13, 2, (double) INT2_MAX, 0.193, 0.035, 0.12, 1.6, -32}, {12, 2, (double) INT2_MAX, 0.181, 0.025, 0.095, 1.9, -41}, {19, 1, (double) INT2_MAX, 0.212, 0.057, 0.18, 1.2, -21}, {18, 1, (double) INT2_MAX, 0.207, 0.050, 0.15, 1.4, -28}, {17, 1, (double) INT2_MAX, 0.198, 0.037, 0.12, 1.6, -33}, {16, 1, (double) INT2_MAX, 0.186, 0.025, 0.10, 1.9, -42}, {15, 1, (double) INT2_MAX, 0.171, 0.015, 0.063, 2.7, -76}, }
Supported values (gap-existence, extension, etc.) for BLOSUM50.
Definition at line 204 of file blast_stat.c.
Referenced by BlastLoadMatrixValues().

Int4 blosum62_prefs[12] [static]

Initial value:
{ BLAST_MATRIX_NOMINAL, BLAST_MATRIX_NOMINAL, BLAST_MATRIX_NOMINAL, BLAST_MATRIX_NOMINAL, BLAST_MATRIX_NOMINAL, BLAST_MATRIX_NOMINAL, BLAST_MATRIX_NOMINAL, BLAST_MATRIX_NOMINAL, BLAST_MATRIX_NOMINAL, BLAST_MATRIX_BEST, BLAST_MATRIX_NOMINAL, BLAST_MATRIX_NOMINAL, }
Quality values for BLOSUM62 matrix, each element corresponds to same element number in array blosum62_values.

Definition at line 258 of file blast_stat.c.
Referenced by BlastLoadMatrixValues().

array_of_8 blosum62_values[12] [static]

Initial value:
{ {(double) INT2_MAX, (double) INT2_MAX, (double) INT2_MAX, 0.3176, 0.134, 0.4012, 0.7916, -3.2}, {11, 2, (double) INT2_MAX, 0.297, 0.082, 0.27, 1.1, -10}, {10, 2, (double) INT2_MAX, 0.291, 0.075, 0.23, 1.3, -15}, {9, 2, (double) INT2_MAX, 0.279, 0.058, 0.19, 1.5, -19}, {8, 2, (double) INT2_MAX, 0.264, 0.045, 0.15, 1.8, -26}, {7, 2, (double) INT2_MAX, 0.239, 0.027, 0.10, 2.5, -46}, {6, 2, (double) INT2_MAX, 0.201, 0.012, 0.061, 3.3, -58}, {13, 1, (double) INT2_MAX, 0.292, 0.071, 0.23, 1.2, -11}, {12, 1, (double) INT2_MAX, 0.283, 0.059, 0.19, 1.5, -19}, {11, 1, (double) INT2_MAX, 0.267, 0.041, 0.14, 1.9, -30}, {10, 1, (double) INT2_MAX, 0.243, 0.024, 0.10, 2.5, -44}, {9, 1, (double) INT2_MAX, 0.206, 0.010, 0.052, 4.0, -87}, }
Supported values (gap-existence, extension, etc.) for BLOSUM62.
Definition at line 243 of file blast_stat.c.
Referenced by BlastLoadMatrixValues().

Int4 blosum80_prefs[10] [static]

Initial value:
{ BLAST_MATRIX_NOMINAL, BLAST_MATRIX_NOMINAL, BLAST_MATRIX_NOMINAL, BLAST_MATRIX_NOMINAL, BLAST_MATRIX_NOMINAL, BLAST_MATRIX_NOMINAL, BLAST_MATRIX_NOMINAL, BLAST_MATRIX_NOMINAL, BLAST_MATRIX_BEST, BLAST_MATRIX_NOMINAL }
Quality values for BLOSUM80 matrix, each element corresponds to same element number in array blosum80_values.

Definition at line 288 of file blast_stat.c.
Referenced by BlastLoadMatrixValues().

array_of_8 blosum80_values[10] [static]

Initial value:
{ {(double) INT2_MAX, (double) INT2_MAX, (double) INT2_MAX, 0.3430, 0.177, 0.6568, 0.5222, -1.6}, {25, 2, (double) INT2_MAX, 0.342, 0.17, 0.66, 0.52, -1.6}, {13, 2, (double) INT2_MAX, 0.336, 0.15, 0.57, 0.59, -3}, {9, 2, (double) INT2_MAX, 0.319, 0.11, 0.42, 0.76, -6}, {8, 2, (double) INT2_MAX, 0.308, 0.090, 0.35, 0.89, -9}, {7, 2, (double) INT2_MAX, 0.293, 0.070, 0.27, 1.1, -14}, {6, 2, (double) INT2_MAX, 0.268, 0.045, 0.19, 1.4, -19}, {11, 1, (double) INT2_MAX, 0.314, 0.095, 0.35, 0.90, -9}, {10, 1, (double) INT2_MAX, 0.299, 0.071, 0.27, 1.1, -14}, {9, 1, (double) INT2_MAX, 0.279, 0.048, 0.20, 1.4, -19}, }
Supported values (gap-existence, extension, etc.) for BLOSUM80.
Definition at line 275 of file blast_stat.c.
Referenced by BlastLoadMatrixValues().

Int4 blosum90_prefs[8] [static]

Initial value:
{ BLAST_MATRIX_NOMINAL, BLAST_MATRIX_NOMINAL, BLAST_MATRIX_NOMINAL, BLAST_MATRIX_NOMINAL, BLAST_MATRIX_NOMINAL, BLAST_MATRIX_NOMINAL, BLAST_MATRIX_BEST, BLAST_MATRIX_NOMINAL }
Quality values for BLOSUM90 matrix, each element corresponds to same element number in array blosum90_values.

Definition at line 313 of file blast_stat.c.
Referenced by BlastLoadMatrixValues().

array_of_8 blosum90_values[8] [static]

Initial value:
{ {(double) INT2_MAX, (double) INT2_MAX, (double) INT2_MAX, 0.3346, 0.190, 0.7547, 0.4434, -1.4}, {9, 2, (double) INT2_MAX, 0.310, 0.12, 0.46, 0.67, -6}, {8, 2, (double) INT2_MAX, 0.300, 0.099, 0.39, 0.76, -7}, {7, 2, (double) INT2_MAX, 0.283, 0.072, 0.30, 0.93, -11}, {6, 2, (double) INT2_MAX, 0.259, 0.048, 0.22, 1.2, -16}, {11, 1, (double) INT2_MAX, 0.302, 0.093, 0.39, 0.78, -8}, {10, 1, (double) INT2_MAX, 0.290, 0.075, 0.28, 1.04, -15}, {9, 1, (double) INT2_MAX, 0.265, 0.044, 0.20, 1.3, -19}, }
Supported values (gap-existence, extension, etc.) for BLOSUM90.
Definition at line 302 of file blast_stat.c.
Referenced by BlastLoadMatrixValues().

BLAST_LetterProb nt_prob[] [static]

Initial value:
{ { 'A', 25.00 }, { 'C', 25.00 }, { 'G', 25.00 }, { 'T', 25.00 } }
nucleotide probabilities (25% each letter)

Definition at line 1467 of file blast_stat.c.

Int4 pam250_prefs[16] [static]

Initial value:
{ BLAST_MATRIX_NOMINAL, BLAST_MATRIX_NOMINAL, BLAST_MATRIX_NOMINAL, BLAST_MATRIX_NOMINAL, BLAST_MATRIX_NOMINAL, BLAST_MATRIX_NOMINAL, BLAST_MATRIX_NOMINAL, BLAST_MATRIX_NOMINAL, BLAST_MATRIX_NOMINAL, BLAST_MATRIX_BEST, BLAST_MATRIX_NOMINAL, BLAST_MATRIX_NOMINAL, BLAST_MATRIX_NOMINAL, BLAST_MATRIX_NOMINAL, BLAST_MATRIX_NOMINAL, BLAST_MATRIX_NOMINAL }
Quality values for PAM250 matrix, each element corresponds to same element number in array pam250_values.

Definition at line 344 of file blast_stat.c.
Referenced by BlastLoadMatrixValues().

array_of_8 pam250_values[16] [static]

Initial value:
{ {(double) INT2_MAX, (double) INT2_MAX, (double) INT2_MAX, 0.2252, 0.0868, 0.2223, 0.98, -5.0}, {15, 3, (double) INT2_MAX, 0.205, 0.049, 0.13, 1.6, -23}, {14, 3, (double) INT2_MAX, 0.200, 0.043, 0.12, 1.7, -26}, {13, 3, (double) INT2_MAX, 0.194, 0.036, 0.10, 1.9, -31}, {12, 3, (double) INT2_MAX, 0.186, 0.029, 0.085, 2.2, -41}, {11, 3, (double) INT2_MAX, 0.174, 0.020, 0.070, 2.5, -48}, {17, 2, (double) INT2_MAX, 0.204, 0.047, 0.12, 1.7, -28}, {16, 2, (double) INT2_MAX, 0.198, 0.038, 0.11, 1.8, -29}, {15, 2, (double) INT2_MAX, 0.191, 0.031, 0.087, 2.2, -44}, {14, 2, (double) INT2_MAX, 0.182, 0.024, 0.073, 2.5, -53}, {13, 2, (double) INT2_MAX, 0.171, 0.017, 0.059, 2.9, -64}, {21, 1, (double) INT2_MAX, 0.205, 0.045, 0.11, 1.8, -34}, {20, 1, (double) INT2_MAX, 0.199, 0.037, 0.10, 1.9, -35}, {19, 1, (double) INT2_MAX, 0.192, 0.029, 0.083, 2.3, -52}, {18, 1, (double) INT2_MAX, 0.183, 0.021, 0.070, 2.6, -60}, {17, 1, (double) INT2_MAX, 0.171, 0.014, 0.052, 3.3, -86}, }
Supported values (gap-existence, extension, etc.) for PAM250.
Definition at line 325 of file blast_stat.c.
Referenced by BlastLoadMatrixValues().

Int4 pam30_prefs[7] [static]

Initial value:
{ BLAST_MATRIX_NOMINAL, BLAST_MATRIX_NOMINAL, BLAST_MATRIX_NOMINAL, BLAST_MATRIX_NOMINAL, BLAST_MATRIX_NOMINAL, BLAST_MATRIX_BEST, BLAST_MATRIX_NOMINAL, }
Quality values for PAM30 matrix, each element corresponds to same element number in array pam30_values.

Definition at line 374 of file blast_stat.c.

array_of_8 pam30_values[7] [static]

Initial value:
{ {(double) INT2_MAX, (double) INT2_MAX, (double) INT2_MAX, 0.3400, 0.283, 1.754, 0.1938, -0.3}, {7, 2, (double) INT2_MAX, 0.305, 0.15, 0.87, 0.35, -3}, {6, 2, (double) INT2_MAX, 0.287, 0.11, 0.68, 0.42, -4}, {5, 2, (double) INT2_MAX, 0.264, 0.079, 0.45, 0.59, -7}, {10, 1, (double) INT2_MAX, 0.309, 0.15, 0.88, 0.35, -3}, {9, 1, (double) INT2_MAX, 0.294, 0.11, 0.61, 0.48, -6}, {8, 1, (double) INT2_MAX, 0.270, 0.072, 0.40, 0.68, -10}, }
Supported values (gap-existence, extension, etc.) for PAM30.
Definition at line 364 of file blast_stat.c.
Referenced by BlastLoadMatrixValues().

Int4 pam70_prefs[7] [static]

Initial value:
{ BLAST_MATRIX_NOMINAL, BLAST_MATRIX_NOMINAL, BLAST_MATRIX_NOMINAL, BLAST_MATRIX_NOMINAL, BLAST_MATRIX_NOMINAL, BLAST_MATRIX_BEST, BLAST_MATRIX_NOMINAL }
Quality values for PAM70 matrix, each element corresponds to same element number in array pam70_values.

Definition at line 396 of file blast_stat.c.

array_of_8 pam70_values[7] [static]

Initial value:
{ {(double) INT2_MAX, (double) INT2_MAX, (double) INT2_MAX, 0.3345, 0.229, 1.029, 0.3250, -0.7}, {8, 2, (double) INT2_MAX, 0.301, 0.12, 0.54, 0.56, -5}, {7, 2, (double) INT2_MAX, 0.286, 0.093, 0.43, 0.67, -7}, {6, 2, (double) INT2_MAX, 0.264, 0.064, 0.29, 0.90, -12}, {11, 1, (double) INT2_MAX, 0.305, 0.12, 0.52, 0.59, -6}, {10, 1, (double) INT2_MAX, 0.291, 0.091, 0.41, 0.71, -9}, {9, 1, (double) INT2_MAX, 0.270, 0.060, 0.28, 0.97, -14}, }
Supported values (gap-existence, extension, etc.) for PAM70.
Definition at line 386 of file blast_stat.c.

char const rcsid[] [static]

Initial value:
"$Id: blast_stat.c 144802 2008-11-03 20:57:20Z camacho $"

Definition at line 52 of file blast_stat.c.

BLAST_LetterProb Robinson_prob[] [static]

Initial value:
{ { 'A', 78.05 }, { 'C', 19.25 }, { 'D', 53.64 }, { 'E', 62.95 }, { 'F', 38.56 }, { 'G', 73.77 }, { 'H', 21.99 }, { 'I', 51.42 }, { 'K', 57.44 }, { 'L', 90.19 }, { 'M', 22.43 }, { 'N', 44.87 }, { 'P', 52.03 }, { 'Q', 42.64 }, { 'R', 51.29 }, { 'S', 71.20 }, { 'T', 58.41 }, { 'V', 64.41 }, { 'W', 13.30 }, { 'Y', 32.16 } }
amino acid background frequencies from Robinson and Robinson

Definition at line 1442 of file blast_stat.c.

const char* s_alphabet10 = "IJLMV AST BDENZ KQR G FY P H C W" [static]

23-to-10 letter compressed alphabet.
Based on SE-V(10)
Definition at line 4462 of file blast_stat.c.

const char* s_alphabet15 = "ST IJV LM KR EQZ A G BD P N F Y H C W" [static]

23-to-15 letter compressed alphabet.
Based on SE_B(14)
Definition at line 4464 of file blast_stat.c.

Generated on Wed Mar 11 22:44:42 2009 for NCBI C++ ToolKit by

1.4.6

Modified on Wed Mar 11 23:16:10 2009 by modify_doxy.py rev. 117643


Classes
struct	MatrixInfo
	Used to temporarily store matrix values for retrieval. More...
struct	BLAST_LetterProb
	Records probability of letter appearing in sequence. More...
struct	Blast_ResComp
	Intermediate structure to store the composition of a sequence. More...
struct	SRombergCbackArgs
	Internal data structure used by Romberg integration callbacks. More...
Defines
#define	BLAST_SCORE_RANGE_MAX (BLAST_SCORE_MAX - BLAST_SCORE_MIN)
	maximum allowed range of BLAST scores.
#define	BLAST_KARLIN_K_SUMLIMIT_DEFAULT 0.0001
	K_SUMLIMIT_DEFAULT == sumlimit used in BlastKarlinLHtoK().
#define	BLAST_KARLIN_LAMBDA_ACCURACY_DEFAULT (1.e-5)
	LAMBDA_ACCURACY_DEFAULT == accuracy to which Lambda should be calc'd.
#define	BLAST_KARLIN_LAMBDA_ITER_DEFAULT 17
	LAMBDA_ITER_DEFAULT == default number of iterations used when calculating Lambda.
#define	BLAST_KARLIN_LAMBDA0_DEFAULT 0.5
	Initial guess for the value of Lambda in BlastKarlinLambdaNR.
#define	BLAST_KARLIN_K_ITER_MAX 100
	upper limit on iterations for BlastKarlinLHtoK
#define	BLAST_NUM_STAT_VALUES 8
	Number of statistical parameters in each row of the precomputed tables.
#define	BLOSUM45_VALUES_MAX 14
	Number of different combinations supported for BLOSUM45.
#define	BLOSUM50_VALUES_MAX 16
	Number of different combinations supported for BLOSUM50.
#define	BLOSUM62_VALUES_MAX 12
	Number of different combinations supported for BLOSUM62.
#define	BLOSUM80_VALUES_MAX 10
	Number of different combinations supported for BLOSUM80.
#define	BLOSUM90_VALUES_MAX 8
	Number of different combinations supported for BLOSUM90.
#define	PAM250_VALUES_MAX 16
	Number of different combinations supported for PAM250.
#define	PAM30_VALUES_MAX 7
	Number of different combinations supported for PAM30.
#define	PAM70_VALUES_MAX 7
	Number of different combinations supported for PAM70.
#define	STD_AMINO_ACID_FREQS Robinson_prob
	points to the standard amino acid frequencies to use.
Typedefs
typedef double	array_of_8 [8]
	Holds values (gap-opening, extension, etc.) for a matrix.
typedef Int1	CompressedReverseLookup [BLASTAA_SIZE+1][BLASTAA_SIZE+1]
	2-D array mapping compressed letters to sets of ordinary protein letters
Functions
static SBlastScoreMatrix *	SBlastScoreMatrixFree (SBlastScoreMatrix *matrix)
	Deallocates SBlastScoreMatrix structure.
static SBlastScoreMatrix *	SBlastScoreMatrixNew (size_t ncols, size_t nrows)
	Allocates a new SBlastScoreMatrix structure of the specified dimensions.
SPsiBlastScoreMatrix *	SPsiBlastScoreMatrixFree (SPsiBlastScoreMatrix *matrix)
	Deallocates a SPsiBlastScoreMatrix structure.
SPsiBlastScoreMatrix *	SPsiBlastScoreMatrixNew (size_t ncols)
	Allocates a new SPsiBlastScoreMatrix structure of dimensions ncols by BLASTAA_SIZE.
BlastScoreBlk *	BlastScoreBlkNew (Uint1 alphabet, Int4 number_of_contexts)
	Allocates and initializes BlastScoreBlk.
Blast_ScoreFreq *	Blast_ScoreFreqFree (Blast_ScoreFreq *sfp)
	Deallocates the score frequencies structure.
Blast_KarlinBlk *	Blast_KarlinBlkFree (Blast_KarlinBlk *kbp)
	Deallocates the KarlinBlk.
BlastScoreBlk *	BlastScoreBlkFree (BlastScoreBlk *sbp)
	Deallocates BlastScoreBlk as well as all associated structures.
Int2	BLAST_ScoreSetAmbigRes (BlastScoreBlk *sbp, char ambiguous_res)
	Set the ambiguous residue (e.g., 'N', 'X') in the BlastScoreBlk*.
Int2	BlastScoreBlkNuclMatrixCreate (BlastScoreBlk *sbp)
	Fill in the matrix for blastn using the penalty and rewards. The query sequence alphabet is blastna, the subject sequence is ncbi2na.
static Int2	BlastScoreBlkProteinMatrixRead (BlastScoreBlk *sbp, FILE *fp)
	Read in the matrix from the FILE *fp.
static Int2	BlastScoreBlkMaxScoreSet (BlastScoreBlk *sbp)
	Sets maximum and minimum scores on the BlastScoreBlk for a given matrix.
static Int2	BlastScoreBlkProteinMatrixLoad (BlastScoreBlk *sbp)
	Sets sbp->matrix->data field using sbp->name field using the matrices in the toolkit (util/tables/raw_scoremat.h).
Int2	Blast_ScoreBlkMatrixFill (BlastScoreBlk *sbp, GET_MATRIX_PATH get_path)
	This function fills in the BlastScoreBlk structure.
Blast_ResFreq *	Blast_ResFreqFree (Blast_ResFreq *rfp)
	Deallocates Blast_ResFreq and prob0 element.
Blast_ResFreq *	Blast_ResFreqNew (const BlastScoreBlk *sbp)
	Allocates a new Blast_ResFreq structure and fills in the prob element based upon the contents of sbp.
static Int2	Blast_ResFreqNormalize (const BlastScoreBlk *sbp, Blast_ResFreq *rfp, double norm)
	Normalizes all the residue frequencies and then normalizes them to "norm".
Int2	Blast_GetStdAlphabet (Uint1 alphabet_code, Uint1 *residues, Uint4 residues_size)
	Fills a buffer with the 'standard' alphabet (given by STD_AMINO_ACID_FREQS[index].ch).
Int2	Blast_ResFreqStdComp (const BlastScoreBlk *sbp, Blast_ResFreq *rfp)
	Calculates residue frequencies given a standard distribution.
static Blast_ResComp *	BlastResCompDestruct (Blast_ResComp *rcp)
	Deallocates Blast_ResComp structure and associated arrays.
static Blast_ResComp *	BlastResCompNew (const BlastScoreBlk *sbp)
	Allocates the Blast_ResComp* for a given alphabet.
static Int2	BlastResCompStr (const BlastScoreBlk *sbp, Blast_ResComp *rcp, char *str, Int4 length)
	Store the composition of a (query) string.
static Int2	Blast_ResFreqClr (const BlastScoreBlk *sbp, Blast_ResFreq *rfp)
	Sets prob elements of Blast_ResFreq to zero.
static Int2	Blast_ResFreqResComp (const BlastScoreBlk *sbp, Blast_ResFreq *rfp, const Blast_ResComp *rcp)
	Calculate the residue frequencies associated with the provided ResComp. This function takes into account the composition of a given sequence (expressed through rcp) rather than just doing it for a standard distribution.
static Int2	Blast_ResFreqString (const BlastScoreBlk *sbp, Blast_ResFreq *rfp, char *string, Int4 length)
	Fills in residue frequencies for a given sequence.
static Int2	BlastScoreChk (Int4 lo, Int4 hi)
	Check that the lo and hi score are within the allowed ranges.
Blast_ScoreFreq *	Blast_ScoreFreqNew (Int4 score_min, Int4 score_max)
	Creates a new structure to keep track of score frequencies for a scoring system.
static Int2	BlastScoreFreqCalc (const BlastScoreBlk *sbp, Blast_ScoreFreq *sfp, Blast_ResFreq *rfp1, Blast_ResFreq *rfp2)
	Calculates the score frequencies.
static double	BlastKarlinLHtoK (Blast_ScoreFreq *sfp, double lambda, double H)
	The following procedure computes K.
static double	NlmKarlinLambdaNR (double *probs, Int4 d, Int4 low, Int4 high, double lambda0, double tolx, Int4 itmax, Int4 maxNewton, Int4 *itn)
	Find positive solution to sum_{i=low}^{high} exp(i lambda) * probs[i] = 1.
double	Blast_KarlinLambdaNR (Blast_ScoreFreq *sfp, double initialLambdaGuess)
	Calculates the parameter Lambda given an initial guess for its value.
static double	BlastKarlinLtoH (Blast_ScoreFreq *sfp, double lambda)
	Calculate H, the relative entropy of the p's and q's.
Int2	Blast_KarlinBlkUngappedCalc (Blast_KarlinBlk *kbp, Blast_ScoreFreq *sfp)
	Computes the parameters lambda, H, and K for use in calculating the statistical significance of high-scoring segments or subalignments (see comment on blast_stat.c for more details).
Int2	Blast_ScoreBlkKbpUngappedCalc (EBlastProgramType program, BlastScoreBlk sbp, Uint1 query, const BlastQueryInfo query_info, Blast_Message *blast_message)
	Calculate and fill the ungapped Karlin-Altschul parameters in the BlastScoreBlk structure (fields kbp_std, kbp_psi, and kbp of that structure).
Int2	Blast_ScoreBlkKbpIdealCalc (BlastScoreBlk *sbp)
	Calculates the Karlin-Altschul parameters assuming standard residue compositions for the query and subject sequences.
Blast_KarlinBlk *	Blast_KarlinBlkNew (void)
	Callocs a Blast_KarlinBlk.
Int2	Blast_KarlinBlkCopy (Blast_KarlinBlk *kbp_to, Blast_KarlinBlk *kbp_from)
	Copies contents of one Karlin block to another.
static MatrixInfo *	MatrixInfoDestruct (MatrixInfo *matrix_info)
	Deallocates MatrixInfo as well as name string.
static MatrixInfo *	MatrixInfoNew (const char *name, array_of_8 *values, Int4 *prefs, Int4 max_number)
	Allocates New MatrixInfo*.
static ListNode *	BlastMatrixValuesDestruct (ListNode *vnp)
	Free linked list of MatrixValues and all associated data.
static ListNode *	BlastLoadMatrixValues (void)
	Loads all the matrix values, returns a ListNode* chain that contains MatrixInfo*'s.
static Int2	Blast_GetMatrixValues (const char matrix, Int4 open, Int4 extension, double lambda, double K, double H, double alpha, double beta, Int4 *pref_flags)
	Obtains arrays of the allowed opening and extension penalties for gapped BLAST for the given matrix.
void	BLAST_GetAlphaBeta (const char matrixName, double alpha, double beta, Boolean gapped, Int4 gap_open, Int4 gap_extend, const Blast_KarlinBlk kbp_ungapped)
	Extract the alpha and beta settings for this matrixName, and these gap open and gap extension costs.
static Int2	s_SplitArrayOf8 (const array_of_8 input, const array_of_8 normal, const array_of_8 non_affine, Boolean split)
	Splits an ArrayOf8 into two arrays of supported gap costs.
static Int2	s_AdjustGapParametersByGcd (array_of_8 normal, array_of_8 linear, int size, Int4 gap_existence_max, Int4 gap_extend_max, int divisor)
	Adjust Lambda and H if reward and penalty have a non-1 gcd.
static Int2	s_GetNuclValuesArray (Int4 reward, Int4 penalty, Int4 array_size, array_of_8 normal, array_of_8 non_affine, Int4 gap_open_max, Int4 gap_extend_max, Boolean round_down, Blast_Message **error_return)
	Returns the array of values corresponding to the given match/mismatch scores, the number of supported gap cost combinations and thresholds for the gap costs, beyond which the ungapped statistics can be applied.
Int2	BLAST_GetProteinGapExistenceExtendParams (const char matrixName, Int4 gap_existence, Int4 *gap_extension)
	Extract the recommended gap existence and extension values.
Int2	BLAST_GetNucleotideGapExistenceExtendParams (Int4 reward, Int4 penalty, Int4 gap_existence, Int4 gap_extension)
	Extract the recommended gap existence and extension values.
Boolean	BLAST_CheckRewardPenaltyScores (Int4 reward, Int4 penalty)
	Check the validity of the reward and penalty scores.
static Int2	BlastKarlinReportAllowedValues (const char matrix_name, Blast_Message *error_return)
	Fills in error_return with strings describing the allowed values.
Int2	Blast_KarlinBlkGappedCalc (Blast_KarlinBlk kbp, Int4 gap_open, Int4 gap_extend, const char matrix_name, Blast_Message **error_return)
	Fills in lambda, H, and K values, as calculated by Stephen Altschul in Methods in Enzymology (vol. 266, page 474).
Int2	Blast_KarlinBlkGappedLoadFromTables (Blast_KarlinBlk kbp, Int4 gap_open, Int4 gap_extend, const char matrix_name)
	Attempts to fill KarlinBlk for given gap opening, extensions etc.
char *	BLAST_PrintMatrixMessage (const char *matrix_name)
	Prints a message about the allowed matrices, BlastKarlinBlkGappedFill should return 1 before this is called.
char *	BLAST_PrintAllowedValues (const char *matrix_name, Int4 gap_open, Int4 gap_extend)
	Prints a message about the allowed open etc. values for the given matrix, BlastKarlinBlkGappedFill should return 2 before this is called.
Int2	Blast_KarlinBlkNuclGappedCalc (Blast_KarlinBlk kbp, Int4 gap_open, Int4 gap_extend, Int4 reward, Int4 penalty, Blast_KarlinBlk kbp_ungap, Boolean round_down, Blast_Message *error_return)
	Retrieves Karlin-Altschul parameters from precomputed tables, given the substitution and gap scores.
static double	s_GetUngappedBeta (Int4 reward, Int4 penalty)
	Returns the beta statistical parameter value, given the nucleotide substitution scores.
Int2	Blast_GetNuclAlphaBeta (Int4 reward, Int4 penalty, Int4 gap_open, Int4 gap_extend, Blast_KarlinBlk kbp, Boolean gapped_calculation, double alpha, double *beta)
	Extract the alpha and beta settings for these substitution and gap scores.
static Int4	BlastKarlinEtoS_simple (double E, const Blast_KarlinBlk *kbp, Int8 searchsp)
	Calculates score from expect value and search space.
double	BLAST_GapDecayDivisor (double decayrate, unsigned nsegs)
	Compute a divisor used to weight the evalue of a collection of "nsegs" distinct alignments.
Int2	BLAST_Cutoffs (Int4 S, double E, Blast_KarlinBlk *kbp, Int8 searchsp, Boolean dodecay, double gap_decay_rate)
	Calculate the cutoff score from the expected number of HSPs or vice versa.
double	BLAST_KarlinStoE_simple (Int4 S, Blast_KarlinBlk *kbp, Int8 searchsp)
	Calculates the Expect value based upon the search space and some Karlin-Altschul parameters.
double	BLAST_KarlinPtoE (double p)
	Convert a P-value to an E-value.
double	BLAST_KarlinEtoP (double x)
	Convert an E-value to a P-value.
static double	s_OuterIntegralCback (double x, void *vp)
	Callback for the Romberg integration function.
static double	s_InnerIntegralCback (double s, void *vp)
	Callback for the Romberg integration function.
static double	s_BlastSumPCalc (int r, double s)
	Evaluate the following double integral, where r = number of segments.
static double	s_BlastSumP (Int4 r, double s)
	Estimate the Sum P-value by calculation or interpolation, as appropriate.
double	BLAST_SmallGapSumE (Int4 starting_points, Int2 num, double xsum, Int4 query_length, Int4 subject_length, Int8 searchsp_eff, double weight_divisor)
	Calculates the e-value for alignments with "small" gaps (typically under fifty residues/basepairs) following ideas of Stephen Altschul's.
double	BLAST_UnevenGapSumE (Int4 query_start_points, Int4 subject_start_points, Int2 num, double xsum, Int4 query_length, Int4 subject_length, Int8 searchsp_eff, double weight_divisor)
	Calculates the e-value of a collection of multiple distinct alignments with asymmetric gaps between the alignments.
double	BLAST_LargeGapSumE (Int2 num, double xsum, Int4 query_length, Int4 subject_length, Int8 searchsp_eff, double weight_divisor)
	Calculates the e-value of a collection of distinct alignments with arbitrarily large gaps between the alignments.
void	Blast_FillResidueProbability (const Uint1 sequence, Int4 length, double resProb)
	Given a sequence of 'length' amino acid residues, compute the probability of each residue and put that in the array resProb. Excludes ambiguity characters.
static double	RPSfindUngappedLambda (const char *matrixName)
	Gets the ungapped lambda calculated for the matrix in question given standard residue composition for both query and subject sequences.
static void	RPSFillScores (Int4 *matrix, Int4 matrixLength, double queryProbArray, double scoreArray, Blast_ScoreFreq return_sfp, Int4 range, Int4 alphabet_size)
	The routine RPSFillScores computes the probability of each score weighted by the probability of each query residue, fills those probabilities into scoreArray, and puts scoreArray as a field in the structure that is returned. For indexing convenience, the field storing scoreArray points to the entry for score 0, so that referring to the -k index corresponds to score -k. FIXME: This can be replaced by _PSIComputeScoreProbabilities??
Int4 **	RPSRescalePssm (double scalingFactor, Int4 rps_query_length, const Uint1 rps_query_seq, Int4 db_seq_length, Int4 posMatrix, BlastScoreBlk sbp)
	Rescale the PSSM, using composition-based statistics, for use with RPS BLAST.
static void	s_BuildCompressedTranslation (const char trans_string, Uint1 table, Int4 compressed_alphabet_size, CompressedReverseLookup rev_table)
	parse the string defining the conversion between the ordinary protein alphabet and a compressed alphabet
static Int2	s_GetCompressedProbs (BlastScoreBlk sbp, double compressed_prob, Int4 compressed_alphabet_size, CompressedReverseLookup rev_table)
	Calculate conditional probability of each letter in each group.
static Int2	s_BuildCompressedScoreMatrix (BlastScoreBlk sbp, SCompressedAlphabet new_alphabet, double matrix_scale_factor, CompressedReverseLookup rev_table)
	Compute a (non-square) score matrix for a compressed alphabet.
SCompressedAlphabet *	SCompressedAlphabetNew (BlastScoreBlk *sbp, Int4 compressed_alphabet_size, double matrix_scale_factor)
	Allocate a new compressed alphabet and score matrix.
SCompressedAlphabet *	SCompressedAlphabetFree (SCompressedAlphabet *alphabet)
	Free a compressed alphabet and score matrix.
Int4	BLAST_ComputeLengthAdjustment (double K, double logK, double alpha_d_lambda, double beta, Int4 query_length, Int8 db_length, Int4 db_num_seqs, Int4 *length_adjustment)
	Computes the adjustment to the lengths of the query and database sequences that is used to compensate for edge effects when computing evalues.
Variables
static char const	rcsid []
static array_of_8	blosum45_values [14]
	Supported values (gap-existence, extension, etc.) for BLOSUM45.
static Int4	blosum45_prefs [14]
	Quality values for BLOSUM45 matrix, each element corresponds to same element number in array blosum45_values.
static array_of_8	blosum50_values [16]
	Supported values (gap-existence, extension, etc.) for BLOSUM50.
static Int4	blosum50_prefs [16]
	Quality values for BLOSUM50 matrix, each element corresponds to same element number in array blosum50_values.
static array_of_8	blosum62_values [12]
	Supported values (gap-existence, extension, etc.) for BLOSUM62.
static Int4	blosum62_prefs [12]
	Quality values for BLOSUM62 matrix, each element corresponds to same element number in array blosum62_values.
static array_of_8	blosum80_values [10]
	Supported values (gap-existence, extension, etc.) for BLOSUM80.
static Int4	blosum80_prefs [10]
	Quality values for BLOSUM80 matrix, each element corresponds to same element number in array blosum80_values.
static array_of_8	blosum90_values [8]
	Supported values (gap-existence, extension, etc.) for BLOSUM90.
static Int4	blosum90_prefs [8]
	Quality values for BLOSUM90 matrix, each element corresponds to same element number in array blosum90_values.
static array_of_8	pam250_values [16]
	Supported values (gap-existence, extension, etc.) for PAM250.
static Int4	pam250_prefs [16]
	Quality values for PAM250 matrix, each element corresponds to same element number in array pam250_values.
static array_of_8	pam30_values [7]
	Supported values (gap-existence, extension, etc.) for PAM30.
static Int4	pam30_prefs [7]
	Quality values for PAM30 matrix, each element corresponds to same element number in array pam30_values.
static array_of_8	pam70_values [7]
	Supported values (gap-existence, extension, etc.) for PAM70.
static Int4	pam70_prefs [7]
	Quality values for PAM70 matrix, each element corresponds to same element number in array pam70_values.
static const array_of_8	blastn_values_1_5 []
	Karlin-Altschul parameter values for substitution scores 1 and -5.
static const array_of_8	blastn_values_1_4 []
	Karlin-Altschul parameter values for substitution scores 1 and -4.
static const array_of_8	blastn_values_2_7 []
	Karlin-Altschul parameter values for substitution scores 2 and -7.
static const array_of_8	blastn_values_1_3 []
	Karlin-Altschul parameter values for substitution scores 1 and -3.
static const array_of_8	blastn_values_2_5 []
	Karlin-Altschul parameter values for substitution scores 2 and -5.
static const array_of_8	blastn_values_1_2 []
	Karlin-Altschul parameter values for substitution scores 1 and -2.
static const array_of_8	blastn_values_2_3 []
	Karlin-Altschul parameter values for substitution scores 2 and -3.
static const array_of_8	blastn_values_3_4 []
	Karlin-Altschul parameter values for substitution scores 3 and -4.
static const array_of_8	blastn_values_4_5 []
	Karlin-Altschul parameter values for substitution scores 4 and -5.
static const array_of_8	blastn_values_1_1 []
	Karlin-Altschul parameter values for substitution scores 1 and -1.
static const array_of_8	blastn_values_3_2 []
	Karlin-Altschul parameter values for substitution scores 3 and -2.
static const array_of_8	blastn_values_5_4 []
	Karlin-Altschul parameter values for substitution scores 5 and -4.
static BLAST_LetterProb	Robinson_prob []
	amino acid background frequencies from Robinson and Robinson
static BLAST_LetterProb	nt_prob []
	nucleotide probabilities (25% each letter)
static const char *	s_alphabet10 = "IJLMV AST BDENZ KQR G FY P H C W"
	23-to-10 letter compressed alphabet.
static const char *	s_alphabet15 = "ST IJV LM KR EQZ A G BD P N F Y H C W"
	23-to-15 letter compressed alphabet.