#include <seqdbisam.hpp>
Inheritance diagram for CSeqDBIsam:
Manages one ISAM file, which will translate either PIGs, GIs, or Accessions to OIDs. Translation in the other direction is done in the CSeqDBVol code. Files managed by this class include those with the extensions pni, pnd, ppi, ppd, psi, psd, nsi, nsd, nni, and nnd. Each instance of this object will manage one pair of these files, including one whose name ends in 'i' and one whose name ends in 'd'.
Definition at line 60 of file seqdbisam.hpp.
Public Types | |
typedef CSeqDBGiList::SGiOid | TGiOid |
Import the type representing one GI, OID association. | |
typedef CSeqDBAtlas::TIndx | TIndx |
Type which is large enough to span the bytes of an ISAM file. | |
typedef int | TOid |
This class works with OIDs relative to a specific volume. | |
typedef int | TPig |
PIG identifiers for numeric indices over protein volumes. | |
typedef int | TGi |
Genomic IDs, the most common numerical identifier. | |
typedef Int8 | TTi |
Identifier type for trace databases. | |
typedef Int8 | TId |
Type large enough to hold any numerical ID. | |
enum | EIdentType { eGiId, eTiId, ePigId, eStringId, eHashId, eOID } |
Identifier formats used by this class. More... | |
enum | EIsamDbType { eNumeric = 0, eNumericNoData = 1, eString = 2, eStringDatabase = 3, eStringBin = 4, eNumericLongId = 5 } |
Types of database this class can access. More... | |
Public Member Functions | |
CSeqDBIsam (CSeqDBAtlas &atlas, const string &dbname, char prot_nucl, char file_ext_char, EIdentType ident_type) | |
Constructor. | |
~CSeqDBIsam () | |
Destructor. | |
bool | PigToOid (TPig pig, TOid &oid, CSeqDBLockHold &locked) |
PIG translation. | |
bool | IdToOid (Int8 id, TOid &oid, CSeqDBLockHold &locked) |
GI or TI translation. | |
void | IdsToOids (int vol_start, int vol_end, CSeqDBGiList &ids, CSeqDBLockHold &locked) |
Translate Gis and Tis to Oids for the given ID list. | |
void | IdsToOids (int vol_start, int vol_end, CSeqDBNegativeList &ids, CSeqDBLockHold &locked) |
Compute list of included OIDs based on a negative ID list. | |
void | StringToOids (const string &acc, vector< TOid > &oids, bool adjusted, bool &version_check, CSeqDBLockHold &locked) |
String translation. | |
bool | SeqidToOid (const string &acc, TOid &oid, CSeqDBLockHold &locked) |
Seq-id translation. | |
void | HashToOids (unsigned hash, vector< TOid > &oids, CSeqDBLockHold &locked) |
Sequence hash lookup. | |
void | UnLease () |
Return any memory held by this object to the atlas. | |
void | GetIdBounds (Int8 &low_id, Int8 &high_id, int &count, CSeqDBLockHold &locked) |
Get Numeric Bounds. | |
void | GetIdBounds (string &low_id, string &high_id, int &count, CSeqDBLockHold &locked) |
Get String Bounds. | |
Static Public Member Functions | |
static EIdentType | TryToSimplifyAccession (const string &acc, Int8 &num_id, string &str_id, bool &simpler) |
String id simplification. | |
static EIdentType | SimplifySeqid (CSeq_id &bestid, const string *acc, Int8 &num_id, string &str_id, bool &simpler) |
Seq-id simplification. | |
static bool | IndexExists (const string &dbname, char prot_nucl, char file_ext_char) |
Check if a given ISAM index exists. | |
Private Types | |
enum | EErrorCode { eNotFound = 1, eNoError = 0, eBadVersion = -10, eBadType = -11, eWrongFile = -12 } |
Exit conditions occurring in this code. More... | |
Private Member Functions | |
bool | x_IdentToOid (Int8 id, TOid &oid, CSeqDBLockHold &locked) |
Numeric identifier lookup. | |
EErrorCode | x_SearchIndexNumeric (Int8 Number, int *Data, Uint4 *Index, Int4 &SampleNum, bool &done, CSeqDBLockHold &locked) |
Index file search. | |
void | x_SearchIndexNumericMulti (int vol_start, int vol_end, CSeqDBGiList &gis, bool use_tis, CSeqDBLockHold &locked) |
GiList Translation. | |
void | x_SearchNegativeMulti (int vol_start, int vol_end, CSeqDBNegativeList &gis, bool use_tis, CSeqDBLockHold &locked) |
Negative ID List Translation. | |
EErrorCode | x_SearchDataNumeric (Int8 Number, int *Data, Uint4 *Index, Int4 SampleNum, CSeqDBLockHold &locked) |
Data file search. | |
void | x_SearchDataNumericMulti (int vol_start, int vol_end, CSeqDBGiList &gis, int &gilist_index, int sample_index, bool use_tis, CSeqDBLockHold &locked) |
GiList translation for one page of a data file. | |
EErrorCode | x_NumericSearch (Int8 Number, int *Data, Uint4 *Index, CSeqDBLockHold &locked) |
Numeric identifier lookup. | |
EErrorCode | x_StringSearch (const string &term_in, vector< string > &term_out, vector< string > &value_out, vector< TIndx > &index_out, CSeqDBLockHold &locked) |
String identifier lookup. | |
EErrorCode | x_InitSearch (CSeqDBLockHold &locked) |
Initialize the search object. | |
int | x_GetPageNumElements (Int4 SampleNum, Int4 *Start) |
Determine the number of elements in the data page. | |
bool | x_SparseStringToOids (const string &acc, vector< int > &oids, bool adjusted, CSeqDBLockHold &locked) |
Lookup a string in a sparse table. | |
int | x_DiffCharLease (const string &term_in, CSeqDBMemLease &lease, const string &file_name, TIndx file_length, Uint4 at_least, TIndx KeyOffset, bool ignore_case, CSeqDBLockHold &locked) |
Find the first character to differ in two strings. | |
int | x_DiffChar (const string &term_in, const char *begin, const char *end, bool ignore_case) |
Find the first character to differ in two strings. | |
void | x_ExtractData (const char *key_start, const char *entry_end, vector< string > &key_out, vector< string > &data_out) |
Extract the data from a key-value pair in memory. | |
TIndx | x_GetIndexKeyOffset (TIndx sample_offset, Uint4 sample_num, CSeqDBLockHold &locked) |
Get the offset of the specified sample. | |
void | x_GetIndexString (TIndx key_offset, int length, string &prefix, bool trim_to_null, CSeqDBLockHold &locked) |
Read a string from the index file. | |
int | x_DiffSample (const string &term_in, Uint4 SampleNum, TIndx &KeyOffset, CSeqDBLockHold &locked) |
Find the first character to differ in two strings. | |
void | x_ExtractAllData (const string &term_in, TIndx sample_index, vector< TIndx > &indices_out, vector< string > &keys_out, vector< string > &data_out, CSeqDBLockHold &locked) |
Find matches in the given page of a string ISAM file. | |
void | x_ExtractPageData (const string &term_in, TIndx page_index, const char *beginp, const char *endp, vector< TIndx > &indices_out, vector< string > &keys_out, vector< string > &data_out) |
Find matches in the given memory area of a string ISAM file. | |
void | x_LoadPage (TIndx SampleNum1, TIndx SampleNum2, const char **beginp, const char **endp, CSeqDBLockHold &locked) |
Map a page into memory. | |
int | x_TestNumericSample (CSeqDBMemLease &index_lease, int index, Int8 key_in, Int8 &key_out, int &data_out) |
Test a sample key value from a numeric index. | |
void | x_GetNumericSample (CSeqDBMemLease &index_lease, int index, Int8 &key_out, int &data_out) |
Get a sample key value from a numeric index. | |
bool | x_AdvanceGiList (int vol_start, int vol_end, CSeqDBGiList &gis, int &index, Int8 key, int data, bool use_tis) |
Advance the GI list. | |
bool | x_FindInNegativeList (CSeqDBNegativeList &ids, int &index, Int8 key, bool use_tis) |
Find ID in the negative GI list using PBS. | |
bool | x_AdvanceIsamIndex (CSeqDBMemLease &index_lease, int &index, Int8 target_gi, Int8 &isam_key, int &isam_data) |
Advance the ISAM file. | |
void | x_MapDataPage (int sample_index, int &start, int &num_elements, const void **data_page_begin, CSeqDBLockHold &locked) |
Map a data page. | |
void | x_GetDataElement (const void *dpage, int index, Int8 &key, int &data) |
Get a particular data element from a data page. | |
void | x_FindIndexBounds (CSeqDBLockHold &locked) |
Find the least and greatest keys in this ISAM file. | |
bool | x_OutOfBounds (Int8 key, CSeqDBLockHold &locked) |
Check whether a numeric key is within this volume's bounds. | |
bool | x_OutOfBounds (string key, CSeqDBLockHold &locked) |
Check whether a string key is within this volume's bounds. | |
Uint8 | x_GetNumericKey (const void *p) |
int | x_GetNumericData (const void *p) |
Static Private Member Functions | |
static void | x_Upper (string &s) |
Converts a string to upper case. | |
static Int8 | x_GetId (CSeqDBGiList &ids, int index, bool use_tis) |
Fetch a GI or TI from a GI list. | |
static Int8 | x_GetOid (CSeqDBGiList &ids, int index, bool use_tis) |
Fetch an OID from the GI or TI vector in a GI list. | |
static Int8 | x_GetId (CSeqDBNegativeList &ids, int index, bool use_tis) |
Fetch a GI or TI from a GI list. | |
static void | x_MakeFilenames (const string &dbname, char prot_nucl, char file_ext_char, string &index_name, string &data_name) |
Make filenames for ISAM file. | |
Private Attributes | |
CSeqDBAtlas & | m_Atlas |
The memory management layer. | |
EIdentType | m_IdentType |
The type of identifier this class uses. | |
CSeqDBMemLease | m_IndexLease |
A persistent lease on the ISAM index file. | |
CSeqDBMemLease | m_DataLease |
A persistent lease on the ISAM data file. | |
int | m_Type |
The format type of database files found (eNumeric or eString). | |
string | m_DataFname |
The filename of the ISAM data file. | |
string | m_IndexFname |
The filename of the ISAM index file. | |
TIndx | m_DataFileLength |
The length of the ISAM data file. | |
TIndx | m_IndexFileLength |
The length of the ISAM index file. | |
Int4 | m_NumTerms |
Number of terms in database. | |
Int4 | m_NumSamples |
Number of terms in ISAM index. | |
Int4 | m_PageSize |
Page size of ISAM index. | |
Int4 | m_MaxLineSize |
Maximum string length in the database. | |
Int4 | m_IdxOption |
Options set by upper layer. | |
bool | m_Initialized |
Flag indicating whether initialization has been done. | |
TIndx | m_KeySampleOffset |
Offset of samples in index file. | |
bool | m_TestNonUnique |
Check if data for String ISAM is sorted. | |
char * | m_FileStart |
Pointer to index file if no memmap. | |
Int4 | m_FirstOffset |
First and last offsets of last page. | |
Int4 | m_LastOffset |
First and last offsets of last page. | |
SIsamKey | m_FirstKey |
First volume key. | |
SIsamKey | m_LastKey |
Last volume key. | |
bool | m_LongId |
Use Uint8 for the key. | |
int | m_Keysize |
Size of the numeric key-data. | |
Classes | |
class | SIsamKey |
Stores a key for an ISAM file. More... |
|
Genomic IDs, the most common numerical identifier.
Definition at line 95 of file seqdbisam.hpp. |
|
Import the type representing one GI, OID association.
Definition at line 63 of file seqdbisam.hpp. |
|
Type large enough to hold any numerical ID.
Definition at line 101 of file seqdbisam.hpp. |
|
Type which is large enough to span the bytes of an ISAM file.
Definition at line 86 of file seqdbisam.hpp. |
|
This class works with OIDs relative to a specific volume.
Definition at line 89 of file seqdbisam.hpp. |
|
PIG identifiers for numeric indices over protein volumes.
Definition at line 92 of file seqdbisam.hpp. |
|
Identifier type for trace databases.
Definition at line 98 of file seqdbisam.hpp. |
|
Exit conditions occurring in this code.
Definition at line 481 of file seqdbisam.hpp. |
|
Identifier formats used by this class.
Definition at line 66 of file seqdbisam.hpp. |
|
Types of database this class can access.
Definition at line 76 of file seqdbisam.hpp. |
|
Constructor. An ISAM file object corresponds to an index file and a data file, and converts identifiers (string, GI, or PIG) into OIDs relative to a particular database volume.
Definition at line 1409 of file seqdbisam.cpp. References DEFAULT_NISAM_SIZE, DEFAULT_SISAM_SIZE, eGiId, eHashId, eNumeric, ePigId, eString, eStringId, eTiId, m_DataFname, m_IndexFname, m_PageSize, m_Type, NCBI_THROW, and x_MakeFilenames(). |
|
Destructor. Releases all resources associated with this object. Definition at line 1512 of file seqdbisam.cpp. References UnLease(). |
|
Get String Bounds. Fetch the lowest, highest, and total number of string keys in the database index. If the operation fails, zero will be returned for count.
Definition at line 2238 of file seqdbisam.cpp. References eNoError, error(), CSeqDBIsam::SIsamKey::GetString(), CSeqDBIsam::SIsamKey::IsSet(), CSeqDBAtlas::Lock(), m_Atlas, m_FirstKey, m_Initialized, m_LastKey, m_NumTerms, x_FindIndexBounds(), and x_InitSearch(). |
|
Get Numeric Bounds. Fetch the lowest, highest, and total number of numeric keys in the database index. If the operation fails, zero will be returned for count.
Definition at line 2213 of file seqdbisam.cpp. References eNoError, error(), CSeqDBIsam::SIsamKey::GetNumeric(), CSeqDBIsam::SIsamKey::IsSet(), CSeqDBAtlas::Lock(), m_Atlas, m_FirstKey, m_Initialized, m_LastKey, m_NumTerms, x_FindIndexBounds(), and x_InitSearch(). |
|
Sequence hash lookup. This method tries to find sequences associated with a given sequence hash value. The provided value is numeric but the ISAM file uses a string format, because string searches can return multiple results per key, and there may be multiple OIDs for a given hash value due to identical sequences and collisions.
Definition at line 2263 of file seqdbisam.cpp. References _ASSERT, eHashId, eNoError, eNotFound, ITERATE, CSeqDBAtlas::Lock(), m_Atlas, m_IdentType, m_Initialized, NStr::UIntToString(), x_InitSearch(), and x_StringSearch(). |
|
Compute list of included OIDs based on a negative ID list. This method iterates over a vector of Gis or Tis, along with the corresponding ISAM file for this volume. Each OID found in the ISAM file is marked in the negative ID list. For those for which the GI or TI is not mentioned in the negative ID list, the OID will be marked as an 'included' OID in the ID list (that OID will be searched). The OIDs for IDs that are not found in the ID list will be marked as 'visible' OIDs. When this process is done for all volumes, the SeqDB object will use all OIDs that are either marked as 'included' or NOT marked as 'visible'. The 'visible' list is needed because otherwise iteration would skip IDs that do not have GIs or TIs (whichever is being iterated). To use this method, this volume must have an ISAM file matching the negative ID list's identifier type or an exception will be thrown.
Definition at line 2012 of file seqdbisam.cpp. References _ASSERT, eGiId, eTiId, CSeqDBNegativeList::GetNumGis(), CSeqDBNegativeList::InsureOrder(), CSeqDBAtlas::Lock(), m_Atlas, m_IdentType, and x_SearchNegativeMulti(). |
|
Translate Gis and Tis to Oids for the given ID list. This method iterates over a vector of Gi/OID and/or Ti/OID pairs. For each pair where the OID is -1, the GI or TI will be looked up in the ISAM file, and (if found) the correct OID will be stored (otherwise the -1 will remain). This method will normally be called once for each volume.
Definition at line 1982 of file seqdbisam.cpp. References _ASSERT, CSeqDBGiList::eGi, eGiId, eTiId, CSeqDBGiList::GetNumGis(), CSeqDBGiList::InsureOrder(), CSeqDBAtlas::Lock(), m_Atlas, m_IdentType, and x_SearchIndexNumericMulti(). |
|
GI or TI translation. A GI or TI identifier is translated to an OID. GI identifiers are used for all types of sequences. TI identifiers are used primarily for nucleotide data in the Trace DBs. Multiple GIs may indicate the same sequence of bases and the same OID, but TIs are usually unique.
Definition at line 168 of file seqdbisam.hpp. References _ASSERT, eGiId, eTiId, m_IdentType, and x_IdentToOid(). |
|
Check if a given ISAM index exists.
Definition at line 1502 of file seqdbisam.cpp. References x_MakeFilenames(). |
|
PIG translation. A PIG identifier is translated to an OID. PIG identifiers are used exclusively for protein sequences. One PIG corresponds to exactly one sequence of amino acids, and vice versa. They are also stable; the sequence a PIG points to will never be changed.
Definition at line 146 of file seqdbisam.hpp. References _ASSERT, ePigId, m_IdentType, and x_IdentToOid(). |
|
Seq-id translation. A Seq-id identifier (serialized to a string) is translated into an OID. This routine will attempt to simplify the seqid so as to use the faster numeric lookup techniques whenever possible.
|
|
Seq-id simplification. Given a Seq-id, this routine devolves it to a GI or PIG if possible. If not, it formats the Seq-id into a canonical form for lookup in the string ISAM files. If the Seq-id was parsed from an accession, it can be provided in the "acc" parameter, and it will be used if the Seq-id is not in a form this code can recognize. In the case that new Seq-id types are added, support for which has not been added to this code, this mechanism will try to use the original string.
Definition at line 1804 of file seqdbisam.cpp. References CDbtag_Base::CanGetDb(), CSeq_id_Base::e_General, CSeq_id_Base::e_Gi, CSeq_id_Base::e_Gibbsq, eGiId, eOID, ePigId, eStringId, CDbtag_Base::GetDb(), CSeq_id_Base::GetGeneral(), CSeq_id_Base::GetGi(), CSeq_id_Base::GetGibbsq(), CDbtag_Base::GetTag(), NStr::UIntToString(), and CSeq_id_Base::Which(). Referenced by CSeqDBVol::SeqidToOids(), and TryToSimplifyAccession(). |
|
String translation. A string id is translated to one or more OIDs. String ids are used by some groups which produce sequence data. In some cases, the string may correspond to more than one OID. For this reason, the OIDs are returned in a vector. The string provided is looked up in several ways. If it contains a pipe character ("|") the data will be interpreted as a SeqID. This routine can use faster lookup mechanisms if the simplification routines were able to recognize the sequence as one of several types that have numerical indices. The version_check flag is needed to support sparse indexing. If version_check is true, and the string has a version, and the lookup fails, this method will try to remove the version and search again. On return from this method version_check will be set to true if and only if the first search failed and the versionless search succeeded. CSeqDBVol::x_CheckVersions() can then be called to verify the OIDs; see that method for more information about this scenario.
Definition at line 1646 of file seqdbisam.cpp. References _ASSERT, eNoError, eNotFound, eStringId, CSeqDBAtlas::Lock(), m_Atlas, m_IdentType, m_Initialized, pos, x_InitSearch(), and x_StringSearch(). |
|
String id simplification. This routine tries to produce a numerical type from a string identifier. SeqDB can use faster lookup mechanisms if a PIG, GI, or OID type can be recognized in the string, for example. Even when the output is a string, it may be better formed for the purpose of lookup in the string ISAM file.
Definition at line 1957 of file seqdbisam.cpp. References CSeq_id::BestRank(), eStringId, FindBestChoice(), s_SeqDB_ParseSeqIDs(), and SimplifySeqid(). Referenced by CSeqDBVol::AccessionToOids(). |
|
Return any memory held by this object to the atlas.
Definition at line 1517 of file seqdbisam.cpp. References CSeqDBMemLease::Empty(), m_Atlas, m_IndexLease, and CSeqDBAtlas::RetRegion(). Referenced by ~CSeqDBIsam(). |
|
Advance the GI list. Skip over any GIs in the GI list that are less than the key, translate any that are equal to it, and skip past any GI/OID pairs that have already been translated. Uses the parabolic binary search technique.
Definition at line 1276 of file seqdbisam.hpp. References CSeqDBGiList::GetNumGis(), CSeqDBGiList::GetNumTis(), x_GetId(), and x_GetOid(). |
|
Advance the ISAM file. Skip over any GI/OID pairs in the ISAM file that are less than the target_gi. Uses the parabolic binary search technique.
Definition at line 1370 of file seqdbisam.hpp. References m_NumSamples, and x_TestNumericSample(). |
|
Find the first character to differ in two strings. This finds the index of the first character to differ in a meaningful way between two strings. One of the strings is a term that is passed in; the other is a range of memory represented by two pointers.
Definition at line 857 of file seqdbisam.cpp. References ch1, ch2, and s_SeqDBIsam_NullifyEOLs(). Referenced by x_ExtractAllData(), and x_ExtractPageData(). |
|
Find the first character to differ in two strings. This finds the index of the first character to differ in a meaningful way between two strings. One of the strings is a term that is passed in; the other is assumed to be located in the ISAM table, a lease to which is passed to this function.
Definition at line 777 of file seqdbisam.cpp. References CSeqDBMemLease::Contains(), CSeqDBAtlas::GetRegion(), CSeqDBAtlas::Lock(), and m_Atlas. |
|
Find the first character to differ in two strings. This finds the index of the first character to differ between two strings. The first string is provided, the second is one of the sample strings, indicated by the index of that sample value.
Definition at line 1148 of file seqdbisam.cpp. References CSeqDBMemLease::Contains(), CSeqDBAtlas::GetRegion(), CSeqDBAtlas::Lock(), m_Atlas, m_IndexFname, m_IndexLease, m_KeySampleOffset, m_NumSamples, m_PageSize, and MEMORY_ONLY_PAGE_SIZE. Referenced by x_StringSearch(). |
|
Find matches in the given page of a string ISAM file. This searches the area around a specific page of the data file to find all matches to term_in. The results are returned in vectors. This method may search multiple pages.
Definition at line 956 of file seqdbisam.cpp. References m_NumSamples, m_PageSize, s_SeqDBIsam_NullifyEOLs(), x_DiffChar(), x_ExtractPageData(), and x_LoadPage(). Referenced by x_StringSearch(). |
|
Extract the data from a key-value pair in memory. Given pointers to a location in mapped memory, and the end of the mapped data, this finds the key and data values for the object at that location.
Definition at line 1061 of file seqdbisam.cpp. References s_SeqDBIsam_NullifyEOLs(). Referenced by x_ExtractPageData(), and x_FindIndexBounds(). |
|
Find matches in the given memory area of a string ISAM file. This searches the specified section of memory to find all matches to term_in. The results are returned in vectors.
Definition at line 902 of file seqdbisam.cpp. References s_SeqDBIsam_NullifyEOLs(), x_DiffChar(), and x_ExtractData(). Referenced by x_ExtractAllData(), and x_StringSearch(). |
|
Find the least and greatest keys in this ISAM file.
Definition at line 2042 of file seqdbisam.cpp. References _ASSERT, eNumeric, CSeqDBAtlas::Lock(), m_Atlas, m_FirstKey, m_LastKey, m_NumSamples, m_Type, s_SeqDBIsam_NullifyEOLs(), CSeqDBIsam::SIsamKey::SetNumeric(), CSeqDBIsam::SIsamKey::SetString(), x_ExtractData(), x_GetDataElement(), x_LoadPage(), x_MapDataPage(), and x_Upper(). Referenced by GetIdBounds(), and x_OutOfBounds(). |
|
Find ID in the negative GI list using PBS. Use parabolic binary search to find the specified ID in the negative ID list. The 'index' value is the index to start the search at (this must refer to an index at or before the target data if the search is to succeed). Whether the search was successful or not, the index will be moved forward past any elements with values less than 'key'.
Definition at line 1336 of file seqdbisam.hpp. References CSeqDBNegativeList::GetNumGis(), CSeqDBNegativeList::GetNumTis(), and x_GetId(). Referenced by x_SearchNegativeMulti(). |
|
Get a particular data element from a data page.
Definition at line 1450 of file seqdbisam.hpp. References m_Keysize, x_GetNumericData(), and x_GetNumericKey(). Referenced by x_FindIndexBounds(), x_SearchDataNumericMulti(), and x_SearchNegativeMulti(). |
|
Fetch a GI or TI from a GI list.
Definition at line 1120 of file seqdbisam.hpp. References CSeqDBNegativeList::GetGi(), and CSeqDBNegativeList::GetTi(). |
|
Fetch a GI or TI from a GI list.
Definition at line 1104 of file seqdbisam.hpp. References CSeqDBGiList::GetGiOid(), and CSeqDBGiList::GetTiOid(). Referenced by x_AdvanceGiList(), x_FindInNegativeList(), and x_SearchDataNumericMulti(). |
|
Get the offset of the specified sample. For string ISAM indices, the index file contains a table of offsets of the index file samples. This function gets the offset of the specified sample in the index file's table.
Definition at line 1091 of file seqdbisam.cpp. References CSeqDBMemLease::Contains(), CSeqDBAtlas::GetRegion(), CSeqDBAtlas::Lock(), m_Atlas, m_IndexFname, and m_IndexLease. Referenced by x_StringSearch(). |
|
Read a string from the index file. Given an offset into the index file, and a maximum length, this function returns the bytes in a string object.
Definition at line 1113 of file seqdbisam.cpp. References CSeqDBMemLease::Contains(), CSeqDBAtlas::GetRegion(), CSeqDBAtlas::Lock(), m_Atlas, m_IndexFname, and m_IndexLease. Referenced by x_StringSearch(). |
|
Definition at line 1221 of file seqdbisam.hpp. References SeqDB_GetStdOrd(). Referenced by x_GetDataElement(), x_GetNumericSample(), x_SearchIndexNumeric(), and x_TestNumericSample(). |
|
Definition at line 1214 of file seqdbisam.hpp. References SeqDB_GetStdOrd(). Referenced by x_GetDataElement(), x_GetNumericSample(), x_SearchDataNumericMulti(), x_SearchIndexNumeric(), and x_TestNumericSample(). |
|
Get a sample key value from a numeric index. Given the index of a sample value, this code will get the key. If data values are stored in the index file, the corresponding data value will also be returned. The offset of the data block is computed and returned as well.
Definition at line 1259 of file seqdbisam.hpp. References CSeqDBMemLease::GetPtr(), m_KeySampleOffset, m_Keysize, x_GetNumericData(), and x_GetNumericKey(). |
|
Fetch an OID from the GI or TI vector in a GI list.
Definition at line 1112 of file seqdbisam.hpp. References CSeqDBGiList::GetGiOid(), and CSeqDBGiList::GetTiOid(). Referenced by x_AdvanceGiList(), and x_SearchDataNumericMulti(). |
|
Determine the number of elements in the data page. The number of elements is determined based on whether this is the last page and the configured page size.
Definition at line 131 of file seqdbisam.cpp. References m_NumSamples, m_NumTerms, and m_PageSize. Referenced by x_MapDataPage(), and x_SearchDataNumeric(). |
|
Numeric identifier lookup. Given a numeric identifier, this routine finds the OID.
Definition at line 1527 of file seqdbisam.cpp. References eNoError, and x_NumericSearch(). Referenced by IdToOid(), and PigToOid(). |
|
Initialize the search object. The first identifier search sets up the object by calling this function, which reads the metadata from the index file and sets all the fields needed for ISAM lookups.
Definition at line 64 of file seqdbisam.cpp. References eBadType, eBadVersion, eNoError, eNumeric, eNumericLongId, eWrongFile, CSeqDBAtlas::GetFileSize(), CSeqDBMemLease::GetPtr(), CSeqDBAtlas::GetRegion(), ISAM_VERSION, CSeqDBAtlas::Lock(), m_Atlas, m_DataFileLength, m_DataFname, m_IdxOption, m_IndexFileLength, m_IndexFname, m_IndexLease, m_Initialized, m_KeySampleOffset, m_Keysize, m_LongId, m_MaxLineSize, m_NumSamples, m_NumTerms, m_PageSize, m_Type, MEMORY_ONLY_PAGE_SIZE, and SeqDB_GetStdOrd(). Referenced by GetIdBounds(), HashToOids(), StringToOids(), x_SearchIndexNumeric(), x_SearchIndexNumericMulti(), x_SearchNegativeMulti(), and x_StringSearch(). |
|
Map a page into memory. Given two indices, this method maps into memory the area starting at the beginning of the first index and extending to the end of the other. (If the indices are equal, only one page would be mapped.)
Definition at line 1193 of file seqdbisam.cpp. References _ASSERT, CSeqDBMemLease::Contains(), CSeqDBAtlas::GetRegion(), CSeqDBAtlas::Lock(), m_Atlas, m_IndexFname, m_IndexLease, and m_KeySampleOffset. Referenced by x_ExtractAllData(), x_FindIndexBounds(), and x_StringSearch(). |
|
Make filenames for ISAM file.
Definition at line 1474 of file seqdbisam.cpp. References NCBI_THROW. Referenced by CSeqDBIsam(), and IndexExists(). |
|
Map a data page. The caller provides an index into the sample file. The page of data is mapped, and a pointer is returned. In addition, the starting index (start) of the data is returned, along with the number of elements in that page.
Definition at line 1425 of file seqdbisam.hpp. References CSeqDBMemLease::Contains(), CSeqDBAtlas::GetRegion(), CSeqDBAtlas::Lock(), m_Atlas, m_DataFname, m_DataLease, m_Keysize, and x_GetPageNumElements(). Referenced by x_FindIndexBounds(), x_SearchDataNumericMulti(), and x_SearchNegativeMulti(). |
|
Numeric identifier lookup. Given a numeric identifier, this routine finds the OID.
Definition at line 759 of file seqdbisam.cpp. References error(), x_SearchDataNumeric(), and x_SearchIndexNumeric(). Referenced by x_IdentToOid(). |
|
Check whether a string key is within this volume's bounds.
Definition at line 2188 of file seqdbisam.cpp. References CSeqDBIsam::SIsamKey::IsSet(), m_FirstKey, and x_FindIndexBounds(). |
|
Check whether a numeric key is within this volume's bounds.
Definition at line 2165 of file seqdbisam.cpp. References CSeqDBIsam::SIsamKey::IsSet(), m_FirstKey, and x_FindIndexBounds(). Referenced by x_SearchIndexNumeric(), and x_StringSearch(). |
|
Data file search. Given a numeric identifier, this routine finds the OID in the data file.
Definition at line 514 of file seqdbisam.cpp. References _ASSERT, CSeqDBMemLease::Contains(), eNumericNoData, CSeqDBAtlas::GetRegion(), CSeqDBAtlas::Lock(), m_Atlas, m_DataFname, m_DataLease, m_Keysize, m_Type, and x_GetPageNumElements(). Referenced by x_NumericSearch(). |
|
GiList translation for one page of a data file. Given a GI list, this routine finds the OID for each GI in the list using the mappings in one page of the data file. It updates the provided GI list index to skip past any GIs it translates.
Definition at line 588 of file seqdbisam.cpp. References _ASSERT, eNumericNoData, CSeqDBGiList::GetNumGis(), CSeqDBGiList::GetNumTis(), CSeqDBAtlas::Lock(), m_Atlas, m_Keysize, m_Type, x_GetDataElement(), x_GetId(), x_GetNumericKey(), x_GetOid(), and x_MapDataPage(). |
|
Index file search. Given a numeric identifier, this routine finds the OID or the page in the data file where the OID can be found.
Definition at line 148 of file seqdbisam.cpp. References _ASSERT, CSeqDBMemLease::Contains(), eNoError, eNotFound, eNumericNoData, error(), CSeqDBMemLease::GetPtr(), CSeqDBAtlas::GetRegion(), CSeqDBAtlas::Lock(), m_Atlas, m_IndexFname, m_IndexLease, m_Initialized, m_KeySampleOffset, m_Keysize, m_NumSamples, m_PageSize, m_Type, x_GetNumericData(), x_GetNumericKey(), x_InitSearch(), and x_OutOfBounds(). Referenced by x_NumericSearch(). |
|
GiList Translation. Given a GI list, this routine finds the OID for each ID in the list not already having a translation.
Definition at line 242 of file seqdbisam.cpp. References CSeqDBMemLease::Contains(), eNoError, error(), CSeqDBAtlas::GetRegion(), CSeqDBAtlas::Lock(), m_Atlas, m_IndexFileLength, m_IndexFname, m_Initialized, NCBI_THROW, and x_InitSearch(). Referenced by IdsToOids(). |
|
Negative ID List Translation. Given a Negative ID list, this routine turns on the bits for the OIDs found in the volume but not in the negated ID list.
Definition at line 417 of file seqdbisam.cpp. References _ASSERT, CSeqDBNegativeList::AddIncludedOid(), CSeqDBNegativeList::AddVisibleOid(), eNoError, eNumericNoData, error(), CSeqDBNegativeList::GetNumGis(), CSeqDBNegativeList::GetNumTis(), CSeqDBAtlas::Lock(), m_Atlas, m_Initialized, m_NumSamples, m_Type, NCBI_THROW, x_FindInNegativeList(), x_GetDataElement(), x_InitSearch(), and x_MapDataPage(). Referenced by IdsToOids(). |
|
Lookup a string in a sparse table. This does string lookup in a sparse string table. There is no support (code) for this since there are currently no examples of this kind of table to test against.
Definition at line 1793 of file seqdbisam.cpp. References _TROUBLE. |
|
String identifier lookup. Given a string identifier, this routine finds the OID(s).
Definition at line 1237 of file seqdbisam.cpp. References NStr::CompareNocase(), eNoError, eNotFound, error(), CSeqDBMemLease::GetPtr(), m_IndexFileLength, m_IndexLease, m_Initialized, m_KeySampleOffset, m_MaxLineSize, m_NumSamples, m_PageSize, MEMORY_ONLY_PAGE_SIZE, x_DiffSample(), x_ExtractAllData(), x_ExtractPageData(), x_GetIndexKeyOffset(), x_GetIndexString(), x_InitSearch(), x_LoadPage(), and x_OutOfBounds(). Referenced by HashToOids(), and StringToOids(). |
|
Test a sample key value from a numeric index. This method reads the key value of an index file sample element from a numeric index file. The calling code should ensure that the data is mapped in, and that the file type is correct. The key value found will be compared to the search key. This method will return 0 for an exact match, -1 if the key is less than the sample, or 1 if the key is greater. If the match is exact, it will also return the data in data_out.
Definition at line 1230 of file seqdbisam.hpp. References CSeqDBMemLease::GetPtr(), m_KeySampleOffset, m_Keysize, x_GetNumericData(), and x_GetNumericKey(). Referenced by x_AdvanceIsamIndex(). |
|
Converts a string to upper case.
Definition at line 1096 of file seqdbisam.hpp. Referenced by x_FindIndexBounds(). |
|
The memory management layer.
Definition at line 1143 of file seqdbisam.hpp. Referenced by GetIdBounds(), HashToOids(), IdsToOids(), StringToOids(), UnLease(), x_DiffCharLease(), x_DiffSample(), x_FindIndexBounds(), x_GetIndexKeyOffset(), x_GetIndexString(), x_InitSearch(), x_LoadPage(), x_MapDataPage(), x_SearchDataNumeric(), x_SearchDataNumericMulti(), x_SearchIndexNumeric(), x_SearchIndexNumericMulti(), and x_SearchNegativeMulti(). |
|
The length of the ISAM data file.
Definition at line 1164 of file seqdbisam.hpp. Referenced by x_InitSearch(). |
|
The filename of the ISAM data file.
Definition at line 1158 of file seqdbisam.hpp. Referenced by CSeqDBIsam(), x_InitSearch(), x_MapDataPage(), and x_SearchDataNumeric(). |
|
A persistent lease on the ISAM data file.
Definition at line 1152 of file seqdbisam.hpp. Referenced by x_MapDataPage(), and x_SearchDataNumeric(). |
|
Pointer to index file if no memmap.
Definition at line 1194 of file seqdbisam.hpp. |
|
First volume key.
Definition at line 1203 of file seqdbisam.hpp. Referenced by GetIdBounds(), x_FindIndexBounds(), and x_OutOfBounds(). |
|
First and last offsets of last page.
Definition at line 1197 of file seqdbisam.hpp. |
|
The type of identifier this class uses.
Definition at line 1146 of file seqdbisam.hpp. Referenced by HashToOids(), IdsToOids(), IdToOid(), PigToOid(), and StringToOids(). |
|
Options set by upper layer.
Definition at line 1182 of file seqdbisam.hpp. Referenced by x_InitSearch(). |
|
The length of the ISAM index file.
Definition at line 1167 of file seqdbisam.hpp. Referenced by x_InitSearch(), x_SearchIndexNumericMulti(), and x_StringSearch(). |
|
The filename of the ISAM index file.
Definition at line 1161 of file seqdbisam.hpp. Referenced by CSeqDBIsam(), x_DiffSample(), x_GetIndexKeyOffset(), x_GetIndexString(), x_InitSearch(), x_LoadPage(), x_SearchIndexNumeric(), and x_SearchIndexNumericMulti(). |
|
A persistent lease on the ISAM index file.
Definition at line 1149 of file seqdbisam.hpp. Referenced by UnLease(), x_DiffSample(), x_GetIndexKeyOffset(), x_GetIndexString(), x_InitSearch(), x_LoadPage(), x_SearchIndexNumeric(), and x_StringSearch(). |
|
Flag indicating whether initialization has been done.
Definition at line 1185 of file seqdbisam.hpp. Referenced by GetIdBounds(), HashToOids(), StringToOids(), x_InitSearch(), x_SearchIndexNumeric(), x_SearchIndexNumericMulti(), x_SearchNegativeMulti(), and x_StringSearch(). |
|
Offset of samples in index file.
Definition at line 1188 of file seqdbisam.hpp. Referenced by x_DiffSample(), x_GetNumericSample(), x_InitSearch(), x_LoadPage(), x_SearchIndexNumeric(), x_StringSearch(), and x_TestNumericSample(). |
|
Size of the numeric key-data.
Definition at line 1212 of file seqdbisam.hpp. Referenced by x_GetDataElement(), x_GetNumericSample(), x_InitSearch(), x_MapDataPage(), x_SearchDataNumeric(), x_SearchDataNumericMulti(), x_SearchIndexNumeric(), and x_TestNumericSample(). |
|
Last volume key.
Definition at line 1206 of file seqdbisam.hpp. Referenced by GetIdBounds(), and x_FindIndexBounds(). |
|
First and last offsets of last page.
Definition at line 1200 of file seqdbisam.hpp. |
|
Use Uint8 for the key.
Definition at line 1209 of file seqdbisam.hpp. Referenced by x_InitSearch(). |
|
Maximum string length in the database.
Definition at line 1179 of file seqdbisam.hpp. Referenced by x_InitSearch(), and x_StringSearch(). |
|
Number of terms in ISAM index.
Definition at line 1173 of file seqdbisam.hpp. Referenced by x_AdvanceIsamIndex(), x_DiffSample(), x_ExtractAllData(), x_FindIndexBounds(), x_GetPageNumElements(), x_InitSearch(), x_SearchIndexNumeric(), x_SearchNegativeMulti(), and x_StringSearch(). |
|
Number of terms in database.
Definition at line 1170 of file seqdbisam.hpp. Referenced by GetIdBounds(), x_GetPageNumElements(), and x_InitSearch(). |
|
Page size of ISAM index.
Definition at line 1176 of file seqdbisam.hpp. Referenced by CSeqDBIsam(), x_DiffSample(), x_ExtractAllData(), x_GetPageNumElements(), x_InitSearch(), x_SearchIndexNumeric(), and x_StringSearch(). |
|
Check if data for String ISAM is sorted.
Definition at line 1191 of file seqdbisam.hpp. |
|
The format type of database files found (eNumeric or eString).
Definition at line 1155 of file seqdbisam.hpp. Referenced by CSeqDBIsam(), x_FindIndexBounds(), x_InitSearch(), x_SearchDataNumeric(), x_SearchDataNumericMulti(), x_SearchIndexNumeric(), and x_SearchNegativeMulti(). |