#include <index.h>
Public Types | |
typedef std::map< double, uint32_t > | histogram |
enum | INDEX_TYPE { BINNING = 0, RANGE, MESA, AMBIT, PALE, PACK, ZONE, RELIC, ROSTER, SLICE, FADE, SBIAD, SAPID, EGALE, MOINS, ENTRE, BAK, BAK2, KEYWORDS, MESH, BAND, DIREKTE, GENERIC, BYLT, FUZZ, ZONA, FUGE, EXTERN } |
The integer values of this enum type are used in the index files to differentiate the indexes. More... | |
typedef std::map< double, ibis::bitvector * > | VMap |
Public Member Functions | |
virtual long | append (const char *dt, const char *df, uint32_t nnew) |
Extend the index. | |
virtual void | binBoundaries (std::vector< double > &) const |
The function binBoundaries and binWeights return bin boundaries and counts of each bin respectively. | |
virtual void | binWeights (std::vector< uint32_t > &) const |
virtual int | contractRange (ibis::qContinuousRange &rng) const |
virtual int64_t | estimate (const ibis::rangeJoin &expr, const ibis::bitvector &mask, const ibis::qRange *const range1, const ibis::qRange *const range2) const |
virtual void | estimate (const ibis::rangeJoin &expr, const ibis::bitvector &mask, const ibis::qRange *const range1, const ibis::qRange *const range2, ibis::bitvector64 &lower, ibis::bitvector64 &upper) const |
Evaluating a join condition with one (likely composite) index. | |
virtual int64_t | estimate (const ibis::index &idx2, const ibis::rangeJoin &expr, const ibis::bitvector &mask, const ibis::qRange *const range1, const ibis::qRange *const range2) const |
virtual int64_t | estimate (const ibis::index &idx2, const ibis::rangeJoin &expr, const ibis::bitvector &mask) const |
Estimate an upper bound for the number of pairs produced from marked records. | |
virtual int64_t | estimate (const ibis::index &idx2, const ibis::rangeJoin &expr) const |
Estimate an upper bound for the number of pairs. | |
virtual void | estimate (const ibis::index &idx2, const ibis::rangeJoin &expr, const ibis::bitvector &mask, const ibis::qRange *const range1, const ibis::qRange *const range2, ibis::bitvector64 &lower, ibis::bitvector64 &upper) const |
virtual void | estimate (const ibis::index &idx2, const ibis::rangeJoin &expr, const ibis::bitvector &mask, ibis::bitvector64 &lower, ibis::bitvector64 &upper) const |
Estimate the pairs for the range join operator. | |
virtual void | estimate (const ibis::index &idx2, const ibis::rangeJoin &expr, ibis::bitvector64 &lower, ibis::bitvector64 &upper) const |
Estimate the pairs for the range join operator. | |
virtual uint32_t | estimate (const ibis::qDiscreteRange &expr) const |
virtual void | estimate (const ibis::qDiscreteRange &expr, ibis::bitvector &lower, ibis::bitvector &upper) const |
Estimate the hits for discrete ranges, i.e., those translated from 'a IN (x, y, . | |
virtual uint32_t | estimate (const ibis::qContinuousRange &expr) const |
Returns an upper bound on the number of hits. | |
virtual void | estimate (const ibis::qContinuousRange &expr, ibis::bitvector &lower, ibis::bitvector &upper) const |
Computes an approximation of hits as a pair of lower and upper bounds. | |
virtual double | estimateCost (const ibis::qDiscreteRange &expr) const |
virtual double | estimateCost (const ibis::qContinuousRange &expr) const |
Estimate the code of evaluate a range condition. | |
virtual long | evaluate (const ibis::qDiscreteRange &, ibis::bitvector &) const |
To evaluate the exact hits. | |
virtual long | evaluate (const ibis::qContinuousRange &expr, ibis::bitvector &hits) const =0 |
To evaluate the exact hits. | |
virtual int | expandRange (ibis::qContinuousRange &rng) const |
The functions expandRange and contractRange expands or contracts the boundaries of a range condition so that the new range will have exact answers using the function estimate. | |
virtual const ibis::bitvector * | getBitvector (uint32_t i) const |
Return a pointer to the ith bitvector used in the index (may be 0). | |
virtual long | getCumulativeDistribution (std::vector< double > &bds, std::vector< uint32_t > &cts) const |
Cumulative distribution of the data. | |
virtual long | getDistribution (std::vector< double > &bbs, std::vector< uint32_t > &cts) const |
Binned distribution of the data. | |
virtual double | getMax () const |
The maximum value recorded in the index. | |
virtual double | getMin () const |
The minimum value recorded in the index. | |
virtual uint32_t | getNRows () const |
virtual double | getSum () const |
Compute the approximate sum of all the values indexed. | |
virtual const char * | name () const =0 |
Returns the name of the index, similar to the function type , but returns a string instead. | |
virtual uint32_t | numBitvectors () const |
Returns the number of bit vectors used by the index. | |
virtual void | print (std::ostream &out) const =0 |
Prints human readable information. | |
virtual int | read (ibis::fileManager::storage *st)=0 |
Reconstructs an index from an array of bytes. | |
virtual int | read (const char *name)=0 |
Reconstructs an index from the named file. | |
virtual void | speedTest (std::ostream &out) const |
Time some logical operations and print out their speed. | |
virtual INDEX_TYPE | type () const =0 |
Returns an index type identifier. | |
virtual float | undecidable (const ibis::qDiscreteRange &expr, ibis::bitvector &iffy) const |
virtual float | undecidable (const ibis::qContinuousRange &expr, ibis::bitvector &iffy) const |
Mark the position of the rows that can not be decided with this index. | |
virtual int | write (const char *name) const =0 |
Save index to a file. | |
virtual | ~index () |
The destructor. | |
Static Public Member Functions | |
static void | addBits (const std::vector< ibis::bitvector * > &bits, uint32_t ib, uint32_t ie, ibis::bitvector &res) |
Add the bts [ib:ie-1] to res . | |
static index * | create (const column *c, const char *name=0, const char *spec=0, int inEntirety=0) |
Index factory. | |
static void | divideCounts (array_t< uint32_t > &bounds, const array_t< uint32_t > &cnt) |
Determine how to split the array cnt , so that each group has roughly the same total value. | |
static bool | isIndex (const char *f, INDEX_TYPE t) |
Read the header of the named file to determine if it contains an index of the specified type. | |
template<typename E1, typename E2> | |
static void | mapValues (const array_t< E1 > &val1, const array_t< E2 > &val2, array_t< E1 > &bnd1, array_t< E2 > &bnd2, std::vector< uint32_t > &cnts) |
Compute a two-dimensional histogram. | |
template<typename E> | |
static void | mapValues (const array_t< E > &val, array_t< E > &bounds, std::vector< uint32_t > &cnts) |
template<typename E> | |
static void | mapValues (const array_t< E > &val, histogram &hist, uint32_t count=0) |
template<typename E> | |
static void | mapValues (const array_t< E > &val, VMap &bmap) |
static void | setBases (array_t< uint32_t > &bases, uint32_t card, uint32_t nbase=2) |
Fill the array bases with the values that cover the range [0, card). | |
static void | sumBits (const std::vector< ibis::bitvector * > &bits, const ibis::bitvector &tot, uint32_t ib, uint32_t ie, ibis::bitvector &res) |
Sum up bts [ib:ie-1] and add the result to res . | |
static void | sumBits (const std::vector< ibis::bitvector * > &bits, uint32_t ib, uint32_t ie, ibis::bitvector &res) |
Sum up bts [ib:ie-1] and place the result in res . | |
Protected Member Functions | |
virtual void | activate (uint32_t i, uint32_t j) const |
Regenerate bitvectors i (inclusive) through j (exclusive) from the underlying storage. | |
virtual void | activate (uint32_t i) const |
Regenerate the ith bitvector from the underlying storage. | |
virtual void | activate () const |
Regenerate all bitvectors from the underlying storage. | |
void | addBins (uint32_t ib, uint32_t ie, ibis::bitvector &res, const ibis::bitvector &tot) const |
Compute the sum of bit vectors [ib , ie ). | |
void | addBins (uint32_t ib, uint32_t ie, ibis::bitvector &res) const |
Add the sum of bits [ib] through bits [ie-1] to res . | |
virtual void | clear () |
Clear the existing content. | |
void | computeMinMax (const char *f, double &min, double &max) const |
void | dataFileName (const char *f, std::string &name) const |
Generate data file name from "f". | |
index (const ibis::column *c=0, ibis::fileManager::storage *s=0) | |
Protect the constructor so that ibis::index can not be instantiated directly. | |
void | indexFileName (const char *f, std::string &name) const |
Generate index file name from "f". | |
void | mapValues (const char *f, histogram &hist, uint32_t count=0) const |
Generate a histogram. | |
void | mapValues (const char *f, VMap &bmap) const |
Map the positions of each individual value. | |
void | optionalUnpack (std::vector< ibis::bitvector * > &bits, const char *opt) |
A function to decide whether to uncompress the bitvectors. | |
void | sumBins (uint32_t ib, uint32_t ie, ibis::bitvector &res, uint32_t ib0, uint32_t ie0) const |
Compute a new sum for bit vectors [ib, ie) by taking advantage of the old sum for bitvectors [ib0, ie0). | |
void | sumBins (uint32_t ib, uint32_t ie, ibis::bitvector &res) const |
Compute the bitwise OR of all bitvectors (in bits) from ib to ie. | |
Static Protected Member Functions | |
static void | indexFileName (std::string &name, const ibis::column *col1, const ibis::column *col2, const char *f=0) |
Generate the index file name for the composite index fromed on two columns. | |
Protected Attributes | |
std::vector< ibis::bitvector * > | bits |
A list of bitvectors. | |
const ibis::column * | col |
Pointer to the column this index is for. | |
const char * | fname |
The name of the file containing the index. | |
uint32_t | nrows |
The number of rows represented by the index. | |
array_t< int32_t > | offsets |
Starting positions of the bitvectors. | |
ibis::fileManager::storage * | str |
The underlying storage. | |
Classes | |
class | barrel |
A specialization that adds function setValue . More... |
Class ibis::index contains the common definitions and virtual functions of the class hierarchy. It is assumed that an ibis::index is for only one column. The user is to create an new index through the function ibis::index::create and only use the functions defined in this class.
The integer values of this enum type are used in the index files to differentiate the indexes.
Reordering the list will make some index files invalid ****
BINNING | ibis::bin. |
RANGE | ibis::range. |
MESA | ibis::interval. |
AMBIT | ibis::ambit, range-range two level encoding on bins. |
PALE | ibis::pale, equality-range encoding on bins. |
PACK | ibis::pack, range-equality encoding on bins. |
ZONE | ibis::zone, equality-equality encoding on bins. |
RELIC | ibis::relic, the basic bitmap index. |
ROSTER | ibis::roster, RID list. |
SLICE | ibis::slice, bit-sliced index. |
FADE | ibis::fade, multicomponent range encoding (unbinned). |
SBIAD | ibis::sbiad, multicomponent interval encoding (unbinned). |
SAPID | ibis::sapid, multicomponent equality encoding (unbinned). |
EGALE | ibis::egale, multicomponent equality encoding on bins. |
MOINS | ibis::moins, multicomponent range encoding on bins. |
ENTRE | ibis::entre, multicomponent interval encoding on bins. |
BAK | ibis::bak, reduced precision mapping, equality code. |
BAK2 |
ibis::bak2, splits each BAK bin in two, one less than the mapped value, one greater and equal to the mapped value.
|
KEYWORDS | ibis::keywords, boolean term-document matrix. |
MESH | not used. |
BAND | not used. |
DIREKTE | ibis::direkte, hash value to bitmaps. |
GENERIC | not used. |
BYLT | ibis::bylt, unbinned range-equality encoding. |
FUZZ | ibis::fuzz, unbinned interval-equality encoding. |
ZONA | ibis::zona, unbinned equality-equality encoding. |
FUGE | ibis::fuge, binned interval-equality encoding. |
EXTERN | externally defined index |
ibis::index::index | ( | const ibis::column * | c = 0 , |
|
ibis::fileManager::storage * | s = 0 | |||
) | [inline, protected] |
Protect the constructor so that ibis::index can not be instantiated directly.
It can not be instantiated because some functions are pure virtual, but this also reduces the size of public interface.
void ibis::index::activate | ( | uint32_t | i, | |
uint32_t | j | |||
) | const [protected, virtual] |
Regenerate bitvectors i (inclusive) through j (exclusive) from the underlying storage.
References bits, col, fname, ibis::gVerbose, ibis::column::logWarning(), ibis::column::name(), nrows, offsets, array_t< T >::size(), and str.
void ibis::index::addBins | ( | uint32_t | ib, | |
uint32_t | ie, | |||
ibis::bitvector & | res, | |||
const ibis::bitvector & | tot | |||
) | const [protected] |
Compute the sum of bit vectors [ib
, ie
).
This is basically a copy of the function sumBins
(without the 4th arguments).
If computing a complement is faster, assume all bit vectors add up to tot
.
There are two changes: (1) if res
has the same number of bits as tot
, the new sum is added to the existing bitvector, and (2) when it computes the sum through complements, it performs a subtraction from tot
.
References activate(), ibis::bitvector::adjustSize(), bits, ibis::bitvector::bitsPerLiteral(), ibis::bitvector::bytes(), array_t< T >::clear(), ibis::bitvector::cnt(), col, ibis::bitvector::copy(), ibis::horometer::CPUTime(), ibis::bitvector::decompress(), array_t< T >::empty(), fname, ibis::gVerbose, ibis::column::name(), nrows, offsets, ibis::horometer::realTime(), array_t< T >::resize(), ibis::bitvector::set(), array_t< T >::size(), ibis::bitvector::size(), ibis::horometer::start(), ibis::horometer::stop(), str, and ibis::bitvector::swap().
void ibis::index::addBins | ( | uint32_t | ib, | |
uint32_t | ie, | |||
ibis::bitvector & | res | |||
) | const [protected] |
Add the sum of bits
[ib] through bits
[ie-1] to res
.
The most important difference between this function and sumBins
is that this function always use bits
[ib] through bits
[ie-1].
Always explicitly use bits
[ib] through bits
[ie-1].
This is similar to the function addBits
.
References activate(), ibis::bitvector::adjustSize(), bits, ibis::bitvector::bitsPerLiteral(), ibis::bitvector::bytes(), ibis::bitvector::cnt(), col, ibis::bitvector::copy(), ibis::horometer::CPUTime(), ibis::bitvector::decompress(), ibis::gVerbose, ibis::column::name(), nrows, ibis::horometer::realTime(), ibis::bitvector::set(), ibis::bitvector::size(), ibis::horometer::start(), and ibis::horometer::stop().
Referenced by ibis::fuge::estimate(), ibis::zona::evaluate(), ibis::fuzz::evaluate(), and ibis::bylt::evaluate().
void ibis::index::addBits | ( | const std::vector< ibis::bitvector * > & | bts, | |
uint32_t | ib, | |||
uint32_t | ie, | |||
ibis::bitvector & | res | |||
) | [static] |
Add the bts
[ib:ie-1] to res
.
Since the set of bit vectors are explicitly given, there is no need to perform activation. To minimize the burden of deciding which bit vectors to activate, this function always use the bts
[ib] through bts
[ie-1].
This function still has to check whether a particular bts[i] is a null pointer before using the bit vector.
References ibis::bitvector::bytes(), ibis::bitvector::cnt(), ibis::bitvector::compress(), ibis::bitvector::copy(), ibis::horometer::CPUTime(), ibis::bitvector::decompress(), ibis::gVerbose, ibis::horometer::realTime(), ibis::bitvector::set(), ibis::bitvector::size(), ibis::horometer::start(), and ibis::horometer::stop().
Referenced by ibis::zona::evaluate().
virtual void ibis::index::binBoundaries | ( | std::vector< double > & | ) | const [inline, virtual] |
The function binBoundaries and binWeights return bin boundaries and counts of each bin respectively.
Reimplemented in ibis::bin, ibis::range, ibis::mesa, ibis::ambit, ibis::pale, ibis::pack, ibis::zone, ibis::egale, ibis::bak, ibis::bak2, ibis::direkte, ibis::keywords, and ibis::relic.
Referenced by ibis::part::coarsenBins(), ibis::part::get2DDistributionI(), and ibis::column::preferredBounds().
void ibis::index::clear | ( | ) | [protected, virtual] |
Clear the existing content.
Free the objectes pointed to by the pointers.
Reimplemented in ibis::bin, ibis::ambit, ibis::pale, ibis::pack, ibis::zone, ibis::fuge, ibis::egale, ibis::keywords, ibis::relic, ibis::slice, ibis::fade, ibis::fuzz, ibis::bylt, and ibis::zona.
References bits, array_t< T >::clear(), fname, nrows, offsets, and str.
Referenced by ibis::bin::binning(), ibis::bin::binningT(), ibis::relic::clear(), ibis::keywords::clear(), ibis::bin::clear(), ibis::relic::read(), ibis::keywords::read(), ibis::direkte::read(), and ~index().
ibis::index * ibis::index::create | ( | const column * | c, | |
const char * | dfname = 0 , |
|||
const char * | spec = 0 , |
|||
int | inEntirety = 0 | |||
) | [static] |
Index factory.
It creates a specific concrete index object.
If this function fails to read the specified index file, it attempts to create a new index based on the current data file and index specification. The new index will be written under the old name.
This function returns nil if it fails to create an index. It captures and absorbs exceptions in most cases.
c | a pointer to a ibis::column object. This argument must be present. | |
name | a name, it can be the name of the index file, the data file, or the directory containing the data file. If the name ends with '.idx' is treated as an index file and the content of the file is read. If the name does not end with '.idx', it is assumed to be the data file name unless it is determined to be a directory name. If it is a directory name, the data file is assumed to be in the specified directory with a file name that is same as the column name. Once a data file is found, the content of the data file is read to construct a new index according to the return value of function indexSpec. The argument name can be nil, in which case, the data file name is constructed by concatenate the return of partition()->currentDataDir() and the column name. |
spec | the index specification. This string contains the parameters for how to create an index. The most general form is <binning .../> <encoding .../> <compression .../>. <binning none /> <encoding equality /> |
spec
is not specified, this function checks the specification in the following order.
inEntirety | If this value is greater than zero, this function will attempt to read the whole index file in one shot. In cases where there is enough memory to hold all indexes in-memory, this option may reduce I/O overhead. Additionally, it places all bitmaps in an index consecutively in memory, which may also speed up memory accesses when the bitmaps are used to answer queries. Of course, the drawback is that this option requires more memory than necessary and may actually take more time to read the data files since many of the bitmaps may not be actually needed. The default value of this argument is 0, which allows bitmaps to be read into memory as needed. |
References AMBIT, BAK, BAK2, ibis::fileManager::storage::begin(), BINNING, ibis::util::logger::buffer(), BYLT, ibis::BYTE, ibis::CATEGORY, col, ibis::column::computeMinMax(), ibis::horometer::CPUTime(), ibis::part::currentDataDir(), DIREKTE, ibis::DOUBLE, EGALE, ENTRE, FADE, ibis::FLOAT, FUGE, FUZZ, getNRows(), ibis::gParameters(), ibis::gVerbose, ibis::part::indexSpec(), ibis::column::indexSpec(), ibis::fileManager::instance(), ibis::INT, ibis::resource::isTrue(), KEYWORDS, ibis::column::logMessage(), ibis::column::logWarning(), ibis::LONG, ibis::column::lowerBound(), MESA, MOINS, name(), ibis::column::name(), ibis::part::name(), ibis::part::nRows(), PACK, PALE, ibis::column::partition(), print(), RANGE, read(), ibis::horometer::realTime(), RELIC, SAPID, SBIAD, ibis::SHORT, SLICE, ibis::horometer::start(), ibis::horometer::stop(), str, ibis::TEXT, ibis::fileManager::tryGetFile(), ibis::column::type(), ibis::UBYTE, ibis::UINT, ibis::ULONG, ibis::column::upperBound(), ibis::USHORT, ibis::bad_alloc::what(), write(), ZONA, and ZONE.
Referenced by ibis::column::append(), ibis::mensa::buildIndex(), ibis::part::loadIndex(), and ibis::column::loadIndex().
void ibis::index::divideCounts | ( | array_t< uint32_t > & | bdry, | |
const array_t< uint32_t > & | cnt | |||
) | [static] |
Determine how to split the array cnt
, so that each group has roughly the same total value.
bdry
stores the dividers. The first group is [0, bdry[0]), the second group is [bdry[0], bdry[1]), and so on. Ths function uses the size of array bdry
to determine the number of groups to use. References array_t< T >::back(), array_t< T >::begin(), ibis::util::logger::buffer(), array_t< T >::clear(), array_t< T >::empty(), array_t< T >::end(), ibis::gVerbose, array_t< T >::push_back(), array_t< T >::resize(), and array_t< T >::size().
Referenced by ibis::part::adaptive2DBins(), ibis::part::adaptive3DBins(), ibis::part::adaptiveFloats(), ibis::part::adaptiveFloatsDetailed(), ibis::part::adaptiveInts(), ibis::part::adaptiveIntsDetailed(), ibis::part::coarsenBins(), ibis::part::get2DDistributionI(), ibis::part::getCumulativeDistribution(), ibis::part::getDistribution(), and ibis::bin::scanAndPartition().
int64_t ibis::index::estimate | ( | const ibis::index & | idx2, | |
const ibis::rangeJoin & | expr, | |||
const ibis::bitvector & | mask | |||
) | const [virtual] |
Estimate an upper bound for the number of pairs produced from marked records.
References ibis::bitvector::cnt(), col, ibis::gVerbose, ibis::part::nRows(), and ibis::column::partition().
void ibis::index::estimate | ( | const ibis::index & | idx2, | |
const ibis::rangeJoin & | expr, | |||
const ibis::bitvector & | mask, | |||
ibis::bitvector64 & | lower, | |||
ibis::bitvector64 & | upper | |||
) | const [virtual] |
Estimate the pairs for the range join operator.
Only records that are masked are evaluated.
References ibis::bitvector64::clear(), col, ibis::gVerbose, ibis::part::nRows(), ibis::column::partition(), and ibis::bitvector64::set().
void ibis::index::estimate | ( | const ibis::qDiscreteRange & | expr, | |
ibis::bitvector & | lower, | |||
ibis::bitvector & | upper | |||
) | const [virtual] |
Estimate the hits for discrete ranges, i.e., those translated from 'a IN (x, y, .
A trivial implementation to indicate the index can not determine any row.
.)'.
Reimplemented in ibis::direkte, and ibis::relic.
References col, ibis::qDiscreteRange::colName(), ibis::gVerbose, ibis::part::nRows(), ibis::column::partition(), and ibis::bitvector::set().
virtual void ibis::index::estimate | ( | const ibis::qContinuousRange & | expr, | |
ibis::bitvector & | lower, | |||
ibis::bitvector & | upper | |||
) | const [inline, virtual] |
Computes an approximation of hits as a pair of lower and upper bounds.
expr | the query expression to be evaluated. | |
lower | a bitvector marking a subset of the hits. All rows marked with one (1) are definitely hits. | |
upper | a bitvector marking a superset of the hits. All hits are marked with one, but some of the rows marked one may not be hits. If the variable upper is empty, the variable lower is assumed to contain the exact answer. |
Reimplemented in ibis::bin, ibis::range, ibis::mesa, ibis::ambit, ibis::pale, ibis::pack, ibis::zone, ibis::fuge, ibis::egale, ibis::moins, ibis::entre, ibis::direkte, ibis::keywords, ibis::relic, and ibis::slice.
References nrows, and ibis::bitvector::set().
Referenced by ibis::column::estimateRange(), ibis::column::evaluateRange(), and ibis::category::search().
virtual long ibis::index::evaluate | ( | const ibis::qDiscreteRange & | , | |
ibis::bitvector & | ||||
) | const [inline, virtual] |
To evaluate the exact hits.
On success, return the number of hits, otherwise a negative value is returned.
Reimplemented in ibis::direkte, ibis::relic, ibis::slice, ibis::fade, ibis::sbiad, and ibis::sapid.
virtual long ibis::index::evaluate | ( | const ibis::qContinuousRange & | expr, | |
ibis::bitvector & | hits | |||
) | const [pure virtual] |
To evaluate the exact hits.
On success, return the number of hits, otherwise a negative value is returned.
Implemented in ibis::bin, ibis::range, ibis::mesa, ibis::ambit, ibis::pale, ibis::pack, ibis::zone, ibis::fuge, ibis::egale, ibis::moins, ibis::entre, ibis::direkte, ibis::keywords, ibis::relic, ibis::slice, ibis::fade, ibis::sbiad, ibis::sapid, ibis::fuzz, ibis::bylt, and ibis::zona.
Referenced by ibis::part::coarsenBins(), and ibis::part::get2DDistributionI().
virtual int ibis::index::expandRange | ( | ibis::qContinuousRange & | rng | ) | const [inline, virtual] |
The functions expandRange and contractRange expands or contracts the boundaries of a range condition so that the new range will have exact answers using the function estimate.
The default implementation provided does nothing since this is only meaningful for indices based on bins.
Reimplemented in ibis::bin, ibis::range, ibis::bak, and ibis::bak2.
Referenced by ibis::column::expandRange().
long ibis::index::getCumulativeDistribution | ( | std::vector< double > & | bds, | |
std::vector< uint32_t > & | cts | |||
) | const [virtual] |
Cumulative distribution of the data.
A brute-force approach to get an accurate cumulative distribution.
Reimplemented in ibis::bin, ibis::direkte, and ibis::relic.
long ibis::index::getDistribution | ( | std::vector< double > & | bbs, | |
std::vector< uint32_t > & | cts | |||
) | const [virtual] |
Binned distribution of the data.
A brute-force approach to get an accurate distribution.
Reimplemented in ibis::bin, ibis::direkte, and ibis::relic.
virtual double ibis::index::getSum | ( | ) | const [inline, virtual] |
Compute the approximate sum of all the values indexed.
If it decides that computing the sum directly from the vertical partition is more efficient, it will return NaN immediately.
Reimplemented in ibis::bin, ibis::range, ibis::mesa, ibis::ambit, ibis::pack, ibis::egale, ibis::moins, ibis::entre, ibis::direkte, ibis::keywords, ibis::relic, ibis::slice, and ibis::fade.
void ibis::index::indexFileName | ( | std::string & | iname, | |
const ibis::column * | col1, | |||
const ibis::column * | col2, | |||
const char * | dir = 0 | |||
) | [static, protected] |
Generate the index file name for the composite index fromed on two columns.
May use argument "dir" if it is not null.
References ibis::part::currentDataDir(), ibis::column::name(), and ibis::column::partition().
bool ibis::index::isIndex | ( | const char * | f, | |
INDEX_TYPE | t | |||
) | [static] |
Read the header of the named file to determine if it contains an index of the specified type.
Returns true if the correct header is found, else return false.
Referenced by ibis::bak2::read(), and ibis::bak::read().
void ibis::index::mapValues | ( | const char * | f, | |
histogram & | hist, | |||
uint32_t | count = 0 | |||
) | const [protected] |
Generate a histogram.
Compute a histogram of a column.
Given a property file containing the values of a column, this function counts the occurances of each distinct values. Argument count
is the number of samples to be used for building the histogram. If it is zero or greater than half of the number of values in the data files, all values are used, otherwise, approximately count
values will be sampled with nearly uniform distances from each other.
References ibis::bitvector::adjustSize(), ibis::bitvector::appendFill(), ibis::bitvector::bitsPerLiteral(), ibis::util::logger::buffer(), ibis::BYTE, ibis::CATEGORY, col, dataFileName(), ibis::DOUBLE, ibis::bitvector::firstIndexSet(), ibis::FLOAT, ibis::fileManager::getFile(), ibis::column::getNullMask(), ibis::gVerbose, ibis::bitvector::indexSet::indices(), ibis::fileManager::instance(), ibis::INT, ibis::bitvector::indexSet::isRange(), ibis::column::logMessage(), ibis::column::logWarning(), ibis::LONG, ibis::bitvector::indexSet::nIndices(), ibis::part::nRows(), ibis::column::partition(), ibis::horometer::realTime(), ibis::SHORT, array_t< T >::size(), ibis::bitvector::size(), ibis::horometer::start(), ibis::horometer::stop(), ibis::TEXT, ibis::column::type(), ibis::UBYTE, ibis::UINT, ibis::ULONG, and ibis::USHORT.
void ibis::index::mapValues | ( | const char * | f, | |
VMap & | bmap | |||
) | const [protected] |
Map the positions of each individual value.
Map the locations of the values of one column.
Given a file containing the values of a column, this function maps the position of each individual values and stores the result in a set of bitmaps.
References ibis::bitvector::adjustSize(), ibis::bitvector::bitsPerLiteral(), ibis::util::logger::buffer(), ibis::BYTE, ibis::CATEGORY, col, dataFileName(), ibis::DOUBLE, ibis::bitvector::firstIndexSet(), ibis::FLOAT, ibis::fileManager::getFile(), ibis::part::getMask(), ibis::column::getNullMask(), ibis::part::getState(), ibis::gVerbose, ibis::bitvector::indexSet::indices(), ibis::fileManager::instance(), ibis::INT, ibis::bitvector::indexSet::isRange(), ibis::column::logMessage(), ibis::column::logWarning(), ibis::LONG, ibis::bitvector::indexSet::nIndices(), ibis::part::nRows(), ibis::column::partition(), ibis::horometer::realTime(), ibis::bitvector::set(), ibis::bitvector::setBit(), ibis::SHORT, ibis::bitvector::size(), array_t< T >::size(), ibis::horometer::start(), ibis::horometer::stop(), ibis::TEXT, ibis::column::type(), ibis::UBYTE, ibis::UINT, ibis::ULONG, and ibis::USHORT.
void ibis::index::mapValues | ( | const array_t< E1 > & | val1, | |
const array_t< E2 > & | val2, | |||
array_t< E1 > & | bnd1, | |||
array_t< E2 > & | bnd2, | |||
std::vector< uint32_t > & | cnts | |||
) | [inline, static] |
Compute a two-dimensional histogram.
Given two arrays of the same size, count the number of appearance of each combinations defined by bnd1
and bnd2
. If the arrays bnd1
or bnd2
contain values in ascending order, their values are directly used in this function. The empty one will be replaced by a linear division of actual range into 256 bins. The array counts
stores the 2-D bins in raster-scan order with the second variable, val2
, as the faster varying variables. More specifically, the bins for variable 1 are defined as:
/// (..., bnd1[0]) [bnd1[1], bin1[2]) [bnd1[2], bin1[3) ... [bnd1.back(), ...) ///
/// (..., bnd2[0]) [bnd2[1], bin2[2]) [bnd2[2], bin2[3) ... [bnd2.back(), ...) ///
counts
are for the following bins /// (..., bin1[0]) (.... bin2[0]) /// (..., bin1[0]) [bin2[0], bin2[1]) /// (..., bin1[0]) [bin2[1], bin2[2]) /// ... /// (..., bin1[0]) [bin2.back(), ...) /// [bin1[0], bin1[1]) (..., bin2[0]) /// [bin1[0], bin1[1]) [bin2[0], bin2[1]) /// [bin1[0], bin1[1]) [bin2[1], bin2[2]) /// ... /// [bin1[0], bin1[1]) [bin2.back(), ...) /// ... ///
References array_t< T >::empty(), array_t< T >::find(), array_t< T >::front(), array_t< T >::push_back(), array_t< T >::reserve(), and array_t< T >::size().
virtual const char* ibis::index::name | ( | ) | const [pure virtual] |
Returns the name of the index, similar to the function type
, but returns a string instead.
Implemented in ibis::bin, ibis::range, ibis::mesa, ibis::ambit, ibis::pale, ibis::pack, ibis::zone, ibis::fuge, ibis::egale, ibis::moins, ibis::entre, ibis::bak, ibis::bak2, ibis::direkte, ibis::keywords, ibis::relic, ibis::slice, ibis::fade, ibis::sbiad, ibis::sapid, ibis::fuzz, ibis::bylt, and ibis::zona.
Referenced by create().
void ibis::index::optionalUnpack | ( | std::vector< ibis::bitvector * > & | bits, | |
const char * | opt | |||
) | [protected] |
A function to decide whether to uncompress the bitvectors.
Decide whether to uncompress the bitmaps.
References col, ibis::gParameters(), ibis::column::name(), ibis::part::name(), nrows, and ibis::column::partition().
Referenced by ibis::bak2::append(), ibis::bak::append(), ibis::bin::bin(), ibis::relic::construct(), and ibis::mesa::mesa().
virtual void ibis::index::print | ( | std::ostream & | out | ) | const [pure virtual] |
Prints human readable information.
Outputs information about the index as text to the specified output stream.
Implemented in ibis::bin, ibis::range, ibis::mesa, ibis::ambit, ibis::pale, ibis::pack, ibis::zone, ibis::fuge, ibis::egale, ibis::moins, ibis::entre, ibis::bak, ibis::bak2, ibis::direkte, ibis::keywords, ibis::relic, ibis::slice, ibis::fade, ibis::sbiad, ibis::sapid, ibis::fuzz, ibis::bylt, and ibis::zona.
Referenced by ibis::column::append(), create(), and ibis::column::loadIndex().
virtual int ibis::index::read | ( | ibis::fileManager::storage * | st | ) | [pure virtual] |
Reconstructs an index from an array of bytes.
Intended for internal use only!
Implemented in ibis::bin, ibis::range, ibis::ambit, ibis::pale, ibis::pack, ibis::zone, ibis::fuge, ibis::egale, ibis::direkte, ibis::keywords, ibis::relic, ibis::slice, ibis::fade, ibis::fuzz, ibis::bylt, and ibis::zona.
virtual int ibis::index::read | ( | const char * | name | ) | [pure virtual] |
Reconstructs an index from the named file.
The name can be the directory containing an index file. In this case, the name of the index file must be the name of the column followed by ".idx" suffix.
Implemented in ibis::bin, ibis::range, ibis::ambit, ibis::pale, ibis::pack, ibis::zone, ibis::fuge, ibis::egale, ibis::bak, ibis::bak2, ibis::direkte, ibis::keywords, ibis::relic, ibis::slice, ibis::fade, ibis::fuzz, ibis::bylt, and ibis::zona.
Referenced by create().
void ibis::index::setBases | ( | array_t< uint32_t > & | bases, | |
uint32_t | card, | |||
uint32_t | nbase = 2 | |||
) | [static] |
Fill the array bases with the values that cover the range [0, card).
Assumes at least two components. For one component case use indices defined explicit for one component cases.
References array_t< T >::resize().
void ibis::index::sumBins | ( | uint32_t | ib, | |
uint32_t | ie, | |||
ibis::bitvector & | res, | |||
uint32_t | ib0, | |||
uint32_t | ie0 | |||
) | const [protected] |
Compute a new sum for bit vectors [ib, ie) by taking advantage of the old sum for bitvectors [ib0, ie0).
This function attempts to take advantage of existing results of a previously computed sum.
References activate(), bits, ibis::bitvector::bytes(), ibis::bitvector::cnt(), col, ibis::gVerbose, ibis::column::name(), nrows, offsets, array_t< T >::size(), ibis::bitvector::size(), and sumBins().
void ibis::index::sumBins | ( | uint32_t | ib, | |
uint32_t | ie, | |||
ibis::bitvector & | res | |||
) | const [protected] |
Compute the bitwise OR of all bitvectors (in bits) from ib to ie.
Sum up bits[ib:ie-1]
and place the result in res.
As usual, bits[ib] is included but bits[ie] is excluded.
The bitmaps (bits) are held by this index object and may be regenerated as needed. It uses the combined strategy that was determined in previous tests. The basic rule is as follows.
References activate(), bits, ibis::bitvector::bitsPerLiteral(), ibis::bitvector::bytes(), array_t< T >::clear(), ibis::bitvector::cnt(), col, ibis::bitvector::copy(), ibis::horometer::CPUTime(), ibis::bitvector::decompress(), ibis::bitvector::flip(), fname, ibis::gVerbose, ibis::column::name(), nrows, offsets, ibis::horometer::realTime(), array_t< T >::resize(), ibis::bitvector::set(), array_t< T >::size(), ibis::bitvector::size(), ibis::horometer::start(), ibis::horometer::stop(), str, and ibis::bitvector::swap().
Referenced by ibis::zone::estimate(), ibis::pale::estimate(), ibis::fuge::estimate(), ibis::direkte::estimate(), ibis::bin::estimate(), ibis::zona::evaluate(), ibis::fuzz::evaluate(), ibis::bylt::evaluate(), ibis::relic::evaluate(), ibis::direkte::evaluate(), ibis::bin::evaluate(), and sumBins().
void ibis::index::sumBits | ( | const std::vector< ibis::bitvector * > & | bts, | |
const ibis::bitvector & | tot, | |||
uint32_t | ib, | |||
uint32_t | ie, | |||
ibis::bitvector & | res | |||
) | [static] |
Sum up bts
[ib:ie-1] and add the result to res
.
It is assumed that all bts add up to tot
. In the other version of sumBits without this argument tot
, it was assumed that all bitmaps add up to a bit vector of all ones. The decision of whether to use bts[ib:ie-1] directly or use the subtractive version (using bts[0:ib-1] and bts[ie:nobs-1]) are based on the number of bit vectors.
References ibis::bitvector::bitsPerLiteral(), ibis::bitvector::bytes(), ibis::bitvector::cnt(), ibis::bitvector::copy(), ibis::horometer::CPUTime(), ibis::bitvector::decompress(), ibis::gVerbose, ibis::horometer::realTime(), ibis::bitvector::set(), ibis::bitvector::size(), ibis::horometer::start(), and ibis::horometer::stop().
void ibis::index::sumBits | ( | const std::vector< ibis::bitvector * > & | bts, | |
uint32_t | ib, | |||
uint32_t | ie, | |||
ibis::bitvector & | res | |||
) | [static] |
Sum up bts
[ib:ie-1] and place the result in res
.
This function always uses the operator |=. Tests show that using the function setBit
is always slower.
References ibis::bitvector::bitsPerLiteral(), ibis::bitvector::bytes(), ibis::bitvector::cnt(), ibis::bitvector::compress(), ibis::bitvector::copy(), ibis::horometer::CPUTime(), ibis::bitvector::decompress(), ibis::bitvector::flip(), ibis::gVerbose, ibis::horometer::realTime(), ibis::bitvector::set(), ibis::bitvector::size(), ibis::horometer::start(), ibis::horometer::stop(), and ibis::bitvector::swap().
Referenced by ibis::part::adaptiveFloatsDetailed(), ibis::part::adaptiveIntsDetailed(), and ibis::mesa::mesa().
virtual float ibis::index::undecidable | ( | const ibis::qContinuousRange & | expr, | |
ibis::bitvector & | iffy | |||
) | const [inline, virtual] |
Mark the position of the rows that can not be decided with this index.
expr | the range conditions to be evaluated. | |
iffy | the bitvector marking the positions of rows that can not be decided using the index. Return value is the expected fraction of undecided rows that might satisfy the range conditions. |
Reimplemented in ibis::bin, ibis::range, ibis::mesa, ibis::ambit, ibis::pale, ibis::pack, ibis::zone, ibis::egale, ibis::direkte, ibis::keywords, and ibis::relic.
Referenced by ibis::column::getUndecidable().
virtual int ibis::index::write | ( | const char * | name | ) | const [pure virtual] |
Save index to a file.
Outputs the index in a compact binary format to the named file or directory. The index file contains a header that can be identified by the function isIndex.
Implemented in ibis::bin, ibis::range, ibis::mesa, ibis::ambit, ibis::pale, ibis::pack, ibis::zone, ibis::fuge, ibis::egale, ibis::moins, ibis::entre, ibis::bak, ibis::bak2, ibis::direkte, ibis::keywords, ibis::relic, ibis::slice, ibis::fade, ibis::sbiad, ibis::sapid, ibis::fuzz, ibis::bylt, and ibis::zona.
Referenced by ibis::column::append(), and create().
ibis::fileManager::storage* ibis::index::str [mutable, protected] |
The underlying storage.
It may be nil if bitvectors are not from a storage object managed by the file manager.
Referenced by activate(), addBins(), ibis::bin::bin(), clear(), create(), ibis::pack::getSum(), ibis::relic::getSum(), ibis::mesa::getSum(), ibis::moins::getSum(), ibis::entre::getSum(), ibis::egale::getSum(), ibis::fuge::read(), ibis::ambit::read(), ibis::relic::read(), ibis::keywords::read(), ibis::fade::read(), ibis::direkte::read(), ibis::egale::read(), ibis::bin::read(), ibis::relic::relic(), sumBins(), ibis::sbiad::write(), ibis::sapid::write(), ibis::relic::write(), ibis::range::write(), ibis::mesa::write(), ibis::keywords::write(), ibis::fade::write(), ibis::direkte::write(), ibis::bak2::write(), ibis::bak::write(), ibis::moins::write(), ibis::entre::write(), and ibis::egale::write().