ibis::index Class Reference
[FastBit IBIS key implementation objects.]

The base index class. More...

#include <index.h>

Inheritance diagram for ibis::index:

ibis::bin ibis::direkte ibis::keywords ibis::relic ibis::ambit ibis::bak ibis::bak2 ibis::egale ibis::fuge ibis::mesa ibis::pack ibis::pale ibis::range ibis::zone ibis::bylt ibis::fade ibis::fuzz ibis::slice ibis::zona

List of all members.

Public Types

typedef std::map< double,
uint32_t > 
histogram
enum  INDEX_TYPE {
  BINNING = 0, RANGE, MESA, AMBIT,
  PALE, PACK, ZONE, RELIC,
  ROSTER, SLICE, FADE, SBIAD,
  SAPID, EGALE, MOINS, ENTRE,
  BAK, BAK2, KEYWORDS, MESH,
  BAND, DIREKTE, GENERIC, BYLT,
  FUZZ, ZONA, FUGE, EXTERN
}
 The integer values of this enum type are used in the index files to differentiate the indexes. More...
typedef std::map< double,
ibis::bitvector * > 
VMap

Public Member Functions

virtual long append (const char *dt, const char *df, uint32_t nnew)
 Extend the index.
virtual void binBoundaries (std::vector< double > &) const
 The function binBoundaries and binWeights return bin boundaries and counts of each bin respectively.
virtual void binWeights (std::vector< uint32_t > &) const
virtual int contractRange (ibis::qContinuousRange &rng) const
virtual int64_t estimate (const ibis::rangeJoin &expr, const ibis::bitvector &mask, const ibis::qRange *const range1, const ibis::qRange *const range2) const
virtual void estimate (const ibis::rangeJoin &expr, const ibis::bitvector &mask, const ibis::qRange *const range1, const ibis::qRange *const range2, ibis::bitvector64 &lower, ibis::bitvector64 &upper) const
 Evaluating a join condition with one (likely composite) index.
virtual int64_t estimate (const ibis::index &idx2, const ibis::rangeJoin &expr, const ibis::bitvector &mask, const ibis::qRange *const range1, const ibis::qRange *const range2) const
virtual int64_t estimate (const ibis::index &idx2, const ibis::rangeJoin &expr, const ibis::bitvector &mask) const
 Estimate an upper bound for the number of pairs produced from marked records.
virtual int64_t estimate (const ibis::index &idx2, const ibis::rangeJoin &expr) const
 Estimate an upper bound for the number of pairs.
virtual void estimate (const ibis::index &idx2, const ibis::rangeJoin &expr, const ibis::bitvector &mask, const ibis::qRange *const range1, const ibis::qRange *const range2, ibis::bitvector64 &lower, ibis::bitvector64 &upper) const
virtual void estimate (const ibis::index &idx2, const ibis::rangeJoin &expr, const ibis::bitvector &mask, ibis::bitvector64 &lower, ibis::bitvector64 &upper) const
 Estimate the pairs for the range join operator.
virtual void estimate (const ibis::index &idx2, const ibis::rangeJoin &expr, ibis::bitvector64 &lower, ibis::bitvector64 &upper) const
 Estimate the pairs for the range join operator.
virtual uint32_t estimate (const ibis::qDiscreteRange &expr) const
virtual void estimate (const ibis::qDiscreteRange &expr, ibis::bitvector &lower, ibis::bitvector &upper) const
 Estimate the hits for discrete ranges, i.e., those translated from 'a IN (x, y, .
virtual uint32_t estimate (const ibis::qContinuousRange &expr) const
 Returns an upper bound on the number of hits.
virtual void estimate (const ibis::qContinuousRange &expr, ibis::bitvector &lower, ibis::bitvector &upper) const
 Computes an approximation of hits as a pair of lower and upper bounds.
virtual double estimateCost (const ibis::qDiscreteRange &expr) const
virtual double estimateCost (const ibis::qContinuousRange &expr) const
 Estimate the code of evaluate a range condition.
virtual long evaluate (const ibis::qDiscreteRange &, ibis::bitvector &) const
 To evaluate the exact hits.
virtual long evaluate (const ibis::qContinuousRange &expr, ibis::bitvector &hits) const =0
 To evaluate the exact hits.
virtual int expandRange (ibis::qContinuousRange &rng) const
 The functions expandRange and contractRange expands or contracts the boundaries of a range condition so that the new range will have exact answers using the function estimate.
virtual const ibis::bitvectorgetBitvector (uint32_t i) const
 Return a pointer to the ith bitvector used in the index (may be 0).
virtual long getCumulativeDistribution (std::vector< double > &bds, std::vector< uint32_t > &cts) const
 Cumulative distribution of the data.
virtual long getDistribution (std::vector< double > &bbs, std::vector< uint32_t > &cts) const
 Binned distribution of the data.
virtual double getMax () const
 The maximum value recorded in the index.
virtual double getMin () const
 The minimum value recorded in the index.
virtual uint32_t getNRows () const
virtual double getSum () const
 Compute the approximate sum of all the values indexed.
virtual const char * name () const =0
 Returns the name of the index, similar to the function type, but returns a string instead.
virtual uint32_t numBitvectors () const
 Returns the number of bit vectors used by the index.
virtual void print (std::ostream &out) const =0
 Prints human readable information.
virtual int read (ibis::fileManager::storage *st)=0
 Reconstructs an index from an array of bytes.
virtual int read (const char *name)=0
 Reconstructs an index from the named file.
virtual void speedTest (std::ostream &out) const
 Time some logical operations and print out their speed.
virtual INDEX_TYPE type () const =0
 Returns an index type identifier.
virtual float undecidable (const ibis::qDiscreteRange &expr, ibis::bitvector &iffy) const
virtual float undecidable (const ibis::qContinuousRange &expr, ibis::bitvector &iffy) const
 Mark the position of the rows that can not be decided with this index.
virtual int write (const char *name) const =0
 Save index to a file.
virtual ~index ()
 The destructor.

Static Public Member Functions

static void addBits (const std::vector< ibis::bitvector * > &bits, uint32_t ib, uint32_t ie, ibis::bitvector &res)
 Add the bts[ib:ie-1] to res.
static indexcreate (const column *c, const char *name=0, const char *spec=0, int inEntirety=0)
 Index factory.
static void divideCounts (array_t< uint32_t > &bounds, const array_t< uint32_t > &cnt)
 Determine how to split the array cnt, so that each group has roughly the same total value.
static bool isIndex (const char *f, INDEX_TYPE t)
 Read the header of the named file to determine if it contains an index of the specified type.
template<typename E1, typename E2>
static void mapValues (const array_t< E1 > &val1, const array_t< E2 > &val2, array_t< E1 > &bnd1, array_t< E2 > &bnd2, std::vector< uint32_t > &cnts)
 Compute a two-dimensional histogram.
template<typename E>
static void mapValues (const array_t< E > &val, array_t< E > &bounds, std::vector< uint32_t > &cnts)
template<typename E>
static void mapValues (const array_t< E > &val, histogram &hist, uint32_t count=0)
template<typename E>
static void mapValues (const array_t< E > &val, VMap &bmap)
static void setBases (array_t< uint32_t > &bases, uint32_t card, uint32_t nbase=2)
 Fill the array bases with the values that cover the range [0, card).
static void sumBits (const std::vector< ibis::bitvector * > &bits, const ibis::bitvector &tot, uint32_t ib, uint32_t ie, ibis::bitvector &res)
 Sum up bts[ib:ie-1] and add the result to res.
static void sumBits (const std::vector< ibis::bitvector * > &bits, uint32_t ib, uint32_t ie, ibis::bitvector &res)
 Sum up bts[ib:ie-1] and place the result in res.

Protected Member Functions

virtual void activate (uint32_t i, uint32_t j) const
 Regenerate bitvectors i (inclusive) through j (exclusive) from the underlying storage.
virtual void activate (uint32_t i) const
 Regenerate the ith bitvector from the underlying storage.
virtual void activate () const
 Regenerate all bitvectors from the underlying storage.
void addBins (uint32_t ib, uint32_t ie, ibis::bitvector &res, const ibis::bitvector &tot) const
 Compute the sum of bit vectors [ib, ie).
void addBins (uint32_t ib, uint32_t ie, ibis::bitvector &res) const
 Add the sum of bits[ib] through bits[ie-1] to res.
virtual void clear ()
 Clear the existing content.
void computeMinMax (const char *f, double &min, double &max) const
void dataFileName (const char *f, std::string &name) const
 Generate data file name from "f".
 index (const ibis::column *c=0, ibis::fileManager::storage *s=0)
 Protect the constructor so that ibis::index can not be instantiated directly.
void indexFileName (const char *f, std::string &name) const
 Generate index file name from "f".
void mapValues (const char *f, histogram &hist, uint32_t count=0) const
 Generate a histogram.
void mapValues (const char *f, VMap &bmap) const
 Map the positions of each individual value.
void optionalUnpack (std::vector< ibis::bitvector * > &bits, const char *opt)
 A function to decide whether to uncompress the bitvectors.
void sumBins (uint32_t ib, uint32_t ie, ibis::bitvector &res, uint32_t ib0, uint32_t ie0) const
 Compute a new sum for bit vectors [ib, ie) by taking advantage of the old sum for bitvectors [ib0, ie0).
void sumBins (uint32_t ib, uint32_t ie, ibis::bitvector &res) const
 Compute the bitwise OR of all bitvectors (in bits) from ib to ie.

Static Protected Member Functions

static void indexFileName (std::string &name, const ibis::column *col1, const ibis::column *col2, const char *f=0)
 Generate the index file name for the composite index fromed on two columns.

Protected Attributes

std::vector< ibis::bitvector * > bits
 A list of bitvectors.
const ibis::columncol
 Pointer to the column this index is for.
const char * fname
 The name of the file containing the index.
uint32_t nrows
 The number of rows represented by the index.
array_t< int32_t > offsets
 Starting positions of the bitvectors.
ibis::fileManager::storagestr
 The underlying storage.

Classes

class  barrel
 A specialization that adds function setValue. More...


Detailed Description

The base index class.

Class ibis::index contains the common definitions and virtual functions of the class hierarchy. It is assumed that an ibis::index is for only one column. The user is to create an new index through the function ibis::index::create and only use the functions defined in this class.


Member Enumeration Documentation

The integer values of this enum type are used in the index files to differentiate the indexes.

Reordering the list will make some index files invalid ****

Enumerator:
BINNING  ibis::bin.
RANGE  ibis::range.
MESA  ibis::interval.
AMBIT  ibis::ambit, range-range two level encoding on bins.
PALE  ibis::pale, equality-range encoding on bins.
PACK  ibis::pack, range-equality encoding on bins.
ZONE  ibis::zone, equality-equality encoding on bins.
RELIC  ibis::relic, the basic bitmap index.
ROSTER  ibis::roster, RID list.
SLICE  ibis::slice, bit-sliced index.
FADE  ibis::fade, multicomponent range encoding (unbinned).
SBIAD  ibis::sbiad, multicomponent interval encoding (unbinned).
SAPID  ibis::sapid, multicomponent equality encoding (unbinned).
EGALE  ibis::egale, multicomponent equality encoding on bins.
MOINS  ibis::moins, multicomponent range encoding on bins.
ENTRE  ibis::entre, multicomponent interval encoding on bins.
BAK  ibis::bak, reduced precision mapping, equality code.
BAK2  ibis::bak2, splits each BAK bin in two, one less than the mapped value, one greater and equal to the mapped value.

KEYWORDS  ibis::keywords, boolean term-document matrix.
MESH  not used.
BAND  not used.
DIREKTE  ibis::direkte, hash value to bitmaps.
GENERIC  not used.
BYLT  ibis::bylt, unbinned range-equality encoding.
FUZZ  ibis::fuzz, unbinned interval-equality encoding.
ZONA  ibis::zona, unbinned equality-equality encoding.
FUGE  ibis::fuge, binned interval-equality encoding.
EXTERN  externally defined index


Constructor & Destructor Documentation

ibis::index::index ( const ibis::column c = 0,
ibis::fileManager::storage s = 0 
) [inline, protected]

Protect the constructor so that ibis::index can not be instantiated directly.

It can not be instantiated because some functions are pure virtual, but this also reduces the size of public interface.


Member Function Documentation

void ibis::index::activate ( uint32_t  i,
uint32_t  j 
) const [protected, virtual]

Regenerate bitvectors i (inclusive) through j (exclusive) from the underlying storage.

References bits, col, fname, ibis::gVerbose, ibis::column::logWarning(), ibis::column::name(), nrows, offsets, array_t< T >::size(), and str.

void ibis::index::addBins ( uint32_t  ib,
uint32_t  ie,
ibis::bitvector res,
const ibis::bitvector tot 
) const [protected]

Compute the sum of bit vectors [ib, ie).

This is basically a copy of the function sumBins (without the 4th arguments).

If computing a complement is faster, assume all bit vectors add up to tot.

There are two changes: (1) if res has the same number of bits as tot, the new sum is added to the existing bitvector, and (2) when it computes the sum through complements, it performs a subtraction from tot.

References activate(), ibis::bitvector::adjustSize(), bits, ibis::bitvector::bitsPerLiteral(), ibis::bitvector::bytes(), array_t< T >::clear(), ibis::bitvector::cnt(), col, ibis::bitvector::copy(), ibis::horometer::CPUTime(), ibis::bitvector::decompress(), array_t< T >::empty(), fname, ibis::gVerbose, ibis::column::name(), nrows, offsets, ibis::horometer::realTime(), array_t< T >::resize(), ibis::bitvector::set(), array_t< T >::size(), ibis::bitvector::size(), ibis::horometer::start(), ibis::horometer::stop(), str, and ibis::bitvector::swap().

void ibis::index::addBins ( uint32_t  ib,
uint32_t  ie,
ibis::bitvector res 
) const [protected]

Add the sum of bits[ib] through bits[ie-1] to res.

The most important difference between this function and sumBins is that this function always use bits[ib] through bits[ie-1].

Always explicitly use bits[ib] through bits[ie-1].

This is similar to the function addBits.

References activate(), ibis::bitvector::adjustSize(), bits, ibis::bitvector::bitsPerLiteral(), ibis::bitvector::bytes(), ibis::bitvector::cnt(), col, ibis::bitvector::copy(), ibis::horometer::CPUTime(), ibis::bitvector::decompress(), ibis::gVerbose, ibis::column::name(), nrows, ibis::horometer::realTime(), ibis::bitvector::set(), ibis::bitvector::size(), ibis::horometer::start(), and ibis::horometer::stop().

Referenced by ibis::fuge::estimate(), ibis::zona::evaluate(), ibis::fuzz::evaluate(), and ibis::bylt::evaluate().

void ibis::index::addBits ( const std::vector< ibis::bitvector * > &  bts,
uint32_t  ib,
uint32_t  ie,
ibis::bitvector res 
) [static]

Add the bts[ib:ie-1] to res.

Since the set of bit vectors are explicitly given, there is no need to perform activation. To minimize the burden of deciding which bit vectors to activate, this function always use the bts[ib] through bts[ie-1].

Note:
The caller need to activate the bit vectors!

This function still has to check whether a particular bts[i] is a null pointer before using the bit vector.

References ibis::bitvector::bytes(), ibis::bitvector::cnt(), ibis::bitvector::compress(), ibis::bitvector::copy(), ibis::horometer::CPUTime(), ibis::bitvector::decompress(), ibis::gVerbose, ibis::horometer::realTime(), ibis::bitvector::set(), ibis::bitvector::size(), ibis::horometer::start(), and ibis::horometer::stop().

Referenced by ibis::zona::evaluate().

virtual void ibis::index::binBoundaries ( std::vector< double > &   )  const [inline, virtual]

The function binBoundaries and binWeights return bin boundaries and counts of each bin respectively.

Reimplemented in ibis::bin, ibis::range, ibis::mesa, ibis::ambit, ibis::pale, ibis::pack, ibis::zone, ibis::egale, ibis::bak, ibis::bak2, ibis::direkte, ibis::keywords, and ibis::relic.

Referenced by ibis::part::coarsenBins(), ibis::part::get2DDistributionI(), and ibis::column::preferredBounds().

void ibis::index::clear (  )  [protected, virtual]

ibis::index * ibis::index::create ( const column c,
const char *  dfname = 0,
const char *  spec = 0,
int  inEntirety = 0 
) [static]

Index factory.

It creates a specific concrete index object.

If this function fails to read the specified index file, it attempts to create a new index based on the current data file and index specification. The new index will be written under the old name.

This function returns nil if it fails to create an index. It captures and absorbs exceptions in most cases.

Parameters:
c a pointer to a ibis::column object. This argument must be present.
name a name, it can be the name of the index file, the data file, or the directory containing the data file. If the name ends with '.idx' is treated as an index file and the content of the file is read. If the name does not end with '.idx', it is assumed to be the data file name unless it is determined to be a directory name. If it is a directory name, the data file is assumed to be in the specified directory with a file name that is same as the column name. Once a data file is found, the content of the data file is read to construct a new index according to the return value of function indexSpec. The argument name can be nil, in which case, the data file name is constructed by concatenate the return of partition()->currentDataDir() and the column name.
Note:
Set name to null to build a brand new index and discard the existing index.
Parameters:
spec the index specification. This string contains the parameters for how to create an index. The most general form is
    <binning .../> <encoding .../> <compression .../>.
   
Here is one example (it is the default for some integer columns)
    <binning none /> <encoding equality />
   
FastBit always compresses every bitmap it ever generates. The compression option is to instruct it to uncompress some bitmaps or not compress indices at all. The compress option is usually not used.
If the argument spec is not specified, this function checks the specification in the following order.
  1. use the index specification for the column being indexed;
  2. use the index specification for the table containing the column being indexed;
  3. use the most specific index specification relates to the column be indexed in the global resources (gParameters).
It stops looking as soon as it finds the first non-empty string. To override any general index specification, one must provide a complete index specification string.

Parameters:
inEntirety If this value is greater than zero, this function will attempt to read the whole index file in one shot. In cases where there is enough memory to hold all indexes in-memory, this option may reduce I/O overhead. Additionally, it places all bitmaps in an index consecutively in memory, which may also speed up memory accesses when the bitmaps are used to answer queries. Of course, the drawback is that this option requires more memory than necessary and may actually take more time to read the data files since many of the bitmaps may not be actually needed. The default value of this argument is 0, which allows bitmaps to be read into memory as needed.
Note:
An index can not be built correctly if it does not fit in memory! This is the most likely reason for failure in this function. If this does happen, try to build indexes one at a time, use a machine with more memory, or break up a large partition into a number of smaller ones. Normally, we recommand one to not put much more than 100 million rows in a data partition.

References AMBIT, BAK, BAK2, ibis::fileManager::storage::begin(), BINNING, ibis::util::logger::buffer(), BYLT, ibis::BYTE, ibis::CATEGORY, col, ibis::column::computeMinMax(), ibis::horometer::CPUTime(), ibis::part::currentDataDir(), DIREKTE, ibis::DOUBLE, EGALE, ENTRE, FADE, ibis::FLOAT, FUGE, FUZZ, getNRows(), ibis::gParameters(), ibis::gVerbose, ibis::part::indexSpec(), ibis::column::indexSpec(), ibis::fileManager::instance(), ibis::INT, ibis::resource::isTrue(), KEYWORDS, ibis::column::logMessage(), ibis::column::logWarning(), ibis::LONG, ibis::column::lowerBound(), MESA, MOINS, name(), ibis::column::name(), ibis::part::name(), ibis::part::nRows(), PACK, PALE, ibis::column::partition(), print(), RANGE, read(), ibis::horometer::realTime(), RELIC, SAPID, SBIAD, ibis::SHORT, SLICE, ibis::horometer::start(), ibis::horometer::stop(), str, ibis::TEXT, ibis::fileManager::tryGetFile(), ibis::column::type(), ibis::UBYTE, ibis::UINT, ibis::ULONG, ibis::column::upperBound(), ibis::USHORT, ibis::bad_alloc::what(), write(), ZONA, and ZONE.

Referenced by ibis::column::append(), ibis::mensa::buildIndex(), ibis::part::loadIndex(), and ibis::column::loadIndex().

void ibis::index::divideCounts ( array_t< uint32_t > &  bdry,
const array_t< uint32_t > &  cnt 
) [static]

Determine how to split the array cnt, so that each group has roughly the same total value.

Note:
The array bdry stores the dividers. The first group is [0, bdry[0]), the second group is [bdry[0], bdry[1]), and so on. Ths function uses the size of array bdry to determine the number of groups to use.

References array_t< T >::back(), array_t< T >::begin(), ibis::util::logger::buffer(), array_t< T >::clear(), array_t< T >::empty(), array_t< T >::end(), ibis::gVerbose, array_t< T >::push_back(), array_t< T >::resize(), and array_t< T >::size().

Referenced by ibis::part::adaptive2DBins(), ibis::part::adaptive3DBins(), ibis::part::adaptiveFloats(), ibis::part::adaptiveFloatsDetailed(), ibis::part::adaptiveInts(), ibis::part::adaptiveIntsDetailed(), ibis::part::coarsenBins(), ibis::part::get2DDistributionI(), ibis::part::getCumulativeDistribution(), ibis::part::getDistribution(), and ibis::bin::scanAndPartition().

int64_t ibis::index::estimate ( const ibis::index idx2,
const ibis::rangeJoin expr,
const ibis::bitvector mask 
) const [virtual]

Estimate an upper bound for the number of pairs produced from marked records.

References ibis::bitvector::cnt(), col, ibis::gVerbose, ibis::part::nRows(), and ibis::column::partition().

void ibis::index::estimate ( const ibis::index idx2,
const ibis::rangeJoin expr,
const ibis::bitvector mask,
ibis::bitvector64 lower,
ibis::bitvector64 upper 
) const [virtual]

Estimate the pairs for the range join operator.

Only records that are masked are evaluated.

References ibis::bitvector64::clear(), col, ibis::gVerbose, ibis::part::nRows(), ibis::column::partition(), and ibis::bitvector64::set().

void ibis::index::estimate ( const ibis::qDiscreteRange expr,
ibis::bitvector lower,
ibis::bitvector upper 
) const [virtual]

Estimate the hits for discrete ranges, i.e., those translated from 'a IN (x, y, .

A trivial implementation to indicate the index can not determine any row.

.)'.

Reimplemented in ibis::direkte, and ibis::relic.

References col, ibis::qDiscreteRange::colName(), ibis::gVerbose, ibis::part::nRows(), ibis::column::partition(), and ibis::bitvector::set().

virtual void ibis::index::estimate ( const ibis::qContinuousRange expr,
ibis::bitvector lower,
ibis::bitvector upper 
) const [inline, virtual]

Computes an approximation of hits as a pair of lower and upper bounds.

Parameters:
expr the query expression to be evaluated.
lower a bitvector marking a subset of the hits. All rows marked with one (1) are definitely hits.
upper a bitvector marking a superset of the hits. All hits are marked with one, but some of the rows marked one may not be hits. If the variable upper is empty, the variable lower is assumed to contain the exact answer.

Reimplemented in ibis::bin, ibis::range, ibis::mesa, ibis::ambit, ibis::pale, ibis::pack, ibis::zone, ibis::fuge, ibis::egale, ibis::moins, ibis::entre, ibis::direkte, ibis::keywords, ibis::relic, and ibis::slice.

References nrows, and ibis::bitvector::set().

Referenced by ibis::column::estimateRange(), ibis::column::evaluateRange(), and ibis::category::search().

virtual long ibis::index::evaluate ( const ibis::qDiscreteRange ,
ibis::bitvector  
) const [inline, virtual]

To evaluate the exact hits.

On success, return the number of hits, otherwise a negative value is returned.

Reimplemented in ibis::direkte, ibis::relic, ibis::slice, ibis::fade, ibis::sbiad, and ibis::sapid.

virtual long ibis::index::evaluate ( const ibis::qContinuousRange expr,
ibis::bitvector hits 
) const [pure virtual]

To evaluate the exact hits.

On success, return the number of hits, otherwise a negative value is returned.

Implemented in ibis::bin, ibis::range, ibis::mesa, ibis::ambit, ibis::pale, ibis::pack, ibis::zone, ibis::fuge, ibis::egale, ibis::moins, ibis::entre, ibis::direkte, ibis::keywords, ibis::relic, ibis::slice, ibis::fade, ibis::sbiad, ibis::sapid, ibis::fuzz, ibis::bylt, and ibis::zona.

Referenced by ibis::part::coarsenBins(), and ibis::part::get2DDistributionI().

virtual int ibis::index::expandRange ( ibis::qContinuousRange rng  )  const [inline, virtual]

The functions expandRange and contractRange expands or contracts the boundaries of a range condition so that the new range will have exact answers using the function estimate.

The default implementation provided does nothing since this is only meaningful for indices based on bins.

Reimplemented in ibis::bin, ibis::range, ibis::bak, and ibis::bak2.

Referenced by ibis::column::expandRange().

long ibis::index::getCumulativeDistribution ( std::vector< double > &  bds,
std::vector< uint32_t > &  cts 
) const [virtual]

Cumulative distribution of the data.

A brute-force approach to get an accurate cumulative distribution.

Reimplemented in ibis::bin, ibis::direkte, and ibis::relic.

long ibis::index::getDistribution ( std::vector< double > &  bbs,
std::vector< uint32_t > &  cts 
) const [virtual]

Binned distribution of the data.

A brute-force approach to get an accurate distribution.

Reimplemented in ibis::bin, ibis::direkte, and ibis::relic.

virtual double ibis::index::getSum (  )  const [inline, virtual]

Compute the approximate sum of all the values indexed.

If it decides that computing the sum directly from the vertical partition is more efficient, it will return NaN immediately.

Reimplemented in ibis::bin, ibis::range, ibis::mesa, ibis::ambit, ibis::pack, ibis::egale, ibis::moins, ibis::entre, ibis::direkte, ibis::keywords, ibis::relic, ibis::slice, and ibis::fade.

void ibis::index::indexFileName ( std::string &  iname,
const ibis::column col1,
const ibis::column col2,
const char *  dir = 0 
) [static, protected]

Generate the index file name for the composite index fromed on two columns.

May use argument "dir" if it is not null.

References ibis::part::currentDataDir(), ibis::column::name(), and ibis::column::partition().

bool ibis::index::isIndex ( const char *  f,
INDEX_TYPE  t 
) [static]

Read the header of the named file to determine if it contains an index of the specified type.

Returns true if the correct header is found, else return false.

Referenced by ibis::bak2::read(), and ibis::bak::read().

void ibis::index::mapValues ( const char *  f,
histogram &  hist,
uint32_t  count = 0 
) const [protected]

Generate a histogram.

Compute a histogram of a column.

Given a property file containing the values of a column, this function counts the occurances of each distinct values. Argument count is the number of samples to be used for building the histogram. If it is zero or greater than half of the number of values in the data files, all values are used, otherwise, approximately count values will be sampled with nearly uniform distances from each other.

Note:
IMPROTANT ASSUMPTION. A value of any supported type is supposed to be able to fit in a double with no rounding, no approximation and no overflow.

References ibis::bitvector::adjustSize(), ibis::bitvector::appendFill(), ibis::bitvector::bitsPerLiteral(), ibis::util::logger::buffer(), ibis::BYTE, ibis::CATEGORY, col, dataFileName(), ibis::DOUBLE, ibis::bitvector::firstIndexSet(), ibis::FLOAT, ibis::fileManager::getFile(), ibis::column::getNullMask(), ibis::gVerbose, ibis::bitvector::indexSet::indices(), ibis::fileManager::instance(), ibis::INT, ibis::bitvector::indexSet::isRange(), ibis::column::logMessage(), ibis::column::logWarning(), ibis::LONG, ibis::bitvector::indexSet::nIndices(), ibis::part::nRows(), ibis::column::partition(), ibis::horometer::realTime(), ibis::SHORT, array_t< T >::size(), ibis::bitvector::size(), ibis::horometer::start(), ibis::horometer::stop(), ibis::TEXT, ibis::column::type(), ibis::UBYTE, ibis::UINT, ibis::ULONG, and ibis::USHORT.

void ibis::index::mapValues ( const char *  f,
VMap &  bmap 
) const [protected]

template<typename E1, typename E2>
void ibis::index::mapValues ( const array_t< E1 > &  val1,
const array_t< E2 > &  val2,
array_t< E1 > &  bnd1,
array_t< E2 > &  bnd2,
std::vector< uint32_t > &  cnts 
) [inline, static]

Compute a two-dimensional histogram.

Given two arrays of the same size, count the number of appearance of each combinations defined by bnd1 and bnd2. If the arrays bnd1 or bnd2 contain values in ascending order, their values are directly used in this function. The empty one will be replaced by a linear division of actual range into 256 bins. The array counts stores the 2-D bins in raster-scan order with the second variable, val2, as the faster varying variables. More specifically, the bins for variable 1 are defined as:

/// (..., bnd1[0]) [bnd1[1], bin1[2]) [bnd1[2], bin1[3) ... [bnd1.back(), ...)
/// 
Note that '[' denote the left boundary is inclusive and ')' denote the right boundary is exclusive. Similarly, the bins for variable 2 are
/// (..., bnd2[0]) [bnd2[1], bin2[2]) [bnd2[2], bin2[3) ... [bnd2.back(), ...)
/// 
The counts are for the following bins
/// (..., bin1[0]) (.... bin2[0])
/// (..., bin1[0]) [bin2[0], bin2[1])
/// (..., bin1[0]) [bin2[1], bin2[2])
/// ...
/// (..., bin1[0]) [bin2.back(), ...)
/// [bin1[0], bin1[1]) (..., bin2[0])
/// [bin1[0], bin1[1]) [bin2[0], bin2[1])
/// [bin1[0], bin1[1]) [bin2[1], bin2[2])
/// ...
/// [bin1[0], bin1[1]) [bin2.back(), ...)
/// ...
/// 

References array_t< T >::empty(), array_t< T >::find(), array_t< T >::front(), array_t< T >::push_back(), array_t< T >::reserve(), and array_t< T >::size().

virtual const char* ibis::index::name (  )  const [pure virtual]

void ibis::index::optionalUnpack ( std::vector< ibis::bitvector * > &  bits,
const char *  opt 
) [protected]

A function to decide whether to uncompress the bitvectors.

Decide whether to uncompress the bitmaps.

References col, ibis::gParameters(), ibis::column::name(), ibis::part::name(), nrows, and ibis::column::partition().

Referenced by ibis::bak2::append(), ibis::bak::append(), ibis::bin::bin(), ibis::relic::construct(), and ibis::mesa::mesa().

virtual void ibis::index::print ( std::ostream &  out  )  const [pure virtual]

virtual int ibis::index::read ( ibis::fileManager::storage st  )  [pure virtual]

Reconstructs an index from an array of bytes.

Intended for internal use only!

Implemented in ibis::bin, ibis::range, ibis::ambit, ibis::pale, ibis::pack, ibis::zone, ibis::fuge, ibis::egale, ibis::direkte, ibis::keywords, ibis::relic, ibis::slice, ibis::fade, ibis::fuzz, ibis::bylt, and ibis::zona.

virtual int ibis::index::read ( const char *  name  )  [pure virtual]

Reconstructs an index from the named file.

The name can be the directory containing an index file. In this case, the name of the index file must be the name of the column followed by ".idx" suffix.

Implemented in ibis::bin, ibis::range, ibis::ambit, ibis::pale, ibis::pack, ibis::zone, ibis::fuge, ibis::egale, ibis::bak, ibis::bak2, ibis::direkte, ibis::keywords, ibis::relic, ibis::slice, ibis::fade, ibis::fuzz, ibis::bylt, and ibis::zona.

Referenced by create().

void ibis::index::setBases ( array_t< uint32_t > &  bases,
uint32_t  card,
uint32_t  nbase = 2 
) [static]

Fill the array bases with the values that cover the range [0, card).

Assumes at least two components. For one component case use indices defined explicit for one component cases.

References array_t< T >::resize().

void ibis::index::sumBins ( uint32_t  ib,
uint32_t  ie,
ibis::bitvector res,
uint32_t  ib0,
uint32_t  ie0 
) const [protected]

Compute a new sum for bit vectors [ib, ie) by taking advantage of the old sum for bitvectors [ib0, ie0).

This function attempts to take advantage of existing results of a previously computed sum.

  • On input, res = sum_{i=ib0}^{ie0} bits[i].
  • On exit, res = sum_{i=ib}^{ie} bits[i].

References activate(), bits, ibis::bitvector::bytes(), ibis::bitvector::cnt(), col, ibis::gVerbose, ibis::column::name(), nrows, offsets, array_t< T >::size(), ibis::bitvector::size(), and sumBins().

void ibis::index::sumBins ( uint32_t  ib,
uint32_t  ie,
ibis::bitvector res 
) const [protected]

Compute the bitwise OR of all bitvectors (in bits) from ib to ie.

Sum up bits[ib:ie-1] and place the result in res.

As usual, bits[ib] is included but bits[ie] is excluded.

The bitmaps (bits) are held by this index object and may be regenerated as needed. It uses the combined strategy that was determined in previous tests. The basic rule is as follows.

  • If there are two or less bit vectors, use |= operator directly.
  • Compute the total size of the bitmaps involved.
  • If the total size times log(number of bitvectors involved) is less than the size of an uncompressed bitvector, use a priority queue to store all input bitvectors and intermediate solutions,
  • or else, decompress the first bitvector and use inplace bitwise OR operator to complete the operations.

References activate(), bits, ibis::bitvector::bitsPerLiteral(), ibis::bitvector::bytes(), array_t< T >::clear(), ibis::bitvector::cnt(), col, ibis::bitvector::copy(), ibis::horometer::CPUTime(), ibis::bitvector::decompress(), ibis::bitvector::flip(), fname, ibis::gVerbose, ibis::column::name(), nrows, offsets, ibis::horometer::realTime(), array_t< T >::resize(), ibis::bitvector::set(), array_t< T >::size(), ibis::bitvector::size(), ibis::horometer::start(), ibis::horometer::stop(), str, and ibis::bitvector::swap().

Referenced by ibis::zone::estimate(), ibis::pale::estimate(), ibis::fuge::estimate(), ibis::direkte::estimate(), ibis::bin::estimate(), ibis::zona::evaluate(), ibis::fuzz::evaluate(), ibis::bylt::evaluate(), ibis::relic::evaluate(), ibis::direkte::evaluate(), ibis::bin::evaluate(), and sumBins().

void ibis::index::sumBits ( const std::vector< ibis::bitvector * > &  bts,
const ibis::bitvector tot,
uint32_t  ib,
uint32_t  ie,
ibis::bitvector res 
) [static]

Sum up bts[ib:ie-1] and add the result to res.

It is assumed that all bts add up to tot. In the other version of sumBits without this argument tot, it was assumed that all bitmaps add up to a bit vector of all ones. The decision of whether to use bts[ib:ie-1] directly or use the subtractive version (using bts[0:ib-1] and bts[ie:nobs-1]) are based on the number of bit vectors.

References ibis::bitvector::bitsPerLiteral(), ibis::bitvector::bytes(), ibis::bitvector::cnt(), ibis::bitvector::copy(), ibis::horometer::CPUTime(), ibis::bitvector::decompress(), ibis::gVerbose, ibis::horometer::realTime(), ibis::bitvector::set(), ibis::bitvector::size(), ibis::horometer::start(), and ibis::horometer::stop().

void ibis::index::sumBits ( const std::vector< ibis::bitvector * > &  bts,
uint32_t  ib,
uint32_t  ie,
ibis::bitvector res 
) [static]

Sum up bts[ib:ie-1] and place the result in res.

Note:
This function may either use bts[ib:ie-1] or bts[0:ib-1] and bts[ie:nobs-1] depending which set has more bit vectors! This requires the caller to activate the appropriate set.

This function always uses the operator |=. Tests show that using the function setBit is always slower.

References ibis::bitvector::bitsPerLiteral(), ibis::bitvector::bytes(), ibis::bitvector::cnt(), ibis::bitvector::compress(), ibis::bitvector::copy(), ibis::horometer::CPUTime(), ibis::bitvector::decompress(), ibis::bitvector::flip(), ibis::gVerbose, ibis::horometer::realTime(), ibis::bitvector::set(), ibis::bitvector::size(), ibis::horometer::start(), ibis::horometer::stop(), and ibis::bitvector::swap().

Referenced by ibis::part::adaptiveFloatsDetailed(), ibis::part::adaptiveIntsDetailed(), and ibis::mesa::mesa().

virtual float ibis::index::undecidable ( const ibis::qContinuousRange expr,
ibis::bitvector iffy 
) const [inline, virtual]

Mark the position of the rows that can not be decided with this index.

Parameters:
expr the range conditions to be evaluated.
iffy the bitvector marking the positions of rows that can not be decided using the index. Return value is the expected fraction of undecided rows that might satisfy the range conditions.

Reimplemented in ibis::bin, ibis::range, ibis::mesa, ibis::ambit, ibis::pale, ibis::pack, ibis::zone, ibis::egale, ibis::direkte, ibis::keywords, and ibis::relic.

Referenced by ibis::column::getUndecidable().

virtual int ibis::index::write ( const char *  name  )  const [pure virtual]

Save index to a file.

Outputs the index in a compact binary format to the named file or directory. The index file contains a header that can be identified by the function isIndex.

Implemented in ibis::bin, ibis::range, ibis::mesa, ibis::ambit, ibis::pale, ibis::pack, ibis::zone, ibis::fuge, ibis::egale, ibis::moins, ibis::entre, ibis::bak, ibis::bak2, ibis::direkte, ibis::keywords, ibis::relic, ibis::slice, ibis::fade, ibis::sbiad, ibis::sapid, ibis::fuzz, ibis::bylt, and ibis::zona.

Referenced by ibis::column::append(), and create().


Member Data Documentation


The documentation for this class was generated from the following files:
Make It A Bit Faster
Disclaimers
FastBit source code
FastBit mailing list archive
Maintainer of this page