DASTrie API


Detailed Description

The DASTrie API.


Data Structures

struct  dastrie::doublearray4_traits
 Attributes and operations for a double array (4 bytes/element). More...
struct  dastrie::doublearray5_traits
 Attributes and operations for a double array (5 bytes/element). More...
struct  dastrie::doublearray5_traits::element_type
 A type that represents an element of a double array. More...
class  dastrie::array< value_tmpl >
 An unextendable array. More...
class  dastrie::otail
 A writer class for a tail array. More...
class  dastrie::itail
 A reader class for a tail array. More...
class  dastrie::trie< value_tmpl, doublearray_traits >
 Double Array Trie (read-only). More...
class  dastrie::trie< value_tmpl, doublearray_traits >::exception
 Exception class. More...
class  dastrie::trie< value_tmpl, doublearray_traits >::prefix_cursor
 A cursor class for prefix match.
class  dastrie::builder< key_tmpl, value_tmpl, doublearray_traits >
 A builder of a double-array trie. More...
struct  dastrie::builder< key_tmpl, value_tmpl, doublearray_traits >::record_type
 A type that represents a record (a pair of key and value). More...
class  dastrie::builder< key_tmpl, value_tmpl, doublearray_traits >::exception
 Exception class. More...
struct  dastrie::builder< key_tmpl, value_tmpl, doublearray_traits >::stat_type
 Statistics of the double array trie. More...
struct  dastrie::empty_type
 Empty type. More...

Typedefs

typedef uint8_t dastrie::doublearray4_traits::check_type
 A type that represents an element of a check array.
typedef int32_t dastrie::doublearray4_traits::element_type
 A type that represents an element of a double array.
typedef uint8_t dastrie::doublearray5_traits::check_type
 A type that represents an element of a check array.
typedef size_t dastrie::array::size_type
 The type that represents the size of the array.
typedef std::vector< element_type > dastrie::otail::container_type
 The container for the tail array.
typedef container_type::size_type dastrie::otail::size_type
 The type that represents the size of the tail array.
typedef array< element_type > dastrie::itail::container_type
 The container for the tail array.
typedef container_type::size_type dastrie::itail::size_type
 The type that represents the size of the tail array.
typedef doublearray_traits::element_type dastrie::trie::element_type
 A type that represents an element of a double array.
typedef doublearray_traits::base_type dastrie::trie::base_type
 A type that represents a base value in a double array.
typedef doublearray_traits::check_type dastrie::trie::check_type
 A type that represents a check value in a double array.
typedef array< element_type > dastrie::trie::doublearray_type
 A type that implements a container of double-array elements.
typedef doublearray_type::size_type dastrie::trie::size_type
 A type that represents a size.
typedef value_tmpl dastrie::builder::value_type
 A type that represents a record value.
typedef doublearray_traits::element_type dastrie::builder::element_type
 A type that represents an element of a double array.
typedef doublearray_traits::base_type dastrie::builder::base_type
 A type that represents a base value in a double array.
typedef doublearray_traits::check_type dastrie::builder::check_type
 A type that represents a check value in a double array.
typedef std::vector< element_type > dastrie::builder::doublearray_type
 A type that implements a double array.
typedef doublearray_type::size_type dastrie::builder::size_type
 A type of sizes.
typedef void(* dastrie::builder::callback_type)(void *instance, size_type i, size_type n)
 The type of a progress callback function.

Enumerations

enum  {
  dastrie::INVALID_INDEX = 0, dastrie::INITIAL_INDEX = 1, dastrie::NUMCHARS = 256, dastrie::CHUNKSIZE = 8,
  dastrie::SDAT_CHUNKSIZE = 16
}
 Global constants. More...

Functions

static const char * dastrie::doublearray4_traits::chunk_id ()
 The chunk ID.
static base_type dastrie::doublearray4_traits::min_base ()
 The minimum number of BASE values.
static base_type dastrie::doublearray4_traits::max_base ()
 The maximum number of BASE values.
static element_type dastrie::doublearray4_traits::default_value ()
 The default value of an element.
static base_type dastrie::doublearray4_traits::get_base (const element_type &elem)
 Gets the BASE value of an element.
static check_type dastrie::doublearray4_traits::get_check (const element_type &elem)
 Gets the CHECK value of an element.
static void dastrie::doublearray4_traits::set_base (element_type &elem, base_type v)
 Sets the BASE value of an element.
static void dastrie::doublearray4_traits::set_check (element_type &elem, check_type v)
 Sets the CHECK value of an element.
static const char * dastrie::doublearray5_traits::chunk_id ()
 The chunk ID.
static base_type dastrie::doublearray5_traits::min_base ()
 Gets the minimum number of BASE values.
static base_type dastrie::doublearray5_traits::max_base ()
 Gets the maximum number of BASE values.
static element_type dastrie::doublearray5_traits::default_value ()
 The default value of an element.
static base_type dastrie::doublearray5_traits::get_base (const element_type &elem)
 Gets the BASE value of an element.
static check_type dastrie::doublearray5_traits::get_check (const element_type &elem)
 Gets the CHECK value of an element.
static void dastrie::doublearray5_traits::set_base (element_type &elem, base_type v)
 Sets the BASE value of an element.
static void dastrie::doublearray5_traits::set_check (element_type &elem, check_type v)
 Sets the CHECK value of an element.
 dastrie::array::array ()
 Constructs an array.
 dastrie::array::array (value_type *block, size_type size, bool own=false)
 Constructs an array from an existing memory block.
 dastrie::array::array (const array &rho)
 Constructs an array from another array instance.
virtual dastrie::array::~array ()
 Destructs an array.
array & dastrie::array::operator= (const array &rho)
 Assigns the new array to this instance.
value_type & dastrie::array::operator[] (size_type i)
 Obtains a read/write access to an element in the array.
const value_type & dastrie::array::operator[] (size_type i) const
 Obtains a read-only access to an element in the array.
 dastrie::array::operator bool () const
 Checks whether an array is allocated.
size_type dastrie::array::size () const
 Reports the size of the array.
void dastrie::array::assign (value_type *block, size_type size, bool own=false)
 Assigns a new array from an existing memory block.
void dastrie::array::free ()
 Destroys the array.
 dastrie::otail::otail ()
 Constructs an instance.
virtual dastrie::otail::~otail ()
 Destructs an instance.
const element_type * dastrie::otail::block () const
 Obtains a read-only access to the pointer of the tail array.
size_type dastrie::otail::bytes () const
 Reports the size of the tail array.
size_type dastrie::otail::tellp () const
 Reports the offset position at which the next data will be written.
void dastrie::otail::clear ()
 Removes all of the contents in the tail array.
otail & dastrie::otail::write (const void *data, size_t size)
 Puts a byte stream to the tail array.
template<typename value_type>
otail & dastrie::otail::write (const value_type &value)
 Puts a value of a basic type to the tail array.
otail & dastrie::otail::write_string (const char *str, size_type offset=0)
 Puts a null-terminated string.
otail & dastrie::otail::write_string (const std::string &str, size_type offset=0)
 Puts a C++ string.
 dastrie::itail::itail ()
 Constructs an instance.
virtual dastrie::itail::~itail ()
 Destructs an instance.
 dastrie::itail::operator bool () const
 Checks whether a tail array is allocated.
void dastrie::itail::assign (const element_type *ptr, size_type size, bool own=false)
 Initializes the tail array from an existing memory block.
void dastrie::itail::seekg (size_type offset)
 Moves the read position in the tail array.
size_type dastrie::itail::tellg () const
 Reports the current read position in the tail array.
size_type dastrie::itail::strlen () const
 Counts the number of letters in the string from the current position.
bool dastrie::itail::match_string (const char *str)
 Exact match for the string from the current position.
bool dastrie::itail::match_string_partial (const char *str)
 Prefix match for the string from the current position.
itail & dastrie::itail::read (void *data, size_t size)
 Gets a byte stream from the tail array.
template<typename value_type>
itail & dastrie::itail::read (value_type &value)
 Gets a value of a basic type from the tail array.
 dastrie::trie::prefix_cursor::prefix_cursor ()
 Constructs a cursor.
 dastrie::trie::prefix_cursor::prefix_cursor (trie *t, const std::string &q)
 Constructs a cursor from a trie and query.
 dastrie::trie::prefix_cursor::prefix_cursor (const prefix_cursor &rho)
 Constructs a cursor from another instance.
bool dastrie::trie::prefix_cursor::next ()
 Moves the cursor to the next prefix.
 dastrie::trie::trie ()
 Constructs an instance.
virtual dastrie::trie::~trie ()
 Destructs an instance.
size_type dastrie::trie::size () const
 Gets the number of records in the trie.
bool dastrie::trie::in (const char *key)
 Tests if the trie contains a key.
bool dastrie::trie::find (const char *key, value_type &value)
 Finds a record.
value_type dastrie::trie::get (const char *key, const value_type &def)
 Gets the value for a key.
prefix_cursor dastrie::trie::prefix (const char *str)
 Constructs a cursor for prefix match.
void dastrie::trie::assign (const std::vector< element_type > &da, const otail &tail, const uint8_t *table)
 Assigns a double-array trie from a builder.
size_type dastrie::trie::assign (const char *block, size_type size)
 Assigns a double-array trie from a memory image.
size_type dastrie::trie::read (std::istream &is)
 Reads a double-array trie from an input stream.
 dastrie::builder::builder ()
 Constructs a builder.
virtual dastrie::builder::~builder ()
 Destructs the builder.
void dastrie::builder::set_callback (void *instance, callback_type callback)
 Sets a progress callback.
void dastrie::builder::build (const record_type *first, const record_type *last)
 Builds a double-array trie from sorted records.
void dastrie::builder::clear ()
 Initializes the double array.
const doublearray_type & dastrie::builder::doublearray () const
 Obtains a read-only access to the double-array.
const otail & dastrie::builder::tail () const
 Obtains a read-only access to the tail array.
const uint8_t * dastrie::builder::table () const
 Obtains a read-only access to the character table.
void dastrie::builder::write (std::ostream &os)
 Writes out the double-array trie to an output stream.

Variables

container_type dastrie::otail::m_cont
 The tail array.
container_type dastrie::itail::m_cont
 The tail array.
size_type dastrie::itail::m_offset
 The current reading position.
std::string dastrie::trie::prefix_cursor::query
 The query.
size_type dastrie::trie::prefix_cursor::length
 The length of the prefix.
value_type dastrie::trie::prefix_cursor::value
 The value of the prefix.
size_type dastrie::trie::prefix_cursor::cur
 The cursor.
value_type dastrie::builder::record_type::value
 The value of the record.
size_type dastrie::builder::stat_type::da_num_total
 The number of elements in the double array.
size_type dastrie::builder::stat_type::da_num_used
 The number of elements actually used in the double array.
size_type dastrie::builder::stat_type::da_num_nodes
 The number of nodes (excluding leaves).
size_type dastrie::builder::stat_type::da_num_leaves
 The number of leaves.
double dastrie::builder::stat_type::da_usage
 The utilization ratio of the double array.
size_type dastrie::builder::stat_type::tail_size
 The size, in bytes, of the tail array.
size_type dastrie::builder::stat_type::bt_sum_base_trials
 The sum of the number of trials for finding bases.
double dastrie::builder::stat_type::bt_avg_base_trials
 The average number of trials for finding bases.
double dastrie::builder::unigram_freq::freq
 Frequency.


Typedef Documentation

template<class key_tmpl, class value_tmpl, class doublearray_traits = doublearray5_traits>
typedef void(* dastrie::builder< key_tmpl, value_tmpl, doublearray_traits >::callback_type)(void *instance, size_type i, size_type n) [inherited]

The type of a progress callback function.

Parameters:
instance The pointer to a user-defined instance.
i The number of records that have already been stored in the trie.
n The total number of records to be stored.


Enumeration Type Documentation

anonymous enum

Global constants.

Enumerator:
INVALID_INDEX  Invalid index number for a double array.
INITIAL_INDEX  Initial index for a double array.
NUMCHARS  Number of characters.
CHUNKSIZE  The size of a chunk header.
SDAT_CHUNKSIZE  The size of a "SDAT" chunk.


Function Documentation

template<class value_tmpl, class doublearray_traits = doublearray5_traits>
size_type dastrie::trie< value_tmpl, doublearray_traits >::assign ( const char *  block,
size_type  size 
) [inline, inherited]

Assigns a double-array trie from a memory image.

Parameters:
block The pointer to the memory block.
size The size, in bytes, of the memory block.
Returns:
size_type If successful, the size, in bytes, of the memory block used to read a double-array trie; otherwise zero.

template<class value_tmpl, class doublearray_traits = doublearray5_traits>
void dastrie::trie< value_tmpl, doublearray_traits >::assign ( const std::vector< element_type > &  da,
const otail &  tail,
const uint8_t *  table 
) [inline, inherited]

Assigns a double-array trie from a builder.

Parameters:
da The vector of double-array elements.
tail The tail array.
table The character-mapping table.

void dastrie::itail::assign ( const element_type *  ptr,
size_type  size,
bool  own = false 
) [inline, inherited]

Initializes the tail array from an existing memory block.

Parameters:
ptr The pointer to the memory block of the source.
size The size of the memory block of the source.
own true to copy the content of the source to a new memory block managed by this instance.

const element_type* dastrie::otail::block (  )  const [inline, inherited]

Obtains a read-only access to the pointer of the tail array.

Returns:
const element_type* The pointer to the tail array.

template<class key_tmpl, class value_tmpl, class doublearray_traits = doublearray5_traits>
void dastrie::builder< key_tmpl, value_tmpl, doublearray_traits >::build ( const record_type *  first,
const record_type *  last 
) [inline, inherited]

Builds a double-array trie from sorted records.

Parameters:
first The random-access iterator addressing the position of the first record.
last The random-access iterator addressing the position one past the final record.

size_type dastrie::otail::bytes (  )  const [inline, inherited]

Reports the size of the tail array.

Returns:
size_type The size, in bytes, of the tail array.

template<class key_tmpl, class value_tmpl, class doublearray_traits = doublearray5_traits>
const doublearray_type& dastrie::builder< key_tmpl, value_tmpl, doublearray_traits >::doublearray (  )  const [inline, inherited]

Obtains a read-only access to the double-array.

Returns:
const doublearray_type& The reference to the double array.

template<class value_tmpl, class doublearray_traits = doublearray5_traits>
bool dastrie::trie< value_tmpl, doublearray_traits >::find ( const char *  key,
value_type &  value 
) [inline, inherited]

Finds a record.

Parameters:
key The key string.
[out] value The reference to a variable that receives the value of the key.
Returns:
bool true if the trie contains the key; false otherwise.

template<class value_tmpl, class doublearray_traits = doublearray5_traits>
value_type dastrie::trie< value_tmpl, doublearray_traits >::get ( const char *  key,
const value_type &  def 
) [inline, inherited]

Gets the value for a key.

Parameters:
key The key string.
def The default value.
Returns:
value_type The value if the key exists in the trie, the default value (def) otherwise.

template<class value_tmpl, class doublearray_traits = doublearray5_traits>
bool dastrie::trie< value_tmpl, doublearray_traits >::in ( const char *  key  )  [inline, inherited]

Tests if the trie contains a key.

Parameters:
key The key string.
Returns:
bool true if the trie contains the key; false otherwise.

bool dastrie::itail::match_string ( const char *  str  )  [inline, inherited]

Exact match for the string from the current position.

Parameters:
str The pointer to the string to be compared.
Returns:
bool true if the string starting from the current position is identical to the given string str; false otherwise.

bool dastrie::itail::match_string_partial ( const char *  str  )  [inline, inherited]

Prefix match for the string from the current position.

Parameters:
str The pointer to the string to be compared.
Returns:
bool true if the given string str begins with the substring starting from the current position; false otherwise.

template<class value_tmpl, class doublearray_traits = doublearray5_traits>
bool dastrie::trie< value_tmpl, doublearray_traits >::prefix_cursor::next (  )  [inline, inherited]

Moves the cursor to the next prefix.

Returns:
bool true if the trie finds a key string that is a prefix of the query string; false otherwise.

dastrie::itail::operator bool (  )  const [inline, inherited]

Checks whether a tail array is allocated.

Returns:
bool true if allocated, false otherwise.

template<class value_tmpl, class doublearray_traits = doublearray5_traits>
prefix_cursor dastrie::trie< value_tmpl, doublearray_traits >::prefix ( const char *  str  )  [inline, inherited]

Constructs a cursor for prefix match.

Parameters:
str The query string.
Returns:
prefix_cursor The instance of a cursor.

template<class value_tmpl, class doublearray_traits = doublearray5_traits>
dastrie::trie< value_tmpl, doublearray_traits >::prefix_cursor::prefix_cursor ( const prefix_cursor &  rho  )  [inline, inherited]

Constructs a cursor from another instance.

Parameters:
rho The reference to a source instance.

template<class value_tmpl, class doublearray_traits = doublearray5_traits>
dastrie::trie< value_tmpl, doublearray_traits >::prefix_cursor::prefix_cursor ( trie *  t,
const std::string &  q 
) [inline, inherited]

Constructs a cursor from a trie and query.

Parameters:
t The pointer to a trie instance.
q The query string.

template<class value_tmpl, class doublearray_traits = doublearray5_traits>
size_type dastrie::trie< value_tmpl, doublearray_traits >::read ( std::istream &  is  )  [inline, inherited]

Reads a double-array trie from an input stream.

Parameters:
is The input stream.
Returns:
size_type The size of the double-array data.

template<typename value_type>
itail& dastrie::itail::read ( value_type &  value  )  [inline, inherited]

Gets a value of a basic type from the tail array.

Parameters:
[out] value The reference to the value.
Returns:
itail& The reference to this object.

itail& dastrie::itail::read ( void *  data,
size_t  size 
) [inline, inherited]

Gets a byte stream from the tail array.

Parameters:
[out] data The pointer to the byte stream to receive.
size The size to read.
Returns:
itail& The reference to this object.

void dastrie::itail::seekg ( size_type  offset  )  [inline, inherited]

Moves the read position in the tail array.

Parameters:
offset The offset for the new read position.

template<class key_tmpl, class value_tmpl, class doublearray_traits = doublearray5_traits>
void dastrie::builder< key_tmpl, value_tmpl, doublearray_traits >::set_callback ( void *  instance,
callback_type  callback 
) [inline, inherited]

Sets a progress callback.

Parameters:
instance The pointer to a user-defined instance.
callback The callback function.

template<class value_tmpl, class doublearray_traits = doublearray5_traits>
size_type dastrie::trie< value_tmpl, doublearray_traits >::size (  )  const [inline, inherited]

Gets the number of records in the trie.

Returns:
size_type The number of records.

size_type dastrie::itail::strlen (  )  const [inline, inherited]

Counts the number of letters in the string from the current position.

Returns:
size_type The number of letters.

template<class key_tmpl, class value_tmpl, class doublearray_traits = doublearray5_traits>
const uint8_t* dastrie::builder< key_tmpl, value_tmpl, doublearray_traits >::table (  )  const [inline, inherited]

Obtains a read-only access to the character table.

Returns:
const uint8_t* The pointer to the character table.

template<class key_tmpl, class value_tmpl, class doublearray_traits = doublearray5_traits>
const otail& dastrie::builder< key_tmpl, value_tmpl, doublearray_traits >::tail (  )  const [inline, inherited]

Obtains a read-only access to the tail array.

Returns:
const otail& The reference to the tail array.

size_type dastrie::itail::tellg (  )  const [inline, inherited]

Reports the current read position in the tail array.

Returns:
size_type The current position in the tail array.

size_type dastrie::otail::tellp (  )  const [inline, inherited]

Reports the offset position at which the next data will be written.

Returns:
size_type The current position.

template<class key_tmpl, class value_tmpl, class doublearray_traits = doublearray5_traits>
void dastrie::builder< key_tmpl, value_tmpl, doublearray_traits >::write ( std::ostream &  os  )  [inline, inherited]

Writes out the double-array trie to an output stream.

Parameters:
os The output stream.

template<typename value_type>
otail& dastrie::otail::write ( const value_type &  value  )  [inline, inherited]

Puts a value of a basic type to the tail array.

Parameters:
value The reference to the value.
Returns:
otail& The reference to this object.

otail& dastrie::otail::write ( const void *  data,
size_t  size 
) [inline, inherited]

Puts a byte stream to the tail array.

Parameters:
data The pointer to the byte stream.
size The size, in bytes, of the byte stream.
Returns:
otail& The reference to this object.

otail& dastrie::otail::write_string ( const std::string &  str,
size_type  offset = 0 
) [inline, inherited]

Puts a C++ string.

Parameters:
str The string.
offset The offset from which the string is written.
Returns:
otail& The reference to the otail object.

otail& dastrie::otail::write_string ( const char *  str,
size_type  offset = 0 
) [inline, inherited]

Puts a null-terminated string.

Parameters:
str The pointer to the string.
offset The offset from which the string is written.
Returns:
otail& The reference to this object.


Copyright (c) 2002-2008 by Naoaki Okazaki
Mon Nov 10 12:28:35 2008