Detailed Description

The DASTrie API.

Data Structures

struct  dastrie::doublearray4_traits
 Attributes and operations for a double array (4 bytes/element). More...
struct  dastrie::doublearray5_traits
 Attributes and operations for a double array (5 bytes/element). More...
struct  dastrie::doublearray5_traits::element_type
 A type that represents an element of a double array. More...
class  dastrie::array< value_tmpl >
 An unextendable array. More...
class  dastrie::otail
 A writer class for a tail array. More...
class  dastrie::itail
 A reader class for a tail array. More...
class  dastrie::trie< value_tmpl, doublearray_traits >
 Double Array Trie (read-only). More...
class  dastrie::trie< value_tmpl, doublearray_traits >::exception
 Exception class. More...
class  dastrie::trie< value_tmpl, doublearray_traits >::prefix_cursor
 A cursor class for prefix match.
class  dastrie::builder< key_tmpl, value_tmpl, doublearray_traits >
 A builder of a double-array trie. More...
struct  dastrie::builder< key_tmpl, value_tmpl, doublearray_traits >::record_type
 A type that represents a record (a pair of key and value). More...
class  dastrie::builder< key_tmpl, value_tmpl, doublearray_traits >::exception
 Exception class. More...
struct  dastrie::builder< key_tmpl, value_tmpl, doublearray_traits >::stat_type
 Statistics of the double array trie. More...
struct  dastrie::empty_type
 Empty type. More...


typedef uint8_t dastrie::doublearray4_traits::check_type
 A type that represents an element of a check array.
typedef int32_t dastrie::doublearray4_traits::element_type
 A type that represents an element of a double array.
typedef uint8_t dastrie::doublearray5_traits::check_type
 A type that represents an element of a check array.
typedef size_t dastrie::array::size_type
 The type that represents the size of the array.
typedef std::vector< element_type > dastrie::otail::container_type
 The container for the tail array.
typedef container_type::size_type dastrie::otail::size_type
 The type that represents the size of the tail array.
typedef array< element_type > dastrie::itail::container_type
 The container for the tail array.
typedef container_type::size_type dastrie::itail::size_type
 The type that represents the size of the tail array.
typedef doublearray_traits::element_type dastrie::trie::element_type
 A type that represents an element of a double array.
typedef doublearray_traits::base_type dastrie::trie::base_type
 A type that represents a base value in a double array.
typedef doublearray_traits::check_type dastrie::trie::check_type
 A type that represents a check value in a double array.
typedef array< element_type > dastrie::trie::doublearray_type
 A type that implements a container of double-array elements.
typedef doublearray_type::size_type dastrie::trie::size_type
 A type that represents a size.
typedef value_tmpl dastrie::builder::value_type
 A type that represents a record value.
typedef doublearray_traits::element_type dastrie::builder::element_type
 A type that represents an element of a double array.
typedef doublearray_traits::base_type dastrie::builder::base_type
 A type that represents a base value in a double array.
typedef doublearray_traits::check_type dastrie::builder::check_type
 A type that represents a check value in a double array.
typedef std::vector< element_type > dastrie::builder::doublearray_type
 A type that implements a double array.
typedef doublearray_type::size_type dastrie::builder::size_type
 A type of sizes.
typedef void(* dastrie::builder::callback_type)(void *instance, size_type i, size_type n)
 The type of a progress callback function.


enum  {
  dastrie::INVALID_INDEX = 0, dastrie::INITIAL_INDEX = 1, dastrie::NUMCHARS = 256, dastrie::CHUNKSIZE = 8,
  dastrie::SDAT_CHUNKSIZE = 16
}
 Global constants. More...


static const char * dastrie::doublearray4_traits::chunk_id ()
 The chunk ID.
static base_type dastrie::doublearray4_traits::min_base ()
 The minimum number of BASE values.
static base_type dastrie::doublearray4_traits::max_base ()
 The maximum number of BASE values.
static element_type dastrie::doublearray4_traits::default_value ()
 The default value of an element.
static base_type dastrie::doublearray4_traits::get_base (const element_type &elem)
 Gets the BASE value of an element.
static check_type dastrie::doublearray4_traits::get_check (const element_type &elem)
 Gets the CHECK value of an element.
static void dastrie::doublearray4_traits::set_base (element_type &elem, base_type v)
 Sets the BASE value of an element.
static void dastrie::doublearray4_traits::set_check (element_type &elem, check_type v)
 Sets the CHECK value of an element.
static const char * dastrie::doublearray5_traits::chunk_id ()
 The chunk ID.
static base_type dastrie::doublearray5_traits::min_base ()
 Gets the minimum number of BASE values.
static base_type dastrie::doublearray5_traits::max_base ()
 Gets the maximum number of BASE values.
static element_type dastrie::doublearray5_traits::default_value ()
 The default value of an element.
static base_type dastrie::doublearray5_traits::get_base (const element_type &elem)
 Gets the BASE value of an element.
static check_type dastrie::doublearray5_traits::get_check (const element_type &elem)
 Gets the CHECK value of an element.
static void dastrie::doublearray5_traits::set_base (element_type &elem, base_type v)
 Sets the BASE value of an element.
static void dastrie::doublearray5_traits::set_check (element_type &elem, check_type v)
 Sets the CHECK value of an element.
 dastrie::array::array ()
 Constructs an array.
 dastrie::array::array (value_type *block, size_type size, bool own=false)
 Constructs an array from an existing memory block.
 dastrie::array::array (const array &rho)
 Constructs an array from another array instance.
virtual dastrie::array::~array ()
 Destructs an array.
array & dastrie::array::operator= (const array &rho)
 Assigns the new array to this instance.
value_type & dastrie::array::operator[] (size_type i)
 Obtains a read/write access to an element in the array.
const value_type & dastrie::array::operator[] (size_type i) const
 Obtains a read-only access to an element in the array.
 dastrie::array::operator bool () const
 Checks whether an array is allocated.
size_type dastrie::array::size () const
 Reports the size of the array.
void dastrie::array::assign (value_type *block, size_type size, bool own=false)
 Assigns a new array from an existing memory block.
void dastrie::array::free ()
 Destroys the array.
 dastrie::otail::otail ()
 Constructs an instance.
virtual dastrie::otail::~otail ()
 Destructs an instance.
const element_type * dastrie::otail::block () const
 Obtains a read-only access to the pointer of the tail array.
size_type dastrie::otail::bytes () const
 Reports the size of the tail array.
size_type dastrie::otail::tellp () const
 Reports the offset position at which the next data will be written.
void dastrie::otail::clear ()
 Removes all of the contents in the tail array.
otail & dastrie::otail::write (const void *data, size_t size)
 Puts a byte stream to the tail array.
template<typename value_type>
otail & dastrie::otail::write (const value_type &value)
 Puts a value of a basic type to the tail array.
otail & dastrie::otail::write_string (const char *str, size_type offset=0)
 Puts a null-terminated string.
otail & dastrie::otail::write_string (const std::string &str, size_type offset=0)
 Puts a C++ string.
 dastrie::itail::itail ()
 Constructs an instance.
virtual dastrie::itail::~itail ()
 Destructs an instance.
 dastrie::itail::operator bool () const
 Checks whether a tail array is allocated.
void dastrie::itail::assign (const element_type *ptr, size_type size, bool own=false)
 Initializes the tail array from an existing memory block.
void dastrie::itail::seekg (size_type offset)
 Moves the read position in the tail array.
size_type dastrie::itail::tellg () const
 Reports the current read position in the tail array.
size_type dastrie::itail::strlen () const
 Counts the number of letters in the string from the current position.
bool dastrie::itail::match_string (const char *str)
 Exact match for the string from the current position.
bool dastrie::itail::match_string_partial (const char *str)
 Prefix match for the string from the current position.
itail & dastrie::itail::read (void *data, size_t size)
 Gets a byte stream from the tail array.
template<typename value_type>
itail & dastrie::itail::read (value_type &value)
 Gets a value of a basic type from the tail array.
 dastrie::trie::prefix_cursor::prefix_cursor ()
 Constructs a cursor.
 dastrie::trie::prefix_cursor::prefix_cursor (trie *t, const std::string &q)
 Constructs a cursor from a trie and query.
 dastrie::trie::prefix_cursor::prefix_cursor (const prefix_cursor &rho)
 Constructs a cursor from another instance.
bool dastrie::trie::prefix_cursor::next ()
 Moves the cursor to the next prefix.
 dastrie::trie::trie ()
 Constructs an instance.
virtual dastrie::trie::~trie ()
 Destructs an instance.
size_type dastrie::trie::size () const
 Gets the number of records in the trie.
bool dastrie::trie::in (const char *key)
 Tests if the trie contains a key.
bool dastrie::trie::find (const char *key, value_type &value)
 Finds a record.
value_type dastrie::trie::get (const char *key, const value_type &def)
 Gets the value for a key.
prefix_cursor dastrie::trie::prefix (const char *str)
 Constructs a cursor for prefix match.
void dastrie::trie::assign (const std::vector< element_type > &da, const otail &tail, const uint8_t *table)
 Assigns a double-array trie from a builder.
size_type dastrie::trie::assign (const char *block, size_type size)
 Assigns a double-array trie from a memory image.
size_type dastrie::trie::read (std::istream &is)
 Reads a double-array trie from an input stream.
 dastrie::builder::builder ()
 Constructs a builder.
virtual dastrie::builder::~builder ()
 Destructs the builder.
void dastrie::builder::set_callback (void *instance, callback_type callback)
 Sets a progress callback.
void dastrie::builder::build (const record_type *first, const record_type *last)
 Builds a double-array trie from sorted records.
void dastrie::builder::clear ()
 Initializes the double array.
const doublearray_type & dastrie::builder::doublearray () const
 Obtains a read-only access to the double-array.
const otail & dastrie::builder::tail () const
 Obtains a read-only access to the tail array.
const uint8_t * dastrie::builder::table () const
 Obtains a read-only access to the character table.
void dastrie::builder::write (std::ostream &os)
 Writes out the double-array trie to an output stream.


container_type dastrie::otail::m_cont
 The tail array.
container_type dastrie::itail::m_cont
 The tail array.
size_type dastrie::itail::m_offset
 The current reading position.
std::string dastrie::trie::prefix_cursor::query
 The query.
size_type dastrie::trie::prefix_cursor::length
 The length of the prefix.
value_type dastrie::trie::prefix_cursor::value
 The value of the prefix.
size_type dastrie::trie::prefix_cursor::cur
 The cursor.
value_type dastrie::builder::record_type::value
 The value of the record.
size_type dastrie::builder::stat_type::da_num_total
 The number of elements in the double array.
size_type dastrie::builder::stat_type::da_num_used
 The number of elements actually used in the double array.
size_type dastrie::builder::stat_type::da_num_nodes
 The number of nodes (excluding leaves).
size_type dastrie::builder::stat_type::da_num_leaves
 The number of leaves.
double dastrie::builder::stat_type::da_usage
 The utilization ratio of the double array.
size_type dastrie::builder::stat_type::tail_size
 The size, in bytes, of the tail array.
size_type dastrie::builder::stat_type::bt_sum_base_trials
 The sum of the number of trials for finding bases.
double dastrie::builder::stat_type::bt_avg_base_trials
 The average number of trials for finding bases.
double dastrie::builder::unigram_freq::freq

Typedef Documentation

template<class key_tmpl, class value_tmpl, class doublearray_traits = doublearray5_traits>
typedef void(* dastrie::builder< key_tmpl, value_tmpl, doublearray_traits >::callback_type)(void *instance, size_type i, size_type n) [inherited]

The type of a progress callback function.

instance The pointer to a user-defined instance.
i The number of records that have already been stored in the trie.
n The total number of records to be stored.

Enumeration Type Documentation

anonymous enum

Global constants.

INVALID_INDEX  Invalid index number for a double array.
INITIAL_INDEX  Initial index for a double array.
NUMCHARS  Number of characters.
CHUNKSIZE  The size of a chunk header.
SDAT_CHUNKSIZE  The size of a "SDAT" chunk.

Function Documentation

template<class value_tmpl, class doublearray_traits = doublearray5_traits>
size_type dastrie::trie< value_tmpl, doublearray_traits >::assign ( const char *  block,
size_type  size 
) [inline, inherited]

Assigns a double-array trie from a memory image.

block The pointer to the memory block.
size The size, in bytes, of the memory block.
size_type If successful, the size, in bytes, of the memory block used to read a double-array trie; otherwise zero.

template<class value_tmpl, class doublearray_traits = doublearray5_traits>
void dastrie::trie< value_tmpl, doublearray_traits >::assign ( const std::vector< element_type > &  da,
const otail &  tail,
const uint8_t *  table 
) [inline, inherited]

Assigns a double-array trie from a builder.

da The vector of double-array elements.
tail The tail array.
table The character-mapping table.

void dastrie::itail::assign ( const element_type *  ptr,
size_type  size,
bool  own = false 
) [inline, inherited]

Initializes the tail array from an existing memory block.

ptr The pointer to the memory block of the source.
size The size of the memory block of the source.
own true to copy the content of the source to a new memory block managed by this instance.

const element_type* dastrie::otail::block (  )  const [inline, inherited]

Obtains a read-only access to the pointer of the tail array.

const element_type* The pointer to the tail array.

template<class key_tmpl, class value_tmpl, class doublearray_traits = doublearray5_traits>
void dastrie::builder< key_tmpl, value_tmpl, doublearray_traits >::build ( const record_type *  first,
const record_type *  last 
) [inline, inherited]

Builds a double-array trie from sorted records.

first The random-access iterator addressing the position of the first record.
last The random-access iterator addressing the position one past the final record.

size_type dastrie::otail::bytes (  )  const [inline, inherited]

Reports the size of the tail array.

size_type The size, in bytes, of the tail array.

template<class key_tmpl, class value_tmpl, class doublearray_traits = doublearray5_traits>
const doublearray_type& dastrie::builder< key_tmpl, value_tmpl, doublearray_traits >::doublearray (  )  const [inline, inherited]

Obtains a read-only access to the double-array.

const doublearray_type& The reference to the double array.

template<class value_tmpl, class doublearray_traits = doublearray5_traits>
bool dastrie::trie< value_tmpl, doublearray_traits >::find ( const char *  key,
value_type &  value 
) [inline, inherited]

Finds a record.

key The key string.
[out] value The reference to a variable that receives the value of the key.
bool true if the trie contains the key; false otherwise.

template<class value_tmpl, class doublearray_traits = doublearray5_traits>
value_type dastrie::trie< value_tmpl, doublearray_traits >::get ( const char *  key,
const value_type &  def 
) [inline, inherited]

Gets the value for a key.

key The key string.
def The default value.
value_type The value if the key exists in the trie, the default value (def) otherwise.

template<class value_tmpl, class doublearray_traits = doublearray5_traits>
bool dastrie::trie< value_tmpl, doublearray_traits >::in ( const char *  key  )  [inline, inherited]

Tests if the trie contains a key.

key The key string.
bool true if the trie contains the key; false otherwise.

bool dastrie::itail::match_string ( const char *  str  )  [inline, inherited]

Exact match for the string from the current position.

str The pointer to the string to be compared.
bool true if the string starting from the current position is identical to the given string str; false otherwise.

bool dastrie::itail::match_string_partial ( const char *  str  )  [inline, inherited]

Prefix match for the string from the current position.

str The pointer to the string to be compared.
bool true if the given string str begins with the substring starting from the current position; false otherwise.

template<class value_tmpl, class doublearray_traits = doublearray5_traits>
bool dastrie::trie< value_tmpl, doublearray_traits >::prefix_cursor::next (  )  [inline, inherited]

Moves the cursor to the next prefix.

true if the trie finds a key string that is a prefix of the query string; false otherwise.

dastrie::itail::operator bool (  )  const [inline, inherited]

Checks whether a tail array is allocated.

bool true if allocated, false otherwise.

template<class value_tmpl, class doublearray_traits = doublearray5_traits>
prefix_cursor dastrie::trie< value_tmpl, doublearray_traits >::prefix ( const char *  str  )  [inline, inherited]

Constructs a cursor for prefix match.

str The query string.
prefix_cursor The instance of a cursor.

template<class value_tmpl, class doublearray_traits = doublearray5_traits>
dastrie::trie< value_tmpl, doublearray_traits >::prefix_cursor::prefix_cursor ( const prefix_cursor &  rho  )  [inline, inherited]

Constructs a cursor from another instance.

rho The reference to a source instance.

template<class value_tmpl, class doublearray_traits = doublearray5_traits>
dastrie::trie< value_tmpl, doublearray_traits >::prefix_cursor::prefix_cursor ( trie *  t,
const std::string &  q 
) [inline, inherited]

Constructs a cursor from a trie and query.

t The pointer to a trie instance.
q The query string.

template<class value_tmpl, class doublearray_traits = doublearray5_traits>
size_type dastrie::trie< value_tmpl, doublearray_traits >::read ( std::istream &  is  )  [inline, inherited]

Reads a double-array trie from an input stream.

is The input stream.
size_type The size of the double-array data.

template<typename value_type>
itail& dastrie::itail::read ( value_type &  value  )  [inline, inherited]

Gets a value of a basic type from the tail array.

[out] value The reference to the value.
itail& The reference to this object.

itail& dastrie::itail::read ( void *  data,
size_t  size 
) [inline, inherited]

Gets a byte stream from the tail array.

[out] data The pointer to the byte stream to receive.
size The size to read.
itail& The reference to this object.

void dastrie::itail::seekg ( size_type  offset  )  [inline, inherited]

Moves the read position in the tail array.

offset The offset for the new read position.

template<class key_tmpl, class value_tmpl, class doublearray_traits = doublearray5_traits>
void dastrie::builder< key_tmpl, value_tmpl, doublearray_traits >::set_callback ( void *  instance,
callback_type  callback 
) [inline, inherited]

Sets a progress callback.

instance The pointer to a user-defined instance.
callback The callback function.

template<class value_tmpl, class doublearray_traits = doublearray5_traits>
size_type dastrie::trie< value_tmpl, doublearray_traits >::size (  )  const [inline, inherited]

Gets the number of records in the trie.

size_type The number of records.

size_type dastrie::itail::strlen (  )  const [inline, inherited]

Counts the number of letters in the string from the current position.

size_type The number of letters.

template<class key_tmpl, class value_tmpl, class doublearray_traits = doublearray5_traits>
const uint8_t* dastrie::builder< key_tmpl, value_tmpl, doublearray_traits >::table (  )  const [inline, inherited]

Obtains a read-only access to the character table.

const uint8_t* The pointer to the character table.

template<class key_tmpl, class value_tmpl, class doublearray_traits = doublearray5_traits>
const otail& dastrie::builder< key_tmpl, value_tmpl, doublearray_traits >::tail (  )  const [inline, inherited]

Obtains a read-only access to the tail array.

const otail& The reference to the tail array.

size_type dastrie::itail::tellg (  )  const [inline, inherited]

Reports the current read position in the tail array.

size_type The current position in the tail array.

size_type dastrie::otail::tellp (  )  const [inline, inherited]

Reports the offset position at which the next data will be written.

size_type The current position.

template<class key_tmpl, class value_tmpl, class doublearray_traits = doublearray5_traits>
void dastrie::builder< key_tmpl, value_tmpl, doublearray_traits >::write ( std::ostream &  os  )  [inline, inherited]

Writes out the double-array trie to an output stream.

os The output stream.

template<typename value_type>
otail& dastrie::otail::write ( const value_type &  value  )  [inline, inherited]

Puts a value of a basic type to the tail array.

value The reference to the value.
otail& The reference to this object.

otail& dastrie::otail::write ( const void *  data,
size_t  size 
) [inline, inherited]

Puts a byte stream to the tail array.

data The pointer to the byte stream.
size The size, in bytes, of the byte stream.
otail& The reference to this object.

otail& dastrie::otail::write_string ( const std::string &  str,
size_type  offset = 0 
) [inline, inherited]

Puts a C++ string.

str The string.
offset The offset from which the string is written.
otail& The reference to the otail object.

otail& dastrie::otail::write_string ( const char *  str,
size_type  offset = 0 
) [inline, inherited]

Puts a null-terminated string.

str The pointer to the string.
offset The offset from which the string is written.
otail& The reference to this object.

Copyright (c) 2002-2008 by Naoaki Okazaki
Mon Nov 10 12:28:35 2008