/*
 *		Quark (association between a string and an integer ID) class.
 *
 *		Copyright (c) 2004-2005 Naoaki Okazaki
 *
 * This software is provided 'as-is', without any express or implied
 * warranty.  In no event will the authors be held liable for any damages
 * arising from the use of this software.
 *
 * Permission is granted to anyone to use this software for any purpose,
 * including commercial applications, and to alter it and redistribute it
 * freely, subject to the following restrictions (known as zlib license):
 *
 * 1. The origin of this software must not be misrepresented; you must not
 *    claim that you wrote the original software. If you use this software
 *    in a product, an acknowledgment in the product documentation would be
 *    appreciated but is not required.
 * 2. Altered source versions must be plainly marked as such, and must not be
 *    misrepresented as being the original software.
 * 3. This notice may not be removed or altered from any source distribution.
 *
 * Naoaki Okazaki <okazaki at chokkan dot org>
 *
 */

/* $Id: quark.h 2 2006-10-31 00:57:57Z naoaki $ */

/*
Quark allocates a unique ID for each string and holds the association so that
we can obtain the string value associated with an ID and/or the ID value
associated with a string value. Since string matching is slower than integer
comparison, it's a common technique for speed/memory optimization that an
application converts all string values into integer identifiers, does some
processing with the integer values, and then restores the string values from
them.

This code was confirmed to compile with MSVC++ 2003 and gcc 3.4.4.
Define _BUILD_NCL_SAMPLE if you want to build a sample program.
	$ g++ -D_BUILD_NCL_SAMPLE -xc++ quark.h
*/


#ifndef	__QUARK_H__
#define	__QUARK_H__

#include <vector>
#include <string>


/*
 * Select the hash map implementation behind the HashMap macro.
 *
 * This selection is done at global scope, BEFORE the optional ncl namespace
 * is opened: standard headers must not be included inside a user namespace,
 * and the __gnu_cxx::hash specialization below has to reopen the real
 * ::__gnu_cxx namespace rather than a nested ncl::__gnu_cxx.
 *
 *	- C++11 or later (or MSVC 2010+): std::unordered_map.
 *	- Older MSVC: stdext::hash_map.
 *	- Older GCC-compatible compilers: __gnu_cxx::hash_map, together with a
 *	  hash functor for std::string. That functor is only defined on this
 *	  legacy path.
 */
#if		(defined(__cplusplus) && __cplusplus >= 201103L) || (defined(_MSC_VER) && _MSC_VER >= 1600)

#include <unordered_map>
#define	HashMap		std::unordered_map

#elif	defined(_MSC_VER)

#include <hash_map>
#define	HashMap		stdext::hash_map

#elif	defined(__GNUC__)

#include <locale>
#include <ext/hash_map>
#define	HashMap		__gnu_cxx::hash_map

namespace __gnu_cxx
{
	/**
	 * Hash functor for std::string keys of __gnu_cxx::hash_map.
	 */
	template<> struct hash< std::string >
	{
		/**
		 * Compute the hash value of a string.
		 *	The value comes from the collate facet of the current global
		 *	locale (a default-constructed std::locale is a copy of it).
		 *	@param	x					String value to be hashed.
		 *	@return						Hash value for the string.
		 */
		size_t operator()( const std::string& x ) const
		{
			std::locale loc;
			return std::use_facet< std::collate<char> >(loc).hash(x.c_str(), x.c_str() + x.length());
		}
	};
}

#else

#error	"Define a hash_map class for your compiler. "

#endif


#ifdef	USE_NCL_NAMESPACE
namespace ncl {
#endif/*USE_NCL_NAMESPACE*/


/**
 * Bidirectional association between string values and integer IDs.
 *
 *	IDs are handed out sequentially, starting from 1, in the order in which
 *	new strings are first passed to operator[]. ID 0 is reserved to mean
 *	"no such string": element 0 of the ID-to-string table is a placeholder
 *	that is never associated with a string.
 *
 *	@param	string_t				String class name to be used.
 *	@param	qid_t					ID class name to be used.
 *	@author	Naoaki Okazaki
 */
template <class string_t, class qid_t>
class basic_quark {
protected:
	typedef HashMap<string_t, qid_t> StringToId;
	typedef std::vector<string_t> IdToString;

	/// Lookup table from a string value to its ID.
	StringToId m_string_to_id;
	/// Lookup table from an ID to its string value; element 0 is a placeholder.
	IdToString m_id_to_string;

public:
	/**
	 * Construct an empty quark.
	 *	Only the placeholder for the reserved ID 0 is present afterwards.
	 */
	basic_quark()
		: m_string_to_id(), m_id_to_string(1)
	{
	}

	/**
	 * Destruct.
	 */
	virtual ~basic_quark()
	{
	}

	/**
	 * Map a string to its associated ID.
	 *	If string-to-integer association does not exist, allocate a new ID.
	 *	@param	str					String value.
	 *	@return						Associated ID for the string value.
	 */
	qid_t operator[](const string_t& str)
	{
		typename StringToId::const_iterator found = m_string_to_id.find(str);
		if (found != m_string_to_id.end()) {
			return found->second;
		}

		// The next free ID equals the current table length, because element 0
		// is the placeholder and real IDs therefore start at 1.
		const qid_t fresh_id = static_cast<qid_t>(m_id_to_string.size());
		m_id_to_string.push_back(str);
		m_string_to_id.insert(std::pair<string_t, qid_t>(str, fresh_id));
		return fresh_id;
	}

	/**
	 * Convert ID value into the associated string value.
	 *	An empty string is returned for an ID with no association. The
	 *	returned reference to that empty string stays valid for the rest of
	 *	the program, so it is safe to keep it beyond the calling expression.
	 *	@param	qid					ID.
	 *	@return						String value associated with the ID, or an
	 *								empty (default-constructed) string if the
	 *								ID is 0, negative or not allocated yet.
	 */
	const string_t& from_id(const qid_t& qid) const
	{
		return from_id(qid, empty_string());
	}

	/**
	 * Convert ID value into the associated string value.
	 *	@param	qid					ID.
	 *	@param	def					Value to return if the ID is 0, negative or
	 *								not allocated yet. It is returned by
	 *								reference, so the caller must keep it alive
	 *								for as long as the result is used.
	 *	@return						String value associated with the ID, or
	 *								def if there is no such association.
	 */
	const string_t& from_id(const qid_t& qid, const string_t& def) const
	{
		// ID 0 is the reserved placeholder, so anything below 1 is unknown.
		if (qid < 1) {
			return def;
		}

		const typename IdToString::size_type index =
			static_cast<typename IdToString::size_type>(qid);
		if (m_id_to_string.size() <= index) {
			return def;
		}
		return m_id_to_string[index];
	}

	/**
	 * Convert string value into the associated ID value.
	 *	Unlike operator[], this never allocates a new ID.
	 *	@param	str					String value.
	 *	@return						ID if any, otherwise 0.
	 */
	qid_t from_string(const string_t& str) const
	{
		typename StringToId::const_iterator found = m_string_to_id.find(str);
		if (found == m_string_to_id.end()) {
			return 0;
		}
		return found->second;
	}

	/**
	 * Remove all string-to-id associations.
	 *	The placeholder for the reserved ID 0 is recreated, so the next new
	 *	string is given ID 1 again.
	 */
	void clear()
	{
		m_string_to_id.clear();
		m_id_to_string.assign(1, string_t());
	}

	/**
	 * Get the number of string-to-id associations.
	 *	@return						The number of association.
	 */
	size_t size() const
	{
		// Do not count the placeholder stored for the reserved ID 0.
		return m_id_to_string.size() - 1;
	}

private:
	/**
	 * Get the empty string returned for unknown IDs.
	 *	@return						Reference to a default-constructed string
	 *								that is created on first use and is never
	 *								destroyed before program exit.
	 */
	static const string_t& empty_string()
	{
		static const string_t s_empty = string_t();
		return s_empty;
	}
};

/**
 * Specialized quark class with std::string to int association.
 *	This is the ready-to-use instantiation of basic_quark: string values are
 *	std::string and IDs are int.
 */
typedef basic_quark<std::string, int> quark;


#ifdef	USE_NCL_NAMESPACE
};
#endif/*USE_NCL_NAMESPACE*/





#ifdef	_BUILD_NCL_SAMPLE

#include <iostream>

/*
 * Sample program: registers a few strings in a quark and prints the
 * conversions in both directions. The trailing comment on each statement
 * shows the expected output line.
 */
int main(int argc, char *argv[])
{
#ifdef	USE_NCL_NAMESPACE
	// The quark type lives in namespace ncl when USE_NCL_NAMESPACE is defined.
	using ncl::quark;
#endif/*USE_NCL_NAMESPACE*/

	// The sample takes no command-line arguments.
	(void)argc;
	(void)argv;

	quark q;

	// Allocate IDs; asking for an already known string returns the same ID.
	std::cout << q["you"] << '\n';								// 1
	std::cout << q["your"] << '\n';								// 2
	std::cout << q["you"] << '\n';								// 1
	std::cout << q["yours"] << '\n';							// 3

	// ID to string; an unallocated ID gives an empty string.
	std::cout << 1 << ": " << q.from_id(1) << '\n';				// 1: you
	std::cout << 2 << ": " << q.from_id(2) << '\n';				// 2: your
	std::cout << 3 << ": " << q.from_id(3) << '\n';				// 3: yours
	std::cout << 4 << ": " << q.from_id(4) << '\n';				// 4:

	// String to ID without allocation; an unknown string gives 0.
	std::cout << "you: " << q.from_string("you") << '\n';		// you: 1
	std::cout << "yours: " << q.from_string("yours") << '\n';	// yours: 3
	std::cout << "I: " << q.from_string("I") << '\n';			// I: 0

	return 0;
}

#endif/*_BUILD_NCL_SAMPLE*/

#endif/*__QUARK_H__*/

