首页 > 其他分享> 文章详细

unordered_map和unordered_set的模拟实现

2021-05-28 14:32:03 阅读：147 来源： 互联网

标签：map set return cur idx ht key unordered size

定义

哈希表（Hash table，也叫散列表），是根据关键字值（key，value）直接进行访问的数据结构。也就是说，它通过把关键字映射到表中一个位置来访问的纪录，以加快查找的速度。这个映射函数叫做散列函数，存放纪录的数组叫散列表。

基本原理

使用一个下标范围比较大的数组来存储元素。可以设计一个函数（哈希函数，也叫做散列函数），使得每个元素的关键字都与一个函数值（即数组下标，hash值）相对应，于是用这个数组单元来存储这个元素。

哈希冲突

通过哈希函数产生的哈希值是有限的，而数据可能比较多，导致经过哈希函数处理后仍然有不同的数据对应相同的值。这时候就产生了哈希冲突。

冲突解决

闭散列

开放定址法，当发生哈希冲突时，如果哈希表未被装满，说明在哈希表中必然还有空位置，那么可以把key存放到冲突位置中的下一个空位置中去。

线性探测法：

当要插入一个元素时，连续地检查散列表的个各项，直到找到一个空位置来放置这个元素为止。
当查找一个元素时，要检查所有的表项，直到找到所需的元素，或元素不在表中。
当我们从位置中删除关键字时，不能将此位置元素置空。否则会导致在无法判断此位置是否有元素。应该用个特殊值表示该元素已经删除。

#include<vector>
#include<iostream>
using namespace std;

enum STATE
{
	EXIST,
	DELETE,
	EMPTY
	
};

template<class K,class V>
struct HashNode
{
	pair<K,V> _kv;
	STATE _state=EMPTY;
};

template<class K, class V>
struct HashTable
{
public:
	typedef HashNode<K,V> Node;
	HashTable(size_t n = 10)
		:_hTable(n)
		, _size(0)
	{}
	void Swap(HashTable<K, V>& newhash)
	{
		swap(newhash._hTable, _hTable);
	}
	void CheckCapacity()
	{
		if (_size * 10 / _hTable.size() >= 7)
		{
			int newC = _hTable.size() == 0 ? 10 : 2 * _hTable.size();
			HashTable<K, V> newhash(newC);
			for (size_t i = 0; i < _hTable.size(); i++)
			{
				if (_hTable[i]._state == EXIST)
				{
					newhash.insert(_hTable[i]._kv);
				}
			}
			Swap(newhash);
		}
	}
	bool insert(const pair<K, V>& kv)
	{
		CheckCapacity();
		int idx = kv.first % _hTable.size();
		while (_hTable[idx]._state != EMPTY)
		{
			if (_hTable[idx]._state == EXIST&&kv.first == _hTable[idx]._kv.first)
			{
				return false;
			}
			++idx;
			if (idx == _hTable.size())
			{
				idx = 0;
			}
		}
		_hTable[idx]._kv= kv;
		_hTable[idx]._state = EXIST;
		++_size;
		return true;

	}
	Node* find(const K& key)
	{

		int idx = key % _hTable.size();
		while (_hTable[idx]._state != EMPTY)
		{
			if (_hTable[idx]._state == EXIST&&key == _hTable[idx]._kv.first)
			{
				return &_hTable[idx];
			}
			++idx;
			if (idx == _hTable.size())
			{
				idx = 0;
			}
		}
		return nullptr;
	}
	bool erase(const K& key)
	{
		Node* node = find(key);
		if (node)
		{
			node->_state = DELETE;
			_size--;
			return true;
		}
		return false;
	}
private:
	vector<Node> _hTable;
	size_t _size;
};

void test()
{
	HashTable<int,int> ht;
	ht.insert(make_pair(1, 1));
	ht.insert(make_pair(2, 2));
	ht.insert(make_pair(3, 3));
	ht.insert(make_pair(4, 4));
	ht.insert(make_pair(5, 5));
	ht.insert(make_pair(6, 6));
	ht.insert(make_pair(7, 7));
	ht.insert(make_pair(8, 8));
	ht.insert(make_pair(9, 9));
	ht.insert(make_pair(10, 10));
	ht.insert(make_pair(11, 11));
	ht.insert(make_pair(12, 12));
	ht.find(12);
	ht.erase(8);
}
int main()
{
	test();
	return 0;
}

开散列

链地址法，首先对关键码集合用散列函数计算散列地址，具有相同地址的关键码归于同一子集合，每一个子集合称为一个桶，各个桶中的元素通过一个单链表链接起来，各链表的头结点存储在哈希表中。

哈希表的改造

哈希表的结点定义是只需要一个模板用来表示数据类型，因为是 K 还是 pair<K,V>后面的仿函数会进行判断。
不同容器V的类型不同，如果是unordered_map，V代表一个键值对，如果是unordered_set，V 为 K。
KeyOfValue: 因为V的类型不同，通过value取key的方式就不同。

哈希函数使用除留余数法，取到的是整数，如果传入的是字符串类型，需要通过仿函数转换为整形数字才能取模。

#include <iostream>
#include<vector>
using namespace std;

template<class K>
struct HashFun
{
	size_t operator()(const K& key)
	{
		return key;
	}
};
template <>
struct HashFun<string>
{
	size_t operator()(const string& s)
	{
		size_t hash = 0;
		for (const auto& ch:s)
		{
			hash = hash * 131 + ch;
		}
		return hash;
	}
};

template<class V>
struct HashNode
{
	V _val;
	HashNode<V>* _next;
	HashNode(const V& val)
		:_val(val)
		, _next(nullptr)
	{}
};
template <class K, class V, class KeyOfValue, class HashFun = HashFun<K>>
class HTable;
template <class K, class V, class KeyOfValue, class HashFun = HashFun<K>>
struct HashIterator
{
	typedef HashIterator<K, V, KeyOfValue, HashFun> Self;
	typedef HTable<K, V, KeyOfValue, HashFun> HT;
	typedef HashNode<V> Node;
	HT* _hptr;
	Node* _node;
	HashIterator(Node* node, HT* hptr)
		:_node(node)
		, _hptr(hptr)
	{}
	V* operator*()
	{
		return _node->_val;
	}
	V* operator->()
	{
		return &_node->_val;
	}
	bool operator!=(const Self& it)
	{
		return it._node != _node;
	}
	Self& operator++()
	{
		if (_node->_next)
		{
			_node = _node->_next;
		}
		else
		{
			KeyOfValue kov;
			size_t idx = kov(_node->_val) % _hptr->_ht.size();
			idx++;
			for (; idx < _hptr->_ht.size(); ++idx)
			{
				if (_hptr->_ht[idx])
				{
					_node = _hptr->_ht[idx];
					break;
				}
			}
			if (idx == _hptr->_ht.size())
			{
				_node = nullptr;
			}
		}
		return *this;
	}
		 
};
template <class K, class V, class KeyOfValue, class HashFun>
class HTable
{
public:
	template <class K, class V, class KeyOfValue, class HashFun = HashFun<K>>
	friend struct HashIterator;
	typedef HashIterator<K, V, KeyOfValue, HashFun> iterator;
	typedef HashNode<V> Node;
	HTable(int n =10)
		:_ht(n)
		, _size(0)
	{}
	iterator begin()
	{
		for (size_t i = 0; i < _ht.size(); i++)
		{
			if (_ht[i])
			{
				return iterator(_ht[i], this);
			}
		}
		return iterator(nullptr, this);
	}
	iterator end()
	{
		return iterator(nullptr , this);
	}
	iterator find(const K& key)
	//Node* find(const K& key)
	{
		KeyOfValue kov;
		HashFun hfun;
		for (size_t i = 0; i < _ht.size(); i++)
		{
			Node* cur = _ht[i];
			while (cur)
			{
				if (hfun(kov(cur->_val)) == key)
				{
					//return cur;
					return iterator(cur, this);
				}
				cur = cur->_next;
			}
		}
		//return nullptr;
		return iterator(nullptr, this);
	}
	iterator erase(const K& key)
	//bool erase(const K& key)
	{
		KeyOfValue kov;
		HashFun hfun;
		int idx = key%_ht.size();
		Node* cur = _ht[idx];
		Node* prev = nullptr;
		if (hfun(kov(cur->_val)) == key)
		{
			Node* next = cur->_next;
			_ht[idx] = cur->_next;
			delete cur;
			cur = nullptr;
			_size--;
			return iterator(next, this);
		}
		while (cur)
		{
			Node* next = cur->_next;
			prev = cur;
			if (hfun(kov(cur->_val)) == key)
			{
				prev->_next = cur->_next;
				delete cur;
				_size--;
				return iterator(next, this);
			}
			cur = cur->_next;
		}
		return iterator(nullptr, this);
	}

	void CheckCapacity()
	{
		if (_size == _ht.size())
		{
			int newC = _size == 0 ? 10 : 2 * _size;
			vector<Node*> newht(newC);
			KeyOfValue kov;
			for (size_t i = 0; i < _ht.size(); i++)
			{
				Node* cur = _ht[i];
				while (cur)
				{
					Node* next = cur->_next;
					int idx = kov(cur->_val)%newht.size();
					cur->_next = newht[idx];
					newht[idx] = cur;
					cur = next;
				}
				_ht[i] = nullptr;
			}
			swap(_ht, newht);
		}
	}
	pair<iterator, bool> insert(const V& val)
	//bool insert(const V& val)
	{
		CheckCapacity();
		KeyOfValue kov;
		HashFun hfun;
		int idx =hfun(kov(val)) % _ht.size();
		Node* cur = _ht[idx];
		while (cur)
		{
			if (kov(cur->_val) == kov(val))
			{
				//return false;
				return make_pair(iterator(cur,this),false);
			}
			cur = cur->_next;
		}
		cur = new Node(val);
		cur->_next = _ht[idx];
		_ht[idx] = cur;
		++_size;
		//return ture;
		return make_pair(iterator(cur, this), true);
	}
private:
	vector<Node*> _ht;
	size_t _size;
};

template<class K,class V>
class Map
{
	struct MapKeyOfValue
	{
		const K& operator()(const pair<K, V>& val)
		{
			return val.first;
		}
	};
public:
	typedef typename HTable<K, pair<K, V>, MapKeyOfValue, HashFun<K>>::iterator iterator;
	pair<iterator, bool> insert(const pair<K, V>& val)
	{
		return _ht.insert(val);
	}
	iterator begin()
	{
		return _ht.begin();
	}
	iterator end()
	{
		return _ht.end();
	}
	V& operator[](const K& key)
	{
		pair<iterator, bool> ret = _ht.insert(make_pair(key, V()));
		return ret.first->second;
	}
	iterator find(const K& key)
	{
		return _ht.find(key);
	}
	iterator erase(const K& key)
	{
		return _ht.erase(key);
	}
private:
	HTable<K, pair<K, V>, MapKeyOfValue> _ht;
};

template <class K>
class Set
{
	struct SetKeyOfValue
	{
		const K& operator()(const K& key)
		{
			return key;
		}
	};
public:
	typedef typename HTable<K, K, SetKeyOfValue, HashFun<K>>::iterator iterator;
	iterator begin()
	{
		return _ht.begin();
	}
	iterator end()
	{
		return _ht.end();
	}
	pair<iterator,bool> insert(const K& key)
	{
		return _ht.insert(key);
	}
	iterator find(const K& key)
	{
		return _ht.find(key);
	}
	iterator erase(const K& key)
	{
		return _ht.erase(key);
	}
private:
	HTable<K, K, SetKeyOfValue> _ht;
};


void test()
{
	Map<int, int> m;
	m.insert(make_pair(1, 1));
	m[2] = 2;
	m[3] = 3;
	m[4] = 4;
	m[5] = 5;
	m[6] = 6;
	m[7] = 7;
	m[8] = 8;
	m.find(7);
	Map<int, int>::iterator itm = m.find(7);
	m.erase(7);
	m[9] = 9;
	Set<int> s;
	s.insert(5);
	s.insert(2);
	s.insert(3);
	s.insert(6);
	s.insert(5);
	s.insert(1);
	s.insert(7);
	s.insert(8);
	Set<int>::iterator its=s.find(7);
	s.erase(7);

}
int main()
{
	test();
	return 0;

}

标签：map,set,return,cur,idx,ht,key,unordered,size
来源： https://blog.csdn.net/qq_34270874/article/details/117361000

本站声明： 1. iCode9 技术分享网（下文简称本站）提供的所有内容，仅供技术学习、探讨和分享；
2. 关于本站的所有留言、评论、转载及引用，纯属内容发起人的个人观点，与本站观点和立场无关；
3. 关于本站的所有言论和文字，纯属内容发起人的个人观点，与本站观点和立场无关；
4. 本站文章均是网友提供，不完全保证技术分享内容的完整性、准确性、时效性、风险性和版权归属；如您发现该文章侵犯了您的权益，可联系我们第一时间进行删除；
5. 本站为非盈利性的个人网站，所有内容不会用来进行牟利，也不会利用任何形式的广告来间接获益，纯粹是为了广大技术爱好者提供技术内容和技术思想的分享性交流网站。

ICode9

unordered_map和unordered_set的模拟实现

定义

基本原理

哈希冲突

冲突解决

闭散列

开散列

哈希表的改造