Simple Chained Hash Table

In this lab session we will look at a hash table implementation using the simple chaining technique. Notice the tests check adding up to double the number of items into the table than the capacity. With simple chaining, the performance degradation is not as great, however normal use (where table is at least as big as number of items) is a lot worse. Also notice the amount of memory wasted is much higher.

HashTable

Header file that declares the interface for a hash table implementation using simple chaining.

// ChainedHashTable.h
#pragma once

#include <array>
#include <deque>
#include <functional>
#include <iterator>
#include <map>
#include <memory>
#include <set>
#include <sstream>
#include <vector>


namespace csc280
{
  template <typename T, std::size_t B, typename Hash = std::hash<T>,
    typename Equals = std::equal_to<T>>
  class ChainedHashTable final
  {
    using Deque = std::deque<T>;
    using Array = std::array<Deque, B>;
    using ArrayPtr = std::unique_ptr<Array>;

  public:
#include "private/CHIterator.h"

    using key_type = T;
    using value_type = T;

    using iterator = CHIterator<T, B, Hash, Equals>;
    using const_iterator = CHIterator<T, B, Hash, Equals>;

    ChainedHashTable() : buckets{ std::make_unique<Array>() } {}

    ~ChainedHashTable() = default;

    ChainedHashTable( const ChainedHashTable& ) = delete;
    ChainedHashTable& operator=( const ChainedHashTable& rhs ) = delete;

    auto begin() const -> iterator;
    auto cbegin() const -> const_iterator { return begin();  }

    auto end() const -> iterator;
    auto cend() const -> const_iterator { return end(); }

    auto insert( T data ) -> iterator;

    template <typename... Args>
    iterator emplace( Args&&... args ) { return insert( { args } ); }

    auto erase( const T& data ) -> iterator;

    void clear();

    auto find( const T& data ) const -> iterator;

    bool empty() const { return !count; }
    std::size_t size() const { return count; }
    std::size_t capacity() const { return B; }

    std::string getStats() const { return static_cast<std::string>( stats ); }

  private:
#include "private/CHStats.h"

    uint32_t bucketIndex( const T& data ) const;
    auto find( const T& data, uint32_t index, const Deque& deque ) const -> iterator;

    ArrayPtr buckets;
    Stats<B> stats;
    Hash hash;
    Equals equals;
    std::size_t count = 0;
  };

#include "private/CHTImpl.h"
}

Iterator

The iterator for the simple chained hash table. A bi-directional iterator implementation.

// private/CHIterator.h
template <typename T, std::size_t B, typename Hash, typename Equals>
struct CHIterator
{
  using difference_type = std::size_t;
  using value_type = T;
  using reference = T&;
  using const_reference = const T& ;
  using pointer = T*;
  using const_pointer = const T*;
  using iterator_category = std::bidirectional_iterator_tag;

  CHIterator() = delete;
  CHIterator( const CHIterator& ) = default;

  CHIterator& operator++()
  {
    if ( biter == buckets->cend() ) return *this;

    if ( diter != biter->cend() )
    {
      auto iter = diter;
      if ( ++iter != biter->cend() )
      {
        diter = iter;
        return *this;
      }
    }

    ++biter;
    nextBucket();
    return *this;
  }

  CHIterator& operator--()
  {
    if ( biter == buckets->cbegin() ) return *this;

    if ( diter != biter->cbegin() )
    {
      --diter;
      return *this;
    }

    --biter;
    previousBucket();
    return *this;
  }

  bool operator==( const CHIterator& rhs ) const
  {
    const auto value = buckets == rhs.buckets && biter == rhs.biter;
    return ( biter == buckets->cend() ) ? value : value && diter == rhs.diter;
  }

  bool operator!=( const CHIterator& rhs ) const
  {
    return !operator==( rhs );
  }

  const_reference operator*() const
  {
    return diter.operator*();
  }

  const_pointer operator->() const
  {
    return diter.operator->();
  }

private:
  explicit CHIterator( typename const ArrayPtr::pointer buckets ) :
    buckets{ buckets }, biter{ buckets->cbegin() }, diter{ biter->cbegin() }
  {
    nextBucket();
  }

  explicit CHIterator( typename const ArrayPtr::pointer buckets, const std::size_t index ) :
    buckets{ buckets }, biter{ buckets->cbegin() }
  {
    std::advance( biter, index );
    diter = biter->cbegin();
  }

  void nextBucket()
  {
    if ( biter == buckets->cend() ) return;
    if ( !biter->empty() ) diter = biter->cbegin();

    while ( biter->empty() )
    {
      ++biter;
      if ( biter == buckets->cend() ) break;
      diter = biter->cbegin();
    }
  }

  void previousBucket()
  {
    const auto lambda = [this]()
    {
      diter = biter->cend();
      if ( !biter->empty() ) std::advance( diter, -1 );
    };

    if ( biter == buckets->cbegin() )
    {
      lambda();
      return;
    }

    if ( !biter->empty() ) lambda();

    while ( biter->empty() )
    {
      --biter;
      if ( biter == buckets->cbegin() )
      {
        lambda();
        break;
      }

      lambda();
    }
  }

  typename const ArrayPtr::pointer buckets;
  typename Array::const_iterator biter;
  typename Deque::const_iterator diter;

  friend class ChainedHashTable<T, B, Hash, Equals>;
};

Implementation

Implementation of the hash table using simple chaining. Implementation details pulled into private header file to make the public interface easier to read.

// private/CHTImpl.h
template <typename T, std::size_t B, typename Hash, typename Equals>
auto ChainedHashTable<T, B, Hash, Equals>::begin() const -> iterator
{
  return iterator( buckets.get() );
}

template <typename T, std::size_t B, typename Hash, typename Equals>
auto ChainedHashTable<T, B, Hash, Equals>::end() const -> iterator
{
  auto iter = iterator( buckets.get() );
  iter.biter = buckets->cend();
  return iter;
}

template<typename T, std::size_t B, typename Hash, typename Equals>
auto ChainedHashTable<T, B, Hash, Equals>::insert( T data ) -> iterator
{
  auto index = bucketIndex( data );
  auto& deque = buckets->at( index );
  auto iter = find( data, index, deque );

  if ( iter == cend() )
  {
    deque.push_back( std::move( data ) );
    ++count;
    stats.addOccupied( index );

    if ( deque.size() > 1 ) stats.addCollision( index );

    iter.biter = buckets->cbegin();
    std::advance( iter.biter, index );
    iter.diter = deque.cend();
    std::advance( iter.diter, -1 );
  }

  return iter;
}

template<typename T, std::size_t B, typename Hash, typename Equals>
auto ChainedHashTable<T, B, Hash, Equals>::erase( const T& data ) -> iterator
{
  auto index = bucketIndex( data );
  auto& deque = buckets->at( index );
  auto iter = find( data );

  if ( iter == cend() ) return iter;

  auto ret = iterator( buckets.get(), index );
  ret.diter = deque.erase( iter.diter );
  --count;

  stats.reduceCollision( index );
  if ( deque.empty() ) stats.removeOccupied( index );

  return ret;
}

template <typename T, std::size_t B, typename Hash, typename Equals>
void ChainedHashTable<T, B, Hash, Equals>::clear()
{
  for ( auto iter = buckets->begin(); iter != buckets->end(); ++iter )
  {
    iter->clear();
  }
  stats.clear();
  count = 0;
}

template<typename T, std::size_t B, typename Hash, typename Equals>
auto ChainedHashTable<T, B, Hash, Equals>::find( const T& data ) const -> iterator
{
  auto index = bucketIndex( data );
  auto& deque = buckets->at( index );
  return find( data, index, deque );
}

template <typename T, std::size_t B, typename Hash, typename Equals>
uint32_t ChainedHashTable<T, B, Hash, Equals>::bucketIndex( const T& data ) const
{
  auto hcode = hash( data );
  return hcode % B;
}

template <typename T, std::size_t B, typename Hash, typename Equals>
auto ChainedHashTable<T, B, Hash, Equals>::
find( const T& data, uint32_t index, const Deque& deque ) const -> iterator
{
  for ( auto iter = deque.cbegin(); iter != deque.cend(); ++iter )
  {
    if ( equals( *iter, data ) )
    {
      auto ret = iterator( buckets.get(), index );
      ret.diter = iter;
      return ret;
    }
  }

  return cend();
}

Stats

A simple structure to keep track about some interesting storage characteristics of the hash table. The unit tests will print out the JSON data representation of the statistics for analysis.

// private/CHStats.h
template <int B>
struct Stats
{
  void addCollision( const std::size_t index )
  {
    if ( collisions.find( index ) == collisions.cend() ) collisions[index] = 1;
    else ++collisions[index];
  }

  void reduceCollision( const std::size_t index )
  {
    if ( collisions[index] ) --collisions[index];
  }

  void addOccupied( const std::size_t index )
  {
    if ( occupied.find( index ) == occupied.cend() ) occupied.insert( index );
  }

  void removeOccupied( const std::size_t index )
  {
    occupied.erase( index );
  }

  void clear()
  {
    occupied.clear();
    collisions.clear();
  }

  operator std::string() const
  {
    std::ostringstream ss;

    ss << "{\n" <<
      "  \"bucketCount\": " << B << ",\n" <<
      "  \"occupiedBuckets\": " << occupied.size() << ",\n" <<
      "  \"emptyBuckets\": " << ( B - occupied.size() ) << ",\n" <<
      "  \"usePercentage\": " << percentage() << ",\n" <<
      "  \"bucketsWithCollisions\": " << collisions.size() << ",\n" <<
      "  \"maxCollisions\": " << max() << "\n" <<
      "}\n";

    return ss.str();
  }

private:
  std::size_t max() const
  {
    auto entry = std::max_element( collisions.cbegin(), collisions.cend(),
      []( const std::pair<std::size_t, std::size_t>& lhs,
      const std::pair<std::size_t, std::size_t>& rhs )
    {
      return lhs.second < rhs.second;
    } );
    return entry->second;
  }

  float percentage() const
  {
    return 100.0f * ( occupied.size() / float( B ) );
  }

  std::set<std::size_t> occupied;
  std::map<std::size_t, std::size_t> collisions;
};

Book

A simple data structure to use for some rudimentary tests with the hash table implementation. Same as the structure used in Lab4.

// Book.h
#pragma once

#include <ostream>
#include <string>

namespace csc280
{
  class Book
  {
  public:
    const std::string& getTitle() const noexcept { return title; }
    Book& setTitle( const std::string& value )
    {
      title = value;
      return *this;
    }

    const std::string& getSubtitle() const noexcept { return subtitle; }
    Book& setSubtitle( const std::string& value )
    {
      subtitle = value;
      return *this;
    }

    const std::string& getAuthor() const noexcept { return author; }
    Book& setAuthor( const std::string& value )
    {
      author = value;
      return *this;
    }

    float getPrice() const noexcept { return price; }
    Book& setPrice( float value )
    {
      price = value;
      return *this;
    }

  private:
    std::string title;
    std::string subtitle;
    std::string author;
    float price = 0.0;
  };

  inline bool operator==( const Book& lhs, const Book& rhs )
  {
    return lhs.getAuthor() == rhs.getAuthor() &&
      lhs.getPrice() == rhs.getPrice() &&
      lhs.getTitle() == rhs.getTitle() &&
      lhs.getSubtitle() == rhs.getSubtitle();
  }

  inline std::ostream& operator<<( std::ostream& os, const Book& book )
  {
    os << "Book - title: " << book.getTitle() <<
      "; subtitle: " << book.getSubtitle() <<
      "; author: " << book.getAuthor() <<
      "; price: " << book.getPrice();
    return os;
  }
}

namespace std
{
  template<>
  struct hash<csc280::Book>
  {
    size_t operator()( const csc280::Book& book ) const
    {
      using std::hash;
      return hash<string>()( book.getTitle() ) ^
        ( hash<string>()( book.getSubtitle() ) << 1 ) ^
        ( hash<string>()( book.getAuthor() )  << 1 ) >> 1 ^
        ( hash<float>()( book.getPrice() ) << 1 );
    }
  };
}

namespace csc280
{
  struct BookPointerHash
  {
    std::size_t operator()( const Book* book ) const
    {
      using std::hash;
      return book ? hash<Book>()( *book ) : 0;
    }
  };

  struct BookPointerEquals
  {
    bool operator()( const Book* lhs, const Book* rhs ) const
    {
      if ( lhs && rhs ) return *lhs == *rhs;
      if ( !lhs && !rhs ) return true;
      return false;
    }
  };
}

Test1

Simple test suite using the user defined Book structure. Same test as in Lab4 but using the simple chained hash table implementation instead of std::unordered_set.

// ChainedSet.cpp
#include "catch.hpp"
#include "../main/Book.h"
#include "../main/ChainedHashTable.h"

SCENARIO( "Hashing same book in stages using ChainedHashTable" )
{
  GIVEN( "An unordered_hashTable of book instances" )
  {
    using HashTable = csc280::ChainedHashTable<csc280::Book, 10>;
    HashTable hashTable;

    THEN( "Changing book properties makes book invisible" )
    {
      csc280::Book book;
      hashTable.insert( book );
      REQUIRE( 1 == hashTable.size() );

      book.setTitle( "Something" );
      REQUIRE( hashTable.find( book ) == hashTable.cend() );

      hashTable.clear();
      REQUIRE( hashTable.empty() );

      hashTable.insert( book );
      REQUIRE( 1 == hashTable.size() );
      book.setTitle( "Something" );
      REQUIRE( hashTable.find( book ) != hashTable.cend() );

      hashTable.clear();
      hashTable.insert( book );
      book.setSubtitle( "Else" );
      REQUIRE( hashTable.find( book ) == hashTable.cend() );
    }

    AND_THEN( "Adding same book with different information creates copies" )
    {
      csc280::Book book;
      book.setTitle( "Something" );
      hashTable.insert( book );
      REQUIRE( 1 == hashTable.size() );
      REQUIRE_FALSE( hashTable.find( book ) == hashTable.cend() );

      hashTable.insert( book );
      REQUIRE( 1 == hashTable.size() );

      book.setSubtitle( "Else" );
      hashTable.insert( book );
      REQUIRE( 2 == hashTable.size() );
    }
  }

  GIVEN( "An unordered_hashTable of book pointers" )
  {
    using HashTable = csc280::ChainedHashTable<csc280::Book*, 10>;
    HashTable hashTable;

    THEN( "Changing book properties does not affect visibility" )
    {
      csc280::Book book;
      hashTable.insert( &book );
      REQUIRE( 1 == hashTable.size() );

      book.setTitle( "Something" );
      REQUIRE_FALSE( hashTable.find( &book ) == hashTable.cend() );
    }

    AND_THEN( "Copy of book is not visisble" )
    {
      csc280::Book book;
      book.setTitle( "Something" );
      hashTable.insert( &book );
      REQUIRE( 1 == hashTable.size() );

      csc280::Book other = book;
      REQUIRE( hashTable.find( &other ) == hashTable.cend() );
    }
  }

  GIVEN( "An unordered_hashTable of book pointers with custom hash function" )
  {
    using HashTable = csc280::ChainedHashTable<csc280::Book*, 10, csc280::BookPointerHash, csc280::BookPointerEquals>;
    HashTable hashTable;

    THEN( "Changing book properties affects visibility" )
    {
      csc280::Book book;
      hashTable.insert( &book );
      REQUIRE( 1 == hashTable.size() );

      book.setTitle( "Something" );
      REQUIRE( hashTable.find( &book ) == hashTable.cend() );
    }

    AND_THEN( "Copy of book is visisble" )
    {
      csc280::Book book;
      book.setTitle( "Something" );
      hashTable.insert( &book );
      REQUIRE( 1 == hashTable.size() );

      csc280::Book other = book;
      REQUIRE_FALSE( hashTable.find( &other ) == hashTable.cend() );
    }
  }
}

Test2

Unit test suite which exercises most features of the hash table implementation. Also contains tests with varying sizes of tables and their performance metrics.

// ChainedHashTable.cpp
#include "catch.hpp"
#include "functions.h"
#include "../main/ChainedHashTable.h"

#include <algorithm>
#include <chrono>
#include <cstdlib>
#include <ctime>
#include <iostream>

SCENARIO( "Chained hash table operations with integers" )
{
  GIVEN( "A chained hash table of integers" )
  {
    constexpr std::size_t size = 100;
    csc280::ChainedHashTable<uint32_t, size> hash;
    REQUIRE( size == hash.capacity() );

    WHEN( "Adding same number of random numbers as table size" )
    {
      std::srand( static_cast<std::size_t>( std::time( nullptr ) ) );

      for ( std::size_t i = 0; i < size; ++i )
      {
        auto value = std::rand();
        while ( hash.find( value ) != hash.cend() ) value = std::rand();
        hash.insert( value );
      }

      THEN( "Iterating all stored numbers" )
      {
        auto counter = 0;
        for ( auto iter = hash.cbegin(); iter != hash.cend(); ++iter )
        {
          auto value = *iter;
          ++counter;
        }

        REQUIRE( counter == size );
      }

      AND_THEN( "Can iterate in reverse from middle" )
      {
        auto iter = hash.cbegin();
        std::advance( iter, 10 );
        for ( std::size_t i = 0; i < 10; ++i )
        {
          --iter;
          auto value = *iter;
        }
      }

      AND_THEN( "Removing elements one-by-one" )
      {
        auto count = hash.size();
        for ( std::size_t i = 0; i < size; ++i )
        {
          const auto iter = hash.cbegin();
          auto value = *iter;
          hash.erase( value );
          REQUIRE( hash.size() == --count );
          REQUIRE( hash.find( value ) == hash.cend() );
        }

        REQUIRE( hash.empty() );
      }
    }
  }

  csc280::test::ofs.flush();
}

SCENARIO( "Generating statistics for chained hash tables with varying capacity" )
{
  using std::chrono::duration_cast;
  using std::chrono::high_resolution_clock;
  using std::chrono::microseconds;

  GIVEN( "A hash table with capacity 100" )
  {
    constexpr std::size_t size = 100;
    csc280::ChainedHashTable<uint32_t, size> hash;

    WHEN( "Adding same number of random numbers as table size" )
    {
      const auto t1 = high_resolution_clock::now();
      std::srand( static_cast<std::size_t>( std::time( nullptr ) ) );

      for ( std::size_t i = 0; i < size; ++i )
      {
        auto value = std::rand();
        while ( hash.find( value ) != hash.cend() ) value = std::rand();
        hash.insert( value );
      }

      csc280::test::ofs << "Chained hash table stats for capacity 100\n" << hash.getStats();

      const auto t2 = high_resolution_clock::now();
      const auto duration = duration_cast<microseconds>( t2 - t1 ).count();
      std::cout << "Time to add " << size << " numbers to chained hash table: " <<
        duration << " microseconds\n";
    }

    AND_WHEN( "Adding double the number of random numbers as table size" )
    {
      const auto t1 = high_resolution_clock::now();
      std::srand( static_cast<std::size_t>( std::time( nullptr ) ) );

      for ( std::size_t i = 0; i < size * 2; ++i )
      {
        auto value = std::rand();
        while ( hash.find( value ) != hash.cend() ) value = std::rand();
        hash.insert( value );
      }

      csc280::test::ofs << "Chained hash table stats for capacity 100 with double the number of entries\n" << hash.getStats();

      const auto t2 = high_resolution_clock::now();
      const auto duration = duration_cast<microseconds>( t2 - t1 ).count();
      std::cout << "Time to add 2x" << size << " numbers to chained hash table: " <<
        duration << " microseconds\n";
    }
  }

  GIVEN( "A hash table with capacity 500" )
  {
    constexpr std::size_t size = 500;
    csc280::ChainedHashTable<uint32_t, size> hash;

    WHEN( "Adding same number of random numbers as table size" )
    {
      const auto t1 = high_resolution_clock::now();
      std::srand( static_cast<std::size_t>( std::time( nullptr ) ) );

      for ( std::size_t i = 0; i < size; ++i )
      {
        auto value = std::rand();
        while ( hash.find( value ) != hash.cend() ) value = std::rand();
        hash.insert( value );
      }

      csc280::test::ofs << "Chained hash table stats for capacity 500\n" << hash.getStats();

      const auto t2 = high_resolution_clock::now();
      const auto duration = duration_cast<microseconds>( t2 - t1 ).count();
      std::cout << "Time to add " << size << " numbers to chained hash table: " <<
        duration << " microseconds\n";
    }

    AND_WHEN( "Adding double the number of random numbers as table size" )
    {
      const auto t1 = high_resolution_clock::now();
      std::srand( static_cast<std::size_t>( std::time( nullptr ) ) );

      for ( std::size_t i = 0; i < size * 2; ++i )
      {
        auto value = std::rand();
        while ( hash.find( value ) != hash.cend() ) value = std::rand();
        hash.insert( value );
      }

      csc280::test::ofs << "Chained hash table stats for capacity 500 with double the number of entries\n" << hash.getStats();

      const auto t2 = high_resolution_clock::now();
      const auto duration = duration_cast<microseconds>( t2 - t1 ).count();
      std::cout << "Time to add 2x" << size << " numbers to chained hash table: " <<
        duration << " microseconds\n";
    }
  }

  GIVEN( "A hash table with capacity 1000" )
  {
    constexpr std::size_t size = 1000;
    csc280::ChainedHashTable<uint32_t, size> hash;

    WHEN( "Adding same number of random numbers as table size" )
    {
      const auto t1 = high_resolution_clock::now();
      std::srand( static_cast<std::size_t>( std::time( nullptr ) ) );

      for ( std::size_t i = 0; i < size; ++i )
      {
        auto value = std::rand();
        while ( hash.find( value ) != hash.cend() ) value = std::rand();
        hash.insert( value );
      }

      csc280::test::ofs << "Chained hash table stats for capacity 1000\n" << hash.getStats();

      const auto t2 = high_resolution_clock::now();
      const auto duration = duration_cast<microseconds>( t2 - t1 ).count();
      std::cout << "Time to add " << size << " numbers to chained hash table: " <<
        duration << " microseconds\n";
    }

    AND_WHEN( "Adding double the number of random numbers as table size" )
    {
      const auto t1 = high_resolution_clock::now();
      std::srand( static_cast<std::size_t>( std::time( nullptr ) ) );

      for ( std::size_t i = 0; i < size * 2; ++i )
      {
        auto value = std::rand();
        while ( hash.find( value ) != hash.cend() ) value = std::rand();
        hash.insert( value );
      }

      csc280::test::ofs << "Chained hash table stats for capacity 1000 with double the number of entries\n" << hash.getStats();

      const auto t2 = high_resolution_clock::now();
      const auto duration = duration_cast<microseconds>( t2 - t1 ).count();
      std::cout << "Time to add 2x" << size << " numbers to chained hash table: " <<
        duration << " microseconds\n";
    }
  }

  GIVEN( "A hash table with capacity 5000" )
  {
    constexpr std::size_t size = 5000;
    csc280::ChainedHashTable<uint32_t, size> hash;

    WHEN( "Adding same number of random numbers as table size" )
    {
      const auto t1 = high_resolution_clock::now();
      std::srand( static_cast<std::size_t>( std::time( nullptr ) ) );

      for ( std::size_t i = 0; i < size; ++i )
      {
        auto value = std::rand();
        while ( hash.find( value ) != hash.cend() ) value = std::rand();
        hash.insert( value );
      }

      csc280::test::ofs << "Chained hash table stats for capacity 5000\n" << hash.getStats();

      const auto t2 = high_resolution_clock::now();
      const auto duration = duration_cast<microseconds>( t2 - t1 ).count();
      std::cout << "Time to add " << size << " numbers to chained hash table: " <<
        duration << " microseconds\n";
    }

    AND_WHEN( "Adding double the number of random numbers as table size" )
    {
      const auto t1 = high_resolution_clock::now();
      std::srand( static_cast<std::size_t>( std::time( nullptr ) ) );

      for ( std::size_t i = 0; i < size * 2; ++i )
      {
        auto value = std::rand();
        while ( hash.find( value ) != hash.cend() ) value = std::rand();
        hash.insert( value );
      }

      csc280::test::ofs << "Chained hash table stats for capacity 5000 with double the number of entries\n" << hash.getStats();

      const auto t2 = high_resolution_clock::now();
      const auto duration = duration_cast<microseconds>( t2 - t1 ).count();
      std::cout << "Time to add 2x" << size << " numbers to chained hash table: " <<
        duration << " microseconds\n";
    }
  }

  GIVEN( "A hash table with capacity 10000" )
  {
    constexpr std::size_t size = 10000;
    csc280::ChainedHashTable<uint32_t, size> hash;

    WHEN( "Adding same number of random numbers as table size" )
    {
      const auto t1 = high_resolution_clock::now();
      std::srand( static_cast<std::size_t>( std::time( nullptr ) ) );

      for ( std::size_t i = 0; i < size; ++i )
      {
        auto value = std::rand();
        while ( hash.find( value ) != hash.cend() ) value = std::rand();
        hash.insert( value );
      }

      csc280::test::ofs << "Chained hash table stats for capacity 10000\n" << hash.getStats();

      const auto t2 = high_resolution_clock::now();
      const auto duration = duration_cast<microseconds>( t2 - t1 ).count();
      std::cout << "Time to add " << size << " numbers to chained hash table: " <<
        duration << " microseconds\n";
    }

    AND_WHEN( "Adding double the number of random numbers as table size" )
    {
      const auto t1 = high_resolution_clock::now();
      std::srand( static_cast<std::size_t>( std::time( nullptr ) ) );

      for ( std::size_t i = 0; i < size * 2; ++i )
      {
        auto value = std::rand();
        while ( hash.find( value ) != hash.cend() ) value = std::rand();
        hash.insert( value );
      }

      csc280::test::ofs << "Chained hash table stats for capacity 10000 with double the number of entries\n" << hash.getStats();

      const auto t2 = high_resolution_clock::now();
      const auto duration = duration_cast<microseconds>( t2 - t1 ).count();
      std::cout << "Time to add 2x" << size << " numbers to chained hash table: " <<
        duration << " microseconds\n";
    }
  }

  GIVEN( "A hash table of strings with capacity 10000" )
  {
    constexpr std::size_t size = 10000;
    csc280::ChainedHashTable<std::string, size> hash;

    WHEN( "Adding same number of random strings as table size" )
    {
      const auto t1 = high_resolution_clock::now();
      for ( std::size_t i = 0; i < size; ++i )
      {
        auto value = csc280::test::generateRandomString();
        while ( hash.find( value ) != hash.cend() ) value = csc280::test::generateRandomString();
        hash.insert( std::move( value ) );
      }

      csc280::test::ofs << "Chained hash table of strings stats for capacity 10000\n" << hash.getStats();

      const auto t2 = high_resolution_clock::now();
      const auto duration = duration_cast<microseconds>( t2 - t1 ).count();
      std::cout << "Time to add " << size << " strings to chained hash table: " <<
        duration << " microseconds\n";
    }

    AND_WHEN( "Adding double the number of random strings as table size" )
    {
      const auto t1 = high_resolution_clock::now();
      for ( std::size_t i = 0; i < size * 2; ++i )
      {
        auto value = csc280::test::generateRandomString();
        while ( hash.find( value ) != hash.cend() ) value = csc280::test::generateRandomString();
        hash.insert( std::move( value ) );
      }

      csc280::test::ofs << "Chained hash table of strings stats for capacity 10000 with double the number of entries\n" << hash.getStats();

      const auto t2 = high_resolution_clock::now();
      const auto duration = duration_cast<microseconds>( t2 - t1 ).count();
      std::cout << "Time to add 2x" << size << " strings to chained hash table: " <<
        duration << " microseconds\n";
    }

    AND_WHEN( "Finding each string added to table of capacity 10000" )
    {
      std::vector<std::string> values;
      values.reserve( size );

      for ( std::size_t i = 0; i < size; ++i )
      {
        auto value = csc280::test::generateRandomString();
        while ( hash.find( value ) != hash.cend() ) value = csc280::test::generateRandomString();
        values.push_back( value );
        hash.insert( std::move( value ) );
      }

      const auto t1 = high_resolution_clock::now();
      const auto iter = hash.cend();

      for ( const auto& value : values )
      {
        REQUIRE( hash.find( value ) != iter );
      }

      const auto t2 = high_resolution_clock::now();
      const auto duration = duration_cast<microseconds>( t2 - t1 ).count();
      std::cout << "Time to find " << size << " strings in chained hash table: " <<
        duration << " microseconds\n";
    }

    AND_WHEN( "Finding each string added to table of double capacity" )
    {
      std::vector<std::string> values;
      values.reserve( size / 2 );

      for ( std::size_t i = 0; i < size / 2; ++i )
      {
        auto value = csc280::test::generateRandomString();
        while ( hash.find( value ) != hash.cend() ) value = csc280::test::generateRandomString();
        values.push_back( value );
        hash.insert( std::move( value ) );
      }

      const auto t1 = high_resolution_clock::now();
      const auto iter = hash.cend();

      for ( const auto& value : values )
      {
        REQUIRE( hash.find( value ) != iter );
      }

      const auto t2 = high_resolution_clock::now();
      const auto duration = duration_cast<microseconds>( t2 - t1 ).count();
      std::cout << "Time to find " << size / 2 << 
        " strings in chained hash table of capacity " << size
        << ": " << duration << " microseconds\n";
    }
  }

  csc280::test::ofs.flush();
}

Sans Pareil Technologies, Inc.

Key To Your Business

Simple Chained Hash Table