Skip to content
Open
Show file tree
Hide file tree
Changes from 17 commits
Commits
Show all changes
42 commits
Select commit Hold shift + click to select a range
08d0052
changes to enable better allocator support. this has been tested with…
kstppd Sep 6, 2024
22133a4
Merge branch 'master' of github.com:kstppd/hashinator into better_all…
kstppd Sep 6, 2024
6a5d4bc
use std allocator to demonstrate last commit's functionallity
kstppd Sep 6, 2024
ad05504
remove split host allocator since there is no need for it anymore
kstppd Sep 6, 2024
f2be8b3
fix hashinator ctpr
kstppd Sep 7, 2024
1ababbc
Add some more ctors for splitvectors and a host unit test with umpire
kstppd Sep 7, 2024
754aae0
more umpire tests
kstppd Sep 16, 2024
a53b4e9
update allocators in splittools
kstppd Sep 16, 2024
5b1e7e6
unit tests update
kstppd Sep 16, 2024
c2a0e96
Meson update to only build and test Umpire unit tests if Umpire is in…
kstppd Sep 28, 2024
3e09873
Implement some first changes at 38000ft
kstppd Oct 6, 2024
9cb9486
And update README
kstppd Oct 6, 2024
ff4d157
do not use offset of in non POD stuff
kstppd Dec 7, 2024
27789a6
Use optional to fix compiler warning us for no return in function
kstppd Dec 8, 2024
714d57d
Update README.md
kstppd Dec 8, 2024
12c58fa
Add memtest unit
kstppd Feb 9, 2025
8170005
Change splitvector's swap so that is it does not check for swapping w…
kstppd Feb 10, 2025
06e7013
finally an update
kstppd Apr 4, 2025
12f1a67
comment added
kstppd Apr 6, 2025
85a61a7
revert back to previous behavior for device buckets
kstppd Apr 6, 2025
3130b6b
some fixes and less restrictiv split vector contructors
kstppd Apr 8, 2025
60d65cc
some fixes and less restrictiv split vector contructors
kstppd Apr 8, 2025
3e69d91
fix
kstppd Apr 8, 2025
a2409ed
Revert "fix"
kstppd Apr 8, 2025
a108a65
tRevert "some fixes and less restrictiv split vector contructors"
kstppd Apr 8, 2025
aa9922b
Revert "some fixes and less restrictiv split vector contructors"
kstppd Apr 8, 2025
2361d04
fix
kstppd Apr 8, 2025
93653e9
relax is_trivial
kstppd Apr 8, 2025
2cdd964
Revert "Use optional to fix compiler warning us for no return in func…
kstppd Apr 12, 2025
b13a279
remove duplicate include
kstppd Apr 12, 2025
65d3dfa
comment update
kstppd Apr 12, 2025
1448978
Merge remote-tracking branch 'fmihpc/master' into better_allocator_su…
kstppd Apr 12, 2025
1a24b37
avoid a small git wreck
kstppd Apr 12, 2025
4290365
memcpy fix
kstppd Apr 12, 2025
fd4c004
use library in meson for umpire detection
kstppd Jun 6, 2025
c6d374c
update tests for Umpire compatibillity
kstppd Jun 10, 2025
6d63fcf
off with device buckets and the offset of thingy
kstppd Jun 11, 2025
60b87c6
test coarse mem
kstppd Jun 13, 2025
218dcc7
update bench script
kstppd Jun 13, 2025
b25c150
update bench script and allow coarse grain memory when running on AMD hw
kstppd Jun 13, 2025
a3d3d7f
Oops missing main type
kstppd Aug 14, 2025
dfd59c5
Oops missing main type
kstppd Aug 14, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 39 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -153,6 +153,45 @@ int main()
hmap.insert(src.data(),src.size());
}
```
### Hashinator and SplitVector can also be used with external allocators

```c++
#include "splitvec.h"
//main.cu
using vector = split::SplitVector<int,std::allocator<int>>;


// Heap-allocates a SplitVector, enlarges its capacity and prints one element.
int main()
{
vector* vec = new vector{1,2,3,4,5};
vec->reserve(128);
// `vec` is a pointer: dereference it before indexing. Writing `vec[3]` would
// index the pointer itself (a non-existent 4th vector object), not element 3.
std::cout<<(*vec)[3]<<std::endl;
delete vec;
}
```

```c++
//main.cpp
#include "hashinator.h"

int main()
{

   Hashmap<uint32_t,uint32_t,std::allocator<uint32_t>> hmap;

   //Write
   for (uint32_t i=0 ; i<64; ++i){
      hmap[i]=rand()%10000;
   }

   //Read
   for (const auto& i:hmap){
      std::cout<<"["<<i.first<<" "<<i.second<<"] ";
   }
   std::cout<<std::endl;
}
```

`nvcc main.cu -std=c++17 --expt-relaxed-constexpr --expt-extended-lambda -gencode arch=compute_80,code=sm_80 -o example`

You can have a look in the Doxygen for a more feature-rich explanation of the methods and tools included!
Expand Down
1 change: 1 addition & 0 deletions include/common.h
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,7 @@ inline bool isDeviceAccessible(void* ptr){
}
return true;
#endif
return false;
}

/**
Expand Down
74 changes: 37 additions & 37 deletions include/hashinator/hashinator.h
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ using DefaultAllocator = split::split_unified_allocator<T>;
defaults::elementsPerWarp>
#else
template <typename T>
using DefaultAllocator = split::split_host_allocator<T>;
using DefaultAllocator = std::allocator<T>;
#define DefaultHasher void
#endif

Expand Down Expand Up @@ -116,7 +116,7 @@ class Hashmap {
inline void set_status(status code) noexcept { _mapInfo->err = code; }

public:
//By default we allocate enough space for 1<<5 elements
//By default we allocate enough space for 1<<5 (32) elements
Hashmap():_allocator(Allocator{}) {
preallocate_device_handles();
_mapInfo = reinterpret_cast<MapInfo*>(_allocator.allocate(get_number_of_Ts_for_Map_Info()));
Expand Down Expand Up @@ -323,9 +323,12 @@ class Hashmap {

size_t priorFill = _mapInfo->fill;
// Extract all valid elements
hash_pair<KEY_TYPE, VAL_TYPE>* validElements;
SPLIT_CHECK_ERR(split_gpuMallocAsync((void**)&validElements,
(_mapInfo->fill + 1) * sizeof(hash_pair<KEY_TYPE, VAL_TYPE>), s));
hash_pair<KEY_TYPE, VAL_TYPE>* validElements=nullptr;
const std::size_t alloc_size=_mapInfo->fill+1;
validElements=_allocator.allocate(alloc_size);
if (!validElements){
throw std::runtime_error("ERORR: Failed to allocate temporary buffer in device rehash");
}
if constexpr (prefetches) {
optimizeGPU(s);
SPLIT_CHECK_ERR(split_gpuStreamSynchronize(s));
Expand All @@ -347,7 +350,7 @@ class Hashmap {
if (newSizePower == _mapInfo->sizePower && nValidElements == 0) {
clear<prefetches>(targets::device, s, 1 << newSizePower);
set_status((priorFill == _mapInfo->fill) ? status::success : status::fail);
split_gpuFreeAsync(validElements, s);
_allocator.deallocate(validElements,alloc_size);
return;
}
if (newSizePower == _mapInfo->sizePower) {
Expand All @@ -365,7 +368,7 @@ class Hashmap {
// Insert valid elements to now larger buckets
insert(validElements, nValidElements, 1, s);
set_status((priorFill == _mapInfo->fill) ? status::success : status::fail);
split_gpuFreeAsync(validElements, s);
_allocator.deallocate(validElements,alloc_size);
Comment thread
markusbattarbee marked this conversation as resolved.
return;
}
#endif
Expand Down Expand Up @@ -1336,10 +1339,12 @@ class Hashmap {
// Allocate memory for overflown elements. So far this is the same size as our buckets but we can be better than
// this

hash_pair<KEY_TYPE, VAL_TYPE>* overflownElements;
SPLIT_CHECK_ERR(split_gpuMallocAsync((void**)&overflownElements,
(1 << _mapInfo->sizePower) * sizeof(hash_pair<KEY_TYPE, VAL_TYPE>), s));

hash_pair<KEY_TYPE, VAL_TYPE>* overflownElements=nullptr;
const std::size_t alloc_size=_mapInfo->fill+1;
overflownElements=_allocator.allocate(alloc_size);
if (!overflownElements){
throw std::runtime_error("ERORR: Failed to allocate temporary buffer in tombstone cleaning");
}
if constexpr (prefetches) {
optimizeGPU(s);
}
Expand Down Expand Up @@ -1367,7 +1372,7 @@ class Hashmap {
_mapInfo->currentMaxBucketOverflow = defaults::BUCKET_OVERFLOW;

if (nOverflownElements == 0) {
SPLIT_CHECK_ERR(split_gpuFreeAsync(overflownElements, s));
_allocator.deallocate(overflownElements,alloc_size);
return;
}
// If we do have overflown elements we put them back in the buckets
Expand All @@ -1376,7 +1381,7 @@ class Hashmap {

DeviceHasher::insert(overflownElements, buckets.data(), _mapInfo, nOverflownElements, s);

SPLIT_CHECK_ERR(split_gpuFreeAsync(overflownElements, s));
_allocator.deallocate(overflownElements,alloc_size);
return;
}

Expand Down Expand Up @@ -1484,9 +1489,9 @@ class Hashmap {
if constexpr (prefetches) {
optimizeGPU(stream);
}
// device_buckets = (split::SplitVector<hash_pair<KEY_TYPE, VAL_TYPE>>*)((char*)device_map + offsetof(Hashmap,
// buckets)); SPLIT_CHECK_ERR(split_gpuMemcpyAsync(device_map, this, sizeof(Hashmap), split_gpuMemcpyHostToDevice,
// stream));
device_buckets = (split::SplitVector<hash_pair<KEY_TYPE, VAL_TYPE>, Allocator>*)((char*)device_map +
offsetof(Hashmap, buckets));
SPLIT_CHECK_ERR(split_gpuMemcpyAsync(device_map, this, sizeof(Hashmap), split_gpuMemcpyHostToDevice, stream));
return device_map;
}

Expand Down Expand Up @@ -1813,27 +1818,22 @@ class Hashmap {
}

HASHINATOR_DEVICEONLY
const VAL_TYPE& read_element(const KEY_TYPE& key) const {
int bitMask = (1 << _mapInfo->sizePower) - 1; // For efficient modulo of the array size
auto hashIndex = hash(key);

// Try to find the matching bucket.
const size_t bsize = buckets.size();
for (size_t i = 0; i < bsize; i++) {
uint32_t vecindex = (hashIndex + i) & bitMask;
const hash_pair<KEY_TYPE, VAL_TYPE>& candidate = buckets[vecindex];
if (candidate.first == key) {
// Found a match, return that
return candidate.second;
}
if (candidate.first == EMPTYBUCKET) {
// Found an empty bucket, so error.
assert(false && "Key does not exist");
}
}
assert(false && "Key does not exist");
}

std::optional<std::reference_wrapper<const VAL_TYPE>> read_element(const KEY_TYPE& key) const {
int bitMask = (1 << _mapInfo->sizePower) - 1;
auto hashIndex = hash(key);
const size_t bsize = buckets.size();
for (size_t i = 0; i < bsize; i++) {
uint32_t vecindex = (hashIndex + i) & bitMask;
const hash_pair<KEY_TYPE, VAL_TYPE>& candidate = buckets[vecindex];
if (candidate.first == key) {
return candidate.second;
}
if (candidate.first == EMPTYBUCKET) {
return std::nullopt;
}
}
return std::nullopt;
Comment thread
kstppd marked this conversation as resolved.
Outdated
}
#else

// Uses Hasher's insert_kernel to insert all elements
Expand Down
37 changes: 12 additions & 25 deletions include/splitvector/splitvec.h
Original file line number Diff line number Diff line change
Expand Up @@ -289,11 +289,9 @@ class SplitVector {
HOSTONLY SplitVector(SplitVector<T, Allocator>&& other) noexcept {
_allocator=other._allocator;
_data = other._data;
_info->size = other.size();
_info->capacity = other.capacity();
_info->capacity = 0;
_info->size = 0;
_info = other._info;
other._data = nullptr;
other._info = nullptr;
_location = other._location;
d_vec = nullptr;
}
Expand Down Expand Up @@ -417,6 +415,7 @@ class SplitVector {

/**
* @brief Move assignment operator to move from another SplitVector.
* Supported only for vectors of same Allocator type
*
* @param other The SplitVector to move from.
* @return Reference to the moved SplitVector.
Expand All @@ -427,12 +426,11 @@ class SplitVector {
}

_deallocate_and_destroy(capacity(), _data);
_allocator.deallocate(reinterpret_cast<T*>(_info),get_number_of_Ts_for_Split_Info());
_data = other._data;
_info->size = other.size();
_info->capacity= other.capacity();
other._info->capacity = 0;
other._info->size = 0;
_info = other._info;
other._data = nullptr;
other._info = nullptr;
_location = other._location;
Comment thread
markusbattarbee marked this conversation as resolved.
d_vec = nullptr;
return *this;
Expand Down Expand Up @@ -608,9 +606,6 @@ class SplitVector {
* are invalidated after swap is called.
*/
void swap(SplitVector<T, Allocator>& other) noexcept {
if (*this == other) { // no need to do any work
return;
}
split::swap(_data, other._data);
split::swap(_info, other._info);
split::swap(_allocator, other._allocator);
Expand Down Expand Up @@ -737,7 +732,7 @@ class SplitVector {
}
// Nope.
if (requested_space <= current_space) {
if constexpr (!std::is_trivial<T>::value) {
if constexpr (!std::is_trivially_copy_constructible_v<T>) {
for (size_t i = size(); i < requested_space; ++i) {
_allocator.construct(&_data[i], T());
}
Expand Down Expand Up @@ -970,7 +965,7 @@ class SplitVector {
HOSTDEVICE
void remove_from_back(size_t n) noexcept {
const size_t end = size() - n;
if constexpr (!std::is_trivial<T>::value) {
if constexpr (!std::is_trivially_copy_constructible_v<T>) {
for (auto i = size(); i > end;) {
(_data + --i)->~T();
}
Expand All @@ -983,7 +978,7 @@ class SplitVector {
*/
HOSTDEVICE
void clear() noexcept {
if constexpr (!std::is_trivial<T>::value) {
if constexpr (!std::is_trivially_copy_constructible_v<T>) {
for (size_t i = 0; i < size(); i++) {
_data[i].~T();
}
Expand Down Expand Up @@ -1066,14 +1061,6 @@ class SplitVector {
*/
HOSTONLY
void push_back(const T&& val) {

// If we have no allocated memory because the default ctor was used then
// allocate one element, set it and return
// if (_data == nullptr) {
// _allocate(1);
// _data[size() - 1] = val;
// return;
// }
resize(size() + 1);
_data[size() - 1] = std::move(val);
return;
Expand Down Expand Up @@ -1491,7 +1478,7 @@ class SplitVector {
HOSTDEVICE
iterator erase(iterator it) noexcept {
const int64_t index = it.data() - begin().data();
if constexpr (!std::is_trivial<T>::value) {
if constexpr (!std::is_trivially_copy_constructible_v<T>) {
_data[index].~T();
for (size_t i = index; i < size() - 1; i++) {
new (&_data[i]) T(_data[i + 1]);
Expand Down Expand Up @@ -1521,7 +1508,7 @@ class SplitVector {
const int64_t range = end - start;

const size_t sz = size();
if constexpr (!std::is_trivial<T>::value) {
if constexpr (!std::is_trivially_copy_constructible_v<T>) {
for (int64_t i = start; i < end; i++) {
_data[i].~T();
}
Expand Down Expand Up @@ -1559,7 +1546,7 @@ class SplitVector {
resize(size() + 1);
iterator it = &_data[index];
std::move(it.data(), end().data(), it.data() + 1);
if constexpr (!std::is_trivial<T>::value) {
if constexpr (!std::is_trivially_copy_constructible_v<T>) {
_allocator.destroy(it.data());
_allocator.construct(it.data(), args...);
}else{
Expand Down
2 changes: 1 addition & 1 deletion meson.build
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
project('Hashinator', 'cpp', 'cuda' ,default_options : ['cpp_std=c++20','warning_level=2','werror=true','buildtype=debugoptimized'])
project('Hashinator', 'cpp', 'cuda' ,default_options : ['cpp_std=c++20','warning_level=2','werror=false','buildtype=debugoptimized'])

#Config
add_global_arguments('-DHASHMAPDEBUG', language : 'cpp')
Expand Down
45 changes: 45 additions & 0 deletions unit_tests/gtest_vec_device/memtest.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
#include <iostream>
#include <stdlib.h>
//#include <chrono>
#include "../../include/splitvector/splitvec.h"
#include "../../include/splitvector/split_tools.h"
#include <gtest/gtest.h>

#define CHK_ERR(err) (cuda_error(err, __FILE__, __LINE__))

typedef split::SplitVector<int,split::split_unified_allocator<int>> vec ;


/**
 * @brief Small wrapper around a `vec` that forwards the size/capacity
 *        operations the memory test needs to exercise.
 */
struct testStructure {
   /**
    * @brief Creates the wrapped vector by assigning a temporary `vec(initSize)` to the member.
    * @param initSize Size argument passed to the `vec` constructor (100 when omitted).
    */
   testStructure(const size_t initSize=100) {
      // The "a"/"b" markers bracket the assignment on stderr (presumably debug tracing
      // to localize failures in construction/assignment -- confirm before removing).
      std::cerr<<"a"<<std::endl;
      testContent = vec(initSize);
      std::cerr<<"b"<<std::endl;
   }

   /// @return Capacity reported by the wrapped vector.
   size_t capacity() const {
      return testContent.capacity();
   }

   /// @return Size reported by the wrapped vector.
   size_t size() const {
      return testContent.size();
   }

   /// Forwards to the wrapped vector's own shrink_to_fit().
   void shrink_to_fit() {
      testContent.shrink_to_fit();
   }

   /// Alternative shrink: builds a fresh vector of the current size, fills it
   /// through overwrite() from the current contents and swaps it into place.
   void shrink_to_fit_2() {
      vec testContent_new(size());
      testContent_new.overwrite(testContent);
      testContent.swap(testContent_new);
   }

   /**
    * @brief Forwards to reserve() with `true` as second argument.
    * @param newCapacity Capacity requested from the wrapped vector.
    */
   void recapacitate(size_t newCapacity) {
      // NOTE(review): the meaning of the boolean flag is defined by SplitVector::reserve.
      testContent.reserve(newCapacity,true);
   }

   /**
    * @brief Forwards to the wrapped vector's resize().
    * @param newSize Size requested from the wrapped vector.
    */
   void resize(size_t newSize) {
      testContent.resize(newSize);
   }

   /// @return Capacity of the wrapped vector expressed in bytes.
   size_t capacityInBytes() const {
      // `vec` stores `int`; use that type rather than the non-standard `uint` typedef.
      return testContent.capacity() * sizeof(int);
   }

   vec testContent; ///< The vector under test.
};

Loading