Initial opencl work generating kernel.

This commit is contained in:
clemahieu 2016-04-03 18:46:37 -05:00
commit 9ee74d8a89
15 changed files with 732 additions and 56 deletions

View file

@ -26,7 +26,7 @@ else (WIN32)
endif (WIN32)
if (APPLE)
set (PLATFORM_LINK_FLAGS "-framework Foundation")
set (PLATFORM_LINK_FLAGS "-framework Foundation -framework OpenCL")
else (APPLE)
set (PLATFORM_LINK_FLAGS "")
endif (APPLE)
@ -129,13 +129,16 @@ add_library (node
rai/node/common.hpp
rai/node/node.hpp
rai/node/node.cpp
rai/node/openclwork.cpp
rai/node/openclwork.hpp
rai/node/rpc.hpp
rai/node/rpc.cpp
rai/node/testing.hpp
rai/node/testing.cpp
rai/node/wallet.hpp
rai/node/wallet.cpp
rai/node/working.hpp)
rai/node/working.hpp
rai/node/xorshift.hpp)
add_executable (core_test
rai/core_test/block.cpp

View file

@ -19,7 +19,7 @@ TEST (node, block_store_path_failure)
auto service (boost::make_shared <boost::asio::io_service> ());
rai::alarm alarm (*service);
rai::logging logging;
rai::work_pool work;
rai::work_pool work (false);
auto node (std::make_shared <rai::node> (init, *service, 0, rai::unique_path (), alarm, logging, work));
ASSERT_TRUE (node->wallets.items.empty ());
node->stop ();
@ -31,7 +31,7 @@ TEST (node, inactive_supply)
auto service (boost::make_shared <boost::asio::io_service> ());
rai::alarm alarm (*service);
rai::node_config config;
rai::work_pool work;
rai::work_pool work (false);
config.inactive_supply = 10;
auto node (std::make_shared <rai::node> (init, *service, rai::unique_path (), alarm, config, work));
ASSERT_EQ (10, node->ledger.inactive_supply);
@ -43,7 +43,7 @@ TEST (node, password_fanout)
auto service (boost::make_shared <boost::asio::io_service> ());
rai::alarm alarm (*service);
rai::node_config config;
rai::work_pool work;
rai::work_pool work (false);
config.password_fanout = 10;
auto node (std::make_shared <rai::node> (init, *service, rai::unique_path (), alarm, config, work));
auto wallet (node->wallets.create (100));

View file

@ -1,9 +1,10 @@
#include <gtest/gtest.h>
#include <rai/node/wallet.hpp>
#include <rai/node/openclwork.hpp>
TEST (work, one)
{
rai::work_pool pool;
rai::work_pool pool (false);
rai::change_block block (1, 1, rai::keypair ().prv, 3, 4);
block.block_work_set (pool.generate (block.root ()));
ASSERT_FALSE (pool.work_validate (block));
@ -11,7 +12,7 @@ TEST (work, one)
TEST (work, validate)
{
rai::work_pool pool;
rai::work_pool pool (false);
rai::send_block send_block (1, 1, 2, rai::keypair ().prv, 4, 6);
ASSERT_TRUE (pool.work_validate (send_block));
send_block.block_work_set (pool.generate (send_block.root ()));
@ -20,7 +21,7 @@ TEST (work, validate)
TEST (work, cancel)
{
rai::work_pool pool;
rai::work_pool pool (false);
rai::uint256_union key (1);
bool exited (false);
std::thread thread ([&pool, &key, &exited] ()
@ -36,4 +37,17 @@ TEST (work, cancel)
}
pool.cancel (key);
thread.join ();
}
TEST (work, opencl)
{
rai::work_pool pool (true);
ASSERT_NE (nullptr, pool.opencl);
rai::uint256_union root;
for (auto i (0); i < 1; ++i)
{
rai::random_pool.GenerateBlock (root.bytes.data (), root.bytes.size ());
auto result (pool.generate (root));
ASSERT_FALSE (pool.work_validate (root, result));
}
}

View file

@ -591,7 +591,8 @@ receive_minimum (rai::Mrai_ratio),
inactive_supply (0),
password_fanout (1024),
io_threads (std::max <unsigned> (4, std::thread::hardware_concurrency ())),
work_threads (std::max <unsigned> (4, std::thread::hardware_concurrency ()))
work_threads (std::max <unsigned> (4, std::thread::hardware_concurrency ())),
opencl_work (false)
{
switch (rai::rai_network)
{
@ -2955,7 +2956,8 @@ bool rai::handle_node_options (boost::program_options::variables_map & vm)
rai::inactive_node::inactive_node () :
service (boost::make_shared <boost::asio::io_service> ()),
alarm (*service)
alarm (*service),
work (false)
{
auto working (rai::working_path ());
boost::filesystem::create_directories (working);

View file

@ -307,6 +307,7 @@ public:
unsigned password_fanout;
unsigned io_threads;
unsigned work_threads;
bool opencl_work;
static std::chrono::seconds constexpr keepalive_period = std::chrono::seconds (60);
static std::chrono::seconds constexpr keepalive_cutoff = keepalive_period * 5;
static std::chrono::minutes constexpr wallet_backup_interval = std::chrono::minutes (5);

594
rai/node/openclwork.cpp Normal file
View file

@ -0,0 +1,594 @@
#include <rai/node/openclwork.hpp>
#include <rai/utility.hpp>
#include <rai/node/wallet.hpp>
#include <map>
#include <vector>
#include <string>
#include <iostream>
#include <array>
namespace
{
std::string opencl_program = R"%%%(
enum blake2b_constant
{
BLAKE2B_BLOCKBYTES = 128,
BLAKE2B_OUTBYTES = 64,
BLAKE2B_KEYBYTES = 64,
BLAKE2B_SALTBYTES = 16,
BLAKE2B_PERSONALBYTES = 16
};
typedef struct __blake2b_param
{
uchar digest_length; // 1
uchar key_length; // 2
uchar fanout; // 3
uchar depth; // 4
uint leaf_length; // 8
ulong node_offset; // 16
uchar node_depth; // 17
uchar inner_length; // 18
uchar reserved[14]; // 32
uchar salt[BLAKE2B_SALTBYTES]; // 48
uchar personal[BLAKE2B_PERSONALBYTES]; // 64
} blake2b_param;
typedef struct __blake2b_state
{
ulong h[8];
ulong t[2];
ulong f[2];
uchar buf[2 * BLAKE2B_BLOCKBYTES];
size_t buflen;
uchar last_node;
} blake2b_state;
__constant static ulong blake2b_IV[8] =
{
0x6a09e667f3bcc908ULL, 0xbb67ae8584caa73bULL,
0x3c6ef372fe94f82bULL, 0xa54ff53a5f1d36f1ULL,
0x510e527fade682d1ULL, 0x9b05688c2b3e6c1fULL,
0x1f83d9abfb41bd6bULL, 0x5be0cd19137e2179ULL
};
__constant static uchar blake2b_sigma[12][16] =
{
{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 } ,
{ 14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 } ,
{ 11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4 } ,
{ 7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8 } ,
{ 9, 0, 5, 7, 2, 4, 10, 15, 14, 1, 11, 12, 6, 8, 3, 13 } ,
{ 2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9 } ,
{ 12, 5, 1, 15, 14, 13, 4, 10, 0, 7, 6, 3, 9, 2, 8, 11 } ,
{ 13, 11, 7, 14, 12, 1, 3, 9, 5, 0, 15, 4, 8, 6, 2, 10 } ,
{ 6, 15, 14, 9, 11, 3, 0, 8, 12, 2, 13, 7, 1, 4, 10, 5 } ,
{ 10, 2, 8, 4, 7, 6, 1, 5, 15, 11, 9, 14, 3, 12, 13 , 0 } ,
{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 } ,
{ 14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 }
};
static inline int blake2b_set_lastnode( blake2b_state *S )
{
S->f[1] = ~0UL;
return 0;
}
/* Some helper functions, not necessarily useful */
static inline int blake2b_set_lastblock( blake2b_state *S )
{
if( S->last_node ) blake2b_set_lastnode( S );
S->f[0] = ~0UL;
return 0;
}
static inline int blake2b_increment_counter( blake2b_state *S, const ulong inc )
{
S->t[0] += inc;
S->t[1] += ( S->t[0] < inc );
return 0;
}
static inline uint load32( const void *src )
{
#if defined(NATIVE_LITTLE_ENDIAN)
return *( uint * )( src );
#else
const uchar *p = ( uchar * )src;
uint w = *p++;
w |= ( uint )( *p++ ) << 8;
w |= ( uint )( *p++ ) << 16;
w |= ( uint )( *p++ ) << 24;
return w;
#endif
}
static inline ulong load64( const void *src )
{
#if defined(NATIVE_LITTLE_ENDIAN)
return *( ulong * )( src );
#else
const uchar *p = ( uchar * )src;
ulong w = *p++;
w |= ( ulong )( *p++ ) << 8;
w |= ( ulong )( *p++ ) << 16;
w |= ( ulong )( *p++ ) << 24;
w |= ( ulong )( *p++ ) << 32;
w |= ( ulong )( *p++ ) << 40;
w |= ( ulong )( *p++ ) << 48;
w |= ( ulong )( *p++ ) << 56;
return w;
#endif
}
static inline void store32( void *dst, uint w )
{
#if defined(__ENDIAN_LITTLE__)
*( uint * )( dst ) = w;
#else
uchar *p = ( uchar * )dst;
*p++ = ( uchar )w; w >>= 8;
*p++ = ( uchar )w; w >>= 8;
*p++ = ( uchar )w; w >>= 8;
*p++ = ( uchar )w;
#endif
}
static inline void store64( void *dst, ulong w )
{
#if defined(__ENDIAN_LITTLE__)
*( ulong * )( dst ) = w;
#else
uchar *p = ( uchar * )dst;
*p++ = ( uchar )w; w >>= 8;
*p++ = ( uchar )w; w >>= 8;
*p++ = ( uchar )w; w >>= 8;
*p++ = ( uchar )w; w >>= 8;
*p++ = ( uchar )w; w >>= 8;
*p++ = ( uchar )w; w >>= 8;
*p++ = ( uchar )w; w >>= 8;
*p++ = ( uchar )w;
#endif
}
static inline ulong rotr64( const ulong w, const unsigned c )
{
return ( w >> c ) | ( w << ( 64 - c ) );
}
/* init xors IV with input parameter block */
static inline int blake2b_init_param( blake2b_state *S, const blake2b_param *P )
{
uchar *p, *h, *v;
v = ( uchar * )( blake2b_IV );
h = ( uchar * )( S->h );
p = ( uchar * )( P );
/* IV XOR ParamBlock */
memset( S, 0, sizeof( blake2b_state ) );
for( int i = 0; i < BLAKE2B_OUTBYTES; ++i ) h[i] = v[i] ^ p[i];
return 0;
}
static inline int blake2b_init( blake2b_state *S, const uchar outlen )
{
blake2b_param P[1];
if ( ( !outlen ) || ( outlen > BLAKE2B_OUTBYTES ) ) return -1;
P->digest_length = outlen;
P->key_length = 0;
P->fanout = 1;
P->depth = 1;
store32( &P->leaf_length, 0 );
store64( &P->node_offset, 0 );
P->node_depth = 0;
P->inner_length = 0;
memset( P->reserved, 0, sizeof( P->reserved ) );
memset( P->salt, 0, sizeof( P->salt ) );
memset( P->personal, 0, sizeof( P->personal ) );
return blake2b_init_param( S, P );
}
static int blake2b_compress( blake2b_state *S, const uchar block[BLAKE2B_BLOCKBYTES] )
{
ulong m[16];
ulong v[16];
int i;
for( i = 0; i < 16; ++i )
m[i] = load64( block + i * sizeof( m[i] ) );
for( i = 0; i < 8; ++i )
v[i] = S->h[i];
v[ 8] = blake2b_IV[0];
v[ 9] = blake2b_IV[1];
v[10] = blake2b_IV[2];
v[11] = blake2b_IV[3];
v[12] = S->t[0] ^ blake2b_IV[4];
v[13] = S->t[1] ^ blake2b_IV[5];
v[14] = S->f[0] ^ blake2b_IV[6];
v[15] = S->f[1] ^ blake2b_IV[7];
#define G(r,i,a,b,c,d) \
do { \
a = a + b + m[blake2b_sigma[r][2*i+0]]; \
d = rotr64(d ^ a, 32); \
c = c + d; \
b = rotr64(b ^ c, 24); \
a = a + b + m[blake2b_sigma[r][2*i+1]]; \
d = rotr64(d ^ a, 16); \
c = c + d; \
b = rotr64(b ^ c, 63); \
} while(0)
#define ROUND(r) \
do { \
G(r,0,v[ 0],v[ 4],v[ 8],v[12]); \
G(r,1,v[ 1],v[ 5],v[ 9],v[13]); \
G(r,2,v[ 2],v[ 6],v[10],v[14]); \
G(r,3,v[ 3],v[ 7],v[11],v[15]); \
G(r,4,v[ 0],v[ 5],v[10],v[15]); \
G(r,5,v[ 1],v[ 6],v[11],v[12]); \
G(r,6,v[ 2],v[ 7],v[ 8],v[13]); \
G(r,7,v[ 3],v[ 4],v[ 9],v[14]); \
} while(0)
ROUND( 0 );
ROUND( 1 );
ROUND( 2 );
ROUND( 3 );
ROUND( 4 );
ROUND( 5 );
ROUND( 6 );
ROUND( 7 );
ROUND( 8 );
ROUND( 9 );
ROUND( 10 );
ROUND( 11 );
for( i = 0; i < 8; ++i )
S->h[i] = S->h[i] ^ v[i] ^ v[i + 8];
#undef G
#undef ROUND
return 0;
}
static void ucharcpy (uchar * dst, uchar const * src, size_t count)
{
for (size_t i = 0; i < count; ++i)
{
*dst++ = *src++;
}
}
void printstate (blake2b_state * S)
{
printf ("%lu %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu ", S->h[0], S->h[1], S->h[2], S->h[3], S->h[4], S->h[5], S->h[6], S->h[7], S->t[0], S->t[1], S->f[0], S->f[1]);
for (int i = 0; i < 256; ++i)
{
printf ("%02x", S->buf[i]);
}
printf (" %lu %02x\n", S->buflen, S->last_node);
}
/* inlen now in bytes */
static int blake2b_update( blake2b_state *S, const uchar *in, ulong inlen )
{
while( inlen > 0 )
{
size_t left = S->buflen;
size_t fill = 2 * BLAKE2B_BLOCKBYTES - left;
if( inlen > fill )
{
ucharcpy( S->buf + left, in, fill ); // Fill buffer
S->buflen += fill;
blake2b_increment_counter( S, BLAKE2B_BLOCKBYTES );
blake2b_compress( S, S->buf ); // Compress
ucharcpy( S->buf, S->buf + BLAKE2B_BLOCKBYTES, BLAKE2B_BLOCKBYTES ); // Shift buffer left
S->buflen -= BLAKE2B_BLOCKBYTES;
in += fill;
inlen -= fill;
}
else // inlen <= fill
{
ucharcpy( S->buf + left, in, inlen );
S->buflen += inlen; // Be lazy, do not compress
in += inlen;
inlen -= inlen;
}
}
return 0;
}
static void ucharset (uchar * dest, int val, size_t count)
{
for (size_t i = 0; i < count; ++i)
{
*dest++ = val;
}
}
/* Is this correct? */
static int blake2b_final( blake2b_state *S, uchar *out, uchar outlen )
{
uchar buffer[BLAKE2B_OUTBYTES];
if( S->buflen > BLAKE2B_BLOCKBYTES )
{
blake2b_increment_counter( S, BLAKE2B_BLOCKBYTES );
blake2b_compress( S, S->buf );
S->buflen -= BLAKE2B_BLOCKBYTES;
ucharcpy( S->buf, S->buf + BLAKE2B_BLOCKBYTES, S->buflen );
}
//blake2b_increment_counter( S, S->buflen );
ulong inc = (ulong)S->buflen;
S->t[0] += inc;
// if ( S->t[0] < inc )
// S->t[1] += 1;
// This seems to crash the opencl compiler though fortunately this is calculating size and we don't do things bigger than 2^32
blake2b_set_lastblock( S );
ucharset( S->buf + S->buflen, 0, 2 * BLAKE2B_BLOCKBYTES - S->buflen ); /* Padding */
blake2b_compress( S, S->buf );
for( int i = 0; i < 8; ++i ) /* Output full hash to temp buffer */
store64( buffer + sizeof( S->h[i] ) * i, S->h[i] );
ucharcpy( out, buffer, outlen );
return 0;
}
static void ucharcpyglb (uchar * dst, __global uchar const * src, size_t count)
{
for (size_t i = 0; i < count; ++i)
{
*dst = *src;
++dst;
++src;
}
}
__kernel void raiblocks_work (__global ulong * attempt, __global ulong * result_a, __global uchar * item_a)
{
int const thread = get_global_id (0);
uchar item_l [32];
ucharcpyglb (item_l, item_a, 32);
ulong attempt_l = *attempt + thread;
blake2b_state state;
blake2b_init (&state, sizeof (ulong));
blake2b_update (&state, (uchar *) &attempt_l, sizeof (ulong));
blake2b_update (&state, item_l, 32);
ulong result;
blake2b_final (&state, (uchar *) &result, sizeof (result));
if (result >= 0xffffffc000000000ul)
//if (result >= 0xff00000000000000ul)
{
*result_a = attempt_l;
}
}
)%%%";
}
void printstate (blake2b_state * S)
{
printf ("%llu %llu %llu %llu %llu %llu %llu %llu %llu %llu %llu %llu\n", S->h[0], S->h[1], S->h[2], S->h[3], S->h[4], S->h[5], S->h[6], S->h[7], S->t[0], S->t[1], S->f[0], S->f[1]);
for (int i = 0; i < 256; ++i)
{
printf ("%02x", S->buf[i]);
}
printf (" %lu %02x\n", S->buflen, S->last_node);
}
rai::opencl_environment::opencl_environment (bool & error_a)
{
cl_uint platformIdCount = 0;
clGetPlatformIDs (0, nullptr, &platformIdCount);
std::vector <cl_platform_id> platformIds (platformIdCount);
clGetPlatformIDs (platformIdCount, platformIds.data(), nullptr);
for (auto i (platformIds.begin ()), n (platformIds.end ()); i != n; ++i)
{
auto & devices_container (devices [*i]);
cl_uint deviceIdCount = 0;
clGetDeviceIDs (*i, CL_DEVICE_TYPE_ALL, 0, nullptr, &deviceIdCount);
std::vector <cl_device_id> deviceIds (deviceIdCount);
clGetDeviceIDs (*i, CL_DEVICE_TYPE_ALL, deviceIdCount, deviceIds.data (), nullptr);
for (auto j (deviceIds.begin ()), m (deviceIds.end ()); j != m; ++j)
{
devices_container.push_back (*j);
}
}
}
void rai::opencl_environment::dump ()
{
auto index (0);
for (auto i (devices.begin ()), n (devices.end ()); i != n; ++i, ++index)
{
std::vector <unsigned> queries = {CL_PLATFORM_PROFILE, CL_PLATFORM_VERSION, CL_PLATFORM_NAME, CL_PLATFORM_VENDOR, CL_PLATFORM_EXTENSIONS};
std::cout << "Platform: " << index << std::endl;
for (auto j (queries.begin ()), m (queries.end ()); j != m; ++j)
{
size_t platformInfoCount = 0;
clGetPlatformInfo(i->first, *j, 0, nullptr, &platformInfoCount);
std::vector <char> info (platformInfoCount);
clGetPlatformInfo(i->first, *j, info.size (), info.data (), nullptr);
std::cout << info.data () << std::endl;
}
for (auto j (i->second.begin ()), m (i->second.end ()); j != m; ++j)
{
std::vector <unsigned> queries = {CL_DEVICE_NAME, CL_DEVICE_VENDOR, CL_DEVICE_PROFILE};
std::cout << "Device: " << j - i->second.begin () << std::endl;
for (auto k (queries.begin ()), o (queries.end ()); k != o; ++k)
{
size_t platformInfoCount = 0;
clGetDeviceInfo(*j, *k, 0, nullptr, &platformInfoCount);
std::vector <char> info (platformInfoCount);
clGetDeviceInfo(*j, *k, info.size (), info.data (), nullptr);
std::cout << '\t' << info.data () << std::endl;
}
size_t deviceTypeCount = 0;
clGetDeviceInfo(*j, CL_DEVICE_TYPE, 0, nullptr, &deviceTypeCount);
std::vector <uint8_t> deviceTypeInfo (deviceTypeCount);
clGetDeviceInfo(*j, CL_DEVICE_TYPE, deviceTypeCount, deviceTypeInfo.data (), 0);
std::string device_type_string;
switch (deviceTypeInfo [0])
{
case CL_DEVICE_TYPE_ACCELERATOR:
device_type_string = "ACCELERATOR";
break;
case CL_DEVICE_TYPE_CPU:
device_type_string = "CPU";
break;
case CL_DEVICE_TYPE_CUSTOM:
device_type_string = "CUSTOM";
break;
case CL_DEVICE_TYPE_DEFAULT:
device_type_string = "DEFAULT";
break;
case CL_DEVICE_TYPE_GPU:
device_type_string = "GPU";
break;
default:
device_type_string = "Unknown";
break;
}
std::cout << '\t' << device_type_string << std::endl;
size_t compilerAvailableCount = 0;
clGetDeviceInfo(*j, CL_DEVICE_COMPILER_AVAILABLE, 0, nullptr, &compilerAvailableCount);
std::vector <uint8_t> compilerAvailableInfo (compilerAvailableCount);
clGetDeviceInfo(*j, CL_DEVICE_COMPILER_AVAILABLE, compilerAvailableCount, compilerAvailableInfo.data (), 0);
std::cout << '\t' << "Compiler available: " << (compilerAvailableInfo [0] ? "true" : "false") << std::endl;
size_t computeUnitsAvailableCount = 0;
clGetDeviceInfo(*j, CL_DEVICE_MAX_COMPUTE_UNITS, 0, nullptr, &computeUnitsAvailableCount);
std::vector <uint8_t> computeUnitsAvailableInfo (computeUnitsAvailableCount);
clGetDeviceInfo(*j, CL_DEVICE_MAX_COMPUTE_UNITS, computeUnitsAvailableCount, computeUnitsAvailableInfo.data (), 0);
uint64_t computeUnits (computeUnitsAvailableInfo [0] | (computeUnitsAvailableInfo [1] << 8) | (computeUnitsAvailableInfo [2] << 16) | (computeUnitsAvailableInfo [3] << 24));
std::cout << '\t' << "Compute units available: " << computeUnits << std::endl;
}
}
}
rai::opencl_work::opencl_work (bool & error_a, rai::opencl_environment & environment_a, rai::work_pool & pool_a) :
context (0),
attempt_buffer (0),
result_buffer (0),
item_buffer (0),
program (0),
kernel (0),
queue (0),
pool (pool_a)
{
rai::random_pool.GenerateBlock (reinterpret_cast <uint8_t *> (rand.s.data ()), rand.s.size () * sizeof (decltype (rand.s)::value_type));
auto i (environment_a.devices.begin ());
auto selected_platform (i->first);
std::array <cl_device_id, 1> selected_devices;
selected_devices [0] = i->second [1];
cl_context_properties contextProperties [] =
{
CL_CONTEXT_PLATFORM,
reinterpret_cast<cl_context_properties> (selected_platform),
0, 0
};
cl_int createContextError (0);
context = clCreateContext (contextProperties, selected_devices.size (), selected_devices.data (), nullptr, nullptr, &createContextError);
error_a |= createContextError != CL_SUCCESS;
if (!error_a)
{
cl_int queue_error (0);
queue = clCreateCommandQueue (context, selected_devices [0], 0, &queue_error);
error_a |= queue_error != CL_SUCCESS;
if (!error_a)
{
cl_int attempt_error (0);
attempt_buffer = clCreateBuffer (context, 0, sizeof (uint64_t), nullptr, &attempt_error);
error_a |= attempt_error != CL_SUCCESS;
if (!error_a)
{
cl_int result_error (0);
result_buffer = clCreateBuffer (context, 0, sizeof (uint64_t), nullptr, &result_error);
error_a |= result_error != CL_SUCCESS;
if (!error_a)
{
cl_int item_error (0);
size_t item_size (sizeof (rai::uint256_union));
item_buffer = clCreateBuffer (context, 0, item_size, nullptr, &item_error);
error_a |= item_error != CL_SUCCESS;
if (!error_a)
{
cl_int program_error (0);
char const * program_data (opencl_program.data ());
size_t program_length (opencl_program.size ());
program = clCreateProgramWithSource (context, 1, &program_data, &program_length, &program_error);
error_a |= program_error != CL_SUCCESS;
if (!error_a)
{
error_a |= clBuildProgram (program, selected_devices.size (), selected_devices.data (), "-D __APPLE__", nullptr, nullptr) != CL_SUCCESS;
if (!error_a)
{
/*for (auto i (selected_devices.begin ()), n (selected_devices.end ()); i != n; ++i)
{
size_t log_size (0);
clGetProgramBuildInfo (program, *i, CL_PROGRAM_BUILD_LOG, 0, nullptr, &log_size);
std::vector <char> log (log_size);
clGetProgramBuildInfo (program, *i, CL_PROGRAM_BUILD_LOG, log.size (), log.data (), nullptr);
std::cout << log.data () << std::endl;
}*/
cl_int kernel_error (0);
kernel = clCreateKernel (program, "raiblocks_work", &kernel_error);
error_a |= kernel_error != CL_SUCCESS;
if (!error_a)
{
cl_int arg0_error (clSetKernelArg (kernel, 0, sizeof (attempt_buffer), &attempt_buffer));
error_a |= arg0_error != CL_SUCCESS;
if (!error_a)
{
cl_int arg1_error (clSetKernelArg (kernel, 1, sizeof (result_buffer), &result_buffer));
error_a |= arg1_error != CL_SUCCESS;
if (!error_a)
{
cl_int arg2_error (clSetKernelArg (kernel, 2, sizeof (item_buffer), &item_buffer));
error_a |= arg2_error != CL_SUCCESS;
}
}
}
}
}
}
}
}
}
}
}
rai::opencl_work::~opencl_work ()
{
clReleaseKernel (kernel);
clReleaseProgram (program);
clReleaseContext (context);
}
uint64_t rai::opencl_work::generate_work (rai::uint256_union const & root_a)
{
std::lock_guard <std::mutex> lock (mutex);
uint64_t result (0);
unsigned thread_count (rai::rai_network == rai::rai_networks::rai_test_network ? 128 : 1024 * 1024);
size_t work_size [] = { thread_count, 0, 0 };
while (pool.work_validate (root_a, result))
{
result = rand.next ();
cl_int write_error1 = clEnqueueWriteBuffer (queue, attempt_buffer, false, 0, sizeof (uint64_t), &result, 0, nullptr, nullptr);
cl_int write_error2 = clEnqueueWriteBuffer (queue, item_buffer, false, 0, sizeof (rai::uint256_union), root_a.bytes.data (), 0, nullptr, nullptr);
cl_int enqueue_error = clEnqueueNDRangeKernel (queue, kernel, 1, nullptr, work_size, nullptr, 0, nullptr, nullptr);
cl_int read_error1 = clEnqueueReadBuffer(queue, result_buffer, false, 0, sizeof (uint64_t), &result, 0, nullptr, nullptr);
cl_int finishError = clFinish (queue);
}
return result;
}

43
rai/node/openclwork.hpp Normal file
View file

@ -0,0 +1,43 @@
#pragma once
#include <rai/node/xorshift.hpp>
#include <map>
#include <mutex>
#include <vector>
#ifdef __APPLE__
#include <OpenCL/opencl.h>
#else
#include <CL/cl.h>
#endif
namespace rai
{
class opencl_environment
{
public:
opencl_environment (bool &);
void dump ();
std::map <cl_platform_id, std::vector <cl_device_id>> devices;
};
union uint256_union;
class work_pool;
class opencl_work
{
public:
opencl_work (bool &, rai::opencl_environment &, rai::work_pool &);
~opencl_work ();
uint64_t generate_work (rai::uint256_union const &);
std::mutex mutex;
cl_context context;
cl_mem attempt_buffer;
cl_mem result_buffer;
cl_mem item_buffer;
cl_program program;
cl_kernel kernel;
cl_command_queue queue;
rai::xorshift1024star rand;
rai::work_pool & pool;
};
}

View file

@ -5,7 +5,8 @@
rai::system::system (uint16_t port_a, size_t count_a) :
service (new boost::asio::io_service),
alarm (*service)
alarm (*service),
work (false)
{
nodes.reserve (count_a);
for (size_t i (0); i < count_a; ++i)

View file

@ -1,6 +1,7 @@
#include <rai/node/wallet.hpp>
#include <rai/node/node.hpp>
#include <rai/node/xorshift.hpp>
#include <argon2.h>
@ -11,7 +12,7 @@
#include <future>
rai::work_pool::work_pool () :
rai::work_pool::work_pool (bool opencl_work_a) :
current (0),
ticket (0),
done (false)
@ -27,6 +28,12 @@ done (false)
}));
threads.push_back (std::move (thread));
}
if (opencl_work_a)
{
auto error (false);
rai::opencl_environment environment (error);
opencl.reset (new rai::opencl_work (error, environment, *this));
}
}
rai::work_pool::~work_pool ()
@ -38,32 +45,6 @@ rai::work_pool::~work_pool ()
}
}
namespace
{
class xorshift1024star
{
public:
xorshift1024star ():
p (0)
{
}
std::array <uint64_t, 16> s;
unsigned p;
uint64_t next ()
{
auto p_l (p);
auto pn ((p_l + 1) & 15);
p = pn;
uint64_t s0 = s[ p_l ];
uint64_t s1 = s[ pn ];
s1 ^= s1 << 31; // a
s1 ^= s1 >> 11; // b
s0 ^= s0 >> 30; // c
return ( s[ pn ] = s0 ^ s1 ) * 1181783497276652981LL;
}
};
}
uint64_t rai::work_pool::work_value (rai::block_hash const & root_a, uint64_t work_a)
{
uint64_t result;
@ -183,19 +164,26 @@ boost::optional <uint64_t> rai::work_pool::generate_maybe (rai::uint256_union co
{
assert (!root_a.is_zero ());
boost::optional <uint64_t> result;
std::unique_lock <std::mutex> lock (mutex);
pending.push_back (root_a);
producer_condition.notify_all ();
auto done (false);
while (!done)
if (opencl != nullptr)
{
consumer_condition.wait (lock);
auto finish (completed.find (root_a));
if (finish != completed.end ())
result = opencl->generate_work (root_a);
}
else
{
std::unique_lock <std::mutex> lock (mutex);
pending.push_back (root_a);
producer_condition.notify_all ();
auto done (false);
while (!done)
{
done = true;
result = finish->second;
completed.erase (finish);
consumer_condition.wait (lock);
auto finish (completed.find (root_a));
if (finish != completed.end ())
{
done = true;
result = finish->second;
completed.erase (finish);
}
}
}
return result;

View file

@ -3,6 +3,7 @@
#include <boost/optional.hpp>
#include <rai/secure.hpp>
#include <rai/node/openclwork.hpp>
#include <atomic>
#include <mutex>
@ -15,7 +16,7 @@ namespace rai
class work_pool
{
public:
work_pool ();
work_pool (bool);
~work_pool ();
void loop (uint64_t);
void stop ();
@ -34,6 +35,7 @@ public:
std::mutex mutex;
std::condition_variable consumer_condition;
std::condition_variable producer_condition;
std::unique_ptr <rai::opencl_work> opencl;
// Local work threshold for rate-limiting publishing blocks. ~5 seconds of work.
static uint64_t const publish_test_threshold = 0xff00000000000000;
static uint64_t const publish_full_threshold = 0xffffffc000000000;

28
rai/node/xorshift.hpp Normal file
View file

@ -0,0 +1,28 @@
#pragma once
#include <array>
namespace rai
{
class xorshift1024star
{
public:
xorshift1024star ():
p (0)
{
}
std::array <uint64_t, 16> s;
unsigned p;
uint64_t next ()
{
auto p_l (p);
auto pn ((p_l + 1) & 15);
p = pn;
uint64_t s0 = s[ p_l ];
uint64_t s1 = s[ pn ];
s1 ^= s1 << 31; // a
s1 ^= s1 >> 11; // b
s0 ^= s0 >> 30; // c
return ( s[ pn ] = s0 ^ s1 ) * 1181783497276652981LL;
}
};
}

View file

@ -101,7 +101,7 @@ int main (int argc, char * const * argv)
{
rai::node_init init;
auto service (boost::make_shared <boost::asio::io_service> ());
rai::work_pool work;
rai::work_pool work (config.node.opencl_work);
rai::alarm alarm (*service);
auto node (std::make_shared <rai::node> (init, *service, working, alarm, config.node, work));
if (!init.error ())

View file

@ -62,7 +62,7 @@ void rai_daemon::daemon::run ()
if (!error)
{
auto service (boost::make_shared <boost::asio::io_service> ());
rai::work_pool work;
rai::work_pool work (config.node.opencl_work);
rai::alarm alarm (*service);
rai::node_init init;
auto node (std::make_shared <rai::node> (init, *service, working, alarm, config.node, work));

View file

@ -148,7 +148,7 @@ int main (int argc, char * const * argv)
if (!key.decode_hex (vm ["key"].as <std::string> ()))
{
rai::keypair genesis (key.to_string ());
rai::work_pool work;
rai::work_pool work (false);
std::cout << "Genesis: " << genesis.prv.data.to_string () << std::endl << "Public: " << genesis.pub.to_string () << std::endl << "Account: " << genesis.pub.to_account () << std::endl;
rai::keypair landing;
std::cout << "Landing: " << landing.prv.data.to_string () << std::endl << "Public: " << landing.pub.to_string () << std::endl << "Account: " << landing.pub.to_account () << std::endl;
@ -215,7 +215,7 @@ int main (int argc, char * const * argv)
}
else if (vm.count ("debug_profile_generate"))
{
rai::work_pool work;
rai::work_pool work (false);
rai::change_block block (0, 0, rai::keypair ().prv, 0, 0);
std::cerr << "Starting generation profiling\n";
for (uint64_t i (0); true; ++i)
@ -229,7 +229,7 @@ int main (int argc, char * const * argv)
}
else if (vm.count ("debug_profile_verify"))
{
rai::work_pool work;
rai::work_pool work (false);
rai::change_block block (0, 0, rai::keypair ().prv, 0, 0);
std::cerr << "Starting verification profiling\n";
for (uint64_t i (0); true; ++i)

View file

@ -149,7 +149,7 @@ int run_wallet (int argc, char * const * argv)
QApplication application (argc, const_cast <char **> (argv));
rai::set_application_icon (application);
auto service (boost::make_shared <boost::asio::io_service> ());
rai::work_pool work;
rai::work_pool work (config.node.opencl_work);
rai::alarm alarm (*service);
rai::node_init init;
auto node (std::make_shared <rai::node> (init, *service, working, alarm, config.node, work));