Initial opencl work generating kernel.
This commit is contained in:
parent
5ddd94f8c9
commit
9ee74d8a89
15 changed files with 732 additions and 56 deletions
|
@ -26,7 +26,7 @@ else (WIN32)
|
|||
endif (WIN32)
|
||||
|
||||
if (APPLE)
|
||||
set (PLATFORM_LINK_FLAGS "-framework Foundation")
|
||||
set (PLATFORM_LINK_FLAGS "-framework Foundation -framework OpenCL")
|
||||
else (APPLE)
|
||||
set (PLATFORM_LINK_FLAGS "")
|
||||
endif (APPLE)
|
||||
|
@ -129,13 +129,16 @@ add_library (node
|
|||
rai/node/common.hpp
|
||||
rai/node/node.hpp
|
||||
rai/node/node.cpp
|
||||
rai/node/openclwork.cpp
|
||||
rai/node/openclwork.hpp
|
||||
rai/node/rpc.hpp
|
||||
rai/node/rpc.cpp
|
||||
rai/node/testing.hpp
|
||||
rai/node/testing.cpp
|
||||
rai/node/wallet.hpp
|
||||
rai/node/wallet.cpp
|
||||
rai/node/working.hpp)
|
||||
rai/node/working.hpp
|
||||
rai/node/xorshift.hpp)
|
||||
|
||||
add_executable (core_test
|
||||
rai/core_test/block.cpp
|
||||
|
|
|
@ -19,7 +19,7 @@ TEST (node, block_store_path_failure)
|
|||
auto service (boost::make_shared <boost::asio::io_service> ());
|
||||
rai::alarm alarm (*service);
|
||||
rai::logging logging;
|
||||
rai::work_pool work;
|
||||
rai::work_pool work (false);
|
||||
auto node (std::make_shared <rai::node> (init, *service, 0, rai::unique_path (), alarm, logging, work));
|
||||
ASSERT_TRUE (node->wallets.items.empty ());
|
||||
node->stop ();
|
||||
|
@ -31,7 +31,7 @@ TEST (node, inactive_supply)
|
|||
auto service (boost::make_shared <boost::asio::io_service> ());
|
||||
rai::alarm alarm (*service);
|
||||
rai::node_config config;
|
||||
rai::work_pool work;
|
||||
rai::work_pool work (false);
|
||||
config.inactive_supply = 10;
|
||||
auto node (std::make_shared <rai::node> (init, *service, rai::unique_path (), alarm, config, work));
|
||||
ASSERT_EQ (10, node->ledger.inactive_supply);
|
||||
|
@ -43,7 +43,7 @@ TEST (node, password_fanout)
|
|||
auto service (boost::make_shared <boost::asio::io_service> ());
|
||||
rai::alarm alarm (*service);
|
||||
rai::node_config config;
|
||||
rai::work_pool work;
|
||||
rai::work_pool work (false);
|
||||
config.password_fanout = 10;
|
||||
auto node (std::make_shared <rai::node> (init, *service, rai::unique_path (), alarm, config, work));
|
||||
auto wallet (node->wallets.create (100));
|
||||
|
|
|
@ -1,9 +1,10 @@
|
|||
#include <gtest/gtest.h>
|
||||
#include <rai/node/wallet.hpp>
|
||||
#include <rai/node/openclwork.hpp>
|
||||
|
||||
TEST (work, one)
|
||||
{
|
||||
rai::work_pool pool;
|
||||
rai::work_pool pool (false);
|
||||
rai::change_block block (1, 1, rai::keypair ().prv, 3, 4);
|
||||
block.block_work_set (pool.generate (block.root ()));
|
||||
ASSERT_FALSE (pool.work_validate (block));
|
||||
|
@ -11,7 +12,7 @@ TEST (work, one)
|
|||
|
||||
TEST (work, validate)
|
||||
{
|
||||
rai::work_pool pool;
|
||||
rai::work_pool pool (false);
|
||||
rai::send_block send_block (1, 1, 2, rai::keypair ().prv, 4, 6);
|
||||
ASSERT_TRUE (pool.work_validate (send_block));
|
||||
send_block.block_work_set (pool.generate (send_block.root ()));
|
||||
|
@ -20,7 +21,7 @@ TEST (work, validate)
|
|||
|
||||
TEST (work, cancel)
|
||||
{
|
||||
rai::work_pool pool;
|
||||
rai::work_pool pool (false);
|
||||
rai::uint256_union key (1);
|
||||
bool exited (false);
|
||||
std::thread thread ([&pool, &key, &exited] ()
|
||||
|
@ -36,4 +37,17 @@ TEST (work, cancel)
|
|||
}
|
||||
pool.cancel (key);
|
||||
thread.join ();
|
||||
}
|
||||
|
||||
TEST (work, opencl)
|
||||
{
|
||||
rai::work_pool pool (true);
|
||||
ASSERT_NE (nullptr, pool.opencl);
|
||||
rai::uint256_union root;
|
||||
for (auto i (0); i < 1; ++i)
|
||||
{
|
||||
rai::random_pool.GenerateBlock (root.bytes.data (), root.bytes.size ());
|
||||
auto result (pool.generate (root));
|
||||
ASSERT_FALSE (pool.work_validate (root, result));
|
||||
}
|
||||
}
|
|
@ -591,7 +591,8 @@ receive_minimum (rai::Mrai_ratio),
|
|||
inactive_supply (0),
|
||||
password_fanout (1024),
|
||||
io_threads (std::max <unsigned> (4, std::thread::hardware_concurrency ())),
|
||||
work_threads (std::max <unsigned> (4, std::thread::hardware_concurrency ()))
|
||||
work_threads (std::max <unsigned> (4, std::thread::hardware_concurrency ())),
|
||||
opencl_work (false)
|
||||
{
|
||||
switch (rai::rai_network)
|
||||
{
|
||||
|
@ -2955,7 +2956,8 @@ bool rai::handle_node_options (boost::program_options::variables_map & vm)
|
|||
|
||||
rai::inactive_node::inactive_node () :
|
||||
service (boost::make_shared <boost::asio::io_service> ()),
|
||||
alarm (*service)
|
||||
alarm (*service),
|
||||
work (false)
|
||||
{
|
||||
auto working (rai::working_path ());
|
||||
boost::filesystem::create_directories (working);
|
||||
|
|
|
@ -307,6 +307,7 @@ public:
|
|||
unsigned password_fanout;
|
||||
unsigned io_threads;
|
||||
unsigned work_threads;
|
||||
bool opencl_work;
|
||||
static std::chrono::seconds constexpr keepalive_period = std::chrono::seconds (60);
|
||||
static std::chrono::seconds constexpr keepalive_cutoff = keepalive_period * 5;
|
||||
static std::chrono::minutes constexpr wallet_backup_interval = std::chrono::minutes (5);
|
||||
|
|
594
rai/node/openclwork.cpp
Normal file
594
rai/node/openclwork.cpp
Normal file
|
@ -0,0 +1,594 @@
|
|||
#include <rai/node/openclwork.hpp>
|
||||
|
||||
#include <rai/utility.hpp>
|
||||
#include <rai/node/wallet.hpp>
|
||||
|
||||
#include <map>
|
||||
#include <vector>
|
||||
#include <string>
|
||||
#include <iostream>
|
||||
#include <array>
|
||||
|
||||
namespace
|
||||
{
|
||||
std::string opencl_program = R"%%%(
|
||||
enum blake2b_constant
|
||||
{
|
||||
BLAKE2B_BLOCKBYTES = 128,
|
||||
BLAKE2B_OUTBYTES = 64,
|
||||
BLAKE2B_KEYBYTES = 64,
|
||||
BLAKE2B_SALTBYTES = 16,
|
||||
BLAKE2B_PERSONALBYTES = 16
|
||||
};
|
||||
|
||||
typedef struct __blake2b_param
|
||||
{
|
||||
uchar digest_length; // 1
|
||||
uchar key_length; // 2
|
||||
uchar fanout; // 3
|
||||
uchar depth; // 4
|
||||
uint leaf_length; // 8
|
||||
ulong node_offset; // 16
|
||||
uchar node_depth; // 17
|
||||
uchar inner_length; // 18
|
||||
uchar reserved[14]; // 32
|
||||
uchar salt[BLAKE2B_SALTBYTES]; // 48
|
||||
uchar personal[BLAKE2B_PERSONALBYTES]; // 64
|
||||
} blake2b_param;
|
||||
|
||||
typedef struct __blake2b_state
|
||||
{
|
||||
ulong h[8];
|
||||
ulong t[2];
|
||||
ulong f[2];
|
||||
uchar buf[2 * BLAKE2B_BLOCKBYTES];
|
||||
size_t buflen;
|
||||
uchar last_node;
|
||||
} blake2b_state;
|
||||
|
||||
__constant static ulong blake2b_IV[8] =
|
||||
{
|
||||
0x6a09e667f3bcc908ULL, 0xbb67ae8584caa73bULL,
|
||||
0x3c6ef372fe94f82bULL, 0xa54ff53a5f1d36f1ULL,
|
||||
0x510e527fade682d1ULL, 0x9b05688c2b3e6c1fULL,
|
||||
0x1f83d9abfb41bd6bULL, 0x5be0cd19137e2179ULL
|
||||
};
|
||||
|
||||
__constant static uchar blake2b_sigma[12][16] =
|
||||
{
|
||||
{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 } ,
|
||||
{ 14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 } ,
|
||||
{ 11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4 } ,
|
||||
{ 7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8 } ,
|
||||
{ 9, 0, 5, 7, 2, 4, 10, 15, 14, 1, 11, 12, 6, 8, 3, 13 } ,
|
||||
{ 2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9 } ,
|
||||
{ 12, 5, 1, 15, 14, 13, 4, 10, 0, 7, 6, 3, 9, 2, 8, 11 } ,
|
||||
{ 13, 11, 7, 14, 12, 1, 3, 9, 5, 0, 15, 4, 8, 6, 2, 10 } ,
|
||||
{ 6, 15, 14, 9, 11, 3, 0, 8, 12, 2, 13, 7, 1, 4, 10, 5 } ,
|
||||
{ 10, 2, 8, 4, 7, 6, 1, 5, 15, 11, 9, 14, 3, 12, 13 , 0 } ,
|
||||
{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 } ,
|
||||
{ 14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 }
|
||||
};
|
||||
|
||||
|
||||
static inline int blake2b_set_lastnode( blake2b_state *S )
|
||||
{
|
||||
S->f[1] = ~0UL;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Some helper functions, not necessarily useful */
|
||||
static inline int blake2b_set_lastblock( blake2b_state *S )
|
||||
{
|
||||
if( S->last_node ) blake2b_set_lastnode( S );
|
||||
|
||||
S->f[0] = ~0UL;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline int blake2b_increment_counter( blake2b_state *S, const ulong inc )
|
||||
{
|
||||
S->t[0] += inc;
|
||||
S->t[1] += ( S->t[0] < inc );
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline uint load32( const void *src )
|
||||
{
|
||||
#if defined(NATIVE_LITTLE_ENDIAN)
|
||||
return *( uint * )( src );
|
||||
#else
|
||||
const uchar *p = ( uchar * )src;
|
||||
uint w = *p++;
|
||||
w |= ( uint )( *p++ ) << 8;
|
||||
w |= ( uint )( *p++ ) << 16;
|
||||
w |= ( uint )( *p++ ) << 24;
|
||||
return w;
|
||||
#endif
|
||||
}
|
||||
|
||||
static inline ulong load64( const void *src )
|
||||
{
|
||||
#if defined(NATIVE_LITTLE_ENDIAN)
|
||||
return *( ulong * )( src );
|
||||
#else
|
||||
const uchar *p = ( uchar * )src;
|
||||
ulong w = *p++;
|
||||
w |= ( ulong )( *p++ ) << 8;
|
||||
w |= ( ulong )( *p++ ) << 16;
|
||||
w |= ( ulong )( *p++ ) << 24;
|
||||
w |= ( ulong )( *p++ ) << 32;
|
||||
w |= ( ulong )( *p++ ) << 40;
|
||||
w |= ( ulong )( *p++ ) << 48;
|
||||
w |= ( ulong )( *p++ ) << 56;
|
||||
return w;
|
||||
#endif
|
||||
}
|
||||
|
||||
static inline void store32( void *dst, uint w )
|
||||
{
|
||||
#if defined(__ENDIAN_LITTLE__)
|
||||
*( uint * )( dst ) = w;
|
||||
#else
|
||||
uchar *p = ( uchar * )dst;
|
||||
*p++ = ( uchar )w; w >>= 8;
|
||||
*p++ = ( uchar )w; w >>= 8;
|
||||
*p++ = ( uchar )w; w >>= 8;
|
||||
*p++ = ( uchar )w;
|
||||
#endif
|
||||
}
|
||||
|
||||
static inline void store64( void *dst, ulong w )
|
||||
{
|
||||
#if defined(__ENDIAN_LITTLE__)
|
||||
*( ulong * )( dst ) = w;
|
||||
#else
|
||||
uchar *p = ( uchar * )dst;
|
||||
*p++ = ( uchar )w; w >>= 8;
|
||||
*p++ = ( uchar )w; w >>= 8;
|
||||
*p++ = ( uchar )w; w >>= 8;
|
||||
*p++ = ( uchar )w; w >>= 8;
|
||||
*p++ = ( uchar )w; w >>= 8;
|
||||
*p++ = ( uchar )w; w >>= 8;
|
||||
*p++ = ( uchar )w; w >>= 8;
|
||||
*p++ = ( uchar )w;
|
||||
#endif
|
||||
}
|
||||
|
||||
static inline ulong rotr64( const ulong w, const unsigned c )
|
||||
{
|
||||
return ( w >> c ) | ( w << ( 64 - c ) );
|
||||
}
|
||||
|
||||
/* init xors IV with input parameter block */
|
||||
static inline int blake2b_init_param( blake2b_state *S, const blake2b_param *P )
|
||||
{
|
||||
uchar *p, *h, *v;
|
||||
v = ( uchar * )( blake2b_IV );
|
||||
h = ( uchar * )( S->h );
|
||||
p = ( uchar * )( P );
|
||||
/* IV XOR ParamBlock */
|
||||
memset( S, 0, sizeof( blake2b_state ) );
|
||||
|
||||
for( int i = 0; i < BLAKE2B_OUTBYTES; ++i ) h[i] = v[i] ^ p[i];
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline int blake2b_init( blake2b_state *S, const uchar outlen )
|
||||
{
|
||||
blake2b_param P[1];
|
||||
|
||||
if ( ( !outlen ) || ( outlen > BLAKE2B_OUTBYTES ) ) return -1;
|
||||
|
||||
P->digest_length = outlen;
|
||||
P->key_length = 0;
|
||||
P->fanout = 1;
|
||||
P->depth = 1;
|
||||
store32( &P->leaf_length, 0 );
|
||||
store64( &P->node_offset, 0 );
|
||||
P->node_depth = 0;
|
||||
P->inner_length = 0;
|
||||
memset( P->reserved, 0, sizeof( P->reserved ) );
|
||||
memset( P->salt, 0, sizeof( P->salt ) );
|
||||
memset( P->personal, 0, sizeof( P->personal ) );
|
||||
return blake2b_init_param( S, P );
|
||||
}
|
||||
|
||||
static int blake2b_compress( blake2b_state *S, const uchar block[BLAKE2B_BLOCKBYTES] )
|
||||
{
|
||||
ulong m[16];
|
||||
ulong v[16];
|
||||
int i;
|
||||
|
||||
for( i = 0; i < 16; ++i )
|
||||
m[i] = load64( block + i * sizeof( m[i] ) );
|
||||
|
||||
for( i = 0; i < 8; ++i )
|
||||
v[i] = S->h[i];
|
||||
|
||||
v[ 8] = blake2b_IV[0];
|
||||
v[ 9] = blake2b_IV[1];
|
||||
v[10] = blake2b_IV[2];
|
||||
v[11] = blake2b_IV[3];
|
||||
v[12] = S->t[0] ^ blake2b_IV[4];
|
||||
v[13] = S->t[1] ^ blake2b_IV[5];
|
||||
v[14] = S->f[0] ^ blake2b_IV[6];
|
||||
v[15] = S->f[1] ^ blake2b_IV[7];
|
||||
#define G(r,i,a,b,c,d) \
|
||||
do { \
|
||||
a = a + b + m[blake2b_sigma[r][2*i+0]]; \
|
||||
d = rotr64(d ^ a, 32); \
|
||||
c = c + d; \
|
||||
b = rotr64(b ^ c, 24); \
|
||||
a = a + b + m[blake2b_sigma[r][2*i+1]]; \
|
||||
d = rotr64(d ^ a, 16); \
|
||||
c = c + d; \
|
||||
b = rotr64(b ^ c, 63); \
|
||||
} while(0)
|
||||
#define ROUND(r) \
|
||||
do { \
|
||||
G(r,0,v[ 0],v[ 4],v[ 8],v[12]); \
|
||||
G(r,1,v[ 1],v[ 5],v[ 9],v[13]); \
|
||||
G(r,2,v[ 2],v[ 6],v[10],v[14]); \
|
||||
G(r,3,v[ 3],v[ 7],v[11],v[15]); \
|
||||
G(r,4,v[ 0],v[ 5],v[10],v[15]); \
|
||||
G(r,5,v[ 1],v[ 6],v[11],v[12]); \
|
||||
G(r,6,v[ 2],v[ 7],v[ 8],v[13]); \
|
||||
G(r,7,v[ 3],v[ 4],v[ 9],v[14]); \
|
||||
} while(0)
|
||||
ROUND( 0 );
|
||||
ROUND( 1 );
|
||||
ROUND( 2 );
|
||||
ROUND( 3 );
|
||||
ROUND( 4 );
|
||||
ROUND( 5 );
|
||||
ROUND( 6 );
|
||||
ROUND( 7 );
|
||||
ROUND( 8 );
|
||||
ROUND( 9 );
|
||||
ROUND( 10 );
|
||||
ROUND( 11 );
|
||||
|
||||
for( i = 0; i < 8; ++i )
|
||||
S->h[i] = S->h[i] ^ v[i] ^ v[i + 8];
|
||||
|
||||
#undef G
|
||||
#undef ROUND
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void ucharcpy (uchar * dst, uchar const * src, size_t count)
|
||||
{
|
||||
for (size_t i = 0; i < count; ++i)
|
||||
{
|
||||
*dst++ = *src++;
|
||||
}
|
||||
}
|
||||
|
||||
void printstate (blake2b_state * S)
|
||||
{
|
||||
printf ("%lu %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu ", S->h[0], S->h[1], S->h[2], S->h[3], S->h[4], S->h[5], S->h[6], S->h[7], S->t[0], S->t[1], S->f[0], S->f[1]);
|
||||
for (int i = 0; i < 256; ++i)
|
||||
{
|
||||
printf ("%02x", S->buf[i]);
|
||||
}
|
||||
printf (" %lu %02x\n", S->buflen, S->last_node);
|
||||
}
|
||||
|
||||
/* inlen now in bytes */
|
||||
static int blake2b_update( blake2b_state *S, const uchar *in, ulong inlen )
|
||||
{
|
||||
while( inlen > 0 )
|
||||
{
|
||||
size_t left = S->buflen;
|
||||
size_t fill = 2 * BLAKE2B_BLOCKBYTES - left;
|
||||
|
||||
if( inlen > fill )
|
||||
{
|
||||
ucharcpy( S->buf + left, in, fill ); // Fill buffer
|
||||
S->buflen += fill;
|
||||
blake2b_increment_counter( S, BLAKE2B_BLOCKBYTES );
|
||||
blake2b_compress( S, S->buf ); // Compress
|
||||
ucharcpy( S->buf, S->buf + BLAKE2B_BLOCKBYTES, BLAKE2B_BLOCKBYTES ); // Shift buffer left
|
||||
S->buflen -= BLAKE2B_BLOCKBYTES;
|
||||
in += fill;
|
||||
inlen -= fill;
|
||||
}
|
||||
else // inlen <= fill
|
||||
{
|
||||
ucharcpy( S->buf + left, in, inlen );
|
||||
S->buflen += inlen; // Be lazy, do not compress
|
||||
in += inlen;
|
||||
inlen -= inlen;
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void ucharset (uchar * dest, int val, size_t count)
|
||||
{
|
||||
for (size_t i = 0; i < count; ++i)
|
||||
{
|
||||
*dest++ = val;
|
||||
}
|
||||
}
|
||||
|
||||
/* Is this correct? */
|
||||
static int blake2b_final( blake2b_state *S, uchar *out, uchar outlen )
|
||||
{
|
||||
uchar buffer[BLAKE2B_OUTBYTES];
|
||||
|
||||
if( S->buflen > BLAKE2B_BLOCKBYTES )
|
||||
{
|
||||
blake2b_increment_counter( S, BLAKE2B_BLOCKBYTES );
|
||||
blake2b_compress( S, S->buf );
|
||||
S->buflen -= BLAKE2B_BLOCKBYTES;
|
||||
ucharcpy( S->buf, S->buf + BLAKE2B_BLOCKBYTES, S->buflen );
|
||||
}
|
||||
|
||||
//blake2b_increment_counter( S, S->buflen );
|
||||
ulong inc = (ulong)S->buflen;
|
||||
S->t[0] += inc;
|
||||
// if ( S->t[0] < inc )
|
||||
// S->t[1] += 1;
|
||||
// This seems to crash the opencl compiler though fortunately this is calculating size and we don't do things bigger than 2^32
|
||||
|
||||
blake2b_set_lastblock( S );
|
||||
ucharset( S->buf + S->buflen, 0, 2 * BLAKE2B_BLOCKBYTES - S->buflen ); /* Padding */
|
||||
blake2b_compress( S, S->buf );
|
||||
|
||||
for( int i = 0; i < 8; ++i ) /* Output full hash to temp buffer */
|
||||
store64( buffer + sizeof( S->h[i] ) * i, S->h[i] );
|
||||
|
||||
ucharcpy( out, buffer, outlen );
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void ucharcpyglb (uchar * dst, __global uchar const * src, size_t count)
|
||||
{
|
||||
for (size_t i = 0; i < count; ++i)
|
||||
{
|
||||
*dst = *src;
|
||||
++dst;
|
||||
++src;
|
||||
}
|
||||
}
|
||||
|
||||
__kernel void raiblocks_work (__global ulong * attempt, __global ulong * result_a, __global uchar * item_a)
|
||||
{
|
||||
int const thread = get_global_id (0);
|
||||
uchar item_l [32];
|
||||
ucharcpyglb (item_l, item_a, 32);
|
||||
ulong attempt_l = *attempt + thread;
|
||||
blake2b_state state;
|
||||
blake2b_init (&state, sizeof (ulong));
|
||||
blake2b_update (&state, (uchar *) &attempt_l, sizeof (ulong));
|
||||
blake2b_update (&state, item_l, 32);
|
||||
ulong result;
|
||||
blake2b_final (&state, (uchar *) &result, sizeof (result));
|
||||
if (result >= 0xffffffc000000000ul)
|
||||
//if (result >= 0xff00000000000000ul)
|
||||
{
|
||||
*result_a = attempt_l;
|
||||
}
|
||||
}
|
||||
)%%%";
|
||||
}
|
||||
|
||||
void printstate (blake2b_state * S)
|
||||
{
|
||||
printf ("%llu %llu %llu %llu %llu %llu %llu %llu %llu %llu %llu %llu\n", S->h[0], S->h[1], S->h[2], S->h[3], S->h[4], S->h[5], S->h[6], S->h[7], S->t[0], S->t[1], S->f[0], S->f[1]);
|
||||
for (int i = 0; i < 256; ++i)
|
||||
{
|
||||
printf ("%02x", S->buf[i]);
|
||||
}
|
||||
printf (" %lu %02x\n", S->buflen, S->last_node);
|
||||
}
|
||||
|
||||
rai::opencl_environment::opencl_environment (bool & error_a)
|
||||
{
|
||||
cl_uint platformIdCount = 0;
|
||||
clGetPlatformIDs (0, nullptr, &platformIdCount);
|
||||
std::vector <cl_platform_id> platformIds (platformIdCount);
|
||||
clGetPlatformIDs (platformIdCount, platformIds.data(), nullptr);
|
||||
for (auto i (platformIds.begin ()), n (platformIds.end ()); i != n; ++i)
|
||||
{
|
||||
auto & devices_container (devices [*i]);
|
||||
cl_uint deviceIdCount = 0;
|
||||
clGetDeviceIDs (*i, CL_DEVICE_TYPE_ALL, 0, nullptr, &deviceIdCount);
|
||||
std::vector <cl_device_id> deviceIds (deviceIdCount);
|
||||
clGetDeviceIDs (*i, CL_DEVICE_TYPE_ALL, deviceIdCount, deviceIds.data (), nullptr);
|
||||
for (auto j (deviceIds.begin ()), m (deviceIds.end ()); j != m; ++j)
|
||||
{
|
||||
devices_container.push_back (*j);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void rai::opencl_environment::dump ()
|
||||
{
|
||||
auto index (0);
|
||||
for (auto i (devices.begin ()), n (devices.end ()); i != n; ++i, ++index)
|
||||
{
|
||||
std::vector <unsigned> queries = {CL_PLATFORM_PROFILE, CL_PLATFORM_VERSION, CL_PLATFORM_NAME, CL_PLATFORM_VENDOR, CL_PLATFORM_EXTENSIONS};
|
||||
std::cout << "Platform: " << index << std::endl;
|
||||
for (auto j (queries.begin ()), m (queries.end ()); j != m; ++j)
|
||||
{
|
||||
size_t platformInfoCount = 0;
|
||||
clGetPlatformInfo(i->first, *j, 0, nullptr, &platformInfoCount);
|
||||
std::vector <char> info (platformInfoCount);
|
||||
clGetPlatformInfo(i->first, *j, info.size (), info.data (), nullptr);
|
||||
std::cout << info.data () << std::endl;
|
||||
}
|
||||
for (auto j (i->second.begin ()), m (i->second.end ()); j != m; ++j)
|
||||
{
|
||||
std::vector <unsigned> queries = {CL_DEVICE_NAME, CL_DEVICE_VENDOR, CL_DEVICE_PROFILE};
|
||||
std::cout << "Device: " << j - i->second.begin () << std::endl;
|
||||
for (auto k (queries.begin ()), o (queries.end ()); k != o; ++k)
|
||||
{
|
||||
size_t platformInfoCount = 0;
|
||||
clGetDeviceInfo(*j, *k, 0, nullptr, &platformInfoCount);
|
||||
std::vector <char> info (platformInfoCount);
|
||||
clGetDeviceInfo(*j, *k, info.size (), info.data (), nullptr);
|
||||
std::cout << '\t' << info.data () << std::endl;
|
||||
}
|
||||
size_t deviceTypeCount = 0;
|
||||
clGetDeviceInfo(*j, CL_DEVICE_TYPE, 0, nullptr, &deviceTypeCount);
|
||||
std::vector <uint8_t> deviceTypeInfo (deviceTypeCount);
|
||||
clGetDeviceInfo(*j, CL_DEVICE_TYPE, deviceTypeCount, deviceTypeInfo.data (), 0);
|
||||
std::string device_type_string;
|
||||
switch (deviceTypeInfo [0])
|
||||
{
|
||||
case CL_DEVICE_TYPE_ACCELERATOR:
|
||||
device_type_string = "ACCELERATOR";
|
||||
break;
|
||||
case CL_DEVICE_TYPE_CPU:
|
||||
device_type_string = "CPU";
|
||||
break;
|
||||
case CL_DEVICE_TYPE_CUSTOM:
|
||||
device_type_string = "CUSTOM";
|
||||
break;
|
||||
case CL_DEVICE_TYPE_DEFAULT:
|
||||
device_type_string = "DEFAULT";
|
||||
break;
|
||||
case CL_DEVICE_TYPE_GPU:
|
||||
device_type_string = "GPU";
|
||||
break;
|
||||
default:
|
||||
device_type_string = "Unknown";
|
||||
break;
|
||||
}
|
||||
std::cout << '\t' << device_type_string << std::endl;
|
||||
size_t compilerAvailableCount = 0;
|
||||
clGetDeviceInfo(*j, CL_DEVICE_COMPILER_AVAILABLE, 0, nullptr, &compilerAvailableCount);
|
||||
std::vector <uint8_t> compilerAvailableInfo (compilerAvailableCount);
|
||||
clGetDeviceInfo(*j, CL_DEVICE_COMPILER_AVAILABLE, compilerAvailableCount, compilerAvailableInfo.data (), 0);
|
||||
std::cout << '\t' << "Compiler available: " << (compilerAvailableInfo [0] ? "true" : "false") << std::endl;
|
||||
size_t computeUnitsAvailableCount = 0;
|
||||
clGetDeviceInfo(*j, CL_DEVICE_MAX_COMPUTE_UNITS, 0, nullptr, &computeUnitsAvailableCount);
|
||||
std::vector <uint8_t> computeUnitsAvailableInfo (computeUnitsAvailableCount);
|
||||
clGetDeviceInfo(*j, CL_DEVICE_MAX_COMPUTE_UNITS, computeUnitsAvailableCount, computeUnitsAvailableInfo.data (), 0);
|
||||
uint64_t computeUnits (computeUnitsAvailableInfo [0] | (computeUnitsAvailableInfo [1] << 8) | (computeUnitsAvailableInfo [2] << 16) | (computeUnitsAvailableInfo [3] << 24));
|
||||
std::cout << '\t' << "Compute units available: " << computeUnits << std::endl;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
rai::opencl_work::opencl_work (bool & error_a, rai::opencl_environment & environment_a, rai::work_pool & pool_a) :
|
||||
context (0),
|
||||
attempt_buffer (0),
|
||||
result_buffer (0),
|
||||
item_buffer (0),
|
||||
program (0),
|
||||
kernel (0),
|
||||
queue (0),
|
||||
pool (pool_a)
|
||||
{
|
||||
rai::random_pool.GenerateBlock (reinterpret_cast <uint8_t *> (rand.s.data ()), rand.s.size () * sizeof (decltype (rand.s)::value_type));
|
||||
auto i (environment_a.devices.begin ());
|
||||
auto selected_platform (i->first);
|
||||
std::array <cl_device_id, 1> selected_devices;
|
||||
selected_devices [0] = i->second [1];
|
||||
cl_context_properties contextProperties [] =
|
||||
{
|
||||
CL_CONTEXT_PLATFORM,
|
||||
reinterpret_cast<cl_context_properties> (selected_platform),
|
||||
0, 0
|
||||
};
|
||||
cl_int createContextError (0);
|
||||
context = clCreateContext (contextProperties, selected_devices.size (), selected_devices.data (), nullptr, nullptr, &createContextError);
|
||||
error_a |= createContextError != CL_SUCCESS;
|
||||
if (!error_a)
|
||||
{
|
||||
cl_int queue_error (0);
|
||||
queue = clCreateCommandQueue (context, selected_devices [0], 0, &queue_error);
|
||||
error_a |= queue_error != CL_SUCCESS;
|
||||
if (!error_a)
|
||||
{
|
||||
cl_int attempt_error (0);
|
||||
attempt_buffer = clCreateBuffer (context, 0, sizeof (uint64_t), nullptr, &attempt_error);
|
||||
error_a |= attempt_error != CL_SUCCESS;
|
||||
if (!error_a)
|
||||
{
|
||||
cl_int result_error (0);
|
||||
result_buffer = clCreateBuffer (context, 0, sizeof (uint64_t), nullptr, &result_error);
|
||||
error_a |= result_error != CL_SUCCESS;
|
||||
if (!error_a)
|
||||
{
|
||||
cl_int item_error (0);
|
||||
size_t item_size (sizeof (rai::uint256_union));
|
||||
item_buffer = clCreateBuffer (context, 0, item_size, nullptr, &item_error);
|
||||
error_a |= item_error != CL_SUCCESS;
|
||||
if (!error_a)
|
||||
{
|
||||
cl_int program_error (0);
|
||||
char const * program_data (opencl_program.data ());
|
||||
size_t program_length (opencl_program.size ());
|
||||
program = clCreateProgramWithSource (context, 1, &program_data, &program_length, &program_error);
|
||||
error_a |= program_error != CL_SUCCESS;
|
||||
if (!error_a)
|
||||
{
|
||||
error_a |= clBuildProgram (program, selected_devices.size (), selected_devices.data (), "-D __APPLE__", nullptr, nullptr) != CL_SUCCESS;
|
||||
if (!error_a)
|
||||
{
|
||||
/*for (auto i (selected_devices.begin ()), n (selected_devices.end ()); i != n; ++i)
|
||||
{
|
||||
size_t log_size (0);
|
||||
clGetProgramBuildInfo (program, *i, CL_PROGRAM_BUILD_LOG, 0, nullptr, &log_size);
|
||||
std::vector <char> log (log_size);
|
||||
clGetProgramBuildInfo (program, *i, CL_PROGRAM_BUILD_LOG, log.size (), log.data (), nullptr);
|
||||
std::cout << log.data () << std::endl;
|
||||
}*/
|
||||
cl_int kernel_error (0);
|
||||
kernel = clCreateKernel (program, "raiblocks_work", &kernel_error);
|
||||
error_a |= kernel_error != CL_SUCCESS;
|
||||
if (!error_a)
|
||||
{
|
||||
cl_int arg0_error (clSetKernelArg (kernel, 0, sizeof (attempt_buffer), &attempt_buffer));
|
||||
error_a |= arg0_error != CL_SUCCESS;
|
||||
if (!error_a)
|
||||
{
|
||||
cl_int arg1_error (clSetKernelArg (kernel, 1, sizeof (result_buffer), &result_buffer));
|
||||
error_a |= arg1_error != CL_SUCCESS;
|
||||
if (!error_a)
|
||||
{
|
||||
cl_int arg2_error (clSetKernelArg (kernel, 2, sizeof (item_buffer), &item_buffer));
|
||||
error_a |= arg2_error != CL_SUCCESS;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
rai::opencl_work::~opencl_work ()
|
||||
{
|
||||
clReleaseKernel (kernel);
|
||||
clReleaseProgram (program);
|
||||
clReleaseContext (context);
|
||||
}
|
||||
|
||||
uint64_t rai::opencl_work::generate_work (rai::uint256_union const & root_a)
|
||||
{
|
||||
std::lock_guard <std::mutex> lock (mutex);
|
||||
uint64_t result (0);
|
||||
unsigned thread_count (rai::rai_network == rai::rai_networks::rai_test_network ? 128 : 1024 * 1024);
|
||||
size_t work_size [] = { thread_count, 0, 0 };
|
||||
while (pool.work_validate (root_a, result))
|
||||
{
|
||||
result = rand.next ();
|
||||
cl_int write_error1 = clEnqueueWriteBuffer (queue, attempt_buffer, false, 0, sizeof (uint64_t), &result, 0, nullptr, nullptr);
|
||||
cl_int write_error2 = clEnqueueWriteBuffer (queue, item_buffer, false, 0, sizeof (rai::uint256_union), root_a.bytes.data (), 0, nullptr, nullptr);
|
||||
cl_int enqueue_error = clEnqueueNDRangeKernel (queue, kernel, 1, nullptr, work_size, nullptr, 0, nullptr, nullptr);
|
||||
cl_int read_error1 = clEnqueueReadBuffer(queue, result_buffer, false, 0, sizeof (uint64_t), &result, 0, nullptr, nullptr);
|
||||
cl_int finishError = clFinish (queue);
|
||||
}
|
||||
return result;
|
||||
}
|
43
rai/node/openclwork.hpp
Normal file
43
rai/node/openclwork.hpp
Normal file
|
@ -0,0 +1,43 @@
|
|||
#pragma once
|
||||
|
||||
#include <rai/node/xorshift.hpp>
|
||||
|
||||
#include <map>
|
||||
#include <mutex>
|
||||
#include <vector>
|
||||
|
||||
#ifdef __APPLE__
|
||||
#include <OpenCL/opencl.h>
|
||||
#else
|
||||
#include <CL/cl.h>
|
||||
#endif
|
||||
|
||||
namespace rai
|
||||
{
|
||||
class opencl_environment
|
||||
{
|
||||
public:
|
||||
opencl_environment (bool &);
|
||||
void dump ();
|
||||
std::map <cl_platform_id, std::vector <cl_device_id>> devices;
|
||||
};
|
||||
union uint256_union;
|
||||
class work_pool;
|
||||
class opencl_work
|
||||
{
|
||||
public:
|
||||
opencl_work (bool &, rai::opencl_environment &, rai::work_pool &);
|
||||
~opencl_work ();
|
||||
uint64_t generate_work (rai::uint256_union const &);
|
||||
std::mutex mutex;
|
||||
cl_context context;
|
||||
cl_mem attempt_buffer;
|
||||
cl_mem result_buffer;
|
||||
cl_mem item_buffer;
|
||||
cl_program program;
|
||||
cl_kernel kernel;
|
||||
cl_command_queue queue;
|
||||
rai::xorshift1024star rand;
|
||||
rai::work_pool & pool;
|
||||
};
|
||||
}
|
|
@ -5,7 +5,8 @@
|
|||
|
||||
rai::system::system (uint16_t port_a, size_t count_a) :
|
||||
service (new boost::asio::io_service),
|
||||
alarm (*service)
|
||||
alarm (*service),
|
||||
work (false)
|
||||
{
|
||||
nodes.reserve (count_a);
|
||||
for (size_t i (0); i < count_a; ++i)
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
#include <rai/node/wallet.hpp>
|
||||
|
||||
#include <rai/node/node.hpp>
|
||||
#include <rai/node/xorshift.hpp>
|
||||
|
||||
#include <argon2.h>
|
||||
|
||||
|
@ -11,7 +12,7 @@
|
|||
|
||||
#include <future>
|
||||
|
||||
rai::work_pool::work_pool () :
|
||||
rai::work_pool::work_pool (bool opencl_work_a) :
|
||||
current (0),
|
||||
ticket (0),
|
||||
done (false)
|
||||
|
@ -27,6 +28,12 @@ done (false)
|
|||
}));
|
||||
threads.push_back (std::move (thread));
|
||||
}
|
||||
if (opencl_work_a)
|
||||
{
|
||||
auto error (false);
|
||||
rai::opencl_environment environment (error);
|
||||
opencl.reset (new rai::opencl_work (error, environment, *this));
|
||||
}
|
||||
}
|
||||
|
||||
rai::work_pool::~work_pool ()
|
||||
|
@ -38,32 +45,6 @@ rai::work_pool::~work_pool ()
|
|||
}
|
||||
}
|
||||
|
||||
namespace
|
||||
{
|
||||
class xorshift1024star
|
||||
{
|
||||
public:
|
||||
xorshift1024star ():
|
||||
p (0)
|
||||
{
|
||||
}
|
||||
std::array <uint64_t, 16> s;
|
||||
unsigned p;
|
||||
uint64_t next ()
|
||||
{
|
||||
auto p_l (p);
|
||||
auto pn ((p_l + 1) & 15);
|
||||
p = pn;
|
||||
uint64_t s0 = s[ p_l ];
|
||||
uint64_t s1 = s[ pn ];
|
||||
s1 ^= s1 << 31; // a
|
||||
s1 ^= s1 >> 11; // b
|
||||
s0 ^= s0 >> 30; // c
|
||||
return ( s[ pn ] = s0 ^ s1 ) * 1181783497276652981LL;
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
uint64_t rai::work_pool::work_value (rai::block_hash const & root_a, uint64_t work_a)
|
||||
{
|
||||
uint64_t result;
|
||||
|
@ -183,19 +164,26 @@ boost::optional <uint64_t> rai::work_pool::generate_maybe (rai::uint256_union co
|
|||
{
|
||||
assert (!root_a.is_zero ());
|
||||
boost::optional <uint64_t> result;
|
||||
std::unique_lock <std::mutex> lock (mutex);
|
||||
pending.push_back (root_a);
|
||||
producer_condition.notify_all ();
|
||||
auto done (false);
|
||||
while (!done)
|
||||
if (opencl != nullptr)
|
||||
{
|
||||
consumer_condition.wait (lock);
|
||||
auto finish (completed.find (root_a));
|
||||
if (finish != completed.end ())
|
||||
result = opencl->generate_work (root_a);
|
||||
}
|
||||
else
|
||||
{
|
||||
std::unique_lock <std::mutex> lock (mutex);
|
||||
pending.push_back (root_a);
|
||||
producer_condition.notify_all ();
|
||||
auto done (false);
|
||||
while (!done)
|
||||
{
|
||||
done = true;
|
||||
result = finish->second;
|
||||
completed.erase (finish);
|
||||
consumer_condition.wait (lock);
|
||||
auto finish (completed.find (root_a));
|
||||
if (finish != completed.end ())
|
||||
{
|
||||
done = true;
|
||||
result = finish->second;
|
||||
completed.erase (finish);
|
||||
}
|
||||
}
|
||||
}
|
||||
return result;
|
||||
|
|
|
@ -3,6 +3,7 @@
|
|||
#include <boost/optional.hpp>
|
||||
|
||||
#include <rai/secure.hpp>
|
||||
#include <rai/node/openclwork.hpp>
|
||||
|
||||
#include <atomic>
|
||||
#include <mutex>
|
||||
|
@ -15,7 +16,7 @@ namespace rai
|
|||
class work_pool
|
||||
{
|
||||
public:
|
||||
work_pool ();
|
||||
work_pool (bool);
|
||||
~work_pool ();
|
||||
void loop (uint64_t);
|
||||
void stop ();
|
||||
|
@ -34,6 +35,7 @@ public:
|
|||
std::mutex mutex;
|
||||
std::condition_variable consumer_condition;
|
||||
std::condition_variable producer_condition;
|
||||
std::unique_ptr <rai::opencl_work> opencl;
|
||||
// Local work threshold for rate-limiting publishing blocks. ~5 seconds of work.
|
||||
static uint64_t const publish_test_threshold = 0xff00000000000000;
|
||||
static uint64_t const publish_full_threshold = 0xffffffc000000000;
|
||||
|
|
28
rai/node/xorshift.hpp
Normal file
28
rai/node/xorshift.hpp
Normal file
|
@ -0,0 +1,28 @@
|
|||
#pragma once
|
||||
#include <array>
|
||||
|
||||
namespace rai
|
||||
{
|
||||
class xorshift1024star
|
||||
{
|
||||
public:
|
||||
xorshift1024star ():
|
||||
p (0)
|
||||
{
|
||||
}
|
||||
std::array <uint64_t, 16> s;
|
||||
unsigned p;
|
||||
uint64_t next ()
|
||||
{
|
||||
auto p_l (p);
|
||||
auto pn ((p_l + 1) & 15);
|
||||
p = pn;
|
||||
uint64_t s0 = s[ p_l ];
|
||||
uint64_t s1 = s[ pn ];
|
||||
s1 ^= s1 << 31; // a
|
||||
s1 ^= s1 >> 11; // b
|
||||
s0 ^= s0 >> 30; // c
|
||||
return ( s[ pn ] = s0 ^ s1 ) * 1181783497276652981LL;
|
||||
}
|
||||
};
|
||||
}
|
|
@ -101,7 +101,7 @@ int main (int argc, char * const * argv)
|
|||
{
|
||||
rai::node_init init;
|
||||
auto service (boost::make_shared <boost::asio::io_service> ());
|
||||
rai::work_pool work;
|
||||
rai::work_pool work (config.node.opencl_work);
|
||||
rai::alarm alarm (*service);
|
||||
auto node (std::make_shared <rai::node> (init, *service, working, alarm, config.node, work));
|
||||
if (!init.error ())
|
||||
|
|
|
@ -62,7 +62,7 @@ void rai_daemon::daemon::run ()
|
|||
if (!error)
|
||||
{
|
||||
auto service (boost::make_shared <boost::asio::io_service> ());
|
||||
rai::work_pool work;
|
||||
rai::work_pool work (config.node.opencl_work);
|
||||
rai::alarm alarm (*service);
|
||||
rai::node_init init;
|
||||
auto node (std::make_shared <rai::node> (init, *service, working, alarm, config.node, work));
|
||||
|
|
|
@ -148,7 +148,7 @@ int main (int argc, char * const * argv)
|
|||
if (!key.decode_hex (vm ["key"].as <std::string> ()))
|
||||
{
|
||||
rai::keypair genesis (key.to_string ());
|
||||
rai::work_pool work;
|
||||
rai::work_pool work (false);
|
||||
std::cout << "Genesis: " << genesis.prv.data.to_string () << std::endl << "Public: " << genesis.pub.to_string () << std::endl << "Account: " << genesis.pub.to_account () << std::endl;
|
||||
rai::keypair landing;
|
||||
std::cout << "Landing: " << landing.prv.data.to_string () << std::endl << "Public: " << landing.pub.to_string () << std::endl << "Account: " << landing.pub.to_account () << std::endl;
|
||||
|
@ -215,7 +215,7 @@ int main (int argc, char * const * argv)
|
|||
}
|
||||
else if (vm.count ("debug_profile_generate"))
|
||||
{
|
||||
rai::work_pool work;
|
||||
rai::work_pool work (false);
|
||||
rai::change_block block (0, 0, rai::keypair ().prv, 0, 0);
|
||||
std::cerr << "Starting generation profiling\n";
|
||||
for (uint64_t i (0); true; ++i)
|
||||
|
@ -229,7 +229,7 @@ int main (int argc, char * const * argv)
|
|||
}
|
||||
else if (vm.count ("debug_profile_verify"))
|
||||
{
|
||||
rai::work_pool work;
|
||||
rai::work_pool work (false);
|
||||
rai::change_block block (0, 0, rai::keypair ().prv, 0, 0);
|
||||
std::cerr << "Starting verification profiling\n";
|
||||
for (uint64_t i (0); true; ++i)
|
||||
|
|
|
@ -149,7 +149,7 @@ int run_wallet (int argc, char * const * argv)
|
|||
QApplication application (argc, const_cast <char **> (argv));
|
||||
rai::set_application_icon (application);
|
||||
auto service (boost::make_shared <boost::asio::io_service> ());
|
||||
rai::work_pool work;
|
||||
rai::work_pool work (config.node.opencl_work);
|
||||
rai::alarm alarm (*service);
|
||||
rai::node_init init;
|
||||
auto node (std::make_shared <rai::node> (init, *service, working, alarm, config.node, work));
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue