Use improved OpenCL kernel (#2902)
Credit and thanks go to @jserv and @PlasmaPower for the contribution. Originally pushed to https://github.com/nanocurrency/nano-work-server/pull/18, this kernel was rewritten with the following improvements: 1. Completely remove unnecessary intermediate buffers; 2. Fully vectorize Blake2b; 3. Schedule registers in Blake2b; 4. Load all sigma constants in a single instruction and use macros for constant evaluation; 5. Assume the messages no exceeding 17 exabytes and apply optimizations; 6. Implement AMD fastpath for rotr64; 7. Specify __constant for both optimization and error checking; Co-authored-by: Jim Huang <jserv@biilabs.io> Co-authored-by: Lee Bousfield <ljbousfield@gmail.com> Signed-off-by: Guilherme Lawless <guilherme@nano.org> Signed-off-by: Sergey Kroshnin <sergiysw@gmail.com>
This commit is contained in:
parent
d7798c6e70
commit
ff424afa00
1 changed files with 89 additions and 355 deletions
|
@ -16,379 +16,113 @@ bool nano::opencl_loaded{ false };
|
|||
namespace
|
||||
{
|
||||
std::string opencl_program = R"%%%(
|
||||
enum blake2b_constant
|
||||
{
|
||||
BLAKE2B_BLOCKBYTES = 128,
|
||||
BLAKE2B_OUTBYTES = 64,
|
||||
BLAKE2B_KEYBYTES = 64,
|
||||
BLAKE2B_SALTBYTES = 16,
|
||||
BLAKE2B_PERSONALBYTES = 16
|
||||
enum Blake2b_IV {
|
||||
iv0 = 0x6a09e667f3bcc908UL,
|
||||
iv1 = 0xbb67ae8584caa73bUL,
|
||||
iv2 = 0x3c6ef372fe94f82bUL,
|
||||
iv3 = 0xa54ff53a5f1d36f1UL,
|
||||
iv4 = 0x510e527fade682d1UL,
|
||||
iv5 = 0x9b05688c2b3e6c1fUL,
|
||||
iv6 = 0x1f83d9abfb41bd6bUL,
|
||||
iv7 = 0x5be0cd19137e2179UL,
|
||||
};
|
||||
|
||||
typedef struct __blake2b_param
|
||||
{
|
||||
uchar digest_length; // 1
|
||||
uchar key_length; // 2
|
||||
uchar fanout; // 3
|
||||
uchar depth; // 4
|
||||
uint leaf_length; // 8
|
||||
ulong node_offset; // 16
|
||||
uchar node_depth; // 17
|
||||
uchar inner_length; // 18
|
||||
uchar reserved[14]; // 32
|
||||
uchar salt[BLAKE2B_SALTBYTES]; // 48
|
||||
uchar personal[BLAKE2B_PERSONALBYTES]; // 64
|
||||
} blake2b_param;
|
||||
|
||||
typedef struct __blake2b_state
|
||||
{
|
||||
ulong h[8];
|
||||
ulong t[2];
|
||||
ulong f[2];
|
||||
uchar buf[2 * BLAKE2B_BLOCKBYTES];
|
||||
size_t buflen;
|
||||
uchar last_node;
|
||||
} blake2b_state;
|
||||
|
||||
__constant static const ulong blake2b_IV[8] =
|
||||
{
|
||||
0x6a09e667f3bcc908UL, 0xbb67ae8584caa73bUL,
|
||||
0x3c6ef372fe94f82bUL, 0xa54ff53a5f1d36f1UL,
|
||||
0x510e527fade682d1UL, 0x9b05688c2b3e6c1fUL,
|
||||
0x1f83d9abfb41bd6bUL, 0x5be0cd19137e2179UL
|
||||
enum IV_Derived {
|
||||
nano_xor_iv0 = 0x6a09e667f2bdc900UL, // iv1 ^ 0x1010000 ^ outlen
|
||||
nano_xor_iv4 = 0x510e527fade682f9UL, // iv4 ^ inbytes
|
||||
nano_xor_iv6 = 0xe07c265404be4294UL, // iv6 ^ ~0
|
||||
};
|
||||
|
||||
__constant static const uchar blake2b_sigma[12][16] =
|
||||
#ifdef cl_amd_media_ops
|
||||
#pragma OPENCL EXTENSION cl_amd_media_ops : enable
|
||||
static inline ulong rotr64(ulong x, int shift)
|
||||
{
|
||||
{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 } ,
|
||||
{ 14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 } ,
|
||||
{ 11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4 } ,
|
||||
{ 7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8 } ,
|
||||
{ 9, 0, 5, 7, 2, 4, 10, 15, 14, 1, 11, 12, 6, 8, 3, 13 } ,
|
||||
{ 2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9 } ,
|
||||
{ 12, 5, 1, 15, 14, 13, 4, 10, 0, 7, 6, 3, 9, 2, 8, 11 } ,
|
||||
{ 13, 11, 7, 14, 12, 1, 3, 9, 5, 0, 15, 4, 8, 6, 2, 10 } ,
|
||||
{ 6, 15, 14, 9, 11, 3, 0, 8, 12, 2, 13, 7, 1, 4, 10, 5 } ,
|
||||
{ 10, 2, 8, 4, 7, 6, 1, 5, 15, 11, 9, 14, 3, 12, 13 , 0 } ,
|
||||
{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 } ,
|
||||
{ 14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 }
|
||||
};
|
||||
|
||||
|
||||
static inline int blake2b_set_lastnode( blake2b_state *S )
|
||||
{
|
||||
S->f[1] = ~0UL;
|
||||
return 0;
|
||||
uint2 x2 = as_uint2(x);
|
||||
if (shift < 32)
|
||||
return as_ulong(amd_bitalign(x2.s10, x2, shift));
|
||||
return as_ulong(amd_bitalign(x2, x2.s10, (shift - 32)));
|
||||
}
|
||||
|
||||
/* Some helper functions, not necessarily useful */
|
||||
static inline int blake2b_set_lastblock( blake2b_state *S )
|
||||
{
|
||||
if( S->last_node ) blake2b_set_lastnode( S );
|
||||
|
||||
S->f[0] = ~0UL;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline int blake2b_increment_counter( blake2b_state *S, const ulong inc )
|
||||
{
|
||||
S->t[0] += inc;
|
||||
S->t[1] += ( S->t[0] < inc );
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline ulong load64( const void *src )
|
||||
{
|
||||
#if defined(__ENDIAN_LITTLE__)
|
||||
return *( ulong * )( src );
|
||||
#else
|
||||
const uchar *p = ( uchar * )src;
|
||||
ulong w = *p++;
|
||||
w |= ( ulong )( *p++ ) << 8;
|
||||
w |= ( ulong )( *p++ ) << 16;
|
||||
w |= ( ulong )( *p++ ) << 24;
|
||||
w |= ( ulong )( *p++ ) << 32;
|
||||
w |= ( ulong )( *p++ ) << 40;
|
||||
w |= ( ulong )( *p++ ) << 48;
|
||||
w |= ( ulong )( *p++ ) << 56;
|
||||
return w;
|
||||
static inline ulong rotr64(ulong x, int shift)
|
||||
{
|
||||
return rotate(x, 64UL - shift);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
static inline void store32( void *dst, uint w )
|
||||
#define G32(m0, m1, m2, m3, vva, vb1, vb2, vvc, vd1, vd2) \
|
||||
do { \
|
||||
vva += (ulong2)(vb1 + m0, vb2 + m2); \
|
||||
vd1 = rotr64(vd1 ^ vva.s0, 32); \
|
||||
vd2 = rotr64(vd2 ^ vva.s1, 32); \
|
||||
vvc += (ulong2)(vd1, vd2); \
|
||||
vb1 = rotr64(vb1 ^ vvc.s0, 24); \
|
||||
vb2 = rotr64(vb2 ^ vvc.s1, 24); \
|
||||
vva += (ulong2)(vb1 + m1, vb2 + m3); \
|
||||
vd1 = rotr64(vd1 ^ vva.s0, 16); \
|
||||
vd2 = rotr64(vd2 ^ vva.s1, 16); \
|
||||
vvc += (ulong2)(vd1, vd2); \
|
||||
vb1 = rotr64(vb1 ^ vvc.s0, 63); \
|
||||
vb2 = rotr64(vb2 ^ vvc.s1, 63); \
|
||||
} while (0)
|
||||
|
||||
#define G2v(m0, m1, m2, m3, a, b, c, d) \
|
||||
G32(m0, m1, m2, m3, vv[a / 2], vv[b / 2].s0, vv[b / 2].s1, vv[c / 2], \
|
||||
vv[d / 2].s0, vv[d / 2].s1)
|
||||
|
||||
#define G2v_split(m0, m1, m2, m3, a, vb1, vb2, c, vd1, vd2) \
|
||||
G32(m0, m1, m2, m3, vv[a / 2], vb1, vb2, vv[c / 2], vd1, vd2)
|
||||
|
||||
#define ROUND(m0, m1, m2, m3, m4, m5, m6, m7, m8, m9, m10, m11, m12, m13, m14, \
|
||||
m15) \
|
||||
do { \
|
||||
G2v(m0, m1, m2, m3, 0, 4, 8, 12); \
|
||||
G2v(m4, m5, m6, m7, 2, 6, 10, 14); \
|
||||
G2v_split(m8, m9, m10, m11, 0, vv[5 / 2].s1, vv[6 / 2].s0, 10, \
|
||||
vv[15 / 2].s1, vv[12 / 2].s0); \
|
||||
G2v_split(m12, m13, m14, m15, 2, vv[7 / 2].s1, vv[4 / 2].s0, 8, \
|
||||
vv[13 / 2].s1, vv[14 / 2].s0); \
|
||||
} while (0)
|
||||
|
||||
static inline ulong blake2b(ulong const nonce, __constant ulong *h)
|
||||
{
|
||||
#if defined(__ENDIAN_LITTLE__)
|
||||
*( uint * )( dst ) = w;
|
||||
#else
|
||||
uchar *p = ( uchar * )dst;
|
||||
*p++ = ( uchar )w; w >>= 8;
|
||||
*p++ = ( uchar )w; w >>= 8;
|
||||
*p++ = ( uchar )w; w >>= 8;
|
||||
*p++ = ( uchar )w;
|
||||
#endif
|
||||
ulong2 vv[8] = {
|
||||
{nano_xor_iv0, iv1}, {iv2, iv3}, {iv4, iv5},
|
||||
{iv6, iv7}, {iv0, iv1}, {iv2, iv3},
|
||||
{nano_xor_iv4, iv5}, {nano_xor_iv6, iv7},
|
||||
};
|
||||
|
||||
ROUND(nonce, h[0], h[1], h[2], h[3], 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
|
||||
ROUND(0, 0, h[3], 0, 0, 0, 0, 0, h[0], 0, nonce, h[1], 0, 0, 0, h[2]);
|
||||
ROUND(0, 0, 0, nonce, 0, h[1], 0, 0, 0, 0, h[2], 0, 0, h[0], 0, h[3]);
|
||||
ROUND(0, 0, h[2], h[0], 0, 0, 0, 0, h[1], 0, 0, 0, h[3], nonce, 0, 0);
|
||||
ROUND(0, nonce, 0, 0, h[1], h[3], 0, 0, 0, h[0], 0, 0, 0, 0, h[2], 0);
|
||||
ROUND(h[1], 0, 0, 0, nonce, 0, 0, h[2], h[3], 0, 0, 0, 0, 0, h[0], 0);
|
||||
ROUND(0, 0, h[0], 0, 0, 0, h[3], 0, nonce, 0, 0, h[2], 0, h[1], 0, 0);
|
||||
ROUND(0, 0, 0, 0, 0, h[0], h[2], 0, 0, nonce, 0, h[3], 0, 0, h[1], 0);
|
||||
ROUND(0, 0, 0, 0, 0, h[2], nonce, 0, 0, h[1], 0, 0, h[0], h[3], 0, 0);
|
||||
ROUND(0, h[1], 0, h[3], 0, 0, h[0], 0, 0, 0, 0, 0, h[2], 0, 0, nonce);
|
||||
ROUND(nonce, h[0], h[1], h[2], h[3], 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
|
||||
ROUND(0, 0, h[3], 0, 0, 0, 0, 0, h[0], 0, nonce, h[1], 0, 0, 0, h[2]);
|
||||
|
||||
return nano_xor_iv0 ^ vv[0].s0 ^ vv[4].s0;
|
||||
}
|
||||
|
||||
static inline void store64( void *dst, ulong w )
|
||||
{
|
||||
#if defined(__ENDIAN_LITTLE__)
|
||||
*( ulong * )( dst ) = w;
|
||||
#else
|
||||
uchar *p = ( uchar * )dst;
|
||||
*p++ = ( uchar )w; w >>= 8;
|
||||
*p++ = ( uchar )w; w >>= 8;
|
||||
*p++ = ( uchar )w; w >>= 8;
|
||||
*p++ = ( uchar )w; w >>= 8;
|
||||
*p++ = ( uchar )w; w >>= 8;
|
||||
*p++ = ( uchar )w; w >>= 8;
|
||||
*p++ = ( uchar )w; w >>= 8;
|
||||
*p++ = ( uchar )w;
|
||||
#endif
|
||||
}
|
||||
|
||||
static inline ulong rotr64( const ulong w, const unsigned c )
|
||||
{
|
||||
return ( w >> c ) | ( w << ( 64 - c ) );
|
||||
}
|
||||
|
||||
static void ucharset (void * dest_a, int val, size_t count)
|
||||
{
|
||||
uchar * dest = (uchar *)dest_a;
|
||||
for (size_t i = 0; i < count; ++i)
|
||||
{
|
||||
*dest++ = val;
|
||||
}
|
||||
}
|
||||
|
||||
/* init xors IV with input parameter block */
|
||||
static inline int blake2b_init_param( blake2b_state *S, const blake2b_param *P )
|
||||
{
|
||||
uchar *p, *h;
|
||||
__constant uchar *v;
|
||||
v = ( __constant uchar * )( blake2b_IV );
|
||||
h = ( uchar * )( S->h );
|
||||
p = ( uchar * )( P );
|
||||
/* IV XOR ParamBlock */
|
||||
ucharset( S, 0, sizeof( blake2b_state ) );
|
||||
|
||||
for( int i = 0; i < BLAKE2B_OUTBYTES; ++i ) h[i] = v[i] ^ p[i];
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline int blake2b_init( blake2b_state *S, const uchar outlen )
|
||||
{
|
||||
blake2b_param P[1];
|
||||
|
||||
if ( ( !outlen ) || ( outlen > BLAKE2B_OUTBYTES ) ) return -1;
|
||||
|
||||
P->digest_length = outlen;
|
||||
P->key_length = 0;
|
||||
P->fanout = 1;
|
||||
P->depth = 1;
|
||||
store32( &P->leaf_length, 0 );
|
||||
store64( &P->node_offset, 0 );
|
||||
P->node_depth = 0;
|
||||
P->inner_length = 0;
|
||||
ucharset( P->reserved, 0, sizeof( P->reserved ) );
|
||||
ucharset( P->salt, 0, sizeof( P->salt ) );
|
||||
ucharset( P->personal, 0, sizeof( P->personal ) );
|
||||
return blake2b_init_param( S, P );
|
||||
}
|
||||
|
||||
static int blake2b_compress( blake2b_state *S, __private const uchar block[BLAKE2B_BLOCKBYTES] )
|
||||
{
|
||||
ulong m[16];
|
||||
ulong v[16];
|
||||
int i;
|
||||
|
||||
for( i = 0; i < 16; ++i )
|
||||
m[i] = load64( block + i * sizeof( m[i] ) );
|
||||
|
||||
for( i = 0; i < 8; ++i )
|
||||
v[i] = S->h[i];
|
||||
|
||||
v[ 8] = blake2b_IV[0];
|
||||
v[ 9] = blake2b_IV[1];
|
||||
v[10] = blake2b_IV[2];
|
||||
v[11] = blake2b_IV[3];
|
||||
v[12] = S->t[0] ^ blake2b_IV[4];
|
||||
v[13] = S->t[1] ^ blake2b_IV[5];
|
||||
v[14] = S->f[0] ^ blake2b_IV[6];
|
||||
v[15] = S->f[1] ^ blake2b_IV[7];
|
||||
#define G(r,i,a,b,c,d) \
|
||||
do { \
|
||||
a = a + b + m[blake2b_sigma[r][2*i+0]]; \
|
||||
d = rotr64(d ^ a, 32); \
|
||||
c = c + d; \
|
||||
b = rotr64(b ^ c, 24); \
|
||||
a = a + b + m[blake2b_sigma[r][2*i+1]]; \
|
||||
d = rotr64(d ^ a, 16); \
|
||||
c = c + d; \
|
||||
b = rotr64(b ^ c, 63); \
|
||||
} while(0)
|
||||
#define ROUND(r) \
|
||||
do { \
|
||||
G(r,0,v[ 0],v[ 4],v[ 8],v[12]); \
|
||||
G(r,1,v[ 1],v[ 5],v[ 9],v[13]); \
|
||||
G(r,2,v[ 2],v[ 6],v[10],v[14]); \
|
||||
G(r,3,v[ 3],v[ 7],v[11],v[15]); \
|
||||
G(r,4,v[ 0],v[ 5],v[10],v[15]); \
|
||||
G(r,5,v[ 1],v[ 6],v[11],v[12]); \
|
||||
G(r,6,v[ 2],v[ 7],v[ 8],v[13]); \
|
||||
G(r,7,v[ 3],v[ 4],v[ 9],v[14]); \
|
||||
} while(0)
|
||||
ROUND( 0 );
|
||||
ROUND( 1 );
|
||||
ROUND( 2 );
|
||||
ROUND( 3 );
|
||||
ROUND( 4 );
|
||||
ROUND( 5 );
|
||||
ROUND( 6 );
|
||||
ROUND( 7 );
|
||||
ROUND( 8 );
|
||||
ROUND( 9 );
|
||||
ROUND( 10 );
|
||||
ROUND( 11 );
|
||||
|
||||
for( i = 0; i < 8; ++i )
|
||||
S->h[i] = S->h[i] ^ v[i] ^ v[i + 8];
|
||||
|
||||
#undef G
|
||||
#undef G32
|
||||
#undef G2v
|
||||
#undef G2v_split
|
||||
#undef ROUND
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void ucharcpy (uchar * dst, uchar const * src, size_t count)
|
||||
__kernel void nano_work(__constant ulong *attempt,
|
||||
__global ulong *result_a,
|
||||
__constant uchar *item_a,
|
||||
__constant ulong *difficulty)
|
||||
{
|
||||
for (size_t i = 0; i < count; ++i)
|
||||
{
|
||||
*dst++ = *src++;
|
||||
}
|
||||
}
|
||||
|
||||
void printstate (blake2b_state * S)
|
||||
{
|
||||
printf ("%lu %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu ", S->h[0], S->h[1], S->h[2], S->h[3], S->h[4], S->h[5], S->h[6], S->h[7], S->t[0], S->t[1], S->f[0], S->f[1]);
|
||||
for (int i = 0; i < 256; ++i)
|
||||
{
|
||||
printf ("%02x", S->buf[i]);
|
||||
}
|
||||
printf (" %lu %02x\n", S->buflen, S->last_node);
|
||||
}
|
||||
|
||||
/* inlen now in bytes */
|
||||
static int blake2b_update( blake2b_state *S, const uchar *in, ulong inlen )
|
||||
{
|
||||
while( inlen > 0 )
|
||||
{
|
||||
size_t left = S->buflen;
|
||||
size_t fill = 2 * BLAKE2B_BLOCKBYTES - left;
|
||||
|
||||
if( inlen > fill )
|
||||
{
|
||||
ucharcpy( S->buf + left, in, fill ); // Fill buffer
|
||||
S->buflen += fill;
|
||||
blake2b_increment_counter( S, BLAKE2B_BLOCKBYTES );
|
||||
blake2b_compress( S, S->buf ); // Compress
|
||||
ucharcpy( S->buf, S->buf + BLAKE2B_BLOCKBYTES, BLAKE2B_BLOCKBYTES ); // Shift buffer left
|
||||
S->buflen -= BLAKE2B_BLOCKBYTES;
|
||||
in += fill;
|
||||
inlen -= fill;
|
||||
}
|
||||
else // inlen <= fill
|
||||
{
|
||||
ucharcpy( S->buf + left, in, inlen );
|
||||
S->buflen += inlen; // Be lazy, do not compress
|
||||
in += inlen;
|
||||
inlen -= inlen;
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Is this correct? */
|
||||
static int blake2b_final( blake2b_state *S, uchar *out, uchar outlen )
|
||||
{
|
||||
uchar buffer[BLAKE2B_OUTBYTES];
|
||||
|
||||
if( S->buflen > BLAKE2B_BLOCKBYTES )
|
||||
{
|
||||
blake2b_increment_counter( S, BLAKE2B_BLOCKBYTES );
|
||||
blake2b_compress( S, S->buf );
|
||||
S->buflen -= BLAKE2B_BLOCKBYTES;
|
||||
ucharcpy( S->buf, S->buf + BLAKE2B_BLOCKBYTES, S->buflen );
|
||||
}
|
||||
|
||||
//blake2b_increment_counter( S, S->buflen );
|
||||
ulong inc = (ulong)S->buflen;
|
||||
S->t[0] += inc;
|
||||
// if ( S->t[0] < inc )
|
||||
// S->t[1] += 1;
|
||||
// This seems to crash the opencl compiler though fortunately this is calculating size and we don't do things bigger than 2^32
|
||||
|
||||
blake2b_set_lastblock( S );
|
||||
ucharset( S->buf + S->buflen, 0, 2 * BLAKE2B_BLOCKBYTES - S->buflen ); /* Padding */
|
||||
blake2b_compress( S, S->buf );
|
||||
|
||||
for( int i = 0; i < 8; ++i ) /* Output full hash to temp buffer */
|
||||
store64( buffer + sizeof( S->h[i] ) * i, S->h[i] );
|
||||
|
||||
ucharcpy( out, buffer, outlen );
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void ucharcpyglb (uchar * dst, __global uchar const * src, size_t count)
|
||||
{
|
||||
for (size_t i = 0; i < count; ++i)
|
||||
{
|
||||
*dst = *src;
|
||||
++dst;
|
||||
++src;
|
||||
}
|
||||
}
|
||||
|
||||
__kernel void nano_work (__global ulong const * attempt, __global ulong * result_a, __global uchar const * item_a, __global ulong const * difficulty_a)
|
||||
{
|
||||
int const thread = get_global_id (0);
|
||||
uchar item_l [32];
|
||||
ucharcpyglb (item_l, item_a, 32);
|
||||
ulong attempt_l = *attempt + thread;
|
||||
blake2b_state state;
|
||||
blake2b_init (&state, sizeof (ulong));
|
||||
blake2b_update (&state, (uchar *) &attempt_l, sizeof (ulong));
|
||||
blake2b_update (&state, item_l, 32);
|
||||
ulong result;
|
||||
blake2b_final (&state, (uchar *) &result, sizeof (result));
|
||||
if (result >= *difficulty_a)
|
||||
{
|
||||
*result_a = attempt_l;
|
||||
}
|
||||
const ulong attempt_l = *attempt + get_global_id(0);
|
||||
if (blake2b(attempt_l, item_a) >= *difficulty)
|
||||
*result_a = attempt_l;
|
||||
}
|
||||
)%%%";
|
||||
}
|
||||
|
||||
void printstate (blake2b_state * S)
|
||||
{
|
||||
std::cout << std::dec;
|
||||
for (uint64_t x : { S->h[0], S->h[1], S->h[2], S->h[3], S->h[4], S->h[5], S->h[6], S->h[7], S->t[0], S->t[1], S->f[0], S->f[1] })
|
||||
{
|
||||
std::cout << x << " ";
|
||||
}
|
||||
std::cout << std::endl;
|
||||
|
||||
std::cout << std::hex;
|
||||
for (uint8_t x : S->buf)
|
||||
{
|
||||
// print x as an integer, not as a char
|
||||
std::cout << +x;
|
||||
}
|
||||
std::cout
|
||||
<< std::dec << " " << S->buflen
|
||||
<< std::hex << " " << S->last_node
|
||||
<< std::dec << std::endl;
|
||||
}
|
||||
|
||||
nano::opencl_environment::opencl_environment (bool & error_a)
|
||||
{
|
||||
if (nano::opencl_loaded)
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue