Merge branch 'pulls/165'

This commit is contained in:
clemahieu 2017-10-28 18:00:29 -05:00
commit 54384695db
24 changed files with 2339 additions and 827 deletions

View file

@ -124,13 +124,13 @@ int blake2b_long(uint8_t *out, const void *in, const uint32_t outlen, const uint
while (toproduce > BLAKE2B_OUTBYTES)
{
memcpy(in_buffer, out_buffer, BLAKE2B_OUTBYTES);
blake2b(out_buffer, in_buffer, NULL, BLAKE2B_OUTBYTES, BLAKE2B_OUTBYTES, 0);
blake2b_argon(out_buffer, in_buffer, NULL, BLAKE2B_OUTBYTES, BLAKE2B_OUTBYTES, 0);
memcpy(out, out_buffer, BLAKE2B_OUTBYTES / 2);
out += BLAKE2B_OUTBYTES / 2;
toproduce -= BLAKE2B_OUTBYTES / 2;
}
memcpy(in_buffer, out_buffer, BLAKE2B_OUTBYTES);
blake2b(out_buffer, in_buffer, NULL, toproduce, BLAKE2B_OUTBYTES, 0);
blake2b_argon(out_buffer, in_buffer, NULL, toproduce, BLAKE2B_OUTBYTES, 0);
memcpy(out, out_buffer, toproduce);
}

View file

@ -21,6 +21,14 @@ else (WIN32)
IF (CMAKE_SYSTEM_PROCESSOR MATCHES "^(i.86|x86(_64)?)$")
set (PLATFORM_COMPILE_FLAGS "${PLATFORM_COMPILE_FLAGS} -msse4")
set (BLAKE2_IMPLEMENTATION "blake2/blake2b.c")
if (ENABLE_AVX2)
set (PLATFORM_COMPILE_FLAGS "${PLATFORM_COMPILE_FLAGS} -mavx2 -mbmi -mbmi2")
if (PERMUTE_WITH_GATHER)
set (PLATFORM_COMPILE_FLAGS "${PLATFORM_COMPILE_FLAGS} -DPERMUTE_WITH_GATHER")
elseif (PERMUTE_WITH_SHUFFLES)
set (PLATFORM_COMPILE_FLAGS "${PLATFORM_COMPILE_FLAGS} -DPERMUTE_WITH_SHUFFLES")
endif()
endif()
else()
set (BLAKE2_IMPLEMENTATION "blake2/blake2b-ref.c")
endif()

View file

@ -1,21 +1,22 @@
/*
BLAKE2 reference source code package - optimized C implementations
Written in 2012 by Samuel Neves <sneves@dei.uc.pt>
Copyright 2012, Samuel Neves <sneves@dei.uc.pt>. You may use this under the
terms of the CC0, the OpenSSL Licence, or the Apache Public License 2.0, at
your option. The terms of these licenses can be found at:
To the extent possible under law, the author(s) have dedicated all copyright
and related and neighboring rights to this software to the public domain
worldwide. This software is distributed without any warranty.
- CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0
- OpenSSL license : https://www.openssl.org/source/license.html
- Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0
You should have received a copy of the CC0 Public Domain Dedication along with
this software. If not, see <http://creativecommons.org/publicdomain/zero/1.0/>.
More information about the BLAKE2 hash function can be found at
https://blake2.net.
*/
#pragma once
#ifndef __BLAKE2_CONFIG_H__
#define __BLAKE2_CONFIG_H__
#ifndef BLAKE2_CONFIG_H
#define BLAKE2_CONFIG_H
// These don't work everywhere
#if defined(__SSE2__)
/* These don't work everywhere */
#if defined(__SSE2__) || defined(__x86_64__) || defined(__amd64__)
#define HAVE_SSE2
#endif
@ -31,6 +32,10 @@
#define HAVE_AVX
#endif
#if defined(__AVX2__)
#define HAVE_AVX2
#endif
#if defined(__XOP__)
#define HAVE_XOP
#endif
@ -69,4 +74,3 @@
#endif
#endif

View file

@ -1,133 +1,160 @@
/*
BLAKE2 reference source code package - optimized C implementations
BLAKE2 reference source code package - reference C implementations
Written in 2012 by Samuel Neves <sneves@dei.uc.pt>
Copyright 2012, Samuel Neves <sneves@dei.uc.pt>. You may use this under the
terms of the CC0, the OpenSSL Licence, or the Apache Public License 2.0, at
your option. The terms of these licenses can be found at:
To the extent possible under law, the author(s) have dedicated all copyright
and related and neighboring rights to this software to the public domain
worldwide. This software is distributed without any warranty.
- CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0
- OpenSSL license : https://www.openssl.org/source/license.html
- Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0
You should have received a copy of the CC0 Public Domain Dedication along with
this software. If not, see <http://creativecommons.org/publicdomain/zero/1.0/>.
More information about the BLAKE2 hash function can be found at
https://blake2.net.
*/
#pragma once
#ifndef __BLAKE2_IMPL_H__
#define __BLAKE2_IMPL_H__
#ifndef BLAKE2_IMPL_H
#define BLAKE2_IMPL_H
#include <stdint.h>
#include <string.h>
static inline uint32_t load32( const void *src )
#if !defined(__cplusplus) && (!defined(__STDC_VERSION__) || __STDC_VERSION__ < 199901L)
#if defined(_MSC_VER)
#define BLAKE2_INLINE __inline
#elif defined(__GNUC__)
#define BLAKE2_INLINE __inline__
#else
#define BLAKE2_INLINE
#endif
#else
#define BLAKE2_INLINE inline
#endif
static BLAKE2_INLINE uint32_t load32( const void *src )
{
#if defined(NATIVE_LITTLE_ENDIAN)
return *( uint32_t * )( src );
#else
const uint8_t *p = ( uint8_t * )src;
uint32_t w = *p++;
w |= ( uint32_t )( *p++ ) << 8;
w |= ( uint32_t )( *p++ ) << 16;
w |= ( uint32_t )( *p++ ) << 24;
uint32_t w;
memcpy(&w, src, sizeof w);
return w;
#else
const uint8_t *p = ( const uint8_t * )src;
return (( uint32_t )( p[0] ) << 0) |
(( uint32_t )( p[1] ) << 8) |
(( uint32_t )( p[2] ) << 16) |
(( uint32_t )( p[3] ) << 24) ;
#endif
}
static inline uint64_t load64( const void *src )
static BLAKE2_INLINE uint64_t load64( const void *src )
{
#if defined(NATIVE_LITTLE_ENDIAN)
return *( uint64_t * )( src );
#else
const uint8_t *p = ( uint8_t * )src;
uint64_t w = *p++;
w |= ( uint64_t )( *p++ ) << 8;
w |= ( uint64_t )( *p++ ) << 16;
w |= ( uint64_t )( *p++ ) << 24;
w |= ( uint64_t )( *p++ ) << 32;
w |= ( uint64_t )( *p++ ) << 40;
w |= ( uint64_t )( *p++ ) << 48;
w |= ( uint64_t )( *p++ ) << 56;
uint64_t w;
memcpy(&w, src, sizeof w);
return w;
#else
const uint8_t *p = ( const uint8_t * )src;
return (( uint64_t )( p[0] ) << 0) |
(( uint64_t )( p[1] ) << 8) |
(( uint64_t )( p[2] ) << 16) |
(( uint64_t )( p[3] ) << 24) |
(( uint64_t )( p[4] ) << 32) |
(( uint64_t )( p[5] ) << 40) |
(( uint64_t )( p[6] ) << 48) |
(( uint64_t )( p[7] ) << 56) ;
#endif
}
static inline void store32( void *dst, uint32_t w )
static BLAKE2_INLINE uint16_t load16( const void *src )
{
#if defined(NATIVE_LITTLE_ENDIAN)
*( uint32_t * )( dst ) = w;
uint16_t w;
memcpy(&w, src, sizeof w);
return w;
#else
const uint8_t *p = ( const uint8_t * )src;
return (( uint16_t )( p[0] ) << 0) |
(( uint16_t )( p[1] ) << 8) ;
#endif
}
static BLAKE2_INLINE void store16( void *dst, uint16_t w )
{
#if defined(NATIVE_LITTLE_ENDIAN)
memcpy(dst, &w, sizeof w);
#else
uint8_t *p = ( uint8_t * )dst;
*p++ = ( uint8_t )w; w >>= 8;
*p++ = ( uint8_t )w; w >>= 8;
*p++ = ( uint8_t )w; w >>= 8;
*p++ = ( uint8_t )w;
#endif
}
static inline void store64( void *dst, uint64_t w )
static BLAKE2_INLINE void store32( void *dst, uint32_t w )
{
#if defined(NATIVE_LITTLE_ENDIAN)
*( uint64_t * )( dst ) = w;
memcpy(dst, &w, sizeof w);
#else
uint8_t *p = ( uint8_t * )dst;
*p++ = ( uint8_t )w; w >>= 8;
*p++ = ( uint8_t )w; w >>= 8;
*p++ = ( uint8_t )w; w >>= 8;
*p++ = ( uint8_t )w; w >>= 8;
*p++ = ( uint8_t )w; w >>= 8;
*p++ = ( uint8_t )w; w >>= 8;
*p++ = ( uint8_t )w; w >>= 8;
*p++ = ( uint8_t )w;
p[0] = (uint8_t)(w >> 0);
p[1] = (uint8_t)(w >> 8);
p[2] = (uint8_t)(w >> 16);
p[3] = (uint8_t)(w >> 24);
#endif
}
static inline uint64_t load48( const void *src )
static BLAKE2_INLINE void store64( void *dst, uint64_t w )
{
#if defined(NATIVE_LITTLE_ENDIAN)
memcpy(dst, &w, sizeof w);
#else
uint8_t *p = ( uint8_t * )dst;
p[0] = (uint8_t)(w >> 0);
p[1] = (uint8_t)(w >> 8);
p[2] = (uint8_t)(w >> 16);
p[3] = (uint8_t)(w >> 24);
p[4] = (uint8_t)(w >> 32);
p[5] = (uint8_t)(w >> 40);
p[6] = (uint8_t)(w >> 48);
p[7] = (uint8_t)(w >> 56);
#endif
}
static BLAKE2_INLINE uint64_t load48( const void *src )
{
const uint8_t *p = ( const uint8_t * )src;
uint64_t w = *p++;
w |= ( uint64_t )( *p++ ) << 8;
w |= ( uint64_t )( *p++ ) << 16;
w |= ( uint64_t )( *p++ ) << 24;
w |= ( uint64_t )( *p++ ) << 32;
w |= ( uint64_t )( *p++ ) << 40;
return w;
return (( uint64_t )( p[0] ) << 0) |
(( uint64_t )( p[1] ) << 8) |
(( uint64_t )( p[2] ) << 16) |
(( uint64_t )( p[3] ) << 24) |
(( uint64_t )( p[4] ) << 32) |
(( uint64_t )( p[5] ) << 40) ;
}
static inline void store48( void *dst, uint64_t w )
static BLAKE2_INLINE void store48( void *dst, uint64_t w )
{
uint8_t *p = ( uint8_t * )dst;
*p++ = ( uint8_t )w; w >>= 8;
*p++ = ( uint8_t )w; w >>= 8;
*p++ = ( uint8_t )w; w >>= 8;
*p++ = ( uint8_t )w; w >>= 8;
*p++ = ( uint8_t )w; w >>= 8;
*p++ = ( uint8_t )w;
p[0] = (uint8_t)(w >> 0);
p[1] = (uint8_t)(w >> 8);
p[2] = (uint8_t)(w >> 16);
p[3] = (uint8_t)(w >> 24);
p[4] = (uint8_t)(w >> 32);
p[5] = (uint8_t)(w >> 40);
}
static inline uint32_t rotl32( const uint32_t w, const unsigned c )
{
return ( w << c ) | ( w >> ( 32 - c ) );
}
static inline uint64_t rotl64( const uint64_t w, const unsigned c )
{
return ( w << c ) | ( w >> ( 64 - c ) );
}
static inline uint32_t rotr32( const uint32_t w, const unsigned c )
static BLAKE2_INLINE uint32_t rotr32( const uint32_t w, const unsigned c )
{
return ( w >> c ) | ( w << ( 32 - c ) );
}
static inline uint64_t rotr64( const uint64_t w, const unsigned c )
static BLAKE2_INLINE uint64_t rotr64( const uint64_t w, const unsigned c )
{
return ( w >> c ) | ( w << ( 64 - c ) );
}
/* prevents compiler optimizing out memset() */
static inline void secure_zero_memory( void *v, size_t n )
static BLAKE2_INLINE void secure_zero_memory(void *v, size_t n)
{
volatile uint8_t *p = ( volatile uint8_t * )v;
while( n-- ) *p++ = 0;
static void *(*const volatile memset_v)(void *, int, size_t) = &memset;
memset_v(v, 0, n);
}
#endif

View file

@ -1,26 +1,27 @@
/*
BLAKE2 reference source code package - optimized C implementations
BLAKE2 reference source code package - reference C implementations
Written in 2012 by Samuel Neves <sneves@dei.uc.pt>
Copyright 2012, Samuel Neves <sneves@dei.uc.pt>. You may use this under the
terms of the CC0, the OpenSSL Licence, or the Apache Public License 2.0, at
your option. The terms of these licenses can be found at:
To the extent possible under law, the author(s) have dedicated all copyright
and related and neighboring rights to this software to the public domain
worldwide. This software is distributed without any warranty.
- CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0
- OpenSSL license : https://www.openssl.org/source/license.html
- Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0
You should have received a copy of the CC0 Public Domain Dedication along with
this software. If not, see <http://creativecommons.org/publicdomain/zero/1.0/>.
More information about the BLAKE2 hash function can be found at
https://blake2.net.
*/
#pragma once
#ifndef __BLAKE2_H__
#define __BLAKE2_H__
#ifndef BLAKE2_H
#define BLAKE2_H
#include <stddef.h>
#include <stdint.h>
#if defined(_MSC_VER)
#define ALIGN(x) __declspec(align(x))
#define BLAKE2_PACKED(x) __pragma(pack(push, 1)) x __pragma(pack(pop))
#else
#define ALIGN(x) __attribute__ ((__aligned__(x)))
#define BLAKE2_PACKED(x) x __attribute__((packed))
#endif
#if defined(__cplusplus)
@ -45,112 +46,152 @@ extern "C" {
BLAKE2B_PERSONALBYTES = 16
};
#pragma pack(push, 1)
typedef struct __blake2s_param
{
uint8_t digest_length; // 1
uint8_t key_length; // 2
uint8_t fanout; // 3
uint8_t depth; // 4
uint32_t leaf_length; // 8
uint8_t node_offset[6];// 14
uint8_t node_depth; // 15
uint8_t inner_length; // 16
// uint8_t reserved[0];
uint8_t salt[BLAKE2S_SALTBYTES]; // 24
uint8_t personal[BLAKE2S_PERSONALBYTES]; // 32
} blake2s_param;
ALIGN( 64 ) typedef struct __blake2s_state
typedef struct blake2s_state__
{
uint32_t h[8];
uint32_t t[2];
uint32_t f[2];
uint8_t buf[2 * BLAKE2S_BLOCKBYTES];
uint8_t buf[BLAKE2S_BLOCKBYTES];
size_t buflen;
size_t outlen;
uint8_t last_node;
} blake2s_state;
typedef struct __blake2b_param
{
uint8_t digest_length; // 1
uint8_t key_length; // 2
uint8_t fanout; // 3
uint8_t depth; // 4
uint32_t leaf_length; // 8
uint64_t node_offset; // 16
uint8_t node_depth; // 17
uint8_t inner_length; // 18
uint8_t reserved[14]; // 32
uint8_t salt[BLAKE2B_SALTBYTES]; // 48
uint8_t personal[BLAKE2B_PERSONALBYTES]; // 64
} blake2b_param;
ALIGN( 64 ) typedef struct __blake2b_state
typedef struct blake2b_state__
{
uint64_t h[8];
uint64_t t[2];
uint64_t f[2];
uint8_t buf[2 * BLAKE2B_BLOCKBYTES];
uint8_t buf[BLAKE2B_BLOCKBYTES];
size_t buflen;
size_t outlen;
uint8_t last_node;
} blake2b_state;
ALIGN( 64 ) typedef struct __blake2sp_state
typedef struct blake2sp_state__
{
blake2s_state S[8][1];
blake2s_state R[1];
uint8_t buf[8 * BLAKE2S_BLOCKBYTES];
size_t buflen;
uint8_t buf[8 * BLAKE2S_BLOCKBYTES];
size_t buflen;
size_t outlen;
} blake2sp_state;
ALIGN( 64 ) typedef struct __blake2bp_state
typedef struct blake2bp_state__
{
blake2b_state S[4][1];
blake2b_state R[1];
uint8_t buf[4 * BLAKE2B_BLOCKBYTES];
size_t buflen;
uint8_t buf[4 * BLAKE2B_BLOCKBYTES];
size_t buflen;
size_t outlen;
} blake2bp_state;
#pragma pack(pop)
// Streaming API
int blake2s_init( blake2s_state *S, const uint8_t outlen );
int blake2s_init_key( blake2s_state *S, const uint8_t outlen, const void *key, const uint8_t keylen );
int blake2s_init_param( blake2s_state *S, const blake2s_param *P );
int blake2s_update( blake2s_state *S, const uint8_t *in, uint64_t inlen );
int blake2s_final( blake2s_state *S, uint8_t *out, uint8_t outlen );
int blake2b_init( blake2b_state *S, const uint8_t outlen );
int blake2b_init_key( blake2b_state *S, const uint8_t outlen, const void *key, const uint8_t keylen );
int blake2b_init_param( blake2b_state *S, const blake2b_param *P );
int blake2b_update( blake2b_state *S, const uint8_t *in, uint64_t inlen );
int blake2b_final( blake2b_state *S, uint8_t *out, uint8_t outlen );
int blake2sp_init( blake2sp_state *S, const uint8_t outlen );
int blake2sp_init_key( blake2sp_state *S, const uint8_t outlen, const void *key, const uint8_t keylen );
int blake2sp_update( blake2sp_state *S, const uint8_t *in, uint64_t inlen );
int blake2sp_final( blake2sp_state *S, uint8_t *out, uint8_t outlen );
int blake2bp_init( blake2bp_state *S, const uint8_t outlen );
int blake2bp_init_key( blake2bp_state *S, const uint8_t outlen, const void *key, const uint8_t keylen );
int blake2bp_update( blake2bp_state *S, const uint8_t *in, uint64_t inlen );
int blake2bp_final( blake2bp_state *S, uint8_t *out, uint8_t outlen );
// Simple API
int blake2s( uint8_t *out, const void *in, const void *key, const uint8_t outlen, const uint64_t inlen, uint8_t keylen );
int blake2b( uint8_t *out, const void *in, const void *key, const uint8_t outlen, const uint64_t inlen, uint8_t keylen );
int blake2sp( uint8_t *out, const void *in, const void *key, const uint8_t outlen, const uint64_t inlen, uint8_t keylen );
int blake2bp( uint8_t *out, const void *in, const void *key, const uint8_t outlen, const uint64_t inlen, uint8_t keylen );
static inline int blake2( uint8_t *out, const void *in, const void *key, const uint8_t outlen, const uint64_t inlen, uint8_t keylen )
BLAKE2_PACKED(struct blake2s_param__
{
return blake2b( out, in, key, outlen, inlen, keylen );
}
uint8_t digest_length; /* 1 */
uint8_t key_length; /* 2 */
uint8_t fanout; /* 3 */
uint8_t depth; /* 4 */
uint32_t leaf_length; /* 8 */
uint32_t node_offset; /* 12 */
uint16_t xof_length; /* 14 */
uint8_t node_depth; /* 15 */
uint8_t inner_length; /* 16 */
/* uint8_t reserved[0]; */
uint8_t salt[BLAKE2S_SALTBYTES]; /* 24 */
uint8_t personal[BLAKE2S_PERSONALBYTES]; /* 32 */
});
typedef struct blake2s_param__ blake2s_param;
BLAKE2_PACKED(struct blake2b_param__
{
uint8_t digest_length; /* 1 */
uint8_t key_length; /* 2 */
uint8_t fanout; /* 3 */
uint8_t depth; /* 4 */
uint32_t leaf_length; /* 8 */
uint32_t node_offset; /* 12 */
uint32_t xof_length; /* 16 */
uint8_t node_depth; /* 17 */
uint8_t inner_length; /* 18 */
uint8_t reserved[14]; /* 32 */
uint8_t salt[BLAKE2B_SALTBYTES]; /* 48 */
uint8_t personal[BLAKE2B_PERSONALBYTES]; /* 64 */
});
typedef struct blake2b_param__ blake2b_param;
typedef struct blake2xs_state__
{
blake2s_state S[1];
blake2s_param P[1];
} blake2xs_state;
typedef struct blake2xb_state__
{
blake2b_state S[1];
blake2b_param P[1];
} blake2xb_state;
/* Padded structs result in a compile-time error */
enum {
BLAKE2_DUMMY_1 = 1/(sizeof(blake2s_param) == BLAKE2S_OUTBYTES),
BLAKE2_DUMMY_2 = 1/(sizeof(blake2b_param) == BLAKE2B_OUTBYTES)
};
/* Streaming API */
int blake2s_init( blake2s_state *S, size_t outlen );
int blake2s_init_key( blake2s_state *S, size_t outlen, const void *key, size_t keylen );
int blake2s_init_param( blake2s_state *S, const blake2s_param *P );
int blake2s_update( blake2s_state *S, const void *in, size_t inlen );
int blake2s_final( blake2s_state *S, void *out, size_t outlen );
int blake2b_init( blake2b_state *S, size_t outlen );
int blake2b_init_key( blake2b_state *S, size_t outlen, const void *key, size_t keylen );
int blake2b_init_param( blake2b_state *S, const blake2b_param *P );
int blake2b_update( blake2b_state *S, const void *in, size_t inlen );
int blake2b_final( blake2b_state *S, void *out, size_t outlen );
int blake2sp_init( blake2sp_state *S, size_t outlen );
int blake2sp_init_key( blake2sp_state *S, size_t outlen, const void *key, size_t keylen );
int blake2sp_update( blake2sp_state *S, const void *in, size_t inlen );
int blake2sp_final( blake2sp_state *S, void *out, size_t outlen );
int blake2bp_init( blake2bp_state *S, size_t outlen );
int blake2bp_init_key( blake2bp_state *S, size_t outlen, const void *key, size_t keylen );
int blake2bp_update( blake2bp_state *S, const void *in, size_t inlen );
int blake2bp_final( blake2bp_state *S, void *out, size_t outlen );
/* Variable output length API */
int blake2xs_init( blake2xs_state *S, const size_t outlen );
int blake2xs_init_key( blake2xs_state *S, const size_t outlen, const void *key, size_t keylen );
int blake2xs_update( blake2xs_state *S, const void *in, size_t inlen );
int blake2xs_final(blake2xs_state *S, void *out, size_t outlen);
int blake2xb_init( blake2xb_state *S, const size_t outlen );
int blake2xb_init_key( blake2xb_state *S, const size_t outlen, const void *key, size_t keylen );
int blake2xb_update( blake2xb_state *S, const void *in, size_t inlen );
int blake2xb_final(blake2xb_state *S, void *out, size_t outlen);
/* Simple API */
int blake2s( void *out, size_t outlen, const void *in, size_t inlen, const void *key, size_t keylen );
int blake2b( void *out, size_t outlen, const void *in, size_t inlen, const void *key, size_t keylen );
int blake2sp( void *out, size_t outlen, const void *in, size_t inlen, const void *key, size_t keylen );
int blake2bp( void *out, size_t outlen, const void *in, size_t inlen, const void *key, size_t keylen );
int blake2xs( void *out, size_t outlen, const void *in, size_t inlen, const void *key, size_t keylen );
int blake2xb( void *out, size_t outlen, const void *in, size_t inlen, const void *key, size_t keylen );
/* This is simply an alias for blake2b */
int blake2( void *out, size_t outlen, const void *in, size_t inlen, const void *key, size_t keylen );
/* alias for old blake2b */
int blake2b_argon( void *out, const void *in, const void *key, size_t outlen, size_t inlen, size_t keylen );
#if defined(__cplusplus)
}
#endif
#endif

View file

@ -0,0 +1,207 @@
/*
https://github.com/sneves/blake2-avx2
https://github.com/jedisct1/libsodium/
*/
#ifndef blake2b_compress_avx2_H
#define blake2b_compress_avx2_H
#define LOAD128(p) _mm_load_si128((__m128i *) (p))
#define STORE128(p, r) _mm_store_si128((__m128i *) (p), r)
#define LOADU128(p) _mm_loadu_si128((__m128i *) (p))
#define STOREU128(p, r) _mm_storeu_si128((__m128i *) (p), r)
#define LOAD(p) _mm256_load_si256((__m256i *) (p))
#define STORE(p, r) _mm256_store_si256((__m256i *) (p), r)
#define LOADU(p) _mm256_loadu_si256((__m256i *) (p))
#define STOREU(p, r) _mm256_storeu_si256((__m256i *) (p), r)
static inline uint64_t
LOADU64(const void *p)
{
uint64_t v;
memcpy(&v, p, sizeof v);
return v;
}
#define ROTATE16 \
_mm256_setr_epi8(2, 3, 4, 5, 6, 7, 0, 1, 10, 11, 12, 13, 14, 15, 8, 9, 2, \
3, 4, 5, 6, 7, 0, 1, 10, 11, 12, 13, 14, 15, 8, 9)
#define ROTATE24 \
_mm256_setr_epi8(3, 4, 5, 6, 7, 0, 1, 2, 11, 12, 13, 14, 15, 8, 9, 10, 3, \
4, 5, 6, 7, 0, 1, 2, 11, 12, 13, 14, 15, 8, 9, 10)
#define ADD(a, b) _mm256_add_epi64(a, b)
#define SUB(a, b) _mm256_sub_epi64(a, b)
#define XOR(a, b) _mm256_xor_si256(a, b)
#define AND(a, b) _mm256_and_si256(a, b)
#define OR(a, b) _mm256_or_si256(a, b)
#define ROT32(x) _mm256_shuffle_epi32((x), _MM_SHUFFLE(2, 3, 0, 1))
#define ROT24(x) _mm256_shuffle_epi8((x), ROTATE24)
#define ROT16(x) _mm256_shuffle_epi8((x), ROTATE16)
#define ROT63(x) _mm256_or_si256(_mm256_srli_epi64((x), 63), ADD((x), (x)))
#define BLAKE2B_G1_V1(a, b, c, d, m) \
do { \
a = ADD(a, m); \
a = ADD(a, b); \
d = XOR(d, a); \
d = ROT32(d); \
c = ADD(c, d); \
b = XOR(b, c); \
b = ROT24(b); \
} while (0)
#define BLAKE2B_G2_V1(a, b, c, d, m) \
do { \
a = ADD(a, m); \
a = ADD(a, b); \
d = XOR(d, a); \
d = ROT16(d); \
c = ADD(c, d); \
b = XOR(b, c); \
b = ROT63(b); \
} while (0)
#define BLAKE2B_DIAG_V1(a, b, c, d) \
do { \
d = _mm256_permute4x64_epi64(d, _MM_SHUFFLE(2, 1, 0, 3)); \
c = _mm256_permute4x64_epi64(c, _MM_SHUFFLE(1, 0, 3, 2)); \
b = _mm256_permute4x64_epi64(b, _MM_SHUFFLE(0, 3, 2, 1)); \
} while (0)
#define BLAKE2B_UNDIAG_V1(a, b, c, d) \
do { \
d = _mm256_permute4x64_epi64(d, _MM_SHUFFLE(0, 3, 2, 1)); \
c = _mm256_permute4x64_epi64(c, _MM_SHUFFLE(1, 0, 3, 2)); \
b = _mm256_permute4x64_epi64(b, _MM_SHUFFLE(2, 1, 0, 3)); \
} while (0)
#if defined(PERMUTE_WITH_SHUFFLES)
#include "blake2b-load-avx2.h"
#elif defined(PERMUTE_WITH_GATHER)
#else
#include "blake2b-load-avx2-simple.h"
#endif
#if defined(PERMUTE_WITH_GATHER)
ALIGN(64) static const uint32_t indices[12][16] = {
{ 0, 2, 4, 6, 1, 3, 5, 7, 8, 10, 12, 14, 9, 11, 13, 15},
{14, 4, 9, 13,10, 8, 15, 6, 1, 0, 11, 5,12, 2, 7, 3},
{11, 12, 5, 15, 8, 0, 2, 13,10, 3, 7, 9,14, 6, 1, 4},
{ 7, 3, 13, 11, 9, 1, 12, 14, 2, 5, 4, 15, 6, 10, 0, 8},
{ 9, 5, 2, 10, 0, 7, 4, 15,14, 11, 6, 3, 1, 12, 8, 13},
{ 2, 6, 0, 8,12, 10, 11, 3, 4, 7, 15, 1,13, 5, 14, 9},
{12, 1, 14, 4, 5, 15, 13, 10, 0, 6, 9, 8, 7, 3, 2, 11},
{13, 7, 12, 3,11, 14, 1, 9, 5, 15, 8, 2, 0, 4, 6, 10},
{ 6, 14, 11, 0,15, 9, 3, 8,12, 13, 1, 10, 2, 7, 4, 5},
{10, 8, 7, 1, 2, 4, 6, 5,15, 9, 3, 13,11, 14, 12, 0},
{ 0, 2, 4, 6, 1, 3, 5, 7, 8, 10, 12, 14, 9, 11, 13, 15},
{14, 4, 9, 13,10, 8, 15, 6, 1, 0, 11, 5,12, 2, 7, 3},
};
#define BLAKE2B_ROUND_V1(a, b, c, d, r, m) do { \
__m256i b0; \
b0 = _mm256_i32gather_epi64((void *)(m), LOAD128(&indices[r][ 0]), 8); \
BLAKE2B_G1_V1(a, b, c, d, b0); \
b0 = _mm256_i32gather_epi64((void *)(m), LOAD128(&indices[r][ 4]), 8); \
BLAKE2B_G2_V1(a, b, c, d, b0); \
BLAKE2B_DIAG_V1(a, b, c, d); \
b0 = _mm256_i32gather_epi64((void *)(m), LOAD128(&indices[r][ 8]), 8); \
BLAKE2B_G1_V1(a, b, c, d, b0); \
b0 = _mm256_i32gather_epi64((void *)(m), LOAD128(&indices[r][12]), 8); \
BLAKE2B_G2_V1(a, b, c, d, b0); \
BLAKE2B_UNDIAG_V1(a, b, c, d); \
} while(0)
#define BLAKE2B_ROUNDS_V1(a, b, c, d, m) do { \
int i; \
for(i = 0; i < 12; ++i) { \
BLAKE2B_ROUND_V1(a, b, c, d, i, m); \
} \
} while(0)
#else /* !PERMUTE_WITH_GATHER */
#define BLAKE2B_ROUND_V1(a, b, c, d, r, m) do { \
__m256i b0; \
BLAKE2B_LOAD_MSG_ ##r ##_1(b0); \
BLAKE2B_G1_V1(a, b, c, d, b0); \
BLAKE2B_LOAD_MSG_ ##r ##_2(b0); \
BLAKE2B_G2_V1(a, b, c, d, b0); \
BLAKE2B_DIAG_V1(a, b, c, d); \
BLAKE2B_LOAD_MSG_ ##r ##_3(b0); \
BLAKE2B_G1_V1(a, b, c, d, b0); \
BLAKE2B_LOAD_MSG_ ##r ##_4(b0); \
BLAKE2B_G2_V1(a, b, c, d, b0); \
BLAKE2B_UNDIAG_V1(a, b, c, d); \
} while(0)
#define BLAKE2B_ROUNDS_V1(a, b, c, d, m) do { \
BLAKE2B_ROUND_V1(a, b, c, d, 0, (m)); \
BLAKE2B_ROUND_V1(a, b, c, d, 1, (m)); \
BLAKE2B_ROUND_V1(a, b, c, d, 2, (m)); \
BLAKE2B_ROUND_V1(a, b, c, d, 3, (m)); \
BLAKE2B_ROUND_V1(a, b, c, d, 4, (m)); \
BLAKE2B_ROUND_V1(a, b, c, d, 5, (m)); \
BLAKE2B_ROUND_V1(a, b, c, d, 6, (m)); \
BLAKE2B_ROUND_V1(a, b, c, d, 7, (m)); \
BLAKE2B_ROUND_V1(a, b, c, d, 8, (m)); \
BLAKE2B_ROUND_V1(a, b, c, d, 9, (m)); \
BLAKE2B_ROUND_V1(a, b, c, d, 10, (m)); \
BLAKE2B_ROUND_V1(a, b, c, d, 11, (m)); \
} while(0)
#endif
#if defined(PERMUTE_WITH_GATHER)
#define DECLARE_MESSAGE_WORDS(m)
#elif defined(PERMUTE_WITH_SHUFFLES)
#define DECLARE_MESSAGE_WORDS(m) \
const __m256i m0 = _mm256_broadcastsi128_si256(LOADU128((m) + 0)); \
const __m256i m1 = _mm256_broadcastsi128_si256(LOADU128((m) + 16)); \
const __m256i m2 = _mm256_broadcastsi128_si256(LOADU128((m) + 32)); \
const __m256i m3 = _mm256_broadcastsi128_si256(LOADU128((m) + 48)); \
const __m256i m4 = _mm256_broadcastsi128_si256(LOADU128((m) + 64)); \
const __m256i m5 = _mm256_broadcastsi128_si256(LOADU128((m) + 80)); \
const __m256i m6 = _mm256_broadcastsi128_si256(LOADU128((m) + 96)); \
const __m256i m7 = _mm256_broadcastsi128_si256(LOADU128((m) + 112)); \
__m256i t0, t1;
#else
#define DECLARE_MESSAGE_WORDS(m) \
const uint64_t m0 = LOADU64((m) + 0); \
const uint64_t m1 = LOADU64((m) + 8); \
const uint64_t m2 = LOADU64((m) + 16); \
const uint64_t m3 = LOADU64((m) + 24); \
const uint64_t m4 = LOADU64((m) + 32); \
const uint64_t m5 = LOADU64((m) + 40); \
const uint64_t m6 = LOADU64((m) + 48); \
const uint64_t m7 = LOADU64((m) + 56); \
const uint64_t m8 = LOADU64((m) + 64); \
const uint64_t m9 = LOADU64((m) + 72); \
const uint64_t m10 = LOADU64((m) + 80); \
const uint64_t m11 = LOADU64((m) + 88); \
const uint64_t m12 = LOADU64((m) + 96); \
const uint64_t m13 = LOADU64((m) + 104); \
const uint64_t m14 = LOADU64((m) + 112); \
const uint64_t m15 = LOADU64((m) + 120);
#endif
#define BLAKE2B_COMPRESS_V1(a, b, m, t0, t1, f0, f1) \
do { \
DECLARE_MESSAGE_WORDS(m) \
const __m256i iv0 = a; \
const __m256i iv1 = b; \
__m256i c = LOAD(&blake2b_IV[0]); \
__m256i d = \
XOR(LOAD(&blake2b_IV[4]), _mm256_set_epi64x(f1, f0, t1, t0)); \
BLAKE2B_ROUNDS_V1(a, b, c, d, m); \
a = XOR(a, c); \
b = XOR(b, d); \
a = XOR(a, iv0); \
b = XOR(b, iv1); \
} while (0)
#endif

View file

@ -0,0 +1,54 @@
#ifndef BLAKE2_AVX2_BLAKE2B_LOAD_AVX2_SIMPLE_H
#define BLAKE2_AVX2_BLAKE2B_LOAD_AVX2_SIMPLE_H
#define BLAKE2B_LOAD_MSG_0_1(b0) b0 = _mm256_set_epi64x(m6, m4, m2, m0);
#define BLAKE2B_LOAD_MSG_0_2(b0) b0 = _mm256_set_epi64x(m7, m5, m3, m1);
#define BLAKE2B_LOAD_MSG_0_3(b0) b0 = _mm256_set_epi64x(m14, m12, m10, m8);
#define BLAKE2B_LOAD_MSG_0_4(b0) b0 = _mm256_set_epi64x(m15, m13, m11, m9);
#define BLAKE2B_LOAD_MSG_1_1(b0) b0 = _mm256_set_epi64x(m13, m9, m4, m14);
#define BLAKE2B_LOAD_MSG_1_2(b0) b0 = _mm256_set_epi64x(m6, m15, m8, m10);
#define BLAKE2B_LOAD_MSG_1_3(b0) b0 = _mm256_set_epi64x(m5, m11, m0, m1);
#define BLAKE2B_LOAD_MSG_1_4(b0) b0 = _mm256_set_epi64x(m3, m7, m2, m12);
#define BLAKE2B_LOAD_MSG_2_1(b0) b0 = _mm256_set_epi64x(m15, m5, m12, m11);
#define BLAKE2B_LOAD_MSG_2_2(b0) b0 = _mm256_set_epi64x(m13, m2, m0, m8);
#define BLAKE2B_LOAD_MSG_2_3(b0) b0 = _mm256_set_epi64x(m9, m7, m3, m10);
#define BLAKE2B_LOAD_MSG_2_4(b0) b0 = _mm256_set_epi64x(m4, m1, m6, m14);
#define BLAKE2B_LOAD_MSG_3_1(b0) b0 = _mm256_set_epi64x(m11, m13, m3, m7);
#define BLAKE2B_LOAD_MSG_3_2(b0) b0 = _mm256_set_epi64x(m14, m12, m1, m9);
#define BLAKE2B_LOAD_MSG_3_3(b0) b0 = _mm256_set_epi64x(m15, m4, m5, m2);
#define BLAKE2B_LOAD_MSG_3_4(b0) b0 = _mm256_set_epi64x(m8, m0, m10, m6);
#define BLAKE2B_LOAD_MSG_4_1(b0) b0 = _mm256_set_epi64x(m10, m2, m5, m9);
#define BLAKE2B_LOAD_MSG_4_2(b0) b0 = _mm256_set_epi64x(m15, m4, m7, m0);
#define BLAKE2B_LOAD_MSG_4_3(b0) b0 = _mm256_set_epi64x(m3, m6, m11, m14);
#define BLAKE2B_LOAD_MSG_4_4(b0) b0 = _mm256_set_epi64x(m13, m8, m12, m1);
#define BLAKE2B_LOAD_MSG_5_1(b0) b0 = _mm256_set_epi64x(m8, m0, m6, m2);
#define BLAKE2B_LOAD_MSG_5_2(b0) b0 = _mm256_set_epi64x(m3, m11, m10, m12);
#define BLAKE2B_LOAD_MSG_5_3(b0) b0 = _mm256_set_epi64x(m1, m15, m7, m4);
#define BLAKE2B_LOAD_MSG_5_4(b0) b0 = _mm256_set_epi64x(m9, m14, m5, m13);
#define BLAKE2B_LOAD_MSG_6_1(b0) b0 = _mm256_set_epi64x(m4, m14, m1, m12);
#define BLAKE2B_LOAD_MSG_6_2(b0) b0 = _mm256_set_epi64x(m10, m13, m15, m5);
#define BLAKE2B_LOAD_MSG_6_3(b0) b0 = _mm256_set_epi64x(m8, m9, m6, m0);
#define BLAKE2B_LOAD_MSG_6_4(b0) b0 = _mm256_set_epi64x(m11, m2, m3, m7);
#define BLAKE2B_LOAD_MSG_7_1(b0) b0 = _mm256_set_epi64x(m3, m12, m7, m13);
#define BLAKE2B_LOAD_MSG_7_2(b0) b0 = _mm256_set_epi64x(m9, m1, m14, m11);
#define BLAKE2B_LOAD_MSG_7_3(b0) b0 = _mm256_set_epi64x(m2, m8, m15, m5);
#define BLAKE2B_LOAD_MSG_7_4(b0) b0 = _mm256_set_epi64x(m10, m6, m4, m0);
#define BLAKE2B_LOAD_MSG_8_1(b0) b0 = _mm256_set_epi64x(m0, m11, m14, m6);
#define BLAKE2B_LOAD_MSG_8_2(b0) b0 = _mm256_set_epi64x(m8, m3, m9, m15);
#define BLAKE2B_LOAD_MSG_8_3(b0) b0 = _mm256_set_epi64x(m10, m1, m13, m12);
#define BLAKE2B_LOAD_MSG_8_4(b0) b0 = _mm256_set_epi64x(m5, m4, m7, m2);
#define BLAKE2B_LOAD_MSG_9_1(b0) b0 = _mm256_set_epi64x(m1, m7, m8, m10);
#define BLAKE2B_LOAD_MSG_9_2(b0) b0 = _mm256_set_epi64x(m5, m6, m4, m2);
#define BLAKE2B_LOAD_MSG_9_3(b0) b0 = _mm256_set_epi64x(m13, m3, m9, m15);
#define BLAKE2B_LOAD_MSG_9_4(b0) b0 = _mm256_set_epi64x(m0, m12, m14, m11);
#define BLAKE2B_LOAD_MSG_10_1(b0) b0 = _mm256_set_epi64x(m6, m4, m2, m0);
#define BLAKE2B_LOAD_MSG_10_2(b0) b0 = _mm256_set_epi64x(m7, m5, m3, m1);
#define BLAKE2B_LOAD_MSG_10_3(b0) b0 = _mm256_set_epi64x(m14, m12, m10, m8);
#define BLAKE2B_LOAD_MSG_10_4(b0) b0 = _mm256_set_epi64x(m15, m13, m11, m9);
#define BLAKE2B_LOAD_MSG_11_1(b0) b0 = _mm256_set_epi64x(m13, m9, m4, m14);
#define BLAKE2B_LOAD_MSG_11_2(b0) b0 = _mm256_set_epi64x(m6, m15, m8, m10);
#define BLAKE2B_LOAD_MSG_11_3(b0) b0 = _mm256_set_epi64x(m5, m11, m0, m1);
#define BLAKE2B_LOAD_MSG_11_4(b0) b0 = _mm256_set_epi64x(m3, m7, m2, m12);
#endif

340
blake2/blake2b-load-avx2.h Normal file
View file

@ -0,0 +1,340 @@
#ifndef BLAKE2_AVX2_BLAKE2B_LOAD_AVX2_H
#define BLAKE2_AVX2_BLAKE2B_LOAD_AVX2_H
#define BLAKE2B_LOAD_MSG_0_1(b0) do { \
t0 = _mm256_unpacklo_epi64(m0, m1); \
t1 = _mm256_unpacklo_epi64(m2, m3); \
b0 = _mm256_blend_epi32(t0, t1, 0xF0); \
} while(0)
#define BLAKE2B_LOAD_MSG_0_2(b0) \
do { \
t0 = _mm256_unpackhi_epi64(m0, m1);\
t1 = _mm256_unpackhi_epi64(m2, m3);\
b0 = _mm256_blend_epi32(t0, t1, 0xF0);\
} while(0)
#define BLAKE2B_LOAD_MSG_0_3(b0) \
do { \
t0 = _mm256_unpacklo_epi64(m4, m5);\
t1 = _mm256_unpacklo_epi64(m6, m7);\
b0 = _mm256_blend_epi32(t0, t1, 0xF0);\
} while(0)
#define BLAKE2B_LOAD_MSG_0_4(b0) \
do { \
t0 = _mm256_unpackhi_epi64(m4, m5);\
t1 = _mm256_unpackhi_epi64(m6, m7);\
b0 = _mm256_blend_epi32(t0, t1, 0xF0);\
} while(0)
#define BLAKE2B_LOAD_MSG_1_1(b0) \
do { \
t0 = _mm256_unpacklo_epi64(m7, m2);\
t1 = _mm256_unpackhi_epi64(m4, m6);\
b0 = _mm256_blend_epi32(t0, t1, 0xF0);\
} while(0)
#define BLAKE2B_LOAD_MSG_1_2(b0) \
do { \
t0 = _mm256_unpacklo_epi64(m5, m4);\
t1 = _mm256_alignr_epi8(m3, m7, 8);\
b0 = _mm256_blend_epi32(t0, t1, 0xF0);\
} while(0)
#define BLAKE2B_LOAD_MSG_1_3(b0) \
do { \
t0 = _mm256_shuffle_epi32(m0, _MM_SHUFFLE(1,0,3,2));\
t1 = _mm256_unpackhi_epi64(m5, m2);\
b0 = _mm256_blend_epi32(t0, t1, 0xF0);\
} while(0)
#define BLAKE2B_LOAD_MSG_1_4(b0) \
do { \
t0 = _mm256_unpacklo_epi64(m6, m1);\
t1 = _mm256_unpackhi_epi64(m3, m1);\
b0 = _mm256_blend_epi32(t0, t1, 0xF0);\
} while(0)
#define BLAKE2B_LOAD_MSG_2_1(b0) \
do { \
t0 = _mm256_alignr_epi8(m6, m5, 8);\
t1 = _mm256_unpackhi_epi64(m2, m7);\
b0 = _mm256_blend_epi32(t0, t1, 0xF0);\
} while(0)
#define BLAKE2B_LOAD_MSG_2_2(b0) \
do { \
t0 = _mm256_unpacklo_epi64(m4, m0);\
t1 = _mm256_blend_epi32(m6, m1, 0x33);\
b0 = _mm256_blend_epi32(t0, t1, 0xF0);\
} while(0)
#define BLAKE2B_LOAD_MSG_2_3(b0) \
do { \
t0 = _mm256_blend_epi32(m1, m5, 0x33);\
t1 = _mm256_unpackhi_epi64(m3, m4);\
b0 = _mm256_blend_epi32(t0, t1, 0xF0);\
} while(0)
#define BLAKE2B_LOAD_MSG_2_4(b0) \
do { \
t0 = _mm256_unpacklo_epi64(m7, m3);\
t1 = _mm256_alignr_epi8(m2, m0, 8);\
b0 = _mm256_blend_epi32(t0, t1, 0xF0);\
} while(0)
#define BLAKE2B_LOAD_MSG_3_1(b0) \
do { \
t0 = _mm256_unpackhi_epi64(m3, m1);\
t1 = _mm256_unpackhi_epi64(m6, m5);\
b0 = _mm256_blend_epi32(t0, t1, 0xF0);\
} while(0)
#define BLAKE2B_LOAD_MSG_3_2(b0) \
do { \
t0 = _mm256_unpackhi_epi64(m4, m0);\
t1 = _mm256_unpacklo_epi64(m6, m7);\
b0 = _mm256_blend_epi32(t0, t1, 0xF0);\
} while(0)
#define BLAKE2B_LOAD_MSG_3_3(b0) \
do { \
t0 = _mm256_blend_epi32(m2, m1, 0x33);\
t1 = _mm256_blend_epi32(m7, m2, 0x33);\
b0 = _mm256_blend_epi32(t0, t1, 0xF0);\
} while(0)
#define BLAKE2B_LOAD_MSG_3_4(b0) \
do { \
t0 = _mm256_unpacklo_epi64(m3, m5);\
t1 = _mm256_unpacklo_epi64(m0, m4);\
b0 = _mm256_blend_epi32(t0, t1, 0xF0);\
} while(0)
#define BLAKE2B_LOAD_MSG_4_1(b0) \
do { \
t0 = _mm256_unpackhi_epi64(m4, m2);\
t1 = _mm256_unpacklo_epi64(m1, m5);\
b0 = _mm256_blend_epi32(t0, t1, 0xF0);\
} while(0)
#define BLAKE2B_LOAD_MSG_4_2(b0) \
do { \
t0 = _mm256_blend_epi32(m3, m0, 0x33);\
t1 = _mm256_blend_epi32(m7, m2, 0x33);\
b0 = _mm256_blend_epi32(t0, t1, 0xF0);\
} while(0)
#define BLAKE2B_LOAD_MSG_4_3(b0) \
do { \
t0 = _mm256_blend_epi32(m5, m7, 0x33);\
t1 = _mm256_blend_epi32(m1, m3, 0x33);\
b0 = _mm256_blend_epi32(t0, t1, 0xF0);\
} while(0)
#define BLAKE2B_LOAD_MSG_4_4(b0) \
do { \
t0 = _mm256_alignr_epi8(m6, m0, 8);\
t1 = _mm256_blend_epi32(m6, m4, 0x33);\
b0 = _mm256_blend_epi32(t0, t1, 0xF0);\
} while(0)
#define BLAKE2B_LOAD_MSG_5_1(b0) \
do { \
t0 = _mm256_unpacklo_epi64(m1, m3);\
t1 = _mm256_unpacklo_epi64(m0, m4);\
b0 = _mm256_blend_epi32(t0, t1, 0xF0);\
} while(0)
#define BLAKE2B_LOAD_MSG_5_2(b0) \
do { \
t0 = _mm256_unpacklo_epi64(m6, m5);\
t1 = _mm256_unpackhi_epi64(m5, m1);\
b0 = _mm256_blend_epi32(t0, t1, 0xF0);\
} while(0)
#define BLAKE2B_LOAD_MSG_5_3(b0) \
do { \
t0 = _mm256_blend_epi32(m3, m2, 0x33);\
t1 = _mm256_unpackhi_epi64(m7, m0);\
b0 = _mm256_blend_epi32(t0, t1, 0xF0);\
} while(0)
#define BLAKE2B_LOAD_MSG_5_4(b0) \
do { \
t0 = _mm256_unpackhi_epi64(m6, m2);\
t1 = _mm256_blend_epi32(m4, m7, 0x33);\
b0 = _mm256_blend_epi32(t0, t1, 0xF0);\
} while(0)
#define BLAKE2B_LOAD_MSG_6_1(b0) \
do { \
t0 = _mm256_blend_epi32(m0, m6, 0x33);\
t1 = _mm256_unpacklo_epi64(m7, m2);\
b0 = _mm256_blend_epi32(t0, t1, 0xF0);\
} while(0)
#define BLAKE2B_LOAD_MSG_6_2(b0) \
do { \
t0 = _mm256_unpackhi_epi64(m2, m7);\
t1 = _mm256_alignr_epi8(m5, m6, 8);\
b0 = _mm256_blend_epi32(t0, t1, 0xF0);\
} while(0)
#define BLAKE2B_LOAD_MSG_6_3(b0) \
do { \
t0 = _mm256_unpacklo_epi64(m0, m3);\
t1 = _mm256_shuffle_epi32(m4, _MM_SHUFFLE(1,0,3,2));\
b0 = _mm256_blend_epi32(t0, t1, 0xF0);\
} while(0)
#define BLAKE2B_LOAD_MSG_6_4(b0) \
do { \
t0 = _mm256_unpackhi_epi64(m3, m1);\
t1 = _mm256_blend_epi32(m5, m1, 0x33);\
b0 = _mm256_blend_epi32(t0, t1, 0xF0);\
} while(0)
#define BLAKE2B_LOAD_MSG_7_1(b0) \
do { \
t0 = _mm256_unpackhi_epi64(m6, m3);\
t1 = _mm256_blend_epi32(m1, m6, 0x33);\
b0 = _mm256_blend_epi32(t0, t1, 0xF0);\
} while(0)
#define BLAKE2B_LOAD_MSG_7_2(b0) \
do { \
t0 = _mm256_alignr_epi8(m7, m5, 8);\
t1 = _mm256_unpackhi_epi64(m0, m4);\
b0 = _mm256_blend_epi32(t0, t1, 0xF0);\
} while(0)
#define BLAKE2B_LOAD_MSG_7_3(b0) \
do { \
t0 = _mm256_unpackhi_epi64(m2, m7);\
t1 = _mm256_unpacklo_epi64(m4, m1);\
b0 = _mm256_blend_epi32(t0, t1, 0xF0);\
} while(0)
#define BLAKE2B_LOAD_MSG_7_4(b0) \
do { \
t0 = _mm256_unpacklo_epi64(m0, m2);\
t1 = _mm256_unpacklo_epi64(m3, m5);\
b0 = _mm256_blend_epi32(t0, t1, 0xF0);\
} while(0)
#define BLAKE2B_LOAD_MSG_8_1(b0) \
do { \
t0 = _mm256_unpacklo_epi64(m3, m7);\
t1 = _mm256_alignr_epi8(m0, m5, 8);\
b0 = _mm256_blend_epi32(t0, t1, 0xF0);\
} while(0)
#define BLAKE2B_LOAD_MSG_8_2(b0) \
do { \
t0 = _mm256_unpackhi_epi64(m7, m4);\
t1 = _mm256_alignr_epi8(m4, m1, 8);\
b0 = _mm256_blend_epi32(t0, t1, 0xF0);\
} while(0)
#define BLAKE2B_LOAD_MSG_8_3(b0) \
do { \
t0 = m6;\
t1 = _mm256_alignr_epi8(m5, m0, 8);\
b0 = _mm256_blend_epi32(t0, t1, 0xF0);\
} while(0)
#define BLAKE2B_LOAD_MSG_8_4(b0) \
do { \
t0 = _mm256_blend_epi32(m3, m1, 0x33);\
t1 = m2;\
b0 = _mm256_blend_epi32(t0, t1, 0xF0);\
} while(0)
#define BLAKE2B_LOAD_MSG_9_1(b0) \
do { \
t0 = _mm256_unpacklo_epi64(m5, m4);\
t1 = _mm256_unpackhi_epi64(m3, m0);\
b0 = _mm256_blend_epi32(t0, t1, 0xF0);\
} while(0)
#define BLAKE2B_LOAD_MSG_9_2(b0) \
do { \
t0 = _mm256_unpacklo_epi64(m1, m2);\
t1 = _mm256_blend_epi32(m2, m3, 0x33);\
b0 = _mm256_blend_epi32(t0, t1, 0xF0);\
} while(0)
#define BLAKE2B_LOAD_MSG_9_3(b0) \
do { \
t0 = _mm256_unpackhi_epi64(m7, m4);\
t1 = _mm256_unpackhi_epi64(m1, m6);\
b0 = _mm256_blend_epi32(t0, t1, 0xF0);\
} while(0)
#define BLAKE2B_LOAD_MSG_9_4(b0) \
do { \
t0 = _mm256_alignr_epi8(m7, m5, 8);\
t1 = _mm256_unpacklo_epi64(m6, m0);\
b0 = _mm256_blend_epi32(t0, t1, 0xF0);\
} while(0)
#define BLAKE2B_LOAD_MSG_10_1(b0) \
do { \
t0 = _mm256_unpacklo_epi64(m0, m1);\
t1 = _mm256_unpacklo_epi64(m2, m3);\
b0 = _mm256_blend_epi32(t0, t1, 0xF0);\
} while(0)
#define BLAKE2B_LOAD_MSG_10_2(b0) \
do { \
t0 = _mm256_unpackhi_epi64(m0, m1);\
t1 = _mm256_unpackhi_epi64(m2, m3);\
b0 = _mm256_blend_epi32(t0, t1, 0xF0);\
} while(0)
#define BLAKE2B_LOAD_MSG_10_3(b0) \
do { \
t0 = _mm256_unpacklo_epi64(m4, m5);\
t1 = _mm256_unpacklo_epi64(m6, m7);\
b0 = _mm256_blend_epi32(t0, t1, 0xF0);\
} while(0)
#define BLAKE2B_LOAD_MSG_10_4(b0) \
do { \
t0 = _mm256_unpackhi_epi64(m4, m5);\
t1 = _mm256_unpackhi_epi64(m6, m7);\
b0 = _mm256_blend_epi32(t0, t1, 0xF0);\
} while(0)
#define BLAKE2B_LOAD_MSG_11_1(b0) \
do { \
t0 = _mm256_unpacklo_epi64(m7, m2);\
t1 = _mm256_unpackhi_epi64(m4, m6);\
b0 = _mm256_blend_epi32(t0, t1, 0xF0);\
} while(0)
#define BLAKE2B_LOAD_MSG_11_2(b0) \
do { \
t0 = _mm256_unpacklo_epi64(m5, m4);\
t1 = _mm256_alignr_epi8(m3, m7, 8);\
b0 = _mm256_blend_epi32(t0, t1, 0xF0);\
} while(0)
#define BLAKE2B_LOAD_MSG_11_3(b0) \
do { \
t0 = _mm256_shuffle_epi32(m0, _MM_SHUFFLE(1,0,3,2));\
t1 = _mm256_unpackhi_epi64(m5, m2);\
b0 = _mm256_blend_epi32(t0, t1, 0xF0);\
} while(0)
#define BLAKE2B_LOAD_MSG_11_4(b0) \
do { \
t0 = _mm256_unpacklo_epi64(m6, m1);\
t1 = _mm256_unpackhi_epi64(m3, m1);\
b0 = _mm256_blend_epi32(t0, t1, 0xF0);\
} while(0)
#endif

View file

@ -1,18 +1,19 @@
/*
BLAKE2 reference source code package - optimized C implementations
Written in 2012 by Samuel Neves <sneves@dei.uc.pt>
Copyright 2012, Samuel Neves <sneves@dei.uc.pt>. You may use this under the
terms of the CC0, the OpenSSL Licence, or the Apache Public License 2.0, at
your option. The terms of these licenses can be found at:
To the extent possible under law, the author(s) have dedicated all copyright
and related and neighboring rights to this software to the public domain
worldwide. This software is distributed without any warranty.
- CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0
- OpenSSL license : https://www.openssl.org/source/license.html
- Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0
You should have received a copy of the CC0 Public Domain Dedication along with
this software. If not, see <http://creativecommons.org/publicdomain/zero/1.0/>.
More information about the BLAKE2 hash function can be found at
https://blake2.net.
*/
#pragma once
#ifndef __BLAKE2B_LOAD_SSE2_H__
#define __BLAKE2B_LOAD_SSE2_H__
#ifndef BLAKE2B_LOAD_SSE2_H
#define BLAKE2B_LOAD_SSE2_H
#define LOAD_MSG_0_1(b0, b1) b0 = _mm_set_epi64x(m2, m0); b1 = _mm_set_epi64x(m6, m4)
#define LOAD_MSG_0_2(b0, b1) b0 = _mm_set_epi64x(m3, m1); b1 = _mm_set_epi64x(m7, m5)
@ -65,4 +66,3 @@
#endif

View file

@ -1,18 +1,19 @@
/*
BLAKE2 reference source code package - optimized C implementations
Written in 2012 by Samuel Neves <sneves@dei.uc.pt>
Copyright 2012, Samuel Neves <sneves@dei.uc.pt>. You may use this under the
terms of the CC0, the OpenSSL Licence, or the Apache Public License 2.0, at
your option. The terms of these licenses can be found at:
To the extent possible under law, the author(s) have dedicated all copyright
and related and neighboring rights to this software to the public domain
worldwide. This software is distributed without any warranty.
- CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0
- OpenSSL license : https://www.openssl.org/source/license.html
- Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0
You should have received a copy of the CC0 Public Domain Dedication along with
this software. If not, see <http://creativecommons.org/publicdomain/zero/1.0/>.
More information about the BLAKE2 hash function can be found at
https://blake2.net.
*/
#pragma once
#ifndef __BLAKE2B_LOAD_SSE41_H__
#define __BLAKE2B_LOAD_SSE41_H__
#ifndef BLAKE2B_LOAD_SSE41_H
#define BLAKE2B_LOAD_SSE41_H
#define LOAD_MSG_0_1(b0, b1) \
do \
@ -399,4 +400,3 @@ b1 = _mm_unpackhi_epi64(m3, m1); \
#endif

View file

@ -1,23 +1,21 @@
/*
BLAKE2 reference source code package - optimized C implementations
Written in 2012 by Samuel Neves <sneves@dei.uc.pt>
Copyright 2012, Samuel Neves <sneves@dei.uc.pt>. You may use this under the
terms of the CC0, the OpenSSL Licence, or the Apache Public License 2.0, at
your option. The terms of these licenses can be found at:
To the extent possible under law, the author(s) have dedicated all copyright
and related and neighboring rights to this software to the public domain
worldwide. This software is distributed without any warranty.
- CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0
- OpenSSL license : https://www.openssl.org/source/license.html
- Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0
You should have received a copy of the CC0 Public Domain Dedication along with
this software. If not, see <http://creativecommons.org/publicdomain/zero/1.0/>.
More information about the BLAKE2 hash function can be found at
https://blake2.net.
*/
#pragma once
#ifndef __BLAKE2B_ROUND_H__
#define __BLAKE2B_ROUND_H__
#ifndef BLAKE2B_ROUND_H
#define BLAKE2B_ROUND_H
#define LOAD(p) _mm_load_si128( (__m128i *)(p) )
#define STORE(p,r) _mm_store_si128((__m128i *)(p), r)
#define LOADU(p) _mm_loadu_si128( (__m128i *)(p) )
#define LOADU(p) _mm_loadu_si128( (const __m128i *)(p) )
#define STOREU(p,r) _mm_storeu_si128((__m128i *)(p), r)
#define TOF(reg) _mm_castsi128_ps((reg))
@ -62,7 +60,7 @@
\
row2l = _mm_roti_epi64(row2l, -24); \
row2h = _mm_roti_epi64(row2h, -24); \
#define G2(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h,b0,b1) \
row1l = _mm_add_epi64(_mm_add_epi64(row1l, b0), row2l); \
row1h = _mm_add_epi64(_mm_add_epi64(row1h, b1), row2h); \
@ -81,7 +79,7 @@
\
row2l = _mm_roti_epi64(row2l, -63); \
row2h = _mm_roti_epi64(row2h, -63); \
#if defined(HAVE_SSSE3)
#define DIAGONALIZE(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h) \
t0 = _mm_alignr_epi8(row2h, row2l, 8); \
@ -157,4 +155,3 @@
UNDIAGONALIZE(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h);
#endif

View file

@ -1,14 +1,16 @@
/*
BLAKE2 reference source code package - optimized C implementations
Written in 2012 by Samuel Neves <sneves@dei.uc.pt>
Copyright 2012, Samuel Neves <sneves@dei.uc.pt>. You may use this under the
terms of the CC0, the OpenSSL Licence, or the Apache Public License 2.0, at
your option. The terms of these licenses can be found at:
To the extent possible under law, the author(s) have dedicated all copyright
and related and neighboring rights to this software to the public domain
worldwide. This software is distributed without any warranty.
- CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0
- OpenSSL license : https://www.openssl.org/source/license.html
- Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0
You should have received a copy of the CC0 Public Domain Dedication along with
this software. If not, see <http://creativecommons.org/publicdomain/zero/1.0/>.
More information about the BLAKE2 hash function can be found at
https://blake2.net.
*/
#include <stdint.h>
@ -20,8 +22,8 @@
#include "blake2-config.h"
#if defined (_MSC_VER)
#include <intrin.h>
#ifdef _MSC_VER
#include <intrin.h> /* for _mm_set_epi64x */
#endif
#include <emmintrin.h>
#if defined(HAVE_SSSE3)
@ -37,9 +39,13 @@
#include <x86intrin.h>
#endif
#if defined(HAVE_AVX2)
#include "blake2b-compress-avx2.h"
#else
#include "blake2b-round.h"
#endif
ALIGN( 64 ) static const uint64_t blake2b_IV[8] =
static const uint64_t blake2b_IV[8] =
{
0x6a09e667f3bcc908ULL, 0xbb67ae8584caa73bULL,
0x3c6ef372fe94f82bULL, 0xa54ff53a5f1d36f1ULL,
@ -47,194 +53,93 @@ ALIGN( 64 ) static const uint64_t blake2b_IV[8] =
0x1f83d9abfb41bd6bULL, 0x5be0cd19137e2179ULL
};
static const uint8_t blake2b_sigma[12][16] =
/* Some helper functions */
static void blake2b_set_lastnode( blake2b_state *S )
{
{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 } ,
{ 14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 } ,
{ 11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4 } ,
{ 7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8 } ,
{ 9, 0, 5, 7, 2, 4, 10, 15, 14, 1, 11, 12, 6, 8, 3, 13 } ,
{ 2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9 } ,
{ 12, 5, 1, 15, 14, 13, 4, 10, 0, 7, 6, 3, 9, 2, 8, 11 } ,
{ 13, 11, 7, 14, 12, 1, 3, 9, 5, 0, 15, 4, 8, 6, 2, 10 } ,
{ 6, 15, 14, 9, 11, 3, 0, 8, 12, 2, 13, 7, 1, 4, 10, 5 } ,
{ 10, 2, 8, 4, 7, 6, 1, 5, 15, 11, 9, 14, 3, 12, 13 , 0 } ,
{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 } ,
{ 14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 }
};
/* Some helper functions, not necessarily useful */
static inline int blake2b_set_lastnode( blake2b_state *S )
{
S->f[1] = ~0ULL;
return 0;
S->f[1] = (uint64_t)-1;
}
static inline int blake2b_clear_lastnode( blake2b_state *S )
static int blake2b_is_lastblock( const blake2b_state *S )
{
S->f[1] = 0ULL;
return 0;
return S->f[0] != 0;
}
static inline int blake2b_set_lastblock( blake2b_state *S )
static void blake2b_set_lastblock( blake2b_state *S )
{
if( S->last_node ) blake2b_set_lastnode( S );
S->f[0] = ~0ULL;
return 0;
S->f[0] = (uint64_t)-1;
}
static inline int blake2b_clear_lastblock( blake2b_state *S )
static void blake2b_increment_counter( blake2b_state *S, const uint64_t inc )
{
if( S->last_node ) blake2b_clear_lastnode( S );
S->f[0] = 0ULL;
return 0;
}
static inline int blake2b_increment_counter( blake2b_state *S, const uint64_t inc )
{
#if __x86_64__
// ADD/ADC chain
__uint128_t t = ( ( __uint128_t )S->t[1] << 64 ) | S->t[0];
t += inc;
S->t[0] = ( uint64_t )( t >> 0 );
S->t[1] = ( uint64_t )( t >> 64 );
#else
S->t[0] += inc;
S->t[1] += ( S->t[0] < inc );
#endif
return 0;
}
// Parameter-related functions
static inline int blake2b_param_set_digest_length( blake2b_param *P, const uint8_t digest_length )
{
P->digest_length = digest_length;
return 0;
}
static inline int blake2b_param_set_fanout( blake2b_param *P, const uint8_t fanout )
{
P->fanout = fanout;
return 0;
}
static inline int blake2b_param_set_max_depth( blake2b_param *P, const uint8_t depth )
{
P->depth = depth;
return 0;
}
static inline int blake2b_param_set_leaf_length( blake2b_param *P, const uint32_t leaf_length )
{
P->leaf_length = leaf_length;
return 0;
}
static inline int blake2b_param_set_node_offset( blake2b_param *P, const uint64_t node_offset )
{
P->node_offset = node_offset;
return 0;
}
static inline int blake2b_param_set_node_depth( blake2b_param *P, const uint8_t node_depth )
{
P->node_depth = node_depth;
return 0;
}
static inline int blake2b_param_set_inner_length( blake2b_param *P, const uint8_t inner_length )
{
P->inner_length = inner_length;
return 0;
}
static inline int blake2b_param_set_salt( blake2b_param *P, const uint8_t salt[BLAKE2B_SALTBYTES] )
{
memcpy( P->salt, salt, BLAKE2B_SALTBYTES );
return 0;
}
static inline int blake2b_param_set_personal( blake2b_param *P, const uint8_t personal[BLAKE2B_PERSONALBYTES] )
{
memcpy( P->personal, personal, BLAKE2B_PERSONALBYTES );
return 0;
}
static inline int blake2b_init0( blake2b_state *S )
{
memset( S, 0, sizeof( blake2b_state ) );
for( int i = 0; i < 8; ++i ) S->h[i] = blake2b_IV[i];
return 0;
}
/* init xors IV with input parameter block */
int blake2b_init_param( blake2b_state *S, const blake2b_param *P )
{
uint8_t *p, *h, *v;
//blake2b_init0( S );
v = ( uint8_t * )( blake2b_IV );
h = ( uint8_t * )( S->h );
p = ( uint8_t * )( P );
size_t i;
/*blake2b_init0( S ); */
const unsigned char * v = ( const unsigned char * )( blake2b_IV );
const unsigned char * p = ( const unsigned char * )( P );
unsigned char * h = ( unsigned char * )( S->h );
/* IV XOR ParamBlock */
memset( S, 0, sizeof( blake2b_state ) );
for( int i = 0; i < BLAKE2B_OUTBYTES; ++i ) h[i] = v[i] ^ p[i];
for( i = 0; i < BLAKE2B_OUTBYTES; ++i ) h[i] = v[i] ^ p[i];
S->outlen = P->digest_length;
return 0;
}
/* Some sort of default parameter block initialization, for sequential blake2b */
int blake2b_init( blake2b_state *S, const uint8_t outlen )
int blake2b_init( blake2b_state *S, size_t outlen )
{
blake2b_param P[1];
if ( ( !outlen ) || ( outlen > BLAKE2B_OUTBYTES ) ) return -1;
const blake2b_param P =
{
outlen,
0,
1,
1,
0,
0,
0,
0,
{0},
{0},
{0}
};
return blake2b_init_param( S, &P );
P->digest_length = (uint8_t)outlen;
P->key_length = 0;
P->fanout = 1;
P->depth = 1;
store32( &P->leaf_length, 0 );
store32( &P->node_offset, 0 );
store32( &P->xof_length, 0 );
P->node_depth = 0;
P->inner_length = 0;
memset( P->reserved, 0, sizeof( P->reserved ) );
memset( P->salt, 0, sizeof( P->salt ) );
memset( P->personal, 0, sizeof( P->personal ) );
return blake2b_init_param( S, P );
}
int blake2b_init_key( blake2b_state *S, const uint8_t outlen, const void *key, const uint8_t keylen )
int blake2b_init_key( blake2b_state *S, size_t outlen, const void *key, size_t keylen )
{
blake2b_param P[1];
if ( ( !outlen ) || ( outlen > BLAKE2B_OUTBYTES ) ) return -1;
if ( ( !keylen ) || keylen > BLAKE2B_KEYBYTES ) return -1;
const blake2b_param P =
{
outlen,
keylen,
1,
1,
0,
0,
0,
0,
{0},
{0},
{0}
};
P->digest_length = (uint8_t)outlen;
P->key_length = (uint8_t)keylen;
P->fanout = 1;
P->depth = 1;
store32( &P->leaf_length, 0 );
store32( &P->node_offset, 0 );
store32( &P->xof_length, 0 );
P->node_depth = 0;
P->inner_length = 0;
memset( P->reserved, 0, sizeof( P->reserved ) );
memset( P->salt, 0, sizeof( P->salt ) );
memset( P->personal, 0, sizeof( P->personal ) );
if( blake2b_init_param( S, &P ) < 0 )
if( blake2b_init_param( S, P ) < 0 )
return 0;
{
@ -247,7 +152,19 @@ int blake2b_init_key( blake2b_state *S, const uint8_t outlen, const void *key, c
return 0;
}
static inline int blake2b_compress( blake2b_state *S, const uint8_t block[BLAKE2B_BLOCKBYTES] )
#if defined(HAVE_AVX2)
static void blake2b_compress( blake2b_state *S, const uint8_t block[BLAKE2B_BLOCKBYTES] )
{
__m256i a = LOADU(&S->h[0]);
__m256i b = LOADU(&S->h[4]);
BLAKE2B_COMPRESS_V1(a, b, block, S->t[0], S->t[1], S->f[0], S->f[1]);
STOREU(&S->h[0], a);
STOREU(&S->h[4], b);
}
#else
static void blake2b_compress( blake2b_state *S, const uint8_t block[BLAKE2B_BLOCKBYTES] )
{
__m128i row1l, row1h;
__m128i row2l, row2h;
@ -269,22 +186,22 @@ static inline int blake2b_compress( blake2b_state *S, const uint8_t block[BLAKE2
const __m128i m6 = LOADU( block + 96 );
const __m128i m7 = LOADU( block + 112 );
#else
const uint64_t m0 = ( ( uint64_t * )block )[ 0];
const uint64_t m1 = ( ( uint64_t * )block )[ 1];
const uint64_t m2 = ( ( uint64_t * )block )[ 2];
const uint64_t m3 = ( ( uint64_t * )block )[ 3];
const uint64_t m4 = ( ( uint64_t * )block )[ 4];
const uint64_t m5 = ( ( uint64_t * )block )[ 5];
const uint64_t m6 = ( ( uint64_t * )block )[ 6];
const uint64_t m7 = ( ( uint64_t * )block )[ 7];
const uint64_t m8 = ( ( uint64_t * )block )[ 8];
const uint64_t m9 = ( ( uint64_t * )block )[ 9];
const uint64_t m10 = ( ( uint64_t * )block )[10];
const uint64_t m11 = ( ( uint64_t * )block )[11];
const uint64_t m12 = ( ( uint64_t * )block )[12];
const uint64_t m13 = ( ( uint64_t * )block )[13];
const uint64_t m14 = ( ( uint64_t * )block )[14];
const uint64_t m15 = ( ( uint64_t * )block )[15];
const uint64_t m0 = load64(block + 0 * sizeof(uint64_t));
const uint64_t m1 = load64(block + 1 * sizeof(uint64_t));
const uint64_t m2 = load64(block + 2 * sizeof(uint64_t));
const uint64_t m3 = load64(block + 3 * sizeof(uint64_t));
const uint64_t m4 = load64(block + 4 * sizeof(uint64_t));
const uint64_t m5 = load64(block + 5 * sizeof(uint64_t));
const uint64_t m6 = load64(block + 6 * sizeof(uint64_t));
const uint64_t m7 = load64(block + 7 * sizeof(uint64_t));
const uint64_t m8 = load64(block + 8 * sizeof(uint64_t));
const uint64_t m9 = load64(block + 9 * sizeof(uint64_t));
const uint64_t m10 = load64(block + 10 * sizeof(uint64_t));
const uint64_t m11 = load64(block + 11 * sizeof(uint64_t));
const uint64_t m12 = load64(block + 12 * sizeof(uint64_t));
const uint64_t m13 = load64(block + 13 * sizeof(uint64_t));
const uint64_t m14 = load64(block + 14 * sizeof(uint64_t));
const uint64_t m15 = load64(block + 15 * sizeof(uint64_t));
#endif
row1l = LOADU( &S->h[0] );
row1h = LOADU( &S->h[2] );
@ -314,70 +231,70 @@ static inline int blake2b_compress( blake2b_state *S, const uint8_t block[BLAKE2
row2h = _mm_xor_si128( row4h, row2h );
STOREU( &S->h[4], _mm_xor_si128( LOADU( &S->h[4] ), row2l ) );
STOREU( &S->h[6], _mm_xor_si128( LOADU( &S->h[6] ), row2h ) );
return 0;
}
#endif
int blake2b_update( blake2b_state *S, const uint8_t *in, uint64_t inlen )
int blake2b_update( blake2b_state *S, const void *pin, size_t inlen )
{
while( inlen > 0 )
const unsigned char * in = (const unsigned char *)pin;
if( inlen > 0 )
{
size_t left = S->buflen;
size_t fill = 2 * BLAKE2B_BLOCKBYTES - left;
size_t fill = BLAKE2B_BLOCKBYTES - left;
if( inlen > fill )
{
memcpy( S->buf + left, in, fill ); // Fill buffer
S->buflen += fill;
S->buflen = 0;
memcpy( S->buf + left, in, fill ); /* Fill buffer */
blake2b_increment_counter( S, BLAKE2B_BLOCKBYTES );
blake2b_compress( S, S->buf ); // Compress
memcpy( S->buf, S->buf + BLAKE2B_BLOCKBYTES, BLAKE2B_BLOCKBYTES ); // Shift buffer left
S->buflen -= BLAKE2B_BLOCKBYTES;
in += fill;
inlen -= fill;
}
else // inlen <= fill
{
memcpy( S->buf + left, in, inlen );
S->buflen += inlen; // Be lazy, do not compress
in += inlen;
inlen -= inlen;
blake2b_compress( S, S->buf ); /* Compress */
in += fill; inlen -= fill;
while(inlen > BLAKE2B_BLOCKBYTES) {
blake2b_increment_counter(S, BLAKE2B_BLOCKBYTES);
blake2b_compress( S, in );
in += BLAKE2B_BLOCKBYTES;
inlen -= BLAKE2B_BLOCKBYTES;
}
}
memcpy( S->buf + S->buflen, in, inlen );
S->buflen += inlen;
}
return 0;
}
int blake2b_final( blake2b_state *S, uint8_t *out, uint8_t outlen )
int blake2b_final( blake2b_state *S, void *out, size_t outlen )
{
if( S->buflen > BLAKE2B_BLOCKBYTES )
{
blake2b_increment_counter( S, BLAKE2B_BLOCKBYTES );
blake2b_compress( S, S->buf );
S->buflen -= BLAKE2B_BLOCKBYTES;
memcpy( S->buf, S->buf + BLAKE2B_BLOCKBYTES, S->buflen );
}
if( out == NULL || outlen < S->outlen )
return -1;
if( blake2b_is_lastblock( S ) )
return -1;
blake2b_increment_counter( S, S->buflen );
blake2b_set_lastblock( S );
memset( S->buf + S->buflen, 0, 2 * BLAKE2B_BLOCKBYTES - S->buflen ); /* Padding */
memset( S->buf + S->buflen, 0, BLAKE2B_BLOCKBYTES - S->buflen ); /* Padding */
blake2b_compress( S, S->buf );
memcpy( out, &S->h[0], outlen );
memcpy( out, &S->h[0], S->outlen );
return 0;
}
int blake2b( uint8_t *out, const void *in, const void *key, const uint8_t outlen, const uint64_t inlen, uint8_t keylen )
int blake2b( void *out, size_t outlen, const void *in, size_t inlen, const void *key, size_t keylen )
{
blake2b_state S[1];
/* Verify parameters */
if ( NULL == in ) return -1;
if ( NULL == in && inlen > 0 ) return -1;
if ( NULL == out ) return -1;
if( NULL == key ) keylen = 0;
if( NULL == key && keylen > 0 ) return -1;
if( !outlen || outlen > BLAKE2B_OUTBYTES ) return -1;
if( keylen > BLAKE2B_KEYBYTES ) return -1;
if( keylen )
{
@ -388,46 +305,90 @@ int blake2b( uint8_t *out, const void *in, const void *key, const uint8_t outlen
if( blake2b_init( S, outlen ) < 0 ) return -1;
}
blake2b_update( S, ( uint8_t * )in, inlen );
blake2b_update( S, ( const uint8_t * )in, inlen );
blake2b_final( S, out, outlen );
return 0;
}
int blake2( void *out, size_t outlen, const void *in, size_t inlen, const void *key, size_t keylen ) {
return blake2b(out, outlen, in, inlen, key, keylen);
}
int blake2b_argon( void *out, const void *in, const void *key, size_t outlen, size_t inlen, size_t keylen ) {
return blake2b(out, outlen, in, inlen, key, keylen);
}
#if defined(SUPERCOP)
int crypto_hash( unsigned char *out, unsigned char *in, unsigned long long inlen )
{
return blake2b( out, in, NULL, BLAKE2B_OUTBYTES, inlen, 0 );
return blake2b( out, BLAKE2B_OUTBYTES, in, inlen, NULL, 0 );
}
#endif
#if defined(BLAKE2B_SELFTEST)
#include <string.h>
#include "blake2-kat.h"
int main( int argc, char **argv )
int main( void )
{
uint8_t key[BLAKE2B_KEYBYTES];
uint8_t buf[KAT_LENGTH];
uint8_t buf[BLAKE2_KAT_LENGTH];
size_t i, step;
for( size_t i = 0; i < BLAKE2B_KEYBYTES; ++i )
for( i = 0; i < BLAKE2B_KEYBYTES; ++i )
key[i] = ( uint8_t )i;
for( size_t i = 0; i < KAT_LENGTH; ++i )
for( i = 0; i < BLAKE2_KAT_LENGTH; ++i )
buf[i] = ( uint8_t )i;
for( size_t i = 0; i < KAT_LENGTH; ++i )
/* Test simple API */
for( i = 0; i < BLAKE2_KAT_LENGTH; ++i )
{
uint8_t hash[BLAKE2B_OUTBYTES];
blake2b( hash, buf, key, BLAKE2B_OUTBYTES, i, BLAKE2B_KEYBYTES );
blake2b( hash, BLAKE2B_OUTBYTES, buf, i, key, BLAKE2B_KEYBYTES );
if( 0 != memcmp( hash, blake2b_keyed_kat[i], BLAKE2B_OUTBYTES ) )
{
puts( "error" );
return -1;
goto fail;
}
}
/* Test streaming API */
for(step = 1; step < BLAKE2B_BLOCKBYTES; ++step) {
for (i = 0; i < BLAKE2_KAT_LENGTH; ++i) {
uint8_t hash[BLAKE2B_OUTBYTES];
blake2b_state S;
uint8_t * p = buf;
size_t mlen = i;
int err = 0;
if( (err = blake2b_init_key(&S, BLAKE2B_OUTBYTES, key, BLAKE2B_KEYBYTES)) < 0 ) {
goto fail;
}
while (mlen >= step) {
if ( (err = blake2b_update(&S, p, step)) < 0 ) {
goto fail;
}
mlen -= step;
p += step;
}
if ( (err = blake2b_update(&S, p, mlen)) < 0) {
goto fail;
}
if ( (err = blake2b_final(&S, hash, BLAKE2B_OUTBYTES)) < 0) {
goto fail;
}
if (0 != memcmp(hash, blake2b_keyed_kat[i], BLAKE2B_OUTBYTES)) {
goto fail;
}
}
}
puts( "ok" );
return 0;
fail:
puts("error");
return -1;
}
#endif

View file

@ -1,14 +1,16 @@
/*
BLAKE2 reference source code package - optimized C implementations
Written in 2012 by Samuel Neves <sneves@dei.uc.pt>
Copyright 2012, Samuel Neves <sneves@dei.uc.pt>. You may use this under the
terms of the CC0, the OpenSSL Licence, or the Apache Public License 2.0, at
your option. The terms of these licenses can be found at:
To the extent possible under law, the author(s) have dedicated all copyright
and related and neighboring rights to this software to the public domain
worldwide. This software is distributed without any warranty.
- CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0
- OpenSSL license : https://www.openssl.org/source/license.html
- Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0
You should have received a copy of the CC0 Public Domain Dedication along with
this software. If not, see <http://creativecommons.org/publicdomain/zero/1.0/>.
More information about the BLAKE2 hash function can be found at
https://blake2.net.
*/
#include <stdio.h>
@ -25,32 +27,48 @@
#define PARALLELISM_DEGREE 4
static inline int blake2bp_init_leaf( blake2b_state *S, uint8_t outlen, uint8_t keylen, uint64_t offset )
/*
blake2b_init_param defaults to setting the expecting output length
from the digest_length parameter block field.
In some cases, however, we do not want this, as the output length
of these instances is given by inner_length instead.
*/
static int blake2bp_init_leaf_param( blake2b_state *S, const blake2b_param *P )
{
int err = blake2b_init_param(S, P);
S->outlen = P->inner_length;
return err;
}
static int blake2bp_init_leaf( blake2b_state *S, size_t outlen, size_t keylen, uint64_t offset )
{
blake2b_param P[1];
P->digest_length = outlen;
P->key_length = keylen;
P->digest_length = (uint8_t)outlen;
P->key_length = (uint8_t)keylen;
P->fanout = PARALLELISM_DEGREE;
P->depth = 2;
P->leaf_length = 0;
P->node_offset = offset;
P->xof_length = 0;
P->node_depth = 0;
P->inner_length = BLAKE2B_OUTBYTES;
memset( P->reserved, 0, sizeof( P->reserved ) );
memset( P->salt, 0, sizeof( P->salt ) );
memset( P->personal, 0, sizeof( P->personal ) );
return blake2b_init_param( S, P );
return blake2bp_init_leaf_param( S, P );
}
static inline int blake2bp_init_root( blake2b_state *S, uint8_t outlen, uint8_t keylen )
static int blake2bp_init_root( blake2b_state *S, size_t outlen, size_t keylen )
{
blake2b_param P[1];
P->digest_length = outlen;
P->key_length = keylen;
P->digest_length = (uint8_t)outlen;
P->key_length = (uint8_t)keylen;
P->fanout = PARALLELISM_DEGREE;
P->depth = 2;
P->leaf_length = 0;
P->node_offset = 0;
P->xof_length = 0;
P->node_depth = 1;
P->inner_length = BLAKE2B_OUTBYTES;
memset( P->reserved, 0, sizeof( P->reserved ) );
@ -60,17 +78,19 @@ static inline int blake2bp_init_root( blake2b_state *S, uint8_t outlen, uint8_t
}
int blake2bp_init( blake2bp_state *S, const uint8_t outlen )
int blake2bp_init( blake2bp_state *S, size_t outlen )
{
size_t i;
if( !outlen || outlen > BLAKE2B_OUTBYTES ) return -1;
memset( S->buf, 0, sizeof( S->buf ) );
S->buflen = 0;
S->outlen = outlen;
if( blake2bp_init_root( S->R, outlen, 0 ) < 0 )
return -1;
for( size_t i = 0; i < PARALLELISM_DEGREE; ++i )
for( i = 0; i < PARALLELISM_DEGREE; ++i )
if( blake2bp_init_leaf( S->S[i], outlen, 0, i ) < 0 ) return -1;
S->R->last_node = 1;
@ -78,19 +98,22 @@ int blake2bp_init( blake2bp_state *S, const uint8_t outlen )
return 0;
}
int blake2bp_init_key( blake2bp_state *S, const uint8_t outlen, const void *key, const uint8_t keylen )
int blake2bp_init_key( blake2bp_state *S, size_t outlen, const void *key, size_t keylen )
{
size_t i;
if( !outlen || outlen > BLAKE2B_OUTBYTES ) return -1;
if( !key || !keylen || keylen > BLAKE2B_KEYBYTES ) return -1;
memset( S->buf, 0, sizeof( S->buf ) );
S->buflen = 0;
S->outlen = outlen;
if( blake2bp_init_root( S->R, outlen, keylen ) < 0 )
return -1;
for( size_t i = 0; i < PARALLELISM_DEGREE; ++i )
for( i = 0; i < PARALLELISM_DEGREE; ++i )
if( blake2bp_init_leaf( S->S[i], outlen, keylen, i ) < 0 ) return -1;
S->R->last_node = 1;
@ -100,7 +123,7 @@ int blake2bp_init_key( blake2bp_state *S, const uint8_t outlen, const void *key,
memset( block, 0, BLAKE2B_BLOCKBYTES );
memcpy( block, key, keylen );
for( size_t i = 0; i < PARALLELISM_DEGREE; ++i )
for( i = 0; i < PARALLELISM_DEGREE; ++i )
blake2b_update( S->S[i], block, BLAKE2B_BLOCKBYTES );
secure_zero_memory( block, BLAKE2B_BLOCKBYTES ); /* Burn the key from stack */
@ -109,16 +132,18 @@ int blake2bp_init_key( blake2bp_state *S, const uint8_t outlen, const void *key,
}
int blake2bp_update( blake2bp_state *S, const uint8_t *in, uint64_t inlen )
int blake2bp_update( blake2bp_state *S, const void *pin, size_t inlen )
{
const unsigned char * in = (const unsigned char *)pin;
size_t left = S->buflen;
size_t fill = sizeof( S->buf ) - left;
size_t i;
if( left && inlen >= fill )
{
memcpy( S->buf + left, in, fill );
for( size_t i = 0; i < PARALLELISM_DEGREE; ++i )
for( i = 0; i < PARALLELISM_DEGREE; ++i )
blake2b_update( S->S[i], S->buf + i * BLAKE2B_BLOCKBYTES, BLAKE2B_BLOCKBYTES );
in += fill;
@ -130,19 +155,19 @@ int blake2bp_update( blake2bp_state *S, const uint8_t *in, uint64_t inlen )
#pragma omp parallel shared(S), num_threads(PARALLELISM_DEGREE)
#else
for( size_t id__ = 0; id__ < PARALLELISM_DEGREE; ++id__ )
for( i = 0; i < PARALLELISM_DEGREE; ++i )
#endif
{
#if defined(_OPENMP)
size_t id__ = omp_get_thread_num();
size_t i = omp_get_thread_num();
#endif
uint64_t inlen__ = inlen;
const uint8_t *in__ = ( const uint8_t * )in;
in__ += id__ * BLAKE2B_BLOCKBYTES;
size_t inlen__ = inlen;
const unsigned char *in__ = ( const unsigned char * )in;
in__ += i * BLAKE2B_BLOCKBYTES;
while( inlen__ >= PARALLELISM_DEGREE * BLAKE2B_BLOCKBYTES )
{
blake2b_update( S->S[id__], in__, BLAKE2B_BLOCKBYTES );
blake2b_update( S->S[i], in__, BLAKE2B_BLOCKBYTES );
in__ += PARALLELISM_DEGREE * BLAKE2B_BLOCKBYTES;
inlen__ -= PARALLELISM_DEGREE * BLAKE2B_BLOCKBYTES;
}
@ -160,11 +185,16 @@ int blake2bp_update( blake2bp_state *S, const uint8_t *in, uint64_t inlen )
int blake2bp_final( blake2bp_state *S, uint8_t *out, const uint8_t outlen )
int blake2bp_final( blake2bp_state *S, void *out, size_t outlen )
{
uint8_t hash[PARALLELISM_DEGREE][BLAKE2B_OUTBYTES];
size_t i;
for( size_t i = 0; i < PARALLELISM_DEGREE; ++i )
if(out == NULL || outlen < S->outlen) {
return -1;
}
for( i = 0; i < PARALLELISM_DEGREE; ++i )
{
if( S->buflen > i * BLAKE2B_BLOCKBYTES )
{
@ -178,30 +208,34 @@ int blake2bp_final( blake2bp_state *S, uint8_t *out, const uint8_t outlen )
blake2b_final( S->S[i], hash[i], BLAKE2B_OUTBYTES );
}
for( size_t i = 0; i < PARALLELISM_DEGREE; ++i )
for( i = 0; i < PARALLELISM_DEGREE; ++i )
blake2b_update( S->R, hash[i], BLAKE2B_OUTBYTES );
blake2b_final( S->R, out, outlen );
return 0;
return blake2b_final( S->R, out, S->outlen );
}
int blake2bp( uint8_t *out, const void *in, const void *key, uint8_t outlen, uint64_t inlen, uint8_t keylen )
int blake2bp( void *out, size_t outlen, const void *in, size_t inlen, const void *key, size_t keylen )
{
uint8_t hash[PARALLELISM_DEGREE][BLAKE2B_OUTBYTES];
blake2b_state S[PARALLELISM_DEGREE][1];
blake2b_state FS[1];
size_t i;
/* Verify parameters */
if ( NULL == in ) return -1;
if ( NULL == in && inlen > 0 ) return -1;
if ( NULL == out ) return -1;
if ( NULL == key ) keylen = 0;
if( NULL == key && keylen > 0 ) return -1;
for( size_t i = 0; i < PARALLELISM_DEGREE; ++i )
if( !outlen || outlen > BLAKE2B_OUTBYTES ) return -1;
if( keylen > BLAKE2B_KEYBYTES ) return -1;
for( i = 0; i < PARALLELISM_DEGREE; ++i )
if( blake2bp_init_leaf( S[i], outlen, keylen, i ) < 0 ) return -1;
S[PARALLELISM_DEGREE - 1]->last_node = 1; // mark last node
S[PARALLELISM_DEGREE - 1]->last_node = 1; /* mark last node */
if( keylen > 0 )
{
@ -209,7 +243,7 @@ int blake2bp( uint8_t *out, const void *in, const void *key, uint8_t outlen, uin
memset( block, 0, BLAKE2B_BLOCKBYTES );
memcpy( block, key, keylen );
for( size_t i = 0; i < PARALLELISM_DEGREE; ++i )
for( i = 0; i < PARALLELISM_DEGREE; ++i )
blake2b_update( S[i], block, BLAKE2B_BLOCKBYTES );
secure_zero_memory( block, BLAKE2B_BLOCKBYTES ); /* Burn the key from stack */
@ -219,78 +253,109 @@ int blake2bp( uint8_t *out, const void *in, const void *key, uint8_t outlen, uin
#pragma omp parallel shared(S,hash), num_threads(PARALLELISM_DEGREE)
#else
for( size_t id__ = 0; id__ < PARALLELISM_DEGREE; ++id__ )
for( i = 0; i < PARALLELISM_DEGREE; ++i )
#endif
{
#if defined(_OPENMP)
size_t id__ = omp_get_thread_num();
size_t i = omp_get_thread_num();
#endif
uint64_t inlen__ = inlen;
const uint8_t *in__ = ( const uint8_t * )in;
in__ += id__ * BLAKE2B_BLOCKBYTES;
size_t inlen__ = inlen;
const unsigned char *in__ = ( const unsigned char * )in;
in__ += i * BLAKE2B_BLOCKBYTES;
while( inlen__ >= PARALLELISM_DEGREE * BLAKE2B_BLOCKBYTES )
{
blake2b_update( S[id__], in__, BLAKE2B_BLOCKBYTES );
blake2b_update( S[i], in__, BLAKE2B_BLOCKBYTES );
in__ += PARALLELISM_DEGREE * BLAKE2B_BLOCKBYTES;
inlen__ -= PARALLELISM_DEGREE * BLAKE2B_BLOCKBYTES;
}
if( inlen__ > id__ * BLAKE2B_BLOCKBYTES )
if( inlen__ > i * BLAKE2B_BLOCKBYTES )
{
const size_t left = inlen__ - id__ * BLAKE2B_BLOCKBYTES;
const size_t left = inlen__ - i * BLAKE2B_BLOCKBYTES;
const size_t len = left <= BLAKE2B_BLOCKBYTES ? left : BLAKE2B_BLOCKBYTES;
blake2b_update( S[id__], in__, len );
blake2b_update( S[i], in__, len );
}
blake2b_final( S[id__], hash[id__], BLAKE2B_OUTBYTES );
blake2b_final( S[i], hash[i], BLAKE2B_OUTBYTES );
}
if( blake2bp_init_root( FS, outlen, keylen ) < 0 )
return -1;
FS->last_node = 1; // Mark as last node
FS->last_node = 1; /* Mark as last node */
for( size_t i = 0; i < PARALLELISM_DEGREE; ++i )
for( i = 0; i < PARALLELISM_DEGREE; ++i )
blake2b_update( FS, hash[i], BLAKE2B_OUTBYTES );
blake2b_final( FS, out, outlen );
return 0;
return blake2b_final( FS, out, outlen );
}
#if defined(BLAKE2BP_SELFTEST)
#include <string.h>
#include "blake2-kat.h"
int main( int argc, char **argv )
int main( void )
{
uint8_t key[BLAKE2B_KEYBYTES];
uint8_t buf[KAT_LENGTH];
uint8_t buf[BLAKE2_KAT_LENGTH];
size_t i, step;
for( size_t i = 0; i < BLAKE2B_KEYBYTES; ++i )
for( i = 0; i < BLAKE2B_KEYBYTES; ++i )
key[i] = ( uint8_t )i;
for( size_t i = 0; i < KAT_LENGTH; ++i )
for( i = 0; i < BLAKE2_KAT_LENGTH; ++i )
buf[i] = ( uint8_t )i;
for( size_t i = 0; i < KAT_LENGTH; ++i )
/* Test simple API */
for( i = 0; i < BLAKE2_KAT_LENGTH; ++i )
{
uint8_t hash[BLAKE2B_OUTBYTES];
//blake2bp( hash, buf, key, BLAKE2B_OUTBYTES, i, BLAKE2B_KEYBYTES );
blake2bp_state S[1];
blake2bp_init_key( S, BLAKE2B_OUTBYTES, key, BLAKE2B_KEYBYTES );
blake2bp_update( S, buf, i );
blake2bp_final( S, hash, BLAKE2B_OUTBYTES );
blake2bp( hash, BLAKE2B_OUTBYTES, buf, i, key, BLAKE2B_KEYBYTES );
if( 0 != memcmp( hash, blake2bp_keyed_kat[i], BLAKE2B_OUTBYTES ) )
{
puts( "error" );
return -1;
goto fail;
}
}
/* Test streaming API */
for(step = 1; step < BLAKE2B_BLOCKBYTES; ++step) {
for (i = 0; i < BLAKE2_KAT_LENGTH; ++i) {
uint8_t hash[BLAKE2B_OUTBYTES];
blake2bp_state S;
uint8_t * p = buf;
size_t mlen = i;
int err = 0;
if( (err = blake2bp_init_key(&S, BLAKE2B_OUTBYTES, key, BLAKE2B_KEYBYTES)) < 0 ) {
goto fail;
}
while (mlen >= step) {
if ( (err = blake2bp_update(&S, p, step)) < 0 ) {
goto fail;
}
mlen -= step;
p += step;
}
if ( (err = blake2bp_update(&S, p, mlen)) < 0) {
goto fail;
}
if ( (err = blake2bp_final(&S, hash, BLAKE2B_OUTBYTES)) < 0) {
goto fail;
}
if (0 != memcmp(hash, blake2bp_keyed_kat[i], BLAKE2B_OUTBYTES)) {
goto fail;
}
}
}
puts( "ok" );
return 0;
fail:
puts("error");
return -1;
}
#endif

View file

@ -1,18 +1,19 @@
/*
BLAKE2 reference source code package - optimized C implementations
Written in 2012 by Samuel Neves <sneves@dei.uc.pt>
Copyright 2012, Samuel Neves <sneves@dei.uc.pt>. You may use this under the
terms of the CC0, the OpenSSL Licence, or the Apache Public License 2.0, at
your option. The terms of these licenses can be found at:
To the extent possible under law, the author(s) have dedicated all copyright
and related and neighboring rights to this software to the public domain
worldwide. This software is distributed without any warranty.
- CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0
- OpenSSL license : https://www.openssl.org/source/license.html
- Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0
You should have received a copy of the CC0 Public Domain Dedication along with
this software. If not, see <http://creativecommons.org/publicdomain/zero/1.0/>.
More information about the BLAKE2 hash function can be found at
https://blake2.net.
*/
#pragma once
#ifndef __BLAKE2S_LOAD_SSE2_H__
#define __BLAKE2S_LOAD_SSE2_H__
#ifndef BLAKE2S_LOAD_SSE2_H
#define BLAKE2S_LOAD_SSE2_H
#define LOAD_MSG_0_1(buf) buf = _mm_set_epi32(m6,m4,m2,m0)
#define LOAD_MSG_0_2(buf) buf = _mm_set_epi32(m7,m5,m3,m1)

View file

@ -1,18 +1,19 @@
/*
BLAKE2 reference source code package - optimized C implementations
Written in 2012 by Samuel Neves <sneves@dei.uc.pt>
Copyright 2012, Samuel Neves <sneves@dei.uc.pt>. You may use this under the
terms of the CC0, the OpenSSL Licence, or the Apache Public License 2.0, at
your option. The terms of these licenses can be found at:
To the extent possible under law, the author(s) have dedicated all copyright
and related and neighboring rights to this software to the public domain
worldwide. This software is distributed without any warranty.
- CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0
- OpenSSL license : https://www.openssl.org/source/license.html
- Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0
You should have received a copy of the CC0 Public Domain Dedication along with
this software. If not, see <http://creativecommons.org/publicdomain/zero/1.0/>.
More information about the BLAKE2 hash function can be found at
https://blake2.net.
*/
#pragma once
#ifndef __BLAKE2S_LOAD_SSE41_H__
#define __BLAKE2S_LOAD_SSE41_H__
#ifndef BLAKE2S_LOAD_SSE41_H
#define BLAKE2S_LOAD_SSE41_H
#define LOAD_MSG_0_1(buf) \
buf = TOI(_mm_shuffle_ps(TOF(m0), TOF(m1), _MM_SHUFFLE(2,0,2,0)));
@ -226,4 +227,3 @@ t2 = _mm_blend_epi16(t0,t1,0x0F); \
buf = _mm_shuffle_epi32(t2,_MM_SHUFFLE(0,1,2,3));
#endif

View file

@ -1,31 +1,34 @@
/*
BLAKE2 reference source code package - optimized C implementations
Written in 2012 by Samuel Neves <sneves@dei.uc.pt>
Copyright 2012, Samuel Neves <sneves@dei.uc.pt>. You may use this under the
terms of the CC0, the OpenSSL Licence, or the Apache Public License 2.0, at
your option. The terms of these licenses can be found at:
To the extent possible under law, the author(s) have dedicated all copyright
and related and neighboring rights to this software to the public domain
worldwide. This software is distributed without any warranty.
- CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0
- OpenSSL license : https://www.openssl.org/source/license.html
- Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0
You should have received a copy of the CC0 Public Domain Dedication along with
this software. If not, see <http://creativecommons.org/publicdomain/zero/1.0/>.
More information about the BLAKE2 hash function can be found at
https://blake2.net.
*/
#pragma once
#ifndef __BLAKE2S_LOAD_XOP_H__
#define __BLAKE2S_LOAD_XOP_H__
#ifndef BLAKE2S_LOAD_XOP_H
#define BLAKE2S_LOAD_XOP_H
#define TOB(x) ((x)*4*0x01010101 + 0x03020100) // ..or not TOB
#define TOB(x) ((x)*4*0x01010101 + 0x03020100) /* ..or not TOB */
#if 0
/* Basic VPPERM emulation, for testing purposes */
/*static __m128i _mm_perm_epi8(const __m128i src1, const __m128i src2, const __m128i sel)
static __m128i _mm_perm_epi8(const __m128i src1, const __m128i src2, const __m128i sel)
{
const __m128i sixteen = _mm_set1_epi8(16);
const __m128i t0 = _mm_shuffle_epi8(src1, sel);
const __m128i s1 = _mm_shuffle_epi8(src2, _mm_sub_epi8(sel, sixteen));
const __m128i mask = _mm_or_si128(_mm_cmpeq_epi8(sel, sixteen),
_mm_cmpgt_epi8(sel, sixteen)); // (>=16) = 0xff : 00
_mm_cmpgt_epi8(sel, sixteen)); /* (>=16) = 0xff : 00 */
return _mm_blendv_epi8(t0, s1, mask);
}*/
}
#endif
#define LOAD_MSG_0_1(buf) \
buf = _mm_perm_epi8(m0, m1, _mm_set_epi32(TOB(6),TOB(4),TOB(2),TOB(0)) );
@ -166,7 +169,7 @@ buf = _mm_perm_epi8(t1, m3, _mm_set_epi32(TOB(3),TOB(2),TOB(1),TOB(7)) );
#define LOAD_MSG_8_3(buf) \
t0 = _mm_perm_epi8(m0, m2, _mm_set_epi32(TOB(6),TOB(1),TOB(0),TOB(0)) ); \
buf = _mm_perm_epi8(t0, m3, _mm_set_epi32(TOB(3),TOB(2),TOB(5),TOB(4)) ); \
#define LOAD_MSG_8_4(buf) \
buf = _mm_perm_epi8(m0, m1, _mm_set_epi32(TOB(5),TOB(4),TOB(7),TOB(2)) );
@ -186,4 +189,3 @@ t1 = _mm_perm_epi8(m0, m2, _mm_set_epi32(TOB(0),TOB(0),TOB(0),TOB(7)) ); \
buf = _mm_perm_epi8(t1, m3, _mm_set_epi32(TOB(3),TOB(4),TOB(6),TOB(0)) );
#endif

View file

@ -1,23 +1,21 @@
/*
BLAKE2 reference source code package - optimized C implementations
Written in 2012 by Samuel Neves <sneves@dei.uc.pt>
Copyright 2012, Samuel Neves <sneves@dei.uc.pt>. You may use this under the
terms of the CC0, the OpenSSL Licence, or the Apache Public License 2.0, at
your option. The terms of these licenses can be found at:
To the extent possible under law, the author(s) have dedicated all copyright
and related and neighboring rights to this software to the public domain
worldwide. This software is distributed without any warranty.
- CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0
- OpenSSL license : https://www.openssl.org/source/license.html
- Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0
You should have received a copy of the CC0 Public Domain Dedication along with
this software. If not, see <http://creativecommons.org/publicdomain/zero/1.0/>.
More information about the BLAKE2 hash function can be found at
https://blake2.net.
*/
#pragma once
#ifndef __BLAKE2S_ROUND_H__
#define __BLAKE2S_ROUND_H__
#ifndef BLAKE2S_ROUND_H
#define BLAKE2S_ROUND_H
#define LOAD(p) _mm_load_si128( (__m128i *)(p) )
#define STORE(p,r) _mm_store_si128((__m128i *)(p), r)
#define LOADU(p) _mm_loadu_si128( (__m128i *)(p) )
#define LOADU(p) _mm_loadu_si128( (const __m128i *)(p) )
#define STOREU(p,r) _mm_storeu_si128((__m128i *)(p), r)
#define TOF(reg) _mm_castsi128_ps((reg))
@ -34,7 +32,7 @@
: (16==-(c)) ? _mm_shuffle_epi8(r,r16) \
: _mm_xor_si128(_mm_srli_epi32( (r), -(c) ),_mm_slli_epi32( (r), 32-(-(c)) )) )
#else
#define _mm_roti_epi32(r, c) _mm_xor_si128(_mm_srli_epi32( (r), -(c) ),_mm_slli_epi32( (r), 32-(-c) ))
#define _mm_roti_epi32(r, c) _mm_xor_si128(_mm_srli_epi32( (r), -(c) ),_mm_slli_epi32( (r), 32-(-(c)) ))
#endif
#else
/* ... */
@ -86,6 +84,5 @@
LOAD_MSG_ ##r ##_4(buf4); \
G2(row1,row2,row3,row4,buf4); \
UNDIAGONALIZE(row1,row2,row3,row4); \
#endif
#endif

View file

@ -1,14 +1,16 @@
/*
BLAKE2 reference source code package - optimized C implementations
Written in 2012 by Samuel Neves <sneves@dei.uc.pt>
Copyright 2012, Samuel Neves <sneves@dei.uc.pt>. You may use this under the
terms of the CC0, the OpenSSL Licence, or the Apache Public License 2.0, at
your option. The terms of these licenses can be found at:
To the extent possible under law, the author(s) have dedicated all copyright
and related and neighboring rights to this software to the public domain
worldwide. This software is distributed without any warranty.
- CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0
- OpenSSL license : https://www.openssl.org/source/license.html
- Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0
You should have received a copy of the CC0 Public Domain Dedication along with
this software. If not, see <http://creativecommons.org/publicdomain/zero/1.0/>.
More information about the BLAKE2 hash function can be found at
https://blake2.net.
*/
#include <stdint.h>
@ -37,192 +39,104 @@
#include "blake2s-round.h"
ALIGN( 64 ) static const uint32_t blake2s_IV[8] =
static const uint32_t blake2s_IV[8] =
{
0x6A09E667UL, 0xBB67AE85UL, 0x3C6EF372UL, 0xA54FF53AUL,
0x510E527FUL, 0x9B05688CUL, 0x1F83D9ABUL, 0x5BE0CD19UL
};
static const uint8_t blake2s_sigma[10][16] =
/* Some helper functions */
static void blake2s_set_lastnode( blake2s_state *S )
{
{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 } ,
{ 14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 } ,
{ 11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4 } ,
{ 7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8 } ,
{ 9, 0, 5, 7, 2, 4, 10, 15, 14, 1, 11, 12, 6, 8, 3, 13 } ,
{ 2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9 } ,
{ 12, 5, 1, 15, 14, 13, 4, 10, 0, 7, 6, 3, 9, 2, 8, 11 } ,
{ 13, 11, 7, 14, 12, 1, 3, 9, 5, 0, 15, 4, 8, 6, 2, 10 } ,
{ 6, 15, 14, 9, 11, 3, 0, 8, 12, 2, 13, 7, 1, 4, 10, 5 } ,
{ 10, 2, 8, 4, 7, 6, 1, 5, 15, 11, 9, 14, 3, 12, 13 , 0 } ,
};
/* Some helper functions, not necessarily useful */
static inline int blake2s_set_lastnode( blake2s_state *S )
{
S->f[1] = ~0U;
return 0;
S->f[1] = (uint32_t)-1;
}
static inline int blake2s_clear_lastnode( blake2s_state *S )
static int blake2s_is_lastblock( const blake2s_state *S )
{
S->f[1] = 0U;
return 0;
return S->f[0] != 0;
}
static inline int blake2s_set_lastblock( blake2s_state *S )
static void blake2s_set_lastblock( blake2s_state *S )
{
if( S->last_node ) blake2s_set_lastnode( S );
S->f[0] = ~0U;
return 0;
S->f[0] = (uint32_t)-1;
}
static inline int blake2s_clear_lastblock( blake2s_state *S )
{
if( S->last_node ) blake2s_clear_lastnode( S );
S->f[0] = 0U;
return 0;
}
static inline int blake2s_increment_counter( blake2s_state *S, const uint32_t inc )
static void blake2s_increment_counter( blake2s_state *S, const uint32_t inc )
{
uint64_t t = ( ( uint64_t )S->t[1] << 32 ) | S->t[0];
t += inc;
S->t[0] = ( uint32_t )( t >> 0 );
S->t[1] = ( uint32_t )( t >> 32 );
return 0;
}
// Parameter-related functions
static inline int blake2s_param_set_digest_length( blake2s_param *P, const uint8_t digest_length )
{
P->digest_length = digest_length;
return 0;
}
static inline int blake2s_param_set_fanout( blake2s_param *P, const uint8_t fanout )
{
P->fanout = fanout;
return 0;
}
static inline int blake2s_param_set_max_depth( blake2s_param *P, const uint8_t depth )
{
P->depth = depth;
return 0;
}
static inline int blake2s_param_set_leaf_length( blake2s_param *P, const uint32_t leaf_length )
{
P->leaf_length = leaf_length;
return 0;
}
static inline int blake2s_param_set_node_offset( blake2s_param *P, const uint64_t node_offset )
{
store48( P->node_offset, node_offset );
return 0;
}
static inline int blake2s_param_set_node_depth( blake2s_param *P, const uint8_t node_depth )
{
P->node_depth = node_depth;
return 0;
}
static inline int blake2s_param_set_inner_length( blake2s_param *P, const uint8_t inner_length )
{
P->inner_length = inner_length;
return 0;
}
static inline int blake2s_param_set_salt( blake2s_param *P, const uint8_t salt[BLAKE2S_SALTBYTES] )
{
memcpy( P->salt, salt, BLAKE2S_SALTBYTES );
return 0;
}
static inline int blake2s_param_set_personal( blake2s_param *P, const uint8_t personal[BLAKE2S_PERSONALBYTES] )
{
memcpy( P->personal, personal, BLAKE2S_PERSONALBYTES );
return 0;
}
static inline int blake2s_init0( blake2s_state *S )
{
memset( S, 0, sizeof( blake2s_state ) );
for( int i = 0; i < 8; ++i ) S->h[i] = blake2s_IV[i];
return 0;
}
/* init2 xors IV with input parameter block */
int blake2s_init_param( blake2s_state *S, const blake2s_param *P )
{
uint8_t *p, *h, *v;
//blake2s_init0( S );
v = ( uint8_t * )( blake2s_IV );
h = ( uint8_t * )( S->h );
p = ( uint8_t * )( P );
size_t i;
/*blake2s_init0( S ); */
const uint8_t * v = ( const uint8_t * )( blake2s_IV );
const uint8_t * p = ( const uint8_t * )( P );
uint8_t * h = ( uint8_t * )( S->h );
/* IV XOR ParamBlock */
memset( S, 0, sizeof( blake2s_state ) );
for( int i = 0; i < BLAKE2S_OUTBYTES; ++i ) h[i] = v[i] ^ p[i];
for( i = 0; i < BLAKE2S_OUTBYTES; ++i ) h[i] = v[i] ^ p[i];
S->outlen = P->digest_length;
return 0;
}
/* Some sort of default parameter block initialization, for sequential blake2s */
int blake2s_init( blake2s_state *S, const uint8_t outlen )
int blake2s_init( blake2s_state *S, size_t outlen )
{
blake2s_param P[1];
/* Move interval verification here? */
if ( ( !outlen ) || ( outlen > BLAKE2S_OUTBYTES ) ) return -1;
const blake2s_param P =
{
outlen,
0,
1,
1,
0,
{0},
0,
0,
{0},
{0}
};
return blake2s_init_param( S, &P );
P->digest_length = (uint8_t)outlen;
P->key_length = 0;
P->fanout = 1;
P->depth = 1;
store32( &P->leaf_length, 0 );
store32( &P->node_offset, 0 );
store16( &P->xof_length, 0 );
P->node_depth = 0;
P->inner_length = 0;
/* memset(P->reserved, 0, sizeof(P->reserved) ); */
memset( P->salt, 0, sizeof( P->salt ) );
memset( P->personal, 0, sizeof( P->personal ) );
return blake2s_init_param( S, P );
}
int blake2s_init_key( blake2s_state *S, const uint8_t outlen, const void *key, const uint8_t keylen )
int blake2s_init_key( blake2s_state *S, size_t outlen, const void *key, size_t keylen )
{
blake2s_param P[1];
/* Move interval verification here? */
if ( ( !outlen ) || ( outlen > BLAKE2S_OUTBYTES ) ) return -1;
if ( ( !key ) || ( !keylen ) || keylen > BLAKE2S_KEYBYTES ) return -1;
const blake2s_param P =
{
outlen,
keylen,
1,
1,
0,
{0},
0,
0,
{0},
{0}
};
P->digest_length = (uint8_t)outlen;
P->key_length = (uint8_t)keylen;
P->fanout = 1;
P->depth = 1;
store32( &P->leaf_length, 0 );
store32( &P->node_offset, 0 );
store16( &P->xof_length, 0 );
P->node_depth = 0;
P->inner_length = 0;
/* memset(P->reserved, 0, sizeof(P->reserved) ); */
memset( P->salt, 0, sizeof( P->salt ) );
memset( P->personal, 0, sizeof( P->personal ) );
if( blake2s_init_param( S, &P ) < 0 )
if( blake2s_init_param( S, P ) < 0 )
return -1;
{
@ -236,7 +150,7 @@ int blake2s_init_key( blake2s_state *S, const uint8_t outlen, const void *key, c
}
static inline int blake2s_compress( blake2s_state *S, const uint8_t block[BLAKE2S_BLOCKBYTES] )
static void blake2s_compress( blake2s_state *S, const uint8_t block[BLAKE2S_BLOCKBYTES] )
{
__m128i row1, row2, row3, row4;
__m128i buf1, buf2, buf3, buf4;
@ -257,27 +171,27 @@ static inline int blake2s_compress( blake2s_state *S, const uint8_t block[BLAKE2
const __m128i m2 = LOADU( block + 32 );
const __m128i m3 = LOADU( block + 48 );
#else
const uint32_t m0 = ( ( uint32_t * )block )[ 0];
const uint32_t m1 = ( ( uint32_t * )block )[ 1];
const uint32_t m2 = ( ( uint32_t * )block )[ 2];
const uint32_t m3 = ( ( uint32_t * )block )[ 3];
const uint32_t m4 = ( ( uint32_t * )block )[ 4];
const uint32_t m5 = ( ( uint32_t * )block )[ 5];
const uint32_t m6 = ( ( uint32_t * )block )[ 6];
const uint32_t m7 = ( ( uint32_t * )block )[ 7];
const uint32_t m8 = ( ( uint32_t * )block )[ 8];
const uint32_t m9 = ( ( uint32_t * )block )[ 9];
const uint32_t m10 = ( ( uint32_t * )block )[10];
const uint32_t m11 = ( ( uint32_t * )block )[11];
const uint32_t m12 = ( ( uint32_t * )block )[12];
const uint32_t m13 = ( ( uint32_t * )block )[13];
const uint32_t m14 = ( ( uint32_t * )block )[14];
const uint32_t m15 = ( ( uint32_t * )block )[15];
const uint32_t m0 = load32(block + 0 * sizeof(uint32_t));
const uint32_t m1 = load32(block + 1 * sizeof(uint32_t));
const uint32_t m2 = load32(block + 2 * sizeof(uint32_t));
const uint32_t m3 = load32(block + 3 * sizeof(uint32_t));
const uint32_t m4 = load32(block + 4 * sizeof(uint32_t));
const uint32_t m5 = load32(block + 5 * sizeof(uint32_t));
const uint32_t m6 = load32(block + 6 * sizeof(uint32_t));
const uint32_t m7 = load32(block + 7 * sizeof(uint32_t));
const uint32_t m8 = load32(block + 8 * sizeof(uint32_t));
const uint32_t m9 = load32(block + 9 * sizeof(uint32_t));
const uint32_t m10 = load32(block + 10 * sizeof(uint32_t));
const uint32_t m11 = load32(block + 11 * sizeof(uint32_t));
const uint32_t m12 = load32(block + 12 * sizeof(uint32_t));
const uint32_t m13 = load32(block + 13 * sizeof(uint32_t));
const uint32_t m14 = load32(block + 14 * sizeof(uint32_t));
const uint32_t m15 = load32(block + 15 * sizeof(uint32_t));
#endif
row1 = ff0 = LOADU( &S->h[0] );
row2 = ff1 = LOADU( &S->h[4] );
row3 = _mm_setr_epi32( 0x6A09E667, 0xBB67AE85, 0x3C6EF372, 0xA54FF53A );
row4 = _mm_xor_si128( _mm_setr_epi32( 0x510E527F, 0x9B05688C, 0x1F83D9AB, 0x5BE0CD19 ), LOADU( &S->t[0] ) );
row3 = _mm_loadu_si128( (__m128i const *)&blake2s_IV[0] );
row4 = _mm_xor_si128( _mm_loadu_si128( (__m128i const *)&blake2s_IV[4] ), LOADU( &S->t[0] ) );
ROUND( 0 );
ROUND( 1 );
ROUND( 2 );
@ -290,76 +204,74 @@ static inline int blake2s_compress( blake2s_state *S, const uint8_t block[BLAKE2
ROUND( 9 );
STOREU( &S->h[0], _mm_xor_si128( ff0, _mm_xor_si128( row1, row3 ) ) );
STOREU( &S->h[4], _mm_xor_si128( ff1, _mm_xor_si128( row2, row4 ) ) );
return 0;
}
/* inlen now in bytes */
int blake2s_update( blake2s_state *S, const uint8_t *in, uint64_t inlen )
int blake2s_update( blake2s_state *S, const void *pin, size_t inlen )
{
while( inlen > 0 )
const unsigned char * in = (const unsigned char *)pin;
if( inlen > 0 )
{
size_t left = S->buflen;
size_t fill = 2 * BLAKE2S_BLOCKBYTES - left;
size_t fill = BLAKE2S_BLOCKBYTES - left;
if( inlen > fill )
{
memcpy( S->buf + left, in, fill ); // Fill buffer
S->buflen += fill;
S->buflen = 0;
memcpy( S->buf + left, in, fill ); /* Fill buffer */
blake2s_increment_counter( S, BLAKE2S_BLOCKBYTES );
blake2s_compress( S, S->buf ); // Compress
memcpy( S->buf, S->buf + BLAKE2S_BLOCKBYTES, BLAKE2S_BLOCKBYTES ); // Shift buffer left
S->buflen -= BLAKE2S_BLOCKBYTES;
in += fill;
inlen -= fill;
}
else // inlen <= fill
{
memcpy( S->buf + left, in, inlen );
S->buflen += inlen; // Be lazy, do not compress
in += inlen;
inlen -= inlen;
blake2s_compress( S, S->buf ); /* Compress */
in += fill; inlen -= fill;
while(inlen > BLAKE2S_BLOCKBYTES) {
blake2s_increment_counter(S, BLAKE2S_BLOCKBYTES);
blake2s_compress( S, in );
in += BLAKE2S_BLOCKBYTES;
inlen -= BLAKE2S_BLOCKBYTES;
}
}
memcpy( S->buf + S->buflen, in, inlen );
S->buflen += inlen;
}
return 0;
}
/* Is this correct? */
int blake2s_final( blake2s_state *S, uint8_t *out, uint8_t outlen )
int blake2s_final( blake2s_state *S, void *out, size_t outlen )
{
uint8_t buffer[BLAKE2S_OUTBYTES];
uint8_t buffer[BLAKE2S_OUTBYTES] = {0};
size_t i;
if( S->buflen > BLAKE2S_BLOCKBYTES )
{
blake2s_increment_counter( S, BLAKE2S_BLOCKBYTES );
blake2s_compress( S, S->buf );
S->buflen -= BLAKE2S_BLOCKBYTES;
memcpy( S->buf, S->buf + BLAKE2S_BLOCKBYTES, S->buflen );
}
if( out == NULL || outlen < S->outlen )
return -1;
blake2s_increment_counter( S, ( uint32_t )S->buflen );
if( blake2s_is_lastblock( S ) )
return -1;
blake2s_increment_counter( S, (uint32_t)S->buflen );
blake2s_set_lastblock( S );
memset( S->buf + S->buflen, 0, 2 * BLAKE2S_BLOCKBYTES - S->buflen ); /* Padding */
memset( S->buf + S->buflen, 0, BLAKE2S_BLOCKBYTES - S->buflen ); /* Padding */
blake2s_compress( S, S->buf );
for( int i = 0; i < 8; ++i ) /* Output full hash to temp buffer */
for( i = 0; i < 8; ++i ) /* Output full hash to temp buffer */
store32( buffer + sizeof( S->h[i] ) * i, S->h[i] );
memcpy( out, buffer, outlen );
memcpy( out, buffer, S->outlen );
secure_zero_memory( buffer, sizeof(buffer) );
return 0;
}
/* inlen, at least, should be uint64_t. Others can be size_t. */
int blake2s( uint8_t *out, const void *in, const void *key, const uint8_t outlen, const uint64_t inlen, uint8_t keylen )
int blake2s( void *out, size_t outlen, const void *in, size_t inlen, const void *key, size_t keylen )
{
blake2s_state S[1];
/* Verify parameters */
if ( NULL == in ) return -1;
if ( NULL == in && inlen > 0 ) return -1;
if ( NULL == out ) return -1;
if ( NULL == key ) keylen = 0; /* Fail here instead if keylen != 0 and key == NULL? */
if ( NULL == key && keylen > 0) return -1;
if( !outlen || outlen > BLAKE2S_OUTBYTES ) return -1;
if( keylen > BLAKE2S_KEYBYTES ) return -1;
if( keylen > 0 )
{
@ -370,7 +282,7 @@ int blake2s( uint8_t *out, const void *in, const void *key, const uint8_t outlen
if( blake2s_init( S, outlen ) < 0 ) return -1;
}
blake2s_update( S, ( uint8_t * )in, inlen );
blake2s_update( S, ( const uint8_t * )in, inlen );
blake2s_final( S, out, outlen );
return 0;
}
@ -378,39 +290,74 @@ int blake2s( uint8_t *out, const void *in, const void *key, const uint8_t outlen
#if defined(SUPERCOP)
int crypto_hash( unsigned char *out, unsigned char *in, unsigned long long inlen )
{
return blake2s( out, in, NULL, BLAKE2S_OUTBYTES, inlen, 0 );
return blake2s( out, BLAKE2S_OUTBYTES, in, inlen, NULL, 0 );
}
#endif
#if defined(BLAKE2S_SELFTEST)
#include <string.h>
#include "blake2-kat.h"
int main( int argc, char **argv )
int main( void )
{
uint8_t key[BLAKE2S_KEYBYTES];
uint8_t buf[KAT_LENGTH];
uint8_t buf[BLAKE2_KAT_LENGTH];
size_t i, step;
for( size_t i = 0; i < BLAKE2S_KEYBYTES; ++i )
for( i = 0; i < BLAKE2S_KEYBYTES; ++i )
key[i] = ( uint8_t )i;
for( size_t i = 0; i < KAT_LENGTH; ++i )
for( i = 0; i < BLAKE2_KAT_LENGTH; ++i )
buf[i] = ( uint8_t )i;
for( size_t i = 0; i < KAT_LENGTH; ++i )
/* Test simple API */
for( i = 0; i < BLAKE2_KAT_LENGTH; ++i )
{
uint8_t hash[BLAKE2S_OUTBYTES];
blake2s( hash, BLAKE2S_OUTBYTES, buf, i, key, BLAKE2S_KEYBYTES );
if( blake2s( hash, buf, key, BLAKE2S_OUTBYTES, i, BLAKE2S_KEYBYTES ) < 0 ||
0 != memcmp( hash, blake2s_keyed_kat[i], BLAKE2S_OUTBYTES ) )
if( 0 != memcmp( hash, blake2s_keyed_kat[i], BLAKE2S_OUTBYTES ) )
{
puts( "error" );
return -1;
goto fail;
}
}
/* Test streaming API */
for(step = 1; step < BLAKE2S_BLOCKBYTES; ++step) {
for (i = 0; i < BLAKE2_KAT_LENGTH; ++i) {
uint8_t hash[BLAKE2S_OUTBYTES];
blake2s_state S;
uint8_t * p = buf;
size_t mlen = i;
int err = 0;
if( (err = blake2s_init_key(&S, BLAKE2S_OUTBYTES, key, BLAKE2S_KEYBYTES)) < 0 ) {
goto fail;
}
while (mlen >= step) {
if ( (err = blake2s_update(&S, p, step)) < 0 ) {
goto fail;
}
mlen -= step;
p += step;
}
if ( (err = blake2s_update(&S, p, mlen)) < 0) {
goto fail;
}
if ( (err = blake2s_final(&S, hash, BLAKE2S_OUTBYTES)) < 0) {
goto fail;
}
if (0 != memcmp(hash, blake2s_keyed_kat[i], BLAKE2S_OUTBYTES)) {
goto fail;
}
}
}
puts( "ok" );
return 0;
fail:
puts("error");
return -1;
}
#endif

View file

@ -1,14 +1,16 @@
/*
BLAKE2 reference source code package - optimized C implementations
Written in 2012 by Samuel Neves <sneves@dei.uc.pt>
Copyright 2012, Samuel Neves <sneves@dei.uc.pt>. You may use this under the
terms of the CC0, the OpenSSL Licence, or the Apache Public License 2.0, at
your option. The terms of these licenses can be found at:
To the extent possible under law, the author(s) have dedicated all copyright
and related and neighboring rights to this software to the public domain
worldwide. This software is distributed without any warranty.
- CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0
- OpenSSL license : https://www.openssl.org/source/license.html
- Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0
You should have received a copy of the CC0 Public Domain Dedication along with
this software. If not, see <http://creativecommons.org/publicdomain/zero/1.0/>.
More information about the BLAKE2 hash function can be found at
https://blake2.net.
*/
#include <stdlib.h>
@ -24,31 +26,47 @@
#define PARALLELISM_DEGREE 8
static inline int blake2sp_init_leaf( blake2s_state *S, uint8_t outlen, uint8_t keylen, uint64_t offset )
/*
blake2sp_init_param defaults to setting the expecting output length
from the digest_length parameter block field.
In some cases, however, we do not want this, as the output length
of these instances is given by inner_length instead.
*/
static int blake2sp_init_leaf_param( blake2s_state *S, const blake2s_param *P )
{
int err = blake2s_init_param(S, P);
S->outlen = P->inner_length;
return err;
}
static int blake2sp_init_leaf( blake2s_state *S, size_t outlen, size_t keylen, uint64_t offset )
{
blake2s_param P[1];
P->digest_length = outlen;
P->key_length = keylen;
P->digest_length = (uint8_t)outlen;
P->key_length = (uint8_t)keylen;
P->fanout = PARALLELISM_DEGREE;
P->depth = 2;
P->leaf_length = 0;
store48( P->node_offset, offset );
P->node_offset = offset;
P->xof_length = 0;
P->node_depth = 0;
P->inner_length = BLAKE2S_OUTBYTES;
memset( P->salt, 0, sizeof( P->salt ) );
memset( P->personal, 0, sizeof( P->personal ) );
return blake2s_init_param( S, P );
return blake2sp_init_leaf_param( S, P );
}
static inline int blake2sp_init_root( blake2s_state *S, uint8_t outlen, uint8_t keylen )
static int blake2sp_init_root( blake2s_state *S, size_t outlen, size_t keylen )
{
blake2s_param P[1];
P->digest_length = outlen;
P->key_length = keylen;
P->digest_length = (uint8_t)outlen;
P->key_length = (uint8_t)keylen;
P->fanout = PARALLELISM_DEGREE;
P->depth = 2;
P->leaf_length = 0;
store48( P->node_offset, 0ULL );
P->node_offset = 0;
P->xof_length = 0;
P->node_depth = 1;
P->inner_length = BLAKE2S_OUTBYTES;
memset( P->salt, 0, sizeof( P->salt ) );
@ -57,17 +75,20 @@ static inline int blake2sp_init_root( blake2s_state *S, uint8_t outlen, uint8_t
}
int blake2sp_init( blake2sp_state *S, const uint8_t outlen )
int blake2sp_init( blake2sp_state *S, size_t outlen )
{
size_t i;
if( !outlen || outlen > BLAKE2S_OUTBYTES ) return -1;
memset( S->buf, 0, sizeof( S->buf ) );
S->buflen = 0;
S->outlen = outlen;
if( blake2sp_init_root( S->R, outlen, 0 ) < 0 )
return -1;
for( size_t i = 0; i < PARALLELISM_DEGREE; ++i )
for( i = 0; i < PARALLELISM_DEGREE; ++i )
if( blake2sp_init_leaf( S->S[i], outlen, 0, i ) < 0 ) return -1;
S->R->last_node = 1;
@ -75,19 +96,22 @@ int blake2sp_init( blake2sp_state *S, const uint8_t outlen )
return 0;
}
int blake2sp_init_key( blake2sp_state *S, const uint8_t outlen, const void *key, const uint8_t keylen )
int blake2sp_init_key( blake2sp_state *S, size_t outlen, const void *key, size_t keylen )
{
size_t i;
if( !outlen || outlen > BLAKE2S_OUTBYTES ) return -1;
if( !key || !keylen || keylen > BLAKE2S_KEYBYTES ) return -1;
memset( S->buf, 0, sizeof( S->buf ) );
S->buflen = 0;
S->outlen = outlen;
if( blake2sp_init_root( S->R, outlen, keylen ) < 0 )
return -1;
for( size_t i = 0; i < PARALLELISM_DEGREE; ++i )
for( i = 0; i < PARALLELISM_DEGREE; ++i )
if( blake2sp_init_leaf( S->S[i], outlen, keylen, i ) < 0 ) return -1;
S->R->last_node = 1;
@ -97,7 +121,7 @@ int blake2sp_init_key( blake2sp_state *S, const uint8_t outlen, const void *key,
memset( block, 0, BLAKE2S_BLOCKBYTES );
memcpy( block, key, keylen );
for( size_t i = 0; i < PARALLELISM_DEGREE; ++i )
for( i = 0; i < PARALLELISM_DEGREE; ++i )
blake2s_update( S->S[i], block, BLAKE2S_BLOCKBYTES );
secure_zero_memory( block, BLAKE2S_BLOCKBYTES ); /* Burn the key from stack */
@ -106,16 +130,18 @@ int blake2sp_init_key( blake2sp_state *S, const uint8_t outlen, const void *key,
}
int blake2sp_update( blake2sp_state *S, const uint8_t *in, uint64_t inlen )
int blake2sp_update( blake2sp_state *S, const void *pin, size_t inlen )
{
const unsigned char * in = (const unsigned char *)pin;
size_t left = S->buflen;
size_t fill = sizeof( S->buf ) - left;
size_t i;
if( left && inlen >= fill )
{
memcpy( S->buf + left, in, fill );
for( size_t i = 0; i < PARALLELISM_DEGREE; ++i )
for( i = 0; i < PARALLELISM_DEGREE; ++i )
blake2s_update( S->S[i], S->buf + i * BLAKE2S_BLOCKBYTES, BLAKE2S_BLOCKBYTES );
in += fill;
@ -127,19 +153,19 @@ int blake2sp_update( blake2sp_state *S, const uint8_t *in, uint64_t inlen )
#pragma omp parallel shared(S), num_threads(PARALLELISM_DEGREE)
#else
for( size_t id__ = 0; id__ < PARALLELISM_DEGREE; ++id__ )
for( i = 0; i < PARALLELISM_DEGREE; ++i )
#endif
{
#if defined(_OPENMP)
size_t id__ = omp_get_thread_num();
size_t i = omp_get_thread_num();
#endif
uint64_t inlen__ = inlen;
const uint8_t *in__ = ( const uint8_t * )in;
in__ += id__ * BLAKE2S_BLOCKBYTES;
size_t inlen__ = inlen;
const unsigned char *in__ = ( const unsigned char * )in;
in__ += i * BLAKE2S_BLOCKBYTES;
while( inlen__ >= PARALLELISM_DEGREE * BLAKE2S_BLOCKBYTES )
{
blake2s_update( S->S[id__], in__, BLAKE2S_BLOCKBYTES );
blake2s_update( S->S[i], in__, BLAKE2S_BLOCKBYTES );
in__ += PARALLELISM_DEGREE * BLAKE2S_BLOCKBYTES;
inlen__ -= PARALLELISM_DEGREE * BLAKE2S_BLOCKBYTES;
}
@ -156,11 +182,16 @@ int blake2sp_update( blake2sp_state *S, const uint8_t *in, uint64_t inlen )
}
int blake2sp_final( blake2sp_state *S, uint8_t *out, const uint8_t outlen )
int blake2sp_final( blake2sp_state *S, void *out, size_t outlen )
{
uint8_t hash[PARALLELISM_DEGREE][BLAKE2S_OUTBYTES];
size_t i;
for( size_t i = 0; i < PARALLELISM_DEGREE; ++i )
if(out == NULL || outlen < S->outlen) {
return -1;
}
for( i = 0; i < PARALLELISM_DEGREE; ++i )
{
if( S->buflen > i * BLAKE2S_BLOCKBYTES )
{
@ -174,31 +205,35 @@ int blake2sp_final( blake2sp_state *S, uint8_t *out, const uint8_t outlen )
blake2s_final( S->S[i], hash[i], BLAKE2S_OUTBYTES );
}
for( size_t i = 0; i < PARALLELISM_DEGREE; ++i )
for( i = 0; i < PARALLELISM_DEGREE; ++i )
blake2s_update( S->R, hash[i], BLAKE2S_OUTBYTES );
blake2s_final( S->R, out, outlen );
return 0;
return blake2s_final( S->R, out, S->outlen );
}
int blake2sp( uint8_t *out, const void *in, const void *key, uint8_t outlen, uint64_t inlen, uint8_t keylen )
int blake2sp( void *out, size_t outlen, const void *in, size_t inlen, const void *key, size_t keylen )
{
uint8_t hash[PARALLELISM_DEGREE][BLAKE2S_OUTBYTES];
blake2s_state S[PARALLELISM_DEGREE][1];
blake2s_state FS[1];
size_t i;
/* Verify parameters */
if ( NULL == in ) return -1;
if ( NULL == in && inlen > 0 ) return -1;
if ( NULL == out ) return -1;
if ( NULL == key ) keylen = 0;
if ( NULL == key && keylen > 0) return -1;
for( size_t i = 0; i < PARALLELISM_DEGREE; ++i )
if( !outlen || outlen > BLAKE2S_OUTBYTES ) return -1;
if( keylen > BLAKE2S_KEYBYTES ) return -1;
for( i = 0; i < PARALLELISM_DEGREE; ++i )
if( blake2sp_init_leaf( S[i], outlen, keylen, i ) < 0 ) return -1;
S[PARALLELISM_DEGREE - 1]->last_node = 1; // mark last node
S[PARALLELISM_DEGREE - 1]->last_node = 1; /* mark last node */
if( keylen > 0 )
{
@ -206,7 +241,7 @@ int blake2sp( uint8_t *out, const void *in, const void *key, uint8_t outlen, uin
memset( block, 0, BLAKE2S_BLOCKBYTES );
memcpy( block, key, keylen );
for( size_t i = 0; i < PARALLELISM_DEGREE; ++i )
for( i = 0; i < PARALLELISM_DEGREE; ++i )
blake2s_update( S[i], block, BLAKE2S_BLOCKBYTES );
secure_zero_memory( block, BLAKE2S_BLOCKBYTES ); /* Burn the key from stack */
@ -216,31 +251,31 @@ int blake2sp( uint8_t *out, const void *in, const void *key, uint8_t outlen, uin
#pragma omp parallel shared(S,hash), num_threads(PARALLELISM_DEGREE)
#else
for( size_t id__ = 0; id__ < PARALLELISM_DEGREE; ++id__ )
for( i = 0; i < PARALLELISM_DEGREE; ++i )
#endif
{
#if defined(_OPENMP)
size_t id__ = omp_get_thread_num();
size_t i = omp_get_thread_num();
#endif
uint64_t inlen__ = inlen;
const uint8_t *in__ = ( const uint8_t * )in;
in__ += id__ * BLAKE2S_BLOCKBYTES;
size_t inlen__ = inlen;
const unsigned char *in__ = ( const unsigned char * )in;
in__ += i * BLAKE2S_BLOCKBYTES;
while( inlen__ >= PARALLELISM_DEGREE * BLAKE2S_BLOCKBYTES )
{
blake2s_update( S[id__], in__, BLAKE2S_BLOCKBYTES );
blake2s_update( S[i], in__, BLAKE2S_BLOCKBYTES );
in__ += PARALLELISM_DEGREE * BLAKE2S_BLOCKBYTES;
inlen__ -= PARALLELISM_DEGREE * BLAKE2S_BLOCKBYTES;
}
if( inlen__ > id__ * BLAKE2S_BLOCKBYTES )
if( inlen__ > i * BLAKE2S_BLOCKBYTES )
{
const size_t left = inlen__ - id__ * BLAKE2S_BLOCKBYTES;
const size_t left = inlen__ - i * BLAKE2S_BLOCKBYTES;
const size_t len = left <= BLAKE2S_BLOCKBYTES ? left : BLAKE2S_BLOCKBYTES;
blake2s_update( S[id__], in__, len );
blake2s_update( S[i], in__, len );
}
blake2s_final( S[id__], hash[id__], BLAKE2S_OUTBYTES );
blake2s_final( S[i], hash[i], BLAKE2S_OUTBYTES );
}
if( blake2sp_init_root( FS, outlen, keylen ) < 0 )
@ -248,44 +283,76 @@ int blake2sp( uint8_t *out, const void *in, const void *key, uint8_t outlen, uin
FS->last_node = 1;
for( size_t i = 0; i < PARALLELISM_DEGREE; ++i )
for( i = 0; i < PARALLELISM_DEGREE; ++i )
blake2s_update( FS, hash[i], BLAKE2S_OUTBYTES );
blake2s_final( FS, out, outlen );
return 0;
return blake2s_final( FS, out, outlen );
}
#if defined(BLAKE2SP_SELFTEST)
#include <string.h>
#include "blake2-kat.h"
int main( int argc, char **argv )
int main( void )
{
uint8_t key[BLAKE2S_KEYBYTES];
uint8_t buf[KAT_LENGTH];
uint8_t buf[BLAKE2_KAT_LENGTH];
size_t i, step;
for( size_t i = 0; i < BLAKE2S_KEYBYTES; ++i )
for( i = 0; i < BLAKE2S_KEYBYTES; ++i )
key[i] = ( uint8_t )i;
for( size_t i = 0; i < KAT_LENGTH; ++i )
for( i = 0; i < BLAKE2_KAT_LENGTH; ++i )
buf[i] = ( uint8_t )i;
for( size_t i = 0; i < KAT_LENGTH; ++i )
/* Test simple API */
for( i = 0; i < BLAKE2_KAT_LENGTH; ++i )
{
uint8_t hash[BLAKE2S_OUTBYTES];
blake2sp( hash, buf, key, BLAKE2S_OUTBYTES, i, BLAKE2S_KEYBYTES );
blake2sp( hash, BLAKE2S_OUTBYTES, buf, i, key, BLAKE2S_KEYBYTES );
if( 0 != memcmp( hash, blake2sp_keyed_kat[i], BLAKE2S_OUTBYTES ) )
{
puts( "error" );
return -1;
goto fail;
}
}
/* Test streaming API */
for(step = 1; step < BLAKE2S_BLOCKBYTES; ++step) {
for (i = 0; i < BLAKE2_KAT_LENGTH; ++i) {
uint8_t hash[BLAKE2S_OUTBYTES];
blake2sp_state S;
uint8_t * p = buf;
size_t mlen = i;
int err = 0;
if( (err = blake2sp_init_key(&S, BLAKE2S_OUTBYTES, key, BLAKE2S_KEYBYTES)) < 0 ) {
goto fail;
}
while (mlen >= step) {
if ( (err = blake2sp_update(&S, p, step)) < 0 ) {
goto fail;
}
mlen -= step;
p += step;
}
if ( (err = blake2sp_update(&S, p, mlen)) < 0) {
goto fail;
}
if ( (err = blake2sp_final(&S, hash, BLAKE2S_OUTBYTES)) < 0) {
goto fail;
}
if (0 != memcmp(hash, blake2sp_keyed_kat[i], BLAKE2S_OUTBYTES)) {
goto fail;
}
}
}
puts( "ok" );
return 0;
fail:
puts("error");
return -1;
}
#endif

241
blake2/blake2xb.c Normal file
View file

@ -0,0 +1,241 @@
/*
BLAKE2 reference source code package - reference C implementations
Copyright 2016, JP Aumasson <jeanphilippe.aumasson@gmail.com>.
Copyright 2016, Samuel Neves <sneves@dei.uc.pt>.
You may use this under the terms of the CC0, the OpenSSL Licence, or
the Apache Public License 2.0, at your option. The terms of these
licenses can be found at:
- CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0
- OpenSSL license : https://www.openssl.org/source/license.html
- Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0
More information about the BLAKE2 hash function can be found at
https://blake2.net.
*/
#include <stdint.h>
#include <string.h>
#include <stdio.h>
#include "blake2.h"
#include "blake2-impl.h"
int blake2xb_init( blake2xb_state *S, const size_t outlen ) {
return blake2xb_init_key(S, outlen, NULL, 0);
}
int blake2xb_init_key( blake2xb_state *S, const size_t outlen, const void *key, size_t keylen)
{
if ( outlen == 0 || outlen > 0xFFFFFFFFUL ) {
return -1;
}
if (NULL != key && keylen > BLAKE2B_KEYBYTES) {
return -1;
}
if (NULL == key && keylen > 0) {
return -1;
}
/* Initialize parameter block */
S->P->digest_length = BLAKE2B_OUTBYTES;
S->P->key_length = keylen;
S->P->fanout = 1;
S->P->depth = 1;
store32( &S->P->leaf_length, 0 );
store32( &S->P->node_offset, 0 );
store32( &S->P->xof_length, outlen );
S->P->node_depth = 0;
S->P->inner_length = 0;
memset( S->P->reserved, 0, sizeof( S->P->reserved ) );
memset( S->P->salt, 0, sizeof( S->P->salt ) );
memset( S->P->personal, 0, sizeof( S->P->personal ) );
if( blake2b_init_param( S->S, S->P ) < 0 ) {
return -1;
}
if (keylen > 0) {
uint8_t block[BLAKE2B_BLOCKBYTES];
memset(block, 0, BLAKE2B_BLOCKBYTES);
memcpy(block, key, keylen);
blake2b_update(S->S, block, BLAKE2B_BLOCKBYTES);
secure_zero_memory(block, BLAKE2B_BLOCKBYTES);
}
return 0;
}
int blake2xb_update( blake2xb_state *S, const void *in, size_t inlen ) {
return blake2b_update( S->S, in, inlen );
}
int blake2xb_final( blake2xb_state *S, void *out, size_t outlen) {
blake2b_state C[1];
blake2b_param P[1];
uint32_t xof_length = load32(&S->P->xof_length);
uint8_t root[BLAKE2B_BLOCKBYTES];
size_t i;
if (NULL == out) {
return -1;
}
/* outlen must match the output size defined in xof_length, */
/* unless it was -1, in which case anything goes except 0. */
if(xof_length == 0xFFFFFFFFUL) {
if(outlen == 0) {
return -1;
}
} else {
if(outlen != xof_length) {
return -1;
}
}
/* Finalize the root hash */
if (blake2b_final(S->S, root, BLAKE2B_OUTBYTES) < 0) {
return -1;
}
/* Set common block structure values */
/* Copy values from parent instance, and only change the ones below */
memcpy(P, S->P, sizeof(blake2b_param));
P->key_length = 0;
P->fanout = 0;
P->depth = 0;
store32(&P->leaf_length, BLAKE2B_OUTBYTES);
P->inner_length = BLAKE2B_OUTBYTES;
P->node_depth = 0;
for (i = 0; outlen > 0; ++i) {
const size_t block_size = (outlen < BLAKE2B_OUTBYTES) ? outlen : BLAKE2B_OUTBYTES;
/* Initialize state */
P->digest_length = block_size;
store32(&P->node_offset, i);
blake2b_init_param(C, P);
/* Process key if needed */
blake2b_update(C, root, BLAKE2B_OUTBYTES);
if (blake2b_final(C, (uint8_t *)out + i * BLAKE2B_OUTBYTES, block_size) < 0 ) {
return -1;
}
outlen -= block_size;
}
secure_zero_memory(root, sizeof(root));
secure_zero_memory(P, sizeof(P));
secure_zero_memory(C, sizeof(C));
/* Put blake2xb in an invalid state? cf. blake2s_is_lastblock */
return 0;
}
int blake2xb(void *out, size_t outlen, const void *in, size_t inlen, const void *key, size_t keylen)
{
blake2xb_state S[1];
/* Verify parameters */
if (NULL == in && inlen > 0)
return -1;
if (NULL == out)
return -1;
if (NULL == key && keylen > 0)
return -1;
if (keylen > BLAKE2B_KEYBYTES)
return -1;
if (outlen == 0)
return -1;
/* Initialize the root block structure */
if (blake2xb_init_key(S, outlen, key, keylen) < 0) {
return -1;
}
/* Absorb the input message */
blake2xb_update(S, in, inlen);
/* Compute the root node of the tree and the final hash using the counter construction */
return blake2xb_final(S, out, outlen);
}
#if defined(BLAKE2XB_SELFTEST)
#include <string.h>
#include "blake2-kat.h"
int main( void )
{
uint8_t key[BLAKE2B_KEYBYTES];
uint8_t buf[BLAKE2_KAT_LENGTH];
size_t i, step, outlen;
for( i = 0; i < BLAKE2B_KEYBYTES; ++i ) {
key[i] = ( uint8_t )i;
}
for( i = 0; i < BLAKE2_KAT_LENGTH; ++i ) {
buf[i] = ( uint8_t )i;
}
/* Testing length of ouputs rather than inputs */
/* (Test of input lengths mostly covered by blake2s tests) */
/* Test simple API */
for( outlen = 1; outlen <= BLAKE2_KAT_LENGTH; ++outlen )
{
uint8_t hash[BLAKE2_KAT_LENGTH] = {0};
if( blake2xb( hash, outlen, buf, BLAKE2_KAT_LENGTH, key, BLAKE2B_KEYBYTES ) < 0 ) {
goto fail;
}
if( 0 != memcmp( hash, blake2xb_keyed_kat[outlen-1], outlen ) )
{
goto fail;
}
}
/* Test streaming API */
for(step = 1; step < BLAKE2B_BLOCKBYTES; ++step) {
for (outlen = 1; outlen <= BLAKE2_KAT_LENGTH; ++outlen) {
uint8_t hash[BLAKE2_KAT_LENGTH];
blake2xb_state S;
uint8_t * p = buf;
size_t mlen = BLAKE2_KAT_LENGTH;
int err = 0;
if( (err = blake2xb_init_key(&S, outlen, key, BLAKE2B_KEYBYTES)) < 0 ) {
goto fail;
}
while (mlen >= step) {
if ( (err = blake2xb_update(&S, p, step)) < 0 ) {
goto fail;
}
mlen -= step;
p += step;
}
if ( (err = blake2xb_update(&S, p, mlen)) < 0) {
goto fail;
}
if ( (err = blake2xb_final(&S, hash, outlen)) < 0) {
goto fail;
}
if (0 != memcmp(hash, blake2xb_keyed_kat[outlen-1], outlen)) {
goto fail;
}
}
}
puts( "ok" );
return 0;
fail:
puts("error");
return -1;
}
#endif

239
blake2/blake2xs.c Normal file
View file

@ -0,0 +1,239 @@
/*
BLAKE2 reference source code package - reference C implementations
Copyright 2016, JP Aumasson <jeanphilippe.aumasson@gmail.com>.
Copyright 2016, Samuel Neves <sneves@dei.uc.pt>.
You may use this under the terms of the CC0, the OpenSSL Licence, or
the Apache Public License 2.0, at your option. The terms of these
licenses can be found at:
- CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0
- OpenSSL license : https://www.openssl.org/source/license.html
- Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0
More information about the BLAKE2 hash function can be found at
https://blake2.net.
*/
#include <stdint.h>
#include <string.h>
#include <stdio.h>
#include "blake2.h"
#include "blake2-impl.h"
int blake2xs_init( blake2xs_state *S, const size_t outlen ) {
return blake2xs_init_key(S, outlen, NULL, 0);
}
int blake2xs_init_key( blake2xs_state *S, const size_t outlen, const void *key, size_t keylen )
{
if ( outlen == 0 || outlen > 0xFFFFUL ) {
return -1;
}
if (NULL != key && keylen > BLAKE2B_KEYBYTES) {
return -1;
}
if (NULL == key && keylen > 0) {
return -1;
}
/* Initialize parameter block */
S->P->digest_length = BLAKE2S_OUTBYTES;
S->P->key_length = keylen;
S->P->fanout = 1;
S->P->depth = 1;
store32( &S->P->leaf_length, 0 );
store32( &S->P->node_offset, 0 );
store16( &S->P->xof_length, outlen );
S->P->node_depth = 0;
S->P->inner_length = 0;
memset( S->P->salt, 0, sizeof( S->P->salt ) );
memset( S->P->personal, 0, sizeof( S->P->personal ) );
if( blake2s_init_param( S->S, S->P ) < 0 ) {
return -1;
}
if (keylen > 0) {
uint8_t block[BLAKE2S_BLOCKBYTES];
memset(block, 0, BLAKE2S_BLOCKBYTES);
memcpy(block, key, keylen);
blake2s_update(S->S, block, BLAKE2S_BLOCKBYTES);
secure_zero_memory(block, BLAKE2S_BLOCKBYTES);
}
return 0;
}
int blake2xs_update( blake2xs_state *S, const void *in, size_t inlen ) {
return blake2s_update( S->S, in, inlen );
}
int blake2xs_final(blake2xs_state *S, void *out, size_t outlen) {
blake2s_state C[1];
blake2s_param P[1];
uint16_t xof_length = load16(&S->P->xof_length);
uint8_t root[BLAKE2S_BLOCKBYTES];
size_t i;
if (NULL == out) {
return -1;
}
/* outlen must match the output size defined in xof_length, */
/* unless it was -1, in which case anything goes except 0. */
if(xof_length == 0xFFFFUL) {
if(outlen == 0) {
return -1;
}
} else {
if(outlen != xof_length) {
return -1;
}
}
/* Finalize the root hash */
if (blake2s_final(S->S, root, BLAKE2S_OUTBYTES) < 0) {
return -1;
}
/* Set common block structure values */
/* Copy values from parent instance, and only change the ones below */
memcpy(P, S->P, sizeof(blake2s_param));
P->key_length = 0;
P->fanout = 0;
P->depth = 0;
store32(&P->leaf_length, BLAKE2S_OUTBYTES);
P->inner_length = BLAKE2S_OUTBYTES;
P->node_depth = 0;
for (i = 0; outlen > 0; ++i) {
const size_t block_size = (outlen < BLAKE2S_OUTBYTES) ? outlen : BLAKE2S_OUTBYTES;
/* Initialize state */
P->digest_length = block_size;
store32(&P->node_offset, i);
blake2s_init_param(C, P);
/* Process key if needed */
blake2s_update(C, root, BLAKE2S_OUTBYTES);
if (blake2s_final(C, (uint8_t *)out + i * BLAKE2S_OUTBYTES, block_size) < 0) {
return -1;
}
outlen -= block_size;
}
secure_zero_memory(root, sizeof(root));
secure_zero_memory(P, sizeof(P));
secure_zero_memory(C, sizeof(C));
/* Put blake2xs in an invalid state? cf. blake2s_is_lastblock */
return 0;
}
int blake2xs(void *out, size_t outlen, const void *in, size_t inlen, const void *key, size_t keylen)
{
blake2xs_state S[1];
/* Verify parameters */
if (NULL == in && inlen > 0)
return -1;
if (NULL == out)
return -1;
if (NULL == key && keylen > 0)
return -1;
if (keylen > BLAKE2S_KEYBYTES)
return -1;
if (outlen == 0)
return -1;
/* Initialize the root block structure */
if (blake2xs_init_key(S, outlen, key, keylen) < 0) {
return -1;
}
/* Absorb the input message */
blake2xs_update(S, in, inlen);
/* Compute the root node of the tree and the final hash using the counter construction */
return blake2xs_final(S, out, outlen);
}
#if defined(BLAKE2XS_SELFTEST)
#include <string.h>
#include "blake2-kat.h"
int main( void )
{
uint8_t key[BLAKE2S_KEYBYTES];
uint8_t buf[BLAKE2_KAT_LENGTH];
size_t i, step, outlen;
for( i = 0; i < BLAKE2S_KEYBYTES; ++i ) {
key[i] = ( uint8_t )i;
}
for( i = 0; i < BLAKE2_KAT_LENGTH; ++i ) {
buf[i] = ( uint8_t )i;
}
/* Testing length of ouputs rather than inputs */
/* (Test of input lengths mostly covered by blake2s tests) */
/* Test simple API */
for( outlen = 1; outlen <= BLAKE2_KAT_LENGTH; ++outlen )
{
uint8_t hash[BLAKE2_KAT_LENGTH] = {0};
if( blake2xs( hash, outlen, buf, BLAKE2_KAT_LENGTH, key, BLAKE2S_KEYBYTES ) < 0 ) {
goto fail;
}
if( 0 != memcmp( hash, blake2xs_keyed_kat[outlen-1], outlen ) )
{
goto fail;
}
}
/* Test streaming API */
for(step = 1; step < BLAKE2S_BLOCKBYTES; ++step) {
for (outlen = 1; outlen <= BLAKE2_KAT_LENGTH; ++outlen) {
uint8_t hash[BLAKE2_KAT_LENGTH];
blake2xs_state S;
uint8_t * p = buf;
size_t mlen = BLAKE2_KAT_LENGTH;
int err = 0;
if( (err = blake2xs_init_key(&S, outlen, key, BLAKE2S_KEYBYTES)) < 0 ) {
goto fail;
}
while (mlen >= step) {
if ( (err = blake2xs_update(&S, p, step)) < 0 ) {
goto fail;
}
mlen -= step;
p += step;
}
if ( (err = blake2xs_update(&S, p, mlen)) < 0) {
goto fail;
}
if ( (err = blake2xs_final(&S, hash, outlen)) < 0) {
goto fail;
}
if (0 != memcmp(hash, blake2xs_keyed_kat[outlen-1], outlen)) {
goto fail;
}
}
}
puts( "ok" );
return 0;
fail:
puts("error");
return -1;
}
#endif

139
blake2/genkat-c.c Normal file
View file

@ -0,0 +1,139 @@
/*
BLAKE2 reference source code package - reference C implementations
Copyright 2012, Samuel Neves <sneves@dei.uc.pt>. You may use this under the
terms of the CC0, the OpenSSL Licence, or the Apache Public License 2.0, at
your option. The terms of these licenses can be found at:
- CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0
- OpenSSL license : https://www.openssl.org/source/license.html
- Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0
More information about the BLAKE2 hash function can be found at
https://blake2.net.
*/
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "blake2.h"
#define STR_(x) #x
#define STR(x) STR_(x)
#define LENGTH 256
#define MAKE_KAT(name, size_prefix) \
do { \
printf("static const uint8_t " #name "_kat[BLAKE2_KAT_LENGTH][" #size_prefix \
"_OUTBYTES] = \n{\n"); \
\
for (i = 0; i < LENGTH; ++i) { \
name(hash, size_prefix##_OUTBYTES, in, i, NULL, 0); \
printf("\t{\n\t\t"); \
\
for (j = 0; j < size_prefix##_OUTBYTES; ++j) \
printf("0x%02X%s", hash[j], \
(j + 1) == size_prefix##_OUTBYTES ? "\n" : j && !((j + 1) % 8) ? ",\n\t\t" : ", "); \
\
printf("\t},\n"); \
} \
\
printf("};\n\n\n\n\n"); \
} while (0)
#define MAKE_KEYED_KAT(name, size_prefix) \
do { \
printf("static const uint8_t " #name "_keyed_kat[BLAKE2_KAT_LENGTH][" #size_prefix \
"_OUTBYTES] = \n{\n"); \
\
for (i = 0; i < LENGTH; ++i) { \
name(hash, size_prefix##_OUTBYTES, in, i, key, size_prefix##_KEYBYTES); \
printf("\t{\n\t\t"); \
\
for (j = 0; j < size_prefix##_OUTBYTES; ++j) \
printf("0x%02X%s", hash[j], \
(j + 1) == size_prefix##_OUTBYTES ? "\n" : j && !((j + 1) % 8) ? ",\n\t\t" : ", "); \
\
printf("\t},\n"); \
} \
\
printf("};\n\n\n\n\n"); \
} while (0)
#define MAKE_XOF_KAT(name) \
do { \
printf("static const uint8_t " #name "_kat[BLAKE2_KAT_LENGTH][BLAKE2_KAT_LENGTH] = \n{\n"); \
\
for (i = 1; i <= LENGTH; ++i) { \
name(hash, i, in, LENGTH, NULL, 0); \
printf("\t{\n\t\t"); \
\
for (j = 0; j < i; ++j) \
printf("0x%02X%s", hash[j], \
(j + 1) == LENGTH ? "\n" : j && !((j + 1) % 8) ? ",\n\t\t" : ", "); \
\
for (j = i; j < LENGTH; ++j) \
printf("0x00%s", (j + 1) == LENGTH ? "\n" : j && !((j + 1) % 8) ? ",\n\t\t" : ", "); \
\
printf("\t},\n"); \
} \
\
printf("};\n\n\n\n\n"); \
} while (0)
#define MAKE_XOF_KEYED_KAT(name, size_prefix) \
do { \
printf("static const uint8_t " #name \
"_keyed_kat[BLAKE2_KAT_LENGTH][BLAKE2_KAT_LENGTH] = \n{\n"); \
\
for (i = 1; i <= LENGTH; ++i) { \
name(hash, i, in, LENGTH, key, size_prefix##_KEYBYTES); \
printf("\t{\n\t\t"); \
\
for (j = 0; j < i; ++j) \
printf("0x%02X%s", hash[j], \
(j + 1) == LENGTH ? "\n" : j && !((j + 1) % 8) ? ",\n\t\t" : ", "); \
\
for (j = i; j < LENGTH; ++j) \
printf("0x00%s", (j + 1) == LENGTH ? "\n" : j && !((j + 1) % 8) ? ",\n\t\t" : ", "); \
\
printf("\t},\n"); \
} \
\
printf("};\n\n\n\n\n"); \
} while (0)
int main() {
uint8_t key[64] = {0};
uint8_t in[LENGTH] = {0};
uint8_t hash[LENGTH] = {0};
size_t i, j;
for (i = 0; i < sizeof(in); ++i)
in[i] = i;
for (i = 0; i < sizeof(key); ++i)
key[i] = i;
puts("#ifndef BLAKE2_KAT_H\n"
"#define BLAKE2_KAT_H\n\n\n"
"#include <stdint.h>\n\n"
"#define BLAKE2_KAT_LENGTH " STR(LENGTH) "\n\n\n");
MAKE_KAT(blake2s, BLAKE2S);
MAKE_KEYED_KAT(blake2s, BLAKE2S);
MAKE_KAT(blake2b, BLAKE2B);
MAKE_KEYED_KAT(blake2b, BLAKE2B);
MAKE_KAT(blake2sp, BLAKE2S);
MAKE_KEYED_KAT(blake2sp, BLAKE2S);
MAKE_KAT(blake2bp, BLAKE2B);
MAKE_KEYED_KAT(blake2bp, BLAKE2B);
MAKE_XOF_KAT(blake2xs);
MAKE_XOF_KEYED_KAT(blake2xs, BLAKE2S);
MAKE_XOF_KAT(blake2xb);
MAKE_XOF_KEYED_KAT(blake2xb, BLAKE2B);
puts("#endif");
return 0;
}

154
blake2/genkat-json.c Normal file
View file

@ -0,0 +1,154 @@
/*
BLAKE2 reference source code package - reference C implementations
Copyright 2012, Samuel Neves <sneves@dei.uc.pt>. You may use this under the
terms of the CC0, the OpenSSL Licence, or the Apache Public License 2.0, at
your option. The terms of these licenses can be found at:
- CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0
- OpenSSL license : https://www.openssl.org/source/license.html
- Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0
More information about the BLAKE2 hash function can be found at
https://blake2.net.
*/
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "blake2.h"
#define STR_(x) #x
#define STR(x) STR_(x)
#define LENGTH 256
#define MAKE_KAT(name, size_prefix, first) \
do { \
for (i = 0; i < LENGTH; ++i) { \
printf("%s\n{\n", i == 0 && first ? "" : ","); \
\
printf(" \"hash\": \"" #name "\",\n"); \
printf(" \"in\": \""); \
for (j = 0; j < i; ++j) \
printf("%02x", in[j]); \
\
printf("\",\n"); \
printf(" \"key\": \"\",\n"); \
printf(" \"out\": \""); \
\
name(hash, size_prefix##_OUTBYTES, in, i, NULL, 0); \
\
for (j = 0; j < size_prefix##_OUTBYTES; ++j) \
printf("%02x", hash[j]); \
printf("\"\n"); \
printf("}"); \
} \
} while (0)
#define MAKE_KEYED_KAT(name, size_prefix, first) \
do { \
for (i = 0; i < LENGTH; ++i) { \
printf("%s\n{\n", i == 0 && first ? "" : ","); \
\
printf(" \"hash\": \"" #name "\",\n"); \
printf(" \"in\": \""); \
for (j = 0; j < i; ++j) \
printf("%02x", in[j]); \
\
printf("\",\n"); \
printf(" \"key\": \""); \
for (j = 0; j < size_prefix##_KEYBYTES; ++j) \
printf("%02x", key[j]); \
printf("\",\n"); \
printf(" \"out\": \""); \
\
name(hash, size_prefix##_OUTBYTES, in, i, key, size_prefix##_KEYBYTES); \
\
for (j = 0; j < size_prefix##_OUTBYTES; ++j) \
printf("%02x", hash[j]); \
printf("\"\n"); \
printf("}"); \
} \
} while (0)
#define MAKE_XOF_KAT(name, first) \
do { \
for (i = 1; i <= LENGTH; ++i) { \
printf("%s\n{\n", i == 1 && first ? "" : ","); \
\
printf(" \"hash\": \"" #name "\",\n"); \
printf(" \"in\": \""); \
for (j = 0; j < LENGTH; ++j) \
printf("%02x", in[j]); \
\
printf("\",\n"); \
printf(" \"key\": \"\",\n"); \
printf(" \"out\": \""); \
\
name(hash, i, in, LENGTH, NULL, 0); \
\
for (j = 0; j < i; ++j) \
printf("%02x", hash[j]); \
printf("\"\n"); \
printf("}"); \
} \
} while (0)
#define MAKE_XOF_KEYED_KAT(name, size_prefix, first) \
do { \
for (i = 1; i <= LENGTH; ++i) { \
printf("%s\n{\n", i == 1 && first ? "" : ","); \
\
printf(" \"hash\": \"" #name "\",\n"); \
printf(" \"in\": \""); \
for (j = 0; j < LENGTH; ++j) \
printf("%02x", in[j]); \
\
printf("\",\n"); \
printf(" \"key\": \""); \
for (j = 0; j < size_prefix##_KEYBYTES; ++j) \
printf("%02x", key[j]); \
printf("\",\n"); \
printf(" \"out\": \""); \
\
name(hash, i, in, LENGTH, key, size_prefix##_KEYBYTES); \
\
for (j = 0; j < i; ++j) \
printf("%02x", hash[j]); \
printf("\"\n"); \
printf("}"); \
} \
} while (0)
int main() {
uint8_t key[64] = {0};
uint8_t in[LENGTH] = {0};
uint8_t hash[LENGTH] = {0};
size_t i, j;
for (i = 0; i < sizeof(in); ++i)
in[i] = i;
for (i = 0; i < sizeof(key); ++i)
key[i] = i;
printf("[");
MAKE_KAT(blake2s, BLAKE2S, 1);
MAKE_KEYED_KAT(blake2s, BLAKE2S, 0);
MAKE_KAT(blake2b, BLAKE2B, 0);
MAKE_KEYED_KAT(blake2b, BLAKE2B, 0);
MAKE_KAT(blake2sp, BLAKE2S, 0);
MAKE_KEYED_KAT(blake2sp, BLAKE2S, 0);
MAKE_KAT(blake2bp, BLAKE2B, 0);
MAKE_KEYED_KAT(blake2bp, BLAKE2B, 0);
MAKE_XOF_KAT(blake2xs, 0);
MAKE_XOF_KEYED_KAT(blake2xs, BLAKE2S, 0);
MAKE_XOF_KAT(blake2xb, 0);
MAKE_XOF_KEYED_KAT(blake2xb, BLAKE2B, 0);
printf("\n]\n");
fflush(stdout);
return 0;
}

View file

@ -1,7 +1,8 @@
CC=gcc
CFLAGS=-std=c99 -Wall -pedantic -O3 -march=native
CFLAGS=-O3 -I../testvectors -Wall -Wextra -std=c89 -pedantic -Wno-long-long
BLAKEBINS=blake2s blake2b blake2sp blake2bp blake2xs blake2xb
all: blake2s blake2b blake2sp blake2bp
all: $(BLAKEBINS) check
blake2s: blake2s.c
$(CC) blake2s.c -o $@ $(CFLAGS) -DBLAKE2S_SELFTEST
@ -15,5 +16,25 @@ blake2sp: blake2sp.c blake2s.c
blake2bp: blake2bp.c blake2b.c
$(CC) blake2bp.c blake2b.c -o $@ $(CFLAGS) -DBLAKE2BP_SELFTEST
clean:
rm -rf *.o blake2s blake2b blake2sp blake2bp
blake2xs: blake2xs.c blake2s.c
$(CC) blake2xs.c blake2s.c -o $@ $(CFLAGS) -DBLAKE2XS_SELFTEST
blake2xb: blake2xb.c blake2b.c
$(CC) blake2xb.c blake2b.c -o $@ $(CFLAGS) -DBLAKE2XB_SELFTEST
check: blake2s blake2b blake2sp blake2bp blake2xs blake2xb
./blake2s
./blake2b
./blake2sp
./blake2bp
./blake2xs
./blake2xb
kat:
$(CC) $(CFLAGS) -o genkat-c genkat-c.c blake2b.c blake2s.c blake2sp.c blake2bp.c blake2xs.c blake2xb.c
$(CC) $(CFLAGS) -g -o genkat-json genkat-json.c blake2b.c blake2s.c blake2sp.c blake2bp.c blake2xs.c blake2xb.c
./genkat-c > blake2-kat.h
./genkat-json > blake2-kat.json
clean:
rm -rf *.o genkat-c genkat-json blake2-kat.h blake2-kat.json $(BLAKEBINS)