LCOV - code coverage report
Current view: top level - src/crypto - sha256_shani.cpp (source / functions) Hit Total Coverage
Test: total_coverage.info Lines: 0 283 0.0 %
Date: 2020-09-26 01:30:44 Functions: 0 11 0.0 %

          Line data    Source code
       1             : // Copyright (c) 2018-2019 The Bitcoin Core developers
       2             : // Distributed under the MIT software license, see the accompanying
       3             : // file COPYING or http://www.opensource.org/licenses/mit-license.php.
       4             : //
       5             : // Based on https://github.com/noloader/SHA-Intrinsics/blob/master/sha256-x86.c,
       6             : // Written and placed in public domain by Jeffrey Walton.
       7             : // Based on code from Intel, and by Sean Gulley for the miTLS project.
       8             : 
       9             : #ifdef ENABLE_SHANI
      10             : 
      11             : #include <stdint.h>
      12             : #include <immintrin.h>
      13             : 
      14             : namespace {
      15             : // Shuffle control and initial-state tables; alignas(__m128i) permits the aligned _mm_load_si128 loads used on them.
      16             : alignas(__m128i) const uint8_t MASK[16] = {0x03, 0x02, 0x01, 0x00, 0x07, 0x06, 0x05, 0x04, 0x0b, 0x0a, 0x09, 0x08, 0x0f, 0x0e, 0x0d, 0x0c}; // byte-shuffle control: reverses the four bytes of every 32-bit lane
      17             : alignas(__m128i) const uint8_t INIT0[16] = {0x8c, 0x68, 0x05, 0x9b, 0x7f, 0x52, 0x0e, 0x51, 0x85, 0xae, 0x67, 0xbb, 0x67, 0xe6, 0x09, 0x6a}; // little-endian lanes = SHA-256 initial words {h5, h4, h1, h0}
      18             : alignas(__m128i) const uint8_t INIT1[16] = {0x19, 0xcd, 0xe0, 0x5b, 0xab, 0xd9, 0x83, 0x1f, 0x3a, 0xf5, 0x4f, 0xa5, 0x72, 0xf3, 0x6e, 0x3c}; // little-endian lanes = SHA-256 initial words {h7, h6, h3, h2}
      19             : // Four rounds (two sha256rnds2 calls) whose only round input is the 128-bit value k1:k0; no message vector is added.
      20           0 : void inline  __attribute__((always_inline)) QuadRound(__m128i& state0, __m128i& state1, uint64_t k1, uint64_t k0)
      21             : {
      22           0 :     const __m128i msg = _mm_set_epi64x(k1, k0);
      23           0 :     state1 = _mm_sha256rnds2_epu32(state1, state0, msg);
      24           0 :     state0 = _mm_sha256rnds2_epu32(state0, state1, _mm_shuffle_epi32(msg, 0x0e)); // 0x0e brings lanes 2,3 of msg down to lanes 0,1
      25           0 : }
      26             : // Four rounds with the message words m added lane-wise (32-bit, wrapping) to the constants k1:k0.
      27           0 : void inline  __attribute__((always_inline)) QuadRound(__m128i& state0, __m128i& state1, __m128i m, uint64_t k1, uint64_t k0)
      28             : {
      29           0 :     const __m128i msg = _mm_add_epi32(m, _mm_set_epi64x(k1, k0));
      30           0 :     state1 = _mm_sha256rnds2_epu32(state1, state0, msg);
      31           0 :     state0 = _mm_sha256rnds2_epu32(state0, state1, _mm_shuffle_epi32(msg, 0x0e)); // 0x0e brings lanes 2,3 of msg down to lanes 0,1
      32           0 : }
      33             : // First message-schedule step: replaces m0 with sha256msg1(m0, m1).
      34           0 : void inline  __attribute__((always_inline)) ShiftMessageA(__m128i& m0, __m128i m1)
      35             : {
      36           0 :     m0 = _mm_sha256msg1_epu32(m0, m1);
      37           0 : }
      38             : // Second message-schedule step, accumulated into m2; m0 is only read here despite being a reference.
      39           0 : void inline  __attribute__((always_inline)) ShiftMessageC(__m128i& m0, __m128i m1, __m128i& m2)
      40             : {
      41           0 :     m2 = _mm_sha256msg2_epu32(_mm_add_epi32(m2, _mm_alignr_epi8(m1, m0, 4)), m1); // alignr(m1, m0, 4) yields lanes {m0[1], m0[2], m0[3], m1[0]}
      42           0 : }
      43             : // Runs ShiftMessageC and then ShiftMessageA, so m2 is finished from the old m0 before m0 is overwritten.
      44           0 : void inline __attribute__((always_inline)) ShiftMessageB(__m128i& m0, __m128i m1, __m128i& m2)
      45             : {
      46           0 :     ShiftMessageC(m0, m1, m2);
      47           0 :     ShiftMessageA(m0, m1);
      48           0 : }
      49             : // Permutes state words held as lanes {0,1,2,3},{4,5,6,7} into lane order {5,4,1,0},{7,6,3,2}.
      50           0 : void inline __attribute__((always_inline)) Shuffle(__m128i& s0, __m128i& s1)
      51             : {
      52           0 :     const __m128i t1 = _mm_shuffle_epi32(s0, 0xB1);
      53           0 :     const __m128i t2 = _mm_shuffle_epi32(s1, 0x1B);
      54           0 :     s0 = _mm_alignr_epi8(t1, t2, 0x08);
      55           0 :     s1 = _mm_blend_epi16(t2, t1, 0xF0);
      56           0 : }
      57             : // Inverse of Shuffle: restores lane order {0,1,2,3},{4,5,6,7}.
      58           0 : void inline __attribute__((always_inline)) Unshuffle(__m128i& s0, __m128i& s1)
      59             : {
      60           0 :     const __m128i t1 = _mm_shuffle_epi32(s0, 0x1B);
      61           0 :     const __m128i t2 = _mm_shuffle_epi32(s1, 0xB1);
      62           0 :     s0 = _mm_blend_epi16(t1, t2, 0xF0);
      63           0 :     s1 = _mm_alignr_epi8(t2, t1, 0x08);
      64           0 : }
      65             : // Loads 16 bytes from in (unaligned load) and byte-swaps each 32-bit lane using MASK.
      66           0 : __m128i inline  __attribute__((always_inline)) Load(const unsigned char* in)
      67             : {
      68           0 :     return _mm_shuffle_epi8(_mm_loadu_si128((const __m128i*)in), _mm_load_si128((const __m128i*)MASK));
      69             : }
      70             : // Byte-swaps each 32-bit lane of s using MASK and writes the 16 bytes to out (unaligned store).
      71           0 : void inline  __attribute__((always_inline)) Save(unsigned char* out, __m128i s)
      72             : {
      73           0 :     _mm_storeu_si128((__m128i*)out, _mm_shuffle_epi8(s, _mm_load_si128((const __m128i*)MASK)));
      74           0 : }
      75             : }
      76             : 
      77             : namespace sha256_shani {
      78           0 : void Transform(uint32_t* s, const unsigned char* chunk, size_t blocks) // s: 8 state words, updated in place; chunk: blocks * 64 input bytes
      79             : {
      80           0 :     __m128i m0, m1, m2, m3, s0, s1, so0, so1;
      81             : 
      82             :     /* Load the eight state words and permute them into the lane order used by the rounds */
      83           0 :     s0 = _mm_loadu_si128((const __m128i*)s);
      84           0 :     s1 = _mm_loadu_si128((const __m128i*)(s + 4));
      85           0 :     Shuffle(s0, s1);
      86             : 
      87           0 :     while (blocks--) {
      88             :         /* Remember old state so it can be added back after the rounds */
      89           0 :         so0 = s0;
      90           0 :         so1 = s1;
      91             : 
      92             :         /* Load data and transform: 16 QuadRound calls (four rounds each) with message-schedule updates interleaved */
      93           0 :         m0 = Load(chunk);
      94           0 :         QuadRound(s0, s1, m0, 0xe9b5dba5b5c0fbcfull, 0x71374491428a2f98ull);
      95           0 :         m1 = Load(chunk + 16);
      96           0 :         QuadRound(s0, s1, m1, 0xab1c5ed5923f82a4ull, 0x59f111f13956c25bull);
      97           0 :         ShiftMessageA(m0, m1);
      98           0 :         m2 = Load(chunk + 32);
      99           0 :         QuadRound(s0, s1, m2, 0x550c7dc3243185beull, 0x12835b01d807aa98ull);
     100           0 :         ShiftMessageA(m1, m2);
     101           0 :         m3 = Load(chunk + 48);
     102           0 :         QuadRound(s0, s1, m3, 0xc19bf1749bdc06a7ull, 0x80deb1fe72be5d74ull);
     103           0 :         ShiftMessageB(m2, m3, m0);
     104           0 :         QuadRound(s0, s1, m0, 0x240ca1cc0fc19dc6ull, 0xefbe4786E49b69c1ull);
     105           0 :         ShiftMessageB(m3, m0, m1);
     106           0 :         QuadRound(s0, s1, m1, 0x76f988da5cb0a9dcull, 0x4a7484aa2de92c6full);
     107           0 :         ShiftMessageB(m0, m1, m2);
     108           0 :         QuadRound(s0, s1, m2, 0xbf597fc7b00327c8ull, 0xa831c66d983e5152ull);
     109           0 :         ShiftMessageB(m1, m2, m3);
     110           0 :         QuadRound(s0, s1, m3, 0x1429296706ca6351ull, 0xd5a79147c6e00bf3ull);
     111           0 :         ShiftMessageB(m2, m3, m0);
     112           0 :         QuadRound(s0, s1, m0, 0x53380d134d2c6dfcull, 0x2e1b213827b70a85ull);
     113           0 :         ShiftMessageB(m3, m0, m1);
     114           0 :         QuadRound(s0, s1, m1, 0x92722c8581c2c92eull, 0x766a0abb650a7354ull);
     115           0 :         ShiftMessageB(m0, m1, m2);
     116           0 :         QuadRound(s0, s1, m2, 0xc76c51A3c24b8b70ull, 0xa81a664ba2bfe8a1ull);
     117           0 :         ShiftMessageB(m1, m2, m3);
     118           0 :         QuadRound(s0, s1, m3, 0x106aa070f40e3585ull, 0xd6990624d192e819ull);
     119           0 :         ShiftMessageB(m2, m3, m0);
     120           0 :         QuadRound(s0, s1, m0, 0x34b0bcb52748774cull, 0x1e376c0819a4c116ull);
     121           0 :         ShiftMessageB(m3, m0, m1);
     122           0 :         QuadRound(s0, s1, m1, 0x682e6ff35b9cca4full, 0x4ed8aa4a391c0cb3ull);
     123           0 :         ShiftMessageC(m0, m1, m2);
     124           0 :         QuadRound(s0, s1, m2, 0x8cc7020884c87814ull, 0x78a5636f748f82eeull);
     125           0 :         ShiftMessageC(m1, m2, m3);
     126           0 :         QuadRound(s0, s1, m3, 0xc67178f2bef9A3f7ull, 0xa4506ceb90befffaull);
     127             : 
     128             :         /* Combine with old state (lane-wise 32-bit addition) */
     129           0 :         s0 = _mm_add_epi32(s0, so0);
     130           0 :         s1 = _mm_add_epi32(s1, so1);
     131             : 
     132             :         /* Advance to the next 64-byte block */
     133           0 :         chunk += 64;
     134             :     }
     135             :     /* Restore linear word order and write the eight state words back to s */
     136           0 :     Unshuffle(s0, s1);
     137           0 :     _mm_storeu_si128((__m128i*)s, s0);
     138           0 :     _mm_storeu_si128((__m128i*)(s + 4), s1);
     139           0 : }
     140             : }
     141             : 
     142             : namespace sha256d64_shani {
     143             : // Processes two independent 64-byte inputs, a = in[0..63] and b = in[64..127], with their steps interleaved; writes 32 bytes per input to out. The three transforms appear to form SHA256(SHA256(x)) of each 64-byte input.
     144           0 : void Transform_2way(unsigned char* out, const unsigned char* in)
     145             : {
     146           0 :     __m128i am0, am1, am2, am3, as0, as1, aso0, aso1;
     147           0 :     __m128i bm0, bm1, bm2, bm3, bs0, bs1, bso0, bso1;
     148             : 
     149             :     /* Transform 1: compress the 64 input bytes of each stream, starting from INIT0/INIT1 */
     150           0 :     bs0 = as0 = _mm_load_si128((const __m128i*)INIT0);
     151           0 :     bs1 = as1 = _mm_load_si128((const __m128i*)INIT1);
     152           0 :     am0 = Load(in);
     153           0 :     bm0 = Load(in + 64);
     154           0 :     QuadRound(as0, as1, am0, 0xe9b5dba5b5c0fbcfull, 0x71374491428a2f98ull);
     155           0 :     QuadRound(bs0, bs1, bm0, 0xe9b5dba5b5c0fbcfull, 0x71374491428a2f98ull);
     156           0 :     am1 = Load(in + 16);
     157           0 :     bm1 = Load(in + 80);
     158           0 :     QuadRound(as0, as1, am1, 0xab1c5ed5923f82a4ull, 0x59f111f13956c25bull);
     159           0 :     QuadRound(bs0, bs1, bm1, 0xab1c5ed5923f82a4ull, 0x59f111f13956c25bull);
     160           0 :     ShiftMessageA(am0, am1);
     161           0 :     ShiftMessageA(bm0, bm1);
     162           0 :     am2 = Load(in + 32);
     163           0 :     bm2 = Load(in + 96);
     164           0 :     QuadRound(as0, as1, am2, 0x550c7dc3243185beull, 0x12835b01d807aa98ull);
     165           0 :     QuadRound(bs0, bs1, bm2, 0x550c7dc3243185beull, 0x12835b01d807aa98ull);
     166           0 :     ShiftMessageA(am1, am2);
     167           0 :     ShiftMessageA(bm1, bm2);
     168           0 :     am3 = Load(in + 48);
     169           0 :     bm3 = Load(in + 112);
     170           0 :     QuadRound(as0, as1, am3, 0xc19bf1749bdc06a7ull, 0x80deb1fe72be5d74ull);
     171           0 :     QuadRound(bs0, bs1, bm3, 0xc19bf1749bdc06a7ull, 0x80deb1fe72be5d74ull);
     172           0 :     ShiftMessageB(am2, am3, am0);
     173           0 :     ShiftMessageB(bm2, bm3, bm0);
     174           0 :     QuadRound(as0, as1, am0, 0x240ca1cc0fc19dc6ull, 0xefbe4786E49b69c1ull);
     175           0 :     QuadRound(bs0, bs1, bm0, 0x240ca1cc0fc19dc6ull, 0xefbe4786E49b69c1ull);
     176           0 :     ShiftMessageB(am3, am0, am1);
     177           0 :     ShiftMessageB(bm3, bm0, bm1);
     178           0 :     QuadRound(as0, as1, am1, 0x76f988da5cb0a9dcull, 0x4a7484aa2de92c6full);
     179           0 :     QuadRound(bs0, bs1, bm1, 0x76f988da5cb0a9dcull, 0x4a7484aa2de92c6full);
     180           0 :     ShiftMessageB(am0, am1, am2);
     181           0 :     ShiftMessageB(bm0, bm1, bm2);
     182           0 :     QuadRound(as0, as1, am2, 0xbf597fc7b00327c8ull, 0xa831c66d983e5152ull);
     183           0 :     QuadRound(bs0, bs1, bm2, 0xbf597fc7b00327c8ull, 0xa831c66d983e5152ull);
     184           0 :     ShiftMessageB(am1, am2, am3);
     185           0 :     ShiftMessageB(bm1, bm2, bm3);
     186           0 :     QuadRound(as0, as1, am3, 0x1429296706ca6351ull, 0xd5a79147c6e00bf3ull);
     187           0 :     QuadRound(bs0, bs1, bm3, 0x1429296706ca6351ull, 0xd5a79147c6e00bf3ull);
     188           0 :     ShiftMessageB(am2, am3, am0);
     189           0 :     ShiftMessageB(bm2, bm3, bm0);
     190           0 :     QuadRound(as0, as1, am0, 0x53380d134d2c6dfcull, 0x2e1b213827b70a85ull);
     191           0 :     QuadRound(bs0, bs1, bm0, 0x53380d134d2c6dfcull, 0x2e1b213827b70a85ull);
     192           0 :     ShiftMessageB(am3, am0, am1);
     193           0 :     ShiftMessageB(bm3, bm0, bm1);
     194           0 :     QuadRound(as0, as1, am1, 0x92722c8581c2c92eull, 0x766a0abb650a7354ull);
     195           0 :     QuadRound(bs0, bs1, bm1, 0x92722c8581c2c92eull, 0x766a0abb650a7354ull);
     196           0 :     ShiftMessageB(am0, am1, am2);
     197           0 :     ShiftMessageB(bm0, bm1, bm2);
     198           0 :     QuadRound(as0, as1, am2, 0xc76c51A3c24b8b70ull, 0xa81a664ba2bfe8a1ull);
     199           0 :     QuadRound(bs0, bs1, bm2, 0xc76c51A3c24b8b70ull, 0xa81a664ba2bfe8a1ull);
     200           0 :     ShiftMessageB(am1, am2, am3);
     201           0 :     ShiftMessageB(bm1, bm2, bm3);
     202           0 :     QuadRound(as0, as1, am3, 0x106aa070f40e3585ull, 0xd6990624d192e819ull);
     203           0 :     QuadRound(bs0, bs1, bm3, 0x106aa070f40e3585ull, 0xd6990624d192e819ull);
     204           0 :     ShiftMessageB(am2, am3, am0);
     205           0 :     ShiftMessageB(bm2, bm3, bm0);
     206           0 :     QuadRound(as0, as1, am0, 0x34b0bcb52748774cull, 0x1e376c0819a4c116ull);
     207           0 :     QuadRound(bs0, bs1, bm0, 0x34b0bcb52748774cull, 0x1e376c0819a4c116ull);
     208           0 :     ShiftMessageB(am3, am0, am1);
     209           0 :     ShiftMessageB(bm3, bm0, bm1);
     210           0 :     QuadRound(as0, as1, am1, 0x682e6ff35b9cca4full, 0x4ed8aa4a391c0cb3ull);
     211           0 :     QuadRound(bs0, bs1, bm1, 0x682e6ff35b9cca4full, 0x4ed8aa4a391c0cb3ull);
     212           0 :     ShiftMessageC(am0, am1, am2);
     213           0 :     ShiftMessageC(bm0, bm1, bm2);
     214           0 :     QuadRound(as0, as1, am2, 0x8cc7020884c87814ull, 0x78a5636f748f82eeull);
     215           0 :     QuadRound(bs0, bs1, bm2, 0x8cc7020884c87814ull, 0x78a5636f748f82eeull);
     216           0 :     ShiftMessageC(am1, am2, am3);
     217           0 :     ShiftMessageC(bm1, bm2, bm3);
     218           0 :     QuadRound(as0, as1, am3, 0xc67178f2bef9A3f7ull, 0xa4506ceb90befffaull);
     219           0 :     QuadRound(bs0, bs1, bm3, 0xc67178f2bef9A3f7ull, 0xa4506ceb90befffaull);
     220           0 :     as0 = _mm_add_epi32(as0, _mm_load_si128((const __m128i*)INIT0));
     221           0 :     bs0 = _mm_add_epi32(bs0, _mm_load_si128((const __m128i*)INIT0));
     222           0 :     as1 = _mm_add_epi32(as1, _mm_load_si128((const __m128i*)INIT1));
     223           0 :     bs1 = _mm_add_epi32(bs1, _mm_load_si128((const __m128i*)INIT1));
     224             : 
     225             :     /* Transform 2: 64 more rounds with constant-only inputs (no message vectors); presumably the fixed padding block of a 64-byte message, pre-added into the round constants */
     226           0 :     aso0 = as0;
     227           0 :     bso0 = bs0;
     228           0 :     aso1 = as1;
     229             :     bso1 = bs1;
     230           0 :     QuadRound(as0, as1, 0xe9b5dba5b5c0fbcfull, 0x71374491c28a2f98ull); // low word is the Transform 1 constant plus 0x80000000
     231           0 :     QuadRound(bs0, bs1, 0xe9b5dba5b5c0fbcfull, 0x71374491c28a2f98ull);
     232           0 :     QuadRound(as0, as1, 0xab1c5ed5923f82a4ull, 0x59f111f13956c25bull);
     233           0 :     QuadRound(bs0, bs1, 0xab1c5ed5923f82a4ull, 0x59f111f13956c25bull);
     234           0 :     QuadRound(as0, as1, 0x550c7dc3243185beull, 0x12835b01d807aa98ull);
     235           0 :     QuadRound(bs0, bs1, 0x550c7dc3243185beull, 0x12835b01d807aa98ull);
     236           0 :     QuadRound(as0, as1, 0xc19bf3749bdc06a7ull, 0x80deb1fe72be5d74ull); // top word is the Transform 1 constant plus 0x200
     237           0 :     QuadRound(bs0, bs1, 0xc19bf3749bdc06a7ull, 0x80deb1fe72be5d74ull);
     238           0 :     QuadRound(as0, as1, 0x240cf2540fe1edc6ull, 0xf0fe4786649b69c1ull);
     239           0 :     QuadRound(bs0, bs1, 0x240cf2540fe1edc6ull, 0xf0fe4786649b69c1ull);
     240           0 :     QuadRound(as0, as1, 0x16f988fa61b9411eull, 0x6cc984be4fe9346full);
     241           0 :     QuadRound(bs0, bs1, 0x16f988fa61b9411eull, 0x6cc984be4fe9346full);
     242           0 :     QuadRound(as0, as1, 0xb9d99ec7b019fc65ull, 0xa88e5a6df2c65152ull);
     243           0 :     QuadRound(bs0, bs1, 0xb9d99ec7b019fc65ull, 0xa88e5a6df2c65152ull);
     244           0 :     QuadRound(as0, as1, 0xc7353eb0fdb1232bull, 0xe70eeaa09a1231c3ull);
     245           0 :     QuadRound(bs0, bs1, 0xc7353eb0fdb1232bull, 0xe70eeaa09a1231c3ull);
     246           0 :     QuadRound(as0, as1, 0xdc1eeefd5a0f118full, 0xcb976d5f3069bad5ull);
     247           0 :     QuadRound(bs0, bs1, 0xdc1eeefd5a0f118full, 0xcb976d5f3069bad5ull);
     248           0 :     QuadRound(as0, as1, 0xe15d5b1658f4ca9dull, 0xde0b7a040a35b689ull);
     249           0 :     QuadRound(bs0, bs1, 0xe15d5b1658f4ca9dull, 0xde0b7a040a35b689ull);
     250           0 :     QuadRound(as0, as1, 0x6fab9537a507ea32ull, 0x37088980007f3e86ull);
     251           0 :     QuadRound(bs0, bs1, 0x6fab9537a507ea32ull, 0x37088980007f3e86ull);
     252           0 :     QuadRound(as0, as1, 0xc0bbbe37cdaa3b6dull, 0x0d8cd6f117406110ull);
     253           0 :     QuadRound(bs0, bs1, 0xc0bbbe37cdaa3b6dull, 0x0d8cd6f117406110ull);
     254           0 :     QuadRound(as0, as1, 0x6fd15ca70b02e931ull, 0xdb48a36383613bdaull);
     255           0 :     QuadRound(bs0, bs1, 0x6fd15ca70b02e931ull, 0xdb48a36383613bdaull);
     256           0 :     QuadRound(as0, as1, 0x6d4378906ed41a95ull, 0x31338431521afacaull);
     257           0 :     QuadRound(bs0, bs1, 0x6d4378906ed41a95ull, 0x31338431521afacaull);
     258           0 :     QuadRound(as0, as1, 0x532fb63cb5c9a0e6ull, 0x9eccabbdc39c91f2ull);
     259           0 :     QuadRound(bs0, bs1, 0x532fb63cb5c9a0e6ull, 0x9eccabbdc39c91f2ull);
     260           0 :     QuadRound(as0, as1, 0x4c191d76a4954b68ull, 0x07237ea3d2c741c6ull);
     261           0 :     QuadRound(bs0, bs1, 0x4c191d76a4954b68ull, 0x07237ea3d2c741c6ull);
     262           0 :     as0 = _mm_add_epi32(as0, aso0);
     263           0 :     bs0 = _mm_add_epi32(bs0, bso0);
     264           0 :     as1 = _mm_add_epi32(as1, aso1);
     265           0 :     bs1 = _mm_add_epi32(bs1, bso1);
     266             : 
     267             :     /* Extract hash: back to linear word order; the eight words become message words 0..7 of Transform 3 */
     268           0 :     Unshuffle(as0, as1);
     269           0 :     Unshuffle(bs0, bs1);
     270           0 :     am0 = as0;
     271           0 :     bm0 = bs0;
     272           0 :     am1 = as1;
     273           0 :     bm1 = bs1;
     274             : 
     275             :     /* Transform 3: compress that 32-byte result plus fixed words 8..15, again starting from INIT0/INIT1 */
     276           0 :     bs0 = as0 = _mm_load_si128((const __m128i*)INIT0);
     277           0 :     bs1 = as1 = _mm_load_si128((const __m128i*)INIT1);
     278           0 :     QuadRound(as0, as1, am0, 0xe9b5dba5B5c0fbcfull, 0x71374491428a2f98ull);
     279           0 :     QuadRound(bs0, bs1, bm0, 0xe9b5dba5B5c0fbcfull, 0x71374491428a2f98ull);
     280           0 :     QuadRound(as0, as1, am1, 0xab1c5ed5923f82a4ull, 0x59f111f13956c25bull);
     281           0 :     QuadRound(bs0, bs1, bm1, 0xab1c5ed5923f82a4ull, 0x59f111f13956c25bull);
     282           0 :     ShiftMessageA(am0, am1);
     283           0 :     ShiftMessageA(bm0, bm1);
     284           0 :     bm2 = am2 = _mm_set_epi64x(0x0ull, 0x80000000ull); // message words 8..11 = {0x80000000, 0, 0, 0}; kept for the schedule, already folded into the next constants
     285           0 :     QuadRound(as0, as1, 0x550c7dc3243185beull, 0x12835b015807aa98ull);
     286           0 :     QuadRound(bs0, bs1, 0x550c7dc3243185beull, 0x12835b015807aa98ull);
     287           0 :     ShiftMessageA(am1, am2);
     288           0 :     ShiftMessageA(bm1, bm2);
     289           0 :     bm3 = am3 = _mm_set_epi64x(0x10000000000ull, 0x0ull); // message words 12..15 = {0, 0, 0, 0x100}; likewise folded into the next constants
     290           0 :     QuadRound(as0, as1, 0xc19bf2749bdc06a7ull, 0x80deb1fe72be5d74ull);
     291           0 :     QuadRound(bs0, bs1, 0xc19bf2749bdc06a7ull, 0x80deb1fe72be5d74ull);
     292           0 :     ShiftMessageB(am2, am3, am0);
     293           0 :     ShiftMessageB(bm2, bm3, bm0);
     294           0 :     QuadRound(as0, as1, am0, 0x240ca1cc0fc19dc6ull, 0xefbe4786e49b69c1ull);
     295           0 :     QuadRound(bs0, bs1, bm0, 0x240ca1cc0fc19dc6ull, 0xefbe4786e49b69c1ull);
     296           0 :     ShiftMessageB(am3, am0, am1);
     297           0 :     ShiftMessageB(bm3, bm0, bm1);
     298           0 :     QuadRound(as0, as1, am1, 0x76f988da5cb0a9dcull, 0x4a7484aa2de92c6full);
     299           0 :     QuadRound(bs0, bs1, bm1, 0x76f988da5cb0a9dcull, 0x4a7484aa2de92c6full);
     300           0 :     ShiftMessageB(am0, am1, am2);
     301           0 :     ShiftMessageB(bm0, bm1, bm2);
     302           0 :     QuadRound(as0, as1, am2, 0xbf597fc7b00327c8ull, 0xa831c66d983e5152ull);
     303           0 :     QuadRound(bs0, bs1, bm2, 0xbf597fc7b00327c8ull, 0xa831c66d983e5152ull);
     304           0 :     ShiftMessageB(am1, am2, am3);
     305           0 :     ShiftMessageB(bm1, bm2, bm3);
     306           0 :     QuadRound(as0, as1, am3, 0x1429296706ca6351ull, 0xd5a79147c6e00bf3ull);
     307           0 :     QuadRound(bs0, bs1, bm3, 0x1429296706ca6351ull, 0xd5a79147c6e00bf3ull);
     308           0 :     ShiftMessageB(am2, am3, am0);
     309           0 :     ShiftMessageB(bm2, bm3, bm0);
     310           0 :     QuadRound(as0, as1, am0, 0x53380d134d2c6dfcull, 0x2e1b213827b70a85ull);
     311           0 :     QuadRound(bs0, bs1, bm0, 0x53380d134d2c6dfcull, 0x2e1b213827b70a85ull);
     312           0 :     ShiftMessageB(am3, am0, am1);
     313           0 :     ShiftMessageB(bm3, bm0, bm1);
     314           0 :     QuadRound(as0, as1, am1, 0x92722c8581c2c92eull, 0x766a0abb650a7354ull);
     315           0 :     QuadRound(bs0, bs1, bm1, 0x92722c8581c2c92eull, 0x766a0abb650a7354ull);
     316           0 :     ShiftMessageB(am0, am1, am2);
     317           0 :     ShiftMessageB(bm0, bm1, bm2);
     318           0 :     QuadRound(as0, as1, am2, 0xc76c51a3c24b8b70ull, 0xa81a664ba2bfe8A1ull);
     319           0 :     QuadRound(bs0, bs1, bm2, 0xc76c51a3c24b8b70ull, 0xa81a664ba2bfe8A1ull);
     320           0 :     ShiftMessageB(am1, am2, am3);
     321           0 :     ShiftMessageB(bm1, bm2, bm3);
     322           0 :     QuadRound(as0, as1, am3, 0x106aa070f40e3585ull, 0xd6990624d192e819ull);
     323           0 :     QuadRound(bs0, bs1, bm3, 0x106aa070f40e3585ull, 0xd6990624d192e819ull);
     324           0 :     ShiftMessageB(am2, am3, am0);
     325           0 :     ShiftMessageB(bm2, bm3, bm0);
     326           0 :     QuadRound(as0, as1, am0, 0x34b0bcb52748774cull, 0x1e376c0819a4c116ull);
     327           0 :     QuadRound(bs0, bs1, bm0, 0x34b0bcb52748774cull, 0x1e376c0819a4c116ull);
     328           0 :     ShiftMessageB(am3, am0, am1);
     329           0 :     ShiftMessageB(bm3, bm0, bm1);
     330           0 :     QuadRound(as0, as1, am1, 0x682e6ff35b9cca4full, 0x4ed8aa4a391c0cb3ull);
     331           0 :     QuadRound(bs0, bs1, bm1, 0x682e6ff35b9cca4full, 0x4ed8aa4a391c0cb3ull);
     332           0 :     ShiftMessageC(am0, am1, am2);
     333           0 :     ShiftMessageC(bm0, bm1, bm2);
     334           0 :     QuadRound(as0, as1, am2, 0x8cc7020884c87814ull, 0x78a5636f748f82eeull);
     335           0 :     QuadRound(bs0, bs1, bm2, 0x8cc7020884c87814ull, 0x78a5636f748f82eeull);
     336           0 :     ShiftMessageC(am1, am2, am3);
     337           0 :     ShiftMessageC(bm1, bm2, bm3);
     338           0 :     QuadRound(as0, as1, am3, 0xc67178f2bef9a3f7ull, 0xa4506ceb90befffaull);
     339           0 :     QuadRound(bs0, bs1, bm3, 0xc67178f2bef9a3f7ull, 0xa4506ceb90befffaull);
     340           0 :     as0 = _mm_add_epi32(as0, _mm_load_si128((const __m128i*)INIT0));
     341           0 :     bs0 = _mm_add_epi32(bs0, _mm_load_si128((const __m128i*)INIT0));
     342           0 :     as1 = _mm_add_epi32(as1, _mm_load_si128((const __m128i*)INIT1));
     343           0 :     bs1 = _mm_add_epi32(bs1, _mm_load_si128((const __m128i*)INIT1));
     344             : 
     345             :     /* Extract hash into out: 32 byte-swapped bytes for stream a at out[0..31], then stream b at out[32..63] */
     346           0 :     Unshuffle(as0, as1);
     347           0 :     Unshuffle(bs0, bs1);
     348           0 :     Save(out, as0);
     349           0 :     Save(out + 16, as1);
     350           0 :     Save(out + 32, bs0);
     351           0 :     Save(out + 48, bs1);
     352           0 : }
     353             : 
     354             : }
     355             : 
     356             : #endif

Generated by: LCOV version 1.15