aboutsummaryrefslogtreecommitdiffstats
path: root/src/libs/3rdparty/botan/src/lib/block/aes
diff options
context:
space:
mode:
authorChristian Kandeler <christian.kandeler@qt.io>2018-01-03 17:56:52 +0100
committerChristian Kandeler <christian.kandeler@qt.io>2018-08-10 09:23:42 +0000
commita44fe2e4f03fc18ce9c3d050f71fe369916259b8 (patch)
treee5f3211ced0bc7fc1628ff5e7a051007dfa18207 /src/libs/3rdparty/botan/src/lib/block/aes
parent78c4cf9884770149fb9d69f923aa2169baa3f42a (diff)
SSH: Use Botan2
Botan 1.10 will be completely unsupported by the end of this year, so we now target API version 2 instead. Also upgrade our bundled Botan to the latest version 2.7. We no longer check in pre-processed files, but use the upstream sources directly (with unneeded parts removed), employing Botan's own configure script for building. This will make future upgrades much simpler. A script to automate this process is also provided. Task-number: QTCREATORBUG-18802 Task-number: QTCREATORBUG-8107 Change-Id: I5a5ea62cfd30d720b556217142e8b7e06bf49f7e Reviewed-by: hjk <hjk@qt.io> Reviewed-by: Eike Ziller <eike.ziller@qt.io>
Diffstat (limited to 'src/libs/3rdparty/botan/src/lib/block/aes')
-rw-r--r--src/libs/3rdparty/botan/src/lib/block/aes/aes.cpp750
-rw-r--r--src/libs/3rdparty/botan/src/lib/block/aes/aes.h153
-rw-r--r--src/libs/3rdparty/botan/src/lib/block/aes/aes_armv8/aes_armv8.cpp501
-rw-r--r--src/libs/3rdparty/botan/src/lib/block/aes/aes_armv8/info.txt10
-rw-r--r--src/libs/3rdparty/botan/src/lib/block/aes/aes_ni/aes_ni.cpp792
-rw-r--r--src/libs/3rdparty/botan/src/lib/block/aes/aes_ni/info.txt7
-rw-r--r--src/libs/3rdparty/botan/src/lib/block/aes/aes_power8/aes_power8.cpp328
-rw-r--r--src/libs/3rdparty/botan/src/lib/block/aes/aes_power8/info.txt9
-rw-r--r--src/libs/3rdparty/botan/src/lib/block/aes/aes_ssse3/aes_ssse3.cpp637
-rw-r--r--src/libs/3rdparty/botan/src/lib/block/aes/aes_ssse3/info.txt15
-rw-r--r--src/libs/3rdparty/botan/src/lib/block/aes/info.txt3
11 files changed, 3205 insertions, 0 deletions
diff --git a/src/libs/3rdparty/botan/src/lib/block/aes/aes.cpp b/src/libs/3rdparty/botan/src/lib/block/aes/aes.cpp
new file mode 100644
index 0000000000..403945cc91
--- /dev/null
+++ b/src/libs/3rdparty/botan/src/lib/block/aes/aes.cpp
@@ -0,0 +1,750 @@
+/*
+* AES
+* (C) 1999-2010,2015,2017 Jack Lloyd
+*
+* Based on the public domain reference implementation by Paulo Baretto
+*
+* Botan is released under the Simplified BSD License (see license.txt)
+*/
+
+#include <botan/aes.h>
+#include <botan/loadstor.h>
+#include <botan/cpuid.h>
+#include <type_traits>
+
+/*
+* This implementation is based on table lookups which are known to be
+* vulnerable to timing and cache based side channel attacks. Some
+* countermeasures are used which may be helpful in some situations:
+*
+* - Only a single 256-word T-table is used, with rotations applied.
+* Most implementations use 4 T-tables which leaks much more
+* information via cache usage.
+*
+* - The TE and TD tables are computed at runtime to avoid flush+reload
+* attacks using clflush. As different processes will not share the
+* same underlying table data, an attacker can't manipulate another
+* processes cache lines via their shared reference to the library
+* read only segment.
+*
+* - Each cache line of the lookup tables is accessed at the beginning
+* of each call to encrypt or decrypt. (See the Z variable below)
+*
+* If available SSSE3 or AES-NI are used instead of this version, as both
+* are faster and immune to side channel attacks.
+*
+* Some AES cache timing papers for reference:
+*
+* "Software mitigations to hedge AES against cache-based software side
+* channel vulnerabilities" https://eprint.iacr.org/2006/052.pdf
+*
+* "Cache Games - Bringing Access-Based Cache Attacks on AES to Practice"
+* http://www.ieee-security.org/TC/SP2011/PAPERS/2011/paper031.pdf
+*
+* "Cache-Collision Timing Attacks Against AES" Bonneau, Mironov
+* http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.88.4753
+*/
+
+namespace Botan {
+
+namespace {
+
+BOTAN_ALIGNAS(64)
+const uint8_t SE[256] = {
+ 0x63, 0x7C, 0x77, 0x7B, 0xF2, 0x6B, 0x6F, 0xC5, 0x30, 0x01, 0x67, 0x2B,
+ 0xFE, 0xD7, 0xAB, 0x76, 0xCA, 0x82, 0xC9, 0x7D, 0xFA, 0x59, 0x47, 0xF0,
+ 0xAD, 0xD4, 0xA2, 0xAF, 0x9C, 0xA4, 0x72, 0xC0, 0xB7, 0xFD, 0x93, 0x26,
+ 0x36, 0x3F, 0xF7, 0xCC, 0x34, 0xA5, 0xE5, 0xF1, 0x71, 0xD8, 0x31, 0x15,
+ 0x04, 0xC7, 0x23, 0xC3, 0x18, 0x96, 0x05, 0x9A, 0x07, 0x12, 0x80, 0xE2,
+ 0xEB, 0x27, 0xB2, 0x75, 0x09, 0x83, 0x2C, 0x1A, 0x1B, 0x6E, 0x5A, 0xA0,
+ 0x52, 0x3B, 0xD6, 0xB3, 0x29, 0xE3, 0x2F, 0x84, 0x53, 0xD1, 0x00, 0xED,
+ 0x20, 0xFC, 0xB1, 0x5B, 0x6A, 0xCB, 0xBE, 0x39, 0x4A, 0x4C, 0x58, 0xCF,
+ 0xD0, 0xEF, 0xAA, 0xFB, 0x43, 0x4D, 0x33, 0x85, 0x45, 0xF9, 0x02, 0x7F,
+ 0x50, 0x3C, 0x9F, 0xA8, 0x51, 0xA3, 0x40, 0x8F, 0x92, 0x9D, 0x38, 0xF5,
+ 0xBC, 0xB6, 0xDA, 0x21, 0x10, 0xFF, 0xF3, 0xD2, 0xCD, 0x0C, 0x13, 0xEC,
+ 0x5F, 0x97, 0x44, 0x17, 0xC4, 0xA7, 0x7E, 0x3D, 0x64, 0x5D, 0x19, 0x73,
+ 0x60, 0x81, 0x4F, 0xDC, 0x22, 0x2A, 0x90, 0x88, 0x46, 0xEE, 0xB8, 0x14,
+ 0xDE, 0x5E, 0x0B, 0xDB, 0xE0, 0x32, 0x3A, 0x0A, 0x49, 0x06, 0x24, 0x5C,
+ 0xC2, 0xD3, 0xAC, 0x62, 0x91, 0x95, 0xE4, 0x79, 0xE7, 0xC8, 0x37, 0x6D,
+ 0x8D, 0xD5, 0x4E, 0xA9, 0x6C, 0x56, 0xF4, 0xEA, 0x65, 0x7A, 0xAE, 0x08,
+ 0xBA, 0x78, 0x25, 0x2E, 0x1C, 0xA6, 0xB4, 0xC6, 0xE8, 0xDD, 0x74, 0x1F,
+ 0x4B, 0xBD, 0x8B, 0x8A, 0x70, 0x3E, 0xB5, 0x66, 0x48, 0x03, 0xF6, 0x0E,
+ 0x61, 0x35, 0x57, 0xB9, 0x86, 0xC1, 0x1D, 0x9E, 0xE1, 0xF8, 0x98, 0x11,
+ 0x69, 0xD9, 0x8E, 0x94, 0x9B, 0x1E, 0x87, 0xE9, 0xCE, 0x55, 0x28, 0xDF,
+ 0x8C, 0xA1, 0x89, 0x0D, 0xBF, 0xE6, 0x42, 0x68, 0x41, 0x99, 0x2D, 0x0F,
+ 0xB0, 0x54, 0xBB, 0x16 };
+
+BOTAN_ALIGNAS(64)
+const uint8_t SD[256] = {
+ 0x52, 0x09, 0x6A, 0xD5, 0x30, 0x36, 0xA5, 0x38, 0xBF, 0x40, 0xA3, 0x9E,
+ 0x81, 0xF3, 0xD7, 0xFB, 0x7C, 0xE3, 0x39, 0x82, 0x9B, 0x2F, 0xFF, 0x87,
+ 0x34, 0x8E, 0x43, 0x44, 0xC4, 0xDE, 0xE9, 0xCB, 0x54, 0x7B, 0x94, 0x32,
+ 0xA6, 0xC2, 0x23, 0x3D, 0xEE, 0x4C, 0x95, 0x0B, 0x42, 0xFA, 0xC3, 0x4E,
+ 0x08, 0x2E, 0xA1, 0x66, 0x28, 0xD9, 0x24, 0xB2, 0x76, 0x5B, 0xA2, 0x49,
+ 0x6D, 0x8B, 0xD1, 0x25, 0x72, 0xF8, 0xF6, 0x64, 0x86, 0x68, 0x98, 0x16,
+ 0xD4, 0xA4, 0x5C, 0xCC, 0x5D, 0x65, 0xB6, 0x92, 0x6C, 0x70, 0x48, 0x50,
+ 0xFD, 0xED, 0xB9, 0xDA, 0x5E, 0x15, 0x46, 0x57, 0xA7, 0x8D, 0x9D, 0x84,
+ 0x90, 0xD8, 0xAB, 0x00, 0x8C, 0xBC, 0xD3, 0x0A, 0xF7, 0xE4, 0x58, 0x05,
+ 0xB8, 0xB3, 0x45, 0x06, 0xD0, 0x2C, 0x1E, 0x8F, 0xCA, 0x3F, 0x0F, 0x02,
+ 0xC1, 0xAF, 0xBD, 0x03, 0x01, 0x13, 0x8A, 0x6B, 0x3A, 0x91, 0x11, 0x41,
+ 0x4F, 0x67, 0xDC, 0xEA, 0x97, 0xF2, 0xCF, 0xCE, 0xF0, 0xB4, 0xE6, 0x73,
+ 0x96, 0xAC, 0x74, 0x22, 0xE7, 0xAD, 0x35, 0x85, 0xE2, 0xF9, 0x37, 0xE8,
+ 0x1C, 0x75, 0xDF, 0x6E, 0x47, 0xF1, 0x1A, 0x71, 0x1D, 0x29, 0xC5, 0x89,
+ 0x6F, 0xB7, 0x62, 0x0E, 0xAA, 0x18, 0xBE, 0x1B, 0xFC, 0x56, 0x3E, 0x4B,
+ 0xC6, 0xD2, 0x79, 0x20, 0x9A, 0xDB, 0xC0, 0xFE, 0x78, 0xCD, 0x5A, 0xF4,
+ 0x1F, 0xDD, 0xA8, 0x33, 0x88, 0x07, 0xC7, 0x31, 0xB1, 0x12, 0x10, 0x59,
+ 0x27, 0x80, 0xEC, 0x5F, 0x60, 0x51, 0x7F, 0xA9, 0x19, 0xB5, 0x4A, 0x0D,
+ 0x2D, 0xE5, 0x7A, 0x9F, 0x93, 0xC9, 0x9C, 0xEF, 0xA0, 0xE0, 0x3B, 0x4D,
+ 0xAE, 0x2A, 0xF5, 0xB0, 0xC8, 0xEB, 0xBB, 0x3C, 0x83, 0x53, 0x99, 0x61,
+ 0x17, 0x2B, 0x04, 0x7E, 0xBA, 0x77, 0xD6, 0x26, 0xE1, 0x69, 0x14, 0x63,
+ 0x55, 0x21, 0x0C, 0x7D };
+
+inline uint8_t xtime(uint8_t s) { return static_cast<uint8_t>(s << 1) ^ ((s >> 7) * 0x1B); }
+inline uint8_t xtime4(uint8_t s) { return xtime(xtime(s)); }
+inline uint8_t xtime8(uint8_t s) { return xtime(xtime(xtime(s))); }
+
+inline uint8_t xtime3(uint8_t s) { return xtime(s) ^ s; }
+inline uint8_t xtime9(uint8_t s) { return xtime8(s) ^ s; }
+inline uint8_t xtime11(uint8_t s) { return xtime8(s) ^ xtime(s) ^ s; }
+inline uint8_t xtime13(uint8_t s) { return xtime8(s) ^ xtime4(s) ^ s; }
+inline uint8_t xtime14(uint8_t s) { return xtime8(s) ^ xtime4(s) ^ xtime(s); }
+
+inline uint32_t SE_word(uint32_t x)
+ {
+ return make_uint32(SE[get_byte(0, x)],
+ SE[get_byte(1, x)],
+ SE[get_byte(2, x)],
+ SE[get_byte(3, x)]);
+ }
+
+const uint32_t* AES_TE()
+ {
+ class TE_Table final
+ {
+ public:
+ TE_Table()
+ {
+ uint32_t* p = reinterpret_cast<uint32_t*>(&data);
+ for(size_t i = 0; i != 256; ++i)
+ {
+ const uint8_t s = SE[i];
+ p[i] = make_uint32(xtime(s), s, s, xtime3(s));
+ }
+ }
+
+ const uint32_t* ptr() const
+ {
+ return reinterpret_cast<const uint32_t*>(&data);
+ }
+ private:
+ std::aligned_storage<256*sizeof(uint32_t), 64>::type data;
+ };
+
+ static TE_Table table;
+ return table.ptr();
+ }
+
+const uint32_t* AES_TD()
+ {
+ class TD_Table final
+ {
+ public:
+ TD_Table()
+ {
+ uint32_t* p = reinterpret_cast<uint32_t*>(&data);
+ for(size_t i = 0; i != 256; ++i)
+ {
+ const uint8_t s = SD[i];
+ p[i] = make_uint32(xtime14(s), xtime9(s), xtime13(s), xtime11(s));
+ }
+ }
+
+ const uint32_t* ptr() const
+ {
+ return reinterpret_cast<const uint32_t*>(&data);
+ }
+ private:
+ std::aligned_storage<256*sizeof(uint32_t), 64>::type data;
+ };
+
+ static TD_Table table;
+ return table.ptr();
+ }
+
+#define AES_T(T, K, V0, V1, V2, V3) \
+ (K ^ T[get_byte(0, V0)] ^ \
+ rotr< 8>(T[get_byte(1, V1)]) ^ \
+ rotr<16>(T[get_byte(2, V2)]) ^ \
+ rotr<24>(T[get_byte(3, V3)]))
+
+/*
+* AES Encryption
+*/
+void aes_encrypt_n(const uint8_t in[], uint8_t out[],
+ size_t blocks,
+ const secure_vector<uint32_t>& EK,
+ const secure_vector<uint8_t>& ME)
+ {
+ BOTAN_ASSERT(EK.size() && ME.size() == 16, "Key was set");
+
+ const size_t cache_line_size = CPUID::cache_line_size();
+
+ const uint32_t* TE = AES_TE();
+
+ // Hit every cache line of TE
+ volatile uint32_t Z = 0;
+ for(size_t i = 0; i < 256; i += cache_line_size / sizeof(uint32_t))
+ {
+ Z |= TE[i];
+ }
+ Z &= TE[82]; // this is zero, which hopefully the compiler cannot deduce
+
+ for(size_t i = 0; i < blocks; ++i)
+ {
+ uint32_t T0, T1, T2, T3;
+ load_be(in + 16*i, T0, T1, T2, T3);
+
+ T0 ^= EK[0];
+ T1 ^= EK[1];
+ T2 ^= EK[2];
+ T3 ^= EK[3];
+
+ T0 ^= Z;
+
+ uint32_t B0 = AES_T(TE, EK[4], T0, T1, T2, T3);
+ uint32_t B1 = AES_T(TE, EK[5], T1, T2, T3, T0);
+ uint32_t B2 = AES_T(TE, EK[6], T2, T3, T0, T1);
+ uint32_t B3 = AES_T(TE, EK[7], T3, T0, T1, T2);
+
+ for(size_t r = 2*4; r < EK.size(); r += 2*4)
+ {
+ T0 = AES_T(TE, EK[r ], B0, B1, B2, B3);
+ T1 = AES_T(TE, EK[r+1], B1, B2, B3, B0);
+ T2 = AES_T(TE, EK[r+2], B2, B3, B0, B1);
+ T3 = AES_T(TE, EK[r+3], B3, B0, B1, B2);
+
+ B0 = AES_T(TE, EK[r+4], T0, T1, T2, T3);
+ B1 = AES_T(TE, EK[r+5], T1, T2, T3, T0);
+ B2 = AES_T(TE, EK[r+6], T2, T3, T0, T1);
+ B3 = AES_T(TE, EK[r+7], T3, T0, T1, T2);
+ }
+
+ /*
+ * Use TE[x] >> 8 instead of SE[] so encryption only references a single
+ * lookup table.
+ */
+ out[16*i+ 0] = static_cast<uint8_t>(TE[get_byte(0, B0)] >> 8) ^ ME[0];
+ out[16*i+ 1] = static_cast<uint8_t>(TE[get_byte(1, B1)] >> 8) ^ ME[1];
+ out[16*i+ 2] = static_cast<uint8_t>(TE[get_byte(2, B2)] >> 8) ^ ME[2];
+ out[16*i+ 3] = static_cast<uint8_t>(TE[get_byte(3, B3)] >> 8) ^ ME[3];
+ out[16*i+ 4] = static_cast<uint8_t>(TE[get_byte(0, B1)] >> 8) ^ ME[4];
+ out[16*i+ 5] = static_cast<uint8_t>(TE[get_byte(1, B2)] >> 8) ^ ME[5];
+ out[16*i+ 6] = static_cast<uint8_t>(TE[get_byte(2, B3)] >> 8) ^ ME[6];
+ out[16*i+ 7] = static_cast<uint8_t>(TE[get_byte(3, B0)] >> 8) ^ ME[7];
+ out[16*i+ 8] = static_cast<uint8_t>(TE[get_byte(0, B2)] >> 8) ^ ME[8];
+ out[16*i+ 9] = static_cast<uint8_t>(TE[get_byte(1, B3)] >> 8) ^ ME[9];
+ out[16*i+10] = static_cast<uint8_t>(TE[get_byte(2, B0)] >> 8) ^ ME[10];
+ out[16*i+11] = static_cast<uint8_t>(TE[get_byte(3, B1)] >> 8) ^ ME[11];
+ out[16*i+12] = static_cast<uint8_t>(TE[get_byte(0, B3)] >> 8) ^ ME[12];
+ out[16*i+13] = static_cast<uint8_t>(TE[get_byte(1, B0)] >> 8) ^ ME[13];
+ out[16*i+14] = static_cast<uint8_t>(TE[get_byte(2, B1)] >> 8) ^ ME[14];
+ out[16*i+15] = static_cast<uint8_t>(TE[get_byte(3, B2)] >> 8) ^ ME[15];
+ }
+ }
+
+/*
+* AES Decryption
+*/
+void aes_decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks,
+ const secure_vector<uint32_t>& DK,
+ const secure_vector<uint8_t>& MD)
+ {
+ BOTAN_ASSERT(DK.size() && MD.size() == 16, "Key was set");
+
+ const size_t cache_line_size = CPUID::cache_line_size();
+ const uint32_t* TD = AES_TD();
+
+ volatile uint32_t Z = 0;
+ for(size_t i = 0; i < 256; i += cache_line_size / sizeof(uint32_t))
+ {
+ Z |= TD[i];
+ }
+ Z &= TD[99]; // this is zero, which hopefully the compiler cannot deduce
+
+ for(size_t i = 0; i != blocks; ++i)
+ {
+ uint32_t T0 = load_be<uint32_t>(in, 0) ^ DK[0];
+ uint32_t T1 = load_be<uint32_t>(in, 1) ^ DK[1];
+ uint32_t T2 = load_be<uint32_t>(in, 2) ^ DK[2];
+ uint32_t T3 = load_be<uint32_t>(in, 3) ^ DK[3];
+
+ T0 ^= Z;
+
+ uint32_t B0 = AES_T(TD, DK[4], T0, T3, T2, T1);
+ uint32_t B1 = AES_T(TD, DK[5], T1, T0, T3, T2);
+ uint32_t B2 = AES_T(TD, DK[6], T2, T1, T0, T3);
+ uint32_t B3 = AES_T(TD, DK[7], T3, T2, T1, T0);
+
+ for(size_t r = 2*4; r < DK.size(); r += 2*4)
+ {
+ T0 = AES_T(TD, DK[r ], B0, B3, B2, B1);
+ T1 = AES_T(TD, DK[r+1], B1, B0, B3, B2);
+ T2 = AES_T(TD, DK[r+2], B2, B1, B0, B3);
+ T3 = AES_T(TD, DK[r+3], B3, B2, B1, B0);
+
+ B0 = AES_T(TD, DK[r+4], T0, T3, T2, T1);
+ B1 = AES_T(TD, DK[r+5], T1, T0, T3, T2);
+ B2 = AES_T(TD, DK[r+6], T2, T1, T0, T3);
+ B3 = AES_T(TD, DK[r+7], T3, T2, T1, T0);
+ }
+
+ out[ 0] = SD[get_byte(0, B0)] ^ MD[0];
+ out[ 1] = SD[get_byte(1, B3)] ^ MD[1];
+ out[ 2] = SD[get_byte(2, B2)] ^ MD[2];
+ out[ 3] = SD[get_byte(3, B1)] ^ MD[3];
+ out[ 4] = SD[get_byte(0, B1)] ^ MD[4];
+ out[ 5] = SD[get_byte(1, B0)] ^ MD[5];
+ out[ 6] = SD[get_byte(2, B3)] ^ MD[6];
+ out[ 7] = SD[get_byte(3, B2)] ^ MD[7];
+ out[ 8] = SD[get_byte(0, B2)] ^ MD[8];
+ out[ 9] = SD[get_byte(1, B1)] ^ MD[9];
+ out[10] = SD[get_byte(2, B0)] ^ MD[10];
+ out[11] = SD[get_byte(3, B3)] ^ MD[11];
+ out[12] = SD[get_byte(0, B3)] ^ MD[12];
+ out[13] = SD[get_byte(1, B2)] ^ MD[13];
+ out[14] = SD[get_byte(2, B1)] ^ MD[14];
+ out[15] = SD[get_byte(3, B0)] ^ MD[15];
+
+ in += 16;
+ out += 16;
+ }
+ }
+
+void aes_key_schedule(const uint8_t key[], size_t length,
+ secure_vector<uint32_t>& EK,
+ secure_vector<uint32_t>& DK,
+ secure_vector<uint8_t>& ME,
+ secure_vector<uint8_t>& MD)
+ {
+ static const uint32_t RC[10] = {
+ 0x01000000, 0x02000000, 0x04000000, 0x08000000, 0x10000000,
+ 0x20000000, 0x40000000, 0x80000000, 0x1B000000, 0x36000000 };
+
+ const size_t rounds = (length / 4) + 6;
+
+ secure_vector<uint32_t> XEK(length + 32), XDK(length + 32);
+
+ const size_t X = length / 4;
+
+ // Can't happen, but make static analyzers happy
+ BOTAN_ARG_CHECK(X == 4 || X == 6 || X == 8, "Invalid AES key size");
+
+ for(size_t i = 0; i != X; ++i)
+ XEK[i] = load_be<uint32_t>(key, i);
+
+ for(size_t i = X; i < 4*(rounds+1); i += X)
+ {
+ XEK[i] = XEK[i-X] ^ RC[(i-X)/X] ^ SE_word(rotl<8>(XEK[i-1]));
+
+ for(size_t j = 1; j != X; ++j)
+ {
+ XEK[i+j] = XEK[i+j-X];
+
+ if(X == 8 && j == 4)
+ XEK[i+j] ^= SE_word(XEK[i+j-1]);
+ else
+ XEK[i+j] ^= XEK[i+j-1];
+ }
+ }
+
+ for(size_t i = 0; i != 4*(rounds+1); i += 4)
+ {
+ XDK[i ] = XEK[4*rounds-i ];
+ XDK[i+1] = XEK[4*rounds-i+1];
+ XDK[i+2] = XEK[4*rounds-i+2];
+ XDK[i+3] = XEK[4*rounds-i+3];
+ }
+
+ for(size_t i = 4; i != length + 24; ++i)
+ {
+ XDK[i] = SE_word(XDK[i]);
+ XDK[i] = AES_T(AES_TD(), 0, XDK[i], XDK[i], XDK[i], XDK[i]);
+ }
+
+ ME.resize(16);
+ MD.resize(16);
+
+ for(size_t i = 0; i != 4; ++i)
+ {
+ store_be(XEK[i+4*rounds], &ME[4*i]);
+ store_be(XEK[i], &MD[4*i]);
+ }
+
+ EK.resize(length + 24);
+ DK.resize(length + 24);
+ copy_mem(EK.data(), XEK.data(), EK.size());
+ copy_mem(DK.data(), XDK.data(), DK.size());
+
+#if defined(BOTAN_HAS_AES_ARMV8)
+ if(CPUID::has_arm_aes())
+ {
+ // ARM needs the subkeys to be byte reversed
+
+ for(size_t i = 0; i != EK.size(); ++i)
+ EK[i] = reverse_bytes(EK[i]);
+ for(size_t i = 0; i != DK.size(); ++i)
+ DK[i] = reverse_bytes(DK[i]);
+ }
+#endif
+
+ }
+
+#undef AES_T
+
+size_t aes_parallelism()
+ {
+#if defined(BOTAN_HAS_AES_NI)
+ if(CPUID::has_aes_ni())
+ {
+ return 4;
+ }
+#endif
+
+ return 1;
+ }
+
+const char* aes_provider()
+ {
+#if defined(BOTAN_HAS_AES_NI)
+ if(CPUID::has_aes_ni())
+ {
+ return "aesni";
+ }
+#endif
+
+#if defined(BOTAN_HAS_AES_SSSE3)
+ if(CPUID::has_ssse3())
+ {
+ return "ssse3";
+ }
+#endif
+
+#if defined(BOTAN_HAS_AES_POWER8)
+ if(CPUID::has_ppc_crypto())
+ {
+ return "power8";
+ }
+#endif
+
+#if defined(BOTAN_HAS_AES_ARMV8)
+ if(CPUID::has_arm_aes())
+ {
+ return "armv8";
+ }
+#endif
+
+ return "base";
+ }
+
+}
+
+std::string AES_128::provider() const { return aes_provider(); }
+std::string AES_192::provider() const { return aes_provider(); }
+std::string AES_256::provider() const { return aes_provider(); }
+
+size_t AES_128::parallelism() const { return aes_parallelism(); }
+size_t AES_192::parallelism() const { return aes_parallelism(); }
+size_t AES_256::parallelism() const { return aes_parallelism(); }
+
+void AES_128::encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const
+ {
+ verify_key_set(m_EK.empty() == false);
+
+#if defined(BOTAN_HAS_AES_NI)
+ if(CPUID::has_aes_ni())
+ {
+ return aesni_encrypt_n(in, out, blocks);
+ }
+#endif
+
+#if defined(BOTAN_HAS_AES_SSSE3)
+ if(CPUID::has_ssse3())
+ {
+ return ssse3_encrypt_n(in, out, blocks);
+ }
+#endif
+
+#if defined(BOTAN_HAS_AES_ARMV8)
+ if(CPUID::has_arm_aes())
+ {
+ return armv8_encrypt_n(in, out, blocks);
+ }
+#endif
+
+#if defined(BOTAN_HAS_AES_POWER8)
+ if(CPUID::has_ppc_crypto())
+ {
+ return power8_encrypt_n(in, out, blocks);
+ }
+#endif
+
+ aes_encrypt_n(in, out, blocks, m_EK, m_ME);
+ }
+
+void AES_128::decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const
+ {
+ verify_key_set(m_DK.empty() == false);
+
+#if defined(BOTAN_HAS_AES_NI)
+ if(CPUID::has_aes_ni())
+ {
+ return aesni_decrypt_n(in, out, blocks);
+ }
+#endif
+
+#if defined(BOTAN_HAS_AES_SSSE3)
+ if(CPUID::has_ssse3())
+ {
+ return ssse3_decrypt_n(in, out, blocks);
+ }
+#endif
+
+#if defined(BOTAN_HAS_AES_ARMV8)
+ if(CPUID::has_arm_aes())
+ {
+ return armv8_decrypt_n(in, out, blocks);
+ }
+#endif
+
+#if defined(BOTAN_HAS_AES_POWER8)
+ if(CPUID::has_ppc_crypto())
+ {
+ return power8_decrypt_n(in, out, blocks);
+ }
+#endif
+
+ aes_decrypt_n(in, out, blocks, m_DK, m_MD);
+ }
+
+void AES_128::key_schedule(const uint8_t key[], size_t length)
+ {
+#if defined(BOTAN_HAS_AES_NI)
+ if(CPUID::has_aes_ni())
+ {
+ return aesni_key_schedule(key, length);
+ }
+#endif
+
+#if defined(BOTAN_HAS_AES_SSSE3)
+ if(CPUID::has_ssse3())
+ {
+ return ssse3_key_schedule(key, length);
+ }
+#endif
+
+ aes_key_schedule(key, length, m_EK, m_DK, m_ME, m_MD);
+ }
+
+void AES_128::clear()
+ {
+ zap(m_EK);
+ zap(m_DK);
+ zap(m_ME);
+ zap(m_MD);
+ }
+
+void AES_192::encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const
+ {
+ verify_key_set(m_EK.empty() == false);
+
+#if defined(BOTAN_HAS_AES_NI)
+ if(CPUID::has_aes_ni())
+ {
+ return aesni_encrypt_n(in, out, blocks);
+ }
+#endif
+
+#if defined(BOTAN_HAS_AES_SSSE3)
+ if(CPUID::has_ssse3())
+ {
+ return ssse3_encrypt_n(in, out, blocks);
+ }
+#endif
+
+#if defined(BOTAN_HAS_AES_ARMV8)
+ if(CPUID::has_arm_aes())
+ {
+ return armv8_encrypt_n(in, out, blocks);
+ }
+#endif
+
+#if defined(BOTAN_HAS_AES_POWER8)
+ if(CPUID::has_ppc_crypto())
+ {
+ return power8_encrypt_n(in, out, blocks);
+ }
+#endif
+
+ aes_encrypt_n(in, out, blocks, m_EK, m_ME);
+ }
+
+void AES_192::decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const
+ {
+ verify_key_set(m_DK.empty() == false);
+
+#if defined(BOTAN_HAS_AES_NI)
+ if(CPUID::has_aes_ni())
+ {
+ return aesni_decrypt_n(in, out, blocks);
+ }
+#endif
+
+#if defined(BOTAN_HAS_AES_SSSE3)
+ if(CPUID::has_ssse3())
+ {
+ return ssse3_decrypt_n(in, out, blocks);
+ }
+#endif
+
+#if defined(BOTAN_HAS_AES_ARMV8)
+ if(CPUID::has_arm_aes())
+ {
+ return armv8_decrypt_n(in, out, blocks);
+ }
+#endif
+
+#if defined(BOTAN_HAS_AES_POWER8)
+ if(CPUID::has_ppc_crypto())
+ {
+ return power8_decrypt_n(in, out, blocks);
+ }
+#endif
+
+ aes_decrypt_n(in, out, blocks, m_DK, m_MD);
+ }
+
+void AES_192::key_schedule(const uint8_t key[], size_t length)
+ {
+#if defined(BOTAN_HAS_AES_NI)
+ if(CPUID::has_aes_ni())
+ {
+ return aesni_key_schedule(key, length);
+ }
+#endif
+
+#if defined(BOTAN_HAS_AES_SSSE3)
+ if(CPUID::has_ssse3())
+ {
+ return ssse3_key_schedule(key, length);
+ }
+#endif
+
+ aes_key_schedule(key, length, m_EK, m_DK, m_ME, m_MD);
+ }
+
+void AES_192::clear()
+ {
+ zap(m_EK);
+ zap(m_DK);
+ zap(m_ME);
+ zap(m_MD);
+ }
+
+void AES_256::encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const
+ {
+ verify_key_set(m_EK.empty() == false);
+
+#if defined(BOTAN_HAS_AES_NI)
+ if(CPUID::has_aes_ni())
+ {
+ return aesni_encrypt_n(in, out, blocks);
+ }
+#endif
+
+#if defined(BOTAN_HAS_AES_SSSE3)
+ if(CPUID::has_ssse3())
+ {
+ return ssse3_encrypt_n(in, out, blocks);
+ }
+#endif
+
+#if defined(BOTAN_HAS_AES_ARMV8)
+ if(CPUID::has_arm_aes())
+ {
+ return armv8_encrypt_n(in, out, blocks);
+ }
+#endif
+
+#if defined(BOTAN_HAS_AES_POWER8)
+ if(CPUID::has_ppc_crypto())
+ {
+ return power8_encrypt_n(in, out, blocks);
+ }
+#endif
+
+ aes_encrypt_n(in, out, blocks, m_EK, m_ME);
+ }
+
+void AES_256::decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const
+ {
+ verify_key_set(m_DK.empty() == false);
+
+#if defined(BOTAN_HAS_AES_NI)
+ if(CPUID::has_aes_ni())
+ {
+ return aesni_decrypt_n(in, out, blocks);
+ }
+#endif
+
+#if defined(BOTAN_HAS_AES_SSSE3)
+ if(CPUID::has_ssse3())
+ {
+ return ssse3_decrypt_n(in, out, blocks);
+ }
+#endif
+
+#if defined(BOTAN_HAS_AES_ARMV8)
+ if(CPUID::has_arm_aes())
+ {
+ return armv8_decrypt_n(in, out, blocks);
+ }
+#endif
+
+#if defined(BOTAN_HAS_AES_POWER8)
+ if(CPUID::has_ppc_crypto())
+ {
+ return power8_decrypt_n(in, out, blocks);
+ }
+#endif
+
+ aes_decrypt_n(in, out, blocks, m_DK, m_MD);
+ }
+
+void AES_256::key_schedule(const uint8_t key[], size_t length)
+ {
+#if defined(BOTAN_HAS_AES_NI)
+ if(CPUID::has_aes_ni())
+ {
+ return aesni_key_schedule(key, length);
+ }
+#endif
+
+#if defined(BOTAN_HAS_AES_SSSE3)
+ if(CPUID::has_ssse3())
+ {
+ return ssse3_key_schedule(key, length);
+ }
+#endif
+
+ aes_key_schedule(key, length, m_EK, m_DK, m_ME, m_MD);
+ }
+
+void AES_256::clear()
+ {
+ zap(m_EK);
+ zap(m_DK);
+ zap(m_ME);
+ zap(m_MD);
+ }
+
+}
diff --git a/src/libs/3rdparty/botan/src/lib/block/aes/aes.h b/src/libs/3rdparty/botan/src/lib/block/aes/aes.h
new file mode 100644
index 0000000000..294cdcad37
--- /dev/null
+++ b/src/libs/3rdparty/botan/src/lib/block/aes/aes.h
@@ -0,0 +1,153 @@
+/*
+* AES
+* (C) 1999-2010 Jack Lloyd
+*
+* Botan is released under the Simplified BSD License (see license.txt)
+*/
+
+#ifndef BOTAN_AES_H_
+#define BOTAN_AES_H_
+
+#include <botan/block_cipher.h>
+
+namespace Botan {
+
+/**
+* AES-128
+*/
+class BOTAN_PUBLIC_API(2,0) AES_128 final : public Block_Cipher_Fixed_Params<16, 16>
+ {
+ public:
+ void encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const override;
+ void decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const override;
+
+ void clear() override;
+
+ std::string provider() const override;
+ std::string name() const override { return "AES-128"; }
+ BlockCipher* clone() const override { return new AES_128; }
+ size_t parallelism() const override;
+
+ private:
+ void key_schedule(const uint8_t key[], size_t length) override;
+
+#if defined(BOTAN_HAS_AES_SSSE3)
+ void ssse3_encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const;
+ void ssse3_decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const;
+ void ssse3_key_schedule(const uint8_t key[], size_t length);
+#endif
+
+#if defined(BOTAN_HAS_AES_NI)
+ void aesni_encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const;
+ void aesni_decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const;
+ void aesni_key_schedule(const uint8_t key[], size_t length);
+#endif
+
+#if defined(BOTAN_HAS_AES_ARMV8)
+ void armv8_encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const;
+ void armv8_decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const;
+#endif
+
+#if defined(BOTAN_HAS_AES_POWER8)
+ void power8_encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const;
+ void power8_decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const;
+#endif
+
+ secure_vector<uint32_t> m_EK, m_DK;
+ secure_vector<uint8_t> m_ME, m_MD;
+ };
+
+/**
+* AES-192
+*/
+class BOTAN_PUBLIC_API(2,0) AES_192 final : public Block_Cipher_Fixed_Params<16, 24>
+ {
+ public:
+ void encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const override;
+ void decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const override;
+
+ void clear() override;
+
+ std::string provider() const override;
+ std::string name() const override { return "AES-192"; }
+ BlockCipher* clone() const override { return new AES_192; }
+ size_t parallelism() const override;
+
+ private:
+#if defined(BOTAN_HAS_AES_SSSE3)
+ void ssse3_encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const;
+ void ssse3_decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const;
+ void ssse3_key_schedule(const uint8_t key[], size_t length);
+#endif
+
+#if defined(BOTAN_HAS_AES_NI)
+ void aesni_encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const;
+ void aesni_decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const;
+ void aesni_key_schedule(const uint8_t key[], size_t length);
+#endif
+
+#if defined(BOTAN_HAS_AES_ARMV8)
+ void armv8_encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const;
+ void armv8_decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const;
+#endif
+
+#if defined(BOTAN_HAS_AES_POWER8)
+ void power8_encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const;
+ void power8_decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const;
+#endif
+
+ void key_schedule(const uint8_t key[], size_t length) override;
+
+ secure_vector<uint32_t> m_EK, m_DK;
+ secure_vector<uint8_t> m_ME, m_MD;
+ };
+
+/**
+* AES-256
+*/
+class BOTAN_PUBLIC_API(2,0) AES_256 final : public Block_Cipher_Fixed_Params<16, 32>
+ {
+ public:
+ void encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const override;
+ void decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const override;
+
+ void clear() override;
+
+ std::string provider() const override;
+
+ std::string name() const override { return "AES-256"; }
+ BlockCipher* clone() const override { return new AES_256; }
+ size_t parallelism() const override;
+
+ private:
+#if defined(BOTAN_HAS_AES_SSSE3)
+ void ssse3_encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const;
+ void ssse3_decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const;
+ void ssse3_key_schedule(const uint8_t key[], size_t length);
+#endif
+
+#if defined(BOTAN_HAS_AES_NI)
+ void aesni_encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const;
+ void aesni_decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const;
+ void aesni_key_schedule(const uint8_t key[], size_t length);
+#endif
+
+#if defined(BOTAN_HAS_AES_ARMV8)
+ void armv8_encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const;
+ void armv8_decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const;
+#endif
+
+#if defined(BOTAN_HAS_AES_POWER8)
+ void power8_encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const;
+ void power8_decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const;
+#endif
+
+ void key_schedule(const uint8_t key[], size_t length) override;
+
+ secure_vector<uint32_t> m_EK, m_DK;
+ secure_vector<uint8_t> m_ME, m_MD;
+ };
+
+}
+
+#endif
diff --git a/src/libs/3rdparty/botan/src/lib/block/aes/aes_armv8/aes_armv8.cpp b/src/libs/3rdparty/botan/src/lib/block/aes/aes_armv8/aes_armv8.cpp
new file mode 100644
index 0000000000..8a332ceafd
--- /dev/null
+++ b/src/libs/3rdparty/botan/src/lib/block/aes/aes_armv8/aes_armv8.cpp
@@ -0,0 +1,501 @@
+/*
+* AES using ARMv8
+* Contributed by Jeffrey Walton
+*
+* Further changes
+* (C) 2017,2018 Jack Lloyd
+*
+* Botan is released under the Simplified BSD License (see license.txt)
+*/
+
+#include <botan/aes.h>
+#include <botan/loadstor.h>
+#include <arm_neon.h>
+
+namespace Botan {
+
+#define AES_ENC_4_ROUNDS(K) \
+ do \
+ { \
+ B0 = vaesmcq_u8(vaeseq_u8(B0, K)); \
+ B1 = vaesmcq_u8(vaeseq_u8(B1, K)); \
+ B2 = vaesmcq_u8(vaeseq_u8(B2, K)); \
+ B3 = vaesmcq_u8(vaeseq_u8(B3, K)); \
+ } while(0)
+
+#define AES_ENC_4_LAST_ROUNDS(K, K2) \
+ do \
+ { \
+ B0 = veorq_u8(vaeseq_u8(B0, K), K2); \
+ B1 = veorq_u8(vaeseq_u8(B1, K), K2); \
+ B2 = veorq_u8(vaeseq_u8(B2, K), K2); \
+ B3 = veorq_u8(vaeseq_u8(B3, K), K2); \
+ } while(0)
+
+#define AES_DEC_4_ROUNDS(K) \
+ do \
+ { \
+ B0 = vaesimcq_u8(vaesdq_u8(B0, K)); \
+ B1 = vaesimcq_u8(vaesdq_u8(B1, K)); \
+ B2 = vaesimcq_u8(vaesdq_u8(B2, K)); \
+ B3 = vaesimcq_u8(vaesdq_u8(B3, K)); \
+ } while(0)
+
+#define AES_DEC_4_LAST_ROUNDS(K, K2) \
+ do \
+ { \
+ B0 = veorq_u8(vaesdq_u8(B0, K), K2); \
+ B1 = veorq_u8(vaesdq_u8(B1, K), K2); \
+ B2 = veorq_u8(vaesdq_u8(B2, K), K2); \
+ B3 = veorq_u8(vaesdq_u8(B3, K), K2); \
+ } while(0)
+
+/*
+* AES-128 Encryption
+*/
+BOTAN_FUNC_ISA("+crypto")
+void AES_128::armv8_encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const
+ {
+ BOTAN_ASSERT(m_EK.empty() == false, "Key was set");
+
+ const uint8_t *skey = reinterpret_cast<const uint8_t*>(m_EK.data());
+ const uint8_t *mkey = reinterpret_cast<const uint8_t*>(m_ME.data());
+
+ const uint8x16_t K0 = vld1q_u8(skey + 0);
+ const uint8x16_t K1 = vld1q_u8(skey + 16);
+ const uint8x16_t K2 = vld1q_u8(skey + 32);
+ const uint8x16_t K3 = vld1q_u8(skey + 48);
+ const uint8x16_t K4 = vld1q_u8(skey + 64);
+ const uint8x16_t K5 = vld1q_u8(skey + 80);
+ const uint8x16_t K6 = vld1q_u8(skey + 96);
+ const uint8x16_t K7 = vld1q_u8(skey + 112);
+ const uint8x16_t K8 = vld1q_u8(skey + 128);
+ const uint8x16_t K9 = vld1q_u8(skey + 144);
+ const uint8x16_t K10 = vld1q_u8(mkey);
+
+ while(blocks >= 4)
+ {
+ uint8x16_t B0 = vld1q_u8(in);
+ uint8x16_t B1 = vld1q_u8(in+16);
+ uint8x16_t B2 = vld1q_u8(in+32);
+ uint8x16_t B3 = vld1q_u8(in+48);
+
+ AES_ENC_4_ROUNDS(K0);
+ AES_ENC_4_ROUNDS(K1);
+ AES_ENC_4_ROUNDS(K2);
+ AES_ENC_4_ROUNDS(K3);
+ AES_ENC_4_ROUNDS(K4);
+ AES_ENC_4_ROUNDS(K5);
+ AES_ENC_4_ROUNDS(K6);
+ AES_ENC_4_ROUNDS(K7);
+ AES_ENC_4_ROUNDS(K8);
+ AES_ENC_4_LAST_ROUNDS(K9, K10);
+
+ vst1q_u8(out, B0);
+ vst1q_u8(out+16, B1);
+ vst1q_u8(out+32, B2);
+ vst1q_u8(out+48, B3);
+
+ in += 16*4;
+ out += 16*4;
+ blocks -= 4;
+ }
+
+ for(size_t i = 0; i != blocks; ++i)
+ {
+ uint8x16_t B = vld1q_u8(in+16*i);
+ B = vaesmcq_u8(vaeseq_u8(B, K0));
+ B = vaesmcq_u8(vaeseq_u8(B, K1));
+ B = vaesmcq_u8(vaeseq_u8(B, K2));
+ B = vaesmcq_u8(vaeseq_u8(B, K3));
+ B = vaesmcq_u8(vaeseq_u8(B, K4));
+ B = vaesmcq_u8(vaeseq_u8(B, K5));
+ B = vaesmcq_u8(vaeseq_u8(B, K6));
+ B = vaesmcq_u8(vaeseq_u8(B, K7));
+ B = vaesmcq_u8(vaeseq_u8(B, K8));
+ B = veorq_u8(vaeseq_u8(B, K9), K10);
+ vst1q_u8(out+16*i, B);
+ }
+ }
+
+/*
+* AES-128 Decryption
+*/
+BOTAN_FUNC_ISA("+crypto")
+void AES_128::armv8_decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const
+ {
+ BOTAN_ASSERT(m_DK.empty() == false, "Key was set");
+
+ const uint8_t *skey = reinterpret_cast<const uint8_t*>(m_DK.data());
+ const uint8_t *mkey = reinterpret_cast<const uint8_t*>(m_MD.data());
+
+ const uint8x16_t K0 = vld1q_u8(skey + 0);
+ const uint8x16_t K1 = vld1q_u8(skey + 16);
+ const uint8x16_t K2 = vld1q_u8(skey + 32);
+ const uint8x16_t K3 = vld1q_u8(skey + 48);
+ const uint8x16_t K4 = vld1q_u8(skey + 64);
+ const uint8x16_t K5 = vld1q_u8(skey + 80);
+ const uint8x16_t K6 = vld1q_u8(skey + 96);
+ const uint8x16_t K7 = vld1q_u8(skey + 112);
+ const uint8x16_t K8 = vld1q_u8(skey + 128);
+ const uint8x16_t K9 = vld1q_u8(skey + 144);
+ const uint8x16_t K10 = vld1q_u8(mkey);
+
+ while(blocks >= 4)
+ {
+ uint8x16_t B0 = vld1q_u8(in);
+ uint8x16_t B1 = vld1q_u8(in+16);
+ uint8x16_t B2 = vld1q_u8(in+32);
+ uint8x16_t B3 = vld1q_u8(in+48);
+
+ AES_DEC_4_ROUNDS(K0);
+ AES_DEC_4_ROUNDS(K1);
+ AES_DEC_4_ROUNDS(K2);
+ AES_DEC_4_ROUNDS(K3);
+ AES_DEC_4_ROUNDS(K4);
+ AES_DEC_4_ROUNDS(K5);
+ AES_DEC_4_ROUNDS(K6);
+ AES_DEC_4_ROUNDS(K7);
+ AES_DEC_4_ROUNDS(K8);
+ AES_DEC_4_LAST_ROUNDS(K9, K10);
+
+ vst1q_u8(out, B0);
+ vst1q_u8(out+16, B1);
+ vst1q_u8(out+32, B2);
+ vst1q_u8(out+48, B3);
+
+ in += 16*4;
+ out += 16*4;
+ blocks -= 4;
+ }
+
+ for(size_t i = 0; i != blocks; ++i)
+ {
+ uint8x16_t B = vld1q_u8(in+16*i);
+ B = vaesimcq_u8(vaesdq_u8(B, K0));
+ B = vaesimcq_u8(vaesdq_u8(B, K1));
+ B = vaesimcq_u8(vaesdq_u8(B, K2));
+ B = vaesimcq_u8(vaesdq_u8(B, K3));
+ B = vaesimcq_u8(vaesdq_u8(B, K4));
+ B = vaesimcq_u8(vaesdq_u8(B, K5));
+ B = vaesimcq_u8(vaesdq_u8(B, K6));
+ B = vaesimcq_u8(vaesdq_u8(B, K7));
+ B = vaesimcq_u8(vaesdq_u8(B, K8));
+ B = veorq_u8(vaesdq_u8(B, K9), K10);
+ vst1q_u8(out+16*i, B);
+ }
+ }
+
+/*
+* AES-192 Encryption
+*/
+BOTAN_FUNC_ISA("+crypto")
+void AES_192::armv8_encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const
+ {
+ BOTAN_ASSERT(m_EK.empty() == false, "Key was set");
+
+ const uint8_t *skey = reinterpret_cast<const uint8_t*>(m_EK.data());
+ const uint8_t *mkey = reinterpret_cast<const uint8_t*>(m_ME.data());
+
+ const uint8x16_t K0 = vld1q_u8(skey + 0);
+ const uint8x16_t K1 = vld1q_u8(skey + 16);
+ const uint8x16_t K2 = vld1q_u8(skey + 32);
+ const uint8x16_t K3 = vld1q_u8(skey + 48);
+ const uint8x16_t K4 = vld1q_u8(skey + 64);
+ const uint8x16_t K5 = vld1q_u8(skey + 80);
+ const uint8x16_t K6 = vld1q_u8(skey + 96);
+ const uint8x16_t K7 = vld1q_u8(skey + 112);
+ const uint8x16_t K8 = vld1q_u8(skey + 128);
+ const uint8x16_t K9 = vld1q_u8(skey + 144);
+ const uint8x16_t K10 = vld1q_u8(skey + 160);
+ const uint8x16_t K11 = vld1q_u8(skey + 176);
+ const uint8x16_t K12 = vld1q_u8(mkey);
+
+ while(blocks >= 4)
+ {
+ uint8x16_t B0 = vld1q_u8(in);
+ uint8x16_t B1 = vld1q_u8(in+16);
+ uint8x16_t B2 = vld1q_u8(in+32);
+ uint8x16_t B3 = vld1q_u8(in+48);
+
+ AES_ENC_4_ROUNDS(K0);
+ AES_ENC_4_ROUNDS(K1);
+ AES_ENC_4_ROUNDS(K2);
+ AES_ENC_4_ROUNDS(K3);
+ AES_ENC_4_ROUNDS(K4);
+ AES_ENC_4_ROUNDS(K5);
+ AES_ENC_4_ROUNDS(K6);
+ AES_ENC_4_ROUNDS(K7);
+ AES_ENC_4_ROUNDS(K8);
+ AES_ENC_4_ROUNDS(K9);
+ AES_ENC_4_ROUNDS(K10);
+ AES_ENC_4_LAST_ROUNDS(K11, K12);
+
+ vst1q_u8(out, B0);
+ vst1q_u8(out+16, B1);
+ vst1q_u8(out+32, B2);
+ vst1q_u8(out+48, B3);
+
+ in += 16*4;
+ out += 16*4;
+ blocks -= 4;
+ }
+
+ for(size_t i = 0; i != blocks; ++i)
+ {
+ uint8x16_t B = vld1q_u8(in+16*i);
+ B = vaesmcq_u8(vaeseq_u8(B, K0));
+ B = vaesmcq_u8(vaeseq_u8(B, K1));
+ B = vaesmcq_u8(vaeseq_u8(B, K2));
+ B = vaesmcq_u8(vaeseq_u8(B, K3));
+ B = vaesmcq_u8(vaeseq_u8(B, K4));
+ B = vaesmcq_u8(vaeseq_u8(B, K5));
+ B = vaesmcq_u8(vaeseq_u8(B, K6));
+ B = vaesmcq_u8(vaeseq_u8(B, K7));
+ B = vaesmcq_u8(vaeseq_u8(B, K8));
+ B = vaesmcq_u8(vaeseq_u8(B, K9));
+ B = vaesmcq_u8(vaeseq_u8(B, K10));
+ B = veorq_u8(vaeseq_u8(B, K11), K12);
+ vst1q_u8(out+16*i, B);
+ }
+ }
+
+/*
+* AES-192 Decryption
+*/
+BOTAN_FUNC_ISA("+crypto")
+void AES_192::armv8_decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const
+ {
+ BOTAN_ASSERT(m_DK.empty() == false, "Key was set");
+ const uint8_t *skey = reinterpret_cast<const uint8_t*>(m_DK.data());
+ const uint8_t *mkey = reinterpret_cast<const uint8_t*>(m_MD.data());
+
+ const uint8x16_t K0 = vld1q_u8(skey + 0);
+ const uint8x16_t K1 = vld1q_u8(skey + 16);
+ const uint8x16_t K2 = vld1q_u8(skey + 32);
+ const uint8x16_t K3 = vld1q_u8(skey + 48);
+ const uint8x16_t K4 = vld1q_u8(skey + 64);
+ const uint8x16_t K5 = vld1q_u8(skey + 80);
+ const uint8x16_t K6 = vld1q_u8(skey + 96);
+ const uint8x16_t K7 = vld1q_u8(skey + 112);
+ const uint8x16_t K8 = vld1q_u8(skey + 128);
+ const uint8x16_t K9 = vld1q_u8(skey + 144);
+ const uint8x16_t K10 = vld1q_u8(skey + 160);
+ const uint8x16_t K11 = vld1q_u8(skey + 176);
+ const uint8x16_t K12 = vld1q_u8(mkey);
+
+ while(blocks >= 4)
+ {
+ uint8x16_t B0 = vld1q_u8(in);
+ uint8x16_t B1 = vld1q_u8(in+16);
+ uint8x16_t B2 = vld1q_u8(in+32);
+ uint8x16_t B3 = vld1q_u8(in+48);
+
+ AES_DEC_4_ROUNDS(K0);
+ AES_DEC_4_ROUNDS(K1);
+ AES_DEC_4_ROUNDS(K2);
+ AES_DEC_4_ROUNDS(K3);
+ AES_DEC_4_ROUNDS(K4);
+ AES_DEC_4_ROUNDS(K5);
+ AES_DEC_4_ROUNDS(K6);
+ AES_DEC_4_ROUNDS(K7);
+ AES_DEC_4_ROUNDS(K8);
+ AES_DEC_4_ROUNDS(K9);
+ AES_DEC_4_ROUNDS(K10);
+ AES_DEC_4_LAST_ROUNDS(K11, K12);
+
+ vst1q_u8(out, B0);
+ vst1q_u8(out+16, B1);
+ vst1q_u8(out+32, B2);
+ vst1q_u8(out+48, B3);
+
+ in += 16*4;
+ out += 16*4;
+ blocks -= 4;
+ }
+
+ for(size_t i = 0; i != blocks; ++i)
+ {
+ uint8x16_t B = vld1q_u8(in+16*i);
+ B = vaesimcq_u8(vaesdq_u8(B, K0));
+ B = vaesimcq_u8(vaesdq_u8(B, K1));
+ B = vaesimcq_u8(vaesdq_u8(B, K2));
+ B = vaesimcq_u8(vaesdq_u8(B, K3));
+ B = vaesimcq_u8(vaesdq_u8(B, K4));
+ B = vaesimcq_u8(vaesdq_u8(B, K5));
+ B = vaesimcq_u8(vaesdq_u8(B, K6));
+ B = vaesimcq_u8(vaesdq_u8(B, K7));
+ B = vaesimcq_u8(vaesdq_u8(B, K8));
+ B = vaesimcq_u8(vaesdq_u8(B, K9));
+ B = vaesimcq_u8(vaesdq_u8(B, K10));
+ B = veorq_u8(vaesdq_u8(B, K11), K12);
+ vst1q_u8(out+16*i, B);
+ }
+ }
+
+/*
+* AES-256 Encryption
+*/
+BOTAN_FUNC_ISA("+crypto")
+void AES_256::armv8_encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const
+ {
+ BOTAN_ASSERT(m_EK.empty() == false, "Key was set");
+
+ const uint8_t *skey = reinterpret_cast<const uint8_t*>(m_EK.data());
+ const uint8_t *mkey = reinterpret_cast<const uint8_t*>(m_ME.data());
+
+ const uint8x16_t K0 = vld1q_u8(skey + 0);
+ const uint8x16_t K1 = vld1q_u8(skey + 16);
+ const uint8x16_t K2 = vld1q_u8(skey + 32);
+ const uint8x16_t K3 = vld1q_u8(skey + 48);
+ const uint8x16_t K4 = vld1q_u8(skey + 64);
+ const uint8x16_t K5 = vld1q_u8(skey + 80);
+ const uint8x16_t K6 = vld1q_u8(skey + 96);
+ const uint8x16_t K7 = vld1q_u8(skey + 112);
+ const uint8x16_t K8 = vld1q_u8(skey + 128);
+ const uint8x16_t K9 = vld1q_u8(skey + 144);
+ const uint8x16_t K10 = vld1q_u8(skey + 160);
+ const uint8x16_t K11 = vld1q_u8(skey + 176);
+ const uint8x16_t K12 = vld1q_u8(skey + 192);
+ const uint8x16_t K13 = vld1q_u8(skey + 208);
+ const uint8x16_t K14 = vld1q_u8(mkey);
+
+ while(blocks >= 4)
+ {
+ uint8x16_t B0 = vld1q_u8(in);
+ uint8x16_t B1 = vld1q_u8(in+16);
+ uint8x16_t B2 = vld1q_u8(in+32);
+ uint8x16_t B3 = vld1q_u8(in+48);
+
+ AES_ENC_4_ROUNDS(K0);
+ AES_ENC_4_ROUNDS(K1);
+ AES_ENC_4_ROUNDS(K2);
+ AES_ENC_4_ROUNDS(K3);
+ AES_ENC_4_ROUNDS(K4);
+ AES_ENC_4_ROUNDS(K5);
+ AES_ENC_4_ROUNDS(K6);
+ AES_ENC_4_ROUNDS(K7);
+ AES_ENC_4_ROUNDS(K8);
+ AES_ENC_4_ROUNDS(K9);
+ AES_ENC_4_ROUNDS(K10);
+ AES_ENC_4_ROUNDS(K11);
+ AES_ENC_4_ROUNDS(K12);
+ AES_ENC_4_LAST_ROUNDS(K13, K14);
+
+ vst1q_u8(out, B0);
+ vst1q_u8(out+16, B1);
+ vst1q_u8(out+32, B2);
+ vst1q_u8(out+48, B3);
+
+ in += 16*4;
+ out += 16*4;
+ blocks -= 4;
+ }
+
+ for(size_t i = 0; i != blocks; ++i)
+ {
+ uint8x16_t B = vld1q_u8(in+16*i);
+ B = vaesmcq_u8(vaeseq_u8(B, K0));
+ B = vaesmcq_u8(vaeseq_u8(B, K1));
+ B = vaesmcq_u8(vaeseq_u8(B, K2));
+ B = vaesmcq_u8(vaeseq_u8(B, K3));
+ B = vaesmcq_u8(vaeseq_u8(B, K4));
+ B = vaesmcq_u8(vaeseq_u8(B, K5));
+ B = vaesmcq_u8(vaeseq_u8(B, K6));
+ B = vaesmcq_u8(vaeseq_u8(B, K7));
+ B = vaesmcq_u8(vaeseq_u8(B, K8));
+ B = vaesmcq_u8(vaeseq_u8(B, K9));
+ B = vaesmcq_u8(vaeseq_u8(B, K10));
+ B = vaesmcq_u8(vaeseq_u8(B, K11));
+ B = vaesmcq_u8(vaeseq_u8(B, K12));
+ B = veorq_u8(vaeseq_u8(B, K13), K14);
+ vst1q_u8(out+16*i, B);
+ }
+ }
+
+/*
+* AES-256 Decryption
+*/
+BOTAN_FUNC_ISA("+crypto")
+void AES_256::armv8_decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const
+ {
+ BOTAN_ASSERT(m_DK.empty() == false, "Key was set");
+
+ const uint8_t *skey = reinterpret_cast<const uint8_t*>(m_DK.data());
+ const uint8_t *mkey = reinterpret_cast<const uint8_t*>(m_MD.data());
+
+ const uint8x16_t K0 = vld1q_u8(skey + 0);
+ const uint8x16_t K1 = vld1q_u8(skey + 16);
+ const uint8x16_t K2 = vld1q_u8(skey + 32);
+ const uint8x16_t K3 = vld1q_u8(skey + 48);
+ const uint8x16_t K4 = vld1q_u8(skey + 64);
+ const uint8x16_t K5 = vld1q_u8(skey + 80);
+ const uint8x16_t K6 = vld1q_u8(skey + 96);
+ const uint8x16_t K7 = vld1q_u8(skey + 112);
+ const uint8x16_t K8 = vld1q_u8(skey + 128);
+ const uint8x16_t K9 = vld1q_u8(skey + 144);
+ const uint8x16_t K10 = vld1q_u8(skey + 160);
+ const uint8x16_t K11 = vld1q_u8(skey + 176);
+ const uint8x16_t K12 = vld1q_u8(skey + 192);
+ const uint8x16_t K13 = vld1q_u8(skey + 208);
+ const uint8x16_t K14 = vld1q_u8(mkey);
+
+ while(blocks >= 4)
+ {
+ uint8x16_t B0 = vld1q_u8(in);
+ uint8x16_t B1 = vld1q_u8(in+16);
+ uint8x16_t B2 = vld1q_u8(in+32);
+ uint8x16_t B3 = vld1q_u8(in+48);
+
+ AES_DEC_4_ROUNDS(K0);
+ AES_DEC_4_ROUNDS(K1);
+ AES_DEC_4_ROUNDS(K2);
+ AES_DEC_4_ROUNDS(K3);
+ AES_DEC_4_ROUNDS(K4);
+ AES_DEC_4_ROUNDS(K5);
+ AES_DEC_4_ROUNDS(K6);
+ AES_DEC_4_ROUNDS(K7);
+ AES_DEC_4_ROUNDS(K8);
+ AES_DEC_4_ROUNDS(K9);
+ AES_DEC_4_ROUNDS(K10);
+ AES_DEC_4_ROUNDS(K11);
+ AES_DEC_4_ROUNDS(K12);
+ AES_DEC_4_LAST_ROUNDS(K13, K14);
+
+ vst1q_u8(out, B0);
+ vst1q_u8(out+16, B1);
+ vst1q_u8(out+32, B2);
+ vst1q_u8(out+48, B3);
+
+ in += 16*4;
+ out += 16*4;
+ blocks -= 4;
+ }
+
+ for(size_t i = 0; i != blocks; ++i)
+ {
+ uint8x16_t B = vld1q_u8(in+16*i);
+ B = vaesimcq_u8(vaesdq_u8(B, K0));
+ B = vaesimcq_u8(vaesdq_u8(B, K1));
+ B = vaesimcq_u8(vaesdq_u8(B, K2));
+ B = vaesimcq_u8(vaesdq_u8(B, K3));
+ B = vaesimcq_u8(vaesdq_u8(B, K4));
+ B = vaesimcq_u8(vaesdq_u8(B, K5));
+ B = vaesimcq_u8(vaesdq_u8(B, K6));
+ B = vaesimcq_u8(vaesdq_u8(B, K7));
+ B = vaesimcq_u8(vaesdq_u8(B, K8));
+ B = vaesimcq_u8(vaesdq_u8(B, K9));
+ B = vaesimcq_u8(vaesdq_u8(B, K10));
+ B = vaesimcq_u8(vaesdq_u8(B, K11));
+ B = vaesimcq_u8(vaesdq_u8(B, K12));
+ B = veorq_u8(vaesdq_u8(B, K13), K14);
+ vst1q_u8(out+16*i, B);
+ }
+ }
+
+#undef AES_ENC_4_ROUNDS
+#undef AES_ENC_4_LAST_ROUNDS
+#undef AES_DEC_4_ROUNDS
+#undef AES_DEC_4_LAST_ROUNDS
+
+}
diff --git a/src/libs/3rdparty/botan/src/lib/block/aes/aes_armv8/info.txt b/src/libs/3rdparty/botan/src/lib/block/aes/aes_armv8/info.txt
new file mode 100644
index 0000000000..08d51a1b2d
--- /dev/null
+++ b/src/libs/3rdparty/botan/src/lib/block/aes/aes_armv8/info.txt
@@ -0,0 +1,10 @@
+<defines>
+AES_ARMV8 -> 20170903
+</defines>
+
+need_isa armv8crypto
+
+<cc>
+gcc:5
+clang:3.8
+</cc>
diff --git a/src/libs/3rdparty/botan/src/lib/block/aes/aes_ni/aes_ni.cpp b/src/libs/3rdparty/botan/src/lib/block/aes/aes_ni/aes_ni.cpp
new file mode 100644
index 0000000000..9f1ba8fcc2
--- /dev/null
+++ b/src/libs/3rdparty/botan/src/lib/block/aes/aes_ni/aes_ni.cpp
@@ -0,0 +1,792 @@
+/*
+* AES using AES-NI instructions
+* (C) 2009,2012 Jack Lloyd
+*
+* Botan is released under the Simplified BSD License (see license.txt)
+*/
+
+#include <botan/aes.h>
+#include <botan/loadstor.h>
+#include <wmmintrin.h>
+
+namespace Botan {
+
+namespace {
+
+BOTAN_FUNC_ISA("ssse3")
+__m128i aes_128_key_expansion(__m128i key, __m128i key_with_rcon)
+ {
+ key_with_rcon = _mm_shuffle_epi32(key_with_rcon, _MM_SHUFFLE(3,3,3,3));
+ key = _mm_xor_si128(key, _mm_slli_si128(key, 4));
+ key = _mm_xor_si128(key, _mm_slli_si128(key, 4));
+ key = _mm_xor_si128(key, _mm_slli_si128(key, 4));
+ return _mm_xor_si128(key, key_with_rcon);
+ }
+
+BOTAN_FUNC_ISA("ssse3")
+void aes_192_key_expansion(__m128i* K1, __m128i* K2, __m128i key2_with_rcon,
+ uint32_t out[], bool last)
+ {
+ __m128i key1 = *K1;
+ __m128i key2 = *K2;
+
+ key2_with_rcon = _mm_shuffle_epi32(key2_with_rcon, _MM_SHUFFLE(1,1,1,1));
+ key1 = _mm_xor_si128(key1, _mm_slli_si128(key1, 4));
+ key1 = _mm_xor_si128(key1, _mm_slli_si128(key1, 4));
+ key1 = _mm_xor_si128(key1, _mm_slli_si128(key1, 4));
+ key1 = _mm_xor_si128(key1, key2_with_rcon);
+
+ *K1 = key1;
+ _mm_storeu_si128(reinterpret_cast<__m128i*>(out), key1);
+
+ if(last)
+ return;
+
+ key2 = _mm_xor_si128(key2, _mm_slli_si128(key2, 4));
+ key2 = _mm_xor_si128(key2, _mm_shuffle_epi32(key1, _MM_SHUFFLE(3,3,3,3)));
+
+ *K2 = key2;
+ out[4] = _mm_cvtsi128_si32(key2);
+ out[5] = _mm_cvtsi128_si32(_mm_srli_si128(key2, 4));
+ }
+
+/*
+* The second half of the AES-256 key expansion (other half same as AES-128)
+*/
+BOTAN_FUNC_ISA("ssse3,aes")
+__m128i aes_256_key_expansion(__m128i key, __m128i key2)
+ {
+ __m128i key_with_rcon = _mm_aeskeygenassist_si128(key2, 0x00);
+ key_with_rcon = _mm_shuffle_epi32(key_with_rcon, _MM_SHUFFLE(2,2,2,2));
+
+ key = _mm_xor_si128(key, _mm_slli_si128(key, 4));
+ key = _mm_xor_si128(key, _mm_slli_si128(key, 4));
+ key = _mm_xor_si128(key, _mm_slli_si128(key, 4));
+ return _mm_xor_si128(key, key_with_rcon);
+ }
+
+}
+
+#define AES_ENC_4_ROUNDS(K) \
+ do \
+ { \
+ B0 = _mm_aesenc_si128(B0, K); \
+ B1 = _mm_aesenc_si128(B1, K); \
+ B2 = _mm_aesenc_si128(B2, K); \
+ B3 = _mm_aesenc_si128(B3, K); \
+ } while(0)
+
+#define AES_ENC_4_LAST_ROUNDS(K) \
+ do \
+ { \
+ B0 = _mm_aesenclast_si128(B0, K); \
+ B1 = _mm_aesenclast_si128(B1, K); \
+ B2 = _mm_aesenclast_si128(B2, K); \
+ B3 = _mm_aesenclast_si128(B3, K); \
+ } while(0)
+
+#define AES_DEC_4_ROUNDS(K) \
+ do \
+ { \
+ B0 = _mm_aesdec_si128(B0, K); \
+ B1 = _mm_aesdec_si128(B1, K); \
+ B2 = _mm_aesdec_si128(B2, K); \
+ B3 = _mm_aesdec_si128(B3, K); \
+ } while(0)
+
+#define AES_DEC_4_LAST_ROUNDS(K) \
+ do \
+ { \
+ B0 = _mm_aesdeclast_si128(B0, K); \
+ B1 = _mm_aesdeclast_si128(B1, K); \
+ B2 = _mm_aesdeclast_si128(B2, K); \
+ B3 = _mm_aesdeclast_si128(B3, K); \
+ } while(0)
+
+/*
+* AES-128 Encryption
+*/
+BOTAN_FUNC_ISA("ssse3,aes")
+void AES_128::aesni_encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const
+ {
+ BOTAN_ASSERT(m_EK.empty() == false, "Key was set");
+
+ const __m128i* in_mm = reinterpret_cast<const __m128i*>(in);
+ __m128i* out_mm = reinterpret_cast<__m128i*>(out);
+
+ const __m128i* key_mm = reinterpret_cast<const __m128i*>(m_EK.data());
+
+ const __m128i K0 = _mm_loadu_si128(key_mm);
+ const __m128i K1 = _mm_loadu_si128(key_mm + 1);
+ const __m128i K2 = _mm_loadu_si128(key_mm + 2);
+ const __m128i K3 = _mm_loadu_si128(key_mm + 3);
+ const __m128i K4 = _mm_loadu_si128(key_mm + 4);
+ const __m128i K5 = _mm_loadu_si128(key_mm + 5);
+ const __m128i K6 = _mm_loadu_si128(key_mm + 6);
+ const __m128i K7 = _mm_loadu_si128(key_mm + 7);
+ const __m128i K8 = _mm_loadu_si128(key_mm + 8);
+ const __m128i K9 = _mm_loadu_si128(key_mm + 9);
+ const __m128i K10 = _mm_loadu_si128(key_mm + 10);
+
+ while(blocks >= 4)
+ {
+ __m128i B0 = _mm_loadu_si128(in_mm + 0);
+ __m128i B1 = _mm_loadu_si128(in_mm + 1);
+ __m128i B2 = _mm_loadu_si128(in_mm + 2);
+ __m128i B3 = _mm_loadu_si128(in_mm + 3);
+
+ B0 = _mm_xor_si128(B0, K0);
+ B1 = _mm_xor_si128(B1, K0);
+ B2 = _mm_xor_si128(B2, K0);
+ B3 = _mm_xor_si128(B3, K0);
+
+ AES_ENC_4_ROUNDS(K1);
+ AES_ENC_4_ROUNDS(K2);
+ AES_ENC_4_ROUNDS(K3);
+ AES_ENC_4_ROUNDS(K4);
+ AES_ENC_4_ROUNDS(K5);
+ AES_ENC_4_ROUNDS(K6);
+ AES_ENC_4_ROUNDS(K7);
+ AES_ENC_4_ROUNDS(K8);
+ AES_ENC_4_ROUNDS(K9);
+ AES_ENC_4_LAST_ROUNDS(K10);
+
+ _mm_storeu_si128(out_mm + 0, B0);
+ _mm_storeu_si128(out_mm + 1, B1);
+ _mm_storeu_si128(out_mm + 2, B2);
+ _mm_storeu_si128(out_mm + 3, B3);
+
+ blocks -= 4;
+ in_mm += 4;
+ out_mm += 4;
+ }
+
+ for(size_t i = 0; i != blocks; ++i)
+ {
+ __m128i B = _mm_loadu_si128(in_mm + i);
+
+ B = _mm_xor_si128(B, K0);
+
+ B = _mm_aesenc_si128(B, K1);
+ B = _mm_aesenc_si128(B, K2);
+ B = _mm_aesenc_si128(B, K3);
+ B = _mm_aesenc_si128(B, K4);
+ B = _mm_aesenc_si128(B, K5);
+ B = _mm_aesenc_si128(B, K6);
+ B = _mm_aesenc_si128(B, K7);
+ B = _mm_aesenc_si128(B, K8);
+ B = _mm_aesenc_si128(B, K9);
+ B = _mm_aesenclast_si128(B, K10);
+
+ _mm_storeu_si128(out_mm + i, B);
+ }
+ }
+
+/*
+* AES-128 Decryption
+*/
+BOTAN_FUNC_ISA("ssse3,aes")
+void AES_128::aesni_decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const
+ {
+ BOTAN_ASSERT(m_DK.empty() == false, "Key was set");
+
+ const __m128i* in_mm = reinterpret_cast<const __m128i*>(in);
+ __m128i* out_mm = reinterpret_cast<__m128i*>(out);
+
+ const __m128i* key_mm = reinterpret_cast<const __m128i*>(m_DK.data());
+
+ const __m128i K0 = _mm_loadu_si128(key_mm);
+ const __m128i K1 = _mm_loadu_si128(key_mm + 1);
+ const __m128i K2 = _mm_loadu_si128(key_mm + 2);
+ const __m128i K3 = _mm_loadu_si128(key_mm + 3);
+ const __m128i K4 = _mm_loadu_si128(key_mm + 4);
+ const __m128i K5 = _mm_loadu_si128(key_mm + 5);
+ const __m128i K6 = _mm_loadu_si128(key_mm + 6);
+ const __m128i K7 = _mm_loadu_si128(key_mm + 7);
+ const __m128i K8 = _mm_loadu_si128(key_mm + 8);
+ const __m128i K9 = _mm_loadu_si128(key_mm + 9);
+ const __m128i K10 = _mm_loadu_si128(key_mm + 10);
+
+ while(blocks >= 4)
+ {
+ __m128i B0 = _mm_loadu_si128(in_mm + 0);
+ __m128i B1 = _mm_loadu_si128(in_mm + 1);
+ __m128i B2 = _mm_loadu_si128(in_mm + 2);
+ __m128i B3 = _mm_loadu_si128(in_mm + 3);
+
+ B0 = _mm_xor_si128(B0, K0);
+ B1 = _mm_xor_si128(B1, K0);
+ B2 = _mm_xor_si128(B2, K0);
+ B3 = _mm_xor_si128(B3, K0);
+
+ AES_DEC_4_ROUNDS(K1);
+ AES_DEC_4_ROUNDS(K2);
+ AES_DEC_4_ROUNDS(K3);
+ AES_DEC_4_ROUNDS(K4);
+ AES_DEC_4_ROUNDS(K5);
+ AES_DEC_4_ROUNDS(K6);
+ AES_DEC_4_ROUNDS(K7);
+ AES_DEC_4_ROUNDS(K8);
+ AES_DEC_4_ROUNDS(K9);
+ AES_DEC_4_LAST_ROUNDS(K10);
+
+ _mm_storeu_si128(out_mm + 0, B0);
+ _mm_storeu_si128(out_mm + 1, B1);
+ _mm_storeu_si128(out_mm + 2, B2);
+ _mm_storeu_si128(out_mm + 3, B3);
+
+ blocks -= 4;
+ in_mm += 4;
+ out_mm += 4;
+ }
+
+ for(size_t i = 0; i != blocks; ++i)
+ {
+ __m128i B = _mm_loadu_si128(in_mm + i);
+
+ B = _mm_xor_si128(B, K0);
+
+ B = _mm_aesdec_si128(B, K1);
+ B = _mm_aesdec_si128(B, K2);
+ B = _mm_aesdec_si128(B, K3);
+ B = _mm_aesdec_si128(B, K4);
+ B = _mm_aesdec_si128(B, K5);
+ B = _mm_aesdec_si128(B, K6);
+ B = _mm_aesdec_si128(B, K7);
+ B = _mm_aesdec_si128(B, K8);
+ B = _mm_aesdec_si128(B, K9);
+ B = _mm_aesdeclast_si128(B, K10);
+
+ _mm_storeu_si128(out_mm + i, B);
+ }
+ }
+
+/*
+* AES-128 Key Schedule
+*/
+BOTAN_FUNC_ISA("ssse3,aes")
+void AES_128::aesni_key_schedule(const uint8_t key[], size_t)
+ {
+ m_EK.resize(44);
+ m_DK.resize(44);
+
+ #define AES_128_key_exp(K, RCON) \
+ aes_128_key_expansion(K, _mm_aeskeygenassist_si128(K, RCON))
+
+ const __m128i K0 = _mm_loadu_si128(reinterpret_cast<const __m128i*>(key));
+ const __m128i K1 = AES_128_key_exp(K0, 0x01);
+ const __m128i K2 = AES_128_key_exp(K1, 0x02);
+ const __m128i K3 = AES_128_key_exp(K2, 0x04);
+ const __m128i K4 = AES_128_key_exp(K3, 0x08);
+ const __m128i K5 = AES_128_key_exp(K4, 0x10);
+ const __m128i K6 = AES_128_key_exp(K5, 0x20);
+ const __m128i K7 = AES_128_key_exp(K6, 0x40);
+ const __m128i K8 = AES_128_key_exp(K7, 0x80);
+ const __m128i K9 = AES_128_key_exp(K8, 0x1B);
+ const __m128i K10 = AES_128_key_exp(K9, 0x36);
+
+ __m128i* EK_mm = reinterpret_cast<__m128i*>(m_EK.data());
+ _mm_storeu_si128(EK_mm , K0);
+ _mm_storeu_si128(EK_mm + 1, K1);
+ _mm_storeu_si128(EK_mm + 2, K2);
+ _mm_storeu_si128(EK_mm + 3, K3);
+ _mm_storeu_si128(EK_mm + 4, K4);
+ _mm_storeu_si128(EK_mm + 5, K5);
+ _mm_storeu_si128(EK_mm + 6, K6);
+ _mm_storeu_si128(EK_mm + 7, K7);
+ _mm_storeu_si128(EK_mm + 8, K8);
+ _mm_storeu_si128(EK_mm + 9, K9);
+ _mm_storeu_si128(EK_mm + 10, K10);
+
+ // Now generate decryption keys
+
+ __m128i* DK_mm = reinterpret_cast<__m128i*>(m_DK.data());
+ _mm_storeu_si128(DK_mm , K10);
+ _mm_storeu_si128(DK_mm + 1, _mm_aesimc_si128(K9));
+ _mm_storeu_si128(DK_mm + 2, _mm_aesimc_si128(K8));
+ _mm_storeu_si128(DK_mm + 3, _mm_aesimc_si128(K7));
+ _mm_storeu_si128(DK_mm + 4, _mm_aesimc_si128(K6));
+ _mm_storeu_si128(DK_mm + 5, _mm_aesimc_si128(K5));
+ _mm_storeu_si128(DK_mm + 6, _mm_aesimc_si128(K4));
+ _mm_storeu_si128(DK_mm + 7, _mm_aesimc_si128(K3));
+ _mm_storeu_si128(DK_mm + 8, _mm_aesimc_si128(K2));
+ _mm_storeu_si128(DK_mm + 9, _mm_aesimc_si128(K1));
+ _mm_storeu_si128(DK_mm + 10, K0);
+ }
+
+/*
+* AES-192 Encryption
+*/
+BOTAN_FUNC_ISA("ssse3,aes")
+void AES_192::aesni_encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const
+ {
+ BOTAN_ASSERT(m_EK.empty() == false, "Key was set");
+
+ const __m128i* in_mm = reinterpret_cast<const __m128i*>(in);
+ __m128i* out_mm = reinterpret_cast<__m128i*>(out);
+
+ const __m128i* key_mm = reinterpret_cast<const __m128i*>(m_EK.data());
+
+ const __m128i K0 = _mm_loadu_si128(key_mm);
+ const __m128i K1 = _mm_loadu_si128(key_mm + 1);
+ const __m128i K2 = _mm_loadu_si128(key_mm + 2);
+ const __m128i K3 = _mm_loadu_si128(key_mm + 3);
+ const __m128i K4 = _mm_loadu_si128(key_mm + 4);
+ const __m128i K5 = _mm_loadu_si128(key_mm + 5);
+ const __m128i K6 = _mm_loadu_si128(key_mm + 6);
+ const __m128i K7 = _mm_loadu_si128(key_mm + 7);
+ const __m128i K8 = _mm_loadu_si128(key_mm + 8);
+ const __m128i K9 = _mm_loadu_si128(key_mm + 9);
+ const __m128i K10 = _mm_loadu_si128(key_mm + 10);
+ const __m128i K11 = _mm_loadu_si128(key_mm + 11);
+ const __m128i K12 = _mm_loadu_si128(key_mm + 12);
+
+ while(blocks >= 4)
+ {
+ __m128i B0 = _mm_loadu_si128(in_mm + 0);
+ __m128i B1 = _mm_loadu_si128(in_mm + 1);
+ __m128i B2 = _mm_loadu_si128(in_mm + 2);
+ __m128i B3 = _mm_loadu_si128(in_mm + 3);
+
+ B0 = _mm_xor_si128(B0, K0);
+ B1 = _mm_xor_si128(B1, K0);
+ B2 = _mm_xor_si128(B2, K0);
+ B3 = _mm_xor_si128(B3, K0);
+
+ AES_ENC_4_ROUNDS(K1);
+ AES_ENC_4_ROUNDS(K2);
+ AES_ENC_4_ROUNDS(K3);
+ AES_ENC_4_ROUNDS(K4);
+ AES_ENC_4_ROUNDS(K5);
+ AES_ENC_4_ROUNDS(K6);
+ AES_ENC_4_ROUNDS(K7);
+ AES_ENC_4_ROUNDS(K8);
+ AES_ENC_4_ROUNDS(K9);
+ AES_ENC_4_ROUNDS(K10);
+ AES_ENC_4_ROUNDS(K11);
+ AES_ENC_4_LAST_ROUNDS(K12);
+
+ _mm_storeu_si128(out_mm + 0, B0);
+ _mm_storeu_si128(out_mm + 1, B1);
+ _mm_storeu_si128(out_mm + 2, B2);
+ _mm_storeu_si128(out_mm + 3, B3);
+
+ blocks -= 4;
+ in_mm += 4;
+ out_mm += 4;
+ }
+
+ for(size_t i = 0; i != blocks; ++i)
+ {
+ __m128i B = _mm_loadu_si128(in_mm + i);
+
+ B = _mm_xor_si128(B, K0);
+
+ B = _mm_aesenc_si128(B, K1);
+ B = _mm_aesenc_si128(B, K2);
+ B = _mm_aesenc_si128(B, K3);
+ B = _mm_aesenc_si128(B, K4);
+ B = _mm_aesenc_si128(B, K5);
+ B = _mm_aesenc_si128(B, K6);
+ B = _mm_aesenc_si128(B, K7);
+ B = _mm_aesenc_si128(B, K8);
+ B = _mm_aesenc_si128(B, K9);
+ B = _mm_aesenc_si128(B, K10);
+ B = _mm_aesenc_si128(B, K11);
+ B = _mm_aesenclast_si128(B, K12);
+
+ _mm_storeu_si128(out_mm + i, B);
+ }
+ }
+
+/*
+* AES-192 Decryption
+*/
+BOTAN_FUNC_ISA("ssse3,aes")
+void AES_192::aesni_decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const
+ {
+ BOTAN_ASSERT(m_DK.empty() == false, "Key was set");
+
+ const __m128i* in_mm = reinterpret_cast<const __m128i*>(in);
+ __m128i* out_mm = reinterpret_cast<__m128i*>(out);
+
+ const __m128i* key_mm = reinterpret_cast<const __m128i*>(m_DK.data());
+
+ const __m128i K0 = _mm_loadu_si128(key_mm);
+ const __m128i K1 = _mm_loadu_si128(key_mm + 1);
+ const __m128i K2 = _mm_loadu_si128(key_mm + 2);
+ const __m128i K3 = _mm_loadu_si128(key_mm + 3);
+ const __m128i K4 = _mm_loadu_si128(key_mm + 4);
+ const __m128i K5 = _mm_loadu_si128(key_mm + 5);
+ const __m128i K6 = _mm_loadu_si128(key_mm + 6);
+ const __m128i K7 = _mm_loadu_si128(key_mm + 7);
+ const __m128i K8 = _mm_loadu_si128(key_mm + 8);
+ const __m128i K9 = _mm_loadu_si128(key_mm + 9);
+ const __m128i K10 = _mm_loadu_si128(key_mm + 10);
+ const __m128i K11 = _mm_loadu_si128(key_mm + 11);
+ const __m128i K12 = _mm_loadu_si128(key_mm + 12);
+
+ while(blocks >= 4)
+ {
+ __m128i B0 = _mm_loadu_si128(in_mm + 0);
+ __m128i B1 = _mm_loadu_si128(in_mm + 1);
+ __m128i B2 = _mm_loadu_si128(in_mm + 2);
+ __m128i B3 = _mm_loadu_si128(in_mm + 3);
+
+ B0 = _mm_xor_si128(B0, K0);
+ B1 = _mm_xor_si128(B1, K0);
+ B2 = _mm_xor_si128(B2, K0);
+ B3 = _mm_xor_si128(B3, K0);
+
+ AES_DEC_4_ROUNDS(K1);
+ AES_DEC_4_ROUNDS(K2);
+ AES_DEC_4_ROUNDS(K3);
+ AES_DEC_4_ROUNDS(K4);
+ AES_DEC_4_ROUNDS(K5);
+ AES_DEC_4_ROUNDS(K6);
+ AES_DEC_4_ROUNDS(K7);
+ AES_DEC_4_ROUNDS(K8);
+ AES_DEC_4_ROUNDS(K9);
+ AES_DEC_4_ROUNDS(K10);
+ AES_DEC_4_ROUNDS(K11);
+ AES_DEC_4_LAST_ROUNDS(K12);
+
+ _mm_storeu_si128(out_mm + 0, B0);
+ _mm_storeu_si128(out_mm + 1, B1);
+ _mm_storeu_si128(out_mm + 2, B2);
+ _mm_storeu_si128(out_mm + 3, B3);
+
+ blocks -= 4;
+ in_mm += 4;
+ out_mm += 4;
+ }
+
+ for(size_t i = 0; i != blocks; ++i)
+ {
+ __m128i B = _mm_loadu_si128(in_mm + i);
+
+ B = _mm_xor_si128(B, K0);
+
+ B = _mm_aesdec_si128(B, K1);
+ B = _mm_aesdec_si128(B, K2);
+ B = _mm_aesdec_si128(B, K3);
+ B = _mm_aesdec_si128(B, K4);
+ B = _mm_aesdec_si128(B, K5);
+ B = _mm_aesdec_si128(B, K6);
+ B = _mm_aesdec_si128(B, K7);
+ B = _mm_aesdec_si128(B, K8);
+ B = _mm_aesdec_si128(B, K9);
+ B = _mm_aesdec_si128(B, K10);
+ B = _mm_aesdec_si128(B, K11);
+ B = _mm_aesdeclast_si128(B, K12);
+
+ _mm_storeu_si128(out_mm + i, B);
+ }
+ }
+
+/*
+* AES-192 Key Schedule
+*/
+BOTAN_FUNC_ISA("ssse3,aes")
+void AES_192::aesni_key_schedule(const uint8_t key[], size_t)
+ {
+ m_EK.resize(52);
+ m_DK.resize(52);
+
+ __m128i K0 = _mm_loadu_si128(reinterpret_cast<const __m128i*>(key));
+ __m128i K1 = _mm_loadu_si128(reinterpret_cast<const __m128i*>(key + 8));
+ K1 = _mm_srli_si128(K1, 8);
+
+ load_le(m_EK.data(), key, 6);
+
+ #define AES_192_key_exp(RCON, EK_OFF) \
+ aes_192_key_expansion(&K0, &K1, \
+ _mm_aeskeygenassist_si128(K1, RCON), \
+ &m_EK[EK_OFF], EK_OFF == 48)
+
+ AES_192_key_exp(0x01, 6);
+ AES_192_key_exp(0x02, 12);
+ AES_192_key_exp(0x04, 18);
+ AES_192_key_exp(0x08, 24);
+ AES_192_key_exp(0x10, 30);
+ AES_192_key_exp(0x20, 36);
+ AES_192_key_exp(0x40, 42);
+ AES_192_key_exp(0x80, 48);
+
+ #undef AES_192_key_exp
+
+ // Now generate decryption keys
+ const __m128i* EK_mm = reinterpret_cast<const __m128i*>(m_EK.data());
+
+ __m128i* DK_mm = reinterpret_cast<__m128i*>(m_DK.data());
+ _mm_storeu_si128(DK_mm , _mm_loadu_si128(EK_mm + 12));
+ _mm_storeu_si128(DK_mm + 1, _mm_aesimc_si128(_mm_loadu_si128(EK_mm + 11)));
+ _mm_storeu_si128(DK_mm + 2, _mm_aesimc_si128(_mm_loadu_si128(EK_mm + 10)));
+ _mm_storeu_si128(DK_mm + 3, _mm_aesimc_si128(_mm_loadu_si128(EK_mm + 9)));
+ _mm_storeu_si128(DK_mm + 4, _mm_aesimc_si128(_mm_loadu_si128(EK_mm + 8)));
+ _mm_storeu_si128(DK_mm + 5, _mm_aesimc_si128(_mm_loadu_si128(EK_mm + 7)));
+ _mm_storeu_si128(DK_mm + 6, _mm_aesimc_si128(_mm_loadu_si128(EK_mm + 6)));
+ _mm_storeu_si128(DK_mm + 7, _mm_aesimc_si128(_mm_loadu_si128(EK_mm + 5)));
+ _mm_storeu_si128(DK_mm + 8, _mm_aesimc_si128(_mm_loadu_si128(EK_mm + 4)));
+ _mm_storeu_si128(DK_mm + 9, _mm_aesimc_si128(_mm_loadu_si128(EK_mm + 3)));
+ _mm_storeu_si128(DK_mm + 10, _mm_aesimc_si128(_mm_loadu_si128(EK_mm + 2)));
+ _mm_storeu_si128(DK_mm + 11, _mm_aesimc_si128(_mm_loadu_si128(EK_mm + 1)));
+ _mm_storeu_si128(DK_mm + 12, _mm_loadu_si128(EK_mm + 0));
+ }
+
+/*
+* AES-256 Encryption
+*/
+BOTAN_FUNC_ISA("ssse3,aes")
+void AES_256::aesni_encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const
+ {
+ BOTAN_ASSERT(m_EK.empty() == false, "Key was set");
+
+ const __m128i* in_mm = reinterpret_cast<const __m128i*>(in);
+ __m128i* out_mm = reinterpret_cast<__m128i*>(out);
+
+ const __m128i* key_mm = reinterpret_cast<const __m128i*>(m_EK.data());
+
+ const __m128i K0 = _mm_loadu_si128(key_mm);
+ const __m128i K1 = _mm_loadu_si128(key_mm + 1);
+ const __m128i K2 = _mm_loadu_si128(key_mm + 2);
+ const __m128i K3 = _mm_loadu_si128(key_mm + 3);
+ const __m128i K4 = _mm_loadu_si128(key_mm + 4);
+ const __m128i K5 = _mm_loadu_si128(key_mm + 5);
+ const __m128i K6 = _mm_loadu_si128(key_mm + 6);
+ const __m128i K7 = _mm_loadu_si128(key_mm + 7);
+ const __m128i K8 = _mm_loadu_si128(key_mm + 8);
+ const __m128i K9 = _mm_loadu_si128(key_mm + 9);
+ const __m128i K10 = _mm_loadu_si128(key_mm + 10);
+ const __m128i K11 = _mm_loadu_si128(key_mm + 11);
+ const __m128i K12 = _mm_loadu_si128(key_mm + 12);
+ const __m128i K13 = _mm_loadu_si128(key_mm + 13);
+ const __m128i K14 = _mm_loadu_si128(key_mm + 14);
+
+ while(blocks >= 4)
+ {
+ __m128i B0 = _mm_loadu_si128(in_mm + 0);
+ __m128i B1 = _mm_loadu_si128(in_mm + 1);
+ __m128i B2 = _mm_loadu_si128(in_mm + 2);
+ __m128i B3 = _mm_loadu_si128(in_mm + 3);
+
+ B0 = _mm_xor_si128(B0, K0);
+ B1 = _mm_xor_si128(B1, K0);
+ B2 = _mm_xor_si128(B2, K0);
+ B3 = _mm_xor_si128(B3, K0);
+
+ AES_ENC_4_ROUNDS(K1);
+ AES_ENC_4_ROUNDS(K2);
+ AES_ENC_4_ROUNDS(K3);
+ AES_ENC_4_ROUNDS(K4);
+ AES_ENC_4_ROUNDS(K5);
+ AES_ENC_4_ROUNDS(K6);
+ AES_ENC_4_ROUNDS(K7);
+ AES_ENC_4_ROUNDS(K8);
+ AES_ENC_4_ROUNDS(K9);
+ AES_ENC_4_ROUNDS(K10);
+ AES_ENC_4_ROUNDS(K11);
+ AES_ENC_4_ROUNDS(K12);
+ AES_ENC_4_ROUNDS(K13);
+ AES_ENC_4_LAST_ROUNDS(K14);
+
+ _mm_storeu_si128(out_mm + 0, B0);
+ _mm_storeu_si128(out_mm + 1, B1);
+ _mm_storeu_si128(out_mm + 2, B2);
+ _mm_storeu_si128(out_mm + 3, B3);
+
+ blocks -= 4;
+ in_mm += 4;
+ out_mm += 4;
+ }
+
+ for(size_t i = 0; i != blocks; ++i)
+ {
+ __m128i B = _mm_loadu_si128(in_mm + i);
+
+ B = _mm_xor_si128(B, K0);
+
+ B = _mm_aesenc_si128(B, K1);
+ B = _mm_aesenc_si128(B, K2);
+ B = _mm_aesenc_si128(B, K3);
+ B = _mm_aesenc_si128(B, K4);
+ B = _mm_aesenc_si128(B, K5);
+ B = _mm_aesenc_si128(B, K6);
+ B = _mm_aesenc_si128(B, K7);
+ B = _mm_aesenc_si128(B, K8);
+ B = _mm_aesenc_si128(B, K9);
+ B = _mm_aesenc_si128(B, K10);
+ B = _mm_aesenc_si128(B, K11);
+ B = _mm_aesenc_si128(B, K12);
+ B = _mm_aesenc_si128(B, K13);
+ B = _mm_aesenclast_si128(B, K14);
+
+ _mm_storeu_si128(out_mm + i, B);
+ }
+ }
+
+/*
+* AES-256 Decryption
+*/
+BOTAN_FUNC_ISA("ssse3,aes")
+void AES_256::aesni_decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const
+ {
+ BOTAN_ASSERT(m_DK.empty() == false, "Key was set");
+
+ const __m128i* in_mm = reinterpret_cast<const __m128i*>(in);
+ __m128i* out_mm = reinterpret_cast<__m128i*>(out);
+
+ const __m128i* key_mm = reinterpret_cast<const __m128i*>(m_DK.data());
+
+ const __m128i K0 = _mm_loadu_si128(key_mm);
+ const __m128i K1 = _mm_loadu_si128(key_mm + 1);
+ const __m128i K2 = _mm_loadu_si128(key_mm + 2);
+ const __m128i K3 = _mm_loadu_si128(key_mm + 3);
+ const __m128i K4 = _mm_loadu_si128(key_mm + 4);
+ const __m128i K5 = _mm_loadu_si128(key_mm + 5);
+ const __m128i K6 = _mm_loadu_si128(key_mm + 6);
+ const __m128i K7 = _mm_loadu_si128(key_mm + 7);
+ const __m128i K8 = _mm_loadu_si128(key_mm + 8);
+ const __m128i K9 = _mm_loadu_si128(key_mm + 9);
+ const __m128i K10 = _mm_loadu_si128(key_mm + 10);
+ const __m128i K11 = _mm_loadu_si128(key_mm + 11);
+ const __m128i K12 = _mm_loadu_si128(key_mm + 12);
+ const __m128i K13 = _mm_loadu_si128(key_mm + 13);
+ const __m128i K14 = _mm_loadu_si128(key_mm + 14);
+
+ while(blocks >= 4)
+ {
+ __m128i B0 = _mm_loadu_si128(in_mm + 0);
+ __m128i B1 = _mm_loadu_si128(in_mm + 1);
+ __m128i B2 = _mm_loadu_si128(in_mm + 2);
+ __m128i B3 = _mm_loadu_si128(in_mm + 3);
+
+ B0 = _mm_xor_si128(B0, K0);
+ B1 = _mm_xor_si128(B1, K0);
+ B2 = _mm_xor_si128(B2, K0);
+ B3 = _mm_xor_si128(B3, K0);
+
+ AES_DEC_4_ROUNDS(K1);
+ AES_DEC_4_ROUNDS(K2);
+ AES_DEC_4_ROUNDS(K3);
+ AES_DEC_4_ROUNDS(K4);
+ AES_DEC_4_ROUNDS(K5);
+ AES_DEC_4_ROUNDS(K6);
+ AES_DEC_4_ROUNDS(K7);
+ AES_DEC_4_ROUNDS(K8);
+ AES_DEC_4_ROUNDS(K9);
+ AES_DEC_4_ROUNDS(K10);
+ AES_DEC_4_ROUNDS(K11);
+ AES_DEC_4_ROUNDS(K12);
+ AES_DEC_4_ROUNDS(K13);
+ AES_DEC_4_LAST_ROUNDS(K14);
+
+ _mm_storeu_si128(out_mm + 0, B0);
+ _mm_storeu_si128(out_mm + 1, B1);
+ _mm_storeu_si128(out_mm + 2, B2);
+ _mm_storeu_si128(out_mm + 3, B3);
+
+ blocks -= 4;
+ in_mm += 4;
+ out_mm += 4;
+ }
+
+ for(size_t i = 0; i != blocks; ++i)
+ {
+ __m128i B = _mm_loadu_si128(in_mm + i);
+
+ B = _mm_xor_si128(B, K0);
+
+ B = _mm_aesdec_si128(B, K1);
+ B = _mm_aesdec_si128(B, K2);
+ B = _mm_aesdec_si128(B, K3);
+ B = _mm_aesdec_si128(B, K4);
+ B = _mm_aesdec_si128(B, K5);
+ B = _mm_aesdec_si128(B, K6);
+ B = _mm_aesdec_si128(B, K7);
+ B = _mm_aesdec_si128(B, K8);
+ B = _mm_aesdec_si128(B, K9);
+ B = _mm_aesdec_si128(B, K10);
+ B = _mm_aesdec_si128(B, K11);
+ B = _mm_aesdec_si128(B, K12);
+ B = _mm_aesdec_si128(B, K13);
+ B = _mm_aesdeclast_si128(B, K14);
+
+ _mm_storeu_si128(out_mm + i, B);
+ }
+ }
+
+/*
+* AES-256 Key Schedule
+*/
+BOTAN_FUNC_ISA("ssse3,aes")
+void AES_256::aesni_key_schedule(const uint8_t key[], size_t)
+ {
+ m_EK.resize(60);
+ m_DK.resize(60);
+
+ const __m128i K0 = _mm_loadu_si128(reinterpret_cast<const __m128i*>(key));
+ const __m128i K1 = _mm_loadu_si128(reinterpret_cast<const __m128i*>(key + 16));
+
+ const __m128i K2 = aes_128_key_expansion(K0, _mm_aeskeygenassist_si128(K1, 0x01));
+ const __m128i K3 = aes_256_key_expansion(K1, K2);
+
+ const __m128i K4 = aes_128_key_expansion(K2, _mm_aeskeygenassist_si128(K3, 0x02));
+ const __m128i K5 = aes_256_key_expansion(K3, K4);
+
+ const __m128i K6 = aes_128_key_expansion(K4, _mm_aeskeygenassist_si128(K5, 0x04));
+ const __m128i K7 = aes_256_key_expansion(K5, K6);
+
+ const __m128i K8 = aes_128_key_expansion(K6, _mm_aeskeygenassist_si128(K7, 0x08));
+ const __m128i K9 = aes_256_key_expansion(K7, K8);
+
+ const __m128i K10 = aes_128_key_expansion(K8, _mm_aeskeygenassist_si128(K9, 0x10));
+ const __m128i K11 = aes_256_key_expansion(K9, K10);
+
+ const __m128i K12 = aes_128_key_expansion(K10, _mm_aeskeygenassist_si128(K11, 0x20));
+ const __m128i K13 = aes_256_key_expansion(K11, K12);
+
+ const __m128i K14 = aes_128_key_expansion(K12, _mm_aeskeygenassist_si128(K13, 0x40));
+
+ __m128i* EK_mm = reinterpret_cast<__m128i*>(m_EK.data());
+ _mm_storeu_si128(EK_mm , K0);
+ _mm_storeu_si128(EK_mm + 1, K1);
+ _mm_storeu_si128(EK_mm + 2, K2);
+ _mm_storeu_si128(EK_mm + 3, K3);
+ _mm_storeu_si128(EK_mm + 4, K4);
+ _mm_storeu_si128(EK_mm + 5, K5);
+ _mm_storeu_si128(EK_mm + 6, K6);
+ _mm_storeu_si128(EK_mm + 7, K7);
+ _mm_storeu_si128(EK_mm + 8, K8);
+ _mm_storeu_si128(EK_mm + 9, K9);
+ _mm_storeu_si128(EK_mm + 10, K10);
+ _mm_storeu_si128(EK_mm + 11, K11);
+ _mm_storeu_si128(EK_mm + 12, K12);
+ _mm_storeu_si128(EK_mm + 13, K13);
+ _mm_storeu_si128(EK_mm + 14, K14);
+
+ // Now generate decryption keys
+ __m128i* DK_mm = reinterpret_cast<__m128i*>(m_DK.data());
+ _mm_storeu_si128(DK_mm , K14);
+ _mm_storeu_si128(DK_mm + 1, _mm_aesimc_si128(K13));
+ _mm_storeu_si128(DK_mm + 2, _mm_aesimc_si128(K12));
+ _mm_storeu_si128(DK_mm + 3, _mm_aesimc_si128(K11));
+ _mm_storeu_si128(DK_mm + 4, _mm_aesimc_si128(K10));
+ _mm_storeu_si128(DK_mm + 5, _mm_aesimc_si128(K9));
+ _mm_storeu_si128(DK_mm + 6, _mm_aesimc_si128(K8));
+ _mm_storeu_si128(DK_mm + 7, _mm_aesimc_si128(K7));
+ _mm_storeu_si128(DK_mm + 8, _mm_aesimc_si128(K6));
+ _mm_storeu_si128(DK_mm + 9, _mm_aesimc_si128(K5));
+ _mm_storeu_si128(DK_mm + 10, _mm_aesimc_si128(K4));
+ _mm_storeu_si128(DK_mm + 11, _mm_aesimc_si128(K3));
+ _mm_storeu_si128(DK_mm + 12, _mm_aesimc_si128(K2));
+ _mm_storeu_si128(DK_mm + 13, _mm_aesimc_si128(K1));
+ _mm_storeu_si128(DK_mm + 14, K0);
+ }
+
+#undef AES_ENC_4_ROUNDS
+#undef AES_ENC_4_LAST_ROUNDS
+#undef AES_DEC_4_ROUNDS
+#undef AES_DEC_4_LAST_ROUNDS
+
+}
diff --git a/src/libs/3rdparty/botan/src/lib/block/aes/aes_ni/info.txt b/src/libs/3rdparty/botan/src/lib/block/aes/aes_ni/info.txt
new file mode 100644
index 0000000000..d5d3593489
--- /dev/null
+++ b/src/libs/3rdparty/botan/src/lib/block/aes/aes_ni/info.txt
@@ -0,0 +1,7 @@
+<defines>
+AES_NI -> 20131128
+</defines>
+
+load_on auto
+
+need_isa aesni
diff --git a/src/libs/3rdparty/botan/src/lib/block/aes/aes_power8/aes_power8.cpp b/src/libs/3rdparty/botan/src/lib/block/aes/aes_power8/aes_power8.cpp
new file mode 100644
index 0000000000..98520a13cf
--- /dev/null
+++ b/src/libs/3rdparty/botan/src/lib/block/aes/aes_power8/aes_power8.cpp
@@ -0,0 +1,328 @@
+/*
+* AES using POWER8 crypto extensions
+*
+* Contributed by Jeffrey Walton
+*
+* Further changes
+* (C) 2018 Jack Lloyd
+*
+* Botan is released under the Simplified BSD License (see license.txt)
+*/
+
+#include <botan/aes.h>
+#include <botan/cpuid.h>
+
+#include <altivec.h>
+#undef vector
+#undef bool
+
+namespace Botan {
+
+namespace {
+
+__vector unsigned long long LoadKey(const uint32_t* src)
+ {
+ __vector unsigned int vec = vec_vsx_ld(0, src);
+
+ if(CPUID::is_little_endian())
+ {
+ const __vector unsigned char mask = {12,13,14,15, 8,9,10,11, 4,5,6,7, 0,1,2,3};
+ const __vector unsigned char zero = {0};
+ return (__vector unsigned long long)vec_perm((__vector unsigned char)vec, zero, mask);
+ }
+ else
+ {
+ return (__vector unsigned long long)vec;
+ }
+ }
+
+__vector unsigned char Reverse8x16(const __vector unsigned char src)
+ {
+ if(CPUID::is_little_endian())
+ {
+ const __vector unsigned char mask = {15,14,13,12, 11,10,9,8, 7,6,5,4, 3,2,1,0};
+ const __vector unsigned char zero = {0};
+ return vec_perm(src, zero, mask);
+ }
+ else
+ {
+ return src;
+ }
+ }
+
+__vector unsigned long long LoadBlock(const uint8_t* src)
+ {
+ return (__vector unsigned long long)Reverse8x16(vec_vsx_ld(0, src));
+ }
+
+void StoreBlock(const __vector unsigned long long src, uint8_t* dest)
+ {
+ vec_vsx_st(Reverse8x16((__vector unsigned char)src), 0, dest);
+ }
+
+}
+
+BOTAN_FUNC_ISA("crypto")
+void AES_128::power8_encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const
+ {
+ BOTAN_ASSERT(m_EK.empty() == false, "Key was set");
+
+ const __vector unsigned long long K0 = LoadKey(&m_EK[0]);
+ const __vector unsigned long long K1 = LoadKey(&m_EK[4]);
+ const __vector unsigned long long K2 = LoadKey(&m_EK[8]);
+ const __vector unsigned long long K3 = LoadKey(&m_EK[12]);
+ const __vector unsigned long long K4 = LoadKey(&m_EK[16]);
+ const __vector unsigned long long K5 = LoadKey(&m_EK[20]);
+ const __vector unsigned long long K6 = LoadKey(&m_EK[24]);
+ const __vector unsigned long long K7 = LoadKey(&m_EK[28]);
+ const __vector unsigned long long K8 = LoadKey(&m_EK[32]);
+ const __vector unsigned long long K9 = LoadKey(&m_EK[36]);
+ const __vector unsigned long long K10 = LoadBlock(m_ME.data());
+
+ for(size_t i = 0; i != blocks; ++i)
+ {
+ __vector unsigned long long B = LoadBlock(in);
+
+ B = vec_xor(B, K0);
+ B = __builtin_crypto_vcipher(B, K1);
+ B = __builtin_crypto_vcipher(B, K2);
+ B = __builtin_crypto_vcipher(B, K3);
+ B = __builtin_crypto_vcipher(B, K4);
+ B = __builtin_crypto_vcipher(B, K5);
+ B = __builtin_crypto_vcipher(B, K6);
+ B = __builtin_crypto_vcipher(B, K7);
+ B = __builtin_crypto_vcipher(B, K8);
+ B = __builtin_crypto_vcipher(B, K9);
+ B = __builtin_crypto_vcipherlast(B, K10);
+
+ StoreBlock(B, out);
+
+ out += 16;
+ in += 16;
+ }
+ }
+
+BOTAN_FUNC_ISA("crypto")
+void AES_128::power8_decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const
+ {
+ BOTAN_ASSERT(m_EK.empty() == false, "Key was set");
+
+ const __vector unsigned long long K0 = LoadBlock(m_ME.data());
+ const __vector unsigned long long K1 = LoadKey(&m_EK[36]);
+ const __vector unsigned long long K2 = LoadKey(&m_EK[32]);
+ const __vector unsigned long long K3 = LoadKey(&m_EK[28]);
+ const __vector unsigned long long K4 = LoadKey(&m_EK[24]);
+ const __vector unsigned long long K5 = LoadKey(&m_EK[20]);
+ const __vector unsigned long long K6 = LoadKey(&m_EK[16]);
+ const __vector unsigned long long K7 = LoadKey(&m_EK[12]);
+ const __vector unsigned long long K8 = LoadKey(&m_EK[8]);
+ const __vector unsigned long long K9 = LoadKey(&m_EK[4]);
+ const __vector unsigned long long K10 = LoadKey(&m_EK[0]);
+
+ for(size_t i = 0; i != blocks; ++i)
+ {
+ __vector unsigned long long B = LoadBlock(in);
+
+ B = vec_xor(B, K0);
+ B = __builtin_crypto_vncipher(B, K1);
+ B = __builtin_crypto_vncipher(B, K2);
+ B = __builtin_crypto_vncipher(B, K3);
+ B = __builtin_crypto_vncipher(B, K4);
+ B = __builtin_crypto_vncipher(B, K5);
+ B = __builtin_crypto_vncipher(B, K6);
+ B = __builtin_crypto_vncipher(B, K7);
+ B = __builtin_crypto_vncipher(B, K8);
+ B = __builtin_crypto_vncipher(B, K9);
+ B = __builtin_crypto_vncipherlast(B, K10);
+
+ StoreBlock(B, out);
+
+ out += 16;
+ in += 16;
+ }
+ }
+
+BOTAN_FUNC_ISA("crypto")
+void AES_192::power8_encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const
+ {
+ BOTAN_ASSERT(m_EK.empty() == false, "Key was set");
+
+ const __vector unsigned long long K0 = LoadKey(&m_EK[0]);
+ const __vector unsigned long long K1 = LoadKey(&m_EK[4]);
+ const __vector unsigned long long K2 = LoadKey(&m_EK[8]);
+ const __vector unsigned long long K3 = LoadKey(&m_EK[12]);
+ const __vector unsigned long long K4 = LoadKey(&m_EK[16]);
+ const __vector unsigned long long K5 = LoadKey(&m_EK[20]);
+ const __vector unsigned long long K6 = LoadKey(&m_EK[24]);
+ const __vector unsigned long long K7 = LoadKey(&m_EK[28]);
+ const __vector unsigned long long K8 = LoadKey(&m_EK[32]);
+ const __vector unsigned long long K9 = LoadKey(&m_EK[36]);
+ const __vector unsigned long long K10 = LoadKey(&m_EK[40]);
+ const __vector unsigned long long K11 = LoadKey(&m_EK[44]);
+ const __vector unsigned long long K12 = LoadBlock(m_ME.data());
+
+ for(size_t i = 0; i != blocks; ++i)
+ {
+ __vector unsigned long long B = LoadBlock(in);
+
+ B = vec_xor(B, K0);
+ B = __builtin_crypto_vcipher(B, K1);
+ B = __builtin_crypto_vcipher(B, K2);
+ B = __builtin_crypto_vcipher(B, K3);
+ B = __builtin_crypto_vcipher(B, K4);
+ B = __builtin_crypto_vcipher(B, K5);
+ B = __builtin_crypto_vcipher(B, K6);
+ B = __builtin_crypto_vcipher(B, K7);
+ B = __builtin_crypto_vcipher(B, K8);
+ B = __builtin_crypto_vcipher(B, K9);
+ B = __builtin_crypto_vcipher(B, K10);
+ B = __builtin_crypto_vcipher(B, K11);
+ B = __builtin_crypto_vcipherlast(B, K12);
+
+ StoreBlock(B, out);
+
+ out += 16;
+ in += 16;
+ }
+ }
+
+BOTAN_FUNC_ISA("crypto")
+void AES_192::power8_decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const
+ {
+ BOTAN_ASSERT(m_EK.empty() == false, "Key was set");
+
+ const __vector unsigned long long K0 = LoadBlock(m_ME.data());
+ const __vector unsigned long long K1 = LoadKey(&m_EK[44]);
+ const __vector unsigned long long K2 = LoadKey(&m_EK[40]);
+ const __vector unsigned long long K3 = LoadKey(&m_EK[36]);
+ const __vector unsigned long long K4 = LoadKey(&m_EK[32]);
+ const __vector unsigned long long K5 = LoadKey(&m_EK[28]);
+ const __vector unsigned long long K6 = LoadKey(&m_EK[24]);
+ const __vector unsigned long long K7 = LoadKey(&m_EK[20]);
+ const __vector unsigned long long K8 = LoadKey(&m_EK[16]);
+ const __vector unsigned long long K9 = LoadKey(&m_EK[12]);
+ const __vector unsigned long long K10 = LoadKey(&m_EK[8]);
+ const __vector unsigned long long K11 = LoadKey(&m_EK[4]);
+ const __vector unsigned long long K12 = LoadKey(&m_EK[0]);
+
+ for(size_t i = 0; i != blocks; ++i)
+ {
+ __vector unsigned long long B = LoadBlock(in);
+
+ B = vec_xor(B, K0);
+ B = __builtin_crypto_vncipher(B, K1);
+ B = __builtin_crypto_vncipher(B, K2);
+ B = __builtin_crypto_vncipher(B, K3);
+ B = __builtin_crypto_vncipher(B, K4);
+ B = __builtin_crypto_vncipher(B, K5);
+ B = __builtin_crypto_vncipher(B, K6);
+ B = __builtin_crypto_vncipher(B, K7);
+ B = __builtin_crypto_vncipher(B, K8);
+ B = __builtin_crypto_vncipher(B, K9);
+ B = __builtin_crypto_vncipher(B, K10);
+ B = __builtin_crypto_vncipher(B, K11);
+ B = __builtin_crypto_vncipherlast(B, K12);
+
+ StoreBlock(B, out);
+
+ out += 16;
+ in += 16;
+ }
+ }
+
+BOTAN_FUNC_ISA("crypto")
+void AES_256::power8_encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const
+ {
+ BOTAN_ASSERT(m_EK.empty() == false, "Key was set");
+ const __vector unsigned long long K0 = LoadKey(&m_EK[0]);
+ const __vector unsigned long long K1 = LoadKey(&m_EK[4]);
+ const __vector unsigned long long K2 = LoadKey(&m_EK[8]);
+ const __vector unsigned long long K3 = LoadKey(&m_EK[12]);
+ const __vector unsigned long long K4 = LoadKey(&m_EK[16]);
+ const __vector unsigned long long K5 = LoadKey(&m_EK[20]);
+ const __vector unsigned long long K6 = LoadKey(&m_EK[24]);
+ const __vector unsigned long long K7 = LoadKey(&m_EK[28]);
+ const __vector unsigned long long K8 = LoadKey(&m_EK[32]);
+ const __vector unsigned long long K9 = LoadKey(&m_EK[36]);
+ const __vector unsigned long long K10 = LoadKey(&m_EK[40]);
+ const __vector unsigned long long K11 = LoadKey(&m_EK[44]);
+ const __vector unsigned long long K12 = LoadKey(&m_EK[48]);
+ const __vector unsigned long long K13 = LoadKey(&m_EK[52]);
+ const __vector unsigned long long K14 = LoadBlock(m_ME.data());
+
+ for(size_t i = 0; i != blocks; ++i)
+ {
+ __vector unsigned long long B = LoadBlock(in);
+
+ B = vec_xor(B, K0);
+ B = __builtin_crypto_vcipher(B, K1);
+ B = __builtin_crypto_vcipher(B, K2);
+ B = __builtin_crypto_vcipher(B, K3);
+ B = __builtin_crypto_vcipher(B, K4);
+ B = __builtin_crypto_vcipher(B, K5);
+ B = __builtin_crypto_vcipher(B, K6);
+ B = __builtin_crypto_vcipher(B, K7);
+ B = __builtin_crypto_vcipher(B, K8);
+ B = __builtin_crypto_vcipher(B, K9);
+ B = __builtin_crypto_vcipher(B, K10);
+ B = __builtin_crypto_vcipher(B, K11);
+ B = __builtin_crypto_vcipher(B, K12);
+ B = __builtin_crypto_vcipher(B, K13);
+ B = __builtin_crypto_vcipherlast(B, K14);
+
+ StoreBlock(B, out);
+
+ out += 16;
+ in += 16;
+ }
+ }
+
+BOTAN_FUNC_ISA("crypto")
+void AES_256::power8_decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const
+ {
+ BOTAN_ASSERT(m_EK.empty() == false, "Key was set");
+
+ const __vector unsigned long long K0 = LoadBlock(m_ME.data());
+ const __vector unsigned long long K1 = LoadKey(&m_EK[52]);
+ const __vector unsigned long long K2 = LoadKey(&m_EK[48]);
+ const __vector unsigned long long K3 = LoadKey(&m_EK[44]);
+ const __vector unsigned long long K4 = LoadKey(&m_EK[40]);
+ const __vector unsigned long long K5 = LoadKey(&m_EK[36]);
+ const __vector unsigned long long K6 = LoadKey(&m_EK[32]);
+ const __vector unsigned long long K7 = LoadKey(&m_EK[28]);
+ const __vector unsigned long long K8 = LoadKey(&m_EK[24]);
+ const __vector unsigned long long K9 = LoadKey(&m_EK[20]);
+ const __vector unsigned long long K10 = LoadKey(&m_EK[16]);
+ const __vector unsigned long long K11 = LoadKey(&m_EK[12]);
+ const __vector unsigned long long K12 = LoadKey(&m_EK[8]);
+ const __vector unsigned long long K13 = LoadKey(&m_EK[4]);
+ const __vector unsigned long long K14 = LoadKey(&m_EK[0]);
+
+ for(size_t i = 0; i != blocks; ++i)
+ {
+ __vector unsigned long long B = LoadBlock(in);
+
+ B = vec_xor(B, K0);
+ B = __builtin_crypto_vncipher(B, K1);
+ B = __builtin_crypto_vncipher(B, K2);
+ B = __builtin_crypto_vncipher(B, K3);
+ B = __builtin_crypto_vncipher(B, K4);
+ B = __builtin_crypto_vncipher(B, K5);
+ B = __builtin_crypto_vncipher(B, K6);
+ B = __builtin_crypto_vncipher(B, K7);
+ B = __builtin_crypto_vncipher(B, K8);
+ B = __builtin_crypto_vncipher(B, K9);
+ B = __builtin_crypto_vncipher(B, K10);
+ B = __builtin_crypto_vncipher(B, K11);
+ B = __builtin_crypto_vncipher(B, K12);
+ B = __builtin_crypto_vncipher(B, K13);
+ B = __builtin_crypto_vncipherlast(B, K14);
+
+ StoreBlock(B, out);
+
+ out += 16;
+ in += 16;
+ }
+ }
+
+}
diff --git a/src/libs/3rdparty/botan/src/lib/block/aes/aes_power8/info.txt b/src/libs/3rdparty/botan/src/lib/block/aes/aes_power8/info.txt
new file mode 100644
index 0000000000..6aa52d25a0
--- /dev/null
+++ b/src/libs/3rdparty/botan/src/lib/block/aes/aes_power8/info.txt
@@ -0,0 +1,9 @@
+<defines>
+AES_POWER8 -> 20180223
+</defines>
+
+<arch>
+ppc64
+</arch>
+
+need_isa ppccrypto
diff --git a/src/libs/3rdparty/botan/src/lib/block/aes/aes_ssse3/aes_ssse3.cpp b/src/libs/3rdparty/botan/src/lib/block/aes/aes_ssse3/aes_ssse3.cpp
new file mode 100644
index 0000000000..47d70d0b8b
--- /dev/null
+++ b/src/libs/3rdparty/botan/src/lib/block/aes/aes_ssse3/aes_ssse3.cpp
@@ -0,0 +1,637 @@
+/*
+* AES using SSSE3
+* (C) 2010,2016 Jack Lloyd
+*
+* This is more or less a direct translation of public domain x86-64
+* assembly written by Mike Hamburg, described in "Accelerating AES
+* with Vector Permute Instructions" (CHES 2009). His original code is
+* available at https://crypto.stanford.edu/vpaes/
+*
+* Botan is released under the Simplified BSD License (see license.txt)
+*/
+
+#include <botan/aes.h>
+#include <botan/internal/ct_utils.h>
+#include <tmmintrin.h>
+
+namespace Botan {
+
+namespace {
+
+const __m128i low_nibs = _mm_set1_epi8(0x0F);
+
+const __m128i k_ipt1 = _mm_set_epi32(
+ 0xCABAE090, 0x52227808, 0xC2B2E898, 0x5A2A7000);
+const __m128i k_ipt2 = _mm_set_epi32(
+ 0xCD80B1FC, 0xB0FDCC81, 0x4C01307D, 0x317C4D00);
+
+const __m128i k_inv1 = _mm_set_epi32(
+ 0x04070309, 0x0A0B0C02, 0x0E05060F, 0x0D080180);
+const __m128i k_inv2 = _mm_set_epi32(
+ 0x030D0E0C, 0x02050809, 0x01040A06, 0x0F0B0780);
+
+const __m128i sb1u = _mm_set_epi32(
+ 0xA5DF7A6E, 0x142AF544, 0xB19BE18F, 0xCB503E00);
+const __m128i sb1t = _mm_set_epi32(
+ 0x3BF7CCC1, 0x0D2ED9EF, 0x3618D415, 0xFAE22300);
+
+const __m128i mc_forward[4] = {
+ _mm_set_epi32(0x0C0F0E0D, 0x080B0A09, 0x04070605, 0x00030201),
+ _mm_set_epi32(0x00030201, 0x0C0F0E0D, 0x080B0A09, 0x04070605),
+ _mm_set_epi32(0x04070605, 0x00030201, 0x0C0F0E0D, 0x080B0A09),
+ _mm_set_epi32(0x080B0A09, 0x04070605, 0x00030201, 0x0C0F0E0D)
+};
+
+const __m128i sr[4] = {
+ _mm_set_epi32(0x0F0E0D0C, 0x0B0A0908, 0x07060504, 0x03020100),
+ _mm_set_epi32(0x0B06010C, 0x07020D08, 0x030E0904, 0x0F0A0500),
+ _mm_set_epi32(0x070E050C, 0x030A0108, 0x0F060D04, 0x0B020900),
+ _mm_set_epi32(0x0306090C, 0x0F020508, 0x0B0E0104, 0x070A0D00),
+};
+
+#define mm_xor3(x, y, z) _mm_xor_si128(x, _mm_xor_si128(y, z))
+
+BOTAN_FUNC_ISA("ssse3")
+__m128i aes_schedule_transform(__m128i input,
+ __m128i table_1,
+ __m128i table_2)
+ {
+ __m128i i_1 = _mm_and_si128(low_nibs, input);
+ __m128i i_2 = _mm_srli_epi32(_mm_andnot_si128(low_nibs, input), 4);
+
+ return _mm_xor_si128(
+ _mm_shuffle_epi8(table_1, i_1),
+ _mm_shuffle_epi8(table_2, i_2));
+ }
+
+BOTAN_FUNC_ISA("ssse3")
+__m128i aes_schedule_mangle(__m128i k, uint8_t round_no)
+ {
+ __m128i t = _mm_shuffle_epi8(_mm_xor_si128(k, _mm_set1_epi8(0x5B)),
+ mc_forward[0]);
+
+ __m128i t2 = t;
+
+ t = _mm_shuffle_epi8(t, mc_forward[0]);
+
+ t2 = mm_xor3(t2, t, _mm_shuffle_epi8(t, mc_forward[0]));
+
+ return _mm_shuffle_epi8(t2, sr[round_no % 4]);
+ }
+
+BOTAN_FUNC_ISA("ssse3")
+__m128i aes_schedule_192_smear(__m128i x, __m128i y)
+ {
+ return mm_xor3(y,
+ _mm_shuffle_epi32(x, 0xFE),
+ _mm_shuffle_epi32(y, 0x80));
+ }
+
+BOTAN_FUNC_ISA("ssse3")
+__m128i aes_schedule_mangle_dec(__m128i k, uint8_t round_no)
+ {
+ const __m128i dsk[8] = {
+ _mm_set_epi32(0x4AED9334, 0x82255BFC, 0xB6116FC8, 0x7ED9A700),
+ _mm_set_epi32(0x8BB89FAC, 0xE9DAFDCE, 0x45765162, 0x27143300),
+ _mm_set_epi32(0x4622EE8A, 0xADC90561, 0x27438FEB, 0xCCA86400),
+ _mm_set_epi32(0x73AEE13C, 0xBD602FF2, 0x815C13CE, 0x4F92DD00),
+ _mm_set_epi32(0xF83F3EF9, 0xFA3D3CFB, 0x03C4C502, 0x01C6C700),
+ _mm_set_epi32(0xA5526A9D, 0x7384BC4B, 0xEE1921D6, 0x38CFF700),
+ _mm_set_epi32(0xA080D3F3, 0x10306343, 0xE3C390B0, 0x53732000),
+ _mm_set_epi32(0x2F45AEC4, 0x8CE60D67, 0xA0CA214B, 0x036982E8)
+ };
+
+ __m128i t = aes_schedule_transform(k, dsk[0], dsk[1]);
+ __m128i output = _mm_shuffle_epi8(t, mc_forward[0]);
+
+ t = aes_schedule_transform(t, dsk[2], dsk[3]);
+ output = _mm_shuffle_epi8(_mm_xor_si128(t, output), mc_forward[0]);
+
+ t = aes_schedule_transform(t, dsk[4], dsk[5]);
+ output = _mm_shuffle_epi8(_mm_xor_si128(t, output), mc_forward[0]);
+
+ t = aes_schedule_transform(t, dsk[6], dsk[7]);
+ output = _mm_shuffle_epi8(_mm_xor_si128(t, output), mc_forward[0]);
+
+ return _mm_shuffle_epi8(output, sr[round_no % 4]);
+ }
+
+BOTAN_FUNC_ISA("ssse3")
+__m128i aes_schedule_mangle_last(__m128i k, uint8_t round_no)
+ {
+ const __m128i out_tr1 = _mm_set_epi32(
+ 0xF7974121, 0xDEBE6808, 0xFF9F4929, 0xD6B66000);
+ const __m128i out_tr2 = _mm_set_epi32(
+ 0xE10D5DB1, 0xB05C0CE0, 0x01EDBD51, 0x50BCEC00);
+
+ k = _mm_shuffle_epi8(k, sr[round_no % 4]);
+ k = _mm_xor_si128(k, _mm_set1_epi8(0x5B));
+ return aes_schedule_transform(k, out_tr1, out_tr2);
+ }
+
+BOTAN_FUNC_ISA("ssse3")
+__m128i aes_schedule_mangle_last_dec(__m128i k)
+ {
+ const __m128i deskew1 = _mm_set_epi32(
+ 0x1DFEB95A, 0x5DBEF91A, 0x07E4A340, 0x47A4E300);
+ const __m128i deskew2 = _mm_set_epi32(
+ 0x2841C2AB, 0xF49D1E77, 0x5F36B5DC, 0x83EA6900);
+
+ k = _mm_xor_si128(k, _mm_set1_epi8(0x5B));
+ return aes_schedule_transform(k, deskew1, deskew2);
+ }
+
+BOTAN_FUNC_ISA("ssse3")
+__m128i aes_schedule_round(__m128i* rcon, __m128i input1, __m128i input2)
+ {
+ if(rcon)
+ {
+ input2 = _mm_xor_si128(_mm_alignr_epi8(_mm_setzero_si128(), *rcon, 15),
+ input2);
+
+ *rcon = _mm_alignr_epi8(*rcon, *rcon, 15); // next rcon
+
+ input1 = _mm_shuffle_epi32(input1, 0xFF); // rotate
+ input1 = _mm_alignr_epi8(input1, input1, 1);
+ }
+
+ __m128i smeared = _mm_xor_si128(input2, _mm_slli_si128(input2, 4));
+ smeared = mm_xor3(smeared, _mm_slli_si128(smeared, 8), _mm_set1_epi8(0x5B));
+
+ __m128i t = _mm_srli_epi32(_mm_andnot_si128(low_nibs, input1), 4);
+
+ input1 = _mm_and_si128(low_nibs, input1);
+
+ __m128i t2 = _mm_shuffle_epi8(k_inv2, input1);
+
+ input1 = _mm_xor_si128(input1, t);
+
+ __m128i t3 = _mm_xor_si128(t2, _mm_shuffle_epi8(k_inv1, t));
+ __m128i t4 = _mm_xor_si128(t2, _mm_shuffle_epi8(k_inv1, input1));
+
+ __m128i t5 = _mm_xor_si128(input1, _mm_shuffle_epi8(k_inv1, t3));
+ __m128i t6 = _mm_xor_si128(t, _mm_shuffle_epi8(k_inv1, t4));
+
+ return mm_xor3(_mm_shuffle_epi8(sb1u, t5),
+ _mm_shuffle_epi8(sb1t, t6),
+ smeared);
+ }
+
+BOTAN_FUNC_ISA("ssse3")
+__m128i aes_ssse3_encrypt(__m128i B, const __m128i* keys, size_t rounds)
+ {
+ const __m128i sb2u = _mm_set_epi32(
+ 0x5EB7E955, 0xBC982FCD, 0xE27A93C6, 0x0B712400);
+ const __m128i sb2t = _mm_set_epi32(
+ 0xC2A163C8, 0xAB82234A, 0x69EB8840, 0x0AE12900);
+
+ const __m128i sbou = _mm_set_epi32(
+ 0x15AABF7A, 0xC502A878, 0xD0D26D17, 0x6FBDC700);
+ const __m128i sbot = _mm_set_epi32(
+ 0x8E1E90D1, 0x412B35FA, 0xCFE474A5, 0x5FBB6A00);
+
+ const __m128i mc_backward[4] = {
+ _mm_set_epi32(0x0E0D0C0F, 0x0A09080B, 0x06050407, 0x02010003),
+ _mm_set_epi32(0x0A09080B, 0x06050407, 0x02010003, 0x0E0D0C0F),
+ _mm_set_epi32(0x06050407, 0x02010003, 0x0E0D0C0F, 0x0A09080B),
+ _mm_set_epi32(0x02010003, 0x0E0D0C0F, 0x0A09080B, 0x06050407),
+ };
+
+ B = mm_xor3(_mm_shuffle_epi8(k_ipt1, _mm_and_si128(low_nibs, B)),
+ _mm_shuffle_epi8(k_ipt2,
+ _mm_srli_epi32(
+ _mm_andnot_si128(low_nibs, B),
+ 4)),
+ _mm_loadu_si128(keys));
+
+ for(size_t r = 1; ; ++r)
+ {
+ const __m128i K = _mm_loadu_si128(keys + r);
+
+ __m128i t = _mm_srli_epi32(_mm_andnot_si128(low_nibs, B), 4);
+
+ B = _mm_and_si128(low_nibs, B);
+
+ __m128i t2 = _mm_shuffle_epi8(k_inv2, B);
+
+ B = _mm_xor_si128(B, t);
+
+ __m128i t3 = _mm_xor_si128(t2, _mm_shuffle_epi8(k_inv1, t));
+ __m128i t4 = _mm_xor_si128(t2, _mm_shuffle_epi8(k_inv1, B));
+
+ __m128i t5 = _mm_xor_si128(B, _mm_shuffle_epi8(k_inv1, t3));
+ __m128i t6 = _mm_xor_si128(t, _mm_shuffle_epi8(k_inv1, t4));
+
+ if(r == rounds)
+ {
+ B = _mm_shuffle_epi8(
+ mm_xor3(_mm_shuffle_epi8(sbou, t5),
+ _mm_shuffle_epi8(sbot, t6),
+ K),
+ sr[r % 4]);
+
+ return B;
+ }
+
+ __m128i t7 = mm_xor3(_mm_shuffle_epi8(sb1t, t6),
+ _mm_shuffle_epi8(sb1u, t5),
+ K);
+
+ __m128i t8 = mm_xor3(_mm_shuffle_epi8(sb2t, t6),
+ _mm_shuffle_epi8(sb2u, t5),
+ _mm_shuffle_epi8(t7, mc_forward[r % 4]));
+
+ B = mm_xor3(_mm_shuffle_epi8(t8, mc_forward[r % 4]),
+ _mm_shuffle_epi8(t7, mc_backward[r % 4]),
+ t8);
+ }
+ }
+
+BOTAN_FUNC_ISA("ssse3")
+__m128i aes_ssse3_decrypt(__m128i B, const __m128i* keys, size_t rounds)
+ {
+ const __m128i k_dipt1 = _mm_set_epi32(
+ 0x154A411E, 0x114E451A, 0x0F505B04, 0x0B545F00);
+ const __m128i k_dipt2 = _mm_set_epi32(
+ 0x12771772, 0xF491F194, 0x86E383E6, 0x60056500);
+
+ const __m128i sb9u = _mm_set_epi32(
+ 0xCAD51F50, 0x4F994CC9, 0x851C0353, 0x9A86D600);
+ const __m128i sb9t = _mm_set_epi32(
+ 0x725E2C9E, 0xB2FBA565, 0xC03B1789, 0xECD74900);
+
+ const __m128i sbeu = _mm_set_epi32(
+ 0x22426004, 0x64B4F6B0, 0x46F29296, 0x26D4D000);
+ const __m128i sbet = _mm_set_epi32(
+ 0x9467F36B, 0x98593E32, 0x0C55A6CD, 0xFFAAC100);
+
+ const __m128i sbdu = _mm_set_epi32(
+ 0xF56E9B13, 0x882A4439, 0x7D57CCDF, 0xE6B1A200);
+ const __m128i sbdt = _mm_set_epi32(
+ 0x2931180D, 0x15DEEFD3, 0x3CE2FAF7, 0x24C6CB00);
+
+ const __m128i sbbu = _mm_set_epi32(
+ 0x602646F6, 0xB0F2D404, 0xD0226492, 0x96B44200);
+ const __m128i sbbt = _mm_set_epi32(
+ 0xF3FF0C3E, 0x3255AA6B, 0xC19498A6, 0xCD596700);
+
+ __m128i mc = mc_forward[3];
+
+ __m128i t =
+ _mm_shuffle_epi8(k_dipt2,
+ _mm_srli_epi32(
+ _mm_andnot_si128(low_nibs, B),
+ 4));
+
+ B = mm_xor3(t, _mm_loadu_si128(keys),
+ _mm_shuffle_epi8(k_dipt1, _mm_and_si128(B, low_nibs)));
+
+ for(size_t r = 1; ; ++r)
+ {
+ const __m128i K = _mm_loadu_si128(keys + r);
+
+ t = _mm_srli_epi32(_mm_andnot_si128(low_nibs, B), 4);
+
+ B = _mm_and_si128(low_nibs, B);
+
+ __m128i t2 = _mm_shuffle_epi8(k_inv2, B);
+
+ B = _mm_xor_si128(B, t);
+
+ __m128i t3 = _mm_xor_si128(t2, _mm_shuffle_epi8(k_inv1, t));
+ __m128i t4 = _mm_xor_si128(t2, _mm_shuffle_epi8(k_inv1, B));
+ __m128i t5 = _mm_xor_si128(B, _mm_shuffle_epi8(k_inv1, t3));
+ __m128i t6 = _mm_xor_si128(t, _mm_shuffle_epi8(k_inv1, t4));
+
+ if(r == rounds)
+ {
+ const __m128i sbou = _mm_set_epi32(
+ 0xC7AA6DB9, 0xD4943E2D, 0x1387EA53, 0x7EF94000);
+ const __m128i sbot = _mm_set_epi32(
+ 0xCA4B8159, 0xD8C58E9C, 0x12D7560F, 0x93441D00);
+
+ __m128i x = _mm_shuffle_epi8(sbou, t5);
+ __m128i y = _mm_shuffle_epi8(sbot, t6);
+ x = _mm_xor_si128(x, K);
+ x = _mm_xor_si128(x, y);
+
+ const uint32_t which_sr = ((((rounds - 1) << 4) ^ 48) & 48) / 16;
+ return _mm_shuffle_epi8(x, sr[which_sr]);
+ }
+
+ __m128i t8 = _mm_xor_si128(_mm_shuffle_epi8(sb9t, t6),
+ _mm_xor_si128(_mm_shuffle_epi8(sb9u, t5), K));
+
+ __m128i t9 = mm_xor3(_mm_shuffle_epi8(t8, mc),
+ _mm_shuffle_epi8(sbdu, t5),
+ _mm_shuffle_epi8(sbdt, t6));
+
+ __m128i t12 = _mm_xor_si128(
+ _mm_xor_si128(
+ _mm_shuffle_epi8(t9, mc),
+ _mm_shuffle_epi8(sbbu, t5)),
+ _mm_shuffle_epi8(sbbt, t6));
+
+ B = _mm_xor_si128(_mm_xor_si128(_mm_shuffle_epi8(t12, mc),
+ _mm_shuffle_epi8(sbeu, t5)),
+ _mm_shuffle_epi8(sbet, t6));
+
+ mc = _mm_alignr_epi8(mc, mc, 12);
+ }
+ }
+
+}
+
+/*
+* AES-128 Encryption
+*/
+BOTAN_FUNC_ISA("ssse3")
+void AES_128::ssse3_encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const
+ {
+ const __m128i* in_mm = reinterpret_cast<const __m128i*>(in);
+ __m128i* out_mm = reinterpret_cast<__m128i*>(out);
+
+ const __m128i* keys = reinterpret_cast<const __m128i*>(m_EK.data());
+
+ CT::poison(in, blocks * block_size());
+
+ BOTAN_PARALLEL_FOR(size_t i = 0; i < blocks; ++i)
+ {
+ __m128i B = _mm_loadu_si128(in_mm + i);
+ _mm_storeu_si128(out_mm + i, aes_ssse3_encrypt(B, keys, 10));
+ }
+
+ CT::unpoison(in, blocks * block_size());
+ CT::unpoison(out, blocks * block_size());
+ }
+
+/*
+* AES-128 Decryption
+*/
+BOTAN_FUNC_ISA("ssse3")
+void AES_128::ssse3_decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const
+ {
+ const __m128i* in_mm = reinterpret_cast<const __m128i*>(in);
+ __m128i* out_mm = reinterpret_cast<__m128i*>(out);
+
+ const __m128i* keys = reinterpret_cast<const __m128i*>(m_DK.data());
+
+ CT::poison(in, blocks * block_size());
+
+ BOTAN_PARALLEL_FOR(size_t i = 0; i < blocks; ++i)
+ {
+ __m128i B = _mm_loadu_si128(in_mm + i);
+ _mm_storeu_si128(out_mm + i, aes_ssse3_decrypt(B, keys, 10));
+ }
+
+ CT::unpoison(in, blocks * block_size());
+ CT::unpoison(out, blocks * block_size());
+ }
+
+/*
+* AES-128 Key Schedule
+*/
+BOTAN_FUNC_ISA("ssse3")
+void AES_128::ssse3_key_schedule(const uint8_t keyb[], size_t)
+ {
+ __m128i rcon = _mm_set_epi32(0x702A9808, 0x4D7C7D81,
+ 0x1F8391B9, 0xAF9DEEB6);
+
+ __m128i key = _mm_loadu_si128(reinterpret_cast<const __m128i*>(keyb));
+
+ m_EK.resize(11*4);
+ m_DK.resize(11*4);
+
+ __m128i* EK_mm = reinterpret_cast<__m128i*>(m_EK.data());
+ __m128i* DK_mm = reinterpret_cast<__m128i*>(m_DK.data());
+
+ _mm_storeu_si128(DK_mm + 10, _mm_shuffle_epi8(key, sr[2]));
+
+ key = aes_schedule_transform(key, k_ipt1, k_ipt2);
+
+ _mm_storeu_si128(EK_mm, key);
+
+ for(size_t i = 1; i != 10; ++i)
+ {
+ key = aes_schedule_round(&rcon, key, key);
+
+ _mm_storeu_si128(EK_mm + i,
+ aes_schedule_mangle(key, (12-i) % 4));
+
+ _mm_storeu_si128(DK_mm + (10-i),
+ aes_schedule_mangle_dec(key, (10-i) % 4));
+ }
+
+ key = aes_schedule_round(&rcon, key, key);
+ _mm_storeu_si128(EK_mm + 10, aes_schedule_mangle_last(key, 2));
+ _mm_storeu_si128(DK_mm, aes_schedule_mangle_last_dec(key));
+ }
+
+/*
+* AES-192 Encryption
+*/
+BOTAN_FUNC_ISA("ssse3")
+void AES_192::ssse3_encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const
+ {
+ const __m128i* in_mm = reinterpret_cast<const __m128i*>(in);
+ __m128i* out_mm = reinterpret_cast<__m128i*>(out);
+
+ const __m128i* keys = reinterpret_cast<const __m128i*>(m_EK.data());
+
+ CT::poison(in, blocks * block_size());
+
+ for(size_t i = 0; i != blocks; ++i)
+ {
+ __m128i B = _mm_loadu_si128(in_mm + i);
+ _mm_storeu_si128(out_mm + i, aes_ssse3_encrypt(B, keys, 12));
+ }
+
+ CT::unpoison(in, blocks * block_size());
+ CT::unpoison(out, blocks * block_size());
+ }
+
+/*
+* AES-192 Decryption
+*/
+BOTAN_FUNC_ISA("ssse3")
+void AES_192::ssse3_decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const
+ {
+ const __m128i* in_mm = reinterpret_cast<const __m128i*>(in);
+ __m128i* out_mm = reinterpret_cast<__m128i*>(out);
+
+ const __m128i* keys = reinterpret_cast<const __m128i*>(m_DK.data());
+
+ CT::poison(in, blocks * block_size());
+
+ for(size_t i = 0; i != blocks; ++i)
+ {
+ __m128i B = _mm_loadu_si128(in_mm + i);
+ _mm_storeu_si128(out_mm + i, aes_ssse3_decrypt(B, keys, 12));
+ }
+
+ CT::unpoison(in, blocks * block_size());
+ CT::unpoison(out, blocks * block_size());
+ }
+
+/*
+* AES-192 Key Schedule
+*/
+BOTAN_FUNC_ISA("ssse3")
+void AES_192::ssse3_key_schedule(const uint8_t keyb[], size_t)
+ {
+ __m128i rcon = _mm_set_epi32(0x702A9808, 0x4D7C7D81,
+ 0x1F8391B9, 0xAF9DEEB6);
+
+ m_EK.resize(13*4);
+ m_DK.resize(13*4);
+
+ __m128i* EK_mm = reinterpret_cast<__m128i*>(m_EK.data());
+ __m128i* DK_mm = reinterpret_cast<__m128i*>(m_DK.data());
+
+ __m128i key1 = _mm_loadu_si128(reinterpret_cast<const __m128i*>(keyb));
+ __m128i key2 = _mm_loadu_si128(reinterpret_cast<const __m128i*>((keyb + 8)));
+
+ _mm_storeu_si128(DK_mm + 12, _mm_shuffle_epi8(key1, sr[0]));
+
+ key1 = aes_schedule_transform(key1, k_ipt1, k_ipt2);
+ key2 = aes_schedule_transform(key2, k_ipt1, k_ipt2);
+
+ _mm_storeu_si128(EK_mm + 0, key1);
+
+ // key2 with 8 high bytes masked off
+ __m128i t = _mm_slli_si128(_mm_srli_si128(key2, 8), 8);
+
+ for(size_t i = 0; i != 4; ++i)
+ {
+ key2 = aes_schedule_round(&rcon, key2, key1);
+
+ _mm_storeu_si128(EK_mm + 3*i+1,
+ aes_schedule_mangle(_mm_alignr_epi8(key2, t, 8), (i+3)%4));
+ _mm_storeu_si128(DK_mm + 11-3*i,
+ aes_schedule_mangle_dec(_mm_alignr_epi8(key2, t, 8), (i+3)%4));
+
+ t = aes_schedule_192_smear(key2, t);
+
+ _mm_storeu_si128(EK_mm + 3*i+2,
+ aes_schedule_mangle(t, (i+2)%4));
+ _mm_storeu_si128(DK_mm + 10-3*i,
+ aes_schedule_mangle_dec(t, (i+2)%4));
+
+ key2 = aes_schedule_round(&rcon, t, key2);
+
+ if(i == 3)
+ {
+ _mm_storeu_si128(EK_mm + 3*i+3,
+ aes_schedule_mangle_last(key2, (i+1)%4));
+ _mm_storeu_si128(DK_mm + 9-3*i,
+ aes_schedule_mangle_last_dec(key2));
+ }
+ else
+ {
+ _mm_storeu_si128(EK_mm + 3*i+3,
+ aes_schedule_mangle(key2, (i+1)%4));
+ _mm_storeu_si128(DK_mm + 9-3*i,
+ aes_schedule_mangle_dec(key2, (i+1)%4));
+ }
+
+ key1 = key2;
+ key2 = aes_schedule_192_smear(key2,
+ _mm_slli_si128(_mm_srli_si128(t, 8), 8));
+ t = _mm_slli_si128(_mm_srli_si128(key2, 8), 8);
+ }
+ }
+
+/*
+* AES-256 Encryption
+*/
+BOTAN_FUNC_ISA("ssse3")
+void AES_256::ssse3_encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const
+ {
+ const __m128i* in_mm = reinterpret_cast<const __m128i*>(in);
+ __m128i* out_mm = reinterpret_cast<__m128i*>(out);
+
+ const __m128i* keys = reinterpret_cast<const __m128i*>(m_EK.data());
+
+ CT::poison(in, blocks * block_size());
+
+ for(size_t i = 0; i != blocks; ++i)
+ {
+ __m128i B = _mm_loadu_si128(in_mm + i);
+ _mm_storeu_si128(out_mm + i, aes_ssse3_encrypt(B, keys, 14));
+ }
+
+ CT::unpoison(in, blocks * block_size());
+ CT::unpoison(out, blocks * block_size());
+ }
+
+/*
+* AES-256 Decryption
+*/
+BOTAN_FUNC_ISA("ssse3")
+void AES_256::ssse3_decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const
+ {
+ const __m128i* in_mm = reinterpret_cast<const __m128i*>(in);
+ __m128i* out_mm = reinterpret_cast<__m128i*>(out);
+
+ const __m128i* keys = reinterpret_cast<const __m128i*>(m_DK.data());
+
+ CT::poison(in, blocks * block_size());
+
+ for(size_t i = 0; i != blocks; ++i)
+ {
+ __m128i B = _mm_loadu_si128(in_mm + i);
+ _mm_storeu_si128(out_mm + i, aes_ssse3_decrypt(B, keys, 14));
+ }
+
+ CT::unpoison(in, blocks * block_size());
+ CT::unpoison(out, blocks * block_size());
+ }
+
+/*
+* AES-256 Key Schedule
+*/
+BOTAN_FUNC_ISA("ssse3")
+void AES_256::ssse3_key_schedule(const uint8_t keyb[], size_t)
+ {
+ __m128i rcon = _mm_set_epi32(0x702A9808, 0x4D7C7D81,
+ 0x1F8391B9, 0xAF9DEEB6);
+
+ m_EK.resize(15*4);
+ m_DK.resize(15*4);
+
+ __m128i* EK_mm = reinterpret_cast<__m128i*>(m_EK.data());
+ __m128i* DK_mm = reinterpret_cast<__m128i*>(m_DK.data());
+
+ __m128i key1 = _mm_loadu_si128(reinterpret_cast<const __m128i*>(keyb));
+ __m128i key2 = _mm_loadu_si128(reinterpret_cast<const __m128i*>((keyb + 16)));
+
+ _mm_storeu_si128(DK_mm + 14, _mm_shuffle_epi8(key1, sr[2]));
+
+ key1 = aes_schedule_transform(key1, k_ipt1, k_ipt2);
+ key2 = aes_schedule_transform(key2, k_ipt1, k_ipt2);
+
+ _mm_storeu_si128(EK_mm + 0, key1);
+ _mm_storeu_si128(EK_mm + 1, aes_schedule_mangle(key2, 3));
+
+ _mm_storeu_si128(DK_mm + 13, aes_schedule_mangle_dec(key2, 1));
+
+ for(size_t i = 2; i != 14; i += 2)
+ {
+ __m128i k_t = key2;
+ key1 = key2 = aes_schedule_round(&rcon, key2, key1);
+
+ _mm_storeu_si128(EK_mm + i, aes_schedule_mangle(key2, i % 4));
+ _mm_storeu_si128(DK_mm + (14-i), aes_schedule_mangle_dec(key2, (i+2) % 4));
+
+ key2 = aes_schedule_round(nullptr, _mm_shuffle_epi32(key2, 0xFF), k_t);
+ _mm_storeu_si128(EK_mm + i + 1, aes_schedule_mangle(key2, (i - 1) % 4));
+ _mm_storeu_si128(DK_mm + (13-i), aes_schedule_mangle_dec(key2, (i+1) % 4));
+ }
+
+ key2 = aes_schedule_round(&rcon, key2, key1);
+
+ _mm_storeu_si128(EK_mm + 14, aes_schedule_mangle_last(key2, 2));
+ _mm_storeu_si128(DK_mm + 0, aes_schedule_mangle_last_dec(key2));
+ }
+
+}
diff --git a/src/libs/3rdparty/botan/src/lib/block/aes/aes_ssse3/info.txt b/src/libs/3rdparty/botan/src/lib/block/aes/aes_ssse3/info.txt
new file mode 100644
index 0000000000..8457eed569
--- /dev/null
+++ b/src/libs/3rdparty/botan/src/lib/block/aes/aes_ssse3/info.txt
@@ -0,0 +1,15 @@
+<defines>
+AES_SSSE3 -> 20131128
+</defines>
+
+load_on auto
+
+need_isa ssse3
+
+# Intel C++ can't deal with syntax for defining constants :(
+<cc>
+gcc
+clang
+msvc
+sunstudio
+</cc>
diff --git a/src/libs/3rdparty/botan/src/lib/block/aes/info.txt b/src/libs/3rdparty/botan/src/lib/block/aes/info.txt
new file mode 100644
index 0000000000..62455cf2c3
--- /dev/null
+++ b/src/libs/3rdparty/botan/src/lib/block/aes/info.txt
@@ -0,0 +1,3 @@
+<defines>
+AES -> 20131128
+</defines>