优化显卡性能,抵制ASIC,ProgPoW算法到底是什么
倔强放肆的青春
发表于 2022-12-3 17:03:37
104
0
0
混合数据的 lane × REGS 个元素是由哈希种子初始化的。
void fill_mix(
    uint64_t hash_seed,
    uint32_t lane_id,
    uint32_t mix[PROGPOW_REGS]
)
{
    // Use FNV to expand the per-warp seed to per-lane
    // Use KISS to expand the per-lane seed to fill mix
    uint32_t fnv_hash = 0x811c9dc5;
    kiss99_t st;
    st.z = fnv1a(fnv_hash, seed);
    st.w = fnv1a(fnv_hash, seed >> 32);
    st.jsr = fnv1a(fnv_hash, lane_id);
    st.jcong = fnv1a(fnv_hash, lane_id);
    for (int i = 0; i < PROGPOW_REGS; i++)
        mix[i] = kiss99(st);
}
主要的搜索算法采用了Keccak海绵函数(宽度为800位,448比特率和352的容量)来生成一个种子,扩展种子,并在混合数据过程中加载序列和进行随机运算,然后压缩结果到最终的Keccak排列(参数相同)进行目标比较。bool progpow_search( const uint64_t prog_seed, const uint64_t nonce, const hash32_t header, const uint64_t target, const uint64_t *g_dag, // gigabyte DAG located in framebuffer const uint64_t *c_dag // kilobyte DAG located in l1 cache ) { uint32_t mix[PROGPOW_LANES][PROGPOW_REGS]; uint32_t result[8]; for (int i = 0; i = 0; // keccak(header..nonce) uint64_t seed = keccak_f800(header, nonce, result); // initialize mix for all lanes for (int l = 0; l ); } // Reduce all lanes to a single 128-bit result for (int i = 0; i = 0x811c9dc5; for (int l = 0; l
内循环采用FNV和KISS99从prog_seed生成随机序列。这个随机序列决定访问哪个混合状态以及执行什么随机运算。由于对prog_seed的更改相对较少,因此可以预料到的是,在挖矿过程中将会编译progPowLoop,而不是进行动态执行。
kiss99_t progPowInit(uint64_t prog_seed, int mix_seq[PROGPOW_REGS])
{
    kiss99_t prog_rnd;
    uint32_t fnv_hash = 0x811c9dc5;
    prog_rnd.z = fnv1a(fnv_hash, prog_seed);
    prog_rnd.w = fnv1a(fnv_hash, prog_seed >> 32);
    prog_rnd.jsr = fnv1a(fnv_hash, prog_seed);
    prog_rnd.jcong = fnv1a(fnv_hash, prog_seed >> 32);
    // Create a random sequence of mix destinations for merge()
    // guaranteeing every location is touched once
    // Uses Fisher–Yates shuffle
    for (int i = 0; i < PROGPOW_REGS; i++)
        mix_seq[i] = i;
    for (int i = PROGPOW_REGS - 1; i > 0; i--)
    {
        int j = kiss99(prog_rnd) % (i + 1);
        swap(mix_seq[i], mix_seq[j]);
    }
    return prog_rnd;
}
将数值合并到混合数据中的数学运算是为了保持熵。
// Merge new data from b into the value in a
// Assuming A has high entropy only do ops that retain entropy
// even if B is low entropy
// (IE don't do A&B)
void merge(uint32_t &a, uint32_t b, uint32_t r)
{
    switch (r % 4)
    {
    case 0: a = (a * 33) + b; break;
    case 1: a = (a ^ b) * 33; break;
    case 2: a = ROTL32(a, ((r >> 16) % 32)) ^ b; break;
    case 3: a = ROTR32(a, ((r >> 16) % 32)) ^ b; break;
    }
}
为随机数学选择的数学运算在CUDA和OpenCL(通用GPU的两种主要编程语言)中易于实现。
// Random math between two input values
uint32_t math(uint32_t a, uint32_t b, uint32_t r)
{
    switch (r % 11)
    {
    case 0: return a + b;
    case 1: return a * b;
    case 2: return mul_hi(a, b);
    case 3: return min(a, b);
    case 4: return ROTL32(a, b);
    case 5: return ROTR32(a, b);
    case 6: return a & b;
    case 7: return a | b;
    case 8: return a ^ b;
    case 9: return clz(a) + clz(b);
    case 10: return popcount(a) + popcount(b);
    }
}
主循环:// Helper to get the next value in the per-program random sequence #define rnd() (kiss99(prog_rnd)) // Helper to pick a random mix location #define mix_src() (rnd() % PROGPOW_REGS) // Helper to access the sequence of mix destinations #define mix_dst() (mix_seq[(mix_seq_cnt++)%PROGPOW_REGS]) void progPowLoop( const uint64_t prog_seed, const uint32_t loop, uint32_t mix[PROGPOW_LANES][PROGPOW_REGS], const uint64_t *g_dag, const uint32_t *c_dag) { // All lanes share a base address for the global load // Global offset uses mix[0] to guarantee it depends on the load result uint32_t offset_g = mix[loop%PROGPOW_LANES][0] % DAG_SIZE; // Lanes can execute in parallel and will be convergent for (int l = 0; l >32, rnd()); } }
成为第一个吐槽的人