Lockpick/source/sha256_armv8.s

164 lines
4.9 KiB
ArmAsm

.section .text.sha256_armv8, "ax", %progbits
.align 5
.arch armv8-a+crypto
# SHA256 assembly implementation for ARMv8 AArch64 (based on linux source code)
.global sha256_block_data_order
.type sha256_block_data_order,%function
sha256_block_data_order:
.Lsha256prolog:
stp x29, x30, [sp,#-64]!
mov x29, sp
adr x3, .LKConstant256
str q8, [sp, #16]
ld1 {v16.4s-v19.4s}, [x3], #64
ld1 {v0.4s}, [x0], #16
ld1 {v20.4s-v23.4s}, [x3], #64
add x2, x1, x2, lsl #6
ld1 {v1.4s}, [x0]
ld1 {v24.4s-v27.4s}, [x3], #64
sub x0, x0, #16
str q9, [sp, #32]
str q10, [sp, #48]
ld1 {v28.4s-v31.4s}, [x3], #64
.Lsha256loop:
ld1 {v5.16b-v8.16b}, [x1], #64
mov v2.16b, v0.16b
mov v3.16b, v1.16b
rev32 v5.16b, v5.16b
rev32 v6.16b, v6.16b
add v9.4s, v5.4s, v16.4s
rev32 v7.16b, v7.16b
add v10.4s, v6.4s, v17.4s
mov v4.16b, v2.16b
sha256h q2, q3, v9.4s
sha256h2 q3, q4, v9.4s
sha256su0 v5.4s, v6.4s
rev32 v8.16b, v8.16b
add v9.4s, v7.4s, v18.4s
mov v4.16b, v2.16b
sha256h q2, q3, v10.4s
sha256h2 q3, q4, v10.4s
sha256su0 v6.4s, v7.4s
sha256su1 v5.4s, v7.4s, v8.4s
add v10.4s, v8.4s, v19.4s
mov v4.16b, v2.16b
sha256h q2, q3, v9.4s
sha256h2 q3, q4, v9.4s
sha256su0 v7.4s, v8.4s
sha256su1 v6.4s, v8.4s, v5.4s
add v9.4s, v5.4s, v20.4s
mov v4.16b, v2.16b
sha256h q2, q3, v10.4s
sha256h2 q3, q4, v10.4s
sha256su0 v8.4s, v5.4s
sha256su1 v7.4s, v5.4s, v6.4s
add v10.4s, v6.4s, v21.4s
mov v4.16b, v2.16b
sha256h q2, q3, v9.4s
sha256h2 q3, q4, v9.4s
sha256su0 v5.4s, v6.4s
sha256su1 v8.4s, v6.4s, v7.4s
add v9.4s, v7.4s, v22.4s
mov v4.16b, v2.16b
sha256h q2, q3, v10.4s
sha256h2 q3, q4, v10.4s
sha256su0 v6.4s, v7.4s
sha256su1 v5.4s, v7.4s, v8.4s
add v10.4s, v8.4s, v23.4s
mov v4.16b, v2.16b
sha256h q2, q3, v9.4s
sha256h2 q3, q4, v9.4s
sha256su0 v7.4s, v8.4s
sha256su1 v6.4s, v8.4s, v5.4s
add v9.4s, v5.4s, v24.4s
mov v4.16b, v2.16b
sha256h q2, q3, v10.4s
sha256h2 q3, q4, v10.4s
sha256su0 v8.4s, v5.4s
sha256su1 v7.4s, v5.4s, v6.4s
add v10.4s, v6.4s, v25.4s
mov v4.16b, v2.16b
sha256h q2, q3, v9.4s
sha256h2 q3, q4, v9.4s
sha256su0 v5.4s, v6.4s
sha256su1 v8.4s, v6.4s, v7.4s
add v9.4s, v7.4s, v26.4s
mov v4.16b, v2.16b
sha256h q2, q3, v10.4s
sha256h2 q3, q4, v10.4s
sha256su0 v6.4s, v7.4s
sha256su1 v5.4s, v7.4s, v8.4s
add v10.4s, v8.4s, v27.4s
mov v4.16b, v2.16b
sha256h q2, q3, v9.4s
sha256h2 q3, q4, v9.4s
sha256su0 v7.4s, v8.4s
sha256su1 v6.4s, v8.4s, v5.4s
add v9.4s, v5.4s, v28.4s
mov v4.16b, v2.16b
sha256h q2, q3, v10.4s
sha256h2 q3, q4, v10.4s
sha256su0 v8.4s, v5.4s
sha256su1 v7.4s, v5.4s, v6.4s
add v10.4s, v6.4s, v29.4s
mov v4.16b, v2.16b
sha256h q2, q3, v9.4s
sha256h2 q3, q4, v9.4s
sha256su1 v8.4s, v6.4s, v7.4s
add v9.4s, v7.4s, v30.4s
mov v4.16b, v2.16b
sha256h q2, q3, v10.4s
sha256h2 q3, q4, v10.4s
add v10.4s, v8.4s, v31.4s
mov v4.16b, v2.16b
sha256h q2, q3, v9.4s
sha256h2 q3, q4, v9.4s
mov v4.16b, v2.16b
sha256h q2, q3, v10.4s
sha256h2 q3, q4, v10.4s
cmp x1, x2
add v1.4s, v1.4s, v3.4s
add v0.4s, v0.4s, v2.4s
b.ne .Lsha256loop
.Lsha256epilog:
st1 {v0.4s,v1.4s}, [x0]
ldr q10, [sp, #48]
ldr q9, [sp, #32]
ldr q8, [sp, #16]
ldr x29, [sp], #64
ret
.align 5
.LKConstant256:
.word 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
.word 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
.word 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
.word 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
.word 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc
.word 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
.word 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
.word 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967
.word 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
.word 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
.word 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
.word 0xd192e819,0xd6990624,0xf40e3585,0x106aa070
.word 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
.word 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
.word 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
.word 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
.size sha256_block_data_order,.-sha256_block_data_order
.align 2