
// void arch_setup_registers(struct register_state* registers_before);
.global arch_setup_registers

// void arch_store_vector_registers(struct register_state* state);
.global arch_store_vector_registers

//  void wrap_main(int argc, char** argv, void* stack);
.global wrap_main

.extern wrapped_main

arch_setup_registers:
    // Loads the register values stored in *registers_before ($a0) into the CPU
    // and then jumps to the stored pc. Control does not fall through to a
    // return: $ra ($r1) and $sp ($r3) are overwritten together with the other
    // registers, and the routine ends in a jump that does not link.
    //
    // Offsets read relative to $a0 by this routine:
    //      0 ..  255   general-purpose slots, 8 bytes each, indexed by register number
    //    256 .. 1279   vector slots, 32 bytes each, $xr0..$xr31
    //   1280           address to jump to (pc)
    //
    // $a0 ($r4) and $a1 ($r5) are NOT restored here; see the notes further down.
    
    // vector registers
    // temporarily advance $a0 by 256 so that the xvld offsets below start at 0
    addi.d $a0, $a0, 256
    xvld $xr0,  $a0, 0  
    xvld $xr1,  $a0, 32 
    xvld $xr2,  $a0, 64 
    xvld $xr3,  $a0, 96 
    xvld $xr4,  $a0, 128
    xvld $xr5,  $a0, 160
    xvld $xr6,  $a0,  192
    xvld $xr7,  $a0,  224
    xvld $xr8,  $a0,  256
    xvld $xr9,  $a0,  288
    xvld $xr10, $a0,  320
    xvld $xr11, $a0,  352
    xvld $xr12, $a0,  384
    xvld $xr13, $a0,  416
    xvld $xr14, $a0,  448
    xvld $xr15, $a0,  480
    xvld $xr16, $a0,  512
    xvld $xr17, $a0,  544
    xvld $xr18, $a0,  576
    xvld $xr19, $a0,  608
    xvld $xr20, $a0,  640
    xvld $xr21, $a0,  672
    xvld $xr22, $a0,  704
    xvld $xr23, $a0,  736
    xvld $xr24, $a0,  768
    xvld $xr25, $a0,  800
    xvld $xr26, $a0,  832
    xvld $xr27, $a0,  864
    xvld $xr28, $a0,  896
    xvld $xr29, $a0,  928
    xvld $xr30, $a0,  960
    xvld $xr31, $a0,  992
    // undo the +256 so $a0 points at the start of the state block again
    addi.d $a0, $a0,  -256
    
    // floating point status registers
    // TODO
    
    // general-purpose registers
    // $r0 is the hard-wired zero register, so the first load does not change
    // any register value. From here on $a0 must stay intact because it is the
    // base address of every remaining load.
    ld.d $r0, $a0, 0
    ld.d $r1, $a0, 8
    ld.d $r2, $a0, 16
    ld.d $r3, $a0, 24
    // ld.d $r4, $a0, 32 // (=a0) store address. do this one last
    // ld.d $r5, $a0, 40 // (=a1) will be set to address to jump to
    ld.d $r6, $a0, 48
    ld.d $r7, $a0, 56
    ld.d $r8, $a0, 64
    ld.d $r9, $a0, 72
    ld.d $r10, $a0, 80
    ld.d $r11, $a0, 88
    ld.d $r12, $a0, 96
    ld.d $r13, $a0, 104
    ld.d $r14, $a0, 112
    ld.d $r15, $a0, 120
    ld.d $r16, $a0, 128
    ld.d $r17, $a0, 136
    ld.d $r18, $a0, 144
    ld.d $r19, $a0, 152
    ld.d $r20, $a0, 160
    ld.d $r21, $a0, 168
    ld.d $r22, $a0, 176
    ld.d $r23, $a0, 184
    ld.d $r24, $a0, 192
    ld.d $r25, $a0, 200
    ld.d $r26, $a0, 208
    ld.d $r27, $a0, 216
    ld.d $r28, $a0, 224
    ld.d $r29, $a0, 232
    ld.d $r30, $a0, 240
    ld.d $r31, $a0, 248
    
    // address to jump to (pc)
    // $a1 is used as the scratch register for the jump target, which is why
    // its saved value (offset 40) cannot be restored inside this routine.
    ld.d $a1, $a0, 1280
    
    // put an instruction barrier here just in case the architecture needs it
    ibar 0
    
    // jump to target address
    // target address must start with
    // ld.d $r5, $a0, 40
    // ld.d $r4, $a0, 32
    // (those two loads restore $a1 and finally $a0 from the state block;
    //  $a0 has to come last because it is the base pointer for both)
    // The link register operand is $zero, so no return address is recorded.
    jirl $zero, $a1, 0

arch_store_vector_registers:
    // Writes the current contents of $xr0..$xr31 into the vector-register
    // area of *state.
    //   In:       $a0 = state
    //   Modifies: $a0 (left at state + 256 on return), memory at state+256..state+1279

    // Point $a0 at &state->vec_registers, which starts 256 bytes into the struct.
    addi.d $a0, $a0, 256

    // One 32-byte slot per register, laid out in register-number order.
    // The loop expands to 32 xvst instructions with offsets 0, 32, ..., 992.
    .irp vreg, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31
    xvst $xr\vreg, $a0, 32 * \vreg
    .endr

    // Return to the caller (this is what the `ret` alias stands for).
    jirl $zero, $ra, 0

wrap_main:
    // Switch to the stack passed as the third argument ($a2).
    move $sp, $a2
    // argc ($a0) and argv ($a1) have not been touched, so wrapped_main can be
    // entered directly. `b` does not write $ra, so $ra still holds the
    // return address that wrap_main itself was called with.
    b wrapped_main
    
