/***********************************************************************/
/*								       */
/*			     Objective Caml			       */
/*								       */
/*	      Xavier Leroy, projet Cristal, INRIA Rocquencourt	       */
/*								       */
/*  Copyright 1996 Institut National de Recherche en Informatique et   */
/*  en Automatique.  All rights reserved.  This file is distributed    */
/*  under the terms of the GNU Library General Public License, with    */
/*  the special exception on linking described in file ../LICENSE.     */
/*								       */
/***********************************************************************/

/* By John Carr.  Based on sparc.S,v 1.24 by xleroy.  */

/* Asm part of the runtime system for the SPARC V9 processor
   using the SPARC 64 bit ABI.  The ABI uses ELF and does not
   prefix symbols with underscore. */

/* Must be preprocessed by cpp */

        .common caml_required_size, 8, 8

! Tell the assembler not to complain about use of %g2 and %g3, which
! are reserved for application use.  ocaml considers itself an
! application worthy of these registers.
.register %g2,#scratch
.register %g3,#scratch

#define Exn_ptr %l5
#define Alloc_ptr %l6
#ifdef INDIRECT_LIMIT
#define Alloc_limit_ptr %l7
#define Alloc_limit_reg %g1
#else
#define Alloc_limit_reg %l7
#endif

        BIAS = 2047	! SPARC V9 ABI-defined
        .MINFRAME = 176	! must match Emit.stack_regwin
        .TRAPOFF = .MINFRAME ! must match Emit trap patterns
	.CBOFF = .MINFRAME   ! see stack.h Callback_link macro

/* Load high 32 bits of 44 bit address. */
#define AddressH(symb,reg) \
        sethi	%h44(symb), reg; \
        or	reg, %m44(symb), reg; \
        sllx	reg, 12, reg
#define AddressH2(s1,reg1,s2,reg2) \
        sethi	%h44(s1), reg1; \
        sethi	%h44(s2), reg2; \
        or	reg1, %m44(s1), reg1; \
        or	reg2, %m44(s2), reg2; \
        sllx	reg1, 12, reg1; \
        sllx	reg2, 12, reg2
#define Load(symb, reg, sc) \
        AddressH(symb, sc); \
        ldx	[sc + %l44(symb)], reg
#define Store(reg,symb,sc) \
        AddressH(symb, sc); \
        stx	reg, [sc + %l44(symb)]
#define Address(symb,reg) \
        AddressH(symb, reg); \
        or	reg, %l44(symb), reg

/* Allocation functions */

        .text
        .global caml_allocN
        .global caml_call_gc

/* Required size in %g2 */
caml_allocN:
#ifdef INDIRECT_LIMIT
        ldx     [Alloc_limit_ptr], Alloc_limit_reg
#endif
        sub     Alloc_ptr, %g2, Alloc_ptr
        cmp     Alloc_ptr, Alloc_limit_reg
        blu,pn	%xcc,caml_call_gc
        nop
        retl
        nop
        .size	caml_allocN,.-caml_allocN

/* Required size in %g2 */
caml_call_gc:
        ! register windows need not be flushed here
        AddressH2(caml_required_size, %g1, caml_exception_pointer, %g5)
    /* Save %g2 (required size) */
        stx	%g2, [%g1 + %l44(caml_required_size)]
    /* Save exception pointer if GC raises */
        stx	Exn_ptr, [%g5 + %l44(caml_exception_pointer)]
    /* Save current allocation pointer for debugging purposes */
        Store(Alloc_ptr, caml_young_ptr, %g1)
    /* Record lowest stack address, adjusting for bias */
        add	%sp, BIAS, %g5
        Store(%g5, caml_bottom_of_stack, %g1)
    /* Record last return address */
        Store(%o7, caml_last_return_address, %g1)
    /* Allocate space on stack for caml_context structure and float regs */

        /* ??? Use save here instead?  Then caml %i and %l register are
           stored at (current)%i6 + bias, while caml %o are stored as this
           frame's %i registers at %sp + bias + 64.  The registers can be
           made contiguous by reduntantly storing copies of %i# at the high
           end of this stack block.  (A minimum block would do the same but
           room is needed for FP registers.)

           flushw takes 1 cycle if the windows are clean. A save, trapping
           flushw, restore sequence takes 101 cycles.  Save, save, trapping
           flushw, restore, restore takes 198 cycles.  Trapping save takes
           about 50 cycles.
 */

        sub	%sp, 20*8 + 32*8 + 8 /* FPRS */ + 8 /* pad */, %sp

        .ISAVEOFF = .MINFRAME + 32*8	! integer register stack offset
	.FSAVEOFF = .MINFRAME
    /* Save int regs on stack, in the order they are numbered by proc.ml,
       and save the address of the register block into caml_gc_regs */
.L100:	stx	%o0, [%sp + BIAS + .ISAVEOFF + 0x00]
        stx	%o1, [%sp + BIAS + .ISAVEOFF + 0x08]
        stx	%o2, [%sp + BIAS + .ISAVEOFF + 0x10]
        stx	%o3, [%sp + BIAS + .ISAVEOFF + 0x18]
        stx	%o4, [%sp + BIAS + .ISAVEOFF + 0x20]
        stx	%o5, [%sp + BIAS + .ISAVEOFF + 0x28]

        stx	%i0, [%sp + BIAS + .ISAVEOFF + 0x30]
        stx	%i1, [%sp + BIAS + .ISAVEOFF + 0x38]
        stx	%i2, [%sp + BIAS + .ISAVEOFF + 0x40]
        stx	%i3, [%sp + BIAS + .ISAVEOFF + 0x48]
        stx	%i4, [%sp + BIAS + .ISAVEOFF + 0x50]
        stx	%i5, [%sp + BIAS + .ISAVEOFF + 0x58]

        stx	%l0, [%sp + BIAS + .ISAVEOFF + 0x60]
        stx	%l1, [%sp + BIAS + .ISAVEOFF + 0x68]
        stx	%l2, [%sp + BIAS + .ISAVEOFF + 0x70]
        stx	%l3, [%sp + BIAS + .ISAVEOFF + 0x78]
        stx	%l4, [%sp + BIAS + .ISAVEOFF + 0x80]

      /* Note order. */
        stx	%g3, [%sp + BIAS + .ISAVEOFF + 0x88]
        stx	%g4, [%sp + BIAS + .ISAVEOFF + 0x90]
        stx	%g2, [%sp + BIAS + .ISAVEOFF + 0x98]

        add	%sp, BIAS + .ISAVEOFF, %g5
        Store(%g5, caml_gc_regs, %g1)
    /* Save the floating-point registers */

        rd	%fprs, %g5
        andcc	%g5, 1, %g0		! f0..f31 dirty bit
        be,pt	%xcc,0f
        andcc	%g5, 2, %g0		! f32..f62 dirty bit
        ! UltraSPARC could use VIS block store
        std	%f0, [%sp + BIAS + .FSAVEOFF + 0x00]
        std	%f2, [%sp + BIAS + .FSAVEOFF + 0x08]
        std	%f4, [%sp + BIAS + .FSAVEOFF + 0x10]
        std	%f6, [%sp + BIAS + .FSAVEOFF + 0x18]
        std	%f8, [%sp + BIAS + .FSAVEOFF + 0x20]
        std	%f10, [%sp + BIAS + .FSAVEOFF + 0x28]
        std	%f12, [%sp + BIAS + .FSAVEOFF + 0x30]
        std	%f14, [%sp + BIAS + .FSAVEOFF + 0x38]
        std	%f16, [%sp + BIAS + .FSAVEOFF + 0x40]
        std	%f18, [%sp + BIAS + .FSAVEOFF + 0x48]
        std	%f20, [%sp + BIAS + .FSAVEOFF + 0x50]
        std	%f22, [%sp + BIAS + .FSAVEOFF + 0x58]
        std	%f24, [%sp + BIAS + .FSAVEOFF + 0x60]
        std	%f26, [%sp + BIAS + .FSAVEOFF + 0x68]
        std	%f28, [%sp + BIAS + .FSAVEOFF + 0x70]
        ! leave space for %f30, for ease of comprehension
0:	be,pt	%xcc,1f
        nop
        std	%f32, [%sp + BIAS + .FSAVEOFF + 0x80]
        std	%f34, [%sp + BIAS + .FSAVEOFF + 0x88]
        std	%f36, [%sp + BIAS + .FSAVEOFF + 0x90]
        std	%f38, [%sp + BIAS + .FSAVEOFF + 0x98]
        std	%f40, [%sp + BIAS + .FSAVEOFF + 0xa0]
        std	%f42, [%sp + BIAS + .FSAVEOFF + 0xa8]
        std	%f44, [%sp + BIAS + .FSAVEOFF + 0xb0]
        std	%f46, [%sp + BIAS + .FSAVEOFF + 0xb8]
        std	%f48, [%sp + BIAS + .FSAVEOFF + 0xc0]
        std	%f50, [%sp + BIAS + .FSAVEOFF + 0xc8]
        std	%f52, [%sp + BIAS + .FSAVEOFF + 0xd0]
        std	%f54, [%sp + BIAS + .FSAVEOFF + 0xd8]
        std	%f56, [%sp + BIAS + .FSAVEOFF + 0xe0]
        std	%f58, [%sp + BIAS + .FSAVEOFF + 0xe8]
        std	%f60, [%sp + BIAS + .FSAVEOFF + 0xf0]
        andn	%g5, 3, %g1	! clear dirty flags
        std	%f62, [%sp + BIAS + .FSAVEOFF + 0xf8]
        wr	%g1, %fprs	! say we do not need the registers preserved
1:
        
    /* Call the garbage collector */
        call	caml_garbage_collection
        stx	%g5, [%sp + BIAS + .ISAVEOFF + 20*8]

        ldx	[%sp + BIAS + .ISAVEOFF + 20*8], %g5
    /* Restore all regs used by the code generator */
        ldx	[%sp + BIAS + .ISAVEOFF + 0x00], %o0
        ldx	[%sp + BIAS + .ISAVEOFF + 0x08], %o1
        ldx	[%sp + BIAS + .ISAVEOFF + 0x10], %o2
        ldx	[%sp + BIAS + .ISAVEOFF + 0x18], %o3
        ldx	[%sp + BIAS + .ISAVEOFF + 0x20], %o4
        ldx	[%sp + BIAS + .ISAVEOFF + 0x28], %o5

        ldx	[%sp + BIAS + .ISAVEOFF + 0x30], %i0
        ldx	[%sp + BIAS + .ISAVEOFF + 0x38], %i1
        ldx	[%sp + BIAS + .ISAVEOFF + 0x40], %i2
        ldx	[%sp + BIAS + .ISAVEOFF + 0x48], %i3
        ldx	[%sp + BIAS + .ISAVEOFF + 0x50], %i4
        ldx	[%sp + BIAS + .ISAVEOFF + 0x58], %i5

        ldx	[%sp + BIAS + .ISAVEOFF + 0x60], %l0
        ldx	[%sp + BIAS + .ISAVEOFF + 0x68], %l1
        ldx	[%sp + BIAS + .ISAVEOFF + 0x70], %l2
        ldx	[%sp + BIAS + .ISAVEOFF + 0x78], %l3
        ldx	[%sp + BIAS + .ISAVEOFF + 0x80], %l4

        ldx	[%sp + BIAS + .ISAVEOFF + 0x88], %g3
        ldx	[%sp + BIAS + .ISAVEOFF + 0x90], %g4
        ldx	[%sp + BIAS + .ISAVEOFF + 0x98], %g2

        andcc	%g5, 1, %g0
        be,pt	%xcc,0f
        andcc	%g5, 2, %g0
        ldd	[%sp + BIAS + .FSAVEOFF + 0x00], %f0
        ldd	[%sp + BIAS + .FSAVEOFF + 0x08], %f2
        ldd	[%sp + BIAS + .FSAVEOFF + 0x10], %f4
        ldd	[%sp + BIAS + .FSAVEOFF + 0x18], %f6
        ldd	[%sp + BIAS + .FSAVEOFF + 0x20], %f8
        ldd	[%sp + BIAS + .FSAVEOFF + 0x28], %f10
        ldd	[%sp + BIAS + .FSAVEOFF + 0x30], %f12
        ldd	[%sp + BIAS + .FSAVEOFF + 0x38], %f14
        ldd	[%sp + BIAS + .FSAVEOFF + 0x40], %f16
        ldd	[%sp + BIAS + .FSAVEOFF + 0x48], %f18
        ldd	[%sp + BIAS + .FSAVEOFF + 0x50], %f20
        ldd	[%sp + BIAS + .FSAVEOFF + 0x58], %f22
        ldd	[%sp + BIAS + .FSAVEOFF + 0x60], %f24
        ldd	[%sp + BIAS + .FSAVEOFF + 0x68], %f26
        ldd	[%sp + BIAS + .FSAVEOFF + 0x70], %f28
0:	be,pt	%xcc,1f
        nop
        ldd	[%sp + BIAS + .FSAVEOFF + 0x80], %f32
        ldd	[%sp + BIAS + .FSAVEOFF + 0x88], %f34
        ldd	[%sp + BIAS + .FSAVEOFF + 0x90], %f36
        ldd	[%sp + BIAS + .FSAVEOFF + 0x98], %f38
        ldd	[%sp + BIAS + .FSAVEOFF + 0xa0], %f40
        ldd	[%sp + BIAS + .FSAVEOFF + 0xa8], %f42
        ldd	[%sp + BIAS + .FSAVEOFF + 0xb0], %f44
        ldd	[%sp + BIAS + .FSAVEOFF + 0xb8], %f46
        ldd	[%sp + BIAS + .FSAVEOFF + 0xc0], %f48
        ldd	[%sp + BIAS + .FSAVEOFF + 0xc8], %f50
        ldd	[%sp + BIAS + .FSAVEOFF + 0xd0], %f52
        ldd	[%sp + BIAS + .FSAVEOFF + 0xd8], %f54
        ldd	[%sp + BIAS + .FSAVEOFF + 0xe0], %f56
        ldd	[%sp + BIAS + .FSAVEOFF + 0xe8], %f58
        ldd	[%sp + BIAS + .FSAVEOFF + 0xf0], %f60
        ldd	[%sp + BIAS + .FSAVEOFF + 0xf8], %f62
1:
	AddressH2(caml_young_ptr, %g1, caml_required_size, %g5)
    /* Reload alloc ptr */
        ldx	[%g1 + %l44(caml_young_ptr)], Alloc_ptr
    /* Allocate space for block */
        ldx	[%g5 + %l44(caml_required_size)], %g2
#ifdef INDIRECT_LIMIT
        ldx     [Alloc_limit_ptr], Alloc_limit_reg
#else
        Load(caml_young_limit, Alloc_limit_reg, %g1)
#endif
        sub     Alloc_ptr, %g2, Alloc_ptr
        cmp     Alloc_ptr, Alloc_limit_reg /* Check that we have enough free space */
        blu,pn	%xcc,.L100	    /* If not, call GC again */
        nop
    /* Return to caller */
        Load(caml_last_return_address, %o7, %g1)
        retl
        add	%sp, 20*8 + 32*8 + 8 /* FPRS */ + 8 /* pad */, %sp    /* in delay slot */
        .size	caml_call_gc,.-caml_call_gc

/* Call a C function from Caml */

        .global caml_c_call
/* Function to call is in %g2 */
caml_c_call:
        ! The global registers are available for use here.
        ! All other registers are reserved, except that the
        ! out registers not containing the return value may
        ! be modified after the call to the C function.
        add	%sp, BIAS, %g5
    /* Record lowest stack address and return address */
        Store(%g5, caml_bottom_of_stack, %g1)
        AddressH2(caml_last_return_address, %g5, caml_exception_pointer, %g1)
        stx	%o7, [%g5 + %l44(caml_last_return_address)]
    /* Save the exception handler and alloc pointer */
        stx	Exn_ptr, [%g1 + %l44(caml_exception_pointer)]
        AddressH(caml_young_ptr, %g1)
    /* Call the C function */
        call	%g2
        stx	Alloc_ptr, [%g1 + %l44(caml_young_ptr)]	 /* in delay slot */
    /* Reload return address */
        Load(caml_last_return_address, %o7, %g1)
#ifndef INDIRECT_LIMIT
        Load(caml_young_limit, Alloc_limit_reg, %g1)
#endif
    /* Reload alloc pointer */
        AddressH(caml_young_ptr, %g1)
    /* Return to caller */
        retl
        ldx	[%g1 + %l44(caml_young_ptr)], Alloc_ptr	 /* in delay slot */
        .size	caml_c_call,.-caml_c_call

/* Start the Caml program */

        .global caml_start_program
caml_start_program:
    /* Save all callee-save registers and allocate space for
        a callback link (24 + 8 to align).  The exception handler
        must be allocated separately.  */
        save	%sp, -.MINFRAME, %sp
    /* Address of code to call */
        Address(caml_program, %l2)

    /* Code shared with callback* */
.L108:
    /* Set up a callback link on the stack.  The words after the
       register window and outgoing arguments area must match
       the caml_context structure in stack.h. */
        AddressH2(caml_bottom_of_stack, %g1, caml_last_return_address, %g5)
        ldx	[%g1 + %l44(caml_bottom_of_stack)], %l0
        ldx	[%g5 + %l44(caml_last_return_address)], %l1
        Load(caml_gc_regs, %l3, %g1)
        stx	%l0, [%sp + BIAS + .CBOFF]
        stx	%l1, [%sp + BIAS + .CBOFF + 8]
        stx	%l3, [%sp + BIAS + .CBOFF + 16]
    /* Set up a trap frame to catch exceptions escaping the Caml code */
        call	.+8
        sub	%sp, 16, %sp
        add	%o7, .L110-., %o7	! point two instructions before .L110
        				! (See emit.mlp Lpushtrap/Lraise)
        Load(caml_exception_pointer, Exn_ptr, %g1)
        stx	%o7, [%sp + BIAS + .TRAPOFF]
        stx	Exn_ptr, [%sp + BIAS + .TRAPOFF + 8]
        mov	%sp, Exn_ptr
    /* Reload allocation pointers */
        AddressH2(caml_young_ptr, %g1, caml_young_limit, %g5)
        ldx	[%g1 + %l44(caml_young_ptr)], Alloc_ptr
#ifdef INDIRECT_LIMIT
        or	%g5, %l44(caml_young_limit), Alloc_limit_ptr
#else
        ldx	[%g5 + %l44(caml_young_limit)], Alloc_limit_reg
#endif

    /* Call the Caml code */
.L109:  call	%l2
        nop
    /* Pop trap frame and restore caml_exception_pointer */
        ldx	[%sp + BIAS + .TRAPOFF + 8], Exn_ptr
        add	%sp, 16, %sp
        Store(Exn_ptr, caml_exception_pointer, %g1)
    /* Pop callback link, restoring the global variables */
.L112:	ldx	[%sp + BIAS + .CBOFF], %l0
        ldx	[%sp + BIAS + .CBOFF], %l1
        ldx	[%sp + BIAS + .CBOFF], %l2
        AddressH2(caml_bottom_of_stack, %g1, caml_last_return_address, %g5)
        stx	%l0, [%g1 + %l44(caml_bottom_of_stack)]
        stx	%l1, [%g1 + %l44(caml_last_return_address)]
        Store(%l2, caml_gc_regs, %g1)
        !add	%sp, 32, %sp
    /* Save allocation pointer */
        Store(Alloc_ptr, caml_young_ptr, %g1)
    /* Reload callee-save registers and return */
    /* Move result where the C function expects it */
        ret
        restore	%o0, 0, %o0

.L110:
    /* The trap handler */
        Store(Exn_ptr, caml_exception_pointer, %g1)
    /* Encode exception bucket as an exception result */
        b	.L112
        or	%o0, 2, %o0

/* Raise an exception from C */

        .global caml_raise_exception
caml_raise_exception:
    /* Load exception pointer in a register outside the reg windows */
        Load(caml_exception_pointer, %g3, %g1)
    /* Save exception bucket in a register outside the reg windows */
        mov	%o0, %g2
    /* Pop some frames until the trap pointer is in the current frame. */
        cmp	%g3, %fp
        blt	%xcc,.L107		/* if Exn_ptr < %fp, over */
        nop
.L106:	restore
        cmp	%fp, %g3		/* if %fp <= Exn_ptr, loop */
        ble	%xcc,.L106
        nop
.L107:
    /* Reload allocation registers */
        AddressH2(caml_young_ptr, %g1, caml_young_limit, %g5)
        ldx	[%g1 + %l44(caml_young_ptr)], Alloc_ptr
#ifdef INDIRECT_LIMIT
        or	%g5, %l44(caml_young_limit), Alloc_limit_ptr
#else
        ldx	[%g5 + %l44(caml_young_limit)], Alloc_limit_reg
#endif
    /* Branch to exception handler */
        ldx	[%g3 + BIAS + .TRAPOFF], %g1
        ldx	[%g3 + BIAS + .TRAPOFF + 8], Exn_ptr
        add	%g3, 16, %sp
        jmp	%g1 + 8
    /* Restore bucket, in delay slot */
        mov	%g2, %o0
        .size	caml_raise_exception,.-caml_raise_exception

/* Callbacks C -> ML */

        .align	32
        .global caml_callback_exn
caml_callback_exn:
    /* Save callee-save registers and return address */
        save	%sp, -.MINFRAME, %sp
    /* Initial shuffling of arguments */
        mov	%i0, %g1
        mov	%i1, %i0	/* first arg */
        mov	%g1, %i1	/* environment */
        b	.L108
        ldx	[%g1], %l2	/* code pointer */
        .size	caml_callback_exn,.-caml_callback_exn

        .global caml_callback2_exn
caml_callback2_exn:
    /* Save callee-save registers and return address */
        save	%sp, -.MINFRAME, %sp
    /* Initial shuffling of arguments */
        mov	%i0, %g1
        AddressH(caml_apply2, %l3)
        mov	%i1, %i0	/* first arg */
        mov	%i2, %i1	/* second arg */
        mov	%g1, %i2	/* environment */
        b	.L108
        or	%l2, %l44(caml_apply2), %l2
        .size	caml_callback2_exn,.-caml_callback2_exn

        .global caml_callback3_exn
caml_callback3_exn:
    /* Save callee-save registers and return address */
        save	%sp, -.MINFRAME, %sp
    /* Initial shuffling of arguments */
        mov	%i0, %g1
        AddressH(caml_apply3, %l2)
        mov	%i1, %i0	/* first arg */
        mov	%i2, %i1	/* second arg */
        mov	%i3, %i2	/* third arg */
        mov	%g1, %i3	/* environment */
        b	.L108
        or	%l2, %l44(caml_apply3), %l2
        .size	caml_callback3_exn,.-caml_callback3_exn

#ifdef SYS_solaris
        .section ".rodata"
#else
        .data
#endif
        .global caml_system__frametable
        .align	8
caml_system__frametable:
        .xword	1		/* one descriptor */
        .xword	.L109		/* return address into callback */
        .half	-1		/* negative frame size => use callback link */
        .half	0		/* no roots */
        .size	caml_system__frametable,.-caml_system__frametable

#ifdef SYS_solaris
        .type caml_allocN, #function
        .type caml_call_gc, #function
        .type caml_c_call, #function
        .type caml_start_program, #function
        .type caml_raise_exception, #function
        .type caml_callback_exn,#function
        .type caml_callback2_exn,#function
        .type caml_callback3_exn,#function
        .type caml_system__frametable, #object
#endif
