/* RCS identification string for this revision of the file. */
static char rcsid[] = "$Id: sunprotect.c,v 1.4 1996/07/09 19:41:51 paul Exp $";

/*
 * This file contains the majority of the code for handling smx memory
 * protection, and also deals with mapping and unmapping of user processes
 * to and from the low memory addresses at which they have been linked to
 * execute. 
 *
 * There are three levels of protection: level 0 ('none') - no protection;
 * level 1 ('half') - r-x text segments for kernel, mm, fs and init
 * in "physical" memory, and for the mapped text segment of the current
 * user process; level 2 ('full') - full protection.  With full protection,
 * a process may not access smx memory outside its own address space,
 * with the exception of a small part of the kernel (eg. the layer 1
 * interrupt stack). Level 1 is the default.  Level 2 slows execution down
 * a lot, but is valuable in tracking down certain types of memory
 * corruption bugs. 
 *
 * The main entry points are:
 *	protect_init:		initialise protection info
 *      mem_released:           called when a process' addr space is released
 *	set_protect:		set protection on a region
 *
 *	entering_kernel:	called when switching to kernel
 *	leaving_kernel:		called when switching from kernel.
 *            Associated functions are mprotect, kernel_on, kernel_off,
 *                              set_process_prot, map_process, unmap_process
 *
 *	zeroclicks:		wrapper for the real zeroclicks
 *	copyclicks:		wrapper for the real copyclicks
 *	phys_copy:		wrapper for the real phys_copy
 *
 * These wrapper functions look after enabling access before the copy
 * and disabling access after the copy.
 */
 
#include "kernel.h"
#include "const.h"
#include <minix/com.h>
#include <sun/syscall.h>
#include "assert.h"

#include "proc.h"

/*
 * For protection purposes, the size of the kernel stack must be a
 * multiple of the click size.  The build is stopped with an explicit
 * diagnostic if it is not.
 */
#if K_STACK_BYTES % CLICK_SIZE != 0
#error "The kernel stack is not a multiple of the click size"
#endif

/*
 * Larger of two values.  This is a macro, so whichever argument is
 * selected is evaluated twice - do not pass expressions with side effects.
 */
#define max(a, b) (((a) > (b)) ? (a) : (b))

/*
 * SunOS's protection codes for mprotect(2).  The individual bits may be
 * OR-ed together; the two combinations used in this file are named below.
 */

#define PROT_READ        0x1       /* page can be read */
#define PROT_WRITE       0x2       /* page can be written */
#define PROT_EXEC        0x4       /* page can be executed */
#define PROT_NONE        0x0       /* page can not be accessed */
/* rwx: every kind of access allowed */
#define PROT_ALL         (PROT_READ | PROT_WRITE | PROT_EXEC)
/* r-x: readable and executable but not writable (used for text) */
#define PROT_READ_EXEC   (PROT_READ | PROT_EXEC)


/*
 * The information in segs (see below) is used to partition the kernel
 * address space into separate areas, each of which has some protection mode
 * when execution is outside the kernel and full protection is enabled.
 * Details of each area are held in a prot_info struct; protect_init
 * builds the array of these from segs.
 */
struct prot_info {
    phys_bytes begin;      /* Beginning of area */
    int len;               /* Length of area, in bytes */
    int prot;              /* Permissions on the area (PROT_* bits) */
};

/*
 * Number of prot_info entries available in pinfo.
 *
 * In the worst case, we need 2 * SEGS_SIZE + 1 prot_info structs, where
 * SEGS_SIZE is the number of entries in segs (5, giving 11).
 * This occurs when there are gaps before and after each of the
 * areas that is to remain accessible.
 */
#define SEGMENTS (11)


/*
 * The protect_info structure contains all information needed
 * to protect and de-protect the kernel.  It must remain
 * readable when execution is outside the kernel so that when
 * a signal occurs the information needed to turn on access to the
 * kernel is available.  prot_info specifies what the kernel
 * protections should be with execution outside the kernel.
 *
 * prot_lev is always set by protect_init; the remaining fields are
 * only filled in when full protection is selected.
 */
struct protect_info {
    int prot_lev;                   /* protection level in force */
    phys_bytes kernel_base;         /* address at which the kernel begins */
    phys_bytes kernel_text;         /* kernel text size, in bytes */
    phys_bytes kernel_data;         /* kernel data size, in bytes */
    struct prot_info prot_info[SEGMENTS];   /* areas covering the kernel */
    int numsegs;                    /* number of prot_info entries in use */
    int in_kernel;                  /* is execution in the kernel? */
} pinfo;

/*
 * Current user proc mapped into low virtual memory, or 0 if none.
 * Used to avoid mapping a process in that is already mapped in.
 * Maintained by map_process and unmap_process.
 */
static struct proc *mapped_proc = 0;


/*
 * We need to know about these function addresses to set kernel protections
 * when execution is outside the kernel.  Each start/end pair delimits a
 * range of code recorded in segs.  SunOS, SunOSend, mpx_start and mpx_end
 * are defined outside this file; protect_start and protect_end are empty
 * marker functions defined later in this file.
 */
extern int SunOS();
extern void SunOSend(), mpx_start(), mpx_end();
static void protect_start(void);
static void protect_end(void);

/*
 * A seg_info struct records protection information for an address
 * range.  The segs array specifies all areas that must remain
 * accessible in some way when execution is outside the kernel.  All
 * stack, data and text needed to get to the point where kernel_on has
 * finished its work must be available.  This means that the following
 * must be accessible: the interrupt stack, the interrupt handler
 * (SunOSsig), the SunOS function that performs system calls, the
 * relevant code in this file, and the pinfo structure.  The fewer of
 * these segments there are, the faster context switching in full
 * protection will be.  About the only thing that would be quite easy to
 * do would be to have the relevant functions from mpx.c and this file in
 * a single .c file.
 */
struct seg_info {
	/* begin is the first address of the range, end the first address past it */
	phys_bytes begin, end;
	int prot;		/* PROT_* bits the range keeps outside the kernel */
};

/*
 * Areas that stay accessible outside the kernel.  The first entry is a
 * placeholder: protect_init fills in the kernel stack bounds at boot,
 * then click-aligns and sorts the whole array by beginning address.
 */
struct seg_info segs[] = {
	{0, 0, PROT_ALL},      /* entry for kernel stack */
	/* code in this file between the protect_start/protect_end markers */
	{(phys_bytes)protect_start, (phys_bytes)protect_end, PROT_READ_EXEC},
	/* code between the mpx_start/mpx_end markers */
	{(phys_bytes)mpx_start, (phys_bytes)mpx_end, PROT_READ_EXEC},
	/* the SunOS system call function, up to SunOSend */
	{(phys_bytes)SunOS, (phys_bytes)SunOSend, PROT_READ_EXEC},
	/* pinfo itself: read-only outside the kernel */
	{(phys_bytes)&pinfo, (phys_bytes)&pinfo + sizeof(pinfo), PROT_READ},
};

/* Number of entries in segs */
#define SEGS_SIZE (sizeof(segs) / sizeof(struct seg_info))


/*
 * Local functions.  All are private to this file and are defined
 * further down.
 */
static void mprotect(phys_bytes addr, int len, int prot);
static void kernel_on(void);
static void kernel_off(void);
static void set_process_prot(struct proc *p,
			     int text_prot, int data_stack_prot);
static void map_process(struct proc *p);
static void unmap_process(struct proc *p);


/*
 * Function: protect_init
 * Parameters: kernel_base - virtual address that the kernel begins at
 *             kernel_text - kernel text segment size (in clicks)
 *             kernel_data - kernel data segment size (in clicks)
 *             mem_clicks - total smx "physical" memory (in clicks)
 *             level - protection level (0, 1 or 2).
 * Returns: nothing
 *
 * Called during system bootstrapping.  Data structures and memory protection
 * are initialised according to the protection level specified:
 *   - full protection: pinfo is filled in (including the prot_info table
 *     derived from segs), the kernel is made accessible and the rest of
 *     smx memory is made inaccessible;
 *   - half protection: the kernel text and the text segments of the
 *     processes from BEG_SERV_ADDR to BEG_USER_ADDR are made r-x;
 *   - no protection: nothing is done.
 */
void protect_init(phys_bytes kernel_base, phys_clicks kernel_text,
		  phys_clicks kernel_data, phys_clicks mem_clicks,
		  int level)
{
    phys_bytes kernel_end;      /* first click after kernel */
    phys_bytes next_click;      /* first address not yet covered by pinfo */
    int i, j;
    register struct proc *rp;
    
    pinfo.prot_lev = level;
    debug_str("Protection level ");
    debug_int(pinfo.prot_lev);
    debug_char('\n');

    if (pinfo.prot_lev == FULL_PROT) {
	/*
	 * Full protection.  Start by recording details of the kernel
	 * address space (needed when enabling access to the kernel).
	 * Sizes are kept in bytes.
	 */
	pinfo.kernel_base = kernel_base;
	pinfo.kernel_text = kernel_text << CLICK_SHIFT;
	pinfo.kernel_data = kernel_data << CLICK_SHIFT;

	pinfo.in_kernel = 1;
	kernel_end = pinfo.kernel_base + pinfo.kernel_text + pinfo.kernel_data;
	
	/*
	 * Update details in segs.  Details of the kernel stack are
	 * added, and all entries are aligned to click boundaries
	 */			
	segs[0].end = upclick(getksp());
	segs[0].begin = segs[0].end - K_STACK_BYTES;
	for (i = 0 ; i < SEGS_SIZE ; i++) {
	    segs[i].begin = downclick(segs[i].begin);
	    segs[i].end = upclick(segs[i].end);
	}
	
	/*
	 * Sort segments by beginning address (bubble sort; the array
	 * is tiny).  Segments need to be sorted for the next step to work.
	 */
	{
	    int done = 0;
	    struct seg_info temp;
	    
	    while (!done) {
		done = 1;
		for (i = 0; i < SEGS_SIZE - 1; i++) {
		    if (segs[i].begin > segs[i+1].begin) {
			temp = segs[i];
			segs[i] = segs[i+1];
			segs[i+1] = temp;
			done = 0;
		    }
		}
	    }
	}
	
	/*
	 * The entries in segs cover only parts of the kernel address
	 * space.  The entries in pinfo.prot_info collectively cover
	 * the entire kernel address space, so that when access to the
	 * kernel is disabled we have information on what to do for all
	 * kernel areas.  We now generate pinfo.prot_info from segs.
	 * This is basically a copying operation, with extra entries
	 * added to pinfo.prot_info to cover gaps in segs, and overlapping
	 * segs entries merged.
	 */
	j = -1;      /* j is the index of the last pinfo.prot_info entry used */
	next_click = pinfo.kernel_base;
	for (i = 0 ; i < SEGS_SIZE ; i++) {
	    /*
	     * Each time around the loop deal with an element of segs.
	     */
	    if (segs[i].begin > next_click) {
		/*
		 * There's a gap between this special segment
		 * and the previous one - fill it with a no go area.
		 */
		j++;
		pinfo.prot_info[j].begin = next_click;
		pinfo.prot_info[j].len = segs[i].begin - next_click;
		pinfo.prot_info[j].prot = PROT_NONE;
	    }

	    if (segs[i].begin < next_click) {
		/*
		 * An overlap - merge into a single segment,
		 * with protection as weak as required.  The entry being
		 * extended is the one copied from the previous segs
		 * element, so j must already be valid; it is not only
		 * if the first segment begins below kernel_base.
		 */
		assert(j >= 0);
		pinfo.prot_info[j].len = max(pinfo.prot_info[j].len,
				       segs[i].end - pinfo.prot_info[j].begin);
		pinfo.prot_info[j].prot |= segs[i].prot;
	    } else {
		/*
		 * No overlap - create a new segment
		 */
		j++;
		pinfo.prot_info[j].begin = segs[i].begin;
		pinfo.prot_info[j].len = segs[i].end - segs[i].begin;
		pinfo.prot_info[j].prot = segs[i].prot;
	    }
	    next_click = pinfo.prot_info[j].begin + pinfo.prot_info[j].len;
	}
	
	/*
	 * Is there a gap after the last segs entry?
	 */
	if (next_click != kernel_end) {
	    j++;
	    pinfo.prot_info[j].begin = next_click;
	    pinfo.prot_info[j].len = kernel_end - next_click;
	    pinfo.prot_info[j].prot = PROT_NONE;
	}

	/*
	 * In the worst case (a gap before every segs entry plus a
	 * trailing gap) exactly SEGMENTS entries are used, so a full
	 * table is legitimate.
	 */
	pinfo.numsegs = j + 1;
	assert(pinfo.numsegs <= SEGMENTS);
	
	/*
	 * Dump the pinfo struct if debugging is on
	 */
	debug_str("kbase: "); debug_int(pinfo.kernel_base);
	debug_str("\nktext: "); debug_int(pinfo.kernel_text);
	debug_str("\nkdata: "); debug_int(pinfo.kernel_data);
	for (i=0 ; i < pinfo.numsegs ; i++) {
	    debug_str("\nbegin: "); debug_int(pinfo.prot_info[i].begin);
	    debug_str("\nlen: "); debug_int(pinfo.prot_info[i].len);
	    debug_str("\nprot: "); debug_int(pinfo.prot_info[i].prot);
	}
	debug_char('\n');

	/*
	 * Now that the data structures are set up, setup the current
	 * protection so that there is normal access to the kernel, and
	 * no access to the rest of the SunOS Minix address space.
	 */
	kernel_on();
	mprotect(kernel_end, (mem_clicks << CLICK_SHIFT) -
		 (pinfo.kernel_text + pinfo.kernel_data), PROT_NONE);
		 
    } else if (pinfo.prot_lev == HALF_PROT) {
	/* 
	 * For half protection, we need to make the kernel, MM, FS, INET and
	 * init text segments read-exec only.  The loop bound is inclusive
	 * so that the process at BEG_USER_ADDR is covered too.
	 */
     
        mprotect(kernel_base,  kernel_text << CLICK_SHIFT, PROT_READ_EXEC);
        for (rp = BEG_SERV_ADDR; rp <= BEG_USER_ADDR; rp++){
            mprotect(rp->p_map[T].mem_phys << CLICK_SHIFT,
		     rp->p_map[T].mem_len << CLICK_SHIFT,
		     PROT_READ_EXEC);
        }
    } /* prot_lev == NO_PROT means no protection; do nothing. */
}


/*
 * Function: mem_released
 * Parameter: p - process whose memory is being released.
 * Returns: nothing
 *
 * Invoked when a process is about to give up its address space (it has
 * called exec or exit), so that this module can drop anything it has
 * cached about the process: the low-memory mapping is removed if p is
 * the mapped process, and gwin_proc is cleared if it refers to p.
 */
void mem_released(struct proc *p)
{
    unmap_process(p);

    if (gwin_proc != p) {
	return;
    }
    gwin_proc = 0;
}


/*
 * Function: set_protect
 * Parameters: start - starting address of the area whose protection is to
 *                     be changed.
 *             len - length of area (in bytes).
 *             prot - protection to assign to area (PROT_* bits).
 * Returns: nothing
 *
 * Set the protection for an area of memory.  The area might not be
 * click aligned, so the alignment must be performed before mprotect can be
 * called.
 *
 * Only acts under full protection.  In lower level protection the only
 * areas of "physical" memory that are (at half protection) not writable
 * are the text segments of kernel, mm, fs and init, which are loaded by
 * the bootstrap and then never changed.
 */
void set_protect(phys_bytes start, int len, int prot)
{
    phys_bytes kernel_end;      /* first address past the kernel */
    phys_bytes prot_start;
    int prot_len;

    if (pinfo.prot_lev != FULL_PROT) {
	return;
    }

    /*
     * If the area is part of the kernel then just quietly ignore this
     * request as the required access should already be possible.  The
     * lower bound is inclusive: an area starting exactly at kernel_base
     * is kernel memory and must not be re-protected.
     */
    kernel_end = pinfo.kernel_base + pinfo.kernel_text + pinfo.kernel_data;
    if (start >= pinfo.kernel_base && start < kernel_end) {
	return;
    }

    /*
     * Click align the area whose protection is to be changed.
     */
    prot_start = downclick(start);
    prot_len = upclick(start + len) - prot_start;
    mprotect(prot_start, prot_len, prot);
}


/*
 * DO NOT MOVE THIS FUNCTION---IT MARKS THE BEGINNING OF THE AREA OF CODE
 * THAT MUST BE EXECUTABLE WHEN EXECUTION IS OUTSIDE THE KERNEL.
 *
 * The function is deliberately empty: only its address is used, as the
 * start of the protect_start..protect_end entry in segs.
 */
static void protect_start(void) {}


/*
 * Function: mprotect
 * Parameters: addr - start of the area
 *             len - length of the area
 *             prot - PROT_* bits to apply
 * Returns: nothing
 *
 * Thin wrapper around the SunOS mprotect(2) system call, issued through
 * SunOS().  A failure is not fatal: it is only reported on the console.
 */
static void mprotect(phys_bytes addr, int len, int prot)
{
    typedef char *caddr_t;
    int status;

    status = SunOS(SYS_mprotect, (caddr_t) addr, len, prot);
    if (status != -1) {
	return;
    }

    printk("mprotect failed: address 0x%x, len 0x%x, prot 0%o\n",
	   addr, len, prot);
}


/*
 * Function: entering_kernel
 *
 * Called by SunOSsig to turn on full access to the kernel so that a
 * SunOS signal can be handled.  Nothing is done unless full protection
 * is enabled and execution was outside the kernel.
 */
void entering_kernel(void)
{
    /*
     * Protection only needs changing under full protection, and only
     * when we were outside the kernel to start with.
     */
    if (pinfo.prot_lev != FULL_PROT || pinfo.in_kernel) {
	return;
    }

    kernel_on();
    pinfo.in_kernel = 1;

    /*
     * mm and fs are executed "in place" in "physical" memory (to avoid
     * mapping and unmapping costs), so access to their portions of
     * physical memory has to be disabled here.  For user processes, the
     * physical memory containing the process keeps PROT_NONE access
     * throughout, so no changes in "physical" memory protection are
     * needed.  Instead, the user process is unmapped from low memory
     * where it has been executing.  This is not strictly necessary, as
     * these mappings will be over-ridden when the next user process is
     * mapped in, but it gives some added protection against rogue
     * memory accesses.
     */
    if (!isuserp(proc_ptr)) {
	set_process_prot(proc_ptr, PROT_NONE, PROT_NONE);
    } else {
	unmap_process(proc_ptr);
    }
}


/*
 * Function: leaving_kernel
 *
 * Called just prior to leaving layer 1 and resuming an smx process
 * in layers 2-4.  A user process is first mapped into low memory
 * (at every protection level).  Under full protection, and when the
 * process being resumed is neither a task nor the idle/hardware
 * pseudo-process, the kernel address space is then made as inaccessible
 * as possible.
 */
void leaving_kernel(void)
{
    if (isuserp(proc_ptr)) {
	map_process(proc_ptr);
    }

    if (pinfo.prot_lev != FULL_PROT) {
	return;
    }

    /*
     * Switching to a layer 2 task (or idling) means execution is
     * remaining within the kernel, so protection is left alone.
     */
    if (istaskp(proc_ptr) || isidlehardware(proc_number(proc_ptr))) {
	return;
    }

    /* pinfo becomes read-only in kernel_off, so update it first. */
    pinfo.in_kernel = 0;

    if (!isuserp(proc_ptr)) {
	/*
	 * mm, fs and inet are executed in place (i.e. are not mapped
	 * into low memory), so the address space must be made
	 * accessible for them to execute there.
	 */
	set_process_prot(proc_ptr, PROT_READ_EXEC, PROT_ALL);
    }
    kernel_off();
}


/*
 * Function: kernel_on
 *
 * Makes the kernel text segment r-x, the data segment rwx.
 */
static void kernel_on(void)
{
    mprotect(pinfo.kernel_base, pinfo.kernel_text, PROT_READ_EXEC);
    mprotect(pinfo.kernel_base + pinfo.kernel_text, pinfo.kernel_data,
	     PROT_ALL);
}


/*
 * Function: kernel_off
 *
 * Prepare for leaving the kernel---change protection as per pinfo.
 * Areas to be left r-x and rwx are assumed to have the correct
 * protection already.
 */
static void kernel_off(void)
{
    int i;

    for (i = 0 ; i < pinfo.numsegs ; i++) {
	if (pinfo.prot_info[i].prot != PROT_READ_EXEC &&
	    pinfo.prot_info[i].prot != PROT_ALL) {
	    mprotect(pinfo.prot_info[i].begin, pinfo.prot_info[i].len,
		     pinfo.prot_info[i].prot);
	}
    }
}


/*
 * Counterpart to protect_start (see its comment for more info).
 * Deliberately empty: only its address is used, as the end of the
 * protect_start..protect_end entry in segs.  Like protect_start it must
 * not be moved.
 */
static void protect_end(void) {}


/*
 * Function: set_process_prot
 * Parameters: p - process whose "physical" memory protection is to change
 *             text_prot - access rights to give to the text segment of p
 *             data_prot - access rights to give to the data/stack/gap
 *                         segment of p.
 *
 * Set the address space protection for the specified server.
 */
static void set_process_prot(struct proc *p,
			     int text_prot, int data_stack_prot)
{
    mprotect(p->p_map[T].mem_phys << CLICK_SHIFT,
	     p->p_map[T].mem_len << CLICK_SHIFT, text_prot);
    /*
     * Assume data, gap, and stack are contiguous
     */
    assert(!isuserp(p));   /* User processes have separate D and S segs */
    mprotect(p->p_map[D].mem_phys << CLICK_SHIFT,
	     (p->p_map[S].mem_phys << CLICK_SHIFT) -
	     (p->p_map[D].mem_phys << CLICK_SHIFT) +
	     (p->p_map[S].mem_len << CLICK_SHIFT), data_stack_prot);
}


/*
 * Function: map_process
 * Parameter: p - process to map into low address space
 * Returns: nothing (panics if any mapping fails)
 *
 * Establish the low-memory mappings for the text, data and stack segments
 * of process p.  The text segment is mapped rwx when prot_lev is NO_PROT
 * and r-x otherwise; data and stack are always rwx.  "Physical" memory is
 * in fact a mapped file open on RAM_FD, so the file offset of a "physical"
 * address is that address minus the address of the start of the kernel
 * (the kernel starts at offset 0 in the mapped file).
 *
 * If p is already the mapped process nothing is done; any other mapped
 * process is unmapped first.
 */
static void map_process(struct proc *p)
{
    int text_prot;

    if (mapped_proc == p) {
	return;                 /* Already there! */
    }
    if (mapped_proc != 0) {
	unmap_process(mapped_proc);
    }

    if (pinfo.prot_lev == NO_PROT) {
	text_prot = PROT_ALL;
    } else {
	text_prot = PROT_READ_EXEC;
    }

    /*
     * Setup mappings.  The flags 0x11 are MAP_SHARED | MAP_FIXED.
     * NOTE(review): the text mapping passes 0x80000011 while the data
     * and stack mappings pass 0x51; the extra bits are not explained
     * here - confirm against the SunOS mmap flag definitions.
     */
    if (SunOS(SYS_mmap, p->p_map[T].mem_vir << CLICK_SHIFT,
	      p->p_map[T].mem_len << CLICK_SHIFT,
	      text_prot,
	      0x80000011, RAM_FD,
	      (p->p_map[T].mem_phys << CLICK_SHIFT) - code_base) == -1) {
	panic("map_process: text segment mapping failed", errno);
    }

    if (SunOS(SYS_mmap, p->p_map[D].mem_vir << CLICK_SHIFT,
	      p->p_map[D].mem_len << CLICK_SHIFT,
	      PROT_ALL, 0x51, RAM_FD,
	      (p->p_map[D].mem_phys << CLICK_SHIFT) - code_base) == -1) {
	panic("map_process: data segment mapping failed", errno);
    }

    if (SunOS(SYS_mmap, p->p_map[S].mem_vir << CLICK_SHIFT,
	      p->p_map[S].mem_len << CLICK_SHIFT,
	      PROT_ALL, 0x51, RAM_FD,
	      (p->p_map[S].mem_phys << CLICK_SHIFT) - code_base) == -1) {
	panic("map_process: stack segment mapping failed", errno);
    }

    /* Remember who is mapped so that a repeat request is a no-op. */
    mapped_proc = p;
}
    

/*
 * Function: unmap_process
 * Parameter: p - process to unmap from low address space
 * Returns: nothing (panics if any unmapping fails)
 *
 * Removes the text, data and stack mappings of p, but only if p is the
 * currently mapped process; otherwise nothing happens.  When called from
 * entering_kernel, p should always be the mapped process.  When called
 * from mem_released, this won't always be the case.
 */
static void unmap_process(struct proc *p)
{
    if (p == mapped_proc) {
	/*
	 * Remove mapping for a user process, one segment at a time.
	 */
	if (SunOS(SYS_munmap, p->p_map[T].mem_vir << CLICK_SHIFT,
		  p->p_map[T].mem_len << CLICK_SHIFT) == -1) {
	    panic("unmap_process: text segment unmapping failed", errno);
	}

	if (SunOS(SYS_munmap, p->p_map[D].mem_vir << CLICK_SHIFT,
		  p->p_map[D].mem_len << CLICK_SHIFT) == -1) {
	    panic("unmap_process: data segment unmapping failed", errno);
	}

	if (SunOS(SYS_munmap, p->p_map[S].mem_vir << CLICK_SHIFT,
		  p->p_map[S].mem_len << CLICK_SHIFT) == -1) {
	    panic("unmap_process: stack segment unmapping failed", errno);
	}

	/* Nothing is mapped in low memory any more. */
	mapped_proc = 0;
    }
}


/*
 * The following are wrappers for the various copying and zeroing functions.
 * The wrappers ensure that the areas to be read and written are actually
 * accessible.
 */

/*
 * Function: zeroclicks
 * Parameters: addr - first click of the area
 *             numclicks - number of clicks in the area
 *
 * Wrapper for real_zeroclicks: the area is made writable for the
 * duration of the call and inaccessible again afterwards.  set_protect
 * only changes anything under full protection.
 */
void zeroclicks(phys_clicks addr, phys_clicks numclicks)
{
    phys_bytes base = addr << CLICK_SHIFT;
    int nbytes = numclicks << CLICK_SHIFT;

    set_protect(base, nbytes, PROT_WRITE);
    real_zeroclicks(addr, numclicks);
    set_protect(base, nbytes, PROT_NONE);
}


/*
 * Function: copyclicks
 * Parameters: src - first click of the source area
 *             dest - first click of the destination area
 *             numclicks - number of clicks to copy
 *
 * Wrapper for real_copyclicks: the source is made readable and the
 * destination writable for the duration of the copy, and both are made
 * inaccessible again afterwards.  set_protect only changes anything
 * under full protection.
 */
void copyclicks(phys_clicks src, phys_clicks dest, phys_clicks numclicks)
{
    phys_bytes src_base = src << CLICK_SHIFT;
    phys_bytes dest_base = dest << CLICK_SHIFT;
    int nbytes = numclicks << CLICK_SHIFT;

    set_protect(src_base, nbytes, PROT_READ);
    set_protect(dest_base, nbytes, PROT_WRITE);

    real_copyclicks(src, dest, numclicks);

    set_protect(src_base, nbytes, PROT_NONE);
    set_protect(dest_base, nbytes, PROT_NONE);
}


/*
 * Function: phys_copy
 * Parameters: src - address of the source area
 *             dest - address of the destination area
 *             len - number of bytes to copy
 *
 * Wrapper for real_phys_copy: the source is made readable and the
 * destination writable for the duration of the copy, and both are made
 * inaccessible again afterwards.  set_protect ignores areas that start
 * inside the kernel and only changes anything under full protection.
 */
void phys_copy(phys_bytes src, phys_bytes dest, phys_bytes len)
{
    int nbytes = len;

    set_protect(src, nbytes, PROT_READ);
    set_protect(dest, nbytes, PROT_WRITE);

    real_phys_copy(src, dest, len);

    set_protect(src, nbytes, PROT_NONE);
    set_protect(dest, nbytes, PROT_NONE);
}
