/*
 * malloc.c --- a general purpose kernel memory allocator for Linux.
 *
 * Written by Theodore Ts'o (tytso@mit.edu), 11/29/91
 *
 * This routine is written to be as fast as possible, so that it
 * can be called from the interrupt level.
 *
 * Limitations: maximum size of memory we can allocate using this routine
 *	is 4k, the size of a page in Linux.
 *
 * The general game plan is that each page (called a bucket) will only hold
 * objects of a given size.  When all of the object on a page are released,
 * the page can be returned to the general free pool.  When kmalloc() is
 * called, it looks for the smallest bucket size which will fulfill its
 * request, and allocate a piece of memory from that bucket pool.
 *
 * Each bucket has as its control block a bucket descriptor which keeps
 * track of how many objects are in use on that page, and the free list
 * for that page.  Like the buckets themselves, bucket descriptors are
 * stored on pages requested from get_free_page().  However, unlike buckets,
 * pages devoted to bucket descriptor pages are never released back to the
 * system.  Fortunately, a system should probably only need 1 or 2 bucket
 * descriptor pages, since a page can hold 256 bucket descriptors (which
 * corresponds to 1 megabyte worth of bucket pages.)  If the kernel is using
 * that much allocated memory, it's probably doing something wrong.  :-)
 *
 * Note: kmalloc() and kfree() both call get_free_page() and free_page()
 *	in sections of code where interrupts are turned off, to allow
 *	kmalloc() and kfree() to be safely called from an interrupt routine.
 *	(We will probably need this functionality when networking code,
 *	particularily things like NFS, is added to Linux.)  However, this
 *	presumes that get_free_page() and free_page() are interrupt-level
 *	safe, which they may not be once paging is added.  If this is the
 *	case, we will need to modify kmalloc() to keep a few unused pages
 *	"pre-allocated" so that it can safely draw upon those pages if
 * 	it is called from an interrupt routine.
 *
 * 	Another concern is that get_free_page() should not sleep; if it
 *	does, the code is carefully ordered so as to avoid any race
 *	conditions.  The catch is that if kmalloc() is called re-entrantly,
 *	there is a chance that unecessary pages will be grabbed from the
 *	system.  Except for the pages for the bucket descriptor page, the
 *	extra pages will eventually get released back to the system, though,
 *	so it isn't all that bad.
 */

/* I'm going to modify it to keep some free pages around.  Get free page
   can sleep, and tcp/ip needs to call kmalloc at interrupt time  (Or keep
   big buffers around for itself.)  I guess I'll have return from
   syscall fill up the free page descriptors. -RAB */

/* since the advent of GFP_ATOMIC, I've changed the kmalloc code to
   use it and return NULL if it can't get a page. -RAB  */
/* (mostly just undid the previous changes -RAB) */

/* I've added the priority argument to kmalloc so routines can
   sleep on memory if they want. - RAB */

/* I've also got to make sure that kmalloc is reentrant now. */

/* Debugging support: add file/line info, add beginning+end markers. -M.U- */

#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/string.h>

#include <asm/system.h>

struct bucket_desc {	/* 16 bytes */
	void			*page;
	struct bucket_desc	*next;
	void			*freeptr;
	unsigned short		refcnt;
	unsigned short		bucket_size;
};

struct _bucket_dir {	/* 8 bytes */
	unsigned int		size;
	struct bucket_desc	*chain;
};

#ifdef CONFIG_DEBUG_MALLOC

struct hdr_start {
	const char *file;
	const char *ok_file;
	unsigned short line;
	unsigned short ok_line;
	unsigned short size;
	int magic;
};
struct hdr_end {
	int magic;
};

#define DEB_MAGIC_FREE  0x13579BDF /* free block */
#define DEB_MAGIC_ALLOC 0x2468ACE0 /* allocated block */
#define DEB_MAGIC_USED  0x147AD036 /* allocated but bad */
#define DEB_MAGIC_FREED 0x258BE169 /* free but abused */

#define DEB_MAGIC_END   0x369CF258 /* end marker */

#endif
/*
 * The following is the where we store a pointer to the first bucket
 * descriptor for a given size.
 *
 * If it turns out that the Linux kernel allocates a lot of objects of a
 * specific size, then we may want to add that specific size to this list,
 * since that will allow the memory to be allocated more efficiently.
 * However, since an entire page must be dedicated to each specific size
 * on this list, some amount of temperance must be exercised here.
 *
 * Note that this list *must* be kept in order.
 */
struct _bucket_dir bucket_dir[] = {
#ifndef CONFIG_DEBUG_MALLOC /* Debug headers have too much overhead */
	{ 16,	(struct bucket_desc *) 0},
#endif
	{ 32,	(struct bucket_desc *) 0},
	{ 64,	(struct bucket_desc *) 0},
	{ 128,	(struct bucket_desc *) 0},
	{ 256,	(struct bucket_desc *) 0},
	{ 512,	(struct bucket_desc *) 0},
	{ 1024,	(struct bucket_desc *) 0},
	{ 2048, (struct bucket_desc *) 0},
	{ 4096, (struct bucket_desc *) 0},
	{ 0,    (struct bucket_desc *) 0}};   /* End of list marker */

/*
 * This contains a linked list of free bucket descriptor blocks
 */
static struct bucket_desc *free_bucket_desc = (struct bucket_desc *) 0;

/*
 * This routine initializes a bucket description page.
 */

/* It assumes it is called with interrupts on. and will
   return that way.  It also can sleep if priority != GFP_ATOMIC. */
 
static inline void init_bucket_desc(unsigned long page)
{
	struct bucket_desc *bdesc;
	int i;

	bdesc = (struct bucket_desc *) page;
	for (i = PAGE_SIZE/sizeof(struct bucket_desc); i > 1; i--) {
		bdesc->next = bdesc+1;
		bdesc++;
	}
	/*
	 * This is done last, to avoid race conditions in case
	 * get_free_page() sleeps and this routine gets called again....
	 */
	cli();
	bdesc->next = free_bucket_desc;
	free_bucket_desc = (struct bucket_desc *) page;
}

/*
 * Re-organized some code to give cleaner assembly output for easier
 * verification.. LBT
 */
#ifdef CONFIG_DEBUG_MALLOC
void *
deb_kmalloc(const char *deb_file, unsigned short deb_line,
	unsigned int len, int priority)
#else
void *
kmalloc(unsigned int len, int priority)
#endif
{
	int i;
	unsigned long		flags;
	unsigned long		page;
	struct _bucket_dir	*bdir;
	struct bucket_desc	*bdesc;
	void			*retval;

#ifdef CONFIG_DEBUG_MALLOC
	len += sizeof(struct hdr_start)+sizeof(struct hdr_end);
#endif
	/*
	 * First we search the bucket_dir to find the right bucket change
	 * for this request.
	 */

	/* The sizes are static so there is no reentry problem here. */
	bdir = bucket_dir;
	for (bdir = bucket_dir ; bdir->size < len ; bdir++) {
		if (!bdir->size)
			goto too_large;
	}

	/*
	 * Now we search for a bucket descriptor which has free space
	 */
	save_flags(flags);
	cli();			/* Avoid race conditions */
	for (bdesc = bdir->chain; bdesc != NULL; bdesc = bdesc->next)
		if (bdesc->freeptr)
			goto found_bdesc;
	/*
	 * If we didn't find a bucket with free space, then we'll
	 * allocate a new one.
	 */
	
	/*
	 * Note that init_bucket_descriptor() does its
	 * own cli() before returning, and guarantees that
	 * there is a bucket desc in the page.
	 */
	if (!free_bucket_desc) {
		restore_flags(flags);
		page = get_free_page(priority);
		if (!page)
			return NULL;
		init_bucket_desc(page);
	}
	
	bdesc = free_bucket_desc;
	free_bucket_desc = bdesc->next;
	restore_flags(flags);

	page = get_free_page(priority);

	/*
	 * Out of memory? Put the bucket descriptor back on the free list
	 */
	if (!page) {
		cli();
		bdesc->next = free_bucket_desc;
		free_bucket_desc = bdesc;
		restore_flags(flags);
		return NULL;
	}
		
	bdesc->refcnt = 0;
	bdesc->bucket_size = bdir->size;
	bdesc->page = bdesc->freeptr = (void *) page;
	
	/* Set up the chain of free objects */
	for (i=PAGE_SIZE/bdir->size; i > 0 ; i--) {
#ifdef CONFIG_DEBUG_MALLOC
		struct hdr_start *hd;
		struct hdr_end *he;
		hd = (struct hdr_start *) page;
		he = (struct hdr_end *)(page+(bdir->size-sizeof(struct hdr_end)));
		hd->magic = DEB_MAGIC_FREE;
		hd->file = hd->ok_file = "(expand)"; 
		hd->line = hd->ok_line = 0;
		hd->size = bdir->size-sizeof(struct hdr_start)-sizeof(struct hdr_end);
		he->magic = DEB_MAGIC_END;

		memset(hd+1,0xF8,hd->size);

		*((void **) (hd+1)) = (i==1) ? NULL : (void *)(page + bdir->size);
#else
		*((void **) page) = (i==1) ? NULL : (void *)(page + bdir->size);
#endif
		page += bdir->size;
	}
	
	/* turn interrupts back off for putting the
	   thing onto the chain. */
	cli();
	/* remember bdir is not changed. */
	bdesc->next = bdir->chain; /* OK, link it in! */
	bdir->chain = bdesc;

found_bdesc:
	retval = (void *) bdesc->freeptr;
#ifdef CONFIG_DEBUG_MALLOC
	bdesc->freeptr = *((void **) (((char *)retval)+sizeof(struct hdr_start)));
#else
	bdesc->freeptr = *((void **) retval);
#endif
	bdesc->refcnt++;
	restore_flags(flags);	/* OK, we're safe again */
#ifdef CONFIG_DEBUG_MALLOC
	{
		struct hdr_start *hd;
		struct hdr_end *he;

		hd = (struct hdr_start *) retval;
		retval = hd+1;
		len -= sizeof(struct hdr_start)+sizeof(struct hdr_end);
		if(hd->magic != DEB_MAGIC_FREE && hd->magic != DEB_MAGIC_FREED) {
			printk("DEB_MALLOC allocating %s block 0x%x (head 0x%x) from %s:%d, magic %x\n",
				(hd->magic == DEB_MAGIC_ALLOC) ? "nonfree" : "trashed", 
				retval,hd,deb_file,deb_line,hd->magic);
			return NULL;
		}
		if(len > hd->size || len > bdir->size-sizeof(struct hdr_start)-sizeof(struct hdr_end)) {
			printk("DEB_MALLOC got %x:%x-byte block, wanted %x, from %s:%d, last %s:%d\n",
				hd->size,bdir->size,len,hd->file,hd->line,deb_file,deb_line);
			return NULL;
		}
		{
			unsigned char *x = (unsigned char *) retval;
			unsigned short pos = 4;
			x += pos;
			while(pos < hd->size) {
				if(*x++ != 0xF8) {
					printk("DEB_MALLOC used 0x%x:%x(%x) while free, from %s:%d\n",
						retval,pos,hd->size,hd->file,hd->line);
					return NULL;
				}
				pos++;
			}
		}
		he = (struct hdr_end *)(((char *)retval)+hd->size);
		if(he->magic != DEB_MAGIC_END) {
			printk("DEB_MALLOC overran 0x%x:%d while free, from %s:%d\n",retval,hd->size,hd->file,hd->line);
		}
		memset(retval, 0xf0, len);
		he = (struct hdr_end *)(((char *)retval)+len);
		hd->file = hd->ok_file = deb_file;
		hd->line = hd->ok_line = deb_line;
		hd->size = len;
		hd->magic = DEB_MAGIC_ALLOC;
		he->magic = DEB_MAGIC_END;
	}
#endif
	return retval;

too_large:
       /* This should be changed for sizes > 1 page. */
	printk("kmalloc called with impossibly large argument (%d)\n", len);
	return NULL;
}

#ifdef CONFIG_DEBUG_MALLOC
void deb_kcheck_s(const char *deb_file, unsigned short deb_line,
	void *obj, int size)
{
	struct hdr_start *hd;
	struct hdr_end *he;

	if (!obj)
		return;
	hd = (struct hdr_start *) obj;
	hd--;

	if(hd->magic != DEB_MAGIC_ALLOC) {
		if(hd->magic != DEB_MAGIC_USED && hd->magic != DEB_MAGIC_FREED)
			printk("DEB_MALLOC Using %s block of 0x%x from %s:%d, by %s:%d, last %s:%d\n",
				(hd->magic == DEB_MAGIC_FREE)?"free":"bad",obj,deb_file,deb_line,hd->file,hd->line,hd->ok_file,hd->ok_line);
		if(hd->magic == DEB_MAGIC_FREE)
			hd->magic = DEB_MAGIC_FREED;
		return;
	}
	if(hd->size != size) {
		if(size != 0) {
			printk("DEB_MALLOC size for 0x%x given as %d, stored %d, from %s:%d, last %s:%d\n",
				obj,size,hd->size,deb_file,deb_line,hd->ok_file,hd->ok_line);
		}
		size = hd->size;
	}
	he = (struct hdr_end *)(((char *)obj)+size);
	if(he->magic != DEB_MAGIC_END) {
		printk("DEB_MALLOC overran block 0x%x:%d, free at %s:%d\n",obj,hd->size,deb_file,deb_line);
		hd->magic = DEB_MAGIC_USED;
		return;
	}
	hd->ok_file = deb_file;
	hd->ok_line = deb_line;
}
#endif

/*
 * Here is the kfree routine.  If you know the size of the object that you
 * are freeing, then kfree_s() will use that information to speed up the
 * search for the bucket descriptor.
 *
 * We will #define a macro so that "kfree(x)" is becomes "kfree_s(x, 0)"
 */
#ifdef CONFIG_DEBUG_MALLOC
void deb_kfree_s(const char *deb_file, unsigned short deb_line,
	void *obj, int size)
#else
void kfree_s(void *obj, int size)
#endif
{
	unsigned long		flags;
	void			*page;
	struct _bucket_dir	*bdir;
	struct bucket_desc	*bdesc, *prev;

	if (!obj)
		return;
#ifdef CONFIG_DEBUG_MALLOC
	{
		struct hdr_start *hd;
		struct hdr_end *he;
		hd = (struct hdr_start *) obj;
		hd--;

		if(hd->magic != DEB_MAGIC_ALLOC && hd->magic != DEB_MAGIC_USED) {
			if(hd->magic != DEB_MAGIC_FREED)
				printk("DEB_MALLOC %s free of 0x%x from %s:%d by %s:%d, last %s:%d\n",
					(hd->magic == DEB_MAGIC_FREE)?"dup":"bad",
					obj,deb_file,deb_line,hd->file,hd->line,hd->ok_file,hd->ok_line);
			return;
		}
		if(hd->size != size) {
			if(size != 0) {
				if(hd->magic != DEB_MAGIC_USED)
					printk("DEB_MALLOC size for 0x%x given as %d, stored %d, from %s:%d, last %s:%d\n",
						obj,size,hd->size,deb_file,deb_line,hd->ok_file,hd->ok_line);
			}
			size = hd->size;
		}
		he = (struct hdr_end *)(((char *)obj)+size);
		if(he->magic != DEB_MAGIC_END) {
			if(hd->magic != DEB_MAGIC_USED)
				printk("DEB_MALLOC overran block 0x%x:%d, free at %s:%d, last %s:%d\n",
					obj,hd->size,deb_file,deb_line,hd->ok_file,hd->ok_line);
			return;
		}
		size += sizeof(struct hdr_start)+sizeof(struct hdr_end);
	}
#endif
	save_flags(flags);
	/* Calculate what page this object lives in */
	page = (void *)  ((unsigned long) obj & 0xfffff000);

	/* Now search the buckets looking for that page */
	for (bdir = bucket_dir; bdir->size; bdir++) {
	    prev = 0;
	    /* If size is zero then this conditional is always true */
	    if (bdir->size >= size) {
		/* We have to turn off interrupts here because
		   we are descending the chain.  If something
		   changes it in the middle we could suddenly
		   find ourselves descending the free list.
		   I think this would only cause a memory
		   leak, but better safe than sorry. */
		cli(); /* To avoid race conditions */
		for (bdesc = bdir->chain; bdesc; bdesc = bdesc->next) {
		    if (bdesc->page == page)
			goto found;
		    prev = bdesc;
		}
	    }
	}

	restore_flags(flags);
	printk("Bad address passed to kernel kfree_s(%X, %d)\n",obj, size);
#ifdef CONFIG_DEBUG_MALLOC
	printk("Offending code: %s:%d\n",deb_file,deb_line);
#else
	printk("Offending eip: %08x\n",((unsigned long *) &obj)[-1]);
#endif
	return;

found:
	/* interrupts are off here. */
#ifdef CONFIG_DEBUG_MALLOC

	{
		struct hdr_start *hd;
		struct hdr_end *he;
		hd = (struct hdr_start *) obj;
		hd--;
		
		hd->file = deb_file;
		hd->line = deb_line;
		hd->magic = DEB_MAGIC_FREE;
		hd->size = bdir->size-sizeof(struct hdr_start)-sizeof(struct hdr_end);
		he = (struct hdr_end *)(((char *)obj)+hd->size);
		memset(obj, 0xf8, hd->size);
		he->magic = DEB_MAGIC_END;
		*((void **)obj) = bdesc->freeptr;
		obj = hd;
	}
#else
	*((void **)obj) = bdesc->freeptr;
#endif

	bdesc->freeptr = obj;
	bdesc->refcnt--;
	if (bdesc->refcnt == 0) {
		/*
		 * We need to make sure that prev is still accurate.  It
		 * may not be, if someone rudely interrupted us....
		 */
		if ((prev && (prev->next != bdesc)) ||
		    (!prev && (bdir->chain != bdesc)))
			for (prev = bdir->chain; prev; prev = prev->next)
				if (prev->next == bdesc)
					break;
		if (prev)
			prev->next = bdesc->next;
		else {
			if (bdir->chain != bdesc)
				panic("kmalloc bucket chains corrupted");
			bdir->chain = bdesc->next;
		}
		bdesc->next = free_bucket_desc;
		free_bucket_desc = bdesc;
		free_page((unsigned long) bdesc->page);
	}
	restore_flags(flags);
	return;
}

#ifdef CONFIG_DEBUG_MALLOC
int get_malloc(char *buffer)
{
	int len = 0;
	int i;
	unsigned long		flags;
	void			*page;
	struct _bucket_dir	*bdir;
	struct bucket_desc	*bdesc;

	save_flags(flags);
	cli(); /* To avoid race conditions */
	for (bdir = bucket_dir; bdir->size; bdir++) {
		for (bdesc = bdir->chain; bdesc; bdesc = bdesc->next) {
			page = bdesc->page;
			for (i=PAGE_SIZE/bdir->size; i > 0 ; i--) {
				struct hdr_start *hd;
				hd = (struct hdr_start *)page;
				if(hd->magic == DEB_MAGIC_ALLOC) {
					if(len > PAGE_SIZE-80) {
						restore_flags(flags);
						len += sprintf(buffer+len,"+++\n");
						return len;
					}
					len += sprintf(buffer+len,"%08x:%03x %s:%d %s:%d\n",
						(long)(page+sizeof(struct hdr_start)),hd->size,hd->file,hd->line,hd->ok_file,hd->ok_line);
				}
				page += bdir->size;
			}
		}
	}

	restore_flags(flags);
	return len;
}
#endif