diff -uprN linux-2.6.16/COPYING.SWsoft linux-2.6.16.ovz/COPYING.SWsoft
--- linux-2.6.16/COPYING.SWsoft	1969-12-31 19:00:00.000000000 -0500
+++ linux-2.6.16.ovz/COPYING.SWsoft	2006-07-05 08:34:55.000000000 -0400
@@ -0,0 +1,350 @@
+
+Nothing in this license should be construed as a grant by SWsoft of any rights
+beyond the rights specified in the GNU General Public License, and nothing in
+this license should be construed as a waiver by SWsoft of its patent, copyright
+and/or trademark rights, beyond the waiver required by the GNU General Public
+License. This license is expressly inapplicable to any product that is not
+within the scope of the GNU General Public License
+
+----------------------------------------
+
+		    GNU GENERAL PUBLIC LICENSE
+		       Version 2, June 1991
+
+ Copyright (C) 1989, 1991 Free Software Foundation, Inc.
+                       59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+
+			    Preamble
+
+  The licenses for most software are designed to take away your
+freedom to share and change it.  By contrast, the GNU General Public
+License is intended to guarantee your freedom to share and change free
+software--to make sure the software is free for all its users.  This
+General Public License applies to most of the Free Software
+Foundation's software and to any other program whose authors commit to
+using it.  (Some other Free Software Foundation software is covered by
+the GNU Library General Public License instead.)  You can apply it to
+your programs, too.
+
+  When we speak of free software, we are referring to freedom, not
+price.  Our General Public Licenses are designed to make sure that you
+have the freedom to distribute copies of free software (and charge for
+this service if you wish), that you receive source code or can get it
+if you want it, that you can change the software or use pieces of it
+in new free programs; and that you know you can do these things.
+
+  To protect your rights, we need to make restrictions that forbid
+anyone to deny you these rights or to ask you to surrender the rights.
+These restrictions translate to certain responsibilities for you if you
+distribute copies of the software, or if you modify it.
+
+  For example, if you distribute copies of such a program, whether
+gratis or for a fee, you must give the recipients all the rights that
+you have.  You must make sure that they, too, receive or can get the
+source code.  And you must show them these terms so they know their
+rights.
+
+  We protect your rights with two steps: (1) copyright the software, and
+(2) offer you this license which gives you legal permission to copy,
+distribute and/or modify the software.
+
+  Also, for each author's protection and ours, we want to make certain
+that everyone understands that there is no warranty for this free
+software.  If the software is modified by someone else and passed on, we
+want its recipients to know that what they have is not the original, so
+that any problems introduced by others will not reflect on the original
+authors' reputations.
+
+  Finally, any free program is threatened constantly by software
+patents.  We wish to avoid the danger that redistributors of a free
+program will individually obtain patent licenses, in effect making the
+program proprietary.  To prevent this, we have made it clear that any
+patent must be licensed for everyone's free use or not licensed at all.
+
+  The precise terms and conditions for copying, distribution and
+modification follow.
+
+		    GNU GENERAL PUBLIC LICENSE
+   TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
+
+  0. This License applies to any program or other work which contains
+a notice placed by the copyright holder saying it may be distributed
+under the terms of this General Public License.  The "Program", below,
+refers to any such program or work, and a "work based on the Program"
+means either the Program or any derivative work under copyright law:
+that is to say, a work containing the Program or a portion of it,
+either verbatim or with modifications and/or translated into another
+language.  (Hereinafter, translation is included without limitation in
+the term "modification".)  Each licensee is addressed as "you".
+
+Activities other than copying, distribution and modification are not
+covered by this License; they are outside its scope.  The act of
+running the Program is not restricted, and the output from the Program
+is covered only if its contents constitute a work based on the
+Program (independent of having been made by running the Program).
+Whether that is true depends on what the Program does.
+
+  1. You may copy and distribute verbatim copies of the Program's
+source code as you receive it, in any medium, provided that you
+conspicuously and appropriately publish on each copy an appropriate
+copyright notice and disclaimer of warranty; keep intact all the
+notices that refer to this License and to the absence of any warranty;
+and give any other recipients of the Program a copy of this License
+along with the Program.
+
+You may charge a fee for the physical act of transferring a copy, and
+you may at your option offer warranty protection in exchange for a fee.
+
+  2. You may modify your copy or copies of the Program or any portion
+of it, thus forming a work based on the Program, and copy and
+distribute such modifications or work under the terms of Section 1
+above, provided that you also meet all of these conditions:
+
+    a) You must cause the modified files to carry prominent notices
+    stating that you changed the files and the date of any change.
+
+    b) You must cause any work that you distribute or publish, that in
+    whole or in part contains or is derived from the Program or any
+    part thereof, to be licensed as a whole at no charge to all third
+    parties under the terms of this License.
+
+    c) If the modified program normally reads commands interactively
+    when run, you must cause it, when started running for such
+    interactive use in the most ordinary way, to print or display an
+    announcement including an appropriate copyright notice and a
+    notice that there is no warranty (or else, saying that you provide
+    a warranty) and that users may redistribute the program under
+    these conditions, and telling the user how to view a copy of this
+    License.  (Exception: if the Program itself is interactive but
+    does not normally print such an announcement, your work based on
+    the Program is not required to print an announcement.)
+
+These requirements apply to the modified work as a whole.  If
+identifiable sections of that work are not derived from the Program,
+and can be reasonably considered independent and separate works in
+themselves, then this License, and its terms, do not apply to those
+sections when you distribute them as separate works.  But when you
+distribute the same sections as part of a whole which is a work based
+on the Program, the distribution of the whole must be on the terms of
+this License, whose permissions for other licensees extend to the
+entire whole, and thus to each and every part regardless of who wrote it.
+
+Thus, it is not the intent of this section to claim rights or contest
+your rights to work written entirely by you; rather, the intent is to
+exercise the right to control the distribution of derivative or
+collective works based on the Program.
+
+In addition, mere aggregation of another work not based on the Program
+with the Program (or with a work based on the Program) on a volume of
+a storage or distribution medium does not bring the other work under
+the scope of this License.
+
+  3. You may copy and distribute the Program (or a work based on it,
+under Section 2) in object code or executable form under the terms of
+Sections 1 and 2 above provided that you also do one of the following:
+
+    a) Accompany it with the complete corresponding machine-readable
+    source code, which must be distributed under the terms of Sections
+    1 and 2 above on a medium customarily used for software interchange; or,
+
+    b) Accompany it with a written offer, valid for at least three
+    years, to give any third party, for a charge no more than your
+    cost of physically performing source distribution, a complete
+    machine-readable copy of the corresponding source code, to be
+    distributed under the terms of Sections 1 and 2 above on a medium
+    customarily used for software interchange; or,
+
+    c) Accompany it with the information you received as to the offer
+    to distribute corresponding source code.  (This alternative is
+    allowed only for noncommercial distribution and only if you
+    received the program in object code or executable form with such
+    an offer, in accord with Subsection b above.)
+
+The source code for a work means the preferred form of the work for
+making modifications to it.  For an executable work, complete source
+code means all the source code for all modules it contains, plus any
+associated interface definition files, plus the scripts used to
+control compilation and installation of the executable.  However, as a
+special exception, the source code distributed need not include
+anything that is normally distributed (in either source or binary
+form) with the major components (compiler, kernel, and so on) of the
+operating system on which the executable runs, unless that component
+itself accompanies the executable.
+
+If distribution of executable or object code is made by offering
+access to copy from a designated place, then offering equivalent
+access to copy the source code from the same place counts as
+distribution of the source code, even though third parties are not
+compelled to copy the source along with the object code.
+
+  4. You may not copy, modify, sublicense, or distribute the Program
+except as expressly provided under this License.  Any attempt
+otherwise to copy, modify, sublicense or distribute the Program is
+void, and will automatically terminate your rights under this License.
+However, parties who have received copies, or rights, from you under
+this License will not have their licenses terminated so long as such
+parties remain in full compliance.
+
+  5. You are not required to accept this License, since you have not
+signed it.  However, nothing else grants you permission to modify or
+distribute the Program or its derivative works.  These actions are
+prohibited by law if you do not accept this License.  Therefore, by
+modifying or distributing the Program (or any work based on the
+Program), you indicate your acceptance of this License to do so, and
+all its terms and conditions for copying, distributing or modifying
+the Program or works based on it.
+
+  6. Each time you redistribute the Program (or any work based on the
+Program), the recipient automatically receives a license from the
+original licensor to copy, distribute or modify the Program subject to
+these terms and conditions.  You may not impose any further
+restrictions on the recipients' exercise of the rights granted herein.
+You are not responsible for enforcing compliance by third parties to
+this License.
+
+  7. If, as a consequence of a court judgment or allegation of patent
+infringement or for any other reason (not limited to patent issues),
+conditions are imposed on you (whether by court order, agreement or
+otherwise) that contradict the conditions of this License, they do not
+excuse you from the conditions of this License.  If you cannot
+distribute so as to satisfy simultaneously your obligations under this
+License and any other pertinent obligations, then as a consequence you
+may not distribute the Program at all.  For example, if a patent
+license would not permit royalty-free redistribution of the Program by
+all those who receive copies directly or indirectly through you, then
+the only way you could satisfy both it and this License would be to
+refrain entirely from distribution of the Program.
+
+If any portion of this section is held invalid or unenforceable under
+any particular circumstance, the balance of the section is intended to
+apply and the section as a whole is intended to apply in other
+circumstances.
+
+It is not the purpose of this section to induce you to infringe any
+patents or other property right claims or to contest validity of any
+such claims; this section has the sole purpose of protecting the
+integrity of the free software distribution system, which is
+implemented by public license practices.  Many people have made
+generous contributions to the wide range of software distributed
+through that system in reliance on consistent application of that
+system; it is up to the author/donor to decide if he or she is willing
+to distribute software through any other system and a licensee cannot
+impose that choice.
+
+This section is intended to make thoroughly clear what is believed to
+be a consequence of the rest of this License.
+
+  8. If the distribution and/or use of the Program is restricted in
+certain countries either by patents or by copyrighted interfaces, the
+original copyright holder who places the Program under this License
+may add an explicit geographical distribution limitation excluding
+those countries, so that distribution is permitted only in or among
+countries not thus excluded.  In such case, this License incorporates
+the limitation as if written in the body of this License.
+
+  9. The Free Software Foundation may publish revised and/or new versions
+of the General Public License from time to time.  Such new versions will
+be similar in spirit to the present version, but may differ in detail to
+address new problems or concerns.
+
+Each version is given a distinguishing version number.  If the Program
+specifies a version number of this License which applies to it and "any
+later version", you have the option of following the terms and conditions
+either of that version or of any later version published by the Free
+Software Foundation.  If the Program does not specify a version number of
+this License, you may choose any version ever published by the Free Software
+Foundation.
+
+  10. If you wish to incorporate parts of the Program into other free
+programs whose distribution conditions are different, write to the author
+to ask for permission.  For software which is copyrighted by the Free
+Software Foundation, write to the Free Software Foundation; we sometimes
+make exceptions for this.  Our decision will be guided by the two goals
+of preserving the free status of all derivatives of our free software and
+of promoting the sharing and reuse of software generally.
+
+			    NO WARRANTY
+
+  11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
+FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW.  EXCEPT WHEN
+OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
+PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
+OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.  THE ENTIRE RISK AS
+TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU.  SHOULD THE
+PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
+REPAIR OR CORRECTION.
+
+  12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
+WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
+REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
+INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
+OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
+TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
+YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
+PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGES.
+
+		     END OF TERMS AND CONDITIONS
+
+	    How to Apply These Terms to Your New Programs
+
+  If you develop a new program, and you want it to be of the greatest
+possible use to the public, the best way to achieve this is to make it
+free software which everyone can redistribute and change under these terms.
+
+  To do so, attach the following notices to the program.  It is safest
+to attach them to the start of each source file to most effectively
+convey the exclusion of warranty; and each file should have at least
+the "copyright" line and a pointer to where the full notice is found.
+
+    <one line to give the program's name and a brief idea of what it does.>
+    Copyright (C) <year>  <name of author>
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+
+
+Also add information on how to contact you by electronic and paper mail.
+
+If the program is interactive, make it output a short notice like this
+when it starts in an interactive mode:
+
+    Gnomovision version 69, Copyright (C) year name of author
+    Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
+    This is free software, and you are welcome to redistribute it
+    under certain conditions; type `show c' for details.
+
+The hypothetical commands `show w' and `show c' should show the appropriate
+parts of the General Public License.  Of course, the commands you use may
+be called something other than `show w' and `show c'; they could even be
+mouse-clicks or menu items--whatever suits your program.
+
+You should also get your employer (if you work as a programmer) or your
+school, if any, to sign a "copyright disclaimer" for the program, if
+necessary.  Here is a sample; alter the names:
+
+  Yoyodyne, Inc., hereby disclaims all copyright interest in the program
+  `Gnomovision' (which makes passes at compilers) written by James Hacker.
+
+  <signature of Ty Coon>, 1 April 1989
+  Ty Coon, President of Vice
+
+This General Public License does not permit incorporating your program into
+proprietary programs.  If your program is a subroutine library, you may
+consider it more useful to permit linking proprietary applications with the
+library.  If this is what you want to do, use the GNU Library General
+Public License instead of this License.
diff -uprN linux-2.6.16/Documentation/dvb/get_dvb_firmware linux-2.6.16.ovz/Documentation/dvb/get_dvb_firmware
--- linux-2.6.16/Documentation/dvb/get_dvb_firmware	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/Documentation/dvb/get_dvb_firmware	2006-07-05 08:34:55.000000000 -0400
@@ -240,9 +240,9 @@ sub dibusb {
 }
 
 sub nxt2002 {
-    my $sourcefile = "Broadband4PC_4_2_11.zip";
+    my $sourcefile = "Technisat_DVB-PC_4_4_COMPACT.zip";
     my $url = "http://www.bbti.us/download/windows/$sourcefile";
-    my $hash = "c6d2ea47a8f456d887ada0cfb718ff2a";
+    my $hash = "476befae8c7c1bb9648954060b1eec1f";
     my $outfile = "dvb-fe-nxt2002.fw";
     my $tmpdir = tempdir(DIR => "/tmp", CLEANUP => 1);
 
@@ -250,8 +250,8 @@ sub nxt2002 {
 
     wgetfile($sourcefile, $url);
     unzip($sourcefile, $tmpdir);
-    verify("$tmpdir/SkyNETU.sys", $hash);
-    extract("$tmpdir/SkyNETU.sys", 375832, 5908, $outfile);
+    verify("$tmpdir/SkyNET.sys", $hash);
+    extract("$tmpdir/SkyNET.sys", 331624, 5908, $outfile);
 
     $outfile;
 }
diff -uprN linux-2.6.16/Documentation/vsched.txt linux-2.6.16.ovz/Documentation/vsched.txt
--- linux-2.6.16/Documentation/vsched.txt	1969-12-31 19:00:00.000000000 -0500
+++ linux-2.6.16.ovz/Documentation/vsched.txt	2006-07-05 08:34:55.000000000 -0400
@@ -0,0 +1,83 @@
+Copyright (C) 2005 SWsoft. All rights reserved.
+Licensing governed by "linux/COPYING.SWsoft" file.
+
+Hierarchical CPU schedulers
+~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+A hierarchical CPU scheduler is a stack of CPU schedulers which makes it
+possible to organize different scheduling policies in the system and/or
+between groups of processes.
+
+Virtuozzo uses a hierarchical Fair CPU scheduler organized as a 2-stage
+CPU scheduler, where the scheduling decisions are made in 2 steps:
+1. On the first step Fair CPU scheduler selects a group of processes
+  which should get some CPU time.
+2. Then standard Linux scheduler chooses a process inside the group.
+Such a scheduler efficiently isolates one group of processes from another
+and still allows a group to use more than 1 CPU on SMP systems.
+
+This document describes a new middle layer of Virtuozzo hierarchical CPU
+scheduler which makes decisions after Fair scheduler, but before Linux
+scheduler and which is called VCPU scheduler.
+
+
+Where VCPU scheduler comes from?
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Existing hierarchical CPU scheduler uses isolated algorithms on each stage
+of decision making, i.e. every scheduler makes its decisions without
+taking into account the details of other schedulers. This can lead to a number
+of problems described below.
+
+On SMP systems there are possible situations when the first CPU scheduler
+in the hierarchy (e.g. Fair scheduler) wants to schedule some group of
+processes on the physical CPU, but the underlying process scheduler
+(e.g. Linux O(1) CPU scheduler) is unable to schedule any processes
+on this physical CPU. Usually this happens due to the fact that Linux
+kernel scheduler uses per-physical CPU runqueues.
+
+Another problem is that Linux scheduler also knows nothing about
+Fair scheduler and can't balance efficiently without taking into account
+statistics about process groups from Fair scheduler. Without such
+statistics Linux scheduler can concentrate all processes on one physical
+CPU, thus making CPU usage highly inefficient.
+
+VCPU scheduler solves these problems by adding a new layer between
+Fair scheduler and Linux scheduler.
+
+VCPU scheduler
+~~~~~~~~~~~~~~
+
+VCPU scheduler is a CPU scheduler which separates the notions of
+physical and virtual CPUs (PCPU and VCPU). This means that tasks are
+running on virtual CPU runqueues, while VCPUs are running on PCPUs.
+
+The Virtuozzo hierarchical fair scheduler becomes a 3-stage CPU scheduler:
+1. First, Fair CPU scheduler selects a group of processes.
+2. Then VCPU scheduler selects a virtual CPU to run (this is actually
+  a runqueue).
+3. Standard Linux scheduler chooses a process from the runqueue.
+
+For example, in the picture below PCPU0 executes tasks from
+VCPU1 runqueue and PCPU1 is idle:
+
+   virtual          |         physical       |          virtual
+  idle CPUs         |           CPUs         |           CPUS
+--------------------|------------------------|--------------------------
+                    |                        |     -----------------
+                    |                        |    | virtual sched X |
+                    |                        |    |   -----------   |
+                    |                        |    |  |   VCPU0   |  |
+                    |                        |    |   -----------   |
+ ------------       |        -----------          |   -----------   |
+| idle VCPU0 |      |       |   PCPU0   |  <--->  |  |   VCPU1   |  |
+ ------------       |        -----------          |   -----------   |
+                    |                        |     -----------------
+                    |                        |
+                    |                        |     -----------------
+                    |                        |    | virtual sched Y |
+ ------------                -----------     |    |   -----------   |
+| idle VCPU1 |    <--->     |   PCPU1   |    |    |  |   VCPU0   |  |
+ ------------                -----------     |    |   -----------   |
+                    |                        |     -----------------
+                    |                        |
diff -uprN linux-2.6.16/arch/alpha/kernel/setup.c linux-2.6.16.ovz/arch/alpha/kernel/setup.c
--- linux-2.6.16/arch/alpha/kernel/setup.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/arch/alpha/kernel/setup.c	2006-07-05 08:34:55.000000000 -0400
@@ -24,6 +24,7 @@
 #include <linux/config.h>	/* CONFIG_ALPHA_LCA etc */
 #include <linux/mc146818rtc.h>
 #include <linux/console.h>
+#include <linux/cpu.h>
 #include <linux/errno.h>
 #include <linux/init.h>
 #include <linux/string.h>
@@ -477,6 +478,22 @@ page_is_ram(unsigned long pfn)
 #undef PFN_PHYS
 #undef PFN_MAX
 
+static int __init
+register_cpus(void)	/* allocate one struct cpu per possible CPU and hand it to register_cpu() */
+{
+	int i;	/* CPU index supplied by for_each_possible_cpu() */
+
+	for_each_possible_cpu(i) {
+		struct cpu *p = kzalloc(sizeof(*p), GFP_KERNEL);	/* zero-filled; not freed in this function */
+		if (!p)
+			return -ENOMEM;	/* stops at the first failed allocation; CPUs already handled are left as they are */
+		register_cpu(p, i, NULL);	/* NOTE(review): return value is not checked here */
+	}
+	return 0;	/* every possible CPU was processed */
+}
+
+arch_initcall(register_cpus);
+
 void __init
 setup_arch(char **cmdline_p)
 {
diff -uprN linux-2.6.16/arch/alpha/kernel/smp.c linux-2.6.16.ovz/arch/alpha/kernel/smp.c
--- linux-2.6.16/arch/alpha/kernel/smp.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/arch/alpha/kernel/smp.c	2006-07-05 08:34:55.000000000 -0400
@@ -439,7 +439,7 @@ setup_smp(void)
 			if ((cpu->flags & 0x1cc) == 0x1cc) {
 				smp_num_probed++;
 				/* Assume here that "whami" == index */
-				cpu_set(i, cpu_possible_map);
+				cpu_set(i, cpu_present_mask);
 				cpu->pal_revision = boot_cpu_palrev;
 			}
 
@@ -450,9 +450,8 @@ setup_smp(void)
 		}
 	} else {
 		smp_num_probed = 1;
-		cpu_set(boot_cpuid, cpu_possible_map);
+		cpu_set(boot_cpuid, cpu_present_mask);
 	}
-	cpu_present_mask = cpumask_of_cpu(boot_cpuid);
 
 	printk(KERN_INFO "SMP: %d CPUs probed -- cpu_present_mask = %lx\n",
 	       smp_num_probed, cpu_possible_map.bits[0]);
@@ -488,9 +487,8 @@ void __devinit
 smp_prepare_boot_cpu(void)
 {
 	/*
-	 * Mark the boot cpu (current cpu) as both present and online
+	 * Mark the boot cpu (current cpu) as online
 	 */ 
-	cpu_set(smp_processor_id(), cpu_present_mask);
 	cpu_set(smp_processor_id(), cpu_online_map);
 }
 
diff -uprN linux-2.6.16/arch/alpha/lib/strncpy.S linux-2.6.16.ovz/arch/alpha/lib/strncpy.S
--- linux-2.6.16/arch/alpha/lib/strncpy.S	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/arch/alpha/lib/strncpy.S	2006-07-05 08:34:55.000000000 -0400
@@ -43,8 +43,8 @@ strncpy:
 
 	.align	4
 $multiword:
-	subq	$24, 1, $2	# clear the final bits in the prev word
-	or	$2, $24, $2
+	subq	$27, 1, $2	# clear the final bits in the prev word
+	or	$2, $27, $2
 	zapnot	$1, $2, $1
 	subq	$18, 1, $18
 
@@ -70,8 +70,8 @@ $multiword:
 	bne	$18, 0b
 
 1:	ldq_u	$1, 0($16)	# clear the leading bits in the final word
-	subq	$27, 1, $2
-	or	$2, $27, $2
+	subq	$24, 1, $2
+	or	$2, $24, $2
 
 	zap	$1, $2, $1
 	stq_u	$1, 0($16)
diff -uprN linux-2.6.16/arch/arm/kernel/smp.c linux-2.6.16.ovz/arch/arm/kernel/smp.c
--- linux-2.6.16/arch/arm/kernel/smp.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/arch/arm/kernel/smp.c	2006-07-05 08:34:55.000000000 -0400
@@ -197,7 +197,7 @@ int __cpuexit __cpu_disable(void)
 	local_flush_tlb_all();
 
 	read_lock(&tasklist_lock);
-	for_each_process(p) {
+	for_each_process_all(p) {
 		if (p->mm)
 			cpu_clear(cpu, p->mm->cpu_vm_mask);
 	}
diff -uprN linux-2.6.16/arch/frv/mm/mmu-context.c linux-2.6.16.ovz/arch/frv/mm/mmu-context.c
--- linux-2.6.16/arch/frv/mm/mmu-context.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/arch/frv/mm/mmu-context.c	2006-07-05 08:34:55.000000000 -0400
@@ -181,7 +181,7 @@ int cxn_pin_by_pid(pid_t pid)
 
 	/* get a handle on the mm_struct */
 	read_lock(&tasklist_lock);
-	tsk = find_task_by_pid(pid);
+	tsk = find_task_by_pid_ve(pid);
 	if (tsk) {
 		ret = -EINVAL;
 
diff -uprN linux-2.6.16/arch/i386/Kconfig linux-2.6.16.ovz/arch/i386/Kconfig
--- linux-2.6.16/arch/i386/Kconfig	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/arch/i386/Kconfig	2006-07-05 08:34:55.000000000 -0400
@@ -216,6 +216,8 @@ config NR_CPUS
 	  This is purely to save memory - each supported CPU adds
 	  approximately eight kilobytes to the kernel image.
 
+source "kernel/Kconfig.fairsched"
+
 config SCHED_SMT
 	bool "SMT (Hyperthreading) scheduler support"
 	depends on SMP
@@ -268,6 +270,14 @@ config X86_VISWS_APIC
 	depends on X86_VISWS
 	default y
 
+config NMI_WATCHDOG
+	bool "NMI Watchdog"
+	default y
+	help
+	  If you say Y here the kernel will activate NMI watchdog by default
+	  on boot. You can still activate NMI watchdog via nmi_watchdog
+	  command line option even if you say N here.
+
 config X86_MCE
 	bool "Machine Check Exception"
 	depends on !X86_VOYAGER
@@ -1071,12 +1081,16 @@ endmenu
 
 source "arch/i386/Kconfig.debug"
 
+source "kernel/Kconfig.openvz"
+
 source "security/Kconfig"
 
 source "crypto/Kconfig"
 
 source "lib/Kconfig"
 
+source "kernel/ub/Kconfig"
+
 #
 # Use the generic interrupt handling code in kernel/irq/:
 #
diff -uprN linux-2.6.16/arch/i386/kernel/apic.c linux-2.6.16.ovz/arch/i386/kernel/apic.c
--- linux-2.6.16/arch/i386/kernel/apic.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/arch/i386/kernel/apic.c	2006-07-05 08:34:55.000000000 -0400
@@ -1177,6 +1177,7 @@ inline void smp_local_timer_interrupt(st
 fastcall void smp_apic_timer_interrupt(struct pt_regs *regs)
 {
 	int cpu = smp_processor_id();
+	struct ve_struct *ve;
 
 	/*
 	 * the NMI deadlock-detector uses this.
@@ -1193,9 +1194,11 @@ fastcall void smp_apic_timer_interrupt(s
 	 * Besides, if we don't timer interrupts ignore the global
 	 * interrupt lock, which is the WrongThing (tm) to do.
 	 */
+	ve = set_exec_env(get_ve0());
 	irq_enter();
 	smp_local_timer_interrupt(regs);
 	irq_exit();
+	(void)set_exec_env(ve);
 }
 
 #ifndef CONFIG_SMP
diff -uprN linux-2.6.16/arch/i386/kernel/apm.c linux-2.6.16.ovz/arch/i386/kernel/apm.c
--- linux-2.6.16/arch/i386/kernel/apm.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/arch/i386/kernel/apm.c	2006-07-05 08:34:55.000000000 -0400
@@ -1081,7 +1081,7 @@ static int apm_console_blank(int blank)
 			break;
 	}
 
-	if (error == APM_NOT_ENGAGED && state != APM_STATE_READY) {
+	if (error == APM_NOT_ENGAGED) {
 		static int tried;
 		int eng_error;
 		if (tried++ == 0) {
diff -uprN linux-2.6.16/arch/i386/kernel/cpu/amd.c linux-2.6.16.ovz/arch/i386/kernel/cpu/amd.c
--- linux-2.6.16/arch/i386/kernel/cpu/amd.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/arch/i386/kernel/cpu/amd.c	2006-07-05 08:34:55.000000000 -0400
@@ -207,6 +207,8 @@ static void __init init_amd(struct cpuin
 		set_bit(X86_FEATURE_K7, c->x86_capability); 
 		break;
 	}
+	if (c->x86 >= 6)
+		set_bit(X86_FEATURE_FXSAVE_LEAK, c->x86_capability);
 
 	display_cacheinfo(c);
 
diff -uprN linux-2.6.16/arch/i386/kernel/cpu/cpufreq/Kconfig linux-2.6.16.ovz/arch/i386/kernel/cpu/cpufreq/Kconfig
--- linux-2.6.16/arch/i386/kernel/cpu/cpufreq/Kconfig	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/arch/i386/kernel/cpu/cpufreq/Kconfig	2006-07-05 08:34:55.000000000 -0400
@@ -203,6 +203,7 @@ config X86_LONGRUN
 config X86_LONGHAUL
 	tristate "VIA Cyrix III Longhaul"
 	select CPU_FREQ_TABLE
+	depends on BROKEN
 	help
 	  This adds the CPUFreq driver for VIA Samuel/CyrixIII, 
 	  VIA Cyrix Samuel/C3, VIA Cyrix Ezra and VIA Cyrix Ezra-T 
diff -uprN linux-2.6.16/arch/i386/kernel/cpu/cpufreq/p4-clockmod.c linux-2.6.16.ovz/arch/i386/kernel/cpu/cpufreq/p4-clockmod.c
--- linux-2.6.16/arch/i386/kernel/cpu/cpufreq/p4-clockmod.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/arch/i386/kernel/cpu/cpufreq/p4-clockmod.c	2006-07-05 08:34:55.000000000 -0400
@@ -244,7 +244,7 @@ static int cpufreq_p4_cpu_init(struct cp
 	for (i=1; (p4clockmod_table[i].frequency != CPUFREQ_TABLE_END); i++) {
 		if ((i<2) && (has_N44_O17_errata[policy->cpu]))
 			p4clockmod_table[i].frequency = CPUFREQ_ENTRY_INVALID;
-		else if (has_N60_errata[policy->cpu] && p4clockmod_table[i].frequency < 2000000)
+		else if (has_N60_errata[policy->cpu] && ((stock_freq * i)/8) < 2000000)
 			p4clockmod_table[i].frequency = CPUFREQ_ENTRY_INVALID;
 		else
 			p4clockmod_table[i].frequency = (stock_freq * i)/8;
diff -uprN linux-2.6.16/arch/i386/kernel/cpu/cpufreq/speedstep-smi.c linux-2.6.16.ovz/arch/i386/kernel/cpu/cpufreq/speedstep-smi.c
--- linux-2.6.16/arch/i386/kernel/cpu/cpufreq/speedstep-smi.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/arch/i386/kernel/cpu/cpufreq/speedstep-smi.c	2006-07-05 08:34:55.000000000 -0400
@@ -75,7 +75,9 @@ static int speedstep_smi_ownership (void
 	__asm__ __volatile__(
 		"out %%al, (%%dx)\n"
 		: "=D" (result)
-		: "a" (command), "b" (function), "c" (0), "d" (smi_port), "D" (0), "S" (magic)
+		: "a" (command), "b" (function), "c" (0), "d" (smi_port),
+			"D" (0), "S" (magic)
+		: "memory"
 	);
 
 	dprintk("result is %x\n", result);
diff -uprN linux-2.6.16/arch/i386/kernel/cpu/mtrr/if.c linux-2.6.16.ovz/arch/i386/kernel/cpu/mtrr/if.c
--- linux-2.6.16/arch/i386/kernel/cpu/mtrr/if.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/arch/i386/kernel/cpu/mtrr/if.c	2006-07-05 08:34:55.000000000 -0400
@@ -392,7 +392,7 @@ static int __init mtrr_if_init(void)
 		return -ENODEV;
 
 	proc_root_mtrr =
-	    create_proc_entry("mtrr", S_IWUSR | S_IRUGO, &proc_root);
+	    create_proc_entry("mtrr", S_IWUSR | S_IRUGO, NULL);
 	if (proc_root_mtrr) {
 		proc_root_mtrr->owner = THIS_MODULE;
 		proc_root_mtrr->proc_fops = &mtrr_fops;
diff -uprN linux-2.6.16/arch/i386/kernel/dmi_scan.c linux-2.6.16.ovz/arch/i386/kernel/dmi_scan.c
--- linux-2.6.16/arch/i386/kernel/dmi_scan.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/arch/i386/kernel/dmi_scan.c	2006-07-05 08:34:55.000000000 -0400
@@ -106,7 +106,7 @@ static void __init dmi_save_devices(stru
 	struct dmi_device *dev;
 
 	for (i = 0; i < count; i++) {
-		char *d = ((char *) dm) + (i * 2);
+		char *d = (char *)(dm + 1) + (i * 2);
 
 		/* Skip disabled device */
 		if ((*d & 0x80) == 0)
diff -uprN linux-2.6.16/arch/i386/kernel/irq.c linux-2.6.16.ovz/arch/i386/kernel/irq.c
--- linux-2.6.16/arch/i386/kernel/irq.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/arch/i386/kernel/irq.c	2006-07-05 08:34:55.000000000 -0400
@@ -59,7 +59,9 @@ fastcall unsigned int do_IRQ(struct pt_r
 	union irq_ctx *curctx, *irqctx;
 	u32 *isp;
 #endif
+	struct ve_struct *ve;
 
+	ve = set_exec_env(get_ve0());
 	irq_enter();
 #ifdef CONFIG_DEBUG_STACKOVERFLOW
 	/* Debugging check for stack overflow: is there less than 1KB free? */
@@ -108,6 +110,7 @@ fastcall unsigned int do_IRQ(struct pt_r
 		__do_IRQ(irq, regs);
 
 	irq_exit();
+	(void)set_exec_env(ve);
 
 	return 1;
 }
diff -uprN linux-2.6.16/arch/i386/kernel/ldt.c linux-2.6.16.ovz/arch/i386/kernel/ldt.c
--- linux-2.6.16/arch/i386/kernel/ldt.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/arch/i386/kernel/ldt.c	2006-07-05 08:34:55.000000000 -0400
@@ -13,6 +13,7 @@
 #include <linux/smp_lock.h>
 #include <linux/vmalloc.h>
 #include <linux/slab.h>
+#include <linux/module.h>
 
 #include <asm/uaccess.h>
 #include <asm/system.h>
@@ -20,6 +21,8 @@
 #include <asm/desc.h>
 #include <asm/mmu_context.h>
 
+#include <ub/ub_mem.h>
+
 #ifdef CONFIG_SMP /* avoids "defined but not used" warnig */
 static void flush_ldt(void *null)
 {
@@ -39,9 +42,9 @@ static int alloc_ldt(mm_context_t *pc, i
 	oldsize = pc->size;
 	mincount = (mincount+511)&(~511);
 	if (mincount*LDT_ENTRY_SIZE > PAGE_SIZE)
-		newldt = vmalloc(mincount*LDT_ENTRY_SIZE);
+		newldt = ub_vmalloc(mincount*LDT_ENTRY_SIZE);
 	else
-		newldt = kmalloc(mincount*LDT_ENTRY_SIZE, GFP_KERNEL);
+		newldt = ub_kmalloc(mincount*LDT_ENTRY_SIZE, GFP_KERNEL);
 
 	if (!newldt)
 		return -ENOMEM;
@@ -105,6 +108,7 @@ int init_new_context(struct task_struct 
 	}
 	return retval;
 }
+EXPORT_SYMBOL_GPL(init_new_context);
 
 /*
  * No need to lock the MM as we are the last user
@@ -251,3 +255,5 @@ asmlinkage int sys_modify_ldt(int func, 
 	}
 	return ret;
 }
+
+EXPORT_SYMBOL_GPL(default_ldt);
diff -uprN linux-2.6.16/arch/i386/kernel/nmi.c linux-2.6.16.ovz/arch/i386/kernel/nmi.c
--- linux-2.6.16/arch/i386/kernel/nmi.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/arch/i386/kernel/nmi.c	2006-07-05 08:34:55.000000000 -0400
@@ -32,7 +32,13 @@
 
 #include "mach_traps.h"
 
-unsigned int nmi_watchdog = NMI_NONE;
+#ifdef CONFIG_NMI_WATCHDOG	/* the config option picks the initial mode */
+#define NMI_DEFAULT NMI_IO_APIC
+#else
+#define NMI_DEFAULT NMI_NONE
+#endif /* CONFIG_NMI_WATCHDOG */
+
+unsigned int nmi_watchdog = NMI_DEFAULT;	/* starts out as NMI_DEFAULT */
 extern int unknown_nmi_panic;
 static unsigned int nmi_hz = HZ;
 static unsigned int nmi_perfctr_msr;	/* the MSR to reset in NMI handler */
@@ -521,7 +527,22 @@ void touch_nmi_watchdog (void)
 
 extern void die_nmi(struct pt_regs *, const char *msg);
 
-void nmi_watchdog_tick (struct pt_regs * regs)
+/* Print regs via show_regs() under an "IPI show regs" banner; info is unused. */
+void smp_show_regs(struct pt_regs *regs, void *info)
+{
+	static DEFINE_SPINLOCK(show_regs_lock);
+
+	if (regs == NULL)	/* nothing to dump */
+		return;
+	bust_spinlocks(1);
+	spin_lock(&show_regs_lock);	/* serialize CPUs dumping concurrently */
+	printk("----------- IPI show regs -----------");
+	show_regs(regs);
+	spin_unlock(&show_regs_lock);
+	bust_spinlocks(0);
+}
+
+void nmi_watchdog_tick(struct pt_regs *regs)
 {
 
 	/*
diff -uprN linux-2.6.16/arch/i386/kernel/process.c linux-2.6.16.ovz/arch/i386/kernel/process.c
--- linux-2.6.16/arch/i386/kernel/process.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/arch/i386/kernel/process.c	2006-07-05 08:34:55.000000000 -0400
@@ -59,6 +59,7 @@
 #include <asm/cpu.h>
 
 asmlinkage void ret_from_fork(void) __asm__("ret_from_fork");
+EXPORT_SYMBOL_GPL(ret_from_fork);
 
 static int hlt_counter;
 
@@ -289,11 +290,15 @@ __setup("idle=", idle_setup);
 void show_regs(struct pt_regs * regs)
 {
 	unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L;
+	extern int die_counter;
 
 	printk("\n");
-	printk("Pid: %d, comm: %20s\n", current->pid, current->comm);
-	printk("EIP: %04x:[<%08lx>] CPU: %d\n",0xffff & regs->xcs,regs->eip, smp_processor_id());
-	print_symbol("EIP is at %s\n", regs->eip);
+	printk("Pid: %d, comm: %20s, oopses: %d\n",
+			current->pid, current->comm, die_counter);
+	printk("EIP: %04x:[<%08lx>] CPU: %d, VCPU: %d:%d\n",0xffff & regs->xcs,regs->eip, smp_processor_id(),
+			task_vsched_id(current), task_cpu(current));
+	if (decode_call_traces)
+		print_symbol("EIP is at %s\n", regs->eip);
 
 	if (user_mode(regs))
 		printk(" ESP: %04x:%08lx",0xffff & regs->xss,regs->esp);
@@ -314,6 +319,8 @@ void show_regs(struct pt_regs * regs)
 	cr4 = read_cr4_safe();
 	printk("CR0: %08lx CR2: %08lx CR3: %08lx CR4: %08lx\n", cr0, cr2, cr3, cr4);
 	show_trace(NULL, &regs->esp);
+	if (!decode_call_traces)
+		printk(" EIP: [<%08lx>]\n",regs->eip);
 }
 
 /*
@@ -339,6 +346,13 @@ int kernel_thread(int (*fn)(void *), voi
 {
 	struct pt_regs regs;
 
+	/* Don't allow kernel_thread() inside VE */
+	if (!ve_is_super(get_exec_env())) {
+		printk("kernel_thread call inside VE\n");
+		dump_stack();
+		return -EPERM;
+	}
+
 	memset(&regs, 0, sizeof(regs));
 
 	regs.ebx = (unsigned long) fn;
diff -uprN linux-2.6.16/arch/i386/kernel/ptrace.c linux-2.6.16.ovz/arch/i386/kernel/ptrace.c
--- linux-2.6.16/arch/i386/kernel/ptrace.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/arch/i386/kernel/ptrace.c	2006-07-05 08:34:55.000000000 -0400
@@ -706,7 +706,9 @@ int do_syscall_trace(struct pt_regs *reg
 	/* the 0x80 provides a way for the tracing parent to distinguish
 	   between a syscall stop and SIGTRAP delivery */
 	/* Note that the debugger could change the result of test_thread_flag!*/
+	set_pn_state(current, entryexit ? PN_STOP_LEAVE : PN_STOP_ENTRY);
 	ptrace_notify(SIGTRAP | ((current->ptrace & PT_TRACESYSGOOD) ? 0x80:0));
+	clear_pn_state(current);
 
 	/*
 	 * this isn't the same as continuing with a signal, but it will do
diff -uprN linux-2.6.16/arch/i386/kernel/signal.c linux-2.6.16.ovz/arch/i386/kernel/signal.c
--- linux-2.6.16/arch/i386/kernel/signal.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/arch/i386/kernel/signal.c	2006-07-05 08:34:55.000000000 -0400
@@ -582,7 +582,7 @@ static void fastcall do_signal(struct pt
 	if (!user_mode(regs))
 		return;
 
-	if (try_to_freeze())
+	if (try_to_freeze() && !signal_pending(current))
 		goto no_signal;
 
 	if (test_thread_flag(TIF_RESTORE_SIGMASK))
diff -uprN linux-2.6.16/arch/i386/kernel/smp.c linux-2.6.16.ovz/arch/i386/kernel/smp.c
--- linux-2.6.16/arch/i386/kernel/smp.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/arch/i386/kernel/smp.c	2006-07-05 08:34:55.000000000 -0400
@@ -21,6 +21,7 @@
 #include <linux/cpu.h>
 #include <linux/module.h>
 
+#include <asm/nmi.h>
 #include <asm/mtrr.h>
 #include <asm/tlbflush.h>
 #include <mach_apic.h>
@@ -566,6 +567,89 @@ int smp_call_function (void (*func) (voi
 }
 EXPORT_SYMBOL(smp_call_function);
 
+static DEFINE_SPINLOCK(nmi_call_lock);	/* only one NMI call at a time */
+static struct nmi_call_data_struct {
+	smp_nmi_function func;	/* function to run on the other CPUs */
+	void *info;		/* opaque argument handed to func */
+	atomic_t started;	/* bumped by each CPU that took the request */
+	atomic_t finished;	/* bumped after func returned, if wait is set */
+	cpumask_t cpus_called;	/* CPUs that must not run func (again) */
+	int wait;		/* non-zero: initiator waits for finished */
+} *nmi_call_data;		/* current request, set by the initiator */
+
+/* NMI-side half of smp_nmi_call_function(): runs the requested function on
+ * this CPU at most once per request; always returns 0. */
+static int smp_nmi_callback(struct pt_regs * regs, int cpu)
+{
+	smp_nmi_function func;
+	void *info;
+	int wait;
+
+	func = nmi_call_data->func;
+	info = nmi_call_data->info;
+	wait = nmi_call_data->wait;
+	ack_APIC_irq();
+	/* make sure func() runs only once per CPU for a given request */
+	if (cpu_test_and_set(cpu, nmi_call_data->cpus_called))
+		return 0;
+	/* tell the initiating CPU that the data has been grabbed and that
+	 * func is about to be executed */
+	mb();
+	atomic_inc(&nmi_call_data->started);
+	/* unless wait is set, *nmi_call_data may be gone from here on */
+	irq_enter();
+	func(regs, info);
+	irq_exit();
+	if (wait)
+		atomic_inc(&nmi_call_data->finished);
+
+	return 0;
+}
+
+/*
+ * Try to run func(regs, info) on every other online CPU through an NMI IPI;
+ * func must be fast and non-blocking.  Callable from any context, even with
+ * interrupts disabled.  Returns 0, or -1 when another such call is running.
+ */
+int smp_nmi_call_function(smp_nmi_function func, void *info, int wait)
+{
+	struct nmi_call_data_struct data;	/* request, on this stack */
+	int cpus;
+
+	cpus = num_online_cpus() - 1;		/* everybody but this CPU */
+	if (!cpus)
+		return 0;
+
+	data.func = func;
+	data.info = info;
+	data.wait = wait;
+	atomic_set(&data.started, 0);
+	atomic_set(&data.finished, 0);
+	cpus_clear(data.cpus_called);
+	/* prevent this cpu from calling func if NMI happens */
+	cpu_set(smp_processor_id(), data.cpus_called);
+
+	if (!spin_trylock(&nmi_call_lock))	/* a call is already in flight */
+		return -1;
+
+	nmi_call_data = &data;
+	set_nmi_ipi_callback(smp_nmi_callback);
+	mb();	/* data and callback must be set before the IPI goes out */
+
+	/* Send a message to all other CPUs and wait for them to respond */
+	send_IPI_allbutself(APIC_DM_NMI);
+	while (atomic_read(&data.started) != cpus)	/* no timeout */
+		barrier();
+
+	unset_nmi_ipi_callback();
+	if (wait)	/* also wait until func has returned on all of them */
+		while (atomic_read(&data.finished) != cpus)
+			barrier();
+	spin_unlock(&nmi_call_lock);
+
+	return 0;
+}
+
 static void stop_this_cpu (void * dummy)
 {
 	/*
diff -uprN linux-2.6.16/arch/i386/kernel/smpboot.c linux-2.6.16.ovz/arch/i386/kernel/smpboot.c
--- linux-2.6.16/arch/i386/kernel/smpboot.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/arch/i386/kernel/smpboot.c	2006-07-05 08:34:55.000000000 -0400
@@ -317,6 +317,10 @@ static void __init synchronize_tsc_bp (v
 	}
 	if (!buggy)
 		printk("passed.\n");
+#ifdef CONFIG_VE
+	/* TSC reset. kill whatever might rely on old values */
+	VE_TASK_INFO(current)->wakeup_stamp = 0;
+#endif
 }
 
 static void __init synchronize_tsc_ap (void)
@@ -342,6 +346,10 @@ static void __init synchronize_tsc_ap (v
 		atomic_inc(&tsc_count_stop);
 		while (atomic_read(&tsc_count_stop) != num_booting_cpus()) mb();
 	}
+#ifdef CONFIG_VE
+	/* TSC reset. kill whatever might rely on old values */
+	VE_TASK_INFO(current)->wakeup_stamp = 0;
+#endif
 }
 #undef NR_LOOPS
 
@@ -908,6 +916,13 @@ static int __devinit do_boot_cpu(int api
 	if (IS_ERR(idle))
 		panic("failed fork for CPU %d", cpu);
 	idle->thread.eip = (unsigned long) start_secondary;
+
+#ifdef CONFIG_VE
+	/* Cosmetic: sleep_time won't be changed afterwards for the idle
+	* thread;  keep it 0 rather than -cycles. */
+	VE_TASK_INFO(idle)->sleep_time = 0;
+#endif
+
 	/* start_eip had better be page-aligned! */
 	start_eip = setup_trampoline();
 
diff -uprN linux-2.6.16/arch/i386/kernel/sys_i386.c linux-2.6.16.ovz/arch/i386/kernel/sys_i386.c
--- linux-2.6.16/arch/i386/kernel/sys_i386.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/arch/i386/kernel/sys_i386.c	2006-07-05 08:34:55.000000000 -0400
@@ -217,7 +217,7 @@ asmlinkage int sys_uname(struct old_utsn
 	if (!name)
 		return -EFAULT;
 	down_read(&uts_sem);
-	err=copy_to_user(name, &system_utsname, sizeof (*name));
+	err=copy_to_user(name, &ve_utsname, sizeof (*name));
 	up_read(&uts_sem);
 	return err?-EFAULT:0;
 }
@@ -233,15 +233,15 @@ asmlinkage int sys_olduname(struct oldol
   
   	down_read(&uts_sem);
 	
-	error = __copy_to_user(&name->sysname,&system_utsname.sysname,__OLD_UTS_LEN);
+	error = __copy_to_user(name->sysname,ve_utsname.sysname,__OLD_UTS_LEN);
 	error |= __put_user(0,name->sysname+__OLD_UTS_LEN);
-	error |= __copy_to_user(&name->nodename,&system_utsname.nodename,__OLD_UTS_LEN);
+	error |= __copy_to_user(name->nodename,ve_utsname.nodename,__OLD_UTS_LEN);
 	error |= __put_user(0,name->nodename+__OLD_UTS_LEN);
-	error |= __copy_to_user(&name->release,&system_utsname.release,__OLD_UTS_LEN);
+	error |= __copy_to_user(name->release,ve_utsname.release,__OLD_UTS_LEN);
 	error |= __put_user(0,name->release+__OLD_UTS_LEN);
-	error |= __copy_to_user(&name->version,&system_utsname.version,__OLD_UTS_LEN);
+	error |= __copy_to_user(name->version,ve_utsname.version,__OLD_UTS_LEN);
 	error |= __put_user(0,name->version+__OLD_UTS_LEN);
-	error |= __copy_to_user(&name->machine,&system_utsname.machine,__OLD_UTS_LEN);
+	error |= __copy_to_user(name->machine,ve_utsname.machine,__OLD_UTS_LEN);
 	error |= __put_user(0,name->machine+__OLD_UTS_LEN);
 	
 	up_read(&uts_sem);
diff -uprN linux-2.6.16/arch/i386/kernel/syscall_table.S linux-2.6.16.ovz/arch/i386/kernel/syscall_table.S
--- linux-2.6.16/arch/i386/kernel/syscall_table.S	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/arch/i386/kernel/syscall_table.S	2006-07-05 08:34:55.000000000 -0400
@@ -310,3 +310,21 @@ ENTRY(sys_call_table)
 	.long sys_pselect6
 	.long sys_ppoll
 	.long sys_unshare		/* 310 */
+
+	.rept 500-(.-sys_call_table)/4
+	.long sys_ni_syscall
+	.endr
+	.long sys_fairsched_mknod	/* 500 */
+	.long sys_fairsched_rmnod
+	.long sys_fairsched_chwt
+	.long sys_fairsched_mvpr
+	.long sys_fairsched_rate
+
+	.rept 510-(.-sys_call_table)/4
+	.long sys_ni_syscall
+	.endr
+
+	.long sys_getluid		/* 510 */
+	.long sys_setluid
+	.long sys_setublimit
+	.long sys_ubstat
diff -uprN linux-2.6.16/arch/i386/kernel/timers/timer_tsc.c linux-2.6.16.ovz/arch/i386/kernel/timers/timer_tsc.c
--- linux-2.6.16/arch/i386/kernel/timers/timer_tsc.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/arch/i386/kernel/timers/timer_tsc.c	2006-07-05 08:34:55.000000000 -0400
@@ -94,7 +94,7 @@ static int count2; /* counter for mark_o
  * Equal to 2^32 * (1 / (clocks per usec) ).
  * Initialized in time_init.
  */
-static unsigned long fast_gettimeoffset_quotient;
+unsigned long fast_gettimeoffset_quotient;
 
 static unsigned long get_offset_tsc(void)
 {
diff -uprN linux-2.6.16/arch/i386/kernel/traps.c linux-2.6.16.ovz/arch/i386/kernel/traps.c
--- linux-2.6.16/arch/i386/kernel/traps.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/arch/i386/kernel/traps.c	2006-07-05 08:34:55.000000000 -0400
@@ -116,8 +116,10 @@ static void print_addr_and_symbol(unsign
 {
 	printk(log_lvl);
 	printk(" [<%08lx>] ", addr);
-	print_symbol("%s", addr);
-	printk("\n");
+	if (decode_call_traces) {
+		print_symbol("%s", addr);
+		printk("\n");
+	}
 }
 
 static inline unsigned long print_context_stack(struct thread_info *tinfo,
@@ -167,7 +169,10 @@ static void show_trace_log_lvl(struct ta
 		if (!stack)
 			break;
 		printk(log_lvl);
-		printk(" =======================\n");
+		if (decode_call_traces)
+			printk(" =======================\n");
+		else
+			printk(" =<ctx>= ");
 	}
 }
 
@@ -203,8 +208,13 @@ static void show_stack_log_lvl(struct ta
 	}
 	printk("\n");
 	printk(log_lvl);
-	printk("Call Trace:\n");
+	if (decode_call_traces)
+		printk("Call Trace:\n");
+	else
+		printk("Call Trace: ");
 	show_trace_log_lvl(task, esp, log_lvl);
+	if (!decode_call_traces)
+		printk("\n");
 }
 
 void show_stack(struct task_struct *task, unsigned long *esp)
@@ -220,6 +230,8 @@ void dump_stack(void)
 	unsigned long stack;
 
 	show_trace(current, &stack);
+	if (!decode_call_traces)
+		printk("\n");
 }
 
 EXPORT_SYMBOL(dump_stack);
@@ -239,9 +251,10 @@ void show_registers(struct pt_regs *regs
 		ss = regs->xss & 0xffff;
 	}
 	print_modules();
-	printk(KERN_EMERG "CPU:    %d\nEIP:    %04x:[<%08lx>]    %s VLI\n"
+	printk(KERN_EMERG "CPU:    %d, VCPU: %d:%d\nEIP:    %04x:[<%08lx>]    %s VLI\n"
 			"EFLAGS: %08lx   (%s %.*s) \n",
-		smp_processor_id(), 0xffff & regs->xcs, regs->eip,
+		smp_processor_id(), task_vsched_id(current), task_cpu(current),
+		0xffff & regs->xcs, regs->eip,
 		print_tainted(), regs->eflags, system_utsname.release,
 		(int)strcspn(system_utsname.version, " "),
 		system_utsname.version);
@@ -252,8 +265,11 @@ void show_registers(struct pt_regs *regs
 		regs->esi, regs->edi, regs->ebp, esp);
 	printk(KERN_EMERG "ds: %04x   es: %04x   ss: %04x\n",
 		regs->xds & 0xffff, regs->xes & 0xffff, ss);
-	printk(KERN_EMERG "Process %s (pid: %d, threadinfo=%p task=%p)",
-		current->comm, current->pid, current_thread_info(), current);
+	printk(KERN_EMERG "Process %s (pid: %d, veid=%d, threadinfo=%p task=%p)",
+		current->comm, current->pid,
+		VEID(VE_TASK_INFO(current)->owner_env),
+		current_thread_info(), current);
+
 	/*
 	 * When in-kernel, we also print out the stack and code at the
 	 * time of the fault..
@@ -299,9 +315,9 @@ static void handle_BUG(struct pt_regs *r
 		goto no_bug;
 	if (ud2 != 0x0b0f)
 		goto no_bug;
-	if (__get_user(line, (unsigned short __user *)(eip + 2)))
+	if (__get_user(line, (unsigned short __user *)(eip + 4)))
 		goto bug;
-	if (__get_user(file, (char * __user *)(eip + 4)) ||
+	if (__get_user(file, (char * __user *)(eip + 7)) ||
 		(unsigned long)file < PAGE_OFFSET || __get_user(c, file))
 		file = "<bad filename>";
 
@@ -316,6 +332,15 @@ bug:
 	printk(KERN_EMERG "Kernel BUG\n");
 }
 
+int die_counter = 0;
+
+static inline void check_kernel_csum_bug(void)	/* called from die() */
+{
+	if (kernel_text_csum_broken)	/* non-zero: mismatches were counted */
+		printk("Kernel code checksum mismatch detected %d times\n",
+				kernel_text_csum_broken);
+}
+
 /* This is gone through when something in the kernel
  * has done something bad and is about to be terminated.
 */
@@ -330,7 +355,6 @@ void die(const char * str, struct pt_reg
 		.lock_owner =		-1,
 		.lock_owner_depth =	0
 	};
-	static int die_counter;
 	unsigned long flags;
 
 	if (die.lock_owner != raw_smp_processor_id()) {
@@ -370,6 +394,7 @@ void die(const char * str, struct pt_reg
   	} else
 		printk(KERN_EMERG "Recursive die() failure, output suppressed\n");
 
+	check_kernel_csum_bug();
 	bust_spinlocks(0);
 	die.lock_owner = -1;
 	spin_unlock_irqrestore(&die.lock, flags);
@@ -597,12 +622,27 @@ static void unknown_nmi_error(unsigned c
 	printk("Do you have a strange power saving mode enabled?\n");
 }
 
-static DEFINE_SPINLOCK(nmi_print_lock);
+/*
+ * Weak no-op fallbacks: Voyager doesn't implement these.
+ */
+void __attribute__((weak)) smp_show_regs(struct pt_regs *regs, void *info)
+{
+}
+
+#ifdef CONFIG_SMP
+int __attribute__((weak))
+smp_nmi_call_function(smp_nmi_function func, void *info, int wait)
+{
+	return 0;
+}
+#endif /* CONFIG_SMP */
 
 void die_nmi (struct pt_regs *regs, const char *msg)
 {
+	static DEFINE_SPINLOCK(nmi_print_lock);
+
 	if (notify_die(DIE_NMIWATCHDOG, msg, regs, 0, 0, SIGINT) ==
-	    NOTIFY_STOP)
+			NOTIFY_STOP)
 		return;
 
 	spin_lock(&nmi_print_lock);
@@ -615,7 +655,11 @@ void die_nmi (struct pt_regs *regs, cons
 	printk(" on CPU%d, eip %08lx, registers:\n",
 		smp_processor_id(), regs->eip);
 	show_registers(regs);
-	printk(KERN_EMERG "console shuts up ...\n");
+	smp_nmi_call_function(smp_show_regs, NULL, 1);
+	bust_spinlocks(1);
+	/* the current CPU's messages should go at the bottom */
+	if (!decode_call_traces)
+		smp_show_regs(regs, NULL);
 	console_silent();
 	spin_unlock(&nmi_print_lock);
 	bust_spinlocks(0);
@@ -631,6 +675,14 @@ void die_nmi (struct pt_regs *regs, cons
 	do_exit(SIGSEGV);
 }
 
+static int dummy_nmi_callback(struct pt_regs * regs, int cpu)
+{
+	return 0;	/* 0 from nmi_callback: do_nmi() runs default_do_nmi() */
+}
+
+static nmi_callback_t nmi_callback = dummy_nmi_callback;
+static nmi_callback_t nmi_ipi_callback = dummy_nmi_callback;	/* no-op default */
+
 static void default_do_nmi(struct pt_regs * regs)
 {
 	unsigned char reason = 0;
@@ -653,6 +705,9 @@ static void default_do_nmi(struct pt_reg
 			return;
 		}
 #endif
+		if (nmi_ipi_callback != dummy_nmi_callback)
+			return;
+
 		unknown_nmi_error(reason, regs);
 		return;
 	}
@@ -669,13 +724,6 @@ static void default_do_nmi(struct pt_reg
 	reassert_nmi();
 }
 
-static int dummy_nmi_callback(struct pt_regs * regs, int cpu)
-{
-	return 0;
-}
- 
-static nmi_callback_t nmi_callback = dummy_nmi_callback;
- 
 fastcall void do_nmi(struct pt_regs * regs, long error_code)
 {
 	int cpu;
@@ -689,9 +737,20 @@ fastcall void do_nmi(struct pt_regs * re
 	if (!rcu_dereference(nmi_callback)(regs, cpu))
 		default_do_nmi(regs);
 
+	nmi_ipi_callback(regs, cpu);
 	nmi_exit();
 }
 
+void set_nmi_ipi_callback(nmi_callback_t callback)
+{
+	nmi_ipi_callback = callback;	/* do_nmi() calls it on every NMI */
+}
+
+void unset_nmi_ipi_callback(void)
+{
+	nmi_ipi_callback = dummy_nmi_callback;	/* back to the no-op default */
+}
+
 void set_nmi_callback(nmi_callback_t callback)
 {
 	rcu_assign_pointer(nmi_callback, callback);
diff -uprN linux-2.6.16/arch/i386/kernel/vm86.c linux-2.6.16.ovz/arch/i386/kernel/vm86.c
--- linux-2.6.16/arch/i386/kernel/vm86.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/arch/i386/kernel/vm86.c	2006-07-05 08:34:55.000000000 -0400
@@ -43,6 +43,7 @@
 #include <linux/smp_lock.h>
 #include <linux/highmem.h>
 #include <linux/ptrace.h>
+#include <linux/audit.h>
 
 #include <asm/uaccess.h>
 #include <asm/io.h>
@@ -252,6 +253,7 @@ out:
 static void do_sys_vm86(struct kernel_vm86_struct *info, struct task_struct *tsk)
 {
 	struct tss_struct *tss;
+	long eax;
 /*
  * make sure the vm86() system call doesn't try to do anything silly
  */
@@ -305,13 +307,19 @@ static void do_sys_vm86(struct kernel_vm
 	tsk->thread.screen_bitmap = info->screen_bitmap;
 	if (info->flags & VM86_SCREEN_BITMAP)
 		mark_screen_rdonly(tsk->mm);
+	__asm__ __volatile__("xorl %eax,%eax; movl %eax,%fs; movl %eax,%gs\n\t");
+	__asm__ __volatile__("movl %%eax, %0\n" :"=r"(eax));
+
+	/*call audit_syscall_exit since we do not exit via the normal paths */
+	if (unlikely(current->audit_context))
+		audit_syscall_exit(current, AUDITSC_RESULT(eax), eax);
+
 	__asm__ __volatile__(
-		"xorl %%eax,%%eax; movl %%eax,%%fs; movl %%eax,%%gs\n\t"
 		"movl %0,%%esp\n\t"
 		"movl %1,%%ebp\n\t"
 		"jmp resume_userspace"
 		: /* no outputs */
-		:"r" (&info->regs), "r" (task_thread_info(tsk)) : "ax");
+		:"r" (&info->regs), "r" (task_thread_info(tsk)));
 	/* we never return here */
 }
 
diff -uprN linux-2.6.16/arch/i386/mm/fault.c linux-2.6.16.ovz/arch/i386/mm/fault.c
--- linux-2.6.16/arch/i386/mm/fault.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/arch/i386/mm/fault.c	2006-07-05 08:34:55.000000000 -0400
@@ -31,32 +31,6 @@
 extern void die(const char *,struct pt_regs *,long);
 
 /*
- * Unlock any spinlocks which will prevent us from getting the
- * message out 
- */
-void bust_spinlocks(int yes)
-{
-	int loglevel_save = console_loglevel;
-
-	if (yes) {
-		oops_in_progress = 1;
-		return;
-	}
-#ifdef CONFIG_VT
-	unblank_screen();
-#endif
-	oops_in_progress = 0;
-	/*
-	 * OK, the message is on the console.  Now we call printk()
-	 * without oops_in_progress set so that printk will give klogd
-	 * a poke.  Hold onto your hats...
-	 */
-	console_loglevel = 15;		/* NMI oopser may have shut the console up */
-	printk(" ");
-	console_loglevel = loglevel_save;
-}
-
-/*
  * Return EIP plus the CS segment base.  The segment limit is also
  * adjusted, clamped to the kernel/user address space (whichever is
  * appropriate), and returned in *eip_limit.
@@ -347,7 +321,6 @@ good_area:
 				goto bad_area;
 	}
 
- survive:
 	/*
 	 * If for any reason at all we couldn't handle the fault,
 	 * make sure we exit gracefully rather than endlessly redo
@@ -485,14 +458,14 @@ no_context:
  */
 out_of_memory:
 	up_read(&mm->mmap_sem);
-	if (tsk->pid == 1) {
-		yield();
-		down_read(&mm->mmap_sem);
-		goto survive;
+	if (error_code & 4) {
+		/*
+		 * A 0-order allocation always succeeds unless something really
+		 * fatal happens: a beancounter overdraft or OOM.
+		 */
+		force_sig(SIGKILL, tsk);
+		return;
 	}
-	printk("VM: killing process %s\n", tsk->comm);
-	if (error_code & 4)
-		do_exit(SIGKILL);
 	goto no_context;
 
 do_sigbus:
diff -uprN linux-2.6.16/arch/i386/mm/hugetlbpage.c linux-2.6.16.ovz/arch/i386/mm/hugetlbpage.c
--- linux-2.6.16/arch/i386/mm/hugetlbpage.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/arch/i386/mm/hugetlbpage.c	2006-07-05 08:34:55.000000000 -0400
@@ -14,6 +14,7 @@
 #include <linux/slab.h>
 #include <linux/err.h>
 #include <linux/sysctl.h>
+#include <linux/module.h>
 #include <asm/mman.h>
 #include <asm/tlb.h>
 #include <asm/tlbflush.h>
@@ -110,6 +111,7 @@ int pmd_huge(pmd_t pmd)
 {
 	return !!(pmd_val(pmd) & _PAGE_PSE);
 }
+EXPORT_SYMBOL(pmd_huge);
 
 struct page *
 follow_huge_pmd(struct mm_struct *mm, unsigned long address,
diff -uprN linux-2.6.16/arch/i386/mm/init.c linux-2.6.16.ovz/arch/i386/mm/init.c
--- linux-2.6.16/arch/i386/mm/init.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/arch/i386/mm/init.c	2006-07-05 08:34:55.000000000 -0400
@@ -677,7 +677,7 @@ void __init pgtable_cache_init(void)
 		pmd_cache = kmem_cache_create("pmd",
 					PTRS_PER_PMD*sizeof(pmd_t),
 					PTRS_PER_PMD*sizeof(pmd_t),
-					0,
+					SLAB_UBC,
 					pmd_ctor,
 					NULL);
 		if (!pmd_cache)
@@ -686,7 +686,7 @@ void __init pgtable_cache_init(void)
 	pgd_cache = kmem_cache_create("pgd",
 				PTRS_PER_PGD*sizeof(pgd_t),
 				PTRS_PER_PGD*sizeof(pgd_t),
-				0,
+				SLAB_UBC,
 				pgd_ctor,
 				PTRS_PER_PMD == 1 ? pgd_dtor : NULL);
 	if (!pgd_cache)
diff -uprN linux-2.6.16/arch/i386/mm/pgtable.c linux-2.6.16.ovz/arch/i386/mm/pgtable.c
--- linux-2.6.16/arch/i386/mm/pgtable.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/arch/i386/mm/pgtable.c	2006-07-05 08:34:55.000000000 -0400
@@ -5,8 +5,10 @@
 #include <linux/config.h>
 #include <linux/sched.h>
 #include <linux/kernel.h>
+#include <linux/module.h>
 #include <linux/errno.h>
 #include <linux/mm.h>
+#include <linux/vmalloc.h>
 #include <linux/swap.h>
 #include <linux/smp.h>
 #include <linux/highmem.h>
@@ -64,7 +66,9 @@ void show_mem(void)
 	printk(KERN_INFO "%lu pages mapped\n", ps.nr_mapped);
 	printk(KERN_INFO "%lu pages slab\n", ps.nr_slab);
 	printk(KERN_INFO "%lu pages pagetables\n", ps.nr_page_table_pages);
+	vprintstat();
 }
+EXPORT_SYMBOL(show_mem);
 
 /*
  * Associate a virtual page frame with a given physical page frame 
@@ -159,9 +163,11 @@ struct page *pte_alloc_one(struct mm_str
 	struct page *pte;
 
 #ifdef CONFIG_HIGHPTE
-	pte = alloc_pages(GFP_KERNEL|__GFP_HIGHMEM|__GFP_REPEAT|__GFP_ZERO, 0);
+	pte = alloc_pages(GFP_KERNEL_UBC|__GFP_SOFT_UBC|__GFP_HIGHMEM|
+			__GFP_REPEAT|__GFP_ZERO, 0);
 #else
-	pte = alloc_pages(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO, 0);
+	pte = alloc_pages(GFP_KERNEL_UBC|__GFP_SOFT_UBC|
+			__GFP_REPEAT|__GFP_ZERO, 0);
 #endif
 	return pte;
 }
diff -uprN linux-2.6.16/arch/ia64/Kconfig linux-2.6.16.ovz/arch/ia64/Kconfig
--- linux-2.6.16/arch/ia64/Kconfig	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/arch/ia64/Kconfig	2006-07-05 08:34:55.000000000 -0400
@@ -283,6 +283,8 @@ config PREEMPT
           Say Y here if you are building a kernel for a desktop, embedded
           or real-time system.  Say N if you are unsure.
 
+source "kernel/Kconfig.fairsched"
+
 source "mm/Kconfig"
 
 config ARCH_SELECT_MEMORY_MODEL
@@ -464,6 +466,10 @@ endmenu
 
 source "arch/ia64/Kconfig.debug"
 
+source "kernel/Kconfig.openvz"
+
 source "security/Kconfig"
 
 source "crypto/Kconfig"
+
+source "kernel/ub/Kconfig"
diff -uprN linux-2.6.16/arch/ia64/ia32/binfmt_elf32.c linux-2.6.16.ovz/arch/ia64/ia32/binfmt_elf32.c
--- linux-2.6.16/arch/ia64/ia32/binfmt_elf32.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/arch/ia64/ia32/binfmt_elf32.c	2006-07-05 08:34:55.000000000 -0400
@@ -136,6 +136,12 @@ ia64_elf32_init (struct pt_regs *regs)
 		up_write(&current->mm->mmap_sem);
 	}
 
+	if (ub_memory_charge(current->mm, PAGE_ALIGN(IA32_LDT_ENTRIES *
+					IA32_LDT_ENTRY_SIZE),
+				VM_READ|VM_WRITE|VM_MAYREAD|VM_MAYWRITE,
+				NULL, UB_SOFT))
+		goto skip;
+
 	/*
 	 * Install LDT as anonymous memory.  This gives us all-zero segment descriptors
 	 * until a task modifies them via modify_ldt().
@@ -157,7 +163,12 @@ ia64_elf32_init (struct pt_regs *regs)
 			}
 		}
 		up_write(&current->mm->mmap_sem);
-	}
+	} else
+		ub_memory_uncharge(current->mm, PAGE_ALIGN(IA32_LDT_ENTRIES *
+					IA32_LDT_ENTRY_SIZE),
+				VM_READ|VM_WRITE|VM_MAYREAD|VM_MAYWRITE, NULL);
+
+skip:
 
 	ia64_psr(regs)->ac = 0;		/* turn off alignment checking */
 	regs->loadrs = 0;
@@ -212,9 +223,15 @@ ia32_setup_arg_pages (struct linux_binpr
 		bprm->loader += stack_base;
 	bprm->exec += stack_base;
 
+	ret = -ENOMEM;
+	if (ub_memory_charge(mm, IA32_STACK_TOP -
+				(PAGE_MASK & (unsigned long)bprm->p),
+				VM_STACK_FLAGS, NULL, UB_SOFT))
+		goto err_charge;
+
 	mpnt = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
 	if (!mpnt)
-		return -ENOMEM;
+		goto err_alloc;
 
 	memset(mpnt, 0, sizeof(*mpnt));
 
@@ -231,11 +248,8 @@ ia32_setup_arg_pages (struct linux_binpr
 			mpnt->vm_flags = VM_STACK_FLAGS;
 		mpnt->vm_page_prot = (mpnt->vm_flags & VM_EXEC)?
 					PAGE_COPY_EXEC: PAGE_COPY;
-		if ((ret = insert_vm_struct(current->mm, mpnt))) {
-			up_write(&current->mm->mmap_sem);
-			kmem_cache_free(vm_area_cachep, mpnt);
-			return ret;
-		}
+		if ((ret = insert_vm_struct(current->mm, mpnt)))
+			goto err_insert;
 		current->mm->stack_vm = current->mm->total_vm = vma_pages(mpnt);
 	}
 
@@ -254,6 +268,16 @@ ia32_setup_arg_pages (struct linux_binpr
 	current->thread.ppl = ia32_init_pp_list();
 
 	return 0;
+
+err_insert:
+	up_write(&current->mm->mmap_sem);
+	kmem_cache_free(vm_area_cachep, mpnt);
+err_alloc:
+	ub_memory_uncharge(mm, IA32_STACK_TOP -
+			(PAGE_MASK & (unsigned long)bprm->p),
+			VM_STACK_FLAGS, NULL);
+err_charge:
+	return ret;
 }
 
 static void
diff -uprN linux-2.6.16/arch/ia64/kernel/asm-offsets.c linux-2.6.16.ovz/arch/ia64/kernel/asm-offsets.c
--- linux-2.6.16/arch/ia64/kernel/asm-offsets.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/arch/ia64/kernel/asm-offsets.c	2006-07-05 08:34:55.000000000 -0400
@@ -44,11 +44,21 @@ void foo(void)
 	DEFINE(IA64_TASK_CLEAR_CHILD_TID_OFFSET,offsetof (struct task_struct, clear_child_tid));
 	DEFINE(IA64_TASK_GROUP_LEADER_OFFSET, offsetof (struct task_struct, group_leader));
 	DEFINE(IA64_TASK_PENDING_OFFSET,offsetof (struct task_struct, pending));
+#ifdef CONFIG_VE
+	DEFINE(IA64_TASK_PID_OFFSET, offsetof
+			(struct task_struct, pids[PIDTYPE_PID].vnr));
+#else
 	DEFINE(IA64_TASK_PID_OFFSET, offsetof (struct task_struct, pid));
+#endif
 	DEFINE(IA64_TASK_REAL_PARENT_OFFSET, offsetof (struct task_struct, real_parent));
 	DEFINE(IA64_TASK_SIGHAND_OFFSET,offsetof (struct task_struct, sighand));
 	DEFINE(IA64_TASK_SIGNAL_OFFSET,offsetof (struct task_struct, signal));
+#ifdef CONFIG_VE
+	DEFINE(IA64_TASK_TGID_OFFSET, offsetof
+			(struct task_struct, pids[PIDTYPE_TGID].vnr));
+#else
 	DEFINE(IA64_TASK_TGID_OFFSET, offsetof (struct task_struct, tgid));
+#endif
 	DEFINE(IA64_TASK_THREAD_KSP_OFFSET, offsetof (struct task_struct, thread.ksp));
 	DEFINE(IA64_TASK_THREAD_ON_USTACK_OFFSET, offsetof (struct task_struct, thread.on_ustack));
 
diff -uprN linux-2.6.16/arch/ia64/kernel/entry.S linux-2.6.16.ovz/arch/ia64/kernel/entry.S
--- linux-2.6.16/arch/ia64/kernel/entry.S	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/arch/ia64/kernel/entry.S	2006-07-05 08:34:55.000000000 -0400
@@ -1620,4 +1620,17 @@ sys_call_table:
 	data8 sys_ni_syscall			// 1295 reserved for ppoll
 	data8 sys_unshare
 
+.rept 1500-1297
+	data8 sys_ni_syscall
+.endr
+	data8 sys_fairsched_mknod		// 1500
+	data8 sys_fairsched_rmnod
+	data8 sys_fairsched_chwt
+	data8 sys_fairsched_mvpr
+	data8 sys_fairsched_rate
+	data8 sys_getluid			// 1505
+	data8 sys_setluid
+	data8 sys_setublimit
+	data8 sys_ubstat
+
 	.org sys_call_table + 8*NR_syscalls	// guard against failures to increase NR_syscalls
diff -uprN linux-2.6.16/arch/ia64/kernel/fsys.S linux-2.6.16.ovz/arch/ia64/kernel/fsys.S
--- linux-2.6.16/arch/ia64/kernel/fsys.S	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/arch/ia64/kernel/fsys.S	2006-07-05 08:34:55.000000000 -0400
@@ -72,6 +72,7 @@ ENTRY(fsys_getpid)
 	FSYS_RETURN
 END(fsys_getpid)
 
+#ifndef CONFIG_VE
 ENTRY(fsys_getppid)
 	.prologue
 	.altrp b6
@@ -118,6 +119,7 @@ ENTRY(fsys_getppid)
 #endif
 	FSYS_RETURN
 END(fsys_getppid)
+#endif
 
 ENTRY(fsys_set_tid_address)
 	.prologue
@@ -665,7 +667,11 @@ fsyscall_table:
 	data8 0				// chown
 	data8 0				// lseek		// 1040
 	data8 fsys_getpid		// getpid
+#ifdef CONFIG_VE
+	data8 0
+#else
 	data8 fsys_getppid		// getppid
+#endif
 	data8 0				// mount
 	data8 0				// umount
 	data8 0				// setuid		// 1045
diff -uprN linux-2.6.16/arch/ia64/kernel/irq.c linux-2.6.16.ovz/arch/ia64/kernel/irq.c
--- linux-2.6.16/arch/ia64/kernel/irq.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/arch/ia64/kernel/irq.c	2006-07-05 08:34:55.000000000 -0400
@@ -163,7 +163,9 @@ void fixup_irqs(void)
 {
 	unsigned int irq;
 	extern void ia64_process_pending_intr(void);
+	struct ve_struct *ve;
 
+	ve = set_exec_env(get_ve0());
 	ia64_set_itv(1<<16);
 	/*
 	 * Phase 1: Locate irq's bound to this cpu and
@@ -197,5 +199,6 @@ void fixup_irqs(void)
 	 */
 	max_xtp();
 	local_irq_disable();
+	(void)set_exec_env(ve);
 }
 #endif
diff -uprN linux-2.6.16/arch/ia64/kernel/irq_ia64.c linux-2.6.16.ovz/arch/ia64/kernel/irq_ia64.c
--- linux-2.6.16/arch/ia64/kernel/irq_ia64.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/arch/ia64/kernel/irq_ia64.c	2006-07-05 08:34:55.000000000 -0400
@@ -103,6 +103,7 @@ void
 ia64_handle_irq (ia64_vector vector, struct pt_regs *regs)
 {
 	unsigned long saved_tpr;
+	struct ve_struct *ve;
 
 #if IRQ_DEBUG
 	{
@@ -139,6 +140,7 @@ ia64_handle_irq (ia64_vector vector, str
 	 * 16 (without this, it would be ~240, which could easily lead
 	 * to kernel stack overflows).
 	 */
+	ve = set_exec_env(get_ve0());
 	irq_enter();
 	saved_tpr = ia64_getreg(_IA64_REG_CR_TPR);
 	ia64_srlz_d();
@@ -164,6 +166,7 @@ ia64_handle_irq (ia64_vector vector, str
 	 * come through until ia64_eoi() has been done.
 	 */
 	irq_exit();
+	(void)set_exec_env(ve);	/* put back the VE saved before irq_enter() */
 }
 
 #ifdef CONFIG_HOTPLUG_CPU
@@ -176,9 +179,11 @@ void ia64_process_pending_intr(void)
 	ia64_vector vector;
 	unsigned long saved_tpr;
 	extern unsigned int vectors_in_migration[NR_IRQS];
+	struct ve_struct *ve;
 
 	vector = ia64_get_ivr();
 
+	ve = set_exec_env(get_ve0());
 	 irq_enter();
 	 saved_tpr = ia64_getreg(_IA64_REG_CR_TPR);
 	 ia64_srlz_d();
@@ -210,6 +215,7 @@ void ia64_process_pending_intr(void)
 		vector = ia64_get_ivr();
 	}
 	irq_exit();
+	(void)set_exec_env(ve);
 }
 #endif
 
diff -uprN linux-2.6.16/arch/ia64/kernel/mca.c linux-2.6.16.ovz/arch/ia64/kernel/mca.c
--- linux-2.6.16/arch/ia64/kernel/mca.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/arch/ia64/kernel/mca.c	2006-07-05 08:34:55.000000000 -0400
@@ -1241,10 +1241,10 @@ default_monarch_init_process(struct noti
 	}
 	printk("\n\n");
 	if (read_trylock(&tasklist_lock)) {
-		do_each_thread (g, t) {
+		do_each_thread_all (g, t) {
 			printk("\nBacktrace of pid %d (%s)\n", t->pid, t->comm);
 			show_stack(t, NULL);
-		} while_each_thread (g, t);
+		} while_each_thread_all (g, t);
 		read_unlock(&tasklist_lock);
 	}
 	return NOTIFY_DONE;
diff -uprN linux-2.6.16/arch/ia64/kernel/perfmon.c linux-2.6.16.ovz/arch/ia64/kernel/perfmon.c
--- linux-2.6.16/arch/ia64/kernel/perfmon.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/arch/ia64/kernel/perfmon.c	2006-07-05 08:34:55.000000000 -0400
@@ -2624,7 +2624,7 @@ pfm_get_task(pfm_context_t *ctx, pid_t p
 
 		read_lock(&tasklist_lock);
 
-		p = find_task_by_pid(pid);
+		p = find_task_by_pid_ve(pid);
 
 		/* make sure task cannot go away while we operate on it */
 		if (p) get_task_struct(p);
@@ -4188,12 +4188,12 @@ pfm_check_task_exist(pfm_context_t *ctx)
 
 	read_lock(&tasklist_lock);
 
-	do_each_thread (g, t) {
+	do_each_thread_ve (g, t) {
 		if (t->thread.pfm_context == ctx) {
 			ret = 0;
 			break;
 		}
-	} while_each_thread (g, t);
+	} while_each_thread_ve (g, t);
 
 	read_unlock(&tasklist_lock);
 
diff -uprN linux-2.6.16/arch/ia64/kernel/process.c linux-2.6.16.ovz/arch/ia64/kernel/process.c
--- linux-2.6.16/arch/ia64/kernel/process.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/arch/ia64/kernel/process.c	2006-07-05 08:34:55.000000000 -0400
@@ -109,7 +109,8 @@ show_regs (struct pt_regs *regs)
 	unsigned long ip = regs->cr_iip + ia64_psr(regs)->ri;
 
 	print_modules();
-	printk("\nPid: %d, CPU %d, comm: %20s\n", current->pid, smp_processor_id(), current->comm);
+	printk("\nPid: %d, CPU %d, VCPU %d:%d, comm: %20s\n", current->pid, smp_processor_id(),
+			task_vsched_id(current), task_cpu(current), current->comm);
 	printk("psr : %016lx ifs : %016lx ip  : [<%016lx>]    %s\n",
 	       regs->cr_ipsr, regs->cr_ifs, ip, print_tainted());
 	print_symbol("ip is at %s\n", ip);
@@ -681,6 +682,13 @@ kernel_thread (int (*fn)(void *), void *
 		struct pt_regs pt;
 	} regs;
 
+	/* Don't allow kernel_thread() inside VE */
+	if (!ve_is_super(get_exec_env())) {
+		printk("kernel_thread call inside VE\n");
+		dump_stack();
+		return -EPERM;
+	}
+
 	memset(&regs, 0, sizeof(regs));
 	regs.pt.cr_iip = helper_fptr[0];	/* set entry point (IP) */
 	regs.pt.r1 = helper_fptr[1];		/* set GP */
diff -uprN linux-2.6.16/arch/ia64/kernel/ptrace.c linux-2.6.16.ovz/arch/ia64/kernel/ptrace.c
--- linux-2.6.16/arch/ia64/kernel/ptrace.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/arch/ia64/kernel/ptrace.c	2006-07-05 08:34:55.000000000 -0400
@@ -1433,7 +1433,7 @@ sys_ptrace (long request, pid_t pid, uns
 	ret = -ESRCH;
 	read_lock(&tasklist_lock);
 	{
-		child = find_task_by_pid(pid);
+		child = find_task_by_pid_ve(pid);
 		if (child) {
 			if (peek_or_poke)
 				child = find_thread_for_addr(child, addr);
diff -uprN linux-2.6.16/arch/ia64/kernel/signal.c linux-2.6.16.ovz/arch/ia64/kernel/signal.c
--- linux-2.6.16/arch/ia64/kernel/signal.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/arch/ia64/kernel/signal.c	2006-07-05 08:34:55.000000000 -0400
@@ -270,7 +270,7 @@ ia64_rt_sigreturn (struct sigscratch *sc
 	si.si_signo = SIGSEGV;
 	si.si_errno = 0;
 	si.si_code = SI_KERNEL;
-	si.si_pid = current->pid;
+	si.si_pid = virt_pid(current);
 	si.si_uid = current->uid;
 	si.si_addr = sc;
 	force_sig_info(SIGSEGV, &si, current);
@@ -375,7 +375,7 @@ force_sigsegv_info (int sig, void __user
 	si.si_signo = SIGSEGV;
 	si.si_errno = 0;
 	si.si_code = SI_KERNEL;
-	si.si_pid = current->pid;
+	si.si_pid = virt_pid(current);
 	si.si_uid = current->uid;
 	si.si_addr = addr;
 	force_sig_info(SIGSEGV, &si, current);
@@ -641,7 +641,7 @@ set_sigdelayed(pid_t pid, int signo, int
 	for (i = 1; i <= 3; ++i) {
 		switch (i) {
 		case 1:
-			t = find_task_by_pid(pid);
+			t = find_task_by_pid_ve(pid);
 			if (t)
 				start_time = start_time_ul(t);
 			break;
@@ -682,7 +682,7 @@ do_sigdelayed(void)
 	siginfo.si_code = current_thread_info()->sigdelayed.code;
 	siginfo.si_addr = current_thread_info()->sigdelayed.addr;
 	pid = current_thread_info()->sigdelayed.pid;
-	t = find_task_by_pid(pid);
+	t = find_task_by_pid_ve(pid);
 	if (!t)
 		return;
 	if (current_thread_info()->sigdelayed.start_time != start_time_ul(t))
diff -uprN linux-2.6.16/arch/ia64/kernel/traps.c linux-2.6.16.ovz/arch/ia64/kernel/traps.c
--- linux-2.6.16/arch/ia64/kernel/traps.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/arch/ia64/kernel/traps.c	2006-07-05 08:34:55.000000000 -0400
@@ -54,34 +54,6 @@ trap_init (void)
 		fpswa_interface = __va(ia64_boot_param->fpswa);
 }
 
-/*
- * Unlock any spinlocks which will prevent us from getting the message out (timerlist_lock
- * is acquired through the console unblank code)
- */
-void
-bust_spinlocks (int yes)
-{
-	int loglevel_save = console_loglevel;
-
-	if (yes) {
-		oops_in_progress = 1;
-		return;
-	}
-
-#ifdef CONFIG_VT
-	unblank_screen();
-#endif
-	oops_in_progress = 0;
-	/*
-	 * OK, the message is on the console.  Now we call printk() without
-	 * oops_in_progress set so that printk will give klogd a poke.  Hold onto
-	 * your hats...
-	 */
-	console_loglevel = 15;		/* NMI oopser may have shut the console up */
-	printk(" ");
-	console_loglevel = loglevel_save;
-}
-
 void
 die (const char *str, struct pt_regs *regs, long err)
 {
diff -uprN linux-2.6.16/arch/ia64/kernel/unaligned.c linux-2.6.16.ovz/arch/ia64/kernel/unaligned.c
--- linux-2.6.16/arch/ia64/kernel/unaligned.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/arch/ia64/kernel/unaligned.c	2006-07-05 08:34:55.000000000 -0400
@@ -1290,7 +1290,7 @@ within_logging_rate_limit (void)
 {
 	static unsigned long count, last_time;
 
-	if (jiffies - last_time > 5*HZ)
+	if (jiffies - last_time > 60 * HZ)
 		count = 0;
 	if (count < 5) {
 		last_time = jiffies;
diff -uprN linux-2.6.16/arch/ia64/mm/contig.c linux-2.6.16.ovz/arch/ia64/mm/contig.c
--- linux-2.6.16/arch/ia64/mm/contig.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/arch/ia64/mm/contig.c	2006-07-05 08:34:55.000000000 -0400
@@ -64,6 +64,7 @@ show_mem (void)
 	printk("%ld pages in page table cache\n",
 		pgtable_quicklist_total_size());
 }
+EXPORT_SYMBOL(show_mem);
 
 /* physical address where the bootmem map is located */
 unsigned long bootmap_start;
diff -uprN linux-2.6.16/arch/ia64/mm/discontig.c linux-2.6.16.ovz/arch/ia64/mm/discontig.c
--- linux-2.6.16/arch/ia64/mm/discontig.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/arch/ia64/mm/discontig.c	2006-07-05 08:34:55.000000000 -0400
@@ -594,6 +594,7 @@ void show_mem(void)
 		pgtable_quicklist_total_size());
 	printk("%d free buffer pages\n", nr_free_buffer_pages());
 }
+EXPORT_SYMBOL(show_mem);
 
 /**
  * call_pernode_memory - use SRAT to call callback functions with node info
diff -uprN linux-2.6.16/arch/ia64/mm/fault.c linux-2.6.16.ovz/arch/ia64/mm/fault.c
--- linux-2.6.16/arch/ia64/mm/fault.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/arch/ia64/mm/fault.c	2006-07-05 08:34:55.000000000 -0400
@@ -116,7 +116,6 @@ ia64_do_page_fault (unsigned long addres
 	if ((vma->vm_flags & mask) != mask)
 		goto bad_area;
 
-  survive:
 	/*
 	 * If for any reason at all we couldn't handle the fault, make
 	 * sure we exit gracefully rather than endlessly redo the
@@ -241,13 +240,13 @@ ia64_do_page_fault (unsigned long addres
 
   out_of_memory:
 	up_read(&mm->mmap_sem);
-	if (current->pid == 1) {
-		yield();
-		down_read(&mm->mmap_sem);
-		goto survive;
-	}
-	printk(KERN_CRIT "VM: killing process %s\n", current->comm);
-	if (user_mode(regs))
-		do_exit(SIGKILL);
+	if (user_mode(regs)) {
+		/*
+		 * An order-0 allocation fails only when something really
+		 * fatal has happened: beancounter overdraft or OOM.
+		 */
+		force_sig(SIGKILL, current);
+		return;
+	}
 	goto no_context;
 }
diff -uprN linux-2.6.16/arch/ia64/mm/init.c linux-2.6.16.ovz/arch/ia64/mm/init.c
--- linux-2.6.16/arch/ia64/mm/init.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/arch/ia64/mm/init.c	2006-07-05 08:34:55.000000000 -0400
@@ -37,6 +37,8 @@
 #include <asm/unistd.h>
 #include <asm/mca.h>
 
+#include <ub/ub_vmpages.h>
+
 DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
 
 DEFINE_PER_CPU(unsigned long *, __pgtable_quicklist);
@@ -96,7 +98,7 @@ check_pgt_cache(void)
 	preempt_disable();
 	while (unlikely((pages_to_free = min_pages_to_free()) > 0)) {
 		while (pages_to_free--) {
-			free_page((unsigned long)pgtable_quicklist_alloc());
+			free_page((unsigned long)pgtable_quicklist_alloc(0));
 		}
 		preempt_enable();
 		preempt_disable();
@@ -146,6 +148,10 @@ ia64_init_addr_space (void)
 
 	ia64_set_rbs_bot();
 
+	if (ub_memory_charge(current->mm, PAGE_SIZE, VM_DATA_DEFAULT_FLAGS,
+				NULL, UB_SOFT))
+		goto skip;
+
 	/*
 	 * If we're out of memory and kmem_cache_alloc() returns NULL, we simply ignore
 	 * the problem.  When the process attempts to write to the register backing store
@@ -166,8 +172,11 @@ ia64_init_addr_space (void)
 			return;
 		}
 		up_write(&current->mm->mmap_sem);
-	}
+	} else
+		ub_memory_uncharge(current->mm, PAGE_SIZE,
+				VM_DATA_DEFAULT_FLAGS, NULL);
 
+skip:
 	/* map NaT-page at address zero to speed up speculative dereferencing of NULL: */
 	if (!(current->personality & MMAP_PAGE_ZERO)) {
 		vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
diff -uprN linux-2.6.16/arch/m32r/kernel/m32r_ksyms.c linux-2.6.16.ovz/arch/m32r/kernel/m32r_ksyms.c
--- linux-2.6.16/arch/m32r/kernel/m32r_ksyms.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/arch/m32r/kernel/m32r_ksyms.c	2006-07-05 08:34:55.000000000 -0400
@@ -38,10 +38,6 @@ EXPORT_SYMBOL(__udelay);
 EXPORT_SYMBOL(__delay);
 EXPORT_SYMBOL(__const_udelay);
 
-EXPORT_SYMBOL(__get_user_1);
-EXPORT_SYMBOL(__get_user_2);
-EXPORT_SYMBOL(__get_user_4);
-
 EXPORT_SYMBOL(strpbrk);
 EXPORT_SYMBOL(strstr);
 
diff -uprN linux-2.6.16/arch/m32r/kernel/setup.c linux-2.6.16.ovz/arch/m32r/kernel/setup.c
--- linux-2.6.16/arch/m32r/kernel/setup.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/arch/m32r/kernel/setup.c	2006-07-05 08:34:55.000000000 -0400
@@ -9,6 +9,7 @@
 
 #include <linux/config.h>
 #include <linux/init.h>
+#include <linux/kernel.h>
 #include <linux/stddef.h>
 #include <linux/fs.h>
 #include <linux/sched.h>
@@ -218,8 +219,6 @@ static unsigned long __init setup_memory
 extern unsigned long setup_memory(void);
 #endif	/* CONFIG_DISCONTIGMEM */
 
-#define M32R_PCC_PCATCR	0x00ef7014	/* will move to m32r.h */
-
 void __init setup_arch(char **cmdline_p)
 {
 	ROOT_DEV = old_decode_dev(ORIG_ROOT_DEV);
@@ -268,15 +267,14 @@ void __init setup_arch(char **cmdline_p)
 	paging_init();
 }
 
-static struct cpu cpu[NR_CPUS];
+static struct cpu cpu_devices[NR_CPUS];
 
 static int __init topology_init(void)
 {
-	int cpu_id;
+	int i;
 
-	for (cpu_id = 0; cpu_id < NR_CPUS; cpu_id++)
-		if (cpu_possible(cpu_id))
-			register_cpu(&cpu[cpu_id], cpu_id, NULL);
+	for_each_present_cpu(i)
+		register_cpu(&cpu_devices[i], i, NULL);
 
 	return 0;
 }
diff -uprN linux-2.6.16/arch/m32r/kernel/smpboot.c linux-2.6.16.ovz/arch/m32r/kernel/smpboot.c
--- linux-2.6.16/arch/m32r/kernel/smpboot.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/arch/m32r/kernel/smpboot.c	2006-07-05 08:34:55.000000000 -0400
@@ -39,8 +39,10 @@
  *		Martin J. Bligh	: 	Added support for multi-quad systems
  */
 
+#include <linux/module.h>
 #include <linux/config.h>
 #include <linux/init.h>
+#include <linux/kernel.h>
 #include <linux/mm.h>
 #include <linux/smp_lock.h>
 #include <linux/irq.h>
@@ -72,11 +74,15 @@ physid_mask_t phys_cpu_present_map;
 
 /* Bitmask of currently online CPUs */
 cpumask_t cpu_online_map;
+EXPORT_SYMBOL(cpu_online_map);
 
 cpumask_t cpu_bootout_map;
 cpumask_t cpu_bootin_map;
-cpumask_t cpu_callout_map;
 static cpumask_t cpu_callin_map;
+cpumask_t cpu_callout_map;
+EXPORT_SYMBOL(cpu_callout_map);
+cpumask_t cpu_possible_map = CPU_MASK_ALL;
+EXPORT_SYMBOL(cpu_possible_map);
 
 /* Per CPU bogomips and other parameters */
 struct cpuinfo_m32r cpu_data[NR_CPUS] __cacheline_aligned;
@@ -110,7 +116,6 @@ static unsigned int calibration_result;
 
 void smp_prepare_boot_cpu(void);
 void smp_prepare_cpus(unsigned int);
-static void smp_tune_scheduling(void);
 static void init_ipi_lock(void);
 static void do_boot_cpu(int);
 int __cpu_up(unsigned int);
@@ -177,6 +182,9 @@ void __init smp_prepare_cpus(unsigned in
 	}
 	for (phys_id = 0 ; phys_id < nr_cpu ; phys_id++)
 		physid_set(phys_id, phys_cpu_present_map);
+#ifndef CONFIG_HOTPLUG_CPU
+	cpu_present_map = cpu_possible_map;
+#endif
 
 	show_mp_info(nr_cpu);
 
@@ -186,7 +194,6 @@ void __init smp_prepare_cpus(unsigned in
 	 * Setup boot CPU information
 	 */
 	smp_store_cpu_info(0); /* Final full version of the data */
-	smp_tune_scheduling();
 
 	/*
 	 * If SMP should be disabled, then really disable it!
@@ -230,11 +237,6 @@ smp_done:
 	Dprintk("Boot done.\n");
 }
 
-static void __init smp_tune_scheduling(void)
-{
-	/* Nothing to do. */
-}
-
 /*
  * init_ipi_lock : Initialize IPI locks.
  */
@@ -629,4 +631,3 @@ static void __init unmap_cpu_to_physid(i
 	physid_2_cpu[phys_id] = -1;
 	cpu_2_physid[cpu_id] = -1;
 }
-
diff -uprN linux-2.6.16/arch/m32r/lib/Makefile linux-2.6.16.ovz/arch/m32r/lib/Makefile
--- linux-2.6.16/arch/m32r/lib/Makefile	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/arch/m32r/lib/Makefile	2006-07-05 08:34:55.000000000 -0400
@@ -2,6 +2,6 @@
 # Makefile for M32R-specific library files..
 #
 
-lib-y  := checksum.o ashxdi3.o memset.o memcpy.o getuser.o \
-	  putuser.o delay.o strlen.o usercopy.o csum_partial_copy.o
+lib-y  := checksum.o ashxdi3.o memset.o memcpy.o \
+	  delay.o strlen.o usercopy.o csum_partial_copy.o
 
diff -uprN linux-2.6.16/arch/mips/kernel/branch.c linux-2.6.16.ovz/arch/mips/kernel/branch.c
--- linux-2.6.16/arch/mips/kernel/branch.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/arch/mips/kernel/branch.c	2006-07-05 08:34:55.000000000 -0400
@@ -184,7 +184,7 @@ int __compute_return_epc(struct pt_regs 
 		bit = (insn.i_format.rt >> 2);
 		bit += (bit != 0);
 		bit += 23;
-		switch (insn.i_format.rt) {
+		switch (insn.i_format.rt & 3) {
 		case 0:	/* bc1f */
 		case 2:	/* bc1fl */
 			if (~fcr31 & (1 << bit))
diff -uprN linux-2.6.16/arch/mips/kernel/irixelf.c linux-2.6.16.ovz/arch/mips/kernel/irixelf.c
--- linux-2.6.16/arch/mips/kernel/irixelf.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/arch/mips/kernel/irixelf.c	2006-07-05 08:34:55.000000000 -0400
@@ -432,7 +432,7 @@ static inline int look_for_irix_interpre
 		if (retval < 0)
 			goto out;
 
-		file = open_exec(*name);
+		file = open_exec(*name, bprm);
 		if (IS_ERR(file)) {
 			retval = PTR_ERR(file);
 			goto out;
diff -uprN linux-2.6.16/arch/mips/kernel/sysirix.c linux-2.6.16.ovz/arch/mips/kernel/sysirix.c
--- linux-2.6.16/arch/mips/kernel/sysirix.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/arch/mips/kernel/sysirix.c	2006-07-05 08:34:55.000000000 -0400
@@ -110,7 +110,7 @@ asmlinkage int irix_prctl(unsigned optio
 		printk("irix_prctl[%s:%d]: Wants PR_ISBLOCKED\n",
 		       current->comm, current->pid);
 		read_lock(&tasklist_lock);
-		task = find_task_by_pid(va_arg(args, pid_t));
+		task = find_task_by_pid_ve(va_arg(args, pid_t));
 		error = -ESRCH;
 		if (error)
 			error = (task->run_list.next != NULL);
diff -uprN linux-2.6.16/arch/mips/mm/c-r4k.c linux-2.6.16.ovz/arch/mips/mm/c-r4k.c
--- linux-2.6.16/arch/mips/mm/c-r4k.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/arch/mips/mm/c-r4k.c	2006-07-05 08:34:55.000000000 -0400
@@ -154,7 +154,8 @@ static inline void blast_icache32_r4600_
 
 static inline void tx49_blast_icache32_page_indexed(unsigned long page)
 {
-	unsigned long start = page;
+	unsigned long indexmask = current_cpu_data.icache.waysize - 1;
+	unsigned long start = INDEX_BASE + (page & indexmask);
 	unsigned long end = start + PAGE_SIZE;
 	unsigned long ws_inc = 1UL << current_cpu_data.icache.waybit;
 	unsigned long ws_end = current_cpu_data.icache.ways <<
diff -uprN linux-2.6.16/arch/powerpc/Kconfig linux-2.6.16.ovz/arch/powerpc/Kconfig
--- linux-2.6.16/arch/powerpc/Kconfig	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/arch/powerpc/Kconfig	2006-07-05 08:34:55.000000000 -0400
@@ -517,6 +517,7 @@ config HIGHMEM
 	bool "High memory support"
 	depends on PPC32
 
+source "kernel/Kconfig.fairsched"
 source kernel/Kconfig.hz
 source kernel/Kconfig.preempt
 source "fs/Kconfig.binfmt"
@@ -956,6 +957,8 @@ source "arch/powerpc/platforms/iseries/K
 
 source "lib/Kconfig"
 
+source "kernel/ub/Kconfig"
+
 menu "Instrumentation Support"
         depends on EXPERIMENTAL
 
@@ -974,6 +977,8 @@ endmenu
 
 source "arch/powerpc/Kconfig.debug"
 
+source "kernel/Kconfig.openvz"
+
 source "security/Kconfig"
 
 config KEYS_COMPAT
diff -uprN linux-2.6.16/arch/powerpc/kernel/irq.c linux-2.6.16.ovz/arch/powerpc/kernel/irq.c
--- linux-2.6.16/arch/powerpc/kernel/irq.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/arch/powerpc/kernel/irq.c	2006-07-05 08:34:55.000000000 -0400
@@ -50,6 +50,8 @@
 #include <linux/profile.h>
 #include <linux/bitops.h>
 
+#include <ub/beancounter.h>
+
 #include <asm/uaccess.h>
 #include <asm/system.h>
 #include <asm/io.h>
@@ -189,7 +191,11 @@ void do_IRQ(struct pt_regs *regs)
 #ifdef CONFIG_IRQSTACKS
 	struct thread_info *curtp, *irqtp;
 #endif
+	struct ve_struct *ve;
+	struct user_beancounter *ub;
 
+	ve = set_exec_env(get_ve0());
+	ub = set_exec_ub(get_ub0());
         irq_enter();
 
 #ifdef CONFIG_DEBUG_STACKOVERFLOW
@@ -236,6 +242,8 @@ void do_IRQ(struct pt_regs *regs)
 		ppc_spurious_interrupts++;
 
         irq_exit();
+	(void)set_exec_ub(ub);
+	(void)set_exec_env(ve);
 
 #ifdef CONFIG_PPC_ISERIES
 	if (get_lppaca()->int_dword.fields.decr_int) {
diff -uprN linux-2.6.16/arch/powerpc/kernel/misc_32.S linux-2.6.16.ovz/arch/powerpc/kernel/misc_32.S
--- linux-2.6.16/arch/powerpc/kernel/misc_32.S	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/arch/powerpc/kernel/misc_32.S	2006-07-05 08:34:55.000000000 -0400
@@ -973,7 +973,7 @@ _GLOBAL(_get_SP)
  * Create a kernel thread
  *   kernel_thread(fn, arg, flags)
  */
-_GLOBAL(kernel_thread)
+_GLOBAL(ppc_kernel_thread)
 	stwu	r1,-16(r1)
 	stw	r30,8(r1)
 	stw	r31,12(r1)
diff -uprN linux-2.6.16/arch/powerpc/kernel/misc_64.S linux-2.6.16.ovz/arch/powerpc/kernel/misc_64.S
--- linux-2.6.16/arch/powerpc/kernel/misc_64.S	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/arch/powerpc/kernel/misc_64.S	2006-07-05 08:34:55.000000000 -0400
@@ -677,7 +677,7 @@ _GLOBAL(scom970_write)
  * Create a kernel thread
  *   kernel_thread(fn, arg, flags)
  */
-_GLOBAL(kernel_thread)
+_GLOBAL(ppc_kernel_thread)
 	std	r29,-24(r1)
 	std	r30,-16(r1)
 	stdu	r1,-STACK_FRAME_OVERHEAD(r1)
diff -uprN linux-2.6.16/arch/powerpc/kernel/pci_64.c linux-2.6.16.ovz/arch/powerpc/kernel/pci_64.c
--- linux-2.6.16/arch/powerpc/kernel/pci_64.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/arch/powerpc/kernel/pci_64.c	2006-07-05 08:34:55.000000000 -0400
@@ -78,6 +78,7 @@ int global_phb_number;		/* Global phb co
 
 /* Cached ISA bridge dev. */
 struct pci_dev *ppc64_isabridge_dev = NULL;
+EXPORT_SYMBOL_GPL(ppc64_isabridge_dev);
 
 static void fixup_broken_pcnet32(struct pci_dev* dev)
 {
diff -uprN linux-2.6.16/arch/powerpc/kernel/process.c linux-2.6.16.ovz/arch/powerpc/kernel/process.c
--- linux-2.6.16/arch/powerpc/kernel/process.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/arch/powerpc/kernel/process.c	2006-07-05 08:34:55.000000000 -0400
@@ -429,7 +429,7 @@ void show_regs(struct pt_regs * regs)
 	       current, current->pid, current->comm, task_thread_info(current));
 
 #ifdef CONFIG_SMP
-	printk(" CPU: %d", smp_processor_id());
+	printk(" CPU: %d VCPU %d:%d", smp_processor_id(), task_vsched_id(current), task_cpu(current));
 #endif /* CONFIG_SMP */
 
 	for (i = 0;  i < 32;  i++) {
@@ -774,12 +774,12 @@ static int validate_sp(unsigned long sp,
 		return 1;
 
 #ifdef CONFIG_IRQSTACKS
-	stack_page = (unsigned long) hardirq_ctx[task_cpu(p)];
+	stack_page = (unsigned long) hardirq_ctx[task_pcpu(p)];
 	if (sp >= stack_page + sizeof(struct thread_struct)
 	    && sp <= stack_page + THREAD_SIZE - nbytes)
 		return 1;
 
-	stack_page = (unsigned long) softirq_ctx[task_cpu(p)];
+	stack_page = (unsigned long) softirq_ctx[task_pcpu(p)];
 	if (sp >= stack_page + sizeof(struct thread_struct)
 	    && sp <= stack_page + THREAD_SIZE - nbytes)
 		return 1;
@@ -889,6 +889,20 @@ void dump_stack(void)
 }
 EXPORT_SYMBOL(dump_stack);
 
+long kernel_thread(int (*fn)(void *), void *arg, unsigned long flags)
+{
+	extern long ppc_kernel_thread(int (*fn)(void *), void *arg,
+			unsigned long flags);
+
+	/* Outside a VE: hand off to the assembly implementation. */
+	if (ve_is_super(get_exec_env()))
+		return ppc_kernel_thread(fn, arg, flags);
+
+	printk("kernel_thread call inside VE\n");
+	dump_stack();
+	return -EPERM;
+}
+
 #ifdef CONFIG_PPC64
 void ppc64_runlatch_on(void)
 {
diff -uprN linux-2.6.16/arch/powerpc/kernel/setup_64.c linux-2.6.16.ovz/arch/powerpc/kernel/setup_64.c
--- linux-2.6.16/arch/powerpc/kernel/setup_64.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/arch/powerpc/kernel/setup_64.c	2006-07-05 08:34:55.000000000 -0400
@@ -256,12 +256,10 @@ void __init early_setup(unsigned long dt
 	/*
 	 * Initialize stab / SLB management except on iSeries
 	 */
-	if (!firmware_has_feature(FW_FEATURE_ISERIES)) {
-		if (cpu_has_feature(CPU_FTR_SLB))
-			slb_initialize();
-		else
-			stab_initialize(lpaca->stab_real);
-	}
+	if (cpu_has_feature(CPU_FTR_SLB))
+		slb_initialize();
+	else if (!firmware_has_feature(FW_FEATURE_ISERIES))
+		stab_initialize(lpaca->stab_real);
 
 	DBG(" <- early_setup()\n");
 }
diff -uprN linux-2.6.16/arch/powerpc/kernel/signal_32.c linux-2.6.16.ovz/arch/powerpc/kernel/signal_32.c
--- linux-2.6.16/arch/powerpc/kernel/signal_32.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/arch/powerpc/kernel/signal_32.c	2006-07-05 08:34:55.000000000 -0400
@@ -802,10 +802,13 @@ static int do_setcontext(struct ucontext
 		if (__get_user(cmcp, &ucp->uc_regs))
 			return -EFAULT;
 		mcp = (struct mcontext __user *)(u64)cmcp;
+		/* no need to check access_ok(mcp), since mcp < 4GB */
 	}
 #else
 	if (__get_user(mcp, &ucp->uc_regs))
 		return -EFAULT;
+	if (!access_ok(VERIFY_READ, mcp, sizeof(*mcp)))
+		return -EFAULT;
 #endif
 	restore_sigmask(&set);
 	if (restore_user_regs(regs, mcp, sig))
@@ -907,13 +910,14 @@ int sys_debug_setcontext(struct ucontext
 {
 	struct sig_dbg_op op;
 	int i;
+	unsigned char tmp;
 	unsigned long new_msr = regs->msr;
 #if defined(CONFIG_4xx) || defined(CONFIG_BOOKE)
 	unsigned long new_dbcr0 = current->thread.dbcr0;
 #endif
 
 	for (i=0; i<ndbg; i++) {
-		if (__copy_from_user(&op, dbg, sizeof(op)))
+		if (copy_from_user(&op, dbg + i, sizeof(op)))
 			return -EFAULT;
 		switch (op.dbg_type) {
 		case SIG_DBG_SINGLE_STEPPING:
@@ -958,6 +962,11 @@ int sys_debug_setcontext(struct ucontext
 	current->thread.dbcr0 = new_dbcr0;
 #endif
 
+	if (!access_ok(VERIFY_READ, ctx, sizeof(*ctx))
+	    || __get_user(tmp, (u8 __user *) ctx)
+	    || __get_user(tmp, (u8 __user *) (ctx + 1) - 1))
+		return -EFAULT;
+
 	/*
 	 * If we get a fault copying the context into the kernel's
 	 * image of the user's registers, we can't just return -EFAULT
diff -uprN linux-2.6.16/arch/powerpc/kernel/signal_64.c linux-2.6.16.ovz/arch/powerpc/kernel/signal_64.c
--- linux-2.6.16/arch/powerpc/kernel/signal_64.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/arch/powerpc/kernel/signal_64.c	2006-07-05 08:34:55.000000000 -0400
@@ -183,6 +183,8 @@ static long restore_sigcontext(struct pt
 	err |= __get_user(msr, &sc->gp_regs[PT_MSR]);
 	if (err)
 		return err;
+	if (v_regs && !access_ok(VERIFY_READ, v_regs, 34 * sizeof(vector128)))
+		return -EFAULT;
 	/* Copy 33 vec registers (vr0..31 and vscr) from the stack */
 	if (v_regs != 0 && (msr & MSR_VEC) != 0)
 		err |= __copy_from_user(current->thread.vr, v_regs,
@@ -213,7 +215,7 @@ static inline void __user * get_sigframe
         /* Default to using normal stack */
         newsp = regs->gpr[1];
 
-	if (ka->sa.sa_flags & SA_ONSTACK) {
+	if ((ka->sa.sa_flags & SA_ONSTACK) && current->sas_ss_size) {
 		if (! on_sig_stack(regs->gpr[1]))
 			newsp = (current->sas_ss_sp + current->sas_ss_size);
 	}
diff -uprN linux-2.6.16/arch/powerpc/kernel/syscalls.c linux-2.6.16.ovz/arch/powerpc/kernel/syscalls.c
--- linux-2.6.16/arch/powerpc/kernel/syscalls.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/arch/powerpc/kernel/syscalls.c	2006-07-05 08:34:55.000000000 -0400
@@ -259,7 +259,7 @@ long ppc_newuname(struct new_utsname __u
 	int err = 0;
 
 	down_read(&uts_sem);
-	if (copy_to_user(name, &system_utsname, sizeof(*name)))
+	if (copy_to_user(name, &ve_utsname, sizeof(*name)))
 		err = -EFAULT;
 	up_read(&uts_sem);
 	if (!err)
@@ -272,7 +272,7 @@ int sys_uname(struct old_utsname __user 
 	int err = 0;
 	
 	down_read(&uts_sem);
-	if (copy_to_user(name, &system_utsname, sizeof(*name)))
+	if (copy_to_user(name, &ve_utsname, sizeof(*name)))
 		err = -EFAULT;
 	up_read(&uts_sem);
 	if (!err)
@@ -288,19 +288,19 @@ int sys_olduname(struct oldold_utsname _
 		return -EFAULT;
   
 	down_read(&uts_sem);
-	error = __copy_to_user(&name->sysname, &system_utsname.sysname,
+	error = __copy_to_user(&name->sysname, &ve_utsname.sysname,
 			       __OLD_UTS_LEN);
 	error |= __put_user(0, name->sysname + __OLD_UTS_LEN);
-	error |= __copy_to_user(&name->nodename, &system_utsname.nodename,
+	error |= __copy_to_user(&name->nodename, &ve_utsname.nodename,
 				__OLD_UTS_LEN);
 	error |= __put_user(0, name->nodename + __OLD_UTS_LEN);
-	error |= __copy_to_user(&name->release, &system_utsname.release,
+	error |= __copy_to_user(&name->release, &ve_utsname.release,
 				__OLD_UTS_LEN);
 	error |= __put_user(0, name->release + __OLD_UTS_LEN);
-	error |= __copy_to_user(&name->version, &system_utsname.version,
+	error |= __copy_to_user(&name->version, &ve_utsname.version,
 				__OLD_UTS_LEN);
 	error |= __put_user(0, name->version + __OLD_UTS_LEN);
-	error |= __copy_to_user(&name->machine, &system_utsname.machine,
+	error |= __copy_to_user(&name->machine, &ve_utsname.machine,
 				__OLD_UTS_LEN);
 	error |= override_machine(name->machine);
 	up_read(&uts_sem);
diff -uprN linux-2.6.16/arch/powerpc/kernel/systbl.S linux-2.6.16.ovz/arch/powerpc/kernel/systbl.S
--- linux-2.6.16/arch/powerpc/kernel/systbl.S	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/arch/powerpc/kernel/systbl.S	2006-07-05 08:34:55.000000000 -0400
@@ -322,3 +322,12 @@ SYSCALL(spu_create)
 COMPAT_SYS(pselect6)
 COMPAT_SYS(ppoll)
 SYSCALL(unshare)
+
+.rept 410 - (. - sys_call_table)/8
+SYSX(sys_ni_syscall, sys_ni_syscall, sys_ni_syscall)
+.endr
+
+SYSX(sys_getluid, sys_ni_syscall, sys_getluid)
+SYSX(sys_setluid, sys_ni_syscall, sys_setluid)
+SYSX(sys_setublimit, sys_ni_syscall, sys_setublimit)
+SYSX(sys_ubstat, sys_ni_syscall, sys_ubstat)
diff -uprN linux-2.6.16/arch/powerpc/kernel/time.c linux-2.6.16.ovz/arch/powerpc/kernel/time.c
--- linux-2.6.16/arch/powerpc/kernel/time.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/arch/powerpc/kernel/time.c	2006-07-05 08:34:55.000000000 -0400
@@ -431,12 +431,14 @@ void timer_interrupt(struct pt_regs * re
 	int next_dec;
 	int cpu = smp_processor_id();
 	unsigned long ticks;
+	struct ve_struct *ve;
 
 #ifdef CONFIG_PPC32
 	if (atomic_read(&ppc_n_lost_interrupts) != 0)
 		do_IRQ(regs);
 #endif
 
+	ve = set_exec_env(get_ve0());
 	irq_enter();
 
 	profile_tick(CPU_PROFILING, regs);
@@ -496,6 +498,7 @@ void timer_interrupt(struct pt_regs * re
 #endif
 
 	irq_exit();
+	(void)set_exec_env(ve);
 }
 
 void wakeup_decrementer(void)
diff -uprN linux-2.6.16/arch/powerpc/mm/fault.c linux-2.6.16.ovz/arch/powerpc/mm/fault.c
--- linux-2.6.16/arch/powerpc/mm/fault.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/arch/powerpc/mm/fault.c	2006-07-05 08:34:55.000000000 -0400
@@ -307,7 +307,6 @@ good_area:
 	 * make sure we exit gracefully rather than endlessly redo
 	 * the fault.
 	 */
- survive:
 	switch (handle_mm_fault(mm, vma, address, is_write)) {
 
 	case VM_FAULT_MINOR:
@@ -351,14 +350,16 @@ bad_area_nosemaphore:
  */
 out_of_memory:
 	up_read(&mm->mmap_sem);
-	if (current->pid == 1) {
-		yield();
-		down_read(&mm->mmap_sem);
-		goto survive;
-	}
-	printk("VM: killing process %s\n", current->comm);
-	if (user_mode(regs))
-		do_exit(SIGKILL);
+	if (user_mode(regs)) {
+		/*
+		 * An order-0 allocation fails only when something really
+		 * fatal has happened: beancounter overdraft or OOM.  The
+		 * SIGKILL is queued here, so report the fault as handled.
+		 * NOTE(review): a non-zero return is presumably passed to
+		 * bad_page_fault() by the caller -- confirm.
+		 */
+		force_sig(SIGKILL, current);
+		return 0;
+	}
 	return SIGKILL;
 
 do_sigbus:
diff -uprN linux-2.6.16/arch/powerpc/mm/init_64.c linux-2.6.16.ovz/arch/powerpc/mm/init_64.c
--- linux-2.6.16/arch/powerpc/mm/init_64.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/arch/powerpc/mm/init_64.c	2006-07-05 08:34:55.000000000 -0400
@@ -225,7 +225,8 @@ void pgtable_cache_init(void)
 		pgtable_cache[i] = kmem_cache_create(name,
 						     size, size,
 						     SLAB_HWCACHE_ALIGN |
-						     SLAB_MUST_HWCACHE_ALIGN,
+						     SLAB_MUST_HWCACHE_ALIGN |
+						     SLAB_UBC | SLAB_NO_CHARGE,
 						     zero_ctor,
 						     NULL);
 		if (! pgtable_cache[i])
diff -uprN linux-2.6.16/arch/powerpc/mm/mem.c linux-2.6.16.ovz/arch/powerpc/mm/mem.c
--- linux-2.6.16/arch/powerpc/mm/mem.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/arch/powerpc/mm/mem.c	2006-07-05 08:34:55.000000000 -0400
@@ -222,6 +222,7 @@ void show_mem(void)
 	printk("%ld pages shared\n", shared);
 	printk("%ld pages swap cached\n", cached);
 }
+EXPORT_SYMBOL(show_mem);
 
 /*
  * Initialize the bootmem system and give it all the memory we
diff -uprN linux-2.6.16/arch/powerpc/mm/pgtable_32.c linux-2.6.16.ovz/arch/powerpc/mm/pgtable_32.c
--- linux-2.6.16/arch/powerpc/mm/pgtable_32.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/arch/powerpc/mm/pgtable_32.c	2006-07-05 08:34:55.000000000 -0400
@@ -85,7 +85,8 @@ pgd_t *pgd_alloc(struct mm_struct *mm)
 {
 	pgd_t *ret;
 
-	ret = (pgd_t *)__get_free_pages(GFP_KERNEL|__GFP_ZERO, PGDIR_ORDER);
+	ret = (pgd_t *)__get_free_pages(GFP_KERNEL_UBC | __GFP_SOFT_UBC |
+			__GFP_ZERO, PGDIR_ORDER);
 	return ret;
 }
 
@@ -119,6 +120,7 @@ struct page *pte_alloc_one(struct mm_str
 #else
 	gfp_t flags = GFP_KERNEL | __GFP_REPEAT;
 #endif
+	flags |= (__GFP_UBC | __GFP_SOFT_UBC);
 
 	ptepage = alloc_pages(flags, 0);
 	if (ptepage)
diff -uprN linux-2.6.16/arch/powerpc/platforms/powermac/setup.c linux-2.6.16.ovz/arch/powerpc/platforms/powermac/setup.c
--- linux-2.6.16/arch/powerpc/platforms/powermac/setup.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/arch/powerpc/platforms/powermac/setup.c	2006-07-05 08:34:55.000000000 -0400
@@ -456,11 +456,23 @@ static int pmac_pm_finish(suspend_state_
 	return 0;
 }
 
+static int pmac_pm_valid(suspend_state_t state)
+{
+	switch (state) {
+	case PM_SUSPEND_DISK:
+		return 1;
+	/* can't do any other states via generic mechanism yet */
+	default:
+		return 0;
+	}
+}
+
 static struct pm_ops pmac_pm_ops = {
 	.pm_disk_mode	= PM_DISK_SHUTDOWN,
 	.prepare	= pmac_pm_prepare,
 	.enter		= pmac_pm_enter,
 	.finish		= pmac_pm_finish,
+	.valid		= pmac_pm_valid,
 };
 
 #endif /* CONFIG_SOFTWARE_SUSPEND */
diff -uprN linux-2.6.16/arch/ppc/Kconfig linux-2.6.16.ovz/arch/ppc/Kconfig
--- linux-2.6.16/arch/ppc/Kconfig	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/arch/ppc/Kconfig	2006-07-05 08:34:55.000000000 -0400
@@ -920,6 +920,7 @@ config NR_CPUS
 config HIGHMEM
 	bool "High memory support"
 
+source "kernel/Kconfig.fairsched"
 source kernel/Kconfig.hz
 source kernel/Kconfig.preempt
 source "mm/Kconfig"
@@ -1394,6 +1395,10 @@ source "arch/powerpc/oprofile/Kconfig"
 
 source "arch/ppc/Kconfig.debug"
 
+source "kernel/Kconfig.openvz"
+
 source "security/Kconfig"
 
+source "kernel/ub/Kconfig"
+
 source "crypto/Kconfig"
diff -uprN linux-2.6.16/arch/ppc/kernel/misc.S linux-2.6.16.ovz/arch/ppc/kernel/misc.S
--- linux-2.6.16/arch/ppc/kernel/misc.S	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/arch/ppc/kernel/misc.S	2006-07-05 08:34:55.000000000 -0400
@@ -1004,7 +1004,7 @@ _GLOBAL(_get_SP)
  * Create a kernel thread
  *   kernel_thread(fn, arg, flags)
  */
-_GLOBAL(kernel_thread)
+_GLOBAL(ppc_kernel_thread)
 	stwu	r1,-16(r1)
 	stw	r30,8(r1)
 	stw	r31,12(r1)
diff -uprN linux-2.6.16/arch/ppc/kernel/time.c linux-2.6.16.ovz/arch/ppc/kernel/time.c
--- linux-2.6.16/arch/ppc/kernel/time.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/arch/ppc/kernel/time.c	2006-07-05 08:34:55.000000000 -0400
@@ -58,6 +58,8 @@
 #include <linux/init.h>
 #include <linux/profile.h>
 
+#include <ub/beancounter.h>
+
 #include <asm/io.h>
 #include <asm/nvram.h>
 #include <asm/cache.h>
@@ -136,10 +138,14 @@ void timer_interrupt(struct pt_regs * re
 	unsigned long cpu = smp_processor_id();
 	unsigned jiffy_stamp = last_jiffy_stamp(cpu);
 	extern void do_IRQ(struct pt_regs *);
+	struct ve_struct *ve;
+	struct user_beancounter *ub;
 
 	if (atomic_read(&ppc_n_lost_interrupts) != 0)
 		do_IRQ(regs);
 
+	ve = set_exec_env(get_ve0());
+	ub = set_exec_ub(get_ub0());
 	irq_enter();
 
 	while ((next_dec = tb_ticks_per_jiffy - tb_delta(&jiffy_stamp)) <= 0) {
@@ -192,6 +198,8 @@ void timer_interrupt(struct pt_regs * re
 		ppc_md.heartbeat();
 
 	irq_exit();
+	(void)set_exec_ub(ub);
+	(void)set_exec_env(ve);
 }
 
 /*
diff -uprN linux-2.6.16/arch/ppc/mm/fault.c linux-2.6.16.ovz/arch/ppc/mm/fault.c
--- linux-2.6.16/arch/ppc/mm/fault.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/arch/ppc/mm/fault.c	2006-07-05 08:34:55.000000000 -0400
@@ -247,7 +247,6 @@ good_area:
 	 * make sure we exit gracefully rather than endlessly redo
 	 * the fault.
 	 */
- survive:
         switch (handle_mm_fault(mm, vma, address, is_write)) {
         case VM_FAULT_MINOR:
                 current->min_flt++;
@@ -290,14 +289,12 @@ bad_area:
  */
 out_of_memory:
 	up_read(&mm->mmap_sem);
-	if (current->pid == 1) {
-		yield();
-		down_read(&mm->mmap_sem);
-		goto survive;
-	}
-	printk("VM: killing process %s\n", current->comm);
 	if (user_mode(regs))
-		do_exit(SIGKILL);
+		/*
+		 * A 0-order allocation always succeeds unless something
+		 * really fatal happens: beancounter overdraft or OOM. Den
+		 */
+		force_sig(SIGKILL, current);
 	return SIGKILL;
 
 do_sigbus:
diff -uprN linux-2.6.16/arch/ppc/mm/init.c linux-2.6.16.ovz/arch/ppc/mm/init.c
--- linux-2.6.16/arch/ppc/mm/init.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/arch/ppc/mm/init.c	2006-07-05 08:34:55.000000000 -0400
@@ -132,6 +132,7 @@ void show_mem(void)
 	printk("%d pages shared\n",shared);
 	printk("%d pages swap cached\n",cached);
 }
+EXPORT_SYMBOL(show_mem);
 
 /* Free up now-unused memory */
 static void free_sec(unsigned long start, unsigned long end, const char *name)
diff -uprN linux-2.6.16/arch/ppc/mm/pgtable.c linux-2.6.16.ovz/arch/ppc/mm/pgtable.c
--- linux-2.6.16/arch/ppc/mm/pgtable.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/arch/ppc/mm/pgtable.c	2006-07-05 08:34:55.000000000 -0400
@@ -84,7 +84,8 @@ pgd_t *pgd_alloc(struct mm_struct *mm)
 {
 	pgd_t *ret;
 
-	ret = (pgd_t *)__get_free_pages(GFP_KERNEL|__GFP_ZERO, PGDIR_ORDER);
+	ret = (pgd_t *)__get_free_pages(GFP_KERNEL_UBC | __GFP_SOFT_UBC |
+			__GFP_ZERO, PGDIR_ORDER);
 	return ret;
 }
 
@@ -118,6 +119,7 @@ struct page *pte_alloc_one(struct mm_str
 #else
 	gfp_t flags = GFP_KERNEL | __GFP_REPEAT;
 #endif
+	flags |= (__GFP_UBC | __GFP_SOFT_UBC);
 
 	ptepage = alloc_pages(flags, 0);
 	if (ptepage)
diff -uprN linux-2.6.16/arch/s390/Kconfig linux-2.6.16.ovz/arch/s390/Kconfig
--- linux-2.6.16/arch/s390/Kconfig	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/arch/s390/Kconfig	2006-07-05 08:34:55.000000000 -0400
@@ -472,8 +472,12 @@ source "arch/s390/oprofile/Kconfig"
 
 source "arch/s390/Kconfig.debug"
 
+source "kernel/Kconfig.openvz"
+
 source "security/Kconfig"
 
 source "crypto/Kconfig"
 
 source "lib/Kconfig"
+
+source "kernel/ub/Kconfig"
diff -uprN linux-2.6.16/arch/s390/kernel/process.c linux-2.6.16.ovz/arch/s390/kernel/process.c
--- linux-2.6.16/arch/s390/kernel/process.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/arch/s390/kernel/process.c	2006-07-05 08:34:55.000000000 -0400
@@ -164,9 +164,10 @@ void show_regs(struct pt_regs *regs)
 	struct task_struct *tsk = current;
 
         printk("CPU:    %d    %s\n", task_thread_info(tsk)->cpu, print_tainted());
-        printk("Process %s (pid: %d, task: %p, ksp: %p)\n",
-	       current->comm, current->pid, (void *) tsk,
-	       (void *) tsk->thread.ksp);
+        printk("Process %s (pid: %d, veid: %d, task: %p, ksp: %p)\n",
+	       current->comm, current->pid,
+	       VEID(VE_TASK_INFO(current)->owner_env),
+	       (void *) tsk, (void *) tsk->thread.ksp);
 
 	show_registers(regs);
 	/* Show stack backtrace if pt_regs is from kernel mode */
@@ -187,6 +188,13 @@ int kernel_thread(int (*fn)(void *), voi
 {
 	struct pt_regs regs;
 
+	if (!ve_is_super(get_exec_env())) {
+		/* Don't allow kernel_thread() inside VE */
+		printk("kernel_thread call inside VE\n");
+		dump_stack();
+		return -EPERM;
+	}
+
 	memset(&regs, 0, sizeof(regs));
 	regs.psw.mask = PSW_KERNEL_BITS | PSW_MASK_IO | PSW_MASK_EXT;
 	regs.psw.addr = (unsigned long) kernel_thread_starter | PSW_ADDR_AMODE;
diff -uprN linux-2.6.16/arch/s390/kernel/s390_ext.c linux-2.6.16.ovz/arch/s390/kernel/s390_ext.c
--- linux-2.6.16/arch/s390/kernel/s390_ext.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/arch/s390/kernel/s390_ext.c	2006-07-05 08:34:55.000000000 -0400
@@ -114,7 +114,9 @@ void do_extint(struct pt_regs *regs, uns
 {
         ext_int_info_t *p;
         int index;
+	struct ve_struct *envid;
 
+	envid = set_exec_env(get_ve0());
 	irq_enter();
 	asm volatile ("mc 0,0");
 	if (S390_lowcore.int_clock >= S390_lowcore.jiffy_timer)
@@ -132,6 +134,7 @@ void do_extint(struct pt_regs *regs, uns
 		}
 	}
 	irq_exit();
+	(void)set_exec_env(envid);
 }
 
 EXPORT_SYMBOL(register_external_interrupt);
diff -uprN linux-2.6.16/arch/s390/kernel/smp.c linux-2.6.16.ovz/arch/s390/kernel/smp.c
--- linux-2.6.16/arch/s390/kernel/smp.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/arch/s390/kernel/smp.c	2006-07-05 08:34:55.000000000 -0400
@@ -526,6 +526,17 @@ int __devinit start_secondary(void *cpuv
 {
         /* Setup the cpu */
         cpu_init();
+
+#ifdef CONFIG_VE
+	/* TSC reset. kill whatever might rely on old values */
+	VE_TASK_INFO(current)->wakeup_stamp = 0;
+	/*
+	 * Cosmetic: sleep_time won't be changed afterwards for the idle
+	 * thread;  keep it 0 rather than -cycles.
+	 */
+	VE_TASK_INFO(current)->sleep_time = 0;
+#endif
+
 	preempt_disable();
         /* init per CPU timer */
         init_cpu_timer();
@@ -834,6 +845,11 @@ void __init smp_prepare_cpus(unsigned in
 	for_each_cpu(cpu)
 		if (cpu != smp_processor_id())
 			smp_create_idle(cpu);
+
+#ifdef CONFIG_VE
+	/* TSC reset. kill whatever might rely on old values */
+	VE_TASK_INFO(current)->wakeup_stamp = 0;
+#endif
 }
 
 void __devinit smp_prepare_boot_cpu(void)
diff -uprN linux-2.6.16/arch/s390/kernel/syscalls.S linux-2.6.16.ovz/arch/s390/kernel/syscalls.S
--- linux-2.6.16/arch/s390/kernel/syscalls.S	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/arch/s390/kernel/syscalls.S	2006-07-05 08:34:55.000000000 -0400
@@ -312,3 +312,12 @@ SYSCALL(sys_faccessat,sys_faccessat,sys_
 SYSCALL(sys_pselect6,sys_pselect6,compat_sys_pselect6_wrapper)
 SYSCALL(sys_ppoll,sys_ppoll,compat_sys_ppoll_wrapper)
 SYSCALL(sys_unshare,sys_unshare,sys_unshare_wrapper)
+
+.rept 410-(.-sys_call_table)/4
+	NI_SYSCALL
+.endr
+
+SYSCALL(sys_getluid, sys_getluid, sys_ni_syscall)	/* 410 */
+SYSCALL(sys_setluid, sys_setluid, sys_ni_syscall)
+SYSCALL(sys_setublimit, sys_setublimit, sys_ni_syscall)
+SYSCALL(sys_ubstat, sys_ubstat, sys_ni_syscall)
diff -uprN linux-2.6.16/arch/s390/mm/fault.c linux-2.6.16.ovz/arch/s390/mm/fault.c
--- linux-2.6.16/arch/s390/mm/fault.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/arch/s390/mm/fault.c	2006-07-05 08:34:55.000000000 -0400
@@ -61,17 +61,9 @@ void bust_spinlocks(int yes)
 	if (yes) {
 		oops_in_progress = 1;
 	} else {
-		int loglevel_save = console_loglevel;
 		console_unblank();
 		oops_in_progress = 0;
-		/*
-		 * OK, the message is on the console.  Now we call printk()
-		 * without oops_in_progress set so that printk will give klogd
-		 * a poke.  Hold onto your hats...
-		 */
-		console_loglevel = 15;
-		printk(" ");
-		console_loglevel = loglevel_save;
+		wake_up_klogd();
 	}
 }
 
diff -uprN linux-2.6.16/arch/s390/mm/init.c linux-2.6.16.ovz/arch/s390/mm/init.c
--- linux-2.6.16/arch/s390/mm/init.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/arch/s390/mm/init.c	2006-07-05 08:34:55.000000000 -0400
@@ -89,6 +89,7 @@ void show_mem(void)
         printk("%d pages shared\n",shared);
         printk("%d pages swap cached\n",cached);
 }
+EXPORT_SYMBOL(show_mem);
 
 /* References to section boundaries */
 
diff -uprN linux-2.6.16/arch/sh/kernel/kgdb_stub.c linux-2.6.16.ovz/arch/sh/kernel/kgdb_stub.c
--- linux-2.6.16/arch/sh/kernel/kgdb_stub.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/arch/sh/kernel/kgdb_stub.c	2006-07-05 08:34:55.000000000 -0400
@@ -412,7 +412,7 @@ static struct task_struct *get_thread(in
 	if (pid == PID_MAX) pid = 0;
 
 	/* First check via PID */
-	thread = find_task_by_pid(pid);
+	thread = find_task_by_pid_all(pid);
 
 	if (thread)
 		return thread;
diff -uprN linux-2.6.16/arch/sh64/kernel/process.c linux-2.6.16.ovz/arch/sh64/kernel/process.c
--- linux-2.6.16/arch/sh64/kernel/process.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/arch/sh64/kernel/process.c	2006-07-05 08:34:55.000000000 -0400
@@ -906,7 +906,7 @@ asids_proc_info(char *buf, char **start,
 	int len=0;
 	struct task_struct *p;
 	read_lock(&tasklist_lock);
-	for_each_process(p) {
+	for_each_process_ve(p) {
 		int pid = p->pid;
 		struct mm_struct *mm;
 		if (!pid) continue;
diff -uprN linux-2.6.16/arch/sparc64/kernel/pci_iommu.c linux-2.6.16.ovz/arch/sparc64/kernel/pci_iommu.c
--- linux-2.6.16/arch/sparc64/kernel/pci_iommu.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/arch/sparc64/kernel/pci_iommu.c	2006-07-05 08:34:55.000000000 -0400
@@ -219,7 +219,7 @@ static inline void iommu_free_ctx(struct
  * DMA for PCI device PDEV.  Return non-NULL cpu-side address if
  * successful and set *DMA_ADDRP to the PCI side dma address.
  */
-void *pci_alloc_consistent(struct pci_dev *pdev, size_t size, dma_addr_t *dma_addrp)
+void *__pci_alloc_consistent(struct pci_dev *pdev, size_t size, dma_addr_t *dma_addrp, gfp_t gfp)
 {
 	struct pcidev_cookie *pcp;
 	struct pci_iommu *iommu;
@@ -233,7 +233,7 @@ void *pci_alloc_consistent(struct pci_de
 	if (order >= 10)
 		return NULL;
 
-	first_page = __get_free_pages(GFP_ATOMIC, order);
+	first_page = __get_free_pages(gfp, order);
 	if (first_page == 0UL)
 		return NULL;
 	memset((char *)first_page, 0, PAGE_SIZE << order);
diff -uprN linux-2.6.16/arch/sparc64/kernel/setup.c linux-2.6.16.ovz/arch/sparc64/kernel/setup.c
--- linux-2.6.16/arch/sparc64/kernel/setup.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/arch/sparc64/kernel/setup.c	2006-07-05 08:34:55.000000000 -0400
@@ -156,7 +156,7 @@ int prom_callback(long *args)
 			pte_t *ptep;
 			pte_t pte;
 
-			for_each_process(p) {
+			for_each_process_all(p) {
 				mm = p->mm;
 				if (CTX_NRBITS(mm->context) == ctx)
 					break;
diff -uprN linux-2.6.16/arch/sparc64/kernel/sparc64_ksyms.c linux-2.6.16.ovz/arch/sparc64/kernel/sparc64_ksyms.c
--- linux-2.6.16/arch/sparc64/kernel/sparc64_ksyms.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/arch/sparc64/kernel/sparc64_ksyms.c	2006-07-05 08:34:55.000000000 -0400
@@ -221,7 +221,7 @@ EXPORT_SYMBOL(insl);
 EXPORT_SYMBOL(ebus_chain);
 EXPORT_SYMBOL(isa_chain);
 EXPORT_SYMBOL(pci_memspace_mask);
-EXPORT_SYMBOL(pci_alloc_consistent);
+EXPORT_SYMBOL(__pci_alloc_consistent);
 EXPORT_SYMBOL(pci_free_consistent);
 EXPORT_SYMBOL(pci_map_single);
 EXPORT_SYMBOL(pci_unmap_single);
diff -uprN linux-2.6.16/arch/sparc64/lib/checksum.S linux-2.6.16.ovz/arch/sparc64/lib/checksum.S
--- linux-2.6.16/arch/sparc64/lib/checksum.S	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/arch/sparc64/lib/checksum.S	2006-07-05 08:34:55.000000000 -0400
@@ -165,8 +165,9 @@ csum_partial_end_cruft:
 	sll		%g1, 8, %g1
 	or		%o5, %g1, %o4
 
-1:	add		%o2, %o4, %o2
+1:	addcc		%o2, %o4, %o2
+	addc		%g0, %o2, %o2
 
 csum_partial_finish:
 	retl
-	 mov		%o2, %o0
+	 srl		%o2, 0, %o0
diff -uprN linux-2.6.16/arch/sparc64/lib/csum_copy.S linux-2.6.16.ovz/arch/sparc64/lib/csum_copy.S
--- linux-2.6.16/arch/sparc64/lib/csum_copy.S	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/arch/sparc64/lib/csum_copy.S	2006-07-05 08:34:55.000000000 -0400
@@ -221,11 +221,12 @@ FUNC_NAME:		/* %o0=src, %o1=dst, %o2=len
 	sll		%g1, 8, %g1
 	or		%o5, %g1, %o4
 
-1:	add		%o3, %o4, %o3
+1:	addcc		%o3, %o4, %o3
+	addc		%g0, %o3, %o3
 
 70:
 	retl
-	 mov		%o3, %o0
+	 srl		%o3, 0, %o0
 
 95:	mov		0, GLOBAL_SPARE
 	brlez,pn	%o2, 4f
diff -uprN linux-2.6.16/arch/um/drivers/mconsole_kern.c linux-2.6.16.ovz/arch/um/drivers/mconsole_kern.c
--- linux-2.6.16/arch/um/drivers/mconsole_kern.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/arch/um/drivers/mconsole_kern.c	2006-07-05 08:34:55.000000000 -0400
@@ -600,7 +600,7 @@ static void do_stack_trace(struct mc_req
 
 	from = current;
 
-	to = find_task_by_pid(pid_requested);
+	to = find_task_by_pid_all(pid_requested);
 	if((to == NULL) || (pid_requested == 0)) {
 		mconsole_reply(req, "Couldn't find that pid", 1, 0);
 		return;
diff -uprN linux-2.6.16/arch/um/kernel/skas/process_kern.c linux-2.6.16.ovz/arch/um/kernel/skas/process_kern.c
--- linux-2.6.16/arch/um/kernel/skas/process_kern.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/arch/um/kernel/skas/process_kern.c	2006-07-05 08:34:55.000000000 -0400
@@ -197,7 +197,7 @@ void kill_off_processes_skas(void)
 		int pid, me;
 
 		me = os_getpid();
-		for_each_process(p){
+		for_each_process_all(p){
 			if(p->mm == NULL)
 				continue;
 
diff -uprN linux-2.6.16/arch/um/kernel/tt/process_kern.c linux-2.6.16.ovz/arch/um/kernel/tt/process_kern.c
--- linux-2.6.16/arch/um/kernel/tt/process_kern.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/arch/um/kernel/tt/process_kern.c	2006-07-05 08:34:55.000000000 -0400
@@ -301,7 +301,7 @@ void kill_off_processes_tt(void)
 	int me;
 
 	me = os_getpid();
-        for_each_process(p){
+        for_each_process_all(p){
 		if(p->thread.mode.tt.extern_pid != me) 
 			os_kill_process(p->thread.mode.tt.extern_pid, 0);
 	}
@@ -444,7 +444,7 @@ int is_valid_pid(int pid)
 	struct task_struct *task;
 
         read_lock(&tasklist_lock);
-        for_each_process(task){
+        for_each_process_all(task){
                 if(task->thread.mode.tt.extern_pid == pid){
 			read_unlock(&tasklist_lock);
 			return(1);
diff -uprN linux-2.6.16/arch/x86_64/Kconfig linux-2.6.16.ovz/arch/x86_64/Kconfig
--- linux-2.6.16/arch/x86_64/Kconfig	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/arch/x86_64/Kconfig	2006-07-05 08:34:55.000000000 -0400
@@ -246,6 +246,8 @@ config SCHED_SMT
 	  cost of slightly increased overhead in some places. If unsure say
 	  N here.
 
+source "kernel/Kconfig.fairsched"
+
 source "kernel/Kconfig.preempt"
 
 config NUMA
@@ -588,8 +590,12 @@ endmenu
 
 source "arch/x86_64/Kconfig.debug"
 
+source "kernel/Kconfig.openvz"
+
 source "security/Kconfig"
 
 source "crypto/Kconfig"
 
 source "lib/Kconfig"
+
+source "kernel/ub/Kconfig"
diff -uprN linux-2.6.16/arch/x86_64/boot/compressed/head.S linux-2.6.16.ovz/arch/x86_64/boot/compressed/head.S
--- linux-2.6.16/arch/x86_64/boot/compressed/head.S	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/arch/x86_64/boot/compressed/head.S	2006-07-05 08:34:55.000000000 -0400
@@ -34,7 +34,7 @@
 startup_32:
 	cld
 	cli
-	movl $(__KERNEL_DS),%eax
+	movl $(__BOOT_DS),%eax
 	movl %eax,%ds
 	movl %eax,%es
 	movl %eax,%fs
@@ -76,7 +76,7 @@ startup_32:
 	jnz  3f
 	addl $8,%esp
 	xorl %ebx,%ebx
-	ljmp $(__KERNEL_CS), $__PHYSICAL_START
+	ljmp $(__BOOT_CS), $__PHYSICAL_START
 
 /*
  * We come here, if we were loaded high.
@@ -104,7 +104,7 @@ startup_32:
 	popl %eax	# hcount
 	movl $__PHYSICAL_START,%edi
 	cli		# make sure we don't get interrupted
-	ljmp $(__KERNEL_CS), $0x1000 # and jump to the move routine
+	ljmp $(__BOOT_CS), $0x1000 # and jump to the move routine
 
 /*
  * Routine (template) for moving the decompressed kernel in place,
@@ -127,7 +127,7 @@ move_routine_start:
 	movsl
 	movl %ebx,%esi	# Restore setup pointer
 	xorl %ebx,%ebx
-	ljmp $(__KERNEL_CS), $__PHYSICAL_START
+	ljmp $(__BOOT_CS), $__PHYSICAL_START
 move_routine_end:
 
 
@@ -137,5 +137,5 @@ user_stack:	 	
 	.fill 4096,4,0
 stack_start:	
 	.long user_stack+4096
-	.word __KERNEL_DS
+	.word __BOOT_DS
 
diff -uprN linux-2.6.16/arch/x86_64/boot/setup.S linux-2.6.16.ovz/arch/x86_64/boot/setup.S
--- linux-2.6.16/arch/x86_64/boot/setup.S	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/arch/x86_64/boot/setup.S	2006-07-05 08:34:55.000000000 -0400
@@ -729,7 +729,7 @@ flush_instr:
 	subw	$DELTA_INITSEG, %si
 	shll	$4, %esi			# Convert to 32-bit pointer
 # NOTE: For high loaded big kernels we need a
-#	jmpi    0x100000,__KERNEL_CS
+#	jmpi    0x100000,__BOOT_CS
 #
 #	but we yet haven't reloaded the CS register, so the default size 
 #	of the target offset still is 16 bit.
@@ -740,7 +740,7 @@ flush_instr:
 	.byte 0x66, 0xea			# prefix + jmpi-opcode
 code32:	.long	0x1000				# will be set to 0x100000
 						# for big kernels
-	.word	__KERNEL_CS
+	.word	__BOOT_CS
 
 # Here's a bunch of information about your current kernel..
 kernel_version:	.ascii	UTS_RELEASE
diff -uprN linux-2.6.16/arch/x86_64/ia32/Makefile linux-2.6.16.ovz/arch/x86_64/ia32/Makefile
--- linux-2.6.16/arch/x86_64/ia32/Makefile	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/arch/x86_64/ia32/Makefile	2006-07-05 08:34:55.000000000 -0400
@@ -27,5 +27,5 @@ $(obj)/vsyscall-sysenter.so $(obj)/vsysc
 $(obj)/vsyscall-%.so: $(src)/vsyscall.lds $(obj)/vsyscall-%.o FORCE
 	$(call if_changed,syscall)
 
-AFLAGS_vsyscall-sysenter.o = -m32
-AFLAGS_vsyscall-syscall.o = -m32
+AFLAGS_vsyscall-sysenter.o = -m32 -Wa,-32
+AFLAGS_vsyscall-syscall.o = -m32 -Wa,-32
diff -uprN linux-2.6.16/arch/x86_64/ia32/ia32_aout.c linux-2.6.16.ovz/arch/x86_64/ia32/ia32_aout.c
--- linux-2.6.16/arch/x86_64/ia32/ia32_aout.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/arch/x86_64/ia32/ia32_aout.c	2006-07-05 08:34:55.000000000 -0400
@@ -347,14 +347,14 @@ static int load_aout_binary(struct linux
 		if ((ex.a_text & 0xfff || ex.a_data & 0xfff) &&
 		    (N_MAGIC(ex) != NMAGIC) && (jiffies-error_time2) > 5*HZ)
 		{
-			printk(KERN_NOTICE "executable not page aligned\n");
+			ve_printk(VE_LOG, KERN_NOTICE "executable not page aligned\n");
 			error_time2 = jiffies;
 		}
 
 		if ((fd_offset & ~PAGE_MASK) != 0 &&
 		    (jiffies-error_time) > 5*HZ)
 		{
-			printk(KERN_WARNING 
+			ve_printk(VE_LOG, KERN_WARNING 
 			       "fd_offset is not page aligned. Please convert program: %s\n",
 			       bprm->file->f_dentry->d_name.name);
 			error_time = jiffies;
@@ -467,7 +467,7 @@ static int load_aout_library(struct file
 		static unsigned long error_time;
 		if ((jiffies-error_time) > 5*HZ)
 		{
-			printk(KERN_WARNING 
+			ve_printk(VE_LOG, KERN_WARNING 
 			       "N_TXTOFF is not page aligned. Please convert library: %s\n",
 			       file->f_dentry->d_name.name);
 			error_time = jiffies;
diff -uprN linux-2.6.16/arch/x86_64/ia32/ia32_binfmt.c linux-2.6.16.ovz/arch/x86_64/ia32/ia32_binfmt.c
--- linux-2.6.16/arch/x86_64/ia32/ia32_binfmt.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/arch/x86_64/ia32/ia32_binfmt.c	2006-07-05 08:34:55.000000000 -0400
@@ -27,12 +27,14 @@
 #include <asm/ia32.h>
 #include <asm/vsyscall32.h>
 
+#include <ub/ub_vmpages.h>
+
 #define ELF_NAME "elf/i386"
 
 #define AT_SYSINFO 32
 #define AT_SYSINFO_EHDR		33
 
-int sysctl_vsyscall32 = 1;
+int sysctl_vsyscall32 = 0;
 
 #define ARCH_DLINFO do {  \
 	if (sysctl_vsyscall32) { \
@@ -347,9 +349,15 @@ int ia32_setup_arg_pages(struct linux_bi
 		bprm->loader += stack_base;
 	bprm->exec += stack_base;
 
+	ret = -ENOMEM;
+	if (ub_memory_charge(mm, IA32_STACK_TOP -
+				(PAGE_MASK & (unsigned long)bprm->p),
+				VM_STACK_FLAGS, NULL, UB_SOFT))
+		goto err_charge;
+
 	mpnt = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
 	if (!mpnt) 
-		return -ENOMEM; 
+		goto err_alloc;
 
 	memset(mpnt, 0, sizeof(*mpnt));
 
@@ -366,11 +374,8 @@ int ia32_setup_arg_pages(struct linux_bi
 			mpnt->vm_flags = VM_STACK_FLAGS;
  		mpnt->vm_page_prot = (mpnt->vm_flags & VM_EXEC) ? 
  			PAGE_COPY_EXEC : PAGE_COPY;
-		if ((ret = insert_vm_struct(mm, mpnt))) {
-			up_write(&mm->mmap_sem);
-			kmem_cache_free(vm_area_cachep, mpnt);
-			return ret;
-		}
+		if ((ret = insert_vm_struct(mm, mpnt)))
+			goto err_insert;
 		mm->stack_vm = mm->total_vm = vma_pages(mpnt);
 	} 
 
@@ -385,6 +390,16 @@ int ia32_setup_arg_pages(struct linux_bi
 	up_write(&mm->mmap_sem);
 	
 	return 0;
+
+err_insert:
+	up_write(&mm->mmap_sem);
+	kmem_cache_free(vm_area_cachep, mpnt);
+err_alloc:
+	ub_memory_uncharge(mm, IA32_STACK_TOP - 
+				(PAGE_MASK & (unsigned long)bprm->p),
+				VM_STACK_FLAGS, NULL);
+err_charge:
+	return ret;
 }
 EXPORT_SYMBOL(ia32_setup_arg_pages);
 
diff -uprN linux-2.6.16/arch/x86_64/ia32/ia32_signal.c linux-2.6.16.ovz/arch/x86_64/ia32/ia32_signal.c
--- linux-2.6.16/arch/x86_64/ia32/ia32_signal.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/arch/x86_64/ia32/ia32_signal.c	2006-07-05 08:34:55.000000000 -0400
@@ -39,7 +39,6 @@
 
 #define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP)))
 
-asmlinkage int do_signal(struct pt_regs *regs, sigset_t *oldset);
 void signal_fault(struct pt_regs *regs, void __user *frame, char *where);
 
 int copy_siginfo_to_user32(compat_siginfo_t __user *to, siginfo_t *from)
@@ -118,22 +117,17 @@ asmlinkage long
 sys32_sigsuspend(int history0, int history1, old_sigset_t mask,
 		 struct pt_regs *regs)
 {
-	sigset_t saveset;
-
 	mask &= _BLOCKABLE;
 	spin_lock_irq(&current->sighand->siglock);
-	saveset = current->blocked;
+	current->saved_sigmask = current->blocked;
 	siginitset(&current->blocked, mask);
 	recalc_sigpending();
 	spin_unlock_irq(&current->sighand->siglock);
 
-	regs->rax = -EINTR;
-	while (1) {
-		current->state = TASK_INTERRUPTIBLE;
-		schedule();
-		if (do_signal(regs, &saveset))
-			return -EINTR;
-	}
+	current->state = TASK_INTERRUPTIBLE;
+	schedule();
+	set_thread_flag(TIF_RESTORE_SIGMASK);
+	return -ERESTARTNOHAND;
 }
 
 asmlinkage long
@@ -510,11 +504,11 @@ int ia32_setup_frame(int sig, struct k_s
 		current->comm, current->pid, frame, regs->rip, frame->pretcode);
 #endif
 
-	return 1;
+	return 0;
 
 give_sigsegv:
 	force_sigsegv(sig, current);
-	return 0;
+	return -EFAULT;
 }
 
 int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
@@ -606,9 +600,9 @@ int ia32_setup_rt_frame(int sig, struct 
 		current->comm, current->pid, frame, regs->rip, frame->pretcode);
 #endif
 
-	return 1;
+	return 0;
 
 give_sigsegv:
 	force_sigsegv(sig, current);
-	return 0;
+	return -EFAULT;
 }
diff -uprN linux-2.6.16/arch/x86_64/ia32/sys_ia32.c linux-2.6.16.ovz/arch/x86_64/ia32/sys_ia32.c
--- linux-2.6.16/arch/x86_64/ia32/sys_ia32.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/arch/x86_64/ia32/sys_ia32.c	2006-07-05 08:34:55.000000000 -0400
@@ -527,7 +527,7 @@ int sys32_ni_syscall(int call)
 	static char lastcomm[sizeof(me->comm)];
 
 	if (strncmp(lastcomm, me->comm, sizeof(lastcomm))) {
-		printk(KERN_INFO "IA32 syscall %d from %s not implemented\n",
+		ve_printk(VE_LOG, KERN_INFO "IA32 syscall %d from %s not implemented\n",
 		       call, me->comm);
 		strncpy(lastcomm, me->comm, sizeof(lastcomm));
 	} 
@@ -890,13 +890,13 @@ asmlinkage long sys32_olduname(struct ol
   
   	down_read(&uts_sem);
 	
-	error = __copy_to_user(&name->sysname,&system_utsname.sysname,__OLD_UTS_LEN);
+	error = __copy_to_user(&name->sysname,&ve_utsname.sysname,__OLD_UTS_LEN);
 	 __put_user(0,name->sysname+__OLD_UTS_LEN);
-	 __copy_to_user(&name->nodename,&system_utsname.nodename,__OLD_UTS_LEN);
+	 __copy_to_user(&name->nodename,&ve_utsname.nodename,__OLD_UTS_LEN);
 	 __put_user(0,name->nodename+__OLD_UTS_LEN);
-	 __copy_to_user(&name->release,&system_utsname.release,__OLD_UTS_LEN);
+	 __copy_to_user(&name->release,&ve_utsname.release,__OLD_UTS_LEN);
 	 __put_user(0,name->release+__OLD_UTS_LEN);
-	 __copy_to_user(&name->version,&system_utsname.version,__OLD_UTS_LEN);
+	 __copy_to_user(&name->version,&ve_utsname.version,__OLD_UTS_LEN);
 	 __put_user(0,name->version+__OLD_UTS_LEN);
 	 { 
 		 char *arch = "x86_64";
@@ -919,7 +919,7 @@ long sys32_uname(struct old_utsname __us
 	if (!name)
 		return -EFAULT;
 	down_read(&uts_sem);
-	err=copy_to_user(name, &system_utsname, sizeof (*name));
+	err=copy_to_user(name, &ve_utsname, sizeof (*name));
 	up_read(&uts_sem);
 	if (personality(current->personality) == PER_LINUX32) 
 		err |= copy_to_user(&name->machine, "i686", 5);
@@ -1005,7 +1005,7 @@ long sys32_vm86_warning(void)
 	struct task_struct *me = current;
 	static char lastcomm[sizeof(me->comm)];
 	if (strncmp(lastcomm, me->comm, sizeof(lastcomm))) {
-		printk(KERN_INFO "%s: vm86 mode not supported on 64 bit kernel\n",
+		ve_printk(VE_LOG, KERN_INFO "%s: vm86 mode not supported on 64 bit kernel\n",
 		       me->comm);
 		strncpy(lastcomm, me->comm, sizeof(lastcomm));
 	} 
diff -uprN linux-2.6.16/arch/x86_64/ia32/syscall32.c linux-2.6.16.ovz/arch/x86_64/ia32/syscall32.c
--- linux-2.6.16/arch/x86_64/ia32/syscall32.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/arch/x86_64/ia32/syscall32.c	2006-07-05 08:34:55.000000000 -0400
@@ -14,6 +14,8 @@
 #include <asm/tlbflush.h>
 #include <asm/ia32_unistd.h>
 
+#include <ub/ub_vmpages.h>
+
 extern unsigned char syscall32_syscall[], syscall32_syscall_end[];
 extern unsigned char syscall32_sysenter[], syscall32_sysenter_end[];
 extern int sysctl_vsyscall32;
@@ -47,32 +49,45 @@ int syscall32_setup_pages(struct linux_b
 	int npages = (VSYSCALL32_END - VSYSCALL32_BASE) >> PAGE_SHIFT;
 	struct vm_area_struct *vma;
 	struct mm_struct *mm = current->mm;
+	unsigned long flags;
 	int ret;
 
+	flags = VM_READ | VM_EXEC | VM_MAYREAD | VM_MAYEXEC | VM_MAYWRITE |
+		mm->def_flags;
+
+	ret = -ENOMEM;
+	if (ub_memory_charge(mm, VSYSCALL32_END - VSYSCALL32_BASE,
+			flags, NULL, UB_SOFT))
+		goto err_charge;
+
 	vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
 	if (!vma)
-		return -ENOMEM;
+		goto err_alloc;
 
 	memset(vma, 0, sizeof(struct vm_area_struct));
 	/* Could randomize here */
 	vma->vm_start = VSYSCALL32_BASE;
 	vma->vm_end = VSYSCALL32_END;
 	/* MAYWRITE to allow gdb to COW and set breakpoints */
-	vma->vm_flags = VM_READ|VM_EXEC|VM_MAYREAD|VM_MAYEXEC|VM_MAYWRITE;
-	vma->vm_flags |= mm->def_flags;
+	vma->vm_flags = flags;
 	vma->vm_page_prot = protection_map[vma->vm_flags & 7];
 	vma->vm_ops = &syscall32_vm_ops;
 	vma->vm_mm = mm;
 
 	down_write(&mm->mmap_sem);
-	if ((ret = insert_vm_struct(mm, vma))) {
-		up_write(&mm->mmap_sem);
-		kmem_cache_free(vm_area_cachep, vma);
-		return ret;
-	}
+	if ((ret = insert_vm_struct(mm, vma)))
+		goto err_ins;
 	mm->total_vm += npages;
 	up_write(&mm->mmap_sem);
 	return 0;
+
+err_ins:
+	up_write(&mm->mmap_sem);
+	kmem_cache_free(vm_area_cachep, vma);
+err_alloc:
+	ub_memory_uncharge(mm, VSYSCALL32_END - VSYSCALL32_BASE, flags, NULL);
+err_charge:
+	return ret;
 }
 
 static int __init init_syscall32(void)
diff -uprN linux-2.6.16/arch/x86_64/kernel/acpi/wakeup.S linux-2.6.16.ovz/arch/x86_64/kernel/acpi/wakeup.S
--- linux-2.6.16/arch/x86_64/kernel/acpi/wakeup.S	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/arch/x86_64/kernel/acpi/wakeup.S	2006-07-05 08:34:55.000000000 -0400
@@ -77,7 +77,7 @@ wakeup_code:
 
 	.byte 0x66, 0xea			# prefix + jmpi-opcode
 	.long	wakeup_32 - __START_KERNEL_map
-	.word	__KERNEL_CS
+	.word	__BOOT_CS
 
 	.code32
 wakeup_32:
@@ -96,13 +96,13 @@ wakeup_32:
 	jnc	bogus_cpu
 	movl	%edx,%edi
 	
-	movw	$__KERNEL_DS, %ax
+	movw	$__BOOT_DS, %ax
 	movw	%ax, %ds
 	movw	%ax, %es
 	movw	%ax, %fs
 	movw	%ax, %gs
 
-	movw	$__KERNEL_DS, %ax	
+	movw	$__BOOT_DS, %ax	
 	movw	%ax, %ss
 
 	mov	$(wakeup_stack - __START_KERNEL_map), %esp
@@ -187,7 +187,7 @@ reach_compatibility_mode:
 
 wakeup_jumpvector:
 	.long	wakeup_long64 - __START_KERNEL_map
-	.word	__KERNEL_CS
+	.word	__BOOT_CS
 
 .code64
 
diff -uprN linux-2.6.16/arch/x86_64/kernel/apic.c linux-2.6.16.ovz/arch/x86_64/kernel/apic.c
--- linux-2.6.16/arch/x86_64/kernel/apic.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/arch/x86_64/kernel/apic.c	2006-07-05 08:34:55.000000000 -0400
@@ -941,6 +941,7 @@ void smp_local_timer_interrupt(struct pt
  */
 void smp_apic_timer_interrupt(struct pt_regs *regs)
 {
+	struct ve_struct *ve;
 	/*
 	 * the NMI deadlock-detector uses this.
 	 */
@@ -957,9 +958,11 @@ void smp_apic_timer_interrupt(struct pt_
 	 * interrupt lock, which is the WrongThing (tm) to do.
 	 */
 	exit_idle();
+	ve = set_exec_env(get_ve0());
 	irq_enter();
 	smp_local_timer_interrupt(regs);
 	irq_exit();
+	(void)set_exec_env(ve);
 }
 
 /*
diff -uprN linux-2.6.16/arch/x86_64/kernel/entry.S linux-2.6.16.ovz/arch/x86_64/kernel/entry.S
--- linux-2.6.16/arch/x86_64/kernel/entry.S	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/arch/x86_64/kernel/entry.S	2006-07-05 08:34:55.000000000 -0400
@@ -180,6 +180,10 @@ rff_trace:
  *
  * XXX	if we had a free scratch register we could save the RSP into the stack frame
  *      and report it properly in ps. Unfortunately we haven't.
+ *
+ * When user can change the frames always force IRET. That is because
+ * it deals with non-canonical addresses better. SYSRET has trouble
+ * with them due to bugs in both AMD and Intel CPUs.
  */ 			 		
 
 ENTRY(system_call)
@@ -244,7 +248,7 @@ sysret_careful:
 	/* Handle a signal */ 
 sysret_signal:
 	sti
-	testl $(_TIF_SIGPENDING|_TIF_NOTIFY_RESUME|_TIF_SINGLESTEP),%edx
+	testl $(_TIF_SIGPENDING|_TIF_RESTORE_SIGMASK|_TIF_NOTIFY_RESUME|_TIF_SINGLESTEP),%edx
 	jz    1f
 
 	/* Really a signal */
@@ -254,7 +258,10 @@ sysret_signal:
 	xorl %esi,%esi # oldset -> arg2
 	call ptregscall_common
 1:	movl $_TIF_NEED_RESCHED,%edi
-	jmp sysret_check
+	/* Use IRET because user could have changed frame. This
+	   works because ptregscall_common has called FIXUP_TOP_OF_STACK. */
+	cli
+	jmp int_with_check
 	
 badsys:
 	movq $-ENOSYS,RAX-ARGOFFSET(%rsp)
@@ -274,13 +281,9 @@ tracesys:			 
 	ja  1f
 	movq %r10,%rcx	/* fixup for C */
 	call *sys_call_table(,%rax,8)
-	movq %rax,RAX-ARGOFFSET(%rsp)
-1:	SAVE_REST
-	movq %rsp,%rdi
-	call syscall_trace_leave
-	RESTORE_TOP_OF_STACK %rbx
-	RESTORE_REST
-	jmp ret_from_sys_call
+1:	movq %rax,RAX-ARGOFFSET(%rsp)
+	/* Use IRET because user could have changed frame */
+	jmp int_ret_from_sys_call
 	CFI_ENDPROC
 		
 /* 
@@ -350,7 +353,7 @@ int_very_careful:
 	jmp int_restore_rest
 	
 int_signal:
-	testl $(_TIF_NOTIFY_RESUME|_TIF_SIGPENDING|_TIF_SINGLESTEP),%edx
+	testl $(_TIF_NOTIFY_RESUME|_TIF_RESTORE_SIGMASK|_TIF_SIGPENDING|_TIF_SINGLESTEP),%edx
 	jz 1f
 	movq %rsp,%rdi		# &ptregs -> arg1
 	xorl %esi,%esi		# oldset -> arg2
@@ -408,25 +411,9 @@ ENTRY(stub_execve)
 	CFI_ADJUST_CFA_OFFSET -8
 	CFI_REGISTER rip, r11
 	SAVE_REST
-	movq %r11, %r15
-	CFI_REGISTER rip, r15
 	FIXUP_TOP_OF_STACK %r11
 	call sys_execve
-	GET_THREAD_INFO(%rcx)
-	bt $TIF_IA32,threadinfo_flags(%rcx)
-	CFI_REMEMBER_STATE
-	jc exec_32bit
 	RESTORE_TOP_OF_STACK %r11
-	movq %r15, %r11
-	CFI_REGISTER rip, r11
-	RESTORE_REST
-	pushq %r11
-	CFI_ADJUST_CFA_OFFSET 8
-	CFI_REL_OFFSET rip, 0
-	ret
-
-exec_32bit:
-	CFI_RESTORE_STATE
 	movq %rax,RAX(%rsp)
 	RESTORE_REST
 	jmp int_ret_from_sys_call
@@ -574,7 +561,7 @@ retint_careful:
 	jmp retint_check
 	
 retint_signal:
-	testl $(_TIF_SIGPENDING|_TIF_NOTIFY_RESUME|_TIF_SINGLESTEP),%edx
+	testl $(_TIF_SIGPENDING|_TIF_RESTORE_SIGMASK|_TIF_NOTIFY_RESUME|_TIF_SINGLESTEP),%edx
 	jz    retint_swapgs
 	sti
 	SAVE_REST
@@ -845,7 +832,7 @@ ENTRY(kernel_thread)
 	xorl %r9d,%r9d
 	
 	# clone now
-	call do_fork
+	call do_fork_kthread
 	movq %rax,RAX(%rsp)
 	xorl %edi,%edi
 
diff -uprN linux-2.6.16/arch/x86_64/kernel/head.S linux-2.6.16.ovz/arch/x86_64/kernel/head.S
--- linux-2.6.16/arch/x86_64/kernel/head.S	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/arch/x86_64/kernel/head.S	2006-07-05 08:34:55.000000000 -0400
@@ -40,7 +40,7 @@ startup_32:
 	 */
 
 	/* Initialize the %ds segment register */
-	movl $__KERNEL_DS,%eax
+	movl $__BOOT_DS,%eax
 	movl %eax,%ds
 
 	/* Load new GDT with the 64bit segments using 32bit descriptor */
@@ -183,7 +183,14 @@ startup_64:
 	/* esi is pointer to real mode structure with interesting info.
 	   pass it to C */
 	movl	%esi, %edi
-	
+
+	/* Switch to __KERNEL_CS. The segment is the same, but selector
+	 * is different. */
+	pushq	$__KERNEL_CS
+	pushq	$switch_cs
+	lretq
+switch_cs:
+
 	/* Finally jump to run C code and to be on real kernel address
 	 * Since we are running on identity-mapped space we have to jump
 	 * to the full 64bit address , this is only possible as indirect
@@ -243,7 +250,7 @@ pGDT32:
 .org 0xf10	
 ljumpvector:
 	.long	startup_64-__START_KERNEL_map
-	.word	__KERNEL_CS
+	.word	__BOOT_CS
 
 ENTRY(stext)
 ENTRY(_stext)
@@ -355,21 +362,30 @@ gdt:
 .align PAGE_SIZE
 
 /* The TLS descriptors are currently at a different place compared to i386.
-   Hopefully nobody expects them at a fixed place (Wine?) */
+   Hopefully nobody expects them at a fixed place (Wine?)
+   Descriptors rearranged to place 32bit and TLS selectors in the same
+   places, because it is really necessary. sysret/exit mandates order
+   of kernel/user cs/ds, so we have to extend gdt.
+*/
 	
 ENTRY(cpu_gdt_table)
-	.quad	0x0000000000000000	/* NULL descriptor */
-	.quad	0x0			/* unused */
-	.quad	0x00af9a000000ffff	/* __KERNEL_CS */
-	.quad	0x00cf92000000ffff	/* __KERNEL_DS */
-	.quad	0x00cffa000000ffff	/* __USER32_CS */
-	.quad	0x00cff2000000ffff	/* __USER_DS, __USER32_DS  */		
-	.quad	0x00affa000000ffff	/* __USER_CS */
-	.quad	0x00cf9a000000ffff	/* __KERNEL32_CS */
-	.quad	0,0			/* TSS */
-	.quad	0,0			/* LDT */
-	.quad   0,0,0			/* three TLS descriptors */ 
-	.quad	0			/* unused */
+	.quad	0x0000000000000000	/* 0 NULL descriptor */
+	.quad	0x0			/* 1 unused */	
+	.quad	0x00af9a000000ffff	/* 2 __BOOT_CS */
+	.quad	0x00cf92000000ffff	/* 3 __BOOT_DS */
+	.quad	0,0			/* 4,5 TSS */
+	.quad   0,0,0			/* 6-8 three TLS descriptors */ 
+	.quad	0,0			/* 9,10 LDT */
+	.quad	0x00cf9a000000ffff	/* 11 __KERNEL32_CS */
+	.quad	0x00af9a000000ffff	/* 12 __KERNEL_CS */
+	.quad	0x00cf92000000ffff	/* 13 __KERNEL_DS */
+	.quad	0x00cffa000000ffff	/* 14 __USER32_CS */
+	.quad	0x00cff2000000ffff	/* 15 __USER_DS, __USER32_DS  */		
+	.quad	0x00affa000000ffff	/* 16 __USER_CS */
+	.quad	0x0			/* 17 unused */
+	.quad	0,0,0,0,0,0
+	.quad	0,0,0,0,0,0,0,0
+	
 gdt_end:	
 	/* asm/segment.h:GDT_ENTRIES must match this */	
 	/* This should be a multiple of the cache line size */
diff -uprN linux-2.6.16/arch/x86_64/kernel/irq.c linux-2.6.16.ovz/arch/x86_64/kernel/irq.c
--- linux-2.6.16/arch/x86_64/kernel/irq.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/arch/x86_64/kernel/irq.c	2006-07-05 08:34:55.000000000 -0400
@@ -98,12 +98,15 @@ asmlinkage unsigned int do_IRQ(struct pt
 {	
 	/* high bits used in ret_from_ code  */
 	unsigned irq = regs->orig_rax & 0xff;
+	struct ve_struct *ve;
 
 	exit_idle();
+	ve = set_exec_env(get_ve0());
 	irq_enter();
 
 	__do_IRQ(irq, regs);
 	irq_exit();
+	(void)set_exec_env(ve);
 
 	return 1;
 }
diff -uprN linux-2.6.16/arch/x86_64/kernel/ldt.c linux-2.6.16.ovz/arch/x86_64/kernel/ldt.c
--- linux-2.6.16/arch/x86_64/kernel/ldt.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/arch/x86_64/kernel/ldt.c	2006-07-05 08:34:55.000000000 -0400
@@ -16,6 +16,7 @@
 #include <linux/smp_lock.h>
 #include <linux/vmalloc.h>
 #include <linux/slab.h>
+#include <linux/module.h>
 
 #include <asm/uaccess.h>
 #include <asm/system.h>
@@ -23,6 +24,8 @@
 #include <asm/desc.h>
 #include <asm/proto.h>
 
+#include <ub/ub_mem.h>
+
 #ifdef CONFIG_SMP /* avoids "defined but not used" warnig */
 static void flush_ldt(void *null)
 {
@@ -42,9 +45,9 @@ static int alloc_ldt(mm_context_t *pc, u
 	oldsize = pc->size;
 	mincount = (mincount+511)&(~511);
 	if (mincount*LDT_ENTRY_SIZE > PAGE_SIZE)
-		newldt = vmalloc(mincount*LDT_ENTRY_SIZE);
+		newldt = ub_vmalloc(mincount*LDT_ENTRY_SIZE);
 	else
-		newldt = kmalloc(mincount*LDT_ENTRY_SIZE, GFP_KERNEL);
+		newldt = ub_kmalloc(mincount*LDT_ENTRY_SIZE, GFP_KERNEL);
 
 	if (!newldt)
 		return -ENOMEM;
@@ -109,6 +112,7 @@ int init_new_context(struct task_struct 
 	}
 	return retval;
 }
+EXPORT_SYMBOL_GPL(init_new_context);
 
 /*
  * 
diff -uprN linux-2.6.16/arch/x86_64/kernel/nmi.c linux-2.6.16.ovz/arch/x86_64/kernel/nmi.c
--- linux-2.6.16/arch/x86_64/kernel/nmi.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/arch/x86_64/kernel/nmi.c	2006-07-05 08:34:55.000000000 -0400
@@ -522,6 +522,7 @@ static __kprobes int dummy_nmi_callback(
 }
  
 static nmi_callback_t nmi_callback = dummy_nmi_callback;
+static nmi_callback_t nmi_ipi_callback = dummy_nmi_callback;
  
 asmlinkage __kprobes void do_nmi(struct pt_regs * regs, long error_code)
 {
@@ -531,9 +532,26 @@ asmlinkage __kprobes void do_nmi(struct 
 	add_pda(__nmi_count,1);
 	if (!rcu_dereference(nmi_callback)(regs, cpu))
 		default_do_nmi(regs);
+
+	rcu_dereference(nmi_ipi_callback)(regs, cpu);
 	nmi_exit();
 }
 
+/*
+ * Install the handler that do_nmi() invokes on every NMI, after the regular
+ * nmi_callback.  Published with rcu_assign_pointer(), like nmi_callback.
+ */
+void set_nmi_ipi_callback(nmi_callback_t callback)
+{
+	rcu_assign_pointer(nmi_ipi_callback, callback);
+}
+
+/* Put the default dummy_nmi_callback handler back in place. */
+void unset_nmi_ipi_callback(void)
+{
+	nmi_ipi_callback = dummy_nmi_callback;
+}
+
 void set_nmi_callback(nmi_callback_t callback)
 {
 	rcu_assign_pointer(nmi_callback, callback);
diff -uprN linux-2.6.16/arch/x86_64/kernel/pci-gart.c linux-2.6.16.ovz/arch/x86_64/kernel/pci-gart.c
--- linux-2.6.16/arch/x86_64/kernel/pci-gart.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/arch/x86_64/kernel/pci-gart.c	2006-07-05 08:34:55.000000000 -0400
@@ -114,10 +114,6 @@ static unsigned long alloc_iommu(int siz
 static void free_iommu(unsigned long offset, int size)
 { 
 	unsigned long flags;
-	if (size == 1) { 
-		clear_bit(offset, iommu_gart_bitmap); 
-		return;
-	}
 	spin_lock_irqsave(&iommu_bitmap_lock, flags);
 	__clear_bit_string(iommu_gart_bitmap, offset, size);
 	spin_unlock_irqrestore(&iommu_bitmap_lock, flags);
diff -uprN linux-2.6.16/arch/x86_64/kernel/process.c linux-2.6.16.ovz/arch/x86_64/kernel/process.c
--- linux-2.6.16/arch/x86_64/kernel/process.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/arch/x86_64/kernel/process.c	2006-07-05 08:34:55.000000000 -0400
@@ -54,6 +54,11 @@
 #include <asm/idle.h>
 
 asmlinkage extern void ret_from_fork(void);
+asmlinkage extern void int_ret_from_sys_call(void);
+asmlinkage extern void execve(void);
+EXPORT_SYMBOL_GPL(ret_from_fork);
+EXPORT_SYMBOL_GPL(int_ret_from_sys_call);
+EXPORT_SYMBOL_GPL(execve);
 
 unsigned long kernel_thread_flags = CLONE_VM | CLONE_UNTRACED;
 
@@ -303,7 +308,8 @@ void __show_regs(struct pt_regs * regs)
 		(int)strcspn(system_utsname.version, " "),
 		system_utsname.version);
 	printk("RIP: %04lx:[<%016lx>] ", regs->cs & 0xffff, regs->rip);
-	printk_address(regs->rip); 
+	if (decode_call_traces)
+		printk_address(regs->rip); 
 	printk("\nRSP: %04lx:%016lx  EFLAGS: %08lx\n", regs->ss, regs->rsp,
 		regs->eflags);
 	printk("RAX: %016lx RBX: %016lx RCX: %016lx\n",
@@ -340,11 +346,26 @@ void __show_regs(struct pt_regs * regs)
 
 void show_regs(struct pt_regs *regs)
 {
-	printk("CPU %d:", smp_processor_id());
+	printk("CPU %d, VCPU %d:%d", smp_processor_id(), task_vsched_id(current), task_cpu(current));
 	__show_regs(regs);
 	show_trace(&regs->rsp);
 }
 
+void smp_show_regs(struct pt_regs *regs, void *data)
+{
+	static DEFINE_SPINLOCK(show_regs_lock);
+
+	if (regs == NULL)
+		return;
+
+	bust_spinlocks(1);
+	spin_lock(&show_regs_lock);
+	printk("----------- IPI show regs -----------\n");
+	show_regs(regs);
+	spin_unlock(&show_regs_lock);
+	bust_spinlocks(0);
+}
+
 /*
  * Free current thread data structures etc..
  */
@@ -527,8 +548,6 @@ __switch_to(struct task_struct *prev_p, 
 	int cpu = smp_processor_id();  
 	struct tss_struct *tss = &per_cpu(init_tss, cpu);
 
-	unlazy_fpu(prev_p);
-
 	/*
 	 * Reload esp0, LDT and the page table pointer:
 	 */
@@ -591,6 +610,12 @@ __switch_to(struct task_struct *prev_p, 
 	prev->userrsp = read_pda(oldrsp); 
 	write_pda(oldrsp, next->userrsp); 
 	write_pda(pcurrent, next_p); 
+
+ 	/* This must be here to ensure both math_state_restore() and
+	   kernel_fpu_begin() work consistently.
+	   And the AMD workaround requires it to be after DS reload. */
+	unlazy_fpu(prev_p);
+
 	write_pda(kernelstack,
 		  task_stack_page(next_p) + THREAD_SIZE - PDA_STACKOFFSET);
 
@@ -841,3 +866,27 @@ unsigned long arch_align_stack(unsigned 
 		sp -= get_random_int() % 8192;
 	return sp & ~0xf;
 }
+
+/*
+ * do_fork() wrapper used by kernel_thread().
+ *
+ * Forwards all arguments to do_fork() when ve_is_super() holds for the
+ * current execution environment; otherwise logs the attempt, dumps the
+ * stack and returns -EPERM.
+ */
+long do_fork_kthread(unsigned long clone_flags,
+	      unsigned long stack_start,
+	      struct pt_regs *regs,
+	      unsigned long stack_size,
+	      int __user *parent_tidptr,
+	      int __user *child_tidptr)
+{
+	if (ve_is_super(get_exec_env()))
+		return do_fork(clone_flags, stack_start, regs, stack_size,
+				parent_tidptr, child_tidptr);
+
+	/* Don't allow kernel_thread() inside VE */
+	printk(KERN_WARNING "kernel_thread call inside VE\n");
+	dump_stack();
+	return -EPERM;
+}
diff -uprN linux-2.6.16/arch/x86_64/kernel/ptrace.c linux-2.6.16.ovz/arch/x86_64/kernel/ptrace.c
--- linux-2.6.16/arch/x86_64/kernel/ptrace.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/arch/x86_64/kernel/ptrace.c	2006-07-05 08:34:55.000000000 -0400
@@ -300,6 +300,15 @@ static unsigned long getreg(struct task_
 			return child->thread.fs;
 		case offsetof(struct user_regs_struct, gs_base):
 			return child->thread.gs;
+		case offsetof(struct user_regs_struct, cs):
+			if (test_tsk_thread_flag(child, TIF_SYSCALL_TRACE)) {
+				val = get_stack_long(child, regno - sizeof(struct pt_regs));
+				if (val == __USER_CS)
+					return 0x33;
+				if (val == __USER32_CS)
+					return 0x23;
+			}
+			/* fall through */
 		default:
 			regno = regno - sizeof(struct pt_regs);
 			val = get_stack_long(child, regno);
@@ -581,8 +590,10 @@ static void syscall_trace(struct pt_regs
 	       current_thread_info()->flags, current->ptrace); 
 #endif
 
+	set_pn_state(current, (regs->rax != -ENOSYS) ? PN_STOP_LEAVE : PN_STOP_ENTRY);
 	ptrace_notify(SIGTRAP | ((current->ptrace & PT_TRACESYSGOOD)
 				? 0x80 : 0));
+	clear_pn_state(current);
 	/*
 	 * this isn't the same as continuing with a signal, but it will do
 	 * for normal use.  strace only continues with a signal if the
diff -uprN linux-2.6.16/arch/x86_64/kernel/setup.c linux-2.6.16.ovz/arch/x86_64/kernel/setup.c
--- linux-2.6.16/arch/x86_64/kernel/setup.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/arch/x86_64/kernel/setup.c	2006-07-05 08:34:55.000000000 -0400
@@ -909,6 +909,10 @@ static int __init init_amd(struct cpuinf
 	if (c->x86 == 15 && ((level >= 0x0f48 && level < 0x0f50) || level >= 0x0f58))
 		set_bit(X86_FEATURE_REP_GOOD, &c->x86_capability);
 
+	/* Enable workaround for FXSAVE leak */
+	if (c->x86 >= 6)
+		set_bit(X86_FEATURE_FXSAVE_LEAK, &c->x86_capability);
+
 	r = get_model_name(c);
 	if (!r) { 
 		switch (c->x86) { 
diff -uprN linux-2.6.16/arch/x86_64/kernel/setup64.c linux-2.6.16.ovz/arch/x86_64/kernel/setup64.c
--- linux-2.6.16/arch/x86_64/kernel/setup64.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/arch/x86_64/kernel/setup64.c	2006-07-05 08:34:55.000000000 -0400
@@ -290,3 +290,5 @@ void __cpuinit cpu_init (void)
 
 	fpu_init(); 
 }
+
+EXPORT_SYMBOL_GPL(cpu_gdt_descr);
diff -uprN linux-2.6.16/arch/x86_64/kernel/signal.c linux-2.6.16.ovz/arch/x86_64/kernel/signal.c
--- linux-2.6.16/arch/x86_64/kernel/signal.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/arch/x86_64/kernel/signal.c	2006-07-05 08:34:55.000000000 -0400
@@ -40,37 +40,6 @@ int ia32_setup_frame(int sig, struct k_s
             sigset_t *set, struct pt_regs * regs); 
 
 asmlinkage long
-sys_rt_sigsuspend(sigset_t __user *unewset, size_t sigsetsize, struct pt_regs *regs)
-{
-	sigset_t saveset, newset;
-
-	/* XXX: Don't preclude handling different sized sigset_t's.  */
-	if (sigsetsize != sizeof(sigset_t))
-		return -EINVAL;
-
-	if (copy_from_user(&newset, unewset, sizeof(newset)))
-		return -EFAULT;
-	sigdelsetmask(&newset, ~_BLOCKABLE);
-
-	spin_lock_irq(&current->sighand->siglock);
-	saveset = current->blocked;
-	current->blocked = newset;
-	recalc_sigpending();
-	spin_unlock_irq(&current->sighand->siglock);
-#ifdef DEBUG_SIG
-	printk("rt_sigsuspend savset(%lx) newset(%lx) regs(%p) rip(%lx)\n",
-		saveset, newset, regs, regs->rip);
-#endif 
-	regs->rax = -EINTR;
-	while (1) {
-		current->state = TASK_INTERRUPTIBLE;
-		schedule();
-		if (do_signal(regs, &saveset))
-			return -EINTR;
-	}
-}
-
-asmlinkage long
 sys_sigaltstack(const stack_t __user *uss, stack_t __user *uoss,
 		struct pt_regs *regs)
 {
@@ -344,11 +313,11 @@ static int setup_rt_frame(int sig, struc
 		current->comm, current->pid, frame, regs->rip, frame->pretcode);
 #endif
 
-	return 1;
+	return 0;
 
 give_sigsegv:
 	force_sigsegv(sig, current);
-	return 0;
+	return -EFAULT;
 }
 
 /*
@@ -411,7 +380,7 @@ handle_signal(unsigned long sig, siginfo
 #endif
 	ret = setup_rt_frame(sig, ka, info, oldset, regs);
 
-	if (ret) {
+	if (ret == 0) {
 		spin_lock_irq(&current->sighand->siglock);
 		sigorsets(&current->blocked,&current->blocked,&ka->sa.sa_mask);
 		if (!(ka->sa.sa_flags & SA_NODEFER))
@@ -428,9 +397,10 @@ handle_signal(unsigned long sig, siginfo
  * want to handle. Thus you cannot kill init even with a SIGKILL even by
  * mistake.
  */
-int do_signal(struct pt_regs *regs, sigset_t *oldset)
+static void do_signal(struct pt_regs *regs)
 {
 	struct k_sigaction ka;
+	sigset_t *oldset;
 	siginfo_t info;
 	int signr;
 
@@ -441,12 +411,14 @@ int do_signal(struct pt_regs *regs, sigs
 	 * if so.
 	 */
 	if (!user_mode(regs))
-		return 1;
+		return;
 
-	if (try_to_freeze())
+	if (try_to_freeze() && !signal_pending(current))
 		goto no_signal;
 
-	if (!oldset)
+	if (test_thread_flag(TIF_RESTORE_SIGMASK))
+		oldset = &current->saved_sigmask;
+	else
 		oldset = &current->blocked;
 
 	signr = get_signal_to_deliver(&info, &ka, regs, NULL);
@@ -460,7 +432,15 @@ int do_signal(struct pt_regs *regs, sigs
 			set_debugreg(current->thread.debugreg7, 7);
 
 		/* Whee!  Actually deliver the signal.  */
-		return handle_signal(signr, &info, &ka, oldset, regs);
+		if (handle_signal(signr, &info, &ka, oldset, regs) == 0) {
+			/* a signal was successfully delivered; the saved
+			 * sigmask will have been stored in the signal frame,
+			 * and will be restored by sigreturn, so we can simply
+			 * clear the TIF_RESTORE_SIGMASK flag */
+			if (test_thread_flag(TIF_RESTORE_SIGMASK))
+				clear_thread_flag(TIF_RESTORE_SIGMASK);
+		}
+		return;
 	}
 
  no_signal:
@@ -481,10 +461,16 @@ int do_signal(struct pt_regs *regs, sigs
 			regs->rip -= 2;
 		}
 	}
-	return 0;
+
+	/* if there's no signal to deliver, we just put the saved sigmask
+	 * back */
+	if (test_thread_flag(TIF_RESTORE_SIGMASK)) {
+		clear_thread_flag(TIF_RESTORE_SIGMASK);
+		sigprocmask(SIG_SETMASK, &current->saved_sigmask, NULL);
+	}
 }
 
-void do_notify_resume(struct pt_regs *regs, sigset_t *oldset, __u32 thread_info_flags)
+void do_notify_resume(struct pt_regs *regs, sigset_t *unused, __u32 thread_info_flags)
 {
 #ifdef DEBUG_SIG
 	printk("do_notify_resume flags:%x rip:%lx rsp:%lx caller:%lx pending:%lx\n",
@@ -498,8 +484,8 @@ void do_notify_resume(struct pt_regs *re
 	}
 
 	/* deal with pending signal delivery */
-	if (thread_info_flags & _TIF_SIGPENDING)
-		do_signal(regs,oldset);
+	if (thread_info_flags & (_TIF_SIGPENDING|_TIF_RESTORE_SIGMASK))
+		do_signal(regs);
 }
 
 void signal_fault(struct pt_regs *regs, void __user *frame, char *where)
diff -uprN linux-2.6.16/arch/x86_64/kernel/smp.c linux-2.6.16.ovz/arch/x86_64/kernel/smp.c
--- linux-2.6.16/arch/x86_64/kernel/smp.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/arch/x86_64/kernel/smp.c	2006-07-05 08:34:55.000000000 -0400
@@ -28,6 +28,7 @@
 #include <asm/proto.h>
 #include <asm/apicdef.h>
 #include <asm/idle.h>
+#include <asm/nmi.h>
 
 /*
  *	Smarter SMP flushing macros. 
@@ -444,6 +445,104 @@ int smp_call_function (void (*func) (voi
 	return 0;
 }
 
+/* Taken with spin_trylock() so a second initiator fails instead of spinning. */
+static DEFINE_SPINLOCK(nmi_call_lock);
+
+/*
+ * Parameters of the NMI cross-call currently in flight.  The structure
+ * lives on the initiator's stack and is published via nmi_call_data.
+ */
+static struct nmi_call_data_struct {
+	smp_nmi_function func;	/* function to run on the other CPUs */
+	void *info;		/* argument handed to func */
+	atomic_t started;	/* number of CPUs that picked up the call */
+	atomic_t finished;	/* number of CPUs done with func (if wait) */
+	cpumask_t cpus_called;	/* CPUs that must not run func (again) */
+	int wait;		/* initiator waits for func to complete */
+} *nmi_call_data;
+
+/*
+ * NMI-side half of smp_nmi_call_function(): run the published function
+ * on this CPU, at most once per call.  Always returns 0.
+ */
+static int smp_nmi_callback(struct pt_regs * regs, int cpu)
+{
+	smp_nmi_function func;
+	void *info;
+	int wait;
+
+	func = nmi_call_data->func;
+	info = nmi_call_data->info;
+	wait = nmi_call_data->wait;
+	ack_APIC_irq();
+	/* prevent from calling func() multiple times */
+	if (cpu_test_and_set(cpu, nmi_call_data->cpus_called))
+		return 0;
+	/*
+	 * notify initiating CPU that I've grabbed the data and am
+	 * about to execute the function
+	 */
+	mb();
+	atomic_inc(&nmi_call_data->started);
+	/*
+	 * at this point the nmi_call_data structure is out of scope,
+	 * unless the initiator asked to wait for completion
+	 */
+	irq_enter();
+	func(regs, info);
+	irq_exit();
+	if (wait)
+		atomic_inc(&nmi_call_data->finished);
+
+	return 0;
+}
+
+/*
+ * Run func(regs, info) on all other online CPUs from NMI context.
+ *
+ * If wait is non-zero, also spin until every CPU has finished func.
+ * Returns 0 on success (or when there is no other online CPU) and -1
+ * if another NMI cross-call is already in progress.
+ */
+int smp_nmi_call_function(smp_nmi_function func, void *info, int wait)
+{
+	struct nmi_call_data_struct data;
+	int cpus;
+
+	cpus = num_online_cpus() - 1;
+	if (!cpus)
+		return 0;
+
+	data.func = func;
+	data.info = info;
+	data.wait = wait;
+	atomic_set(&data.started, 0);
+	atomic_set(&data.finished, 0);
+	cpus_clear(data.cpus_called);
+	/* prevent this cpu from calling func if NMI happens */
+	cpu_set(smp_processor_id(), data.cpus_called);
+
+	if (!spin_trylock(&nmi_call_lock))
+		return -1;
+
+	nmi_call_data = &data;
+	set_nmi_ipi_callback(smp_nmi_callback);
+	mb();
+
+	/* Send a message to all other CPUs and wait for them to respond */
+	send_IPI_allbutself(APIC_DM_NMI);
+	while (atomic_read(&data.started) != cpus)
+		cpu_relax();
+
+	unset_nmi_ipi_callback();
+	if (wait)
+		while (atomic_read(&data.finished) != cpus)
+			cpu_relax();
+	spin_unlock(&nmi_call_lock);
+
+	return 0;
+}
+
 void smp_stop_cpu(void)
 {
 	unsigned long flags;
diff -uprN linux-2.6.16/arch/x86_64/kernel/sys_x86_64.c linux-2.6.16.ovz/arch/x86_64/kernel/sys_x86_64.c
--- linux-2.6.16/arch/x86_64/kernel/sys_x86_64.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/arch/x86_64/kernel/sys_x86_64.c	2006-07-05 08:34:55.000000000 -0400
@@ -148,7 +148,7 @@ asmlinkage long sys_uname(struct new_uts
 {
 	int err;
 	down_read(&uts_sem);
-	err = copy_to_user(name, &system_utsname, sizeof (*name));
+	err = copy_to_user(name, &ve_utsname, sizeof (*name));
 	up_read(&uts_sem);
 	if (personality(current->personality) == PER_LINUX32) 
 		err |= copy_to_user(&name->machine, "i686", 5); 		
diff -uprN linux-2.6.16/arch/x86_64/kernel/time.c linux-2.6.16.ovz/arch/x86_64/kernel/time.c
--- linux-2.6.16/arch/x86_64/kernel/time.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/arch/x86_64/kernel/time.c	2006-07-05 08:34:55.000000000 -0400
@@ -66,6 +66,8 @@ unsigned long vxtime_hz = PIT_TICK_RATE;
 int report_lost_ticks;				/* command line option */
 unsigned long long monotonic_base;
 
+EXPORT_SYMBOL(cpu_khz);
+
 struct vxtime_data __vxtime __section_vxtime;	/* for vsyscalls */
 
 volatile unsigned long __jiffies __section_jiffies = INITIAL_JIFFIES;
diff -uprN linux-2.6.16/arch/x86_64/kernel/traps.c linux-2.6.16.ovz/arch/x86_64/kernel/traps.c
--- linux-2.6.16/arch/x86_64/kernel/traps.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/arch/x86_64/kernel/traps.c	2006-07-05 08:34:55.000000000 -0400
@@ -30,6 +30,7 @@
 #include <linux/moduleparam.h>
 #include <linux/nmi.h>
 #include <linux/kprobes.h>
+#include <linux/kexec.h>
 
 #include <asm/system.h>
 #include <asm/uaccess.h>
@@ -116,6 +117,9 @@ int printk_address(unsigned long address
 	char *delim = ":"; 
 	char namebuf[128];
 
+	if (!decode_call_traces)
+		return printk("[<%016lx>]", address);
+
 	symname = kallsyms_lookup(address, &symsize, &offset, &modname, namebuf); 
 	if (!symname) 
 		return printk("[<%016lx>]", address);
@@ -208,7 +212,7 @@ void show_trace(unsigned long *stack)
 	do while (cond) { \
 		unsigned long addr = *stack++; \
 		if (kernel_text_address(addr)) { \
-			if (i > 50) { \
+			if (i > 50 && decode_call_traces) { \
 				printk("\n       "); \
 				i = 0; \
 			} \
@@ -290,7 +294,7 @@ void show_stack(struct task_struct *tsk,
 		if (((long) stack & (THREAD_SIZE-1)) == 0)
 			break;
 		}
-		if (i && ((i % 4) == 0))
+		if (i && ((i % 4) == 0) && decode_call_traces)
 			printk("\n       ");
 		printk("%016lx ", *stack++);
 		touch_nmi_watchdog();
@@ -319,10 +323,12 @@ void show_registers(struct pt_regs *regs
 
 		rsp = regs->rsp;
 
-	printk("CPU %d ", cpu);
+	printk("CPU: %d ", cpu);
 	__show_regs(regs);
-	printk("Process %s (pid: %d, threadinfo %p, task %p)\n",
-		cur->comm, cur->pid, task_thread_info(cur), cur);
+	printk("Process %s (pid: %d, veid=%d, threadinfo %p, task %p)\n",
+		cur->comm, cur->pid,
+		VEID(VE_TASK_INFO(current)->owner_env),
+		task_thread_info(cur), cur);
 
 	/*
 	 * When in-kernel, we also print out the stack and code at the
@@ -434,6 +440,8 @@ void __kprobes __die(const char * str, s
 	printk(KERN_ALERT "RIP ");
 	printk_address(regs->rip); 
 	printk(" RSP <%016lx>\n", regs->rsp); 
+	if (kexec_should_crash(current))
+		crash_kexec(regs);
 }
 
 void die(const char * str, struct pt_regs * regs, long err)
@@ -456,8 +464,11 @@ void __kprobes die_nmi(char *str, struct
 	 */
 	printk(str, safe_smp_processor_id());
 	show_registers(regs);
+	if (kexec_should_crash(current))
+		crash_kexec(regs);
 	if (panic_on_timeout || panic_on_oops)
 		panic("nmi watchdog");
+	smp_nmi_call_function(smp_show_regs, NULL, 1);
 	printk("console shuts up ...\n");
 	oops_end(flags);
 	do_exit(SIGSEGV);
diff -uprN linux-2.6.16/arch/x86_64/mm/fault.c linux-2.6.16.ovz/arch/x86_64/mm/fault.c
--- linux-2.6.16/arch/x86_64/mm/fault.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/arch/x86_64/mm/fault.c	2006-07-05 08:34:55.000000000 -0400
@@ -41,27 +41,6 @@
 #define PF_RSVD	(1<<3)
 #define PF_INSTR	(1<<4)
 
-void bust_spinlocks(int yes)
-{
-	int loglevel_save = console_loglevel;
-	if (yes) {
-		oops_in_progress = 1;
-	} else {
-#ifdef CONFIG_VT
-		unblank_screen();
-#endif
-		oops_in_progress = 0;
-		/*
-		 * OK, the message is on the console.  Now we call printk()
-		 * without oops_in_progress set so that printk will give klogd
-		 * a poke.  Hold onto your hats...
-		 */
-		console_loglevel = 15;		/* NMI oopser may have shut the console up */
-		printk(" ");
-		console_loglevel = loglevel_save;
-	}
-}
-
 /* Sometimes the CPU reports invalid exceptions on prefetch.
    Check that here and ignore.
    Opcode checker based on code by Richard Brunner */
@@ -293,7 +272,7 @@ static int vmalloc_fault(unsigned long a
 }
 
 int page_fault_trace = 0;
-int exception_trace = 1;
+int exception_trace = 0;
 
 /*
  * This routine handles page faults.  It determines the address,
@@ -322,7 +301,7 @@ asmlinkage void __kprobes do_page_fault(
 		local_irq_enable();
 
 	if (unlikely(page_fault_trace))
-		printk("pagefault rip:%lx rsp:%lx cs:%lu ss:%lu address %lx error %lx\n",
+		ve_printk(VE_LOG, "pagefault rip:%lx rsp:%lx cs:%lu ss:%lu address %lx error %lx\n",
 		       regs->rip,regs->rsp,regs->cs,regs->ss,address,error_code); 
 
 	tsk = current;
@@ -372,7 +351,6 @@ asmlinkage void __kprobes do_page_fault(
 	if (unlikely(in_atomic() || !mm))
 		goto bad_area_nosemaphore;
 
- again:
 	/* When running in the kernel we expect faults to occur only to
 	 * addresses in user space.  All other faults represent errors in the
 	 * kernel and should generate an OOPS.  Unfortunatly, in the case of an
@@ -476,7 +454,7 @@ bad_area_nosemaphore:
 			return;
 
 		if (exception_trace && unhandled_signal(tsk, SIGSEGV)) {
-			printk(
+			ve_printk(VE_LOG, 
 		       "%s%s[%d]: segfault at %016lx rip %016lx rsp %016lx error %lx\n",
 					tsk->pid > 1 ? KERN_INFO : KERN_EMERG,
 					tsk->comm, tsk->pid, address, regs->rip,
@@ -526,8 +504,10 @@ no_context:
 	else
 		printk(KERN_ALERT "Unable to handle kernel paging request");
 	printk(" at %016lx RIP: \n" KERN_ALERT,address);
-	printk_address(regs->rip);
-	printk("\n");
+	if (decode_call_traces) {
+		printk_address(regs->rip);
+		printk("\n");
+	}
 	dump_pagetable(address);
 	tsk->thread.cr2 = address;
 	tsk->thread.trap_no = 14;
@@ -544,13 +524,14 @@ no_context:
  */
 out_of_memory:
 	up_read(&mm->mmap_sem);
-	if (current->pid == 1) { 
-		yield();
-		goto again;
-	}
-	printk("VM: killing process %s\n", tsk->comm);
-	if (error_code & 4)
-		do_exit(SIGKILL);
+	if (error_code & 4) {
+		/* 
+		 * A 0-order allocation always succeeds unless something
+		 * really fatal happens: beancounter overdraft or OOM.
+		 */
+		force_sig(SIGKILL, tsk);
+		return;
+	}
 	goto no_context;
 
 do_sigbus:
diff -uprN linux-2.6.16/arch/x86_64/mm/init.c linux-2.6.16.ovz/arch/x86_64/mm/init.c
--- linux-2.6.16/arch/x86_64/mm/init.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/arch/x86_64/mm/init.c	2006-07-05 08:34:55.000000000 -0400
@@ -89,6 +89,7 @@ void show_mem(void)
 	printk(KERN_INFO "%lu pages shared\n",shared);
 	printk(KERN_INFO "%lu pages swap cached\n",cached);
 }
+EXPORT_SYMBOL(show_mem);
 
 /* References to section boundaries */
 
diff -uprN linux-2.6.16/block/elevator.c linux-2.6.16.ovz/block/elevator.c
--- linux-2.6.16/block/elevator.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/block/elevator.c	2006-07-05 08:34:55.000000000 -0400
@@ -314,6 +314,7 @@ void elv_insert(request_queue_t *q, stru
 {
 	struct list_head *pos;
 	unsigned ordseq;
+	int unplug_it = 1;
 
 	rq->q = q;
 
@@ -378,6 +379,11 @@ void elv_insert(request_queue_t *q, stru
 		}
 
 		list_add_tail(&rq->queuelist, pos);
+		/*
+		 * most requeues happen because of a busy condition, don't
+		 * force unplug of the queue for that case.
+		 */
+		unplug_it = 0;
 		break;
 
 	default:
@@ -386,7 +392,7 @@ void elv_insert(request_queue_t *q, stru
 		BUG();
 	}
 
-	if (blk_queue_plugged(q)) {
+	if (unplug_it && blk_queue_plugged(q)) {
 		int nrq = q->rq.count[READ] + q->rq.count[WRITE]
 			- q->in_flight;
 
@@ -676,7 +682,7 @@ void elv_unregister(struct elevator_type
 	 * Iterate every thread in the process to remove the io contexts.
 	 */
 	read_lock(&tasklist_lock);
-	do_each_thread(g, p) {
+	do_each_thread_all(g, p) {
 		struct io_context *ioc = p->io_context;
 		if (ioc && ioc->cic) {
 			ioc->cic->exit(ioc->cic);
@@ -688,7 +694,7 @@ void elv_unregister(struct elevator_type
 			ioc->aic->dtor(ioc->aic);
 			ioc->aic = NULL;
 		}
-	} while_each_thread(g, p);
+	} while_each_thread_all(g, p);
 	read_unlock(&tasklist_lock);
 
 	spin_lock_irq(&elv_list_lock);
diff -uprN linux-2.6.16/block/genhd.c linux-2.6.16.ovz/block/genhd.c
--- linux-2.6.16/block/genhd.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/block/genhd.c	2006-07-05 08:34:55.000000000 -0400
@@ -16,9 +16,8 @@
 #include <linux/kobj_map.h>
 #include <linux/buffer_head.h>
 
-#define MAX_PROBE_HASH 255	/* random */
-
-static struct subsystem block_subsys;
+struct subsystem block_subsys;
+EXPORT_SYMBOL(block_subsys);
 
 static DECLARE_MUTEX(block_subsys_sem);
 
@@ -30,108 +29,29 @@ static struct blk_major_name {
 	struct blk_major_name *next;
 	int major;
 	char name[16];
-} *major_names[MAX_PROBE_HASH];
+} *major_names[BLKDEV_MAJOR_HASH_SIZE];
 
 /* index in the above - for now: assume no multimajor ranges */
 static inline int major_to_index(int major)
 {
-	return major % MAX_PROBE_HASH;
-}
-
-struct blkdev_info {
-        int index;
-        struct blk_major_name *bd;
-};
-
-/*
- * iterate over a list of blkdev_info structures.  allows
- * the major_names array to be iterated over from outside this file
- * must be called with the block_subsys_sem held
- */
-void *get_next_blkdev(void *dev)
-{
-        struct blkdev_info *info;
-
-        if (dev == NULL) {
-                info = kmalloc(sizeof(*info), GFP_KERNEL);
-                if (!info)
-                        goto out;
-                info->index=0;
-                info->bd = major_names[info->index];
-                if (info->bd)
-                        goto out;
-        } else {
-                info = dev;
-        }
-
-        while (info->index < ARRAY_SIZE(major_names)) {
-                if (info->bd)
-                        info->bd = info->bd->next;
-                if (info->bd)
-                        goto out;
-                /*
-                 * No devices on this chain, move to the next
-                 */
-                info->index++;
-                info->bd = (info->index < ARRAY_SIZE(major_names)) ?
-			major_names[info->index] : NULL;
-                if (info->bd)
-                        goto out;
-        }
-
-out:
-        return info;
-}
-
-void *acquire_blkdev_list(void)
-{
-        down(&block_subsys_sem);
-        return get_next_blkdev(NULL);
-}
-
-void release_blkdev_list(void *dev)
-{
-        up(&block_subsys_sem);
-        kfree(dev);
+	return major % BLKDEV_MAJOR_HASH_SIZE;
 }
 
+#ifdef CONFIG_PROC_FS
 
-/*
- * Count the number of records in the blkdev_list.
- * must be called with the block_subsys_sem held
- */
-int count_blkdev_list(void)
+void blkdev_show(struct seq_file *f, off_t offset)
 {
-	struct blk_major_name *n;
-	int i, count;
-
-	count = 0;
+	struct blk_major_name *dp;
 
-	for (i = 0; i < ARRAY_SIZE(major_names); i++) {
-		for (n = major_names[i]; n; n = n->next)
-				count++;
+	if (offset < BLKDEV_MAJOR_HASH_SIZE) {
+		down(&block_subsys_sem);
+		for (dp = major_names[offset]; dp; dp = dp->next)
+			seq_printf(f, "%3d %s\n", dp->major, dp->name);
+		up(&block_subsys_sem);
 	}
-
-	return count;
-}
-
-/*
- * extract the major and name values from a blkdev_info struct
- * passed in as a void to *dev.  Must be called with
- * block_subsys_sem held
- */
-int get_blkdev_info(void *dev, int *major, char **name)
-{
-        struct blkdev_info *info = dev;
-
-        if (info->bd == NULL)
-                return 1;
-
-        *major = info->bd->major;
-        *name = info->bd->name;
-        return 0;
 }
 
+#endif /* CONFIG_PROC_FS */
 
 int register_blkdev(unsigned int major, const char *name)
 {
@@ -592,7 +512,7 @@ static struct kset_uevent_ops block_ueve
 };
 
 /* declare block_subsys. */
-static decl_subsys(block, &ktype_block, &block_uevent_ops);
+decl_subsys(block, &ktype_block, &block_uevent_ops);
 
 
 /*
diff -uprN linux-2.6.16/block/ll_rw_blk.c linux-2.6.16.ovz/block/ll_rw_blk.c
--- linux-2.6.16/block/ll_rw_blk.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/block/ll_rw_blk.c	2006-07-05 08:34:55.000000000 -0400
@@ -1719,8 +1719,21 @@ void blk_run_queue(struct request_queue 
 
 	spin_lock_irqsave(q->queue_lock, flags);
 	blk_remove_plug(q);
-	if (!elv_queue_empty(q))
-		q->request_fn(q);
+
+	/*
+	 * Only recurse once to avoid overrunning the stack; let the unplug
+	 * handling reinvoke the handler shortly if we already got there.
+	 */
+	if (!elv_queue_empty(q)) {
+		if (!test_and_set_bit(QUEUE_FLAG_REENTER, &q->queue_flags)) {
+			q->request_fn(q);
+			clear_bit(QUEUE_FLAG_REENTER, &q->queue_flags);
+		} else {
+			blk_plug_device(q);
+			kblockd_schedule_work(&q->unplug_work);
+		}
+	}
+
 	spin_unlock_irqrestore(q->queue_lock, flags);
 }
 EXPORT_SYMBOL(blk_run_queue);
diff -uprN linux-2.6.16/configs/kernel-2.6.16-026test015-i686-enterprise.config.ovz linux-2.6.16.ovz/configs/kernel-2.6.16-026test015-i686-enterprise.config.ovz
--- linux-2.6.16/configs/kernel-2.6.16-026test015-i686-enterprise.config.ovz	1969-12-31 19:00:00.000000000 -0500
+++ linux-2.6.16.ovz/configs/kernel-2.6.16-026test015-i686-enterprise.config.ovz	2006-07-05 08:35:03.000000000 -0400
@@ -0,0 +1,1707 @@
+#
+# Automatically generated make config: don't edit
+# Linux kernel version: 2.6.16-026test015
+# Wed Jul  5 08:35:03 2006
+#
+CONFIG_X86_32=y
+CONFIG_SEMAPHORE_SLEEPERS=y
+CONFIG_X86=y
+CONFIG_MMU=y
+CONFIG_GENERIC_ISA_DMA=y
+CONFIG_GENERIC_IOMAP=y
+CONFIG_ARCH_MAY_HAVE_PC_FDC=y
+CONFIG_DMI=y
+
+#
+# Code maturity level options
+#
+CONFIG_EXPERIMENTAL=y
+CONFIG_LOCK_KERNEL=y
+CONFIG_INIT_ENV_ARG_LIMIT=32
+
+#
+# General setup
+#
+CONFIG_LOCALVERSION=""
+# CONFIG_LOCALVERSION_AUTO is not set
+CONFIG_SWAP=y
+CONFIG_SYSVIPC=y
+# CONFIG_POSIX_MQUEUE is not set
+# CONFIG_BSD_PROCESS_ACCT is not set
+CONFIG_SYSCTL=y
+# CONFIG_AUDIT is not set
+# CONFIG_IKCONFIG is not set
+# CONFIG_CPUSETS is not set
+CONFIG_INITRAMFS_SOURCE=""
+CONFIG_UID16=y
+CONFIG_VM86=y
+# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
+# CONFIG_EMBEDDED is not set
+CONFIG_KALLSYMS=y
+# CONFIG_KALLSYMS_ALL is not set
+# CONFIG_KALLSYMS_EXTRA_PASS is not set
+CONFIG_HOTPLUG=y
+CONFIG_PRINTK=y
+CONFIG_BUG=y
+CONFIG_ELF_CORE=y
+CONFIG_BASE_FULL=y
+CONFIG_FUTEX=y
+CONFIG_EPOLL=y
+CONFIG_SHMEM=y
+CONFIG_CC_ALIGN_FUNCTIONS=0
+CONFIG_CC_ALIGN_LABELS=0
+CONFIG_CC_ALIGN_LOOPS=0
+CONFIG_CC_ALIGN_JUMPS=0
+CONFIG_SLAB=y
+# CONFIG_TINY_SHMEM is not set
+CONFIG_BASE_SMALL=0
+# CONFIG_SLOB is not set
+
+#
+# Loadable module support
+#
+CONFIG_MODULES=y
+CONFIG_MODULE_UNLOAD=y
+# CONFIG_MODULE_FORCE_UNLOAD is not set
+CONFIG_OBSOLETE_MODPARM=y
+# CONFIG_MODVERSIONS is not set
+# CONFIG_MODULE_SRCVERSION_ALL is not set
+CONFIG_KMOD=y
+CONFIG_STOP_MACHINE=y
+
+#
+# Block layer
+#
+CONFIG_LBD=y
+
+#
+# IO Schedulers
+#
+CONFIG_IOSCHED_NOOP=y
+CONFIG_IOSCHED_AS=y
+CONFIG_IOSCHED_DEADLINE=y
+CONFIG_IOSCHED_CFQ=y
+# CONFIG_DEFAULT_AS is not set
+# CONFIG_DEFAULT_DEADLINE is not set
+CONFIG_DEFAULT_CFQ=y
+# CONFIG_DEFAULT_NOOP is not set
+CONFIG_DEFAULT_IOSCHED="cfq"
+
+#
+# Processor type and features
+#
+CONFIG_X86_PC=y
+# CONFIG_X86_ELAN is not set
+# CONFIG_X86_VOYAGER is not set
+# CONFIG_X86_NUMAQ is not set
+# CONFIG_X86_SUMMIT is not set
+# CONFIG_X86_BIGSMP is not set
+# CONFIG_X86_VISWS is not set
+# CONFIG_X86_GENERICARCH is not set
+# CONFIG_X86_ES7000 is not set
+# CONFIG_M386 is not set
+# CONFIG_M486 is not set
+# CONFIG_M586 is not set
+# CONFIG_M586TSC is not set
+# CONFIG_M586MMX is not set
+CONFIG_M686=y
+# CONFIG_MPENTIUMII is not set
+# CONFIG_MPENTIUMIII is not set
+# CONFIG_MPENTIUMM is not set
+# CONFIG_MPENTIUM4 is not set
+# CONFIG_MK6 is not set
+# CONFIG_MK7 is not set
+# CONFIG_MK8 is not set
+# CONFIG_MCRUSOE is not set
+# CONFIG_MEFFICEON is not set
+# CONFIG_MWINCHIPC6 is not set
+# CONFIG_MWINCHIP2 is not set
+# CONFIG_MWINCHIP3D is not set
+# CONFIG_MGEODEGX1 is not set
+# CONFIG_MGEODE_LX is not set
+# CONFIG_MCYRIXIII is not set
+# CONFIG_MVIAC3_2 is not set
+# CONFIG_X86_GENERIC is not set
+CONFIG_X86_CMPXCHG=y
+CONFIG_X86_XADD=y
+CONFIG_X86_L1_CACHE_SHIFT=5
+CONFIG_RWSEM_XCHGADD_ALGORITHM=y
+CONFIG_GENERIC_CALIBRATE_DELAY=y
+CONFIG_X86_PPRO_FENCE=y
+CONFIG_X86_WP_WORKS_OK=y
+CONFIG_X86_INVLPG=y
+CONFIG_X86_BSWAP=y
+CONFIG_X86_POPAD_OK=y
+CONFIG_X86_CMPXCHG64=y
+CONFIG_X86_GOOD_APIC=y
+CONFIG_X86_USE_PPRO_CHECKSUM=y
+CONFIG_X86_TSC=y
+CONFIG_HPET_TIMER=y
+CONFIG_HPET_EMULATE_RTC=y
+CONFIG_SMP=y
+CONFIG_NR_CPUS=32
+CONFIG_SCHED_VCPU=y
+CONFIG_FAIRSCHED=y
+CONFIG_SCHED_SMT=y
+CONFIG_PREEMPT_NONE=y
+# CONFIG_PREEMPT_VOLUNTARY is not set
+# CONFIG_PREEMPT is not set
+# CONFIG_PREEMPT_BKL is not set
+CONFIG_X86_LOCAL_APIC=y
+CONFIG_X86_IO_APIC=y
+CONFIG_NMI_WATCHDOG=y
+CONFIG_X86_MCE=y
+# CONFIG_X86_MCE_NONFATAL is not set
+CONFIG_X86_MCE_P4THERMAL=y
+# CONFIG_TOSHIBA is not set
+# CONFIG_I8K is not set
+# CONFIG_X86_REBOOTFIXUPS is not set
+CONFIG_MICROCODE=m
+CONFIG_X86_MSR=y
+# CONFIG_X86_CPUID is not set
+
+#
+# Firmware Drivers
+#
+# CONFIG_EDD is not set
+CONFIG_DELL_RBU=m
+CONFIG_DCDBAS=m
+# CONFIG_NOHIGHMEM is not set
+# CONFIG_HIGHMEM4G is not set
+CONFIG_HIGHMEM64G=y
+CONFIG_PAGE_OFFSET=0xC0000000
+CONFIG_HIGHMEM=y
+CONFIG_X86_PAE=y
+CONFIG_ARCH_FLATMEM_ENABLE=y
+CONFIG_ARCH_SPARSEMEM_ENABLE=y
+CONFIG_ARCH_SELECT_MEMORY_MODEL=y
+CONFIG_SELECT_MEMORY_MODEL=y
+CONFIG_FLATMEM_MANUAL=y
+# CONFIG_DISCONTIGMEM_MANUAL is not set
+# CONFIG_SPARSEMEM_MANUAL is not set
+CONFIG_FLATMEM=y
+CONFIG_FLAT_NODE_MEM_MAP=y
+CONFIG_SPARSEMEM_STATIC=y
+CONFIG_SPLIT_PTLOCK_CPUS=4
+CONFIG_HIGHPTE=y
+# CONFIG_MATH_EMULATION is not set
+CONFIG_MTRR=y
+# CONFIG_EFI is not set
+CONFIG_IRQBALANCE=y
+# CONFIG_REGPARM is not set
+# CONFIG_SECCOMP is not set
+# CONFIG_HZ_100 is not set
+# CONFIG_HZ_250 is not set
+CONFIG_HZ_1000=y
+CONFIG_HZ=1000
+# CONFIG_KEXEC is not set
+# CONFIG_CRASH_DUMP is not set
+CONFIG_PHYSICAL_START=0x100000
+# CONFIG_HOTPLUG_CPU is not set
+CONFIG_DOUBLEFAULT=y
+
+#
+# Power management options (ACPI, APM)
+#
+CONFIG_PM=y
+CONFIG_PM_LEGACY=y
+# CONFIG_PM_DEBUG is not set
+CONFIG_SOFTWARE_SUSPEND=y
+CONFIG_PM_STD_PARTITION=""
+
+#
+# ACPI (Advanced Configuration and Power Interface) Support
+#
+CONFIG_ACPI=y
+CONFIG_ACPI_AC=m
+CONFIG_ACPI_BATTERY=m
+CONFIG_ACPI_BUTTON=m
+CONFIG_ACPI_VIDEO=y
+CONFIG_ACPI_HOTKEY=m
+CONFIG_ACPI_FAN=m
+CONFIG_ACPI_PROCESSOR=m
+CONFIG_ACPI_THERMAL=m
+CONFIG_ACPI_ASUS=m
+CONFIG_ACPI_IBM=m
+CONFIG_ACPI_TOSHIBA=m
+CONFIG_ACPI_BLACKLIST_YEAR=0
+# CONFIG_ACPI_DEBUG is not set
+CONFIG_ACPI_EC=y
+CONFIG_ACPI_POWER=y
+CONFIG_ACPI_SYSTEM=y
+CONFIG_X86_PM_TIMER=y
+# CONFIG_ACPI_CONTAINER is not set
+
+#
+# APM (Advanced Power Management) BIOS Support
+#
+# CONFIG_APM is not set
+
+#
+# CPU Frequency scaling
+#
+# CONFIG_CPU_FREQ is not set
+
+#
+# Bus options (PCI, PCMCIA, EISA, MCA, ISA)
+#
+CONFIG_PCI=y
+# CONFIG_PCI_GOBIOS is not set
+# CONFIG_PCI_GOMMCONFIG is not set
+# CONFIG_PCI_GODIRECT is not set
+CONFIG_PCI_GOANY=y
+CONFIG_PCI_BIOS=y
+CONFIG_PCI_DIRECT=y
+CONFIG_PCI_MMCONFIG=y
+# CONFIG_PCIEPORTBUS is not set
+# CONFIG_PCI_MSI is not set
+# CONFIG_PCI_LEGACY_PROC is not set
+# CONFIG_PCI_DEBUG is not set
+CONFIG_ISA_DMA_API=y
+CONFIG_ISA=y
+# CONFIG_EISA is not set
+# CONFIG_MCA is not set
+# CONFIG_SCx200 is not set
+
+#
+# PCCARD (PCMCIA/CardBus) support
+#
+# CONFIG_PCCARD is not set
+
+#
+# PCI Hotplug Support
+#
+CONFIG_HOTPLUG_PCI=y
+# CONFIG_HOTPLUG_PCI_FAKE is not set
+CONFIG_HOTPLUG_PCI_COMPAQ=m
+# CONFIG_HOTPLUG_PCI_COMPAQ_NVRAM is not set
+CONFIG_HOTPLUG_PCI_IBM=m
+CONFIG_HOTPLUG_PCI_ACPI=m
+# CONFIG_HOTPLUG_PCI_ACPI_IBM is not set
+# CONFIG_HOTPLUG_PCI_CPCI is not set
+CONFIG_HOTPLUG_PCI_SHPC=m
+# CONFIG_HOTPLUG_PCI_SHPC_POLL_EVENT_MODE is not set
+
+#
+# Executable file formats
+#
+CONFIG_BINFMT_ELF=y
+CONFIG_BINFMT_AOUT=m
+# CONFIG_BINFMT_MISC is not set
+
+#
+# Networking
+#
+CONFIG_NET=y
+
+#
+# Networking options
+#
+# CONFIG_NETDEBUG is not set
+CONFIG_PACKET=m
+CONFIG_PACKET_MMAP=y
+CONFIG_UNIX=y
+# CONFIG_NET_KEY is not set
+CONFIG_INET=y
+# CONFIG_IP_MULTICAST is not set
+CONFIG_IP_ADVANCED_ROUTER=y
+CONFIG_ASK_IP_FIB_HASH=y
+# CONFIG_IP_FIB_TRIE is not set
+CONFIG_IP_FIB_HASH=y
+CONFIG_IP_MULTIPLE_TABLES=y
+CONFIG_IP_ROUTE_FWMARK=y
+CONFIG_IP_ROUTE_MULTIPATH=y
+# CONFIG_IP_ROUTE_MULTIPATH_CACHED is not set
+CONFIG_IP_ROUTE_VERBOSE=y
+# CONFIG_IP_PNP is not set
+CONFIG_NET_IPIP=m
+CONFIG_NET_IPGRE=m
+# CONFIG_ARPD is not set
+CONFIG_SYN_COOKIES=y
+# CONFIG_INET_AH is not set
+# CONFIG_INET_ESP is not set
+# CONFIG_INET_IPCOMP is not set
+# CONFIG_INET_TUNNEL is not set
+CONFIG_INET_DIAG=y
+CONFIG_INET_TCP_DIAG=y
+# CONFIG_TCP_CONG_ADVANCED is not set
+CONFIG_TCP_CONG_BIC=y
+
+#
+# IP: Virtual Server Configuration
+#
+# CONFIG_IP_VS is not set
+CONFIG_IPV6=y
+# CONFIG_IPV6_PRIVACY is not set
+# CONFIG_INET6_AH is not set
+# CONFIG_INET6_ESP is not set
+# CONFIG_INET6_IPCOMP is not set
+# CONFIG_INET6_TUNNEL is not set
+# CONFIG_IPV6_TUNNEL is not set
+CONFIG_NETFILTER=y
+# CONFIG_NETFILTER_DEBUG is not set
+# CONFIG_BRIDGE_NETFILTER is not set
+
+#
+# Core Netfilter Configuration
+#
+# CONFIG_NETFILTER_NETLINK is not set
+CONFIG_NETFILTER_XTABLES=m
+CONFIG_NETFILTER_XT_TARGET_CLASSIFY=m
+CONFIG_NETFILTER_XT_TARGET_MARK=m
+# CONFIG_NETFILTER_XT_TARGET_NFQUEUE is not set
+# CONFIG_NETFILTER_XT_MATCH_COMMENT is not set
+CONFIG_NETFILTER_XT_MATCH_CONNTRACK=m
+# CONFIG_NETFILTER_XT_MATCH_DCCP is not set
+CONFIG_NETFILTER_XT_MATCH_HELPER=m
+CONFIG_NETFILTER_XT_MATCH_LENGTH=m
+CONFIG_NETFILTER_XT_MATCH_LIMIT=m
+CONFIG_NETFILTER_XT_MATCH_MAC=m
+CONFIG_NETFILTER_XT_MATCH_MARK=m
+# CONFIG_NETFILTER_XT_MATCH_PKTTYPE is not set
+# CONFIG_NETFILTER_XT_MATCH_REALM is not set
+# CONFIG_NETFILTER_XT_MATCH_SCTP is not set
+CONFIG_NETFILTER_XT_MATCH_STATE=m
+# CONFIG_NETFILTER_XT_MATCH_STRING is not set
+CONFIG_NETFILTER_XT_MATCH_TCPMSS=m
+
+#
+# IP: Netfilter Configuration
+#
+CONFIG_IP_NF_CONNTRACK=m
+# CONFIG_IP_NF_CT_ACCT is not set
+# CONFIG_IP_NF_CONNTRACK_MARK is not set
+# CONFIG_IP_NF_CONNTRACK_EVENTS is not set
+# CONFIG_IP_NF_CT_PROTO_SCTP is not set
+CONFIG_IP_NF_FTP=m
+CONFIG_IP_NF_IRC=m
+# CONFIG_IP_NF_NETBIOS_NS is not set
+CONFIG_IP_NF_TFTP=m
+CONFIG_IP_NF_AMANDA=m
+# CONFIG_IP_NF_PPTP is not set
+# CONFIG_IP_NF_QUEUE is not set
+CONFIG_IP_NF_IPTABLES=m
+CONFIG_IP_NF_MATCH_IPRANGE=m
+CONFIG_IP_NF_MATCH_MULTIPORT=m
+CONFIG_IP_NF_MATCH_TOS=m
+CONFIG_IP_NF_MATCH_RECENT=m
+# CONFIG_IP_NF_MATCH_ECN is not set
+# CONFIG_IP_NF_MATCH_DSCP is not set
+# CONFIG_IP_NF_MATCH_AH_ESP is not set
+CONFIG_IP_NF_MATCH_TTL=m
+# CONFIG_IP_NF_MATCH_OWNER is not set
+# CONFIG_IP_NF_MATCH_ADDRTYPE is not set
+# CONFIG_IP_NF_MATCH_HASHLIMIT is not set
+CONFIG_IP_NF_FILTER=m
+CONFIG_IP_NF_TARGET_REJECT=m
+CONFIG_IP_NF_TARGET_LOG=m
+# CONFIG_IP_NF_TARGET_ULOG is not set
+CONFIG_IP_NF_TARGET_TCPMSS=m
+CONFIG_IP_NF_NAT=m
+CONFIG_IP_NF_NAT_NEEDED=y
+CONFIG_IP_NF_TARGET_MASQUERADE=m
+CONFIG_IP_NF_TARGET_REDIRECT=m
+CONFIG_IP_NF_TARGET_NETMAP=m
+CONFIG_IP_NF_TARGET_SAME=m
+# CONFIG_IP_NF_NAT_SNMP_BASIC is not set
+CONFIG_IP_NF_NAT_IRC=m
+CONFIG_IP_NF_NAT_FTP=m
+CONFIG_IP_NF_NAT_TFTP=m
+CONFIG_IP_NF_NAT_AMANDA=m
+CONFIG_IP_NF_MANGLE=m
+CONFIG_IP_NF_TARGET_TOS=m
+# CONFIG_IP_NF_TARGET_ECN is not set
+# CONFIG_IP_NF_TARGET_DSCP is not set
+CONFIG_IP_NF_TARGET_TTL=m
+# CONFIG_IP_NF_RAW is not set
+# CONFIG_IP_NF_ARPTABLES is not set
+
+#
+# IPv6: Netfilter Configuration (EXPERIMENTAL)
+#
+# CONFIG_IP6_NF_QUEUE is not set
+CONFIG_IP6_NF_IPTABLES=m
+CONFIG_IP6_NF_MATCH_RT=m
+CONFIG_IP6_NF_MATCH_OPTS=m
+CONFIG_IP6_NF_MATCH_FRAG=m
+CONFIG_IP6_NF_MATCH_HL=m
+CONFIG_IP6_NF_MATCH_MULTIPORT=m
+CONFIG_IP6_NF_MATCH_OWNER=m
+CONFIG_IP6_NF_MATCH_IPV6HEADER=m
+CONFIG_IP6_NF_MATCH_AHESP=m
+CONFIG_IP6_NF_MATCH_EUI64=m
+CONFIG_IP6_NF_FILTER=m
+CONFIG_IP6_NF_TARGET_LOG=m
+CONFIG_IP6_NF_TARGET_REJECT=m
+CONFIG_IP6_NF_MANGLE=m
+CONFIG_IP6_NF_TARGET_HL=m
+# CONFIG_IP6_NF_RAW is not set
+
+#
+# Bridge: Netfilter Configuration
+#
+# CONFIG_BRIDGE_NF_EBTABLES is not set
+
+#
+# DCCP Configuration (EXPERIMENTAL)
+#
+# CONFIG_IP_DCCP is not set
+
+#
+# SCTP Configuration (EXPERIMENTAL)
+#
+# CONFIG_IP_SCTP is not set
+
+#
+# TIPC Configuration (EXPERIMENTAL)
+#
+# CONFIG_TIPC is not set
+# CONFIG_ATM is not set
+CONFIG_BRIDGE=m
+CONFIG_VLAN_8021Q=m
+# CONFIG_DECNET is not set
+# CONFIG_LLC2 is not set
+# CONFIG_IPX is not set
+# CONFIG_ATALK is not set
+# CONFIG_X25 is not set
+# CONFIG_LAPB is not set
+# CONFIG_NET_DIVERT is not set
+# CONFIG_ECONET is not set
+# CONFIG_WAN_ROUTER is not set
+
+#
+# QoS and/or fair queueing
+#
+CONFIG_NET_SCHED=y
+CONFIG_NET_SCH_CLK_JIFFIES=y
+# CONFIG_NET_SCH_CLK_GETTIMEOFDAY is not set
+# CONFIG_NET_SCH_CLK_CPU is not set
+
+#
+# Queueing/Scheduling
+#
+CONFIG_NET_SCH_CBQ=m
+CONFIG_NET_SCH_HTB=m
+# CONFIG_NET_SCH_HFSC is not set
+CONFIG_NET_SCH_PRIO=m
+CONFIG_NET_SCH_RED=m
+CONFIG_NET_SCH_SFQ=m
+CONFIG_NET_SCH_TEQL=m
+CONFIG_NET_SCH_TBF=m
+CONFIG_NET_SCH_GRED=m
+CONFIG_NET_SCH_DSMARK=m
+# CONFIG_NET_SCH_NETEM is not set
+CONFIG_NET_SCH_INGRESS=m
+
+#
+# Classification
+#
+CONFIG_NET_CLS=y
+# CONFIG_NET_CLS_BASIC is not set
+CONFIG_NET_CLS_TCINDEX=m
+CONFIG_NET_CLS_ROUTE4=m
+CONFIG_NET_CLS_ROUTE=y
+CONFIG_NET_CLS_FW=m
+CONFIG_NET_CLS_U32=m
+# CONFIG_CLS_U32_PERF is not set
+# CONFIG_CLS_U32_MARK is not set
+# CONFIG_NET_CLS_RSVP is not set
+# CONFIG_NET_CLS_RSVP6 is not set
+# CONFIG_NET_EMATCH is not set
+# CONFIG_NET_CLS_ACT is not set
+CONFIG_NET_CLS_POLICE=y
+# CONFIG_NET_CLS_IND is not set
+CONFIG_NET_ESTIMATOR=y
+
+#
+# Network testing
+#
+# CONFIG_NET_PKTGEN is not set
+# CONFIG_HAMRADIO is not set
+# CONFIG_IRDA is not set
+# CONFIG_BT is not set
+# CONFIG_IEEE80211 is not set
+
+#
+# Device Drivers
+#
+
+#
+# Generic Driver Options
+#
+CONFIG_STANDALONE=y
+CONFIG_PREVENT_FIRMWARE_BUILD=y
+CONFIG_FW_LOADER=y
+# CONFIG_DEBUG_DRIVER is not set
+
+#
+# Connector - unified userspace <-> kernelspace linker
+#
+# CONFIG_CONNECTOR is not set
+
+#
+# Memory Technology Devices (MTD)
+#
+# CONFIG_MTD is not set
+
+#
+# Parallel port support
+#
+CONFIG_PARPORT=m
+CONFIG_PARPORT_PC=m
+# CONFIG_PARPORT_SERIAL is not set
+CONFIG_PARPORT_PC_FIFO=y
+# CONFIG_PARPORT_PC_SUPERIO is not set
+# CONFIG_PARPORT_GSC is not set
+# CONFIG_PARPORT_1284 is not set
+
+#
+# Plug and Play support
+#
+# CONFIG_PNP is not set
+
+#
+# Block devices
+#
+CONFIG_BLK_DEV_FD=m
+# CONFIG_BLK_DEV_XD is not set
+# CONFIG_PARIDE is not set
+CONFIG_BLK_CPQ_DA=y
+CONFIG_BLK_CPQ_CISS_DA=y
+# CONFIG_CISS_SCSI_TAPE is not set
+CONFIG_BLK_DEV_DAC960=y
+# CONFIG_BLK_DEV_UMEM is not set
+# CONFIG_BLK_DEV_COW_COMMON is not set
+CONFIG_BLK_DEV_LOOP=m
+# CONFIG_BLK_DEV_CRYPTOLOOP is not set
+# CONFIG_BLK_DEV_NBD is not set
+CONFIG_BLK_DEV_SX8=y
+# CONFIG_BLK_DEV_UB is not set
+CONFIG_BLK_DEV_RAM=y
+CONFIG_BLK_DEV_RAM_COUNT=16
+CONFIG_BLK_DEV_RAM_SIZE=16384
+CONFIG_BLK_DEV_INITRD=y
+# CONFIG_CDROM_PKTCDVD is not set
+CONFIG_ATA_OVER_ETH=m
+
+#
+# ATA/ATAPI/MFM/RLL support
+#
+CONFIG_IDE=y
+CONFIG_BLK_DEV_IDE=y
+
+#
+# Please see Documentation/ide.txt for help/info on IDE drives
+#
+# CONFIG_BLK_DEV_IDE_SATA is not set
+# CONFIG_BLK_DEV_HD_IDE is not set
+CONFIG_BLK_DEV_IDEDISK=y
+# CONFIG_IDEDISK_MULTI_MODE is not set
+CONFIG_BLK_DEV_IDECD=m
+# CONFIG_BLK_DEV_IDETAPE is not set
+CONFIG_BLK_DEV_IDEFLOPPY=m
+CONFIG_BLK_DEV_IDESCSI=m
+# CONFIG_IDE_TASK_IOCTL is not set
+
+#
+# IDE chipset support/bugfixes
+#
+CONFIG_IDE_GENERIC=y
+CONFIG_BLK_DEV_CMD640=y
+# CONFIG_BLK_DEV_CMD640_ENHANCED is not set
+CONFIG_BLK_DEV_IDEPCI=y
+CONFIG_IDEPCI_SHARE_IRQ=y
+# CONFIG_BLK_DEV_OFFBOARD is not set
+CONFIG_BLK_DEV_GENERIC=y
+# CONFIG_BLK_DEV_OPTI621 is not set
+CONFIG_BLK_DEV_RZ1000=y
+CONFIG_BLK_DEV_IDEDMA_PCI=y
+# CONFIG_BLK_DEV_IDEDMA_FORCED is not set
+CONFIG_IDEDMA_PCI_AUTO=y
+# CONFIG_IDEDMA_ONLYDISK is not set
+# CONFIG_BLK_DEV_AEC62XX is not set
+# CONFIG_BLK_DEV_ALI15X3 is not set
+CONFIG_BLK_DEV_AMD74XX=y
+# CONFIG_BLK_DEV_ATIIXP is not set
+CONFIG_BLK_DEV_CMD64X=y
+# CONFIG_BLK_DEV_TRIFLEX is not set
+# CONFIG_BLK_DEV_CY82C693 is not set
+# CONFIG_BLK_DEV_CS5520 is not set
+# CONFIG_BLK_DEV_CS5530 is not set
+# CONFIG_BLK_DEV_CS5535 is not set
+CONFIG_BLK_DEV_HPT34X=y
+# CONFIG_HPT34X_AUTODMA is not set
+CONFIG_BLK_DEV_HPT366=y
+# CONFIG_BLK_DEV_SC1200 is not set
+CONFIG_BLK_DEV_PIIX=y
+# CONFIG_BLK_DEV_IT821X is not set
+# CONFIG_BLK_DEV_NS87415 is not set
+CONFIG_BLK_DEV_PDC202XX_OLD=y
+CONFIG_PDC202XX_BURST=y
+CONFIG_BLK_DEV_PDC202XX_NEW=y
+CONFIG_BLK_DEV_SVWKS=y
+CONFIG_BLK_DEV_SIIMAGE=y
+CONFIG_BLK_DEV_SIS5513=y
+# CONFIG_BLK_DEV_SLC90E66 is not set
+# CONFIG_BLK_DEV_TRM290 is not set
+CONFIG_BLK_DEV_VIA82CXXX=y
+# CONFIG_IDE_ARM is not set
+# CONFIG_IDE_CHIPSETS is not set
+CONFIG_BLK_DEV_IDEDMA=y
+# CONFIG_IDEDMA_IVB is not set
+CONFIG_IDEDMA_AUTO=y
+# CONFIG_BLK_DEV_HD is not set
+
+#
+# SCSI device support
+#
+# CONFIG_RAID_ATTRS is not set
+CONFIG_SCSI=y
+CONFIG_SCSI_PROC_FS=y
+
+#
+# SCSI support type (disk, tape, CD-ROM)
+#
+CONFIG_BLK_DEV_SD=y
+CONFIG_CHR_DEV_ST=m
+# CONFIG_CHR_DEV_OSST is not set
+CONFIG_BLK_DEV_SR=m
+# CONFIG_BLK_DEV_SR_VENDOR is not set
+CONFIG_CHR_DEV_SG=m
+# CONFIG_CHR_DEV_SCH is not set
+
+#
+# Some SCSI devices (e.g. CD jukebox) support multiple LUNs
+#
+# CONFIG_SCSI_MULTI_LUN is not set
+CONFIG_SCSI_CONSTANTS=y
+CONFIG_SCSI_LOGGING=y
+
+#
+# SCSI Transport Attributes
+#
+CONFIG_SCSI_SPI_ATTRS=y
+CONFIG_SCSI_FC_ATTRS=y
+CONFIG_SCSI_ISCSI_ATTRS=y
+CONFIG_SCSI_SAS_ATTRS=y
+
+#
+# SCSI low-level drivers
+#
+CONFIG_ISCSI_TCP=m
+CONFIG_BLK_DEV_3W_XXXX_RAID=y
+CONFIG_SCSI_3W_9XXX=y
+# CONFIG_SCSI_7000FASST is not set
+# CONFIG_SCSI_ACARD is not set
+# CONFIG_SCSI_AHA152X is not set
+# CONFIG_SCSI_AHA1542 is not set
+CONFIG_SCSI_AACRAID=y
+CONFIG_SCSI_AIC7XXX=y
+CONFIG_AIC7XXX_CMDS_PER_DEVICE=32
+CONFIG_AIC7XXX_RESET_DELAY_MS=15000
+# CONFIG_AIC7XXX_DEBUG_ENABLE is not set
+CONFIG_AIC7XXX_DEBUG_MASK=0
+# CONFIG_AIC7XXX_REG_PRETTY_PRINT is not set
+# CONFIG_SCSI_AIC7XXX_OLD is not set
+CONFIG_SCSI_AIC79XX=y
+CONFIG_AIC79XX_CMDS_PER_DEVICE=32
+CONFIG_AIC79XX_RESET_DELAY_MS=15000
+CONFIG_AIC79XX_ENABLE_RD_STRM=y
+# CONFIG_AIC79XX_DEBUG_ENABLE is not set
+CONFIG_AIC79XX_DEBUG_MASK=0
+# CONFIG_AIC79XX_REG_PRETTY_PRINT is not set
+# CONFIG_SCSI_DPT_I2O is not set
+# CONFIG_SCSI_IN2000 is not set
+CONFIG_MEGARAID_NEWGEN=y
+CONFIG_MEGARAID_MM=y
+CONFIG_MEGARAID_MAILBOX=y
+# CONFIG_MEGARAID_LEGACY is not set
+CONFIG_MEGARAID_SAS=y
+CONFIG_SCSI_SATA=y
+CONFIG_SCSI_SATA_AHCI=y
+CONFIG_SCSI_SATA_SVW=y
+CONFIG_SCSI_ATA_PIIX=y
+CONFIG_SCSI_SATA_MV=y
+CONFIG_SCSI_SATA_NV=y
+# CONFIG_SCSI_PDC_ADMA is not set
+# CONFIG_SCSI_SATA_QSTOR is not set
+CONFIG_SCSI_SATA_PROMISE=y
+CONFIG_SCSI_SATA_SX4=y
+CONFIG_SCSI_SATA_SIL=y
+CONFIG_SCSI_SATA_SIL24=y
+CONFIG_SCSI_SATA_SIS=y
+# CONFIG_SCSI_SATA_ULI is not set
+CONFIG_SCSI_SATA_VIA=y
+CONFIG_SCSI_SATA_VITESSE=y
+CONFIG_SCSI_SATA_INTEL_COMBINED=y
+CONFIG_SCSI_BUSLOGIC=y
+# CONFIG_SCSI_OMIT_FLASHPOINT is not set
+# CONFIG_SCSI_DMX3191D is not set
+# CONFIG_SCSI_DTC3280 is not set
+# CONFIG_SCSI_EATA is not set
+# CONFIG_SCSI_FUTURE_DOMAIN is not set
+CONFIG_SCSI_GDTH=y
+# CONFIG_SCSI_GENERIC_NCR5380 is not set
+# CONFIG_SCSI_GENERIC_NCR5380_MMIO is not set
+CONFIG_SCSI_IPS=y
+# CONFIG_SCSI_INITIO is not set
+# CONFIG_SCSI_INIA100 is not set
+# CONFIG_SCSI_PPA is not set
+# CONFIG_SCSI_IMM is not set
+# CONFIG_SCSI_NCR53C406A is not set
+CONFIG_SCSI_SYM53C8XX_2=y
+CONFIG_SCSI_SYM53C8XX_DMA_ADDRESSING_MODE=1
+CONFIG_SCSI_SYM53C8XX_DEFAULT_TAGS=16
+CONFIG_SCSI_SYM53C8XX_MAX_TAGS=64
+# CONFIG_SCSI_SYM53C8XX_IOMAPPED is not set
+# CONFIG_SCSI_IPR is not set
+# CONFIG_SCSI_PAS16 is not set
+# CONFIG_SCSI_PSI240I is not set
+# CONFIG_SCSI_QLOGIC_FAS is not set
+# CONFIG_SCSI_QLOGIC_FC is not set
+# CONFIG_SCSI_QLOGIC_1280 is not set
+CONFIG_SCSI_QLA_FC=y
+# CONFIG_SCSI_QLA2XXX_EMBEDDED_FIRMWARE is not set
+CONFIG_SCSI_LPFC=y
+# CONFIG_SCSI_SYM53C416 is not set
+# CONFIG_SCSI_DC395x is not set
+# CONFIG_SCSI_DC390T is not set
+# CONFIG_SCSI_T128 is not set
+# CONFIG_SCSI_U14_34F is not set
+# CONFIG_SCSI_ULTRASTOR is not set
+# CONFIG_SCSI_NSP32 is not set
+# CONFIG_SCSI_DEBUG is not set
+
+#
+# Old CD-ROM drivers (not SCSI, not IDE)
+#
+# CONFIG_CD_NO_IDESCSI is not set
+
+#
+# Multi-device support (RAID and LVM)
+#
+CONFIG_MD=y
+CONFIG_BLK_DEV_MD=y
+CONFIG_MD_LINEAR=y
+CONFIG_MD_RAID0=y
+CONFIG_MD_RAID1=y
+CONFIG_MD_RAID10=y
+CONFIG_MD_RAID5=y
+# CONFIG_MD_RAID6 is not set
+CONFIG_MD_MULTIPATH=y
+# CONFIG_MD_FAULTY is not set
+CONFIG_BLK_DEV_DM=y
+CONFIG_DM_CRYPT=y
+CONFIG_DM_SNAPSHOT=y
+CONFIG_DM_MIRROR=y
+CONFIG_DM_ZERO=y
+CONFIG_DM_MULTIPATH=y
+CONFIG_DM_MULTIPATH_EMC=y
+
+#
+# Fusion MPT device support
+#
+CONFIG_FUSION=y
+CONFIG_FUSION_SPI=m
+CONFIG_FUSION_FC=m
+CONFIG_FUSION_SAS=m
+CONFIG_FUSION_MAX_SGE=128
+CONFIG_FUSION_CTL=m
+CONFIG_FUSION_LAN=m
+
+#
+# IEEE 1394 (FireWire) support
+#
+# CONFIG_IEEE1394 is not set
+
+#
+# I2O device support
+#
+CONFIG_I2O=m
+CONFIG_I2O_LCT_NOTIFY_ON_CHANGES=y
+CONFIG_I2O_EXT_ADAPTEC=y
+CONFIG_I2O_EXT_ADAPTEC_DMA64=y
+CONFIG_I2O_CONFIG=m
+CONFIG_I2O_CONFIG_OLD_IOCTL=y
+# CONFIG_I2O_BUS is not set
+CONFIG_I2O_BLOCK=m
+CONFIG_I2O_SCSI=m
+CONFIG_I2O_PROC=m
+
+#
+# Network device support
+#
+CONFIG_NETDEVICES=y
+CONFIG_DUMMY=m
+CONFIG_BONDING=m
+# CONFIG_EQUALIZER is not set
+CONFIG_TUN=m
+
+#
+# ARCnet devices
+#
+# CONFIG_ARCNET is not set
+
+#
+# PHY device support
+#
+# CONFIG_PHYLIB is not set
+
+#
+# Ethernet (10 or 100Mbit)
+#
+CONFIG_NET_ETHERNET=y
+CONFIG_MII=m
+# CONFIG_HAPPYMEAL is not set
+# CONFIG_SUNGEM is not set
+# CONFIG_CASSINI is not set
+CONFIG_NET_VENDOR_3COM=y
+# CONFIG_EL1 is not set
+# CONFIG_EL2 is not set
+# CONFIG_ELPLUS is not set
+# CONFIG_EL16 is not set
+# CONFIG_EL3 is not set
+# CONFIG_3C515 is not set
+CONFIG_VORTEX=m
+# CONFIG_TYPHOON is not set
+# CONFIG_LANCE is not set
+# CONFIG_NET_VENDOR_SMC is not set
+# CONFIG_NET_VENDOR_RACAL is not set
+
+#
+# Tulip family network device support
+#
+CONFIG_NET_TULIP=y
+# CONFIG_DE2104X is not set
+CONFIG_TULIP=m
+# CONFIG_TULIP_MWI is not set
+# CONFIG_TULIP_MMIO is not set
+# CONFIG_TULIP_NAPI is not set
+# CONFIG_DE4X5 is not set
+# CONFIG_WINBOND_840 is not set
+# CONFIG_DM9102 is not set
+# CONFIG_ULI526X is not set
+# CONFIG_AT1700 is not set
+# CONFIG_DEPCA is not set
+# CONFIG_HP100 is not set
+# CONFIG_NET_ISA is not set
+CONFIG_NET_PCI=y
+CONFIG_PCNET32=m
+# CONFIG_AMD8111_ETH is not set
+# CONFIG_ADAPTEC_STARFIRE is not set
+# CONFIG_AC3200 is not set
+# CONFIG_APRICOT is not set
+# CONFIG_B44 is not set
+CONFIG_FORCEDETH=m
+# CONFIG_CS89x0 is not set
+# CONFIG_DGRS is not set
+CONFIG_EEPRO100=m
+CONFIG_E100=m
+# CONFIG_FEALNX is not set
+CONFIG_NATSEMI=m
+CONFIG_NE2K_PCI=m
+CONFIG_8139CP=m
+CONFIG_8139TOO=m
+CONFIG_8139TOO_PIO=y
+# CONFIG_8139TOO_TUNE_TWISTER is not set
+CONFIG_8139TOO_8129=y
+# CONFIG_8139_OLD_RX_RESET is not set
+CONFIG_SIS900=m
+# CONFIG_EPIC100 is not set
+# CONFIG_SUNDANCE is not set
+# CONFIG_TLAN is not set
+CONFIG_VIA_RHINE=m
+# CONFIG_VIA_RHINE_MMIO is not set
+# CONFIG_NET_POCKET is not set
+
+#
+# Ethernet (1000 Mbit)
+#
+# CONFIG_ACENIC is not set
+# CONFIG_DL2K is not set
+CONFIG_E1000=m
+# CONFIG_E1000_NAPI is not set
+# CONFIG_E1000_DISABLE_PACKET_SPLIT is not set
+# CONFIG_NS83820 is not set
+# CONFIG_HAMACHI is not set
+# CONFIG_YELLOWFIN is not set
+CONFIG_R8169=m
+# CONFIG_R8169_NAPI is not set
+# CONFIG_R8169_VLAN is not set
+CONFIG_SIS190=m
+CONFIG_SKGE=m
+CONFIG_SKY2=m
+CONFIG_SK98LIN=m
+# CONFIG_VIA_VELOCITY is not set
+CONFIG_TIGON3=m
+CONFIG_BNX2=m
+
+#
+# Ethernet (10000 Mbit)
+#
+# CONFIG_CHELSIO_T1 is not set
+# CONFIG_IXGB is not set
+# CONFIG_S2IO is not set
+
+#
+# Token Ring devices
+#
+# CONFIG_TR is not set
+
+#
+# Wireless LAN (non-hamradio)
+#
+# CONFIG_NET_RADIO is not set
+
+#
+# Wan interfaces
+#
+# CONFIG_WAN is not set
+# CONFIG_FDDI is not set
+# CONFIG_HIPPI is not set
+# CONFIG_PLIP is not set
+CONFIG_PPP=m
+# CONFIG_PPP_MULTILINK is not set
+# CONFIG_PPP_FILTER is not set
+CONFIG_PPP_ASYNC=m
+# CONFIG_PPP_SYNC_TTY is not set
+CONFIG_PPP_DEFLATE=m
+CONFIG_PPP_BSDCOMP=m
+# CONFIG_PPP_MPPE is not set
+# CONFIG_PPPOE is not set
+# CONFIG_SLIP is not set
+CONFIG_NET_FC=y
+# CONFIG_SHAPER is not set
+CONFIG_NETCONSOLE=m
+CONFIG_NETPOLL=y
+# CONFIG_NETPOLL_RX is not set
+# CONFIG_NETPOLL_TRAP is not set
+CONFIG_NET_POLL_CONTROLLER=y
+
+#
+# ISDN subsystem
+#
+# CONFIG_ISDN is not set
+
+#
+# Telephony Support
+#
+# CONFIG_PHONE is not set
+
+#
+# Input device support
+#
+CONFIG_INPUT=y
+
+#
+# Userland interfaces
+#
+CONFIG_INPUT_MOUSEDEV=y
+CONFIG_INPUT_MOUSEDEV_PSAUX=y
+CONFIG_INPUT_MOUSEDEV_SCREEN_X=1024
+CONFIG_INPUT_MOUSEDEV_SCREEN_Y=768
+# CONFIG_INPUT_JOYDEV is not set
+# CONFIG_INPUT_TSDEV is not set
+CONFIG_INPUT_EVDEV=y
+# CONFIG_INPUT_EVBUG is not set
+
+#
+# Input Device Drivers
+#
+CONFIG_INPUT_KEYBOARD=y
+CONFIG_KEYBOARD_ATKBD=y
+# CONFIG_KEYBOARD_SUNKBD is not set
+# CONFIG_KEYBOARD_LKKBD is not set
+# CONFIG_KEYBOARD_XTKBD is not set
+# CONFIG_KEYBOARD_NEWTON is not set
+CONFIG_INPUT_MOUSE=y
+CONFIG_MOUSE_PS2=y
+# CONFIG_MOUSE_SERIAL is not set
+# CONFIG_MOUSE_INPORT is not set
+# CONFIG_MOUSE_LOGIBM is not set
+# CONFIG_MOUSE_PC110PAD is not set
+# CONFIG_MOUSE_VSXXXAA is not set
+# CONFIG_INPUT_JOYSTICK is not set
+# CONFIG_INPUT_TOUCHSCREEN is not set
+# CONFIG_INPUT_MISC is not set
+
+#
+# Hardware I/O ports
+#
+CONFIG_SERIO=y
+CONFIG_SERIO_I8042=y
+CONFIG_SERIO_SERPORT=y
+# CONFIG_SERIO_CT82C710 is not set
+# CONFIG_SERIO_PARKBD is not set
+# CONFIG_SERIO_PCIPS2 is not set
+CONFIG_SERIO_LIBPS2=y
+# CONFIG_SERIO_RAW is not set
+# CONFIG_GAMEPORT is not set
+
+#
+# Character devices
+#
+CONFIG_VT=y
+CONFIG_VT_CONSOLE=y
+CONFIG_HW_CONSOLE=y
+# CONFIG_SERIAL_NONSTANDARD is not set
+
+#
+# Serial drivers
+#
+CONFIG_SERIAL_8250=y
+CONFIG_SERIAL_8250_CONSOLE=y
+# CONFIG_SERIAL_8250_ACPI is not set
+CONFIG_SERIAL_8250_NR_UARTS=4
+CONFIG_SERIAL_8250_RUNTIME_UARTS=4
+# CONFIG_SERIAL_8250_EXTENDED is not set
+
+#
+# Non-8250 serial port support
+#
+CONFIG_SERIAL_CORE=y
+CONFIG_SERIAL_CORE_CONSOLE=y
+# CONFIG_SERIAL_JSM is not set
+CONFIG_UNIX98_PTYS=y
+CONFIG_LEGACY_PTYS=y
+CONFIG_LEGACY_PTY_COUNT=256
+CONFIG_PRINTER=m
+# CONFIG_LP_CONSOLE is not set
+# CONFIG_PPDEV is not set
+# CONFIG_TIPAR is not set
+
+#
+# IPMI
+#
+CONFIG_IPMI_HANDLER=m
+# CONFIG_IPMI_PANIC_EVENT is not set
+CONFIG_IPMI_DEVICE_INTERFACE=m
+CONFIG_IPMI_SI=m
+CONFIG_IPMI_WATCHDOG=m
+# CONFIG_IPMI_POWEROFF is not set
+
+#
+# Watchdog Cards
+#
+# CONFIG_WATCHDOG is not set
+# CONFIG_HW_RANDOM is not set
+# CONFIG_NVRAM is not set
+CONFIG_RTC=y
+# CONFIG_DTLK is not set
+# CONFIG_R3964 is not set
+# CONFIG_APPLICOM is not set
+# CONFIG_SONYPI is not set
+
+#
+# Ftape, the floppy tape device driver
+#
+# CONFIG_AGP is not set
+# CONFIG_DRM is not set
+# CONFIG_MWAVE is not set
+# CONFIG_CS5535_GPIO is not set
+# CONFIG_RAW_DRIVER is not set
+# CONFIG_HPET is not set
+# CONFIG_HANGCHECK_TIMER is not set
+
+#
+# TPM devices
+#
+# CONFIG_TCG_TPM is not set
+# CONFIG_TELCLOCK is not set
+
+#
+# I2C support
+#
+CONFIG_I2C=m
+CONFIG_I2C_CHARDEV=m
+
+#
+# I2C Algorithms
+#
+CONFIG_I2C_ALGOBIT=m
+CONFIG_I2C_ALGOPCF=m
+CONFIG_I2C_ALGOPCA=m
+
+#
+# I2C Hardware Bus support
+#
+CONFIG_I2C_ALI1535=m
+CONFIG_I2C_ALI1563=m
+CONFIG_I2C_ALI15X3=m
+CONFIG_I2C_AMD756=m
+CONFIG_I2C_AMD756_S4882=m
+CONFIG_I2C_AMD8111=m
+CONFIG_I2C_I801=m
+CONFIG_I2C_I810=m
+CONFIG_I2C_PIIX4=m
+CONFIG_I2C_ISA=m
+CONFIG_I2C_NFORCE2=m
+CONFIG_I2C_PARPORT=m
+CONFIG_I2C_PARPORT_LIGHT=m
+CONFIG_I2C_PROSAVAGE=m
+CONFIG_I2C_SAVAGE4=m
+CONFIG_SCx200_ACB=m
+CONFIG_I2C_SIS5595=m
+CONFIG_I2C_SIS630=m
+CONFIG_I2C_SIS96X=m
+CONFIG_I2C_STUB=m
+CONFIG_I2C_VIA=m
+CONFIG_I2C_VIAPRO=m
+CONFIG_I2C_VOODOO3=m
+CONFIG_I2C_PCA_ISA=m
+
+#
+# Miscellaneous I2C Chip support
+#
+CONFIG_SENSORS_DS1337=m
+CONFIG_SENSORS_DS1374=m
+CONFIG_SENSORS_EEPROM=m
+CONFIG_SENSORS_PCF8574=m
+CONFIG_SENSORS_PCA9539=m
+CONFIG_SENSORS_PCF8591=m
+CONFIG_SENSORS_RTC8564=m
+CONFIG_SENSORS_MAX6875=m
+# CONFIG_RTC_X1205_I2C is not set
+# CONFIG_I2C_DEBUG_CORE is not set
+# CONFIG_I2C_DEBUG_ALGO is not set
+# CONFIG_I2C_DEBUG_BUS is not set
+# CONFIG_I2C_DEBUG_CHIP is not set
+
+#
+# SPI support
+#
+# CONFIG_SPI is not set
+# CONFIG_SPI_MASTER is not set
+
+#
+# Dallas's 1-wire bus
+#
+# CONFIG_W1 is not set
+
+#
+# Hardware Monitoring support
+#
+CONFIG_HWMON=y
+CONFIG_HWMON_VID=m
+CONFIG_SENSORS_ADM1021=m
+CONFIG_SENSORS_ADM1025=m
+CONFIG_SENSORS_ADM1026=m
+CONFIG_SENSORS_ADM1031=m
+CONFIG_SENSORS_ADM9240=m
+CONFIG_SENSORS_ASB100=m
+CONFIG_SENSORS_ATXP1=m
+CONFIG_SENSORS_DS1621=m
+# CONFIG_SENSORS_F71805F is not set
+CONFIG_SENSORS_FSCHER=m
+CONFIG_SENSORS_FSCPOS=m
+CONFIG_SENSORS_GL518SM=m
+CONFIG_SENSORS_GL520SM=m
+CONFIG_SENSORS_IT87=m
+CONFIG_SENSORS_LM63=m
+CONFIG_SENSORS_LM75=m
+CONFIG_SENSORS_LM77=m
+CONFIG_SENSORS_LM78=m
+CONFIG_SENSORS_LM80=m
+CONFIG_SENSORS_LM83=m
+CONFIG_SENSORS_LM85=m
+CONFIG_SENSORS_LM87=m
+CONFIG_SENSORS_LM90=m
+CONFIG_SENSORS_LM92=m
+CONFIG_SENSORS_MAX1619=m
+CONFIG_SENSORS_PC87360=m
+CONFIG_SENSORS_SIS5595=m
+CONFIG_SENSORS_SMSC47M1=m
+CONFIG_SENSORS_SMSC47B397=m
+CONFIG_SENSORS_VIA686A=m
+# CONFIG_SENSORS_VT8231 is not set
+CONFIG_SENSORS_W83781D=m
+CONFIG_SENSORS_W83792D=m
+CONFIG_SENSORS_W83L785TS=m
+CONFIG_SENSORS_W83627HF=m
+CONFIG_SENSORS_W83627EHF=m
+# CONFIG_SENSORS_HDAPS is not set
+# CONFIG_HWMON_DEBUG_CHIP is not set
+
+#
+# Misc devices
+#
+# CONFIG_IBM_ASM is not set
+
+#
+# Multimedia Capabilities Port drivers
+#
+
+#
+# Multimedia devices
+#
+# CONFIG_VIDEO_DEV is not set
+
+#
+# Digital Video Broadcasting Devices
+#
+# CONFIG_DVB is not set
+
+#
+# Graphics support
+#
+# CONFIG_FB is not set
+CONFIG_VIDEO_SELECT=y
+
+#
+# Console display driver support
+#
+CONFIG_VGA_CONSOLE=y
+# CONFIG_MDA_CONSOLE is not set
+CONFIG_DUMMY_CONSOLE=y
+
+#
+# Sound
+#
+# CONFIG_SOUND is not set
+
+#
+# USB support
+#
+CONFIG_USB_ARCH_HAS_HCD=y
+CONFIG_USB_ARCH_HAS_OHCI=y
+CONFIG_USB=m
+# CONFIG_USB_DEBUG is not set
+
+#
+# Miscellaneous USB options
+#
+CONFIG_USB_DEVICEFS=y
+# CONFIG_USB_BANDWIDTH is not set
+# CONFIG_USB_DYNAMIC_MINORS is not set
+# CONFIG_USB_SUSPEND is not set
+# CONFIG_USB_OTG is not set
+
+#
+# USB Host Controller Drivers
+#
+CONFIG_USB_EHCI_HCD=m
+# CONFIG_USB_EHCI_SPLIT_ISO is not set
+# CONFIG_USB_EHCI_ROOT_HUB_TT is not set
+# CONFIG_USB_ISP116X_HCD is not set
+CONFIG_USB_OHCI_HCD=m
+# CONFIG_USB_OHCI_BIG_ENDIAN is not set
+CONFIG_USB_OHCI_LITTLE_ENDIAN=y
+CONFIG_USB_UHCI_HCD=m
+# CONFIG_USB_SL811_HCD is not set
+
+#
+# USB Device Class drivers
+#
+CONFIG_USB_ACM=m
+CONFIG_USB_PRINTER=m
+
+#
+# NOTE: USB_STORAGE enables SCSI, and 'SCSI disk support'
+#
+
+#
+# may also be needed; see USB_STORAGE Help for more information
+#
+CONFIG_USB_STORAGE=m
+# CONFIG_USB_STORAGE_DEBUG is not set
+CONFIG_USB_STORAGE_DATAFAB=y
+CONFIG_USB_STORAGE_FREECOM=y
+CONFIG_USB_STORAGE_ISD200=y
+CONFIG_USB_STORAGE_DPCM=y
+# CONFIG_USB_STORAGE_USBAT is not set
+CONFIG_USB_STORAGE_SDDR09=y
+# CONFIG_USB_STORAGE_SDDR55 is not set
+CONFIG_USB_STORAGE_JUMPSHOT=y
+# CONFIG_USB_STORAGE_ALAUDA is not set
+# CONFIG_USB_LIBUSUAL is not set
+
+#
+# USB Input Devices
+#
+CONFIG_USB_HID=m
+CONFIG_USB_HIDINPUT=y
+# CONFIG_USB_HIDINPUT_POWERBOOK is not set
+# CONFIG_HID_FF is not set
+# CONFIG_USB_HIDDEV is not set
+
+#
+# USB HID Boot Protocol drivers
+#
+CONFIG_USB_KBD=m
+CONFIG_USB_MOUSE=m
+# CONFIG_USB_AIPTEK is not set
+CONFIG_USB_WACOM=m
+# CONFIG_USB_ACECAD is not set
+# CONFIG_USB_KBTAB is not set
+# CONFIG_USB_POWERMATE is not set
+# CONFIG_USB_MTOUCH is not set
+# CONFIG_USB_ITMTOUCH is not set
+# CONFIG_USB_EGALAX is not set
+# CONFIG_USB_YEALINK is not set
+# CONFIG_USB_XPAD is not set
+# CONFIG_USB_ATI_REMOTE is not set
+# CONFIG_USB_ATI_REMOTE2 is not set
+# CONFIG_USB_KEYSPAN_REMOTE is not set
+# CONFIG_USB_APPLETOUCH is not set
+
+#
+# USB Imaging devices
+#
+# CONFIG_USB_MDC800 is not set
+# CONFIG_USB_MICROTEK is not set
+
+#
+# USB Multimedia devices
+#
+# CONFIG_USB_DABUSB is not set
+
+#
+# Video4Linux support is needed for USB Multimedia device support
+#
+
+#
+# USB Network Adapters
+#
+# CONFIG_USB_CATC is not set
+# CONFIG_USB_KAWETH is not set
+# CONFIG_USB_PEGASUS is not set
+# CONFIG_USB_RTL8150 is not set
+# CONFIG_USB_USBNET is not set
+# CONFIG_USB_MON is not set
+
+#
+# USB port drivers
+#
+# CONFIG_USB_USS720 is not set
+
+#
+# USB Serial Converter support
+#
+# CONFIG_USB_SERIAL is not set
+
+#
+# USB Miscellaneous drivers
+#
+# CONFIG_USB_EMI62 is not set
+# CONFIG_USB_EMI26 is not set
+# CONFIG_USB_AUERSWALD is not set
+# CONFIG_USB_RIO500 is not set
+# CONFIG_USB_LEGOTOWER is not set
+# CONFIG_USB_LCD is not set
+# CONFIG_USB_LED is not set
+# CONFIG_USB_CYTHERM is not set
+# CONFIG_USB_PHIDGETKIT is not set
+# CONFIG_USB_PHIDGETSERVO is not set
+# CONFIG_USB_IDMOUSE is not set
+# CONFIG_USB_SISUSBVGA is not set
+# CONFIG_USB_LD is not set
+# CONFIG_USB_TEST is not set
+
+#
+# USB DSL modem support
+#
+
+#
+# USB Gadget Support
+#
+# CONFIG_USB_GADGET is not set
+
+#
+# MMC/SD Card support
+#
+# CONFIG_MMC is not set
+
+#
+# InfiniBand support
+#
+# CONFIG_INFINIBAND is not set
+
+#
+# EDAC - error detection and reporting (RAS) (EXPERIMENTAL)
+#
+# CONFIG_EDAC is not set
+
+#
+# File systems
+#
+CONFIG_EXT2_FS=y
+# CONFIG_EXT2_FS_XATTR is not set
+# CONFIG_EXT2_FS_XIP is not set
+CONFIG_EXT3_FS=y
+CONFIG_EXT3_FS_XATTR=y
+# CONFIG_EXT3_FS_POSIX_ACL is not set
+# CONFIG_EXT3_FS_SECURITY is not set
+CONFIG_JBD=y
+CONFIG_JBD_DEBUG=y
+CONFIG_FS_MBCACHE=y
+CONFIG_REISERFS_FS=y
+# CONFIG_REISERFS_CHECK is not set
+CONFIG_REISERFS_PROC_INFO=y
+# CONFIG_REISERFS_FS_XATTR is not set
+# CONFIG_JFS_FS is not set
+# CONFIG_FS_POSIX_ACL is not set
+# CONFIG_XFS_FS is not set
+# CONFIG_OCFS2_FS is not set
+# CONFIG_MINIX_FS is not set
+# CONFIG_ROMFS_FS is not set
+CONFIG_INOTIFY=y
+CONFIG_QUOTA=y
+CONFIG_QUOTA_COMPAT=y
+# CONFIG_QFMT_V1 is not set
+CONFIG_QFMT_V2=y
+CONFIG_SIM_FS=m
+CONFIG_VZ_QUOTA=m
+# CONFIG_VZ_QUOTA_UNLOAD is not set
+CONFIG_VZ_QUOTA_UGID=y
+CONFIG_QUOTACTL=y
+CONFIG_DNOTIFY=y
+# CONFIG_AUTOFS_FS is not set
+# CONFIG_AUTOFS4_FS is not set
+# CONFIG_FUSE_FS is not set
+
+#
+# CD-ROM/DVD Filesystems
+#
+CONFIG_ISO9660_FS=m
+# CONFIG_JOLIET is not set
+# CONFIG_ZISOFS is not set
+# CONFIG_UDF_FS is not set
+
+#
+# DOS/FAT/NT Filesystems
+#
+CONFIG_FAT_FS=m
+CONFIG_MSDOS_FS=m
+CONFIG_VFAT_FS=m
+CONFIG_FAT_DEFAULT_CODEPAGE=437
+CONFIG_FAT_DEFAULT_IOCHARSET="iso8859-1"
+# CONFIG_NTFS_FS is not set
+
+#
+# Pseudo filesystems
+#
+CONFIG_PROC_FS=y
+CONFIG_PROC_KCORE=y
+CONFIG_SYSFS=y
+CONFIG_TMPFS=y
+# CONFIG_HUGETLBFS is not set
+# CONFIG_HUGETLB_PAGE is not set
+CONFIG_RAMFS=y
+# CONFIG_RELAYFS_FS is not set
+# CONFIG_CONFIGFS_FS is not set
+
+#
+# Miscellaneous filesystems
+#
+# CONFIG_ADFS_FS is not set
+# CONFIG_AFFS_FS is not set
+# CONFIG_HFS_FS is not set
+# CONFIG_HFSPLUS_FS is not set
+# CONFIG_BEFS_FS is not set
+# CONFIG_BFS_FS is not set
+# CONFIG_EFS_FS is not set
+CONFIG_CRAMFS=y
+# CONFIG_VXFS_FS is not set
+# CONFIG_HPFS_FS is not set
+# CONFIG_QNX4FS_FS is not set
+# CONFIG_SYSV_FS is not set
+# CONFIG_UFS_FS is not set
+
+#
+# Network File Systems
+#
+CONFIG_NFS_FS=m
+CONFIG_NFS_V3=y
+# CONFIG_NFS_V3_ACL is not set
+# CONFIG_NFS_V4 is not set
+# CONFIG_NFS_DIRECTIO is not set
+# CONFIG_NFSD is not set
+CONFIG_LOCKD=m
+CONFIG_LOCKD_V4=y
+CONFIG_NFS_COMMON=y
+CONFIG_SUNRPC=m
+# CONFIG_RPCSEC_GSS_KRB5 is not set
+# CONFIG_RPCSEC_GSS_SPKM3 is not set
+CONFIG_SMB_FS=m
+# CONFIG_SMB_NLS_DEFAULT is not set
+CONFIG_CIFS=m
+# CONFIG_CIFS_STATS is not set
+# CONFIG_CIFS_XATTR is not set
+# CONFIG_CIFS_EXPERIMENTAL is not set
+# CONFIG_NCP_FS is not set
+# CONFIG_CODA_FS is not set
+# CONFIG_AFS_FS is not set
+# CONFIG_9P_FS is not set
+
+#
+# Partition Types
+#
+# CONFIG_PARTITION_ADVANCED is not set
+CONFIG_MSDOS_PARTITION=y
+
+#
+# Native Language Support
+#
+CONFIG_NLS=y
+CONFIG_NLS_DEFAULT="iso8859-1"
+CONFIG_NLS_CODEPAGE_437=m
+CONFIG_NLS_CODEPAGE_737=m
+CONFIG_NLS_CODEPAGE_775=m
+CONFIG_NLS_CODEPAGE_850=m
+CONFIG_NLS_CODEPAGE_852=m
+CONFIG_NLS_CODEPAGE_855=m
+CONFIG_NLS_CODEPAGE_857=m
+CONFIG_NLS_CODEPAGE_860=m
+CONFIG_NLS_CODEPAGE_861=m
+CONFIG_NLS_CODEPAGE_862=m
+CONFIG_NLS_CODEPAGE_863=m
+CONFIG_NLS_CODEPAGE_864=m
+CONFIG_NLS_CODEPAGE_865=m
+CONFIG_NLS_CODEPAGE_866=m
+CONFIG_NLS_CODEPAGE_869=m
+CONFIG_NLS_CODEPAGE_936=m
+CONFIG_NLS_CODEPAGE_950=m
+CONFIG_NLS_CODEPAGE_932=m
+CONFIG_NLS_CODEPAGE_949=m
+CONFIG_NLS_CODEPAGE_874=m
+CONFIG_NLS_ISO8859_8=m
+CONFIG_NLS_CODEPAGE_1250=m
+CONFIG_NLS_CODEPAGE_1251=m
+CONFIG_NLS_ASCII=m
+CONFIG_NLS_ISO8859_1=m
+CONFIG_NLS_ISO8859_2=m
+CONFIG_NLS_ISO8859_3=m
+CONFIG_NLS_ISO8859_4=m
+CONFIG_NLS_ISO8859_5=m
+CONFIG_NLS_ISO8859_6=m
+CONFIG_NLS_ISO8859_7=m
+CONFIG_NLS_ISO8859_9=m
+CONFIG_NLS_ISO8859_13=m
+CONFIG_NLS_ISO8859_14=m
+CONFIG_NLS_ISO8859_15=m
+CONFIG_NLS_KOI8_R=m
+CONFIG_NLS_KOI8_U=m
+CONFIG_NLS_UTF8=m
+
+#
+# Instrumentation Support
+#
+CONFIG_PROFILING=y
+CONFIG_OPROFILE=m
+# CONFIG_KPROBES is not set
+
+#
+# Kernel hacking
+#
+# CONFIG_PRINTK_TIME is not set
+CONFIG_MAGIC_SYSRQ=y
+CONFIG_DEBUG_KERNEL=y
+CONFIG_LOG_BUF_SHIFT=17
+# CONFIG_SCHEDSTATS is not set
+# CONFIG_DEBUG_SLAB is not set
+# CONFIG_DEBUG_MUTEXES is not set
+# CONFIG_DEBUG_SPINLOCK is not set
+# CONFIG_DEBUG_SPINLOCK_SLEEP is not set
+# CONFIG_DEBUG_KOBJECT is not set
+# CONFIG_DEBUG_HIGHMEM is not set
+CONFIG_DEBUG_BUGVERBOSE=y
+# CONFIG_DEBUG_INFO is not set
+# CONFIG_DEBUG_FS is not set
+# CONFIG_DEBUG_VM is not set
+# CONFIG_FRAME_POINTER is not set
+CONFIG_FORCED_INLINING=y
+# CONFIG_RCU_TORTURE_TEST is not set
+CONFIG_EARLY_PRINTK=y
+CONFIG_DEBUG_STACKOVERFLOW=y
+# CONFIG_DEBUG_STACK_USAGE is not set
+
+#
+# Page alloc debug is incompatible with Software Suspend on i386
+#
+# CONFIG_DEBUG_RODATA is not set
+CONFIG_4KSTACKS=y
+CONFIG_X86_FIND_SMP_CONFIG=y
+CONFIG_X86_MPPARSE=y
+
+#
+# OpenVZ
+#
+CONFIG_VE=y
+CONFIG_VE_CALLS=m
+CONFIG_VE_NETDEV=m
+CONFIG_VE_ETHDEV=m
+CONFIG_VE_IPTABLES=y
+CONFIG_VZ_WDOG=m
+CONFIG_VZ_CHECKPOINT=m
+
+#
+# Security options
+#
+# CONFIG_KEYS is not set
+# CONFIG_SECURITY is not set
+
+#
+# Cryptographic options
+#
+CONFIG_CRYPTO=y
+# CONFIG_CRYPTO_HMAC is not set
+# CONFIG_CRYPTO_NULL is not set
+# CONFIG_CRYPTO_MD4 is not set
+CONFIG_CRYPTO_MD5=m
+# CONFIG_CRYPTO_SHA1 is not set
+# CONFIG_CRYPTO_SHA256 is not set
+# CONFIG_CRYPTO_SHA512 is not set
+# CONFIG_CRYPTO_WP512 is not set
+# CONFIG_CRYPTO_TGR192 is not set
+# CONFIG_CRYPTO_DES is not set
+# CONFIG_CRYPTO_BLOWFISH is not set
+# CONFIG_CRYPTO_TWOFISH is not set
+# CONFIG_CRYPTO_SERPENT is not set
+# CONFIG_CRYPTO_AES is not set
+# CONFIG_CRYPTO_AES_586 is not set
+# CONFIG_CRYPTO_CAST5 is not set
+# CONFIG_CRYPTO_CAST6 is not set
+# CONFIG_CRYPTO_TEA is not set
+# CONFIG_CRYPTO_ARC4 is not set
+# CONFIG_CRYPTO_KHAZAD is not set
+# CONFIG_CRYPTO_ANUBIS is not set
+# CONFIG_CRYPTO_DEFLATE is not set
+# CONFIG_CRYPTO_MICHAEL_MIC is not set
+CONFIG_CRYPTO_CRC32C=m
+# CONFIG_CRYPTO_TEST is not set
+
+#
+# Hardware crypto devices
+#
+# CONFIG_CRYPTO_DEV_PADLOCK is not set
+
+#
+# Library routines
+#
+CONFIG_CRC_CCITT=m
+# CONFIG_CRC16 is not set
+CONFIG_CRC32=y
+CONFIG_LIBCRC32C=m
+CONFIG_ZLIB_INFLATE=y
+CONFIG_ZLIB_DEFLATE=m
+
+#
+# User resources
+#
+CONFIG_USER_RESOURCE=y
+CONFIG_USER_RSS_ACCOUNTING=y
+CONFIG_USER_SWAP_ACCOUNTING=y
+CONFIG_USER_RESOURCE_PROC=y
+CONFIG_UBC_DEBUG=y
+CONFIG_UBC_DEBUG_KMEM=y
+# CONFIG_UBC_KEEP_UNUSED is not set
+# CONFIG_UBC_DEBUG_ITEMS is not set
+# CONFIG_UBC_UNLIMITED is not set
+CONFIG_GENERIC_HARDIRQS=y
+CONFIG_GENERIC_IRQ_PROBE=y
+CONFIG_GENERIC_PENDING_IRQ=y
+CONFIG_X86_SMP=y
+CONFIG_X86_HT=y
+CONFIG_X86_BIOS_REBOOT=y
+CONFIG_X86_TRAMPOLINE=y
+CONFIG_KTIME_SCALAR=y
diff -uprN linux-2.6.16/configs/kernel-2.6.16-026test015-i686-smp.config.ovz linux-2.6.16.ovz/configs/kernel-2.6.16-026test015-i686-smp.config.ovz
--- linux-2.6.16/configs/kernel-2.6.16-026test015-i686-smp.config.ovz	1969-12-31 19:00:00.000000000 -0500
+++ linux-2.6.16.ovz/configs/kernel-2.6.16-026test015-i686-smp.config.ovz	2006-07-05 08:35:04.000000000 -0400
@@ -0,0 +1,1709 @@
+#
+# Automatically generated make config: don't edit
+# Linux kernel version: 2.6.16-026test015
+# Wed Jul  5 08:35:04 2006
+#
+CONFIG_X86_32=y
+CONFIG_SEMAPHORE_SLEEPERS=y
+CONFIG_X86=y
+CONFIG_MMU=y
+CONFIG_GENERIC_ISA_DMA=y
+CONFIG_GENERIC_IOMAP=y
+CONFIG_ARCH_MAY_HAVE_PC_FDC=y
+CONFIG_DMI=y
+
+#
+# Code maturity level options
+#
+CONFIG_EXPERIMENTAL=y
+CONFIG_LOCK_KERNEL=y
+CONFIG_INIT_ENV_ARG_LIMIT=32
+
+#
+# General setup
+#
+CONFIG_LOCALVERSION=""
+# CONFIG_LOCALVERSION_AUTO is not set
+CONFIG_SWAP=y
+CONFIG_SYSVIPC=y
+# CONFIG_POSIX_MQUEUE is not set
+# CONFIG_BSD_PROCESS_ACCT is not set
+CONFIG_SYSCTL=y
+# CONFIG_AUDIT is not set
+# CONFIG_IKCONFIG is not set
+# CONFIG_CPUSETS is not set
+CONFIG_INITRAMFS_SOURCE=""
+CONFIG_UID16=y
+CONFIG_VM86=y
+# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
+# CONFIG_EMBEDDED is not set
+CONFIG_KALLSYMS=y
+# CONFIG_KALLSYMS_ALL is not set
+# CONFIG_KALLSYMS_EXTRA_PASS is not set
+CONFIG_HOTPLUG=y
+CONFIG_PRINTK=y
+CONFIG_BUG=y
+CONFIG_ELF_CORE=y
+CONFIG_BASE_FULL=y
+CONFIG_FUTEX=y
+CONFIG_EPOLL=y
+CONFIG_SHMEM=y
+CONFIG_CC_ALIGN_FUNCTIONS=0
+CONFIG_CC_ALIGN_LABELS=0
+CONFIG_CC_ALIGN_LOOPS=0
+CONFIG_CC_ALIGN_JUMPS=0
+CONFIG_SLAB=y
+# CONFIG_TINY_SHMEM is not set
+CONFIG_BASE_SMALL=0
+# CONFIG_SLOB is not set
+
+#
+# Loadable module support
+#
+CONFIG_MODULES=y
+CONFIG_MODULE_UNLOAD=y
+# CONFIG_MODULE_FORCE_UNLOAD is not set
+CONFIG_OBSOLETE_MODPARM=y
+# CONFIG_MODVERSIONS is not set
+# CONFIG_MODULE_SRCVERSION_ALL is not set
+CONFIG_KMOD=y
+CONFIG_STOP_MACHINE=y
+
+#
+# Block layer
+#
+CONFIG_LBD=y
+
+#
+# IO Schedulers
+#
+CONFIG_IOSCHED_NOOP=y
+CONFIG_IOSCHED_AS=y
+CONFIG_IOSCHED_DEADLINE=y
+CONFIG_IOSCHED_CFQ=y
+# CONFIG_DEFAULT_AS is not set
+# CONFIG_DEFAULT_DEADLINE is not set
+CONFIG_DEFAULT_CFQ=y
+# CONFIG_DEFAULT_NOOP is not set
+CONFIG_DEFAULT_IOSCHED="cfq"
+
+#
+# Processor type and features
+#
+CONFIG_X86_PC=y
+# CONFIG_X86_ELAN is not set
+# CONFIG_X86_VOYAGER is not set
+# CONFIG_X86_NUMAQ is not set
+# CONFIG_X86_SUMMIT is not set
+# CONFIG_X86_BIGSMP is not set
+# CONFIG_X86_VISWS is not set
+# CONFIG_X86_GENERICARCH is not set
+# CONFIG_X86_ES7000 is not set
+# CONFIG_M386 is not set
+# CONFIG_M486 is not set
+# CONFIG_M586 is not set
+# CONFIG_M586TSC is not set
+# CONFIG_M586MMX is not set
+CONFIG_M686=y
+# CONFIG_MPENTIUMII is not set
+# CONFIG_MPENTIUMIII is not set
+# CONFIG_MPENTIUMM is not set
+# CONFIG_MPENTIUM4 is not set
+# CONFIG_MK6 is not set
+# CONFIG_MK7 is not set
+# CONFIG_MK8 is not set
+# CONFIG_MCRUSOE is not set
+# CONFIG_MEFFICEON is not set
+# CONFIG_MWINCHIPC6 is not set
+# CONFIG_MWINCHIP2 is not set
+# CONFIG_MWINCHIP3D is not set
+# CONFIG_MGEODEGX1 is not set
+# CONFIG_MGEODE_LX is not set
+# CONFIG_MCYRIXIII is not set
+# CONFIG_MVIAC3_2 is not set
+# CONFIG_X86_GENERIC is not set
+CONFIG_X86_CMPXCHG=y
+CONFIG_X86_XADD=y
+CONFIG_X86_L1_CACHE_SHIFT=5
+CONFIG_RWSEM_XCHGADD_ALGORITHM=y
+CONFIG_GENERIC_CALIBRATE_DELAY=y
+CONFIG_X86_PPRO_FENCE=y
+CONFIG_X86_WP_WORKS_OK=y
+CONFIG_X86_INVLPG=y
+CONFIG_X86_BSWAP=y
+CONFIG_X86_POPAD_OK=y
+CONFIG_X86_CMPXCHG64=y
+CONFIG_X86_GOOD_APIC=y
+CONFIG_X86_USE_PPRO_CHECKSUM=y
+CONFIG_X86_TSC=y
+CONFIG_HPET_TIMER=y
+CONFIG_HPET_EMULATE_RTC=y
+CONFIG_SMP=y
+CONFIG_NR_CPUS=8
+CONFIG_SCHED_VCPU=y
+CONFIG_FAIRSCHED=y
+CONFIG_SCHED_SMT=y
+CONFIG_PREEMPT_NONE=y
+# CONFIG_PREEMPT_VOLUNTARY is not set
+# CONFIG_PREEMPT is not set
+# CONFIG_PREEMPT_BKL is not set
+CONFIG_X86_LOCAL_APIC=y
+CONFIG_X86_IO_APIC=y
+CONFIG_NMI_WATCHDOG=y
+CONFIG_X86_MCE=y
+# CONFIG_X86_MCE_NONFATAL is not set
+CONFIG_X86_MCE_P4THERMAL=y
+# CONFIG_TOSHIBA is not set
+# CONFIG_I8K is not set
+# CONFIG_X86_REBOOTFIXUPS is not set
+CONFIG_MICROCODE=m
+CONFIG_X86_MSR=y
+# CONFIG_X86_CPUID is not set
+
+#
+# Firmware Drivers
+#
+# CONFIG_EDD is not set
+CONFIG_DELL_RBU=m
+CONFIG_DCDBAS=m
+# CONFIG_NOHIGHMEM is not set
+CONFIG_HIGHMEM4G=y
+# CONFIG_HIGHMEM64G is not set
+CONFIG_VMSPLIT_3G=y
+# CONFIG_VMSPLIT_3G_OPT is not set
+# CONFIG_VMSPLIT_2G is not set
+# CONFIG_VMSPLIT_1G is not set
+CONFIG_PAGE_OFFSET=0xC0000000
+CONFIG_HIGHMEM=y
+CONFIG_ARCH_FLATMEM_ENABLE=y
+CONFIG_ARCH_SPARSEMEM_ENABLE=y
+CONFIG_ARCH_SELECT_MEMORY_MODEL=y
+CONFIG_SELECT_MEMORY_MODEL=y
+CONFIG_FLATMEM_MANUAL=y
+# CONFIG_DISCONTIGMEM_MANUAL is not set
+# CONFIG_SPARSEMEM_MANUAL is not set
+CONFIG_FLATMEM=y
+CONFIG_FLAT_NODE_MEM_MAP=y
+CONFIG_SPARSEMEM_STATIC=y
+CONFIG_SPLIT_PTLOCK_CPUS=4
+CONFIG_HIGHPTE=y
+# CONFIG_MATH_EMULATION is not set
+CONFIG_MTRR=y
+# CONFIG_EFI is not set
+CONFIG_IRQBALANCE=y
+# CONFIG_REGPARM is not set
+# CONFIG_SECCOMP is not set
+# CONFIG_HZ_100 is not set
+# CONFIG_HZ_250 is not set
+CONFIG_HZ_1000=y
+CONFIG_HZ=1000
+# CONFIG_KEXEC is not set
+# CONFIG_CRASH_DUMP is not set
+CONFIG_PHYSICAL_START=0x100000
+# CONFIG_HOTPLUG_CPU is not set
+CONFIG_DOUBLEFAULT=y
+
+#
+# Power management options (ACPI, APM)
+#
+CONFIG_PM=y
+CONFIG_PM_LEGACY=y
+# CONFIG_PM_DEBUG is not set
+CONFIG_SOFTWARE_SUSPEND=y
+CONFIG_PM_STD_PARTITION=""
+
+#
+# ACPI (Advanced Configuration and Power Interface) Support
+#
+CONFIG_ACPI=y
+CONFIG_ACPI_AC=m
+CONFIG_ACPI_BATTERY=m
+CONFIG_ACPI_BUTTON=m
+CONFIG_ACPI_VIDEO=y
+CONFIG_ACPI_HOTKEY=m
+CONFIG_ACPI_FAN=m
+CONFIG_ACPI_PROCESSOR=m
+CONFIG_ACPI_THERMAL=m
+CONFIG_ACPI_ASUS=m
+CONFIG_ACPI_IBM=m
+CONFIG_ACPI_TOSHIBA=m
+CONFIG_ACPI_BLACKLIST_YEAR=0
+# CONFIG_ACPI_DEBUG is not set
+CONFIG_ACPI_EC=y
+CONFIG_ACPI_POWER=y
+CONFIG_ACPI_SYSTEM=y
+CONFIG_X86_PM_TIMER=y
+# CONFIG_ACPI_CONTAINER is not set
+
+#
+# APM (Advanced Power Management) BIOS Support
+#
+# CONFIG_APM is not set
+
+#
+# CPU Frequency scaling
+#
+# CONFIG_CPU_FREQ is not set
+
+#
+# Bus options (PCI, PCMCIA, EISA, MCA, ISA)
+#
+CONFIG_PCI=y
+# CONFIG_PCI_GOBIOS is not set
+# CONFIG_PCI_GOMMCONFIG is not set
+# CONFIG_PCI_GODIRECT is not set
+CONFIG_PCI_GOANY=y
+CONFIG_PCI_BIOS=y
+CONFIG_PCI_DIRECT=y
+CONFIG_PCI_MMCONFIG=y
+# CONFIG_PCIEPORTBUS is not set
+# CONFIG_PCI_MSI is not set
+# CONFIG_PCI_LEGACY_PROC is not set
+# CONFIG_PCI_DEBUG is not set
+CONFIG_ISA_DMA_API=y
+CONFIG_ISA=y
+# CONFIG_EISA is not set
+# CONFIG_MCA is not set
+# CONFIG_SCx200 is not set
+
+#
+# PCCARD (PCMCIA/CardBus) support
+#
+# CONFIG_PCCARD is not set
+
+#
+# PCI Hotplug Support
+#
+CONFIG_HOTPLUG_PCI=y
+# CONFIG_HOTPLUG_PCI_FAKE is not set
+CONFIG_HOTPLUG_PCI_COMPAQ=m
+# CONFIG_HOTPLUG_PCI_COMPAQ_NVRAM is not set
+CONFIG_HOTPLUG_PCI_IBM=m
+CONFIG_HOTPLUG_PCI_ACPI=m
+# CONFIG_HOTPLUG_PCI_ACPI_IBM is not set
+# CONFIG_HOTPLUG_PCI_CPCI is not set
+CONFIG_HOTPLUG_PCI_SHPC=m
+# CONFIG_HOTPLUG_PCI_SHPC_POLL_EVENT_MODE is not set
+
+#
+# Executable file formats
+#
+CONFIG_BINFMT_ELF=y
+CONFIG_BINFMT_AOUT=m
+# CONFIG_BINFMT_MISC is not set
+
+#
+# Networking
+#
+CONFIG_NET=y
+
+#
+# Networking options
+#
+# CONFIG_NETDEBUG is not set
+CONFIG_PACKET=m
+CONFIG_PACKET_MMAP=y
+CONFIG_UNIX=y
+# CONFIG_NET_KEY is not set
+CONFIG_INET=y
+# CONFIG_IP_MULTICAST is not set
+CONFIG_IP_ADVANCED_ROUTER=y
+CONFIG_ASK_IP_FIB_HASH=y
+# CONFIG_IP_FIB_TRIE is not set
+CONFIG_IP_FIB_HASH=y
+CONFIG_IP_MULTIPLE_TABLES=y
+CONFIG_IP_ROUTE_FWMARK=y
+CONFIG_IP_ROUTE_MULTIPATH=y
+# CONFIG_IP_ROUTE_MULTIPATH_CACHED is not set
+CONFIG_IP_ROUTE_VERBOSE=y
+# CONFIG_IP_PNP is not set
+CONFIG_NET_IPIP=m
+CONFIG_NET_IPGRE=m
+# CONFIG_ARPD is not set
+CONFIG_SYN_COOKIES=y
+# CONFIG_INET_AH is not set
+# CONFIG_INET_ESP is not set
+# CONFIG_INET_IPCOMP is not set
+# CONFIG_INET_TUNNEL is not set
+CONFIG_INET_DIAG=y
+CONFIG_INET_TCP_DIAG=y
+# CONFIG_TCP_CONG_ADVANCED is not set
+CONFIG_TCP_CONG_BIC=y
+
+#
+# IP: Virtual Server Configuration
+#
+# CONFIG_IP_VS is not set
+CONFIG_IPV6=y
+# CONFIG_IPV6_PRIVACY is not set
+# CONFIG_INET6_AH is not set
+# CONFIG_INET6_ESP is not set
+# CONFIG_INET6_IPCOMP is not set
+# CONFIG_INET6_TUNNEL is not set
+# CONFIG_IPV6_TUNNEL is not set
+CONFIG_NETFILTER=y
+# CONFIG_NETFILTER_DEBUG is not set
+# CONFIG_BRIDGE_NETFILTER is not set
+
+#
+# Core Netfilter Configuration
+#
+# CONFIG_NETFILTER_NETLINK is not set
+CONFIG_NETFILTER_XTABLES=m
+CONFIG_NETFILTER_XT_TARGET_CLASSIFY=m
+CONFIG_NETFILTER_XT_TARGET_MARK=m
+# CONFIG_NETFILTER_XT_TARGET_NFQUEUE is not set
+# CONFIG_NETFILTER_XT_MATCH_COMMENT is not set
+CONFIG_NETFILTER_XT_MATCH_CONNTRACK=m
+# CONFIG_NETFILTER_XT_MATCH_DCCP is not set
+CONFIG_NETFILTER_XT_MATCH_HELPER=m
+CONFIG_NETFILTER_XT_MATCH_LENGTH=m
+CONFIG_NETFILTER_XT_MATCH_LIMIT=m
+CONFIG_NETFILTER_XT_MATCH_MAC=m
+CONFIG_NETFILTER_XT_MATCH_MARK=m
+# CONFIG_NETFILTER_XT_MATCH_PKTTYPE is not set
+# CONFIG_NETFILTER_XT_MATCH_REALM is not set
+# CONFIG_NETFILTER_XT_MATCH_SCTP is not set
+CONFIG_NETFILTER_XT_MATCH_STATE=m
+# CONFIG_NETFILTER_XT_MATCH_STRING is not set
+CONFIG_NETFILTER_XT_MATCH_TCPMSS=m
+
+#
+# IP: Netfilter Configuration
+#
+CONFIG_IP_NF_CONNTRACK=m
+# CONFIG_IP_NF_CT_ACCT is not set
+# CONFIG_IP_NF_CONNTRACK_MARK is not set
+# CONFIG_IP_NF_CONNTRACK_EVENTS is not set
+# CONFIG_IP_NF_CT_PROTO_SCTP is not set
+CONFIG_IP_NF_FTP=m
+CONFIG_IP_NF_IRC=m
+# CONFIG_IP_NF_NETBIOS_NS is not set
+CONFIG_IP_NF_TFTP=m
+CONFIG_IP_NF_AMANDA=m
+# CONFIG_IP_NF_PPTP is not set
+# CONFIG_IP_NF_QUEUE is not set
+CONFIG_IP_NF_IPTABLES=m
+CONFIG_IP_NF_MATCH_IPRANGE=m
+CONFIG_IP_NF_MATCH_MULTIPORT=m
+CONFIG_IP_NF_MATCH_TOS=m
+CONFIG_IP_NF_MATCH_RECENT=m
+# CONFIG_IP_NF_MATCH_ECN is not set
+# CONFIG_IP_NF_MATCH_DSCP is not set
+# CONFIG_IP_NF_MATCH_AH_ESP is not set
+CONFIG_IP_NF_MATCH_TTL=m
+# CONFIG_IP_NF_MATCH_OWNER is not set
+# CONFIG_IP_NF_MATCH_ADDRTYPE is not set
+# CONFIG_IP_NF_MATCH_HASHLIMIT is not set
+CONFIG_IP_NF_FILTER=m
+CONFIG_IP_NF_TARGET_REJECT=m
+CONFIG_IP_NF_TARGET_LOG=m
+# CONFIG_IP_NF_TARGET_ULOG is not set
+CONFIG_IP_NF_TARGET_TCPMSS=m
+CONFIG_IP_NF_NAT=m
+CONFIG_IP_NF_NAT_NEEDED=y
+CONFIG_IP_NF_TARGET_MASQUERADE=m
+CONFIG_IP_NF_TARGET_REDIRECT=m
+CONFIG_IP_NF_TARGET_NETMAP=m
+CONFIG_IP_NF_TARGET_SAME=m
+# CONFIG_IP_NF_NAT_SNMP_BASIC is not set
+CONFIG_IP_NF_NAT_IRC=m
+CONFIG_IP_NF_NAT_FTP=m
+CONFIG_IP_NF_NAT_TFTP=m
+CONFIG_IP_NF_NAT_AMANDA=m
+CONFIG_IP_NF_MANGLE=m
+CONFIG_IP_NF_TARGET_TOS=m
+# CONFIG_IP_NF_TARGET_ECN is not set
+# CONFIG_IP_NF_TARGET_DSCP is not set
+CONFIG_IP_NF_TARGET_TTL=m
+# CONFIG_IP_NF_RAW is not set
+# CONFIG_IP_NF_ARPTABLES is not set
+
+#
+# IPv6: Netfilter Configuration (EXPERIMENTAL)
+#
+# CONFIG_IP6_NF_QUEUE is not set
+CONFIG_IP6_NF_IPTABLES=m
+CONFIG_IP6_NF_MATCH_RT=m
+CONFIG_IP6_NF_MATCH_OPTS=m
+CONFIG_IP6_NF_MATCH_FRAG=m
+CONFIG_IP6_NF_MATCH_HL=m
+CONFIG_IP6_NF_MATCH_MULTIPORT=m
+CONFIG_IP6_NF_MATCH_OWNER=m
+CONFIG_IP6_NF_MATCH_IPV6HEADER=m
+CONFIG_IP6_NF_MATCH_AHESP=m
+CONFIG_IP6_NF_MATCH_EUI64=m
+CONFIG_IP6_NF_FILTER=m
+CONFIG_IP6_NF_TARGET_LOG=m
+CONFIG_IP6_NF_TARGET_REJECT=m
+CONFIG_IP6_NF_MANGLE=m
+CONFIG_IP6_NF_TARGET_HL=m
+# CONFIG_IP6_NF_RAW is not set
+
+#
+# Bridge: Netfilter Configuration
+#
+# CONFIG_BRIDGE_NF_EBTABLES is not set
+
+#
+# DCCP Configuration (EXPERIMENTAL)
+#
+# CONFIG_IP_DCCP is not set
+
+#
+# SCTP Configuration (EXPERIMENTAL)
+#
+# CONFIG_IP_SCTP is not set
+
+#
+# TIPC Configuration (EXPERIMENTAL)
+#
+# CONFIG_TIPC is not set
+# CONFIG_ATM is not set
+CONFIG_BRIDGE=m
+CONFIG_VLAN_8021Q=m
+# CONFIG_DECNET is not set
+# CONFIG_LLC2 is not set
+# CONFIG_IPX is not set
+# CONFIG_ATALK is not set
+# CONFIG_X25 is not set
+# CONFIG_LAPB is not set
+# CONFIG_NET_DIVERT is not set
+# CONFIG_ECONET is not set
+# CONFIG_WAN_ROUTER is not set
+
+#
+# QoS and/or fair queueing
+#
+CONFIG_NET_SCHED=y
+CONFIG_NET_SCH_CLK_JIFFIES=y
+# CONFIG_NET_SCH_CLK_GETTIMEOFDAY is not set
+# CONFIG_NET_SCH_CLK_CPU is not set
+
+#
+# Queueing/Scheduling
+#
+CONFIG_NET_SCH_CBQ=m
+CONFIG_NET_SCH_HTB=m
+# CONFIG_NET_SCH_HFSC is not set
+CONFIG_NET_SCH_PRIO=m
+CONFIG_NET_SCH_RED=m
+CONFIG_NET_SCH_SFQ=m
+CONFIG_NET_SCH_TEQL=m
+CONFIG_NET_SCH_TBF=m
+CONFIG_NET_SCH_GRED=m
+CONFIG_NET_SCH_DSMARK=m
+# CONFIG_NET_SCH_NETEM is not set
+CONFIG_NET_SCH_INGRESS=m
+
+#
+# Classification
+#
+CONFIG_NET_CLS=y
+# CONFIG_NET_CLS_BASIC is not set
+CONFIG_NET_CLS_TCINDEX=m
+CONFIG_NET_CLS_ROUTE4=m
+CONFIG_NET_CLS_ROUTE=y
+CONFIG_NET_CLS_FW=m
+CONFIG_NET_CLS_U32=m
+# CONFIG_CLS_U32_PERF is not set
+# CONFIG_CLS_U32_MARK is not set
+# CONFIG_NET_CLS_RSVP is not set
+# CONFIG_NET_CLS_RSVP6 is not set
+# CONFIG_NET_EMATCH is not set
+# CONFIG_NET_CLS_ACT is not set
+CONFIG_NET_CLS_POLICE=y
+# CONFIG_NET_CLS_IND is not set
+CONFIG_NET_ESTIMATOR=y
+
+#
+# Network testing
+#
+# CONFIG_NET_PKTGEN is not set
+# CONFIG_HAMRADIO is not set
+# CONFIG_IRDA is not set
+# CONFIG_BT is not set
+# CONFIG_IEEE80211 is not set
+
+#
+# Device Drivers
+#
+
+#
+# Generic Driver Options
+#
+CONFIG_STANDALONE=y
+CONFIG_PREVENT_FIRMWARE_BUILD=y
+CONFIG_FW_LOADER=y
+# CONFIG_DEBUG_DRIVER is not set
+
+#
+# Connector - unified userspace <-> kernelspace linker
+#
+# CONFIG_CONNECTOR is not set
+
+#
+# Memory Technology Devices (MTD)
+#
+# CONFIG_MTD is not set
+
+#
+# Parallel port support
+#
+CONFIG_PARPORT=m
+CONFIG_PARPORT_PC=m
+# CONFIG_PARPORT_SERIAL is not set
+CONFIG_PARPORT_PC_FIFO=y
+# CONFIG_PARPORT_PC_SUPERIO is not set
+# CONFIG_PARPORT_GSC is not set
+# CONFIG_PARPORT_1284 is not set
+
+#
+# Plug and Play support
+#
+# CONFIG_PNP is not set
+
+#
+# Block devices
+#
+CONFIG_BLK_DEV_FD=m
+# CONFIG_BLK_DEV_XD is not set
+# CONFIG_PARIDE is not set
+CONFIG_BLK_CPQ_DA=y
+CONFIG_BLK_CPQ_CISS_DA=y
+# CONFIG_CISS_SCSI_TAPE is not set
+CONFIG_BLK_DEV_DAC960=y
+# CONFIG_BLK_DEV_UMEM is not set
+# CONFIG_BLK_DEV_COW_COMMON is not set
+CONFIG_BLK_DEV_LOOP=m
+# CONFIG_BLK_DEV_CRYPTOLOOP is not set
+# CONFIG_BLK_DEV_NBD is not set
+CONFIG_BLK_DEV_SX8=y
+# CONFIG_BLK_DEV_UB is not set
+CONFIG_BLK_DEV_RAM=y
+CONFIG_BLK_DEV_RAM_COUNT=16
+CONFIG_BLK_DEV_RAM_SIZE=16384
+CONFIG_BLK_DEV_INITRD=y
+# CONFIG_CDROM_PKTCDVD is not set
+CONFIG_ATA_OVER_ETH=m
+
+#
+# ATA/ATAPI/MFM/RLL support
+#
+CONFIG_IDE=y
+CONFIG_BLK_DEV_IDE=y
+
+#
+# Please see Documentation/ide.txt for help/info on IDE drives
+#
+# CONFIG_BLK_DEV_IDE_SATA is not set
+# CONFIG_BLK_DEV_HD_IDE is not set
+CONFIG_BLK_DEV_IDEDISK=y
+# CONFIG_IDEDISK_MULTI_MODE is not set
+CONFIG_BLK_DEV_IDECD=m
+# CONFIG_BLK_DEV_IDETAPE is not set
+CONFIG_BLK_DEV_IDEFLOPPY=m
+CONFIG_BLK_DEV_IDESCSI=m
+# CONFIG_IDE_TASK_IOCTL is not set
+
+#
+# IDE chipset support/bugfixes
+#
+CONFIG_IDE_GENERIC=y
+CONFIG_BLK_DEV_CMD640=y
+# CONFIG_BLK_DEV_CMD640_ENHANCED is not set
+CONFIG_BLK_DEV_IDEPCI=y
+CONFIG_IDEPCI_SHARE_IRQ=y
+# CONFIG_BLK_DEV_OFFBOARD is not set
+CONFIG_BLK_DEV_GENERIC=y
+# CONFIG_BLK_DEV_OPTI621 is not set
+CONFIG_BLK_DEV_RZ1000=y
+CONFIG_BLK_DEV_IDEDMA_PCI=y
+# CONFIG_BLK_DEV_IDEDMA_FORCED is not set
+CONFIG_IDEDMA_PCI_AUTO=y
+# CONFIG_IDEDMA_ONLYDISK is not set
+# CONFIG_BLK_DEV_AEC62XX is not set
+# CONFIG_BLK_DEV_ALI15X3 is not set
+CONFIG_BLK_DEV_AMD74XX=y
+# CONFIG_BLK_DEV_ATIIXP is not set
+CONFIG_BLK_DEV_CMD64X=y
+# CONFIG_BLK_DEV_TRIFLEX is not set
+# CONFIG_BLK_DEV_CY82C693 is not set
+# CONFIG_BLK_DEV_CS5520 is not set
+# CONFIG_BLK_DEV_CS5530 is not set
+# CONFIG_BLK_DEV_CS5535 is not set
+CONFIG_BLK_DEV_HPT34X=y
+# CONFIG_HPT34X_AUTODMA is not set
+CONFIG_BLK_DEV_HPT366=y
+# CONFIG_BLK_DEV_SC1200 is not set
+CONFIG_BLK_DEV_PIIX=y
+# CONFIG_BLK_DEV_IT821X is not set
+# CONFIG_BLK_DEV_NS87415 is not set
+CONFIG_BLK_DEV_PDC202XX_OLD=y
+CONFIG_PDC202XX_BURST=y
+CONFIG_BLK_DEV_PDC202XX_NEW=y
+CONFIG_BLK_DEV_SVWKS=y
+CONFIG_BLK_DEV_SIIMAGE=y
+CONFIG_BLK_DEV_SIS5513=y
+# CONFIG_BLK_DEV_SLC90E66 is not set
+# CONFIG_BLK_DEV_TRM290 is not set
+CONFIG_BLK_DEV_VIA82CXXX=y
+# CONFIG_IDE_ARM is not set
+# CONFIG_IDE_CHIPSETS is not set
+CONFIG_BLK_DEV_IDEDMA=y
+# CONFIG_IDEDMA_IVB is not set
+CONFIG_IDEDMA_AUTO=y
+# CONFIG_BLK_DEV_HD is not set
+
+#
+# SCSI device support
+#
+# CONFIG_RAID_ATTRS is not set
+CONFIG_SCSI=y
+CONFIG_SCSI_PROC_FS=y
+
+#
+# SCSI support type (disk, tape, CD-ROM)
+#
+CONFIG_BLK_DEV_SD=y
+CONFIG_CHR_DEV_ST=m
+# CONFIG_CHR_DEV_OSST is not set
+CONFIG_BLK_DEV_SR=m
+# CONFIG_BLK_DEV_SR_VENDOR is not set
+CONFIG_CHR_DEV_SG=m
+# CONFIG_CHR_DEV_SCH is not set
+
+#
+# Some SCSI devices (e.g. CD jukebox) support multiple LUNs
+#
+# CONFIG_SCSI_MULTI_LUN is not set
+CONFIG_SCSI_CONSTANTS=y
+CONFIG_SCSI_LOGGING=y
+
+#
+# SCSI Transport Attributes
+#
+CONFIG_SCSI_SPI_ATTRS=y
+CONFIG_SCSI_FC_ATTRS=y
+CONFIG_SCSI_ISCSI_ATTRS=y
+CONFIG_SCSI_SAS_ATTRS=y
+
+#
+# SCSI low-level drivers
+#
+CONFIG_ISCSI_TCP=m
+CONFIG_BLK_DEV_3W_XXXX_RAID=y
+CONFIG_SCSI_3W_9XXX=y
+# CONFIG_SCSI_7000FASST is not set
+# CONFIG_SCSI_ACARD is not set
+# CONFIG_SCSI_AHA152X is not set
+# CONFIG_SCSI_AHA1542 is not set
+CONFIG_SCSI_AACRAID=y
+CONFIG_SCSI_AIC7XXX=y
+CONFIG_AIC7XXX_CMDS_PER_DEVICE=32
+CONFIG_AIC7XXX_RESET_DELAY_MS=15000
+# CONFIG_AIC7XXX_DEBUG_ENABLE is not set
+CONFIG_AIC7XXX_DEBUG_MASK=0
+# CONFIG_AIC7XXX_REG_PRETTY_PRINT is not set
+# CONFIG_SCSI_AIC7XXX_OLD is not set
+CONFIG_SCSI_AIC79XX=y
+CONFIG_AIC79XX_CMDS_PER_DEVICE=32
+CONFIG_AIC79XX_RESET_DELAY_MS=15000
+CONFIG_AIC79XX_ENABLE_RD_STRM=y
+# CONFIG_AIC79XX_DEBUG_ENABLE is not set
+CONFIG_AIC79XX_DEBUG_MASK=0
+# CONFIG_AIC79XX_REG_PRETTY_PRINT is not set
+# CONFIG_SCSI_DPT_I2O is not set
+# CONFIG_SCSI_IN2000 is not set
+CONFIG_MEGARAID_NEWGEN=y
+CONFIG_MEGARAID_MM=y
+CONFIG_MEGARAID_MAILBOX=y
+# CONFIG_MEGARAID_LEGACY is not set
+CONFIG_MEGARAID_SAS=y
+CONFIG_SCSI_SATA=y
+CONFIG_SCSI_SATA_AHCI=y
+CONFIG_SCSI_SATA_SVW=y
+CONFIG_SCSI_ATA_PIIX=y
+CONFIG_SCSI_SATA_MV=y
+CONFIG_SCSI_SATA_NV=y
+# CONFIG_SCSI_PDC_ADMA is not set
+# CONFIG_SCSI_SATA_QSTOR is not set
+CONFIG_SCSI_SATA_PROMISE=y
+CONFIG_SCSI_SATA_SX4=y
+CONFIG_SCSI_SATA_SIL=y
+CONFIG_SCSI_SATA_SIL24=y
+CONFIG_SCSI_SATA_SIS=y
+# CONFIG_SCSI_SATA_ULI is not set
+CONFIG_SCSI_SATA_VIA=y
+CONFIG_SCSI_SATA_VITESSE=y
+CONFIG_SCSI_SATA_INTEL_COMBINED=y
+CONFIG_SCSI_BUSLOGIC=y
+# CONFIG_SCSI_OMIT_FLASHPOINT is not set
+# CONFIG_SCSI_DMX3191D is not set
+# CONFIG_SCSI_DTC3280 is not set
+# CONFIG_SCSI_EATA is not set
+# CONFIG_SCSI_FUTURE_DOMAIN is not set
+CONFIG_SCSI_GDTH=y
+# CONFIG_SCSI_GENERIC_NCR5380 is not set
+# CONFIG_SCSI_GENERIC_NCR5380_MMIO is not set
+CONFIG_SCSI_IPS=y
+# CONFIG_SCSI_INITIO is not set
+# CONFIG_SCSI_INIA100 is not set
+# CONFIG_SCSI_PPA is not set
+# CONFIG_SCSI_IMM is not set
+# CONFIG_SCSI_NCR53C406A is not set
+CONFIG_SCSI_SYM53C8XX_2=y
+CONFIG_SCSI_SYM53C8XX_DMA_ADDRESSING_MODE=1
+CONFIG_SCSI_SYM53C8XX_DEFAULT_TAGS=16
+CONFIG_SCSI_SYM53C8XX_MAX_TAGS=64
+# CONFIG_SCSI_SYM53C8XX_IOMAPPED is not set
+# CONFIG_SCSI_IPR is not set
+# CONFIG_SCSI_PAS16 is not set
+# CONFIG_SCSI_PSI240I is not set
+# CONFIG_SCSI_QLOGIC_FAS is not set
+# CONFIG_SCSI_QLOGIC_FC is not set
+# CONFIG_SCSI_QLOGIC_1280 is not set
+CONFIG_SCSI_QLA_FC=y
+# CONFIG_SCSI_QLA2XXX_EMBEDDED_FIRMWARE is not set
+CONFIG_SCSI_LPFC=y
+# CONFIG_SCSI_SYM53C416 is not set
+# CONFIG_SCSI_DC395x is not set
+# CONFIG_SCSI_DC390T is not set
+# CONFIG_SCSI_T128 is not set
+# CONFIG_SCSI_U14_34F is not set
+# CONFIG_SCSI_ULTRASTOR is not set
+# CONFIG_SCSI_NSP32 is not set
+# CONFIG_SCSI_DEBUG is not set
+
+#
+# Old CD-ROM drivers (not SCSI, not IDE)
+#
+# CONFIG_CD_NO_IDESCSI is not set
+
+#
+# Multi-device support (RAID and LVM)
+#
+CONFIG_MD=y
+CONFIG_BLK_DEV_MD=y
+CONFIG_MD_LINEAR=y
+CONFIG_MD_RAID0=y
+CONFIG_MD_RAID1=y
+CONFIG_MD_RAID10=y
+CONFIG_MD_RAID5=y
+# CONFIG_MD_RAID6 is not set
+CONFIG_MD_MULTIPATH=y
+# CONFIG_MD_FAULTY is not set
+CONFIG_BLK_DEV_DM=y
+CONFIG_DM_CRYPT=y
+CONFIG_DM_SNAPSHOT=y
+CONFIG_DM_MIRROR=y
+CONFIG_DM_ZERO=y
+CONFIG_DM_MULTIPATH=y
+CONFIG_DM_MULTIPATH_EMC=y
+
+#
+# Fusion MPT device support
+#
+CONFIG_FUSION=y
+CONFIG_FUSION_SPI=m
+CONFIG_FUSION_FC=m
+CONFIG_FUSION_SAS=m
+CONFIG_FUSION_MAX_SGE=128
+CONFIG_FUSION_CTL=m
+CONFIG_FUSION_LAN=m
+
+#
+# IEEE 1394 (FireWire) support
+#
+# CONFIG_IEEE1394 is not set
+
+#
+# I2O device support
+#
+CONFIG_I2O=m
+CONFIG_I2O_LCT_NOTIFY_ON_CHANGES=y
+CONFIG_I2O_EXT_ADAPTEC=y
+CONFIG_I2O_CONFIG=m
+CONFIG_I2O_CONFIG_OLD_IOCTL=y
+# CONFIG_I2O_BUS is not set
+CONFIG_I2O_BLOCK=m
+CONFIG_I2O_SCSI=m
+CONFIG_I2O_PROC=m
+
+#
+# Network device support
+#
+CONFIG_NETDEVICES=y
+CONFIG_DUMMY=m
+CONFIG_BONDING=m
+# CONFIG_EQUALIZER is not set
+CONFIG_TUN=m
+
+#
+# ARCnet devices
+#
+# CONFIG_ARCNET is not set
+
+#
+# PHY device support
+#
+# CONFIG_PHYLIB is not set
+
+#
+# Ethernet (10 or 100Mbit)
+#
+CONFIG_NET_ETHERNET=y
+CONFIG_MII=m
+# CONFIG_HAPPYMEAL is not set
+# CONFIG_SUNGEM is not set
+# CONFIG_CASSINI is not set
+CONFIG_NET_VENDOR_3COM=y
+# CONFIG_EL1 is not set
+# CONFIG_EL2 is not set
+# CONFIG_ELPLUS is not set
+# CONFIG_EL16 is not set
+# CONFIG_EL3 is not set
+# CONFIG_3C515 is not set
+CONFIG_VORTEX=m
+# CONFIG_TYPHOON is not set
+# CONFIG_LANCE is not set
+# CONFIG_NET_VENDOR_SMC is not set
+# CONFIG_NET_VENDOR_RACAL is not set
+
+#
+# Tulip family network device support
+#
+CONFIG_NET_TULIP=y
+# CONFIG_DE2104X is not set
+CONFIG_TULIP=m
+# CONFIG_TULIP_MWI is not set
+# CONFIG_TULIP_MMIO is not set
+# CONFIG_TULIP_NAPI is not set
+# CONFIG_DE4X5 is not set
+# CONFIG_WINBOND_840 is not set
+# CONFIG_DM9102 is not set
+# CONFIG_ULI526X is not set
+# CONFIG_AT1700 is not set
+# CONFIG_DEPCA is not set
+# CONFIG_HP100 is not set
+# CONFIG_NET_ISA is not set
+CONFIG_NET_PCI=y
+CONFIG_PCNET32=m
+# CONFIG_AMD8111_ETH is not set
+# CONFIG_ADAPTEC_STARFIRE is not set
+# CONFIG_AC3200 is not set
+# CONFIG_APRICOT is not set
+# CONFIG_B44 is not set
+CONFIG_FORCEDETH=m
+# CONFIG_CS89x0 is not set
+# CONFIG_DGRS is not set
+CONFIG_EEPRO100=m
+CONFIG_E100=m
+# CONFIG_FEALNX is not set
+CONFIG_NATSEMI=m
+CONFIG_NE2K_PCI=m
+CONFIG_8139CP=m
+CONFIG_8139TOO=m
+CONFIG_8139TOO_PIO=y
+# CONFIG_8139TOO_TUNE_TWISTER is not set
+CONFIG_8139TOO_8129=y
+# CONFIG_8139_OLD_RX_RESET is not set
+CONFIG_SIS900=m
+# CONFIG_EPIC100 is not set
+# CONFIG_SUNDANCE is not set
+# CONFIG_TLAN is not set
+CONFIG_VIA_RHINE=m
+# CONFIG_VIA_RHINE_MMIO is not set
+# CONFIG_NET_POCKET is not set
+
+#
+# Ethernet (1000 Mbit)
+#
+# CONFIG_ACENIC is not set
+# CONFIG_DL2K is not set
+CONFIG_E1000=m
+# CONFIG_E1000_NAPI is not set
+# CONFIG_E1000_DISABLE_PACKET_SPLIT is not set
+# CONFIG_NS83820 is not set
+# CONFIG_HAMACHI is not set
+# CONFIG_YELLOWFIN is not set
+CONFIG_R8169=m
+# CONFIG_R8169_NAPI is not set
+# CONFIG_R8169_VLAN is not set
+CONFIG_SIS190=m
+CONFIG_SKGE=m
+CONFIG_SKY2=m
+CONFIG_SK98LIN=m
+# CONFIG_VIA_VELOCITY is not set
+CONFIG_TIGON3=m
+CONFIG_BNX2=m
+
+#
+# Ethernet (10000 Mbit)
+#
+# CONFIG_CHELSIO_T1 is not set
+# CONFIG_IXGB is not set
+# CONFIG_S2IO is not set
+
+#
+# Token Ring devices
+#
+# CONFIG_TR is not set
+
+#
+# Wireless LAN (non-hamradio)
+#
+# CONFIG_NET_RADIO is not set
+
+#
+# Wan interfaces
+#
+# CONFIG_WAN is not set
+# CONFIG_FDDI is not set
+# CONFIG_HIPPI is not set
+# CONFIG_PLIP is not set
+CONFIG_PPP=m
+# CONFIG_PPP_MULTILINK is not set
+# CONFIG_PPP_FILTER is not set
+CONFIG_PPP_ASYNC=m
+# CONFIG_PPP_SYNC_TTY is not set
+CONFIG_PPP_DEFLATE=m
+CONFIG_PPP_BSDCOMP=m
+# CONFIG_PPP_MPPE is not set
+# CONFIG_PPPOE is not set
+# CONFIG_SLIP is not set
+CONFIG_NET_FC=y
+# CONFIG_SHAPER is not set
+CONFIG_NETCONSOLE=m
+CONFIG_NETPOLL=y
+# CONFIG_NETPOLL_RX is not set
+# CONFIG_NETPOLL_TRAP is not set
+CONFIG_NET_POLL_CONTROLLER=y
+
+#
+# ISDN subsystem
+#
+# CONFIG_ISDN is not set
+
+#
+# Telephony Support
+#
+# CONFIG_PHONE is not set
+
+#
+# Input device support
+#
+CONFIG_INPUT=y
+
+#
+# Userland interfaces
+#
+CONFIG_INPUT_MOUSEDEV=y
+CONFIG_INPUT_MOUSEDEV_PSAUX=y
+CONFIG_INPUT_MOUSEDEV_SCREEN_X=1024
+CONFIG_INPUT_MOUSEDEV_SCREEN_Y=768
+# CONFIG_INPUT_JOYDEV is not set
+# CONFIG_INPUT_TSDEV is not set
+CONFIG_INPUT_EVDEV=y
+# CONFIG_INPUT_EVBUG is not set
+
+#
+# Input Device Drivers
+#
+CONFIG_INPUT_KEYBOARD=y
+CONFIG_KEYBOARD_ATKBD=y
+# CONFIG_KEYBOARD_SUNKBD is not set
+# CONFIG_KEYBOARD_LKKBD is not set
+# CONFIG_KEYBOARD_XTKBD is not set
+# CONFIG_KEYBOARD_NEWTON is not set
+CONFIG_INPUT_MOUSE=y
+CONFIG_MOUSE_PS2=y
+# CONFIG_MOUSE_SERIAL is not set
+# CONFIG_MOUSE_INPORT is not set
+# CONFIG_MOUSE_LOGIBM is not set
+# CONFIG_MOUSE_PC110PAD is not set
+# CONFIG_MOUSE_VSXXXAA is not set
+# CONFIG_INPUT_JOYSTICK is not set
+# CONFIG_INPUT_TOUCHSCREEN is not set
+# CONFIG_INPUT_MISC is not set
+
+#
+# Hardware I/O ports
+#
+CONFIG_SERIO=y
+CONFIG_SERIO_I8042=y
+CONFIG_SERIO_SERPORT=y
+# CONFIG_SERIO_CT82C710 is not set
+# CONFIG_SERIO_PARKBD is not set
+# CONFIG_SERIO_PCIPS2 is not set
+CONFIG_SERIO_LIBPS2=y
+# CONFIG_SERIO_RAW is not set
+# CONFIG_GAMEPORT is not set
+
+#
+# Character devices
+#
+CONFIG_VT=y
+CONFIG_VT_CONSOLE=y
+CONFIG_HW_CONSOLE=y
+# CONFIG_SERIAL_NONSTANDARD is not set
+
+#
+# Serial drivers
+#
+CONFIG_SERIAL_8250=y
+CONFIG_SERIAL_8250_CONSOLE=y
+# CONFIG_SERIAL_8250_ACPI is not set
+CONFIG_SERIAL_8250_NR_UARTS=4
+CONFIG_SERIAL_8250_RUNTIME_UARTS=4
+# CONFIG_SERIAL_8250_EXTENDED is not set
+
+#
+# Non-8250 serial port support
+#
+CONFIG_SERIAL_CORE=y
+CONFIG_SERIAL_CORE_CONSOLE=y
+# CONFIG_SERIAL_JSM is not set
+CONFIG_UNIX98_PTYS=y
+CONFIG_LEGACY_PTYS=y
+CONFIG_LEGACY_PTY_COUNT=256
+CONFIG_PRINTER=m
+# CONFIG_LP_CONSOLE is not set
+# CONFIG_PPDEV is not set
+# CONFIG_TIPAR is not set
+
+#
+# IPMI
+#
+CONFIG_IPMI_HANDLER=m
+# CONFIG_IPMI_PANIC_EVENT is not set
+CONFIG_IPMI_DEVICE_INTERFACE=m
+CONFIG_IPMI_SI=m
+CONFIG_IPMI_WATCHDOG=m
+# CONFIG_IPMI_POWEROFF is not set
+
+#
+# Watchdog Cards
+#
+# CONFIG_WATCHDOG is not set
+# CONFIG_HW_RANDOM is not set
+# CONFIG_NVRAM is not set
+CONFIG_RTC=y
+# CONFIG_DTLK is not set
+# CONFIG_R3964 is not set
+# CONFIG_APPLICOM is not set
+# CONFIG_SONYPI is not set
+
+#
+# Ftape, the floppy tape device driver
+#
+# CONFIG_AGP is not set
+# CONFIG_DRM is not set
+# CONFIG_MWAVE is not set
+# CONFIG_CS5535_GPIO is not set
+# CONFIG_RAW_DRIVER is not set
+# CONFIG_HPET is not set
+# CONFIG_HANGCHECK_TIMER is not set
+
+#
+# TPM devices
+#
+# CONFIG_TCG_TPM is not set
+# CONFIG_TELCLOCK is not set
+
+#
+# I2C support
+#
+CONFIG_I2C=m
+CONFIG_I2C_CHARDEV=m
+
+#
+# I2C Algorithms
+#
+CONFIG_I2C_ALGOBIT=m
+CONFIG_I2C_ALGOPCF=m
+CONFIG_I2C_ALGOPCA=m
+
+#
+# I2C Hardware Bus support
+#
+CONFIG_I2C_ALI1535=m
+CONFIG_I2C_ALI1563=m
+CONFIG_I2C_ALI15X3=m
+CONFIG_I2C_AMD756=m
+CONFIG_I2C_AMD756_S4882=m
+CONFIG_I2C_AMD8111=m
+CONFIG_I2C_I801=m
+CONFIG_I2C_I810=m
+CONFIG_I2C_PIIX4=m
+CONFIG_I2C_ISA=m
+CONFIG_I2C_NFORCE2=m
+CONFIG_I2C_PARPORT=m
+CONFIG_I2C_PARPORT_LIGHT=m
+CONFIG_I2C_PROSAVAGE=m
+CONFIG_I2C_SAVAGE4=m
+CONFIG_SCx200_ACB=m
+CONFIG_I2C_SIS5595=m
+CONFIG_I2C_SIS630=m
+CONFIG_I2C_SIS96X=m
+CONFIG_I2C_STUB=m
+CONFIG_I2C_VIA=m
+CONFIG_I2C_VIAPRO=m
+CONFIG_I2C_VOODOO3=m
+CONFIG_I2C_PCA_ISA=m
+
+#
+# Miscellaneous I2C Chip support
+#
+CONFIG_SENSORS_DS1337=m
+CONFIG_SENSORS_DS1374=m
+CONFIG_SENSORS_EEPROM=m
+CONFIG_SENSORS_PCF8574=m
+CONFIG_SENSORS_PCA9539=m
+CONFIG_SENSORS_PCF8591=m
+CONFIG_SENSORS_RTC8564=m
+CONFIG_SENSORS_MAX6875=m
+# CONFIG_RTC_X1205_I2C is not set
+# CONFIG_I2C_DEBUG_CORE is not set
+# CONFIG_I2C_DEBUG_ALGO is not set
+# CONFIG_I2C_DEBUG_BUS is not set
+# CONFIG_I2C_DEBUG_CHIP is not set
+
+#
+# SPI support
+#
+# CONFIG_SPI is not set
+# CONFIG_SPI_MASTER is not set
+
+#
+# Dallas's 1-wire bus
+#
+# CONFIG_W1 is not set
+
+#
+# Hardware Monitoring support
+#
+CONFIG_HWMON=y
+CONFIG_HWMON_VID=m
+CONFIG_SENSORS_ADM1021=m
+CONFIG_SENSORS_ADM1025=m
+CONFIG_SENSORS_ADM1026=m
+CONFIG_SENSORS_ADM1031=m
+CONFIG_SENSORS_ADM9240=m
+CONFIG_SENSORS_ASB100=m
+CONFIG_SENSORS_ATXP1=m
+CONFIG_SENSORS_DS1621=m
+# CONFIG_SENSORS_F71805F is not set
+CONFIG_SENSORS_FSCHER=m
+CONFIG_SENSORS_FSCPOS=m
+CONFIG_SENSORS_GL518SM=m
+CONFIG_SENSORS_GL520SM=m
+CONFIG_SENSORS_IT87=m
+CONFIG_SENSORS_LM63=m
+CONFIG_SENSORS_LM75=m
+CONFIG_SENSORS_LM77=m
+CONFIG_SENSORS_LM78=m
+CONFIG_SENSORS_LM80=m
+CONFIG_SENSORS_LM83=m
+CONFIG_SENSORS_LM85=m
+CONFIG_SENSORS_LM87=m
+CONFIG_SENSORS_LM90=m
+CONFIG_SENSORS_LM92=m
+CONFIG_SENSORS_MAX1619=m
+CONFIG_SENSORS_PC87360=m
+CONFIG_SENSORS_SIS5595=m
+CONFIG_SENSORS_SMSC47M1=m
+CONFIG_SENSORS_SMSC47B397=m
+CONFIG_SENSORS_VIA686A=m
+# CONFIG_SENSORS_VT8231 is not set
+CONFIG_SENSORS_W83781D=m
+CONFIG_SENSORS_W83792D=m
+CONFIG_SENSORS_W83L785TS=m
+CONFIG_SENSORS_W83627HF=m
+CONFIG_SENSORS_W83627EHF=m
+# CONFIG_SENSORS_HDAPS is not set
+# CONFIG_HWMON_DEBUG_CHIP is not set
+
+#
+# Misc devices
+#
+# CONFIG_IBM_ASM is not set
+
+#
+# Multimedia Capabilities Port drivers
+#
+
+#
+# Multimedia devices
+#
+# CONFIG_VIDEO_DEV is not set
+
+#
+# Digital Video Broadcasting Devices
+#
+# CONFIG_DVB is not set
+
+#
+# Graphics support
+#
+# CONFIG_FB is not set
+CONFIG_VIDEO_SELECT=y
+
+#
+# Console display driver support
+#
+CONFIG_VGA_CONSOLE=y
+# CONFIG_MDA_CONSOLE is not set
+CONFIG_DUMMY_CONSOLE=y
+
+#
+# Sound
+#
+# CONFIG_SOUND is not set
+
+#
+# USB support
+#
+CONFIG_USB_ARCH_HAS_HCD=y
+CONFIG_USB_ARCH_HAS_OHCI=y
+CONFIG_USB=m
+# CONFIG_USB_DEBUG is not set
+
+#
+# Miscellaneous USB options
+#
+CONFIG_USB_DEVICEFS=y
+# CONFIG_USB_BANDWIDTH is not set
+# CONFIG_USB_DYNAMIC_MINORS is not set
+# CONFIG_USB_SUSPEND is not set
+# CONFIG_USB_OTG is not set
+
+#
+# USB Host Controller Drivers
+#
+CONFIG_USB_EHCI_HCD=m
+# CONFIG_USB_EHCI_SPLIT_ISO is not set
+# CONFIG_USB_EHCI_ROOT_HUB_TT is not set
+# CONFIG_USB_ISP116X_HCD is not set
+CONFIG_USB_OHCI_HCD=m
+# CONFIG_USB_OHCI_BIG_ENDIAN is not set
+CONFIG_USB_OHCI_LITTLE_ENDIAN=y
+CONFIG_USB_UHCI_HCD=m
+# CONFIG_USB_SL811_HCD is not set
+
+#
+# USB Device Class drivers
+#
+CONFIG_USB_ACM=m
+CONFIG_USB_PRINTER=m
+
+#
+# NOTE: USB_STORAGE enables SCSI, and 'SCSI disk support'
+#
+
+#
+# may also be needed; see USB_STORAGE Help for more information
+#
+CONFIG_USB_STORAGE=m
+# CONFIG_USB_STORAGE_DEBUG is not set
+CONFIG_USB_STORAGE_DATAFAB=y
+CONFIG_USB_STORAGE_FREECOM=y
+CONFIG_USB_STORAGE_ISD200=y
+CONFIG_USB_STORAGE_DPCM=y
+# CONFIG_USB_STORAGE_USBAT is not set
+CONFIG_USB_STORAGE_SDDR09=y
+# CONFIG_USB_STORAGE_SDDR55 is not set
+CONFIG_USB_STORAGE_JUMPSHOT=y
+# CONFIG_USB_STORAGE_ALAUDA is not set
+# CONFIG_USB_LIBUSUAL is not set
+
+#
+# USB Input Devices
+#
+CONFIG_USB_HID=m
+CONFIG_USB_HIDINPUT=y
+# CONFIG_USB_HIDINPUT_POWERBOOK is not set
+# CONFIG_HID_FF is not set
+# CONFIG_USB_HIDDEV is not set
+
+#
+# USB HID Boot Protocol drivers
+#
+CONFIG_USB_KBD=m
+CONFIG_USB_MOUSE=m
+# CONFIG_USB_AIPTEK is not set
+CONFIG_USB_WACOM=m
+# CONFIG_USB_ACECAD is not set
+# CONFIG_USB_KBTAB is not set
+# CONFIG_USB_POWERMATE is not set
+# CONFIG_USB_MTOUCH is not set
+# CONFIG_USB_ITMTOUCH is not set
+# CONFIG_USB_EGALAX is not set
+# CONFIG_USB_YEALINK is not set
+# CONFIG_USB_XPAD is not set
+# CONFIG_USB_ATI_REMOTE is not set
+# CONFIG_USB_ATI_REMOTE2 is not set
+# CONFIG_USB_KEYSPAN_REMOTE is not set
+# CONFIG_USB_APPLETOUCH is not set
+
+#
+# USB Imaging devices
+#
+# CONFIG_USB_MDC800 is not set
+# CONFIG_USB_MICROTEK is not set
+
+#
+# USB Multimedia devices
+#
+# CONFIG_USB_DABUSB is not set
+
+#
+# Video4Linux support is needed for USB Multimedia device support
+#
+
+#
+# USB Network Adapters
+#
+# CONFIG_USB_CATC is not set
+# CONFIG_USB_KAWETH is not set
+# CONFIG_USB_PEGASUS is not set
+# CONFIG_USB_RTL8150 is not set
+# CONFIG_USB_USBNET is not set
+# CONFIG_USB_MON is not set
+
+#
+# USB port drivers
+#
+# CONFIG_USB_USS720 is not set
+
+#
+# USB Serial Converter support
+#
+# CONFIG_USB_SERIAL is not set
+
+#
+# USB Miscellaneous drivers
+#
+# CONFIG_USB_EMI62 is not set
+# CONFIG_USB_EMI26 is not set
+# CONFIG_USB_AUERSWALD is not set
+# CONFIG_USB_RIO500 is not set
+# CONFIG_USB_LEGOTOWER is not set
+# CONFIG_USB_LCD is not set
+# CONFIG_USB_LED is not set
+# CONFIG_USB_CYTHERM is not set
+# CONFIG_USB_PHIDGETKIT is not set
+# CONFIG_USB_PHIDGETSERVO is not set
+# CONFIG_USB_IDMOUSE is not set
+# CONFIG_USB_SISUSBVGA is not set
+# CONFIG_USB_LD is not set
+# CONFIG_USB_TEST is not set
+
+#
+# USB DSL modem support
+#
+
+#
+# USB Gadget Support
+#
+# CONFIG_USB_GADGET is not set
+
+#
+# MMC/SD Card support
+#
+# CONFIG_MMC is not set
+
+#
+# InfiniBand support
+#
+# CONFIG_INFINIBAND is not set
+
+#
+# EDAC - error detection and reporting (RAS) (EXPERIMENTAL)
+#
+# CONFIG_EDAC is not set
+
+#
+# File systems
+#
+CONFIG_EXT2_FS=y
+# CONFIG_EXT2_FS_XATTR is not set
+# CONFIG_EXT2_FS_XIP is not set
+CONFIG_EXT3_FS=y
+CONFIG_EXT3_FS_XATTR=y
+# CONFIG_EXT3_FS_POSIX_ACL is not set
+# CONFIG_EXT3_FS_SECURITY is not set
+CONFIG_JBD=y
+CONFIG_JBD_DEBUG=y
+CONFIG_FS_MBCACHE=y
+CONFIG_REISERFS_FS=y
+# CONFIG_REISERFS_CHECK is not set
+CONFIG_REISERFS_PROC_INFO=y
+# CONFIG_REISERFS_FS_XATTR is not set
+# CONFIG_JFS_FS is not set
+# CONFIG_FS_POSIX_ACL is not set
+# CONFIG_XFS_FS is not set
+# CONFIG_OCFS2_FS is not set
+# CONFIG_MINIX_FS is not set
+# CONFIG_ROMFS_FS is not set
+CONFIG_INOTIFY=y
+CONFIG_QUOTA=y
+CONFIG_QUOTA_COMPAT=y
+# CONFIG_QFMT_V1 is not set
+CONFIG_QFMT_V2=y
+CONFIG_SIM_FS=m
+CONFIG_VZ_QUOTA=m
+# CONFIG_VZ_QUOTA_UNLOAD is not set
+CONFIG_VZ_QUOTA_UGID=y
+CONFIG_QUOTACTL=y
+CONFIG_DNOTIFY=y
+# CONFIG_AUTOFS_FS is not set
+# CONFIG_AUTOFS4_FS is not set
+# CONFIG_FUSE_FS is not set
+
+#
+# CD-ROM/DVD Filesystems
+#
+CONFIG_ISO9660_FS=m
+# CONFIG_JOLIET is not set
+# CONFIG_ZISOFS is not set
+# CONFIG_UDF_FS is not set
+
+#
+# DOS/FAT/NT Filesystems
+#
+CONFIG_FAT_FS=m
+CONFIG_MSDOS_FS=m
+CONFIG_VFAT_FS=m
+CONFIG_FAT_DEFAULT_CODEPAGE=437
+CONFIG_FAT_DEFAULT_IOCHARSET="iso8859-1"
+# CONFIG_NTFS_FS is not set
+
+#
+# Pseudo filesystems
+#
+CONFIG_PROC_FS=y
+CONFIG_PROC_KCORE=y
+CONFIG_SYSFS=y
+CONFIG_TMPFS=y
+# CONFIG_HUGETLBFS is not set
+# CONFIG_HUGETLB_PAGE is not set
+CONFIG_RAMFS=y
+# CONFIG_RELAYFS_FS is not set
+# CONFIG_CONFIGFS_FS is not set
+
+#
+# Miscellaneous filesystems
+#
+# CONFIG_ADFS_FS is not set
+# CONFIG_AFFS_FS is not set
+# CONFIG_HFS_FS is not set
+# CONFIG_HFSPLUS_FS is not set
+# CONFIG_BEFS_FS is not set
+# CONFIG_BFS_FS is not set
+# CONFIG_EFS_FS is not set
+CONFIG_CRAMFS=y
+# CONFIG_VXFS_FS is not set
+# CONFIG_HPFS_FS is not set
+# CONFIG_QNX4FS_FS is not set
+# CONFIG_SYSV_FS is not set
+# CONFIG_UFS_FS is not set
+
+#
+# Network File Systems
+#
+CONFIG_NFS_FS=m
+CONFIG_NFS_V3=y
+# CONFIG_NFS_V3_ACL is not set
+# CONFIG_NFS_V4 is not set
+# CONFIG_NFS_DIRECTIO is not set
+# CONFIG_NFSD is not set
+CONFIG_LOCKD=m
+CONFIG_LOCKD_V4=y
+CONFIG_NFS_COMMON=y
+CONFIG_SUNRPC=m
+# CONFIG_RPCSEC_GSS_KRB5 is not set
+# CONFIG_RPCSEC_GSS_SPKM3 is not set
+CONFIG_SMB_FS=m
+# CONFIG_SMB_NLS_DEFAULT is not set
+CONFIG_CIFS=m
+# CONFIG_CIFS_STATS is not set
+# CONFIG_CIFS_XATTR is not set
+# CONFIG_CIFS_EXPERIMENTAL is not set
+# CONFIG_NCP_FS is not set
+# CONFIG_CODA_FS is not set
+# CONFIG_AFS_FS is not set
+# CONFIG_9P_FS is not set
+
+#
+# Partition Types
+#
+# CONFIG_PARTITION_ADVANCED is not set
+CONFIG_MSDOS_PARTITION=y
+
+#
+# Native Language Support
+#
+CONFIG_NLS=y
+CONFIG_NLS_DEFAULT="iso8859-1"
+CONFIG_NLS_CODEPAGE_437=m
+CONFIG_NLS_CODEPAGE_737=m
+CONFIG_NLS_CODEPAGE_775=m
+CONFIG_NLS_CODEPAGE_850=m
+CONFIG_NLS_CODEPAGE_852=m
+CONFIG_NLS_CODEPAGE_855=m
+CONFIG_NLS_CODEPAGE_857=m
+CONFIG_NLS_CODEPAGE_860=m
+CONFIG_NLS_CODEPAGE_861=m
+CONFIG_NLS_CODEPAGE_862=m
+CONFIG_NLS_CODEPAGE_863=m
+CONFIG_NLS_CODEPAGE_864=m
+CONFIG_NLS_CODEPAGE_865=m
+CONFIG_NLS_CODEPAGE_866=m
+CONFIG_NLS_CODEPAGE_869=m
+CONFIG_NLS_CODEPAGE_936=m
+CONFIG_NLS_CODEPAGE_950=m
+CONFIG_NLS_CODEPAGE_932=m
+CONFIG_NLS_CODEPAGE_949=m
+CONFIG_NLS_CODEPAGE_874=m
+CONFIG_NLS_ISO8859_8=m
+CONFIG_NLS_CODEPAGE_1250=m
+CONFIG_NLS_CODEPAGE_1251=m
+CONFIG_NLS_ASCII=m
+CONFIG_NLS_ISO8859_1=m
+CONFIG_NLS_ISO8859_2=m
+CONFIG_NLS_ISO8859_3=m
+CONFIG_NLS_ISO8859_4=m
+CONFIG_NLS_ISO8859_5=m
+CONFIG_NLS_ISO8859_6=m
+CONFIG_NLS_ISO8859_7=m
+CONFIG_NLS_ISO8859_9=m
+CONFIG_NLS_ISO8859_13=m
+CONFIG_NLS_ISO8859_14=m
+CONFIG_NLS_ISO8859_15=m
+CONFIG_NLS_KOI8_R=m
+CONFIG_NLS_KOI8_U=m
+CONFIG_NLS_UTF8=m
+
+#
+# Instrumentation Support
+#
+CONFIG_PROFILING=y
+CONFIG_OPROFILE=m
+# CONFIG_KPROBES is not set
+
+#
+# Kernel hacking
+#
+# CONFIG_PRINTK_TIME is not set
+CONFIG_MAGIC_SYSRQ=y
+CONFIG_DEBUG_KERNEL=y
+CONFIG_LOG_BUF_SHIFT=17
+# CONFIG_SCHEDSTATS is not set
+# CONFIG_DEBUG_SLAB is not set
+# CONFIG_DEBUG_MUTEXES is not set
+# CONFIG_DEBUG_SPINLOCK is not set
+# CONFIG_DEBUG_SPINLOCK_SLEEP is not set
+# CONFIG_DEBUG_KOBJECT is not set
+# CONFIG_DEBUG_HIGHMEM is not set
+CONFIG_DEBUG_BUGVERBOSE=y
+# CONFIG_DEBUG_INFO is not set
+# CONFIG_DEBUG_FS is not set
+# CONFIG_DEBUG_VM is not set
+# CONFIG_FRAME_POINTER is not set
+CONFIG_FORCED_INLINING=y
+# CONFIG_RCU_TORTURE_TEST is not set
+CONFIG_EARLY_PRINTK=y
+CONFIG_DEBUG_STACKOVERFLOW=y
+# CONFIG_DEBUG_STACK_USAGE is not set
+
+#
+# Page alloc debug is incompatible with Software Suspend on i386
+#
+# CONFIG_DEBUG_RODATA is not set
+CONFIG_4KSTACKS=y
+CONFIG_X86_FIND_SMP_CONFIG=y
+CONFIG_X86_MPPARSE=y
+
+#
+# OpenVZ
+#
+CONFIG_VE=y
+CONFIG_VE_CALLS=m
+CONFIG_VE_NETDEV=m
+CONFIG_VE_ETHDEV=m
+CONFIG_VE_IPTABLES=y
+CONFIG_VZ_WDOG=m
+CONFIG_VZ_CHECKPOINT=m
+
+#
+# Security options
+#
+# CONFIG_KEYS is not set
+# CONFIG_SECURITY is not set
+
+#
+# Cryptographic options
+#
+CONFIG_CRYPTO=y
+# CONFIG_CRYPTO_HMAC is not set
+# CONFIG_CRYPTO_NULL is not set
+# CONFIG_CRYPTO_MD4 is not set
+CONFIG_CRYPTO_MD5=m
+# CONFIG_CRYPTO_SHA1 is not set
+# CONFIG_CRYPTO_SHA256 is not set
+# CONFIG_CRYPTO_SHA512 is not set
+# CONFIG_CRYPTO_WP512 is not set
+# CONFIG_CRYPTO_TGR192 is not set
+# CONFIG_CRYPTO_DES is not set
+# CONFIG_CRYPTO_BLOWFISH is not set
+# CONFIG_CRYPTO_TWOFISH is not set
+# CONFIG_CRYPTO_SERPENT is not set
+# CONFIG_CRYPTO_AES is not set
+# CONFIG_CRYPTO_AES_586 is not set
+# CONFIG_CRYPTO_CAST5 is not set
+# CONFIG_CRYPTO_CAST6 is not set
+# CONFIG_CRYPTO_TEA is not set
+# CONFIG_CRYPTO_ARC4 is not set
+# CONFIG_CRYPTO_KHAZAD is not set
+# CONFIG_CRYPTO_ANUBIS is not set
+# CONFIG_CRYPTO_DEFLATE is not set
+# CONFIG_CRYPTO_MICHAEL_MIC is not set
+CONFIG_CRYPTO_CRC32C=m
+# CONFIG_CRYPTO_TEST is not set
+
+#
+# Hardware crypto devices
+#
+# CONFIG_CRYPTO_DEV_PADLOCK is not set
+
+#
+# Library routines
+#
+CONFIG_CRC_CCITT=m
+# CONFIG_CRC16 is not set
+CONFIG_CRC32=y
+CONFIG_LIBCRC32C=m
+CONFIG_ZLIB_INFLATE=y
+CONFIG_ZLIB_DEFLATE=m
+
+#
+# User resources
+#
+CONFIG_USER_RESOURCE=y
+CONFIG_USER_RSS_ACCOUNTING=y
+CONFIG_USER_SWAP_ACCOUNTING=y
+CONFIG_USER_RESOURCE_PROC=y
+CONFIG_UBC_DEBUG=y
+CONFIG_UBC_DEBUG_KMEM=y
+# CONFIG_UBC_KEEP_UNUSED is not set
+# CONFIG_UBC_DEBUG_ITEMS is not set
+# CONFIG_UBC_UNLIMITED is not set
+CONFIG_GENERIC_HARDIRQS=y
+CONFIG_GENERIC_IRQ_PROBE=y
+CONFIG_GENERIC_PENDING_IRQ=y
+CONFIG_X86_SMP=y
+CONFIG_X86_HT=y
+CONFIG_X86_BIOS_REBOOT=y
+CONFIG_X86_TRAMPOLINE=y
+CONFIG_KTIME_SCALAR=y
diff -uprN linux-2.6.16/configs/kernel-2.6.16-026test015-i686.config.ovz linux-2.6.16.ovz/configs/kernel-2.6.16-026test015-i686.config.ovz
--- linux-2.6.16/configs/kernel-2.6.16-026test015-i686.config.ovz	1969-12-31 19:00:00.000000000 -0500
+++ linux-2.6.16.ovz/configs/kernel-2.6.16-026test015-i686.config.ovz	2006-07-05 08:35:04.000000000 -0400
@@ -0,0 +1,1705 @@
+#
+# Automatically generated make config: don't edit
+# Linux kernel version: 2.6.16-026test015
+# Wed Jul  5 08:35:04 2006
+#
+CONFIG_X86_32=y
+CONFIG_SEMAPHORE_SLEEPERS=y
+CONFIG_X86=y
+CONFIG_MMU=y
+CONFIG_GENERIC_ISA_DMA=y
+CONFIG_GENERIC_IOMAP=y
+CONFIG_ARCH_MAY_HAVE_PC_FDC=y
+CONFIG_DMI=y
+
+#
+# Code maturity level options
+#
+CONFIG_EXPERIMENTAL=y
+CONFIG_BROKEN_ON_SMP=y
+CONFIG_INIT_ENV_ARG_LIMIT=32
+
+#
+# General setup
+#
+CONFIG_LOCALVERSION=""
+# CONFIG_LOCALVERSION_AUTO is not set
+CONFIG_SWAP=y
+CONFIG_SYSVIPC=y
+# CONFIG_POSIX_MQUEUE is not set
+# CONFIG_BSD_PROCESS_ACCT is not set
+CONFIG_SYSCTL=y
+# CONFIG_AUDIT is not set
+# CONFIG_IKCONFIG is not set
+CONFIG_INITRAMFS_SOURCE=""
+CONFIG_UID16=y
+CONFIG_VM86=y
+# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
+# CONFIG_EMBEDDED is not set
+CONFIG_KALLSYMS=y
+# CONFIG_KALLSYMS_ALL is not set
+# CONFIG_KALLSYMS_EXTRA_PASS is not set
+CONFIG_HOTPLUG=y
+CONFIG_PRINTK=y
+CONFIG_BUG=y
+CONFIG_ELF_CORE=y
+CONFIG_BASE_FULL=y
+CONFIG_FUTEX=y
+CONFIG_EPOLL=y
+CONFIG_SHMEM=y
+CONFIG_CC_ALIGN_FUNCTIONS=0
+CONFIG_CC_ALIGN_LABELS=0
+CONFIG_CC_ALIGN_LOOPS=0
+CONFIG_CC_ALIGN_JUMPS=0
+CONFIG_SLAB=y
+# CONFIG_TINY_SHMEM is not set
+CONFIG_BASE_SMALL=0
+# CONFIG_SLOB is not set
+
+#
+# Loadable module support
+#
+CONFIG_MODULES=y
+CONFIG_MODULE_UNLOAD=y
+# CONFIG_MODULE_FORCE_UNLOAD is not set
+CONFIG_OBSOLETE_MODPARM=y
+# CONFIG_MODVERSIONS is not set
+# CONFIG_MODULE_SRCVERSION_ALL is not set
+CONFIG_KMOD=y
+
+#
+# Block layer
+#
+CONFIG_LBD=y
+
+#
+# IO Schedulers
+#
+CONFIG_IOSCHED_NOOP=y
+CONFIG_IOSCHED_AS=y
+CONFIG_IOSCHED_DEADLINE=y
+CONFIG_IOSCHED_CFQ=y
+# CONFIG_DEFAULT_AS is not set
+# CONFIG_DEFAULT_DEADLINE is not set
+CONFIG_DEFAULT_CFQ=y
+# CONFIG_DEFAULT_NOOP is not set
+CONFIG_DEFAULT_IOSCHED="cfq"
+
+#
+# Processor type and features
+#
+CONFIG_X86_PC=y
+# CONFIG_X86_ELAN is not set
+# CONFIG_X86_VOYAGER is not set
+# CONFIG_X86_NUMAQ is not set
+# CONFIG_X86_SUMMIT is not set
+# CONFIG_X86_BIGSMP is not set
+# CONFIG_X86_VISWS is not set
+# CONFIG_X86_GENERICARCH is not set
+# CONFIG_X86_ES7000 is not set
+# CONFIG_M386 is not set
+# CONFIG_M486 is not set
+# CONFIG_M586 is not set
+# CONFIG_M586TSC is not set
+# CONFIG_M586MMX is not set
+CONFIG_M686=y
+# CONFIG_MPENTIUMII is not set
+# CONFIG_MPENTIUMIII is not set
+# CONFIG_MPENTIUMM is not set
+# CONFIG_MPENTIUM4 is not set
+# CONFIG_MK6 is not set
+# CONFIG_MK7 is not set
+# CONFIG_MK8 is not set
+# CONFIG_MCRUSOE is not set
+# CONFIG_MEFFICEON is not set
+# CONFIG_MWINCHIPC6 is not set
+# CONFIG_MWINCHIP2 is not set
+# CONFIG_MWINCHIP3D is not set
+# CONFIG_MGEODEGX1 is not set
+# CONFIG_MGEODE_LX is not set
+# CONFIG_MCYRIXIII is not set
+# CONFIG_MVIAC3_2 is not set
+# CONFIG_X86_GENERIC is not set
+CONFIG_X86_CMPXCHG=y
+CONFIG_X86_XADD=y
+CONFIG_X86_L1_CACHE_SHIFT=5
+CONFIG_RWSEM_XCHGADD_ALGORITHM=y
+CONFIG_GENERIC_CALIBRATE_DELAY=y
+CONFIG_X86_PPRO_FENCE=y
+CONFIG_X86_WP_WORKS_OK=y
+CONFIG_X86_INVLPG=y
+CONFIG_X86_BSWAP=y
+CONFIG_X86_POPAD_OK=y
+CONFIG_X86_CMPXCHG64=y
+CONFIG_X86_GOOD_APIC=y
+CONFIG_X86_USE_PPRO_CHECKSUM=y
+CONFIG_X86_TSC=y
+CONFIG_HPET_TIMER=y
+CONFIG_HPET_EMULATE_RTC=y
+# CONFIG_SMP is not set
+CONFIG_SCHED_VCPU=y
+CONFIG_FAIRSCHED=y
+CONFIG_PREEMPT_NONE=y
+# CONFIG_PREEMPT_VOLUNTARY is not set
+# CONFIG_PREEMPT is not set
+CONFIG_X86_UP_APIC=y
+CONFIG_X86_UP_IOAPIC=y
+CONFIG_X86_LOCAL_APIC=y
+CONFIG_X86_IO_APIC=y
+CONFIG_NMI_WATCHDOG=y
+CONFIG_X86_MCE=y
+# CONFIG_X86_MCE_NONFATAL is not set
+CONFIG_X86_MCE_P4THERMAL=y
+# CONFIG_TOSHIBA is not set
+# CONFIG_I8K is not set
+# CONFIG_X86_REBOOTFIXUPS is not set
+CONFIG_MICROCODE=m
+CONFIG_X86_MSR=y
+# CONFIG_X86_CPUID is not set
+
+#
+# Firmware Drivers
+#
+# CONFIG_EDD is not set
+CONFIG_DELL_RBU=m
+CONFIG_DCDBAS=m
+# CONFIG_NOHIGHMEM is not set
+CONFIG_HIGHMEM4G=y
+# CONFIG_HIGHMEM64G is not set
+CONFIG_VMSPLIT_3G=y
+# CONFIG_VMSPLIT_3G_OPT is not set
+# CONFIG_VMSPLIT_2G is not set
+# CONFIG_VMSPLIT_1G is not set
+CONFIG_PAGE_OFFSET=0xC0000000
+CONFIG_HIGHMEM=y
+CONFIG_ARCH_FLATMEM_ENABLE=y
+CONFIG_ARCH_SPARSEMEM_ENABLE=y
+CONFIG_ARCH_SELECT_MEMORY_MODEL=y
+CONFIG_SELECT_MEMORY_MODEL=y
+CONFIG_FLATMEM_MANUAL=y
+# CONFIG_DISCONTIGMEM_MANUAL is not set
+# CONFIG_SPARSEMEM_MANUAL is not set
+CONFIG_FLATMEM=y
+CONFIG_FLAT_NODE_MEM_MAP=y
+CONFIG_SPARSEMEM_STATIC=y
+CONFIG_SPLIT_PTLOCK_CPUS=4
+CONFIG_HIGHPTE=y
+# CONFIG_MATH_EMULATION is not set
+CONFIG_MTRR=y
+# CONFIG_EFI is not set
+# CONFIG_REGPARM is not set
+# CONFIG_SECCOMP is not set
+# CONFIG_HZ_100 is not set
+# CONFIG_HZ_250 is not set
+CONFIG_HZ_1000=y
+CONFIG_HZ=1000
+# CONFIG_KEXEC is not set
+# CONFIG_CRASH_DUMP is not set
+CONFIG_PHYSICAL_START=0x100000
+CONFIG_DOUBLEFAULT=y
+
+#
+# Power management options (ACPI, APM)
+#
+CONFIG_PM=y
+CONFIG_PM_LEGACY=y
+# CONFIG_PM_DEBUG is not set
+CONFIG_SOFTWARE_SUSPEND=y
+CONFIG_PM_STD_PARTITION=""
+
+#
+# ACPI (Advanced Configuration and Power Interface) Support
+#
+CONFIG_ACPI=y
+CONFIG_ACPI_SLEEP=y
+CONFIG_ACPI_SLEEP_PROC_FS=y
+# CONFIG_ACPI_SLEEP_PROC_SLEEP is not set
+CONFIG_ACPI_AC=m
+CONFIG_ACPI_BATTERY=m
+CONFIG_ACPI_BUTTON=m
+CONFIG_ACPI_VIDEO=y
+CONFIG_ACPI_HOTKEY=m
+CONFIG_ACPI_FAN=m
+CONFIG_ACPI_PROCESSOR=m
+CONFIG_ACPI_THERMAL=m
+CONFIG_ACPI_ASUS=m
+CONFIG_ACPI_IBM=m
+CONFIG_ACPI_TOSHIBA=m
+CONFIG_ACPI_BLACKLIST_YEAR=0
+# CONFIG_ACPI_DEBUG is not set
+CONFIG_ACPI_EC=y
+CONFIG_ACPI_POWER=y
+CONFIG_ACPI_SYSTEM=y
+CONFIG_X86_PM_TIMER=y
+# CONFIG_ACPI_CONTAINER is not set
+
+#
+# APM (Advanced Power Management) BIOS Support
+#
+# CONFIG_APM is not set
+
+#
+# CPU Frequency scaling
+#
+# CONFIG_CPU_FREQ is not set
+
+#
+# Bus options (PCI, PCMCIA, EISA, MCA, ISA)
+#
+CONFIG_PCI=y
+# CONFIG_PCI_GOBIOS is not set
+# CONFIG_PCI_GOMMCONFIG is not set
+# CONFIG_PCI_GODIRECT is not set
+CONFIG_PCI_GOANY=y
+CONFIG_PCI_BIOS=y
+CONFIG_PCI_DIRECT=y
+CONFIG_PCI_MMCONFIG=y
+# CONFIG_PCIEPORTBUS is not set
+# CONFIG_PCI_MSI is not set
+# CONFIG_PCI_LEGACY_PROC is not set
+# CONFIG_PCI_DEBUG is not set
+CONFIG_ISA_DMA_API=y
+CONFIG_ISA=y
+# CONFIG_EISA is not set
+# CONFIG_MCA is not set
+# CONFIG_SCx200 is not set
+
+#
+# PCCARD (PCMCIA/CardBus) support
+#
+# CONFIG_PCCARD is not set
+
+#
+# PCI Hotplug Support
+#
+CONFIG_HOTPLUG_PCI=y
+# CONFIG_HOTPLUG_PCI_FAKE is not set
+CONFIG_HOTPLUG_PCI_COMPAQ=m
+# CONFIG_HOTPLUG_PCI_COMPAQ_NVRAM is not set
+CONFIG_HOTPLUG_PCI_IBM=m
+CONFIG_HOTPLUG_PCI_ACPI=m
+# CONFIG_HOTPLUG_PCI_ACPI_IBM is not set
+# CONFIG_HOTPLUG_PCI_CPCI is not set
+CONFIG_HOTPLUG_PCI_SHPC=m
+# CONFIG_HOTPLUG_PCI_SHPC_POLL_EVENT_MODE is not set
+
+#
+# Executable file formats
+#
+CONFIG_BINFMT_ELF=y
+CONFIG_BINFMT_AOUT=m
+# CONFIG_BINFMT_MISC is not set
+
+#
+# Networking
+#
+CONFIG_NET=y
+
+#
+# Networking options
+#
+# CONFIG_NETDEBUG is not set
+CONFIG_PACKET=m
+CONFIG_PACKET_MMAP=y
+CONFIG_UNIX=y
+# CONFIG_NET_KEY is not set
+CONFIG_INET=y
+# CONFIG_IP_MULTICAST is not set
+CONFIG_IP_ADVANCED_ROUTER=y
+CONFIG_ASK_IP_FIB_HASH=y
+# CONFIG_IP_FIB_TRIE is not set
+CONFIG_IP_FIB_HASH=y
+CONFIG_IP_MULTIPLE_TABLES=y
+CONFIG_IP_ROUTE_FWMARK=y
+CONFIG_IP_ROUTE_MULTIPATH=y
+# CONFIG_IP_ROUTE_MULTIPATH_CACHED is not set
+CONFIG_IP_ROUTE_VERBOSE=y
+# CONFIG_IP_PNP is not set
+CONFIG_NET_IPIP=m
+CONFIG_NET_IPGRE=m
+# CONFIG_ARPD is not set
+CONFIG_SYN_COOKIES=y
+# CONFIG_INET_AH is not set
+# CONFIG_INET_ESP is not set
+# CONFIG_INET_IPCOMP is not set
+# CONFIG_INET_TUNNEL is not set
+CONFIG_INET_DIAG=y
+CONFIG_INET_TCP_DIAG=y
+# CONFIG_TCP_CONG_ADVANCED is not set
+CONFIG_TCP_CONG_BIC=y
+
+#
+# IP: Virtual Server Configuration
+#
+# CONFIG_IP_VS is not set
+CONFIG_IPV6=y
+# CONFIG_IPV6_PRIVACY is not set
+# CONFIG_INET6_AH is not set
+# CONFIG_INET6_ESP is not set
+# CONFIG_INET6_IPCOMP is not set
+# CONFIG_INET6_TUNNEL is not set
+# CONFIG_IPV6_TUNNEL is not set
+CONFIG_NETFILTER=y
+# CONFIG_NETFILTER_DEBUG is not set
+# CONFIG_BRIDGE_NETFILTER is not set
+
+#
+# Core Netfilter Configuration
+#
+# CONFIG_NETFILTER_NETLINK is not set
+CONFIG_NETFILTER_XTABLES=m
+CONFIG_NETFILTER_XT_TARGET_CLASSIFY=m
+CONFIG_NETFILTER_XT_TARGET_MARK=m
+# CONFIG_NETFILTER_XT_TARGET_NFQUEUE is not set
+# CONFIG_NETFILTER_XT_MATCH_COMMENT is not set
+CONFIG_NETFILTER_XT_MATCH_CONNTRACK=m
+# CONFIG_NETFILTER_XT_MATCH_DCCP is not set
+CONFIG_NETFILTER_XT_MATCH_HELPER=m
+CONFIG_NETFILTER_XT_MATCH_LENGTH=m
+CONFIG_NETFILTER_XT_MATCH_LIMIT=m
+CONFIG_NETFILTER_XT_MATCH_MAC=m
+CONFIG_NETFILTER_XT_MATCH_MARK=m
+# CONFIG_NETFILTER_XT_MATCH_PKTTYPE is not set
+# CONFIG_NETFILTER_XT_MATCH_REALM is not set
+# CONFIG_NETFILTER_XT_MATCH_SCTP is not set
+CONFIG_NETFILTER_XT_MATCH_STATE=m
+# CONFIG_NETFILTER_XT_MATCH_STRING is not set
+CONFIG_NETFILTER_XT_MATCH_TCPMSS=m
+
+#
+# IP: Netfilter Configuration
+#
+CONFIG_IP_NF_CONNTRACK=m
+# CONFIG_IP_NF_CT_ACCT is not set
+# CONFIG_IP_NF_CONNTRACK_MARK is not set
+# CONFIG_IP_NF_CONNTRACK_EVENTS is not set
+# CONFIG_IP_NF_CT_PROTO_SCTP is not set
+CONFIG_IP_NF_FTP=m
+CONFIG_IP_NF_IRC=m
+# CONFIG_IP_NF_NETBIOS_NS is not set
+CONFIG_IP_NF_TFTP=m
+CONFIG_IP_NF_AMANDA=m
+# CONFIG_IP_NF_PPTP is not set
+# CONFIG_IP_NF_QUEUE is not set
+CONFIG_IP_NF_IPTABLES=m
+CONFIG_IP_NF_MATCH_IPRANGE=m
+CONFIG_IP_NF_MATCH_MULTIPORT=m
+CONFIG_IP_NF_MATCH_TOS=m
+CONFIG_IP_NF_MATCH_RECENT=m
+# CONFIG_IP_NF_MATCH_ECN is not set
+# CONFIG_IP_NF_MATCH_DSCP is not set
+# CONFIG_IP_NF_MATCH_AH_ESP is not set
+CONFIG_IP_NF_MATCH_TTL=m
+# CONFIG_IP_NF_MATCH_OWNER is not set
+# CONFIG_IP_NF_MATCH_ADDRTYPE is not set
+# CONFIG_IP_NF_MATCH_HASHLIMIT is not set
+CONFIG_IP_NF_FILTER=m
+CONFIG_IP_NF_TARGET_REJECT=m
+CONFIG_IP_NF_TARGET_LOG=m
+# CONFIG_IP_NF_TARGET_ULOG is not set
+CONFIG_IP_NF_TARGET_TCPMSS=m
+CONFIG_IP_NF_NAT=m
+CONFIG_IP_NF_NAT_NEEDED=y
+CONFIG_IP_NF_TARGET_MASQUERADE=m
+CONFIG_IP_NF_TARGET_REDIRECT=m
+CONFIG_IP_NF_TARGET_NETMAP=m
+CONFIG_IP_NF_TARGET_SAME=m
+# CONFIG_IP_NF_NAT_SNMP_BASIC is not set
+CONFIG_IP_NF_NAT_IRC=m
+CONFIG_IP_NF_NAT_FTP=m
+CONFIG_IP_NF_NAT_TFTP=m
+CONFIG_IP_NF_NAT_AMANDA=m
+CONFIG_IP_NF_MANGLE=m
+CONFIG_IP_NF_TARGET_TOS=m
+# CONFIG_IP_NF_TARGET_ECN is not set
+# CONFIG_IP_NF_TARGET_DSCP is not set
+CONFIG_IP_NF_TARGET_TTL=m
+# CONFIG_IP_NF_RAW is not set
+# CONFIG_IP_NF_ARPTABLES is not set
+
+#
+# IPv6: Netfilter Configuration (EXPERIMENTAL)
+#
+# CONFIG_IP6_NF_QUEUE is not set
+CONFIG_IP6_NF_IPTABLES=m
+CONFIG_IP6_NF_MATCH_RT=m
+CONFIG_IP6_NF_MATCH_OPTS=m
+CONFIG_IP6_NF_MATCH_FRAG=m
+CONFIG_IP6_NF_MATCH_HL=m
+CONFIG_IP6_NF_MATCH_MULTIPORT=m
+CONFIG_IP6_NF_MATCH_OWNER=m
+CONFIG_IP6_NF_MATCH_IPV6HEADER=m
+CONFIG_IP6_NF_MATCH_AHESP=m
+CONFIG_IP6_NF_MATCH_EUI64=m
+CONFIG_IP6_NF_FILTER=m
+CONFIG_IP6_NF_TARGET_LOG=m
+CONFIG_IP6_NF_TARGET_REJECT=m
+CONFIG_IP6_NF_MANGLE=m
+CONFIG_IP6_NF_TARGET_HL=m
+# CONFIG_IP6_NF_RAW is not set
+
+#
+# Bridge: Netfilter Configuration
+#
+# CONFIG_BRIDGE_NF_EBTABLES is not set
+
+#
+# DCCP Configuration (EXPERIMENTAL)
+#
+# CONFIG_IP_DCCP is not set
+
+#
+# SCTP Configuration (EXPERIMENTAL)
+#
+# CONFIG_IP_SCTP is not set
+
+#
+# TIPC Configuration (EXPERIMENTAL)
+#
+# CONFIG_TIPC is not set
+# CONFIG_ATM is not set
+CONFIG_BRIDGE=m
+CONFIG_VLAN_8021Q=m
+# CONFIG_DECNET is not set
+# CONFIG_LLC2 is not set
+# CONFIG_IPX is not set
+# CONFIG_ATALK is not set
+# CONFIG_X25 is not set
+# CONFIG_LAPB is not set
+# CONFIG_NET_DIVERT is not set
+# CONFIG_ECONET is not set
+# CONFIG_WAN_ROUTER is not set
+
+#
+# QoS and/or fair queueing
+#
+CONFIG_NET_SCHED=y
+CONFIG_NET_SCH_CLK_JIFFIES=y
+# CONFIG_NET_SCH_CLK_GETTIMEOFDAY is not set
+# CONFIG_NET_SCH_CLK_CPU is not set
+
+#
+# Queueing/Scheduling
+#
+CONFIG_NET_SCH_CBQ=m
+CONFIG_NET_SCH_HTB=m
+# CONFIG_NET_SCH_HFSC is not set
+CONFIG_NET_SCH_PRIO=m
+CONFIG_NET_SCH_RED=m
+CONFIG_NET_SCH_SFQ=m
+CONFIG_NET_SCH_TEQL=m
+CONFIG_NET_SCH_TBF=m
+CONFIG_NET_SCH_GRED=m
+CONFIG_NET_SCH_DSMARK=m
+# CONFIG_NET_SCH_NETEM is not set
+CONFIG_NET_SCH_INGRESS=m
+
+#
+# Classification
+#
+CONFIG_NET_CLS=y
+# CONFIG_NET_CLS_BASIC is not set
+CONFIG_NET_CLS_TCINDEX=m
+CONFIG_NET_CLS_ROUTE4=m
+CONFIG_NET_CLS_ROUTE=y
+CONFIG_NET_CLS_FW=m
+CONFIG_NET_CLS_U32=m
+# CONFIG_CLS_U32_PERF is not set
+# CONFIG_CLS_U32_MARK is not set
+# CONFIG_NET_CLS_RSVP is not set
+# CONFIG_NET_CLS_RSVP6 is not set
+# CONFIG_NET_EMATCH is not set
+# CONFIG_NET_CLS_ACT is not set
+CONFIG_NET_CLS_POLICE=y
+# CONFIG_NET_CLS_IND is not set
+CONFIG_NET_ESTIMATOR=y
+
+#
+# Network testing
+#
+# CONFIG_NET_PKTGEN is not set
+# CONFIG_HAMRADIO is not set
+# CONFIG_IRDA is not set
+# CONFIG_BT is not set
+# CONFIG_IEEE80211 is not set
+
+#
+# Device Drivers
+#
+
+#
+# Generic Driver Options
+#
+CONFIG_STANDALONE=y
+CONFIG_PREVENT_FIRMWARE_BUILD=y
+CONFIG_FW_LOADER=y
+# CONFIG_DEBUG_DRIVER is not set
+
+#
+# Connector - unified userspace <-> kernelspace linker
+#
+# CONFIG_CONNECTOR is not set
+
+#
+# Memory Technology Devices (MTD)
+#
+# CONFIG_MTD is not set
+
+#
+# Parallel port support
+#
+CONFIG_PARPORT=m
+CONFIG_PARPORT_PC=m
+# CONFIG_PARPORT_SERIAL is not set
+CONFIG_PARPORT_PC_FIFO=y
+# CONFIG_PARPORT_PC_SUPERIO is not set
+# CONFIG_PARPORT_GSC is not set
+# CONFIG_PARPORT_1284 is not set
+
+#
+# Plug and Play support
+#
+# CONFIG_PNP is not set
+
+#
+# Block devices
+#
+CONFIG_BLK_DEV_FD=m
+# CONFIG_BLK_DEV_XD is not set
+# CONFIG_PARIDE is not set
+CONFIG_BLK_CPQ_DA=y
+CONFIG_BLK_CPQ_CISS_DA=y
+# CONFIG_CISS_SCSI_TAPE is not set
+CONFIG_BLK_DEV_DAC960=y
+# CONFIG_BLK_DEV_UMEM is not set
+# CONFIG_BLK_DEV_COW_COMMON is not set
+CONFIG_BLK_DEV_LOOP=m
+# CONFIG_BLK_DEV_CRYPTOLOOP is not set
+# CONFIG_BLK_DEV_NBD is not set
+CONFIG_BLK_DEV_SX8=y
+# CONFIG_BLK_DEV_UB is not set
+CONFIG_BLK_DEV_RAM=y
+CONFIG_BLK_DEV_RAM_COUNT=16
+CONFIG_BLK_DEV_RAM_SIZE=16384
+CONFIG_BLK_DEV_INITRD=y
+# CONFIG_CDROM_PKTCDVD is not set
+CONFIG_ATA_OVER_ETH=m
+
+#
+# ATA/ATAPI/MFM/RLL support
+#
+CONFIG_IDE=y
+CONFIG_BLK_DEV_IDE=y
+
+#
+# Please see Documentation/ide.txt for help/info on IDE drives
+#
+# CONFIG_BLK_DEV_IDE_SATA is not set
+# CONFIG_BLK_DEV_HD_IDE is not set
+CONFIG_BLK_DEV_IDEDISK=y
+# CONFIG_IDEDISK_MULTI_MODE is not set
+CONFIG_BLK_DEV_IDECD=m
+# CONFIG_BLK_DEV_IDETAPE is not set
+CONFIG_BLK_DEV_IDEFLOPPY=m
+CONFIG_BLK_DEV_IDESCSI=m
+# CONFIG_IDE_TASK_IOCTL is not set
+
+#
+# IDE chipset support/bugfixes
+#
+CONFIG_IDE_GENERIC=y
+CONFIG_BLK_DEV_CMD640=y
+# CONFIG_BLK_DEV_CMD640_ENHANCED is not set
+CONFIG_BLK_DEV_IDEPCI=y
+CONFIG_IDEPCI_SHARE_IRQ=y
+# CONFIG_BLK_DEV_OFFBOARD is not set
+CONFIG_BLK_DEV_GENERIC=y
+# CONFIG_BLK_DEV_OPTI621 is not set
+CONFIG_BLK_DEV_RZ1000=y
+CONFIG_BLK_DEV_IDEDMA_PCI=y
+# CONFIG_BLK_DEV_IDEDMA_FORCED is not set
+CONFIG_IDEDMA_PCI_AUTO=y
+# CONFIG_IDEDMA_ONLYDISK is not set
+# CONFIG_BLK_DEV_AEC62XX is not set
+# CONFIG_BLK_DEV_ALI15X3 is not set
+CONFIG_BLK_DEV_AMD74XX=y
+# CONFIG_BLK_DEV_ATIIXP is not set
+CONFIG_BLK_DEV_CMD64X=y
+# CONFIG_BLK_DEV_TRIFLEX is not set
+# CONFIG_BLK_DEV_CY82C693 is not set
+# CONFIG_BLK_DEV_CS5520 is not set
+# CONFIG_BLK_DEV_CS5530 is not set
+# CONFIG_BLK_DEV_CS5535 is not set
+CONFIG_BLK_DEV_HPT34X=y
+# CONFIG_HPT34X_AUTODMA is not set
+CONFIG_BLK_DEV_HPT366=y
+# CONFIG_BLK_DEV_SC1200 is not set
+CONFIG_BLK_DEV_PIIX=y
+# CONFIG_BLK_DEV_IT821X is not set
+# CONFIG_BLK_DEV_NS87415 is not set
+CONFIG_BLK_DEV_PDC202XX_OLD=y
+CONFIG_PDC202XX_BURST=y
+CONFIG_BLK_DEV_PDC202XX_NEW=y
+CONFIG_BLK_DEV_SVWKS=y
+CONFIG_BLK_DEV_SIIMAGE=y
+CONFIG_BLK_DEV_SIS5513=y
+# CONFIG_BLK_DEV_SLC90E66 is not set
+# CONFIG_BLK_DEV_TRM290 is not set
+CONFIG_BLK_DEV_VIA82CXXX=y
+# CONFIG_IDE_ARM is not set
+# CONFIG_IDE_CHIPSETS is not set
+CONFIG_BLK_DEV_IDEDMA=y
+# CONFIG_IDEDMA_IVB is not set
+CONFIG_IDEDMA_AUTO=y
+# CONFIG_BLK_DEV_HD is not set
+
+#
+# SCSI device support
+#
+# CONFIG_RAID_ATTRS is not set
+CONFIG_SCSI=y
+CONFIG_SCSI_PROC_FS=y
+
+#
+# SCSI support type (disk, tape, CD-ROM)
+#
+CONFIG_BLK_DEV_SD=y
+CONFIG_CHR_DEV_ST=m
+# CONFIG_CHR_DEV_OSST is not set
+CONFIG_BLK_DEV_SR=m
+# CONFIG_BLK_DEV_SR_VENDOR is not set
+CONFIG_CHR_DEV_SG=m
+# CONFIG_CHR_DEV_SCH is not set
+
+#
+# Some SCSI devices (e.g. CD jukebox) support multiple LUNs
+#
+# CONFIG_SCSI_MULTI_LUN is not set
+CONFIG_SCSI_CONSTANTS=y
+CONFIG_SCSI_LOGGING=y
+
+#
+# SCSI Transport Attributes
+#
+CONFIG_SCSI_SPI_ATTRS=y
+CONFIG_SCSI_FC_ATTRS=y
+CONFIG_SCSI_ISCSI_ATTRS=y
+CONFIG_SCSI_SAS_ATTRS=y
+
+#
+# SCSI low-level drivers
+#
+CONFIG_ISCSI_TCP=m
+CONFIG_BLK_DEV_3W_XXXX_RAID=y
+CONFIG_SCSI_3W_9XXX=y
+# CONFIG_SCSI_7000FASST is not set
+# CONFIG_SCSI_ACARD is not set
+# CONFIG_SCSI_AHA152X is not set
+# CONFIG_SCSI_AHA1542 is not set
+CONFIG_SCSI_AACRAID=y
+CONFIG_SCSI_AIC7XXX=y
+CONFIG_AIC7XXX_CMDS_PER_DEVICE=32
+CONFIG_AIC7XXX_RESET_DELAY_MS=15000
+# CONFIG_AIC7XXX_DEBUG_ENABLE is not set
+CONFIG_AIC7XXX_DEBUG_MASK=0
+# CONFIG_AIC7XXX_REG_PRETTY_PRINT is not set
+# CONFIG_SCSI_AIC7XXX_OLD is not set
+CONFIG_SCSI_AIC79XX=y
+CONFIG_AIC79XX_CMDS_PER_DEVICE=32
+CONFIG_AIC79XX_RESET_DELAY_MS=15000
+CONFIG_AIC79XX_ENABLE_RD_STRM=y
+# CONFIG_AIC79XX_DEBUG_ENABLE is not set
+CONFIG_AIC79XX_DEBUG_MASK=0
+# CONFIG_AIC79XX_REG_PRETTY_PRINT is not set
+# CONFIG_SCSI_DPT_I2O is not set
+# CONFIG_SCSI_IN2000 is not set
+CONFIG_MEGARAID_NEWGEN=y
+CONFIG_MEGARAID_MM=y
+CONFIG_MEGARAID_MAILBOX=y
+# CONFIG_MEGARAID_LEGACY is not set
+CONFIG_MEGARAID_SAS=y
+CONFIG_SCSI_SATA=y
+CONFIG_SCSI_SATA_AHCI=y
+CONFIG_SCSI_SATA_SVW=y
+CONFIG_SCSI_ATA_PIIX=y
+CONFIG_SCSI_SATA_MV=y
+CONFIG_SCSI_SATA_NV=y
+# CONFIG_SCSI_PDC_ADMA is not set
+# CONFIG_SCSI_SATA_QSTOR is not set
+CONFIG_SCSI_SATA_PROMISE=y
+CONFIG_SCSI_SATA_SX4=y
+CONFIG_SCSI_SATA_SIL=y
+CONFIG_SCSI_SATA_SIL24=y
+CONFIG_SCSI_SATA_SIS=y
+# CONFIG_SCSI_SATA_ULI is not set
+CONFIG_SCSI_SATA_VIA=y
+CONFIG_SCSI_SATA_VITESSE=y
+CONFIG_SCSI_SATA_INTEL_COMBINED=y
+CONFIG_SCSI_BUSLOGIC=y
+# CONFIG_SCSI_OMIT_FLASHPOINT is not set
+# CONFIG_SCSI_DMX3191D is not set
+# CONFIG_SCSI_DTC3280 is not set
+# CONFIG_SCSI_EATA is not set
+# CONFIG_SCSI_FUTURE_DOMAIN is not set
+CONFIG_SCSI_GDTH=y
+# CONFIG_SCSI_GENERIC_NCR5380 is not set
+# CONFIG_SCSI_GENERIC_NCR5380_MMIO is not set
+CONFIG_SCSI_IPS=y
+# CONFIG_SCSI_INITIO is not set
+# CONFIG_SCSI_INIA100 is not set
+# CONFIG_SCSI_PPA is not set
+# CONFIG_SCSI_IMM is not set
+# CONFIG_SCSI_NCR53C406A is not set
+CONFIG_SCSI_SYM53C8XX_2=y
+CONFIG_SCSI_SYM53C8XX_DMA_ADDRESSING_MODE=1
+CONFIG_SCSI_SYM53C8XX_DEFAULT_TAGS=16
+CONFIG_SCSI_SYM53C8XX_MAX_TAGS=64
+# CONFIG_SCSI_SYM53C8XX_IOMAPPED is not set
+# CONFIG_SCSI_IPR is not set
+# CONFIG_SCSI_PAS16 is not set
+# CONFIG_SCSI_PSI240I is not set
+# CONFIG_SCSI_QLOGIC_FAS is not set
+# CONFIG_SCSI_QLOGIC_FC is not set
+# CONFIG_SCSI_QLOGIC_1280 is not set
+CONFIG_SCSI_QLA_FC=y
+# CONFIG_SCSI_QLA2XXX_EMBEDDED_FIRMWARE is not set
+CONFIG_SCSI_LPFC=y
+# CONFIG_SCSI_SYM53C416 is not set
+# CONFIG_SCSI_DC395x is not set
+# CONFIG_SCSI_DC390T is not set
+# CONFIG_SCSI_T128 is not set
+# CONFIG_SCSI_U14_34F is not set
+# CONFIG_SCSI_ULTRASTOR is not set
+# CONFIG_SCSI_NSP32 is not set
+# CONFIG_SCSI_DEBUG is not set
+
+#
+# Old CD-ROM drivers (not SCSI, not IDE)
+#
+# CONFIG_CD_NO_IDESCSI is not set
+
+#
+# Multi-device support (RAID and LVM)
+#
+CONFIG_MD=y
+CONFIG_BLK_DEV_MD=y
+CONFIG_MD_LINEAR=y
+CONFIG_MD_RAID0=y
+CONFIG_MD_RAID1=y
+CONFIG_MD_RAID10=y
+CONFIG_MD_RAID5=y
+# CONFIG_MD_RAID6 is not set
+CONFIG_MD_MULTIPATH=y
+# CONFIG_MD_FAULTY is not set
+CONFIG_BLK_DEV_DM=y
+CONFIG_DM_CRYPT=y
+CONFIG_DM_SNAPSHOT=y
+CONFIG_DM_MIRROR=y
+CONFIG_DM_ZERO=y
+CONFIG_DM_MULTIPATH=y
+CONFIG_DM_MULTIPATH_EMC=y
+
+#
+# Fusion MPT device support
+#
+CONFIG_FUSION=y
+CONFIG_FUSION_SPI=m
+CONFIG_FUSION_FC=m
+CONFIG_FUSION_SAS=m
+CONFIG_FUSION_MAX_SGE=128
+CONFIG_FUSION_CTL=m
+CONFIG_FUSION_LAN=m
+
+#
+# IEEE 1394 (FireWire) support
+#
+# CONFIG_IEEE1394 is not set
+
+#
+# I2O device support
+#
+CONFIG_I2O=m
+CONFIG_I2O_LCT_NOTIFY_ON_CHANGES=y
+CONFIG_I2O_EXT_ADAPTEC=y
+CONFIG_I2O_CONFIG=m
+CONFIG_I2O_CONFIG_OLD_IOCTL=y
+# CONFIG_I2O_BUS is not set
+CONFIG_I2O_BLOCK=m
+CONFIG_I2O_SCSI=m
+CONFIG_I2O_PROC=m
+
+#
+# Network device support
+#
+CONFIG_NETDEVICES=y
+CONFIG_DUMMY=m
+CONFIG_BONDING=m
+# CONFIG_EQUALIZER is not set
+CONFIG_TUN=m
+
+#
+# ARCnet devices
+#
+# CONFIG_ARCNET is not set
+
+#
+# PHY device support
+#
+# CONFIG_PHYLIB is not set
+
+#
+# Ethernet (10 or 100Mbit)
+#
+CONFIG_NET_ETHERNET=y
+CONFIG_MII=m
+# CONFIG_HAPPYMEAL is not set
+# CONFIG_SUNGEM is not set
+# CONFIG_CASSINI is not set
+CONFIG_NET_VENDOR_3COM=y
+# CONFIG_EL1 is not set
+# CONFIG_EL2 is not set
+# CONFIG_ELPLUS is not set
+# CONFIG_EL16 is not set
+# CONFIG_EL3 is not set
+# CONFIG_3C515 is not set
+CONFIG_VORTEX=m
+# CONFIG_TYPHOON is not set
+# CONFIG_LANCE is not set
+# CONFIG_NET_VENDOR_SMC is not set
+# CONFIG_NET_VENDOR_RACAL is not set
+
+#
+# Tulip family network device support
+#
+CONFIG_NET_TULIP=y
+# CONFIG_DE2104X is not set
+CONFIG_TULIP=m
+# CONFIG_TULIP_MWI is not set
+# CONFIG_TULIP_MMIO is not set
+# CONFIG_TULIP_NAPI is not set
+# CONFIG_DE4X5 is not set
+# CONFIG_WINBOND_840 is not set
+# CONFIG_DM9102 is not set
+# CONFIG_ULI526X is not set
+# CONFIG_AT1700 is not set
+# CONFIG_DEPCA is not set
+# CONFIG_HP100 is not set
+# CONFIG_NET_ISA is not set
+CONFIG_NET_PCI=y
+CONFIG_PCNET32=m
+# CONFIG_AMD8111_ETH is not set
+# CONFIG_ADAPTEC_STARFIRE is not set
+# CONFIG_AC3200 is not set
+# CONFIG_APRICOT is not set
+# CONFIG_B44 is not set
+CONFIG_FORCEDETH=m
+# CONFIG_CS89x0 is not set
+# CONFIG_DGRS is not set
+CONFIG_EEPRO100=m
+CONFIG_E100=m
+# CONFIG_FEALNX is not set
+CONFIG_NATSEMI=m
+CONFIG_NE2K_PCI=m
+CONFIG_8139CP=m
+CONFIG_8139TOO=m
+CONFIG_8139TOO_PIO=y
+# CONFIG_8139TOO_TUNE_TWISTER is not set
+CONFIG_8139TOO_8129=y
+# CONFIG_8139_OLD_RX_RESET is not set
+CONFIG_SIS900=m
+# CONFIG_EPIC100 is not set
+# CONFIG_SUNDANCE is not set
+# CONFIG_TLAN is not set
+CONFIG_VIA_RHINE=m
+# CONFIG_VIA_RHINE_MMIO is not set
+# CONFIG_NET_POCKET is not set
+
+#
+# Ethernet (1000 Mbit)
+#
+# CONFIG_ACENIC is not set
+# CONFIG_DL2K is not set
+CONFIG_E1000=m
+# CONFIG_E1000_NAPI is not set
+# CONFIG_E1000_DISABLE_PACKET_SPLIT is not set
+# CONFIG_NS83820 is not set
+# CONFIG_HAMACHI is not set
+# CONFIG_YELLOWFIN is not set
+CONFIG_R8169=m
+# CONFIG_R8169_NAPI is not set
+# CONFIG_R8169_VLAN is not set
+CONFIG_SIS190=m
+CONFIG_SKGE=m
+CONFIG_SKY2=m
+CONFIG_SK98LIN=m
+# CONFIG_VIA_VELOCITY is not set
+CONFIG_TIGON3=m
+CONFIG_BNX2=m
+
+#
+# Ethernet (10000 Mbit)
+#
+# CONFIG_CHELSIO_T1 is not set
+# CONFIG_IXGB is not set
+# CONFIG_S2IO is not set
+
+#
+# Token Ring devices
+#
+# CONFIG_TR is not set
+
+#
+# Wireless LAN (non-hamradio)
+#
+# CONFIG_NET_RADIO is not set
+
+#
+# Wan interfaces
+#
+# CONFIG_WAN is not set
+# CONFIG_FDDI is not set
+# CONFIG_HIPPI is not set
+# CONFIG_PLIP is not set
+CONFIG_PPP=m
+# CONFIG_PPP_MULTILINK is not set
+# CONFIG_PPP_FILTER is not set
+CONFIG_PPP_ASYNC=m
+# CONFIG_PPP_SYNC_TTY is not set
+CONFIG_PPP_DEFLATE=m
+CONFIG_PPP_BSDCOMP=m
+# CONFIG_PPP_MPPE is not set
+# CONFIG_PPPOE is not set
+# CONFIG_SLIP is not set
+CONFIG_NET_FC=y
+# CONFIG_SHAPER is not set
+CONFIG_NETCONSOLE=m
+CONFIG_NETPOLL=y
+# CONFIG_NETPOLL_RX is not set
+# CONFIG_NETPOLL_TRAP is not set
+CONFIG_NET_POLL_CONTROLLER=y
+
+#
+# ISDN subsystem
+#
+# CONFIG_ISDN is not set
+
+#
+# Telephony Support
+#
+# CONFIG_PHONE is not set
+
+#
+# Input device support
+#
+CONFIG_INPUT=y
+
+#
+# Userland interfaces
+#
+CONFIG_INPUT_MOUSEDEV=y
+CONFIG_INPUT_MOUSEDEV_PSAUX=y
+CONFIG_INPUT_MOUSEDEV_SCREEN_X=1024
+CONFIG_INPUT_MOUSEDEV_SCREEN_Y=768
+# CONFIG_INPUT_JOYDEV is not set
+# CONFIG_INPUT_TSDEV is not set
+CONFIG_INPUT_EVDEV=y
+# CONFIG_INPUT_EVBUG is not set
+
+#
+# Input Device Drivers
+#
+CONFIG_INPUT_KEYBOARD=y
+CONFIG_KEYBOARD_ATKBD=y
+# CONFIG_KEYBOARD_SUNKBD is not set
+# CONFIG_KEYBOARD_LKKBD is not set
+# CONFIG_KEYBOARD_XTKBD is not set
+# CONFIG_KEYBOARD_NEWTON is not set
+CONFIG_INPUT_MOUSE=y
+CONFIG_MOUSE_PS2=y
+# CONFIG_MOUSE_SERIAL is not set
+# CONFIG_MOUSE_INPORT is not set
+# CONFIG_MOUSE_LOGIBM is not set
+# CONFIG_MOUSE_PC110PAD is not set
+# CONFIG_MOUSE_VSXXXAA is not set
+# CONFIG_INPUT_JOYSTICK is not set
+# CONFIG_INPUT_TOUCHSCREEN is not set
+# CONFIG_INPUT_MISC is not set
+
+#
+# Hardware I/O ports
+#
+CONFIG_SERIO=y
+CONFIG_SERIO_I8042=y
+CONFIG_SERIO_SERPORT=y
+# CONFIG_SERIO_CT82C710 is not set
+# CONFIG_SERIO_PARKBD is not set
+# CONFIG_SERIO_PCIPS2 is not set
+CONFIG_SERIO_LIBPS2=y
+# CONFIG_SERIO_RAW is not set
+# CONFIG_GAMEPORT is not set
+
+#
+# Character devices
+#
+CONFIG_VT=y
+CONFIG_VT_CONSOLE=y
+CONFIG_HW_CONSOLE=y
+# CONFIG_SERIAL_NONSTANDARD is not set
+
+#
+# Serial drivers
+#
+CONFIG_SERIAL_8250=y
+CONFIG_SERIAL_8250_CONSOLE=y
+# CONFIG_SERIAL_8250_ACPI is not set
+CONFIG_SERIAL_8250_NR_UARTS=4
+CONFIG_SERIAL_8250_RUNTIME_UARTS=4
+# CONFIG_SERIAL_8250_EXTENDED is not set
+
+#
+# Non-8250 serial port support
+#
+CONFIG_SERIAL_CORE=y
+CONFIG_SERIAL_CORE_CONSOLE=y
+# CONFIG_SERIAL_JSM is not set
+CONFIG_UNIX98_PTYS=y
+CONFIG_LEGACY_PTYS=y
+CONFIG_LEGACY_PTY_COUNT=256
+CONFIG_PRINTER=m
+# CONFIG_LP_CONSOLE is not set
+# CONFIG_PPDEV is not set
+# CONFIG_TIPAR is not set
+
+#
+# IPMI
+#
+CONFIG_IPMI_HANDLER=m
+# CONFIG_IPMI_PANIC_EVENT is not set
+CONFIG_IPMI_DEVICE_INTERFACE=m
+CONFIG_IPMI_SI=m
+CONFIG_IPMI_WATCHDOG=m
+# CONFIG_IPMI_POWEROFF is not set
+
+#
+# Watchdog Cards
+#
+# CONFIG_WATCHDOG is not set
+# CONFIG_HW_RANDOM is not set
+# CONFIG_NVRAM is not set
+CONFIG_RTC=y
+# CONFIG_DTLK is not set
+# CONFIG_R3964 is not set
+# CONFIG_APPLICOM is not set
+# CONFIG_SONYPI is not set
+
+#
+# Ftape, the floppy tape device driver
+#
+# CONFIG_FTAPE is not set
+# CONFIG_AGP is not set
+# CONFIG_DRM is not set
+# CONFIG_MWAVE is not set
+# CONFIG_CS5535_GPIO is not set
+# CONFIG_RAW_DRIVER is not set
+# CONFIG_HPET is not set
+# CONFIG_HANGCHECK_TIMER is not set
+
+#
+# TPM devices
+#
+# CONFIG_TCG_TPM is not set
+# CONFIG_TELCLOCK is not set
+
+#
+# I2C support
+#
+CONFIG_I2C=m
+CONFIG_I2C_CHARDEV=m
+
+#
+# I2C Algorithms
+#
+CONFIG_I2C_ALGOBIT=m
+CONFIG_I2C_ALGOPCF=m
+CONFIG_I2C_ALGOPCA=m
+
+#
+# I2C Hardware Bus support
+#
+CONFIG_I2C_ALI1535=m
+CONFIG_I2C_ALI1563=m
+CONFIG_I2C_ALI15X3=m
+CONFIG_I2C_AMD756=m
+CONFIG_I2C_AMD756_S4882=m
+CONFIG_I2C_AMD8111=m
+CONFIG_I2C_ELEKTOR=m
+CONFIG_I2C_I801=m
+CONFIG_I2C_I810=m
+CONFIG_I2C_PIIX4=m
+CONFIG_I2C_ISA=m
+CONFIG_I2C_NFORCE2=m
+CONFIG_I2C_PARPORT=m
+CONFIG_I2C_PARPORT_LIGHT=m
+CONFIG_I2C_PROSAVAGE=m
+CONFIG_I2C_SAVAGE4=m
+CONFIG_SCx200_ACB=m
+CONFIG_I2C_SIS5595=m
+CONFIG_I2C_SIS630=m
+CONFIG_I2C_SIS96X=m
+CONFIG_I2C_STUB=m
+CONFIG_I2C_VIA=m
+CONFIG_I2C_VIAPRO=m
+CONFIG_I2C_VOODOO3=m
+CONFIG_I2C_PCA_ISA=m
+
+#
+# Miscellaneous I2C Chip support
+#
+CONFIG_SENSORS_DS1337=m
+CONFIG_SENSORS_DS1374=m
+CONFIG_SENSORS_EEPROM=m
+CONFIG_SENSORS_PCF8574=m
+CONFIG_SENSORS_PCA9539=m
+CONFIG_SENSORS_PCF8591=m
+CONFIG_SENSORS_RTC8564=m
+CONFIG_SENSORS_MAX6875=m
+# CONFIG_RTC_X1205_I2C is not set
+# CONFIG_I2C_DEBUG_CORE is not set
+# CONFIG_I2C_DEBUG_ALGO is not set
+# CONFIG_I2C_DEBUG_BUS is not set
+# CONFIG_I2C_DEBUG_CHIP is not set
+
+#
+# SPI support
+#
+# CONFIG_SPI is not set
+# CONFIG_SPI_MASTER is not set
+
+#
+# Dallas's 1-wire bus
+#
+# CONFIG_W1 is not set
+
+#
+# Hardware Monitoring support
+#
+CONFIG_HWMON=y
+CONFIG_HWMON_VID=m
+CONFIG_SENSORS_ADM1021=m
+CONFIG_SENSORS_ADM1025=m
+CONFIG_SENSORS_ADM1026=m
+CONFIG_SENSORS_ADM1031=m
+CONFIG_SENSORS_ADM9240=m
+CONFIG_SENSORS_ASB100=m
+CONFIG_SENSORS_ATXP1=m
+CONFIG_SENSORS_DS1621=m
+# CONFIG_SENSORS_F71805F is not set
+CONFIG_SENSORS_FSCHER=m
+CONFIG_SENSORS_FSCPOS=m
+CONFIG_SENSORS_GL518SM=m
+CONFIG_SENSORS_GL520SM=m
+CONFIG_SENSORS_IT87=m
+CONFIG_SENSORS_LM63=m
+CONFIG_SENSORS_LM75=m
+CONFIG_SENSORS_LM77=m
+CONFIG_SENSORS_LM78=m
+CONFIG_SENSORS_LM80=m
+CONFIG_SENSORS_LM83=m
+CONFIG_SENSORS_LM85=m
+CONFIG_SENSORS_LM87=m
+CONFIG_SENSORS_LM90=m
+CONFIG_SENSORS_LM92=m
+CONFIG_SENSORS_MAX1619=m
+CONFIG_SENSORS_PC87360=m
+CONFIG_SENSORS_SIS5595=m
+CONFIG_SENSORS_SMSC47M1=m
+CONFIG_SENSORS_SMSC47B397=m
+CONFIG_SENSORS_VIA686A=m
+# CONFIG_SENSORS_VT8231 is not set
+CONFIG_SENSORS_W83781D=m
+CONFIG_SENSORS_W83792D=m
+CONFIG_SENSORS_W83L785TS=m
+CONFIG_SENSORS_W83627HF=m
+CONFIG_SENSORS_W83627EHF=m
+# CONFIG_SENSORS_HDAPS is not set
+# CONFIG_HWMON_DEBUG_CHIP is not set
+
+#
+# Misc devices
+#
+# CONFIG_IBM_ASM is not set
+
+#
+# Multimedia Capabilities Port drivers
+#
+
+#
+# Multimedia devices
+#
+# CONFIG_VIDEO_DEV is not set
+
+#
+# Digital Video Broadcasting Devices
+#
+# CONFIG_DVB is not set
+
+#
+# Graphics support
+#
+# CONFIG_FB is not set
+CONFIG_VIDEO_SELECT=y
+
+#
+# Console display driver support
+#
+CONFIG_VGA_CONSOLE=y
+# CONFIG_MDA_CONSOLE is not set
+CONFIG_DUMMY_CONSOLE=y
+
+#
+# Sound
+#
+# CONFIG_SOUND is not set
+
+#
+# USB support
+#
+CONFIG_USB_ARCH_HAS_HCD=y
+CONFIG_USB_ARCH_HAS_OHCI=y
+CONFIG_USB=m
+# CONFIG_USB_DEBUG is not set
+
+#
+# Miscellaneous USB options
+#
+CONFIG_USB_DEVICEFS=y
+# CONFIG_USB_BANDWIDTH is not set
+# CONFIG_USB_DYNAMIC_MINORS is not set
+# CONFIG_USB_SUSPEND is not set
+# CONFIG_USB_OTG is not set
+
+#
+# USB Host Controller Drivers
+#
+CONFIG_USB_EHCI_HCD=m
+# CONFIG_USB_EHCI_SPLIT_ISO is not set
+# CONFIG_USB_EHCI_ROOT_HUB_TT is not set
+# CONFIG_USB_ISP116X_HCD is not set
+CONFIG_USB_OHCI_HCD=m
+# CONFIG_USB_OHCI_BIG_ENDIAN is not set
+CONFIG_USB_OHCI_LITTLE_ENDIAN=y
+CONFIG_USB_UHCI_HCD=m
+# CONFIG_USB_SL811_HCD is not set
+
+#
+# USB Device Class drivers
+#
+CONFIG_USB_ACM=m
+CONFIG_USB_PRINTER=m
+
+#
+# NOTE: USB_STORAGE enables SCSI, and 'SCSI disk support'
+#
+
+#
+# may also be needed; see USB_STORAGE Help for more information
+#
+CONFIG_USB_STORAGE=m
+# CONFIG_USB_STORAGE_DEBUG is not set
+CONFIG_USB_STORAGE_DATAFAB=y
+CONFIG_USB_STORAGE_FREECOM=y
+CONFIG_USB_STORAGE_ISD200=y
+CONFIG_USB_STORAGE_DPCM=y
+# CONFIG_USB_STORAGE_USBAT is not set
+CONFIG_USB_STORAGE_SDDR09=y
+# CONFIG_USB_STORAGE_SDDR55 is not set
+CONFIG_USB_STORAGE_JUMPSHOT=y
+# CONFIG_USB_STORAGE_ALAUDA is not set
+# CONFIG_USB_LIBUSUAL is not set
+
+#
+# USB Input Devices
+#
+CONFIG_USB_HID=m
+CONFIG_USB_HIDINPUT=y
+# CONFIG_USB_HIDINPUT_POWERBOOK is not set
+# CONFIG_HID_FF is not set
+# CONFIG_USB_HIDDEV is not set
+
+#
+# USB HID Boot Protocol drivers
+#
+CONFIG_USB_KBD=m
+CONFIG_USB_MOUSE=m
+# CONFIG_USB_AIPTEK is not set
+CONFIG_USB_WACOM=m
+# CONFIG_USB_ACECAD is not set
+# CONFIG_USB_KBTAB is not set
+# CONFIG_USB_POWERMATE is not set
+# CONFIG_USB_MTOUCH is not set
+# CONFIG_USB_ITMTOUCH is not set
+# CONFIG_USB_EGALAX is not set
+# CONFIG_USB_YEALINK is not set
+# CONFIG_USB_XPAD is not set
+# CONFIG_USB_ATI_REMOTE is not set
+# CONFIG_USB_ATI_REMOTE2 is not set
+# CONFIG_USB_KEYSPAN_REMOTE is not set
+# CONFIG_USB_APPLETOUCH is not set
+
+#
+# USB Imaging devices
+#
+# CONFIG_USB_MDC800 is not set
+# CONFIG_USB_MICROTEK is not set
+
+#
+# USB Multimedia devices
+#
+# CONFIG_USB_DABUSB is not set
+
+#
+# Video4Linux support is needed for USB Multimedia device support
+#
+
+#
+# USB Network Adapters
+#
+# CONFIG_USB_CATC is not set
+# CONFIG_USB_KAWETH is not set
+# CONFIG_USB_PEGASUS is not set
+# CONFIG_USB_RTL8150 is not set
+# CONFIG_USB_USBNET is not set
+# CONFIG_USB_MON is not set
+
+#
+# USB port drivers
+#
+# CONFIG_USB_USS720 is not set
+
+#
+# USB Serial Converter support
+#
+# CONFIG_USB_SERIAL is not set
+
+#
+# USB Miscellaneous drivers
+#
+# CONFIG_USB_EMI62 is not set
+# CONFIG_USB_EMI26 is not set
+# CONFIG_USB_AUERSWALD is not set
+# CONFIG_USB_RIO500 is not set
+# CONFIG_USB_LEGOTOWER is not set
+# CONFIG_USB_LCD is not set
+# CONFIG_USB_LED is not set
+# CONFIG_USB_CYTHERM is not set
+# CONFIG_USB_PHIDGETKIT is not set
+# CONFIG_USB_PHIDGETSERVO is not set
+# CONFIG_USB_IDMOUSE is not set
+# CONFIG_USB_SISUSBVGA is not set
+# CONFIG_USB_LD is not set
+# CONFIG_USB_TEST is not set
+
+#
+# USB DSL modem support
+#
+
+#
+# USB Gadget Support
+#
+# CONFIG_USB_GADGET is not set
+
+#
+# MMC/SD Card support
+#
+# CONFIG_MMC is not set
+
+#
+# InfiniBand support
+#
+# CONFIG_INFINIBAND is not set
+
+#
+# EDAC - error detection and reporting (RAS) (EXPERIMENTAL)
+#
+# CONFIG_EDAC is not set
+
+#
+# File systems
+#
+CONFIG_EXT2_FS=y
+# CONFIG_EXT2_FS_XATTR is not set
+# CONFIG_EXT2_FS_XIP is not set
+CONFIG_EXT3_FS=y
+CONFIG_EXT3_FS_XATTR=y
+# CONFIG_EXT3_FS_POSIX_ACL is not set
+# CONFIG_EXT3_FS_SECURITY is not set
+CONFIG_JBD=y
+CONFIG_JBD_DEBUG=y
+CONFIG_FS_MBCACHE=y
+CONFIG_REISERFS_FS=y
+# CONFIG_REISERFS_CHECK is not set
+CONFIG_REISERFS_PROC_INFO=y
+# CONFIG_REISERFS_FS_XATTR is not set
+# CONFIG_JFS_FS is not set
+# CONFIG_FS_POSIX_ACL is not set
+# CONFIG_XFS_FS is not set
+# CONFIG_OCFS2_FS is not set
+# CONFIG_MINIX_FS is not set
+# CONFIG_ROMFS_FS is not set
+CONFIG_INOTIFY=y
+CONFIG_QUOTA=y
+CONFIG_QUOTA_COMPAT=y
+# CONFIG_QFMT_V1 is not set
+CONFIG_QFMT_V2=y
+CONFIG_SIM_FS=m
+CONFIG_VZ_QUOTA=m
+# CONFIG_VZ_QUOTA_UNLOAD is not set
+CONFIG_VZ_QUOTA_UGID=y
+CONFIG_QUOTACTL=y
+CONFIG_DNOTIFY=y
+# CONFIG_AUTOFS_FS is not set
+# CONFIG_AUTOFS4_FS is not set
+# CONFIG_FUSE_FS is not set
+
+#
+# CD-ROM/DVD Filesystems
+#
+CONFIG_ISO9660_FS=m
+# CONFIG_JOLIET is not set
+# CONFIG_ZISOFS is not set
+# CONFIG_UDF_FS is not set
+
+#
+# DOS/FAT/NT Filesystems
+#
+CONFIG_FAT_FS=m
+CONFIG_MSDOS_FS=m
+CONFIG_VFAT_FS=m
+CONFIG_FAT_DEFAULT_CODEPAGE=437
+CONFIG_FAT_DEFAULT_IOCHARSET="iso8859-1"
+# CONFIG_NTFS_FS is not set
+
+#
+# Pseudo filesystems
+#
+CONFIG_PROC_FS=y
+CONFIG_PROC_KCORE=y
+CONFIG_SYSFS=y
+CONFIG_TMPFS=y
+# CONFIG_HUGETLBFS is not set
+# CONFIG_HUGETLB_PAGE is not set
+CONFIG_RAMFS=y
+# CONFIG_RELAYFS_FS is not set
+# CONFIG_CONFIGFS_FS is not set
+
+#
+# Miscellaneous filesystems
+#
+# CONFIG_ADFS_FS is not set
+# CONFIG_AFFS_FS is not set
+# CONFIG_HFS_FS is not set
+# CONFIG_HFSPLUS_FS is not set
+# CONFIG_BEFS_FS is not set
+# CONFIG_BFS_FS is not set
+# CONFIG_EFS_FS is not set
+CONFIG_CRAMFS=y
+# CONFIG_VXFS_FS is not set
+# CONFIG_HPFS_FS is not set
+# CONFIG_QNX4FS_FS is not set
+# CONFIG_SYSV_FS is not set
+# CONFIG_UFS_FS is not set
+
+#
+# Network File Systems
+#
+CONFIG_NFS_FS=m
+CONFIG_NFS_V3=y
+# CONFIG_NFS_V3_ACL is not set
+# CONFIG_NFS_V4 is not set
+# CONFIG_NFS_DIRECTIO is not set
+# CONFIG_NFSD is not set
+CONFIG_LOCKD=m
+CONFIG_LOCKD_V4=y
+CONFIG_NFS_COMMON=y
+CONFIG_SUNRPC=m
+# CONFIG_RPCSEC_GSS_KRB5 is not set
+# CONFIG_RPCSEC_GSS_SPKM3 is not set
+CONFIG_SMB_FS=m
+# CONFIG_SMB_NLS_DEFAULT is not set
+CONFIG_CIFS=m
+# CONFIG_CIFS_STATS is not set
+# CONFIG_CIFS_XATTR is not set
+# CONFIG_CIFS_EXPERIMENTAL is not set
+# CONFIG_NCP_FS is not set
+# CONFIG_CODA_FS is not set
+# CONFIG_AFS_FS is not set
+# CONFIG_9P_FS is not set
+
+#
+# Partition Types
+#
+# CONFIG_PARTITION_ADVANCED is not set
+CONFIG_MSDOS_PARTITION=y
+
+#
+# Native Language Support
+#
+CONFIG_NLS=y
+CONFIG_NLS_DEFAULT="iso8859-1"
+CONFIG_NLS_CODEPAGE_437=m
+CONFIG_NLS_CODEPAGE_737=m
+CONFIG_NLS_CODEPAGE_775=m
+CONFIG_NLS_CODEPAGE_850=m
+CONFIG_NLS_CODEPAGE_852=m
+CONFIG_NLS_CODEPAGE_855=m
+CONFIG_NLS_CODEPAGE_857=m
+CONFIG_NLS_CODEPAGE_860=m
+CONFIG_NLS_CODEPAGE_861=m
+CONFIG_NLS_CODEPAGE_862=m
+CONFIG_NLS_CODEPAGE_863=m
+CONFIG_NLS_CODEPAGE_864=m
+CONFIG_NLS_CODEPAGE_865=m
+CONFIG_NLS_CODEPAGE_866=m
+CONFIG_NLS_CODEPAGE_869=m
+CONFIG_NLS_CODEPAGE_936=m
+CONFIG_NLS_CODEPAGE_950=m
+CONFIG_NLS_CODEPAGE_932=m
+CONFIG_NLS_CODEPAGE_949=m
+CONFIG_NLS_CODEPAGE_874=m
+CONFIG_NLS_ISO8859_8=m
+CONFIG_NLS_CODEPAGE_1250=m
+CONFIG_NLS_CODEPAGE_1251=m
+CONFIG_NLS_ASCII=m
+CONFIG_NLS_ISO8859_1=m
+CONFIG_NLS_ISO8859_2=m
+CONFIG_NLS_ISO8859_3=m
+CONFIG_NLS_ISO8859_4=m
+CONFIG_NLS_ISO8859_5=m
+CONFIG_NLS_ISO8859_6=m
+CONFIG_NLS_ISO8859_7=m
+CONFIG_NLS_ISO8859_9=m
+CONFIG_NLS_ISO8859_13=m
+CONFIG_NLS_ISO8859_14=m
+CONFIG_NLS_ISO8859_15=m
+CONFIG_NLS_KOI8_R=m
+CONFIG_NLS_KOI8_U=m
+CONFIG_NLS_UTF8=m
+
+#
+# Instrumentation Support
+#
+CONFIG_PROFILING=y
+CONFIG_OPROFILE=m
+# CONFIG_KPROBES is not set
+
+#
+# Kernel hacking
+#
+# CONFIG_PRINTK_TIME is not set
+CONFIG_MAGIC_SYSRQ=y
+CONFIG_DEBUG_KERNEL=y
+CONFIG_LOG_BUF_SHIFT=17
+# CONFIG_SCHEDSTATS is not set
+# CONFIG_DEBUG_SLAB is not set
+# CONFIG_DEBUG_MUTEXES is not set
+# CONFIG_DEBUG_SPINLOCK is not set
+# CONFIG_DEBUG_SPINLOCK_SLEEP is not set
+# CONFIG_DEBUG_KOBJECT is not set
+# CONFIG_DEBUG_HIGHMEM is not set
+CONFIG_DEBUG_BUGVERBOSE=y
+# CONFIG_DEBUG_INFO is not set
+# CONFIG_DEBUG_FS is not set
+# CONFIG_DEBUG_VM is not set
+# CONFIG_FRAME_POINTER is not set
+CONFIG_FORCED_INLINING=y
+# CONFIG_RCU_TORTURE_TEST is not set
+CONFIG_EARLY_PRINTK=y
+CONFIG_DEBUG_STACKOVERFLOW=y
+# CONFIG_DEBUG_STACK_USAGE is not set
+
+#
+# Page alloc debug is incompatible with Software Suspend on i386
+#
+# CONFIG_DEBUG_RODATA is not set
+CONFIG_4KSTACKS=y
+CONFIG_X86_FIND_SMP_CONFIG=y
+CONFIG_X86_MPPARSE=y
+
+#
+# OpenVZ
+#
+CONFIG_VE=y
+CONFIG_VE_CALLS=m
+CONFIG_VE_NETDEV=m
+CONFIG_VE_ETHDEV=m
+CONFIG_VE_IPTABLES=y
+CONFIG_VZ_WDOG=m
+CONFIG_VZ_CHECKPOINT=m
+
+#
+# Security options
+#
+# CONFIG_KEYS is not set
+# CONFIG_SECURITY is not set
+
+#
+# Cryptographic options
+#
+CONFIG_CRYPTO=y
+# CONFIG_CRYPTO_HMAC is not set
+# CONFIG_CRYPTO_NULL is not set
+# CONFIG_CRYPTO_MD4 is not set
+CONFIG_CRYPTO_MD5=m
+# CONFIG_CRYPTO_SHA1 is not set
+# CONFIG_CRYPTO_SHA256 is not set
+# CONFIG_CRYPTO_SHA512 is not set
+# CONFIG_CRYPTO_WP512 is not set
+# CONFIG_CRYPTO_TGR192 is not set
+# CONFIG_CRYPTO_DES is not set
+# CONFIG_CRYPTO_BLOWFISH is not set
+# CONFIG_CRYPTO_TWOFISH is not set
+# CONFIG_CRYPTO_SERPENT is not set
+# CONFIG_CRYPTO_AES is not set
+# CONFIG_CRYPTO_AES_586 is not set
+# CONFIG_CRYPTO_CAST5 is not set
+# CONFIG_CRYPTO_CAST6 is not set
+# CONFIG_CRYPTO_TEA is not set
+# CONFIG_CRYPTO_ARC4 is not set
+# CONFIG_CRYPTO_KHAZAD is not set
+# CONFIG_CRYPTO_ANUBIS is not set
+# CONFIG_CRYPTO_DEFLATE is not set
+# CONFIG_CRYPTO_MICHAEL_MIC is not set
+CONFIG_CRYPTO_CRC32C=m
+# CONFIG_CRYPTO_TEST is not set
+
+#
+# Hardware crypto devices
+#
+# CONFIG_CRYPTO_DEV_PADLOCK is not set
+
+#
+# Library routines
+#
+CONFIG_CRC_CCITT=m
+# CONFIG_CRC16 is not set
+CONFIG_CRC32=y
+CONFIG_LIBCRC32C=m
+CONFIG_ZLIB_INFLATE=y
+CONFIG_ZLIB_DEFLATE=m
+
+#
+# User resources
+#
+CONFIG_USER_RESOURCE=y
+CONFIG_USER_RSS_ACCOUNTING=y
+CONFIG_USER_SWAP_ACCOUNTING=y
+CONFIG_USER_RESOURCE_PROC=y
+CONFIG_UBC_DEBUG=y
+CONFIG_UBC_DEBUG_KMEM=y
+# CONFIG_UBC_KEEP_UNUSED is not set
+# CONFIG_UBC_DEBUG_ITEMS is not set
+# CONFIG_UBC_UNLIMITED is not set
+CONFIG_GENERIC_HARDIRQS=y
+CONFIG_GENERIC_IRQ_PROBE=y
+CONFIG_X86_BIOS_REBOOT=y
+CONFIG_KTIME_SCALAR=y
diff -uprN linux-2.6.16/drivers/acpi/processor_perflib.c linux-2.6.16.ovz/drivers/acpi/processor_perflib.c
--- linux-2.6.16/drivers/acpi/processor_perflib.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/drivers/acpi/processor_perflib.c	2006-07-05 08:34:55.000000000 -0400
@@ -577,6 +577,8 @@ acpi_processor_register_performance(stru
 		return_VALUE(-EBUSY);
 	}
 
+	WARN_ON(!performance);
+
 	pr->performance = performance;
 
 	if (acpi_processor_get_performance_info(pr)) {
@@ -609,7 +611,8 @@ acpi_processor_unregister_performance(st
 		return_VOID;
 	}
 
-	kfree(pr->performance->states);
+	if (pr->performance)
+		kfree(pr->performance->states);
 	pr->performance = NULL;
 
 	acpi_cpufreq_remove_file(pr);
diff -uprN linux-2.6.16/drivers/base/class.c linux-2.6.16.ovz/drivers/base/class.c
--- linux-2.6.16/drivers/base/class.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/drivers/base/class.c	2006-07-05 08:34:55.000000000 -0400
@@ -72,8 +72,13 @@ static struct kobj_type ktype_class = {
 };
 
 /* Hotplug events for classes go to the class_obj subsys */
-static decl_subsys(class, &ktype_class, NULL);
+decl_subsys(class, &ktype_class, NULL);
 
+#ifndef CONFIG_VE
+#define visible_class_subsys class_subsys
+#else /* CONFIG_VE: use the subsystem that get_exec_env() points to */
+#define visible_class_subsys (*get_exec_env()->class_subsys)
+#endif /* CONFIG_VE */
 
 int class_create_file(struct class * cls, const struct class_attribute * attr)
 {
@@ -148,7 +153,7 @@ int class_register(struct class * cls)
 	if (error)
 		return error;
 
-	subsys_set_kset(cls, class_subsys);
+	subsys_set_kset(cls, visible_class_subsys);
 
 	error = subsystem_register(&cls->subsys);
 	if (!error) {
@@ -420,8 +425,13 @@ static struct kset_uevent_ops class_ueve
 	.uevent =	class_uevent,
 };
 
-static decl_subsys(class_obj, &ktype_class_device, &class_uevent_ops);
+decl_subsys(class_obj, &ktype_class_device, &class_uevent_ops);
 
+#ifndef CONFIG_VE
+#define visible_class_obj_subsys class_obj_subsys
+#else /* CONFIG_VE: use the subsystem that get_exec_env() points to */
+#define visible_class_obj_subsys (*get_exec_env()->class_obj_subsys)
+#endif /* CONFIG_VE */
 
 static int class_device_add_attrs(struct class_device * cd)
 {
@@ -470,7 +480,7 @@ static ssize_t store_uevent(struct class
 
 void class_device_initialize(struct class_device *class_dev)
 {
-	kobj_set_kset_s(class_dev, class_obj_subsys);
+	kobj_set_kset_s(class_dev, visible_class_obj_subsys);
 	kobject_init(&class_dev->kobj);
 	INIT_LIST_HEAD(&class_dev->node);
 }
@@ -805,12 +815,19 @@ void class_interface_unregister(struct c
 	class_put(parent);
 }
 
-
+void prepare_sysfs_classes(void)
+{
+#ifdef CONFIG_VE	/* without CONFIG_VE this function does nothing */
+	get_ve0()->class_subsys = &class_subsys;	/* global subsystems */
+	get_ve0()->class_obj_subsys = &class_obj_subsys;
+#endif /* CONFIG_VE */
+}
 
 int __init classes_init(void)
 {
 	int retval;
 
+	prepare_sysfs_classes();
 	retval = subsystem_register(&class_subsys);
 	if (retval)
 		return retval;
@@ -848,3 +865,6 @@ EXPORT_SYMBOL_GPL(class_device_remove_bi
 
 EXPORT_SYMBOL_GPL(class_interface_register);
 EXPORT_SYMBOL_GPL(class_interface_unregister);
+
+EXPORT_SYMBOL(class_subsys);
+EXPORT_SYMBOL(class_obj_subsys);
diff -uprN linux-2.6.16/drivers/base/cpu.c linux-2.6.16.ovz/drivers/base/cpu.c
--- linux-2.6.16/drivers/base/cpu.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/drivers/base/cpu.c	2006-07-05 08:34:55.000000000 -0400
@@ -141,7 +141,7 @@ int __devinit register_cpu(struct cpu *c
 	return error;
 }
 
-struct sys_device *get_cpu_sysdev(int cpu)
+struct sys_device *get_cpu_sysdev(unsigned cpu)
 {
 	if (cpu < NR_CPUS)
 		return cpu_sys_devices[cpu];
diff -uprN linux-2.6.16/drivers/base/firmware_class.c linux-2.6.16.ovz/drivers/base/firmware_class.c
--- linux-2.6.16/drivers/base/firmware_class.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/drivers/base/firmware_class.c	2006-07-05 08:34:55.000000000 -0400
@@ -211,18 +211,20 @@ static int
 fw_realloc_buffer(struct firmware_priv *fw_priv, int min_size)
 {
 	u8 *new_data;
+	int new_size = fw_priv->alloc_size;
 
 	if (min_size <= fw_priv->alloc_size)
 		return 0;
 
-	new_data = vmalloc(fw_priv->alloc_size + PAGE_SIZE);
+	new_size = ALIGN(min_size, PAGE_SIZE);
+	new_data = vmalloc(new_size);
 	if (!new_data) {
 		printk(KERN_ERR "%s: unable to alloc buffer\n", __FUNCTION__);
 		/* Make sure that we don't keep incomplete data */
 		fw_load_abort(fw_priv);
 		return -ENOMEM;
 	}
-	fw_priv->alloc_size += PAGE_SIZE;
+	fw_priv->alloc_size = new_size;
 	if (fw_priv->fw->data) {
 		memcpy(new_data, fw_priv->fw->data, fw_priv->fw->size);
 		vfree(fw_priv->fw->data);
diff -uprN linux-2.6.16/drivers/base/node.c linux-2.6.16.ovz/drivers/base/node.c
--- linux-2.6.16/drivers/base/node.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/drivers/base/node.c	2006-07-05 08:34:55.000000000 -0400
@@ -106,7 +106,7 @@ static ssize_t node_read_numastat(struct
 	other_node = 0;
 	for (i = 0; i < MAX_NR_ZONES; i++) {
 		struct zone *z = &pg->node_zones[i];
-		for (cpu = 0; cpu < NR_CPUS; cpu++) {
+		for_each_online_cpu(cpu) {
 			struct per_cpu_pageset *ps = zone_pcp(z,cpu);
 			numa_hit += ps->numa_hit;
 			numa_miss += ps->numa_miss;
diff -uprN linux-2.6.16/drivers/block/cciss.c linux-2.6.16.ovz/drivers/block/cciss.c
--- linux-2.6.16/drivers/block/cciss.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/drivers/block/cciss.c	2006-07-05 08:34:55.000000000 -0400
@@ -1181,6 +1181,53 @@ static int revalidate_allvol(ctlr_info_t
         return 0;
 }
 
+static inline void complete_buffers(struct bio *bio, int status)
+{
+	while (bio) {
+		struct bio *xbh = bio->bi_next;
+		int nr_sectors = bio_sectors(bio);
+		/* Unlink the bio, then end it: 0 if status is set, else -EIO. */
+		bio->bi_next = NULL;
+		blk_finished_io(len);	/* NOTE(review): 'len' is not declared here */
+		bio_endio(bio, nr_sectors << 9, status ? 0 : -EIO);
+		bio = xbh;
+	}
+
+}
+
+static void cciss_softirq_done(struct request *rq)
+{
+	CommandList_struct *cmd = rq->completion_data;
+	ctlr_info_t *h = hba[cmd->ctlr];
+	unsigned long flags;
+	u64bit temp64;
+	int i, ddir;
+	/* Completion handler installed with blk_queue_softirq_done(). */
+	if (cmd->Request.Type.Direction == XFER_READ)
+		ddir = PCI_DMA_FROMDEVICE;
+	else
+		ddir = PCI_DMA_TODEVICE;
+
+	/* The command did not need to be retried, so unmap the DMA mapping */
+	/* of every scatter-gather element, in the direction chosen above. */
+	for(i=0; i<cmd->Header.SGList; i++) {
+		temp64.val32.lower = cmd->SG[i].Addr.lower;
+		temp64.val32.upper = cmd->SG[i].Addr.upper;
+		pci_unmap_page(h->pdev, temp64.val, cmd->SG[i].Len, ddir);
+	}
+	/* End every bio of the request; rq->errors is passed as the status. */
+	complete_buffers(rq->bio, rq->errors);
+
+#ifdef CCISS_DEBUG
+	printk("Done with %p\n", rq);
+#endif /* CCISS_DEBUG */
+	/* Finish the request and free the command while holding h->lock. */
+	spin_lock_irqsave(&h->lock, flags);
+	end_that_request_last(rq, rq->errors);
+	cmd_free(h, cmd,1);
+	spin_unlock_irqrestore(&h->lock, flags);
+}
+
 /* This function will check the usage_count of the drive to be updated/added.
  * If the usage_count is zero then the drive information will be updated and
  * the disk will be re-registered with the kernel.  If not then it will be
@@ -1249,6 +1296,8 @@ static void cciss_update_drive_info(int 
 
 		blk_queue_max_sectors(disk->queue, 512);
 
+		blk_queue_softirq_done(disk->queue, cciss_softirq_done);
+
 		disk->queue->queuedata = hba[ctlr];
 
 		blk_queue_hardsect_size(disk->queue,
@@ -2148,20 +2197,6 @@ static void start_io( ctlr_info_t *h)
 		addQ (&(h->cmpQ), c); 
 	}
 }
-
-static inline void complete_buffers(struct bio *bio, int status)
-{
-	while (bio) {
-		struct bio *xbh = bio->bi_next; 
-		int nr_sectors = bio_sectors(bio);
-
-		bio->bi_next = NULL; 
-		blk_finished_io(len);
-		bio_endio(bio, nr_sectors << 9, status ? 0 : -EIO);
-		bio = xbh;
-	}
-
-} 
 /* Assumes that CCISS_LOCK(h->ctlr) is held. */
 /* Zeros out the error record and then resends the command back */
 /* to the controller */
@@ -2179,39 +2214,6 @@ static inline void resend_cciss_cmd( ctl
 	start_io(h);
 }
 
-static void cciss_softirq_done(struct request *rq)
-{
-	CommandList_struct *cmd = rq->completion_data;
-	ctlr_info_t *h = hba[cmd->ctlr];
-	unsigned long flags;
-	u64bit temp64;
-	int i, ddir;
-
-	if (cmd->Request.Type.Direction == XFER_READ)
-		ddir = PCI_DMA_FROMDEVICE;
-	else
-		ddir = PCI_DMA_TODEVICE;
-
-	/* command did not need to be retried */
-	/* unmap the DMA mapping for all the scatter gather elements */
-	for(i=0; i<cmd->Header.SGList; i++) {
-		temp64.val32.lower = cmd->SG[i].Addr.lower;
-		temp64.val32.upper = cmd->SG[i].Addr.upper;
-		pci_unmap_page(h->pdev, temp64.val, cmd->SG[i].Len, ddir);
-	}
-
-	complete_buffers(rq->bio, rq->errors);
-
-#ifdef CCISS_DEBUG
-	printk("Done with %p\n", rq);
-#endif /* CCISS_DEBUG */ 
-
-	spin_lock_irqsave(&h->lock, flags);
-	end_that_request_last(rq, rq->errors);
-	cmd_free(h, cmd,1);
-	spin_unlock_irqrestore(&h->lock, flags);
-}
-
 /* checks the status of the job and calls complete buffers to mark all 
  * buffers for the completed job. Note that this function does not need
  * to hold the hba/queue lock.
@@ -3269,8 +3271,8 @@ clean2:
 	unregister_blkdev(hba[i]->major, hba[i]->devname);
 clean1:
 	release_io_mem(hba[i]);
-	free_hba(i);
 	hba[i]->busy_initializing = 0;
+	free_hba(i);
 	return(-1);
 }
 
diff -uprN linux-2.6.16/drivers/block/ub.c linux-2.6.16.ovz/drivers/block/ub.c
--- linux-2.6.16/drivers/block/ub.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/drivers/block/ub.c	2006-07-05 08:34:55.000000000 -0400
@@ -704,6 +704,9 @@ static void ub_cleanup(struct ub_dev *sc
 		kfree(lun);
 	}
 
+	usb_set_intfdata(sc->intf, NULL);
+	usb_put_intf(sc->intf);
+	usb_put_dev(sc->dev);
 	kfree(sc);
 }
 
@@ -2428,7 +2431,12 @@ static int ub_probe(struct usb_interface
 	// sc->ifnum = intf->cur_altsetting->desc.bInterfaceNumber;
 	usb_set_intfdata(intf, sc);
 	usb_get_dev(sc->dev);
-	// usb_get_intf(sc->intf);	/* Do we need this? */
+	/*
+	 * Since we give the interface struct to the block level through
+	 * disk->driverfs_dev, we have to pin it. Otherwise, block_uevent
+	 * oopses on close after a disconnect (kernels 2.6.16 and up).
+	 */
+	usb_get_intf(sc->intf);
 
 	snprintf(sc->name, 12, DRV_NAME "(%d.%d)",
 	    sc->dev->bus->busnum, sc->dev->devnum);
@@ -2509,7 +2517,7 @@ static int ub_probe(struct usb_interface
 err_diag:
 err_dev_desc:
 	usb_set_intfdata(intf, NULL);
-	// usb_put_intf(sc->intf);
+	usb_put_intf(sc->intf);
 	usb_put_dev(sc->dev);
 	kfree(sc);
 err_core:
@@ -2688,12 +2696,6 @@ static void ub_disconnect(struct usb_int
 	 */
 
 	device_remove_file(&sc->intf->dev, &dev_attr_diag);
-	usb_set_intfdata(intf, NULL);
-	// usb_put_intf(sc->intf);
-	sc->intf = NULL;
-	usb_put_dev(sc->dev);
-	sc->dev = NULL;
-
 	ub_put(sc);
 }
 
diff -uprN linux-2.6.16/drivers/char/Kconfig linux-2.6.16.ovz/drivers/char/Kconfig
--- linux-2.6.16/drivers/char/Kconfig	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/drivers/char/Kconfig	2006-07-05 08:34:55.000000000 -0400
@@ -187,6 +187,7 @@ config MOXA_SMARTIO
 config ISI
 	tristate "Multi-Tech multiport card support (EXPERIMENTAL)"
 	depends on SERIAL_NONSTANDARD
+	select FW_LOADER
 	help
 	  This is a driver for the Multi-Tech cards which provide several
 	  serial ports.  The driver is experimental and can currently only be
diff -uprN linux-2.6.16/drivers/char/agp/efficeon-agp.c linux-2.6.16.ovz/drivers/char/agp/efficeon-agp.c
--- linux-2.6.16/drivers/char/agp/efficeon-agp.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/drivers/char/agp/efficeon-agp.c	2006-07-05 08:34:55.000000000 -0400
@@ -64,6 +64,12 @@ static struct gatt_mask efficeon_generic
 	{.mask = 0x00000001, .type = 0}
 };
 
+/* Efficeon stand-in for mask_memory(): returns addr with bit 0 set. */
+static inline unsigned long efficeon_mask_memory(unsigned long addr)
+{
+	return addr | 0x00000001;
+}
+
 static struct aper_size_info_lvl2 efficeon_generic_sizes[4] =
 {
 	{256, 65536, 0},
@@ -251,7 +257,7 @@ static int efficeon_insert_memory(struct
 	last_page = NULL;
 	for (i = 0; i < count; i++) {
 		int index = pg_start + i;
-		unsigned long insert = mem->memory[i];
+		unsigned long insert = efficeon_mask_memory(mem->memory[i]);
 
 		page = (unsigned int *) efficeon_private.l1_table[index >> 10];
 
diff -uprN linux-2.6.16/drivers/char/cs5535_gpio.c linux-2.6.16.ovz/drivers/char/cs5535_gpio.c
--- linux-2.6.16/drivers/char/cs5535_gpio.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/drivers/char/cs5535_gpio.c	2006-07-05 08:34:55.000000000 -0400
@@ -241,9 +241,10 @@ static int __init cs5535_gpio_init(void)
 static void __exit cs5535_gpio_cleanup(void)
 {
 	dev_t dev_id = MKDEV(major, 0);
+
+	cdev_del(&cs5535_gpio_cdev);
 	unregister_chrdev_region(dev_id, CS5535_GPIO_COUNT);
-	if (gpio_base != 0)
-		release_region(gpio_base, CS5535_GPIO_SIZE);
+	release_region(gpio_base, CS5535_GPIO_SIZE);
 }
 
 module_init(cs5535_gpio_init);
diff -uprN linux-2.6.16/drivers/char/ipmi/ipmi_bt_sm.c linux-2.6.16.ovz/drivers/char/ipmi/ipmi_bt_sm.c
--- linux-2.6.16/drivers/char/ipmi/ipmi_bt_sm.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/drivers/char/ipmi/ipmi_bt_sm.c	2006-07-05 08:34:55.000000000 -0400
@@ -165,7 +165,7 @@ static int bt_start_transaction(struct s
 {
 	unsigned int i;
 
-	if ((size < 2) || (size > IPMI_MAX_MSG_LENGTH))
+	if ((size < 2) || (size > (IPMI_MAX_MSG_LENGTH - 2)))
 	       return -1;
 
 	if ((bt->state != BT_STATE_IDLE) && (bt->state != BT_STATE_HOSED))
diff -uprN linux-2.6.16/drivers/char/pcmcia/cm4000_cs.c linux-2.6.16.ovz/drivers/char/pcmcia/cm4000_cs.c
--- linux-2.6.16/drivers/char/pcmcia/cm4000_cs.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/drivers/char/pcmcia/cm4000_cs.c	2006-07-05 08:34:55.000000000 -0400
@@ -2010,10 +2010,6 @@ static int __init cmm_init(void)
 	if (!cmm_class)
 		return -1;
 
-	rc = pcmcia_register_driver(&cm4000_driver);
-	if (rc < 0)
-		return rc;
-
 	major = register_chrdev(0, DEVICE_NAME, &cm4000_fops);
 	if (major < 0) {
 		printk(KERN_WARNING MODULE_NAME
@@ -2021,6 +2017,12 @@ static int __init cmm_init(void)
 		return -1;
 	}
 
+	rc = pcmcia_register_driver(&cm4000_driver);
+	if (rc < 0) {
+		unregister_chrdev(major, DEVICE_NAME);
+		return rc;
+	}
+
 	return 0;
 }
 
diff -uprN linux-2.6.16/drivers/char/pcmcia/cm4040_cs.c linux-2.6.16.ovz/drivers/char/pcmcia/cm4040_cs.c
--- linux-2.6.16/drivers/char/pcmcia/cm4040_cs.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/drivers/char/pcmcia/cm4040_cs.c	2006-07-05 08:34:55.000000000 -0400
@@ -769,16 +769,19 @@ static int __init cm4040_init(void)
 	if (!cmx_class)
 		return -1;
 
-	rc = pcmcia_register_driver(&reader_driver);
-	if (rc < 0)
-		return rc;
-
 	major = register_chrdev(0, DEVICE_NAME, &reader_fops);
 	if (major < 0) {
 		printk(KERN_WARNING MODULE_NAME
 			": could not get major number\n");
 		return -1;
 	}
+
+	rc = pcmcia_register_driver(&reader_driver);
+	if (rc < 0) {
+		unregister_chrdev(major, DEVICE_NAME);
+		return rc;
+	}
+
 	return 0;
 }
 
diff -uprN linux-2.6.16/drivers/char/pty.c linux-2.6.16.ovz/drivers/char/pty.c
--- linux-2.6.16/drivers/char/pty.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/drivers/char/pty.c	2006-07-05 08:34:55.000000000 -0400
@@ -32,16 +32,30 @@
 #include <linux/bitops.h>
 #include <linux/devpts_fs.h>
 
+#include <ub/ub_misc.h>
+
 /* These are global because they are accessed in tty_io.c */
 #ifdef CONFIG_UNIX98_PTYS
 struct tty_driver *ptm_driver;
-static struct tty_driver *pts_driver;
+struct tty_driver *pts_driver;
+EXPORT_SYMBOL(ptm_driver);
+EXPORT_SYMBOL(pts_driver);
+
+void prepare_pty(void)	/* does nothing unless CONFIG_VE is set */
+{
+#ifdef CONFIG_VE
+	get_ve0()->ptm_driver = ptm_driver;	/* record the global ptm_driver in the get_ve0() environment */
+	/* do not clear ptm_driver and co. here: they are used in vecalls.c */
+#endif
+}
 #endif
 
 static void pty_close(struct tty_struct * tty, struct file * filp)
 {
 	if (!tty)
 		return;
+
+	ub_pty_uncharge(tty);
 	if (tty->driver->subtype == PTY_TYPE_MASTER) {
 		if (tty->count > 1)
 			printk("master pty_close: count = %d!!\n", tty->count);
@@ -61,8 +75,12 @@ static void pty_close(struct tty_struct 
 	if (tty->driver->subtype == PTY_TYPE_MASTER) {
 		set_bit(TTY_OTHER_CLOSED, &tty->flags);
 #ifdef CONFIG_UNIX98_PTYS
-		if (tty->driver == ptm_driver)
+		if (tty->driver->flags & TTY_DRIVER_DEVPTS_MEM) {
+			struct ve_struct *old_env;
+			old_env = set_exec_env(VE_OWNER_TTY(tty));
 			devpts_pty_kill(tty->index);
+			(void)set_exec_env(old_env);
+		}
 #endif
 		tty_vhangup(tty->link);
 	}
@@ -212,6 +230,10 @@ static int pty_open(struct tty_struct *t
 	if (tty->link->count != 1)
 		goto out;
 
+	retval = -ENODEV;
+	if (ub_pty_charge(tty))
+		goto out;
+
 	clear_bit(TTY_OTHER_CLOSED, &tty->link->flags);
 	set_bit(TTY_THROTTLED, &tty->flags);
 	set_bit(TTY_DO_WRITE_WAKEUP, &tty->flags);
@@ -239,7 +261,9 @@ static struct tty_operations pty_ops = {
 
 /* Traditional BSD devices */
 #ifdef CONFIG_LEGACY_PTYS
-static struct tty_driver *pty_driver, *pty_slave_driver;
+struct tty_driver *pty_driver, *pty_slave_driver;
+EXPORT_SYMBOL(pty_driver);
+EXPORT_SYMBOL(pty_slave_driver);
 
 static int pty_bsd_ioctl(struct tty_struct *tty, struct file *file,
 			 unsigned int cmd, unsigned long arg)
@@ -397,6 +421,7 @@ static void __init unix98_pty_init(void)
 		panic("Couldn't register Unix98 pts driver");
 
 	pty_table[1].data = &ptm_driver->refcount;
+	prepare_pty();
 }
 #else
 static inline void unix98_pty_init(void) { }
diff -uprN linux-2.6.16/drivers/char/snsc.c linux-2.6.16.ovz/drivers/char/snsc.c
--- linux-2.6.16/drivers/char/snsc.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/drivers/char/snsc.c	2006-07-05 08:34:55.000000000 -0400
@@ -391,7 +391,8 @@ scdrv_init(void)
 			format_module_id(devnamep, geo_module(geoid),
 					 MODULE_FORMAT_BRIEF);
 			devnamep = devname + strlen(devname);
-			sprintf(devnamep, "#%d", geo_slab(geoid));
+			sprintf(devnamep, "^%d#%d", geo_slot(geoid),
+				geo_slab(geoid));
 
 			/* allocate sysctl device data */
 			scd = kmalloc(sizeof (struct sysctl_data_s),
diff -uprN linux-2.6.16/drivers/char/snsc_event.c linux-2.6.16.ovz/drivers/char/snsc_event.c
--- linux-2.6.16/drivers/char/snsc_event.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/drivers/char/snsc_event.c	2006-07-05 08:34:55.000000000 -0400
@@ -206,7 +206,7 @@ scdrv_dispatch_event(char *event, int le
 
 		/* first find init's task */
 		read_lock(&tasklist_lock);
-		for_each_process(p) {
+		for_each_process_all(p) {
 			if (p->pid == 1)
 				break;
 		}
diff -uprN linux-2.6.16/drivers/char/sonypi.c linux-2.6.16.ovz/drivers/char/sonypi.c
--- linux-2.6.16/drivers/char/sonypi.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/drivers/char/sonypi.c	2006-07-05 08:34:55.000000000 -0400
@@ -1341,6 +1341,9 @@ static int __devinit sonypi_probe(struct
 	else if ((pcidev = pci_get_device(PCI_VENDOR_ID_INTEL,
 					  PCI_DEVICE_ID_INTEL_ICH6_1, NULL)))
 		sonypi_device.model = SONYPI_DEVICE_MODEL_TYPE3;
+	else if ((pcidev = pci_get_device(PCI_VENDOR_ID_INTEL,
+					  PCI_DEVICE_ID_INTEL_ICH7_1, NULL)))
+		sonypi_device.model = SONYPI_DEVICE_MODEL_TYPE3;
 	else
 		sonypi_device.model = SONYPI_DEVICE_MODEL_TYPE2;
 
diff -uprN linux-2.6.16/drivers/char/sysrq.c linux-2.6.16.ovz/drivers/char/sysrq.c
--- linux-2.6.16/drivers/char/sysrq.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/drivers/char/sysrq.c	2006-07-05 08:34:55.000000000 -0400
@@ -174,8 +174,13 @@ static struct sysrq_key_op sysrq_showloc
 static void sysrq_handle_showregs(int key, struct pt_regs *pt_regs,
 				  struct tty_struct *tty) 
 {
+	bust_spinlocks(1);
 	if (pt_regs)
 		show_regs(pt_regs);
+	bust_spinlocks(0);
+#if defined(__i386__) || defined(__x86_64__)
+	smp_nmi_call_function(smp_show_regs, NULL, 0);
+#endif
 }
 static struct sysrq_key_op sysrq_showregs_op = {
 	.handler	= sysrq_handle_showregs,
@@ -221,7 +226,7 @@ static void send_sig_all(int sig)
 {
 	struct task_struct *p;
 
-	for_each_process(p) {
+	for_each_process_all(p) {
 		if (p->mm && p->pid != 1)
 			/* Not swapper, init nor kernel thread */
 			force_sig(sig, p);
@@ -272,6 +277,19 @@ static struct sysrq_key_op sysrq_kill_op
 	.enable_mask	= SYSRQ_ENABLE_SIGNAL,
 };
 
+#ifdef CONFIG_SCHED_VCPU
+static void sysrq_handle_vschedstate(int key, struct pt_regs *pt_regs,
+				   struct tty_struct *tty)
+{
+	show_vsched();	/* key, pt_regs and tty are not used */
+}
+static struct sysrq_key_op sysrq_vschedstate_op = {	/* placed in the 'a' slot of sysrq_key_table under CONFIG_SCHED_VCPU */
+	.handler	= sysrq_handle_vschedstate,
+	.help_msg	= "vsched_stAte",
+	.action_msg	= "Show Vsched",
+};
+#endif
+
 /* END SIGNAL SYSRQ HANDLERS BLOCK */
 
 static void sysrq_handle_unrt(int key, struct pt_regs *pt_regs,
@@ -300,9 +318,13 @@ static struct sysrq_key_op *sysrq_key_ta
 /* 7 */	&sysrq_loglevel_op,
 /* 8 */	&sysrq_loglevel_op,
 /* 9 */	&sysrq_loglevel_op,
+#ifdef CONFIG_SCHED_VCPU
+/* a */ &sysrq_vschedstate_op,
+#else
 /* a */	NULL, /* Don't use for system provided sysrqs,
 		 it is handled specially on the sparc
 		 and will never arrive */
+#endif
 /* b */	&sysrq_reboot_op,
 #ifdef CONFIG_KEXEC
 /* c */ &sysrq_crashdump_op,
diff -uprN linux-2.6.16/drivers/char/tipar.c linux-2.6.16.ovz/drivers/char/tipar.c
--- linux-2.6.16/drivers/char/tipar.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/drivers/char/tipar.c	2006-07-05 08:34:55.000000000 -0400
@@ -515,7 +515,7 @@ tipar_init_module(void)
 		err = PTR_ERR(tipar_class);
 		goto out_chrdev;
 	}
-	if (parport_register_driver(&tipar_driver) || tp_count == 0) {
+	if (parport_register_driver(&tipar_driver)) {
 		printk(KERN_ERR "tipar: unable to register with parport\n");
 		err = -EIO;
 		goto out_class;
diff -uprN linux-2.6.16/drivers/char/tlclk.c linux-2.6.16.ovz/drivers/char/tlclk.c
--- linux-2.6.16/drivers/char/tlclk.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/drivers/char/tlclk.c	2006-07-05 08:34:55.000000000 -0400
@@ -327,7 +327,7 @@ static ssize_t store_received_ref_clk3a(
 	return strnlen(buf, count);
 }
 
-static DEVICE_ATTR(received_ref_clk3a, S_IWUGO, NULL,
+static DEVICE_ATTR(received_ref_clk3a, (S_IWUSR|S_IWGRP), NULL,
 		store_received_ref_clk3a);
 
 
@@ -349,7 +349,7 @@ static ssize_t store_received_ref_clk3b(
 	return strnlen(buf, count);
 }
 
-static DEVICE_ATTR(received_ref_clk3b, S_IWUGO, NULL,
+static DEVICE_ATTR(received_ref_clk3b, (S_IWUSR|S_IWGRP), NULL,
 		store_received_ref_clk3b);
 
 
@@ -371,7 +371,7 @@ static ssize_t store_enable_clk3b_output
 	return strnlen(buf, count);
 }
 
-static DEVICE_ATTR(enable_clk3b_output, S_IWUGO, NULL,
+static DEVICE_ATTR(enable_clk3b_output, (S_IWUSR|S_IWGRP), NULL,
 		store_enable_clk3b_output);
 
 static ssize_t store_enable_clk3a_output(struct device *d,
@@ -392,7 +392,7 @@ static ssize_t store_enable_clk3a_output
 	return strnlen(buf, count);
 }
 
-static DEVICE_ATTR(enable_clk3a_output, S_IWUGO, NULL,
+static DEVICE_ATTR(enable_clk3a_output, (S_IWUSR|S_IWGRP), NULL,
 		store_enable_clk3a_output);
 
 static ssize_t store_enable_clkb1_output(struct device *d,
@@ -413,7 +413,7 @@ static ssize_t store_enable_clkb1_output
 	return strnlen(buf, count);
 }
 
-static DEVICE_ATTR(enable_clkb1_output, S_IWUGO, NULL,
+static DEVICE_ATTR(enable_clkb1_output, (S_IWUSR|S_IWGRP), NULL,
 		store_enable_clkb1_output);
 
 
@@ -435,7 +435,7 @@ static ssize_t store_enable_clka1_output
 	return strnlen(buf, count);
 }
 
-static DEVICE_ATTR(enable_clka1_output, S_IWUGO, NULL,
+static DEVICE_ATTR(enable_clka1_output, (S_IWUSR|S_IWGRP), NULL,
 		store_enable_clka1_output);
 
 static ssize_t store_enable_clkb0_output(struct device *d,
@@ -456,7 +456,7 @@ static ssize_t store_enable_clkb0_output
 	return strnlen(buf, count);
 }
 
-static DEVICE_ATTR(enable_clkb0_output, S_IWUGO, NULL,
+static DEVICE_ATTR(enable_clkb0_output, (S_IWUSR|S_IWGRP), NULL,
 		store_enable_clkb0_output);
 
 static ssize_t store_enable_clka0_output(struct device *d,
@@ -477,7 +477,7 @@ static ssize_t store_enable_clka0_output
 	return strnlen(buf, count);
 }
 
-static DEVICE_ATTR(enable_clka0_output, S_IWUGO, NULL,
+static DEVICE_ATTR(enable_clka0_output, (S_IWUSR|S_IWGRP), NULL,
 		store_enable_clka0_output);
 
 static ssize_t store_select_amcb2_transmit_clock(struct device *d,
@@ -519,7 +519,7 @@ static ssize_t store_select_amcb2_transm
 	return strnlen(buf, count);
 }
 
-static DEVICE_ATTR(select_amcb2_transmit_clock, S_IWUGO, NULL,
+static DEVICE_ATTR(select_amcb2_transmit_clock, (S_IWUSR|S_IWGRP), NULL,
 	store_select_amcb2_transmit_clock);
 
 static ssize_t store_select_amcb1_transmit_clock(struct device *d,
@@ -560,7 +560,7 @@ static ssize_t store_select_amcb1_transm
 	return strnlen(buf, count);
 }
 
-static DEVICE_ATTR(select_amcb1_transmit_clock, S_IWUGO, NULL,
+static DEVICE_ATTR(select_amcb1_transmit_clock, (S_IWUSR|S_IWGRP), NULL,
 		store_select_amcb1_transmit_clock);
 
 static ssize_t store_select_redundant_clock(struct device *d,
@@ -581,7 +581,7 @@ static ssize_t store_select_redundant_cl
 	return strnlen(buf, count);
 }
 
-static DEVICE_ATTR(select_redundant_clock, S_IWUGO, NULL,
+static DEVICE_ATTR(select_redundant_clock, (S_IWUSR|S_IWGRP), NULL,
 		store_select_redundant_clock);
 
 static ssize_t store_select_ref_frequency(struct device *d,
@@ -602,7 +602,7 @@ static ssize_t store_select_ref_frequenc
 	return strnlen(buf, count);
 }
 
-static DEVICE_ATTR(select_ref_frequency, S_IWUGO, NULL,
+static DEVICE_ATTR(select_ref_frequency, (S_IWUSR|S_IWGRP), NULL,
 		store_select_ref_frequency);
 
 static ssize_t store_filter_select(struct device *d,
@@ -623,7 +623,7 @@ static ssize_t store_filter_select(struc
 	return strnlen(buf, count);
 }
 
-static DEVICE_ATTR(filter_select, S_IWUGO, NULL, store_filter_select);
+static DEVICE_ATTR(filter_select, (S_IWUSR|S_IWGRP), NULL, store_filter_select);
 
 static ssize_t store_hardware_switching_mode(struct device *d,
 		 struct device_attribute *attr, const char *buf, size_t count)
@@ -643,7 +643,7 @@ static ssize_t store_hardware_switching_
 	return strnlen(buf, count);
 }
 
-static DEVICE_ATTR(hardware_switching_mode, S_IWUGO, NULL,
+static DEVICE_ATTR(hardware_switching_mode, (S_IWUSR|S_IWGRP), NULL,
 		store_hardware_switching_mode);
 
 static ssize_t store_hardware_switching(struct device *d,
@@ -664,7 +664,7 @@ static ssize_t store_hardware_switching(
 	return strnlen(buf, count);
 }
 
-static DEVICE_ATTR(hardware_switching, S_IWUGO, NULL,
+static DEVICE_ATTR(hardware_switching, (S_IWUSR|S_IWGRP), NULL,
 		store_hardware_switching);
 
 static ssize_t store_refalign (struct device *d,
@@ -684,7 +684,7 @@ static ssize_t store_refalign (struct de
 	return strnlen(buf, count);
 }
 
-static DEVICE_ATTR(refalign, S_IWUGO, NULL, store_refalign);
+static DEVICE_ATTR(refalign, (S_IWUSR|S_IWGRP), NULL, store_refalign);
 
 static ssize_t store_mode_select (struct device *d,
 		 struct device_attribute *attr, const char *buf, size_t count)
@@ -704,7 +704,7 @@ static ssize_t store_mode_select (struct
 	return strnlen(buf, count);
 }
 
-static DEVICE_ATTR(mode_select, S_IWUGO, NULL, store_mode_select);
+static DEVICE_ATTR(mode_select, (S_IWUSR|S_IWGRP), NULL, store_mode_select);
 
 static ssize_t store_reset (struct device *d,
 		 struct device_attribute *attr, const char *buf, size_t count)
@@ -724,7 +724,7 @@ static ssize_t store_reset (struct devic
 	return strnlen(buf, count);
 }
 
-static DEVICE_ATTR(reset, S_IWUGO, NULL, store_reset);
+static DEVICE_ATTR(reset, (S_IWUSR|S_IWGRP), NULL, store_reset);
 
 static struct attribute *tlclk_sysfs_entries[] = {
 	&dev_attr_current_ref.attr,
@@ -767,6 +767,7 @@ static int __init tlclk_init(void)
 		printk(KERN_ERR "tlclk: can't get major %d.\n", tlclk_major);
 		return ret;
 	}
+	tlclk_major = ret;
 	alarm_events = kzalloc( sizeof(struct tlclk_alarms), GFP_KERNEL);
 	if (!alarm_events)
 		goto out1;
diff -uprN linux-2.6.16/drivers/char/tty_io.c linux-2.6.16.ovz/drivers/char/tty_io.c
--- linux-2.6.16/drivers/char/tty_io.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/drivers/char/tty_io.c	2006-07-05 08:34:55.000000000 -0400
@@ -86,6 +86,7 @@
 #include <linux/string.h>
 #include <linux/slab.h>
 #include <linux/poll.h>
+#include <linux/ve_owner.h>
 #include <linux/proc_fs.h>
 #include <linux/init.h>
 #include <linux/module.h>
@@ -105,6 +106,7 @@
 #include <linux/devfs_fs_kernel.h>
 
 #include <linux/kmod.h>
+#include <ub/ub_mem.h>
 
 #undef TTY_DEBUG_HANGUP
 
@@ -122,11 +124,16 @@ struct termios tty_std_termios = {	/* fo
 
 EXPORT_SYMBOL(tty_std_termios);
 
+/* This lock protects the tty_drivers list, whose users do no locking of their own. */
+rwlock_t tty_driver_guard = RW_LOCK_UNLOCKED;
+EXPORT_SYMBOL(tty_driver_guard);
+
 /* This list gets poked at by procfs and various bits of boot up code. This
    could do with some rationalisation such as pulling the tty proc function
    into this file */
    
 LIST_HEAD(tty_drivers);			/* linked list of tty drivers */
+EXPORT_SYMBOL(tty_drivers);
 
 /* Semaphore to protect creating and releasing a tty. This is shared with
    vt.c for deeply disgusting hack reasons */
@@ -136,6 +143,15 @@ DECLARE_MUTEX(tty_sem);
 extern struct tty_driver *ptm_driver;	/* Unix98 pty masters; for /dev/ptmx */
 extern int pty_limit;		/* Config limit on Unix98 ptys */
 static DEFINE_IDR(allocated_ptys);
+#ifdef CONFIG_VE
+#define __ve_allocated_ptys(ve) (*((ve)->allocated_ptys))
+#define ve_allocated_ptys	__ve_allocated_ptys(get_exec_env())
+#define ve_ptm_driver		(get_exec_env()->ptm_driver)
+#else
+#define __ve_allocated_ptys(ve) allocated_ptys
+#define ve_allocated_ptys	allocated_ptys
+#define ve_ptm_driver		ptm_driver
+#endif
 static DECLARE_MUTEX(allocated_ptys_lock);
 static int ptmx_open(struct inode *, struct file *);
 #endif
@@ -156,11 +172,25 @@ static int tty_fasync(int fd, struct fil
 static void release_mem(struct tty_struct *tty, int idx);
 
 
+DCL_VE_OWNER(TTYDRV, struct tty_driver, owner_env)
+DCL_VE_OWNER(TTY, struct tty_struct, owner_env)
+
+void prepare_tty(void)
+{
+#ifdef CONFIG_VE
+	get_ve0()->allocated_ptys = &allocated_ptys;	/* point the get_ve0() environment at the global pty IDR */
+	/*
+	 * in this case, tty_register_driver() sets up
+	 * owner_env correctly right from boot
+	 */
+#endif
+}
+
 static struct tty_struct *alloc_tty_struct(void)
 {
 	struct tty_struct *tty;
 
-	tty = kmalloc(sizeof(struct tty_struct), GFP_KERNEL);
+	tty = ub_kmalloc(sizeof(struct tty_struct), GFP_KERNEL);
 	if (tty)
 		memset(tty, 0, sizeof(struct tty_struct));
 	return tty;
@@ -857,14 +887,37 @@ static struct tty_driver *get_tty_driver
 {
 	struct tty_driver *p;
 
+	read_lock(&tty_driver_guard);
 	list_for_each_entry(p, &tty_drivers, tty_drivers) {
 		dev_t base = MKDEV(p->major, p->minor_start);
 		if (device < base || device >= base + p->num)
 			continue;
 		*index = device - base;
-		return p;
+#ifdef CONFIG_VE
+		if (in_interrupt())
+			goto found;
+		if (p->major!=PTY_MASTER_MAJOR && p->major!=PTY_SLAVE_MAJOR
+#ifdef CONFIG_UNIX98_PTYS
+		    && (p->major<UNIX98_PTY_MASTER_MAJOR ||
+		    	p->major>UNIX98_PTY_MASTER_MAJOR+UNIX98_PTY_MAJOR_COUNT-1) &&
+		       (p->major<UNIX98_PTY_SLAVE_MAJOR ||
+		        p->major>UNIX98_PTY_SLAVE_MAJOR+UNIX98_PTY_MAJOR_COUNT-1)
+#endif
+		) goto found;
+		if (ve_is_super(VE_OWNER_TTYDRV(p)) &&
+		    ve_is_super(get_exec_env()))
+			goto found;
+		if (!ve_accessible_strict(VE_OWNER_TTYDRV(p), get_exec_env()))
+			continue;
+#endif
+		goto found;
 	}
+	read_unlock(&tty_driver_guard);
 	return NULL;
+
+found:
+	read_unlock(&tty_driver_guard);
+	return p;
 }
 
 /*
@@ -1092,7 +1145,7 @@ static void do_tty_hangup(void *data)
 	
 	read_lock(&tasklist_lock);
 	if (tty->session > 0) {
-		do_each_task_pid(tty->session, PIDTYPE_SID, p) {
+		do_each_task_pid_all(tty->session, PIDTYPE_SID, p) {
 			if (p->signal->tty == tty)
 				p->signal->tty = NULL;
 			if (!p->signal->leader)
@@ -1101,7 +1154,7 @@ static void do_tty_hangup(void *data)
 			send_group_sig_info(SIGCONT, SEND_SIG_PRIV, p);
 			if (tty->pgrp > 0)
 				p->signal->tty_old_pgrp = tty->pgrp;
-		} while_each_task_pid(tty->session, PIDTYPE_SID, p);
+		} while_each_task_pid_all(tty->session, PIDTYPE_SID, p);
 	}
 	read_unlock(&tasklist_lock);
 
@@ -1218,9 +1271,9 @@ void disassociate_ctty(int on_exit)
 
 	/* Now clear signal->tty under the lock */
 	read_lock(&tasklist_lock);
-	do_each_task_pid(current->signal->session, PIDTYPE_SID, p) {
+	do_each_task_pid_all(current->signal->session, PIDTYPE_SID, p) {
 		p->signal->tty = NULL;
-	} while_each_task_pid(current->signal->session, PIDTYPE_SID, p);
+	} while_each_task_pid_all(current->signal->session, PIDTYPE_SID, p);
 	read_unlock(&tasklist_lock);
 	up(&tty_sem);
 	unlock_kernel();
@@ -1446,21 +1499,28 @@ static inline void tty_line_name(struct 
  * really quite straightforward.  The semaphore locking can probably be
  * relaxed for the (most common) case of reopening a tty.
  */
-static int init_dev(struct tty_driver *driver, int idx,
-	struct tty_struct **ret_tty)
+static int init_dev(struct tty_driver *driver, int idx, 
+	struct tty_struct *i_tty, struct tty_struct **ret_tty)
 {
 	struct tty_struct *tty, *o_tty;
 	struct termios *tp, **tp_loc, *o_tp, **o_tp_loc;
 	struct termios *ltp, **ltp_loc, *o_ltp, **o_ltp_loc;
+	struct ve_struct * owner;
 	int retval=0;
 
-	/* check whether we're reopening an existing tty */
-	if (driver->flags & TTY_DRIVER_DEVPTS_MEM) {
-		tty = devpts_get_tty(idx);
-		if (tty && driver->subtype == PTY_TYPE_MASTER)
-			tty = tty->link;
-	} else {
-		tty = driver->ttys[idx];
+	owner = VE_OWNER_TTYDRV(driver);
+
+	if (i_tty)
+		tty = i_tty;
+	else {
+		/* check whether we're reopening an existing tty */
+		if (driver->flags & TTY_DRIVER_DEVPTS_MEM) {
+			tty = devpts_get_tty(idx);
+			if (tty && driver->subtype == PTY_TYPE_MASTER)
+				tty = tty->link;
+		} else {
+			tty = driver->ttys[idx];
+		}
 	}
 	if (tty) goto fast_track;
 
@@ -1488,6 +1548,7 @@ static int init_dev(struct tty_driver *d
 	tty->driver = driver;
 	tty->index = idx;
 	tty_line_name(driver, idx, tty->name);
+	SET_VE_OWNER_TTY(tty, owner);
 
 	if (driver->flags & TTY_DRIVER_DEVPTS_MEM) {
 		tp_loc = &tty->termios;
@@ -1498,7 +1559,7 @@ static int init_dev(struct tty_driver *d
 	}
 
 	if (!*tp_loc) {
-		tp = (struct termios *) kmalloc(sizeof(struct termios),
+		tp = (struct termios *) ub_kmalloc(sizeof(struct termios),
 						GFP_KERNEL);
 		if (!tp)
 			goto free_mem_out;
@@ -1506,7 +1567,7 @@ static int init_dev(struct tty_driver *d
 	}
 
 	if (!*ltp_loc) {
-		ltp = (struct termios *) kmalloc(sizeof(struct termios),
+		ltp = (struct termios *) ub_kmalloc(sizeof(struct termios),
 						 GFP_KERNEL);
 		if (!ltp)
 			goto free_mem_out;
@@ -1521,6 +1582,7 @@ static int init_dev(struct tty_driver *d
 		o_tty->driver = driver->other;
 		o_tty->index = idx;
 		tty_line_name(driver->other, idx, o_tty->name);
+		SET_VE_OWNER_TTY(o_tty, owner);
 
 		if (driver->flags & TTY_DRIVER_DEVPTS_MEM) {
 			o_tp_loc = &o_tty->termios;
@@ -1532,7 +1594,7 @@ static int init_dev(struct tty_driver *d
 
 		if (!*o_tp_loc) {
 			o_tp = (struct termios *)
-				kmalloc(sizeof(struct termios), GFP_KERNEL);
+				ub_kmalloc(sizeof(struct termios), GFP_KERNEL);
 			if (!o_tp)
 				goto free_mem_out;
 			*o_tp = driver->other->init_termios;
@@ -1540,7 +1602,7 @@ static int init_dev(struct tty_driver *d
 
 		if (!*o_ltp_loc) {
 			o_ltp = (struct termios *)
-				kmalloc(sizeof(struct termios), GFP_KERNEL);
+				ub_kmalloc(sizeof(struct termios), GFP_KERNEL);
 			if (!o_ltp)
 				goto free_mem_out;
 			memset(o_ltp, 0, sizeof(struct termios));
@@ -1558,6 +1620,10 @@ static int init_dev(struct tty_driver *d
 			*o_ltp_loc = o_ltp;
 		o_tty->termios = *o_tp_loc;
 		o_tty->termios_locked = *o_ltp_loc;
+#ifdef CONFIG_VE
+		if (driver->other->refcount == 0)
+			(void)get_ve(owner);
+#endif
 		driver->other->refcount++;
 		if (driver->subtype == PTY_TYPE_MASTER)
 			o_tty->count++;
@@ -1582,6 +1648,10 @@ static int init_dev(struct tty_driver *d
 		*ltp_loc = ltp;
 	tty->termios = *tp_loc;
 	tty->termios_locked = *ltp_loc;
+#ifdef CONFIG_VE
+	if (driver->refcount == 0)
+		(void)get_ve(owner);
+#endif
 	driver->refcount++;
 	tty->count++;
 
@@ -1692,6 +1762,10 @@ static void release_mem(struct tty_struc
 		}
 		o_tty->magic = 0;
 		o_tty->driver->refcount--;
+#ifdef CONFIG_VE
+		if (o_tty->driver->refcount == 0)
+			put_ve(VE_OWNER_TTY(o_tty));
+#endif
 		file_list_lock();
 		list_del_init(&o_tty->tty_files);
 		file_list_unlock();
@@ -1714,6 +1788,10 @@ static void release_mem(struct tty_struc
 
 	tty->magic = 0;
 	tty->driver->refcount--;
+#ifdef CONFIG_VE
+	if (tty->driver->refcount == 0)
+		put_ve(VE_OWNER_TTY(tty));
+#endif
 	file_list_lock();
 	list_del_init(&tty->tty_files);
 	file_list_unlock();
@@ -1737,7 +1815,10 @@ static void release_dev(struct file * fi
 	int	idx;
 	char	buf[64];
 	unsigned long flags;
-	
+#ifdef CONFIG_UNIX98_PTYS
+	struct idr *idr_alloced;
+#endif
+
 	tty = (struct tty_struct *)filp->private_data;
 	if (tty_paranoia_check(tty, filp->f_dentry->d_inode, "release_dev"))
 		return;
@@ -1752,6 +1833,9 @@ static void release_dev(struct file * fi
 	devpts = (tty->driver->flags & TTY_DRIVER_DEVPTS_MEM) != 0;
 	devpts_master = pty_master && devpts;
 	o_tty = tty->link;
+#ifdef CONFIG_UNIX98_PTYS
+	idr_alloced = &__ve_allocated_ptys(tty->owner_env);
+#endif
 
 #ifdef TTY_PARANOIA_CHECK
 	if (idx < 0 || idx >= tty->driver->num) {
@@ -1924,13 +2008,13 @@ static void release_dev(struct file * fi
 		struct task_struct *p;
 
 		read_lock(&tasklist_lock);
-		do_each_task_pid(tty->session, PIDTYPE_SID, p) {
+		do_each_task_pid_all(tty->session, PIDTYPE_SID, p) {
 			p->signal->tty = NULL;
-		} while_each_task_pid(tty->session, PIDTYPE_SID, p);
+		} while_each_task_pid_all(tty->session, PIDTYPE_SID, p);
 		if (o_tty)
-			do_each_task_pid(o_tty->session, PIDTYPE_SID, p) {
+			do_each_task_pid_all(o_tty->session, PIDTYPE_SID, p) {
 				p->signal->tty = NULL;
-			} while_each_task_pid(o_tty->session, PIDTYPE_SID, p);
+			} while_each_task_pid_all(o_tty->session, PIDTYPE_SID, p);
 		read_unlock(&tasklist_lock);
 	}
 
@@ -2005,7 +2089,7 @@ static void release_dev(struct file * fi
 	/* Make this pty number available for reallocation */
 	if (devpts) {
 		down(&allocated_ptys_lock);
-		idr_remove(&allocated_ptys, idx);
+		idr_remove(idr_alloced, idx);
 		up(&allocated_ptys_lock);
 	}
 #endif
@@ -2026,7 +2110,7 @@ static void release_dev(struct file * fi
  */
 static int tty_open(struct inode * inode, struct file * filp)
 {
-	struct tty_struct *tty;
+	struct tty_struct *tty, *c_tty;
 	int noctty, retval;
 	struct tty_driver *driver;
 	int index;
@@ -2039,6 +2123,7 @@ retry_open:
 	noctty = filp->f_flags & O_NOCTTY;
 	index  = -1;
 	retval = 0;
+	c_tty = NULL;
 	
 	down(&tty_sem);
 
@@ -2049,6 +2134,7 @@ retry_open:
 		}
 		driver = current->signal->tty->driver;
 		index = current->signal->tty->index;
+		c_tty = current->signal->tty;
 		filp->f_flags |= O_NONBLOCK; /* Don't let /dev/tty block */
 		/* noctty = 1; */
 		goto got_driver;
@@ -2056,6 +2142,12 @@ retry_open:
 #ifdef CONFIG_VT
 	if (device == MKDEV(TTY_MAJOR,0)) {
 		extern struct tty_driver *console_driver;
+#ifdef CONFIG_VE
+		if (!ve_is_super(get_exec_env())) {
+			up(&tty_sem);
+			return -ENODEV;
+		}
+#endif
 		driver = console_driver;
 		index = fg_console;
 		noctty = 1;
@@ -2063,6 +2155,12 @@ retry_open:
 	}
 #endif
 	if (device == MKDEV(TTYAUX_MAJOR,1)) {
+#ifdef CONFIG_VE
+		if (!ve_is_super(get_exec_env())) {
+			up(&tty_sem);
+			return -ENODEV;
+		}
+#endif
 		driver = console_device(&index);
 		if (driver) {
 			/* Don't let /dev/console block */
@@ -2080,7 +2178,7 @@ retry_open:
 		return -ENODEV;
 	}
 got_driver:
-	retval = init_dev(driver, index, &tty);
+	retval = init_dev(driver, index, c_tty, &tty);
 	up(&tty_sem);
 	if (retval)
 		return retval;
@@ -2149,11 +2247,11 @@ static int ptmx_open(struct inode * inod
 
 	/* find a device that is not in use. */
 	down(&allocated_ptys_lock);
-	if (!idr_pre_get(&allocated_ptys, GFP_KERNEL)) {
+	if (!idr_pre_get(&ve_allocated_ptys, GFP_KERNEL)) {
 		up(&allocated_ptys_lock);
 		return -ENOMEM;
 	}
-	idr_ret = idr_get_new(&allocated_ptys, NULL, &index);
+	idr_ret = idr_get_new(&ve_allocated_ptys, NULL, &index);
 	if (idr_ret < 0) {
 		up(&allocated_ptys_lock);
 		if (idr_ret == -EAGAIN)
@@ -2161,14 +2259,14 @@ static int ptmx_open(struct inode * inod
 		return -EIO;
 	}
 	if (index >= pty_limit) {
-		idr_remove(&allocated_ptys, index);
+		idr_remove(&ve_allocated_ptys, index);
 		up(&allocated_ptys_lock);
 		return -EIO;
 	}
 	up(&allocated_ptys_lock);
 
 	down(&tty_sem);
-	retval = init_dev(ptm_driver, index, &tty);
+	retval = init_dev(ve_ptm_driver, index, NULL,  &tty);
 	up(&tty_sem);
 	
 	if (retval)
@@ -2183,14 +2281,14 @@ static int ptmx_open(struct inode * inod
 		goto out1;
 
 	check_tty_count(tty, "tty_open");
-	retval = ptm_driver->open(tty, filp);
+	retval = ve_ptm_driver->open(tty, filp);
 	if (!retval)
 		return 0;
 out1:
 	release_dev(filp);
 out:
 	down(&allocated_ptys_lock);
-	idr_remove(&allocated_ptys, index);
+	idr_remove(&ve_allocated_ptys, index);
 	up(&allocated_ptys_lock);
 	return retval;
 }
@@ -2303,6 +2401,8 @@ static int tioccons(struct file *file)
 {
 	if (!capable(CAP_SYS_ADMIN))
 		return -EPERM;
+	if (!ve_is_super(get_exec_env()))
+		return -EACCES;
 	if (file->f_op->write == redirected_tty_write) {
 		struct file *f;
 		spin_lock(&redirect_lock);
@@ -2363,9 +2463,9 @@ static int tiocsctty(struct tty_struct *
 			 */
 
 			read_lock(&tasklist_lock);
-			do_each_task_pid(tty->session, PIDTYPE_SID, p) {
+			do_each_task_pid_all(tty->session, PIDTYPE_SID, p) {
 				p->signal->tty = NULL;
-			} while_each_task_pid(tty->session, PIDTYPE_SID, p);
+			} while_each_task_pid_all(tty->session, PIDTYPE_SID, p);
 			read_unlock(&tasklist_lock);
 		} else
 			return -EPERM;
@@ -2387,7 +2487,7 @@ static int tiocgpgrp(struct tty_struct *
 	 */
 	if (tty == real_tty && current->signal->tty != real_tty)
 		return -ENOTTY;
-	return put_user(real_tty->pgrp, p);
+	return put_user(pid_type_to_vpid(PIDTYPE_PGID, real_tty->pgrp), p);
 }
 
 static int tiocspgrp(struct tty_struct *tty, struct tty_struct *real_tty, pid_t __user *p)
@@ -2407,6 +2507,9 @@ static int tiocspgrp(struct tty_struct *
 		return -EFAULT;
 	if (pgrp < 0)
 		return -EINVAL;
+	pgrp = vpid_to_pid(pgrp);
+	if (pgrp < 0)
+		return -EPERM;
 	if (session_of_pgrp(pgrp) != current->signal->session)
 		return -EPERM;
 	real_tty->pgrp = pgrp;
@@ -2423,7 +2526,7 @@ static int tiocgsid(struct tty_struct *t
 		return -ENOTTY;
 	if (real_tty->session <= 0)
 		return -ENOTTY;
-	return put_user(real_tty->session, p);
+	return put_user(pid_type_to_vpid(PIDTYPE_SID, real_tty->session), p);
 }
 
 static int tiocsetd(struct tty_struct *tty, int __user *p)
@@ -2696,7 +2799,7 @@ static void __do_SAK(void *arg)
 		tty->driver->flush_buffer(tty);
 	
 	read_lock(&tasklist_lock);
-	do_each_task_pid(session, PIDTYPE_SID, p) {
+	do_each_task_pid_all(session, PIDTYPE_SID, p) {
 		if (p->signal->tty == tty || session > 0) {
 			printk(KERN_NOTICE "SAK: killed process %d"
 			    " (%s): p->signal->session==tty->session\n",
@@ -2706,7 +2809,11 @@ static void __do_SAK(void *arg)
 		}
 		task_lock(p);
 		if (p->files) {
-			rcu_read_lock();
+			/*
+			 * We don't take a ref to the file, so we must
+			 * hold ->file_lock instead.
+			 */
+			spin_lock(&p->files->file_lock);
 			fdt = files_fdtable(p->files);
 			for (i=0; i < fdt->max_fds; i++) {
 				filp = fcheck_files(p->files, i);
@@ -2721,10 +2828,10 @@ static void __do_SAK(void *arg)
 					break;
 				}
 			}
-			rcu_read_unlock();
+			spin_unlock(&p->files->file_lock);
 		}
 		task_unlock(p);
-	} while_each_task_pid(session, PIDTYPE_SID, p);
+	} while_each_task_pid_all(session, PIDTYPE_SID, p);
 	read_unlock(&tasklist_lock);
 #endif
 }
@@ -3095,8 +3202,11 @@ int tty_register_driver(struct tty_drive
 
 	if (!driver->put_char)
 		driver->put_char = tty_default_put_char;
-	
+
+	SET_VE_OWNER_TTYDRV(driver, get_exec_env());
+	write_lock_irq(&tty_driver_guard);
 	list_add(&driver->tty_drivers, &tty_drivers);
+	write_unlock_irq(&tty_driver_guard);
 	
 	if ( !(driver->flags & TTY_DRIVER_NO_DEVFS) ) {
 		for(i = 0; i < driver->num; i++)
@@ -3123,7 +3233,9 @@ int tty_unregister_driver(struct tty_dri
 	unregister_chrdev_region(MKDEV(driver->major, driver->minor_start),
 				driver->num);
 
+	write_lock_irq(&tty_driver_guard);
 	list_del(&driver->tty_drivers);
+	write_unlock_irq(&tty_driver_guard);
 
 	/*
 	 * Free the termios and termios_locked structures because
@@ -3246,6 +3358,7 @@ static int __init tty_init(void)
 
 	vty_init();
 #endif
+	prepare_tty();
 	return 0;
 }
 module_init(tty_init);
diff -uprN linux-2.6.16/drivers/edac/Kconfig linux-2.6.16.ovz/drivers/edac/Kconfig
--- linux-2.6.16/drivers/edac/Kconfig	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/drivers/edac/Kconfig	2006-07-05 08:34:55.000000000 -0400
@@ -71,7 +71,7 @@ config EDAC_E7XXX
 
 config EDAC_E752X
 	tristate "Intel e752x (e7520, e7525, e7320)"
-	depends on EDAC_MM_EDAC && PCI
+	depends on EDAC_MM_EDAC && PCI && HOTPLUG
 	help
 	  Support for error detection and correction on the Intel
 	  E7520, E7525, E7320 server chipsets.
diff -uprN linux-2.6.16/drivers/i2c/busses/i2c-i801.c linux-2.6.16.ovz/drivers/i2c/busses/i2c-i801.c
--- linux-2.6.16/drivers/i2c/busses/i2c-i801.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/drivers/i2c/busses/i2c-i801.c	2006-07-05 08:34:55.000000000 -0400
@@ -478,6 +478,11 @@ static s32 i801_access(struct i2c_adapte
 		ret = i801_transaction();
 	}
 
+	/* Some BIOSes don't like it when PEC is enabled at reboot or resume
+	   time, so we forcibly disable it after every transaction. */
+	if (hwpec)
+		outb_p(0, SMBAUXCTL);
+
 	if(block)
 		return ret;
 	if(ret)
diff -uprN linux-2.6.16/drivers/i2c/busses/scx200_acb.c linux-2.6.16.ovz/drivers/i2c/busses/scx200_acb.c
--- linux-2.6.16/drivers/i2c/busses/scx200_acb.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/drivers/i2c/busses/scx200_acb.c	2006-07-05 08:34:55.000000000 -0400
@@ -440,7 +440,6 @@ static int  __init scx200_acb_create(int
 	struct scx200_acb_iface *iface;
 	struct i2c_adapter *adapter;
 	int rc = 0;
-	char description[64];
 
 	iface = kzalloc(sizeof(*iface), GFP_KERNEL);
 	if (!iface) {
@@ -459,8 +458,7 @@ static int  __init scx200_acb_create(int
 
 	init_MUTEX(&iface->sem);
 
-	snprintf(description, sizeof(description), "NatSemi SCx200 ACCESS.bus [%s]", adapter->name);
-	if (request_region(base, 8, description) == 0) {
+	if (!request_region(base, 8, adapter->name)) {
 		dev_err(&adapter->dev, "can't allocate io 0x%x-0x%x\n",
 			base, base + 8-1);
 		rc = -EBUSY;
diff -uprN linux-2.6.16/drivers/i2c/chips/m41t00.c linux-2.6.16.ovz/drivers/i2c/chips/m41t00.c
--- linux-2.6.16/drivers/i2c/chips/m41t00.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/drivers/i2c/chips/m41t00.c	2006-07-05 08:34:55.000000000 -0400
@@ -129,13 +129,13 @@ m41t00_set_tlet(ulong arg)
 	if ((i2c_smbus_write_byte_data(save_client, 0, tm.tm_sec & 0x7f) < 0)
 		|| (i2c_smbus_write_byte_data(save_client, 1, tm.tm_min & 0x7f)
 			< 0)
-		|| (i2c_smbus_write_byte_data(save_client, 2, tm.tm_hour & 0x7f)
+		|| (i2c_smbus_write_byte_data(save_client, 2, tm.tm_hour & 0x3f)
 			< 0)
-		|| (i2c_smbus_write_byte_data(save_client, 4, tm.tm_mday & 0x7f)
+		|| (i2c_smbus_write_byte_data(save_client, 4, tm.tm_mday & 0x3f)
 			< 0)
-		|| (i2c_smbus_write_byte_data(save_client, 5, tm.tm_mon & 0x7f)
+		|| (i2c_smbus_write_byte_data(save_client, 5, tm.tm_mon & 0x1f)
 			< 0)
-		|| (i2c_smbus_write_byte_data(save_client, 6, tm.tm_year & 0x7f)
+		|| (i2c_smbus_write_byte_data(save_client, 6, tm.tm_year & 0xff)
 			< 0))
 
 		dev_warn(&save_client->dev,"m41t00: can't write to rtc chip\n");
diff -uprN linux-2.6.16/drivers/ide/pci/alim15x3.c linux-2.6.16.ovz/drivers/ide/pci/alim15x3.c
--- linux-2.6.16/drivers/ide/pci/alim15x3.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/drivers/ide/pci/alim15x3.c	2006-07-05 08:34:55.000000000 -0400
@@ -731,6 +731,8 @@ static unsigned int __devinit ata66_ali1
 	
 	if(m5229_revision <= 0x20)
 		tmpbyte = (tmpbyte & (~0x02)) | 0x01;
+	else if (m5229_revision == 0xc7)
+		tmpbyte |= 0x03;
 	else
 		tmpbyte |= 0x01;
 
diff -uprN linux-2.6.16/drivers/ieee1394/ohci1394.c linux-2.6.16.ovz/drivers/ieee1394/ohci1394.c
--- linux-2.6.16/drivers/ieee1394/ohci1394.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/drivers/ieee1394/ohci1394.c	2006-07-05 08:34:55.000000000 -0400
@@ -2525,7 +2525,7 @@ static irqreturn_t ohci_irq_handler(int 
 			if (phys_dma) {
 				reg_write(ohci,OHCI1394_PhyReqFilterHiSet, 0xffffffff);
 				reg_write(ohci,OHCI1394_PhyReqFilterLoSet, 0xffffffff);
-				reg_write(ohci,OHCI1394_PhyUpperBound, 0xffff0000);
+				reg_write(ohci,OHCI1394_PhyUpperBound, 0x01000000);
 			} else {
 				reg_write(ohci,OHCI1394_PhyReqFilterHiSet, 0x00000000);
 				reg_write(ohci,OHCI1394_PhyReqFilterLoSet, 0x00000000);
diff -uprN linux-2.6.16/drivers/ieee1394/sbp2.c linux-2.6.16.ovz/drivers/ieee1394/sbp2.c
--- linux-2.6.16/drivers/ieee1394/sbp2.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/drivers/ieee1394/sbp2.c	2006-07-05 08:34:55.000000000 -0400
@@ -495,22 +495,17 @@ static struct sbp2_command_info *sbp2uti
 /*
  * This function finds the sbp2_command for a given outstanding SCpnt.
  * Only looks at the inuse list.
+ * Must be called with scsi_id->sbp2_command_orb_lock held.
  */
-static struct sbp2_command_info *sbp2util_find_command_for_SCpnt(struct scsi_id_instance_data *scsi_id, void *SCpnt)
+static struct sbp2_command_info *sbp2util_find_command_for_SCpnt(
+		struct scsi_id_instance_data *scsi_id, void *SCpnt)
 {
 	struct sbp2_command_info *command;
-	unsigned long flags;
 
-	spin_lock_irqsave(&scsi_id->sbp2_command_orb_lock, flags);
-	if (!list_empty(&scsi_id->sbp2_command_orb_inuse)) {
-		list_for_each_entry(command, &scsi_id->sbp2_command_orb_inuse, list) {
-			if (command->Current_SCpnt == SCpnt) {
-				spin_unlock_irqrestore(&scsi_id->sbp2_command_orb_lock, flags);
+	if (!list_empty(&scsi_id->sbp2_command_orb_inuse))
+		list_for_each_entry(command, &scsi_id->sbp2_command_orb_inuse, list)
+			if (command->Current_SCpnt == SCpnt)
 				return command;
-			}
-		}
-	}
-	spin_unlock_irqrestore(&scsi_id->sbp2_command_orb_lock, flags);
 	return NULL;
 }
 
@@ -579,17 +574,15 @@ static void sbp2util_free_command_dma(st
 
 /*
  * This function moves a command to the completed orb list.
+ * Must be called with scsi_id->sbp2_command_orb_lock held.
  */
-static void sbp2util_mark_command_completed(struct scsi_id_instance_data *scsi_id,
-					    struct sbp2_command_info *command)
+static void sbp2util_mark_command_completed(
+		struct scsi_id_instance_data *scsi_id,
+		struct sbp2_command_info *command)
 {
-	unsigned long flags;
-
-	spin_lock_irqsave(&scsi_id->sbp2_command_orb_lock, flags);
 	list_del(&command->list);
 	sbp2util_free_command_dma(command);
 	list_add_tail(&command->list, &scsi_id->sbp2_command_orb_completed);
-	spin_unlock_irqrestore(&scsi_id->sbp2_command_orb_lock, flags);
 }
 
 /*
@@ -761,12 +754,17 @@ static struct scsi_id_instance_data *sbp
 
 	/* Register the status FIFO address range. We could use the same FIFO
 	 * for targets at different nodes. However we need different FIFOs per
-	 * target in order to support multi-unit devices. */
+	 * target in order to support multi-unit devices.
+	 * The FIFO is located out of the local host controller's physical range
+	 * but, if possible, within the posted write area. Status writes will
+	 * then be performed as unified transactions. This slightly reduces
+	 * bandwidth usage, and some Prolific based devices seem to require it.
+	 */
 	scsi_id->status_fifo_addr = hpsb_allocate_and_register_addrspace(
 			&sbp2_highlevel, ud->ne->host, &sbp2_ops,
 			sizeof(struct sbp2_status_block), sizeof(quadlet_t),
-			~0ULL, ~0ULL);
-	if (!scsi_id->status_fifo_addr) {
+			0x010000000000ULL, CSR1212_ALL_SPACE_END);
+	if (scsi_id->status_fifo_addr == ~0ULL) {
 		SBP2_ERR("failed to allocate status FIFO address range");
 		goto failed_alloc;
 	}
@@ -2177,7 +2175,9 @@ static int sbp2_handle_status_write(stru
 		 * Matched status with command, now grab scsi command pointers and check status
 		 */
 		SCpnt = command->Current_SCpnt;
+		spin_lock_irqsave(&scsi_id->sbp2_command_orb_lock, flags);
 		sbp2util_mark_command_completed(scsi_id, command);
+		spin_unlock_irqrestore(&scsi_id->sbp2_command_orb_lock, flags);
 
 		if (SCpnt) {
 
@@ -2491,9 +2491,20 @@ static int sbp2scsi_slave_alloc(struct s
 
 static int sbp2scsi_slave_configure(struct scsi_device *sdev)
 {
+	struct scsi_id_instance_data *scsi_id =
+		(struct scsi_id_instance_data *)sdev->host->hostdata[0];
+
 	blk_queue_dma_alignment(sdev->request_queue, (512 - 1));
 	sdev->use_10_for_rw = 1;
 	sdev->use_10_for_ms = 1;
+
+	if ((scsi_id->sbp2_firmware_revision & 0xffff00) == 0x0a2700 &&
+	    (scsi_id->ud->model_id == 0x000021 /* gen.4 iPod */ ||
+	     scsi_id->ud->model_id == 0x000023 /* iPod mini  */ ||
+	     scsi_id->ud->model_id == 0x00007e /* iPod Photo */ )) {
+		SBP2_INFO("enabling iPod workaround: decrement disk capacity");
+		sdev->fix_capacity = 1;
+	}
 	return 0;
 }
 
@@ -2513,6 +2524,7 @@ static int sbp2scsi_abort(struct scsi_cm
 		(struct scsi_id_instance_data *)SCpnt->device->host->hostdata[0];
 	struct sbp2scsi_host_info *hi = scsi_id->hi;
 	struct sbp2_command_info *command;
+	unsigned long flags;
 
 	SBP2_ERR("aborting sbp2 command");
 	scsi_print_command(SCpnt);
@@ -2523,6 +2535,7 @@ static int sbp2scsi_abort(struct scsi_cm
 		 * Right now, just return any matching command structures
 		 * to the free pool.
 		 */
+		spin_lock_irqsave(&scsi_id->sbp2_command_orb_lock, flags);
 		command = sbp2util_find_command_for_SCpnt(scsi_id, SCpnt);
 		if (command) {
 			SBP2_DEBUG("Found command to abort");
@@ -2540,6 +2553,7 @@ static int sbp2scsi_abort(struct scsi_cm
 				command->Current_done(command->Current_SCpnt);
 			}
 		}
+		spin_unlock_irqrestore(&scsi_id->sbp2_command_orb_lock, flags);
 
 		/*
 		 * Initiate a fetch agent reset.
diff -uprN linux-2.6.16/drivers/input/mouse/psmouse-base.c linux-2.6.16.ovz/drivers/input/mouse/psmouse-base.c
--- linux-2.6.16/drivers/input/mouse/psmouse-base.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/drivers/input/mouse/psmouse-base.c	2006-07-05 08:34:55.000000000 -0400
@@ -300,8 +300,10 @@ static irqreturn_t psmouse_interrupt(str
  * Check if this is a new device announcement (0xAA 0x00)
  */
 	if (unlikely(psmouse->packet[0] == PSMOUSE_RET_BAT && psmouse->pktcnt <= 2)) {
-		if (psmouse->pktcnt == 1)
+		if (psmouse->pktcnt == 1) {
+			psmouse->last = jiffies;
 			goto out;
+		}
 
 		if (psmouse->packet[1] == PSMOUSE_RET_ID) {
 			__psmouse_set_state(psmouse, PSMOUSE_IGNORE);
diff -uprN linux-2.6.16/drivers/macintosh/therm_adt746x.c linux-2.6.16.ovz/drivers/macintosh/therm_adt746x.c
--- linux-2.6.16/drivers/macintosh/therm_adt746x.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/drivers/macintosh/therm_adt746x.c	2006-07-05 08:34:55.000000000 -0400
@@ -627,8 +627,8 @@ thermostat_init(void)
 	if(therm_type == ADT7460)
 		device_create_file(&of_dev->dev, &dev_attr_sensor2_fan_speed);
 
-#ifndef CONFIG_I2C_KEYWEST
-	request_module("i2c-keywest");
+#ifndef CONFIG_I2C_POWERMAC
+	request_module("i2c-powermac");
 #endif
 
 	return i2c_add_driver(&thermostat_driver);
diff -uprN linux-2.6.16/drivers/md/dm-snap.c linux-2.6.16.ovz/drivers/md/dm-snap.c
--- linux-2.6.16/drivers/md/dm-snap.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/drivers/md/dm-snap.c	2006-07-05 08:34:55.000000000 -0400
@@ -542,8 +542,12 @@ static void snapshot_dtr(struct dm_targe
 {
 	struct dm_snapshot *s = (struct dm_snapshot *) ti->private;
 
+	/* Prevent further origin writes from using this snapshot. */
+	/* After this returns there can be no new kcopyd jobs. */
 	unregister_snapshot(s);
 
+	kcopyd_client_destroy(s->kcopyd_client);
+
 	exit_exception_table(&s->pending, pending_cache);
 	exit_exception_table(&s->complete, exception_cache);
 
@@ -552,7 +556,7 @@ static void snapshot_dtr(struct dm_targe
 
 	dm_put_device(ti, s->origin);
 	dm_put_device(ti, s->cow);
-	kcopyd_client_destroy(s->kcopyd_client);
+
 	kfree(s);
 }
 
diff -uprN linux-2.6.16/drivers/md/dm.c linux-2.6.16.ovz/drivers/md/dm.c
--- linux-2.6.16/drivers/md/dm.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/drivers/md/dm.c	2006-07-05 08:34:55.000000000 -0400
@@ -533,30 +533,35 @@ static void __clone_and_map(struct clone
 
 	} else {
 		/*
-		 * Create two copy bios to deal with io that has
-		 * been split across a target.
+		 * Handle a bvec that must be split between two or more targets.
 		 */
 		struct bio_vec *bv = bio->bi_io_vec + ci->idx;
+		sector_t remaining = to_sector(bv->bv_len);
+		unsigned int offset = 0;
 
-		clone = split_bvec(bio, ci->sector, ci->idx,
-				   bv->bv_offset, max);
-		__map_bio(ti, clone, tio);
-
-		ci->sector += max;
-		ci->sector_count -= max;
-		ti = dm_table_find_target(ci->map, ci->sector);
-
-		len = to_sector(bv->bv_len) - max;
-		clone = split_bvec(bio, ci->sector, ci->idx,
-				   bv->bv_offset + to_bytes(max), len);
-		tio = alloc_tio(ci->md);
-		tio->io = ci->io;
-		tio->ti = ti;
-		memset(&tio->info, 0, sizeof(tio->info));
-		__map_bio(ti, clone, tio);
+		do {
+			if (offset) {
+				ti = dm_table_find_target(ci->map, ci->sector);
+				max = max_io_len(ci->md, ci->sector, ti);
+
+				tio = alloc_tio(ci->md);
+				tio->io = ci->io;
+				tio->ti = ti;
+				memset(&tio->info, 0, sizeof(tio->info));
+			}
+
+			len = min(remaining, max);
+
+			clone = split_bvec(bio, ci->sector, ci->idx,
+					   bv->bv_offset + offset, len);
+
+			__map_bio(ti, clone, tio);
+
+			ci->sector += len;
+			ci->sector_count -= len;
+			offset += to_bytes(len);
+		} while (remaining -= len);
 
-		ci->sector += len;
-		ci->sector_count -= len;
 		ci->idx++;
 	}
 }
@@ -1093,6 +1098,7 @@ int dm_suspend(struct mapped_device *md,
 {
 	struct dm_table *map = NULL;
 	DECLARE_WAITQUEUE(wait, current);
+	struct bio *def;
 	int r = -EINVAL;
 
 	down(&md->suspend_lock);
@@ -1152,9 +1158,11 @@ int dm_suspend(struct mapped_device *md,
 	/* were we interrupted ? */
 	r = -EINTR;
 	if (atomic_read(&md->pending)) {
+		clear_bit(DMF_BLOCK_IO, &md->flags);
+		def = bio_list_get(&md->deferred);
+		__flush_deferred_io(md, def);
 		up_write(&md->io_lock);
 		unlock_fs(md);
-		clear_bit(DMF_BLOCK_IO, &md->flags);
 		goto out;
 	}
 	up_write(&md->io_lock);
diff -uprN linux-2.6.16/drivers/md/kcopyd.c linux-2.6.16.ovz/drivers/md/kcopyd.c
--- linux-2.6.16/drivers/md/kcopyd.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/drivers/md/kcopyd.c	2006-07-05 08:34:55.000000000 -0400
@@ -44,6 +44,9 @@ struct kcopyd_client {
 	struct page_list *pages;
 	unsigned int nr_pages;
 	unsigned int nr_free_pages;
+
+	wait_queue_head_t destroyq;
+	atomic_t nr_jobs;
 };
 
 static struct page_list *alloc_pl(void)
@@ -293,10 +296,15 @@ static int run_complete_job(struct kcopy
 	int read_err = job->read_err;
 	unsigned int write_err = job->write_err;
 	kcopyd_notify_fn fn = job->fn;
+	struct kcopyd_client *kc = job->kc;
 
-	kcopyd_put_pages(job->kc, job->pages);
+	kcopyd_put_pages(kc, job->pages);
 	mempool_free(job, _job_pool);
 	fn(read_err, write_err, context);
+
+	if (atomic_dec_and_test(&kc->nr_jobs))
+		wake_up(&kc->destroyq);
+
 	return 0;
 }
 
@@ -431,6 +439,7 @@ static void do_work(void *ignored)
  */
 static void dispatch_job(struct kcopyd_job *job)
 {
+	atomic_inc(&job->kc->nr_jobs);
 	push(&_pages_jobs, job);
 	wake();
 }
@@ -670,6 +679,9 @@ int kcopyd_client_create(unsigned int nr
 		return r;
 	}
 
+	init_waitqueue_head(&kc->destroyq);
+	atomic_set(&kc->nr_jobs, 0);
+
 	client_add(kc);
 	*result = kc;
 	return 0;
@@ -677,6 +689,9 @@ int kcopyd_client_create(unsigned int nr
 
 void kcopyd_client_destroy(struct kcopyd_client *kc)
 {
+	/* Wait for completion of all jobs submitted by this client. */
+	wait_event(kc->destroyq, !atomic_read(&kc->nr_jobs));
+
 	dm_io_put(kc->nr_pages);
 	client_free_pages(kc);
 	client_del(kc);
diff -uprN linux-2.6.16/drivers/md/raid10.c linux-2.6.16.ovz/drivers/md/raid10.c
--- linux-2.6.16/drivers/md/raid10.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/drivers/md/raid10.c	2006-07-05 08:34:55.000000000 -0400
@@ -1436,9 +1436,9 @@ static void raid10d(mddev_t *mddev)
 						sl--;
 						d = r10_bio->devs[sl].devnum;
 						rdev = conf->mirrors[d].rdev;
-						atomic_add(s, &rdev->corrected_errors);
 						if (rdev &&
 						    test_bit(In_sync, &rdev->flags)) {
+							atomic_add(s, &rdev->corrected_errors);
 							if (sync_page_io(rdev->bdev,
 									 r10_bio->devs[sl].addr +
 									 sect + rdev->data_offset,
diff -uprN linux-2.6.16/drivers/media/dvb/dvb-usb/cxusb.c linux-2.6.16.ovz/drivers/media/dvb/dvb-usb/cxusb.c
--- linux-2.6.16/drivers/media/dvb/dvb-usb/cxusb.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/drivers/media/dvb/dvb-usb/cxusb.c	2006-07-05 08:34:55.000000000 -0400
@@ -149,6 +149,15 @@ static int cxusb_power_ctrl(struct dvb_u
 		return cxusb_ctrl_msg(d, CMD_POWER_OFF, &b, 1, NULL, 0);
 }
 
+/* Power control that only ever sends CMD_POWER_ON; power-off is a no-op. */
+static int cxusb_bluebird_power_ctrl(struct dvb_usb_device *d, int onoff)
+{
+	u8 zero = 0;
+	if (!onoff)
+		return 0;
+	return cxusb_ctrl_msg(d, CMD_POWER_ON, &zero, 1, NULL, 0);
+}
+
 static int cxusb_streaming_ctrl(struct dvb_usb_device *d, int onoff)
 {
 	u8 buf[2] = { 0x03, 0x00 };
@@ -505,7 +514,7 @@ static struct dvb_usb_properties cxusb_b
 	.size_of_priv     = sizeof(struct cxusb_state),
 
 	.streaming_ctrl   = cxusb_streaming_ctrl,
-	.power_ctrl       = cxusb_power_ctrl,
+	.power_ctrl       = cxusb_bluebird_power_ctrl,
 	.frontend_attach  = cxusb_lgdt3303_frontend_attach,
 	.tuner_attach     = cxusb_lgh064f_tuner_attach,
 
@@ -545,7 +554,7 @@ static struct dvb_usb_properties cxusb_b
 	.size_of_priv     = sizeof(struct cxusb_state),
 
 	.streaming_ctrl   = cxusb_streaming_ctrl,
-	.power_ctrl       = cxusb_power_ctrl,
+	.power_ctrl       = cxusb_bluebird_power_ctrl,
 	.frontend_attach  = cxusb_dee1601_frontend_attach,
 	.tuner_attach     = cxusb_dee1601_tuner_attach,
 
@@ -594,7 +603,7 @@ static struct dvb_usb_properties cxusb_b
 	.size_of_priv     = sizeof(struct cxusb_state),
 
 	.streaming_ctrl   = cxusb_streaming_ctrl,
-	.power_ctrl       = cxusb_power_ctrl,
+	.power_ctrl       = cxusb_bluebird_power_ctrl,
 	.frontend_attach  = cxusb_mt352_frontend_attach,
 	.tuner_attach     = cxusb_lgz201_tuner_attach,
 
@@ -634,7 +643,7 @@ static struct dvb_usb_properties cxusb_b
 	.size_of_priv     = sizeof(struct cxusb_state),
 
 	.streaming_ctrl   = cxusb_streaming_ctrl,
-	.power_ctrl       = cxusb_power_ctrl,
+	.power_ctrl       = cxusb_bluebird_power_ctrl,
 	.frontend_attach  = cxusb_mt352_frontend_attach,
 	.tuner_attach     = cxusb_dtt7579_tuner_attach,
 
diff -uprN linux-2.6.16/drivers/media/video/Kconfig linux-2.6.16.ovz/drivers/media/video/Kconfig
--- linux-2.6.16/drivers/media/video/Kconfig	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/drivers/media/video/Kconfig	2006-07-05 08:34:55.000000000 -0400
@@ -349,6 +349,7 @@ config VIDEO_AUDIO_DECODER
 config VIDEO_DECODER
 	tristate "Add support for additional video chipsets"
 	depends on VIDEO_DEV && I2C && EXPERIMENTAL
+	select FW_LOADER
 	---help---
 	  Say Y here to compile drivers for SAA7115, SAA7127 and CX25840
 	  video decoders.
diff -uprN linux-2.6.16/drivers/media/video/saa7127.c linux-2.6.16.ovz/drivers/media/video/saa7127.c
--- linux-2.6.16/drivers/media/video/saa7127.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/drivers/media/video/saa7127.c	2006-07-05 08:34:55.000000000 -0400
@@ -141,6 +141,7 @@ struct i2c_reg_value {
 static const struct i2c_reg_value saa7129_init_config_extra[] = {
 	{ SAA7127_REG_OUTPUT_PORT_CONTROL, 		0x38 },
 	{ SAA7127_REG_VTRIG, 				0xfa },
+	{ 0, 0 }
 };
 
 static const struct i2c_reg_value saa7127_init_config_common[] = {
diff -uprN linux-2.6.16/drivers/media/video/tuner-types.c linux-2.6.16.ovz/drivers/media/video/tuner-types.c
--- linux-2.6.16/drivers/media/video/tuner-types.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/drivers/media/video/tuner-types.c	2006-07-05 08:34:55.000000000 -0400
@@ -1087,8 +1087,8 @@ static struct tuner_params tuner_tnf_533
 /* ------------ TUNER_SAMSUNG_TCPN_2121P30A - Samsung NTSC ------------ */
 
 static struct tuner_range tuner_samsung_tcpn_2121p30a_ntsc_ranges[] = {
-	{ 16 * 175.75 /*MHz*/, 0x01, },
-	{ 16 * 410.25 /*MHz*/, 0x02, },
+	{ 16 * 130.00 /*MHz*/, 0x01, },
+	{ 16 * 364.50 /*MHz*/, 0x02, },
 	{ 16 * 999.99        , 0x08, },
 };
 
diff -uprN linux-2.6.16/drivers/message/i2o/exec-osm.c linux-2.6.16.ovz/drivers/message/i2o/exec-osm.c
--- linux-2.6.16/drivers/message/i2o/exec-osm.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/drivers/message/i2o/exec-osm.c	2006-07-05 08:34:55.000000000 -0400
@@ -55,6 +55,7 @@ struct i2o_exec_wait {
 	u32 m;			/* message id */
 	struct i2o_message *msg;	/* pointer to the reply message */
 	struct list_head list;	/* node in global wait list */
+	spinlock_t lock;	/* lock before modifying */
 };
 
 /* Exec OSM class handling definition */
@@ -80,6 +81,7 @@ static struct i2o_exec_wait *i2o_exec_wa
 		return NULL;
 
 	INIT_LIST_HEAD(&wait->list);
+	spin_lock_init(&wait->lock);
 
 	return wait;
 };
@@ -118,6 +120,7 @@ int i2o_msg_post_wait_mem(struct i2o_con
 	DECLARE_WAIT_QUEUE_HEAD(wq);
 	struct i2o_exec_wait *wait;
 	static u32 tcntxt = 0x80000000;
+	unsigned long flags;	/* type expected by spin_lock_irqsave() */
 	int rc = 0;
 
 	wait = i2o_exec_wait_alloc();
@@ -139,33 +142,28 @@ int i2o_msg_post_wait_mem(struct i2o_con
 	wait->tcntxt = tcntxt++;
 	msg->u.s.tcntxt = cpu_to_le32(wait->tcntxt);
 
+	wait->wq = &wq;
+	/*
+	 * we add elements to the head, because if an entry in the list will
+	 * never be removed, we have to iterate over it every time
+	 */
+	list_add(&wait->list, &i2o_exec_wait_list);
+
 	/*
 	 * Post the message to the controller. At some point later it will
 	 * return. If we time out before it returns then complete will be zero.
 	 */
 	i2o_msg_post(c, msg);
 
-	if (!wait->complete) {
-		wait->wq = &wq;
-		/*
-		 * we add elements add the head, because if a entry in the list
-		 * will never be removed, we have to iterate over it every time
-		 */
-		list_add(&wait->list, &i2o_exec_wait_list);
-
-		wait_event_interruptible_timeout(wq, wait->complete,
-						 timeout * HZ);
+	wait_event_interruptible_timeout(wq, wait->complete, timeout * HZ);
 
-		wait->wq = NULL;
-	}
+	spin_lock_irqsave(&wait->lock, flags);
 
-	barrier();
+	wait->wq = NULL;
 
-	if (wait->complete) {
+	if (wait->complete)
 		rc = le32_to_cpu(wait->msg->body[0]) >> 24;
-		i2o_flush_reply(c, wait->m);
-		i2o_exec_wait_free(wait);
-	} else {
+	else {
 		/*
 		 * We cannot remove it now. This is important. When it does
 		 * terminate (which it must do if the controller has not
@@ -179,6 +177,13 @@ int i2o_msg_post_wait_mem(struct i2o_con
 		rc = -ETIMEDOUT;
 	}
 
+	spin_unlock_irqrestore(&wait->lock, flags);
+
+	if (rc != -ETIMEDOUT) {
+		i2o_flush_reply(c, wait->m);
+		i2o_exec_wait_free(wait);
+	}
+
 	return rc;
 };
 
@@ -206,7 +211,6 @@ static int i2o_msg_post_wait_complete(st
 {
 	struct i2o_exec_wait *wait, *tmp;
 	unsigned long flags;
-	static spinlock_t lock = SPIN_LOCK_UNLOCKED;
 	int rc = 1;
 
 	/*
@@ -216,23 +220,24 @@ static int i2o_msg_post_wait_complete(st
 	 * already expired. Not much we can do about that except log it for
 	 * debug purposes, increase timeout, and recompile.
 	 */
-	spin_lock_irqsave(&lock, flags);
 	list_for_each_entry_safe(wait, tmp, &i2o_exec_wait_list, list) {
 		if (wait->tcntxt == context) {
-			list_del(&wait->list);
+			spin_lock_irqsave(&wait->lock, flags);
 
-			spin_unlock_irqrestore(&lock, flags);
+			list_del(&wait->list);
 
 			wait->m = m;
 			wait->msg = msg;
 			wait->complete = 1;
 
-			barrier();
-
-			if (wait->wq) {
-				wake_up_interruptible(wait->wq);
+			if (wait->wq)
 				rc = 0;
-			} else {
+			else
+				rc = -1;
+
+			spin_unlock_irqrestore(&wait->lock, flags);
+
+			if (rc) {
 				struct device *dev;
 
 				dev = &c->pdev->dev;
@@ -241,15 +246,13 @@ static int i2o_msg_post_wait_complete(st
 					 c->name);
 				i2o_dma_free(dev, &wait->dma);
 				i2o_exec_wait_free(wait);
-				rc = -1;
-			}
+			} else
+				wake_up_interruptible(wait->wq);
 
 			return rc;
 		}
 	}
 
-	spin_unlock_irqrestore(&lock, flags);
-
 	osm_warn("%s: Bogus reply in POST WAIT (tr-context: %08x)!\n", c->name,
 		 context);
 
@@ -315,14 +318,9 @@ static DEVICE_ATTR(product_id, S_IRUGO, 
 static int i2o_exec_probe(struct device *dev)
 {
 	struct i2o_device *i2o_dev = to_i2o_device(dev);
-	struct i2o_controller *c = i2o_dev->iop;
 
 	i2o_event_register(i2o_dev, &i2o_exec_driver, 0, 0xffffffff);
 
-	c->exec = i2o_dev;
-
-	i2o_exec_lct_notify(c, c->lct->change_ind + 1);
-
 	device_create_file(dev, &dev_attr_vendor_id);
 	device_create_file(dev, &dev_attr_product_id);
 
@@ -510,6 +508,8 @@ static int i2o_exec_lct_notify(struct i2
 	struct device *dev;
 	struct i2o_message *msg;
 
+	down(&c->lct_lock);	/* NOTE(review): only up()'d before the final return 0 - confirm early error returns do not leak it */
+
 	dev = &c->pdev->dev;
 
 	if (i2o_dma_realloc
@@ -532,6 +532,8 @@ static int i2o_exec_lct_notify(struct i2
 
 	i2o_msg_post(c, msg);
 
+	up(&c->lct_lock);
+
 	return 0;
 };
 
diff -uprN linux-2.6.16/drivers/message/i2o/iop.c linux-2.6.16.ovz/drivers/message/i2o/iop.c
--- linux-2.6.16/drivers/message/i2o/iop.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/drivers/message/i2o/iop.c	2006-07-05 08:34:55.000000000 -0400
@@ -804,8 +804,6 @@ void i2o_iop_remove(struct i2o_controlle
 
 	/* Ask the IOP to switch to RESET state */
 	i2o_iop_reset(c);
-
-	put_device(&c->device);
 }
 
 /**
@@ -1059,7 +1057,7 @@ struct i2o_controller *i2o_iop_alloc(voi
 
 	snprintf(poolname, sizeof(poolname), "i2o_%s_msg_inpool", c->name);
 	if (i2o_pool_alloc
-	    (&c->in_msg, poolname, I2O_INBOUND_MSG_FRAME_SIZE * 4,
+	    (&c->in_msg, poolname, I2O_INBOUND_MSG_FRAME_SIZE * 4 + sizeof(u32),
 	     I2O_MSG_INPOOL_MIN)) {
 		kfree(c);
 		return ERR_PTR(-ENOMEM);
diff -uprN linux-2.6.16/drivers/mtd/nand/Kconfig linux-2.6.16.ovz/drivers/mtd/nand/Kconfig
--- linux-2.6.16/drivers/mtd/nand/Kconfig	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/drivers/mtd/nand/Kconfig	2006-07-05 08:34:55.000000000 -0400
@@ -178,17 +178,16 @@ config MTD_NAND_DISKONCHIP_BBTWRITE
 	  Even if you leave this disabled, you can enable BBT writes at module
 	  load time (assuming you build diskonchip as a module) with the module
 	  parameter "inftl_bbt_write=1".
-	  
- config MTD_NAND_SHARPSL
- 	bool "Support for NAND Flash on Sharp SL Series (C7xx + others)"
- 	depends on MTD_NAND && ARCH_PXA
- 
- config MTD_NAND_NANDSIM
- 	bool "Support for NAND Flash Simulator"
- 	depends on MTD_NAND && MTD_PARTITIONS
 
+config MTD_NAND_SHARPSL
+	tristate "Support for NAND Flash on Sharp SL Series (C7xx + others)"
+	depends on MTD_NAND && ARCH_PXA
+
+config MTD_NAND_NANDSIM
+	tristate "Support for NAND Flash Simulator"
+	depends on MTD_NAND && MTD_PARTITIONS
 	help
 	  The simulator may simulate verious NAND flash chips for the
 	  MTD nand layer.
- 
+
 endmenu
diff -uprN linux-2.6.16/drivers/net/Makefile linux-2.6.16.ovz/drivers/net/Makefile
--- linux-2.6.16/drivers/net/Makefile	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/drivers/net/Makefile	2006-07-05 08:34:55.000000000 -0400
@@ -18,6 +18,12 @@ gianfar_driver-objs := gianfar.o \
 		gianfar_mii.o \
 		gianfar_sysfs.o
 
+obj-$(CONFIG_VE_NETDEV) += vznetdev.o
+vznetdev-objs := open_vznet.o venet_core.o
+
+obj-$(CONFIG_VE_ETHDEV) += vzethdev.o
+vzethdev-objs := veth.o
+
 #
 # link order important here
 #
diff -uprN linux-2.6.16/drivers/net/e1000/e1000_main.c linux-2.6.16.ovz/drivers/net/e1000/e1000_main.c
--- linux-2.6.16/drivers/net/e1000/e1000_main.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/drivers/net/e1000/e1000_main.c	2006-07-05 08:34:55.000000000 -0400
@@ -3851,6 +3851,7 @@ e1000_clean_rx_irq_ps(struct e1000_adapt
 			skb_shinfo(skb)->nr_frags++;
 			skb->len += length;
 			skb->data_len += length;
+			skb->truesize += length;
 		}
 
 		e1000_rx_checksum(adapter, staterr,
diff -uprN linux-2.6.16/drivers/net/irda/irda-usb.c linux-2.6.16.ovz/drivers/net/irda/irda-usb.c
--- linux-2.6.16/drivers/net/irda/irda-usb.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/drivers/net/irda/irda-usb.c	2006-07-05 08:34:55.000000000 -0400
@@ -740,7 +740,7 @@ static void irda_usb_receive(struct urb 
 	struct sk_buff *newskb;
 	struct sk_buff *dataskb;
 	struct urb *next_urb;
-	int		docopy;
+	unsigned int len, docopy;
 
 	IRDA_DEBUG(2, "%s(), len=%d\n", __FUNCTION__, urb->actual_length);
 	
@@ -851,10 +851,11 @@ static void irda_usb_receive(struct urb 
 	dataskb->dev = self->netdev;
 	dataskb->mac.raw  = dataskb->data;
 	dataskb->protocol = htons(ETH_P_IRDA);
+	len = dataskb->len;
 	netif_rx(dataskb);
 
 	/* Keep stats up to date */
-	self->stats.rx_bytes += dataskb->len;
+	self->stats.rx_bytes += len;
 	self->stats.rx_packets++;
 	self->netdev->last_rx = jiffies;
 
diff -uprN linux-2.6.16/drivers/net/loopback.c linux-2.6.16.ovz/drivers/net/loopback.c
--- linux-2.6.16/drivers/net/loopback.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/drivers/net/loopback.c	2006-07-05 08:34:55.000000000 -0400
@@ -130,6 +130,11 @@ static int loopback_xmit(struct sk_buff 
 {
 	struct net_device_stats *lb_stats;
 
+	if (unlikely(get_exec_env()->disable_net)) {
+		kfree_skb(skb);
+		return 0;
+	}
+
 	skb_orphan(skb);
 
 	skb->protocol = eth_type_trans(skb,dev);
@@ -198,6 +203,34 @@ static struct ethtool_ops loopback_ethto
 	.set_tso		= ethtool_op_set_tso,
 };
 
+static void loopback_destructor(struct net_device *dev)
+{
+	kfree(dev->priv);	/* loopback_init() stores the stats block in ->priv */
+	dev->priv = NULL;
+}
+
+struct net_device templ_loopback_dev = {	/* exported below via EXPORT_SYMBOL() */
+	.name	 		= "lo",
+	.mtu			= (16 * 1024) + 20 + 20 + 12,
+	.hard_start_xmit	= loopback_xmit,
+	.hard_header		= eth_header,
+	.hard_header_cache	= eth_header_cache,
+	.header_cache_update	= eth_header_cache_update,
+	.hard_header_len	= ETH_HLEN,	/* 14	*/
+	.addr_len		= ETH_ALEN,	/* 6	*/
+	.tx_queue_len		= 0,
+	.type			= ARPHRD_LOOPBACK,	/* 0x0001*/
+	.rebuild_header		= eth_rebuild_header,
+	.flags			= IFF_LOOPBACK,
+	.features 		= NETIF_F_SG|NETIF_F_FRAGLIST
+				  |NETIF_F_NO_CSUM|NETIF_F_HIGHDMA
+				  |NETIF_F_LLTX|NETIF_F_VIRTUAL,
+};
+
+#ifdef loopback_dev
+#undef loopback_dev
+#endif
+
 struct net_device loopback_dev = {
 	.name	 		= "lo",
 	.mtu			= (16 * 1024) + 20 + 20 + 12,
@@ -231,9 +264,13 @@ int __init loopback_init(void)
 		memset(stats, 0, sizeof(struct net_device_stats));
 		loopback_dev.priv = stats;
 		loopback_dev.get_stats = &get_stats;
+		loopback_dev.destructor = &loopback_destructor;
 	}
-	
+#if defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE)
+	get_ve0()->_loopback_dev = &loopback_dev;
+#endif
 	return register_netdev(&loopback_dev);
 };
 
 EXPORT_SYMBOL(loopback_dev);
+EXPORT_SYMBOL(templ_loopback_dev);
diff -uprN linux-2.6.16/drivers/net/open_vznet.c linux-2.6.16.ovz/drivers/net/open_vznet.c
--- linux-2.6.16/drivers/net/open_vznet.c	1969-12-31 19:00:00.000000000 -0500
+++ linux-2.6.16.ovz/drivers/net/open_vznet.c	2006-07-05 08:34:55.000000000 -0400
@@ -0,0 +1,227 @@
+/*
+ *  open_vznet.c
+ *
+ *  Copyright (C) 2005  SWsoft
+ *  All rights reserved.
+ *  
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+/*
+ * Virtual Networking device used to change VE ownership on packets
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/seq_file.h>
+
+#include <linux/inet.h>
+#include <net/ip.h>
+#include <linux/skbuff.h>
+#include <linux/venet.h>
+
+/*
+ * Free every address entry of @ve, then hand its veip to veip_put().
+ */
+void veip_stop(struct ve_struct *ve)
+{
+	struct ip_entry_struct *entry, *next;
+
+	write_lock_irq(&veip_hash_lock);
+	if (ve->veip != NULL) {
+		list_for_each_entry_safe(entry, next, &ve->veip->ip_lh, ve_list) {
+			entry->active_env = NULL;
+			list_del(&entry->ve_list);
+			list_del(&entry->ip_hash);
+			kfree(entry);
+		}
+		veip_put(ve->veip);
+		ve->veip = NULL;
+	}
+	write_unlock_irq(&veip_hash_lock);
+}
+
+/* Look up or create the veip of @ve and attach it; 0 or -ENOMEM. */
+int veip_start(struct ve_struct *ve)
+{
+	struct veip_struct *veip;
+
+	write_lock_irq(&veip_hash_lock);
+	veip = veip_findcreate(ve->veid);
+	ve->veip = veip;
+	write_unlock_irq(&veip_hash_lock);
+
+	return veip != NULL ? 0 : -ENOMEM;
+}
+
+/*
+ * Bind the IPv4 or IPv6 address in @addr to @ve.
+ * Returns 0, -ENOMEM, -EAFNOSUPPORT, or -EADDRINUSE if it is already hashed.
+ */
+int veip_entry_add(struct ve_struct *ve, struct sockaddr *addr)
+{
+	struct ip_entry_struct *entry;
+	int err = -EADDRINUSE;
+
+	entry = kmalloc(sizeof(*entry), GFP_KERNEL);
+	if (entry == NULL)
+		return -ENOMEM;
+	memset(entry, 0, sizeof(*entry));
+	entry->family = addr->sa_family;
+	switch (addr->sa_family) {
+	case AF_INET:
+		/* IPv4 occupies key[3]; key[0..2] stay zero from the memset */
+		entry->key[3] = ((struct sockaddr_in*)addr)->sin_addr.s_addr;
+		break;
+	case AF_INET6:
+		memcpy(entry->key, &((struct sockaddr_in6*)addr)->sin6_addr, 16);
+		break;
+	default:
+		kfree(entry);
+		return -EAFNOSUPPORT;
+	}
+
+	write_lock_irq(&veip_hash_lock);
+	if (venet_entry_lookup(entry->key, entry->family) == NULL) {
+		ip_entry_hash(entry, ve->veip);
+		entry->active_env = ve;
+		entry = NULL;	/* the tables own it now */
+		err = 0;
+	}
+	write_unlock_irq(&veip_hash_lock);
+	kfree(entry);	/* no-op on success */
+	return err;
+}
+
+/*
+ * Remove @addr if it is currently bound to VE @veid.
+ * Returns 0, -EAFNOSUPPORT, or -EADDRNOTAVAIL when there is no such binding.
+ */
+int veip_entry_del(envid_t veid, struct sockaddr *addr)
+{
+	struct ip_entry_struct *found;
+	u32 key[4];
+	int err = -EADDRNOTAVAIL;
+
+	switch (addr->sa_family) {
+	case AF_INET:
+		memset(key, 0, sizeof(key));
+		key[3] = ((struct sockaddr_in*)addr)->sin_addr.s_addr;
+		break;
+	case AF_INET6:
+		memcpy(key, &((struct sockaddr_in6*)addr)->sin6_addr, 16);
+		break;
+	default:
+		return -EAFNOSUPPORT;
+	}
+	write_lock_irq(&veip_hash_lock);
+	found = venet_entry_lookup(key, addr->sa_family);
+	if (found != NULL && found->active_env->veid == veid) {
+		found->active_env = NULL;
+		list_del(&found->ip_hash);
+		list_del(&found->ve_list);
+		kfree(found);
+		err = 0;
+	}
+	write_unlock_irq(&veip_hash_lock);
+	return err;
+}
+
+/* Map @skb to a VE via its destination (@dir != 0) or source (@dir == 0) IP. */
+static struct ve_struct *venet_find_ve(struct sk_buff *skb, int dir)
+{
+	struct ip_entry_struct *entry = NULL;
+
+	if (skb->protocol == __constant_htons(ETH_P_IP)) {
+		struct iphdr *iph = skb->nh.iph;
+
+		entry = ip_entry_lookup(dir ? iph->daddr : iph->saddr);
+	}
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+	else if (skb->protocol == __constant_htons(ETH_P_IPV6)) {
+		struct ipv6hdr *ip6h = skb->nh.ipv6h;
+
+		entry = venet_entry_lookup(dir ? ip6h->daddr.s6_addr32 :
+					   ip6h->saddr.s6_addr32, AF_INET6);
+	}
+#endif
+	return entry != NULL ? entry->active_env : NULL;
+}
+
+/*
+ * Re-own @skb as it crosses venet.  A packet sent by a VE becomes the
+ * host's if the VE found for its source IP passes ve_accessible_strict()
+ * against the sender; a host packet goes to the VE found for its
+ * destination IP.  Returns 0, -ESRCH (no VE found) or -EACCES.
+ */
+int venet_change_skb_owner(struct sk_buff *skb)
+{
+	struct ve_struct *sender = skb->owner_env;
+	struct ve_struct *ve;
+	int from_host = ve_is_super(sender);
+	int err = 0;
+
+	read_lock(&veip_hash_lock);
+	/* host -> VE matches on destination, VE -> host on source */
+	ve = venet_find_ve(skb, from_host);
+	if (ve == NULL)
+		err = -ESRCH;
+	else if (from_host)
+		skb->owner_env = ve;
+	else if (ve_accessible_strict(ve, sender))
+		skb->owner_env = get_ve0();
+	else
+		err = -EACCES;
+	read_unlock(&veip_hash_lock);
+
+	if (err != -EACCES)
+		return err;
+
+	/*
+	 * Rejected by ve_accessible_strict(); owner_env still names the sender.
+	 */
+	if (net_ratelimit() && skb->protocol == __constant_htons(ETH_P_IP)) {
+		printk(KERN_WARNING "Dropped packet, source wrong "
+		       "veid=%u src-IP=%u.%u.%u.%u "
+		       "dst-IP=%u.%u.%u.%u\n",
+		       skb->owner_env->veid,
+		       NIPQUAD(skb->nh.iph->saddr),
+		       NIPQUAD(skb->nh.iph->daddr));
+	}
+	return -EACCES;
+}
+
+#ifdef CONFIG_PROC_FS
+/*
+ * seq_file ->show for the veip table.  The table base pointer is the header
+ * token (prints the version line); any other @v is an entry's ip_hash node.
+ */
+int veip_seq_show(struct seq_file *m, void *v)
+{
+	struct ip_entry_struct *entry;
+	char s[40];	/* fits 8 groups of 4 hex digits, 7 colons and the NUL */
+
+	if (v == (void *)ip_entry_hash_table) {
+		seq_puts(m, "Version: 2.5\n");
+		return 0;
+	}
+
+	entry = list_entry((struct list_head *)v,
+			   struct ip_entry_struct, ip_hash);
+	if (entry->family == AF_INET)
+		sprintf(s, "%u.%u.%u.%u", NIPQUAD(entry->key[3]));
+	else
+		sprintf(s, "%x:%x:%x:%x:%x:%x:%x:%x",
+			ntohl(entry->key[0]) >> 16, ntohl(entry->key[0]) & 0xFFFF,
+			ntohl(entry->key[1]) >> 16, ntohl(entry->key[1]) & 0xFFFF,
+			ntohl(entry->key[2]) >> 16, ntohl(entry->key[2]) & 0xFFFF,
+			ntohl(entry->key[3]) >> 16, ntohl(entry->key[3]) & 0xFFFF);
+	seq_printf(m, "%39s %10u\n", s, 0);
+	return 0;
+}
+#endif
+
+MODULE_AUTHOR("SWsoft <info@sw-soft.com>");
+MODULE_DESCRIPTION("Virtuozzo Virtual Network Device");
+MODULE_LICENSE("GPL v2");
diff -uprN linux-2.6.16/drivers/net/sky2.c linux-2.6.16.ovz/drivers/net/sky2.c
--- linux-2.6.16/drivers/net/sky2.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/drivers/net/sky2.c	2006-07-05 08:34:55.000000000 -0400
@@ -579,8 +579,8 @@ static void sky2_mac_init(struct sky2_hw
 	reg = gma_read16(hw, port, GM_PHY_ADDR);
 	gma_write16(hw, port, GM_PHY_ADDR, reg | GM_PAR_MIB_CLR);
 
-	for (i = 0; i < GM_MIB_CNT_SIZE; i++)
-		gma_read16(hw, port, GM_MIB_CNT_BASE + 8 * i);
+	for (i = GM_MIB_CNT_BASE; i <= GM_MIB_CNT_END; i += 4)
+		gma_read16(hw, port, i);
 	gma_write16(hw, port, GM_PHY_ADDR, reg);
 
 	/* transmit control */
diff -uprN linux-2.6.16/drivers/net/sky2.h linux-2.6.16.ovz/drivers/net/sky2.h
--- linux-2.6.16/drivers/net/sky2.h	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/drivers/net/sky2.h	2006-07-05 08:34:55.000000000 -0400
@@ -1380,6 +1380,7 @@ enum {
 /* MIB Counters */
 #define GM_MIB_CNT_BASE	0x0100		/* Base Address of MIB Counters */
 #define GM_MIB_CNT_SIZE	44		/* Number of MIB Counters */
+#define GM_MIB_CNT_END	0x025C		/* Last MIB counter */
 
 /*
  * MIB Counters base address definitions (low word) -
diff -uprN linux-2.6.16/drivers/net/tg3.c linux-2.6.16.ovz/drivers/net/tg3.c
--- linux-2.6.16/drivers/net/tg3.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/drivers/net/tg3.c	2006-07-05 08:34:55.000000000 -0400
@@ -7368,21 +7368,23 @@ static int tg3_get_settings(struct net_d
 		cmd->supported |= (SUPPORTED_1000baseT_Half |
 				   SUPPORTED_1000baseT_Full);
 
-	if (!(tp->tg3_flags2 & TG3_FLG2_ANY_SERDES))
+	if (!(tp->tg3_flags2 & TG3_FLG2_ANY_SERDES)) {
 		cmd->supported |= (SUPPORTED_100baseT_Half |
 				  SUPPORTED_100baseT_Full |
 				  SUPPORTED_10baseT_Half |
 				  SUPPORTED_10baseT_Full |
 				  SUPPORTED_MII);
-	else
+		cmd->port = PORT_TP;
+	} else {
 		cmd->supported |= SUPPORTED_FIBRE;
+		cmd->port = PORT_FIBRE;
+	}
   
 	cmd->advertising = tp->link_config.advertising;
 	if (netif_running(dev)) {
 		cmd->speed = tp->link_config.active_speed;
 		cmd->duplex = tp->link_config.active_duplex;
 	}
-	cmd->port = 0;
 	cmd->phy_address = PHY_ADDR;
 	cmd->transceiver = 0;
 	cmd->autoneg = tp->link_config.autoneg;
diff -uprN linux-2.6.16/drivers/net/tun.c linux-2.6.16.ovz/drivers/net/tun.c
--- linux-2.6.16/drivers/net/tun.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/drivers/net/tun.c	2006-07-05 08:34:56.000000000 -0400
@@ -62,6 +62,7 @@
 
 #include <asm/system.h>
 #include <asm/uaccess.h>
+#include <ub/beancounter.h>
 
 #ifdef TUN_DEBUG
 static int debug;
@@ -90,6 +91,7 @@ static int tun_net_close(struct net_devi
 static int tun_net_xmit(struct sk_buff *skb, struct net_device *dev)
 {
 	struct tun_struct *tun = netdev_priv(dev);
+	struct user_beancounter *ub;
 
 	DBG(KERN_INFO "%s: tun_net_xmit %d\n", tun->dev->name, skb->len);
 
@@ -114,6 +116,18 @@ static int tun_net_xmit(struct sk_buff *
 		}
 	}
 
+	ub = netdev_bc(dev)->exec_ub;
+	if (ub && (skb_bc(skb)->charged == 0)) {
+		unsigned long charge;
+		charge = skb_charge_fullsize(skb);
+		if (charge_beancounter(ub, UB_OTHERSOCKBUF, charge, 1))
+			goto drop;
+		get_beancounter(ub);
+		skb_bc(skb)->ub = ub;
+		skb_bc(skb)->charged = charge;
+		skb_bc(skb)->resource = UB_OTHERSOCKBUF;
+	}
+
 	/* Queue packet */
 	skb_queue_tail(&tun->readq, skb);
 	dev->trans_start = jiffies;
@@ -410,12 +424,14 @@ static ssize_t tun_chr_readv(struct file
 					tun->dev->name, addr[0], addr[1], addr[2],
 					addr[3], addr[4], addr[5]);
 			ret = tun_put_user(tun, skb, (struct iovec *) iv, len);
+			/* skb will be uncharged in kfree_skb() */
 			kfree_skb(skb);
 			break;
 		} else {
 			DBG(KERN_DEBUG "%s: tun_chr_readv: rejected: %x:%x:%x:%x:%x:%x\n",
 					tun->dev->name, addr[0], addr[1], addr[2],
 					addr[3], addr[4], addr[5]);
+			/* skb will be uncharged in kfree_skb() */
 			kfree_skb(skb);
 			continue;
 		}
@@ -451,6 +467,7 @@ static void tun_setup(struct net_device 
 	dev->get_stats = tun_net_stats;
 	dev->ethtool_ops = &tun_ethtool_ops;
 	dev->destructor = free_netdev;
+	dev->features |= NETIF_F_VIRTUAL;
 }
 
 static struct tun_struct *tun_get_by_name(const char *name)
@@ -459,8 +476,9 @@ static struct tun_struct *tun_get_by_nam
 
 	ASSERT_RTNL();
 	list_for_each_entry(tun, &tun_dev_list, list) {
-		if (!strncmp(tun->dev->name, name, IFNAMSIZ))
-		    return tun;
+		if (ve_accessible_strict(tun->dev->owner_env, get_exec_env()) &&
+		    !strncmp(tun->dev->name, name, IFNAMSIZ))
+			return tun;
 	}
 
 	return NULL;
@@ -479,7 +497,8 @@ static int tun_set_iff(struct file *file
 
 		/* Check permissions */
 		if (tun->owner != -1 &&
-		    current->euid != tun->owner && !capable(CAP_NET_ADMIN))
+		    current->euid != tun->owner && 
+		    !capable(CAP_NET_ADMIN) && !capable(CAP_VE_NET_ADMIN))
 			return -EPERM;
 	} 
 	else if (__dev_get_by_name(ifr->ifr_name)) 
diff -uprN linux-2.6.16/drivers/net/venet_core.c linux-2.6.16.ovz/drivers/net/venet_core.c
--- linux-2.6.16/drivers/net/venet_core.c	1969-12-31 19:00:00.000000000 -0500
+++ linux-2.6.16.ovz/drivers/net/venet_core.c	2006-07-05 08:34:56.000000000 -0400
@@ -0,0 +1,675 @@
+/*
+ *  venet_core.c
+ *
+ *  Copyright (C) 2005  SWsoft
+ *  All rights reserved.
+ *  
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+/*
+ * Common part for Virtuozzo virtual network devices
+ */
+
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/interrupt.h>
+#include <linux/fs.h>
+#include <linux/types.h>
+#include <linux/string.h>
+#include <linux/socket.h>
+#include <linux/errno.h>
+#include <linux/fcntl.h>
+#include <linux/in.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/tcp.h>
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
+#include <net/addrconf.h>
+
+#include <asm/system.h>
+#include <asm/uaccess.h>
+#include <asm/io.h>
+#include <asm/unistd.h>
+
+#include <linux/inet.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <net/ip.h>
+#include <linux/skbuff.h>
+#include <net/sock.h>
+#include <linux/if_ether.h>	/* For the statistics structure. */
+#include <linux/if_arp.h>	/* For ARPHRD_ETHER */
+#include <linux/venet.h>
+#include <linux/ve_proto.h>
+#include <linux/vzctl.h>
+#include <linux/vzctl_venet.h>
+
+struct list_head ip_entry_hash_table[VEIP_HASH_SZ];
+rwlock_t veip_hash_lock = RW_LOCK_UNLOCKED;
+LIST_HEAD(veip_lh);
+
+#define ip_entry_hash_function(ip)  (ntohl(ip) & (VEIP_HASH_SZ - 1))
+
+void ip_entry_hash(struct ip_entry_struct *entry, struct veip_struct *veip)
+{
+	list_add(&entry->ip_hash,	/* global table: bucket chosen from key[3] */
+		 ip_entry_hash_table + ip_entry_hash_function(entry->key[3]));
+	list_add(&entry->ve_list, &veip->ip_lh);	/* @veip's own address list */
+}
+
+/*
+ * Unlink and free @veip, but only once its ip, src and dst lists are
+ * all empty; otherwise leave it alone.
+ */
+void veip_put(struct veip_struct *veip)
+{
+	if (list_empty(&veip->ip_lh) && list_empty(&veip->src_lh) &&
+	    list_empty(&veip->dst_lh)) {
+		list_del(&veip->list);
+		kfree(veip);
+	}
+}
+
+/* Find the AF_INET entry whose key[3] equals @addr, or NULL. */
+struct ip_entry_struct *ip_entry_lookup(u32 addr)
+{
+	struct list_head *bucket;
+	struct ip_entry_struct *entry;
+
+	bucket = &ip_entry_hash_table[ip_entry_hash_function(addr)];
+	list_for_each_entry(entry, bucket, ip_hash) {
+		if (entry->family == AF_INET && entry->key[3] == addr)
+			return entry;
+	}
+	return NULL;
+}
+
+/* Find the @family entry whose 16-byte key equals @addr, or NULL. */
+struct ip_entry_struct *venet_entry_lookup(u32 *addr, int family)
+{
+	struct list_head *bucket;
+	struct ip_entry_struct *entry;
+
+	/* buckets are selected by the last 32-bit word of the key */
+	bucket = &ip_entry_hash_table[ip_entry_hash_function(addr[3])];
+	list_for_each_entry(entry, bucket, ip_hash) {
+		if (entry->family == family && !memcmp(entry->key, addr, 16))
+			return entry;
+	}
+	return NULL;
+}
+
+/* Return the veip registered on veip_lh for @veid, or NULL if none. */
+struct veip_struct *veip_find(envid_t veid)
+{
+	struct veip_struct *veip;
+
+	list_for_each_entry(veip, &veip_lh, list)
+		if (veip->veid == veid)
+			return veip;
+	return NULL;
+}
+
+/* Find or create (GFP_ATOMIC) the veip for @veid; NULL if allocation fails. */
+struct veip_struct *veip_findcreate(envid_t veid)
+{
+	struct veip_struct *veip = veip_find(veid);
+
+	if (veip != NULL)
+		return veip;
+
+	veip = kmalloc(sizeof(*veip), GFP_ATOMIC);
+	if (veip != NULL) {
+		memset(veip, 0, sizeof(*veip));
+		veip->veid = veid;
+		INIT_LIST_HEAD(&veip->ip_lh);
+		INIT_LIST_HEAD(&veip->src_lh);
+		INIT_LIST_HEAD(&veip->dst_lh);
+		list_add(&veip->list, &veip_lh);
+	}
+	return veip;
+}
+
+/*
+ * Device functions
+ */
+
+static int venet_open(struct net_device *dev)
+{
+	if (!try_module_get(THIS_MODULE))	/* reference is dropped in venet_close() */
+		return -EBUSY;
+	return 0;
+}
+
+static int venet_close(struct net_device *master)
+{
+	module_put(THIS_MODULE);	/* pairs with try_module_get() in venet_open() */
+	return 0;
+}
+
+static void venet_destructor(struct net_device *dev)
+{
+	kfree(dev->priv);	/* stats block allocated by venet_init_dev() */
+	dev->priv = NULL;
+}
+
+/*
+ * Transmit @skb on venet: re-own it via venet_change_skb_owner() and feed
+ * it to the venet device of the resulting VE.  Only unicast IPv4/IPv6 is
+ * forwarded; anything else is freed and counted in tx_dropped.  Returns 0.
+ * The higher levels take care of making this non-reentrant (it's
+ * called with bh's disabled).
+ */
+static int venet_xmit(struct sk_buff *skb, struct net_device *dev)
+{
+	struct net_device_stats *stats = (struct net_device_stats *)dev->priv;
+	struct net_device_stats *rcv_stats;
+	struct net_device *rcv;
+	int length;
+
+	if (unlikely(get_exec_env()->disable_net))
+		goto outf;
+
+	/*
+	 * A shared skb (users != 1) is replaced by a private clone; our
+	 * reference to the original is dropped whether or not cloning worked.
+	 */
+	if (atomic_read(&skb->users) != 1) {
+		struct sk_buff *shared = skb;
+
+		skb = skb_clone(shared, GFP_ATOMIC);
+		kfree_skb(shared);
+		if (skb == NULL) {
+			++stats->tx_dropped;
+			goto out;
+		}
+	} else
+		skb_orphan(skb);
+
+	if (skb->protocol == __constant_htons(ETH_P_IP)) {
+		if (MULTICAST(skb->nh.iph->daddr))
+			goto outf;
+	} else if (skb->protocol == __constant_htons(ETH_P_IPV6)) {
+		if (ipv6_addr_is_multicast(&skb->nh.ipv6h->daddr))
+			goto outf;
+	} else {
+		goto outf;
+	}
+
+	if (venet_change_skb_owner(skb) < 0)
+		goto outf;
+
+	if (unlikely(VE_OWNER_SKB(skb)->disable_net))
+		goto outf;
+
+	rcv = VE_OWNER_SKB(skb)->_venet_dev;
+	if (!rcv)
+		/* VE going down */
+		goto outf;
+
+	dev_hold(rcv);
+
+	if (!(rcv->flags & IFF_UP)) {
+		/* Target VE does not want to receive packets */
+		dev_put(rcv);
+		goto outf;
+	}
+
+	skb->pkt_type = PACKET_HOST;
+	skb->dev = rcv;
+
+	/* zero the hard_header_len bytes in front of the payload */
+	skb->mac.raw = skb->data;
+	memset(skb->data - dev->hard_header_len, 0, dev->hard_header_len);
+
+	/* detach the cached route and conntrack reference before re-injecting */
+	dst_release(skb->dst);
+	skb->dst = NULL;
+#ifdef CONFIG_NETFILTER
+	nf_conntrack_put(skb->nfct);
+	skb->nfct = NULL;
+#ifdef CONFIG_NETFILTER_DEBUG
+	skb->nf_debug = 0;
+#endif
+#endif
+	/* the skb must not be touched after netif_rx(): sample the length first */
+	length = skb->len;
+	netif_rx(skb);
+
+	stats->tx_bytes += length;
+	stats->tx_packets++;
+	rcv_stats = (struct net_device_stats *)rcv->priv;
+	rcv_stats->rx_bytes += length;
+	rcv_stats->rx_packets++;
+	dev_put(rcv);
+
+	return 0;
+
+outf:
+	kfree_skb(skb);
+	++stats->tx_dropped;
+out:
+	return 0;
+}
+
+static struct net_device_stats *get_stats(struct net_device *dev)
+{
+	return (struct net_device_stats *)dev->priv;	/* set up in venet_init_dev() */
+}
+
+/*
+ * net_device ->init hook for venet: allocate the zeroed per-device stats
+ * block and fill in the callbacks and generic fields.  Returns 0 or -ENOMEM.
+ */
+int venet_init_dev(struct net_device *dev)
+{
+	struct net_device_stats *stats;
+
+	dev->hard_start_xmit	= venet_xmit;
+	stats = kmalloc(sizeof(*stats), GFP_KERNEL);
+	dev->priv		= stats;
+	if (stats == NULL)
+		return -ENOMEM;
+	memset(stats, 0, sizeof(*stats));
+	dev->get_stats		= get_stats;
+	dev->open		= venet_open;
+	dev->stop		= venet_close;
+	dev->destructor		= venet_destructor;
+
+	dev->type		= ARPHRD_VOID;
+	dev->hard_header_len	= ETH_HLEN;
+	dev->mtu		= 1500; /* eth_mtu */
+	dev->tx_queue_len	= 0;
+	memset(dev->broadcast, 0xFF, ETH_ALEN);
+	dev->flags		= IFF_BROADCAST|IFF_NOARP|IFF_POINTOPOINT;
+	return 0;
+}
+
+static void venet_setup(struct net_device *dev)
+{
+	dev->init = venet_init_dev;
+	/*
+	 * No other features are set: checksumming is required here, and
+	 * nobody else will do that job for us.
+	 */
+	dev->features |= NETIF_F_VENET | NETIF_F_VIRTUAL;
+}
+
+#ifdef CONFIG_PROC_FS
+/*
+ * seq_file ->show for veinfo: one line per VE with its id, class id and
+ * pcounter, followed by every address entry that has an active_env.
+ */
+static int veinfo_seq_show(struct seq_file *m, void *v)
+{
+	struct ve_struct *ve = v;
+	struct ip_entry_struct *entry;
+	char ip[40];
+
+	seq_printf(m, "%10u %5u %5u", ve->veid,
+		   ve->class_id, atomic_read(&ve->pcounter));
+	read_lock(&veip_hash_lock);
+	if (ve->veip == NULL)
+		goto unlock;
+	list_for_each_entry(entry, &ve->veip->ip_lh, ve_list) {
+		if (entry->active_env == NULL)
+			continue;
+		if (entry->family == AF_INET)
+			sprintf(ip, "%u.%u.%u.%u", NIPQUAD(entry->key[3]));
+		else
+			sprintf(ip, "%x:%x:%x:%x:%x:%x:%x:%x",
+				ntohl(entry->key[0]) >> 16,
+				ntohl(entry->key[0]) & 0xFFFF,
+				ntohl(entry->key[1]) >> 16,
+				ntohl(entry->key[1]) & 0xFFFF,
+				ntohl(entry->key[2]) >> 16,
+				ntohl(entry->key[2]) & 0xFFFF,
+				ntohl(entry->key[3]) >> 16,
+				ntohl(entry->key[3]) & 0xFFFF);
+		seq_printf(m, " %39s", ip);
+	}
+unlock:
+	read_unlock(&veip_hash_lock);
+	seq_putc(m, '\n');
+	return 0;
+}
+
+/*
+ * seq_file ->start: takes ve_list_guard (released in ve_seq_stop()).
+ * The super VE walks the whole VE list; any other VE sees only itself.
+ */
+static void *ve_seq_start(struct seq_file *m, loff_t *pos)
+{
+	struct ve_struct *self = get_exec_env();
+	struct ve_struct *ve;
+	loff_t skip = *pos;
+
+	read_lock(&ve_list_guard);
+	if (!ve_is_super(self))
+		return skip == 0 ? self : NULL;
+	for (ve = ve_list_head; ve != NULL && skip > 0; skip--)
+		ve = ve->next;
+	return ve;
+}
+
+static void *ve_seq_next(struct seq_file *m, void *v, loff_t *pos)
+{
+	struct ve_struct *ve = (struct ve_struct *)v;
+
+	if (!ve_is_super(get_exec_env()))	/* a non-super VE only lists itself */
+		return NULL;
+	(*pos)++;
+	return ve->next;
+}
+
+static void ve_seq_stop(struct seq_file *m, void *v)
+{
+	read_unlock(&ve_list_guard);	/* taken in ve_seq_start() */
+}
+
+/* Iterator over VEs for the vz/veinfo proc file. */
+static struct seq_operations veinfo_seq_op = {
+	.start	= ve_seq_start,
+	.next	= ve_seq_next,
+	.stop	= ve_seq_stop,
+	.show	= veinfo_seq_show,
+};
+
+static int veinfo_open(struct inode *inode, struct file *file)
+{
+        return seq_open(file, &veinfo_seq_op);	/* attach the VE iterator */
+}
+
+static struct file_operations proc_veinfo_operations = {	/* vz/veinfo */
+	.open		= veinfo_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= seq_release,
+};
+
+/*
+ * ->start: lock the table; pos 0 is the header token, pos N the N-th entry.
+ */
+static void *veip_seq_start(struct seq_file *m, loff_t *pos)
+{
+	struct list_head *bucket, *node;
+	loff_t left = *pos;
+
+	write_lock_irq(&veip_hash_lock);
+	if (left == 0)
+		return ip_entry_hash_table;
+	for (bucket = ip_entry_hash_table;
+	     bucket < ip_entry_hash_table + VEIP_HASH_SZ; bucket++)
+		list_for_each(node, bucket)
+			if (--left == 0)
+				return node;
+	return NULL;
+}
+
+/* seq_file ->next: step to the following entry node, crossing buckets. */
+static void *veip_seq_next(struct seq_file *m, void *v, loff_t *pos)
+{
+	struct list_head *const end = ip_entry_hash_table + VEIP_HASH_SZ;
+	struct list_head *p = v;
+
+	for (;;) {
+		p = p->next;
+		if (p < ip_entry_hash_table || p >= end) {
+			(*pos)++;
+			return p;
+		}
+		/* p is a bucket head: this bucket is done, try the next one */
+		if (++p >= end)
+			return NULL;
+	}
+}
+
+static void veip_seq_stop(struct seq_file *m, void *v)
+{
+	write_unlock_irq(&veip_hash_lock);	/* taken in veip_seq_start() */
+}
+
+static struct seq_operations veip_seq_op = {	/* iterator for vz/veip */
+	.start	= veip_seq_start,
+	.next	= veip_seq_next,
+	.stop	= veip_seq_stop,
+	.show	= veip_seq_show,
+};
+
+static int veip_open(struct inode *inode, struct file *file)
+{
+        return seq_open(file, &veip_seq_op);	/* attach the address-table iterator */
+}
+
+static struct file_operations proc_veip_operations = {	/* vz/veip */
+	.open		= veip_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= seq_release,
+};
+#endif
+
+/*
+ * Add (VE_IP_ADD) or delete (VE_IP_DEL) the user-supplied address for VE
+ * @veid.  Requires CAP_SETVEID.  @addrlen must be exactly the size of
+ * sockaddr_in or sockaddr_in6, matching the address family.
+ * Returns 0 or a negative errno.
+ */
+int real_ve_ip_map(envid_t veid, int op, struct sockaddr *uservaddr, int addrlen)
+{
+	union {
+		struct sockaddr		g;
+		struct sockaddr_in	a4;
+		struct sockaddr_in6	a6;
+	} addr;
+	struct ve_struct *ve;
+	int expected_len;
+	int err;
+
+	if (!capable(CAP_SETVEID))
+		return -EPERM;
+
+	if (addrlen > sizeof(addr) || addrlen < sizeof(struct sockaddr_in))
+		return -EINVAL;
+
+	err = move_addr_to_kernel(uservaddr, addrlen, &addr);
+	if (err < 0)
+		return err;
+
+	switch (addr.g.sa_family) {
+	case AF_INET:
+		expected_len = sizeof(struct sockaddr_in);
+		break;
+	case AF_INET6:
+		expected_len = sizeof(struct sockaddr_in6);
+		break;
+	default:
+		return -EAFNOSUPPORT;
+	}
+	if (addrlen != expected_len)
+		return -EINVAL;
+
+	switch (op) {
+	case VE_IP_ADD:
+		ve = get_ve_by_id(veid);
+		if (!ve)
+			return -ESRCH;
+		/* stays -ESRCH when the VE exists but is not running */
+		err = -ESRCH;
+		down_read(&ve->op_sem);
+		if (ve->is_running)
+			err = veip_entry_add(ve, &addr.g);
+		up_read(&ve->op_sem);
+		put_ve(ve);
+		return err;
+	case VE_IP_DEL:
+		return veip_entry_del(veid, &addr.g);
+	default:
+		return -EINVAL;
+	}
+}
+
+/*
+ * ioctl entry point registered through venetcalls.  Only
+ * VENETCTL_VE_IP_MAP is handled; anything else yields -ENOTTY.
+ */
+int venet_ioctl(struct inode *ino, struct file *file, unsigned int cmd,
+		unsigned long arg)
+{
+	struct vzctl_ve_ip_map req;
+
+	if (cmd != VENETCTL_VE_IP_MAP)
+		return -ENOTTY;
+
+	/* fetch the request block from user space before acting on it */
+	if (copy_from_user(&req, (void *)arg, sizeof(req)))
+		return -EFAULT;
+
+	return real_ve_ip_map(req.veid, req.op, req.addr, req.addrlen);
+}
+
+static struct vzioctlinfo venetcalls = {	/* registered in venet_init() */
+	.type	= VENETCTLTYPE,
+	.func	= venet_ioctl,
+	.owner	= THIS_MODULE,
+};
+
+/* Create and register @env's venet device; on failure free it and ->priv. */
+int venet_dev_start(struct ve_struct *env)
+{
+	struct net_device *dev_venet = alloc_netdev(0, "venet%d", venet_setup);
+	int err;
+
+	if (!dev_venet)
+		return -ENOMEM;
+	err = dev_alloc_name(dev_venet, dev_venet->name);
+	if (err >= 0)
+		err = register_netdev(dev_venet);
+	if (err != 0) {
+		kfree(dev_venet->priv);	/* stats left by ->init, or NULL */
+		free_netdev(dev_venet);
+		printk(KERN_ERR "VENET initialization error err=%d\n", err);
+		return err;
+	}
+	env->_venet_dev = dev_venet;
+	return 0;
+}
+
+/*
+ * VE_HOOK_INIT handler (also called directly for VE0 by venet_init()):
+ * set up the veip and the venet device of the VE passed in @data.
+ * Non-super VEs hold a module reference for as long as they are set up.
+ */
+static int venet_start(unsigned int hooknum, void *data)
+{
+	struct ve_struct *env = data;
+	int err;
+
+	if (env->veip)
+		return -EEXIST;
+	/* NOTE(review): a failed module get is reported as success here */
+	if (!ve_is_super(env) && !try_module_get(THIS_MODULE))
+		return 0;
+
+	err = veip_start(env);
+	if (err == 0) {
+		err = venet_dev_start(env);
+		if (err == 0)
+			return 0;
+		veip_stop(env);
+	}
+	if (!ve_is_super(env))
+		module_put(THIS_MODULE);
+	return err;
+}
+
+/* VE hook: tear down the VE's veip and drop venet_start()'s module ref. */
+static int venet_stop(unsigned int hooknum, void *data)
+{
+	struct ve_struct *env = data;
+
+	veip_stop(env);
+	if (!ve_is_super(env))
+		module_put(THIS_MODULE);
+	return 0;
+}
+
+#define VE_HOOK_PRI_NET		0
+
+static struct ve_hook venet_ve_hook_init = {	/* registered in venet_init() */
+	.hook		= venet_start,
+	.undo		= venet_stop,
+	.hooknum	= VE_HOOK_INIT,
+	.priority	= VE_HOOK_PRI_NET,
+};
+
+static struct ve_hook venet_ve_hook_fini = {	/* registered in venet_init() */
+	.hook		= venet_stop,
+	.hooknum	= VE_HOOK_FINI,
+	.priority	= VE_HOOK_PRI_NET,
+};
+
+__init int venet_init(void)
+{
+#ifdef CONFIG_PROC_FS
+	struct proc_dir_entry *de;
+#endif
+	int i, err;
+
+	if (get_ve0()->_venet_dev != NULL)
+		return -EEXIST;
+	/* every bucket of the address hash starts out as an empty list */
+	for (i = 0; i < VEIP_HASH_SZ; i++)
+		INIT_LIST_HEAD(ip_entry_hash_table + i);
+	/* VE0 is set up by hand, with the function the VE_HOOK_INIT hook uses */
+	err = venet_start(VE_HOOK_INIT, (void *)get_ve0());
+	if (err)
+		return err;
+	/* a proc entry that cannot be created is only logged, not fatal */
+#ifdef CONFIG_PROC_FS
+	de = create_proc_glob_entry("vz/veinfo",
+			S_IFREG|S_IRUSR, NULL);
+	if (de)
+		de->proc_fops = &proc_veinfo_operations;
+	else
+		printk(KERN_WARNING "venet: can't make veinfo proc entry\n");
+
+	de = create_proc_entry("vz/veip", S_IFREG|S_IRUSR, NULL);
+	if (de)
+		de->proc_fops = &proc_veip_operations;
+	else
+		printk(KERN_WARNING "venet: can't make veip proc entry\n");
+#endif
+
+	ve_hook_register(&venet_ve_hook_init);
+	ve_hook_register(&venet_ve_hook_fini);
+	vzioctl_register(&venetcalls);
+	return 0;
+}
+
+__exit void venet_exit(void)
+{
+	struct net_device *dev_venet;
+
+	vzioctl_unregister(&venetcalls);	/* reverse order of venet_init() */
+	ve_hook_unregister(&venet_ve_hook_fini);
+	ve_hook_unregister(&venet_ve_hook_init);
+#ifdef CONFIG_PROC_FS
+	remove_proc_entry("vz/veip", NULL);
+	remove_proc_entry("vz/veinfo", NULL);	/* NOTE(review): made with create_proc_glob_entry() - confirm this removal matches */
+#endif
+	/* VE0's device and veip were set up by venet_init() calling venet_start() */
+	dev_venet = get_ve0()->_venet_dev;
+	if (dev_venet != NULL) {
+		get_ve0()->_venet_dev = NULL;
+		unregister_netdev(dev_venet);
+		free_netdev(dev_venet);
+	}
+	veip_stop(get_ve0());
+}
+
+module_init(venet_init);
+module_exit(venet_exit);
diff -uprN linux-2.6.16/drivers/net/veth.c linux-2.6.16.ovz/drivers/net/veth.c
--- linux-2.6.16/drivers/net/veth.c	1969-12-31 19:00:00.000000000 -0500
+++ linux-2.6.16.ovz/drivers/net/veth.c	2006-07-05 08:34:56.000000000 -0400
@@ -0,0 +1,582 @@
+/*
+ *  veth.c
+ *
+ *  Copyright (C) 2006  SWsoft
+ *  All rights reserved.
+ *  
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+/*
+ * Virtual ethernet device used to change VE ownership on packets
+ */
+
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/interrupt.h>
+#include <linux/fs.h>
+#include <linux/types.h>
+#include <linux/string.h>
+#include <linux/socket.h>
+#include <linux/errno.h>
+#include <linux/fcntl.h>
+#include <linux/in.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/tcp.h>
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
+
+#include <asm/system.h>
+#include <asm/uaccess.h>
+#include <asm/io.h>
+#include <asm/unistd.h>
+
+#include <linux/inet.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <net/ip.h>
+#include <linux/skbuff.h>
+#include <net/sock.h>
+#include <linux/if_ether.h>	/* For the statistics structure. */
+#include <linux/if_arp.h>	/* For ARPHRD_ETHER */
+#include <linux/ve_proto.h>
+#include <linux/vzctl.h>
+#include <linux/vzctl_veth.h>
+
+#include <linux/list.h>
+#include <linux/spinlock.h>
+#include <linux/vzcalluser.h>
+
+struct veth_struct	/* private area of every veth net_device */
+{
+	struct net_device_stats stats;	/* returned by get_stats() */
+	struct net_device	*pair;	/* peer device that veth_xmit() delivers to */
+	struct list_head	hwaddr_list;	/* link in veth_hwaddr_list */
+};
+
+struct list_head veth_hwaddr_list;
+rwlock_t ve_hwaddr_lock = RW_LOCK_UNLOCKED;
+DECLARE_MUTEX(hwaddr_sem);
+
+#define veth_from_netdev(dev) /* net_device -> its private veth_struct */ \
+	((struct veth_struct *)(netdev_priv(dev)))
+#define veth_to_netdev(veth) /* inverse: veth_struct -> owning net_device */ \
+	((struct net_device*)((char*)(veth) - \
+	(unsigned long)netdev_priv(NULL)))
+
+struct net_device * veth_dev_start(char *dev_addr, char *name);
+
+struct veth_struct *hwaddr_entry_lookup(char *name)
+{
+	struct veth_struct *entry;
+	struct list_head *tmp;
+	/* Return the listed entry whose pair device is called @name, or NULL. Takes no lock itself. */
+	list_for_each(tmp, &veth_hwaddr_list) {
+		entry = list_entry(tmp, struct veth_struct, hwaddr_list);
+		BUG_ON(entry->pair == NULL);	/* a listed entry must have a peer */
+		if (strncmp(name, entry->pair->name, IFNAMSIZ) == 0)
+			return entry;
+	}
+	return NULL;
+}
+
+int veth_entry_add(struct ve_struct *ve, char *dev_addr, char *name,
+		char *dev_addr_ve, char *name_ve)
+{
+	struct net_device *dev_ve;
+	struct net_device *dev_ve0;
+	struct ve_struct *old_env;
+	char dev_name[IFNAMSIZ];
+	int err;
+	/* Create a veth pair: first device in the current exec env, second with @ve as exec env. */
+	down(&hwaddr_sem);
+
+	if (name[0] == '\0')	/* no name given: use a "%d" template */
+		snprintf(dev_name, sizeof(dev_name), "vz%d.%%d", ve->veid);
+	else {
+		memcpy(dev_name, name, IFNAMSIZ - 1);
+		dev_name[IFNAMSIZ - 1] = '\0';	/* force termination after the fixed-size copy */
+	}
+	dev_ve0 = veth_dev_start(dev_addr, dev_name);
+	if (IS_ERR(dev_ve0)) {
+		err = PTR_ERR(dev_ve0);
+		goto err;
+	}
+
+	old_env = set_exec_env(ve);	/* second device is created with @ve as exec env */
+	if (name_ve[0] == '\0')
+		sprintf(dev_name, "eth%%d");
+	else {
+		memcpy(dev_name, name_ve, IFNAMSIZ - 1);
+		dev_name[IFNAMSIZ - 1] = '\0';
+	}
+	dev_ve = veth_dev_start(dev_addr_ve, dev_name);
+	if (IS_ERR(dev_ve)) {
+		err = PTR_ERR(dev_ve);
+		goto err_ve;
+	}
+	set_exec_env(old_env);
+	veth_from_netdev(dev_ve)->pair = dev_ve0;	/* cross-link the two halves */
+	veth_from_netdev(dev_ve0)->pair = dev_ve;
+
+	write_lock(&ve_hwaddr_lock);	/* only dev_ve's entry goes on the list */
+	list_add(&(veth_from_netdev(dev_ve)->hwaddr_list), &veth_hwaddr_list);
+	write_unlock(&ve_hwaddr_lock);
+
+	up(&hwaddr_sem);
+	return 0;
+
+err_ve:	/* second device failed: restore the env and undo the first one */
+	set_exec_env(old_env);
+	unregister_netdev(dev_ve0);
+err:
+	up(&hwaddr_sem);
+	return err;
+}
+
+int veth_entry_del(struct ve_struct *ve, char *name)
+{
+	struct veth_struct *found;
+	struct ve_struct *old_env;
+	struct net_device *dev;
+	int err;
+	/* Remove the pair whose listed peer is @name if @ve owns it; 0 or -ENODEV. */
+	err = -ENODEV;
+	down(&hwaddr_sem);
+	found = hwaddr_entry_lookup(name);
+	if (found == NULL)
+		goto out;
+	if (veth_to_netdev(found)->owner_env != ve)
+		goto out;
+
+	write_lock(&ve_hwaddr_lock);
+	list_del(&found->hwaddr_list);
+	write_unlock(&ve_hwaddr_lock);
+	err = 0;
+	dev = found->pair;
+	BUG_ON(found->pair == NULL);
+	/* Clear the peer's back-pointer first: veth_xmit() must not follow it to a freed device. */
+	veth_from_netdev(dev)->pair = NULL;
+	old_env = set_exec_env(ve);
+	unregister_netdev(veth_to_netdev(found));
+	set_exec_env(old_env);
+
+	unregister_netdev(dev);
+
+out:
+	up(&hwaddr_sem);
+	return err;
+}
+
+/*
+ * Device functions
+ */
+
+static int veth_open(struct net_device *dev)
+{
+	return 0;	/* dev->open hook: nothing to set up */
+}
+
+static int veth_close(struct net_device *master)
+{
+	return 0;	/* dev->stop hook: nothing to tear down */
+}
+
+static void veth_destructor(struct net_device *dev)
+{
+	free_netdev(dev);	/* installed as dev->destructor by veth_init_dev() */
+}
+
+static struct net_device_stats *get_stats(struct net_device *dev)
+{
+	return &veth_from_netdev(dev)->stats;	/* counters live in the private veth_struct */
+}
+
+/*
+ * Transmit hook: re-own @skb to the paired device and feed it to netif_rx().
+ * The higher levels make this non-reentrant (it's called with bh's disabled).
+ */
+static int veth_xmit(struct sk_buff *skb, struct net_device *dev)
+{
+	struct net_device_stats *stats = get_stats(dev);
+	struct net_device *rcv = NULL;
+	struct veth_struct *entry;
+	int length;
+
+	if (unlikely(get_exec_env()->disable_net))	/* sender's env has networking disabled */
+		goto outf;
+
+	skb_orphan(skb);
+
+	entry = veth_from_netdev(dev);
+	rcv = entry->pair;
+	if (!rcv)
+		/* VE going down */
+		goto outf;
+
+	if (unlikely(rcv->owner_env->disable_net))	/* receiver's env has networking disabled */
+		goto outf;
+
+	skb->owner_env = rcv->owner_env;
+
+	if (!(rcv->flags & IFF_UP)) {
+		/* Target VE does not want to receive packets */
+		goto outf;
+	}
+
+	skb->dev = rcv;
+	skb->pkt_type = PACKET_HOST;
+	skb->protocol = eth_type_trans(skb, rcv);
+
+	dst_release(skb->dst);	/* drop the sender's route and conntrack state */
+	skb->dst = NULL;
+#ifdef CONFIG_NETFILTER
+	nf_conntrack_put(skb->nfct);
+	skb->nfct = NULL;
+#ifdef CONFIG_NETFILTER_DEBUG
+	skb->nf_debug = 0;
+#endif
+#endif
+	length = skb->len;	/* saved before netif_rx() takes the skb */
+
+	netif_rx(skb);
+
+	stats->tx_bytes += length;
+	stats->tx_packets++;
+	if (rcv) {	/* always true here: a NULL rcv already went to outf */
+		struct net_device_stats *rcv_stats = get_stats(rcv);
+		rcv_stats->rx_bytes += length;
+		rcv_stats->rx_packets++;
+	}
+
+	return 0;
+
+outf:	/* drop path: free the skb, count it, still return 0 */
+	kfree_skb(skb);
+	stats->tx_dropped++;
+	return 0;
+}
+
+int veth_init_dev(struct net_device *dev)
+{
+	dev->hard_start_xmit = veth_xmit;	/* install the veth callbacks */
+	dev->get_stats = get_stats;
+	dev->open = veth_open;
+	dev->stop = veth_close;
+	dev->destructor = veth_destructor;
+
+	ether_setup(dev);
+
+	dev->tx_queue_len = 0;	/* set after ether_setup() - presumably to override its default */
+	return 0;
+}
+
+static void veth_setup(struct net_device *dev)
+{
+	dev->init = veth_init_dev;	/* the rest of the setup happens in veth_init_dev() */
+	/*
+	 * No other features are set: checksumming is required,
+	 * and nobody else will do our job.
+	 */
+	dev->features |= NETIF_F_VENET | NETIF_F_VIRTUAL;
+}
+
+#ifdef CONFIG_PROC_FS
+#define ADDR_FMT "%02x:%02x:%02x:%02x:%02x:%02x"
+#define ADDR(x) (x)[0],(x)[1],(x)[2],(x)[3],(x)[4],(x)[5]
+static int vehwaddr_seq_show(struct seq_file *m, void *v)
+{
+	struct list_head *p;
+	struct veth_struct *entry;
+	/* Print one record: the version header for the list head, else one veth pair. */
+	p = (struct list_head *)v;
+	if (p == &veth_hwaddr_list) {
+		seq_puts(m, "Version: 1.0\n");
+		return 0;
+	}
+	entry = list_entry(p, struct veth_struct, hwaddr_list);
+	seq_printf(m, ADDR_FMT " %16s ",	/* peer device: address and name */
+			ADDR(entry->pair->dev_addr), entry->pair->name);
+	seq_printf(m, ADDR_FMT " %16s %10u\n",	/* listed device: address, name, owner VE id */
+			ADDR(veth_to_netdev(entry)->dev_addr),
+			veth_to_netdev(entry)->name,
+			VEID(veth_to_netdev(entry)->owner_env));
+	return 0;
+}
+
+static void *vehwaddr_seq_start(struct seq_file *m, loff_t *pos)
+{
+	loff_t l;
+	struct list_head *p;
+	/* Position 0 is the list head (header record); position N is the Nth entry. */
+	l = *pos;
+	read_lock(&ve_hwaddr_lock);	/* released in vehwaddr_seq_stop() */
+	if (l == 0)
+		return &veth_hwaddr_list;
+	list_for_each(p, &veth_hwaddr_list) {
+		if (--l == 0)
+			return p;
+	}
+	return NULL;	/* position is past the end of the list */
+}
+
+static void *vehwaddr_seq_next(struct seq_file *m, void *v, loff_t *pos)
+{
+	struct list_head *p;
+	/* Advance to the following list node; NULL once the walk is back at the head. */
+	p = (struct list_head *)v;
+	(*pos)++;
+	return p->next == &veth_hwaddr_list ? NULL : p->next;
+}
+
+static void vehwaddr_seq_stop(struct seq_file *m, void *v)
+{
+	read_unlock(&ve_hwaddr_lock);	/* pairs with read_lock() in vehwaddr_seq_start() */
+}
+
+static struct seq_operations vehwaddr_seq_op = {
+	.start 	= vehwaddr_seq_start,
+	.next	= vehwaddr_seq_next,
+	.stop	= vehwaddr_seq_stop,
+	.show	= vehwaddr_seq_show
+};
+
+static int vehwaddr_open(struct inode *inode, struct file *file)
+{
+	return seq_open(file, &vehwaddr_seq_op);	/* attach the seq iterator to this file */
+}
+
+static struct file_operations proc_vehwaddr_operations = {
+	.open		= vehwaddr_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= seq_release
+};
+#endif
+
+int real_ve_hwaddr(envid_t veid, int op,
+		unsigned char *dev_addr, int addrlen, char *name,
+		unsigned char *dev_addr_ve, int addrlen_ve, char *name_ve)
+{
+	int err;
+	struct ve_struct *ve;
+	char ve_addr[ETH_ALEN];
+	/* Add or delete a veth pair for VE @veid according to @op; 0 or a negative errno. */
+	err = -EPERM;
+	if (!capable(CAP_NET_ADMIN))
+		goto out;
+
+	err = -EINVAL;	/* also the result for an unrecognised @op */
+	switch (op)
+	{
+		case VE_ETH_ADD:
+			if (addrlen != ETH_ALEN)
+				goto out;
+			if (addrlen_ve != ETH_ALEN && addrlen_ve != 0)
+				goto out;
+			/* No VE addr given: it is dev_addr with 0x80 set in byte 3, so that bit must be clear */
+			if (addrlen_ve == 0 && (dev_addr[3] & 0x80))
+				goto out;
+			if (addrlen_ve == 0) {
+				memcpy(ve_addr, dev_addr, ETH_ALEN);
+				ve_addr[3] |= 0x80;
+			} else {
+				memcpy(ve_addr, dev_addr_ve, ETH_ALEN);
+			}
+
+			ve = get_ve_by_id(veid);
+			err = -ESRCH;	/* also returned when the VE exists but is not running */
+			if (!ve)
+				goto out;
+
+			down_read(&ve->op_sem);
+			if (ve->is_running)
+				err = veth_entry_add(ve, dev_addr, name,
+						ve_addr, name_ve);
+			up_read(&ve->op_sem);
+			put_ve(ve);
+			break;
+
+		case VE_ETH_DEL:
+			if (name[0] == '\0')	/* a device name is mandatory for delete */
+				goto out;
+			ve = get_ve_by_id(veid);
+			err = -ESRCH;	/* also returned when the VE exists but is not running */
+			if (!ve)
+				goto out;
+
+			down_read(&ve->op_sem);
+			if (ve->is_running)
+				err = veth_entry_del(ve, name);
+			up_read(&ve->op_sem);
+			put_ve(ve);
+			break;
+	}
+
+out:
+	return err;
+}
+
+int veth_ioctl(struct inode *ino, struct file *file, unsigned int cmd,
+		unsigned long arg)
+{
+	int err;
+	/* ioctl entry point: only VETHCTL_VE_HWADDR is handled, anything else is -ENOTTY. */
+	err = -ENOTTY;
+	switch(cmd) {
+	    case VETHCTL_VE_HWADDR: {
+			struct vzctl_ve_hwaddr s;
+			err = -EFAULT;
+			if (copy_from_user(&s, (void __user *)arg, sizeof(s)))	/* arg is a user pointer */
+				break;
+			err = real_ve_hwaddr(s.veid, s.op,
+					s.dev_addr, s.addrlen, s.dev_name,
+					s.dev_addr_ve, s.addrlen_ve, s.dev_name_ve);
+		}
+		break;
+	}
+	return err;
+}
+
+static struct vzioctlinfo vethcalls = {
+	.type	= VETHCTLTYPE,
+	.func	= veth_ioctl,
+	.owner	= THIS_MODULE,
+};
+
+struct net_device * veth_dev_start(char *dev_addr, char *name)
+{
+	struct net_device *dev;
+	int err;
+	/* Allocate and register one veth device; returns it, or an ERR_PTR() on failure. */
+	dev = alloc_netdev(sizeof(struct veth_struct), name, veth_setup);
+	if (!dev)
+		return ERR_PTR(-ENOMEM);
+	if (strchr(dev->name, '%')) {	/* name is a template: pick a concrete one */
+		err = dev_alloc_name(dev, dev->name);
+		if (err < 0)
+			goto err;
+	}
+	if ((err = register_netdev(dev)) != 0)
+		goto err;
+
+	memcpy(dev->dev_addr, dev_addr, ETH_ALEN);	/* NOTE(review): set only after registration - confirm intended */
+	dev->addr_len = ETH_ALEN;
+
+	return dev;
+err:
+	free_netdev(dev);
+	printk(KERN_ERR "%s initialization error err=%d\n", name, err);
+	return ERR_PTR(err);
+}
+
+static int veth_stop(unsigned int hooknum, void *data)
+{
+	struct ve_struct *old_env;
+	struct ve_struct *env;
+	struct list_head *tmp, *n;
+	/* VE_HOOK_FINI hook: destroy every veth pair owned by the VE passed in @data. */
+	env = (struct ve_struct *)data;
+	down(&hwaddr_sem);
+	list_for_each_safe(tmp, n, &veth_hwaddr_list) {
+		struct veth_struct *entry;
+		struct net_device *dev;
+		entry = list_entry(tmp, struct veth_struct, hwaddr_list);
+		if (VEID(env) != VEID(veth_to_netdev(entry)->owner_env))
+			continue;
+
+		write_lock(&ve_hwaddr_lock);
+		list_del(&entry->hwaddr_list);
+		write_unlock(&ve_hwaddr_lock);
+		/* Clear the peer's back-pointer first: veth_xmit() must not follow it to a freed device. */
+		dev = entry->pair;
+		BUG_ON(entry->pair == NULL);
+		veth_from_netdev(dev)->pair = NULL;
+		old_env = set_exec_env(env);
+		unregister_netdev(veth_to_netdev(entry));
+		set_exec_env(old_env);
+		old_env = set_exec_env(get_ve0());
+		unregister_netdev(dev);
+		set_exec_env(old_env);
+	}
+	up(&hwaddr_sem);
+	return 0;
+}
+
+#define VE_HOOK_PRI_NET		0
+
+static struct ve_hook veth_ve_hook_fini = {
+	.hook		= veth_stop,
+	.hooknum	= VE_HOOK_FINI,
+	.priority	= VE_HOOK_PRI_NET,
+	.owner		= THIS_MODULE,
+};
+
+__init int veth_init(void)
+{
+#ifdef CONFIG_PROC_FS
+	struct proc_dir_entry *de;
+#endif
+	/* Module init: empty pair list, proc file, VE fini hook and ioctl handler. */
+	INIT_LIST_HEAD(&veth_hwaddr_list);
+
+#ifdef CONFIG_PROC_FS
+	de = create_proc_glob_entry("vz/veth",
+			S_IFREG|S_IRUSR, NULL);
+	if (de)
+		de->proc_fops = &proc_vehwaddr_operations;
+	else	/* not fatal: only warn and carry on */
+		printk(KERN_WARNING "veth: can't make vehwaddr proc entry\n");
+
+#endif
+
+	ve_hook_register(&veth_ve_hook_fini);
+	vzioctl_register(&vethcalls);
+	return 0;
+}
+
+__exit void veth_exit(void)
+{
+	struct veth_struct *entry;
+	struct list_head *tmp, *n;
+	struct ve_struct *ve;
+	struct ve_struct *old_env;
+	/* Module exit: unregister the interfaces, then destroy every remaining veth pair. */
+	vzioctl_unregister(&vethcalls);
+	ve_hook_unregister(&veth_ve_hook_fini);
+#ifdef CONFIG_PROC_FS
+	remove_proc_entry("vz/veth", NULL);
+#endif
+
+	down(&hwaddr_sem);
+	list_for_each_safe(tmp, n, &veth_hwaddr_list) {
+		struct net_device *dev;
+		entry = list_entry(tmp, struct veth_struct, hwaddr_list);
+		ve = get_ve(veth_to_netdev(entry)->owner_env);
+
+		write_lock(&ve_hwaddr_lock);
+		list_del(&entry->hwaddr_list);
+		write_unlock(&ve_hwaddr_lock);
+		/* Clear the peer's back-pointer first: veth_xmit() must not follow it to a freed device. */
+		dev = entry->pair;
+		BUG_ON(entry->pair == NULL);
+		veth_from_netdev(dev)->pair = NULL;
+		old_env = set_exec_env(ve);
+		unregister_netdev(veth_to_netdev(entry));
+		set_exec_env(old_env);
+
+		unregister_netdev(dev);
+		put_ve(ve);
+	}
+	up(&hwaddr_sem);
+}
+
+module_init(veth_init);
+module_exit(veth_exit);
+
+MODULE_AUTHOR("Andrey Mirkin <amirkin@sw.ru>");
+MODULE_DESCRIPTION("Virtuozzo Virtual Ethernet Device");
+MODULE_LICENSE("GPL v2");
+
diff -uprN linux-2.6.16/drivers/net/via-rhine.c linux-2.6.16.ovz/drivers/net/via-rhine.c
--- linux-2.6.16/drivers/net/via-rhine.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/drivers/net/via-rhine.c	2006-07-05 08:34:56.000000000 -0400
@@ -129,6 +129,7 @@
 	- Massive clean-up
 	- Rewrite PHY, media handling (remove options, full_duplex, backoff)
 	- Fix Tx engine race for good
+	- Craig Brind: Zero padded aligned buffers for short packets.
 
 */
 
@@ -1306,7 +1307,12 @@ static int rhine_start_tx(struct sk_buff
 			rp->stats.tx_dropped++;
 			return 0;
 		}
+
+		/* Padding is not copied and so must be redone. */
 		skb_copy_and_csum_dev(skb, rp->tx_buf[entry]);
+		if (skb->len < ETH_ZLEN)
+			memset(rp->tx_buf[entry] + skb->len, 0,
+			       ETH_ZLEN - skb->len);
 		rp->tx_skbuff_dma[entry] = 0;
 		rp->tx_ring[entry].addr = cpu_to_le32(rp->tx_bufs_dma +
 						      (rp->tx_buf[entry] -
diff -uprN linux-2.6.16/drivers/net/wireless/Kconfig linux-2.6.16.ovz/drivers/net/wireless/Kconfig
--- linux-2.6.16/drivers/net/wireless/Kconfig	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/drivers/net/wireless/Kconfig	2006-07-05 08:34:56.000000000 -0400
@@ -239,7 +239,8 @@ config IPW2200_DEBUG
 
 config AIRO
 	tristate "Cisco/Aironet 34X/35X/4500/4800 ISA and PCI cards"
-	depends on NET_RADIO && ISA_DMA_API && CRYPTO && (PCI || BROKEN)
+ 	depends on NET_RADIO && ISA_DMA_API && (PCI || BROKEN)
+	select CRYPTO
 	---help---
 	  This is the standard Linux driver to support Cisco/Aironet ISA and
 	  PCI 802.11 wireless cards.
@@ -374,6 +375,7 @@ config PCMCIA_HERMES
 config PCMCIA_SPECTRUM
 	tristate "Symbol Spectrum24 Trilogy PCMCIA card support"
 	depends on NET_RADIO && PCMCIA && HERMES
+	select FW_LOADER
 	---help---
 
 	  This is a driver for 802.11b cards using RAM-loadable Symbol
@@ -387,6 +389,7 @@ config PCMCIA_SPECTRUM
 config AIRO_CS
 	tristate "Cisco/Aironet 34X/35X/4500/4800 PCMCIA cards"
 	depends on NET_RADIO && PCMCIA && (BROKEN || !M32R)
+	select CRYPTO
 	---help---
 	  This is the standard Linux driver to support Cisco/Aironet PCMCIA
 	  802.11 wireless cards.  This driver is the same as the Aironet
diff -uprN linux-2.6.16/drivers/net/wireless/hostap/hostap_80211_tx.c linux-2.6.16.ovz/drivers/net/wireless/hostap/hostap_80211_tx.c
--- linux-2.6.16/drivers/net/wireless/hostap/hostap_80211_tx.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/drivers/net/wireless/hostap/hostap_80211_tx.c	2006-07-05 08:34:56.000000000 -0400
@@ -469,7 +469,7 @@ int hostap_master_start_xmit(struct sk_b
 	}
 
 	if (local->ieee_802_1x && meta->ethertype == ETH_P_PAE && tx.crypt &&
-	    !(fc & IEEE80211_FCTL_VERS)) {
+	    !(fc & IEEE80211_FCTL_PROTECTED)) {
 		no_encrypt = 1;
 		PDEBUG(DEBUG_EXTRA2, "%s: TX: IEEE 802.1X - passing "
 		       "unencrypted EAPOL frame\n", dev->name);
diff -uprN linux-2.6.16/drivers/net/wireless/ipw2200.c linux-2.6.16.ovz/drivers/net/wireless/ipw2200.c
--- linux-2.6.16/drivers/net/wireless/ipw2200.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/drivers/net/wireless/ipw2200.c	2006-07-05 08:34:56.000000000 -0400
@@ -8391,20 +8391,28 @@ static int ipw_wx_get_range(struct net_d
 
 	i = 0;
 	if (priv->ieee->mode & (IEEE_B | IEEE_G)) {
-		for (j = 0; j < geo->bg_channels && i < IW_MAX_FREQUENCIES;
-		     i++, j++) {
+		for (j = 0; j < geo->bg_channels && i < IW_MAX_FREQUENCIES; j++) {
+			if ((priv->ieee->iw_mode == IW_MODE_ADHOC) &&
+			    (geo->bg[j].flags & IEEE80211_CH_PASSIVE_ONLY))
+				continue;
+
 			range->freq[i].i = geo->bg[j].channel;
 			range->freq[i].m = geo->bg[j].freq * 100000;
 			range->freq[i].e = 1;
+			i++;
 		}
 	}
 
 	if (priv->ieee->mode & IEEE_A) {
-		for (j = 0; j < geo->a_channels && i < IW_MAX_FREQUENCIES;
-		     i++, j++) {
+		for (j = 0; j < geo->a_channels && i < IW_MAX_FREQUENCIES; j++) {
+			if ((priv->ieee->iw_mode == IW_MODE_ADHOC) &&
+			    (geo->a[j].flags & IEEE80211_CH_PASSIVE_ONLY))
+				continue;
+
 			range->freq[i].i = geo->a[j].channel;
 			range->freq[i].m = geo->a[j].freq * 100000;
 			range->freq[i].e = 1;
+			i++;
 		}
 	}
 
@@ -9956,9 +9964,8 @@ static int ipw_ethtool_set_eeprom(struct
 		return -EINVAL;
 	down(&p->sem);
 	memcpy(&p->eeprom[eeprom->offset], bytes, eeprom->len);
-	for (i = IPW_EEPROM_DATA;
-	     i < IPW_EEPROM_DATA + IPW_EEPROM_IMAGE_SIZE; i++)
-		ipw_write8(p, i, p->eeprom[i]);
+	for (i = 0; i < IPW_EEPROM_IMAGE_SIZE; i++)
+		ipw_write8(p, i + IPW_EEPROM_DATA, p->eeprom[i]);
 	up(&p->sem);
 	return 0;
 }
diff -uprN linux-2.6.16/drivers/pci/pci-acpi.c linux-2.6.16.ovz/drivers/pci/pci-acpi.c
--- linux-2.6.16/drivers/pci/pci-acpi.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/drivers/pci/pci-acpi.c	2006-07-05 08:34:56.000000000 -0400
@@ -33,13 +33,10 @@ acpi_query_osc (
 	acpi_status		status;
 	struct acpi_object_list	input;
 	union acpi_object 	in_params[4];
-	struct acpi_buffer	output;
-	union acpi_object 	out_obj;	
+	struct acpi_buffer	output = {ACPI_ALLOCATE_BUFFER, NULL};
+	union acpi_object 	*out_obj;
 	u32			osc_dw0;
 
-	/* Setting up output buffer */
-	output.length = sizeof(out_obj) + 3*sizeof(u32);  
-	output.pointer = &out_obj;
 	
 	/* Setting up input parameters */
 	input.count = 4;
@@ -61,12 +58,15 @@ acpi_query_osc (
 			"Evaluate _OSC Set fails. Status = 0x%04x\n", status);
 		return status;
 	}
-	if (out_obj.type != ACPI_TYPE_BUFFER) {
+	out_obj = output.pointer;
+
+	if (out_obj->type != ACPI_TYPE_BUFFER) {
 		printk(KERN_DEBUG  
 			"Evaluate _OSC returns wrong type\n");
-		return AE_TYPE;
+		status = AE_TYPE;
+		goto query_osc_out;
 	}
-	osc_dw0 = *((u32 *) out_obj.buffer.pointer);
+	osc_dw0 = *((u32 *) out_obj->buffer.pointer);
 	if (osc_dw0) {
 		if (osc_dw0 & OSC_REQUEST_ERROR)
 			printk(KERN_DEBUG "_OSC request fails\n"); 
@@ -76,15 +76,21 @@ acpi_query_osc (
 			printk(KERN_DEBUG "_OSC invalid revision\n"); 
 		if (osc_dw0 & OSC_CAPABILITIES_MASK_ERROR) {
 			/* Update Global Control Set */
-			global_ctrlsets = *((u32 *)(out_obj.buffer.pointer+8));
-			return AE_OK;
+			global_ctrlsets = *((u32 *)(out_obj->buffer.pointer+8));
+			status = AE_OK;
+			goto query_osc_out;
 		}
-		return AE_ERROR;
+		status = AE_ERROR;
+		goto query_osc_out;
 	}
 
 	/* Update Global Control Set */
-	global_ctrlsets = *((u32 *)(out_obj.buffer.pointer + 8));
-	return AE_OK;
+	global_ctrlsets = *((u32 *)(out_obj->buffer.pointer + 8));
+	status = AE_OK;
+
+query_osc_out:
+	kfree(output.pointer);
+	return status;
 }
 
 
@@ -96,14 +102,10 @@ acpi_run_osc (
 	acpi_status		status;
 	struct acpi_object_list	input;
 	union acpi_object 	in_params[4];
-	struct acpi_buffer	output;
-	union acpi_object 	out_obj;	
+	struct acpi_buffer	output = {ACPI_ALLOCATE_BUFFER, NULL};
+	union acpi_object 	*out_obj;
 	u32			osc_dw0;
 
-	/* Setting up output buffer */
-	output.length = sizeof(out_obj) + 3*sizeof(u32);  
-	output.pointer = &out_obj;
-	
 	/* Setting up input parameters */
 	input.count = 4;
 	input.pointer = in_params;
@@ -124,12 +126,14 @@ acpi_run_osc (
 			"Evaluate _OSC Set fails. Status = 0x%04x\n", status);
 		return status;
 	}
-	if (out_obj.type != ACPI_TYPE_BUFFER) {
+	out_obj = output.pointer;
+	if (out_obj->type != ACPI_TYPE_BUFFER) {
 		printk(KERN_DEBUG  
 			"Evaluate _OSC returns wrong type\n");
-		return AE_TYPE;
+		status = AE_TYPE;
+		goto run_osc_out;
 	}
-	osc_dw0 = *((u32 *) out_obj.buffer.pointer);
+	osc_dw0 = *((u32 *) out_obj->buffer.pointer);
 	if (osc_dw0) {
 		if (osc_dw0 & OSC_REQUEST_ERROR)
 			printk(KERN_DEBUG "_OSC request fails\n"); 
@@ -139,11 +143,17 @@ acpi_run_osc (
 			printk(KERN_DEBUG "_OSC invalid revision\n"); 
 		if (osc_dw0 & OSC_CAPABILITIES_MASK_ERROR) {
 			printk(KERN_DEBUG "_OSC FW not grant req. control\n");
-			return AE_SUPPORT;
+			status = AE_SUPPORT;
+			goto run_osc_out;
 		}
-		return AE_ERROR;
+		status = AE_ERROR;
+		goto run_osc_out;
 	}
-	return AE_OK;
+	status = AE_OK;
+
+run_osc_out:
+	kfree(output.pointer);
+	return status;
 }
 
 /**
diff -uprN linux-2.6.16/drivers/pci/probe.c linux-2.6.16.ovz/drivers/pci/probe.c
--- linux-2.6.16/drivers/pci/probe.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/drivers/pci/probe.c	2006-07-05 08:34:56.000000000 -0400
@@ -21,6 +21,7 @@ LIST_HEAD(pci_root_buses);
 EXPORT_SYMBOL(pci_root_buses);
 
 LIST_HEAD(pci_devices);
+EXPORT_SYMBOL(pci_devices);
 
 #ifdef HAVE_PCI_LEGACY
 /**
diff -uprN linux-2.6.16/drivers/pci/quirks.c linux-2.6.16.ovz/drivers/pci/quirks.c
--- linux-2.6.16/drivers/pci/quirks.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/drivers/pci/quirks.c	2006-07-05 08:34:56.000000000 -0400
@@ -631,6 +631,9 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_V
  * non-x86 architectures (yes Via exists on PPC among other places),
  * we must mask the PCI_INTERRUPT_LINE value versus 0xf to get
  * interrupts delivered properly.
+ *
+ * Some of the on-chip devices are actually '586 devices' so they are
+ * listed here.
  */
 static void quirk_via_irq(struct pci_dev *dev)
 {
@@ -639,13 +642,19 @@ static void quirk_via_irq(struct pci_dev
 	new_irq = dev->irq & 0xf;
 	pci_read_config_byte(dev, PCI_INTERRUPT_LINE, &irq);
 	if (new_irq != irq) {
-		printk(KERN_INFO "PCI: Via IRQ fixup for %s, from %d to %d\n",
+		printk(KERN_INFO "PCI: VIA IRQ fixup for %s, from %d to %d\n",
 			pci_name(dev), irq, new_irq);
 		udelay(15);	/* unknown if delay really needed */
 		pci_write_config_byte(dev, PCI_INTERRUPT_LINE, new_irq);
 	}
 }
-DECLARE_PCI_FIXUP_ENABLE(PCI_VENDOR_ID_VIA, PCI_ANY_ID, quirk_via_irq);
+DECLARE_PCI_FIXUP_ENABLE(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_82C586_0, quirk_via_irq);
+DECLARE_PCI_FIXUP_ENABLE(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_82C586_1, quirk_via_irq);
+DECLARE_PCI_FIXUP_ENABLE(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_82C586_2, quirk_via_irq);
+DECLARE_PCI_FIXUP_ENABLE(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_82C586_3, quirk_via_irq);
+DECLARE_PCI_FIXUP_ENABLE(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_82C686, quirk_via_irq);
+DECLARE_PCI_FIXUP_ENABLE(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_82C686_4, quirk_via_irq);
+DECLARE_PCI_FIXUP_ENABLE(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_82C686_5, quirk_via_irq);
 
 /*
  * VIA VT82C598 has its device ID settable and many BIOSes
@@ -861,6 +870,7 @@ static void __init quirk_eisa_bridge(str
 }
 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL,	PCI_DEVICE_ID_INTEL_82375,	quirk_eisa_bridge );
 
+#ifndef CONFIG_ACPI_SLEEP
 /*
  * On ASUS P4B boards, the SMBus PCI Device within the ICH2/4 southbridge
  * is not activated. The myth is that Asus said that they do not want the
@@ -872,8 +882,12 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_I
  * bridge. Unfortunately, this device has no subvendor/subdevice ID. So it 
  * becomes necessary to do this tweak in two steps -- I've chosen the Host
  * bridge as trigger.
+ *
+ * Actually, leaving it unhidden and not redoing the quirk over suspend2ram
+ * will cause thermal management to break down, causing the machine to
+ * overheat.
  */
-static int __initdata asus_hides_smbus = 0;
+static int __initdata asus_hides_smbus;
 
 static void __init asus_hides_smbus_hostbridge(struct pci_dev *dev)
 {
@@ -1008,6 +1022,8 @@ static void __init asus_hides_smbus_lpc_
 }
 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL,	PCI_DEVICE_ID_INTEL_ICH6_1,	asus_hides_smbus_lpc_ich6 );
 
+#endif
+
 /*
  * SiS 96x south bridge: BIOS typically hides SMBus device...
  */
diff -uprN linux-2.6.16/drivers/pcmcia/ds.c linux-2.6.16.ovz/drivers/pcmcia/ds.c
--- linux-2.6.16/drivers/pcmcia/ds.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/drivers/pcmcia/ds.c	2006-07-05 08:34:56.000000000 -0400
@@ -546,7 +546,7 @@ static int pcmcia_device_query(struct pc
 			tmp = vers1->str + vers1->ofs[i];
 
 			length = strlen(tmp) + 1;
-			if ((length < 3) || (length > 255))
+			if ((length < 2) || (length > 255))
 				continue;
 
 			p_dev->prod_id[i] = kmalloc(sizeof(char) * length,
diff -uprN linux-2.6.16/drivers/s390/cio/cio.c linux-2.6.16.ovz/drivers/s390/cio/cio.c
--- linux-2.6.16/drivers/s390/cio/cio.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/drivers/s390/cio/cio.c	2006-07-05 08:34:56.000000000 -0400
@@ -610,7 +610,11 @@ do_IRQ (struct pt_regs *regs)
 	struct tpi_info *tpi_info;
 	struct subchannel *sch;
 	struct irb *irb;
+	struct ve_struct *ve;
+	struct user_beancounter *ub;
 
+	ve = set_exec_env(get_ve0());
+	ub = set_exec_ub(get_ub0());
 	irq_enter ();
 	asm volatile ("mc 0,0");
 	if (S390_lowcore.int_clock >= S390_lowcore.jiffy_timer)
@@ -657,6 +661,8 @@ do_IRQ (struct pt_regs *regs)
 		 */
 	} while (!MACHINE_IS_VM && tpi (NULL) != 0);
 	irq_exit ();
+	(void)set_exec_ub(ub);
+	(void)set_exec_env(ve);
 }
 
 #ifdef CONFIG_CCW_CONSOLE
diff -uprN linux-2.6.16/drivers/scsi/3w-9xxx.c linux-2.6.16.ovz/drivers/scsi/3w-9xxx.c
--- linux-2.6.16/drivers/scsi/3w-9xxx.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/drivers/scsi/3w-9xxx.c	2006-07-05 08:34:56.000000000 -0400
@@ -85,7 +85,7 @@
 #include "3w-9xxx.h"
 
 /* Globals */
-#define TW_DRIVER_VERSION "2.26.02.005"
+#define TW_DRIVER_VERSION "2.26.02.007"
 static TW_Device_Extension *twa_device_extension_list[TW_MAX_SLOT];
 static unsigned int twa_device_extension_count;
 static int twa_major = -1;
@@ -1944,9 +1944,13 @@ static void twa_scsiop_execute_scsi_comp
 		}
 		if (tw_dev->srb[request_id]->use_sg == 1) {
 			struct scatterlist *sg = (struct scatterlist *)tw_dev->srb[request_id]->request_buffer;
-			char *buf = kmap_atomic(sg->page, KM_IRQ0) + sg->offset;
+			char *buf;
+			unsigned long flags = 0;
+			local_irq_save(flags);
+			buf = kmap_atomic(sg->page, KM_IRQ0) + sg->offset;
 			memcpy(buf, tw_dev->generic_buffer_virt[request_id], sg->length);
 			kunmap_atomic(buf - sg->offset, KM_IRQ0);
+			local_irq_restore(flags);
 		}
 	}
 } /* End twa_scsiop_execute_scsi_complete() */
diff -uprN linux-2.6.16/drivers/scsi/3w-xxxx.c linux-2.6.16.ovz/drivers/scsi/3w-xxxx.c
--- linux-2.6.16/drivers/scsi/3w-xxxx.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/drivers/scsi/3w-xxxx.c	2006-07-05 08:34:56.000000000 -0400
@@ -1508,10 +1508,12 @@ static void tw_transfer_internal(TW_Devi
 	struct scsi_cmnd *cmd = tw_dev->srb[request_id];
 	void *buf;
 	unsigned int transfer_len;
+	unsigned long flags = 0;
 
 	if (cmd->use_sg) {
 		struct scatterlist *sg =
 			(struct scatterlist *)cmd->request_buffer;
+		local_irq_save(flags);
 		buf = kmap_atomic(sg->page, KM_IRQ0) + sg->offset;
 		transfer_len = min(sg->length, len);
 	} else {
@@ -1526,6 +1528,7 @@ static void tw_transfer_internal(TW_Devi
 
 		sg = (struct scatterlist *)cmd->request_buffer;
 		kunmap_atomic(buf - sg->offset, KM_IRQ0);
+		local_irq_restore(flags);
 	}
 }
 
diff -uprN linux-2.6.16/drivers/scsi/libata-core.c linux-2.6.16.ovz/drivers/scsi/libata-core.c
--- linux-2.6.16/drivers/scsi/libata-core.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/drivers/scsi/libata-core.c	2006-07-05 08:34:56.000000000 -0400
@@ -4293,6 +4293,7 @@ static int ata_start_drive(struct ata_po
 int ata_device_resume(struct ata_port *ap, struct ata_device *dev)
 {
 	if (ap->flags & ATA_FLAG_SUSPENDED) {
+		ata_busy_wait(ap, ATA_BUSY | ATA_DRQ, 200000);
 		ap->flags &= ~ATA_FLAG_SUSPENDED;
 		ata_set_mode(ap);
 	}
diff -uprN linux-2.6.16/drivers/scsi/sata_mv.c linux-2.6.16.ovz/drivers/scsi/sata_mv.c
--- linux-2.6.16/drivers/scsi/sata_mv.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/drivers/scsi/sata_mv.c	2006-07-05 08:34:56.000000000 -0400
@@ -1102,6 +1102,7 @@ static u8 mv_get_crpb_status(struct ata_
 	void __iomem *port_mmio = mv_ap_base(ap);
 	struct mv_port_priv *pp = ap->private_data;
 	u32 out_ptr;
+	u8 ata_status;
 
 	out_ptr = readl(port_mmio + EDMA_RSP_Q_OUT_PTR_OFS);
 
@@ -1109,6 +1110,8 @@ static u8 mv_get_crpb_status(struct ata_
 	assert(((out_ptr >> EDMA_RSP_Q_PTR_SHIFT) & MV_MAX_Q_DEPTH_MASK) ==
 	       pp->rsp_consumer);
 
+	ata_status = pp->crpb[pp->rsp_consumer].flags >> CRPB_FLAG_STATUS_SHIFT;
+
 	/* increment our consumer index... */
 	pp->rsp_consumer = mv_inc_q_index(&pp->rsp_consumer);
 
@@ -1123,7 +1126,7 @@ static u8 mv_get_crpb_status(struct ata_
 	writelfl(out_ptr, port_mmio + EDMA_RSP_Q_OUT_PTR_OFS);
 
 	/* Return ATA status register for completed CRPB */
-	return (pp->crpb[pp->rsp_consumer].flags >> CRPB_FLAG_STATUS_SHIFT);
+	return ata_status;
 }
 
 /**
@@ -1192,7 +1195,6 @@ static void mv_host_intr(struct ata_host
 	u32 hc_irq_cause;
 	int shift, port, port0, hard_port, handled;
 	unsigned int err_mask;
-	u8 ata_status = 0;
 
 	if (hc == 0) {
 		port0 = 0;
@@ -1210,6 +1212,7 @@ static void mv_host_intr(struct ata_host
 		hc,relevant,hc_irq_cause);
 
 	for (port = port0; port < port0 + MV_PORTS_PER_HC; port++) {
+		u8 ata_status = 0;
 		ap = host_set->ports[port];
 		hard_port = port & MV_PORT_MASK;	/* range 0-3 */
 		handled = 0;	/* ensure ata_status is set if handled++ */
diff -uprN linux-2.6.16/drivers/scsi/scsi_lib.c linux-2.6.16.ovz/drivers/scsi/scsi_lib.c
--- linux-2.6.16/drivers/scsi/scsi_lib.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/drivers/scsi/scsi_lib.c	2006-07-05 08:34:56.000000000 -0400
@@ -368,7 +368,7 @@ static int scsi_req_map_sg(struct reques
 			   int nsegs, unsigned bufflen, gfp_t gfp)
 {
 	struct request_queue *q = rq->q;
-	int nr_pages = (bufflen + PAGE_SIZE - 1) >> PAGE_SHIFT;
+	int nr_pages = (bufflen + sgl[0].offset + PAGE_SIZE - 1) >> PAGE_SHIFT;
 	unsigned int data_len = 0, len, bytes, off;
 	struct page *page;
 	struct bio *bio = NULL;
diff -uprN linux-2.6.16/drivers/sn/ioc3.c linux-2.6.16.ovz/drivers/sn/ioc3.c
--- linux-2.6.16/drivers/sn/ioc3.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/drivers/sn/ioc3.c	2006-07-05 08:34:56.000000000 -0400
@@ -677,7 +677,7 @@ static int ioc3_probe(struct pci_dev *pd
 	/* Track PCI-device specific data */
 	pci_set_drvdata(pdev, idd);
 	down_write(&ioc3_devices_rwsem);
-	list_add(&idd->list, &ioc3_devices);
+	list_add_tail(&idd->list, &ioc3_devices);
 	idd->id = ioc3_counter++;
 	up_write(&ioc3_devices_rwsem);
 
diff -uprN linux-2.6.16/drivers/sn/ioc4.c linux-2.6.16.ovz/drivers/sn/ioc4.c
--- linux-2.6.16/drivers/sn/ioc4.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/drivers/sn/ioc4.c	2006-07-05 08:34:56.000000000 -0400
@@ -313,7 +313,7 @@ ioc4_probe(struct pci_dev *pdev, const s
 	idd->idd_serial_data = NULL;
 	pci_set_drvdata(idd->idd_pdev, idd);
 	down_write(&ioc4_devices_rwsem);
-	list_add(&idd->idd_list, &ioc4_devices);
+	list_add_tail(&idd->idd_list, &ioc4_devices);
 	up_write(&ioc4_devices_rwsem);
 
 	/* Add this IOC4 to all submodules */
diff -uprN linux-2.6.16/drivers/usb/core/message.c linux-2.6.16.ovz/drivers/usb/core/message.c
--- linux-2.6.16/drivers/usb/core/message.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/drivers/usb/core/message.c	2006-07-05 08:34:56.000000000 -0400
@@ -1388,11 +1388,13 @@ free_interfaces:
 	if (dev->state != USB_STATE_ADDRESS)
 		usb_disable_device (dev, 1);	// Skip ep0
 
-	i = dev->bus_mA - cp->desc.bMaxPower * 2;
-	if (i < 0)
-		dev_warn(&dev->dev, "new config #%d exceeds power "
-				"limit by %dmA\n",
-				configuration, -i);
+	if (cp) {
+		i = dev->bus_mA - cp->desc.bMaxPower * 2;
+		if (i < 0)
+			dev_warn(&dev->dev, "new config #%d exceeds power "
+					"limit by %dmA\n",
+					configuration, -i);
+	}
 
 	if ((ret = usb_control_msg(dev, usb_sndctrlpipe(dev, 0),
 			USB_REQ_SET_CONFIGURATION, 0, configuration, 0,
diff -uprN linux-2.6.16/drivers/usb/host/ehci-sched.c linux-2.6.16.ovz/drivers/usb/host/ehci-sched.c
--- linux-2.6.16/drivers/usb/host/ehci-sched.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/drivers/usb/host/ehci-sched.c	2006-07-05 08:34:56.000000000 -0400
@@ -707,6 +707,7 @@ iso_stream_init (
 	} else {
 		u32		addr;
 		int		think_time;
+		int		hs_transfers;
 
 		addr = dev->ttport << 24;
 		if (!ehci_is_TDI(ehci)
@@ -719,6 +720,7 @@ iso_stream_init (
 		think_time = dev->tt ? dev->tt->think_time : 0;
 		stream->tt_usecs = NS_TO_US (think_time + usb_calc_bus_time (
 				dev->speed, is_input, 1, maxp));
+		hs_transfers = max (1u, (maxp + 187) / 188);
 		if (is_input) {
 			u32	tmp;
 
@@ -727,12 +729,11 @@ iso_stream_init (
 			stream->usecs = HS_USECS_ISO (1);
 			stream->raw_mask = 1;
 
-			/* pessimistic c-mask */
-			tmp = usb_calc_bus_time (USB_SPEED_FULL, 1, 0, maxp)
-					/ (125 * 1000);
-			stream->raw_mask |= 3 << (tmp + 9);
+			/* c-mask as specified in USB 2.0 11.18.4 3.c */
+			tmp = (1 << (hs_transfers + 2)) - 1;
+			stream->raw_mask |= tmp << (8 + 2);
 		} else
-			stream->raw_mask = smask_out [maxp / 188];
+			stream->raw_mask = smask_out [hs_transfers - 1];
 		bandwidth = stream->usecs + stream->c_usecs;
 		bandwidth /= 1 << (interval + 2);
 
diff -uprN linux-2.6.16/drivers/usb/serial/console.c linux-2.6.16.ovz/drivers/usb/serial/console.c
--- linux-2.6.16/drivers/usb/serial/console.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/drivers/usb/serial/console.c	2006-07-05 08:34:56.000000000 -0400
@@ -54,7 +54,7 @@ static struct console usbcons;
  * serial.c code, except that the specifier is "ttyUSB" instead
  * of "ttyS".
  */
-static int __init usb_console_setup(struct console *co, char *options)
+static int usb_console_setup(struct console *co, char *options)
 {
 	struct usbcons_info *info = &usbcons_info;
 	int baud = 9600;
diff -uprN linux-2.6.16/drivers/usb/serial/option.c linux-2.6.16.ovz/drivers/usb/serial/option.c
--- linux-2.6.16/drivers/usb/serial/option.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/drivers/usb/serial/option.c	2006-07-05 08:34:56.000000000 -0400
@@ -582,14 +582,14 @@ static void option_setup_urbs(struct usb
 	portdata = usb_get_serial_port_data(port);
 
 	/* Do indat endpoints first */
-	for (j = 0; j <= N_IN_URB; ++j) {
+	for (j = 0; j < N_IN_URB; ++j) {
 		portdata->in_urbs[j] = option_setup_urb (serial,
                   port->bulk_in_endpointAddress, USB_DIR_IN, port,
                   portdata->in_buffer[j], IN_BUFLEN, option_indat_callback);
 	}
 
 	/* outdat endpoints */
-	for (j = 0; j <= N_OUT_URB; ++j) {
+	for (j = 0; j < N_OUT_URB; ++j) {
 		portdata->out_urbs[j] = option_setup_urb (serial,
                   port->bulk_out_endpointAddress, USB_DIR_OUT, port,
                   portdata->out_buffer[j], OUT_BUFLEN, option_outdat_callback);
diff -uprN linux-2.6.16/drivers/usb/serial/whiteheat.c linux-2.6.16.ovz/drivers/usb/serial/whiteheat.c
--- linux-2.6.16/drivers/usb/serial/whiteheat.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/drivers/usb/serial/whiteheat.c	2006-07-05 08:34:56.000000000 -0400
@@ -388,7 +388,7 @@ static int whiteheat_attach (struct usb_
 	if (ret) {
 		err("%s: Couldn't send command [%d]", serial->type->description, ret);
 		goto no_firmware;
-	} else if (alen != sizeof(command)) {
+	} else if (alen != 2) {
 		err("%s: Send command incomplete [%d]", serial->type->description, alen);
 		goto no_firmware;
 	}
@@ -400,7 +400,7 @@ static int whiteheat_attach (struct usb_
 	if (ret) {
 		err("%s: Couldn't get results [%d]", serial->type->description, ret);
 		goto no_firmware;
-	} else if (alen != sizeof(result)) {
+	} else if (alen != sizeof(*hw_info) + 1) {
 		err("%s: Get results incomplete [%d]", serial->type->description, alen);
 		goto no_firmware;
 	} else if (result[0] != command[0]) {
diff -uprN linux-2.6.16/drivers/usb/storage/Kconfig linux-2.6.16.ovz/drivers/usb/storage/Kconfig
--- linux-2.6.16/drivers/usb/storage/Kconfig	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/drivers/usb/storage/Kconfig	2006-07-05 08:34:56.000000000 -0400
@@ -48,7 +48,8 @@ config USB_STORAGE_FREECOM
 
 config USB_STORAGE_ISD200
 	bool "ISD-200 USB/ATA Bridge support"
-	depends on USB_STORAGE && BLK_DEV_IDE
+	depends on USB_STORAGE
+	depends on BLK_DEV_IDE=y || BLK_DEV_IDE=USB_STORAGE
 	---help---
 	  Say Y here if you want to use USB Mass Store devices based
 	  on the In-Systems Design ISD-200 USB/ATA bridge.
diff -uprN linux-2.6.16/drivers/video/cfbimgblt.c linux-2.6.16.ovz/drivers/video/cfbimgblt.c
--- linux-2.6.16/drivers/video/cfbimgblt.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/drivers/video/cfbimgblt.c	2006-07-05 08:34:56.000000000 -0400
@@ -169,7 +169,7 @@ static inline void slow_imageblit(const 
 
 		while (j--) {
 			l--;
-			color = (*s & 1 << (FB_BIT_NR(l))) ? fgcolor : bgcolor;
+			color = (*s & (1 << l)) ? fgcolor : bgcolor;
 			val |= FB_SHIFT_HIGH(color, shift);
 			
 			/* Did the bitshift spill bits to the next long? */
diff -uprN linux-2.6.16/drivers/video/fbmem.c linux-2.6.16.ovz/drivers/video/fbmem.c
--- linux-2.6.16/drivers/video/fbmem.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/drivers/video/fbmem.c	2006-07-05 08:34:56.000000000 -0400
@@ -669,13 +669,19 @@ fb_write(struct file *file, const char _
 		total_size = info->fix.smem_len;
 
 	if (p > total_size)
-		return 0;
+		return -EFBIG;
 
-	if (count >= total_size)
+	if (count > total_size) {
+		err = -EFBIG;
 		count = total_size;
+	}
+
+	if (count + p > total_size) {
+		if (!err)
+			err = -ENOSPC;
 
-	if (count + p > total_size)
 		count = total_size - p;
+	}
 
 	buffer = kmalloc((count > PAGE_SIZE) ? PAGE_SIZE : count,
 			 GFP_KERNEL);
@@ -717,7 +723,7 @@ fb_write(struct file *file, const char _
 
 	kfree(buffer);
 
-	return (err) ? err : cnt;
+	return (cnt) ? cnt : err;
 }
 
 #ifdef CONFIG_KMOD
diff -uprN linux-2.6.16/drivers/video/i810/i810_main.c linux-2.6.16.ovz/drivers/video/i810/i810_main.c
--- linux-2.6.16/drivers/video/i810/i810_main.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/drivers/video/i810/i810_main.c	2006-07-05 08:34:56.000000000 -0400
@@ -1508,7 +1508,7 @@ static int i810fb_cursor(struct fb_info 
 		int size = ((cursor->image.width + 7) >> 3) *
 			cursor->image.height;
 		int i;
-		u8 *data = kmalloc(64 * 8, GFP_KERNEL);
+		u8 *data = kmalloc(64 * 8, GFP_ATOMIC);
 
 		if (data == NULL)
 			return -ENOMEM;
diff -uprN linux-2.6.16/fs/9p/vfs_inode.c linux-2.6.16.ovz/fs/9p/vfs_inode.c
--- linux-2.6.16/fs/9p/vfs_inode.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/fs/9p/vfs_inode.c	2006-07-05 08:34:56.000000000 -0400
@@ -614,6 +614,7 @@ static struct dentry *v9fs_vfs_lookup(st
 
 	sb = dir->i_sb;
 	v9ses = v9fs_inode2v9ses(dir);
+	dentry->d_op = &v9fs_dentry_operations;
 	dirfid = v9fs_fid_lookup(dentry->d_parent);
 
 	if (!dirfid) {
@@ -681,8 +682,6 @@ static struct dentry *v9fs_vfs_lookup(st
 		goto FreeFcall;
 
 	fid->qid = fcall->params.rstat.stat.qid;
-
-	dentry->d_op = &v9fs_dentry_operations;
 	v9fs_stat2inode(&fcall->params.rstat.stat, inode, inode->i_sb);
 
 	d_add(dentry, inode);
diff -uprN linux-2.6.16/fs/Kconfig linux-2.6.16.ovz/fs/Kconfig
--- linux-2.6.16/fs/Kconfig	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/fs/Kconfig	2006-07-05 08:34:56.000000000 -0400
@@ -418,6 +418,15 @@ config QUOTA
 	  with the quota tools. Probably the quota support is only useful for
 	  multi user systems. If unsure, say N.
 
+config QUOTA_COMPAT
+	bool "Compatibility with older quotactl interface"
+	depends on QUOTA
+	help
+	  This option enables a compatibility layer for an older version
+	  of the quotactl interface with byte granularity (QUOTAON at 0x0100,
+	  GETQUOTA at 0x0D00).  Interface versions older than that one and
+	  with block granularity are still not supported.
+
 config QFMT_V1
 	tristate "Old quota format support"
 	depends on QUOTA
@@ -433,6 +442,38 @@ config QFMT_V2
 	  This quota format allows using quotas with 32-bit UIDs/GIDs. If you
 	  need this functionality say Y here.
 
+config SIM_FS
+	tristate "VPS filesystem"
+	depends on VZ_QUOTA
+	default m
+	help
+	  This file system is a part of Virtuozzo. It introduces a fake
+	  superblock and blockdev to VE to hide real device and show
+	  statfs results taken from quota.
+
+config VZ_QUOTA
+	tristate "Virtuozzo Disk Quota support"
+	depends on QUOTA
+	default m
+	help
+	  Virtuozzo Disk Quota imposes disk quota on directories with their
+	  files and subdirectories in total.  Such disk quota is used to
+	  account and limit disk usage by Virtuozzo VPS, but also may be used
+	  separately.
+
+config VZ_QUOTA_UNLOAD
+	bool "Unloadable Virtuozzo Disk Quota module"
+	depends on VZ_QUOTA=m
+	default n
+	help
+	  Make Virtuozzo Disk Quota module unloadable.
+	  Doesn't work reliably now.
+
+config VZ_QUOTA_UGID
+	bool "Per-user and per-group quota in Virtuozzo quota partitions"
+	depends on VZ_QUOTA!=n
+	default y
+
 config QUOTACTL
 	bool
 	depends on XFS_QUOTA || QUOTA
diff -uprN linux-2.6.16/fs/Makefile linux-2.6.16.ovz/fs/Makefile
--- linux-2.6.16/fs/Makefile	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/fs/Makefile	2006-07-05 08:34:56.000000000 -0400
@@ -39,9 +39,15 @@ obj-$(CONFIG_QUOTA)		+= dquot.o
 obj-$(CONFIG_QFMT_V1)		+= quota_v1.o
 obj-$(CONFIG_QFMT_V2)		+= quota_v2.o
 obj-$(CONFIG_QUOTACTL)		+= quota.o
+obj-$(CONFIG_VZ_QUOTA)		+= vzdquota.o
+vzdquota-y			+= vzdquot.o vzdq_mgmt.o vzdq_ops.o vzdq_tree.o
+vzdquota-$(CONFIG_VZ_QUOTA_UGID) += vzdq_ugid.o
+vzdquota-$(CONFIG_VZ_QUOTA_UGID) += vzdq_file.o
 
 obj-$(CONFIG_DNOTIFY)		+= dnotify.o
 
+obj-$(CONFIG_SIM_FS)		+= simfs.o
+
 obj-$(CONFIG_PROC_FS)		+= proc/
 obj-y				+= partitions/
 obj-$(CONFIG_SYSFS)		+= sysfs/
diff -uprN linux-2.6.16/fs/aio.c linux-2.6.16.ovz/fs/aio.c
--- linux-2.6.16/fs/aio.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/fs/aio.c	2006-07-05 08:34:56.000000000 -0400
@@ -41,13 +41,16 @@
 #endif
 
 /*------ sysctl variables----*/
-static DEFINE_SPINLOCK(aio_nr_lock);
+DEFINE_SPINLOCK(aio_nr_lock);
 unsigned long aio_nr;		/* current system wide number of aio requests */
 unsigned long aio_max_nr = 0x10000; /* system wide maximum number of aio requests */
+EXPORT_SYMBOL_GPL(aio_nr_lock);
+EXPORT_SYMBOL_GPL(aio_nr);
 /*----end sysctl variables---*/
 
 static kmem_cache_t	*kiocb_cachep;
-static kmem_cache_t	*kioctx_cachep;
+kmem_cache_t	*kioctx_cachep;
+EXPORT_SYMBOL_GPL(kioctx_cachep);
 
 static struct workqueue_struct *aio_wq;
 
@@ -58,7 +61,7 @@ static DECLARE_WORK(fput_work, aio_fput_
 static DEFINE_SPINLOCK(fput_lock);
 static LIST_HEAD(fput_head);
 
-static void aio_kick_handler(void *);
+void aio_kick_handler(void *);
 static void aio_queue_work(struct kioctx *);
 
 /* aio_setup
@@ -293,7 +296,7 @@ static void aio_cancel_all(struct kioctx
 	spin_unlock_irq(&ctx->ctx_lock);
 }
 
-static void wait_for_all_aios(struct kioctx *ctx)
+void wait_for_all_aios(struct kioctx *ctx)
 {
 	struct task_struct *tsk = current;
 	DECLARE_WAITQUEUE(wait, tsk);
@@ -310,6 +313,7 @@ static void wait_for_all_aios(struct kio
 	__set_task_state(tsk, TASK_RUNNING);
 	remove_wait_queue(&ctx->wait, &wait);
 }
+EXPORT_SYMBOL_GPL(wait_for_all_aios);
 
 /* wait_on_sync_kiocb:
  *	Waits on the given sync kiocb to complete.
@@ -856,7 +860,7 @@ static inline void aio_run_all_iocbs(str
  *      space.
  * Run on aiod's context.
  */
-static void aio_kick_handler(void *data)
+void aio_kick_handler(void *data)
 {
 	struct kioctx *ctx = data;
 	mm_segment_t oldfs = get_fs();
@@ -875,6 +879,7 @@ static void aio_kick_handler(void *data)
 	if (requeue)
 		queue_work(aio_wq, &ctx->wq);
 }
+EXPORT_SYMBOL_GPL(aio_kick_handler);
 
 
 /*
diff -uprN linux-2.6.16/fs/autofs/autofs_i.h linux-2.6.16.ovz/fs/autofs/autofs_i.h
--- linux-2.6.16/fs/autofs/autofs_i.h	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/fs/autofs/autofs_i.h	2006-07-05 08:34:56.000000000 -0400
@@ -124,7 +124,7 @@ static inline struct autofs_sb_info *aut
    filesystem without "magic".) */
 
 static inline int autofs_oz_mode(struct autofs_sb_info *sbi) {
-	return sbi->catatonic || process_group(current) == sbi->oz_pgrp;
+	return sbi->catatonic || virt_pgid(current) == sbi->oz_pgrp;
 }
 
 /* Hash operations */
diff -uprN linux-2.6.16/fs/autofs/init.c linux-2.6.16.ovz/fs/autofs/init.c
--- linux-2.6.16/fs/autofs/init.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/fs/autofs/init.c	2006-07-05 08:34:56.000000000 -0400
@@ -25,6 +25,7 @@ static struct file_system_type autofs_fs
 	.name		= "autofs",
 	.get_sb		= autofs_get_sb,
 	.kill_sb	= kill_anon_super,
+	.fs_flags	= FS_VIRTUALIZED,
 };
 
 static int __init init_autofs_fs(void)
diff -uprN linux-2.6.16/fs/autofs/inode.c linux-2.6.16.ovz/fs/autofs/inode.c
--- linux-2.6.16/fs/autofs/inode.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/fs/autofs/inode.c	2006-07-05 08:34:56.000000000 -0400
@@ -66,7 +66,7 @@ static int parse_options(char *options, 
 
 	*uid = current->uid;
 	*gid = current->gid;
-	*pgrp = process_group(current);
+	*pgrp = virt_pgid(current);
 
 	*minproto = *maxproto = AUTOFS_PROTO_VERSION;
 
@@ -138,7 +138,7 @@ int autofs_fill_super(struct super_block
 	sbi->magic = AUTOFS_SBI_MAGIC;
 	sbi->catatonic = 0;
 	sbi->exp_timeout = 0;
-	sbi->oz_pgrp = process_group(current);
+	sbi->oz_pgrp = virt_pgid(current);
 	autofs_initialize_hash(&sbi->dirhash);
 	sbi->queues = NULL;
 	memset(sbi->symlink_bitmap, 0, sizeof(long)*AUTOFS_SYMLINK_BITMAP_LEN);
diff -uprN linux-2.6.16/fs/autofs/root.c linux-2.6.16.ovz/fs/autofs/root.c
--- linux-2.6.16/fs/autofs/root.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/fs/autofs/root.c	2006-07-05 08:34:56.000000000 -0400
@@ -354,7 +354,7 @@ static int autofs_root_unlink(struct ino
 
 	/* This allows root to remove symlinks */
 	lock_kernel();
-	if ( !autofs_oz_mode(sbi) && !capable(CAP_SYS_ADMIN) ) {
+	if ( !autofs_oz_mode(sbi) && !capable(CAP_SYS_ADMIN) && !capable(CAP_VE_SYS_ADMIN) ) {
 		unlock_kernel();
 		return -EACCES;
 	}
@@ -541,7 +541,7 @@ static int autofs_root_ioctl(struct inod
 	     _IOC_NR(cmd) - _IOC_NR(AUTOFS_IOC_FIRST) >= AUTOFS_IOC_COUNT )
 		return -ENOTTY;
 	
-	if ( !autofs_oz_mode(sbi) && !capable(CAP_SYS_ADMIN) )
+	if ( !autofs_oz_mode(sbi) && !capable(CAP_SYS_ADMIN) && !capable(CAP_VE_SYS_ADMIN) )
 		return -EPERM;
 	
 	switch(cmd) {
diff -uprN linux-2.6.16/fs/autofs4/autofs_i.h linux-2.6.16.ovz/fs/autofs4/autofs_i.h
--- linux-2.6.16/fs/autofs4/autofs_i.h	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/fs/autofs4/autofs_i.h	2006-07-05 08:34:56.000000000 -0400
@@ -122,7 +122,7 @@ static inline struct autofs_info *autofs
    filesystem without "magic".) */
 
 static inline int autofs4_oz_mode(struct autofs_sb_info *sbi) {
-	return sbi->catatonic || process_group(current) == sbi->oz_pgrp;
+	return sbi->catatonic || virt_pgid(current) == sbi->oz_pgrp;
 }
 
 /* Does a dentry have some pending activity? */
diff -uprN linux-2.6.16/fs/autofs4/init.c linux-2.6.16.ovz/fs/autofs4/init.c
--- linux-2.6.16/fs/autofs4/init.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/fs/autofs4/init.c	2006-07-05 08:34:56.000000000 -0400
@@ -25,6 +25,7 @@ static struct file_system_type autofs_fs
 	.name		= "autofs",
 	.get_sb		= autofs_get_sb,
 	.kill_sb	= kill_anon_super,
+	.fs_flags	= FS_VIRTUALIZED,
 };
 
 static int __init init_autofs4_fs(void)
diff -uprN linux-2.6.16/fs/autofs4/inode.c linux-2.6.16.ovz/fs/autofs4/inode.c
--- linux-2.6.16/fs/autofs4/inode.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/fs/autofs4/inode.c	2006-07-05 08:34:56.000000000 -0400
@@ -179,7 +179,7 @@ static int parse_options(char *options, 
 
 	*uid = current->uid;
 	*gid = current->gid;
-	*pgrp = process_group(current);
+	*pgrp = virt_pgid(current);
 
 	*minproto = AUTOFS_MIN_PROTO_VERSION;
 	*maxproto = AUTOFS_MAX_PROTO_VERSION;
@@ -265,7 +265,7 @@ int autofs4_fill_super(struct super_bloc
 	sbi->root = NULL;
 	sbi->catatonic = 0;
 	sbi->exp_timeout = 0;
-	sbi->oz_pgrp = process_group(current);
+	sbi->oz_pgrp = virt_pgid(current);
 	sbi->sb = s;
 	sbi->version = 0;
 	sbi->sub_version = 0;
diff -uprN linux-2.6.16/fs/autofs4/root.c linux-2.6.16.ovz/fs/autofs4/root.c
--- linux-2.6.16/fs/autofs4/root.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/fs/autofs4/root.c	2006-07-05 08:34:56.000000000 -0400
@@ -592,7 +592,7 @@ static int autofs4_dir_unlink(struct ino
 	struct autofs_info *ino = autofs4_dentry_ino(dentry);
 	
 	/* This allows root to remove symlinks */
-	if ( !autofs4_oz_mode(sbi) && !capable(CAP_SYS_ADMIN) )
+	if ( !autofs4_oz_mode(sbi) && !capable(CAP_SYS_ADMIN) && !capable(CAP_VE_SYS_ADMIN) )
 		return -EACCES;
 
 	dput(ino->dentry);
@@ -784,7 +784,7 @@ static int autofs4_root_ioctl(struct ino
 	     _IOC_NR(cmd) - _IOC_NR(AUTOFS_IOC_FIRST) >= AUTOFS_IOC_COUNT )
 		return -ENOTTY;
 	
-	if ( !autofs4_oz_mode(sbi) && !capable(CAP_SYS_ADMIN) )
+	if ( !autofs4_oz_mode(sbi) && !capable(CAP_SYS_ADMIN) && !capable(CAP_VE_SYS_ADMIN) )
 		return -EPERM;
 	
 	switch(cmd) {
diff -uprN linux-2.6.16/fs/binfmt_aout.c linux-2.6.16.ovz/fs/binfmt_aout.c
--- linux-2.6.16/fs/binfmt_aout.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/fs/binfmt_aout.c	2006-07-05 08:34:56.000000000 -0400
@@ -446,9 +446,11 @@ beyond_if:
 #endif
 	start_thread(regs, ex.a_entry, current->mm->start_stack);
 	if (unlikely(current->ptrace & PT_PTRACED)) {
-		if (current->ptrace & PT_TRACE_EXEC)
+		if (current->ptrace & PT_TRACE_EXEC) {
+			set_pn_state(current, PN_STOP_EXEC);
 			ptrace_notify ((PTRACE_EVENT_EXEC << 8) | SIGTRAP);
-		else
+			clear_pn_state(current);
+		} else
 			send_sig(SIGTRAP, current, 0);
 	}
 	return 0;
diff -uprN linux-2.6.16/fs/binfmt_elf.c linux-2.6.16.ovz/fs/binfmt_elf.c
--- linux-2.6.16/fs/binfmt_elf.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/fs/binfmt_elf.c	2006-07-05 08:34:56.000000000 -0400
@@ -361,7 +361,7 @@ static unsigned long load_elf_interp(str
 	eppnt = elf_phdata;
 	for (i=0; i<interp_elf_ex->e_phnum; i++, eppnt++) {
 	  if (eppnt->p_type == PT_LOAD) {
-	    int elf_type = MAP_PRIVATE | MAP_DENYWRITE;
+	    int elf_type = MAP_PRIVATE | MAP_DENYWRITE | MAP_EXECPRIO;
 	    int elf_prot = 0;
 	    unsigned long vaddr = 0;
 	    unsigned long k, map_addr;
@@ -669,7 +669,7 @@ static int load_elf_binary(struct linux_
 			 */
 			SET_PERSONALITY(loc->elf_ex, ibcs2_interpreter);
 
-			interpreter = open_exec(elf_interpreter);
+			interpreter = open_exec(elf_interpreter, NULL);
 			retval = PTR_ERR(interpreter);
 			if (IS_ERR(interpreter))
 				goto out_free_interp;
@@ -834,7 +834,7 @@ static int load_elf_binary(struct linux_
 		if (elf_ppnt->p_flags & PF_W) elf_prot |= PROT_WRITE;
 		if (elf_ppnt->p_flags & PF_X) elf_prot |= PROT_EXEC;
 
-		elf_flags = MAP_PRIVATE|MAP_DENYWRITE|MAP_EXECUTABLE;
+		elf_flags = MAP_PRIVATE|MAP_DENYWRITE|MAP_EXECUTABLE|MAP_EXECPRIO;
 
 		vaddr = elf_ppnt->p_vaddr;
 		if (loc->elf_ex.e_type == ET_EXEC || load_addr_set) {
@@ -1000,9 +1000,11 @@ static int load_elf_binary(struct linux_
 
 	start_thread(regs, elf_entry, bprm->p);
 	if (unlikely(current->ptrace & PT_PTRACED)) {
-		if (current->ptrace & PT_TRACE_EXEC)
+		if (current->ptrace & PT_TRACE_EXEC) {
+			set_pn_state(current, PN_STOP_EXEC);
 			ptrace_notify ((PTRACE_EVENT_EXEC << 8) | SIGTRAP);
-		else
+			clear_pn_state(current);
+		} else
 			send_sig(SIGTRAP, current, 0);
 	}
 	retval = 0;
@@ -1022,8 +1024,13 @@ out_free_file:
 	sys_close(elf_exec_fileno);
 out_free_fh:
 	if (files) {
-		put_files_struct(current->files);
+		struct files_struct *old;
+
+		old = current->files;
+		task_lock(current);
 		current->files = files;
+		task_unlock(current);
+		put_files_struct(old);
 	}
 out_free_ph:
 	kfree(elf_phdata);
@@ -1281,10 +1288,10 @@ static void fill_prstatus(struct elf_prs
 	prstatus->pr_info.si_signo = prstatus->pr_cursig = signr;
 	prstatus->pr_sigpend = p->pending.signal.sig[0];
 	prstatus->pr_sighold = p->blocked.sig[0];
-	prstatus->pr_pid = p->pid;
-	prstatus->pr_ppid = p->parent->pid;
-	prstatus->pr_pgrp = process_group(p);
-	prstatus->pr_sid = p->signal->session;
+	prstatus->pr_pid = virt_pid(p);
+	prstatus->pr_ppid = virt_pid(p->parent);
+	prstatus->pr_pgrp = virt_pgid(p);
+	prstatus->pr_sid = virt_sid(p);
 	if (thread_group_leader(p)) {
 		/*
 		 * This is the record for the group leader.  Add in the
@@ -1327,10 +1334,10 @@ static int fill_psinfo(struct elf_prpsin
 			psinfo->pr_psargs[i] = ' ';
 	psinfo->pr_psargs[len] = 0;
 
-	psinfo->pr_pid = p->pid;
-	psinfo->pr_ppid = p->parent->pid;
-	psinfo->pr_pgrp = process_group(p);
-	psinfo->pr_sid = p->signal->session;
+	psinfo->pr_pid = virt_pid(p);
+	psinfo->pr_ppid = virt_pid(p->parent);
+	psinfo->pr_pgrp = virt_pgid(p);
+	psinfo->pr_sid = virt_sid(p);
 
 	i = p->state ? ffz(~p->state) + 1 : 0;
 	psinfo->pr_state = i;
@@ -1463,7 +1470,7 @@ static int elf_core_dump(long signr, str
 	if (signr) {
 		struct elf_thread_status *tmp;
 		read_lock(&tasklist_lock);
-		do_each_thread(g,p)
+		do_each_thread_ve(g,p)
 			if (current->mm == p->mm && current != p) {
 				tmp = kmalloc(sizeof(*tmp), GFP_ATOMIC);
 				if (!tmp) {
@@ -1475,7 +1482,7 @@ static int elf_core_dump(long signr, str
 				tmp->thread = p;
 				list_add(&tmp->list, &thread_list);
 			}
-		while_each_thread(g,p);
+		while_each_thread_ve(g,p);
 		read_unlock(&tasklist_lock);
 		list_for_each(t, &thread_list) {
 			struct elf_thread_status *tmp;
diff -uprN linux-2.6.16/fs/binfmt_elf_fdpic.c linux-2.6.16.ovz/fs/binfmt_elf_fdpic.c
--- linux-2.6.16/fs/binfmt_elf_fdpic.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/fs/binfmt_elf_fdpic.c	2006-07-05 08:34:56.000000000 -0400
@@ -205,7 +205,7 @@ static int load_elf_fdpic_binary(struct 
 			kdebug("Using ELF interpreter %s", interpreter_name);
 
 			/* replace the program with the interpreter */
-			interpreter = open_exec(interpreter_name);
+			interpreter = open_exec(interpreter_name, bprm);
 			retval = PTR_ERR(interpreter);
 			if (IS_ERR(interpreter)) {
 				interpreter = NULL;
diff -uprN linux-2.6.16/fs/binfmt_em86.c linux-2.6.16.ovz/fs/binfmt_em86.c
--- linux-2.6.16/fs/binfmt_em86.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/fs/binfmt_em86.c	2006-07-05 08:34:56.000000000 -0400
@@ -82,7 +82,7 @@ static int load_em86(struct linux_binprm
 	 * Note that we use open_exec() as the name is now in kernel
 	 * space, and we don't need to copy it.
 	 */
-	file = open_exec(interp);
+	file = open_exec(interp, bprm);
 	if (IS_ERR(file))
 		return PTR_ERR(file);
 
diff -uprN linux-2.6.16/fs/binfmt_flat.c linux-2.6.16.ovz/fs/binfmt_flat.c
--- linux-2.6.16/fs/binfmt_flat.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/fs/binfmt_flat.c	2006-07-05 08:34:56.000000000 -0400
@@ -774,7 +774,7 @@ static int load_flat_shared_library(int 
 
 	/* Open the file up */
 	bprm.filename = buf;
-	bprm.file = open_exec(bprm.filename);
+	bprm.file = open_exec(bprm.filename, &bprm);
 	res = PTR_ERR(bprm.file);
 	if (IS_ERR(bprm.file))
 		return res;
diff -uprN linux-2.6.16/fs/binfmt_misc.c linux-2.6.16.ovz/fs/binfmt_misc.c
--- linux-2.6.16/fs/binfmt_misc.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/fs/binfmt_misc.c	2006-07-05 08:34:56.000000000 -0400
@@ -179,7 +179,7 @@ static int load_misc_binary(struct linux
 
 	bprm->interp = iname;	/* for binfmt_script */
 
-	interp_file = open_exec (iname);
+	interp_file = open_exec (iname, bprm);
 	retval = PTR_ERR (interp_file);
 	if (IS_ERR (interp_file))
 		goto _error;
@@ -216,8 +216,13 @@ _error:
 	bprm->interp_data = 0;
 _unshare:
 	if (files) {
-		put_files_struct(current->files);
+		struct files_struct *old;
+
+		old = current->files;
+		task_lock(current);
 		current->files = files;
+		task_unlock(current);
+		put_files_struct(old);
 	}
 	goto _ret;
 }
diff -uprN linux-2.6.16/fs/binfmt_script.c linux-2.6.16.ovz/fs/binfmt_script.c
--- linux-2.6.16/fs/binfmt_script.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/fs/binfmt_script.c	2006-07-05 08:34:56.000000000 -0400
@@ -85,7 +85,7 @@ static int load_script(struct linux_binp
 	/*
 	 * OK, now restart the process with the interpreter's dentry.
 	 */
-	file = open_exec(interp);
+	file = open_exec(interp, bprm);
 	if (IS_ERR(file))
 		return PTR_ERR(file);
 
diff -uprN linux-2.6.16/fs/block_dev.c linux-2.6.16.ovz/fs/block_dev.c
--- linux-2.6.16/fs/block_dev.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/fs/block_dev.c	2006-07-05 08:34:56.000000000 -0400
@@ -561,9 +561,16 @@ static int do_open(struct block_device *
 {
 	struct module *owner = NULL;
 	struct gendisk *disk;
-	int ret = -ENXIO;
+	int ret;
 	int part;
 
+#ifdef CONFIG_VE
+	ret = get_device_perms_ve(S_IFBLK, bdev->bd_dev,
+				  file->f_mode&(FMODE_READ|FMODE_WRITE));
+	if (ret)
+	        return ret;
+#endif
+	ret = -ENXIO;
 	file->f_mapping = bdev->bd_inode->i_mapping;
 	lock_kernel();
 	disk = get_gendisk(bdev->bd_dev, &part);
@@ -832,7 +839,7 @@ EXPORT_SYMBOL(ioctl_by_bdev);
  * namespace if possible and return it.  Return ERR_PTR(error)
  * otherwise.
  */
-struct block_device *lookup_bdev(const char *path)
+struct block_device *lookup_bdev(const char *path, int mode)
 {
 	struct block_device *bdev;
 	struct inode *inode;
@@ -850,6 +857,11 @@ struct block_device *lookup_bdev(const c
 	error = -ENOTBLK;
 	if (!S_ISBLK(inode->i_mode))
 		goto fail;
+#ifdef CONFIG_VE
+	error = get_device_perms_ve(S_IFBLK, inode->i_rdev, mode);
+	if (error)
+		goto fail;
+#endif
 	error = -EACCES;
 	if (nd.mnt->mnt_flags & MNT_NODEV)
 		goto fail;
@@ -881,12 +893,13 @@ struct block_device *open_bdev_excl(cons
 	mode_t mode = FMODE_READ;
 	int error = 0;
 
-	bdev = lookup_bdev(path);
+	if (!(flags & MS_RDONLY))
+		mode |= FMODE_WRITE;
+
+	bdev = lookup_bdev(path, mode);
 	if (IS_ERR(bdev))
 		return bdev;
 
-	if (!(flags & MS_RDONLY))
-		mode |= FMODE_WRITE;
 	error = blkdev_get(bdev, mode, 0);
 	if (error)
 		return ERR_PTR(error);
diff -uprN linux-2.6.16/fs/buffer.c linux-2.6.16.ovz/fs/buffer.c
--- linux-2.6.16/fs/buffer.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/fs/buffer.c	2006-07-05 08:34:56.000000000 -0400
@@ -1942,8 +1942,9 @@ static int __block_prepare_write(struct 
 			if (err)
 				break;
 			if (buffer_new(bh)) {
-				unmap_underlying_metadata(bh->b_bdev,
-							bh->b_blocknr);
+				if (buffer_mapped(bh))
+					unmap_underlying_metadata(bh->b_bdev,
+								bh->b_blocknr);
 				if (PageUptodate(page)) {
 					set_buffer_uptodate(bh);
 					continue;
diff -uprN linux-2.6.16/fs/char_dev.c linux-2.6.16.ovz/fs/char_dev.c
--- linux-2.6.16/fs/char_dev.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/fs/char_dev.c	2006-07-05 08:34:56.000000000 -0400
@@ -15,6 +15,7 @@
 #include <linux/module.h>
 #include <linux/smp_lock.h>
 #include <linux/devfs_fs_kernel.h>
+#include <linux/seq_file.h>
 
 #include <linux/kobject.h>
 #include <linux/kobj_map.h>
@@ -26,8 +27,6 @@
 
 static struct kobj_map *cdev_map;
 
-#define MAX_PROBE_HASH 255	/* random */
-
 static DECLARE_MUTEX(chrdevs_lock);
 
 static struct char_device_struct {
@@ -38,93 +37,29 @@ static struct char_device_struct {
 	char name[64];
 	struct file_operations *fops;
 	struct cdev *cdev;		/* will die */
-} *chrdevs[MAX_PROBE_HASH];
+} *chrdevs[CHRDEV_MAJOR_HASH_SIZE];
 
 /* index in the above */
 static inline int major_to_index(int major)
 {
-	return major % MAX_PROBE_HASH;
-}
-
-struct chrdev_info {
-	int index;
-	struct char_device_struct *cd;
-};
-
-void *get_next_chrdev(void *dev)
-{
-	struct chrdev_info *info;
-
-	if (dev == NULL) {
-		info = kmalloc(sizeof(*info), GFP_KERNEL);
-		if (!info)
-			goto out;
-		info->index=0;
-		info->cd = chrdevs[info->index];
-		if (info->cd)
-			goto out;
-	} else {
-		info = dev;
-	}
-
-	while (info->index < ARRAY_SIZE(chrdevs)) {
-		if (info->cd)
-			info->cd = info->cd->next;
-		if (info->cd)
-			goto out;
-		/*
-		 * No devices on this chain, move to the next
-		 */
-		info->index++;
-		info->cd = (info->index < ARRAY_SIZE(chrdevs)) ?
-			chrdevs[info->index] : NULL;
-		if (info->cd)
-			goto out;
-	}
-
-out:
-	return info;
-}
-
-void *acquire_chrdev_list(void)
-{
-	down(&chrdevs_lock);
-	return get_next_chrdev(NULL);
-}
-
-void release_chrdev_list(void *dev)
-{
-	up(&chrdevs_lock);
-	kfree(dev);
+	return major % CHRDEV_MAJOR_HASH_SIZE;
 }
 
+#ifdef CONFIG_PROC_FS
 
-int count_chrdev_list(void)
+void chrdev_show(struct seq_file *f, off_t offset)
 {
 	struct char_device_struct *cd;
-	int i, count;
 
-	count = 0;
-
-	for (i = 0; i < ARRAY_SIZE(chrdevs) ; i++) {
-		for (cd = chrdevs[i]; cd; cd = cd->next)
-			count++;
+	if (offset < CHRDEV_MAJOR_HASH_SIZE) {
+		down(&chrdevs_lock);
+		for (cd = chrdevs[offset]; cd; cd = cd->next)
+			seq_printf(f, "%3d %s\n", cd->major, cd->name);
+		up(&chrdevs_lock);
 	}
-
-	return count;
 }
 
-int get_chrdev_info(void *dev, int *major, char **name)
-{
-	struct chrdev_info *info = dev;
-
-	if (info->cd == NULL)
-		return 1;
-
-	*major = info->cd->major;
-	*name = info->cd->name;
-	return 0;
-}
+#endif /* CONFIG_PROC_FS */
 
 /*
  * Register a single major with a specified minor range.
@@ -342,6 +277,13 @@ int chrdev_open(struct inode * inode, st
 	struct cdev *new = NULL;
 	int ret = 0;
 
+#ifdef CONFIG_VE
+	ret = get_device_perms_ve(S_IFCHR, inode->i_rdev,
+				  filp->f_mode&(FMODE_READ|FMODE_WRITE));
+	if (ret)
+		return ret;
+#endif
+
 	spin_lock(&cdev_lock);
 	p = inode->i_cdev;
 	if (!p) {
diff -uprN linux-2.6.16/fs/cifs/cifsencrypt.c linux-2.6.16.ovz/fs/cifs/cifsencrypt.c
--- linux-2.6.16/fs/cifs/cifsencrypt.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/fs/cifs/cifsencrypt.c	2006-07-05 08:34:56.000000000 -0400
@@ -56,9 +56,6 @@ int cifs_sign_smb(struct smb_hdr * cifs_
 	int rc = 0;
 	char smb_signature[20];
 
-	/* BB remember to initialize sequence number elsewhere and initialize mac_signing key elsewhere BB */
-	/* BB remember to add code to save expected sequence number in midQ entry BB */
-
 	if((cifs_pdu == NULL) || (server == NULL))
 		return -EINVAL;
 
@@ -85,20 +82,33 @@ int cifs_sign_smb(struct smb_hdr * cifs_
 static int cifs_calc_signature2(const struct kvec * iov, int n_vec,
 				const char * key, char * signature)
 {
-        struct  MD5Context context;
-
-        if((iov == NULL) || (signature == NULL))
-                return -EINVAL;
+	struct  MD5Context context;
+	int i;
 
-        MD5Init(&context);
-        MD5Update(&context,key,CIFS_SESSION_KEY_SIZE+16);
+	if((iov == NULL) || (signature == NULL))
+		return -EINVAL;
 
-/*        MD5Update(&context,cifs_pdu->Protocol,cifs_pdu->smb_buf_length); */ /* BB FIXME BB */
+	MD5Init(&context);
+	MD5Update(&context,key,CIFS_SESSION_KEY_SIZE+16);
+	for(i=0;i<n_vec;i++) {
+		if(iov[i].iov_base == NULL) {
+			cERROR(1,("null iovec entry"));
+			return -EIO;
+		} else if(iov[i].iov_len == 0)
+			break; /* bail out if we are sent nothing to sign */
+		/* The first entry includes a length field (which does not get
+		   signed) that occupies the first 4 bytes before the header */
+		if(i==0) {
+			if (iov[0].iov_len <= 8 ) /* cmd field at offset 9 */
+				break; /* nothing to sign or corrupt header */
+			MD5Update(&context,iov[0].iov_base+4, iov[0].iov_len-4);
+		} else
+			MD5Update(&context,iov[i].iov_base, iov[i].iov_len);
+	}
 
-        MD5Final(signature,&context);
+	MD5Final(signature,&context);
 
-	return -EOPNOTSUPP;
-/*        return 0; */
+	return 0;
 }
 
 
diff -uprN linux-2.6.16/fs/cifs/cifsfs.c linux-2.6.16.ovz/fs/cifs/cifsfs.c
--- linux-2.6.16/fs/cifs/cifsfs.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/fs/cifs/cifsfs.c	2006-07-05 08:34:56.000000000 -0400
@@ -220,7 +220,8 @@ cifs_statfs(struct super_block *sb, stru
 				   longer available? */
 }
 
-static int cifs_permission(struct inode * inode, int mask, struct nameidata *nd)
+static int cifs_permission(struct inode * inode, int mask, struct nameidata *nd,
+		struct exec_perm *perm)
 {
 	struct cifs_sb_info *cifs_sb;
 
@@ -232,7 +233,7 @@ static int cifs_permission(struct inode 
 		on the client (above and beyond ACL on servers) for  
 		servers which do not support setting and viewing mode bits,
 		so allowing client to check permissions is useful */ 
-		return generic_permission(inode, mask, NULL);
+		return generic_permission(inode, mask, NULL, perm);
 }
 
 static kmem_cache_t *cifs_inode_cachep;
diff -uprN linux-2.6.16/fs/cifs/dir.c linux-2.6.16.ovz/fs/cifs/dir.c
--- linux-2.6.16/fs/cifs/dir.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/fs/cifs/dir.c	2006-07-05 08:34:56.000000000 -0400
@@ -441,6 +441,20 @@ cifs_lookup(struct inode *parent_dir_ino
 	cifs_sb = CIFS_SB(parent_dir_inode->i_sb);
 	pTcon = cifs_sb->tcon;
 
+	/*
+	 * Don't allow the separator character in a path component.
+	 * The VFS will not allow "/", but "\" is allowed by posix.
+	 */
+	if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_POSIX_PATHS)) {
+		int i;
+		for (i = 0; i < direntry->d_name.len; i++)
+			if (direntry->d_name.name[i] == '\\') {
+				cFYI(1, ("Invalid file name"));
+				FreeXid(xid);
+				return ERR_PTR(-EINVAL);
+			}
+	}
+
 	/* can not grab the rename sem here since it would
 	deadlock in the cases (beginning of sys_rename itself)
 	in which we already have the sb rename sem */
diff -uprN linux-2.6.16/fs/coda/dir.c linux-2.6.16.ovz/fs/coda/dir.c
--- linux-2.6.16/fs/coda/dir.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/fs/coda/dir.c	2006-07-05 08:34:56.000000000 -0400
@@ -151,7 +151,8 @@ exit:
 }
 
 
-int coda_permission(struct inode *inode, int mask, struct nameidata *nd)
+int coda_permission(struct inode *inode, int mask, struct nameidata *nd,
+		struct exec_perm *perm)
 {
         int error = 0;
  
diff -uprN linux-2.6.16/fs/coda/pioctl.c linux-2.6.16.ovz/fs/coda/pioctl.c
--- linux-2.6.16/fs/coda/pioctl.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/fs/coda/pioctl.c	2006-07-05 08:34:56.000000000 -0400
@@ -25,7 +25,7 @@
 
 /* pioctl ops */
 static int coda_ioctl_permission(struct inode *inode, int mask,
-				 struct nameidata *nd);
+				 struct nameidata *nd, struct exec_perm *perm);
 static int coda_pioctl(struct inode * inode, struct file * filp, 
                        unsigned int cmd, unsigned long user_data);
 
@@ -43,7 +43,7 @@ struct file_operations coda_ioctl_operat
 
 /* the coda pioctl inode ops */
 static int coda_ioctl_permission(struct inode *inode, int mask,
-				 struct nameidata *nd)
+				 struct nameidata *nd, struct exec_perm *perm)
 {
         return 0;
 }
diff -uprN linux-2.6.16/fs/compat.c linux-2.6.16.ovz/fs/compat.c
--- linux-2.6.16/fs/compat.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/fs/compat.c	2006-07-05 08:34:56.000000000 -0400
@@ -197,6 +197,8 @@ asmlinkage long compat_sys_statfs(const 
 		struct kstatfs tmp;
 		error = vfs_statfs(nd.dentry->d_inode->i_sb, &tmp);
 		if (!error)
+			error = faudit_statfs(nd.mnt->mnt_sb, &tmp);
+		if (!error)
 			error = put_compat_statfs(buf, &tmp);
 		path_release(&nd);
 	}
@@ -215,6 +217,8 @@ asmlinkage long compat_sys_fstatfs(unsig
 		goto out;
 	error = vfs_statfs(file->f_dentry->d_inode->i_sb, &tmp);
 	if (!error)
+		error = faudit_statfs(file->f_vfsmnt->mnt_sb, &tmp);
+	if (!error)
 		error = put_compat_statfs(buf, &tmp);
 	fput(file);
 out:
@@ -265,6 +269,8 @@ asmlinkage long compat_sys_statfs64(cons
 		struct kstatfs tmp;
 		error = vfs_statfs(nd.dentry->d_inode->i_sb, &tmp);
 		if (!error)
+			error = faudit_statfs(nd.mnt->mnt_sb, &tmp);
+		if (!error)
 			error = put_compat_statfs64(buf, &tmp);
 		path_release(&nd);
 	}
@@ -286,6 +292,8 @@ asmlinkage long compat_sys_fstatfs64(uns
 		goto out;
 	error = vfs_statfs(file->f_dentry->d_inode->i_sb, &tmp);
 	if (!error)
+		error = faudit_statfs(file->f_vfsmnt->mnt_sb, &tmp);
+	if (!error)
 		error = put_compat_statfs64(buf, &tmp);
 	fput(file);
 out:
@@ -1215,6 +1223,10 @@ static ssize_t compat_do_readv_writev(in
 	if (ret < 0)
 		goto out;
 
+	ret = security_file_permission(file, type == READ ? MAY_READ:MAY_WRITE);
+	if (ret)
+		goto out;
+
 	fnv = NULL;
 	if (type == READ) {
 		fn = file->f_op->read;
@@ -1479,7 +1491,7 @@ int compat_do_execve(char * filename,
 		goto out_ret;
 	memset(bprm, 0, sizeof(*bprm));
 
-	file = open_exec(filename);
+	file = open_exec(filename, bprm);
 	retval = PTR_ERR(file);
 	if (IS_ERR(file))
 		goto out_kfree;
@@ -1897,7 +1909,7 @@ asmlinkage long compat_sys_ppoll(struct 
 	}
 
 	if (sigmask) {
-		if (sigsetsize |= sizeof(compat_sigset_t))
+		if (sigsetsize != sizeof(compat_sigset_t))
 			return -EINVAL;
 		if (copy_from_user(&ss32, sigmask, sizeof(ss32)))
 			return -EFAULT;
diff -uprN linux-2.6.16/fs/dcache.c linux-2.6.16.ovz/fs/dcache.c
--- linux-2.6.16/fs/dcache.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/fs/dcache.c	2006-07-05 08:34:56.000000000 -0400
@@ -28,11 +28,16 @@
 #include <linux/module.h>
 #include <linux/mount.h>
 #include <linux/file.h>
+#include <linux/namei.h>
 #include <asm/uaccess.h>
 #include <linux/security.h>
 #include <linux/seqlock.h>
 #include <linux/swap.h>
 #include <linux/bootmem.h>
+#include <linux/kernel_stat.h>
+#include <net/inet_sock.h>
+
+#include <ub/ub_dcache.h>
 
 /* #define DCACHE_DEBUG 1 */
 
@@ -44,7 +49,7 @@ static seqlock_t rename_lock __cacheline
 
 EXPORT_SYMBOL(dcache_lock);
 
-static kmem_cache_t *dentry_cache; 
+kmem_cache_t *dentry_cache;
 
 #define DNAME_INLINE_LEN (sizeof(struct dentry)-offsetof(struct dentry,d_iname))
 
@@ -143,11 +148,8 @@ static void dentry_iput(struct dentry * 
  * no dcache lock, please.
  */
 
-void dput(struct dentry *dentry)
+static void dput_recursive(struct dentry *dentry)
 {
-	if (!dentry)
-		return;
-
 repeat:
 	if (atomic_read(&dentry->d_count) == 1)
 		might_sleep();
@@ -206,6 +208,17 @@ kill_it: {
 	}
 }
 
+void dput(struct dentry *dentry)
+{
+	if (!dentry)	/* NULL is accepted and ignored */
+		return;
+
+	spin_lock(&dcache_lock);	/* the uncharge runs under dcache_lock */
+	ub_dentry_uncharge(dentry);
+	spin_unlock(&dcache_lock);
+	dput_recursive(dentry);	/* the former dput() body does the release */
+}
+
 /**
  * d_invalidate - invalidate a dentry
  * @dentry: dentry to invalidate
@@ -272,6 +285,8 @@ static inline struct dentry * __dget_loc
 		dentry_stat.nr_unused--;
 		list_del_init(&dentry->d_lru);
 	}
+
+	ub_dentry_charge_nofail(dentry);
 	return dentry;
 }
 
@@ -373,13 +388,19 @@ static inline void prune_one_dentry(stru
 	parent = dentry->d_parent;
 	d_free(dentry);
 	if (parent != dentry)
-		dput(parent);
+		/*
+		 * dentry is not in use, only child (not outside)
+		 * references change, so parent->d_inuse does not change
+		 */
+		dput_recursive(parent);
 	spin_lock(&dcache_lock);
 }
 
 /**
  * prune_dcache - shrink the dcache
  * @count: number of entries to try and free
+ * @sb: if given, ignore dentries for other superblocks
+ *         which are being unmounted.
  *
  * Shrink the dcache. This is done when we need
  * more memory, or simply when we need to unmount
@@ -390,16 +411,29 @@ static inline void prune_one_dentry(stru
  * all the dentries are in use.
  */
  
-static void prune_dcache(int count)
+static void prune_dcache(int count, struct super_block *sb)
 {
 	spin_lock(&dcache_lock);
 	for (; count ; count--) {
 		struct dentry *dentry;
 		struct list_head *tmp;
+		struct rw_semaphore *s_umount;
 
 		cond_resched_lock(&dcache_lock);
 
 		tmp = dentry_unused.prev;
+		if (unlikely(sb)) {
+			/* Try to find a dentry for this sb, but don't try
+			 * too hard, if they aren't near the tail they will
+			 * be moved down again soon
+			 */
+			int skip = count;
+			while (skip && tmp != &dentry_unused &&
+			    list_entry(tmp, struct dentry, d_lru)->d_sb != sb) {
+				skip--;
+				tmp = tmp->prev;
+			}
+		}
 		if (tmp == &dentry_unused)
 			break;
 		list_del_init(tmp);
@@ -425,7 +459,45 @@ static void prune_dcache(int count)
  			spin_unlock(&dentry->d_lock);
 			continue;
 		}
-		prune_one_dentry(dentry);
+		/*
+		 * If the dentry is not DCACHE_REFERENCED, it is time
+		 * to remove it from the dcache, provided the super block is
+		 * NULL (which means we are trying to reclaim memory)
+		 * or this dentry belongs to the same super block that
+		 * we want to shrink.
+		 */
+		/*
+		 * If this dentry is for "my" filesystem, then I can prune it
+		 * without taking the s_umount lock (I already hold it).
+		 */
+		if (sb && dentry->d_sb == sb) {
+			prune_one_dentry(dentry);
+			continue;
+		}
+		/*
+		 * ...otherwise we need to be sure this filesystem isn't being
+		 * unmounted, otherwise we could race with
+		 * generic_shutdown_super(), and end up holding a reference to
+		 * an inode while the filesystem is unmounted.
+		 * So we try to get s_umount, and make sure s_root isn't NULL.
+		 * (Take a local copy of s_umount to avoid a use-after-free of
+		 * `dentry').
+		 */
+		s_umount = &dentry->d_sb->s_umount;
+		if (down_read_trylock(s_umount)) {
+			if (dentry->d_sb->s_root != NULL) {
+				prune_one_dentry(dentry);
+				up_read(s_umount);
+				continue;
+			}
+			up_read(s_umount);
+		}
+		spin_unlock(&dentry->d_lock);
+		/* Cannot remove the first dentry, and it isn't appropriate
+		 * to move it to the head of the list, so give up, and try
+		 * later
+		 */
+		break;
 	}
 	spin_unlock(&dcache_lock);
 }
@@ -486,6 +558,7 @@ repeat:
 			continue;
 		}
 		prune_one_dentry(dentry);
+		cond_resched_lock(&dcache_lock);
 		goto repeat;
 	}
 	spin_unlock(&dcache_lock);
@@ -635,7 +708,7 @@ void shrink_dcache_parent(struct dentry 
 	int found;
 
 	while ((found = select_parent(parent)) != 0)
-		prune_dcache(found);
+		prune_dcache(found, parent->d_sb);
 }
 
 /**
@@ -648,9 +721,10 @@ void shrink_dcache_parent(struct dentry 
  * done under dcache_lock.
  *
  */
-void shrink_dcache_anon(struct hlist_head *head)
+void shrink_dcache_anon(struct super_block *sb)
 {
 	struct hlist_node *lp;
+	struct hlist_head *head = &sb->s_anon;
 	int found;
 	do {
 		found = 0;
@@ -673,7 +747,7 @@ void shrink_dcache_anon(struct hlist_hea
 			}
 		}
 		spin_unlock(&dcache_lock);
-		prune_dcache(found);
+		prune_dcache(found, sb);
 	} while(found);
 }
 
@@ -691,12 +765,18 @@ void shrink_dcache_anon(struct hlist_hea
  */
 static int shrink_dcache_memory(int nr, gfp_t gfp_mask)
 {
+	int res = -1;
+
+	KSTAT_PERF_ENTER(shrink_dcache)
 	if (nr) {
 		if (!(gfp_mask & __GFP_FS))
-			return -1;
-		prune_dcache(nr);
+			goto out;
+		prune_dcache(nr, NULL);
 	}
-	return (dentry_stat.nr_unused / 100) * sysctl_vfs_cache_pressure;
+	res = (dentry_stat.nr_unused / 100) * sysctl_vfs_cache_pressure;
+out:
+	KSTAT_PERF_LEAVE(shrink_dcache)
+	return res;
 }
 
 /**
@@ -716,19 +796,20 @@ struct dentry *d_alloc(struct dentry * p
 
 	dentry = kmem_cache_alloc(dentry_cache, GFP_KERNEL); 
 	if (!dentry)
-		return NULL;
+		goto err_alloc;
 
 	if (name->len > DNAME_INLINE_LEN-1) {
 		dname = kmalloc(name->len + 1, GFP_KERNEL);
-		if (!dname) {
-			kmem_cache_free(dentry_cache, dentry); 
-			return NULL;
-		}
+		if (!dname)
+			goto err_name;
 	} else  {
 		dname = dentry->d_iname;
 	}	
 	dentry->d_name.name = dname;
 
+	if (ub_dentry_alloc(dentry))
+		goto err_charge;
+
 	dentry->d_name.len = name->len;
 	dentry->d_name.hash = name->hash;
 	memcpy(dname, name->name, name->len);
@@ -759,12 +840,23 @@ struct dentry *d_alloc(struct dentry * p
 	}
 
 	spin_lock(&dcache_lock);
-	if (parent)
+	if (parent) {
 		list_add(&dentry->d_u.d_child, &parent->d_subdirs);
+		if (parent->d_flags & DCACHE_VIRTUAL)
+			dentry->d_flags |= DCACHE_VIRTUAL;
+	}
 	dentry_stat.nr_dentry++;
 	spin_unlock(&dcache_lock);
 
 	return dentry;
+
+err_charge:
+	if (name->len > DNAME_INLINE_LEN - 1)
+		kfree(dname);
+err_name:
+	kmem_cache_free(dentry_cache, dentry);
+err_alloc:
+	return NULL;
 }
 
 struct dentry *d_alloc_name(struct dentry *parent, const char *name)
@@ -1048,7 +1140,6 @@ struct dentry * __d_lookup(struct dentry
 	unsigned int hash = name->hash;
 	const unsigned char *str = name->name;
 	struct hlist_head *head = d_hash(parent,hash);
-	struct dentry *found = NULL;
 	struct hlist_node *node;
 	struct dentry *dentry;
 
@@ -1089,7 +1180,7 @@ struct dentry * __d_lookup(struct dentry
 
 		if (!d_unhashed(dentry)) {
 			atomic_inc(&dentry->d_count);
-			found = dentry;
+			goto found;
 		}
 		spin_unlock(&dentry->d_lock);
 		break;
@@ -1098,7 +1189,18 @@ next:
  	}
  	rcu_read_unlock();
 
- 	return found;
+ 	return NULL;
+
+found:
+	/*
+	 * d_lock and rcu_read_lock
+	 * are dropped in ub_dentry_charge()
+	 */
+	if (ub_dentry_charge(dentry)) {
+		dput(dentry);
+		dentry = NULL;
+	}
+	return dentry;
 }
 
 /**
@@ -1345,6 +1447,32 @@ already_unhashed:
 }
 
 /**
+ * __d_path_add_deleted - prepend "(deleted) " text
+ * @end: a pointer to the character after free space at the beginning of the
+ *       buffer
+ * @buflen: remaining free space
+ */
+static inline char * __d_path_add_deleted(char * end, int buflen)
+{
+	buflen -= 10;	/* 10 is the length of "(deleted) " */
+	if (buflen < 0)
+		return ERR_PTR(-ENAMETOOLONG);	/* the marker does not fit */
+	end -= 10;
+	memcpy(end, "(deleted) ", 10);	/* copied without a trailing NUL */
+	return end;	/* new start of the text */
+}
+
+/**
+ * d_root_check - checks if dentry is accessible from current's fs root
+ * @dentry: dentry to be verified
+ * @vfsmnt: vfsmnt to which the dentry belongs
+ */
+int d_root_check(struct dentry *dentry, struct vfsmount *vfsmnt)
+{
+	return PTR_ERR(d_path(dentry, vfsmnt, NULL, 0));	/* no buffer: status only */
+}
+
+/**
  * d_path - return the path of a dentry
  * @dentry: dentry to report
  * @vfsmnt: vfsmnt to which the dentry belongs
@@ -1365,36 +1493,35 @@ static char * __d_path( struct dentry *d
 			char *buffer, int buflen)
 {
 	char * end = buffer+buflen;
-	char * retval;
+	char * retval = NULL;
 	int namelen;
+	int deleted;
+	struct vfsmount *oldvfsmnt;
 
-	*--end = '\0';
-	buflen--;
-	if (!IS_ROOT(dentry) && d_unhashed(dentry)) {
-		buflen -= 10;
-		end -= 10;
-		if (buflen < 0)
+	oldvfsmnt = vfsmnt;
+	deleted = (!IS_ROOT(dentry) && d_unhashed(dentry));
+	if (buffer != NULL) {
+		*--end = '\0';
+		buflen--;
+
+		if (buflen < 1)
 			goto Elong;
-		memcpy(end, " (deleted)", 10);
+		/* Get '/' right */
+		retval = end-1;
+		*retval = '/';
 	}
 
-	if (buflen < 1)
-		goto Elong;
-	/* Get '/' right */
-	retval = end-1;
-	*retval = '/';
-
 	for (;;) {
 		struct dentry * parent;
 
 		if (dentry == root && vfsmnt == rootmnt)
 			break;
 		if (dentry == vfsmnt->mnt_root || IS_ROOT(dentry)) {
-			/* Global root? */
+			/* root of a tree? */
 			spin_lock(&vfsmount_lock);
 			if (vfsmnt->mnt_parent == vfsmnt) {
 				spin_unlock(&vfsmount_lock);
-				goto global_root;
+				goto other_root;
 			}
 			dentry = vfsmnt->mnt_mountpoint;
 			vfsmnt = vfsmnt->mnt_parent;
@@ -1403,27 +1530,51 @@ static char * __d_path( struct dentry *d
 		}
 		parent = dentry->d_parent;
 		prefetch(parent);
+		if (buffer != NULL) {
+			namelen = dentry->d_name.len;
+			buflen -= namelen + 1;
+			if (buflen < 0)
+				goto Elong;
+			end -= namelen;
+			memcpy(end, dentry->d_name.name, namelen);
+			*--end = '/';
+			retval = end;
+		}
+		dentry = parent;
+	}
+	/* the given root point is reached */
+finish:
+	if (buffer != NULL && deleted)
+		retval = __d_path_add_deleted(end, buflen);
+	return retval;
+
+other_root:
+	/*
+	 * We traversed the tree upward and reached a root, but the given
+	 * lookup terminal point wasn't encountered.  It means either that the
+	 * dentry is out of our scope or belongs to an abstract space like
+	 * sock_mnt or pipe_mnt.  Check for it.
+	 *
+	 * There are different options to check it.
+	 * We may assume that any dentry tree is unreachable unless it's
+	 * connected to `root' (defined as fs root of init aka child reaper)
+	 * and expose all paths that are not connected to it.
+	 * The other option is to allow exposing of known abstract spaces
+	 * explicitly and hide the path information for other cases.
+	 * This approach is safer, let's take it.  2001/04/22  SAW
+	 */
+	if (!(oldvfsmnt->mnt_sb->s_flags & MS_NOUSER))
+		return ERR_PTR(-EINVAL);
+	if (buffer != NULL) {
 		namelen = dentry->d_name.len;
-		buflen -= namelen + 1;
+		buflen -= namelen;
 		if (buflen < 0)
 			goto Elong;
-		end -= namelen;
-		memcpy(end, dentry->d_name.name, namelen);
-		*--end = '/';
-		retval = end;
-		dentry = parent;
+		retval -= namelen-1;	/* hit the slash */
+		memcpy(retval, dentry->d_name.name, namelen);
 	}
+	goto finish;
 
-	return retval;
-
-global_root:
-	namelen = dentry->d_name.len;
-	buflen -= namelen;
-	if (buflen < 0)
-		goto Elong;
-	retval -= namelen-1;	/* hit the slash */
-	memcpy(retval, dentry->d_name.name, namelen);
-	return retval;
 Elong:
 	return ERR_PTR(-ENAMETOOLONG);
 }
@@ -1448,6 +1599,229 @@ char * d_path(struct dentry *dentry, str
 	return res;
 }
 
+#ifdef CONFIG_VE
+#include <net/sock.h>
+#include <linux/ip.h>
+#include <linux/file.h>
+#include <linux/namespace.h>
+#include <linux/vzratelimit.h>
+
+static void mark_sub_tree_virtual(struct dentry *d)
+{
+	struct dentry *orig_root;
+
+	orig_root = d;	/* the walk ends once we are back at this dentry */
+	while (1) {	/* iterative depth-first walk: set DCACHE_VIRTUAL on each node */
+		spin_lock(&d->d_lock);
+		d->d_flags |= DCACHE_VIRTUAL;
+		spin_unlock(&d->d_lock);
+
+		if (!list_empty(&d->d_subdirs)) {
+			d = list_entry(d->d_subdirs.next,
+					struct dentry, d_u.d_child);
+			continue;	/* descend to the first child */
+		}
+		if (d == orig_root)	/* the starting dentry has no children */
+			break;
+		while (d == list_entry(d->d_parent->d_subdirs.prev,
+					struct dentry, d_u.d_child)) {
+			d = d->d_parent;	/* d was the last sibling: climb */
+			if (d == orig_root)
+				goto out;
+		}
+		d = list_entry(d->d_u.d_child.next,
+				struct dentry, d_u.d_child);	/* next sibling */
+	}
+out:
+	return;
+}
+
+void mark_tree_virtual(struct vfsmount *m, struct dentry *d)
+{
+	struct vfsmount *orig_rootmnt;
+
+	spin_lock(&dcache_lock);	/* both locks are held for the whole walk */
+	spin_lock(&vfsmount_lock);
+	orig_rootmnt = m;	/* the walk ends once we are back at this mount */
+	while (1) {	/* iterative depth-first walk over the mounts below m */
+		mark_sub_tree_virtual(d);	/* flag the dentry tree under d */
+		if (!list_empty(&m->mnt_mounts)) {
+			m = list_entry(m->mnt_mounts.next,
+					struct vfsmount, mnt_child);
+			d = m->mnt_root;
+			continue;	/* descend to the first child mount */
+		}
+		if (m == orig_rootmnt)	/* the starting mount has no children */
+			break;
+		while (m == list_entry(m->mnt_parent->mnt_mounts.prev,
+					struct vfsmount, mnt_child)) {
+			m = m->mnt_parent;	/* m was the last sibling: climb */
+			if (m == orig_rootmnt)
+				goto out;
+		}
+		m = list_entry(m->mnt_child.next,
+				struct vfsmount, mnt_child);	/* next sibling mount */
+		d = m->mnt_root;
+	}
+out:
+	spin_unlock(&vfsmount_lock);
+	spin_unlock(&dcache_lock);
+}
+EXPORT_SYMBOL(mark_tree_virtual);
+
+static struct vz_rate_info area_ri = { 20, 10*HZ };
+#define VE_AREA_ACC_CHECK	0x0001
+#define VE_AREA_ACC_DENY	0x0002
+#define VE_AREA_EXEC_CHECK	0x0010
+#define VE_AREA_EXEC_DENY	0x0020
+#define VE0_AREA_ACC_CHECK	0x0100
+#define VE0_AREA_ACC_DENY	0x0200
+#define VE0_AREA_EXEC_CHECK	0x1000
+#define VE0_AREA_EXEC_DENY	0x2000
+int ve_area_access_check = 0;
+
+static void print_connection_info(struct task_struct *tsk)
+{
+	struct files_struct *files;
+	struct fdtable *fdt;
+	int fd;
+
+	files = get_files_struct(tsk);	/* dropped by put_files_struct() below */
+	if (!files)
+		return;
+
+	spin_lock(&files->file_lock);	/* held across the whole fd scan */
+	fdt = files_fdtable(files);
+	for (fd = 0; fd < fdt->max_fds; fd++) {
+		struct file *file;
+		struct inode *inode;
+		struct socket *socket;
+		struct sock *sk;
+		struct inet_sock *inet;
+
+		file = fdt->fd[fd];
+		if (file == NULL)	/* unused descriptor slot */
+			continue;
+
+		inode = file->f_dentry->d_inode;
+		if (!S_ISSOCK(inode->i_mode))	/* only sockets are reported */
+			continue;
+
+		socket = SOCKET_I(inode);
+		if (socket == NULL)
+			continue;
+
+		sk = socket->sk;	/* NOTE(review): used unchecked below - confirm it cannot be NULL */
+		if ((sk->sk_family != PF_INET && sk->sk_family != PF_INET6)
+		    || sk->sk_type != SOCK_STREAM)
+			continue;	/* only PF_INET/PF_INET6 stream sockets */
+
+		inet = inet_sk(sk);
+		printk(KERN_ALERT "connection from %u.%u.%u.%u:%u to port %u\n",
+				NIPQUAD(inet->daddr), ntohs(inet->dport),
+				inet->num);
+	}
+	spin_unlock(&files->file_lock);
+	put_files_struct(files);
+}
+
+static void check_alert(struct vfsmount *vfsmnt, struct dentry *dentry,
+		char *str)
+{
+	struct task_struct *tsk;
+	unsigned long page;
+	struct super_block *sb;
+	char *p;
+
+	if (!vz_ratelimit(&area_ri))	/* reports are rate-limited via area_ri */
+		return;
+
+	tsk = current;
+	p = ERR_PTR(-ENOMEM);	/* stays an error when no page is obtained */
+	page = __get_free_page(GFP_KERNEL);
+	if (page) {
+		spin_lock(&dcache_lock);
+		p = __d_path(dentry, vfsmnt, tsk->fs->root, tsk->fs->rootmnt,
+				(char *)page, PAGE_SIZE);
+		spin_unlock(&dcache_lock);
+	}
+	if (IS_ERR(p))
+		p = "(undefined)";	/* printed in place of the path */
+
+	sb = dentry->d_sb;
+	printk(KERN_ALERT "%s check alert! file:[%s] from %d/%s, dev%x\n"
+			"Task %d/%d[%s] from VE%d, execenv %d\n",
+			str, p,	VE_OWNER_FSTYPE(sb->s_type)->veid,
+			sb->s_type->name, sb->s_dev,
+			tsk->pid, virt_pid(tsk), tsk->comm,
+			VE_TASK_INFO(tsk)->owner_env->veid,
+			get_exec_env()->veid);
+
+	free_page(page);	/* p is not used after this point */
+
+	print_connection_info(tsk);
+
+	read_lock(&tasklist_lock);
+	tsk = tsk->real_parent;	/* from here on tsk is the parent task */
+	get_task_struct(tsk);	/* reference dropped by put_task_struct() below */
+	read_unlock(&tasklist_lock);
+
+	printk(KERN_ALERT "Parent %d/%d[%s] from VE%d\n",
+			tsk->pid, virt_pid(tsk), tsk->comm,
+			VE_TASK_INFO(tsk)->owner_env->veid);
+
+	print_connection_info(tsk);
+	put_task_struct(tsk);
+	dump_stack();
+}
+#endif
+
+int check_area_access_ve(struct dentry *dentry, struct vfsmount *mnt)
+{
+#ifdef CONFIG_VE
+	int check, alert, deny;
+
+	if (ve_is_super(get_exec_env())) {	/* the VE0_* control bits apply */
+		check = ve_area_access_check & VE0_AREA_ACC_CHECK;
+		alert = dentry->d_flags & DCACHE_VIRTUAL;	/* dentry is marked virtual */
+		deny = ve_area_access_check & VE0_AREA_ACC_DENY;
+	} else {	/* the VE_* control bits apply */
+		check = ve_area_access_check & VE_AREA_ACC_CHECK;
+		alert = !(dentry->d_flags & DCACHE_VIRTUAL);	/* dentry is not marked virtual */
+		deny = ve_area_access_check & VE_AREA_ACC_DENY;
+	}
+
+	if (check && alert)
+		check_alert(mnt, dentry, "Access");	/* log only */
+	if (deny && alert)
+		return -EACCES;	/* refusal is independent of the check bit */
+#endif	/* CONFIG_VE */
+	return 0;	/* also the only result without CONFIG_VE */
+}
+
+int check_area_execute_ve(struct dentry *dentry, struct vfsmount *mnt)
+{
+#ifdef CONFIG_VE
+	int check, alert, deny;
+
+	if (ve_is_super(get_exec_env())) {	/* the VE0_* control bits apply */
+		check = ve_area_access_check & VE0_AREA_EXEC_CHECK;
+		alert = dentry->d_flags & DCACHE_VIRTUAL;	/* dentry is marked virtual */
+		deny = ve_area_access_check & VE0_AREA_EXEC_DENY;
+	} else {	/* the VE_* control bits apply */
+		check = ve_area_access_check & VE_AREA_EXEC_CHECK;
+		alert = !(dentry->d_flags & DCACHE_VIRTUAL);	/* dentry is not marked virtual */
+		deny = ve_area_access_check & VE_AREA_EXEC_DENY;
+	}
+
+	if (check && alert)
+		check_alert(mnt, dentry, "Exec");	/* log only */
+	if (deny && alert)
+		return -EACCES;	/* refusal is independent of the check bit */
+#endif	/* CONFIG_VE */
+	return 0;	/* also the only result without CONFIG_VE */
+}
+
 /*
  * NOTE! The user-level library version returns a
  * character pointer. The kernel system call just
@@ -1584,10 +1958,12 @@ resume:
 			goto repeat;
 		}
 		atomic_dec(&dentry->d_count);
+		ub_dentry_uncharge(dentry);
 	}
 	if (this_parent != root) {
 		next = this_parent->d_u.d_child.next;
 		atomic_dec(&this_parent->d_count);
+		ub_dentry_uncharge(this_parent);
 		this_parent = this_parent->d_parent;
 		goto resume;
 	}
@@ -1736,7 +2112,8 @@ void __init vfs_caches_init(unsigned lon
 			SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL);
 
 	filp_cachep = kmem_cache_create("filp", sizeof(struct file), 0,
-			SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL);
+			SLAB_HWCACHE_ALIGN | SLAB_PANIC | SLAB_UBC,
+			NULL, NULL);
 
 	dcache_init(mempages);
 	inode_init(mempages);
diff -uprN linux-2.6.16/fs/devpts/inode.c linux-2.6.16.ovz/fs/devpts/inode.c
--- linux-2.6.16/fs/devpts/inode.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/fs/devpts/inode.c	2006-07-05 08:34:56.000000000 -0400
@@ -12,6 +12,7 @@
 
 #include <linux/module.h>
 #include <linux/init.h>
+#include <linux/ve.h>
 #include <linux/fs.h>
 #include <linux/sched.h>
 #include <linux/namei.h>
@@ -21,16 +22,17 @@
 
 #define DEVPTS_SUPER_MAGIC 0x1cd1
 
+struct devpts_config devpts_config = {.mode = 0600};
+
+#ifndef CONFIG_VE
 static struct vfsmount *devpts_mnt;
 static struct dentry *devpts_root;
-
-static struct {
-	int setuid;
-	int setgid;
-	uid_t   uid;
-	gid_t   gid;
-	umode_t mode;
-} config = {.mode = 0600};
+#define config	devpts_config
+#else
+#define devpts_mnt	(get_exec_env()->devpts_mnt)
+#define devpts_root	(get_exec_env()->devpts_root)
+#define config		(*(get_exec_env()->devpts_config))
+#endif
 
 static int devpts_remount(struct super_block *sb, int *flags, char *data)
 {
@@ -56,7 +58,8 @@ static int devpts_remount(struct super_b
 		} else if (sscanf(this_char, "mode=%o%c", &n, &dummy) == 1)
 			mode = n & ~S_IFMT;
 		else {
-			printk("devpts: called with bogus options\n");
+			ve_printk(VE_LOG,
+					"devpts: called with bogus options\n");
 			return -EINVAL;
 		}
 	}
@@ -114,13 +117,15 @@ static struct super_block *devpts_get_sb
 	return get_sb_single(fs_type, flags, data, devpts_fill_super);
 }
 
-static struct file_system_type devpts_fs_type = {
+struct file_system_type devpts_fs_type = {
 	.owner		= THIS_MODULE,
 	.name		= "devpts",
 	.get_sb		= devpts_get_sb,
 	.kill_sb	= kill_anon_super,
 };
 
+EXPORT_SYMBOL(devpts_fs_type);
+
 /*
  * The normal naming convention is simply /dev/pts/<number>; this conforms
  * to the System V naming convention
@@ -212,6 +217,7 @@ static int __init init_devpts_fs(void)
 
 static void __exit exit_devpts_fs(void)
 {
+	/* the code is never called, the argument is irrelevant */
 	unregister_filesystem(&devpts_fs_type);
 	mntput(devpts_mnt);
 }
diff -uprN linux-2.6.16/fs/eventpoll.c linux-2.6.16.ovz/fs/eventpoll.c
--- linux-2.6.16/fs/eventpoll.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/fs/eventpoll.c	2006-07-05 08:34:56.000000000 -0400
@@ -105,11 +105,6 @@
 #define EP_MAX_MSTIMEO min(1000ULL * MAX_SCHEDULE_TIMEOUT / HZ, (LONG_MAX - 999ULL) / HZ)
 
 
-struct epoll_filefd {
-	struct file *file;
-	int fd;
-};
-
 /*
  * Node that is linked into the "wake_task_list" member of the "struct poll_safewake".
  * It is used to keep track on all tasks that are currently inside the wake_up() code
@@ -132,36 +127,6 @@ struct poll_safewake {
 	spinlock_t lock;
 };
 
-/*
- * This structure is stored inside the "private_data" member of the file
- * structure and rapresent the main data sructure for the eventpoll
- * interface.
- */
-struct eventpoll {
-	/* Protect the this structure access */
-	rwlock_t lock;
-
-	/*
-	 * This semaphore is used to ensure that files are not removed
-	 * while epoll is using them. This is read-held during the event
-	 * collection loop and it is write-held during the file cleanup
-	 * path, the epoll file exit code and the ctl operations.
-	 */
-	struct rw_semaphore sem;
-
-	/* Wait queue used by sys_epoll_wait() */
-	wait_queue_head_t wq;
-
-	/* Wait queue used by file->poll() */
-	wait_queue_head_t poll_wait;
-
-	/* List of ready file descriptors */
-	struct list_head rdllist;
-
-	/* RB-Tree root used to store monitored fd structs */
-	struct rb_root rbr;
-};
-
 /* Wait structure used by the poll hooks */
 struct eppoll_entry {
 	/* List header used to link this structure to the "struct epitem" */
@@ -180,51 +145,6 @@ struct eppoll_entry {
 	wait_queue_head_t *whead;
 };
 
-/*
- * Each file descriptor added to the eventpoll interface will
- * have an entry of this type linked to the hash.
- */
-struct epitem {
-	/* RB-Tree node used to link this structure to the eventpoll rb-tree */
-	struct rb_node rbn;
-
-	/* List header used to link this structure to the eventpoll ready list */
-	struct list_head rdllink;
-
-	/* The file descriptor information this item refers to */
-	struct epoll_filefd ffd;
-
-	/* Number of active wait queue attached to poll operations */
-	int nwait;
-
-	/* List containing poll wait queues */
-	struct list_head pwqlist;
-
-	/* The "container" of this item */
-	struct eventpoll *ep;
-
-	/* The structure that describe the interested events and the source fd */
-	struct epoll_event event;
-
-	/*
-	 * Used to keep track of the usage count of the structure. This avoids
-	 * that the structure will desappear from underneath our processing.
-	 */
-	atomic_t usecnt;
-
-	/* List header used to link this item to the "struct file" items list */
-	struct list_head fllink;
-
-	/* List header used to link the item to the transfer list */
-	struct list_head txlink;
-
-	/*
-	 * This is used during the collection/transfer of events to userspace
-	 * to pin items empty events set.
-	 */
-	unsigned int revents;
-};
-
 /* Wrapper struct used by poll queueing */
 struct ep_pqueue {
 	poll_table pt;
@@ -239,14 +159,10 @@ static int ep_getfd(int *efd, struct ino
 		    struct eventpoll *ep);
 static int ep_alloc(struct eventpoll **pep);
 static void ep_free(struct eventpoll *ep);
-static struct epitem *ep_find(struct eventpoll *ep, struct file *file, int fd);
 static void ep_use_epitem(struct epitem *epi);
-static void ep_release_epitem(struct epitem *epi);
 static void ep_ptable_queue_proc(struct file *file, wait_queue_head_t *whead,
 				 poll_table *pt);
 static void ep_rbtree_insert(struct eventpoll *ep, struct epitem *epi);
-static int ep_insert(struct eventpoll *ep, struct epoll_event *event,
-		     struct file *tfile, int fd);
 static int ep_modify(struct eventpoll *ep, struct epitem *epi,
 		     struct epoll_event *event);
 static void ep_unregister_pollwait(struct eventpoll *ep, struct epitem *epi);
@@ -274,7 +190,8 @@ static struct super_block *eventpollfs_g
 /*
  * This semaphore is used to serialize ep_free() and eventpoll_release_file().
  */
-static struct semaphore epsem;
+struct semaphore epsem;
+EXPORT_SYMBOL_GPL(epsem);
 
 /* Safe wake up implementation */
 static struct poll_safewake psw;
@@ -289,10 +206,11 @@ static kmem_cache_t *pwq_cache;
 static struct vfsmount *eventpoll_mnt;
 
 /* File callbacks that implement the eventpoll file behaviour */
-static struct file_operations eventpoll_fops = {
+struct file_operations eventpoll_fops = {
 	.release	= ep_eventpoll_close,
 	.poll		= ep_eventpoll_poll
 };
+EXPORT_SYMBOL_GPL(eventpoll_fops);
 
 /*
  * This is used to register the virtual file system from where
@@ -542,7 +460,7 @@ eexit_1:
 		     current, size, error));
 	return error;
 }
-
+EXPORT_SYMBOL_GPL(sys_epoll_create);
 
 /*
  * The following function implements the controller interface for
@@ -852,7 +770,7 @@ static void ep_free(struct eventpoll *ep
  * the returned item, so the caller must call ep_release_epitem()
  * after finished using the "struct epitem".
  */
-static struct epitem *ep_find(struct eventpoll *ep, struct file *file, int fd)
+struct epitem *ep_find(struct eventpoll *ep, struct file *file, int fd)
 {
 	int kcmp;
 	unsigned long flags;
@@ -882,6 +800,7 @@ static struct epitem *ep_find(struct eve
 
 	return epir;
 }
+EXPORT_SYMBOL_GPL(ep_find);
 
 
 /*
@@ -900,13 +819,13 @@ static void ep_use_epitem(struct epitem 
  * has finished using the structure. It might lead to freeing the
  * structure itself if the count goes to zero.
  */
-static void ep_release_epitem(struct epitem *epi)
+void ep_release_epitem(struct epitem *epi)
 {
 
 	if (atomic_dec_and_test(&epi->usecnt))
 		kmem_cache_free(epi_cache, epi);
 }
-
+EXPORT_SYMBOL_GPL(ep_release_epitem);
 
 /*
  * This is the callback that is used to add our wait queue to the
@@ -952,7 +871,7 @@ static void ep_rbtree_insert(struct even
 }
 
 
-static int ep_insert(struct eventpoll *ep, struct epoll_event *event,
+int ep_insert(struct eventpoll *ep, struct epoll_event *event,
 		     struct file *tfile, int fd)
 {
 	int error, revents, pwake = 0;
@@ -1044,6 +963,7 @@ eexit_2:
 eexit_1:
 	return error;
 }
+EXPORT_SYMBOL_GPL(ep_insert);
 
 
 /*
diff -uprN linux-2.6.16/fs/exec.c linux-2.6.16.ovz/fs/exec.c
--- linux-2.6.16/fs/exec.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/fs/exec.c	2006-07-05 08:34:56.000000000 -0400
@@ -53,6 +53,8 @@
 #include <asm/uaccess.h>
 #include <asm/mmu_context.h>
 
+#include <ub/ub_vmpages.h>
+
 #ifdef CONFIG_KMOD
 #include <linux/kmod.h>
 #endif
@@ -64,6 +66,8 @@ int suid_dumpable = 0;
 EXPORT_SYMBOL(suid_dumpable);
 /* The maximal length of core_pattern is also specified in sysctl.c */
 
+int sysctl_at_vsyscall;
+
 static struct linux_binfmt *formats;
 static DEFINE_RWLOCK(binfmt_lock);
 
@@ -135,7 +139,7 @@ asmlinkage long sys_uselib(const char __
 	if (!S_ISREG(nd.dentry->d_inode->i_mode))
 		goto exit;
 
-	error = vfs_permission(&nd, MAY_READ | MAY_EXEC);
+	error = vfs_permission(&nd, MAY_READ | MAY_EXEC, NULL);
 	if (error)
 		goto exit;
 
@@ -308,6 +312,10 @@ void install_arg_page(struct vm_area_str
 	struct mm_struct *mm = vma->vm_mm;
 	pte_t * pte;
 	spinlock_t *ptl;
+	struct page_beancounter *pb;
+
+	if (unlikely(pb_alloc(&pb)))
+		goto out_nopb;
 
 	if (unlikely(anon_vma_prepare(vma)))
 		goto out;
@@ -321,15 +329,21 @@ void install_arg_page(struct vm_area_str
 		goto out;
 	}
 	inc_mm_counter(mm, anon_rss);
+	inc_vma_rss(vma);
 	lru_cache_add_active(page);
 	set_pte_at(mm, address, pte, pte_mkdirty(pte_mkwrite(mk_pte(
 					page, vma->vm_page_prot))));
+	pb_add_ref(page, mm, &pb);
+	ub_unused_privvm_dec(mm, vma);
+	pb_free(&pb);
 	page_add_new_anon_rmap(page, vma, address);
 	pte_unmap_unlock(pte, ptl);
 
 	/* no need for flush_tlb */
 	return;
 out:
+	pb_free(&pb);
+out_nopb:
 	__free_page(page);
 	force_sig(SIGKILL, current);
 }
@@ -404,9 +418,13 @@ int setup_arg_pages(struct linux_binprm 
 		bprm->loader += stack_base;
 	bprm->exec += stack_base;
 
-	mpnt = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
+	if (ub_memory_charge(mm, arg_size, VM_STACK_FLAGS | mm->def_flags,
+				NULL, UB_SOFT))
+		goto fail_charge;
+
+	mpnt = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL | __GFP_SOFT_UBC);
 	if (!mpnt)
-		return -ENOMEM;
+		goto fail_alloc;
 
 	memset(mpnt, 0, sizeof(*mpnt));
 
@@ -450,6 +468,11 @@ int setup_arg_pages(struct linux_binprm 
 	up_write(&mm->mmap_sem);
 	
 	return 0;
+
+fail_alloc:
+	ub_memory_uncharge(mm, arg_size, VM_STACK_FLAGS | mm->def_flags, NULL);
+fail_charge:
+	return -ENOMEM;
 }
 
 EXPORT_SYMBOL(setup_arg_pages);
@@ -471,7 +494,7 @@ static inline void free_arg_pages(struct
 
 #endif /* CONFIG_MMU */
 
-struct file *open_exec(const char *name)
+struct file *open_exec(const char *name, struct linux_binprm *bprm)
 {
 	struct nameidata nd;
 	int err;
@@ -485,9 +508,16 @@ struct file *open_exec(const char *name)
 		file = ERR_PTR(-EACCES);
 		if (!(nd.mnt->mnt_flags & MNT_NOEXEC) &&
 		    S_ISREG(inode->i_mode)) {
-			int err = vfs_permission(&nd, MAY_EXEC);
-			if (!err && !(inode->i_mode & 0111))
-				err = -EACCES;
+			int err;
+			struct exec_perm *perm;
+
+			if (bprm != NULL) {
+				perm = &bprm->perm;
+				perm->set = 0;
+			} else
+				perm = NULL;
+
+			err = vfs_permission(&nd, MAY_EXEC, perm);
 			file = ERR_PTR(err);
 			if (!err) {
 				file = nameidata_to_filp(&nd, O_RDONLY);
@@ -657,7 +687,7 @@ static int de_thread(struct task_struct 
 	 */
 	if (!thread_group_leader(current)) {
 		struct task_struct *parent;
-		struct dentry *proc_dentry1, *proc_dentry2;
+		struct dentry *proc_dentry1[2], *proc_dentry2[2];
 		unsigned long ptrace;
 
 		/*
@@ -671,8 +701,8 @@ static int de_thread(struct task_struct 
 
 		spin_lock(&leader->proc_lock);
 		spin_lock(&current->proc_lock);
-		proc_dentry1 = proc_pid_unhash(current);
-		proc_dentry2 = proc_pid_unhash(leader);
+		proc_pid_unhash(current, proc_dentry1);
+		proc_pid_unhash(leader, proc_dentry2);
 		write_lock_irq(&tasklist_lock);
 
 		BUG_ON(leader->tgid != current->tgid);
@@ -829,7 +859,7 @@ int flush_old_exec(struct linux_binprm *
 {
 	char * name;
 	int i, ch, retval;
-	struct files_struct *files;
+	struct files_struct *files, *old;
 	char tcomm[sizeof(current->comm)];
 
 	/*
@@ -897,6 +927,7 @@ int flush_old_exec(struct linux_binprm *
 		suid_keys(current);
 		current->mm->dumpable = suid_dumpable;
 	}
+	current->mm->vps_dumpable = 1;
 
 	/* An exec changes our domain. We are no longer part of the thread
 	   group */
@@ -909,8 +940,11 @@ int flush_old_exec(struct linux_binprm *
 	return 0;
 
 mmap_failed:
-	put_files_struct(current->files);
+	old = current->files;
+	task_lock(current);
 	current->files = files;
+	task_unlock(current);
+	put_files_struct(old);
 out:
 	return retval;
 }
@@ -927,13 +961,6 @@ int prepare_binprm(struct linux_binprm *
 	struct inode * inode = bprm->file->f_dentry->d_inode;
 	int retval;
 
-	mode = inode->i_mode;
-	/*
-	 * Check execute perms again - if the caller has CAP_DAC_OVERRIDE,
-	 * generic_permission lets a non-executable through
-	 */
-	if (!(mode & 0111))	/* with at least _one_ execute bit set */
-		return -EACCES;
 	if (bprm->file->f_op == NULL)
 		return -EACCES;
 
@@ -941,10 +968,24 @@ int prepare_binprm(struct linux_binprm *
 	bprm->e_gid = current->egid;
 
 	if(!(bprm->file->f_vfsmnt->mnt_flags & MNT_NOSUID)) {
+		if (!bprm->perm.set) {
+			/*
+			 * This piece of code creates a time window between
+			 * MAY_EXEC permission check and setuid/setgid
+			 * operations and may be considered as a security hole.
+			 * This code is here for compatibility reasons,
+			 * if the filesystem is unable to return info now.
+			 */
+			bprm->perm.mode = inode->i_mode;
+			bprm->perm.uid = inode->i_uid;
+			bprm->perm.gid = inode->i_gid;
+		}
+		mode = bprm->perm.mode;
+
 		/* Set-uid? */
 		if (mode & S_ISUID) {
 			current->personality &= ~PER_CLEAR_ON_SETID;
-			bprm->e_uid = inode->i_uid;
+			bprm->e_uid = bprm->perm.uid;
 		}
 
 		/* Set-gid? */
@@ -955,7 +996,7 @@ int prepare_binprm(struct linux_binprm *
 		 */
 		if ((mode & (S_ISGID | S_IXGRP)) == (S_ISGID | S_IXGRP)) {
 			current->personality &= ~PER_CLEAR_ON_SETID;
-			bprm->e_gid = inode->i_gid;
+			bprm->e_gid = bprm->perm.gid;
 		}
 	}
 
@@ -1054,7 +1095,7 @@ int search_binary_handler(struct linux_b
 
 	        loader = PAGE_SIZE*MAX_ARG_PAGES-sizeof(void *);
 
-		file = open_exec("/sbin/loader");
+		file = open_exec("/sbin/loader", bprm);
 		retval = PTR_ERR(file);
 		if (IS_ERR(file))
 			return retval;
@@ -1148,7 +1189,7 @@ int do_execve(char * filename,
 		goto out_ret;
 	memset(bprm, 0, sizeof(*bprm));
 
-	file = open_exec(filename);
+	file = open_exec(filename, bprm);
 	retval = PTR_ERR(file);
 	if (IS_ERR(file))
 		goto out_kfree;
@@ -1288,7 +1329,7 @@ static void format_corename(char *corena
 			case 'p':
 				pid_in_pattern = 1;
 				rc = snprintf(out_ptr, out_end - out_ptr,
-					      "%d", current->tgid);
+					      "%d", virt_tgid(current));
 				if (rc > out_end - out_ptr)
 					goto out;
 				out_ptr += rc;
@@ -1332,7 +1373,7 @@ static void format_corename(char *corena
 			case 'h':
 				down_read(&uts_sem);
 				rc = snprintf(out_ptr, out_end - out_ptr,
-					      "%s", system_utsname.nodename);
+					      "%s", ve_utsname.nodename);
 				up_read(&uts_sem);
 				if (rc > out_end - out_ptr)
 					goto out;
@@ -1360,7 +1401,7 @@ static void format_corename(char *corena
 	if (!pid_in_pattern
             && (core_uses_pid || atomic_read(&current->mm->mm_users) != 1)) {
 		rc = snprintf(out_ptr, out_end - out_ptr,
-			      ".%d", current->tgid);
+			      ".%d", virt_tgid(current));
 		if (rc > out_end - out_ptr)
 			goto out;
 		out_ptr += rc;
@@ -1386,7 +1427,7 @@ static void zap_threads (struct mm_struc
 	}
 
 	read_lock(&tasklist_lock);
-	do_each_thread(g,p)
+	do_each_thread_ve(g,p)
 		if (mm == p->mm && p != tsk) {
 			force_sig_specific(SIGKILL, p);
 			mm->core_waiters++;
@@ -1394,7 +1435,7 @@ static void zap_threads (struct mm_struc
 			    unlikely(p->parent->mm == mm))
 				traced = 1;
 		}
-	while_each_thread(g,p);
+	while_each_thread_ve(g,p);
 
 	read_unlock(&tasklist_lock);
 
@@ -1406,12 +1447,12 @@ static void zap_threads (struct mm_struc
 		 * coredump to finish.  Detach them so they can both die.
 		 */
 		write_lock_irq(&tasklist_lock);
-		do_each_thread(g,p) {
+		do_each_thread_ve(g,p) {
 			if (mm == p->mm && p != tsk &&
 			    p->ptrace && p->parent->mm == mm) {
 				__ptrace_detach(p, 0);
 			}
-		} while_each_thread(g,p);
+		} while_each_thread_ve(g,p);
 		write_unlock_irq(&tasklist_lock);
 	}
 }
@@ -1447,7 +1488,8 @@ int do_coredump(long signr, int exit_cod
 	if (!binfmt || !binfmt->core_dump)
 		goto fail;
 	down_write(&mm->mmap_sem);
-	if (!mm->dumpable) {
+	if (!mm->dumpable ||
+	   (!mm->vps_dumpable && !ve_is_super(get_exec_env()))) {
 		up_write(&mm->mmap_sem);
 		goto fail;
 	}
diff -uprN linux-2.6.16/fs/ext2/acl.c linux-2.6.16.ovz/fs/ext2/acl.c
--- linux-2.6.16/fs/ext2/acl.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/fs/ext2/acl.c	2006-07-05 08:34:56.000000000 -0400
@@ -294,9 +294,10 @@ ext2_check_acl(struct inode *inode, int 
 }
 
 int
-ext2_permission(struct inode *inode, int mask, struct nameidata *nd)
+ext2_permission(struct inode *inode, int mask, struct nameidata *nd,
+		struct exec_perm *perm)
 {
-	return generic_permission(inode, mask, ext2_check_acl);
+	return generic_permission(inode, mask, ext2_check_acl, perm);
 }
 
 /*
diff -uprN linux-2.6.16/fs/ext2/acl.h linux-2.6.16.ovz/fs/ext2/acl.h
--- linux-2.6.16/fs/ext2/acl.h	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/fs/ext2/acl.h	2006-07-05 08:34:56.000000000 -0400
@@ -58,7 +58,8 @@ static inline int ext2_acl_count(size_t 
 #define EXT2_ACL_NOT_CACHED ((void *)-1)
 
 /* acl.c */
-extern int ext2_permission (struct inode *, int, struct nameidata *);
+extern int ext2_permission (struct inode *, int, struct nameidata *,
+		struct exec_perm *);
 extern int ext2_acl_chmod (struct inode *);
 extern int ext2_init_acl (struct inode *, struct inode *);
 
diff -uprN linux-2.6.16/fs/ext2/namei.c linux-2.6.16.ovz/fs/ext2/namei.c
--- linux-2.6.16/fs/ext2/namei.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/fs/ext2/namei.c	2006-07-05 08:34:56.000000000 -0400
@@ -31,6 +31,7 @@
  */
 
 #include <linux/pagemap.h>
+#include <linux/quotaops.h>
 #include "ext2.h"
 #include "xattr.h"
 #include "acl.h"
@@ -273,6 +274,8 @@ static int ext2_unlink(struct inode * di
 	struct page * page;
 	int err = -ENOENT;
 
+	DQUOT_INIT(inode);
+
 	de = ext2_find_entry (dir, dentry, &page);
 	if (!de)
 		goto out;
@@ -315,6 +318,9 @@ static int ext2_rename (struct inode * o
 	struct ext2_dir_entry_2 * old_de;
 	int err = -ENOENT;
 
+	if (new_inode)
+		DQUOT_INIT(new_inode);
+
 	old_de = ext2_find_entry (old_dir, old_dentry, &old_page);
 	if (!old_de)
 		goto out;
diff -uprN linux-2.6.16/fs/ext2/super.c linux-2.6.16.ovz/fs/ext2/super.c
--- linux-2.6.16/fs/ext2/super.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/fs/ext2/super.c	2006-07-05 08:34:56.000000000 -0400
@@ -996,7 +996,7 @@ static int ext2_remount (struct super_bl
 	es = sbi->s_es;
 	if (((sbi->s_mount_opt & EXT2_MOUNT_XIP) !=
 	    (old_mount_opt & EXT2_MOUNT_XIP)) &&
-	    invalidate_inodes(sb))
+	    invalidate_inodes(sb, 0))
 		ext2_warning(sb, __FUNCTION__, "busy inodes while remounting "\
 			     "xip remain in cache (no functional problem)");
 	if ((*flags & MS_RDONLY) == (sb->s_flags & MS_RDONLY))
@@ -1205,7 +1205,7 @@ static struct file_system_type ext2_fs_t
 	.name		= "ext2",
 	.get_sb		= ext2_get_sb,
 	.kill_sb	= kill_block_super,
-	.fs_flags	= FS_REQUIRES_DEV,
+	.fs_flags	= FS_REQUIRES_DEV | FS_VIRTUALIZED,
 };
 
 static int __init init_ext2_fs(void)
diff -uprN linux-2.6.16/fs/ext3/acl.c linux-2.6.16.ovz/fs/ext3/acl.c
--- linux-2.6.16/fs/ext3/acl.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/fs/ext3/acl.c	2006-07-05 08:34:56.000000000 -0400
@@ -299,9 +299,10 @@ ext3_check_acl(struct inode *inode, int 
 }
 
 int
-ext3_permission(struct inode *inode, int mask, struct nameidata *nd)
+ext3_permission(struct inode *inode, int mask, struct nameidata *nd,
+		struct exec_perm *perm)
 {
-	return generic_permission(inode, mask, ext3_check_acl);
+	return generic_permission(inode, mask, ext3_check_acl, perm);
 }
 
 /*
diff -uprN linux-2.6.16/fs/ext3/acl.h linux-2.6.16.ovz/fs/ext3/acl.h
--- linux-2.6.16/fs/ext3/acl.h	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/fs/ext3/acl.h	2006-07-05 08:34:56.000000000 -0400
@@ -58,7 +58,8 @@ static inline int ext3_acl_count(size_t 
 #define EXT3_ACL_NOT_CACHED ((void *)-1)
 
 /* acl.c */
-extern int ext3_permission (struct inode *, int, struct nameidata *);
+extern int ext3_permission (struct inode *, int, struct nameidata *,
+		struct exec_perm *);
 extern int ext3_acl_chmod (struct inode *);
 extern int ext3_init_acl (handle_t *, struct inode *, struct inode *);
 
diff -uprN linux-2.6.16/fs/ext3/inode.c linux-2.6.16.ovz/fs/ext3/inode.c
--- linux-2.6.16/fs/ext3/inode.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/fs/ext3/inode.c	2006-07-05 08:34:56.000000000 -0400
@@ -771,6 +771,7 @@ ext3_get_block_handle(handle_t *handle, 
 
 	set_buffer_new(bh_result);
 got_it:
+	clear_buffer_delay(bh_result);
 	map_bh(bh_result, inode->i_sb, le32_to_cpu(chain[depth-1].key));
 	if (boundary)
 		set_buffer_boundary(bh_result);
@@ -964,11 +965,13 @@ static int walk_page_buffers(	handle_t *
  * and the commit_write().  So doing the journal_start at the start of
  * prepare_write() is the right place.
  *
- * Also, this function can nest inside ext3_writepage() ->
- * block_write_full_page(). In that case, we *know* that ext3_writepage()
- * has generated enough buffer credits to do the whole page.  So we won't
- * block on the journal in that case, which is good, because the caller may
- * be PF_MEMALLOC.
+ * [2004/09/04 SAW] journal_start() in prepare_write() causes various lock
+ * ranking violations if copy_from_user() triggers a page fault (mmap_sem,
+ * possibly the page lock, plus __GFP_FS allocations).
+ * Now prepare_write() only reads in buffers that are not up to date; the rest,
+ * including hole instantiation and inode extension, is done in commit_write().
+ *
+ * Other notes.
  *
  * By accident, ext3 can be reentered when a transaction is open via
  * quota file writes.  If we were to commit the transaction while thus
@@ -983,6 +986,27 @@ static int walk_page_buffers(	handle_t *
  * write.  
  */
 
+static int ext3_get_block_delay(struct inode *inode, sector_t iblock,
+			struct buffer_head *bh, int create)
+{
+	/* @create is unused: NULL handle and zeros are always passed down. */
+	int err = ext3_get_block_handle(NULL, inode, iblock, bh, 0, 0);
+
+	if (!err && !buffer_mapped(bh)) {
+		/* Lookup left bh unmapped: flag it as delayed and new. */
+		set_buffer_delay(bh);
+		set_buffer_new(bh);
+	}
+
+	return err;
+}
+
+static int ext3_prepare_write(struct file *file, struct page *page,
+		unsigned from, unsigned to)
+{
+	return block_prepare_write(page, from, to, ext3_get_block_delay);
+}
+
 static int do_journal_get_write_access(handle_t *handle, 
 				       struct buffer_head *bh)
 {
@@ -991,8 +1015,52 @@ static int do_journal_get_write_access(h
 	return ext3_journal_get_write_access(handle, bh);
 }
 
-static int ext3_prepare_write(struct file *file, struct page *page,
-			      unsigned from, unsigned to)
+/*
+ * This function zeroes buffers not mapped to disk.
+ * We do it similarly to the error path in __block_prepare_write() to avoid
+ * keeping garbage in the page cache.
+ * Here we check BH_delay state.  We know that if the buffer appears
+ * !buffer_mapped then
+ *   - it was !buffer_mapped at the moment of ext3_prepare_write, and
+ *   - ext3_get_block failed to map this buffer (e.g., ENOSPC).
+ * If this !mapped buffer is not up to date (it can be up to date if
+ * PageUptodate), then we zero its content.
+ */
+static void ext3_clear_delayed_buffers(struct page *page,
+		unsigned from, unsigned to)
+{
+	struct buffer_head *bh, *head, *next;
+	unsigned block_start, block_end;
+	unsigned blocksize;
+	void *kaddr;
+
+	head = page_buffers(page);
+	blocksize = head->b_size;
+	for (	bh = head, block_start = 0;
+		bh != head || !block_start;
+	    	block_start = block_end, bh = next)
+	{
+		next = bh->b_this_page;
+		block_end = block_start + blocksize;
+		if (block_end <= from || block_start >= to)
+			continue;
+		if (!buffer_delay(bh))
+			continue;
+		J_ASSERT_BH(bh, !buffer_mapped(bh));
+		clear_buffer_new(bh);
+		clear_buffer_delay(bh);
+		if (!buffer_uptodate(bh)) {
+			kaddr = kmap_atomic(page, KM_USER0);
+			memset(kaddr + block_start, 0, bh->b_size);
+			kunmap_atomic(kaddr, KM_USER0);
+			set_buffer_uptodate(bh);
+			mark_buffer_dirty(bh);
+		}
+	}
+}
+
+static int ext3_map_write(struct file *file, struct page *page,
+		unsigned from, unsigned to)
 {
 	struct inode *inode = page->mapping->host;
 	int ret, needed_blocks = ext3_writepage_trans_blocks(inode);
@@ -1009,18 +1077,17 @@ retry:
 		ret = nobh_prepare_write(page, from, to, ext3_get_block);
 	else
 		ret = block_prepare_write(page, from, to, ext3_get_block);
-	if (ret)
-		goto prepare_write_failed;
-
-	if (ext3_should_journal_data(inode)) {
+	if (!ret && ext3_should_journal_data(inode)) {
 		ret = walk_page_buffers(handle, page_buffers(page),
 				from, to, NULL, do_journal_get_write_access);
 	}
-prepare_write_failed:
-	if (ret)
-		ext3_journal_stop(handle);
+	if (!ret)
+		goto out;
+
+	ext3_journal_stop(handle);
 	if (ret == -ENOSPC && ext3_should_retry_alloc(inode->i_sb, &retries))
 		goto retry;
+	ext3_clear_delayed_buffers(page, from, to);
 out:
 	return ret;
 }
@@ -1055,10 +1122,15 @@ static int commit_write_fn(handle_t *han
 static int ext3_ordered_commit_write(struct file *file, struct page *page,
 			     unsigned from, unsigned to)
 {
-	handle_t *handle = ext3_journal_current_handle();
+	handle_t *handle;
 	struct inode *inode = page->mapping->host;
 	int ret = 0, ret2;
 
+	ret = ext3_map_write(file, page, from, to);
+	if (ret)
+		return ret;
+	handle = ext3_journal_current_handle();
+
 	ret = walk_page_buffers(handle, page_buffers(page),
 		from, to, NULL, ext3_journal_dirty_data);
 
@@ -1084,11 +1156,15 @@ static int ext3_ordered_commit_write(str
 static int ext3_writeback_commit_write(struct file *file, struct page *page,
 			     unsigned from, unsigned to)
 {
-	handle_t *handle = ext3_journal_current_handle();
+	handle_t *handle;
 	struct inode *inode = page->mapping->host;
 	int ret = 0, ret2;
 	loff_t new_i_size;
 
+	ret = ext3_map_write(file, page, from, to);
+	if (ret)
+		return ret;
+	handle = ext3_journal_current_handle();
 	new_i_size = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to;
 	if (new_i_size > EXT3_I(inode)->i_disksize)
 		EXT3_I(inode)->i_disksize = new_i_size;
@@ -1107,12 +1183,17 @@ static int ext3_writeback_commit_write(s
 static int ext3_journalled_commit_write(struct file *file,
 			struct page *page, unsigned from, unsigned to)
 {
-	handle_t *handle = ext3_journal_current_handle();
+	handle_t *handle;
 	struct inode *inode = page->mapping->host;
 	int ret = 0, ret2;
 	int partial = 0;
 	loff_t pos;
 
+	ret = ext3_map_write(file, page, from, to);
+	if (ret)
+		return ret;
+	handle = ext3_journal_current_handle();
+
 	/*
 	 * Here we duplicate the generic_commit_write() functionality
 	 */
diff -uprN linux-2.6.16/fs/ext3/ioctl.c linux-2.6.16.ovz/fs/ext3/ioctl.c
--- linux-2.6.16/fs/ext3/ioctl.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/fs/ext3/ioctl.c	2006-07-05 08:34:56.000000000 -0400
@@ -69,7 +69,7 @@ int ext3_ioctl (struct inode * inode, st
 		 * the relevant capability.
 		 */
 		if ((jflag ^ oldflags) & (EXT3_JOURNAL_DATA_FL)) {
-			if (!capable(CAP_SYS_RESOURCE))
+			if (!capable(CAP_SYS_ADMIN))
 				return -EPERM;
 		}
 
diff -uprN linux-2.6.16/fs/ext3/resize.c linux-2.6.16.ovz/fs/ext3/resize.c
--- linux-2.6.16/fs/ext3/resize.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/fs/ext3/resize.c	2006-07-05 08:34:56.000000000 -0400
@@ -974,6 +974,7 @@ int ext3_group_extend(struct super_block
 	if (o_blocks_count != le32_to_cpu(es->s_blocks_count)) {
 		ext3_warning(sb, __FUNCTION__,
 			     "multiple resizers run on filesystem!");
+		unlock_super(sb);
 		err = -EBUSY;
 		goto exit_put;
 	}
diff -uprN linux-2.6.16/fs/ext3/super.c linux-2.6.16.ovz/fs/ext3/super.c
--- linux-2.6.16/fs/ext3/super.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/fs/ext3/super.c	2006-07-05 08:34:56.000000000 -0400
@@ -2661,7 +2661,7 @@ static struct file_system_type ext3_fs_t
 	.name		= "ext3",
 	.get_sb		= ext3_get_sb,
 	.kill_sb	= kill_block_super,
-	.fs_flags	= FS_REQUIRES_DEV,
+	.fs_flags	= FS_REQUIRES_DEV | FS_VIRTUALIZED,
 };
 
 static int __init init_ext3_fs(void)
diff -uprN linux-2.6.16/fs/fcntl.c linux-2.6.16.ovz/fs/fcntl.c
--- linux-2.6.16/fs/fcntl.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/fs/fcntl.c	2006-07-05 08:34:56.000000000 -0400
@@ -18,6 +18,7 @@
 #include <linux/ptrace.h>
 #include <linux/signal.h>
 #include <linux/rcupdate.h>
+#include <linux/ve_owner.h>
 
 #include <asm/poll.h>
 #include <asm/siginfo.h>
@@ -190,6 +191,7 @@ out_fput:
 	fput(file);
 	goto out;
 }
+EXPORT_SYMBOL_GPL(sys_dup2);
 
 asmlinkage long sys_dup(unsigned int fildes)
 {
@@ -254,6 +256,7 @@ static int setfl(int fd, struct file * f
 static void f_modown(struct file *filp, unsigned long pid,
                      uid_t uid, uid_t euid, int force)
 {
+	pid = comb_vpid_to_pid(pid);
 	write_lock_irq(&filp->f_owner.lock);
 	if (force || !filp->f_owner.pid) {
 		filp->f_owner.pid = pid;
@@ -320,7 +323,7 @@ static long do_fcntl(int fd, unsigned in
 		 * current syscall conventions, the only way
 		 * to fix this will be in libc.
 		 */
-		err = filp->f_owner.pid;
+		err = comb_pid_to_vpid(filp->f_owner.pid);
 		force_successful_syscall_return();
 		break;
 	case F_SETOWN:
@@ -472,23 +475,29 @@ static void send_sigio_to_task(struct ta
 void send_sigio(struct fown_struct *fown, int fd, int band)
 {
 	struct task_struct *p;
+	struct file *f;
+	struct ve_struct *ve;
 	int pid;
 	
 	read_lock(&fown->lock);
 	pid = fown->pid;
 	if (!pid)
 		goto out_unlock_fown;
+
+	/* hack: fown's are always embedded in struct file */
+	f = container_of(fown, struct file, f_owner);
+	ve = VE_OWNER_FILP(f);
 	
 	read_lock(&tasklist_lock);
 	if (pid > 0) {
-		p = find_task_by_pid(pid);
-		if (p) {
+		p = find_task_by_pid_all(pid);
+		if (p && ve_accessible(VE_TASK_INFO(p)->owner_env, ve)) {
 			send_sigio_to_task(p, fown, fd, band);
 		}
 	} else {
-		do_each_task_pid(-pid, PIDTYPE_PGID, p) {
+		__do_each_task_pid_ve(-pid, PIDTYPE_PGID, p, ve) {
 			send_sigio_to_task(p, fown, fd, band);
-		} while_each_task_pid(-pid, PIDTYPE_PGID, p);
+		} __while_each_task_pid_ve(-pid, PIDTYPE_PGID, p, ve);
 	}
 	read_unlock(&tasklist_lock);
  out_unlock_fown:
@@ -505,6 +514,8 @@ static void send_sigurg_to_task(struct t
 int send_sigurg(struct fown_struct *fown)
 {
 	struct task_struct *p;
+	struct file *f;
+	struct ve_struct *ve;
 	int pid, ret = 0;
 	
 	read_lock(&fown->lock);
@@ -513,17 +524,19 @@ int send_sigurg(struct fown_struct *fown
 		goto out_unlock_fown;
 
 	ret = 1;
+	f = container_of(fown, struct file, f_owner);
+	ve = VE_OWNER_FILP(f);
 	
 	read_lock(&tasklist_lock);
 	if (pid > 0) {
-		p = find_task_by_pid(pid);
-		if (p) {
+		p = find_task_by_pid_all(pid);
+		if (p && ve_accessible(VE_TASK_INFO(p)->owner_env, ve)) {
 			send_sigurg_to_task(p, fown);
 		}
 	} else {
-		do_each_task_pid(-pid, PIDTYPE_PGID, p) {
+		__do_each_task_pid_ve(-pid, PIDTYPE_PGID, p, ve) {
 			send_sigurg_to_task(p, fown);
-		} while_each_task_pid(-pid, PIDTYPE_PGID, p);
+		} __while_each_task_pid_ve(-pid, PIDTYPE_PGID, p, ve);
 	}
 	read_unlock(&tasklist_lock);
  out_unlock_fown:
diff -uprN linux-2.6.16/fs/file.c linux-2.6.16.ovz/fs/file.c
--- linux-2.6.16/fs/file.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/fs/file.c	2006-07-05 08:34:56.000000000 -0400
@@ -8,6 +8,7 @@
 
 #include <linux/fs.h>
 #include <linux/mm.h>
+#include <linux/module.h>
 #include <linux/time.h>
 #include <linux/slab.h>
 #include <linux/vmalloc.h>
@@ -18,6 +19,8 @@
 #include <linux/rcupdate.h>
 #include <linux/workqueue.h>
 
+#include <ub/ub_mem.h>
+
 struct fdtable_defer {
 	spinlock_t lock;
 	struct work_struct wq;
@@ -44,9 +47,9 @@ struct file ** alloc_fd_array(int num)
 	int size = num * sizeof(struct file *);
 
 	if (size <= PAGE_SIZE)
-		new_fds = (struct file **) kmalloc(size, GFP_KERNEL);
+		new_fds = (struct file **) ub_kmalloc(size, GFP_KERNEL);
 	else 
-		new_fds = (struct file **) vmalloc(size);
+		new_fds = (struct file **) ub_vmalloc(size);
 	return new_fds;
 }
 
@@ -212,9 +215,9 @@ fd_set * alloc_fdset(int num)
 	int size = num / 8;
 
 	if (size <= PAGE_SIZE)
-		new_fdset = (fd_set *) kmalloc(size, GFP_KERNEL);
+		new_fdset = (fd_set *) ub_kmalloc(size, GFP_KERNEL);
 	else
-		new_fdset = (fd_set *) vmalloc(size);
+		new_fdset = (fd_set *) ub_vmalloc(size);
 	return new_fdset;
 }
 
@@ -302,7 +305,7 @@ out:
  * both fd array and fdset. It is expected to be called with the
  * files_lock held.
  */
-static int expand_fdtable(struct files_struct *files, int nr)
+int expand_fdtable(struct files_struct *files, int nr)
 	__releases(files->file_lock)
 	__acquires(files->file_lock)
 {
@@ -338,6 +341,7 @@ static int expand_fdtable(struct files_s
 out:
 	return error;
 }
+EXPORT_SYMBOL_GPL(expand_fdtable);
 
 /*
  * Expand files.
diff -uprN linux-2.6.16/fs/file_table.c linux-2.6.16.ovz/fs/file_table.c
--- linux-2.6.16/fs/file_table.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/fs/file_table.c	2006-07-05 08:34:56.000000000 -0400
@@ -9,6 +9,7 @@
 #include <linux/string.h>
 #include <linux/slab.h>
 #include <linux/file.h>
+#include <linux/ve_owner.h>
 #include <linux/init.h>
 #include <linux/module.h>
 #include <linux/smp_lock.h>
@@ -25,6 +26,8 @@
 
 #include <asm/atomic.h>
 
+#include <ub/ub_misc.h>
+
 /* sysctl tunables... */
 struct files_stat_struct files_stat = {
 	.max_files = NR_FILE
@@ -38,6 +41,8 @@ static struct percpu_counter nr_files __
 static inline void file_free_rcu(struct rcu_head *head)
 {
 	struct file *f =  container_of(head, struct file, f_u.fu_rcuhead);
+	ub_file_uncharge(f);
+	put_ve(VE_OWNER_FILP(f));
 	kmem_cache_free(filp_cachep, f);
 }
 
@@ -109,6 +114,12 @@ struct file *get_empty_filp(void)
 
 	percpu_counter_inc(&nr_files);
 	memset(f, 0, sizeof(*f));
+
+	if (ub_file_charge(f))
+		goto fail_ch;
+
+	SET_VE_OWNER_FILP(f, get_ve(get_exec_env()));
+
 	if (security_file_alloc(f))
 		goto fail_sec;
 
@@ -134,6 +145,15 @@ fail_sec:
 	file_free(f);
 fail:
 	return NULL;
+
+fail_ch:
+	/*
+	 * ub_file_charge() failed after nr_files was already incremented.
+	 * file_free() is bypassed on this path, so drop the count here.
+	 */
+	percpu_counter_dec(&nr_files);
+	kmem_cache_free(filp_cachep, f);
+	return NULL;
 }
 
 EXPORT_SYMBOL(get_empty_filp);
diff -uprN linux-2.6.16/fs/filesystems.c linux-2.6.16.ovz/fs/filesystems.c
--- linux-2.6.16/fs/filesystems.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/fs/filesystems.c	2006-07-05 08:34:56.000000000 -0400
@@ -13,6 +13,7 @@
 #include <linux/init.h>
 #include <linux/module.h>
 #include <linux/sched.h>	/* for 'current' */
+#include <linux/ve_owner.h>
 #include <asm/uaccess.h>
 
 /*
@@ -22,8 +23,8 @@
  *	During the unload module must call unregister_filesystem().
  *	We can access the fields of list element if:
  *		1) spinlock is held or
- *		2) we hold the reference to the module.
- *	The latter can be guaranteed by call of try_module_get(); if it
+ *		2) we hold the reference to the element.
+ *	The latter can be guaranteed by call of try_get_filesystem(); if it
  *	returned 0 we must skip the element, otherwise we got the reference.
  *	Once the reference is obtained we can drop the spinlock.
  */
@@ -31,23 +32,51 @@
 static struct file_system_type *file_systems;
 static DEFINE_RWLOCK(file_systems_lock);
 
+int try_get_filesystem(struct file_system_type *fs)
+{
+	/* Module reference first; get_ve() on the owner only on success. */
+	if (!try_module_get(fs->owner))
+		return 0;
+#ifdef CONFIG_VE
+	get_ve(VE_OWNER_FSTYPE(fs));
+#endif
+	return 1;
+}
+
 /* WARNING: This can be used only if we _already_ own a reference */
 void get_filesystem(struct file_system_type *fs)
 {
+#ifdef CONFIG_VE
+	get_ve(VE_OWNER_FSTYPE(fs));
+#endif
 	__module_get(fs->owner);
 }
 
 void put_filesystem(struct file_system_type *fs)
 {
 	module_put(fs->owner);
+#ifdef CONFIG_VE
+	put_ve(VE_OWNER_FSTYPE(fs));
+#endif
+}
+
+static inline int check_ve_fstype(struct file_system_type *p,
+		struct ve_struct *env)
+{
+	return ((p->fs_flags & FS_VIRTUALIZED) ||
+			ve_accessible_strict(VE_OWNER_FSTYPE(p), env));
 }
 
-static struct file_system_type **find_filesystem(const char *name)
+static struct file_system_type **find_filesystem(const char *name,
+		struct ve_struct *env)
 {
 	struct file_system_type **p;
-	for (p=&file_systems; *p; p=&(*p)->next)
+	for (p=&file_systems; *p; p=&(*p)->next) {
+		if (!check_ve_fstype(*p, env))
+			continue;
 		if (strcmp((*p)->name,name) == 0)
 			break;
+	}
 	return p;
 }
 
@@ -74,8 +103,10 @@ int register_filesystem(struct file_syst
 	if (fs->next)
 		return -EBUSY;
 	INIT_LIST_HEAD(&fs->fs_supers);
+	if (VE_OWNER_FSTYPE(fs) == NULL)
+		SET_VE_OWNER_FSTYPE(fs, get_ve0());
 	write_lock(&file_systems_lock);
-	p = find_filesystem(fs->name);
+	p = find_filesystem(fs->name, VE_OWNER_FSTYPE(fs));
 	if (*p)
 		res = -EBUSY;
 	else
@@ -132,11 +163,14 @@ static int fs_index(const char __user * 
 
 	err = -EINVAL;
 	read_lock(&file_systems_lock);
-	for (tmp=file_systems, index=0 ; tmp ; tmp=tmp->next, index++) {
+	for (tmp=file_systems, index=0 ; tmp ; tmp=tmp->next) {
+		if (!check_ve_fstype(tmp, get_exec_env()))
+			continue;
 		if (strcmp(tmp->name,name) == 0) {
 			err = index;
 			break;
 		}
+		index++;
 	}
 	read_unlock(&file_systems_lock);
 	putname(name);
@@ -149,9 +183,15 @@ static int fs_name(unsigned int index, c
 	int len, res;
 
 	read_lock(&file_systems_lock);
-	for (tmp = file_systems; tmp; tmp = tmp->next, index--)
-		if (index <= 0 && try_module_get(tmp->owner))
-			break;
+	for (tmp = file_systems; tmp; tmp = tmp->next) {
+		if (!check_ve_fstype(tmp, get_exec_env()))
+			continue;
+		if (!index) {
+			if (try_get_filesystem(tmp))
+				break;
+		} else
+			index--;
+	}
 	read_unlock(&file_systems_lock);
 	if (!tmp)
 		return -EINVAL;
@@ -169,8 +209,9 @@ static int fs_maxindex(void)
 	int index;
 
 	read_lock(&file_systems_lock);
-	for (tmp = file_systems, index = 0 ; tmp ; tmp = tmp->next, index++)
-		;
+	for (tmp = file_systems, index = 0 ; tmp ; tmp = tmp->next)
+		if (check_ve_fstype(tmp, get_exec_env()))
+			index++;
 	read_unlock(&file_systems_lock);
 	return index;
 }
@@ -206,9 +247,10 @@ int get_filesystem_list(char * buf)
 	read_lock(&file_systems_lock);
 	tmp = file_systems;
 	while (tmp && len < PAGE_SIZE - 80) {
-		len += sprintf(buf+len, "%s\t%s\n",
-			(tmp->fs_flags & FS_REQUIRES_DEV) ? "" : "nodev",
-			tmp->name);
+		if (check_ve_fstype(tmp, get_exec_env()))
+			len += sprintf(buf+len, "%s\t%s\n",
+				(tmp->fs_flags & FS_REQUIRES_DEV) ? "" : "nodev",
+				tmp->name);
 		tmp = tmp->next;
 	}
 	read_unlock(&file_systems_lock);
@@ -220,14 +262,14 @@ struct file_system_type *get_fs_type(con
 	struct file_system_type *fs;
 
 	read_lock(&file_systems_lock);
-	fs = *(find_filesystem(name));
-	if (fs && !try_module_get(fs->owner))
+	fs = *(find_filesystem(name, get_exec_env()));
+	if (fs && !try_get_filesystem(fs))
 		fs = NULL;
 	read_unlock(&file_systems_lock);
 	if (!fs && (request_module("%s", name) == 0)) {
 		read_lock(&file_systems_lock);
-		fs = *(find_filesystem(name));
-		if (fs && !try_module_get(fs->owner))
+		fs = *(find_filesystem(name, get_exec_env()));
+		if (fs && !try_get_filesystem(fs))
 			fs = NULL;
 		read_unlock(&file_systems_lock);
 	}
@@ -235,3 +277,5 @@ struct file_system_type *get_fs_type(con
 }
 
 EXPORT_SYMBOL(get_fs_type);
+EXPORT_SYMBOL(get_filesystem);
+EXPORT_SYMBOL(put_filesystem);
diff -uprN linux-2.6.16/fs/fuse/dir.c linux-2.6.16.ovz/fs/fuse/dir.c
--- linux-2.6.16/fs/fuse/dir.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/fs/fuse/dir.c	2006-07-05 08:34:56.000000000 -0400
@@ -708,14 +708,15 @@ static int fuse_access(struct inode *ino
  * access request is sent.  Execute permission is still checked
  * locally based on file mode.
  */
-static int fuse_permission(struct inode *inode, int mask, struct nameidata *nd)
+static int fuse_permission(struct inode *inode, int mask, struct nameidata *nd,
+		struct exec_perm *perm)
 {
 	struct fuse_conn *fc = get_fuse_conn(inode);
 
 	if (!fuse_allow_task(fc, current))
 		return -EACCES;
 	else if (fc->flags & FUSE_DEFAULT_PERMISSIONS) {
-		int err = generic_permission(inode, mask, NULL);
+		int err = generic_permission(inode, mask, NULL, perm);
 
 		/* If permission is denied, try to refresh file
 		   attributes.  This is also needed, because the root
@@ -723,7 +724,7 @@ static int fuse_permission(struct inode 
 		if (err == -EACCES) {
 		 	err = fuse_do_getattr(inode);
 			if (!err)
-				err = generic_permission(inode, mask, NULL);
+				err = generic_permission(inode, mask, NULL, perm);
 		}
 
 		/* Note: the opposite of the above test does not
diff -uprN linux-2.6.16/fs/fuse/file.c linux-2.6.16.ovz/fs/fuse/file.c
--- linux-2.6.16/fs/fuse/file.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/fs/fuse/file.c	2006-07-05 08:34:56.000000000 -0400
@@ -397,8 +397,12 @@ static int fuse_readpages(struct file *f
 		return -EINTR;
 
 	err = read_cache_pages(mapping, pages, fuse_readpages_fill, &data);
-	if (!err)
-		fuse_send_readpages(data.req, file, inode);
+	if (!err) {
+		if (data.req->num_pages)
+			fuse_send_readpages(data.req, file, inode);
+		else
+			fuse_put_request(fc, data.req);
+	}
 	return err;
 }
 
diff -uprN linux-2.6.16/fs/hfs/inode.c linux-2.6.16.ovz/fs/hfs/inode.c
--- linux-2.6.16/fs/hfs/inode.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/fs/hfs/inode.c	2006-07-05 08:34:56.000000000 -0400
@@ -520,11 +520,11 @@ void hfs_clear_inode(struct inode *inode
 }
 
 static int hfs_permission(struct inode *inode, int mask,
-			  struct nameidata *nd)
+			  struct nameidata *nd, struct exec_perm *perm)
 {
 	if (S_ISREG(inode->i_mode) && mask & MAY_EXEC)
 		return 0;
-	return generic_permission(inode, mask, NULL);
+	return generic_permission(inode, mask, NULL, perm);
 }
 
 static int hfs_file_open(struct inode *inode, struct file *file)
diff -uprN linux-2.6.16/fs/hfsplus/inode.c linux-2.6.16.ovz/fs/hfsplus/inode.c
--- linux-2.6.16/fs/hfsplus/inode.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/fs/hfsplus/inode.c	2006-07-05 08:34:56.000000000 -0400
@@ -237,7 +237,8 @@ static void hfsplus_set_perms(struct ino
 	perms->dev = cpu_to_be32(HFSPLUS_I(inode).dev);
 }
 
-static int hfsplus_permission(struct inode *inode, int mask, struct nameidata *nd)
+static int hfsplus_permission(struct inode *inode, int mask, struct nameidata *nd,
+		struct exec_perm *perm)
 {
 	/* MAY_EXEC is also used for lookup, if no x bit is set allow lookup,
 	 * open_exec has the same test, so it's still not executable, if a x bit
@@ -245,7 +246,7 @@ static int hfsplus_permission(struct ino
 	 */
 	if (S_ISREG(inode->i_mode) && mask & MAY_EXEC && !(inode->i_mode & 0111))
 		return 0;
-	return generic_permission(inode, mask, NULL);
+	return generic_permission(inode, mask, NULL, perm);
 }
 
 
diff -uprN linux-2.6.16/fs/hostfs/hostfs_kern.c linux-2.6.16.ovz/fs/hostfs/hostfs_kern.c
--- linux-2.6.16/fs/hostfs/hostfs_kern.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/fs/hostfs/hostfs_kern.c	2006-07-05 08:34:56.000000000 -0400
@@ -796,7 +796,8 @@ int hostfs_rename(struct inode *from_ino
 	return(err);
 }
 
-int hostfs_permission(struct inode *ino, int desired, struct nameidata *nd)
+int hostfs_permission(struct inode *ino, int desired, struct nameidata *nd,
+		struct exec_perm *perm)
 {
 	char *name;
 	int r = 0, w = 0, x = 0, err;
@@ -814,7 +815,7 @@ int hostfs_permission(struct inode *ino,
 		err = access_file(name, r, w, x);
 	kfree(name);
 	if(!err)
-		err = generic_permission(ino, desired, NULL);
+		err = generic_permission(ino, desired, NULL, perm);
 	return err;
 }
 
diff -uprN linux-2.6.16/fs/hpfs/namei.c linux-2.6.16.ovz/fs/hpfs/namei.c
--- linux-2.6.16/fs/hpfs/namei.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/fs/hpfs/namei.c	2006-07-05 08:34:56.000000000 -0400
@@ -415,7 +415,7 @@ again:
 		d_drop(dentry);
 		spin_lock(&dentry->d_lock);
 		if (atomic_read(&dentry->d_count) > 1 ||
-		    permission(inode, MAY_WRITE, NULL) ||
+		    permission(inode, MAY_WRITE, NULL, NULL) ||
 		    !S_ISREG(inode->i_mode) ||
 		    get_write_access(inode)) {
 			spin_unlock(&dentry->d_lock);
diff -uprN linux-2.6.16/fs/hugetlbfs/inode.c linux-2.6.16.ovz/fs/hugetlbfs/inode.c
--- linux-2.6.16/fs/hugetlbfs/inode.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/fs/hugetlbfs/inode.c	2006-07-05 08:34:56.000000000 -0400
@@ -800,7 +800,7 @@ struct file *hugetlb_zero_setup(size_t s
 	struct inode *inode;
 	struct dentry *dentry, *root;
 	struct qstr quick_string;
-	char buf[16];
+	char buf[64];
 
 	if (!can_do_hugetlb_shm())
 		return ERR_PTR(-EPERM);
@@ -812,7 +812,8 @@ struct file *hugetlb_zero_setup(size_t s
 		return ERR_PTR(-ENOMEM);
 
 	root = hugetlbfs_vfsmount->mnt_root;
-	snprintf(buf, 16, "%lu", hugetlbfs_counter());
+	snprintf(buf, sizeof(buf), "VE%d-%lu",
+			VEID(get_exec_env()), hugetlbfs_counter());
 	quick_string.name = buf;
 	quick_string.len = strlen(quick_string.name);
 	quick_string.hash = 0;
diff -uprN linux-2.6.16/fs/inode.c linux-2.6.16.ovz/fs/inode.c
--- linux-2.6.16/fs/inode.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/fs/inode.c	2006-07-05 08:34:56.000000000 -0400
@@ -9,6 +9,7 @@
 #include <linux/mm.h>
 #include <linux/dcache.h>
 #include <linux/init.h>
+#include <linux/kernel_stat.h>
 #include <linux/quotaops.h>
 #include <linux/slab.h>
 #include <linux/writeback.h>
@@ -98,13 +99,15 @@ DECLARE_MUTEX(iprune_sem);
  */
 struct inodes_stat_t inodes_stat;
 
-static kmem_cache_t * inode_cachep;
+kmem_cache_t *inode_cachep;
+
+static struct address_space_operations vfs_empty_aops;
+struct inode_operations vfs_empty_iops;
+static struct file_operations vfs_empty_fops;
+EXPORT_SYMBOL(vfs_empty_iops);
 
 static struct inode *alloc_inode(struct super_block *sb)
 {
-	static struct address_space_operations empty_aops;
-	static struct inode_operations empty_iops;
-	static struct file_operations empty_fops;
 	struct inode *inode;
 
 	if (sb->s_op->alloc_inode)
@@ -119,8 +122,8 @@ static struct inode *alloc_inode(struct 
 		inode->i_blkbits = sb->s_blocksize_bits;
 		inode->i_flags = 0;
 		atomic_set(&inode->i_count, 1);
-		inode->i_op = &empty_iops;
-		inode->i_fop = &empty_fops;
+		inode->i_op = &vfs_empty_iops;
+		inode->i_fop = &vfs_empty_fops;
 		inode->i_nlink = 1;
 		atomic_set(&inode->i_writecount, 0);
 		inode->i_size = 0;
@@ -144,7 +147,7 @@ static struct inode *alloc_inode(struct 
 			return NULL;
 		}
 
-		mapping->a_ops = &empty_aops;
+		mapping->a_ops = &vfs_empty_aops;
  		mapping->host = inode;
 		mapping->flags = 0;
 		mapping_set_gfp_mask(mapping, GFP_HIGHUSER);
@@ -303,13 +306,57 @@ static void dispose_list(struct list_hea
 	spin_unlock(&inode_lock);
 }
 
+/* Print the "busy inodes" banner for the superblock that owns @inode.
+ * A NULL s_type or s_root is shown as an empty string. */
+static void show_header(struct inode *inode)
+{
+	struct super_block *sb = inode->i_sb;
+	const char *type = sb->s_type ? sb->s_type->name : "";
+	const char *root = sb->s_root ? (char *)sb->s_root->d_name.name : "";
+
+	printk("VFS: Busy inodes after unmount. "
+	       "sb = %p, fs type = %s, sb count = %d, sb->s_root = %s\n",
+	       sb, type, sb->s_count, root);
+}
+
+/*
+ * Dump one busy inode: key fields, the raw bytes of the inode itself and
+ * the names of all dentries aliased to it.  @tmp (the list link the caller
+ * iterates over) is kept for interface compatibility only.
+ */
+static void show_inode(struct list_head *tmp, struct inode *inode)
+{
+	struct dentry *d;
+	int i;
+
+	printk("inode = %p, inode->i_count = %d, "
+			"inode->i_nlink = %d, "
+			"inode->i_mode = %d, "
+			"inode->i_state = %lu, "
+			"inode->i_flags = %d, "
+			"inode->i_devices.next = %p, "
+			"inode->i_devices.prev = %p, "
+			"inode->i_ino = %lu\n",
+			inode, atomic_read(&inode->i_count),
+			inode->i_nlink, inode->i_mode,
+			inode->i_state, inode->i_flags,
+			inode->i_devices.next, inode->i_devices.prev,
+			inode->i_ino);
+	printk("inode dump: ");
+	for (i = 0; i < sizeof(*inode); i++)
+		printk("%2.2x ", *((u_char *)inode + i));
+	printk("\n");
+	list_for_each_entry(d, &inode->i_dentry, d_alias)
+		printk("  d_alias %s\n", d->d_name.name);
+}
+
 /*
  * Invalidate all inodes for a device.
  */
-static int invalidate_list(struct list_head *head, struct list_head *dispose)
+static int invalidate_list(struct list_head *head, struct list_head *dispose, int check)
 {
 	struct list_head *next;
-	int busy = 0, count = 0;
+	int busy = 0, count = 0, once = 1;
 
 	next = head->next;
 	for (;;) {
@@ -336,6 +383,14 @@ static int invalidate_list(struct list_h
 			continue;
 		}
 		busy = 1;
+
+		if (check) {
+			if (once) {
+				once = 0;
+				show_header(inode);
+			}
+			show_inode(tmp, inode);
+		}
 	}
 	/* only unused inodes may be cached with i_count zero */
 	inodes_stat.nr_unused -= count;
@@ -350,7 +405,7 @@ static int invalidate_list(struct list_h
  *	fails because there are busy inodes then a non zero value is returned.
  *	If the discard is successful all the inodes have been discarded.
  */
-int invalidate_inodes(struct super_block * sb)
+int invalidate_inodes(struct super_block * sb, int check)
 {
 	int busy;
 	LIST_HEAD(throw_away);
@@ -358,7 +413,7 @@ int invalidate_inodes(struct super_block
 	down(&iprune_sem);
 	spin_lock(&inode_lock);
 	inotify_unmount_inodes(&sb->s_inodes);
-	busy = invalidate_list(&sb->s_inodes, &throw_away);
+	busy = invalidate_list(&sb->s_inodes, &throw_away, check);
 	spin_unlock(&inode_lock);
 
 	dispose_list(&throw_away);
@@ -382,7 +437,7 @@ int __invalidate_device(struct block_dev
 		 * hold).
 		 */
 		shrink_dcache_sb(sb);
-		res = invalidate_inodes(sb);
+		res = invalidate_inodes(sb, 0);
 		drop_super(sb);
 	}
 	invalidate_bdev(bdev, 0);
@@ -478,6 +533,7 @@ static void prune_icache(int nr_to_scan)
  */
 static int shrink_icache_memory(int nr, gfp_t gfp_mask)
 {
+	KSTAT_PERF_ENTER(shrink_icache)
 	if (nr) {
 		/*
 		 * Nasty deadlock avoidance.  We may hold various FS locks,
@@ -488,6 +544,7 @@ static int shrink_icache_memory(int nr, 
 			return -1;
 		prune_icache(nr);
 	}
+	KSTAT_PERF_LEAVE(shrink_icache)
 	return (inodes_stat.nr_unused / 100) * sysctl_vfs_cache_pressure;
 }
 
@@ -737,7 +794,7 @@ EXPORT_SYMBOL(iunique);
 struct inode *igrab(struct inode *inode)
 {
 	spin_lock(&inode_lock);
-	if (!(inode->i_state & (I_FREEING|I_WILL_FREE)))
+	if (inode && !(inode->i_state & (I_FREEING|I_WILL_FREE)))
 		__iget(inode);
 	else
 		/*
diff -uprN linux-2.6.16/fs/inotify.c linux-2.6.16.ovz/fs/inotify.c
--- linux-2.6.16/fs/inotify.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/fs/inotify.c	2006-07-05 08:34:56.000000000 -0400
@@ -374,7 +374,7 @@ static int find_inode(const char __user 
 	if (error)
 		return error;
 	/* you can only watch an inode if you have read permissions on it */
-	error = vfs_permission(nd, MAY_READ);
+	error = vfs_permission(nd, MAY_READ, NULL);
 	if (error) 
 		path_release(nd);
 	return error;
diff -uprN linux-2.6.16/fs/ioprio.c linux-2.6.16.ovz/fs/ioprio.c
--- linux-2.6.16/fs/ioprio.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/fs/ioprio.c	2006-07-05 08:34:56.000000000 -0400
@@ -53,6 +53,9 @@ asmlinkage long sys_ioprio_set(int which
 	struct user_struct *user;
 	int ret;
 
+	if (!ve_is_super(get_exec_env()))
+		return -EPERM;
+
 	switch (class) {
 		case IOPRIO_CLASS_RT:
 			if (!capable(CAP_SYS_ADMIN))
@@ -78,18 +81,18 @@ asmlinkage long sys_ioprio_set(int which
 			if (!who)
 				p = current;
 			else
-				p = find_task_by_pid(who);
+				p = find_task_by_pid_all(who);
 			if (p)
 				ret = set_task_ioprio(p, ioprio);
 			break;
 		case IOPRIO_WHO_PGRP:
 			if (!who)
 				who = process_group(current);
-			do_each_task_pid(who, PIDTYPE_PGID, p) {
+			do_each_task_pid_all(who, PIDTYPE_PGID, p) {
 				ret = set_task_ioprio(p, ioprio);
 				if (ret)
 					break;
-			} while_each_task_pid(who, PIDTYPE_PGID, p);
+			} while_each_task_pid_all(who, PIDTYPE_PGID, p);
 			break;
 		case IOPRIO_WHO_USER:
 			if (!who)
@@ -100,13 +103,13 @@ asmlinkage long sys_ioprio_set(int which
 			if (!user)
 				break;
 
-			do_each_thread(g, p) {
+			do_each_thread_all(g, p) {
 				if (p->uid != who)
 					continue;
 				ret = set_task_ioprio(p, ioprio);
 				if (ret)
 					break;
-			} while_each_thread(g, p);
+			} while_each_thread_all(g, p);
 
 			if (who)
 				free_uid(user);
@@ -131,19 +134,19 @@ asmlinkage long sys_ioprio_get(int which
 			if (!who)
 				p = current;
 			else
-				p = find_task_by_pid(who);
+				p = find_task_by_pid_ve(who);
 			if (p)
 				ret = p->ioprio;
 			break;
 		case IOPRIO_WHO_PGRP:
 			if (!who)
 				who = process_group(current);
-			do_each_task_pid(who, PIDTYPE_PGID, p) {
+			do_each_task_pid_ve(who, PIDTYPE_PGID, p) {
 				if (ret == -ESRCH)
 					ret = p->ioprio;
 				else
 					ret = ioprio_best(ret, p->ioprio);
-			} while_each_task_pid(who, PIDTYPE_PGID, p);
+			} while_each_task_pid_ve(who, PIDTYPE_PGID, p);
 			break;
 		case IOPRIO_WHO_USER:
 			if (!who)
@@ -154,14 +157,14 @@ asmlinkage long sys_ioprio_get(int which
 			if (!user)
 				break;
 
-			do_each_thread(g, p) {
+			do_each_thread_ve(g, p) {
 				if (p->uid != user->uid)
 					continue;
 				if (ret == -ESRCH)
 					ret = p->ioprio;
 				else
 					ret = ioprio_best(ret, p->ioprio);
-			} while_each_thread(g, p);
+			} while_each_thread_ve(g, p);
 
 			if (who)
 				free_uid(user);
diff -uprN linux-2.6.16/fs/jbd/journal.c linux-2.6.16.ovz/fs/jbd/journal.c
--- linux-2.6.16/fs/jbd/journal.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/fs/jbd/journal.c	2006-07-05 08:34:56.000000000 -0400
@@ -210,10 +210,16 @@ end_loop:
 	return 0;
 }
 
-static void journal_start_thread(journal_t *journal)
+static int journal_start_thread(journal_t *journal)
 {
-	kernel_thread(kjournald, journal, CLONE_VM|CLONE_FS|CLONE_FILES);
+	int pid = kernel_thread(kjournald, journal,
+				CLONE_VM|CLONE_FS|CLONE_FILES);
+
+	/* a negative result is handed back as the error instead of waiting */
+	if (pid < 0)
+		return pid;
 	wait_event(journal->j_wait_done_commit, journal->j_task != 0);
+	return 0;
 }
 
 static void journal_kill_thread(journal_t *journal)
@@ -839,8 +845,7 @@ static int journal_reset(journal_t *jour
 
 	/* Add the dynamic fields and write it to disk. */
 	journal_update_superblock(journal, 1);
-	journal_start_thread(journal);
-	return 0;
+	return journal_start_thread(journal);
 }
 
 /** 
diff -uprN linux-2.6.16/fs/jbd/transaction.c linux-2.6.16.ovz/fs/jbd/transaction.c
--- linux-2.6.16/fs/jbd/transaction.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/fs/jbd/transaction.c	2006-07-05 08:34:56.000000000 -0400
@@ -1868,6 +1868,7 @@ zap_buffer_unlocked:
 	clear_buffer_mapped(bh);
 	clear_buffer_req(bh);
 	clear_buffer_new(bh);
+	clear_buffer_delay(bh);
 	bh->b_bdev = NULL;
 	return may_free;
 }
diff -uprN linux-2.6.16/fs/jfs/acl.c linux-2.6.16.ovz/fs/jfs/acl.c
--- linux-2.6.16/fs/jfs/acl.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/fs/jfs/acl.c	2006-07-05 08:34:56.000000000 -0400
@@ -140,9 +140,10 @@ static int jfs_check_acl(struct inode *i
 	return -EAGAIN;
 }
 
-int jfs_permission(struct inode *inode, int mask, struct nameidata *nd)
+int jfs_permission(struct inode *inode, int mask, struct nameidata *nd,
+		struct exec_perm *perm)
 {
-	return generic_permission(inode, mask, jfs_check_acl);
+	return generic_permission(inode, mask, jfs_check_acl, perm);
 }
 
 int jfs_init_acl(tid_t tid, struct inode *inode, struct inode *dir)
diff -uprN linux-2.6.16/fs/jfs/jfs_acl.h linux-2.6.16.ovz/fs/jfs/jfs_acl.h
--- linux-2.6.16/fs/jfs/jfs_acl.h	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/fs/jfs/jfs_acl.h	2006-07-05 08:34:56.000000000 -0400
@@ -20,7 +20,7 @@
 
 #ifdef CONFIG_JFS_POSIX_ACL
 
-int jfs_permission(struct inode *, int, struct nameidata *);
+int jfs_permission(struct inode *, int, struct nameidata *, struct exec_perm *);
 int jfs_init_acl(tid_t, struct inode *, struct inode *);
 int jfs_setattr(struct dentry *, struct iattr *);
 
diff -uprN linux-2.6.16/fs/jfs/jfs_metapage.c linux-2.6.16.ovz/fs/jfs/jfs_metapage.c
--- linux-2.6.16/fs/jfs/jfs_metapage.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/fs/jfs/jfs_metapage.c	2006-07-05 08:34:56.000000000 -0400
@@ -543,7 +543,7 @@ add_failed:
 static int metapage_releasepage(struct page *page, gfp_t gfp_mask)
 {
 	struct metapage *mp;
-	int busy = 0;
+	int ret = 1;
 	unsigned int offset;
 
 	for (offset = 0; offset < PAGE_CACHE_SIZE; offset += PSIZE) {
@@ -553,30 +553,20 @@ static int metapage_releasepage(struct p
 			continue;
 
 		jfs_info("metapage_releasepage: mp = 0x%p", mp);
-		if (mp->count || mp->nohomeok) {
+		if (mp->count || mp->nohomeok ||
+		    test_bit(META_dirty, &mp->flag)) {
 			jfs_info("count = %ld, nohomeok = %d", mp->count,
 				 mp->nohomeok);
-			busy = 1;
+			ret = 0;
 			continue;
 		}
-		wait_on_page_writeback(page);
-		//WARN_ON(test_bit(META_dirty, &mp->flag));
-		if (test_bit(META_dirty, &mp->flag)) {
-			dump_mem("dirty mp in metapage_releasepage", mp,
-				 sizeof(struct metapage));
-			dump_mem("page", page, sizeof(struct page));
-			dump_stack();
-		}
 		if (mp->lsn)
 			remove_from_logsync(mp);
 		remove_metapage(page, mp);
 		INCREMENT(mpStat.pagefree);
 		free_metapage(mp);
 	}
-	if (busy)
-		return -1;
-
-	return 0;
+	return ret;
 }
 
 static int metapage_invalidatepage(struct page *page, unsigned long offset)
diff -uprN linux-2.6.16/fs/lockd/clntproc.c linux-2.6.16.ovz/fs/lockd/clntproc.c
--- linux-2.6.16/fs/lockd/clntproc.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/fs/lockd/clntproc.c	2006-07-05 08:34:56.000000000 -0400
@@ -130,10 +130,10 @@ static void nlmclnt_setlockargs(struct n
 	nlmclnt_next_cookie(&argp->cookie);
 	argp->state   = nsm_local_state;
 	memcpy(&lock->fh, NFS_FH(fl->fl_file->f_dentry->d_inode), sizeof(struct nfs_fh));
-	lock->caller  = system_utsname.nodename;
+	lock->caller  = ve_utsname.nodename;
 	lock->oh.data = req->a_owner;
 	lock->oh.len  = sprintf(req->a_owner, "%d@%s",
-				current->pid, system_utsname.nodename);
+				current->pid, ve_utsname.nodename);
 	locks_copy_lock(&lock->fl, fl);
 }
 
@@ -154,7 +154,7 @@ nlmclnt_setgrantargs(struct nlm_rqst *ca
 {
 	locks_copy_lock(&call->a_args.lock.fl, &lock->fl);
 	memcpy(&call->a_args.lock.fh, &lock->fh, sizeof(call->a_args.lock.fh));
-	call->a_args.lock.caller = system_utsname.nodename;
+	call->a_args.lock.caller = ve_utsname.nodename;
 	call->a_args.lock.oh.len = lock->oh.len;
 
 	/* set default data area */
diff -uprN linux-2.6.16/fs/lockd/mon.c linux-2.6.16.ovz/fs/lockd/mon.c
--- linux-2.6.16/fs/lockd/mon.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/fs/lockd/mon.c	2006-07-05 08:34:56.000000000 -0400
@@ -147,7 +147,7 @@ xdr_encode_common(struct rpc_rqst *rqstp
 	 */
 	sprintf(buffer, "%u.%u.%u.%u", NIPQUAD(argp->addr));
 	if (!(p = xdr_encode_string(p, buffer))
-	 || !(p = xdr_encode_string(p, system_utsname.nodename)))
+	 || !(p = xdr_encode_string(p, ve_utsname.nodename)))
 		return ERR_PTR(-EIO);
 	*p++ = htonl(argp->prog);
 	*p++ = htonl(argp->vers);
diff -uprN linux-2.6.16/fs/locks.c linux-2.6.16.ovz/fs/locks.c
--- linux-2.6.16/fs/locks.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/fs/locks.c	2006-07-05 08:34:56.000000000 -0400
@@ -129,6 +129,8 @@
 #include <asm/semaphore.h>
 #include <asm/uaccess.h>
 
+#include <ub/ub_misc.h>
+
 #define IS_POSIX(fl)	(fl->fl_flags & FL_POSIX)
 #define IS_FLOCK(fl)	(fl->fl_flags & FL_FLOCK)
 #define IS_LEASE(fl)	(fl->fl_flags & FL_LEASE)
@@ -148,11 +150,28 @@ static LIST_HEAD(blocked_list);
 static kmem_cache_t *filelock_cache;
 
 /* Allocate an empty lock structure. */
-static struct file_lock *locks_alloc_lock(void)
+static struct file_lock *locks_alloc_lock(int charge)
 {
-	return kmem_cache_alloc(filelock_cache, SLAB_KERNEL);
+	struct file_lock *fl = kmem_cache_alloc(filelock_cache, SLAB_KERNEL);
+
+#ifdef CONFIG_USER_RESOURCE
+	/*
+	 * A new lock starts uncharged.  When @charge is set it is charged
+	 * via ub_flock_charge(); a non-zero result from that call makes the
+	 * whole allocation fail and the object is returned to the cache.
+	 */
+	if (fl != NULL) {
+		fl->fl_charged = 0;
+		if (charge && ub_flock_charge(fl, 1)) {
+			kmem_cache_free(filelock_cache, fl);
+			fl = NULL;
+		}
+	}
+#endif
+	return fl;
 }
 
+
 /* Free a lock which is not in use. */
 static void locks_free_lock(struct file_lock *fl)
 {
@@ -181,6 +200,7 @@ static void locks_free_lock(struct file_
 		fl->fl_lmops = NULL;
 	}
 
+	ub_flock_uncharge(fl);
 	kmem_cache_free(filelock_cache, fl);
 }
 
@@ -263,7 +283,7 @@ static int flock_make_lock(struct file *
 	if (type < 0)
 		return type;
 	
-	fl = locks_alloc_lock();
+	fl = locks_alloc_lock(type != F_UNLCK);
 	if (fl == NULL)
 		return -ENOMEM;
 
@@ -432,15 +452,14 @@ static struct lock_manager_operations le
  */
 static int lease_init(struct file *filp, int type, struct file_lock *fl)
  {
+	if (assign_type(fl, type) != 0)
+		return -EINVAL;
+
 	fl->fl_owner = current->files;
 	fl->fl_pid = current->tgid;
 
 	fl->fl_file = filp;
 	fl->fl_flags = FL_LEASE;
-	if (assign_type(fl, type) != 0) {
-		locks_free_lock(fl);
-		return -EINVAL;
-	}
 	fl->fl_start = 0;
 	fl->fl_end = OFFSET_MAX;
 	fl->fl_ops = NULL;
@@ -451,17 +470,20 @@ static int lease_init(struct file *filp,
 /* Allocate a file_lock initialised to this type of lease */
 static int lease_alloc(struct file *filp, int type, struct file_lock **flp)
 {
-	struct file_lock *fl = locks_alloc_lock();
-	int error;
+	struct file_lock *fl = locks_alloc_lock(1);
+	int error = -ENOMEM;
 
 	if (fl == NULL)
-		return -ENOMEM;
+		goto out;
 
 	error = lease_init(filp, type, fl);
-	if (error)
-		return error;
+	if (error) {
+		locks_free_lock(fl);
+		fl = NULL;
+	}
+out:
 	*flp = fl;
-	return 0;
+	return error;
 }
 
 /* Check if two locks overlap each other.
@@ -712,8 +734,9 @@ EXPORT_SYMBOL(posix_locks_deadlock);
  * at the head of the list, but that's secret knowledge known only to
  * flock_lock_file and posix_lock_file.
  */
-static int flock_lock_file(struct file *filp, struct file_lock *new_fl)
+static int flock_lock_file(struct file *filp, struct file_lock *request)
 {
+	struct file_lock *new_fl = NULL;
 	struct file_lock **before;
 	struct inode * inode = filp->f_dentry->d_inode;
 	int error = 0;
@@ -728,44 +751,60 @@ static int flock_lock_file(struct file *
 			continue;
 		if (filp != fl->fl_file)
 			continue;
-		if (new_fl->fl_type == fl->fl_type)
+		if (request->fl_type == fl->fl_type)
 			goto out;
 		found = 1;
 		locks_delete_lock(before);
 		break;
 	}
-	unlock_kernel();
 
-	if (new_fl->fl_type == F_UNLCK)
-		return 0;
+	if (request->fl_type == F_UNLCK)
+		goto out;
 
 	/*
+	 * A non-F_UNLCK request must already have been charged in
+	 * flock_make_lock().
+	 *
+	 * Strictly speaking new_fl should be charged rather than the
+	 * request, but charging the request lets us fail earlier.
+	 */
+	error = -ENOMEM;
+	new_fl = locks_alloc_lock(0);
+	if (new_fl == NULL)
+		goto out;
+	/*
 	 * If a higher-priority process was blocked on the old file lock,
 	 * give it the opportunity to lock the file.
 	 */
 	if (found)
 		cond_resched();
 
-	lock_kernel();
 	for_each_lock(inode, before) {
 		struct file_lock *fl = *before;
 		if (IS_POSIX(fl))
 			break;
 		if (IS_LEASE(fl))
 			continue;
-		if (!flock_locks_conflict(new_fl, fl))
+		if (!flock_locks_conflict(request, fl))
 			continue;
 		error = -EAGAIN;
-		if (new_fl->fl_flags & FL_SLEEP) {
-			locks_insert_block(fl, new_fl);
-		}
+		if (request->fl_flags & FL_SLEEP)
+			locks_insert_block(fl, request);
 		goto out;
 	}
+
+	set_flock_charged(new_fl);
+	unset_flock_charged(request);
+
+	locks_copy_lock(new_fl, request);
 	locks_insert_lock(&inode->i_flock, new_fl);
+	new_fl = NULL;
 	error = 0;
 
 out:
 	unlock_kernel();
+	if (new_fl)
+		locks_free_lock(new_fl);
 	return error;
 }
 
@@ -784,8 +823,11 @@ static int __posix_lock_file(struct inod
 	 * We may need two file_lock structures for this operation,
 	 * so we get them in advance to avoid races.
 	 */
-	new_fl = locks_alloc_lock();
-	new_fl2 = locks_alloc_lock();
+	if (request->fl_type != F_UNLCK)
+		new_fl = locks_alloc_lock(1);
+	else
+		new_fl = NULL;
+	new_fl2 = locks_alloc_lock(0);
 
 	lock_kernel();
 	if (request->fl_type != F_UNLCK) {
@@ -813,7 +855,7 @@ static int __posix_lock_file(struct inod
 		goto out;
 
 	error = -ENOLCK; /* "no luck" */
-	if (!(new_fl && new_fl2))
+	if (!((request->fl_type == F_UNLCK || new_fl) && new_fl2))
 		goto out;
 
 	/*
@@ -919,19 +961,30 @@ static int __posix_lock_file(struct inod
 	if (!added) {
 		if (request->fl_type == F_UNLCK)
 			goto out;
+		error = -ENOLCK;
+		if (right && (left == right) && ub_flock_charge(new_fl, 1))
+			goto out;
 		locks_copy_lock(new_fl, request);
 		locks_insert_lock(before, new_fl);
 		new_fl = NULL;
+		error = 0;
 	}
 	if (right) {
 		if (left == right) {
 			/* The new lock breaks the old one in two pieces,
 			 * so we have to use the second new lock.
 			 */
+			error = -ENOLCK;
+			if (added && ub_flock_charge(new_fl2,
+						request->fl_type != F_UNLCK))
+				goto out;
+			/* FIXME: move all fl_charged manipulation into ub code */
+			set_flock_charged(new_fl2);
 			left = new_fl2;
 			new_fl2 = NULL;
 			locks_copy_lock(left, right);
 			locks_insert_lock(before, left);
+			error = 0;
 		}
 		right->fl_start = request->fl_end + 1;
 		locks_wake_up_blocks(right);
@@ -1337,6 +1390,7 @@ static int __setlease(struct file *filp,
 		goto out;
 
 	if (my_before != NULL) {
+		*flp = *my_before;
 		error = lease->fl_lmops->fl_change(my_before, arg);
 		goto out;
 	}
@@ -1529,15 +1583,14 @@ asmlinkage long sys_flock(unsigned int f
 		error = flock_lock_file_wait(filp, lock);
 
  out_free:
-	if (list_empty(&lock->fl_link)) {
-		locks_free_lock(lock);
-	}
+	locks_free_lock(lock);
 
  out_putf:
 	fput(filp);
  out:
 	return error;
 }
+EXPORT_SYMBOL_GPL(sys_flock);
 
 /* Report the first existing lock that would conflict with l.
  * This implements the F_GETLK command of fcntl().
@@ -1573,7 +1626,7 @@ int fcntl_getlk(struct file *filp, struc
  
 	flock.l_type = F_UNLCK;
 	if (fl != NULL) {
-		flock.l_pid = fl->fl_pid;
+		flock.l_pid = pid_type_to_vpid(PIDTYPE_TGID, fl->fl_pid);
 #if BITS_PER_LONG == 32
 		/*
 		 * Make sure we can represent the posix lock via
@@ -1605,7 +1658,7 @@ out:
 int fcntl_setlk(unsigned int fd, struct file *filp, unsigned int cmd,
 		struct flock __user *l)
 {
-	struct file_lock *file_lock = locks_alloc_lock();
+	struct file_lock *file_lock = locks_alloc_lock(0);
 	struct flock flock;
 	struct inode *inode;
 	int error;
@@ -1727,7 +1780,7 @@ int fcntl_getlk64(struct file *filp, str
  
 	flock.l_type = F_UNLCK;
 	if (fl != NULL) {
-		flock.l_pid = fl->fl_pid;
+		flock.l_pid = pid_type_to_vpid(PIDTYPE_TGID, fl->fl_pid);
 		flock.l_start = fl->fl_start;
 		flock.l_len = fl->fl_end == OFFSET_MAX ? 0 :
 			fl->fl_end - fl->fl_start + 1;
@@ -1748,7 +1801,7 @@ out:
 int fcntl_setlk64(unsigned int fd, struct file *filp, unsigned int cmd,
 		struct flock64 __user *l)
 {
-	struct file_lock *file_lock = locks_alloc_lock();
+	struct file_lock *file_lock = locks_alloc_lock(0);
 	struct flock64 flock;
 	struct inode *inode;
 	int error;
@@ -1976,7 +2029,9 @@ EXPORT_SYMBOL(posix_unblock_lock);
 static void lock_get_status(char* out, struct file_lock *fl, int id, char *pfx)
 {
 	struct inode *inode = NULL;
+	unsigned int fl_pid;
 
+	fl_pid = pid_type_to_vpid(PIDTYPE_TGID, fl->fl_pid);
 	if (fl->fl_file != NULL)
 		inode = fl->fl_file->f_dentry->d_inode;
 
@@ -2018,16 +2073,16 @@ static void lock_get_status(char* out, s
 	}
 	if (inode) {
 #ifdef WE_CAN_BREAK_LSLK_NOW
-		out += sprintf(out, "%d %s:%ld ", fl->fl_pid,
+		out += sprintf(out, "%d %s:%ld ", fl_pid,
 				inode->i_sb->s_id, inode->i_ino);
 #else
 		/* userspace relies on this representation of dev_t ;-( */
-		out += sprintf(out, "%d %02x:%02x:%ld ", fl->fl_pid,
+		out += sprintf(out, "%d %02x:%02x:%ld ", fl_pid,
 				MAJOR(inode->i_sb->s_dev),
 				MINOR(inode->i_sb->s_dev), inode->i_ino);
 #endif
 	} else {
-		out += sprintf(out, "%d <none>:0 ", fl->fl_pid);
+		out += sprintf(out, "%d <none>:0 ", fl_pid);
 	}
 	if (IS_POSIX(fl)) {
 		if (fl->fl_end == OFFSET_MAX)
@@ -2076,11 +2131,17 @@ int get_locks_status(char *buffer, char 
 	char *q = buffer;
 	off_t pos = 0;
 	int i = 0;
+	struct ve_struct *env;
 
 	lock_kernel();
+	env = get_exec_env();
 	list_for_each(tmp, &file_lock_list) {
 		struct list_head *btmp;
 		struct file_lock *fl = list_entry(tmp, struct file_lock, fl_link);
+
+		if (!ve_accessible(VE_OWNER_FILP(fl->fl_file), env))
+			continue;
+
 		lock_get_status(q, fl, ++i, "");
 		move_lock_status(&q, &pos, offset);
 
@@ -2212,7 +2273,12 @@ void steal_locks(fl_owner_t from)
 
 	lock_kernel();
 	j = 0;
-	rcu_read_lock();
+
+	/*
+	 * We are not taking a ref to the file structures, so
+	 * we need to acquire ->file_lock.
+	 */
+	spin_lock(&files->file_lock);
 	fdt = files_fdtable(files);
 	for (;;) {
 		unsigned long set;
@@ -2230,7 +2296,7 @@ void steal_locks(fl_owner_t from)
 			set >>= 1;
 		}
 	}
-	rcu_read_unlock();
+	spin_unlock(&files->file_lock);
 	unlock_kernel();
 }
 EXPORT_SYMBOL(steal_locks);
@@ -2238,7 +2304,7 @@ EXPORT_SYMBOL(steal_locks);
 static int __init filelock_init(void)
 {
 	filelock_cache = kmem_cache_create("file_lock_cache",
-			sizeof(struct file_lock), 0, SLAB_PANIC,
+			sizeof(struct file_lock), 0, SLAB_PANIC | SLAB_UBC,
 			init_once, NULL);
 	return 0;
 }
diff -uprN linux-2.6.16/fs/namei.c linux-2.6.16.ovz/fs/namei.c
--- linux-2.6.16/fs/namei.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/fs/namei.c	2006-07-05 08:34:56.000000000 -0400
@@ -179,7 +179,7 @@ EXPORT_SYMBOL(putname);
  * for filesystem access without changing the "normal" uids which
  * are used for other things..
  */
-int generic_permission(struct inode *inode, int mask,
+static int __generic_permission(struct inode *inode, int mask,
 		int (*check_acl)(struct inode *inode, int mask))
 {
 	umode_t			mode = inode->i_mode;
@@ -225,7 +225,26 @@ int generic_permission(struct inode *ino
 	return -EACCES;
 }
 
-int permission(struct inode *inode, int mask, struct nameidata *nd)
+/* With @perm: check and set_exec_perm() on success, both under i_mutex. */
+int generic_permission(struct inode *inode, int mask,
+		int (*check_acl)(struct inode *inode, int mask),
+		struct exec_perm *perm)
+{
+	int err;
+
+	if (perm)
+		mutex_lock(&inode->i_mutex);
+	err = __generic_permission(inode, mask, check_acl);
+	if (perm && !err)
+		set_exec_perm(perm, inode);
+	if (perm)
+		mutex_unlock(&inode->i_mutex);
+	return err;
+}
+
+
+int permission(struct inode *inode, int mask, struct nameidata *nd,
+		struct exec_perm *perm)
 {
 	int retval, submask;
 
@@ -250,9 +269,9 @@ int permission(struct inode *inode, int 
 	/* Ordinary permission routines do not understand MAY_APPEND. */
 	submask = mask & ~MAY_APPEND;
 	if (inode->i_op && inode->i_op->permission)
-		retval = inode->i_op->permission(inode, submask, nd);
+		retval = inode->i_op->permission(inode, submask, nd, perm);
 	else
-		retval = generic_permission(inode, submask, NULL);
+		retval = generic_permission(inode, submask, NULL, perm);
 	if (retval)
 		return retval;
 
@@ -269,9 +288,9 @@ int permission(struct inode *inode, int 
  * for filesystem access without changing the "normal" uids which
  * are used for other things.
  */
-int vfs_permission(struct nameidata *nd, int mask)
+int vfs_permission(struct nameidata *nd, int mask, struct exec_perm *perm)
 {
-	return permission(nd->dentry->d_inode, mask, nd);
+	return permission(nd->dentry->d_inode, mask, nd, perm);
 }
 
 /**
@@ -288,7 +307,7 @@ int vfs_permission(struct nameidata *nd,
  */
 int file_permission(struct file *file, int mask)
 {
-	return permission(file->f_dentry->d_inode, mask, NULL);
+	return permission(file->f_dentry->d_inode, mask, NULL, NULL);
 }
 
 /*
@@ -379,6 +398,21 @@ static struct dentry * cached_lookup(str
 	if (!dentry)
 		dentry = d_lookup(parent, name);
 
+	/*
+	 * The revalidation rules are simple:
+	 * d_revalidate operation is called when we're about to use a cached
+	 * dentry rather than call d_lookup.
+	 * d_revalidate method may unhash the dentry itself or return FALSE, in
+	 * which case if the dentry can be released d_lookup will be called.
+	 *
+	 * Additionally, by request of NFS people
+	 * (http://linux.bkbits.net:8080/linux-2.4/cset@1.181?nav=index.html|src/|src/fs|related/fs/namei.c)
+	 * d_revalidate is called when `/', `.' or `..' are looked up.
+	 * Since re-lookup is impossible on them, we introduce a hack and
+	 * return an error in this case.
+	 *
+	 *     2003/02/19  SAW
+	 */
 	if (dentry && dentry->d_op && dentry->d_op->d_revalidate) {
 		if (!dentry->d_op->d_revalidate(dentry, nd) && !d_invalidate(dentry)) {
 			dput(dentry);
@@ -441,6 +475,7 @@ static struct dentry * real_lookup(struc
 	struct dentry * result;
 	struct inode *dir = parent->d_inode;
 
+repeat:
 	mutex_lock(&dir->i_mutex);
 	/*
 	 * First re-do the cached lookup just in case it was created
@@ -479,7 +514,7 @@ static struct dentry * real_lookup(struc
 	if (result->d_op && result->d_op->d_revalidate) {
 		if (!result->d_op->d_revalidate(result, nd) && !d_invalidate(result)) {
 			dput(result);
-			result = ERR_PTR(-ENOENT);
+			goto repeat;
 		}
 	}
 	return result;
@@ -704,7 +739,14 @@ static __always_inline void follow_dotdo
                         read_unlock(&current->fs->lock);
 			break;
 		}
-                read_unlock(&current->fs->lock);
+#ifdef CONFIG_VE
+		if (nd->dentry == get_exec_env()->fs_root &&
+		    nd->mnt == get_exec_env()->fs_rootmnt) {
+			read_unlock(&current->fs->lock);
+			break;
+		}
+#endif
+		read_unlock(&current->fs->lock);
 		spin_lock(&dcache_lock);
 		if (nd->dentry != nd->mnt->mnt_root) {
 			nd->dentry = dget(nd->dentry->d_parent);
@@ -745,6 +787,10 @@ static int do_lookup(struct nameidata *n
 	if (dentry->d_op && dentry->d_op->d_revalidate)
 		goto need_revalidate;
 done:
+	if ((nd->flags & LOOKUP_STRICT) && d_mountpoint(dentry)) {
+		dput(dentry);
+		return -ENOENT;
+	}
 	path->mnt = mnt;
 	path->dentry = dentry;
 	__follow_mount(path);
@@ -780,6 +826,7 @@ static fastcall int __link_path_walk(con
 {
 	struct path next;
 	struct inode *inode;
+	int real_components = 0;
 	int err;
 	unsigned int lookup_flags = nd->flags;
 	
@@ -801,7 +848,7 @@ static fastcall int __link_path_walk(con
 		nd->flags |= LOOKUP_CONTINUE;
 		err = exec_permission_lite(inode, nd);
 		if (err == -EAGAIN)
-			err = vfs_permission(nd, MAY_EXEC);
+			err = vfs_permission(nd, MAY_EXEC, NULL);
  		if (err)
 			break;
 
@@ -851,6 +898,7 @@ static fastcall int __link_path_walk(con
 				break;
 		}
 		/* This does the actual lookups.. */
+		real_components++;
 		err = do_lookup(nd, &this, &next);
 		if (err)
 			break;
@@ -864,6 +912,9 @@ static fastcall int __link_path_walk(con
 			goto out_dput;
 
 		if (inode->i_op->follow_link) {
+			err = -ENOENT;
+			if (lookup_flags & LOOKUP_STRICT)
+				goto out_dput;
 			err = do_follow_link(&next, nd);
 			if (err)
 				goto return_err;
@@ -911,6 +962,7 @@ last_component:
 			break;
 		inode = next.dentry->d_inode;
 		if ((lookup_flags & LOOKUP_FOLLOW)
+		    && !(lookup_flags & LOOKUP_STRICT)
 		    && inode && inode->i_op && inode->i_op->follow_link) {
 			err = do_follow_link(&next, nd);
 			if (err)
@@ -932,26 +984,40 @@ lookup_parent:
 		nd->last_type = LAST_NORM;
 		if (this.name[0] != '.')
 			goto return_base;
-		if (this.len == 1)
+		if (this.len == 1) {
 			nd->last_type = LAST_DOT;
-		else if (this.len == 2 && this.name[1] == '.')
+			goto return_reval;
+		} else if (this.len == 2 && this.name[1] == '.') {
 			nd->last_type = LAST_DOTDOT;
-		else
-			goto return_base;
+			goto return_reval;
+		}
+return_base:
+		if (!(nd->flags & LOOKUP_NOAREACHECK)) {
+			err = check_area_access_ve(nd->dentry, nd->mnt);
+			if (err)
+				break;
+		}
+		return 0;
 return_reval:
 		/*
 		 * We bypassed the ordinary revalidation routines.
 		 * We may need to check the cached dentry for staleness.
 		 */
-		if (nd->dentry && nd->dentry->d_sb &&
+		if (!real_components && nd->dentry && nd->dentry->d_sb &&
 		    (nd->dentry->d_sb->s_type->fs_flags & FS_REVAL_DOT)) {
 			err = -ESTALE;
 			/* Note: we do not d_invalidate() */
 			if (!nd->dentry->d_op->d_revalidate(nd->dentry, nd))
+				/*
+				 * This lookup is for `/' or `.' or `..'.
+				 * The filesystem unhashed the dentry itself
+				 * inside d_revalidate (otherwise, d_invalidate
+				 * wouldn't succeed).  As a special courtesy to
+				 * NFS we return an error.   2003/02/19  SAW
+				 */
 				break;
 		}
-return_base:
-		return 0;
+		goto return_base;
 out_dput:
 		dput_path(&next, nd);
 		break;
@@ -1077,8 +1143,8 @@ static int fastcall do_path_lookup(int d
 	nd->flags = flags;
 	nd->depth = 0;
 
-	read_lock(&current->fs->lock);
 	if (*name=='/') {
+		read_lock(&current->fs->lock);
 		if (current->fs->altroot && !(nd->flags & LOOKUP_NOALT)) {
 			nd->mnt = mntget(current->fs->altrootmnt);
 			nd->dentry = dget(current->fs->altroot);
@@ -1089,33 +1155,35 @@ static int fastcall do_path_lookup(int d
 		}
 		nd->mnt = mntget(current->fs->rootmnt);
 		nd->dentry = dget(current->fs->root);
+		read_unlock(&current->fs->lock);
 	} else if (dfd == AT_FDCWD) {
+		read_lock(&current->fs->lock);
 		nd->mnt = mntget(current->fs->pwdmnt);
 		nd->dentry = dget(current->fs->pwd);
+		read_unlock(&current->fs->lock);
 	} else {
 		struct dentry *dentry;
 
 		file = fget_light(dfd, &fput_needed);
 		retval = -EBADF;
 		if (!file)
-			goto unlock_fail;
+			goto out_fail;
 
 		dentry = file->f_dentry;
 
 		retval = -ENOTDIR;
 		if (!S_ISDIR(dentry->d_inode->i_mode))
-			goto fput_unlock_fail;
+			goto fput_fail;
 
 		retval = file_permission(file, MAY_EXEC);
 		if (retval)
-			goto fput_unlock_fail;
+			goto fput_fail;
 
 		nd->mnt = mntget(file->f_vfsmnt);
 		nd->dentry = dget(dentry);
 
 		fput_light(file, fput_needed);
 	}
-	read_unlock(&current->fs->lock);
 	current->total_link_count = 0;
 	retval = link_path_walk(name, nd);
 out:
@@ -1124,13 +1192,12 @@ out:
 				nd->dentry->d_inode))
 		audit_inode(name, nd->dentry->d_inode, flags);
 	}
+out_fail:
 	return retval;
 
-fput_unlock_fail:
+fput_fail:
 	fput_light(file, fput_needed);
-unlock_fail:
-	read_unlock(&current->fs->lock);
-	return retval;
+	goto out_fail;
 }
 
 int fastcall path_lookup(const char *name, unsigned int flags,
@@ -1219,7 +1286,7 @@ static struct dentry * __lookup_hash(str
 	int err;
 
 	inode = base->d_inode;
-	err = permission(inode, MAY_EXEC, nd);
+	err = permission(inode, MAY_EXEC, nd, NULL);
 	dentry = ERR_PTR(err);
 	if (err)
 		goto out;
@@ -1354,7 +1421,7 @@ static int may_delete(struct inode *dir,
 
 	BUG_ON(victim->d_parent->d_inode != dir);
 
-	error = permission(dir,MAY_WRITE | MAY_EXEC, NULL);
+	error = permission(dir,MAY_WRITE | MAY_EXEC, NULL, NULL);
 	if (error)
 		return error;
 	if (IS_APPEND(dir))
@@ -1391,7 +1458,7 @@ static inline int may_create(struct inod
 		return -EEXIST;
 	if (IS_DEADDIR(dir))
 		return -ENOENT;
-	return permission(dir,MAY_WRITE | MAY_EXEC, nd);
+	return permission(dir,MAY_WRITE | MAY_EXEC, nd, NULL);
 }
 
 /* 
@@ -1491,7 +1558,7 @@ int may_open(struct nameidata *nd, int a
 	if (S_ISDIR(inode->i_mode) && (flag & FMODE_WRITE))
 		return -EISDIR;
 
-	error = vfs_permission(nd, acc_mode);
+	error = vfs_permission(nd, acc_mode, NULL);
 	if (error)
 		return error;
 
@@ -1628,6 +1695,12 @@ do_last:
 		goto exit;
 	}
 
+	if (IS_ERR(nd->intent.open.file)) {
+		mutex_unlock(&dir->d_inode->i_mutex);
+		error = PTR_ERR(nd->intent.open.file);
+		goto exit_dput;
+	}
+
 	/* Negative dentry, just create the file */
 	if (!path.dentry->d_inode) {
 		if (!IS_POSIXACL(dir->d_inode))
@@ -1851,6 +1924,7 @@ asmlinkage long sys_mknod(const char __u
 {
 	return sys_mknodat(AT_FDCWD, filename, mode, dev);
 }
+EXPORT_SYMBOL_GPL(sys_mknod);
 
 int vfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
 {
@@ -1909,6 +1983,7 @@ asmlinkage long sys_mkdir(const char __u
 {
 	return sys_mkdirat(AT_FDCWD, pathname, mode);
 }
+EXPORT_SYMBOL_GPL(sys_mkdir);
 
 /*
  * We try to drop the dentry early: we should have
@@ -2016,6 +2091,7 @@ asmlinkage long sys_rmdir(const char __u
 {
 	return do_rmdir(AT_FDCWD, pathname);
 }
+EXPORT_SYMBOL_GPL(sys_rmdir);
 
 int vfs_unlink(struct inode *dir, struct dentry *dentry)
 {
@@ -2115,6 +2191,7 @@ asmlinkage long sys_unlink(const char __
 {
 	return do_unlinkat(AT_FDCWD, pathname);
 }
+EXPORT_SYMBOL_GPL(sys_unlink);
 
 int vfs_symlink(struct inode *dir, struct dentry *dentry, const char *oldname, int mode)
 {
@@ -2313,7 +2390,7 @@ static int vfs_rename_dir(struct inode *
 	 * we'll need to flip '..'.
 	 */
 	if (new_dir != old_dir) {
-		error = permission(old_dentry->d_inode, MAY_WRITE, NULL);
+		error = permission(old_dentry->d_inode, MAY_WRITE, NULL, NULL);
 		if (error)
 			return error;
 	}
@@ -2380,6 +2457,9 @@ int vfs_rename(struct inode *old_dir, st
 	int is_dir = S_ISDIR(old_dentry->d_inode->i_mode);
 	const char *old_name;
 
+	if (DQUOT_RENAME(old_dentry->d_inode, old_dir, new_dir))
+		return -EXDEV;
+
 	if (old_dentry->d_inode == new_dentry->d_inode)
  		return 0;
  
diff -uprN linux-2.6.16/fs/namespace.c linux-2.6.16.ovz/fs/namespace.c
--- linux-2.6.16/fs/namespace.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/fs/namespace.c	2006-07-05 08:34:56.000000000 -0400
@@ -40,13 +40,15 @@ static inline int sysfs_init(void)
 
 /* spinlock for vfsmount related operations, inplace of dcache_lock */
 __cacheline_aligned_in_smp DEFINE_SPINLOCK(vfsmount_lock);
+EXPORT_SYMBOL(vfsmount_lock);
 
 static int event;
 
 static struct list_head *mount_hashtable;
 static int hash_mask __read_mostly, hash_bits __read_mostly;
 static kmem_cache_t *mnt_cache;
-static struct rw_semaphore namespace_sem;
+struct rw_semaphore namespace_sem;
+EXPORT_SYMBOL(namespace_sem);
 
 /* /sys/fs */
 decl_subsys(fs, NULL, NULL);
@@ -65,6 +67,7 @@ struct vfsmount *alloc_vfsmnt(const char
 	struct vfsmount *mnt = kmem_cache_alloc(mnt_cache, GFP_KERNEL);
 	if (mnt) {
 		memset(mnt, 0, sizeof(struct vfsmount));
+		mnt->owner = VEID(get_exec_env());
 		atomic_set(&mnt->mnt_count, 1);
 		INIT_LIST_HEAD(&mnt->mnt_hash);
 		INIT_LIST_HEAD(&mnt->mnt_child);
@@ -371,10 +374,32 @@ static int show_vfsmnt(struct seq_file *
 		{ 0, NULL }
 	};
 	struct proc_fs_info *fs_infop;
+	char *path_buf, *path;
 
-	mangle(m, mnt->mnt_devname ? mnt->mnt_devname : "none");
+	/* skip mounts whose superblock is MS_NOUSER (e.g. rootfs) */
+	if (mnt->mnt_sb->s_flags & MS_NOUSER)
+		return 0;
+
+	path_buf = (char *) __get_free_page(GFP_KERNEL);
+	if (!path_buf)
+		return -ENOMEM;
+	path = d_path(mnt->mnt_root, mnt, path_buf, PAGE_SIZE);
+	if (IS_ERR(path)) {
+		free_page((unsigned long) path_buf);
+		/*
+		 * Returning 0 still increments the file position, so the
+		 * total number of "invisible" vfsmnts leaks to the reader.
+		 */
+		return 0;
+	}
+
+	if (ve_is_super(get_exec_env()))
+		mangle(m, mnt->mnt_devname ? mnt->mnt_devname : "none");
+	else
+		mangle(m, mnt->mnt_sb->s_type->name);
 	seq_putc(m, ' ');
-	seq_path(m, mnt, mnt->mnt_root, " \t\n\\");
+	mangle(m, path);
+	free_page((unsigned long) path_buf);
 	seq_putc(m, ' ');
 	mangle(m, mnt->mnt_sb->s_type->name);
 	seq_puts(m, mnt->mnt_sb->s_flags & MS_RDONLY ? " ro" : " rw");
@@ -474,6 +499,7 @@ void release_mounts(struct list_head *he
 		mntput(mnt);
 	}
 }
+EXPORT_SYMBOL(release_mounts);
 
 void umount_tree(struct vfsmount *mnt, int propagate, struct list_head *kill)
 {
@@ -498,6 +524,7 @@ void umount_tree(struct vfsmount *mnt, i
 		change_mnt_propagation(p, MS_PRIVATE);
 	}
 }
+EXPORT_SYMBOL(umount_tree);
 
 static int do_umount(struct vfsmount *mnt, int flags)
 {
@@ -608,7 +635,7 @@ asmlinkage long sys_umount(char __user *
 		goto dput_and_out;
 
 	retval = -EPERM;
-	if (!capable(CAP_SYS_ADMIN))
+	if (!capable(CAP_VE_SYS_ADMIN))
 		goto dput_and_out;
 
 	retval = do_umount(nd.mnt, flags);
@@ -632,7 +659,7 @@ asmlinkage long sys_oldumount(char __use
 
 static int mount_is_safe(struct nameidata *nd)
 {
-	if (capable(CAP_SYS_ADMIN))
+	if (capable(CAP_VE_SYS_ADMIN))
 		return 0;
 	return -EPERM;
 #ifdef notyet
@@ -642,7 +669,7 @@ static int mount_is_safe(struct nameidat
 		if (current->uid != nd->dentry->d_inode->i_uid)
 			return -EPERM;
 	}
-	if (vfs_permission(nd, MAY_WRITE))
+	if (vfs_permission(nd, MAY_WRITE, NULL))
 		return -EPERM;
 	return 0;
 #endif
@@ -848,6 +875,8 @@ static int do_change_type(struct nameida
 
 	if (nd->dentry != nd->mnt->mnt_root)
 		return -EINVAL;
+	if (!ve_accessible_veid(nd->mnt->owner, get_exec_env()->veid))
+		return -EPERM;
 
 	down_write(&namespace_sem);
 	spin_lock(&vfsmount_lock);
@@ -917,7 +946,7 @@ static int do_remount(struct nameidata *
 	int err;
 	struct super_block *sb = nd->mnt->mnt_sb;
 
-	if (!capable(CAP_SYS_ADMIN))
+	if (!capable(CAP_VE_SYS_ADMIN))
 		return -EPERM;
 
 	if (!check_mnt(nd->mnt))
@@ -926,6 +955,9 @@ static int do_remount(struct nameidata *
 	if (nd->dentry != nd->mnt->mnt_root)
 		return -EINVAL;
 
+	if (!ve_accessible_veid(nd->mnt->owner, get_exec_env()->veid))
+		return -EPERM;
+
 	down_write(&sb->s_umount);
 	err = do_remount_sb(sb, flags, data, 0);
 	if (!err)
@@ -951,7 +983,7 @@ static int do_move_mount(struct nameidat
 	struct nameidata old_nd, parent_nd;
 	struct vfsmount *p;
 	int err = 0;
-	if (!capable(CAP_SYS_ADMIN))
+	if (!capable(CAP_VE_SYS_ADMIN))
 		return -EPERM;
 	if (!old_name || !*old_name)
 		return -EINVAL;
@@ -959,6 +991,10 @@ static int do_move_mount(struct nameidat
 	if (err)
 		return err;
 
+	err = -EPERM;
+	if (!ve_accessible_veid(old_nd.mnt->owner, get_exec_env()->veid))
+		goto out_nosem;
+
 	down_write(&namespace_sem);
 	while (d_mountpoint(nd->dentry) && follow_down(&nd->mnt, &nd->dentry))
 		;
@@ -1014,6 +1050,7 @@ out:
 	up_write(&namespace_sem);
 	if (!err)
 		path_release(&parent_nd);
+out_nosem:
 	path_release(&old_nd);
 	return err;
 }
@@ -1031,7 +1068,7 @@ static int do_new_mount(struct nameidata
 		return -EINVAL;
 
 	/* we need capabilities... */
-	if (!capable(CAP_SYS_ADMIN))
+	if (!capable(CAP_VE_SYS_ADMIN))
 		return -EPERM;
 
 	mnt = do_kern_mount(type, flags, name, data);
@@ -1072,6 +1109,10 @@ int do_add_mount(struct vfsmount *newmnt
 	if ((err = graft_tree(newmnt, nd)))
 		goto unlock;
 
+	if (newmnt->mnt_mountpoint->d_flags & DCACHE_VIRTUAL)
+		/* not accessible to anyone else yet, so no lock is taken */
+		newmnt->mnt_root->d_flags |= DCACHE_VIRTUAL;
+
 	if (fslist) {
 		/* add to the specified expiration list */
 		spin_lock(&vfsmount_lock);
@@ -1469,6 +1510,7 @@ out1:
 	free_page(type_page);
 	return retval;
 }
+EXPORT_SYMBOL_GPL(sys_mount);
 
 /*
  * Replace the fs->{rootmnt,root} with {mnt,dentry}. Put the old values.
@@ -1520,7 +1562,7 @@ static void chroot_fs_refs(struct nameid
 	struct fs_struct *fs;
 
 	read_lock(&tasklist_lock);
-	do_each_thread(g, p) {
+	do_each_thread_ve(g, p) {
 		task_lock(p);
 		fs = p->fs;
 		if (fs) {
@@ -1535,7 +1577,7 @@ static void chroot_fs_refs(struct nameid
 			put_fs_struct(fs);
 		} else
 			task_unlock(p);
-	} while_each_thread(g, p);
+	} while_each_thread_ve(g, p);
 	read_unlock(&tasklist_lock);
 }
 
@@ -1688,10 +1730,10 @@ static void __init init_mount_tree(void)
 
 	init_task.namespace = namespace;
 	read_lock(&tasklist_lock);
-	do_each_thread(g, p) {
+	do_each_thread_all(g, p) {
 		get_namespace(namespace);
 		p->namespace = namespace;
-	} while_each_thread(g, p);
+	} while_each_thread_all(g, p);
 	read_unlock(&tasklist_lock);
 
 	set_fs_pwd(current->fs, namespace->root, namespace->root->mnt_root);
@@ -1707,7 +1749,8 @@ void __init mnt_init(unsigned long mempa
 	init_rwsem(&namespace_sem);
 
 	mnt_cache = kmem_cache_create("mnt_cache", sizeof(struct vfsmount),
-			0, SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL, NULL);
+			0, SLAB_HWCACHE_ALIGN | SLAB_PANIC | SLAB_UBC,
+			NULL, NULL);
 
 	mount_hashtable = (struct list_head *)__get_free_page(GFP_ATOMIC);
 
@@ -1763,3 +1806,4 @@ void __put_namespace(struct namespace *n
 	release_mounts(&umount_list);
 	kfree(namespace);
 }
+EXPORT_SYMBOL_GPL(__put_namespace);
diff -uprN linux-2.6.16/fs/nfs/dir.c linux-2.6.16.ovz/fs/nfs/dir.c
--- linux-2.6.16/fs/nfs/dir.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/fs/nfs/dir.c	2006-07-05 08:34:56.000000000 -0400
@@ -1635,7 +1635,8 @@ out:
 	return -EACCES;
 }
 
-int nfs_permission(struct inode *inode, int mask, struct nameidata *nd)
+int nfs_permission(struct inode *inode, int mask, struct nameidata *nd,
+		struct exec_perm *perm)
 {
 	struct rpc_cred *cred;
 	int res = 0;
@@ -1683,7 +1684,7 @@ out:
 out_notsup:
 	res = nfs_revalidate_inode(NFS_SERVER(inode), inode);
 	if (res == 0)
-		res = generic_permission(inode, mask, NULL);
+		res = generic_permission(inode, mask, NULL, perm);
 	unlock_kernel();
 	return res;
 }
diff -uprN linux-2.6.16/fs/nfs/nfsroot.c linux-2.6.16.ovz/fs/nfs/nfsroot.c
--- linux-2.6.16/fs/nfs/nfsroot.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/fs/nfs/nfsroot.c	2006-07-05 08:34:56.000000000 -0400
@@ -312,7 +312,7 @@ static int __init root_nfs_name(char *na
 	/* Override them by options set on kernel command-line */
 	root_nfs_parse(name, buf);
 
-	cp = system_utsname.nodename;
+	cp = ve_utsname.nodename;
 	if (strlen(buf) + strlen(cp) > NFS_MAXPATHLEN) {
 		printk(KERN_ERR "Root-NFS: Pathname for remote directory too long.\n");
 		return -1;
diff -uprN linux-2.6.16/fs/nfsd/nfs3proc.c linux-2.6.16.ovz/fs/nfsd/nfs3proc.c
--- linux-2.6.16/fs/nfsd/nfs3proc.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/fs/nfsd/nfs3proc.c	2006-07-05 08:34:56.000000000 -0400
@@ -682,7 +682,7 @@ static struct svc_procedure		nfsd_proced
   PROC(lookup,	 dirop,		dirop,		fhandle2, RC_NOCACHE, ST+FH+pAT+pAT),
   PROC(access,	 access,	access,		fhandle,  RC_NOCACHE, ST+pAT+1),
   PROC(readlink, readlink,	readlink,	fhandle,  RC_NOCACHE, ST+pAT+1+NFS3_MAXPATHLEN/4),
-  PROC(read,	 read,		read,		fhandle,  RC_NOCACHE, ST+pAT+4+NFSSVC_MAXBLKSIZE),
+  PROC(read,	 read,		read,		fhandle,  RC_NOCACHE, ST+pAT+4+NFSSVC_MAXBLKSIZE/4),
   PROC(write,	 write,		write,		fhandle,  RC_REPLBUFF, ST+WC+4),
   PROC(create,	 create,	create,		fhandle2, RC_REPLBUFF, ST+(1+FH+pAT)+WC),
   PROC(mkdir,	 mkdir,		create,		fhandle2, RC_REPLBUFF, ST+(1+FH+pAT)+WC),
diff -uprN linux-2.6.16/fs/nfsd/nfs4proc.c linux-2.6.16.ovz/fs/nfsd/nfs4proc.c
--- linux-2.6.16/fs/nfsd/nfs4proc.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/fs/nfsd/nfs4proc.c	2006-07-05 08:34:56.000000000 -0400
@@ -975,7 +975,7 @@ struct nfsd4_voidargs { int dummy; };
  */
 static struct svc_procedure		nfsd_procedures4[2] = {
   PROC(null,	 void,		void,		void,	  RC_NOCACHE, 1),
-  PROC(compound, compound,	compound,	compound, RC_NOCACHE, NFSD_BUFSIZE)
+  PROC(compound, compound,	compound,	compound, RC_NOCACHE, NFSD_BUFSIZE/4)
 };
 
 struct svc_version	nfsd_version4 = {
diff -uprN linux-2.6.16/fs/nfsd/nfsfh.c linux-2.6.16.ovz/fs/nfsd/nfsfh.c
--- linux-2.6.16/fs/nfsd/nfsfh.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/fs/nfsd/nfsfh.c	2006-07-05 08:34:56.000000000 -0400
@@ -56,7 +56,7 @@ static int nfsd_acceptable(void *expv, s
 		/* make sure parents give x permission to user */
 		int err;
 		parent = dget_parent(tdentry);
-		err = permission(parent->d_inode, MAY_EXEC, NULL);
+		err = permission(parent->d_inode, MAY_EXEC, NULL, NULL);
 		if (err < 0) {
 			dput(parent);
 			break;
diff -uprN linux-2.6.16/fs/nfsd/nfsproc.c linux-2.6.16.ovz/fs/nfsd/nfsproc.c
--- linux-2.6.16/fs/nfsd/nfsproc.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/fs/nfsd/nfsproc.c	2006-07-05 08:34:56.000000000 -0400
@@ -553,7 +553,7 @@ static struct svc_procedure		nfsd_proced
   PROC(none,	 void,		void,		none,		RC_NOCACHE, ST),
   PROC(lookup,	 diropargs,	diropres,	fhandle,	RC_NOCACHE, ST+FH+AT),
   PROC(readlink, readlinkargs,	readlinkres,	none,		RC_NOCACHE, ST+1+NFS_MAXPATHLEN/4),
-  PROC(read,	 readargs,	readres,	fhandle,	RC_NOCACHE, ST+AT+1+NFSSVC_MAXBLKSIZE),
+  PROC(read,	 readargs,	readres,	fhandle,	RC_NOCACHE, ST+AT+1+NFSSVC_MAXBLKSIZE/4),
   PROC(none,	 void,		void,		none,		RC_NOCACHE, ST),
   PROC(write,	 writeargs,	attrstat,	fhandle,	RC_REPLBUFF, ST+AT),
   PROC(create,	 createargs,	diropres,	fhandle,	RC_REPLBUFF, ST+FH+AT),
diff -uprN linux-2.6.16/fs/nfsd/vfs.c linux-2.6.16.ovz/fs/nfsd/vfs.c
--- linux-2.6.16/fs/nfsd/vfs.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/fs/nfsd/vfs.c	2006-07-05 08:34:56.000000000 -0400
@@ -1817,12 +1817,13 @@ nfsd_permission(struct svc_export *exp, 
 	    inode->i_uid == current->fsuid)
 		return 0;
 
-	err = permission(inode, acc & (MAY_READ|MAY_WRITE|MAY_EXEC), NULL);
+	err = permission(inode, acc & (MAY_READ|MAY_WRITE|MAY_EXEC),
+			NULL, NULL);
 
 	/* Allow read access to binaries even when mode 111 */
 	if (err == -EACCES && S_ISREG(inode->i_mode) &&
 	    acc == (MAY_READ | MAY_OWNER_OVERRIDE))
-		err = permission(inode, MAY_EXEC, NULL);
+		err = permission(inode, MAY_EXEC, NULL, NULL);
 
 	return err? nfserrno(err) : 0;
 }
diff -uprN linux-2.6.16/fs/ntfs/file.c linux-2.6.16.ovz/fs/ntfs/file.c
--- linux-2.6.16/fs/ntfs/file.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/fs/ntfs/file.c	2006-07-05 08:34:56.000000000 -0400
@@ -1489,14 +1489,15 @@ static inline void ntfs_flush_dcache_pag
 		unsigned nr_pages)
 {
 	BUG_ON(!nr_pages);
+	/*
+	 * Warning: Do not do the decrement at the same time as the call to
+	 * flush_dcache_page() because it is a NULL macro on i386 and hence the
+	 * decrement never happens so the loop never terminates.
+	 */
 	do {
-		/*
-		 * Warning: Do not do the decrement at the same time as the
-		 * call because flush_dcache_page() is a NULL macro on i386
-		 * and hence the decrement never happens.
-		 */
+		--nr_pages;
 		flush_dcache_page(pages[nr_pages]);
-	} while (--nr_pages > 0);
+	} while (nr_pages > 0);
 }
 
 /**
diff -uprN linux-2.6.16/fs/ntfs/super.c linux-2.6.16.ovz/fs/ntfs/super.c
--- linux-2.6.16/fs/ntfs/super.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/fs/ntfs/super.c	2006-07-05 08:34:56.000000000 -0400
@@ -3033,7 +3033,7 @@ iput_tmp_ino_err_out_now:
 	 * method again... FIXME: Do we need to do this twice now because of
 	 * attribute inodes? I think not, so leave as is for now... (AIA)
 	 */
-	if (invalidate_inodes(sb)) {
+	if (invalidate_inodes(sb, 0)) {
 		ntfs_error(sb, "Busy inodes left. This is most likely a NTFS "
 				"driver bug.");
 		/* Copied from fs/super.c. I just love this message. (-; */
diff -uprN linux-2.6.16/fs/open.c linux-2.6.16.ovz/fs/open.c
--- linux-2.6.16/fs/open.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/fs/open.c	2006-07-05 08:34:56.000000000 -0400
@@ -25,6 +25,7 @@
 #include <linux/fs.h>
 #include <linux/personality.h>
 #include <linux/pagemap.h>
+#include <linux/faudit.h>
 #include <linux/syscalls.h>
 #include <linux/rcupdate.h>
 
@@ -51,7 +52,21 @@ int vfs_statfs(struct super_block *sb, s
 
 EXPORT_SYMBOL(vfs_statfs);
 
-static int vfs_statfs_native(struct super_block *sb, struct statfs *buf)
+/* Call the VITYPE_FAUDIT statfs notifier; returns 0 or the error it set. */
+int faudit_statfs(struct super_block *sb, struct kstatfs *buf)
+{
+	/* err preset to 0: a handler that never stores it cannot leak stack */
+	struct faudit_statfs_arg arg = { .sb = sb, .stat = buf, .err = 0 };
+
+	/* anything but NOTIFY_DONE: hand back whatever err now holds */
+	if (virtinfo_notifier_call(VITYPE_FAUDIT, VIRTINFO_FAUDIT_STATFS, &arg)
+			!= NOTIFY_DONE)
+		return arg.err;
+	return 0;
+}
+
+static int vfs_statfs_native(struct super_block *sb, struct vfsmount *mnt,
+		struct statfs *buf)
 {
 	struct kstatfs st;
 	int retval;
@@ -60,6 +75,10 @@ static int vfs_statfs_native(struct supe
 	if (retval)
 		return retval;
 
+	retval = faudit_statfs(mnt->mnt_sb, &st);
+	if (retval)
+		return retval;
+
 	if (sizeof(*buf) == sizeof(st))
 		memcpy(buf, &st, sizeof(st));
 	else {
@@ -94,7 +113,8 @@ static int vfs_statfs_native(struct supe
 	return 0;
 }
 
-static int vfs_statfs64(struct super_block *sb, struct statfs64 *buf)
+static int vfs_statfs64(struct super_block *sb, struct vfsmount *mnt,
+		struct statfs64 *buf)
 {
 	struct kstatfs st;
 	int retval;
@@ -103,6 +123,10 @@ static int vfs_statfs64(struct super_blo
 	if (retval)
 		return retval;
 
+	retval = faudit_statfs(mnt->mnt_sb, &st);
+	if (retval)
+		return retval;
+
 	if (sizeof(*buf) == sizeof(st))
 		memcpy(buf, &st, sizeof(st));
 	else {
@@ -129,7 +153,8 @@ asmlinkage long sys_statfs(const char __
 	error = user_path_walk(path, &nd);
 	if (!error) {
 		struct statfs tmp;
-		error = vfs_statfs_native(nd.dentry->d_inode->i_sb, &tmp);
+		error = vfs_statfs_native(nd.dentry->d_inode->i_sb,
+				nd.mnt, &tmp);
 		if (!error && copy_to_user(buf, &tmp, sizeof(tmp)))
 			error = -EFAULT;
 		path_release(&nd);
@@ -148,7 +173,8 @@ asmlinkage long sys_statfs64(const char 
 	error = user_path_walk(path, &nd);
 	if (!error) {
 		struct statfs64 tmp;
-		error = vfs_statfs64(nd.dentry->d_inode->i_sb, &tmp);
+		error = vfs_statfs64(nd.dentry->d_inode->i_sb,
+				nd.mnt, &tmp);
 		if (!error && copy_to_user(buf, &tmp, sizeof(tmp)))
 			error = -EFAULT;
 		path_release(&nd);
@@ -167,7 +193,8 @@ asmlinkage long sys_fstatfs(unsigned int
 	file = fget(fd);
 	if (!file)
 		goto out;
-	error = vfs_statfs_native(file->f_dentry->d_inode->i_sb, &tmp);
+	error = vfs_statfs_native(file->f_dentry->d_inode->i_sb,
+			file->f_vfsmnt, &tmp);
 	if (!error && copy_to_user(buf, &tmp, sizeof(tmp)))
 		error = -EFAULT;
 	fput(file);
@@ -188,7 +215,8 @@ asmlinkage long sys_fstatfs64(unsigned i
 	file = fget(fd);
 	if (!file)
 		goto out;
-	error = vfs_statfs64(file->f_dentry->d_inode->i_sb, &tmp);
+	error = vfs_statfs64(file->f_dentry->d_inode->i_sb,
+			file->f_vfsmnt, &tmp);
 	if (!error && copy_to_user(buf, &tmp, sizeof(tmp)))
 		error = -EFAULT;
 	fput(file);
@@ -243,7 +271,7 @@ static long do_sys_truncate(const char _
 	if (!S_ISREG(inode->i_mode))
 		goto dput_and_out;
 
-	error = vfs_permission(&nd, MAY_WRITE);
+	error = vfs_permission(&nd, MAY_WRITE, NULL);
 	if (error)
 		goto dput_and_out;
 
@@ -330,7 +358,10 @@ out:
 
 asmlinkage long sys_ftruncate(unsigned int fd, unsigned long length)
 {
-	return do_sys_ftruncate(fd, length, 1);
+	long ret = do_sys_ftruncate(fd, length, 1);
+	/* avoid REGPARM breakage on x86: forbid a tail call here */
+	prevent_tail_call(ret);
+	return ret;
 }
 
 /* LFS versions of truncate are only needed on 32 bit machines */
@@ -342,7 +373,10 @@ asmlinkage long sys_truncate64(const cha
 
 asmlinkage long sys_ftruncate64(unsigned int fd, loff_t length)
 {
-	return do_sys_ftruncate(fd, length, 0);
+	long ret = do_sys_ftruncate(fd, length, 0);
+	/* avoid REGPARM breakage on x86: forbid a tail call here */
+	prevent_tail_call(ret);
+	return ret;
 }
 #endif
 
@@ -397,7 +431,7 @@ asmlinkage long sys_utime(char __user * 
                         goto dput_and_out;
 
 		if (current->fsuid != inode->i_uid &&
-		    (error = vfs_permission(&nd, MAY_WRITE)) != 0)
+		    (error = vfs_permission(&nd, MAY_WRITE, NULL)) != 0)
 			goto dput_and_out;
 	}
 	mutex_lock(&inode->i_mutex);
@@ -450,7 +484,7 @@ long do_utimes(int dfd, char __user *fil
                         goto dput_and_out;
 
 		if (current->fsuid != inode->i_uid &&
-		    (error = vfs_permission(&nd, MAY_WRITE)) != 0)
+		    (error = vfs_permission(&nd, MAY_WRITE, NULL)) != 0)
 			goto dput_and_out;
 	}
 	mutex_lock(&inode->i_mutex);
@@ -514,7 +548,7 @@ asmlinkage long sys_faccessat(int dfd, c
 
 	res = __user_walk_fd(dfd, filename, LOOKUP_FOLLOW|LOOKUP_ACCESS, &nd);
 	if (!res) {
-		res = vfs_permission(&nd, mode);
+		res = vfs_permission(&nd, mode, NULL);
 		/* SuS v2 requires we report a read only fs too */
 		if(!res && (mode & S_IWOTH) && IS_RDONLY(nd.dentry->d_inode)
 		   && !special_file(nd.dentry->d_inode->i_mode))
@@ -543,7 +577,7 @@ asmlinkage long sys_chdir(const char __u
 	if (error)
 		goto out;
 
-	error = vfs_permission(&nd, MAY_EXEC);
+	error = vfs_permission(&nd, MAY_EXEC, NULL);
 	if (error)
 		goto dput_and_out;
 
@@ -594,7 +628,7 @@ asmlinkage long sys_chroot(const char __
 	if (error)
 		goto out;
 
-	error = vfs_permission(&nd, MAY_EXEC);
+	error = vfs_permission(&nd, MAY_EXEC, NULL);
 	if (error)
 		goto dput_and_out;
 
@@ -733,6 +767,7 @@ asmlinkage long sys_chown(const char __u
 	}
 	return error;
 }
+EXPORT_SYMBOL_GPL(sys_chown);
 
 asmlinkage long sys_fchownat(int dfd, const char __user *filename, uid_t user,
 			     gid_t group, int flag)
@@ -1083,20 +1118,30 @@ long do_sys_open(int dfd, const char __u
 
 asmlinkage long sys_open(const char __user *filename, int flags, int mode)
 {
+	long ret;
+
 	if (force_o_largefile())
 		flags |= O_LARGEFILE;
 
-	return do_sys_open(AT_FDCWD, filename, flags, mode);
+	ret = do_sys_open(AT_FDCWD, filename, flags, mode);
+	/* avoid REGPARM breakage on x86: forbid a tail call here */
+	prevent_tail_call(ret);
+	return ret;
 }
 EXPORT_SYMBOL_GPL(sys_open);
 
 asmlinkage long sys_openat(int dfd, const char __user *filename, int flags,
 			   int mode)
 {
+	long ret;
+
 	if (force_o_largefile())
 		flags |= O_LARGEFILE;
 
-	return do_sys_open(dfd, filename, flags, mode);
+	ret = do_sys_open(dfd, filename, flags, mode);
+	/* avoid REGPARM breakage on x86: forbid a tail call here */
+	prevent_tail_call(ret);
+	return ret;
 }
 EXPORT_SYMBOL_GPL(sys_openat);
 
diff -uprN linux-2.6.16/fs/partitions/check.c linux-2.6.16.ovz/fs/partitions/check.c
--- linux-2.6.16/fs/partitions/check.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/fs/partitions/check.c	2006-07-05 08:34:56.000000000 -0400
@@ -128,6 +128,7 @@ char *disk_name(struct gendisk *hd, int 
 
 	return buf;
 }
+EXPORT_SYMBOL(disk_name);
 
 const char *bdevname(struct block_device *bdev, char *buf)
 {
@@ -345,6 +346,7 @@ static char *make_block_name(struct gend
 	char *name;
 	static char *block_str = "block:";
 	int size;
+	char *s;
 
 	size = strlen(block_str) + strlen(disk->disk_name) + 1;
 	name = kmalloc(size, GFP_KERNEL);
@@ -352,6 +354,10 @@ static char *make_block_name(struct gend
 		return NULL;
 	strcpy(name, block_str);
 	strcat(name, disk->disk_name);
+	/* ewww... some of these buggers have / in name... */
+	s = strchr(name, '/');
+	if (s)
+		*s = '!';
 	return name;
 }
 
diff -uprN linux-2.6.16/fs/pipe.c linux-2.6.16.ovz/fs/pipe.c
--- linux-2.6.16/fs/pipe.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/fs/pipe.c	2006-07-05 08:34:56.000000000 -0400
@@ -19,6 +19,8 @@
 #include <asm/uaccess.h>
 #include <asm/ioctls.h>
 
+#include <ub/ub_mem.h>
+
 /*
  * We use a start+len construction, which provides full use of the 
  * allocated memory.
@@ -284,7 +286,7 @@ pipe_writev(struct file *filp, const str
 			int error;
 
 			if (!page) {
-				page = alloc_page(GFP_HIGHUSER);
+				page = alloc_page(GFP_HIGHUSER | __GFP_UBC);
 				if (unlikely(!page)) {
 					ret = ret ? : -ENOMEM;
 					break;
@@ -662,7 +664,7 @@ struct inode* pipe_new(struct inode* ino
 {
 	struct pipe_inode_info *info;
 
-	info = kmalloc(sizeof(struct pipe_inode_info), GFP_KERNEL);
+	info = ub_kmalloc(sizeof(struct pipe_inode_info), GFP_KERNEL);
 	if (!info)
 		goto fail_page;
 	memset(info, 0, sizeof(*info));
@@ -797,6 +799,7 @@ close_f1:
 no_files:
 	return error;	
 }
+EXPORT_SYMBOL_GPL(do_pipe);
 
 /*
  * pipefs should _never_ be mounted by userland - too much of security hassle,
diff -uprN linux-2.6.16/fs/proc/array.c linux-2.6.16.ovz/fs/proc/array.c
--- linux-2.6.16/fs/proc/array.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/fs/proc/array.c	2006-07-05 08:34:56.000000000 -0400
@@ -75,6 +75,9 @@
 #include <linux/times.h>
 #include <linux/cpuset.h>
 #include <linux/rcupdate.h>
+#include <linux/fairsched.h>
+
+#include <ub/beancounter.h>
 
 #include <asm/uaccess.h>
 #include <asm/pgtable.h>
@@ -161,8 +164,13 @@ static inline char * task_state(struct t
 	struct group_info *group_info;
 	int g;
 	struct fdtable *fdt = NULL;
+	pid_t pid, ppid, tgid;
+
+	pid = get_task_pid(p);
+	tgid = get_task_tgid(p);
 
 	read_lock(&tasklist_lock);
+	ppid = get_task_ppid(p);
 	buffer += sprintf(buffer,
 		"State:\t%s\n"
 		"SleepAVG:\t%lu%%\n"
@@ -170,13 +178,19 @@ static inline char * task_state(struct t
 		"Pid:\t%d\n"
 		"PPid:\t%d\n"
 		"TracerPid:\t%d\n"
+#ifdef CONFIG_FAIRSCHED
+		"FNid:\t%d\n"
+#endif
 		"Uid:\t%d\t%d\t%d\t%d\n"
 		"Gid:\t%d\t%d\t%d\t%d\n",
 		get_task_state(p),
 		(p->sleep_avg/1024)*100/(1020000000/1024),
-	       	p->tgid,
-		p->pid, pid_alive(p) ? p->group_leader->real_parent->tgid : 0,
-		pid_alive(p) && p->ptrace ? p->parent->pid : 0,
+	       	tgid,
+		pid, ppid,
+		pid_alive(p) && p->ptrace ? get_task_pid(p->parent) : 0,
+#ifdef CONFIG_FAIRSCHED
+		task_fairsched_node_id(p),
+#endif
 		p->uid, p->euid, p->suid, p->fsuid,
 		p->gid, p->egid, p->sgid, p->fsgid);
 	read_unlock(&tasklist_lock);
@@ -199,6 +213,18 @@ static inline char * task_state(struct t
 	put_group_info(group_info);
 
 	buffer += sprintf(buffer, "\n");
+
+#ifdef CONFIG_VE
+	buffer += sprintf(buffer,
+			"envID:\t%d\n"
+			"VPid:\t%d\n"
+			"PNState:\t%u\n"
+			"StopState:\t%u\n",
+			VE_TASK_INFO(p)->owner_env->veid,
+			virt_pid(p),
+			p->pn_state,
+			p->stopped_state);
+#endif
 	return buffer;
 }
 
@@ -244,7 +270,7 @@ static void collect_sigign_sigcatch(stru
 
 static inline char * task_sig(struct task_struct *p, char *buffer)
 {
-	sigset_t pending, shpending, blocked, ignored, caught;
+	sigset_t pending, shpending, blocked, ignored, caught, saved;
 	int num_threads = 0;
 	unsigned long qsize = 0;
 	unsigned long qlim = 0;
@@ -254,6 +280,7 @@ static inline char * task_sig(struct tas
 	sigemptyset(&blocked);
 	sigemptyset(&ignored);
 	sigemptyset(&caught);
+	sigemptyset(&saved);
 
 	/* Gather all the data with the appropriate locks held */
 	read_lock(&tasklist_lock);
@@ -262,6 +289,7 @@ static inline char * task_sig(struct tas
 		pending = p->pending.signal;
 		shpending = p->signal->shared_pending.signal;
 		blocked = p->blocked;
+		saved = p->saved_sigmask;
 		collect_sigign_sigcatch(p, &ignored, &caught);
 		num_threads = atomic_read(&p->signal->count);
 		qsize = atomic_read(&p->user->sigpending);
@@ -279,6 +307,7 @@ static inline char * task_sig(struct tas
 	buffer = render_sigset_t("SigBlk:\t", &blocked, buffer);
 	buffer = render_sigset_t("SigIgn:\t", &ignored, buffer);
 	buffer = render_sigset_t("SigCgt:\t", &caught, buffer);
+	buffer = render_sigset_t("SigSvd:\t", &saved, buffer);
 
 	return buffer;
 }
@@ -293,10 +322,27 @@ static inline char *task_cap(struct task
 			    cap_t(p->cap_effective));
 }
 
+#ifdef CONFIG_USER_RESOURCE
+static inline void ub_dump_task_info(struct task_struct *tsk,
+		char *stsk, int ltsk, char *smm, int lmm)
+{	/* print_ub_uid() output: task beancounter -> stsk[ltsk], mm one -> smm[lmm] */
+	print_ub_uid(tsk->task_bc.task_ub, stsk, ltsk);
+	task_lock(tsk);		/* tsk->mm is only dereferenced under task_lock */
+	if (tsk->mm)
+		print_ub_uid(tsk->mm->mm_ub, smm, lmm);
+	else	/* no mm: placeholder text; strlcpy() always NUL-terminates */
+		strlcpy(smm, "N/A", lmm);
+	task_unlock(tsk);
+}
+#endif
+
 int proc_pid_status(struct task_struct *task, char * buffer)
 {
 	char * orig = buffer;
 	struct mm_struct *mm = get_task_mm(task);
+#ifdef CONFIG_USER_RESOURCE
+	char tsk_ub_info[64], mm_ub_info[64];
+#endif
 
 	buffer = task_name(task, buffer);
 	buffer = task_state(task, buffer);
@@ -311,6 +357,14 @@ int proc_pid_status(struct task_struct *
 #if defined(CONFIG_S390)
 	buffer = task_show_regs(task, buffer);
 #endif
+#ifdef CONFIG_USER_RESOURCE
+	ub_dump_task_info(task,
+			tsk_ub_info, sizeof(tsk_ub_info),
+			mm_ub_info, sizeof(mm_ub_info));
+
+	buffer += sprintf(buffer, "TaskUB:\t%s\n", tsk_ub_info);
+	buffer += sprintf(buffer, "MMUB:\t%s\n", mm_ub_info);
+#endif
 	return buffer - orig;
 }
 
@@ -333,6 +387,10 @@ static int do_task_stat(struct task_stru
 	DEFINE_KTIME(it_real_value);
 	struct task_struct *t;
 	char tcomm[sizeof(task->comm)];
+#ifdef CONFIG_USER_RESOURCE
+	char ub_task_info[64];
+	char ub_mm_info[64];
+#endif
 
 	state = *get_task_state(task);
 	vsize = eip = esp = 0;
@@ -370,11 +428,12 @@ static int do_task_stat(struct task_stru
 	}
 	if (task->signal) {
 		if (task->signal->tty) {
-			tty_pgrp = task->signal->tty->pgrp;
+			tty_pgrp = pid_type_to_vpid(PIDTYPE_PGID,
+						    task->signal->tty->pgrp);
 			tty_nr = new_encode_dev(tty_devnum(task->signal->tty));
 		}
-		pgid = process_group(task);
-		sid = task->signal->session;
+		pgid = get_task_pgid(task);
+		sid = get_task_sid(task);
 		cmin_flt = task->signal->cmin_flt;
 		cmaj_flt = task->signal->cmaj_flt;
 		cutime = task->signal->cutime;
@@ -388,7 +447,7 @@ static int do_task_stat(struct task_stru
 		}
 		it_real_value = task->signal->real_timer.expires;
 	}
-	ppid = pid_alive(task) ? task->group_leader->real_parent->tgid : 0;
+	ppid = get_task_ppid(task);
 	read_unlock(&tasklist_lock);
 
 	if (!whole || num_threads<2)
@@ -407,14 +466,34 @@ static int do_task_stat(struct task_stru
 
 	/* Temporary variable needed for gcc-2.96 */
 	/* convert timespec -> nsec*/
+#ifndef CONFIG_VE
 	start_time = (unsigned long long)task->start_time.tv_sec * NSEC_PER_SEC
 				+ task->start_time.tv_nsec;
+#else
+	start_time = (unsigned long long)(task->start_time.tv_sec -
+			get_exec_env()->init_entry->start_time.tv_sec) *
+			NSEC_PER_SEC + task->start_time.tv_nsec -
+			get_exec_env()->init_entry->start_time.tv_nsec;
+#endif
 	/* convert nsec -> ticks */
 	start_time = nsec_to_clock_t(start_time);
 
+#ifdef CONFIG_USER_RESOURCE
+	ub_dump_task_info(task,
+			ub_task_info, sizeof(ub_task_info),
+			ub_mm_info, sizeof(ub_mm_info));
+#endif
+
 	res = sprintf(buffer,"%d (%s) %c %d %d %d %d %d %lu %lu \
 %lu %lu %lu %lu %lu %ld %ld %ld %ld %d %ld %llu %lu %ld %lu %lu %lu %lu %lu \
-%lu %lu %lu %lu %lu %lu %lu %lu %d %d %lu %lu\n",
+%lu %lu %lu %lu %lu %lu %lu %lu %d %d %lu %lu"
+#ifdef CONFIG_VE
+" 0 0 0 0 0 0 0 0 %d %u"	/* leading space keeps the policy field and the first pad field apart */
+#endif
+#ifdef CONFIG_USER_RESOURCE
+	" %s %s"
+#endif
+	"\n",
 		task->pid,
 		tcomm,
 		state,
@@ -459,7 +538,16 @@ static int do_task_stat(struct task_stru
 		task->exit_signal,
 		task_cpu(task),
 		task->rt_priority,
-		task->policy);
+		task->policy
+#ifdef CONFIG_VE
+		, virt_pid(task),
+		VEID(VE_TASK_INFO(task)->owner_env)
+#endif
+#ifdef CONFIG_USER_RESOURCE
+		, ub_task_info,
+		ub_mm_info
+#endif
+		);
 	if(mm)
 		mmput(mm);
 	return res;
diff -uprN linux-2.6.16/fs/proc/base.c linux-2.6.16.ovz/fs/proc/base.c
--- linux-2.6.16/fs/proc/base.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/fs/proc/base.c	2006-07-05 08:34:56.000000000 -0400
@@ -291,22 +291,29 @@ static int proc_fd_link(struct inode *in
 	struct files_struct *files;
 	struct file *file;
 	int fd = proc_type(inode) - PROC_TID_FD_DIR;
+	int err = -ENOENT;
 
 	files = get_files_struct(task);
 	if (files) {
-		rcu_read_lock();
+		/*
+		 * We are not taking a ref to the file structure, so we must
+		 * hold ->file_lock.
+		 */
+		spin_lock(&files->file_lock);
 		file = fcheck_files(files, fd);
 		if (file) {
-			*mnt = mntget(file->f_vfsmnt);
-			*dentry = dget(file->f_dentry);
-			rcu_read_unlock();
-			put_files_struct(files);
-			return 0;
+			if (d_root_check(file->f_dentry, file->f_vfsmnt)) {
+				err = -EACCES;
+			} else {
+				*mnt = mntget(file->f_vfsmnt);
+				*dentry = dget(file->f_dentry);
+				err = 0;
+			}
 		}
-		rcu_read_unlock();
+		spin_unlock(&files->file_lock);
 		put_files_struct(files);
 	}
-	return -ENOENT;
+	return err;
 }
 
 static struct fs_struct *get_fs_struct(struct task_struct *task)
@@ -326,10 +333,12 @@ static int proc_cwd_link(struct inode *i
 	int result = -ENOENT;
 	if (fs) {
 		read_lock(&fs->lock);
-		*mnt = mntget(fs->pwdmnt);
-		*dentry = dget(fs->pwd);
+		result = d_root_check(fs->pwd, fs->pwdmnt);
+		if (!result) {
+			*mnt = mntget(fs->pwdmnt);
+			*dentry = dget(fs->pwd);
+		}
 		read_unlock(&fs->lock);
-		result = 0;
 		put_fs_struct(fs);
 	}
 	return result;
@@ -579,19 +588,21 @@ static int proc_check_root(struct inode 
 	return proc_check_chroot(root, vfsmnt);
 }
 
-static int proc_permission(struct inode *inode, int mask, struct nameidata *nd)
+static int proc_permission(struct inode *inode, int mask, struct nameidata *nd,
+		struct exec_perm *perm)
 {
-	if (generic_permission(inode, mask, NULL) != 0)
+	if (generic_permission(inode, mask, NULL, perm) != 0)
 		return -EACCES;
 	return proc_check_root(inode);
 }
 
-static int proc_task_permission(struct inode *inode, int mask, struct nameidata *nd)
+static int proc_task_permission(struct inode *inode, int mask, struct nameidata *nd,
+		struct exec_perm *perm)
 {
 	struct dentry *root;
 	struct vfsmount *vfsmnt;
 
-	if (generic_permission(inode, mask, NULL) != 0)
+	if (generic_permission(inode, mask, NULL, perm) != 0)
 		return -EACCES;
 
 	if (proc_task_root_link(inode, &root, &vfsmnt))
@@ -1303,6 +1314,10 @@ static struct inode *proc_pid_make_inode
 	struct inode * inode;
 	struct proc_inode *ei;
 
+	if (!ve_accessible(VE_TASK_INFO(task)->owner_env,
+			   VE_OWNER_FSTYPE(sb->s_type)))
+		return NULL;
+
 	/* We need a new inode */
 	
 	inode = new_inode(sb);
@@ -1406,6 +1421,10 @@ static void pid_base_iput(struct dentry 
 	spin_lock(&task->proc_lock);
 	if (task->proc_dentry == dentry)
 		task->proc_dentry = NULL;
+#ifdef CONFIG_VE
+	if (VE_TASK_INFO(task)->glob_proc_dentry == dentry)
+		VE_TASK_INFO(task)->glob_proc_dentry = NULL;
+#endif
 	spin_unlock(&task->proc_lock);
 	iput(inode);
 }
@@ -1485,7 +1504,12 @@ static struct dentry *proc_lookupfd(stru
 	if (!files)
 		goto out_unlock;
 	inode->i_mode = S_IFLNK;
-	rcu_read_lock();
+
+	/*
+	 * We are not taking a ref to the file structure, so we must
+	 * hold ->file_lock.
+	 */
+	spin_lock(&files->file_lock);
 	file = fcheck_files(files, fd);
 	if (!file)
 		goto out_unlock2;
@@ -1493,7 +1517,7 @@ static struct dentry *proc_lookupfd(stru
 		inode->i_mode |= S_IRUSR | S_IXUSR;
 	if (file->f_mode & 2)
 		inode->i_mode |= S_IWUSR | S_IXUSR;
-	rcu_read_unlock();
+	spin_unlock(&files->file_lock);
 	put_files_struct(files);
 	inode->i_op = &proc_pid_link_inode_operations;
 	inode->i_size = 64;
@@ -1503,7 +1527,7 @@ static struct dentry *proc_lookupfd(stru
 	return NULL;
 
 out_unlock2:
-	rcu_read_unlock();
+	spin_unlock(&files->file_lock);
 	put_files_struct(files);
 out_unlock:
 	iput(inode);
@@ -1879,14 +1903,14 @@ static int proc_self_readlink(struct den
 			      int buflen)
 {
 	char tmp[30];
-	sprintf(tmp, "%d", current->tgid);
+	sprintf(tmp, "%d", get_task_tgid(current));
 	return vfs_readlink(dentry,buffer,buflen,tmp);
 }
 
 static void *proc_self_follow_link(struct dentry *dentry, struct nameidata *nd)
 {
 	char tmp[30];
-	sprintf(tmp, "%d", current->tgid);
+	sprintf(tmp, "%d", get_task_tgid(current));
 	return ERR_PTR(vfs_follow_link(nd,tmp));
 }	
 
@@ -1911,11 +1935,8 @@ static struct inode_operations proc_self
  *   of PIDTYPE_PID.
  */
 
-struct dentry *proc_pid_unhash(struct task_struct *p)
+struct dentry *__proc_pid_unhash(struct task_struct *p, struct dentry *proc_dentry)
 {
-	struct dentry *proc_dentry;
-
-	proc_dentry = p->proc_dentry;
 	if (proc_dentry != NULL) {
 
 		spin_lock(&dcache_lock);
@@ -1933,6 +1954,14 @@ struct dentry *proc_pid_unhash(struct ta
 	return proc_dentry;
 }
 
+void proc_pid_unhash(struct task_struct *p, struct dentry *pd[2])
+{	/* run __proc_pid_unhash() on each /proc dentry of p; results go to pd[] */
+	pd[0] = __proc_pid_unhash(p, p->proc_dentry);
+#ifdef CONFIG_VE
+	pd[1] = __proc_pid_unhash(p, VE_TASK_INFO(p)->glob_proc_dentry);
+#endif	/* NOTE: without CONFIG_VE pd[1] is never written here */
+}
+
 /**
  * proc_pid_flush - recover memory used by stale /proc/@pid/x entries
  * @proc_dentry: directoy to prune.
@@ -1940,7 +1969,7 @@ struct dentry *proc_pid_unhash(struct ta
  * Shrink the /proc directory that was used by the just killed thread.
  */
 	
-void proc_pid_flush(struct dentry *proc_dentry)
+void __proc_pid_flush(struct dentry *proc_dentry)
 {
 	might_sleep();
 	if(proc_dentry != NULL) {
@@ -1949,12 +1978,21 @@ void proc_pid_flush(struct dentry *proc_
 	}
 }
 
+void proc_pid_flush(struct dentry *proc_dentry[2])
+{	/* flush the dentry pair that proc_pid_unhash() stored; NULLs are skipped */
+	__proc_pid_flush(proc_dentry[0]);	/* calls might_sleep() */
+#ifdef CONFIG_VE
+	__proc_pid_flush(proc_dentry[1]);	/* slot [1] is only used with CONFIG_VE */
+#endif
+}
+
 /* SMP-safe */
 struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *nd)
 {
 	struct task_struct *task;
 	struct inode *inode;
 	struct proc_inode *ei;
+	struct dentry *pd[2];
 	unsigned tgid;
 	int died;
 
@@ -1978,7 +2016,19 @@ struct dentry *proc_pid_lookup(struct in
 		goto out;
 
 	read_lock(&tasklist_lock);
-	task = find_task_by_pid(tgid);
+	task = find_task_by_pid_ve(tgid);
+	/* In theory we are allowed to lookup both /proc/VIRT_PID and
+	 * /proc/GLOBAL_PID inside VE. However, current /proc implementation
+	 * cannot maintain two references to one task, so that we have
+	 * to prohibit /proc/GLOBAL_PID.
+	 */
+	if (task && !ve_is_super(get_exec_env()) && !is_virtual_pid(tgid)) {
+		/* However, VE_ENTERed tasks are exception, they use global
+		 * pids.
+		 */
+		if (virt_pid(task) != tgid)
+			task = NULL;
+	}
 	if (task)
 		get_task_struct(task);
 	read_unlock(&tasklist_lock);
@@ -2007,16 +2057,23 @@ struct dentry *proc_pid_lookup(struct in
 	died = 0;
 	d_add(dentry, inode);
 	spin_lock(&task->proc_lock);
+#ifdef CONFIG_VE
+	if (ve_is_super(VE_OWNER_FSTYPE(inode->i_sb->s_type)))
+		VE_TASK_INFO(task)->glob_proc_dentry = dentry;
+	else
+		task->proc_dentry = dentry;
+#else
 	task->proc_dentry = dentry;
+#endif
 	if (!pid_alive(task)) {
-		dentry = proc_pid_unhash(task);
+		proc_pid_unhash(task, pd);
 		died = 1;
 	}
 	spin_unlock(&task->proc_lock);
 
 	put_task_struct(task);
 	if (died) {
-		proc_pid_flush(dentry);
+		proc_pid_flush(pd);
 		goto out;
 	}
 	return NULL;
@@ -2037,7 +2094,12 @@ static struct dentry *proc_task_lookup(s
 		goto out;
 
 	read_lock(&tasklist_lock);
-	task = find_task_by_pid(tid);
+	task = find_task_by_pid_ve(tid);
+	/* See comment above in similar place. */
+	if (task && !ve_is_super(get_exec_env()) && !is_virtual_pid(tid)) {
+		if (virt_pid(task) != tid)
+			task = NULL;
+	}
 	if (task)
 		get_task_struct(task);
 	read_unlock(&tasklist_lock);
@@ -2081,16 +2143,23 @@ out:
  * tasklist lock while doing this, and we must release it before
  * we actually do the filldir itself, so we use a temp buffer..
  */
-static int get_tgid_list(int index, unsigned long version, unsigned int *tgids)
+static int get_tgid_list(int index, unsigned long version, unsigned int *tgids,
+		struct ve_struct *ve)
 {
 	struct task_struct *p;
 	int nr_tgids = 0;
 
 	index--;
 	read_lock(&tasklist_lock);
+	if (list_empty(&ve->vetask_lh))
+		goto out;
 	p = NULL;
 	if (version) {
-		p = find_task_by_pid(version);
+		struct ve_struct *oldve;
+
+		oldve = set_exec_env(ve);
+		p = find_task_by_pid_ve(version);
+		(void)set_exec_env(oldve);
 		if (p && !thread_group_leader(p))
 			p = NULL;
 	}
@@ -2098,10 +2167,10 @@ static int get_tgid_list(int index, unsi
 	if (p)
 		index = 0;
 	else
-		p = next_task(&init_task);
+		p = __first_task_ve(ve);
 
-	for ( ; p != &init_task; p = next_task(p)) {
-		int tgid = p->pid;
+	for ( ; p != NULL; p = __next_task_ve(ve, p)) {
+		int tgid = get_task_pid_ve(p, ve);
 		if (!pid_alive(p))
 			continue;
 		if (--index >= 0)
@@ -2111,6 +2180,7 @@ static int get_tgid_list(int index, unsi
 		if (nr_tgids >= PROC_MAXPIDS)
 			break;
 	}
+out:
 	read_unlock(&tasklist_lock);
 	return nr_tgids;
 }
@@ -2134,7 +2204,7 @@ static int get_tid_list(int index, unsig
 	 * via next_thread().
 	 */
 	if (pid_alive(task)) do {
-		int tid = task->pid;
+		int tid = get_task_pid(task);
 
 		if (--index >= 0)
 			continue;
@@ -2171,7 +2241,8 @@ int proc_pid_readdir(struct file * filp,
 	next_tgid = filp->f_version;
 	filp->f_version = 0;
 	for (;;) {
-		nr_tgids = get_tgid_list(nr, next_tgid, tgid_array);
+		nr_tgids = get_tgid_list(nr, next_tgid, tgid_array, /* per-VE */
+				VE_OWNER_FSTYPE(filp->f_dentry->d_sb->s_type));
 		if (!nr_tgids) {
 			/* no more entries ! */
 			break;
diff -uprN linux-2.6.16/fs/proc/generic.c linux-2.6.16.ovz/fs/proc/generic.c
--- linux-2.6.16/fs/proc/generic.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/fs/proc/generic.c	2006-07-05 08:34:56.000000000 -0400
@@ -10,7 +10,9 @@
 
 #include <linux/errno.h>
 #include <linux/time.h>
+#include <linux/fs.h>
 #include <linux/proc_fs.h>
+#include <linux/ve_owner.h>
 #include <linux/stat.h>
 #include <linux/module.h>
 #include <linux/mount.h>
@@ -29,6 +31,8 @@ static ssize_t proc_file_write(struct fi
 			       size_t count, loff_t *ppos);
 static loff_t proc_file_lseek(struct file *, loff_t, int);
 
+static DEFINE_RWLOCK(proc_tree_lock);
+
 int proc_match(int len, const char *name, struct proc_dir_entry *de)
 {
 	if (de->namelen != len)
@@ -229,6 +233,7 @@ proc_file_lseek(struct file *file, loff_
 	return retval;
 }
 
+#ifndef CONFIG_VE
 static int proc_notify_change(struct dentry *dentry, struct iattr *iattr)
 {
 	struct inode *inode = dentry->d_inode;
@@ -261,9 +266,12 @@ static int proc_getattr(struct vfsmount 
 	generic_fillattr(inode, stat);
 	return 0;
 }
+#endif
 
 static struct inode_operations proc_file_inode_operations = {
+#ifndef CONFIG_VE
 	.setattr	= proc_notify_change,
+#endif
 };
 
 /*
@@ -271,14 +279,20 @@ static struct inode_operations proc_file
  * returns the struct proc_dir_entry for "/proc/tty/driver", and
  * returns "serial" in residual.
  */
-static int xlate_proc_name(const char *name,
+static int __xlate_proc_name(struct proc_dir_entry *root, const char *name,
 			   struct proc_dir_entry **ret, const char **residual)
 {
 	const char     		*cp = name, *next;
 	struct proc_dir_entry	*de;
 	int			len;
 
-	de = &proc_root;
+	if (*ret) {
+		de_get(*ret);
+		return 0;
+	}
+
+	read_lock(&proc_tree_lock);
+	de = root;
 	while (1) {
 		next = strchr(cp, '/');
 		if (!next)
@@ -289,15 +303,35 @@ static int xlate_proc_name(const char *n
 			if (proc_match(len, cp, de))
 				break;
 		}
-		if (!de)
+		if (!de) {
+			read_unlock(&proc_tree_lock);
 			return -ENOENT;
+		}
 		cp += len + 1;
 	}
 	*residual = cp;
-	*ret = de;
+	*ret = de_get(de);
+	read_unlock(&proc_tree_lock);
 	return 0;
 }
 
+#ifndef CONFIG_VE
+#define xlate_proc_loc_name xlate_proc_name
+#else
+static int xlate_proc_loc_name(const char *name,
+			   struct proc_dir_entry **ret, const char **residual)
+{	/* resolve name starting at the proc_root of the current exec env */
+	return __xlate_proc_name(get_exec_env()->proc_root,
+			name, ret, residual);	/* on 0, *ret went through de_get() */
+}
+#endif
+
+static int xlate_proc_name(const char *name,
+		struct proc_dir_entry **ret, const char **residual)
+{	/* resolve name starting at the global proc_root; 0 or -ENOENT */
+	return __xlate_proc_name(&proc_root, name, ret, residual);
+}
+
 static DEFINE_IDR(proc_inum_idr);
 static DEFINE_SPINLOCK(proc_inum_lock); /* protects the above */
 
@@ -369,6 +403,20 @@ static struct dentry_operations proc_den
 	.d_delete	= proc_delete_dentry,
 };
 
+/* Look d's name up among dir's children; the hit (or NULL) goes via de_get() */
+static struct proc_dir_entry *__proc_lookup(struct proc_dir_entry *dir,
+		struct dentry *d)
+{
+	struct proc_dir_entry *child = dir->subdir;
+
+	/* skip children whose name length or bytes differ from d's name */
+	while (child && (child->namelen != d->d_name.len ||
+			memcmp(d->d_name.name, child->name, child->namelen)))
+		child = child->next;
+
+	return de_get(child);
+}
+
 /*
  * Don't create negative dentries here, return -ENOENT by hand
  * instead.
@@ -376,34 +424,147 @@ static struct dentry_operations proc_den
 struct dentry *proc_lookup(struct inode * dir, struct dentry *dentry, struct nameidata *nd)
 {
 	struct inode *inode = NULL;
-	struct proc_dir_entry * de;
+	struct proc_dir_entry *lde, *gde;
 	int error = -ENOENT;
 
 	lock_kernel();
-	de = PDE(dir);
-	if (de) {
-		for (de = de->subdir; de ; de = de->next) {
-			if (de->namelen != dentry->d_name.len)
-				continue;
-			if (!memcmp(dentry->d_name.name, de->name, de->namelen)) {
-				unsigned int ino = de->low_ino;
+	lde = LPDE(dir);
 
-				error = -EINVAL;
-				inode = proc_get_inode(dir->i_sb, ino, de);
-				break;
-			}
-		}
-	}
+	if (!lde)
+		goto out;
+
+	read_lock(&proc_tree_lock);
+	lde = __proc_lookup(lde, dentry);
+#ifdef CONFIG_VE
+	gde = GPDE(dir);
+	if (gde)
+		gde = __proc_lookup(gde, dentry);
+#else
+	gde = NULL;
+#endif
+	read_unlock(&proc_tree_lock);
+
+	/*
+	 * There are following possible cases after lookup:
+	 *
+	 * lde		gde
+	 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+	 * NULL		NULL		ENOENT
+	 * loc		NULL		found in local tree
+	 * loc		glob		found in both trees
+	 * NULL		glob		found in global tree
+	 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+	 *
+	 * We initialized inode as follows after lookup:
+	 *
+	 * inode->lde	inode->gde
+	 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+	 * loc		NULL		in local tree
+	 * loc		glob		both trees
+	 * glob		glob		global tree
+	 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+	 * i.e. inode->lde is always initialized
+	 */
+
+	if (lde == NULL && gde == NULL)
+		goto out;
+
+	if (lde != NULL)
+		inode = proc_get_inode(dir->i_sb, lde->low_ino, lde);
+	else
+		inode = proc_get_inode(dir->i_sb, gde->low_ino, gde);
+
+	/* may sleep in proc_get_inode(); i_sem stops others setting GPDE/LPDE */
+	if (!inode) {
+		/* the entry was found: report -EINVAL as before, not -ENOENT */
+		error = -EINVAL;
+		goto out_put;
+	}
+
+#ifdef CONFIG_VE
+	GPDE(inode) = de_get(gde);
+	if (gde)
+		__module_get(gde->owner);
+
+	/* if dentry is found in both trees and it is a directory
+	 * then inode's nlink count must be altered, because local
+	 * and global subtrees may differ.
+	 * on the other hand, they may intersect, so actual nlink
+	 * value is difficult to calculate - upper estimate is used
+	 * instead of it.
+	 * dentry found in global tree only must not be writable
+	 * in non-super ve.
+	 */
+	if (lde && gde && lde != gde && gde->nlink > 1)
+		inode->i_nlink += gde->nlink - 2;
+	if (lde == NULL && !ve_is_super(
+				VE_OWNER_FSTYPE(dir->i_sb->s_type)))
+		inode->i_mode &= ~S_IWUGO;
+#endif
 	unlock_kernel();
+	dentry->d_op = &proc_dentry_operations;
+	d_add(dentry, inode);
+	de_put(lde);
+	de_put(gde);
+	return NULL;
 
-	if (inode) {
-		dentry->d_op = &proc_dentry_operations;
-		d_add(dentry, inode);
-		return NULL;
-	}
+out_put:
+	de_put(lde);
+	de_put(gde);
+out:
+	unlock_kernel();
 	return ERR_PTR(error);
 }
 
+struct proc_dir_reader {	/* cursor of one in-flight proc_readdir() call */
+	struct list_head list;		/* link in proc_dir_readers */
+	struct proc_dir_entry *next;	/* entry the reader continues with */
+};
+
+static LIST_HEAD(proc_dir_readers);
+static DEFINE_SPINLOCK(proc_dir_readers_lock);
+
+static inline void add_reader(struct proc_dir_reader *r,
+		struct proc_dir_entry *cur)
+{	/* register r on proc_dir_readers so notify_readers() can see it */
+	r->next = cur->next;	/* successor of cur, recorded before publishing r */
+	spin_lock(&proc_dir_readers_lock);
+	list_add(&r->list, &proc_dir_readers);
+	spin_unlock(&proc_dir_readers_lock);
+}
+
+static inline struct proc_dir_entry *del_reader(struct proc_dir_reader *r)
+{	/* unregister r and hand back the entry to continue with (may be NULL) */
+	spin_lock(&proc_dir_readers_lock);
+	list_del(&r->list);
+	spin_unlock(&proc_dir_readers_lock);
+	return r->next;	/* notify_readers() may have advanced this meanwhile */
+}
+
+static void notify_readers(struct proc_dir_entry *de)
+{	/* de is about to be unlinked: readers parked on it move to de->next */
+	struct proc_dir_reader *r;
+
+	/* lockless since proc_tree_lock is taken for writing */
+	list_for_each_entry(r, &proc_dir_readers, list)
+		if (r->next == de)
+			r->next = de->next;
+}
+
+/* Return 1 if dir has a child whose name equals de's name, else 0. */
+static inline int in_tree(struct proc_dir_entry *de, struct proc_dir_entry *dir)
+{
+	struct proc_dir_entry *child = dir->subdir;
+
+	while (child) {
+		if (de->namelen == child->namelen &&
+		    !memcmp(de->name, child->name, child->namelen))
+			return 1;
+		child = child->next;
+	}
+	return 0;
+}
+
 /*
  * This returns non-zero if at EOF, so that the /proc
  * root directory can use this and check if it should
@@ -421,6 +582,7 @@ int proc_readdir(struct file * filp,
 	int i;
 	struct inode *inode = filp->f_dentry->d_inode;
 	int ret = 0;
+	struct proc_dir_reader this;
 
 	lock_kernel();
 
@@ -447,13 +609,12 @@ int proc_readdir(struct file * filp,
 			filp->f_pos++;
 			/* fall through */
 		default:
+			read_lock(&proc_tree_lock);
 			de = de->subdir;
 			i -= 2;
 			for (;;) {
-				if (!de) {
-					ret = 1;
-					goto out;
-				}
+				if (!de)
+					goto chk_global;
 				if (!i)
 					break;
 				de = de->next;
@@ -461,12 +622,60 @@ int proc_readdir(struct file * filp,
 			}
 
 			do {
-				if (filldir(dirent, de->name, de->namelen, filp->f_pos,
-					    de->low_ino, de->mode >> 12) < 0)
+				de_get(de);
+				add_reader(&this, de);
+				read_unlock(&proc_tree_lock);
+				ret = filldir(dirent, de->name, de->namelen,
+						filp->f_pos, de->low_ino,
+						de->mode >> 12);
+				read_lock(&proc_tree_lock);
+				de_put(de);
+				de = del_reader(&this);
+				if (ret < 0) {
+					read_unlock(&proc_tree_lock);
+					ret = 0;
 					goto out;
+				}
 				filp->f_pos++;
-				de = de->next;
 			} while (de);
+chk_global:
+#ifdef CONFIG_VE
+			de = GPDE(inode);
+			if (de == NULL)
+				goto done;
+
+			de = de->subdir;
+			while (de) {
+				if (in_tree(de, LPDE(inode))) {
+					de = de->next;
+					continue;
+				}
+
+				if (i > 0) {
+					i--;
+					de = de->next;
+					continue;
+				}
+
+				de_get(de);
+				add_reader(&this, de);
+				read_unlock(&proc_tree_lock);
+				ret = filldir(dirent, de->name, de->namelen,
+						filp->f_pos, de->low_ino,
+						de->mode >> 12);
+				read_lock(&proc_tree_lock);
+				de_put(de);
+				de = del_reader(&this);
+				if (ret < 0) {
+					read_unlock(&proc_tree_lock);
+					ret = 0;
+					goto out;
+				}
+				filp->f_pos++;
+			}
+done:
+#endif
+			read_unlock(&proc_tree_lock);
 	}
 	ret = 1;
 out:	unlock_kernel();
@@ -488,8 +697,10 @@ static struct file_operations proc_dir_o
  */
 static struct inode_operations proc_dir_inode_operations = {
 	.lookup		= proc_lookup,
+#ifndef CONFIG_VE
 	.getattr	= proc_getattr,
 	.setattr	= proc_notify_change,
+#endif
 };
 
 static int proc_register(struct proc_dir_entry * dir, struct proc_dir_entry * dp)
@@ -499,10 +710,20 @@ static int proc_register(struct proc_dir
 	i = get_inode_number();
 	if (i == 0)
 		return -EAGAIN;
+
+	write_lock(&proc_tree_lock);
+	if (dir->deleted) {
+		write_unlock(&proc_tree_lock);
+		release_inode_number(i);
+		return -ENOENT;
+	}
+
 	dp->low_ino = i;
 	dp->next = dir->subdir;
-	dp->parent = dir;
+	dp->parent = de_get(dir);
 	dir->subdir = dp;
+	write_unlock(&proc_tree_lock);
+
 	if (S_ISDIR(dp->mode)) {
 		if (dp->proc_iops == NULL) {
 			dp->proc_fops = &proc_dir_operations;
@@ -556,24 +777,26 @@ static struct proc_dir_entry *proc_creat
 					  mode_t mode,
 					  nlink_t nlink)
 {
-	struct proc_dir_entry *ent = NULL;
+	struct proc_dir_entry *ent;
 	const char *fn = name;
 	int len;
 
 	/* make sure name is valid */
-	if (!name || !strlen(name)) goto out;
+	if (!name || !strlen(name))
+		goto out;
 
-	if (!(*parent) && xlate_proc_name(name, parent, &fn) != 0)
+	if (xlate_proc_loc_name(name, parent, &fn) != 0)
 		goto out;
 
 	/* At this point there must not be any '/' characters beyond *fn */
 	if (strchr(fn, '/'))
-		goto out;
+		goto out_put;
 
 	len = strlen(fn);
 
 	ent = kmalloc(sizeof(struct proc_dir_entry) + len + 1, GFP_KERNEL);
-	if (!ent) goto out;
+	if (!ent)
+		goto out_put;
 
 	memset(ent, 0, sizeof(struct proc_dir_entry));
 	memcpy(((char *) ent) + sizeof(struct proc_dir_entry), fn, len + 1);
@@ -581,8 +804,13 @@ static struct proc_dir_entry *proc_creat
 	ent->namelen = len;
 	ent->mode = mode;
 	ent->nlink = nlink;
- out:
+	atomic_set(&ent->count, 1);
 	return ent;
+
+out_put:
+	de_put(*parent);
+out:
+	return NULL;
 }
 
 struct proc_dir_entry *proc_symlink(const char *name,
@@ -606,6 +834,7 @@ struct proc_dir_entry *proc_symlink(cons
 			kfree(ent);
 			ent = NULL;
 		}
+		de_put(parent);
 	}
 	return ent;
 }
@@ -624,6 +853,7 @@ struct proc_dir_entry *proc_mkdir_mode(c
 			kfree(ent);
 			ent = NULL;
 		}
+		de_put(parent);
 	}
 	return ent;
 }
@@ -662,9 +892,28 @@ struct proc_dir_entry *create_proc_entry
 			kfree(ent);
 			ent = NULL;
 		}
+		de_put(parent);
 	}
 	return ent;
 }
+EXPORT_SYMBOL(remove_proc_glob_entry);
+
+/* Create name via create_proc_entry() after resolving it from proc_root. */
+struct proc_dir_entry *create_proc_glob_entry(const char *name, mode_t mode,
+		struct proc_dir_entry *parent)
+{
+	/* leaf keeps name unless the xlate call stores a residual into it */
+	const char *leaf = name;
+	struct proc_dir_entry *ent = NULL;
+
+	if (xlate_proc_name(name, &parent, &leaf) == 0) {
+		ent = create_proc_entry(leaf, mode, parent);
+		de_put(parent);	/* pairs with the de_get() done by the xlate */
+	}
+	return ent;
+}
+
+EXPORT_SYMBOL(create_proc_glob_entry);
 
 void free_proc_entry(struct proc_dir_entry *de)
 {
@@ -684,20 +933,21 @@ void free_proc_entry(struct proc_dir_ent
  * Remove a /proc entry and free it if it's not currently in use.
  * If it is in use, we set the 'deleted' flag.
  */
-void remove_proc_entry(const char *name, struct proc_dir_entry *parent)
+static void __remove_proc_entry(const char *name, struct proc_dir_entry *parent)
 {
 	struct proc_dir_entry **p;
 	struct proc_dir_entry *de;
 	const char *fn = name;
 	int len;
 
-	if (!parent && xlate_proc_name(name, &parent, &fn) != 0)
-		goto out;
 	len = strlen(fn);
+	write_lock(&proc_tree_lock);
 	for (p = &parent->subdir; *p; p=&(*p)->next ) {
 		if (!proc_match(len, fn, *p))
 			continue;
+
 		de = *p;
+		notify_readers(de);
 		*p = de->next;
 		de->next = NULL;
 		if (S_ISDIR(de->mode))
@@ -705,15 +955,43 @@ void remove_proc_entry(const char *name,
 		proc_kill_inodes(de);
 		de->nlink = 0;
 		WARN_ON(de->subdir);
-		if (!atomic_read(&de->count))
-			free_proc_entry(de);
-		else {
-			de->deleted = 1;
-			printk("remove_proc_entry: %s/%s busy, count=%d\n",
-				parent->name, de->name, atomic_read(&de->count));
-		}
+		de->deleted = 1;
+		de_put(de);
+		de_put(parent);
 		break;
 	}
-out:
-	return;
+	write_unlock(&proc_tree_lock);
+}
+
+/* Remove name as resolved by xlate_proc_loc_name() (or under parent if set). */
+void remove_proc_loc_entry(const char *name, struct proc_dir_entry *parent)
+{
+	const char *leaf = name;
+
+	if (xlate_proc_loc_name(name, &parent, &leaf) == 0) {
+		__remove_proc_entry(leaf, parent);
+		/* pairs with the de_get() done by the xlate call */
+		de_put(parent);
+	}
+}
+
+/* Remove name starting from the global proc_root (or from parent if set). */
+void remove_proc_glob_entry(const char *name, struct proc_dir_entry *parent)
+{
+	const char *leaf = name;
+
+	if (xlate_proc_name(name, &parent, &leaf) == 0) {
+		__remove_proc_entry(leaf, parent);
+		/* pairs with the de_get() done by the xlate call */
+		de_put(parent);
+	}
+}
+
+void remove_proc_entry(const char *name, struct proc_dir_entry *parent)
+{	/* always tries the local removal; the global one is conditional */
+	remove_proc_loc_entry(name, parent);
+#ifdef CONFIG_VE
+	if (ve_is_super(get_exec_env()))	/* only when the exec env is the super VE */
+		remove_proc_glob_entry(name, parent);
+#endif
+}
diff -uprN linux-2.6.16/fs/proc/inode.c linux-2.6.16.ovz/fs/proc/inode.c
--- linux-2.6.16/fs/proc/inode.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/fs/proc/inode.c	2006-07-05 08:34:56.000000000 -0400
@@ -8,6 +8,7 @@
 #include <linux/proc_fs.h>
 #include <linux/kernel.h>
 #include <linux/mm.h>
+#include <linux/ve_owner.h>
 #include <linux/string.h>
 #include <linux/stat.h>
 #include <linux/file.h>
@@ -21,34 +22,25 @@
 
 #include "internal.h"
 
-static inline struct proc_dir_entry * de_get(struct proc_dir_entry *de)
-{
-	if (de)
-		atomic_inc(&de->count);
-	return de;
-}
-
 /*
  * Decrements the use count and checks for deferred deletion.
  */
-static void de_put(struct proc_dir_entry *de)
+void de_put(struct proc_dir_entry *de)
 {
 	if (de) {	
-		lock_kernel();		
 		if (!atomic_read(&de->count)) {
 			printk("de_put: entry %s already free!\n", de->name);
-			unlock_kernel();
 			return;
 		}
 
 		if (atomic_dec_and_test(&de->count)) {
-			if (de->deleted) {
-				printk("de_put: deferred delete of %s\n",
+			if (unlikely(!de->deleted)) {
+				printk("de_put: early delete of %s\n",
 					de->name);
-				free_proc_entry(de);
+				return;
 			}
+			free_proc_entry(de);
 		}		
-		unlock_kernel();
 	}
 }
 
@@ -68,12 +60,19 @@ static void proc_delete_inode(struct ino
 		put_task_struct(tsk);
 
 	/* Let go of any associated proc directory entry */
-	de = PROC_I(inode)->pde;
+	de = LPDE(inode);
 	if (de) {
 		if (de->owner)
 			module_put(de->owner);
 		de_put(de);
 	}
+#ifdef CONFIG_VE
+	de = GPDE(inode);
+	if (de) {
+		module_put(de->owner);
+		de_put(de);
+	}
+#endif
 	clear_inode(inode);
 }
 
@@ -100,6 +99,9 @@ static struct inode *proc_alloc_inode(st
 	ei->pde = NULL;
 	inode = &ei->vfs_inode;
 	inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
+#ifdef CONFIG_VE
+	GPDE(inode) = NULL;
+#endif
 	return inode;
 }
 
@@ -209,6 +211,12 @@ int proc_fill_super(struct super_block *
 	s->s_root = d_alloc_root(root_inode);
 	if (!s->s_root)
 		goto out_no_root;
+#ifdef CONFIG_VE
+	LPDE(root_inode) = de_get(get_exec_env()->proc_root);
+	GPDE(root_inode) = &proc_root;
+#else
+	LPDE(root_inode) = &proc_root;
+#endif
 	return 0;
 
 out_no_root:
diff -uprN linux-2.6.16/fs/proc/kmsg.c linux-2.6.16.ovz/fs/proc/kmsg.c
--- linux-2.6.16/fs/proc/kmsg.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/fs/proc/kmsg.c	2006-07-05 08:34:56.000000000 -0400
@@ -11,6 +11,7 @@
 #include <linux/kernel.h>
 #include <linux/poll.h>
 #include <linux/fs.h>
+#include <linux/veprintk.h>
 
 #include <asm/uaccess.h>
 #include <asm/io.h>
@@ -40,7 +41,7 @@ static ssize_t kmsg_read(struct file *fi
 
 static unsigned int kmsg_poll(struct file *file, poll_table *wait)
 {
-	poll_wait(file, &log_wait, wait);
+	poll_wait(file, &ve_log_wait, wait);
 	if (do_syslog(9, NULL, 0))
 		return POLLIN | POLLRDNORM;
 	return 0;
diff -uprN linux-2.6.16/fs/proc/proc_misc.c linux-2.6.16.ovz/fs/proc/proc_misc.c
--- linux-2.6.16/fs/proc/proc_misc.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/fs/proc/proc_misc.c	2006-07-05 08:34:56.000000000 -0400
@@ -32,6 +32,7 @@
 #include <linux/pagemap.h>
 #include <linux/swap.h>
 #include <linux/slab.h>
+#include <linux/virtinfo.h>
 #include <linux/smp.h>
 #include <linux/signal.h>
 #include <linux/module.h>
@@ -45,6 +46,8 @@
 #include <linux/jiffies.h>
 #include <linux/sysrq.h>
 #include <linux/vmalloc.h>
+#include <linux/version.h>
+#include <linux/compile.h>
 #include <linux/crash_dump.h>
 #include <asm/uaccess.h>
 #include <asm/pgtable.h>
@@ -53,8 +56,10 @@
 #include <asm/div64.h>
 #include "internal.h"
 
-#define LOAD_INT(x) ((x) >> FSHIFT)
-#define LOAD_FRAC(x) LOAD_INT(((x) & (FIXED_1-1)) * 100)
+#ifdef CONFIG_FAIRSCHED
+#include <linux/fairsched.h>
+#endif
+
 /*
  * Warning: stuff below (imported functions) assumes that its output will fit
  * into one page. For some of those functions it may be wrong. Moreover, we
@@ -84,15 +89,33 @@ static int loadavg_read_proc(char *page,
 {
 	int a, b, c;
 	int len;
-
-	a = avenrun[0] + (FIXED_1/200);
-	b = avenrun[1] + (FIXED_1/200);
-	c = avenrun[2] + (FIXED_1/200);
+	unsigned long __nr_running;
+	int __nr_threads;
+	unsigned long *__avenrun;
+	struct ve_struct *ve;
+
+	ve = get_exec_env();
+
+	if (ve_is_super(ve)) {
+		__avenrun = &avenrun[0];
+		__nr_running = nr_running();
+		__nr_threads = nr_threads;
+	} 
+#ifdef CONFIG_VE
+	else {
+		__avenrun = &ve->avenrun[0];
+		__nr_running = nr_running_ve(ve); 
+		__nr_threads = atomic_read(&ve->pcounter);
+	}
+#endif
+	a = __avenrun[0] + (FIXED_1/200);
+	b = __avenrun[1] + (FIXED_1/200);
+	c = __avenrun[2] + (FIXED_1/200);
 	len = sprintf(page,"%d.%02d %d.%02d %d.%02d %ld/%d %d\n",
 		LOAD_INT(a), LOAD_FRAC(a),
 		LOAD_INT(b), LOAD_FRAC(b),
 		LOAD_INT(c), LOAD_FRAC(c),
-		nr_running(), nr_threads, last_pid);
+		__nr_running, __nr_threads, last_pid);
 	return proc_calc_metrics(page, start, off, count, eof, len);
 }
 
@@ -105,6 +128,13 @@ static int uptime_read_proc(char *page, 
 	cputime_t idletime = cputime_add(init_task.utime, init_task.stime);
 
 	do_posix_clock_monotonic_gettime(&uptime);
+#ifdef CONFIG_VE
+	if (!ve_is_super(get_exec_env())) {
+		set_normalized_timespec(&uptime,
+		      uptime.tv_sec - get_exec_env()->start_timespec.tv_sec,
+		      uptime.tv_nsec - get_exec_env()->start_timespec.tv_nsec);
+	}
+#endif
 	cputime_to_timespec(idletime, &idle);
 	len = sprintf(page,"%lu.%02lu %lu.%02lu\n",
 			(unsigned long) uptime.tv_sec,
@@ -118,35 +148,37 @@ static int uptime_read_proc(char *page, 
 static int meminfo_read_proc(char *page, char **start, off_t off,
 				 int count, int *eof, void *data)
 {
-	struct sysinfo i;
+	struct meminfo mi;
 	int len;
-	struct page_state ps;
-	unsigned long inactive;
-	unsigned long active;
-	unsigned long free;
-	unsigned long committed;
-	unsigned long allowed;
+	unsigned long dummy;
 	struct vmalloc_info vmi;
-	long cached;
 
-	get_page_state(&ps);
-	get_zone_counts(&active, &inactive, &free);
+	get_page_state(&mi.ps);
+	get_zone_counts(&mi.active, &mi.inactive, &dummy);
 
 /*
  * display in kilobytes.
  */
 #define K(x) ((x) << (PAGE_SHIFT - 10))
-	si_meminfo(&i);
-	si_swapinfo(&i);
-	committed = atomic_read(&vm_committed_space);
-	allowed = ((totalram_pages - hugetlb_total_pages())
-		* sysctl_overcommit_ratio / 100) + total_swap_pages;
+	si_meminfo(&mi.si);
+	si_swapinfo(&mi.si);
+	mi.committed_space = atomic_read(&vm_committed_space);
+	mi.swapcache = total_swapcache_pages;
+	mi.cache = get_page_cache_size() - mi.swapcache - mi.si.bufferram;
+	if (mi.cache < 0)
+		mi.cache = 0;
 
-	cached = get_page_cache_size() - total_swapcache_pages - i.bufferram;
-	if (cached < 0)
-		cached = 0;
+	mi.vmalloc_total = (VMALLOC_END - VMALLOC_START) >> PAGE_SHIFT;
+	mi.allowed = ((totalram_pages - hugetlb_total_pages())
+		* sysctl_overcommit_ratio / 100) + total_swap_pages;
 
 	get_vmalloc_info(&vmi);
+	mi.vmalloc_used = vmi.used >> PAGE_SHIFT;
+	mi.vmalloc_largest = vmi.largest_chunk >> PAGE_SHIFT;
+
+	if (virtinfo_notifier_call(VITYPE_GENERAL, VIRTINFO_MEMINFO, &mi)
+			& NOTIFY_FAIL)
+		return -ENOMSG;
 
 	/*
 	 * Tagged format, for easy grepping and expansion.
@@ -175,29 +207,29 @@ static int meminfo_read_proc(char *page,
 		"VmallocTotal: %8lu kB\n"
 		"VmallocUsed:  %8lu kB\n"
 		"VmallocChunk: %8lu kB\n",
-		K(i.totalram),
-		K(i.freeram),
-		K(i.bufferram),
-		K(cached),
-		K(total_swapcache_pages),
-		K(active),
-		K(inactive),
-		K(i.totalhigh),
-		K(i.freehigh),
-		K(i.totalram-i.totalhigh),
-		K(i.freeram-i.freehigh),
-		K(i.totalswap),
-		K(i.freeswap),
-		K(ps.nr_dirty),
-		K(ps.nr_writeback),
-		K(ps.nr_mapped),
-		K(ps.nr_slab),
-		K(allowed),
-		K(committed),
-		K(ps.nr_page_table_pages),
-		(unsigned long)VMALLOC_TOTAL >> 10,
-		vmi.used >> 10,
-		vmi.largest_chunk >> 10
+		K(mi.si.totalram),
+		K(mi.si.freeram),
+		K(mi.si.bufferram),
+		K(mi.cache),
+		K(mi.swapcache),
+		K(mi.active),
+		K(mi.inactive),
+		K(mi.si.totalhigh),
+		K(mi.si.freehigh),
+		K(mi.si.totalram-mi.si.totalhigh),
+		K(mi.si.freeram-mi.si.freehigh),
+		K(mi.si.totalswap),
+		K(mi.si.freeswap),
+		K(mi.ps.nr_dirty),
+		K(mi.ps.nr_writeback),
+		K(mi.ps.nr_mapped),
+		K(mi.ps.nr_slab),
+		K(mi.allowed),
+		K(mi.committed_space),
+		K(mi.ps.nr_page_table_pages),
+		K(mi.vmalloc_total),
+		K(mi.vmalloc_used),
+		K(mi.vmalloc_largest)
 		);
 
 		len += hugetlb_report_meminfo(page + len);
@@ -237,8 +269,15 @@ static int version_read_proc(char *page,
 				 int count, int *eof, void *data)
 {
 	int len;
+	struct new_utsname *utsname = &ve_utsname;
 
-	strcpy(page, linux_banner);
+	if (ve_is_super(get_exec_env()))
+		strcpy(page, linux_banner);
+	else
+		sprintf(page, "Linux version %s ("
+		      LINUX_COMPILE_BY "@" LINUX_COMPILE_HOST ") ("
+		      LINUX_COMPILER ") %s\n",
+		      utsname->release, utsname->version);
 	len = strlen(page);
 	return proc_calc_metrics(page, start, off, count, eof, len);
 }
@@ -249,144 +288,60 @@ static int cpuinfo_open(struct inode *in
 	return seq_open(file, &cpuinfo_op);
 }
 
-enum devinfo_states {
-	CHR_HDR,
-	CHR_LIST,
-	BLK_HDR,
-	BLK_LIST,
-	DEVINFO_DONE
-};
-
-struct devinfo_state {
-	void *chrdev;
-	void *blkdev;
-	unsigned int num_records;
-	unsigned int cur_record;
-	enum devinfo_states state;
+static struct file_operations proc_cpuinfo_operations = {
+	.open		= cpuinfo_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= seq_release,
 };
 
-static void *devinfo_start(struct seq_file *f, loff_t *pos)
+static int devinfo_show(struct seq_file *f, void *v)
 {
-	struct devinfo_state *info = f->private;
+	int i = *(loff_t *) v;
 
-	if (*pos) {
-		if ((info) && (*pos <= info->num_records))
-			return info;
-		return NULL;
+	if (i < CHRDEV_MAJOR_HASH_SIZE) {
+		if (i == 0)
+			seq_printf(f, "Character devices:\n");
+		chrdev_show(f, i);
+	} else {
+		i -= CHRDEV_MAJOR_HASH_SIZE;
+		if (i == 0)
+			seq_printf(f, "\nBlock devices:\n");
+		blkdev_show(f, i);
 	}
-	info = kmalloc(sizeof(*info), GFP_KERNEL);
-	f->private = info;
-	info->chrdev = acquire_chrdev_list();
-	info->blkdev = acquire_blkdev_list();
-	info->state = CHR_HDR;
-	info->num_records = count_chrdev_list();
-	info->num_records += count_blkdev_list();
-	info->num_records += 2; /* Character and Block headers */
-	*pos = 1;
-	info->cur_record = *pos;
-	return info;
+	return 0;
 }
 
-static void *devinfo_next(struct seq_file *f, void *v, loff_t *pos)
+static void *devinfo_start(struct seq_file *f, loff_t *pos)
 {
-	int idummy;
-	char *ndummy;
-	struct devinfo_state *info = f->private;
-
-	switch (info->state) {
-		case CHR_HDR:
-			info->state = CHR_LIST;
-			(*pos)++;
-			/*fallthrough*/
-		case CHR_LIST:
-			if (get_chrdev_info(info->chrdev,&idummy,&ndummy)) {
-				/*
-				 * The character dev list is complete
-				 */
-				info->state = BLK_HDR;
-			} else {
-				info->chrdev = get_next_chrdev(info->chrdev);
-			}
-			(*pos)++;
-			break;
-		case BLK_HDR:
-			info->state = BLK_LIST;
-			(*pos)++;
-			break;
-		case BLK_LIST:
-			if (get_blkdev_info(info->blkdev,&idummy,&ndummy)) {
-				/*
-				 * The block dev list is complete
-				 */
-				info->state = DEVINFO_DONE;
-			} else {
-				info->blkdev = get_next_blkdev(info->blkdev);
-			}
-			(*pos)++;
-			break;
-		case DEVINFO_DONE:
-			(*pos)++;
-			info->cur_record = *pos;
-			info = NULL;
-			break;
-		default:
-			break;
-	}
-	if (info)
-		info->cur_record = *pos;
-	return info;
+	if (*pos < (BLKDEV_MAJOR_HASH_SIZE + CHRDEV_MAJOR_HASH_SIZE))
+		return pos;
+	return NULL;
 }
 
-static void devinfo_stop(struct seq_file *f, void *v)
+static void *devinfo_next(struct seq_file *f, void *v, loff_t *pos)
 {
-	struct devinfo_state *info = f->private;
-
-	if (info) {
-		release_chrdev_list(info->chrdev);
-		release_blkdev_list(info->blkdev);
-		f->private = NULL;
-		kfree(info);
-	}
+	(*pos)++;
+	if (*pos >= (BLKDEV_MAJOR_HASH_SIZE + CHRDEV_MAJOR_HASH_SIZE))
+		return NULL;
+	return pos;
 }
 
-static int devinfo_show(struct seq_file *f, void *arg)
-{
-	int major;
-	char *name;
-	struct devinfo_state *info = f->private;
-
-	switch(info->state) {
-		case CHR_HDR:
-			seq_printf(f,"Character devices:\n");
-			/* fallthrough */
-		case CHR_LIST:
-			if (!get_chrdev_info(info->chrdev,&major,&name))
-				seq_printf(f,"%3d %s\n",major,name);
-			break;
-		case BLK_HDR:
-			seq_printf(f,"\nBlock devices:\n");
-			/* fallthrough */
-		case BLK_LIST:
-			if (!get_blkdev_info(info->blkdev,&major,&name))
-				seq_printf(f,"%3d %s\n",major,name);
-			break;
-		default:
-			break;
-	}
-
-	return 0;
+static void devinfo_stop(struct seq_file *f, void *v)
+{
+	/* Nothing to do */
 }
 
-static  struct seq_operations devinfo_op = {
-	.start  = devinfo_start,
-	.next   = devinfo_next,
-	.stop   = devinfo_stop,
-	.show   = devinfo_show,
+static struct seq_operations devinfo_ops = {
+	.start = devinfo_start,
+	.next  = devinfo_next,
+	.stop  = devinfo_stop,
+	.show  = devinfo_show
 };
 
-static int devinfo_open(struct inode *inode, struct file *file)
+static int devinfo_open(struct inode *inode, struct file *filp)
 {
-	return seq_open(file, &devinfo_op);
+	return seq_open(filp, &devinfo_ops);
 }
 
 static struct file_operations proc_devinfo_operations = {
@@ -396,13 +351,6 @@ static struct file_operations proc_devin
 	.release	= seq_release,
 };
 
-static struct file_operations proc_cpuinfo_operations = {
-	.open		= cpuinfo_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= seq_release,
-};
-
 extern struct seq_operations vmstat_op;
 static int vmstat_open(struct inode *inode, struct file *file)
 {
@@ -487,18 +435,15 @@ static struct file_operations proc_slabi
 };
 #endif
 
-static int show_stat(struct seq_file *p, void *v)
+static void show_stat_ve0(struct seq_file *p)
 {
 	int i;
-	unsigned long jif;
+	struct page_state page_state;
 	cputime64_t user, nice, system, idle, iowait, irq, softirq, steal;
 	u64 sum = 0;
 
 	user = nice = system = idle = iowait =
 		irq = softirq = steal = cputime64_zero;
-	jif = - wall_to_monotonic.tv_sec;
-	if (wall_to_monotonic.tv_nsec)
-		--jif;
 
 	for_each_cpu(i) {
 		int j;
@@ -552,9 +497,84 @@ static int show_stat(struct seq_file *p,
 	for (i = 0; i < NR_IRQS; i++)
 		seq_printf(p, " %u", kstat_irqs(i));
 #endif
+	get_full_page_state(&page_state);
+	seq_printf(p, "\nswap %lu %lu\n", page_state.pswpin, page_state.pswpout);
+}
+
+#ifdef CONFIG_VE
+/* Print the "cpu", "cpuN", "intr" and "swap" lines for a VE other than the super one. */
+static void show_stat_ve(struct seq_file *p, struct ve_struct *env)
+{
+	cpumask_t online;
+	cycles_t idle = 0, iowait = 0;
+	u64 user = 0, nice = 0, system = 0;
+	int cpu;
+
+	ve_cpu_online_map(env, &online);
+	/* first pass: totals over the CPUs reported by ve_cpu_online_map() */
+	for_each_cpu_mask(cpu, online) {
+		user += VE_CPU_STATS(env, cpu)->user;
+		nice += VE_CPU_STATS(env, cpu)->nice;
+		system += VE_CPU_STATS(env, cpu)->system;
+		idle += ve_sched_get_idle_time(env, cpu);
+		iowait += ve_sched_get_iowait_time(env, cpu);
+	}
+	seq_printf(p, "cpu  %llu %llu %llu %llu %llu 0 0 0\n",
+		(unsigned long long)cputime64_to_clock_t(user),
+		(unsigned long long)cputime64_to_clock_t(nice),
+		(unsigned long long)cputime64_to_clock_t(system),
+		(unsigned long long)cycles_to_clocks(idle),
+		(unsigned long long)cycles_to_clocks(iowait));
+
+	/* second pass: one line per CPU; the last three columns are hard-coded to 0 */
+	for_each_cpu_mask(cpu, online) {
+		user = VE_CPU_STATS(env, cpu)->user;
+		nice = VE_CPU_STATS(env, cpu)->nice;
+		system = VE_CPU_STATS(env, cpu)->system;
+		idle = ve_sched_get_idle_time(env, cpu);
+		iowait = ve_sched_get_iowait_time(env, cpu);
+		seq_printf(p, "cpu%d %llu %llu %llu %llu %llu 0 0 0\n",
+			cpu,
+			(unsigned long long)cputime64_to_clock_t(user),
+			(unsigned long long)cputime64_to_clock_t(nice),
+			(unsigned long long)cputime64_to_clock_t(system),
+			(unsigned long long)cycles_to_clocks(idle),
+			(unsigned long long)cycles_to_clocks(iowait));
+	}
+	seq_printf(p, "intr 0\nswap 0 0\n");
+}
+#endif
+
+int show_stat(struct seq_file *p, void *v)
+{
+	extern unsigned long total_forks;
+	unsigned long seq, jif;
+	struct ve_struct *env;
+	unsigned long __nr_running, __nr_iowait;
+ 
+	do {
+		seq = read_seqbegin(&xtime_lock);
+		jif = - wall_to_monotonic.tv_sec;
+		if (wall_to_monotonic.tv_nsec)
+			--jif;
+	} while (read_seqretry(&xtime_lock, seq));
+
+	env = get_exec_env();
+	if (ve_is_super(env)) {
+		show_stat_ve0(p);
+		__nr_running = nr_running();
+		__nr_iowait = nr_iowait();
+	}
+#ifdef CONFIG_VE
+	else {
+		show_stat_ve(p, env);
+		__nr_running = nr_running_ve(env);
+		__nr_iowait = nr_iowait_ve(env);
+	}
+#endif
 
 	seq_printf(p,
-		"\nctxt %llu\n"
+		"ctxt %llu\n"
 		"btime %lu\n"
 		"processes %lu\n"
 		"procs_running %lu\n"
@@ -562,8 +582,8 @@ static int show_stat(struct seq_file *p,
 		nr_context_switches(),
 		(unsigned long)jif,
 		total_forks,
-		nr_running(),
-		nr_iowait());
+		__nr_running,
+		__nr_iowait);
 
 	return 0;
 }
@@ -652,7 +672,8 @@ static int cmdline_read_proc(char *page,
 {
 	int len;
 
-	len = sprintf(page, "%s\n", saved_command_line);
+	len = sprintf(page, "%s\n",
+		ve_is_super(get_exec_env()) ? saved_command_line : "");
 	return proc_calc_metrics(page, start, off, count, eof, len);
 }
 
diff -uprN linux-2.6.16/fs/proc/proc_tty.c linux-2.6.16.ovz/fs/proc/proc_tty.c
--- linux-2.6.16/fs/proc/proc_tty.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/fs/proc/proc_tty.c	2006-07-05 08:34:56.000000000 -0400
@@ -6,6 +6,7 @@
 
 #include <asm/uaccess.h>
 
+#include <linux/ve_owner.h>
 #include <linux/init.h>
 #include <linux/errno.h>
 #include <linux/time.h>
@@ -106,24 +107,35 @@ static int show_tty_driver(struct seq_fi
 /* iterator */
 static void *t_start(struct seq_file *m, loff_t *pos)
 {
-	struct list_head *p;
+	struct tty_driver *drv;
+
 	loff_t l = *pos;
-	list_for_each(p, &tty_drivers)
+	read_lock(&tty_driver_guard);
+	list_for_each_entry(drv, &tty_drivers, tty_drivers) {
+		if (!ve_accessible_strict(VE_OWNER_TTYDRV(drv), get_exec_env()))
+			continue;
 		if (!l--)
-			return list_entry(p, struct tty_driver, tty_drivers);
+			return drv;
+	}
 	return NULL;
 }
 
 static void *t_next(struct seq_file *m, void *v, loff_t *pos)
 {
-	struct list_head *p = ((struct tty_driver *)v)->tty_drivers.next;
+	struct tty_driver *drv;
+
 	(*pos)++;
-	return p==&tty_drivers ? NULL :
-			list_entry(p, struct tty_driver, tty_drivers);
+	drv = (struct tty_driver *)v;
+	list_for_each_entry_continue(drv, &tty_drivers, tty_drivers) {
+		if (ve_accessible_strict(VE_OWNER_TTYDRV(drv), get_exec_env()))
+			return drv;
+	}
+	return NULL;
 }
 
 static void t_stop(struct seq_file *m, void *v)
 {
+	read_unlock(&tty_driver_guard);
 }
 
 static struct seq_operations tty_drivers_op = {
diff -uprN linux-2.6.16/fs/proc/root.c linux-2.6.16.ovz/fs/proc/root.c
--- linux-2.6.16/fs/proc/root.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/fs/proc/root.c	2006-07-05 08:34:56.000000000 -0400
@@ -20,7 +20,10 @@
 
 #include "internal.h"
 
-struct proc_dir_entry *proc_net, *proc_net_stat, *proc_bus, *proc_root_fs, *proc_root_driver;
+#ifndef CONFIG_VE
+struct proc_dir_entry *proc_net, *proc_net_stat;
+#endif
+struct proc_dir_entry *proc_bus, *proc_root_fs, *proc_root_driver;
 
 #ifdef CONFIG_SYSCTL
 struct proc_dir_entry *proc_sys_root;
@@ -32,12 +35,14 @@ static struct super_block *proc_get_sb(s
 	return get_sb_single(fs_type, flags, data, proc_fill_super);
 }
 
-static struct file_system_type proc_fs_type = {
+struct file_system_type proc_fs_type = {
 	.name		= "proc",
 	.get_sb		= proc_get_sb,
 	.kill_sb	= kill_anon_super,
 };
 
+EXPORT_SYMBOL(proc_fs_type);
+
 void __init proc_root_init(void)
 {
 	int err = proc_init_inodecache();
@@ -157,7 +162,9 @@ EXPORT_SYMBOL(create_proc_entry);
 EXPORT_SYMBOL(remove_proc_entry);
 EXPORT_SYMBOL(proc_root);
 EXPORT_SYMBOL(proc_root_fs);
+#ifndef CONFIG_VE
 EXPORT_SYMBOL(proc_net);
 EXPORT_SYMBOL(proc_net_stat);
+#endif
 EXPORT_SYMBOL(proc_bus);
 EXPORT_SYMBOL(proc_root_driver);
diff -uprN linux-2.6.16/fs/proc/task_mmu.c linux-2.6.16.ovz/fs/proc/task_mmu.c
--- linux-2.6.16/fs/proc/task_mmu.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/fs/proc/task_mmu.c	2006-07-05 08:34:56.000000000 -0400
@@ -90,9 +90,12 @@ int proc_exe_link(struct inode *inode, s
 	}
 
 	if (vma) {
-		*mnt = mntget(vma->vm_file->f_vfsmnt);
-		*dentry = dget(vma->vm_file->f_dentry);
-		result = 0;
+		result = d_root_check(vma->vm_file->f_dentry,
+				vma->vm_file->f_vfsmnt);
+		if (!result) {
+			*mnt = mntget(vma->vm_file->f_vfsmnt);
+			*dentry = dget(vma->vm_file->f_dentry);
+		}
 	}
 
 	up_read(&mm->mmap_sem);
diff -uprN linux-2.6.16/fs/proc/task_nommu.c linux-2.6.16.ovz/fs/proc/task_nommu.c
--- linux-2.6.16/fs/proc/task_nommu.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/fs/proc/task_nommu.c	2006-07-05 08:34:56.000000000 -0400
@@ -126,9 +126,12 @@ int proc_exe_link(struct inode *inode, s
 	}
 
 	if (vma) {
-		*mnt = mntget(vma->vm_file->f_vfsmnt);
-		*dentry = dget(vma->vm_file->f_dentry);
-		result = 0;
+		result = d_root_check(vma->vm_file->f_dentry,
+				vma->vm_file->f_vfsmnt);
+		if (!result) {
+			*mnt = mntget(vma->vm_file->f_vfsmnt);
+			*dentry = dget(vma->vm_file->f_dentry);
+		}
 	}
 
 	up_read(&mm->mmap_sem);
diff -uprN linux-2.6.16/fs/proc/vmcore.c linux-2.6.16.ovz/fs/proc/vmcore.c
--- linux-2.6.16/fs/proc/vmcore.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/fs/proc/vmcore.c	2006-07-05 08:34:56.000000000 -0400
@@ -103,8 +103,8 @@ static ssize_t read_vmcore(struct file *
 				size_t buflen, loff_t *fpos)
 {
 	ssize_t acc = 0, tmp;
-	size_t tsz, nr_bytes;
-	u64 start;
+	size_t tsz;
+	u64 start, nr_bytes;
 	struct vmcore *curr_m = NULL;
 
 	if (buflen == 0 || *fpos >= vmcore_size)
diff -uprN linux-2.6.16/fs/quota.c linux-2.6.16.ovz/fs/quota.c
--- linux-2.6.16/fs/quota.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/fs/quota.c	2006-07-05 08:34:56.000000000 -0400
@@ -81,11 +81,11 @@ static int generic_quotactl_valid(struct
 	if (cmd == Q_GETQUOTA) {
 		if (((type == USRQUOTA && current->euid != id) ||
 		     (type == GRPQUOTA && !in_egroup_p(id))) &&
-		    !capable(CAP_SYS_ADMIN))
+		    !capable(CAP_VE_SYS_ADMIN))
 			return -EPERM;
 	}
 	else if (cmd != Q_GETFMT && cmd != Q_SYNC && cmd != Q_GETINFO)
-		if (!capable(CAP_SYS_ADMIN))
+		if (!capable(CAP_VE_SYS_ADMIN))
 			return -EPERM;
 
 	return 0;
@@ -132,10 +132,10 @@ static int xqm_quotactl_valid(struct sup
 	if (cmd == Q_XGETQUOTA) {
 		if (((type == XQM_USRQUOTA && current->euid != id) ||
 		     (type == XQM_GRPQUOTA && !in_egroup_p(id))) &&
-		     !capable(CAP_SYS_ADMIN))
+		     !capable(CAP_VE_SYS_ADMIN))
 			return -EPERM;
 	} else if (cmd != Q_XGETQSTAT && cmd != Q_XQUOTASYNC) {
-		if (!capable(CAP_SYS_ADMIN))
+		if (!capable(CAP_VE_SYS_ADMIN))
 			return -EPERM;
 	}
 
@@ -216,7 +216,7 @@ restart:
 		sb->s_count++;
 		spin_unlock(&sb_lock);
 		down_read(&sb->s_umount);
-		if (sb->s_root && sb->s_qcop->quota_sync)
+		if (sb->s_root && sb->s_qcop && sb->s_qcop->quota_sync)
 			quota_sync_sb(sb, type);
 		up_read(&sb->s_umount);
 		spin_lock(&sb_lock);
@@ -337,6 +337,235 @@ static int do_quotactl(struct super_bloc
 	return 0;
 }
 
+/* Map a user-space device path to its super block; failures come back as ERR_PTR values. */
+static struct super_block *quota_get_sb(const char __user *special)
+{
+	struct block_device *dev;
+	struct super_block *super;
+	char *path = getname(special);
+
+	if (IS_ERR(path))
+		return (struct super_block *)path;
+	dev = lookup_bdev(path, FMODE_QUOTACTL);
+	putname(path);
+	if (IS_ERR(dev))
+		return (struct super_block *)dev;
+
+	/* the bdev reference is dropped whether or not get_super() found a super block */
+	super = get_super(dev);
+	bdput(dev);
+	return super ? super : ERR_PTR(-ENODEV);
+}
+
+#ifdef CONFIG_QUOTA_COMPAT
+
+#define QC_QUOTAON  0x0100	/* enable quotas */
+#define QC_QUOTAOFF 0x0200	/* disable quotas */
+/* GETQUOTA, SETQUOTA and SETUSE, which were at 0x0300-0x0500, now have other parameters */
+#define QC_SYNC     0x0600	/* sync disk copy of a filesystems quotas */
+#define QC_SETQLIM  0x0700	/* set limits */
+/* GETSTATS at 0x0800 is now longer... */
+#define QC_GETINFO  0x0900	/* get info about quotas - graces, flags... */
+#define QC_SETINFO  0x0A00	/* set info about quotas */
+#define QC_SETGRACE 0x0B00	/* set inode and block grace */
+#define QC_SETFLAGS 0x0C00	/* set flags for quota */
+#define QC_GETQUOTA 0x0D00	/* get limits and usage */
+#define QC_SETQUOTA 0x0E00	/* set limits and usage */
+#define QC_SETUSE   0x0F00	/* set usage */
+/* 0x1000 used by old RSQUASH */
+#define QC_GETSTATS 0x1100	/* get collected stats */
+
+struct compat_dqblk {	/* old-ABI quota record exchanged with user space by compat_quotactl() */
+	unsigned int dqb_ihardlimit;	/* copied to/from if_dqblk.dqb_ihardlimit */
+	unsigned int dqb_isoftlimit;	/* copied to/from if_dqblk.dqb_isoftlimit */
+	unsigned int dqb_curinodes;	/* copied to/from if_dqblk.dqb_curinodes */
+	unsigned int dqb_bhardlimit;	/* copied to/from if_dqblk.dqb_bhardlimit */
+	unsigned int dqb_bsoftlimit;	/* copied to/from if_dqblk.dqb_bsoftlimit */
+	qsize_t dqb_curspace;	/* copied to/from if_dqblk.dqb_curspace */
+	__kernel_time_t dqb_btime;	/* filled on QC_GETQUOTA; not read by the QC_SET* commands */
+	__kernel_time_t dqb_itime;	/* filled on QC_GETQUOTA; not read by the QC_SET* commands */
+};
+
+struct compat_dqinfo {	/* old-ABI counterpart of struct if_dqinfo used by compat_quotactl() */
+	unsigned int dqi_bgrace;	/* copied to/from if_dqinfo.dqi_bgrace */
+	unsigned int dqi_igrace;	/* copied to/from if_dqinfo.dqi_igrace */
+	unsigned int dqi_flags;	/* QC_GETINFO sets 0x0010 here when DQF_INFO_DIRTY is set */
+	unsigned int dqi_blocks;	/* always 0 on QC_GETINFO; not read by the QC_SET* commands */
+	unsigned int dqi_free_blk;	/* always 0 on QC_GETINFO; not read by the QC_SET* commands */
+	unsigned int dqi_free_entry;	/* always 0 on QC_GETINFO; not read by the QC_SET* commands */
+};
+
+struct compat_dqstats {	/* QC_GETSTATS reply: compat_quotactl() zero-fills it, then sets version */
+	__u32 lookups;
+	__u32 drops;
+	__u32 reads;
+	__u32 writes;
+	__u32 cache_hits;
+	__u32 allocated_dquots;
+	__u32 free_dquots;
+	__u32 syncs;
+	__u32 version;	/* reported as 6*10000+5*100+0 */
+};
+
+asmlinkage long sys_quotactl(unsigned int cmd, const char __user *special, qid_t id, void __user *addr);
+/* Serve old-ABI (QC_*) quotactl commands by mapping them onto the current quota interface. */
+static long compat_quotactl(unsigned int cmds, unsigned int type,
+		const char __user *special, qid_t id,
+		void __user *addr)
+{
+	struct super_block *sb = NULL;
+	long ret;
+
+	switch (cmds) {
+		case QC_QUOTAON:
+			return sys_quotactl(QCMD(Q_QUOTAON, type),
+					special, id, addr);
+
+		case QC_QUOTAOFF:
+			return sys_quotactl(QCMD(Q_QUOTAOFF, type),
+					special, id, addr);
+
+		case QC_SYNC:
+			return sys_quotactl(QCMD(Q_SYNC, type),
+					special, id, addr);
+
+		case QC_GETQUOTA: {
+			struct if_dqblk idq;
+			struct compat_dqblk cdq;
+
+			sb = quota_get_sb(special);
+			ret = PTR_ERR(sb);
+			if (IS_ERR(sb))
+				break;
+			ret = check_quotactl_valid(sb, type, Q_GETQUOTA, id);
+			if (ret)
+				break;
+			ret = sb->s_qcop->get_dqblk(sb, type, id, &idq);
+			if (ret)
+				break;
+			memset(&cdq, 0, sizeof(cdq));	/* keep stale stack bytes (struct padding) out of user space */
+			cdq.dqb_ihardlimit = idq.dqb_ihardlimit;
+			cdq.dqb_isoftlimit = idq.dqb_isoftlimit;
+			cdq.dqb_curinodes = idq.dqb_curinodes;
+			cdq.dqb_bhardlimit = idq.dqb_bhardlimit;
+			cdq.dqb_bsoftlimit = idq.dqb_bsoftlimit;
+			cdq.dqb_curspace = idq.dqb_curspace;
+			cdq.dqb_btime = idq.dqb_btime;
+			cdq.dqb_itime = idq.dqb_itime;
+			if (copy_to_user(addr, &cdq, sizeof(cdq)))
+				ret = -EFAULT;
+			break;
+		}
+
+		case QC_SETQUOTA:
+		case QC_SETUSE:
+		case QC_SETQLIM: {
+			struct if_dqblk idq;
+			struct compat_dqblk cdq;
+
+			sb = quota_get_sb(special);
+			ret = PTR_ERR(sb);
+			if (IS_ERR(sb))
+				break;
+			ret = check_quotactl_valid(sb, type, Q_SETQUOTA, id);
+			if (ret)
+				break;
+			ret = -EFAULT;
+			if (copy_from_user(&cdq, addr, sizeof(cdq)))
+				break;
+			idq.dqb_ihardlimit = cdq.dqb_ihardlimit;
+			idq.dqb_isoftlimit = cdq.dqb_isoftlimit;
+			idq.dqb_curinodes = cdq.dqb_curinodes;
+			idq.dqb_bhardlimit = cdq.dqb_bhardlimit;
+			idq.dqb_bsoftlimit = cdq.dqb_bsoftlimit;
+			idq.dqb_curspace = cdq.dqb_curspace;
+			idq.dqb_valid = 0;
+			if (cmds == QC_SETQUOTA || cmds == QC_SETQLIM)
+				idq.dqb_valid |= QIF_LIMITS;
+			if (cmds == QC_SETQUOTA || cmds == QC_SETUSE)
+				idq.dqb_valid |= QIF_USAGE;
+			ret = sb->s_qcop->set_dqblk(sb, type, id, &idq);
+			break;
+		}
+
+		case QC_GETINFO: {
+			struct if_dqinfo iinf;
+			struct compat_dqinfo cinf;
+
+			sb = quota_get_sb(special);
+			ret = PTR_ERR(sb);
+			if (IS_ERR(sb))
+				break;
+			/* NOTE(review): validated as Q_GETQUOTA rather than Q_GETINFO - confirm this is intended */
+			ret = check_quotactl_valid(sb, type, Q_GETQUOTA, id);
+			if (ret)
+				break;
+			ret = sb->s_qcop->get_info(sb, type, &iinf);
+			if (ret)
+				break;
+			cinf.dqi_bgrace = iinf.dqi_bgrace;
+			cinf.dqi_igrace = iinf.dqi_igrace;
+			cinf.dqi_flags = 0;
+			if (iinf.dqi_flags & DQF_INFO_DIRTY)
+				cinf.dqi_flags |= 0x0010;
+			cinf.dqi_blocks = 0;
+			cinf.dqi_free_blk = 0;
+			cinf.dqi_free_entry = 0;
+			if (copy_to_user(addr, &cinf, sizeof(cinf)))
+				ret = -EFAULT;
+			break;
+		}
+
+		case QC_SETINFO:
+		case QC_SETGRACE:
+		case QC_SETFLAGS: {
+			struct if_dqinfo iinf;
+			struct compat_dqinfo cinf;
+
+			sb = quota_get_sb(special);
+			ret = PTR_ERR(sb);
+			if (IS_ERR(sb))
+				break;
+			ret = check_quotactl_valid(sb, type, Q_SETINFO, id);
+			if (ret)
+				break;
+			ret = -EFAULT;
+			if (copy_from_user(&cinf, addr, sizeof(cinf)))
+				break;
+			iinf.dqi_bgrace = cinf.dqi_bgrace;
+			iinf.dqi_igrace = cinf.dqi_igrace;
+			iinf.dqi_flags = cinf.dqi_flags;
+			iinf.dqi_valid = 0;
+			if (cmds == QC_SETINFO || cmds == QC_SETGRACE)
+				iinf.dqi_valid |= IIF_BGRACE | IIF_IGRACE;
+			if (cmds == QC_SETINFO || cmds == QC_SETFLAGS)
+				iinf.dqi_valid |= IIF_FLAGS;
+			ret = sb->s_qcop->set_info(sb, type, &iinf);
+			break;
+		}
+
+		case QC_GETSTATS: {
+			struct compat_dqstats stat;
+
+			memset(&stat, 0, sizeof(stat));
+			stat.version = 6*10000+5*100+0;
+			ret = 0;
+			if (copy_to_user(addr, &stat, sizeof(stat)))
+				ret = -EFAULT;
+			break;
+		}
+
+		default:
+			ret = -ENOSYS;
+			break;
+	}
+	if (sb && !IS_ERR(sb))
+		drop_super(sb);
+	return ret;
+}
+
+#endif
+
 /*
  * This is the system call interface. This communicates with
  * the user-level programs. Currently this only supports diskquota
@@ -347,25 +576,20 @@ asmlinkage long sys_quotactl(unsigned in
 {
 	uint cmds, type;
 	struct super_block *sb = NULL;
-	struct block_device *bdev;
-	char *tmp;
 	int ret;
 
 	cmds = cmd >> SUBCMDSHIFT;
 	type = cmd & SUBCMDMASK;
 
+#ifdef CONFIG_QUOTA_COMPAT
+	if (cmds >= 0x0100 && cmds < 0x3000)
+		return compat_quotactl(cmds, type, special, id, addr);
+#endif
+
 	if (cmds != Q_SYNC || special) {
-		tmp = getname(special);
-		if (IS_ERR(tmp))
-			return PTR_ERR(tmp);
-		bdev = lookup_bdev(tmp);
-		putname(tmp);
-		if (IS_ERR(bdev))
-			return PTR_ERR(bdev);
-		sb = get_super(bdev);
-		bdput(bdev);
-		if (!sb)
-			return -ENODEV;
+		sb = quota_get_sb(special);
+		if (IS_ERR(sb))
+			return PTR_ERR(sb);
 	}
 
 	ret = check_quotactl_valid(sb, type, cmds, id);
diff -uprN linux-2.6.16/fs/reiserfs/namei.c linux-2.6.16.ovz/fs/reiserfs/namei.c
--- linux-2.6.16/fs/reiserfs/namei.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/fs/reiserfs/namei.c	2006-07-05 08:34:56.000000000 -0400
@@ -864,6 +864,9 @@ static int reiserfs_rmdir(struct inode *
 	INITIALIZE_PATH(path);
 	struct reiserfs_dir_entry de;
 
+	inode = dentry->d_inode;
+	DQUOT_INIT(inode);
+
 	/* we will be doing 2 balancings and update 2 stat data, we change quotas
 	 * of the owner of the directory and of the owner of the parent directory.
 	 * The quota structure is possibly deleted only on last iput => outside
@@ -888,8 +891,6 @@ static int reiserfs_rmdir(struct inode *
 		goto end_rmdir;
 	}
 
-	inode = dentry->d_inode;
-
 	reiserfs_update_inode_transaction(inode);
 	reiserfs_update_inode_transaction(dir);
 
@@ -952,6 +953,7 @@ static int reiserfs_unlink(struct inode 
 	unsigned long savelink;
 
 	inode = dentry->d_inode;
+	DQUOT_INIT(inode);
 
 	/* in this transaction we can be doing at max two balancings and update
 	 * two stat datas, we change quotas of the owner of the directory and of
@@ -1259,6 +1261,8 @@ static int reiserfs_rename(struct inode 
 
 	old_inode = old_dentry->d_inode;
 	new_dentry_inode = new_dentry->d_inode;
+	if (new_dentry_inode)
+		DQUOT_INIT(new_dentry_inode);
 
 	// make sure, that oldname still exists and points to an object we
 	// are going to rename
diff -uprN linux-2.6.16/fs/reiserfs/xattr.c linux-2.6.16.ovz/fs/reiserfs/xattr.c
--- linux-2.6.16/fs/reiserfs/xattr.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/fs/reiserfs/xattr.c	2006-07-05 08:34:56.000000000 -0400
@@ -1343,7 +1343,8 @@ static int reiserfs_check_acl(struct ino
 	return error;
 }
 
-int reiserfs_permission(struct inode *inode, int mask, struct nameidata *nd)
+int reiserfs_permission(struct inode *inode, int mask, struct nameidata *nd,
+		struct exec_perm *perm)
 {
 	/*
 	 * We don't do permission checks on the internal objects.
@@ -1356,7 +1357,7 @@ int reiserfs_permission(struct inode *in
 	 * Stat data v1 doesn't support ACLs.
 	 */
 	if (get_inode_sd_version(inode) == STAT_DATA_V1)
-		return generic_permission(inode, mask, NULL);
+		return generic_permission(inode, mask, NULL, perm);
 	else
-		return generic_permission(inode, mask, reiserfs_check_acl);
+		return generic_permission(inode, mask, reiserfs_check_acl, perm);
 }
diff -uprN linux-2.6.16/fs/reiserfs/xattr_acl.c linux-2.6.16.ovz/fs/reiserfs/xattr_acl.c
--- linux-2.6.16/fs/reiserfs/xattr_acl.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/fs/reiserfs/xattr_acl.c	2006-07-05 08:34:56.000000000 -0400
@@ -408,8 +408,9 @@ int reiserfs_cache_default_acl(struct in
 		acl = reiserfs_get_acl(inode, ACL_TYPE_DEFAULT);
 		reiserfs_read_unlock_xattrs(inode->i_sb);
 		reiserfs_read_unlock_xattr_i(inode);
-		ret = acl ? 1 : 0;
-		posix_acl_release(acl);
+		ret = (acl && !IS_ERR(acl));
+		if (ret)
+			posix_acl_release(acl);
 	}
 
 	return ret;
diff -uprN linux-2.6.16/fs/select.c linux-2.6.16.ovz/fs/select.c
--- linux-2.6.16/fs/select.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/fs/select.c	2006-07-05 08:34:56.000000000 -0400
@@ -24,6 +24,8 @@
 #include <linux/fs.h>
 #include <linux/rcupdate.h>
 
+#include <ub/ub_mem.h>
+
 #include <asm/uaccess.h>
 
 #define ROUND_UP(x,y) (((x)+(y)-1)/(y))
@@ -286,7 +288,7 @@ int do_select(int n, fd_set_bits *fds, s
 
 static void *select_bits_alloc(int size)
 {
-	return kmalloc(6 * size, GFP_KERNEL);
+	return ub_kmalloc(6 * size, GFP_KERNEL);
 }
 
 static void select_bits_free(void *bits, int size)
@@ -645,7 +647,7 @@ int do_sys_poll(struct pollfd __user *uf
 	err = -ENOMEM;
 	while(i!=0) {
 		struct poll_list *pp;
-		pp = kmalloc(sizeof(struct poll_list)+
+		pp = ub_kmalloc(sizeof(struct poll_list)+
 				sizeof(struct pollfd)*
 				(i>POLLFD_PER_PAGE?POLLFD_PER_PAGE:i),
 					GFP_KERNEL);
diff -uprN linux-2.6.16/fs/seq_file.c linux-2.6.16.ovz/fs/seq_file.c
--- linux-2.6.16/fs/seq_file.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/fs/seq_file.c	2006-07-05 08:34:56.000000000 -0400
@@ -345,6 +345,8 @@ int seq_path(struct seq_file *m,
 	if (m->count < m->size) {
 		char *s = m->buf + m->count;
 		char *p = d_path(dentry, mnt, s, m->size - m->count);
+		if (IS_ERR(p) && PTR_ERR(p) != -ENAMETOOLONG)
+			return 0;
 		if (!IS_ERR(p)) {
 			while (s <= p) {
 				char c = *p++;
diff -uprN linux-2.6.16/fs/simfs.c linux-2.6.16.ovz/fs/simfs.c
--- linux-2.6.16/fs/simfs.c	1969-12-31 19:00:00.000000000 -0500
+++ linux-2.6.16.ovz/fs/simfs.c	2006-07-05 08:34:56.000000000 -0400
@@ -0,0 +1,290 @@
+/*
+ *  fs/simfs.c
+ *
+ *  Copyright (C) 2005  SWsoft
+ *  All rights reserved.
+ *  
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/fs.h>
+#include <linux/file.h>
+#include <linux/init.h>
+#include <linux/namei.h>
+#include <linux/err.h>
+#include <linux/module.h>
+#include <linux/mount.h>
+#include <linux/vzquota.h>
+#include <linux/statfs.h>
+#include <linux/virtinfo.h>
+#include <linux/faudit.h>
+#include <linux/genhd.h>
+
+#include <asm/unistd.h>
+#include <asm/uaccess.h>
+
+#define SIMFS_GET_LOWER_FS_SB(sb) ((sb)->s_root->d_sb)	/* sb that owns simfs' root dentry */
+
+static struct super_operations sim_super_ops;
+
+/* getattr used by the faudit stat hook; simfs mounts report their own s_dev. */
+static int sim_getattr(struct vfsmount *mnt, struct dentry *dentry,
+		struct kstat *stat)
+{
+	struct inode *ino = dentry->d_inode;
+
+	if (ino->i_op->getattr) {
+		int rc = ino->i_op->getattr(mnt, dentry, stat);
+
+		if (rc)
+			return rc;
+	} else {
+		/* No ->getattr: generic fill, deriving blocks if blksize is unset. */
+		generic_fillattr(ino, stat);
+		if (!stat->blksize) {
+			struct super_block *isb = ino->i_sb;
+			unsigned nblk;
+
+			nblk = (stat->size + isb->s_blocksize-1) >>
+				isb->s_blocksize_bits;
+			stat->blocks = (isb->s_blocksize / 512) * nblk;
+			stat->blksize = isb->s_blocksize;
+		}
+	}
+
+	/* Only a mount whose sb uses sim_super_ops gets its s_dev substituted. */
+	if (mnt->mnt_sb->s_op == &sim_super_ops)
+		stat->dev = mnt->mnt_sb->s_dev;
+
+	return 0;
+}
+
+static void quota_get_stat(struct super_block *sb, struct kstatfs *buf)
+{
+	int err;
+	struct dq_stat qstat;
+	struct virt_info_quota q;
+	long free_file, adj_file;
+	s64 blk, free_blk, adj_blk;
+	int bsize_bits;
+	/* Replace the space/inode counters in @buf with quota-derived values. */
+	q.super = sb;
+	q.qstat = &qstat;
+	err = virtinfo_notifier_call(VITYPE_QUOTA, VIRTINFO_QUOTA_GETSTAT, &q);
+	if (err != NOTIFY_OK)
+		return;
+	/* bsize_bits is log2(f_bsize), presuming f_bsize is a power of two. */
+	bsize_bits = ffs(buf->f_bsize) - 1;
+	free_blk = (s64)(qstat.bsoftlimit - qstat.bcurrent) >> bsize_bits;
+	if (free_blk < 0)
+		free_blk = 0;
+	/*
+	 * In the regular case, we always set buf->f_bfree and buf->f_blocks to
+	 * the values reported by quota.  In case of real disk space shortage,
+	 * we adjust the values.  We want this adjustment to look as if the
+	 * total disk space were reduced, not as if the usage were increased.
+	 *    -- SAW
+	 */
+	adj_blk = 0;
+	if (buf->f_bfree < free_blk)
+		adj_blk = free_blk - buf->f_bfree;
+	buf->f_bfree = (long)(free_blk - adj_blk);
+
+	if (free_blk < buf->f_bavail)
+		buf->f_bavail = (long)free_blk; /* min(f_bavail, free_blk) */
+
+	blk = (qstat.bsoftlimit >> bsize_bits) - adj_blk;
+	buf->f_blocks = blk > LONG_MAX ? LONG_MAX : blk;
+	/* Inodes: same scheme, based on isoftlimit and icurrent. */
+	free_file = qstat.isoftlimit - qstat.icurrent;
+	if (free_file < 0)
+		free_file = 0;
+	if (buf->f_ffree == -1)
+		/*
+		 * One filesystem uses -1 to represent the fact that it doesn't
+		 * have a separate limit on the number of inodes.
+		 * Maybe because -1 is a good pretender to the maximum value
+		 * of the signed long type, maybe because it's just nice to have
+		 * an exceptional case...  Guess what that filesystem is :-)
+		 *    -- SAW
+		 */
+		buf->f_ffree = free_file;
+	adj_file = 0;
+	if (buf->f_ffree < free_file)
+		adj_file = free_file - buf->f_ffree;
+	buf->f_ffree = free_file - adj_file;
+	buf->f_files = qstat.isoftlimit - adj_file;
+}
+
+/* statfs for a simfs sb: lower fs figures first, quota limits applied on top. */
+static int sim_statfs(struct super_block *sb, struct kstatfs *buf)
+{
+	struct kstatfs lower_stat;
+	struct super_block *lower;
+	int rc;
+
+	/* Superblocks that do not use sim_super_ops are left alone. */
+	if (sb->s_op != &sim_super_ops)
+		return 0;
+
+	lower = SIMFS_GET_LOWER_FS_SB(sb);
+	if (!lower || !lower->s_op || !lower->s_op->statfs)
+		return -ENOSYS;
+	rc = lower->s_op->statfs(lower, &lower_stat);
+	if (rc)
+		return rc;
+
+	quota_get_stat(sb, &lower_stat);
+	/* Only the block and inode counters are handed back to the caller. */
+	buf->f_files    = lower_stat.f_files;
+	buf->f_ffree    = lower_stat.f_ffree;
+	buf->f_blocks   = lower_stat.f_blocks;
+	buf->f_bfree    = lower_stat.f_bfree;
+	buf->f_bavail   = lower_stat.f_bavail;
+	return 0;
+}
+
+/* VITYPE_FAUDIT notifier: answers stat/statfs events, passes the rest through. */
+static int sim_systemcall(struct vnotifier_block *me, unsigned long n,
+		void *d, int old_ret)
+{
+	int err;
+
+	if (n == VIRTINFO_FAUDIT_STAT) {
+		struct faudit_stat_arg *sa = d;
+
+		err = sim_getattr(sa->mnt, sa->dentry, sa->stat);
+		sa->err = err;
+	} else if (n == VIRTINFO_FAUDIT_STATFS) {
+		struct faudit_statfs_arg *fa = d;
+
+		err = sim_statfs(fa->sb, fa->stat);
+		fa->err = err;
+	} else {
+		/* Not one of our events: hand back old_ret untouched. */
+		return old_ret;
+	}
+
+	/* The error code itself travels in arg->err; the chain sees OK or BAD. */
+	if (err)
+		return NOTIFY_BAD;
+
+	return NOTIFY_OK;
+}
+
+static struct inode *sim_quota_root(struct super_block *sb)
+{
+	return sb->s_root->d_inode;	/* inode of the dentry pinned by sim_fill_super() */
+}
+
+void sim_put_super(struct super_block *sb)
+{
+	struct virt_info_quota viq = { .super = sb };
+
+	/* Issue VIRTINFO_QUOTA_OFF for @sb, then drop the bdev taken at mount. */
+	virtinfo_notifier_call(VITYPE_QUOTA, VIRTINFO_QUOTA_OFF, &viq);
+	if (sb->s_bdev)	/* never set if sim_get_sb() is unwinding a failed bdget() */
+		bdput(sb->s_bdev);
+}
+
+static struct super_operations sim_super_ops = {	/* its address also marks an sb as simfs (s_op compares) */
+	.get_quota_root	= sim_quota_root,
+	.put_super = sim_put_super,
+};
+
+/* sget() "set" callback; @data is the nameidata of the directory to expose. */
+static int sim_fill_super(struct super_block *s, void *data)
+{
+	struct nameidata *lower = data;
+	int rc;
+
+	/* Allocate the anonymous device number first; give up if that fails. */
+	rc = set_anon_super(s, NULL);
+	if (rc)
+		return rc;
+
+	/* Root the new sb at the looked-up dentry, taking our own reference. */
+	s->s_root = dget(lower->dentry);
+	s->s_op = &sim_super_ops;
+	return 0;
+}
+
+struct super_block *sim_get_sb(struct file_system_type *type,
+		int flags, const char *dev_name, void *opt)
+{
+	int err;
+	struct nameidata nd;
+	struct super_block *sb;
+	struct block_device *bd;
+	struct virt_info_quota viq;
+	static struct hd_struct fake_hds;
+	/* Mount data @opt is the path of the directory to expose; it is required. */
+	sb = ERR_PTR(-EINVAL);
+	if (opt == NULL)
+		goto out;
+
+	err = path_lookup(opt, LOOKUP_FOLLOW|LOOKUP_DIRECTORY, &nd);
+	sb = ERR_PTR(err);
+	if (err)
+		goto out;
+	/* sget() gets no test callback; sim_fill_super() roots the sb at nd.dentry. */
+	sb = sget(type, NULL, sim_fill_super, &nd);
+	if (IS_ERR(sb))
+		goto out_path;
+	/* Attach a block_device keyed by the sb's s_dev; sim_put_super() puts it. */
+	bd = bdget(sb->s_dev);
+	if (!bd)
+		goto out_killsb;
+
+	sb->s_bdev = bd;
+	bd->bd_part = &fake_hds;	/* one static hd_struct shared by every simfs sb */
+	viq.super = sb;
+	virtinfo_notifier_call(VITYPE_QUOTA, VIRTINFO_QUOTA_ON, &viq);
+out_path:
+	path_release(&nd);	/* reached on success as well */
+out:
+	return sb;
+
+out_killsb:
+	up_write(&sb->s_umount);
+	deactivate_super(sb);
+	sb = ERR_PTR(-ENODEV);
+	goto out_path;
+}
+
+static struct file_system_type sim_fs_type = {
+	.owner		= THIS_MODULE,
+	.name		= "simfs",
+	.get_sb		= sim_get_sb,
+	.kill_sb	= kill_anon_super,	/* also frees the dev slot taken by set_anon_super() */
+};
+
+static struct vnotifier_block sim_syscalls = {
+	.notifier_call = sim_systemcall,	/* registered on VITYPE_FAUDIT by init_simfs() */
+};
+
+/* Module init: register "simfs", then hook the VITYPE_FAUDIT notifier chain. */
+static int __init init_simfs(void)
+{
+	int rc = register_filesystem(&sim_fs_type);
+
+	if (!rc)
+		virtinfo_notifier_register(VITYPE_FAUDIT, &sim_syscalls);
+
+	/* rc is 0 here exactly when both registrations were carried out. */
+	return rc;
+}
+
+static void __exit exit_simfs(void)
+{
+	virtinfo_notifier_unregister(VITYPE_FAUDIT, &sim_syscalls);	/* reverse order of init_simfs() */
+	unregister_filesystem(&sim_fs_type);
+}
+
+MODULE_AUTHOR("SWsoft <info@sw-soft.com>");
+MODULE_DESCRIPTION("Open Virtuozzo Simulation of File System");
+MODULE_LICENSE("GPL v2");
+
+module_init(init_simfs);
+module_exit(exit_simfs);
diff -uprN linux-2.6.16/fs/smbfs/dir.c linux-2.6.16.ovz/fs/smbfs/dir.c
--- linux-2.6.16/fs/smbfs/dir.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/fs/smbfs/dir.c	2006-07-05 08:34:56.000000000 -0400
@@ -434,6 +434,11 @@ smb_lookup(struct inode *dir, struct den
 	if (dentry->d_name.len > SMB_MAXNAMELEN)
 		goto out;
 
+	/* Do not allow lookup of names with backslashes in */
+	error = -EINVAL;
+	if (memchr(dentry->d_name.name, '\\', dentry->d_name.len))
+		goto out;
+
 	lock_kernel();
 	error = smb_proc_getattr(dentry, &finfo);
 #ifdef SMBFS_PARANOIA
diff -uprN linux-2.6.16/fs/smbfs/file.c linux-2.6.16.ovz/fs/smbfs/file.c
--- linux-2.6.16/fs/smbfs/file.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/fs/smbfs/file.c	2006-07-05 08:34:56.000000000 -0400
@@ -387,7 +387,8 @@ smb_file_release(struct inode *inode, st
  * privileges, so we need our own check for this.
  */
 static int
-smb_file_permission(struct inode *inode, int mask, struct nameidata *nd)
+smb_file_permission(struct inode *inode, int mask, struct nameidata *nd,
+		struct exec_perm *perm)
 {
 	int mode = inode->i_mode;
 	int error = 0;
diff -uprN linux-2.6.16/fs/smbfs/inode.c linux-2.6.16.ovz/fs/smbfs/inode.c
--- linux-2.6.16/fs/smbfs/inode.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/fs/smbfs/inode.c	2006-07-05 08:34:56.000000000 -0400
@@ -233,7 +233,7 @@ smb_invalidate_inodes(struct smb_sb_info
 {
 	VERBOSE("\n");
 	shrink_dcache_sb(SB_of(server));
-	invalidate_inodes(SB_of(server));
+	invalidate_inodes(SB_of(server), 0);
 }
 
 /*
diff -uprN linux-2.6.16/fs/smbfs/request.c linux-2.6.16.ovz/fs/smbfs/request.c
--- linux-2.6.16/fs/smbfs/request.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/fs/smbfs/request.c	2006-07-05 08:34:56.000000000 -0400
@@ -339,9 +339,11 @@ int smb_add_request(struct smb_request *
 		/*
 		 * On timeout or on interrupt we want to try and remove the
 		 * request from the recvq/xmitq.
+		 * First check if the request is still part of a queue. (May
+		 * have been removed by some error condition)
 		 */
 		smb_lock_server(server);
-		if (!(req->rq_flags & SMB_REQ_RECEIVED)) {
+		if (!list_empty(&req->rq_queue)) {
 			list_del_init(&req->rq_queue);
 			smb_rput(req);
 		}
diff -uprN linux-2.6.16/fs/stat.c linux-2.6.16.ovz/fs/stat.c
--- linux-2.6.16/fs/stat.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/fs/stat.c	2006-07-05 08:34:56.000000000 -0400
@@ -15,6 +15,7 @@
 #include <linux/namei.h>
 #include <linux/security.h>
 #include <linux/syscalls.h>
+#include <linux/faudit.h>
 
 #include <asm/uaccess.h>
 #include <asm/unistd.h>
@@ -42,11 +43,19 @@ int vfs_getattr(struct vfsmount *mnt, st
 {
 	struct inode *inode = dentry->d_inode;
 	int retval;
+	struct faudit_stat_arg arg;
 
 	retval = security_inode_getattr(mnt, dentry);
 	if (retval)
 		return retval;
 
+	arg.mnt = mnt;
+	arg.dentry = dentry;
+	arg.stat = stat;
+	if (virtinfo_notifier_call(VITYPE_FAUDIT, VIRTINFO_FAUDIT_STAT, &arg)
+			!= NOTIFY_DONE)
+		return arg.err;
+
 	if (inode->i_op->getattr)
 		return inode->i_op->getattr(mnt, dentry, stat);
 
diff -uprN linux-2.6.16/fs/super.c linux-2.6.16.ovz/fs/super.c
--- linux-2.6.16/fs/super.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/fs/super.c	2006-07-05 08:34:56.000000000 -0400
@@ -23,6 +23,7 @@
 #include <linux/config.h>
 #include <linux/module.h>
 #include <linux/slab.h>
+#include <linux/ve_owner.h>
 #include <linux/init.h>
 #include <linux/smp_lock.h>
 #include <linux/acct.h>
@@ -231,13 +232,13 @@ void generic_shutdown_super(struct super
 	if (root) {
 		sb->s_root = NULL;
 		shrink_dcache_parent(root);
-		shrink_dcache_anon(&sb->s_anon);
+		shrink_dcache_anon(sb);
 		dput(root);
 		fsync_super(sb);
 		lock_super(sb);
 		sb->s_flags &= ~MS_ACTIVE;
 		/* bad name - it should be evict_inodes() */
-		invalidate_inodes(sb);
+		invalidate_inodes(sb, 0);
 		lock_kernel();
 
 		if (sop->write_super && sb->s_dirt)
@@ -246,7 +247,7 @@ void generic_shutdown_super(struct super
 			sop->put_super(sb);
 
 		/* Forget any remaining inodes */
-		if (invalidate_inodes(sb)) {
+		if (invalidate_inodes(sb, 1)) {
 			printk("VFS: Busy inodes after unmount of %s. "
 			   "Self-destruct in 5 seconds.  Have a nice day...\n",
 			   sb->s_id);
@@ -481,11 +482,20 @@ asmlinkage long sys_ustat(unsigned dev, 
         struct super_block *s;
         struct ustat tmp;
         struct kstatfs sbuf;
-	int err = -EINVAL;
+	dev_t kdev;
+	int err;
+
+	kdev = new_decode_dev(dev);
+#ifdef CONFIG_VE
+	err = get_device_perms_ve(S_IFBLK, kdev, FMODE_READ);
+	if (err)
+		goto out;
+#endif
 
-        s = user_get_super(new_decode_dev(dev));
-        if (s == NULL)
-                goto out;
+	err = -EINVAL;
+	s = user_get_super(kdev);
+	if (s == NULL)
+		goto out;
 	err = vfs_statfs(s, &sbuf);
 	drop_super(s);
 	if (err)
@@ -599,6 +609,13 @@ void emergency_remount(void)
 static struct idr unnamed_dev_idr;
 static DEFINE_SPINLOCK(unnamed_dev_lock);/* protects the above */
 
+/* for compatibility with coreutils still unaware of new minor sizes */
+int unnamed_dev_majors[] = {
+	0, 144, 145, 146, 242, 243, 244, 245,
+	246, 247, 248, 249, 250, 251, 252, 253
+};
+EXPORT_SYMBOL(unnamed_dev_majors);
+
 int set_anon_super(struct super_block *s, void *data)
 {
 	int dev;
@@ -616,13 +633,13 @@ int set_anon_super(struct super_block *s
 	else if (error)
 		return -EAGAIN;
 
-	if ((dev & MAX_ID_MASK) == (1 << MINORBITS)) {
+	if ((dev & MAX_ID_MASK) >= (1 << MINORBITS)) {
 		spin_lock(&unnamed_dev_lock);
 		idr_remove(&unnamed_dev_idr, dev);
 		spin_unlock(&unnamed_dev_lock);
 		return -EMFILE;
 	}
-	s->s_dev = MKDEV(0, dev & MINORMASK);
+	s->s_dev = make_unnamed_dev(dev);
 	return 0;
 }
 
@@ -630,8 +647,9 @@ EXPORT_SYMBOL(set_anon_super);
 
 void kill_anon_super(struct super_block *sb)
 {
-	int slot = MINOR(sb->s_dev);
+	int slot;
 
+	slot = unnamed_dev_idx(sb->s_dev);
 	generic_shutdown_super(sb);
 	spin_lock(&unnamed_dev_lock);
 	idr_remove(&unnamed_dev_idr, slot);
diff -uprN linux-2.6.16/fs/sysfs/bin.c linux-2.6.16.ovz/fs/sysfs/bin.c
--- linux-2.6.16/fs/sysfs/bin.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/fs/sysfs/bin.c	2006-07-05 08:34:56.000000000 -0400
@@ -120,6 +120,9 @@ static int open(struct inode * inode, st
 	struct bin_attribute * attr = to_bin_attr(file->f_dentry);
 	int error = -EINVAL;
 
+	if (!ve_sysfs_alowed())
+		return 0;
+
 	if (!kobj || !attr)
 		goto Done;
 
@@ -196,6 +199,9 @@ int sysfs_create_bin_file(struct kobject
 
 int sysfs_remove_bin_file(struct kobject * kobj, struct bin_attribute * attr)
 {
+	if (!ve_sysfs_alowed())
+		return 0;
+
 	sysfs_hash_and_remove(kobj->dentry,attr->attr.name);
 	return 0;
 }
diff -uprN linux-2.6.16/fs/sysfs/dir.c linux-2.6.16.ovz/fs/sysfs/dir.c
--- linux-2.6.16/fs/sysfs/dir.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/fs/sysfs/dir.c	2006-07-05 08:34:56.000000000 -0400
@@ -144,6 +144,9 @@ int sysfs_create_dir(struct kobject * ko
 	struct dentry * parent;
 	int error = 0;
 
+	if (!ve_sysfs_alowed())
+		return 0;
+
 	BUG_ON(!kobj);
 
 	if (kobj->parent)
@@ -278,10 +281,14 @@ void sysfs_remove_subdir(struct dentry *
 
 void sysfs_remove_dir(struct kobject * kobj)
 {
-	struct dentry * dentry = dget(kobj->dentry);
+	struct dentry * dentry;
 	struct sysfs_dirent * parent_sd;
 	struct sysfs_dirent * sd, * tmp;
 
+	if (!ve_sysfs_alowed())
+		return;
+
+	dentry = dget(kobj->dentry);
 	if (!dentry)
 		return;
 
@@ -302,6 +309,7 @@ void sysfs_remove_dir(struct kobject * k
 	 * Drop reference from dget() on entrance.
 	 */
 	dput(dentry);
+	kobj->dentry = NULL;
 }
 
 int sysfs_rename_dir(struct kobject * kobj, const char *new_name)
@@ -309,6 +317,9 @@ int sysfs_rename_dir(struct kobject * ko
 	int error = 0;
 	struct dentry * new_dentry, * parent;
 
+	if (!ve_sysfs_alowed())
+		return 0;
+
 	if (!strcmp(kobject_name(kobj), new_name))
 		return -EINVAL;
 
diff -uprN linux-2.6.16/fs/sysfs/file.c linux-2.6.16.ovz/fs/sysfs/file.c
--- linux-2.6.16/fs/sysfs/file.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/fs/sysfs/file.c	2006-07-05 08:34:56.000000000 -0400
@@ -183,7 +183,7 @@ fill_write_buffer(struct sysfs_buffer * 
 		return -ENOMEM;
 
 	if (count >= PAGE_SIZE)
-		count = PAGE_SIZE;
+		count = PAGE_SIZE - 1;
 	error = copy_from_user(buffer->page,buf,count);
 	buffer->needs_read_fill = 1;
 	return error ? -EFAULT : count;
@@ -380,6 +380,9 @@ int sysfs_add_file(struct dentry * dir, 
 
 int sysfs_create_file(struct kobject * kobj, const struct attribute * attr)
 {
+	if (!ve_sysfs_alowed())
+		return 0;
+
 	BUG_ON(!kobj || !kobj->dentry || !attr);
 
 	return sysfs_add_file(kobj->dentry, attr, SYSFS_KOBJ_ATTR);
@@ -398,6 +401,9 @@ int sysfs_update_file(struct kobject * k
 	struct dentry * victim;
 	int res = -ENOENT;
 
+	if (!ve_sysfs_alowed())
+		return 0;
+
 	mutex_lock(&dir->d_inode->i_mutex);
 	victim = lookup_one_len(attr->name, dir, strlen(attr->name));
 	if (!IS_ERR(victim)) {
@@ -473,6 +479,9 @@ EXPORT_SYMBOL_GPL(sysfs_chmod_file);
 
 void sysfs_remove_file(struct kobject * kobj, const struct attribute * attr)
 {
+	if (!ve_sysfs_alowed())
+		return;
+
 	sysfs_hash_and_remove(kobj->dentry,attr->name);
 }
 
diff -uprN linux-2.6.16/fs/sysfs/group.c linux-2.6.16.ovz/fs/sysfs/group.c
--- linux-2.6.16/fs/sysfs/group.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/fs/sysfs/group.c	2006-07-05 08:34:56.000000000 -0400
@@ -46,6 +46,9 @@ int sysfs_create_group(struct kobject * 
 	struct dentry * dir;
 	int error;
 
+	if (!ve_sysfs_alowed())
+		return 0;
+
 	BUG_ON(!kobj || !kobj->dentry);
 
 	if (grp->name) {
@@ -68,6 +71,9 @@ void sysfs_remove_group(struct kobject *
 {
 	struct dentry * dir;
 
+	if (!ve_sysfs_alowed())
+		return;
+
 	if (grp->name)
 		dir = lookup_one_len(grp->name, kobj->dentry,
 				strlen(grp->name));
diff -uprN linux-2.6.16/fs/sysfs/inode.c linux-2.6.16.ovz/fs/sysfs/inode.c
--- linux-2.6.16/fs/sysfs/inode.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/fs/sysfs/inode.c	2006-07-05 08:34:56.000000000 -0400
@@ -8,14 +8,13 @@
 
 #undef DEBUG 
 
+#include <linux/config.h>
 #include <linux/pagemap.h>
 #include <linux/namei.h>
 #include <linux/backing-dev.h>
 #include <linux/capability.h>
 #include "sysfs.h"
 
-extern struct super_block * sysfs_sb;
-
 static struct address_space_operations sysfs_aops = {
 	.readpage	= simple_readpage,
 	.prepare_write	= simple_prepare_write,
@@ -227,12 +226,16 @@ void sysfs_drop_dentry(struct sysfs_dire
 void sysfs_hash_and_remove(struct dentry * dir, const char * name)
 {
 	struct sysfs_dirent * sd;
-	struct sysfs_dirent * parent_sd = dir->d_fsdata;
+	struct sysfs_dirent * parent_sd;
+
+	if (!dir)
+		return;
 
 	if (dir->d_inode == NULL)
 		/* no inode means this hasn't been made visible yet */
 		return;
 
+	parent_sd = dir->d_fsdata;
 	mutex_lock(&dir->d_inode->i_mutex);
 	list_for_each_entry(sd, &parent_sd->s_children, s_sibling) {
 		if (!sd->s_element)
diff -uprN linux-2.6.16/fs/sysfs/mount.c linux-2.6.16.ovz/fs/sysfs/mount.c
--- linux-2.6.16/fs/sysfs/mount.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/fs/sysfs/mount.c	2006-07-05 08:34:56.000000000 -0400
@@ -7,6 +7,7 @@
 #include <linux/fs.h>
 #include <linux/mount.h>
 #include <linux/pagemap.h>
+#include <linux/module.h>
 #include <linux/init.h>
 
 #include "sysfs.h"
@@ -14,8 +15,11 @@
 /* Random magic number */
 #define SYSFS_MAGIC 0x62656572
 
+#ifndef CONFIG_VE
 struct vfsmount *sysfs_mount;
 struct super_block * sysfs_sb = NULL;
+#endif
+
 kmem_cache_t *sysfs_dir_cachep;
 
 static struct super_operations sysfs_ops = {
@@ -31,6 +35,15 @@ static struct sysfs_dirent sysfs_root = 
 	.s_iattr	= NULL,
 };
 
+#ifdef CONFIG_VE
+static void init_ve0_sysfs_root(void)
+{
+	get_ve0()->sysfs_root = &sysfs_root;
+}
+
+#define sysfs_root (*(get_exec_env()->sysfs_root))
+#endif
+
 static int sysfs_fill_super(struct super_block *sb, void *data, int silent)
 {
 	struct inode *inode;
@@ -72,16 +85,21 @@ static struct super_block *sysfs_get_sb(
 	return get_sb_single(fs_type, flags, data, sysfs_fill_super);
 }
 
-static struct file_system_type sysfs_fs_type = {
+struct file_system_type sysfs_fs_type = {
 	.name		= "sysfs",
 	.get_sb		= sysfs_get_sb,
 	.kill_sb	= kill_litter_super,
 };
 
+EXPORT_SYMBOL(sysfs_fs_type);
+
 int __init sysfs_init(void)
 {
 	int err = -ENOMEM;
 
+#ifdef CONFIG_VE
+	init_ve0_sysfs_root();
+#endif
 	sysfs_dir_cachep = kmem_cache_create("sysfs_dir_cache",
 					      sizeof(struct sysfs_dirent),
 					      0, 0, NULL, NULL);
diff -uprN linux-2.6.16/fs/sysfs/symlink.c linux-2.6.16.ovz/fs/sysfs/symlink.c
--- linux-2.6.16/fs/sysfs/symlink.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/fs/sysfs/symlink.c	2006-07-05 08:34:56.000000000 -0400
@@ -66,6 +66,7 @@ static int sysfs_add_link(struct dentry 
 	if (!error)
 		return 0;
 
+	kobject_put(target);
 	kfree(sl->link_name);
 exit2:
 	kfree(sl);
@@ -86,6 +87,9 @@ int sysfs_create_link(struct kobject * k
 
 	BUG_ON(!kobj || !kobj->dentry || !name);
 
+	if (!ve_sysfs_alowed())
+		return 0;
+
 	mutex_lock(&dentry->d_inode->i_mutex);
 	error = sysfs_add_link(dentry, name, target);
 	mutex_unlock(&dentry->d_inode->i_mutex);
@@ -101,6 +105,9 @@ int sysfs_create_link(struct kobject * k
 
 void sysfs_remove_link(struct kobject * kobj, const char * name)
 {
+	if(!ve_sysfs_alowed())
+		return;
+
 	sysfs_hash_and_remove(kobj->dentry,name);
 }
 
diff -uprN linux-2.6.16/fs/sysfs/sysfs.h linux-2.6.16.ovz/fs/sysfs/sysfs.h
--- linux-2.6.16/fs/sysfs/sysfs.h	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/fs/sysfs/sysfs.h	2006-07-05 08:34:56.000000000 -0400
@@ -1,5 +1,14 @@
 
-extern struct vfsmount * sysfs_mount;
+#ifndef CONFIG_VE
+extern struct vfsmount *sysfs_mount;
+extern struct super_block *sysfs_sb;
+#define ve_sysfs_alowed()	(1)
+#else
+#define sysfs_mount		(get_exec_env()->sysfs_mnt)
+#define sysfs_sb		(get_exec_env()->sysfs_sb)
+#define ve_sysfs_alowed()	(sysfs_sb != NULL)
+#endif
+
 extern kmem_cache_t *sysfs_dir_cachep;
 
 extern struct inode * sysfs_new_inode(mode_t mode, struct sysfs_dirent *);
@@ -19,7 +28,6 @@ extern void sysfs_drop_dentry(struct sys
 extern int sysfs_setattr(struct dentry *dentry, struct iattr *iattr);
 
 extern struct rw_semaphore sysfs_rename_sem;
-extern struct super_block * sysfs_sb;
 extern struct file_operations sysfs_dir_operations;
 extern struct file_operations sysfs_file_operations;
 extern struct file_operations bin_fops;
diff -uprN linux-2.6.16/fs/vzdq_file.c linux-2.6.16.ovz/fs/vzdq_file.c
--- linux-2.6.16/fs/vzdq_file.c	1969-12-31 19:00:00.000000000 -0500
+++ linux-2.6.16.ovz/fs/vzdq_file.c	2006-07-05 08:34:56.000000000 -0400
@@ -0,0 +1,851 @@
+/*
+ *
+ * Copyright (C) 2005 SWsoft
+ * All rights reserved.
+ * 
+ * Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ * This file contains Virtuozzo quota files as proc entry implementation.
+ * It is required for std quota tools to work correctly as they are expecting
+ * aquota.user and aquota.group files.
+ */
+
+#include <linux/ctype.h>
+#include <linux/slab.h>
+#include <linux/list.h>
+#include <linux/module.h>
+#include <linux/proc_fs.h>
+#include <linux/sysctl.h>
+#include <linux/mount.h>
+#include <linux/namespace.h>
+#include <linux/quotaio_v2.h>
+#include <asm/uaccess.h>
+
+#include <linux/ve.h>
+#include <linux/ve_proto.h>
+#include <linux/vzdq_tree.h>
+#include <linux/vzquota.h>
+
+/* ----------------------------------------------------------------------
+ *
+ * File read operation
+ *
+ * FIXME: functions in this section (as well as many functions in vzdq_ugid.c,
+ * perhaps) abuse vz_quota_sem.
+ * Taking a global semaphore for lengthy and user-controlled operations inside
+ * VPSs is not a good idea in general.
+ * In this case, the reasons for taking this semaphore are completely unclear,
+ * especially taking into account that the only function that has comments
+ * about the necessity to be called under this semaphore
+ * (create_proc_quotafile) is actually called OUTSIDE it.
+ *
+ * --------------------------------------------------------------------- */
+
+#define DQBLOCK_SIZE		1024	/* bytes per block of the emulated quota file */
+#define DQUOTBLKNUM		21U
+#define DQTREE_DEPTH		4
+#define TREENUM_2_BLKNUM(num)	(((num) + 1) << 1)	/* node 0, 1, 2... -> block 2, 4, 6... */
+#define ISINDBLOCK(num)		((num)%2 != 0)	/* odd blocks are index blocks */
+#define FIRST_DATABLK	  	2  /* first even number */
+#define LAST_IND_LEVEL		(DQTREE_DEPTH - 1)
+#define CONVERT_LEVEL(level)	((level) * (QUOTAID_EBITS/QUOTAID_BBITS))
+#define GETLEVINDX(ind, lev)	(((ind) >> QUOTAID_BBITS*(lev)) \
+					& QUOTATREE_BMASK)
+
+#if (QUOTAID_EBITS / QUOTAID_BBITS) != (QUOTATREE_DEPTH / DQTREE_DEPTH)
+#error xBITS and DQTREE_DEPTH does not correspond
+#endif
+
+#define BLOCK_NOT_FOUND	1
+
+/* data for quota file -- one per proc entry */
+struct quotatree_data {
+	struct list_head	list;
+	struct vz_quota_master	*qmblk;	/* master block whose ugid tree is rendered */
+	int			type;	/* type of the tree; selects QUGID_TREE() and dq_ugid_info[] */
+};
+
+/* serialized by vz_quota_sem */
+static LIST_HEAD(qf_data_head);
+
+static const u_int32_t vzquota_magics[] = V2_INITQMAGICS;
+static const u_int32_t vzquota_versions[] = V2_INITQVERSIONS;
+
+/* First file block number used by the index blocks of tree level @depth. */
+static inline loff_t get_depoff(int depth)
+{
+	loff_t first = 1;	/* level 0 starts right after header block 0 */
+
+	for (; depth; depth--)
+		first += (1 << ((depth - 1)*QUOTAID_EBITS + 1));
+	return first;
+}
+
+/* Map index node @num of tree level @depth to its file block number. */
+static inline loff_t get_blknum(loff_t num, int depth)
+{
+	/* nodes of one level take every second block from get_depoff() onwards */
+	return (num << 1) + get_depoff(depth);
+}
+
+/* Tree level whose block range contains @num, or -1 if @num precedes level 0. */
+static int get_depth(loff_t num)
+{
+	int lvl;
+
+	for (lvl = DQTREE_DEPTH - 1; lvl >= 0; lvl--)
+		if (num >= get_depoff(lvl))
+			return lvl;
+	return -1;
+}
+
+/* Inverse of get_blknum(): node number of block @num within its tree level. */
+static inline loff_t get_offset(loff_t num)
+{
+	int depth = get_depth(num);
+	loff_t rel;
+
+	if (depth < 0)
+		return -1;
+
+	rel = num - get_depoff(depth);	/* distance from the level's first block */
+	BUG_ON(rel < 0);
+	return rel >> 1;
+}
+
+static inline loff_t get_quot_blk_num(struct quotatree_tree *tree, int level)
+{
+	/* report freenum of @level, which callers treat as its maximum block num */
+	return tree->levels[level].freenum;
+}
+
+static inline loff_t get_block_num(struct quotatree_tree *tree)
+{
+	loff_t ind_blk_num, quot_blk_num, max_ind, max_quot;
+	/* Larger of max_ind and max_quot, plus one; read_header() stores it in dqi_blocks. */
+	quot_blk_num = get_quot_blk_num(tree, CONVERT_LEVEL(DQTREE_DEPTH) - 1);
+	max_quot = TREENUM_2_BLKNUM(quot_blk_num);
+	ind_blk_num = get_quot_blk_num(tree, CONVERT_LEVEL(DQTREE_DEPTH - 1));
+	max_ind = (quot_blk_num) ? get_blknum(ind_blk_num, LAST_IND_LEVEL)
+		: get_blknum(ind_blk_num, 0);
+
+	return (max_ind > max_quot) ? max_ind + 1 : max_quot + 1;
+}
+
+/* Build block 0 of the emulated quota file in @buf: v2 header, then dqinfo. */
+static int read_header(void *buf, struct quotatree_tree *tree,
+	struct dq_info *dq_ugid_info, int type)
+{
+	struct v2_disk_dqheader *dqh;
+	struct v2_disk_dqinfo *dq_disk_info;
+
+	dqh = buf;
+	dq_disk_info = buf + sizeof(struct v2_disk_dqheader);
+
+	dqh->dqh_magic = vzquota_magics[type];
+	dqh->dqh_version = vzquota_versions[type];
+	/* NOTE(review): @dq_ugid_info is indexed by @type here, so it must be the array base. */
+	dq_disk_info->dqi_bgrace = dq_ugid_info[type].bexpire;
+	dq_disk_info->dqi_igrace = dq_ugid_info[type].iexpire;
+	dq_disk_info->dqi_flags = 0;	/* no flags */
+	dq_disk_info->dqi_blocks = get_block_num(tree);
+	dq_disk_info->dqi_free_blk = 0;	/* first block in the file */
+	dq_disk_info->dqi_free_entry = FIRST_DATABLK;
+
+	return 0;
+}
+
+/* Fill @buf with the file block numbers of @p's children (0 = no child). */
+static int get_block_child(int depth, struct quotatree_node *p, u_int32_t *buf)
+{
+	int i, j, lev_num;
+
+	lev_num = QUOTATREE_DEPTH/DQTREE_DEPTH - 1;
+	for (i = 0; i < DQBLOCK_SIZE/sizeof(u_int32_t); i++) {
+		struct quotatree_node *next, *parent;
+		/* entry i of the file block spans lev_num + 1 in-core tree levels */
+		parent = p;
+		next = p;
+		for (j = lev_num; j >= 0; j--) {
+			if (!next->blocks[GETLEVINDX(i,j)]) {
+				buf[i] = 0;
+				goto bad_branch;
+			}
+			parent = next;
+			next = next->blocks[GETLEVINDX(i,j)];
+		}
+		buf[i] = (depth == DQTREE_DEPTH - 1) ?
+			TREENUM_2_BLKNUM(parent->num)
+			: get_blknum(next->num, depth + 1);
+	bad_branch:
+		;
+	}
+
+	return 0;
+}
+
+/*
+ * Fill @buf with index block @num of the emulated quota file.
+ * @buf has length 256*sizeof(u_int32_t) bytes
+ * Returns 0, BLOCK_NOT_FOUND or -EINVAL (no tree level holds @num).
+ */
+static int read_index_block(int num, u_int32_t *buf,
+		struct quotatree_tree *tree)
+{
+	struct quotatree_node *p;
+	loff_t off;
+	int depth, res;
+
+	res = BLOCK_NOT_FOUND;
+	depth = get_depth(num);
+	off = get_offset(num);
+	if (depth < 0 || off < 0)
+		return -EINVAL;
+
+	list_for_each_entry(p, &tree->levels[CONVERT_LEVEL(depth)].usedlh,
+			list) {
+		/* a node at or past @off makes the block exist, even if empty */
+		if (p->num >= off)
+			res = 0;
+		if (p->num != off)
+			continue;
+		get_block_child(depth, p, buf);
+		break;
+	}
+
+	return res;
+}
+
+static inline void convert_quot_format(struct v2_disk_dqblk *dq,
+		struct vz_quota_ugid *vzq)
+{	/* copy one in-core ugid record into a v2 on-disk quota entry */
+	dq->dqb_id = vzq->qugid_id;
+	dq->dqb_ihardlimit = vzq->qugid_stat.ihardlimit;
+	dq->dqb_isoftlimit = vzq->qugid_stat.isoftlimit;
+	dq->dqb_curinodes = vzq->qugid_stat.icurrent;
+	dq->dqb_bhardlimit = vzq->qugid_stat.bhardlimit / QUOTABLOCK_SIZE;	/* block limits are divided... */
+	dq->dqb_bsoftlimit = vzq->qugid_stat.bsoftlimit / QUOTABLOCK_SIZE;
+	dq->dqb_curspace = vzq->qugid_stat.bcurrent;	/* ...while current usage is copied as is */
+	dq->dqb_btime = vzq->qugid_stat.btime;
+	dq->dqb_itime = vzq->qugid_stat.itime;
+}
+
+static int read_dquot(loff_t num, void *buf, struct quotatree_tree *tree)
+{
+	int res, i, entries = 0;
+	struct v2_disk_dqdbheader *dq_header;
+	struct quotatree_node *p;
+	struct v2_disk_dqblk *blk = buf + sizeof(struct v2_disk_dqdbheader);
+	/* Fill @buf with data block @num: dqdb header followed by packed entries. */
+	res = BLOCK_NOT_FOUND;
+	dq_header = buf;
+	memset(dq_header, 0, sizeof(*dq_header));
+	/* Nodes of the last in-core level map to file blocks via TREENUM_2_BLKNUM(). */
+	list_for_each_entry(p, &(tree->levels[QUOTATREE_DEPTH - 1].usedlh),
+			list) {
+		if (TREENUM_2_BLKNUM(p->num) >= num)
+			res = 0;
+		if (TREENUM_2_BLKNUM(p->num) != num)
+			continue;
+		/* Matching node: emit its non-empty slots back to back. */
+		for (i = 0; i < QUOTATREE_BSIZE; i++) {
+			if (!p->blocks[i])
+				continue;
+			convert_quot_format(blk + entries,
+					(struct vz_quota_ugid *)p->blocks[i]);
+			entries++;
+			res = 0;
+		}
+		break;
+	}
+	dq_header->dqdh_entries = entries;
+
+	return res;
+}
+
+/* Produce file block @num in @buf: header (0), index (odd) or data (even). */
+static int read_block(int num, void *buf, struct quotatree_tree *tree,
+	struct dq_info *dq_ugid_info, int magic)
+{
+	/* every block starts out zeroed; the readers only fill in what exists */
+	memset(buf, 0, DQBLOCK_SIZE);
+
+	if (num == 0)
+		return read_header(buf, tree, dq_ugid_info, magic);
+
+	if (ISINDBLOCK(num))
+		return read_index_block(num, (u_int32_t*)buf, tree);
+
+	return read_dquot(num, buf, tree);
+}
+
+/*
+ * FIXME: this function can handle quota files up to 2GB only.
+ */
+static int read_proc_quotafile(char *page, char **start, off_t off, int count,
+		int *eof, void *data)
+{
+	off_t blk_num, blk_off, buf_off;
+	char *tmp;
+	size_t buf_size;
+	struct quotatree_data *qtd;
+	struct quotatree_tree *tree;
+	struct dq_info *dqi;
+	int res;
+	/* Render up to @count bytes of the quota file at @off into @page. */
+	tmp = kmalloc(DQBLOCK_SIZE, GFP_KERNEL);
+	if (!tmp)
+		return -ENOMEM;
+
+	qtd = data;
+	down(&vz_quota_sem);
+	down(&qtd->qmblk->dq_sem);
+
+	res = 0;
+	tree = QUGID_TREE(qtd->qmblk, qtd->type);
+	if (!tree) {
+		*eof = 1;
+		goto out_dq;
+	}
+	/* Pass the array base: read_header() indexes it by type itself. */
+	dqi = qtd->qmblk->dq_ugid_info;
+
+	buf_off = 0;
+	buf_size = count;
+	blk_num = off / DQBLOCK_SIZE;
+	blk_off = off % DQBLOCK_SIZE;
+	/* Regenerate one DQBLOCK_SIZE block at a time and copy the wanted part. */
+	while (buf_size > 0) {
+		off_t len;
+
+		len = min((size_t)(DQBLOCK_SIZE-blk_off), buf_size);
+		res = read_block(blk_num, tmp, tree, dqi, qtd->type);
+		if (res < 0)
+			goto out_err;
+		if (res == BLOCK_NOT_FOUND) {
+			*eof = 1;
+			break;
+		}
+		memcpy(page + buf_off, tmp + blk_off, len);
+
+		blk_num++;
+		buf_size -= len;
+		blk_off = 0;
+		buf_off += len;
+	}
+	res = buf_off;
+
+out_err:
+	*start = NULL + count;	/* a byte count, not an address: vzdq_aquotf_read() adds it to *ppos */
+out_dq:
+	up(&qtd->qmblk->dq_sem);
+	up(&vz_quota_sem);
+	kfree(tmp);
+
+	return res;
+}
+
+
+/* ----------------------------------------------------------------------
+ *
+ * /proc/vz/vzaquota/QID/aquota.* files
+ *
+ * FIXME: this code lacks serialization of read/readdir/lseek.
+ * However, this problem should be fixed after the mainstream issue of what
+ * appears to be non-atomic read and update of file position in sys_read.
+ *
+ * --------------------------------------------------------------------- */
+
+static inline unsigned long vzdq_aquot_getino(dev_t dev)
+{
+	return 0xec000000UL + dev;	/* base inode number for @dev */
+}
+
+static inline dev_t vzdq_aquot_getidev(struct inode *inode)
+{
+	return (dev_t)(unsigned long)PROC_I(inode)->op.proc_get_link;	/* value stored by vzdq_aquot_setidev() */
+}
+
+static inline void vzdq_aquot_setidev(struct inode *inode, dev_t dev)
+{
+	PROC_I(inode)->op.proc_get_link = (void *)(unsigned long)dev;	/* pointer field reused to hold the dev number */
+}
+
+static ssize_t vzdq_aquotf_read(struct file *file,
+		char __user *buf, size_t size, loff_t *ppos)
+{
+	char *page;
+	size_t bufsize;
+	ssize_t l, l2, copied;
+	char *start;
+	struct inode *inode;
+	struct block_device *bdev;
+	struct super_block *sb;
+	struct quotatree_data data;
+	int eof, err;
+	/* read() for aquota.*: regenerate the file a page at a time and copy it out. */
+	err = -ENOMEM;
+	page = (char *)__get_free_page(GFP_KERNEL);
+	if (page == NULL)
+		goto out_err;
+	/* Inode -> dev number -> super block -> quota master block of that sb. */
+	err = -ENODEV;
+	inode = file->f_dentry->d_inode;
+	bdev = bdget(vzdq_aquot_getidev(inode));
+	if (bdev == NULL)
+		goto out_err;
+	sb = get_super(bdev);
+	bdput(bdev);
+	if (sb == NULL)
+		goto out_err;
+	data.qmblk = vzquota_find_qmblk(sb);
+	data.type = PROC_I(inode)->type - 1;	/* the inode stores quota type + 1 */
+	drop_super(sb);
+	if (data.qmblk == NULL || data.qmblk == VZ_QUOTA_BAD)
+		goto out_err;
+
+	copied = 0;
+	l = l2 = 0;
+	while (1) {
+		bufsize = min(size, (size_t)PAGE_SIZE);
+		if (bufsize <= 0)
+			break;
+
+		l = read_proc_quotafile(page, &start, *ppos, bufsize,
+				&eof, &data);
+		if (l <= 0)
+			break;
+
+		l2 = copy_to_user(buf, page, l);
+		copied += l - l2;
+		if (l2)
+			break;
+
+		buf += l;
+		size -= l;
+		*ppos += (unsigned long)start;	/* start holds a byte count set by read_proc_quotafile() */
+		l = l2 = 0;
+	}
+
+	qmblk_put(data.qmblk);
+	free_page((unsigned long)page);
+	if (copied)
+		return copied;
+	else if (l2)		/* last copy_to_user failed */
+		return -EFAULT;
+	else			/* read error or EOF */
+		return l;
+
+out_err:
+	if (page != NULL)
+		free_page((unsigned long)page);
+	return err;
+}
+
+/* File operations of the aquota.* files: reading is the only operation. */
+static struct file_operations vzdq_aquotf_file_operations = {
+	.read = vzdq_aquotf_read,
+};
+
+/*
+ * No inode operations are provided for the aquota.* files.  The address of
+ * this table is what vzdq_aquotq_looktest() compares i_op against in order
+ * to recognise inodes created by vzdq_aquotq_lookset().
+ */
+static struct inode_operations vzdq_aquotf_inode_operations = {
+};
+
+
+/* ----------------------------------------------------------------------
+ *
+ * /proc/vz/vzaquota/QID directory
+ *
+ * --------------------------------------------------------------------- */
+
+/*
+ * readdir for a /proc/vz/vzaquota/QID directory.
+ *
+ * Emits ".", "..", "aquota.user" and "aquota.group", resuming from
+ * file->f_pos.  When the filler rejects an entry, f_pos is left pointing at
+ * that entry so that the next call retries it instead of skipping it.
+ *
+ * Always returns 0: a filler refusal only ends this pass and is not reported
+ * as an error, the same way vzdq_aquotd_readdir() treats it.
+ */
+static int vzdq_aquotq_readdir(struct file *file, void *data, filldir_t filler)
+{
+	struct inode *dir;
+	loff_t n;
+	int err;
+
+	dir = file->f_dentry->d_inode;
+	err = 0;
+	for (n = file->f_pos; ; n++) {
+		switch (n) {
+		case 0:
+			err = (*filler)(data, ".", 1, n, dir->i_ino, DT_DIR);
+			break;
+		case 1:
+			err = (*filler)(data, "..", 2, n,
+					parent_ino(file->f_dentry), DT_DIR);
+			break;
+		case 2:
+			/* file inode numbers are the directory's plus type + 1 */
+			err = (*filler)(data, "aquota.user", 11, n,
+					dir->i_ino + USRQUOTA + 1, DT_REG);
+			break;
+		case 3:
+			err = (*filler)(data, "aquota.group", 12, n,
+					dir->i_ino + GRPQUOTA + 1, DT_REG);
+			break;
+		default:
+			/* past the last entry */
+			goto out;
+		}
+		/* entry n was not emitted: do not step over it */
+		if (err)
+			break;
+	}
+out:
+	file->f_pos = n;
+	return 0;
+}
+
+/*
+ * Key handed to iget5_locked() by vzdq_aquotq_lookup() and interpreted by
+ * vzdq_aquotq_looktest() / vzdq_aquotq_lookset().
+ */
+struct vzdq_aquotq_lookdata {
+	dev_t dev;	/* device number taken from the QID directory inode */
+	int type;	/* quota type: USRQUOTA or GRPQUOTA */
+};
+
+/*
+ * iget5_locked() match callback: non-zero when @inode is an aquota.* file
+ * inode for the device and quota type described by @data.
+ */
+static int vzdq_aquotq_looktest(struct inode *inode, void *data)
+{
+	struct vzdq_aquotq_lookdata *want = data;
+
+	if (inode->i_op != &vzdq_aquotf_inode_operations)
+		return 0;
+	if (vzdq_aquot_getidev(inode) != want->dev)
+		return 0;
+	return PROC_I(inode)->type == want->type + 1;
+}
+
+/*
+ * iget5_locked() init callback: fill in a fresh inode for an aquota.* file.
+ * The file is a regular file readable by its owner (uid 0) only; the quota
+ * type is stored plus one and the device number is stashed in the inode.
+ */
+static int vzdq_aquotq_lookset(struct inode *inode, void *data)
+{
+	struct vzdq_aquotq_lookdata *key = data;
+	struct timespec now = CURRENT_TIME;
+
+	inode->i_ino = vzdq_aquot_getino(key->dev) + key->type + 1;
+	inode->i_mode = S_IFREG | S_IRUSR;
+	inode->i_nlink = 1;
+	inode->i_uid = 0;
+	inode->i_gid = 0;
+
+	inode->i_ctime = now;
+	inode->i_atime = now;
+	inode->i_mtime = now;
+
+	inode->i_op = &vzdq_aquotf_inode_operations;
+	inode->i_fop = &vzdq_aquotf_file_operations;
+
+	PROC_I(inode)->type = key->type + 1;
+	vzdq_aquot_setidev(inode, key->dev);
+	return 0;
+}
+
+/*
+ * lookup in a /proc/vz/vzaquota/QID directory.
+ *
+ * Only the names "aquota.user" and "aquota.group" exist.  For either of
+ * them the matching inode is obtained (or created) through iget5_locked()
+ * and attached to @dentry.
+ *
+ * Returns NULL on success and ERR_PTR(-ENOENT) for any other name or when
+ * no inode could be obtained.
+ */
+static struct dentry *vzdq_aquotq_lookup(struct inode *dir,
+		struct dentry *dentry,
+		struct nameidata *nd)
+{
+	struct inode *inode;
+	struct vzdq_aquotq_lookdata d;
+	int k;
+
+	if (dentry->d_name.len == 11) {
+		if (memcmp(dentry->d_name.name, "aquota.user", 11))
+			goto out;
+		k = USRQUOTA;
+	} else if (dentry->d_name.len == 12) {
+		/* compare all 12 characters, not just the first 11 */
+		if (memcmp(dentry->d_name.name, "aquota.group", 12))
+			goto out;
+		k = GRPQUOTA;
+	} else
+		goto out;
+	d.dev = vzdq_aquot_getidev(dir);
+	d.type = k;
+	inode = iget5_locked(dir->i_sb, dir->i_ino + k + 1,
+			vzdq_aquotq_looktest, vzdq_aquotq_lookset, &d);
+	if (inode == NULL)
+		goto out;
+	/*
+	 * Only an inode that was just created comes back locked; one found
+	 * in the cache must not have its state bits cleared behind the back
+	 * of its other users.
+	 */
+	if (inode->i_state & I_NEW)
+		unlock_new_inode(inode);
+	d_add(dentry, inode);
+	return NULL;
+
+out:
+	return ERR_PTR(-ENOENT);
+}
+
+/* File operations of a /proc/vz/vzaquota/QID directory. */
+static struct file_operations vzdq_aquotq_file_operations = {
+	.read = generic_read_dir,
+	.readdir = vzdq_aquotq_readdir,
+};
+
+/*
+ * Inode operations of a /proc/vz/vzaquota/QID directory.  The address of
+ * this table also identifies such inodes in vzdq_aquotd_looktest().
+ */
+static struct inode_operations vzdq_aquotq_inode_operations = {
+	.lookup = vzdq_aquotq_lookup,
+};
+
+
+/* ----------------------------------------------------------------------
+ *
+ * /proc/vz/vzaquota directory
+ *
+ * --------------------------------------------------------------------- */
+
+/*
+ * One element of the temporary mount list built by
+ * vzdq_aquot_buildmntlist() and freed by vzdq_aquot_releasemntlist().
+ */
+struct vzdq_aquot_de {
+	struct list_head list;	/* link in the caller's list */
+	struct vfsmount *mnt;	/* mount; a reference is held via mntget() */
+};
+
+/*
+ * Collect the mounts below the root mount into @head, one list element per
+ * distinct super block.
+ *
+ * The root is ve->fs_rootmnt with CONFIG_VE and the root mount of the
+ * current task otherwise.  The mount tree is walked depth-first under
+ * vfsmount_lock, and each recorded mount is pinned with mntget().
+ *
+ * Returns 0 on success or -ENOMEM.  On failure the elements added so far
+ * stay on @head; the caller releases them with
+ * vzdq_aquot_releasemntlist().
+ */
+static int vzdq_aquot_buildmntlist(struct ve_struct *ve,
+		struct list_head *head)
+{
+	struct vfsmount *rmnt, *mnt;
+	struct vzdq_aquot_de *p;
+	int err;
+
+#ifdef CONFIG_VE
+	rmnt = mntget(ve->fs_rootmnt);
+#else
+	read_lock(&current->fs->lock);
+	rmnt = mntget(current->fs->rootmnt);
+	read_unlock(&current->fs->lock);
+#endif
+	mnt = rmnt;
+	spin_lock(&vfsmount_lock);
+	while (1) {
+		/* record every super block only once */
+		list_for_each_entry(p, head, list) {
+			if (p->mnt->mnt_sb == mnt->mnt_sb)
+				goto skip;
+		}
+
+		err = -ENOMEM;
+		/*
+		 * vfsmount_lock is a spinlock and is held here, so the
+		 * allocation must not sleep.
+		 */
+		p = kmalloc(sizeof(*p), GFP_ATOMIC);
+		if (p == NULL)
+			goto out;
+		p->mnt = mntget(mnt);
+		list_add_tail(&p->list, head);
+
+skip:
+		err = 0;
+		if (list_empty(&mnt->mnt_mounts)) {
+			/*
+			 * No children: climb until a mount with a next
+			 * sibling is found, stopping at the root.
+			 */
+			while (1) {
+				if (mnt == rmnt)
+					goto out;
+				if (mnt->mnt_child.next !=
+						&mnt->mnt_parent->mnt_mounts)
+					break;
+				mnt = mnt->mnt_parent;
+			}
+			mnt = list_entry(mnt->mnt_child.next,
+					struct vfsmount, mnt_child);
+		} else
+			/* descend to the first child */
+			mnt = list_entry(mnt->mnt_mounts.next,
+					struct vfsmount, mnt_child);
+	}
+out:
+	spin_unlock(&vfsmount_lock);
+	mntput(rmnt);
+	return err;
+}
+
+/*
+ * Undo vzdq_aquot_buildmntlist(): drop the mount reference of every list
+ * element and free the element.  @head is empty afterwards.  @ve is unused.
+ */
+static void vzdq_aquot_releasemntlist(struct ve_struct *ve,
+		struct list_head *head)
+{
+	struct vzdq_aquot_de *de, *next;
+
+	list_for_each_entry_safe(de, next, head, list) {
+		list_del(&de->list);
+		mntput(de->mnt);
+		kfree(de);
+	}
+}
+
+/*
+ * readdir for /proc/vz/vzaquota.
+ *
+ * Emits ".", ".." and then one directory per mounted super block that has a
+ * usable quota master block, named after the encoded device number as eight
+ * hex digits.  With CONFIG_VE the mount list is not built for the host
+ * system (ve_is_super()), so only "." and ".." are listed there, and
+ * devices for which get_device_perms_ve() returns non-zero are skipped.
+ *
+ * file->f_pos counts entries: 0 is ".", 1 is "..", 2 and up are the device
+ * directories.  When the filler rejects an entry, f_pos is left pointing at
+ * that entry so that the next call retries it.
+ *
+ * Returns 0, or the error from vzdq_aquot_buildmntlist().
+ */
+static int vzdq_aquotd_readdir(struct file *file, void *data, filldir_t filler)
+{
+	struct ve_struct *ve, *old_ve;
+	struct list_head mntlist;
+	struct vzdq_aquot_de *de;
+	struct super_block *sb;
+	struct vz_quota_master *qmblk;
+	loff_t i, n;
+	char buf[24];
+	int l, err;
+
+	i = 0;
+	n = file->f_pos;
+	ve = VE_OWNER_FSTYPE(file->f_dentry->d_sb->s_type);
+	old_ve = set_exec_env(ve);
+
+	INIT_LIST_HEAD(&mntlist);
+#ifdef CONFIG_VE
+	/*
+	 * The only reason of disabling readdir for the host system is that
+	 * this readdir can be slow and CPU consuming with large number of VPSs
+	 * (or just mount points).
+	 */
+	err = ve_is_super(ve);
+#else
+	err = 0;
+#endif
+	if (!err) {
+		err = vzdq_aquot_buildmntlist(ve, &mntlist);
+		if (err)
+			goto out_err;
+	}
+
+	if (i >= n) {
+		if ((*filler)(data, ".", 1, i,
+					file->f_dentry->d_inode->i_ino, DT_DIR))
+			goto out_fill;
+	}
+	i++;
+
+	if (i >= n) {
+		if ((*filler)(data, "..", 2, i,
+					parent_ino(file->f_dentry), DT_DIR))
+			goto out_fill;
+	}
+	i++;
+
+	list_for_each_entry (de, &mntlist, list) {
+		sb = de->mnt->mnt_sb;
+#ifdef CONFIG_VE
+		if (get_device_perms_ve(S_IFBLK, sb->s_dev, FMODE_QUOTACTL))
+			continue;
+#endif
+		/* only super blocks with a usable quota master are listed */
+		qmblk = vzquota_find_qmblk(sb);
+		if (qmblk == NULL || qmblk == VZ_QUOTA_BAD)
+			continue;
+
+		qmblk_put(qmblk);
+		/* from here on i is the position following this entry */
+		i++;
+		if (i <= n)
+			continue;
+
+		l = sprintf(buf, "%08x", new_encode_dev(sb->s_dev));
+		if ((*filler)(data, buf, l, i - 1,
+					vzdq_aquot_getino(sb->s_dev), DT_DIR)) {
+			/* not emitted: the next call must start with it */
+			i--;
+			break;
+		}
+	}
+
+out_fill:
+	err = 0;
+	file->f_pos = i;
+out_err:
+	vzdq_aquot_releasemntlist(ve, &mntlist);
+	(void)set_exec_env(old_ve);
+	return err;
+}
+
+/*
+ * iget5_locked() match callback: non-zero when @inode is a QID directory
+ * inode for the device number passed (cast to a pointer) in @data.
+ */
+static int vzdq_aquotd_looktest(struct inode *inode, void *data)
+{
+	dev_t want = (dev_t)(unsigned long)data;
+
+	if (inode->i_op != &vzdq_aquotq_inode_operations)
+		return 0;
+	return vzdq_aquot_getidev(inode) == want;
+}
+
+/*
+ * iget5_locked() init callback: fill in a fresh inode for a QID directory.
+ * @data carries the device number cast to a pointer.  The directory is
+ * readable and searchable by its owner (uid 0) only.
+ */
+static int vzdq_aquotd_lookset(struct inode *inode, void *data)
+{
+	dev_t dev = (dev_t)(unsigned long)data;
+	struct timespec now = CURRENT_TIME;
+
+	inode->i_ino = vzdq_aquot_getino(dev);
+	inode->i_mode = S_IFDIR | S_IRUSR | S_IXUSR;
+	inode->i_nlink = 2;
+	inode->i_uid = 0;
+	inode->i_gid = 0;
+
+	inode->i_ctime = now;
+	inode->i_atime = now;
+	inode->i_mtime = now;
+
+	inode->i_op = &vzdq_aquotq_inode_operations;
+	inode->i_fop = &vzdq_aquotq_file_operations;
+
+	vzdq_aquot_setidev(inode, dev);
+	return 0;
+}
+
+/*
+ * lookup in /proc/vz/vzaquota.
+ *
+ * The name must be a hexadecimal number that fits into dev_t; it is
+ * decoded with new_decode_dev() into the device whose quota directory is
+ * wanted.  With CONFIG_VE the lookup is refused for the host system and for
+ * devices for which get_device_perms_ve() returns non-zero.
+ *
+ * Returns NULL on success and ERR_PTR(-ENOENT) in every failure case.
+ */
+static struct dentry *vzdq_aquotd_lookup(struct inode *dir,
+		struct dentry *dentry,
+		struct nameidata *nd)
+{
+	struct ve_struct *ve, *old_ve;
+	const unsigned char *s;
+	int l;
+	dev_t dev;
+	struct inode *inode;
+
+	ve = VE_OWNER_FSTYPE(dir->i_sb->s_type);
+	old_ve = set_exec_env(ve);
+#ifdef CONFIG_VE
+	/*
+	 * Lookup is much lighter than readdir, so it can be allowed for the
+	 * host system.  But it would be strange to be able to do lookup only
+	 * without readdir...
+	 */
+	if (ve_is_super(ve))
+		goto out;
+#endif
+
+	dev = 0;
+	l = dentry->d_name.len;
+	if (l <= 0)
+		goto out;
+	for (s = dentry->d_name.name; l > 0; s++, l--) {
+		if (!isxdigit(*s))
+			goto out;
+		/*
+		 * Refuse the digit if the shift below would push bits out of
+		 * dev.  The mask is built from dev_t itself so that the test
+		 * also works where unsigned long is wider than dev_t.
+		 */
+		if (dev & ~(~(dev_t)0 >> 4))
+			goto out;
+		dev <<= 4;
+		if (isdigit(*s))
+			dev += *s - '0';
+		else if (islower(*s))
+			dev += *s - 'a' + 10;
+		else
+			dev += *s - 'A' + 10;
+	}
+	dev = new_decode_dev(dev);
+
+#ifdef CONFIG_VE
+	if (get_device_perms_ve(S_IFBLK, dev, FMODE_QUOTACTL))
+		goto out;
+#endif
+
+	inode = iget5_locked(dir->i_sb, vzdq_aquot_getino(dev),
+			vzdq_aquotd_looktest, vzdq_aquotd_lookset,
+			(void *)(unsigned long)dev);
+	if (inode == NULL)
+		goto out;
+	/*
+	 * Only an inode that was just created comes back locked; one found
+	 * in the cache must not have its state bits cleared behind the back
+	 * of its other users.
+	 */
+	if (inode->i_state & I_NEW)
+		unlock_new_inode(inode);
+
+	d_add(dentry, inode);
+	(void)set_exec_env(old_ve);
+	return NULL;
+
+out:
+	(void)set_exec_env(old_ve);
+	return ERR_PTR(-ENOENT);
+}
+
+/* File operations of the /proc/vz/vzaquota directory. */
+static struct file_operations vzdq_aquotd_file_operations = {
+	.read = generic_read_dir,
+	.readdir = vzdq_aquotd_readdir,
+};
+
+/* Inode operations of the /proc/vz/vzaquota directory. */
+static struct inode_operations vzdq_aquotd_inode_operations = {
+	.lookup = vzdq_aquotd_lookup,
+};
+
+
+/* ----------------------------------------------------------------------
+ *
+ * Initialization and deinitialization
+ *
+ * --------------------------------------------------------------------- */
+
+/*
+ * Create the vz/vzaquota proc directory and hook up its operations; with
+ * CONFIG_SYSCTL also create the sys/fs/quota directory.  Failures are only
+ * logged.
+ *
+ * FIXME: creation of proc entries here is unsafe with respect to module
+ * unloading.
+ */
+void vzaquota_init(void)
+{
+	struct proc_dir_entry *de;
+
+	de = create_proc_glob_entry("vz/vzaquota",
+			S_IFDIR | S_IRUSR | S_IXUSR, NULL);
+	if (de == NULL)
+		printk("VZDQ: vz/vzaquota creation failed\n");
+	else {
+		de->proc_iops = &vzdq_aquotd_inode_operations;
+		de->proc_fops = &vzdq_aquotd_file_operations;
+	}
+#if defined(CONFIG_SYSCTL)
+	if (create_proc_glob_entry("sys/fs/quota",
+			S_IFDIR | S_IRUSR | S_IXUSR, NULL) == NULL)
+		printk("VZDQ: sys/fs/quota creation failed\n");
+#endif
+}
+
+/*
+ * Counterpart of vzaquota_init().  It is empty: the proc entries created
+ * by vzaquota_init() are not removed here.
+ */
+void vzaquota_fini(void)
+{
+}
diff -uprN linux-2.6.16/fs/vzdq_mgmt.c linux-2.6.16.ovz/fs/vzdq_mgmt.c
--- linux-2.6.16/fs/vzdq_mgmt.c	1969-12-31 19:00:00.000000000 -0500
+++ linux-2.6.16.ovz/fs/vzdq_mgmt.c	2006-07-05 08:34:56.000000000 -0400
@@ -0,0 +1,735 @@
+/*
+ * Copyright (C) 2001, 2002, 2004, 2005  SWsoft
+ * All rights reserved.
+ * 
+ * Licensing governed by "linux/COPYING.SWsoft" file.
+ */
+
+#include <linux/config.h>
+#include <linux/kernel.h>
+#include <linux/string.h>
+#include <linux/list.h>
+#include <asm/semaphore.h>
+#include <linux/sched.h>
+#include <linux/fs.h>
+#include <linux/dcache.h>
+#include <linux/mount.h>
+#include <linux/namei.h>
+#include <linux/writeback.h>
+#include <linux/gfp.h>
+#include <asm/uaccess.h>
+#include <linux/proc_fs.h>
+#include <linux/quota.h>
+#include <linux/vzctl_quota.h>
+#include <linux/vzquota.h>
+
+
+/* ----------------------------------------------------------------------
+ * Switching quota on.
+ * --------------------------------------------------------------------- */
+
+/*
+ * Check limits copied from user space: a soft limit may not be greater than
+ * the corresponding hard limit, for blocks as well as for inodes.
+ *
+ * Returns 0 if the limits are sane and -EINVAL otherwise.
+ */
+int vzquota_check_sane_limits(struct dq_stat *qstat)
+{
+	if (qstat->bsoftlimit > qstat->bhardlimit ||
+	    qstat->isoftlimit > qstat->ihardlimit)
+		return -EINVAL;
+
+	return 0;
+}
+
+/*
+ * Check usage values copied from user space.
+ *
+ * An expiration time must not be set while the usage is below the soft
+ * limit; in addition the limits themselves must pass
+ * vzquota_check_sane_limits().
+ *
+ * Returns 0 if everything is sane and -EINVAL otherwise.
+ */
+int vzquota_check_sane_values(struct dq_stat *qstat)
+{
+	int btime_set, itime_set;
+
+	btime_set = qstat->btime != (time_t)0;
+	if (btime_set && qstat->bcurrent < qstat->bsoftlimit)
+		return -EINVAL;
+
+	itime_set = qstat->itime != (time_t)0;
+	if (itime_set && qstat->icurrent < qstat->isoftlimit)
+		return -EINVAL;
+
+	return vzquota_check_sane_limits(qstat);
+}
+
+/*
+ * Create a new quota master block:
+ *  - copy the limits and usage parameters from the user buffer;
+ *  - allocate and initialize the quota block through
+ *    vzquota_alloc_master().
+ *
+ * Returns 0, -EFAULT if the user buffer cannot be read, -EINVAL for a zero
+ * quota id or insane values, or the error from vzquota_alloc_master().
+ */
+static int vzquota_create(unsigned int quota_id, struct vz_quota_stat *u_qstat)
+{
+	struct vz_quota_master *qmblk;
+	struct vz_quota_stat qstat;
+	int err;
+
+	down(&vz_quota_sem);
+
+	if (copy_from_user(&qstat, u_qstat, sizeof(qstat))) {
+		err = -EFAULT;
+	} else if (quota_id == 0 ||
+			vzquota_check_sane_values(&qstat.dq_stat)) {
+		err = -EINVAL;
+	} else {
+		qmblk = vzquota_alloc_master(quota_id, &qstat);
+		/* ENOMEM or EEXIST */
+		err = IS_ERR(qmblk) ? PTR_ERR(qmblk) : 0;
+	}
+
+	up(&vz_quota_sem);
+
+	return err;
+}
+
+/**
+ * vzquota_on - turn quota on
+ *
+ * This function should:
+ *  - find and get refcnt of directory entry for quota root and corresponding
+ *    mountpoint;
+ *  - find corresponding quota block and mark it with given path;
+ *  - check quota tree;
+ *  - initialize quota for the tree root.
+ */
+static int vzquota_on(unsigned int quota_id, const char *quota_root)
+{
+	int err;
+	struct nameidata nd;
+	struct vz_quota_master *qmblk;
+	struct super_block *dqsb;
+
+	/* set only once vzquota_get_super() has succeeded (see out_init) */
+	dqsb = NULL;
+	down(&vz_quota_sem);
+
+	err = -ENOENT;
+	qmblk = vzquota_find_master(quota_id);
+	if (qmblk == NULL)
+		goto out;
+
+	/* quota can be switched on only for a block in the STARTING state */
+	err = -EBUSY;
+	if (qmblk->dq_state != VZDQ_STARTING)
+		goto out;
+
+	err = user_path_walk(quota_root, &nd);
+	if (err)
+		goto out;
+	/* init path must be a directory */
+	err = -ENOTDIR;
+	if (!S_ISDIR(nd.dentry->d_inode->i_mode))
+		goto out_path;
+
+	/*
+	 * On success nd is not released below: the dentry and mount looked
+	 * up by the path walk stay recorded in qmblk.
+	 */
+	qmblk->dq_root_dentry = nd.dentry;
+	qmblk->dq_root_mnt = nd.mnt;
+	qmblk->dq_sb = nd.dentry->d_inode->i_sb;
+	err = vzquota_get_super(qmblk->dq_sb);
+	if (err)
+		goto out_super;
+
+	/*
+	 * Serialization with quota initialization and operations is performed
+	 * through generation check: generation is memorized before qmblk is
+	 * found and compared under inode_qmblk_lock with assignment.
+	 *
+	 * Note that the dentry tree is shrunk only for high-level logical
+	 * serialization, purely as a courtesy to the user: to have consistent
+	 * quota statistics, files should be closed etc. on quota on.
+	 */
+	err = vzquota_on_qmblk(qmblk->dq_sb, qmblk->dq_root_dentry->d_inode,
+			qmblk);
+	if (err)
+		goto out_init;
+	qmblk->dq_state = VZDQ_WORKING;
+
+	up(&vz_quota_sem);
+	return 0;
+
+out_init:
+	/* vzquota_get_super() succeeded, so it has to be undone at "out" */
+	dqsb = qmblk->dq_sb;
+out_super:
+	/* clear for qmblk_put/quota_free_master */
+	qmblk->dq_sb = NULL;
+	qmblk->dq_root_dentry = NULL;
+	qmblk->dq_root_mnt = NULL;
+out_path:
+	path_release(&nd);
+out:
+	if (dqsb)
+		vzquota_put_super(dqsb);
+	up(&vz_quota_sem);
+	return err;
+}
+
+
+/* ----------------------------------------------------------------------
+ * Switching quota off.
+ * --------------------------------------------------------------------- */
+
+/*
+ * Destroy the quota master block with the given ID.
+ *
+ * Returns 0 on success, -ENOENT if there is no such block and -EBUSY if
+ * quota is still switched on for it.
+ */
+static int vzquota_destroy(unsigned int quota_id)
+{
+	int err;
+	struct vz_quota_master *qmblk;
+	struct dentry *dentry;
+	struct vfsmount *mnt;
+
+	down(&vz_quota_sem);
+
+	err = -ENOENT;
+	qmblk = vzquota_find_master(quota_id);
+	if (qmblk == NULL)
+		goto out;
+
+	err = -EBUSY;
+	if (qmblk->dq_state == VZDQ_WORKING)
+		goto out; /* quota_off first */
+
+	/* take the block off its hash list */
+	list_del_init(&qmblk->dq_hash);
+	/* detach the root dentry and mount; they are put after up() below */
+	dentry = qmblk->dq_root_dentry;
+	qmblk->dq_root_dentry = NULL;
+	mnt = qmblk->dq_root_mnt;
+	qmblk->dq_root_mnt = NULL;
+
+	if (qmblk->dq_sb)
+		vzquota_put_super(qmblk->dq_sb);
+	up(&vz_quota_sem);
+
+	/* the references are dropped without holding the semaphore */
+	qmblk_put(qmblk);
+	dput(dentry);
+	mntput(mnt);
+	return 0;
+
+out:
+	up(&vz_quota_sem);
+	return err;
+}
+
+/*
+ * Helpers of vzquota_off (turning quota off).
+ *
+ * __vzquota_sync_list - write back the inodes linked on a list
+ *
+ * Runs sync_inode() with the given sync mode on the inode of every link on
+ * @lh.  It is entered with the inode_qmblk lock held (vzquota_off() takes
+ * it); the lock is dropped around each sync_inode() call and when a
+ * reschedule is needed, and it is held again on return.
+ *
+ * Links that have been handled are parked on a private list so that the
+ * loop terminates, and are put back onto @lh at the end.  The loop stops at
+ * the first sync_inode() error, which is returned; otherwise 0 is returned.
+ */
+
+static int __vzquota_sync_list(struct list_head *lh,
+		struct vz_quota_master *qmblk,
+		enum writeback_sync_modes sync_mode)
+{
+	struct writeback_control wbc;
+	LIST_HEAD(list);
+	struct vz_quota_ilink *qlnk;
+	struct inode *inode;
+	int err;
+
+	memset(&wbc, 0, sizeof(wbc));
+	wbc.sync_mode = sync_mode;
+
+	err = 0;
+	while (!list_empty(lh) && !err) {
+		if (need_resched()) {
+			inode_qmblk_unlock(qmblk->dq_sb);
+			schedule();
+			inode_qmblk_lock(qmblk->dq_sb);
+		}
+
+		/* move the link away first so that it is visited only once */
+		qlnk = list_first_entry(lh, struct vz_quota_ilink, list);
+		list_move(&qlnk->list, &list);
+
+		/* skip inodes that cannot be grabbed */
+		inode = igrab(QLNK_INODE(qlnk));
+		if (!inode)
+			continue;
+
+		inode_qmblk_unlock(qmblk->dq_sb);
+
+		wbc.nr_to_write = LONG_MAX;
+		err = sync_inode(inode, &wbc);
+		iput(inode);
+
+		inode_qmblk_lock(qmblk->dq_sb);
+	}
+
+	/* put the handled links back */
+	list_splice(&list, lh);
+	return err;
+}
+
+/*
+ * Sync the inodes on @lh in two passes: first with WB_SYNC_NONE, then,
+ * unless that pass failed, with WB_SYNC_ALL.  Returns the first error or 0.
+ */
+static int vzquota_sync_list(struct list_head *lh,
+		struct vz_quota_master *qmblk)
+{
+	int err;
+
+	err = __vzquota_sync_list(lh, qmblk, WB_SYNC_NONE);
+	if (!err)
+		err = __vzquota_sync_list(lh, qmblk, WB_SYNC_ALL);
+
+	return err;
+}
+
+/*
+ * Sync all inodes currently linked to @qmblk.
+ *
+ * The links are moved from qmblk->dq_ilink_list to a private list, synced
+ * there, and spliced back afterwards.  If the sync succeeded but
+ * dq_ilink_list is no longer empty at that point (the lock is dropped while
+ * syncing, see __vzquota_sync_list()), -EBUSY is returned.
+ *
+ * Returns 0, -EBUSY, or the error of the sync.
+ */
+static int vzquota_sync_inodes(struct vz_quota_master *qmblk)
+{
+	int err;
+	LIST_HEAD(qlnk_list);
+
+	list_splice_init(&qmblk->dq_ilink_list, &qlnk_list);
+	err = vzquota_sync_list(&qlnk_list, qmblk);
+	if (!err && !list_empty(&qmblk->dq_ilink_list))
+		err = -EBUSY;
+	list_splice(&qlnk_list, &qmblk->dq_ilink_list);
+
+	return err;
+}
+
+/*
+ * Turn quota off for the master block with the given ID.
+ *
+ * The inodes linked to the block are synced first, then the block is
+ * switched off and marked VZDQ_STOPING; its resources are freed later by
+ * vzquota_destroy().
+ *
+ * Returns 0, -ENOENT if there is no such block, -EALREADY if quota is not
+ * on for it, or the error of the sync or of vzquota_off_qmblk().
+ */
+static int vzquota_off(unsigned int quota_id)
+{
+	struct vz_quota_master *qmblk;
+	int err;
+
+	down(&vz_quota_sem);
+
+	qmblk = vzquota_find_master(quota_id);
+	if (qmblk == NULL) {
+		err = -ENOENT;
+		goto out;
+	}
+
+	if (qmblk->dq_state != VZDQ_WORKING) {
+		err = -EALREADY;
+		goto out;
+	}
+
+	/* the lock protects dq_ilink_list also */
+	inode_qmblk_lock(qmblk->dq_sb);
+	err = vzquota_sync_inodes(qmblk);
+	inode_qmblk_unlock(qmblk->dq_sb);
+	if (err)
+		goto out;
+
+	err = vzquota_off_qmblk(qmblk->dq_sb, qmblk);
+	if (err)
+		goto out;
+
+	/* vzquota_destroy will free resources */
+	qmblk->dq_state = VZDQ_STOPING;
+out:
+	up(&vz_quota_sem);
+
+	return err;
+}
+
+
+/* ----------------------------------------------------------------------
+ * Other VZQUOTA ioctl's.
+ * --------------------------------------------------------------------- */
+
+/*
+ * Install new limits from @qstat into @_qstat.
+ *
+ * The new limits must pass vzquota_check_sane_limits(); otherwise nothing
+ * is changed and -EINVAL is returned.  This function does no locking of its
+ * own (vzquota_setlimit() calls it under the master block data write lock).
+ *
+ * Returns 0 on success.
+ */
+int vzquota_update_limit(struct dq_stat *_qstat,
+		struct dq_stat *qstat)
+{
+	if (vzquota_check_sane_limits(qstat))
+		return -EINVAL;
+
+	/* block limits */
+	_qstat->bsoftlimit = qstat->bsoftlimit;
+	_qstat->bhardlimit = qstat->bhardlimit;
+	/*
+	 * If the soft limit is exceeded, the administrator can override the
+	 * moment when the grace period for exceeding the limit ends.
+	 * Specifying the moment may be useful if the soft limit is set lower
+	 * than the current usage.  In that case, if the end of the grace
+	 * period isn't specified, the grace period will start from the moment
+	 * of the first write operation.
+	 * There is a race with the user level.  The soft limit may already be
+	 * exceeded before the limit change, and the grace period end
+	 * calculated by the kernel will be overridden.  User level may check
+	 * whether the limit is already exceeded, but the check and set calls
+	 * are not atomic.
+	 * This race isn't dangerous.  Under normal circumstances, the
+	 * difference between the grace period end calculated by the kernel
+	 * and the one given by the user level should not be greater than the
+	 * difference between the moments of the check and set calls, i.e. not
+	 * bigger than the quota timer resolution - 1 sec.
+	 */
+	if (_qstat->bcurrent >= _qstat->bsoftlimit &&
+			qstat->btime != (time_t)0)
+		_qstat->btime = qstat->btime;
+
+	/* inode limits, handled the same way */
+	_qstat->isoftlimit = qstat->isoftlimit;
+	_qstat->ihardlimit = qstat->ihardlimit;
+	if (_qstat->icurrent >= _qstat->isoftlimit &&
+			qstat->itime != (time_t)0)
+		_qstat->itime = qstat->itime;
+
+	return 0;
+}
+
+/*
+ * Set new quota limits:
+ *  - find the quota block;
+ *  - copy the new limits from user level;
+ *  - install the new limits and, if they were accepted, the new dq_info.
+ *
+ * Returns 0, -ENOENT if there is no such block, -EFAULT if the user buffer
+ * cannot be read, or the error from vzquota_update_limit().
+ */
+static int vzquota_setlimit(unsigned int quota_id,
+		struct vz_quota_stat *u_qstat)
+{
+	struct vz_quota_master *qmblk;
+	struct vz_quota_stat qstat;
+	int err;
+
+	down(&vz_quota_sem); /* for hash list protection */
+
+	qmblk = vzquota_find_master(quota_id);
+	if (qmblk == NULL) {
+		err = -ENOENT;
+	} else if (copy_from_user(&qstat, u_qstat, sizeof(qstat))) {
+		err = -EFAULT;
+	} else {
+		qmblk_data_write_lock(qmblk);
+		err = vzquota_update_limit(&qmblk->dq_stat, &qstat.dq_stat);
+		if (!err)
+			qmblk->dq_info = qstat.dq_info;
+		qmblk_data_write_unlock(qmblk);
+	}
+
+	up(&vz_quota_sem);
+	return err;
+}
+
+/*
+ * Get quota limits and usage: snapshot dq_stat and dq_info of the block
+ * under its data read lock and hand the snapshot to the user.
+ *
+ * Returns 0, -ENOENT if there is no such block, or -EFAULT if the user
+ * buffer cannot be written.
+ */
+static int vzquota_getstat(unsigned int quota_id,
+		struct vz_quota_stat *u_qstat)
+{
+	struct vz_quota_master *qmblk;
+	struct vz_quota_stat qstat;
+	int err;
+
+	down(&vz_quota_sem);
+
+	qmblk = vzquota_find_master(quota_id);
+	if (qmblk == NULL) {
+		err = -ENOENT;
+		goto out;
+	}
+
+	/* take the whole snapshot under the lock */
+	qmblk_data_read_lock(qmblk);
+	memcpy(&qstat.dq_stat, &qmblk->dq_stat, sizeof(qstat.dq_stat));
+	memcpy(&qstat.dq_info, &qmblk->dq_info, sizeof(qstat.dq_info));
+	qmblk_data_read_unlock(qmblk);
+
+	err = copy_to_user(u_qstat, &qstat, sizeof(qstat)) ? -EFAULT : 0;
+
+out:
+	up(&vz_quota_sem);
+	return err;
+}
+
+/*
+ * This is a system call to control per-VE disk quota.
+ * Note this call is allowed to run ONLY from VE0: the caller needs both
+ * CAP_SYS_RESOURCE and CAP_SYS_ADMIN, otherwise -EPERM is returned.
+ *
+ * @cmd selects the operation (VZ_DQ_*); an unknown command yields -EINVAL.
+ * Otherwise the result of the selected operation is returned.
+ */
+long do_vzquotactl(int cmd, unsigned int quota_id,
+			  struct vz_quota_stat *qstat, const char *ve_root)
+{
+	/* access allowed only from root of VE0 */
+	if (!(capable(CAP_SYS_RESOURCE) && capable(CAP_SYS_ADMIN)))
+		return -EPERM;
+
+	switch (cmd) {
+	case VZ_DQ_CREATE:
+		return vzquota_create(quota_id, qstat);
+	case VZ_DQ_DESTROY:
+		return vzquota_destroy(quota_id);
+	case VZ_DQ_ON:
+		return vzquota_on(quota_id, ve_root);
+	case VZ_DQ_OFF:
+		return vzquota_off(quota_id);
+	case VZ_DQ_SETLIMIT:
+		return vzquota_setlimit(quota_id, qstat);
+	case VZ_DQ_GETSTAT:
+		return vzquota_getstat(quota_id, qstat);
+	default:
+		return -EINVAL;
+	}
+}
+
+
+/* ----------------------------------------------------------------------
+ * Proc filesystem routines
+ * ---------------------------------------------------------------------*/
+
+#if defined(CONFIG_PROC_FS)
+
+/* width reserved for the quota id when bounding the path length */
+#define QUOTA_UINT_LEN		15
+/* column formats: time columns are 11 wide, number columns 15 wide */
+#define QUOTA_TIME_LEN_FMT_UINT	"%11u"
+#define QUOTA_NUM_LEN_FMT_UINT	"%15u"
+#define QUOTA_NUM_LEN_FMT_ULL	"%15Lu"
+/* same widths for the header strings */
+#define QUOTA_TIME_LEN_FMT_STR	"%11s"
+#define QUOTA_NUM_LEN_FMT_STR	"%15s"
+/* bound used for the id/path line and for the free-space check per record */
+#define QUOTA_PROC_MAX_LINE_LEN 2048
+
+/*
+ * Print the header line of the proc file into @buffer.
+ * Returns the number of characters written.
+ */
+static int print_proc_header(char * buffer)
+{
+	int len;
+
+	len = sprintf(buffer,
+			"%-11s"
+			QUOTA_NUM_LEN_FMT_STR
+			QUOTA_NUM_LEN_FMT_STR
+			QUOTA_NUM_LEN_FMT_STR
+			QUOTA_TIME_LEN_FMT_STR
+			QUOTA_TIME_LEN_FMT_STR
+			"\n",
+			"qid: path",
+			"usage", "softlimit", "hardlimit", "time", "expire");
+	return len;
+}
+
+/*
+ * Print the "<quota id>: <path>" line of a master record into @buffer.
+ *
+ * For a working quota the path of the quota root is resolved into
+ * @path_buf (a PAGE_SIZE buffer; it may be NULL, in which case the path is
+ * left empty).  A path that is too long is cut at the front and the cut is
+ * marked with a leading "...".  For the other known states a fixed marker
+ * is printed instead of a path.
+ *
+ * Returns the number of characters written.
+ */
+static int print_proc_master_id(char * buffer, char * path_buf,
+		struct vz_quota_master * qp)
+{
+	char *path;
+	int over;
+
+	path = NULL;
+	switch (qp->dq_state) {
+		case VZDQ_WORKING:
+			if (!path_buf) {
+				path = "";
+				break;
+			}
+			path = d_path(qp->dq_root_dentry,
+				      qp->dq_root_mnt, path_buf, PAGE_SIZE);
+			if (IS_ERR(path)) {
+				path = "";
+				break;
+			}
+			/* do not print large path, truncate it */
+			over = strlen(path) -
+				(QUOTA_PROC_MAX_LINE_LEN - 3 - 3 -
+					QUOTA_UINT_LEN);
+			if (over > 0) {
+				/*
+				 * Drop the excess from the front, keeping
+				 * three characters for the dots, but never
+				 * step back before the start of the string.
+				 */
+				if (over > 3)
+					path += over - 3;
+				path[0] = path[1] = path[2] = '.';
+			}
+			break;
+		case VZDQ_STARTING:
+			path = "-- started --";
+			break;
+		case VZDQ_STOPING:
+			path = "-- stopped --";
+			break;
+	}
+
+	return sprintf(buffer, "%u: %s\n", qp->dq_id, path);
+}
+
+/*
+ * Print the two statistics lines of a record into @buffer: the block line
+ * (byte counts shifted down to 1k units) and the inode line, each followed
+ * by its time and expire columns.
+ *
+ * Returns the number of characters written.
+ */
+static int print_proc_stat(char * buffer, struct dq_stat *qs,
+		struct dq_info *qi)
+{
+	int len;
+
+	/* block usage and limits */
+	len = sprintf(buffer,
+			"%11s"
+			QUOTA_NUM_LEN_FMT_ULL
+			QUOTA_NUM_LEN_FMT_ULL
+			QUOTA_NUM_LEN_FMT_ULL
+			QUOTA_TIME_LEN_FMT_UINT
+			QUOTA_TIME_LEN_FMT_UINT
+			"\n",
+			"1k-blocks",
+			qs->bcurrent >> 10,
+			qs->bsoftlimit >> 10,
+			qs->bhardlimit >> 10,
+			(unsigned int)qs->btime,
+			(unsigned int)qi->bexpire);
+
+	/* inode usage and limits, appended right after the first line */
+	len += sprintf(buffer + len,
+			"%11s"
+			QUOTA_NUM_LEN_FMT_UINT
+			QUOTA_NUM_LEN_FMT_UINT
+			QUOTA_NUM_LEN_FMT_UINT
+			QUOTA_TIME_LEN_FMT_UINT
+			QUOTA_TIME_LEN_FMT_UINT
+			"\n",
+			"inodes",
+			qs->icurrent,
+			qs->isoftlimit,
+			qs->ihardlimit,
+			(unsigned int)qs->itime,
+			(unsigned int)qi->iexpire);
+
+	return len;
+}
+
+
+/*
+ * read_proc callback producing the vz/vzquota proc file.
+ *
+ * The output is regenerated from the beginning on every call: the header
+ * first, then one record per quota master block found in the hash table.
+ * Records that end at or before @off are overwritten in place by the
+ * following record, so that @page only accumulates data from the record
+ * containing @off onwards; *start is pointed at the byte that corresponds
+ * to @off.
+ *
+ * Returns the number of bytes available at *start (at most @count), or
+ * -ENOMEM if the path buffer cannot be allocated.  *eof is set once the
+ * whole hash table has been walked.
+ *
+ * NOTE(review): the code tests *start against NULL without setting it
+ * first, so the caller is presumably expected to pass it in as NULL --
+ * confirm against the proc read_proc calling convention.
+ */
+static int vzquota_read_proc(char *page, char **start, off_t off, int count,
+			   int *eof, void *data)
+{
+	int len, i;
+	off_t printed = 0;
+	char *p = page;
+	struct vz_quota_master *qp;
+	struct vz_quota_ilink *ql2;
+	struct list_head *listp;
+	char *path_buf;
+
+	/* scratch page for d_path() in print_proc_master_id() */
+	path_buf = (char*)__get_free_page(GFP_KERNEL);
+	if (path_buf == NULL)
+		return -ENOMEM;
+
+	len = print_proc_header(p);
+	printed += len;
+	if (off < printed) /* keep header in output */ {
+		*start = p + off;
+		p += len;
+	}
+
+	down(&vz_quota_sem);
+
+	/* traverse master hash table for all records */
+	for (i = 0; i < vzquota_hash_size; i++) {
+		list_for_each(listp, &vzquota_hash_table[i]) {
+			qp = list_entry(listp,
+					struct vz_quota_master, dq_hash);
+
+			/* Skip other VE's information if not root of VE0 */
+			if ((!capable(CAP_SYS_ADMIN) ||
+			     !capable(CAP_SYS_RESOURCE))) {
+				/* show only the block of the caller's root */
+				ql2 = INODE_QLNK(current->fs->root->d_inode);
+				if (ql2 == NULL || qp != ql2->qmblk)
+					continue;
+			}
+			/*
+			 * Now print the next record
+			 */
+			len = 0;
+			/* we print quotaid and path only in VE0 */
+			if (capable(CAP_SYS_ADMIN))
+				len += print_proc_master_id(p+len,path_buf, qp);
+			len += print_proc_stat(p+len, &qp->dq_stat,
+					&qp->dq_info);
+			printed += len;
+			/* skip unnecessary lines */
+			if (printed <= off)
+				continue;
+			p += len;
+			/* provide start offset */
+			if (*start == NULL)
+				*start = p + (off - printed);
+			/*
+			 * NOTE(review): a full record (id/path line plus the
+			 * two statistics lines) can be longer than
+			 * QUOTA_PROC_MAX_LINE_LEN, so the free-space test
+			 * below may not leave enough room for the next
+			 * record -- confirm.
+			 */
+			/* have we printed all requested size? */
+			if (PAGE_SIZE - (p - page) < QUOTA_PROC_MAX_LINE_LEN ||
+			    (p - *start) >= count)
+				goto out;
+		}
+	}
+
+	*eof = 1; /* checked all hash */
+out:
+	up(&vz_quota_sem);
+
+	len = 0;
+	if (*start != NULL) {
+		len = (p - *start);
+		if (len > count)
+			len = count;
+	}
+
+	if (path_buf)
+		free_page((unsigned long) path_buf);
+
+	return len;
+}
+
+/*
+ * Register the procfs read callback for vz/vzquota.  If the entry cannot
+ * be created directly, the "vz" directory is created first and the entry is
+ * created inside it.
+ *
+ * Returns 0 on success and -EBUSY if an entry cannot be created.
+ */
+int vzquota_proc_init(void)
+{
+	struct proc_dir_entry *de;
+
+	de = create_proc_entry("vz/vzquota", S_IFREG|S_IRUSR, NULL);
+	if (de == NULL) {
+		struct proc_dir_entry *vzdir;
+
+		/* create "vz" subdirectory, if not exist */
+		vzdir = create_proc_entry("vz", S_IFDIR|S_IRUGO|S_IXUGO, NULL);
+		if (vzdir == NULL)
+			return -EBUSY;
+		de = create_proc_entry("vzquota", S_IFREG|S_IRUSR, vzdir);
+		if (de == NULL)
+			return -EBUSY;
+	}
+
+	de->read_proc = vzquota_read_proc;
+	de->data = NULL;
+	return 0;
+}
+
+/*
+ * Counterpart of vzquota_proc_init(): remove the vz/vzquota proc entry.
+ * The "vz" directory itself is not removed here.
+ */
+void vzquota_proc_release(void)
+{
+	/* Unregister procfs read callback */
+	remove_proc_entry("vz/vzquota", NULL);
+}
+
+#endif
diff -uprN linux-2.6.16/fs/vzdq_ops.c linux-2.6.16.ovz/fs/vzdq_ops.c
--- linux-2.6.16/fs/vzdq_ops.c	1969-12-31 19:00:00.000000000 -0500
+++ linux-2.6.16.ovz/fs/vzdq_ops.c	2006-07-05 08:34:56.000000000 -0400
@@ -0,0 +1,565 @@
+/*
+ * Copyright (C) 2001, 2002, 2004, 2005  SWsoft
+ * All rights reserved.
+ * 
+ * Licensing governed by "linux/COPYING.SWsoft" file.
+ */
+
+#include <linux/config.h>
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <asm/semaphore.h>
+#include <linux/sched.h>
+#include <linux/fs.h>
+#include <linux/quota.h>
+#include <linux/vzquota.h>
+
+
+/* ----------------------------------------------------------------------
+ * Quota superblock operations - helper functions.
+ * --------------------------------------------------------------------- */
+
+static inline void vzquota_incr_inodes(struct dq_stat *dqstat,
+		unsigned long number)
+{
+	dqstat->icurrent += number;	/* plain charge, no limit check here */
+}
+
+static inline void vzquota_incr_space(struct dq_stat *dqstat,
+		__u64 number)
+{
+	dqstat->bcurrent += number;	/* plain charge, no limit check here */
+}
+
+static inline void vzquota_decr_inodes(struct dq_stat *dqstat,
+		unsigned long number)
+{
+	/* saturate at zero instead of wrapping around */
+	dqstat->icurrent = (dqstat->icurrent > number) ?
+				dqstat->icurrent - number : 0;
+	/* back under the soft limit: reset the grace timer */
+	if (dqstat->isoftlimit > dqstat->icurrent)
+		dqstat->itime = (time_t) 0;
+}
+
+static inline void vzquota_decr_space(struct dq_stat *dqstat,
+		__u64 number)
+{
+	/* saturate at zero instead of wrapping around */
+	dqstat->bcurrent = (dqstat->bcurrent > number) ?
+				dqstat->bcurrent - number : 0;
+	/* back under the soft limit: reset the grace timer */
+	if (dqstat->bsoftlimit > dqstat->bcurrent)
+		dqstat->btime = (time_t) 0;
+}
+
+/*
+ * Print warning @fmt (its only argument is @dq_id) unless @flag is already
+ * set in @dq_info, then set @flag so the warning is not repeated.
+ * TODO: better printk() message, or a /proc/kmsg-like /proc/vzquotamsg.
+ */
+static inline void vzquota_warn(struct dq_info *dq_info, int dq_id, int flag,
+		const char *fmt)
+{
+	if (!(dq_info->flags & flag)) {
+		printk(fmt, dq_id);
+		dq_info->flags |= flag;
+	}
+}
+
+/*
+ * ignore_hardlimit - tell whether the hard limits may be exceeded
+ *
+ * Intended to allow the superuser of VE0 to override hard limits.
+ *
+ * ignore_hardlimit() has a very bad feature:
+ *
+ *	a writepage() operation on a writable mapping of a file with holes
+ *	may trigger get_block() with the wrong "current" and, as a
+ *	consequence, opens a possibility to overcommit hard limits.
+ */
+/* For the reason above it is disabled now and always returns 0. */
+static inline int ignore_hardlimit(struct dq_info *dqstat)
+{
+#if 0
+	return	ve_is_super(get_exec_env()) &&
+		capable(CAP_SYS_RESOURCE) &&
+		(dqstat->options & VZ_QUOTA_OPT_RSQUASH);
+#else
+	return 0;
+#endif
+}
+
+static int vzquota_check_inodes(struct dq_info *dq_info,
+		struct dq_stat *dqstat,
+		unsigned long number, int dq_id)
+{
+	if (number == 0)
+		return QUOTA_OK;
+
+	/* going over the hard limit fails right away */
+	if (dqstat->icurrent + number > dqstat->ihardlimit &&
+	    !ignore_hardlimit(dq_info)) {
+		vzquota_warn(dq_info, dq_id, VZ_QUOTA_INODES,
+			   "VZ QUOTA: file hardlimit reached for id=%d\n");
+		return NO_QUOTA;
+	}
+
+	if (dqstat->icurrent + number <= dqstat->isoftlimit)
+		return QUOTA_OK;
+
+	if (dqstat->itime == (time_t)0) {
+		/* first excess over the soft limit: start the grace period */
+		vzquota_warn(dq_info, dq_id, 0,
+			"VZ QUOTA: file softlimit exceeded for id=%d\n");
+		dqstat->itime = CURRENT_TIME_SECONDS + dq_info->iexpire;
+		return QUOTA_OK;
+	}
+	if (CURRENT_TIME_SECONDS < dqstat->itime || ignore_hardlimit(dq_info))
+		return QUOTA_OK;
+
+	vzquota_warn(dq_info, dq_id, VZ_QUOTA_INODES,
+		"VZ QUOTA: file softlimit expired for id=%d\n");
+	return NO_QUOTA;
+}
+
+static int vzquota_check_space(struct dq_info *dq_info,
+		struct dq_stat *dqstat,
+		__u64 number, int dq_id, char prealloc)
+{
+	if (number == 0)
+		return QUOTA_OK;
+
+	/* over the hard limit: refuse, warn unless only preallocating */
+	if (dqstat->bcurrent + number > dqstat->bhardlimit &&
+	    !ignore_hardlimit(dq_info)) {
+		if (!prealloc)
+			vzquota_warn(dq_info, dq_id, VZ_QUOTA_SPACE,
+				"VZ QUOTA: disk hardlimit reached "
+				"for id=%d\n");
+		return NO_QUOTA;
+	}
+
+	/* within the soft limit: nothing else to check */
+	if (dqstat->bcurrent + number <= dqstat->bsoftlimit)
+		return QUOTA_OK;
+
+	if (dqstat->btime == (time_t)0) {
+		/*
+		 * The grace period has not been started yet.  Original
+		 * Linux quota doesn't allow preallocation to exceed the
+		 * softlimit, so a preallocation simply fails here.
+		 */
+		if (prealloc)
+			return NO_QUOTA;
+		vzquota_warn(dq_info, dq_id, 0,
+			"VZ QUOTA: disk softlimit exceeded for id=%d\n");
+		dqstat->btime = CURRENT_TIME_SECONDS + dq_info->bexpire;
+		return QUOTA_OK;
+	}
+
+	/* grace period is running: fail only once it has expired */
+	if (CURRENT_TIME_SECONDS < dqstat->btime || ignore_hardlimit(dq_info))
+		return QUOTA_OK;
+
+	if (!prealloc)
+		vzquota_warn(dq_info, dq_id, VZ_QUOTA_SPACE,
+			"VZ QUOTA: disk quota softlimit expired "
+			"for id=%d\n");
+	return NO_QUOTA;
+}
+
+static int vzquota_check_ugid_inodes(struct vz_quota_master *qmblk,
+		struct vz_quota_ugid *qugid[],
+		int type, unsigned long number)
+{
+	struct vz_quota_ugid *ug = qugid[type];
+	struct dq_stat *st;
+
+	/* no ugid record: nothing to check; bad record: always refuse */
+	if (ug == NULL)
+		return QUOTA_OK;
+	if (ug == VZ_QUOTA_UGBAD)
+		return NO_QUOTA;
+
+	/* quota of this type is switched off, or nothing is requested */
+	if ((type == USRQUOTA && !(qmblk->dq_flags & VZDQ_USRQUOTA)) ||
+	    (type == GRPQUOTA && !(qmblk->dq_flags & VZDQ_GRPQUOTA)) ||
+	    number == 0)
+		return QUOTA_OK;
+
+	st = &ug->qugid_stat;
+
+	/* a zero limit means "not limited" */
+	if (st->ihardlimit != 0 && st->icurrent + number > st->ihardlimit)
+		return NO_QUOTA;
+
+	if (st->isoftlimit == 0 || st->icurrent + number <= st->isoftlimit)
+		return QUOTA_OK;
+
+	if (st->itime == (time_t)0) {
+		/* first excess: start the grace period */
+		st->itime = CURRENT_TIME_SECONDS +
+				qmblk->dq_ugid_info[type].iexpire;
+		return QUOTA_OK;
+	}
+
+	return CURRENT_TIME_SECONDS >= st->itime ? NO_QUOTA : QUOTA_OK;
+}
+
+static int vzquota_check_ugid_space(struct vz_quota_master *qmblk,
+		struct vz_quota_ugid *qugid[],
+		int type, __u64 number, char prealloc)
+{
+	struct vz_quota_ugid *ug = qugid[type];
+	struct dq_stat *st;
+
+	/* no ugid record: nothing to check; bad record: always refuse */
+	if (ug == NULL)
+		return QUOTA_OK;
+	if (ug == VZ_QUOTA_UGBAD)
+		return NO_QUOTA;
+
+	/* quota of this type is switched off, or nothing is requested */
+	if ((type == USRQUOTA && !(qmblk->dq_flags & VZDQ_USRQUOTA)) ||
+	    (type == GRPQUOTA && !(qmblk->dq_flags & VZDQ_GRPQUOTA)) ||
+	    number == 0)
+		return QUOTA_OK;
+
+	st = &ug->qugid_stat;
+
+	/* a zero limit means "not limited" */
+	if (st->bhardlimit != 0 && st->bcurrent + number > st->bhardlimit)
+		return NO_QUOTA;
+
+	if (st->bsoftlimit == 0 || st->bcurrent + number <= st->bsoftlimit)
+		return QUOTA_OK;
+
+	if (st->btime == (time_t)0) {
+		/*
+		 * The grace period has not been started yet.  Original
+		 * Linux quota doesn't allow preallocation to exceed the
+		 * softlimit, so a preallocation simply fails here.
+		 */
+		if (prealloc)
+			return NO_QUOTA;
+		st->btime = CURRENT_TIME_SECONDS +
+				qmblk->dq_ugid_info[type].bexpire;
+		return QUOTA_OK;
+	}
+
+	if (CURRENT_TIME_SECONDS >= st->btime)
+		return NO_QUOTA;
+	return QUOTA_OK;
+}
+
+/* ----------------------------------------------------------------------
+ * Quota superblock operations
+ * --------------------------------------------------------------------- */
+
+/*
+ * S_NOQUOTA note.
+ * In the current kernel (2.6.8.1), S_NOQUOTA flag is set only for
+ *  - quota file (absent in our case)
+ *  - after explicit DQUOT_DROP (earlier than clear_inode) in functions like
+ *    filesystem-specific new_inode, before the inode gets outside links.
+ * For the latter case, the only quota operation where care about S_NOQUOTA
+ * might be required is vzquota_drop, but there S_NOQUOTA has already been
+ * checked in DQUOT_DROP().
+ * So, S_NOQUOTA may be ignored for now in the VZDQ code.
+ *
+ * The above note is not entirely correct.
+ * Both for ext2 and ext3 filesystems, DQUOT_FREE_INODE is called from
+ * delete_inode if new_inode fails (for example, because of inode quota
+ * limits), so S_NOQUOTA check is needed in free_inode.
+ * This seems to be the dark corner of the current quota API.
+ */
+
+/*
+ * Initialize quota for @inode via vzquota_inode_init_call(); @type is unused.
+ */
+static int vzquota_initialize(struct inode *inode, int type)
+{
+	vzquota_inode_init_call(inode);
+	return 0; /* ignored by caller */
+}
+
+/*
+ * Release quota for @inode by delegating to vzquota_inode_drop_call().
+ */
+static int vzquota_drop(struct inode *inode)
+{
+	vzquota_inode_drop_call(inode);
+	return 0; /* ignored by caller */
+}
+
+/*
+ * Allocate block callback: charge @number bytes to the quota of @inode.
+ *
+ * If @prealloc is set, the disk quota exceeding warning is not printed.
+ * See Linux quota to know why.
+ *
+ * Return:
+ *	QUOTA_OK == 0 on SUCCESS
+ *	NO_QUOTA == 1 if allocation should fail
+ */
+static int vzquota_alloc_space(struct inode *inode,
+			     qsize_t number, int prealloc)
+{
+	struct vz_quota_master *qmblk;
+	struct vz_quota_datast data;
+	int ret = QUOTA_OK;
+
+	qmblk = vzquota_inode_data(inode, &data);
+	if (qmblk == VZ_QUOTA_BAD)
+		return NO_QUOTA;
+	if (qmblk != NULL) {
+#ifdef CONFIG_VZ_QUOTA_UGID
+		int cnt;
+		struct vz_quota_ugid * qugid[MAXQUOTAS];
+#endif
+
+		/* checking first: usage is not charged unless all checks pass */
+		ret = vzquota_check_space(&qmblk->dq_info, &qmblk->dq_stat,
+				number, qmblk->dq_id, prealloc);
+		if (ret == NO_QUOTA)
+			goto no_quota;
+#ifdef CONFIG_VZ_QUOTA_UGID
+		for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
+			qugid[cnt] = INODE_QLNK(inode)->qugid[cnt];
+			ret = vzquota_check_ugid_space(qmblk, qugid,
+					cnt, number, prealloc);
+			if (ret == NO_QUOTA)
+				goto no_quota;
+		}
+		/* check ok, may increment (a bad ugid has failed the check above) */
+		for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
+			if (qugid[cnt] == NULL)
+				continue;
+			vzquota_incr_space(&qugid[cnt]->qugid_stat, number);
+		}
+#endif
+		vzquota_incr_space(&qmblk->dq_stat, number);
+		vzquota_data_unlock(inode, &data);
+	}
+
+	inode_add_bytes(inode, number);	/* done even when qmblk is NULL */
+	might_sleep();
+	return QUOTA_OK;
+
+no_quota:
+	vzquota_data_unlock(inode, &data);
+	return NO_QUOTA;
+}
+
+/*
+ * Allocate inodes callback: charge @number inodes to the quota of @inode.
+ *
+ * Return:
+ *	QUOTA_OK == 0 on SUCCESS
+ *	NO_QUOTA == 1 if allocation should fail
+ */
+static int vzquota_alloc_inode(const struct inode *inode, unsigned long number)
+{
+	struct vz_quota_master *qmblk;
+	struct vz_quota_datast data;
+	int ret = QUOTA_OK;
+
+	qmblk = vzquota_inode_data((struct inode *)inode, &data);
+	if (qmblk == VZ_QUOTA_BAD)
+		return NO_QUOTA;
+	if (qmblk != NULL) {
+#ifdef CONFIG_VZ_QUOTA_UGID
+		int cnt;
+		struct vz_quota_ugid *qugid[MAXQUOTAS];
+#endif
+
+		/* checking first: usage is not charged unless all checks pass */
+		ret = vzquota_check_inodes(&qmblk->dq_info, &qmblk->dq_stat,
+				number, qmblk->dq_id);
+		if (ret == NO_QUOTA)
+			goto no_quota;
+#ifdef CONFIG_VZ_QUOTA_UGID
+		for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
+			qugid[cnt] = INODE_QLNK(inode)->qugid[cnt];
+			ret = vzquota_check_ugid_inodes(qmblk, qugid,
+					cnt, number);
+			if (ret == NO_QUOTA)
+				goto no_quota;
+		}
+		/* check ok, may increment (a bad ugid has failed the check above) */
+		for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
+			if (qugid[cnt] == NULL)
+				continue;
+			vzquota_incr_inodes(&qugid[cnt]->qugid_stat, number);
+		}
+#endif
+		vzquota_incr_inodes(&qmblk->dq_stat, number);
+		vzquota_data_unlock((struct inode *)inode, &data);
+	}
+
+	might_sleep();
+	return QUOTA_OK;
+
+no_quota:
+	vzquota_data_unlock((struct inode *)inode, &data);
+	return NO_QUOTA;
+}
+
+/*
+ * Free space callback: give @number bytes back to the quota of @inode.
+ */
+static int vzquota_free_space(struct inode *inode, qsize_t number)
+{
+	struct vz_quota_datast data;
+	struct vz_quota_master *master = vzquota_inode_data(inode, &data);
+
+	if (master == VZ_QUOTA_BAD)
+		return NO_QUOTA; /* isn't checked by the caller */
+	if (master != NULL) {
+#ifdef CONFIG_VZ_QUOTA_UGID
+		struct vz_quota_ugid *ug;
+		int type;
+#endif
+
+		vzquota_decr_space(&master->dq_stat, number);
+#ifdef CONFIG_VZ_QUOTA_UGID
+		for (type = 0; type < MAXQUOTAS; type++) {
+			ug = INODE_QLNK(inode)->qugid[type];
+			/* only real ugid records carry usage */
+			if (ug != NULL && ug != VZ_QUOTA_UGBAD)
+				vzquota_decr_space(&ug->qugid_stat, number);
+		}
+#endif
+		vzquota_data_unlock(inode, &data);
+	}
+
+	inode_sub_bytes(inode, number);
+	might_sleep();
+	return QUOTA_OK;
+}
+
+/*
+ * Free inodes callback: give @number inodes back to the quota of @inode.
+ */
+static int vzquota_free_inode(const struct inode *inode, unsigned long number)
+{
+	struct vz_quota_datast data;
+	struct vz_quota_master *master;
+
+	if (IS_NOQUOTA(inode))
+		return QUOTA_OK;
+
+	master = vzquota_inode_data((struct inode *)inode, &data);
+	if (master == VZ_QUOTA_BAD)
+		return NO_QUOTA;
+	if (master != NULL) {
+#ifdef CONFIG_VZ_QUOTA_UGID
+		struct vz_quota_ugid *ug;
+		int type;
+#endif
+
+		vzquota_decr_inodes(&master->dq_stat, number);
+#ifdef CONFIG_VZ_QUOTA_UGID
+		for (type = 0; type < MAXQUOTAS; type++) {
+			ug = INODE_QLNK(inode)->qugid[type];
+			/* only real ugid records carry usage */
+			if (ug != NULL && ug != VZ_QUOTA_UGBAD)
+				vzquota_decr_inodes(&ug->qugid_stat, number);
+		}
+#endif
+		vzquota_data_unlock((struct inode *)inode, &data);
+	}
+	might_sleep();
+	return QUOTA_OK;
+}
+
+#if defined(CONFIG_VZ_QUOTA_UGID)
+
+/*
+ * Helper for quota transfer: check that one more inode of @size bytes
+ * fits into qugid[@type].  Returns 0 if it does and -1 otherwise.
+ */
+static int vzquota_transfer_check(struct vz_quota_master *qmblk,
+		struct vz_quota_ugid *qugid[],
+		unsigned int type, __u64 size)
+{
+	if (vzquota_check_ugid_space(qmblk, qugid, type, size, 0) == QUOTA_OK &&
+	    vzquota_check_ugid_inodes(qmblk, qugid, type, 1) == QUOTA_OK)
+		return 0;
+	return -1;
+}
+
+/* Transfer @inode's usage to @qlnk's ugids (types in @mask); 0 or -1. */
+int vzquota_transfer_usage(struct inode *inode, int mask,
+		struct vz_quota_ilink *qlnk)
+{
+	struct vz_quota_ugid *qugid_old;
+	__u64 space = inode_get_bytes(inode);
+	int i;
+
+	for (i = 0; i < MAXQUOTAS; i++) {
+		if (!(mask & (1 << i)))
+			continue;
+		/* the old owner may lack a ugid record: refuse, don't oops */
+		if (INODE_QLNK(inode)->qugid[i] == VZ_QUOTA_UGBAD ||
+		    vzquota_transfer_check(qlnk->qmblk, qlnk->qugid, i, space))
+			return -1;
+	}
+	for (i = 0; i < MAXQUOTAS; i++) {
+		if (!(mask & (1 << i)))
+			continue;
+		qugid_old = INODE_QLNK(inode)->qugid[i];
+		vzquota_decr_space(&qugid_old->qugid_stat, space);
+		vzquota_decr_inodes(&qugid_old->qugid_stat, 1);
+		vzquota_incr_space(&qlnk->qugid[i]->qugid_stat, space);
+		vzquota_incr_inodes(&qlnk->qugid[i]->qugid_stat, 1);
+	}
+	return 0;
+}
+
+/* Transfer the inode between different user/group quotas; any failure of
+ * vzquota_inode_transfer_call() is reported as NO_QUOTA. */
+static int vzquota_transfer(struct inode *inode, struct iattr *iattr)
+{
+	if (vzquota_inode_transfer_call(inode, iattr))
+		return NO_QUOTA;
+	return QUOTA_OK;
+}
+
+#else /* CONFIG_VZ_QUOTA_UGID */
+
+static int vzquota_transfer(struct inode *inode, struct iattr *iattr)
+{
+	return QUOTA_OK;	/* built without ugid quota: nothing to move */
+}
+
+#endif
+
+/*
+ * Rename hook: NO_QUOTA when vzquota_rename_check() refuses the move.
+ * Called under following semaphores [not verified  --SAW]:
+ *	old_d->d_inode->i_sb->s_vfs_rename_sem
+ *	old_d->d_inode->i_sem
+ *	new_d->d_inode->i_sem
+ */
+static int vzquota_rename(struct inode *inode,
+		struct inode *old_dir, struct inode *new_dir)
+{
+	int refused = vzquota_rename_check(inode, old_dir, new_dir);
+	return refused ? NO_QUOTA : QUOTA_OK;
+}
+
+/*
+ * Superblock diskquota operations backed by the VZ quota accounting above.
+ */
+struct dquot_operations vz_quota_operations = {
+	.initialize	= vzquota_initialize,
+	.drop		= vzquota_drop,
+	.alloc_space	= vzquota_alloc_space,
+	.alloc_inode	= vzquota_alloc_inode,
+	.free_space	= vzquota_free_space,
+	.free_inode	= vzquota_free_inode,
+	.transfer	= vzquota_transfer,
+	.rename		= vzquota_rename,
+};
diff -uprN linux-2.6.16/fs/vzdq_tree.c linux-2.6.16.ovz/fs/vzdq_tree.c
--- linux-2.6.16/fs/vzdq_tree.c	1969-12-31 19:00:00.000000000 -0500
+++ linux-2.6.16.ovz/fs/vzdq_tree.c	2006-07-05 08:34:56.000000000 -0400
@@ -0,0 +1,286 @@
+/*
+ *
+ * Copyright (C) 2005  SWsoft
+ * All rights reserved.
+ * 
+ * Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ * This file contains Virtuozzo quota tree implementation
+ */
+
+#include <linux/errno.h>
+#include <linux/slab.h>
+#include <linux/vzdq_tree.h>
+
+/* Allocate an empty quota tree; returns NULL when kmalloc() fails. */
+struct quotatree_tree *quotatree_alloc(void)
+{
+	struct quotatree_tree *qtree;
+	int level;
+
+	qtree = kmalloc(sizeof(*qtree), GFP_KERNEL);
+	if (qtree == NULL)
+		return NULL;
+
+	qtree->root = NULL;
+	qtree->leaf_num = 0;
+	for (level = 0; level < QUOTATREE_DEPTH; level++) {
+		qtree->levels[level].freenum = 0;
+		INIT_LIST_HEAD(&qtree->levels[level].freelh);
+		INIT_LIST_HEAD(&qtree->levels[level].usedlh);
+	}
+	return qtree;
+}
+
+/*
+ * Walk down from the root along @id for at most @level levels, stopping at
+ * the first empty slot.  Returns the last node visited (NULL if none); the
+ * slot reached and its depth are stored in @st when @st is not NULL.
+ */
+static struct quotatree_node *
+quotatree_follow(struct quotatree_tree *tree, quotaid_t id, int level,
+		struct quotatree_find_state *st)
+{
+	struct quotatree_node *node = NULL;
+	void **slot = (void **)&tree->root;
+	int depth;
+
+	for (depth = 0; depth < level && *slot != NULL; depth++) {
+		node = *slot;
+		slot = node->blocks +
+			((id >> QUOTATREE_BSHIFT(depth)) & QUOTATREE_BMASK);
+	}
+	if (st != NULL) {
+		st->level = depth;
+		st->block = slot;
+	}
+	return node;
+}
+
+/* Look up the leaf for @id (NULL if absent); @st must not be NULL. */
+void *quotatree_find(struct quotatree_tree *tree, quotaid_t id,
+		struct quotatree_find_state *st)
+{
+	quotatree_follow(tree, id, QUOTATREE_DEPTH, st);
+	if (st->level != QUOTATREE_DEPTH)
+		return NULL;
+	return *st->block;
+}
+
+/* Get the @index-th leaf (in list order); NULL when out of range. */
+void *quotatree_leaf_byindex(struct quotatree_tree *tree, unsigned int index)
+{
+	struct quotatree_node *node;
+	int slot, seen;
+
+	if (index >= QTREE_LEAFNUM(tree))
+		return NULL;
+
+	seen = 0;
+	list_for_each_entry(node, &QTREE_LEAFLVL(tree)->usedlh, list) {
+		for (slot = 0; slot < QUOTATREE_BSIZE; slot++) {
+			if (node->blocks[slot] == NULL)
+				continue;
+			/* count only the occupied slots */
+			if (seen++ == index)
+				return node->blocks[slot];
+		}
+	}
+
+	return NULL;
+}
+
+/* returns the data leaf (vz_quota_ugid) that follows the _existent_ ugid
+ * (@id) in the tree, or NULL when there is none after it */
+void *quotatree_get_next(struct quotatree_tree *tree, quotaid_t id)
+{
+	struct quotatree_node *node;
+	struct list_head *pos;
+	int slot;
+
+	/* node of the last level that refers to @id */
+	node = quotatree_follow(tree, id, QUOTATREE_DEPTH, NULL);
+	if (node == NULL)
+		return NULL;
+
+	slot = (id & QUOTATREE_BMASK) + 1;	/* start right after @id */
+	pos = &node->list;
+	do {
+		node = list_entry(pos, struct quotatree_node, list);
+		for (; slot < QUOTATREE_BSIZE; slot++)
+			if (node->blocks[slot] != NULL)
+				return node->blocks[slot];
+		slot = 0;	/* following nodes are scanned from the start */
+		pos = pos->next;
+	} while (pos != &QTREE_LEAFLVL(tree)->usedlh);
+
+	return NULL;
+}
+
+int quotatree_insert(struct quotatree_tree *tree, quotaid_t id,
+		struct quotatree_find_state *st, void *data)
+{
+	struct quotatree_node *p;
+	int l, index;
+
+	while (st->level < QUOTATREE_DEPTH) {	/* build the missing levels */
+		l = st->level;
+		if (!list_empty(&tree->levels[l].freelh)) {
+			p = list_entry(tree->levels[l].freelh.next,
+					struct quotatree_node, list);
+			list_del(&p->list);	/* reuse a spare node of this level */
+		} else {
+			p = kmalloc(sizeof(struct quotatree_node), GFP_NOFS | __GFP_NOFAIL);
+			if (p == NULL)
+				return -ENOMEM;
+			/* save the block number within level l;
+			 * it is used for quota file generation */
+			p->num = tree->levels[l].freenum++;
+		}
+		list_add(&p->list, &tree->levels[l].usedlh);
+		memset(p->blocks, 0, sizeof(p->blocks));
+		*st->block = p;		/* hook the node into the empty slot */
+
+		index = (id >> QUOTATREE_BSHIFT(l)) & QUOTATREE_BMASK;
+		st->block = p->blocks + index;	/* descend to the slot for @id */
+		st->level++;
+	}
+	tree->leaf_num++;
+	*st->block = data;	/* st now points at the leaf slot */
+
+	return 0;
+}
+
+/* Clear the slot reached for @id at depth @level; returns its owner node. */
+static struct quotatree_node *
+quotatree_remove_ptr(struct quotatree_tree *tree, quotaid_t id, int level)
+{
+	struct quotatree_find_state where;
+	struct quotatree_node *owner;
+
+	owner = quotatree_follow(tree, id, level, &where);
+	*where.block = NULL;
+	if (where.level == QUOTATREE_DEPTH)
+		tree->leaf_num--;	/* a data leaf has just been dropped */
+	return owner;
+}
+
+void quotatree_remove(struct quotatree_tree *tree, quotaid_t id)
+{
+	struct quotatree_node *node;
+	int depth, slot;
+
+	node = quotatree_remove_ptr(tree, id, QUOTATREE_DEPTH);
+	for (depth = QUOTATREE_DEPTH - 1; depth >= QUOTATREE_CDEPTH; depth--) {
+		for (slot = 0; slot < QUOTATREE_BSIZE; slot++)
+			if (node->blocks[slot] != NULL)
+				return;	/* node still in use: stop pruning */
+		list_move(&node->list, &tree->levels[depth].freelh);
+		node = quotatree_remove_ptr(tree, id, depth);
+	}
+}
+
+#if 0
+static void quotatree_walk(struct quotatree_tree *tree,
+		struct quotatree_node *node_start,
+		quotaid_t id_start,
+		int level_start, int level_end,
+		int (*callback)(struct quotatree_tree *,
+				quotaid_t id,
+				int level,
+				void *ptr,
+				void *data),
+		void *data)
+{
+	struct quotatree_node *p;
+	int l, shift, index;
+	quotaid_t id;
+	struct quotatree_find_state st;
+
+	p = node_start;
+	l = level_start;
+	shift = (QUOTATREE_DEPTH - l) * QUOTAID_BBITS;
+	id = id_start;
+	index = 0;
+
+	/*
+	 * Invariants:
+	 * shift == (QUOTATREE_DEPTH - l) * QUOTAID_BBITS;
+	 * id & ((1 << shift) - 1) == 0
+	 * p is l-level node corresponding to id
+	 */
+	do {
+		if (!p)
+			break;
+
+		if (l < level_end) {
+			for (; index < QUOTATREE_BSIZE; index++)
+				if (p->blocks[index] != NULL)
+					break;
+			if (index < QUOTATREE_BSIZE) {
+				/* descend */
+				p = p->blocks[index];
+				l++;
+				shift -= QUOTAID_BBITS;
+				id += (quotaid_t)index << shift;
+				index = 0;
+				continue;
+			}
+		}
+
+		if ((*callback)(tree, id, l, p, data))
+			break;
+
+		/* ascend and to the next node */
+		p = quotatree_follow(tree, id, l, &st);
+
+		index = ((id >> shift) & QUOTATREE_BMASK) + 1;
+		l--;
+		shift += QUOTAID_BBITS;
+		id &= ~(((quotaid_t)1 << shift) - 1);
+	} while (l >= level_start);
+}
+#endif
+
+static void free_list(struct list_head *node_list)
+{
+	while (!list_empty(node_list)) {	/* pop and free the head node */
+		struct quotatree_node *node = list_entry(node_list->next,
+						struct quotatree_node, list);
+		list_del(&node->list);
+		kfree(node);
+	}
+}
+
+static inline void quotatree_free_nodes(struct quotatree_tree *tree)
+{
+	int level;
+
+	for (level = 0; level < QUOTATREE_DEPTH; level++) {
+		free_list(&tree->levels[level].usedlh);	/* nodes in the tree */
+		free_list(&tree->levels[level].freelh);	/* spare nodes */
+	}
+}
+
+/*
+ * Call @dtor on every data leaf of @tree.  The slots themselves are left
+ * untouched, so this is meant to be followed by freeing the nodes.
+ */
+static void quotatree_free_leafs(struct quotatree_tree *tree,
+		void (*dtor)(void *))
+{
+	struct quotatree_node *node;
+	int slot;
+
+	list_for_each_entry(node, &QTREE_LEAFLVL(tree)->usedlh, list)
+		for (slot = 0; slot < QUOTATREE_BSIZE; slot++)
+			if (node->blocks[slot] != NULL)
+				dtor(node->blocks[slot]);
+}
+
+void quotatree_free(struct quotatree_tree *tree, void (*dtor)(void *))
+{
+	quotatree_free_leafs(tree, dtor);	/* @dtor is called on each leaf */
+	quotatree_free_nodes(tree);		/* used and spare nodes alike */
+	kfree(tree);
+}
diff -uprN linux-2.6.16/fs/vzdq_ugid.c linux-2.6.16.ovz/fs/vzdq_ugid.c
--- linux-2.6.16/fs/vzdq_ugid.c	1969-12-31 19:00:00.000000000 -0500
+++ linux-2.6.16.ovz/fs/vzdq_ugid.c	2006-07-05 08:34:56.000000000 -0400
@@ -0,0 +1,1130 @@
+/*
+ * Copyright (C) 2002 SWsoft
+ * All rights reserved.
+ * 
+ * Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ * This file contains Virtuozzo UID/GID disk quota implementation
+ */
+
+#include <linux/config.h>
+#include <linux/string.h>
+#include <linux/slab.h>
+#include <linux/list.h>
+#include <linux/smp_lock.h>
+#include <linux/rcupdate.h>
+#include <asm/uaccess.h>
+#include <linux/proc_fs.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/quota.h>
+#include <linux/quotaio_v2.h>
+#include <linux/virtinfo.h>
+
+#include <linux/vzctl.h>
+#include <linux/vzctl_quota.h>
+#include <linux/vzquota.h>
+
+/*
+ * XXX
+ * may be something is needed for sb->s_dquot->info[]?
+ */
+
+#define USRQUOTA_MASK		(1 << USRQUOTA)
+#define GRPQUOTA_MASK		(1 << GRPQUOTA)
+#define QTYPE2MASK(type)	(1 << (type))
+
+static kmem_cache_t *vz_quota_ugid_cachep;
+
+/* guard to protect vz_quota_master from destroy in quota_on/off. Also protects
+ * list on the hash table */
+extern struct semaphore vz_quota_sem;
+
+inline struct vz_quota_ugid *vzquota_get_ugid(struct vz_quota_ugid *qugid)
+{
+	if (qugid != VZ_QUOTA_UGBAD)	/* the "bad" marker is not refcounted */
+		atomic_inc(&qugid->qugid_count);
+	return qugid;
+}
+
+/* true when no limit at all is set: we don't limit such users */
+static inline int vzquota_fake_stat(struct dq_stat *stat)
+{
+	return !(stat->bhardlimit != 0 || stat->bsoftlimit != 0 ||
+		 stat->ihardlimit != 0 || stat->isoftlimit != 0);
+}
+
+/* returns one ugid to its slab cache; also the quotatree_free() callback */
+static inline void vzquota_free_qugid(void *ptr)
+{
+	kmem_cache_free(vz_quota_ugid_cachep, ptr);
+}
+
+/*
+ * drop a reference; destroy the ugid if it has zero refcount, limits and
+ * usage.  Must be called under qmblk->dq_sem.
+ */
+void vzquota_put_ugid(struct vz_quota_master *qmblk,
+		struct vz_quota_ugid *qugid)
+{
+	if (qugid == VZ_QUOTA_UGBAD)
+		return;
+	qmblk_data_read_lock(qmblk);
+	if (atomic_dec_and_test(&qugid->qugid_count) &&
+	    (qmblk->dq_flags & VZDQUG_FIXED_SET) == 0 &&
+	    vzquota_fake_stat(&qugid->qugid_stat) &&
+	    qugid->qugid_stat.bcurrent == 0 &&
+	    qugid->qugid_stat.icurrent == 0) {
+		quotatree_remove(QUGID_TREE(qmblk, qugid->qugid_type),
+				qugid->qugid_id);
+		qmblk->dq_ugid_count--;
+		vzquota_free_qugid(qugid);
+	}
+	qmblk_data_read_unlock(qmblk);
+}
+
+/*
+ * Get ugid block by its index, as if the blocks were stored in an array.
+ * In reality this is not an array but the chain of leaves of the tree.
+ * NULL if index is out of range.
+ * qmblk semaphore is required to protect the tree.
+ */
+static inline struct vz_quota_ugid *
+vzquota_get_byindex(struct vz_quota_master *qmblk, unsigned int index, int type)
+{
+	return quotatree_leaf_byindex(QUGID_TREE(qmblk, type), index);
+}
+
+/*
+ * get next element from ugid "virtual array"
+ * ugid must be in current array and this array may not be changed between
+ * two accesses (guaranteed by "stopped" quota state and quota semaphore)
+ * qmblk semaphore is required to protect the tree
+ */
+static inline struct vz_quota_ugid *
+vzquota_get_next(struct vz_quota_master *qmblk, struct vz_quota_ugid *qugid)
+{
+	return quotatree_get_next(QUGID_TREE(qmblk, qugid->qugid_type),
+			qugid->qugid_id);
+}
+
+/*
+ * find (or, @flags permitting, create) the ugid @quota_id; requires dq_sem
+ */
+struct vz_quota_ugid *__vzquota_find_ugid(struct vz_quota_master *qmblk,
+			unsigned int quota_id, int type, int flags)
+{
+	struct vz_quota_ugid *qugid;
+	struct quotatree_tree *tree;
+	struct quotatree_find_state st;
+
+	tree = QUGID_TREE(qmblk, type);
+	qugid = quotatree_find(tree, quota_id, &st);
+	if (qugid)
+		goto success;
+
+	/* not found, and the caller does not want a new one allocated */
+	if (flags & VZDQUG_FIND_DONT_ALLOC)
+		goto fail;
+
+	if (flags & VZDQUG_FIND_FAKE)
+		goto doit;	/* skips the two checks below */
+
+	/* check the limit on the number of ugid records */
+	if (qmblk->dq_ugid_count >= qmblk->dq_ugid_max)
+		goto fail;
+
+	/* see comment at VZDQUG_FIXED_SET define */
+	if (qmblk->dq_flags & VZDQUG_FIXED_SET)
+		goto fail;
+
+doit:
+	/* alloc new structure */
+	qugid = kmem_cache_alloc(vz_quota_ugid_cachep,
+			SLAB_NOFS | __GFP_NOFAIL);
+	if (qugid == NULL)
+		goto fail;
+
+	/* initialize new structure: zero usage, zero limits, no references */
+	qugid->qugid_id = quota_id;
+	memset(&qugid->qugid_stat, 0, sizeof(qugid->qugid_stat));
+	qugid->qugid_type = type;
+	atomic_set(&qugid->qugid_count, 0);
+
+	/* insert in tree; st still describes where the lookup stopped */
+	if (quotatree_insert(tree, quota_id, &st, qugid) < 0)
+		goto fail_insert;
+	qmblk->dq_ugid_count++;
+
+success:
+	vzquota_get_ugid(qugid);	/* the result carries a reference */
+	return qugid;
+
+fail_insert:
+	vzquota_free_qugid(qugid);
+fail:
+	return VZ_QUOTA_UGBAD;
+}
+
+/*
+ * __vzquota_find_ugid() wrapped in dq_sem: takes dq_sem, may schedule
+ */
+struct vz_quota_ugid *vzquota_find_ugid(struct vz_quota_master *qmblk,
+			unsigned int quota_id, int type, int flags)
+{
+	struct vz_quota_ugid *found;
+
+	down(&qmblk->dq_sem);
+	found = __vzquota_find_ugid(qmblk, quota_id, type, flags);
+	up(&qmblk->dq_sem);
+
+	return found;
+}
+
+/*
+ * destroy all ugid records on given quota master
+ */
+void vzquota_kill_ugid(struct vz_quota_master *qmblk)
+{
+	/* both trees must be present, or both absent */
+	BUG_ON((qmblk->dq_uid_tree == NULL) != (qmblk->dq_gid_tree == NULL));
+	if (qmblk->dq_uid_tree == NULL)
+		return;
+
+	quotatree_free(qmblk->dq_uid_tree, vzquota_free_qugid);
+	quotatree_free(qmblk->dq_gid_tree, vzquota_free_qugid);
+}
+
+
+/* ----------------------------------------------------------------------
+ * Management interface to ugid quota for (super)users.
+ * --------------------------------------------------------------------- */
+
+/**
+ * vzquota_find_qmblk - helper to emulate quota on virtual filesystems
+ *
+ * This function finds a quota master block corresponding to the root of
+ * a virtual filesystem.
+ * Returns a quota master block with reference taken, or %NULL if not under
+ * quota, or %VZ_QUOTA_BAD if quota inconsistency is found (and all allocation
+ * operations will fail).
+ *
+ * Note: this function uses vzquota_inode_qmblk().
+ * The latter is a rather confusing function: it returns qmblk that used to be
+ * on the inode some time ago (without guarantee that it still has any
+ * relations to the inode).  So, vzquota_find_qmblk() leaves it up to the
+ * caller to think whether the inode could have changed its qmblk and what to
+ * do in that case.
+ * Currently, the callers appear to not care :(
+ */
+struct vz_quota_master *vzquota_find_qmblk(struct super_block *sb)
+{
+	struct inode *root;
+
+	/* no get_quota_root hook: treated as not under quota */
+	if (sb->s_op->get_quota_root == NULL)
+		return NULL;
+
+	root = sb->s_op->get_quota_root(sb);
+	if (root == NULL)
+		return NULL;
+	return vzquota_inode_qmblk(root);
+}
+
+static int vzquota_initialize2(struct inode *inode, int type)
+{
+	return QUOTA_OK;	/* no-op callback for vz_quota_operations2 */
+}
+
+static int vzquota_drop2(struct inode *inode)
+{
+	return QUOTA_OK;	/* no-op */
+}
+
+static int vzquota_alloc_space2(struct inode *inode,
+			     qsize_t number, int prealloc)
+{
+	inode_add_bytes(inode, number);	/* no limit is checked here */
+	return QUOTA_OK;
+}
+
+static int vzquota_alloc_inode2(const struct inode *inode, unsigned long number)
+{
+	return QUOTA_OK;	/* no-op: always allowed */
+}
+
+static int vzquota_free_space2(struct inode *inode, qsize_t number)
+{
+	inode_sub_bytes(inode, number);	/* mirrors vzquota_alloc_space2() */
+	return QUOTA_OK;
+}
+
+static int vzquota_free_inode2(const struct inode *inode, unsigned long number)
+{
+	return QUOTA_OK;	/* no-op */
+}
+
+static int vzquota_transfer2(struct inode *inode, struct iattr *iattr)
+{
+	return QUOTA_OK;	/* no-op: always allowed */
+}
+
+struct dquot_operations vz_quota_operations2 = {
+	.initialize	= vzquota_initialize2,
+	.drop		= vzquota_drop2,
+	.alloc_space	= vzquota_alloc_space2,
+	.alloc_inode	= vzquota_alloc_inode2,
+	.free_space	= vzquota_free_space2,
+	.free_inode	= vzquota_free_inode2,
+	.transfer	= vzquota_transfer2,
+};
+
+static int vz_quota_on(struct super_block *sb, int type,
+		int format_id, char *path)	/* last two are not used */
+{
+	struct vz_quota_master *qmblk;
+	int mask, mask2;
+	int err;
+
+	qmblk = vzquota_find_qmblk(sb);	/* reference is dropped at "out" */
+	down(&vz_quota_sem);
+	err = -ESRCH;
+	if (qmblk == NULL)
+		goto out;
+	err = -EIO;
+	if (qmblk == VZ_QUOTA_BAD)
+		goto out;
+
+	mask = 0;
+	mask2 = 0;
+	sb->dq_op = &vz_quota_operations2;	/* NOTE(review): set even on -EBUSY */
+	sb->s_qcop = &vz_quotactl_operations;
+	if (type == USRQUOTA) {
+		mask = DQUOT_USR_ENABLED;
+		mask2 = VZDQ_USRQUOTA;
+	}
+	if (type == GRPQUOTA) {
+		mask = DQUOT_GRP_ENABLED;
+		mask2 = VZDQ_GRPQUOTA;
+	}
+	err = -EBUSY;
+	if (qmblk->dq_flags & mask2)	/* already switched on for this type */
+		goto out;
+
+	err = 0;
+	qmblk->dq_flags |= mask2;
+	sb->s_dquot.flags |= mask;
+
+out:
+	up(&vz_quota_sem);
+	if (qmblk != NULL && qmblk != VZ_QUOTA_BAD)
+		qmblk_put(qmblk);
+	return err;
+}
+
+static int vz_quota_off(struct super_block *sb, int type)
+{
+	struct vz_quota_master *master;
+	int bit = 0;
+	int ret;
+
+	master = vzquota_find_qmblk(sb);
+	down(&vz_quota_sem);
+	if (master == NULL) {
+		ret = -ESRCH;
+		goto unlock;
+	}
+	if (master == VZ_QUOTA_BAD) {
+		ret = -EIO;
+		goto unlock;
+	}
+	if (type == USRQUOTA)
+		bit = VZDQ_USRQUOTA;
+	else if (type == GRPQUOTA)
+		bit = VZDQ_GRPQUOTA;
+
+	/* not switched on; also hit for any other @type, where bit stays 0 */
+	ret = -EINVAL;
+	if (master->dq_flags & bit) {
+		master->dq_flags &= ~bit;
+		ret = 0;
+	}
+unlock:
+	up(&vz_quota_sem);
+	if (master != NULL && master != VZ_QUOTA_BAD)
+		qmblk_put(master);
+	return ret;
+}
+
+static int vz_quota_sync(struct super_block *sb, int type)
+{
+	return 0;	/* nothing to do: vz quota is always uptodate */
+}
+
+static int vz_get_dqblk(struct super_block *sb, int type,
+		qid_t id, struct if_dqblk *di)
+{
+	struct vz_quota_master *qmblk;
+	struct vz_quota_ugid *ugid;
+	int err;
+
+	qmblk = vzquota_find_qmblk(sb);	/* reference is dropped at "out" */
+	down(&vz_quota_sem);
+	err = -ESRCH;
+	if (qmblk == NULL)
+		goto out;
+	err = -EIO;
+	if (qmblk == VZ_QUOTA_BAD)
+		goto out;
+
+	err = 0;
+	ugid = vzquota_find_ugid(qmblk, id, type, VZDQUG_FIND_DONT_ALLOC);
+	if (ugid != VZ_QUOTA_UGBAD) {
+		qmblk_data_read_lock(qmblk);
+		di->dqb_bhardlimit = ugid->qugid_stat.bhardlimit >> 10;	/* inverse of the << 10 in __vz_set_dqblk() */
+		di->dqb_bsoftlimit = ugid->qugid_stat.bsoftlimit >> 10;
+		di->dqb_curspace = ugid->qugid_stat.bcurrent;
+		di->dqb_ihardlimit = ugid->qugid_stat.ihardlimit;
+		di->dqb_isoftlimit = ugid->qugid_stat.isoftlimit;
+		di->dqb_curinodes = ugid->qugid_stat.icurrent;
+		di->dqb_btime = ugid->qugid_stat.btime;
+		di->dqb_itime = ugid->qugid_stat.itime;
+		qmblk_data_read_unlock(qmblk);
+		di->dqb_valid = QIF_ALL;
+		vzquota_put_ugid(qmblk, ugid);
+	} else {
+		memset(di, 0, sizeof(*di));	/* no record: report all zeroes */
+		di->dqb_valid = QIF_ALL;
+	}
+
+out:
+	up(&vz_quota_sem);
+	if (qmblk != NULL && qmblk != VZ_QUOTA_BAD)
+		qmblk_put(qmblk);
+	return err;
+}
+
+/* Set limits of ugid @id from @di; must be called under vz_quota_sem. */
+static int __vz_set_dqblk(struct vz_quota_master *qmblk,
+		int type, qid_t id, struct if_dqblk *di)
+{
+	struct vz_quota_ugid *ugid;
+
+	/* type indexes dq_ugid_info[]; the VE0 interface passes it unchecked */
+	if (type < 0 || type >= MAXQUOTAS)
+		return -EINVAL;
+	ugid = vzquota_find_ugid(qmblk, id, type, 0);
+	if (ugid == VZ_QUOTA_UGBAD)
+		return -ESRCH;
+
+	qmblk_data_write_lock(qmblk);
+	/*
+	 * Subtle compatibility breakage.
+	 *
+	 * Some old non-vz kernel quota didn't start the grace period if the
+	 * new soft limit happens to be below the usage.  Non-vz kernel quota
+	 * in 2.4.20 starts the grace period (if it hasn't been started).
+	 * Current non-vz kernel performs even more complicated manipulations.
+	 *
+	 * Also, current non-vz kernels are inconsistent about the grace time
+	 * start.  In regular operations the grace period is started if the
+	 * usage is greater than the soft limit (and, strangely, is cancelled
+	 * if the usage is less).  However, set_dqblk starts the grace period
+	 * if the usage is greater than or equal to the soft limit.
+	 *
+	 * Here we try to mimic the behavior of the current non-vz kernel.
+	 */
+	if (di->dqb_valid & QIF_BLIMITS) {
+		ugid->qugid_stat.bhardlimit =
+			(__u64)di->dqb_bhardlimit << 10;
+		ugid->qugid_stat.bsoftlimit =
+			(__u64)di->dqb_bsoftlimit << 10;
+		if (di->dqb_bsoftlimit == 0 ||
+		    ugid->qugid_stat.bcurrent < ugid->qugid_stat.bsoftlimit)
+			ugid->qugid_stat.btime = 0;
+		else if (!(di->dqb_valid & QIF_BTIME))
+			ugid->qugid_stat.btime = CURRENT_TIME_SECONDS
+				+ qmblk->dq_ugid_info[type].bexpire;
+		else
+			ugid->qugid_stat.btime = di->dqb_btime;
+	}
+	if (di->dqb_valid & QIF_ILIMITS) {
+		ugid->qugid_stat.ihardlimit = di->dqb_ihardlimit;
+		ugid->qugid_stat.isoftlimit = di->dqb_isoftlimit;
+		if (di->dqb_isoftlimit == 0 ||
+		    ugid->qugid_stat.icurrent < ugid->qugid_stat.isoftlimit)
+			ugid->qugid_stat.itime = 0;
+		else if (!(di->dqb_valid & QIF_ITIME))
+			ugid->qugid_stat.itime = CURRENT_TIME_SECONDS
+				+ qmblk->dq_ugid_info[type].iexpire;
+		else
+			ugid->qugid_stat.itime = di->dqb_itime;
+	}
+	qmblk_data_write_unlock(qmblk);
+	vzquota_put_ugid(qmblk, ugid);
+
+	return 0;
+}
+
+static int vz_set_dqblk(struct super_block *sb, int type,
+		qid_t id, struct if_dqblk *di)
+{
+	struct vz_quota_master *qmblk;
+	int err;
+
+	qmblk = vzquota_find_qmblk(sb);
+	down(&vz_quota_sem);
+	err = -ESRCH;
+	if (qmblk == NULL)
+		goto out;
+	err = -EIO;
+	if (qmblk == VZ_QUOTA_BAD)
+		goto out;
+	err = __vz_set_dqblk(qmblk, type, id, di);
+out:
+	up(&vz_quota_sem);
+	if (qmblk != NULL && qmblk != VZ_QUOTA_BAD)
+		qmblk_put(qmblk);
+	return err;
+}
+
+static int vz_get_dqinfo(struct super_block *sb, int type,
+		struct if_dqinfo *ii)
+{
+	struct vz_quota_master *qmblk;
+	int err;
+
+	qmblk = vzquota_find_qmblk(sb);
+	down(&vz_quota_sem);
+	err = -ESRCH;
+	if (qmblk == NULL)
+		goto out;
+	err = -EIO;
+	if (qmblk == VZ_QUOTA_BAD)
+		goto out;
+
+	err = 0;
+	ii->dqi_bgrace = qmblk->dq_ugid_info[type].bexpire;
+	ii->dqi_igrace = qmblk->dq_ugid_info[type].iexpire;
+	ii->dqi_flags = 0;
+	ii->dqi_valid = IIF_ALL;
+
+out:
+	up(&vz_quota_sem);
+	if (qmblk != NULL && qmblk != VZ_QUOTA_BAD)
+		qmblk_put(qmblk);
+	return err;
+}
+
+/* must be called under vz_quota_sem; rejects @type outside dq_ugid_info[] */
+static int __vz_set_dqinfo(struct vz_quota_master *qmblk,
+		int type, struct if_dqinfo *ii)
+{
+	if (type < 0 || type >= MAXQUOTAS)
+		return -EINVAL;	/* the VE0 interface passes type unchecked */
+	if ((ii->dqi_valid & IIF_FLAGS) && (ii->dqi_flags & DQF_MASK))
+		return -EINVAL;	/* DQF_MASK flag bits are refused */
+	if (ii->dqi_valid & IIF_BGRACE)
+		qmblk->dq_ugid_info[type].bexpire = ii->dqi_bgrace;
+	if (ii->dqi_valid & IIF_IGRACE)
+		qmblk->dq_ugid_info[type].iexpire = ii->dqi_igrace;
+	return 0;
+}
+
+static int vz_set_dqinfo(struct super_block *sb, int type,
+		struct if_dqinfo *ii)
+{
+	struct vz_quota_master *qmblk;
+	int err;
+
+	qmblk = vzquota_find_qmblk(sb);
+	down(&vz_quota_sem);
+	err = -ESRCH;
+	if (qmblk == NULL)
+		goto out;
+	err = -EIO;
+	if (qmblk == VZ_QUOTA_BAD)
+		goto out;
+	err = __vz_set_dqinfo(qmblk, type, ii);
+out:
+	up(&vz_quota_sem);
+	if (qmblk != NULL && qmblk != VZ_QUOTA_BAD)
+		qmblk_put(qmblk);
+	return err;
+}
+
+#ifdef CONFIG_QUOTA_COMPAT
+
+#define Q_GETQUOTI_SIZE 1024
+
+#define UGID2DQBLK(dst, src)						\
+	do {								\
+		(dst)->dqb_ihardlimit = (src)->qugid_stat.ihardlimit;	\
+		(dst)->dqb_isoftlimit = (src)->qugid_stat.isoftlimit;	\
+		(dst)->dqb_curinodes = (src)->qugid_stat.icurrent;	\
+		/* in 1K blocks */					\
+		(dst)->dqb_bhardlimit = (src)->qugid_stat.bhardlimit >> 10; \
+		/* in 1K blocks */					\
+		(dst)->dqb_bsoftlimit = (src)->qugid_stat.bsoftlimit >> 10; \
+		/* in bytes, 64 bit */					\
+		(dst)->dqb_curspace = (src)->qugid_stat.bcurrent;	\
+		(dst)->dqb_btime = (src)->qugid_stat.btime;		\
+		(dst)->dqb_itime = (src)->qugid_stat.itime;		\
+	} while (0)
+
+static int vz_get_quoti(struct super_block *sb, int type, qid_t idx,
+		struct v2_disk_dqblk *dqblk)
+{
+	struct vz_quota_master *qmblk;
+	struct v2_disk_dqblk *data, *kbuf;
+	struct vz_quota_ugid *ugid;
+	int count;
+	int err;
+
+	qmblk = vzquota_find_qmblk(sb);
+	err = -ESRCH;
+	if (qmblk == NULL)
+		goto out;
+	err = -EIO;
+	if (qmblk == VZ_QUOTA_BAD)
+		goto out;
+
+	err = -ENOMEM;
+	kbuf = vmalloc(Q_GETQUOTI_SIZE * sizeof(*kbuf));
+	if (!kbuf)
+		goto out;
+
+	down(&vz_quota_sem);
+	down(&qmblk->dq_sem);
+	for (ugid = vzquota_get_byindex(qmblk, idx, type), count = 0;
+		ugid != NULL && count < Q_GETQUOTI_SIZE;
+		count++)
+	{
+		data = kbuf + count;
+		qmblk_data_read_lock(qmblk);
+		UGID2DQBLK(data, ugid);
+		qmblk_data_read_unlock(qmblk);
+		data->dqb_id = ugid->qugid_id;
+
+		/* Find next entry */
+		ugid = vzquota_get_next(qmblk, ugid);
+		BUG_ON(ugid != NULL && ugid->qugid_type != type);
+	}
+	up(&qmblk->dq_sem);
+	up(&vz_quota_sem);
+
+	err = count;
+	if (copy_to_user(dqblk, kbuf, count * sizeof(*kbuf)))
+		err = -EFAULT;
+
+	vfree(kbuf);
+out:
+	if (qmblk != NULL && qmblk != VZ_QUOTA_BAD)
+		qmblk_put(qmblk);
+
+	return err;
+}
+
+#endif
+
+struct quotactl_ops vz_quotactl_operations = {	/* installed as sb->s_qcop */
+	.quota_on	= vz_quota_on,
+	.quota_off	= vz_quota_off,
+	.quota_sync	= vz_quota_sync,	/* no-op, always returns 0 */
+	.get_info	= vz_get_dqinfo,
+	.set_info	= vz_set_dqinfo,
+	.get_dqblk	= vz_get_dqblk,
+	.set_dqblk	= vz_set_dqblk,
+#ifdef CONFIG_QUOTA_COMPAT
+	.get_quoti	= vz_get_quoti,
+#endif
+};
+
+
+/* ----------------------------------------------------------------------
+ * Management interface for host system admins.
+ * --------------------------------------------------------------------- */
+
+static int quota_ugid_addstat(unsigned int quota_id, unsigned int ugid_size,
+		struct vz_quota_iface *u_ugid_buf)
+{
+	struct vz_quota_master *qmblk;
+	int ret;
+
+	down(&vz_quota_sem);
+
+	ret = -ENOENT;
+	qmblk = vzquota_find_master(quota_id);
+	if (qmblk == NULL)
+		goto out;
+
+	ret = -EBUSY;
+	if (qmblk->dq_state != VZDQ_STARTING)
+		goto out; /* working quota doesn't accept new ugids */
+
+	ret = 0;
+	/* start to add ugids */
+	for (ret = 0; ret < ugid_size; ret++) {
+		struct vz_quota_iface ugid_buf;
+		struct vz_quota_ugid *ugid;
+
+		if (copy_from_user(&ugid_buf, u_ugid_buf, sizeof(ugid_buf)))
+			break;
+
+		if (ugid_buf.qi_type >= MAXQUOTAS)
+			break; /* bad quota type - this is the only check */
+
+		ugid = vzquota_find_ugid(qmblk,
+				ugid_buf.qi_id, ugid_buf.qi_type, 0);
+		if (ugid == VZ_QUOTA_UGBAD) {
+			qmblk->dq_flags |= VZDQUG_FIXED_SET;
+			break; /* limit reached */
+		}
+
+		/* update usage/limits 
+		 * we can copy the data without the lock, because the data
+		 * cannot be modified in VZDQ_STARTING state */
+		ugid->qugid_stat = ugid_buf.qi_stat;
+
+		vzquota_put_ugid(qmblk, ugid);
+
+		u_ugid_buf++; /* next user buffer */
+	}
+out:
+	up(&vz_quota_sem);
+
+	return ret;
+}
+
+/*
+ * Load per-type grace periods (bexpire/iexpire) of quota @quota_id from
+ * the user array @u_dq_info; accepted only in VZDQ_STARTING state.
+ */
+static int quota_ugid_setgrace(unsigned int quota_id,
+		struct dq_info u_dq_info[])
+{
+	struct dq_info new_info[MAXQUOTAS];
+	struct vz_quota_master *qmblk;
+	int err, i;
+
+	down(&vz_quota_sem);
+	qmblk = vzquota_find_master(quota_id);
+	if (qmblk == NULL) {
+		err = -ENOENT;
+		goto out;
+	}
+	if (qmblk->dq_state != VZDQ_STARTING) {
+		/* working quota doesn't accept changing options */
+		err = -EBUSY;
+		goto out;
+	}
+	if (copy_from_user(new_info, u_dq_info, sizeof(new_info))) {
+		err = -EFAULT;
+		goto out;
+	}
+	/* only the two expiration times are taken over into qmblk */
+	for (i = 0; i < MAXQUOTAS; i++) {
+		qmblk->dq_ugid_info[i].bexpire = new_info[i].bexpire;
+		qmblk->dq_ugid_info[i].iexpire = new_info[i].iexpire;
+	}
+	err = 0;
+out:
+	up(&vz_quota_sem);
+	return err;
+}
+
+static int do_quota_ugid_getstat(struct vz_quota_master *qmblk, int index, int size,
+		struct vz_quota_iface *u_ugid_buf)
+{
+	int type, count;
+	struct vz_quota_ugid *ugid;
+
+	if (QTREE_LEAFNUM(qmblk->dq_uid_tree) +
+	    QTREE_LEAFNUM(qmblk->dq_gid_tree)
+	    		<= index)
+		return 0;
+
+	count = 0;
+
+	type = index < QTREE_LEAFNUM(qmblk->dq_uid_tree) ? USRQUOTA : GRPQUOTA;
+	if (type == GRPQUOTA)
+		index -= QTREE_LEAFNUM(qmblk->dq_uid_tree);
+
+	/* loop through ugid and then qgid quota */
+repeat:
+	for (ugid = vzquota_get_byindex(qmblk, index, type);
+		ugid != NULL && count < size;
+		ugid = vzquota_get_next(qmblk, ugid), count++)
+	{
+		struct vz_quota_iface ugid_buf;
+
+		/* form interface buffer and send in to user-level */
+		qmblk_data_read_lock(qmblk);
+		memcpy(&ugid_buf.qi_stat, &ugid->qugid_stat,
+				sizeof(ugid_buf.qi_stat));
+		qmblk_data_read_unlock(qmblk);
+		ugid_buf.qi_id = ugid->qugid_id;
+		ugid_buf.qi_type = ugid->qugid_type;
+
+		memcpy(u_ugid_buf, &ugid_buf, sizeof(ugid_buf));
+		u_ugid_buf++; /* next portion of user buffer */
+	}
+
+	if (type == USRQUOTA && count < size) {
+		type = GRPQUOTA;
+		index = 0;
+		goto repeat;
+	}
+
+	return count;
+}
+
+/*
+ * Copy up to @size ugid records of quota @quota_id, starting at @index,
+ * to @u_ugid_buf.  Returns the number of records copied or -errno.
+ */
+static int quota_ugid_getstat(unsigned int quota_id,
+		int index, int size, struct vz_quota_iface *u_ugid_buf)
+{
+	struct vz_quota_master *qmblk;
+	struct vz_quota_iface *k_ugid_buf;
+	int err;
+
+	if (index < 0 || size < 0 ||
+	    size > INT_MAX / sizeof(struct vz_quota_iface))
+		return -EINVAL;
+
+	k_ugid_buf = vmalloc(size * sizeof(struct vz_quota_iface));
+	if (k_ugid_buf == NULL)
+		return -ENOMEM;
+
+	down(&vz_quota_sem);
+	err = -ENOENT;
+	qmblk = vzquota_find_master(quota_id);
+	if (qmblk == NULL)
+		goto out;
+
+	down(&qmblk->dq_sem);
+	err = do_quota_ugid_getstat(qmblk, index, size, k_ugid_buf);
+	up(&qmblk->dq_sem);
+	if (err < 0)
+		goto out;
+	/* only the first err records were filled; the rest is uninitialized */
+	if (copy_to_user(u_ugid_buf, k_ugid_buf,
+				err * sizeof(struct vz_quota_iface)))
+		err = -EFAULT;
+out:
+	up(&vz_quota_sem);
+	vfree(k_ugid_buf);
+	return err;
+}
+
+/* Copy the per-type grace settings of quota @quota_id to @u_dq_info. */
+static int quota_ugid_getgrace(unsigned int quota_id,
+		struct dq_info u_dq_info[])
+{
+	struct vz_quota_master *qmblk;
+	struct dq_info dq_info[MAXQUOTAS];
+	struct dq_info *target;
+	int err, type;
+
+	down(&vz_quota_sem);
+	err = -ENOENT;
+	qmblk = vzquota_find_master(quota_id);
+	if (qmblk == NULL)
+		goto out;
+
+	err = 0;
+	/* the whole array goes to user space: don't leak stack contents */
+	memset(dq_info, 0, sizeof(dq_info));
+	for (type = 0; type < MAXQUOTAS; type++) {
+		target = &qmblk->dq_ugid_info[type];
+		dq_info[type].bexpire = target->bexpire;
+		dq_info[type].iexpire = target->iexpire;
+		dq_info[type].flags = target->flags;
+	}
+
+	if (copy_to_user(u_dq_info, dq_info, sizeof(dq_info)))
+		err = -EFAULT;
+out:
+	up(&vz_quota_sem);
+	return err;
+}
+
+/* Report ugid limit, current ugid count and flags of quota @quota_id. */
+static int quota_ugid_getconfig(unsigned int quota_id,
+		struct vz_quota_ugid_stat *info)
+{
+	struct vz_quota_master *qmblk;
+	struct vz_quota_ugid_stat kinfo;
+	int err;
+
+	down(&vz_quota_sem);
+	err = -ENOENT;
+	qmblk = vzquota_find_master(quota_id);
+	if (qmblk == NULL)
+		goto out;
+
+	err = 0;
+	/* zero first so no uninitialized stack bytes reach user space */
+	memset(&kinfo, 0, sizeof(kinfo));
+	kinfo.limit = qmblk->dq_ugid_max;
+	kinfo.count = qmblk->dq_ugid_count;
+	kinfo.flags = qmblk->dq_flags;
+	if (copy_to_user(info, &kinfo, sizeof(kinfo)))
+		err = -EFAULT;
+out:
+	up(&vz_quota_sem);
+	return err;
+}
+
+static int quota_ugid_setconfig(unsigned int quota_id,
+		struct vz_quota_ugid_stat *info)
+{
+	struct vz_quota_master *qmblk;
+	struct vz_quota_ugid_stat kinfo;
+	int err;
+
+	down(&vz_quota_sem);
+
+	err = -ENOENT;
+	qmblk = vzquota_find_master(quota_id);
+	if (qmblk == NULL)
+		goto out;
+
+	err = -EFAULT;
+	if (copy_from_user(&kinfo, info, sizeof(kinfo)))
+		goto out;
+
+	err = 0;
+	qmblk->dq_ugid_max = kinfo.limit;
+	if (qmblk->dq_state == VZDQ_STARTING) {
+		qmblk->dq_flags = kinfo.flags;
+		if (qmblk->dq_flags & VZDQUG_ON)
+			qmblk->dq_flags |= VZDQ_USRQUOTA | VZDQ_GRPQUOTA;
+	}		
+
+out:
+	up(&vz_quota_sem);
+
+	return err;
+}
+
+static int quota_ugid_setlimit(unsigned int quota_id,
+		struct vz_quota_ugid_setlimit *u_lim)
+{
+	struct vz_quota_master *qmblk;
+	struct vz_quota_ugid_setlimit lim;
+	int err;
+
+	down(&vz_quota_sem);
+
+	err = -ESRCH;
+	qmblk = vzquota_find_master(quota_id);
+	if (qmblk == NULL)
+		goto out;
+
+	err = -EFAULT;
+	if (copy_from_user(&lim, u_lim, sizeof(lim)))
+		goto out;
+
+	err = __vz_set_dqblk(qmblk, lim.type, lim.id, &lim.dqb);
+
+out:
+	up(&vz_quota_sem);
+
+	return err;
+}
+
+static int quota_ugid_setinfo(unsigned int quota_id,
+		struct vz_quota_ugid_setinfo *u_info)
+{
+	struct vz_quota_master *qmblk;
+	struct vz_quota_ugid_setinfo info;
+	int err;
+
+	down(&vz_quota_sem);
+
+	err = -ESRCH;
+	qmblk = vzquota_find_master(quota_id);
+	if (qmblk == NULL)
+		goto out;
+
+	err = -EFAULT;
+	if (copy_from_user(&info, u_info, sizeof(info)))
+		goto out;
+
+	err = __vz_set_dqinfo(qmblk, info.type, &info.dqi);
+
+out:
+	up(&vz_quota_sem);
+
+	return err;
+}
+
+/*
+ * This is a system call to maintain UGID quotas
+ * Note this call is allowed to run ONLY from VE0
+ */
+long do_vzquotaugidctl(struct vzctl_quotaugidctl *qub)
+{
+	int ret;
+
+	ret = -EPERM;
+	/* access allowed only from root of VE0 */
+	if (!capable(CAP_SYS_RESOURCE) ||
+	    !capable(CAP_SYS_ADMIN))
+		goto out;
+
+	switch (qub->cmd) {
+		case VZ_DQ_UGID_GETSTAT:
+			ret = quota_ugid_getstat(qub->quota_id,
+					qub->ugid_index, qub->ugid_size,
+				       	(struct vz_quota_iface *)qub->addr);
+			break;
+		case VZ_DQ_UGID_ADDSTAT:
+			ret = quota_ugid_addstat(qub->quota_id, qub->ugid_size,
+				       	(struct vz_quota_iface *)qub->addr);
+			break;
+		case VZ_DQ_UGID_GETGRACE:
+			ret = quota_ugid_getgrace(qub->quota_id,
+					(struct dq_info *)qub->addr);
+			break;
+		case VZ_DQ_UGID_SETGRACE:
+			ret = quota_ugid_setgrace(qub->quota_id,
+					(struct dq_info *)qub->addr);
+			break;
+		case VZ_DQ_UGID_GETCONFIG:
+			ret = quota_ugid_getconfig(qub->quota_id,
+					(struct vz_quota_ugid_stat *)qub->addr);
+			break;
+		case VZ_DQ_UGID_SETCONFIG:
+			ret = quota_ugid_setconfig(qub->quota_id,
+					(struct vz_quota_ugid_stat *)qub->addr);
+			break;
+		case VZ_DQ_UGID_SETLIMIT:
+			ret = quota_ugid_setlimit(qub->quota_id,
+					(struct vz_quota_ugid_setlimit *)
+								qub->addr);
+			break;
+		case VZ_DQ_UGID_SETINFO:
+			ret = quota_ugid_setinfo(qub->quota_id,
+					(struct vz_quota_ugid_setinfo *)
+								qub->addr);
+			break;
+		default:
+			ret = -EINVAL;
+			goto out;
+	}
+out:
+	return ret;
+}
+
+static void ugid_quota_on_sb(struct super_block *sb)
+{
+	struct super_block *real_sb;
+	struct vz_quota_master *qmblk;
+
+	if (!sb->s_op->get_quota_root)
+		return;
+
+	real_sb = sb->s_op->get_quota_root(sb)->i_sb;
+	if (real_sb->dq_op != &vz_quota_operations)
+		return;
+
+	sb->dq_op = &vz_quota_operations2;
+	sb->s_qcop = &vz_quotactl_operations;
+	INIT_LIST_HEAD(&sb->s_dquot.info[USRQUOTA].dqi_dirty_list);
+	INIT_LIST_HEAD(&sb->s_dquot.info[GRPQUOTA].dqi_dirty_list);
+	sb->s_dquot.info[USRQUOTA].dqi_format = &vz_quota_empty_v2_format;
+	sb->s_dquot.info[GRPQUOTA].dqi_format = &vz_quota_empty_v2_format;
+
+	qmblk = vzquota_find_qmblk(sb);
+	if ((qmblk == NULL) || (qmblk == VZ_QUOTA_BAD))
+		return;
+	down(&vz_quota_sem);
+	if (qmblk->dq_flags & VZDQ_USRQUOTA)
+		sb->s_dquot.flags |= DQUOT_USR_ENABLED;
+	if (qmblk->dq_flags & VZDQ_GRPQUOTA)
+		sb->s_dquot.flags |= DQUOT_GRP_ENABLED;
+	up(&vz_quota_sem);
+	qmblk_put(qmblk);
+}
+
+static void ugid_quota_off_sb(struct super_block *sb)
+{
+	/* can't make quota off on mounted super block */
+	BUG_ON(sb->s_root != NULL);
+}
+
+static int ugid_notifier_call(struct vnotifier_block *self,
+		unsigned long n, void *data, int old_ret)
+{
+	struct virt_info_quota *viq;
+
+	viq = (struct virt_info_quota *)data;
+
+	switch (n) {
+	case VIRTINFO_QUOTA_ON:
+		ugid_quota_on_sb(viq->super);
+		break;
+	case VIRTINFO_QUOTA_OFF:
+		ugid_quota_off_sb(viq->super);
+		break;
+	case VIRTINFO_QUOTA_GETSTAT:
+		break;
+	default:
+		return old_ret;
+	}
+	return NOTIFY_OK;
+}
+
+static struct vnotifier_block ugid_notifier_block = {
+	.notifier_call = ugid_notifier_call,
+};
+
+/* ----------------------------------------------------------------------
+ * Init/exit.
+ * --------------------------------------------------------------------- */
+
+struct quota_format_type vz_quota_empty_v2_format = {
+	.qf_fmt_id	= QFMT_VFS_V0,
+	.qf_ops		= NULL,		/* no format operations */
+	.qf_owner	= THIS_MODULE,
+};
+
+/*
+ * Set up the ugid part of vzquota: the slab cache for struct vz_quota_ugid,
+ * the empty v2 quota format and the VITYPE_QUOTA notifier.
+ * Returns 0 or a negative errno; on failure nothing is left registered.
+ */
+int vzquota_ugid_init()
+{
+	int err;
+
+	vz_quota_ugid_cachep = kmem_cache_create("vz_quota_ugid",
+			sizeof(struct vz_quota_ugid), 0, SLAB_HWCACHE_ALIGN,
+			NULL, NULL);
+	if (vz_quota_ugid_cachep == NULL) {
+		printk(KERN_ERR "Cannot create VZ_QUOTA SLAB cache\n");
+		return -ENOMEM;
+	}
+
+	err = register_quota_format(&vz_quota_empty_v2_format);
+	if (err) {
+		kmem_cache_destroy(vz_quota_ugid_cachep);
+		return err;
+	}
+
+	virtinfo_notifier_register(VITYPE_QUOTA, &ugid_notifier_block);
+	return 0;
+}
+
+void vzquota_ugid_release()
+{
+	virtinfo_notifier_unregister(VITYPE_QUOTA, &ugid_notifier_block);
+	unregister_quota_format(&vz_quota_empty_v2_format);
+
+	if (kmem_cache_destroy(vz_quota_ugid_cachep))
+		printk(KERN_ERR "VZQUOTA: kmem_cache_destroy failed\n");
+}
diff -uprN linux-2.6.16/fs/vzdquot.c linux-2.6.16.ovz/fs/vzdquot.c
--- linux-2.6.16/fs/vzdquot.c	1969-12-31 19:00:00.000000000 -0500
+++ linux-2.6.16.ovz/fs/vzdquot.c	2006-07-05 08:34:56.000000000 -0400
@@ -0,0 +1,1705 @@
+/*
+ * Copyright (C) 2001, 2002, 2004, 2005  SWsoft
+ * All rights reserved.
+ * 
+ * Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ * This file contains the core of Virtuozzo disk quota implementation:
+ * maintenance of VZDQ information in inodes,
+ * external interfaces,
+ * module entry.
+ */
+
+#include <linux/config.h>
+#include <linux/kernel.h>
+#include <linux/string.h>
+#include <linux/list.h>
+#include <asm/atomic.h>
+#include <linux/spinlock.h>
+#include <asm/semaphore.h>
+#include <linux/slab.h>
+#include <linux/fs.h>
+#include <linux/dcache.h>
+#include <linux/quota.h>
+#include <linux/rcupdate.h>
+#include <linux/module.h>
+#include <asm/uaccess.h>
+#include <linux/vzctl.h>
+#include <linux/vzctl_quota.h>
+#include <linux/vzquota.h>
+#include <linux/virtinfo.h>
+#include <linux/vzdq_tree.h>
+
+/* ----------------------------------------------------------------------
+ *
+ * Locking
+ *
+ * ---------------------------------------------------------------------- */
+
+/*
+ * Serializes on/off and all other do_vzquotactl operations.
+ * Protects qmblk hash.
+ */
+struct semaphore vz_quota_sem;
+
+/*
+ * Data access locks
+ *  inode_qmblk
+ *	protects qmblk pointers in all inodes and qlnk content in general
+ *	(but not qmblk content);
+ *	also protects related qmblk invalidation procedures;
+ *	can't be per-inode because of vzquota_dtree_qmblk complications
+ *	and problems with serialization with quota_on,
+ *	but can be per-superblock;
+ *  qmblk_data
+ *	protects qmblk fields (such as current usage)
+ *  quota_data
+ *	protects charge/uncharge operations, thus, implies
+ *	qmblk_data lock and, if CONFIG_VZ_QUOTA_UGID, inode_qmblk lock
+ *	(to protect ugid pointers).
+ *
+ * Lock order:
+ *  inode_qmblk_lock -> dcache_lock
+ *  inode_qmblk_lock -> qmblk_data
+ */
+static spinlock_t vzdq_qmblk_lock = SPIN_LOCK_UNLOCKED;
+
+inline void inode_qmblk_lock(struct super_block *sb)
+{
+	spin_lock(&vzdq_qmblk_lock);
+}
+
+inline void inode_qmblk_unlock(struct super_block *sb)
+{
+	spin_unlock(&vzdq_qmblk_lock);
+}
+
+inline void qmblk_data_read_lock(struct vz_quota_master *qmblk)
+{
+	spin_lock(&qmblk->dq_data_lock);
+}
+
+inline void qmblk_data_read_unlock(struct vz_quota_master *qmblk)
+{
+	spin_unlock(&qmblk->dq_data_lock);
+}
+
+inline void qmblk_data_write_lock(struct vz_quota_master *qmblk)
+{
+	spin_lock(&qmblk->dq_data_lock);
+}
+
+inline void qmblk_data_write_unlock(struct vz_quota_master *qmblk)
+{
+	spin_unlock(&qmblk->dq_data_lock);
+}
+
+
+/* ----------------------------------------------------------------------
+ *
+ * Master hash table handling.
+ *
+ * SMP not safe, serialized by vz_quota_sem within quota syscalls
+ *
+ * --------------------------------------------------------------------- */
+
+static kmem_cache_t *vzquota_cachep;
+
+/*
+ * Hash function.
+ */
+#define QHASH_BITS		6
+#define	VZ_QUOTA_HASH_SIZE	(1 << QHASH_BITS)
+#define QHASH_MASK		(VZ_QUOTA_HASH_SIZE - 1)
+
+struct list_head vzquota_hash_table[VZ_QUOTA_HASH_SIZE];
+int vzquota_hash_size = VZ_QUOTA_HASH_SIZE;
+
+static inline int vzquota_hash_func(unsigned int qid)
+{
+	return (((qid >> QHASH_BITS) ^ qid) & QHASH_MASK);
+}
+
+/**
+ * vzquota_alloc_master - alloc and instantiate master quota record
+ *
+ * Returns:
+ *	pointer to newly created record if SUCCESS
+ *	-ENOMEM if out of memory
+ *	-EEXIST if record with given quota_id already exist
+ */
+struct vz_quota_master *vzquota_alloc_master(unsigned int quota_id,
+		struct vz_quota_stat *qstat)
+{
+	int err;
+	struct vz_quota_master *qmblk;
+
+	err = -EEXIST;
+	if (vzquota_find_master(quota_id) != NULL)
+		goto out;
+
+	err = -ENOMEM;
+	qmblk = kmem_cache_alloc(vzquota_cachep, SLAB_KERNEL);
+	if (qmblk == NULL)
+		goto out;
+#ifdef CONFIG_VZ_QUOTA_UGID
+	qmblk->dq_uid_tree = quotatree_alloc();
+	if (!qmblk->dq_uid_tree)
+		goto out_free;
+
+	qmblk->dq_gid_tree = quotatree_alloc();
+	if (!qmblk->dq_gid_tree)
+		goto out_free_tree;
+#endif
+
+	qmblk->dq_state = VZDQ_STARTING;
+	init_MUTEX(&qmblk->dq_sem);
+	spin_lock_init(&qmblk->dq_data_lock);
+
+	qmblk->dq_id = quota_id;
+	qmblk->dq_stat = qstat->dq_stat;
+	qmblk->dq_info = qstat->dq_info;
+	qmblk->dq_root_dentry = NULL;
+	qmblk->dq_root_mnt = NULL;
+	qmblk->dq_sb = NULL;
+	qmblk->dq_ugid_count = 0;
+	qmblk->dq_ugid_max = 0;
+	qmblk->dq_flags = 0;
+	memset(qmblk->dq_ugid_info, 0, sizeof(qmblk->dq_ugid_info));
+	INIT_LIST_HEAD(&qmblk->dq_ilink_list);
+
+	atomic_set(&qmblk->dq_count, 1);
+
+	/* insert in hash chain */
+	list_add(&qmblk->dq_hash,
+		&vzquota_hash_table[vzquota_hash_func(quota_id)]);
+
+	/* success */
+	return qmblk;
+
+out_free_tree:
+	quotatree_free(qmblk->dq_uid_tree, NULL);
+out_free:
+	kmem_cache_free(vzquota_cachep, qmblk);
+out:
+	return ERR_PTR(err);
+}
+
+static struct vz_quota_master *vzquota_alloc_fake(void)
+{
+	struct vz_quota_master *qmblk;
+
+	qmblk = kmem_cache_alloc(vzquota_cachep, SLAB_KERNEL);
+	if (qmblk == NULL)
+		return NULL;
+	memset(qmblk, 0, sizeof(*qmblk));
+	qmblk->dq_state = VZDQ_STOPING;
+	qmblk->dq_flags = VZDQ_NOQUOT;
+	spin_lock_init(&qmblk->dq_data_lock);
+	INIT_LIST_HEAD(&qmblk->dq_ilink_list);
+	atomic_set(&qmblk->dq_count, 1);
+	return qmblk;
+}
+
+/**
+ * vzquota_find_master - look up the master record by its quota id
+ *
+ * Walks the hash chain selected by vzquota_hash_func(); returns the matching
+ * qmblk (refcounter untouched) or NULL.  Called under vz_quota_sem.
+ */
+struct vz_quota_master *vzquota_find_master(unsigned int quota_id)
+{
+	struct list_head *chain;
+	struct vz_quota_master *qmblk;
+
+	chain = &vzquota_hash_table[vzquota_hash_func(quota_id)];
+	list_for_each_entry(qmblk, chain, dq_hash)
+		if (qmblk->dq_id == quota_id)
+			return qmblk;
+
+	return NULL;
+}
+
+/**
+ * vzquota_free_master - release resources taken by qmblk, freeing memory
+ *
+ * qmblk is assumed to be already taken out from the hash.
+ * Should be called outside vz_quota_sem.
+ */
+void vzquota_free_master(struct vz_quota_master *qmblk)
+{
+#ifdef CONFIG_VZ_QUOTA_UGID
+	vzquota_kill_ugid(qmblk);
+#endif
+	BUG_ON(!list_empty(&qmblk->dq_ilink_list));
+	kmem_cache_free(vzquota_cachep, qmblk);
+}
+
+
+/* ----------------------------------------------------------------------
+ *
+ * Passing quota information through current
+ *
+ * Used in inode -> qmblk lookup at inode creation stage (since at that
+ * time there are no links between the inode being created and its parent
+ * directory).
+ *
+ * --------------------------------------------------------------------- */
+
+#define VZDQ_CUR_MAGIC	0x57d0fee2
+
+static inline int vzquota_cur_qmblk_check(void)
+{
+	return current->magic == VZDQ_CUR_MAGIC;
+}
+
+static inline struct inode *vzquota_cur_qmblk_fetch(void)
+{
+	return current->ino;
+}
+
+static inline void vzquota_cur_qmblk_set(struct inode *data)
+{
+	struct task_struct *tsk;
+
+	tsk = current;
+	tsk->magic = VZDQ_CUR_MAGIC;
+	tsk->ino = data;
+}
+
+#if 0
+static inline void vzquota_cur_qmblk_reset(void)
+{
+	current->magic = 0;
+}
+#endif
+
+
+/* ----------------------------------------------------------------------
+ *
+ * Superblock quota operations
+ *
+ * --------------------------------------------------------------------- */
+
+/*
+ * Kernel structure abuse.
+ * We use files[0] pointer as an int variable:
+ * reference counter of how many quota blocks uses this superblock.
+ * files[1] is used for generations structure which helps us to track
+ * when traversing of dentries is really required.
+ */
+#define __VZ_QUOTA_NOQUOTA(sb)		sb->s_dquot.vzdq_master
+#define __VZ_QUOTA_TSTAMP(sb)		((struct timeval *)\
+						&sb->s_dquot.dqio_sem)
+
+#if defined(VZ_QUOTA_UNLOAD)
+
+#define __VZ_QUOTA_SBREF(sb)		sb->s_dquot.vzdq_count
+
+struct dquot_operations *orig_dq_op;
+struct quotactl_ops *orig_dq_cop;
+
+/**
+ * vzquota_get_super - account for a new quoted tree under the superblock
+ *
+ * One superblock can have multiple directory subtrees with different VZ
+ * quotas.  We keep a counter of such subtrees and set VZ quota operations or
+ * reset the default ones.
+ *
+ * Called under vz_quota_sem (from quota_on).
+ */
+int vzquota_get_super(struct super_block *sb)
+{
+	if (sb->dq_op != &vz_quota_operations) {
+		down(&sb->s_dquot.dqonoff_sem);
+		if (sb->s_dquot.flags & (DQUOT_USR_ENABLED|DQUOT_GRP_ENABLED)) {
+			up(&sb->s_dquot.dqonoff_sem);
+			return -EEXIST;
+		}
+		if (orig_dq_op == NULL && sb->dq_op != NULL)
+			orig_dq_op = sb->dq_op;
+		sb->dq_op = &vz_quota_operations;
+		if (orig_dq_cop == NULL && sb->s_qcop != NULL)
+			orig_dq_cop = sb->s_qcop;
+		/* XXX this may race with sys_quotactl */
+#ifdef CONFIG_VZ_QUOTA_UGID
+		sb->s_qcop = &vz_quotactl_operations;
+#else
+		sb->s_qcop = NULL;
+#endif
+		do_gettimeofday(__VZ_QUOTA_TSTAMP(sb));
+		memset(&sb->s_dquot.info, 0, sizeof(sb->s_dquot.info));
+
+		INIT_LIST_HEAD(&sb->s_dquot.info[USRQUOTA].dqi_dirty_list);
+		INIT_LIST_HEAD(&sb->s_dquot.info[GRPQUOTA].dqi_dirty_list);
+		sb->s_dquot.info[USRQUOTA].dqi_format = &vz_quota_empty_v2_format;
+		sb->s_dquot.info[GRPQUOTA].dqi_format = &vz_quota_empty_v2_format;
+		/*
+		 * To get quotaops.h call us we need to mark superblock
+		 * as having quota.  These flags mark the moment when
+		 * our dq_op start to be called.
+		 *
+		 * The ordering of dq_op and s_dquot.flags assignment
+		 * needs to be enforced, but other CPUs do not do rmb()
+		 * between s_dquot.flags and dq_op accesses.
+		 */
+		wmb(); synchronize_sched();
+		sb->s_dquot.flags = DQUOT_USR_ENABLED|DQUOT_GRP_ENABLED;
+		__module_get(THIS_MODULE);
+		up(&sb->s_dquot.dqonoff_sem);
+	}
+	/* protected by vz_quota_sem */
+	__VZ_QUOTA_SBREF(sb)++;
+	return 0;
+}
+
+/**
+ * vzquota_put_super - release superblock when one quota tree goes away
+ *
+ * Called under vz_quota_sem.
+ */
+void vzquota_put_super(struct super_block *sb)
+{
+	int count;
+
+	count = --__VZ_QUOTA_SBREF(sb);
+	if (count == 0) {
+		down(&sb->s_dquot.dqonoff_sem);
+		sb->s_dquot.flags = 0;
+		wmb(); synchronize_sched();
+		sema_init(&sb->s_dquot.dqio_sem, 1);
+		sb->s_qcop = orig_dq_cop;
+		sb->dq_op = orig_dq_op;
+		inode_qmblk_lock(sb);
+		quota_gen_put(SB_QGEN(sb));
+		SB_QGEN(sb) = NULL;
+		/* release qlnk's without qmblk */
+		remove_inode_quota_links_list(&non_vzquota_inodes_lh,
+				sb, NULL);
+		/*
+		 * Races with quota initialization:
+		 * after this inode_qmblk_unlock all inode's generations are
+		 * invalidated, quota_inode_qmblk checks superblock operations.
+		 */
+		inode_qmblk_unlock(sb);
+		/*
+		 * Module refcounting: in theory, this is the best place
+		 * to call module_put(THIS_MODULE).
+		 * In reality, it can't be done because we can't be sure that
+		 * other CPUs do not enter our code segment through dq_op
+		 * cached long time ago.  Quotaops interface isn't supposed to
+		 * go into modules currently (that is, into unloadable
+		 * modules).  By omitting module_put, our module isn't
+		 * unloadable.
+		 */
+		up(&sb->s_dquot.dqonoff_sem);
+	}
+}
+
+#else
+
+struct vzquota_new_sop {
+	struct super_operations new_op;
+	struct super_operations *old_op;
+};
+
+/**
+ * vzquota_shutdown_super - umount callback installed as s_op->put_super
+ */
+void vzquota_shutdown_super(struct super_block *sb)
+{
+	struct vz_quota_master *qmblk = __VZ_QUOTA_NOQUOTA(sb);
+	struct vzquota_new_sop *sop;
+
+	__VZ_QUOTA_NOQUOTA(sb) = NULL;
+	if (qmblk != NULL)
+		qmblk_put(qmblk);
+	sop = container_of(sb->s_op, struct vzquota_new_sop, new_op);
+	sb->s_op = sop->old_op;	/* put the filesystem's own ops back */
+	kfree(sop);
+	if (sb->s_op->put_super != NULL)	/* the hook is optional */
+		(*sb->s_op->put_super)(sb);
+}
+
+/**
+ * vzquota_get_super - account for a new quoted tree under the superblock
+ *
+ * One superblock can have multiple directory subtrees with different VZ
+ * quotas.
+ *
+ * Returns 0, -EEXIST if non-VZ quota is already enabled on the superblock,
+ * or -ENOMEM.  Called under vz_quota_sem (from vzquota_on).
+ */
+int vzquota_get_super(struct super_block *sb)
+{
+	struct vz_quota_master *qnew;
+	struct vzquota_new_sop *sop;
+	int err;
+
+	down(&sb->s_dquot.dqonoff_sem);
+	err = -EEXIST;
+	if ((sb->s_dquot.flags & (DQUOT_USR_ENABLED|DQUOT_GRP_ENABLED)) &&
+	    sb->dq_op != &vz_quota_operations)
+		goto out_up;
+
+	/*
+	 * This allocation code should be under sb->dq_op check below, but
+	 * it doesn't really matter...
+	 */
+	err = -ENOMEM;
+	if (__VZ_QUOTA_NOQUOTA(sb) == NULL) {
+		qnew = vzquota_alloc_fake();
+		if (qnew == NULL)
+			goto out_up;
+		__VZ_QUOTA_NOQUOTA(sb) = qnew;
+	}
+
+	if (sb->dq_op != &vz_quota_operations) {
+		sop = kmalloc(sizeof(*sop), GFP_KERNEL);
+		if (sop == NULL) {
+			vzquota_free_master(__VZ_QUOTA_NOQUOTA(sb));
+			__VZ_QUOTA_NOQUOTA(sb) = NULL;
+			goto out_up;
+		}
+		memcpy(&sop->new_op, sb->s_op, sizeof(sop->new_op));
+		sop->new_op.put_super = &vzquota_shutdown_super;
+		sop->old_op = sb->s_op;
+		sb->s_op = &sop->new_op;
+
+		sb->dq_op = &vz_quota_operations;
+#ifdef CONFIG_VZ_QUOTA_UGID
+		sb->s_qcop = &vz_quotactl_operations;
+#else
+		sb->s_qcop = NULL;
+#endif
+		do_gettimeofday(__VZ_QUOTA_TSTAMP(sb));
+
+		memset(&sb->s_dquot.info, 0, sizeof(sb->s_dquot.info));
+		/* these 2 list heads are checked in sync_dquots() */
+		INIT_LIST_HEAD(&sb->s_dquot.info[USRQUOTA].dqi_dirty_list);
+		INIT_LIST_HEAD(&sb->s_dquot.info[GRPQUOTA].dqi_dirty_list);
+		sb->s_dquot.info[USRQUOTA].dqi_format = &vz_quota_empty_v2_format;
+		sb->s_dquot.info[GRPQUOTA].dqi_format = &vz_quota_empty_v2_format;
+
+		/*
+		 * To get quotaops.h to call us we need to mark superblock
+		 * as having quota.  These flags mark the moment when
+		 * our dq_op start to be called.
+		 *
+		 * The ordering of dq_op and s_dquot.flags assignment
+		 * needs to be enforced, but other CPUs do not do rmb()
+		 * between s_dquot.flags and dq_op accesses.
+		 */
+		wmb(); synchronize_sched();
+		sb->s_dquot.flags = DQUOT_USR_ENABLED|DQUOT_GRP_ENABLED;
+	}
+	err = 0;
+
+out_up:
+	up(&sb->s_dquot.dqonoff_sem);
+	return err;
+}
+
+/**
+ * vzquota_put_super - one quota tree less on this superblock
+ *
+ * Called under vz_quota_sem.
+ */
+void vzquota_put_super(struct super_block *sb)
+{
+	/*
+	 * Even if this put is the last one,
+	 * sb->s_dquot.flags can't be cleared, because otherwise vzquota_drop
+	 * won't be called and the remaining qmblk references won't be put.
+	 */
+}
+
+#endif
+
+
+/* ----------------------------------------------------------------------
+ *
+ * Helpers for inode -> qmblk link maintenance
+ *
+ * --------------------------------------------------------------------- */
+
+#define __VZ_QUOTA_EMPTY		((void *)0xbdbdbdbd) /* "empty link" marker */
+#define VZ_QUOTA_IS_NOQUOTA(qm, sb)	((qm)->dq_flags & VZDQ_NOQUOT) /* sb unused */
+#define VZ_QUOTA_EMPTY_IOPS		(&vfs_empty_iops)
+extern struct inode_operations vfs_empty_iops;
+
+/*
+ * Tell whether the qmblk stored in @inode's link may be used as is:
+ * a BAD link counts as actual, an empty or VZDQ_NOACT one does not.
+ */
+static int VZ_QUOTA_IS_ACTUAL(struct inode *inode)
+{
+	struct vz_quota_master *master = INODE_QLNK(inode)->qmblk;
+
+	if (master == VZ_QUOTA_BAD)
+		return 1;
+	if (master == __VZ_QUOTA_EMPTY)
+		return 0;
+	return !(master->dq_flags & VZDQ_NOACT);
+}
+
+static inline int vzquota_qlnk_is_empty(struct vz_quota_ilink *qlnk)
+{
+	return qlnk->qmblk == __VZ_QUOTA_EMPTY;	/* nonzero: no qmblk attached */
+}
+
+static inline void vzquota_qlnk_set_empty(struct vz_quota_ilink *qlnk)
+{
+	qlnk->qmblk = __VZ_QUOTA_EMPTY;	/* no reference is dropped here */
+	qlnk->origin = VZ_QUOTAO_SETE;	/* origin tag shown in VZDQ diagnostics */
+}
+
+void vzquota_qlnk_init(struct vz_quota_ilink *qlnk)
+{
+	memset(qlnk, 0, sizeof(*qlnk));	/* also clears the qugid[] pointers */
+	INIT_LIST_HEAD(&qlnk->list);
+	vzquota_qlnk_set_empty(qlnk);
+	qlnk->origin = VZ_QUOTAO_INIT;	/* overrides the SETE tag set just above */
+}
+
+void vzquota_qlnk_destroy(struct vz_quota_ilink *qlnk)
+{
+	might_sleep();	/* dq_sem may be taken below */
+	if (vzquota_qlnk_is_empty(qlnk))
+		return;	/* holds nothing; origin stays as it was */
+#if defined(CONFIG_VZ_QUOTA_UGID)
+	if (qlnk->qmblk != NULL && qlnk->qmblk != VZ_QUOTA_BAD) {
+		struct vz_quota_master *qmblk;
+		struct vz_quota_ugid *quid, *qgid;
+		qmblk = qlnk->qmblk;
+		quid = qlnk->qugid[USRQUOTA];
+		qgid = qlnk->qugid[GRPQUOTA];
+		if (quid != NULL || qgid != NULL) {
+			down(&qmblk->dq_sem);	/* ugid puts run under dq_sem */
+			if (qgid != NULL)
+				vzquota_put_ugid(qmblk, qgid);
+			if (quid != NULL)
+				vzquota_put_ugid(qmblk, quid);
+			up(&qmblk->dq_sem);
+		}
+	}
+#endif
+	if (qlnk->qmblk != NULL && qlnk->qmblk != VZ_QUOTA_BAD)
+		qmblk_put(qlnk->qmblk);	/* after the ugid puts above */
+	qlnk->origin = VZ_QUOTAO_DESTR;	/* qmblk/qugid fields are not reset */
+}
+
+/**
+ * vzquota_qlnk_swap - swap inode's and temporary vz_quota_ilink contents
+ * @qlt: temporary
+ * @qli: inode's
+ *
+ * Locking is provided by the caller (depending on the context).
+ * After swap, @qli is inserted into the corresponding dq_ilink_list
+ * (unless its new qmblk is empty or BAD), @qlt list is reinitialized.
+ */
+static void vzquota_qlnk_swap(struct vz_quota_ilink *qlt,
+		struct vz_quota_ilink *qli)
+{
+	struct vz_quota_master *qb;
+	struct vz_quota_ugid *qu;
+	int i;
+
+	qb = qlt->qmblk;	/* qb: the qmblk @qli ends up with */
+	qlt->qmblk = qli->qmblk;
+	qli->qmblk = qb;
+	list_del_init(&qli->list);	/* unlink from whatever list it was on */
+	if (qb != __VZ_QUOTA_EMPTY && qb != VZ_QUOTA_BAD)
+		list_add(&qli->list, &qb->dq_ilink_list);
+	INIT_LIST_HEAD(&qlt->list);
+	qli->origin = VZ_QUOTAO_SWAP;	/* callers overwrite this afterwards */
+
+	for (i = 0; i < MAXQUOTAS; i++) {	/* exchange the ugid pointers too */
+		qu = qlt->qugid[i];
+		qlt->qugid[i] = qli->qugid[i];
+		qli->qugid[i] = qu;
+	}
+}
+
+/**
+ * vzquota_qlnk_reinit_locked - make @qlnk empty again, called under locks
+ *
+ * Called under dcache_lock and inode_qmblk locks.
+ * Returns 1 if locks were dropped inside, 0 if atomic.
+ */
+static int vzquota_qlnk_reinit_locked(struct vz_quota_ilink *qlnk,
+		struct inode *inode)
+{
+	if (vzquota_qlnk_is_empty(qlnk))
+		return 0;
+	if (qlnk->qmblk == VZ_QUOTA_BAD) {
+		vzquota_qlnk_set_empty(qlnk);	/* BAD holds no ref: reset in place */
+		return 0;
+	}
+	spin_unlock(&dcache_lock);	/* destroy may sleep: drop both locks */
+	inode_qmblk_unlock(inode->i_sb);
+	vzquota_qlnk_destroy(qlnk);
+	vzquota_qlnk_init(qlnk);
+	inode_qmblk_lock(inode->i_sb);	/* retake in the reverse order */
+	spin_lock(&dcache_lock);
+	return 1;
+}
+
+#if defined(CONFIG_VZ_QUOTA_UGID)
+/**
+ * vzquota_qlnk_reinit_attr - destroy and reinit qlnk content
+ *
+ * Like vzquota_qlnk_reinit_locked, but drops qmblk data + inode_qmblk locks.
+ */
+static int vzquota_qlnk_reinit_attr(struct vz_quota_ilink *qlnk,
+		struct inode *inode,
+		struct vz_quota_master *qmblk)
+{
+	if (vzquota_qlnk_is_empty(qlnk))
+		return 0;	/* nothing to do, locks were kept */
+	/* may be optimized if qlnk->qugid all NULLs */
+	qmblk_data_write_unlock(qmblk);	/* destroy may sleep: drop both locks */
+	inode_qmblk_unlock(inode->i_sb);
+	vzquota_qlnk_destroy(qlnk);
+	vzquota_qlnk_init(qlnk);
+	inode_qmblk_lock(inode->i_sb);	/* retake in the reverse order */
+	qmblk_data_write_lock(qmblk);
+	return 1;	/* locks were dropped: caller must recheck */
+}
+#endif
+
+/**
+ * vzquota_qlnk_fill - fill vz_quota_ilink content
+ * @qlnk: vz_quota_ilink to fill
+ * @inode: inode for which @qlnk is filled (i_sb, i_uid, i_gid)
+ * @qmblk: qmblk to which this @qlnk will belong (may be VZ_QUOTA_BAD)
+ *
+ * Called under dcache_lock and inode_qmblk locks.
+ * Returns 1 if locks were dropped inside, 0 if atomic.
+ * @qlnk is expected to be empty.
+ */
+static int vzquota_qlnk_fill(struct vz_quota_ilink *qlnk,
+		struct inode *inode,
+		struct vz_quota_master *qmblk)
+{
+	if (qmblk != VZ_QUOTA_BAD)
+		qmblk_get(qmblk);	/* the link holds its own reference */
+	qlnk->qmblk = qmblk;
+
+#if defined(CONFIG_VZ_QUOTA_UGID)
+	if (qmblk != VZ_QUOTA_BAD &&
+	    !VZ_QUOTA_IS_NOQUOTA(qmblk, inode->i_sb) &&
+	    (qmblk->dq_flags & VZDQUG_ON)) {
+		struct vz_quota_ugid *quid, *qgid;
+
+		spin_unlock(&dcache_lock);	/* dq_sem is taken unlocked */
+		inode_qmblk_unlock(inode->i_sb);
+
+		down(&qmblk->dq_sem);
+		quid = __vzquota_find_ugid(qmblk, inode->i_uid, USRQUOTA, 0);
+		qgid = __vzquota_find_ugid(qmblk, inode->i_gid, GRPQUOTA, 0);
+		up(&qmblk->dq_sem);
+
+		inode_qmblk_lock(inode->i_sb);
+		spin_lock(&dcache_lock);
+		qlnk->qugid[USRQUOTA] = quid;
+		qlnk->qugid[GRPQUOTA] = qgid;
+		return 1;	/* locks were dropped: caller must recheck */
+	}
+#endif
+
+	return 0;
+}
+
+#if defined(CONFIG_VZ_QUOTA_UGID)
+/**
+ * vzquota_qlnk_fill_attr - fill vz_quota_ilink content for uid, gid
+ *
+ * This function is a helper for vzquota_transfer, and differs from
+ * vzquota_qlnk_fill by locking and by taking new ids from @iattr per @mask.
+ */
+static int vzquota_qlnk_fill_attr(struct vz_quota_ilink *qlnk,
+		struct inode *inode,
+		struct iattr *iattr,
+		int mask,
+		struct vz_quota_master *qmblk)
+{
+	qmblk_get(qmblk);	/* the link holds its own reference */
+	qlnk->qmblk = qmblk;
+
+	if (mask) {
+		struct vz_quota_ugid *quid, *qgid;
+
+		quid = qgid = NULL; /* to make gcc happy */
+		if (!(mask & (1 << USRQUOTA)))	/* uid unchanged: inode's ugid */
+			quid = vzquota_get_ugid(INODE_QLNK(inode)->
+							qugid[USRQUOTA]);
+		if (!(mask & (1 << GRPQUOTA)))	/* gid unchanged: inode's ugid */
+			qgid = vzquota_get_ugid(INODE_QLNK(inode)->
+							qugid[GRPQUOTA]);
+
+		qmblk_data_write_unlock(qmblk);	/* dq_sem is taken unlocked */
+		inode_qmblk_unlock(inode->i_sb);
+
+		down(&qmblk->dq_sem);
+		if (mask & (1 << USRQUOTA))	/* changed: look up the new id */
+			quid = __vzquota_find_ugid(qmblk, iattr->ia_uid,
+					USRQUOTA, 0);
+		if (mask & (1 << GRPQUOTA))
+			qgid = __vzquota_find_ugid(qmblk, iattr->ia_gid,
+					GRPQUOTA, 0);
+		up(&qmblk->dq_sem);
+
+		inode_qmblk_lock(inode->i_sb);
+		qmblk_data_write_lock(qmblk);
+		qlnk->qugid[USRQUOTA] = quid;
+		qlnk->qugid[GRPQUOTA] = qgid;
+		return 1;	/* locks were dropped: caller must recheck */
+	}
+
+	return 0;	/* mask == 0: qugid[] left as the caller passed it */
+}
+#endif
+
+/**
+ * __vzquota_inode_init - make sure inode's qlnk is initialized
+ *
+ * May be called if qlnk is already initialized, detects this situation itself.
+ * Called under inode_qmblk_lock.  @origin is stored on every call.
+ */
+static void __vzquota_inode_init(struct inode *inode, unsigned char origin)
+{
+	if (inode->i_dquot[USRQUOTA] == NODQUOT) {	/* not initialized yet */
+		vzquota_qlnk_init(INODE_QLNK(inode));
+		inode->i_dquot[USRQUOTA] = (void *)~(unsigned long)NODQUOT; /* mark */
+	}
+	INODE_QLNK(inode)->origin = origin;
+}
+
+/**
+ * vzquota_inode_drop - destroy VZ quota information in the inode
+ *
+ * Inode must not be externally accessible or dirty.
+ */
+static void vzquota_inode_drop(struct inode *inode)
+{
+	struct vz_quota_ilink qlnk;
+
+	vzquota_qlnk_init(&qlnk);
+	inode_qmblk_lock(inode->i_sb);
+	vzquota_qlnk_swap(&qlnk, INODE_QLNK(inode)); /* inode's link gets empty */
+	INODE_QLNK(inode)->origin = VZ_QUOTAO_DRCAL;
+	inode->i_dquot[USRQUOTA] = NODQUOT;	/* read as "uninitialized" again */
+	inode_qmblk_unlock(inode->i_sb);
+	vzquota_qlnk_destroy(&qlnk);	/* may sleep, hence after the unlock */
+}
+
+/**
+ * vzquota_inode_qmblk_set - initialize inode's qlnk
+ * @inode: inode to be initialized
+ * @qmblk: quota master block to which this inode should belong (may be BAD)
+ * @qlnk: placeholder to store data to resolve locking issues
+ *
+ * Returns 1 if locks were dropped and rechecks possibly needed, 0 otherwise.
+ * Called under dcache_lock and inode_qmblk locks.
+ * @qlnk will be destroyed in the caller chain.
+ *
+ * It is not mandatory to restart parent checks since quota on/off currently
+ * shrinks the dentry tree and checks that there are no outside references.
+ * But if at some time that shrink is removed, restarts will be required.
+ * Additionally, the restarts prevent inconsistencies if the dentry tree
+ * changes (inode is moved).  This is not a big deal, but anyway...
+ */
+static int vzquota_inode_qmblk_set(struct inode *inode,
+		struct vz_quota_master *qmblk,
+		struct vz_quota_ilink *qlnk)
+{
+	if (qmblk == NULL) {	/* should not happen: report, then treat as BAD */
+		printk(KERN_ERR "VZDQ: NULL in set, "
+				"orig %u, dev %s, inode %lu, fs %s\n",
+				INODE_QLNK(inode)->origin,
+				inode->i_sb->s_id, inode->i_ino,
+				inode->i_sb->s_type->name);
+		printk(KERN_ERR "current %d (%s), VE %d\n",
+				current->pid, current->comm,
+				VEID(get_exec_env()));
+		dump_stack();
+		qmblk = VZ_QUOTA_BAD;
+	}
+	while (1) {
+		if (vzquota_qlnk_is_empty(qlnk) &&
+		    vzquota_qlnk_fill(qlnk, inode, qmblk))
+			return 1;	/* locks dropped: @qlnk stays prepared */
+		if (qlnk->qmblk == qmblk)
+			break;	/* placeholder matches the wanted qmblk */
+		if (vzquota_qlnk_reinit_locked(qlnk, inode))
+			return 1;	/* stale placeholder released unlocked */
+	}
+	vzquota_qlnk_swap(qlnk, INODE_QLNK(inode));	/* no lock drop since check */
+	INODE_QLNK(inode)->origin = VZ_QUOTAO_QSET;
+	return 0;
+}
+
+
+/* ----------------------------------------------------------------------
+ *
+ * vzquota_inode_qmblk (inode -> qmblk lookup) parts
+ *
+ * --------------------------------------------------------------------- */
+
+/* Return 0 if @inode has a dentry alias; otherwise log it and return -1. */
+static int vzquota_dparents_check_attach(struct inode *inode)
+{
+	if (list_empty(&inode->i_dentry)) {
+		printk(KERN_ERR "VZDQ: no parent for "
+				"dev %s, inode %lu, fs %s\n", inode->i_sb->s_id,
+				inode->i_ino, inode->i_sb->s_type->name);
+		return -1;
+	}
+	return 0;
+}
+
+static struct inode *vzquota_dparents_check_actual(struct inode *inode)
+{	/* returns the first parent inode that is not actual, or NULL */
+	struct dentry *de;
+
+	list_for_each_entry(de, &inode->i_dentry, d_alias) {
+		if (de->d_parent == de) /* detached dentry, perhaps */
+			continue;
+		/* first access to parent, make sure its qlnk initialized */
+		__vzquota_inode_init(de->d_parent->d_inode, VZ_QUOTAO_ACT);
+		if (!VZ_QUOTA_IS_ACTUAL(de->d_parent->d_inode))
+			return de->d_parent->d_inode;
+	}
+	return NULL;	/* every attached parent is actual */
+}
+
+static struct vz_quota_master *vzquota_dparents_check_same(struct inode *inode)
+{	/* returns the qmblk shared by all parents, else VZ_QUOTA_BAD */
+	struct dentry *de;
+	struct vz_quota_master *qmblk;
+
+	qmblk = NULL;	/* NULL: no attached parent seen yet */
+	list_for_each_entry(de, &inode->i_dentry, d_alias) {
+		if (de->d_parent == de) /* detached dentry, perhaps */
+			continue;
+		if (qmblk == NULL) {	/* first parent sets the reference value */
+			qmblk = INODE_QLNK(de->d_parent->d_inode)->qmblk;
+			continue;
+		}
+		if (INODE_QLNK(de->d_parent->d_inode)->qmblk != qmblk) {
+			printk(KERN_WARNING "VZDQ: multiple quotas for "
+					"dev %s, inode %lu, fs %s\n",
+					inode->i_sb->s_id,
+					inode->i_ino,
+					inode->i_sb->s_type->name);
+			qmblk = VZ_QUOTA_BAD;
+			break;
+		}
+	}
+	if (qmblk == NULL) {	/* no alias had a real parent */
+		printk(KERN_WARNING "VZDQ: not attached to tree, "
+				"dev %s, inode %lu, fs %s\n",
+				inode->i_sb->s_id,
+				inode->i_ino,
+				inode->i_sb->s_type->name);
+		qmblk = VZ_QUOTA_BAD;
+	}
+	return qmblk;	/* no reference is taken on the result */
+}
+
+static void vzquota_dbranch_actualize(struct inode *inode,
+		struct inode *refinode)
+{	/* entered and left with inode_qmblk lock and dcache_lock held */
+	struct inode *pinode;
+	struct vz_quota_master *qmblk;
+	struct vz_quota_ilink qlnk;
+
+	vzquota_qlnk_init(&qlnk);
+
+start:	/* climb until an inode whose parents are all actual (or the root) */
+	if (inode == inode->i_sb->s_root->d_inode) {
+		/* filesystem root */
+		atomic_inc(&inode->i_count);	/* paired with iput() at out: */
+		do {
+			qmblk = __VZ_QUOTA_NOQUOTA(inode->i_sb);
+		} while (vzquota_inode_qmblk_set(inode, qmblk, &qlnk));
+		goto out;
+	}
+
+	if (!vzquota_dparents_check_attach(inode)) {
+		pinode = vzquota_dparents_check_actual(inode);
+		if (pinode != NULL) {
+			inode = pinode;
+			goto start;
+		}
+	}
+
+	atomic_inc(&inode->i_count);	/* paired with iput() at out: */
+	while (1) {
+		if (VZ_QUOTA_IS_ACTUAL(inode)) /* actualized without us */
+			break;
+		/*
+		 * Need to check parents again if we have slept inside
+		 * vzquota_inode_qmblk_set() in the loop.
+		 * If the state of parents is different, just return and repeat
+		 * the actualizing process again from the inode passed to
+		 * vzquota_inode_qmblk_recalc().
+		 */
+		if (!vzquota_dparents_check_attach(inode)) {
+			if (vzquota_dparents_check_actual(inode) != NULL)
+				break;
+			qmblk = vzquota_dparents_check_same(inode);
+		} else
+			qmblk = VZ_QUOTA_BAD;	/* no aliases at all */
+		if (!vzquota_inode_qmblk_set(inode, qmblk, &qlnk)){/* success */
+			INODE_QLNK(inode)->origin = VZ_QUOTAO_ACT;
+			break;
+		}
+	}
+
+out:
+	spin_unlock(&dcache_lock);	/* qlnk destroy may sleep: unlock first */
+	inode_qmblk_unlock(refinode->i_sb);
+	vzquota_qlnk_destroy(&qlnk);
+	iput(inode);
+	inode_qmblk_lock(refinode->i_sb);
+	spin_lock(&dcache_lock);
+}
+
+static void vzquota_dtree_qmblk_recalc(struct inode *inode,
+		struct vz_quota_ilink *qlnk)
+{	/* recalc path for inodes that have dentry aliases */
+	struct inode *pinode;
+	struct vz_quota_master *qmblk;
+
+	if (inode == inode->i_sb->s_root->d_inode) {
+		/* filesystem root */
+		do {	/* re-read the fake each time: locks may have dropped */
+			qmblk = __VZ_QUOTA_NOQUOTA(inode->i_sb);
+		} while (vzquota_inode_qmblk_set(inode, qmblk, qlnk));
+		return;
+	}
+
+start:
+	if (VZ_QUOTA_IS_ACTUAL(inode))
+		return;	/* done, possibly by someone else meanwhile */
+	/*
+	 * Here qmblk is (re-)initialized for all ancestors.
+	 * This is not a very efficient procedure, but it guarantees that
+	 * the quota tree is consistent (that is, the inode doesn't have two
+	 * ancestors with different qmblk).
+	 */
+	if (!vzquota_dparents_check_attach(inode)) {
+		pinode = vzquota_dparents_check_actual(inode);
+		if (pinode != NULL) {
+			vzquota_dbranch_actualize(pinode, inode);
+			goto start;	/* locks were dropped inside: recheck */
+		}
+		qmblk = vzquota_dparents_check_same(inode);
+	} else
+		qmblk = VZ_QUOTA_BAD;	/* aliases vanished meanwhile */
+
+	if (vzquota_inode_qmblk_set(inode, qmblk, qlnk))
+		goto start;	/* locks were dropped inside: recheck */
+	INODE_QLNK(inode)->origin = VZ_QUOTAO_DTREE;
+}
+
+static void vzquota_det_qmblk_recalc(struct inode *inode,
+		struct vz_quota_ilink *qlnk)
+{	/* recalc path for inodes that have no dentry aliases */
+	struct inode *parent;
+	struct vz_quota_master *qmblk;
+	char *msg;	/* reason reported if one of the checks below fails */
+	int cnt;	/* number of failures reported so far in this call */
+	time_t timeout;	/* seconds since sb timestamp with silent failures */
+
+	cnt = 0;
+	parent = NULL;
+start:
+	/*
+	 * qmblk of detached inodes shouldn't be considered as not actual.
+	 * They are not in any dentry tree, so quota on/off shouldn't affect
+	 * them.
+	 */
+	if (!vzquota_qlnk_is_empty(INODE_QLNK(inode)))
+		return;
+
+	timeout = 3;
+	qmblk = __VZ_QUOTA_NOQUOTA(inode->i_sb); /* used if not in creation */
+	msg = "detached inode not in creation";
+	if (inode->i_op != VZ_QUOTA_EMPTY_IOPS)
+		goto fail;
+	qmblk = VZ_QUOTA_BAD;	/* used by every failure from here on */
+	msg = "unexpected creation context";
+	if (!vzquota_cur_qmblk_check())
+		goto fail;
+	timeout = 0;	/* failures below are never silenced by the timeout */
+	parent = vzquota_cur_qmblk_fetch();
+	msg = "uninitialized parent";
+	if (vzquota_qlnk_is_empty(INODE_QLNK(parent)))
+		goto fail;
+	msg = "parent not in tree";
+	if (list_empty(&parent->i_dentry))
+		goto fail;
+	msg = "parent has 0 refcount";
+	if (!atomic_read(&parent->i_count))
+		goto fail;
+	msg = "parent has different sb";
+	if (parent->i_sb != inode->i_sb)
+		goto fail;
+	if (!VZ_QUOTA_IS_ACTUAL(parent)) {
+		vzquota_dbranch_actualize(parent, inode);
+		goto start;	/* locks were dropped inside: recheck all */
+	}
+
+	qmblk = INODE_QLNK(parent)->qmblk;	/* inherit from the parent */
+set:
+	if (vzquota_inode_qmblk_set(inode, qmblk, qlnk))
+		goto start;
+	INODE_QLNK(inode)->origin = VZ_QUOTAO_DET;
+	return;
+
+fail:
+	{
+		struct timeval tv, tvo;
+		do_gettimeofday(&tv);
+		memcpy(&tvo, __VZ_QUOTA_TSTAMP(inode->i_sb), sizeof(tvo));
+		tv.tv_sec -= tvo.tv_sec;	/* tv := now - sb timestamp */
+		if (tv.tv_usec < tvo.tv_usec) {
+			tv.tv_sec--;	/* borrow one second */
+			tv.tv_usec += USEC_PER_SEC - tvo.tv_usec;
+		} else
+			tv.tv_usec -= tvo.tv_usec;
+		if (tv.tv_sec < timeout)
+			goto set;	/* too early to complain: stay silent */
+		printk(KERN_ERR "VZDQ: %s, orig %u,"
+			" dev %s, inode %lu, fs %s\n",
+			msg, INODE_QLNK(inode)->origin,
+			inode->i_sb->s_id, inode->i_ino,
+			inode->i_sb->s_type->name);
+		if (!cnt++) {	/* task info and stack only on first failure */
+			printk(KERN_ERR "current %d (%s), VE %d,"
+				" time %ld.%06ld\n",
+				current->pid, current->comm,
+				VEID(get_exec_env()),
+				tv.tv_sec, tv.tv_usec);
+			dump_stack();
+		}
+		if (parent != NULL)
+			printk(KERN_ERR "VZDQ: parent of %lu is %lu\n",
+				inode->i_ino, parent->i_ino);
+	}
+	goto set;	/* continue with the fallback qmblk chosen above */
+}
+
+static void vzquota_inode_qmblk_recalc(struct inode *inode,
+		struct vz_quota_ilink *qlnk)
+{	/* callers in this file hold inode_qmblk lock; takes dcache_lock */
+	spin_lock(&dcache_lock);
+	if (!list_empty(&inode->i_dentry))
+		vzquota_dtree_qmblk_recalc(inode, qlnk);	/* has aliases */
+	else
+		vzquota_det_qmblk_recalc(inode, qlnk);	/* detached inode */
+	spin_unlock(&dcache_lock);
+}
+
+/**
+ * vzquota_inode_qmblk - obtain inode's qmblk
+ *
+ * Returns qmblk with refcounter taken, %NULL if not under
+ * VZ quota or %VZ_QUOTA_BAD.  May sleep; no locks are held on return.
+ *
+ * FIXME: This function should be removed when vzquota_find_qmblk /
+ * get_quota_root / vzquota_dstat code is cleaned up.
+ */
+struct vz_quota_master *vzquota_inode_qmblk(struct inode *inode)
+{
+	struct vz_quota_master *qmblk;
+	struct vz_quota_ilink qlnk;	/* scratch link for the recalc helpers */
+
+	might_sleep();
+
+	if (inode->i_sb->dq_op != &vz_quota_operations)
+		return NULL;	/* VZ quota ops not installed on this sb */
+#if defined(VZ_QUOTA_UNLOAD)
+#error Make sure qmblk does not disappear
+#endif
+
+	vzquota_qlnk_init(&qlnk);
+	inode_qmblk_lock(inode->i_sb);
+	__vzquota_inode_init(inode, VZ_QUOTAO_INICAL);
+
+	if (vzquota_qlnk_is_empty(INODE_QLNK(inode)) ||
+	    !VZ_QUOTA_IS_ACTUAL(inode))
+		vzquota_inode_qmblk_recalc(inode, &qlnk);
+
+	qmblk = INODE_QLNK(inode)->qmblk;
+	if (qmblk != VZ_QUOTA_BAD) {
+		if (!VZ_QUOTA_IS_NOQUOTA(qmblk, inode->i_sb))
+			qmblk_get(qmblk);	/* reference handed to the caller */
+		else
+			qmblk = NULL;	/* "no quota" master is reported as NULL */
+	}
+
+	inode_qmblk_unlock(inode->i_sb);
+	vzquota_qlnk_destroy(&qlnk);	/* may sleep, hence after the unlock */
+	return qmblk;
+}
+
+
+/* ----------------------------------------------------------------------
+ *
+ * Calls from quota operations
+ *
+ * --------------------------------------------------------------------- */
+
+/**
+ * vzquota_inode_init_call - call from DQUOT_INIT
+ */
+void vzquota_inode_init_call(struct inode *inode)
+{
+	struct vz_quota_master *qmblk;
+	struct vz_quota_datast data;
+
+	/* initializes inode's quota inside */
+	qmblk = vzquota_inode_data(inode, &data);
+	if (qmblk != NULL && qmblk != VZ_QUOTA_BAD)
+		vzquota_data_unlock(inode, &data);	/* locks held only then */
+
+	/*
+	 * The check is needed for repeated new_inode() calls from a single
+	 * ext3 call like create or mkdir in case of -ENOSPC.
+	 */
+	spin_lock(&dcache_lock);
+	if (!list_empty(&inode->i_dentry))
+		vzquota_cur_qmblk_set(inode);	/* only for inodes with aliases */
+	spin_unlock(&dcache_lock);
+}
+
+/**
+ * vzquota_inode_drop_call - call from DQUOT_DROP
+ */
+void vzquota_inode_drop_call(struct inode *inode)
+{
+	vzquota_inode_drop(inode);	/* exported wrapper around the static helper */
+}
+
+/**
+ * vzquota_inode_data - initialize (if nec.) and lock inode quota ptrs
+ * @inode: the inode
+ * @data: storage space
+ *
+ * Returns: qmblk is NULL or VZ_QUOTA_BAD or actualized qmblk.
+ * On return if qmblk is neither NULL nor VZ_QUOTA_BAD:
+ *   qmblk in inode's qlnk is the same as returned,
+ *   ugid pointers inside inode's qlnk are valid,
+ *   some locks are taken (and should be released by vzquota_data_unlock).
+ * If qmblk is NULL or VZ_QUOTA_BAD, locks are NOT taken.
+ */
+struct vz_quota_master *vzquota_inode_data(struct inode *inode,
+		struct vz_quota_datast *data)
+{
+	struct vz_quota_master *qmblk;
+
+	might_sleep();
+
+	vzquota_qlnk_init(&data->qlnk);	/* scratch link for the recalc helpers */
+	inode_qmblk_lock(inode->i_sb);
+	__vzquota_inode_init(inode, VZ_QUOTAO_INICAL);
+
+	if (vzquota_qlnk_is_empty(INODE_QLNK(inode)) ||
+	    !VZ_QUOTA_IS_ACTUAL(inode))
+		vzquota_inode_qmblk_recalc(inode, &data->qlnk);
+
+	qmblk = INODE_QLNK(inode)->qmblk;
+	if (qmblk != VZ_QUOTA_BAD) {
+		if (!VZ_QUOTA_IS_NOQUOTA(qmblk, inode->i_sb)) {
+			/*
+			 * Note that in the current implementation,
+			 * inode_qmblk_lock can theoretically be dropped here.
+			 * This place is serialized with quota_off because
+			 * quota_off fails when there are extra dentry
+			 * references and syncs inodes before removing quota
+			 * information from them.
+			 * However, quota usage information should stop being
+			 * updated immediately after vzquota_off.
+			 */
+			qmblk_data_write_lock(qmblk);	/* both locks stay held */
+		} else {
+			inode_qmblk_unlock(inode->i_sb);
+			qmblk = NULL;	/* "no quota" master is reported as NULL */
+		}
+	} else {
+		inode_qmblk_unlock(inode->i_sb);
+	}
+	return qmblk;	/* data->qlnk is not destroyed on the unlocked paths */
+}
+
+void vzquota_data_unlock(struct inode *inode,
+		struct vz_quota_datast *data)
+{	/* undoes a vzquota_inode_data() call that returned a real qmblk */
+	qmblk_data_write_unlock(INODE_QLNK(inode)->qmblk);
+	inode_qmblk_unlock(inode->i_sb);
+	vzquota_qlnk_destroy(&data->qlnk);	/* may sleep, so done unlocked */
+}
+
+#if defined(CONFIG_VZ_QUOTA_UGID)
+/**
+ * vzquota_inode_transfer_call - call from vzquota_transfer
+ */
+int vzquota_inode_transfer_call(struct inode *inode, struct iattr *iattr)
+{
+	struct vz_quota_master *qmblk;
+	struct vz_quota_datast data;
+	struct vz_quota_ilink qlnew;	/* link prepared for the new uid/gid */
+	int mask;	/* bit (1 << USRQUOTA/GRPQUOTA) set if that id changes */
+	int ret;
+
+	might_sleep();
+	vzquota_qlnk_init(&qlnew);
+start:
+	qmblk = vzquota_inode_data(inode, &data);	/* locks taken if real */
+	ret = NO_QUOTA;
+	if (qmblk == VZ_QUOTA_BAD)
+		goto out_destr;
+	ret = QUOTA_OK;
+	if (qmblk == NULL)
+		goto out_destr;
+	qmblk_get(qmblk);	/* own reference, put on restart and on exit */
+
+	ret = QUOTA_OK;
+	if (!(qmblk->dq_flags & VZDQUG_ON))
+		/* no ugid quotas */
+		goto out_unlock;
+
+	mask = 0;	/* NOTE(review): mask == 0 leaves qlnew ugids NULL - confirm */
+	if ((iattr->ia_valid & ATTR_UID) && iattr->ia_uid != inode->i_uid)
+		mask |= 1 << USRQUOTA;
+	if ((iattr->ia_valid & ATTR_GID) && iattr->ia_gid != inode->i_gid)
+		mask |= 1 << GRPQUOTA;
+	while (1) {	/* leave via break whenever a helper dropped the locks */
+		if (vzquota_qlnk_is_empty(&qlnew) &&
+		    vzquota_qlnk_fill_attr(&qlnew, inode, iattr, mask, qmblk))
+			break;
+		if (qlnew.qmblk == INODE_QLNK(inode)->qmblk &&
+		    qlnew.qmblk == qmblk)
+			goto finish;	/* qlnew still matches the inode */
+		if (vzquota_qlnk_reinit_attr(&qlnew, inode, qmblk))
+			break;
+	}
+
+	/* prepare for restart */
+	vzquota_data_unlock(inode, &data);
+	qmblk_put(qmblk);
+	goto start;	/* qlnew is kept and rechecked on the next pass */
+
+finish:
+	/* all references obtained successfully */
+	ret = vzquota_transfer_usage(inode, mask, &qlnew);
+	if (!ret) {	/* zero result: install the new link into the inode */
+		vzquota_qlnk_swap(&qlnew, INODE_QLNK(inode));
+		INODE_QLNK(inode)->origin = VZ_QUOTAO_TRANS;
+	}
+out_unlock:
+	vzquota_data_unlock(inode, &data);
+	qmblk_put(qmblk);
+out_destr:
+	vzquota_qlnk_destroy(&qlnew);	/* old link after a swap, else unused new */
+	return ret;
+}
+#endif
+
+int vzquota_rename_check(struct inode *inode,
+		struct inode *old_dir, struct inode *new_dir)
+{	/* returns 0 if the rename is acceptable for VZ quota, -1 if not */
+	struct vz_quota_master *qmblk;
+	struct vz_quota_ilink qlnk1, qlnk2;	/* scratch links for recalc */
+	int c, ret;
+
+	if (inode->i_sb != old_dir->i_sb || inode->i_sb != new_dir->i_sb)
+		return -1;	/* all three must be on one superblock */
+
+	might_sleep();
+
+	vzquota_qlnk_init(&qlnk1);
+	vzquota_qlnk_init(&qlnk2);
+	inode_qmblk_lock(inode->i_sb);
+	__vzquota_inode_init(inode, VZ_QUOTAO_INICAL);
+	__vzquota_inode_init(old_dir, VZ_QUOTAO_INICAL);	/* not recalculated */
+	__vzquota_inode_init(new_dir, VZ_QUOTAO_INICAL);
+
+	do {	/* repeat until one full pass needs no recalculation */
+		c = 0;
+		if (vzquota_qlnk_is_empty(INODE_QLNK(inode)) ||
+		    !VZ_QUOTA_IS_ACTUAL(inode)) {
+			vzquota_inode_qmblk_recalc(inode, &qlnk1);
+			c++;
+		}
+		if (vzquota_qlnk_is_empty(INODE_QLNK(new_dir)) ||
+		    !VZ_QUOTA_IS_ACTUAL(new_dir)) {
+			vzquota_inode_qmblk_recalc(new_dir, &qlnk2);
+			c++;
+		}
+	} while (c);
+
+	ret = 0;
+	qmblk = INODE_QLNK(inode)->qmblk;
+	if (qmblk != INODE_QLNK(new_dir)->qmblk) {
+		ret = -1;	/* moving between quota trees is refused... */
+		if (qmblk != VZ_QUOTA_BAD &&
+		    !VZ_QUOTA_IS_NOQUOTA(qmblk, inode->i_sb) &&
+		    qmblk->dq_root_dentry->d_inode == inode &&
+		    VZ_QUOTA_IS_NOQUOTA(INODE_QLNK(new_dir)->qmblk, /* NOTE(review): BAD? */
+			    				inode->i_sb) &&
+		    VZ_QUOTA_IS_NOQUOTA(INODE_QLNK(old_dir)->qmblk, /* NOTE(review): empty? */
+			    				inode->i_sb))
+			/* quota root rename is allowed */
+			ret = 0;
+	}
+
+	inode_qmblk_unlock(inode->i_sb);
+	vzquota_qlnk_destroy(&qlnk2);	/* may sleep, hence after the unlock */
+	vzquota_qlnk_destroy(&qlnk1);
+	return ret;
+}
+
+
+/* ----------------------------------------------------------------------
+ *
+ * qmblk-related parts of on/off operations
+ *
+ * --------------------------------------------------------------------- */
+
+/**
+ * vzquota_check_dtree - check dentry tree if quota on/off is allowed
+ *
+ * This function doesn't allow quota to be turned on/off if some dentries in
+ * the tree have external references.
+ * In addition to technical reasons, it enforces user-space correctness:
+ * current usage (taken from or reported to the user space) can be meaningful
+ * and accurate only if the tree is not being modified.
+ * Side effect: additional vfsmount structures referencing the tree (bind
+ * mounts of tree nodes to some other places) are not allowed at on/off time.
+ */
+int vzquota_check_dtree(struct vz_quota_master *qmblk, int off)
+{	/* returns 0 or -EBUSY; may drop and retake both caller-held locks */
+	struct dentry *dentry;
+	int err, count;
+
+	err = -EBUSY;
+	dentry = qmblk->dq_root_dentry;
+
+	if (d_unhashed(dentry) && dentry != dentry->d_sb->s_root)
+		goto unhashed;
+
+	/* attempt to shrink */
+  	if (!list_empty(&dentry->d_subdirs)) {
+		spin_unlock(&dcache_lock);	/* shrink runs without our locks */
+		inode_qmblk_unlock(dentry->d_sb);
+		shrink_dcache_parent(dentry);
+		inode_qmblk_lock(dentry->d_sb);
+		spin_lock(&dcache_lock);
+		if (!list_empty(&dentry->d_subdirs))
+			goto out;	/* children survived the shrink: busy */
+
+		count = 1;	/* d_count the quota root is expected to have */
+		if (dentry == dentry->d_sb->s_root)
+			count += 2;	/* sb and mnt refs */
+		if (atomic_read(&dentry->d_count) < count) {
+			printk(KERN_ERR "%s: too small count %d vs %d.\n",
+					__FUNCTION__,
+					atomic_read(&dentry->d_count), count);
+			goto out;
+		}
+		if (atomic_read(&dentry->d_count) > count)
+			goto out;	/* extra references on the root: busy */
+	}
+
+	err = 0;
+out:
+	return err;
+
+unhashed:
+	/*
+	 * Quota root is removed.
+	 * Allow to turn quota off, but not on.
+	 */
+	if (off)
+		err = 0;
+	goto out;
+}
+
+/* Bind quota root @inode to @qmblk and retire sb's fake; 0 or -errno. */
+int vzquota_on_qmblk(struct super_block *sb, struct inode *inode,
+		struct vz_quota_master *qmblk)
+{
+	struct vz_quota_ilink qlnk;
+	struct vz_quota_master *qold = NULL, *qnew;
+	int err;
+
+	might_sleep();
+	qnew = vzquota_alloc_fake();	/* replacement "no quota" master */
+	if (qnew == NULL)
+		return -ENOMEM;
+
+	vzquota_qlnk_init(&qlnk);
+	inode_qmblk_lock(sb);
+	__vzquota_inode_init(inode, VZ_QUOTAO_INICAL);
+
+	spin_lock(&dcache_lock);
+	while (1) {	/* qmblk_set returns 1 if locks were dropped: recheck */
+		err = vzquota_check_dtree(qmblk, 0);
+		if (err)
+			break;
+		if (!vzquota_inode_qmblk_set(inode, qmblk, &qlnk))
+			break;
+	}
+	INODE_QLNK(inode)->origin = VZ_QUOTAO_ON;
+	spin_unlock(&dcache_lock);
+
+	if (!err) {
+		qold = __VZ_QUOTA_NOQUOTA(sb);
+		qold->dq_flags |= VZDQ_NOACT;	/* links to it become not actual */
+		__VZ_QUOTA_NOQUOTA(sb) = qnew;
+	}
+
+	inode_qmblk_unlock(sb);
+	vzquota_qlnk_destroy(&qlnk);
+	if (qold != NULL)
+		qmblk_put(qold);
+	else
+		vzquota_free_master(qnew); /* never installed: don't leak it */
+	return err;
+}
+
+int vzquota_off_qmblk(struct super_block *sb, struct vz_quota_master *qmblk)
+{	/* returns 0, or -EBUSY if the dentry tree check refuses */
+	int ret;
+
+	ret = 0;
+	inode_qmblk_lock(sb);
+
+	spin_lock(&dcache_lock);
+	if (vzquota_check_dtree(qmblk, 1))	/* 1: "off", unhashed root is ok */
+		ret = -EBUSY;
+	spin_unlock(&dcache_lock);
+
+	if (!ret)	/* existing links become not actual and read as "no quota" */
+		qmblk->dq_flags |= VZDQ_NOACT | VZDQ_NOQUOT;
+	inode_qmblk_unlock(sb);
+	return ret;
+}
+
+
+/* ----------------------------------------------------------------------
+ *
+ * External interfaces
+ *
+ * ---------------------------------------------------------------------*/
+
+/*
+ * ioctl entry: -ENOTTY for VZCTL_QUOTA_CTL/unknown cmd, -EFAULT on bad copy.
+ */
+static int vzquota_ioctl(struct inode *ino, struct file *file,
+		unsigned int cmd, unsigned long arg)
+{
+	struct vzctl_quotactl qb;
+	struct vzctl_quotaugidctl qub;
+	int err = -ENOTTY;
+
+	switch (cmd) {
+		case VZCTL_QUOTA_NEW_CTL:
+			if (copy_from_user(&qb, (void *)arg, sizeof(qb))) {
+				err = -EFAULT;
+				break;
+			}
+			err = do_vzquotactl(qb.cmd, qb.quota_id,
+					qb.qstat, qb.ve_root);
+			break;
+#ifdef CONFIG_VZ_QUOTA_UGID
+		case VZCTL_QUOTA_UGID_CTL:
+			if (copy_from_user(&qub, (void *)arg, sizeof(qub))) {
+				err = -EFAULT;
+				break;
+			}
+			err = do_vzquotaugidctl(&qub);
+			break;
+#endif
+	}
+	might_sleep(); /* debug */
+	return err;
+}
+
+static struct vzioctlinfo vzdqcalls = {
+	.type	= VZDQCTLTYPE,
+	.func	= vzquota_ioctl,	/* passed to vzioctl_register() at init */
+	.owner	= THIS_MODULE,
+};
+
+/**
+ * vzquota_dstat - copy qmblk->dq_stat for @super into @qstat, 0 or -ENOENT
+ */
+static int vzquota_dstat(struct super_block *super, struct dq_stat *qstat)
+{
+	struct vz_quota_master *qmblk;
+
+	qmblk = vzquota_find_qmblk(super);	/* presumably takes a ref: put below */
+	if (qmblk == NULL)
+		return -ENOENT;
+	if (qmblk == VZ_QUOTA_BAD) {
+		memset(qstat, 0, sizeof(*qstat));	/* report all-zero usage */
+		return 0;
+	}
+
+	qmblk_data_read_lock(qmblk);	/* copy under the data read lock */
+	memcpy(qstat, &qmblk->dq_stat, sizeof(*qstat));
+	qmblk_data_read_unlock(qmblk);
+	qmblk_put(qmblk);
+	return 0;
+}
+
+
+/* ----------------------------------------------------------------------
+ *
+ * Init/exit helpers
+ *
+ * ---------------------------------------------------------------------*/
+
+/*
+ * Create the vz_quota_master slab cache and initialize all hash buckets.
+ * Returns 0, or -ENOMEM if the cache cannot be created.
+ */
+static int vzquota_cache_init(void)
+{
+	int bucket;
+
+	vzquota_cachep = kmem_cache_create("vz_quota_master",
+					 sizeof(struct vz_quota_master),
+					 0, SLAB_HWCACHE_ALIGN, NULL, NULL);
+	if (vzquota_cachep == NULL) {
+		printk(KERN_ERR "Cannot create VZ_QUOTA SLAB cache\n");
+		return -ENOMEM;
+	}
+	for (bucket = 0; bucket < VZ_QUOTA_HASH_SIZE; bucket++)
+		INIT_LIST_HEAD(&vzquota_hash_table[bucket]);
+	return 0;
+}
+
+static void vzquota_cache_release(void)
+{	/* counterpart of vzquota_cache_init() */
+	int i;
+
+	/* sanity check */
+	for (i = 0; i < VZ_QUOTA_HASH_SIZE; i++)
+		if (!list_empty(&vzquota_hash_table[i]))
+			BUG();	/* a bucket is still populated */
+
+	/* release caches */
+	if (kmem_cache_destroy(vzquota_cachep))	/* nonzero result = failure */
+		printk(KERN_ERR
+			"VZQUOTA: vz_quota_master kmem_cache_destroy failed\n");
+	vzquota_cachep = NULL;	/* cleared even if the destroy failed */
+}
+
+static int quota_notifier_call(struct vnotifier_block *self,
+		unsigned long n, void *data, int err)
+{	/* events not listed below return the incoming @err unchanged */
+	struct virt_info_quota *viq;
+	struct super_block *sb;
+
+	viq = (struct virt_info_quota *)data;
+	switch (n) {
+	case VIRTINFO_QUOTA_ON:
+		err = NOTIFY_BAD;
+		if (!try_module_get(THIS_MODULE))
+			break;	/* module reference is put on QUOTA_OFF */
+		sb = viq->super;
+		memset(&sb->s_dquot.info, 0, sizeof(sb->s_dquot.info));
+		INIT_LIST_HEAD(&sb->s_dquot.info[USRQUOTA].dqi_dirty_list);
+		INIT_LIST_HEAD(&sb->s_dquot.info[GRPQUOTA].dqi_dirty_list);
+		err = NOTIFY_OK;
+		break;
+	case VIRTINFO_QUOTA_OFF:
+		module_put(THIS_MODULE);	/* pairs with the get on QUOTA_ON */
+		err = NOTIFY_OK;
+		break;
+	case VIRTINFO_QUOTA_GETSTAT:
+		err = NOTIFY_BAD;
+		if (vzquota_dstat(viq->super, viq->qstat))
+			break;	/* nonzero means no qmblk was found */
+		err = NOTIFY_OK;
+		break;
+	}
+	return err;
+}
+
+struct vnotifier_block quota_notifier_block = {
+	.notifier_call = quota_notifier_call,
+	.priority = INT_MAX,	/* largest possible priority value */
+};
+
+/* ----------------------------------------------------------------------
+ *
+ * Init/exit procedures
+ *
+ * ---------------------------------------------------------------------*/
+
+static int __init vzquota_init(void)
+{	/* module init: returns 0, or the first failing helper's error code */
+	int err;
+
+	if ((err = vzquota_cache_init()) != 0)
+		goto out_cache;
+
+	if ((err = vzquota_proc_init()) != 0)
+		goto out_proc;
+
+#ifdef CONFIG_VZ_QUOTA_UGID
+	if ((err = vzquota_ugid_init()) != 0)
+		goto out_ugid;
+#endif
+
+	init_MUTEX(&vz_quota_sem);
+	vzioctl_register(&vzdqcalls);	/* NOTE(review): results (if any) unchecked */
+	virtinfo_notifier_register(VITYPE_QUOTA, &quota_notifier_block);
+#if defined(CONFIG_VZ_QUOTA_UGID) && defined(CONFIG_PROC_FS)
+	vzaquota_init();
+#endif
+
+	return 0;
+
+#ifdef CONFIG_VZ_QUOTA_UGID
+out_ugid:	/* unwind in reverse order of the successful steps */
+	vzquota_proc_release();
+#endif
+out_proc:
+	vzquota_cache_release();
+out_cache:
+	return err;
+}
+
+#if defined(VZ_QUOTA_UNLOAD)
+static void __exit vzquota_release(void)
+{	/* module exit: undo vzquota_init(), unregistering interfaces first */
+	virtinfo_notifier_unregister(VITYPE_QUOTA, &quota_notifier_block);
+	vzioctl_unregister(&vzdqcalls);
+#ifdef CONFIG_VZ_QUOTA_UGID
+#ifdef CONFIG_PROC_FS
+	vzaquota_fini();	/* same config conditions as vzaquota_init() */
+#endif
+	vzquota_ugid_release();
+#endif
+	vzquota_proc_release();
+	vzquota_cache_release();	/* BUG()s if the hash table is not empty */
+}
+#endif
+
+MODULE_AUTHOR("SWsoft <info@sw-soft.com>");
+MODULE_DESCRIPTION("Virtuozzo Disk Quota");
+MODULE_LICENSE("GPL v2");
+
+module_init(vzquota_init)
+#if defined(VZ_QUOTA_UNLOAD)
+module_exit(vzquota_release)
+#endif
diff -uprN linux-2.6.16/fs/xattr.c linux-2.6.16.ovz/fs/xattr.c
--- linux-2.6.16/fs/xattr.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/fs/xattr.c	2006-07-05 08:34:56.000000000 -0400
@@ -58,7 +58,7 @@ xattr_permission(struct inode *inode, co
 			return -EPERM;
 	}
 
-	return permission(inode, mask, NULL);
+	return permission(inode, mask, NULL, NULL);
 }
 
 int
diff -uprN linux-2.6.16/fs/xfs/linux-2.6/xfs_aops.c linux-2.6.16.ovz/fs/xfs/linux-2.6/xfs_aops.c
--- linux-2.6.16/fs/xfs/linux-2.6/xfs_aops.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/fs/xfs/linux-2.6/xfs_aops.c	2006-07-05 08:34:56.000000000 -0400
@@ -616,7 +616,7 @@ xfs_is_delayed_page(
 				acceptable = (type == IOMAP_UNWRITTEN);
 			else if (buffer_delay(bh))
 				acceptable = (type == IOMAP_DELAY);
-			else if (buffer_mapped(bh))
+			else if (buffer_dirty(bh) && buffer_mapped(bh))
 				acceptable = (type == 0);
 			else
 				break;
diff -uprN linux-2.6.16/fs/xfs/linux-2.6/xfs_iops.c linux-2.6.16.ovz/fs/xfs/linux-2.6/xfs_iops.c
--- linux-2.6.16/fs/xfs/linux-2.6/xfs_iops.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/fs/xfs/linux-2.6/xfs_iops.c	2006-07-05 08:34:56.000000000 -0400
@@ -615,7 +615,8 @@ STATIC int
 linvfs_permission(
 	struct inode	*inode,
 	int		mode,
-	struct nameidata *nd)
+	struct nameidata *nd,
+	struct exec_perm *perm)
 {
 	vnode_t		*vp = LINVFS_GET_VP(inode);
 	int		error;
@@ -673,8 +674,7 @@ linvfs_setattr(
 	if (ia_valid & ATTR_ATIME) {
 		vattr.va_mask |= XFS_AT_ATIME;
 		vattr.va_atime = attr->ia_atime;
-		if (ia_valid & ATTR_ATIME_SET)
-			inode->i_atime = attr->ia_atime;
+		inode->i_atime = attr->ia_atime;
 	}
 	if (ia_valid & ATTR_MTIME) {
 		vattr.va_mask |= XFS_AT_MTIME;
diff -uprN linux-2.6.16/include/asm-arm26/tlbflush.h linux-2.6.16.ovz/include/asm-arm26/tlbflush.h
--- linux-2.6.16/include/asm-arm26/tlbflush.h	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/include/asm-arm26/tlbflush.h	2006-07-05 08:34:56.000000000 -0400
@@ -25,7 +25,7 @@ static inline void memc_update_all(void)
 {
 	struct task_struct *p;
 	cpu_memc_update_all(init_mm.pgd);
-	for_each_process(p) {
+	for_each_process_all(p) {
 		if (!p->mm)
 			continue;
 		cpu_memc_update_all(p->mm->pgd);
diff -uprN linux-2.6.16/include/asm-generic/atomic.h linux-2.6.16.ovz/include/asm-generic/atomic.h
--- linux-2.6.16/include/asm-generic/atomic.h	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/include/asm-generic/atomic.h	2006-07-05 08:34:56.000000000 -0400
@@ -66,6 +66,13 @@ static inline void atomic_long_sub(long 
 	atomic64_sub(i, v);
 }
 
+static inline int atomic_long_add_negative(long i, atomic_long_t *l)
+{
+	atomic64_t *v = (atomic64_t *)l;
+
+	return atomic64_add_negative(i, v);
+}
+
 #else
 
 typedef atomic_t atomic_long_t;
@@ -113,5 +120,12 @@ static inline void atomic_long_sub(long 
 	atomic_sub(i, v);
 }
 
+static inline int atomic_long_add_negative(long i, atomic_long_t *l)
+{
+	atomic_t *v = (atomic_t *)l;
+
+	return atomic_add_negative(i, v);
+}
+
 #endif
 #endif
diff -uprN linux-2.6.16/include/asm-generic/pgtable.h linux-2.6.16.ovz/include/asm-generic/pgtable.h
--- linux-2.6.16/include/asm-generic/pgtable.h	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/include/asm-generic/pgtable.h	2006-07-05 08:34:56.000000000 -0400
@@ -159,17 +159,8 @@ static inline void ptep_set_wrprotect(st
 #define lazy_mmu_prot_update(pte)	do { } while (0)
 #endif
 
-#ifndef __HAVE_ARCH_MULTIPLE_ZERO_PAGE
+#ifndef __HAVE_ARCH_MOVE_PTE
 #define move_pte(pte, prot, old_addr, new_addr)	(pte)
-#else
-#define move_pte(pte, prot, old_addr, new_addr)				\
-({									\
- 	pte_t newpte = (pte);						\
-	if (pte_present(pte) && pfn_valid(pte_pfn(pte)) &&		\
-			pte_page(pte) == ZERO_PAGE(old_addr))		\
-		newpte = mk_pte(ZERO_PAGE(new_addr), (prot));		\
-	newpte;								\
-})
 #endif
 
 /*
diff -uprN linux-2.6.16/include/asm-i386/bug.h linux-2.6.16.ovz/include/asm-i386/bug.h
--- linux-2.6.16/include/asm-i386/bug.h	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/include/asm-i386/bug.h	2006-07-05 08:34:56.000000000 -0400
@@ -14,7 +14,10 @@
 #ifdef CONFIG_DEBUG_BUGVERBOSE
 #define BUG()				\
  __asm__ __volatile__(	"ud2\n"		\
+			"\t.byte 0x66\n"\
+			"\t.byte 0xb8\n" /* mov $xxx, %ax */\
 			"\t.word %c0\n"	\
+			"\t.byte 0xb8\n" /* mov $xxx, %eax */\
 			"\t.long %c1\n"	\
 			 : : "i" (__LINE__), "i" (__FILE__))
 #else
diff -uprN linux-2.6.16/include/asm-i386/cpufeature.h linux-2.6.16.ovz/include/asm-i386/cpufeature.h
--- linux-2.6.16/include/asm-i386/cpufeature.h	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/include/asm-i386/cpufeature.h	2006-07-05 08:34:56.000000000 -0400
@@ -70,6 +70,7 @@
 #define X86_FEATURE_P3		(3*32+ 6) /* P3 */
 #define X86_FEATURE_P4		(3*32+ 7) /* P4 */
 #define X86_FEATURE_CONSTANT_TSC (3*32+ 8) /* TSC ticks at a constant rate */
+#define X86_FEATURE_FXSAVE_LEAK (3*32+10) /* FXSAVE leaks FDP/FIP/FOP */
 
 /* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */
 #define X86_FEATURE_XMM3	(4*32+ 0) /* Streaming SIMD Extensions-3 */
diff -uprN linux-2.6.16/include/asm-i386/elf.h linux-2.6.16.ovz/include/asm-i386/elf.h
--- linux-2.6.16/include/asm-i386/elf.h	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/include/asm-i386/elf.h	2006-07-05 08:34:56.000000000 -0400
@@ -108,7 +108,7 @@ typedef struct user_fxsr_struct elf_fpxr
    For the moment, we have only optimizations for the Intel generations,
    but that could change... */
 
-#define ELF_PLATFORM  (system_utsname.machine)
+#define ELF_PLATFORM  (ve_utsname.machine)
 
 #ifdef __KERNEL__
 #define SET_PERSONALITY(ex, ibcs2) do { } while (0)
@@ -136,8 +136,10 @@ extern void __kernel_vsyscall;
 
 #define ARCH_DLINFO						\
 do {								\
+	if (sysctl_at_vsyscall) {				\
 		NEW_AUX_ENT(AT_SYSINFO,	VSYSCALL_ENTRY);	\
 		NEW_AUX_ENT(AT_SYSINFO_EHDR, VSYSCALL_BASE);	\
+	}							\
 } while (0)
 
 /*
diff -uprN linux-2.6.16/include/asm-i386/i387.h linux-2.6.16.ovz/include/asm-i386/i387.h
--- linux-2.6.16/include/asm-i386/i387.h	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/include/asm-i386/i387.h	2006-07-05 08:34:56.000000000 -0400
@@ -13,6 +13,7 @@
 
 #include <linux/sched.h>
 #include <linux/init.h>
+#include <linux/kernel_stat.h>
 #include <asm/processor.h>
 #include <asm/sigcontext.h>
 #include <asm/user.h>
@@ -38,17 +39,38 @@ extern void init_fpu(struct task_struct 
 extern void kernel_fpu_begin(void);
 #define kernel_fpu_end() do { stts(); preempt_enable(); } while(0)
 
+/* We need a safe address that is cheap to find and that is already
+   in L1 during context switch. The best choices are unfortunately
+   different for UP and SMP */
+#ifdef CONFIG_SMP
+#define safe_address (__per_cpu_offset[0])
+#else
+#define safe_address (kstat_cpu(0).cpustat.user)
+#endif
+
 /*
  * These must be called with preempt disabled
  */
 static inline void __save_init_fpu( struct task_struct *tsk )
 {
+	/* Use more nops than strictly needed in case the compiler
+	   varies code */
 	alternative_input(
-		"fnsave %1 ; fwait ;" GENERIC_NOP2,
-		"fxsave %1 ; fnclex",
+		"fnsave %[fx] ;fwait;" GENERIC_NOP8 GENERIC_NOP4,
+		"fxsave %[fx]\n"
+		"bt $7,%[fsw] ; jnc 1f ; fnclex\n1:",
 		X86_FEATURE_FXSR,
-		"m" (tsk->thread.i387.fxsave)
-		:"memory");
+		[fx] "m" (tsk->thread.i387.fxsave),
+		[fsw] "m" (tsk->thread.i387.fxsave.swd) : "memory");
+	/* AMD K7/K8 CPUs don't save/restore FDP/FIP/FOP unless an exception
+	   is pending.  Clear the x87 state here by setting it to fixed
+   	   values. safe_address is a random variable that should be in L1 */
+	alternative_input(
+		GENERIC_NOP8 GENERIC_NOP2,
+		"emms\n\t"	  	/* clear stack tags */
+		"fildl %[addr]", 	/* set F?P to defined value */
+		X86_FEATURE_FXSAVE_LEAK,
+		[addr] "m" (safe_address));
 	task_thread_info(tsk)->status &= ~TS_USEDFPU;
 }
 
diff -uprN linux-2.6.16/include/asm-i386/mman.h linux-2.6.16.ovz/include/asm-i386/mman.h
--- linux-2.6.16/include/asm-i386/mman.h	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/include/asm-i386/mman.h	2006-07-05 08:34:56.000000000 -0400
@@ -10,6 +10,7 @@
 #define MAP_NORESERVE	0x4000		/* don't check for reservations */
 #define MAP_POPULATE	0x8000		/* populate (prefault) pagetables */
 #define MAP_NONBLOCK	0x10000		/* do not block on IO */
+#define MAP_EXECPRIO	0x20000		/* do soft ubc charge */
 
 #define MCL_CURRENT	1		/* lock all current mappings */
 #define MCL_FUTURE	2		/* lock all future mappings */
diff -uprN linux-2.6.16/include/asm-i386/nmi.h linux-2.6.16.ovz/include/asm-i386/nmi.h
--- linux-2.6.16/include/asm-i386/nmi.h	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/include/asm-i386/nmi.h	2006-07-05 08:34:56.000000000 -0400
@@ -17,6 +17,7 @@ typedef int (*nmi_callback_t)(struct pt_
  * set. Return 1 if the NMI was handled.
  */
 void set_nmi_callback(nmi_callback_t callback);
+void set_nmi_ipi_callback(nmi_callback_t callback);
  
 /** 
  * unset_nmi_callback
@@ -24,5 +25,6 @@ void set_nmi_callback(nmi_callback_t cal
  * Remove the handler previously set.
  */
 void unset_nmi_callback(void);
+void unset_nmi_ipi_callback(void);
  
 #endif /* ASM_NMI_H */
diff -uprN linux-2.6.16/include/asm-i386/pgtable-2level.h linux-2.6.16.ovz/include/asm-i386/pgtable-2level.h
--- linux-2.6.16/include/asm-i386/pgtable-2level.h	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/include/asm-i386/pgtable-2level.h	2006-07-05 08:34:56.000000000 -0400
@@ -18,6 +18,9 @@
 #define set_pte_atomic(pteptr, pteval) set_pte(pteptr,pteval)
 #define set_pmd(pmdptr, pmdval) (*(pmdptr) = (pmdval))
 
+#define pte_clear(mm,addr,xp)	do { set_pte_at(mm, addr, xp, __pte(0)); } while (0)
+#define pmd_clear(xp)	do { set_pmd(xp, __pmd(0)); } while (0)
+
 #define ptep_get_and_clear(mm,addr,xp)	__pte(xchg(&(xp)->pte_low, 0))
 #define pte_same(a, b)		((a).pte_low == (b).pte_low)
 #define pte_page(x)		pfn_to_page(pte_pfn(x))
diff -uprN linux-2.6.16/include/asm-i386/pgtable-3level.h linux-2.6.16.ovz/include/asm-i386/pgtable-3level.h
--- linux-2.6.16/include/asm-i386/pgtable-3level.h	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/include/asm-i386/pgtable-3level.h	2006-07-05 08:34:56.000000000 -0400
@@ -85,6 +85,26 @@ static inline void pud_clear (pud_t * pu
 #define pmd_offset(pud, address) ((pmd_t *) pud_page(*(pud)) + \
 			pmd_index(address))
 
+/*
+ * For PTEs and PDEs, we must clear the P-bit first when clearing a page table
+ * entry, so clear the bottom half first and enforce ordering with a compiler
+ * barrier.
+ */
+static inline void pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
+{
+	ptep->pte_low = 0;
+	smp_wmb();
+	ptep->pte_high = 0;
+}
+
+static inline void pmd_clear(pmd_t *pmd)
+{
+	u32 *tmp = (u32 *)pmd;
+	*tmp = 0;
+	smp_wmb();
+	*(tmp + 1) = 0;
+}
+
 static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
 {
 	pte_t res;
diff -uprN linux-2.6.16/include/asm-i386/pgtable.h linux-2.6.16.ovz/include/asm-i386/pgtable.h
--- linux-2.6.16/include/asm-i386/pgtable.h	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/include/asm-i386/pgtable.h	2006-07-05 08:34:56.000000000 -0400
@@ -204,12 +204,10 @@ extern unsigned long long __PAGE_KERNEL,
 extern unsigned long pg0[];
 
 #define pte_present(x)	((x).pte_low & (_PAGE_PRESENT | _PAGE_PROTNONE))
-#define pte_clear(mm,addr,xp)	do { set_pte_at(mm, addr, xp, __pte(0)); } while (0)
 
 /* To avoid harmful races, pmd_none(x) should check only the lower when PAE */
 #define pmd_none(x)	(!(unsigned long)pmd_val(x))
 #define pmd_present(x)	(pmd_val(x) & _PAGE_PRESENT)
-#define pmd_clear(xp)	do { set_pmd(xp, __pmd(0)); } while (0)
 #define	pmd_bad(x)	((pmd_val(x) & (~PAGE_MASK & ~_PAGE_USER)) != _KERNPG_TABLE)
 
 
@@ -269,7 +267,7 @@ static inline pte_t ptep_get_and_clear_f
 	pte_t pte;
 	if (full) {
 		pte = *ptep;
-		*ptep = __pte(0);
+		pte_clear(mm, addr, ptep);
 	} else {
 		pte = ptep_get_and_clear(mm, addr, ptep);
 	}
diff -uprN linux-2.6.16/include/asm-i386/thread_info.h linux-2.6.16.ovz/include/asm-i386/thread_info.h
--- linux-2.6.16/include/asm-i386/thread_info.h	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/include/asm-i386/thread_info.h	2006-07-05 08:34:56.000000000 -0400
@@ -101,13 +101,13 @@ register unsigned long current_stack_poi
 	({							\
 		struct thread_info *ret;			\
 								\
-		ret = kmalloc(THREAD_SIZE, GFP_KERNEL);		\
+		ret = kmalloc(THREAD_SIZE, GFP_KERNEL_UBC);	\
 		if (ret)					\
 			memset(ret, 0, THREAD_SIZE);		\
 		ret;						\
 	})
 #else
-#define alloc_thread_info(tsk) kmalloc(THREAD_SIZE, GFP_KERNEL)
+#define alloc_thread_info(tsk) kmalloc(THREAD_SIZE, GFP_KERNEL_UBC)
 #endif
 
 #define free_thread_info(info)	kfree(info)
@@ -142,7 +142,8 @@ register unsigned long current_stack_poi
 #define TIF_SECCOMP		8	/* secure computing */
 #define TIF_RESTORE_SIGMASK	9	/* restore signal mask in do_signal() */
 #define TIF_POLLING_NRFLAG	16	/* true if poll_idle() is polling TIF_NEED_RESCHED */
-#define TIF_MEMDIE		17
+#define TIF_FREEZE		17	/* Freeze request, atomic version of PF_FREEZE */
+#define TIF_MEMDIE		18
 
 #define _TIF_SYSCALL_TRACE	(1<<TIF_SYSCALL_TRACE)
 #define _TIF_NOTIFY_RESUME	(1<<TIF_NOTIFY_RESUME)
diff -uprN linux-2.6.16/include/asm-i386/timex.h linux-2.6.16.ovz/include/asm-i386/timex.h
--- linux-2.6.16/include/asm-i386/timex.h	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/include/asm-i386/timex.h	2006-07-05 08:34:56.000000000 -0400
@@ -36,13 +36,17 @@ static inline cycles_t get_cycles (void)
 {
 	unsigned long long ret=0;
 
-#ifndef CONFIG_X86_TSC
-	if (!cpu_has_tsc)
-		return 0;
-#endif
-
 #if defined(CONFIG_X86_GENERIC) || defined(CONFIG_X86_TSC)
 	rdtscll(ret);
+#elif defined(CONFIG_VE)
+	/*
+	 * get_cycles is used in the following calculations:
+	 * - VPS idle and iowait times in kernel/sched.h
+	 * - task's sleep time to be shown with SysRq-t
+	 * - kstat latencies in linux/vzstat.h
+	 * - sched latency via wakeup_stamp in linux/ve_task.h
+	 */
+#warning "some of VPS statistics won't be correct without get_cycles() (kstat_lat, ve_idle, etc)"
 #endif
 	return ret;
 }
diff -uprN linux-2.6.16/include/asm-i386/unistd.h linux-2.6.16.ovz/include/asm-i386/unistd.h
--- linux-2.6.16/include/asm-i386/unistd.h	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/include/asm-i386/unistd.h	2006-07-05 08:34:56.000000000 -0400
@@ -316,8 +316,16 @@
 #define __NR_pselect6		308
 #define __NR_ppoll		309
 #define __NR_unshare		310
-
-#define NR_syscalls 311
+#define __NR_fairsched_mknod	500     /* FairScheduler syscalls */
+#define __NR_fairsched_rmnod	501
+#define __NR_fairsched_chwt	502
+#define __NR_fairsched_mvpr	503
+#define __NR_fairsched_rate	504
+#define __NR_getluid		510
+#define __NR_setluid		511
+#define __NR_setublimit		512
+#define __NR_ubstat		513
+#define NR_syscalls		514	/* highest syscall number (__NR_ubstat) + 1 */
 
 /*
  * user-visible error numbers are in the range -1 - -128: see
diff -uprN linux-2.6.16/include/asm-ia64/mman.h linux-2.6.16.ovz/include/asm-ia64/mman.h
--- linux-2.6.16/include/asm-ia64/mman.h	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/include/asm-ia64/mman.h	2006-07-05 08:34:56.000000000 -0400
@@ -18,6 +18,7 @@
 #define MAP_NORESERVE	0x04000		/* don't check for reservations */
 #define MAP_POPULATE	0x08000		/* populate (prefault) pagetables */
 #define MAP_NONBLOCK	0x10000		/* do not block on IO */
+#define MAP_EXECPRIO	0x20000		/* soft ubc charge */
 
 #define MCL_CURRENT	1		/* lock all current mappings */
 #define MCL_FUTURE	2		/* lock all future mappings */
diff -uprN linux-2.6.16/include/asm-ia64/pgalloc.h linux-2.6.16.ovz/include/asm-ia64/pgalloc.h
--- linux-2.6.16/include/asm-ia64/pgalloc.h	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/include/asm-ia64/pgalloc.h	2006-07-05 08:34:56.000000000 -0400
@@ -20,6 +20,8 @@
 #include <linux/page-flags.h>
 #include <linux/threads.h>
 
+#include <ub/ub_mem.h>
+
 #include <asm/mmu_context.h>
 
 DECLARE_PER_CPU(unsigned long *, __pgtable_quicklist);
@@ -38,7 +40,7 @@ static inline long pgtable_quicklist_tot
 	return ql_size;
 }
 
-static inline void *pgtable_quicklist_alloc(void)
+static inline void *pgtable_quicklist_alloc(int charge)
 {
 	unsigned long *ret = NULL;
 
@@ -46,13 +48,19 @@ static inline void *pgtable_quicklist_al
 
 	ret = pgtable_quicklist;
 	if (likely(ret != NULL)) {
+		if (ub_page_charge(virt_to_page(ret), 0,
+					charge ? __GFP_UBC|__GFP_SOFT_UBC : 0)) {
+			preempt_enable();
+			return NULL;	/* not charged: page stays on the quicklist */
+		}
 		pgtable_quicklist = (unsigned long *)(*ret);
 		ret[0] = 0;
 		--pgtable_quicklist_size;
 		preempt_enable();
 	} else {
 		preempt_enable();
-		ret = (unsigned long *)__get_free_page(GFP_KERNEL | __GFP_ZERO);
+		ret = (unsigned long *)__get_free_page(GFP_KERNEL | __GFP_ZERO |
+				(charge ? __GFP_UBC | __GFP_SOFT_UBC : 0));
 	}
 
 	return ret;
@@ -70,6 +78,7 @@ static inline void pgtable_quicklist_fre
 #endif
 
 	preempt_disable();
+	ub_page_uncharge(virt_to_page(pgtable_entry), 0);
 	*(unsigned long *)pgtable_entry = (unsigned long)pgtable_quicklist;
 	pgtable_quicklist = (unsigned long *)pgtable_entry;
 	++pgtable_quicklist_size;
@@ -78,7 +87,7 @@ static inline void pgtable_quicklist_fre
 
 static inline pgd_t *pgd_alloc(struct mm_struct *mm)
 {
-	return pgtable_quicklist_alloc();
+	return pgtable_quicklist_alloc(1);
 }
 
 static inline void pgd_free(pgd_t * pgd)
@@ -95,7 +104,7 @@ pgd_populate(struct mm_struct *mm, pgd_t
 
 static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr)
 {
-	return pgtable_quicklist_alloc();
+	return pgtable_quicklist_alloc(1);
 }
 
 static inline void pud_free(pud_t * pud)
@@ -113,7 +122,7 @@ pud_populate(struct mm_struct *mm, pud_t
 
 static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr)
 {
-	return pgtable_quicklist_alloc();
+	return pgtable_quicklist_alloc(1);
 }
 
 static inline void pmd_free(pmd_t * pmd)
@@ -138,13 +147,16 @@ pmd_populate_kernel(struct mm_struct *mm
 static inline struct page *pte_alloc_one(struct mm_struct *mm,
 					 unsigned long addr)
 {
-	return virt_to_page(pgtable_quicklist_alloc());
+	void *pg = pgtable_quicklist_alloc(1);
+
+	/* virt_to_page(NULL) is not NULL, so report failure explicitly */
+	return pg ? virt_to_page(pg) : NULL;
 }
 
 static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
 					  unsigned long addr)
 {
-	return pgtable_quicklist_alloc();
+	return pgtable_quicklist_alloc(0);
 }
 
 static inline void pte_free(struct page *pte)
diff -uprN linux-2.6.16/include/asm-ia64/processor.h linux-2.6.16.ovz/include/asm-ia64/processor.h
--- linux-2.6.16/include/asm-ia64/processor.h	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/include/asm-ia64/processor.h	2006-07-05 08:34:56.000000000 -0400
@@ -306,7 +306,7 @@ struct thread_struct {
 	regs->loadrs = 0;									\
 	regs->r8 = current->mm->dumpable;	/* set "don't zap registers" flag */		\
 	regs->r12 = new_sp - 16;	/* allocate 16 byte scratch area */			\
-	if (unlikely(!current->mm->dumpable)) {							\
+	if (unlikely(!current->mm->dumpable || !current->mm->vps_dumpable)) {			\
 		/*										\
 		 * Zap scratch regs to avoid leaking bits between processes with different	\
 		 * uid/privileges.								\
diff -uprN linux-2.6.16/include/asm-ia64/thread_info.h linux-2.6.16.ovz/include/asm-ia64/thread_info.h
--- linux-2.6.16/include/asm-ia64/thread_info.h	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/include/asm-ia64/thread_info.h	2006-07-05 08:34:56.000000000 -0400
@@ -94,6 +94,7 @@ struct thread_info {
 #define TIF_MEMDIE		17
 #define TIF_MCA_INIT		18	/* this task is processing MCA or INIT */
 #define TIF_DB_DISABLED		19	/* debug trap disabled for fsyscall */
+#define TIF_FREEZE		20	/* Freeze request, atomic version of PF_FREEZE */
 
 #define _TIF_SYSCALL_TRACE	(1 << TIF_SYSCALL_TRACE)
 #define _TIF_SYSCALL_AUDIT	(1 << TIF_SYSCALL_AUDIT)
diff -uprN linux-2.6.16/include/asm-ia64/unistd.h linux-2.6.16.ovz/include/asm-ia64/unistd.h
--- linux-2.6.16/include/asm-ia64/unistd.h	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/include/asm-ia64/unistd.h	2006-07-05 08:34:56.000000000 -0400
@@ -285,12 +285,22 @@
 #define __NR_faccessat			1293
 /* 1294, 1295 reserved for pselect/ppoll */
 #define __NR_unshare			1296
+#define __NR_fairsched_mknod		1500
+#define __NR_fairsched_rmnod		1501
+#define __NR_fairsched_chwt		1502
+#define __NR_fairsched_mvpr		1503
+#define __NR_fairsched_rate		1504
+#define __NR_getluid			1505
+#define __NR_setluid			1506
+#define __NR_setublimit			1507
+#define __NR_ubstat			1508
 
 #ifdef __KERNEL__
 
 #include <linux/config.h>
 
-#define NR_syscalls			273 /* length of syscall table */
+/* length of syscall table */
+#define NR_syscalls (__NR_ubstat - __NR_ni_syscall + 1)
 
 #define __ARCH_WANT_SYS_RT_SIGACTION
 
diff -uprN linux-2.6.16/include/asm-m32r/smp.h linux-2.6.16.ovz/include/asm-m32r/smp.h
--- linux-2.6.16/include/asm-m32r/smp.h	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/include/asm-m32r/smp.h	2006-07-05 08:34:56.000000000 -0400
@@ -67,7 +67,8 @@ extern volatile int cpu_2_physid[NR_CPUS
 #define raw_smp_processor_id()	(current_thread_info()->cpu)
 
 extern cpumask_t cpu_callout_map;
-#define cpu_possible_map cpu_callout_map
+extern cpumask_t cpu_possible_map;
+extern cpumask_t cpu_present_map;
 
 static __inline__ int hard_smp_processor_id(void)
 {
diff -uprN linux-2.6.16/include/asm-m32r/uaccess.h linux-2.6.16.ovz/include/asm-m32r/uaccess.h
--- linux-2.6.16/include/asm-m32r/uaccess.h	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/include/asm-m32r/uaccess.h	2006-07-05 08:34:56.000000000 -0400
@@ -5,17 +5,9 @@
  *  linux/include/asm-m32r/uaccess.h
  *
  *  M32R version.
- *    Copyright (C) 2004  Hirokazu Takata <takata at linux-m32r.org>
+ *    Copyright (C) 2004, 2006  Hirokazu Takata <takata at linux-m32r.org>
  */
 
-#undef UACCESS_DEBUG
-
-#ifdef UACCESS_DEBUG
-#define UAPRINTK(args...) printk(args)
-#else
-#define UAPRINTK(args...)
-#endif /* UACCESS_DEBUG */
-
 /*
  * User space memory access functions
  */
@@ -38,27 +30,29 @@
 #define MAKE_MM_SEG(s)	((mm_segment_t) { (s) })
 
 #ifdef CONFIG_MMU
+
 #define KERNEL_DS	MAKE_MM_SEG(0xFFFFFFFF)
 #define USER_DS		MAKE_MM_SEG(PAGE_OFFSET)
-#else
-#define KERNEL_DS	MAKE_MM_SEG(0xFFFFFFFF)
-#define USER_DS		MAKE_MM_SEG(0xFFFFFFFF)
-#endif /* CONFIG_MMU */
-
 #define get_ds()	(KERNEL_DS)
-#ifdef CONFIG_MMU
 #define get_fs()	(current_thread_info()->addr_limit)
 #define set_fs(x)	(current_thread_info()->addr_limit = (x))
-#else
+
+#else /* not CONFIG_MMU */
+
+#define KERNEL_DS	MAKE_MM_SEG(0xFFFFFFFF)
+#define USER_DS		MAKE_MM_SEG(0xFFFFFFFF)
+#define get_ds()	(KERNEL_DS)
+
 static inline mm_segment_t get_fs(void)
 {
-  return USER_DS;
+	return USER_DS;
 }
 
 static inline void set_fs(mm_segment_t s)
 {
 }
-#endif /* CONFIG_MMU */
+
+#endif /* not CONFIG_MMU */
 
 #define segment_eq(a,b)	((a).seg == (b).seg)
 
@@ -83,9 +77,9 @@ static inline void set_fs(mm_segment_t s
 		"	subx	%0, %0\n"				\
 		"	cmpu	%4, %1\n"				\
 		"	subx	%0, %5\n"				\
-		: "=&r"(flag), "=r"(sum)				\
-		: "1"(addr), "r"((int)(size)), 				\
-		  "r"(current_thread_info()->addr_limit.seg), "r"(0)	\
+		: "=&r" (flag), "=r" (sum)				\
+		: "1" (addr), "r" ((int)(size)), 			\
+		  "r" (current_thread_info()->addr_limit.seg), "r" (0)	\
 		: "cbit" );						\
 	flag; })
 
@@ -113,10 +107,10 @@ static inline void set_fs(mm_segment_t s
 #else
 static inline int access_ok(int type, const void *addr, unsigned long size)
 {
-  extern unsigned long memory_start, memory_end;
-  unsigned long val = (unsigned long)addr;
+	extern unsigned long memory_start, memory_end;
+	unsigned long val = (unsigned long)addr;
 
-  return ((val >= memory_start) && ((val + size) < memory_end));
+	return ((val >= memory_start) && ((val + size) < memory_end));
 }
 #endif /* CONFIG_MMU */
 
@@ -155,39 +149,6 @@ extern int fixup_exception(struct pt_reg
  * accesses to the same area of user memory).
  */
 
-extern void __get_user_1(void);
-extern void __get_user_2(void);
-extern void __get_user_4(void);
-
-#ifndef MODULE
-#define __get_user_x(size,ret,x,ptr) 					\
-	__asm__ __volatile__(						\
-		"	mv	r0, %0\n"				\
-		"	mv	r1, %1\n" 				\
-		"	bl __get_user_" #size "\n"			\
-		"	mv	%0, r0\n"				\
-		"	mv	%1, r1\n" 				\
-		: "=r"(ret), "=r"(x) 					\
-		: "0"(ptr)						\
-		: "r0", "r1", "r14" )
-#else /* MODULE */
-/*
- * Use "jl" instead of "bl" for MODULE
- */
-#define __get_user_x(size,ret,x,ptr) 					\
-	__asm__ __volatile__(						\
-		"	mv	r0, %0\n"				\
-		"	mv	r1, %1\n" 				\
-		"	seth	lr, #high(__get_user_" #size ")\n"	\
-		"	or3	lr, lr, #low(__get_user_" #size ")\n"	\
-		"	jl 	lr\n"					\
-		"	mv	%0, r0\n"				\
-		"	mv	%1, r1\n" 				\
-		: "=r"(ret), "=r"(x) 					\
-		: "0"(ptr)						\
-		: "r0", "r1", "r14" )
-#endif
-
 /* Careful: we have to cast the result to the type of the pointer for sign
    reasons */
 /**
@@ -208,20 +169,7 @@ extern void __get_user_4(void);
  * On error, the variable @x is set to zero.
  */
 #define get_user(x,ptr)							\
-({	int __ret_gu;							\
-	unsigned long __val_gu;						\
-	__chk_user_ptr(ptr);						\
-	switch(sizeof (*(ptr))) {					\
-	case 1:  __get_user_x(1,__ret_gu,__val_gu,ptr); break;		\
-	case 2:  __get_user_x(2,__ret_gu,__val_gu,ptr); break;		\
-	case 4:  __get_user_x(4,__ret_gu,__val_gu,ptr); break;		\
-	default: __get_user_x(X,__ret_gu,__val_gu,ptr); break;		\
-	}								\
-	(x) = (__typeof__(*(ptr)))__val_gu;				\
-	__ret_gu;							\
-})
-
-extern void __put_user_bad(void);
+	__get_user_check((x),(ptr),sizeof(*(ptr)))
 
 /**
  * put_user: - Write a simple value into user space.
@@ -240,8 +188,7 @@ extern void __put_user_bad(void);
  * Returns zero on success, or -EFAULT on error.
  */
 #define put_user(x,ptr)							\
-  __put_user_check((__typeof__(*(ptr)))(x),(ptr),sizeof(*(ptr)))
-
+	__put_user_check((__typeof__(*(ptr)))(x),(ptr),sizeof(*(ptr)))
 
 /**
  * __get_user: - Get a simple variable from user space, with less checking.
@@ -264,8 +211,64 @@ extern void __put_user_bad(void);
  * On error, the variable @x is set to zero.
  */
 #define __get_user(x,ptr) \
-  __get_user_nocheck((x),(ptr),sizeof(*(ptr)))
+	__get_user_nocheck((x),(ptr),sizeof(*(ptr)))
 
+#define __get_user_nocheck(x,ptr,size)					\
+({									\
+	long __gu_err = 0;						\
+	unsigned long __gu_val;						\
+	might_sleep();							\
+	__get_user_size(__gu_val,(ptr),(size),__gu_err);		\
+	(x) = (__typeof__(*(ptr)))__gu_val;				\
+	__gu_err;							\
+})
+
+#define __get_user_check(x,ptr,size)					\
+({									\
+	long __gu_err = -EFAULT;					\
+	unsigned long __gu_val = 0;					\
+	const __typeof__(*(ptr)) __user *__gu_addr = (ptr);		\
+	might_sleep();							\
+	if (access_ok(VERIFY_READ,__gu_addr,size))			\
+		__get_user_size(__gu_val,__gu_addr,(size),__gu_err);	\
+	(x) = (__typeof__(*(ptr)))__gu_val;				\
+	__gu_err;							\
+})
+
+extern long __get_user_bad(void);
+
+#define __get_user_size(x,ptr,size,retval)				\
+do {									\
+	retval = 0;							\
+	__chk_user_ptr(ptr);						\
+	switch (size) {							\
+	  case 1: __get_user_asm(x,ptr,retval,"ub"); break;		\
+	  case 2: __get_user_asm(x,ptr,retval,"uh"); break;		\
+	  case 4: __get_user_asm(x,ptr,retval,""); break;		\
+	  default: (x) = __get_user_bad();				\
+	}								\
+} while (0)
+
+#define __get_user_asm(x, addr, err, itype)				\
+	__asm__ __volatile__(						\
+		"	.fillinsn\n"					\
+		"1:	ld"itype" %1,@%2\n"				\
+		"	.fillinsn\n"					\
+		"2:\n"							\
+		".section .fixup,\"ax\"\n"				\
+		"	.balign 4\n"					\
+		"3:	ldi %0,%3\n"					\
+		"	seth r14,#high(2b)\n"				\
+		"	or3 r14,r14,#low(2b)\n"				\
+		"	jmp r14\n"					\
+		".previous\n"						\
+		".section __ex_table,\"a\"\n"				\
+		"	.balign 4\n"					\
+		"	.long 1b,3b\n"					\
+		".previous"						\
+		: "=&r" (err), "=&r" (x)				\
+		: "r" (addr), "i" (-EFAULT), "0" (err)			\
+		: "r14", "memory")
 
 /**
  * __put_user: - Write a simple value into user space, with less checking.
@@ -287,11 +290,13 @@ extern void __put_user_bad(void);
  * Returns zero on success, or -EFAULT on error.
  */
 #define __put_user(x,ptr) \
-  __put_user_nocheck((__typeof__(*(ptr)))(x),(ptr),sizeof(*(ptr)))
+	__put_user_nocheck((__typeof__(*(ptr)))(x),(ptr),sizeof(*(ptr)))
+
 
 #define __put_user_nocheck(x,ptr,size)					\
 ({									\
 	long __pu_err;							\
+	might_sleep();							\
 	__put_user_size((x),(ptr),(size),__pu_err);			\
 	__pu_err;							\
 })
@@ -308,28 +313,28 @@ extern void __put_user_bad(void);
 })
 
 #if defined(__LITTLE_ENDIAN__)
-#define __put_user_u64(x, addr, err)                                    \
-        __asm__ __volatile__(                                           \
-                "       .fillinsn\n"                                    \
-                "1:     st %L1,@%2\n"                                    \
-                "       .fillinsn\n"                                    \
-                "2:     st %H1,@(4,%2)\n"                                \
-                "       .fillinsn\n"                                    \
-                "3:\n"                                                  \
-                ".section .fixup,\"ax\"\n"                              \
-                "       .balign 4\n"                                    \
-                "4:     ldi %0,%3\n"                                    \
-                "       seth r14,#high(3b)\n"                           \
-                "       or3 r14,r14,#low(3b)\n"                         \
-                "       jmp r14\n"                                      \
-                ".previous\n"                                           \
-                ".section __ex_table,\"a\"\n"                           \
-                "       .balign 4\n"                                    \
-                "       .long 1b,4b\n"                                  \
-                "       .long 2b,4b\n"                                  \
-                ".previous"                                             \
-                : "=&r"(err)                                             \
-                : "r"(x), "r"(addr), "i"(-EFAULT), "0"(err)		\
+#define __put_user_u64(x, addr, err)					\
+        __asm__ __volatile__(						\
+                "       .fillinsn\n"					\
+                "1:     st %L1,@%2\n"					\
+                "       .fillinsn\n"					\
+                "2:     st %H1,@(4,%2)\n"				\
+                "       .fillinsn\n"					\
+                "3:\n"							\
+                ".section .fixup,\"ax\"\n"				\
+                "       .balign 4\n"					\
+                "4:     ldi %0,%3\n"					\
+                "       seth r14,#high(3b)\n"				\
+                "       or3 r14,r14,#low(3b)\n"				\
+                "       jmp r14\n"					\
+                ".previous\n"						\
+                ".section __ex_table,\"a\"\n"				\
+                "       .balign 4\n"					\
+                "       .long 1b,4b\n"					\
+                "       .long 2b,4b\n"					\
+                ".previous"						\
+                : "=&r" (err)						\
+                : "r" (x), "r" (addr), "i" (-EFAULT), "0" (err)		\
                 : "r14", "memory")
 
 #elif defined(__BIG_ENDIAN__)
@@ -353,13 +358,15 @@ extern void __put_user_bad(void);
 		"	.long 1b,4b\n"					\
 		"	.long 2b,4b\n"					\
 		".previous"						\
-		: "=&r"(err)						\
-		: "r"(x), "r"(addr), "i"(-EFAULT), "0"(err)		\
+		: "=&r" (err)						\
+		: "r" (x), "r" (addr), "i" (-EFAULT), "0" (err)		\
 		: "r14", "memory")
 #else
 #error no endian defined
 #endif
 
+extern void __put_user_bad(void);
+
 #define __put_user_size(x,ptr,size,retval)				\
 do {									\
 	retval = 0;							\
@@ -398,52 +405,8 @@ struct __large_struct { unsigned long bu
 		"	.balign 4\n"					\
 		"	.long 1b,3b\n"					\
 		".previous"						\
-		: "=&r"(err)						\
-		: "r"(x), "r"(addr), "i"(-EFAULT), "0"(err)		\
-		: "r14", "memory")
-
-#define __get_user_nocheck(x,ptr,size)					\
-({									\
-	long __gu_err;							\
-	unsigned long __gu_val;						\
-	__get_user_size(__gu_val,(ptr),(size),__gu_err);		\
-	(x) = (__typeof__(*(ptr)))__gu_val;				\
-	__gu_err;							\
-})
-
-extern long __get_user_bad(void);
-
-#define __get_user_size(x,ptr,size,retval)				\
-do {									\
-	retval = 0;							\
-	__chk_user_ptr(ptr);						\
-	switch (size) {							\
-	  case 1: __get_user_asm(x,ptr,retval,"ub"); break;		\
-	  case 2: __get_user_asm(x,ptr,retval,"uh"); break;		\
-	  case 4: __get_user_asm(x,ptr,retval,""); break;		\
-	  default: (x) = __get_user_bad();				\
-	}								\
-} while (0)
-
-#define __get_user_asm(x, addr, err, itype)				\
-	__asm__ __volatile__(						\
-		"	.fillinsn\n"					\
-		"1:	ld"itype" %1,@%2\n"				\
-		"	.fillinsn\n"					\
-		"2:\n"							\
-		".section .fixup,\"ax\"\n"				\
-		"	.balign 4\n"					\
-		"3:	ldi %0,%3\n"					\
-		"	seth r14,#high(2b)\n"				\
-		"	or3 r14,r14,#low(2b)\n"				\
-		"	jmp r14\n"					\
-		".previous\n"						\
-		".section __ex_table,\"a\"\n"				\
-		"	.balign 4\n"					\
-		"	.long 1b,3b\n"					\
-		".previous"						\
-		: "=&r"(err), "=&r"(x)					\
-		: "r"(addr), "i"(-EFAULT), "0"(err)			\
+		: "=&r" (err)						\
+		: "r" (x), "r" (addr), "i" (-EFAULT), "0" (err)		\
 		: "r14", "memory")
 
 /*
@@ -453,7 +416,6 @@ do {									\
  * anything, so this is accurate.
  */
 
-
 /*
  * Copy To/From Userspace
  */
@@ -511,8 +473,9 @@ do {									\
 		"	.long 2b,9b\n"					\
 		"	.long 3b,9b\n"					\
 		".previous\n"						\
-		: "=&r"(__dst), "=&r"(__src), "=&r"(size), "=&r"(__c)	\
-		: "0"(to), "1"(from), "2"(size), "3"(size / 4)		\
+		: "=&r" (__dst), "=&r" (__src), "=&r" (size),		\
+		  "=&r" (__c)						\
+		: "0" (to), "1" (from), "2" (size), "3" (size / 4)	\
 		: "r14", "memory");					\
 } while (0)
 
@@ -573,8 +536,9 @@ do {									\
 		"	.long 2b,7b\n"					\
 		"	.long 3b,7b\n"					\
 		".previous\n"						\
-		: "=&r"(__dst), "=&r"(__src), "=&r"(size), "=&r"(__c)	\
-		: "0"(to), "1"(from), "2"(size), "3"(size / 4)		\
+		: "=&r" (__dst), "=&r" (__src), "=&r" (size),		\
+		  "=&r" (__c)						\
+		: "0" (to), "1" (from), "2" (size), "3" (size / 4)	\
 		: "r14", "memory");					\
 } while (0)
 
@@ -676,7 +640,7 @@ unsigned long __generic_copy_from_user(v
 #define copy_from_user(to,from,n)			\
 ({							\
 	might_sleep();					\
-__generic_copy_from_user((to),(from),(n));	\
+	__generic_copy_from_user((to),(from),(n));	\
 })
 
 long __must_check strncpy_from_user(char *dst, const char __user *src,
diff -uprN linux-2.6.16/include/asm-mips/bitops.h linux-2.6.16.ovz/include/asm-mips/bitops.h
--- linux-2.6.16/include/asm-mips/bitops.h	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/include/asm-mips/bitops.h	2006-07-05 08:34:56.000000000 -0400
@@ -654,7 +654,12 @@ static inline unsigned long fls(unsigned
 {
 #ifdef CONFIG_32BIT
 #ifdef CONFIG_CPU_MIPS32
-	__asm__ ("clz %0, %1" : "=r" (word) : "r" (word));
+	__asm__ (
+	"	.set	mips32					\n"
+	"	clz	%0, %1					\n"
+	"	.set	mips0					\n"
+	: "=r" (word)
+	: "r" (word));
 
 	return 32 - word;
 #else
@@ -678,7 +683,12 @@ static inline unsigned long fls(unsigned
 #ifdef CONFIG_64BIT
 #ifdef CONFIG_CPU_MIPS64
 
-	__asm__ ("dclz %0, %1" : "=r" (word) : "r" (word));
+	__asm__ (
+	"	.set	mips64					\n"
+	"	dclz	%0, %1					\n"
+	"	.set	mips0					\n"
+	: "=r" (word)
+	: "r" (word));
 
 	return 64 - word;
 #else
diff -uprN linux-2.6.16/include/asm-mips/byteorder.h linux-2.6.16.ovz/include/asm-mips/byteorder.h
--- linux-2.6.16/include/asm-mips/byteorder.h	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/include/asm-mips/byteorder.h	2006-07-05 08:34:56.000000000 -0400
@@ -19,7 +19,9 @@
 static __inline__ __attribute_const__ __u16 ___arch__swab16(__u16 x)
 {
 	__asm__(
+	"	.set	mips32r2		\n"
 	"	wsbh	%0, %1			\n"
+	"	.set	mips0			\n"
 	: "=r" (x)
 	: "r" (x));
 
@@ -30,8 +32,10 @@ static __inline__ __attribute_const__ __
 static __inline__ __attribute_const__ __u32 ___arch__swab32(__u32 x)
 {
 	__asm__(
+	"	.set	mips32r2		\n"
 	"	wsbh	%0, %1			\n"
 	"	rotr	%0, %0, 16		\n"
+	"	.set	mips0			\n"
 	: "=r" (x)
 	: "r" (x));
 
diff -uprN linux-2.6.16/include/asm-mips/interrupt.h linux-2.6.16.ovz/include/asm-mips/interrupt.h
--- linux-2.6.16/include/asm-mips/interrupt.h	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/include/asm-mips/interrupt.h	2006-07-05 08:34:56.000000000 -0400
@@ -20,7 +20,9 @@ __asm__ (
 	"	.set	reorder						\n"
 	"	.set	noat						\n"
 #ifdef CONFIG_CPU_MIPSR2
+	"	.set	mips32r2					\n"
 	"	ei							\n"
+	"	.set	mips0						\n"
 #else
 	"	mfc0	$1,$12						\n"
 	"	ori	$1,0x1f						\n"
@@ -63,7 +65,9 @@ __asm__ (
 	"	.set	push						\n"
 	"	.set	noat						\n"
 #ifdef CONFIG_CPU_MIPSR2
+	"	.set	mips32r2					\n"
 	"	di							\n"
+	"	.set	mips0						\n"
 #else
 	"	mfc0	$1,$12						\n"
 	"	ori	$1,0x1f						\n"
@@ -103,8 +107,10 @@ __asm__ (
 	"	.set	reorder						\n"
 	"	.set	noat						\n"
 #ifdef CONFIG_CPU_MIPSR2
+	"	.set	mips32r2					\n"
 	"	di	\\result					\n"
 	"	andi	\\result, 1					\n"
+	"	.set	mips0						\n"
 #else
 	"	mfc0	\\result, $12					\n"
 	"	ori	$1, \\result, 0x1f				\n"
@@ -133,9 +139,11 @@ __asm__ (
 	 * Slow, but doesn't suffer from a relativly unlikely race
 	 * condition we're having since days 1.
 	 */
+	"	.set	mips32r2					\n"
 	"	beqz	\\flags, 1f					\n"
 	"	 di							\n"
 	"	ei							\n"
+	"	.set	mips0						\n"
 	"1:								\n"
 #elif defined(CONFIG_CPU_MIPSR2)
 	/*
diff -uprN linux-2.6.16/include/asm-mips/pgtable.h linux-2.6.16.ovz/include/asm-mips/pgtable.h
--- linux-2.6.16/include/asm-mips/pgtable.h	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/include/asm-mips/pgtable.h	2006-07-05 08:34:56.000000000 -0400
@@ -70,7 +70,15 @@ extern unsigned long zero_page_mask;
 #define ZERO_PAGE(vaddr) \
 	(virt_to_page(empty_zero_page + (((unsigned long)(vaddr)) & zero_page_mask)))
 
-#define __HAVE_ARCH_MULTIPLE_ZERO_PAGE
+#define __HAVE_ARCH_MOVE_PTE
+#define move_pte(pte, prot, old_addr, new_addr)				\
+({									\
+ 	pte_t newpte = (pte);						\
+	if (pte_present(pte) && pfn_valid(pte_pfn(pte)) &&		\
+			pte_page(pte) == ZERO_PAGE(old_addr))		\
+		newpte = mk_pte(ZERO_PAGE(new_addr), (prot));		\
+	newpte;								\
+})
 
 extern void paging_init(void);
 
diff -uprN linux-2.6.16/include/asm-mips/r4kcache.h linux-2.6.16.ovz/include/asm-mips/r4kcache.h
--- linux-2.6.16/include/asm-mips/r4kcache.h	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/include/asm-mips/r4kcache.h	2006-07-05 08:34:56.000000000 -0400
@@ -37,7 +37,7 @@
 	"	cache	%0, %1					\n"	\
 	"	.set	pop					\n"	\
 	:								\
-	: "i" (op), "m" (*(unsigned char *)(addr)))
+	: "i" (op), "R" (*(unsigned char *)(addr)))
 
 static inline void flush_icache_line_indexed(unsigned long addr)
 {
diff -uprN linux-2.6.16/include/asm-powerpc/floppy.h linux-2.6.16.ovz/include/asm-powerpc/floppy.h
--- linux-2.6.16/include/asm-powerpc/floppy.h	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/include/asm-powerpc/floppy.h	2006-07-05 08:34:56.000000000 -0400
@@ -35,6 +35,7 @@
 #ifdef CONFIG_PCI
 
 #include <linux/pci.h>
+#include <asm/ppc-pci.h>	/* for ppc64_isabridge_dev */
 
 #define fd_dma_setup(addr,size,mode,io) powerpc_fd_dma_setup(addr,size,mode,io)
 
@@ -52,12 +53,12 @@ static __inline__ int powerpc_fd_dma_set
 	if (bus_addr 
 	    && (addr != prev_addr || size != prev_size || dir != prev_dir)) {
 		/* different from last time -- unmap prev */
-		pci_unmap_single(NULL, bus_addr, prev_size, prev_dir);
+		pci_unmap_single(ppc64_isabridge_dev, bus_addr, prev_size, prev_dir);
 		bus_addr = 0;
 	}
 
 	if (!bus_addr)	/* need to map it */
-		bus_addr = pci_map_single(NULL, addr, size, dir);
+		bus_addr = pci_map_single(ppc64_isabridge_dev, addr, size, dir);
 
 	/* remember this one as prev */
 	prev_addr = addr;
diff -uprN linux-2.6.16/include/asm-powerpc/pgalloc.h linux-2.6.16.ovz/include/asm-powerpc/pgalloc.h
--- linux-2.6.16/include/asm-powerpc/pgalloc.h	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/include/asm-powerpc/pgalloc.h	2006-07-05 08:34:56.000000000 -0400
@@ -33,7 +33,8 @@ extern kmem_cache_t *pgtable_cache[];
 
 static inline pgd_t *pgd_alloc(struct mm_struct *mm)
 {
-	return kmem_cache_alloc(pgtable_cache[PGD_CACHE_NUM], GFP_KERNEL);
+	return kmem_cache_alloc(pgtable_cache[PGD_CACHE_NUM],
+			GFP_KERNEL_UBC | __GFP_SOFT_UBC);
 }
 
 static inline void pgd_free(pgd_t *pgd)
@@ -48,7 +49,7 @@ static inline void pgd_free(pgd_t *pgd)
 static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr)
 {
 	return kmem_cache_alloc(pgtable_cache[PUD_CACHE_NUM],
-				GFP_KERNEL|__GFP_REPEAT);
+				GFP_KERNEL_UBC|__GFP_SOFT_UBC|__GFP_REPEAT);
 }
 
 static inline void pud_free(pud_t *pud)
@@ -84,7 +85,7 @@ static inline void pmd_populate_kernel(s
 static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr)
 {
 	return kmem_cache_alloc(pgtable_cache[PMD_CACHE_NUM],
-				GFP_KERNEL|__GFP_REPEAT);
+				GFP_KERNEL_UBC|__GFP_SOFT_UBC|__GFP_REPEAT);
 }
 
 static inline void pmd_free(pmd_t *pmd)
@@ -92,17 +93,29 @@ static inline void pmd_free(pmd_t *pmd)
 	kmem_cache_free(pgtable_cache[PMD_CACHE_NUM], pmd);
 }
 
+/* Allocate one PTE page from the PTE slab cache using the given gfp flags. */
+static inline pte_t *pte_alloc_gfp(gfp_t flags)
+{
+	return kmem_cache_alloc(pgtable_cache[PTE_CACHE_NUM], flags);
+}
+
 static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
 					  unsigned long address)
 {
-	return kmem_cache_alloc(pgtable_cache[PTE_CACHE_NUM],
-				GFP_KERNEL|__GFP_REPEAT);
+	return pte_alloc_gfp(GFP_KERNEL | __GFP_REPEAT);
 }
 
+/*
+ * User page tables are allocated with the UBC gfp flags.  __GFP_REPEAT is
+ * kept, as it was before this change, and a failed allocation is reported
+ * as NULL instead of being passed through virt_to_page().
+ */
 static inline struct page *pte_alloc_one(struct mm_struct *mm,
 					 unsigned long address)
 {
-	return virt_to_page(pte_alloc_one_kernel(mm, address));
+	pte_t *pte = pte_alloc_gfp(GFP_KERNEL_UBC|__GFP_SOFT_UBC|__GFP_REPEAT);
+
+	return pte ? virt_to_page(pte) : NULL;
 }
 		
 static inline void pte_free_kernel(pte_t *pte)
diff -uprN linux-2.6.16/include/asm-powerpc/unistd.h linux-2.6.16.ovz/include/asm-powerpc/unistd.h
--- linux-2.6.16/include/asm-powerpc/unistd.h	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/include/asm-powerpc/unistd.h	2006-07-05 08:34:56.000000000 -0400
@@ -301,8 +301,12 @@
 #define __NR_pselect6		280
 #define __NR_ppoll		281
 #define __NR_unshare		282
-
-#define __NR_syscalls		283
+#define __NR_getluid		410
+#define __NR_setluid		411
+#define __NR_setublimit		412
+#define __NR_ubstat		413
+
+#define __NR_syscalls		414	/* one past the last syscall number */
 
 #ifdef __KERNEL__
 #define __NR__exit __NR_exit
diff -uprN linux-2.6.16/include/asm-s390/pgalloc.h linux-2.6.16.ovz/include/asm-s390/pgalloc.h
--- linux-2.6.16/include/asm-s390/pgalloc.h	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/include/asm-s390/pgalloc.h	2006-07-05 08:34:56.000000000 -0400
@@ -34,12 +34,12 @@ static inline pgd_t *pgd_alloc(struct mm
 	int i;
 
 #ifndef __s390x__
-	pgd = (pgd_t *) __get_free_pages(GFP_KERNEL,1);
+	pgd = (pgd_t *) __get_free_pages(GFP_KERNEL_UBC | __GFP_SOFT_UBC, 1);
         if (pgd != NULL)
 		for (i = 0; i < USER_PTRS_PER_PGD; i++)
 			pmd_clear(pmd_offset(pgd + i, i*PGDIR_SIZE));
 #else /* __s390x__ */
-	pgd = (pgd_t *) __get_free_pages(GFP_KERNEL,2);
+	pgd = (pgd_t *) __get_free_pages(GFP_KERNEL_UBC | __GFP_SOFT_UBC, 2);
         if (pgd != NULL)
 		for (i = 0; i < PTRS_PER_PGD; i++)
 			pgd_clear(pgd + i);
@@ -72,7 +72,7 @@ static inline pmd_t * pmd_alloc_one(stru
 	pmd_t *pmd;
         int i;
 
-	pmd = (pmd_t *) __get_free_pages(GFP_KERNEL, 2);
+	pmd = (pmd_t *) __get_free_pages(GFP_KERNEL_UBC | __GFP_SOFT_UBC, 2);
 	if (pmd != NULL) {
 		for (i=0; i < PTRS_PER_PMD; i++)
 			pmd_clear(pmd+i);
@@ -118,16 +118,13 @@ pmd_populate(struct mm_struct *mm, pmd_t
 	pmd_populate_kernel(mm, pmd, (pte_t *)((page-mem_map) << PAGE_SHIFT));
 }
 
-/*
- * page table entry allocation/free routines.
- */
-static inline pte_t *
-pte_alloc_one_kernel(struct mm_struct *mm, unsigned long vmaddr)
+static inline pte_t *pte_alloc(struct mm_struct *mm, unsigned long vmaddr,
+		gfp_t mask)
 {
 	pte_t *pte;
         int i;
 
-	pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT);
+	pte = (pte_t *)__get_free_page(mask);
 	if (pte != NULL) {
 		for (i=0; i < PTRS_PER_PTE; i++) {
 			pte_clear(mm, vmaddr, pte+i);
@@ -137,10 +134,20 @@ pte_alloc_one_kernel(struct mm_struct *m
 	return pte;
 }
 
+/*
+ * page table entry allocation/free routines.
+ */
+static inline pte_t *
+pte_alloc_one_kernel(struct mm_struct *mm, unsigned long vmaddr)
+{
+	return pte_alloc(mm, vmaddr, GFP_KERNEL | __GFP_REPEAT);
+}
+
 static inline struct page *
 pte_alloc_one(struct mm_struct *mm, unsigned long vmaddr)
 {
-	pte_t *pte = pte_alloc_one_kernel(mm, vmaddr);
+	pte_t *pte = pte_alloc(mm, vmaddr, GFP_KERNEL_UBC | __GFP_SOFT_UBC |
+			__GFP_REPEAT);
 	if (pte)
 		return virt_to_page(pte);
 	return 0;
diff -uprN linux-2.6.16/include/asm-sh64/pgalloc.h linux-2.6.16.ovz/include/asm-sh64/pgalloc.h
--- linux-2.6.16/include/asm-sh64/pgalloc.h	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/include/asm-sh64/pgalloc.h	2006-07-05 08:34:56.000000000 -0400
@@ -173,7 +173,7 @@ static inline void set_pgdir(unsigned lo
 	pgd_t *pgd;
 
 	read_lock(&tasklist_lock);
-	for_each_process(p) {
+	for_each_process_all(p) {
 		if (!p->mm)
 			continue;
 		*pgd_offset(p->mm,address) = entry;
diff -uprN linux-2.6.16/include/asm-sparc64/dma-mapping.h linux-2.6.16.ovz/include/asm-sparc64/dma-mapping.h
--- linux-2.6.16/include/asm-sparc64/dma-mapping.h	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/include/asm-sparc64/dma-mapping.h	2006-07-05 08:34:56.000000000 -0400
@@ -4,7 +4,146 @@
 #include <linux/config.h>
 
 #ifdef CONFIG_PCI
-#include <asm-generic/dma-mapping.h>
+
+/* we implement the API below in terms of the existing PCI one,
+ * so include it */
+#include <linux/pci.h>
+/* need struct page definitions */
+#include <linux/mm.h>
+
+static inline int
+dma_supported(struct device *dev, u64 mask)
+{
+	BUG_ON(dev->bus != &pci_bus_type);
+
+	return pci_dma_supported(to_pci_dev(dev), mask);
+}
+
+static inline int
+dma_set_mask(struct device *dev, u64 dma_mask)
+{
+	BUG_ON(dev->bus != &pci_bus_type);
+
+	return pci_set_dma_mask(to_pci_dev(dev), dma_mask);
+}
+
+static inline void *
+dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle,
+		   gfp_t flag)
+{
+	BUG_ON(dev->bus != &pci_bus_type);
+
+	return __pci_alloc_consistent(to_pci_dev(dev), size, dma_handle, flag);
+}
+
+static inline void
+dma_free_coherent(struct device *dev, size_t size, void *cpu_addr,
+		    dma_addr_t dma_handle)
+{
+	BUG_ON(dev->bus != &pci_bus_type);
+
+	pci_free_consistent(to_pci_dev(dev), size, cpu_addr, dma_handle);
+}
+
+static inline dma_addr_t
+dma_map_single(struct device *dev, void *cpu_addr, size_t size,
+	       enum dma_data_direction direction)
+{
+	BUG_ON(dev->bus != &pci_bus_type);
+
+	return pci_map_single(to_pci_dev(dev), cpu_addr, size, (int)direction);
+}
+
+static inline void
+dma_unmap_single(struct device *dev, dma_addr_t dma_addr, size_t size,
+		 enum dma_data_direction direction)
+{
+	BUG_ON(dev->bus != &pci_bus_type);
+
+	pci_unmap_single(to_pci_dev(dev), dma_addr, size, (int)direction);
+}
+
+static inline dma_addr_t
+dma_map_page(struct device *dev, struct page *page,
+	     unsigned long offset, size_t size,
+	     enum dma_data_direction direction)
+{
+	BUG_ON(dev->bus != &pci_bus_type);
+
+	return pci_map_page(to_pci_dev(dev), page, offset, size, (int)direction);
+}
+
+static inline void
+dma_unmap_page(struct device *dev, dma_addr_t dma_address, size_t size,
+	       enum dma_data_direction direction)
+{
+	BUG_ON(dev->bus != &pci_bus_type);
+
+	pci_unmap_page(to_pci_dev(dev), dma_address, size, (int)direction);
+}
+
+static inline int
+dma_map_sg(struct device *dev, struct scatterlist *sg, int nents,
+	   enum dma_data_direction direction)
+{
+	BUG_ON(dev->bus != &pci_bus_type);
+
+	return pci_map_sg(to_pci_dev(dev), sg, nents, (int)direction);
+}
+
+static inline void
+dma_unmap_sg(struct device *dev, struct scatterlist *sg, int nhwentries,
+	     enum dma_data_direction direction)
+{
+	BUG_ON(dev->bus != &pci_bus_type);
+
+	pci_unmap_sg(to_pci_dev(dev), sg, nhwentries, (int)direction);
+}
+
+static inline void
+dma_sync_single_for_cpu(struct device *dev, dma_addr_t dma_handle, size_t size,
+			enum dma_data_direction direction)
+{
+	BUG_ON(dev->bus != &pci_bus_type);
+
+	pci_dma_sync_single_for_cpu(to_pci_dev(dev), dma_handle,
+				    size, (int)direction);
+}
+
+static inline void
+dma_sync_single_for_device(struct device *dev, dma_addr_t dma_handle, size_t size,
+			   enum dma_data_direction direction)
+{
+	BUG_ON(dev->bus != &pci_bus_type);
+
+	pci_dma_sync_single_for_device(to_pci_dev(dev), dma_handle,
+				       size, (int)direction);
+}
+
+static inline void
+dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg, int nelems,
+		    enum dma_data_direction direction)
+{
+	BUG_ON(dev->bus != &pci_bus_type);
+
+	pci_dma_sync_sg_for_cpu(to_pci_dev(dev), sg, nelems, (int)direction);
+}
+
+static inline void
+dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg, int nelems,
+		       enum dma_data_direction direction)
+{
+	BUG_ON(dev->bus != &pci_bus_type);
+
+	pci_dma_sync_sg_for_device(to_pci_dev(dev), sg, nelems, (int)direction);
+}
+
+static inline int
+dma_mapping_error(dma_addr_t dma_addr)
+{
+	return pci_dma_mapping_error(dma_addr);
+}
+
 #else
 
 struct device;
diff -uprN linux-2.6.16/include/asm-sparc64/pci.h linux-2.6.16.ovz/include/asm-sparc64/pci.h
--- linux-2.6.16/include/asm-sparc64/pci.h	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/include/asm-sparc64/pci.h	2006-07-05 08:34:56.000000000 -0400
@@ -44,7 +44,9 @@ struct pci_dev;
 /* Allocate and map kernel buffer using consistent mode DMA for a device.
  * hwdev should be valid struct pci_dev pointer for PCI devices.
  */
-extern void *pci_alloc_consistent(struct pci_dev *hwdev, size_t size, dma_addr_t *dma_handle);
+extern void *__pci_alloc_consistent(struct pci_dev *hwdev, size_t size, dma_addr_t *dma_handle, gfp_t gfp);
+#define pci_alloc_consistent(DEV,SZ,HANDLE) \
+	__pci_alloc_consistent(DEV,SZ,HANDLE,GFP_ATOMIC)
 
 /* Free and unmap a consistent DMA buffer.
  * cpu_addr is what was returned from pci_alloc_consistent,
diff -uprN linux-2.6.16/include/asm-sparc64/pgtable.h linux-2.6.16.ovz/include/asm-sparc64/pgtable.h
--- linux-2.6.16/include/asm-sparc64/pgtable.h	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/include/asm-sparc64/pgtable.h	2006-07-05 08:34:56.000000000 -0400
@@ -335,6 +335,23 @@ static inline void set_pte_at(struct mm_
 #define pte_clear(mm,addr,ptep)		\
 	set_pte_at((mm), (addr), (ptep), __pte(0UL))
 
+#ifdef DCACHE_ALIASING_POSSIBLE
+#define __HAVE_ARCH_MOVE_PTE
+#define move_pte(pte, prot, old_addr, new_addr)				\
+({									\
+ 	pte_t newpte = (pte);						\
+	if (pte_present(pte)) {						\
+		unsigned long this_pfn = pte_pfn(pte);			\
+									\
+		if (pfn_valid(this_pfn) &&				\
+		    (((old_addr) ^ (new_addr)) & (1 << 13)))		\
+			flush_dcache_page_all(current->mm,		\
+					      pfn_to_page(this_pfn));	\
+	}								\
+	newpte;								\
+})
+#endif
+
 extern pgd_t swapper_pg_dir[2048];
 extern pmd_t swapper_low_pmd_dir[2048];
 
diff -uprN linux-2.6.16/include/asm-x86_64/cpufeature.h linux-2.6.16.ovz/include/asm-x86_64/cpufeature.h
--- linux-2.6.16/include/asm-x86_64/cpufeature.h	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/include/asm-x86_64/cpufeature.h	2006-07-05 08:34:56.000000000 -0400
@@ -64,6 +64,7 @@
 #define X86_FEATURE_REP_GOOD	(3*32+ 4) /* rep microcode works well on this CPU */
 #define X86_FEATURE_CONSTANT_TSC (3*32+5) /* TSC runs at constant rate */
 #define X86_FEATURE_SYNC_RDTSC  (3*32+6)  /* RDTSC syncs CPU core */
+#define X86_FEATURE_FXSAVE_LEAK (3*32+7)  /* FIP/FOP/FDP leaks through FXSAVE */
 
 /* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */
 #define X86_FEATURE_XMM3	(4*32+ 0) /* Streaming SIMD Extensions-3 */
diff -uprN linux-2.6.16/include/asm-x86_64/i387.h linux-2.6.16.ovz/include/asm-x86_64/i387.h
--- linux-2.6.16/include/asm-x86_64/i387.h	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/include/asm-x86_64/i387.h	2006-07-05 08:34:56.000000000 -0400
@@ -72,6 +72,23 @@ extern int set_fpregs(struct task_struct
 #define set_fpu_swd(t,val) ((t)->thread.i387.fxsave.swd = (val))
 #define set_fpu_fxsr_twd(t,val) ((t)->thread.i387.fxsave.twd = (val))
 
+#define X87_FSW_ES (1 << 7)	/* Exception Summary */
+
+/* AMD CPUs don't save/restore FDP/FIP/FOP unless an exception
+   is pending. Clear the x87 state here by setting it to fixed
+   values. The kernel data segment can sometimes be 0 and sometimes
+   a new user value. Both should be ok.
+   Use the PDA as a safe address because it should already be in L1. */
+static inline void clear_fpu_state(struct i387_fxsave_struct *fx)
+{
+	if (unlikely(fx->swd & X87_FSW_ES))
+		 asm volatile("fnclex");
+	alternative_input(ASM_NOP8 ASM_NOP2,
+	     	     "    emms\n"		/* clear stack tags */
+	     	     "    fildl %%gs:0",	/* load to clear state */
+		     X86_FEATURE_FXSAVE_LEAK);
+}
+
 static inline int restore_fpu_checking(struct i387_fxsave_struct *fx) 
 { 
 	int err;
@@ -119,6 +136,7 @@ static inline int save_i387_checking(str
 #endif
 	if (unlikely(err))
 		__clear_user(fx, sizeof(struct i387_fxsave_struct));
+	/* No need to clear here because the caller clears USED_MATH */
 	return err;
 } 
 
@@ -149,7 +167,7 @@ static inline void __fxsave_clear(struct
 				"i" (offsetof(__typeof__(*tsk),
 					      thread.i387.fxsave)));
 #endif
-	__asm__ __volatile__("fnclex");
+	clear_fpu_state(&tsk->thread.i387.fxsave);
 }
 
 static inline void kernel_fpu_begin(void)
diff -uprN linux-2.6.16/include/asm-x86_64/mman.h linux-2.6.16.ovz/include/asm-x86_64/mman.h
--- linux-2.6.16/include/asm-x86_64/mman.h	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/include/asm-x86_64/mman.h	2006-07-05 08:34:56.000000000 -0400
@@ -12,6 +12,7 @@
 #define MAP_NORESERVE	0x4000		/* don't check for reservations */
 #define MAP_POPULATE	0x8000		/* populate (prefault) pagetables */
 #define MAP_NONBLOCK	0x10000		/* do not block on IO */
+#define MAP_EXECPRIO	0x20000		/* soft ubc charge */
 
 #define MCL_CURRENT	1		/* lock all current mappings */
 #define MCL_FUTURE	2		/* lock all future mappings */
diff -uprN linux-2.6.16/include/asm-x86_64/nmi.h linux-2.6.16.ovz/include/asm-x86_64/nmi.h
--- linux-2.6.16/include/asm-x86_64/nmi.h	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/include/asm-x86_64/nmi.h	2006-07-05 08:34:56.000000000 -0400
@@ -24,6 +24,9 @@ void set_nmi_callback(nmi_callback_t cal
  * Remove the handler previously set.
  */
 void unset_nmi_callback(void);
+
+void set_nmi_ipi_callback(nmi_callback_t callback);
+void unset_nmi_ipi_callback(void);
  
 #ifdef CONFIG_PM
  
diff -uprN linux-2.6.16/include/asm-x86_64/pgalloc.h linux-2.6.16.ovz/include/asm-x86_64/pgalloc.h
--- linux-2.6.16/include/asm-x86_64/pgalloc.h	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/include/asm-x86_64/pgalloc.h	2006-07-05 08:34:56.000000000 -0400
@@ -31,12 +31,14 @@ static inline void pmd_free(pmd_t *pmd)
 
 static inline pmd_t *pmd_alloc_one (struct mm_struct *mm, unsigned long addr)
 {
-	return (pmd_t *)get_zeroed_page(GFP_KERNEL|__GFP_REPEAT);
+	return (pmd_t *)get_zeroed_page(GFP_KERNEL_UBC|__GFP_REPEAT|
+			__GFP_SOFT_UBC);
 }
 
 static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr)
 {
-	return (pud_t *)get_zeroed_page(GFP_KERNEL|__GFP_REPEAT);
+	return (pud_t *)get_zeroed_page(GFP_KERNEL_UBC|__GFP_REPEAT|
+			__GFP_SOFT_UBC);
 }
 
 static inline void pud_free (pud_t *pud)
@@ -48,7 +50,8 @@ static inline void pud_free (pud_t *pud)
 static inline pgd_t *pgd_alloc(struct mm_struct *mm)
 {
 	unsigned boundary;
-	pgd_t *pgd = (pgd_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT);
+	pgd_t *pgd = (pgd_t *)__get_free_page(GFP_KERNEL_UBC|__GFP_REPEAT|
+			__GFP_SOFT_UBC);
 	if (!pgd)
 		return NULL;
 	/*
@@ -77,7 +80,8 @@ static inline pte_t *pte_alloc_one_kerne
 
 static inline struct page *pte_alloc_one(struct mm_struct *mm, unsigned long address)
 {
-	void *p = (void *)get_zeroed_page(GFP_KERNEL|__GFP_REPEAT);
+	void *p = (void *)get_zeroed_page(GFP_KERNEL_UBC|__GFP_REPEAT|
+			__GFP_SOFT_UBC);
 	if (!p)
 		return NULL;
 	return virt_to_page(p);
diff -uprN linux-2.6.16/include/asm-x86_64/processor.h linux-2.6.16.ovz/include/asm-x86_64/processor.h
--- linux-2.6.16/include/asm-x86_64/processor.h	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/include/asm-x86_64/processor.h	2006-07-05 08:34:56.000000000 -0400
@@ -167,7 +167,7 @@ static inline void clear_in_cr4 (unsigne
 /* This decides where the kernel will search for a free chunk of vm
  * space during mmap's.
  */
-#define IA32_PAGE_OFFSET ((current->personality & ADDR_LIMIT_3GB) ? 0xc0000000 : 0xFFFFe000)
+#define IA32_PAGE_OFFSET 0xc0000000
 
 #define TASK_SIZE 		(test_thread_flag(TIF_IA32) ? IA32_PAGE_OFFSET : TASK_SIZE64)
 #define TASK_SIZE_OF(child) 	((test_tsk_thread_flag(child, TIF_IA32)) ? IA32_PAGE_OFFSET : TASK_SIZE64)
diff -uprN linux-2.6.16/include/asm-x86_64/segment.h linux-2.6.16.ovz/include/asm-x86_64/segment.h
--- linux-2.6.16/include/asm-x86_64/segment.h	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/include/asm-x86_64/segment.h	2006-07-05 08:34:56.000000000 -0400
@@ -3,29 +3,28 @@
 
 #include <asm/cache.h>
 
-#define __KERNEL_CS	0x10
-#define __KERNEL_DS	0x18
-
-#define __KERNEL32_CS   0x38
-
+#define GDT_ENTRY_BOOT_CS		2
+#define __BOOT_CS	(GDT_ENTRY_BOOT_CS * 8)
+#define GDT_ENTRY_BOOT_DS		3
+#define __BOOT_DS	(GDT_ENTRY_BOOT_DS * 8)
+#define GDT_ENTRY_TSS 4	/* needs two entries */
 /* 
  * we cannot use the same code segment descriptor for user and kernel
  * -- not even in the long flat mode, because of different DPL /kkeil 
  * The segment offset needs to contain a RPL. Grr. -AK
  * GDT layout to get 64bit syscall right (sysret hardcodes gdt offsets) 
  */
+#define GDT_ENTRY_TLS_MIN 6
+#define GDT_ENTRY_TLS_MAX 8
 
-#define __USER32_CS   0x23   /* 4*8+3 */ 
-#define __USER_DS     0x2b   /* 5*8+3 */ 
-#define __USER_CS     0x33   /* 6*8+3 */ 
+#define GDT_ENTRY_LDT 9 /* needs two entries */
+#define __KERNEL32_CS   0x58	/* 11*8 */
+#define __KERNEL_CS	0x60	/* 12*8 */
+#define __KERNEL_DS	0x68	/* 13*8 */
+#define __USER32_CS   0x73   /* 14*8+3 */ 
+#define __USER_DS     0x7b   /* 15*8+3 */ 
 #define __USER32_DS	__USER_DS 
-
-#define GDT_ENTRY_TLS 1
-#define GDT_ENTRY_TSS 8	/* needs two entries */
-#define GDT_ENTRY_LDT 10 /* needs two entries */
-#define GDT_ENTRY_TLS_MIN 12
-#define GDT_ENTRY_TLS_MAX 14
-/* 15 free */
+#define __USER_CS     0x83   /* 16*8+3 */ 
 
 #define GDT_ENTRY_TLS_ENTRIES 3
 
@@ -37,7 +36,7 @@
 #define FS_TLS_SEL ((GDT_ENTRY_TLS_MIN+FS_TLS)*8 + 3)
 
 #define IDT_ENTRIES 256
-#define GDT_ENTRIES 16
+#define GDT_ENTRIES 32
 #define GDT_SIZE (GDT_ENTRIES * 8)
 #define TLS_SIZE (GDT_ENTRY_TLS_ENTRIES * 8) 
 
diff -uprN linux-2.6.16/include/asm-x86_64/signal.h linux-2.6.16.ovz/include/asm-x86_64/signal.h
--- linux-2.6.16/include/asm-x86_64/signal.h	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/include/asm-x86_64/signal.h	2006-07-05 08:34:56.000000000 -0400
@@ -23,11 +23,6 @@ typedef struct {
 	unsigned long sig[_NSIG_WORDS];
 } sigset_t;
 
-
-struct pt_regs; 
-asmlinkage int do_signal(struct pt_regs *regs, sigset_t *oldset);
-
-
 #else
 /* Here we must cater to libcs that poke about in kernel headers.  */
 
diff -uprN linux-2.6.16/include/asm-x86_64/thread_info.h linux-2.6.16.ovz/include/asm-x86_64/thread_info.h
--- linux-2.6.16/include/asm-x86_64/thread_info.h	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/include/asm-x86_64/thread_info.h	2006-07-05 08:34:56.000000000 -0400
@@ -74,7 +74,7 @@ static inline struct thread_info *stack_
 
 /* thread information allocation */
 #define alloc_thread_info(tsk) \
-	((struct thread_info *) __get_free_pages(GFP_KERNEL,THREAD_ORDER))
+	((struct thread_info *) __get_free_pages(GFP_KERNEL_UBC,THREAD_ORDER))
 #define free_thread_info(ti) free_pages((unsigned long) (ti), THREAD_ORDER)
 
 #else /* !__ASSEMBLY__ */
@@ -101,11 +101,13 @@ static inline struct thread_info *stack_
 #define TIF_IRET		5	/* force IRET */
 #define TIF_SYSCALL_AUDIT	7	/* syscall auditing active */
 #define TIF_SECCOMP		8	/* secure computing */
+#define TIF_RESTORE_SIGMASK	9	/* restore signal mask in do_signal() */
 #define TIF_POLLING_NRFLAG	16	/* true if poll_idle() is polling TIF_NEED_RESCHED */
 #define TIF_IA32		17	/* 32bit process */ 
 #define TIF_FORK		18	/* ret_from_fork */
 #define TIF_ABI_PENDING		19
-#define TIF_MEMDIE		20
+#define TIF_FREEZE		20
+#define TIF_MEMDIE		21
 
 #define _TIF_SYSCALL_TRACE	(1<<TIF_SYSCALL_TRACE)
 #define _TIF_NOTIFY_RESUME	(1<<TIF_NOTIFY_RESUME)
@@ -115,6 +117,7 @@ static inline struct thread_info *stack_
 #define _TIF_IRET		(1<<TIF_IRET)
 #define _TIF_SYSCALL_AUDIT	(1<<TIF_SYSCALL_AUDIT)
 #define _TIF_SECCOMP		(1<<TIF_SECCOMP)
+#define _TIF_RESTORE_SIGMASK	(1<<TIF_RESTORE_SIGMASK)
 #define _TIF_POLLING_NRFLAG	(1<<TIF_POLLING_NRFLAG)
 #define _TIF_IA32		(1<<TIF_IA32)
 #define _TIF_FORK		(1<<TIF_FORK)
diff -uprN linux-2.6.16/include/asm-x86_64/unistd.h linux-2.6.16.ovz/include/asm-x86_64/unistd.h
--- linux-2.6.16/include/asm-x86_64/unistd.h	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/include/asm-x86_64/unistd.h	2006-07-05 08:34:56.000000000 -0400
@@ -605,8 +605,26 @@ __SYSCALL(__NR_pselect6, sys_ni_syscall)
 __SYSCALL(__NR_ppoll,	sys_ni_syscall)		/* for now */
 #define __NR_unshare		272
 __SYSCALL(__NR_unshare,	sys_unshare)
-
-#define __NR_syscall_max __NR_unshare
+#define __NR_getluid		500
+__SYSCALL(__NR_getluid, sys_getluid)
+#define __NR_setluid		501
+__SYSCALL(__NR_setluid, sys_setluid)
+#define __NR_setublimit		502
+__SYSCALL(__NR_setublimit, sys_setublimit)
+#define __NR_ubstat		503
+__SYSCALL(__NR_ubstat, sys_ubstat)
+#define __NR_fairsched_mknod	504 /* FairScheduler syscalls */
+__SYSCALL(__NR_fairsched_mknod, sys_fairsched_mknod)
+#define __NR_fairsched_rmnod	505
+__SYSCALL(__NR_fairsched_rmnod, sys_fairsched_rmnod)
+#define __NR_fairsched_chwt	506
+__SYSCALL(__NR_fairsched_chwt, sys_fairsched_chwt)
+#define __NR_fairsched_mvpr	507
+__SYSCALL(__NR_fairsched_mvpr, sys_fairsched_mvpr)
+#define __NR_fairsched_rate	508
+__SYSCALL(__NR_fairsched_rate, sys_fairsched_rate)
+ 
+#define __NR_syscall_max __NR_fairsched_rate
 
 #ifndef __NO_STUBS
 
@@ -645,6 +663,7 @@ do { \
 #define __ARCH_WANT_SYS_RT_SIGACTION
 #define __ARCH_WANT_SYS_TIME
 #define __ARCH_WANT_COMPAT_SYS_TIME
+#define __ARCH_WANT_SYS_RT_SIGSUSPEND
 #endif
 
 #ifndef __KERNEL_SYSCALLS__
diff -uprN linux-2.6.16/include/linux/aio.h linux-2.6.16.ovz/include/linux/aio.h
--- linux-2.6.16/include/linux/aio.h	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/include/linux/aio.h	2006-07-05 08:34:56.000000000 -0400
@@ -247,4 +247,8 @@ static inline struct kiocb *list_kiocb(s
 extern unsigned long aio_nr;
 extern unsigned long aio_max_nr;
 
+void wait_for_all_aios(struct kioctx *ctx);
+extern kmem_cache_t	*kioctx_cachep;
+extern void aio_kick_handler(void *);
+
 #endif /* __LINUX__AIO_H */
diff -uprN linux-2.6.16/include/linux/binfmts.h linux-2.6.16.ovz/include/linux/binfmts.h
--- linux-2.6.16/include/linux/binfmts.h	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/include/linux/binfmts.h	2006-07-05 08:34:56.000000000 -0400
@@ -2,6 +2,7 @@
 #define _LINUX_BINFMTS_H
 
 #include <linux/capability.h>
+#include <linux/fs.h>
 
 struct pt_regs;
 
@@ -28,6 +29,7 @@ struct linux_binprm{
 	int sh_bang;
 	struct file * file;
 	int e_uid, e_gid;
+	struct exec_perm perm;
 	kernel_cap_t cap_inheritable, cap_permitted, cap_effective;
 	void *security;
 	int argc, envc;
diff -uprN linux-2.6.16/include/linux/capability.h linux-2.6.16.ovz/include/linux/capability.h
--- linux-2.6.16/include/linux/capability.h	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/include/linux/capability.h	2006-07-05 08:34:56.000000000 -0400
@@ -146,12 +146,9 @@ typedef __u32 kernel_cap_t;
 
 #define CAP_NET_BROADCAST    11
 
-/* Allow interface configuration */
 /* Allow administration of IP firewall, masquerading and accounting */
 /* Allow setting debug option on sockets */
 /* Allow modification of routing tables */
-/* Allow setting arbitrary process / process group ownership on
-   sockets */
 /* Allow binding to any address for transparent proxying */
 /* Allow setting TOS (type of service) */
 /* Allow setting promiscuous mode */
@@ -200,24 +197,19 @@ typedef __u32 kernel_cap_t;
 
 /* Allow configuration of the secure attention key */
 /* Allow administration of the random device */
-/* Allow examination and configuration of disk quotas */
 /* Allow configuring the kernel's syslog (printk behaviour) */
 /* Allow setting the domainname */
 /* Allow setting the hostname */
 /* Allow calling bdflush() */
-/* Allow mount() and umount(), setting up new smb connection */
+/* Allow setting up new smb connection */
 /* Allow some autofs root ioctls */
 /* Allow nfsservctl */
 /* Allow VM86_REQUEST_IRQ */
 /* Allow to read/write pci config on alpha */
 /* Allow irix_prctl on mips (setstacksize) */
 /* Allow flushing all cache on m68k (sys_cacheflush) */
-/* Allow removing semaphores */
-/* Used instead of CAP_CHOWN to "chown" IPC message queues, semaphores
-   and shared memory */
 /* Allow locking/unlocking of shared memory segment */
 /* Allow turning swap on/off */
-/* Allow forged pids on socket credentials passing */
 /* Allow setting readahead and flushing buffers on block devices */
 /* Allow setting geometry in floppy driver */
 /* Allow turning DMA on/off in xd driver */
@@ -235,6 +227,8 @@ typedef __u32 kernel_cap_t;
    arbitrary SCSI commands */
 /* Allow setting encryption key on loopback filesystem */
 /* Allow setting zone reclaim policy */
+/* Modify data journaling mode on ext3 filesystem (uses journaling
+   resources) */
 
 #define CAP_SYS_ADMIN        21
 
@@ -254,8 +248,6 @@ typedef __u32 kernel_cap_t;
 /* Override resource limits. Set resource limits. */
 /* Override quota limits. */
 /* Override reserved space on ext2 filesystem */
-/* Modify data journaling mode on ext3 filesystem (uses journaling
-   resources) */
 /* NOTE: ext2 honors fsuid when checking for resource overrides, so 
    you can override using fsuid too */
 /* Override size restrictions on IPC message queues */
@@ -288,7 +280,52 @@ typedef __u32 kernel_cap_t;
 
 #define CAP_AUDIT_CONTROL    30
 
+/*
+ * Important note: VZ capability numbers overlap with the CAP_AUDIT ones;
+ * this is for compatibility reasons and is harmless.
+ * Both VZ and Audit/SELinux caps are disabled in VPSs.
+ */
+
+/* Allow access to all information. Otherwise some structures are hidden
+   so that different Virtual Environments on the same node cannot
+   interact with each other */
+#define CAP_SETVEID	     29
+
+#define CAP_VE_ADMIN	     30
+
 #ifdef __KERNEL__
+
+#include <linux/config.h>
+
+#ifdef CONFIG_VE
+
+/* Replacement for CAP_NET_ADMIN:
+   rights delegated to a Virtual Environment to administer its own network.
+   For now the following rights have been delegated:
+
+   Allow setting arbitrary process / process group ownership on sockets
+   Allow interface configuration
+ */
+#define CAP_VE_NET_ADMIN     CAP_VE_ADMIN
+
+/* Replacement for CAP_SYS_ADMIN:
+   rights delegated to a Virtual Environment to administer itself.
+   For now the following rights have been delegated:
+ */
+/* Allow mount/umount/remount */
+/* Allow examination and configuration of disk quotas */
+/* Allow removing semaphores */
+/* Used instead of CAP_CHOWN to "chown" IPC message queues, semaphores
+   and shared memory */
+/* Allow locking/unlocking of shared memory segment */
+/* Allow forged pids on socket credentials passing */
+
+#define CAP_VE_SYS_ADMIN     CAP_VE_ADMIN
+#else
+#define CAP_VE_NET_ADMIN     CAP_NET_ADMIN
+#define CAP_VE_SYS_ADMIN     CAP_SYS_ADMIN
+#endif
+
 /* 
  * Bounding set
  */
@@ -352,9 +389,14 @@ static inline kernel_cap_t cap_invert(ke
 #define cap_issubset(a,set)  (!(cap_t(a) & ~cap_t(set)))
 
 #define cap_clear(c)         do { cap_t(c) =  0; } while(0)
+#ifndef CONFIG_VE
 #define cap_set_full(c)      do { cap_t(c) = ~0; } while(0)
+#else
+#define cap_set_full(c) \
+        do {cap_t(c) = ve_is_super(get_exec_env()) ? ~0 :		\
+					get_exec_env()->cap_default; } while(0)
+#endif
 #define cap_mask(c,mask)     do { cap_t(c) &= cap_t(mask); } while(0)
-
 #define cap_is_fs_cap(c)     (CAP_TO_MASK(c) & CAP_FS_MASK)
 
 extern int capable(int cap);
diff -uprN linux-2.6.16/include/linux/coda_linux.h linux-2.6.16.ovz/include/linux/coda_linux.h
--- linux-2.6.16/include/linux/coda_linux.h	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/include/linux/coda_linux.h	2006-07-05 08:34:56.000000000 -0400
@@ -38,7 +38,8 @@ extern struct file_operations coda_ioctl
 int coda_open(struct inode *i, struct file *f);
 int coda_flush(struct file *f);
 int coda_release(struct inode *i, struct file *f);
-int coda_permission(struct inode *inode, int mask, struct nameidata *nd);
+int coda_permission(struct inode *inode, int mask, struct nameidata *nd,
+		struct exec_perm *);
 int coda_revalidate_inode(struct dentry *);
 int coda_getattr(struct vfsmount *, struct dentry *, struct kstat *);
 int coda_setattr(struct dentry *, struct iattr *);
diff -uprN linux-2.6.16/include/linux/compat.h linux-2.6.16.ovz/include/linux/compat.h
--- linux-2.6.16/include/linux/compat.h	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/include/linux/compat.h	2006-07-05 08:34:56.000000000 -0400
@@ -181,5 +181,7 @@ static inline int compat_timespec_compar
 	return lhs->tv_nsec - rhs->tv_nsec;
 }
 
+extern long compat_nanosleep_restart(struct restart_block *restart);
+
 #endif /* CONFIG_COMPAT */
 #endif /* _LINUX_COMPAT_H */
diff -uprN linux-2.6.16/include/linux/cpt_image.h linux-2.6.16.ovz/include/linux/cpt_image.h
--- linux-2.6.16/include/linux/cpt_image.h	1969-12-31 19:00:00.000000000 -0500
+++ linux-2.6.16.ovz/include/linux/cpt_image.h	2006-07-05 08:34:56.000000000 -0400
@@ -0,0 +1,1453 @@
+/*
+ *
+ *  include/linux/cpt_image.h
+ *
+ *  Copyright (C) 2000-2005  SWsoft
+ *  All rights reserved.
+ *
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#ifndef __CPT_IMAGE_H_
+#define __CPT_IMAGE_H_ 1
+
+#define CPT_NULL (~0ULL)
+#define CPT_NOINDEX (~0U)
+
+/*
+ * Image file layout.
+ *
+ * - major header
+ * - sections[]
+ *
+ *	Each section is:
+ *	- section header
+ *	- array of objects
+ *
+ * All data records are arch independent, 64 bit aligned.
+ */
+
+enum _cpt_object_type
+{
+	CPT_OBJ_TASK = 0,
+	CPT_OBJ_MM,
+	CPT_OBJ_FS,
+	CPT_OBJ_FILES,
+	CPT_OBJ_FILE,
+	CPT_OBJ_SIGHAND_STRUCT,
+	CPT_OBJ_SIGNAL_STRUCT,
+	CPT_OBJ_TTY,
+	CPT_OBJ_SOCKET,
+	CPT_OBJ_SYSVSEM_UNDO,
+	CPT_OBJ_NAMESPACE,
+	CPT_OBJ_SYSV_SHM,
+	CPT_OBJ_INODE,
+	CPT_OBJ_UBC,
+	CPT_OBJ_SLM_SGREG,
+	CPT_OBJ_SLM_REGOBJ,
+	CPT_OBJ_SLM_MM,
+	CPT_OBJ_MAX,
+	/* The objects above are stored in memory while checkpointing */
+
+	CPT_OBJ_VMA = 1024,
+	CPT_OBJ_FILEDESC,
+	CPT_OBJ_SIGHANDLER,
+	CPT_OBJ_SIGINFO,
+	CPT_OBJ_LASTSIGINFO,
+	CPT_OBJ_SYSV_SEM,
+	CPT_OBJ_SKB,
+	CPT_OBJ_FLOCK,
+	CPT_OBJ_OPENREQ,
+	CPT_OBJ_VFSMOUNT,
+	CPT_OBJ_TRAILER,
+	CPT_OBJ_SYSVSEM_UNDO_REC,
+	CPT_OBJ_NET_DEVICE,
+	CPT_OBJ_NET_IFADDR,
+	CPT_OBJ_NET_ROUTE,
+	CPT_OBJ_NET_CONNTRACK,
+	CPT_OBJ_NET_CONNTRACK_EXPECT,
+	CPT_OBJ_AIO_CONTEXT,
+	CPT_OBJ_VEINFO,
+	CPT_OBJ_EPOLL,
+	CPT_OBJ_EPOLL_FILE,
+	CPT_OBJ_SKFILTER,
+	CPT_OBJ_SIGALTSTACK,
+  	CPT_OBJ_SOCK_MCADDR,
+
+	CPT_OBJ_X86_REGS = 4096,
+	CPT_OBJ_X86_64_REGS,
+	CPT_OBJ_PAGES,
+	CPT_OBJ_COPYPAGES,
+	CPT_OBJ_REMAPPAGES,
+	CPT_OBJ_LAZYPAGES,
+	CPT_OBJ_NAME,
+	CPT_OBJ_BITS,
+	CPT_OBJ_REF,
+};
+
+#define CPT_ALIGN(n) (((n)+7)&~7)
+
+struct cpt_major_hdr
+{
+	__u8	cpt_signature[4];	/* Magic number */
+	__u16	cpt_hdrlen;		/* Length of this header */
+	__u16	cpt_image_version;	/* Format of this file; mbz */
+	__u16	cpt_os_arch;		/* Architecture */
+#define CPT_OS_ARCH_I386	0
+#define CPT_OS_ARCH_EMT64	1
+#define CPT_OS_ARCH_IA64	2
+	__u16	__cpt_pad1;
+	__u32	cpt_os_version;		/* Version of kernel, where image was done */
+	__u32	cpt_os_features;	/* Kernel features: SMP etc. */
+	__u16	cpt_pagesize;		/* Page size used by OS */
+	__u16	cpt_hz;			/* HZ used by OS */
+	__u64	cpt_start_jiffies64;	/* Jiffies */
+	__u32	cpt_start_sec;		/* Seconds */
+	__u32	cpt_start_nsec;		/* Nanoseconds */
+	__u32	cpt_cpu_caps[4];	/* CPU capabilities */
+	__u32	cpt_kernel_config[4];	/* Kernel config */
+	__u64	cpt_iptables_mask;	/* Used netfilter modules */
+} __attribute__ ((aligned (8)));
+
+#define CPT_SIGNATURE0 0x79
+#define CPT_SIGNATURE1 0x1c
+#define CPT_SIGNATURE2 0x01
+#define CPT_SIGNATURE3 0x63
+
+#define CPT_CPU_X86_CMOV	0
+#define CPT_CPU_X86_FXSR	1
+#define CPT_CPU_X86_SSE		2
+#define CPT_CPU_X86_SSE2	3
+#define CPT_CPU_X86_MMX		4
+#define CPT_CPU_X86_3DNOW	5
+#define CPT_CPU_X86_3DNOW2	6
+#define CPT_CPU_X86_SEP		7
+#define CPT_CPU_X86_EMT64	8
+#define CPT_CPU_X86_IA64	9
+
+#define CPT_KERNEL_CONFIG_PAE	0
+
+struct cpt_section_hdr
+{
+	__u64	cpt_next;
+	__u32	cpt_section;
+	__u16	cpt_hdrlen;
+	__u16	cpt_align;
+} __attribute__ ((aligned (8)));
+
+enum
+{
+	CPT_SECT_ERROR,			/* Error section, content is string */
+	CPT_SECT_VEINFO,
+	CPT_SECT_FILES,			/* Files. Content is array of file objects */
+	CPT_SECT_TASKS,
+	CPT_SECT_MM,
+	CPT_SECT_FILES_STRUCT,
+	CPT_SECT_FS,
+	CPT_SECT_SIGHAND_STRUCT,
+	CPT_SECT_TTY,
+	CPT_SECT_SOCKET,
+	CPT_SECT_NAMESPACE,
+	CPT_SECT_SYSVSEM_UNDO,
+	CPT_SECT_INODE,			/* Inodes with i->i_nlink==0 and
+					 * deleted dentries with inodes not
+					 * referenced inside dumped process.
+					 */
+	CPT_SECT_SYSV_SHM,
+	CPT_SECT_SYSV_SEM,
+	CPT_SECT_ORPHANS,
+	CPT_SECT_NET_DEVICE,
+	CPT_SECT_NET_IFADDR,
+	CPT_SECT_NET_ROUTE,
+	CPT_SECT_NET_IPTABLES,
+	CPT_SECT_NET_CONNTRACK,
+	CPT_SECT_NET_CONNTRACK_VE0,
+	CPT_SECT_UTSNAME,
+	CPT_SECT_TRAILER,
+	CPT_SECT_UBC,
+	CPT_SECT_SLM_SGREGS,
+	CPT_SECT_SLM_REGOBJS,
+/* Due to a silly mistake we cannot index sections beyond this value */
+#define	CPT_SECT_MAX_INDEX	(CPT_SECT_SLM_REGOBJS+1)
+	CPT_SECT_EPOLL,
+	CPT_SECT_MAX
+};
+
+struct cpt_major_tail
+{
+	__u64	cpt_next;
+	__u32	cpt_object;
+	__u16	cpt_hdrlen;
+	__u16	cpt_content;
+
+	__u32	cpt_lazypages;
+	__u32	cpt_64bit;
+	__u64	cpt_sections[CPT_SECT_MAX_INDEX];
+	__u32	cpt_nsect;
+	__u8	cpt_signature[4];	/* Magic number */
+} __attribute__ ((aligned (8)));
+
+
+/* Common object header. */
+struct cpt_object_hdr
+{
+	__u64	cpt_next;
+	__u32	cpt_object;
+	__u16	cpt_hdrlen;
+	__u16	cpt_content;
+} __attribute__ ((aligned (8)));
+
+enum _cpt_content_type {
+	CPT_CONTENT_VOID,
+	CPT_CONTENT_ARRAY,
+	CPT_CONTENT_DATA,
+	CPT_CONTENT_NAME,
+
+	CPT_CONTENT_STACK,
+	CPT_CONTENT_X86_FPUSTATE_OLD,
+	CPT_CONTENT_X86_FPUSTATE,
+	CPT_CONTENT_MM_CONTEXT,
+	CPT_CONTENT_SEMARRAY,
+	CPT_CONTENT_SEMUNDO,
+	CPT_CONTENT_NLMARRAY,
+	CPT_CONTENT_MAX
+};
+
+/* CPT_OBJ_BITS: encode array of bytes */ 
+struct cpt_obj_bits
+{
+	__u64	cpt_next;
+	__u32	cpt_object;
+	__u16	cpt_hdrlen;
+	__u16	cpt_content;
+
+	__u32	cpt_size;
+	__u32	__cpt_pad1;
+} __attribute__ ((aligned (8)));
+
+/* CPT_OBJ_REF: a reference to another object */ 
+struct cpt_obj_ref
+{
+	__u64	cpt_next;
+	__u32	cpt_object;
+	__u16	cpt_hdrlen;
+	__u16	cpt_content;
+
+	__u64	cpt_pos;
+} __attribute__ ((aligned (8)));
+
+/* CPT_OBJ_VEINFO: various ve specific data */
+struct cpt_veinfo_image
+{
+	__u64	cpt_next;
+	__u32	cpt_object;
+	__u16	cpt_hdrlen;
+	__u16	cpt_content;
+
+	/* ipc ctls */
+	__u32	shm_ctl_max;
+	__u32	shm_ctl_all;
+	__u32	shm_ctl_mni;
+	__u32	msg_ctl_max;
+	__u32	msg_ctl_mni;
+	__u32	msg_ctl_mnb;
+	__u32	sem_ctl_arr[4];
+
+	/* start time */
+	__u64	start_timespec_delta;
+	__u64	start_jiffies_delta;
+} __attribute__ ((aligned (8)));
+
+/* CPT_OBJ_FILE: one struct file */ 
+struct cpt_file_image
+{
+	__u64	cpt_next;
+	__u32	cpt_object;
+	__u16	cpt_hdrlen;
+	__u16	cpt_content;
+
+	__u32	cpt_flags;
+	__u32	cpt_mode;
+	__u64	cpt_pos;
+	__u32	cpt_uid;
+	__u32	cpt_gid;
+
+	__u32	cpt_i_mode;
+	__u32	cpt_lflags;
+#define CPT_DENTRY_DELETED	1
+#define CPT_DENTRY_ROOT		2
+#define CPT_DENTRY_CLONING	4
+#define CPT_DENTRY_PROC		8
+#define CPT_DENTRY_EPOLL	0x10
+	__u64	cpt_inode;
+	__u64	cpt_priv;
+
+	__u32	cpt_fown_fd;
+	__u32	cpt_fown_pid;
+	__u32	cpt_fown_uid;
+	__u32	cpt_fown_euid;
+	__u32	cpt_fown_signo;
+	__u32	__cpt_pad1;
+} __attribute__ ((aligned (8)));
+/* Followed by file name, encoded as CPT_OBJ_NAME */
+
+struct cpt_epoll_image
+{
+	__u64	cpt_next;
+	__u32	cpt_object;
+	__u16	cpt_hdrlen;
+	__u16	cpt_content;
+
+	__u64	cpt_file;
+} __attribute__ ((aligned (8)));
+/* Followed by array of struct cpt_epoll_file_image */
+
+struct cpt_epoll_file_image
+{
+	__u64	cpt_next;
+	__u32	cpt_object;
+	__u16	cpt_hdrlen;
+	__u16	cpt_content;
+
+	__u64	cpt_file;
+	__u32	cpt_fd;
+	__u32	cpt_events;
+	__u64	cpt_data;
+	__u32	cpt_revents;
+	__u32	cpt_ready;
+} __attribute__ ((aligned (8)));
+
+
+/* CPT_OBJ_FILEDESC: one file descriptor */
+struct cpt_fd_image {
+	__u64	cpt_next;
+	__u32	cpt_object;
+	__u16	cpt_hdrlen;
+	__u16	cpt_content;
+
+	__u32	cpt_fd;
+	__u32	cpt_flags;
+#define CPT_FD_FLAG_CLOSEEXEC	1
+	__u64	cpt_file;
+} __attribute__ ((aligned (8)));
+
+/* CPT_OBJ_FILES: one files_struct */
+struct cpt_files_struct_image {
+	__u64	cpt_next;
+	__u32	cpt_object;
+	__u16	cpt_hdrlen;
+	__u16	cpt_content;
+
+	__u32	cpt_index;
+	__u32	cpt_max_fds;
+	__u32	cpt_next_fd;
+	__u32	__cpt_pad1;
+} __attribute__ ((aligned (8)));
+/* Followed by array of cpt_fd_image */
+
+/* CPT_OBJ_FS: one fs_struct */
+struct cpt_fs_struct_image {
+	__u64	cpt_next;
+	__u32	cpt_object;
+	__u16	cpt_hdrlen;
+	__u16	cpt_content;
+
+	__u32	cpt_umask;
+	__u32	__cpt_pad1;
+} __attribute__ ((aligned (8)));
+/* Followed by two/three CPT_OBJ_FILENAME for root, pwd and, optionally, altroot */
+
+/* CPT_OBJ_INODE: one struct inode */
+struct cpt_inode_image
+{
+	__u64	cpt_next;
+	__u32	cpt_object;
+	__u16	cpt_hdrlen;
+	__u16	cpt_content;
+
+	__u64	cpt_dev;
+	__u64	cpt_ino;
+	__u32	cpt_mode;
+	__u32	cpt_nlink;
+	__u32	cpt_uid;
+	__u32	cpt_gid;
+	__u64	cpt_rdev;
+	__u64	cpt_size;
+	__u64	cpt_blksize;
+	__u64	cpt_atime;
+	__u64	cpt_mtime;
+	__u64	cpt_ctime;
+	__u64	cpt_blocks;
+	__u32	cpt_sb;
+	__u32	__cpt_pad1;
+} __attribute__ ((aligned (8)));
+
+/* CPT_OBJ_VFSMOUNT: one vfsmount */
+struct cpt_vfsmount_image {
+	__u64	cpt_next;
+	__u32	cpt_object;
+	__u16	cpt_hdrlen;
+	__u16	cpt_content;
+
+	__u32	cpt_mntflags;
+	__u32	cpt_flags;
+} __attribute__ ((aligned (8)));
+
+
+struct cpt_flock_image
+{
+	__u64	cpt_next;
+	__u32	cpt_object;
+	__u16	cpt_hdrlen;
+	__u16	cpt_content;
+
+	__u32	cpt_owner;
+	__u32	cpt_pid;
+	__u64	cpt_start;
+	__u64	cpt_end;
+	__u32	cpt_flags;
+	__u32	cpt_type;
+} __attribute__ ((aligned (8)));
+
+
+struct cpt_tty_image
+{
+	__u64	cpt_next;
+	__u32	cpt_object;
+	__u16	cpt_hdrlen;
+	__u16	cpt_content;
+
+	__u64	cpt_flags;
+	__u32	cpt_link;
+	__u32	cpt_index;
+	__u32	cpt_drv_type;
+	__u32	cpt_drv_subtype;
+	__u32	cpt_drv_flags;
+	__u8	cpt_packet;
+	__u8	cpt_stopped;
+	__u8	cpt_hw_stopped;
+	__u8	cpt_flow_stopped;
+
+	__u32	cpt_canon_data;
+	__u32	cpt_canon_head;
+	__u32	cpt_canon_column;
+	__u32	cpt_column;
+	__u8	cpt_ctrl_status;
+	__u8	cpt_erasing;
+	__u8	cpt_lnext;
+	__u8	cpt_icanon;
+	__u8	cpt_raw;
+	__u8	cpt_real_raw;
+	__u8	cpt_closing;
+	__u8	__cpt_pad1;
+	__u16	cpt_minimum_to_wake;
+	__u16	__cpt_pad2;
+	__u32	cpt_pgrp;
+	__u32	cpt_session;
+	__u32	cpt_c_line;
+	__u8	cpt_name[64];	
+	__u16	cpt_ws_row;
+	__u16	cpt_ws_col;
+	__u16	cpt_ws_prow;
+	__u16	cpt_ws_pcol;
+	__u8	cpt_c_cc[32];
+	__u32	cpt_c_iflag;
+	__u32	cpt_c_oflag;
+	__u32	cpt_c_cflag;
+	__u32	cpt_c_lflag;
+	__u32	cpt_read_flags[4096/32];
+} __attribute__ ((aligned (8)));
+
+struct cpt_sock_image
+{
+	__u64	cpt_next;
+	__u32	cpt_object;
+	__u16	cpt_hdrlen;
+	__u16	cpt_content;
+
+	__u64	cpt_file;
+	__u32	cpt_parent;
+	__u32	cpt_index;
+
+	__u64	cpt_ssflags;
+	__u16	cpt_type;
+	__u16	cpt_family;
+	__u8	cpt_sstate;
+	__u8	cpt_passcred;
+	__u8	cpt_state;
+	__u8	cpt_reuse;
+
+	__u8	cpt_zapped;
+	__u8	cpt_shutdown;
+	__u8	cpt_userlocks;
+	__u8	cpt_no_check;
+	__u8	cpt_debug;
+	__u8	cpt_rcvtstamp;
+	__u8	cpt_localroute;
+	__u8	cpt_protocol;
+
+	__u32	cpt_err;
+	__u32	cpt_err_soft;
+
+	__u16	cpt_max_ack_backlog;
+	__u16   __cpt_pad1;
+	__u32	cpt_priority;
+
+	__u32	cpt_rcvlowat;
+	__u32	cpt_bound_dev_if;
+
+	__u64	cpt_rcvtimeo;
+	__u64	cpt_sndtimeo;
+	__u32	cpt_rcvbuf;
+	__u32	cpt_sndbuf;
+	__u64	cpt_flags;
+	__u64	cpt_lingertime;
+	__u32	cpt_peer_pid;
+	__u32	cpt_peer_uid;
+
+	__u32	cpt_peer_gid;
+	__u32	cpt_laddrlen;
+	__u32	cpt_laddr[128/4];
+	__u32	cpt_raddrlen;
+	__u32	cpt_raddr[128/4];
+	/* AF_UNIX */
+	__u32	cpt_peer;
+
+	__u8	cpt_socketpair;
+	__u8	cpt_deleted;
+	__u16	__cpt_pad4;
+	__u32	__cpt_pad5;
+/*
+	struct sk_filter      	*sk_filter;
+ */
+
+	__u64			cpt_stamp;
+	__u32			cpt_daddr;
+	__u16			cpt_dport;
+	__u16			cpt_sport;
+
+	__u32			cpt_saddr;
+	__u32			cpt_rcv_saddr;
+
+	__u32			cpt_uc_ttl;
+	__u32			cpt_tos;
+
+	__u32			cpt_cmsg_flags;
+	__u32			cpt_mc_index;
+
+	__u32			cpt_mc_addr;
+/*
+	struct ip_options	*opt;
+ */
+	__u8			cpt_hdrincl;
+	__u8			cpt_mc_ttl;
+	__u8			cpt_mc_loop;
+	__u8			cpt_pmtudisc;
+
+	__u8			cpt_recverr;
+	__u8			cpt_freebind;
+	__u16			cpt_idcounter;
+	__u32			cpt_cork_flags;
+
+	__u32			cpt_cork_fragsize;
+	__u32			cpt_cork_length;
+	__u32			cpt_cork_addr;
+	__u32			cpt_cork_saddr;
+	__u32			cpt_cork_daddr;
+	__u32			cpt_cork_oif;
+
+	__u32			cpt_udp_pending;
+	__u32			cpt_udp_corkflag;
+	__u16			cpt_udp_encap;
+	__u16			cpt_udp_len;
+	__u32			__cpt_pad7;
+
+	__u64			cpt_saddr6[2];
+	__u64			cpt_rcv_saddr6[2];
+	__u64			cpt_daddr6[2];
+	__u32			cpt_flow_label6;
+	__u32			cpt_frag_size6;
+	__u32			cpt_hop_limit6;
+	__u32			cpt_mcast_hops6;
+
+	__u32			cpt_mcast_oif6;
+	__u8			cpt_rxopt6;
+	__u8			cpt_mc_loop6;
+	__u8			cpt_recverr6;
+	__u8			cpt_sndflow6;
+
+	__u8			cpt_pmtudisc6;
+	__u8			cpt_ipv6only6;
+	__u8			cpt_mapped;
+	__u8			__cpt_pad8;
+	__u32	cpt_pred_flags;
+
+	__u32	cpt_rcv_nxt;
+	__u32	cpt_snd_nxt;
+
+	__u32	cpt_snd_una;
+	__u32	cpt_snd_sml;
+
+	__u32	cpt_rcv_tstamp;
+	__u32	cpt_lsndtime;
+
+	__u8	cpt_tcp_header_len;
+	__u8	cpt_ack_pending;
+	__u8	cpt_quick;
+	__u8	cpt_pingpong;
+	__u8	cpt_blocked;
+	__u8	__cpt_pad9;
+	__u16	__cpt_pad10;
+
+	__u32	cpt_ato;
+	__u32	cpt_ack_timeout;
+
+	__u32	cpt_lrcvtime;
+	__u16	cpt_last_seg_size;
+	__u16	cpt_rcv_mss;
+
+	__u32	cpt_snd_wl1;
+	__u32	cpt_snd_wnd;
+
+	__u32	cpt_max_window;
+	__u32	cpt_pmtu_cookie;
+
+	__u32	cpt_mss_cache;
+	__u16	cpt_mss_cache_std;
+	__u16	cpt_mss_clamp;
+
+	__u16	cpt_ext_header_len;
+	__u16	cpt_ext2_header_len;
+	__u8	cpt_ca_state;
+	__u8	cpt_retransmits;
+	__u8	cpt_reordering;
+	__u8	cpt_frto_counter;
+
+	__u32	cpt_frto_highmark;
+	__u8	cpt_adv_cong;
+	__u8	cpt_defer_accept;
+	__u8	cpt_backoff;
+	__u8	__cpt_pad11;
+
+	__u32	cpt_srtt;
+	__u32	cpt_mdev;
+
+	__u32	cpt_mdev_max;
+	__u32	cpt_rttvar;
+
+	__u32	cpt_rtt_seq;
+	__u32	cpt_rto;
+
+	__u32	cpt_packets_out;
+	__u32	cpt_left_out;
+
+	__u32	cpt_retrans_out;
+ 	__u32	cpt_snd_ssthresh;
+
+ 	__u32	cpt_snd_cwnd;
+ 	__u16	cpt_snd_cwnd_cnt;
+	__u16	cpt_snd_cwnd_clamp;
+
+	__u32	cpt_snd_cwnd_used;
+	__u32	cpt_snd_cwnd_stamp;
+
+	__u32	cpt_timeout;
+	__u32	cpt_ka_timeout;
+
+ 	__u32	cpt_rcv_wnd;
+	__u32	cpt_rcv_wup;
+
+	__u32	cpt_write_seq;
+	__u32	cpt_pushed_seq;
+
+	__u32	cpt_copied_seq;
+	__u8	cpt_tstamp_ok;
+	__u8	cpt_wscale_ok;
+	__u8	cpt_sack_ok;
+	__u8	cpt_saw_tstamp;
+
+        __u8	cpt_snd_wscale;
+        __u8	cpt_rcv_wscale;
+	__u8	cpt_nonagle;
+	__u8	cpt_keepalive_probes;
+        __u32	cpt_rcv_tsval;
+
+        __u32	cpt_rcv_tsecr;
+        __u32	cpt_ts_recent;
+
+	__u64	cpt_ts_recent_stamp;
+	__u16	cpt_user_mss;
+	__u8	cpt_dsack;
+	__u8	cpt_eff_sacks;
+	__u32	cpt_sack_array[2*5];
+	__u32	cpt_window_clamp;
+
+	__u32	cpt_rcv_ssthresh;
+	__u8	cpt_probes_out;
+	__u8	cpt_num_sacks;
+	__u16	cpt_advmss;
+
+	__u8	cpt_syn_retries;
+	__u8	cpt_ecn_flags;
+	__u16	cpt_prior_ssthresh;
+	__u32	cpt_lost_out;
+
+	__u32   cpt_sacked_out;
+	__u32   cpt_fackets_out;
+
+	__u32   cpt_high_seq;
+	__u32	cpt_retrans_stamp;
+
+	__u32	cpt_undo_marker;
+	__u32	cpt_undo_retrans;
+
+	__u32	cpt_urg_seq;
+	__u16	cpt_urg_data;
+	__u8	cpt_pending;
+	__u8	cpt_urg_mode;
+
+	__u32	cpt_snd_up;
+	__u32	cpt_keepalive_time;
+
+	__u32   cpt_keepalive_intvl;
+	__u32   cpt_linger2;
+
+	__u32	cpt_rcvrtt_rtt;
+	__u32	cpt_rcvrtt_seq;
+
+	__u32	cpt_rcvrtt_time;
+	__u32	__cpt_pad12;
+} __attribute__ ((aligned (8)));
+
+struct cpt_sockmc_image {
+	__u64	cpt_next;
+	__u32	cpt_object;
+	__u16	cpt_hdrlen;
+	__u16	cpt_content;
+
+	__u16	cpt_family;
+	__u16	cpt_mode;
+	__u32	cpt_ifindex;
+	__u32	cpt_mcaddr[4];
+} __attribute__ ((aligned (8)));
+/* Followed by array of source addresses, each zero padded to 16 bytes */
+
+struct cpt_openreq_image
+{
+	__u64	cpt_next;
+	__u32	cpt_object;
+	__u16	cpt_hdrlen;
+	__u16	cpt_content;
+
+	__u32	cpt_rcv_isn;
+	__u32	cpt_snt_isn;
+
+	__u16	cpt_rmt_port;
+	__u16	cpt_mss;
+	__u8	cpt_family;
+	__u8	cpt_retrans;
+	__u8	cpt_snd_wscale;
+	__u8	cpt_rcv_wscale;
+
+	__u8	cpt_tstamp_ok;
+	__u8	cpt_sack_ok;
+	__u8	cpt_wscale_ok;
+	__u8	cpt_ecn_ok;
+	__u8	cpt_acked;
+	__u8	__cpt_pad1;
+	__u16	__cpt_pad2;
+
+	__u32	cpt_window_clamp;
+	__u32	cpt_rcv_wnd;
+	__u32	cpt_ts_recent;
+	__u32	cpt_iif;
+	__u64	cpt_expires;
+
+	__u64	cpt_loc_addr[2];
+	__u64	cpt_rmt_addr[2];
+/*
+	struct ip_options	*opt;
+ */
+	
+} __attribute__ ((aligned (8)));
+
+struct cpt_skb_image
+{
+	__u64	cpt_next;
+	__u32	cpt_object;
+	__u16	cpt_hdrlen;
+	__u16	cpt_content;
+
+	__u32	cpt_owner;
+	__u32	cpt_queue;
+#define CPT_SKB_NQ	0
+#define CPT_SKB_RQ	1
+#define CPT_SKB_WQ	2
+#define CPT_SKB_OFOQ	3
+
+	__u64	cpt_stamp;
+	__u32	cpt_len;
+	__u32	cpt_hspace;
+	__u32	cpt_tspace;
+	__u32	cpt_h;
+	__u32	cpt_nh;
+	__u32	cpt_mac;
+	
+	__u64	cpt_cb[5];
+	__u32	cpt_mac_len;
+	__u32	cpt_csum;
+	__u8	cpt_local_df;
+	__u8	cpt_pkt_type;
+	__u8	cpt_ip_summed;
+	__u8	__cpt_pad1;
+	__u32	cpt_priority;
+	__u16	cpt_protocol;
+	__u16	cpt_security;
+	__u16	cpt_tso_segs;
+	__u16	cpt_tso_size;
+} __attribute__ ((aligned (8)));
+
+
+struct cpt_sysvshm_image
+{
+	__u64	cpt_next;
+	__u32	cpt_object;
+	__u16	cpt_hdrlen;
+	__u16	cpt_content;
+
+	__u64	cpt_key;
+	__u64	cpt_uid;
+	__u64	cpt_gid;
+	__u64	cpt_cuid;
+	__u64	cpt_cgid;
+	__u64	cpt_mode;
+	__u64	cpt_seq;
+
+	__u32	cpt_id;
+	__u32	cpt_mlockuser;
+	__u64	cpt_segsz;
+	__u64	cpt_atime;
+	__u64	cpt_ctime;
+	__u64	cpt_dtime;
+	__u64	cpt_creator;
+	__u64	cpt_last;
+} __attribute__ ((aligned (8)));
+
+
+struct cpt_sysvsem_image
+{
+	__u64	cpt_next;
+	__u32	cpt_object;
+	__u16	cpt_hdrlen;
+	__u16	cpt_content;
+
+	__u64	cpt_key;
+	__u64	cpt_uid;
+	__u64	cpt_gid;
+	__u64	cpt_cuid;
+	__u64	cpt_cgid;
+	__u64	cpt_mode;
+	__u64	cpt_seq;
+	__u32	cpt_id;
+	__u32	__cpt_pad1;
+
+	__u64	cpt_otime;
+	__u64	cpt_ctime;
+} __attribute__ ((aligned (8)));
+/* Content is array of pairs semval/sempid */
+
+struct cpt_sysvsem_undo_image
+{
+	__u64	cpt_next;
+	__u32	cpt_object;
+	__u16	cpt_hdrlen;
+	__u16	cpt_content;
+
+	__u32	cpt_id;
+	__u32	cpt_nsem;
+} __attribute__ ((aligned (8)));
+
+
+struct cpt_mm_image {
+	__u64	cpt_next;
+	__u32	cpt_object;
+	__u16	cpt_hdrlen;
+	__u16	cpt_content;
+
+	__u64	cpt_start_code;
+	__u64	cpt_end_code;
+	__u64	cpt_start_data;
+	__u64	cpt_end_data;
+	__u64	cpt_start_brk;
+	__u64	cpt_brk;
+	__u64	cpt_start_stack;
+	__u64	cpt_start_arg;
+	__u64	cpt_end_arg;
+	__u64	cpt_start_env;
+	__u64	cpt_end_env;
+	__u64	cpt_def_flags;
+	__u64	cpt_mmub;
+	__u8	cpt_dumpable;
+	__u8	cpt_vps_dumpable;
+	__u8	cpt_used_hugetlb;
+	__u8	__cpt_pad;
+} __attribute__ ((aligned (8)));
+
+struct cpt_page_block
+{
+	__u64	cpt_next;
+	__u32	cpt_object;
+	__u16	cpt_hdrlen;
+	__u16	cpt_content;
+
+	__u64	cpt_start;
+	__u64	cpt_end;
+} __attribute__ ((aligned (8)));
+
+struct cpt_remappage_block
+{
+	__u64	cpt_next;
+	__u32	cpt_object;
+	__u16	cpt_hdrlen;
+	__u16	cpt_content;
+
+	__u64	cpt_start;
+	__u64	cpt_end;
+	__u64	cpt_pgoff;
+} __attribute__ ((aligned (8)));
+
+struct cpt_copypage_block
+{
+	__u64	cpt_next;
+	__u32	cpt_object;
+	__u16	cpt_hdrlen;
+	__u16	cpt_content;
+
+	__u64	cpt_start;
+	__u64	cpt_end;
+	__u64	cpt_source;
+} __attribute__ ((aligned (8)));
+
+struct cpt_lazypage_block
+{
+	__u64	cpt_next;
+	__u32	cpt_object;
+	__u16	cpt_hdrlen;
+	__u16	cpt_content;
+
+	__u64	cpt_start;
+	__u64	cpt_end;
+	__u64	cpt_index;
+} __attribute__ ((aligned (8)));
+
+struct cpt_vma_image
+{
+	__u64	cpt_next;
+	__u32	cpt_object;
+	__u16	cpt_hdrlen;
+	__u16	cpt_content;
+
+	__u64	cpt_file;
+	__u32	cpt_type;
+#define CPT_VMA_TYPE_0		0
+#define CPT_VMA_TYPE_SHM	1
+	__u32	cpt_anonvma;
+	__u64	cpt_anonvmaid;
+
+	__u64	cpt_start;
+	__u64	cpt_end;
+	__u64	cpt_flags;
+	__u64	cpt_pgprot;
+	__u64	cpt_pgoff;
+} __attribute__ ((aligned (8)));
+
+struct cpt_aio_ctx_image {
+	__u64	cpt_next;
+	__u32	cpt_object;
+	__u16	cpt_hdrlen;
+	__u16	cpt_content;
+
+	__u32	cpt_max_reqs;
+	__u32	cpt_ring_pages;
+	__u32	cpt_tail;
+	__u32	cpt_nr;
+	__u64	cpt_mmap_base;
+	/* Data (io_event's) and struct aio_ring are stored in user space VM */
+} __attribute__ ((aligned (8)));
+
+
+/* Format of MM section.
+ *
+ * It is array of MM objects (mm_struct). Each MM object is
+ * header, encoding mm_struct, followed by array of VMA objects.
+ * Each VMA consists of VMA header, encoding vm_area_struct, and
+ * if the VMA contains copied pages, the header is followed by
+ * array of tuples start-end each followed by data.
+ *
+ * ATTN: no block/page alignment. Only 64bit alignment. This might not be good?
+ */
+
+struct cpt_restart_block {
+	__u64	fn;
+#define CPT_RBL_0			0
+#define CPT_RBL_NANOSLEEP		1
+#define CPT_RBL_COMPAT_NANOSLEEP	2
+	__u64	arg0;
+	__u64	arg1;
+	__u64	arg2;
+	__u64	arg3;
+} __attribute__ ((aligned (8)));
+
+struct cpt_siginfo_image {
+	__u64	cpt_next;
+	__u32	cpt_object;
+	__u16	cpt_hdrlen;
+	__u16	cpt_content;
+
+	__u32	cpt_qflags;
+	__u32	cpt_signo;
+	__u32	cpt_errno;
+	__u32	cpt_code;
+
+	__u64	cpt_sigval;
+	__u32	cpt_pid;
+	__u32	cpt_uid;
+	__u64	cpt_utime;
+	__u64	cpt_stime;
+
+	__u64	cpt_user;
+} __attribute__ ((aligned (8)));
+
+/* Portable representations of segment registers */
+
+#define CPT_SEG_ZERO		0
+#define CPT_SEG_TLS1		1
+#define CPT_SEG_TLS2		2
+#define CPT_SEG_TLS3		3
+#define CPT_SEG_USER32_DS	4
+#define CPT_SEG_USER32_CS	5
+#define CPT_SEG_USER64_DS	6
+#define CPT_SEG_USER64_CS	7
+#define CPT_SEG_LDT		256
+
+struct cpt_x86_regs
+{
+	__u64	cpt_next;
+	__u32	cpt_object;
+	__u16	cpt_hdrlen;
+	__u16	cpt_content;
+
+	__u32	cpt_debugreg[8];
+	__u32	cpt_fs;
+	__u32	cpt_gs;
+
+	__u32	cpt_ebx;
+	__u32	cpt_ecx;
+	__u32	cpt_edx;
+	__u32	cpt_esi;
+	__u32	cpt_edi;
+	__u32	cpt_ebp;
+	__u32	cpt_eax;
+	__u32	cpt_xds;
+	__u32	cpt_xes;
+	__u32	cpt_orig_eax;
+	__u32	cpt_eip;
+	__u32	cpt_xcs;
+	__u32	cpt_eflags;
+	__u32	cpt_esp;
+	__u32	cpt_xss;
+	__u32	cpt_pad;
+};
+
+struct cpt_x86_64_regs
+{
+	__u64	cpt_next;
+	__u32	cpt_object;
+	__u16	cpt_hdrlen;
+	__u16	cpt_content;
+
+	__u64	cpt_debugreg[8];
+
+	__u64	cpt_fsbase;
+	__u64	cpt_gsbase;
+	__u32	cpt_fsindex;
+	__u32	cpt_gsindex;
+	__u32	cpt_ds;
+	__u32	cpt_es;
+
+	__u64	cpt_r15;
+	__u64	cpt_r14;
+	__u64	cpt_r13;
+	__u64	cpt_r12;
+	__u64	cpt_rbp;
+	__u64	cpt_rbx;
+	__u64	cpt_r11;
+	__u64	cpt_r10;	
+	__u64	cpt_r9;
+	__u64	cpt_r8;
+	__u64	cpt_rax;
+	__u64	cpt_rcx;
+	__u64	cpt_rdx;
+	__u64	cpt_rsi;
+	__u64	cpt_rdi;
+	__u64	cpt_orig_rax;
+	__u64	cpt_rip;
+	__u64	cpt_cs;
+	__u64	cpt_eflags;
+	__u64	cpt_rsp;
+	__u64	cpt_ss;
+};
+
+struct cpt_task_image {
+	__u64	cpt_next;
+	__u32	cpt_object;
+	__u16	cpt_hdrlen;
+	__u16	cpt_content;
+
+	__u64	cpt_state;
+	__u64	cpt_flags;
+	__u64	cpt_ptrace;
+	__u32	cpt_prio;
+	__u32	cpt_static_prio;
+	__u32	cpt_policy;
+	__u32	cpt_rt_priority;
+
+	/* struct thread_info */
+	__u64	cpt_exec_domain;
+	__u64	cpt_thrflags;
+	__u64	cpt_thrstatus;
+	__u64	cpt_addr_limit;
+
+	__u64	cpt_personality;
+
+	__u64	cpt_mm;
+	__u64	cpt_files;
+	__u64	cpt_fs;
+	__u64	cpt_signal;
+	__u64	cpt_sighand;
+	__u64	cpt_sigblocked;
+	__u64	cpt_sigrblocked;
+	__u64	cpt_sigpending;
+	__u64	cpt_namespace;
+	__u64	cpt_sysvsem_undo;
+	__u32	cpt_pid;
+	__u32	cpt_tgid;
+	__u32	cpt_ppid;
+	__u32	cpt_rppid;
+	__u32	cpt_pgrp;
+	__u32	cpt_session;
+	__u32	cpt_old_pgrp;
+	__u32	__cpt_pad;
+	__u32	cpt_leader;
+	__u8	cpt_pn_state;
+	__u8	cpt_stopped_state;
+	__u8	cpt_sigsuspend_state;
+	__u8	cpt_64bit;
+	__u64	cpt_set_tid;
+	__u64	cpt_clear_tid;
+	__u32	cpt_exit_code;
+	__u32	cpt_exit_signal;
+	__u32	cpt_pdeath_signal;
+	__u32	cpt_user;
+	__u32	cpt_uid;
+	__u32	cpt_euid;
+	__u32	cpt_suid;
+	__u32	cpt_fsuid;
+	__u32	cpt_gid;
+	__u32	cpt_egid;
+	__u32	cpt_sgid;
+	__u32	cpt_fsgid;
+	__u32	cpt_ngids;
+	__u32	cpt_gids[32];
+	__u32	__cpt_pad2;
+	__u64	cpt_ecap;
+	__u64	cpt_icap;
+	__u64	cpt_pcap;
+	__u8	cpt_comm[16];
+	__u64	cpt_tls[3];
+	struct cpt_restart_block cpt_restart;
+	__u64	cpt_it_real_value;	/* V0: jiffies, V1: nsec */
+	__u64	cpt_it_real_incr;	/* V0: jiffies, V1: nsec */
+	__u64	cpt_it_prof_value;
+	__u64	cpt_it_prof_incr;
+	__u64	cpt_it_virt_value;
+	__u64	cpt_it_virt_incr;
+
+	__u16	cpt_used_math;
+	__u8	cpt_keepcap;
+	__u8	cpt_did_exec;
+	__u32	cpt_ptrace_message;
+
+	__u64	cpt_utime;
+	__u64	cpt_stime;
+	__u64	cpt_starttime;		/* V0: jiffies, V1: timespec */
+	__u64	cpt_nvcsw;
+	__u64	cpt_nivcsw;
+	__u64	cpt_min_flt;
+	__u64	cpt_maj_flt;
+
+	__u64	cpt_sigsuspend_blocked;
+	__u64	cpt_cutime, cpt_cstime;
+	__u64	cpt_cnvcsw, cpt_cnivcsw;
+	__u64	cpt_cmin_flt, cpt_cmaj_flt;
+
+#define CPT_RLIM_NLIMITS 16
+	__u64	cpt_rlim_cur[CPT_RLIM_NLIMITS];
+	__u64	cpt_rlim_max[CPT_RLIM_NLIMITS];
+
+	__u64	cpt_task_ub;
+	__u64	cpt_exec_ub;
+	__u64	cpt_mm_ub;
+	__u64	cpt_fork_sub;
+} __attribute__ ((aligned (8)));
+
+struct cpt_sigaltstack_image {
+	__u64	cpt_next;
+	__u32	cpt_object;
+	__u16	cpt_hdrlen;
+	__u16	cpt_content;
+
+	__u64	cpt_stack;
+	__u32	cpt_stacksize;
+	__u32	__cpt_pad1;
+} __attribute__ ((aligned (8)));
+
+struct cpt_signal_image {
+	__u64	cpt_next;
+	__u32	cpt_object;
+	__u16	cpt_hdrlen;
+	__u16	cpt_content;
+
+	__u32	cpt_leader;
+	__u8	cpt_pgrp_type;
+	__u8	cpt_old_pgrp_type;
+	__u8	cpt_session_type;
+#define CPT_PGRP_NORMAL		0
+#define CPT_PGRP_ORPHAN		1
+#define CPT_PGRP_STRAY		2
+	__u8	__cpt_pad1;
+	__u64	cpt_pgrp;
+	__u64	cpt_old_pgrp;
+	__u64	cpt_session;
+	__u64	cpt_sigpending;
+	__u64	cpt_ctty;
+
+	__u32	cpt_curr_target;
+	__u32	cpt_group_exit;
+	__u32	cpt_group_exit_code;
+	__u32	cpt_group_exit_task;
+	__u32	cpt_notify_count;
+	__u32	cpt_group_stop_count;
+	__u32	cpt_stop_state;
+	__u32	__cpt_pad2;
+
+	__u64	cpt_utime, cpt_stime, cpt_cutime, cpt_cstime;
+	__u64	cpt_nvcsw, cpt_nivcsw, cpt_cnvcsw, cpt_cnivcsw;
+	__u64	cpt_min_flt, cpt_maj_flt, cpt_cmin_flt, cpt_cmaj_flt;
+
+	__u64	cpt_rlim_cur[CPT_RLIM_NLIMITS];
+	__u64	cpt_rlim_max[CPT_RLIM_NLIMITS];
+} __attribute__ ((aligned (8)));
+/* Followed by list of posix timers. */
+
+struct cpt_sighand_image {
+	__u64	cpt_next;
+	__u32	cpt_object;
+	__u16	cpt_hdrlen;
+	__u16	cpt_content;
+
+} __attribute__ ((aligned (8)));
+/* Followed by list of sighandlers. */
+
+struct cpt_sighandler_image {
+	__u64	cpt_next;
+	__u32	cpt_object;
+	__u16	cpt_hdrlen;
+	__u16	cpt_content;
+	
+	__u32	cpt_signo;
+	__u32	__cpt_pad1;
+	__u64	cpt_handler;
+	__u64	cpt_restorer;
+	__u64	cpt_flags;
+	__u64	cpt_mask;
+} __attribute__ ((aligned (8)));
+
+struct cpt_netdev_image {
+	__u64	cpt_next;
+	__u32	cpt_object;
+	__u16	cpt_hdrlen;
+	__u16	cpt_content;
+
+	__u32	cpt_index;
+	__u32	cpt_flags;
+	__u8	cpt_name[16];
+} __attribute__ ((aligned (8)));
+
+struct cpt_ifaddr_image {
+	__u64	cpt_next;
+	__u32	cpt_object;
+	__u16	cpt_hdrlen;
+	__u16	cpt_content;
+
+	__u32	cpt_index;
+	__u8	cpt_family;
+	__u8	cpt_masklen;
+	__u8	cpt_flags;
+	__u8	cpt_scope;
+	__u32	cpt_address[4];
+	__u32	cpt_peer[4];
+	__u32	cpt_broadcast[4];
+	__u8	cpt_label[16];
+} __attribute__ ((aligned (8)));
+
+struct cpt_ipct_tuple
+{
+	__u32	cpt_src;
+	__u16	cpt_srcport;
+	__u16	__cpt_pad1;
+
+	__u32	cpt_dst;
+	__u16	cpt_dstport;
+	__u8	cpt_protonum;
+	__u8	cpt_dir;	/* TEMPORARY HACK TO VALIDATE CODE */
+} __attribute__ ((aligned (8)));
+
+struct cpt_nat_manip
+{
+	__u8	cpt_direction;
+	__u8	cpt_hooknum;
+	__u8	cpt_maniptype;
+	__u8	__cpt_pad1;
+
+	__u32	cpt_manip_addr;
+	__u16	cpt_manip_port;
+	__u16	__cpt_pad2;
+	__u32	__cpt_pad3;
+} __attribute__ ((aligned (8)));
+
+struct cpt_nat_seq
+{
+	__u32	cpt_correction_pos;
+	__u32	cpt_offset_before;
+	__u32	cpt_offset_after;
+	__u32	__cpt_pad1;
+} __attribute__ ((aligned (8)));
+
+struct cpt_ip_connexpect_image
+{
+	__u64	cpt_next;
+	__u32	cpt_object;
+	__u16	cpt_hdrlen;
+	__u16	cpt_content;
+
+	__u64	cpt_timeout;
+	__u32	cpt_sibling_conntrack;	/* Index of child conntrack */
+	__u32	cpt_seq;		/* id in 2.6.15 */
+
+	struct cpt_ipct_tuple	cpt_ct_tuple;	/* NU 2.6.15 */
+	struct cpt_ipct_tuple	cpt_tuple;
+	struct cpt_ipct_tuple	cpt_mask;
+
+	/* union ip_conntrack_expect_help. Used by ftp, irc, amanda */
+	__u32	cpt_help[3];			/* NU 2.6.15 */
+	__u16	cpt_manip_proto;
+	__u8	cpt_dir;
+	__u8	cpt_flags;
+} __attribute__ ((aligned (8)));
+
+struct cpt_ip_conntrack_image
+{
+	__u64	cpt_next;
+	__u32	cpt_object;
+	__u16	cpt_hdrlen;
+	__u16	cpt_content;
+
+	struct cpt_ipct_tuple cpt_tuple[2];
+	__u64	cpt_status;
+	__u64	cpt_timeout;
+	__u32	cpt_index;
+	__u8	cpt_ct_helper;
+	__u8	cpt_nat_helper;
+	__u16	cpt_pad1;
+
+	/* union ip_conntrack_proto. Used by tcp and icmp. */
+	__u32	cpt_proto_data[12];
+
+	/* union ip_conntrack_help. Used by ftp and pptp helper.
+	 * We do not support pptp...
+	 */
+	__u32	cpt_help_data[6];
+
+	/* nat info */
+	__u32	cpt_initialized;	/* NU 2.6.15 */
+	__u32	cpt_num_manips;		/* NU 2.6.15 */
+	struct  cpt_nat_manip	cpt_nat_manips[6];	/* NU 2.6.15 */
+
+	struct	cpt_nat_seq	cpt_nat_seq[2];
+
+	__u32	cpt_masq_index;
+	__u32	cpt_id;
+	__u32	cpt_mark;
+} __attribute__ ((aligned (8)));
+
+struct cpt_beancounter_image {
+	__u64	cpt_next;
+	__u32	cpt_object;
+	__u16	cpt_hdrlen;
+	__u16	cpt_content;
+
+	__u64	cpt_parent;
+	__u32	cpt_id;
+	__u32	__cpt_pad;
+	__u64	cpt_parms[32 * 6 * 2];
+} __attribute__ ((aligned (8)));
+
+#ifdef __KERNEL__
+
+static inline void *cpt_ptr_import(__u64 ptr)
+{
+	return (void*)(unsigned long)ptr;
+}
+
+static inline __u64 cpt_ptr_export(void __user *ptr)
+{
+	return (__u64)(unsigned long)ptr;
+}
+
+static inline void cpt_sigset_import(sigset_t *sig, __u64 ptr)
+{
+	memcpy(sig, &ptr, sizeof(*sig));
+}
+
+static inline __u64 cpt_sigset_export(sigset_t *sig)
+{
+	return *(__u64*)sig;
+}
+
+static inline __u64 cpt_timespec_export(struct timespec *tv)
+{
+	return (((u64)tv->tv_sec) << 32) + tv->tv_nsec;
+}
+
+static inline void cpt_timespec_import(struct timespec *tv, __u64 val)
+{
+	tv->tv_sec = val>>32;
+	tv->tv_nsec = (val&0xFFFFFFFF);
+}
+
+static inline __u64 cpt_timeval_export(struct timeval *tv)
+{
+	return (((u64)tv->tv_sec) << 32) + tv->tv_usec;
+}
+
+static inline void cpt_timeval_import(struct timeval *tv, __u64 val)
+{
+	tv->tv_sec = val>>32;
+	tv->tv_usec = (val&0xFFFFFFFF);
+}
+
+#endif
+
+#endif /* __CPT_IMAGE_H_ */
diff -uprN linux-2.6.16/include/linux/cpt_ioctl.h linux-2.6.16.ovz/include/linux/cpt_ioctl.h
--- linux-2.6.16/include/linux/cpt_ioctl.h	1969-12-31 19:00:00.000000000 -0500
+++ linux-2.6.16.ovz/include/linux/cpt_ioctl.h	2006-07-05 08:34:56.000000000 -0400
@@ -0,0 +1,41 @@
+/*
+ *
+ *  include/linux/cpt_ioctl.h
+ *
+ *  Copyright (C) 2000-2005  SWsoft
+ *  All rights reserved.
+ *
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#ifndef _CPT_IOCTL_H_
+#define _CPT_IOCTL_H_ 1
+
+#include <linux/types.h>
+#include <linux/ioctl.h>
+
+#define CPTCTLTYPE '-'
+#define CPT_SET_DUMPFD	_IOW(CPTCTLTYPE, 1, int)
+#define CPT_SET_STATUSFD _IOW(CPTCTLTYPE, 2, int)
+#define CPT_SET_LOCKFD	_IOW(CPTCTLTYPE, 3, int)
+#define CPT_SET_VEID	_IOW(CPTCTLTYPE, 4, int)
+#define CPT_SUSPEND	_IO(CPTCTLTYPE, 5)
+#define CPT_DUMP	_IO(CPTCTLTYPE, 6)
+#define CPT_UNDUMP	_IO(CPTCTLTYPE, 7)
+#define CPT_RESUME	_IO(CPTCTLTYPE, 8)
+#define CPT_KILL	_IO(CPTCTLTYPE, 9)
+#define CPT_JOIN_CONTEXT _IO(CPTCTLTYPE, 10)
+#define CPT_GET_CONTEXT _IOW(CPTCTLTYPE, 11, unsigned int)
+#define CPT_PUT_CONTEXT _IO(CPTCTLTYPE, 12)
+#define CPT_SET_PAGEINFDIN _IOW(CPTCTLTYPE, 13, int)
+#define CPT_SET_PAGEINFDOUT _IOW(CPTCTLTYPE, 14, int)
+#define CPT_PAGEIND	_IO(CPTCTLTYPE, 15)
+#define CPT_VMPREP	_IOW(CPTCTLTYPE, 16, int)
+#define CPT_SET_LAZY	_IOW(CPTCTLTYPE, 17, int)
+#define CPT_SET_CPU_FLAGS _IOW(CPTCTLTYPE, 18, unsigned int)
+#define CPT_TEST_CAPS	_IOW(CPTCTLTYPE, 19, unsigned int)
+#define CPT_TEST_VECAPS	_IOW(CPTCTLTYPE, 20, unsigned int)
+#define CPT_SET_ERRORFD _IOW(CPTCTLTYPE, 21, int)
+
+#endif
diff -uprN linux-2.6.16/include/linux/cpu.h linux-2.6.16.ovz/include/linux/cpu.h
--- linux-2.6.16/include/linux/cpu.h	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/include/linux/cpu.h	2006-07-05 08:34:56.000000000 -0400
@@ -32,7 +32,7 @@ struct cpu {
 };
 
 extern int register_cpu(struct cpu *, int, struct node *);
-extern struct sys_device *get_cpu_sysdev(int cpu);
+extern struct sys_device *get_cpu_sysdev(unsigned cpu);
 #ifdef CONFIG_HOTPLUG_CPU
 extern void unregister_cpu(struct cpu *, struct node *);
 #endif
diff -uprN linux-2.6.16/include/linux/cpumask.h linux-2.6.16.ovz/include/linux/cpumask.h
--- linux-2.6.16/include/linux/cpumask.h	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/include/linux/cpumask.h	2006-07-05 08:34:56.000000000 -0400
@@ -408,6 +408,7 @@ extern cpumask_t cpu_present_map;
 })
 
 #define for_each_cpu(cpu)	  for_each_cpu_mask((cpu), cpu_possible_map)
+#define for_each_possible_cpu(cpu)  for_each_cpu_mask((cpu), cpu_possible_map)
 #define for_each_online_cpu(cpu)  for_each_cpu_mask((cpu), cpu_online_map)
 #define for_each_present_cpu(cpu) for_each_cpu_mask((cpu), cpu_present_map)
 
diff -uprN linux-2.6.16/include/linux/dcache.h linux-2.6.16.ovz/include/linux/dcache.h
--- linux-2.6.16/include/linux/dcache.h	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/include/linux/dcache.h	2006-07-05 08:34:56.000000000 -0400
@@ -9,6 +9,8 @@
 #include <linux/cache.h>
 #include <linux/rcupdate.h>
 
+#include <ub/ub_dcache.h>
+
 struct nameidata;
 struct vfsmount;
 
@@ -111,6 +113,9 @@ struct dentry {
 	struct dcookie_struct *d_cookie; /* cookie, if any */
 #endif
 	int d_mounted;
+#ifdef CONFIG_USER_RESOURCE
+	struct dentry_beancounter dentry_bc;
+#endif
 	unsigned char d_iname[DNAME_INLINE_LEN_MIN];	/* small names */
 };
 
@@ -161,7 +166,11 @@ d_iput:		no		no		no       yes
 
 #define DCACHE_REFERENCED	0x0008  /* Recently used, don't discard. */
 #define DCACHE_UNHASHED		0x0010	
+#define DCACHE_VIRTUAL		0x0100	/* ve accessible */
+
+extern void mark_tree_virtual(struct vfsmount *m, struct dentry *d);
 
+extern kmem_cache_t *dentry_cache;
 extern spinlock_t dcache_lock;
 
 /**
@@ -215,7 +224,7 @@ extern struct dentry * d_alloc_anon(stru
 extern struct dentry * d_splice_alias(struct inode *, struct dentry *);
 extern void shrink_dcache_sb(struct super_block *);
 extern void shrink_dcache_parent(struct dentry *);
-extern void shrink_dcache_anon(struct hlist_head *);
+extern void shrink_dcache_anon(struct super_block *);
 extern int d_invalidate(struct dentry *);
 
 /* only used at mount-time */
@@ -277,6 +286,7 @@ extern struct dentry * __d_lookup(struct
 /* validate "insecure" dentry pointer */
 extern int d_validate(struct dentry *, struct dentry *);
 
+extern int d_root_check(struct dentry *, struct vfsmount *);
 extern char * d_path(struct dentry *, struct vfsmount *, char *, int);
   
 /* Allocation counts.. */
@@ -297,6 +307,8 @@ extern char * d_path(struct dentry *, st
 static inline struct dentry *dget(struct dentry *dentry)
 {
 	if (dentry) {
+		if (ub_dget_testone(dentry))
+			BUG();
 		BUG_ON(!atomic_read(&dentry->d_count));
 		atomic_inc(&dentry->d_count);
 	}
@@ -340,6 +352,8 @@ extern struct dentry *lookup_create(stru
 
 extern int sysctl_vfs_cache_pressure;
 
+extern int check_area_access_ve(struct dentry *, struct vfsmount *);
+extern int check_area_execute_ve(struct dentry *, struct vfsmount *);
 #endif /* __KERNEL__ */
 
 #endif	/* __LINUX_DCACHE_H */
diff -uprN linux-2.6.16/include/linux/devpts_fs.h linux-2.6.16.ovz/include/linux/devpts_fs.h
--- linux-2.6.16/include/linux/devpts_fs.h	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/include/linux/devpts_fs.h	2006-07-05 08:34:56.000000000 -0400
@@ -21,6 +21,15 @@ int devpts_pty_new(struct tty_struct *tt
 struct tty_struct *devpts_get_tty(int number);	 /* get tty structure */
 void devpts_pty_kill(int number);		 /* unlink */
 
+struct devpts_config {
+	int setuid;
+	int setgid;
+	uid_t   uid;
+	gid_t   gid;
+	umode_t mode;
+};
+
+extern struct devpts_config devpts_config;
 #else
 
 /* Dummy stubs in the no-pty case */
diff -uprN linux-2.6.16/include/linux/elfcore.h linux-2.6.16.ovz/include/linux/elfcore.h
--- linux-2.6.16/include/linux/elfcore.h	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/include/linux/elfcore.h	2006-07-05 08:34:56.000000000 -0400
@@ -7,6 +7,8 @@
 #include <linux/user.h>
 #include <linux/ptrace.h>
 
+extern int sysctl_at_vsyscall;
+
 struct elf_siginfo
 {
 	int	si_signo;			/* signal number */
diff -uprN linux-2.6.16/include/linux/eventpoll.h linux-2.6.16.ovz/include/linux/eventpoll.h
--- linux-2.6.16/include/linux/eventpoll.h	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/include/linux/eventpoll.h	2006-07-05 08:34:56.000000000 -0400
@@ -85,6 +85,91 @@ static inline void eventpoll_release(str
 	eventpoll_release_file(file);
 }
 
+struct epoll_filefd {
+	struct file *file;
+	int fd;
+};
+
+/*
+ * This structure is stored inside the "private_data" member of the file
+ * structure and represents the main data structure for the eventpoll
+ * interface.
+ */
+struct eventpoll {
+	/* Protects access to this structure */
+	rwlock_t lock;
+
+	/*
+	 * This semaphore is used to ensure that files are not removed
+	 * while epoll is using them. This is read-held during the event
+	 * collection loop and it is write-held during the file cleanup
+	 * path, the epoll file exit code and the ctl operations.
+	 */
+	struct rw_semaphore sem;
+
+	/* Wait queue used by sys_epoll_wait() */
+	wait_queue_head_t wq;
+
+	/* Wait queue used by file->poll() */
+	wait_queue_head_t poll_wait;
+
+	/* List of ready file descriptors */
+	struct list_head rdllist;
+
+	/* RB-Tree root used to store monitored fd structs */
+	struct rb_root rbr;
+};
+
+/*
+ * Each file descriptor added to the eventpoll interface will
+ * have an entry of this type linked to the eventpoll RB-Tree.
+ */
+struct epitem {
+	/* RB-Tree node used to link this structure to the eventpoll rb-tree */
+	struct rb_node rbn;
+
+	/* List header used to link this structure to the eventpoll ready list */
+	struct list_head rdllink;
+
+	/* The file descriptor information this item refers to */
+	struct epoll_filefd ffd;
+
+	/* Number of active wait queues attached to poll operations */
+	int nwait;
+
+	/* List containing poll wait queues */
+	struct list_head pwqlist;
+
+	/* The "container" of this item */
+	struct eventpoll *ep;
+
+	/* The structure that describes the interested events and the source fd */
+	struct epoll_event event;
+
+	/*
+	 * Used to keep track of the usage count of the structure. This prevents
+	 * the structure from disappearing from underneath our processing.
+	 */
+	atomic_t usecnt;
+
+	/* List header used to link this item to the "struct file" items list */
+	struct list_head fllink;
+
+	/* List header used to link the item to the transfer list */
+	struct list_head txlink;
+
+	/*
+	 * This is used during the collection/transfer of events to userspace
+	 * to pin items empty events set.
+	 */
+	unsigned int revents;
+};
+
+extern struct semaphore epsem;
+struct epitem *ep_find(struct eventpoll *ep, struct file *file, int fd);
+int ep_insert(struct eventpoll *ep, struct epoll_event *event,
+		     struct file *tfile, int fd);
+void ep_release_epitem(struct epitem *epi);
 
 #else
 
diff -uprN linux-2.6.16/include/linux/fairsched.h linux-2.6.16.ovz/include/linux/fairsched.h
--- linux-2.6.16/include/linux/fairsched.h	1969-12-31 19:00:00.000000000 -0500
+++ linux-2.6.16.ovz/include/linux/fairsched.h	2006-07-05 08:34:56.000000000 -0400
@@ -0,0 +1,118 @@
+#ifndef __LINUX_FAIRSCHED_H__
+#define __LINUX_FAIRSCHED_H__
+
+/*
+ * Fair Scheduler
+ *
+ * Copyright (C) 2000-2005  SWsoft
+ *  All rights reserved.
+ *  
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#include <linux/cache.h>
+#include <asm/timex.h>
+
+#define FAIRSCHED_HAS_CPU_BINDING	0
+
+typedef struct { cycles_t t; } fschtag_t;
+typedef struct { unsigned long d; } fschdur_t;
+typedef struct { cycles_t v; } fschvalue_t;
+
+struct vcpu_scheduler;
+
+struct fairsched_node {
+	struct list_head runlist;
+
+	/*
+	 * Fair Scheduler fields
+	 * NOTE(review): no nr_running field exists here; presumably nr_runnable is meant - confirm
+	 * nr_running >= nr_ready (!= if delayed)
+	 */
+	fschtag_t start_tag;
+	int nr_ready;
+	int nr_runnable;
+	int nr_pcpu;
+
+	/*
+	 * Rate limiter fields
+	 */
+	cycles_t last_updated_at;
+	fschvalue_t value;	/* leaky function value */
+	cycles_t delay;		/* removed from schedule till */
+	unsigned char delayed;
+
+	/*
+	 * Configuration
+	 *
+	 * Read-only most of the time.
+	 */
+	unsigned weight ____cacheline_aligned_in_smp;
+				/* fairness weight */
+	unsigned char rate_limited;
+	unsigned rate;		/* max CPU share */
+	fschtag_t max_latency;
+	unsigned min_weight;
+
+	struct list_head nodelist;
+	int id;
+#ifdef CONFIG_VE
+	struct ve_struct *owner_env;
+#endif
+	struct vcpu_scheduler *vsched;
+};
+
+#ifdef CONFIG_FAIRSCHED
+
+#define FSCHWEIGHT_MAX			((1 << 16) - 1)
+#define FSCHRATE_SHIFT			10
+
+/*
+ * Fairsched nodes used in boot process.
+ */
+extern struct fairsched_node fairsched_init_node;
+extern struct fairsched_node fairsched_idle_node;
+
+/*
+ * For proc output.
+ */
+extern unsigned fairsched_nr_cpus;
+extern void fairsched_cpu_online_map(int id, cpumask_t *mask);
+
+/* I hope vsched_id is always equal to fairsched node id  --SAW */
+#define task_fairsched_node_id(p)	task_vsched_id(p)
+
+/*
+ * Core functions.
+ */
+extern void fairsched_incrun(struct fairsched_node *node);
+extern void fairsched_decrun(struct fairsched_node *node);
+extern void fairsched_inccpu(struct fairsched_node *node);
+extern void fairsched_deccpu(struct fairsched_node *node);
+extern struct fairsched_node *fairsched_schedule(
+		struct fairsched_node *prev_node,
+		struct fairsched_node *cur_node,
+		int cur_node_active,
+		cycles_t time);
+
+/*
+ * Management functions.
+ */
+void fairsched_init_early(void);
+asmlinkage int sys_fairsched_mknod(unsigned int parent, unsigned int weight,
+		unsigned int newid);
+asmlinkage int sys_fairsched_rmnod(unsigned int id);
+asmlinkage int sys_fairsched_mvpr(pid_t pid, unsigned int nodeid);
+
+#else /* CONFIG_FAIRSCHED */
+
+#define task_fairsched_node_id(p)	0	/* every task reports node id 0 when fairsched is compiled out */
+#define fairsched_incrun(p)		do { } while (0)
+#define fairsched_decrun(p)		do { } while (0)
+#define fairsched_inccpu(p)		do { } while (0)	/* no-op stub, so all four inc/dec hooks exist without CONFIG_FAIRSCHED */
+#define fairsched_deccpu(p)		do { } while (0)
+#define fairsched_cpu_online_map(id, mask)      do { *(mask) = cpu_online_map; } while (0)
+#endif /* CONFIG_FAIRSCHED */
+
+#endif /* __LINUX_FAIRSCHED_H__ */
diff -uprN linux-2.6.16/include/linux/faudit.h linux-2.6.16.ovz/include/linux/faudit.h
--- linux-2.6.16/include/linux/faudit.h	1969-12-31 19:00:00.000000000 -0500
+++ linux-2.6.16.ovz/include/linux/faudit.h	2006-07-05 08:34:56.000000000 -0400
@@ -0,0 +1,46 @@
+/*
+ *  include/linux/faudit.h
+ *
+ *  Copyright (C) 2005  SWSoft
+ *  All rights reserved.
+ *
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#ifndef __FAUDIT_H_
+#define __FAUDIT_H_
+
+#include <linux/config.h>
+#include <linux/virtinfo.h>
+
+struct vfsmount;
+struct dentry;
+struct super_block;
+struct kstatfs;
+struct kstat;
+struct pt_regs;
+
+struct faudit_regs_arg {
+	int err;
+	struct pt_regs *regs;
+};
+
+struct faudit_stat_arg {
+	int err;
+	struct vfsmount *mnt;
+	struct dentry *dentry;
+	struct kstat *stat;
+};
+
+struct faudit_statfs_arg {
+	int err;
+	struct super_block *sb;
+	struct kstatfs *stat;
+};
+
+#define VIRTINFO_FAUDIT			(0)
+#define VIRTINFO_FAUDIT_STAT		(VIRTINFO_FAUDIT + 0)
+#define VIRTINFO_FAUDIT_STATFS		(VIRTINFO_FAUDIT + 1)
+
+#endif
diff -uprN linux-2.6.16/include/linux/fb.h linux-2.6.16.ovz/include/linux/fb.h
--- linux-2.6.16/include/linux/fb.h	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/include/linux/fb.h	2006-07-05 08:34:56.000000000 -0400
@@ -839,12 +839,10 @@ struct fb_info {
 #define FB_LEFT_POS(bpp)          (32 - bpp)
 #define FB_SHIFT_HIGH(val, bits)  ((val) >> (bits))
 #define FB_SHIFT_LOW(val, bits)   ((val) << (bits))
-#define FB_BIT_NR(b)              (7 - (b))
 #else
 #define FB_LEFT_POS(bpp)          (0)
 #define FB_SHIFT_HIGH(val, bits)  ((val) << (bits))
 #define FB_SHIFT_LOW(val, bits)   ((val) >> (bits))
-#define FB_BIT_NR(b)              (b)
 #endif
 
     /*
diff -uprN linux-2.6.16/include/linux/fs.h linux-2.6.16.ovz/include/linux/fs.h
--- linux-2.6.16/include/linux/fs.h	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/include/linux/fs.h	2006-07-05 08:34:56.000000000 -0400
@@ -7,6 +7,7 @@
  */
 
 #include <linux/config.h>
+#include <linux/ve_owner.h>
 #include <linux/limits.h>
 #include <linux/ioctl.h>
 
@@ -64,6 +65,7 @@ extern int dir_notify_enable;
 #define FMODE_LSEEK	4
 #define FMODE_PREAD	8
 #define FMODE_PWRITE	FMODE_PREAD	/* These go hand in hand */
+#define FMODE_QUOTACTL	4	/* NOTE(review): same numeric value as FMODE_LSEEK above - confirm the overlap is intended */
 
 #define RW_MASK		1
 #define RWA_MASK	2
@@ -83,6 +85,7 @@ extern int dir_notify_enable;
 /* public flags for file_system_type */
 #define FS_REQUIRES_DEV 1 
 #define FS_BINARY_MOUNTDATA 2
+#define FS_VIRTUALIZED	64	/* Can mount this fstype inside ve */
 #define FS_REVAL_DOT	16384	/* Check the paths ".", ".." for staleness */
 #define FS_ODD_RENAME	32768	/* Temporary stuff; will go away as soon
 				  * as nfs_rename() will be cleaned up
@@ -297,6 +300,9 @@ struct iattr {
  * Includes for diskquotas.
  */
 #include <linux/quota.h>
+#if defined(CONFIG_VZ_QUOTA) || defined(CONFIG_VZ_QUOTA_MODULE)
+#include <linux/vzquota_qlnk.h>
+#endif
 
 /** 
  * enum positive_aop_returns - aop return codes with specific semantics
@@ -493,6 +499,9 @@ struct inode {
 #ifdef CONFIG_QUOTA
 	struct dquot		*i_dquot[MAXQUOTAS];
 #endif
+#if defined(CONFIG_VZ_QUOTA) || defined(CONFIG_VZ_QUOTA_MODULE)
+	struct vz_quota_ilink	i_qlnk;
+#endif
 	/* These three should probably be a union */
 	struct list_head	i_devices;
 	struct pipe_inode_info	*i_pipe;
@@ -527,6 +536,8 @@ struct inode {
 #endif
 };
 
+extern kmem_cache_t *inode_cachep;
+
 /*
  * NOTE: in a 32bit arch with a preemptable kernel and
  * an UP compile the i_size_read/write must be atomic
@@ -588,6 +599,20 @@ static inline unsigned imajor(struct ino
 
 extern struct block_device *I_BDEV(struct inode *inode);
 
+struct exec_perm {
+	umode_t mode;
+	uid_t uid, gid;
+	int set;
+};
+
+static inline void set_exec_perm(struct exec_perm *perm, struct inode *ino)
+{
+	perm->mode = ino->i_mode;	/* snapshot the inode's mode ... */
+	perm->uid = ino->i_uid;		/* ... and its owner ... */
+	perm->gid = ino->i_gid;		/* ... and group into *perm */
+	perm->set = 1;			/* flag the snapshot as filled in */
+}
+
 struct fown_struct {
 	rwlock_t lock;          /* protects pid, uid, euid fields */
 	int pid;		/* pid or -pgrp where SIGIO should be sent */
@@ -646,7 +671,10 @@ struct file {
 	spinlock_t		f_ep_lock;
 #endif /* #ifdef CONFIG_EPOLL */
 	struct address_space	*f_mapping;
+	struct ve_struct	*owner_env;
 };
+DCL_VE_OWNER_PROTO(FILP, struct file, owner_env)
+
 extern spinlock_t files_lock;
 #define file_list_lock() spin_lock(&files_lock);
 #define file_list_unlock() spin_unlock(&files_lock);
@@ -710,6 +738,9 @@ struct file_lock {
 	struct file *fl_file;
 	unsigned char fl_flags;
 	unsigned char fl_type;
+#ifdef CONFIG_USER_RESOURCE
+	unsigned char fl_charged;
+#endif
 	loff_t fl_start;
 	loff_t fl_end;
 
@@ -902,7 +933,7 @@ static inline void unlock_super(struct s
 /*
  * VFS helper functions..
  */
-extern int vfs_permission(struct nameidata *, int);
+extern int vfs_permission(struct nameidata *, int, struct exec_perm *);
 extern int vfs_create(struct inode *, struct dentry *, int, struct nameidata *);
 extern int vfs_mkdir(struct inode *, struct dentry *, int);
 extern int vfs_mknod(struct inode *, struct dentry *, int, dev_t);
@@ -1041,7 +1072,8 @@ struct inode_operations {
 	void * (*follow_link) (struct dentry *, struct nameidata *);
 	void (*put_link) (struct dentry *, struct nameidata *, void *);
 	void (*truncate) (struct inode *);
-	int (*permission) (struct inode *, int, struct nameidata *);
+	int (*permission) (struct inode *, int, struct nameidata *,
+			struct exec_perm *);
 	int (*setattr) (struct dentry *, struct iattr *);
 	int (*getattr) (struct vfsmount *mnt, struct dentry *, struct kstat *);
 	int (*setxattr) (struct dentry *, const char *,const void *,size_t,int);
@@ -1089,6 +1121,8 @@ struct super_operations {
 
 	ssize_t (*quota_read)(struct super_block *, int, char *, size_t, loff_t);
 	ssize_t (*quota_write)(struct super_block *, int, const char *, size_t, loff_t);
+
+	struct inode *(*get_quota_root)(struct super_block *);
 };
 
 /* Inode state bits.  Protected by inode_lock. */
@@ -1246,8 +1280,14 @@ struct file_system_type {
 	struct module *owner;
 	struct file_system_type * next;
 	struct list_head fs_supers;
+	struct ve_struct *owner_env;
 };
 
+DCL_VE_OWNER_PROTO(FSTYPE, struct file_system_type, owner_env)
+
+void get_filesystem(struct file_system_type *fs);
+void put_filesystem(struct file_system_type *fs);
+
 struct super_block *get_sb_bdev(struct file_system_type *fs_type,
 	int flags, const char *dev_name, void *data,
 	int (*fill_super)(struct super_block *, void *, int));
@@ -1285,6 +1325,7 @@ extern struct vfsmount *kern_mount(struc
 extern int may_umount_tree(struct vfsmount *);
 extern int may_umount(struct vfsmount *);
 extern void umount_tree(struct vfsmount *, int, struct list_head *);
+#define kern_umount mntput
 extern void release_mounts(struct list_head *);
 extern long do_mount(char *, char *, char *, unsigned long, void *);
 extern struct vfsmount *copy_tree(struct vfsmount *, struct dentry *, int);
@@ -1292,6 +1333,7 @@ extern void mnt_set_mountpoint(struct vf
 				  struct vfsmount *);
 
 extern int vfs_statfs(struct super_block *, struct kstatfs *);
+extern int faudit_statfs(struct super_block *, struct kstatfs *);
 
 /* /sys/fs */
 extern struct subsystem fs_subsys;
@@ -1383,6 +1425,7 @@ extern int bd_claim(struct block_device 
 extern void bd_release(struct block_device *);
 
 /* fs/char_dev.c */
+#define CHRDEV_MAJOR_HASH_SIZE	255
 extern int alloc_chrdev_region(dev_t *, unsigned, unsigned, const char *);
 extern int register_chrdev_region(dev_t, unsigned, const char *);
 extern int register_chrdev(unsigned int, const char *,
@@ -1390,25 +1433,17 @@ extern int register_chrdev(unsigned int,
 extern int unregister_chrdev(unsigned int, const char *);
 extern void unregister_chrdev_region(dev_t, unsigned);
 extern int chrdev_open(struct inode *, struct file *);
-extern int get_chrdev_list(char *);
-extern void *acquire_chrdev_list(void);
-extern int count_chrdev_list(void);
-extern void *get_next_chrdev(void *);
-extern int get_chrdev_info(void *, int *, char **);
-extern void release_chrdev_list(void *);
+extern void chrdev_show(struct seq_file *,off_t);
 
 /* fs/block_dev.c */
+#define BLKDEV_MAJOR_HASH_SIZE	255
 #define BDEVNAME_SIZE	32	/* Largest string for a blockdev identifier */
 extern const char *__bdevname(dev_t, char *buffer);
 extern const char *bdevname(struct block_device *bdev, char *buffer);
-extern struct block_device *lookup_bdev(const char *);
+extern struct block_device *lookup_bdev(const char *, int mode);
 extern struct block_device *open_bdev_excl(const char *, int, void *);
 extern void close_bdev_excl(struct block_device *);
-extern void *acquire_blkdev_list(void);
-extern int count_blkdev_list(void);
-extern void *get_next_blkdev(void *);
-extern int get_blkdev_info(void *, int *, char **);
-extern void release_blkdev_list(void *);
+extern void blkdev_show(struct seq_file *,off_t);
 
 extern void init_special_inode(struct inode *, umode_t, dev_t);
 
@@ -1433,7 +1468,7 @@ extern int fs_may_remount_ro(struct supe
 #define bio_data_dir(bio)	((bio)->bi_rw & 1)
 
 extern int check_disk_change(struct block_device *);
-extern int invalidate_inodes(struct super_block *);
+extern int invalidate_inodes(struct super_block *, int);
 extern int __invalidate_device(struct block_device *);
 extern int invalidate_partition(struct gendisk *, int);
 unsigned long invalidate_mapping_pages(struct address_space *mapping,
@@ -1463,9 +1498,10 @@ extern int do_remount_sb(struct super_bl
 			 void *data, int force);
 extern sector_t bmap(struct inode *, sector_t);
 extern int notify_change(struct dentry *, struct iattr *);
-extern int permission(struct inode *, int, struct nameidata *);
+extern int permission(struct inode *, int, struct nameidata *,
+		struct exec_perm *);
 extern int generic_permission(struct inode *, int,
-		int (*check_acl)(struct inode *, int));
+		int (*check_acl)(struct inode *, int), struct exec_perm *);
 
 extern int get_write_access(struct inode *);
 extern int deny_write_access(struct file *);
@@ -1484,7 +1520,9 @@ extern int open_namei(int dfd, const cha
 extern int may_open(struct nameidata *, int, int);
 
 extern int kernel_read(struct file *, unsigned long, char *, unsigned long);
-extern struct file * open_exec(const char *);
+
+struct linux_binprm;
+extern struct file * open_exec(const char *, struct linux_binprm *);
  
 /* fs/dcache.c -- generic fs support functions */
 extern int is_subdir(struct dentry *, struct dentry *);
diff -uprN linux-2.6.16/include/linux/genhd.h linux-2.6.16.ovz/include/linux/genhd.h
--- linux-2.6.16/include/linux/genhd.h	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/include/linux/genhd.h	2006-07-05 08:34:56.000000000 -0400
@@ -421,6 +421,7 @@ static inline struct block_device *bdget
 	return bdget(MKDEV(disk->major, disk->first_minor) + index);
 }
 
+extern struct subsystem block_subsys;
 #endif
 
 #endif
diff -uprN linux-2.6.16/include/linux/gfp.h linux-2.6.16.ovz/include/linux/gfp.h
--- linux-2.6.16/include/linux/gfp.h	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/include/linux/gfp.h	2006-07-05 08:34:56.000000000 -0400
@@ -47,6 +47,8 @@ struct vm_area_struct;
 #define __GFP_ZERO	((__force gfp_t)0x8000u)/* Return zeroed page on success */
 #define __GFP_NOMEMALLOC ((__force gfp_t)0x10000u) /* Don't use emergency reserves */
 #define __GFP_HARDWALL   ((__force gfp_t)0x20000u) /* Enforce hardwall cpuset memory allocs */
+#define __GFP_UBC	((__force gfp_t)0x40000u)/* charge kmem in buddy and slab */
+#define __GFP_SOFT_UBC	((__force gfp_t)0x80000u)/* use soft charging */
 
 #define __GFP_BITS_SHIFT 20	/* Room for 20 __GFP_FOO bits */
 #define __GFP_BITS_MASK ((__force gfp_t)((1 << __GFP_BITS_SHIFT) - 1))
@@ -55,14 +57,17 @@ struct vm_area_struct;
 #define GFP_LEVEL_MASK (__GFP_WAIT|__GFP_HIGH|__GFP_IO|__GFP_FS| \
 			__GFP_COLD|__GFP_NOWARN|__GFP_REPEAT| \
 			__GFP_NOFAIL|__GFP_NORETRY|__GFP_NO_GROW|__GFP_COMP| \
-			__GFP_NOMEMALLOC|__GFP_HARDWALL)
+			__GFP_NOMEMALLOC|__GFP_HARDWALL| \
+			__GFP_UBC|__GFP_SOFT_UBC)
 
 /* GFP_ATOMIC means both !wait (__GFP_WAIT not set) and use emergency pool */
 #define GFP_ATOMIC	(__GFP_HIGH)
 #define GFP_NOIO	(__GFP_WAIT)
 #define GFP_NOFS	(__GFP_WAIT | __GFP_IO)
 #define GFP_KERNEL	(__GFP_WAIT | __GFP_IO | __GFP_FS)
+#define GFP_KERNEL_UBC	(__GFP_WAIT | __GFP_IO | __GFP_FS | __GFP_UBC)
 #define GFP_USER	(__GFP_WAIT | __GFP_IO | __GFP_FS | __GFP_HARDWALL)
+#define GFP_USER_UBC	(__GFP_WAIT | __GFP_IO | __GFP_FS | __GFP_HARDWALL | __GFP_UBC)
 #define GFP_HIGHUSER	(__GFP_WAIT | __GFP_IO | __GFP_FS | __GFP_HARDWALL | \
 			 __GFP_HIGHMEM)
 
diff -uprN linux-2.6.16/include/linux/hrtimer.h linux-2.6.16.ovz/include/linux/hrtimer.h
--- linux-2.6.16/include/linux/hrtimer.h	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/include/linux/hrtimer.h	2006-07-05 08:34:56.000000000 -0400
@@ -140,4 +140,9 @@ extern void hrtimer_run_queues(void);
 /* Bootup initialization: */
 extern void __init hrtimers_init(void);
 
+extern long nanosleep_restart(struct restart_block *restart);
+
+extern ktime_t schedule_hrtimer(struct hrtimer *timer,
+				const enum hrtimer_mode mode);
+
 #endif
diff -uprN linux-2.6.16/include/linux/i2o.h linux-2.6.16.ovz/include/linux/i2o.h
--- linux-2.6.16/include/linux/i2o.h	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/include/linux/i2o.h	2006-07-05 08:34:56.000000000 -0400
@@ -1116,8 +1116,11 @@ static inline struct i2o_message *i2o_ms
 
 	mmsg->mfa = readl(c->in_port);
 	if (unlikely(mmsg->mfa >= c->in_queue.len)) {
+		u32 mfa = mmsg->mfa;
+
 		mempool_free(mmsg, c->in_msg.mempool);
-		if(mmsg->mfa == I2O_QUEUE_EMPTY)
+
+		if (mfa == I2O_QUEUE_EMPTY)
 			return ERR_PTR(-EBUSY);
 		return ERR_PTR(-EFAULT);
 	}
diff -uprN linux-2.6.16/include/linux/inetdevice.h linux-2.6.16.ovz/include/linux/inetdevice.h
--- linux-2.6.16/include/linux/inetdevice.h	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/include/linux/inetdevice.h	2006-07-05 08:34:56.000000000 -0400
@@ -34,6 +34,12 @@ struct ipv4_devconf
 };
 
 extern struct ipv4_devconf ipv4_devconf;
+extern struct ipv4_devconf ipv4_devconf_dflt;
+#if defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE)
+#define ve_ipv4_devconf		(*(get_exec_env()->_ipv4_devconf))
+#else
+#define ve_ipv4_devconf		ipv4_devconf
+#endif
 
 struct in_device
 {
@@ -60,29 +66,29 @@ struct in_device
 };
 
 #define IN_DEV_FORWARD(in_dev)		((in_dev)->cnf.forwarding)
-#define IN_DEV_MFORWARD(in_dev)		(ipv4_devconf.mc_forwarding && (in_dev)->cnf.mc_forwarding)
-#define IN_DEV_RPFILTER(in_dev)		(ipv4_devconf.rp_filter && (in_dev)->cnf.rp_filter)
-#define IN_DEV_SOURCE_ROUTE(in_dev)	(ipv4_devconf.accept_source_route && (in_dev)->cnf.accept_source_route)
-#define IN_DEV_BOOTP_RELAY(in_dev)	(ipv4_devconf.bootp_relay && (in_dev)->cnf.bootp_relay)
-
-#define IN_DEV_LOG_MARTIANS(in_dev)	(ipv4_devconf.log_martians || (in_dev)->cnf.log_martians)
-#define IN_DEV_PROXY_ARP(in_dev)	(ipv4_devconf.proxy_arp || (in_dev)->cnf.proxy_arp)
-#define IN_DEV_SHARED_MEDIA(in_dev)	(ipv4_devconf.shared_media || (in_dev)->cnf.shared_media)
-#define IN_DEV_TX_REDIRECTS(in_dev)	(ipv4_devconf.send_redirects || (in_dev)->cnf.send_redirects)
-#define IN_DEV_SEC_REDIRECTS(in_dev)	(ipv4_devconf.secure_redirects || (in_dev)->cnf.secure_redirects)
+#define IN_DEV_MFORWARD(in_dev)		(ve_ipv4_devconf.mc_forwarding && (in_dev)->cnf.mc_forwarding)
+#define IN_DEV_RPFILTER(in_dev)		(ve_ipv4_devconf.rp_filter && (in_dev)->cnf.rp_filter)
+#define IN_DEV_SOURCE_ROUTE(in_dev)	(ve_ipv4_devconf.accept_source_route && (in_dev)->cnf.accept_source_route)
+#define IN_DEV_BOOTP_RELAY(in_dev)	(ve_ipv4_devconf.bootp_relay && (in_dev)->cnf.bootp_relay)
+
+#define IN_DEV_LOG_MARTIANS(in_dev)	(ve_ipv4_devconf.log_martians || (in_dev)->cnf.log_martians)
+#define IN_DEV_PROXY_ARP(in_dev)	(ve_ipv4_devconf.proxy_arp || (in_dev)->cnf.proxy_arp)
+#define IN_DEV_SHARED_MEDIA(in_dev)	(ve_ipv4_devconf.shared_media || (in_dev)->cnf.shared_media)
+#define IN_DEV_TX_REDIRECTS(in_dev)	(ve_ipv4_devconf.send_redirects || (in_dev)->cnf.send_redirects)
+#define IN_DEV_SEC_REDIRECTS(in_dev)	(ve_ipv4_devconf.secure_redirects || (in_dev)->cnf.secure_redirects)
 #define IN_DEV_IDTAG(in_dev)		((in_dev)->cnf.tag)
 #define IN_DEV_MEDIUM_ID(in_dev)	((in_dev)->cnf.medium_id)
 #define IN_DEV_PROMOTE_SECONDARIES(in_dev)	(ipv4_devconf.promote_secondaries || (in_dev)->cnf.promote_secondaries)
 
 #define IN_DEV_RX_REDIRECTS(in_dev) \
 	((IN_DEV_FORWARD(in_dev) && \
-	  (ipv4_devconf.accept_redirects && (in_dev)->cnf.accept_redirects)) \
+	  (ve_ipv4_devconf.accept_redirects && (in_dev)->cnf.accept_redirects)) \
 	 || (!IN_DEV_FORWARD(in_dev) && \
-	  (ipv4_devconf.accept_redirects || (in_dev)->cnf.accept_redirects)))
+ 	  (ve_ipv4_devconf.accept_redirects || (in_dev)->cnf.accept_redirects)))
 
-#define IN_DEV_ARPFILTER(in_dev)	(ipv4_devconf.arp_filter || (in_dev)->cnf.arp_filter)
-#define IN_DEV_ARP_ANNOUNCE(in_dev)	(max(ipv4_devconf.arp_announce, (in_dev)->cnf.arp_announce))
-#define IN_DEV_ARP_IGNORE(in_dev)	(max(ipv4_devconf.arp_ignore, (in_dev)->cnf.arp_ignore))
+#define IN_DEV_ARPFILTER(in_dev)	(ve_ipv4_devconf.arp_filter || (in_dev)->cnf.arp_filter)
+#define IN_DEV_ARP_ANNOUNCE(in_dev)	(max(ve_ipv4_devconf.arp_announce, (in_dev)->cnf.arp_announce))
+#define IN_DEV_ARP_IGNORE(in_dev)	(max(ve_ipv4_devconf.arp_ignore, (in_dev)->cnf.arp_ignore))
 
 struct in_ifaddr
 {
@@ -113,6 +119,7 @@ extern u32		inet_select_addr(const struc
 extern u32		inet_confirm_addr(const struct net_device *dev, u32 dst, u32 local, int scope);
 extern struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, u32 prefix, u32 mask);
 extern void		inet_forward_change(void);
+extern void		inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap, int destroy);
 
 static __inline__ int inet_ifa_match(u32 addr, struct in_ifaddr *ifa)
 {
@@ -180,6 +187,10 @@ static inline void in_dev_put(struct in_
 #define __in_dev_put(idev)  atomic_dec(&(idev)->refcnt)
 #define in_dev_hold(idev)   atomic_inc(&(idev)->refcnt)
 
+struct ve_struct;
+extern int devinet_sysctl_init(struct ve_struct *);
+extern void devinet_sysctl_fini(struct ve_struct *);
+extern void devinet_sysctl_free(struct ve_struct *);
 #endif /* __KERNEL__ */
 
 static __inline__ __u32 inet_make_mask(int logmask)
diff -uprN linux-2.6.16/include/linux/ipv6.h linux-2.6.16.ovz/include/linux/ipv6.h
--- linux-2.6.16/include/linux/ipv6.h	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/include/linux/ipv6.h	2006-07-05 08:34:56.000000000 -0400
@@ -415,12 +415,13 @@ static inline struct raw6_sock *raw6_sk(
 #define inet_v6_ipv6only(__sk)		0
 #endif /* defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) */
 
-#define INET6_MATCH(__sk, __hash, __saddr, __daddr, __ports, __dif)\
+#define INET6_MATCH(__sk, __hash, __saddr, __daddr, __ports, __dif,__ve)\
 	(((__sk)->sk_hash == (__hash))				&& \
 	 ((*((__u32 *)&(inet_sk(__sk)->dport))) == (__ports))  	&& \
 	 ((__sk)->sk_family		== AF_INET6)		&& \
 	 ipv6_addr_equal(&inet6_sk(__sk)->daddr, (__saddr))	&& \
 	 ipv6_addr_equal(&inet6_sk(__sk)->rcv_saddr, (__daddr))	&& \
+	 ve_accessible_strict(VE_OWNER_SK(__sk), (__ve))	&& \
 	 (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif))))
 
 #endif /* __KERNEL__ */
diff -uprN linux-2.6.16/include/linux/jbd.h linux-2.6.16.ovz/include/linux/jbd.h
--- linux-2.6.16/include/linux/jbd.h	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/include/linux/jbd.h	2006-07-05 08:34:56.000000000 -0400
@@ -245,10 +245,15 @@ typedef struct journal_superblock_s
 #define J_ASSERT(assert)						\
 do {									\
 	if (!(assert)) {						\
+		unsigned long stack;					\
 		printk (KERN_EMERG					\
 			"Assertion failure in %s() at %s:%d: \"%s\"\n",	\
 			__FUNCTION__, __FILE__, __LINE__, # assert);	\
-		BUG();							\
+		printk(KERN_EMERG "Stack=%p current=%p pid=%d ve=%d "	\
+				"comm='%s'\n", &stack, current,		\
+				current->pid, get_exec_env()->veid,	\
+				current->comm);				\
+		dump_stack();	/* NOTE(review): non-fatal now, BUG() was dropped */ \
 	}								\
 } while (0)
 
diff -uprN linux-2.6.16/include/linux/jiffies.h linux-2.6.16.ovz/include/linux/jiffies.h
--- linux-2.6.16/include/linux/jiffies.h	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/include/linux/jiffies.h	2006-07-05 08:34:56.000000000 -0400
@@ -74,6 +74,7 @@
  */
 extern u64 __jiffy_data jiffies_64;
 extern unsigned long volatile __jiffy_data jiffies;
+extern unsigned long cycles_per_jiffy, cycles_per_clock;
 
 #if (BITS_PER_LONG < 64)
 u64 get_jiffies_64(void);
diff -uprN linux-2.6.16/include/linux/kdev_t.h linux-2.6.16.ovz/include/linux/kdev_t.h
--- linux-2.6.16/include/linux/kdev_t.h	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/include/linux/kdev_t.h	2006-07-05 08:34:56.000000000 -0400
@@ -87,6 +87,57 @@ static inline unsigned sysv_minor(u32 de
 	return dev & 0x3ffff;
 }
 
+#define UNNAMED_MAJOR_COUNT	16
+
+#if UNNAMED_MAJOR_COUNT > 1
+
+extern int unnamed_dev_majors[UNNAMED_MAJOR_COUNT];
+
+static inline dev_t make_unnamed_dev(int idx)
+{
+	/*
+	 * Index bits starting at bit 8 (masked by UNNAMED_MAJOR_COUNT - 1) pick
+	 * the entry of unnamed_dev_majors[]; those bits are cleared in the minor.
+	 */
+	return MKDEV(unnamed_dev_majors[(idx >> 8) & (UNNAMED_MAJOR_COUNT - 1)],
+		     idx & ~((UNNAMED_MAJOR_COUNT - 1) << 8));
+}
+
+static inline int unnamed_dev_idx(dev_t dev)
+{
+	int n = 0;	/* slot of MAJOR(dev) in unnamed_dev_majors[] */
+	while (n < UNNAMED_MAJOR_COUNT && MAJOR(dev) != unnamed_dev_majors[n])
+		n++;
+	return (n << 8) | MINOR(dev);
+}
+
+static inline int is_unnamed_dev(dev_t dev)
+{
+	int n = 0;
+	while (n < UNNAMED_MAJOR_COUNT && MAJOR(dev) != unnamed_dev_majors[n])
+		n++;
+	return n != UNNAMED_MAJOR_COUNT;	/* stopped early => major matched */
+}
+
+#else /* UNNAMED_MAJOR_COUNT */
+
+static inline dev_t make_unnamed_dev(int idx)
+{
+	return MKDEV(0, idx);
+}
+
+static inline int unnamed_dev_idx(dev_t dev)
+{
+	return MINOR(dev);
+}
+
+static inline int is_unnamed_dev(dev_t dev)
+{
+	return MAJOR(dev) == 0;
+}
+
+#endif /* UNNAMED_MAJOR_COUNT */
+
 
 #else /* __KERNEL__ */
 
diff -uprN linux-2.6.16/include/linux/kernel.h linux-2.6.16.ovz/include/linux/kernel.h
--- linux-2.6.16/include/linux/kernel.h	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/include/linux/kernel.h	2006-07-05 08:34:56.000000000 -0400
@@ -132,6 +132,9 @@ asmlinkage int vprintk(const char *fmt, 
 	__attribute__ ((format (printf, 1, 0)));
 asmlinkage int printk(const char * fmt, ...)
 	__attribute__ ((format (printf, 1, 2)));
+asmlinkage int ve_printk(int, const char * fmt, ...)
+	__attribute__ ((format (printf, 2, 3)));
+void prepare_printk(void);
 #else
 static inline int vprintk(const char *s, va_list args)
 	__attribute__ ((format (printf, 1, 0)));
@@ -139,8 +142,16 @@ static inline int vprintk(const char *s,
 static inline int printk(const char *s, ...)
 	__attribute__ ((format (printf, 1, 2)));
 static inline int printk(const char *s, ...) { return 0; }
+static inline int ve_printk(int d, const char *s, ...)
+	__attribute__ ((format (printf, 2, 3)));	/* fmt is argument 2 */
+static inline int ve_printk(int d, const char *s, ...) { return 0; }
+#define prepare_printk()	do { } while (0)
 #endif
 
+#define VE0_LOG		1
+#define VE_LOG		2
+#define VE_LOG_BOTH	(VE0_LOG | VE_LOG)
+
 unsigned long int_sqrt(unsigned long);
 
 static inline int __attribute_pure__ long_log2(unsigned long x)
@@ -159,9 +170,14 @@ static inline unsigned long __attribute_
 extern int printk_ratelimit(void);
 extern int __printk_ratelimit(int ratelimit_jiffies, int ratelimit_burst);
 
+extern int console_silence_loglevel;
+
 static inline void console_silent(void)
 {
-	console_loglevel = 0;
+	if (console_loglevel > console_silence_loglevel) {
+		printk(KERN_EMERG "console shuts up ...\n");
+		console_loglevel = 0;
+	}
 }
 
 static inline void console_verbose(void)
@@ -171,10 +187,13 @@ static inline void console_verbose(void)
 }
 
 extern void bust_spinlocks(int yes);
+extern void wake_up_klogd(void);
 extern int oops_in_progress;		/* If set, an oops, panic(), BUG() or die() is in progress */
 extern __deprecated_for_modules int panic_timeout;
 extern int panic_on_oops;
+extern int decode_call_traces;
 extern int tainted;
+extern int kernel_text_csum_broken;
 extern const char *print_tainted(void);
 extern void add_taint(unsigned);
 
diff -uprN linux-2.6.16/include/linux/kmem_cache.h linux-2.6.16.ovz/include/linux/kmem_cache.h
--- linux-2.6.16/include/linux/kmem_cache.h	1969-12-31 19:00:00.000000000 -0500
+++ linux-2.6.16.ovz/include/linux/kmem_cache.h	2006-07-05 08:34:56.000000000 -0400
@@ -0,0 +1,199 @@
+#ifndef __KMEM_CACHE_H__
+#define __KMEM_CACHE_H__
+#include <linux/threads.h>
+#include <linux/smp.h>
+#include <linux/spinlock.h>
+#include <linux/list.h>
+#include <linux/mm.h>
+#include <asm/atomic.h>
+
+/*
+ * DEBUG	- 1 for kmem_cache_create() to honour; SLAB_DEBUG_INITIAL,
+ *		  SLAB_RED_ZONE & SLAB_POISON.
+ *		  0 for faster, smaller code (especially in the critical paths).
+ *
+ * STATS	- 1 to collect stats for /proc/slabinfo.
+ *		  0 for faster, smaller code (especially in the critical paths).
+ *
+ * FORCED_DEBUG	- 1 enables SLAB_RED_ZONE and SLAB_POISON (if possible)
+ */
+
+#ifdef CONFIG_DEBUG_SLAB
+#define	SLAB_DEBUG		1
+#define	SLAB_STATS		1
+#define	SLAB_FORCED_DEBUG	1
+#else
+#define	SLAB_DEBUG		0
+#define	SLAB_STATS		0
+#define	SLAB_FORCED_DEBUG	0
+#endif
+
+/*
+ * struct array_cache
+ *
+ * Purpose:
+ * - LIFO ordering, to hand out cache-warm objects from _alloc
+ * - reduce the number of linked list operations
+ * - reduce spinlock operations
+ *
+ * The limit is stored in the per-cpu structure to reduce the data cache
+ * footprint.
+ *
+ */
+struct array_cache {
+	unsigned int avail;
+	unsigned int limit;
+	unsigned int batchcount;
+	unsigned int touched;
+	spinlock_t lock;
+	void *entry[0];		/*
+				 * Must have this definition in here for the proper
+				 * alignment of array_cache. Also simplifies accessing
+				 * the entries.
+				 * [0] is for gcc 2.95. It should really be [].
+				 */
+};
+
+/* bootstrap: The caches do not work without cpuarrays anymore,
+ * but the cpuarrays are allocated from the generic caches...
+ */
+#define BOOT_CPUCACHE_ENTRIES	1
+struct arraycache_init {
+	struct array_cache cache;
+	void *entries[BOOT_CPUCACHE_ENTRIES];
+};
+
+/*
+ * The slab lists for all objects.
+ */
+struct kmem_list3 {
+	struct list_head slabs_partial;	/* partial list first, better asm code */
+	struct list_head slabs_full;
+	struct list_head slabs_free;
+	unsigned long free_objects;
+	unsigned long next_reap;
+	int free_touched;
+	unsigned int free_limit;
+	unsigned int colour_next;	/* Per-node cache coloring */
+	spinlock_t list_lock;
+	struct array_cache *shared;	/* shared per node */
+	struct array_cache **alien;	/* on other nodes */
+};
+
+/*
+ * struct kmem_cache
+ *
+ * manages a cache.
+ */
+
+struct kmem_cache {
+/* 1) per-cpu data, touched during every alloc/free */
+	struct array_cache *array[NR_CPUS];
+	unsigned int batchcount;
+	unsigned int limit;
+	unsigned int shared;
+	unsigned int buffer_size;
+/* 2) touched by every alloc & free from the backend */
+	struct kmem_list3 *nodelists[MAX_NUMNODES];
+	unsigned int flags;	/* constant flags */
+	unsigned int num;	/* # of objs per slab */
+	spinlock_t spinlock;
+
+/* 3) cache_grow/shrink */
+	/* order of pgs per slab (2^n) */
+	unsigned int gfporder;
+
+	/* force GFP flags, e.g. GFP_DMA */
+	gfp_t gfpflags;
+
+	size_t colour;		/* cache colouring range */
+	unsigned int colour_off;	/* colour offset */
+	struct kmem_cache *slabp_cache;
+	unsigned int slab_size;
+	unsigned int dflags;	/* dynamic flags */
+
+	/* constructor func */
+	void (*ctor) (void *, struct kmem_cache *, unsigned long);
+
+	/* de-constructor func */
+	void (*dtor) (void *, struct kmem_cache *, unsigned long);
+
+/* 4) cache creation/removal */
+	const char *name;
+	struct list_head next;
+
+/* 5) statistics */
+#if SLAB_STATS
+	unsigned long num_active;
+	unsigned long num_allocations;
+	unsigned long high_mark;
+	unsigned long grown;
+	unsigned long reaped;
+	unsigned long errors;
+	unsigned long max_freeable;
+	unsigned long node_allocs;
+	unsigned long node_frees;
+	atomic_t allochit;
+	atomic_t allocmiss;
+	atomic_t freehit;
+	atomic_t freemiss;
+#endif
+#if SLAB_DEBUG
+	/*
+	 * If debugging is enabled, then the allocator can add additional
+	 * fields and/or padding to every object. buffer_size contains the total
+	 * object size including these internal fields, the following two
+	 * variables contain the offset to the user object and its size.
+	 */
+	int obj_offset;
+	int obj_size;
+#endif
+#ifdef CONFIG_USER_RESOURCE
+	unsigned int		objuse;
+#endif
+};
+
+#define CFLGS_OFF_SLAB		(0x80000000UL)
+#define CFLGS_ENVIDS		(0x04000000UL)
+#define	OFF_SLAB(x)		((x)->flags & CFLGS_OFF_SLAB)
+#define ENVIDS(x)		((x)->flags & CFLGS_ENVIDS)
+#define kmem_mark_nocharge(c)	do { (c)->flags |= SLAB_NO_CHARGE; } while (0)
+
+struct slab;
+/* Functions for storing/retrieving the cachep and or slab from the
+ * global 'mem_map'. These are used to find the slab an obj belongs to.
+ * With kfree(), these are used to find the cache which an obj belongs to.
+ */
+static inline void page_set_cache(struct page *page, struct kmem_cache *cache)
+{
+	page->lru.next = (struct list_head *)cache;
+}
+
+static inline struct kmem_cache *page_get_cache(struct page *page)
+{
+	return (struct kmem_cache *)page->lru.next;
+}
+
+static inline void page_set_slab(struct page *page, struct slab *slab)
+{
+	page->lru.prev = (struct list_head *)slab;
+}
+
+static inline struct slab *page_get_slab(struct page *page)
+{
+	return (struct slab *)page->lru.prev;
+}
+
+static inline struct kmem_cache *virt_to_cache(const void *obj)
+{
+	/* Look up the page backing obj, then read the cache recorded on it. */
+	return page_get_cache(virt_to_page(obj));
+}
+
+static inline struct slab *virt_to_slab(const void *obj)
+{
+	/* Look up the page backing obj, then read the slab recorded on it. */
+	return page_get_slab(virt_to_page(obj));
+}
+
+#endif
diff -uprN linux-2.6.16/include/linux/kmem_slab.h linux-2.6.16.ovz/include/linux/kmem_slab.h
--- linux-2.6.16/include/linux/kmem_slab.h	1969-12-31 19:00:00.000000000 -0500
+++ linux-2.6.16.ovz/include/linux/kmem_slab.h	2006-07-05 08:34:56.000000000 -0400
@@ -0,0 +1,71 @@
+#ifndef __KMEM_SLAB_H__
+#define __KMEM_SLAB_H__
+
+/*
+ * kmem_bufctl_t:
+ *
+ * Bufctl's are used for linking objs within a slab
+ * linked offsets.
+ *
+ * This implementation relies on "struct page" for locating the cache &
+ * slab an object belongs to.
+ * This allows the bufctl structure to be small (one int), but limits
+ * the number of objects a slab (not a cache) can contain when off-slab
+ * bufctls are used. The limit is the size of the largest general cache
+ * that does not use off-slab slabs.
+ * For 32bit archs with 4 kB pages, this is 56.
+ * This is not serious, as it is only for large objects, when it is unwise
+ * to have too many per slab.
+ * Note: This limit can be raised by introducing a general cache whose size
+ * is less than 512 (PAGE_SIZE<<3), but greater than 256.
+ */
+
+typedef unsigned int kmem_bufctl_t;
+#define BUFCTL_END	(((kmem_bufctl_t)(~0U))-0)
+#define BUFCTL_FREE	(((kmem_bufctl_t)(~0U))-1)
+#define	SLAB_LIMIT	(((kmem_bufctl_t)(~0U))-2)
+
+/*
+ * struct slab
+ *
+ * Manages the objs in a slab. Placed either at the beginning of mem allocated
+ * for a slab, or allocated from a general cache.
+ * Slabs are chained into three lists: fully used, partial, fully free slabs.
+ */
+struct slab {
+	struct list_head list;
+	unsigned long colouroff;
+	void *s_mem;		/* including colour offset */
+	unsigned int inuse;	/* num of objs active in slab */
+	kmem_bufctl_t free;
+	unsigned short nodeid;
+};
+
+/*
+ * struct slab_rcu
+ *
+ * slab_destroy on a SLAB_DESTROY_BY_RCU cache uses this structure to
+ * arrange for kmem_freepages to be called via RCU.  This is useful if
+ * we need to approach a kernel structure obliquely, from its address
+ * obtained without the usual locking.  We can lock the structure to
+ * stabilize it and check it's still at the given address, only if we
+ * can be sure that the memory has not been meanwhile reused for some
+ * other kind of object (which our subsystem's lock might corrupt).
+ *
+ * rcu_read_lock before reading the address, then rcu_read_unlock after
+ * taking the spinlock within the structure expected at that address.
+ *
+ * We assume struct slab_rcu can overlay struct slab when destroying.
+ */
+struct slab_rcu {
+	struct rcu_head head;
+	struct kmem_cache *cachep;
+	void *addr;
+};
+
+static inline kmem_bufctl_t *slab_bufctl(struct slab *slabp)
+{
+	return (kmem_bufctl_t *) (slabp + 1);
+}
+
+#endif
diff -uprN linux-2.6.16/include/linux/list.h linux-2.6.16.ovz/include/linux/list.h
--- linux-2.6.16/include/linux/list.h	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/include/linux/list.h	2006-07-05 08:34:56.000000000 -0400
@@ -325,6 +325,9 @@ static inline void list_splice_init(stru
 #define list_entry(ptr, type, member) \
 	container_of(ptr, type, member)
 
+#define list_first_entry(ptr, type, member) \
+	container_of((ptr)->next, type, member)
+
 /**
  * list_for_each	-	iterate over a list
  * @pos:	the &struct list_head to use as a loop counter.
@@ -411,6 +414,20 @@ static inline void list_splice_init(stru
 	     pos = list_entry(pos->member.next, typeof(*pos), member))
 
 /**
+ * list_for_each_entry_continue_reverse - iterate backwards over list of given
+ *			type continuing after existing point
+ * @pos:	the type * to use as a loop counter.
+ * @head:	the head for your list.
+ * @member:	the name of the list_struct within the struct.
+ */
+#define list_for_each_entry_continue_reverse(pos, head, member) 	\
+	for (pos = list_entry(pos->member.prev, typeof(*pos), member),	\
+			prefetch(pos->member.prev);			\
+		&pos->member != (head);					\
+	pos = list_entry(pos->member.prev, typeof(*pos), member),	\
+			prefetch(pos->member.prev))
+
+/**
  * list_for_each_entry_safe - iterate over list of given type safe against removal of list entry
  * @pos:	the type * to use as a loop counter.
  * @n:		another type * to use as temporary storage
diff -uprN linux-2.6.16/include/linux/major.h linux-2.6.16.ovz/include/linux/major.h
--- linux-2.6.16/include/linux/major.h	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/include/linux/major.h	2006-07-05 08:34:56.000000000 -0400
@@ -165,4 +165,7 @@
 
 #define VIOTAPE_MAJOR		230
 
+#define UNNAMED_EXTRA_MAJOR		130
+#define UNNAMED_EXTRA_MAJOR_COUNT	120
+
 #endif
diff -uprN linux-2.6.16/include/linux/mm.h linux-2.6.16.ovz/include/linux/mm.h
--- linux-2.6.16/include/linux/mm.h	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/include/linux/mm.h	2006-07-05 08:34:56.000000000 -0400
@@ -41,6 +41,27 @@ extern int sysctl_legacy_va_layout;
 
 #define nth_page(page,n) pfn_to_page(page_to_pfn((page)) + (n))
 
+#include <linux/mm_counter.h>
+
+#ifdef CONFIG_USER_RESOURCE
+#define set_vma_rss(vma, v)	set_mm_counter(vma, vm_rss, v)
+#define get_vma_rss(vma)	get_mm_counter(vma, vm_rss)
+#define inc_vma_rss(vma)	inc_mm_counter(vma, vm_rss)
+#define dec_vma_rss(vma)	dec_mm_counter(vma, vm_rss)
+#define add_vma_rss(vma, v)	add_mm_counter(vma, vm_rss, v)
+#define sub_vma_rss(vma, v)	do {					\
+		if (unlikely(dec_mm_counter_chk(vma, vm_rss, v)))	\
+			warn_bad_rss(vma, v);				\
+	} while (0)
+#else
+#define set_vma_rss(vma, v)	do { } while (0)
+#define get_vma_rss(vma)	(0)
+#define inc_vma_rss(vma)	do { } while (0)
+#define dec_vma_rss(vma)	do { } while (0)
+#define add_vma_rss(vma, v)	do { } while (0)
+#define sub_vma_rss(vma, v)	do { } while (0)
+#endif
+
 /*
  * Linux kernel virtual memory manager primitives.
  * The idea being to have a "virtual" mm in the same way
@@ -111,6 +132,9 @@ struct vm_area_struct {
 #ifdef CONFIG_NUMA
 	struct mempolicy *vm_policy;	/* NUMA policy for the VMA */
 #endif
+#ifdef CONFIG_USER_RESOURCE
+	mm_counter_t _vm_rss;
+#endif
 };
 
 /*
@@ -229,10 +253,9 @@ struct page {
 		unsigned long private;		/* Mapping-private opaque data:
 					 	 * usually used for buffer_heads
 						 * if PagePrivate set; used for
-						 * swp_entry_t if PageSwapCache.
-						 * When page is free, this
+						 * swp_entry_t if PageSwapCache;
 						 * indicates order in the buddy
-						 * system.
+						 * system if PG_buddy is set.
 						 */
 		struct address_space *mapping;	/* If low bit clear, points to
 						 * inode address_space, or NULL.
@@ -264,6 +287,12 @@ struct page {
 	void *virtual;			/* Kernel virtual address (NULL if
 					   not kmapped, ie. highmem) */
 #endif /* WANT_PAGE_VIRTUAL */
+#ifdef CONFIG_USER_RESOURCE
+	union {
+		struct user_beancounter *page_ub;
+		struct page_beancounter *page_pb;
+	} bc;
+#endif
 };
 
 #define page_private(page)		((page)->private)
@@ -636,16 +665,9 @@ struct page *shmem_nopage(struct vm_area
 int shmem_set_policy(struct vm_area_struct *vma, struct mempolicy *new);
 struct mempolicy *shmem_get_policy(struct vm_area_struct *vma,
 					unsigned long addr);
-int shmem_lock(struct file *file, int lock, struct user_struct *user);
 #else
 #define shmem_nopage filemap_nopage
 
-static inline int shmem_lock(struct file *file, int lock,
-			     struct user_struct *user)
-{
-	return 0;
-}
-
 static inline int shmem_set_policy(struct vm_area_struct *vma,
 				   struct mempolicy *new)
 {
@@ -706,7 +728,9 @@ void free_pgd_range(struct mmu_gather **
 void free_pgtables(struct mmu_gather **tlb, struct vm_area_struct *start_vma,
 		unsigned long floor, unsigned long ceiling);
 int copy_page_range(struct mm_struct *dst, struct mm_struct *src,
-			struct vm_area_struct *vma);
+		struct vm_area_struct *dst_vma, struct vm_area_struct *src_vma);
+int __copy_page_range(struct vm_area_struct *dst_vma, struct vm_area_struct *vma,
+		      unsigned long addr, size_t size);
 int zeromap_page_range(struct vm_area_struct *vma, unsigned long from,
 			unsigned long size, pgprot_t prot);
 void unmap_mapping_range(struct address_space *mapping,
diff -uprN linux-2.6.16/include/linux/mm_counter.h linux-2.6.16.ovz/include/linux/mm_counter.h
--- linux-2.6.16/include/linux/mm_counter.h	1969-12-31 19:00:00.000000000 -0500
+++ linux-2.6.16.ovz/include/linux/mm_counter.h	2006-07-05 08:34:56.000000000 -0400
@@ -0,0 +1,32 @@
+#ifndef __MM_COUNTER_H_
+#define __MM_COUNTER_H_
+#if NR_CPUS >= CONFIG_SPLIT_PTLOCK_CPUS
+/*
+ * The mm counters are not protected by its page_table_lock,
+ * so must be incremented atomically.
+ */
+#define set_mm_counter(mm, member, value) atomic_long_set(&(mm)->_##member, value)
+#define get_mm_counter(mm, member) ((unsigned long)atomic_long_read(&(mm)->_##member))
+#define add_mm_counter(mm, member, value) atomic_long_add(value, &(mm)->_##member)
+#define inc_mm_counter(mm, member) atomic_long_inc(&(mm)->_##member)
+#define dec_mm_counter(mm, member) atomic_long_dec(&(mm)->_##member)
+#define dec_mm_counter_chk(mm, member, value)	\
+	atomic_long_add_negative(-(value), &(mm)->_##member)
+typedef atomic_long_t mm_counter_t;
+
+#else  /* NR_CPUS < CONFIG_SPLIT_PTLOCK_CPUS */
+/*
+ * The mm counters are protected by its page_table_lock,
+ * so can be incremented directly.
+ */
+#define set_mm_counter(mm, member, value) (mm)->_##member = (value)
+#define get_mm_counter(mm, member) ((mm)->_##member)
+#define add_mm_counter(mm, member, value) (mm)->_##member += (value)
+#define inc_mm_counter(mm, member) (mm)->_##member++
+#define dec_mm_counter(mm, member) (mm)->_##member--
+#define dec_mm_counter_chk(mm, member, value)	\
+	((long)((mm)->_##member -= (value)) < 0) /* unsigned counter: test sign */
+typedef unsigned long mm_counter_t;
+
+#endif /* NR_CPUS < CONFIG_SPLIT_PTLOCK_CPUS */
+#endif
diff -uprN linux-2.6.16/include/linux/mount.h linux-2.6.16.ovz/include/linux/mount.h
--- linux-2.6.16/include/linux/mount.h	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/include/linux/mount.h	2006-07-05 08:34:56.000000000 -0400
@@ -47,6 +47,7 @@ struct vfsmount {
 	struct vfsmount *mnt_master;	/* slave is on master->mnt_slave_list */
 	struct namespace *mnt_namespace; /* containing namespace */
 	int mnt_pinned;
+	unsigned owner;
 };
 
 static inline struct vfsmount *mntget(struct vfsmount *mnt)
diff -uprN linux-2.6.16/include/linux/msg.h linux-2.6.16.ovz/include/linux/msg.h
--- linux-2.6.16/include/linux/msg.h	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/include/linux/msg.h	2006-07-05 08:34:56.000000000 -0400
@@ -92,6 +92,8 @@ struct msg_queue {
 	struct list_head q_senders;
 };
 
+int sysvipc_walk_msg(int (*func)(int, struct msg_queue*, void *), void *arg);
+
 #endif /* __KERNEL__ */
 
 #endif /* _LINUX_MSG_H */
diff -uprN linux-2.6.16/include/linux/namei.h linux-2.6.16.ovz/include/linux/namei.h
--- linux-2.6.16/include/linux/namei.h	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/include/linux/namei.h	2006-07-05 08:34:56.000000000 -0400
@@ -48,12 +48,15 @@ enum {LAST_NORM, LAST_ROOT, LAST_DOT, LA
 #define LOOKUP_PARENT		16
 #define LOOKUP_NOALT		32
 #define LOOKUP_REVAL		64
+#define LOOKUP_STRICT		128	/* no symlinks or other filesystems */
+
 /*
  * Intent data
  */
 #define LOOKUP_OPEN		(0x0100)
 #define LOOKUP_CREATE		(0x0200)
 #define LOOKUP_ACCESS		(0x0400)
+#define LOOKUP_NOAREACHECK	(0x0800)	/* no area check on lookup */
 
 extern int FASTCALL(__user_walk(const char __user *, unsigned, struct nameidata *));
 extern int FASTCALL(__user_walk_fd(int dfd, const char __user *, unsigned, struct nameidata *));
diff -uprN linux-2.6.16/include/linux/namespace.h linux-2.6.16.ovz/include/linux/namespace.h
--- linux-2.6.16/include/linux/namespace.h	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/include/linux/namespace.h	2006-07-05 08:34:56.000000000 -0400
@@ -13,6 +13,8 @@ struct namespace {
 	int event;
 };
 
+extern struct rw_semaphore namespace_sem;
+
 extern int copy_namespace(int, struct task_struct *);
 extern void __put_namespace(struct namespace *namespace);
 extern struct namespace *dup_namespace(struct task_struct *, struct fs_struct *);
diff -uprN linux-2.6.16/include/linux/netdevice.h linux-2.6.16.ovz/include/linux/netdevice.h
--- linux-2.6.16/include/linux/netdevice.h	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/include/linux/netdevice.h	2006-07-05 08:34:56.000000000 -0400
@@ -37,6 +37,7 @@
 #include <linux/config.h>
 #include <linux/device.h>
 #include <linux/percpu.h>
+#include <linux/ctype.h>
 
 struct divert_blk;
 struct vlan_group;
@@ -233,6 +234,11 @@ enum netdev_state_t
 	__LINK_STATE_LINKWATCH_PENDING
 };
 
+struct netdev_bc {
+	struct user_beancounter *exec_ub, *owner_ub;
+};
+
+#define netdev_bc(dev)		(&(dev)->dev_bc)
 
 /*
  * This structure holds at boot time configured netdevice settings. They
@@ -309,6 +315,8 @@ struct net_device
 #define NETIF_F_TSO		2048	/* Can offload TCP/IP segmentation */
 #define NETIF_F_LLTX		4096	/* LockLess TX */
 #define NETIF_F_UFO             8192    /* Can offload UDP Large Send*/
+#define NETIF_F_VIRTUAL		0x40000000 /* can be registered in ve */
+#define NETIF_F_VENET		0x80000000 /* Device is VENET device */
 
 	struct net_device	*next_sched;
 
@@ -431,6 +439,7 @@ struct net_device
 	enum { NETREG_UNINITIALIZED=0,
 	       NETREG_REGISTERING,	/* called register_netdevice */
 	       NETREG_REGISTERED,	/* completed register todo */
+	       NETREG_REGISTER_ERR,	/* register todo failed */
 	       NETREG_UNREGISTERING,	/* called unregister_netdevice */
 	       NETREG_UNREGISTERED,	/* completed unregister todo */
 	       NETREG_RELEASED,		/* called free_netdev */
@@ -500,8 +509,18 @@ struct net_device
 	struct divert_blk	*divert;
 #endif /* CONFIG_NET_DIVERT */
 
+	unsigned                orig_mtu;   /* MTU value before move to VE */
+	struct ve_struct	*owner_env; /* Owner VE of the interface */
+	struct netdev_bc	dev_bc;
+
 	/* class/net/name entry */
 	struct class_device	class_dev;
+
+#ifdef CONFIG_VE
+	/* List entry in global devices list to keep track of their names
+	 * assignment */
+	struct list_head	dev_global_list_entry;
+#endif
 };
 
 #define	NETDEV_ALIGN		32
@@ -535,9 +554,23 @@ struct packet_type {
 #include <linux/notifier.h>
 
 extern struct net_device		loopback_dev;		/* The loopback */
+#if defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE)
+#define loopback_dev	(*get_exec_env()->_loopback_dev)
+#define ve0_loopback	(*get_ve0()->_loopback_dev)
+#define dev_base	(get_exec_env()->_net_dev_base)
+#define visible_dev_head(x)	(&(x)->_net_dev_head)
+#define visible_dev_index_head(x) (&(x)->_net_dev_index_head)
+#else
 extern struct net_device		*dev_base;		/* All devices */
+#define ve0_loopback	loopback_dev
+#define visible_dev_head(x)	NULL
+#define visible_dev_index_head(x) NULL
+#endif
 extern rwlock_t				dev_base_lock;		/* Device list lock */
 
+struct hlist_head *dev_name_hash(const char *name, struct ve_struct *env);
+struct hlist_head *dev_index_hash(int ifindex, struct ve_struct *env);
+
 extern int 			netdev_boot_setup_check(struct net_device *dev);
 extern unsigned long		netdev_boot_base(const char *prefix, int unit);
 extern struct net_device    *dev_getbyhwaddr(unsigned short type, char *hwaddr);
@@ -554,6 +587,7 @@ extern int		dev_alloc_name(struct net_de
 extern int		dev_open(struct net_device *dev);
 extern int		dev_close(struct net_device *dev);
 extern int		dev_queue_xmit(struct sk_buff *skb);
+extern int		dev_set_mtu(struct net_device *dev, int new_mtu);
 extern int		register_netdevice(struct net_device *dev);
 extern int		unregister_netdevice(struct net_device *dev);
 extern void		free_netdev(struct net_device *dev);
@@ -951,6 +985,18 @@ extern void dev_seq_stop(struct seq_file
 
 extern void linkwatch_run_queue(void);
 
+#if defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE)
+static inline int ve_is_dev_movable(struct net_device *dev)
+{
+	return !(dev->features & NETIF_F_VIRTUAL);
+}
+#else
+static inline int ve_is_dev_movable(struct net_device *dev)
+{
+	return 0;
+}
+#endif
+
 #endif /* __KERNEL__ */
 
 #endif	/* _LINUX_DEV_H */
diff -uprN linux-2.6.16/include/linux/netfilter/nf_conntrack_ftp.h linux-2.6.16.ovz/include/linux/netfilter/nf_conntrack_ftp.h
--- linux-2.6.16/include/linux/netfilter/nf_conntrack_ftp.h	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/include/linux/netfilter/nf_conntrack_ftp.h	2006-07-05 08:34:56.000000000 -0400
@@ -32,13 +32,22 @@ struct ip_conntrack_expect;
 
 /* For NAT to hook in when we find a packet which describes what other
  * connection we should expect. */
-extern unsigned int (*ip_nat_ftp_hook)(struct sk_buff **pskb,
+typedef unsigned int (*ip_nat_helper_ftp_hook)(struct sk_buff **pskb,
 				       enum ip_conntrack_info ctinfo,
 				       enum ip_ct_ftp_type type,
 				       unsigned int matchoff,
 				       unsigned int matchlen,
 				       struct ip_conntrack_expect *exp,
 				       u32 *seq);
+extern ip_nat_helper_ftp_hook ip_nat_ftp_hook;
+#ifdef CONFIG_VE_IPTABLES
+#include <linux/sched.h>
+#define ve_ip_nat_ftp_hook \
+	((ip_nat_helper_ftp_hook) \
+		(get_exec_env()->_ip_conntrack->_ip_nat_ftp_hook))
+#else
+#define ve_ip_nat_ftp_hook	ip_nat_ftp_hook
+#endif
 #endif /* __KERNEL__ */
 
 #endif /* _NF_CONNTRACK_FTP_H */
diff -uprN linux-2.6.16/include/linux/netfilter/x_tables.h linux-2.6.16.ovz/include/linux/netfilter/x_tables.h
--- linux-2.6.16/include/linux/netfilter/x_tables.h	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/include/linux/netfilter/x_tables.h	2006-07-05 08:34:56.000000000 -0400
@@ -80,12 +80,19 @@ struct xt_counters_info
 
 #ifdef __KERNEL__
 
+#include <linux/config.h>
 #include <linux/netdevice.h>
 
 #define ASSERT_READ_LOCK(x)
 #define ASSERT_WRITE_LOCK(x)
 #include <linux/netfilter_ipv4/listhelp.h>
 
+#ifdef CONFIG_COMPAT
+#define COMPAT_TO_USER		1
+#define COMPAT_FROM_USER	-1
+#define COMPAT_CALC_SIZE	0
+#endif
+
 struct xt_match
 {
 	struct list_head list;
@@ -118,6 +125,10 @@ struct xt_match
 	/* Called when entry of this type deleted. */
 	void (*destroy)(void *matchinfo, unsigned int matchinfosize);
 
+#ifdef CONFIG_COMPAT
+	/* Called when userspace align differs from kernel space one */
+	int (*compat)(void *match, void **dstptr, int *size, int convert);
+#endif
 	/* Set this to THIS_MODULE if you are a module, otherwise NULL */
 	struct module *me;
 };
@@ -154,6 +165,10 @@ struct xt_target
 	/* Called when entry of this type deleted. */
 	void (*destroy)(void *targinfo, unsigned int targinfosize);
 
+#ifdef CONFIG_COMPAT
+	/* Called when userspace align differs from kernel space one */
+	int (*compat)(void *target, void **dstptr, int *size, int convert);
+#endif
 	/* Set this to THIS_MODULE if you are a module, otherwise NULL */
 	struct module *me;
 };
@@ -211,6 +226,10 @@ extern int xt_register_table(struct xt_t
 			     struct xt_table_info *bootstrap,
 			     struct xt_table_info *newinfo);
 extern void *xt_unregister_table(struct xt_table *table);
+extern struct xt_table *virt_xt_register_table(struct xt_table *table,
+			     struct xt_table_info *bootstrap,
+			     struct xt_table_info *newinfo);
+extern void *virt_xt_unregister_table(struct xt_table *table);
 
 extern struct xt_table_info *xt_replace_table(struct xt_table *table,
 					      unsigned int num_counters,
@@ -233,6 +252,34 @@ extern void xt_proto_fini(int af);
 extern struct xt_table_info *xt_alloc_table_info(unsigned int size);
 extern void xt_free_table_info(struct xt_table_info *info);
 
+#ifdef CONFIG_COMPAT
+#include <net/compat.h>
+
+/* FIXME: this works only on 32 bit tasks
+ * need to change whole approach in order to calculate align as function of
+ * current task alignment */
+
+struct compat_xt_counters
+{
+	u_int32_t cnt[4];
+};
+
+struct compat_xt_counters_info
+{
+	char name[XT_TABLE_MAXNAMELEN];
+	compat_uint_t num_counters;
+	struct compat_xt_counters counters[0];
+};
+
+#define COMPAT_XT_ALIGN(s) (((s) + (__alignof__(struct compat_xt_counters)-1)) \
+		& ~(__alignof__(struct compat_xt_counters)-1))
+
+extern int ipt_match_align_compat(void *match, void **dstptr,
+		int *size, int off, int convert);
+extern int ipt_target_align_compat(void *target, void **dstptr,
+		int *size, int off, int convert);
+
+#endif /* CONFIG_COMPAT */
 #endif /* __KERNEL__ */
 
 #endif /* _X_TABLES_H */
diff -uprN linux-2.6.16/include/linux/netfilter/xt_conntrack.h linux-2.6.16.ovz/include/linux/netfilter/xt_conntrack.h
--- linux-2.6.16/include/linux/netfilter/xt_conntrack.h	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/include/linux/netfilter/xt_conntrack.h	2006-07-05 08:34:56.000000000 -0400
@@ -5,6 +5,7 @@
 #ifndef _XT_CONNTRACK_H
 #define _XT_CONNTRACK_H
 
+#include <linux/config.h>
 #include <linux/netfilter/nf_conntrack_tuple_common.h>
 #include <linux/in.h>
 
@@ -60,4 +61,21 @@ struct xt_conntrack_info
 	/* Inverse flags */
 	u_int8_t invflags;
 };
+
+#ifdef CONFIG_COMPAT
+struct compat_xt_conntrack_info
+{
+	compat_uint_t statemask, statusmask;
+
+	struct ip_conntrack_tuple tuple[IP_CT_DIR_MAX];
+	struct in_addr sipmsk[IP_CT_DIR_MAX], dipmsk[IP_CT_DIR_MAX];
+
+	compat_ulong_t expires_min, expires_max;
+
+	/* Flags word */
+	u_int8_t flags;
+	/* Inverse flags */
+	u_int8_t invflags;
+};
+#endif
 #endif /*_XT_CONNTRACK_H*/
diff -uprN linux-2.6.16/include/linux/netfilter/xt_helper.h linux-2.6.16.ovz/include/linux/netfilter/xt_helper.h
--- linux-2.6.16/include/linux/netfilter/xt_helper.h	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/include/linux/netfilter/xt_helper.h	2006-07-05 08:34:56.000000000 -0400
@@ -1,8 +1,17 @@
 #ifndef _XT_HELPER_H
 #define _XT_HELPER_H
 
+#include <linux/config.h>
+
 struct xt_helper_info {
 	int invert;
 	char name[30];
 };
+
+#ifdef CONFIG_COMPAT
+struct compat_xt_helper_info {
+	compat_int_t invert;
+	char name[30];
+};
+#endif
 #endif /* _XT_HELPER_H */
diff -uprN linux-2.6.16/include/linux/netfilter/xt_limit.h linux-2.6.16.ovz/include/linux/netfilter/xt_limit.h
--- linux-2.6.16/include/linux/netfilter/xt_limit.h	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/include/linux/netfilter/xt_limit.h	2006-07-05 08:34:56.000000000 -0400
@@ -1,6 +1,8 @@
 #ifndef _XT_RATE_H
 #define _XT_RATE_H
 
+#include <linux/config.h>
+
 /* timings are in milliseconds. */
 #define XT_LIMIT_SCALE 10000
 
@@ -18,4 +20,19 @@ struct xt_rateinfo {
 	/* Ugly, ugly fucker. */
 	struct xt_rateinfo *master;
 };
+
+#ifdef CONFIG_COMPAT
+struct compat_xt_rateinfo {
+	u_int32_t avg;    /* Average secs between packets * scale */
+	u_int32_t burst;  /* Period multiplier for upper limit. */
+
+	/* Used internally by the kernel */
+	compat_ulong_t prev;
+	u_int32_t credit;
+	u_int32_t credit_cap, cost;
+
+	/* Compat-width stand-in for xt_rateinfo's 'master' pointer. */
+	compat_uptr_t master;
+};
+#endif
 #endif /*_XT_RATE_H*/
diff -uprN linux-2.6.16/include/linux/netfilter/xt_state.h linux-2.6.16.ovz/include/linux/netfilter/xt_state.h
--- linux-2.6.16/include/linux/netfilter/xt_state.h	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/include/linux/netfilter/xt_state.h	2006-07-05 08:34:56.000000000 -0400
@@ -1,6 +1,8 @@
 #ifndef _XT_STATE_H
 #define _XT_STATE_H
 
+#include <linux/config.h>
+
 #define XT_STATE_BIT(ctinfo) (1 << ((ctinfo)%IP_CT_IS_REPLY+1))
 #define XT_STATE_INVALID (1 << 0)
 
@@ -10,4 +12,11 @@ struct xt_state_info
 {
 	unsigned int statemask;
 };
+
+#ifdef CONFIG_COMPAT
+struct compat_xt_state_info	/* struct xt_state_info with compat types */
+{
+	compat_uint_t statemask;	/* 'unsigned int statemask' in xt_state_info */
+};
+#endif
 #endif /*_XT_STATE_H*/
diff -uprN linux-2.6.16/include/linux/netfilter.h linux-2.6.16.ovz/include/linux/netfilter.h
--- linux-2.6.16/include/linux/netfilter.h	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/include/linux/netfilter.h	2006-07-05 08:34:56.000000000 -0400
@@ -107,12 +107,21 @@ struct nf_info
 int nf_register_hook(struct nf_hook_ops *reg);
 void nf_unregister_hook(struct nf_hook_ops *reg);
 
+int virt_nf_register_hook(struct nf_hook_ops *reg);
+int virt_nf_unregister_hook(struct nf_hook_ops *reg); /* NOTE(review): int here, nf_unregister_hook() is void -- confirm */
+
 /* Functions to register get/setsockopt ranges (non-inclusive).  You
    need to check permissions yourself! */
 int nf_register_sockopt(struct nf_sockopt_ops *reg);
 void nf_unregister_sockopt(struct nf_sockopt_ops *reg);
 
+#ifdef CONFIG_VE_IPTABLES
+#define ve_nf_hooks \
+       ((struct list_head (*)[NF_MAX_HOOKS])(get_exec_env()->_nf_hooks))
+#else
 extern struct list_head nf_hooks[NPROTO][NF_MAX_HOOKS];
+#define ve_nf_hooks nf_hooks
+#endif
 
 /* those NF_LOG_* defines and struct nf_loginfo are legacy definitios that will
  * disappear once iptables is replaced with pkttables.  Please DO NOT use them
@@ -190,7 +199,7 @@ static inline int nf_hook_thresh(int pf,
 	if (!cond)
 		return 1;
 #ifndef CONFIG_NETFILTER_DEBUG
-	if (list_empty(&nf_hooks[pf][hook]))
+	if (list_empty(&ve_nf_hooks[pf][hook]))
 		return 1;
 #endif
 	return nf_hook_slow(pf, hook, pskb, indev, outdev, okfn, thresh);
diff -uprN linux-2.6.16/include/linux/netfilter_ipv4/ip_conntrack.h linux-2.6.16.ovz/include/linux/netfilter_ipv4/ip_conntrack.h
--- linux-2.6.16/include/linux/netfilter_ipv4/ip_conntrack.h	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/include/linux/netfilter_ipv4/ip_conntrack.h	2006-07-05 08:34:56.000000000 -0400
@@ -71,6 +71,11 @@ do {									\
 
 struct ip_conntrack_helper;
 
+#ifdef CONFIG_VE_IPTABLES
+#include <linux/ve.h>
+#include <linux/ve_owner.h>
+#endif
+
 struct ip_conntrack
 {
 	/* Usage count in here is 1 for hash table/destruct timer, 1 per skb,
@@ -122,8 +127,15 @@ struct ip_conntrack
 	/* Traversed often, so hopefully in different cacheline to top */
 	/* These are my tuples; original and reply */
 	struct ip_conntrack_tuple_hash tuplehash[IP_CT_DIR_MAX];
+#ifdef CONFIG_VE_IPTABLES
+        struct ve_struct *ct_owner_env;
+#endif
 };
 
+#ifdef CONFIG_VE_IPTABLES
+DCL_VE_OWNER_PROTO(CT, struct ip_conntrack, ct_owner_env)
+#endif
+
 struct ip_conntrack_expect
 {
 	/* Internal linked list (global expectation list) */
@@ -232,7 +244,15 @@ extern void ip_conntrack_tcp_update(stru
 				    enum ip_conntrack_dir dir);
 
 /* Call me when a conntrack is destroyed. */
+#ifdef CONFIG_VE_IPTABLES
+#include <linux/sched.h>
+#define ve_ip_conntrack_destroyed	\
+	(get_exec_env()->_ip_conntrack->_ip_conntrack_destroyed)
+#else
 extern void (*ip_conntrack_destroyed)(struct ip_conntrack *conntrack);
+#define ve_ip_conntrack_destroyed	ip_conntrack_destroyed
+#endif
+
 
 /* Fake conntrack entry for untracked connections */
 extern struct ip_conntrack ip_conntrack_untracked;
@@ -261,7 +281,7 @@ extern void ip_conntrack_proto_put(struc
 extern void ip_ct_remove_expectations(struct ip_conntrack *ct);
 
 extern struct ip_conntrack *ip_conntrack_alloc(struct ip_conntrack_tuple *,
-					       struct ip_conntrack_tuple *);
+		struct ip_conntrack_tuple *, struct user_beancounter *);
 
 extern void ip_conntrack_free(struct ip_conntrack *ct);
 
@@ -270,6 +290,8 @@ extern void ip_conntrack_hash_insert(str
 extern struct ip_conntrack_expect *
 __ip_conntrack_expect_find(const struct ip_conntrack_tuple *tuple);
 
+extern void ip_conntrack_expect_insert(struct ip_conntrack_expect *exp);
+
 extern struct ip_conntrack_expect *
 ip_conntrack_expect_find(const struct ip_conntrack_tuple *tuple);
 
@@ -291,6 +313,7 @@ static inline int is_dying(struct ip_con
 }
 
 extern unsigned int ip_conntrack_htable_size;
+extern int ip_conntrack_disable_ve0;
  
 #define CONNTRACK_STAT_INC(count) (__get_cpu_var(ip_conntrack_stat).count++)
 
@@ -341,6 +364,9 @@ ip_conntrack_event_cache(enum ip_conntra
 	struct ip_conntrack *ct = (struct ip_conntrack *)skb->nfct;
 	struct ip_conntrack_ecache *ecache;
 	
+	if (!ve_is_super(get_exec_env()))
+		return;
+
 	local_bh_disable();
 	ecache = &__get_cpu_var(ip_conntrack_ecache);
 	if (ct != ecache->ct)
@@ -352,7 +378,7 @@ ip_conntrack_event_cache(enum ip_conntra
 static inline void ip_conntrack_event(enum ip_conntrack_events event,
 				      struct ip_conntrack *ct)
 {
-	if (is_confirmed(ct) && !is_dying(ct))
+	if (is_confirmed(ct) && !is_dying(ct) && ve_is_super(get_exec_env()))
 		notifier_call_chain(&ip_conntrack_chain, event, ct);
 }
 
@@ -360,7 +386,8 @@ static inline void 
 ip_conntrack_expect_event(enum ip_conntrack_expect_events event,
 			  struct ip_conntrack_expect *exp)
 {
-	notifier_call_chain(&ip_conntrack_expect_chain, event, exp);
+	if (ve_is_super(get_exec_env()))
+		notifier_call_chain(&ip_conntrack_expect_chain, event, exp);
 }
 #else /* CONFIG_IP_NF_CONNTRACK_EVENTS */
 static inline void ip_conntrack_event_cache(enum ip_conntrack_events event, 
diff -uprN linux-2.6.16/include/linux/netfilter_ipv4/ip_conntrack_core.h linux-2.6.16.ovz/include/linux/netfilter_ipv4/ip_conntrack_core.h
--- linux-2.6.16/include/linux/netfilter_ipv4/ip_conntrack_core.h	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/include/linux/netfilter_ipv4/ip_conntrack_core.h	2006-07-05 08:34:56.000000000 -0400
@@ -3,7 +3,6 @@
 #include <linux/netfilter.h>
 
 #define MAX_IP_CT_PROTO 256
-extern struct ip_conntrack_protocol *ip_ct_protos[MAX_IP_CT_PROTO];
 
 /* This header is used to share core functionality between the
    standalone connection tracking module, and the compatibility layer's use
@@ -54,8 +53,26 @@ static inline int ip_conntrack_confirm(s
 
 extern void ip_ct_unlink_expect(struct ip_conntrack_expect *exp);
 
+#ifdef CONFIG_VE_IPTABLES
+#include <linux/sched.h>
+#define ve_ip_ct_protos \
+	(get_exec_env()->_ip_conntrack->_ip_ct_protos)
+#define ve_ip_conntrack_hash	\
+	(get_exec_env()->_ip_conntrack->_ip_conntrack_hash)
+#define ve_ip_conntrack_expect_list \
+	(get_exec_env()->_ip_conntrack->_ip_conntrack_expect_list)
+#define ve_ip_conntrack_vmalloc \
+	(get_exec_env()->_ip_conntrack->_ip_conntrack_vmalloc)
+#else
+extern struct ip_conntrack_protocol *ip_ct_protos[MAX_IP_CT_PROTO];
 extern struct list_head *ip_conntrack_hash;
 extern struct list_head ip_conntrack_expect_list;
+#define ve_ip_ct_protos			ip_ct_protos
+#define ve_ip_conntrack_hash		ip_conntrack_hash
+#define ve_ip_conntrack_expect_list	ip_conntrack_expect_list
+#define ve_ip_conntrack_vmalloc		ip_conntrack_vmalloc
+#endif /* CONFIG_VE_IPTABLES */
+
 extern rwlock_t ip_conntrack_lock;
 #endif /* _IP_CONNTRACK_CORE_H */
 
diff -uprN linux-2.6.16/include/linux/netfilter_ipv4/ip_conntrack_helper.h linux-2.6.16.ovz/include/linux/netfilter_ipv4/ip_conntrack_helper.h
--- linux-2.6.16/include/linux/netfilter_ipv4/ip_conntrack_helper.h	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/include/linux/netfilter_ipv4/ip_conntrack_helper.h	2006-07-05 08:34:56.000000000 -0400
@@ -31,6 +31,9 @@ struct ip_conntrack_helper
 extern int ip_conntrack_helper_register(struct ip_conntrack_helper *);
 extern void ip_conntrack_helper_unregister(struct ip_conntrack_helper *);
 
+extern int virt_ip_conntrack_helper_register(struct ip_conntrack_helper *);
+extern void virt_ip_conntrack_helper_unregister(struct ip_conntrack_helper *);
+
 /* Allocate space for an expectation: this is mandatory before calling 
    ip_conntrack_expect_related.  You will have to call put afterwards. */
 extern struct ip_conntrack_expect *
@@ -41,4 +44,5 @@ extern void ip_conntrack_expect_put(stru
 extern int ip_conntrack_expect_related(struct ip_conntrack_expect *exp);
 extern void ip_conntrack_unexpect_related(struct ip_conntrack_expect *exp);
 
+extern struct list_head helpers;
 #endif /*_IP_CONNTRACK_HELPER_H*/
diff -uprN linux-2.6.16/include/linux/netfilter_ipv4/ip_conntrack_irc.h linux-2.6.16.ovz/include/linux/netfilter_ipv4/ip_conntrack_irc.h
--- linux-2.6.16/include/linux/netfilter_ipv4/ip_conntrack_irc.h	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/include/linux/netfilter_ipv4/ip_conntrack_irc.h	2006-07-05 08:34:56.000000000 -0400
@@ -14,16 +14,26 @@
 #ifndef _IP_CONNTRACK_IRC_H
 #define _IP_CONNTRACK_IRC_H
 
+#include <linux/netfilter_ipv4/ip_conntrack_helper.h>
+
 /* This structure exists only once per master */
 struct ip_ct_irc_master {
 };
 
 #ifdef __KERNEL__
-extern unsigned int (*ip_nat_irc_hook)(struct sk_buff **pskb,
-				       enum ip_conntrack_info ctinfo,
-				       unsigned int matchoff,
-				       unsigned int matchlen,
-				       struct ip_conntrack_expect *exp);
+typedef unsigned int (*ip_nat_helper_irc_hook)(struct sk_buff **,
+		enum ip_conntrack_info, unsigned int, unsigned int,
+		struct ip_conntrack_expect *);
+
+extern ip_nat_helper_irc_hook ip_nat_irc_hook;
+#ifdef CONFIG_VE_IPTABLES
+#include <linux/sched.h>
+#define ve_ip_nat_irc_hook \
+	((ip_nat_helper_irc_hook) \
+		(get_exec_env()->_ip_conntrack->_ip_nat_irc_hook))
+#else
+#define ve_ip_nat_irc_hook	ip_nat_irc_hook
+#endif
 
 #define IRC_PORT	6667
 
diff -uprN linux-2.6.16/include/linux/netfilter_ipv4/ip_conntrack_protocol.h linux-2.6.16.ovz/include/linux/netfilter_ipv4/ip_conntrack_protocol.h
--- linux-2.6.16/include/linux/netfilter_ipv4/ip_conntrack_protocol.h	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/include/linux/netfilter_ipv4/ip_conntrack_protocol.h	2006-07-05 08:34:56.000000000 -0400
@@ -67,6 +67,7 @@ struct ip_conntrack_protocol
 /* Protocol registration. */
 extern int ip_conntrack_protocol_register(struct ip_conntrack_protocol *proto);
 extern void ip_conntrack_protocol_unregister(struct ip_conntrack_protocol *proto);
+
 /* Existing built-in protocols */
 extern struct ip_conntrack_protocol ip_conntrack_protocol_tcp;
 extern struct ip_conntrack_protocol ip_conntrack_protocol_udp;
@@ -74,6 +75,41 @@ extern struct ip_conntrack_protocol ip_c
 extern struct ip_conntrack_protocol ip_conntrack_generic_protocol;
 extern int ip_conntrack_protocol_tcp_init(void);
 
+#if defined(CONFIG_VE_IPTABLES) && defined(CONFIG_SYSCTL)
+#include <linux/sched.h>
+#define ve_ip_ct_tcp_timeouts \
+	(get_exec_env()->_ip_conntrack->_ip_ct_tcp_timeouts)
+#define ve_ip_ct_udp_timeout \
+	(get_exec_env()->_ip_conntrack->_ip_ct_udp_timeout)
+#define ve_ip_ct_udp_timeout_stream \
+	(get_exec_env()->_ip_conntrack->_ip_ct_udp_timeout_stream)
+#define ve_ip_ct_icmp_timeout \
+	(get_exec_env()->_ip_conntrack->_ip_ct_icmp_timeout)
+#define ve_ip_ct_generic_timeout \
+	(get_exec_env()->_ip_conntrack->_ip_ct_generic_timeout)
+#define ve_ip_ct_log_invalid	\
+	(get_exec_env()->_ip_conntrack->_ip_ct_log_invalid)
+#define ve_ip_ct_tcp_timeout_max_retrans \
+	(get_exec_env()->_ip_conntrack->_ip_ct_tcp_timeout_max_retrans)
+#define ve_ip_ct_tcp_loose	\
+	(get_exec_env()->_ip_conntrack->_ip_ct_tcp_loose)
+#define ve_ip_ct_tcp_be_liberal	\
+	(get_exec_env()->_ip_conntrack->_ip_ct_tcp_be_liberal)
+#define ve_ip_ct_tcp_max_retrans	\
+	(get_exec_env()->_ip_conntrack->_ip_ct_tcp_max_retrans)
+#else
+#define ve_ip_ct_tcp_timeouts		*tcp_timeouts /* NOTE(review): bare '*', so x[i] parses as *(tcp_timeouts[i]) -- confirm intended */
+#define ve_ip_ct_udp_timeout		ip_ct_udp_timeout
+#define ve_ip_ct_udp_timeout_stream	ip_ct_udp_timeout_stream
+#define ve_ip_ct_icmp_timeout		ip_ct_icmp_timeout
+#define ve_ip_ct_generic_timeout	ip_ct_generic_timeout
+#define ve_ip_ct_log_invalid		ip_ct_log_invalid
+#define ve_ip_ct_tcp_timeout_max_retrans ip_ct_tcp_timeout_max_retrans
+#define ve_ip_ct_tcp_loose		ip_ct_tcp_loose
+#define ve_ip_ct_tcp_be_liberal		ip_ct_tcp_be_liberal
+#define ve_ip_ct_tcp_max_retrans	ip_ct_tcp_max_retrans
+#endif
+
 /* Log invalid packets */
 extern unsigned int ip_ct_log_invalid;
 
@@ -85,10 +121,10 @@ extern int ip_ct_port_nfattr_to_tuple(st
 #ifdef CONFIG_SYSCTL
 #ifdef DEBUG_INVALID_PACKETS
 #define LOG_INVALID(proto) \
-	(ip_ct_log_invalid == (proto) || ip_ct_log_invalid == IPPROTO_RAW)
+	(ve_ip_ct_log_invalid == (proto) || ve_ip_ct_log_invalid == IPPROTO_RAW)
 #else
 #define LOG_INVALID(proto) \
-	((ip_ct_log_invalid == (proto) || ip_ct_log_invalid == IPPROTO_RAW) \
+	((ve_ip_ct_log_invalid == (proto) || ve_ip_ct_log_invalid == IPPROTO_RAW) \
 	 && net_ratelimit())
 #endif
 #else
diff -uprN linux-2.6.16/include/linux/netfilter_ipv4/ip_nat.h linux-2.6.16.ovz/include/linux/netfilter_ipv4/ip_nat.h
--- linux-2.6.16/include/linux/netfilter_ipv4/ip_nat.h	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/include/linux/netfilter_ipv4/ip_nat.h	2006-07-05 08:34:56.000000000 -0400
@@ -1,5 +1,6 @@
 #ifndef _IP_NAT_H
 #define _IP_NAT_H
+#include <linux/config.h>
 #include <linux/netfilter_ipv4.h>
 #include <linux/netfilter_ipv4/ip_conntrack_tuple.h>
 
@@ -72,10 +73,29 @@ extern unsigned int ip_nat_setup_info(st
 extern int ip_nat_used_tuple(const struct ip_conntrack_tuple *tuple,
 			     const struct ip_conntrack *ignored_conntrack);
 
+extern void ip_nat_hash_conntrack(struct ip_conntrack *conntrack);
+
 /* Calculate relative checksum. */
 extern u_int16_t ip_nat_cheat_check(u_int32_t oldvalinv,
 				    u_int32_t newval,
 				    u_int16_t oldcheck);
+
+#ifdef CONFIG_COMPAT
+#include <net/compat.h>
+
+struct compat_ip_nat_range
+{
+	compat_uint_t flags;
+	u_int32_t min_ip, max_ip;
+	union ip_conntrack_manip_proto min, max;
+};
+
+struct compat_ip_nat_multi_range
+{
+	compat_uint_t rangesize;
+	struct compat_ip_nat_range range[1];
+};
+#endif
 #else  /* !__KERNEL__: iptables wants this to compile. */
 #define ip_nat_multi_range ip_nat_multi_range_compat
 #endif /*__KERNEL__*/
diff -uprN linux-2.6.16/include/linux/netfilter_ipv4/ip_nat_rule.h linux-2.6.16.ovz/include/linux/netfilter_ipv4/ip_nat_rule.h
--- linux-2.6.16/include/linux/netfilter_ipv4/ip_nat_rule.h	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/include/linux/netfilter_ipv4/ip_nat_rule.h	2006-07-05 08:34:56.000000000 -0400
@@ -6,7 +6,7 @@
 
 #ifdef __KERNEL__
 
-extern int ip_nat_rule_init(void) __init;
+extern int ip_nat_rule_init(void);
 extern void ip_nat_rule_cleanup(void);
 extern int ip_nat_rule_find(struct sk_buff **pskb,
 			    unsigned int hooknum,
diff -uprN linux-2.6.16/include/linux/netfilter_ipv4/ip_tables.h linux-2.6.16.ovz/include/linux/netfilter_ipv4/ip_tables.h
--- linux-2.6.16/include/linux/netfilter_ipv4/ip_tables.h	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/include/linux/netfilter_ipv4/ip_tables.h	2006-07-05 08:34:56.000000000 -0400
@@ -16,6 +16,7 @@
 #define _IPTABLES_H
 
 #ifdef __KERNEL__
+#include <linux/config.h>
 #include <linux/if.h>
 #include <linux/types.h>
 #include <linux/in.h>
@@ -330,7 +331,7 @@ extern void ipt_init(void) __init;
 //#define ipt_register_table(tbl, repl) xt_register_table(AF_INET, tbl, repl)
 //#define ipt_unregister_table(tbl) xt_unregister_table(AF_INET, tbl)
 
-extern int ipt_register_table(struct ipt_table *table,
+extern struct ipt_table *ipt_register_table(struct ipt_table *table,
 			      const struct ipt_replace *repl);
 extern void ipt_unregister_table(struct ipt_table *table);
 
@@ -364,5 +365,62 @@ extern unsigned int ipt_do_table(struct 
 				 void *userdata);
 
 #define IPT_ALIGN(s) XT_ALIGN(s)
+
+#ifdef CONFIG_COMPAT
+#include <net/compat.h>
+
+struct compat_ipt_getinfo
+{
+	char name[IPT_TABLE_MAXNAMELEN];
+	compat_uint_t valid_hooks;
+	compat_uint_t hook_entry[NF_IP_NUMHOOKS];
+	compat_uint_t underflow[NF_IP_NUMHOOKS];
+	compat_uint_t num_entries;
+	compat_uint_t size;
+};
+
+struct compat_ipt_entry
+{
+	struct ipt_ip ip;
+	compat_uint_t nfcache;
+	u_int16_t target_offset;
+	u_int16_t next_offset;
+	compat_uint_t comefrom;
+	struct compat_xt_counters counters;
+	unsigned char elems[0];
+};
+
+struct compat_ipt_entry_match
+{
+	union {
+		struct {
+			u_int16_t match_size;
+			char name[IPT_FUNCTION_MAXNAMELEN];
+		} user;
+		u_int16_t match_size;
+	} u;
+	unsigned char data[0];
+};
+
+struct compat_ipt_entry_target
+{
+	union {
+		struct {
+			u_int16_t target_size;
+			char name[IPT_FUNCTION_MAXNAMELEN];
+		} user;
+		u_int16_t target_size;
+	} u;
+	unsigned char data[0];
+};
+
+#define COMPAT_IPT_ALIGN(s) 	COMPAT_XT_ALIGN(s)
+
+extern int ipt_match_align_compat(void *match, void **dstptr,
+		int *size, int off, int convert);
+extern int ipt_target_align_compat(void *target, void **dstptr,
+		int *size, int off, int convert);
+
+#endif /* CONFIG_COMPAT */
 #endif /*__KERNEL__*/
 #endif /* _IPTABLES_H */
diff -uprN linux-2.6.16/include/linux/netfilter_ipv6/ip6_tables.h linux-2.6.16.ovz/include/linux/netfilter_ipv6/ip6_tables.h
--- linux-2.6.16/include/linux/netfilter_ipv6/ip6_tables.h	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/include/linux/netfilter_ipv6/ip6_tables.h	2006-07-05 08:34:56.000000000 -0400
@@ -340,7 +340,7 @@ extern void ip6t_init(void) __init;
 #define ip6t_register_match(match) xt_register_match(AF_INET6, match)
 #define ip6t_unregister_match(match) xt_unregister_match(AF_INET6, match)
 
-extern int ip6t_register_table(struct ip6t_table *table,
+extern struct ip6t_table *ip6t_register_table(struct ip6t_table *table,
 			       const struct ip6t_replace *repl);
 extern void ip6t_unregister_table(struct ip6t_table *table);
 extern unsigned int ip6t_do_table(struct sk_buff **pskb,
diff -uprN linux-2.6.16/include/linux/nfcalls.h linux-2.6.16.ovz/include/linux/nfcalls.h
--- linux-2.6.16/include/linux/nfcalls.h	1969-12-31 19:00:00.000000000 -0500
+++ linux-2.6.16.ovz/include/linux/nfcalls.h	2006-07-05 08:34:56.000000000 -0400
@@ -0,0 +1,254 @@
+/*
+ *  include/linux/nfcalls.h
+ *
+ *  Copyright (C) 2005  SWsoft
+ *  All rights reserved.
+ *  
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#ifndef _LINUX_NFCALLS_H
+#define _LINUX_NFCALLS_H
+
+#include <linux/rcupdate.h>
+
+#ifdef CONFIG_MODULES
+extern struct module no_module;
+
+#define DECL_KSYM_MODULE(name)				\
+	extern struct module *vz_mod_##name
+#define DECL_KSYM_CALL(type, name, args)		\
+	extern type (*vz_##name) args
+
+#define INIT_KSYM_MODULE(name)				\
+	struct module *vz_mod_##name = &no_module;	\
+	EXPORT_SYMBOL(vz_mod_##name)
+#define INIT_KSYM_CALL(type, name, args)		\
+	type (*vz_##name) args;				\
+	EXPORT_SYMBOL(vz_##name)
+
+#define __KSYMERRCALL(err, type, mod, name, args)	\
+({	/* yields err unless the hook could be called */	\
+	type ret = (type)(err);				\
+	if (!__vzksym_module_get(vz_mod_##mod))	{	\
+		if (vz_##name)	/* hook pointer set? */	\
+			ret = ((*vz_##name)args);	\
+		__vzksym_module_put(vz_mod_##mod);	\
+	}						\
+	ret;						\
+})
+#define __KSYMSAFECALL_VOID(mod, name, args)		\
+do {							\
+	if (!__vzksym_module_get(vz_mod_##mod)) {	\
+		if (vz_##name)				\
+			((*vz_##name)args); 		\
+		__vzksym_module_put(vz_mod_##mod);	\
+	}						\
+} while (0)
+#else
+#define DECL_KSYM_CALL(type, name, args)                \
+	extern type name args
+#define INIT_KSYM_MODULE(name)
+#define INIT_KSYM_CALL(type, name, args)		\
+	type name args
+#define __KSYMERRCALL(err, type, mod, name, args)      ((*name)args)
+#define __KSYMSAFECALL_VOID(mod, name, args)           ((*name)args)
+#endif
+
+#define KSYMERRCALL(err, mod, name, args)		\
+	__KSYMERRCALL(err, int, mod, name, args)
+#define KSYMSAFECALL(type, mod, name, args)		\
+	__KSYMERRCALL(0, type, mod, name, args)
+#define KSYMSAFECALL_VOID(mod, name, args)		\
+	__KSYMSAFECALL_VOID(mod, name, args)
+
+#if defined(CONFIG_VE) && defined(CONFIG_MODULES)
+/* should be called _after_ KSYMRESOLVE's */
+#define KSYMMODRESOLVE(name)				\
+	__vzksym_modresolve(&vz_mod_##name, THIS_MODULE)
+#define KSYMMODUNRESOLVE(name)				\
+	__vzksym_modunresolve(&vz_mod_##name)
+
+#define KSYMRESOLVE(name)				\
+	vz_##name = &name
+#define KSYMUNRESOLVE(name)				\
+	vz_##name = NULL
+#else
+#define KSYMRESOLVE(name)	do { } while (0)
+#define KSYMUNRESOLVE(name)	do { } while (0)
+#define KSYMMODRESOLVE(name)	do { } while (0)
+#define KSYMMODUNRESOLVE(name)	do { } while (0)
+#endif
+
+#ifdef CONFIG_MODULES
+static inline void __vzksym_modresolve(struct module **modp, struct module *mod)
+{
+	/*
+	 * we want earlier pointer updates to be visible before *modp is set:
+	 * 1. wmb() is here only for peace of mind
+	 *    (note, no rmb() in KSYMSAFECALL)
+	 * 2. synchronize_sched() guarantees that updates are visible
+	 *    on all cpus and allows us to remove rmb() in KSYMSAFECALL
+	 */
+	wmb(); synchronize_sched();
+	*modp = mod;
+	/* just to be sure our changes are visible as soon as possible */
+	wmb(); synchronize_sched();
+}
+
+static inline void __vzksym_modunresolve(struct module **modp)
+{
+	/*
+	 * try_module_get() in KSYMSAFECALL should fail at this moment since
+	 * THIS_MODULE is in unloading state (we should be called from fini),
+	 * no need to synchronize pointers/ve_module updates.
+	 */
+	*modp = &no_module;
+	/*
+	 * synchronize_sched() guarantees here that we see
+	 * updated module pointer before the module really gets away
+	 */
+	synchronize_sched();
+}
+
+static inline int __vzksym_module_get(struct module *mod)
+{
+	/*
+	 * we want to avoid rmb(), so use synchronize_sched() in
+	 * __vzksym_modresolve() and smp_read_barrier_depends() here...
+	 */
+	smp_read_barrier_depends(); /* for module loading */
+	if (!try_module_get(mod))
+		return -EBUSY;	/* could not take a module reference */
+
+	return 0;
+}
+
+static inline void __vzksym_module_put(struct module *mod)
+{
+	module_put(mod);
+}
+#endif
+
+#if defined(CONFIG_VE)
+#ifdef CONFIG_MODULES
+DECL_KSYM_MODULE(x_tables);
+DECL_KSYM_MODULE(xt_tcpudp);
+DECL_KSYM_MODULE(ip_tables);
+DECL_KSYM_MODULE(ip6_tables);
+DECL_KSYM_MODULE(iptable_filter);
+DECL_KSYM_MODULE(ip6table_filter);
+DECL_KSYM_MODULE(iptable_mangle);
+DECL_KSYM_MODULE(ip6table_mangle);
+DECL_KSYM_MODULE(xt_limit);
+DECL_KSYM_MODULE(ipt_multiport);
+DECL_KSYM_MODULE(ip6t_multiport);
+DECL_KSYM_MODULE(ipt_tos);
+DECL_KSYM_MODULE(ipt_TOS);
+DECL_KSYM_MODULE(ipt_REJECT);
+DECL_KSYM_MODULE(ip6t_REJECT);
+DECL_KSYM_MODULE(ipt_TCPMSS);
+DECL_KSYM_MODULE(xt_tcpmss);
+DECL_KSYM_MODULE(ipt_ttl);
+DECL_KSYM_MODULE(ipt_LOG);
+DECL_KSYM_MODULE(ip6t_LOG);
+DECL_KSYM_MODULE(xt_length);
+DECL_KSYM_MODULE(ip_conntrack);
+DECL_KSYM_MODULE(ip_conntrack_ftp);
+DECL_KSYM_MODULE(ip_conntrack_irc);
+DECL_KSYM_MODULE(xt_conntrack);
+DECL_KSYM_MODULE(xt_state);
+DECL_KSYM_MODULE(xt_helper);
+DECL_KSYM_MODULE(ip_nat);
+DECL_KSYM_MODULE(iptable_nat);
+DECL_KSYM_MODULE(ip_nat_ftp);
+DECL_KSYM_MODULE(ip_nat_irc);
+DECL_KSYM_MODULE(ipt_REDIRECT);
+#endif
+
+struct sk_buff;
+
+DECL_KSYM_CALL(int, init_netfilter, (void));
+DECL_KSYM_CALL(int, init_xtables, (void));
+DECL_KSYM_CALL(int, init_xt_tcpudp, (void));
+DECL_KSYM_CALL(int, init_iptables, (void));
+DECL_KSYM_CALL(int, init_ip6tables, (void));
+DECL_KSYM_CALL(int, init_iptable_filter, (void));
+DECL_KSYM_CALL(int, init_ip6table_filter, (void));
+DECL_KSYM_CALL(int, init_iptable_mangle, (void));
+DECL_KSYM_CALL(int, init_ip6table_mangle, (void));
+DECL_KSYM_CALL(int, init_xt_limit, (void));
+DECL_KSYM_CALL(int, init_iptable_multiport, (void));
+DECL_KSYM_CALL(int, init_ip6table_multiport, (void));
+DECL_KSYM_CALL(int, init_iptable_tos, (void));
+DECL_KSYM_CALL(int, init_iptable_TOS, (void));
+DECL_KSYM_CALL(int, init_iptable_REJECT, (void));
+DECL_KSYM_CALL(int, init_ip6table_REJECT, (void));
+DECL_KSYM_CALL(int, init_iptable_TCPMSS, (void));
+DECL_KSYM_CALL(int, init_xt_tcpmss, (void));
+DECL_KSYM_CALL(int, init_iptable_ttl, (void));
+DECL_KSYM_CALL(int, init_iptable_LOG, (void));
+DECL_KSYM_CALL(int, init_ip6table_LOG, (void));
+DECL_KSYM_CALL(int, init_xt_length, (void));
+DECL_KSYM_CALL(int, init_iptable_conntrack, (void));
+DECL_KSYM_CALL(int, init_iptable_ftp, (void));
+DECL_KSYM_CALL(int, init_iptable_irc, (void));
+DECL_KSYM_CALL(int, init_xt_conntrack_match, (void));
+DECL_KSYM_CALL(int, init_xt_state, (void));
+DECL_KSYM_CALL(int, init_xt_helper, (void));
+DECL_KSYM_CALL(int, ip_nat_init, (void));
+DECL_KSYM_CALL(int, init_iptable_nat, (void));
+DECL_KSYM_CALL(int, init_iptable_nat_ftp, (void));
+DECL_KSYM_CALL(int, init_iptable_nat_irc, (void));
+DECL_KSYM_CALL(int, init_iptable_REDIRECT, (void));
+DECL_KSYM_CALL(void, fini_iptable_nat_irc, (void));
+DECL_KSYM_CALL(void, fini_iptable_nat_ftp, (void));
+DECL_KSYM_CALL(void, fini_iptable_nat, (void));
+DECL_KSYM_CALL(void, ip_nat_cleanup, (void));
+DECL_KSYM_CALL(void, fini_xt_helper, (void));
+DECL_KSYM_CALL(void, fini_xt_state, (void));
+DECL_KSYM_CALL(void, fini_xt_conntrack_match, (void));
+DECL_KSYM_CALL(void, fini_iptable_irc, (void));
+DECL_KSYM_CALL(void, fini_iptable_ftp, (void));
+DECL_KSYM_CALL(void, fini_iptable_conntrack, (void));
+DECL_KSYM_CALL(void, fini_xt_length, (void));
+DECL_KSYM_CALL(void, fini_ip6table_LOG, (void));
+DECL_KSYM_CALL(void, fini_iptable_LOG, (void));
+DECL_KSYM_CALL(void, fini_iptable_ttl, (void));
+DECL_KSYM_CALL(void, fini_xt_tcpmss, (void));
+DECL_KSYM_CALL(void, fini_iptable_TCPMSS, (void));
+DECL_KSYM_CALL(void, fini_ip6table_REJECT, (void));
+DECL_KSYM_CALL(void, fini_iptable_REJECT, (void));
+DECL_KSYM_CALL(void, fini_iptable_TOS, (void));
+DECL_KSYM_CALL(void, fini_iptable_tos, (void));
+DECL_KSYM_CALL(void, fini_ip6table_multiport, (void));
+DECL_KSYM_CALL(void, fini_iptable_multiport, (void));
+DECL_KSYM_CALL(void, fini_xt_limit, (void));
+DECL_KSYM_CALL(void, fini_iptable_filter, (void));
+DECL_KSYM_CALL(void, fini_ip6table_filter, (void));
+DECL_KSYM_CALL(void, fini_iptable_mangle, (void));
+DECL_KSYM_CALL(void, fini_ip6table_mangle, (void));
+DECL_KSYM_CALL(void, fini_ip6tables, (void));
+DECL_KSYM_CALL(void, fini_iptables, (void));
+DECL_KSYM_CALL(void, fini_xt_tcpudp, (void));
+DECL_KSYM_CALL(void, fini_xtables, (void));
+DECL_KSYM_CALL(void, fini_netfilter, (void));
+DECL_KSYM_CALL(void, fini_iptable_REDIRECT, (void));
+
+#include <linux/netfilter/x_tables.h>
+
+DECL_KSYM_CALL(void, ipt_flush_table, (struct xt_table *table));
+DECL_KSYM_CALL(void, ip6t_flush_table, (struct xt_table *table));
+#endif /* CONFIG_VE */
+
+#ifdef CONFIG_VE_CALLS_MODULE
+DECL_KSYM_MODULE(vzmon);
+DECL_KSYM_CALL(int, real_get_device_perms_ve,
+	(int dev_type, dev_t dev, int access_mode));
+DECL_KSYM_CALL(void, real_do_env_cleanup, (struct ve_struct *env));
+DECL_KSYM_CALL(void, real_do_env_free, (struct ve_struct *env));
+DECL_KSYM_CALL(void, real_update_load_avg_ve, (void));
+#endif
+
+#endif /* _LINUX_NFCALLS_H */
diff -uprN linux-2.6.16/include/linux/nfs_fs.h linux-2.6.16.ovz/include/linux/nfs_fs.h
--- linux-2.6.16/include/linux/nfs_fs.h	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/include/linux/nfs_fs.h	2006-07-05 08:34:56.000000000 -0400
@@ -296,7 +296,7 @@ extern struct inode *nfs_fhget(struct su
 extern int nfs_refresh_inode(struct inode *, struct nfs_fattr *);
 extern int nfs_post_op_update_inode(struct inode *inode, struct nfs_fattr *fattr);
 extern int nfs_getattr(struct vfsmount *, struct dentry *, struct kstat *);
-extern int nfs_permission(struct inode *, int, struct nameidata *);
+extern int nfs_permission(struct inode *, int, struct nameidata *, struct exec_perm *);
 extern int nfs_access_get_cached(struct inode *, struct rpc_cred *, struct nfs_access_entry *);
 extern void nfs_access_add_cache(struct inode *, struct nfs_access_entry *);
 extern int nfs_open(struct inode *, struct file *);
diff -uprN linux-2.6.16/include/linux/notifier.h linux-2.6.16.ovz/include/linux/notifier.h
--- linux-2.6.16/include/linux/notifier.h	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/include/linux/notifier.h	2006-07-05 08:34:56.000000000 -0400
@@ -27,8 +27,9 @@ extern int notifier_call_chain(struct no
 
 #define NOTIFY_DONE		0x0000		/* Don't care */
 #define NOTIFY_OK		0x0001		/* Suits me */
+#define NOTIFY_FAIL		0x0002		/* Reject */
 #define NOTIFY_STOP_MASK	0x8000		/* Don't call further */
-#define NOTIFY_BAD		(NOTIFY_STOP_MASK|0x0002)	/* Bad/Veto action	*/
+#define NOTIFY_BAD		(NOTIFY_STOP_MASK|NOTIFY_FAIL)	/* Bad/Veto action	*/
 /*
  * Clean way to return from the notifier and stop further calls.
  */
diff -uprN linux-2.6.16/include/linux/page-flags.h linux-2.6.16.ovz/include/linux/page-flags.h
--- linux-2.6.16/include/linux/page-flags.h	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/include/linux/page-flags.h	2006-07-05 08:34:56.000000000 -0400
@@ -74,7 +74,9 @@
 #define PG_mappedtodisk		16	/* Has blocks allocated on-disk */
 #define PG_reclaim		17	/* To be reclaimed asap */
 #define PG_nosave_free		18	/* Free, should not be written */
-#define PG_uncached		19	/* Page has been mapped as uncached */
+#define PG_buddy		19	/* Page is free, on buddy lists */
+
+#define PG_uncached		20	/* Page has been mapped as uncached */
 
 /*
  * Global page accounting.  One instance per CPU.  Only unsigned longs are
@@ -319,6 +321,10 @@ extern void __mod_page_state_offset(unsi
 #define SetPageNosaveFree(page)	set_bit(PG_nosave_free, &(page)->flags)
 #define ClearPageNosaveFree(page)		clear_bit(PG_nosave_free, &(page)->flags)
 
+#define PageBuddy(page)		test_bit(PG_buddy, &(page)->flags)
+#define __SetPageBuddy(page)	__set_bit(PG_buddy, &(page)->flags)
+#define __ClearPageBuddy(page)	__clear_bit(PG_buddy, &(page)->flags)
+
 #define PageMappedToDisk(page)	test_bit(PG_mappedtodisk, &(page)->flags)
 #define SetPageMappedToDisk(page) set_bit(PG_mappedtodisk, &(page)->flags)
 #define ClearPageMappedToDisk(page) clear_bit(PG_mappedtodisk, &(page)->flags)
diff -uprN linux-2.6.16/include/linux/pid.h linux-2.6.16.ovz/include/linux/pid.h
--- linux-2.6.16/include/linux/pid.h	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/include/linux/pid.h	2006-07-05 08:34:56.000000000 -0400
@@ -1,6 +1,18 @@
 #ifndef _LINUX_PID_H
 #define _LINUX_PID_H
 
+#define VPID_BIT	10	/* bit tested by __is_virtual_pid() */
+#define VPID_DIV	(1<<VPID_BIT)	/* VPID_BIT as a mask */
+
+#ifdef CONFIG_VE
+#define __is_virtual_pid(pid)	((pid) & VPID_DIV)
+#define is_virtual_pid(pid)	/* NB: may evaluate pid twice */ \
+   (__is_virtual_pid(pid) || ((pid)==1 && !ve_is_super(get_exec_env())))
+#else
+#define __is_virtual_pid(pid)	0	/* both tests are constant 0 without CONFIG_VE */
+#define is_virtual_pid(pid)	0
+#endif
+
 enum pid_type
 {
 	PIDTYPE_PID,
@@ -15,6 +27,9 @@ struct pid
 	/* Try to keep pid_chain in the same cacheline as nr for find_pid */
 	int nr;
 	struct hlist_node pid_chain;
+#ifdef CONFIG_VE
+	int vnr;
+#endif
 	/* list of pids with the same nr, only one of them is in the hash */
 	struct list_head pid_list;
 };
@@ -40,16 +55,89 @@ extern int alloc_pidmap(void);
 extern void FASTCALL(free_pidmap(int));
 extern void switch_exec_pids(struct task_struct *leader, struct task_struct *thread);
 
-#define do_each_task_pid(who, type, task)				\
-	if ((task = find_task_by_pid_type(type, who))) {		\
+#ifndef CONFIG_VE
+
+#define vpid_to_pid(pid)	(pid)	/* !CONFIG_VE: every conversion is the identity */
+#define __vpid_to_pid(pid)	(pid)
+#define pid_type_to_vpid(type, pid)	(pid)
+#define __pid_type_to_vpid(type, pid)	(pid)
+/* NOTE(review): the CONFIG_VE branch declares _pid_type_to_vpid (one underscore), not __pid_type_to_vpid -- confirm which spelling callers use. */
+#define comb_vpid_to_pid(pid)	(pid)
+#define comb_pid_to_vpid(pid)	(pid)
+
+#else
+
+struct ve_struct;
+extern void free_vpid(int vpid, struct ve_struct *ve);
+extern int alloc_vpid(int pid, int vpid);
+extern int vpid_to_pid(int pid);
+extern int __vpid_to_pid(int pid);
+extern pid_t pid_type_to_vpid(int type, pid_t pid);
+extern pid_t _pid_type_to_vpid(int type, pid_t pid);
+
+static inline int comb_vpid_to_pid(int vpid)	/* signed vpid -> signed pid */
+{
+	int pid = vpid;
+	/* 0 is returned unchanged; a negative vpid_to_pid() result is reported as 0 */
+	if (vpid > 0) {
+		pid = vpid_to_pid(vpid);
+		if (unlikely(pid < 0))
+			return 0;
+	} else if (vpid < 0) {
+		pid = vpid_to_pid(-vpid);	/* convert the magnitude ... */
+		if (unlikely(pid < 0))
+			return 0;
+		pid = -pid;	/* ... then restore the sign */
+	}
+	return pid;
+}
+
+static inline int comb_pid_to_vpid(int pid)	/* signed pid -> signed vpid */
+{
+	int vpid = pid;
+	/* 0 is returned unchanged; a negative pid_type_to_vpid() result is reported as 0 */
+	if (pid > 0) {
+		vpid = pid_type_to_vpid(PIDTYPE_PID, pid);	/* positive: converted as PIDTYPE_PID */
+		if (unlikely(vpid < 0))
+			return 0;
+	} else if (pid < 0) {
+		vpid = pid_type_to_vpid(PIDTYPE_PGID, -pid);	/* negative: magnitude converted as PIDTYPE_PGID */
+		if (unlikely(vpid < 0))
+			return 0;
+		vpid = -vpid;	/* restore the sign */
+	}
+	return vpid;
+}
+#endif
+
+#define do_each_task_pid_all(who, type, task)				\
+	if ((task = find_task_by_pid_type_all(type, who))) {		\
 		prefetch((task)->pids[type].pid_list.next);		\
 		do {
 
-#define while_each_task_pid(who, type, task)				\
+#define while_each_task_pid_all(who, type, task)			\
 		} while (task = pid_task((task)->pids[type].pid_list.next,\
 						type),			\
 			prefetch((task)->pids[type].pid_list.next),	\
 			hlist_unhashed(&(task)->pids[type].pid_chain));	\
 	}								\
 
+#ifndef CONFIG_VE
+#define __do_each_task_pid_ve(who, type, task, owner)			\
+		do_each_task_pid_all(who, type, task)
+#define __while_each_task_pid_ve(who, type, task, owner)		\
+		while_each_task_pid_all(who, type, task)
+#else /* CONFIG_VE */
+#define __do_each_task_pid_ve(who, type, task, owner)			\
+		do_each_task_pid_all(who, type, task)			\
+			if (ve_accessible(VE_TASK_INFO(task)->owner_env, owner))
+#define __while_each_task_pid_ve(who, type, task, owner)		\
+		while_each_task_pid_all(who, type, task)
+#endif /* CONFIG_VE */
+
+#define do_each_task_pid_ve(who, type, task)	/* no ';': it would become the body of the helper's trailing if () */ \
+		__do_each_task_pid_ve(who, type, task, get_exec_env())
+#define while_each_task_pid_ve(who, type, task)	/* closes the loop; caller supplies the ';' */ \
+		__while_each_task_pid_ve(who, type, task, get_exec_env())
+
 #endif /* _LINUX_PID_H */
diff -uprN linux-2.6.16/include/linux/proc_fs.h linux-2.6.16.ovz/include/linux/proc_fs.h
--- linux-2.6.16/include/linux/proc_fs.h	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/include/linux/proc_fs.h	2006-07-05 08:34:56.000000000 -0400
@@ -78,7 +78,7 @@ struct kcore_list {
 struct vmcore {
 	struct list_head list;
 	unsigned long long paddr;
-	unsigned long size;
+	unsigned long long size;
 	loff_t offset;
 };
 
@@ -86,8 +86,14 @@ struct vmcore {
 
 extern struct proc_dir_entry proc_root;
 extern struct proc_dir_entry *proc_root_fs;
+#ifdef CONFIG_VE
+#include <linux/sched.h>
+#define proc_net	(get_exec_env()->_proc_net)
+#define proc_net_stat	(get_exec_env()->_proc_net_stat)
+#else
 extern struct proc_dir_entry *proc_net;
 extern struct proc_dir_entry *proc_net_stat;
+#endif
 extern struct proc_dir_entry *proc_bus;
 extern struct proc_dir_entry *proc_root_driver;
 extern struct proc_dir_entry *proc_root_kcore;
@@ -98,8 +104,8 @@ extern void proc_misc_init(void);
 struct mm_struct;
 
 struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *);
-struct dentry *proc_pid_unhash(struct task_struct *p);
-void proc_pid_flush(struct dentry *proc_dentry);
+void proc_pid_unhash(struct task_struct *p, struct dentry * [2]);
+void proc_pid_flush(struct dentry *proc_dentry[2]);
 int proc_pid_readdir(struct file * filp, void * dirent, filldir_t filldir);
 unsigned long task_vsize(struct mm_struct *);
 int task_statm(struct mm_struct *, int *, int *, int *, int *);
@@ -107,7 +113,11 @@ char *task_mem(struct mm_struct *, char 
 
 extern struct proc_dir_entry *create_proc_entry(const char *name, mode_t mode,
 						struct proc_dir_entry *parent);
+extern struct proc_dir_entry *create_proc_glob_entry(const char *name,
+						mode_t mode,
+						struct proc_dir_entry *parent);
 extern void remove_proc_entry(const char *name, struct proc_dir_entry *parent);
+extern void remove_proc_glob_entry(const char *name, struct proc_dir_entry *parent);
 
 extern struct vfsmount *proc_mnt;
 extern int proc_fill_super(struct super_block *,void *,int);
@@ -194,6 +204,15 @@ static inline struct proc_dir_entry *pro
 	return res;
 }
 
+static inline struct proc_dir_entry *proc_glob_fops_create(const char *name,
+	mode_t mode, struct file_operations *fops)	/* returns the new entry, or NULL */
+{
+	struct proc_dir_entry *res = create_proc_glob_entry(name, mode, NULL);	/* NULL parent */
+	if (res)	/* fops is attached only when creation succeeded */
+		res->proc_fops = fops;
+	return res;
+}
+
 static inline void proc_net_remove(const char *name)
 {
 	remove_proc_entry(name,proc_net);
@@ -206,16 +225,21 @@ static inline void proc_net_remove(const
 #define proc_bus NULL
 
 #define proc_net_fops_create(name, mode, fops)  ({ (void)(mode), NULL; })
+#define proc_glob_fops_create(name, mode, fops)  ({ (void)(mode), NULL; })
 #define proc_net_create(name, mode, info)	({ (void)(mode), NULL; })
 static inline void proc_net_remove(const char *name) {}
 
-static inline struct dentry *proc_pid_unhash(struct task_struct *p) { return NULL; }
-static inline void proc_pid_flush(struct dentry *proc_dentry) { }
+static inline void proc_pid_unhash(struct task_struct *p,
+		struct dentry *d[2]) { }	/* void, like the CONFIG_PROC_FS prototype; d[] is left untouched */
+static inline void proc_pid_flush(struct dentry *proc_dentry[2]) { }
 
 static inline struct proc_dir_entry *create_proc_entry(const char *name,
 	mode_t mode, struct proc_dir_entry *parent) { return NULL; }
+static inline struct proc_dir_entry *create_proc_glob_entry(const char *name,
+	mode_t mode, struct proc_dir_entry *parent) { return NULL; }
 
 #define remove_proc_entry(name, parent) do {} while (0)
+#define remove_proc_glob_entry(name, parent) do {} while (0)
 
 static inline struct proc_dir_entry *proc_symlink(const char *name,
 		struct proc_dir_entry *parent,const char *dest) {return NULL;}
@@ -266,4 +290,18 @@ static inline struct proc_dir_entry *PDE
 	return PROC_I(inode)->pde;
 }
 
+static inline struct proc_dir_entry * de_get(struct proc_dir_entry *de)	/* bump de->count and return de */
+{
+	if (de)	/* a NULL entry is passed through untouched */
+		atomic_inc(&de->count);
+	return de;
+}
+
+extern void de_put(struct proc_dir_entry *);
+
+#define LPDE(inode)	(PROC_I((inode))->pde)
+#ifdef CONFIG_VE
+#define GPDE(inode)	(*(struct proc_dir_entry **)(&(inode)->i_pipe))
+#endif
+
 #endif /* _LINUX_PROC_FS_H */
diff -uprN linux-2.6.16/include/linux/quota.h linux-2.6.16.ovz/include/linux/quota.h
--- linux-2.6.16/include/linux/quota.h	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/include/linux/quota.h	2006-07-05 08:34:56.000000000 -0400
@@ -37,7 +37,6 @@
 
 #include <linux/errno.h>
 #include <linux/types.h>
-#include <linux/spinlock.h>
 
 #define __DQUOT_VERSION__	"dquot_6.5.1"
 #define __DQUOT_NUM_VERSION__	6*10000+5*100+1
@@ -45,8 +44,6 @@
 typedef __kernel_uid32_t qid_t; /* Type in which we store ids in memory */
 typedef __u64 qsize_t;          /* Type in which we store sizes */
 
-extern spinlock_t dq_data_lock;
-
 /* Size of blocks in which are counted size limits */
 #define QUOTABLOCK_BITS 10
 #define QUOTABLOCK_SIZE (1 << QUOTABLOCK_BITS)
@@ -133,6 +130,10 @@ struct if_dqinfo {
 
 #ifdef __KERNEL__
 
+#include <linux/spinlock.h>
+
+extern spinlock_t dq_data_lock;
+
 #include <linux/dqblk_xfs.h>
 #include <linux/dqblk_v1.h>
 #include <linux/dqblk_v2.h>
@@ -242,6 +243,8 @@ struct quota_format_ops {
 	int (*release_dqblk)(struct dquot *dquot);	/* Called when last reference to dquot is being dropped */
 };
 
+struct inode;
+struct iattr;
 /* Operations working with dquots */
 struct dquot_operations {
 	int (*initialize) (struct inode *, int);
@@ -256,9 +259,11 @@ struct dquot_operations {
 	int (*release_dquot) (struct dquot *);		/* Quota is going to be deleted from disk */
 	int (*mark_dirty) (struct dquot *);		/* Dquot is marked dirty */
 	int (*write_info) (struct super_block *, int);	/* Write of quota "superblock" */
+	int (*rename) (struct inode *, struct inode *, struct inode *);
 };
 
 /* Operations handling requests from userspace */
+struct v2_disk_dqblk;
 struct quotactl_ops {
 	int (*quota_on)(struct super_block *, int, int, char *);
 	int (*quota_off)(struct super_block *, int);
@@ -271,6 +276,9 @@ struct quotactl_ops {
 	int (*set_xstate)(struct super_block *, unsigned int, int);
 	int (*get_xquota)(struct super_block *, int, qid_t, struct fs_disk_quota *);
 	int (*set_xquota)(struct super_block *, int, qid_t, struct fs_disk_quota *);
+#ifdef CONFIG_QUOTA_COMPAT
+	int (*get_quoti)(struct super_block *, int, unsigned int, struct v2_disk_dqblk *);
+#endif
 };
 
 struct quota_format_type {
@@ -291,6 +299,10 @@ struct quota_info {
 	struct inode *files[MAXQUOTAS];		/* inodes of quotafiles */
 	struct mem_dqinfo info[MAXQUOTAS];	/* Information for each quota type */
 	struct quota_format_ops *ops[MAXQUOTAS];	/* Operations for each type */
+#if defined(CONFIG_VZ_QUOTA) || defined(CONFIG_VZ_QUOTA_MODULE)
+	struct vz_quota_master *vzdq_master;
+	int vzdq_count;
+#endif
 };
 
 /* Inline would be better but we need to dereference super_block which is not defined yet */
diff -uprN linux-2.6.16/include/linux/quotaops.h linux-2.6.16.ovz/include/linux/quotaops.h
--- linux-2.6.16/include/linux/quotaops.h	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/include/linux/quotaops.h	2006-07-05 08:34:56.000000000 -0400
@@ -171,6 +171,19 @@ static __inline__ int DQUOT_TRANSFER(str
 	return 0;
 }
 
+static __inline__ int DQUOT_RENAME(struct inode *inode,
+		struct inode *old_dir, struct inode *new_dir)	/* 1 if the ->rename hook returns NO_QUOTA, else 0 */
+{
+	struct dquot_operations *q_op;
+	/* A missing dq_op table or ->rename hook means there is nothing to ask: return 0. */
+	q_op = inode->i_sb->dq_op;
+	if (q_op && q_op->rename) {
+		if (q_op->rename(inode, old_dir, new_dir) == NO_QUOTA)
+			return 1;
+	}
+	return 0;	/* any hook result other than NO_QUOTA also ends up here */
+}
+
 /* The following two functions cannot be called inside a transaction */
 #define DQUOT_SYNC(sb)	sync_dquots(sb, -1)
 
@@ -197,6 +210,7 @@ static __inline__ int DQUOT_OFF(struct s
 #define DQUOT_SYNC(sb)				do { } while(0)
 #define DQUOT_OFF(sb)				do { } while(0)
 #define DQUOT_TRANSFER(inode, iattr)		(0)
+#define DQUOT_RENAME(inode, old_dir, new_dir)	(0)
 static inline int DQUOT_PREALLOC_SPACE_NODIRTY(struct inode *inode, qsize_t nr)
 {
 	inode_add_bytes(inode, nr);
diff -uprN linux-2.6.16/include/linux/raid/raid1.h linux-2.6.16.ovz/include/linux/raid/raid1.h
--- linux-2.6.16/include/linux/raid/raid1.h	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/include/linux/raid/raid1.h	2006-07-05 08:34:56.000000000 -0400
@@ -130,6 +130,6 @@ struct r1bio_s {
  * with failure when last write completes (and all failed).
  * Record that bi_end_io was called with this flag...
  */
-#define	R1BIO_Returned 4
+#define	R1BIO_Returned 6
 
 #endif
diff -uprN linux-2.6.16/include/linux/reiserfs_xattr.h linux-2.6.16.ovz/include/linux/reiserfs_xattr.h
--- linux-2.6.16/include/linux/reiserfs_xattr.h	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/include/linux/reiserfs_xattr.h	2006-07-05 08:34:56.000000000 -0400
@@ -42,7 +42,8 @@ int reiserfs_removexattr(struct dentry *
 int reiserfs_delete_xattrs(struct inode *inode);
 int reiserfs_chown_xattrs(struct inode *inode, struct iattr *attrs);
 int reiserfs_xattr_init(struct super_block *sb, int mount_flags);
-int reiserfs_permission(struct inode *inode, int mask, struct nameidata *nd);
+int reiserfs_permission(struct inode *inode, int mask, struct nameidata *nd,
+		struct exec_perm *);
 
 int reiserfs_xattr_del(struct inode *, const char *);
 int reiserfs_xattr_get(const struct inode *, const char *, void *, size_t);
diff -uprN linux-2.6.16/include/linux/rmap.h linux-2.6.16.ovz/include/linux/rmap.h
--- linux-2.6.16/include/linux/rmap.h	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/include/linux/rmap.h	2006-07-05 08:34:56.000000000 -0400
@@ -74,6 +74,7 @@ void page_add_anon_rmap(struct page *, s
 void page_add_new_anon_rmap(struct page *, struct vm_area_struct *, unsigned long);
 void page_add_file_rmap(struct page *);
 void page_remove_rmap(struct page *);
+struct anon_vma *page_lock_anon_vma(struct page *page);
 
 /**
  * page_dup_rmap - duplicate pte mapping to a page
diff -uprN linux-2.6.16/include/linux/rtc.h linux-2.6.16.ovz/include/linux/rtc.h
--- linux-2.6.16/include/linux/rtc.h	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/include/linux/rtc.h	2006-07-05 08:34:56.000000000 -0400
@@ -11,8 +11,6 @@
 #ifndef _LINUX_RTC_H_
 #define _LINUX_RTC_H_
 
-#include <linux/interrupt.h>
-
 /*
  * The struct used to pass data via the following ioctl. Similar to the
  * struct tm in <time.h>, but it needs to be here so that the kernel 
@@ -95,6 +93,8 @@ struct rtc_pll_info {
 
 #ifdef __KERNEL__
 
+#include <linux/interrupt.h>
+
 typedef struct rtc_task {
 	void (*func)(void *private_data);
 	void *private_data;
diff -uprN linux-2.6.16/include/linux/sched.h linux-2.6.16.ovz/include/linux/sched.h
--- linux-2.6.16/include/linux/sched.h	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/include/linux/sched.h	2006-07-05 08:34:56.000000000 -0400
@@ -38,7 +38,10 @@
 
 #include <linux/auxvec.h>	/* For AT_VECTOR_SIZE */
 
+#include <ub/ub_task.h>
+
 struct exec_domain;
+struct ve_struct;
 
 /*
  * cloning flags:
@@ -92,15 +95,34 @@ extern unsigned long avenrun[];		/* Load
 	load += n*(FIXED_1-exp); \
 	load >>= FSHIFT;
 
+#define LOAD_INT(x) ((x) >> FSHIFT)
+#define LOAD_FRAC(x) LOAD_INT(((x) & (FIXED_1-1)) * 100)
+
 extern unsigned long total_forks;
 extern int nr_threads;
 extern int last_pid;
 DECLARE_PER_CPU(unsigned long, process_counts);
 extern int nr_processes(void);
+
+extern unsigned long nr_sleeping(void);
+extern unsigned long nr_stopped(void);
+extern unsigned long nr_zombie;
+extern atomic_t nr_dead;
 extern unsigned long nr_running(void);
 extern unsigned long nr_uninterruptible(void);
 extern unsigned long nr_iowait(void);
 
+#ifdef CONFIG_VE
+struct ve_struct;
+extern unsigned long nr_running_ve(struct ve_struct *);
+extern unsigned long nr_iowait_ve(struct ve_struct *);
+extern unsigned long nr_uninterruptible_ve(struct ve_struct *);
+#else
+#define nr_running_ve(ve)		0
+#define nr_iowait_ve(ve)		0
+#define nr_uninterruptible_ve(ve)	0
+#endif
+
 #include <linux/time.h>
 #include <linux/param.h>
 #include <linux/resource.h>
@@ -189,6 +211,8 @@ extern cpumask_t nohz_cpu_mask;
 
 extern void show_state(void);
 extern void show_regs(struct pt_regs *);
+extern void smp_show_regs(struct pt_regs *, void *);
+extern void show_vsched(void);
 
 /*
  * TASK is a pointer to the task whose backtrace we want to see (or NULL for current
@@ -252,31 +276,7 @@ arch_get_unmapped_area_topdown(struct fi
 extern void arch_unmap_area(struct mm_struct *, unsigned long);
 extern void arch_unmap_area_topdown(struct mm_struct *, unsigned long);
 
-#if NR_CPUS >= CONFIG_SPLIT_PTLOCK_CPUS
-/*
- * The mm counters are not protected by its page_table_lock,
- * so must be incremented atomically.
- */
-#define set_mm_counter(mm, member, value) atomic_long_set(&(mm)->_##member, value)
-#define get_mm_counter(mm, member) ((unsigned long)atomic_long_read(&(mm)->_##member))
-#define add_mm_counter(mm, member, value) atomic_long_add(value, &(mm)->_##member)
-#define inc_mm_counter(mm, member) atomic_long_inc(&(mm)->_##member)
-#define dec_mm_counter(mm, member) atomic_long_dec(&(mm)->_##member)
-typedef atomic_long_t mm_counter_t;
-
-#else  /* NR_CPUS < CONFIG_SPLIT_PTLOCK_CPUS */
-/*
- * The mm counters are protected by its page_table_lock,
- * so can be incremented directly.
- */
-#define set_mm_counter(mm, member, value) (mm)->_##member = (value)
-#define get_mm_counter(mm, member) ((mm)->_##member)
-#define add_mm_counter(mm, member, value) (mm)->_##member += (value)
-#define inc_mm_counter(mm, member) (mm)->_##member++
-#define dec_mm_counter(mm, member) (mm)->_##member--
-typedef unsigned long mm_counter_t;
-
-#endif /* NR_CPUS < CONFIG_SPLIT_PTLOCK_CPUS */
+#include <linux/mm_counter.h>
 
 #define get_mm_rss(mm)					\
 	(get_mm_counter(mm, file_rss) + get_mm_counter(mm, anon_rss))
@@ -332,6 +332,7 @@ struct mm_struct {
 	unsigned long saved_auxv[AT_VECTOR_SIZE]; /* for /proc/PID/auxv */
 
 	unsigned dumpable:2;
+	unsigned vps_dumpable:1;
 	cpumask_t cpu_vm_mask;
 
 	/* Architecture-specific MM context */
@@ -348,6 +349,9 @@ struct mm_struct {
 	/* aio bits */
 	rwlock_t		ioctx_list_lock;
 	struct kioctx		*ioctx_list;
+#ifdef CONFIG_USER_RESOURCE
+	struct user_beancounter *mm_ub;
+#endif
 };
 
 struct sighand_struct {
@@ -364,6 +368,9 @@ static inline void sighand_free(struct s
 	call_rcu(&sp->rcu, sighand_free_cb);
 }
 
+#include <linux/ve.h>
+#include <linux/ve_task.h>
+
 /*
  * NOTE! "signal_struct" does not have it's own
  * locking, because a shared signal_struct always
@@ -688,6 +695,8 @@ static inline void prefetch_stack(struct
 
 struct audit_context;		/* See audit.c */
 struct mempolicy;
+struct vcpu_scheduler;
+struct vcpu_info;
 
 struct task_struct {
 	volatile long state;	/* -1 unrunnable, 0 runnable, >0 stopped */
@@ -701,6 +710,14 @@ struct task_struct {
 #if defined(CONFIG_SMP) && defined(__ARCH_WANT_UNLOCKED_CTXSW)
 	int oncpu;
 #endif
+#ifdef CONFIG_SCHED_VCPU
+	struct vcpu_scheduler *vsched;
+	struct vcpu_info *vcpu;
+
+	/* id's are saved to avoid locking (e.g. on vsched->id access) */
+	int vsched_id;
+	int vcpu_id;
+#endif
 	int prio, static_prio;
 	struct list_head run_list;
 	prio_array_t *array;
@@ -846,6 +863,11 @@ struct task_struct {
 
 	unsigned long ptrace_message;
 	siginfo_t *last_siginfo; /* For ptrace use.  */
+
+/* state tracking for suspend */
+	__u8	 pn_state;
+	__u8	 stopped_state:1;
+
 /*
  * current io wait handle: wait queue entry to use for io waits
  * If this thread is processing aio, this points at the waitqueue
@@ -871,6 +893,16 @@ struct task_struct {
 #endif
 	atomic_t fs_excl;	/* holding fs exclusive resources */
 	struct rcu_head rcu;
+#ifdef CONFIG_USER_RESOURCE
+	struct task_beancounter task_bc;
+#endif
+#ifdef CONFIG_VE
+	struct ve_task_info ve_task_info;
+#endif
+#if defined(CONFIG_VZ_QUOTA) || defined(CONFIG_VZ_QUOTA_MODULE)
+	unsigned long	magic;
+	struct inode	*ino;
+#endif
 };
 
 static inline pid_t process_group(struct task_struct *tsk)
@@ -929,6 +961,43 @@ static inline void put_task_struct(struc
 #define PF_RANDOMIZE	0x00800000	/* randomize virtual address space */
 #define PF_SWAPWRITE	0x01000000	/* Allowed to write to swap */
 
+#ifndef CONFIG_VE
+#define set_pn_state(tsk, state)	do { } while(0)
+#define clear_pn_state(tsk)		do { } while(0)
+#define set_stop_state(tsk)		do { } while(0)
+#define clear_stop_state(tsk)		do { } while(0)
+#else
+#define PN_STOP_TF	1	/* was not in 2.6.8 */
+#define PN_STOP_TF_RT	2	/* was not in 2.6.8 */ 
+#define PN_STOP_ENTRY	3
+#define PN_STOP_FORK	4
+#define PN_STOP_VFORK	5
+#define PN_STOP_SIGNAL	6
+#define PN_STOP_EXIT	7
+#define PN_STOP_EXEC	8
+#define PN_STOP_LEAVE	9
+
+static inline void set_pn_state(struct task_struct *tsk, int state)
+{
+	tsk->pn_state = state;
+}
+
+static inline void clear_pn_state(struct task_struct *tsk)
+{
+	tsk->pn_state = 0;
+}
+
+static inline void set_stop_state(struct task_struct *tsk)
+{
+	tsk->stopped_state = 1;
+}
+
+static inline void clear_stop_state(struct task_struct *tsk)
+{
+	tsk->stopped_state = 0;
+}
+#endif
+
 /*
  * Only the _current_ task can read/write to tsk->flags, but other
  * tasks can access tsk->flags in readonly mode for example
@@ -968,6 +1037,21 @@ static inline int set_cpus_allowed(task_
 extern unsigned long long sched_clock(void);
 extern unsigned long long current_sched_time(const task_t *current_task);
 
+static inline unsigned long cycles_to_clocks(cycles_t cycles)	/* cycles / cycles_per_clock */
+{
+	extern unsigned long cycles_per_clock;
+	do_div(cycles, cycles_per_clock);	/* quotient is left in cycles */
+	return cycles;	/* NOTE(review): narrowed to unsigned long if cycles_t is wider -- confirm intended */
+}
+/* Same conversion with cycles_per_jiffy as the divisor; the result is returned as u64. */
+static inline u64 cycles_to_jiffies(cycles_t cycles)
+{
+	extern unsigned long cycles_per_jiffy;
+	do_div(cycles, cycles_per_jiffy);	/* quotient is left in cycles */
+	return cycles;
+}
+
+
 /* sched_exec is called by processes performing an exec */
 #ifdef CONFIG_SMP
 extern void sched_exec(void);
@@ -1020,12 +1104,237 @@ extern struct task_struct init_task;
 
 extern struct   mm_struct init_mm;
 
-#define find_task_by_pid(nr)	find_task_by_pid_type(PIDTYPE_PID, nr)
-extern struct task_struct *find_task_by_pid_type(int type, int pid);
+#define find_task_by_pid_all(nr)	\
+		find_task_by_pid_type_all(PIDTYPE_PID, nr)
+extern struct task_struct *find_task_by_pid_type_all(int type, int pid);
 extern void set_special_pids(pid_t session, pid_t pgrp);
 extern void __set_special_pids(pid_t session, pid_t pgrp);
 
+#ifndef CONFIG_VE
+#define find_task_by_pid_ve find_task_by_pid_all
+
+#define get_exec_env()		((struct ve_struct *)NULL)
+#define set_exec_env(new_env)	((struct ve_struct *)NULL)
+
+#define ve_is_super(env)			1
+#define ve_accessible(target, owner)		1
+#define ve_accessible_strict(target, owner)	1
+#define ve_accessible_veid(target, owner)		1
+#define ve_accessible_strict_veid(target, owner)	1
+
+#define VEID(envid)				0
+#define get_ve0() NULL
+
+static inline pid_t virt_pid(struct task_struct *tsk)
+{
+	return tsk->pid;
+}
+
+static inline pid_t virt_tgid(struct task_struct *tsk)
+{
+	return tsk->tgid;
+}
+
+static inline pid_t virt_pgid(struct task_struct *tsk)
+{
+	return tsk->signal->pgrp;
+}
+
+static inline pid_t virt_sid(struct task_struct *tsk)
+{
+	return tsk->signal->session;
+}
+
+#define get_task_pid_ve(tsk, ve)	get_task_pid(tsk)
+
+static inline pid_t get_task_pid(struct task_struct *tsk)
+{
+	return tsk->pid;
+}
+
+static inline pid_t get_task_tgid(struct task_struct *tsk)
+{
+	return tsk->tgid;
+}
+
+static inline pid_t get_task_pgid(struct task_struct *tsk)
+{
+	return tsk->signal->pgrp;
+}
+
+static inline pid_t get_task_sid(struct task_struct *tsk)
+{
+	return tsk->signal->session;
+}
+
+static inline void set_virt_pid(struct task_struct *tsk, pid_t pid)
+{
+}
+
+static inline void set_virt_tgid(struct task_struct *tsk, pid_t pid)
+{
+}
+
+static inline void set_virt_pgid(struct task_struct *tsk, pid_t pid)
+{
+}
+
+static inline void set_virt_sid(struct task_struct *tsk, pid_t pid)
+{
+}
+
+static inline pid_t get_task_ppid(struct task_struct *p)
+{
+	return pid_alive(p) ? p->group_leader->real_parent->tgid : 0;
+}
+
+#else	/* CONFIG_VE */
+
+#include <asm/current.h>
+#include <linux/ve.h>
+
+extern struct ve_struct ve0;
+
+#define find_task_by_pid_ve(nr)	\
+		find_task_by_pid_type_ve(PIDTYPE_PID, nr)
+
+extern struct task_struct *find_task_by_pid_type_ve(int type, int pid);
+
+#define get_ve0()	(&ve0)
+#define VEID(envid)	((envid)->veid)
+
+#define get_exec_env()	(VE_TASK_INFO(current)->exec_env)
+static inline struct ve_struct *set_exec_env(struct ve_struct *new_env)
+{
+	struct ve_struct *old_env;
+	/* Swap current's exec_env for new_env and hand back the previous value. */
+	old_env = VE_TASK_INFO(current)->exec_env;
+	VE_TASK_INFO(current)->exec_env = new_env;
+
+	return old_env;
+}
+
+#define ve_is_super(env) ((env) == get_ve0())
+#define ve_accessible_strict(target, owner)	((target) == (owner))
+static inline int ve_accessible(struct ve_struct *target,
+				struct ve_struct *owner) {
+	return ve_is_super(owner) || ve_accessible_strict(target, owner);
+}
+
+#define ve_accessible_strict_veid(target, owner) ((target) == (owner))
+static inline int ve_accessible_veid(envid_t target, envid_t owner)
+{
+	return get_ve0()->veid == owner ||
+	       ve_accessible_strict_veid(target, owner);
+}
+
+static inline pid_t virt_pid(struct task_struct *tsk)
+{
+	return tsk->pids[PIDTYPE_PID].vnr;
+}
+
+static inline pid_t virt_tgid(struct task_struct *tsk)
+{
+	return tsk->pids[PIDTYPE_TGID].vnr;
+}
+
+static inline pid_t virt_pgid(struct task_struct *tsk)
+{
+	return tsk->pids[PIDTYPE_PGID].vnr;
+}
+
+static inline pid_t virt_sid(struct task_struct *tsk)
+{
+	return tsk->pids[PIDTYPE_SID].vnr;
+}
+
+static inline pid_t get_task_pid_ve(struct task_struct *tsk, struct ve_struct *env)
+{
+	return ve_is_super(env) ? tsk->pid : virt_pid(tsk);
+}
+
+static inline pid_t get_task_pid(struct task_struct *tsk)
+{
+	return get_task_pid_ve(tsk, get_exec_env());
+}
+
+static inline pid_t get_task_tgid(struct task_struct *tsk)
+{
+	return ve_is_super(get_exec_env()) ? tsk->tgid : virt_tgid(tsk);
+}
+
+static inline pid_t get_task_pgid(struct task_struct *tsk)
+{
+	return ve_is_super(get_exec_env()) ? tsk->signal->pgrp : virt_pgid(tsk);
+}
+
+static inline pid_t get_task_sid(struct task_struct *tsk)
+{
+	return ve_is_super(get_exec_env()) ? tsk->signal->session : virt_sid(tsk);
+}
+
+static inline void set_virt_pid(struct task_struct *tsk, pid_t pid)
+{
+	tsk->pids[PIDTYPE_PID].vnr = pid;
+}
+
+static inline void set_virt_tgid(struct task_struct *tsk, pid_t pid)
+{
+	tsk->pids[PIDTYPE_TGID].vnr = pid;
+}
+
+static inline void set_virt_pgid(struct task_struct *tsk, pid_t pid)
+{
+	tsk->pids[PIDTYPE_PGID].vnr = pid;
+}
+
+static inline void set_virt_sid(struct task_struct *tsk, pid_t pid)
+{
+	tsk->pids[PIDTYPE_SID].vnr = pid;
+}
+
+static inline pid_t get_task_ppid(struct task_struct *p)	/* parent tgid as reported to the current exec env */
+{
+	struct task_struct *parent;
+	struct ve_struct *env;
+
+	if (!pid_alive(p))
+		return 0;
+	env = get_exec_env();
+	if (get_task_pid_ve(p, env) == 1)	/* a task seen as pid 1 in env reports parent 0 */
+		return 0;
+	parent = p->group_leader->real_parent;
+	return ve_accessible(VE_TASK_INFO(parent)->owner_env, env) ?
+		get_task_tgid(parent) : 1;	/* parent's env not accessible from env: report 1 */
+}
+
+void ve_sched_get_cpu_stat(struct ve_struct *envid, cycles_t *idle,
+				cycles_t *strv, unsigned int cpu);
+void ve_sched_attach(struct ve_struct *envid);
+
+#endif	/* CONFIG_VE */
+
+
+#ifdef CONFIG_VE
+extern cycles_t ve_sched_get_idle_time(struct ve_struct *, int);
+extern cycles_t ve_sched_get_iowait_time(struct ve_struct *, int);
+#else
+#define ve_sched_get_idle_time(ve, cpu) 	0
+#define ve_sched_get_iowait_time(ve, cpu)	0
+#endif
+
+#ifdef CONFIG_SCHED_VCPU
+struct vcpu_scheduler;
+extern void fastcall vsched_cpu_online_map(struct vcpu_scheduler *sched,
+		cpumask_t *mask);
+#else
+#define vsched_cpu_online_map(vsched, mask)     do {    /* !CONFIG_SCHED_VCPU: vsched unused, copy cpu_online_map */ \
+			*(mask) = cpu_online_map;       \
+	} while (0)
+#endif
+
 /* per-UID process charging. */
+extern int set_user(uid_t new_ruid, int dumpclear);
 extern struct user_struct * alloc_uid(uid_t);
 static inline struct user_struct *get_uid(struct user_struct *u)
 {
@@ -1043,7 +1352,7 @@ extern int FASTCALL(wake_up_state(struct
 extern int FASTCALL(wake_up_process(struct task_struct * tsk));
 extern void FASTCALL(wake_up_new_task(struct task_struct * tsk,
 						unsigned long clone_flags));
-#ifdef CONFIG_SMP
+#if defined(CONFIG_SMP) || defined (CONFIG_SCHED_VCPU)
  extern void kick_process(struct task_struct *tsk);
 #else
  static inline void kick_process(struct task_struct *tsk) { }
@@ -1161,12 +1470,19 @@ extern task_t *child_reaper;
 
 extern int do_execve(char *, char __user * __user *, char __user * __user *, struct pt_regs *);
 extern long do_fork(unsigned long, unsigned long, struct pt_regs *, unsigned long, int __user *, int __user *);
+extern long do_fork_pid(unsigned long clone_flags,
+			unsigned long stack_start,
+			struct pt_regs *regs,
+			unsigned long stack_size,
+			int __user *parent_tidptr,
+			int __user *child_tidptr,
+			long pid0);
 task_t *fork_idle(int);
 
 extern void set_task_comm(struct task_struct *tsk, char *from);
 extern void get_task_comm(char *to, struct task_struct *tsk);
 
-#ifdef CONFIG_SMP
+#if defined(CONFIG_SMP) || defined (CONFIG_SCHED_VCPU)
 extern void wait_task_inactive(task_t * p);
 #else
 #define wait_task_inactive(p)	do { } while (0)
@@ -1187,22 +1503,100 @@ extern void wait_task_inactive(task_t * 
 	add_parent(p, (p)->parent);				\
 	} while (0)
 
-#define next_task(p)	list_entry((p)->tasks.next, struct task_struct, tasks)
-#define prev_task(p)	list_entry((p)->tasks.prev, struct task_struct, tasks)
+#define next_task_all(p)	list_entry((p)->tasks.next, struct task_struct, tasks)
+#define prev_task_all(p)	list_entry((p)->tasks.prev, struct task_struct, tasks)
 
-#define for_each_process(p) \
-	for (p = &init_task ; (p = next_task(p)) != &init_task ; )
+#define for_each_process_all(p) \
+	for (p = &init_task ; (p = next_task_all(p)) != &init_task ; )
 
 /*
  * Careful: do_each_thread/while_each_thread is a double loop so
  *          'break' will not work as expected - use goto instead.
  */
-#define do_each_thread(g, t) \
-	for (g = t = &init_task ; (g = t = next_task(g)) != &init_task ; ) do
+#define do_each_thread_all(g, t) \
+	for (g = t = &init_task ; (g = t = next_task_all(g)) != &init_task ; ) do
 
-#define while_each_thread(g, t) \
+#define while_each_thread_all(g, t) \
 	while ((t = next_thread(t)) != g)
 
+#ifndef CONFIG_VE
+
+#define SET_VE_LINKS(p)
+#define REMOVE_VE_LINKS(p)
+#define for_each_process_ve(p)		for_each_process_all(p)
+#define do_each_thread_ve(g, t)		do_each_thread_all(g, t)
+#define while_each_thread_ve(g, t)	while_each_thread_all(g, t)
+#define first_task_ve()			next_task_ve(&init_task)
+#define __first_task_ve(owner)		next_task_ve(&init_task)
+#define __next_task_ve(owner, p)	next_task_ve(p)
+#define next_task_ve(p)			\
+		(next_task_all(p) != &init_task ? next_task_all(p) : NULL)
+
+#else	/* CONFIG_VE */
+
+#define SET_VE_LINKS(p)							\
+	do {								\
+		if (thread_group_leader(p))				\
+			list_add_tail(&VE_TASK_INFO(p)->vetask_list,	\
+					&VE_TASK_INFO(p)->owner_env->vetask_lh); \
+	} while (0)
+
+#define REMOVE_VE_LINKS(p)						\
+	do {								\
+		if (thread_group_leader(p))				\
+			list_del(&VE_TASK_INFO(p)->vetask_list);	\
+	} while(0)
+
+static inline task_t* __first_task_ve(struct ve_struct *ve)
+{
+	task_t *tsk;
+	/* Super VE: first entry of the global task list (NULL if it wraps to init_task); otherwise the head of ve->vetask_lh. */
+	if (unlikely(ve_is_super(ve))) {
+		tsk = next_task_all(&init_task);
+		if (tsk == &init_task)
+			tsk = NULL;
+	} else {
+		/* probably can return ve->init_entry, but it's more clear */
+		BUG_ON(list_empty(&ve->vetask_lh));	/* an empty per-VE list is treated as a bug */
+		tsk = VE_TASK_LIST_2_TASK(ve->vetask_lh.next);
+	}
+	return tsk;
+}
+
+static inline task_t* __next_task_ve(struct ve_struct *ve, task_t *tsk)	/* successor of tsk, or NULL at the end */
+{
+	if (unlikely(ve_is_super(ve))) {
+		tsk = next_task_all(tsk);	/* super VE iterates the global task list */
+		if (tsk == &init_task)
+			tsk = NULL;
+	} else {
+		struct list_head *tmp;
+		/* tsk must be owned by ve; the walk ends when the list head ve->vetask_lh is reached */
+		BUG_ON(VE_TASK_INFO(tsk)->owner_env != ve);
+		tmp = VE_TASK_INFO(tsk)->vetask_list.next;
+		if (tmp == &ve->vetask_lh)
+			tsk = NULL;
+		else
+			tsk = VE_TASK_LIST_2_TASK(tmp);
+	}
+	return tsk;
+}
+
+#define first_task_ve()	__first_task_ve(get_exec_env())
+#define next_task_ve(p)	__next_task_ve(get_exec_env(), p)
+/* no one uses prev_task_ve(), copy next_task_ve() if needed */
+
+#define for_each_process_ve(p) \
+	for (p = first_task_ve(); p != NULL ; p = next_task_ve(p))
+
+#define do_each_thread_ve(g, t) \
+	for (g = t = first_task_ve() ; g != NULL; g = t = next_task_ve(g)) do
+
+#define while_each_thread_ve(g, t) \
+	while ((t = next_thread(t)) != g)
+
+#endif	/* CONFIG_VE */
+
 extern task_t * FASTCALL(next_thread(const task_t *p));
 
 #define thread_group_leader(p)	(p->pid == p->tgid)
@@ -1348,28 +1742,63 @@ extern void signal_wake_up(struct task_s
  */
 #ifdef CONFIG_SMP
 
-static inline unsigned int task_cpu(const struct task_struct *p)
+static inline unsigned int task_pcpu(const struct task_struct *p)
 {
 	return task_thread_info(p)->cpu;
 }
 
-static inline void set_task_cpu(struct task_struct *p, unsigned int cpu)
+static inline void set_task_pcpu(struct task_struct *p, unsigned int cpu)
 {
 	task_thread_info(p)->cpu = cpu;
 }
 
 #else
 
+static inline unsigned int task_pcpu(const struct task_struct *p)
+{
+	return 0;	/* !CONFIG_SMP: always reports CPU 0 */
+}
+
+static inline void set_task_pcpu(struct task_struct *p, unsigned int cpu)
+{	/* !CONFIG_SMP: empty stub, both arguments are ignored */
+}
+
+#endif /* CONFIG_SMP */
+
+#ifdef CONFIG_SCHED_VCPU
+/* In this branch task_cpu() returns p->vcpu_id rather than wrapping task_pcpu(). */
+static inline unsigned int task_vsched_id(const struct task_struct *p)
+{
+	return p->vsched_id;
+}
+
 static inline unsigned int task_cpu(const struct task_struct *p)
 {
+	return p->vcpu_id;
+}
+
+extern void set_task_cpu(struct task_struct *p, unsigned int vcpu);
+extern int vcpu_online(int cpu);
+
+#else
+/* !CONFIG_SCHED_VCPU: vsched id is 0; the cpu helpers wrap task_pcpu()/cpu_online(). */
+static inline unsigned int task_vsched_id(const struct task_struct *p)
+{
 	return 0;
 }
 
+static inline unsigned int task_cpu(const struct task_struct *p)
+{
+	return task_pcpu(p);
+}
+
 static inline void set_task_cpu(struct task_struct *p, unsigned int cpu)
 {
+	set_task_pcpu(p, cpu);
 }
 
-#endif /* CONFIG_SMP */
+#define vcpu_online(cpu)	cpu_online(cpu)
+#endif /* CONFIG_SCHED_VCPU */
 
 #ifdef HAVE_ARCH_PICK_MMAP_LAYOUT
 extern void arch_pick_mmap_layout(struct mm_struct *mm);
@@ -1401,7 +1830,7 @@ static inline int frozen(struct task_str
  */
 static inline int freezing(struct task_struct *p)
 {
-	return p->flags & PF_FREEZE;
+	return test_tsk_thread_flag(p, TIF_FREEZE);
 }
 
 /*
@@ -1410,7 +1839,7 @@ static inline int freezing(struct task_s
  */
 static inline void freeze(struct task_struct *p)
 {
-	p->flags |= PF_FREEZE;
+	set_tsk_thread_flag(p, TIF_FREEZE);
 }
 
 /*
@@ -1431,7 +1860,8 @@ static inline int thaw_process(struct ta
  */
 static inline void frozen_process(struct task_struct *p)
 {
-	p->flags = (p->flags & ~PF_FREEZE) | PF_FROZEN;
+	clear_tsk_thread_flag(p, TIF_FREEZE);
+	p->flags |= PF_FROZEN;
 }
 
 extern void refrigerator(void);
diff -uprN linux-2.6.16/include/linux/sem.h linux-2.6.16.ovz/include/linux/sem.h
--- linux-2.6.16/include/linux/sem.h	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/include/linux/sem.h	2006-07-05 08:34:56.000000000 -0400
@@ -155,6 +155,9 @@ static inline void exit_sem(struct task_
 }
 #endif
 
+int sysvipc_walk_sem(int (*func)(int, struct sem_array*, void *), void *arg);
+int sysvipc_setup_sem(key_t key, int semid, size_t size, int semflg);
+
 #endif /* __KERNEL__ */
 
 #endif /* _LINUX_SEM_H */
diff -uprN linux-2.6.16/include/linux/shm.h linux-2.6.16.ovz/include/linux/shm.h
--- linux-2.6.16/include/linux/shm.h	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/include/linux/shm.h	2006-07-05 08:34:56.000000000 -0400
@@ -86,6 +86,7 @@ struct shmid_kernel /* private to the ke
 	pid_t			shm_cprid;
 	pid_t			shm_lprid;
 	struct user_struct	*mlock_user;
+	struct ipc_ids		*_shm_ids;
 };
 
 /* shm_mode upper byte flags */
@@ -104,6 +105,9 @@ static inline long do_shmat(int shmid, c
 }
 #endif
 
+int sysvipc_walk_shm(int (*func)(struct shmid_kernel*, void *), void *arg);
+struct file * sysvipc_setup_shm(key_t key, int shmid, size_t size, int shmflg);
+
 #endif /* __KERNEL__ */
 
 #endif /* _LINUX_SHM_H_ */
diff -uprN linux-2.6.16/include/linux/shmem_fs.h linux-2.6.16.ovz/include/linux/shmem_fs.h
--- linux-2.6.16/include/linux/shmem_fs.h	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/include/linux/shmem_fs.h	2006-07-05 08:34:56.000000000 -0400
@@ -19,6 +19,9 @@ struct shmem_inode_info {
 	swp_entry_t		i_direct[SHMEM_NR_DIRECT]; /* first blocks */
 	struct list_head	swaplist;	/* chain of maybes on swap */
 	struct inode		vfs_inode;
+#ifdef CONFIG_USER_RESOURCE
+	struct user_beancounter	*shmi_ub;
+#endif
 };
 
 struct shmem_sb_info {
diff -uprN linux-2.6.16/include/linux/signal.h linux-2.6.16.ovz/include/linux/signal.h
--- linux-2.6.16/include/linux/signal.h	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/include/linux/signal.h	2006-07-05 08:34:56.000000000 -0400
@@ -3,6 +3,7 @@
 
 #include <linux/list.h>
 #include <linux/spinlock.h>
+#include <linux/slab.h>
 #include <asm/signal.h>
 #include <asm/siginfo.h>
 
@@ -41,6 +42,9 @@ struct sigqueue {
 	int flags;
 	siginfo_t info;
 	struct user_struct *user;
+#ifdef CONFIG_USER_RESOURCE
+	struct user_beancounter *sig_ub;
+#endif
 };
 
 /* flags values. */
@@ -263,6 +267,8 @@ extern int sigprocmask(int, sigset_t *, 
 struct pt_regs;
 extern int get_signal_to_deliver(siginfo_t *info, struct k_sigaction *return_ka, struct pt_regs *regs, void *cookie);
 
+extern kmem_cache_t *sigqueue_cachep;
+
 #endif /* __KERNEL__ */
 
 #endif /* _LINUX_SIGNAL_H */
diff -uprN linux-2.6.16/include/linux/skbuff.h linux-2.6.16.ovz/include/linux/skbuff.h
--- linux-2.6.16/include/linux/skbuff.h	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/include/linux/skbuff.h	2006-07-05 08:34:56.000000000 -0400
@@ -19,6 +19,7 @@
 #include <linux/compiler.h>
 #include <linux/time.h>
 #include <linux/cache.h>
+#include <linux/ve_owner.h>
 
 #include <asm/atomic.h>
 #include <asm/types.h>
@@ -211,6 +212,8 @@ enum {
  *	@tc_verd: traffic control verdict
  */
 
+#include <ub/ub_sk.h>
+
 struct sk_buff {
 	/* These two members must be first. */
 	struct sk_buff		*next;
@@ -294,13 +297,18 @@ struct sk_buff {
 				*data,
 				*tail,
 				*end;
+	struct skb_beancounter	skb_bc;
+	struct ve_struct	*owner_env;
 };
 
+DCL_VE_OWNER_PROTO(SKB, struct sk_buff, owner_env)
+
 #ifdef __KERNEL__
 /*
  *	Handling routines are only of interest to the kernel
  */
 #include <linux/slab.h>
+#include <ub/ub_net.h>
 
 #include <asm/system.h>
 
@@ -1007,6 +1015,8 @@ static inline int pskb_trim(struct sk_bu
  */
 static inline void skb_orphan(struct sk_buff *skb)
 {
+	ub_skb_uncharge(skb);
+
 	if (skb->destructor)
 		skb->destructor(skb);
 	skb->destructor = NULL;
diff -uprN linux-2.6.16/include/linux/slab.h linux-2.6.16.ovz/include/linux/slab.h
--- linux-2.6.16/include/linux/slab.h	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/include/linux/slab.h	2006-07-05 08:34:56.000000000 -0400
@@ -48,6 +48,26 @@ typedef struct kmem_cache kmem_cache_t;
 #define SLAB_PANIC		0x00040000UL	/* panic if kmem_cache_create() fails */
 #define SLAB_DESTROY_BY_RCU	0x00080000UL	/* defer freeing pages to RCU */
 
+/*
+ * allocation rules:                            __GFP_UBC       0
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ *  cache (SLAB_UBC)				charge		charge
+ *				      (usual caches: mm, vma, task_struct, ...)
+ *
+ *  cache (SLAB_UBC | SLAB_NO_CHARGE)		charge		---
+ *					     (ub_kmalloc)    (kmalloc)
+ *
+ *  cache (no UB flags)				BUG()		---
+ *							(nonub caches, mempools)
+ *
+ *  pages					charge		---
+ *					   (ub_vmalloc,	      (vmalloc,
+ *				        poll, fdsets, ...)  non-ub allocs)
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ */
+#define SLAB_UBC		0x20000000UL	/* alloc space for ubs ... */
+#define SLAB_NO_CHARGE		0x40000000UL	/* ... but don't charge */
+
 /* flags passed to a constructor func */
 #define	SLAB_CTOR_CONSTRUCTOR	0x001UL		/* if not set, then deconstructor */
 #define SLAB_CTOR_ATOMIC	0x002UL		/* tell constructor it can't sleep */
@@ -108,6 +128,8 @@ found:
 	return __kmalloc(size, flags);
 }
 
+#define ub_kmalloc(size, flags) kmalloc(size, ((flags) | __GFP_UBC))
+
 extern void *kzalloc(size_t, gfp_t);
 
 /**
diff -uprN linux-2.6.16/include/linux/smp.h linux-2.6.16.ovz/include/linux/smp.h
--- linux-2.6.16/include/linux/smp.h	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/include/linux/smp.h	2006-07-05 08:34:56.000000000 -0400
@@ -10,6 +10,9 @@
 
 extern void cpu_idle(void);
 
+struct pt_regs;
+typedef void (*smp_nmi_function)(struct pt_regs *regs, void *info);
+
 #ifdef CONFIG_SMP
 
 #include <linux/preempt.h>
@@ -49,6 +52,8 @@ extern int __cpu_up(unsigned int cpunum)
  */
 extern void smp_cpus_done(unsigned int max_cpus);
 
+extern int smp_nmi_call_function(smp_nmi_function func, void *info, int wait);
+
 /*
  * Call a function on all other processors
  */
@@ -99,6 +104,12 @@ static inline void smp_send_reschedule(i
 #define num_booting_cpus()			1
 #define smp_prepare_boot_cpu()			do {} while (0)
 
+static inline int smp_nmi_call_function(smp_nmi_function func,
+					 void *info, int wait)
+{
+	return 0;
+}
+
 #endif /* !SMP */
 
 /*
diff -uprN linux-2.6.16/include/linux/socket.h linux-2.6.16.ovz/include/linux/socket.h
--- linux-2.6.16/include/linux/socket.h	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/include/linux/socket.h	2006-07-05 08:34:56.000000000 -0400
@@ -300,6 +300,7 @@ extern int memcpy_toiovec(struct iovec *
 extern int move_addr_to_user(void *kaddr, int klen, void __user *uaddr, int __user *ulen);
 extern int move_addr_to_kernel(void __user *uaddr, int ulen, void *kaddr);
 extern int put_cmsg(struct msghdr*, int level, int type, int len, void *data);
+extern int vz_security_proto_check(int family, int type, int protocol);
 
 #endif
 #endif /* not kernel and not glibc */
diff -uprN linux-2.6.16/include/linux/swap.h linux-2.6.16.ovz/include/linux/swap.h
--- linux-2.6.16/include/linux/swap.h	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/include/linux/swap.h	2006-07-05 08:34:56.000000000 -0400
@@ -80,6 +80,7 @@ struct address_space;
 struct sysinfo;
 struct writeback_control;
 struct zone;
+struct user_beancounter;
 
 /*
  * A swap extent maps a range of a swapfile's PAGE_SIZE pages onto a range of
@@ -119,6 +120,7 @@ enum {
 /*
  * The in-memory structure used to track swap areas.
  */
+struct user_beancounter;
 struct swap_info_struct {
 	unsigned int flags;
 	int prio;			/* swap priority */
@@ -136,6 +138,9 @@ struct swap_info_struct {
 	unsigned int max;
 	unsigned int inuse_pages;
 	int next;			/* next entry on swap list */
+#ifdef CONFIG_USER_SWAP_ACCOUNTING
+	struct user_beancounter **swap_ubs;
+#endif
 };
 
 struct swap_list_t {
@@ -240,7 +245,7 @@ extern long total_swap_pages;
 extern unsigned int nr_swapfiles;
 extern struct swap_info_struct swap_info[];
 extern void si_swapinfo(struct sysinfo *);
-extern swp_entry_t get_swap_page(void);
+extern swp_entry_t get_swap_page(struct user_beancounter *);
 extern swp_entry_t get_swap_page_of_type(int type);
 extern int swap_duplicate(swp_entry_t);
 extern int valid_swaphandles(swp_entry_t, unsigned long *);
@@ -253,7 +258,9 @@ extern int remove_exclusive_swap_page(st
 struct backing_dev_info;
 
 extern spinlock_t swap_lock;
-extern int remove_vma_swap(struct vm_area_struct *vma, struct page *page);
+struct page_beancounter;
+extern int remove_vma_swap(struct vm_area_struct *vma, struct page *page,
+		struct page_beancounter **pb);
 
 /* linux/mm/thrash.c */
 extern struct mm_struct * swap_token_mm;
@@ -310,7 +317,7 @@ static inline int remove_exclusive_swap_
 	return 0;
 }
 
-static inline swp_entry_t get_swap_page(void)
+static inline swp_entry_t get_swap_page(struct user_beancounter *ub)
 {
 	swp_entry_t entry;
 	entry.val = 0;
diff -uprN linux-2.6.16/include/linux/sysctl.h linux-2.6.16.ovz/include/linux/sysctl.h
--- linux-2.6.16/include/linux/sysctl.h	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/include/linux/sysctl.h	2006-07-05 08:34:56.000000000 -0400
@@ -148,6 +148,13 @@ enum
 	KERN_SPIN_RETRY=70,	/* int: number of spinlock retries */
 	KERN_ACPI_VIDEO_FLAGS=71, /* int: flags for setting up video after ACPI sleep */
 	KERN_IA64_UNALIGNED=72, /* int: ia64 unaligned userland trap enable */
+	KERN_SILENCE_LEVEL=200, /* int: Console silence loglevel */
+	KERN_ALLOC_FAIL_WARN=201, /* int: whether we'll print "alloc failure" */
+	KERN_VIRT_PIDS=202,	/* int: VE pids virtualization */
+	KERN_VIRT_OSRELEASE=205,/* virtualization of utsname.release */
+	KERN_FAIRSCHED_MAX_LATENCY=201, /* int: Max start_tag delta; NOTE(review): 201 is also KERN_ALLOC_FAIL_WARN */
+	KERN_VCPU_SCHED_TIMESLICE=202, /* NOTE(review): 202 is also KERN_VIRT_PIDS -- confirm the IDs may collide */
+	KERN_VCPU_TIMESLICE=203,
 };
 
 
@@ -397,10 +404,12 @@ enum
 	NET_TCP_CONG_CONTROL=110,
 	NET_TCP_ABC=111,
 	NET_IPV4_IPFRAG_MAX_DIST=112,
+	NET_TCP_USE_SG=245,
 };
 
 enum {
 	NET_IPV4_ROUTE_FLUSH=1,
+	NET_IPV4_ROUTE_SRC_CHECK=188,
 	NET_IPV4_ROUTE_MIN_DELAY=2,
 	NET_IPV4_ROUTE_MAX_DELAY=3,
 	NET_IPV4_ROUTE_GC_THRESH=4,
@@ -760,6 +769,12 @@ enum
 	FS_AIO_NR=18,	/* current system-wide number of aio requests */
 	FS_AIO_MAX_NR=19,	/* system-wide maximum number of aio requests */
 	FS_INOTIFY=20,	/* inotify submenu */
+	FS_AT_VSYSCALL=21,	/* int: to announce vsyscall data */
+};
+
+/* /proc/sys/debug: binary sysctl IDs */
+enum {
+	DBG_DECODE_CALLTRACES = 1,	/* int: decode call traces on oops */
 };
 
 /* /proc/sys/fs/quota/ */
@@ -900,6 +915,8 @@ extern int proc_doulongvec_minmax(ctl_ta
 				  void __user *, size_t *, loff_t *);
 extern int proc_doulongvec_ms_jiffies_minmax(ctl_table *table, int,
 				      struct file *, void __user *, size_t *, loff_t *);
+extern int proc_doutsstring(ctl_table *table, int write, struct file *,
+			    void __user *, size_t *, loff_t *);
 
 extern int do_sysctl (int __user *name, int nlen,
 		      void __user *oldval, size_t __user *oldlenp,
@@ -954,6 +971,8 @@ extern ctl_handler sysctl_ms_jiffies;
  */
 
 /* A sysctl table is an array of struct ctl_table: */
+struct ve_struct;
+
 struct ctl_table 
 {
 	int ctl_name;			/* Binary ID */
@@ -967,6 +986,7 @@ struct ctl_table 
 	struct proc_dir_entry *de;	/* /proc control block */
 	void *extra1;
 	void *extra2;
+	struct ve_struct *owner_env;
 };
 
 /* struct ctl_table_header is used to maintain dynamic lists of
@@ -983,6 +1003,9 @@ struct ctl_table_header * register_sysct
 						int insert_at_head);
 void unregister_sysctl_table(struct ctl_table_header * table);
 
+ctl_table *clone_sysctl_template(ctl_table *tmpl, int nr);
+void free_sysctl_clone(ctl_table *clone);
+
 #else /* __KERNEL__ */
 
 #endif /* __KERNEL__ */
diff -uprN linux-2.6.16/include/linux/tty.h linux-2.6.16.ovz/include/linux/tty.h
--- linux-2.6.16/include/linux/tty.h	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/include/linux/tty.h	2006-07-05 08:34:56.000000000 -0400
@@ -238,8 +238,11 @@ struct tty_struct {
 	spinlock_t read_lock;
 	/* If the tty has a pending do_SAK, queue it here - akpm */
 	struct work_struct SAK_work;
+	struct ve_struct *owner_env;
 };
 
+DCL_VE_OWNER_PROTO(TTY, struct tty_struct, owner_env)
+
 /* tty magic number */
 #define TTY_MAGIC		0x5401
 
@@ -266,6 +269,7 @@ struct tty_struct {
 #define TTY_PTY_LOCK 		16	/* pty private */
 #define TTY_NO_WRITE_SPLIT 	17	/* Preserve write boundaries to driver */
 #define TTY_HUPPED 		18	/* Post driver->hangup() */
+#define TTY_CHARGED		19	/* Charged as ub resource */
 
 #define TTY_WRITE_FLUSH(tty) tty_write_flush((tty))
 
diff -uprN linux-2.6.16/include/linux/tty_driver.h linux-2.6.16.ovz/include/linux/tty_driver.h
--- linux-2.6.16/include/linux/tty_driver.h	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/include/linux/tty_driver.h	2006-07-05 08:34:56.000000000 -0400
@@ -115,6 +115,7 @@
  * 	character to the device.
  */
 
+#include <linux/ve_owner.h>
 #include <linux/fs.h>
 #include <linux/list.h>
 #include <linux/cdev.h>
@@ -214,9 +215,18 @@ struct tty_driver {
 			unsigned int set, unsigned int clear);
 
 	struct list_head tty_drivers;
+	struct ve_struct *owner_env;
 };
 
+DCL_VE_OWNER_PROTO(TTYDRV, struct tty_driver, owner_env)
+
+#ifdef CONFIG_LEGACY_PTYS
+extern struct tty_driver *pty_driver;
+extern struct tty_driver *pty_slave_driver;
+#endif
+
 extern struct list_head tty_drivers;
+extern rwlock_t tty_driver_guard;
 
 struct tty_driver *alloc_tty_driver(int lines);
 void put_tty_driver(struct tty_driver *driver);
diff -uprN linux-2.6.16/include/linux/ve.h linux-2.6.16.ovz/include/linux/ve.h
--- linux-2.6.16/include/linux/ve.h	1969-12-31 19:00:00.000000000 -0500
+++ linux-2.6.16.ovz/include/linux/ve.h	2006-07-05 08:34:56.000000000 -0400
@@ -0,0 +1,337 @@
+/*
+ *  include/linux/ve.h
+ *
+ *  Copyright (C) 2005  SWsoft
+ *  All rights reserved.
+ *  
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#ifndef _LINUX_VE_H
+#define _LINUX_VE_H
+
+#include <linux/config.h>
+
+#ifndef __ENVID_T_DEFINED__
+typedef unsigned envid_t;
+#define __ENVID_T_DEFINED__
+#endif
+
+#include <linux/types.h>
+#include <linux/capability.h>
+#include <linux/utsname.h>
+#include <linux/sysctl.h>
+#include <linux/vzstat.h>
+#include <linux/kobject.h>
+
+#ifdef VZMON_DEBUG
+#  define VZTRACE(fmt,args...) \
+	printk(KERN_DEBUG fmt, ##args)
+#else
+#  define VZTRACE(fmt,args...)
+#endif /* VZMON_DEBUG */
+
+struct tty_driver;
+struct devpts_config;
+struct task_struct;
+struct new_utsname;
+struct file_system_type;
+struct icmp_mib;
+struct ipstats_mib;	/* element type of ve_struct._ip_statistics[] */
+struct tcp_mib;
+struct udp_mib;
+struct linux_mib;
+struct fib_info;
+struct fib_rule;
+struct veip_struct;
+struct ve_monitor;
+
+#if defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE)
+struct fib_table;
+struct devcnfv4_struct;
+#ifdef CONFIG_VE_IPTABLES
+struct xt_af;
+struct xt_table;
+struct xt_target;
+struct ip_conntrack;
+typedef unsigned int (*ip_nat_helper_func)(void);
+struct ve_ip_conntrack {	/* pointed to by ve_struct._ip_conntrack */
+	struct list_head 	*_ip_conntrack_hash;
+	struct list_head	_ip_conntrack_expect_list;
+	struct list_head	_ip_conntrack_unconfirmed;
+	struct ip_conntrack_protocol ** _ip_ct_protos;
+	struct list_head	_ip_conntrack_helpers;
+	int 			_ip_conntrack_max;
+	int			_ip_conntrack_vmalloc;
+	atomic_t		_ip_conntrack_count;
+	void (*_ip_conntrack_destroyed)(struct ip_conntrack *conntrack);
+#ifdef CONFIG_SYSCTL
+	unsigned long		_ip_ct_tcp_timeouts[10];
+	unsigned long		_ip_ct_udp_timeout;
+	unsigned long		_ip_ct_udp_timeout_stream;
+	unsigned long		_ip_ct_icmp_timeout;
+	unsigned long		_ip_ct_generic_timeout;
+	unsigned int		_ip_ct_log_invalid;
+	unsigned long		_ip_ct_tcp_timeout_max_retrans;
+	int			_ip_ct_tcp_loose;
+	int			_ip_ct_tcp_be_liberal;
+	int			_ip_ct_tcp_max_retrans;
+	struct ctl_table_header *_ip_ct_sysctl_header;
+	ctl_table		*_ip_ct_net_table;
+	ctl_table		*_ip_ct_ipv4_table;
+	ctl_table		*_ip_ct_netfilter_table;
+	ctl_table		*_ip_ct_sysctl_table;
+#endif /* CONFIG_SYSCTL */
+
+	struct ip_nat_protocol	**_ip_nat_protos;
+	ip_nat_helper_func	_ip_nat_ftp_hook;
+	ip_nat_helper_func	_ip_nat_irc_hook;
+	struct list_head	*_ip_nat_bysource;
+	struct xt_table		*_ip_nat_table;
+
+	/* resource accounting */
+	struct user_beancounter *ub;
+};
+#endif /* CONFIG_VE_IPTABLES */
+#endif /* CONFIG_VE_NETDEV || CONFIG_VE_NETDEV_MODULE */
+
+#define UIDHASH_BITS_VE		6
+#define UIDHASH_SZ_VE		(1 << UIDHASH_BITS_VE)
+/* Element of ve_struct.ve_cpu_stats[NR_CPUS]; reached through VE_CPU_STATS(ve, cpu). */
+struct ve_cpu_stats {
+	cycles_t	idle_time;
+	cycles_t	iowait_time;
+	cycles_t	strt_idle_time;
+	cycles_t	used_time;
+	seqcount_t	stat_lock;
+	int		nr_running;
+	int		nr_unint;
+	int		nr_iowait;
+	cputime64_t	user;
+	cputime64_t	nice;
+	cputime64_t	system;
+} ____cacheline_aligned;
+
+struct ve_struct {	/* everything kept per VE */
+	struct ve_struct	*prev;
+	struct ve_struct	*next;
+
+	envid_t			veid;
+	struct task_struct	*init_entry;
+	struct list_head	vetask_lh;	/* tasks, via ve_task_info.vetask_list */
+	kernel_cap_t		cap_default;
+	atomic_t		pcounter;	/* must not be > 0 at the final put_ve() */
+	/* ref counter to ve from ipc; taken by get_ve(), dropped by put_ve() */
+	atomic_t		counter;
+	unsigned int		class_id;
+	struct veip_struct	*veip;
+	struct rw_semaphore	op_sem;
+	int			is_running;	/* must be clear at the final put_ve() */
+	int			is_locked;
+	int			virt_pids;
+	/* see vzcalluser.h for VE_FEATURE_XXX definitions */
+	__u64			features;
+
+/* VE's root */
+	struct vfsmount		*fs_rootmnt;
+	struct dentry		*fs_root;
+
+/* sysctl */
+	struct new_utsname	*utsname;
+	struct list_head	sysctl_lh;
+	struct ctl_table_header	*kern_header;
+	struct ctl_table	*kern_table;
+	struct ctl_table_header	*quota_header;
+	struct ctl_table	*quota_table;
+	struct file_system_type *proc_fstype;
+	struct vfsmount		*proc_mnt;
+	struct proc_dir_entry	*proc_root;
+	struct proc_dir_entry	*proc_sys_root;
+	struct proc_dir_entry	*_proc_net;
+	struct proc_dir_entry	*_proc_net_stat;
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+	struct proc_dir_entry	*_proc_net_devsnmp6;
+#endif /* CONFIG_IPV6 || CONFIG_IPV6_MODULE */
+
+/* SYSV IPC */
+	struct ipc_ids		*_shm_ids;
+	struct ipc_ids		*_msg_ids;
+	struct ipc_ids		*_sem_ids;
+	int			_used_sems;
+	int			_shm_tot;
+	size_t			_shm_ctlmax;
+	size_t			_shm_ctlall;
+	int			_shm_ctlmni;
+	int			_msg_ctlmax;
+	int			_msg_ctlmni;
+	int			_msg_ctlmnb;
+	int			_sem_ctls[4];
+
+/* BSD pty's */
+	struct tty_driver	*pty_driver;
+	struct tty_driver	*pty_slave_driver;
+
+#ifdef CONFIG_UNIX98_PTYS
+	struct tty_driver	*ptm_driver;
+	struct tty_driver	*pts_driver;
+	struct idr		*allocated_ptys;
+	struct file_system_type *devpts_fstype;
+	struct vfsmount		*devpts_mnt;
+	struct dentry		*devpts_root;
+	struct devpts_config	*devpts_config;
+#endif /* CONFIG_UNIX98_PTYS */
+
+	struct file_system_type *shmem_fstype;
+	struct vfsmount		*shmem_mnt;
+#ifdef CONFIG_SYSFS
+	struct file_system_type *sysfs_fstype;
+	struct vfsmount		*sysfs_mnt;
+	struct super_block	*sysfs_sb;
+	struct sysfs_dirent	*sysfs_root;
+#endif /* CONFIG_SYSFS */
+	struct subsystem	*class_subsys;
+	struct subsystem	*class_obj_subsys;
+	struct class		*net_class;
+
+/* User uids hash */
+	struct list_head	uidhash_table[UIDHASH_SZ_VE];
+
+#if defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE)
+	struct hlist_head	_net_dev_head;
+	struct hlist_head	_net_dev_index_head;
+	struct net_device	*_net_dev_base, **_net_dev_tail;
+	int			ifindex;
+	struct net_device	*_loopback_dev;
+	struct net_device	*_venet_dev;
+	struct ipv4_devconf	*_ipv4_devconf;
+	struct ipv4_devconf	*_ipv4_devconf_dflt;
+	struct ctl_table_header	*forward_header;
+	struct ctl_table	*forward_table;
+
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+	struct ipv6_devconf	*_ipv6_devconf;
+	struct ipv6_devconf	*_ipv6_devconf_dflt;
+#endif /* CONFIG_IPV6 || CONFIG_IPV6_MODULE */
+#endif /* CONFIG_VE_NETDEV || CONFIG_VE_NETDEV_MODULE */
+	unsigned long		rt_flush_required;
+
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+	struct neigh_table	*ve_nd_tbl;
+#endif /* CONFIG_IPV6 || CONFIG_IPV6_MODULE */
+	struct neigh_table	*ve_arp_tbl;
+
+/* per VE CPU stats */
+	struct timespec		start_timespec;
+	u64			start_jiffies;
+	cycles_t		start_cycles;
+	unsigned long		avenrun[3];	/* loadavg data */
+
+	cycles_t		cpu_used_ve;
+	struct kstat_lat_pcpu_struct	sched_lat_ve;
+
+#if defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE)
+	struct hlist_head	*_fib_info_hash;
+	struct hlist_head	*_fib_info_laddrhash;
+	int			_fib_hash_size;
+	int			_fib_info_cnt;
+
+	struct fib_rule		*_local_rule;
+	struct fib_rule		*_fib_rules;
+#ifdef CONFIG_IP_MULTIPLE_TABLES
+	/* XXX: why a magic constant? */
+	struct fib_table	*_fib_tables[256]; /* RT_TABLE_MAX - for now */
+#else
+	struct fib_table	*_main_table;
+	struct fib_table	*_local_table;
+#endif /* CONFIG_IP_MULTIPLE_TABLES */
+	struct icmp_mib		*_icmp_statistics[2];
+	struct ipstats_mib	*_ip_statistics[2];
+	struct tcp_mib		*_tcp_statistics[2];
+	struct udp_mib		*_udp_statistics[2];
+	struct linux_mib	*_net_statistics[2];
+	struct venet_stat	*stat;
+#ifdef CONFIG_VE_IPTABLES
+/* core/netfilter.c virtualization */
+	void			*_nf_hooks;
+	struct xt_table		*_ve_ipt_filter_pf; /* packet_filter struct */
+	struct xt_table		*_ve_ip6t_filter_pf;
+	struct xt_table		*_ipt_mangle_table;
+	struct xt_table		*_ip6t_mangle_table;
+	struct xt_af		*_xt;
+	struct xt_target	*_ipt_standard_target;
+	struct xt_target	*_ip6t_standard_target;
+
+	__u64			_iptables_modules;
+	struct ve_ip_conntrack	*_ip_conntrack;
+#endif /* CONFIG_VE_IPTABLES */
+
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+	struct fib6_table	*_fib6_table;
+	struct ipstats_mib	*_ipv6_statistics[2];
+	struct icmpv6_mib	*_icmpv6_statistics[2];
+	struct udp_mib		*_udp_stats_in6[2];
+#endif /* CONFIG_IPV6 || CONFIG_IPV6_MODULE */
+#endif /* CONFIG_VE_NETDEV || CONFIG_VE_NETDEV_MODULE */
+	wait_queue_head_t	*_log_wait;	/* the _log_* pointers and log_buf ... */
+	unsigned long		*_log_start;	/* ... are read through the ve_log_* ... */
+	unsigned long		*_log_end;	/* ... macros in veprintk.h */
+	unsigned long		*_logged_chars;
+	char			*log_buf;
+#define VE_DEFAULT_LOG_BUF_LEN	4096
+
+	struct ve_cpu_stats	ve_cpu_stats[NR_CPUS] ____cacheline_aligned;
+	unsigned long		down_at;
+	struct list_head	cleanup_list;
+
+	unsigned long		jiffies_fixup;
+	unsigned char		disable_net;
+	unsigned char		sparse_vpid;
+	struct ve_monitor	*monitor;
+	struct proc_dir_entry	*monitor_proc;
+	unsigned long		meminfo_val;
+};
+
+#define VE_CPU_STATS(ve, cpu) (&((ve)->ve_cpu_stats[(cpu)]))
+
+extern int nr_ve;
+
+#ifdef CONFIG_VE
+
+int get_device_perms_ve(int dev_type, dev_t dev, int access_mode);
+void do_env_cleanup(struct ve_struct *envid);
+void do_update_load_avg_ve(void);
+void do_env_free(struct ve_struct *ptr);
+
+#define ve_utsname (*get_exec_env()->utsname)
+
+static inline struct ve_struct *get_ve(struct ve_struct *ptr)
+{
+	if (!ptr)
+		return NULL;
+	atomic_inc(&ptr->counter);	/* take a reference */
+	return ptr;
+}
+
+/* Drop a reference; the final put checks pcounter/is_running, then frees the VE. */
+static inline void put_ve(struct ve_struct *ptr)
+{
+	if (!ptr || !atomic_dec_and_test(&ptr->counter))
+		return;
+	if (atomic_read(&ptr->pcounter) > 0 || ptr->is_running)
+		BUG();
+	do_env_free(ptr);
+}
+
+#ifdef CONFIG_FAIRSCHED
+#define ve_cpu_online_map(ve, mask) fairsched_cpu_online_map((ve)->veid, mask)
+#else	/* no fairsched: copy the global cpu_online_map into *mask */
+#define ve_cpu_online_map(ve, mask) do { *(mask) = cpu_online_map; } while (0)
+#endif /* CONFIG_FAIRSCHED */
+#else	/* CONFIG_VE */
+#define ve_utsname	system_utsname
+#define get_ve(ve)	(NULL)
+#define put_ve(ve)	do { } while (0)
+#endif	/* CONFIG_VE */
+
+#endif /* _LINUX_VE_H */
diff -uprN linux-2.6.16/include/linux/ve_owner.h linux-2.6.16.ovz/include/linux/ve_owner.h
--- linux-2.6.16/include/linux/ve_owner.h	1969-12-31 19:00:00.000000000 -0500
+++ linux-2.6.16.ovz/include/linux/ve_owner.h	2006-07-05 08:34:56.000000000 -0400
@@ -0,0 +1,32 @@
+/*
+ *  include/linux/ve_owner.h
+ *
+ *  Copyright (C) 2005  SWsoft
+ *  All rights reserved.
+ *  
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#ifndef __VE_OWNER_H__
+#define __VE_OWNER_H__
+
+#include <linux/config.h>
+#include <linux/vmalloc.h>
+
+/* DCL_VE_OWNER() expands to nothing; DCL_VE_OWNER_PROTO() emits the accessors. */
+#define DCL_VE_OWNER(name, type, member)
+	/* prototype declares static inline functions */
+/* Emits `type;` plus VE_OWNER_<name>(obj) / SET_VE_OWNER_<name>(obj, ve) over obj->member. */
+#define DCL_VE_OWNER_PROTO(name, type, member)				\
+type;									\
+static inline struct ve_struct *VE_OWNER_##name(const type *obj)	\
+{									\
+	return obj->member;						\
+}									\
+static inline void SET_VE_OWNER_##name(type *obj, struct ve_struct *ve)	\
+{									\
+	obj->member = ve;						\
+}
+
+#endif /* __VE_OWNER_H__ */
diff -uprN linux-2.6.16/include/linux/ve_proto.h linux-2.6.16.ovz/include/linux/ve_proto.h
--- linux-2.6.16/include/linux/ve_proto.h	1969-12-31 19:00:00.000000000 -0500
+++ linux-2.6.16.ovz/include/linux/ve_proto.h	2006-07-05 08:34:56.000000000 -0400
@@ -0,0 +1,75 @@
+/*
+ *  include/linux/ve_proto.h
+ *
+ *  Copyright (C) 2005  SWsoft
+ *  All rights reserved.
+ *  
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#ifndef __VE_H__
+#define __VE_H__
+
+#ifdef CONFIG_VE
+
+extern struct semaphore ve_call_guard;
+extern rwlock_t ve_call_lock;
+
+#ifdef CONFIG_SYSVIPC
+extern void prepare_ipc(void);
+extern int init_ve_ipc(struct ve_struct *);
+extern void fini_ve_ipc(struct ve_struct *);
+extern void ve_ipc_cleanup(void);
+#endif
+
+#ifdef CONFIG_UNIX98_PTYS
+extern struct tty_driver *ptm_driver;	/* Unix98 pty masters; for /dev/ptmx */
+extern struct tty_driver *pts_driver;	/* Unix98 pty slaves;  for /dev/ptmx */
+#endif
+
+extern rwlock_t  tty_driver_guard;
+
+#if defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE)
+void ip_fragment_cleanup(struct ve_struct *envid);
+void tcp_v4_kill_ve_sockets(struct ve_struct *envid);
+struct fib_table * fib_hash_init(int id);
+int move_addr_to_kernel(void __user *uaddr, int ulen, void *kaddr); /* as in <linux/socket.h> */
+extern int main_loopback_init(struct net_device*);
+int venet_init(void);
+#endif /* CONFIG_VE_NETDEV || CONFIG_VE_NETDEV_MODULE */
+
+extern struct ve_struct *ve_list_head;
+extern rwlock_t ve_list_guard;
+extern struct ve_struct *get_ve_by_id(envid_t);
+extern struct ve_struct *__find_ve_by_id(envid_t);
+
+struct env_create_param2;
+extern int real_env_create(envid_t veid, unsigned flags, u32 class_id,
+			   struct env_create_param2 *data, int datalen);
+
+extern int do_setdevperms(envid_t veid, unsigned type,
+		dev_t dev, unsigned mask);
+
+#define VE_HOOK_INIT	0	/* presumably values for ve_hook.hooknum -- TODO confirm */
+#define VE_HOOK_FINI	1
+#define VE_MAX_HOOKS	2
+
+typedef int ve_hookfn(unsigned int hooknum, void *data);
+
+struct ve_hook	/* argument of ve_hook_register() / ve_hook_unregister() */
+{
+	struct list_head list;
+	ve_hookfn *hook;
+	ve_hookfn *undo;
+	struct module *owner;
+	int hooknum;
+	/* Functions are called in ascending priority. */
+	int priority;
+};
+
+extern int ve_hook_register(struct ve_hook *vh);
+extern void ve_hook_unregister(struct ve_hook *vh);
+
+#endif /* CONFIG_VE */
+#endif /* __VE_H__ -- NOTE(review): guard name does not match the file name ve_proto.h */
diff -uprN linux-2.6.16/include/linux/ve_task.h linux-2.6.16.ovz/include/linux/ve_task.h
--- linux-2.6.16/include/linux/ve_task.h	1969-12-31 19:00:00.000000000 -0500
+++ linux-2.6.16.ovz/include/linux/ve_task.h	2006-07-05 08:34:56.000000000 -0400
@@ -0,0 +1,34 @@
+/*
+ *  include/linux/ve_task.h
+ *
+ *  Copyright (C) 2005  SWsoft
+ *  All rights reserved.
+ *  
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#ifndef __VE_TASK_H__
+#define __VE_TASK_H__
+
+#include <linux/seqlock.h>
+
+struct ve_task_info {
+/* virtualization */
+	struct ve_struct *owner_env;
+	struct ve_struct *exec_env;
+	struct list_head vetask_list;
+	struct dentry *glob_proc_dentry;
+/* statistics: scheduling latency */
+	cycles_t sleep_time;
+	cycles_t sched_time;
+	cycles_t sleep_stamp;
+	cycles_t wakeup_stamp;
+	seqcount_t wakeup_lock;
+};
+/* A task's embedded ve_task_info, and the task_struct containing a vetask_list node. */
+#define VE_TASK_INFO(task)	(&(task)->ve_task_info)
+#define VE_TASK_LIST_2_TASK(lh)	\
+	list_entry(lh, struct task_struct, ve_task_info.vetask_list)
+
+#endif /* __VE_TASK_H__ */
diff -uprN linux-2.6.16/include/linux/venet.h linux-2.6.16.ovz/include/linux/venet.h
--- linux-2.6.16/include/linux/venet.h	1969-12-31 19:00:00.000000000 -0500
+++ linux-2.6.16.ovz/include/linux/venet.h	2006-07-05 08:34:56.000000000 -0400
@@ -0,0 +1,70 @@
+/*
+ *  include/linux/venet.h
+ *
+ *  Copyright (C) 2005  SWsoft
+ *  All rights reserved.
+ *  
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#ifndef _VENET_H
+#define _VENET_H
+
+#include <linux/list.h>
+#include <linux/spinlock.h>
+#include <linux/vzcalluser.h>
+
+#define VEIP_HASH_SZ 512
+
+struct ve_struct;
+struct venet_stat;
+struct ip_entry_struct
+{
+	__u32			key[4];
+	int			family;
+	struct ve_struct	*active_env;
+	struct venet_stat	*stat;
+	struct veip_struct	*veip;
+	struct list_head 	ip_hash;
+	struct list_head 	ve_list;
+};
+
+struct veip_struct
+{
+	struct list_head	src_lh;
+	struct list_head	dst_lh;
+	struct list_head	ip_lh;
+	struct list_head	list;
+	envid_t			veid;
+};
+
+/* veip_hash_lock should be taken for write by caller */
+void ip_entry_hash(struct ip_entry_struct *entry, struct veip_struct *veip);
+/* veip_hash_lock should be taken for write by caller */
+void ip_entry_unhash(struct ip_entry_struct *entry);
+/* veip_hash_lock should be taken for read by caller */
+struct ip_entry_struct *ip_entry_lookup(u32 addr);
+struct ip_entry_struct *venet_entry_lookup(u32 *addr, int family);
+
+/* veip_hash_lock should be taken for read by caller */
+struct veip_struct *veip_find(envid_t veid);
+/* veip_hash_lock should be taken for write by caller */
+struct veip_struct *veip_findcreate(envid_t veid);
+/* veip_hash_lock should be taken for write by caller */
+void veip_put(struct veip_struct *veip);
+
+int veip_start(struct ve_struct *ve);
+void veip_stop(struct ve_struct *ve);
+int veip_entry_add(struct ve_struct *ve, struct sockaddr *addr);
+int veip_entry_del(envid_t veid, struct sockaddr *addr);
+int venet_change_skb_owner(struct sk_buff *skb);
+
+extern struct list_head ip_entry_hash_table[];
+extern rwlock_t veip_hash_lock;
+
+#ifdef CONFIG_PROC_FS
+int veip_seq_show(struct seq_file *m, void *v);
+#endif /* CONFIG_PROC_FS */
+
+#endif /* _VENET_H */
diff -uprN linux-2.6.16/include/linux/veprintk.h linux-2.6.16.ovz/include/linux/veprintk.h
--- linux-2.6.16/include/linux/veprintk.h	1969-12-31 19:00:00.000000000 -0500
+++ linux-2.6.16.ovz/include/linux/veprintk.h	2006-07-05 08:34:56.000000000 -0400
@@ -0,0 +1,38 @@
+/*
+ *  include/linux/veprintk.h
+ *
+ *  Copyright (C) 2006  SWsoft
+ *  All rights reserved.
+ *  
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#ifndef __VE_PRINTK_H__
+#define __VE_PRINTK_H__
+
+#ifdef CONFIG_VE	/* log state is reached through get_exec_env() */
+
+#define ve_log_wait		(*(get_exec_env()->_log_wait))	/* _log_* are pointers */
+#define ve_log_start		(*(get_exec_env()->_log_start))
+#define ve_log_end		(*(get_exec_env()->_log_end))
+#define ve_logged_chars		(*(get_exec_env()->_logged_chars))
+#define ve_log_buf		(get_exec_env()->log_buf)
+#define ve_log_buf_len		(ve_is_super(get_exec_env()) ? \
+				log_buf_len : VE_DEFAULT_LOG_BUF_LEN)
+#define VE_LOG_BUF_MASK		(ve_log_buf_len - 1)	/* a mask only if length is 2^n */
+#define VE_LOG_BUF(idx)		(ve_log_buf[(idx) & VE_LOG_BUF_MASK])	/* idx wraps */
+
+#else
+
+#define ve_log_wait		log_wait
+#define ve_log_start		log_start
+#define ve_log_end		log_end
+#define ve_logged_chars		logged_chars
+#define ve_log_buf		log_buf
+#define ve_log_buf_len		log_buf_len
+#define VE_LOG_BUF_MASK		LOG_BUF_MASK
+#define VE_LOG_BUF(idx)		LOG_BUF(idx)
+
+#endif /* CONFIG_VE */
+#endif /* __VE_PRINTK_H__ */
diff -uprN linux-2.6.16/include/linux/virtinfo.h linux-2.6.16.ovz/include/linux/virtinfo.h
--- linux-2.6.16/include/linux/virtinfo.h	1969-12-31 19:00:00.000000000 -0500
+++ linux-2.6.16.ovz/include/linux/virtinfo.h	2006-07-05 08:34:56.000000000 -0400
@@ -0,0 +1,52 @@
+/*
+ *  include/linux/virtinfo.h
+ *
+ *  Copyright (C) 2005  SWsoft
+ *  All rights reserved.
+ *
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#ifndef __LINUX_VIRTINFO_H
+#define __LINUX_VIRTINFO_H
+
+#include <linux/kernel.h>
+#include <linux/page-flags.h>
+#include <linux/rwsem.h>
+#include <linux/notifier.h>
+
+struct vnotifier_block
+{
+	int (*notifier_call)(struct vnotifier_block *self,
+			unsigned long, void *, int);
+	struct vnotifier_block *next;
+	int priority;
+};
+
+void virtinfo_notifier_register(int type, struct vnotifier_block *nb);
+void virtinfo_notifier_unregister(int type, struct vnotifier_block *nb);
+int virtinfo_notifier_call(int type, unsigned long n, void *data);
+
+struct meminfo {
+	struct sysinfo si;
+	unsigned long active, inactive;
+	unsigned long cache, swapcache;
+	unsigned long committed_space;
+	unsigned long allowed;
+	struct page_state ps;
+	unsigned long vmalloc_total, vmalloc_used, vmalloc_largest;
+};
+
+#define VIRTINFO_MEMINFO	0
+#define VIRTINFO_ENOUGHMEM	1
+
+enum virt_info_types {
+	VITYPE_GENERAL,
+	VITYPE_FAUDIT,
+	VITYPE_QUOTA,
+
+	VIRT_TYPES
+};
+
+#endif /* __LINUX_VIRTINFO_H */
diff -uprN linux-2.6.16/include/linux/vmalloc.h linux-2.6.16.ovz/include/linux/vmalloc.h
--- linux-2.6.16/include/linux/vmalloc.h	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/include/linux/vmalloc.h	2006-07-05 08:34:56.000000000 -0400
@@ -18,6 +18,10 @@
 #define IOREMAP_MAX_ORDER	(7 + PAGE_SHIFT)	/* 128 pages */
 #endif
 
+/* align size to 2^n page boundary */
+#define POWER2_PAGE_ALIGN(size) \
+	((typeof(size))(1UL << (PAGE_SHIFT + get_order(size))))
+
 struct vm_struct {
 	void			*addr;
 	unsigned long		size;
@@ -32,10 +36,14 @@ struct vm_struct {
  *	Highlevel APIs for driver use
  */
 extern void *vmalloc(unsigned long size);
+extern void *ub_vmalloc(unsigned long size);
 extern void *vmalloc_node(unsigned long size, int node);
+extern void *ub_vmalloc_node(unsigned long size, int node);
 extern void *vmalloc_exec(unsigned long size);
 extern void *vmalloc_32(unsigned long size);
 extern void *__vmalloc(unsigned long size, gfp_t gfp_mask, pgprot_t prot);
+extern void *vmalloc_best(unsigned long size);
+extern void *ub_vmalloc_best(unsigned long size);
 extern void *__vmalloc_area(struct vm_struct *area, gfp_t gfp_mask,
 				pgprot_t prot);
 extern void *__vmalloc_node(unsigned long size, gfp_t gfp_mask,
@@ -52,6 +60,9 @@ extern void vunmap(void *addr);
 extern struct vm_struct *get_vm_area(unsigned long size, unsigned long flags);
 extern struct vm_struct *__get_vm_area(unsigned long size, unsigned long flags,
 					unsigned long start, unsigned long end);
+extern struct vm_struct * get_vm_area_best(unsigned long size,
+					   unsigned long flags);
+extern void vprintstat(void);
 extern struct vm_struct *get_vm_area_node(unsigned long size,
 					unsigned long flags, int node);
 extern struct vm_struct *remove_vm_area(void *addr);
diff -uprN linux-2.6.16/include/linux/vsched.h linux-2.6.16.ovz/include/linux/vsched.h
--- linux-2.6.16/include/linux/vsched.h	1969-12-31 19:00:00.000000000 -0500
+++ linux-2.6.16.ovz/include/linux/vsched.h	2006-07-05 08:34:56.000000000 -0400
@@ -0,0 +1,26 @@
+/*
+ *  include/linux/vsched.h
+ *
+ *  Copyright (C) 2005  SWsoft
+ *  All rights reserved.
+ *  
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#ifndef __VSCHED_H__
+#define __VSCHED_H__
+
+#include <linux/config.h>
+#include <linux/cache.h>
+#include <linux/fairsched.h>
+#include <linux/sched.h>
+
+extern int vsched_create(int id, struct fairsched_node *node);
+extern int vsched_destroy(struct vcpu_scheduler *vsched);
+
+extern int vsched_mvpr(struct task_struct *p, struct vcpu_scheduler *vsched);
+
+extern int vcpu_online(int cpu);
+
+#endif
diff -uprN linux-2.6.16/include/linux/vzcalluser.h linux-2.6.16.ovz/include/linux/vzcalluser.h
--- linux-2.6.16/include/linux/vzcalluser.h	1969-12-31 19:00:00.000000000 -0500
+++ linux-2.6.16.ovz/include/linux/vzcalluser.h	2006-07-05 08:34:56.000000000 -0400
@@ -0,0 +1,228 @@
+/*
+ *  include/linux/vzcalluser.h
+ *
+ *  Copyright (C) 2005  SWsoft
+ *  All rights reserved.
+ *  
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#ifndef _LINUX_VZCALLUSER_H
+#define _LINUX_VZCALLUSER_H
+
+#include <linux/types.h>
+#include <linux/ioctl.h>
+
+#define KERN_VZ_PRIV_RANGE 51
+
+#ifndef __ENVID_T_DEFINED__
+typedef unsigned envid_t;
+#define __ENVID_T_DEFINED__
+#endif
+
+/*
+ * VE management ioctls
+ */
+
+struct vzctl_old_env_create {
+	envid_t veid;
+	unsigned flags;		/* VE_* bits defined below */
+#define VE_CREATE 	1	/* Create VE, VE_ENTER added automatically */
+#define VE_EXCLUSIVE	2	/* Fail if exists */
+#define VE_ENTER	4	/* Enter existing VE */
+#define VE_TEST		8	/* Test if VE exists */
+#define VE_LOCK		16	/* Do not allow entering created VE */
+#define VE_SKIPLOCK	32	/* Allow entering embryonic VE */
+	__u32 addr;
+};
+
+struct vzctl_mark_env_to_down {
+	envid_t veid;
+};
+
+struct vzctl_setdevperms {
+	envid_t veid;
+	unsigned type;
+#define VE_USE_MAJOR	010	/* Test MAJOR supplied in rule (values are octal) */
+#define VE_USE_MINOR	030	/* Test MINOR supplied in rule; contains the MAJOR bit */
+#define VE_USE_MASK	030	/* Testing mask, VE_USE_MAJOR|VE_USE_MINOR */
+	unsigned dev;
+	unsigned mask;
+};
+
+struct vzctl_ve_netdev {
+	envid_t veid;
+	int op;
+#define VE_NETDEV_ADD  1
+#define VE_NETDEV_DEL  2
+	char *dev_name;
+};
+
+struct vzctl_ve_meminfo {
+	envid_t veid;
+	unsigned long val;
+};
+
+/* these masks represent modules */
+#define VE_IP_IPTABLES_MOD		(1U<<0)
+#define VE_IP_FILTER_MOD		(1U<<1)
+#define VE_IP_MANGLE_MOD		(1U<<2)
+#define VE_IP_MATCH_LIMIT_MOD		(1U<<3)
+#define VE_IP_MATCH_MULTIPORT_MOD	(1U<<4)
+#define VE_IP_MATCH_TOS_MOD		(1U<<5)
+#define VE_IP_TARGET_TOS_MOD		(1U<<6)
+#define VE_IP_TARGET_REJECT_MOD		(1U<<7)
+#define VE_IP_TARGET_TCPMSS_MOD		(1U<<8)
+#define VE_IP_MATCH_TCPMSS_MOD		(1U<<9)
+#define VE_IP_MATCH_TTL_MOD		(1U<<10)
+#define VE_IP_TARGET_LOG_MOD		(1U<<11)
+#define VE_IP_MATCH_LENGTH_MOD		(1U<<12)
+#define VE_IP_CONNTRACK_MOD		(1U<<14)
+#define VE_IP_CONNTRACK_FTP_MOD		(1U<<15)
+#define VE_IP_CONNTRACK_IRC_MOD		(1U<<16)
+#define VE_IP_MATCH_CONNTRACK_MOD	(1U<<17)
+#define VE_IP_MATCH_STATE_MOD		(1U<<18)
+#define VE_IP_MATCH_HELPER_MOD		(1U<<19)
+#define VE_IP_NAT_MOD			(1U<<20)
+#define VE_IP_NAT_FTP_MOD		(1U<<21)
+#define VE_IP_NAT_IRC_MOD		(1U<<22)
+#define VE_IP_TARGET_REDIRECT_MOD	(1U<<23)
+
+/* these masks represent modules together with their dependencies */
+#define VE_IP_IPTABLES		(VE_IP_IPTABLES_MOD)
+#define VE_IP_FILTER		(VE_IP_FILTER_MOD		\
+					| VE_IP_IPTABLES)
+#define VE_IP_MANGLE		(VE_IP_MANGLE_MOD		\
+					| VE_IP_IPTABLES)
+#define VE_IP_MATCH_LIMIT	(VE_IP_MATCH_LIMIT_MOD		\
+					| VE_IP_IPTABLES)
+#define VE_IP_MATCH_MULTIPORT	(VE_IP_MATCH_MULTIPORT_MOD	\
+					| VE_IP_IPTABLES)
+#define VE_IP_MATCH_TOS		(VE_IP_MATCH_TOS_MOD		\
+					| VE_IP_IPTABLES)
+#define VE_IP_TARGET_TOS	(VE_IP_TARGET_TOS_MOD		\
+					| VE_IP_IPTABLES)
+#define VE_IP_TARGET_REJECT	(VE_IP_TARGET_REJECT_MOD	\
+					| VE_IP_IPTABLES)
+#define VE_IP_TARGET_TCPMSS	(VE_IP_TARGET_TCPMSS_MOD	\
+					| VE_IP_IPTABLES)
+#define VE_IP_MATCH_TCPMSS	(VE_IP_MATCH_TCPMSS_MOD		\
+					| VE_IP_IPTABLES)
+#define VE_IP_MATCH_TTL		(VE_IP_MATCH_TTL_MOD		\
+					| VE_IP_IPTABLES)
+#define VE_IP_TARGET_LOG	(VE_IP_TARGET_LOG_MOD		\
+					| VE_IP_IPTABLES)
+#define VE_IP_MATCH_LENGTH	(VE_IP_MATCH_LENGTH_MOD		\
+					| VE_IP_IPTABLES)
+#define VE_IP_CONNTRACK		(VE_IP_CONNTRACK_MOD		\
+					| VE_IP_IPTABLES)
+#define VE_IP_CONNTRACK_FTP	(VE_IP_CONNTRACK_FTP_MOD	\
+					| VE_IP_CONNTRACK)
+#define VE_IP_CONNTRACK_IRC	(VE_IP_CONNTRACK_IRC_MOD	\
+					| VE_IP_CONNTRACK)
+#define VE_IP_MATCH_CONNTRACK	(VE_IP_MATCH_CONNTRACK_MOD	\
+					| VE_IP_CONNTRACK)
+#define VE_IP_MATCH_STATE	(VE_IP_MATCH_STATE_MOD		\
+					| VE_IP_CONNTRACK)
+#define VE_IP_MATCH_HELPER	(VE_IP_MATCH_HELPER_MOD		\
+					| VE_IP_CONNTRACK)
+#define VE_IP_NAT		(VE_IP_NAT_MOD			\
+					| VE_IP_CONNTRACK)
+#define VE_IP_NAT_FTP		(VE_IP_NAT_FTP_MOD		\
+					| VE_IP_NAT | VE_IP_CONNTRACK_FTP)
+#define VE_IP_NAT_IRC		(VE_IP_NAT_IRC_MOD		\
+					| VE_IP_NAT | VE_IP_CONNTRACK_IRC)
+#define VE_IP_TARGET_REDIRECT	(VE_IP_TARGET_REDIRECT_MOD	\
+					| VE_IP_NAT)
+
+/* safe iptables mask to be used by default */
+#define VE_IP_DEFAULT					\
+	(VE_IP_IPTABLES |				\
+	VE_IP_FILTER | VE_IP_MANGLE |			\
+	VE_IP_MATCH_LIMIT | VE_IP_MATCH_MULTIPORT |	\
+	VE_IP_MATCH_TOS | VE_IP_TARGET_REJECT | 	\
+	VE_IP_TARGET_TCPMSS | VE_IP_MATCH_TCPMSS |	\
+	VE_IP_MATCH_TTL | VE_IP_MATCH_LENGTH)
+
+#define VE_IPT_CMP(x,y)		(((x) & (y)) == (y))
+
+struct vzctl_env_create_cid {
+	envid_t veid;
+	unsigned flags;
+	__u32 class_id;
+};
+
+struct vzctl_env_create {
+	envid_t veid;
+	unsigned flags;
+	__u32 class_id;
+};
+
+struct env_create_param {
+	__u64 iptables_mask;
+};
+
+#define VZCTL_ENV_CREATE_DATA_MINLEN	(sizeof(struct env_create_param))
+
+struct env_create_param2 {
+	__u64 iptables_mask;
+	__u64 feature_mask;
+#define VE_FEATURE_SYSFS	(1ULL << 0)
+	__u32 total_vcpus;	/* 0 - don't care, same as in host */
+};
+#define VZCTL_ENV_CREATE_DATA_MAXLEN	(sizeof(struct env_create_param2))
+
+typedef struct env_create_param2 env_create_param_t;
+
+struct vzctl_env_create_data {
+	envid_t veid;
+	unsigned flags;
+	__u32 class_id;
+	env_create_param_t *data;
+	int datalen;
+};
+
+struct vz_load_avg {
+	int val_int;
+	int val_frac;
+};
+
+struct vz_cpu_stat {
+	unsigned long user_jif;
+	unsigned long nice_jif;
+	unsigned long system_jif; 
+	unsigned long uptime_jif;
+	__u64 idle_clk;
+	__u64 strv_clk;
+	__u64 uptime_clk;
+	struct vz_load_avg avenrun[3];	/* loadavg data */
+};
+
+struct vzctl_cpustatctl {
+	envid_t veid;
+	struct vz_cpu_stat *cpustat;
+};
+
+#define VZCTLTYPE '.'
+#define VZCTL_OLD_ENV_CREATE	_IOW(VZCTLTYPE, 0,			\
+					struct vzctl_old_env_create)
+#define VZCTL_MARK_ENV_TO_DOWN	_IOW(VZCTLTYPE, 1,			\
+					struct vzctl_mark_env_to_down)
+#define VZCTL_SETDEVPERMS	_IOW(VZCTLTYPE, 2,			\
+					struct vzctl_setdevperms)
+#define VZCTL_ENV_CREATE_CID	_IOW(VZCTLTYPE, 4,			\
+					struct vzctl_env_create_cid)
+#define VZCTL_ENV_CREATE	_IOW(VZCTLTYPE, 5,			\
+					struct vzctl_env_create)
+#define VZCTL_GET_CPU_STAT	_IOW(VZCTLTYPE, 6,			\
+					struct vzctl_cpustatctl)
+#define VZCTL_ENV_CREATE_DATA	_IOW(VZCTLTYPE, 10,			\
+					struct vzctl_env_create_data)
+#define VZCTL_VE_NETDEV		_IOW(VZCTLTYPE, 11,			\
+					struct vzctl_ve_netdev)
+#define VZCTL_VE_MEMINFO	_IOW(VZCTLTYPE, 13,                     \
+					struct vzctl_ve_meminfo)
+
+
+#endif
diff -uprN linux-2.6.16/include/linux/vzctl.h linux-2.6.16.ovz/include/linux/vzctl.h
--- linux-2.6.16/include/linux/vzctl.h	1969-12-31 19:00:00.000000000 -0500
+++ linux-2.6.16.ovz/include/linux/vzctl.h	2006-07-05 08:34:56.000000000 -0400
@@ -0,0 +1,30 @@
+/*
+ *  include/linux/vzctl.h
+ *
+ *  Copyright (C) 2005  SWsoft
+ *  All rights reserved.
+ *  
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#ifndef _LINUX_VZCTL_H
+#define _LINUX_VZCTL_H
+
+#include <linux/list.h>
+
+struct module;
+struct inode;
+struct file;
+struct vzioctlinfo {
+	unsigned type;
+	int (*func)(struct inode *, struct file *,
+			unsigned int, unsigned long);
+	struct module *owner;
+	struct list_head list;
+};
+
+extern void vzioctl_register(struct vzioctlinfo *inf);
+extern void vzioctl_unregister(struct vzioctlinfo *inf);
+
+#endif
diff -uprN linux-2.6.16/include/linux/vzctl_quota.h linux-2.6.16.ovz/include/linux/vzctl_quota.h
--- linux-2.6.16/include/linux/vzctl_quota.h	1969-12-31 19:00:00.000000000 -0500
+++ linux-2.6.16.ovz/include/linux/vzctl_quota.h	2006-07-05 08:34:56.000000000 -0400
@@ -0,0 +1,43 @@
+/*
+ *  include/linux/vzctl_quota.h
+ *
+ *  Copyright (C) 2005  SWsoft
+ *  All rights reserved.
+ *  
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#ifndef __LINUX_VZCTL_QUOTA_H__
+#define __LINUX_VZCTL_QUOTA_H__
+
+/*
+ * Quota management ioctl
+ */
+
+struct vz_quota_stat;
+struct vzctl_quotactl {
+	int cmd;
+	unsigned int quota_id;
+	struct vz_quota_stat *qstat;
+	char *ve_root;
+};
+
+struct vzctl_quotaugidctl {
+	int cmd;		/* subcommand */
+	unsigned int quota_id;	/* quota id this request applies to */
+	unsigned int ugid_index;/* when reading statistics: index of the first
+				    uid/gid record to read */
+	unsigned int ugid_size;	/* size of the ugid array (presumably at addr) */
+	void *addr; 		/* user-level buffer */
+};
+
+#define VZDQCTLTYPE '+'
+#define VZCTL_QUOTA_CTL		_IOWR(VZDQCTLTYPE, 1,			\
+					struct vzctl_quotactl)
+#define VZCTL_QUOTA_NEW_CTL	_IOWR(VZDQCTLTYPE, 2,			\
+					struct vzctl_quotactl)
+#define VZCTL_QUOTA_UGID_CTL	_IOWR(VZDQCTLTYPE, 3,			\
+					struct vzctl_quotaugidctl)
+
+#endif /* __LINUX_VZCTL_QUOTA_H__ */
diff -uprN linux-2.6.16/include/linux/vzctl_venet.h linux-2.6.16.ovz/include/linux/vzctl_venet.h
--- linux-2.6.16/include/linux/vzctl_venet.h	1969-12-31 19:00:00.000000000 -0500
+++ linux-2.6.16.ovz/include/linux/vzctl_venet.h	2006-07-05 08:34:56.000000000 -0400
@@ -0,0 +1,36 @@
+/*
+ *  include/linux/vzctl_venet.h
+ *
+ *  Copyright (C) 2005  SWsoft
+ *  All rights reserved.
+ *  
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#ifndef _VZCTL_VENET_H
+#define _VZCTL_VENET_H
+
+#include <linux/types.h>
+#include <linux/ioctl.h>
+
+#ifndef __ENVID_T_DEFINED__
+typedef unsigned envid_t;
+#define __ENVID_T_DEFINED__
+#endif
+
+struct vzctl_ve_ip_map {
+	envid_t veid;
+	int op;
+#define VE_IP_ADD	1
+#define VE_IP_DEL	2
+	struct sockaddr *addr;
+	int addrlen;
+};
+
+#define VENETCTLTYPE '('
+
+#define VENETCTL_VE_IP_MAP	_IOW(VENETCTLTYPE, 3,			\
+					struct vzctl_ve_ip_map)
+
+#endif
diff -uprN linux-2.6.16/include/linux/vzctl_veth.h linux-2.6.16.ovz/include/linux/vzctl_veth.h
--- linux-2.6.16/include/linux/vzctl_veth.h	1969-12-31 19:00:00.000000000 -0500
+++ linux-2.6.16.ovz/include/linux/vzctl_veth.h	2006-07-05 08:34:56.000000000 -0400
@@ -0,0 +1,40 @@
+/*
+ *  include/linux/vzctl_veth.h
+ *
+ *  Copyright (C) 2006  SWsoft
+ *  All rights reserved.
+ *  
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#ifndef _VZCTL_VETH_H
+#define _VZCTL_VETH_H
+
+#include <linux/types.h>
+#include <linux/ioctl.h>
+
+#ifndef __ENVID_T_DEFINED__
+typedef unsigned envid_t;
+#define __ENVID_T_DEFINED__
+#endif
+
+struct vzctl_ve_hwaddr {
+	envid_t veid;
+	int op;				/* VE_ETH_ADD or VE_ETH_DEL, defined below */
+#define VE_ETH_ADD	1
+#define VE_ETH_DEL	2
+	unsigned char	dev_addr[6];	/* 6 bytes, presumably ETH_ALEN */
+	int addrlen;			/* presumably used length of dev_addr */
+	char		dev_name[16];	/* 16 bytes, presumably IFNAMSIZ */
+	unsigned char	dev_addr_ve[6];	/* "_ve" fields mirror the three above */
+	int addrlen_ve;
+	char		dev_name_ve[16];
+};
+
+#define VETHCTLTYPE '['
+
+#define VETHCTL_VE_HWADDR	_IOW(VETHCTLTYPE, 3,			\
+					struct vzctl_ve_hwaddr)
+
+#endif
diff -uprN linux-2.6.16/include/linux/vzdq_tree.h linux-2.6.16.ovz/include/linux/vzdq_tree.h
--- linux-2.6.16/include/linux/vzdq_tree.h	1969-12-31 19:00:00.000000000 -0500
+++ linux-2.6.16.ovz/include/linux/vzdq_tree.h	2006-07-05 08:34:56.000000000 -0400
@@ -0,0 +1,99 @@
+/*
+ *
+ * Copyright (C) 2005 SWsoft
+ * All rights reserved.
+ * 
+ * Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ * This file contains Virtuozzo disk quota tree definition
+ */
+
+#ifndef _VZDQ_TREE_H
+#define _VZDQ_TREE_H
+
+#include <linux/list.h>
+#include <asm/string.h>
+
+typedef unsigned int quotaid_t;
+#define QUOTAID_BITS		32
+#define QUOTAID_BBITS		4
+#define QUOTAID_EBITS		8
+
+#if QUOTAID_EBITS % QUOTAID_BBITS
+#error Quota bit assumption failure
+#endif
+
+#define QUOTATREE_BSIZE		(1 << QUOTAID_BBITS)
+#define QUOTATREE_BMASK		(QUOTATREE_BSIZE - 1)
+#define QUOTATREE_DEPTH		((QUOTAID_BITS + QUOTAID_BBITS - 1) \
+							/ QUOTAID_BBITS)
+#define QUOTATREE_EDEPTH	((QUOTAID_BITS + QUOTAID_EBITS - 1) \
+							/ QUOTAID_EBITS)
+#define QUOTATREE_BSHIFT(lvl)	((QUOTATREE_DEPTH - (lvl) - 1) * QUOTAID_BBITS)
+
+/*
+ * Depth of keeping unused node (not inclusive).
+ * 0 means release all nodes including root,
+ * QUOTATREE_DEPTH means never release nodes.
+ * Current value: release all nodes strictly after QUOTATREE_EDEPTH 
+ * (measured in external shift units).
+ */
+#define QUOTATREE_CDEPTH	(QUOTATREE_DEPTH \
+				- 2 * QUOTATREE_DEPTH / QUOTATREE_EDEPTH \
+				+ 1)
+
+/*
+ * Levels 0..(QUOTATREE_DEPTH-1) are tree nodes.
+ * On level i the maximal number of nodes is 2^(i*QUOTAID_BBITS),
+ * and each node contains 2^QUOTAID_BBITS pointers.
+ * Level 0 is a (single) tree root node.
+ *
+ * Nodes of level (QUOTATREE_DEPTH-1) contain pointers to caller's data.
+ * Nodes of lower levels contain pointers to nodes.
+ *
+ * Double pointer in array of i-level node, pointing to a (i+1)-level node
+ * (such as inside quotatree_find_state) are marked by level (i+1), not i.
+ * Level 0 double pointer is a pointer to root inside tree struct.
+ *
+ * The tree is permanent, i.e. all allocated index blocks are kept alive to
+ * preserve the block numbers in the quota file tree, which keeps its changes
+ * local.
+ */
+struct quotatree_node {
+	struct list_head list;
+	quotaid_t num;
+	void *blocks[QUOTATREE_BSIZE];
+};
+
+struct quotatree_level {
+	struct list_head usedlh, freelh;
+	quotaid_t freenum;
+};
+
+struct quotatree_tree {
+	struct quotatree_level levels[QUOTATREE_DEPTH];
+	struct quotatree_node *root;
+	unsigned int leaf_num;
+};
+
+struct quotatree_find_state {
+	void **block;
+	int level;
+};
+
+/* number of leaves (objects) and leaf level of the tree */
+#define QTREE_LEAFNUM(tree)	((tree)->leaf_num)
+#define QTREE_LEAFLVL(tree)	(&(tree)->levels[QUOTATREE_DEPTH - 1])
+
+struct quotatree_tree *quotatree_alloc(void);
+void *quotatree_find(struct quotatree_tree *tree, quotaid_t id,
+		struct quotatree_find_state *st);
+int quotatree_insert(struct quotatree_tree *tree, quotaid_t id,
+		struct quotatree_find_state *st, void *data);
+void quotatree_remove(struct quotatree_tree *tree, quotaid_t id);
+void quotatree_free(struct quotatree_tree *tree, void (*dtor)(void *));
+void *quotatree_get_next(struct quotatree_tree *tree, quotaid_t id);
+void *quotatree_leaf_byindex(struct quotatree_tree *tree, unsigned int index);
+
+#endif /* _VZDQ_TREE_H */
+
diff -uprN linux-2.6.16/include/linux/vzquota.h linux-2.6.16.ovz/include/linux/vzquota.h
--- linux-2.6.16/include/linux/vzquota.h	1969-12-31 19:00:00.000000000 -0500
+++ linux-2.6.16.ovz/include/linux/vzquota.h	2006-07-05 08:34:56.000000000 -0400
@@ -0,0 +1,291 @@
+/*
+ *
+ * Copyright (C) 2001-2005 SWsoft
+ * All rights reserved.
+ * 
+ * Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ * This file contains Virtuozzo disk quota implementation
+ */
+
+#ifndef _VZDQUOTA_H
+#define _VZDQUOTA_H
+
+#include <linux/types.h>
+#include <linux/quota.h>
+
+/* vzquotactl syscall commands */
+#define VZ_DQ_CREATE		5 /* create quota master block */
+#define VZ_DQ_DESTROY		6 /* destroy qmblk */
+#define VZ_DQ_ON		7 /* mark dentry with already created qmblk */
+#define VZ_DQ_OFF		8 /* remove mark, don't destroy qmblk */
+#define VZ_DQ_SETLIMIT		9 /* set new limits */
+#define VZ_DQ_GETSTAT		10 /* get usage statistic */
+/* set of syscalls to maintain UGID quotas */
+#define VZ_DQ_UGID_GETSTAT	1 /* get usage/limits for ugid(s) */
+#define VZ_DQ_UGID_ADDSTAT	2 /* set usage/limits statistic for ugid(s) */
+#define VZ_DQ_UGID_GETGRACE	3 /* get expire times */
+#define VZ_DQ_UGID_SETGRACE	4 /* set expire times */
+#define VZ_DQ_UGID_GETCONFIG	5 /* get ugid_max limit, cnt, flags of qmblk */
+#define VZ_DQ_UGID_SETCONFIG	6 /* set ugid_max limit, flags of qmblk */
+#define VZ_DQ_UGID_SETLIMIT	7 /* set ugid B/I limits */
+#define VZ_DQ_UGID_SETINFO	8 /* set ugid info */
+
+/* common structure for vz and ugid quota */
+struct dq_stat {
+	/* blocks limits */
+	__u64	bhardlimit;	/* absolute limit in bytes */
+	__u64	bsoftlimit;	/* preferred limit in bytes */
+	time_t	btime;		/* time limit for excessive disk use */
+	__u64	bcurrent;	/* current bytes count */
+	/* inodes limits */
+	__u32	ihardlimit;	/* absolute limit on allocated inodes */
+	__u32	isoftlimit;	/* preferred inode limit */
+	time_t	itime;		/* time limit for excessive inode use */
+	__u32	icurrent;	/* current # allocated inodes */
+};
+
+/* One second resolution for grace times */
+#define CURRENT_TIME_SECONDS	(get_seconds())
+
+/* Values for dq_info->flags */
+#define VZ_QUOTA_INODES 0x01       /* inodes limit warning printed */
+#define VZ_QUOTA_SPACE  0x02       /* space limit warning printed */
+
+struct dq_info {
+	time_t		bexpire;   /* expire timeout for excessive disk use */
+	time_t		iexpire;   /* expire timeout for excessive inode use */
+	unsigned	flags;	   /* VZ_QUOTA_INODES / VZ_QUOTA_SPACE, see above */
+};
+
+struct vz_quota_stat  {
+	struct dq_stat dq_stat;
+	struct dq_info dq_info;
+};
+
+/* UID/GID interface record - for user-kernel level exchange */
+struct vz_quota_iface {
+	unsigned int	qi_id;	   /* UID/GID this applies to */
+	unsigned int	qi_type;   /* USRQUOTA|GRPQUOTA */
+	struct dq_stat	qi_stat;   /* limits, options, usage stats */
+};
+
+/* values for flags and dq_flags */
+/* this flag is set if the userspace has been unable to provide usage
+ * information about all ugids
+ * if the flag is set, we don't allocate new UG quota blocks (their
+ * current usage is unknown) or free existing UG quota blocks (not to
+ * lose information that this block is ok) */
+#define VZDQUG_FIXED_SET	0x01
+/* permit to use ugid quota */
+#define VZDQUG_ON		0x02
+#define VZDQ_USRQUOTA		0x10
+#define VZDQ_GRPQUOTA		0x20
+#define VZDQ_NOACT		0x1000	/* not actual */
+#define VZDQ_NOQUOT		0x2000	/* not under quota tree */
+
+struct vz_quota_ugid_stat {
+	unsigned int	limit;	/* max amount of ugid records */
+	unsigned int	count;	/* amount of ugid records */
+	unsigned int	flags;	
+};
+
+struct vz_quota_ugid_setlimit {
+	unsigned int	type;	/* quota type (USR/GRP) */
+	unsigned int	id;	/* ugid */
+	struct if_dqblk dqb;	/* limits info */
+};
+
+struct vz_quota_ugid_setinfo {
+	unsigned int	type;	/* quota type (USR/GRP) */
+	struct if_dqinfo dqi;	/* grace info */
+};
+
+#ifdef __KERNEL__
+#include <linux/list.h>
+#include <asm/atomic.h>
+#include <asm/semaphore.h>
+#include <linux/time.h>
+#include <linux/vzquota_qlnk.h>
+#include <linux/vzdq_tree.h>
+
+/* Values for dq_info flags */
+#define VZ_QUOTA_INODES	0x01	   /* inodes limit warning printed */
+#define VZ_QUOTA_SPACE	0x02	   /* space limit warning printed */
+
+/* values for dq_state */
+#define VZDQ_STARTING		0 /* created, not turned on yet */
+#define VZDQ_WORKING		1 /* quota created, turned on */
+#define VZDQ_STOPING		2 /* created, turned on and off */
+
+/* master quota record - one per veid */
+struct vz_quota_master {
+	struct list_head	dq_hash;	/* next quota in hash list */
+	atomic_t		dq_count;	/* inode reference count */
+	unsigned int		dq_flags;	/* see VZDQUG_FIXED_SET */
+	unsigned int		dq_state;	/* see values above */
+	unsigned int		dq_id;		/* VEID this applies to */
+	struct dq_stat		dq_stat; 	/* limits, grace, usage stats */
+	struct dq_info		dq_info;	/* grace times and flags */
+	spinlock_t		dq_data_lock;	/* for dq_stat */
+
+	struct semaphore	dq_sem;		/* semaphore to protect 
+						   ugid tree */
+
+	struct list_head	dq_ilink_list;	/* list of vz_quota_ilink */
+	struct quotatree_tree	*dq_uid_tree;	/* vz_quota_ugid tree for UIDs */
+	struct quotatree_tree	*dq_gid_tree;	/* vz_quota_ugid tree for GIDs */
+	unsigned int		dq_ugid_count;	/* amount of ugid records */
+	unsigned int		dq_ugid_max;	/* max amount of ugid records */
+	struct dq_info		dq_ugid_info[MAXQUOTAS]; /* ugid grace times */
+
+	struct dentry		*dq_root_dentry;/* dentry of fs tree */
+	struct vfsmount		*dq_root_mnt;	/* vfsmnt of this dentry */
+	struct super_block	*dq_sb;	      /* superblock of our quota root */
+};
+
+/* UID/GID quota record - one per pair (quota_master, uid or gid) */
+struct vz_quota_ugid {
+	unsigned int		qugid_id;     /* UID/GID this applies to */
+	struct dq_stat		qugid_stat;   /* limits, options, usage stats */
+	int			qugid_type;   /* USRQUOTA|GRPQUOTA */
+	atomic_t		qugid_count;  /* reference count */
+};
+
+#define VZ_QUOTA_UGBAD		((struct vz_quota_ugid *)0xfeafea11)
+
+struct vz_quota_datast {
+	struct vz_quota_ilink qlnk;
+};
+
+#define VIRTINFO_QUOTA_GETSTAT	0
+#define VIRTINFO_QUOTA_ON	1
+#define VIRTINFO_QUOTA_OFF	2
+
+struct virt_info_quota {
+	struct super_block *super;
+	struct dq_stat *qstat;
+};
+
+/*
+ * Interface to VZ quota core
+ */
+#define INODE_QLNK(inode)	(&(inode)->i_qlnk)
+#define QLNK_INODE(qlnk)	container_of((qlnk), struct inode, i_qlnk)
+
+#define VZ_QUOTA_BAD		((struct vz_quota_master *)0xefefefef)
+
+#define VZ_QUOTAO_SETE		1
+#define VZ_QUOTAO_INIT		2
+#define VZ_QUOTAO_DESTR		3
+#define VZ_QUOTAO_SWAP		4
+#define VZ_QUOTAO_INICAL	5
+#define VZ_QUOTAO_DRCAL		6
+#define VZ_QUOTAO_QSET		7
+#define VZ_QUOTAO_TRANS		8
+#define VZ_QUOTAO_ACT		9
+#define VZ_QUOTAO_DTREE		10
+#define VZ_QUOTAO_DET		11
+#define VZ_QUOTAO_ON		12
+
+extern struct semaphore vz_quota_sem;
+void inode_qmblk_lock(struct super_block *sb);
+void inode_qmblk_unlock(struct super_block *sb);
+void qmblk_data_read_lock(struct vz_quota_master *qmblk);
+void qmblk_data_read_unlock(struct vz_quota_master *qmblk);
+void qmblk_data_write_lock(struct vz_quota_master *qmblk);
+void qmblk_data_write_unlock(struct vz_quota_master *qmblk);
+
+/* for quota operations */
+void vzquota_inode_init_call(struct inode *inode);
+void vzquota_inode_drop_call(struct inode *inode);
+int vzquota_inode_transfer_call(struct inode *, struct iattr *);
+struct vz_quota_master *vzquota_inode_data(struct inode *inode,
+		struct vz_quota_datast *);
+void vzquota_data_unlock(struct inode *inode, struct vz_quota_datast *);
+int vzquota_rename_check(struct inode *inode,
+		struct inode *old_dir, struct inode *new_dir);
+struct vz_quota_master *vzquota_inode_qmblk(struct inode *inode);
+/* for second-level quota */
+struct vz_quota_master *vzquota_find_qmblk(struct super_block *);
+/* for management operations */
+struct vz_quota_master *vzquota_alloc_master(unsigned int quota_id,
+		struct vz_quota_stat *qstat);
+void vzquota_free_master(struct vz_quota_master *);
+struct vz_quota_master *vzquota_find_master(unsigned int quota_id);
+int vzquota_on_qmblk(struct super_block *sb, struct inode *inode,
+		struct vz_quota_master *qmblk);
+int vzquota_off_qmblk(struct super_block *sb, struct vz_quota_master *qmblk);
+int vzquota_get_super(struct super_block *sb);
+void vzquota_put_super(struct super_block *sb);
+
+static inline struct vz_quota_master *qmblk_get(struct vz_quota_master *qmblk)
+{
+	/* a new reference may only be taken while the count is non-zero */
+	BUG_ON(!atomic_read(&qmblk->dq_count));
+	atomic_inc(&qmblk->dq_count);
+	return qmblk;
+}
+
+static inline void __qmblk_put(struct vz_quota_master *qmblk)
+{
+	atomic_dec(&qmblk->dq_count);	/* never frees, unlike qmblk_put() */
+}
+
+static inline void qmblk_put(struct vz_quota_master *qmblk)
+{
+	/* whoever drops the count to zero releases the master block */
+	if (atomic_dec_and_test(&qmblk->dq_count))
+		vzquota_free_master(qmblk);
+}
+
+extern struct list_head vzquota_hash_table[];
+extern int vzquota_hash_size;
+
+/*
+ * Interface to VZ UGID quota
+ */
+extern struct quotactl_ops vz_quotactl_operations;
+extern struct dquot_operations vz_quota_operations2;
+extern struct quota_format_type vz_quota_empty_v2_format;
+
+#define QUGID_TREE(qmblk, type)	(((type) == USRQUOTA) ?		\
+					(qmblk)->dq_uid_tree :	\
+					(qmblk)->dq_gid_tree) /* non-USRQUOTA: gid tree */
+
+#define VZDQUG_FIND_DONT_ALLOC	1
+#define VZDQUG_FIND_FAKE	2
+struct vz_quota_ugid *vzquota_find_ugid(struct vz_quota_master *qmblk,
+		unsigned int quota_id, int type, int flags);
+struct vz_quota_ugid *__vzquota_find_ugid(struct vz_quota_master *qmblk,
+		unsigned int quota_id, int type, int flags);
+struct vz_quota_ugid *vzquota_get_ugid(struct vz_quota_ugid *qugid);
+void vzquota_put_ugid(struct vz_quota_master *qmblk,
+		struct vz_quota_ugid *qugid);
+void vzquota_kill_ugid(struct vz_quota_master *qmblk);
+int vzquota_ugid_init(void);
+void vzquota_ugid_release(void);
+int vzquota_transfer_usage(struct inode *inode, int mask,
+		struct vz_quota_ilink *qlnk);
+
+struct vzctl_quotaugidctl;
+long do_vzquotaugidctl(struct vzctl_quotaugidctl *qub);
+
+/*
+ * Other VZ quota parts
+ */
+extern struct dquot_operations vz_quota_operations;
+
+long do_vzquotactl(int cmd, unsigned int quota_id,
+			  struct vz_quota_stat *qstat, const char *ve_root);
+int vzquota_proc_init(void);
+void vzquota_proc_release(void);
+struct vz_quota_master *vzquota_find_qmblk(struct super_block *);
+extern struct semaphore vz_quota_sem;
+
+void vzaquota_init(void);
+void vzaquota_fini(void);
+
+#endif /* __KERNEL__ */
+
+#endif /* _VZDQUOTA_H */
diff -uprN linux-2.6.16/include/linux/vzquota_qlnk.h linux-2.6.16.ovz/include/linux/vzquota_qlnk.h
--- linux-2.6.16/include/linux/vzquota_qlnk.h	1969-12-31 19:00:00.000000000 -0500
+++ linux-2.6.16.ovz/include/linux/vzquota_qlnk.h	2006-07-05 08:34:56.000000000 -0400
@@ -0,0 +1,25 @@
+/*
+ *  include/linux/vzquota_qlnk.h
+ *
+ *  Copyright (C) 2005  SWsoft
+ *  All rights reserved.
+ *  
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#ifndef _VZDQUOTA_QLNK_H
+#define _VZDQUOTA_QLNK_H
+
+struct vz_quota_master;
+struct vz_quota_ugid;
+
+/* inode link, used to track inodes using quota via dq_ilink_list */
+struct vz_quota_ilink {
+	struct vz_quota_master *qmblk;
+	struct vz_quota_ugid *qugid[MAXQUOTAS];	/* MAXQUOTAS is not defined here... */
+	struct list_head list;	/* ...nor is list_head: the includer provides both */
+	unsigned char origin;	/* presumably a VZ_QUOTAO_* code - TODO confirm */
+};
+
+#endif /* _VZDQUOTA_QLNK_H */
diff -uprN linux-2.6.16/include/linux/vzratelimit.h linux-2.6.16.ovz/include/linux/vzratelimit.h
--- linux-2.6.16/include/linux/vzratelimit.h	1969-12-31 19:00:00.000000000 -0500
+++ linux-2.6.16.ovz/include/linux/vzratelimit.h	2006-07-05 08:34:56.000000000 -0400
@@ -0,0 +1,28 @@
+/*
+ *  include/linux/vzratelimit.h
+ *
+ *  Copyright (C) 2005  SWsoft
+ *  All rights reserved.
+ *  
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#ifndef __VZ_RATELIMIT_H__
+#define __VZ_RATELIMIT_H__
+
+/*
+ * Generic ratelimiting stuff.
+ */
+
+struct vz_rate_info {
+	int burst;
+	int interval; /* jiffy_t per event */
+	int bucket; /* kind of leaky bucket */
+	unsigned long last; /* last event */
+};
+
+/* Return true if rate limit permits. */
+int vz_ratelimit(struct vz_rate_info *p);
+
+#endif /* __VZ_RATELIMIT_H__ */
diff -uprN linux-2.6.16/include/linux/vzstat.h linux-2.6.16.ovz/include/linux/vzstat.h
--- linux-2.6.16/include/linux/vzstat.h	1969-12-31 19:00:00.000000000 -0500
+++ linux-2.6.16.ovz/include/linux/vzstat.h	2006-07-05 08:34:56.000000000 -0400
@@ -0,0 +1,182 @@
+/*
+ *  include/linux/vzstat.h
+ *
+ *  Copyright (C) 2005  SWsoft
+ *  All rights reserved.
+ *  
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#ifndef __VZSTAT_H__
+#define __VZSTAT_H__
+
+struct swap_cache_info_struct {
+	unsigned long add_total;
+	unsigned long del_total;
+	unsigned long find_success;
+	unsigned long find_total;
+	unsigned long noent_race;
+	unsigned long exist_race;
+	unsigned long remove_race;
+};
+
+struct kstat_lat_snap_struct {
+	cycles_t maxlat, totlat;
+	unsigned long count;
+};
+struct kstat_lat_pcpu_snap_struct {
+	cycles_t maxlat, totlat;
+	unsigned long count;
+	seqcount_t lock;
+} ____cacheline_aligned_in_smp;
+
+struct kstat_lat_struct {
+	struct kstat_lat_snap_struct cur, last;
+	cycles_t avg[3];
+};
+struct kstat_lat_pcpu_struct {
+	struct kstat_lat_pcpu_snap_struct cur[NR_CPUS];
+	cycles_t max_snap;
+	struct kstat_lat_snap_struct last;
+	cycles_t avg[3];
+};
+
+struct kstat_perf_snap_struct {
+	cycles_t wall_tottime, cpu_tottime;
+	cycles_t wall_maxdur, cpu_maxdur;
+	unsigned long count;
+};
+struct kstat_perf_struct {
+	struct kstat_perf_snap_struct cur, last;
+};
+
+struct kstat_zone_avg {
+	unsigned long		free_pages_avg[3],
+				nr_active_avg[3],
+				nr_inactive_avg[3];
+};
+
+#define KSTAT_ALLOCSTAT_NR 5
+
+struct kernel_stat_glob {
+	unsigned long nr_unint_avg[3];
+
+	unsigned long alloc_fails[KSTAT_ALLOCSTAT_NR];
+	struct kstat_lat_struct alloc_lat[KSTAT_ALLOCSTAT_NR];
+	struct kstat_lat_pcpu_struct sched_lat;
+	struct kstat_lat_struct swap_in;
+
+	struct kstat_perf_struct ttfp, cache_reap,
+			refill_inact, shrink_icache, shrink_dcache;
+
+	struct kstat_zone_avg zone_avg[3];	/* MAX_NR_ZONES */
+} ____cacheline_aligned;
+
+extern struct kernel_stat_glob kstat_glob ____cacheline_aligned;
+extern spinlock_t kstat_glb_lock;
+
+#ifdef CONFIG_VE
+#define KSTAT_PERF_ENTER(name)				\
+	unsigned long flags;				\
+	cycles_t start, sleep_time;			\
+							\
+	start = get_cycles();				\
+	sleep_time = VE_TASK_INFO(current)->sleep_time;	\
+
+#define KSTAT_PERF_LEAVE(name)				\
+	spin_lock_irqsave(&kstat_glb_lock, flags);	\
+	kstat_glob.name.cur.count++;			\
+	start = get_cycles() - start;			\
+	if (kstat_glob.name.cur.wall_maxdur < start)	\
+		kstat_glob.name.cur.wall_maxdur = start;\
+	kstat_glob.name.cur.wall_tottime += start;	\
+	start -= VE_TASK_INFO(current)->sleep_time -	\
+					sleep_time;	\
+	if (kstat_glob.name.cur.cpu_maxdur < start)	\
+		kstat_glob.name.cur.cpu_maxdur = start;	\
+	kstat_glob.name.cur.cpu_tottime += start;	\
+	spin_unlock_irqrestore(&kstat_glb_lock, flags);	\
+
+#else
+#define KSTAT_PERF_ENTER(name)
+#define KSTAT_PERF_LEAVE(name)
+#endif
+
+/*
+ * Add another statistics reading.
+ * Serialization is the caller's responsibility.
+ */
+static inline void KSTAT_LAT_ADD(struct kstat_lat_struct *p,
+		cycles_t dur)
+{
+	p->cur.count++;
+	if (p->cur.maxlat < dur)
+		p->cur.maxlat = dur;
+	p->cur.totlat += dur;
+}
+
+static inline void KSTAT_LAT_PCPU_ADD(struct kstat_lat_pcpu_struct *p, int cpu,
+		cycles_t dur)
+{
+	struct kstat_lat_pcpu_snap_struct *cur;
+
+	cur = &p->cur[cpu];
+	write_seqcount_begin(&cur->lock);
+	cur->count++;
+	if (cur->maxlat < dur)
+		cur->maxlat = dur;
+	cur->totlat += dur;
+	write_seqcount_end(&cur->lock);
+}
+
+/*
+ * Snapshot current statistics into last and reset the current maximum.
+ * Serialization is the caller's responsibility.
+ */
+static inline void KSTAT_LAT_UPDATE(struct kstat_lat_struct *p)
+{
+	cycles_t m;
+	memcpy(&p->last, &p->cur, sizeof(p->last));
+	p->cur.maxlat = 0;	/* count and totlat keep accumulating */
+	m = p->last.maxlat;	/* the copied peak feeds the averages */
+	CALC_LOAD(p->avg[0], EXP_1, m);
+	CALC_LOAD(p->avg[1], EXP_5, m);
+	CALC_LOAD(p->avg[2], EXP_15, m);
+}
+
+static inline void KSTAT_LAT_PCPU_UPDATE(struct kstat_lat_pcpu_struct *p)
+{
+	unsigned i, cpu;	/* i holds the seqcount value of a read attempt */
+	struct kstat_lat_pcpu_snap_struct snap, *cur;
+	cycles_t m;
+
+	memset(&p->last, 0, sizeof(p->last));	/* rebuilt from all CPUs below */
+	for (cpu = 0; cpu < NR_CPUS; cpu++) {
+		cur = &p->cur[cpu];
+		do {	/* re-copy until read_seqcount_retry() reports no change */
+			i = read_seqcount_begin(&cur->lock);
+			memcpy(&snap, cur, sizeof(snap));
+		} while (read_seqcount_retry(&cur->lock, i));
+		/*
+		 * The read above and this reset of maxlat are not atomic
+		 * with respect to each other, but this is OK: it happens
+		 * rarely and losing a couple of peaks is not essential. xemul
+		 */
+		cur->maxlat = 0;
+
+		p->last.count += snap.count;
+		p->last.totlat += snap.totlat;
+		if (p->last.maxlat < snap.maxlat)
+			p->last.maxlat = snap.maxlat;
+	}
+
+	m = (p->last.maxlat > p->max_snap ? p->last.maxlat : p->max_snap);
+	CALC_LOAD(p->avg[0], EXP_1, m);
+	CALC_LOAD(p->avg[1], EXP_5, m);
+	CALC_LOAD(p->avg[2], EXP_15, m);
+	/* reset max_snap to calculate it correctly next time */
+	p->max_snap = 0;
+}
+
+#endif /* __VZSTAT_H__ */
diff -uprN linux-2.6.16/include/net/addrconf.h linux-2.6.16.ovz/include/net/addrconf.h
--- linux-2.6.16/include/net/addrconf.h	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/include/net/addrconf.h	2006-07-05 08:34:56.000000000 -0400
@@ -244,5 +244,14 @@ extern int if6_proc_init(void);
 extern void if6_proc_exit(void);
 #endif
 
+int addrconf_ifdown(struct net_device *dev, int how);
+int inet6_addr_add(int ifindex, struct in6_addr *pfx, int plen);
+
+#ifdef CONFIG_VE
+int addrconf_sysctl_init(struct ve_struct *ve);
+void addrconf_sysctl_fini(struct ve_struct *ve);
+void addrconf_sysctl_free(struct ve_struct *ve);
+#endif
+
 #endif
 #endif
diff -uprN linux-2.6.16/include/net/af_unix.h linux-2.6.16.ovz/include/net/af_unix.h
--- linux-2.6.16/include/net/af_unix.h	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/include/net/af_unix.h	2006-07-05 08:34:56.000000000 -0400
@@ -19,23 +19,37 @@ extern atomic_t unix_tot_inflight;
 
 static inline struct sock *first_unix_socket(int *i)
 {
+	struct sock *s;
+	struct ve_struct *ve;
+
+	ve = get_exec_env();	/* env handed to ve_accessible() below */
 	for (*i = 0; *i <= UNIX_HASH_SIZE; (*i)++) {
-		if (!hlist_empty(&unix_socket_table[*i]))
-			return __sk_head(&unix_socket_table[*i]);
+		for (s = sk_head(&unix_socket_table[*i]);
+		     s != NULL && !ve_accessible(s->sk_owner_env, ve);
+		     s = sk_next(s));	/* empty body: just skip entries */
+		if (s != NULL)	/* chain holds a socket that passed the check */
+			return s;
 	}
 	return NULL;
 }
 
 static inline struct sock *next_unix_socket(int *i, struct sock *s)
 {
-	struct sock *next = sk_next(s);
-	/* More in this chain? */
-	if (next)
-		return next;
+	struct ve_struct *ve;
+
+	ve = get_exec_env();	/* env handed to ve_accessible() below */
+	for (s = sk_next(s); s != NULL; s = sk_next(s)) {	/* rest of chain */
+		if (!ve_accessible(s->sk_owner_env, ve))
+			continue;
+		return s;
+	}
 	/* Look for next non-empty chain. */
 	for ((*i)++; *i <= UNIX_HASH_SIZE; (*i)++) {
-		if (!hlist_empty(&unix_socket_table[*i]))
-			return __sk_head(&unix_socket_table[*i]);
+		for (s = sk_head(&unix_socket_table[*i]);
+		     s != NULL && !ve_accessible(s->sk_owner_env, ve);
+		     s = sk_next(s));	/* empty body: just skip entries */
+		if (s != NULL)	/* chain holds a socket that passed the check */
+			return s;
 	}
 	return NULL;
 }
diff -uprN linux-2.6.16/include/net/arp.h linux-2.6.16.ovz/include/net/arp.h
--- linux-2.6.16/include/net/arp.h	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/include/net/arp.h	2006-07-05 08:34:56.000000000 -0400
@@ -7,7 +7,14 @@
 
 #define HAVE_ARP_CREATE
 
-extern struct neigh_table arp_tbl;
+#ifdef CONFIG_VE
+#define arp_tbl		(*(get_exec_env()->ve_arp_tbl))
+extern int ve_arp_init(struct ve_struct *ve);
+extern void ve_arp_fini(struct ve_struct *ve);
+#else /* !CONFIG_VE: declare only, the object must be defined in one .c file */
+extern struct neigh_table	global_arp_tbl;
+#define arp_tbl		global_arp_tbl
+#endif /* CONFIG_VE */
 
 extern void	arp_init(void);
 extern int	arp_rcv(struct sk_buff *skb, struct net_device *dev,
diff -uprN linux-2.6.16/include/net/compat.h linux-2.6.16.ovz/include/net/compat.h
--- linux-2.6.16/include/net/compat.h	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/include/net/compat.h	2006-07-05 08:34:56.000000000 -0400
@@ -23,6 +23,14 @@ struct compat_cmsghdr {
 	compat_int_t	cmsg_type;
 };
 
+#if defined(CONFIG_X86_64)
+#define is_current_32bits() (current_thread_info()->flags & _TIF_IA32)
+#elif defined(CONFIG_IA64)
+#define is_current_32bits() (IS_IA32_PROCESS(ia64_task_regs(current)))
+#else
+#define is_current_32bits()	0
+#endif
+
 #else /* defined(CONFIG_COMPAT) */
 #define compat_msghdr	msghdr		/* to avoid compiler warnings */
 #endif /* defined(CONFIG_COMPAT) */
diff -uprN linux-2.6.16/include/net/flow.h linux-2.6.16.ovz/include/net/flow.h
--- linux-2.6.16/include/net/flow.h	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/include/net/flow.h	2006-07-05 08:34:56.000000000 -0400
@@ -10,6 +10,7 @@
 #include <linux/in6.h>
 #include <asm/atomic.h>
 
+struct ve_struct;
 struct flowi {
 	int	oif;
 	int	iif;
@@ -78,6 +79,9 @@ struct flowi {
 #define fl_icmp_type	uli_u.icmpt.type
 #define fl_icmp_code	uli_u.icmpt.code
 #define fl_ipsec_spi	uli_u.spi
+#ifdef CONFIG_VE
+	struct ve_struct *owner_env;
+#endif
 } __attribute__((__aligned__(BITS_PER_LONG/8)));
 
 #define FLOW_DIR_IN	0
diff -uprN linux-2.6.16/include/net/icmp.h linux-2.6.16.ovz/include/net/icmp.h
--- linux-2.6.16/include/net/icmp.h	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/include/net/icmp.h	2006-07-05 08:34:56.000000000 -0400
@@ -31,9 +31,14 @@ struct icmp_err {
 
 extern struct icmp_err icmp_err_convert[];
 DECLARE_SNMP_STAT(struct icmp_mib, icmp_statistics);
-#define ICMP_INC_STATS(field)		SNMP_INC_STATS(icmp_statistics, field)
-#define ICMP_INC_STATS_BH(field)	SNMP_INC_STATS_BH(icmp_statistics, field)
-#define ICMP_INC_STATS_USER(field) 	SNMP_INC_STATS_USER(icmp_statistics, field)
+#if defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE)
+#define ve_icmp_statistics (get_exec_env()->_icmp_statistics)
+#else
+#define ve_icmp_statistics icmp_statistics
+#endif
+#define ICMP_INC_STATS(field)		SNMP_INC_STATS(ve_icmp_statistics, field)
+#define ICMP_INC_STATS_BH(field)	SNMP_INC_STATS_BH(ve_icmp_statistics, field)
+#define ICMP_INC_STATS_USER(field) 	SNMP_INC_STATS_USER(ve_icmp_statistics, field)
 
 struct dst_entry;
 struct net_proto_family;
diff -uprN linux-2.6.16/include/net/if_inet6.h linux-2.6.16.ovz/include/net/if_inet6.h
--- linux-2.6.16/include/net/if_inet6.h	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/include/net/if_inet6.h	2006-07-05 08:34:56.000000000 -0400
@@ -194,7 +194,14 @@ struct inet6_dev 
 	unsigned long		tstamp; /* ipv6InterfaceTable update timestamp */
 };
 
-extern struct ipv6_devconf ipv6_devconf;
+extern struct ipv6_devconf global_ipv6_devconf;
+extern struct ipv6_devconf global_ipv6_devconf_dflt;
+
+#if defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE)
+#define ve_ipv6_devconf	(*(get_exec_env()->_ipv6_devconf))
+#else
+#define ve_ipv6_devconf	global_ipv6_devconf
+#endif
 
 static inline void ipv6_eth_mc_map(struct in6_addr *addr, char *buf)
 {
diff -uprN linux-2.6.16/include/net/inet6_hashtables.h linux-2.6.16.ovz/include/net/inet6_hashtables.h
--- linux-2.6.16/include/net/inet6_hashtables.h	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/include/net/inet6_hashtables.h	2006-07-05 08:34:56.000000000 -0400
@@ -27,11 +27,13 @@ struct inet_hashinfo;
 
 /* I have no idea if this is a good hash for v6 or not. -DaveM */
 static inline unsigned int inet6_ehashfn(const struct in6_addr *laddr, const u16 lport,
-				const struct in6_addr *faddr, const u16 fport)
+				const struct in6_addr *faddr, const u16 fport,
+				const envid_t veid)
 {
 	unsigned int hashent = (lport ^ fport);
 
 	hashent ^= (laddr->s6_addr32[3] ^ faddr->s6_addr32[3]);
+	hashent ^= (veid ^ (veid >> 16));
 	hashent ^= hashent >> 16;
 	hashent ^= hashent >> 8;
 	return hashent;
@@ -45,7 +47,7 @@ static inline int inet6_sk_ehashfn(const
 	const struct in6_addr *faddr = &np->daddr;
 	const __u16 lport = inet->num;
 	const __u16 fport = inet->dport;
-	return inet6_ehashfn(laddr, lport, faddr, fport);
+	return inet6_ehashfn(laddr, lport, faddr, fport, VEID(VE_OWNER_SK(sk)));
 }
 
 static inline void __inet6_hash(struct inet_hashinfo *hashinfo,
@@ -94,14 +96,15 @@ static inline struct sock *
 	/* Optimize here for direct hit, only listening connections can
 	 * have wildcards anyways.
 	 */
-	unsigned int hash = inet6_ehashfn(daddr, hnum, saddr, sport);
+	struct ve_struct *env = get_exec_env();
+	unsigned int hash = inet6_ehashfn(daddr, hnum, saddr, sport, VEID(env));
 	struct inet_ehash_bucket *head = inet_ehash_bucket(hashinfo, hash);
 
 	prefetch(head->chain.first);
 	read_lock(&head->lock);
 	sk_for_each(sk, node, &head->chain) {
 		/* For IPV6 do the cheaper port and family tests first. */
-		if (INET6_MATCH(sk, hash, saddr, daddr, ports, dif))
+		if (INET6_MATCH(sk, hash, saddr, daddr, ports, dif, env))
 			goto hit; /* You sunk my battleship! */
 	}
 	/* Must check for a TIME_WAIT'er before going to listener hash. */
@@ -114,6 +117,7 @@ static inline struct sock *
 
 			if (ipv6_addr_equal(&tw6->tw_v6_daddr, saddr)	&&
 			    ipv6_addr_equal(&tw6->tw_v6_rcv_saddr, daddr)	&&
+			    ve_accessible_strict(tw->tw_owner_env, VEID(env)) &&
 			    (!sk->sk_bound_dev_if || sk->sk_bound_dev_if == dif))
 				goto hit;
 		}
diff -uprN linux-2.6.16/include/net/inet_hashtables.h linux-2.6.16.ovz/include/net/inet_hashtables.h
--- linux-2.6.16/include/net/inet_hashtables.h	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/include/net/inet_hashtables.h	2006-07-05 08:34:56.000000000 -0400
@@ -24,6 +24,7 @@
 #include <linux/spinlock.h>
 #include <linux/types.h>
 #include <linux/wait.h>
+#include <linux/ve_owner.h>
 
 #include <net/inet_connection_sock.h>
 #include <net/inet_sock.h>
@@ -75,11 +76,13 @@ struct inet_ehash_bucket {
  * ports are created in O(1) time?  I thought so. ;-)	-DaveM
  */
 struct inet_bind_bucket {
+	struct ve_struct	*owner_env;
 	unsigned short		port;
 	signed short		fastreuse;
 	struct hlist_node	node;
 	struct hlist_head	owners;
 };
+DCL_VE_OWNER_PROTO(TB, struct inet_bind_bucket, owner_env)
 
 #define inet_bind_bucket_for_each(tb, node, head) \
 	hlist_for_each_entry(tb, node, head, node)
@@ -139,37 +142,43 @@ static inline struct inet_ehash_bucket *
 extern struct inet_bind_bucket *
 		    inet_bind_bucket_create(kmem_cache_t *cachep,
 					    struct inet_bind_hashbucket *head,
-					    const unsigned short snum);
+					    const unsigned short snum,
+					    struct ve_struct *env);
 extern void inet_bind_bucket_destroy(kmem_cache_t *cachep,
 				     struct inet_bind_bucket *tb);
 
-static inline int inet_bhashfn(const __u16 lport, const int bhash_size)
+static inline int inet_bhashfn(const __u16 lport, const int bhash_size,
+		unsigned veid)
 {
-	return lport & (bhash_size - 1);
+	return ((lport + (veid ^ (veid >> 16))) & (bhash_size - 1));
 }
 
 extern void inet_bind_hash(struct sock *sk, struct inet_bind_bucket *tb,
 			   const unsigned short snum);
 
 /* These can have wildcards, don't try too hard. */
-static inline int inet_lhashfn(const unsigned short num)
+static inline int inet_lhashfn(const unsigned short num, unsigned veid)
 {
-	return num & (INET_LHTABLE_SIZE - 1);
+	return ((num + (veid ^ (veid >> 16))) & (INET_LHTABLE_SIZE - 1));
 }
 
 static inline int inet_sk_listen_hashfn(const struct sock *sk)
 {
-	return inet_lhashfn(inet_sk(sk)->num);
+	return inet_lhashfn(inet_sk(sk)->num, VEID(VE_OWNER_SK(sk)));
 }
 
 /* Caller must disable local BH processing. */
 static inline void __inet_inherit_port(struct inet_hashinfo *table,
 				       struct sock *sk, struct sock *child)
 {
-	const int bhash = inet_bhashfn(inet_sk(child)->num, table->bhash_size);
-	struct inet_bind_hashbucket *head = &table->bhash[bhash];
+	int bhash;
+	struct inet_bind_hashbucket *head;
 	struct inet_bind_bucket *tb;
 
+	bhash = inet_bhashfn(inet_sk(child)->num, table->bhash_size,
+			VEID(VE_OWNER_SK(child)));
+	head = &table->bhash[bhash];
+
 	spin_lock(&head->lock);
 	tb = inet_csk(sk)->icsk_bind_hash;
 	sk_add_bind_node(child, &tb->owners);
@@ -275,7 +284,8 @@ static inline int inet_iif(const struct 
 extern struct sock *__inet_lookup_listener(const struct hlist_head *head,
 					   const u32 daddr,
 					   const unsigned short hnum,
-					   const int dif);
+					   const int dif,
+					   struct ve_struct *env);
 
 /* Optimize the common listener case. */
 static inline struct sock *
@@ -285,18 +295,21 @@ static inline struct sock *
 {
 	struct sock *sk = NULL;
 	const struct hlist_head *head;
+	struct ve_struct *env;
 
+	env = get_exec_env();
 	read_lock(&hashinfo->lhash_lock);
-	head = &hashinfo->listening_hash[inet_lhashfn(hnum)];
+	head = &hashinfo->listening_hash[inet_lhashfn(hnum, VEID(env))];
 	if (!hlist_empty(head)) {
 		const struct inet_sock *inet = inet_sk((sk = __sk_head(head)));
 
 		if (inet->num == hnum && !sk->sk_node.next &&
+		    ve_accessible_strict(VE_OWNER_SK(sk), env) &&
 		    (!inet->rcv_saddr || inet->rcv_saddr == daddr) &&
 		    (sk->sk_family == PF_INET || !ipv6_only_sock(sk)) &&
 		    !sk->sk_bound_dev_if)
 			goto sherry_cache;
-		sk = __inet_lookup_listener(head, daddr, hnum, dif);
+		sk = __inet_lookup_listener(head, daddr, hnum, dif, env);
 	}
 	if (sk) {
 sherry_cache:
@@ -323,25 +336,25 @@ sherry_cache:
 #define INET_ADDR_COOKIE(__name, __saddr, __daddr) \
 	const __u64 __name = (((__u64)(__daddr)) << 32) | ((__u64)(__saddr));
 #endif /* __BIG_ENDIAN */
-#define INET_MATCH(__sk, __hash, __cookie, __saddr, __daddr, __ports, __dif)\
+#define INET_MATCH_ALLVE(__sk, __hash, __cookie, __saddr, __daddr, __ports, __dif)\
 	(((__sk)->sk_hash == (__hash))				&&	\
 	 ((*((__u64 *)&(inet_sk(__sk)->daddr))) == (__cookie))	&&	\
 	 ((*((__u32 *)&(inet_sk(__sk)->dport))) == (__ports))	&&	\
 	 (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif))))
-#define INET_TW_MATCH(__sk, __hash, __cookie, __saddr, __daddr, __ports, __dif)\
+#define INET_TW_MATCH_ALLVE(__sk, __hash, __cookie, __saddr, __daddr, __ports, __dif)\
 	(((__sk)->sk_hash == (__hash))				&&	\
 	 ((*((__u64 *)&(inet_twsk(__sk)->tw_daddr))) == (__cookie)) &&	\
 	 ((*((__u32 *)&(inet_twsk(__sk)->tw_dport))) == (__ports)) &&	\
 	 (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif))))
 #else /* 32-bit arch */
 #define INET_ADDR_COOKIE(__name, __saddr, __daddr)
-#define INET_MATCH(__sk, __hash, __cookie, __saddr, __daddr, __ports, __dif)	\
+#define INET_MATCH_ALLVE(__sk, __hash, __cookie, __saddr, __daddr, __ports, __dif)	\
 	(((__sk)->sk_hash == (__hash))				&&	\
 	 (inet_sk(__sk)->daddr		== (__saddr))		&&	\
 	 (inet_sk(__sk)->rcv_saddr	== (__daddr))		&&	\
 	 ((*((__u32 *)&(inet_sk(__sk)->dport))) == (__ports))	&&	\
 	 (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif))))
-#define INET_TW_MATCH(__sk, __hash,__cookie, __saddr, __daddr, __ports, __dif)	\
+#define INET_TW_MATCH_ALLVE(__sk, __hash,__cookie, __saddr, __daddr, __ports, __dif)	\
 	(((__sk)->sk_hash == (__hash))				&&	\
 	 (inet_twsk(__sk)->tw_daddr	== (__saddr))		&&	\
 	 (inet_twsk(__sk)->tw_rcv_saddr	== (__daddr))		&&	\
@@ -349,6 +362,18 @@ sherry_cache:
 	 (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif))))
 #endif /* 64-bit arch */
 
+#define INET_MATCH(__sk, __hash, __cookie, __saddr,			\
+					__daddr, __ports, __dif, __ve)  \
+        (INET_MATCH_ALLVE((__sk), (__hash), (__cookie), (__saddr),	\
+			  		(__daddr), (__ports), (__dif))	\
+	 && ve_accessible_strict(VE_OWNER_SK(__sk), (__ve)))
+
+#define INET_TW_MATCH(__sk, __hash, __cookie, __saddr,			\
+					__daddr, __ports, __dif, __ve)	\
+        (INET_TW_MATCH_ALLVE((__sk), (__hash), (__cookie), (__saddr),	\
+					(__daddr), (__ports), (__dif))	\
+	 && ve_accessible_strict(inet_twsk(__sk)->tw_owner_env, VEID(__ve)))
+
 /*
  * Sockets in TCP_CLOSE state are _always_ taken out of the hash, so we need
  * not check it for lookups anymore, thanks Alexey. -DaveM
@@ -368,19 +393,25 @@ static inline struct sock *
 	/* Optimize here for direct hit, only listening connections can
 	 * have wildcards anyways.
 	 */
-	unsigned int hash = inet_ehashfn(daddr, hnum, saddr, sport);
-	struct inet_ehash_bucket *head = inet_ehash_bucket(hashinfo, hash);
-
+	unsigned int hash;
+	struct inet_ehash_bucket *head;
+	struct ve_struct *env;
+
+	env = get_exec_env();
+	hash = inet_ehashfn(daddr, hnum, saddr, sport, VEID(env));
+	head = inet_ehash_bucket(hashinfo, hash);
 	prefetch(head->chain.first);
 	read_lock(&head->lock);
 	sk_for_each(sk, node, &head->chain) {
-		if (INET_MATCH(sk, hash, acookie, saddr, daddr, ports, dif))
+		if (INET_MATCH(sk, hash, acookie, saddr, daddr,
+					ports, dif, env))
 			goto hit; /* You sunk my battleship! */
 	}
 
 	/* Must check for a TIME_WAIT'er before going to listener hash. */
 	sk_for_each(sk, node, &(head + hashinfo->ehash_size)->chain) {
-		if (INET_TW_MATCH(sk, hash, acookie, saddr, daddr, ports, dif))
+		if (INET_TW_MATCH(sk, hash, acookie, saddr, daddr,
+					ports, dif, env))
 			goto hit;
 	}
 	sk = NULL;
diff -uprN linux-2.6.16/include/net/inet_sock.h linux-2.6.16.ovz/include/net/inet_sock.h
--- linux-2.6.16/include/net/inet_sock.h	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/include/net/inet_sock.h	2006-07-05 08:34:56.000000000 -0400
@@ -171,9 +171,10 @@ static inline void inet_sk_copy_descenda
 extern int inet_sk_rebuild_header(struct sock *sk);
 
 static inline unsigned int inet_ehashfn(const __u32 laddr, const __u16 lport,
-					const __u32 faddr, const __u16 fport)
+					const __u32 faddr, const __u16 fport,
+					const envid_t veid) /* veid is mixed in */
 {
-	unsigned int h = (laddr ^ lport) ^ (faddr ^ fport);
+	unsigned h = (laddr ^ lport) ^ (faddr ^ fport) ^ (veid ^ (veid >> 16));
 	h ^= h >> 16;
 	h ^= h >> 8;
 	return h;
@@ -186,8 +187,9 @@ static inline int inet_sk_ehashfn(const 
 	const __u16 lport = inet->num;
 	const __u32 faddr = inet->daddr;
 	const __u16 fport = inet->dport;
+	envid_t veid = VEID(VE_OWNER_SK(sk));
 
-	return inet_ehashfn(laddr, lport, faddr, fport);
+	return inet_ehashfn(laddr, lport, faddr, fport, veid);
 }
 
 #endif	/* _INET_SOCK_H */
diff -uprN linux-2.6.16/include/net/inet_timewait_sock.h linux-2.6.16.ovz/include/net/inet_timewait_sock.h
--- linux-2.6.16/include/net/inet_timewait_sock.h	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/include/net/inet_timewait_sock.h	2006-07-05 08:34:56.000000000 -0400
@@ -134,6 +134,7 @@ struct inet_timewait_sock {
 	unsigned long		tw_ttd;
 	struct inet_bind_bucket	*tw_tb;
 	struct hlist_node	tw_death_node;
+	envid_t			tw_owner_env;
 };
 
 static inline void inet_twsk_add_node(struct inet_timewait_sock *tw,
diff -uprN linux-2.6.16/include/net/ip.h linux-2.6.16.ovz/include/net/ip.h
--- linux-2.6.16/include/net/ip.h	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/include/net/ip.h	2006-07-05 08:34:56.000000000 -0400
@@ -95,6 +95,7 @@ extern int		ip_local_deliver(struct sk_b
 extern int		ip_mr_input(struct sk_buff *skb);
 extern int		ip_output(struct sk_buff *skb);
 extern int		ip_mc_output(struct sk_buff *skb);
+extern int		ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *));
 extern int		ip_do_nat(struct sk_buff *skb);
 extern void		ip_send_check(struct iphdr *ip);
 extern int		ip_queue_xmit(struct sk_buff *skb, int ipfragok);
@@ -152,15 +153,25 @@ struct ipv4_config
 
 extern struct ipv4_config ipv4_config;
 DECLARE_SNMP_STAT(struct ipstats_mib, ip_statistics);
-#define IP_INC_STATS(field)		SNMP_INC_STATS(ip_statistics, field)
-#define IP_INC_STATS_BH(field)		SNMP_INC_STATS_BH(ip_statistics, field)
-#define IP_INC_STATS_USER(field) 	SNMP_INC_STATS_USER(ip_statistics, field)
+#if defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE)
+#define ve_ip_statistics (get_exec_env()->_ip_statistics)
+#else
+#define ve_ip_statistics ip_statistics
+#endif
+#define IP_INC_STATS(field)		SNMP_INC_STATS(ve_ip_statistics, field)
+#define IP_INC_STATS_BH(field)		SNMP_INC_STATS_BH(ve_ip_statistics, field)
+#define IP_INC_STATS_USER(field) 	SNMP_INC_STATS_USER(ve_ip_statistics, field)
 DECLARE_SNMP_STAT(struct linux_mib, net_statistics);
-#define NET_INC_STATS(field)		SNMP_INC_STATS(net_statistics, field)
-#define NET_INC_STATS_BH(field)		SNMP_INC_STATS_BH(net_statistics, field)
-#define NET_INC_STATS_USER(field) 	SNMP_INC_STATS_USER(net_statistics, field)
-#define NET_ADD_STATS_BH(field, adnd)	SNMP_ADD_STATS_BH(net_statistics, field, adnd)
-#define NET_ADD_STATS_USER(field, adnd)	SNMP_ADD_STATS_USER(net_statistics, field, adnd)
+#if defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE)
+#define ve_net_statistics (get_exec_env()->_net_statistics)
+#else
+#define ve_net_statistics net_statistics
+#endif
+#define NET_INC_STATS(field)		SNMP_INC_STATS(ve_net_statistics, field)
+#define NET_INC_STATS_BH(field)		SNMP_INC_STATS_BH(ve_net_statistics, field)
+#define NET_INC_STATS_USER(field) 	SNMP_INC_STATS_USER(ve_net_statistics, field)
+#define NET_ADD_STATS_BH(field, adnd)	SNMP_ADD_STATS_BH(ve_net_statistics, field, adnd)
+#define NET_ADD_STATS_USER(field, adnd)	SNMP_ADD_STATS_USER(ve_net_statistics, field, adnd)
 
 extern int sysctl_local_port_range[2];
 extern int sysctl_ip_default_ttl;
@@ -380,4 +391,11 @@ extern int ip_misc_proc_init(void);
 
 extern struct ctl_table ipv4_table[];
 
+#ifdef CONFIG_SYSCTL
+extern int ipv4_sysctl_forward(ctl_table *ctl, int write, struct file * filp,
+			void __user *buffer, size_t *lenp, loff_t *ppos);
+extern int ipv4_sysctl_forward_strategy(ctl_table *table, int __user *name,
+			int nlen, void __user *oldval, size_t __user *oldlenp,
+			 void __user *newval, size_t newlen, void **context);
+#endif
 #endif	/* _IP_H */
diff -uprN linux-2.6.16/include/net/ip6_fib.h linux-2.6.16.ovz/include/net/ip6_fib.h
--- linux-2.6.16/include/net/ip6_fib.h	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/include/net/ip6_fib.h	2006-07-05 08:34:56.000000000 -0400
@@ -78,6 +78,15 @@ struct rt6_info
 	u8				rt6i_protocol;
 };
 
+struct fib6_table
+{
+	struct list_head	list;
+	struct fib6_node	root;
+	struct ve_struct	*owner_env;
+};
+
+extern struct list_head	fib6_table_list;
+
 struct fib6_walker_t
 {
 	struct fib6_walker_t *prev, *next;
@@ -143,7 +152,7 @@ struct rt6_statistics {
 
 typedef void			(*f_pnode)(struct fib6_node *fn, void *);
 
-extern struct fib6_node		ip6_routing_table;
+extern struct fib6_node		ve0_ip6_routing_table;
 
 /*
  *	exported functions
diff -uprN linux-2.6.16/include/net/ip6_route.h linux-2.6.16.ovz/include/net/ip6_route.h
--- linux-2.6.16/include/net/ip6_route.h	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/include/net/ip6_route.h	2006-07-05 08:34:56.000000000 -0400
@@ -139,5 +139,10 @@ static inline int ipv6_unicast_destinati
 	return rt->rt6i_flags & RTF_LOCAL;
 }
 
+#ifdef CONFIG_VE
+int init_ve_route6(struct ve_struct *ve);
+void fini_ve_route6(struct ve_struct *ve);
+#endif
+
 #endif
 #endif
diff -uprN linux-2.6.16/include/net/ip_fib.h linux-2.6.16.ovz/include/net/ip_fib.h
--- linux-2.6.16/include/net/ip_fib.h	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/include/net/ip_fib.h	2006-07-05 08:34:56.000000000 -0400
@@ -168,10 +168,22 @@ struct fib_table {
 	unsigned char	tb_data[0];
 };
 
+struct fn_zone;
+struct fn_hash
+{
+	struct fn_zone	*fn_zones[33];
+	struct fn_zone	*fn_zone_list;
+};
+
 #ifndef CONFIG_IP_MULTIPLE_TABLES
 
+#if defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE)
+#define ip_fib_local_table 	(get_exec_env()->_local_table)
+#define ip_fib_main_table 	(get_exec_env()->_main_table)
+#else /* tables are plain globals */
 extern struct fib_table *ip_fib_local_table;
 extern struct fib_table *ip_fib_main_table;
+#endif /* CONFIG_VE_NETDEV || CONFIG_VE_NETDEV_MODULE */
 
 static inline struct fib_table *fib_get_table(int id)
 {
@@ -203,7 +215,12 @@ static inline void fib_select_default(co
 #define ip_fib_local_table (fib_tables[RT_TABLE_LOCAL])
 #define ip_fib_main_table (fib_tables[RT_TABLE_MAIN])
 
+#if defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE)
+#define fib_tables (get_exec_env()->_fib_tables)
+#else /* table array is a plain global */
 extern struct fib_table * fib_tables[RT_TABLE_MAX+1];
+#endif /* CONFIG_VE_NETDEV || CONFIG_VE_NETDEV_MODULE */
+
 extern int fib_lookup(const struct flowi *flp, struct fib_result *res);
 extern struct fib_table *__fib_new_table(int id);
 extern void fib_rule_put(struct fib_rule *r);
@@ -250,10 +267,19 @@ extern u32  __fib_res_prefsrc(struct fib
 
 /* Exported by fib_hash.c */
 extern struct fib_table *fib_hash_init(int id);
+#if defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE)
+struct ve_struct;
+extern int init_ve_route(struct ve_struct *ve);
+extern void fini_ve_route(struct ve_struct *ve);
+#else
+#define init_ve_route(ve)	(0)
+#define fini_ve_route(ve)	do { } while (0)
+#endif
 
 #ifdef CONFIG_IP_MULTIPLE_TABLES
 /* Exported by fib_rules.c */
-
+extern int fib_rules_create(void);
+extern void fib_rules_destroy(void);
 extern int inet_rtm_delrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg);
 extern int inet_rtm_newrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg);
 extern int inet_dump_rules(struct sk_buff *skb, struct netlink_callback *cb);
diff -uprN linux-2.6.16/include/net/ipv6.h linux-2.6.16.ovz/include/net/ipv6.h
--- linux-2.6.16/include/net/ipv6.h	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/include/net/ipv6.h	2006-07-05 08:34:56.000000000 -0400
@@ -113,39 +113,48 @@ extern int sysctl_mld_max_msf;
 
 /* MIBs */
 DECLARE_SNMP_STAT(struct ipstats_mib, ipv6_statistics);
-#define IP6_INC_STATS(field)		SNMP_INC_STATS(ipv6_statistics, field)
-#define IP6_INC_STATS_BH(field)		SNMP_INC_STATS_BH(ipv6_statistics, field)
-#define IP6_INC_STATS_USER(field) 	SNMP_INC_STATS_USER(ipv6_statistics, field)
+#if defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE)
+#define ve_ipv6_statistics (get_exec_env()->_ipv6_statistics)
+#define ve_icmpv6_statistics (get_exec_env()->_icmpv6_statistics)
+#define ve_udp_stats_in6 (get_exec_env()->_udp_stats_in6)
+#else
+#define ve_ipv6_statistics ipv6_statistics
+#define ve_icmpv6_statistics icmpv6_statistics
+#define ve_udp_stats_in6 udp_stats_in6
+#endif
+#define IP6_INC_STATS(field)		SNMP_INC_STATS(ve_ipv6_statistics, field)
+#define IP6_INC_STATS_BH(field)		SNMP_INC_STATS_BH(ve_ipv6_statistics, field)
+#define IP6_INC_STATS_USER(field) 	SNMP_INC_STATS_USER(ve_ipv6_statistics, field)
 DECLARE_SNMP_STAT(struct icmpv6_mib, icmpv6_statistics);
 #define ICMP6_INC_STATS(idev, field)		({			\
 	struct inet6_dev *_idev = (idev);				\
 	if (likely(_idev != NULL))					\
 		SNMP_INC_STATS(idev->stats.icmpv6, field); 		\
-	SNMP_INC_STATS(icmpv6_statistics, field);			\
+	SNMP_INC_STATS(ve_icmpv6_statistics, field);			\
 })
 #define ICMP6_INC_STATS_BH(idev, field)		({			\
 	struct inet6_dev *_idev = (idev);				\
 	if (likely(_idev != NULL))					\
 		SNMP_INC_STATS_BH((_idev)->stats.icmpv6, field);	\
-	SNMP_INC_STATS_BH(icmpv6_statistics, field);			\
+	SNMP_INC_STATS_BH(ve_icmpv6_statistics, field);			\
 })
 #define ICMP6_INC_STATS_USER(idev, field) 	({			\
 	struct inet6_dev *_idev = (idev);				\
 	if (likely(_idev != NULL))					\
 		SNMP_INC_STATS_USER(_idev->stats.icmpv6, field);	\
-	SNMP_INC_STATS_USER(icmpv6_statistics, field);			\
+	SNMP_INC_STATS_USER(ve_icmpv6_statistics, field);			\
 })
 #define ICMP6_INC_STATS_OFFSET_BH(idev, field, offset)	({			\
 	struct inet6_dev *_idev = idev;						\
 	__typeof__(offset) _offset = (offset);					\
 	if (likely(_idev != NULL))						\
 		SNMP_INC_STATS_OFFSET_BH(_idev->stats.icmpv6, field, _offset);	\
-	SNMP_INC_STATS_OFFSET_BH(icmpv6_statistics, field, _offset);    	\
+	SNMP_INC_STATS_OFFSET_BH(ve_icmpv6_statistics, field, _offset);    	\
 })
 DECLARE_SNMP_STAT(struct udp_mib, udp_stats_in6);
-#define UDP6_INC_STATS(field)		SNMP_INC_STATS(udp_stats_in6, field)
-#define UDP6_INC_STATS_BH(field)	SNMP_INC_STATS_BH(udp_stats_in6, field)
-#define UDP6_INC_STATS_USER(field) 	SNMP_INC_STATS_USER(udp_stats_in6, field)
+#define UDP6_INC_STATS(field)		SNMP_INC_STATS(ve_udp_stats_in6, field)
+#define UDP6_INC_STATS_BH(field)	SNMP_INC_STATS_BH(ve_udp_stats_in6, field)
+#define UDP6_INC_STATS_USER(field) 	SNMP_INC_STATS_USER(ve_udp_stats_in6, field)
 
 int snmp6_register_dev(struct inet6_dev *idev);
 int snmp6_unregister_dev(struct inet6_dev *idev);
@@ -154,6 +163,11 @@ int snmp6_free_dev(struct inet6_dev *ide
 int snmp6_mib_init(void *ptr[2], size_t mibsize, size_t mibalign);
 void snmp6_mib_free(void *ptr[2]);
 
+#ifdef CONFIG_VE
+int ve_snmp_proc_init(void);
+void ve_snmp_proc_fini(void);
+#endif
+
 struct ip6_ra_chain
 {
 	struct ip6_ra_chain	*next;
diff -uprN linux-2.6.16/include/net/ndisc.h linux-2.6.16.ovz/include/net/ndisc.h
--- linux-2.6.16/include/net/ndisc.h	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/include/net/ndisc.h	2006-07-05 08:34:56.000000000 -0400
@@ -50,7 +50,14 @@ struct net_device;
 struct net_proto_family;
 struct sk_buff;
 
-extern struct neigh_table nd_tbl;
+#ifdef CONFIG_VE
+#define nd_tbl		(*(get_exec_env()->ve_nd_tbl))
+extern int ve_ndisc_init(struct ve_struct *ve);
+extern void ve_ndisc_fini(struct ve_struct *ve);
+#else
+extern struct neigh_table global_nd_tbl;
+#define nd_tbl		global_nd_tbl
+#endif
 
 struct nd_msg {
         struct icmp6hdr	icmph;
@@ -128,6 +135,7 @@ extern int 			ndisc_ifinfo_sysctl_change
 extern void 			inet6_ifinfo_notify(int event,
 						    struct inet6_dev *idev);
 
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
 static inline struct neighbour * ndisc_get_neigh(struct net_device *dev, struct in6_addr *addr)
 {
 
@@ -136,6 +144,7 @@ static inline struct neighbour * ndisc_g
 
 	return NULL;
 }
+#endif
 
 
 #endif /* __KERNEL__ */
diff -uprN linux-2.6.16/include/net/neighbour.h linux-2.6.16.ovz/include/net/neighbour.h
--- linux-2.6.16/include/net/neighbour.h	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/include/net/neighbour.h	2006-07-05 08:34:56.000000000 -0400
@@ -191,6 +191,8 @@ struct neigh_table
 	atomic_t		entries;
 	rwlock_t		lock;
 	unsigned long		last_rand;
+	struct ve_struct	*owner_env;
+	struct user_beancounter *owner_ub;
 	kmem_cache_t		*kmem_cachep;
 	struct neigh_statistics	*stats;
 	struct neighbour	**hash_buckets;
@@ -210,7 +212,7 @@ struct neigh_table
 #define NEIGH_UPDATE_F_ISROUTER			0x40000000
 #define NEIGH_UPDATE_F_ADMIN			0x80000000
 
-extern void			neigh_table_init(struct neigh_table *tbl);
+extern int			neigh_table_init(struct neigh_table *tbl);
 extern int			neigh_table_clear(struct neigh_table *tbl);
 extern struct neighbour *	neigh_lookup(struct neigh_table *tbl,
 					     const void *pkey,
diff -uprN linux-2.6.16/include/net/netlink_sock.h linux-2.6.16.ovz/include/net/netlink_sock.h
--- linux-2.6.16/include/net/netlink_sock.h	1969-12-31 19:00:00.000000000 -0500
+++ linux-2.6.16.ovz/include/net/netlink_sock.h	2006-07-05 08:34:56.000000000 -0400
@@ -0,0 +1,22 @@
+#ifndef __NET_NETLINK_SOCK_H
+#define __NET_NETLINK_SOCK_H
+
+struct netlink_sock {
+	/* struct sock has to be the first member of netlink_sock */
+	struct sock		sk;
+	u32			pid;
+	u32			dst_pid;
+	u32			dst_group;
+	u32			flags;
+	u32			subscriptions;
+	u32			ngroups;
+	unsigned long		*groups;
+	unsigned long		state;
+	wait_queue_head_t	wait;
+	struct netlink_callback	*cb;
+	spinlock_t		cb_lock;
+	void			(*data_ready)(struct sock *sk, int bytes);
+	struct module		*module;
+};
+
+#endif /* __NET_NETLINK_SOCK_H */
diff -uprN linux-2.6.16/include/net/route.h linux-2.6.16.ovz/include/net/route.h
--- linux-2.6.16/include/net/route.h	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/include/net/route.h	2006-07-05 08:34:56.000000000 -0400
@@ -201,4 +201,14 @@ static inline struct inet_peer *rt_get_p
 
 extern ctl_table ipv4_route_table[];
 
+#ifdef CONFIG_SYSCTL
+extern int ipv4_flush_delay;
+extern int ipv4_sysctl_rtcache_flush(ctl_table *ctl, int write,
+		struct file *filp, void __user *buffer,	size_t *lenp,
+		loff_t *ppos);
+extern int ipv4_sysctl_rtcache_flush_strategy(ctl_table *table,
+		int __user *name, int nlen, void __user *oldval,
+		size_t __user *oldlenp,	void __user *newval,
+		size_t newlen, void **context);
+#endif
 #endif	/* _ROUTE_H */
diff -uprN linux-2.6.16/include/net/scm.h linux-2.6.16.ovz/include/net/scm.h
--- linux-2.6.16/include/net/scm.h	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/include/net/scm.h	2006-07-05 08:34:56.000000000 -0400
@@ -40,7 +40,7 @@ static __inline__ int scm_send(struct so
 	memset(scm, 0, sizeof(*scm));
 	scm->creds.uid = current->uid;
 	scm->creds.gid = current->gid;
-	scm->creds.pid = current->tgid;
+	scm->creds.pid = virt_tgid(current);
 	if (msg->msg_controllen <= 0)
 		return 0;
 	return __scm_send(sock, msg, scm);
diff -uprN linux-2.6.16/include/net/sctp/sctp.h linux-2.6.16.ovz/include/net/sctp/sctp.h
--- linux-2.6.16/include/net/sctp/sctp.h	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/include/net/sctp/sctp.h	2006-07-05 08:34:56.000000000 -0400
@@ -461,12 +461,12 @@ static inline int sctp_frag_point(const 
  * there is room for a param header too.
  */
 #define sctp_walk_params(pos, chunk, member)\
-_sctp_walk_params((pos), (chunk), WORD_ROUND(ntohs((chunk)->chunk_hdr.length)), member)
+_sctp_walk_params((pos), (chunk), ntohs((chunk)->chunk_hdr.length), member)
 
 #define _sctp_walk_params(pos, chunk, end, member)\
 for (pos.v = chunk->member;\
      pos.v <= (void *)chunk + end - sizeof(sctp_paramhdr_t) &&\
-     pos.v <= (void *)chunk + end - WORD_ROUND(ntohs(pos.p->length)) &&\
+     pos.v <= (void *)chunk + end - ntohs(pos.p->length) &&\
      ntohs(pos.p->length) >= sizeof(sctp_paramhdr_t);\
      pos.v += WORD_ROUND(ntohs(pos.p->length)))
 
@@ -477,7 +477,7 @@ _sctp_walk_errors((err), (chunk_hdr), nt
 for (err = (sctp_errhdr_t *)((void *)chunk_hdr + \
 	    sizeof(sctp_chunkhdr_t));\
      (void *)err <= (void *)chunk_hdr + end - sizeof(sctp_errhdr_t) &&\
-     (void *)err <= (void *)chunk_hdr + end - WORD_ROUND(ntohs(err->length)) &&\
+     (void *)err <= (void *)chunk_hdr + end - ntohs(err->length) &&\
      ntohs(err->length) >= sizeof(sctp_errhdr_t); \
      err = (sctp_errhdr_t *)((void *)err + WORD_ROUND(ntohs(err->length))))
 
diff -uprN linux-2.6.16/include/net/sctp/structs.h linux-2.6.16.ovz/include/net/sctp/structs.h
--- linux-2.6.16/include/net/sctp/structs.h	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/include/net/sctp/structs.h	2006-07-05 08:34:56.000000000 -0400
@@ -702,6 +702,7 @@ struct sctp_chunk {
 	__u8 tsn_gap_acked;	/* Is this chunk acked by a GAP ACK? */
 	__s8 fast_retransmit;	 /* Is this chunk fast retransmitted? */
 	__u8 tsn_missing_report; /* Data chunk missing counter. */
+	__u8 data_accepted; 	/* At least 1 chunk in this packet accepted */
 };
 
 void sctp_chunk_hold(struct sctp_chunk *);
diff -uprN linux-2.6.16/include/net/sock.h linux-2.6.16.ovz/include/net/sock.h
--- linux-2.6.16/include/net/sock.h	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/include/net/sock.h	2006-07-05 08:34:56.000000000 -0400
@@ -55,6 +55,8 @@
 #include <net/dst.h>
 #include <net/checksum.h>
 
+#include <ub/ub_net.h>
+
 /*
  * This structure really needs to be cleaned up.
  * Most of it is for TCP, and not used by any of
@@ -251,8 +253,12 @@ struct sock {
   	int			(*sk_backlog_rcv)(struct sock *sk,
 						  struct sk_buff *skb);  
 	void                    (*sk_destruct)(struct sock *sk);
+	struct sock_beancounter sk_bc;
+	struct ve_struct	*sk_owner_env;
 };
 
+DCL_VE_OWNER_PROTO(SK, struct sock, sk_owner_env)
+
 /*
  * Hashed lists helper routines
  */
@@ -485,7 +491,8 @@ static inline void sk_add_backlog(struct
 })
 
 extern int sk_stream_wait_connect(struct sock *sk, long *timeo_p);
-extern int sk_stream_wait_memory(struct sock *sk, long *timeo_p);
+extern int sk_stream_wait_memory(struct sock *sk, long *timeo_p,
+				unsigned long amount);
 extern void sk_stream_wait_close(struct sock *sk, long timeo_p);
 extern int sk_stream_error(struct sock *sk, int flags, int err);
 extern void sk_stream_kill_queues(struct sock *sk);
@@ -706,8 +713,11 @@ static inline void sk_stream_writequeue_
 
 static inline int sk_stream_rmem_schedule(struct sock *sk, struct sk_buff *skb)
 {
-	return (int)skb->truesize <= sk->sk_forward_alloc ||
-		sk_stream_mem_schedule(sk, skb->truesize, 1);
+	if ((int)skb->truesize > sk->sk_forward_alloc &&
+		!sk_stream_mem_schedule(sk, skb->truesize, 1))
+		/* Mainline memory scheduling already refused this skb. Den */
+		return 0;
+	return ub_tcprcvbuf_charge(sk, skb) == 0;
 }
 
 static inline int sk_stream_wmem_schedule(struct sock *sk, int size)
@@ -765,6 +775,11 @@ extern struct sk_buff 		*sock_alloc_send
 						     unsigned long size,
 						     int noblock,
 						     int *errcode);
+extern struct sk_buff 		*sock_alloc_send_skb2(struct sock *sk,
+						     unsigned long size,
+						     unsigned long size2,
+						     int noblock,
+						     int *errcode);
 extern void *sock_kmalloc(struct sock *sk, int size,
 			  gfp_t priority);
 extern void sock_kfree_s(struct sock *sk, void *mem, int size);
@@ -1062,12 +1077,16 @@ sk_dst_check(struct sock *sk, u32 cookie
 
 static inline void sk_setup_caps(struct sock *sk, struct dst_entry *dst)
 {
+	extern int sysctl_tcp_use_sg;
+
 	__sk_dst_set(sk, dst);
 	sk->sk_route_caps = dst->dev->features;
 	if (sk->sk_route_caps & NETIF_F_TSO) {
 		if (sock_flag(sk, SOCK_NO_LARGESEND) || dst->header_len)
 			sk->sk_route_caps &= ~NETIF_F_TSO;
 	}
+	if (!sysctl_tcp_use_sg)
+		sk->sk_route_caps &= ~NETIF_F_SG;
 }
 
 static inline void sk_charge_skb(struct sock *sk, struct sk_buff *skb)
@@ -1142,6 +1161,10 @@ static inline int sock_queue_rcv_skb(str
 		goto out;
 	}
 
+	err = ub_sockrcvbuf_charge(sk, skb);
+	if (err < 0)
+		goto out;
+
 	/* It would be deadlock, if sock_queue_rcv_skb is used
 	   with socket lock! We assume that users of this
 	   function are lock free.
diff -uprN linux-2.6.16/include/net/tcp.h linux-2.6.16.ovz/include/net/tcp.h
--- linux-2.6.16/include/net/tcp.h	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/include/net/tcp.h	2006-07-05 08:34:56.000000000 -0400
@@ -40,6 +40,7 @@
 #include <net/tcp_states.h>
 
 #include <linux/seq_file.h>
+#include <ub/ub_net.h>
 
 extern struct inet_hashinfo tcp_hashinfo;
 
@@ -219,6 +220,7 @@ extern int sysctl_tcp_nometrics_save;
 extern int sysctl_tcp_moderate_rcvbuf;
 extern int sysctl_tcp_tso_win_divisor;
 extern int sysctl_tcp_abc;
+extern int sysctl_tcp_use_sg;
 
 extern atomic_t tcp_memory_allocated;
 extern atomic_t tcp_sockets_allocated;
@@ -250,12 +252,17 @@ static inline int between(__u32 seq1, __
 extern struct proto tcp_prot;
 
 DECLARE_SNMP_STAT(struct tcp_mib, tcp_statistics);
-#define TCP_INC_STATS(field)		SNMP_INC_STATS(tcp_statistics, field)
-#define TCP_INC_STATS_BH(field)		SNMP_INC_STATS_BH(tcp_statistics, field)
-#define TCP_INC_STATS_USER(field) 	SNMP_INC_STATS_USER(tcp_statistics, field)
-#define TCP_DEC_STATS(field)		SNMP_DEC_STATS(tcp_statistics, field)
-#define TCP_ADD_STATS_BH(field, val)	SNMP_ADD_STATS_BH(tcp_statistics, field, val)
-#define TCP_ADD_STATS_USER(field, val)	SNMP_ADD_STATS_USER(tcp_statistics, field, val)
+#if defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE)
+#define ve_tcp_statistics (get_exec_env()->_tcp_statistics)
+#else
+#define ve_tcp_statistics tcp_statistics
+#endif
+#define TCP_INC_STATS(field)		SNMP_INC_STATS(ve_tcp_statistics, field)
+#define TCP_INC_STATS_BH(field)		SNMP_INC_STATS_BH(ve_tcp_statistics, field)
+#define TCP_INC_STATS_USER(field) 	SNMP_INC_STATS_USER(ve_tcp_statistics, field)
+#define TCP_DEC_STATS(field)		SNMP_DEC_STATS(ve_tcp_statistics, field)
+#define TCP_ADD_STATS_BH(field, val)	SNMP_ADD_STATS_BH(ve_tcp_statistics, field, val)
+#define TCP_ADD_STATS_USER(field, val)	SNMP_ADD_STATS_USER(ve_tcp_statistics, field, val)
 
 extern void			tcp_v4_err(struct sk_buff *skb, u32);
 
@@ -493,7 +500,7 @@ extern u32	__tcp_select_window(struct so
  * to use only the low 32-bits of jiffies and hide the ugly
  * casts with the following macro.
  */
-#define tcp_time_stamp		((__u32)(jiffies))
+#define tcp_time_stamp		((__u32)(jiffies + get_exec_env()->jiffies_fixup))
 
 /* This is what the send packet queuing engine uses to pass
  * TCP per-packet control information to the transmission
diff -uprN linux-2.6.16/include/net/udp.h linux-2.6.16.ovz/include/net/udp.h
--- linux-2.6.16/include/net/udp.h	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/include/net/udp.h	2006-07-05 08:34:56.000000000 -0400
@@ -39,13 +39,19 @@ extern rwlock_t udp_hash_lock;
 
 extern int udp_port_rover;
 
-static inline int udp_lport_inuse(u16 num)
+static inline int udp_hashfn(u16 num, unsigned veid)
+{
+	return ((num + (veid ^ (veid >> 16))) & (UDP_HTABLE_SIZE - 1));
+}
+
+static inline int udp_lport_inuse(u16 num, struct ve_struct *env)
 {
 	struct sock *sk;
 	struct hlist_node *node;
 
-	sk_for_each(sk, node, &udp_hash[num & (UDP_HTABLE_SIZE - 1)])
-		if (inet_sk(sk)->num == num)
+	sk_for_each(sk, node, &udp_hash[udp_hashfn(num, VEID(env))])
+		if (inet_sk(sk)->num == num &&
+		    ve_accessible_strict(sk->sk_owner_env, env))
 			return 1;
 	return 0;
 }
@@ -75,9 +81,14 @@ extern unsigned int udp_poll(struct file
 			     poll_table *wait);
 
 DECLARE_SNMP_STAT(struct udp_mib, udp_statistics);
-#define UDP_INC_STATS(field)		SNMP_INC_STATS(udp_statistics, field)
-#define UDP_INC_STATS_BH(field)		SNMP_INC_STATS_BH(udp_statistics, field)
-#define UDP_INC_STATS_USER(field) 	SNMP_INC_STATS_USER(udp_statistics, field)
+#if defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE)
+#define ve_udp_statistics (get_exec_env()->_udp_statistics)
+#else
+#define ve_udp_statistics udp_statistics
+#endif
+#define UDP_INC_STATS(field)		SNMP_INC_STATS(ve_udp_statistics, field)
+#define UDP_INC_STATS_BH(field)		SNMP_INC_STATS_BH(ve_udp_statistics, field)
+#define UDP_INC_STATS_USER(field) 	SNMP_INC_STATS_USER(ve_udp_statistics, field)
 
 /* /proc */
 struct udp_seq_afinfo {
diff -uprN linux-2.6.16/include/ub/beancounter.h linux-2.6.16.ovz/include/ub/beancounter.h
--- linux-2.6.16/include/ub/beancounter.h	1969-12-31 19:00:00.000000000 -0500
+++ linux-2.6.16.ovz/include/ub/beancounter.h	2006-07-05 08:34:56.000000000 -0400
@@ -0,0 +1,329 @@
+/*
+ *  include/ub/beancounter.h
+ *
+ *  Copyright (C) 1999-2005  SWsoft
+ *  All rights reserved.
+ *  
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ *  Andrey Savochkin	saw@sw-soft.com
+ *
+ */
+
+#ifndef _LINUX_BEANCOUNTER_H
+#define _LINUX_BEANCOUNTER_H
+
+#include <linux/config.h>
+
+/*
+ * Generic ratelimiting stuff.
+ */
+
+struct ub_rate_info {
+	int burst;
+	int interval; /* jiffy_t per event */
+	int bucket; /* kind of leaky bucket */
+	unsigned long last; /* last event */
+};
+
+/* Return true if rate limit permits. */
+int ub_ratelimit(struct ub_rate_info *);
+
+
+/*
+ * This magic is used to distinguish user beancounter and pages beancounter
+ * in struct page. page_ub and page_bc are placed in union and MAGIC
+ * ensures us that we don't use pbc as ubc in ub_page_uncharge().
+ */
+#define UB_MAGIC		0x62756275
+
+/*
+ *	Resource list.
+ */
+
+#define UB_KMEMSIZE	0	/* Unswappable kernel memory size including
+				 * struct task, page directories, etc.
+				 */
+#define UB_LOCKEDPAGES	1	/* Mlock()ed pages. */
+#define UB_PRIVVMPAGES	2	/* Total number of pages, counting potentially
+				 * private pages as private and used.
+				 */
+#define UB_SHMPAGES	3	/* IPC SHM segment size. */
+#define UB_ZSHMPAGES	4	/* Anonymous shared memory. */
+#define UB_NUMPROC	5	/* Number of processes. */
+#define UB_PHYSPAGES	6	/* All resident pages, for swapout guarantee. */
+#define UB_VMGUARPAGES	7	/* Guarantee for memory allocation,
+				 * checked against PRIVVMPAGES.
+				 */
+#define UB_OOMGUARPAGES	8	/* Guarantees against OOM kill.
+				 * Only limit is used, no accounting.
+				 */
+#define UB_NUMTCPSOCK	9	/* Number of TCP sockets. */
+#define UB_NUMFLOCK	10	/* Number of file locks. */
+#define UB_NUMPTY	11	/* Number of PTYs. */
+#define UB_NUMSIGINFO	12	/* Number of siginfos. */
+#define UB_TCPSNDBUF	13	/* Total size of tcp send buffers. */
+#define UB_TCPRCVBUF	14	/* Total size of tcp receive buffers. */
+#define UB_OTHERSOCKBUF	15	/* Total size of other socket
+				 * send buffers (all buffers for PF_UNIX).
+				 */
+#define UB_DGRAMRCVBUF	16	/* Total size of other socket
+				 * receive buffers.
+				 */
+#define UB_NUMOTHERSOCK	17	/* Number of other sockets. */
+#define UB_DCACHESIZE	18	/* Size of busy dentry/inode cache. */
+#define UB_NUMFILE	19	/* Number of open files. */
+
+#define UB_RESOURCES	24
+
+#define UB_UNUSEDPRIVVM	(UB_RESOURCES + 0)
+#define UB_TMPFSPAGES	(UB_RESOURCES + 1)
+#define UB_SWAPPAGES	(UB_RESOURCES + 2)
+#define UB_HELDPAGES	(UB_RESOURCES + 3)
+
+struct ubparm {
+	/* 
+	 * A barrier over which resource allocations are failed gracefully.
+	 * If the amount of consumed memory is over the barrier further sbrk()
+	 * or mmap() calls fail, the existing processes are not killed. 
+	 */
+	unsigned long	barrier;
+	/* hard resource limit */
+	unsigned long	limit;
+	/* consumed resources */
+	unsigned long	held;
+	/* maximum amount of consumed resources through the last period */
+	unsigned long	maxheld;
+	/* minimum amount of consumed resources through the last period */
+	unsigned long	minheld;
+	/* count of failed charges */
+	unsigned long	failcnt;
+};
+
+/*
+ * Kernel internal part.
+ */
+
+#ifdef __KERNEL__
+
+#include <ub/ub_debug.h>
+#include <linux/interrupt.h>
+#include <asm/atomic.h>
+#include <linux/spinlock.h>
+#include <linux/cache.h>
+#include <linux/threads.h>
+
+/*
+ * UB_MAXVALUE is essentially LONG_MAX declared in a cross-compiling safe form.
+ */
+#define UB_MAXVALUE	( (1UL << (sizeof(unsigned long)*8-1)) - 1)
+
+
+/*
+ *	Resource management structures
+ * Serialization issues:
+ *   beancounter list management is protected via ub_hash_lock
+ *   task pointers are set only for current task and only once
+ *   refcount is managed atomically
+ *   value and limit comparison and change are protected by per-ub spinlock
+ */
+
+struct page_beancounter;
+struct task_beancounter;
+struct sock_beancounter;
+
+struct page_private {
+	unsigned long		ubp_unused_privvmpages;
+	unsigned long		ubp_tmpfs_respages;
+	unsigned long		ubp_swap_pages;
+	unsigned long long	ubp_held_pages;
+};
+
+struct sock_private {
+	unsigned long		ubp_rmem_thres;
+	unsigned long		ubp_wmem_pressure;
+	unsigned long		ubp_maxadvmss;
+	unsigned long		ubp_rmem_pressure;
+#define UB_RMEM_EXPAND          0
+#define UB_RMEM_KEEP            1
+#define UB_RMEM_SHRINK          2
+	struct list_head	ubp_other_socks;
+	struct list_head	ubp_tcp_socks;
+	atomic_t		ubp_orphan_count;
+};
+
+struct ub_perfstat {
+	unsigned long unmap;
+	unsigned long swapin;
+
+#ifdef CONFIG_UBC_DEBUG_KMEM
+	long	pages_charged;
+	long	vmalloc_charged;
+	long	pbcs;
+#endif
+} ____cacheline_aligned_in_smp;
+
+struct user_beancounter
+{
+	unsigned long		ub_magic;
+	atomic_t		ub_refcount;
+	struct			user_beancounter *ub_next;
+	spinlock_t		ub_lock;
+	uid_t			ub_uid;
+
+	struct ub_rate_info	ub_limit_rl;
+	int			ub_oom_noproc;
+
+	struct page_private	ppriv;
+#define ub_unused_privvmpages	ppriv.ubp_unused_privvmpages
+#define ub_tmpfs_respages	ppriv.ubp_tmpfs_respages
+#define ub_swap_pages		ppriv.ubp_swap_pages
+#define ub_held_pages		ppriv.ubp_held_pages
+	struct sock_private	spriv;
+#define ub_rmem_thres		spriv.ubp_rmem_thres
+#define ub_maxadvmss		spriv.ubp_maxadvmss
+#define ub_rmem_pressure	spriv.ubp_rmem_pressure
+#define ub_wmem_pressure	spriv.ubp_wmem_pressure
+#define ub_tcp_sk_list		spriv.ubp_tcp_socks
+#define ub_other_sk_list	spriv.ubp_other_socks
+#define ub_orphan_count		spriv.ubp_orphan_count
+
+	struct user_beancounter *parent;
+	void *private_data;
+
+	/* resources statistic and settings */
+	struct ubparm		ub_parms[UB_RESOURCES];
+	/* resources statistic for last interval */
+	struct ubparm		ub_store[UB_RESOURCES];
+
+	struct ub_perfstat	ub_stat[NR_CPUS];
+
+#ifdef CONFIG_UBC_DEBUG_KMEM
+	struct list_head	ub_cclist;
+#endif
+};
+
+enum severity { UB_HARD, UB_SOFT, UB_FORCE };
+
+static inline int ub_barrier_hit(struct user_beancounter *ub, int resource)
+{
+	return ub->ub_parms[resource].held > ub->ub_parms[resource].barrier;
+}
+
+static inline int ub_hfbarrier_hit(struct user_beancounter *ub, int resource)
+{
+	return (ub->ub_parms[resource].held > 
+		((ub->ub_parms[resource].barrier) >> 1));
+}
+
+#ifndef CONFIG_USER_RESOURCE
+
+static inline struct user_beancounter *get_beancounter_byuid
+		(uid_t uid, int create) { return NULL; }
+static inline struct user_beancounter *get_beancounter
+		(struct user_beancounter *ub) { return NULL; }
+static inline void put_beancounter(struct user_beancounter *ub) {;}
+
+static inline void ub_init_cache(unsigned long mempages) { };
+static inline void ub_init_ub0(void) { };
+
+#define get_ub0()	NULL
+
+#else /* CONFIG_USER_RESOURCE */
+
+/*
+ *  Charge/uncharge operations
+ */
+
+extern int __charge_beancounter_locked(struct user_beancounter *ub,
+		int resource, unsigned long val, enum severity strict);
+
+extern void __uncharge_beancounter_locked(struct user_beancounter *ub,
+		int resource, unsigned long val);
+
+extern void __put_beancounter(struct user_beancounter *ub);
+
+extern void uncharge_warn(struct user_beancounter *ub, int resource,
+		unsigned long val, unsigned long held);
+
+extern const char *ub_rnames[];
+/*
+ *	Put a beancounter reference
+ */
+
+static inline void put_beancounter(struct user_beancounter *ub)
+{
+	if (unlikely(ub == NULL))
+		return;
+
+	__put_beancounter(ub);
+}
+
+/*
+ *	Create a new beancounter reference
+ */
+extern struct user_beancounter *get_beancounter_byuid(uid_t uid, int create);
+
+static inline 
+struct user_beancounter *get_beancounter(struct user_beancounter *ub)
+{
+	if (unlikely(ub == NULL))
+		return NULL;
+
+	atomic_inc(&ub->ub_refcount);
+	return ub;
+}
+
+extern struct user_beancounter *get_subbeancounter_byid(
+		struct user_beancounter *,
+		int id, int create);
+extern struct user_beancounter *subbeancounter_findcreate(
+		struct user_beancounter *p, int id);
+
+extern struct user_beancounter ub0;
+
+extern void ub_init_cache(unsigned long);
+extern void ub_init_ub0(void);
+#define get_ub0()	(&ub0)
+
+extern void print_ub_uid(struct user_beancounter *ub, char *buf, int size);
+
+/*
+ *	Resource charging
+ * Change user's account and compare against limits
+ */
+
+static inline void ub_adjust_maxheld(struct user_beancounter *ub, int resource)
+{
+	if (ub->ub_parms[resource].maxheld < ub->ub_parms[resource].held)
+		ub->ub_parms[resource].maxheld = ub->ub_parms[resource].held;
+	if (ub->ub_parms[resource].minheld > ub->ub_parms[resource].held)
+		ub->ub_parms[resource].minheld = ub->ub_parms[resource].held;
+}
+
+#endif /* CONFIG_USER_RESOURCE */
+
+#include <ub/ub_decl.h>
+UB_DECLARE_FUNC(int, charge_beancounter(struct user_beancounter *ub,
+			int resource, unsigned long val, enum severity strict));
+UB_DECLARE_VOID_FUNC(uncharge_beancounter(struct user_beancounter *ub,
+			int resource, unsigned long val));
+
+UB_DECLARE_VOID_FUNC(charge_beancounter_notop(struct user_beancounter *ub,
+			int resource, unsigned long val));
+UB_DECLARE_VOID_FUNC(uncharge_beancounter_notop(struct user_beancounter *ub,
+			int resource, unsigned long val));
+
+#ifndef CONFIG_USER_RESOURCE_PROC
+static inline void ub_init_proc(void) { };
+#else
+extern void ub_init_proc(void);
+#endif
+
+#ifdef CONFIG_USER_RSS_ACCOUNTING
+extern void ub_init_pbc(void);
+#else
+static inline void ub_init_pbc(void) { }	/* no-op stub */
+#endif
+#endif /* __KERNEL__ */
+#endif /* _LINUX_BEANCOUNTER_H */
diff -uprN linux-2.6.16/include/ub/ub_dcache.h linux-2.6.16.ovz/include/ub/ub_dcache.h
--- linux-2.6.16/include/ub/ub_dcache.h	1969-12-31 19:00:00.000000000 -0500
+++ linux-2.6.16.ovz/include/ub/ub_dcache.h	2006-07-05 08:34:56.000000000 -0400
@@ -0,0 +1,57 @@
+/*
+ *  include/ub/ub_dcache.h
+ *
+ *  Copyright (C) 2005  SWsoft
+ *  All rights reserved.
+ *  
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#ifndef __UB_DCACHE_H_
+#define __UB_DCACHE_H_
+
+#include <ub/ub_decl.h>
+
+/*
+ * UB_DCACHESIZE accounting
+ */
+
+struct dentry_beancounter
+{
+	/*
+	 *  d_inuse =
+	 *         <number of external refs> +
+	 *         <number of 'used' children>
+	 *
+	 * d_inuse == -1 means that dentry is unused
+	 * state change -1 => 0 causes charge
+	 * state change 0 => -1 causes uncharge
+	 */
+	atomic_t d_inuse;
+	/* charged size, including name length if name is not inline */
+	unsigned long d_ubsize;
+	struct user_beancounter *d_ub;
+};
+
+struct dentry;
+
+UB_DECLARE_FUNC(int, ub_dentry_alloc(struct dentry *d))
+UB_DECLARE_VOID_FUNC(ub_dentry_charge_nofail(struct dentry *d))
+UB_DECLARE_VOID_FUNC(ub_dentry_uncharge(struct dentry *d))
+
+#ifdef CONFIG_USER_RESOURCE
+UB_DECLARE_FUNC(int, ub_dentry_charge(struct dentry *d))
+#define ub_dget_testone(d)	(atomic_inc_and_test(&(d)->dentry_bc.d_inuse))
+#define ub_dput_testzero(d)	(atomic_add_negative(-1, &(d)->dentry_bc.d_inuse))
+#define INUSE_INIT		0
+#else
+#define ub_dentry_charge(d)	({			\
+			spin_unlock(&d->d_lock);	\
+			rcu_read_unlock();		\
+			0;				\
+		})
+#define ub_dget_testone(d)	(0)
+#define ub_dput_testzero(d)	(0)
+#endif
+#endif
diff -uprN linux-2.6.16/include/ub/ub_debug.h linux-2.6.16.ovz/include/ub/ub_debug.h
--- linux-2.6.16/include/ub/ub_debug.h	1969-12-31 19:00:00.000000000 -0500
+++ linux-2.6.16.ovz/include/ub/ub_debug.h	2006-07-05 08:34:56.000000000 -0400
@@ -0,0 +1,95 @@
+/*
+ *  include/ub/ub_debug.h
+ *
+ *  Copyright (C) 2005  SWsoft
+ *  All rights reserved.
+ *  
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#ifndef __UB_DEBUG_H_
+#define __UB_DEBUG_H_
+
+/*
+ * general debugging
+ */
+
+#define UBD_ALLOC	0x1
+#define UBD_CHARGE	0x2
+#define UBD_LIMIT	0x4
+#define UBD_TRACE	0x8
+
+/*
+ * ub_net debugging
+ */
+
+#define UBD_NET_SOCKET	0x10
+#define UBD_NET_SLEEP	0x20
+#define UBD_NET_SEND	0x40
+#define UBD_NET_RECV	0x80
+
+/*
+ * Main routines
+ */
+
+#define UB_DEBUG (0)
+#define DEBUG_RESOURCE (0ULL)
+
+#define ub_dbg_cond(__cond, __str, args...)				\
+	do { 								\
+		if ((__cond) != 0)					\
+			printk(__str, ##args);				\
+	} while(0)
+
+#define ub_debug(__section, __str, args...) 				\
+	ub_dbg_cond(UB_DEBUG & (__section), __str, ##args)
+
+#define ub_debug_resource(__resource, __str, args...)			\
+	ub_dbg_cond((UB_DEBUG & UBD_CHARGE) && 				\
+			(DEBUG_RESOURCE & (1ULL << (__resource))), 	\
+			__str, ##args)
+
+#if UB_DEBUG & UBD_TRACE
+#define ub_debug_trace(__cond, __b, __r)				\
+		do {							\
+			static struct ub_rate_info ri =	{ __b, __r };	\
+			if ((__cond) != 0 && ub_ratelimit(&ri))		\
+				dump_stack(); 				\
+		} while(0)
+#else
+#define ub_debug_trace(__cond, __burst, __rate)
+#endif
+
+#include <linux/config.h>
+
+#ifdef CONFIG_UBC_DEBUG_KMEM
+#include <linux/list.h>
+#include <linux/kmem_cache.h>
+
+struct user_beancounter;
+struct ub_cache_counter {
+	struct list_head ulist;
+	struct ub_cache_counter *next;
+	struct user_beancounter *ub;
+	kmem_cache_t *cachep;
+	unsigned long counter;
+};
+
+extern spinlock_t cc_lock;
+extern void init_cache_counters(void);
+extern void ub_free_counters(struct user_beancounter *);
+extern void ub_kmemcache_free(kmem_cache_t *cachep);
+
+struct vm_struct;
+extern void inc_vmalloc_charged(struct vm_struct *, int);
+extern void dec_vmalloc_charged(struct vm_struct *);
+#else
+#define init_cache_counters()		do { } while (0)
+#define inc_vmalloc_charged(vm, f)	do { } while (0)
+#define dec_vmalloc_charged(vm)		do { } while (0)
+#define ub_free_counters(ub)		do { } while (0)
+#define ub_kmemcache_free(cachep)	do { } while (0)
+#endif
+
+#endif
diff -uprN linux-2.6.16/include/ub/ub_decl.h linux-2.6.16.ovz/include/ub/ub_decl.h
--- linux-2.6.16/include/ub/ub_decl.h	1969-12-31 19:00:00.000000000 -0500
+++ linux-2.6.16.ovz/include/ub/ub_decl.h	2006-07-05 08:34:56.000000000 -0400
@@ -0,0 +1,40 @@
+/*
+ *  include/ub/ub_decl.h
+ *
+ *  Copyright (C) 2005  SWsoft
+ *  All rights reserved.
+ *  
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#ifndef __UB_DECL_H_
+#define __UB_DECL_H_
+
+#include <linux/config.h>
+
+/*
+ * Naming convention:
+ * ub_<section|object>_<operation>
+ */
+
+#ifdef CONFIG_USER_RESOURCE
+
+#define UB_DECLARE_FUNC(ret_type, decl)	extern ret_type decl;
+#define UB_DECLARE_VOID_FUNC(decl)	extern void decl;
+
+#else /* CONFIG_USER_RESOURCE */
+
+#define UB_DECLARE_FUNC(ret_type, decl)		\
+	static inline ret_type decl		\
+	{					\
+		return (ret_type)0;		\
+	}
+#define UB_DECLARE_VOID_FUNC(decl)		\
+	static inline void decl			\
+	{					\
+	}
+
+#endif /* CONFIG_USER_RESOURCE */
+
+#endif
diff -uprN linux-2.6.16/include/ub/ub_hash.h linux-2.6.16.ovz/include/ub/ub_hash.h
--- linux-2.6.16/include/ub/ub_hash.h	1969-12-31 19:00:00.000000000 -0500
+++ linux-2.6.16.ovz/include/ub/ub_hash.h	2006-07-05 08:34:56.000000000 -0400
@@ -0,0 +1,41 @@
+/*
+ *  include/ub/ub_hash.h
+ *
+ *  Copyright (C) 2005  SWsoft
+ *  All rights reserved.
+ *  
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#ifndef _LINUX_UBHASH_H
+#define _LINUX_UBHASH_H
+
+#ifdef __KERNEL__
+
+#define UB_HASH_SIZE 256
+
+struct ub_hash_slot {
+	struct user_beancounter *ubh_beans;
+};
+
+extern struct ub_hash_slot ub_hash[];
+extern spinlock_t ub_hash_lock;
+
+#ifdef CONFIG_USER_RESOURCE
+
+/*
+ * Iterate over beancounters
+ * @__slot  - hash slot
+ * @__ubp - beancounter ptr
+ * Can use break :)
+ */
+#define for_each_beancounter(__slot, __ubp)				\
+	for (__slot = 0, __ubp = NULL; 					\
+		__slot < UB_HASH_SIZE && __ubp == NULL; __slot++)	\
+		 for (__ubp = ub_hash[__slot].ubh_beans; __ubp;		\
+				 __ubp = __ubp->ub_next)
+
+#endif /* CONFIG_USER_RESOURCE */
+#endif /* __KERNEL__ */
+#endif /* _LINUX_UBHASH_H */
diff -uprN linux-2.6.16/include/ub/ub_mem.h linux-2.6.16.ovz/include/ub/ub_mem.h
--- linux-2.6.16/include/ub/ub_mem.h	1969-12-31 19:00:00.000000000 -0500
+++ linux-2.6.16.ovz/include/ub/ub_mem.h	2006-07-05 08:34:56.000000000 -0400
@@ -0,0 +1,76 @@
+/*
+ *  include/ub/ub_mem.h
+ *
+ *  Copyright (C) 2005  SWsoft
+ *  All rights reserved.
+ *  
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#ifndef __UB_SLAB_H_
+#define __UB_SLAB_H_
+
+#include <linux/config.h>
+#include <linux/kmem_slab.h>
+#include <ub/beancounter.h>
+#include <ub/ub_decl.h>
+
+/*
+ * UB_KMEMSIZE accounting
+ */
+
+#ifdef CONFIG_UBC_DEBUG_ITEMS
+#define CHARGE_ORDER(__o)		(1 << (__o))
+#define CHARGE_SIZE(__s)		1
+#else
+#define CHARGE_ORDER(__o)		(PAGE_SIZE << (__o))
+#define CHARGE_SIZE(__s)		(__s)
+#endif
+
+#define page_ub(__page)	((__page)->bc.page_ub)
+
+struct mm_struct;
+struct page;
+
+UB_DECLARE_FUNC(struct user_beancounter *, slab_ub(void *obj))
+UB_DECLARE_FUNC(struct user_beancounter *, vmalloc_ub(void *obj))
+UB_DECLARE_FUNC(struct user_beancounter *, mem_ub(void *obj))
+
+UB_DECLARE_FUNC(int, ub_page_charge(struct page *page, int order, int mask))
+UB_DECLARE_VOID_FUNC(ub_page_uncharge(struct page *page, int order))
+UB_DECLARE_FUNC(int, ub_slab_charge(void *objp, int flags))
+UB_DECLARE_VOID_FUNC(ub_slab_uncharge(void *obj))
+
+#define slab_ubcs(cachep, slabp) ((struct user_beancounter **)\
+		(ALIGN((unsigned long)(slab_bufctl(slabp) + (cachep)->num),\
+		       sizeof(void *))))
+
+#ifdef CONFIG_USER_RESOURCE
+extern struct user_beancounter *ub_select_worst(long *);
+
+/* mm/slab.c needed stuff */
+#define UB_ALIGN(flags)		((flags) & SLAB_UBC ? sizeof(void *) : 1)
+#define UB_EXTRA(flags)		((flags) & SLAB_UBC ? sizeof(void *) : 0)
+#define set_cache_objuse(cachep)	do {				\
+		(cachep)->objuse = ((PAGE_SIZE << (cachep)->gfporder) +	\
+				(cachep)->num - 1) / (cachep)->num;	\
+		if (!OFF_SLAB(cachep))					\
+			break;						\
+		(cachep)->objuse += ((cachep)->slabp_cache->objuse +	\
+				(cachep)->num - 1) / (cachep)->num;	\
+	} while (0)
+#define init_slab_ubps(cachep, slabp)	do {				\
+		if (!((cachep)->flags & SLAB_UBC))			\
+			break;						\
+		memset(slab_ubcs(cachep, slabp), 0,			\
+				(cachep)->num * sizeof(void *));	\
+	} while (0)
+#define kmem_obj_memusage(o)	(virt_to_cache(o)->objuse)
+#else
+#define UB_ALIGN(flags)		1
+#define UB_EXTRA(flags)		0
+#define set_cache_objuse(c)	do { } while (0)
+#define init_slab_ubps(c, s)	do { } while (0)
+#endif
+#endif /* __UB_SLAB_H_ */
diff -uprN linux-2.6.16/include/ub/ub_misc.h linux-2.6.16.ovz/include/ub/ub_misc.h
--- linux-2.6.16/include/ub/ub_misc.h	1969-12-31 19:00:00.000000000 -0500
+++ linux-2.6.16.ovz/include/ub/ub_misc.h	2006-07-05 08:34:56.000000000 -0400
@@ -0,0 +1,54 @@
+/*
+ *  include/ub/ub_misc.h
+ *
+ *  Copyright (C) 2005  SWsoft
+ *  All rights reserved.
+ *  
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#ifndef __UB_MISC_H_
+#define __UB_MISC_H_
+
+#include <ub/ub_decl.h>
+
+struct tty_struct;
+struct file;
+struct file_lock;
+struct sigqueue;
+
+UB_DECLARE_FUNC(int, ub_file_charge(struct file *f))
+UB_DECLARE_VOID_FUNC(ub_file_uncharge(struct file *f))
+UB_DECLARE_FUNC(int, ub_flock_charge(struct file_lock *fl, int hard))
+UB_DECLARE_VOID_FUNC(ub_flock_uncharge(struct file_lock *fl))
+UB_DECLARE_FUNC(int, ub_siginfo_charge(struct sigqueue *q,
+			struct user_beancounter *ub))
+UB_DECLARE_VOID_FUNC(ub_siginfo_uncharge(struct sigqueue *q))
+UB_DECLARE_FUNC(int, ub_task_charge(struct task_struct *parent,
+			struct task_struct *task))
+UB_DECLARE_VOID_FUNC(ub_task_uncharge(struct task_struct *task))
+UB_DECLARE_FUNC(int, ub_pty_charge(struct tty_struct *tty))
+UB_DECLARE_VOID_FUNC(ub_pty_uncharge(struct tty_struct *tty))
+
+#ifdef CONFIG_USER_RESOURCE
+#define set_flock_charged(fl)	do { (fl)->fl_charged = 1; } while (0)
+#define unset_flock_charged(fl)	do {		\
+		WARN_ON((fl)->fl_charged == 0);	\
+		(fl)->fl_charged = 0;		\
+	} while (0)
+#define set_mm_ub(mm, tsk)	do {	/* NULL tsk: take the exec ub */ \
+		(mm)->mm_ub = get_beancounter((tsk) ?		\
+			(tsk)->task_bc.task_ub : get_exec_ub());	\
+	} while (0)
+#define put_mm_ub(mm)		do {				\
+		put_beancounter((mm)->mm_ub);			\
+		(mm)->mm_ub = NULL;				\
+	} while (0)
+#else /* !CONFIG_USER_RESOURCE: the helpers below compile to nothing */
+#define set_flock_charged(fl)	do { } while (0)
+#define unset_flock_charged(fl)	do { } while (0)
+#define set_mm_ub(mm, tsk)	do { } while (0)
+#define put_mm_ub(mm)		do { } while (0)
+#endif
+#endif
diff -uprN linux-2.6.16/include/ub/ub_net.h linux-2.6.16.ovz/include/ub/ub_net.h
--- linux-2.6.16/include/ub/ub_net.h	1969-12-31 19:00:00.000000000 -0500
+++ linux-2.6.16.ovz/include/ub/ub_net.h	2006-07-05 08:34:56.000000000 -0400
@@ -0,0 +1,141 @@
+/*
+ *  include/ub/ub_net.h
+ *
+ *  Copyright (C) 2005  SWsoft
+ *  All rights reserved.
+ *  
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#ifndef __UB_NET_H_
+#define __UB_NET_H_
+
+/*
+ * UB_NUMXXXSOCK, UB_XXXBUF accounting
+ */
+
+#include <ub/ub_decl.h>
+#include <ub/ub_sk.h>
+
+#define bid2sid(__bufid) \
+	((__bufid) == UB_TCPSNDBUF ? UB_NUMTCPSOCK : UB_NUMOTHERSOCK)
+
+#define SOCK_MIN_UBCSPACE ((int)((2048 - sizeof(struct skb_shared_info)) & \
+			~(SMP_CACHE_BYTES-1)))
+#define SOCK_MIN_UBCSPACE_CH skb_charge_size(SOCK_MIN_UBCSPACE)
+
+
+#define IS_TCP_SOCK(__family, __type) \
+		(((__family) == PF_INET || (__family) == PF_INET6) && (__type) == SOCK_STREAM)
+
+UB_DECLARE_FUNC(int, ub_sock_charge(struct sock *sk, int family, int type))
+UB_DECLARE_FUNC(int, ub_tcp_sock_charge(struct sock *sk)) 
+UB_DECLARE_FUNC(int, ub_other_sock_charge(struct sock *sk))
+UB_DECLARE_VOID_FUNC(ub_sock_uncharge(struct sock *sk))
+UB_DECLARE_VOID_FUNC(ub_skb_uncharge(struct sk_buff *skb))
+UB_DECLARE_FUNC(int, ub_skb_alloc_bc(struct sk_buff *skb, int gfp_mask))
+UB_DECLARE_VOID_FUNC(ub_skb_free_bc(struct sk_buff *skb))
+UB_DECLARE_FUNC(int, ub_nlrcvbuf_charge(struct sk_buff *skb, struct sock *sk))
+UB_DECLARE_FUNC(int, ub_sockrcvbuf_charge(struct sock *sk, struct sk_buff *skb))
+UB_DECLARE_VOID_FUNC(ub_sock_snd_queue_add(struct sock *sk, int resource, 
+			unsigned long size))
+UB_DECLARE_FUNC(long, ub_sock_wait_for_space(struct sock *sk, long timeo, 
+			unsigned long size))
+
+UB_DECLARE_FUNC(int, ub_tcprcvbuf_charge(struct sock *sk, struct sk_buff *skb))
+UB_DECLARE_FUNC(int, ub_tcprcvbuf_charge_forced(struct sock *sk,
+						struct sk_buff *skb))
+UB_DECLARE_FUNC(int, ub_tcpsndbuf_charge(struct sock *sk, struct sk_buff *skb))
+UB_DECLARE_FUNC(int, ub_tcpsndbuf_charge_forced(struct sock *sk,
+						struct sk_buff *skb))
+
+/* Charge size -> data length: strips the sk_buff and shared-info overhead */
+static inline unsigned long skb_charge_datalen(unsigned long chargesize)
+{
+#ifdef CONFIG_USER_RESOURCE
+	unsigned long slabsize;
+
+	chargesize -= sizeof(struct sk_buff);	/* NOTE(review): wraps if smaller */
+	slabsize = 64;
+	do { 
+		slabsize <<= 1; 
+	} while (slabsize <= chargesize);
+
+	slabsize >>= 1;	/* largest power of two <= chargesize, at least 64 */
+	return (slabsize - sizeof(struct skb_shared_info)) &
+		~(SMP_CACHE_BYTES-1);
+#else
+	return 0;	/* accounting disabled */
+#endif
+}
+
+/* Charge for `size' data bytes: power-of-two buffer (>= 64) plus sk_buff. */
+static inline unsigned long skb_charge_size_gen(unsigned long size)
+{
+#ifdef CONFIG_USER_RESOURCE
+	/* as wide as size: a narrower counter could wrap to 0 and spin forever */
+	unsigned long slabsize;
+
+	size = SKB_DATA_ALIGN(size) + sizeof(struct skb_shared_info);
+	slabsize = 32; /* min size is 64 because of skb_shared_info */
+	do {
+		slabsize <<= 1;
+	} while (slabsize < size);
+	return slabsize + sizeof(struct sk_buff);
+#else
+	return 0;
+#endif
+}
+	
+static inline unsigned long skb_charge_size_const(unsigned long size)
+{
+#ifdef CONFIG_USER_RESOURCE
+	/* aligned payload plus the shared-info trailer, computed once */
+	unsigned long need = SKB_DATA_ALIGN(size) +
+				sizeof(struct skb_shared_info);
+	if (need <= 64)
+		return 64 + sizeof(struct sk_buff);
+	if (need <= 128)
+		return 128 + sizeof(struct sk_buff);
+	if (need <= 256)
+		return 256 + sizeof(struct sk_buff);
+	if (need <= 512)
+		return 512 + sizeof(struct sk_buff);
+	if (need <= 1024)
+		return 1024 + sizeof(struct sk_buff);
+	if (need <= 2048)
+		return 2048 + sizeof(struct sk_buff);
+	if (need <= 4096)
+		return 4096 + sizeof(struct sk_buff);
+	return skb_charge_size_gen(size);	/* larger than the ladder covers */
+#else
+	return 0;
+#endif
+}
+
+
+#define skb_charge_size(__size)			\
+	(__builtin_constant_p(__size)	?	\
+	 skb_charge_size_const(__size)	:	\
+	 skb_charge_size_gen(__size))
+
+UB_DECLARE_FUNC(int, skb_charge_fullsize(struct sk_buff *skb))
+UB_DECLARE_VOID_FUNC(ub_skb_set_charge(struct sk_buff *skb, 
+			struct sock *sk, unsigned long size, int res))
+
+/* Poll reserve */
+UB_DECLARE_FUNC(int, ub_sock_makewres_other(struct sock *sk, unsigned long sz))
+UB_DECLARE_FUNC(int, ub_sock_makewres_tcp(struct sock *sk, unsigned long size))
+UB_DECLARE_FUNC(int, ub_sock_getwres_other(struct sock *sk, unsigned long size))
+UB_DECLARE_FUNC(int, ub_sock_getwres_tcp(struct sock *sk, unsigned long size))
+UB_DECLARE_VOID_FUNC(ub_sock_retwres_other(struct sock *sk, unsigned long size,
+			unsigned long ressize))
+UB_DECLARE_VOID_FUNC(ub_sock_retwres_tcp(struct sock *sk, unsigned long size,
+			unsigned long ressize))
+UB_DECLARE_VOID_FUNC(ub_sock_sndqueueadd_other(struct sock *sk, 
+			unsigned long size))
+UB_DECLARE_VOID_FUNC(ub_sock_sndqueueadd_tcp(struct sock *sk, unsigned long sz))
+UB_DECLARE_VOID_FUNC(ub_sock_sndqueuedel(struct sock *sk))
+
+#endif
diff -uprN linux-2.6.16/include/ub/ub_orphan.h linux-2.6.16.ovz/include/ub/ub_orphan.h
--- linux-2.6.16/include/ub/ub_orphan.h	1969-12-31 19:00:00.000000000 -0500
+++ linux-2.6.16.ovz/include/ub/ub_orphan.h	2006-07-05 08:34:56.000000000 -0400
@@ -0,0 +1,56 @@
+/*
+ *  include/ub/ub_orphan.h
+ *
+ *  Copyright (C) 2005  SWsoft
+ *  All rights reserved.
+ *  
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#ifndef __UB_ORPHAN_H_
+#define __UB_ORPHAN_H_
+
+#include <net/tcp.h>
+
+#include "ub/beancounter.h"
+#include "ub/ub_net.h"
+
+
+static inline atomic_t *__ub_get_orphan_count_ptr(struct sock *sk)
+{
+#ifdef CONFIG_USER_RESOURCE
+	if (sock_has_ubc(sk))
+		return &sock_bc(sk)->ub->ub_orphan_count;
+#endif
+	return sk->sk_prot->orphan_count;
+}
+
+static inline void ub_inc_orphan_count(struct sock *sk)
+{
+	atomic_inc(__ub_get_orphan_count_ptr(sk));
+}
+
+static inline void ub_dec_orphan_count(struct sock *sk)
+{
+	atomic_dec(__ub_get_orphan_count_ptr(sk));
+}
+
+static inline int ub_get_orphan_count(struct sock *sk)
+{
+	return atomic_read(__ub_get_orphan_count_ptr(sk));
+}
+
+extern int __ub_too_many_orphans(struct sock *sk, int count);
+static inline int ub_too_many_orphans(struct sock *sk, int count)
+{
+#ifdef CONFIG_USER_RESOURCE
+	if (__ub_too_many_orphans(sk, count))
+		return 1;
+#endif
+	return (ub_get_orphan_count(sk) > sysctl_tcp_max_orphans ||
+		(sk->sk_wmem_queued > SOCK_MIN_SNDBUF &&
+		 atomic_read(&tcp_memory_allocated) > sysctl_tcp_mem[2]));
+}
+
+#endif
diff -uprN linux-2.6.16/include/ub/ub_page.h linux-2.6.16.ovz/include/ub/ub_page.h
--- linux-2.6.16/include/ub/ub_page.h	1969-12-31 19:00:00.000000000 -0500
+++ linux-2.6.16.ovz/include/ub/ub_page.h	2006-07-05 08:34:56.000000000 -0400
@@ -0,0 +1,48 @@
+/*
+ *  include/ub/ub_page.h
+ *
+ *  Copyright (C) 2005  SWsoft
+ *  All rights reserved.
+ *
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#ifndef __UB_PAGE_H_
+#define __UB_PAGE_H_
+
+#include <linux/config.h>
+
+/*
+ * Page_beancounters
+ */
+
+struct page;
+struct user_beancounter;
+
+#define PB_MAGIC 0x62700001UL
+
+struct page_beancounter {
+	unsigned long pb_magic;
+	struct page *page;
+	struct user_beancounter *ub;
+	struct page_beancounter *next_hash;
+	unsigned refcount;
+	struct list_head page_list;
+};
+
+#define PB_REFCOUNT_BITS 24
+#define PB_SHIFT_GET(c) ((c) >> PB_REFCOUNT_BITS)
+#define PB_SHIFT_INC(c) ((c) += (1 << PB_REFCOUNT_BITS))
+#define PB_SHIFT_DEC(c) ((c) -= (1 << PB_REFCOUNT_BITS))
+#define PB_COUNT_GET(c) ((c) & ((1 << PB_REFCOUNT_BITS) - 1))
+#define PB_COUNT_INC(c) ((c)++)
+#define PB_COUNT_DEC(c) ((c)--)
+#define PB_REFCOUNT_MAKE(s, c) (((s) << PB_REFCOUNT_BITS) + (c))
+
+#define page_pbc(__page)        ((__page)->bc.page_pb)
+
+struct address_space;
+extern int is_shmem_mapping(struct address_space *);
+
+#endif
diff -uprN linux-2.6.16/include/ub/ub_sk.h linux-2.6.16.ovz/include/ub/ub_sk.h
--- linux-2.6.16/include/ub/ub_sk.h	1969-12-31 19:00:00.000000000 -0500
+++ linux-2.6.16.ovz/include/ub/ub_sk.h	2006-07-05 08:34:56.000000000 -0400
@@ -0,0 +1,43 @@
+/*
+ *  include/ub/ub_sk.h
+ *
+ *  Copyright (C) 2005  SWsoft
+ *  All rights reserved.
+ *  
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#ifndef __UB_SK_H_
+#define __UB_SK_H_
+
+#include <linux/config.h>
+#include <ub/ub_task.h>
+
+struct sock;
+struct sk_buff;
+
+struct skb_beancounter {
+	struct user_beancounter *ub;
+	unsigned long charged:27, resource:5;
+};
+
+struct sock_beancounter {
+	/*
+	 * already charged for future sends, to make poll work;
+	 * changes are protected by bc spinlock, read is under socket
+	 * semaphore for sends and unprotected in poll
+	 */
+	unsigned long           poll_reserv;
+	unsigned long           ub_waitspc;     /* space waiting for */
+	unsigned long           ub_wcharged;
+	struct list_head        ub_sock_list;
+	struct user_beancounter *ub;
+};
+
+#define sock_bc(__sk)		(&(__sk)->sk_bc)
+#define skb_bc(__skb)		(&(__skb)->skb_bc)
+#define skbc_sock(__skbc)	(container_of(__skbc, struct sock, sk_bc))
+#define sock_has_ubc(__sk)	(sock_bc(__sk)->ub != NULL)
+
+#endif
diff -uprN linux-2.6.16/include/ub/ub_stat.h linux-2.6.16.ovz/include/ub/ub_stat.h
--- linux-2.6.16/include/ub/ub_stat.h	1969-12-31 19:00:00.000000000 -0500
+++ linux-2.6.16.ovz/include/ub/ub_stat.h	2006-07-05 08:34:56.000000000 -0400
@@ -0,0 +1,70 @@
+/*
+ *  include/ub/ub_stat.h
+ *
+ *  Copyright (C) 2005  SWsoft
+ *  All rights reserved.
+ *  
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#ifndef __UB_STAT_H_
+#define __UB_STAT_H_
+
+/* sys_ubstat commands list */
+#define UBSTAT_READ_ONE			0x010000
+#define UBSTAT_READ_ALL			0x020000
+#define UBSTAT_READ_FULL		0x030000
+#define UBSTAT_UBLIST			0x040000
+#define UBSTAT_UBPARMNUM		0x050000
+#define UBSTAT_GETTIME			0x060000
+
+#define UBSTAT_CMD(func)		((func) & 0xF0000)
+#define UBSTAT_PARMID(func)		((func) & 0x0FFFF)
+
+#define TIME_MAX_SEC		(LONG_MAX / HZ)
+#define TIME_MAX_JIF		(TIME_MAX_SEC * HZ)
+
+typedef unsigned long ubstattime_t;
+
+typedef struct {
+	ubstattime_t	start_time;
+	ubstattime_t	end_time;
+	ubstattime_t	cur_time;
+} ubgettime_t;
+
+typedef struct {
+	long		maxinterval;
+	int		signum;
+} ubnotifrq_t;
+
+typedef struct {
+	unsigned long	maxheld;
+	unsigned long	failcnt;
+} ubstatparm_t;
+
+typedef struct {
+	unsigned long	barrier;
+	unsigned long	limit;
+	unsigned long	held;
+	unsigned long	maxheld;
+	unsigned long	minheld;
+	unsigned long	failcnt;
+	unsigned long __unused1;
+	unsigned long __unused2;
+} ubstatparmf_t;
+
+typedef struct {
+	ubstattime_t	start_time;
+	ubstattime_t	end_time;
+	ubstatparmf_t	param[0];
+} ubstatfull_t;
+
+#ifdef __KERNEL__
+struct ub_stat_notify {
+	struct list_head	list;
+	struct task_struct	*task;
+	int			signum;
+};
+#endif
+#endif
diff -uprN linux-2.6.16/include/ub/ub_task.h linux-2.6.16.ovz/include/ub/ub_task.h
--- linux-2.6.16/include/ub/ub_task.h	1969-12-31 19:00:00.000000000 -0500
+++ linux-2.6.16.ovz/include/ub/ub_task.h	2006-07-05 08:34:56.000000000 -0400
@@ -0,0 +1,49 @@
+/*
+ *  include/ub/ub_task.h
+ *
+ *  Copyright (C) 2005  SWsoft
+ *  All rights reserved.
+ *  
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#ifndef __UB_TASK_H_
+#define __UB_TASK_H_
+
+#include <linux/config.h>
+
+struct user_beancounter;
+
+
+#ifdef CONFIG_USER_RESOURCE
+
+struct task_beancounter {
+	struct user_beancounter	*exec_ub;
+	struct user_beancounter	*task_ub;
+	struct user_beancounter *fork_sub;
+	void *task_fnode, *task_freserv;
+	unsigned long oom_generation;
+	unsigned long task_data[4];
+};
+
+#define get_exec_ub()		(current->task_bc.exec_ub)
+#define get_task_ub(__task)	((__task)->task_bc.task_ub)
+#define set_exec_ub(__newub)	/* install new exec ub, yield the old one */ \
+({					\
+	struct user_beancounter *__old_ub;	\
+	struct task_beancounter *__tbc;	\
+	__tbc = &current->task_bc;	\
+	__old_ub = __tbc->exec_ub;	\
+	__tbc->exec_ub = (__newub);	\
+	__old_ub;			\
+})
+
+#else /* CONFIG_USER_RESOURCE */
+
+#define get_exec_ub()		(NULL)
+#define get_task_ub(task)	(NULL)
+#define set_exec_ub(__ub)	(NULL)
+
+#endif /* CONFIG_USER_RESOURCE */
+#endif /* __UB_TASK_H_ */
diff -uprN linux-2.6.16/include/ub/ub_tcp.h linux-2.6.16.ovz/include/ub/ub_tcp.h
--- linux-2.6.16/include/ub/ub_tcp.h	1969-12-31 19:00:00.000000000 -0500
+++ linux-2.6.16.ovz/include/ub/ub_tcp.h	2006-07-05 08:34:56.000000000 -0400
@@ -0,0 +1,79 @@
+/*
+ *  include/ub/ub_tcp.h
+ *
+ *  Copyright (C) 2005  SWsoft
+ *  All rights reserved.
+ *  
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#ifndef __UB_TCP_H_
+#define __UB_TCP_H_
+
+/*
+ * UB_NUMXXXSOCK, UB_XXXBUF accounting
+ */
+
+#include <ub/ub_sk.h>
+#include <ub/beancounter.h>
+
+static inline void ub_tcp_update_maxadvmss(struct sock *sk)
+{
+#ifdef CONFIG_USER_RESOURCE
+	if (!sock_has_ubc(sk))
+		return;
+	if (sock_bc(sk)->ub->ub_maxadvmss >= tcp_sk(sk)->advmss)
+		return;
+
+	sock_bc(sk)->ub->ub_maxadvmss =
+		skb_charge_size(MAX_HEADER + sizeof(struct iphdr)
+				+ sizeof(struct tcphdr)	+ tcp_sk(sk)->advmss);
+#endif
+}
+
+static inline int ub_tcp_rmem_allows_expand(struct sock *sk)
+{
+	if (tcp_memory_pressure)
+		return 0;
+#ifdef CONFIG_USER_RESOURCE
+	if (sock_has_ubc(sk)) {
+		struct user_beancounter *ub;
+
+		ub = sock_bc(sk)->ub;
+		if (ub->ub_rmem_pressure == UB_RMEM_EXPAND)
+			return 1;
+		if (ub->ub_rmem_pressure == UB_RMEM_SHRINK)
+			return 0;
+		return sk->sk_rcvbuf <= ub->ub_rmem_thres;
+	}
+#endif
+	return 1;
+}
+
+static inline int ub_tcp_memory_pressure(struct sock *sk)
+{
+	if (tcp_memory_pressure)
+		return 1;
+#ifdef CONFIG_USER_RESOURCE
+	if (sock_has_ubc(sk))
+		return sock_bc(sk)->ub->ub_rmem_pressure != UB_RMEM_EXPAND;
+#endif
+	return 0;
+}
+
+static inline int ub_tcp_shrink_rcvbuf(struct sock *sk)
+{
+	if (tcp_memory_pressure)
+		return 1;
+#ifdef CONFIG_USER_RESOURCE
+	if (sock_has_ubc(sk))
+		return sock_bc(sk)->ub->ub_rmem_pressure == UB_RMEM_SHRINK;
+#endif
+	return 0;
+}
+
+UB_DECLARE_FUNC(int, ub_sock_tcp_chargepage(struct sock *sk))
+UB_DECLARE_VOID_FUNC(ub_sock_tcp_detachpage(struct sock *sk))
+
+#endif
diff -uprN linux-2.6.16/include/ub/ub_vmpages.h linux-2.6.16.ovz/include/ub/ub_vmpages.h
--- linux-2.6.16/include/ub/ub_vmpages.h	1969-12-31 19:00:00.000000000 -0500
+++ linux-2.6.16.ovz/include/ub/ub_vmpages.h	2006-07-05 08:34:56.000000000 -0400
@@ -0,0 +1,167 @@
+/*
+ *  include/ub/ub_vmpages.h
+ *
+ *  Copyright (C) 2005  SWsoft
+ *  All rights reserved.
+ *  
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#ifndef __UB_PAGES_H_
+#define __UB_PAGES_H_
+
+#include <linux/linkage.h>
+#include <linux/config.h>
+#include <ub/beancounter.h>
+#include <ub/ub_decl.h>
+
+/*
+ * Check whether vma has private or copy-on-write mapping.
+ * Should match checks in ub_protected_charge().
+ */
+#define VM_UB_PRIVATE(__flags, __file)					\
+		( ((__flags) & VM_WRITE) ?				\
+			(__file) == NULL || !((__flags) & VM_SHARED) :	\
+			0						\
+		)
+
+/* Mprotect charging result */
+#define PRIVVM_ERROR		-1
+#define PRIVVM_NO_CHARGE	 0 /* UB_DECLARE_FUNC retval with ubc off */
+#define PRIVVM_TO_PRIVATE	 1
+#define PRIVVM_TO_SHARED	 2
+
+UB_DECLARE_FUNC(int, ub_protected_charge(struct mm_struct *mm,
+			unsigned long size,
+			unsigned long newflags,
+			struct vm_area_struct *vma))
+
+UB_DECLARE_VOID_FUNC(ub_unused_privvm_add(struct mm_struct *mm,
+			struct vm_area_struct *vma,
+			unsigned long num))
+#define ub_unused_privvm_inc(mm, vma)	ub_unused_privvm_add(mm, vma, 1)
+UB_DECLARE_VOID_FUNC(ub_unused_privvm_sub(struct mm_struct *mm,
+			struct vm_area_struct *vma,
+			unsigned long num))
+#define ub_unused_privvm_dec(mm, vma)	ub_unused_privvm_sub(mm, vma, 1)
+
+UB_DECLARE_VOID_FUNC(__ub_unused_privvm_dec(struct mm_struct *mm,
+			long sz))
+
+UB_DECLARE_FUNC(int, ub_memory_charge(struct mm_struct *mm,
+			unsigned long size,
+			unsigned vm_flags,
+			struct file *vm_file,
+			int strict))
+UB_DECLARE_VOID_FUNC(ub_memory_uncharge(struct mm_struct *mm,
+			unsigned long size,
+			unsigned vm_flags,
+			struct file *vm_file))
+
+struct shmem_inode_info;
+UB_DECLARE_FUNC(int, ub_shmpages_charge(struct shmem_inode_info *i,
+			unsigned long sz))
+UB_DECLARE_VOID_FUNC(ub_shmpages_uncharge(struct shmem_inode_info *i,
+			unsigned long sz))
+UB_DECLARE_VOID_FUNC(ub_tmpfs_respages_inc(struct shmem_inode_info *shi))
+UB_DECLARE_VOID_FUNC(ub_tmpfs_respages_sub(struct shmem_inode_info *shi,
+			unsigned long size))
+#define ub_tmpfs_respages_dec(shi)	ub_tmpfs_respages_sub(shi, 1)
+
+#ifdef CONFIG_USER_RESOURCE
+#define shmi_ub_set(shi, ub)	do {			\
+		(shi)->shmi_ub = get_beancounter(ub);	\
+	} while (0)
+#define shmi_ub_put(shi)	do {			\
+		put_beancounter((shi)->shmi_ub);	\
+		(shi)->shmi_ub = NULL;			\
+	} while (0)
+#else
+#define shmi_ub_set(shi, ub)	do { } while (0)
+#define shmi_ub_put(shi)	do { } while (0)
+#endif
+
+UB_DECLARE_FUNC(int, ub_locked_charge(struct mm_struct *mm,
+			unsigned long size))
+UB_DECLARE_VOID_FUNC(ub_locked_uncharge(struct mm_struct *mm,
+			unsigned long size))
+UB_DECLARE_FUNC(int, ub_lockedshm_charge(struct shmem_inode_info *shi,
+			unsigned long size))
+UB_DECLARE_VOID_FUNC(ub_lockedshm_uncharge(struct shmem_inode_info *shi,
+			unsigned long size))
+
+UB_DECLARE_FUNC(unsigned long, pages_in_vma_range(struct vm_area_struct *vma,
+			unsigned long addr, unsigned long end))
+UB_DECLARE_VOID_FUNC(warn_bad_rss(struct vm_area_struct *vma,
+			unsigned long freed))
+#define pages_in_vma(vma)	(pages_in_vma_range((vma), \
+			(vma)->vm_start, (vma)->vm_end)) /* vma evaluated 3 times */
+
+#define UB_PAGE_WEIGHT_SHIFT 24
+#define UB_PAGE_WEIGHT (1 << UB_PAGE_WEIGHT_SHIFT)
+
+struct page_beancounter;
+#define PBC_COPY_SAME	((struct page_beancounter *) 1)
+
+/* Mprotect charging result */
+#define PRIVVM_ERROR		-1
+#define PRIVVM_NO_CHARGE	0
+#define PRIVVM_TO_PRIVATE	1
+#define PRIVVM_TO_SHARED	2
+
+extern void fastcall __ub_update_physpages(struct user_beancounter *ub);
+extern void fastcall __ub_update_oomguarpages(struct user_beancounter *ub);
+extern void fastcall __ub_update_privvm(struct user_beancounter *ub);
+
+#ifdef CONFIG_USER_RSS_ACCOUNTING
+#define PB_DECLARE_FUNC(ret, decl)	UB_DECLARE_FUNC(ret, decl)
+#define PB_DECLARE_VOID_FUNC(decl)	UB_DECLARE_VOID_FUNC(decl)
+#else
+#define PB_DECLARE_FUNC(ret, decl)	static inline ret decl {return (ret)0;}
+#define PB_DECLARE_VOID_FUNC(decl)	static inline void decl { }
+#endif
+
+PB_DECLARE_FUNC(int, pb_alloc(struct page_beancounter **pbc))
+PB_DECLARE_FUNC(int, pb_alloc_list(struct page_beancounter **pbc, int num))
+PB_DECLARE_FUNC(int, pb_alloc_all(struct page_beancounter **pbc))
+PB_DECLARE_VOID_FUNC(pb_add_ref(struct page *page,
+			struct mm_struct *mm,
+			struct page_beancounter **pbc))
+PB_DECLARE_VOID_FUNC(pb_dup_ref(struct page *page, 
+			struct mm_struct *mm, 
+			struct page_beancounter **pbc))
+PB_DECLARE_VOID_FUNC(pb_free_list(struct page_beancounter **pb))
+PB_DECLARE_VOID_FUNC(pb_free(struct page_beancounter **pb))
+PB_DECLARE_VOID_FUNC(pb_remove_ref(struct page *page, 
+			struct mm_struct *mm))
+
+PB_DECLARE_FUNC(struct user_beancounter *, pb_grab_page_ub(struct page *page))
+
+#ifdef CONFIG_USER_SWAP_ACCOUNTING
+#define SWP_DECLARE_FUNC(ret, decl)	UB_DECLARE_FUNC(ret, decl)
+#define SWP_DECLARE_VOID_FUNC(decl)	UB_DECLARE_VOID_FUNC(decl)
+#else
+#define SWP_DECLARE_FUNC(ret, decl)	static inline ret decl {return (ret)0;}
+#define SWP_DECLARE_VOID_FUNC(decl)	static inline void decl { }
+#endif
+
+struct swap_info_struct;
+SWP_DECLARE_FUNC(int, ub_swap_init(struct swap_info_struct *si, pgoff_t n))
+SWP_DECLARE_VOID_FUNC(ub_swap_fini(struct swap_info_struct *si))
+SWP_DECLARE_VOID_FUNC(ub_swapentry_inc(struct swap_info_struct *si, pgoff_t n,
+			struct user_beancounter *ub))
+SWP_DECLARE_VOID_FUNC(ub_swapentry_dec(struct swap_info_struct *si, pgoff_t n))
+
+#ifdef CONFIG_USER_RESOURCE
+#define ub_unmap_inc(mm)	do { 					\
+		(mm)->mm_ub->ub_stat[smp_processor_id()].unmap++;	\
+	} while (0)
+#define ub_swapin_inc(mm)	do {					\
+		(mm)->mm_ub->ub_stat[smp_processor_id()].swapin++;	\
+	} while (0)
+#else
+#define ub_unmap_inc(mm)	do { } while (0)
+#define ub_swapin_inc(mm)	do { } while (0)
+#endif /* CONFIG_USER_RESOURCE */
+#endif /* __UB_PAGES_H_: the guard must also cover the inline stubs above */
diff -uprN linux-2.6.16/init/calibrate.c linux-2.6.16.ovz/init/calibrate.c
--- linux-2.6.16/init/calibrate.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/init/calibrate.c	2006-07-05 08:34:56.000000000 -0400
@@ -7,6 +7,7 @@
 #include <linux/sched.h>
 #include <linux/delay.h>
 #include <linux/init.h>
+#include <linux/module.h>
 
 #include <asm/timex.h>
 
@@ -105,6 +106,60 @@ static unsigned long __devinit calibrate
 static unsigned long __devinit calibrate_delay_direct(void) {return 0;}
 #endif
 
+unsigned long cycles_per_jiffy, cycles_per_clock;
+
+/* Sample get_cycles() across one full jiffy and store it in cycles_per_jiffy. */
+static __devinit void calibrate_cycles(void)
+{
+	unsigned long ticks;
+	cycles_t time;
+
+	ticks = jiffies;
+	while (ticks == jiffies)
+		/* wait for a jiffy edge */;
+	time = get_cycles();
+	ticks = jiffies;
+	while (ticks == jiffies)
+		/* wait out one whole jiffy */;
+	time = get_cycles() - time;
+	cycles_per_jiffy = time;
+	if ((cycles_t)cycles_per_jiffy != time) {	/* high bits were lost */
+		printk("CPU too fast! timings are incorrect\n");
+		cycles_per_jiffy = -1;
+	}
+}
+
+EXPORT_SYMBOL(cycles_per_jiffy);
+EXPORT_SYMBOL(cycles_per_clock);
+
+/* Set cycles_per_jiffy and cycles_per_clock; both are left non-zero. */
+static __devinit void calc_cycles_per_jiffy(void)
+{
+#if defined(__i386__)
+	extern unsigned long fast_gettimeoffset_quotient;
+	unsigned long low, high;
+
+	if (fast_gettimeoffset_quotient != 0) {
+		__asm__("divl %2"
+				:"=a" (low), "=d" (high)
+				:"r" (fast_gettimeoffset_quotient),
+				"0" (0), "1" (1000000/HZ));
+		cycles_per_jiffy = low;	/* ((1000000/HZ) << 32) / quotient */
+	}
+#endif
+	if (cycles_per_jiffy == 0)
+		calibrate_cycles();
+
+	if (cycles_per_jiffy == 0) {
+		printk(KERN_WARNING "Cycles are stuck! "
+				"Some VPS statistics will not be available.\n");
+		/* to prevent division by zero in cycles_to_(clocks|jiffies) */
+		cycles_per_jiffy = 1;
+		cycles_per_clock = 1;
+	} else	/* NOTE(review): HZ / CLOCKS_PER_SEC truncates, e.g. HZ=250 */
+		cycles_per_clock = cycles_per_jiffy * (HZ / CLOCKS_PER_SEC);
+}
+
 /*
  * This is the number of bits of precision for the loops_per_jiffy.  Each
  * bit takes on average 1.5/HZ seconds.  This (like the original) is a little
@@ -170,4 +225,5 @@ void __devinit calibrate_delay(void)
 			loops_per_jiffy);
 	}
 
+	calc_cycles_per_jiffy();
 }
diff -uprN linux-2.6.16/init/main.c linux-2.6.16.ovz/init/main.c
--- linux-2.6.16/init/main.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/init/main.c	2006-07-05 08:34:56.000000000 -0400
@@ -48,6 +48,8 @@
 #include <linux/mempolicy.h>
 #include <linux/key.h>
 
+#include <ub/beancounter.h>
+
 #include <asm/io.h>
 #include <asm/bugs.h>
 #include <asm/setup.h>
@@ -80,6 +82,7 @@ extern void sbus_init(void);
 extern void sysctl_init(void);
 extern void signals_init(void);
 extern void buffer_init(void);
+extern void fairsched_init_late(void);
 extern void pidhash_init(void);
 extern void pidmap_init(void);
 extern void prio_tree_init(void);
@@ -104,6 +107,24 @@ extern void tc_init(void);
 enum system_states system_state;
 EXPORT_SYMBOL(system_state);
 
+#ifdef CONFIG_VE
+extern void init_ve_system(void);
+extern void prepare_ve0_process(struct task_struct *tsk);
+extern void prepare_ve0_proc_root(void);
+extern void prepare_ve0_sysctl(void);
+#else
+#define init_ve_system()		do { } while (0)
+#define prepare_ve0_process(tsk)	do { } while (0)
+#define prepare_ve0_proc_root()		do { } while (0)
+#define prepare_ve0_sysctl()		do { } while (0)
+#endif
+
+#if defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE)
+extern void prepare_ve0_loopback(void);
+#else
+#define prepare_ve0_loopback()		do { } while (0)
+#endif
+
 /*
  * Boot command-line arguments
  */
@@ -447,6 +468,10 @@ asmlinkage void __init start_kernel(void
  * enable them
  */
 	lock_kernel();
+	/*
+	 * Prepare ub0 to account early allocations if any
+	 */
+	ub_init_ub0();
 	page_address_init();
 	printk(KERN_NOTICE);
 	printk(linux_banner);
@@ -459,6 +484,8 @@ asmlinkage void __init start_kernel(void
 	 */
 	smp_prepare_boot_cpu();
 
+	prepare_ve0_process(&init_task);
+
 	/*
 	 * Set up the scheduler prior starting any interrupts (such as the
 	 * timer interrupt). Full topology setup happens at smp_init()
@@ -524,6 +551,7 @@ asmlinkage void __init start_kernel(void
 #endif
 	fork_init(num_physpages);
 	proc_caches_init();
+	ub_init_cache(num_physpages);
 	buffer_init();
 	unnamed_dev_init();
 	key_init();
@@ -534,7 +562,10 @@ asmlinkage void __init start_kernel(void
 	/* rootfs populating might need page-writeback */
 	page_writeback_init();
 #ifdef CONFIG_PROC_FS
+	prepare_ve0_proc_root();
+	prepare_ve0_sysctl();
 	proc_root_init();
+	ub_init_proc();
 #endif
 	cpuset_init();
 
@@ -542,6 +573,10 @@ asmlinkage void __init start_kernel(void
 
 	acpi_early_init(); /* before LAPIC and SMP init */
 
+#ifdef CONFIG_USER_RSS_ACCOUNTING
+	ub_init_pbc();
+#endif
+
 	/* Do the rest non-__init'ed, we're now alive */
 	rest_init();
 }
@@ -603,6 +638,9 @@ static void __init do_initcalls(void)
  */
 static void __init do_basic_setup(void)
 {
+	prepare_ve0_loopback();
+	init_ve_system();
+
 	/* drivers will send hotplug events */
 	init_workqueues();
 	usermodehelper_init();
@@ -618,7 +656,7 @@ static void __init do_basic_setup(void)
 static void do_pre_smp_initcalls(void)
 {
 	extern int spawn_ksoftirqd(void);
-#ifdef CONFIG_SMP
+#if defined(CONFIG_SMP) || defined(CONFIG_SCHED_VCPU)
 	extern int migration_init(void);
 
 	migration_init();
@@ -674,6 +712,12 @@ static int init(void * unused)
 
 	fixup_cpu_present_map();
 	smp_init();
+
+	/* 
+	 * This should be done after all cpus are known to
+	 * be online.  smp_init gives us confidence in it.
+	 */
+	fairsched_init_late();
 	sched_init_smp();
 
 	cpuset_init_smp();
diff -uprN linux-2.6.16/init/version.c linux-2.6.16.ovz/init/version.c
--- linux-2.6.16/init/version.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/init/version.c	2006-07-05 08:34:56.000000000 -0400
@@ -28,6 +28,12 @@ struct new_utsname system_utsname = {
 
 EXPORT_SYMBOL(system_utsname);
 
+struct new_utsname virt_utsname = {
+	/* we need only this field */
+	.release        = UTS_RELEASE,
+};
+EXPORT_SYMBOL(virt_utsname);
+
 const char linux_banner[] =
 	"Linux version " UTS_RELEASE " (" LINUX_COMPILE_BY "@"
 	LINUX_COMPILE_HOST ") (" LINUX_COMPILER ") " UTS_VERSION "\n";
diff -uprN linux-2.6.16/ipc/mqueue.c linux-2.6.16.ovz/ipc/mqueue.c
--- linux-2.6.16/ipc/mqueue.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/ipc/mqueue.c	2006-07-05 08:34:56.000000000 -0400
@@ -639,7 +639,8 @@ static int oflag2acc[O_ACCMODE] = { MAY_
 		return ERR_PTR(-EINVAL);
 	}
 
-	if (permission(dentry->d_inode, oflag2acc[oflag & O_ACCMODE], NULL)) {
+	if (permission(dentry->d_inode, oflag2acc[oflag & O_ACCMODE],
+			       NULL, NULL)) {
 		dput(dentry);
 		mntput(mqueue_mnt);
 		return ERR_PTR(-EACCES);
diff -uprN linux-2.6.16/ipc/msg.c linux-2.6.16.ovz/ipc/msg.c
--- linux-2.6.16/ipc/msg.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/ipc/msg.c	2006-07-05 08:34:56.000000000 -0400
@@ -88,6 +88,45 @@ void __init msg_init (void)
 				sysvipc_msg_proc_show);
 }
 
+#ifdef CONFIG_VE
+void __init prepare_msg(void)
+{
+	get_ve0()->_msg_ids = &msg_ids;
+	get_ve0()->_msg_ctlmax = msg_ctlmax;
+	get_ve0()->_msg_ctlmnb = msg_ctlmnb;
+	get_ve0()->_msg_ctlmni = msg_ctlmni;
+}
+
+#define msg_ids		(*(get_exec_env()->_msg_ids))
+#define msg_ctlmax	(get_exec_env()->_msg_ctlmax)
+#define msg_ctlmnb	(get_exec_env()->_msg_ctlmnb)
+#define msg_ctlmni	(get_exec_env()->_msg_ctlmni)
+
+void init_ve_ipc_msg(void)
+{
+	msg_ctlmax = MSGMAX;
+	msg_ctlmnb = MSGMNB;
+	msg_ctlmni = MSGMNI;
+	ipc_init_ids(&msg_ids, MSGMNI);
+}
+
+void cleanup_ve_ipc_msg(void)	/* frees every queue in get_exec_env()'s msg_ids */
+{
+	int i;
+	struct msg_queue *msq;
+
+	down(&msg_ids.sem);	/* held for the whole sweep */
+	for (i = 0; i <= msg_ids.max_id; i++) {
+		msq = msg_lock(i);
+		if (msq == NULL)	/* nothing lockable at this index */
+			continue;
+
+		freeque(msq, i);	/* NOTE(review): presumably unlocks msq - confirm */
+	}
+	up(&msg_ids.sem);
+}
+#endif
+
 static int newque (key_t key, int msgflg)
 {
 	int id;
@@ -108,7 +147,7 @@ static int newque (key_t key, int msgflg
 		return retval;
 	}
 
-	id = ipc_addid(&msg_ids, &msq->q_perm, msg_ctlmni);
+	id = ipc_addid(&msg_ids, &msq->q_perm, msg_ctlmni, -1);
 	if(id == -1) {
 		security_msg_queue_free(msq);
 		ipc_rcu_putref(msq);
@@ -450,7 +489,7 @@ asmlinkage long sys_msgctl (int msqid, i
 	ipcp = &msq->q_perm;
 	err = -EPERM;
 	if (current->euid != ipcp->cuid && 
-	    current->euid != ipcp->uid && !capable(CAP_SYS_ADMIN))
+	    current->euid != ipcp->uid && !capable(CAP_VE_SYS_ADMIN))
 	    /* We _could_ check for CAP_CHOWN above, but we don't */
 		goto out_unlock_up;
 
@@ -540,7 +579,7 @@ static inline int pipelined_send(struct 
 				msr->r_msg = ERR_PTR(-E2BIG);
 			} else {
 				msr->r_msg = NULL;
-				msq->q_lrpid = msr->r_tsk->pid;
+				msq->q_lrpid = virt_pid(msr->r_tsk);
 				msq->q_rtime = get_seconds();
 				wake_up_process(msr->r_tsk);
 				smp_mb();
@@ -622,7 +661,7 @@ asmlinkage long sys_msgsnd (int msqid, s
 		}
 	}
 
-	msq->q_lspid = current->tgid;
+	msq->q_lspid = virt_tgid(current);
 	msq->q_stime = get_seconds();
 
 	if(!pipelined_send(msq,msg)) {
@@ -718,7 +757,7 @@ asmlinkage long sys_msgrcv (int msqid, s
 			list_del(&msg->m_list);
 			msq->q_qnum--;
 			msq->q_rtime = get_seconds();
-			msq->q_lrpid = current->tgid;
+			msq->q_lrpid = virt_tgid(current);
 			msq->q_cbytes -= msg->m_ts;
 			atomic_sub(msg->m_ts,&msg_bytes);
 			atomic_dec(&msg_hdrs);
@@ -833,3 +872,27 @@ static int sysvipc_msg_proc_show(struct 
 			  msq->q_ctime);
 }
 #endif
+
+#if defined(CONFIG_VZ_CHECKPOINT) || defined(CONFIG_VZ_CHECKPOINT_MODULE)
+#include <linux/module.h>
+
+int sysvipc_walk_msg(int (*func)(int i, struct msg_queue*, void *), void *arg)
+{
+	int i;
+	int err = 0;	/* returned unchanged if no queue is visited */
+	struct msg_queue * msq;
+
+	down(&msg_ids.sem);	/* held across the whole walk */
+	for(i = 0; i <= msg_ids.max_id; i++) {
+		if ((msq = msg_lock(i)) == NULL)
+			continue;
+		err = func(msg_buildid(i,msq->q_perm.seq), msq, arg);	/* called with msq locked */
+		msg_unlock(msq);
+		if (err)	/* first non-zero result stops the walk and is returned */
+			break;
+	}
+	up(&msg_ids.sem);
+	return err;
+}
+EXPORT_SYMBOL_GPL(sysvipc_walk_msg);
+#endif
diff -uprN linux-2.6.16/ipc/msgutil.c linux-2.6.16.ovz/ipc/msgutil.c
--- linux-2.6.16/ipc/msgutil.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/ipc/msgutil.c	2006-07-05 08:34:56.000000000 -0400
@@ -17,6 +17,8 @@
 
 #include "util.h"
 
+#include <ub/ub_mem.h>
+
 struct msg_msgseg {
 	struct msg_msgseg* next;
 	/* the next part of the message follows immediately */
@@ -36,7 +38,7 @@ struct msg_msg *load_msg(const void __us
 	if (alen > DATALEN_MSG)
 		alen = DATALEN_MSG;
 
-	msg = (struct msg_msg *)kmalloc(sizeof(*msg) + alen, GFP_KERNEL);
+	msg = (struct msg_msg *)ub_kmalloc(sizeof(*msg) + alen, GFP_KERNEL);
 	if (msg == NULL)
 		return ERR_PTR(-ENOMEM);
 
@@ -56,7 +58,7 @@ struct msg_msg *load_msg(const void __us
 		alen = len;
 		if (alen > DATALEN_SEG)
 			alen = DATALEN_SEG;
-		seg = (struct msg_msgseg *)kmalloc(sizeof(*seg) + alen,
+		seg = (struct msg_msgseg *)ub_kmalloc(sizeof(*seg) + alen,
 						 GFP_KERNEL);
 		if (seg == NULL) {
 			err = -ENOMEM;
diff -uprN linux-2.6.16/ipc/sem.c linux-2.6.16.ovz/ipc/sem.c
--- linux-2.6.16/ipc/sem.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/ipc/sem.c	2006-07-05 08:34:56.000000000 -0400
@@ -78,6 +78,7 @@
 #include <asm/uaccess.h>
 #include "util.h"
 
+#include <ub/ub_mem.h>
 
 #define sem_lock(id)	((struct sem_array*)ipc_lock(&sem_ids,id))
 #define sem_unlock(sma)	ipc_unlock(&(sma)->sem_perm)
@@ -88,7 +89,7 @@
 	ipc_buildid(&sem_ids, id, seq)
 static struct ipc_ids sem_ids;
 
-static int newary (key_t, int, int);
+static int newary (key_t, int, int, int);
 static void freeary (struct sem_array *sma, int id);
 #ifdef CONFIG_PROC_FS
 static int sysvipc_sem_proc_show(struct seq_file *s, void *it);
@@ -124,6 +125,48 @@ void __init sem_init (void)
 				sysvipc_sem_proc_show);
 }
 
+#ifdef CONFIG_VE
+void __init prepare_sem(void)	/* mirror the global sem state into get_ve0() */
+{
+	get_ve0()->_sem_ids = &sem_ids;	/* pointer to the static sem_ids */
+	get_ve0()->_used_sems = used_sems;	/* counters/limits copied by value */
+	get_ve0()->_sem_ctls[0] = sem_ctls[0];
+	get_ve0()->_sem_ctls[1] = sem_ctls[1];
+	get_ve0()->_sem_ctls[2] = sem_ctls[2];
+	get_ve0()->_sem_ctls[3] = sem_ctls[3];
+}	/* sem_ids, used_sems, sem_ctls become per-VE macros after this point */
+
+#define sem_ids		(*(get_exec_env()->_sem_ids))
+#define used_sems	(get_exec_env()->_used_sems)
+#define sem_ctls	(get_exec_env()->_sem_ctls)
+
+void init_ve_ipc_sem(void)	/* reset the sem state of get_exec_env() */
+{
+	used_sems = 0;	/* every name here is a macro over get_exec_env() */
+	sem_ctls[0] = SEMMSL;
+	sem_ctls[1] = SEMMNS;
+	sem_ctls[2] = SEMOPM;
+	sem_ctls[3] = SEMMNI;
+	ipc_init_ids(&sem_ids, SEMMNI);	/* requested initial table size: SEMMNI */
+}
+
+void cleanup_ve_ipc_sem(void)
+{
+	struct sem_array *set;
+	int slot = 0;
+
+	down(&sem_ids.sem);
+	/* max_id is re-read on every pass rather than cached */
+	while (slot <= sem_ids.max_id) {
+		set = sem_lock(slot);
+		if (set != NULL)
+			freeary(set, slot);	/* handed over locked; not unlocked here */
+		slot++;
+	}
+	up(&sem_ids.sem);
+}
+#endif
+
 /*
  * Lockless wakeup algorithm:
  * Without the check/retry algorithm a lockless wakeup is possible:
@@ -158,7 +201,7 @@ void __init sem_init (void)
  */
 #define IN_WAKEUP	1
 
-static int newary (key_t key, int nsems, int semflg)
+static int newary (key_t key, int semid, int nsems, int semflg)
 {
 	int id;
 	int retval;
@@ -187,7 +230,7 @@ static int newary (key_t key, int nsems,
 		return retval;
 	}
 
-	id = ipc_addid(&sem_ids, &sma->sem_perm, sc_semmni);
+	id = ipc_addid(&sem_ids, &sma->sem_perm, sc_semmni, semid);
 	if(id == -1) {
 		security_sem_free(sma);
 		ipc_rcu_putref(sma);
@@ -217,12 +260,12 @@ asmlinkage long sys_semget (key_t key, i
 	down(&sem_ids.sem);
 	
 	if (key == IPC_PRIVATE) {
-		err = newary(key, nsems, semflg);
+		err = newary(key, -1, nsems, semflg);
 	} else if ((id = ipc_findkey(&sem_ids, key)) == -1) {  /* key not used */
 		if (!(semflg & IPC_CREAT))
 			err = -ENOENT;
 		else
-			err = newary(key, nsems, semflg);
+			err = newary(key, -1, nsems, semflg);
 	} else if (semflg & IPC_CREAT && semflg & IPC_EXCL) {
 		err = -EEXIST;
 	} else {
@@ -743,7 +786,7 @@ static int semctl_main(int semid, int se
 		for (un = sma->undo; un; un = un->id_next)
 			un->semadj[semnum] = 0;
 		curr->semval = val;
-		curr->sempid = current->tgid;
+		curr->sempid = virt_tgid(current);
 		sma->sem_ctime = get_seconds();
 		/* maybe some queued-up processes were waiting for this */
 		update_queue(sma);
@@ -823,7 +866,7 @@ static int semctl_down(int semid, int se
 	ipcp = &sma->sem_perm;
 	
 	if (current->euid != ipcp->cuid && 
-	    current->euid != ipcp->uid && !capable(CAP_SYS_ADMIN)) {
+	    current->euid != ipcp->uid && !capable(CAP_VE_SYS_ADMIN)) {
 	    	err=-EPERM;
 		goto out_unlock;
 	}
@@ -944,7 +987,8 @@ static inline int get_undo_list(struct s
 	undo_list = current->sysvsem.undo_list;
 	if (!undo_list) {
 		size = sizeof(struct sem_undo_list);
-		undo_list = (struct sem_undo_list *) kmalloc(size, GFP_KERNEL);
+		undo_list = (struct sem_undo_list *) ub_kmalloc(size,
+				GFP_KERNEL);
 		if (undo_list == NULL)
 			return -ENOMEM;
 		memset(undo_list, 0, size);
@@ -1008,7 +1052,8 @@ static struct sem_undo *find_undo(int se
 	ipc_rcu_getref(sma);
 	sem_unlock(sma);
 
-	new = (struct sem_undo *) kmalloc(sizeof(struct sem_undo) + sizeof(short)*nsems, GFP_KERNEL);
+	new = (struct sem_undo *) ub_kmalloc(sizeof(struct sem_undo) +
+			sizeof(short)*nsems, GFP_KERNEL);
 	if (!new) {
 		ipc_lock_by_ptr(&sma->sem_perm);
 		ipc_rcu_putref(sma);
@@ -1066,7 +1111,7 @@ asmlinkage long sys_semtimedop(int semid
 	if (nsops > sc_semopm)
 		return -E2BIG;
 	if(nsops > SEMOPM_FAST) {
-		sops = kmalloc(sizeof(*sops)*nsops,GFP_KERNEL);
+		sops = ub_kmalloc(sizeof(*sops)*nsops,GFP_KERNEL);
 		if(sops==NULL)
 			return -ENOMEM;
 	}
@@ -1150,7 +1195,7 @@ retry_undos:
 	queue.sops = sops;
 	queue.nsops = nsops;
 	queue.undo = un;
-	queue.pid = current->tgid;
+	queue.pid = virt_tgid(current);
 	queue.id = semid;
 	queue.alter = alter;
 	if (alter)
@@ -1320,7 +1365,7 @@ found:
 					sem->semval = 0;
 				if (sem->semval > SEMVMX)
 					sem->semval = SEMVMX;
-				sem->sempid = current->tgid;
+				sem->sempid = virt_tgid(current);
 			}
 		}
 		sma->sem_otime = get_seconds();
@@ -1351,3 +1396,48 @@ static int sysvipc_sem_proc_show(struct 
 			  sma->sem_ctime);
 }
 #endif
+
+#if defined(CONFIG_VZ_CHECKPOINT) || defined(CONFIG_VZ_CHECKPOINT_MODULE)
+#include <linux/module.h>
+
+int sysvipc_setup_sem(key_t key, int semid, size_t size, int semflg)
+{
+	struct sem_array *set;
+	int rc = 0;
+
+	down(&sem_ids.sem);
+	set = sem_lock(semid);
+	if (set == NULL) {	/* nothing found at semid: create it there */
+		rc = newary(key, semid, size, semflg);
+		set = (rc < 0) ? NULL : sem_lock(semid);
+	}
+	if (set != NULL)
+		sem_unlock(set);
+	up(&sem_ids.sem);
+	if (rc > 0)
+		rc = 0;		/* a positive newary() result is reported as 0 */
+	return rc;
+}
+EXPORT_SYMBOL_GPL(sysvipc_setup_sem);
+
+int sysvipc_walk_sem(int (*func)(int i, struct sem_array*, void *), void *arg)
+{
+	struct sem_array *set;
+	int idx, ret = 0;
+
+	down(&sem_ids.sem);
+	/* the walk ends at the first non-zero callback result */
+	for (idx = 0; !ret && idx <= sem_ids.max_id; idx++) {
+		set = sem_lock(idx);
+		if (set == NULL)
+			continue;
+		/* callback runs with the set locked and sem_ids.sem held */
+		ret = func(sem_buildid(idx, set->sem_perm.seq), set, arg);
+		sem_unlock(set);
+	}
+	up(&sem_ids.sem);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(sysvipc_walk_sem);
+EXPORT_SYMBOL_GPL(exit_sem);
+#endif
diff -uprN linux-2.6.16/ipc/shm.c linux-2.6.16.ovz/ipc/shm.c
--- linux-2.6.16/ipc/shm.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/ipc/shm.c	2006-07-05 08:34:56.000000000 -0400
@@ -30,9 +30,13 @@
 #include <linux/capability.h>
 #include <linux/ptrace.h>
 #include <linux/seq_file.h>
+#include <linux/shmem_fs.h>
 
 #include <asm/uaccess.h>
 
+#include <ub/beancounter.h>
+#include <ub/ub_vmpages.h>
+
 #include "util.h"
 
 static struct file_operations shm_file_operations;
@@ -46,9 +50,11 @@ static struct ipc_ids shm_ids;
 #define shm_buildid(id, seq) \
 	ipc_buildid(&shm_ids, id, seq)
 
-static int newseg (key_t key, int shmflg, size_t size);
+static int newseg (key_t key, int shmid, int shmflg, size_t size);
 static void shm_open (struct vm_area_struct *shmd);
 static void shm_close (struct vm_area_struct *shmd);
+static void shm_destroy (struct shmid_kernel *shmd);
+static void do_shm_rmid(struct shmid_kernel *shp);
 #ifdef CONFIG_PROC_FS
 static int sysvipc_shm_proc_show(struct seq_file *s, void *it);
 #endif
@@ -68,6 +74,68 @@ void __init shm_init (void)
 				sysvipc_shm_proc_show);
 }
 
+#ifdef CONFIG_VE
+void __init prepare_shm(void)	/* mirror the global shm state into get_ve0() */
+{
+	get_ve0()->_shm_ids = &shm_ids;	/* pointer to the static shm_ids */
+	get_ve0()->_shm_ctlmax = shm_ctlmax;	/* limits and total copied by value */
+	get_ve0()->_shm_ctlall = shm_ctlall;
+	get_ve0()->_shm_ctlmni = shm_ctlmni;
+	get_ve0()->_shm_tot = shm_tot;
+}
+
+#define shm_ids		(*(get_exec_env()->_shm_ids))
+#define shm_ctlmax	(get_exec_env()->_shm_ctlmax)
+#define shm_ctlall	(get_exec_env()->_shm_ctlall)
+#define shm_ctlmni	(get_exec_env()->_shm_ctlmni)
+#define shm_total	(get_exec_env()->_shm_tot)
+
+void init_ve_ipc_shm(void)	/* reset the shm state of get_exec_env() */
+{
+	shm_ctlmax = SHMMAX;	/* per-VE fields, reached through the macros */
+	shm_ctlall = SHMALL;
+	shm_ctlmni = SHMMNI;
+	shm_total = 0;
+	ipc_init_ids(&shm_ids, 1);	/* requested initial table size: 1 */
+}
+
+void cleanup_ve_ipc_shm(void)
+{
+	struct shmid_kernel *seg;
+	int slot;
+
+	/* shm_ids.sem is held while every slot up to max_id is visited */
+	down(&shm_ids.sem);
+	for (slot = 0; slot <= shm_ids.max_id; slot++) {
+		seg = shm_lock(slot);
+		/* do_shm_rmid() unlocks or destroys the segment it is given */
+		if (seg != NULL)
+			do_shm_rmid(seg);
+	}
+	up(&shm_ids.sem);
+}
+#define sb_ve(sb)		VE_OWNER_FSTYPE(sb->s_type)
+#define shm_total_sb(sb)	(&sb_ve(sb)->_shm_tot)
+#define shm_lock_sb(id, sb)	((struct shmid_kernel *) \
+		ipc_lock(sb_ve(sb)->_shm_ids, id))
+#else
+/* renamed since there is a struct field named shm_tot */
+#define shm_total		shm_tot
+#define shm_total_sb(sb)	(&shm_tot)
+#define shm_lock_sb(id, sb)	shm_lock(id)
+#endif
+
+static void do_shm_rmid(struct shmid_kernel *shp)
+{
+	if (shp->shm_nattch == 0) {
+		shm_destroy(shp);	/* nobody attached: destroy right away */
+		return;
+	}
+	shp->shm_perm.key = IPC_PRIVATE;	/* do not find it any more */
+	shp->shm_perm.mode |= SHM_DEST;		/* still attached: only mark it */
+	shm_unlock(shp);
+}
+
 static inline int shm_checkid(struct shmid_kernel *s, int id)
 {
 	if (ipc_checkid(&shm_ids,&s->shm_perm,id))
@@ -75,25 +143,25 @@ static inline int shm_checkid(struct shm
 	return 0;
 }
 
-static inline struct shmid_kernel *shm_rmid(int id)
+static inline struct shmid_kernel *shm_rmid(struct ipc_ids *ids, int id)
 {
-	return (struct shmid_kernel *)ipc_rmid(&shm_ids,id);
+	return (struct shmid_kernel *)ipc_rmid(ids,id);
 }
 
-static inline int shm_addid(struct shmid_kernel *shp)
+static inline int shm_addid(struct shmid_kernel *shp, int reqid)
 {
-	return ipc_addid(&shm_ids, &shp->shm_perm, shm_ctlmni);
+	return ipc_addid(&shm_ids, &shp->shm_perm, shm_ctlmni, reqid);
 }
 
 
 
-static inline void shm_inc (int id) {
+static inline void shm_inc(int id, struct super_block *sb) {
 	struct shmid_kernel *shp;
 
-	if(!(shp = shm_lock(id)))
+	if(!(shp = shm_lock_sb(id, sb)))
 		BUG();
 	shp->shm_atim = get_seconds();
-	shp->shm_lprid = current->tgid;
+	shp->shm_lprid = virt_tgid(current);
 	shp->shm_nattch++;
 	shm_unlock(shp);
 }
@@ -101,7 +169,50 @@ static inline void shm_inc (int id) {
 /* This is called by fork, once for every shm attach. */
 static void shm_open (struct vm_area_struct *shmd)
 {
-	shm_inc (shmd->vm_file->f_dentry->d_inode->i_ino);
+	shm_inc(shmd->vm_file->f_dentry->d_inode->i_ino,
+			shmd->vm_file->f_dentry->d_inode->i_sb);
+}
+
+static int shmem_lock(struct shmid_kernel *shp, int lock,	/* lock: !0 lock, 0 unlock */
+		struct user_struct *user)
+{	/* returns 0, or -ENOMEM when the ub charge or user_shm_lock() fails */
+	struct file *file = shp->shm_file;
+	struct inode *inode = file->f_dentry->d_inode;
+	struct shmem_inode_info *info = SHMEM_I(inode);
+	unsigned long size;
+
+	size = shp->shm_segsz + PAGE_SIZE - 1;	/* NOTE(review): not shifted to pages; confirm ub_* expects this */
+
+#ifdef CONFIG_SHMEM
+	spin_lock(&info->lock);	/* VM_LOCKED is tested and changed under this lock */
+	if (lock && !(info->flags & VM_LOCKED)) {	/* lock request, not locked yet */
+		if (ub_lockedshm_charge(info, size) < 0)
+			goto out_ch;
+
+		if (!user_shm_lock(inode->i_size, user))
+			goto out_user;	/* undoes the charge taken just above */
+		info->flags |= VM_LOCKED;
+	}
+	if (!lock && (info->flags & VM_LOCKED) && user) {	/* skipped when user is NULL */
+		ub_lockedshm_uncharge(info, size);
+		user_shm_unlock(inode->i_size, user);
+		info->flags &= ~VM_LOCKED;
+	}
+	spin_unlock(&info->lock);
+	return 0;
+
+out_user:
+	ub_lockedshm_uncharge(info, size);
+out_ch:
+	spin_unlock(&info->lock);
+	return -ENOMEM;
+#else
+	if (lock && ub_lockedshm_charge(info, size))	/* this branch never looks at VM_LOCKED */
+		return -ENOMEM;
+	if (!lock)
+		ub_lockedshm_uncharge(info, size);	/* uncharged on every unlock request */
+	return 0;
+#endif
 }
 
 /*
@@ -114,15 +225,24 @@ static void shm_open (struct vm_area_str
  */
 static void shm_destroy (struct shmid_kernel *shp)
 {
-	shm_tot -= (shp->shm_segsz + PAGE_SIZE - 1) >> PAGE_SHIFT;
-	shm_rmid (shp->id);
+	int numpages, *shm_totalp;
+	struct file *f;
+	struct super_block *sb;
+
+	f = shp->shm_file;
+	sb = f->f_dentry->d_inode->i_sb;
+	numpages = (shp->shm_segsz + PAGE_SIZE - 1) >> PAGE_SHIFT;
+	shm_totalp = shm_total_sb(sb);
+	*shm_totalp -= numpages;
+
+	shm_rmid (shp->_shm_ids, shp->id);
 	shm_unlock(shp);
 	if (!is_file_hugepages(shp->shm_file))
-		shmem_lock(shp->shm_file, 0, shp->mlock_user);
+		shmem_lock(shp, 0, shp->mlock_user);
 	else
 		user_shm_unlock(shp->shm_file->f_dentry->d_inode->i_size,
 						shp->mlock_user);
-	fput (shp->shm_file);
+	fput(f);
 	security_shm_free(shp);
 	ipc_rcu_putref(shp);
 }
@@ -138,12 +258,24 @@ static void shm_close (struct vm_area_st
 	struct file * file = shmd->vm_file;
 	int id = file->f_dentry->d_inode->i_ino;
 	struct shmid_kernel *shp;
+	struct super_block *sb;
+	struct ipc_ids *ids;
+#ifdef CONFIG_VE
+	struct ve_struct *ve;
+
+	sb = file->f_dentry->d_inode->i_sb;
+	ve = get_ve(sb_ve(sb));
+	ids = ve->_shm_ids;
+#else
+	sb = file->f_dentry->d_inode->i_sb;
+	ids = &shm_ids;
+#endif
 
-	down (&shm_ids.sem);
+	down (&ids->sem);
 	/* remove from the list of attaches of the shm segment */
-	if(!(shp = shm_lock(id)))
+	if(!(shp = shm_lock_sb(id, sb)))
 		BUG();
-	shp->shm_lprid = current->tgid;
+	shp->shm_lprid = virt_tgid(current);
 	shp->shm_dtim = get_seconds();
 	shp->shm_nattch--;
 	if(shp->shm_nattch == 0 &&
@@ -151,7 +283,10 @@ static void shm_close (struct vm_area_st
 		shm_destroy (shp);
 	else
 		shm_unlock(shp);
-	up (&shm_ids.sem);
+	up(&ids->sem);
+#ifdef CONFIG_VE
+	put_ve(ve);
+#endif
 }
 
 static int shm_mmap(struct file * file, struct vm_area_struct * vma)
@@ -161,7 +296,10 @@ static int shm_mmap(struct file * file, 
 	ret = shmem_mmap(file, vma);
 	if (ret == 0) {
 		vma->vm_ops = &shm_vm_ops;
-		shm_inc(file->f_dentry->d_inode->i_ino);
+		if (!(vma->vm_flags & VM_WRITE))
+			vma->vm_flags &= ~VM_MAYWRITE;
+		shm_inc(file->f_dentry->d_inode->i_ino,
+				file->f_dentry->d_inode->i_sb);
 	}
 
 	return ret;
@@ -184,19 +322,19 @@ static struct vm_operations_struct shm_v
 #endif
 };
 
-static int newseg (key_t key, int shmflg, size_t size)
+static int newseg (key_t key, int shmid, int shmflg, size_t size)
 {
 	int error;
 	struct shmid_kernel *shp;
 	int numpages = (size + PAGE_SIZE -1) >> PAGE_SHIFT;
 	struct file * file;
-	char name[13];
+	char name[26];
 	int id;
 
 	if (size < SHMMIN || size > shm_ctlmax)
 		return -EINVAL;
 
-	if (shm_tot + numpages >= shm_ctlall)
+	if (shm_total + numpages >= shm_ctlall)
 		return -ENOSPC;
 
 	shp = ipc_rcu_alloc(sizeof(*shp));
@@ -227,7 +365,11 @@ static int newseg (key_t key, int shmflg
 		if  ((shmflg & SHM_NORESERVE) &&
 				sysctl_overcommit_memory != OVERCOMMIT_NEVER)
 			acctflag = 0;
+#ifdef CONFIG_VE
+		sprintf (name, "VE%d.SYSV%08x", get_exec_env()->veid, key);
+#else
 		sprintf (name, "SYSV%08x", key);
+#endif
 		file = shmem_file_setup(name, size, acctflag);
 	}
 	error = PTR_ERR(file);
@@ -235,17 +377,18 @@ static int newseg (key_t key, int shmflg
 		goto no_file;
 
 	error = -ENOSPC;
-	id = shm_addid(shp);
+	id = shm_addid(shp, shmid);
 	if(id == -1) 
 		goto no_id;
 
-	shp->shm_cprid = current->tgid;
+	shp->shm_cprid = virt_tgid(current);
 	shp->shm_lprid = 0;
 	shp->shm_atim = shp->shm_dtim = 0;
 	shp->shm_ctim = get_seconds();
 	shp->shm_segsz = size;
 	shp->shm_nattch = 0;
 	shp->id = shm_buildid(id,shp->shm_perm.seq);
+	shp->_shm_ids = &shm_ids;
 	shp->shm_file = file;
 	file->f_dentry->d_inode->i_ino = shp->id;
 
@@ -253,7 +396,7 @@ static int newseg (key_t key, int shmflg
 	if (!(shmflg & SHM_HUGETLB))
 		file->f_op = &shm_file_operations;
 
-	shm_tot += numpages;
+	shm_total += numpages;
 	shm_unlock(shp);
 	return shp->id;
 
@@ -272,12 +415,12 @@ asmlinkage long sys_shmget (key_t key, s
 
 	down(&shm_ids.sem);
 	if (key == IPC_PRIVATE) {
-		err = newseg(key, shmflg, size);
+		err = newseg(key, -1, shmflg, size);
 	} else if ((id = ipc_findkey(&shm_ids, key)) == -1) {
 		if (!(shmflg & IPC_CREAT))
 			err = -ENOENT;
 		else
-			err = newseg(key, shmflg, size);
+			err = newseg(key, -1, shmflg, size);
 	} else if ((shmflg & IPC_CREAT) && (shmflg & IPC_EXCL)) {
 		err = -EEXIST;
 	} else {
@@ -470,7 +613,7 @@ asmlinkage long sys_shmctl (int shmid, i
 		down(&shm_ids.sem);
 		shm_info.used_ids = shm_ids.in_use;
 		shm_get_stat (&shm_info.shm_rss, &shm_info.shm_swp);
-		shm_info.shm_tot = shm_tot;
+		shm_info.shm_tot = shm_total;
 		shm_info.swap_attempts = 0;
 		shm_info.swap_successes = 0;
 		err = shm_ids.max_id;
@@ -557,14 +700,14 @@ asmlinkage long sys_shmctl (int shmid, i
 		if(cmd==SHM_LOCK) {
 			struct user_struct * user = current->user;
 			if (!is_file_hugepages(shp->shm_file)) {
-				err = shmem_lock(shp->shm_file, 1, user);
+				err = shmem_lock(shp, 1, user);
 				if (!err) {
 					shp->shm_perm.mode |= SHM_LOCKED;
 					shp->mlock_user = user;
 				}
 			}
 		} else if (!is_file_hugepages(shp->shm_file)) {
-			shmem_lock(shp->shm_file, 0, shp->mlock_user);
+			shmem_lock(shp, 0, shp->mlock_user);
 			shp->shm_perm.mode &= ~SHM_LOCKED;
 			shp->mlock_user = NULL;
 		}
@@ -594,7 +737,7 @@ asmlinkage long sys_shmctl (int shmid, i
 
 		if (current->euid != shp->shm_perm.uid &&
 		    current->euid != shp->shm_perm.cuid && 
-		    !capable(CAP_SYS_ADMIN)) {
+		    !capable(CAP_VE_SYS_ADMIN)) {
 			err=-EPERM;
 			goto out_unlock_up;
 		}
@@ -603,13 +746,7 @@ asmlinkage long sys_shmctl (int shmid, i
 		if (err)
 			goto out_unlock_up;
 
-		if (shp->shm_nattch){
-			shp->shm_perm.mode |= SHM_DEST;
-			/* Do not find it any more */
-			shp->shm_perm.key = IPC_PRIVATE;
-			shm_unlock(shp);
-		} else
-			shm_destroy (shp);
+		do_shm_rmid(shp);
 		up(&shm_ids.sem);
 		goto out;
 	}
@@ -633,7 +770,7 @@ asmlinkage long sys_shmctl (int shmid, i
 		err=-EPERM;
 		if (current->euid != shp->shm_perm.uid &&
 		    current->euid != shp->shm_perm.cuid && 
-		    !capable(CAP_SYS_ADMIN)) {
+		    !capable(CAP_VE_SYS_ADMIN)) {
 			goto out_unlock_up;
 		}
 
@@ -916,3 +1053,55 @@ static int sysvipc_shm_proc_show(struct 
 			  shp->shm_ctim);
 }
 #endif
+
+#if defined(CONFIG_VZ_CHECKPOINT) || defined(CONFIG_VZ_CHECKPOINT_MODULE)
+#include <linux/module.h>
+
+struct file * sysvipc_setup_shm(key_t key, int shmid, size_t size, int shmflg)
+{
+	struct file *result = ERR_PTR(-EINVAL);	/* used when no segment is found */
+	struct shmid_kernel *seg;
+	int rc;
+
+	down(&shm_ids.sem);
+	seg = shm_lock(shmid);
+	if (seg == NULL) {
+		/* nothing at shmid yet: create a segment, passing shmid along */
+		rc = newseg(key, shmid, shmflg, size);
+		if (rc < 0) {
+			result = ERR_PTR(rc);
+			goto unlock_ids;
+		}
+		seg = shm_lock(shmid);
+	}
+	if (seg != NULL) {
+		result = seg->shm_file;
+		get_file(result);	/* the returned file carries its own reference */
+		shm_unlock(seg);
+	}
+unlock_ids:
+	up(&shm_ids.sem);
+	return result;
+}
+EXPORT_SYMBOL_GPL(sysvipc_setup_shm);
+
+int sysvipc_walk_shm(int (*func)(struct shmid_kernel*, void *), void *arg)
+{
+	struct shmid_kernel *seg;
+	int n, status = 0;	/* status stays 0 when nothing is visited */
+
+	down(&shm_ids.sem);
+	for (n = 0; n <= shm_ids.max_id; n++) {
+		seg = shm_lock(n);
+		if (seg != NULL) {
+			status = func(seg, arg);	/* called with seg locked */
+			shm_unlock(seg);
+			if (status != 0)
+				break;	/* first non-zero result ends the walk */
+		}
+	}
+	up(&shm_ids.sem);
+	return status;
+}
+EXPORT_SYMBOL_GPL(sysvipc_walk_shm);
+#endif
diff -uprN linux-2.6.16/ipc/util.c linux-2.6.16.ovz/ipc/util.c
--- linux-2.6.16/ipc/util.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/ipc/util.c	2006-07-05 08:34:56.000000000 -0400
@@ -13,6 +13,7 @@
  */
 
 #include <linux/config.h>
+#include <linux/module.h>
 #include <linux/mm.h>
 #include <linux/shm.h>
 #include <linux/init.h>
@@ -30,6 +31,8 @@
 
 #include <asm/unistd.h>
 
+#include <ub/ub_mem.h>
+
 #include "util.h"
 
 struct ipc_proc_iface {
@@ -65,7 +68,7 @@ __initcall(ipc_init);
  *	array itself. 
  */
  
-void __init ipc_init_ids(struct ipc_ids* ids, int size)
+void __ve_init ipc_init_ids(struct ipc_ids* ids, int size)
 {
 	int i;
 	sema_init(&ids->sem,1);
@@ -94,7 +97,21 @@ void __init ipc_init_ids(struct ipc_ids*
 	ids->entries->size = size;
 	for(i=0;i<size;i++)
 		ids->entries->p[i] = NULL;
+
+	ids->owner_env = get_exec_env();
+}
+
+#ifdef CONFIG_VE
+static inline void ipc_free_ids(struct ipc_ids *ids)
+{
+	if (ids != NULL) {
+		/* nullentry is embedded in *ids, so it is never put separately */
+		if (ids->entries != &ids->nullentry)
+			ipc_rcu_putref(ids->entries);
+		kfree(ids);
+	}
 }
+#endif
 
 #ifdef CONFIG_PROC_FS
 static struct file_operations sysvipc_proc_fops;
@@ -182,8 +199,7 @@ static int grow_ary(struct ipc_ids* ids,
 	if(new == NULL)
 		return size;
 	new->size = newsize;
-	memcpy(new->p, ids->entries->p, sizeof(struct kern_ipc_perm *)*size +
-					sizeof(struct ipc_id_ary));
+	memcpy(new->p, ids->entries->p, sizeof(struct kern_ipc_perm *)*size);
 	for(i=size;i<newsize;i++) {
 		new->p[i] = NULL;
 	}
@@ -213,10 +229,20 @@ static int grow_ary(struct ipc_ids* ids,
  *	Called with ipc_ids.sem held.
  */
  
-int ipc_addid(struct ipc_ids* ids, struct kern_ipc_perm* new, int size)
+int ipc_addid(struct ipc_ids* ids, struct kern_ipc_perm* new, int size, int reqid)
 {
 	int id;
 
+	if (reqid >= 0) {
+		id = reqid%SEQ_MULTIPLIER;
+		size = grow_ary(ids,id+1);
+		if (id >= size)
+			return -1;
+		if (ids->entries->p[id] == NULL)
+			goto found;
+		return -1;
+	}
+
 	size = grow_ary(ids,size);
 
 	/*
@@ -229,16 +255,21 @@ int ipc_addid(struct ipc_ids* ids, struc
 	}
 	return -1;
 found:
-	ids->in_use++;
+	if (ids->in_use++ == 0)
+		(void)get_ve(ids->owner_env);
 	if (id > ids->max_id)
 		ids->max_id = id;
 
 	new->cuid = new->uid = current->euid;
 	new->gid = new->cgid = current->egid;
 
-	new->seq = ids->seq++;
-	if(ids->seq > ids->seq_max)
-		ids->seq = 0;
+	if (reqid >= 0) {
+		new->seq = reqid/SEQ_MULTIPLIER;
+	} else {
+		new->seq = ids->seq++;
+		if(ids->seq > ids->seq_max)
+			ids->seq = 0;
+	}
 
 	spin_lock_init(&new->lock);
 	new->deleted = 0;
@@ -276,7 +307,8 @@ struct kern_ipc_perm* ipc_rmid(struct ip
 	ids->entries->p[lid] = NULL;
 	if(p==NULL)
 		BUG();
-	ids->in_use--;
+	if (--ids->in_use == 0)
+		put_ve(ids->owner_env);
 
 	if (lid == ids->max_id) {
 		do {
@@ -302,9 +334,9 @@ void* ipc_alloc(int size)
 {
 	void* out;
 	if(size > PAGE_SIZE)
-		out = vmalloc(size);
+		out = ub_vmalloc(size);
 	else
-		out = kmalloc(size, GFP_KERNEL);
+		out = ub_kmalloc(size, GFP_KERNEL);
 	return out;
 }
 
@@ -387,14 +419,14 @@ void* ipc_rcu_alloc(int size)
 	 * workqueue if necessary (for vmalloc). 
 	 */
 	if (rcu_use_vmalloc(size)) {
-		out = vmalloc(HDRLEN_VMALLOC + size);
+		out = ub_vmalloc(HDRLEN_VMALLOC + size);
 		if (out) {
 			out += HDRLEN_VMALLOC;
 			container_of(out, struct ipc_rcu_hdr, data)->is_vmalloc = 1;
 			container_of(out, struct ipc_rcu_hdr, data)->refcount = 1;
 		}
 	} else {
-		out = kmalloc(HDRLEN_KMALLOC + size, GFP_KERNEL);
+		out = ub_kmalloc(HDRLEN_KMALLOC + size, GFP_KERNEL);
 		if (out) {
 			out += HDRLEN_KMALLOC;
 			container_of(out, struct ipc_rcu_hdr, data)->is_vmalloc = 0;
@@ -603,6 +635,71 @@ int ipc_checkid(struct ipc_ids* ids, str
 	return 0;
 }
 
+#ifdef CONFIG_VE
+void __init prepare_ipc(void)	/* run the msg, sem and shm prepare_*() hooks */
+{
+	prepare_msg();
+	prepare_sem();
+	prepare_shm();
+}
+
+int init_ve_ipc(struct ve_struct * envid)	/* 0, or -ENOMEM on alloc failure */
+{
+	const size_t sz = sizeof(struct ipc_ids) + sizeof(void *);
+
+	envid->_msg_ids = kmalloc(sz, GFP_KERNEL);
+	if (envid->_msg_ids == NULL)
+		goto out_nomem;
+	envid->_sem_ids = kmalloc(sz, GFP_KERNEL);
+	if (envid->_sem_ids == NULL)
+		goto out_free_msg;
+	envid->_shm_ids = kmalloc(sz, GFP_KERNEL);
+	if (envid->_shm_ids == NULL)
+		goto out_free_sem;
+
+	init_ve_ipc_msg();
+	init_ve_ipc_sem();	/* NOTE(review): uses get_exec_env(); presumably == envid */
+	init_ve_ipc_shm();
+	return 0;
+
+out_free_sem:
+	kfree(envid->_sem_ids);
+out_free_msg:
+	kfree(envid->_msg_ids);
+	envid->_msg_ids = envid->_sem_ids = NULL; /* no stale ptrs for ve_ipc_free() */
+out_nomem:
+	return -ENOMEM;
+}
+
+void ve_ipc_cleanup(void)	/* run the per-type cleanup_ve_ipc_*() in turn */
+{
+	cleanup_ve_ipc_msg();
+	cleanup_ve_ipc_sem();	/* sem and shm variants walk get_exec_env()'s tables */
+	cleanup_ve_ipc_shm();
+}
+
+void ve_ipc_free(struct ve_struct *env)
+{
+	ipc_free_ids(env->_msg_ids);	/* each table is freed, then its pointer */
+	env->_msg_ids = NULL;		/* is cleared before the next one */
+	ipc_free_ids(env->_sem_ids);
+	env->_sem_ids = NULL;
+	ipc_free_ids(env->_shm_ids);
+	env->_shm_ids = NULL;
+}
+
+void fini_ve_ipc(struct ve_struct *ptr)	/* cleanup, then free ptr's id tables */
+{
+	ve_ipc_cleanup();	/* NOTE(review): takes no VE; presumably ptr is the current one */
+	ve_ipc_free(ptr);
+}
+
+EXPORT_SYMBOL(init_ve_ipc);
+EXPORT_SYMBOL(ve_ipc_cleanup);
+EXPORT_SYMBOL(ve_ipc_free);
+EXPORT_SYMBOL(fini_ve_ipc);
+#endif /* CONFIG_VE */
+
 #ifdef __ARCH_WANT_IPC_PARSE_VERSION
 
 
diff -uprN linux-2.6.16/ipc/util.h linux-2.6.16.ovz/ipc/util.h
--- linux-2.6.16/ipc/util.h	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/ipc/util.h	2006-07-05 08:34:56.000000000 -0400
@@ -15,6 +15,22 @@ void sem_init (void);
 void msg_init (void);
 void shm_init (void);
 
+#ifdef CONFIG_VE
+void prepare_msg(void);
+void prepare_sem(void);
+void prepare_shm(void);
+void init_ve_ipc_msg(void);
+void init_ve_ipc_sem(void);
+void init_ve_ipc_shm(void);
+void cleanup_ve_ipc_msg(void);
+void cleanup_ve_ipc_sem(void);
+void cleanup_ve_ipc_shm(void);
+
+#define __ve_init
+#else
+#define __ve_init	__init
+#endif
+
 struct ipc_id_ary {
 	int size;
 	struct kern_ipc_perm *p[0];
@@ -28,10 +44,11 @@ struct ipc_ids {
 	struct semaphore sem;	
 	struct ipc_id_ary nullentry;
 	struct ipc_id_ary* entries;
+	struct ve_struct *owner_env;
 };
 
 struct seq_file;
-void __init ipc_init_ids(struct ipc_ids* ids, int size);
+void __ve_init ipc_init_ids(struct ipc_ids *ids, int size);
 #ifdef CONFIG_PROC_FS
 void __init ipc_init_proc_interface(const char *path, const char *header,
 				    struct ipc_ids *ids,
@@ -42,7 +59,7 @@ void __init ipc_init_proc_interface(cons
 
 /* must be called with ids->sem acquired.*/
 int ipc_findkey(struct ipc_ids* ids, key_t key);
-int ipc_addid(struct ipc_ids* ids, struct kern_ipc_perm* new, int size);
+int ipc_addid(struct ipc_ids* ids, struct kern_ipc_perm* new, int size, int reqid);
 
 /* must be called with both locks acquired. */
 struct kern_ipc_perm* ipc_rmid(struct ipc_ids* ids, int id);
diff -uprN linux-2.6.16/kernel/Kconfig.fairsched linux-2.6.16.ovz/kernel/Kconfig.fairsched
--- linux-2.6.16/kernel/Kconfig.fairsched	1969-12-31 19:00:00.000000000 -0500
+++ linux-2.6.16.ovz/kernel/Kconfig.fairsched	2006-07-05 08:34:56.000000000 -0400
@@ -0,0 +1,22 @@
+config SCHED_VCPU
+	bool "VCPU scheduler support"
+	default n
+	help
+	  VCPU scheduler support adds an additional layer of abstraction
+	  which virtualizes the notion of a cpu and separates physical cpus
+	  from virtual cpus. It makes it possible to use the fair CPU scheduler,
+	  to dynamically add/remove cpus to/from a VPS, and so on.
+
+config FAIRSCHED
+	bool "Fair CPU scheduler (EXPERIMENTAL)"
+	depends on SCHED_VCPU
+	default SCHED_VCPU
+	help
+	  Config option for Fair CPU scheduler (fairsched).
+	  This option allows grouping processes into scheduling nodes
+	  which receive CPU time proportional to their weight.
+	  This is a very important feature for process group isolation and
+	  QoS management.
+
+	  If unsure, say N.
+
diff -uprN linux-2.6.16/kernel/Kconfig.openvz linux-2.6.16.ovz/kernel/Kconfig.openvz
--- linux-2.6.16/kernel/Kconfig.openvz	1969-12-31 19:00:00.000000000 -0500
+++ linux-2.6.16.ovz/kernel/Kconfig.openvz	2006-07-05 08:34:56.000000000 -0400
@@ -0,0 +1,61 @@
+# Copyright (C) 2005  SWsoft
+# All rights reserved.
+# Licensing governed by "linux/COPYING.SWsoft" file.
+
+menu "OpenVZ"
+
+config VE
+	bool "Virtual Environment support"
+	default y
+	help
+	  This option adds support for virtual Linux environments running on
+	  the original box, with a fully supported virtual network driver and
+	  tty subsystem and configurable access to hardware and other resources.
+
+config VE_CALLS
+	tristate "VE calls interface"
+	depends on VE
+	default m
+	help
+	  This option controls how to build vzmon code containing VE calls.
+	  By default it is built as the module vzmon.o
+
+config VE_NETDEV
+	tristate "VE networking"
+	depends on VE_CALLS
+	default m
+	help
+	  This option controls whether to build VE networking code.
+
+config VE_ETHDEV
+	tristate "Virtual ethernet device"
+	depends on VE_CALLS
+	default m
+	help
+	  This option controls whether to build virtual ethernet device.
+
+config VE_IPTABLES
+	bool "VE netfiltering"
+	depends on VE && VE_NETDEV && INET && NETFILTER
+	default y
+	help
+	  This option controls whether to build VE netfiltering code.
+
+config VZ_WDOG
+	tristate "VE watchdog module"
+	depends on VE_CALLS
+	default m
+	help
+	  This option controls building of vzwdog module, which dumps
+	  a lot of useful system info on console periodically.
+ 
+config VZ_CHECKPOINT
+ 	tristate "Checkpointing & restoring Virtual Environments"
+ 	depends on SOFTWARE_SUSPEND && VE_CALLS
+ 	default m
+ 	help
+ 	  This option adds two modules, "cpt" and "rst", which allow
+ 	  saving a running Virtual Environment and restoring it
+ 	  on another host (live migration) or on the same host (checkpointing).
+
+endmenu
diff -uprN linux-2.6.16/kernel/Makefile linux-2.6.16.ovz/kernel/Makefile
--- linux-2.6.16/kernel/Makefile	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/kernel/Makefile	2006-07-05 08:34:56.000000000 -0400
@@ -2,7 +2,8 @@
 # Makefile for the linux kernel.
 #
 
-obj-y     = sched.o fork.o exec_domain.o panic.o printk.o profile.o \
+obj-y     = sched.o fairsched.o \
+	    fork.o exec_domain.o panic.o printk.o profile.o \
 	    exit.o itimer.o time.o softirq.o resource.o \
 	    sysctl.o capability.o ptrace.o timer.o user.o \
 	    signal.o sys.o kmod.o workqueue.o pid.o \
@@ -10,6 +11,18 @@ obj-y     = sched.o fork.o exec_domain.o
 	    kthread.o wait.o kfifo.o sys_ni.o posix-cpu-timers.o mutex.o \
 	    hrtimer.o
 
+obj-y += ub/
+
+obj-$(CONFIG_VE) += ve.o
+obj-$(CONFIG_VE) += veowner.o
+obj-$(CONFIG_VE_CALLS) += vzdev.o
+obj-$(CONFIG_VZ_WDOG) += vzwdog.o
+obj-$(CONFIG_VE_CALLS) += vzmon.o
+
+vzmon-objs = vecalls.o
+
+obj-$(CONFIG_VZ_CHECKPOINT) += cpt/
+
 obj-$(CONFIG_DEBUG_MUTEXES) += mutex-debug.o
 obj-$(CONFIG_FUTEX) += futex.o
 obj-$(CONFIG_GENERIC_ISA_DMA) += dma.o
diff -uprN linux-2.6.16/kernel/audit.c linux-2.6.16.ovz/kernel/audit.c
--- linux-2.6.16/kernel/audit.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/kernel/audit.c	2006-07-05 08:34:56.000000000 -0400
@@ -372,6 +372,9 @@ static int audit_receive_msg(struct sk_b
 	uid_t			loginuid; /* loginuid of sender */
 	struct audit_sig_info   sig_data;
 
+	if (!ve_is_super(VE_OWNER_SKB(skb)))
+		return -ECONNREFUSED;
+
 	err = audit_netlink_ok(NETLINK_CB(skb).eff_cap, msg_type);
 	if (err)
 		return err;
diff -uprN linux-2.6.16/kernel/auditsc.c linux-2.6.16.ovz/kernel/auditsc.c
--- linux-2.6.16/kernel/auditsc.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/kernel/auditsc.c	2006-07-05 08:34:56.000000000 -0400
@@ -966,11 +966,6 @@ void audit_syscall_entry(struct task_str
 	if (context->in_syscall) {
 		struct audit_context *newctx;
 
-#if defined(__NR_vm86) && defined(__NR_vm86old)
-		/* vm86 mode should only be entered once */
-		if (major == __NR_vm86 || major == __NR_vm86old)
-			return;
-#endif
 #if AUDIT_DEBUG
 		printk(KERN_ERR
 		       "audit(:%d) pid=%d in syscall=%d;"
diff -uprN linux-2.6.16/kernel/capability.c linux-2.6.16.ovz/kernel/capability.c
--- linux-2.6.16/kernel/capability.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/kernel/capability.c	2006-07-05 08:34:56.000000000 -0400
@@ -24,7 +24,8 @@ EXPORT_SYMBOL(cap_bset);
  * This lock protects task->cap_* for all tasks including current.
  * Locking rule: acquire this prior to tasklist_lock.
  */
-static DEFINE_SPINLOCK(task_capability_lock);
+DEFINE_SPINLOCK(task_capability_lock);
+EXPORT_SYMBOL(task_capability_lock);
 
 /*
  * For sys_getproccap() and sys_setproccap(), any of the three
@@ -67,8 +68,8 @@ asmlinkage long sys_capget(cap_user_head
      spin_lock(&task_capability_lock);
      read_lock(&tasklist_lock); 
 
-     if (pid && pid != current->pid) {
-	     target = find_task_by_pid(pid);
+     if (pid && pid != virt_pid(current)) {
+	     target = find_task_by_pid_ve(pid);
 	     if (!target) {
 	          ret = -ESRCH;
 	          goto out;
@@ -100,9 +101,13 @@ static inline int cap_set_pg(int pgrp, k
 	int ret = -EPERM;
 	int found = 0;
 
-	do_each_task_pid(pgrp, PIDTYPE_PGID, g) {
+	pgrp = vpid_to_pid(pgrp);
+	if (pgrp < 0)
+		return ret;
+
+	do_each_task_pid_ve(pgrp, PIDTYPE_PGID, g) {
 		target = g;
-		while_each_thread(g, target) {
+		while_each_thread_ve(g, target) {
 			if (!security_capset_check(target, effective,
 							inheritable,
 							permitted)) {
@@ -113,7 +118,7 @@ static inline int cap_set_pg(int pgrp, k
 			}
 			found = 1;
 		}
-	} while_each_task_pid(pgrp, PIDTYPE_PGID, g);
+	} while_each_task_pid_ve(pgrp, PIDTYPE_PGID, g);
 
 	if (!found)
 	     ret = 0;
@@ -132,7 +137,7 @@ static inline int cap_set_all(kernel_cap
      int ret = -EPERM;
      int found = 0;
 
-     do_each_thread(g, target) {
+     do_each_thread_ve(g, target) {
              if (target == current || target->pid == 1)
                      continue;
              found = 1;
@@ -141,7 +146,7 @@ static inline int cap_set_all(kernel_cap
 		     continue;
 	     ret = 0;
 	     security_capset_set(target, effective, inheritable, permitted);
-     } while_each_thread(g, target);
+     } while_each_thread_ve(g, target);
 
      if (!found)
 	     ret = 0;
@@ -188,7 +193,7 @@ asmlinkage long sys_capset(cap_user_head
      if (get_user(pid, &header->pid))
 	     return -EFAULT; 
 
-     if (pid && pid != current->pid && !capable(CAP_SETPCAP))
+     if (pid && pid != virt_pid(current) && !capable(CAP_SETPCAP))
              return -EPERM;
 
      if (copy_from_user(&effective, &data->effective, sizeof(effective)) ||
@@ -199,8 +204,8 @@ asmlinkage long sys_capset(cap_user_head
      spin_lock(&task_capability_lock);
      read_lock(&tasklist_lock);
 
-     if (pid > 0 && pid != current->pid) {
-          target = find_task_by_pid(pid);
+     if (pid > 0 && pid != virt_pid(current)) {
+          target = find_task_by_pid_ve(pid);
           if (!target) {
                ret = -ESRCH;
                goto out;
diff -uprN linux-2.6.16/kernel/compat.c linux-2.6.16.ovz/kernel/compat.c
--- linux-2.6.16/kernel/compat.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/kernel/compat.c	2006-07-05 08:34:56.000000000 -0400
@@ -21,6 +21,8 @@
 #include <linux/syscalls.h>
 #include <linux/unistd.h>
 #include <linux/security.h>
+#include <linux/hrtimer.h>
+#include <linux/module.h>
 
 #include <asm/uaccess.h>
 
@@ -38,61 +40,73 @@ int put_compat_timespec(const struct tim
 			__put_user(ts->tv_nsec, &cts->tv_nsec)) ? -EFAULT : 0;
 }
 
-static long compat_nanosleep_restart(struct restart_block *restart)
+long compat_nanosleep_restart(struct restart_block *restart)
 {
-	unsigned long expire = restart->arg0, now = jiffies;
 	struct compat_timespec __user *rmtp;
+	struct timespec tu;
+	void *rfn_save = restart->fn;
+	struct hrtimer timer;
+	ktime_t rem;
 
-	/* Did it expire while we handled signals? */
-	if (!time_after(expire, now))
-		return 0;
+	restart->fn = do_no_restart_syscall;
+
+	hrtimer_init(&timer, (clockid_t) restart->arg3, HRTIMER_ABS);
+
+	timer.expires.tv64 = ((u64)restart->arg1 << 32) | (u64) restart->arg0;
 
-	expire = schedule_timeout_interruptible(expire - now);
-	if (expire == 0)
+	set_current_state(TASK_INTERRUPTIBLE);
+	rem = schedule_hrtimer(&timer, HRTIMER_ABS);
+
+	if (rem.tv64 <= 0)
 		return 0;
 
-	rmtp = (struct compat_timespec __user *)restart->arg1;
-	if (rmtp) {
-		struct compat_timespec ct;
-		struct timespec t;
-
-		jiffies_to_timespec(expire, &t);
-		ct.tv_sec = t.tv_sec;
-		ct.tv_nsec = t.tv_nsec;
-		if (copy_to_user(rmtp, &ct, sizeof(ct)))
-			return -EFAULT;
-	}
-	/* The 'restart' block is already filled in */
+	rmtp = (struct compat_timespec __user *) restart->arg2;
+	tu = ktime_to_timespec(rem);
+	if (rmtp && put_compat_timespec(&tu, rmtp))
+		return -EFAULT;
+
+	restart->fn = rfn_save;
+
+	/* The other values in restart are already filled in */
 	return -ERESTART_RESTARTBLOCK;
 }
+EXPORT_SYMBOL_GPL(compat_nanosleep_restart);
 
 asmlinkage long compat_sys_nanosleep(struct compat_timespec __user *rqtp,
 		struct compat_timespec __user *rmtp)
 {
 	struct timespec t;
 	struct restart_block *restart;
-	unsigned long expire;
+	struct hrtimer timer;
+	ktime_t rem;
 
 	if (get_compat_timespec(&t, rqtp))
 		return -EFAULT;
 
-	if ((t.tv_nsec >= 1000000000L) || (t.tv_nsec < 0) || (t.tv_sec < 0))
+	if (!timespec_valid(&t))
 		return -EINVAL;
 
-	expire = timespec_to_jiffies(&t) + (t.tv_sec || t.tv_nsec);
-	expire = schedule_timeout_interruptible(expire);
-	if (expire == 0)
+	hrtimer_init(&timer, CLOCK_MONOTONIC, HRTIMER_REL);
+
+	timer.expires = timespec_to_ktime(t);
+
+	set_current_state(TASK_INTERRUPTIBLE);
+	rem = schedule_hrtimer(&timer, HRTIMER_REL);
+	if (rem.tv64 <= 0)
 		return 0;
 
-	if (rmtp) {
-		jiffies_to_timespec(expire, &t);
-		if (put_compat_timespec(&t, rmtp))
-			return -EFAULT;
-	}
+	t = ktime_to_timespec(rem);
+
+	if (rmtp && put_compat_timespec(&t, rmtp))
+		return -EFAULT;
+
 	restart = &current_thread_info()->restart_block;
 	restart->fn = compat_nanosleep_restart;
-	restart->arg0 = jiffies + expire;
-	restart->arg1 = (unsigned long) rmtp;
+	restart->arg0 = timer.expires.tv64 & 0xFFFFFFFF;
+	restart->arg1 = timer.expires.tv64 >> 32;
+	restart->arg2 = (unsigned long) rmtp;
+	restart->arg3 = (unsigned long) timer.base->index;
+
 	return -ERESTART_RESTARTBLOCK;
 }
 
diff -uprN linux-2.6.16/kernel/configs.c linux-2.6.16.ovz/kernel/configs.c
--- linux-2.6.16/kernel/configs.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/kernel/configs.c	2006-07-05 08:34:56.000000000 -0400
@@ -89,8 +89,7 @@ static int __init ikconfig_init(void)
 	struct proc_dir_entry *entry;
 
 	/* create the current config file */
-	entry = create_proc_entry("config.gz", S_IFREG | S_IRUGO,
-				  &proc_root);
+	entry = create_proc_entry("config.gz", S_IFREG | S_IRUGO, NULL);
 	if (!entry)
 		return -ENOMEM;
 
diff -uprN linux-2.6.16/kernel/cpt/Makefile linux-2.6.16.ovz/kernel/cpt/Makefile
--- linux-2.6.16/kernel/cpt/Makefile	1969-12-31 19:00:00.000000000 -0500
+++ linux-2.6.16.ovz/kernel/cpt/Makefile	2006-07-05 08:34:56.000000000 -0400
@@ -0,0 +1,41 @@
+#
+#
+#  kernel/cpt/Makefile
+#
+#  Copyright (C) 2000-2005  SWsoft
+#  All rights reserved.
+#
+#  Licensing governed by "linux/COPYING.SWsoft" file.
+# Build two composite objects, vzcpt.o and vzrst.o, from the lists below.
+obj-$(CONFIG_VZ_CHECKPOINT) += vzcpt.o vzrst.o
+
+vzcpt-objs := cpt_proc.o cpt_dump.o cpt_obj.o cpt_context.o cpt_process.o \
+	cpt_mm.o cpt_files.o cpt_kernel.o \
+	cpt_socket.o cpt_socket_in.o cpt_tty.o cpt_sysvipc.o cpt_net.o \
+	cpt_conntrack.o cpt_ubc.o cpt_epoll.o
+
+vzrst-objs := rst_proc.o rst_undump.o rst_context.o rst_process.o \
+	rst_mm.o rst_files.o \
+	rst_socket.o rst_socket_in.o rst_tty.o rst_sysvipc.o rst_net.o \
+	rst_conntrack.o rst_ubc.o rst_epoll.o
+# Modular build only: vzrst additionally links cpt_obj.o and cpt_kernel.o.
+ifeq ($(CONFIG_VZ_CHECKPOINT), m)
+vzrst-objs += cpt_obj.o cpt_kernel.o
+endif
+# CONFIG_VZ_CHECKPOINT_LAZY adds the pagein objects to both lists.
+ifeq ($(CONFIG_VZ_CHECKPOINT_LAZY), y)
+vzcpt-objs += cpt_pagein.o
+vzrst-objs += rst_pagein.o
+endif
+# x86_64 objects; cpt_x8664.o is also linked into vzrst when built modular.
+ifeq ($(CONFIG_X86_64), y)
+vzcpt-objs += cpt_x8664.o
+vzrst-objs += rst_x8664.o
+ifeq ($(CONFIG_VZ_CHECKPOINT), m)
+vzrst-objs += cpt_x8664.o
+endif
+endif
+# 32-bit x86 adds rst_i386.o to vzrst only.
+ifeq ($(CONFIG_X86_32), y)
+vzrst-objs += rst_i386.o
+endif
diff -uprN linux-2.6.16/kernel/cpt/cpt_conntrack.c linux-2.6.16.ovz/kernel/cpt/cpt_conntrack.c
--- linux-2.6.16/kernel/cpt/cpt_conntrack.c	1969-12-31 19:00:00.000000000 -0500
+++ linux-2.6.16.ovz/kernel/cpt/cpt_conntrack.c	2006-07-05 08:34:56.000000000 -0400
@@ -0,0 +1,370 @@
+/*
+ *
+ *  kernel/cpt/cpt_conntrack.c
+ *
+ *  Copyright (C) 2000-2005  SWsoft
+ *  All rights reserved.
+ *
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/version.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/file.h>
+#include <linux/mm.h>
+#include <linux/errno.h>
+#include <linux/fs.h>
+#include <linux/socket.h>
+#include <linux/netdevice.h>
+#include <linux/inetdevice.h>
+#include <linux/rtnetlink.h>
+#include <linux/unistd.h>
+#include <linux/ve.h>
+#include <linux/vzcalluser.h>
+#include <linux/cpt_image.h>
+#include <linux/icmp.h>
+#include <linux/ip.h>
+
+#if defined(CONFIG_VE_IPTABLES) && \
+    (defined(CONFIG_IP_NF_CONNTRACK) || defined(CONFIG_IP_NF_CONNTRACK_MODULE))
+
+#include <linux/netfilter.h>
+#include <linux/netfilter_ipv4/ip_conntrack.h>
+#include <linux/netfilter_ipv4/ip_nat.h>
+#include <linux/netfilter_ipv4/ip_conntrack_protocol.h>
+#include <linux/netfilter_ipv4/ip_conntrack_helper.h>
+#include <linux/netfilter_ipv4/ip_conntrack_core.h>
+
+#include "cpt_obj.h"
+#include "cpt_context.h"
+
+
+/* How does it work?
+ *
+ * Network is disabled, so new conntrack entries will not appear.
+ * However, some of them can disappear because of timeouts.
+ *
+ * So, we take read_lock, collect all required information atomically,
+ * essentially, creating parallel "refcount" structures holding pointers.
+ * We delete conntrack timers as well, so the structures cannot disappear
+ * after releasing the lock. Now, after releasing lock we can dump everything
+ * safely. And on exit we restore timers to their original values.
+ *
+ * Note, this approach is not going to work in VE0.
+ */
+
+struct ct_holder
+{
+	struct ct_holder *next;	/* singly linked list of holders */
+	struct ip_conntrack_tuple_hash *cth;	/* held original-direction tuple hash; NULL while unused */
+	int index;	/* 1-based number assigned when cth is filled in */
+};
+/* Copy the dst and src members of a conntrack tuple into its image form. */
+static void encode_tuple(struct cpt_ipct_tuple *v, struct ip_conntrack_tuple *tuple)
+{
+	v->cpt_dst = tuple->dst.ip;
+	v->cpt_dstport = tuple->dst.u.all;	/* the whole protocol union, via .all */
+	v->cpt_protonum = tuple->dst.protonum;
+	v->cpt_dir = tuple->dst.dir;
+
+	v->cpt_src = tuple->src.ip;
+	v->cpt_srcport = tuple->src.u.all;	/* the whole protocol union, via .all */
+}
+/* Fill *v from one expectation; dereferences exp->master->helper. Returns 0. */
+static int dump_one_expect(struct cpt_ip_connexpect_image *v,
+			   struct ip_conntrack_expect *exp,
+			   int sibling, cpt_context_t *ctx)
+{
+	int err = 0;
+
+	v->cpt_next = sizeof(*v);
+	v->cpt_object = CPT_OBJ_NET_CONNTRACK_EXPECT;
+	v->cpt_hdrlen = sizeof(*v);
+	v->cpt_content = CPT_CONTENT_VOID;
+	/* Payload: tuple and mask, sibling index, and NAT data when configured. */
+	encode_tuple(&v->cpt_tuple, &exp->tuple);
+	encode_tuple(&v->cpt_mask, &exp->mask);
+	v->cpt_sibling_conntrack = sibling;
+	v->cpt_flags = exp->flags;
+	v->cpt_seq = exp->id;
+	v->cpt_dir = 0;
+	v->cpt_manip_proto = 0;
+#ifdef CONFIG_IP_NF_NAT_NEEDED
+	v->cpt_manip_proto = exp->saved_proto.all;
+	v->cpt_dir = exp->dir;
+#endif
+	v->cpt_timeout = 0;	/* stays 0 when the helper has no timeout */
+	if (exp->master->helper->timeout)
+		v->cpt_timeout = exp->timeout.expires - jiffies;	/* relative to now */
+	return err;
+}
+
+/* NOTE. We use one page to dump the list of expectations. This may not be
+ * enough in theory. In practice there is only one expectation per conntrack
+ * record. Moreover, taking into account that _ALL_ expectations are saved in
+ * one global list, which is looked up for each incoming/outgoing packet, the
+ * system would be severely dead if even one conntrack had that many
+ * expectations. In short, I am not going to repair this.
+ */
+
+static int dump_expect_list(struct ip_conntrack *ct, struct ct_holder *list,
+			    cpt_context_t *ctx)
+{
+	int err = 0;
+	unsigned long pg;
+	struct cpt_ip_connexpect_image *v;
+	struct ip_conntrack_expect *exp;
+
+	if (ct->expecting == 0)
+		return err;
+	if (ct->expecting*sizeof(struct cpt_ip_connexpect_image) > PAGE_SIZE)
+		return -ENOBUFS;	/* more expectations than fit in one page */
+	/* Stage the images of ct's expectations in one page, then write them. */
+	pg = __get_free_page(GFP_KERNEL);
+	if (!pg)
+		return -ENOMEM;
+	v = (struct cpt_ip_connexpect_image *)pg;
+
+	read_lock_bh(&ip_conntrack_lock);
+	list_for_each_entry(exp, &ve_ip_conntrack_expect_list, list) {
+		int sibling;
+
+		if (exp->master != ct)
+			continue;
+
+		if (ct->helper == NULL) {
+			eprintk_ctx("conntrack: no helper and non-trivial expectation\n");
+			err = -EINVAL;
+			break;
+		}
+
+		sibling = 0;	/* always 0: the lookup below is compiled out */
+#if 0
+		/* That's all? No need to calculate sibling? */
+		if (exp->sibling) {
+			struct ct_holder *c;
+			for (c = list; c; c = c->next) {
+				if (tuplehash_to_ctrack(c->cth) == exp->sibling) {
+					sibling = c->index;
+					break;
+				}
+			}
+			/* NOTE: exp->sibling could be not "confirmed" and, hence,
+			 * out of hash table. We should just ignore such a sibling,
+			 * the connection is going to be retried, the packet
+			 * apparently was lost somewhere.
+			 */
+			if (sibling == 0)
+				dprintk_ctx("sibling conntrack is not found\n");
+		}
+#endif
+
+		/* If the expectation still does not have exp->sibling
+		 * and timer is not running, it is about to die on another
+		 * cpu. Skip it. */
+		if (!sibling &&
+		    ct->helper->timeout &&
+		    !timer_pending(&exp->timeout)) {
+			dprintk_ctx("conntrack: expectation: no timer\n");
+			continue;
+		}
+
+		err = dump_one_expect(v, exp, sibling, ctx);
+		if (err)
+			break;
+
+		v++;
+	}
+	read_unlock_bh(&ip_conntrack_lock);
+	/* v points past the last staged image; write only what was filled in. */
+	if (err == 0 && (unsigned long)v != pg)
+		ctx->write((void*)pg, (unsigned long)v - pg, ctx);
+
+	free_page(pg);
+	return err;
+}
+/* Dump one conntrack as one object: its image header, then its expectations. */
+static int dump_one_ct(struct ct_holder *c, struct ct_holder *list,
+		       cpt_context_t *ctx)
+{
+	struct ip_conntrack_tuple_hash *h = c->cth;
+	struct ip_conntrack *ct = tuplehash_to_ctrack(h);
+	struct cpt_ip_conntrack_image v;
+	int err = 0;
+
+	if (sizeof(v.cpt_proto_data) != sizeof(ct->proto)) {
+		eprintk_ctx("conntrack module ct->proto version mismatch\n");
+		return -EINVAL;
+	}
+	if (sizeof(v.cpt_help_data) != sizeof(ct->help)) {
+		eprintk_ctx("conntrack module ct->help version mismatch\n");
+		return -EINVAL;
+	}
+
+	cpt_open_object(NULL, ctx);
+	memset(&v, 0, sizeof(v));	/* fields skipped by the #ifdefs below must not carry stack garbage */
+	v.cpt_next = CPT_NULL;
+	v.cpt_object = CPT_OBJ_NET_CONNTRACK;
+	v.cpt_hdrlen = sizeof(v);
+	v.cpt_content = CPT_CONTENT_ARRAY;
+
+	read_lock_bh(&ip_conntrack_lock);
+	v.cpt_status = ct->status;
+	v.cpt_timeout = ct->timeout.expires - jiffies;	/* relative to now */
+	v.cpt_ct_helper = (ct->helper != NULL);
+	v.cpt_index = c->index;
+	v.cpt_id = ct->id;
+	v.cpt_mark = 0;
+#if defined(CONFIG_IP_NF_CONNTRACK_MARK)
+	v.cpt_mark = ct->mark;
+#endif
+	encode_tuple(&v.cpt_tuple[0], &ct->tuplehash[0].tuple);
+	encode_tuple(&v.cpt_tuple[1], &ct->tuplehash[1].tuple);
+	memcpy(&v.cpt_proto_data, &ct->proto, sizeof(v.cpt_proto_data));
+	memcpy(&v.cpt_help_data, &ct->help, sizeof(v.cpt_help_data));
+
+	v.cpt_masq_index = 0;
+	v.cpt_initialized = 0;
+	v.cpt_num_manips = 0;
+	v.cpt_nat_helper = 0;
+#ifdef CONFIG_IP_NF_NAT_NEEDED
+#if defined(CONFIG_IP_NF_TARGET_MASQUERADE) || \
+	defined(CONFIG_IP_NF_TARGET_MASQUERADE_MODULE)
+	v.cpt_masq_index = ct->nat.masq_index;
+#endif
+	/* "help" data is used by pptp, difficult to support */
+	v.cpt_nat_seq[0].cpt_correction_pos = ct->nat.info.seq[0].correction_pos;
+	v.cpt_nat_seq[0].cpt_offset_before = ct->nat.info.seq[0].offset_before;
+	v.cpt_nat_seq[0].cpt_offset_after = ct->nat.info.seq[0].offset_after;
+	v.cpt_nat_seq[1].cpt_correction_pos = ct->nat.info.seq[1].correction_pos;
+	v.cpt_nat_seq[1].cpt_offset_before = ct->nat.info.seq[1].offset_before;
+	v.cpt_nat_seq[1].cpt_offset_after = ct->nat.info.seq[1].offset_after;
+#endif
+	read_unlock_bh(&ip_conntrack_lock);
+
+	ctx->write(&v, sizeof(v), ctx);
+
+	err = dump_expect_list(ct, list, ctx);
+
+	cpt_close_object(ctx);	/* closed even when the expectation dump failed */
+	return err;
+}
+/* Dump all conntracks of the current VE; their timers are parked meanwhile. */
+int cpt_dump_ip_conntrack(cpt_context_t * ctx)
+{
+	struct ct_holder *ct_list = NULL;
+	struct ct_holder *c, **cp;
+	int err = 0;
+	int index = 0;
+	int idx;
+
+	if (get_exec_env()->_ip_conntrack == NULL)
+		return 0;
+	/* Preallocate count + 1 zeroed holders before taking ip_conntrack_lock. */
+	for (idx = atomic_read(&(get_exec_env()->_ip_conntrack->_ip_conntrack_count)); idx >= 0; idx--) {
+		c = kmalloc(sizeof(struct ct_holder), GFP_KERNEL);
+		if (c == NULL) {
+			err = -ENOMEM;
+			goto done;
+		}
+		memset(c, 0, sizeof(struct ct_holder));
+		c->next = ct_list;
+		ct_list = c;
+	}
+
+	c = ct_list;
+
+	read_lock_bh(&ip_conntrack_lock);
+	for (idx = 0; idx < ip_conntrack_htable_size; idx++) {
+		struct ip_conntrack_tuple_hash *h;
+		list_for_each_entry(h, &ve_ip_conntrack_hash[idx], list) {
+			/* Skip reply tuples, they are covered by original
+			 * direction. */
+			if (DIRECTION(h))
+				continue;
+
+			/* Oops, we have not enough of holders...
+			 * It is impossible. */
+			if (unlikely(c == NULL)) {
+				read_unlock_bh(&ip_conntrack_lock);
+				eprintk_ctx("unexpected conntrack appeared\n");
+				err = -ENOMEM;
+				goto done;
+			}
+
+			/* If timer is not running, it means that it
+			 * has just been scheduled on another cpu.
+			 * We should skip this conntrack, it is about to be
+			 * destroyed. */
+			if (!del_timer(&tuplehash_to_ctrack(h)->timeout)) {
+				dprintk_ctx("conntrack: no timer\n");
+				continue;
+			}
+
+			/* Timer is deleted. refcnt is _not_ decreased.
+			 * We are going to restore the timer on exit
+			 * from this function. */
+			c->cth = h;
+			c->index = ++index;
+			c = c->next;
+		}
+	}
+	read_unlock_bh(&ip_conntrack_lock);
+
+	/* No conntracks? Good. */
+	if (index == 0)
+		goto done;
+
+	/* Comb the list a little. */
+	cp = &ct_list;
+	while ((c = *cp) != NULL) {
+		/* Discard unused entries; they can appear, if some
+		 * entries were timed out since we preallocated the list.
+		 */
+		if (c->cth == NULL) {
+			*cp = c->next;
+			kfree(c);
+			continue;
+		}
+
+		/* Move conntracks attached to expectations to the beginning
+		 * of the list. */
+		if (tuplehash_to_ctrack(c->cth)->master && c != ct_list) {
+			*cp = c->next;
+			c->next = ct_list;
+			ct_list = c;
+			dprintk_ctx("conntrack: %d moved in list\n", c->index);
+			continue;
+		}
+		cp = &c->next;
+	}
+
+	cpt_open_section(ctx, CPT_SECT_NET_CONNTRACK);
+
+	for (c = ct_list; c; c = c->next) {
+		err = dump_one_ct(c, ct_list, ctx);
+		if (err)
+			goto done;	/* NOTE(review): skips cpt_close_section() below */
+	}
+
+	cpt_close_section(ctx);
+
+done:
+	while ((c = ct_list) != NULL) {
+		ct_list = c->next;
+		if (c->cth) {
+			/* Restore timer. refcnt is preserved. */
+			add_timer(&tuplehash_to_ctrack(c->cth)->timeout);
+		}
+		kfree(c);
+	}
+	return err;
+}
+
+#endif
diff -uprN linux-2.6.16/kernel/cpt/cpt_context.c linux-2.6.16.ovz/kernel/cpt/cpt_context.c
--- linux-2.6.16/kernel/cpt/cpt_context.c	1969-12-31 19:00:00.000000000 -0500
+++ linux-2.6.16.ovz/kernel/cpt/cpt_context.c	2006-07-05 08:34:56.000000000 -0400
@@ -0,0 +1,255 @@
+/*
+ *
+ *  kernel/cpt/cpt_context.c
+ *
+ *  Copyright (C) 2000-2005  SWsoft
+ *  All rights reserved.
+ *
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/version.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/file.h>
+#include <linux/mm.h>
+#include <linux/errno.h>
+#include <linux/pagemap.h>
+
+#include <linux/cpt_image.h>
+
+#include "cpt_obj.h"
+#include "cpt_context.h"
+
+/* Write count bytes at ctx->file's f_pos; the first failure is latched in ctx->write_error. */
+static void file_write(const void *addr, size_t count, struct cpt_context *ctx)
+{
+	mm_segment_t oldfs;
+	ssize_t err = -EBADF;
+	struct file *file = ctx->file;
+
+	oldfs = get_fs(); set_fs(KERNEL_DS);	/* addr is a kernel buffer */
+	if (file && file->f_op && file->f_op->write)	/* otherwise err stays -EBADF */
+		err = file->f_op->write(file, addr, count, &file->f_pos);
+	set_fs(oldfs);
+	if (err != count && !ctx->write_error)
+		ctx->write_error = err < 0 ? err : -EIO;	/* a short write is reported as -EIO */
+}
+/* Write count bytes at offset pos without moving f_pos; the first failure is latched in ctx->write_error. */
+static void file_pwrite(void *addr, size_t count, struct cpt_context *ctx, loff_t pos)
+{
+	mm_segment_t oldfs;
+	ssize_t err = -EBADF;
+	struct file *file = ctx->file;
+
+	oldfs = get_fs(); set_fs(KERNEL_DS);	/* addr is a kernel buffer */
+	if (file && file->f_op && file->f_op->write)	/* otherwise err stays -EBADF */
+		err = file->f_op->write(file, addr, count, &pos);	/* &pos is the local copy */
+	set_fs(oldfs);
+	if (err != count && !ctx->write_error)
+		ctx->write_error = err < 0 ? err : -EIO;	/* a short write is reported as -EIO */
+}
+/* Replace the dump file position with CPT_ALIGN() of itself; nothing is written. */
+static void file_align(struct cpt_context *ctx)
+{
+	struct file *file = ctx->file;
+
+	if (file)
+		file->f_pos = CPT_ALIGN(file->f_pos);
+}
+/* Zero *ctx and install the defaults used for dumping to a file. */
+void cpt_context_init(struct cpt_context *ctx)
+{
+	int i;
+
+	memset(ctx, 0, sizeof(*ctx));
+
+	init_MUTEX(&ctx->main_sem);
+	ctx->refcount = 1;
+	/* -1 means no section/object is open; cpt_close_*() test for >= 0. */
+	ctx->current_section = -1;
+	ctx->current_object = -1;
+	ctx->pagesize = PAGE_SIZE;
+	ctx->write = file_write;
+	ctx->pwrite = file_pwrite;
+	ctx->align = file_align;
+	for (i=0; i < CPT_SECT_MAX; i++)
+		ctx->sections[i] = CPT_NULL;	/* no section recorded yet */
+#ifdef CONFIG_VZ_CHECKPOINT_LAZY
+	init_completion(&ctx->pgin_notify);
+#endif
+	cpt_object_init(ctx);
+}
+/* Allocate the one-page ctx->tmpbuf and mark it free; ctx->file is not touched here. */
+int cpt_open_dumpfile(struct cpt_context *ctx)
+{
+	ctx->tmpbuf = (char*)__get_free_page(GFP_KERNEL);
+	if (ctx->tmpbuf == NULL)
+		return -ENOMEM;
+	__cpt_release_buf(ctx);	/* writes the initial "free" guard word */
+	return 0;
+}
+/* Drop the file reference and tmpbuf; returns ctx->write_error (0 if no write failed). */
+int cpt_close_dumpfile(struct cpt_context *ctx)
+{
+	if (ctx->file) {
+		fput(ctx->file);
+		ctx->file = NULL;
+	}
+	if (ctx->tmpbuf) {
+		free_page((unsigned long)ctx->tmpbuf);
+		ctx->tmpbuf = NULL;
+	}
+	if (ctx->write_error)
+		eprintk_ctx("error while writing dump file: %d\n", ctx->write_error);
+	return ctx->write_error;	/* not reset here */
+}
+/* Write the image's major header; does nothing when no file is attached. Always returns 0. */
+int cpt_major_hdr_out(struct cpt_context *ctx)
+{
+	struct cpt_major_hdr hdr;
+
+	if (ctx->file == NULL)
+		return 0;
+
+	memset(&hdr, 0, sizeof(hdr));
+	hdr.cpt_signature[0] = CPT_SIGNATURE0;
+	hdr.cpt_signature[1] = CPT_SIGNATURE1;
+	hdr.cpt_signature[2] = CPT_SIGNATURE2;
+	hdr.cpt_signature[3] = CPT_SIGNATURE3;
+	hdr.cpt_hdrlen = sizeof(hdr);
+	hdr.cpt_image_version = 1;
+#ifdef CONFIG_X86_32
+	hdr.cpt_os_arch = CPT_OS_ARCH_I386;
+#endif
+#ifdef CONFIG_X86_64
+	hdr.cpt_os_arch = CPT_OS_ARCH_EMT64;
+#endif
+	hdr.cpt_os_version = 0;	/* cpt_os_arch stays 0 if neither config above is set */
+	hdr.cpt_os_features = 0;
+	hdr.cpt_pagesize = PAGE_SIZE;
+	hdr.cpt_hz = HZ;
+	hdr.cpt_start_jiffies64 = ctx->virt_jiffies64;
+	hdr.cpt_start_sec = ctx->start_time.tv_sec;
+	hdr.cpt_start_nsec = ctx->start_time.tv_nsec;
+	hdr.cpt_cpu_caps[0] = ctx->src_cpu_flags;	/* only element 0 is filled */
+	hdr.cpt_kernel_config[0] = ctx->kernel_config_flags;	/* only element 0 is filled */
+	hdr.cpt_iptables_mask = ctx->iptables_mask;
+
+	ctx->write(&hdr, sizeof(hdr), ctx);
+	return 0;
+}
+/* Back-patch the open section's first 8 bytes with its length; no-op if none is open. */
+int cpt_close_section(struct cpt_context *ctx)
+{
+	if (ctx->file && ctx->current_section >= 0) {
+		__u64 next = ctx->file->f_pos - ctx->current_section;	/* bytes since section start */
+		ctx->pwrite(&next, 8, ctx, ctx->current_section);	/* presumably overwrites cpt_next */
+		ctx->current_section = -1;
+	}
+	return 0;
+}
+EXPORT_SYMBOL(cpt_close_section);
+/* Close any open section, record the new one's offset, and write its header. */
+int cpt_open_section(struct cpt_context *ctx, __u32 type)
+{
+	struct cpt_section_hdr hdr;
+
+	if (ctx->file == NULL)
+		return 0;
+
+	cpt_close_section(ctx);
+
+	ctx->current_section = ctx->file->f_pos;
+	ctx->sections[type] = ctx->current_section;	/* NOTE(review): type is not range-checked */
+
+	hdr.cpt_next = 0;	/* filled in later by cpt_close_section() */
+	hdr.cpt_section = type;
+	hdr.cpt_hdrlen = sizeof(hdr);
+	hdr.cpt_align = 0;
+	ctx->write(&hdr, sizeof(hdr), ctx);
+
+	return 0;
+}
+EXPORT_SYMBOL(cpt_open_section);
+
+/* Back-patch the open object's first 8 bytes with its length; no-op if none is open. */
+int cpt_close_object(struct cpt_context *ctx)
+{
+	if (ctx->file && ctx->current_object >= 0) {
+		__u64 next = ctx->file->f_pos - ctx->current_object;	/* bytes since object start */
+		ctx->pwrite(&next, 8, ctx, ctx->current_object);	/* presumably overwrites cpt_next */
+		ctx->current_object = -1;
+	}
+	return 0;
+}
+EXPORT_SYMBOL(cpt_close_object);
+/* Close any open object and start a new one at the current file position. */
+int cpt_open_object(cpt_object_t *obj, struct cpt_context *ctx)
+{
+	if (ctx->file == NULL)
+		return 0;
+
+	cpt_close_object(ctx);
+
+	ctx->current_object = ctx->file->f_pos;
+	if (obj)	/* obj may be NULL when the caller keeps no cpt_object_t */
+		cpt_obj_setpos(obj, ctx->current_object, ctx);
+
+	return 0;
+}
+EXPORT_SYMBOL(cpt_open_object);
+/* Save the current object offset in *saved and start a nested object at f_pos. */
+int cpt_push_object(loff_t *saved, struct cpt_context *ctx)
+{
+	/* Always fill *saved: cpt_pop_object() reads it even when no file is attached. */
+	*saved = ctx->current_object;
+	if (ctx->file)
+		ctx->current_object = ctx->file->f_pos;
+	return 0;
+}
+EXPORT_SYMBOL(cpt_push_object);
+/* Restore ctx->current_object from *saved; *saved is read unconditionally. */
+int cpt_pop_object(loff_t *saved, struct cpt_context *ctx)
+{
+	ctx->current_object = *saved;
+	return 0;
+}
+EXPORT_SYMBOL(cpt_pop_object);
+/* Write the trailer section: signature plus the recorded offset of every section. */
+int cpt_dump_tail(struct cpt_context *ctx)
+{
+	struct cpt_major_tail hdr;
+	int i;
+
+	if (ctx->file == NULL)
+		return 0;
+
+	cpt_open_section(ctx, CPT_SECT_TRAILER);
+	memset(&hdr, 0, sizeof(hdr));
+	hdr.cpt_next = sizeof(hdr);
+	hdr.cpt_object = CPT_OBJ_TRAILER;
+	hdr.cpt_hdrlen = sizeof(hdr);
+	hdr.cpt_content = CPT_CONTENT_VOID;
+	hdr.cpt_lazypages = 0;
+#ifdef CONFIG_VZ_CHECKPOINT_LAZY
+	hdr.cpt_lazypages = ctx->lazypages;
+#endif
+	hdr.cpt_64bit = ctx->tasks64;
+	hdr.cpt_signature[0] = CPT_SIGNATURE0;
+	hdr.cpt_signature[1] = CPT_SIGNATURE1;
+	hdr.cpt_signature[2] = CPT_SIGNATURE2;
+	hdr.cpt_signature[3] = CPT_SIGNATURE3;
+	hdr.cpt_nsect = CPT_SECT_MAX_INDEX;
+	for (i = 0; i < CPT_SECT_MAX_INDEX; i++)	/* NOTE(review): ctx->sections[] is sized CPT_SECT_MAX - confirm the bounds agree */
+		hdr.cpt_sections[i] = ctx->sections[i];
+
+	ctx->write(&hdr, sizeof(hdr), ctx);
+	cpt_close_section(ctx);
+	return 0;
+}
diff -uprN linux-2.6.16/kernel/cpt/cpt_context.h linux-2.6.16.ovz/kernel/cpt/cpt_context.h
--- linux-2.6.16/kernel/cpt/cpt_context.h	1969-12-31 19:00:00.000000000 -0500
+++ linux-2.6.16.ovz/kernel/cpt/cpt_context.h	2006-07-05 08:34:56.000000000 -0400
@@ -0,0 +1,196 @@
+#include <linux/fs.h>
+#include <asm/uaccess.h>
+
+#define	CPT_CTX_ERROR		-1
+#define	CPT_CTX_IDLE		0
+#define CPT_CTX_SUSPENDING	1
+#define	CPT_CTX_SUSPENDED	2
+#define CPT_CTX_DUMPING		3
+#define CPT_CTX_UNDUMPING	4
+#define CPT_CTX_UNDUMPED	5
+
+#define CPT_TID(tsk)   (tsk)->pid, virt_pid(tsk), (tsk)->comm
+#define CPT_FID		"%d,%d(%s)"
+
+
+typedef struct cpt_context
+{
+	struct list_head ctx_list;
+	int	refcount;	/* set to 1 by cpt_context_init() */
+	int	ctx_state;	/* presumably one of CPT_CTX_* - confirm */
+	int	objcount;
+	int	sticky;
+	struct semaphore main_sem;
+
+	struct file *errorfile;	/* target of cpt_flush_error() */
+	struct file *statusfile;
+	struct file *lockfile;
+
+	int	errno;
+	char	*error_msg;	/* text accumulated by eprintk_ctx() */
+	loff_t	err_offset;	/* bytes of error_msg in use */
+
+	struct file	*file;	/* dump file; the cpt_* writers do nothing when NULL */
+	char		*tmpbuf;	/* one page, guarded by CPT_TMPBUF_* */
+	int		pagesize;
+
+	loff_t		current_section;	/* offset of the open section, -1 if none */
+	loff_t		current_object;	/* offset of the open object, -1 if none */
+
+	loff_t		sections[CPT_SECT_MAX];	/* offset of each section, CPT_NULL if absent */
+
+	__u32		errormask;
+	__u32		write_error;	/* first write failure; NOTE(review): negative errno kept in a __u32 */
+
+	struct list_head object_array[CPT_OBJ_MAX];
+
+	void		(*write)(const void *addr, size_t count, struct cpt_context *ctx);
+	void		(*pwrite)(void *addr, size_t count, struct cpt_context *ctx, loff_t pos);
+	ssize_t		(*read)(void *addr, size_t count, struct cpt_context *ctx);
+	ssize_t		(*pread)(void *addr, size_t count, struct cpt_context *ctx, loff_t pos);
+	void		(*align)(struct cpt_context *ctx);
+	int		ve_id;
+	int		contextid;
+	__u64		cpt_jiffies64; 	/* Host jiffies64 at the moment of cpt/rst,
+					 * corresponding to start_time */
+	__u64		virt_jiffies64;	/* Virtual jiffies64. It is == cpt_jiffies64 when
+					 * VE did not migrate. */
+	struct timespec	start_time;
+	struct timespec delta_time;
+	int		image_version;
+	int		lo_index;
+	int		lo_index_old;
+	int		venet_index;
+	int		venet_index_old;
+	__u64		iptables_mask;
+
+#define CPT_ANONVMA_HBITS (sizeof(void*) == 4 ? 10 : 9)
+#define CPT_ANONVMA_HSIZE (1<<CPT_ANONVMA_HBITS)
+	struct hlist_head *anonvmas;
+#ifdef CONFIG_VZ_CHECKPOINT_LAZY
+	struct file	*pagein_file_in;
+	struct file	*pagein_file_out;
+	int		lazy_vm;
+	int		lazypages;
+	int		lazytype;
+	task_t		*pgin_task;
+	unsigned long	last_pagein;
+	struct pagein_desc	**pgin_dir;
+	struct pgin_device	*pagein_dev;
+	struct completion	pgin_notify;
+	struct completion	*pgind_completion;
+	struct swap_info_struct	*pgin_swp;
+#endif
+	int		tasks64;
+	__u32		src_cpu_flags;
+	__u32		dst_cpu_flags;
+	__u32		kernel_config_flags;
+
+	struct filejob  *filejob_queue;
+} cpt_context_t;
+
+typedef struct {
+	int pid;
+	cpt_context_t *ctx;
+	struct completion done;
+} pagein_info_t;
+
+int pagein_info_printf(char *buf, cpt_context_t *ctx);
+
+int cpt_open_dumpfile(struct cpt_context *);
+int cpt_close_dumpfile(struct cpt_context *);
+int rst_open_dumpfile(struct cpt_context *);
+void rst_close_dumpfile(struct cpt_context *);
+void cpt_context_init(struct cpt_context *);
+void rst_context_init(struct cpt_context *);
+void cpt_context_destroy(struct cpt_context *);
+
+void rst_report_error(int err, cpt_context_t *ctx);
+
+
+int cpt_major_hdr_out(struct cpt_context *ctx);
+int cpt_dump_tail(struct cpt_context *ctx);
+int cpt_close_section(struct cpt_context *ctx);
+int cpt_open_section(struct cpt_context *ctx, __u32 type);
+int cpt_close_object(struct cpt_context *ctx);
+int cpt_open_object(cpt_object_t *obj, struct cpt_context *ctx);
+int cpt_push_object(loff_t *saved, struct cpt_context *ctx);
+int cpt_pop_object(loff_t *saved, struct cpt_context *ctx);
+
+int rst_get_section(int type, struct cpt_context * ctx, loff_t *, loff_t *);
+__u8 *__rst_get_name(loff_t *pos_p, struct cpt_context *ctx);
+__u8 *rst_get_name(loff_t pos, struct cpt_context *ctx);
+void rst_put_name(__u8 *name, struct cpt_context *ctx);
+int _rst_get_object(int type, loff_t pos, void *tmp, int size, struct cpt_context *ctx);
+void * __rst_get_object(int type, loff_t pos, struct cpt_context *ctx);
+
+#define rst_get_object(type, pos, tmp, ctx) \
+ _rst_get_object((type), (pos), (tmp), sizeof(*(tmp)), (ctx))
+
+extern int debug_level;
+/* A message is printed when its level is <= debug_level (1 error, 2 warning, 3 debug). */
+#define cpt_printk(lvl, fmt, args...)	do {	\
+		if (lvl <= debug_level)		\
+			printk(fmt, ##args);	\
+	} while (0)
+/* The *_ctx variants use a variable named ctx from the calling scope. */
+#define dprintk(a...) cpt_printk(3, "CPT DBG: " a)
+#define dprintk_ctx(f, arg...) dprintk("%p,%u: " f, ctx, ctx->ve_id, ##arg)
+
+#define wprintk(a...) cpt_printk(2, "CPT WRN: " a)
+#define wprintk_ctx(f, arg...) wprintk("%p,%u: " f, ctx, ctx->ve_id, ##arg)
+/* eprintk_ctx also appends to the ctx->error_msg page; scnprintf keeps err_offset inside that page. */
+#define eprintk(a...) cpt_printk(1, "CPT ERR: " a)
+#define eprintk_ctx(f, arg...)						\
+do {									\
+	eprintk("%p,%u :" f, ctx, ctx->ve_id, ##arg);			\
+	if (ctx->error_msg && ctx->err_offset < PAGE_SIZE)		\
+		ctx->err_offset += scnprintf((char*)(ctx->error_msg +	\
+				ctx->err_offset),			\
+				PAGE_SIZE - ctx->err_offset, f, ##arg);	\
+} while(0)
+/* Guard word stored in the last 4 bytes of the tmpbuf page: marks it free or busy. */
+#define CPT_TMPBUF_FREE 0x789adf12
+#define CPT_TMPBUF_BUSY 0xabcd9876
+/* Claim ctx->tmpbuf; BUGs if it is not currently marked free. */
+static inline void *cpt_get_buf(cpt_context_t *ctx)
+{
+	void *buf = ctx->tmpbuf;
+
+	BUG_ON(*(u32*)(buf + PAGE_SIZE - 4) != CPT_TMPBUF_FREE);
+	*(u32*)(buf + PAGE_SIZE - 4) = CPT_TMPBUF_BUSY;
+	return buf;
+}
+/* Mark ctx->tmpbuf free without checking its previous state. */
+static inline void __cpt_release_buf(cpt_context_t *ctx)
+{
+	void *buf = ctx->tmpbuf;
+
+	*(u32*)(buf + PAGE_SIZE - 4) = CPT_TMPBUF_FREE;
+}
+/* Release ctx->tmpbuf; BUGs if it was not marked busy. */
+static inline void cpt_release_buf(cpt_context_t *ctx)
+{
+	void *buf = ctx->tmpbuf;
+
+	BUG_ON(*(u32*)(buf + PAGE_SIZE - 4) != CPT_TMPBUF_BUSY);
+	*(u32*)(buf + PAGE_SIZE - 4) = CPT_TMPBUF_FREE;
+}
+/* Write the accumulated error text to ctx->errorfile, then empty the buffer. */
+static inline void cpt_flush_error(cpt_context_t *ctx)
+{
+	mm_segment_t oldfs;
+
+	if (ctx->errorfile && ctx->error_msg && ctx->err_offset) {
+		if (ctx->errorfile->f_op && ctx->errorfile->f_op->write) {
+			oldfs = get_fs();
+			set_fs(KERNEL_DS);	/* error_msg is a kernel buffer */
+			ctx->errorfile->f_op->write(ctx->errorfile,
+				ctx->error_msg, ctx->err_offset,
+				&ctx->errorfile->f_pos);	/* result is ignored */
+			set_fs(oldfs);
+		}
+		ctx->error_msg[0] = 0;	/* reset even if the file had no write op */
+		ctx->err_offset = 0;
+	}
+}
diff -uprN linux-2.6.16/kernel/cpt/cpt_dump.c linux-2.6.16.ovz/kernel/cpt/cpt_dump.c
--- linux-2.6.16/kernel/cpt/cpt_dump.c	1969-12-31 19:00:00.000000000 -0500
+++ linux-2.6.16.ovz/kernel/cpt/cpt_dump.c	2006-07-05 08:34:56.000000000 -0400
@@ -0,0 +1,840 @@
+/*
+ *
+ *  kernel/cpt/cpt_dump.c
+ *
+ *  Copyright (C) 2000-2005  SWsoft
+ *  All rights reserved.
+ *
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/version.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/file.h>
+#include <linux/mm.h>
+#include <linux/errno.h>
+#include <linux/pagemap.h>
+#include <linux/ptrace.h>
+#include <linux/smp_lock.h>
+#include <linux/ve.h>
+#include <linux/ve_proto.h>
+#include <linux/virtinfo.h>
+#include <ub/ub_task.h>
+#include <linux/cpt_image.h>
+
+#include "cpt_obj.h"
+#include "cpt_context.h"
+#include "cpt_dump.h"
+#include "cpt_files.h"
+#include "cpt_mm.h"
+#include "cpt_process.h"
+#include "cpt_net.h"
+#include "cpt_socket.h"
+#include "cpt_ubc.h"
+#include "cpt_kernel.h"
+
+/* Generations from c up to root inside c's VE, or -1 if root is not reached. */
+static int vps_child_level(task_t *root, task_t *c)
+{
+	const int home_veid = VE_TASK_INFO(c)->owner_env->veid;
+	int depth;
+
+	/* climb the real_parent links until the chain leaves the VE */
+	for (depth = 0; VE_TASK_INFO(c)->owner_env->veid == home_veid; depth++) {
+		if (c->pid != c->tgid)
+			c = c->group_leader;	/* a thread stands for its leader */
+		if (c == root)
+			return depth;
+
+		c = c->real_parent;
+	}
+	return -1;
+}
+/* 1 unless p is exiting or its state is zombie, dead, stopped or traced. */
+static inline int freezable(struct task_struct * p)
+{
+	long st;
+
+	if (p->exit_state)
+		return 0;
+
+	st = p->state;
+	/* exit states found in ->state are rejected as well */
+	if (st == EXIT_ZOMBIE || st == EXIT_DEAD)
+		return 0;
+	if (st == TASK_STOPPED || st == TASK_TRACED)
+		return 0;
+
+	/* every other state is reported as freezable */
+	return 1;
+}
+
+/*
+ * A few words about PF_FREEZE, PF_FROZEN and TIF_FREEZE are necessary.
+ *
+ * SWSUSP raises the PF_FREEZE flag in tsk->flags from the context
+ * of another process. Apparently, this is unacceptable on SMP.
+ * Take freeze_processes() in kernel/power/process.c as an example.
+ * Unserialized modification of tsk->flags easily
+ * (believe it or not, it happens with a probability of almost 100% :-))
+ * creates a situation where setting PF_FREEZE in freeze_processes(),
+ * which spins quickly raising PF_FREEZE on all the processes,
+ * _clears_ the PF_FROZEN just set in refrigerator(), so suspend deadlocks.
+ *
+ * So, to keep things clean, we require that those flags be modified
+ * only under tsk->sighand->siglock, which is quite natural because PF_FREEZE
+ * is just a kind of signal.
+ *
+ * That is not enough: we are still not allowed to change tsk->flags from
+ * the context of another process, because we could corrupt other flags while
+ * the process, running on another cpu, modifies them. So we use TIF_FREEZE in
+ * the thread flags, which can be changed atomically.
+ *
+ * PF_FROZEN is also changed from the context of another process, but only
+ * when the process is already in refrigerator(), which does not modify
+ * tsk->flags.
+ */
+
+/* Freeze all tasks below the VE's init by raising TIF_FREEZE and rescanning.
+ * Returns 0, -ESRCH (no init), -EAGAIN (obstacle) or -EINTR (signal/10s). */
+static int vps_stop_tasks(struct cpt_context *ctx)
+{
+	unsigned long start_time = jiffies;
+	task_t *p, *g;
+	int todo, round = 0;
+
+	do_gettimespec(&ctx->start_time);
+	ctx->cpt_jiffies64 = get_jiffies_64();
+	ctx->virt_jiffies64 = ctx->cpt_jiffies64 + get_exec_env()->jiffies_fixup;
+
+	read_lock(&tasklist_lock);
+	for(;;) {
+		task_t *root;
+		todo = 0;	/* >0: tasks told to freeze, <0: obstacle code */
+
+		root = find_task_by_pid_ve(1);
+		if (!root) {
+			read_unlock(&tasklist_lock);
+			eprintk_ctx("cannot find ve init\n");
+			return -ESRCH;
+		}
+
+		do_each_thread_ve(g, p) {
+			if (vps_child_level(root, p) >= 0) {
+				if (!is_virtual_pid(virt_pid(p))) {
+					eprintk_ctx("external process %d/%d(%s) inside VPS (e.g. vzctl enter or vzctl exec).\n", virt_pid(p), p->pid, p->comm);
+					todo = -1;
+					goto out;
+				}
+				if (p->vfork_done) {
+					/* Task between vfork()...exec()
+					 * cannot be frozen, because parent
+					 * wait in uninterruptible state.
+					 * So, we do nothing, waiting for
+					 * exec(), unless:
+					 */
+					if (p->state == TASK_STOPPED ||
+					    p->state == TASK_TRACED) {
+						eprintk_ctx("task %d/%d(%s) is stopped while vfork(). Checkpointing is impossible.\n", virt_pid(p), p->pid, p->comm);
+						todo = -1;
+						/* It is fatal, _user_ stopped
+						 * vfork()ing task, so that we
+						 * cannot suspend now.
+						 */
+					} else {
+						todo = -3;
+					}
+					goto out;
+				}
+				if (p->state == TASK_TRACED
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,9)
+				    && !p->stopped_state
+#endif
+				    ) {
+					int ptrace_id = p->pn_state;
+					/* Debugger waits for signal. */
+					switch (ptrace_id) {
+					case PN_STOP_TF:
+					case PN_STOP_TF_RT:
+					case PN_STOP_ENTRY:
+					case PN_STOP_FORK:
+					case PN_STOP_VFORK:
+					case PN_STOP_SIGNAL:
+					case PN_STOP_EXIT:
+					case PN_STOP_LEAVE:
+						break;
+					default:
+						eprintk_ctx("task %d/%d(%s) is stopped by debugger while %d.\n", virt_pid(p), p->pid, p->comm, ptrace_id);
+						todo = -1;
+						goto out;
+					}
+				}
+				if (p->flags & PF_NOFREEZE) {
+					eprintk_ctx("task %d/%d(%s) is not freezable. Checkpointing is impossible.\n", virt_pid(p), p->pid, p->comm);
+					todo = -1;	/* never report success with it running */
+					goto out;
+				}
+				if (p->flags & PF_FROZEN)
+					continue;
+				if (!freezable(p))
+					continue;
+
+				spin_lock_irq(&p->sighand->siglock);
+				set_tsk_thread_flag(p, TIF_FREEZE);
+				signal_wake_up(p, 0);
+				spin_unlock_irq(&p->sighand->siglock);
+
+				if (round == 10)
+					wprintk_ctx("%d/%d(%s) is running\n", virt_pid(p), p->pid, p->comm);
+
+				todo++;
+			} else {
+				if (p != current) {
+					eprintk_ctx("foreign process %d/%d(%s) inside VPS (e.g. vzctl enter or vzctl exec).\n", virt_pid(p), p->pid, p->comm);
+					todo = -1;
+					goto out;
+				}
+			}
+		} while_each_thread_ve(g, p);
+
+out:
+		if (todo &&
+		    (time_after(jiffies, start_time + 10*HZ) ||
+		     signal_pending(current) || todo < 0)) {
+			do_each_thread_ve(g, p) {
+				if (vps_child_level(root, p) >= 0) {
+					spin_lock_irq(&p->sighand->siglock);
+					clear_tsk_thread_flag(p, TIF_FREEZE);
+					if (p->flags & PF_FROZEN) {
+						p->flags &= ~PF_FROZEN;
+						wake_up_process(p);
+					}
+					spin_unlock_irq(&p->sighand->siglock);
+				}
+			} while_each_thread_ve(g, p);
+			if (todo > 0)
+				todo = -2;
+			/* This is sign of failure of printk(), which is not
+			 * ours. So, no prefixes. */
+			printk(">\n");
+		}
+
+		read_unlock(&tasklist_lock);
+
+		if (!todo)
+			return 0;
+
+		if (todo == -1) {
+			eprintk_ctx("suspend is impossible now.\n");
+			return -EAGAIN;
+		}
+
+		if (todo == -2) {
+			eprintk_ctx("interrupted or timed out.\n");
+			return -EINTR;
+		}
+
+		if (time_after(jiffies, start_time + 10*HZ) ||
+		    signal_pending(current)) {
+			if (todo == -3) {
+				eprintk_ctx("vfork() is active, suspend is impossible now.\n");
+			} else {
+				eprintk_ctx("suspend is impossible, reason %d\n", todo);
+			}
+			return -EAGAIN;
+		}
+
+		if (todo < 0 || round > 0) {
+			current->state = TASK_INTERRUPTIBLE;
+			schedule_timeout(HZ/50);
+		} else {
+			yield();
+		}
+
+		read_lock(&tasklist_lock);
+		round++;
+	}
+}
+/* Clear the VE's is_locked flag; -ESRCH if no VE has ctx->ve_id. */
+static int cpt_unlock_ve(struct cpt_context *ctx)
+{
+	struct ve_struct *ve = get_ve_by_id(ctx->ve_id);
+
+	if (ve == NULL)
+		return -ESRCH;
+	/* the flag is flipped with op_sem held for writing */
+	down_write(&ve->op_sem);
+	ve->is_locked = 0;
+	up_write(&ve->op_sem);
+	put_ve(ve);
+	return 0;
+}
+/* Undo a suspend: thaw the collected tasks and unlock the VE. Returns 0. */
+int cpt_resume(struct cpt_context *ctx)
+{
+	cpt_object_t *obj;
+
+	cpt_unlock_sockets(ctx);
+
+#ifdef CONFIG_VZ_CHECKPOINT_LAZY
+	if (ctx->pgin_task) {
+		wait_for_completion(&ctx->pgin_notify);
+		put_task_struct(ctx->pgin_task);
+		ctx->pgin_task = NULL;
+	}
+#endif
+
+	for_each_object(obj, CPT_OBJ_TASK) {
+		task_t *tsk = obj->o_obj;
+
+		/* freeze flags are changed under siglock, see vps_stop_tasks() */
+		spin_lock_irq(&tsk->sighand->siglock);
+		clear_tsk_thread_flag(tsk, TIF_FREEZE);
+		if (tsk->flags & PF_FROZEN) {
+			tsk->flags &= ~PF_FROZEN;
+			wake_up_process(tsk);
+		} else if (freezable(tsk)) {
+			eprintk_ctx("strange, %s not frozen\n", tsk->comm );
+		}
+		spin_unlock_irq(&tsk->sighand->siglock);
+		put_task_struct(tsk);	/* drop the reference held for this object */
+	}
+
+	cpt_resume_network(ctx);
+
+	cpt_unlock_ve(ctx);	/* result ignored: resume is best effort here */
+
+	cpt_finish_ubc(ctx);
+	cpt_object_destroy(ctx);
+	return 0;
+}
+
+/* SIGKILL one collected task: leave only SIGKILL unblocked, clear its freeze
+ * state, wake it up and drop the task reference. */
+static void cpt_kill_one_task(task_t *tsk)
+{
+	send_sig(SIGKILL, tsk, 1);
+
+	spin_lock_irq(&tsk->sighand->siglock);
+	sigfillset(&tsk->blocked);
+	sigdelsetmask(&tsk->blocked, sigmask(SIGKILL));
+	set_tsk_thread_flag(tsk, TIF_SIGPENDING);
+	clear_tsk_thread_flag(tsk, TIF_FREEZE);
+	if (tsk->flags & PF_FROZEN)
+		tsk->flags &= ~PF_FROZEN;
+	spin_unlock_irq(&tsk->sighand->siglock);
+
+	wake_up_process(tsk);
+	put_task_struct(tsk);
+}
+
+/* Kill the collected tasks of the VE (init last), free the checkpoint objects
+ * and wait for env->counter to drop to 1.
+ * Returns -EINVAL without ve_id, -ESRCH if the VE is not found, otherwise 0. */
+int cpt_kill(struct cpt_context *ctx)
+{
+	int err = 0;
+	struct ve_struct *env;
+	cpt_object_t *obj;
+	task_t *root_task = NULL;
+	long delay;
+
+	if (!ctx->ve_id)
+		return -EINVAL;
+
+	env = get_ve_by_id(ctx->ve_id);
+	if (!env)
+		return -ESRCH;
+
+	/* from here cpt_kill succeeds */
+	if (VE_TASK_INFO(current)->owner_env == env) {
+		wprintk_ctx("attempt to kill ve from inside, escaping...\n");
+
+		write_lock_irq(&tasklist_lock);
+		VE_TASK_INFO(current)->owner_env = get_ve0();
+		REMOVE_VE_LINKS(current);
+		SET_VE_LINKS(current);
+
+		atomic_inc(&get_ve0()->pcounter);
+		atomic_dec(&env->pcounter);
+		write_unlock_irq(&tasklist_lock);
+		set_exec_env(get_ve0());
+	}
+
+#ifdef CONFIG_VZ_CHECKPOINT_LAZY
+	if (ctx->pgin_task) {
+		wait_for_completion(&ctx->pgin_notify);
+		put_task_struct(ctx->pgin_task);
+		ctx->pgin_task = NULL;
+	}
+#endif
+
+	cpt_kill_sockets(ctx);
+
+	for_each_object(obj, CPT_OBJ_TASK) {
+		task_t *tsk = obj->o_obj;
+
+		if (tsk->exit_state) {
+			put_task_struct(tsk);
+			continue;
+		}
+
+		if (virt_pid(tsk) == 1) {
+			root_task = tsk;	/* handled after the loop */
+			continue;
+		}
+
+		if (tsk->ptrace) {
+			/* detach from the tracer: link back under real_parent */
+			write_lock_irq(&tasklist_lock);
+			tsk->ptrace = 0;
+			if (!list_empty(&tsk->ptrace_list)) {
+				list_del_init(&tsk->ptrace_list);
+				REMOVE_LINKS(tsk);
+				tsk->parent = tsk->real_parent;
+				SET_LINKS(tsk);
+			}
+			write_unlock_irq(&tasklist_lock);
+		}
+
+		cpt_kill_one_task(tsk);
+	}
+
+	yield();
+
+	/* init was skipped in the loop above and is killed only now */
+	if (root_task != NULL)
+		cpt_kill_one_task(root_task);
+
+	cpt_finish_ubc(ctx);
+	cpt_object_destroy(ctx);
+
+	/* poll with a doubling delay capped at HZ; a pending signal ends the wait */
+	delay = 1;
+	while (atomic_read(&env->counter) != 1) {
+		if (signal_pending(current))
+			break;
+		current->state = TASK_INTERRUPTIBLE;
+		delay = (delay < HZ) ? (delay << 1) : HZ;
+		schedule_timeout(delay);
+	}
+	put_ve(env);
+
+	return err;
+}
+/* Register the beancounters referenced by task t with the context. */
+static void collect_task_ubc(task_t *t, struct cpt_context *ctx)
+{
+	struct task_beancounter *bc = &t->task_bc;
+
+	/* registered in this order: exec_ub, task_ub, fork_sub */
+	cpt_add_ubc(bc->exec_ub, ctx);
+	cpt_add_ubc(bc->task_ub, ctx);
+	cpt_add_ubc(bc->fork_sub, ctx);
+}
+/* Record an already frozen task; on failure drops the task ref, returns NULL. */
+static cpt_object_t * remember_task(task_t * child, cpt_object_t * head,
+				    cpt_context_t * ctx)
+{
+	cpt_object_t *entry;
+
+	if (freezable(child) && !(child->flags&PF_FROZEN)) {
+		eprintk_ctx("process " CPT_FID " is not frozen\n", CPT_TID(child));
+		goto drop_ref;
+	}
+	BUG_ON(lookup_cpt_object(CPT_OBJ_TASK, child, ctx));	/* no duplicates */
+	entry = alloc_cpt_object(GFP_KERNEL, ctx);
+	if (entry == NULL)
+		goto drop_ref;
+	entry->o_count = 1;
+	cpt_obj_setobj(entry, child, ctx);
+	insert_cpt_object(CPT_OBJ_TASK, entry, head, ctx);
+	collect_task_ubc(child, ctx);
+	return entry;
+drop_ref:
+	put_task_struct(child);
+	return NULL;
+}
+/* Collect the VE's init and the tasks found below it as CPT_OBJ_TASK objects. */
+static int vps_collect_tasks(struct cpt_context *ctx)
+{
+	int err = -ESRCH;
+	cpt_object_t *obj;
+	task_t *root;
+
+	read_lock(&tasklist_lock);
+	root = find_task_by_pid_ve(1);	/* the VE's init */
+	if (root)
+		get_task_struct(root);
+	read_unlock(&tasklist_lock);
+
+	if (!root) {
+		err = -ESRCH;
+		eprintk_ctx("vps_collect_tasks: cannot find root\n");
+		goto out;
+	}
+
+	if ((obj = alloc_cpt_object(GFP_KERNEL, ctx)) == NULL) {
+		put_task_struct(root);
+		return -ENOMEM;
+	}
+	obj->o_count = 1;
+	cpt_obj_setobj(obj, root, ctx);
+	intern_cpt_object(CPT_OBJ_TASK, obj, ctx);
+	collect_task_ubc(root, ctx);
+
+	/* Collect process subtree recursively */
+	for_each_object(obj, CPT_OBJ_TASK) {
+		cpt_object_t *head = obj;	/* new entries are inserted relative to head */
+		task_t *tsk = obj->o_obj;
+		task_t *child;
+
+		if (freezable(tsk) && !(tsk->flags&PF_FROZEN)) {
+			eprintk_ctx("process " CPT_FID " is not frozen\n", CPT_TID(tsk));
+			err = -EINVAL;
+			goto out;
+		}
+
+		wait_task_inactive(tsk);
+
+		if (tsk->pid == tsk->tgid) {	/* group leader: walk its other threads */
+			child = tsk;
+			for (;;) {
+				read_lock(&tasklist_lock);
+				child = next_thread(child);
+				if (child != tsk)
+					get_task_struct(child);
+				read_unlock(&tasklist_lock);
+
+				if (child == tsk)
+					break;
+
+				if (child->real_parent != tsk->real_parent) {
+					put_task_struct(child);
+					eprintk_ctx("illegal thread structure, kernel bug\n");
+					return -EINVAL;
+				}
+
+				/* NOTE(review): NULL also means "not frozen", yet -ENOMEM is returned */
+				if ((head = remember_task(child, head, ctx)) == NULL)
+					return -ENOMEM;
+			}
+		}
+
+		/* About locking. VE is frozen. But lists of children
+		 * may change at least for init, when entered task reparents
+		 * to init and when reparented task exits. If we take care
+		 * of this case, we still can unlock while scanning
+		 * tasklists.
+		 */
+		read_lock(&tasklist_lock);
+		list_for_each_entry(child, &tsk->children, sibling) {
+			if (child->real_parent != tsk)
+				continue;
+			if (child->pid != child->tgid)	/* only group leaders here */
+				continue;
+			get_task_struct(child);
+			read_unlock(&tasklist_lock);	/* remember_task() allocates with GFP_KERNEL */
+
+			if ((head = remember_task(child, head, ctx)) == NULL)
+				return -ENOMEM;
+
+			read_lock(&tasklist_lock);
+		}
+
+		list_for_each_entry(child, &tsk->ptrace_children, ptrace_list) {
+			if (child->real_parent != tsk)
+				continue;
+			if (child->pid != child->tgid)
+				continue;
+			get_task_struct(child);
+			read_unlock(&tasklist_lock);
+
+			if ((head = remember_task(child, head, ctx)) == NULL)
+				return -ENOMEM;
+
+			read_lock(&tasklist_lock);
+		}
+		read_unlock(&tasklist_lock);
+	}
+
+	return 0;
+
+out:
+	return err;
+}
+/*
+ * Phase 1' of suspend: gather the resources used by the collected tasks.
+ * The collectors run in a fixed order (mm, sysv, files, fs, namespace,
+ * signals); the first non-zero result stops the chain and is returned
+ * to the caller unchanged.
+ */
+static int cpt_collect(struct cpt_context *ctx)
+{
+	int err;
+
+	err = cpt_collect_mm(ctx);
+	if (!err)
+		err = cpt_collect_sysv(ctx);
+	if (!err)
+		err = cpt_collect_files(ctx);
+	if (!err)
+		err = cpt_collect_fs(ctx);
+	if (!err)
+		err = cpt_collect_namespace(ctx);
+	if (!err)
+		err = cpt_collect_signals(ctx);
+
+	/* zero only when every collector succeeded */
+	return err;
+}
+/* Write the CPT_SECT_VEINFO section: IPC limits and start-time deltas. */
+static int cpt_dump_veinfo(cpt_context_t *ctx)
+{
+	struct cpt_veinfo_image i;
+	struct ve_struct *ve;
+	struct timespec delta;
+
+	cpt_open_section(ctx, CPT_SECT_VEINFO);
+	cpt_open_object(NULL, ctx);
+
+	i.cpt_next = CPT_NULL;
+	i.cpt_object = CPT_OBJ_VEINFO;
+	i.cpt_hdrlen = sizeof(i);
+	i.cpt_content = CPT_CONTENT_VOID;
+
+	ve = get_exec_env();	/* values are read from the current exec env */
+	i.shm_ctl_all = ve->_shm_ctlall;
+	i.shm_ctl_max = ve->_shm_ctlmax;
+	i.shm_ctl_mni = ve->_shm_ctlmni;
+
+	i.msg_ctl_max = ve->_msg_ctlmax;
+	i.msg_ctl_mni = ve->_msg_ctlmni;
+	i.msg_ctl_mnb = ve->_msg_ctlmnb;
+
+	/* the element-wise copy below relies on both arrays having equal size */
+	BUG_ON(sizeof(ve->_sem_ctls) != sizeof(i.sem_ctl_arr));
+	i.sem_ctl_arr[0] = ve->_sem_ctls[0];
+	i.sem_ctl_arr[1] = ve->_sem_ctls[1];
+	i.sem_ctl_arr[2] = ve->_sem_ctls[2];
+	i.sem_ctl_arr[3] = ve->_sem_ctls[3];
+
+	/* monotonic time and jiffies elapsed since the VE's recorded start */
+	do_posix_clock_monotonic_gettime(&delta);
+	_set_normalized_timespec(&delta,
+			delta.tv_sec - ve->start_timespec.tv_sec,
+			delta.tv_nsec - ve->start_timespec.tv_nsec);
+	i.start_timespec_delta = cpt_timespec_export(&delta);
+	i.start_jiffies_delta = get_jiffies_64() - ve->start_jiffies;
+
+	ctx->write(&i, sizeof(i), ctx);
+	cpt_close_object(ctx);
+	cpt_close_section(ctx);
+	return 0;
+}
+/* Emit one CPT_OBJ_NAME object: header, the text with its NUL, alignment. */
+static void cpt_dump_uts_field(const char *text, cpt_context_t *ctx)
+{
+	struct cpt_object_hdr hdr;
+	int n = strlen(text);
+
+	/* cpt_next spans the header plus the aligned, NUL-terminated text */
+	hdr.cpt_next = sizeof(hdr) + CPT_ALIGN(n + 1);
+	hdr.cpt_object = CPT_OBJ_NAME;
+	hdr.cpt_hdrlen = sizeof(hdr);
+	hdr.cpt_content = CPT_CONTENT_NAME;
+
+	ctx->write(&hdr, sizeof(hdr), ctx);
+	ctx->write(text, n + 1, ctx);
+	ctx->align(ctx);
+}
+
+/*
+ * Write the CPT_SECT_UTSNAME section: ve_utsname's nodename followed by
+ * its domainname, each stored as a name object. Always returns 0.
+ */
+static int cpt_dump_utsname(cpt_context_t *ctx)
+{
+	cpt_open_section(ctx, CPT_SECT_UTSNAME);
+
+	cpt_dump_uts_field(ve_utsname.nodename, ctx);
+	cpt_dump_uts_field(ve_utsname.domainname, ctx);
+
+	cpt_close_section(ctx);
+	return 0;
+}
+/* Phase 2: write the image of a running, locked VE to the dump file. */
+int cpt_dump(struct cpt_context *ctx)
+{
+	struct ve_struct *oldenv, *env;
+	int err, err2 = 0;
+
+	if (!ctx->ve_id)
+		return -EINVAL;
+
+	env = get_ve_by_id(ctx->ve_id);
+	if (!env)
+		return -ESRCH;
+
+	down_read(&env->op_sem);
+	err = -ESRCH;
+	if (!env->is_running)
+		goto out_noenv;
+	if (!env->is_locked)	/* is_locked is set by cpt_vps_suspend() */
+		goto out_noenv;
+
+	oldenv = set_exec_env(env);
+
+	/* Phase 2: real checkpointing */
+	err = cpt_open_dumpfile(ctx);
+	if (err)
+		goto out;
+	
+	cpt_major_hdr_out(ctx);
+
+	/* each step runs only while all previous steps returned 0 */
+	if (!err)
+		err = cpt_dump_veinfo(ctx);
+	if (!err)
+		err = cpt_dump_ubc(ctx);
+	if (!err)
+		err = cpt_dump_ifinfo(ctx);
+	if (!err)
+		err = cpt_dump_files(ctx);
+	if (!err)
+		err = cpt_dump_files_struct(ctx);
+	if (!err)
+		err = cpt_dump_fs_struct(ctx);
+	if (!err)
+		err = cpt_dump_namespace(ctx);
+	if (!err)
+		err = cpt_dump_sighand(ctx);
+	if (!err)
+		err = cpt_dump_vm(ctx);
+	if (!err)
+		err = cpt_dump_sysvsem(ctx);
+	if (!err)
+		err = cpt_dump_tasks(ctx);
+	if (!err)
+		err = cpt_dump_orphaned_sockets(ctx);
+#if defined(CONFIG_VE_IPTABLES) && \
+    (defined(CONFIG_IP_NF_CONNTRACK) || defined(CONFIG_IP_NF_CONNTRACK_MODULE))
+	if (!err)
+		err = cpt_dump_ip_conntrack(ctx);
+#endif
+	if (!err)
+		err = cpt_dump_utsname(ctx);
+
+	if (!err)
+		err = cpt_dump_tail(ctx);
+
+	err2 = cpt_close_dumpfile(ctx);	/* closed even if a dump step failed */
+
+out:
+	set_exec_env(oldenv);
+out_noenv:
+	up_read(&env->op_sem);
+	put_ve(env);
+	return err ? : err2;	/* a dump error takes precedence over a close error */
+}
+/* Lock and freeze a VE, then collect its tasks and resources for a dump. */
+int cpt_vps_suspend(struct cpt_context *ctx)
+{
+	struct ve_struct *oldenv, *env;
+	int err = 0;
+
+	ctx->kernel_config_flags = test_kernel_config();
+	cpt_object_init(ctx);
+
+	if (!ctx->ve_id) {	/* no id given: use the caller's own VE, but never ve0 */
+		env = get_exec_env();
+		if (env == get_ve0())
+			return -EINVAL;
+		wprintk("undefined ve_id\n");
+		ctx->ve_id = env->veid;
+		get_ve(env);
+	} else {
+		env = get_ve_by_id(ctx->ve_id);
+		if (!env)
+			return -ESRCH;
+	}
+
+#ifdef CONFIG_VE_IPTABLES
+	ctx->iptables_mask = env->_iptables_modules;
+#endif
+
+	down_write(&env->op_sem);
+	err = -ESRCH;
+	if (!env->is_running)
+		goto out_noenv;
+
+	err = -EBUSY;
+	if (env->is_locked)
+		goto out_noenv;
+	env->is_locked = 1;
+	downgrade_write(&env->op_sem);	/* from here on op_sem is held for reading */
+
+	oldenv = set_exec_env(env);
+
+	/* Phase 0: find and stop all the tasks */
+	if ((err = vps_stop_tasks(ctx)) != 0)
+		goto out;
+
+	if ((err = cpt_suspend_network(ctx)) != 0)
+		goto out;
+
+	/* At the moment all the state is frozen. We do not need to lock
+	 * the state, which can be changed only if the tasks are running.
+	 */
+
+	/* Phase 1: collect task tree */
+	if ((err = vps_collect_tasks(ctx)) != 0)
+		goto out;
+
+	/* Phase 1': collect all the resources */
+	if ((err = cpt_collect(ctx)) != 0)
+		goto out;
+
+	/* NOTE(review): is_locked stays set on failure; presumably the caller runs cpt_resume() */
+out:
+	set_exec_env(oldenv);
+	up_read(&env->op_sem);
+	put_ve(env);
+        return err;
+
+out_noenv:
+	up_write(&env->op_sem);	/* reached before the downgrade: still a write lock */
+	put_ve(env);
+	return err;
+}
+/* Compute in *caps the CPU capability bits attributed to the tasks of a VE. */
+int cpt_vps_caps(struct cpt_context *ctx, __u32 *caps)
+{
+	task_t *p;
+	struct ve_struct *env;
+	unsigned int flags = test_cpu_caps();
+
+	if (!ctx->ve_id)
+		return -EINVAL;
+
+	env = get_ve_by_id(ctx->ve_id);
+	if (env == NULL)
+		return -ESRCH;
+
+	*caps = flags & (1<<CPT_CPU_X86_CMOV);	/* CMOV is copied from the host flags as is */
+	flags &= ~((1<<CPT_CPU_X86_EMT64)|(1<<CPT_CPU_X86_IA64));
+
+	read_lock(&tasklist_lock);
+	for (p = __first_task_ve(env); p != NULL ; p = __next_task_ve(env, p)) {
+		if (tsk_used_math(p))	/* remaining host flags count once a task used math state */
+			*caps |= flags;
+#ifdef CONFIG_X86_64
+		if (!(p->thread_info->flags & _TIF_IA32))	/* task without the IA32 flag */
+			*caps |= (1<<CPT_CPU_X86_EMT64);
+#endif
+	}
+	read_unlock(&tasklist_lock);
+	put_ve(env);
+
+	return 0;
+}
diff -uprN linux-2.6.16/kernel/cpt/cpt_dump.h linux-2.6.16.ovz/kernel/cpt/cpt_dump.h
--- linux-2.6.16/kernel/cpt/cpt_dump.h	1969-12-31 19:00:00.000000000 -0500
+++ linux-2.6.16.ovz/kernel/cpt/cpt_dump.h	2006-07-05 08:34:56.000000000 -0400
@@ -0,0 +1,14 @@
+int cpt_dump(struct cpt_context *cpt);		/* defined in cpt_dump.c */
+int rst_undump(struct cpt_context *cpt);
+int cpt_suspend(struct cpt_context *cpt);	/* NOTE(review): cpt_dump.c defines cpt_vps_suspend(), not this — confirm */
+int cpt_resume(struct cpt_context *cpt);	/* defined in cpt_dump.c */
+int cpt_kill(struct cpt_context *cpt);		/* defined in cpt_dump.c */
+int rst_clean(struct cpt_context *cpt);
+int rst_resume(struct cpt_context *cpt);
+int rst_kill(struct cpt_context *cpt);
+
+int cpt_freeze_one(pid_t pid, int freeze);
+int cpt_vps_suspend(struct cpt_context *ctx);	/* defined in cpt_dump.c */
+int vps_rst_undump(struct cpt_context *ctx);
+
+int cpt_vps_caps(struct cpt_context *ctx, __u32 *caps);	/* defined in cpt_dump.c */
diff -uprN linux-2.6.16/kernel/cpt/cpt_epoll.c linux-2.6.16.ovz/kernel/cpt/cpt_epoll.c
--- linux-2.6.16/kernel/cpt/cpt_epoll.c	1969-12-31 19:00:00.000000000 -0500
+++ linux-2.6.16.ovz/kernel/cpt/cpt_epoll.c	2006-07-05 08:34:56.000000000 -0400
@@ -0,0 +1,116 @@
+/*
+ *
+ *  kernel/cpt/cpt_epoll.c
+ *
+ *  Copyright (C) 2000-2005  SWsoft
+ *  All rights reserved.
+ *
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/version.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/file.h>
+#include <linux/mm.h>
+#include <linux/errno.h>
+#include <linux/major.h>
+#include <linux/pipe_fs_i.h>
+#include <linux/mman.h>
+#include <linux/namespace.h>
+#include <linux/mount.h>
+#include <linux/namei.h>
+#include <linux/smp_lock.h>
+#include <asm/uaccess.h>
+#include <linux/vzcalluser.h>
+#include <linux/eventpoll.h>
+#include <linux/cpt_image.h>
+
+#include "cpt_obj.h"
+#include "cpt_context.h"
+#include "cpt_mm.h"
+#include "cpt_files.h"
+#include "cpt_kernel.h"
+#include "cpt_fsmagic.h"
+#include "cpt_syscalls.h"
+
+extern struct file_operations eventpoll_fops;
+/* Dump an epoll file: a CPT_OBJ_EPOLL header, then one record per epitem. */
+int cpt_dump_epolldev(cpt_object_t *obj, cpt_context_t *ctx)
+{
+	int err = 0;
+	struct file *file = obj->o_obj;
+	struct eventpoll *ep;
+	struct rb_node *rbp;
+	struct cpt_epoll_image ei;
+
+	if (file->f_op != &eventpoll_fops) {
+		eprintk_ctx("bad epoll file\n");
+		return -EINVAL;
+	}
+
+	ep = file->private_data;
+
+	/* eventpoll.c does not protect open /proc/N/fd, silly.
+	 * Opener will get an invalid file with uninitialized private_data
+	 */
+	if (unlikely(ep == NULL)) {
+		eprintk_ctx("bad epoll device\n");
+		return -EINVAL;
+	}
+
+	cpt_open_object(NULL, ctx);
+
+	ei.cpt_next = CPT_NULL;
+	ei.cpt_object = CPT_OBJ_EPOLL;
+	ei.cpt_hdrlen = sizeof(ei);
+	ei.cpt_content = CPT_CONTENT_ARRAY;
+	ei.cpt_file = obj->o_pos;
+
+	ctx->write(&ei, sizeof(ei), ctx);
+
+	down(&epsem);	/* epsem is held while the rb-tree of items is walked */
+	for (rbp = rb_first(&ep->rbr); rbp; rbp = rb_next(rbp)) {
+		loff_t saved_obj;
+		cpt_object_t *tobj;
+		struct cpt_epoll_file_image efi;
+		struct epitem *epi;
+		epi = rb_entry(rbp, struct epitem, rbn);
+		tobj = lookup_cpt_object(CPT_OBJ_FILE, epi->ffd.file, ctx);
+		if (tobj == NULL) {	/* watched file was not collected for this dump */
+			eprintk_ctx("epoll device refers to an external file\n");
+			err = -EBUSY;
+			break;
+		}
+		cpt_push_object(&saved_obj, ctx);
+		cpt_open_object(NULL, ctx);
+
+		efi.cpt_next = CPT_NULL;
+		efi.cpt_object = CPT_OBJ_EPOLL_FILE;
+		efi.cpt_hdrlen = sizeof(efi);
+		efi.cpt_content = CPT_CONTENT_VOID;
+		efi.cpt_file = tobj->o_pos;
+		efi.cpt_fd = epi->ffd.fd;
+		efi.cpt_events = epi->event.events;
+		efi.cpt_data = epi->event.data;
+		efi.cpt_revents = epi->revents;
+		efi.cpt_ready = 0;
+		if (!list_empty(&epi->rdllink))	/* item is linked through rdllink */
+			efi.cpt_ready = 1;
+
+		ctx->write(&efi, sizeof(efi), ctx);
+		cpt_close_object(ctx);
+		cpt_pop_object(&saved_obj, ctx);
+	}
+	up(&epsem);
+
+	cpt_close_object(ctx);	/* the outer object is closed on the error path too */
+
+	return err;
+}
+
diff -uprN linux-2.6.16/kernel/cpt/cpt_files.c linux-2.6.16.ovz/kernel/cpt/cpt_files.c
--- linux-2.6.16/kernel/cpt/cpt_files.c	1969-12-31 19:00:00.000000000 -0500
+++ linux-2.6.16.ovz/kernel/cpt/cpt_files.c	2006-07-05 08:34:56.000000000 -0400
@@ -0,0 +1,1343 @@
+/*
+ *
+ *  kernel/cpt/cpt_files.c
+ *
+ *  Copyright (C) 2000-2005  SWsoft
+ *  All rights reserved.
+ *
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/version.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/file.h>
+#include <linux/mm.h>
+#include <linux/errno.h>
+#include <linux/major.h>
+#include <linux/pipe_fs_i.h>
+#include <linux/mman.h>
+#include <linux/namespace.h>
+#include <linux/mount.h>
+#include <linux/namei.h>
+#include <linux/smp_lock.h>
+#include <linux/pagemap.h>
+#include <asm/uaccess.h>
+#include <linux/vzcalluser.h>
+#include <linux/ve_proto.h>
+#include <linux/cpt_image.h>
+
+#include "cpt_obj.h"
+#include "cpt_context.h"
+#include "cpt_mm.h"
+#include "cpt_files.h"
+#include "cpt_socket.h"
+#include "cpt_kernel.h"
+#include "cpt_fsmagic.h"
+#include "cpt_syscalls.h"
+/* printk the path of d on mnt as "<path>"; silent if it cannot be built. */
+void cpt_printk_dentry(struct dentry *d, struct vfsmount *mnt)
+{
+	unsigned long page = __get_free_page(GFP_KERNEL);
+	char *name;
+
+	if (page == 0)
+		return;
+
+	/* d_path() gets the temporary page as its buffer */
+	name = d_path(d, mnt, (char *)page, PAGE_SIZE);
+	if (!IS_ERR(name))
+		printk("<%s>", name);
+	free_page(page);
+}
+/* Check that looking path up again yields the same (d, mnt); -EINVAL if not. */
+int cpt_verify_overmount(char *path, struct dentry *d, struct vfsmount *mnt,
+			 cpt_context_t *ctx)
+{
+	struct nameidata nd;
+	int hidden;
+
+	if (path[0] != '/' || IS_ROOT(d) || d_unhashed(d))
+		return 0;
+	if (path_lookup(path, 0, &nd)) {
+		eprintk_ctx("d_path cannot be looked up %s\n", path);
+		return -EINVAL;
+	}
+	hidden = (nd.dentry != d || nd.mnt != mnt);
+	if (hidden)
+		eprintk_ctx("d_path is invisible %s\n", path);
+	path_release(&nd);
+	return hidden ? -EINVAL : 0;
+}
+/* Write the d_path() name of (d, mnt) as a CPT_OBJ_NAME object. */
+int cpt_dump_dentry(struct dentry *d, struct vfsmount *mnt, struct cpt_context *ctx)
+{
+	int len;
+	char *path;
+	char *pg = cpt_get_buf(ctx);
+
+	path = d_path(d, mnt, pg, PAGE_SIZE);
+	len = PTR_ERR(path);	/* meaningful only when IS_ERR(path) holds */
+
+	if (IS_ERR(path)) {
+		struct cpt_object_hdr o;
+		char tmp[1];
+		/* VZ changes d_path() to return EINVAL, when path
+		 * is not supposed to be visible inside VE. */
+		if (len != -EINVAL)
+			eprintk_ctx("d_path err=%d\n", len);
+		else
+			len = 0;
+
+		/* an empty name is written in either case */
+		o.cpt_next = sizeof(o) + CPT_ALIGN(1);
+		o.cpt_object = CPT_OBJ_NAME;
+		o.cpt_hdrlen = sizeof(o);
+		o.cpt_content = CPT_CONTENT_NAME;
+		tmp[0] = 0;
+
+		ctx->write(&o, sizeof(o), ctx);
+		ctx->write(tmp, 1, ctx);
+		ctx->align(ctx);
+
+		/* NOTE(review): unchecked release — d_path() was given the whole page, marker included */
+		__cpt_release_buf(ctx);
+		return len;	/* 0 for the -EINVAL case, otherwise the d_path() error */
+	} else {
+		struct cpt_object_hdr o;
+
+		len = pg + PAGE_SIZE - 1 - path;	/* bytes from path to the last byte of the page */
+		o.cpt_next = sizeof(o) + CPT_ALIGN(len + 1);
+		o.cpt_object = CPT_OBJ_NAME;
+		o.cpt_hdrlen = sizeof(o);
+		o.cpt_content = CPT_CONTENT_NAME;
+		path[len] = 0;
+
+		if (cpt_verify_overmount(path, d, mnt, ctx)) {
+			__cpt_release_buf(ctx);
+			return -EINVAL;
+		}
+
+		ctx->write(&o, sizeof(o), ctx);
+		ctx->write(path, len+1, ctx);
+		ctx->align(ctx);
+		__cpt_release_buf(ctx);
+	}
+	return 0;
+}
+/* Write s as a CPT_OBJ_NAME object: header, text with its NUL, alignment. */
+int cpt_dump_string(const char *s, struct cpt_context *ctx)
+{
+	const int nbytes = strlen(s) + 1;	/* text plus terminating NUL */
+	struct cpt_object_hdr hdr;
+
+	hdr.cpt_next = sizeof(hdr) + CPT_ALIGN(nbytes);
+	hdr.cpt_object = CPT_OBJ_NAME;
+	hdr.cpt_hdrlen = sizeof(hdr);
+	hdr.cpt_content = CPT_CONTENT_NAME;
+
+	ctx->write(&hdr, sizeof(hdr), ctx);
+	ctx->write(s, nbytes, ctx);
+	ctx->align(ctx);
+
+	return 0;
+}
+/* Dump the name of an open file via its f_dentry and f_vfsmnt. */
+int cpt_dump_filename(struct file *file, struct cpt_context *ctx)
+{
+	return cpt_dump_dentry(file->f_dentry, file->f_vfsmnt, ctx);
+}
+/* Write a CPT_OBJ_INODE record with the attributes of d's inode. */
+int cpt_dump_inode(struct dentry *d, struct vfsmount *mnt, struct cpt_context *ctx)
+{
+	int err;
+	struct cpt_inode_image *v = cpt_get_buf(ctx);	/* record is built in the scratch buffer */
+	struct kstat sbuf;
+
+	v->cpt_next = sizeof(*v);
+	v->cpt_object = CPT_OBJ_INODE;
+	v->cpt_hdrlen = sizeof(*v);
+	v->cpt_content = CPT_CONTENT_ARRAY;
+
+	if ((err = vfs_getattr(mnt, d, &sbuf)) != 0) {
+		cpt_release_buf(ctx);
+		return err;
+	}
+
+	/* dev, ino, rdev and sb magic come from the inode, the rest from vfs_getattr() */
+	v->cpt_dev	= d->d_inode->i_sb->s_dev;
+	v->cpt_ino	= d->d_inode->i_ino;
+	v->cpt_mode	= sbuf.mode;
+	v->cpt_nlink	= sbuf.nlink;
+	v->cpt_uid	= sbuf.uid;
+	v->cpt_gid	= sbuf.gid;
+	v->cpt_rdev	= d->d_inode->i_rdev;
+	v->cpt_size	= sbuf.size;
+	v->cpt_atime	= cpt_timespec_export(&sbuf.atime);
+	v->cpt_mtime	= cpt_timespec_export(&sbuf.mtime);
+	v->cpt_ctime	= cpt_timespec_export(&sbuf.ctime);
+	v->cpt_blksize	= sbuf.blksize;
+	v->cpt_blocks	= sbuf.blocks;
+	v->cpt_sb	= d->d_inode->i_sb->s_magic;
+
+	ctx->write(v, sizeof(*v), ctx);
+	cpt_release_buf(ctx);
+	return 0;
+}
+/* Collect fd tables, files and inodes of all tasks; refuse outside references. */
+int cpt_collect_files(cpt_context_t * ctx)
+{
+	int err;
+	cpt_object_t *obj;
+	int index = 0;
+
+	/* Collect process fd sets */
+	for_each_object(obj, CPT_OBJ_TASK) {
+		task_t *tsk = obj->o_obj;
+		if (tsk->files && cpt_object_add(CPT_OBJ_FILES, tsk->files, ctx) == NULL)
+			return -ENOMEM;
+	}
+
+	/* Collect files from fd sets */
+	for_each_object(obj, CPT_OBJ_FILES) {
+		int fd;
+		struct files_struct *f = obj->o_obj;
+
+		cpt_obj_setindex(obj, index++, ctx);
+
+		/* o_count must match the kernel's count, else there is an outside user */
+		if (obj->o_count != atomic_read(&f->count)) {
+			eprintk_ctx("files_struct is referenced outside %d %d\n", obj->o_count, atomic_read(&f->count));
+			return -EBUSY;
+		}
+
+		for (fd = 0; fd < f->fdt->max_fds; fd++) {
+			struct file *file = fcheck_files(f, fd);
+			if (file && cpt_object_add(CPT_OBJ_FILE, file, ctx) == NULL)
+				return -ENOMEM;
+		}
+	}
+
+	/* Collect files queued by AF_UNIX sockets. */
+	if ((err = cpt_collect_passedfds(ctx)) < 0)
+		return err;
+
+	/* OK. At this point we should count all the references. */
+	for_each_object(obj, CPT_OBJ_FILE) {
+		struct file *file = obj->o_obj;
+		struct file *parent;
+		cpt_object_t *ino_obj;
+
+		if (obj->o_count != atomic_read(&file->f_count)) {
+			eprintk_ctx("file struct is referenced outside %d %d\n", obj->o_count, atomic_read(&file->f_count));
+			cpt_printk_dentry(file->f_dentry, file->f_vfsmnt);
+			return -EBUSY;
+		}
+
+		/* files on these filesystems are rejected */
+		switch (file->f_dentry->d_inode->i_sb->s_magic) {
+		case FSMAGIC_FUTEX:
+		case FSMAGIC_MQUEUE:
+		case FSMAGIC_BDEV:
+			eprintk_ctx("file on unsupported FS: magic %08lx\n", file->f_dentry->d_inode->i_sb->s_magic);
+			return -EBUSY;
+		}
+
+		/* Collect inode. It is necessary mostly to resolve deleted
+		 * hard links. */
+		ino_obj = cpt_object_add(CPT_OBJ_INODE, file->f_dentry->d_inode, ctx);
+		if (ino_obj == NULL)
+			return -ENOMEM;
+
+		/* take this file as o_parent if none is set or the current one is unhashed */
+		parent = ino_obj->o_parent;
+		if (!parent || (!IS_ROOT(parent->f_dentry) && d_unhashed(parent->f_dentry)))
+			ino_obj->o_parent = file;
+
+		if (S_ISCHR(file->f_dentry->d_inode->i_mode)) {
+			int maj = imajor(file->f_dentry->d_inode);
+			if (maj == PTY_MASTER_MAJOR ||
+			    (maj >= UNIX98_PTY_MASTER_MAJOR &&
+			     maj < UNIX98_PTY_MASTER_MAJOR+UNIX98_PTY_MAJOR_COUNT) ||
+			    maj == PTY_SLAVE_MAJOR ||
+			    maj == UNIX98_PTY_SLAVE_MAJOR ||
+			    maj == TTYAUX_MAJOR) {
+				err = cpt_collect_tty(file, ctx);
+				if (err)
+					return err;
+			}
+		}
+
+		if (S_ISSOCK(file->f_dentry->d_inode->i_mode)) {
+			err = cpt_collect_socket(file, ctx);
+			if (err)
+				return err;
+		}
+	}
+
+	err = cpt_index_sockets(ctx);
+
+	return err;
+}
+
+/* /dev/ptmx is special, all the files share one inode, but real tty backend
+ * is attached via file->private_data.
+ */
+/* Non-zero iff ino is the character device numbered TTYAUX_MAJOR:2. */
+static inline int is_cloning_inode(struct inode *ino)
+{
+	const dev_t ptmx_dev = MKDEV(TTYAUX_MAJOR,2);
+	return S_ISCHR(ino->i_mode) && ino->i_rdev == ptmx_dev;
+}
+/* Write one file lock as a CPT_OBJ_FLOCK record; -EBUSY for a foreign POSIX lock. */
+static int dump_one_flock(struct file_lock *fl, int owner, struct cpt_context *ctx)
+{
+	pid_t pid;
+	struct cpt_flock_image *v = cpt_get_buf(ctx);
+
+	v->cpt_next = sizeof(*v);
+	v->cpt_object = CPT_OBJ_FLOCK;
+	v->cpt_hdrlen = sizeof(*v);
+	v->cpt_content = CPT_CONTENT_VOID;
+
+	v->cpt_owner = owner;	/* callers pass a files_struct index, or -1 for flock() locks */
+
+	pid = fl->fl_pid;
+	if (pid && !is_virtual_pid(fl->fl_pid)) {
+		pid = _pid_type_to_vpid(PIDTYPE_TGID, fl->fl_pid);
+		if (pid == -1) {	/* no virtual pid could be found */
+			if (!(fl->fl_flags&FL_FLOCK)) {
+				eprintk_ctx("posix lock from another VE?\n");
+				cpt_release_buf(ctx);
+				return -EBUSY;
+			}
+			pid = 0;	/* FL_FLOCK locks are saved with pid 0 instead */
+		}
+	}
+
+	v->cpt_pid = pid;
+	v->cpt_start = fl->fl_start;
+	v->cpt_end = fl->fl_end;
+	v->cpt_flags = fl->fl_flags;
+	v->cpt_type = fl->fl_type;
+
+	ctx->write(v, sizeof(*v), ctx);
+	cpt_release_buf(ctx);
+	return 0;
+}
+
+
+/* Dump every lock @file holds on its inode; returns 0 or a negative errno. */
+int cpt_dump_flock(struct file *file, struct cpt_context *ctx)
+{
+	int err = 0;
+	struct file_lock *fl;
+
+	lock_kernel();
+	/* The first failure ends the walk so that it reaches the caller. */
+	for (fl = file->f_dentry->d_inode->i_flock;
+	     fl && !err; fl = fl->fl_next) {
+		if (file != fl->fl_file)
+			continue;	/* lock taken through another open file */
+		if (fl->fl_flags & FL_LEASE) {
+			eprintk_ctx("lease lock is not supported\n");
+			err = -EINVAL;
+		} else if (fl->fl_flags & FL_POSIX) {
+			cpt_object_t *obj;
+
+			/* POSIX locks are saved with their files_struct index. */
+			obj = lookup_cpt_object(CPT_OBJ_FILES, fl->fl_owner, ctx);
+			if (obj) {
+				err = dump_one_flock(fl, obj->o_index, ctx);
+			} else {
+				eprintk_ctx("unknown lock owner %p\n", fl->fl_owner);
+				err = -EINVAL;
+			}
+		} else if (fl->fl_flags & FL_FLOCK) {
+			/* FL_FLOCK locks are saved with owner index -1. */
+			err = dump_one_flock(fl, -1, ctx);
+		}
+	}
+	unlock_kernel();
+	return err;
+}
+
+static int __comb_pid_to_vpid(int pid)
+{
+	int vpid = 0;	/* pid 0 stays 0; pid < 0 is a negated pgid */
+
+	if (pid > 0) {
+		vpid = _pid_type_to_vpid(PIDTYPE_PID, pid);
+		if (unlikely(vpid < 0)) {
+			dprintk("pid %d does not exist amymore.\n", pid);
+			vpid = 0;
+		}
+	} else if (pid < 0) {
+		vpid = _pid_type_to_vpid(PIDTYPE_PGID, -pid);
+		if (unlikely(vpid < 0)) {
+			dprintk("pgid %d does not exist amymore.\n", -pid);
+			vpid = 0;
+		}
+		vpid = -vpid;
+	}
+	return vpid;
+}
+
+/* Write the CPT_OBJ_FILE record of @file, then its name and its locks. */
+static int dump_one_file(cpt_object_t *obj, struct file *file, cpt_context_t *ctx)
+{
+	int err = 0;
+	cpt_object_t *iobj;
+	struct cpt_file_image *v = cpt_get_buf(ctx);
+	struct kstat sbuf;
+
+	cpt_open_object(obj, ctx);
+	v->cpt_next = CPT_NULL;
+	v->cpt_object = CPT_OBJ_FILE;
+	v->cpt_hdrlen = sizeof(*v);
+	v->cpt_content = CPT_CONTENT_ARRAY;
+	v->cpt_flags = file->f_flags;
+	v->cpt_mode = file->f_mode;
+	v->cpt_pos = file->f_pos;
+	v->cpt_uid = file->f_uid;
+	v->cpt_gid = file->f_gid;
+
+	/* sbuf.mode drives every type test below: never use it unfilled. */
+	err = vfs_getattr(file->f_vfsmnt, file->f_dentry, &sbuf);
+	if (err) {
+		cpt_release_buf(ctx);
+		return err;
+	}
+	v->cpt_i_mode = sbuf.mode;
+	v->cpt_lflags = 0;
+	if (IS_ROOT(file->f_dentry))
+		v->cpt_lflags |= CPT_DENTRY_ROOT;
+	else if (d_unhashed(file->f_dentry))
+		v->cpt_lflags |= CPT_DENTRY_DELETED;
+	if (is_cloning_inode(file->f_dentry->d_inode))
+		v->cpt_lflags |= CPT_DENTRY_CLONING;
+	if (file->f_dentry->d_inode->i_sb->s_magic == FSMAGIC_PROC)
+		v->cpt_lflags |= CPT_DENTRY_PROC;
+	v->cpt_inode = CPT_NULL;
+	iobj = lookup_cpt_object(CPT_OBJ_INODE, file->f_dentry->d_inode, ctx);
+	if (iobj)
+		v->cpt_inode = iobj->o_pos;
+	v->cpt_priv = CPT_NULL;
+	v->cpt_fown_fd = -1;
+	if (S_ISCHR(v->cpt_i_mode)) {
+		iobj = lookup_cpt_object(CPT_OBJ_TTY, file->private_data, ctx);
+		if (iobj) {
+			v->cpt_priv = iobj->o_pos;
+			if (file->f_flags&FASYNC)
+				v->cpt_fown_fd = cpt_tty_fasync(file, ctx);
+		}
+	}
+	if (S_ISSOCK(v->cpt_i_mode)) {
+		if (!obj || obj->o_index < 0) {	/* obj is NULL from cpt_dump_dir() */
+			eprintk_ctx("BUG: no socket index\n");
+			cpt_release_buf(ctx);
+			return -EINVAL;
+		}
+		v->cpt_priv = obj->o_index;
+		if (file->f_flags&FASYNC)
+			v->cpt_fown_fd = cpt_socket_fasync(file, ctx);
+	}
+	if (file->f_dentry->d_inode->i_sb->s_magic == FSMAGIC_EPOLL) {
+		v->cpt_priv = file->f_dentry->d_inode->i_ino;
+		v->cpt_lflags |= CPT_DENTRY_EPOLL;
+	}
+
+	v->cpt_fown_pid = __comb_pid_to_vpid((int)file->f_owner.pid);
+	v->cpt_fown_uid = file->f_owner.uid;
+	v->cpt_fown_euid = file->f_owner.euid;
+	v->cpt_fown_signo = file->f_owner.signum;
+
+	ctx->write(v, sizeof(*v), ctx);
+	cpt_release_buf(ctx);
+
+	if (!S_ISSOCK(sbuf.mode)) {	/* v was released: test sbuf instead */
+		err = cpt_dump_filename(file, ctx);
+		if (err)
+			return err;
+	}
+	if (file->f_dentry->d_inode->i_flock)
+		err = cpt_dump_flock(file, ctx);
+	cpt_close_object(ctx);
+	return err;
+}
+
+/* About this weird function... Crappy code dealing with SYSV shared memory 
+ * defines TMPFS inode and file with f_op doing only mmap. So...
+ * Maybe, this is wrong and leaks something. It is clear access to
+ * SYSV shmem via mmap is quite unusual and impossible from user space.
+ */
+static int dump_content_shm(struct file *file, struct cpt_context *ctx)
+{
+	struct cpt_obj_bits *v;
+	loff_t saved_pos;
+	unsigned long addr;
+
+	addr = do_mmap_pgoff(file, 0, file->f_dentry->d_inode->i_size,
+			     PROT_READ, MAP_SHARED, 0);
+	if (IS_ERR((void*)addr))
+		return PTR_ERR((void*)addr);
+	/* Emit a CPT_OBJ_BITS header, then i_size bytes read via the mapping. */
+	cpt_push_object(&saved_pos, ctx);
+	cpt_open_object(NULL, ctx);
+	v = cpt_get_buf(ctx);
+	v->cpt_next = CPT_NULL;
+	v->cpt_object = CPT_OBJ_BITS;
+	v->cpt_hdrlen = sizeof(*v);
+	v->cpt_content = CPT_CONTENT_DATA;
+	v->cpt_size = file->f_dentry->d_inode->i_size;
+	ctx->write(v, sizeof(*v), ctx);
+	cpt_release_buf(ctx);
+	ctx->write((void*)addr, file->f_dentry->d_inode->i_size, ctx);
+	ctx->align(ctx);
+	do_munmap(current->mm, addr, file->f_dentry->d_inode->i_size);
+	/* NOTE(review): mmap_sem is not taken around the map/unmap -- confirm. */
+	cpt_close_object(ctx);
+	cpt_pop_object(&saved_pos, ctx);
+	return 0;
+}
+
+/* Return 1 when all len bytes at addr are zero, 0 otherwise. */
+static int data_is_zero(char *addr, int len)
+{
+	unsigned long zero_word = 0;
+	unsigned long *words = (unsigned long *)addr;
+	int idx, tail;
+
+	/* Whole longs first, then the len % sizeof(long) trailing bytes. */
+	for (idx = 0; idx < len/sizeof(unsigned long); idx++)
+		if (words[idx] != 0)
+			return 0;
+	tail = len % sizeof(unsigned long);
+	return !tail || memcmp(addr + len - tail, &zero_word, tail) == 0;
+}
+
+/* Save the non-zero data of a regular file as CPT_OBJ_PAGES blocks. */
+static int dump_content_regular(struct file *file, struct cpt_context *ctx)
+{
+	loff_t saved_pos;
+	loff_t pos = 0;
+	loff_t obj_opened = CPT_NULL;
+	struct cpt_page_block pgb;
+	ssize_t (*do_read)(struct file *, char __user *, size_t, loff_t *);
+
+	if (file->f_op == NULL)
+		return -EINVAL;
+
+	if ((do_read = file->f_op->read) == NULL) {
+		if (file->f_op->mmap == NULL)
+			return -EINVAL;
+		if (file->f_dentry->d_inode->i_sb->s_magic != FSMAGIC_TMPFS) {
+			eprintk_ctx("unreadable, but not SYSV SHM file\n");
+			return -EINVAL;
+		}
+		do_read = file->f_dentry->d_inode->i_fop->read;
+		cpt_dump_content_sysvshm(file, ctx);
+		if (!do_read) {
+			wprintk_ctx("TMPFS is not configured?\n");
+			return dump_content_shm(file, ctx);
+		}
+	}
+
+	if (!(file->f_mode & FMODE_READ) ||
+	    (file->f_flags & O_DIRECT)) {	/* read via a private reopen */
+		file = dentry_open(dget(file->f_dentry),
+				   mntget(file->f_vfsmnt), O_RDONLY);
+		if (IS_ERR(file)) {
+			eprintk_ctx("cannot reopen file: %ld\n", PTR_ERR(file));
+			return PTR_ERR(file);
+		}
+	} else {
+		atomic_inc(&file->f_count);
+	}
+
+	for (;;) {
+		mm_segment_t oldfs;
+		int err;
+
+		(void)cpt_get_buf(ctx);
+		oldfs = get_fs(); set_fs(KERNEL_DS);
+		err = do_read(file, ctx->tmpbuf, PAGE_SIZE, &pos);
+		set_fs(oldfs);
+		if (err < 0) {
+			eprintk_ctx("dump_content_regular: do_read: %d\n", err);
+			fput(file);
+			__cpt_release_buf(ctx);
+			return err;
+		}
+		if (err == 0) {
+			__cpt_release_buf(ctx);
+			break;
+		}
+		if (data_is_zero(ctx->tmpbuf, err)) {	/* zero chunk: not stored */
+			if (obj_opened != CPT_NULL) {
+				ctx->pwrite(&pgb.cpt_end, 8, ctx, obj_opened + offsetof(struct cpt_page_block, cpt_end));
+				ctx->align(ctx);
+				cpt_close_object(ctx);
+				cpt_pop_object(&saved_pos, ctx);
+				obj_opened = CPT_NULL;
+			}
+		} else {
+			if (obj_opened == CPT_NULL) {
+				cpt_push_object(&saved_pos, ctx);
+				cpt_open_object(NULL, ctx);
+				obj_opened = ctx->file->f_pos;
+				pgb.cpt_next = CPT_NULL;
+				pgb.cpt_object = CPT_OBJ_PAGES;
+				pgb.cpt_hdrlen = sizeof(pgb);
+				pgb.cpt_content = CPT_CONTENT_DATA;
+				pgb.cpt_start = pos - err;
+				pgb.cpt_end = pgb.cpt_start;
+				ctx->write(&pgb, sizeof(pgb), ctx);
+			}
+			ctx->write(ctx->tmpbuf, err, ctx);
+			pgb.cpt_end += err;
+		}
+		__cpt_release_buf(ctx);
+	}
+	fput(file);
+	/* A block still open at EOF gets its final cpt_end patched in too. */
+	if (obj_opened != CPT_NULL) {
+		ctx->pwrite(&pgb.cpt_end, 8, ctx, obj_opened + offsetof(struct cpt_page_block, cpt_end));
+		ctx->align(ctx);
+		cpt_close_object(ctx);
+		cpt_pop_object(&saved_pos, ctx);
+	}
+	return 0;
+}
+
+
+static int dump_content_chrdev(struct file *file, struct cpt_context *ctx)
+{
+	struct inode *ino = file->f_dentry->d_inode;
+	int maj = imajor(ino);
+	int is_pty_master, is_pty_slave;
+
+	/* MEM_MAJOR devices are accepted with nothing to save. */
+	if (maj == MEM_MAJOR)
+		return 0;
+
+	is_pty_master = maj == PTY_MASTER_MAJOR ||
+			(maj >= UNIX98_PTY_MASTER_MAJOR &&
+			 maj < UNIX98_PTY_MASTER_MAJOR+UNIX98_PTY_MAJOR_COUNT);
+	is_pty_slave = maj == PTY_SLAVE_MAJOR || maj == UNIX98_PTY_SLAVE_MAJOR;
+	/* Every pty flavour and the TTYAUX devices go to the tty code. */
+	if (is_pty_master || is_pty_slave || maj == TTYAUX_MAJOR)
+		return cpt_dump_content_tty(file, ctx);
+
+	eprintk_ctx("unsupported chrdev %d/%d\n", maj, iminor(ino));
+	return -EINVAL;
+}
+
+static int dump_content_blkdev(struct file *file, struct cpt_context *ctx)
+{
+	struct inode *bdev_ino = file->f_dentry->d_inode;
+
+	/* Block device content is not transferred: always refuse. */
+	eprintk_ctx("unsupported blkdev %d/%d\n", imajor(bdev_ino), iminor(bdev_ino));
+	return -EINVAL;
+}
+
+/* Save buffered pipe data when every opener of the fifo is a collected file. */
+static int dump_content_fifo(struct file *file, struct cpt_context *ctx)
+{
+	struct inode *ino = file->f_dentry->d_inode;
+	cpt_object_t *obj;
+	loff_t saved_pos;
+	int readers;
+	int writers;
+
+	mutex_lock(PIPE_MUTEX(*ino));
+	readers = PIPE_READERS(*ino);
+	writers = PIPE_WRITERS(*ino);
+	for_each_object(obj, CPT_OBJ_FILE) {
+		struct file *file1 = obj->o_obj;
+		if (file1->f_dentry->d_inode == ino) {
+			if (file1->f_mode & FMODE_READ)
+				readers--;
+			if (file1->f_mode & FMODE_WRITE)
+				writers--;
+		}
+	}
+	mutex_unlock(PIPE_MUTEX(*ino));
+	if (readers || writers) {
+		struct dentry *dr = file->f_dentry->d_sb->s_root;
+
+		/* A root dentry named "pipefs:" marks an anonymous pipe. */
+		if (dr->d_name.len == 7 && memcmp(dr->d_name.name,"pipefs:",7) == 0) {
+			eprintk_ctx("pipe has %d/%d external readers/writers\n", readers, writers);
+			return -EBUSY;
+		}
+		/* If fifo has external readers/writers, we are in troubles.
+		 * If the buffer is not empty, we must move its content.
+		 * But if the fifo is owned by a service, we cannot do
+		 * this. See?
+		 *
+		 * For now we assume, that if fifo is opened by another
+		 * process, we do not own it and, hence, migrate without
+		 * data.
+		 */
+		return 0;
+	}
+
+	/* OK, we must save fifo state. No semaphores required. */
+
+	if (ino->i_pipe->nrbufs) {
+		struct cpt_obj_bits *v = cpt_get_buf(ctx);
+		struct pipe_inode_info *info;
+		int count, buf, nrbufs;
+
+		mutex_lock(PIPE_MUTEX(*ino));
+		info =  ino->i_pipe;
+		count = 0;
+		buf = info->curbuf;
+		nrbufs = info->nrbufs;
+		while (--nrbufs >= 0) {
+			if (!info->bufs[buf].ops->can_merge) {
+				mutex_unlock(PIPE_MUTEX(*ino));
+				cpt_release_buf(ctx);
+				eprintk_ctx("unknown format of pipe buffer\n");
+				return -EINVAL;
+			}
+			count += info->bufs[buf].len;
+			buf = (buf+1) & (PIPE_BUFFERS-1);
+		}
+
+		if (!count) {
+			mutex_unlock(PIPE_MUTEX(*ino));
+			cpt_release_buf(ctx);
+			return 0;
+		}
+
+		cpt_push_object(&saved_pos, ctx);
+		cpt_open_object(NULL, ctx);
+		v->cpt_next = CPT_NULL;
+		v->cpt_object = CPT_OBJ_BITS;
+		v->cpt_hdrlen = sizeof(*v);
+		v->cpt_content = CPT_CONTENT_DATA;
+		v->cpt_size = count;
+		ctx->write(v, sizeof(*v), ctx);
+		cpt_release_buf(ctx);
+
+		buf = info->curbuf;
+		nrbufs = info->nrbufs;
+		while (--nrbufs >= 0) {
+			struct pipe_buffer *b = info->bufs + buf;
+			void * addr = b->ops->map(file, info, b);
+			ctx->write(addr + b->offset, b->len, ctx);
+			b->ops->unmap(info, b);
+			buf = (buf+1) & (PIPE_BUFFERS-1);
+		}
+
+		mutex_unlock(PIPE_MUTEX(*ino));
+
+		ctx->align(ctx);
+		cpt_close_object(ctx);
+		cpt_pop_object(&saved_pos, ctx);
+	}
+
+	return 0;
+}
+
+static int dump_content_socket(struct file *file, struct cpt_context *ctx)
+{
+	return 0;	/* nothing is written here for a socket inode */
+}
+
+static int dump_one_inode(struct file *file, struct dentry *d,
+			  struct vfsmount *mnt, struct cpt_context *ctx)
+{
+	int err = 0;
+	struct inode *ino = d->d_inode;
+	cpt_object_t *iobj;
+	int dump_it = 0;
+
+	iobj = lookup_cpt_object(CPT_OBJ_INODE, ino, ctx);
+	if (!iobj)
+		return -EINVAL;
+	/* o_pos >= 0: nothing more to write (presumably dumped already). */
+	if (iobj->o_pos >= 0)
+		return 0;
+	/* Only unhashed non-root dentries and non-regular, non-dir inodes go on. */
+	if (!IS_ROOT(d) && d_unhashed(d))
+		dump_it = 1;
+	if (!S_ISREG(ino->i_mode) && !S_ISDIR(ino->i_mode)) {
+		/* One more bug in epoll: invalid inode mode.
+		 * What a load of crap...
+		 */
+		if (ino->i_sb->s_magic == FSMAGIC_EPOLL &&
+		    (ino->i_mode & S_IFMT) == 0)
+			return 0;
+		dump_it = 1;
+	}
+
+	if (!dump_it)
+		return 0;
+
+	cpt_open_object(iobj, ctx);
+	cpt_dump_inode(d, mnt, ctx);
+	/* A deleted name is recorded through o_parent's name when one is usable. */
+	if (!IS_ROOT(d) && d_unhashed(d)) {
+		struct file *parent;
+		parent = iobj->o_parent;
+		if (!parent ||
+		    (!IS_ROOT(parent->f_dentry) && d_unhashed(parent->f_dentry))) {
+			/* Inode is not deleted, but it does not
+			 * have references from inside checkpointed
+			 * process group. We have options:
+			 * A. Fail, abort checkpointing
+			 * B. Proceed. File will be cloned.
+			 * A is correct, B is more complicated */
+			/* Just as a hint where to create deleted file */
+			if (ino->i_nlink != 0) {
+				eprintk_ctx("deleted reference to existing inode, checkpointing is impossible\n");
+				return -EBUSY;
+			}
+		} else {
+			/* Refer to _another_ file name. */
+			err = cpt_dump_filename(parent, ctx);
+			if (err)
+				return err;
+			if (S_ISREG(ino->i_mode) || S_ISDIR(ino->i_mode))
+				dump_it = 0;
+		}
+	}
+	if (dump_it) {	/* NOTE(review): file is NULL for fs_struct dentries */
+		if (S_ISREG(ino->i_mode)) {
+			if ((err = dump_content_regular(file, ctx)) != 0) {
+				eprintk_ctx("dump_content_regular ");
+				cpt_printk_dentry(d, mnt);
+			}
+		} else if (S_ISDIR(ino->i_mode)) {
+			/* We cannot do anything. The directory should be
+			 * empty, so it is not a big deal.
+			 */
+		} else if (S_ISCHR(ino->i_mode)) {
+			err = dump_content_chrdev(file, ctx);
+		} else if (S_ISBLK(ino->i_mode)) {
+			err = dump_content_blkdev(file, ctx);
+		} else if (S_ISFIFO(ino->i_mode)) {
+			err = dump_content_fifo(file, ctx);
+		} else if (S_ISSOCK(ino->i_mode)) {
+			err = dump_content_socket(file, ctx);
+		} else {
+			eprintk_ctx("unknown inode mode %o\n", ino->i_mode & S_IFMT);
+			err = -EINVAL;
+		}
+	}
+	cpt_close_object(ctx);
+
+	return err;
+}
+
+int cpt_dump_files(struct cpt_context *ctx)
+{
+	int epoll_nr;
+	cpt_object_t *obj;
+	/* Sections are emitted in order: TTY, INODE, FILES, EPOLL, SOCKET. */
+	cpt_open_section(ctx, CPT_SECT_TTY);
+	for_each_object(obj, CPT_OBJ_TTY) {
+		int err;
+
+		if ((err = cpt_dump_tty(obj, ctx)) != 0)
+			return err;
+	}
+	cpt_close_section(ctx);
+	/* Inodes of all collected files, then of every fs_struct's dentries. */
+	cpt_open_section(ctx, CPT_SECT_INODE);
+	for_each_object(obj, CPT_OBJ_FILE) {
+		struct file *file = obj->o_obj;
+		int err;
+
+		if ((err = dump_one_inode(file, file->f_dentry,
+					  file->f_vfsmnt, ctx)) != 0)
+			return err;
+	}
+	for_each_object(obj, CPT_OBJ_FS) {
+		struct fs_struct *fs = obj->o_obj;
+		int err;
+
+		if (fs->root &&
+		    (err = dump_one_inode(NULL, fs->root, fs->rootmnt, ctx)) != 0)
+			return err;
+		if (fs->pwd &&
+		    (err = dump_one_inode(NULL, fs->pwd, fs->pwdmnt, ctx)) != 0)
+			return err;
+		if (fs->altroot &&
+		    (err = dump_one_inode(NULL, fs->altroot, fs->altrootmnt, ctx)) != 0)
+			return err;
+	}
+	cpt_close_section(ctx);
+
+	epoll_nr = 0;
+	cpt_open_section(ctx, CPT_SECT_FILES);
+	for_each_object(obj, CPT_OBJ_FILE) {
+		struct file *file = obj->o_obj;
+		int err;
+
+		if ((err = dump_one_file(obj, file, ctx)) != 0)
+			return err;
+		if (file->f_dentry->d_inode->i_sb->s_magic == FSMAGIC_EPOLL)
+			epoll_nr++;
+	}
+	cpt_close_section(ctx);
+	/* The epoll section is written only when an epoll file was counted. */
+	if (epoll_nr) {
+		cpt_open_section(ctx, CPT_SECT_EPOLL);
+		for_each_object(obj, CPT_OBJ_FILE) {
+			struct file *file = obj->o_obj;
+			if (file->f_dentry->d_inode->i_sb->s_magic == FSMAGIC_EPOLL) {
+				int err;
+				if ((err = cpt_dump_epolldev(obj, ctx)) != 0)
+					return err;
+			}
+		}
+		cpt_close_section(ctx);
+	}
+
+	cpt_open_section(ctx, CPT_SECT_SOCKET);
+	for_each_object(obj, CPT_OBJ_SOCKET) {
+		int err;
+
+		if ((err = cpt_dump_socket(obj, obj->o_obj, obj->o_index, -1, ctx)) != 0)
+			return err;
+	}
+	cpt_close_section(ctx);
+
+	return 0;
+}
+
+static int dump_filedesc(int fd, struct file *file,
+			 struct files_struct *f, struct cpt_context *ctx)
+{
+	struct cpt_fd_image *img = cpt_get_buf(ctx);
+	cpt_object_t *fobj;
+
+	cpt_open_object(NULL, ctx);
+
+	/* The file must already be a known object; anything else is a bug. */
+	fobj = lookup_cpt_object(CPT_OBJ_FILE, file, ctx);
+	if (!fobj)
+		BUG();
+
+	img->cpt_next = CPT_NULL;
+	img->cpt_object = CPT_OBJ_FILEDESC;
+	img->cpt_hdrlen = sizeof(*img);
+	img->cpt_content = CPT_CONTENT_VOID;
+	img->cpt_fd = fd;
+	img->cpt_file = fobj->o_pos;
+	img->cpt_flags = FD_ISSET(fd, f->fdt->close_on_exec) ?
+				CPT_FD_FLAG_CLOSEEXEC : 0;
+
+	ctx->write(img, sizeof(*img), ctx);
+	cpt_release_buf(ctx);
+	cpt_close_object(ctx);
+	return 0;
+}
+
+static int dump_one_file_struct(cpt_object_t *obj, struct cpt_context *ctx)
+{
+	struct files_struct *f = obj->o_obj;
+	struct cpt_files_struct_image *v = cpt_get_buf(ctx);
+	int fd;
+	loff_t saved_obj;
+
+	cpt_open_object(obj, ctx);
+
+	v->cpt_next = CPT_NULL;
+	v->cpt_object = CPT_OBJ_FILES;
+	v->cpt_hdrlen = sizeof(*v);
+	v->cpt_content = CPT_CONTENT_ARRAY;
+
+	v->cpt_index = obj->o_index;
+	v->cpt_max_fds = f->fdt->max_fds;
+	v->cpt_next_fd = f->fdt->next_fd;
+
+	ctx->write(v, sizeof(*v), ctx);
+	cpt_release_buf(ctx);
+	/* One dump_filedesc() child per descriptor slot that holds a file. */
+	cpt_push_object(&saved_obj, ctx);
+	for (fd = 0; fd < f->fdt->max_fds; fd++) {
+		struct file *file = fcheck_files(f, fd);
+		if (file)
+			dump_filedesc(fd, file, f, ctx);
+	}
+	cpt_pop_object(&saved_obj, ctx);
+
+	cpt_close_object(ctx);
+
+	return 0;
+}
+
+int cpt_dump_files_struct(struct cpt_context *ctx)
+{
+	cpt_object_t *obj;
+	int err;
+
+	/* One image object per collected files_struct. */
+	cpt_open_section(ctx, CPT_SECT_FILES_STRUCT);
+	for_each_object(obj, CPT_OBJ_FILES) {
+		err = dump_one_file_struct(obj, ctx);
+		if (err)
+			return err;
+	}
+	cpt_close_section(ctx);
+
+	return 0;
+}
+
+int cpt_collect_fs(cpt_context_t * ctx)
+{
+	cpt_object_t *obj;
+
+	for_each_object(obj, CPT_OBJ_TASK) {
+		struct fs_struct *fs = ((task_t *)obj->o_obj)->fs;
+		if (fs) {	/* fs_struct, then its pwd/root/altroot inodes */
+			if (cpt_object_add(CPT_OBJ_FS, fs, ctx) == NULL)
+				return -ENOMEM;
+			if (fs->pwd &&
+			    cpt_object_add(CPT_OBJ_INODE, fs->pwd->d_inode, ctx) == NULL)
+				return -ENOMEM;
+			if (fs->root &&
+			    cpt_object_add(CPT_OBJ_INODE, fs->root->d_inode, ctx) == NULL)
+				return -ENOMEM;
+			if (fs->altroot &&
+			    cpt_object_add(CPT_OBJ_INODE, fs->altroot->d_inode, ctx) == NULL)
+				return -ENOMEM;
+		}
+	}
+	return 0;
+}
+
+static int cpt_dump_dir(struct dentry *d, struct vfsmount *mnt, struct cpt_context *ctx)
+{
+	struct file file;
+
+	memset(&file, 0, sizeof(file));
+	/* A zeroed on-stack struct file stands in so dump_one_file() can be reused. */
+	file.f_dentry = d;
+	file.f_vfsmnt = mnt;
+	file.f_mode = FMODE_READ|FMODE_PREAD|FMODE_LSEEK;
+	return dump_one_file(NULL, &file, ctx);
+}
+
+static int dump_one_fs(cpt_object_t *obj, struct cpt_context *ctx)
+{
+	struct fs_struct *fs = obj->o_obj;
+	struct cpt_fs_struct_image *v = cpt_get_buf(ctx);
+	loff_t saved_obj;
+	int err;
+
+	cpt_open_object(obj, ctx);
+
+	v->cpt_next = CPT_NULL;
+	v->cpt_object = CPT_OBJ_FS;
+	v->cpt_hdrlen = sizeof(*v);
+	v->cpt_content = CPT_CONTENT_ARRAY;
+
+	v->cpt_umask = fs->umask;
+
+	ctx->write(v, sizeof(*v), ctx);
+	cpt_release_buf(ctx);
+	/* root and pwd are dumped unconditionally, altroot only when set. */
+	cpt_push_object(&saved_obj, ctx);
+	err = cpt_dump_dir(fs->root, fs->rootmnt, ctx);
+	if (!err)
+		err = cpt_dump_dir(fs->pwd, fs->pwdmnt, ctx);
+	if (!err && fs->altroot)
+		err = cpt_dump_dir(fs->altroot, fs->altrootmnt, ctx);
+
+	cpt_pop_object(&saved_obj, ctx);
+
+	cpt_close_object(ctx);
+
+	return err;
+}
+
+int cpt_dump_fs_struct(struct cpt_context *ctx)
+{
+	cpt_object_t *obj;
+	int err;
+
+	/* One image object per collected fs_struct. */
+	cpt_open_section(ctx, CPT_SECT_FS);
+	for_each_object(obj, CPT_OBJ_FS) {
+		err = dump_one_fs(obj, ctx);
+		if (err)
+			return err;
+	}
+	cpt_close_section(ctx);
+
+	return 0;
+}
+
+static int check_one_namespace(cpt_object_t *obj, struct cpt_context *ctx)
+{
+	struct namespace *n = obj->o_obj;
+	struct list_head *p;
+	char *path_buf, *path;
+	int err = 0;
+
+	path_buf = (char *) __get_free_page(GFP_KERNEL);
+	if (!path_buf)
+		return -ENOMEM;
+
+	down_read(&namespace_sem);
+	list_for_each(p, &n->list) {
+		struct vfsmount *mnt = list_entry(p, struct vfsmount, mnt_list);
+		const char *fstype;
+
+		/* Mounts whose path cannot be built are not checked. */
+		path = d_path(mnt->mnt_root, mnt, path_buf, PAGE_SIZE);
+		if (IS_ERR(path))
+			continue;
+
+		fstype = mnt->mnt_sb->s_type->name;
+		if (strcmp(fstype, "rootfs") == 0 || strcmp(fstype, "ext3") == 0 ||
+		    strcmp(fstype, "simfs") == 0 || strcmp(fstype, "tmpfs") == 0 ||
+		    strcmp(fstype, "devpts") == 0 || strcmp(fstype, "proc") == 0 ||
+		    strcmp(fstype, "sysfs") == 0)
+			continue;
+
+		eprintk_ctx("unsupported fs type %s\n", fstype);
+		err = -EINVAL;
+		break;
+	}
+	up_read(&namespace_sem);
+
+	free_page((unsigned long) path_buf);
+
+	return err;
+}
+
+int cpt_collect_namespace(cpt_context_t * ctx)
+{
+	cpt_object_t *obj;
+	int err;
+
+	/* Register each task's namespace, then vet every registered one. */
+	for_each_object(obj, CPT_OBJ_TASK) {
+		struct namespace *ns = ((task_t *)obj->o_obj)->namespace;
+		if (ns && cpt_object_add(CPT_OBJ_NAMESPACE, ns, ctx) == NULL)
+			return -ENOMEM;
+	}
+	for_each_object(obj, CPT_OBJ_NAMESPACE) {
+		err = check_one_namespace(obj, ctx);
+		if (err)
+			return err;
+	}
+	return 0;
+}
+
+struct args_t
+{
+	int* pfd;	/* pipe descriptor pair; dumptmpfs() dups pfd[1] onto fd 1 */
+	char* path;	/* path that dumptmpfs() passes to tar as its argument */
+};
+
+static int dumptmpfs(void *arg)
+{
+	int i;
+	struct args_t *args = arg;
+	int *pfd = args->pfd;
+	char *path = args->path;
+	char *argv[] = { "tar", "-c", "-S", "--numeric-owner", path, NULL };
+
+	i = real_env_create(VEID(get_exec_env()), VE_ENTER|VE_SKIPLOCK, 2, NULL, 0);
+	if (i < 0) {
+		eprintk("cannot enter ve to dump tmpfs\n");
+		module_put(THIS_MODULE);
+		return 1;
+	}
+	/* The pipe's write end becomes fd 1; every other descriptor is closed. */
+	if (pfd[1] != 1)
+		sc_dup2(pfd[1], 1);
+
+	for (i=0; i<current->files->fdt->max_fds; i++) {
+		if (i != 1)
+			sc_close(i);
+	}
+
+	module_put(THIS_MODULE);
+	/* Code after sc_execve() treats any return from it as a failed exec. */
+	set_fs(KERNEL_DS);
+	i = sc_execve("/bin/tar", argv, NULL);
+	eprintk("failed to exec /bin/tar: %d\n", i);
+	return -1;
+}
+
+static int cpt_dump_tmpfs(char *path, struct cpt_context *ctx)
+{
+	int err;
+	int pid;
+	int pfd[2];
+	struct file *f;
+	struct cpt_object_hdr v;
+	char buf[16];
+	int n;
+	loff_t saved_obj;
+	struct args_t args;
+	
+	err = sc_pipe(pfd);
+	if (err < 0)
+		return err;
+	args.pfd = pfd;
+	args.path = path;
+	err = pid = local_kernel_thread(dumptmpfs, (void*)&args, SIGCHLD, 0);
+	if (err < 0)
+		goto out;
+	f = fget(pfd[0]);
+	sc_close(pfd[1]);
+	sc_close(pfd[0]);
+	/* Object layout: CPT_OBJ_NAME header, pipe data, one NUL byte, padding. */
+	cpt_push_object(&saved_obj, ctx);
+	cpt_open_object(NULL, ctx);
+	v.cpt_next = CPT_NULL;
+	v.cpt_object = CPT_OBJ_NAME;
+	v.cpt_hdrlen = sizeof(v);
+	v.cpt_content = CPT_CONTENT_NAME;
+
+	ctx->write(&v, sizeof(v), ctx);
+	/* Copy the pipe into the image until a read returns 0 or an error. */
+	do {
+		mm_segment_t oldfs;
+
+		oldfs = get_fs(); set_fs(KERNEL_DS);
+		n = f->f_op->read(f, buf, sizeof(buf), &f->f_pos);
+		set_fs(oldfs);
+		if (n > 0)
+			ctx->write(buf, n, ctx);
+	} while (n > 0);
+
+	fput(f);
+
+	if ((err = sc_waitx(pid, 0)) < 0)
+		eprintk_ctx("wait4: %d\n", err);
+
+	buf[0] = 0;
+	ctx->write(buf, 1, ctx);
+	ctx->align(ctx);
+	cpt_close_object(ctx);
+	cpt_pop_object(&saved_obj, ctx);
+	return n;	/* result of the last read: 0 at EOF, negative on error */
+
+out:
+	if (pfd[1] >= 0)
+		sc_close(pfd[1]);
+	if (pfd[0] >= 0)
+		sc_close(pfd[0]);
+	return err;
+}
+
+static int dump_vfsmount(struct vfsmount *mnt, struct cpt_context *ctx)
+{
+	int err = 0;
+	struct cpt_vfsmount_image v;
+	loff_t saved_obj;
+	char *path_buf, *path;
+
+	path_buf = (char *) __get_free_page(GFP_KERNEL);
+	if (!path_buf)
+		return -ENOMEM;
+	/* A mount d_path() rejects with -EINVAL is skipped, not an error. */
+	path = d_path(mnt->mnt_root, mnt, path_buf, PAGE_SIZE);
+	if (IS_ERR(path)) {
+		free_page((unsigned long) path_buf);
+		return PTR_ERR(path) == -EINVAL ? 0 : PTR_ERR(path);
+	}
+
+	cpt_open_object(NULL, ctx);
+
+	v.cpt_next = -1;
+	v.cpt_object = CPT_OBJ_VFSMOUNT;
+	v.cpt_hdrlen = sizeof(v);
+	v.cpt_content = CPT_CONTENT_ARRAY;
+
+	v.cpt_mntflags = mnt->mnt_flags;
+	v.cpt_flags = mnt->mnt_sb->s_flags;
+
+	ctx->write(&v, sizeof(v), ctx);
+
+	cpt_push_object(&saved_obj, ctx);
+	cpt_dump_string(mnt->mnt_devname ? : "none", ctx);
+	cpt_dump_string(path, ctx);
+	cpt_dump_string(mnt->mnt_sb->s_type->name, ctx);
+#if 0
+	/* This is an evident crap. Ask Savochkin, he might know this.
+	 * Goal is to get some path to mount --bind to.
+	 */
+	cpt_dump_dentry(mnt->mnt_root, mnt->mnt_parent, ctx);
+#else
+	/* For now we just bail, when some FS is mounted not at root. */
+	if (mnt->mnt_root != mnt->mnt_sb->s_root) {
+		eprintk_ctx("mount --bind prevents checkpointing\n");
+		err = -EINVAL;
+	}
+#endif
+	/* Archive tmpfs content; a failure here must fail the checkpoint. */
+	if (!err && strcmp(mnt->mnt_sb->s_type->name, "tmpfs") == 0)
+		err = cpt_dump_tmpfs(path, ctx);
+
+
+	cpt_pop_object(&saved_obj, ctx);
+
+	cpt_close_object(ctx);
+
+	free_page((unsigned long) path_buf);
+
+	return err;
+}
+
+static int dump_one_namespace(cpt_object_t *obj, struct cpt_context *ctx)
+{
+	struct namespace *n = obj->o_obj;
+	struct cpt_object_hdr v;
+	struct list_head *p;
+	loff_t saved_obj;
+	int err = 0;
+
+	cpt_open_object(obj, ctx);
+
+	v.cpt_next = -1;
+	v.cpt_object = CPT_OBJ_NAMESPACE;
+	v.cpt_hdrlen = sizeof(v);
+	v.cpt_content = CPT_CONTENT_ARRAY;
+
+	ctx->write(&v, sizeof(v), ctx);
+
+	cpt_push_object(&saved_obj, ctx);
+	/* namespace_sem is read-held across every dump_vfsmount() call below. */
+	down_read(&namespace_sem);
+	list_for_each(p, &n->list) {
+		err = dump_vfsmount(list_entry(p, struct vfsmount, mnt_list), ctx);
+		if (err)
+			break;
+	}
+	up_read(&namespace_sem);
+
+	cpt_pop_object(&saved_obj, ctx);
+
+	cpt_close_object(ctx);
+
+	return err;
+}
+
+int cpt_dump_namespace(struct cpt_context *ctx)
+{
+	cpt_object_t *obj;
+	int err;
+
+	/* One image object per collected namespace. */
+	cpt_open_section(ctx, CPT_SECT_NAMESPACE);
+	for_each_object(obj, CPT_OBJ_NAMESPACE) {
+		err = dump_one_namespace(obj, ctx);
+		if (err)
+			return err;
+	}
+	cpt_close_section(ctx);
+
+	return 0;
+}
diff -uprN linux-2.6.16/kernel/cpt/cpt_files.h linux-2.6.16.ovz/kernel/cpt/cpt_files.h
--- linux-2.6.16/kernel/cpt/cpt_files.h	1969-12-31 19:00:00.000000000 -0500
+++ linux-2.6.16.ovz/kernel/cpt/cpt_files.h	2006-07-05 08:34:56.000000000 -0400
@@ -0,0 +1,46 @@
+int cpt_collect_files(cpt_context_t *);
+int cpt_collect_fs(cpt_context_t *);
+int cpt_collect_namespace(cpt_context_t *);
+int cpt_collect_sysvsem_undo(cpt_context_t *);
+int cpt_collect_tty(struct file *, cpt_context_t *);
+int cpt_dump_files(struct cpt_context *ctx);
+int cpt_dump_files_struct(struct cpt_context *ctx);
+int cpt_dump_fs_struct(struct cpt_context *ctx);
+int cpt_dump_content_sysvshm(struct file *file, struct cpt_context *ctx);
+int cpt_dump_content_tty(struct file *file, struct cpt_context *ctx);
+int cpt_dump_tty(cpt_object_t *, struct cpt_context *ctx);
+struct file * rst_sysv_shm(loff_t pos, struct cpt_context *ctx);
+struct file * rst_open_tty(struct cpt_file_image *fi, struct cpt_inode_image *ii, unsigned flags, struct cpt_context *ctx);
+__u32 cpt_tty_fasync(struct file *file, struct cpt_context *ctx);
+
+int rst_posix_locks(struct cpt_context *ctx);
+
+struct file *rst_file(loff_t pos, int fd, struct cpt_context *ctx);
+int rst_files_complete(struct cpt_task_image *ti, struct cpt_context *ctx);
+__u32 rst_files_flag(struct cpt_task_image *ti, struct cpt_context *ctx);
+int rst_fs_complete(struct cpt_task_image *ti, struct cpt_context *ctx);
+int rst_restore_fs(struct cpt_context *ctx);
+
+int cpt_collect_sysv(cpt_context_t *);
+int cpt_dump_sysvsem(struct cpt_context *ctx);
+int rst_sysv_ipc(struct cpt_context *ctx);
+int rst_semundo_complete(struct cpt_task_image *ti, struct cpt_context *ctx);
+__u32 rst_semundo_flag(struct cpt_task_image *ti, struct cpt_context *ctx);
+
+int cpt_dump_namespace(struct cpt_context *ctx);
+int rst_root_namespace(struct cpt_context *ctx);
+
+int rst_stray_files(struct cpt_context *ctx);
+int rst_tty_jobcontrol(struct cpt_context *ctx);
+
+void rst_flush_filejobs(struct cpt_context *);
+int rst_do_filejobs(struct cpt_context *);
+
+int rst_eventpoll(struct cpt_context *);
+struct file *cpt_open_epolldev(struct cpt_file_image *fi,
+			       unsigned flags,
+			       struct cpt_context *ctx);
+int cpt_dump_epolldev(cpt_object_t *obj, struct cpt_context *);
+
+int cpt_verify_overmount(char *path, struct dentry *d, struct vfsmount *mnt,
+			 cpt_context_t *ctx);
diff -uprN linux-2.6.16/kernel/cpt/cpt_fsmagic.h linux-2.6.16.ovz/kernel/cpt/cpt_fsmagic.h
--- linux-2.6.16/kernel/cpt/cpt_fsmagic.h	1969-12-31 19:00:00.000000000 -0500
+++ linux-2.6.16.ovz/kernel/cpt/cpt_fsmagic.h	2006-07-05 08:34:56.000000000 -0400
@@ -0,0 +1,15 @@
+/* Collected from kernel sources. */
+
+#define FSMAGIC_TMPFS	0x01021994
+#define FSMAGIC_PIPEFS	0x50495045
+#define FSMAGIC_SOCKFS	0x534F434B
+#define FSMAGIC_PFMFS	0xa0b4d889
+#define FSMAGIC_BDEV	0x62646576
+#define FSMAGIC_EPOLL	0x03111965
+#define FSMAGIC_FUTEX	0x0BAD1DEA
+#define FSMAGIC_MQUEUE	0x19800202
+#define FSMAGIC_PROC	0x9fa0
+#define FSMAGIC_DEVPTS	0x1CD1
+#define FSMAGIC_AUTOFS	0x0187
+#define FSMAGIC_EXT2	0xEF53
+#define FSMAGIC_REISER	0x52654973
diff -uprN linux-2.6.16/kernel/cpt/cpt_kernel.c linux-2.6.16.ovz/kernel/cpt/cpt_kernel.c
--- linux-2.6.16/kernel/cpt/cpt_kernel.c	1969-12-31 19:00:00.000000000 -0500
+++ linux-2.6.16.ovz/kernel/cpt/cpt_kernel.c	2006-07-05 08:34:56.000000000 -0400
@@ -0,0 +1,134 @@
+/*
+ *
+ *  kernel/cpt/cpt_kernel.c
+ *
+ *  Copyright (C) 2000-2005  SWsoft
+ *  All rights reserved.
+ *
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#define __KERNEL_SYSCALLS__ 1
+
+#include <linux/version.h>
+#include <linux/module.h>
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/kernel.h>
+#include <asm/cpufeature.h>
+#include <linux/cpt_image.h>
+
+#include "cpt_kernel.h"
+#include "cpt_syscalls.h"
+
+int debug_level = 1;
+
+#ifndef CONFIG_X86_64
+
+extern void local_kernel_thread_helper(void);
+__asm__(".section .text\n"
+	".align 4\n"
+	"local_kernel_thread_helper:\n\t"
+	"movl %edx,%eax\n\t"		/* asm_kernel_thread() put arg in edx */
+	"pushl %edx\n\t"		/* arg is also pushed on the stack */
+	"call *%ebx\n\t"		/* asm_kernel_thread() put fn in ebx */
+	"pushl %eax\n\t"		/* eax after the call, presumably fn's result */
+	"pushl $0\n\t"
+	"call complete_and_exit\n"	/* stack now holds 0, then that eax value */
+	".previous");
+
+/*
+ * Create a kernel thread
+ */
+int asm_kernel_thread(int (*fn)(void *), void * arg, unsigned long flags, pid_t pid)
+{
+	struct pt_regs regs;
+
+	memset(&regs, 0, sizeof(regs));
+	/* local_kernel_thread_helper calls through %ebx and pushes %edx. */
+	regs.ebx = (unsigned long) fn;
+	regs.edx = (unsigned long) arg;
+
+	regs.xds = __USER_DS;
+	regs.xes = __USER_DS;
+	regs.orig_eax = -1;
+	regs.eip = (unsigned long) local_kernel_thread_helper;
+	regs.xcs = __KERNEL_CS;
+	regs.eflags = X86_EFLAGS_IF | X86_EFLAGS_SF | X86_EFLAGS_PF | 0x2;
+
+	/* Ok, create the new process.. */
+	return do_fork_pid(flags | CLONE_UNTRACED, 0, &regs, 0, NULL, NULL, pid);
+}
+#endif
+
+int local_kernel_thread(int (*fn)(void *), void * arg, unsigned long flags, pid_t pid)
+{
+	pid_t child;
+
+	/* Pin the module for the thread; unpin again if none was created. */
+	if (!try_module_get(THIS_MODULE))
+		return -EBUSY;
+	if ((child = asm_kernel_thread(fn, arg, flags, pid)) < 0)
+		module_put(THIS_MODULE);
+	return child;
+}
+
+#ifdef __i386__
+int __execve(const char *file, char **argv, char **envp)
+{
+	long res;	/* whatever the trap leaves in eax ("=a") */
+	__asm__ volatile ("int $0x80"	/* eax = __NR_execve, args in ebx/ecx/edx */
+	: "=a" (res)
+	: "0" (__NR_execve),"b" ((long)(file)),"c" ((long)(argv)),
+		  "d" ((long)(envp)) : "memory");
+	return (int)res;
+}
+#endif
+
+int sc_execve(char *cmd, char **argv, char **env)
+{
+	/* i386 builds go through the local __execve() stub, every
+	 * other build calls execve() directly. */
+#ifdef __i386__
+	return __execve(cmd, argv, env);
+#else
+	return execve(cmd, argv, env);
+#endif
+}
+
+unsigned int test_cpu_caps(void)
+{
+	unsigned int flags = 0;	/* one bit per CPT_CPU_X86_* feature found */
+	if (boot_cpu_has(X86_FEATURE_CMOV))
+		flags |= 1 << CPT_CPU_X86_CMOV;
+	if (cpu_has_fxsr)
+		flags |= 1 << CPT_CPU_X86_FXSR;
+	if (cpu_has_xmm)
+		flags |= 1 << CPT_CPU_X86_SSE;
+#ifndef CONFIG_X86_64	/* on x86-64 the SSE2 bit is set unconditionally */
+	if (cpu_has_xmm2)
+#endif
+		flags |= 1 << CPT_CPU_X86_SSE2;
+	if (cpu_has_mmx)
+		flags |= 1 << CPT_CPU_X86_MMX;
+	if (boot_cpu_has(X86_FEATURE_3DNOW))
+		flags |= 1 << CPT_CPU_X86_3DNOW;
+	if (boot_cpu_has(X86_FEATURE_3DNOWEXT))
+		flags |= 1 << CPT_CPU_X86_3DNOW2;
+	if (boot_cpu_has(X86_FEATURE_SEP))
+		flags |= 1 << CPT_CPU_X86_SEP;
+#ifdef CONFIG_X86_64
+	flags |= 1 << CPT_CPU_X86_EMT64;
+#endif
+	return flags;
+}
+
+unsigned int test_kernel_config(void)
+{
+	unsigned int flags = 0;	/* bit numbers are CPT_KERNEL_CONFIG_* */
+#if defined(CONFIG_X86_PAE) || defined(CONFIG_X86_64)
+	flags |= 1 << CPT_KERNEL_CONFIG_PAE;
+#endif
+	return flags;
+}
diff -uprN linux-2.6.16/kernel/cpt/cpt_kernel.h linux-2.6.16.ovz/kernel/cpt/cpt_kernel.h
--- linux-2.6.16/kernel/cpt/cpt_kernel.h	1969-12-31 19:00:00.000000000 -0500
+++ linux-2.6.16.ovz/kernel/cpt/cpt_kernel.h	2006-07-05 08:34:56.000000000 -0400
@@ -0,0 +1,74 @@
+/* Interface to kernel vars which we had to _add_. */
+
+asmlinkage void ret_from_fork(void) __asm__("ret_from_fork");
+
+#define PRIO_TO_NICE(prio)	((prio) - MAX_RT_PRIO - 20)
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,9)
+#define TASK_TRACED TASK_STOPPED
+#define unix_peer(sk) ((sk)->sk_pair)
+#define page_mapcount(pg) ((pg)->mapcount)
+#else
+#define unix_peer(sk) (unix_sk(sk)->peer)
+#endif
+
+#ifdef CONFIG_X86_64
+#define cpu_has_fxsr 1
+#endif
+
+static inline void do_gettimespec(struct timespec *ts)
+{
+	struct timeval now;	/* sampled with do_gettimeofday(): sec + usec */
+	do_gettimeofday(&now);
+	ts->tv_nsec = now.tv_usec*1000;	/* usec -> nsec */
+	ts->tv_sec = now.tv_sec;
+}
+
+int local_kernel_thread(int (*fn)(void *),
+		void * arg,
+		unsigned long flags,
+		pid_t pid);
+int asm_kernel_thread(int (*fn)(void *),
+		void * arg,
+		unsigned long flags,
+		pid_t pid);
+
+unsigned int test_cpu_caps(void);
+unsigned int test_kernel_config(void);
+
+#define test_one_flag(src, dst, flag, message, ret) \
+if (((src) & (1 << (flag))) && /* bit set in the source mask... */ \
+    !((dst) & (1 << (flag)))) { /* ...but clear in the destination mask */ \
+	wprintk("Destination cpu does not have " message "\n"); \
+	ret = 1; /* only ever raised here, never cleared */ \
+}
+
+static inline void
+_set_normalized_timespec(struct timespec *ts, time_t sec, long nsec)
+{
+	/* carry whole seconds out of an over-large nanosecond part */
+	for (; nsec >= NSEC_PER_SEC; nsec -= NSEC_PER_SEC)
+		sec++;
+	/* borrow whole seconds until the nanosecond part is non-negative */
+	for (; nsec < 0; nsec += NSEC_PER_SEC)
+		sec--;
+
+	/* here 0 <= nsec < NSEC_PER_SEC */
+	ts->tv_sec = sec;
+	ts->tv_nsec = nsec;
+}
+
+static inline struct timespec
+_ns_to_timespec(const nsec_t nsec)
+{
+	struct timespec ts = {0, 0};
+
+	if (nsec) {
+		/* presumably quotient -> tv_sec, remainder -> tv_nsec (helper not shown) */
+		ts.tv_sec = div_long_long_rem_signed(nsec, NSEC_PER_SEC, &ts.tv_nsec);
+		/* negative input: bring tv_nsec back into [0, NSEC_PER_SEC) */
+		if (unlikely(nsec < 0))
+			_set_normalized_timespec(&ts, ts.tv_sec, ts.tv_nsec);
+	}
+	return ts;
+}
diff -uprN linux-2.6.16/kernel/cpt/cpt_mm.c linux-2.6.16.ovz/kernel/cpt/cpt_mm.c
--- linux-2.6.16/kernel/cpt/cpt_mm.c	1969-12-31 19:00:00.000000000 -0500
+++ linux-2.6.16.ovz/kernel/cpt/cpt_mm.c	2006-07-05 08:34:56.000000000 -0400
@@ -0,0 +1,823 @@
+/*
+ *
+ *  kernel/cpt/cpt_mm.c
+ *
+ *  Copyright (C) 2000-2005  SWsoft
+ *  All rights reserved.
+ *
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/version.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/file.h>
+#include <linux/mm.h>
+#include <linux/hugetlb.h>
+#include <linux/errno.h>
+#include <linux/ve.h>
+#include <linux/pagemap.h>
+#include <linux/rmap.h>
+#include <asm/ldt.h>
+#include <asm/mmu.h>
+#include <linux/cpt_image.h>
+
+#include "cpt_obj.h"
+#include "cpt_context.h"
+#include "cpt_mm.h"
+#include "cpt_kernel.h"
+#include "cpt_fsmagic.h"
+#ifdef CONFIG_VZ_CHECKPOINT_LAZY
+#include "cpt_pagein.h"
+#endif
+#include "cpt_ubc.h"
+
+static int collect_one_aio_ctx(struct mm_struct *mm, struct kioctx *aio_ctx,
+			       cpt_context_t *ctx)
+{
+	if (!list_empty(&aio_ctx->run_list)) {
+		/* This is impossible at least with kernel 2.6.8.1 or 2.6.16 */
+		eprintk_ctx("run list is not empty, cannot suspend AIO\n");
+		return -EBUSY;
+	}
+
+	/* Wait for pending IOCBs. Linux AIO is mostly _fake_.
+	 * It is actually synchronous, except for direct IO and
+	 * some funny raw USB things, which cannot happen inside VE.
+	 * However, we do this for future.
+	 *
+	 * Later note: in 2.6.16 we may allow O_DIRECT, so that
+	 * it is not meaningless code.
+	 */
+	wait_for_all_aios(aio_ctx);
+
+	if (!list_empty(&aio_ctx->run_list) ||
+	    !list_empty(&aio_ctx->active_reqs) ||
+	    aio_ctx->reqs_active) {
+		eprintk_ctx("were not able to suspend AIO\n");
+		return -EBUSY;
+	}
+
+	return 0;
+}
+
+static int collect_one_mm(struct mm_struct *mm, cpt_context_t * ctx)
+{
+	struct vm_area_struct *area;
+	struct kioctx *aio;
+
+	/* every file backing a mapping is added as a CPT_OBJ_FILE object */
+	for (area = mm->mmap; area != NULL; area = area->vm_next) {
+		if (area->vm_file == NULL)
+			continue;
+		if (cpt_object_add(CPT_OBJ_FILE, area->vm_file, ctx) == NULL)
+			return -ENOMEM;
+	}
+	/* the mm's mm_ub is registered through cpt_add_ubc() */
+	if (cpt_add_ubc(mm->mm_ub, ctx) == NULL)
+		return -ENOMEM;
+
+	/* check each AIO context of the mm; the first failure ends the walk */
+	for (aio = mm->ioctx_list; aio != NULL; aio = aio->next) {
+		int err = collect_one_aio_ctx(mm, aio, ctx);
+		if (err != 0)
+			return err;
+	}
+	return 0;
+}
+
+int cpt_collect_mm(cpt_context_t * ctx)
+{
+	cpt_object_t *obj;
+	int err;
+	int index;
+
+	for_each_object(obj, CPT_OBJ_TASK) {
+		task_t *tsk = obj->o_obj;
+		if (tsk->mm && cpt_object_add(CPT_OBJ_MM, tsk->mm, ctx) == NULL)
+			return -ENOMEM;
+	}
+
+	index = 1;
+	for_each_object(obj, CPT_OBJ_MM) {
+		struct mm_struct *mm = obj->o_obj;
+		if (obj->o_count != atomic_read(&mm->mm_users)) {
+			eprintk_ctx("mm_struct is referenced outside %d %d\n", obj->o_count, atomic_read(&mm->mm_users));
+			return -EBUSY;
+		}
+		cpt_obj_setindex(obj, index++, ctx);
+
+		if ((err = collect_one_mm(mm, ctx)) != 0)
+			return err;
+	}
+
+	return 0;
+}
+
+static int zcnt, scnt, scnt0, ucnt;
+
+/* Function where_is_anon_page() returns address of an anonymous page in the mm
+ * of an already dumped process. This happens e.g. after fork(). We do not use
+ * this right now, just keep statistics; it is difficult to restore such state,
+ * but the most direct use is to save space in the dumped image. */
+
+
+static inline unsigned long
+vma_address0(struct page *page, struct vm_area_struct *vma)
+{
+	/* page->index rescaled from page-cache units to PAGE_SIZE units */
+	pgoff_t off = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
+	unsigned long va = vma->vm_start + ((off - vma->vm_pgoff) << PAGE_SHIFT);
+	/* outside [vm_start, vm_end): return the address with bit 0 set */
+	if (unlikely(va < vma->vm_start || va >= vma->vm_end))
+		return va | 1;
+	return va;
+}
+
+static int really_this_one(struct vm_area_struct *vma, unsigned long address,
+			   struct page *page)
+{
+	struct mm_struct *mm = vma->vm_mm;
+	pgd_t *pgd;
+	pud_t *pud;
+	pmd_t *pmd;
+	pte_t *pte;
+	spinlock_t *ptl;
+	int result;
+
+	pgd = pgd_offset(mm, address);
+	if (unlikely(!pgd_present(*pgd)))
+		return 0;
+
+	pud = pud_offset(pgd, address);
+	if (!pud_present(*pud))
+		return 0;
+
+	pmd = pmd_offset(pud, address);
+	if (unlikely(!pmd_present(*pmd)))
+		return 0;
+
+	result = 0;
+	pte = pte_offset_map(pmd, address);
+	if (!pte_present(*pte)) {
+		pte_unmap(pte);
+		return 0;
+	}
+
+	ptl = pte_lockptr(mm, pmd);
+	if (!spin_trylock(ptl)) {
+		pte_unmap(pte);
+		return 0;
+	}
+	if (pte_present(*pte) && page_to_pfn(page) == pte_pfn(*pte))
+		result = 1;
+	pte_unmap_unlock(pte, ptl);
+	return result;
+}
+
+static loff_t where_is_anon_page(cpt_object_t *mmobj, unsigned long mapaddr,
+				 struct page *page, cpt_context_t * ctx)
+{
+	loff_t mmptr = CPT_NULL;
+	struct anon_vma *anon_vma;
+	struct vm_area_struct *vma;
+	int idx = mmobj->o_index;
+
+	if (!PageAnon(page))
+		return CPT_NULL;
+
+	anon_vma = page_lock_anon_vma(page);
+	if (!anon_vma)
+		return CPT_NULL;
+
+	list_for_each_entry(vma, &anon_vma->head, anon_vma_node) {
+		unsigned long addr = vma_address0(page, vma);
+		cpt_object_t *obj;
+
+		/* We do not try to support mremapped regions (addr != mapaddr),
+		 * only mmaps directly inherited via fork().
+		 * With this limitation we may check self-consistency of
+		 * vmas (vm_start, vm_pgoff, anon_vma) before
+		 * doing __copy_page_range() in rst_mm.
+		 */
+		if (mmobj->o_obj != vma->vm_mm && addr == mapaddr) {
+			obj = lookup_cpt_object(CPT_OBJ_MM, vma->vm_mm, ctx);
+			if (obj && obj->o_pos != CPT_NULL && obj->o_index < idx) {
+				if (really_this_one(vma, addr, page)) {
+					mmptr = obj->o_pos;
+					idx = obj->o_index;
+				}
+			}
+		}
+	}
+	spin_unlock(&anon_vma->lock);
+
+	return mmptr;
+}
+
+struct page_area
+{
+	int type;
+	unsigned long start;
+	unsigned long end;
+	pgoff_t pgoff;
+	loff_t mm;
+};
+
+struct page_desc
+{
+	int	type;
+	pgoff_t	index;
+	loff_t	mm;
+	int	shared;
+};
+
+enum {
+	PD_ABSENT,
+	PD_COPY,
+	PD_ZERO,
+	PD_CLONE,
+	PD_FUNKEY,
+	PD_LAZY
+};
+
+/* 0: page can be obtained from backstore, or still not mapped anonymous page,
+      or something else, which does not require copy.
+   1: page requires copy
+   2: page requires copy but its content is zero. Quite useless.
+   3: wp page is shared after fork(). It is to be COWed when modified.
+   4: page is something unsupported... We copy it right now.
+ */
+
+
+
+static void page_get_desc(cpt_object_t *mmobj,
+			  struct vm_area_struct *vma, unsigned long addr,
+			  struct page_desc *pdesc, cpt_context_t * ctx)
+{
+	struct mm_struct *mm = vma->vm_mm;
+	pgd_t *pgd;
+	pud_t *pud;
+	pmd_t *pmd;
+	pte_t *ptep, pte;
+	spinlock_t *ptl;
+	struct page *pg;
+	pgoff_t linear_index = (addr - vma->vm_start)/PAGE_SIZE + vma->vm_pgoff;
+
+	pdesc->index = linear_index;
+	pdesc->shared = 0;
+
+	if (vma->vm_flags & VM_IO) {
+		pdesc->type = PD_ABSENT;
+		return;
+	}
+
+	pgd = pgd_offset(mm, addr);
+	if (pgd_none(*pgd) || unlikely(pgd_bad(*pgd)))
+		goto out_absent;
+	pud = pud_offset(pgd, addr);
+	if (pud_none(*pud) || unlikely(pud_bad(*pud)))
+		goto out_absent;
+	pmd = pmd_offset(pud, addr);
+	if (pmd_none(*pmd) || unlikely(pmd_bad(*pmd)))
+		goto out_absent;
+	if (pmd_huge(*pmd)) {
+		eprintk_ctx("page_huge\n");
+		goto out_unsupported;
+	}
+
+	ptep = pte_offset_map_lock(mm, pmd, addr, &ptl);
+	if (!ptep)
+		goto out_absent;
+
+	pte = *ptep;
+	if (pte_none(pte))
+		goto out_absent_unmap;
+
+	if (!pte_present(pte)) {
+		if (pte_file(pte)) {
+			pdesc->index = pte_to_pgoff(pte);
+			goto out_absent_unmap;
+		}
+		if (vma->vm_flags & VM_SHARED) {
+			/* It is impossible: shared mappings cannot be in swap */
+			eprintk_ctx("shared mapping is not present: %08lx@%Ld\n", addr, mmobj->o_pos);
+			goto out_unsupported_unmap;
+		}
+		/* Otherwise it is in swap. */
+		goto out_lazy_unmap;
+	} else if ((pg = vm_normal_page(vma, addr, pte)) != NULL) {
+
+		if (pg->mapping && !PageAnon(pg)) {
+			if (vma->vm_file == NULL) {
+				eprintk_ctx("pg->mapping!=NULL for fileless vma: %08lx\n", addr);
+				goto out_unsupported_unmap;
+			}
+			if (vma->vm_file->f_mapping != pg->mapping) {
+				eprintk_ctx("pg->mapping!=f_mapping: %08lx %p %p %Ld\n", addr, vma->vm_file->f_mapping, pg->mapping, mmobj->o_pos);
+				goto out_unsupported_unmap;
+			}
+			pdesc->index = (pg->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT));
+			/* Page is in backstore. For us it is like
+			 * it is not present.
+			 */
+			goto out_absent_unmap;
+		}
+
+		if (PageReserved(pg)) {
+			/* Special case: ZERO_PAGE is used, when an
+			 * anonymous page is accessed but not written. */
+			if (pg == ZERO_PAGE(addr)) {
+				if (pte_write(pte)) {
+					eprintk_ctx("not funny already, writable ZERO_PAGE\n");
+					goto out_unsupported_unmap;
+				}
+				zcnt++;
+				goto out_absent_unmap;
+			}
+			eprintk_ctx("reserved page %lu at %08lx@%Ld\n", pg->index, addr, mmobj->o_pos);
+			goto out_unsupported_unmap;
+		}
+
+		if (pg == ZERO_PAGE(addr)) {
+			wprintk_ctx("that's how it works now\n");
+		}
+
+		if (!pg->mapping) {
+			eprintk_ctx("page without mapping at %08lx@%Ld\n", addr, mmobj->o_pos);
+			goto out_unsupported_unmap;
+		}
+
+		if (pg->mapping && page_mapcount(pg) > 1) {
+			pdesc->shared = 1;
+			pdesc->mm = where_is_anon_page(mmobj, addr, pg, ctx);
+			if (pdesc->mm != CPT_NULL) {
+				scnt0++;
+				goto out_clone_unmap;
+			} else {
+				scnt++;
+			}
+		}
+
+		if (!pte_young(pte))
+			goto out_lazy_unmap;
+	}
+	pte_unmap_unlock(ptep, ptl);
+	pdesc->type = PD_COPY;
+	return;
+
+out_lazy_unmap:
+	pte_unmap_unlock(ptep, ptl);
+	pdesc->type = PD_LAZY;
+	return;
+
+out_absent_unmap:
+	pte_unmap_unlock(ptep, ptl);
+out_absent:
+	pdesc->type = PD_ABSENT;
+	return;
+
+out_clone_unmap:
+	pte_unmap_unlock(ptep, ptl);
+	pdesc->type = PD_CLONE;
+	return;
+
+out_unsupported_unmap:
+	pte_unmap_unlock(ptep, ptl);
+out_unsupported:
+	ucnt++;
+	pdesc->type = PD_FUNKEY;
+	return;
+}
+
+/* ATTN: We give "current" to get_user_pages(). This is wrong, but get_user_pages()
+ * does not really need this thing. It just stores some page fault stats there.
+ *
+ * BUG: some archs (f.e. sparc64, but not Intel*) require flush cache pages
+ * before accessing vma.
+ */
+void dump_pages(struct vm_area_struct *vma, unsigned long start,
+		unsigned long end, struct cpt_context *ctx)
+{
+#define MAX_PAGE_BATCH 16
+	struct page *pg[MAX_PAGE_BATCH];
+	int npages = (end - start)/PAGE_SIZE;
+	int count = 0;
+
+	/* Walk [start, end) in batches of at most MAX_PAGE_BATCH pages. */
+	while (count < npages) {
+		int copy = npages - count;
+		int n, i;
+
+		if (copy > MAX_PAGE_BATCH)
+			copy = MAX_PAGE_BATCH;
+		/* write=0, force=1; n is the number of pages pinned or an error */
+		n = get_user_pages(current, vma->vm_mm, start, copy,
+				   0, 1, pg, NULL);
+		if (n != copy) {
+			/* short or failed batch: drop what was pinned and give up */
+			eprintk_ctx("get_user_pages fault\n");
+			for ( ; n > 0; n--)
+				page_cache_release(pg[n-1]);
+			return;
+		}
+		for (i = 0; i < n; i++) {
+			/* map, write out one page, then unpin it */
+			char *maddr = kmap(pg[i]);
+			ctx->write(maddr, PAGE_SIZE, ctx);
+			kunmap(pg[i]);
+			page_cache_release(pg[i]);
+		}
+		start += n*PAGE_SIZE;
+		count += n;
+	}
+}
+
+int dump_page_block(struct vm_area_struct *vma, struct cpt_page_block *pgb,
+		    int copy,
+		    struct cpt_context *ctx)
+{
+	loff_t saved_object;
+
+	cpt_push_object(&saved_object, ctx);
+
+	pgb->cpt_object = (copy != PD_LAZY) ? CPT_OBJ_PAGES : CPT_OBJ_LAZYPAGES;
+	pgb->cpt_hdrlen = sizeof(*pgb);
+	pgb->cpt_content = (copy == PD_COPY || copy == PD_LAZY) ? CPT_CONTENT_DATA : CPT_CONTENT_VOID;
+
+	ctx->write(pgb, sizeof(*pgb), ctx);
+	if (copy == PD_COPY || copy == PD_LAZY)
+		dump_pages(vma, pgb->cpt_start, pgb->cpt_end, ctx);
+	cpt_close_object(ctx);
+	cpt_pop_object(&saved_object, ctx);
+	return 0;
+}
+
+int dump_remappage_block(struct vm_area_struct *vma, struct page_area *pa,
+			 struct cpt_context *ctx)
+{
+	struct cpt_remappage_block pgb;
+	loff_t saved_object;
+
+	cpt_push_object(&saved_object, ctx);
+
+	pgb.cpt_object = CPT_OBJ_REMAPPAGES;
+	pgb.cpt_hdrlen = sizeof(pgb);
+	pgb.cpt_content = CPT_CONTENT_VOID;
+	pgb.cpt_start = pa->start;
+	pgb.cpt_end = pa->end;
+	pgb.cpt_pgoff = pa->pgoff - (pa->end-pa->start)/PAGE_SIZE + 1;
+
+	ctx->write(&pgb, sizeof(pgb), ctx);
+	cpt_close_object(ctx);
+	cpt_pop_object(&saved_object, ctx);
+	return 0;
+}
+
+int dump_copypage_block(struct vm_area_struct *vma, struct page_area *pa,
+			struct cpt_context *ctx)
+{
+	struct cpt_copypage_block pgb;
+	loff_t saved_object;
+
+	cpt_push_object(&saved_object, ctx);
+
+	pgb.cpt_object = CPT_OBJ_COPYPAGES;
+	pgb.cpt_hdrlen = sizeof(pgb);
+	pgb.cpt_content = CPT_CONTENT_VOID;
+	pgb.cpt_start = pa->start;
+	pgb.cpt_end = pa->end;
+	pgb.cpt_source = pa->mm;
+
+	ctx->write(&pgb, sizeof(pgb), ctx);
+	cpt_close_object(ctx);
+	cpt_pop_object(&saved_object, ctx);
+	return 0;
+}
+
+int dump_lazypage_block(struct vm_area_struct *vma, struct page_area *pa,
+			cpt_context_t *ctx)
+{
+	struct cpt_lazypage_block pgb;
+	loff_t saved_object;
+
+	cpt_push_object(&saved_object, ctx);
+
+	pgb.cpt_object = CPT_OBJ_LAZYPAGES;
+	pgb.cpt_hdrlen = sizeof(pgb);
+	pgb.cpt_content = CPT_CONTENT_VOID;
+	pgb.cpt_start = pa->start;
+	pgb.cpt_end = pa->end;
+#ifdef CONFIG_VZ_CHECKPOINT_LAZY
+	pgb.cpt_index = cpt_alloc_pgin_index(vma, pa->start,
+					     (pa->end-pa->start)/PAGE_SIZE, ctx);
+#endif
+	ctx->write(&pgb, sizeof(pgb), ctx);
+	cpt_close_object(ctx);
+	cpt_pop_object(&saved_object, ctx);
+	return 0;
+}
+
+static int can_expand(struct page_area *pa, struct page_desc *pd)
+{
+	if (pa->start == pa->end)	/* empty area accepts any page */
+		return 1;
+	if (pa->type != pd->type)	/* an area holds pages of one type only */
+		return 0;
+	switch (pa->type) {
+	case PD_ABSENT:	return pd->index == pa->pgoff + 1; /* offsets stay consecutive */
+	case PD_CLONE:	return pd->mm == pa->mm;	/* same source mm */
+	default:	return 1;
+	}
+}
+
+static int dump_one_vma(cpt_object_t *mmobj,
+			struct vm_area_struct *vma, struct cpt_context *ctx)
+{
+	struct cpt_vma_image *v = cpt_get_buf(ctx);
+	unsigned long addr;
+	loff_t saved_object;
+	struct cpt_page_block pgb;
+	struct page_area pa;
+	int cloned_pages = 0;
+
+	cpt_push_object(&saved_object, ctx);
+
+	v->cpt_object = CPT_OBJ_VMA;
+	v->cpt_hdrlen = sizeof(*v);
+	v->cpt_content = CPT_CONTENT_ARRAY;
+
+	v->cpt_start = vma->vm_start;
+	v->cpt_end = vma->vm_end;
+	v->cpt_flags = vma->vm_flags;
+	if (vma->vm_flags&VM_HUGETLB) {
+		eprintk_ctx("huge TLB VMAs are still not supported\n");
+		cpt_release_buf(ctx);
+		return -EINVAL;
+	}
+	v->cpt_pgprot = vma->vm_page_prot.pgprot;
+	v->cpt_pgoff = vma->vm_pgoff;
+	v->cpt_file = CPT_NULL;
+	v->cpt_type = CPT_VMA_TYPE_0;
+	v->cpt_anonvma = 0;
+
+	/* We have to remember what VMAs are bound to one anon_vma.
+	 * So, we store an identifier of group of VMAs. It is handy
+	 * to use absolute address of anon_vma as this identifier. */
+	v->cpt_anonvmaid = (unsigned long)vma->anon_vma;
+
+	if (vma->vm_file) {
+		struct file *filp;
+		cpt_object_t *obj = lookup_cpt_object(CPT_OBJ_FILE, vma->vm_file, ctx);
+		if (obj == NULL) BUG();
+		filp = obj->o_obj;
+		if (filp->f_op &&
+		    filp->f_op->read == NULL &&
+		    filp->f_dentry->d_inode->i_sb->s_magic == FSMAGIC_TMPFS)
+			v->cpt_type = CPT_VMA_TYPE_SHM;
+		v->cpt_file = obj->o_pos;
+	}
+
+	ctx->write(v, sizeof(*v), ctx);
+	cpt_release_buf(ctx);
+
+	pa.type = PD_ABSENT;
+	pa.pgoff = vma->vm_pgoff;
+	pa.mm = CPT_NULL;
+	pa.start = vma->vm_start;
+	pa.end = vma->vm_start;
+
+	for (addr = vma->vm_start; addr < vma->vm_end; addr += PAGE_SIZE) {
+		struct page_desc pd;
+
+		page_get_desc(mmobj, vma, addr, &pd, ctx);
+		cloned_pages += pd.shared;
+
+		if (pd.type == PD_FUNKEY) {
+			eprintk_ctx("dump_one_vma: funkey page\n");
+			return -EINVAL;
+		}
+
+#ifdef CONFIG_VZ_CHECKPOINT_LAZY
+		if (pd.type == PD_LAZY &&
+		    (ctx->lazy_vm == 0 || (vma->vm_flags&VM_LOCKED)))
+			pd.type = PD_COPY;
+#else
+		if (pd.type == PD_LAZY)
+			pd.type = PD_COPY;
+#endif
+
+		if (!can_expand(&pa, &pd)) {
+			if (pa.type == PD_COPY ||
+			    pa.type == PD_ZERO) {
+				pgb.cpt_start = pa.start;
+				pgb.cpt_end = pa.end;
+				dump_page_block(vma, &pgb, pa.type, ctx);
+			} else if (pa.type == PD_CLONE) {
+				dump_copypage_block(vma, &pa, ctx);
+				cloned_pages++;
+			} else if (pa.type == PD_LAZY) {
+				dump_lazypage_block(vma, &pa, ctx);
+			} else if (pa.type == PD_ABSENT &&
+				   pa.pgoff != (pa.end - vma->vm_start)/PAGE_SIZE + vma->vm_pgoff - 1) {
+				dump_remappage_block(vma, &pa, ctx);
+			}
+			pa.start = addr;
+		}
+		pa.type = pd.type;
+		pa.end = addr + PAGE_SIZE;
+		pa.pgoff = pd.index;
+		pa.mm = pd.mm;
+	}
+
+	if (pa.end > pa.start) {
+		if (pa.type == PD_COPY ||
+		    pa.type == PD_ZERO) {
+			pgb.cpt_start = pa.start;
+			pgb.cpt_end = pa.end;
+			dump_page_block(vma, &pgb, pa.type, ctx);
+		} else if (pa.type == PD_CLONE) {
+			dump_copypage_block(vma, &pa, ctx);
+			cloned_pages++;
+		} else if (pa.type == PD_LAZY) {
+			dump_lazypage_block(vma, &pa, ctx);
+		} else if (pa.type == PD_ABSENT &&
+			   pa.pgoff != (pa.end - vma->vm_start)/PAGE_SIZE + vma->vm_pgoff - 1) {
+			dump_remappage_block(vma, &pa, ctx);
+		}
+	}
+
+	if (cloned_pages) {
+		__u32 anonvma = 1;
+		loff_t anonpos = ctx->current_object + offsetof(struct cpt_vma_image, cpt_anonvma);
+		ctx->pwrite(&anonvma, 4, ctx, anonpos);
+	}
+
+	cpt_close_object(ctx);
+
+	cpt_pop_object(&saved_object, ctx);
+
+	return 0;
+}
+
+static int dump_one_aio_ctx(struct mm_struct *mm, struct kioctx *aio_ctx,
+			    cpt_context_t *ctx)
+{
+	loff_t saved_object;
+	struct cpt_aio_ctx_image aimg;
+
+	if (!list_empty(&aio_ctx->run_list) ||
+	    !list_empty(&aio_ctx->active_reqs) ||
+	    aio_ctx->reqs_active) {
+		eprintk_ctx("AIO is active after suspend\n");
+		return -EBUSY;
+	}
+
+	cpt_push_object(&saved_object, ctx);
+
+	aimg.cpt_next = CPT_ALIGN(sizeof(aimg));
+	aimg.cpt_object = CPT_OBJ_AIO_CONTEXT;
+	aimg.cpt_hdrlen = sizeof(aimg);
+	aimg.cpt_content = CPT_CONTENT_ARRAY;
+
+	aimg.cpt_max_reqs = aio_ctx->max_reqs;
+	aimg.cpt_ring_pages = aio_ctx->ring_info.nr_pages;
+	aimg.cpt_nr = aio_ctx->ring_info.nr;
+	aimg.cpt_tail = aio_ctx->ring_info.tail;
+	aimg.cpt_mmap_base = aio_ctx->ring_info.mmap_base;
+
+	ctx->write(&aimg, sizeof(aimg), ctx);
+
+	cpt_pop_object(&saved_object, ctx);
+	return 0;
+}
+
+static int dump_one_mm(cpt_object_t *obj, struct cpt_context *ctx)
+{
+	struct mm_struct *mm = obj->o_obj;
+	struct vm_area_struct *vma;
+	struct cpt_mm_image *v = cpt_get_buf(ctx);
+
+	cpt_open_object(obj, ctx);
+
+	v->cpt_next = -1;
+	v->cpt_object = CPT_OBJ_MM;
+	v->cpt_hdrlen = sizeof(*v);
+	v->cpt_content = CPT_CONTENT_ARRAY;
+
+	v->cpt_start_code = mm->start_code;
+	v->cpt_end_code = mm->end_code;
+	v->cpt_start_data = mm->start_data;
+	v->cpt_end_data = mm->end_data;
+	v->cpt_start_brk = mm->start_brk;
+	v->cpt_brk = mm->brk;
+	v->cpt_start_stack = mm->start_stack;
+	v->cpt_start_arg = mm->arg_start;
+	v->cpt_end_arg = mm->arg_end;
+	v->cpt_start_env = mm->env_start;
+	v->cpt_end_env = mm->env_end;
+	v->cpt_def_flags = mm->def_flags;
+	v->cpt_mmub = cpt_lookup_ubc(mm->mm_ub, ctx);
+	v->cpt_dumpable = mm->dumpable;
+	v->cpt_vps_dumpable = mm->vps_dumpable;
+	v->cpt_used_hugetlb = 0; /* not used */
+
+	ctx->write(v, sizeof(*v), ctx);
+	cpt_release_buf(ctx);
+
+	if (mm->context.size) {
+		loff_t saved_object;
+		struct cpt_obj_bits b;
+		int size;
+
+		dprintk_ctx("nontrivial LDT\n");
+
+		cpt_push_object(&saved_object, ctx);
+
+		cpt_open_object(NULL, ctx);
+		b.cpt_next = CPT_NULL;
+		b.cpt_object = CPT_OBJ_BITS;
+		b.cpt_hdrlen = sizeof(b);
+		b.cpt_content = CPT_CONTENT_MM_CONTEXT;
+		b.cpt_size = mm->context.size*LDT_ENTRY_SIZE;
+
+		ctx->write(&b, sizeof(b), ctx);
+
+		size = mm->context.size*LDT_ENTRY_SIZE;
+
+#if defined(CONFIG_X86_64) || LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,15)
+		ctx->write(mm->context.ldt, size, ctx);
+#else
+		for (i = 0; i < size; i += PAGE_SIZE) {
+			int nr = i / PAGE_SIZE, bytes;
+			char *kaddr = kmap(mm->context.ldt_pages[nr]);
+
+			bytes = size - i;
+			if (bytes > PAGE_SIZE)
+				bytes = PAGE_SIZE;
+			ctx->write(kaddr, bytes, ctx);
+			kunmap(mm->context.ldt_pages[nr]);
+		}
+#endif
+
+		cpt_close_object(ctx);
+		cpt_pop_object(&saved_object, ctx);
+	}
+
+	for (vma = mm->mmap; vma; vma = vma->vm_next) {
+		int err;
+
+#ifdef CONFIG_X86_64
+		if (vma->vm_start == 0xFFFFE000 &&
+		    vma->vm_end == 0xFFFFF000)
+			continue;
+#endif
+
+		if ((err = dump_one_vma(obj, vma, ctx)) != 0)
+			return err;
+	}
+
+	if (mm->ioctx_list) {
+		struct kioctx *aio_ctx;
+		int err;
+
+		for (aio_ctx = mm->ioctx_list; aio_ctx; aio_ctx = aio_ctx->next)
+			if ((err = dump_one_aio_ctx(mm, aio_ctx, ctx)) != 0)
+				return err;
+	}
+
+	cpt_close_object(ctx);
+
+	return 0;
+}
+
+int cpt_dump_vm(struct cpt_context *ctx)
+{
+	cpt_object_t *obj;
+
+	/* statistics are per dump: clear every counter, ucnt included */
+	scnt = scnt0 = zcnt = ucnt = 0;
+	cpt_open_section(ctx, CPT_SECT_MM);
+
+	for_each_object(obj, CPT_OBJ_MM) {
+		int err;
+
+		if ((err = dump_one_mm(obj, ctx)) != 0)
+			return err;
+	}
+
+	cpt_close_section(ctx);
+
+	if (scnt)
+		dprintk_ctx("cpt_dump_vm: %d shared private anon pages\n", scnt);
+	if (scnt0)
+		dprintk_ctx("cpt_dump_vm: %d anon pages are cloned\n", scnt0);
+	if (zcnt)
+		dprintk_ctx("cpt_dump_vm: %d silly pages canceled\n", zcnt);
+	return 0;
+}
diff -uprN linux-2.6.16/kernel/cpt/cpt_mm.h linux-2.6.16.ovz/kernel/cpt/cpt_mm.h
--- linux-2.6.16/kernel/cpt/cpt_mm.h	1969-12-31 19:00:00.000000000 -0500
+++ linux-2.6.16.ovz/kernel/cpt/cpt_mm.h	2006-07-05 08:34:56.000000000 -0400
@@ -0,0 +1,16 @@
+int cpt_collect_mm(cpt_context_t *);
+
+int cpt_dump_vm(struct cpt_context *ctx);
+
+__u32 rst_mm_flag(struct cpt_task_image *ti, struct cpt_context *ctx);
+int rst_mm_basic(cpt_object_t *obj, struct cpt_task_image *ti, struct cpt_context *ctx);
+int rst_mm_complete(struct cpt_task_image *ti, struct cpt_context *ctx);
+
+int cpt_mm_prepare(unsigned long veid);
+
+int cpt_free_pgin_dir(struct cpt_context *);
+int cpt_start_pagein(struct cpt_context *);
+int rst_setup_pagein(struct cpt_context *);
+int rst_complete_pagein(struct cpt_context *, int);
+int rst_pageind(struct cpt_context *);
+int rst_swapoff(struct cpt_context *);
diff -uprN linux-2.6.16/kernel/cpt/cpt_net.c linux-2.6.16.ovz/kernel/cpt/cpt_net.c
--- linux-2.6.16/kernel/cpt/cpt_net.c	1969-12-31 19:00:00.000000000 -0500
+++ linux-2.6.16.ovz/kernel/cpt/cpt_net.c	2006-07-05 08:34:56.000000000 -0400
@@ -0,0 +1,428 @@
+/*
+ *
+ *  kernel/cpt/cpt_net.c
+ *
+ *  Copyright (C) 2000-2005  SWsoft
+ *  All rights reserved.
+ *
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/version.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/file.h>
+#include <linux/mm.h>
+#include <linux/errno.h>
+#include <linux/fs.h>
+#include <linux/socket.h>
+#include <linux/netdevice.h>
+#include <linux/inetdevice.h>
+#include <net/addrconf.h>
+#include <linux/rtnetlink.h>
+#include <linux/ve.h>
+#include <linux/ve_proto.h>
+#include <linux/vzcalluser.h>
+#include <linux/cpt_image.h>
+
+#include "cpt_obj.h"
+#include "cpt_context.h"
+#include "cpt_kernel.h"
+#include "cpt_syscalls.h"
+
+int cpt_dump_link(struct cpt_context * ctx)
+{
+	struct net_device *dev;
+
+	cpt_open_section(ctx, CPT_SECT_NET_DEVICE);
+	for (dev = dev_base; dev; dev = dev->next) {
+		struct cpt_netdev_image v;
+
+		cpt_open_object(NULL, ctx);
+
+		v.cpt_next = CPT_NULL;
+		v.cpt_object = CPT_OBJ_NET_DEVICE;
+		v.cpt_hdrlen = sizeof(v);
+		v.cpt_content = CPT_CONTENT_VOID;
+
+		v.cpt_index = dev->ifindex;
+		v.cpt_flags = dev->flags;
+		memcpy(v.cpt_name, dev->name, IFNAMSIZ);
+		ctx->write(&v, sizeof(v), ctx);
+		cpt_close_object(ctx);
+
+		if (strcmp(dev->name, "lo") != 0 &&
+		    strcmp(dev->name, "venet0") != 0) {
+			eprintk_ctx("unsupported netdevice %s\n", dev->name);
+			cpt_close_section(ctx);
+			return -EBUSY;
+		}
+	}
+	cpt_close_section(ctx);
+	return 0;
+}
+
+int cpt_suspend_network(struct cpt_context *ctx)
+{
+	get_exec_env()->disable_net = 1;	/* set on the caller's VE; ctx is not consulted */
+	synchronize_net();	/* presumably lets in-flight network processing drain - confirm */
+	return 0;
+}
+
+int cpt_resume_network(struct cpt_context *ctx)
+{
+	struct ve_struct *ve = get_ve_by_id(ctx->ve_id);
+
+	if (ve == NULL)
+		return -ESRCH;	/* no VE found for ctx->ve_id */
+	ve->disable_net = 0;
+	put_ve(ve);	/* pairs with get_ve_by_id() above */
+	return 0;
+}
+
+int cpt_dump_ifaddr(struct cpt_context * ctx)
+{
+	struct net_device *dev;
+
+	cpt_open_section(ctx, CPT_SECT_NET_IFADDR);
+	for (dev = dev_base; dev; dev = dev->next) {
+		struct in_device *idev = in_dev_get(dev);
+		struct in_ifaddr *ifa;
+
+		if (!idev)
+			continue;
+
+		for (ifa = idev->ifa_list; ifa; ifa = ifa->ifa_next) {
+			struct cpt_ifaddr_image v;
+			cpt_open_object(NULL, ctx);
+
+			v.cpt_next = CPT_NULL;
+			v.cpt_object = CPT_OBJ_NET_IFADDR;
+			v.cpt_hdrlen = sizeof(v);
+			v.cpt_content = CPT_CONTENT_VOID;
+
+			v.cpt_index = dev->ifindex;
+			v.cpt_family = AF_INET;
+			v.cpt_masklen = ifa->ifa_prefixlen;
+			v.cpt_flags = ifa->ifa_flags;
+			v.cpt_scope = ifa->ifa_scope;
+			memset(&v.cpt_address, 0, sizeof(v.cpt_address));
+			memset(&v.cpt_peer, 0, sizeof(v.cpt_peer));
+			memset(&v.cpt_broadcast, 0, sizeof(v.cpt_broadcast));
+			v.cpt_address[0] = ifa->ifa_local;
+			v.cpt_peer[0] = ifa->ifa_address;
+			v.cpt_broadcast[0] = ifa->ifa_broadcast;
+			memcpy(v.cpt_label, ifa->ifa_label, IFNAMSIZ);
+			ctx->write(&v, sizeof(v), ctx);
+			cpt_close_object(ctx);
+		}
+		in_dev_put(idev);
+	}
+#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
+	for (dev = dev_base; dev; dev = dev->next) {
+		struct inet6_dev *idev = in6_dev_get(dev);
+		struct inet6_ifaddr *ifa;
+
+		if (!idev)
+			continue;
+
+		for (ifa = idev->addr_list; ifa; ifa = ifa->if_next) {
+			struct cpt_ifaddr_image v;
+
+			if (dev == &loopback_dev &&
+			    ifa->prefix_len == 128 &&
+			    ifa->addr.s6_addr32[0] == 0 &&
+			    ifa->addr.s6_addr32[1] == 0 &&
+			    ifa->addr.s6_addr32[2] == 0 &&
+			    ifa->addr.s6_addr32[3] == htonl(1))
+				continue;
+
+			cpt_open_object(NULL, ctx);
+
+			v.cpt_next = CPT_NULL;
+			v.cpt_object = CPT_OBJ_NET_IFADDR;
+			v.cpt_hdrlen = sizeof(v);
+			v.cpt_content = CPT_CONTENT_VOID;
+
+			v.cpt_index = dev->ifindex;
+			v.cpt_family = AF_INET6;
+			v.cpt_masklen = ifa->prefix_len;
+			v.cpt_flags = ifa->flags;
+			v.cpt_scope = ifa->scope;
+			memcpy(&v.cpt_address, &ifa->addr, 16);
+			memcpy(&v.cpt_peer, &ifa->addr, 16);
+			memset(&v.cpt_broadcast, 0, sizeof(v.cpt_broadcast));
+			memcpy(v.cpt_label, dev->name, IFNAMSIZ);
+			ctx->write(&v, sizeof(v), ctx);
+			cpt_close_object(ctx);
+		}
+		in6_dev_put(idev);
+	}
+#endif
+	cpt_close_section(ctx);
+	return 0;
+}
+
+static int cpt_dump_route(struct cpt_context * ctx)
+{
+	int err;
+	struct socket *sock;
+	struct msghdr msg;
+	struct iovec iov;
+	struct {
+		struct nlmsghdr nlh;
+		struct rtgenmsg g;
+	} req;
+	struct sockaddr_nl nladdr;
+	struct cpt_object_hdr v;
+	mm_segment_t oldfs;
+	char *pg;
+
+	err = sock_create_kern(AF_NETLINK, SOCK_DGRAM, NETLINK_ROUTE, &sock);
+	if (err)
+		return err;
+
+	memset(&nladdr, 0, sizeof(nladdr));
+	nladdr.nl_family = AF_NETLINK;
+
+	req.nlh.nlmsg_len = sizeof(req);
+	req.nlh.nlmsg_type = RTM_GETROUTE;
+	req.nlh.nlmsg_flags = NLM_F_ROOT|NLM_F_MATCH|NLM_F_REQUEST;
+	req.nlh.nlmsg_pid = 0;
+	req.g.rtgen_family = AF_INET;
+
+	iov.iov_base=&req;
+	iov.iov_len=sizeof(req);
+	msg.msg_name=&nladdr;
+	msg.msg_namelen=sizeof(nladdr);
+	msg.msg_iov=&iov;
+	msg.msg_iovlen=1;
+	msg.msg_control=NULL;
+	msg.msg_controllen=0;
+	msg.msg_flags=MSG_DONTWAIT;
+
+	oldfs = get_fs(); set_fs(KERNEL_DS);
+	err = sock_sendmsg(sock, &msg, sizeof(req));
+	set_fs(oldfs);
+
+	if (err < 0)
+		goto out_sock;
+
+	pg = (char*)__get_free_page(GFP_KERNEL);
+	if (pg == NULL) {
+		err = -ENOMEM;
+		goto out_sock;
+	}
+
+	cpt_open_section(ctx, CPT_SECT_NET_ROUTE);
+	cpt_open_object(NULL, ctx);
+	v.cpt_next = CPT_NULL;
+	v.cpt_object = CPT_OBJ_NET_ROUTE;
+	v.cpt_hdrlen = sizeof(v);
+	v.cpt_content = CPT_CONTENT_NLMARRAY;
+
+	ctx->write(&v, sizeof(v), ctx);
+
+#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
+restart:
+#endif
+	for (;;) {
+		struct nlmsghdr *h;
+
+		iov.iov_base = pg;
+		iov.iov_len = PAGE_SIZE;
+
+		oldfs = get_fs(); set_fs(KERNEL_DS);
+		err = sock_recvmsg(sock, &msg, PAGE_SIZE, MSG_DONTWAIT);
+		set_fs(oldfs);
+
+		if (err < 0)
+			goto out_sock_pg;
+		if (msg.msg_flags & MSG_TRUNC) {
+			err = -ENOBUFS;
+			goto out_sock_pg;
+		}
+
+		h = (struct nlmsghdr*)pg;
+		while (NLMSG_OK(h, err)) {
+			if (h->nlmsg_type == NLMSG_DONE) {
+				err = 0;
+				goto done;
+			}
+			if (h->nlmsg_type == NLMSG_ERROR) {
+				struct nlmsgerr *errm = (struct nlmsgerr*)NLMSG_DATA(h);
+				err = errm->error;
+				eprintk_ctx("NLMSG error: %d\n", errm->error);
+				goto done;
+			}
+			if (h->nlmsg_type != RTM_NEWROUTE) {
+				eprintk_ctx("NLMSG: %d\n", h->nlmsg_type);
+				err = -EINVAL;
+				goto done;
+			}
+			ctx->write(h, NLMSG_ALIGN(h->nlmsg_len), ctx);
+			h = NLMSG_NEXT(h, err);
+		}
+		if (err) {
+			eprintk_ctx("!!!Remnant of size %d %d %d\n", err, h->nlmsg_len, h->nlmsg_type);
+			err = -EINVAL;
+			break;
+		}
+	}
+done:
+#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
+	if (!err && req.g.rtgen_family == AF_INET) {
+		req.g.rtgen_family = AF_INET6;
+		iov.iov_base=&req;
+		iov.iov_len=sizeof(req);
+		msg.msg_name=&nladdr;
+		msg.msg_namelen=sizeof(nladdr);
+		msg.msg_iov=&iov;
+		msg.msg_iovlen=1;
+		msg.msg_control=NULL;
+		msg.msg_controllen=0;
+		msg.msg_flags=MSG_DONTWAIT;
+
+		oldfs = get_fs(); set_fs(KERNEL_DS);
+		err = sock_sendmsg(sock, &msg, sizeof(req));
+		set_fs(oldfs);
+
+		if (err > 0)
+			goto restart;
+	}
+#endif
+	cpt_close_object(ctx);
+	cpt_close_section(ctx);
+
+out_sock_pg:
+	free_page((unsigned long)pg);
+out_sock:
+	sock_release(sock);
+	return err;
+}
+
+static int dumpfn(void *arg)
+{
+	int i;
+	int *pfd = arg;
+	char *argv[] = { "iptables-save", "-c", NULL };
+
+	i = real_env_create(VEID(get_exec_env()), VE_ENTER|VE_SKIPLOCK, 2, NULL, 0);
+	if (i < 0) {
+		eprintk("cannot enter ve to dump iptables\n");
+		module_put(THIS_MODULE);
+		return 1;
+	}
+
+	if (pfd[1] != 1)
+		sc_dup2(pfd[1], 1);
+
+	for (i=0; i<current->files->fdt->max_fds; i++) {
+		if (i != 1)
+			sc_close(i);
+	}
+
+	module_put(THIS_MODULE);
+
+	set_fs(KERNEL_DS);
+	i = sc_execve("/sbin/iptables-save", argv, NULL);
+	eprintk("failed to exec /sbin/iptables-save: %d\n", i);
+	return -1;
+}
+
+
+static int cpt_dump_iptables(struct cpt_context * ctx)
+{
+	int err, pid, n;
+	int pfd[2];
+	struct file *f;
+	struct cpt_object_hdr v;
+	char buf[16];
+	loff_t pos;
+
+	err = sc_pipe(pfd);	/* dumpfn writes to pfd[1], we read from pfd[0] */
+	if (err < 0) {
+		eprintk_ctx("sc_pipe: %d\n", err);
+		return err;
+	}
+	err = pid = local_kernel_thread(dumpfn, (void*)pfd, SIGCHLD, 0);
+	if (err < 0) {
+		eprintk_ctx("local_kernel_thread: %d\n", err);
+		goto out;
+	}
+	f = fget(pfd[0]);
+	sc_close(pfd[1]);
+	sc_close(pfd[0]);
+	if (f == NULL) {
+		/* fget() may return NULL: reap the helper instead of oopsing below */
+		sc_waitx(pid, 0);
+		return -EBADF;
+	}
+
+	cpt_open_section(ctx, CPT_SECT_NET_IPTABLES);
+	cpt_open_object(NULL, ctx);
+	v.cpt_next = CPT_NULL;
+	v.cpt_object = CPT_OBJ_NAME;
+	v.cpt_hdrlen = sizeof(v);
+	v.cpt_content = CPT_CONTENT_NAME;
+	ctx->write(&v, sizeof(v), ctx);
+	pos = ctx->file->f_pos;
+	do {
+		mm_segment_t oldfs;
+
+		oldfs = get_fs(); set_fs(KERNEL_DS);
+		n = f->f_op->read(f, buf, sizeof(buf), &f->f_pos);
+		set_fs(oldfs);
+		if (n > 0)
+			ctx->write(buf, n, ctx);
+	} while (n > 0);
+
+	if (n < 0)
+		eprintk_ctx("read: %d\n", n);
+	fput(f);
+	if ((err = sc_waitx(pid, 0)) < 0)
+		eprintk_ctx("wait4: %d\n", err);
+
+	if (ctx->file->f_pos != pos) {
+		/* something was written: terminate it with a NUL and align */
+		buf[0] = 0;
+		ctx->write(buf, 1, ctx);
+		ctx->align(ctx);
+		cpt_close_object(ctx);
+		cpt_close_section(ctx);
+	} else {
+		/* nothing was written: drop the section and rewind the file */
+		pos = ctx->current_section;
+		cpt_close_object(ctx);
+		cpt_close_section(ctx);
+		ctx->sections[CPT_SECT_NET_IPTABLES] = CPT_NULL;
+		ctx->file->f_pos = pos;
+	}
+	return n;
+
+out:
+	if (pfd[1] >= 0)
+		sc_close(pfd[1]);
+	if (pfd[0] >= 0)
+		sc_close(pfd[0]);
+	return err;
+}
+
+int cpt_dump_ifinfo(struct cpt_context * ctx)
+{
+	int err;
+
+	/* links, addresses, routes, iptables - in this order, stop at first failure */
+	if ((err = cpt_dump_link(ctx)) != 0)
+		return err;
+	if ((err = cpt_dump_ifaddr(ctx)) != 0)
+		return err;
+	if ((err = cpt_dump_route(ctx)) != 0)
+		return err;
+	return cpt_dump_iptables(ctx);
+}
diff -uprN linux-2.6.16/kernel/cpt/cpt_net.h linux-2.6.16.ovz/kernel/cpt/cpt_net.h
--- linux-2.6.16/kernel/cpt/cpt_net.h	1969-12-31 19:00:00.000000000 -0500
+++ linux-2.6.16.ovz/kernel/cpt/cpt_net.h	2006-07-05 08:34:56.000000000 -0400
@@ -0,0 +1,7 @@
+int cpt_dump_ifinfo(struct cpt_context *ctx);	/* dumps links, addresses, routes, then iptables */
+int rst_restore_net(struct cpt_context *ctx);
+int cpt_suspend_network(struct cpt_context *ctx);
+int cpt_resume_network(struct cpt_context *ctx);
+int rst_resume_network(struct cpt_context *ctx);
+int cpt_dump_ip_conntrack(struct cpt_context *ctx);
+int rst_restore_ip_conntrack(struct cpt_context * ctx);
diff -uprN linux-2.6.16/kernel/cpt/cpt_obj.c linux-2.6.16.ovz/kernel/cpt/cpt_obj.c
--- linux-2.6.16/kernel/cpt/cpt_obj.c	1969-12-31 19:00:00.000000000 -0500
+++ linux-2.6.16.ovz/kernel/cpt/cpt_obj.c	2006-07-05 08:34:56.000000000 -0400
@@ -0,0 +1,172 @@
+/*
+ *
+ *  kernel/cpt/cpt_obj.c
+ *
+ *  Copyright (C) 2000-2005  SWsoft
+ *  All rights reserved.
+ *
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/version.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/file.h>
+#include <linux/mm.h>
+#include <linux/errno.h>
+
+#include "cpt_obj.h"
+#include "cpt_context.h"
+
+/* Allocate an object (use count 1) and account it in ctx; NULL on failure. */
+cpt_object_t *alloc_cpt_object(int gfp, struct cpt_context *ctx)
+{
+	cpt_object_t *obj = kmalloc(sizeof(cpt_object_t), gfp);
+
+	if (obj) {
+		INIT_LIST_HEAD(&obj->o_list);
+		INIT_LIST_HEAD(&obj->o_hash);
+		INIT_LIST_HEAD(&obj->o_alist);
+		obj->o_count = 1;
+		obj->o_pos = CPT_NULL;
+		obj->o_ppos = CPT_NULL;	/* kmalloc() does not zero: leave no field as garbage */
+		obj->o_lock = 0;
+		obj->o_parent = NULL;
+		obj->o_index = CPT_NOINDEX;
+		obj->o_obj = NULL;
+		obj->o_image = NULL;
+		ctx->objcount++;
+	}
+	return obj;
+}
+
+void free_cpt_object(cpt_object_t *obj, cpt_context_t *ctx)
+{
+	ctx->objcount--;		/* undo the accounting done at allocation */
+	list_del(&obj->o_alist);	/* unlink from the o_alist chain */
+	kfree(obj);
+}
+
+/* Append obj to the tail of ctx's list of objects of the given type. */
+void intern_cpt_object(enum _cpt_object_type type, cpt_object_t *obj, cpt_context_t *ctx)
+{
+	list_add_tail(&obj->o_list, &ctx->object_array[type]);
+}
+
+/* Link obj right after head on head's list; type and ctx are not used. */
+void insert_cpt_object(enum _cpt_object_type type, cpt_object_t *obj,
+			cpt_object_t *head, cpt_context_t *ctx)
+{
+	list_add(&obj->o_list, &head->o_list);
+}
+
+/* Find the object wrapping p (taking a reference) or create and intern one. */
+cpt_object_t * __cpt_object_add(enum _cpt_object_type type, void *p,
+		unsigned gfp_mask, cpt_context_t *ctx)
+{
+	cpt_object_t *found = lookup_cpt_object(type, p, ctx);
+
+	if (found != NULL) {
+		/* Already tracked: only the use count changes. */
+		found->o_count++;
+		return found;
+	}
+
+	found = alloc_cpt_object(gfp_mask, ctx);
+	if (found == NULL)
+		return NULL;
+
+	if (p != NULL)
+		cpt_obj_setobj(found, p, ctx);
+	intern_cpt_object(type, found, ctx);
+	return found;
+}
+
+/* __cpt_object_add() with GFP_KERNEL as the allocation mask. */
+cpt_object_t * cpt_object_add(enum _cpt_object_type type, void *p, cpt_context_t *ctx)
+{
+	return __cpt_object_add(type, p, GFP_KERNEL, ctx);
+}
+
+/* Take a reference on the object wrapping p; NULL if p is not tracked. */
+cpt_object_t * cpt_object_get(enum _cpt_object_type type, void *p, cpt_context_t *ctx)
+{
+	cpt_object_t *found = lookup_cpt_object(type, p, ctx);
+
+	/* A miss is reported to the caller; nothing is allocated here. */
+	if (found == NULL)
+		return NULL;
+
+	found->o_count++;
+	return found;
+}
+
+int cpt_object_init(cpt_context_t *ctx)
+{
+	int type;
+
+	/* Start every per-type object list out empty. */
+	for (type = 0; type < CPT_OBJ_MAX; type++)
+		INIT_LIST_HEAD(&ctx->object_array[type]);
+	return 0;
+}
+
+/* Free every tracked object on every type list, then verify the count. */
+int cpt_object_destroy(cpt_context_t *ctx)
+{
+	cpt_object_t *obj, *next;
+	int type;
+
+	for (type = 0; type < CPT_OBJ_MAX; type++) {
+		/* _safe variant: each entry is unlinked and freed while walking. */
+		list_for_each_entry_safe(obj, next, &ctx->object_array[type], o_list) {
+			list_del(&obj->o_list);
+			kfree(obj->o_image);	/* kfree(NULL) is a no-op */
+			free_cpt_object(obj, ctx);
+		}
+	}
+	if (ctx->objcount != 0)
+		eprintk_ctx("BUG: ctx->objcount=%d\n", ctx->objcount);
+	return 0;
+}
+
+/* Linear search of ctx's type list for the object whose o_obj is p. */
+cpt_object_t *lookup_cpt_object(enum _cpt_object_type type, void *p, struct cpt_context *ctx)
+{
+	cpt_object_t *obj;
+
+	for_each_object(obj, type) {	/* the macro walks ctx->object_array[type] */
+		if (obj->o_obj == p)
+			return obj;
+	}
+	return NULL;
+}
+
+/* Linear search of ctx's type list for the object whose o_pos equals pos. */
+cpt_object_t *lookup_cpt_obj_bypos(enum _cpt_object_type type, loff_t pos, struct cpt_context *ctx)
+{
+	cpt_object_t *obj;
+
+	for_each_object(obj, type) {	/* the macro walks ctx->object_array[type] */
+		if (obj->o_pos == pos)
+			return obj;
+	}
+	return NULL;
+}
+
+/* Linear search of ctx's type list for the object whose o_index is index. */
+cpt_object_t *lookup_cpt_obj_byindex(enum _cpt_object_type type, __u32 index, struct cpt_context *ctx)
+{
+	cpt_object_t *obj;
+
+	for_each_object(obj, type) {	/* the macro walks ctx->object_array[type] */
+		if (obj->o_index == index)
+			return obj;
+	}
+	return NULL;
+}
diff -uprN linux-2.6.16/kernel/cpt/cpt_obj.h linux-2.6.16.ovz/kernel/cpt/cpt_obj.h
--- linux-2.6.16/kernel/cpt/cpt_obj.h	1969-12-31 19:00:00.000000000 -0500
+++ linux-2.6.16.ovz/kernel/cpt/cpt_obj.h	2006-07-05 08:34:56.000000000 -0400
@@ -0,0 +1,62 @@
+#ifndef __CPT_OBJ_H_
+#define __CPT_OBJ_H_ 1
+
+#include <linux/list.h>
+#include <linux/cpt_image.h>
+
+typedef struct _cpt_object	/* bookkeeping record wrapping one tracked pointer */
+{
+	struct list_head	o_list;		/* link in ctx->object_array[type] */
+	struct list_head	o_hash;		/* initialised empty at allocation */
+	int			o_count;	/* use count: 1 at allocation, bumped by add/get */
+	int			o_index;	/* CPT_NOINDEX until cpt_obj_setindex() */
+	int			o_lock;		/* 0 at allocation */
+	loff_t			o_pos;		/* CPT_NULL until cpt_obj_setpos() */
+	loff_t			o_ppos;		/* NOTE(review): presumably a second image position — confirm */
+	void			*o_obj;		/* the tracked pointer; key of lookup_cpt_object() */
+	void			*o_image;	/* kfree()d by cpt_object_destroy() */
+	void			*o_parent;	/* NULL at allocation */
+	struct list_head	o_alist;	/* unlinked by free_cpt_object() */
+} cpt_object_t;
+
+struct cpt_context;
+
+/* Iterate obj over the list for type; needs a variable named ctx in scope. */
+#define for_each_object(obj, type) list_for_each_entry(obj, &ctx->object_array[type], o_list)
+
+extern cpt_object_t *alloc_cpt_object(int gfp, struct cpt_context *ctx);
+extern void free_cpt_object(cpt_object_t *obj, struct cpt_context *ctx);
+
+cpt_object_t *lookup_cpt_object(enum _cpt_object_type type, void *p, struct cpt_context *ctx);
+cpt_object_t *lookup_cpt_obj_bypos(enum _cpt_object_type type, loff_t pos, struct cpt_context *ctx);
+cpt_object_t *lookup_cpt_obj_byindex(enum _cpt_object_type type, __u32 index, struct cpt_context *ctx);
+
+static inline void cpt_obj_setpos(cpt_object_t *cpt, loff_t pos, struct cpt_context *ctx)
+{
+	cpt->o_pos = pos;
+	/* TODO: add to a pos hash table; nothing is hashed here, ctx is unused */
+}
+
+static inline void cpt_obj_setobj(cpt_object_t *cpt, void *ptr, struct cpt_context *ctx)
+{
+	cpt->o_obj = ptr;
+	/* TODO: add to a hash table; nothing is hashed here, ctx is unused */
+}
+
+static inline void cpt_obj_setindex(cpt_object_t *cpt, __u32 index, struct cpt_context *ctx)
+{
+	cpt->o_index = index;
+	/* TODO: add to an index hash table; nothing is hashed here, ctx is unused */
+}
+
+
+extern void intern_cpt_object(enum _cpt_object_type type, cpt_object_t *obj, struct cpt_context *ctx);
+extern void insert_cpt_object(enum _cpt_object_type type, cpt_object_t *obj, cpt_object_t *head, struct cpt_context *ctx);
+extern cpt_object_t *cpt_object_add(enum _cpt_object_type type, void *p, struct cpt_context *ctx);
+extern cpt_object_t *__cpt_object_add(enum _cpt_object_type type, void *p, unsigned int gfp_mask, struct cpt_context *ctx);
+extern cpt_object_t *cpt_object_get(enum _cpt_object_type type, void *p, struct cpt_context *ctx);
+
+extern int cpt_object_init(struct cpt_context *ctx);
+extern int cpt_object_destroy(struct cpt_context *ctx);
+
+#endif /* __CPT_OBJ_H_ */
diff -uprN linux-2.6.16/kernel/cpt/cpt_proc.c linux-2.6.16.ovz/kernel/cpt/cpt_proc.c
--- linux-2.6.16/kernel/cpt/cpt_proc.c	1969-12-31 19:00:00.000000000 -0500
+++ linux-2.6.16.ovz/kernel/cpt/cpt_proc.c	2006-07-05 08:34:56.000000000 -0400
@@ -0,0 +1,575 @@
+/*
+ *
+ *  kernel/cpt/cpt_proc.c
+ *
+ *  Copyright (C) 2000-2005  SWsoft
+ *  All rights reserved.
+ *
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/version.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/file.h>
+#include <linux/errno.h>
+#include <linux/mm.h>
+#include <linux/list.h>
+#include <linux/proc_fs.h>
+#include <linux/smp_lock.h>
+#include <asm/uaccess.h>
+#include <linux/cpt_ioctl.h>
+
+#include "cpt_obj.h"
+#include "cpt_context.h"
+#include "cpt_dump.h"
+#include "cpt_mm.h"
+#include "cpt_kernel.h"
+
+MODULE_AUTHOR("Alexey Kuznetsov <alexey@sw.ru>");
+MODULE_LICENSE("GPL");
+
+/* List of contexts and lock protecting the list */
+static struct list_head cpt_context_list;
+static spinlock_t cpt_context_lock;
+
+static int proc_read(char *buffer, char **start, off_t offset,
+		     int length, int *eof, void *data)	/* read_proc handler: one text line per context */
+{
+	off_t pos = 0;		/* stream offset just past the last line produced */
+	off_t begin = 0;	/* stream offset that buffer[0] currently stands for */
+	int len = 0;		/* bytes currently held in buffer */
+	cpt_context_t *ctx;
+
+	len += sprintf(buffer, "Ctx      Id       VE       State\n");
+
+	spin_lock(&cpt_context_lock);
+
+	list_for_each_entry(ctx, &cpt_context_list, ctx_list) {
+		len += sprintf(buffer+len,"%p %08x %-8u %d",
+			       ctx,
+			       ctx->contextid,
+			       ctx->ve_id,
+			       ctx->ctx_state
+			       );
+
+		buffer[len++] = '\n';
+
+		pos = begin+len;
+		if (pos < offset) {	/* all output so far precedes the request: discard it */
+			len = 0;
+			begin = pos;
+		}
+		if (pos > offset+length)	/* request is filled: stop without signalling EOF */
+			goto done;
+	}
+	*eof = 1;	/* the whole list was walked */
+
+done:
+	spin_unlock(&cpt_context_lock);
+	*start = buffer + (offset - begin);	/* first requested byte inside buffer */
+	len -= (offset - begin);
+	if(len > length)
+		len = length;
+	if(len < 0)
+		len = 0;
+	return len;
+}
+
+void cpt_context_release(cpt_context_t *ctx)	/* unlink ctx, release what it holds, free it */
+{
+	list_del(&ctx->ctx_list);	/* the callers in this file hold cpt_context_lock here */
+	spin_unlock(&cpt_context_lock);	/* the teardown below runs without the lock */
+
+	if (ctx->ctx_state > 0)
+		cpt_resume(ctx);
+	ctx->ctx_state = CPT_CTX_ERROR;
+
+#ifdef CONFIG_VZ_CHECKPOINT_LAZY
+	if (ctx->pgin_task)
+		put_task_struct(ctx->pgin_task);
+	if (ctx->pgin_dir)
+		cpt_free_pgin_dir(ctx);
+	if (ctx->pagein_file_out)
+		fput(ctx->pagein_file_out);
+	if (ctx->pagein_file_in)
+		fput(ctx->pagein_file_in);
+#endif
+	if (ctx->objcount)
+		eprintk_ctx("%d objects leaked\n", ctx->objcount);
+	if (ctx->file)
+		fput(ctx->file);
+	cpt_flush_error(ctx);	/* called while errorfile and error_msg are still held */
+	if (ctx->errorfile) {
+		fput(ctx->errorfile);
+		ctx->errorfile = NULL;
+	}
+	if (ctx->error_msg) {
+		free_page((unsigned long)ctx->error_msg);
+		ctx->error_msg = NULL;
+	}
+	if (ctx->statusfile)
+		fput(ctx->statusfile);
+	if (ctx->lockfile)
+		fput(ctx->lockfile);
+	kfree(ctx);
+
+	spin_lock(&cpt_context_lock);	/* re-taken: the callers unlock after this returns */
+}
+
+static void __cpt_context_put(cpt_context_t *ctx)
+{
+	if (--ctx->refcount == 0)	/* last reference gone: tear the context down */
+		cpt_context_release(ctx);
+}
+
+static void cpt_context_put(cpt_context_t *ctx)
+{
+	spin_lock(&cpt_context_lock);
+	__cpt_context_put(ctx);	/* may free ctx; do not touch it afterwards */
+	spin_unlock(&cpt_context_lock);
+}
+
+/* Allocate, initialise and publish a new context; NULL if kmalloc() fails. */
+cpt_context_t * cpt_context_open(void)
+{
+	cpt_context_t *ctx = kmalloc(sizeof(*ctx), GFP_KERNEL);
+
+	if (ctx == NULL)
+		return NULL;
+	cpt_context_init(ctx);
+	if ((ctx->error_msg = (char*)__get_free_page(GFP_KERNEL)) != NULL)
+		ctx->error_msg[0] = 0;
+	spin_lock(&cpt_context_lock);	/* publish only once ctx is fully set up */
+	list_add_tail(&ctx->ctx_list, &cpt_context_list);
+	spin_unlock(&cpt_context_lock);
+	return ctx;
+}
+
+/* Find a context by id and take a reference on it; NULL if there is none. */
+static cpt_context_t * cpt_context_lookup(unsigned int contextid)
+{
+	cpt_context_t *ctx, *found = NULL;
+
+	spin_lock(&cpt_context_lock);
+	list_for_each_entry(ctx, &cpt_context_list, ctx_list)
+		if (ctx->contextid == contextid) {
+			found = ctx;
+			found->refcount++;
+			break;
+		}
+	spin_unlock(&cpt_context_lock);
+	return found;
+}
+
+int cpt_context_lookup_veid(unsigned int veid)
+{
+	cpt_context_t *ctx;
+	int in_use = 0;	/* becomes 1 if a context with ctx_state > 0 has this veid */
+
+	spin_lock(&cpt_context_lock);
+	list_for_each_entry(ctx, &cpt_context_list, ctx_list)
+		if (ctx->ve_id == veid && ctx->ctx_state > 0) {
+			in_use = 1;
+			break;
+		}
+	spin_unlock(&cpt_context_lock);
+	return in_use;
+}
+
+/* ioctl handler of the cpt proc file: context binding plus the CPT_* commands. */
+static int cpt_ioctl(struct inode * inode, struct file * file, unsigned int cmd, unsigned long arg)
+{
+	int err = 0;
+	cpt_context_t *ctx;
+	struct file *dfile = NULL;
+
+	unlock_kernel();	/* re-taken at out_lock before returning */
+
+	if (cmd == CPT_VMPREP) {
+#ifdef CONFIG_VZ_CHECKPOINT_LAZY
+		err = cpt_mm_prepare(arg);
+#else
+		err = -EINVAL;
+#endif
+		goto out_lock;
+	}
+
+	if (cmd == CPT_TEST_CAPS) {
+		unsigned int src_flags, dst_flags = arg;
+
+		src_flags = test_cpu_caps();
+		test_one_flag(src_flags, dst_flags, CPT_CPU_X86_CMOV, "cmov", err);
+		test_one_flag(src_flags, dst_flags, CPT_CPU_X86_FXSR, "fxsr", err);
+		test_one_flag(src_flags, dst_flags, CPT_CPU_X86_SSE, "sse", err);
+		test_one_flag(src_flags, dst_flags, CPT_CPU_X86_SSE2, "sse2", err);
+		test_one_flag(src_flags, dst_flags, CPT_CPU_X86_MMX, "mmx", err);
+		test_one_flag(src_flags, dst_flags, CPT_CPU_X86_3DNOW, "3dnow", err);
+		test_one_flag(src_flags, dst_flags, CPT_CPU_X86_3DNOW2, "3dnowext", err);
+		test_one_flag(src_flags, dst_flags, CPT_CPU_X86_SEP, "sysenter", err);
+		goto out_lock;
+	}
+
+	if (cmd == CPT_JOIN_CONTEXT || cmd == CPT_PUT_CONTEXT) {
+		cpt_context_t *old_ctx;
+
+		ctx = NULL;
+		if (cmd == CPT_JOIN_CONTEXT) {
+			err = -ENOENT;
+			ctx = cpt_context_lookup(arg);
+			if (!ctx)
+				goto out_lock;
+		}
+
+		spin_lock(&cpt_context_lock);
+		old_ctx = (cpt_context_t*)file->private_data;
+		file->private_data = ctx;	/* NULL for CPT_PUT_CONTEXT */
+
+		if (old_ctx) {
+			if (cmd == CPT_PUT_CONTEXT && old_ctx->sticky) {
+				old_ctx->sticky = 0;
+				old_ctx->refcount--;	/* the reference taken by CPT_GET_CONTEXT */
+			}
+			__cpt_context_put(old_ctx);
+		}
+		spin_unlock(&cpt_context_lock);
+		err = 0;
+		goto out_lock;
+	}
+
+	spin_lock(&cpt_context_lock);
+	ctx = (cpt_context_t*)file->private_data;
+	if (ctx)
+		ctx->refcount++;	/* held for this call; dropped at out_nosem */
+	spin_unlock(&cpt_context_lock);
+
+	if (!ctx) {	/* no context bound to this file yet: create one */
+		cpt_context_t *old_ctx;
+
+		err = -ENOMEM;
+		ctx = cpt_context_open();
+		if (!ctx)
+			goto out_lock;
+
+		spin_lock(&cpt_context_lock);
+		old_ctx = (cpt_context_t*)file->private_data;
+		if (!old_ctx) {
+			ctx->refcount++;
+			file->private_data = ctx;
+		} else {
+			old_ctx->refcount++;
+		}
+		if (old_ctx) {	/* a context was bound meanwhile: use it, drop ours */
+			__cpt_context_put(ctx);
+			ctx = old_ctx;
+		}
+		spin_unlock(&cpt_context_lock);
+	}
+
+	if (cmd == CPT_GET_CONTEXT) {
+		unsigned int contextid = (unsigned int)arg;
+
+		if (ctx->contextid && ctx->contextid != contextid) {
+			err = -EINVAL;
+			goto out_nosem;
+		}
+		if (!ctx->contextid) {
+			cpt_context_t *c1 = cpt_context_lookup(contextid);
+			if (c1) {
+				cpt_context_put(c1);
+				err = -EEXIST;
+				goto out_nosem;
+			}
+			ctx->contextid = contextid;
+		}
+		spin_lock(&cpt_context_lock);
+		if (!ctx->sticky) {
+			ctx->sticky = 1;
+			ctx->refcount++;	/* extra reference owned by the sticky flag */
+		}
+		spin_unlock(&cpt_context_lock);
+		goto out_nosem;
+	}
+
+	down(&ctx->main_sem);	/* released at out */
+
+	err = -EBUSY;
+	if (ctx->ctx_state < 0)
+		goto out;
+
+	err = 0;
+	switch (cmd) {
+	case CPT_SET_DUMPFD:
+		if (ctx->ctx_state == CPT_CTX_DUMPING) {
+			err = -EBUSY;
+			break;
+		}
+		if ((int)arg >= 0) {	/* arg is unsigned: test as int so a negative fd clears the file */
+			dfile = fget(arg);
+			if (dfile == NULL) {	/* fget() returns NULL, never an ERR_PTR */
+				err = -EBADF;
+				break;
+			}
+			if (dfile->f_op == NULL ||
+			    dfile->f_op->write == NULL) {
+				fput(dfile);
+				err = -EBADF;
+				break;
+			}
+		}
+		if (ctx->file)
+			fput(ctx->file);
+		ctx->file = dfile;
+		break;
+	case CPT_SET_ERRORFD:
+		if ((int)arg >= 0) {	/* negative fd: just clear the file */
+			dfile = fget(arg);
+			if (dfile == NULL) {
+				err = -EBADF;
+				break;
+			}
+		}
+		if (ctx->errorfile)
+			fput(ctx->errorfile);
+		ctx->errorfile = dfile;
+		break;
+#ifdef CONFIG_VZ_CHECKPOINT_LAZY
+	case CPT_SET_PAGEINFDIN:
+		if ((int)arg >= 0) {	/* negative fd: just clear the file */
+			dfile = fget(arg);
+			if (dfile == NULL) {
+				err = -EBADF;
+				break;
+			}
+		}
+		if (ctx->pagein_file_in)
+			fput(ctx->pagein_file_in);
+		ctx->pagein_file_in = dfile;
+		break;
+	case CPT_SET_PAGEINFDOUT:
+		if ((int)arg >= 0) {	/* negative fd: just clear the file */
+			dfile = fget(arg);
+			if (dfile == NULL) {
+				err = -EBADF;
+				break;
+			}
+		}
+		if (ctx->pagein_file_out)
+			fput(ctx->pagein_file_out);
+		ctx->pagein_file_out = dfile;
+		break;
+	case CPT_SET_LAZY:
+		ctx->lazy_vm = arg;
+		break;
+	case CPT_PAGEIND:
+		err = cpt_start_pagein(ctx);
+		break;
+#endif
+	case CPT_SET_VEID:
+		if (ctx->ctx_state > 0) {
+			err = -EBUSY;
+			break;
+		}
+		ctx->ve_id = arg;
+		break;
+	case CPT_SET_CPU_FLAGS:
+		if (ctx->ctx_state > 0) {
+			err = -EBUSY;
+			break;
+		}
+		ctx->dst_cpu_flags = arg;
+		ctx->src_cpu_flags = test_cpu_caps();
+		break;
+	case CPT_SUSPEND:
+		if (cpt_context_lookup_veid(ctx->ve_id) ||
+		    ctx->ctx_state > 0) {
+			err = -EBUSY;
+			break;
+		}
+		ctx->ctx_state = CPT_CTX_SUSPENDING;
+		err = cpt_vps_suspend(ctx);
+		if (err) {
+			if (cpt_resume(ctx) == 0)
+				ctx->ctx_state = CPT_CTX_IDLE;
+		} else {
+			ctx->ctx_state = CPT_CTX_SUSPENDED;
+		}
+		break;
+	case CPT_DUMP:
+		if (!ctx->ctx_state) {
+			err = -ENOENT;
+			break;
+		}
+		err = cpt_dump(ctx);
+		break;
+	case CPT_RESUME:
+		if (ctx->ctx_state == CPT_CTX_IDLE) {
+			err = -ENOENT;
+			break;
+		}
+		err = cpt_resume(ctx);
+		if (!err)
+			ctx->ctx_state = CPT_CTX_IDLE;
+		break;
+	case CPT_KILL:
+		if (ctx->ctx_state == CPT_CTX_IDLE) {
+			err = -ENOENT;
+			break;
+		}
+		err = cpt_kill(ctx);
+		if (!err)
+			ctx->ctx_state = CPT_CTX_IDLE;
+		break;
+	case CPT_TEST_VECAPS:
+	{
+		__u32 dst_flags = arg;
+		__u32 src_flags;
+
+		err = cpt_vps_caps(ctx, &src_flags);
+		if (err)
+			break;
+
+		test_one_flag(src_flags, dst_flags, CPT_CPU_X86_CMOV, "cmov", err);
+		test_one_flag(src_flags, dst_flags, CPT_CPU_X86_FXSR, "fxsr", err);
+		test_one_flag(src_flags, dst_flags, CPT_CPU_X86_SSE, "sse", err);
+		test_one_flag(src_flags, dst_flags, CPT_CPU_X86_SSE2, "sse2", err);
+		test_one_flag(src_flags, dst_flags, CPT_CPU_X86_MMX, "mmx", err);
+		test_one_flag(src_flags, dst_flags, CPT_CPU_X86_3DNOW, "3dnow", err);
+		test_one_flag(src_flags, dst_flags, CPT_CPU_X86_3DNOW2, "3dnowext", err);
+		test_one_flag(src_flags, dst_flags, CPT_CPU_X86_SEP, "sysenter", err);
+		test_one_flag(src_flags, dst_flags, CPT_CPU_X86_EMT64, "emt64", err);
+		test_one_flag(src_flags, dst_flags, CPT_CPU_X86_IA64, "ia64", err);
+		break;
+	}
+	default:
+		err = -EINVAL;
+		break;
+	}
+
+out:
+	cpt_flush_error(ctx);
+	up(&ctx->main_sem);
+out_nosem:
+	cpt_context_put(ctx);
+out_lock:
+	lock_kernel();
+	return err;
+}
+
+static int cpt_open(struct inode *inode, struct file *file)
+{
+	/* Take a module reference per open; cpt_release() drops it again. */
+	if (try_module_get(THIS_MODULE))
+		return 0;
+
+	return -EBUSY;
+}
+
+static int cpt_release(struct inode * inode, struct file * file)
+{
+	cpt_context_t *bound;
+
+	/* Detach the context from the file and drop the file's reference. */
+	spin_lock(&cpt_context_lock);
+	bound = (cpt_context_t*)file->private_data;
+	file->private_data = NULL;
+	if (bound != NULL)
+		__cpt_context_put(bound);
+	spin_unlock(&cpt_context_lock);
+
+	module_put(THIS_MODULE);
+	return 0;
+}
+
+
+static struct file_operations cpt_fops = {	/* read/write/llseek are filled in by init_cpt() */
+	.owner	 = THIS_MODULE,
+	.open    = cpt_open,
+	.release = cpt_release,
+	.ioctl	 = cpt_ioctl,
+};
+
+static struct proc_dir_entry *proc_ent;
+
+static struct ctl_table_header *ctl_header;
+
+static ctl_table debug_table[] = {	/* child table of the "debug" entry in root_table */
+	{
+		.ctl_name	= 9475,	/* NOTE(review): hard-coded binary sysctl id — confirm it is unique */
+		.procname	= "cpt",
+		.data		= &debug_level,	/* integer exposed through proc_dointvec */
+		.maxlen		= sizeof(debug_level),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec,
+	},
+	{ .ctl_name = 0 }	/* terminator */
+};
+static ctl_table root_table[] = {	/* table handed to register_sysctl_table() in init_cpt() */
+	{
+		.ctl_name	= CTL_DEBUG,
+		.procname	= "debug",
+		.mode		= 0555,
+		.child		= debug_table,	/* holds the "cpt" entry */
+	},
+	{ .ctl_name = 0 }	/* terminator */
+};
+
+static int __init init_cpt(void)	/* registers the sysctl table and the "cpt" proc entry */
+{
+	int err;
+
+	err = -ENOMEM;
+	ctl_header = register_sysctl_table(root_table, 0);
+	if (!ctl_header)
+		goto err_mon;
+
+	spin_lock_init(&cpt_context_lock);
+	INIT_LIST_HEAD(&cpt_context_list);
+
+	err = -EINVAL;
+	proc_ent = create_proc_entry("cpt", 0600, NULL);
+	if (!proc_ent)
+		goto err_out;
+
+	cpt_fops.read = proc_ent->proc_fops->read;	/* keep the entry's own read/write/llseek... */
+	cpt_fops.write = proc_ent->proc_fops->write;
+	cpt_fops.llseek = proc_ent->proc_fops->llseek;
+	proc_ent->proc_fops = &cpt_fops;	/* ...and add this module's open/release/ioctl */
+
+	proc_ent->read_proc = proc_read;
+	proc_ent->data = NULL;
+	proc_ent->owner = THIS_MODULE;
+	return 0;
+
+err_out:	/* proc entry failed: undo the sysctl registration */
+	unregister_sysctl_table(ctl_header);
+err_mon:
+	return err;
+}
+module_init(init_cpt);
+
+static void __exit exit_cpt(void)	/* removes the interfaces, then releases every context left */
+{
+	remove_proc_entry("cpt", NULL);
+	unregister_sysctl_table(ctl_header);
+
+	spin_lock(&cpt_context_lock);
+	while (!list_empty(&cpt_context_list)) {
+		cpt_context_t *ctx;
+		ctx = list_entry(cpt_context_list.next, cpt_context_t, ctx_list);
+
+		if (!ctx->sticky)	/* a sticky context already owns one reference (CPT_GET_CONTEXT) */
+			ctx->refcount++;
+		ctx->sticky = 0;
+
+		BUG_ON(ctx->refcount != 1);	/* nothing else may still hold the context */
+
+		__cpt_context_put(ctx);	/* count reaches 0: the release unlinks and frees ctx */
+	}
+	spin_unlock(&cpt_context_lock);
+}
+module_exit(exit_cpt);
diff -uprN linux-2.6.16/kernel/cpt/cpt_process.c linux-2.6.16.ovz/kernel/cpt/cpt_process.c
--- linux-2.6.16/kernel/cpt/cpt_process.c	1969-12-31 19:00:00.000000000 -0500
+++ linux-2.6.16.ovz/kernel/cpt/cpt_process.c	2006-07-05 08:34:56.000000000 -0400
@@ -0,0 +1,986 @@
+/*
+ *
+ *  kernel/cpt/cpt_process.c
+ *
+ *  Copyright (C) 2000-2005  SWsoft
+ *  All rights reserved.
+ *
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/version.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/file.h>
+#include <linux/mm.h>
+#include <linux/errno.h>
+#include <linux/compat.h>
+#include <linux/cpt_image.h>
+
+#include "cpt_obj.h"
+#include "cpt_context.h"
+#include "cpt_ubc.h"
+#include "cpt_process.h"
+#include "cpt_kernel.h"
+
+#ifdef CONFIG_X86_32
+#undef task_pt_regs
+#define task_pt_regs(t) ((struct pt_regs *)((t)->thread.esp0) - 1)
+#endif
+
+/* Map a raw selector to a CPT_SEG_* code; bad selectors give CPT_SEG_ZERO. */
+static u32 encode_segment(u32 segreg)
+{
+	u32 sel = segreg & 0xFFFF;
+	u32 idx = sel >> 3;
+
+	if (sel == 0)
+		return CPT_SEG_ZERO;
+	if ((sel & 3) != 3) {
+		wprintk("Invalid RPL of a segment reg %x\n", sel);
+		return CPT_SEG_ZERO;
+	}
+
+	if (sel & 4)		/* LDT selector: encode the LDT index */
+		return CPT_SEG_LDT + idx;
+
+	/* TLS descriptor. */
+	if (idx >= GDT_ENTRY_TLS_MIN && idx <= GDT_ENTRY_TLS_MAX)
+		return CPT_SEG_TLS1 + (idx - GDT_ENTRY_TLS_MIN);
+
+	/* One of the standard descriptors */
+#ifdef CONFIG_X86_64
+	if (sel == __USER32_DS)
+		return CPT_SEG_USER32_DS;
+	if (sel == __USER32_CS)
+		return CPT_SEG_USER32_CS;
+	if (sel == __USER_DS)
+		return CPT_SEG_USER64_DS;
+	if (sel == __USER_CS)
+		return CPT_SEG_USER64_CS;
+#else
+	if (sel == __USER_DS)
+		return CPT_SEG_USER32_DS;
+	if (sel == __USER_CS)
+		return CPT_SEG_USER32_CS;
+#endif
+	wprintk("Invalid segment reg %x\n", sel);
+	return CPT_SEG_ZERO;
+}
+
+#ifdef CONFIG_X86_64
+static void xlate_ptregs_64_to_32(struct cpt_x86_regs *d, struct pt_regs *s, task_t *tsk)
+{
+	d->cpt_eax = s->rax;		/* general-purpose registers */
+	d->cpt_ebx = s->rbx;
+	d->cpt_ecx = s->rcx;
+	d->cpt_edx = s->rdx;
+	d->cpt_esi = s->rsi;
+	d->cpt_edi = s->rdi;
+	d->cpt_ebp = s->rbp;
+	d->cpt_esp = s->rsp;
+	d->cpt_eip = s->rip;		/* instruction pointer, flags, syscall slot */
+	d->cpt_eflags = s->eflags;
+	d->cpt_orig_eax = s->orig_rax;
+	d->cpt_xcs = encode_segment(s->cs);	/* selectors are stored encoded */
+	d->cpt_xss = encode_segment(s->ss);
+	d->cpt_xds = encode_segment(tsk->thread.ds);
+	d->cpt_xes = encode_segment(tsk->thread.es);
+}
+
+static int dump_registers(task_t *tsk, struct cpt_context *ctx)	/* x86_64 build: writes one register object */
+{
+	cpt_open_object(NULL, ctx);
+
+	if (tsk->thread_info->flags&_TIF_IA32) {	/* task flagged _TIF_IA32: write the cpt_x86_regs layout */
+		struct cpt_x86_regs ri;
+		ri.cpt_next = sizeof(ri);
+		ri.cpt_object = CPT_OBJ_X86_REGS;
+		ri.cpt_hdrlen = sizeof(ri);
+		ri.cpt_content = CPT_CONTENT_VOID;
+
+		ri.cpt_debugreg[0] = tsk->thread.debugreg0;
+		ri.cpt_debugreg[1] = tsk->thread.debugreg1;
+		ri.cpt_debugreg[2] = tsk->thread.debugreg2;
+		ri.cpt_debugreg[3] = tsk->thread.debugreg3;
+		ri.cpt_debugreg[4] = 0;	/* slots 4 and 5 are always written as zero */
+		ri.cpt_debugreg[5] = 0;
+		ri.cpt_debugreg[6] = tsk->thread.debugreg6;
+		ri.cpt_debugreg[7] = tsk->thread.debugreg7;
+		ri.cpt_fs = encode_segment(tsk->thread.fsindex);
+		ri.cpt_gs = encode_segment(tsk->thread.gsindex);
+
+		xlate_ptregs_64_to_32(&ri, task_pt_regs(tsk), tsk);
+
+		ctx->write(&ri, sizeof(ri), ctx);
+	} else {	/* otherwise write the cpt_x86_64_regs layout */
+		struct cpt_x86_64_regs ri;
+		ri.cpt_next = sizeof(ri);
+		ri.cpt_object = CPT_OBJ_X86_64_REGS;
+		ri.cpt_hdrlen = sizeof(ri);
+		ri.cpt_content = CPT_CONTENT_VOID;
+
+		ri.cpt_fsbase = tsk->thread.fs;
+		ri.cpt_gsbase = tsk->thread.gs;
+		ri.cpt_fsindex = encode_segment(tsk->thread.fsindex);
+		ri.cpt_gsindex = encode_segment(tsk->thread.gsindex);
+		ri.cpt_ds = encode_segment(tsk->thread.ds);
+		ri.cpt_es = encode_segment(tsk->thread.es);
+		ri.cpt_debugreg[0] = tsk->thread.debugreg0;
+		ri.cpt_debugreg[1] = tsk->thread.debugreg1;
+		ri.cpt_debugreg[2] = tsk->thread.debugreg2;
+		ri.cpt_debugreg[3] = tsk->thread.debugreg3;
+		ri.cpt_debugreg[4] = 0;	/* slots 4 and 5 are always written as zero */
+		ri.cpt_debugreg[5] = 0;
+		ri.cpt_debugreg[6] = tsk->thread.debugreg6;
+		ri.cpt_debugreg[7] = tsk->thread.debugreg7;
+
+		memcpy(&ri.cpt_r15, task_pt_regs(tsk), sizeof(struct pt_regs));	/* assumes the image fields from cpt_r15 on mirror struct pt_regs — TODO confirm */
+
+		ri.cpt_cs = encode_segment(task_pt_regs(tsk)->cs);	/* stored encoded, set after the raw copy */
+		ri.cpt_ss = encode_segment(task_pt_regs(tsk)->ss);
+
+		ctx->write(&ri, sizeof(ri), ctx);
+
+#if 0
+		if (ri.cpt_rip >= VSYSCALL_START && ri.cpt_rip < VSYSCALL_END) {
+			eprintk_ctx(CPT_FID "cannot be checkpointied while vsyscall, try later\n", CPT_TID(tsk));
+			return -EAGAIN;
+		}
+#endif
+	}
+	cpt_close_object(ctx);
+
+	return 0;
+}
+
+#else
+
+/* Write tsk's saved pt_regs, debug and segment registers as CPT_OBJ_X86_REGS. */
+static int dump_registers(task_t *tsk, struct cpt_context *ctx)
+{
+	struct pt_regs *regs = task_pt_regs(tsk);
+	struct cpt_x86_regs ri;
+	int i;
+
+	cpt_open_object(NULL, ctx);
+
+	ri.cpt_next = sizeof(ri);
+	ri.cpt_object = CPT_OBJ_X86_REGS;
+	ri.cpt_hdrlen = sizeof(ri);
+	ri.cpt_content = CPT_CONTENT_VOID;
+
+	/* All eight debug register slots are copied as they are. */
+	for (i = 0; i < 8; i++)
+		ri.cpt_debugreg[i] = tsk->thread.debugreg[i];
+	ri.cpt_fs = encode_segment(tsk->thread.fs);
+	ri.cpt_gs = encode_segment(tsk->thread.gs);
+
+	/* Copy pt_regs wholesale into the image fields starting at cpt_ebx... */
+	memcpy(&ri.cpt_ebx, regs, sizeof(struct pt_regs));
+
+	/* ...then store the selectors in encoded form. */
+	ri.cpt_xcs = encode_segment(regs->xcs);
+	ri.cpt_xss = encode_segment(regs->xss);
+	ri.cpt_xds = encode_segment(regs->xds);
+	ri.cpt_xes = encode_segment(regs->xes);
+
+	ctx->write(&ri, sizeof(ri), ctx);
+	cpt_close_object(ctx);
+
+	return 0;
+}
+#endif
+
+/* Write tsk's kernel stack, from its saved stack pointer up, as CPT_OBJ_BITS. */
+static int dump_kstack(task_t *tsk, struct cpt_context *ctx)
+{
+	struct cpt_obj_bits hdr;
+	unsigned long sp, size;
+
+	cpt_open_object(NULL, ctx);
+
+#ifdef CONFIG_X86_64
+	sp = tsk->thread.rsp;
+	size = tsk->thread.rsp0 - sp;
+#else
+	sp = tsk->thread.esp;
+	size = tsk->thread.esp0 - sp;
+#endif
+
+	hdr.cpt_next = sizeof(hdr) + CPT_ALIGN(size);
+	hdr.cpt_object = CPT_OBJ_BITS;
+	hdr.cpt_hdrlen = sizeof(hdr);
+	hdr.cpt_content = CPT_CONTENT_STACK;
+	hdr.cpt_size = size;
+
+	ctx->write(&hdr, sizeof(hdr), ctx);
+	ctx->write((void*)sp, size, ctx);
+	ctx->align(ctx);
+	cpt_close_object(ctx);
+	return 0;
+}
+
+/* Formats of i387_fxsave_struct are the same for x86_64
+ * and i386. Plain luck. */
+
+/* Write tsk->thread.i387 (fxsave or legacy fsave layout) as CPT_OBJ_BITS. */
+static int dump_fpustate(task_t *tsk, struct cpt_context *ctx)
+{
+	struct cpt_obj_bits hdr;
+	unsigned long size = sizeof(struct i387_fxsave_struct);
+	int type = CPT_CONTENT_X86_FPUSTATE;
+
+	cpt_open_object(NULL, ctx);
+
+#ifndef CONFIG_X86_64
+	if (!cpu_has_fxsr) {
+		/* No FXSR on this CPU: dump the old fsave layout instead. */
+		type = CPT_CONTENT_X86_FPUSTATE_OLD;
+		size = sizeof(struct i387_fsave_struct);
+	}
+#endif
+
+	hdr.cpt_next = sizeof(hdr) + CPT_ALIGN(size);
+	hdr.cpt_object = CPT_OBJ_BITS;
+	hdr.cpt_hdrlen = sizeof(hdr);
+	hdr.cpt_content = type;
+	hdr.cpt_size = size;
+
+	ctx->write(&hdr, sizeof(hdr), ctx);
+	ctx->write(&tsk->thread.i387, size, ctx);
+	ctx->align(ctx);
+	cpt_close_object(ctx);
+	return 0;
+}
+
+static int encode_siginfo(struct cpt_siginfo_image *si, siginfo_t *info)	/* fills only the fields relevant to si_code */
+{
+	si->cpt_signo = info->si_signo;
+	si->cpt_errno = info->si_errno;
+	si->cpt_code = info->si_code;
+
+	switch(si->cpt_code & __SI_MASK) {
+	case __SI_TIMER:	/* cpt_pid/cpt_uid/cpt_utime carry tid/overrun/sys_private */
+		si->cpt_pid = info->si_tid;
+		si->cpt_uid = info->si_overrun;
+		si->cpt_sigval = cpt_ptr_export(info->_sifields._timer._sigval.sival_ptr);
+		si->cpt_utime = info->si_sys_private;
+		break;
+	case __SI_POLL:	/* cpt_pid/cpt_uid carry band/fd */
+		si->cpt_pid = info->si_band;
+		si->cpt_uid = info->si_fd;
+		break;
+	case __SI_FAULT:	/* cpt_sigval carries the fault address */
+		si->cpt_sigval = cpt_ptr_export(info->si_addr);
+#ifdef __ARCH_SI_TRAPNO
+		si->cpt_pid = info->si_trapno;
+#endif
+		break;
+	case __SI_CHLD:	/* si_pid is stored as is when already virtual, else converted */
+		si->cpt_pid = is_virtual_pid(info->si_pid) ? info->si_pid : pid_type_to_vpid(PIDTYPE_PID, info->si_pid);
+		si->cpt_uid = info->si_uid;
+		si->cpt_sigval = info->si_status;
+		si->cpt_stime = info->si_stime;
+		si->cpt_utime = info->si_utime;
+		break;
+	case __SI_KILL:
+	case __SI_RT:
+	case __SI_MESGQ:
+	default:	/* same pid handling as __SI_CHLD, but converted with PIDTYPE_TGID */
+		si->cpt_pid = is_virtual_pid(info->si_pid) ? info->si_pid : pid_type_to_vpid(PIDTYPE_TGID, info->si_pid);
+		si->cpt_uid = info->si_uid;
+		si->cpt_sigval = cpt_ptr_export(info->si_ptr);
+		break;
+	}
+	return 0;	/* no failure path at present */
+}
+
+/* Write one CPT_OBJ_SIGINFO image per signal queued on list; 0 or -EINVAL. */
+static int dump_sigqueue(struct sigpending *list, struct cpt_context *ctx)
+{
+	struct sigqueue *q;
+	loff_t saved_obj;
+
+	if (list_empty(&list->list))
+		return 0;
+	cpt_push_object(&saved_obj, ctx);
+	list_for_each_entry(q, &list->list, list) {
+		struct cpt_siginfo_image si;
+
+		memset(&si, 0, sizeof(si));	/* only some fields get set below; write no stale stack bytes */
+		si.cpt_next = sizeof(si);
+		si.cpt_object = CPT_OBJ_SIGINFO;
+		si.cpt_hdrlen = sizeof(si);
+		si.cpt_content = CPT_CONTENT_VOID;
+		si.cpt_qflags = q->flags;
+		si.cpt_user = q->user->uid;
+		if (encode_siginfo(&si, &q->info)) {
+			cpt_pop_object(&saved_obj, ctx);	/* keep push/pop balanced on error */
+			return -EINVAL;
+		}
+		ctx->write(&si, sizeof(si), ctx);
+	}
+	cpt_pop_object(&saved_obj, ctx);
+	return 0;
+}
+
+
+
+static int dump_one_signal_struct(cpt_object_t *obj, struct cpt_context *ctx)	/* writes obj's signal_struct image */
+{
+	struct signal_struct *sig = obj->o_obj;
+	struct cpt_signal_image *v = cpt_get_buf(ctx);	/* released on every return path below */
+	task_t *tsk;
+	int i;
+
+	cpt_open_object(obj, ctx);
+
+	v->cpt_next = CPT_NULL;
+	v->cpt_object = CPT_OBJ_SIGNAL_STRUCT;
+	v->cpt_hdrlen = sizeof(*v);
+	v->cpt_content = CPT_CONTENT_ARRAY;
+
+	if (sig->pgrp <= 0) {
+		eprintk_ctx("bad pgid\n");
+		cpt_release_buf(ctx);
+		return -EINVAL;	/* NOTE(review): the object opened above is not closed on the error returns */
+	}
+	v->cpt_pgrp_type = CPT_PGRP_NORMAL;
+	read_lock(&tasklist_lock);
+	tsk = find_task_by_pid_type_ve(PIDTYPE_PID, sig->pgrp);
+	if (tsk == NULL)	/* no task found whose pid equals pgrp */
+		v->cpt_pgrp_type = CPT_PGRP_ORPHAN;
+	read_unlock(&tasklist_lock);
+	v->cpt_pgrp = pid_type_to_vpid(PIDTYPE_PGID, sig->pgrp);
+
+	v->cpt_old_pgrp = 0;
+	if (sig->tty_old_pgrp < 0) {
+		eprintk_ctx("bad tty_old_pgrp\n");
+		cpt_release_buf(ctx);
+		return -EINVAL;
+	}
+	if (sig->tty_old_pgrp > 0) {
+		v->cpt_old_pgrp_type = CPT_PGRP_NORMAL;
+		read_lock(&tasklist_lock);
+		tsk = find_task_by_pid_type_ve(PIDTYPE_PID, sig->tty_old_pgrp);
+		if (tsk == NULL) {	/* not found by pid: retry the lookup as a process group id */
+			v->cpt_old_pgrp_type = CPT_PGRP_ORPHAN;
+			tsk = find_task_by_pid_type_ve(PIDTYPE_PGID, sig->tty_old_pgrp);
+		}
+		read_unlock(&tasklist_lock);
+		if (tsk == NULL) {
+			eprintk_ctx("tty_old_pgrp does not exist anymore\n");
+			cpt_release_buf(ctx);
+			return -EINVAL;
+		}
+		v->cpt_old_pgrp = _pid_type_to_vpid(PIDTYPE_PGID, sig->tty_old_pgrp);
+		if ((int)v->cpt_old_pgrp < 0) {	/* conversion gave a negative value: record it as stray */
+			dprintk_ctx("stray tty_old_pgrp %d\n", sig->tty_old_pgrp);
+			v->cpt_old_pgrp = -1;
+			v->cpt_old_pgrp_type = CPT_PGRP_STRAY;
+		}
+	}
+
+	if (sig->session <= 0) {
+		eprintk_ctx("bad session\n");
+		cpt_release_buf(ctx);
+		return -EINVAL;
+	}
+	v->cpt_session_type = CPT_PGRP_NORMAL;
+	read_lock(&tasklist_lock);
+	tsk = find_task_by_pid_type_ve(PIDTYPE_PID, sig->session);
+	if (tsk == NULL)	/* no task found whose pid equals session */
+		v->cpt_session_type = CPT_PGRP_ORPHAN;
+	read_unlock(&tasklist_lock);
+	v->cpt_session = pid_type_to_vpid(PIDTYPE_SID, sig->session);
+
+	v->cpt_leader = sig->leader;
+	v->cpt_ctty = CPT_NULL;
+	if (sig->tty) {	/* the controlling tty must already be a tracked CPT_OBJ_TTY object */
+		cpt_object_t *cobj = lookup_cpt_object(CPT_OBJ_TTY, sig->tty, ctx);
+		if (cobj)
+			v->cpt_ctty = cobj->o_pos;
+		else {
+			eprintk_ctx("controlling tty is not found\n");
+			cpt_release_buf(ctx);
+			return -EINVAL;
+		}
+	}
+	memcpy(&v->cpt_sigpending, &sig->shared_pending.signal, 8);	/* copies 8 bytes of the pending set */
+
+	v->cpt_curr_target = 0;
+	if (sig->curr_target)
+		v->cpt_curr_target = virt_pid(sig->curr_target);
+	v->cpt_group_exit = ((sig->flags & SIGNAL_GROUP_EXIT) != 0);
+	v->cpt_group_exit_code = sig->group_exit_code;
+	v->cpt_group_exit_task = 0;
+	if (sig->group_exit_task)
+		v->cpt_group_exit_task = virt_pid(sig->group_exit_task);
+	v->cpt_notify_count = sig->notify_count;
+	v->cpt_group_stop_count = sig->group_stop_count;
+
+#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,8)
+	v->cpt_utime = sig->utime;
+	v->cpt_stime = sig->stime;
+	v->cpt_cutime = sig->cutime;
+	v->cpt_cstime = sig->cstime;
+	v->cpt_nvcsw = sig->nvcsw;
+	v->cpt_nivcsw = sig->nivcsw;
+	v->cpt_cnvcsw = sig->cnvcsw;
+	v->cpt_cnivcsw = sig->cnivcsw;
+	v->cpt_min_flt = sig->min_flt;
+	v->cpt_maj_flt = sig->maj_flt;
+	v->cpt_cmin_flt = sig->cmin_flt;
+	v->cpt_cmaj_flt = sig->cmaj_flt;
+
+	if (RLIM_NLIMITS > CPT_RLIM_NLIMITS)
+		__asm__("undefined\n");	/* deliberately invalid asm; presumably a build-time guard — confirm */
+
+	for (i=0; i<CPT_RLIM_NLIMITS; i++) {
+		if (i < RLIM_NLIMITS) {
+			v->cpt_rlim_cur[i] = sig->rlim[i].rlim_cur;
+			v->cpt_rlim_max[i] = sig->rlim[i].rlim_max;
+		} else {	/* image slots beyond the kernel's limits are filled with CPT_NULL */
+			v->cpt_rlim_cur[i] = CPT_NULL;
+			v->cpt_rlim_max[i] = CPT_NULL;
+		}
+	}
+#endif
+
+	ctx->write(v, sizeof(*v), ctx);
+	cpt_release_buf(ctx);
+
+	dump_sigqueue(&sig->shared_pending, ctx);	/* NOTE(review): the return value is ignored */
+
+	cpt_close_object(ctx);
+	return 0;
+}
+
+
+static int dump_one_process(cpt_object_t *obj, struct cpt_context *ctx)
+{
+	task_t *tsk = obj->o_obj;
+	int last_thread;
+	struct cpt_task_image *v = cpt_get_buf(ctx);
+	cpt_object_t *tobj;
+	cpt_object_t *tg_obj;
+	loff_t saved_obj;
+	int i;
+	int err;
+	struct timespec delta;
+	/* Dump one task: the cpt_task_image header, then its auxiliary objects. Returns 0 or a negative errno. */
+	cpt_open_object(obj, ctx);
+	/* cpt_signal is patched later with pwrite(), once the signal struct of the thread group has been dumped. */
+	v->cpt_signal = CPT_NULL;
+	tg_obj = lookup_cpt_object(CPT_OBJ_SIGNAL_STRUCT, tsk->signal, ctx);
+	if (!tg_obj) BUG();
+
+	v->cpt_next = CPT_NULL;
+	v->cpt_object = CPT_OBJ_TASK;
+	v->cpt_hdrlen = sizeof(*v);
+	v->cpt_content = CPT_CONTENT_ARRAY;
+	/* Sanity-check state against exit_state; for an exiting task the image records exit_state. */
+	v->cpt_state = tsk->state;
+	if (tsk->state == EXIT_ZOMBIE) {
+		eprintk_ctx("invalid zombie state on" CPT_FID "\n", CPT_TID(tsk));
+		cpt_release_buf(ctx);
+		return -EINVAL;
+	} else if (tsk->state == EXIT_DEAD) {
+		if (tsk->exit_state != EXIT_DEAD &&
+		    tsk->exit_state != EXIT_ZOMBIE) {
+			eprintk_ctx("invalid exit_state %ld on" CPT_FID "\n", tsk->exit_state, CPT_TID(tsk));
+			cpt_release_buf(ctx);
+			return -EINVAL;
+		}
+	}
+	if (tsk->exit_state) {
+		v->cpt_state = tsk->exit_state;
+		if (tsk->state != EXIT_DEAD) {
+			eprintk_ctx("invalid tsk->state %ld/%ld on" CPT_FID "\n",
+				tsk->state, tsk->exit_state, CPT_TID(tsk));
+			cpt_release_buf(ctx);
+			return -EINVAL;
+		}
+	}
+	v->cpt_flags = tsk->flags&~PF_FROZEN;
+	v->cpt_ptrace = tsk->ptrace;
+	v->cpt_prio = tsk->prio;
+	v->cpt_exit_code = tsk->exit_code;
+	v->cpt_exit_signal = tsk->exit_signal;
+	v->cpt_pdeath_signal = tsk->pdeath_signal;
+	v->cpt_static_prio = tsk->static_prio;
+	v->cpt_rt_priority = tsk->rt_priority;
+	v->cpt_policy = tsk->policy;
+	if (v->cpt_policy != SCHED_NORMAL) {
+		eprintk_ctx("scheduler policy is not supported %d/%d(%s)\n", virt_pid(tsk), tsk->pid, tsk->comm);
+		cpt_release_buf(ctx);
+		return -EINVAL;
+	}
+	/* Shared structures are referenced by the o_pos of their cpt objects; a missing object is a BUG(). */
+	v->cpt_mm = CPT_NULL;
+	if (tsk->mm) {
+		tobj = lookup_cpt_object(CPT_OBJ_MM, tsk->mm, ctx);
+		if (!tobj) BUG();
+		v->cpt_mm = tobj->o_pos;
+	}
+	v->cpt_files = CPT_NULL;
+	if (tsk->files) {
+		tobj = lookup_cpt_object(CPT_OBJ_FILES, tsk->files, ctx);
+		if (!tobj) BUG();
+		v->cpt_files = tobj->o_pos;
+	}
+	v->cpt_fs = CPT_NULL;
+	if (tsk->fs) {
+		tobj = lookup_cpt_object(CPT_OBJ_FS, tsk->fs, ctx);
+		if (!tobj) BUG();
+		v->cpt_fs = tobj->o_pos;
+	}
+	v->cpt_namespace = CPT_NULL;
+	if (tsk->namespace) {
+		tobj = lookup_cpt_object(CPT_OBJ_NAMESPACE, tsk->namespace, ctx);
+		if (!tobj) BUG();
+		v->cpt_namespace = tobj->o_pos;
+
+		if (tsk->namespace != current->namespace)
+			eprintk_ctx("namespaces are not supported: process %d/%d(%s)\n", virt_pid(tsk), tsk->pid, tsk->comm);
+	}
+	v->cpt_sysvsem_undo = CPT_NULL;
+	if (tsk->sysvsem.undo_list && !tsk->exit_state) {
+		tobj = lookup_cpt_object(CPT_OBJ_SYSVSEM_UNDO, tsk->sysvsem.undo_list, ctx);
+		if (!tobj) BUG();
+		v->cpt_sysvsem_undo = tobj->o_pos;
+	}
+	v->cpt_sighand = CPT_NULL;
+	if (tsk->sighand) {
+		tobj = lookup_cpt_object(CPT_OBJ_SIGHAND_STRUCT, tsk->sighand, ctx);
+		if (!tobj) BUG();
+		v->cpt_sighand = tobj->o_pos;
+	}
+	v->cpt_sigblocked = cpt_sigset_export(&tsk->blocked);
+	v->cpt_sigrblocked = cpt_sigset_export(&tsk->real_blocked);
+	v->cpt_sigsuspend_blocked = cpt_sigset_export(&tsk->saved_sigmask);
+	/* Process ids go through the virt_*() helpers; a parent that is not a collected task is refused. */
+	v->cpt_pid = virt_pid(tsk);
+	v->cpt_tgid = virt_tgid(tsk);
+	v->cpt_ppid = 0;
+	if (tsk->parent) {
+		if (tsk->parent != tsk->real_parent &&
+		    !lookup_cpt_object(CPT_OBJ_TASK, tsk->parent, ctx)) {
+			eprintk_ctx("task %d/%d(%s) is ptraced from ve0\n", tsk->pid, virt_pid(tsk), tsk->comm);
+			cpt_release_buf(ctx);
+			return -EBUSY;
+		}
+		v->cpt_ppid = virt_pid(tsk->parent);
+	}
+	v->cpt_rppid = tsk->real_parent ? virt_pid(tsk->real_parent) : 0;
+	v->cpt_pgrp = virt_pgid(tsk);
+	v->cpt_session = virt_sid(tsk);
+	v->cpt_old_pgrp = 0;
+	if (tsk->signal->tty_old_pgrp)
+		v->cpt_old_pgrp = _pid_type_to_vpid(PIDTYPE_PGID, tsk->signal->tty_old_pgrp);
+	v->cpt_leader = tsk->group_leader ? virt_pid(tsk->group_leader) : 0;
+	v->cpt_set_tid = (unsigned long)tsk->set_child_tid;
+	v->cpt_clear_tid = (unsigned long)tsk->clear_child_tid;
+	memcpy(v->cpt_comm, tsk->comm, 16);
+	v->cpt_user = tsk->user->uid;
+	v->cpt_uid = tsk->uid;
+	v->cpt_euid = tsk->euid;
+	v->cpt_suid = tsk->suid;
+	v->cpt_fsuid = tsk->fsuid;
+	v->cpt_gid = tsk->gid;
+	v->cpt_egid = tsk->egid;
+	v->cpt_sgid = tsk->sgid;
+	v->cpt_fsgid = tsk->fsgid;
+	v->cpt_ngids = 0;
+	if (tsk->group_info && tsk->group_info->ngroups != 0) {
+		int ngroups = tsk->group_info->ngroups;
+		if (ngroups > 32) {
+			/* More than 32 groups are not handled by this simplified dump. */
+			eprintk_ctx("too many of groups " CPT_FID "\n", CPT_TID(tsk));
+			cpt_release_buf(ctx);	/* the buffer must not stay held on this error path */
+			return -EINVAL;
+		}
+		v->cpt_ngids = ngroups;
+		for (i = ngroups - 1; i >= 0; i--)
+			v->cpt_gids[i] = tsk->group_info->small_block[i];
+	}
+	memcpy(&v->cpt_ecap, &tsk->cap_effective, 8);
+	memcpy(&v->cpt_icap, &tsk->cap_inheritable, 8);
+	memcpy(&v->cpt_pcap, &tsk->cap_permitted, 8);
+	v->cpt_keepcap = tsk->keep_capabilities;
+
+	v->cpt_did_exec = tsk->did_exec;
+	v->cpt_exec_domain = -1;
+	v->cpt_thrflags = tsk->thread_info->flags & ~(1<<TIF_FREEZE);
+	v->cpt_64bit = 0;
+#ifdef CONFIG_X86_64
+	/* Clear x86_64 specific flags */
+	v->cpt_thrflags &= ~(_TIF_FORK|_TIF_ABI_PENDING|_TIF_IA32);
+	if (!(tsk->thread_info->flags & _TIF_IA32)) {
+		ctx->tasks64++;
+		v->cpt_64bit = 1;
+	}
+#endif
+	v->cpt_thrstatus = tsk->thread_info->status;
+	v->cpt_addr_limit = -1;
+
+	v->cpt_personality = tsk->personality;
+
+	for (i=0; i<GDT_ENTRY_TLS_ENTRIES; i++) {
+		if (i>=3) {
+			eprintk_ctx("too many tls descs\n");
+			cpt_release_buf(ctx);
+			return -EINVAL;
+		}
+#ifndef CONFIG_X86_64
+		v->cpt_tls[i] = (((u64)tsk->thread.tls_array[i].b)<<32) + tsk->thread.tls_array[i].a;
+#else
+		v->cpt_tls[i] = tsk->thread.tls_array[i];
+#endif
+	}
+	/* A restart block differing from current's must be a (compat) nanosleep restart; anything else is refused. */
+	v->cpt_restart.fn = CPT_RBL_0;
+	if (tsk->thread_info->restart_block.fn != current->thread_info->restart_block.fn) {
+		if (tsk->thread_info->restart_block.fn != nanosleep_restart
+#ifdef CONFIG_X86_64
+		    && tsk->thread_info->restart_block.fn != compat_nanosleep_restart
+#endif
+		    ) {
+			eprintk_ctx("unknown restart block %p\n", tsk->thread_info->restart_block.fn);
+			cpt_release_buf(ctx);
+			return -EINVAL;
+		}
+		v->cpt_restart.fn = CPT_RBL_NANOSLEEP;
+#ifdef CONFIG_X86_64
+		if (tsk->thread_info->restart_block.fn == compat_nanosleep_restart)
+			v->cpt_restart.fn = CPT_RBL_COMPAT_NANOSLEEP;
+#endif
+		v->cpt_restart.arg0 = tsk->thread_info->restart_block.arg0;
+		v->cpt_restart.arg1 = tsk->thread_info->restart_block.arg1;
+		v->cpt_restart.arg2 = tsk->thread_info->restart_block.arg2;
+		v->cpt_restart.arg3 = tsk->thread_info->restart_block.arg3;
+		if (debug_level > 2) {
+			ktime_t e, e1;
+			struct timespec now;
+
+			do_posix_clock_monotonic_gettime(&now);
+			e = timespec_to_ktime(now);
+			e1.tv64 = ((u64)tsk->thread_info->restart_block.arg1 << 32) | (u64) tsk->thread_info->restart_block.arg0;
+			e = ktime_sub(e1, e);
+			dprintk("cpt " CPT_FID " RBL %ld/%ld %Ld\n", CPT_TID(tsk),
+				tsk->thread_info->restart_block.arg1,
+				tsk->thread_info->restart_block.arg0, e.tv64);
+		}
+	}
+	/* Interval timers are saved only for a thread group leader with exit_state == 0; otherwise they stay zero. */
+	v->cpt_it_real_incr = 0;
+	v->cpt_it_prof_incr = 0;
+	v->cpt_it_virt_incr = 0;
+	v->cpt_it_real_value = 0;
+	v->cpt_it_prof_value = 0;
+	v->cpt_it_virt_value = 0;
+	if (thread_group_leader(tsk) && tsk->exit_state == 0) {
+		ktime_t rem;
+
+		v->cpt_it_real_incr = ktime_to_ns(tsk->signal->it_real_incr);
+		v->cpt_it_prof_incr = tsk->signal->it_prof_incr;
+		v->cpt_it_virt_incr = tsk->signal->it_virt_incr;
+
+		rem = hrtimer_get_remaining(&tsk->signal->real_timer);
+
+		if (hrtimer_active(&tsk->signal->real_timer)) {
+			if (rem.tv64 <= 0)
+				rem.tv64 = NSEC_PER_USEC;
+			v->cpt_it_real_value = ktime_to_ns(rem);
+			dprintk("cpt itimer " CPT_FID " %Lu\n", CPT_TID(tsk), v->cpt_it_real_value);
+		}
+		v->cpt_it_prof_value = tsk->signal->it_prof_expires;
+		v->cpt_it_virt_value = tsk->signal->it_virt_expires;
+	}
+	v->cpt_used_math = (tsk_used_math(tsk) != 0);
+
+	if (tsk->notifier) {
+		eprintk_ctx("task notifier is in use: process %d/%d(%s)\n", virt_pid(tsk), tsk->pid, tsk->comm);
+		cpt_release_buf(ctx);
+		return -EINVAL;
+	}
+
+	v->cpt_utime = tsk->utime;
+	v->cpt_stime = tsk->stime;
+	delta = tsk->start_time;	/* stored relative to the start time of the exec env's init_entry */
+	_set_normalized_timespec(&delta,
+			delta.tv_sec - get_exec_env()->init_entry->start_time.tv_sec,
+			delta.tv_nsec - get_exec_env()->init_entry->start_time.tv_nsec);
+	v->cpt_starttime = cpt_timespec_export(&delta);
+	v->cpt_nvcsw = tsk->nvcsw;
+	v->cpt_nivcsw = tsk->nivcsw;
+	v->cpt_min_flt = tsk->min_flt;
+	v->cpt_maj_flt = tsk->maj_flt;
+
+#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,8)
+	v->cpt_cutime = tsk->cutime;
+	v->cpt_cstime = tsk->cstime;
+	v->cpt_cnvcsw = tsk->cnvcsw;
+	v->cpt_cnivcsw = tsk->cnivcsw;
+	v->cpt_cmin_flt = tsk->cmin_flt;
+	v->cpt_cmaj_flt = tsk->cmaj_flt;
+
+	if (RLIM_NLIMITS > CPT_RLIM_NLIMITS)
+		__asm__("undefined\n");
+
+	for (i=0; i<CPT_RLIM_NLIMITS; i++) {
+		if (i < RLIM_NLIMITS) {
+			v->cpt_rlim_cur[i] = tsk->rlim[i].rlim_cur;
+			v->cpt_rlim_max[i] = tsk->rlim[i].rlim_max;
+		} else {
+			v->cpt_rlim_cur[i] = CPT_NULL;
+			v->cpt_rlim_max[i] = CPT_NULL;
+		}
+	}
+#else
+	v->cpt_cutime = tsk->signal->cutime;
+	v->cpt_cstime = tsk->signal->cstime;
+	v->cpt_cnvcsw = tsk->signal->cnvcsw;
+	v->cpt_cnivcsw = tsk->signal->cnivcsw;
+	v->cpt_cmin_flt = tsk->signal->cmin_flt;
+	v->cpt_cmaj_flt = tsk->signal->cmaj_flt;
+
+	if (RLIM_NLIMITS > CPT_RLIM_NLIMITS)
+		__asm__("undefined\n");
+
+	for (i=0; i<CPT_RLIM_NLIMITS; i++) {
+		if (i < RLIM_NLIMITS) {
+			v->cpt_rlim_cur[i] = tsk->signal->rlim[i].rlim_cur;
+			v->cpt_rlim_max[i] = tsk->signal->rlim[i].rlim_max;
+		} else {
+			v->cpt_rlim_cur[i] = CPT_NULL;
+			v->cpt_rlim_max[i] = CPT_NULL;
+		}
+	}
+#endif
+	/* ub pointers are resolved through cpt_lookup_ubc(); a task without mm gets CPT_NULL for cpt_mm_ub. */
+	if (tsk->mm)
+		v->cpt_mm_ub = cpt_lookup_ubc(tsk->mm->mm_ub, ctx);
+	else
+		v->cpt_mm_ub = CPT_NULL;
+	v->cpt_task_ub = cpt_lookup_ubc(tsk->task_bc.task_ub, ctx);
+	v->cpt_exec_ub = cpt_lookup_ubc(tsk->task_bc.exec_ub, ctx);
+	v->cpt_fork_sub = cpt_lookup_ubc(tsk->task_bc.fork_sub, ctx);
+
+	v->cpt_ptrace_message = tsk->ptrace_message;
+	v->cpt_pn_state = tsk->pn_state;
+	v->cpt_stopped_state = tsk->stopped_state;
+	v->cpt_sigsuspend_state = 0;
+
+#ifndef CONFIG_X86_64
+	if (tsk->thread.vm86_info) {
+		eprintk_ctx("vm86 task is running\n");
+		cpt_release_buf(ctx);
+		return -EBUSY;
+	}
+#endif
+
+	v->cpt_sigpending = cpt_sigset_export(&tsk->pending.signal);
+	/* The header is complete: write it and release the buffer before dumping the nested objects. */
+	ctx->write(v, sizeof(*v), ctx);
+	cpt_release_buf(ctx);
+
+	cpt_push_object(&saved_obj, ctx);
+	dump_kstack(tsk, ctx);
+	cpt_pop_object(&saved_obj, ctx);
+
+	cpt_push_object(&saved_obj, ctx);
+	err = dump_registers(tsk, ctx);
+	cpt_pop_object(&saved_obj, ctx);
+	if (err)
+		return err;
+
+	if (tsk_used_math(tsk)) {
+		cpt_push_object(&saved_obj, ctx);
+		dump_fpustate(tsk, ctx);
+		cpt_pop_object(&saved_obj, ctx);
+	}
+
+	if (tsk->last_siginfo) {
+		struct cpt_siginfo_image si;
+		cpt_push_object(&saved_obj, ctx);
+
+		si.cpt_next = sizeof(si);
+		si.cpt_object = CPT_OBJ_LASTSIGINFO;
+		si.cpt_hdrlen = sizeof(si);
+		si.cpt_content = CPT_CONTENT_VOID;
+
+		if (encode_siginfo(&si, tsk->last_siginfo))
+			return -EINVAL;
+
+		ctx->write(&si, sizeof(si), ctx);
+		cpt_pop_object(&saved_obj, ctx);
+	}
+
+	if (tsk->sas_ss_size) {
+		struct cpt_sigaltstack_image si;
+		cpt_push_object(&saved_obj, ctx);
+
+		si.cpt_next = sizeof(si);
+		si.cpt_object = CPT_OBJ_SIGALTSTACK;
+		si.cpt_hdrlen = sizeof(si);
+		si.cpt_content = CPT_CONTENT_VOID;
+
+		si.cpt_stack = tsk->sas_ss_sp;
+		si.cpt_stacksize = tsk->sas_ss_size;
+
+		ctx->write(&si, sizeof(si), ctx);
+		cpt_pop_object(&saved_obj, ctx);
+	}
+
+	dump_sigqueue(&tsk->pending, ctx);
+	/* tsk counts as the last thread when next_thread() returns tsk itself or the thread group leader. */
+	last_thread = 1;
+	read_lock(&tasklist_lock);
+	do {
+		task_t * next = next_thread(tsk);
+		if (next != tsk && !thread_group_leader(next))
+			last_thread = 0;
+	} while (0);
+	read_unlock(&tasklist_lock);
+
+	if (last_thread) {
+		task_t *prev_tsk;
+		loff_t pos = ctx->file->f_pos;	/* file position before the signal struct is dumped */
+
+		/* Dump the shared signal struct once, then store pos into cpt_signal of every thread of the group. */
+		cpt_push_object(&saved_obj, ctx);
+		err = dump_one_signal_struct(tg_obj, ctx);
+		cpt_pop_object(&saved_obj, ctx);
+		if (err)
+			return err;
+
+		prev_tsk = tsk;
+		for (;;) {
+			if (prev_tsk->tgid == tsk->tgid) {
+				loff_t tg_pos;
+
+				tg_pos = obj->o_pos + offsetof(struct cpt_task_image, cpt_signal);
+				ctx->pwrite(&pos, sizeof(pos), ctx, tg_pos);
+				if (thread_group_leader(prev_tsk))
+					break;
+			}
+			/* Step back through the task object list until the group leader has been patched. */
+			if (obj->o_list.prev == &ctx->object_array[CPT_OBJ_TASK]) {
+				eprintk_ctx("bug: thread group leader is lost\n");
+				return -EINVAL;
+			}
+
+			obj = list_entry(obj->o_list.prev, cpt_object_t, o_list);
+			prev_tsk = obj->o_obj;
+		}
+	}
+
+	cpt_close_object(ctx);
+	return 0;
+}
+
+int cpt_dump_tasks(struct cpt_context *ctx)
+{
+	cpt_object_t *task_obj;
+	int rc;
+
+	/* Emit the TASKS section: one image per collected task object. */
+	cpt_open_section(ctx, CPT_SECT_TASKS);
+	for_each_object(task_obj, CPT_OBJ_TASK) {
+		rc = dump_one_process(task_obj, ctx);
+		/* A failure returns at once; the section is not closed. */
+		if (rc)
+			return rc;
+	}
+	cpt_close_section(ctx);
+	return 0;
+}
+
+int cpt_collect_signals(cpt_context_t *ctx)
+{
+	cpt_object_t *task_obj;
+	/* Register the signal and sighand structs of every collected task; posix timers are refused. */
+	for_each_object(task_obj, CPT_OBJ_TASK) {
+		task_t *tsk = task_obj->o_obj;
+		struct signal_struct *sig = tsk->signal;
+		if (sig && !list_empty(&sig->posix_timers)) {
+			eprintk_ctx("task %d/%d(%s) uses posix timers\n", tsk->pid, virt_pid(tsk), tsk->comm);
+			return -EBUSY;
+		}
+		if (sig && !cpt_object_add(CPT_OBJ_SIGNAL_STRUCT, sig, ctx))
+			return -ENOMEM;
+		if (tsk->sighand && !cpt_object_add(CPT_OBJ_SIGHAND_STRUCT, tsk->sighand, ctx))
+			return -ENOMEM;
+	}
+	return 0;
+}
+
+
+static int dump_one_sighand_struct(cpt_object_t *obj, struct cpt_context *ctx)
+{
+	struct sighand_struct *sighand = obj->o_obj;
+	struct cpt_sighand_image *hdr = cpt_get_buf(ctx);
+	int signo;
+
+	/* Header of the sighand object first; handlers follow as nested objects. */
+	cpt_open_object(obj, ctx);
+	hdr->cpt_next = CPT_NULL;
+	hdr->cpt_object = CPT_OBJ_SIGHAND_STRUCT;
+	hdr->cpt_hdrlen = sizeof(*hdr);
+	hdr->cpt_content = CPT_CONTENT_ARRAY;
+	ctx->write(hdr, sizeof(*hdr), ctx);
+	cpt_release_buf(ctx);
+
+	for (signo = 0; signo < _NSIG; signo++) {
+		struct k_sigaction *ka = &sighand->action[signo];
+		struct cpt_sighandler_image *img;
+		loff_t saved_obj;
+
+		/* Entries whose handler is SIG_DFL are not written. */
+		if (ka->sa.sa_handler == SIG_DFL)
+			continue;
+		img = cpt_get_buf(ctx);
+		cpt_push_object(&saved_obj, ctx);
+		cpt_open_object(NULL, ctx);
+		img->cpt_next = CPT_NULL;
+		img->cpt_object = CPT_OBJ_SIGHANDLER;
+		img->cpt_hdrlen = sizeof(*img);
+		img->cpt_content = CPT_CONTENT_VOID;
+		img->cpt_signo = signo;
+		img->cpt_handler = (unsigned long)ka->sa.sa_handler;
+		img->cpt_restorer = (unsigned long)ka->sa.sa_restorer;
+		img->cpt_flags = ka->sa.sa_flags;
+		memcpy(&img->cpt_mask, &ka->sa.sa_mask, 8);
+		ctx->write(img, sizeof(*img), ctx);
+		cpt_release_buf(ctx);
+		cpt_close_object(ctx);
+		cpt_pop_object(&saved_obj, ctx);
+	}
+
+	cpt_close_object(ctx);
+	return 0;
+}
+
+int cpt_dump_sighand(struct cpt_context *ctx)
+{
+	cpt_object_t *sh_obj;
+	int rc;
+
+	/* Emit the SIGHAND_STRUCT section: one object per collected sighand. */
+	cpt_open_section(ctx, CPT_SECT_SIGHAND_STRUCT);
+	for_each_object(sh_obj, CPT_OBJ_SIGHAND_STRUCT) {
+		rc = dump_one_sighand_struct(sh_obj, ctx);
+		/* A failure returns at once; the section is not closed. */
+		if (rc)
+			return rc;
+	}
+	cpt_close_section(ctx);
+	return 0;
+}
diff -uprN linux-2.6.16/kernel/cpt/cpt_process.h linux-2.6.16.ovz/kernel/cpt/cpt_process.h
--- linux-2.6.16/kernel/cpt/cpt_process.h	1969-12-31 19:00:00.000000000 -0500
+++ linux-2.6.16.ovz/kernel/cpt/cpt_process.h	2006-07-05 08:34:56.000000000 -0400
@@ -0,0 +1,10 @@
+int cpt_collect_signals(cpt_context_t *);
+int cpt_dump_signal(struct cpt_context *);
+int cpt_dump_sighand(struct cpt_context *);
+int cpt_dump_tasks(struct cpt_context *);
+
+int rst_signal_complete(struct cpt_task_image *ti, struct cpt_context *ctx);
+__u32 rst_signal_flag(struct cpt_task_image *ti, struct cpt_context *ctx);
+
+int rst_restore_process(struct cpt_context *ctx);
+int rst_process_linkage(struct cpt_context *ctx);
diff -uprN linux-2.6.16/kernel/cpt/cpt_socket.c linux-2.6.16.ovz/kernel/cpt/cpt_socket.c
--- linux-2.6.16/kernel/cpt/cpt_socket.c	1969-12-31 19:00:00.000000000 -0500
+++ linux-2.6.16.ovz/kernel/cpt/cpt_socket.c	2006-07-05 08:34:56.000000000 -0400
@@ -0,0 +1,779 @@
+/*
+ *
+ *  kernel/cpt/cpt_socket.c
+ *
+ *  Copyright (C) 2000-2005  SWsoft
+ *  All rights reserved.
+ *
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/version.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/file.h>
+#include <linux/mm.h>
+#include <linux/errno.h>
+#include <linux/fs.h>
+#include <linux/socket.h>
+#include <linux/un.h>
+#include <linux/tcp.h>
+#include <net/sock.h>
+#include <net/scm.h>
+#include <net/af_unix.h>
+#include <net/tcp.h>
+#include <net/netlink_sock.h>
+
+#include "cpt_obj.h"
+#include "cpt_context.h"
+#include "cpt_mm.h"
+#include "cpt_socket.h"
+#include "cpt_files.h"
+#include "cpt_kernel.h"
+
+static int dump_rqueue(int owner, struct sock *sk, struct cpt_context *ctx);
+
+
+/* Sockets are quite different from other kinds of files.
+ * There is one simplification: only one struct file can refer to a socket,
+ * so we could store information about socket directly in section FILES as
+ * a description of a file and append f.e. array of not-yet-accepted
+ * connections of listening socket as array of auxiliary data.
+ *
+ * Complications are:
+ * 1. TCP sockets can be orphans. We have to relocate orphans as well,
+ *    so we have to create special section for orphans.
+ * 2. AF_UNIX sockets are distinguished objects: set of links between
+ *    AF_UNIX sockets is quite arbitrary.
+ *    A. Each socket can refer to many files due to FD passing.
+ *    B. Each socket except for connected ones can have in queue skbs
+ *       sent by any of sockets.
+ *
+ *    2A is relatively easy: after our tasks are frozen we make an additional
+ *    recursive pass through the set of collected files and pick up references
+ *    to FD-passed files. After the end of recursion, all the files are treated
+ *    in the same way. All of them will be stored in section FILES.
+ *
+ *    2B. We have to resolve all those references at some point.
+ *    It is the place where pipe-like approach to image fails.
+ *
+ * All this makes socket checkpointing quite cumbersome.
+ * Right now we collect all the sockets and assign some numeric index value
+ * to each of them. The socket section is separate and put after section FILES,
+ * so section FILES refers to sockets by index, section SOCKET refers to FILES
+ * as usual by position in image. All the refs inside socket section are
+ * by index. When restoring we read socket section, create objects to hold
+ * mappings index <-> pos. At the second pass we open sockets (simultaneously
+ * with their pairs) and create FILE objects.
+ */ 
+
+
+/* ====== FD passing ====== */
+
+/* Almost nobody does FD passing via AF_UNIX sockets, nevertheless we
+ * have to implement this. A problem is that in general case we receive
+ * skbs from an unknown context, so new files can arrive to checkpointed
+ * set of processes even after they are stopped. Well, we are going just
+ * to ignore unknown fds while doing real checkpointing. It is fair because
+ * links outside checkpointed set are going to fail anyway.
+ *
+ * ATTN: the procedure is recursive. We linearize the recursion adding
+ * newly found files to the end of file list, so they will be analyzed
+ * in the same loop.
+ */
+
+static int collect_one_passedfd(struct file *file, cpt_context_t * ctx)
+{
+	struct inode *ino = file->f_dentry->d_inode;
+	struct sk_buff_head *rq;
+	struct socket *sock;
+	struct sock *sk;
+	struct sk_buff *skb;
+
+	/* Collect files passed over an AF_UNIX socket and still queued on it. */
+	if (!S_ISSOCK(ino->i_mode))
+		return -ENOTSOCK;
+
+	sock = &container_of(ino, struct socket_alloc, vfs_inode)->socket;
+	if (sock->ops->family != AF_UNIX)
+		return 0;
+
+	sk = sock->sk;
+	rq = &sk->sk_receive_queue;
+
+	/* All the processes are stopped, so skbs cannot leave the queue
+	 * while we scan it; they can still be appended at the tail.
+	 * Holding the queue lock just around the skb->next dereference
+	 * is enough. skbs added after checkpointing started: see above.
+	 */
+	for (skb = skb_peek(rq); skb && skb != (struct sk_buff*)rq; ) {
+		struct scm_fp_list *fpl = UNIXCB(skb).fp;
+		int n;
+
+		if (fpl && skb->sk &&
+		    (!sock_flag(skb->sk, SOCK_DEAD) || unix_peer(sk) == skb->sk)) {
+			/* Walk the fd list backwards, highest slot first. */
+			for (n = fpl->count; n > 0; n--) {
+				if (!cpt_object_add(CPT_OBJ_FILE, fpl->fp[n-1], ctx))
+					return -ENOMEM;
+			}
+		}
+
+		/* Step to the next skb under the queue lock. */
+		spin_lock_irq(&rq->lock);
+		skb = skb->next;
+		spin_unlock_irq(&rq->lock);
+	}
+
+	return 0;
+}
+
+int cpt_collect_passedfds(cpt_context_t * ctx)
+{
+	cpt_object_t *fobj;
+	int rc;
+
+	/* Files added during the walk land at the end of the list and are visited by this same loop. */
+	for_each_object(fobj, CPT_OBJ_FILE) {
+		struct file *filp = fobj->o_obj;
+
+		if (!S_ISSOCK(filp->f_dentry->d_inode->i_mode))
+			continue;
+		rc = collect_one_passedfd(filp, ctx);
+		if (rc < 0)
+			return rc;
+	}
+	return 0;
+}
+
+/* ====== End of FD passing ====== */
+
+/* Must be called under bh_lock_sock() */
+
+void clear_backlog(struct sock *sk)
+{
+	struct sk_buff *cur, *nxt;
+
+	/* Detach the whole backlog chain first, then free it skb by skb. */
+	cur = sk->sk_backlog.head;
+	sk->sk_backlog.head = sk->sk_backlog.tail = NULL;
+	for (; cur; cur = nxt) {
+		nxt = cur->next;
+		cur->next = NULL;
+		kfree_skb(cur);
+	}
+}
+
+void release_sock_nobacklog(struct sock *sk)
+{
+	spin_lock_bh(&sk->sk_lock.slock);
+	clear_backlog(sk);	/* backlog skbs are freed, see clear_backlog() */
+	sk->sk_lock.owner = NULL;
+	if (waitqueue_active(&sk->sk_lock.wq))	/* wake anybody waiting for the socket lock */
+		wake_up(&sk->sk_lock.wq);
+	spin_unlock_bh(&sk->sk_lock.slock);
+}
+
+int cpt_dump_skb(int type, int owner, struct sk_buff *skb,
+		 struct cpt_context *ctx)
+{
+	struct cpt_skb_image *v = cpt_get_buf(ctx);
+	loff_t saved_obj;
+	struct timeval tmptv;
+	/* Dump one skb as a CPT_OBJ_SKB object tagged with queue type and owner index. Returns 0 or -EINVAL. */
+	cpt_push_object(&saved_obj, ctx);
+	cpt_open_object(NULL, ctx);
+
+	v->cpt_next = CPT_NULL;
+	v->cpt_object = CPT_OBJ_SKB;
+	v->cpt_hdrlen = sizeof(*v);
+	v->cpt_content = CPT_CONTENT_ARRAY;
+	/* Ownership, timestamp and layout; header pointers are stored as offsets from skb->head. */
+	v->cpt_owner = owner;
+	v->cpt_queue = type;
+	skb_get_timestamp(skb, &tmptv);
+	v->cpt_stamp = cpt_timeval_export(&tmptv);
+	v->cpt_hspace = skb->data - skb->head;
+	v->cpt_tspace = skb->end - skb->tail;
+	v->cpt_h = skb->h.raw - skb->head;
+	v->cpt_nh = skb->nh.raw - skb->head;
+	v->cpt_mac = skb->mac.raw - skb->head;
+	if (sizeof(skb->cb) < sizeof(v->cpt_cb)) BUG();
+	memcpy(v->cpt_cb, skb->cb, sizeof(v->cpt_cb));
+	if (sizeof(skb->cb) > sizeof(v->cpt_cb)) {	/* cb bytes beyond the image field are not saved; warn if any is set */
+		int i;
+		for (i=sizeof(v->cpt_cb); i<sizeof(skb->cb); i++) {
+			if (skb->cb[i]) {
+				wprintk_ctx("dirty skb cb\n");
+				break;
+			}
+		}
+	}
+	v->cpt_len = skb->len;
+	v->cpt_mac_len = skb->mac_len;
+	v->cpt_csum = skb->csum;
+	v->cpt_local_df = skb->local_df;
+	v->cpt_pkt_type = skb->pkt_type;
+	v->cpt_ip_summed = skb->ip_summed;
+	v->cpt_priority = skb->priority;
+	v->cpt_protocol = skb->protocol;
+	v->cpt_security = 0;
+	v->cpt_tso_segs = skb_shinfo(skb)->tso_segs;
+	v->cpt_tso_size = skb_shinfo(skb)->tso_size;
+	if (skb_shinfo(skb)->ufo_size) {
+		eprintk_ctx("skb ufo is not supported\n");
+		cpt_release_buf(ctx);	/* the buffer must not stay held on this error path */
+		return -EINVAL;
+	}
+	ctx->write(v, sizeof(*v), ctx);
+	cpt_release_buf(ctx);
+	/* Payload: the headroom plus all skb->len bytes, written as a nested CPT_OBJ_BITS object. */
+	if (skb->len + (skb->data - skb->head) > 0) {
+		struct cpt_obj_bits ob;
+		loff_t saved_obj2;
+
+		cpt_push_object(&saved_obj2, ctx);
+		cpt_open_object(NULL, ctx);
+		ob.cpt_next = CPT_NULL;
+		ob.cpt_object = CPT_OBJ_BITS;
+		ob.cpt_hdrlen = sizeof(ob);
+		ob.cpt_content = CPT_CONTENT_DATA;
+		ob.cpt_size = skb->len + (skb->data - skb->head);	/* headroom recomputed: v is already released here */
+
+		ctx->write(&ob, sizeof(ob), ctx);
+		/* Linear part straight from the skb; the rest goes through ctx->tmpbuf, at most PAGE_SIZE at a time. */
+		ctx->write(skb->head, (skb->data-skb->head) + (skb->len-skb->data_len), ctx);
+		if (skb->data_len) {
+			int offset = skb->len - skb->data_len;
+			while (offset < skb->len) {
+				int copy = skb->len - offset;
+				if (copy > PAGE_SIZE)
+					copy = PAGE_SIZE;
+				(void)cpt_get_buf(ctx);
+				if (skb_copy_bits(skb, offset, ctx->tmpbuf, copy))
+					BUG();
+				ctx->write(ctx->tmpbuf, copy, ctx);
+				__cpt_release_buf(ctx);
+				offset += copy;
+			}
+		}
+
+		ctx->align(ctx);
+		cpt_close_object(ctx);
+		cpt_pop_object(&saved_obj2, ctx);
+	}
+	/* skbs owned by an AF_UNIX socket: each passed file becomes a CPT_OBJ_FILEDESC referring to its FILE object. */
+	if (skb->sk && skb->sk->sk_family == AF_UNIX) {
+		struct scm_fp_list *fpl = UNIXCB(skb).fp;
+
+		if (fpl) {
+			int i;
+
+			for (i = 0; i < fpl->count; i++) {
+				struct cpt_fd_image fdi;
+				cpt_object_t *obj;
+				loff_t saved_obj2;
+
+				obj = lookup_cpt_object(CPT_OBJ_FILE, fpl->fp[i], ctx);
+
+				if (!obj) {
+					eprintk_ctx("lost passed FD\n");
+					return -EINVAL;
+				}
+
+				cpt_push_object(&saved_obj2, ctx);
+				cpt_open_object(NULL, ctx);
+				fdi.cpt_next = CPT_NULL;
+				fdi.cpt_object = CPT_OBJ_FILEDESC;
+				fdi.cpt_hdrlen = sizeof(fdi);
+				fdi.cpt_content = CPT_CONTENT_VOID;
+
+				fdi.cpt_fd = i;
+				fdi.cpt_file = obj->o_pos;
+				fdi.cpt_flags = 0;
+				ctx->write(&fdi, sizeof(fdi), ctx);
+				cpt_close_object(ctx);
+				cpt_pop_object(&saved_obj2, ctx);
+			}
+		}
+	}
+
+	cpt_close_object(ctx);
+	cpt_pop_object(&saved_obj, ctx);
+	return 0;
+}
+
+static int dump_rqueue(int idx, struct sock *sk, struct cpt_context *ctx)
+{
+	struct sk_buff_head *rq = &sk->sk_receive_queue;
+	struct sock *cached_sender = NULL;
+	struct sk_buff *skb;
+
+	/* Dump the receive queue of sk; returns the first cpt_dump_skb() error, else 0. */
+	for (skb = skb_peek(rq); skb && skb != (struct sk_buff*)rq; ) {
+		int skip = 0;
+
+		/* AF_UNIX: the owner index follows skb->sk; the last successful lookup is cached. */
+		if (sk->sk_family == AF_UNIX && skb->sk != cached_sender) {
+			cpt_object_t *sobj = lookup_cpt_object(CPT_OBJ_SOCKET, skb->sk, ctx);
+
+			cached_sender = sobj ? skb->sk : NULL;
+			idx = sobj ? sobj->o_index : -1;
+			/* Unknown skb->sk that is not our peer: leave the skb out. */
+			skip = !sobj && unix_peer(sk) != skb->sk;
+		}
+
+		if (!skip) {
+			int err = cpt_dump_skb(CPT_SKB_RQ, idx, skb, ctx);
+			if (err)
+				return err;
+		}
+
+		/* Only the ->next dereference is done under the queue lock. */
+		spin_lock_irq(&rq->lock);
+		skb = skb->next;
+		spin_unlock_irq(&rq->lock);
+	}
+
+	return 0;
+}
+
+static int dump_wqueue(int idx, struct sock *sk, struct cpt_context *ctx)
+{
+	struct sk_buff_head *wq = &sk->sk_write_queue;
+	struct sk_buff *skb;
+
+	/* Dump every skb of the write queue as CPT_SKB_WQ with owner idx; stop at the first error. */
+	for (skb = skb_peek(wq); skb && skb != (struct sk_buff*)wq; ) {
+		int rc = cpt_dump_skb(CPT_SKB_WQ, idx, skb, ctx);
+		if (rc)
+			return rc;
+		spin_lock_irq(&wq->lock);
+		skb = skb->next;
+		spin_unlock_irq(&wq->lock);
+	}
+	return 0;
+}
+
+void cpt_dump_sock_attr(struct sock *sk, cpt_context_t *ctx)
+{
+	loff_t saved_obj;
+	/* Attached socket filter: a bits header followed by the raw instruction array. */
+	if (sk->sk_filter) {
+		struct sk_filter *flt = sk->sk_filter;
+		struct cpt_obj_bits hdr;
+
+		cpt_push_object(&saved_obj, ctx);
+		cpt_open_object(NULL, ctx);
+		hdr.cpt_next = CPT_NULL;
+		hdr.cpt_object = CPT_OBJ_SKFILTER;
+		hdr.cpt_hdrlen = sizeof(hdr);
+		hdr.cpt_content = CPT_CONTENT_DATA;
+		hdr.cpt_size = flt->len*sizeof(struct sock_filter);
+		ctx->write(&hdr, sizeof(hdr), ctx);
+		ctx->write(flt->insns, hdr.cpt_size, ctx);
+		cpt_close_object(ctx);
+		cpt_pop_object(&saved_obj, ctx);
+	}
+	if (sk->sk_family == AF_INET || sk->sk_family == AF_INET6) {
+		cpt_push_object(&saved_obj, ctx);
+		cpt_dump_mcfilter(sk, ctx);
+		cpt_pop_object(&saved_obj, ctx);
+	}
+}
+
+/* Dump socket content */
+
+int cpt_dump_socket(cpt_object_t *obj, struct sock *sk, int index, int parent, struct cpt_context *ctx)
+{
+	struct cpt_sock_image *v = cpt_get_buf(ctx);
+	struct socket *sock;
+	/* Dump sk as a CPT_OBJ_SOCKET object with the given index/parent; queues follow the header. Returns 0 or a negative errno. */
+	cpt_open_object(obj, ctx);
+
+	v->cpt_next = CPT_NULL;
+	v->cpt_object = CPT_OBJ_SOCKET;
+	v->cpt_hdrlen = sizeof(*v);
+	v->cpt_content = CPT_CONTENT_ARRAY;
+
+	v->cpt_file = CPT_NULL;
+	sock = sk->sk_socket;
+	if (sock && sock->file) {
+		cpt_object_t *tobj;
+		tobj = lookup_cpt_object(CPT_OBJ_FILE, sock->file, ctx);
+		if (tobj)
+			v->cpt_file = tobj->o_pos;
+	}
+	v->cpt_index = index;
+	v->cpt_parent = parent;
+	/* Inet sockets that have a struct socket are locked here, once per object (tracked by o_lock). */
+	if (sk->sk_family == AF_INET || sk->sk_family == AF_INET6) {
+		if (sock && !obj->o_lock) {
+			lock_sock(sk);
+			obj->o_lock = 1;
+		}
+	}
+
+	/* Some bits stored in inode */
+	v->cpt_ssflags = sock ? sock->flags : 0;
+	v->cpt_sstate = sock ? sock->state : 0;
+	v->cpt_passcred = sock ? test_bit(SOCK_PASSCRED, &sock->flags) : 0;
+
+	/* Common data */
+	v->cpt_family = sk->sk_family;
+	v->cpt_type = sk->sk_type;
+	v->cpt_state = sk->sk_state;
+	v->cpt_reuse = sk->sk_reuse;
+	v->cpt_zapped = sock_flag(sk, SOCK_ZAPPED);
+	v->cpt_shutdown = sk->sk_shutdown;
+	v->cpt_userlocks = sk->sk_userlocks;
+	v->cpt_no_check = sk->sk_no_check;
+	v->cpt_debug = sock_flag(sk, SOCK_DBG);	/* keep SOCK_DBG out of cpt_zapped; NOTE(review): confirm cpt_debug in cpt_image.h */
+	v->cpt_rcvtstamp = sock_flag(sk, SOCK_RCVTSTAMP);
+	v->cpt_localroute = sock_flag(sk, SOCK_LOCALROUTE);
+	v->cpt_protocol = sk->sk_protocol;
+	v->cpt_err = sk->sk_err;
+	v->cpt_err_soft = sk->sk_err_soft;
+	v->cpt_max_ack_backlog = sk->sk_max_ack_backlog;
+	v->cpt_priority = sk->sk_priority;
+	v->cpt_rcvlowat = sk->sk_rcvlowat;
+	v->cpt_rcvtimeo = CPT_NULL;
+	if (sk->sk_rcvtimeo != MAX_SCHEDULE_TIMEOUT)
+		v->cpt_rcvtimeo = sk->sk_rcvtimeo > INT_MAX ? INT_MAX : sk->sk_rcvtimeo;
+	v->cpt_sndtimeo = CPT_NULL;
+	if (sk->sk_sndtimeo != MAX_SCHEDULE_TIMEOUT)
+		v->cpt_sndtimeo = sk->sk_sndtimeo > INT_MAX ? INT_MAX : sk->sk_sndtimeo;
+	v->cpt_rcvbuf = sk->sk_rcvbuf;
+	v->cpt_sndbuf = sk->sk_sndbuf;
+	v->cpt_bound_dev_if = sk->sk_bound_dev_if;
+	v->cpt_flags = sk->sk_flags;
+	v->cpt_lingertime = CPT_NULL;
+	if (sk->sk_lingertime != MAX_SCHEDULE_TIMEOUT)
+		v->cpt_lingertime = sk->sk_lingertime > INT_MAX ? INT_MAX : sk->sk_lingertime;
+	v->cpt_peer_pid = sk->sk_peercred.pid;
+	v->cpt_peer_uid = sk->sk_peercred.uid;
+	v->cpt_peer_gid = sk->sk_peercred.gid;
+	v->cpt_stamp = cpt_timeval_export(&sk->sk_stamp);
+
+	v->cpt_peer = -1;
+	v->cpt_socketpair = 0;
+	v->cpt_deleted = 0;
+	/* A getname() failure for the local address aborts the dump; for the remote one it just leaves cpt_raddrlen 0. */
+	v->cpt_laddrlen = 0;
+	if (sock) {
+		int alen = sizeof(v->cpt_laddr);
+		int err = sock->ops->getname(sock, (struct sockaddr*)&v->cpt_laddr, &alen, 0);
+		if (err) {
+			cpt_release_buf(ctx);
+			return err;
+		}
+		v->cpt_laddrlen = alen;
+	}
+	v->cpt_raddrlen = 0;
+	if (sock) {
+		int alen = sizeof(v->cpt_raddr);
+		int err = sock->ops->getname(sock, (struct sockaddr*)&v->cpt_raddr, &alen, 2);
+		if (!err)
+			v->cpt_raddrlen = alen;
+	}
+	/* AF_UNIX specifics: bound path, peer index and listening parent. */
+	if (sk->sk_family == AF_UNIX) {
+		if (unix_sk(sk)->dentry) {
+			struct dentry *d = unix_sk(sk)->dentry;
+			v->cpt_deleted = !IS_ROOT(d) && d_unhashed(d);
+			if (!v->cpt_deleted) {
+				int err = 0;
+				char *path;
+				unsigned long pg = __get_free_page(GFP_KERNEL);
+
+				if (!pg) {
+					cpt_release_buf(ctx);
+					return -ENOMEM;
+				}
+
+				path = d_path(d, unix_sk(sk)->mnt, (char *)pg, PAGE_SIZE);
+
+				if (!IS_ERR(path)) {
+					int len = strlen(path);
+					if (len < 126) {
+						strcpy(((char*)v->cpt_laddr)+2, path);
+						v->cpt_laddrlen = len + 2;
+					} else {
+						wprintk_ctx("af_unix path is too long: %s (%s)\n", path, ((char*)v->cpt_laddr)+2);
+					}
+					err = cpt_verify_overmount(path, d, unix_sk(sk)->mnt, ctx);
+				} else {
+					eprintk_ctx("cannot get path of an af_unix socket\n");
+					err = PTR_ERR(path);
+				}
+				free_page(pg);
+				if (err) {
+					cpt_release_buf(ctx);
+					return err;
+				}
+			}
+		}
+
+		/* If the socket is connected, find its peer. If peer is not
+		 * in our table, the socket is connected to external process
+		 * and we consider it disconnected.
+		 */
+		if (unix_peer(sk)) {
+			cpt_object_t *pobj;
+			pobj = lookup_cpt_object(CPT_OBJ_SOCKET, unix_peer(sk), ctx);
+			if (pobj)
+				v->cpt_peer = pobj->o_index;
+			else
+				v->cpt_shutdown = SHUTDOWN_MASK;
+
+			if (unix_peer(unix_peer(sk)) == sk)
+				v->cpt_socketpair = 1;
+		}
+
+		/* If the socket shares address with another socket it is
+		 * child of some listening socket. Find and record it. */
+		if (unix_sk(sk)->addr &&
+		    atomic_read(&unix_sk(sk)->addr->refcnt) > 1 &&
+		    sk->sk_state != TCP_LISTEN) {
+			cpt_object_t *pobj;
+			for_each_object(pobj, CPT_OBJ_SOCKET) {
+				struct sock *psk = pobj->o_obj;
+				if (psk->sk_family == AF_UNIX &&
+				    psk->sk_state == TCP_LISTEN &&
+				    unix_sk(psk)->addr == unix_sk(sk)->addr) {
+					v->cpt_parent = pobj->o_index;
+					break;
+				}
+			}
+		}
+	}
+
+	if (sk->sk_family == AF_INET || sk->sk_family == AF_INET6)
+		cpt_dump_socket_in(v, sk, ctx);
+	/* Header done: write it, then attributes and queued skbs as nested objects. */
+	ctx->write(v, sizeof(*v), ctx);
+	cpt_release_buf(ctx);
+
+	cpt_dump_sock_attr(sk, ctx);
+	/* NOTE(review): return values of the queue dumps below are not checked. */
+	dump_rqueue(index, sk, ctx);
+	if (sk->sk_family == AF_INET || sk->sk_family == AF_INET6) {
+		dump_wqueue(index, sk, ctx);
+		cpt_dump_ofo_queue(index, sk, ctx);
+	}
+
+	if ((sk->sk_family == AF_INET || sk->sk_family == AF_INET6)
+	    && sk->sk_state == TCP_LISTEN)
+		cpt_dump_synwait_queue(sk, index, ctx);
+
+	cpt_close_object(ctx);
+	/* The accept queue of a listening inet socket is dumped after this object is closed. */
+	if ((sk->sk_family == AF_INET || sk->sk_family == AF_INET6)
+	    && sk->sk_state == TCP_LISTEN)
+		cpt_dump_accept_queue(sk, index, ctx);
+
+	return 0;
+}
+
+int cpt_dump_orphaned_sockets(struct cpt_context *ctx)
+{
+	int i;
+	/* Dump into CPT_SECT_ORPHANS the TCP sockets of this VE that have no struct socket. */
+	cpt_open_section(ctx, CPT_SECT_ORPHANS);
+
+	for (i = 0; i < tcp_hashinfo.ehash_size; i++) {
+		struct sock *sk;
+		struct hlist_node *node;
+
+retry:
+		read_lock_bh(&tcp_hashinfo.ehash[i].lock);
+		sk_for_each(sk, node, &tcp_hashinfo.ehash[i].chain) {
+			/* Wanted: owned by the current VE, no sk_socket, SOCK_DEAD, not yet in the table. */
+			if (VE_OWNER_SK(sk) != get_exec_env())
+				continue;
+			if (sk->sk_socket)
+				continue;
+			if (!sock_flag(sk, SOCK_DEAD))
+				continue;
+			if (lookup_cpt_object(CPT_OBJ_SOCKET, sk, ctx))
+				continue;
+			sock_hold(sk);
+			read_unlock_bh(&tcp_hashinfo.ehash[i].lock);
+			/* Set sk_lock.owner by hand for the duration of the dump. */
+			local_bh_disable();
+			bh_lock_sock(sk);
+			if (sock_owned_by_user(sk))
+				eprintk_ctx("BUG: sk locked by whom?\n");
+			sk->sk_lock.owner = (void *)1;
+			bh_unlock_sock(sk);
+			local_bh_enable();
+			/* NOTE(review): the result of cpt_dump_socket() is not checked. */
+			cpt_dump_socket(NULL, sk, -1, -1, ctx);
+			/* Clear the owner mark, drop the backlog and finish the socket with tcp_done(). */
+			local_bh_disable();
+			bh_lock_sock(sk);
+			sk->sk_lock.owner = NULL;
+			clear_backlog(sk);
+			tcp_done(sk);
+			bh_unlock_sock(sk);
+			local_bh_enable();
+			sock_put(sk);
+			/* The bucket lock was dropped above: rescan this bucket from its head. */
+			goto retry;
+		}
+		read_unlock_bh(&tcp_hashinfo.ehash[i].lock);
+	}
+	cpt_close_section(ctx);
+	return 0;
+}
+
+/* Returns 0 (and logs) for a netlink socket whose ->cb is set, 1 for anything else. */
+static int can_dump(struct sock *sk, cpt_context_t *ctx)
+{
+	if (sk->sk_family != AF_NETLINK)
+		return 1;
+
+	if (((struct netlink_sock *)sk)->cb == NULL)
+		return 1;
+
+	eprintk_ctx("netlink socket has active callback\n");
+	return 0;
+}
+
+/* We are not going to block suspend when we have external AF_UNIX connections.
+ * But we cannot stop feed of new packets/connections to our environment
+ * from outside. Taking into account that it is intrinsically unreliable,
+ * we collect some amount of data, but when checkpointing/restoring we
+ * are going to drop everything, which does not make sense: skbs sent
+ * by outside processes, connections from outside etc. etc.
+ */
+
+/* The first pass. When we see socket referenced by a file, we just
+ * add it to socket table. Returns 0, -ENOTSOCK, -EBUSY or -ENOMEM. */
+int cpt_collect_socket(struct file *file, cpt_context_t * ctx)
+{
+	struct inode *ino = file->f_dentry->d_inode;
+	struct sock *sk;
+	cpt_object_t *sobj;
+
+	if (!S_ISSOCK(ino->i_mode))
+		return -ENOTSOCK;
+	/* The inode is the vfs_inode member of a struct socket_alloc holding the socket. */
+	sk = container_of(ino, struct socket_alloc, vfs_inode)->socket.sk;
+	if (!can_dump(sk, ctx))
+		return -EBUSY;
+	sobj = cpt_object_add(CPT_OBJ_SOCKET, sk, ctx);
+	if (sobj == NULL)
+		return -ENOMEM;
+	sobj->o_parent = file;
+	return 0;
+}
+
+/*
+ * We should end with table containing:
+ *  * all sockets opened by our processes in the table.
+ *  * all the sockets queued in listening queues on _our_ listening sockets,
+ *    which are connected to our opened sockets.
+ */
+
+static int collect_one_unix_listening_sock(cpt_object_t *obj, cpt_context_t * ctx)
+{
+	struct sock *sk = obj->o_obj;
+	cpt_object_t *cobj;
+	struct sk_buff *skb;
+	/* Walk the receive queue of listening socket obj; skb->sk of each entry is a candidate. */
+	skb = skb_peek(&sk->sk_receive_queue);
+	while (skb && skb != (struct sk_buff*)&sk->sk_receive_queue) {
+		struct sock *lsk = skb->sk;
+		if (unix_peer(lsk) &&
+		    lookup_cpt_object(CPT_OBJ_SOCKET, unix_peer(lsk), ctx)) {
+			/* Peer is already in the table: add lsk too, with the listener's o_parent. */
+			if ((cobj = cpt_object_add(CPT_OBJ_SOCKET, lsk, ctx)) == NULL)
+				return -ENOMEM;
+			cobj->o_parent = obj->o_parent;
+		}
+		spin_lock_irq(&sk->sk_receive_queue.lock);	/* held only for the ->next read */
+		skb = skb->next;
+		spin_unlock_irq(&sk->sk_receive_queue.lock);
+	}
+
+	return 0;
+}
+
+/* Add queued children of AF_UNIX listeners, then number sockets; 0 or -errno. */
+int cpt_index_sockets(cpt_context_t * ctx)
+{
+	cpt_object_t *obj;
+	unsigned long index = 0;
+
+	/* Collect not-yet-accepted children of listening sockets. */
+	for_each_object(obj, CPT_OBJ_SOCKET) {
+		struct sock *sk = obj->o_obj;
+
+		if (sk->sk_state == TCP_LISTEN && sk->sk_family == AF_UNIX) {
+			int err = collect_one_unix_listening_sock(obj, ctx);
+			if (err)	/* -ENOMEM: do not index a partial table */
+				return err;
+		}
+	}
+
+	/* Assign indices to all the sockets. */
+	for_each_object(obj, CPT_OBJ_SOCKET) {
+		struct sock *sk = obj->o_obj;
+		cpt_obj_setindex(obj, index++, ctx);
+
+		if (sk->sk_socket && sk->sk_socket->file) {
+			cpt_object_t *tobj;
+			tobj = lookup_cpt_object(CPT_OBJ_FILE, sk->sk_socket->file, ctx);
+			if (tobj)	/* the file object takes the socket's index */
+				cpt_obj_setindex(tobj, obj->o_index, ctx);
+		}
+	}
+	return 0;
+}
+
+/* release_sock() every table entry that has a sock, o_lock set and a struct socket. */
+void cpt_unlock_sockets(cpt_context_t * ctx)
+{
+	cpt_object_t *obj;
+
+	for_each_object(obj, CPT_OBJ_SOCKET) {
+		struct sock *sk = obj->o_obj;
+		if (!sk || !obj->o_lock || !sk->sk_socket)
+			continue;
+		release_sock(sk);
+	}
+}
+
+void cpt_kill_sockets(cpt_context_t * ctx)
+{
+	cpt_object_t *obj;
+
+	for_each_object(obj, CPT_OBJ_SOCKET) {
+		struct sock *sk = obj->o_obj;
+		if (!sk || !obj->o_lock)
+			continue;	/* skip entries without a sock or without o_lock set */
+		cpt_kill_socket(sk, ctx);
+		if (sk->sk_socket)
+			release_sock_nobacklog(sk);
+	}
+}
+
+/* Return the fa_fd under which file sits on its socket's fasync list;
+ * -1 (converted to __u32) when the file is not on that list. */
+__u32 cpt_socket_fasync(struct file *file, struct cpt_context *ctx)
+{
+	struct socket *sock;
+	struct fasync_struct *fa;
+
+	sock = &container_of(file->f_dentry->d_inode, struct socket_alloc, vfs_inode)->socket;
+
+	fa = sock->fasync_list;
+	while (fa && fa->fa_file != file)
+		fa = fa->fa_next;
+	return fa ? fa->fa_fd : -1;
+}
diff -uprN linux-2.6.16/kernel/cpt/cpt_socket.h linux-2.6.16.ovz/kernel/cpt/cpt_socket.h
--- linux-2.6.16/kernel/cpt/cpt_socket.h	1969-12-31 19:00:00.000000000 -0500
+++ linux-2.6.16.ovz/kernel/cpt/cpt_socket.h	2006-07-05 08:34:56.000000000 -0400
@@ -0,0 +1,33 @@
+struct sock;
+
+int cpt_collect_passedfds(cpt_context_t *);
+int cpt_index_sockets(cpt_context_t *);
+int cpt_collect_socket(struct file *, cpt_context_t *);
+int cpt_dump_socket(cpt_object_t *obj, struct sock *sk, int index, int parent, struct cpt_context *ctx);
+int cpt_dump_accept_queue(struct sock *sk, int index, struct cpt_context *ctx);
+int cpt_dump_synwait_queue(struct sock *sk, int index, struct cpt_context *ctx);
+int rst_sockets(struct cpt_context *ctx);
+int rst_sockets_complete(struct cpt_context *ctx);
+int cpt_dump_orphaned_sockets(struct cpt_context *ctx);
+
+int rst_sock_attr(loff_t *pos_p, struct sock *sk, cpt_context_t *ctx);
+struct sk_buff * rst_skb(loff_t *pos_p, __u32 *owner, __u32 *queue, struct cpt_context *ctx);
+
+void cpt_unlock_sockets(cpt_context_t *);
+void cpt_kill_sockets(cpt_context_t *);
+
+
+int cpt_kill_socket(struct sock *, cpt_context_t *);
+int cpt_dump_socket_in(struct cpt_sock_image *, struct sock *, struct cpt_context*);
+int rst_socket_in(struct cpt_sock_image *si, loff_t pos, struct sock *, struct cpt_context *ctx);
+__u32 cpt_socket_fasync(struct file *file, struct cpt_context *ctx);
+int cpt_attach_accept(struct sock *lsk, struct sock *sk, cpt_context_t *);
+int rst_restore_synwait_queue(struct sock *sk, struct cpt_sock_image *si, loff_t pos, struct cpt_context *ctx);
+int cpt_dump_ofo_queue(int idx, struct sock *sk, struct cpt_context *ctx);
+int cpt_dump_skb(int type, int owner, struct sk_buff *skb, struct cpt_context *ctx);
+int cpt_dump_mcfilter(struct sock *sk, struct cpt_context *ctx);
+
+int rst_sk_mcfilter_in(struct sock *sk, struct cpt_sockmc_image *v,
+		       loff_t pos, cpt_context_t *ctx);
+int rst_sk_mcfilter_in6(struct sock *sk, struct cpt_sockmc_image *v,
+			loff_t pos, cpt_context_t *ctx);
diff -uprN linux-2.6.16/kernel/cpt/cpt_socket_in.c linux-2.6.16.ovz/kernel/cpt/cpt_socket_in.c
--- linux-2.6.16/kernel/cpt/cpt_socket_in.c	1969-12-31 19:00:00.000000000 -0500
+++ linux-2.6.16.ovz/kernel/cpt/cpt_socket_in.c	2006-07-05 08:34:56.000000000 -0400
@@ -0,0 +1,443 @@
+/*
+ *
+ *  kernel/cpt/cpt_socket_in.c
+ *
+ *  Copyright (C) 2000-2005  SWsoft
+ *  All rights reserved.
+ *
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/version.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/file.h>
+#include <linux/mm.h>
+#include <linux/errno.h>
+#include <linux/fs.h>
+#include <linux/socket.h>
+#include <linux/tcp.h>
+#include <net/sock.h>
+#include <net/tcp.h>
+#include <linux/igmp.h>
+#include <linux/ipv6.h>
+
+#include "cpt_obj.h"
+#include "cpt_context.h"
+#include "cpt_mm.h"
+#include "cpt_socket.h"
+#include "cpt_kernel.h"
+
+static inline __u32 jiffies_export(unsigned long tmo)
+{
+	/* offset of tmo from the current jiffies, cut to signed 32 bits, returned as __u32 */
+	return (__s32)(long)(tmo - jiffies);
+}
+
+static inline __u32 tcp_jiffies_export(__u32 tmo)
+{
+	/* offset of tmo from the current tcp_time_stamp, as a signed 32-bit difference */
+	return (__s32)(tmo - tcp_time_stamp);
+}
+
+/* Dump the TCP out-of-order queue of sk as CPT_SKB_OFOQ skbs owned by idx.
+ * Non-TCP sockets are skipped. Returns 0 or the first cpt_dump_skb() error. */
+int cpt_dump_ofo_queue(int idx, struct sock *sk, struct cpt_context *ctx)
+{
+	struct sk_buff_head *ofo;
+	struct sk_buff *skb;
+	int err = 0;
+
+	if (sk->sk_type != SOCK_STREAM || sk->sk_protocol != IPPROTO_TCP)
+		return 0;
+
+	ofo = &tcp_sk(sk)->out_of_order_queue;
+	skb = skb_peek(ofo);
+	while (skb && skb != (struct sk_buff*)ofo) {
+		err = cpt_dump_skb(CPT_SKB_OFOQ, idx, skb, ctx);
+		if (err)
+			break;
+		/* the queue lock is held only while stepping to the next skb */
+		spin_lock_irq(&ofo->lock);
+		skb = skb->next;
+		spin_unlock_irq(&ofo->lock);
+	}
+	return err;
+}
+
+static int cpt_dump_socket_tcp(struct cpt_sock_image *si, struct sock *sk,
+			       struct cpt_context *ctx)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+	/* Copy TCP state of sk into si; time stamps go through the *_export() helpers. Always returns 0. */
+	si->cpt_pred_flags = tp->pred_flags;
+	si->cpt_rcv_nxt = tp->rcv_nxt;
+	si->cpt_snd_nxt = tp->snd_nxt;
+	si->cpt_snd_una = tp->snd_una;
+	si->cpt_snd_sml = tp->snd_sml;
+	si->cpt_rcv_tstamp = tcp_jiffies_export(tp->rcv_tstamp);
+	si->cpt_lsndtime = tcp_jiffies_export(tp->lsndtime);
+	si->cpt_tcp_header_len = tp->tcp_header_len;
+	si->cpt_ack_pending = inet_csk(sk)->icsk_ack.pending;
+	si->cpt_quick = inet_csk(sk)->icsk_ack.quick;
+	si->cpt_pingpong = inet_csk(sk)->icsk_ack.pingpong;
+	si->cpt_blocked = inet_csk(sk)->icsk_ack.blocked;
+	si->cpt_ato = inet_csk(sk)->icsk_ack.ato;
+	si->cpt_ack_timeout = jiffies_export(inet_csk(sk)->icsk_ack.timeout);
+	si->cpt_lrcvtime = tcp_jiffies_export(inet_csk(sk)->icsk_ack.lrcvtime);
+	si->cpt_last_seg_size = inet_csk(sk)->icsk_ack.last_seg_size;
+	si->cpt_rcv_mss = inet_csk(sk)->icsk_ack.rcv_mss;
+	si->cpt_snd_wl1 = tp->snd_wl1;
+	si->cpt_snd_wnd = tp->snd_wnd;
+	si->cpt_max_window = tp->max_window;
+	si->cpt_pmtu_cookie = inet_csk(sk)->icsk_pmtu_cookie;
+	si->cpt_mss_cache = tp->mss_cache;
+	si->cpt_mss_cache_std = tp->mss_cache; /* FIXME: was tp->mss_cache_std */
+	si->cpt_mss_clamp = tp->rx_opt.mss_clamp;
+	si->cpt_ext_header_len = inet_csk(sk)->icsk_ext_hdr_len;
+	si->cpt_ext2_header_len = 0;
+	si->cpt_ca_state = inet_csk(sk)->icsk_ca_state;
+	si->cpt_retransmits = inet_csk(sk)->icsk_retransmits;
+	si->cpt_reordering = tp->reordering;
+	si->cpt_frto_counter = tp->frto_counter;
+	si->cpt_frto_highmark = tp->frto_highmark;
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,9)
+	// // si->cpt_adv_cong = tp->adv_cong;
+#endif
+	si->cpt_defer_accept = inet_csk(sk)->icsk_accept_queue.rskq_defer_accept;
+	si->cpt_backoff = inet_csk(sk)->icsk_backoff;
+	si->cpt_srtt = tp->srtt;
+	si->cpt_mdev = tp->mdev;
+	si->cpt_mdev_max = tp->mdev_max;
+	si->cpt_rttvar = tp->rttvar;
+	si->cpt_rtt_seq = tp->rtt_seq;
+	si->cpt_rto = inet_csk(sk)->icsk_rto;
+	si->cpt_packets_out = tp->packets_out;
+	si->cpt_left_out = tp->left_out;
+	si->cpt_retrans_out = tp->retrans_out;
+	si->cpt_lost_out = tp->lost_out;
+	si->cpt_sacked_out = tp->sacked_out;
+	si->cpt_fackets_out = tp->fackets_out;
+	si->cpt_snd_ssthresh = tp->snd_ssthresh;
+	si->cpt_snd_cwnd = tp->snd_cwnd;
+	si->cpt_snd_cwnd_cnt = tp->snd_cwnd_cnt;
+	si->cpt_snd_cwnd_clamp = tp->snd_cwnd_clamp;
+	si->cpt_snd_cwnd_used = tp->snd_cwnd_used;
+	si->cpt_snd_cwnd_stamp = tcp_jiffies_export(tp->snd_cwnd_stamp);
+	si->cpt_timeout = jiffies_export(inet_csk(sk)->icsk_timeout);
+	si->cpt_ka_timeout = 0;
+	si->cpt_rcv_wnd = tp->rcv_wnd;
+	si->cpt_rcv_wup = tp->rcv_wup;
+	si->cpt_write_seq = tp->write_seq;
+	si->cpt_pushed_seq = tp->pushed_seq;
+	si->cpt_copied_seq = tp->copied_seq;
+	si->cpt_tstamp_ok = tp->rx_opt.tstamp_ok;
+	si->cpt_wscale_ok = tp->rx_opt.wscale_ok;
+	si->cpt_sack_ok = tp->rx_opt.sack_ok;
+	si->cpt_saw_tstamp = tp->rx_opt.saw_tstamp;
+	si->cpt_snd_wscale = tp->rx_opt.snd_wscale;
+	si->cpt_rcv_wscale = tp->rx_opt.rcv_wscale;
+	si->cpt_nonagle = tp->nonagle;
+	si->cpt_keepalive_probes = tp->keepalive_probes;
+	si->cpt_rcv_tsval = tp->rx_opt.rcv_tsval;
+	si->cpt_rcv_tsecr = tp->rx_opt.rcv_tsecr;
+	si->cpt_ts_recent = tp->rx_opt.ts_recent;
+	si->cpt_ts_recent_stamp = tp->rx_opt.ts_recent_stamp;
+	si->cpt_user_mss = tp->rx_opt.user_mss;
+	si->cpt_dsack = tp->rx_opt.dsack;
+	si->cpt_eff_sacks = tp->rx_opt.eff_sacks;
+	si->cpt_sack_array[0] = tp->duplicate_sack[0].start_seq;	/* [0..1]: the duplicate SACK block */
+	si->cpt_sack_array[1] = tp->duplicate_sack[0].end_seq;
+	si->cpt_sack_array[2] = tp->selective_acks[0].start_seq;	/* [2..9]: four selective ACK blocks */
+	si->cpt_sack_array[3] = tp->selective_acks[0].end_seq;
+	si->cpt_sack_array[4] = tp->selective_acks[1].start_seq;
+	si->cpt_sack_array[5] = tp->selective_acks[1].end_seq;
+	si->cpt_sack_array[6] = tp->selective_acks[2].start_seq;
+	si->cpt_sack_array[7] = tp->selective_acks[2].end_seq;
+	si->cpt_sack_array[8] = tp->selective_acks[3].start_seq;
+	si->cpt_sack_array[9] = tp->selective_acks[3].end_seq;
+	si->cpt_window_clamp = tp->window_clamp;
+	si->cpt_rcv_ssthresh = tp->rcv_ssthresh;
+	si->cpt_probes_out = inet_csk(sk)->icsk_probes_out;
+	si->cpt_num_sacks = tp->rx_opt.num_sacks;
+	si->cpt_advmss = tp->advmss;
+	si->cpt_syn_retries = inet_csk(sk)->icsk_syn_retries;
+	si->cpt_ecn_flags = tp->ecn_flags;
+	si->cpt_prior_ssthresh = tp->prior_ssthresh;
+	si->cpt_high_seq = tp->high_seq;
+	si->cpt_retrans_stamp = tp->retrans_stamp;
+	si->cpt_undo_marker = tp->undo_marker;
+	si->cpt_undo_retrans = tp->undo_retrans;
+	si->cpt_urg_seq = tp->urg_seq;
+	si->cpt_urg_data = tp->urg_data;
+	si->cpt_pending = inet_csk(sk)->icsk_pending;
+	si->cpt_urg_mode = tp->urg_mode;
+	si->cpt_snd_up = tp->snd_up;
+	si->cpt_keepalive_time = tp->keepalive_time;
+	si->cpt_keepalive_intvl = tp->keepalive_intvl;
+	si->cpt_linger2 = tp->linger2;
+	/* cpt_ka_timeout stays 0 unless SOCK_KEEPOPEN is set on a socket that is neither LISTEN nor CLOSE. */
+	if (sk->sk_state != TCP_LISTEN &&
+	    sk->sk_state != TCP_CLOSE &&
+	    sock_flag(sk, SOCK_KEEPOPEN)) {
+		si->cpt_ka_timeout = jiffies_export(sk->sk_timer.expires);
+	}
+	/* Flag AF_INET6 sockets whose icsk_af_ops is ipv6_mapped. */
+#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
+	{
+		extern struct inet_connection_sock_af_ops ipv6_mapped;
+		if (sk->sk_family == AF_INET6 &&
+			inet_csk(sk)->icsk_af_ops == &ipv6_mapped)
+			si->cpt_mapped = 1;
+	}
+#endif
+
+	return 0;
+}
+
+
+int cpt_dump_socket_in(struct cpt_sock_image *si, struct sock *sk,
+		       struct cpt_context *ctx)
+{
+	struct inet_sock *inet = inet_sk(sk);
+	struct ipv6_pinfo *np = inet6_sk(sk);
+	/* Fill si with inet/inet6 state of sk. np is only dereferenced for AF_INET6. Always returns 0. */
+	if (sk->sk_family == AF_INET) {
+		struct sockaddr_in *sin = ((struct sockaddr_in*)si->cpt_laddr);
+		sin->sin_family = AF_INET;
+		sin->sin_port = inet->sport;
+		sin->sin_addr.s_addr = inet->rcv_saddr;
+		si->cpt_laddrlen = sizeof(*sin);
+	} else if (sk->sk_family == AF_INET6) {
+		struct sockaddr_in6 *sin6 = ((struct sockaddr_in6*)si->cpt_laddr);
+		sin6->sin6_family = AF_INET6;
+		sin6->sin6_port = inet->sport;
+		memcpy(&sin6->sin6_addr, &np->rcv_saddr, 16);
+		si->cpt_laddrlen = sizeof(*sin6);
+	}
+	if (!inet->num)		/* inet->num == 0: report no local address at all */
+		si->cpt_laddrlen = 0;
+
+	si->cpt_daddr = inet->daddr;
+	si->cpt_dport = inet->dport;
+	si->cpt_saddr = inet->saddr;
+	si->cpt_rcv_saddr = inet->rcv_saddr;
+	si->cpt_sport = inet->sport;
+	si->cpt_uc_ttl = inet->uc_ttl;
+	si->cpt_tos = inet->tos;
+	si->cpt_cmsg_flags = inet->cmsg_flags;
+	si->cpt_mc_index = inet->mc_index;
+	si->cpt_mc_addr = inet->mc_addr;
+	si->cpt_hdrincl = inet->hdrincl;
+	si->cpt_mc_ttl = inet->mc_ttl;
+	si->cpt_mc_loop = inet->mc_loop;
+	si->cpt_pmtudisc = inet->pmtudisc;
+	si->cpt_recverr = inet->recverr;
+	si->cpt_freebind = inet->freebind;
+	si->cpt_idcounter = inet->id;
+	/* Cork state: values from cork.fl first, overridden from cork.rt when a route is attached. */
+	si->cpt_cork_flags = inet->cork.flags;
+	si->cpt_cork_fragsize = 0;
+	si->cpt_cork_length = inet->cork.length;
+	si->cpt_cork_addr = inet->cork.addr;
+	si->cpt_cork_saddr = inet->cork.fl.fl4_src;
+	si->cpt_cork_daddr = inet->cork.fl.fl4_dst;
+	si->cpt_cork_oif = inet->cork.fl.oif;
+	if (inet->cork.rt) {
+		si->cpt_cork_fragsize = inet->cork.fragsize;
+		si->cpt_cork_saddr = inet->cork.rt->fl.fl4_src;
+		si->cpt_cork_daddr = inet->cork.rt->fl.fl4_dst;
+		si->cpt_cork_oif = inet->cork.rt->fl.oif;
+	}
+	/* UDP-only fields. */
+	if (sk->sk_type == SOCK_DGRAM && sk->sk_protocol == IPPROTO_UDP) {
+		struct udp_sock *up = udp_sk(sk);
+		si->cpt_udp_pending  = up->pending;
+		si->cpt_udp_corkflag  = up->corkflag;
+		si->cpt_udp_encap  = up->encap_type;
+		si->cpt_udp_len  = up->len;
+	}
+	/* IPv6-only fields; cpt_mapped is reset here and may be set to 1 by cpt_dump_socket_tcp(). */
+	if (sk->sk_family == AF_INET6) {
+		memcpy(si->cpt_saddr6, &np->saddr, 16);
+		memcpy(si->cpt_rcv_saddr6, &np->rcv_saddr, 16);
+		memcpy(si->cpt_daddr6, &np->daddr, 16);
+		si->cpt_flow_label6 = np->flow_label;
+		si->cpt_frag_size6 = np->frag_size;
+		si->cpt_hop_limit6 = np->hop_limit;
+		si->cpt_mcast_hops6 = np->mcast_hops;
+		si->cpt_mcast_oif6 = np->mcast_oif;
+		si->cpt_rxopt6 = np->rxopt.all;
+		si->cpt_mc_loop6 = np->mc_loop;
+		si->cpt_recverr6 = np->recverr;
+		si->cpt_sndflow6 = np->sndflow;
+		si->cpt_pmtudisc6 = np->pmtudisc;
+		si->cpt_ipv6only6 = np->ipv6only;
+		si->cpt_mapped = 0;
+	}
+
+	if (sk->sk_type == SOCK_STREAM && sk->sk_protocol == IPPROTO_TCP)
+		cpt_dump_socket_tcp(si, sk, ctx);
+
+	return 0;
+}
+
+int cpt_dump_accept_queue(struct sock *sk, int index, struct cpt_context *ctx)
+{
+	struct request_sock *req = inet_csk(sk)->icsk_accept_queue.rskq_accept_head;
+	/* each queued request's sock is dumped with index passed as its parent */
+	for (; req; req = req->dl_next)
+		cpt_dump_socket(NULL, req->sk, -1, index, ctx);
+	return 0;
+}
+
+
+/* Write one pending connection request as a CPT_OBJ_OPENREQ object.
+ * sk and index are accepted for the caller's convenience but not used. Returns 0. */
+static int dump_openreq(struct request_sock *req, struct sock *sk, int index,
+			struct cpt_context *ctx)
+{
+	struct cpt_openreq_image *v = cpt_get_buf(ctx);
+
+	cpt_open_object(NULL, ctx);
+	v->cpt_next = CPT_NULL;
+	v->cpt_object = CPT_OBJ_OPENREQ;
+	v->cpt_hdrlen = sizeof(*v);
+	v->cpt_content = CPT_CONTENT_VOID;
+
+	v->cpt_rcv_isn = tcp_rsk(req)->rcv_isn;
+	v->cpt_snt_isn = tcp_rsk(req)->snt_isn;
+	v->cpt_rmt_port = inet_rsk(req)->rmt_port;
+	v->cpt_mss = req->mss;
+	v->cpt_family = req->rsk_ops->family;	/* was left unset, yet tested below */
+	v->cpt_retrans = req->retrans;
+	v->cpt_snd_wscale = inet_rsk(req)->snd_wscale;
+	v->cpt_rcv_wscale = inet_rsk(req)->rcv_wscale;
+	v->cpt_tstamp_ok = inet_rsk(req)->tstamp_ok;
+	v->cpt_sack_ok = inet_rsk(req)->sack_ok;
+	v->cpt_wscale_ok = inet_rsk(req)->wscale_ok;
+	v->cpt_ecn_ok = inet_rsk(req)->ecn_ok;
+	v->cpt_acked = inet_rsk(req)->acked;
+	v->cpt_window_clamp = req->window_clamp;
+	v->cpt_rcv_wnd = req->rcv_wnd;
+	v->cpt_ts_recent = req->ts_recent;
+	v->cpt_expires = jiffies_export(req->expires);
+	/* The request's family decides whether 4 or 16 address bytes are stored. */
+	if (v->cpt_family == AF_INET) {
+		memcpy(v->cpt_loc_addr, &inet_rsk(req)->loc_addr, 4);
+		memcpy(v->cpt_rmt_addr, &inet_rsk(req)->rmt_addr, 4);
+	} else {
+#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
+		memcpy(v->cpt_loc_addr, &inet6_rsk(req)->loc_addr, 16);
+		memcpy(v->cpt_rmt_addr, &inet6_rsk(req)->rmt_addr, 16);
+		v->cpt_iif = inet6_rsk(req)->iif;
+#endif
+	}
+
+	ctx->write(v, sizeof(*v), ctx);
+	cpt_release_buf(ctx);
+	cpt_close_object(ctx);
+	return 0;
+}
+
+int cpt_dump_synwait_queue(struct sock *sk, int index, struct cpt_context *ctx)
+{
+	struct listen_sock *lopt = inet_csk(sk)->icsk_accept_queue.listen_opt;
+	int slot = 0;
+
+	while (slot < TCP_SYNQ_HSIZE) {	/* every bucket of the SYN table */
+		struct request_sock *req = lopt->syn_table[slot++];
+		for (; req; req = req->dl_next) {
+			loff_t saved_obj;
+			cpt_push_object(&saved_obj, ctx);
+			dump_openreq(req, sk, index, ctx);
+			cpt_pop_object(&saved_obj, ctx);
+		}
+	}
+	return 0;
+}
+
+
+/* TCP over IPv4/IPv6 only: listeners are disconnected, other non-closed sockets set to TCP_CLOSE. */
+int cpt_kill_socket(struct sock *sk, cpt_context_t * ctx)
+{
+	int tcp = (sk->sk_family == AF_INET || sk->sk_family == AF_INET6) &&
+		  sk->sk_protocol == IPPROTO_TCP;
+
+	if (tcp && sk->sk_state != TCP_CLOSE && sk->sk_state != TCP_LISTEN)
+		tcp_set_state(sk, TCP_CLOSE);
+	else if (tcp && sk->sk_state == TCP_LISTEN)
+		sk->sk_prot->disconnect(sk, 0);
+	return 0;
+}
+
+/* Dump the multicast memberships of sk, IPv4 and (for AF_INET6) IPv6, each as a
+ * CPT_OBJ_SOCK_MCADDR record followed by one 16-byte slot per source filter entry. */
+int cpt_dump_mcfilter(struct sock *sk, cpt_context_t *ctx)
+{
+	struct inet_sock *inet = inet_sk(sk);
+	struct ip_mc_socklist *iml;
+
+	for (iml = inet->mc_list; iml; iml = iml->next) {
+		struct cpt_sockmc_image smi;
+		int nsrc = 0;	/* source filter entries, not bytes */
+		int i;
+
+		if (iml->sflist)
+			nsrc = iml->sflist->sl_count;
+		smi.cpt_next = sizeof(smi) + nsrc*16;
+		smi.cpt_object = CPT_OBJ_SOCK_MCADDR;
+		smi.cpt_hdrlen = sizeof(smi);
+		smi.cpt_content = CPT_CONTENT_DATA;
+
+		smi.cpt_family = AF_INET;
+		smi.cpt_mode = iml->sfmode;
+		smi.cpt_ifindex = iml->multi.imr_ifindex;
+		memset(&smi.cpt_mcaddr, 0, sizeof(smi.cpt_mcaddr));
+		smi.cpt_mcaddr[0] = iml->multi.imr_multiaddr.s_addr;
+
+		ctx->write(&smi, sizeof(smi), ctx);
+		/* one zero-padded 16-byte slot per sl_addr[] element, matching cpt_next */
+		for (i = 0; i < nsrc; i++) {
+			u32 addr[4];
+			memset(&addr, 0, sizeof(addr));
+			addr[0] = iml->sflist->sl_addr[i];
+			ctx->write(&addr, sizeof(addr), ctx);
+		}
+	}
+
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+	if (sk->sk_family == AF_INET6) {
+		struct ipv6_mc_socklist *mcl;
+		struct ipv6_pinfo *np = inet6_sk(sk);
+
+		for (mcl = np->ipv6_mc_list; mcl; mcl = mcl->next) {
+			struct cpt_sockmc_image smi;
+			int nsrc = 0;
+			int i;
+
+			if (mcl->sflist)
+				nsrc = mcl->sflist->sl_count;
+			smi.cpt_next = sizeof(smi) + nsrc*16;
+			smi.cpt_object = CPT_OBJ_SOCK_MCADDR;
+			smi.cpt_hdrlen = sizeof(smi);
+			smi.cpt_content = CPT_CONTENT_DATA;
+
+			smi.cpt_family = AF_INET6;
+			smi.cpt_mode = mcl->sfmode;
+			smi.cpt_ifindex = mcl->ifindex;
+			memcpy(&smi.cpt_mcaddr, &mcl->addr, sizeof(smi.cpt_mcaddr));
+
+			ctx->write(&smi, sizeof(smi), ctx);
+			for (i = 0; i < nsrc; i++)
+				ctx->write(&mcl->sflist->sl_addr[i], 16, ctx);
+		}
+	}
+#endif
+	return 0;
+}
diff -uprN linux-2.6.16/kernel/cpt/cpt_syscalls.h linux-2.6.16.ovz/kernel/cpt/cpt_syscalls.h
--- linux-2.6.16/kernel/cpt/cpt_syscalls.h	1969-12-31 19:00:00.000000000 -0500
+++ linux-2.6.16.ovz/kernel/cpt/cpt_syscalls.h	2006-07-05 08:34:56.000000000 -0400
@@ -0,0 +1,95 @@
+#include <linux/unistd.h>
+#include <linux/syscalls.h>
+#include <asm/uaccess.h>
+
+#define WRAP(c, args) return sys_##c args	/* function body: return sys_<c>(args) */
+#define WRAP2(c, args) int err; mm_segment_t oldfs; \
+	               oldfs = get_fs(); set_fs(KERNEL_DS); \
+                       err = sys_##c args ;\
+                       set_fs(oldfs); \
+                       return err	/* same call, made with the address limit set to KERNEL_DS and then restored */
+
+static inline int sc_close(int fd)
+{
+	WRAP(close, (fd));	/* return sys_close(fd) */
+}
+
+static inline int sc_dup2(int fd1, int fd2)
+{
+	WRAP(dup2, (fd1, fd2));	/* return sys_dup2(fd1, fd2) */
+}
+
+static inline int sc_unlink(char *name)
+{
+	WRAP2(unlink, (name));	/* sys_unlink(name) under KERNEL_DS */
+}
+
+static inline int sc_pipe(int *pfd)
+{
+	return do_pipe(pfd);	/* calls do_pipe() directly, not a sys_ entry point */
+}
+
+static inline int sc_mknod(char *name, int mode, int dev)
+{
+	WRAP2(mknod, (name, mode, dev));	/* sys_mknod(name, mode, dev) under KERNEL_DS */
+}
+
+static inline int sc_chmod(char *name, int mode)
+{
+	WRAP2(chmod, (name, mode));	/* sys_chmod under KERNEL_DS; used to call sys_mkdir by mistake */
+}
+
+static inline int sc_chown(char *name, int uid, int gid)
+{
+	WRAP2(chown, (name, uid, gid));	/* sys_chown(name, uid, gid) under KERNEL_DS */
+}
+
+static inline int sc_mkdir(char *name, int mode)
+{
+	WRAP2(mkdir, (name, mode));	/* sys_mkdir(name, mode) under KERNEL_DS */
+}
+
+static inline int sc_rmdir(char *name)
+{
+	WRAP2(rmdir, (name));	/* sys_rmdir(name) under KERNEL_DS */
+}
+
+static inline int sc_mount(char *mntdev, char *mntpnt, char *type, unsigned long flags)
+{
+	WRAP2(mount, (mntdev ? : "none", mntpnt, type, flags, NULL));	/* NULL mntdev becomes "none"; data is always NULL */
+}
+
+static inline int sc_mprotect(unsigned long start, size_t len,
+			      unsigned long prot)
+{
+	WRAP(mprotect, (start, len, prot));	/* return sys_mprotect(start, len, prot) */
+}
+
+static inline int sc_mlock(unsigned long start, size_t len)
+{
+	WRAP(mlock, (start, len));	/* return sys_mlock(start, len) */
+}
+
+static inline int sc_munlock(unsigned long start, size_t len)
+{
+	WRAP(munlock, (start, len));	/* return sys_munlock(start, len) */
+}
+
+static inline int sc_remap_file_pages(unsigned long start, size_t len,
+				      unsigned long prot, unsigned long pgoff,
+				      unsigned long flags)
+{
+	WRAP(remap_file_pages, (start, len, prot, pgoff, flags));	/* return sys_remap_file_pages(...) */
+}
+
+static inline int sc_waitx(int pid, int opt)
+{
+	WRAP(wait4, (pid, NULL, opt, NULL));	/* sys_wait4() with NULL status and rusage pointers */
+}
+
+static inline int sc_flock(int fd, int flags)
+{
+	WRAP(flock, (fd, flags));	/* return sys_flock(fd, flags) */
+}
+
+extern int sc_execve(char *cms, char **argv, char **env);
diff -uprN linux-2.6.16/kernel/cpt/cpt_sysvipc.c linux-2.6.16.ovz/kernel/cpt/cpt_sysvipc.c
--- linux-2.6.16/kernel/cpt/cpt_sysvipc.c	1969-12-31 19:00:00.000000000 -0500
+++ linux-2.6.16.ovz/kernel/cpt/cpt_sysvipc.c	2006-07-05 08:34:56.000000000 -0400
@@ -0,0 +1,317 @@
+/*
+ *
+ *  kernel/cpt/cpt_sysvipc.c
+ *
+ *  Copyright (C) 2000-2005  SWsoft
+ *  All rights reserved.
+ *
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/version.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/file.h>
+#include <linux/mm.h>
+#include <linux/errno.h>
+#include <linux/major.h>
+#include <linux/pipe_fs_i.h>
+#include <linux/mman.h>
+#include <linux/shm.h>
+#include <linux/sem.h>
+#include <linux/msg.h>
+#include <asm/uaccess.h>
+#include <linux/cpt_image.h>
+
+#include "cpt_obj.h"
+#include "cpt_context.h"
+#include "cpt_kernel.h"
+
+struct _warg {
+		struct file			*file;	/* shm file that dump_one_shm() looks for */
+		struct cpt_sysvshm_image	*v;	/* image filled in when the file matches */
+};
+
+/* sysvipc_walk_shm() callback: when shp is the segment backed by the wanted
+ * file, copy its attributes into the image. Returns 1 on a match, else 0. */
+static int dump_one_shm(struct shmid_kernel *shp, void *arg)
+{
+	struct _warg *wa = arg;
+	struct cpt_sysvshm_image *img = wa->v;
+
+	if (wa->file != shp->shm_file)
+		return 0;
+	img->cpt_key = shp->shm_perm.key;
+	img->cpt_uid = shp->shm_perm.uid;
+	img->cpt_gid = shp->shm_perm.gid;
+	img->cpt_cuid = shp->shm_perm.cuid;
+	img->cpt_cgid = shp->shm_perm.cgid;
+	img->cpt_mode = shp->shm_perm.mode;
+	img->cpt_seq = shp->shm_perm.seq;
+	img->cpt_id = shp->id;
+	img->cpt_segsz = shp->shm_segsz;
+	img->cpt_atime = shp->shm_atim;
+	img->cpt_ctime = shp->shm_ctim;
+	img->cpt_dtime = shp->shm_dtim;
+	img->cpt_creator = shp->shm_cprid;
+	img->cpt_last = shp->shm_lprid;
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,9)
+	img->cpt_mlockuser = shp->mlock_user ? shp->mlock_user->uid : -1;
+#else
+	img->cpt_mlockuser = -1;
+#endif
+	return 1;
+}
+
+/* Write a CPT_OBJ_SYSV_SHM image for the shm segment backed by file.
+ * Returns -ESRCH when sysvipc_walk_shm() reports 0, i.e. nothing matched. */
+int cpt_dump_content_sysvshm(struct file *file, struct cpt_context *ctx)
+{
+	struct cpt_sysvshm_image *v = cpt_get_buf(ctx);
+	struct _warg warg = { .file = file, .v = v };
+	int err = -ESRCH;
+
+	v->cpt_next = sizeof(*v);
+	v->cpt_object = CPT_OBJ_SYSV_SHM;
+	v->cpt_hdrlen = sizeof(*v);
+	v->cpt_content = CPT_CONTENT_VOID;
+
+	if (sysvipc_walk_shm(dump_one_shm, &warg) != 0) {
+		ctx->write(v, sizeof(*v), ctx);
+		err = 0;
+	}
+
+	cpt_release_buf(ctx);
+	return err;
+}
+
+
+/* sysvipc_walk_sem() callback used to look up one semaphore array by id. */
+int match_sem(int id, struct sem_array *sema, void *arg)
+{
+	/* arg carries the wanted id; a hit yields sem_nsems + 1, a miss yields 0 */
+	return id != (unsigned long)arg ? 0 : sema->sem_nsems + 1;
+}
+
+/* Number of semaphores in array id, or -ESRCH (after logging) when the walk finds none. */
+static int get_sem_nsem(int id, cpt_context_t *ctx)
+{
+	int hit = sysvipc_walk_sem(match_sem, (void*)(unsigned long)id);
+	if (hit > 0)
+		return hit - 1;	/* match_sem() reports sem_nsems + 1 */
+	eprintk_ctx("get_sem_nsem: SYSV semaphore %d not found\n", id);
+	return -ESRCH;
+}
+
+static int dump_one_semundo(struct sem_undo *su, struct cpt_context *ctx)
+{
+	struct cpt_sysvsem_undo_image v;
+	loff_t saved_obj;
+	/* Write one undo record for su: a header followed by su->semadj[] (cpt_nsem shorts). */
+	cpt_open_object(NULL, ctx);
+
+	v.cpt_next = CPT_NULL;
+	v.cpt_object = CPT_OBJ_SYSVSEM_UNDO_REC;
+	v.cpt_hdrlen = sizeof(v);
+	v.cpt_content = CPT_CONTENT_SEMUNDO;
+	v.cpt_id = su->semid;
+	v.cpt_nsem = get_sem_nsem(su->semid, ctx);
+	if ((int)v.cpt_nsem < 0)
+		return -ESRCH;	/* NOTE(review): returns without closing the object opened above */
+
+	ctx->write(&v, sizeof(v), ctx);
+	/* The adjustment array is written between cpt_push_object() and cpt_pop_object(). */
+	cpt_push_object(&saved_obj, ctx);
+	ctx->write(su->semadj, v.cpt_nsem*sizeof(short), ctx);
+	cpt_pop_object(&saved_obj, ctx);
+
+	cpt_close_object(ctx);
+	return 0;
+}
+
+struct sem_warg {
+	int				last_id;	/* id % IPCMNI of the last array dumped; -1 before the first */
+	struct cpt_sysvsem_image	*v;	/* buffer receiving the header plus semaphore values */
+};
+
+static int dump_one_sem(int id, struct sem_array *sma, void *arg)
+{
+	struct sem_warg * warg = (struct sem_warg *)arg;
+	struct cpt_sysvsem_image *v = warg->v;
+	int i;
+	/* Walker callback: serialize into warg->v the first array whose slot lies past warg->last_id. */
+	if (warg->last_id != -1) {
+		if ((id % IPCMNI) <= warg->last_id)
+			return 0;	/* slot not past last_id: skip this array */
+	}
+
+	v->cpt_next = sizeof(*v);
+	v->cpt_object = CPT_OBJ_SYSV_SEM;
+	v->cpt_hdrlen = sizeof(*v);
+	v->cpt_content = CPT_CONTENT_SEMARRAY;
+
+	v->cpt_key = sma->sem_perm.key;
+	v->cpt_uid = sma->sem_perm.uid;
+	v->cpt_gid = sma->sem_perm.gid;
+	v->cpt_cuid = sma->sem_perm.cuid;
+	v->cpt_cgid = sma->sem_perm.cgid;
+	v->cpt_mode = sma->sem_perm.mode;
+	v->cpt_seq = sma->sem_perm.seq;
+
+	v->cpt_id = id;
+	v->cpt_ctime = sma->sem_ctime;
+	v->cpt_otime = sma->sem_otime;
+	/* Append one {semval, sempid} pair per semaphore after the header; cpt_next tracks the total size. */
+	for (i=0; i<sma->sem_nsems; i++) {
+		struct {
+			__u32 semval;
+			__u32 sempid;
+		} *s = (void*)v + v->cpt_next;
+		if (v->cpt_next >= PAGE_SIZE - sizeof(*s))	/* assumes the buffer is PAGE_SIZE bytes - TODO confirm */
+			return -EINVAL;
+		s->semval = sma->sem_base[i].semval;
+		s->sempid = sma->sem_base[i].sempid;
+		v->cpt_next += sizeof(*s);
+	}
+
+	warg->last_id = id % IPCMNI;
+	return 1;	/* positive result: cpt_dump_sysvsem() then writes out warg->v */
+}
+
+
+/* Dump all SYSV semaphore arrays, then the collected semaphore undo lists.
+ * Returns 0 or a negative errno. */
+int cpt_dump_sysvsem(struct cpt_context *ctx)
+{
+	cpt_object_t *obj;
+	struct sem_warg warg;
+	int err;
+
+	/* We cannot write to the dump file under the lock inside
+	 * sysvipc_walk_sem(), so each walk puts one array (the first one
+	 * past warg.last_id) into the buffer and it is written afterwards. */
+	cpt_open_section(ctx, CPT_SECT_SYSV_SEM);
+	warg.last_id = -1;
+	warg.v = cpt_get_buf(ctx);
+	while ((err = sysvipc_walk_sem(dump_one_sem, &warg)) > 0)
+		ctx->write(warg.v, warg.v->cpt_next, ctx);
+	cpt_release_buf(ctx);
+	if (err < 0)	/* e.g. -EINVAL from dump_one_sem(): do not report success */
+		return err;
+	cpt_close_section(ctx);
+
+	cpt_open_section(ctx, CPT_SECT_SYSVSEM_UNDO);
+	for_each_object(obj, CPT_OBJ_SYSVSEM_UNDO) {
+		struct sem_undo_list *semu = obj->o_obj;
+		struct sem_undo *su;
+		struct cpt_object_hdr v;
+		loff_t saved_obj;
+
+		cpt_open_object(obj, ctx);
+
+		v.cpt_next = CPT_NULL;
+		v.cpt_object = CPT_OBJ_SYSVSEM_UNDO;
+		v.cpt_hdrlen = sizeof(v);
+		v.cpt_content = CPT_CONTENT_ARRAY;
+
+		ctx->write(&v, sizeof(v), ctx);
+
+		cpt_push_object(&saved_obj, ctx);
+		for (su = semu->proc_list; su; su = su->proc_next) {
+			if (su->semid == -1)	/* entries with semid -1 are skipped */
+				continue;
+			err = dump_one_semundo(su, ctx);
+			if (err < 0)
+				return err;
+		}
+		cpt_pop_object(&saved_obj, ctx);
+		cpt_close_object(ctx);
+	}
+	cpt_close_section(ctx);
+	return 0;
+}
+
+static int collect_one_msg(int id, struct msg_queue *msq, void *arg)
+{
+	/* arg points at the caller's int counter; bump it and always return 0 */
+	++*(int *)arg;
+	return 0;
+}
+
+/* Count SYSV message queues; any queue at all makes this fail with -EBUSY. */
+int cpt_collect_sysvmsg(cpt_context_t * ctx)
+{
+	int nqueues = 0;
+	sysvipc_walk_msg(collect_one_msg, &nqueues);
+	if (nqueues == 0)
+		return 0;
+	eprintk_ctx("SYSV msgqueues are not supported, found %d\n", nqueues);
+	return -EBUSY;
+}
+
+/* Collect the sem undo lists of non-exited tasks; -EBUSY if a list has other users. */
+static int cpt_collect_sysvsem_undo(cpt_context_t *ctx)
+{
+	cpt_object_t *obj;
+
+	for_each_object(obj, CPT_OBJ_TASK) {
+		task_t *tsk = obj->o_obj;
+		struct sem_undo_list *ulist = tsk->sysvsem.undo_list;
+
+		/* ipc/sem.c forgets to clear undo_list on exit, so skip exited tasks */
+		if (tsk->exit_state || ulist == NULL)
+			continue;
+		if (cpt_object_add(CPT_OBJ_SYSVSEM_UNDO, ulist, ctx) == NULL)
+			return -ENOMEM;
+	}
+
+	/* each list's refcnt must equal the o_count accumulated above */
+	for_each_object(obj, CPT_OBJ_SYSVSEM_UNDO) {
+		struct sem_undo_list *semu = obj->o_obj;
+		if (atomic_read(&semu->refcnt) == obj->o_count)
+			continue;
+		eprintk_ctx("sem_undo_list is referenced outside %d %d\n", obj->o_count, atomic_read(&semu->refcnt));
+		return -EBUSY;
+	}
+	return 0;
+}
+
+/* sysvipc_walk_shm() callback: add the segment's file to the table as CPT_OBJ_FILE. */
+static int collect_one_shm(struct shmid_kernel *shp, void *arg)
+{
+	/* arg is the cpt context; the object is allocated with GFP_ATOMIC */
+	if (__cpt_object_add(CPT_OBJ_FILE, shp->shm_file, GFP_ATOMIC, arg))
+		return 0;
+	return -ENOMEM;
+}
+
+/* Add the files of all shm segments to the object table; 0 or a negative errno. */
+int cpt_collect_sysvshm(cpt_context_t * ctx)
+{
+	int err = sysvipc_walk_shm(collect_one_shm, ctx);
+	if (err > 0)	/* only negative walk results are reported */
+		err = 0;
+	return err;
+}
+
+/* Collect SYSV IPC state in three steps: semaphore undo lists, message
+ * queues, shared memory. The first step that returns non-zero ends the
+ * sequence and its result is returned. */
+int cpt_collect_sysv(cpt_context_t * ctx)
+{
+	int err = cpt_collect_sysvsem_undo(ctx);
+
+	if (err)
+		return err;
+
+	err = cpt_collect_sysvmsg(ctx);
+	if (err)
+		return err;
+
+	return cpt_collect_sysvshm(ctx);
+}
diff -uprN linux-2.6.16/kernel/cpt/cpt_tty.c linux-2.6.16.ovz/kernel/cpt/cpt_tty.c
--- linux-2.6.16/kernel/cpt/cpt_tty.c	1969-12-31 19:00:00.000000000 -0500
+++ linux-2.6.16.ovz/kernel/cpt/cpt_tty.c	2006-07-05 08:34:56.000000000 -0400
@@ -0,0 +1,216 @@
+/*
+ *
+ *  kernel/cpt/cpt_tty.c
+ *
+ *  Copyright (C) 2000-2005  SWsoft
+ *  All rights reserved.
+ *
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/version.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/file.h>
+#include <linux/mm.h>
+#include <linux/errno.h>
+#include <linux/major.h>
+#include <linux/tty.h>
+#include <asm/uaccess.h>
+#include <linux/cpt_image.h>
+
+#include "cpt_obj.h"
+#include "cpt_context.h"
+
+/* We must support at least N_TTY. */
+
+/* Emit a CPT_OBJ_REF record holding the image position of @file's tty. */
+int cpt_dump_content_tty(struct file *file, struct cpt_context *ctx)
+{
+	struct cpt_obj_ref ref;
+	loff_t saved_pos;
+	cpt_object_t *tty_obj;
+
+	tty_obj = lookup_cpt_object(CPT_OBJ_TTY, file->private_data, ctx);
+	if (tty_obj == NULL)
+		return -EINVAL;
+
+	ref.cpt_next = sizeof(ref);
+	ref.cpt_object = CPT_OBJ_REF;
+	ref.cpt_hdrlen = sizeof(ref);
+	ref.cpt_content = CPT_CONTENT_VOID;
+	ref.cpt_pos = tty_obj->o_pos;
+
+	/* Write the record between cpt_push_object() and cpt_pop_object(). */
+	cpt_push_object(&saved_pos, ctx);
+	ctx->write(&ref, sizeof(ref), ctx);
+	cpt_pop_object(&saved_pos, ctx);
+
+	return 0;
+}
+
+/* Register the tty behind @file and, if it has one, its link (not counted). */
+int cpt_collect_tty(struct file *file, cpt_context_t * ctx)
+{
+	struct tty_struct *tty = file->private_data;
+	cpt_object_t *peer;
+
+	if (tty == NULL)
+		return 0;
+	if (cpt_object_add(CPT_OBJ_TTY, tty, ctx) == NULL)
+		return -ENOMEM;
+	if (tty->link == NULL)
+		return 0;
+	peer = cpt_object_add(CPT_OBJ_TTY, tty->link, ctx);
+	if (peer == NULL)
+		return -ENOMEM;
+	/* tty->link is not a reference: take back the o_count bump. */
+	peer->o_count--;
+	return 0;
+}
+
+/*
+ * Write one collected tty as a CPT_OBJ_TTY object plus, when input is pending,
+ * a nested CPT_OBJ_BITS object.  Returns 0, -EINVAL or -EBUSY.
+ */
+int cpt_dump_tty(cpt_object_t *obj, struct cpt_context *ctx)
+{
+	struct tty_struct *tty = obj->o_obj;
+	struct cpt_tty_image *v;
+
+	if (tty->link) {
+		if (lookup_cpt_object(CPT_OBJ_TTY, tty->link, ctx) == NULL) {
+			eprintk_ctx("orphan pty %s %d\n", tty->name, tty->driver->subtype == PTY_TYPE_SLAVE);
+			return -EINVAL;
+		}
+		if (tty->link->link != tty) {
+			eprintk_ctx("bad pty pair\n");
+			return -EINVAL;
+		}
+		/* A pty slave whose peer is in use is expected to have one more ref. */
+		if (tty->driver->type == TTY_DRIVER_TYPE_PTY &&
+		    tty->driver->subtype == PTY_TYPE_SLAVE &&
+		    tty->link->count)
+			obj->o_count++;
+	}
+	if (obj->o_count != tty->count) {
+		eprintk_ctx("tty %s is referenced outside %d %d\n", tty->name, obj->o_count, tty->count);
+		return -EBUSY;
+	}
+
+	cpt_open_object(obj, ctx);
+	v = cpt_get_buf(ctx);
+	v->cpt_next = -1;
+	v->cpt_object = CPT_OBJ_TTY;
+	v->cpt_hdrlen = sizeof(*v);
+	v->cpt_content = CPT_CONTENT_ARRAY;
+
+	v->cpt_index = tty->index;
+	v->cpt_link = tty->link ? tty->link->index : -1;
+	v->cpt_drv_type = tty->driver->type;
+	v->cpt_drv_subtype = tty->driver->subtype;
+	v->cpt_drv_flags = tty->driver->flags;
+	v->cpt_packet = tty->packet;
+	v->cpt_stopped = tty->stopped;
+	v->cpt_hw_stopped = tty->hw_stopped;
+	v->cpt_flow_stopped = tty->flow_stopped;
+	v->cpt_flags = tty->flags;
+	v->cpt_ctrl_status = tty->ctrl_status;
+	v->cpt_canon_data = tty->canon_data;
+	v->cpt_canon_head = tty->canon_head - tty->read_tail;
+	v->cpt_canon_column = tty->canon_column;
+	v->cpt_column = tty->column;
+	v->cpt_erasing = tty->erasing;
+	v->cpt_lnext = tty->lnext;
+	v->cpt_icanon = tty->icanon;
+	v->cpt_raw = tty->raw;
+	v->cpt_real_raw = tty->real_raw;
+	v->cpt_closing = tty->closing;
+	v->cpt_minimum_to_wake = tty->minimum_to_wake;
+	v->cpt_pgrp = 0;
+	if (tty->pgrp > 0) {
+		v->cpt_pgrp = _pid_type_to_vpid(PIDTYPE_PGID, tty->pgrp);
+		if ((int)v->cpt_pgrp < 0) {
+			dprintk_ctx("cannot map tty->pgrp %d -> %d\n", tty->pgrp, (int)v->cpt_pgrp);
+			v->cpt_pgrp = -1;
+		}
+	}
+	v->cpt_session = 0;
+	if (tty->session > 0) {
+		v->cpt_session = _pid_type_to_vpid(PIDTYPE_SID, tty->session);
+		if ((int)v->cpt_session < 0) {
+			eprintk_ctx("cannot map tty->session %d -> %d\n", tty->session, (int)v->cpt_session);
+			cpt_release_buf(ctx);
+			return -EINVAL;
+		}
+	}
+	memcpy(v->cpt_name, tty->name, 64);
+	v->cpt_ws_row = tty->winsize.ws_row;
+	v->cpt_ws_col = tty->winsize.ws_col;
+	v->cpt_ws_prow = tty->winsize.ws_ypixel;
+	v->cpt_ws_pcol = tty->winsize.ws_xpixel;
+	if (tty->termios == NULL) {
+		eprintk_ctx("NULL termios\n");
+		cpt_release_buf(ctx);
+		return -EINVAL;
+	}
+	v->cpt_c_line = tty->termios->c_line;
+	v->cpt_c_iflag = tty->termios->c_iflag;
+	v->cpt_c_oflag = tty->termios->c_oflag;
+	v->cpt_c_cflag = tty->termios->c_cflag;
+	v->cpt_c_lflag = tty->termios->c_lflag;
+	memcpy(v->cpt_c_cc, tty->termios->c_cc, NCCS);
+	if (NCCS < 32)
+		memset(v->cpt_c_cc + NCCS, 255, 32 - NCCS);
+	memcpy(v->cpt_read_flags, tty->read_flags, sizeof(v->cpt_read_flags));
+	ctx->write(v, sizeof(*v), ctx);
+	cpt_release_buf(ctx);
+
+	if (tty->read_buf && tty->read_cnt) {
+		struct cpt_obj_bits *b = cpt_get_buf(ctx);
+		loff_t saved_pos;
+		int n;
+
+		cpt_push_object(&saved_pos, ctx);
+		cpt_open_object(NULL, ctx);
+		b->cpt_next = CPT_NULL;
+		b->cpt_object = CPT_OBJ_BITS;
+		b->cpt_hdrlen = sizeof(*b);
+		b->cpt_content = CPT_CONTENT_DATA;
+		b->cpt_size = tty->read_cnt;
+		ctx->write(b, sizeof(*b), ctx);
+		cpt_release_buf(ctx);
+
+		/* The data may wrap past the end of read_buf: write it in two parts. */
+		n = min(tty->read_cnt, N_TTY_BUF_SIZE - tty->read_tail);
+		ctx->write(tty->read_buf + tty->read_tail, n, ctx);
+		if (tty->read_cnt > n)
+			ctx->write(tty->read_buf, tty->read_cnt-n, ctx);
+		ctx->align(ctx);
+
+		cpt_close_object(ctx);
+		cpt_pop_object(&saved_pos, ctx);
+	}
+	cpt_close_object(ctx);
+
+	return 0;
+}
+
+/* Return the fd under which @file is on its tty's fasync list, or -1. */
+__u32 cpt_tty_fasync(struct file *file, struct cpt_context *ctx)
+{
+	struct tty_struct *tty = file->private_data;
+	struct fasync_struct *fa;
+
+	/* cpt_collect_tty() tolerates a file with no tty attached; so do we. */
+	for (fa = tty ? tty->fasync : NULL; fa; fa = fa->fa_next) {
+		if (fa->fa_file == file)
+			return fa->fa_fd;
+	}
+	return -1;
+}
diff -uprN linux-2.6.16/kernel/cpt/cpt_ubc.c linux-2.6.16.ovz/kernel/cpt/cpt_ubc.c
--- linux-2.6.16/kernel/cpt/cpt_ubc.c	1969-12-31 19:00:00.000000000 -0500
+++ linux-2.6.16.ovz/kernel/cpt/cpt_ubc.c	2006-07-05 08:34:56.000000000 -0400
@@ -0,0 +1,132 @@
+/*
+ *
+ *  kernel/cpt/cpt_ubc.c
+ *
+ *  Copyright (C) 2000-2005  SWsoft
+ *  All rights reserved.
+ *
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#include <linux/types.h>
+#include <ub/beancounter.h>
+
+#include "cpt_obj.h"
+#include "cpt_context.h"
+
+/* Register @bc (taking a reference on first sight) and, once, its parents. */
+cpt_object_t *cpt_add_ubc(struct user_beancounter *bc, struct cpt_context *ctx)
+{
+	cpt_object_t *obj = cpt_object_add(CPT_OBJ_UBC, bc, ctx);
+
+	if (obj == NULL)
+		return NULL;
+	if (obj->o_count == 1)
+		get_beancounter(bc);
+	if (obj->o_parent == NULL && bc->parent != NULL)
+		obj->o_parent = cpt_add_ubc(bc->parent, ctx);
+	return obj;
+}
+
+/* Image position of @bc, or CPT_NULL (with a logged stack dump) if unknown. */
+__u64 cpt_lookup_ubc(struct user_beancounter *bc, struct cpt_context *ctx)
+{
+	char buf[48];
+	cpt_object_t *obj = lookup_cpt_object(CPT_OBJ_UBC, bc, ctx);
+
+	if (obj != NULL)
+		return obj->o_pos;
+
+	print_ub_uid(bc, buf, sizeof(buf));
+	printk(KERN_ERR "CPT: unknown ub %s (%p)\n", buf, bc);
+	dump_stack();
+	return CPT_NULL;
+}
+
+/* Pack one ubparm into dmp[0..5]; unlimited or unrecorded values are CPT_NULL. */
+static void dump_one_bc_parm(__u64 *dmp, struct ubparm *prm, int held)
+{
+	dmp[0] = (prm->barrier < UB_MAXVALUE ? prm->barrier : CPT_NULL);
+	dmp[1] = (prm->limit < UB_MAXVALUE ? prm->limit : CPT_NULL);
+	dmp[2] = (held ? prm->held : CPT_NULL);
+	dmp[3] = prm->maxheld;
+	dmp[4] = prm->minheld;
+	dmp[5] = prm->failcnt;
+}
+
+/*
+ * Write one beancounter as a CPT_OBJ_UBC object.  cpt_parms holds six __u64
+ * per resource: UB_RESOURCES entries from ub_parms, then UB_RESOURCES entries
+ * from ub_store; the unused tail of the array is zeroed.
+ * NOTE(review): confirm the restore side indexes cpt_parms the same way.
+ */
+static int dump_one_bc(cpt_object_t *obj, struct cpt_context *ctx)
+{
+	struct user_beancounter *bc = obj->o_obj;
+	struct cpt_beancounter_image *v = cpt_get_buf(ctx);
+	cpt_object_t *parent = (cpt_object_t *)obj->o_parent;
+	__u64 *store = v->cpt_parms + UB_RESOURCES * 6;
+	int i;
+
+	v->cpt_next = CPT_NULL;
+	v->cpt_object = CPT_OBJ_UBC;
+	v->cpt_hdrlen = sizeof(*v);
+	v->cpt_content = CPT_CONTENT_VOID;
+
+	v->cpt_parent = parent ? parent->o_pos : CPT_NULL;
+	v->cpt_id = parent ? bc->ub_uid : 0;
+	for (i = 0; i < UB_RESOURCES; i++) {
+		dump_one_bc_parm(v->cpt_parms + i * 6, bc->ub_parms + i, 0);
+		dump_one_bc_parm(store + i * 6, bc->ub_store + i, 1);
+	}
+	memset(store + UB_RESOURCES * 6, 0, sizeof(v->cpt_parms)
+			- UB_RESOURCES * 12 * sizeof(v->cpt_parms[0]));
+
+	cpt_open_object(obj, ctx);
+	ctx->write(v, sizeof(*v), ctx);
+	cpt_close_object(ctx);
+	cpt_release_buf(ctx);
+	return 0;
+}
+
+/* Dump the collected beancounters into CPT_SECT_UBC, parents before children;
+ * fails with -EINVAL when more than one parentless beancounter was collected. */
+int cpt_dump_ubc(struct cpt_context *ctx)
+{
+	cpt_object_t *obj;
+	int skipped;
+	int top;
+
+	cpt_open_section(ctx, CPT_SECT_UBC);
+	do {
+		skipped = 0;
+		top = 0;
+		for_each_object(obj, CPT_OBJ_UBC) {
+			if (obj->o_parent == NULL)
+				top++;
+			if (obj->o_pos != CPT_NULL)
+				continue;
+			if (obj->o_parent != NULL &&
+			    ((cpt_object_t *)obj->o_parent)->o_pos == CPT_NULL)
+				skipped++;
+			else
+				dump_one_bc(obj, ctx);
+		}
+	} while (skipped && (top < 2));
+	cpt_close_section(ctx);
+
+	if (top > 1) {
+		eprintk_ctx("More than one top level ub exist\n");
+		return -EINVAL;
+	}
+	return 0;
+}
+
+/* Call put_beancounter() on every collected CPT_OBJ_UBC object. */
+void cpt_finish_ubc(struct cpt_context *ctx)
+{
+	cpt_object_t *ubc_obj;
+	for_each_object(ubc_obj, CPT_OBJ_UBC)
+		put_beancounter(ubc_obj->o_obj);
+}
diff -uprN linux-2.6.16/kernel/cpt/cpt_ubc.h linux-2.6.16.ovz/kernel/cpt/cpt_ubc.h
--- linux-2.6.16/kernel/cpt/cpt_ubc.h	1969-12-31 19:00:00.000000000 -0500
+++ linux-2.6.16.ovz/kernel/cpt/cpt_ubc.h	2006-07-05 08:34:56.000000000 -0400
@@ -0,0 +1,9 @@
+cpt_object_t *cpt_add_ubc(struct user_beancounter *bc, struct cpt_context *ctx);
+__u64 cpt_lookup_ubc(struct user_beancounter *bc, struct cpt_context *ctx);
+int cpt_dump_ubc(struct cpt_context *ctx);
+
+struct user_beancounter *rst_lookup_ubc(__u64 pos, struct cpt_context *ctx);
+int rst_undump_ubc(struct cpt_context *ctx);
+
+void cpt_finish_ubc(struct cpt_context *ctx);
+void rst_finish_ubc(struct cpt_context *ctx);
diff -uprN linux-2.6.16/kernel/cpt/cpt_x8664.S linux-2.6.16.ovz/kernel/cpt/cpt_x8664.S
--- linux-2.6.16/kernel/cpt/cpt_x8664.S	1969-12-31 19:00:00.000000000 -0500
+++ linux-2.6.16.ovz/kernel/cpt/cpt_x8664.S	2006-07-05 08:34:56.000000000 -0400
@@ -0,0 +1,61 @@
+#define ASSEMBLY 1
+#include <linux/config.h>
+#include <linux/linkage.h>
+#include <asm/segment.h>
+#include <asm/smp.h>
+#include <asm/cache.h>
+#include <asm/errno.h>
+#include <asm/dwarf2.h>
+#include <asm/calling.h>
+#include <asm/msr.h>
+#include <asm/unistd.h>
+#include <asm/thread_info.h>
+#include <asm/hw_irq.h>
+#include <asm/errno.h>
+
+	.code64
+
+	.macro FAKE_STACK_FRAME child_rip
+	/* push in order ss, rsp, eflags, cs, rip */
+	xorq %rax, %rax
+	pushq %rax /* ss */
+	pushq %rax /* rsp */
+	pushq $(1<<9) /* eflags - interrupts on */
+	pushq $__KERNEL_CS /* cs */
+	pushq \child_rip /* rip */
+	pushq	%rax /* orig rax */
+	.endm
+
+	.macro UNFAKE_STACK_FRAME
+	addq $8*6, %rsp
+	.endm
+
+ENTRY(asm_kernel_thread)
+	FAKE_STACK_FRAME $child_rip
+	SAVE_ALL
+
+	# rdi: flags, rsi: usp, rdx: will be &pt_regs
+	movq %rdx,%rdi			/* flags come from the caller's %rdx */
+	orq  $0x00800000,%rdi		/* always set 0x00800000 (presumably CLONE_UNTRACED) */
+	movq $-1, %rsi			/* usp = -1 */
+	movq %rsp, %rdx			/* current stack top, after SAVE_ALL */
+
+	xorl %r8d,%r8d
+	xorl %r9d,%r9d
+	pushq %rcx			/* caller's %rcx is handed over on the stack */
+	call do_fork_pid
+	addq $8, %rsp			/* drop the pushed %rcx again */
+	/* store the return value of do_fork_pid in the RAX slot of the frame */
+	movq %rax,RAX(%rsp)
+	xorl %edi,%edi
+	RESTORE_ALL
+	UNFAKE_STACK_FRAME
+	ret
+
+child_rip:
+	movq %rdi, %rax			/* %rax = address to call */
+	movq %rsi, %rdi			/* old %rsi becomes its first argument */
+	call *%rax
+	xorq %rdi, %rdi			/* both arguments of complete_and_exit are 0 */
+	xorq %rsi, %rsi
+	call complete_and_exit
diff -uprN linux-2.6.16/kernel/cpt/rst_conntrack.c linux-2.6.16.ovz/kernel/cpt/rst_conntrack.c
--- linux-2.6.16/kernel/cpt/rst_conntrack.c	1969-12-31 19:00:00.000000000 -0500
+++ linux-2.6.16.ovz/kernel/cpt/rst_conntrack.c	2006-07-05 08:34:56.000000000 -0400
@@ -0,0 +1,294 @@
+/*
+ *
+ *  kernel/cpt/rst_conntrack.c
+ *
+ *  Copyright (C) 2000-2005  SWsoft
+ *  All rights reserved.
+ *
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/version.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/file.h>
+#include <linux/mm.h>
+#include <linux/errno.h>
+#include <linux/fs.h>
+#include <linux/socket.h>
+#include <linux/netdevice.h>
+#include <linux/inetdevice.h>
+#include <linux/rtnetlink.h>
+#include <linux/unistd.h>
+#include <linux/ve.h>
+#include <linux/vzcalluser.h>
+#include <linux/cpt_image.h>
+#include <linux/icmp.h>
+#include <linux/ip.h>
+
+#if defined(CONFIG_VE_IPTABLES) && \
+    (defined(CONFIG_IP_NF_CONNTRACK) || defined(CONFIG_IP_NF_CONNTRACK_MODULE))
+
+#include <linux/netfilter.h>
+#include <linux/netfilter_ipv4/ip_conntrack.h>
+#include <linux/netfilter_ipv4/ip_nat.h>
+#include <linux/netfilter_ipv4/ip_conntrack_protocol.h>
+#include <linux/netfilter_ipv4/ip_conntrack_helper.h>
+#include <linux/netfilter_ipv4/ip_conntrack_core.h>
+#include <linux/netfilter_ipv4/ip_nat_helper.h>
+#include <linux/netfilter_ipv4/ip_nat_core.h>
+
+#define ASSERT_READ_LOCK(x) do { } while (0)
+#define ASSERT_WRITE_LOCK(x) do { } while (0)
+
+#include <linux/netfilter_ipv4/listhelp.h>
+
+
+#include "cpt_obj.h"
+#include "cpt_context.h"
+
+struct ct_holder
+{
+	struct ct_holder *next;		/* next holder on the restore list */
+	struct ip_conntrack *ct;	/* conntrack created by undump_one_ct() */
+	int index;			/* cpt_index of its image record */
+};
+
+/* Fill @tuple from its image form; warn if the stored direction is not @dir. */
+static void decode_tuple(struct cpt_ipct_tuple *v, struct ip_conntrack_tuple *tuple, int dir)
+{
+	tuple->src.ip = v->cpt_src;
+	tuple->src.u.all = v->cpt_srcport;
+	tuple->dst.ip = v->cpt_dst;
+	tuple->dst.u.all = v->cpt_dstport;
+	tuple->dst.protonum = v->cpt_protonum;
+	tuple->dst.dir = v->cpt_dir;
+	if (tuple->dst.dir != dir)
+		wprintk("dir != tuple->dst.dir\n");
+}
+
+
+/* Restore the expectation records that follow conntrack image @ci (located at
+ * @pos) and insert them with @ct as master.  Returns 0 or a negative errno. */
+static int undump_expect_list(struct ip_conntrack *ct,
+			      struct cpt_ip_conntrack_image *ci,
+			      loff_t pos, struct ct_holder *ct_list,
+			      cpt_context_t *ctx)
+{
+	loff_t end;
+	int err;
+
+	end = pos + ci->cpt_next;
+	pos += ci->cpt_hdrlen;
+	while (pos < end) {
+		struct cpt_ip_connexpect_image v;
+		struct ip_conntrack_expect *exp;
+		struct ip_conntrack *sibling;
+
+		err = rst_get_object(CPT_OBJ_NET_CONNTRACK_EXPECT, pos, &v, ctx);
+		if (err)
+			return err;
+		/* Advance first: the "continue" paths below must not re-read this record. */
+		pos += v.cpt_next;
+
+		sibling = NULL;
+		if (v.cpt_sibling_conntrack) {
+			struct ct_holder *c;
+
+			for (c = ct_list; c; c = c->next) {
+				if (c->index == v.cpt_sibling_conntrack) {
+					sibling = c->ct;
+					break;
+				}
+			}
+			if (!sibling) {
+				eprintk_ctx("lost sibling of expectation\n");
+				return -EINVAL;
+			}
+		}
+
+		write_lock_bh(&ip_conntrack_lock);
+		/* It is possible. Helper module could be just unregistered,
+		 * if expectation were on the list, it would be destroyed. */
+		if (ct->helper == NULL) {
+			write_unlock_bh(&ip_conntrack_lock);
+			dprintk_ctx("conntrack: no helper and non-trivial expectation\n");
+			continue;
+		}
+
+		exp = ip_conntrack_expect_alloc(NULL);
+		if (exp == NULL) {
+			write_unlock_bh(&ip_conntrack_lock);
+			return -ENOMEM;
+		}
+
+		if (ct->helper->timeout && !del_timer(&exp->timeout)) {
+			/* Dying already. We can do nothing. NOTE(review): exp looks leaked here. */
+			write_unlock_bh(&ip_conntrack_lock);
+			dprintk_ctx("conntrack expectation is dying\n");
+			continue;
+		}
+
+		decode_tuple(&v.cpt_tuple, &exp->tuple, 0);
+		decode_tuple(&v.cpt_mask, &exp->mask, 0);
+		exp->master = ct;
+		nf_conntrack_get(&ct->ct_general);
+		ip_conntrack_expect_insert(exp);
+#if 0
+		if (sibling) {
+			exp->sibling = sibling;
+			sibling->master = exp;
+			LIST_DELETE(&ve_ip_conntrack_expect_list, exp);
+			ct->expecting--;
+			nf_conntrack_get(&master_ct(sibling)->infos[0]);
+		} else
+#endif
+		if (ct->helper->timeout) {
+			exp->timeout.expires = jiffies + v.cpt_timeout;
+			add_timer(&exp->timeout);
+		}
+		write_unlock_bh(&ip_conntrack_lock);
+	}
+	return 0;
+}
+
+static int undump_one_ct(struct cpt_ip_conntrack_image *ci, loff_t pos,
+			 struct ct_holder **ct_list, cpt_context_t *ctx)
+{
+	int err = 0;
+	struct ip_conntrack *conntrack;
+	struct ct_holder *c;
+	struct ip_conntrack_tuple orig, repl;
+	/* Rebuild one conntrack from image record @ci and push it onto *ct_list. */
+	c = kmalloc(sizeof(struct ct_holder), GFP_KERNEL);
+	if (c == NULL)
+		return -ENOMEM;
+
+	decode_tuple(&ci->cpt_tuple[0], &orig, 0);
+	decode_tuple(&ci->cpt_tuple[1], &repl, 1);
+
+	conntrack = ip_conntrack_alloc(&orig, &repl, get_exec_env()->_ip_conntrack->ub);
+	if (!conntrack || IS_ERR(conntrack)) {
+		kfree(c);
+		return -ENOMEM;
+	}
+
+	c->ct = conntrack;
+	c->next = *ct_list;
+	*ct_list = c;
+	c->index = ci->cpt_index;	/* key for the sibling lookup in undump_expect_list() */
+
+	decode_tuple(&ci->cpt_tuple[0], &conntrack->tuplehash[0].tuple, 0);
+	decode_tuple(&ci->cpt_tuple[1], &conntrack->tuplehash[1].tuple, 1);
+
+	conntrack->status = ci->cpt_status;
+
+	memcpy(&conntrack->proto, ci->cpt_proto_data, sizeof(conntrack->proto));
+	memcpy(&conntrack->help, ci->cpt_help_data, sizeof(conntrack->help));
+
+#ifdef CONFIG_IP_NF_NAT_NEEDED
+#if defined(CONFIG_IP_NF_TARGET_MASQUERADE) || \
+	defined(CONFIG_IP_NF_TARGET_MASQUERADE_MODULE)
+	conntrack->nat.masq_index = ci->cpt_masq_index;
+#endif
+	if (ci->cpt_initialized) {
+		conntrack->nat.info.seq[0].correction_pos = ci->cpt_nat_seq[0].cpt_correction_pos;
+		conntrack->nat.info.seq[0].offset_before = ci->cpt_nat_seq[0].cpt_offset_before;
+		conntrack->nat.info.seq[0].offset_after = ci->cpt_nat_seq[0].cpt_offset_after;
+		conntrack->nat.info.seq[1].correction_pos = ci->cpt_nat_seq[1].cpt_correction_pos;
+		conntrack->nat.info.seq[1].offset_before = ci->cpt_nat_seq[1].cpt_offset_before;
+		conntrack->nat.info.seq[1].offset_after = ci->cpt_nat_seq[1].cpt_offset_after;
+	}
+	if (conntrack->status & IPS_NAT_DONE_MASK)
+		ip_nat_hash_conntrack(conntrack);
+#endif
+
+	write_lock_bh(&ip_conntrack_lock);
+
+	if (ci->cpt_ct_helper) {
+		conntrack->helper = ip_conntrack_helper_find_get(&conntrack->tuplehash[1].tuple);
+		if (conntrack->helper == NULL) {
+			eprintk_ctx("conntrack: cannot find helper, some module is not loaded\n");
+			err = -EINVAL;	/* returned only after the conntrack is hashed below */
+		}
+	}
+
+	ip_conntrack_hash_insert(conntrack);
+	conntrack->timeout.expires = jiffies + ci->cpt_timeout;	/* add_timer() is left to the caller */
+
+	write_unlock_bh(&ip_conntrack_lock);
+
+	if (err == 0 && ci->cpt_next > ci->cpt_hdrlen)	/* records follow the header */
+		err = undump_expect_list(conntrack, ci, pos, *ct_list, ctx);
+
+	return err;
+}
+
+int rst_restore_ip_conntrack(struct cpt_context * ctx)
+{
+	int err = 0;
+	loff_t sec = ctx->sections[CPT_SECT_NET_CONNTRACK];
+	loff_t endsec;
+	struct cpt_section_hdr h;
+	struct cpt_ip_conntrack_image ci;
+	struct ct_holder *c;
+	struct ct_holder *ct_list = NULL;
+	/* Restore every conntrack of CPT_SECT_NET_CONNTRACK, then arm the timers. */
+	if (sec == CPT_NULL)
+		return 0;
+	/* proto/help are copied as raw bytes: their sizes must match this kernel. */
+	if (sizeof(ci.cpt_proto_data) != sizeof(union ip_conntrack_proto)) {
+		eprintk_ctx("conntrack module ct->proto version mismatch\n");
+		return -EINVAL;
+	}
+	if (sizeof(ci.cpt_help_data) != sizeof(union ip_conntrack_help)) {
+		eprintk_ctx("conntrack module ct->help version mismatch\n");
+		return -EINVAL;
+	}
+
+	err = ctx->pread(&h, sizeof(h), ctx, sec);
+	if (err)
+		return err;
+	if (h.cpt_section != CPT_SECT_NET_CONNTRACK || h.cpt_hdrlen < sizeof(h))
+		return -EINVAL;
+
+	endsec = sec + h.cpt_next;
+	sec += h.cpt_hdrlen;
+	while (sec < endsec) {
+		err = rst_get_object(CPT_OBJ_NET_CONNTRACK, sec, &ci, ctx);
+		if (err)
+			break;
+		err = undump_one_ct(&ci, sec, &ct_list, ctx);
+		if (err)
+			break;
+		sec += ci.cpt_next;
+	}
+
+	while ((c = ct_list) != NULL) {	/* also runs after a failure above */
+		ct_list = c->next;
+		if (c->ct)
+			add_timer(&c->ct->timeout);
+		kfree(c);
+	}
+
+	return err;
+}
+
+#else
+
+#include "cpt_obj.h"
+#include "cpt_context.h"
+
+/* Conntrack restore is compiled out: an image with such a section is refused. */
+int rst_restore_ip_conntrack(struct cpt_context * ctx)
+{
+	int has_section = ctx->sections[CPT_SECT_NET_CONNTRACK] != CPT_NULL;
+	return has_section ? -EINVAL : 0;
+}
+
+#endif
diff -uprN linux-2.6.16/kernel/cpt/rst_context.c linux-2.6.16.ovz/kernel/cpt/rst_context.c
--- linux-2.6.16/kernel/cpt/rst_context.c	1969-12-31 19:00:00.000000000 -0500
+++ linux-2.6.16.ovz/kernel/cpt/rst_context.c	2006-07-05 08:34:56.000000000 -0400
@@ -0,0 +1,315 @@
+/*
+ *
+ *  kernel/cpt/rst_context.c
+ *
+ *  Copyright (C) 2000-2005  SWsoft
+ *  All rights reserved.
+ *
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/version.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/file.h>
+#include <linux/mm.h>
+#include <linux/errno.h>
+#include <linux/pagemap.h>
+#include <linux/cpt_image.h>
+
+#include "cpt_obj.h"
+#include "cpt_context.h"
+
+/* Read exactly @count bytes at the current file position.  Returns 0 on
+ * success, -EIO on a short read, -EBADF without a file, or the read error. */
+static ssize_t file_read(void *addr, size_t count, struct cpt_context *ctx)
+{
+	struct file *file = ctx->file;
+	mm_segment_t oldfs = get_fs();
+	ssize_t got = -EBADF;
+
+	set_fs(KERNEL_DS);
+	if (file)
+		got = file->f_op->read(file, addr, count, &file->f_pos);
+	set_fs(oldfs);
+	return (got == count) ? 0 : (got < 0 ? got : -EIO);
+}
+
+/* Read exactly @count bytes at offset @pos; file->f_pos is not touched.
+ * Returns 0, -EIO on a short read, -EBADF without a file, or the read error. */
+static ssize_t file_pread(void *addr, size_t count, struct cpt_context *ctx, loff_t pos)
+{
+	struct file *file = ctx->file;
+	mm_segment_t oldfs = get_fs();
+	ssize_t got = -EBADF;
+
+	set_fs(KERNEL_DS);
+	if (file)
+		got = file->f_op->read(file, addr, count, &pos);
+	set_fs(oldfs);
+	return (got == count) ? 0 : (got < 0 ? got : -EIO);
+}
+
+/* Apply CPT_ALIGN() to the image file position; no-op without a file. */
+static void file_align(struct cpt_context *ctx)
+{
+	if (!ctx->file)
+		return;
+	ctx->file->f_pos = CPT_ALIGN(ctx->file->f_pos);
+}
+
+/* Bounds of section @type's body; both are CPT_NULL when the section is absent. */
+int rst_get_section(int type, struct cpt_context *ctx, loff_t *start, loff_t *end)
+{
+	struct cpt_section_hdr hdr;
+	loff_t pos = ctx->sections[type];
+	int err;
+
+	*start = *end = pos;
+	if (pos == CPT_NULL)
+		return 0;
+	err = ctx->pread(&hdr, sizeof(hdr), ctx, pos);
+	if (err)
+		return err;
+	if (hdr.cpt_section != type || hdr.cpt_hdrlen < sizeof(hdr))
+		return -EINVAL;
+	*start = pos + hdr.cpt_hdrlen;
+	*end = pos + hdr.cpt_next;
+	return 0;
+}
+EXPORT_SYMBOL(rst_get_section);
+
+/* Reset @ctx: zero it, install the file-backed I/O callbacks and mark every
+ * section slot as absent (CPT_NULL). */
+void rst_context_init(struct cpt_context *ctx)
+{
+	int sect = CPT_SECT_MAX;
+
+	memset(ctx, 0, sizeof(*ctx));
+	init_MUTEX(&ctx->main_sem);
+	ctx->refcount = 1;
+	ctx->pagesize = PAGE_SIZE;
+	ctx->current_section = -1;
+	ctx->current_object = -1;
+	ctx->read = file_read;
+	ctx->pread = file_pread;
+	ctx->align = file_align;
+	while (sect-- > 0)
+		ctx->sections[sect] = CPT_NULL;
+#ifdef CONFIG_VZ_CHECKPOINT_LAZY
+	init_completion(&ctx->pgin_notify);
+#endif
+	cpt_object_init(ctx);
+}
+
+/* Walk the section headers in [start, end) and record each section's offset
+ * in ctx->sections[]; a malformed header yields -EINVAL. */
+static int parse_sections(loff_t start, loff_t end, cpt_context_t *ctx)
+{
+	struct cpt_section_hdr h;
+	loff_t pos;
+	int err;
+
+	for (pos = start; pos < end; pos += h.cpt_next) {
+		err = ctx->pread(&h, sizeof(h), ctx, pos);
+		if (err)
+			return err;
+		if (h.cpt_hdrlen < sizeof(h) || h.cpt_next < h.cpt_hdrlen)
+			return -EINVAL;
+		if (pos + h.cpt_next > end || h.cpt_section >= CPT_SECT_MAX)
+			return -EINVAL;
+		ctx->sections[h.cpt_section] = pos;
+	}
+
+	return 0;
+}
+
+int rst_open_dumpfile(struct cpt_context *ctx)
+{
+	int err;
+	struct cpt_major_tail *v;
+	struct cpt_major_hdr  h;
+	unsigned long size;
+	/* Validate the image's major header and tail, then index its sections. */
+	err = -EBADF;
+	if (!ctx->file)
+		goto err_out;
+
+	err = -ENOMEM;
+	ctx->tmpbuf = (char*)__get_free_page(GFP_KERNEL);
+	if (ctx->tmpbuf == NULL)
+		goto err_out;
+	__cpt_release_buf(ctx);	/* NOTE(review): presumably marks the new scratch page as free */
+
+	size = ctx->file->f_dentry->d_inode->i_size;	/* NOTE(review): narrowed to unsigned long */
+
+	if (size & 7) {		/* the image size must be a multiple of 8 */
+		err = -EINVAL;
+		goto err_out;
+	}
+	if (size < sizeof(struct cpt_major_hdr) +
+	    sizeof(struct cpt_major_tail)) {
+		err = -EINVAL;
+		goto err_out;
+	}
+	err = ctx->pread(&h, sizeof(h), ctx, 0);
+	if (err) {
+		eprintk_ctx("too short image 1 %d\n", err);
+		goto err_out;
+	}
+	if (h.cpt_signature[0] != CPT_SIGNATURE0 ||
+	    h.cpt_signature[1] != CPT_SIGNATURE1 ||
+	    h.cpt_signature[2] != CPT_SIGNATURE2 ||
+	    h.cpt_signature[3] != CPT_SIGNATURE3) {
+		err = -EINVAL;
+		goto err_out;
+	}
+	if (h.cpt_hz != HZ) {	/* the image's HZ must equal this kernel's HZ */
+		err = -EINVAL;
+		eprintk_ctx("HZ mismatch: %d != %d\n", h.cpt_hz, HZ);
+		goto err_out;
+	}
+	ctx->virt_jiffies64 = h.cpt_start_jiffies64;
+	ctx->start_time.tv_sec = h.cpt_start_sec;
+	ctx->start_time.tv_nsec = h.cpt_start_nsec;
+	ctx->kernel_config_flags = h.cpt_kernel_config[0];
+	ctx->iptables_mask = h.cpt_iptables_mask;
+	ctx->image_version = h.cpt_image_version;
+
+	v = cpt_get_buf(ctx);	/* the tail is read into the scratch buffer */
+	err = ctx->pread(v, sizeof(*v), ctx, size - sizeof(*v));
+	if (err) {
+		eprintk_ctx("too short image 2 %d\n", err);
+		cpt_release_buf(ctx);
+		goto err_out;
+	}
+	if (v->cpt_signature[0] != CPT_SIGNATURE0 ||
+	    v->cpt_signature[1] != CPT_SIGNATURE1 ||
+	    v->cpt_signature[2] != CPT_SIGNATURE2 ||
+	    v->cpt_signature[3] != CPT_SIGNATURE3 ||
+	    v->cpt_nsect != CPT_SECT_MAX_INDEX) {
+		err = -EINVAL;
+		cpt_release_buf(ctx);
+		goto err_out;
+	}
+	if ((err = parse_sections(h.cpt_hdrlen, size - sizeof(*v) - sizeof(struct cpt_section_hdr), ctx)) < 0) {
+		cpt_release_buf(ctx);
+		goto err_out;
+	}
+#ifdef CONFIG_VZ_CHECKPOINT_LAZY
+	ctx->lazypages = v->cpt_lazypages;
+#endif
+	ctx->tasks64 = v->cpt_64bit;
+	cpt_release_buf(ctx);
+	return 0;
+
+err_out:	/* every failure frees the scratch page; ctx->file is left alone */
+	if (ctx->tmpbuf) {
+		free_page((unsigned long)ctx->tmpbuf);
+		ctx->tmpbuf = NULL;
+	}
+	return err;
+}
+
+/* Drop the image file reference and free the scratch page, if present. */
+void rst_close_dumpfile(struct cpt_context *ctx)
+{
+	if (ctx->file != NULL)
+		fput(ctx->file);
+	ctx->file = NULL;
+
+	if (ctx->tmpbuf != NULL)
+		free_page((unsigned long)ctx->tmpbuf);
+	ctx->tmpbuf = NULL;
+}
+
+/* Read the object header at @pos into @tmp (@size bytes max, checked up front). */
+int _rst_get_object(int type, loff_t pos, void *tmp, int size, struct cpt_context *ctx)
+{
+	int err;
+	struct cpt_object_hdr *hdr = tmp;
+
+	if (size < (int)sizeof(*hdr))
+		return -EINVAL;
+	err = ctx->pread(hdr, sizeof(struct cpt_object_hdr), ctx, pos);
+	if (err)
+		return err;
+	if (type > 0 && type != hdr->cpt_object)
+		return -EINVAL;
+	if (hdr->cpt_hdrlen > hdr->cpt_next ||
+	    hdr->cpt_hdrlen < sizeof(struct cpt_object_hdr))
+		return -EINVAL;
+	if (size > hdr->cpt_hdrlen)
+		size = hdr->cpt_hdrlen;
+	if (size > sizeof(*hdr))
+		err = ctx->pread(hdr+1, size - sizeof(*hdr), ctx, pos + sizeof(*hdr));
+	return err;
+}
+EXPORT_SYMBOL(_rst_get_object);
+
+/* Return a kmalloc()ed copy of the cpt_hdrlen-byte header of the object at
+ * @pos, or NULL on read error, type mismatch, bad lengths or no memory. */
+void * __rst_get_object(int type, loff_t pos, struct cpt_context *ctx)
+{
+	struct cpt_object_hdr hdr;
+	void *copy;
+
+	if (ctx->pread(&hdr, sizeof(hdr), ctx, pos))
+		return NULL;
+	if (type > 0 && type != hdr.cpt_object)
+		return NULL;
+	if (hdr.cpt_hdrlen > hdr.cpt_next ||
+	    hdr.cpt_hdrlen < sizeof(struct cpt_object_hdr))
+		return NULL;
+	copy = kmalloc(hdr.cpt_hdrlen, GFP_KERNEL);
+	if (copy == NULL)
+		return NULL;
+	if (ctx->pread(copy, hdr.cpt_hdrlen, ctx, pos) != 0) {
+		kfree(copy);
+		copy = NULL;
+	}
+	return copy;
+}
+EXPORT_SYMBOL(__rst_get_object);
+
+/* Read the CPT_OBJ_NAME payload at *pos_p into a fresh page and advance *pos_p
+ * past the object.  Returns the page, or NULL on any failure. */
+__u8 *__rst_get_name(loff_t *pos_p, struct cpt_context *ctx)
+{
+	struct cpt_object_hdr hdr;
+	__u8 *page;
+
+	if (rst_get_object(CPT_OBJ_NAME, *pos_p, &hdr, ctx))
+		return NULL;
+	if (hdr.cpt_next - hdr.cpt_hdrlen > PAGE_SIZE)
+		return NULL;
+
+	page = (void*)__get_free_page(GFP_KERNEL);
+	if (page == NULL)
+		return NULL;
+	if (ctx->pread(page, hdr.cpt_next - hdr.cpt_hdrlen,
+		       ctx, *pos_p + hdr.cpt_hdrlen)) {
+		free_page((unsigned long)page);
+		return NULL;
+	}
+	*pos_p += hdr.cpt_next;
+	return page;
+}
+
+/* Like __rst_get_name(), but the caller's position is left untouched. */
+__u8 *rst_get_name(loff_t pos, struct cpt_context *ctx)
+{
+	return __rst_get_name(&pos, ctx);
+}
+
+/* Free the page holding @name; @name may point anywhere inside it, or be NULL. */
+void rst_put_name(__u8 *name, struct cpt_context *ctx)
+{
+	if (name != NULL)
+		free_page((unsigned long)name & ~(PAGE_SIZE-1));
+}
diff -uprN linux-2.6.16/kernel/cpt/rst_epoll.c linux-2.6.16.ovz/kernel/cpt/rst_epoll.c
--- linux-2.6.16/kernel/cpt/rst_epoll.c	1969-12-31 19:00:00.000000000 -0500
+++ linux-2.6.16.ovz/kernel/cpt/rst_epoll.c	2006-07-05 08:34:56.000000000 -0400
@@ -0,0 +1,173 @@
+/*
+ *
+ *  kernel/cpt/rst_epoll.c
+ *
+ *  Copyright (C) 2000-2005  SWsoft
+ *  All rights reserved.
+ *
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/version.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/file.h>
+#include <linux/mm.h>
+#include <linux/errno.h>
+#include <linux/major.h>
+#include <linux/pipe_fs_i.h>
+#include <linux/mman.h>
+#include <linux/namespace.h>
+#include <linux/mount.h>
+#include <linux/namei.h>
+#include <linux/smp_lock.h>
+#include <asm/uaccess.h>
+#include <linux/vzcalluser.h>
+#include <linux/eventpoll.h>
+#include <linux/cpt_image.h>
+
+#include "cpt_obj.h"
+#include "cpt_context.h"
+#include "cpt_mm.h"
+#include "cpt_files.h"
+#include "cpt_kernel.h"
+#include "cpt_fsmagic.h"
+#include "cpt_syscalls.h"
+
+/* Those functions are static in fs/eventpoll.c */
+extern struct file_operations eventpoll_fops;
+extern int ep_insert(struct eventpoll *ep, struct epoll_event *event,
+		     struct file *tfile, int fd);
+extern struct epitem *ep_find(struct eventpoll *ep, struct file *file, int fd);
+extern void ep_release_epitem(struct epitem *epi);
+
+
+/* Create a new epoll instance and return its struct file; the temporary
+ * descriptor is closed again.  Returns ERR_PTR() if the creation fails. */
+struct file *cpt_open_epolldev(struct cpt_file_image *fi,
+			       unsigned flags,
+			       struct cpt_context *ctx)
+{
+	struct file *epfile;
+	/* Argument "size" is ignored, use just 1 */
+	int efd = sys_epoll_create(1);
+
+	if (efd < 0)
+		return ERR_PTR(efd);
+	epfile = fget(efd);
+	sys_close(efd);
+	return epfile;
+}
+
+static int restore_one_epoll(cpt_object_t *obj,
+			     loff_t pos,
+			     struct cpt_epoll_image *ebuf,
+			     cpt_context_t *ctx)
+{
+	int err = 0;
+	loff_t endpos;
+	struct file *file = obj->o_obj;
+	struct eventpoll *ep;
+
+	/* Re-insert the files recorded after image @ebuf into the epoll file @obj. */
+	if (file->f_op != &eventpoll_fops) {
+		eprintk_ctx("bad epoll file\n");
+		return -EINVAL;
+	}
+
+	ep = file->private_data;
+
+	if (unlikely(ep == NULL)) {
+		eprintk_ctx("bad epoll device\n");
+		return -EINVAL;
+	}
+
+	endpos = pos + ebuf->cpt_next;
+	pos += ebuf->cpt_hdrlen;
+	while (pos < endpos) {
+		struct cpt_epoll_file_image efi;
+		struct epoll_event epds;
+		cpt_object_t *tobj;
+
+		err = rst_get_object(CPT_OBJ_EPOLL_FILE, pos, &efi, ctx);
+		if (err)
+			return err;
+		tobj = lookup_cpt_obj_bypos(CPT_OBJ_FILE, efi.cpt_file, ctx);
+		if (!tobj) {
+			eprintk_ctx("epoll file not found\n");
+			return -EINVAL;
+		}
+		epds.events = efi.cpt_events;
+		epds.data = efi.cpt_data;
+		down_write(&ep->sem);
+		err = ep_insert(ep, &epds, tobj->o_obj, efi.cpt_fd);
+		if (!err) {
+			struct epitem *epi;
+			epi = ep_find(ep, tobj->o_obj, efi.cpt_fd);	/* released again below */
+			if (epi) {
+				epi->revents = efi.cpt_revents;
+				if (efi.cpt_ready) {	/* put the item back on the ready list */
+					unsigned long flags;
+					write_lock_irqsave(&ep->lock, flags);
+					if (list_empty(&epi->rdllink))
+						list_add_tail(&epi->rdllink, &ep->rdllist);
+					write_unlock_irqrestore(&ep->lock, flags);
+				}
+				ep_release_epitem(epi);
+			}
+		}
+		up_write(&ep->sem);
+		if (err)
+			break;
+		pos += efi.cpt_next;
+	}
+	return err;
+}
+
+/* Restore each CPT_SECT_EPOLL object onto its file; 0 if the section is absent. */
+int rst_eventpoll(cpt_context_t *ctx)
+{
+	int err;
+	loff_t sec = ctx->sections[CPT_SECT_EPOLL];
+	loff_t endsec;
+	struct cpt_section_hdr h;
+
+	if (sec == CPT_NULL)
+		return 0;
+
+	err = ctx->pread(&h, sizeof(h), ctx, sec);
+	if (err)
+		return err;
+	if (h.cpt_section != CPT_SECT_EPOLL || h.cpt_hdrlen < sizeof(h))
+		return -EINVAL;
+
+	endsec = sec + h.cpt_next;
+	sec += h.cpt_hdrlen;
+	while (sec < endsec) {
+		cpt_object_t *obj;
+		struct cpt_epoll_image *ebuf = cpt_get_buf(ctx);
+		err = rst_get_object(CPT_OBJ_EPOLL, sec, ebuf, ctx);
+		if (err) {
+			cpt_release_buf(ctx);
+			return err;
+		}
+		obj = lookup_cpt_obj_bypos(CPT_OBJ_FILE, ebuf->cpt_file, ctx);
+		if (obj == NULL) {
+			eprintk_ctx("cannot find epoll file object\n");
+			cpt_release_buf(ctx);
+			return -EINVAL;
+		}
+		err = restore_one_epoll(obj, sec, ebuf, ctx);
+		/* Read ebuf before the buffer from cpt_get_buf() is released. */
+		sec += ebuf->cpt_next;
+		cpt_release_buf(ctx);
+		if (err)
+			return err;
+	}
+	return 0;
+}
diff -uprN linux-2.6.16/kernel/cpt/rst_files.c linux-2.6.16.ovz/kernel/cpt/rst_files.c
--- linux-2.6.16/kernel/cpt/rst_files.c	1969-12-31 19:00:00.000000000 -0500
+++ linux-2.6.16.ovz/kernel/cpt/rst_files.c	2006-07-05 08:34:56.000000000 -0400
@@ -0,0 +1,1453 @@
+/*
+ *
+ *  kernel/cpt/rst_files.c
+ *
+ *  Copyright (C) 2000-2005  SWsoft
+ *  All rights reserved.
+ *
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/version.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/file.h>
+#include <linux/mm.h>
+#include <linux/errno.h>
+#include <linux/major.h>
+#include <linux/pipe_fs_i.h>
+#include <linux/mman.h>
+#include <linux/mount.h>
+#include <linux/tty.h>
+#include <linux/namei.h>
+#include <linux/vmalloc.h>
+#include <linux/smp_lock.h>
+#include <linux/vmalloc.h>
+#include <linux/pagemap.h>
+#include <asm/uaccess.h>
+#include <ub/ub_mem.h>
+#include <linux/cpt_image.h>
+
+#include "cpt_obj.h"
+#include "cpt_context.h"
+#include "cpt_mm.h"
+#include "cpt_files.h"
+#include "cpt_kernel.h"
+#include "cpt_fsmagic.h"
+
+#include "cpt_syscalls.h"
+
+
+/*
+ * A postponed file descriptor restore. Jobs are pushed on the singly linked
+ * list ctx->filejob_queue by rst_filejob_queue() and consumed by
+ * rst_do_filejobs() / discarded by rst_flush_filejobs().
+ */
+struct filejob {
+	/* next job in ctx->filejob_queue */
+	struct filejob *next;
+	/* current->pid at the time the job was queued */
+	int	pid;
+	/* image position of the CPT_OBJ_FILEDESC record to restore later */
+	loff_t	fdi;
+};
+
+/*
+ * Remember that the descriptor record at image position @pos could not be
+ * restored yet for the current task. The job is put at the head of
+ * ctx->filejob_queue. Returns 0 or -ENOMEM.
+ */
+static int rst_filejob_queue(loff_t pos, cpt_context_t *ctx)
+{
+	struct filejob *job = kmalloc(sizeof(*job), GFP_KERNEL);
+
+	if (!job)
+		return -ENOMEM;
+
+	job->next = ctx->filejob_queue;
+	job->fdi = pos;
+	job->pid = current->pid;
+	ctx->filejob_queue = job;
+
+	return 0;
+}
+
+/*
+ * ->release method of restored pipe buffers: the page is either parked in
+ * info->tmp_page (when that slot is empty) or freed, and one module
+ * reference is dropped.
+ */
+static void _anon_pipe_buf_release(struct pipe_inode_info *info, struct pipe_buffer *buf)
+{
+	struct page *pg = buf->page;
+
+	if (!info->tmp_page)
+		info->tmp_page = pg;
+	else
+		__free_page(pg);
+
+	module_put(THIS_MODULE);
+}
+
+/* ->map method of restored pipe buffers: kmap() the buffer page. */
+static void *_anon_pipe_buf_map(struct file *file, struct pipe_inode_info *info, struct pipe_buffer *buf)
+{
+	struct page *pg = buf->page;
+
+	return kmap(pg);
+}
+
+/* ->unmap method of restored pipe buffers: undo _anon_pipe_buf_map(). */
+static void _anon_pipe_buf_unmap(struct pipe_inode_info *info, struct pipe_buffer *buf)
+{
+	struct page *pg = buf->page;
+
+	kunmap(pg);
+}
+
+/*
+ * Operations attached by fixup_pipe_data() to every pipe buffer it rebuilds
+ * from the image. The callbacks are the module-local helpers defined above.
+ */
+static struct pipe_buf_operations _anon_pipe_buf_ops = {
+	.can_merge = 1,
+	.map = _anon_pipe_buf_map,
+	.unmap = _anon_pipe_buf_unmap,
+	.release = _anon_pipe_buf_release,
+};
+
+/* Sorta ugly... Multiple readers/writers of named pipe rewrite buffer
+ * many times. We need to mark it in CPT_OBJ_INODE table in some way.
+ */
+/*
+ * Refill the buffers of a freshly opened pipe/FIFO with the data saved in
+ * the image.
+ *
+ * @file: open pipe file; its inode must be a FIFO
+ * @fi:   file image; fi->cpt_inode is the position of the inode record,
+ *        which may be followed by a CPT_OBJ_BITS record holding the data
+ * @ctx:  restore context
+ *
+ * Returns 0 when there is nothing to restore or on success, -EINVAL if the
+ * file is not a pipe, the pipe already holds data, or the saved data does
+ * not fit into PIPE_BUFFERS pages, -EBUSY/-ENOMEM on resource shortage,
+ * or the error returned by the image reader.
+ */
+static int fixup_pipe_data(struct file *file, struct cpt_file_image *fi,
+			   struct cpt_context *ctx)
+{
+	struct inode *ino = file->f_dentry->d_inode;
+	struct cpt_inode_image ii;
+	struct cpt_obj_bits b;
+	struct pipe_inode_info *info;
+	int err;
+	int count;
+
+	if (!S_ISFIFO(ino->i_mode)) {
+		eprintk_ctx("fixup_pipe_data: not a pipe %Ld\n", fi->cpt_inode);
+		return -EINVAL;
+	}
+	if (fi->cpt_inode == CPT_NULL)
+		return 0;
+
+	err = rst_get_object(CPT_OBJ_INODE, fi->cpt_inode, &ii, ctx);
+	if (err)
+		return err;
+
+	/* No payload record after the inode header: the pipe was empty. */
+	if (ii.cpt_next <= ii.cpt_hdrlen)
+		return 0;
+
+	err = rst_get_object(CPT_OBJ_BITS, fi->cpt_inode + ii.cpt_hdrlen, &b, ctx);
+	if (err)
+		return err;
+
+	if (b.cpt_size == 0)
+		return 0;
+
+	mutex_lock(PIPE_MUTEX(*ino));
+	info = ino->i_pipe;
+	if (info->nrbufs) {
+		mutex_unlock(PIPE_MUTEX(*ino));
+		eprintk("pipe buffer is restored already\n");
+		return -EINVAL;
+	}
+	info->curbuf = 0;
+	count = 0;
+	while (count < b.cpt_size) {
+		struct pipe_buffer *buf;
+		void * addr;
+		int chars;
+
+		/* The size comes from the image; never index past bufs[]. */
+		if (info->nrbufs >= PIPE_BUFFERS) {
+			eprintk_ctx("fixup_pipe_data: pipe data does not fit into pipe buffers\n");
+			err = -EINVAL;
+			break;
+		}
+		buf = info->bufs + info->nrbufs;
+
+		chars = b.cpt_size - count;
+		if (chars > PAGE_SIZE)
+			chars = PAGE_SIZE;
+		/* One module reference per buffer; it is dropped by
+		 * _anon_pipe_buf_release().
+		 */
+		if (!try_module_get(THIS_MODULE)) {
+			err = -EBUSY;
+			break;
+		}
+
+		buf->page = alloc_page(GFP_HIGHUSER);
+		if (buf->page == NULL) {
+			/* The buffer was not installed, so ->release will
+			 * never run for it: give the reference back here.
+			 */
+			module_put(THIS_MODULE);
+			err = -ENOMEM;
+			break;
+		}
+		buf->ops = &_anon_pipe_buf_ops;
+		buf->offset = 0;
+		buf->len = chars;
+		info->nrbufs++;
+		addr = kmap(buf->page);
+		err = ctx->pread(addr, chars, ctx,
+				 fi->cpt_inode + ii.cpt_hdrlen + b.cpt_hdrlen + count);
+		/* Balance kmap() on both the success and the error path. */
+		kunmap(buf->page);
+		if (err)
+			break;
+		count += chars;
+	}
+	mutex_unlock(PIPE_MUTEX(*ino));
+
+	return err;
+}
+
+/*
+ * Build the open(2) flags used to reopen a file described by @fi.
+ *
+ * The access mode is derived from the saved f_mode bits; the saved f_flags
+ * are merged in with O_ACCMODE, O_CREAT, O_TRUNC, O_EXCL and FASYNC masked
+ * out; O_NOFOLLOW, O_NONBLOCK and O_NOCTTY are always set.
+ */
+static int make_flags(struct cpt_file_image *fi)
+{
+	int flags = O_NOFOLLOW | O_NONBLOCK | O_NOCTTY;
+	unsigned int rw = fi->cpt_mode & (FMODE_READ|FMODE_WRITE);
+
+	if (rw == (FMODE_READ|FMODE_WRITE))
+		flags |= O_RDWR;
+	else if (rw == FMODE_WRITE)
+		flags |= O_WRONLY;
+	else if (rw == FMODE_READ)
+		flags |= O_RDONLY;
+
+	flags |= fi->cpt_flags & ~(O_ACCMODE|O_CREAT|O_TRUNC|O_EXCL|FASYNC);
+	return flags;
+}
+
+/*
+ * Open one end of a pipe or FIFO described by @fi.
+ *
+ * For an anonymous pipe (inode on pipefs) a new pipe is created and both
+ * ends are kept. For a FIFO opened for reading, the file is simply opened
+ * by @name and returned. For a write-only FIFO the FIFO is first opened
+ * O_RDWR|O_NONBLOCK and the requested end is then opened on the same
+ * dentry with @flags.
+ *
+ * In the two-ended cases the pipe inode is registered in the CPT_OBJ_INODE
+ * table (with the peer file as o_parent) and the peer file is registered
+ * in the CPT_OBJ_FILE table.
+ *
+ * Returns the requested file or an ERR_PTR().
+ */
+static struct file *open_pipe(char *name,
+			      struct cpt_file_image *fi,
+			      unsigned flags,
+			      struct cpt_context *ctx)
+{
+	int err;
+	cpt_object_t *obj;
+	struct cpt_inode_image ii;
+	struct file *rf, *wf;
+
+	err = rst_get_object(CPT_OBJ_INODE, fi->cpt_inode, &ii, ctx);
+	if (err)
+		return ERR_PTR(err);
+
+	if (ii.cpt_sb == FSMAGIC_PIPEFS) {
+		int pfd[2];
+
+		if ((err = sc_pipe(pfd)) < 0)
+			return ERR_PTR(err);
+
+		rf = fcheck(pfd[0]);
+		wf = fcheck(pfd[1]);
+		get_file(rf);
+		get_file(wf);
+		sc_close(pfd[0]);
+		sc_close(pfd[1]);
+
+		/* From here on "wf" is the end being restored and "rf" is
+		 * its peer, whatever their real direction.
+		 */
+		if (fi->cpt_mode&FMODE_READ) {
+			struct file *tf;
+			tf = wf; wf = rf; rf = tf;
+		}
+	} else {
+		if (fi->cpt_mode&FMODE_READ) {
+			rf = filp_open(name, flags, 0);
+			if (IS_ERR(rf)) {
+				dprintk_ctx("filp_open\n");
+				return rf;
+			}
+			dprintk_ctx(CPT_FID "open RDONLY fifo ino %Ld %p %x\n", CPT_TID(current), fi->cpt_inode, rf, rf->f_dentry->d_inode->i_mode);
+			return rf;
+		}
+
+		dprintk_ctx(CPT_FID "open WRONLY fifo ino %Ld\n", CPT_TID(current), fi->cpt_inode);
+
+		rf = filp_open(name, O_RDWR|O_NONBLOCK, 0);
+		if (IS_ERR(rf))
+			return rf;
+		wf = dentry_open(dget(rf->f_dentry),
+				 mntget(rf->f_vfsmnt), flags);
+		/* dentry_open() drops the dentry/mnt references itself on
+		 * failure; only the helper file is left to release.
+		 */
+		if (IS_ERR(wf)) {
+			fput(rf);
+			return wf;
+		}
+	}
+
+	/* Add pipe inode to obj table. */
+	obj = cpt_object_add(CPT_OBJ_INODE, wf->f_dentry->d_inode, ctx);
+	if (obj == NULL) {
+		fput(rf); fput(wf);
+		return ERR_PTR(-ENOMEM);
+	}
+	cpt_obj_setpos(obj, fi->cpt_inode, ctx);
+	obj->o_parent = rf;
+
+	/* Add the other side of the pipe to the obj table. It will not be
+	 * used directly (o_pos = CPT_NULL); other processes opening the pipe
+	 * will find the inode and open it with dentry_open(). */
+	obj = cpt_object_add(CPT_OBJ_FILE, rf, ctx);
+	if (obj == NULL) {
+		fput(wf);
+		return ERR_PTR(-ENOMEM);
+	}
+	return wf;
+}
+
+/*
+ * Try to open a "special" file, i.e. one which cannot simply be reopened
+ * by name.
+ *
+ * Returns NULL when the caller should fall back to the generic open path
+ * (directories, FIFOs, character devices with MEM_MAJOR), an ERR_PTR() for
+ * unsupported types, or whatever rst_open_tty() returns for the remaining
+ * character devices.
+ */
+static struct file *open_special(struct cpt_file_image *fi,
+				 unsigned flags,
+				 int deleted,
+				 struct cpt_context *ctx)
+{
+	struct cpt_inode_image *inode_img;
+	struct file *result;
+
+	/* Directories and named pipes are not special actually */
+	if (S_ISDIR(fi->cpt_i_mode) || S_ISFIFO(fi->cpt_i_mode))
+		return NULL;
+
+	/* No support for block devices at the moment. */
+	if (S_ISBLK(fi->cpt_i_mode))
+		return ERR_PTR(-EINVAL);
+
+	if (S_ISSOCK(fi->cpt_i_mode)) {
+		eprintk_ctx("bug: socket is not open\n");
+		return ERR_PTR(-EINVAL);
+	}
+
+	/* Support only (some) character devices at the moment. */
+	if (!S_ISCHR(fi->cpt_i_mode))
+		return ERR_PTR(-EINVAL);
+
+	inode_img = __rst_get_object(CPT_OBJ_INODE, fi->cpt_inode, ctx);
+	if (!inode_img)
+		return ERR_PTR(-ENOMEM);
+
+	/* Do not worry about this right now. /dev/null,zero,*random are here.
+	 * To prohibit at least /dev/mem?
+	 */
+	if (MAJOR(inode_img->cpt_rdev) == MEM_MAJOR)
+		result = NULL;
+	else
+		result = rst_open_tty(fi, inode_img, flags, ctx);
+
+	kfree(inode_img);
+	return result;
+}
+
+/*
+ * Re-establish one POSIX lock described by @fli on @file.
+ *
+ * The lock owner is the files_struct found in the CPT_OBJ_FILES table by
+ * index fli->cpt_owner; the pid is translated with vpid_to_pid().
+ * FL_SLEEP is stripped from the saved flags.
+ *
+ * Returns the result of posix_lock_file(), or -EINVAL when the owner or
+ * the pid cannot be resolved.
+ */
+static int restore_posix_lock(struct file *file, struct cpt_flock_image *fli, cpt_context_t *ctx)
+{
+	struct file_lock fl;
+	cpt_object_t *owner;
+
+	owner = lookup_cpt_obj_byindex(CPT_OBJ_FILES, fli->cpt_owner, ctx);
+	if (owner == NULL) {
+		eprintk_ctx("unknown lock owner %d\n", (int)fli->cpt_owner);
+		return -EINVAL;
+	}
+
+	memset(&fl, 0, sizeof(fl));
+	fl.fl_type = fli->cpt_type;
+	fl.fl_flags = fli->cpt_flags & ~FL_SLEEP;
+	fl.fl_start = fli->cpt_start;
+	fl.fl_end = fli->cpt_end;
+	fl.fl_owner = owner->o_obj;
+	fl.fl_file = file;
+
+	fl.fl_pid = vpid_to_pid(fli->cpt_pid);
+	if (fl.fl_pid < 0) {
+		eprintk_ctx("unknown lock pid %d\n", fl.fl_pid);
+		return -EINVAL;
+	}
+
+	/* Only reported, not treated as an error. */
+	if (!fl.fl_owner)
+		eprintk_ctx("no lock owner\n");
+
+	return posix_lock_file(file, &fl);
+}
+
+/*
+ * Re-establish one BSD flock() lock on @file.
+ *
+ * The file is temporarily installed into a free descriptor of the current
+ * task so that the lock can be taken through sc_flock(); the descriptor is
+ * closed again before returning. The lock is requested with LOCK_NB.
+ *
+ * Returns the result of sc_flock(), a negative value if no descriptor is
+ * available, or -EINVAL for a lock type other than F_RDLCK/F_WRLCK.
+ */
+static int restore_flock(struct file *file, struct cpt_flock_image *fli,
+			 cpt_context_t *ctx)
+{
+	int cmd, err;
+	int fd = get_unused_fd();
+
+	if (fd < 0) {
+		eprintk_ctx("BSD flock cannot be restored\n");
+		return fd;
+	}
+
+	/* The descriptor owns its own reference; sc_close() drops it. */
+	get_file(file);
+	fd_install(fd, file);
+
+	switch (fli->cpt_type) {
+	case F_RDLCK:
+		cmd = LOCK_SH;
+		break;
+	case F_WRLCK:
+		cmd = LOCK_EX;
+		break;
+	default:
+		eprintk_ctx("flock flavor is unknown: %u\n", fli->cpt_type);
+		err = -EINVAL;
+		goto out_close;
+	}
+
+	err = sc_flock(fd, LOCK_NB | cmd);
+
+out_close:
+	sc_close(fd);
+	return err;
+}
+
+
+/*
+ * Walk the records that follow the file header at @pos and restore every
+ * CPT_OBJ_FLOCK record carrying FL_POSIX. Other records are skipped.
+ * Returns 0 or the first error encountered.
+ */
+static int fixup_posix_locks(struct file *file,
+			     struct cpt_file_image *fi,
+			     loff_t pos, struct cpt_context *ctx)
+{
+	loff_t end = pos + fi->cpt_next;
+	struct cpt_flock_image fli;
+	int err;
+
+	for (pos += fi->cpt_hdrlen; pos < end; pos += fli.cpt_next) {
+		err = rst_get_object(-1, pos, &fli, ctx);
+		if (err)
+			return err;
+
+		if (fli.cpt_object != CPT_OBJ_FLOCK ||
+		    !(fli.cpt_flags&FL_POSIX))
+			continue;
+
+		err = restore_posix_lock(file, &fli, ctx);
+		if (err)
+			return err;
+		dprintk_ctx("posix lock restored\n");
+	}
+	return 0;
+}
+
+/*
+ * Restore POSIX locks of every file in the CPT_OBJ_FILE table which has an
+ * image position and whose image record has trailing sub-records.
+ *
+ * NOTE(review): the result of fixup_posix_locks() is ignored, so a lock
+ * that cannot be restored does not fail the restore - confirm this is
+ * intended.
+ */
+int rst_posix_locks(struct cpt_context *ctx)
+{
+	cpt_object_t *obj;
+
+	for_each_object(obj, CPT_OBJ_FILE) {
+		struct cpt_file_image fi;
+		struct file *filp = obj->o_obj;
+		int err;
+
+		if (obj->o_pos == CPT_NULL)
+			continue;
+
+		err = rst_get_object(CPT_OBJ_FILE, obj->o_pos, &fi, ctx);
+		if (err < 0)
+			return err;
+
+		if (fi.cpt_next <= fi.cpt_hdrlen)
+			continue;
+
+		fixup_posix_locks(filp, &fi, obj->o_pos, ctx);
+	}
+	return 0;
+}
+
+/*
+ * Walk the records that follow the file header at @pos and restore every
+ * CPT_OBJ_FLOCK record carrying FL_FLOCK (BSD locks). Other records are
+ * skipped. Returns 0 or the first error encountered.
+ */
+static int fixup_flocks(struct file *file,
+			struct cpt_file_image *fi,
+			loff_t pos, struct cpt_context *ctx)
+{
+	loff_t end = pos + fi->cpt_next;
+	struct cpt_flock_image fli;
+	int err;
+
+	for (pos += fi->cpt_hdrlen; pos < end; pos += fli.cpt_next) {
+		err = rst_get_object(-1, pos, &fli, ctx);
+		if (err)
+			return err;
+
+		if (fli.cpt_object != CPT_OBJ_FLOCK ||
+		    !(fli.cpt_flags&FL_FLOCK))
+			continue;
+
+		err = restore_flock(file, &fli, ctx);
+		if (err)
+			return err;
+		dprintk_ctx("bsd lock restored\n");
+	}
+	return 0;
+}
+
+
+/*
+ * Write saved file contents back into @file.
+ *
+ * The image range [@pos, @end) is a sequence of CPT_OBJ_PAGES records; each
+ * describes the file range [cpt_start, cpt_end) and is followed by the data.
+ * Data is copied through ctx->tmpbuf in chunks of at most PAGE_SIZE and
+ * written with the file's own ->write method.
+ *
+ * Returns 0 on success, -EINVAL if the file has no write method, -EIO on a
+ * short write, or the error from the image reader / reopen.
+ */
+static int fixup_reg_data(struct file *file, loff_t pos, loff_t end,
+			  struct cpt_context *ctx)
+{
+	int err;
+	struct cpt_page_block pgb;
+	ssize_t (*do_write)(struct file *, const char __user *, size_t, loff_t *ppos);
+
+	do_write = file->f_op->write;
+	if (do_write == NULL) {
+		eprintk_ctx("no write method. Cannot restore contents of the file.\n");
+		return -EINVAL;
+	}
+
+	/* Private reference: "file" may be replaced by a writable reopen
+	 * below, and whatever it points to is fput() at "out".
+	 */
+	atomic_inc(&file->f_count);
+
+	while (pos < end) {
+		loff_t opos;
+		loff_t ipos;
+		int count;
+
+		err = rst_get_object(CPT_OBJ_PAGES, pos, &pgb, ctx);
+		if (err)
+			goto out;
+		dprintk_ctx("restoring file data block: %08x-%08x\n",
+		       (__u32)pgb.cpt_start, (__u32)pgb.cpt_end);
+		/* ipos: read cursor in the image; opos: write cursor in the file */
+		ipos = pos + pgb.cpt_hdrlen;
+		opos = pgb.cpt_start;
+		count = pgb.cpt_end-pgb.cpt_start;
+		while (count > 0) {
+			mm_segment_t oldfs;
+			int copy = count;
+
+			if (copy > PAGE_SIZE)
+				copy = PAGE_SIZE;
+			/* presumably reserves ctx->tmpbuf, which is used below - TODO confirm */
+			(void)cpt_get_buf(ctx);
+			oldfs = get_fs(); set_fs(KERNEL_DS);
+			err = ctx->pread(ctx->tmpbuf, copy, ctx, ipos);
+			set_fs(oldfs);
+			if (err) {
+				__cpt_release_buf(ctx);
+				goto out;
+			}
+			/* The file was opened read-only or with O_DIRECT:
+			 * swap our private reference for a plain O_WRONLY
+			 * open of the same dentry.
+			 */
+			if (!(file->f_mode & FMODE_WRITE) ||
+			    (file->f_flags&O_DIRECT)) {
+				fput(file);
+				file = dentry_open(dget(file->f_dentry),
+						   mntget(file->f_vfsmnt), O_WRONLY);
+				if (IS_ERR(file)) {
+					/* private reference is already dropped: no fput here */
+					__cpt_release_buf(ctx);
+					return PTR_ERR(file);
+				}
+			}
+			/* ->write expects a user pointer; tmpbuf is a kernel buffer */
+			oldfs = get_fs(); set_fs(KERNEL_DS);
+			ipos += copy;
+			err = do_write(file, ctx->tmpbuf, copy, &opos);
+			set_fs(oldfs);
+			__cpt_release_buf(ctx);
+			if (err != copy) {
+				/* short write without an error code */
+				if (err >= 0)
+					err = -EIO;
+				goto out;
+			}
+			count -= copy;
+		}
+		pos += pgb.cpt_next;
+	}
+	err = 0;
+
+out:
+	fput(file);
+	return err;
+}
+
+
+/*
+ * Restore contents, size and timestamps of a regular file from the inode
+ * record referenced by @fi. Non-regular files are left untouched.
+ *
+ * @file_p: in/out. If *file_p is NULL, a shmem file of the saved size is
+ *          created and stored there; the caller owns it even when a later
+ *          step fails.
+ *
+ * Returns 0 or a negative errno.
+ */
+static int fixup_file_content(struct file **file_p, struct cpt_file_image *fi,
+			      struct cpt_context *ctx)
+{
+	int err;
+	struct cpt_inode_image ii;
+	struct file *file = *file_p;
+	struct iattr newattrs;
+
+	if (!S_ISREG(fi->cpt_i_mode))
+		return 0;
+
+	err = rst_get_object(CPT_OBJ_INODE, fi->cpt_inode, &ii, ctx);
+	if (err)
+		return err;
+
+	if (file == NULL) {
+		file = shmem_file_setup("dev/zero", ii.cpt_size, 0);
+		if (IS_ERR(file))
+			return PTR_ERR(file);
+		*file_p = file;
+	}
+
+	/* Data records, if any, follow the inode header. */
+	if (ii.cpt_next > ii.cpt_hdrlen) {
+		err = fixup_reg_data(file, fi->cpt_inode+ii.cpt_hdrlen,
+				     fi->cpt_inode+ii.cpt_next, ctx);
+		if (err)
+			return err;
+	}
+
+	mutex_lock(&file->f_dentry->d_inode->i_mutex);
+	/* stage 1 - update size like do_truncate does */
+	newattrs.ia_valid = ATTR_SIZE | ATTR_CTIME;
+	newattrs.ia_size = ii.cpt_size;
+	cpt_timespec_import(&newattrs.ia_ctime, ii.cpt_ctime);
+	err = notify_change(file->f_dentry, &newattrs);
+	if (err)
+		goto out;
+
+	/* stage 2 - update times */
+	newattrs.ia_valid = ATTR_MTIME | ATTR_ATIME |
+		ATTR_ATIME_SET | ATTR_MTIME_SET;
+	cpt_timespec_import(&newattrs.ia_atime, ii.cpt_atime);
+	cpt_timespec_import(&newattrs.ia_mtime, ii.cpt_mtime);
+	err = notify_change(file->f_dentry, &newattrs);
+
+out:
+	mutex_unlock(&file->f_dentry->d_inode->i_mutex);
+	return err;
+}
+
+/*
+ * Bring the state of a reopened file in line with the image: position,
+ * uid/gid, fown data, and those f_mode/f_flags bits that can differ after
+ * a plain reopen.
+ *
+ * @was_dentry_open: the file was obtained with dentry_open(); in that case
+ *                   FMODE_PREAD/FMODE_LSEEK are copied from the image.
+ * @pos:             image position of the file record, used in messages.
+ *
+ * Returns 0, or -EINVAL when the fown owner does not exist, FASYNC cannot
+ * be re-armed, or the flags still differ from the image afterwards.
+ */
+static int fixup_file_flags(struct file *file, struct cpt_file_image *fi,
+			    int was_dentry_open, loff_t pos,
+			    cpt_context_t *ctx)
+{
+	if (fi->cpt_pos != file->f_pos) {
+		/* llseek returns loff_t; an int would misreport offsets
+		 * beyond 2Gb as failures.
+		 */
+		loff_t res = -ESPIPE;
+		if (file->f_op && file->f_op->llseek)
+			res = file->f_op->llseek(file, fi->cpt_pos, 0);
+		if (res < 0) {
+			dprintk_ctx("file %Ld lseek %Ld - %Ld\n", pos, file->f_pos, fi->cpt_pos);
+			/* Not seekable: force the position. */
+			file->f_pos = fi->cpt_pos;
+		}
+	}
+	file->f_uid = fi->cpt_uid;
+	file->f_gid = fi->cpt_gid;
+	file->f_owner.pid = 0;
+	if (fi->cpt_fown_pid) {
+		file->f_owner.pid = comb_vpid_to_pid(fi->cpt_fown_pid);
+		if (file->f_owner.pid == 0) {
+			/* Report the pid from the image; f_owner.pid is 0 here. */
+			wprintk_ctx("fixup_file_flags: owner %d does not exist anymore\n", (int)fi->cpt_fown_pid);
+			return -EINVAL;
+		}
+	}
+	file->f_owner.uid = fi->cpt_fown_uid;
+	file->f_owner.euid = fi->cpt_fown_euid;
+	file->f_owner.signum = fi->cpt_fown_signo;
+
+	if (file->f_mode != fi->cpt_mode) {
+		if (was_dentry_open &&
+		    ((file->f_mode^fi->cpt_mode)&(FMODE_PREAD|FMODE_LSEEK))) {
+			file->f_mode &= ~(FMODE_PREAD|FMODE_LSEEK);
+			file->f_mode |= fi->cpt_mode&(FMODE_PREAD|FMODE_LSEEK);
+		}
+		/* A remaining mismatch is only reported. */
+		if (file->f_mode != fi->cpt_mode)
+			wprintk_ctx("file %ld mode mismatch %08x %08x\n", (long)pos, file->f_mode, fi->cpt_mode);
+	}
+	if (file->f_flags != fi->cpt_flags) {
+		/* O_NOFOLLOW and O_NONBLOCK were forced by make_flags(). */
+		if (!(fi->cpt_flags&O_NOFOLLOW))
+			file->f_flags &= ~O_NOFOLLOW;
+		if ((file->f_flags^fi->cpt_flags)&O_NONBLOCK) {
+			file->f_flags &= ~O_NONBLOCK;
+			file->f_flags |= fi->cpt_flags&O_NONBLOCK;
+		}
+		if (fi->cpt_flags&FASYNC) {
+			if (fi->cpt_fown_fd == -1) {
+				wprintk_ctx("No fd for FASYNC\n");
+				return -EINVAL;
+			} else if (file->f_op && file->f_op->fasync) {
+				if (file->f_op->fasync(fi->cpt_fown_fd, file, 1) < 0) {
+					wprintk_ctx("FASYNC problem\n");
+					return -EINVAL;
+				} else {
+					file->f_flags |= FASYNC;
+				}
+			}
+		}
+		if (file->f_flags != fi->cpt_flags) {
+			eprintk_ctx("file %ld flags mismatch %08x %08x\n", (long)pos, file->f_flags, fi->cpt_flags);
+			return -EINVAL;
+		}
+	}
+	return 0;
+}
+
+/*
+ * Recreate a file that was open but unlinked at checkpoint time.
+ *
+ * The " (deleted)" suffix (or "(deleted) " prefix) is stripped from @name,
+ * an object of the saved type is created under that name, opened, and
+ * immediately unlinked again. If the name is taken, a ".%08x" suffix is
+ * tried for up to 1000 attempts. If creation fails for any other reason,
+ * the whole procedure is repeated once with a "/tmp/rst<pid>" base name.
+ *
+ * @name is modified in place.
+ * NOTE(review): the "/tmp/rst%u" fallback is sprintf()ed into @name without
+ * knowing the size of the buffer behind it - confirm rst_get_name()
+ * allocates enough room.
+ *
+ * Returns the open file or an ERR_PTR().
+ */
+static struct file *
+open_deleted(char *name, unsigned flags, struct cpt_file_image *fi,
+	     cpt_context_t *ctx)
+{
+	struct file * file;
+	char *suffix = NULL;
+	int attempt = 0;
+	int tmp_pass = 0;
+	mode_t mode = fi->cpt_i_mode;
+
+	/* A file record without a name object reaches us with name == NULL;
+	 * there is nothing to recreate then.
+	 */
+	if (name == NULL) {
+		eprintk_ctx("open_deleted: no name\n");
+		return ERR_PTR(-EINVAL);
+	}
+
+	/* Strip (deleted) part... */
+	if (strlen(name) > strlen(" (deleted)")) {
+		if (strcmp(name + strlen(name) - strlen(" (deleted)"), " (deleted)") == 0) {
+			suffix = &name[strlen(name) - strlen(" (deleted)")];
+			*suffix = 0;
+		} else if (memcmp(name, "(deleted) ", strlen("(deleted) ")) == 0) {
+			memmove(name, name + strlen("(deleted) "), strlen(name) - strlen(" (deleted)") + 1);
+			suffix = name + strlen(name);
+		}
+	}
+
+try_again:
+	for (;;) {
+		if (attempt) {
+			if (attempt > 1000) {
+				eprintk_ctx("open_deleted: failed after %d attempts\n", attempt);
+				return ERR_PTR(-EEXIST);
+			}
+			if (suffix == NULL) {
+				eprintk_ctx("open_deleted: no suffix\n");
+				return ERR_PTR(-EEXIST);
+			}
+			sprintf(suffix, ".%08x", (unsigned)((xtime.tv_nsec>>10)+attempt));
+		}
+		attempt++;
+
+		if (S_ISFIFO(mode)) {
+			int err;
+			err = sc_mknod(name, S_IFIFO|(mode&017777), 0);
+			if (err == -EEXIST)
+				continue;
+			if (err < 0 && !tmp_pass)
+				goto change_dir;
+			if (err < 0)
+				return ERR_PTR(err);
+			file = open_pipe(name, fi, flags, ctx);
+			sc_unlink(name);
+		} else if (S_ISCHR(mode)) {
+			int err;
+			struct cpt_inode_image *ii;
+
+			ii = __rst_get_object(CPT_OBJ_INODE, fi->cpt_inode, ctx);
+			if (ii == NULL)
+				return ERR_PTR(-ENOMEM);
+			err = sc_mknod(name, S_IFCHR|(mode&017777), new_encode_dev(ii->cpt_rdev));
+			kfree(ii);
+			if (err == -EEXIST)
+				continue;
+			if (err < 0 && !tmp_pass)
+				goto change_dir;
+			if (err < 0)
+				return ERR_PTR(err);
+			file = filp_open(name, flags, mode&017777);
+			sc_unlink(name);
+		} else if (S_ISDIR(mode)) {
+			int err;
+			err = sc_mkdir(name, mode&017777);
+			if (err == -EEXIST)
+				continue;
+			if (err < 0 && !tmp_pass)
+				goto change_dir;
+			if (err < 0)
+				return ERR_PTR(err);
+			file = filp_open(name, flags, mode&017777);
+			sc_rmdir(name);
+		} else {
+			file = filp_open(name, O_CREAT|O_EXCL|flags, mode&017777);
+			if (IS_ERR(file)) {
+				if (PTR_ERR(file) == -EEXIST)
+					continue;
+				if (!tmp_pass)
+					goto change_dir;
+			} else {
+				sc_unlink(name);
+			}
+		}
+		break;
+	}
+
+	if (IS_ERR(file)) {
+		eprintk_ctx("filp_open %s: %ld\n", name, PTR_ERR(file));
+		return file;
+	} else {
+		dprintk_ctx("deleted file created as %s, %p, %x\n", name, file, file->f_dentry->d_inode->i_mode);
+	}
+	return file;
+
+change_dir:
+	/* Second and last pass: retry with a generated name under /tmp. */
+	sprintf(name, "/tmp/rst%u", current->pid);
+	suffix = name + strlen(name);
+	attempt = 1;
+	tmp_pass = 1;
+	goto try_again;
+}
+
+/*
+ * Return the file described by the CPT_OBJ_FILE record at image position
+ * @pos, opening and registering it if it has not been restored yet.
+ *
+ * @fd is not used by this function.
+ *
+ * Returns a referenced file, an ERR_PTR(), or NULL when the open failed for
+ * a record flagged CPT_DENTRY_PROC (callers are expected to retry later;
+ * see rst_filejob_queue()).
+ */
+struct file *rst_file(loff_t pos, int fd, struct cpt_context *ctx)
+{
+	int err;
+	int was_dentry_open = 0;
+	cpt_object_t *obj;
+	cpt_object_t *iobj;
+	struct cpt_file_image fi;
+	__u8 *name = NULL;
+	struct file *file;
+	int flags;
+
+	/* Already restored: hand out one more reference. */
+	obj = lookup_cpt_obj_bypos(CPT_OBJ_FILE, pos, ctx);
+	if (obj) {
+		file = obj->o_obj;
+		if (obj->o_index >= 0) {
+			dprintk_ctx("file is attached to a socket\n");
+			err = rst_get_object(CPT_OBJ_FILE, pos, &fi, ctx);
+			if (err < 0)
+				goto err_out;
+			/* NOTE(review): return value is ignored here */
+			fixup_file_flags(file, &fi, 0, pos, ctx);
+		}
+		get_file(file);
+		return file;
+	}
+
+	err = rst_get_object(CPT_OBJ_FILE, pos, &fi, ctx);
+	if (err < 0)
+		goto err_out;
+
+	flags = make_flags(&fi);
+
+	/* Easy way, inode has been already open. */
+	if (fi.cpt_inode != CPT_NULL &&
+	    !(fi.cpt_lflags & CPT_DENTRY_CLONING) &&
+	    (iobj = lookup_cpt_obj_bypos(CPT_OBJ_INODE, fi.cpt_inode, ctx)) != NULL &&
+	    iobj->o_parent) {
+		struct file *filp = iobj->o_parent;
+		file = dentry_open(dget(filp->f_dentry),
+				   mntget(filp->f_vfsmnt), flags);
+		dprintk_ctx("rst_file: file obtained by dentry_open\n");
+		was_dentry_open = 1;
+		goto map_file;
+	}
+
+	/* A name record follows the file header when the record is longer
+	 * than its header.
+	 */
+	if (fi.cpt_next > fi.cpt_hdrlen)
+		name = rst_get_name(pos + sizeof(fi), ctx);
+
+	if (fi.cpt_lflags == CPT_DENTRY_DELETED) {
+		if (fi.cpt_inode == CPT_NULL) {
+			eprintk_ctx("deleted file and no inode.\n");
+			err = -EINVAL;
+			goto err_out;
+		}
+
+		/* One very special case... */
+		if (S_ISREG(fi.cpt_i_mode) &&
+		    (!name || !name[0] || strcmp(name, "/dev/zero (deleted)") == 0)) {
+			/* MAP_ANON|MAP_SHARED mapping.
+			 * kernel makes this damn ugly way, when file which
+			 * is passed to mmap by user does not match
+			 * file finally attached to VMA. Ok, rst_mm
+			 * has to take care of this. Otherwise, it will fail.
+			 */
+			file = NULL;
+		} else if (S_ISREG(fi.cpt_i_mode) ||
+			   S_ISCHR(fi.cpt_i_mode) ||
+			   S_ISFIFO(fi.cpt_i_mode) ||
+			   S_ISDIR(fi.cpt_i_mode)) {
+			if (S_ISCHR(fi.cpt_i_mode)) {
+				file = open_special(&fi, flags, 1, ctx);
+				if (file != NULL)
+					goto map_file;
+			}
+			file = open_deleted(name, flags, &fi, ctx);
+			if (IS_ERR(file))
+				goto out;
+		} else {
+			eprintk_ctx("not a regular deleted file.\n");
+			err = -EINVAL;
+			goto err_out;
+		}
+
+		/* file may be NULL here; fixup_file_content() then creates
+		 * a shmem file for regular files.
+		 */
+		err = fixup_file_content(&file, &fi, ctx);
+		if (err)
+			goto err_put;
+		goto map_file;
+	} else {
+		if (!name || !name[0]) {
+			eprintk_ctx("no name for file?\n");
+			err = -EINVAL;
+			goto err_out;
+		}
+		/* Each helper returns NULL to mean "not mine, try the next
+		 * method"; an ERR_PTR() is carried to map_file as well.
+		 */
+		if ((fi.cpt_lflags & CPT_DENTRY_EPOLL) &&
+		    (file = cpt_open_epolldev(&fi, flags, ctx)) != NULL)
+			goto map_file;
+		if (S_ISFIFO(fi.cpt_i_mode) &&
+		    (file = open_pipe(name, &fi, flags, ctx)) != NULL)
+			goto map_file;
+		if (!S_ISREG(fi.cpt_i_mode) &&
+		    (file = open_special(&fi, flags, 0, ctx)) != NULL)
+			goto map_file;
+	}
+
+	file = filp_open(name, flags, 0);
+
+map_file:
+	if (!IS_ERR(file)) {
+		/* NOTE(review): return value is ignored here */
+		fixup_file_flags(file, &fi, was_dentry_open, pos, ctx);
+
+		if (S_ISFIFO(fi.cpt_i_mode) && !was_dentry_open) {
+			err = fixup_pipe_data(file, &fi, ctx);
+			if (err)
+				goto err_put;
+		}
+
+		/* Register the file; the object table gets its own
+		 * reference when a new entry is created.
+		 */
+		obj = cpt_object_get(CPT_OBJ_FILE, file, ctx);
+		if (!obj) {
+			obj = cpt_object_add(CPT_OBJ_FILE, file, ctx);
+			if (obj)
+				get_file(file);
+		}
+		if (obj)
+			cpt_obj_setpos(obj, pos, ctx);
+
+		/* Register the inode so that later records referring to it
+		 * can take the dentry_open() shortcut above.
+		 */
+		obj = cpt_object_add(CPT_OBJ_INODE, file->f_dentry->d_inode, ctx);
+		if (obj) {
+			cpt_obj_setpos(obj, fi.cpt_inode, ctx);
+			if (!obj->o_parent || fi.cpt_lflags != CPT_DENTRY_DELETED)
+				obj->o_parent = file;
+		}
+
+		if (fi.cpt_next > fi.cpt_hdrlen) {
+			err = fixup_flocks(file, &fi, pos, ctx);
+			if (err)
+				goto err_put;
+		}
+	} else {
+		/* Open failed: for /proc files report "not yet" (NULL)
+		 * instead of the error.
+		 */
+		if (fi.cpt_lflags & CPT_DENTRY_PROC) {
+			dprintk_ctx("rst_file /proc delayed\n");
+			file = NULL;
+		}
+	}
+
+out:
+	if (name)
+		rst_put_name(name, ctx);
+	return file;
+
+err_put:
+	if (file)
+		fput(file);
+err_out:
+	if (name)
+		rst_put_name(name, ctx);
+	return ERR_PTR(err);
+}
+
+
+/*
+ * Compute the CLONE_FILES / CLONE_FS bits for a task image.
+ * Each bit is set when the task has no such structure in the image
+ * (CPT_NULL) or when the structure is already present in the object table.
+ */
+__u32 rst_files_flag(struct cpt_task_image *ti, struct cpt_context *ctx)
+{
+	__u32 clone_bits = 0;
+
+	if (ti->cpt_files == CPT_NULL)
+		clone_bits |= CLONE_FILES;
+	else if (lookup_cpt_obj_bypos(CPT_OBJ_FILES, ti->cpt_files, ctx))
+		clone_bits |= CLONE_FILES;
+
+	if (ti->cpt_fs == CPT_NULL)
+		clone_bits |= CLONE_FS;
+	else if (lookup_cpt_obj_bypos(CPT_OBJ_FS, ti->cpt_fs, ctx))
+		clone_bits |= CLONE_FS;
+
+	return clone_bits;
+}
+
+/*
+ * Close every open descriptor of @files and clear the open_fds and
+ * close_on_exec bitmaps, one bitmap word at a time.
+ */
+static void local_close_files(struct files_struct * files)
+{
+	int word;
+
+	for (word = 0; ; word++) {
+		unsigned long bits;
+		int fd = word * __NFDBITS;
+
+		if (fd >= files->fdt->max_fdset || fd >= files->fdt->max_fds)
+			break;
+
+		for (bits = files->fdt->open_fds->fds_bits[word]; bits;
+		     bits >>= 1, fd++) {
+			struct file * victim;
+
+			if (!(bits & 1))
+				continue;
+			victim = xchg(&files->fdt->fd[fd], NULL);
+			if (victim)
+				filp_close(victim, files);
+		}
+
+		files->fdt->open_fds->fds_bits[word] = 0;
+		files->fdt->close_on_exec->fds_bits[word] = 0;
+	}
+}
+
+/* Defined outside this file; called below with files->file_lock held. */
+extern int expand_fdtable(struct files_struct *files, int nr);
+
+
+/*
+ * Give the current task the descriptor table described by ti->cpt_files.
+ *
+ *  - CPT_NULL: the task has no files_struct; the current one is dropped.
+ *  - table already restored by another task: share it.
+ *  - otherwise: close everything in the current table, grow it if needed
+ *    and install the files listed in the image. Files for which rst_file()
+ *    returns NULL are queued with rst_filejob_queue().
+ *
+ * Returns 0 or a negative errno.
+ */
+int rst_files_complete(struct cpt_task_image *ti, struct cpt_context *ctx)
+{
+	struct cpt_files_struct_image fi;
+	struct files_struct *f = current->files;
+	cpt_object_t *obj;
+	loff_t pos, endpos;
+	int err;
+
+	if (ti->cpt_files == CPT_NULL) {
+		current->files = NULL;
+		if (f)
+			put_files_struct(f);
+		return 0;
+	}
+
+	obj = lookup_cpt_obj_bypos(CPT_OBJ_FILES, ti->cpt_files, ctx);
+	if (obj) {
+		if (obj->o_obj != f) {
+			put_files_struct(f);
+			f = obj->o_obj;
+			atomic_inc(&f->count);
+			current->files = f;
+		}
+		return 0;
+	}
+
+	err = rst_get_object(CPT_OBJ_FILES, ti->cpt_files, &fi, ctx);
+	if (err)
+		return err;
+
+	local_close_files(f);
+
+	if (fi.cpt_max_fds > f->fdt->max_fds) {
+		spin_lock(&f->file_lock);
+		err = expand_fdtable(f, fi.cpt_max_fds-1);
+		spin_unlock(&f->file_lock);
+		if (err)
+			return err;
+	}
+
+	/* CPT_OBJ_FILEDESC records follow the files_struct header. */
+	pos = ti->cpt_files + fi.cpt_hdrlen;
+	endpos = ti->cpt_files + fi.cpt_next;
+	while (pos < endpos) {
+		struct cpt_fd_image fdi;
+		struct file *filp;
+
+		err = rst_get_object(CPT_OBJ_FILEDESC, pos, &fdi, ctx);
+		if (err)
+			return err;
+		filp = rst_file(fdi.cpt_file, fdi.cpt_fd, ctx);
+		if (IS_ERR(filp)) {
+			eprintk_ctx("rst_file: %ld %Lu\n", PTR_ERR(filp), fdi.cpt_file);
+			return PTR_ERR(filp);
+		}
+		if (filp == NULL) {
+			/* Not openable yet: retry from rst_do_filejobs(). */
+			int err = rst_filejob_queue(pos, ctx);
+			if (err)
+				return err;
+		} else {
+			/* NOTE(review): a descriptor number beyond the table
+			 * comes from the image and ends in BUG() - consider
+			 * failing the restore instead.
+			 */
+			if (fdi.cpt_fd >= f->fdt->max_fds) BUG();
+			f->fdt->fd[fdi.cpt_fd] = filp;
+			FD_SET(fdi.cpt_fd, f->fdt->open_fds);
+			if (fdi.cpt_flags&CPT_FD_FLAG_CLOSEEXEC)
+				FD_SET(fdi.cpt_fd, f->fdt->close_on_exec);
+		}
+		pos += fdi.cpt_next;
+	}
+	f->fdt->next_fd = fi.cpt_next_fd;
+
+	/* Publish the table so that tasks sharing it can find it. */
+	obj = cpt_object_add(CPT_OBJ_FILES, f, ctx);
+	if (obj) {
+		cpt_obj_setpos(obj, ti->cpt_files, ctx);
+		cpt_obj_setindex(obj, fi.cpt_index, ctx);
+	}
+	return 0;
+}
+
+/*
+ * Process the queue of postponed descriptor restores built by
+ * rst_filejob_queue(): for each job the owning task is looked up by pid,
+ * the file is opened with rst_file() and installed into the task's
+ * descriptor table.
+ *
+ * Returns 0 when the queue is drained. On error the failing job and all
+ * jobs behind it stay queued (see rst_flush_filejobs()).
+ */
+int rst_do_filejobs(cpt_context_t *ctx)
+{
+	struct filejob *j;
+
+	while ((j = ctx->filejob_queue) != NULL) {
+		int err;
+		task_t *tsk;
+		struct cpt_fd_image fdi;
+		struct file *filp;
+
+		read_lock(&tasklist_lock);
+		tsk = find_task_by_pid_ve(j->pid);
+		if (tsk)
+			get_task_struct(tsk);
+		read_unlock(&tasklist_lock);
+		if (!tsk)
+			return -EINVAL;
+
+		err = rst_get_object(CPT_OBJ_FILEDESC, j->fdi, &fdi, ctx);
+		if (err) {
+			put_task_struct(tsk);
+			return err;
+		}
+
+		if (fdi.cpt_fd >= tsk->files->fdt->max_fds) BUG();
+		/* The slot must still be free. */
+		if (tsk->files->fdt->fd[fdi.cpt_fd] ||
+		    FD_ISSET(fdi.cpt_fd, tsk->files->fdt->open_fds)) {
+			eprintk_ctx("doing filejob %Ld: fd is busy\n", j->fdi);
+			put_task_struct(tsk);
+			return -EBUSY;
+		}
+
+		filp = rst_file(fdi.cpt_file, fdi.cpt_fd, ctx);
+		if (IS_ERR(filp)) {
+			eprintk_ctx("rst_do_filejobs: 1: %ld %Lu\n", PTR_ERR(filp), fdi.cpt_file);
+			put_task_struct(tsk);
+			return PTR_ERR(filp);
+		}
+		/* NOTE(review): rst_file() may return NULL again; a NULL
+		 * pointer would then be installed with its open_fds bit
+		 * set - verify this cannot happen at this stage.
+		 */
+		if (fdi.cpt_fd >= tsk->files->fdt->max_fds) BUG();
+		tsk->files->fdt->fd[fdi.cpt_fd] = filp;
+		FD_SET(fdi.cpt_fd, tsk->files->fdt->open_fds);
+		if (fdi.cpt_flags&CPT_FD_FLAG_CLOSEEXEC)
+			FD_SET(fdi.cpt_fd, tsk->files->fdt->close_on_exec);
+
+		dprintk_ctx("filejob %Ld done\n", j->fdi);
+
+		put_task_struct(tsk);
+		ctx->filejob_queue = j->next;
+		kfree(j);
+	}
+	return 0;
+}
+
+/* Discard all postponed descriptor restores without executing them. */
+void rst_flush_filejobs(cpt_context_t *ctx)
+{
+	struct filejob *job = ctx->filejob_queue;
+
+	while (job) {
+		struct filejob *next = job->next;
+
+		ctx->filejob_queue = next;
+		kfree(job);
+		job = next;
+	}
+}
+
+/*
+ * Attach the proper fs_struct to the current task.
+ *
+ *  - CPT_NULL: the task had no fs_struct; the current one is released.
+ *  - already registered in the object table: share that one.
+ *  - otherwise: keep the current fs_struct and register it under the image
+ *    position ti->cpt_fs.
+ *
+ * Always returns 0.
+ */
+int rst_fs_complete(struct cpt_task_image *ti, struct cpt_context *ctx)
+{
+	struct fs_struct *cur_fs = current->fs;
+	cpt_object_t *obj;
+
+	if (ti->cpt_fs == CPT_NULL) {
+		exit_fs(current);
+		return 0;
+	}
+
+	obj = lookup_cpt_obj_bypos(CPT_OBJ_FS, ti->cpt_fs, ctx);
+	if (obj == NULL) {
+		/* Do _not_ restore root. Image contains absolute pathnames.
+		 * So, we fix it in context of rst process.
+		 */
+		obj = cpt_object_add(CPT_OBJ_FS, cur_fs, ctx);
+		if (obj)
+			cpt_obj_setpos(obj, ti->cpt_fs, ctx);
+		return 0;
+	}
+
+	if (obj->o_obj != cur_fs) {
+		struct fs_struct *shared = obj->o_obj;
+
+		exit_fs(current);
+		atomic_inc(&shared->count);
+		current->fs = shared;
+	}
+	return 0;
+}
+
+/*
+ * Resolve the directory described by the CPT_OBJ_FILE record at *@pos.
+ *
+ * On success *@dp and *@mp receive referenced dentry and vfsmount of the
+ * directory and *@pos is advanced past the record. Returns 0 or a negative
+ * errno; -ENOENT is returned when rst_file() reports that the file cannot
+ * be opened yet (NULL).
+ */
+static int get_dir(struct dentry **dp, struct vfsmount **mp,
+		   loff_t *pos, struct cpt_context *ctx)
+{
+	struct cpt_file_image fi;
+	struct file * file;
+	int err;
+
+	err = rst_get_object(CPT_OBJ_FILE, *pos, &fi, ctx);
+	if (err)
+		return err;
+
+	file = rst_file(*pos, -1, ctx);
+	if (IS_ERR(file))
+		return PTR_ERR(file);
+	/* rst_file() returns NULL for a delayed /proc file; there is no
+	 * dentry to take in that case.
+	 */
+	if (file == NULL)
+		return -ENOENT;
+
+	*dp = dget(file->f_dentry);
+	*mp = mntget(file->f_vfsmnt);
+	*pos += fi.cpt_next;
+	fput(file);
+	return 0;
+}
+
+/*
+ * Replace the root of @fs with (@mnt, @dentry). The references passed in
+ * are taken over by @fs; the previous root, if any, is released after the
+ * lock is dropped.
+ */
+static void __set_fs_root(struct fs_struct *fs, struct vfsmount *mnt,
+			  struct dentry *dentry)
+{
+	struct vfsmount *prev_mnt;
+	struct dentry *prev_root;
+
+	write_lock(&fs->lock);
+	prev_mnt = fs->rootmnt;
+	prev_root = fs->root;
+	fs->root = dentry;
+	fs->rootmnt = mnt;
+	write_unlock(&fs->lock);
+
+	if (!prev_root)
+		return;
+
+	dput(prev_root);
+	mntput(prev_mnt);
+}
+
+/*
+ * Replace the working directory of @fs with (@mnt, @dentry). The
+ * references passed in are taken over by @fs; the previous pwd, if any,
+ * is released after the lock is dropped.
+ */
+static void __set_fs_pwd(struct fs_struct *fs, struct vfsmount *mnt,
+			 struct dentry *dentry)
+{
+	struct vfsmount *prev_mnt;
+	struct dentry *prev_pwd;
+
+	write_lock(&fs->lock);
+	prev_mnt = fs->pwdmnt;
+	prev_pwd = fs->pwd;
+	fs->pwd = dentry;
+	fs->pwdmnt = mnt;
+	write_unlock(&fs->lock);
+
+	if (!prev_pwd)
+		return;
+
+	dput(prev_pwd);
+	mntput(prev_mnt);
+}
+
+
+/*
+ * Restore umask, root, pwd and altroot of every fs_struct registered in
+ * the CPT_OBJ_FS table.
+ *
+ * Up to three CPT_OBJ_FILE records follow each fs_struct header in the
+ * image; in order they describe root, pwd and altroot.
+ *
+ * Returns 0 or the first error; on error the directories already resolved
+ * for the failing fs_struct are released.
+ */
+int rst_restore_fs(struct cpt_context *ctx)
+{
+	loff_t pos;
+	cpt_object_t *obj;
+	int err = 0;
+
+	for_each_object(obj, CPT_OBJ_FS) {
+		struct cpt_fs_struct_image fi;
+		struct fs_struct *fs = obj->o_obj;
+		int i;
+		struct dentry *d[3];
+		struct vfsmount *m[3];
+
+		err = rst_get_object(CPT_OBJ_FS, obj->o_pos, &fi, ctx);
+		if (err)
+			return err;
+
+		fs->umask = fi.cpt_umask;
+
+		pos = obj->o_pos + fi.cpt_hdrlen;
+		d[0] = d[1] = d[2] = NULL;
+		m[0] = m[1] = m[2] = NULL;
+		i = 0;
+		while (pos < obj->o_pos + fi.cpt_next && i<3) {
+			err = get_dir(d+i, m+i, &pos, ctx);
+			if (err) {
+				/* printk lines must be newline-terminated */
+				eprintk_ctx("cannot get_dir: %d\n", err);
+				/* slot i was not filled; undo slots below it */
+				for (--i; i >= 0; i--) {
+					if (d[i])
+						dput(d[i]);
+					if (m[i])
+						mntput(m[i]);
+				}
+				return err;
+			}
+			i++;
+		}
+		if (d[0])
+			__set_fs_root(fs, m[0], d[0]);
+		if (d[1])
+			__set_fs_pwd(fs, m[1], d[1]);
+		if (d[2]) {
+			struct dentry *olddentry;
+			struct vfsmount *oldmnt;
+			write_lock(&fs->lock);
+			oldmnt = fs->altrootmnt;
+			olddentry = fs->altroot;
+			fs->altrootmnt = m[2];
+			fs->altroot = d[2];
+			write_unlock(&fs->lock);
+
+			if (olddentry) {
+				dput(olddentry);
+				mntput(oldmnt);
+			}
+		}
+	}
+	return err;
+}
+
+/*
+ * Mount @mnttype on @mntpnt. A bind source other than "/" or "" turns the
+ * request into an MS_BIND mount from @mntbind; otherwise no source is
+ * passed. Returns 0 or the negative result of sc_mount().
+ */
+int do_one_mount(char *mntpnt, char *mnttype, char *mntbind, unsigned long flags, struct cpt_context *ctx)
+{
+	int err;
+
+	if (mntbind) {
+		if (strcmp(mntbind, "/") == 0 || strcmp(mntbind, "") == 0)
+			mntbind = NULL;
+		else
+			flags |= MS_BIND;
+	}
+
+	err = sc_mount(mntbind, mntpnt, mnttype, flags);
+	if (err >= 0)
+		return 0;
+
+	eprintk_ctx("%d mounting %s %s %08lx\n", err, mntpnt, mnttype, flags);
+	return err;
+}
+
+/*
+ * Thread body started by rst_restore_tmpfs(): makes the read end of the
+ * pipe (@arg points to the int[2] pipe descriptors) its descriptor 0,
+ * closes every other descriptor and executes "/bin/tar x -C / -S".
+ * Only returns (-1) if the exec fails.
+ */
+static int undumptmpfs(void *arg)
+{
+	int i;
+	int *pfd = arg;
+	char *argv[] = { "tar", "x", "-C", "/", "-S", NULL };
+
+	if (pfd[0] != 0)
+		sc_dup2(pfd[0], 0);
+
+	/* keep only descriptor 0 */
+	for (i=1; i<current->files->fdt->max_fds; i++)
+		sc_close(i);
+
+	/* presumably balances a reference taken by local_kernel_thread() - TODO confirm */
+	module_put(THIS_MODULE);
+
+	/* argv lives in kernel memory */
+	set_fs(KERNEL_DS);
+	i = sc_execve("/bin/tar", argv, NULL);
+	eprintk("failed to exec /bin/tar: %d\n", i);
+	return -1;
+}
+
+/*
+ * Restore the contents of a tmpfs mount from the CPT_OBJ_NAME record at
+ * *@pos: the payload of the record is streamed through a pipe into a
+ * helper thread running undumptmpfs().
+ *
+ * *@pos is advanced past the record once the helper has been started.
+ * Returns a negative errno if the record cannot be read or the pipe/thread
+ * cannot be created; otherwise 0, even when reading the image or waiting
+ * for the helper fails.
+ */
+static int rst_restore_tmpfs(loff_t *pos, struct cpt_context * ctx)
+{
+	int err;
+	int pfd[2];
+	struct file *f;
+	struct cpt_object_hdr v;
+	int n;
+	loff_t end;
+	int pid;
+
+	err = rst_get_object(CPT_OBJ_NAME, *pos, &v, ctx);
+	if (err < 0)
+		return err;
+
+	err = sc_pipe(pfd);
+	if (err < 0)
+		return err;
+	pid = err = local_kernel_thread(undumptmpfs, (void*)pfd, SIGCHLD, 0);
+	if (err < 0)
+		goto out;
+	/* Keep only a struct file for the write end; both descriptors go. */
+	f = fget(pfd[1]);
+	sc_close(pfd[1]);
+	sc_close(pfd[0]);
+
+	ctx->file->f_pos = *pos + v.cpt_hdrlen;
+	end = *pos + v.cpt_next;
+	*pos += v.cpt_next;
+	do {
+		char buf[16];
+		mm_segment_t oldfs;
+
+		n = end - ctx->file->f_pos;
+		if (n > sizeof(buf))
+			n = sizeof(buf);
+
+		if (ctx->read(buf, n, ctx))
+			break;
+		oldfs = get_fs(); set_fs(KERNEL_DS);
+		/* NOTE(review): the result of the write is not checked */
+		f->f_op->write(f, buf, n, &f->f_pos);
+		set_fs(oldfs);
+	} while (ctx->file->f_pos < end);
+
+	/* Dropping our reference to the write end of the pipe. */
+	fput(f);
+
+	clear_tsk_thread_flag(current,TIF_SIGPENDING);
+
+	if ((err = sc_waitx(pid, 0)) < 0)
+		eprintk_ctx("wait4: %d\n", err);
+
+	return 0;
+
+out:
+	if (pfd[1] >= 0)
+		sc_close(pfd[1]);
+	if (pfd[0] >= 0)
+		sc_close(pfd[0]);
+	return err;
+}
+
+/*
+ * Restore the mount described by @mi, whose record starts at image
+ * position @pos.
+ *
+ * The record body holds four names per mount: device, mount point,
+ * filesystem type and bind source. Everything except "/" is mounted with
+ * do_one_mount(); for tmpfs the saved contents, which follow the names,
+ * are unpacked with rst_restore_tmpfs().
+ *
+ * Returns 0, -EINVAL when the type or mount point name is missing, or the
+ * error of the mount / tmpfs restore.
+ */
+int restore_one_vfsmount(struct cpt_vfsmount_image *mi, loff_t pos, struct cpt_context *ctx)
+{
+	int err;
+	loff_t endpos;
+
+	endpos = pos + mi->cpt_next;
+	pos += mi->cpt_hdrlen;
+
+	while (pos < endpos) {
+		char *mntdev;
+		char *mntpnt;
+		char *mnttype;
+		char *mntbind;
+
+		mntdev = __rst_get_name(&pos, ctx);
+		mntpnt = __rst_get_name(&pos, ctx);
+		mnttype = __rst_get_name(&pos, ctx);
+		mntbind = __rst_get_name(&pos, ctx);
+		err = -EINVAL;
+		if (mnttype && mntpnt) {
+			err = 0;
+			if (strcmp(mntpnt, "/"))
+				err = do_one_mount(mntpnt, mnttype, mntbind, mi->cpt_flags, ctx);
+			/* Unpack the contents only into a tmpfs that really
+			 * got mounted, and do not lose the failure: when the
+			 * restore fails early, pos has not been advanced
+			 * past the data.
+			 */
+			if (!err && strcmp(mnttype, "tmpfs") == 0)
+				err = rst_restore_tmpfs(&pos, ctx);
+		}
+		if (mntdev)
+			rst_put_name(mntdev, ctx);
+		if (mntpnt)
+			rst_put_name(mntpnt, ctx);
+		if (mnttype)
+			rst_put_name(mnttype, ctx);
+		if (mntbind)
+			rst_put_name(mntbind, ctx);
+		if (err)
+			return err;
+	}
+	return 0;
+}
+
+/*
+ * Restore every CPT_OBJ_VFSMOUNT record found in the image range
+ * [@pos, @endpos). Returns 0 or the first error.
+ */
+int restore_one_namespace(loff_t pos, loff_t endpos, struct cpt_context *ctx)
+{
+	struct cpt_vfsmount_image mi;
+	int err;
+
+	for (; pos < endpos; pos += mi.cpt_next) {
+		err = rst_get_object(CPT_OBJ_VFSMOUNT, pos, &mi, ctx);
+		if (err)
+			return err;
+
+		err = restore_one_vfsmount(&mi, pos, ctx);
+		if (err)
+			return err;
+	}
+	return 0;
+}
+
+/*
+ * Restore the mounts of the first namespace stored in the
+ * CPT_SECT_NAMESPACE section. A second namespace record is reported and
+ * ends the walk; it is not an error.
+ *
+ * Returns 0 on success or when the section is absent, negative errno
+ * otherwise.
+ */
+int rst_root_namespace(struct cpt_context *ctx)
+{
+	struct cpt_section_hdr h;
+	struct cpt_object_hdr sbuf;
+	loff_t sec = ctx->sections[CPT_SECT_NAMESPACE];
+	loff_t endsec;
+	int restored = 0;
+	int err;
+
+	if (sec == CPT_NULL)
+		return 0;
+
+	err = ctx->pread(&h, sizeof(h), ctx, sec);
+	if (err)
+		return err;
+	if (h.cpt_section != CPT_SECT_NAMESPACE || h.cpt_hdrlen < sizeof(h))
+		return -EINVAL;
+
+	endsec = sec + h.cpt_next;
+	for (sec += h.cpt_hdrlen; sec < endsec; sec += sbuf.cpt_next) {
+		err = rst_get_object(CPT_OBJ_NAMESPACE, sec, &sbuf, ctx);
+		if (err)
+			return err;
+
+		if (restored) {
+			eprintk_ctx("multiple namespaces are not supported\n");
+			break;
+		}
+		restored = 1;
+
+		err = restore_one_namespace(sec+sbuf.cpt_hdrlen, sec+sbuf.cpt_next, ctx);
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
+/*
+ * Walk the CPT_SECT_FILES section and hand every file record that is not
+ * yet present in the CPT_OBJ_FILE table to rst_sysv_shm(). The reference
+ * returned for such a file is dropped right away.
+ *
+ * Returns 0 on success or when the section is absent, negative errno
+ * otherwise.
+ */
+int rst_stray_files(struct cpt_context *ctx)
+{
+	struct cpt_section_hdr h;
+	struct cpt_object_hdr sbuf;
+	loff_t sec = ctx->sections[CPT_SECT_FILES];
+	loff_t endsec;
+	int err = 0;
+
+	if (sec == CPT_NULL)
+		return 0;
+
+	err = ctx->pread(&h, sizeof(h), ctx, sec);
+	if (err)
+		return err;
+	if (h.cpt_section != CPT_SECT_FILES || h.cpt_hdrlen < sizeof(h))
+		return -EINVAL;
+
+	endsec = sec + h.cpt_next;
+	for (sec += h.cpt_hdrlen; sec < endsec; sec += sbuf.cpt_next) {
+		struct file *stray;
+
+		err = _rst_get_object(CPT_OBJ_FILE, sec, &sbuf, sizeof(sbuf), ctx);
+		if (err)
+			break;
+
+		/* Already restored through some task: nothing to do. */
+		if (lookup_cpt_obj_bypos(CPT_OBJ_FILE, sec, ctx))
+			continue;
+
+		dprintk_ctx("stray file %Ld\n", sec);
+
+		stray = rst_sysv_shm(sec, ctx);
+		if (IS_ERR(stray)) {
+			eprintk_ctx("rst_stray_files: %ld\n", PTR_ERR(stray));
+			return PTR_ERR(stray);
+		}
+		fput(stray);
+	}
+
+	return err;
+}
diff -uprN linux-2.6.16/kernel/cpt/rst_i386.S linux-2.6.16.ovz/kernel/cpt/rst_i386.S
--- linux-2.6.16/kernel/cpt/rst_i386.S	1969-12-31 19:00:00.000000000 -0500
+++ linux-2.6.16.ovz/kernel/cpt/rst_i386.S	2006-07-05 08:34:56.000000000 -0400
@@ -0,0 +1,40 @@
+#define ASSEMBLY 1
+
+#include <linux/config.h>
+#include <linux/linkage.h>
+#include <asm/thread_info.h>
+#include <asm/errno.h>
+#include <asm/segment.h>
+#include <asm/page.h>
+#include <asm/smp.h>
+#include <asm/page.h>
+
+	.section .text
+	.align 4
+	.global ret_last_siginfo
+ret_last_siginfo:		/* call rlsi, then return on the stack it hands back */
+	call rlsi
+	movl %eax,%esp		/* %esp := whatever rlsi left in %eax */
+	ret			/* pops its return address from that new stack */
+
+	.align 8
+	.global ret_child_tid
+ret_child_tid:			/* same shape as ret_last_siginfo, but the old %esp is pushed for rct */
+	push %esp		/* presumably the stack-passed argument of rct - confirm */
+	call rct
+	movl %eax,%esp		/* %esp := whatever rct left in %eax */
+	ret			/* pops its return address from that new stack */
+
+	.align 4
+	.global ret_from_rst
+ret_from_rst:
+	pushl	%eax
+	jmp ret_from_fork+6	/* enters ret_from_fork 6 bytes past its start; NOTE(review): depends on the encoded size of its first instructions - confirm against entry.S */
+
+	.align 4
+	.global pre_ret_from_fork
+pre_ret_from_fork:
+	pushl %eax		/* saved here, restored by the popl below; also on top of the stack when schedule_tail is called */
+	call schedule_tail
+	popl %eax
+	ret
diff -uprN linux-2.6.16/kernel/cpt/rst_mm.c linux-2.6.16.ovz/kernel/cpt/rst_mm.c
--- linux-2.6.16/kernel/cpt/rst_mm.c	1969-12-31 19:00:00.000000000 -0500
+++ linux-2.6.16.ovz/kernel/cpt/rst_mm.c	2006-07-05 08:34:56.000000000 -0400
@@ -0,0 +1,986 @@
+/*
+ *
+ *  kernel/cpt/rst_mm.c
+ *
+ *  Copyright (C) 2000-2005  SWsoft
+ *  All rights reserved.
+ *
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/version.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/file.h>
+#include <linux/mm.h>
+#include <linux/virtinfo.h>
+#include <linux/hugetlb.h>
+#include <linux/errno.h>
+#include <linux/errno.h>
+#include <linux/pagemap.h>
+#include <linux/mman.h>
+#include <linux/vmalloc.h>
+#include <linux/rmap.h>
+#include <linux/hash.h>
+#include <asm/pgalloc.h>
+#include <asm/tlb.h>
+#include <asm/tlbflush.h>
+#include <asm/pgtable.h>
+#include <asm/mmu.h>
+#include <asm/ldt.h>
+#include <asm/desc.h>
+#include <asm/mmu_context.h>
+#include <linux/swapops.h>
+#include <linux/cpt_image.h>
+
+#ifdef CONFIG_VE
+#include <ub/beancounter.h>
+#include <ub/ub_vmpages.h>
+#endif
+
+#include "cpt_obj.h"
+#include "cpt_context.h"
+#include "cpt_files.h"
+#include "cpt_ubc.h"
+#include "cpt_mm.h"
+#include "cpt_kernel.h"
+#ifdef CONFIG_VZ_CHECKPOINT_LAZY
+#include "cpt_pagein.h"
+#endif
+
+#include "cpt_syscalls.h"
+
+#define __PAGE_NX (1ULL<<63)
+
+/* Derive the PROT_* mask used for mapping/mprotecting a restored vma from
+ * the VM_* bits saved in its image.  Handled bits: VM_READ, VM_WRITE,
+ * VM_EXEC, VM_GROWSDOWN and VM_GROWSUP; every other bit of cpt_flags is
+ * ignored here. */
+static unsigned long make_prot(struct cpt_vma_image *vmai)
+{
+	unsigned long prot;
+
+	prot  = (vmai->cpt_flags & VM_READ) ? PROT_READ : 0;
+	prot |= (vmai->cpt_flags & VM_WRITE) ? PROT_WRITE : 0;
+	prot |= (vmai->cpt_flags & VM_EXEC) ? PROT_EXEC : 0;
+	prot |= (vmai->cpt_flags & VM_GROWSDOWN) ? PROT_GROWSDOWN : 0;
+	prot |= (vmai->cpt_flags & VM_GROWSUP) ? PROT_GROWSUP : 0;
+
+	return prot;
+}
+
+/* Derive the MAP_* flags used for mapping a restored vma from its image.
+ * MAP_FIXED is always requested.  The mapping is MAP_SHARED when the image
+ * has VM_SHARED or VM_MAYSHARE and MAP_PRIVATE otherwise, MAP_ANONYMOUS
+ * when the image references no file object, and MAP_NORESERVE when
+ * VM_ACCOUNT is clear.  VM_GROWSDOWN, VM_DENYWRITE and VM_EXECUTABLE are
+ * translated to their MAP_* counterparts.
+ */
+static unsigned long make_flags(struct cpt_vma_image *vmai)
+{
+	int shared = (vmai->cpt_flags & (VM_SHARED|VM_MAYSHARE)) != 0;
+	unsigned long flags = MAP_FIXED;
+
+	flags |= shared ? MAP_SHARED : MAP_PRIVATE;
+	flags |= (vmai->cpt_file == CPT_NULL) ? MAP_ANONYMOUS : 0;
+	flags |= (vmai->cpt_flags & VM_GROWSDOWN) ? MAP_GROWSDOWN : 0;
+	flags |= (vmai->cpt_flags & VM_DENYWRITE) ? MAP_DENYWRITE : 0;
+	flags |= (vmai->cpt_flags & VM_EXECUTABLE) ? MAP_EXECUTABLE : 0;
+	flags |= (vmai->cpt_flags & VM_ACCOUNT) ? 0 : MAP_NORESERVE;
+
+	return flags;
+}
+
+
+#if !defined(CONFIG_X86_64) && LINUX_VERSION_CODE < KERNEL_VERSION(2,6,15)
+/* Grow the page-based LDT in @pc to hold at least @mincount entries (rounded
+ * up to a multiple of 512), allocating zeroed pages for missing slots.
+ * Returns 0, -EINVAL if the table would exceed 64K, or -ENOMEM. */
+static int __alloc_ldt(mm_context_t *pc, int mincount)
+{
+	int newsize, i;
+
+	if (mincount <= pc->size)
+		return 0;
+	mincount = (mincount+511)&(~511);
+	newsize = mincount*LDT_ENTRY_SIZE;
+	if (newsize > 64*1024)	/* size comes from the image: fail, don't BUG */
+		return -EINVAL;
+	for (i = 0; i < newsize; i += PAGE_SIZE) {
+		int nr = i/PAGE_SIZE;
+		if (!pc->ldt_pages[nr]) {
+			pc->ldt_pages[nr] = alloc_page(GFP_HIGHUSER|__GFP_UBC);
+			if (!pc->ldt_pages[nr])
+				return -ENOMEM;
+			clear_highpage(pc->ldt_pages[nr]);
+		}
+	}
+	pc->size = mincount;
+	return 0;
+}
+
+/* Page-based LDT variant: size the LDT of current->mm for the image table,
+ * fill its pages from the bytes behind the object header, reload the LDT.
+ * NOTE(review): reads pc->size entries' worth, not li->cpt_size - confirm. */
+static int do_rst_ldt(struct cpt_obj_bits *li, loff_t pos, struct cpt_context *ctx)
+{
+	mm_context_t *pc = &current->mm->context;
+	loff_t data = pos + li->cpt_hdrlen;
+	int total, off, err;
+
+	err = __alloc_ldt(pc, li->cpt_size/LDT_ENTRY_SIZE);
+	if (err)
+		return err;
+	total = pc->size*LDT_ENTRY_SIZE;
+	for (off = 0; off < total; off += PAGE_SIZE) {
+		struct page *pg = pc->ldt_pages[off / PAGE_SIZE];
+		int chunk = total - off;
+		char *kaddr = kmap(pg);
+
+		if (chunk > PAGE_SIZE)
+			chunk = PAGE_SIZE;
+		err = ctx->pread(kaddr, chunk, ctx, data + off);
+		kunmap(pg);
+		if (err)
+			return err;
+	}
+
+	load_LDT(pc);
+	return 0;
+}
+
+#else
+
+/* Replace the LDT of current->mm with the table stored behind the image object
+ * header.  Returns 0 or -errno; a failed read frees the new buffer, old LDT stays. */
+static int do_rst_ldt(struct cpt_obj_bits *li, loff_t pos, struct cpt_context *ctx)
+{
+	struct mm_struct *mm = current->mm;
+	int big = li->cpt_size > PAGE_SIZE;	/* vmalloc()ed rather than kmalloc()ed */
+	int oldsize = mm->context.size;
+	void *oldldt, *newldt;
+	int err;
+
+	newldt = big ? vmalloc(li->cpt_size) : kmalloc(li->cpt_size, GFP_KERNEL);
+	if (!newldt)
+		return -ENOMEM;
+	err = ctx->pread(newldt, li->cpt_size, ctx, pos + li->cpt_hdrlen);
+	if (err) {
+		if (big)
+			vfree(newldt);
+		else
+			kfree(newldt);
+		return err;
+	}
+	oldldt = mm->context.ldt;
+	mm->context.ldt = newldt;
+	mm->context.size = li->cpt_size/LDT_ENTRY_SIZE;
+	load_LDT(&mm->context);
+
+	if (oldsize) {
+		if (oldsize*LDT_ENTRY_SIZE > PAGE_SIZE)
+			vfree(oldldt);
+		else
+			kfree(oldldt);
+	}
+	return 0;
+}
+#endif
+
+/* Rebuild the ring bookkeeping of a new kioctx from its image: recompute the
+ * ring geometry from aio_ctx->max_reqs, compare it with the values stored
+ * in the image, then pin the ring pages found at aimg->cpt_mmap_base in
+ * current->mm.
+ * Returns 0, -EINVAL on a geometry mismatch, -ENOMEM, or -EFAULT when not
+ * all ring pages could be pinned. */
+static int
+restore_aio_ring(struct kioctx *aio_ctx, struct cpt_aio_ctx_image *aimg)
+{
+	struct aio_ring_info *info = &aio_ctx->ring_info;
+	unsigned nr_events = aio_ctx->max_reqs;
+	unsigned long size;
+	int nr_pages;
+
+	/* We recalculate parameters of the ring exactly like
+	 * fs/aio.c does and then compare calculated values
+	 * with ones, stored in dump. They must be the same. */
+
+	nr_events += 2;
+
+	size = sizeof(struct aio_ring);
+	size += sizeof(struct io_event) * nr_events;
+	nr_pages = (size + PAGE_SIZE-1) >> PAGE_SHIFT;
+
+	if (nr_pages != aimg->cpt_ring_pages)
+		return -EINVAL;
+
+	info->nr_pages = nr_pages;
+
+	nr_events = (PAGE_SIZE * nr_pages - sizeof(struct aio_ring)) / sizeof(struct io_event);
+
+	if (nr_events != aimg->cpt_nr)
+		return -EINVAL;
+
+	info->nr = 0;
+	info->ring_pages = info->internal_pages;
+	if (nr_pages > AIO_RING_PAGES) {
+		info->ring_pages = kmalloc(sizeof(struct page *) * nr_pages, GFP_KERNEL);
+		if (!info->ring_pages)
+			return -ENOMEM;
+		memset(info->ring_pages, 0, sizeof(struct page *) * nr_pages);
+	}
+
+	info->mmap_size = nr_pages * PAGE_SIZE;
+
+	/* Kernel aio.c mmap()s the ring pages and then pins them, which
+	 * gives odd semantics: right after the AIO context is created the
+	 * pages are shared with user space, which can read and even write
+	 * them, but after the first fork they are marked COW.
+	 *
+	 * We do not try to simulate that behaviour exactly. For now the
+	 * ring pages are simply taken from user space (NOTE(review): this
+	 * presumably relies on the mapping at cpt_mmap_base having been
+	 * restored before - confirm). Keeping a kernel copy of the ring
+	 * in the AIO context image would be more correct.
+	 *
+	 * Known problem: if the pages were COWed, the ring is transferred
+	 * incorrectly. */
+	down_read(&current->mm->mmap_sem);
+	info->mmap_base = aimg->cpt_mmap_base;
+	info->nr_pages = get_user_pages(current, current->mm,
+					info->mmap_base, nr_pages, 
+					1, 0, info->ring_pages, NULL);
+	up_read(&current->mm->mmap_sem);
+
+	if (unlikely(info->nr_pages != nr_pages)) {
+		int i;
+
+		for (i=0; i<info->nr_pages; i++)
+			put_page(info->ring_pages[i]);
+		if (info->ring_pages && info->ring_pages != info->internal_pages)
+			kfree(info->ring_pages);
+		return -EFAULT;
+	}
+
+	aio_ctx->user_id = info->mmap_base;
+
+	info->nr = nr_events;
+	info->tail = aimg->cpt_tail;
+
+	return 0;
+}
+
+/* Recreate one AIO context of current->mm from its image at @pos: allocate a
+ * zeroed kioctx, rebuild its ring, initialise it, account max_reqs in aio_nr
+ * and link it into mm->ioctx_list.  Returns 0 or a negative errno. */
+static int do_rst_aio(struct cpt_aio_ctx_image *aimg, loff_t pos, cpt_context_t *ctx)
+{
+	int err;
+	struct kioctx *aio_ctx;
+	extern spinlock_t aio_nr_lock;
+
+	aio_ctx = kmem_cache_alloc(kioctx_cachep, GFP_KERNEL);
+	if (!aio_ctx)
+		return -ENOMEM;
+	memset(aio_ctx, 0, sizeof(*aio_ctx));
+	aio_ctx->max_reqs = aimg->cpt_max_reqs;
+
+	if ((err = restore_aio_ring(aio_ctx, aimg)) < 0) {
+		kmem_cache_free(kioctx_cachep, aio_ctx);
+		eprintk_ctx("AIO %Ld restore_aio_ring: %d\n", pos, err);
+		return err;
+	}
+
+	aio_ctx->mm = current->mm;
+	atomic_inc(&aio_ctx->mm->mm_count);
+	atomic_set(&aio_ctx->users, 1);
+	spin_lock_init(&aio_ctx->ctx_lock);
+	spin_lock_init(&aio_ctx->ring_info.ring_lock);
+	init_waitqueue_head(&aio_ctx->wait);
+	INIT_LIST_HEAD(&aio_ctx->active_reqs);
+	INIT_LIST_HEAD(&aio_ctx->run_list);
+	/* The handler's argument is the kioctx, not the checkpoint context. */
+	INIT_WORK(&aio_ctx->wq, aio_kick_handler, aio_ctx);
+	spin_lock(&aio_nr_lock);
+	aio_nr += aio_ctx->max_reqs;
+	spin_unlock(&aio_nr_lock);
+	write_lock(&aio_ctx->mm->ioctx_list_lock);
+	aio_ctx->next = aio_ctx->mm->ioctx_list;
+	aio_ctx->mm->ioctx_list = aio_ctx;
+	write_unlock(&aio_ctx->mm->ioctx_list_lock);
+	return 0;
+}
+
+struct anonvma_map	/* one id -> anon_vma pair kept in the ctx->anonvmas hash */
+{
+	struct hlist_node	list;	/* link in a ctx->anonvmas[] chain */
+	struct anon_vma		*avma;	/* anon_vma recorded for this id */
+	__u64			id;	/* cpt_anonvmaid taken from the vma image */
+};
+
+/* Make the vma starting at vmai->cpt_start use the anon_vma associated with
+ * vmai->cpt_anonvmaid, creating the anon_vma and recording the id -> anon_vma
+ * pair in the ctx->anonvmas hash on first use.  Returns 0 (also when only a
+ * mismatch warning was printed) or a negative errno. */
+static int verify_create_anonvma(struct mm_struct *mm,
+				 struct cpt_vma_image *vmai,
+				 cpt_context_t *ctx)
+{
+	struct anon_vma *avma = NULL;
+	struct anon_vma *new_avma;
+	struct vm_area_struct *vma;
+	int h;
+
+	if (!ctx->anonvmas) {
+		if (CPT_ANONVMA_HSIZE*sizeof(struct hlist_head) > PAGE_SIZE)
+			return -EINVAL;
+		if ((ctx->anonvmas = (void*)__get_free_page(GFP_KERNEL)) == NULL)
+			return -ENOMEM;
+		for (h = 0; h < CPT_ANONVMA_HSIZE; h++)
+			INIT_HLIST_HEAD(&ctx->anonvmas[h]);
+	} else {
+		struct anonvma_map *map;
+		struct hlist_node *elem;
+
+		h = hash_long((unsigned long)vmai->cpt_anonvmaid, CPT_ANONVMA_HBITS);
+		hlist_for_each_entry(map, elem, &ctx->anonvmas[h], list) {
+			if (map->id == vmai->cpt_anonvmaid) {
+				avma = map->avma;
+				break;
+			}
+		}
+	}
+
+	down_read(&mm->mmap_sem);
+	if ((vma = find_vma(mm, vmai->cpt_start)) == NULL) {
+		up_read(&mm->mmap_sem);
+		return -ESRCH;
+	}
+	if (vma->vm_start != vmai->cpt_start) {
+		up_read(&mm->mmap_sem);
+		eprintk_ctx("vma start mismatch\n");
+		return -EINVAL;
+	}
+	if (vma->vm_pgoff != vmai->cpt_pgoff) { 
+		dprintk_ctx("vma pgoff mismatch, fixing\n");
+		if (vma->vm_file || (vma->vm_flags&(VM_SHARED|VM_MAYSHARE))) {
+			eprintk_ctx("cannot fixup vma pgoff\n");
+			up_read(&mm->mmap_sem);	
+			return -EINVAL;
+		}
+		vma->vm_pgoff = vmai->cpt_pgoff;
+	}
+
+	if (!vma->anon_vma) {
+		if (avma) {
+			vma->anon_vma = avma;
+			anon_vma_link(vma);
+		} else {
+			int err;
+
+			err = anon_vma_prepare(vma);
+
+			if (err) {
+				up_read(&mm->mmap_sem);
+				return err;
+			}
+		}
+	} else {
+		/* Note, two different anonvmaid's _can_ end up pointing to
+		 * one anon_vma, f.e. when mmap() merged the new area into
+		 * the previous one: they then share one anon_vma even if
+		 * they did not on the original host. That is OK: rmap just
+		 * scans a longer list of vmas, which is only "suboptimal".
+		 *
+		 * The real problem is a vma that already got an anon_vma
+		 * other than the one recorded for this id. This is rare;
+		 * it is only warned about here and the restore falls back
+		 * to copying memory in that case.
+		 */
+		if (avma && vma->anon_vma != avma) {
+			up_read(&mm->mmap_sem);
+			wprintk_ctx("anon_vma mismatch\n");
+			return 0;
+		}
+	}
+
+	new_avma = vma->anon_vma;
+	up_read(&mm->mmap_sem);
+
+	if (!avma) {
+		struct anonvma_map *map;
+
+		if (!new_avma)
+			return -EINVAL;
+
+		if ((map = kmalloc(sizeof(*map), GFP_KERNEL)) == NULL)
+			return -ENOMEM;
+
+		map->id = vmai->cpt_anonvmaid;
+		map->avma = new_avma;
+		h = hash_long((unsigned long)vmai->cpt_anonvmaid, CPT_ANONVMA_HBITS);
+		hlist_add_head(&map->list, &ctx->anonvmas[h]);
+	}
+	return 0;
+}
+
+/* Copy the pages of [start, end) from address space @src into the same
+ * addresses of current->mm, one page at a time.  Both pages are pinned with
+ * get_user_pages() (the destination for write), copied through kmap() and
+ * released again.  Returns 0 or a negative errno (-EFAULT when a page
+ * could not be pinned). */
+static int copy_mm_pages(struct mm_struct *src, unsigned long start,
+			 unsigned long end)
+{
+	unsigned long addr;
+
+	for (addr = start; addr < end; addr += PAGE_SIZE) {
+		struct page *dst_page, *src_page;
+		void *to, *from;
+		int got;
+
+		got = get_user_pages(current, current->mm,
+				     addr, 1, 1, 1, &dst_page, NULL);
+		if (got <= 0)
+			return got ? got : -EFAULT;
+
+		got = get_user_pages(current, src,
+				     addr, 1, 0, 1, &src_page, NULL);
+		if (got <= 0) {
+			page_cache_release(dst_page);
+			return got ? got : -EFAULT;
+		}
+
+		from = kmap(src_page);
+		to = kmap(dst_page);
+		memcpy(to, from, PAGE_SIZE);
+		set_page_dirty_lock(dst_page);
+		kunmap(dst_page);
+		kunmap(src_page);
+		page_cache_release(dst_page);
+		page_cache_release(src_page);
+	}
+	return 0;
+}
+
+/* Recreate one vma of current->mm from its image: map it at the recorded
+ * address, attach the anon_vma if the image names one, replay the page /
+ * remap / copy / lazy sub-objects that follow the header, and finally fix
+ * up flags the mapping call could not set.  Returns 0 or -errno. */
+static int do_rst_vma(struct cpt_vma_image *vmai, loff_t vmapos, loff_t mmpos, struct cpt_context *ctx)
+{
+	int err = 0;
+	unsigned long addr;
+	struct mm_struct *mm = current->mm;
+	struct vm_area_struct *vma;
+	struct file *file = NULL;
+	unsigned long prot;
+	int checked = 0;
+
+	prot = make_prot(vmai);
+
+	if (vmai->cpt_file != CPT_NULL) {
+		if (vmai->cpt_type == CPT_VMA_TYPE_0) {
+			file = rst_file(vmai->cpt_file, -1, ctx);
+			if (IS_ERR(file)) {
+				eprintk_ctx("do_rst_vma: rst_file: %Ld\n", vmai->cpt_file);
+				return PTR_ERR(file);
+			}
+		} else if (vmai->cpt_type == CPT_VMA_TYPE_SHM) {
+			file = rst_sysv_shm(vmai->cpt_file, ctx);
+			if (IS_ERR(file))
+				return PTR_ERR(file);
+		}
+	}
+
+	down_write(&mm->mmap_sem);
+	addr = do_mmap_pgoff(file, vmai->cpt_start,
+			     vmai->cpt_end-vmai->cpt_start,
+			     prot, make_flags(vmai),
+			     vmai->cpt_pgoff);
+
+	if (addr != vmai->cpt_start) {
+		up_write(&mm->mmap_sem);
+
+		err = -EINVAL;
+		if (IS_ERR((void*)addr))
+			err = addr;
+		goto out;
+	}
+
+	vma = find_vma(mm, vmai->cpt_start);
+	if (vma == NULL) {
+		up_write(&mm->mmap_sem);
+		eprintk_ctx("cannot find mmapped vma\n");
+		err = -ESRCH;
+		goto out;
+	}
+
+	/* do_mmap_pgoff() can merge new area to previous one (not to the next,
+	 * we mmap in order, the rest of mm is still unmapped). This can happen
+	 * f.e. if flags are to be adjusted later, or if we had different
+	 * anon_vma on two adjacent regions. Split it by brute force. */
+	if (vma->vm_start != vmai->cpt_start) {
+		dprintk_ctx("vma %Ld merged, split\n", vmapos);
+		err = split_vma(mm, vma, (unsigned long)vmai->cpt_start, 0);
+		if (err) {
+			up_write(&mm->mmap_sem);
+			eprintk_ctx("cannot split vma\n");
+			goto out;
+		}
+	}
+	up_write(&mm->mmap_sem);
+
+	if (vmai->cpt_anonvma && vmai->cpt_anonvmaid) {
+		err = verify_create_anonvma(mm, vmai, ctx);
+		if (err) {
+			eprintk_ctx("cannot verify_create_anonvma %Ld\n", vmapos);
+			goto out;
+		}
+	}
+
+	if (vmai->cpt_next > vmai->cpt_hdrlen) {
+		loff_t offset = vmapos + vmai->cpt_hdrlen;
+
+		do {
+			union {
+				struct cpt_page_block pb;
+				struct cpt_remappage_block rpb;
+				struct cpt_copypage_block cpb;
+				struct cpt_lazypage_block lpb;
+			} u;
+			loff_t pos;
+
+			err = rst_get_object(-1, offset, &u, ctx);
+			if (err) {
+				eprintk_ctx("vma fix object: %d\n", err);
+				goto out;
+			}
+			if (u.rpb.cpt_object == CPT_OBJ_REMAPPAGES) {
+				err = sc_remap_file_pages(u.rpb.cpt_start,
+							  u.rpb.cpt_end-u.rpb.cpt_start,
+							  0, u.rpb.cpt_pgoff, 0);
+				if (err < 0) {
+					eprintk_ctx("remap_file_pages: %d (%08x,%u,%u)\n", err,
+					       (__u32)u.rpb.cpt_start, (__u32)(u.rpb.cpt_end-u.rpb.cpt_start), 
+					       (__u32)u.rpb.cpt_pgoff);
+					goto out;
+				}
+				offset += u.rpb.cpt_next;
+				continue;
+			} else if (u.cpb.cpt_object == CPT_OBJ_LAZYPAGES) {
+#ifdef CONFIG_VZ_CHECKPOINT_LAZY
+				unsigned long addr = u.lpb.cpt_start;
+
+				down_read(&mm->mmap_sem);
+				if ((vma = find_vma(mm, u.lpb.cpt_start)) == NULL) {
+					up_read(&mm->mmap_sem);
+					eprintk_ctx("lost vm_area_struct\n");
+					err = -ESRCH;
+					goto out;
+				}
+				err = anon_vma_prepare(vma);
+				if (err) {
+					up_read(&mm->mmap_sem);
+					goto out;
+				}
+				while (addr < u.lpb.cpt_end) {
+					err = rst_pagein(vma, u.lpb.cpt_index + (addr-u.lpb.cpt_start)/PAGE_SIZE,
+							 addr, ctx);
+					if (err)
+						break;
+					addr += PAGE_SIZE;
+				}
+				up_read(&mm->mmap_sem);
+#else
+				err = -EINVAL;
+#endif
+				if (err)
+					goto out;
+				offset += u.cpb.cpt_next;
+				continue;
+			} else if (u.cpb.cpt_object == CPT_OBJ_COPYPAGES) {
+				struct vm_area_struct *vma, *vma1;
+				struct mm_struct *src;
+				struct anon_vma *src_anon;
+				cpt_object_t *mobj;
+
+				if (!vmai->cpt_anonvmaid) {
+					err = -EINVAL;
+					eprintk_ctx("CPT_OBJ_COPYPAGES in !anonvma\n");
+					goto out;
+				}
+
+				mobj = lookup_cpt_obj_bypos(CPT_OBJ_MM, u.cpb.cpt_source, ctx);
+				if (!mobj) {
+					eprintk_ctx("lost mm_struct to clone pages from\n");
+					err = -ESRCH;
+					goto out;
+				}
+				src = mobj->o_obj;
+
+				down_read(&src->mmap_sem);
+				src_anon = NULL;
+				vma1 = find_vma(src, u.cpb.cpt_start);
+				if (vma1)
+					src_anon = vma1->anon_vma;
+				up_read(&src->mmap_sem);
+
+				if (!vma1) {
+					eprintk_ctx("lost src vm_area_struct\n");
+					err = -ESRCH;
+					goto out;
+				}
+
+				down_read(&mm->mmap_sem);
+				if ((vma = find_vma(mm, u.cpb.cpt_start)) == NULL) {
+					up_read(&mm->mmap_sem);
+					eprintk_ctx("lost vm_area_struct\n");
+					err = -ESRCH;
+					goto out;
+				}
+
+				if (!src_anon ||
+				    !vma->anon_vma ||
+				    vma->anon_vma != src_anon ||
+				    vma->vm_start - vma1->vm_start !=
+				    (vma->vm_pgoff - vma1->vm_pgoff) << PAGE_SHIFT) {
+					up_read(&mm->mmap_sem);
+					wprintk_ctx("anon_vma mismatch in vm_area_struct %Ld\n", vmapos);
+					err = copy_mm_pages(mobj->o_obj,
+							    u.cpb.cpt_start,
+							    u.cpb.cpt_end);
+				} else {
+					err = __copy_page_range(vma, vma1,
+								u.cpb.cpt_start,
+								u.cpb.cpt_end-u.cpb.cpt_start);
+					up_read(&mm->mmap_sem);
+				}
+				if (err) {
+					eprintk_ctx("clone_page_range: %d (%08x,%u,%ld)\n", err,
+						(__u32)u.cpb.cpt_start, (__u32)(u.cpb.cpt_end-u.cpb.cpt_start), 
+						(long)u.cpb.cpt_source);
+					goto out;
+				}
+
+				offset += u.cpb.cpt_next;
+				continue;
+			}
+			if (u.pb.cpt_object != CPT_OBJ_PAGES) {
+				eprintk_ctx("unknown vma fix object %d\n", u.pb.cpt_object);
+				err = -EINVAL;
+				goto out;
+			}
+			pos = offset + sizeof(u.pb);
+			if (!(vmai->cpt_flags&VM_ACCOUNT) && !(prot&PROT_WRITE)) {
+				/* I guess this is get_user_pages() messed things,
+				 * this happens f.e. when gdb inserts breakpoints.
+				 */
+				int i;
+				for (i=0; i<(u.pb.cpt_end-u.pb.cpt_start)/PAGE_SIZE; i++) {
+					struct page *page;
+					void *maddr;
+					err = get_user_pages(current, current->mm,
+							     (unsigned long)u.pb.cpt_start + i*PAGE_SIZE,
+							     1, 1, 1, &page, NULL);
+					if (err == 0)
+						err = -EFAULT;
+					if (err < 0) {
+						eprintk_ctx("get_user_pages: %d\n", err);
+						goto out;
+					}
+					err = 0;
+					maddr = kmap(page);
+					if (u.pb.cpt_content == CPT_CONTENT_VOID)
+						memset(maddr, 0, PAGE_SIZE);
+					else if (u.pb.cpt_content == CPT_CONTENT_DATA)
+						err = ctx->pread(maddr, PAGE_SIZE,
+								 ctx, pos + i*PAGE_SIZE);
+					else
+						err = -EINVAL;
+					if (!err)
+						set_page_dirty_lock(page);
+					kunmap(page);
+					page_cache_release(page);	/* unpin on error too */
+					if (err)
+						goto out;
+				}
+			} else {
+				if (!(prot&PROT_WRITE))
+					sc_mprotect(vmai->cpt_start, vmai->cpt_end-vmai->cpt_start, prot | PROT_WRITE);
+				if (u.pb.cpt_content == CPT_CONTENT_VOID) {
+					int i;
+					for (i=0; i<(u.pb.cpt_end-u.pb.cpt_start)/sizeof(unsigned long); i++) {
+						err = __put_user(0UL, ((unsigned long __user*)(unsigned long)u.pb.cpt_start) + i);
+						if (err) {
+							eprintk_ctx("__put_user 2 %d\n", err);
+							goto out;
+						}
+					}
+				} else if (u.pb.cpt_content == CPT_CONTENT_DATA) {
+					loff_t tpos = pos;
+					err = ctx->file->f_op->read(ctx->file, cpt_ptr_import(u.pb.cpt_start),
+							 u.pb.cpt_end-u.pb.cpt_start,
+							 &tpos);
+					if (err != u.pb.cpt_end-u.pb.cpt_start) {
+						if (err >= 0)
+							err = -EIO;
+						goto out;
+					}
+				} else {
+					err = -EINVAL;
+					goto out;
+				}
+				if (!(prot&PROT_WRITE))
+					sc_mprotect(vmai->cpt_start, vmai->cpt_end-vmai->cpt_start, prot);
+			}
+			err = 0;
+			offset += u.pb.cpt_next;
+		} while (offset < vmapos + vmai->cpt_next);
+	}
+
+check:
+	do {
+		struct vm_area_struct *vma;
+		down_read(&mm->mmap_sem);
+		vma = find_vma(mm, addr);
+		if (vma) {
+			if ((vma->vm_flags^vmai->cpt_flags)&VM_READHINTMASK) {
+				VM_ClearReadHint(vma);
+				vma->vm_flags |= vmai->cpt_flags&VM_READHINTMASK;
+			}
+			if ((vma->vm_flags^vmai->cpt_flags)&VM_LOCKED) {
+				dprintk_ctx("fixing up VM_LOCKED %Ld\n", vmapos);
+				up_read(&mm->mmap_sem);
+				if (vma->vm_flags&VM_LOCKED)
+					err = sc_munlock(vmai->cpt_start, vmai->cpt_end-vmai->cpt_start);
+				else
+					err = sc_mlock(vmai->cpt_start, vmai->cpt_end-vmai->cpt_start);
+				if (err)
+					goto out;
+				goto check;
+			}
+			if ((vma->vm_page_prot.pgprot^vmai->cpt_pgprot)&~__PAGE_NX)
+				wprintk_ctx("VMA %08lx@%ld pgprot mismatch %08Lx %08Lx\n", addr, (long)vmapos,
+				       (__u64)vma->vm_page_prot.pgprot, (__u64)vmai->cpt_pgprot);
+#if defined(CONFIG_X86_PAE) || defined(CONFIG_X86_64)
+			if (((vma->vm_page_prot.pgprot^vmai->cpt_pgprot)&__PAGE_NX) &&
+			    (ctx->kernel_config_flags&CPT_KERNEL_CONFIG_PAE))
+				wprintk_ctx("VMA %08lx@%ld pgprot mismatch %08Lx %08Lx\n", addr, (long)vmapos,
+				       (__u64)vma->vm_page_prot.pgprot, (__u64)vmai->cpt_pgprot);
+#endif
+			if (vma->vm_flags != vmai->cpt_flags) {
+				unsigned long x = vma->vm_flags ^ vmai->cpt_flags;
+				if (x & VM_EXEC) {
+					/* Crap. On i386 this is OK.
+					 * It is impossible to make via mmap/mprotect
+					 * exec.c clears VM_EXEC on stack. */
+					vma->vm_flags &= ~VM_EXEC;
+				} else if ((x & VM_ACCOUNT) && !checked) {
+					checked = 1;
+					if (!(prot&PROT_WRITE)) {
+						up_read(&mm->mmap_sem);
+						sc_mprotect(vmai->cpt_start, vmai->cpt_end-vmai->cpt_start, prot | PROT_WRITE);
+						sc_mprotect(vmai->cpt_start, vmai->cpt_end-vmai->cpt_start, prot);
+						goto check;
+					}
+					wprintk_ctx("VMA %08lx@%ld flag mismatch %08x %08x\n", addr, (long)vmapos,
+					       (__u32)vma->vm_flags, (__u32)vmai->cpt_flags);
+				} else {
+					wprintk_ctx("VMA %08lx@%ld flag mismatch %08x %08x\n", addr, (long)vmapos,
+					       (__u32)vma->vm_flags, (__u32)vmai->cpt_flags);
+				}
+			}
+		} else {
+			wprintk_ctx("no VMA for %08lx@%ld\n", addr, (long)vmapos);
+		}
+		up_read(&mm->mmap_sem);
+	} while (0);
+
+out:
+	if (file)
+		fput(file);
+	return err;
+}
+
+/* Rebuild current->mm from the mm image at @pos.  Everything below TASK_SIZE
+ * is unmapped and the layout fields are loaded from @vmi; then the child
+ * objects (vmas, LDT bits, AIO contexts) are replayed in image order.
+ * mm->def_flags is kept at 0 during the replay and set to the saved value
+ * only when all children were restored.
+ * Returns 0 or a negative errno. */
+static int do_rst_mm(struct cpt_mm_image *vmi, loff_t pos, struct cpt_context *ctx)
+{
+	struct mm_struct *mm = current->mm;
+	unsigned int saved_def_flags = vmi->cpt_def_flags;
+	loff_t cur = pos + vmi->cpt_hdrlen;
+	int err;
+
+	down_write(&mm->mmap_sem);
+	do_munmap(mm, 0, TASK_SIZE);
+
+	mm->start_code = vmi->cpt_start_code;
+	mm->end_code = vmi->cpt_end_code;
+	mm->start_data = vmi->cpt_start_data;
+	mm->end_data = vmi->cpt_end_data;
+	mm->start_brk = vmi->cpt_start_brk;
+	mm->brk = vmi->cpt_brk;
+	mm->start_stack = vmi->cpt_start_stack;
+	mm->arg_start = vmi->cpt_start_arg;
+	mm->arg_end = vmi->cpt_end_arg;
+	mm->env_start = vmi->cpt_start_env;
+	mm->env_end = vmi->cpt_end_env;
+
+	mm->def_flags = 0;
+	mm->dumpable = (vmi->cpt_dumpable != 0);
+	mm->vps_dumpable = (vmi->cpt_vps_dumpable != 0);
+	/* NB: ? used_hugetlb (CONFIG_HUGETLB_PAGE) is not handled here. */
+	up_write(&mm->mmap_sem);
+
+	if (vmi->cpt_next <= vmi->cpt_hdrlen)
+		goto finish;
+
+	do {
+		union {
+			struct cpt_vma_image vmai;
+			struct cpt_aio_ctx_image aioi;
+			struct cpt_obj_bits bits;
+		} u;
+
+		err = rst_get_object(-1, cur, &u, ctx);
+		if (err)
+			return err;
+
+		/* Dispatch on the type of the object found at cur. */
+		if (u.vmai.cpt_object == CPT_OBJ_VMA) {
+			err = do_rst_vma(&u.vmai, cur, pos, ctx);
+		} else if (u.bits.cpt_object == CPT_OBJ_BITS &&
+			   u.bits.cpt_content == CPT_CONTENT_MM_CONTEXT) {
+			err = do_rst_ldt(&u.bits, cur, ctx);
+		} else if (u.aioi.cpt_object == CPT_OBJ_AIO_CONTEXT) {
+			err = do_rst_aio(&u.aioi, cur, ctx);
+		} else {
+			eprintk_ctx("unknown object %u in mm image\n", u.vmai.cpt_object);
+			err = -EINVAL;
+		}
+		if (err)
+			return err;
+		cur += u.vmai.cpt_next;
+	} while (cur < pos + vmi->cpt_next);
+
+finish:
+	/* Nothing failed: the saved default flags may take effect now. */
+	down_write(&mm->mmap_sem);
+	mm->def_flags = saved_def_flags;
+	up_write(&mm->mmap_sem);
+
+	return 0;
+}
+
+extern void exit_mm(struct task_struct * tsk);
+
+/* Finish the mm of current: drop it if the image has none, keep a registered
+ * one, else build it with do_rst_mm() and register it.  Returns 0 or -errno. */
+int rst_mm_complete(struct cpt_task_image *ti, struct cpt_context *ctx)
+{
+	struct cpt_mm_image *vmi = (struct cpt_mm_image *)__get_free_page(GFP_KERNEL);
+	cpt_object_t *mobj;
+	int err = 0;
+
+	if (!vmi)
+		return -ENOMEM;
+
+	if (ti->cpt_mm == CPT_NULL) {
+		if (current->mm)
+			exit_mm(current);
+		goto out;
+	}
+
+	mobj = lookup_cpt_obj_bypos(CPT_OBJ_MM, ti->cpt_mm, ctx);
+	if (mobj) {
+		BUG_ON(current->mm != mobj->o_obj);
+		goto out;
+	}
+
+	if (!current->mm) {
+		struct mm_struct *mm = mm_alloc();
+		if (!mm) {
+			err = -ENOMEM;
+			goto out;
+		}
+		err = init_new_context(current, mm);
+		if (err) {
+			mmdrop(mm);
+			goto out;
+		}
+		current->mm = mm;
+	}
+
+	err = rst_get_object(CPT_OBJ_MM, ti->cpt_mm, vmi, ctx);
+	if (err)
+		goto out;
+	err = do_rst_mm(vmi, ti->cpt_mm, ctx);
+	if (err) {
+		eprintk_ctx("do_rst_mm %Ld\n", ti->cpt_mm);
+		goto out;
+	}
+	mobj = cpt_object_add(CPT_OBJ_MM, current->mm, ctx);
+	if (mobj)
+		cpt_obj_setpos(mobj, ti->cpt_mm, ctx);
+	else
+		err = -ENOMEM;
+out:
+	free_page((unsigned long)vmi);
+	return err;
+}
+
+/* This is part of mm setup, made in parent context. Mostly, it is the place,
+ * where we graft mm of another process to child.  @obj wraps the child
+ * task.  Always returns 0; a task whose mm is not registered yet is left
+ * untouched. */
+int rst_mm_basic(cpt_object_t *obj, struct cpt_task_image *ti, struct cpt_context *ctx)
+{
+	task_t *tsk = obj->o_obj;
+	struct mm_struct *newmm;
+	cpt_object_t *mobj;
+
+	/* Task without mm. Just get rid of this. */
+	if (ti->cpt_mm == CPT_NULL) {
+		if (tsk->mm) {
+			mmput(tsk->mm);
+			tsk->mm = NULL;
+		}
+		return 0;
+	}
+
+	mobj = lookup_cpt_obj_bypos(CPT_OBJ_MM, ti->cpt_mm, ctx);
+	if (!mobj)
+		return 0;
+	newmm = mobj->o_obj;	/* Good, the MM is already created. */
+	if (newmm == tsk->mm)
+		return 0;	/* Already done by clone(). */
+	/* The task may have no mm here and mmput() does not take NULL. */
+	if (tsk->mm)
+		mmput(tsk->mm);
+	atomic_inc(&newmm->mm_users);
+	tsk->mm = newmm;
+	tsk->active_mm = newmm;
+	return 0;
+}
+
+/* Pick the mm-related clone flag for the task described by @ti: CLONE_VM
+ * when the child shares the mm (no mm in the image, or the mm object is
+ * already restored), 0 when the mm is to be copied. */
+
+__u32 rst_mm_flag(struct cpt_task_image *ti, struct cpt_context *ctx)
+{
+	if (ti->cpt_mm != CPT_NULL &&
+	    lookup_cpt_obj_bypos(CPT_OBJ_MM, ti->cpt_mm, ctx) == NULL)
+		return 0;
+	return CLONE_VM;
+}
diff -uprN linux-2.6.16/kernel/cpt/rst_net.c linux-2.6.16.ovz/kernel/cpt/rst_net.c
--- linux-2.6.16/kernel/cpt/rst_net.c	1969-12-31 19:00:00.000000000 -0500
+++ linux-2.6.16.ovz/kernel/cpt/rst_net.c	2006-07-05 08:34:56.000000000 -0400
@@ -0,0 +1,481 @@
+/*
+ *
+ *  kernel/cpt/rst_net.c
+ *
+ *  Copyright (C) 2000-2005  SWsoft
+ *  All rights reserved.
+ *
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/version.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/file.h>
+#include <linux/mm.h>
+#include <linux/errno.h>
+#include <linux/fs.h>
+#include <linux/socket.h>
+#include <linux/netdevice.h>
+#include <linux/inetdevice.h>
+#include <linux/rtnetlink.h>
+#include <linux/ve.h>
+#include <linux/ve_proto.h>
+#include <net/route.h>
+#include <net/ip_fib.h>
+#include <net/addrconf.h>
+
+#include "cpt_obj.h"
+#include "cpt_context.h"
+#include "cpt_kernel.h"
+#include "cpt_net.h"
+
+#include "cpt_syscalls.h"
+
+extern struct in_ifaddr *inet_alloc_ifa(void);
+extern int inet_insert_ifa(struct in_ifaddr *ifa);
+
+/* Re-create the interface addresses recorded in the NET_IFADDR image section. */
+int rst_restore_ifaddr(struct cpt_context *ctx)
+{
+	int err;
+	loff_t sec = ctx->sections[CPT_SECT_NET_IFADDR], endsec;
+	struct cpt_section_hdr h;
+	struct cpt_ifaddr_image di;
+	struct net_device *dev;
+
+	if (sec == CPT_NULL)
+		return 0;
+
+	err = ctx->pread(&h, sizeof(h), ctx, sec);
+	if (err)
+		return err;
+	if (h.cpt_section != CPT_SECT_NET_IFADDR || h.cpt_hdrlen < sizeof(h))
+		return -EINVAL;
+
+	endsec = sec + h.cpt_next;
+	sec += h.cpt_hdrlen;
+	while (sec < endsec) {
+		int cindex = -1;	/* stays -1 when the saved ifindex is neither lo nor venet */
+		err = rst_get_object(CPT_OBJ_NET_IFADDR, sec, &di, ctx);
+		if (err)
+			return err;
+		if (di.cpt_index == ctx->lo_index_old)
+			cindex = ctx->lo_index;
+		else if (di.cpt_index == ctx->venet_index_old)
+			cindex = ctx->venet_index;
+		if (cindex <= 0)
+			eprintk_ctx("unknown ifaddr for %d\n", di.cpt_index);
+		rtnl_lock();
+		dev = __dev_get_by_index(cindex);
+		if (dev && di.cpt_family == AF_INET) {
+			struct in_device *in_dev;
+			struct in_ifaddr *ifa;
+			if ((in_dev = __in_dev_get_rtnl(dev)) == NULL)
+				in_dev = inetdev_init(dev);
+			ifa = in_dev ? inet_alloc_ifa() : NULL;
+			err = -ENOMEM;	/* reported if inetdev_init() or inet_alloc_ifa() failed */
+			if (ifa) {
+				ifa->ifa_local = di.cpt_address[0];
+				ifa->ifa_address = di.cpt_peer[0];
+				ifa->ifa_broadcast = di.cpt_broadcast[0];
+				ifa->ifa_prefixlen = di.cpt_masklen;
+				ifa->ifa_mask = inet_make_mask(ifa->ifa_prefixlen);
+				ifa->ifa_flags = di.cpt_flags;
+				ifa->ifa_scope = di.cpt_scope;
+				memcpy(ifa->ifa_label, di.cpt_label, IFNAMSIZ);
+				in_dev_hold(in_dev);
+				ifa->ifa_dev   = in_dev;
+				err = inet_insert_ifa(ifa);
+			}
+			if (err && err != -EEXIST) {	/* an already present address is not an error */
+				rtnl_unlock();
+				eprintk_ctx("add ifaddr err %d for %d %s\n", err, di.cpt_index, di.cpt_label);
+				return err;
+			}
+#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
+		} else if (dev && di.cpt_family == AF_INET6) {
+			err = inet6_addr_add(dev->ifindex,
+					     (struct in6_addr *)di.cpt_address,
+					     di.cpt_masklen);
+			if (err && err != -EEXIST) {
+				rtnl_unlock();
+				eprintk_ctx("add ifaddr err %d for %d %s\n", err, di.cpt_index, di.cpt_label);
+				return err;
+			}
+#endif
+		} else {
+			rtnl_unlock();
+			eprintk_ctx("unknown ifaddr 2 for %d\n", di.cpt_index);
+			return -EINVAL;
+		}
+		rtnl_unlock();
+		sec += di.cpt_next;
+	}
+	return 0;
+}
+
+/* Rewrite RTA_OIF to the restored ifindex.  Returns -ENODEV for an unknown
+ * interface, 2 for IPv6 routes filtered below, else rtm_protocol == RTPROT_KERNEL. */
+static int rewrite_rtmsg(struct nlmsghdr *nlh, struct cpt_context *ctx)
+{
+	int hdr_len = NLMSG_LENGTH(sizeof(struct rtmsg));
+	struct rtmsg *rtm = NLMSG_DATA(nlh);
+	__u32 dst0 = 0;
+
+	if (nlh->nlmsg_len > hdr_len) {
+		int left = nlh->nlmsg_len - NLMSG_ALIGN(hdr_len);
+		struct rtattr *rta = (void*)nlh + NLMSG_ALIGN(hdr_len);
+
+		for (; RTA_OK(rta, left); rta = RTA_NEXT(rta, left)) {
+			if (rta->rta_type == RTA_DST) {
+				dst0 = *(__u32*)RTA_DATA(rta);
+			} else if (rta->rta_type == RTA_OIF) {
+				int *oif = RTA_DATA(rta);
+				if (*oif == ctx->lo_index_old) {
+					*oif = ctx->lo_index;
+				} else if (*oif == ctx->venet_index_old) {
+					*oif = ctx->venet_index;
+				} else {
+					eprintk_ctx("unknown iface %d\n", *oif);
+					return -ENODEV;
+				}
+			}
+		}
+	}
+#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
+	if (rtm->rtm_family == AF_INET6) {
+		if (rtm->rtm_type == RTN_LOCAL || (rtm->rtm_flags & RTM_F_CLONED))
+			return 2;
+		switch (rtm->rtm_protocol) {
+		case RTPROT_UNSPEC:
+		case RTPROT_RA:
+		case RTPROT_REDIRECT:
+		case RTPROT_KERNEL:
+			return 2;
+		case RTPROT_BOOT:
+			if ((rtm->rtm_dst_len == 8 && dst0 == htonl(0xFF000000)) ||
+			    (rtm->rtm_dst_len == 64 && dst0 == htonl(0xFE800000)))
+				return 2;
+			break;
+		}
+	}
+#endif
+	return rtm->rtm_protocol == RTPROT_KERNEL;
+}
+
+/* Replay the netlink route messages saved in the NET_ROUTE image section
+ * through a kernel NETLINK_ROUTE socket.  Returns 0 or a negative errno. */
+int rst_restore_route(struct cpt_context *ctx)
+{
+	int err;
+	struct socket *sock;
+	struct msghdr msg;
+	struct iovec iov;
+	struct sockaddr_nl nladdr;
+	mm_segment_t oldfs;
+	loff_t sec = ctx->sections[CPT_SECT_NET_ROUTE], endsec;
+	struct cpt_section_hdr h;
+	struct cpt_object_hdr v;
+	char *pg;
+
+	if (sec == CPT_NULL)
+		return 0;
+
+	err = ctx->pread(&h, sizeof(h), ctx, sec);
+	if (err)
+		return err;
+	if (h.cpt_section != CPT_SECT_NET_ROUTE || h.cpt_hdrlen < sizeof(h))
+		return -EINVAL;
+
+	if (h.cpt_hdrlen >= h.cpt_next)	/* section holds nothing beyond its header */
+		return 0;
+
+	sec += h.cpt_hdrlen;
+	err = rst_get_object(CPT_OBJ_NET_ROUTE, sec, &v, ctx);
+	if (err < 0)
+		return err;
+
+	err = sock_create_kern(AF_NETLINK, SOCK_DGRAM, NETLINK_ROUTE, &sock);
+	if (err)
+		return err;
+
+	pg = (char*)__get_free_page(GFP_KERNEL);	/* one page holds one message at a time */
+	if (pg == NULL) {
+		err = -ENOMEM;
+		goto out_sock;
+	}
+
+	memset(&nladdr, 0, sizeof(nladdr));
+	nladdr.nl_family = AF_NETLINK;
+
+	endsec = sec + v.cpt_next;
+	sec += v.cpt_hdrlen;
+
+	while (sec < endsec) {
+		struct nlmsghdr nh, *n;
+		int kernel_flag;
+
+		err = ctx->pread(&nh, sizeof(nh), ctx, sec);
+		if (err)
+			goto out_sock_pg;
+		/* Too short (e.g. 0) would never advance sec; too long overflows pg. */
+		if (nh.nlmsg_len < sizeof(nh) || nh.nlmsg_len > PAGE_SIZE) {
+			err = -EINVAL;
+			goto out_sock_pg;
+		}
+		err = ctx->pread(pg, nh.nlmsg_len, ctx, sec);
+		if (err)
+			goto out_sock_pg;
+
+		n = (struct nlmsghdr*)pg;
+		n->nlmsg_flags = NLM_F_REQUEST|NLM_F_APPEND|NLM_F_CREATE;
+
+		err = rewrite_rtmsg(n, ctx);
+		if (err < 0)
+			goto out_sock_pg;
+		kernel_flag = err;
+
+		if (kernel_flag == 2)	/* rewrite_rtmsg() asked to skip this route */
+			goto do_next;
+
+		iov.iov_base=n;
+		iov.iov_len=nh.nlmsg_len;
+		msg.msg_name=&nladdr;
+		msg.msg_namelen=sizeof(nladdr);
+		msg.msg_iov=&iov;
+		msg.msg_iovlen=1;
+		msg.msg_control=NULL;
+		msg.msg_controllen=0;
+		msg.msg_flags=MSG_DONTWAIT;
+
+		oldfs = get_fs(); set_fs(KERNEL_DS);
+		err = sock_sendmsg(sock, &msg, nh.nlmsg_len);
+		set_fs(oldfs);
+
+		if (err < 0)
+			goto out_sock_pg;
+
+		iov.iov_base=pg;
+		iov.iov_len=PAGE_SIZE;
+
+		oldfs = get_fs(); set_fs(KERNEL_DS);
+		err = sock_recvmsg(sock, &msg, PAGE_SIZE, MSG_DONTWAIT);
+		set_fs(oldfs);
+		if (err != -EAGAIN) {	/* a reply is only logged, never treated as fatal */
+			if (err == NLMSG_LENGTH(sizeof(struct nlmsgerr)) &&
+			    n->nlmsg_type == NLMSG_ERROR) {
+				struct nlmsgerr *e = NLMSG_DATA(n);
+				if (e->error != -EEXIST || !kernel_flag)
+					eprintk_ctx("NLMERR: %d\n", e->error);
+			} else {
+				eprintk_ctx("Res: %d %d\n", err, n->nlmsg_type);
+			}
+		}
+do_next:
+		err = 0;
+		sec += NLMSG_ALIGN(nh.nlmsg_len);
+	}
+
+out_sock_pg:
+	free_page((unsigned long)pg);
+out_sock:
+	sock_release(sock);
+	return err;
+}
+
+int rst_resume_network(struct cpt_context *ctx)
+{
+	struct ve_struct *ve = get_ve_by_id(ctx->ve_id);
+
+	if (ve == NULL)
+		return -ESRCH;
+	/* Clear disable_net on the VE found by id, then release it again. */
+	ve->disable_net = 0;
+	put_ve(ve);
+	return 0;
+}
+
+int rst_restore_netdev(struct cpt_context *ctx)
+{
+	int err;
+	loff_t sec = ctx->sections[CPT_SECT_NET_DEVICE];
+	loff_t endsec;
+	struct cpt_section_hdr h;
+	struct cpt_netdev_image di;
+	struct net_device *dev;
+	/* Restore device flags and learn the image's old "lo"/"venet0" ifindexes. */
+	get_exec_env()->disable_net = 1;	/* presumably undone later by rst_resume_network() -- confirm */
+
+	dev = __dev_get_by_name("lo");	/* NOTE(review): looked up without rtnl_lock() here -- confirm this is safe */
+	if (!dev) {
+		eprintk_ctx("cannot find loopback netdevice\n");
+		return -EINVAL;
+	}
+	ctx->lo_index = dev->ifindex;
+	ctx->lo_index_old = -1;	/* stays -1 unless the image names "lo" below */
+	dev = __dev_get_by_name("venet0");
+	if (!dev) {
+		eprintk_ctx("cannot find venet0 netdevice\n");
+		return -EINVAL;
+	}
+	ctx->venet_index = dev->ifindex;
+	ctx->venet_index_old = -1;	/* stays -1 unless the image names "venet0" below */
+
+	if (sec == CPT_NULL)
+		return 0;
+
+	err = ctx->pread(&h, sizeof(h), ctx, sec);
+	if (err)
+		return err;
+	if (h.cpt_section != CPT_SECT_NET_DEVICE || h.cpt_hdrlen < sizeof(h))
+		return -EINVAL;
+
+	endsec = sec + h.cpt_next;
+	sec += h.cpt_hdrlen;
+	while (sec < endsec) {
+		int err;	/* shadows the outer err; only read errors leave the loop */
+		err = rst_get_object(CPT_OBJ_NET_DEVICE, sec, &di, ctx);
+		if (err)
+			return err;
+		if (strcmp(di.cpt_name, "lo") == 0) {
+			ctx->lo_index_old = di.cpt_index;
+		} else if (strcmp(di.cpt_name, "venet0") == 0) {
+			ctx->venet_index_old = di.cpt_index;
+		} else {
+			eprintk_ctx("unknown interface %s\n", di.cpt_name);	/* logged, not fatal */
+		}
+		dev = __dev_get_by_name(di.cpt_name);
+		if (dev) {
+			if (di.cpt_flags^dev->flags) {	/* at least one flag differs from the saved set */
+				rtnl_lock();
+				err = dev_change_flags(dev, di.cpt_flags);
+				rtnl_unlock();
+				if (err)
+					eprintk_ctx("dev_change_flags err: %d\n", err);	/* logged only; not propagated */
+			}
+		} else {
+			eprintk_ctx("unknown interface 2 %s\n", di.cpt_name);
+		}
+		sec += di.cpt_next;
+	}
+	return 0;
+}
+
+static int dumpfn(void *arg)
+{
+	int i;
+	int *pfd = arg;
+	char *argv[] = { "iptables-restore", "-c", NULL };
+	/* Thread body: make pfd[0] fd 0, close the other fds, exec iptables-restore. */
+	if (pfd[0] != 0)
+		sc_dup2(pfd[0], 0);
+
+	for (i=1; i<current->files->fdt->max_fds; i++)	/* only fd 0 is kept */
+		sc_close(i);
+
+	module_put(THIS_MODULE);	/* NOTE(review): presumably balances a reference taken by local_kernel_thread() -- confirm */
+
+	set_fs(KERNEL_DS);	/* path and argv below are kernel-space pointers */
+	i = sc_execve("/sbin/iptables-restore", argv, NULL);
+	eprintk("failed to exec /sbin/iptables-restore: %d\n", i);	/* reached only when sc_execve() returns */
+	return -1;
+}
+
+static int rst_restore_iptables(struct cpt_context * ctx)
+{
+	int err;
+	int pfd[2];
+	struct file *f;
+	struct cpt_object_hdr v;
+	int n;
+	struct cpt_section_hdr h;
+	loff_t sec = ctx->sections[CPT_SECT_NET_IPTABLES];
+	loff_t end;
+	int pid;
+	/* Feed the text saved in the NET_IPTABLES section to a dumpfn() thread via a pipe. */
+	if (sec == CPT_NULL)
+		return 0;
+
+	err = ctx->pread(&h, sizeof(h), ctx, sec);
+	if (err)
+		return err;
+	if (h.cpt_section != CPT_SECT_NET_IPTABLES || h.cpt_hdrlen < sizeof(h))
+		return -EINVAL;
+
+	if (h.cpt_hdrlen == h.cpt_next)	/* section holds nothing beyond its header */
+		return 0;
+	if (h.cpt_hdrlen > h.cpt_next)
+		return -EINVAL;
+	sec += h.cpt_hdrlen;
+	err = rst_get_object(CPT_OBJ_NAME, sec, &v, ctx);
+	if (err < 0)
+		return err;
+
+	err = sc_pipe(pfd);
+	if (err < 0)
+		return err;
+	pid = err = local_kernel_thread(dumpfn, (void*)pfd, SIGCHLD, 0);
+	if (err < 0)
+		goto out;
+	f = fget(pfd[1]);	/* NOTE(review): result is not checked for NULL before f->f_op is used */
+	sc_close(pfd[1]);	/* the struct file reference in f keeps the write end usable */
+	sc_close(pfd[0]);
+
+	ctx->file->f_pos = sec + v.cpt_hdrlen;
+	end = sec + v.cpt_next;
+	do {
+		char *p;
+		char buf[16];	/* data is copied in chunks of at most 16 bytes */
+		mm_segment_t oldfs;
+
+		n = end - ctx->file->f_pos;
+		if (n > sizeof(buf))
+			n = sizeof(buf);
+
+		if (ctx->read(buf, n, ctx))	/* a read failure ends the copy; 0 is still returned */
+			break;
+		if ((p = memchr(buf, 0, n)) != NULL)	/* write only the bytes before a NUL in this chunk */
+			n = p - buf;
+		oldfs = get_fs(); set_fs(KERNEL_DS);
+		f->f_op->write(f, buf, n, &f->f_pos);	/* the write result is ignored */
+		set_fs(oldfs);
+	} while (ctx->file->f_pos < end);
+
+	fput(f);
+
+	clear_tsk_thread_flag(current,TIF_SIGPENDING);	/* NOTE(review): presumably discards a signal caused by the child or the pipe -- confirm */
+
+	if ((err = sc_waitx(pid, 0)) < 0)
+		eprintk_ctx("wait4: %d\n", err);	/* wait errors are logged, not returned */
+
+	return 0;
+
+out:
+	if (pfd[1] >= 0)
+		sc_close(pfd[1]);
+	if (pfd[0] >= 0)
+		sc_close(pfd[0]);
+	return err;
+}
+
+int rst_restore_net(struct cpt_context *ctx)
+{
+	int err;
+
+	/* The stages run in this order; the first failure stops the chain. */
+	if ((err = rst_restore_netdev(ctx)) != 0)
+		return err;
+	if ((err = rst_restore_ifaddr(ctx)) != 0)
+		return err;
+	if ((err = rst_restore_route(ctx)) != 0)
+		return err;
+	if ((err = rst_restore_iptables(ctx)) != 0)
+		return err;
+	return rst_restore_ip_conntrack(ctx);
+}
diff -uprN linux-2.6.16/kernel/cpt/rst_proc.c linux-2.6.16.ovz/kernel/cpt/rst_proc.c
--- linux-2.6.16/kernel/cpt/rst_proc.c	1969-12-31 19:00:00.000000000 -0500
+++ linux-2.6.16.ovz/kernel/cpt/rst_proc.c	2006-07-05 08:34:56.000000000 -0400
@@ -0,0 +1,604 @@
+/*
+ *
+ *  kernel/cpt/rst_proc.c
+ *
+ *  Copyright (C) 2000-2005  SWsoft
+ *  All rights reserved.
+ *
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/version.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/file.h>
+#include <linux/errno.h>
+#include <linux/mm.h>
+#include <linux/proc_fs.h>
+#include <linux/smp_lock.h>
+#include <asm/uaccess.h>
+#include <linux/cpt_ioctl.h>
+
+#include "cpt_obj.h"
+#include "cpt_context.h"
+#include "cpt_dump.h"
+#include "cpt_files.h"
+#include "cpt_mm.h"
+#include "cpt_kernel.h"
+
+MODULE_AUTHOR("Alexey Kuznetsov <alexey@sw.ru>");
+MODULE_LICENSE("GPL");
+
+/* List of contexts and lock protecting the list */
+static struct list_head cpt_context_list;
+static spinlock_t cpt_context_lock;
+
+static int proc_read(char *buffer, char **start, off_t offset,
+		     int length, int *eof, void *data)
+{
+	off_t pos = 0;
+	off_t begin = 0;
+	int len = 0;
+	cpt_context_t *ctx;
+	/* read_proc handler: a header line, then one line per context on cpt_context_list. */
+	len += sprintf(buffer, "Ctx      Id       VE       State\n");
+
+	spin_lock(&cpt_context_lock);
+
+	list_for_each_entry(ctx, &cpt_context_list, ctx_list) {
+		len += sprintf(buffer+len,"%p %08x %-8u %d",	/* NOTE(review): output is not bounded by the buffer size -- confirm */
+			       ctx,
+			       ctx->contextid,
+			       ctx->ve_id,
+			       ctx->ctx_state
+			       );
+#ifdef CONFIG_VZ_CHECKPOINT_LAZY
+		len += pagein_info_printf(buffer+len, ctx);
+#endif
+
+		buffer[len++] = '\n';
+
+		pos = begin+len;
+		if (pos < offset) {	/* all text so far precedes the requested offset: discard it */
+			len = 0;
+			begin = pos;
+		}
+		if (pos > offset+length)	/* already past the end of the requested window */
+			goto done;
+	}
+	*eof = 1;	/* the whole list was walked */
+
+done:
+	spin_unlock(&cpt_context_lock);
+	*start = buffer + (offset - begin);
+	len -= (offset - begin);
+	if(len > length)
+		len = length;
+	if(len < 0)
+		len = 0;
+	return len;
+}
+
+void rst_context_release(cpt_context_t *ctx)
+{
+	list_del(&ctx->ctx_list);	/* entered with cpt_context_lock held; it is dropped on the next line */
+	spin_unlock(&cpt_context_lock);
+	/* Tear the context down and free it; the spinlock is not held meanwhile. */
+	if (ctx->ctx_state > 0)
+		rst_resume(ctx);
+	ctx->ctx_state = CPT_CTX_ERROR;
+
+	rst_close_dumpfile(ctx);
+
+	if (ctx->anonvmas) {
+		int h;
+		for (h = 0; h < CPT_ANONVMA_HSIZE; h++) {
+			while (!hlist_empty(&ctx->anonvmas[h])) {
+				struct hlist_node *elem = ctx->anonvmas[h].first;
+				hlist_del(elem);
+				kfree(elem);	/* NOTE(review): frees through the node pointer -- presumably the node is the entry's first member; confirm */
+			}
+		}
+		free_page((unsigned long)ctx->anonvmas);
+	}
+	cpt_flush_error(ctx);
+	if (ctx->errorfile) {
+		fput(ctx->errorfile);
+		ctx->errorfile = NULL;
+	}
+	if (ctx->error_msg) {
+		free_page((unsigned long)ctx->error_msg);
+		ctx->error_msg = NULL;
+	}
+#ifdef CONFIG_VZ_CHECKPOINT_LAZY
+	if (ctx->pagein_file_out)
+		fput(ctx->pagein_file_out);
+	if (ctx->pagein_file_in)
+		fput(ctx->pagein_file_in);
+	if (ctx->pgin_task)
+		put_task_struct(ctx->pgin_task);
+#endif
+	if (ctx->filejob_queue)
+		rst_flush_filejobs(ctx);
+	if (ctx->objcount)
+		eprintk_ctx("%d objects leaked\n", ctx->objcount);
+	kfree(ctx);
+
+	spin_lock(&cpt_context_lock);	/* re-taken so that the caller's unlock stays balanced */
+}
+
+static void __cpt_context_put(cpt_context_t *ctx)
+{
+	if (--ctx->refcount == 0)	/* that was the last reference */
+		rst_context_release(ctx);
+}
+
+static void cpt_context_put(cpt_context_t *ctx)
+{
+	spin_lock(&cpt_context_lock);	/* locked wrapper around __cpt_context_put() */
+	__cpt_context_put(ctx);	/* may free ctx; do not touch it afterwards */
+	spin_unlock(&cpt_context_lock);
+}
+
+cpt_context_t * rst_context_open(void)
+{
+	cpt_context_t *ctx = kmalloc(sizeof(*ctx), GFP_KERNEL);
+
+	if (ctx == NULL)
+		return NULL;
+	rst_context_init(ctx);
+	spin_lock(&cpt_context_lock);
+	list_add_tail(&ctx->ctx_list, &cpt_context_list);
+	spin_unlock(&cpt_context_lock);
+	ctx->error_msg = (char*)__get_free_page(GFP_KERNEL);
+	if (ctx->error_msg)	/* a missing error page is tolerated here */
+		ctx->error_msg[0] = 0;
+	return ctx;
+}
+
+void rst_report_error(int err, cpt_context_t *ctx)
+{
+	int status = 7 /* VZ_ENVCREATE_ERROR */;
+	mm_segment_t oldfs;
+
+	if (ctx->statusfile == NULL)	/* no status file registered: nothing to report to */
+		return;
+	oldfs = get_fs(); set_fs(KERNEL_DS);
+	if (ctx->statusfile->f_op && ctx->statusfile->f_op->write)
+		ctx->statusfile->f_op->write(ctx->statusfile, (char*)&status, sizeof(status), &ctx->statusfile->f_pos);
+	set_fs(oldfs);
+	fput(ctx->statusfile);	/* the status file is used once, then released */
+	ctx->statusfile = NULL;
+}
+
+
+static cpt_context_t * cpt_context_lookup(unsigned int ctxid)
+{
+	cpt_context_t *pos, *found = NULL;
+
+	spin_lock(&cpt_context_lock);
+	list_for_each_entry(pos, &cpt_context_list, ctx_list) {
+		if (pos->contextid != ctxid)
+			continue;
+		pos->refcount++;	/* the caller receives its own reference */
+		found = pos;
+		break;
+	}
+	spin_unlock(&cpt_context_lock);
+	return found;
+}
+
+/* ioctl entry point of the "rst" proc file: binds a restore context to the
+ * open file and executes one CPT_* command.  Returns 0 or a negative errno. */
+static int rst_ioctl(struct inode * inode, struct file * file, unsigned int cmd, unsigned long arg)
+{
+	int err = 0;
+	cpt_context_t *ctx;
+	struct file *dfile = NULL;
+
+	unlock_kernel();
+
+	if (cmd == CPT_TEST_CAPS) {
+		err = test_cpu_caps();
+		goto out_lock;
+	}
+
+	if (cmd == CPT_JOIN_CONTEXT || cmd == CPT_PUT_CONTEXT) {
+		cpt_context_t *old_ctx;
+
+		ctx = NULL;
+		if (cmd == CPT_JOIN_CONTEXT) {
+			err = -ENOENT;
+			ctx = cpt_context_lookup(arg);
+			if (!ctx)
+				goto out_lock;
+		}
+
+		spin_lock(&cpt_context_lock);
+		old_ctx = (cpt_context_t*)file->private_data;
+		file->private_data = ctx;
+
+		if (old_ctx) {
+			if (cmd == CPT_PUT_CONTEXT && old_ctx->sticky) {
+				old_ctx->sticky = 0;
+				old_ctx->refcount--;
+			}
+			__cpt_context_put(old_ctx);
+		}
+		spin_unlock(&cpt_context_lock);
+		err = 0;
+		goto out_lock;
+	}
+
+	spin_lock(&cpt_context_lock);
+	ctx = (cpt_context_t*)file->private_data;
+	if (ctx)
+		ctx->refcount++;
+	spin_unlock(&cpt_context_lock);
+
+	if (!ctx) {
+		cpt_context_t *old_ctx;
+
+		err = -ENOMEM;
+		ctx = rst_context_open();
+		if (!ctx)
+			goto out_lock;
+
+		spin_lock(&cpt_context_lock);
+		old_ctx = (cpt_context_t*)file->private_data;
+		if (!old_ctx) {
+			ctx->refcount++;
+			file->private_data = ctx;
+		} else {	/* a context was installed meanwhile: use it and drop ours */
+			old_ctx->refcount++;
+			__cpt_context_put(ctx);
+			ctx = old_ctx;
+		}
+		spin_unlock(&cpt_context_lock);
+	}
+
+	if (cmd == CPT_GET_CONTEXT) {
+		unsigned int contextid = (unsigned int)arg;
+
+		err = -EINVAL;
+		if (ctx->contextid && ctx->contextid != contextid)
+			goto out_nosem;
+		if (!ctx->contextid) {
+			cpt_context_t *c1 = cpt_context_lookup(contextid);
+			if (c1) {
+				cpt_context_put(c1);
+				err = -EEXIST;
+				goto out_nosem;
+			}
+			ctx->contextid = contextid;
+		}
+		spin_lock(&cpt_context_lock);
+		if (!ctx->sticky) {
+			ctx->sticky = 1;
+			ctx->refcount++;
+		}
+		spin_unlock(&cpt_context_lock);
+		err = 0;
+		goto out_nosem;
+	}
+
+	down(&ctx->main_sem);
+
+	err = -EBUSY;
+	if (ctx->ctx_state < 0)
+		goto out;
+
+	err = 0;
+	switch (cmd) {
+	case CPT_SET_DUMPFD:
+		if (ctx->ctx_state > 0) {
+			err = -EBUSY;
+			break;
+		}
+		if ((int)arg >= 0) {	/* arg is unsigned; a negative fd only clears the file */
+			dfile = fget(arg);
+			if (dfile == NULL) {	/* fget() reports a bad fd as NULL, not ERR_PTR */
+				err = -EBADF;
+				break;
+			}
+			if (dfile->f_op == NULL ||
+			    dfile->f_op->read == NULL) {
+				fput(dfile);
+				err = -EBADF;
+				break;
+			}
+		}
+		if (ctx->file)
+			fput(ctx->file);
+		ctx->file = dfile;
+		break;
+#ifdef CONFIG_VZ_CHECKPOINT_LAZY
+	case CPT_SET_PAGEINFDIN:
+		if (ctx->ctx_state > 0) {
+			err = -EBUSY;
+			break;
+		}
+		if ((int)arg >= 0) {
+			dfile = fget(arg);
+			if (dfile == NULL) {
+				err = -EBADF;
+				break;
+			}
+		}
+		if (ctx->pagein_file_in)
+			fput(ctx->pagein_file_in);
+		ctx->pagein_file_in = dfile;
+		break;
+	case CPT_SET_PAGEINFDOUT:
+		if (ctx->ctx_state > 0) {
+			err = -EBUSY;
+			break;
+		}
+		if ((int)arg >= 0) {
+			dfile = fget(arg);
+			if (dfile == NULL) {
+				err = -EBADF;
+				break;
+			}
+		}
+		if (ctx->pagein_file_out)
+			fput(ctx->pagein_file_out);
+		ctx->pagein_file_out = dfile;
+		break;
+	case CPT_PAGEIND:
+		err = rst_pageind(ctx);
+		break;
+#endif
+	case CPT_SET_LOCKFD:
+		if (ctx->ctx_state > 0) {
+			err = -EBUSY;
+			break;
+		}
+		if ((int)arg >= 0) {
+			dfile = fget(arg);
+			if (dfile == NULL) {
+				err = -EBADF;
+				break;
+			}
+		}
+		if (ctx->lockfile)
+			fput(ctx->lockfile);
+		ctx->lockfile = dfile;
+		break;
+	case CPT_SET_STATUSFD:
+		if (ctx->ctx_state > 0) {
+			err = -EBUSY;
+			break;
+		}
+		if ((int)arg >= 0) {
+			dfile = fget(arg);
+			if (dfile == NULL) {
+				err = -EBADF;
+				break;
+			}
+		}
+		if (ctx->statusfile)
+			fput(ctx->statusfile);
+		ctx->statusfile = dfile;
+		break;
+	case CPT_SET_ERRORFD:
+		if ((int)arg >= 0) {
+			dfile = fget(arg);
+			if (dfile == NULL) {
+				err = -EBADF;
+				break;
+			}
+		}
+		if (ctx->errorfile)
+			fput(ctx->errorfile);
+		ctx->errorfile = dfile;
+		break;
+	case CPT_SET_VEID:
+		if (ctx->ctx_state > 0) {
+			err = -EBUSY;
+			break;
+		}
+		ctx->ve_id = arg;
+		break;
+	case CPT_UNDUMP:
+		if (ctx->ctx_state > 0) {
+			err = -ENOENT;
+			break;
+		}
+		ctx->ctx_state = CPT_CTX_UNDUMPING;
+		err = vps_rst_undump(ctx);
+		if (err) {
+			rst_report_error(err, ctx);
+			if (rst_kill(ctx) == 0)
+				ctx->ctx_state = CPT_CTX_IDLE;
+		} else {
+			ctx->ctx_state = CPT_CTX_UNDUMPED;
+		}
+		break;
+	case CPT_RESUME:
+		if (!ctx->ctx_state) {
+			err = -ENOENT;
+			break;
+		}
+		err = rst_resume(ctx);
+		if (!err)
+			ctx->ctx_state = CPT_CTX_IDLE;
+		break;
+	case CPT_KILL:
+		if (!ctx->ctx_state) {
+			err = -ENOENT;
+			break;
+		}
+		err = rst_kill(ctx);
+		if (!err)
+			ctx->ctx_state = CPT_CTX_IDLE;
+		break;
+	default:
+		err = -EINVAL;
+		break;
+	}
+
+out:
+	cpt_flush_error(ctx);
+	up(&ctx->main_sem);
+out_nosem:
+	cpt_context_put(ctx);	/* drops the reference taken at the top of this call */
+out_lock:
+	lock_kernel();
+	return err;
+}
+
+static int rst_open(struct inode * inode, struct file * file)
+{
+	/* Hold a module reference for as long as the file stays open. */
+	if (try_module_get(THIS_MODULE))
+		return 0;
+
+	return -EBUSY;
+}
+
+static int rst_release(struct inode * inode, struct file * file)
+{
+	cpt_context_t *ctx;
+
+	/* Detach the context from the file and drop the file's reference. */
+	spin_lock(&cpt_context_lock);
+	ctx = file->private_data;
+	file->private_data = NULL;
+	if (ctx != NULL)
+		__cpt_context_put(ctx);
+	spin_unlock(&cpt_context_lock);
+	/* Pairs with the try_module_get() in rst_open(). */
+	module_put(THIS_MODULE);
+	return 0;
+}
+
+static struct file_operations rst_fops =	/* installed on the "rst" proc entry by init_rst() */
+{
+	.owner		= THIS_MODULE,
+	.ioctl		= rst_ioctl,	/* every restore command arrives through ioctl */
+	.open		= rst_open,
+	.release	= rst_release,
+};	/* .read/.write/.llseek are filled in at init time from the proc entry's own fops */
+
+
+static struct proc_dir_entry *proc_ent;
+extern void *schedule_tail_p;
+extern void schedule_tail_hook(void);
+
+static struct ctl_table_header *ctl_header;
+
+static ctl_table debug_table[] = {	/* child table referenced from root_table */
+	{
+		.ctl_name	= 9476,
+		.procname	= "rst",
+		.data		= &debug_level,	/* read and written through proc_dointvec */
+		.maxlen		= sizeof(debug_level),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec,
+	},
+	{ .ctl_name = 0 }	/* end-of-table marker */
+};
+static ctl_table root_table[] = {	/* registered by init_rst() via register_sysctl_table() */
+	{
+		.ctl_name	= CTL_DEBUG,
+		.procname	= "debug",
+		.mode		= 0555,
+		.child		= debug_table,	/* holds the "rst" debug_level entry */
+	},
+	{ .ctl_name = 0 }	/* end-of-table marker */
+};
+
+#ifdef CONFIG_X86_64
+
+/* Decode the target of the rel32 instruction assumed to sit at ret_from_fork. */
+static void *vzentry_forkret_get(void)
+{
+	unsigned char *p = (unsigned char *)ret_from_fork;
+	/* The displacement is signed: vzentry_forkret_set() range-checks it as s32. */
+	return (void *)(p + 5 + *(s32 *)(p + 1));
+}
+
+static void vzentry_forkret_set(void *data)
+{
+	unsigned char *p;
+	long offset;
+	/* Point the 32-bit displacement stored at ret_from_fork+1 at @data. */
+	p = (unsigned char *)ret_from_fork;
+	offset = (unsigned long)data - (unsigned long)(p + 5);	/* relative to the byte after the 5 patched-instruction bytes */
+	if ((long)(s32)offset != offset) {	/* the value must survive truncation to signed 32 bits */
+		printk("vzentry_forkret_set: too long hook offset\n");
+		BUG();
+	}
+	*(u32 *)(p + 1) = offset;	/* NOTE(review): writes into kernel text in place -- presumably safe only while no task is executing it; confirm */
+}
+#endif
+
+static int __init init_rst(void)
+{
+	int err;
+	/* Module init: register the debug sysctl and create the "rst" proc entry. */
+	err = -ENOMEM;
+	ctl_header = register_sysctl_table(root_table, 0);
+	if (!ctl_header)
+		goto err_mon;
+
+	spin_lock_init(&cpt_context_lock);
+	INIT_LIST_HEAD(&cpt_context_list);
+
+	err = -EINVAL;
+	proc_ent = create_proc_entry("rst", 0600, NULL);
+	if (!proc_ent)
+		goto err_out;
+
+	rst_fops.read = proc_ent->proc_fops->read;	/* keep the entry's own read/write/llseek ... */
+	rst_fops.write = proc_ent->proc_fops->write;
+	rst_fops.llseek = proc_ent->proc_fops->llseek;
+	proc_ent->proc_fops = &rst_fops;	/* ... and add ioctl/open/release from rst_fops */
+
+	proc_ent->read_proc = proc_read;
+	proc_ent->data = NULL;
+	proc_ent->owner = THIS_MODULE;
+#ifdef CONFIG_X86_64
+	schedule_tail_p = vzentry_forkret_get();	/* saved so that exit_rst() can put it back */
+	vzentry_forkret_set(&schedule_tail_hook);
+#endif
+	return 0;
+
+err_out:
+	unregister_sysctl_table(ctl_header);
+err_mon:
+	return err;
+}
+module_init(init_rst);
+
+static void __exit exit_rst(void)
+{
+#ifdef CONFIG_X86_64
+	/* This is wrong, of course. But it is still the best we can do. */
+	vzentry_forkret_set(schedule_tail_p);
+#endif
+
+	remove_proc_entry("rst", NULL);
+	unregister_sysctl_table(ctl_header);
+
+	spin_lock(&cpt_context_lock);
+	while (!list_empty(&cpt_context_list)) {	/* release every context still on the list */
+		cpt_context_t *ctx;
+		ctx = list_entry(cpt_context_list.next, cpt_context_t, ctx_list);
+
+		if (!ctx->sticky)	/* non-sticky: take a reference for the put below */
+			ctx->refcount++;
+		ctx->sticky = 0;	/* sticky: its reference is the one consumed by the put below */
+
+		BUG_ON(ctx->refcount != 1);	/* no other holder may remain at module exit */
+
+		__cpt_context_put(ctx);
+	}
+	spin_unlock(&cpt_context_lock);
+}
+module_exit(exit_rst);
diff -uprN linux-2.6.16/kernel/cpt/rst_process.c linux-2.6.16.ovz/kernel/cpt/rst_process.c
--- linux-2.6.16/kernel/cpt/rst_process.c	1969-12-31 19:00:00.000000000 -0500
+++ linux-2.6.16.ovz/kernel/cpt/rst_process.c	2006-07-05 08:34:56.000000000 -0400
@@ -0,0 +1,1257 @@
+/*
+ *
+ *  kernel/cpt/rst_process.c
+ *
+ *  Copyright (C) 2000-2005  SWsoft
+ *  All rights reserved.
+ *
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/version.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/file.h>
+#include <linux/mm.h>
+#include <linux/virtinfo.h>
+#include <linux/kmem_cache.h>
+#include <linux/errno.h>
+#include <linux/pagemap.h>
+#include <linux/ptrace.h>
+#include <linux/tty.h>
+#include <asm/desc.h>
+#include <asm/unistd.h>
+
+#include <ub/beancounter.h>
+#include <ub/ub_misc.h>
+
+#include "cpt_obj.h"
+#include "cpt_context.h"
+#include "cpt_files.h"
+#include "cpt_mm.h"
+#include "cpt_ubc.h"
+#include "cpt_process.h"
+#include "cpt_kernel.h"
+
+#ifdef CONFIG_X86_64
+
+#define _TIF_RESUME (1<<22)
+
+#define SYSCALL_NR(regs) ((regs)->orig_rax)
+#define SYSCALL_RETVAL(regs) ((regs)->rax)
+#define SYSCALL_PC(regs) ((regs)->rip)
+
+#define ESP(tsk) (tsk)->thread.rsp
+
+#define __NR32_restart_syscall	0
+#define __NR32_rt_sigtimedwait	177
+#define __NR32_pause		29
+#define __NR32_futex		240
+
+#define syscall_is(tsk,regs,name) ((!((tsk)->thread_info->flags&_TIF_IA32) && \
+				    SYSCALL_NR(regs) == __NR_##name) || \
+				   (((tsk)->thread_info->flags&_TIF_IA32) && \
+				    SYSCALL_NR(regs) == __NR32_##name))
+#else
+
+#define SYSCALL_NR(regs) ((regs)->orig_eax)
+#define SYSCALL_RETVAL(regs) ((regs)->eax)
+#define SYSCALL_PC(regs) ((regs)->eip)
+
+#define ESP(tsk) (tsk)->thread.esp
+
+#define syscall_is(tsk,regs,name) (SYSCALL_NR(regs) == __NR_##name)
+
+#undef task_pt_regs
+#define task_pt_regs(t) ((struct pt_regs *)((t)->thread.esp0) - 1)
+
+#endif
+
+static void decode_siginfo(siginfo_t *info, struct cpt_siginfo_image *si)
+{
+	memset(info, 0, sizeof(*info));	/* rebuild *info from the image record; unset fields stay zero */
+	switch(si->cpt_code & __SI_MASK) {	/* the class bits of the code select which fields are filled */
+	case __SI_TIMER:
+		info->si_tid = si->cpt_pid;	/* cpt_pid, cpt_uid and cpt_utime are reused as generic slots here */
+		info->si_overrun = si->cpt_uid;
+		info->_sifields._timer._sigval.sival_ptr = cpt_ptr_import(si->cpt_sigval);
+		info->si_sys_private = si->cpt_utime;
+		break;
+	case __SI_POLL:
+		info->si_band = si->cpt_pid;
+		info->si_fd = si->cpt_uid;
+		break;
+	case __SI_FAULT:
+		info->si_addr = cpt_ptr_import(si->cpt_sigval);
+#ifdef __ARCH_SI_TRAPNO
+		info->si_trapno = si->cpt_pid;
+#endif
+		break;
+	case __SI_CHLD:
+		info->si_pid = si->cpt_pid;
+		info->si_uid = si->cpt_uid;
+		info->si_status = si->cpt_sigval;
+		info->si_stime = si->cpt_stime;
+		info->si_utime = si->cpt_utime;
+		break;
+	case __SI_KILL:
+	case __SI_RT:
+	case __SI_MESGQ:
+	default:	/* every other class is decoded as pid/uid/pointer */
+		info->si_pid = si->cpt_pid;
+		info->si_uid = si->cpt_uid;
+		info->si_ptr = cpt_ptr_import(si->cpt_sigval);
+		break;
+	}
+	info->si_signo = si->cpt_signo;	/* the three common fields are set for every class */
+	info->si_errno = si->cpt_errno;
+	info->si_code = si->cpt_code;
+}
+
+/* Queue one sigqueue entry on @queue per CPT_OBJ_SIGINFO record in [start, end). */
+static int restore_sigqueue(task_t *tsk, struct sigpending *queue,
+			    unsigned long start, unsigned long end)
+{
+	while (start < end) {
+		struct cpt_siginfo_image *si = (struct cpt_siginfo_image *)start;
+		/* A zero stride would never advance start: reject the image. */
+		if (si->cpt_next == 0)
+			return -EINVAL;
+		if (si->cpt_object == CPT_OBJ_SIGINFO) {
+			struct sigqueue *q = NULL;
+			struct user_struct *up = alloc_uid(si->cpt_user);
+			if (!up)
+				return -ENOMEM;
+			q = kmem_cache_alloc(sigqueue_cachep, GFP_ATOMIC);
+			if (!q) {
+				free_uid(up);
+				return -ENOMEM;
+			}
+			if (ub_siginfo_charge(q, get_exec_ub())) {
+				kmem_cache_free(sigqueue_cachep, q);
+				free_uid(up);
+				return -ENOMEM;
+			}
+			INIT_LIST_HEAD(&q->list);
+			/* Preallocated elements (posix timers) are not supported
+			 * yet. It is safe to replace them with a private one. */
+			q->flags = 0;
+			q->user = up;
+			atomic_inc(&q->user->sigpending);
+
+			decode_siginfo(&q->info, si);
+			list_add_tail(&q->list, &queue->list);
+		}
+		start += si->cpt_next;
+	}
+	return 0;
+}
+
+int rst_process_linkage(cpt_context_t *ctx)
+{
+	cpt_object_t *obj;
+	/* Give every restored task the process group, session and old tty pgrp from its image. */
+	for_each_object(obj, CPT_OBJ_TASK) {
+		task_t *tsk = obj->o_obj;
+		struct cpt_task_image *ti = obj->o_image;
+
+		if (tsk == NULL) {
+			eprintk_ctx("task %u(%s) is missing\n", ti->cpt_pid, ti->cpt_comm);
+			return -EINVAL;
+		}
+
+		if (virt_pgid(tsk) != ti->cpt_pgrp) {	/* process group differs from the image */
+			int pid;
+
+			if ((pid = vpid_to_pid(ti->cpt_pgrp)) < 0) {
+				eprintk_ctx("illegal PGRP " CPT_FID "\n", CPT_TID(tsk));
+				return -EINVAL;
+			}
+
+			write_lock_irq(&tasklist_lock);
+			detach_pid(tsk, PIDTYPE_PGID);
+			tsk->signal->pgrp = pid;
+			set_virt_pgid(tsk, ti->cpt_pgrp);
+			if (thread_group_leader(tsk))	/* only thread group leaders are re-attached */
+				attach_pid(tsk, PIDTYPE_PGID, pid);
+			write_unlock_irq(&tasklist_lock);
+		}
+		if (virt_sid(tsk) != ti->cpt_session) {	/* same steps as above, for the session id */
+			int pid;
+
+			if ((pid = vpid_to_pid(ti->cpt_session)) < 0) {
+				eprintk_ctx("illegal SID " CPT_FID "\n", CPT_TID(tsk));
+				return -EINVAL;
+			}
+
+			write_lock_irq(&tasklist_lock);
+			detach_pid(tsk, PIDTYPE_SID);
+			tsk->signal->session = pid;
+			set_virt_sid(tsk, ti->cpt_session);
+			if (thread_group_leader(tsk))
+				attach_pid(tsk, PIDTYPE_SID, pid);
+			write_unlock_irq(&tasklist_lock);
+		}
+		if (ti->cpt_old_pgrp > 0 && tsk->signal->tty_old_pgrp == 0) {	/* set only while tty_old_pgrp is still 0 */
+			int pid;
+
+			if ((pid = vpid_to_pid(ti->cpt_old_pgrp)) < 0) {
+				eprintk_ctx("illegal OLD_PGRP " CPT_FID "\n", CPT_TID(tsk));
+				return -EINVAL;
+			}
+
+			tsk->signal->tty_old_pgrp = pid;
+		}
+	}
+
+	return 0;
+}
+
+static int restore_one_signal_struct(struct cpt_task_image *ti, struct cpt_context *ctx)
+{
+	int err;
+	struct cpt_signal_image *si = cpt_get_buf(ctx);
+
+	current->signal->tty = NULL;
+
+	err = rst_get_object(CPT_OBJ_SIGNAL_STRUCT, ti->cpt_signal, si, ctx);
+	if (err) {
+		cpt_release_buf(ctx);
+		return err;
+	}
+
+	if (virt_pgid(current) != si->cpt_pgrp) {
+		int err;
+		int pid = 0;
+
+		if (si->cpt_pgrp_type == CPT_PGRP_ORPHAN) {
+			pid = alloc_pidmap();
+			if (pid < 0) {
+				cpt_release_buf(ctx);
+				return -EINVAL;
+			}
+			if ((err = alloc_vpid(pid, si->cpt_pgrp)) < 0) {
+				free_pidmap(pid);
+				pid = 0;
+				if (err != -EEXIST) {
+					cpt_release_buf(ctx);
+					return err;
+				}
+			}
+		}
+		if (pid ||
+		    (pid = vpid_to_pid(si->cpt_pgrp)) > 0) {
+			write_lock_irq(&tasklist_lock);
+			detach_pid(current, PIDTYPE_PGID);
+			current->signal->pgrp = pid;
+			set_virt_pgid(current, si->cpt_pgrp);
+			if (thread_group_leader(current))
+				attach_pid(current, PIDTYPE_PGID, pid);
+			write_unlock_irq(&tasklist_lock);
+		}
+	}
+
+	current->signal->tty_old_pgrp = 0;
+	if ((int)si->cpt_old_pgrp > 0) {
+		if (si->cpt_old_pgrp_type == CPT_PGRP_STRAY) {
+			current->signal->tty_old_pgrp = alloc_pidmap();
+			if (current->signal->tty_old_pgrp < 0) {
+				eprintk_ctx("failed to allocate stray tty_old_pgrp\n");
+				cpt_release_buf(ctx);
+				return -EINVAL;
+			}
+			free_pidmap(current->signal->tty_old_pgrp);
+		} else {
+			current->signal->tty_old_pgrp = vpid_to_pid(si->cpt_old_pgrp);
+			if (current->signal->tty_old_pgrp < 0) {
+				dprintk_ctx("forward old tty PGID\n");
+				current->signal->tty_old_pgrp = 0;
+			}
+		}
+	}
+
+	if (virt_sid(current) != si->cpt_session) {
+		int err;
+		int pid = 0;
+
+		if (si->cpt_session_type == CPT_PGRP_ORPHAN) {
+			pid = alloc_pidmap();
+			if (pid < 0) {
+				cpt_release_buf(ctx);
+				return -EINVAL;
+			}
+			if ((err = alloc_vpid(pid, si->cpt_session)) < 0) {
+				free_pidmap(pid);
+				pid = 0;
+				if (err != -EEXIST) {
+					cpt_release_buf(ctx);
+					return err;
+				}
+			}
+		}
+		if (pid ||
+		    (pid = vpid_to_pid(si->cpt_session)) > 0) {
+			write_lock_irq(&tasklist_lock);
+			detach_pid(current, PIDTYPE_SID);
+			set_virt_sid(current, si->cpt_session);
+			current->signal->session = pid;
+			if (thread_group_leader(current))
+				attach_pid(current, PIDTYPE_SID, pid);
+			write_unlock_irq(&tasklist_lock);
+		}
+	}
+
+	cpt_sigset_import(&current->signal->shared_pending.signal, si->cpt_sigpending);
+	current->signal->leader = si->cpt_leader;
+	if (si->cpt_ctty != CPT_NULL) {
+		cpt_object_t *obj = lookup_cpt_obj_bypos(CPT_OBJ_TTY, si->cpt_ctty, ctx);
+		if (obj) {
+			struct tty_struct *tty = obj->o_obj;
+			if (tty->session == 0 || tty->session == current->signal->session) {
+				tty->session = current->signal->session;
+				current->signal->tty = tty;
+			} else {
+				wprintk_ctx("tty session mismatch\n");
+			}
+		}
+	}
+
+	if (si->cpt_curr_target)
+		current->signal->curr_target = find_task_by_pid_ve(si->cpt_curr_target);
+	current->signal->flags = 0;
+	if (si->cpt_group_exit)
+		current->signal->flags |= SIGNAL_GROUP_EXIT;
+	current->signal->group_exit_code = si->cpt_group_exit_code;
+	if (si->cpt_group_exit_task) {
+		current->signal->group_exit_task = find_task_by_pid_ve(si->cpt_group_exit_task);
+		if (current->signal->group_exit_task == NULL) {
+			eprintk_ctx("oops, group_exit_task=NULL, pid=%u\n", si->cpt_group_exit_task);
+			cpt_release_buf(ctx);
+			return -EINVAL;
+		}
+	}
+	current->signal->notify_count = si->cpt_notify_count;
+	current->signal->group_stop_count = si->cpt_group_stop_count;
+
+	if (si->cpt_next > si->cpt_hdrlen) {
+		char *buf = kmalloc(si->cpt_next - si->cpt_hdrlen, GFP_KERNEL);
+		if (buf == NULL) {
+			cpt_release_buf(ctx);
+			return -ENOMEM;
+		}
+		err = ctx->pread(buf, si->cpt_next - si->cpt_hdrlen, ctx,
+				 ti->cpt_signal + si->cpt_hdrlen);
+		if (err) {
+			kfree(buf);
+			cpt_release_buf(ctx);
+			return err;
+		}
+		restore_sigqueue(current,
+				 &current->signal->shared_pending, (unsigned long)buf,
+				 (unsigned long)buf + si->cpt_next - si->cpt_hdrlen);
+		kfree(buf);
+	}
+	cpt_release_buf(ctx);
+	return 0;
+}
+
+/* Reset current's signal actions to defaults, then load the handlers saved in the image.
+ * Returns 0, the rst_get_object() error, or -EINVAL on a malformed handler record. */
+int restore_one_sighand_struct(struct cpt_task_image *ti, struct cpt_context *ctx)
+{
+	int err, i;
+	struct cpt_sighand_image si;
+	loff_t pos, endpos;
+
+	err = rst_get_object(CPT_OBJ_SIGHAND_STRUCT, ti->cpt_sighand, &si, ctx);
+	if (err)
+		return err;
+	for (i=0; i<_NSIG; i++) {
+		current->sighand->action[i].sa.sa_handler = SIG_DFL;
+		current->sighand->action[i].sa.sa_restorer = 0;
+		current->sighand->action[i].sa.sa_flags = SA_ONESHOT | SA_NOMASK;
+		memset(&current->sighand->action[i].sa.sa_mask, 0, sizeof(sigset_t));
+	}
+	pos = ti->cpt_sighand + si.cpt_hdrlen;
+	endpos = ti->cpt_sighand + si.cpt_next;
+	while (pos < endpos) {
+		struct cpt_sighandler_image shi;
+		err = rst_get_object(CPT_OBJ_SIGHANDLER, pos, &shi, ctx);
+		if (err)
+			return err;
+		/* A bad image must not index past action[] or stall the walk. */
+		if ((unsigned int)shi.cpt_signo >= _NSIG || !shi.cpt_next)
+			return -EINVAL;
+		current->sighand->action[shi.cpt_signo].sa.sa_handler = (void*)(unsigned long)shi.cpt_handler;
+		current->sighand->action[shi.cpt_signo].sa.sa_restorer = (void*)(unsigned long)shi.cpt_restorer;
+		current->sighand->action[shi.cpt_signo].sa.sa_flags = shi.cpt_flags;
+		cpt_sigset_import(&current->sighand->action[shi.cpt_signo].sa.sa_mask, shi.cpt_mask);
+		pos += shi.cpt_next;
+	}
+	return 0;
+}
+
+/* Pick clone flags for ti: share the signal/sighand structs already registered in ctx. */
+__u32 rst_signal_flag(struct cpt_task_image *ti, struct cpt_context *ctx)
+{
+	__u32 clone_bits = 0;
+	if (lookup_cpt_obj_bypos(CPT_OBJ_SIGNAL_STRUCT, ti->cpt_signal, ctx) != NULL)
+		clone_bits = CLONE_THREAD;
+	if (ti->cpt_sighand == CPT_NULL)
+		return clone_bits | CLONE_SIGHAND;
+	if (lookup_cpt_obj_bypos(CPT_OBJ_SIGHAND_STRUCT, ti->cpt_sighand, ctx) != NULL)
+		clone_bits |= CLONE_SIGHAND;
+	return clone_bits;
+}
+
+/*
+ * Attach current's sighand and signal structs to the image positions named
+ * in ti.  A struct not yet known to ctx is registered and restored from the
+ * image; one already known must be the very struct current uses.
+ * Returns 0, -EINVAL on a missing or mismatched object, -ENOMEM, or the
+ * error from the restore helper.
+ */
+int rst_signal_complete(struct cpt_task_image *ti, struct cpt_context *ctx)
+{
+	cpt_object_t *obj;
+	int err;
+
+	if (ti->cpt_signal == CPT_NULL || ti->cpt_sighand == CPT_NULL)
+		return -EINVAL;
+
+	obj = lookup_cpt_obj_bypos(CPT_OBJ_SIGHAND_STRUCT, ti->cpt_sighand, ctx);
+	if (obj == NULL) {
+		/* First task to use this sighand image: register and restore it. */
+		obj = cpt_object_add(CPT_OBJ_SIGHAND_STRUCT, current->sighand, ctx);
+		if (obj == NULL)
+			return -ENOMEM;
+		cpt_obj_setpos(obj, ti->cpt_sighand, ctx);
+		err = restore_one_sighand_struct(ti, ctx);
+		if (err)
+			return err;
+	} else if (obj->o_obj != current->sighand) {
+		/* Already registered, so it has to be the struct we share. */
+		return -EINVAL;
+	}
+
+	obj = lookup_cpt_obj_bypos(CPT_OBJ_SIGNAL_STRUCT, ti->cpt_signal, ctx);
+	if (obj == NULL) {
+		obj = cpt_object_add(CPT_OBJ_SIGNAL_STRUCT, current->signal, ctx);
+		if (obj == NULL)
+			return -ENOMEM;
+		cpt_obj_setpos(obj, ti->cpt_signal, ctx);
+		return restore_one_signal_struct(ti, ctx);
+	}
+	if (obj->o_obj != current->signal)
+		return -EINVAL;
+	if (current->signal) {
+		/* Shared signal struct: derive the virtual pgid/sid from its pgrp/session. */
+		set_virt_pgid(current, pid_type_to_vpid(PIDTYPE_PGID, current->signal->pgrp));
+		set_virt_sid(current, pid_type_to_vpid(PIDTYPE_SID, current->signal->session));
+	}
+
+	return 0;
+}
+
+/* Translate a CPT_SEG_* id from the image into a selector value; unknown ids warn and yield 0. */
+static u32 decode_segment(u32 segid)
+{
+	if (segid == CPT_SEG_ZERO)
+		return 0;
+	/* TLS descriptors: GDT entry index shifted left by 3, plus 3 */
+	if (segid <= CPT_SEG_TLS3)
+		return ((GDT_ENTRY_TLS_MIN + segid-CPT_SEG_TLS1)<<3) + 3;
+	/* LDT descriptor: index into the LDT array shifted left by 3, or-ed with 7 */
+	if (segid >= CPT_SEG_LDT)
+		return ((segid - CPT_SEG_LDT) << 3) | 7;
+	/* What is left must be one of the standard descriptors */
+	switch (segid) {
+#ifdef CONFIG_X86_64
+	case CPT_SEG_USER32_DS:
+		return __USER32_DS;
+	case CPT_SEG_USER32_CS:
+		return __USER32_CS;
+	case CPT_SEG_USER64_DS:
+		return __USER_DS;
+	case CPT_SEG_USER64_CS:
+		return __USER_CS;
+#else
+	case CPT_SEG_USER32_DS:
+		return __USER_DS;
+	case CPT_SEG_USER32_CS:
+		return __USER_CS;
+#endif
+	}
+	wprintk("Invalid segment reg %d\n", segid);
+	return 0;
+}
+/* Hook body: install the clear/set child-tid pointers held in child_tids[0..1]; returns the address past them. */
+unsigned long rct(unsigned long *child_tids)
+{
+	dprintk("rct: " CPT_FID "\n", CPT_TID(current));
+	current->set_child_tid = (void*)child_tids[1];
+	current->clear_child_tid = (void*)child_tids[0];
+	module_put(THIS_MODULE);
+	return (unsigned long)&child_tids[2];
+}
+/* Hook body for a task restored in a ptrace stop: acts on pn_state and returns the address just past the saved siginfo. */
+unsigned long rlsi(void)
+{
+	int signr;
+	siginfo_t *info = current->last_siginfo;
+	struct pt_regs *regs = task_pt_regs(current);
+	struct k_sigaction *ka;
+	int ptrace_id;
+
+	dprintk("rlsi: " CPT_FID "\n", CPT_TID(current));
+
+	spin_lock_irq(&current->sighand->siglock);
+	current->last_siginfo = NULL;
+	recalc_sigpending();
+
+	ptrace_id = current->pn_state;
+	clear_pn_state(current);
+
+	switch (ptrace_id) {
+	case PN_STOP_TF:
+	case PN_STOP_TF_RT:
+		/* frame_*signal: only log the trap, nothing is requeued */
+		dprintk("SIGTRAP %u/%u(%s) %u/%u %u %ld %lu %lu\n",
+		       virt_pid(current), current->pid, current->comm,
+		       info->si_signo, info->si_code,
+		       current->exit_code, SYSCALL_NR(regs),
+		       current->ptrace, current->ptrace_message);
+		goto out;
+	case PN_STOP_ENTRY:
+	case PN_STOP_LEAVE:
+		/* do_syscall_trace: siglock is dropped before send_sig() */
+		spin_unlock_irq(&current->sighand->siglock);
+		dprintk("ptrace do_syscall_trace: %d %d\n", ptrace_id, current->exit_code);
+		if (current->exit_code) {
+			send_sig(current->exit_code, current, 1);
+			current->exit_code = 0;
+		}
+		if (ptrace_id == PN_STOP_ENTRY && SYSCALL_RETVAL(regs) == -ENOSYS) {
+			SYSCALL_RETVAL(regs) = SYSCALL_NR(regs);
+			SYSCALL_PC(regs) -= 2;
+		} else if (syscall_is(current, regs, rt_sigtimedwait)) {
+			if (SYSCALL_RETVAL(regs) == -EAGAIN || SYSCALL_RETVAL(regs) == -EINTR) {
+				SYSCALL_RETVAL(regs) = SYSCALL_NR(regs);
+				SYSCALL_PC(regs) -= 2;
+			}
+		}
+		goto out_nolock;
+	case PN_STOP_FORK:
+		/* fork: the return value is the pid saved in ptrace_message */
+		SYSCALL_RETVAL(regs) = current->ptrace_message;
+		dprintk("ptrace fork returns pid %ld\n", SYSCALL_RETVAL(regs));
+		goto out;
+	case PN_STOP_VFORK:
+		/* after vfork: same, the pid comes from ptrace_message */
+		SYSCALL_RETVAL(regs) = current->ptrace_message;
+		dprintk("ptrace after vfork returns pid %ld\n", SYSCALL_RETVAL(regs));
+		goto out;
+	case PN_STOP_SIGNAL:
+		/* normal case: fall out of the switch and handle exit_code as the signal */
+		break;
+	case PN_STOP_EXIT:
+		dprintk("ptrace exit caught\n");
+		current->ptrace &= ~PT_TRACE_EXIT;
+		spin_unlock_irq(&current->sighand->siglock);
+		module_put(THIS_MODULE);
+		complete_and_exit(NULL, current->ptrace_message);
+		BUG();
+	case PN_STOP_EXEC:
+		eprintk("ptrace after exec caught: must not happen\n");
+		BUG();
+	default:
+		eprintk("ptrace with unknown identity %d\n", ptrace_id);
+		BUG();
+	}
+
+	signr = current->exit_code;
+	if (signr == 0) {
+		dprintk("rlsi: canceled signal %d\n", info->si_signo);
+		goto out;
+	}
+	current->exit_code = 0;
+
+	if (signr != info->si_signo) {
+		info->si_signo = signr;
+		info->si_errno = 0;
+		info->si_code = SI_USER;
+		info->si_pid = virt_pid(current->parent);
+		info->si_uid = current->parent->uid;
+	}
+
+	/* If the (new) signal is now blocked, requeue it.  */
+	if (sigismember(&current->blocked, signr)) {
+		dprintk("going to requeue signal %d\n", signr);
+		goto out_resend_sig;
+	}
+
+	ka = &current->sighand->action[signr-1];
+	if (ka->sa.sa_handler == SIG_IGN) {
+		dprintk("going to resend signal %d (ignored)\n", signr);
+		goto out;
+	}
+	if (ka->sa.sa_handler != SIG_DFL) {
+		dprintk("going to resend signal %d (not SIG_DFL)\n", signr);
+		goto out_resend_sig;
+	}
+        if (signr == SIGCONT ||
+	    signr == SIGCHLD ||
+	    signr == SIGWINCH ||
+	    signr == SIGURG ||
+	    current->pid == 1)
+		goto out;
+
+	/* All the rest, which we cannot handle here, are requeued. */
+	dprintk("going to resend signal %d (sigh)\n", signr);
+out_resend_sig:
+	spin_unlock_irq(&current->sighand->siglock);
+	send_sig_info(signr, info, current);
+	module_put(THIS_MODULE);
+	return (unsigned long)(info+1);
+
+out:
+	spin_unlock_irq(&current->sighand->siglock);
+out_nolock:
+	module_put(THIS_MODULE);
+	return (unsigned long)(info+1);
+}
+
+static void ret_finish_stop(void)
+{
+	/* Hook for a task that was checkpointed while stopped in:
+	 * do_signal() ->
+	 *   get_signal_to_deliver() ->
+	 *     do_signal_stop() ->
+	 *       finish_stop()
+	 *
+	 * Normally, after SIGCONT, it will dequeue the next signal. If no
+	 * signal is found, do_signal restarts the syscall unconditionally.
+	 * Otherwise the signal handler is pushed onto the user stack.
+	 */
+
+	dprintk("rfs: " CPT_FID "\n", CPT_TID(current));
+
+	clear_stop_state(current);
+	current->exit_code = 0;
+
+	module_put(THIS_MODULE);
+}
+
+static void ret_restart_sys(void)
+{
+	struct pt_regs *regs = task_pt_regs(current);
+
+	/* Hook queued by rst_restore_process() for a task checkpointed in
+	 * an interrupted syscall; decides here whether to issue it again.
+	 */
+	dprintk("rrs: " CPT_FID "\n", CPT_TID(current));
+
+	if (syscall_is(current,regs,pause)) {
+		if (SYSCALL_RETVAL(regs) == -ERESTARTNOHAND) {
+			current->state = TASK_INTERRUPTIBLE;
+			schedule();
+		}
+	} else if (syscall_is(current,regs,rt_sigtimedwait)) {
+		if (SYSCALL_RETVAL(regs) == -EAGAIN || SYSCALL_RETVAL(regs) == -EINTR) {
+			SYSCALL_RETVAL(regs) = SYSCALL_NR(regs);	/* syscall number goes back into the return slot */
+			SYSCALL_PC(regs) -= 2;	/* NOTE(review): presumably steps back over the syscall instruction - confirm */
+		}
+	} else if (syscall_is(current,regs,futex)) {
+		if (SYSCALL_RETVAL(regs) == -EINTR) {
+			SYSCALL_RETVAL(regs) = SYSCALL_NR(regs);
+			SYSCALL_PC(regs) -= 2;
+		}
+	}
+
+	if (!signal_pending(current)) {	/* no signal pending: turn ERESTART* codes into a real restart */
+		if (SYSCALL_RETVAL(regs) == -ERESTARTSYS ||
+		    SYSCALL_RETVAL(regs) == -ERESTARTNOINTR ||
+		    SYSCALL_RETVAL(regs) == -ERESTARTNOHAND) {
+			SYSCALL_RETVAL(regs) = SYSCALL_NR(regs);
+			SYSCALL_PC(regs) -= 2;
+		} else if (SYSCALL_RETVAL(regs) == -ERESTART_RESTARTBLOCK) {
+			SYSCALL_RETVAL(regs) = __NR_restart_syscall;
+#ifdef CONFIG_X86_64
+			if (current->thread_info->flags&_TIF_IA32)	/* _TIF_IA32 tasks use the __NR32_ number */
+				SYSCALL_RETVAL(regs) = __NR32_restart_syscall;
+#endif
+			SYSCALL_PC(regs) -= 2;
+		}
+	}
+
+	module_put(THIS_MODULE);
+}
+
+extern void ret_last_siginfo(void);
+extern void ret_child_tid(void);
+extern void ret_from_rst(void);
+extern void pre_ret_from_fork(void);
+
+#ifndef CONFIG_X86_64
+
+/* tsk->thread.eip points to pre_ret_from_fork
+ * Stack layout:
+ * [eip of the last hook]
+ * [args of the last hook]
+ * [eip of previous hook]
+ * [args of previous hook]
+ * ...
+ * [eip of the first hook]
+ * [args of the first hook]
+ * [ret_from_rst]
+ */
+
+static void * add_hook(task_t *tsk, void (*hook)(void), int argsize, int *hooks)
+{
+	ESP(tsk) -= sizeof(unsigned long);	/* push the current resume address below the previous frame */
+	*(unsigned long*)ESP(tsk) = tsk->thread.eip;
+	ESP(tsk) -= argsize;	/* reserve argsize bytes for the hook's arguments */
+	tsk->thread.eip = (unsigned long)hook;	/* the new hook becomes the resume address */
+	if (!try_module_get(THIS_MODULE)) BUG();	/* one module reference per queued hook */
+	(*hooks)++;
+	return (void*)ESP(tsk);	/* start of the argument area, for the caller to fill */
+}
+
+/*
+ * Load the i386 register image b into tsk: kernel stack pointers aimed at
+ * regs, resume address ret_from_rst, fs/gs, debug registers and the user
+ * pt_regs.  Returns -EINVAL unless b is a CPT_OBJ_X86_REGS object.
+ */
+static int restore_registers(task_t *tsk, struct pt_regs *regs,
+			     struct cpt_task_image *ti, struct cpt_x86_regs *b)
+{
+	int i;
+
+	if (b->cpt_object != CPT_OBJ_X86_REGS)
+		return -EINVAL;
+
+	tsk->thread.esp = (unsigned long) regs;
+	tsk->thread.esp0 = (unsigned long) (regs+1);
+	tsk->thread.eip = (unsigned long) ret_from_rst;
+	tsk->thread.fs = decode_segment(b->cpt_fs);
+	tsk->thread.gs = decode_segment(b->cpt_gs);
+	for (i = 0; i < 8; i++)
+		tsk->thread.debugreg[i] = b->cpt_debugreg[i];
+
+	/* Raw copy starting at cpt_ebx, then translate the segment ids. */
+	memcpy(regs, &b->cpt_ebx, sizeof(struct pt_regs));
+	regs->xcs = decode_segment(b->cpt_xcs);
+	regs->xss = decode_segment(b->cpt_xss);
+	regs->xds = decode_segment(b->cpt_xds);
+	regs->xes = decode_segment(b->cpt_xes);
+
+	return 0;
+}
+
+#else
+
+/* Stack layout:
+ *
+ * [eip of the last hook]
+ * [args of the last hook]
+ * ...
+ * [eip of the first hook]
+ * [args of the first hook]
+ * [ret_from_fork+5]
+ */
+
+static void * add_hook(task_t *tsk, void (*hook)(void), int argsize, int *hooks)
+{
+	if (!*hooks) {	/* first hook: seed the stack with ret_from_fork2 as the final return address */
+		extern void ret_from_fork2(void);
+		ESP(tsk) -= sizeof(unsigned long);
+		*(unsigned long*)ESP(tsk) = (unsigned long)ret_from_fork2;
+		tsk->thread_info->flags |= _TIF_RESUME;
+	}
+	ESP(tsk) -= argsize + sizeof(unsigned long);	/* room for the arguments plus the hook address */
+	*(unsigned long*)ESP(tsk) = (unsigned long)hook;
+	if (!try_module_get(THIS_MODULE)) BUG();	/* one module reference per queued hook */
+	(*hooks)++;
+	return (void*)(ESP(tsk) + sizeof(unsigned long));	/* argument area sits just above the hook address */
+}
+/* Fill a zeroed 64-bit pt_regs d from the i386 image s; eax and orig_eax go through an s32 cast. */
+static void xlate_ptregs_32_to_64(struct pt_regs *d, struct cpt_x86_regs *s)
+{
+	memset(d, 0, sizeof(struct pt_regs));
+	d->rax = (s32)s->cpt_eax;
+	d->orig_rax = (s32)s->cpt_orig_eax;
+	d->rbx = s->cpt_ebx;
+	d->rcx = s->cpt_ecx;
+	d->rdx = s->cpt_edx;
+	d->rsi = s->cpt_esi;
+	d->rdi = s->cpt_edi;
+	d->rbp = s->cpt_ebp;
+	d->rip = s->cpt_eip;
+	d->cs = s->cpt_xcs;
+	d->eflags = s->cpt_eflags;
+	d->rsp = s->cpt_esp;
+	d->ss = s->cpt_xss;
+}
+
+/*
+ * Load a register image into tsk.  hdr is either a native x86_64 image or
+ * an i386 image (converted by xlate_ptregs_32_to_64()); any other object
+ * type returns -EINVAL before tsk is touched.
+ */
+static int restore_registers(task_t *tsk, struct pt_regs *regs,
+			     struct cpt_task_image *ti, struct cpt_obj_bits *hdr)
+{
+	struct cpt_x86_64_regs *b64 = (void*)hdr;
+	struct cpt_x86_regs *b32 = (void*)hdr;
+
+	if (hdr->cpt_object != CPT_OBJ_X86_64_REGS &&
+	    hdr->cpt_object != CPT_OBJ_X86_REGS)
+		return -EINVAL;
+	/* Common to both layouts: kernel stack pointers derived from regs. */
+	tsk->thread.rsp = (unsigned long) regs;
+	tsk->thread.rsp0 = (unsigned long) (regs+1);
+	if (hdr->cpt_object == CPT_OBJ_X86_64_REGS) {
+		tsk->thread.fs = b64->cpt_fsbase;
+		tsk->thread.gs = b64->cpt_gsbase;
+		tsk->thread.fsindex = decode_segment(b64->cpt_fsindex);
+		tsk->thread.gsindex = decode_segment(b64->cpt_gsindex);
+		tsk->thread.ds = decode_segment(b64->cpt_ds);
+		tsk->thread.es = decode_segment(b64->cpt_es);
+		tsk->thread.debugreg0 = b64->cpt_debugreg[0];
+		tsk->thread.debugreg1 = b64->cpt_debugreg[1];
+		tsk->thread.debugreg2 = b64->cpt_debugreg[2];
+		tsk->thread.debugreg3 = b64->cpt_debugreg[3];
+		tsk->thread.debugreg6 = b64->cpt_debugreg[6];
+		tsk->thread.debugreg7 = b64->cpt_debugreg[7];
+		memcpy(regs, &b64->cpt_r15, sizeof(struct pt_regs));
+		tsk->thread.userrsp = regs->rsp;
+		regs->cs = decode_segment(b64->cpt_cs);
+		regs->ss = decode_segment(b64->cpt_ss);
+		return 0;
+	}
+
+	/* i386 image: fs/gs bases are zeroed, only the selectors are restored. */
+	tsk->thread.fs = 0;
+	tsk->thread.gs = 0;
+	tsk->thread.fsindex = decode_segment(b32->cpt_fs);
+	tsk->thread.gsindex = decode_segment(b32->cpt_gs);
+	tsk->thread.debugreg0 = b32->cpt_debugreg[0];
+	tsk->thread.debugreg1 = b32->cpt_debugreg[1];
+	tsk->thread.debugreg2 = b32->cpt_debugreg[2];
+	tsk->thread.debugreg3 = b32->cpt_debugreg[3];
+	tsk->thread.debugreg6 = b32->cpt_debugreg[6];
+	tsk->thread.debugreg7 = b32->cpt_debugreg[7];
+	xlate_ptregs_32_to_64(regs, b32);
+	tsk->thread.userrsp = regs->rsp;
+	regs->cs = decode_segment(b32->cpt_xcs);
+	regs->ss = decode_segment(b32->cpt_xss);
+	tsk->thread.ds = decode_segment(b32->cpt_xds);
+	tsk->thread.es = decode_segment(b32->cpt_xes);
+	return 0;
+}
+
+#endif
+
+/* For each CPT_OBJ_TASK object in ctx: load credentials, accounting, registers, signals and timers from its image and queue resume hooks. Returns 0, -EFAULT or -EINVAL. */
+int rst_restore_process(struct cpt_context *ctx)
+{
+	cpt_object_t *obj;
+
+	for_each_object(obj, CPT_OBJ_TASK) {
+		task_t *tsk = obj->o_obj;
+		struct cpt_task_image *ti = obj->o_image;
+		struct pt_regs * regs;
+		struct cpt_object_hdr *b;
+		struct cpt_siginfo_image *lsi = NULL;
+		struct group_info *gids, *ogids;
+		int hooks = 0;
+		int i;
+
+		if (tsk == NULL) {
+			eprintk_ctx("oops, task %d/%s is missing\n", ti->cpt_pid, ti->cpt_comm);
+			return -EFAULT;
+		}
+
+		wait_task_inactive(tsk);
+		regs = task_pt_regs(tsk);
+
+		if (!tsk->exit_state) {
+			tsk->lock_depth = -1;
+#ifdef CONFIG_PREEMPT
+			tsk->thread_info->preempt_count--;
+#endif
+		}
+
+		if (tsk->static_prio != ti->cpt_static_prio)
+			set_user_nice(tsk, PRIO_TO_NICE(ti->cpt_static_prio));
+
+		cpt_sigset_import(&tsk->blocked, ti->cpt_sigblocked);
+		cpt_sigset_import(&tsk->real_blocked, ti->cpt_sigrblocked);
+		cpt_sigset_import(&tsk->saved_sigmask, ti->cpt_sigsuspend_blocked);
+		cpt_sigset_import(&tsk->pending.signal, ti->cpt_sigpending);
+
+		tsk->uid = ti->cpt_uid;
+		tsk->euid = ti->cpt_euid;
+		tsk->suid = ti->cpt_suid;
+		tsk->fsuid = ti->cpt_fsuid;
+		tsk->gid = ti->cpt_gid;
+		tsk->egid = ti->cpt_egid;
+		tsk->sgid = ti->cpt_sgid;
+		tsk->fsgid = ti->cpt_fsgid;
+		memcpy(&tsk->cap_effective, &ti->cpt_ecap, sizeof(tsk->cap_effective));
+		memcpy(&tsk->cap_inheritable, &ti->cpt_icap, sizeof(tsk->cap_inheritable));
+		memcpy(&tsk->cap_permitted, &ti->cpt_pcap, sizeof(tsk->cap_permitted));
+		tsk->keep_capabilities = (ti->cpt_keepcap != 0);
+		tsk->did_exec = (ti->cpt_did_exec != 0);
+		gids = groups_alloc(ti->cpt_ngids);
+		ogids = tsk->group_info;
+		if (gids) {
+			for (i=0; i<32; i++)	/* NOTE(review): copies a fixed 32 slots whatever cpt_ngids is */
+				gids->small_block[i] = ti->cpt_gids[i];
+			tsk->group_info = gids;
+			/* Drop the old set only once it has really been replaced. */
+			if (ogids)
+				put_group_info(ogids);
+		}
+		tsk->utime = ti->cpt_utime;
+		tsk->stime = ti->cpt_stime;
+		if (ctx->image_version == 0) {
+			tsk->start_time = _ns_to_timespec(ti->cpt_starttime*TICK_NSEC);
+		} else {
+			cpt_timespec_import(&tsk->start_time, ti->cpt_starttime);
+		}
+		_set_normalized_timespec(&tsk->start_time,
+					tsk->start_time.tv_sec -
+					get_exec_env()->init_entry->start_time.tv_sec,
+					tsk->start_time.tv_nsec -
+					get_exec_env()->init_entry->start_time.tv_nsec);
+
+		tsk->nvcsw = ti->cpt_nvcsw;
+		tsk->nivcsw = ti->cpt_nivcsw;
+		tsk->min_flt = ti->cpt_min_flt;
+		tsk->maj_flt = ti->cpt_maj_flt;
+
+#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,8)
+		tsk->cutime = ti->cpt_cutime;
+		tsk->cstime = ti->cpt_cstime;
+		tsk->cnvcsw = ti->cpt_cnvcsw;
+		tsk->cnivcsw = ti->cpt_cnivcsw;
+		tsk->cmin_flt = ti->cpt_cmin_flt;
+		tsk->cmaj_flt = ti->cpt_cmaj_flt;
+
+		if (RLIM_NLIMITS > CPT_RLIM_NLIMITS)
+			__asm__("undefined\n");
+
+		for (i=0; i<RLIM_NLIMITS; i++) {
+			tsk->rlim[i].rlim_cur = ti->cpt_rlim_cur[i];
+			tsk->rlim[i].rlim_max = ti->cpt_rlim_max[i];
+		}
+#else
+		if (thread_group_leader(tsk) && tsk->signal) {
+			tsk->signal->utime = ti->cpt_utime;
+			tsk->signal->stime = ti->cpt_stime;
+			tsk->signal->cutime = ti->cpt_cutime;
+			tsk->signal->cstime = ti->cpt_cstime;
+			tsk->signal->nvcsw = ti->cpt_nvcsw;
+			tsk->signal->nivcsw = ti->cpt_nivcsw;
+			tsk->signal->cnvcsw = ti->cpt_cnvcsw;
+			tsk->signal->cnivcsw = ti->cpt_cnivcsw;
+			tsk->signal->min_flt = ti->cpt_min_flt;
+			tsk->signal->maj_flt = ti->cpt_maj_flt;
+			tsk->signal->cmin_flt = ti->cpt_cmin_flt;
+			tsk->signal->cmaj_flt = ti->cpt_cmaj_flt;
+
+			if (RLIM_NLIMITS > CPT_RLIM_NLIMITS)
+				__asm__("undefined\n");
+
+			for (i=0; i<RLIM_NLIMITS; i++) {
+				tsk->signal->rlim[i].rlim_cur = ti->cpt_rlim_cur[i];
+				tsk->signal->rlim[i].rlim_max = ti->cpt_rlim_max[i];
+			}
+		}
+#endif
+
+		for (i=0; i<3; i++) {
+			if (i >= GDT_ENTRY_TLS_ENTRIES) {
+				eprintk_ctx("too many tls descs\n");
+			} else {
+#ifndef CONFIG_X86_64
+				tsk->thread.tls_array[i].a = ti->cpt_tls[i]&0xFFFFFFFF;
+				tsk->thread.tls_array[i].b = ti->cpt_tls[i]>>32;
+#else
+				tsk->thread.tls_array[i] = ti->cpt_tls[i];
+#endif
+			}
+		}
+
+		clear_stopped_child_used_math(tsk);
+
+		b = (void *)(ti+1);
+		while ((void*)b < ((void*)ti) + ti->cpt_next) {
+			/* Siginfo objects are at the end of obj array */
+			if (b->cpt_object == CPT_OBJ_SIGINFO) {
+				struct ve_struct *env = set_exec_env(VE_TASK_INFO(tsk)->owner_env);
+				restore_sigqueue(tsk, &tsk->pending, (unsigned long)b, (unsigned long)ti + ti->cpt_next);
+				set_exec_env(env);
+				break;
+			}
+
+			switch (b->cpt_object) {
+			case CPT_OBJ_BITS:
+				if (b->cpt_content == CPT_CONTENT_X86_FPUSTATE &&
+				    cpu_has_fxsr) {
+					memcpy(&tsk->thread.i387,
+					       (void*)b + b->cpt_hdrlen,
+					       sizeof(struct i387_fxsave_struct));
+					if (ti->cpt_used_math)
+						set_stopped_child_used_math(tsk);
+				}
+#ifdef CONFIG_X86_32
+				else if (b->cpt_content == CPT_CONTENT_X86_FPUSTATE_OLD &&
+					 !cpu_has_fxsr) {
+					memcpy(&tsk->thread.i387,
+					       (void*)b + b->cpt_hdrlen,
+					       sizeof(struct i387_fsave_struct));
+					if (ti->cpt_used_math)
+						set_stopped_child_used_math(tsk);
+				}
+#endif
+				break;
+			case CPT_OBJ_LASTSIGINFO:
+				lsi = (void*)b;
+				break;
+			case CPT_OBJ_X86_REGS:
+			case CPT_OBJ_X86_64_REGS:
+				if (restore_registers(tsk, regs, ti, (void*)b)) {
+					eprintk_ctx("cannot restore registers: image is corrupted\n");
+					return -EINVAL;
+				}
+				break;
+			case CPT_OBJ_SIGALTSTACK: {
+				struct cpt_sigaltstack_image *sas;
+				sas = (struct cpt_sigaltstack_image *)b;
+				tsk->sas_ss_sp = sas->cpt_stack;
+				tsk->sas_ss_size = sas->cpt_stacksize;
+				break;
+			    }
+			}
+			b = ((void*)b) + b->cpt_next;
+		}
+
+		if (ti->cpt_ppid != ti->cpt_rppid) {
+			task_t *parent;
+			struct ve_struct *env = set_exec_env(VE_TASK_INFO(tsk)->owner_env);
+			write_lock_irq(&tasklist_lock);
+			parent = find_task_by_pid_ve(ti->cpt_ppid);
+			if (parent && parent != tsk->parent) {
+				list_add(&tsk->ptrace_list, &tsk->parent->ptrace_children);
+				REMOVE_LINKS(tsk);
+				tsk->parent = parent;
+				SET_LINKS(tsk);
+			}
+			write_unlock_irq(&tasklist_lock);
+			set_exec_env(env);
+		}
+
+		tsk->ptrace_message = ti->cpt_ptrace_message;
+		tsk->pn_state = ti->cpt_pn_state;
+		tsk->stopped_state = ti->cpt_stopped_state;
+		tsk->thread_info->flags = ti->cpt_thrflags;
+
+		/* The image was created with kernel < 2.6.16, while
+		 * task hanged in sigsuspend -> do_signal.
+		 *
+		 * FIXME! This needs more brain efforts...
+		 */
+		if (ti->cpt_sigsuspend_state) {
+			tsk->thread_info->flags |= _TIF_RESTORE_SIGMASK;
+		}
+
+#ifdef CONFIG_X86_64
+		tsk->thread_info->flags |= _TIF_FORK;
+		if (!ti->cpt_64bit)
+			tsk->thread_info->flags |= _TIF_IA32;
+#endif
+
+#ifndef CONFIG_X86_64
+		do {
+			if (regs->orig_eax == __NR__newselect && regs->edi) {
+				struct timeval tv;
+				if (access_process_vm(tsk, regs->edi, &tv,
+						sizeof(tv), 0) != sizeof(tv)) {
+					wprintk_ctx("task %d/%d(%s): Error 1 in access_process_vm: edi %ld\n",
+						virt_pid(tsk), tsk->pid, tsk->comm,
+					       regs->edi);
+					break;
+				}
+				dprintk_ctx("task %d/%d(%s): Old timeval in newselect: %ld.%ld\n",
+				       virt_pid(tsk), tsk->pid, tsk->comm,
+				       tv.tv_sec, tv.tv_usec);
+				tv.tv_sec -= ctx->delta_time.tv_sec;
+				if (tv.tv_usec < ctx->delta_time.tv_nsec / 1000) {
+					tv.tv_usec += 1000000 - ctx->delta_time.tv_nsec / 1000;
+					tv.tv_sec--;
+				} else {
+					tv.tv_usec -= ctx->delta_time.tv_nsec / 1000;
+				}
+				if (tv.tv_sec < 0) {
+					tv.tv_sec = 0;
+					tv.tv_usec = 0;
+				}
+				dprintk_ctx("task %d/%d(%s): New timeval in newselect: %ld.%ld\n",
+					virt_pid(tsk), tsk->pid, tsk->comm,
+				       tv.tv_sec, tv.tv_usec);
+				if (access_process_vm(tsk, regs->edi, &tv,
+						sizeof(tv), 1) != sizeof(tv)) {
+					wprintk_ctx("task %d/%d(%s): Error 1 in access_process_vm write: edi %ld\n",
+						virt_pid(tsk), tsk->pid, tsk->comm, regs->edi);
+				}
+			} else if (regs->orig_eax == __NR_select && regs->ebx) {	/* gate on ebx: the argument block is read from it below */
+				struct {
+					unsigned long n;
+					fd_set __user *inp, *outp, *exp;
+					struct timeval __user *tvp;
+				} a;
+				struct timeval tv;
+				if (access_process_vm(tsk, regs->ebx, &a,
+						sizeof(a), 0) != sizeof(a)) {
+					wprintk_ctx("task %d: Error 2 in access_process_vm\n", tsk->pid);
+					break;
+				}
+				if (access_process_vm(tsk, (unsigned long)a.tvp,
+						&tv, sizeof(tv), 0) != sizeof(tv)) {
+					wprintk_ctx("task %d: Error 3 in access_process_vm\n", tsk->pid);
+					break;
+				}
+				dprintk_ctx("task %d: Old timeval in select: %ld.%ld\n",
+					tsk->pid, tv.tv_sec, tv.tv_usec);
+				tv.tv_sec -= ctx->delta_time.tv_sec;
+				if (tv.tv_usec < ctx->delta_time.tv_nsec / 1000) {
+					tv.tv_usec += 1000000 - ctx->delta_time.tv_nsec / 1000;
+					tv.tv_sec--;
+				} else {
+					tv.tv_usec -= ctx->delta_time.tv_nsec / 1000;
+				}
+				if (tv.tv_sec < 0) {
+					tv.tv_sec = 0;
+					tv.tv_usec = 0;
+				}
+				dprintk_ctx("task %d: New timeval in select: %ld.%ld\n",
+					tsk->pid, tv.tv_sec, tv.tv_usec);
+				if (access_process_vm(tsk, (unsigned long)a.tvp,
+						&tv, sizeof(tv), 1) != sizeof(tv)) {
+					wprintk_ctx("task %d: Error 3 in access_process_vm write\n", tsk->pid);
+				}
+			}
+		} while (0);
+#endif
+
+		if (!tsk->exit_state && (long)SYSCALL_NR(regs) >= 0) {
+			if (SYSCALL_RETVAL(regs) == -ERESTARTSYS ||
+			    SYSCALL_RETVAL(regs) == -ERESTARTNOINTR ||
+			    SYSCALL_RETVAL(regs) == -ERESTARTNOHAND ||
+			    SYSCALL_RETVAL(regs) == -ERESTART_RESTARTBLOCK ||
+			    syscall_is(tsk,regs,pause) ||
+			    (syscall_is(tsk,regs,rt_sigtimedwait) &&
+			     (SYSCALL_RETVAL(regs) == -EAGAIN || SYSCALL_RETVAL(regs) == -EINTR)) ||
+			    (syscall_is(tsk,regs,futex) &&
+			     (SYSCALL_RETVAL(regs) == -EINTR)))
+				add_hook(tsk, ret_restart_sys, 0, &hooks);
+		}
+
+		if (lsi || tsk->pn_state) {
+			/* ... -> ptrace_notify()
+			 * or
+			 * ... -> do_signal() -> get_signal_to_deliver() ->
+			 *   ptrace stop
+			 */
+			tsk->last_siginfo = add_hook(tsk, ret_last_siginfo, sizeof(siginfo_t), &hooks);
+			memset(tsk->last_siginfo, 0, sizeof(siginfo_t));
+			if (lsi)
+				decode_siginfo(tsk->last_siginfo, lsi);
+		}
+
+		tsk->ptrace = ti->cpt_ptrace;
+		tsk->flags = ti->cpt_flags & ~PF_FROZEN;
+		clear_tsk_thread_flag(tsk, TIF_FREEZE);
+		tsk->exit_signal = ti->cpt_exit_signal;
+
+		if (tsk->stopped_state) {
+			dprintk_ctx("finish_stop\n");
+			if (ti->cpt_state != TASK_STOPPED)
+				eprintk_ctx("Hellooo, state is %u\n", (unsigned)ti->cpt_state);
+			add_hook(tsk, ret_finish_stop, 0, &hooks);
+		}
+
+		if (!tsk->exit_state &&
+		    (ti->cpt_set_tid || ti->cpt_clear_tid)) {
+			unsigned long *ptr = add_hook(tsk, ret_child_tid, sizeof(unsigned long)*2, &hooks);
+			ptr[0] = ti->cpt_clear_tid;
+			ptr[1] = ti->cpt_set_tid;
+			dprintk_ctx("settids\n");
+		}
+
+#ifdef CONFIG_X86_64
+		if (!hooks && (long)SYSCALL_NR(regs) < 0) {
+			extern void ret_from_fork2(void);
+			ESP(tsk) -= sizeof(unsigned long);
+			*(unsigned long*)ESP(tsk) = (unsigned long)ret_from_fork2;
+			tsk->thread_info->flags |= _TIF_RESUME;
+		}
+#else
+		tsk->thread.esp -= 4;
+		*(__u32*)tsk->thread.esp = tsk->thread.eip;
+		tsk->thread.eip = (unsigned long)pre_ret_from_fork;
+#endif
+
+		if (ti->cpt_state == TASK_TRACED)
+			tsk->state = TASK_TRACED;
+		else if (ti->cpt_state & (EXIT_ZOMBIE|EXIT_DEAD)) {
+			tsk->signal->it_virt_expires = 0;
+			tsk->signal->it_prof_expires = 0;
+			if (tsk->state != EXIT_DEAD)
+				eprintk_ctx("oops, schedule() did not make us dead\n");
+		}
+
+		if (thread_group_leader(tsk) &&
+		    ti->cpt_it_real_value &&
+		    !(ti->cpt_state & (EXIT_ZOMBIE|EXIT_DEAD))) {
+			DEFINE_KTIME(val);
+
+			if (ctx->image_version != 0) {
+				ktime_t delta;
+
+				val = ktime_add_ns(val, ti->cpt_it_real_value);
+				delta = timespec_to_ktime(ctx->delta_time);
+				val = ktime_sub(val, delta);
+				if (val.tv64 <= 0)
+					val.tv64 = NSEC_PER_USEC;
+				dprintk("rst itimer " CPT_FID " +%Ld %Ld %Lu\n", CPT_TID(tsk), val.tv64, delta.tv64, ti->cpt_it_real_value);
+			} else {
+				unsigned long jif = ti->cpt_it_real_value -
+					timespec_to_jiffies(&ctx->delta_time);
+				if ((long)jif <= 0)
+					jif = 1;
+				val = ktime_add_ns(val, (u64)jif*TICK_NSEC);
+			}
+			spin_lock_irq(&tsk->sighand->siglock);
+			if (hrtimer_try_to_cancel(&tsk->signal->real_timer) >= 0) {
+				/* FIXME. Check!!!! */
+				hrtimer_start(&tsk->signal->real_timer, val, HRTIMER_REL);
+			} else {
+				wprintk_ctx("Timer clash. Impossible?\n");
+			}
+			spin_unlock_irq(&tsk->sighand->siglock);
+
+			dprintk_ctx("itimer " CPT_FID " +%Lu\n", CPT_TID(tsk), val.tv64);
+		}
+
+		module_put(THIS_MODULE);
+	}
+	return 0;
+}
diff -uprN linux-2.6.16/kernel/cpt/rst_socket.c linux-2.6.16.ovz/kernel/cpt/rst_socket.c
--- linux-2.6.16/kernel/cpt/rst_socket.c	1969-12-31 19:00:00.000000000 -0500
+++ linux-2.6.16.ovz/kernel/cpt/rst_socket.c	2006-07-05 08:34:56.000000000 -0400
@@ -0,0 +1,876 @@
+/*
+ *
+ *  kernel/cpt/rst_socket.c
+ *
+ *  Copyright (C) 2000-2005  SWsoft
+ *  All rights reserved.
+ *
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/version.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/file.h>
+#include <linux/mm.h>
+#include <linux/errno.h>
+#include <linux/fs.h>
+#include <linux/namei.h>
+#include <linux/socket.h>
+#include <linux/un.h>
+#include <net/tcp.h>
+#include <net/sock.h>
+#include <net/scm.h>
+#include <net/af_unix.h>
+
+#include <ub/ub_mem.h>
+#include <ub/ub_orphan.h>
+#include <ub/ub_orphan.h>
+#include <ub/ub_net.h>
+#include <ub/ub_tcp.h>
+
+
+#include "cpt_obj.h"
+#include "cpt_context.h"
+#include "cpt_mm.h"
+#include "cpt_files.h"
+#include "cpt_socket.h"
+#include "cpt_kernel.h"
+
+#include "cpt_syscalls.h"
+
+
+static int setup_sock_common(struct sock *sk, struct cpt_sock_image *si,
+			     loff_t pos, struct cpt_context *ctx)
+{
+	struct socket *owner = sk->sk_socket;
+
+	/* Copy the generic per-socket settings saved in the image into sk. */
+	if (owner) {
+		owner->flags = si->cpt_ssflags;
+		owner->state = si->cpt_sstate;
+	}
+	sk->sk_reuse = si->cpt_reuse;
+	sk->sk_shutdown = si->cpt_shutdown;
+	sk->sk_userlocks = si->cpt_userlocks;
+	sk->sk_no_check = si->cpt_no_check;
+	if (si->cpt_debug)
+		sock_set_flag(sk, SOCK_DBG);
+	else
+		sock_reset_flag(sk, SOCK_DBG);
+	if (si->cpt_rcvtstamp)
+		sock_set_flag(sk, SOCK_RCVTSTAMP);
+	else
+		sock_reset_flag(sk, SOCK_RCVTSTAMP);
+	if (si->cpt_localroute)
+		sock_set_flag(sk, SOCK_LOCALROUTE);
+	else
+		sock_reset_flag(sk, SOCK_LOCALROUTE);
+	sk->sk_protocol = si->cpt_protocol;
+	sk->sk_err = si->cpt_err;
+	sk->sk_err_soft = si->cpt_err_soft;
+	sk->sk_priority = si->cpt_priority;
+	sk->sk_rcvlowat = si->cpt_rcvlowat;
+	sk->sk_rcvtimeo = (si->cpt_rcvtimeo == CPT_NULL) ? MAX_SCHEDULE_TIMEOUT : si->cpt_rcvtimeo;	/* CPT_NULL -> max timeout */
+	sk->sk_sndtimeo = (si->cpt_sndtimeo == CPT_NULL) ? MAX_SCHEDULE_TIMEOUT : si->cpt_sndtimeo;
+	sk->sk_rcvbuf = si->cpt_rcvbuf;
+	sk->sk_sndbuf = si->cpt_sndbuf;
+	sk->sk_bound_dev_if = si->cpt_bound_dev_if;
+	sk->sk_flags = si->cpt_flags;	/* whole flag word is taken from the image last */
+	sk->sk_lingertime = (si->cpt_lingertime == CPT_NULL) ? MAX_SCHEDULE_TIMEOUT : si->cpt_lingertime;
+	sk->sk_peercred.pid = si->cpt_peer_pid;
+	sk->sk_peercred.uid = si->cpt_peer_uid;
+	sk->sk_peercred.gid = si->cpt_peer_gid;
+	cpt_timeval_import(&sk->sk_stamp, si->cpt_stamp);
+	return 0;
+}
+
+static struct file *sock_mapfile(struct socket *sock)
+{
+	struct file *filp;
+	int tmpfd = sock_map_fd(sock);	/* the fd is only a vehicle to get a file */
+
+	if (tmpfd < 0)
+		return ERR_PTR(tmpfd);
+	filp = sock->file;
+	get_file(filp);			/* take our reference before closing the fd */
+	sc_close(tmpfd);
+	return filp;
+}
+
+/* Assumption: /tmp exists and is writable.
+ * Previous versions assumed that listen() would autobind
+ * the socket. It does not do so for AF_UNIX, for an evident reason:
+ * a socket in the abstract namespace is accessible, unlike a socket bound
+ * to a deleted FS object.
+ */
+
+static int
+select_deleted_name(char * name, cpt_context_t *ctx)
+{
+	int tries_left = 100;
+
+	/* Probe random /tmp/SOCK.<hex> names until one fails to resolve. */
+	while (tries_left-- > 0) {
+		struct nameidata nd;
+
+		sprintf(name, "/tmp/SOCK.%08x", (unsigned int)net_random());
+		if (path_lookup(name, 0, &nd) == 0) {
+			path_release(&nd);	/* name is taken, try another one */
+			continue;
+		}
+		return 0;
+	}
+
+	eprintk_ctx("failed to allocate deleted socket inode\n");
+	return -ELOOP;
+}
+
+static int
+bind_unix_socket(struct socket *sock, struct cpt_sock_image *si,
+		 cpt_context_t *ctx)
+{
+	int err;
+	char *name;
+	struct sockaddr* addr;
+	int addrlen;
+	struct sockaddr_un sun;
+	struct nameidata nd;
+	/* Bind sock to the local address saved in si->cpt_laddr; returns 0 or -errno. */
+	if ((addrlen = si->cpt_laddrlen) <= 2)
+		return 0;	/* nothing beyond the first two address bytes: no bind */
+	/* From here on, nd.dentry != NULL means "holding a looked-up path". */
+	nd.dentry = NULL;
+	name = ((char*)si->cpt_laddr) + 2;	/* name starts 2 bytes into the address */
+	addr = (struct sockaddr *)si->cpt_laddr;
+
+	if (name[0]) {	/* non-empty first byte: the name is looked up as a path */
+		err = path_lookup(name, 0, &nd);
+		if (err) {
+			nd.dentry = NULL;
+		} else {
+			if (si->cpt_deleted) {
+				path_release(&nd);
+				nd.dentry = NULL;
+				addr = (struct sockaddr*)&sun;	/* bind to a temporary name instead */
+				addr->sa_family = AF_UNIX;
+				name = ((char*)addr) + 2;
+				err = select_deleted_name(name, ctx);
+				if (err)
+					return err;
+				addrlen = 2 + strlen(name);
+			} else if (!S_ISSOCK(nd.dentry->d_inode->i_mode)) {
+				eprintk_ctx("bind_unix_socket: not a socket dentry\n");
+				path_release(&nd);
+				return -EINVAL;
+			}
+		}
+		if (nd.dentry)
+			sc_unlink(name);	/* remove the existing socket file before bind */
+	}
+
+	err = sock->ops->bind(sock, addr, addrlen);
+
+	if (!err) {
+		if (nd.dentry) {	/* copy owner and mode from the inode found earlier */
+			sc_chown(name, nd.dentry->d_inode->i_uid,
+				 nd.dentry->d_inode->i_gid);
+			sc_chmod(name, nd.dentry->d_inode->i_mode);
+		}
+		if (si->cpt_deleted && name[0])
+			sc_unlink(name);	/* image marks the name as deleted: unlink after bind */
+	}
+	if (nd.dentry)
+		path_release(&nd);
+	return err;
+}
+
+static int fixup_unix_address(struct socket *sock, struct cpt_sock_image *si,
+			      struct cpt_context *ctx)
+{
+	struct sock *sk = sock->sk;
+	cpt_object_t *obj;
+	struct sock *parent;
+
+	/* Only non-listening AF_UNIX sockets need their address fixed up. */
+	if (sk->sk_family != AF_UNIX || sk->sk_state == TCP_LISTEN)
+		return 0;
+
+	if (si->cpt_parent == -1)	/* no parent recorded: bind on our own */
+		return bind_unix_socket(sock, si, ctx);
+
+	obj = lookup_cpt_obj_byindex(CPT_OBJ_SOCKET, si->cpt_parent, ctx);
+	parent = obj ? obj->o_obj : NULL;
+	if (!parent || !unix_sk(parent)->addr)	/* unknown, unrestored or unbound parent */
+		return 0;
+
+	/* Share the parent's address: drop our own, take a ref on the parent's. */
+	if (unix_sk(sk)->addr &&
+	    atomic_dec_and_test(&unix_sk(sk)->addr->refcnt))
+		kfree(unix_sk(sk)->addr);
+	atomic_inc(&unix_sk(parent)->addr->refcnt);
+	unix_sk(sk)->addr = unix_sk(parent)->addr;
+	return 0;
+}
+
+
+static int open_socket(cpt_object_t *obj, struct cpt_sock_image *si,
+		       struct cpt_context *ctx)
+{
+	int err;
+	struct socket *sock;
+	struct socket *sock2 = NULL;
+	struct file *file;
+	cpt_object_t *fobj;
+	cpt_object_t *pobj = NULL;
+	/* Create the socket described by si and attach it to obj; returns 0 or -errno. */
+	err = sock_create_kern(si->cpt_family, si->cpt_type, si->cpt_protocol,
+			       &sock);
+	if (err)
+		return err;
+
+	if (si->cpt_socketpair) {	/* image says this socket was one end of a pair */
+		err = sock_create_kern(si->cpt_family, si->cpt_type,
+				       si->cpt_protocol, &sock2);
+		if (err)
+			goto err_out;
+
+		err = sock->ops->socketpair(sock, sock2);
+		if (err < 0)
+			goto err_out;
+
+		/* Socketpair with a peer outside our environment.
+		 * So, we create real half-open pipe and do not worry
+		 * about dead end anymore. */
+		if (si->cpt_peer == -1) {
+			sock_release(sock2);
+			sock2 = NULL;
+		}
+	}
+
+	cpt_obj_setobj(obj, sock->sk, ctx);
+
+	if (si->cpt_file != CPT_NULL) {	/* the socket had a file in the image */
+		file = sock_mapfile(sock);
+		err = PTR_ERR(file);
+		if (IS_ERR(file))
+			goto err_out;
+
+		err = -ENOMEM;
+
+		obj->o_parent = file;
+
+		if ((fobj = cpt_object_add(CPT_OBJ_FILE, file, ctx)) == NULL)
+			goto err_out;
+		cpt_obj_setpos(fobj, si->cpt_file, ctx);
+		cpt_obj_setindex(fobj, si->cpt_index, ctx);
+	}
+
+	if (sock2) {	/* peer is inside the image: register the second end too */
+		struct file *file2;
+
+		pobj = lookup_cpt_obj_byindex(CPT_OBJ_SOCKET, si->cpt_peer, ctx);
+		if (!pobj) BUG();
+		if (pobj->o_obj) BUG();	/* the peer must not have been opened already */
+		cpt_obj_setobj(pobj, sock2->sk, ctx);
+
+		if (pobj->o_ppos != CPT_NULL) {
+			file2 = sock_mapfile(sock2);
+			err = PTR_ERR(file2);
+			if (IS_ERR(file2))
+				goto err_out;
+
+			err = -ENOMEM;
+			if ((fobj = cpt_object_add(CPT_OBJ_FILE, file2, ctx)) == NULL)
+				goto err_out;
+			cpt_obj_setpos(fobj, pobj->o_ppos, ctx);
+			cpt_obj_setindex(fobj, si->cpt_peer, ctx);
+
+			pobj->o_parent = file2;
+		}
+	}
+
+	setup_sock_common(sock->sk, si, obj->o_pos, ctx);
+	if (sock->sk->sk_family == AF_INET || sock->sk->sk_family == AF_INET6) {
+		inet_sk(sock->sk)->freebind = 1;
+		if (si->cpt_laddrlen) {
+			err = sock->ops->bind(sock, (struct sockaddr *)&si->cpt_laddr, si->cpt_laddrlen);
+			if (err) {	/* a failed bind is only logged here, not treated as fatal */
+				dprintk_ctx("binding failed: %d, do not worry\n", err);
+			}
+		}
+		rst_socket_in(si, obj->o_pos, sock->sk, ctx);
+	} else if (sock->sk->sk_family == AF_NETLINK) {
+		err = sock->ops->bind(sock, (struct sockaddr *)&si->cpt_laddr, si->cpt_laddrlen);
+		if (err) {
+			eprintk_ctx("AF_NETLINK binding failed: %d\n", err);
+		}
+		if (si->cpt_raddrlen) {
+			err = sock->ops->connect(sock, (struct sockaddr *)&si->cpt_raddr, si->cpt_raddrlen, O_NONBLOCK);
+			if (err) {
+				eprintk_ctx("oops, AF_NETLINK connect failed: %d\n", err);
+			}
+		}
+	}
+	fixup_unix_address(sock, si, ctx);	/* return value is not checked */
+
+	if (sock2) {
+		err = rst_get_object(CPT_OBJ_SOCKET, pobj->o_pos, si, ctx);	/* overwrites *si with the peer's image */
+		if (err)
+			return err;	/* NOTE(review): bypasses err_out, sock/sock2 are not released here - confirm */
+		setup_sock_common(sock2->sk, si, pobj->o_pos, ctx);
+		fixup_unix_address(sock2, si, ctx);
+	}
+
+	if ((sock->sk->sk_family == AF_INET || sock->sk->sk_family == AF_INET6)
+	    && (int)si->cpt_parent != -1) {
+		cpt_object_t *lobj = lookup_cpt_obj_byindex(CPT_OBJ_SOCKET, si->cpt_parent, ctx);
+		if (lobj && cpt_attach_accept(lobj->o_obj, sock->sk, ctx) == 0)
+			sock->sk = NULL;	/* attached to the parent: sock no longer refers to the sk */
+	}
+
+	/* An AF_INET socket without a file is detached from sock and orphaned. */
+	if (si->cpt_file == CPT_NULL && sock->sk &&
+	    sock->sk->sk_family == AF_INET) {
+		struct sock *sk = sock->sk;
+
+		if (sk) {
+			sock->sk = NULL;
+
+			local_bh_disable();
+			bh_lock_sock(sk);
+			if (sock_owned_by_user(sk))
+				eprintk_ctx("oops, sock is locked by user\n");
+
+			sock_hold(sk);	/* extra reference held across sock_orphan(), dropped below */
+			sock_orphan(sk);
+			ub_inc_orphan_count(sk);
+			bh_unlock_sock(sk);
+			local_bh_enable();
+			sock_put(sk);
+			dprintk_ctx("orphaning socket %p\n", sk);
+		}
+	}
+
+	if (si->cpt_file == CPT_NULL && sock->sk == NULL)
+		sock_release(sock);	/* no file and no sk left: the struct socket is not needed */
+
+	return 0;
+
+err_out:
+	if (sock2)
+		sock_release(sock2);
+	sock_release(sock);
+	return err;
+}
+
+/* Create, bind and listen() the socket described by @si (record at @pos), map
+ * it to a file and register both as restore objects.  Returns 0 or -errno. */
+static int open_listening_socket(loff_t pos, struct cpt_sock_image *si,
+				 struct cpt_context *ctx)
+{
+	int err;
+	struct socket *sock;
+	struct file *file;
+	cpt_object_t *obj, *fobj;
+
+	err = sock_create_kern(si->cpt_family, si->cpt_type, si->cpt_protocol,
+			       &sock);
+	if (err) {
+		eprintk_ctx("open_listening_socket: sock_create_kern: %d\n", err);
+		return err;
+	}
+
+	sock->sk->sk_reuse = 2;
+	sock->sk->sk_bound_dev_if = si->cpt_bound_dev_if;
+
+	if (sock->sk->sk_family == AF_UNIX) {
+		err = bind_unix_socket(sock, si, ctx);
+	} else if (si->cpt_laddrlen) {
+		if (sock->sk->sk_family == AF_INET || sock->sk->sk_family == AF_INET6)
+			inet_sk(sock->sk)->freebind = 1;
+		err = sock->ops->bind(sock, (struct sockaddr *)&si->cpt_laddr, si->cpt_laddrlen);
+	}
+	/* err is still 0 when no bind was attempted.  A failed bind of either
+	 * kind stops here instead of being overwritten by listen() below. */
+	if (err) {
+		eprintk_ctx("open_listening_socket: bind: %d\n", err);
+		goto err_out;
+	}
+
+	err = sock->ops->listen(sock, si->cpt_max_ack_backlog);
+	if (err) {
+		eprintk_ctx("open_listening_socket: listen: %d, %Ld, %d\n", err, pos, si->cpt_deleted);
+		goto err_out;
+	}
+
+	/* Now we may access socket body directly and fixup all the things. */
+	file = sock_mapfile(sock);
+	err = PTR_ERR(file);
+	if (IS_ERR(file)) {
+		eprintk_ctx("open_listening_socket: map: %d\n", err);
+		goto err_out;
+	}
+
+	err = -ENOMEM;
+	if ((fobj = cpt_object_add(CPT_OBJ_FILE, file, ctx)) == NULL)
+		goto err_out;	/* NOTE(review): file ref from sock_mapfile() is not dropped here - confirm */
+	if ((obj = cpt_object_add(CPT_OBJ_SOCKET, sock->sk, ctx)) == NULL)
+		goto err_out;
+	cpt_obj_setpos(obj, pos, ctx);
+	cpt_obj_setindex(obj, si->cpt_index, ctx);
+	obj->o_parent = file;
+	cpt_obj_setpos(fobj, si->cpt_file, ctx);
+	cpt_obj_setindex(fobj, si->cpt_index, ctx);
+
+	setup_sock_common(sock->sk, si, pos, ctx);
+	if (si->cpt_family == AF_INET || si->cpt_family == AF_INET6)
+		rst_restore_synwait_queue(sock->sk, si, pos, ctx);
+
+	return 0;
+
+err_out:
+	sock_release(sock);
+	return err;
+}
+
+static int
+rst_sock_attr_mcfilter(loff_t *pos_p, struct sock *sk, cpt_context_t *ctx)
+{
+	struct cpt_sockmc_image mc;
+	loff_t start = *pos_p;
+	int rc = rst_get_object(CPT_OBJ_SOCK_MCADDR, start, &mc, ctx);
+
+	if (rc)
+		return rc;
+	*pos_p += mc.cpt_next;	/* the record is consumed before dispatching */
+	/* Hand the record to the restorer for its address family. */
+	switch (mc.cpt_family) {
+	case AF_INET:
+		return rst_sk_mcfilter_in(sk, &mc, start, ctx);
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+	case AF_INET6:
+		return rst_sk_mcfilter_in6(sk, &mc, start, ctx);
+#endif
+	default:
+		return -EAFNOSUPPORT;
+	}
+}
+
+
+static int
+rst_sock_attr_skfilter(loff_t *pos_p, struct sock *sk, cpt_context_t *ctx)
+{
+	struct cpt_obj_bits hdr;
+	struct sk_filter *filt, *prev;
+	loff_t start = *pos_p;
+	int rc = rst_get_object(CPT_OBJ_SKFILTER, start, &hdr, ctx);
+
+	if (rc)
+		return rc;
+	*pos_p += hdr.cpt_next;		/* the record counts as consumed from here on */
+	/* The payload must be a whole number of filter instructions. */
+	if (hdr.cpt_size % sizeof(struct sock_filter))
+		return -EINVAL;
+
+	filt = sock_kmalloc(sk, hdr.cpt_size+sizeof(*filt), GFP_KERNEL_UBC);
+	if (!filt)
+		return -ENOMEM;
+	filt->len = hdr.cpt_size/sizeof(struct sock_filter);
+	atomic_set(&filt->refcnt, 1);
+
+	/* The instructions follow the record header in the image. */
+	rc = ctx->pread(filt->insns, hdr.cpt_size, ctx, start+hdr.cpt_hdrlen);
+	if (rc) {
+		sk_filter_release(sk, filt);
+		return rc;
+	}
+
+	/* Install the new filter, then release the one attached before. */
+	prev = sk->sk_filter;
+	sk->sk_filter = filt;
+	if (prev)
+		sk_filter_release(sk, prev);
+	return 0;
+}
+
+
+int rst_sock_attr(loff_t *pos_p, struct sock *sk, cpt_context_t *ctx)
+{
+	const loff_t start = *pos_p;
+	int rc = rst_sock_attr_skfilter(pos_p, sk, ctx);
+
+	/* Try mcfilter only if skfilter failed without moving the position. */
+	if (!rc || *pos_p != start)
+		return rc;
+	return rst_sock_attr_mcfilter(pos_p, sk, ctx);
+}
+
+/* Rebuild one sk_buff from the CPT_OBJ_SKB record at *pos_p and move *pos_p past
+ * it; @owner/@queue are optional outputs.  Returns the skb or an ERR_PTR(). */
+struct sk_buff * rst_skb(loff_t *pos_p, __u32 *owner, __u32 *queue, struct cpt_context *ctx)
+{
+	int err;
+	struct sk_buff *skb;
+	struct cpt_skb_image v;
+	loff_t pos = *pos_p;
+	struct scm_fp_list *fpl = NULL;
+	struct timeval tmptv;
+
+	err = rst_get_object(CPT_OBJ_SKB, pos, &v, ctx);
+	if (err)
+		return ERR_PTR(err);
+	*pos_p = pos + v.cpt_next;
+	if (owner)
+		*owner = v.cpt_owner;
+	if (queue)
+		*queue = v.cpt_queue;
+
+	skb = alloc_skb(v.cpt_len + v.cpt_hspace + v.cpt_tspace, GFP_KERNEL);
+	if (skb == NULL)
+		return ERR_PTR(-ENOMEM);
+	skb_reserve(skb, v.cpt_hspace);
+	skb_put(skb, v.cpt_len);
+	skb->h.raw = skb->head + v.cpt_h;
+	skb->nh.raw = skb->head + v.cpt_nh;
+	skb->mac.raw = skb->head + v.cpt_mac;
+	if (sizeof(skb->cb) < sizeof(v.cpt_cb)) BUG();
+	memcpy(skb->cb, v.cpt_cb, sizeof(v.cpt_cb));
+	skb->mac_len = v.cpt_mac_len;
+	skb->csum = v.cpt_csum;
+	skb->local_df = v.cpt_local_df;
+	skb->pkt_type = v.cpt_pkt_type;
+	skb->ip_summed = v.cpt_ip_summed;
+	skb->priority = v.cpt_priority;
+	skb->protocol = v.cpt_protocol;
+	cpt_timeval_import(&tmptv, v.cpt_stamp);
+	skb_set_timestamp(skb, &tmptv);
+
+	skb_shinfo(skb)->tso_segs = v.cpt_tso_segs;
+	skb_shinfo(skb)->tso_size = v.cpt_tso_size;
+	if (ctx->image_version == 0) {	/* version 0 images: fixed TSO values are used */
+		skb_shinfo(skb)->tso_segs = 1;
+		skb_shinfo(skb)->tso_size = 0;
+	}
+	/* Walk the sub-records after the header: packet bytes and passed files. */
+	pos = pos + v.cpt_hdrlen;
+	while (pos < *pos_p) {
+		union {
+			struct cpt_obj_bits b;
+			struct cpt_fd_image f;
+		} u;
+
+		err = rst_get_object(-1, pos, &u, ctx);
+		if (err) {
+			kfree_skb(skb);
+			return ERR_PTR(err);
+		}
+		if (u.b.cpt_object == CPT_OBJ_BITS) {
+			if (u.b.cpt_size != v.cpt_hspace + skb->len) {
+				eprintk_ctx("invalid skb image %u != %u + %u\n", u.b.cpt_size, v.cpt_hspace, skb->len);
+				kfree_skb(skb);
+				return ERR_PTR(-EINVAL);
+			}
+			err = ctx->pread(skb->head, u.b.cpt_size, ctx, pos+u.b.cpt_hdrlen);
+			if (err) {
+				kfree_skb(skb);
+				return ERR_PTR(err);
+			}
+		} else if (u.f.cpt_object == CPT_OBJ_FILEDESC) {
+			if (!fpl) {
+				fpl = ub_kmalloc(sizeof(struct scm_fp_list), GFP_KERNEL);
+				if (!fpl) {
+					kfree_skb(skb);
+					return ERR_PTR(-ENOMEM);
+				}
+				fpl->count = 0;
+				UNIXCB(skb).fp = fpl;
+			}
+			if (fpl->count >= SCM_MAX_FD) {	/* fp[] is full: never write past it */
+				eprintk_ctx("too many passed files in skb image\n");
+			} else {
+				fpl->fp[fpl->count] = rst_file(u.f.cpt_file, -1, ctx);
+				if (!IS_ERR(fpl->fp[fpl->count]))	/* a file that failed to restore is skipped */
+					fpl->count++;
+			}
+		}
+		pos += u.b.cpt_next;
+	}
+	return skb;
+}
+
+static void unix_detach_fds(struct scm_cookie *scm, struct sk_buff *skb)
+{
+	int idx;
+	/* Move the fd list from the skb to the cookie, then walk it last to first. */
+	scm->fp = UNIXCB(skb).fp;
+	UNIXCB(skb).fp = NULL;
+	skb->destructor = sock_wfree;
+	for (idx = scm->fp->count; idx-- > 0; )
+		unix_notinflight(scm->fp->fp[idx]);
+}
+
+static void unix_destruct_fds(struct sk_buff *skb)
+{
+	struct scm_cookie cookie;	/* scratch cookie that takes over the fd list */
+	memset(&cookie, 0, sizeof cookie);
+	unix_detach_fds(&cookie, skb);
+	scm_destroy(&cookie);
+	sock_wfree(skb);
+	module_put(THIS_MODULE);	/* pairs with try_module_get() where this destructor is installed */
+}
+
+
+/* Refill an AF_UNIX receive queue from the sub-records that follow the socket
+ * header at @pos; a record that is not an skb is tried as a socket attribute. */
+static int restore_unix_rqueue(struct sock *sk, struct cpt_sock_image *si,
+			       loff_t pos, struct cpt_context *ctx)
+{
+	loff_t endpos;
+
+	endpos = pos + si->cpt_next;	/* record end, measured from the record start... */
+	pos = pos + si->cpt_hdrlen;	/* ...so it is taken before skipping the header */
+	while (pos < endpos) {
+		struct sk_buff *skb;
+		struct sock *owner_sk;
+		__u32 owner;
+
+		skb = rst_skb(&pos, &owner, NULL, ctx);
+		if (IS_ERR(skb)) {
+			int err = PTR_ERR(skb);
+			if (err == -EINVAL)	/* not an skb: try it as an attribute */
+				err = rst_sock_attr(&pos, sk, ctx);
+			if (err)
+				return err;
+			continue;	/* attribute restored, go on with the next record */
+		}
+
+		owner_sk = unix_peer(sk);	/* default owner: our peer */
+		if (owner != -1) {
+			cpt_object_t *pobj;
+			pobj = lookup_cpt_obj_byindex(CPT_OBJ_SOCKET, owner, ctx);
+			if (pobj == NULL) {
+				eprintk_ctx("orphan af_unix skb?\n");
+				kfree_skb(skb);
+				continue;
+			}
+			owner_sk = pobj->o_obj;
+		}
+		if (owner_sk == NULL) {
+			dprintk_ctx("orphan af_unix skb 2?\n");
+			kfree_skb(skb);
+			continue;
+		}
+		skb_set_owner_w(skb, owner_sk);
+		if (UNIXCB(skb).fp) {	/* skb carries files: our destructor needs the module pinned */
+			skb->destructor = unix_destruct_fds;
+			if (!try_module_get(THIS_MODULE)) BUG();
+		}
+		skb_queue_tail(&sk->sk_receive_queue, skb);
+		if (sk->sk_state == TCP_LISTEN) {	/* queued on a listener: drop the owner's struct socket */
+			struct socket *sock = skb->sk->sk_socket;
+			if (sock == NULL) BUG();
+			if (sock->file) BUG();
+			skb->sk->sk_socket = NULL;
+			skb->sk->sk_sleep = NULL;
+			sock->sk = NULL;
+			sock_release(sock);
+		}
+	}
+	return 0;
+}
+
+
+/* All the sockets are created before we start to open files */
+
+/* Restore the CPT_SECT_SOCKET section: pass 1 indexes every socket record and
+ * opens listeners, pass 2 opens the rest.  Returns 0 or a negative errno. */
+int rst_sockets(struct cpt_context *ctx)
+{
+	int err;
+	loff_t sec = ctx->sections[CPT_SECT_SOCKET];
+	loff_t endsec, next;
+	cpt_object_t *obj;
+	struct cpt_section_hdr h;
+
+	if (sec == CPT_NULL)
+		return 0;
+
+	err = ctx->pread(&h, sizeof(h), ctx, sec);
+	if (err) {
+		eprintk_ctx("rst_sockets: ctx->pread: %d\n", err);
+		return err;
+	}
+	if (h.cpt_section != CPT_SECT_SOCKET || h.cpt_hdrlen < sizeof(h)) {
+		eprintk_ctx("rst_sockets: hdr err\n");
+		return -EINVAL;
+	}
+
+	/* The first pass: we create socket index and open listening sockets. */
+	endsec = sec + h.cpt_next;
+	sec += h.cpt_hdrlen;
+	while (sec < endsec) {
+		struct cpt_sock_image *sbuf = cpt_get_buf(ctx);
+		err = rst_get_object(CPT_OBJ_SOCKET, sec, sbuf, ctx);
+		if (err) {
+			eprintk_ctx("rst_sockets: rst_get_object: %d\n", err);
+			cpt_release_buf(ctx);
+			return err;
+		}
+		next = sbuf->cpt_next;	/* sbuf is not read again once it is released */
+		if (sbuf->cpt_state == TCP_LISTEN) {
+			err = open_listening_socket(sec, sbuf, ctx);
+			if (err)
+				eprintk_ctx("rst_sockets: open_listening_socket: %d\n", err);
+		} else if ((obj = alloc_cpt_object(GFP_KERNEL, ctx)) != NULL) {
+			cpt_obj_setindex(obj, sbuf->cpt_index, ctx);	/* index only; opened in pass 2 */
+			cpt_obj_setpos(obj, sec, ctx);
+			obj->o_ppos  = sbuf->cpt_file;
+			intern_cpt_object(CPT_OBJ_SOCKET, obj, ctx);
+		} else {
+			err = -ENOMEM;
+		}
+		cpt_release_buf(ctx);	/* single release point for every branch above */
+		if (err)
+			return err;
+		sec += next;
+	}
+
+	/* Pass 2: really restore sockets */
+	for_each_object(obj, CPT_OBJ_SOCKET) {
+		struct cpt_sock_image *sbuf;
+		if (obj->o_obj != NULL)
+			continue;	/* already has a socket, e.g. a listener from pass 1 */
+		sbuf = cpt_get_buf(ctx);
+		err = rst_get_object(CPT_OBJ_SOCKET, obj->o_pos, sbuf, ctx);
+		if (err) {
+			eprintk_ctx("rst_sockets: rst_get_object: %d\n", err);
+			cpt_release_buf(ctx);
+			return err;
+		}
+		if (sbuf->cpt_state == TCP_LISTEN) BUG();
+		err = open_socket(obj, sbuf, ctx);
+		cpt_release_buf(ctx);
+		if (err) {
+			eprintk_ctx("rst_sockets: open_socket: %d\n", err);
+			return err;
+		}
+	}
+	return 0;
+}
+
+/* Restore every socket record of the CPT_SECT_ORPHANS section; 0 or -errno. */
+int rst_orphans(struct cpt_context *ctx)
+{
+	int err;
+	loff_t sec = ctx->sections[CPT_SECT_ORPHANS];
+	loff_t endsec, next;
+	cpt_object_t *obj;
+	struct cpt_section_hdr h;
+
+	if (sec == CPT_NULL)
+		return 0;
+	err = ctx->pread(&h, sizeof(h), ctx, sec);
+	if (err)
+		return err;
+	if (h.cpt_section != CPT_SECT_ORPHANS || h.cpt_hdrlen < sizeof(h))
+		return -EINVAL;
+	endsec = sec + h.cpt_next;
+	sec += h.cpt_hdrlen;
+	while (sec < endsec) {
+		struct cpt_sock_image *sbuf = cpt_get_buf(ctx);
+		err = rst_get_object(CPT_OBJ_SOCKET, sec, sbuf, ctx);
+		if (err) {
+			cpt_release_buf(ctx);
+			return err;
+		}
+		/* Save the step first: open_socket() may reload *sbuf with a peer image. */
+		next = sbuf->cpt_next;
+		obj = alloc_cpt_object(GFP_KERNEL, ctx);
+		if (obj == NULL) {
+			cpt_release_buf(ctx);
+			return -ENOMEM;
+		}
+		obj->o_pos = sec;
+		obj->o_ppos  = sbuf->cpt_file;
+		err = open_socket(obj, sbuf, ctx);
+		dprintk_ctx("Restoring orphan: %d\n", err);
+		free_cpt_object(obj, ctx);	/* the object is only a temporary handle for open_socket() */
+		cpt_release_buf(ctx);
+		if (err)
+			return err;
+		sec += next;	/* sbuf is not read after the release above */
+	}
+	return 0;
+}
+
+
+/* Pass 3: I understand, this is not funny already :-),
+ * but we have to do another pass to establish links between
+ * not-paired AF_UNIX SOCK_DGRAM sockets and to restore AF_UNIX
+ * skb queues with proper skb->sk links.
+ *
+ * This could be made at the end of rst_sockets(), but we defer
+ * restoring af_unix queues up to the end of restoring files to
+ * make restoring passed FDs cleaner.
+ */
+
+int rst_sockets_complete(struct cpt_context *ctx)
+{
+	int err;
+	cpt_object_t *obj;
+
+	for_each_object(obj, CPT_OBJ_SOCKET) {
+		struct cpt_sock_image *sbuf;
+		struct sock *sk = obj->o_obj;
+		struct sock *peer;
+
+		if (!sk) BUG();	/* every indexed socket must have been opened by now */
+
+		if (sk->sk_family != AF_UNIX)
+			continue;
+
+		sbuf = cpt_get_buf(ctx);
+		err = rst_get_object(CPT_OBJ_SOCKET, obj->o_pos, sbuf, ctx);
+		if (err) {
+			cpt_release_buf(ctx);
+			return err;
+		}
+		/* Records longer than their header carry sub-records (queued skbs etc.). */
+		if (sbuf->cpt_next > sbuf->cpt_hdrlen)
+			restore_unix_rqueue(sk, sbuf, obj->o_pos, ctx);	/* NOTE(review): result is ignored */
+
+		cpt_release_buf(ctx);
+		/* Unpaired datagram socket: link it to the peer recorded in the image. */
+		if (sk->sk_type == SOCK_DGRAM && unix_peer(sk) == NULL) {
+			cpt_object_t *pobj;
+
+			sbuf = cpt_get_buf(ctx);	/* the buffer was released above, so read the image again */
+			err = rst_get_object(CPT_OBJ_SOCKET, obj->o_pos, sbuf, ctx);
+			if (err) {
+				cpt_release_buf(ctx);
+				return err;
+			}
+
+			if (sbuf->cpt_peer != -1) {
+				pobj = lookup_cpt_obj_byindex(CPT_OBJ_SOCKET, sbuf->cpt_peer, ctx);
+				if (pobj) {
+					peer = pobj->o_obj;	/* NOTE(review): not checked for NULL before sock_hold() */
+					sock_hold(peer);
+					unix_peer(sk) = peer;
+				}
+			}
+			cpt_release_buf(ctx);
+		}
+	}
+
+	rst_orphans(ctx);	/* NOTE(review): return value is ignored - confirm this is deliberate */
+
+	return 0;
+}
+
diff -uprN linux-2.6.16/kernel/cpt/rst_socket_in.c linux-2.6.16.ovz/kernel/cpt/rst_socket_in.c
--- linux-2.6.16/kernel/cpt/rst_socket_in.c	1969-12-31 19:00:00.000000000 -0500
+++ linux-2.6.16.ovz/kernel/cpt/rst_socket_in.c	2006-07-05 08:34:56.000000000 -0400
@@ -0,0 +1,494 @@
+/*
+ *
+ *  kernel/cpt/rst_socket_in.c
+ *
+ *  Copyright (C) 2000-2005  SWsoft
+ *  All rights reserved.
+ *
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/version.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/file.h>
+#include <linux/mm.h>
+#include <linux/errno.h>
+#include <linux/fs.h>
+#include <linux/socket.h>
+#include <linux/tcp.h>
+#include <linux/jhash.h>
+#include <net/sock.h>
+#include <net/tcp.h>
+#include <linux/ipv6.h>
+#include <linux/igmp.h>
+#include <net/addrconf.h>
+#include <net/inet6_connection_sock.h>
+
+#include "cpt_obj.h"
+#include "cpt_context.h"
+#include "cpt_mm.h"
+#include "cpt_socket.h"
+#include "cpt_kernel.h"
+
+static inline unsigned long jiffies_import(__u32 tmo)
+{
+	/* tmo is reinterpreted as a signed 32-bit delta and added to jiffies. */
+	return jiffies + (long)(__s32)tmo;
+}
+
+static inline __u32 tcp_jiffies_import(__u32 tmo)
+{
+	return tmo + (__u32)jiffies;	/* 32-bit sum of tmo and the low 32 bits of jiffies */
+}
+
+
+/* Refill sk's queues from the sub-records that follow the socket header at
+ * @pos; a record that is not an skb is tried as a socket attribute instead. */
+static int restore_queues(struct sock *sk, struct cpt_sock_image *si,
+			  loff_t pos, struct cpt_context *ctx)
+{
+	loff_t endpos;
+
+	endpos = pos + si->cpt_next;	/* record end, measured from the record start... */
+	pos = pos + si->cpt_hdrlen;	/* ...so it is taken before skipping the header */
+	while (pos < endpos) {
+		struct sk_buff *skb;
+		__u32 type;
+
+		skb = rst_skb(&pos, NULL, &type, ctx);
+		if (IS_ERR(skb)) {
+			int err = PTR_ERR(skb);
+			if (err == -EINVAL)	/* not an skb: try it as an attribute */
+				err = rst_sock_attr(&pos, sk, ctx);
+			if (err)
+				return err;
+			continue;	/* attribute restored, go on with the next record */
+		}
+
+		if (sk->sk_type == SOCK_STREAM) {
+			if (type == CPT_SKB_RQ) {
+				sk_stream_set_owner_r(skb, sk);
+				ub_tcprcvbuf_charge_forced(sk, skb);
+				skb_queue_tail(&sk->sk_receive_queue, skb);
+			} else if (type == CPT_SKB_OFOQ) {
+				struct tcp_sock *tp = tcp_sk(sk);
+				sk_stream_set_owner_r(skb, sk);
+				ub_tcprcvbuf_charge_forced(sk, skb);
+				skb_queue_tail(&tp->out_of_order_queue, skb);
+			} else if (type == CPT_SKB_WQ) {
+				sk->sk_wmem_queued += skb->truesize;	/* write-queue accounting is done by hand */
+				sk->sk_forward_alloc -= skb->truesize;
+				ub_tcpsndbuf_charge_forced(sk, skb);
+				skb_queue_tail(&sk->sk_write_queue, skb);
+			} else {
+				wprintk_ctx("strange stream queue type %u\n", type);
+				kfree_skb(skb);
+			}
+		} else {
+			if (type == CPT_SKB_RQ) {
+				skb_set_owner_r(skb, sk);
+				skb_queue_tail(&sk->sk_receive_queue, skb);
+			} else if (type == CPT_SKB_WQ) {
+				struct inet_sock *inet = inet_sk(sk);
+				if (inet->cork.fragsize) {	/* write-queue skbs are kept only while cork state is set */
+					skb_set_owner_w(skb, sk);
+					skb_queue_tail(&sk->sk_write_queue, skb);
+				} else {
+					eprintk_ctx("cork skb is dropped\n");
+					kfree_skb(skb);
+				}
+			} else {
+				wprintk_ctx("strange dgram queue type %u\n", type);
+				kfree_skb(skb);
+			}
+		}
+	}
+	return 0;
+}
+
+static struct sock *find_parent(__u16 sport, cpt_context_t *ctx)
+{
+	cpt_object_t *it;
+	for_each_object(it, CPT_OBJ_SOCKET) {
+		struct sock *cand = it->o_obj;	/* NULL while the socket is not opened yet */
+		if (!cand || cand->sk_state != TCP_LISTEN ||
+		    (cand->sk_family != AF_INET && cand->sk_family != AF_INET6))
+			continue;
+		if (inet_sk(cand)->sport == sport)
+			return cand;	/* first inet listener with this source port */
+	}
+	return NULL;
+}
+
+static int rst_socket_tcp(struct cpt_sock_image *si, loff_t pos, struct sock *sk,
+			  struct cpt_context *ctx)
+{
+	struct tcp_sock *tp = tcp_sk(sk);	/* restores TCP state of sk from the image; always returns 0 */
+	struct sk_buff *skb;
+	tp->pred_flags = si->cpt_pred_flags;
+	tp->rcv_nxt = si->cpt_rcv_nxt;
+	tp->snd_nxt = si->cpt_snd_nxt;
+	tp->snd_una = si->cpt_snd_una;
+	tp->snd_sml = si->cpt_snd_sml;
+	tp->rcv_tstamp = tcp_jiffies_import(si->cpt_rcv_tstamp);	/* 32-bit stamp: image value + current jiffies */
+	tp->lsndtime = tcp_jiffies_import(si->cpt_lsndtime);
+	tp->tcp_header_len = si->cpt_tcp_header_len;
+	inet_csk(sk)->icsk_ack.pending = si->cpt_ack_pending;
+	inet_csk(sk)->icsk_ack.quick = si->cpt_quick;
+	inet_csk(sk)->icsk_ack.pingpong = si->cpt_pingpong;
+	inet_csk(sk)->icsk_ack.blocked = si->cpt_blocked;
+	inet_csk(sk)->icsk_ack.ato = si->cpt_ato;
+	inet_csk(sk)->icsk_ack.timeout = jiffies_import(si->cpt_ack_timeout);	/* signed delta added to jiffies */
+	inet_csk(sk)->icsk_ack.lrcvtime = tcp_jiffies_import(si->cpt_lrcvtime);
+	inet_csk(sk)->icsk_ack.last_seg_size = si->cpt_last_seg_size;
+	inet_csk(sk)->icsk_ack.rcv_mss = si->cpt_rcv_mss;
+	tp->snd_wl1 = si->cpt_snd_wl1;
+	tp->snd_wnd = si->cpt_snd_wnd;
+	tp->max_window = si->cpt_max_window;
+	inet_csk(sk)->icsk_pmtu_cookie = si->cpt_pmtu_cookie;
+	tp->mss_cache = si->cpt_mss_cache;
+	tp->rx_opt.mss_clamp = si->cpt_mss_clamp;
+	inet_csk(sk)->icsk_ext_hdr_len = si->cpt_ext_header_len;
+	inet_csk(sk)->icsk_ca_state = si->cpt_ca_state;
+	inet_csk(sk)->icsk_retransmits = si->cpt_retransmits;
+	tp->reordering = si->cpt_reordering;
+	tp->frto_counter = si->cpt_frto_counter;
+	tp->frto_highmark = si->cpt_frto_highmark;
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,10)
+	// // tp->adv_cong = si->cpt_adv_cong;
+#endif
+	inet_csk(sk)->icsk_accept_queue.rskq_defer_accept = si->cpt_defer_accept;
+	inet_csk(sk)->icsk_backoff = si->cpt_backoff;
+	tp->srtt = si->cpt_srtt;
+	tp->mdev = si->cpt_mdev;
+	tp->mdev_max = si->cpt_mdev_max;
+	tp->rttvar = si->cpt_rttvar;
+	tp->rtt_seq = si->cpt_rtt_seq;
+	inet_csk(sk)->icsk_rto = si->cpt_rto;
+	tp->packets_out = si->cpt_packets_out;
+	tp->left_out = si->cpt_left_out;
+	tp->retrans_out = si->cpt_retrans_out;
+	tp->lost_out = si->cpt_lost_out;
+	tp->sacked_out = si->cpt_sacked_out;
+	tp->fackets_out = si->cpt_fackets_out;
+	tp->snd_ssthresh = si->cpt_snd_ssthresh;
+	tp->snd_cwnd = si->cpt_snd_cwnd;
+	tp->snd_cwnd_cnt = si->cpt_snd_cwnd_cnt;
+	tp->snd_cwnd_clamp = si->cpt_snd_cwnd_clamp;
+	tp->snd_cwnd_used = si->cpt_snd_cwnd_used;
+	tp->snd_cwnd_stamp = tcp_jiffies_import(si->cpt_snd_cwnd_stamp);
+	inet_csk(sk)->icsk_timeout = tcp_jiffies_import(si->cpt_timeout);	/* NOTE(review): 32-bit import, later used as a timer expiry - confirm on 64-bit */
+	tp->rcv_wnd = si->cpt_rcv_wnd;
+	tp->rcv_wup = si->cpt_rcv_wup;
+	tp->write_seq = si->cpt_write_seq;
+	tp->pushed_seq = si->cpt_pushed_seq;
+	tp->copied_seq = si->cpt_copied_seq;
+	tp->rx_opt.tstamp_ok = si->cpt_tstamp_ok;
+	tp->rx_opt.wscale_ok = si->cpt_wscale_ok;
+	tp->rx_opt.sack_ok = si->cpt_sack_ok;
+	tp->rx_opt.saw_tstamp = si->cpt_saw_tstamp;
+	tp->rx_opt.snd_wscale = si->cpt_snd_wscale;
+	tp->rx_opt.rcv_wscale = si->cpt_rcv_wscale;
+	tp->nonagle = si->cpt_nonagle;
+	tp->keepalive_probes = si->cpt_keepalive_probes;
+	tp->rx_opt.rcv_tsval = si->cpt_rcv_tsval;
+	tp->rx_opt.rcv_tsecr = si->cpt_rcv_tsecr;
+	tp->rx_opt.ts_recent = si->cpt_ts_recent;
+	tp->rx_opt.ts_recent_stamp = si->cpt_ts_recent_stamp;
+	tp->rx_opt.user_mss = si->cpt_user_mss;
+	tp->rx_opt.dsack = si->cpt_dsack;
+	tp->rx_opt.eff_sacks = si->cpt_num_sacks;	/* NOTE(review): eff_sacks and num_sacks both come from cpt_num_sacks - confirm */
+	tp->duplicate_sack[0].start_seq = si->cpt_sack_array[0];	/* slots 0-1: the duplicate SACK block */
+	tp->duplicate_sack[0].end_seq = si->cpt_sack_array[1];
+	tp->selective_acks[0].start_seq = si->cpt_sack_array[2];	/* slots 2-9: four selective ACK blocks */
+	tp->selective_acks[0].end_seq = si->cpt_sack_array[3];
+	tp->selective_acks[1].start_seq = si->cpt_sack_array[4];
+	tp->selective_acks[1].end_seq = si->cpt_sack_array[5];
+	tp->selective_acks[2].start_seq = si->cpt_sack_array[6];
+	tp->selective_acks[2].end_seq = si->cpt_sack_array[7];
+	tp->selective_acks[3].start_seq = si->cpt_sack_array[8];
+	tp->selective_acks[3].end_seq = si->cpt_sack_array[9];
+
+	tp->window_clamp = si->cpt_window_clamp;
+	tp->rcv_ssthresh = si->cpt_rcv_ssthresh;
+	inet_csk(sk)->icsk_probes_out = si->cpt_probes_out;
+	tp->rx_opt.num_sacks = si->cpt_num_sacks;
+	tp->advmss = si->cpt_advmss;
+	inet_csk(sk)->icsk_syn_retries = si->cpt_syn_retries;
+	tp->ecn_flags = si->cpt_ecn_flags;
+	tp->prior_ssthresh = si->cpt_prior_ssthresh;
+	tp->high_seq = si->cpt_high_seq;
+	tp->retrans_stamp = si->cpt_retrans_stamp;
+	tp->undo_marker = si->cpt_undo_marker;
+	tp->undo_retrans = si->cpt_undo_retrans;
+	tp->urg_seq = si->cpt_urg_seq;
+	tp->urg_data = si->cpt_urg_data;
+	inet_csk(sk)->icsk_pending = si->cpt_pending;
+	tp->urg_mode = si->cpt_urg_mode;
+	tp->snd_up = si->cpt_snd_up;
+	tp->keepalive_time = si->cpt_keepalive_time;
+	tp->keepalive_intvl = si->cpt_keepalive_intvl;
+	tp->linger2 = si->cpt_linger2;
+	/* sk_send_head: first write-queue skb whose seq is not before snd_nxt, else NULL. */
+	sk->sk_send_head = NULL;
+	for (skb = skb_peek(&sk->sk_write_queue);
+	     skb && skb != (struct sk_buff*)&sk->sk_write_queue;
+	     skb = skb->next) {
+		if (!after(tp->snd_nxt, TCP_SKB_CB(skb)->seq)) {
+			sk->sk_send_head = skb;
+			break;
+		}
+	}
+	/* Sockets that are neither closed nor listening are hashed and get their timers re-armed. */
+	if (sk->sk_state != TCP_CLOSE && sk->sk_state != TCP_LISTEN) {
+		struct inet_sock *inet = inet_sk(sk);
+		if (inet->num == 0) {	/* no local port number set yet */
+			cpt_object_t *lobj = NULL;
+
+			if ((int)si->cpt_parent != -1)
+				lobj = lookup_cpt_obj_byindex(CPT_OBJ_SOCKET, si->cpt_parent, ctx);
+
+			if (lobj && lobj->o_obj) {	/* preferred: the parent recorded in the image */
+				inet->num = ntohs(inet->sport);
+				local_bh_disable();
+				__inet_inherit_port(&tcp_hashinfo, lobj->o_obj, sk);
+				local_bh_enable();
+				dprintk_ctx("port inherited from parent\n");
+			} else {	/* fallback: any restored listener on the same source port */
+				struct sock *lsk = find_parent(inet->sport, ctx);
+				if (lsk) {
+					inet->num = ntohs(inet->sport);
+					local_bh_disable();
+					__inet_inherit_port(&tcp_hashinfo, lsk, sk);
+					local_bh_enable();
+					dprintk_ctx("port inherited\n");
+				} else {
+					eprintk_ctx("we are kinda lost...\n");
+				}
+			}
+		}
+
+		sk->sk_prot->hash(sk);
+
+		if (inet_csk(sk)->icsk_ack.pending&ICSK_ACK_TIMER)
+			sk_reset_timer(sk, &inet_csk(sk)->icsk_delack_timer, inet_csk(sk)->icsk_ack.timeout);
+		if (inet_csk(sk)->icsk_pending)
+			sk_reset_timer(sk, &inet_csk(sk)->icsk_retransmit_timer,
+				       inet_csk(sk)->icsk_timeout);
+		if (sock_flag(sk, SOCK_KEEPOPEN)) {
+			unsigned long expires = jiffies_import(si->cpt_ka_timeout);
+			if (time_after(jiffies, expires))	/* already in the past: fire one second from now */
+				expires = jiffies + HZ;
+			sk_reset_timer(sk, &sk->sk_timer, expires);
+		}
+	}
+
+	return 0;
+}
+
+
+/* Fill sk from socket image si (inet, UDP, IPv6, queues, TCP); always returns 0. */
+int rst_socket_in(struct cpt_sock_image *si, loff_t pos, struct sock *sk,
+		  struct cpt_context *ctx)
+{
+	struct inet_sock *inet = inet_sk(sk);
+
+	lock_sock(sk);
+	sk->sk_state = si->cpt_state;
+	/* Plain inet_sock fields are copied one to one from the image. */
+	inet->daddr = si->cpt_daddr;
+	inet->dport = si->cpt_dport;
+	inet->saddr = si->cpt_saddr;
+	inet->rcv_saddr = si->cpt_rcv_saddr;
+	inet->sport = si->cpt_sport;
+	inet->uc_ttl = si->cpt_uc_ttl;
+	inet->tos = si->cpt_tos;
+	inet->cmsg_flags = si->cpt_cmsg_flags;
+	inet->mc_index = si->cpt_mc_index;
+	inet->mc_addr = si->cpt_mc_addr;
+	inet->hdrincl = si->cpt_hdrincl;
+	inet->mc_ttl = si->cpt_mc_ttl;
+	inet->mc_loop = si->cpt_mc_loop;
+	inet->pmtudisc = si->cpt_pmtudisc;
+	inet->recverr = si->cpt_recverr;
+	inet->freebind = si->cpt_freebind;
+	inet->id = si->cpt_idcounter;
+	/* Cork state; the cork route is looked up again from the restored flow. */
+	inet->cork.flags = si->cpt_cork_flags;
+	inet->cork.fragsize = si->cpt_cork_fragsize;
+	inet->cork.length = si->cpt_cork_length;
+	inet->cork.addr = si->cpt_cork_addr;
+	inet->cork.fl.fl4_src = si->cpt_cork_saddr;
+	inet->cork.fl.fl4_dst = si->cpt_cork_daddr;
+	inet->cork.fl.oif = si->cpt_cork_oif;
+	if (inet->cork.fragsize) {
+		if (ip_route_output_key(&inet->cork.rt, &inet->cork.fl)) {
+			eprintk_ctx("failed to restore cork route\n");
+			inet->cork.fragsize = 0;
+		}
+	}
+
+	if (sk->sk_type == SOCK_DGRAM && sk->sk_protocol == IPPROTO_UDP) {
+		struct udp_sock *up = udp_sk(sk);
+		up->pending = si->cpt_udp_pending;
+		up->corkflag = si->cpt_udp_corkflag;
+		up->encap_type = si->cpt_udp_encap;
+		up->len = si->cpt_udp_len;
+	}
+
+	if (sk->sk_family == AF_INET6) {
+		struct ipv6_pinfo *np = inet6_sk(sk);
+
+		memcpy(&np->saddr, si->cpt_saddr6, 16);
+		memcpy(&np->rcv_saddr, si->cpt_rcv_saddr6, 16);
+		memcpy(&np->daddr, si->cpt_daddr6, 16);
+		np->flow_label = si->cpt_flow_label6;
+		np->frag_size = si->cpt_frag_size6;
+		np->hop_limit = si->cpt_hop_limit6;
+		np->mcast_hops = si->cpt_mcast_hops6;
+		np->mcast_oif = si->cpt_mcast_oif6;
+		np->rxopt.all = si->cpt_rxopt6;
+		np->mc_loop = si->cpt_mc_loop6;
+		np->recverr = si->cpt_recverr6;
+		np->sndflow = si->cpt_sndflow6;
+		np->pmtudisc = si->cpt_pmtudisc6;
+		np->ipv6only = si->cpt_ipv6only6;
+		/* TCP sockets flagged cpt_mapped get the ipv6_mapped af_ops. */
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+		if (si->cpt_mapped) {
+			extern struct inet_connection_sock_af_ops ipv6_mapped;
+			if (sk->sk_type == SOCK_STREAM &&
+			    sk->sk_protocol == IPPROTO_TCP) {
+				inet_csk(sk)->icsk_af_ops = &ipv6_mapped;
+				sk->sk_backlog_rcv = tcp_v4_do_rcv;
+			}
+		}
+#endif
+	}
+	/* NOTE(review): results of restore_queues() and rst_socket_tcp() are ignored. */
+	restore_queues(sk, si, pos, ctx);
+
+	if (sk->sk_type == SOCK_STREAM && sk->sk_protocol == IPPROTO_TCP)
+		rst_socket_tcp(si, pos, sk, ctx);
+
+	release_sock(sk);
+	return 0;
+}
+
+int cpt_attach_accept(struct sock *lsk, struct sock *sk, cpt_context_t *ctx)
+{
+	struct request_sock *rsk;	/* request entry that carries sk */
+
+	/* Refuse anything but a listening parent. */
+	if (lsk->sk_state != TCP_LISTEN)
+		return -EINVAL;
+	if ((rsk = reqsk_alloc(&tcp_request_sock_ops)) == NULL)
+		return -ENOMEM;
+
+	/* Detach sk from any struct socket / wait queue before queueing it. */
+	sk->sk_sleep = NULL;
+	sk->sk_socket = NULL;
+	inet_csk_reqsk_queue_add(lsk, rsk, sk);
+	return 0;
+}
+
+static __inline__ u32 __tcp_v4_synq_hash(u32 raddr, u16 rport, u32 rnd)
+{	/* jhash of (raddr, rport) seeded with rnd, masked by TCP_SYNQ_HSIZE - 1 */
+	return (jhash_2words(raddr, (u32) rport, rnd) & (TCP_SYNQ_HSIZE - 1));
+}
+
+/* Rebuild sk's SYN-wait queue from the OPENREQ records that follow its image. */
+int rst_restore_synwait_queue(struct sock *sk, struct cpt_sock_image *si,
+			      loff_t pos, struct cpt_context *ctx)
+{
+	int err;
+	loff_t end = pos + si->cpt_next;	/* cpt_next is a length, pos is absolute */
+
+	pos += si->cpt_hdrlen;
+	while (pos < end) {
+		struct cpt_openreq_image oi;
+
+		err = rst_get_object(CPT_OBJ_OPENREQ, pos, &oi, ctx);
+		if (err) {	/* not readable as OPENREQ: hand it to rst_sock_attr() */
+			err = rst_sock_attr(&pos, sk, ctx);
+			if (err)
+				return err;
+			continue;
+		}
+		if (oi.cpt_object == CPT_OBJ_OPENREQ) {
+			struct request_sock *req = reqsk_alloc(&tcp_request_sock_ops);
+			if (req == NULL)
+				return -ENOMEM;
+			memset(req, 0, sizeof(*req));
+			req->rsk_ops = &tcp_request_sock_ops;	/* wiped by the memset() */
+			inet_rsk(req)->opt = NULL;	/* not covered by the memset() */
+			tcp_rsk(req)->rcv_isn = oi.cpt_rcv_isn;
+			tcp_rsk(req)->snt_isn = oi.cpt_snt_isn;
+			inet_rsk(req)->rmt_port = oi.cpt_rmt_port;
+			req->mss = oi.cpt_mss;
+			req->retrans = oi.cpt_retrans;
+			inet_rsk(req)->snd_wscale = oi.cpt_snd_wscale;
+			inet_rsk(req)->rcv_wscale = oi.cpt_rcv_wscale;
+			inet_rsk(req)->tstamp_ok = oi.cpt_tstamp_ok;
+			inet_rsk(req)->sack_ok = oi.cpt_sack_ok;
+			inet_rsk(req)->wscale_ok = oi.cpt_wscale_ok;
+			inet_rsk(req)->ecn_ok = oi.cpt_ecn_ok;
+			inet_rsk(req)->acked = oi.cpt_acked;
+			req->window_clamp = oi.cpt_window_clamp;
+			req->rcv_wnd = oi.cpt_rcv_wnd;
+			req->ts_recent = oi.cpt_ts_recent;
+			req->expires = jiffies_import(oi.cpt_expires);
+			if (oi.cpt_family == AF_INET) {
+				memcpy(&inet_rsk(req)->loc_addr, oi.cpt_loc_addr, 4);
+				memcpy(&inet_rsk(req)->rmt_addr, oi.cpt_rmt_addr, 4);
+				inet_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT);
+			} else {	/* NOTE(review): req still uses the IPv4 request ops */
+#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
+				memcpy(&inet6_rsk(req)->loc_addr, oi.cpt_loc_addr, 16);
+				memcpy(&inet6_rsk(req)->rmt_addr, oi.cpt_rmt_addr, 16);
+				inet6_rsk(req)->iif = oi.cpt_iif;
+				inet6_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT);
+#endif
+			}
+		}
+		pos += oi.cpt_next;
+	}
+	return 0;
+}
+
+/* Join sk to the IPv4 group in v; a non-zero mode or extra payload is rejected. */
+int rst_sk_mcfilter_in(struct sock *sk, struct cpt_sockmc_image *v,
+		       loff_t pos, cpt_context_t *ctx)
+{
+	struct ip_mreqn mreq;
+
+	if (v->cpt_mode != 0 || v->cpt_hdrlen != v->cpt_next) {
+		eprintk_ctx("IGMPv3 is still not supported\n");
+		return -EINVAL;
+	}
+	memset(&mreq, 0, sizeof(mreq));
+	mreq.imr_multiaddr.s_addr = v->cpt_mcaddr[0];
+	mreq.imr_ifindex = v->cpt_ifindex;
+	return ip_mc_join_group(sk, &mreq);
+}
+
+#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
+/* Join sk to the IPv6 group in v; a non-zero mode or extra payload is rejected. */
+int rst_sk_mcfilter_in6(struct sock *sk, struct cpt_sockmc_image *v,
+			loff_t pos, cpt_context_t *ctx)
+{
+	struct in6_addr *group = (struct in6_addr*)v->cpt_mcaddr;
+
+	if (v->cpt_mode != 0 || v->cpt_hdrlen != v->cpt_next) {
+		eprintk_ctx("IGMPv3 is still not supported\n");
+		return -EINVAL;
+	}
+	return ipv6_sock_mc_join(sk, v->cpt_ifindex, group);
+}
+#endif
diff -uprN linux-2.6.16/kernel/cpt/rst_sysvipc.c linux-2.6.16.ovz/kernel/cpt/rst_sysvipc.c
--- linux-2.6.16/kernel/cpt/rst_sysvipc.c	1969-12-31 19:00:00.000000000 -0500
+++ linux-2.6.16.ovz/kernel/cpt/rst_sysvipc.c	2006-07-05 08:34:56.000000000 -0400
@@ -0,0 +1,409 @@
+/*
+ *
+ *  kernel/cpt/rst_sysvipc.c
+ *
+ *  Copyright (C) 2000-2005  SWsoft
+ *  All rights reserved.
+ *
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/version.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/file.h>
+#include <linux/mm.h>
+#include <linux/errno.h>
+#include <linux/major.h>
+#include <linux/pipe_fs_i.h>
+#include <linux/mman.h>
+#include <linux/shm.h>
+/* FIXME. x86_64 has asm/ipc.h forgotten? */
+#include <asm-generic/ipc.h>
+#include <asm/uaccess.h>
+#include <asm/unistd.h>
+#include <ub/ub_mem.h>
+#include <linux/cpt_image.h>
+
+#include "cpt_obj.h"
+#include "cpt_context.h"
+#include "cpt_kernel.h"
+
+struct _warg {	/* argument bundle passed to fixup_one_shm() */
+		struct file		*file;	/* SHM file to match against shp->shm_file */
+		struct cpt_sysvshm_image	*v;	/* image supplying the attributes */
+};
+
+/* sysvipc_walk_shm() callback: 0 = not ours, 1 = fixed up, -EEXIST = attached. */
+static int fixup_one_shm(struct shmid_kernel *shp, void *arg)
+{
+	struct _warg *w = arg;
+	struct cpt_sysvshm_image *img = w->v;
+
+	if (shp->shm_file != w->file)
+		return 0;
+	if (shp->shm_nattch != 0)
+		return -EEXIST;
+
+	shp->shm_perm.mode = img->cpt_mode;
+	shp->shm_perm.uid = img->cpt_uid;
+	shp->shm_perm.gid = img->cpt_gid;
+	shp->shm_perm.cuid = img->cpt_cuid;
+	shp->shm_perm.cgid = img->cpt_cgid;
+	shp->shm_atim = img->cpt_atime;
+	shp->shm_dtim = img->cpt_dtime;
+	shp->shm_ctim = img->cpt_ctime;
+	shp->shm_cprid = img->cpt_creator;
+	shp->shm_lprid = img->cpt_last;
+	/* TODO: fix shp->mlock_user? */
+	return 1;
+}
+
+/* Apply the attributes saved in v to the SysV SHM segment backed by file. */
+static int fixup_shm(struct file *file, struct cpt_sysvshm_image *v)
+{
+	struct _warg walk_arg;
+
+	walk_arg.v = v;
+	walk_arg.file = file;
+	return sysvipc_walk_shm(fixup_one_shm, &walk_arg);
+}
+
+/* Copy the page blocks in image range [pos, end) into file via its ->write(). */
+static int fixup_shm_data(struct file *file, loff_t pos, loff_t end,
+			  struct cpt_context *ctx)
+{
+	struct cpt_page_block pgb;
+	ssize_t (*do_write)(struct file *, const char __user *, size_t, loff_t *ppos);
+
+	do_write = file->f_dentry->d_inode->i_fop->write;
+	if (do_write == NULL) {
+		eprintk_ctx("No TMPFS? Cannot restore content of SYSV SHM\n");
+		return -EINVAL;
+	}
+	while (pos < end) {
+		loff_t opos;	/* write offset inside the segment */
+		loff_t ipos;	/* read offset inside the image */
+		int count;
+		int err;
+
+		err = rst_get_object(CPT_OBJ_PAGES, pos, &pgb, ctx);
+		if (err)
+			return err;
+		dprintk_ctx("restoring SHM block: %08x-%08x\n",
+		       (__u32)pgb.cpt_start, (__u32)pgb.cpt_end);
+		ipos = pos + pgb.cpt_hdrlen;
+		opos = pgb.cpt_start;
+		count = pgb.cpt_end-pgb.cpt_start;
+		while (count > 0) {
+			mm_segment_t oldfs;
+			int copy = count;
+			/* Move at most one page per round through ctx->tmpbuf. */
+			if (copy > PAGE_SIZE)
+				copy = PAGE_SIZE;
+			(void)cpt_get_buf(ctx);
+			oldfs = get_fs(); set_fs(KERNEL_DS);
+			err = ctx->pread(ctx->tmpbuf, copy, ctx, ipos);
+			set_fs(oldfs);
+			if (err) {
+				__cpt_release_buf(ctx);
+				return err;
+			}
+			oldfs = get_fs(); set_fs(KERNEL_DS);
+			ipos += copy;
+			err = do_write(file, ctx->tmpbuf, copy, &opos);
+			set_fs(oldfs);
+			__cpt_release_buf(ctx);
+			if (err != copy) {	/* short writes count as failure too */
+				eprintk_ctx("write() failure\n");
+				if (err >= 0)
+					err = -EIO;
+				return err;
+			}
+			count -= copy;
+		}
+		pos += pgb.cpt_next;
+	}
+	return 0;
+}
+
+/* Recreate the SysV SHM segment behind the file image at pos, contents included. */
+struct file * rst_sysv_shm(loff_t pos, struct cpt_context *ctx)
+{
+	struct file *file;
+	int err;
+	loff_t dpos, epos;
+	union {		/* the three headers are read one after another */
+		struct cpt_file_image		fi;
+		struct cpt_sysvshm_image	shmi;
+		struct cpt_inode_image		ii;
+	} u;
+
+	err = rst_get_object(CPT_OBJ_FILE, pos, &u.fi, ctx);
+	if (err < 0)
+		return ERR_PTR(err);
+	pos = u.fi.cpt_inode;
+	err = rst_get_object(CPT_OBJ_INODE, pos, &u.ii, ctx);
+	if (err < 0)
+		return ERR_PTR(err);
+	dpos = pos + u.ii.cpt_hdrlen;
+	epos = pos + u.ii.cpt_next;
+	err = rst_get_object(CPT_OBJ_SYSV_SHM, dpos, &u.shmi, ctx);
+	if (err < 0)
+		return ERR_PTR(err);
+	dpos += u.shmi.cpt_next;	/* page blocks follow the SHM header */
+	file = sysvipc_setup_shm(u.shmi.cpt_key, u.shmi.cpt_id,
+				 u.shmi.cpt_segsz, u.shmi.cpt_mode);
+	if (IS_ERR(file))
+		return file;
+	err = fixup_shm(file, &u.shmi);	/* -EEXIST: segment is already attached */
+	if (err == -EEXIST || dpos >= epos)
+		return file;
+	err = fixup_shm_data(file, dpos, epos, ctx);
+	if (!err)
+		return file;
+	fput(file);	/* do not hand out a segment whose contents are missing */
+	return ERR_PTR(err < 0 ? err : -EIO);
+}
+
+/* sysvipc_walk_sem() callback: link undo record arg into current and into sma. */
+static int attach_one_undo(int semid, struct sem_array *sma, void *arg)
+{
+	struct sem_undo *undo = arg;
+	struct sem_undo_list *ulp = current->sysvsem.undo_list;
+
+	if (undo->semid != semid)
+		return 0;
+	/* Push onto the per-array chain ... */
+	undo->id_next = sma->undo;
+	sma->undo = undo;
+	/* ... and onto the per-process chain. */
+	undo->proc_next = ulp->proc_list;
+	ulp->proc_list = undo;
+	return 1;
+}
+
+static int attach_undo(struct sem_undo *su)
+{	/* hands su to attach_one_undo() through sysvipc_walk_sem() */
+	return sysvipc_walk_sem(attach_one_undo, su);
+}
+
+/* Build a fresh undo list for current from the SYSVSEM_UNDO object sui at pos. */
+static int do_rst_semundo(struct cpt_object_hdr *sui, loff_t pos, struct cpt_context *ctx)
+{
+	int err;
+	struct sem_undo_list *undo_list;
+
+	if (current->sysvsem.undo_list) {
+		eprintk_ctx("Funny undo_list\n");
+		return 0;
+	}
+
+	undo_list = ub_kmalloc(sizeof(struct sem_undo_list), GFP_KERNEL);
+	if (undo_list == NULL)
+		return -ENOMEM;
+	memset(undo_list, 0, sizeof(struct sem_undo_list));
+	atomic_set(&undo_list->refcnt, 1);
+	spin_lock_init(&undo_list->lock);
+	current->sysvsem.undo_list = undo_list;
+	/* Each record behind the header becomes one struct sem_undo. */
+	if (sui->cpt_next > sui->cpt_hdrlen) {
+		loff_t offset = pos + sui->cpt_hdrlen;
+		do {
+			struct sem_undo *new;
+			struct cpt_sysvsem_undo_image spi;
+			err = rst_get_object(CPT_OBJ_SYSVSEM_UNDO_REC, offset, &spi, ctx);
+			if (err)
+				goto out;
+			new = ub_kmalloc(sizeof(struct sem_undo) +
+					 sizeof(short)*spi.cpt_nsem, GFP_KERNEL);
+			if (!new) {
+				err = -ENOMEM;
+				goto out;
+			}
+			memset(new, 0, sizeof(struct sem_undo) + sizeof(short)*spi.cpt_nsem);
+			new->semadj = (short *) &new[1];	/* adjustments sit right behind the struct */
+			new->semid = spi.cpt_id;
+			err = ctx->pread(new->semadj, spi.cpt_nsem*sizeof(short), ctx, offset + spi.cpt_hdrlen);
+			if (err) {
+				kfree(new);
+				goto out;
+			}
+			err = attach_undo(new);
+			if (err <= 0) {
+				if (err == 0)	/* a zero result from the walk is treated as failure */
+					err = -ENOENT;
+				kfree(new);
+				goto out;
+			}
+			offset += spi.cpt_next;
+		} while (offset < pos + sui->cpt_next);
+	}
+	err = 0;
+
+out:
+	return err;
+}
+
+__u32 rst_semundo_flag(struct cpt_task_image *ti, struct cpt_context *ctx)
+{
+	__u32 flag = 0;
+	/* The CLONE_SYSVSEM decision below is compiled out, so 0 is always returned. */
+#if 0
+	if (ti->cpt_sysvsem_undo == CPT_NULL ||
+	    lookup_cpt_obj_bypos(CPT_OBJ_SYSVSEM_UNDO, ti->cpt_sysvsem_undo))
+		flag |= CLONE_SYSVSEM;
+#endif
+	return flag;
+}
+
+/* Give current the undo list recorded in ti: none, a shared one, or a new one. */
+int rst_semundo_complete(struct cpt_task_image *ti, struct cpt_context *ctx)
+{
+	int err;
+	struct sem_undo_list *f = current->sysvsem.undo_list;
+	cpt_object_t *obj;
+	struct cpt_object_hdr sui;
+
+	if (ti->cpt_sysvsem_undo == CPT_NULL) {
+		exit_sem(current);
+		return 0;
+	}
+
+	/* Already registered at this image position: share that list. */
+	obj = lookup_cpt_obj_bypos(CPT_OBJ_SYSVSEM_UNDO, ti->cpt_sysvsem_undo, ctx);
+	if (obj) {
+		if (obj->o_obj != f) {
+			exit_sem(current);
+			f = obj->o_obj;
+			atomic_inc(&f->refcnt);
+			current->sysvsem.undo_list = f;
+		}
+		return 0;
+	}
+
+	err = rst_get_object(CPT_OBJ_SYSVSEM_UNDO, ti->cpt_sysvsem_undo, &sui, ctx);
+	if (err)
+		return err;
+
+	err = do_rst_semundo(&sui, ti->cpt_sysvsem_undo, ctx);
+	if (err)
+		return err;
+
+	/* Register the list do_rst_semundo() installed, not the value sampled on
+	 * entry, and report a failed registration instead of returning 0. */
+	obj = cpt_object_add(CPT_OBJ_SYSVSEM_UNDO, current->sysvsem.undo_list, ctx);
+	if (obj == NULL)
+		return -ENOMEM;
+	cpt_obj_setpos(obj, ti->cpt_sysvsem_undo, ctx);
+	return 0;
+}
+
+struct _sarg {	/* argument bundle passed to fixup_one_sem() */
+	int semid;	/* id of the semaphore array to match */
+	struct cpt_sysvsem_image	*v;	/* image supplying the attributes */
+	__u32				*arr;	/* raw data copied into sma->sem_base */
+};
+
+/* sysvipc_walk_sem() callback: load saved attributes and values into array semid. */
+static int fixup_one_sem(int semid, struct sem_array *sma, void *arg)
+{
+	struct _sarg *sa = arg;
+	struct cpt_sysvsem_image *img = sa->v;
+
+	if (sa->semid != semid)
+		return 0;
+	sma->sem_perm.seq = img->cpt_seq;
+	sma->sem_perm.mode = img->cpt_mode;
+	sma->sem_perm.uid = img->cpt_uid;
+	sma->sem_perm.gid = img->cpt_gid;
+	sma->sem_perm.cuid = img->cpt_cuid;
+	sma->sem_perm.cgid = img->cpt_cgid;
+	sma->sem_otime = img->cpt_otime;
+	sma->sem_ctime = img->cpt_ctime;
+	memcpy(sma->sem_base, sa->arr, sma->sem_nsems*8);	/* 8 bytes per semaphore */
+	return 1;
+}
+
+/* Load image v and the raw values in arr into the semaphore array with id semid. */
+static int fixup_sem(int semid, struct cpt_sysvsem_image *v, __u32 *arr)
+{
+	struct _sarg walk_arg;
+
+	walk_arg.arr = arr;
+	walk_arg.v = v;
+	walk_arg.semid = semid;
+	return sysvipc_walk_sem(fixup_one_sem, &walk_arg);
+}
+
+
+/* Create the semaphore set described by si and load its saved values. */
+static int restore_sem(loff_t pos, struct cpt_sysvsem_image *si,
+		       struct cpt_context *ctx)
+{
+	int rc;
+	int nsems = (si->cpt_next - si->cpt_hdrlen)/8;	/* 8 bytes per semaphore */
+	__u32 *vals = kmalloc(nsems*8, GFP_KERNEL);
+
+	if (vals == NULL)
+		return -ENOMEM;
+	rc = ctx->pread(vals, nsems*8, ctx, pos+si->cpt_hdrlen);
+	if (rc)
+		goto done;
+	rc = sysvipc_setup_sem(si->cpt_key, si->cpt_id, nsems, si->cpt_mode);
+	if (rc < 0) {
+		eprintk_ctx("SEM 3\n");
+		goto done;
+	}
+	/* 0 from fixup_sem() maps to -ESRCH, a positive result to success. */
+	rc = fixup_sem(si->cpt_id, si, vals);
+	if (rc == 0)
+		rc = -ESRCH;
+	else if (rc > 0)
+		rc = 0;
+done:
+	kfree(vals);
+	return rc;
+}
+
+/* Restore every semaphore set found in the CPT_SECT_SYSV_SEM section, if any. */
+static int rst_sysv_sem(struct cpt_context *ctx)
+{
+	struct cpt_section_hdr hdr;
+	struct cpt_sysvsem_image img;
+	loff_t cur = ctx->sections[CPT_SECT_SYSV_SEM];
+	loff_t stop;
+	int err;
+
+	if (cur == CPT_NULL)
+		return 0;
+
+	err = ctx->pread(&hdr, sizeof(hdr), ctx, cur);
+	if (err)
+		return err;
+	if (hdr.cpt_section != CPT_SECT_SYSV_SEM || hdr.cpt_hdrlen < sizeof(hdr))
+		return -EINVAL;
+
+	/* Step through the section, advancing by each object's cpt_next. */
+	stop = cur + hdr.cpt_next;
+	for (cur += hdr.cpt_hdrlen; cur < stop; cur += img.cpt_next) {
+		err = rst_get_object(CPT_OBJ_SYSV_SEM, cur, &img, ctx);
+		if (err)
+			return err;
+		err = restore_sem(cur, &img, ctx);
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
+int rst_sysv_ipc(struct cpt_context *ctx)
+{
+	return rst_sysv_sem(ctx);	/* only the semaphore section is restored from here */
+}
diff -uprN linux-2.6.16/kernel/cpt/rst_tty.c linux-2.6.16.ovz/kernel/cpt/rst_tty.c
--- linux-2.6.16/kernel/cpt/rst_tty.c	1969-12-31 19:00:00.000000000 -0500
+++ linux-2.6.16.ovz/kernel/cpt/rst_tty.c	2006-07-05 08:34:56.000000000 -0400
@@ -0,0 +1,380 @@
+/*
+ *
+ *  kernel/cpt/rst_tty.c
+ *
+ *  Copyright (C) 2000-2005  SWsoft
+ *  All rights reserved.
+ *
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/version.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/file.h>
+#include <linux/mm.h>
+#include <linux/errno.h>
+#include <linux/major.h>
+#include <linux/pipe_fs_i.h>
+#include <linux/mman.h>
+#include <linux/mount.h>
+#include <linux/tty.h>
+#include <linux/vmalloc.h>
+#include <asm/unistd.h>
+#include <asm/uaccess.h>
+#include <linux/cpt_image.h>
+
+#include "cpt_obj.h"
+#include "cpt_context.h"
+#include "cpt_mm.h"
+#include "cpt_files.h"
+#include "cpt_kernel.h"
+
+/* Load saved tty state, termios and any buffered input from image pi into stty. */
+static int pty_setup(struct tty_struct *stty, loff_t pos,
+		     struct cpt_tty_image *pi, struct cpt_context *ctx)
+{
+	unsigned long flags;
+
+	stty->pgrp = -1;
+	stty->session = 0;
+	stty->packet = pi->cpt_packet;
+	stty->stopped = pi->cpt_stopped;
+	stty->hw_stopped = pi->cpt_hw_stopped;
+	stty->flow_stopped = pi->cpt_flow_stopped;
+#define DONOT_CHANGE ((1<<TTY_CHARGED)|(1<<TTY_CLOSING)|(1<<TTY_LDISC))
+	flags = stty->flags & DONOT_CHANGE;	/* these bits keep their live value */
+	stty->flags = flags | (pi->cpt_flags & ~DONOT_CHANGE);
+	stty->ctrl_status = pi->cpt_ctrl_status;
+	stty->winsize.ws_row = pi->cpt_ws_row;
+	stty->winsize.ws_col = pi->cpt_ws_col;
+	stty->winsize.ws_ypixel = pi->cpt_ws_prow;
+	stty->winsize.ws_xpixel = pi->cpt_ws_pcol;
+	stty->canon_column = pi->cpt_canon_column;
+	stty->column = pi->cpt_column;
+	stty->raw = pi->cpt_raw;
+	stty->real_raw = pi->cpt_real_raw;
+	stty->erasing = pi->cpt_erasing;
+	stty->lnext = pi->cpt_lnext;
+	stty->icanon = pi->cpt_icanon;
+	stty->closing = pi->cpt_closing;
+	stty->minimum_to_wake = pi->cpt_minimum_to_wake;
+	/* termios settings and the read_flags bitmap */
+	stty->termios->c_iflag = pi->cpt_c_iflag;
+	stty->termios->c_oflag = pi->cpt_c_oflag;
+	stty->termios->c_lflag = pi->cpt_c_lflag;
+	stty->termios->c_cflag = pi->cpt_c_cflag;
+	memcpy(&stty->termios->c_cc, &pi->cpt_c_cc, NCCS);
+	memcpy(stty->read_flags, pi->cpt_read_flags, sizeof(stty->read_flags));
+	/* Optional BITS object with pending input. NOTE(review): cpt_size is not bounded here. */
+	if (pi->cpt_next > pi->cpt_hdrlen) {
+		int err;
+		struct cpt_obj_bits b;
+		err = rst_get_object(CPT_OBJ_BITS, pos + pi->cpt_hdrlen, &b, ctx);
+		if (err)
+			return err;
+		if (b.cpt_size == 0)
+			return 0;
+		err = ctx->pread(stty->read_buf, b.cpt_size, ctx, pos + pi->cpt_hdrlen + b.cpt_hdrlen);
+		if (err)
+			return err;
+		spin_lock_irq(&stty->read_lock);
+		stty->read_tail = 0;
+		stty->read_cnt = b.cpt_size;
+		stty->read_head = b.cpt_size;
+		stty->canon_head = stty->read_tail + pi->cpt_canon_head;
+		stty->canon_data = pi->cpt_canon_data;
+		spin_unlock_irq(&stty->read_lock);
+	}
+
+	return 0;
+}
+
+/* Find slave/master tty in image, when we already know master/slave, and set
+ * up stty from it.  Returns the image position of the pair or CPT_NULL.
+ * It might be optimized, of course. */
+static loff_t find_pty_pair(struct tty_struct *stty, loff_t pos, struct cpt_tty_image *pi, struct cpt_context *ctx)
+{
+	loff_t sec = ctx->sections[CPT_SECT_TTY];
+	loff_t endsec;
+	struct cpt_section_hdr h;
+	struct cpt_tty_image *pibuf;
+
+	if (ctx->pread(&h, sizeof(h), ctx, sec))
+		return CPT_NULL;
+	if (h.cpt_section != CPT_SECT_TTY || h.cpt_hdrlen < sizeof(h))
+		return CPT_NULL;
+	pibuf = kmalloc(sizeof(*pibuf), GFP_KERNEL);
+	if (pibuf == NULL) {
+		eprintk_ctx("cannot allocate buffer\n");
+		return CPT_NULL;
+	}
+	endsec = sec + h.cpt_next;
+	sec += h.cpt_hdrlen;
+	while (sec < endsec) {
+		if (rst_get_object(CPT_OBJ_TTY, sec, pibuf, ctx))
+			break;	/* fall through to the common kfree() */
+		if (pibuf->cpt_index == pi->cpt_index &&
+		    !((pi->cpt_drv_flags^pibuf->cpt_drv_flags)&TTY_DRIVER_DEVPTS_MEM) &&
+		    pos != sec) {
+			pty_setup(stty, sec, pibuf, ctx);
+			kfree(pibuf);	/* was leaked on this path */
+			return sec;
+		}
+		sec += pibuf->cpt_next;
+	}
+	kfree(pibuf);
+	return CPT_NULL;
+}
+
+/* Apply the uid, gid and mode saved in ii to the inode behind master. */
+static int fixup_tty_attrs(struct cpt_inode_image *ii, struct file *master,
+			   struct cpt_context *ctx)
+{
+	struct dentry *dentry = master->f_dentry;
+	struct iattr attr;
+	int rc;
+
+	attr.ia_mode = ii->cpt_mode;
+	attr.ia_gid = ii->cpt_gid;
+	attr.ia_uid = ii->cpt_uid;
+	attr.ia_valid = ATTR_UID|ATTR_GID|ATTR_MODE;
+
+	mutex_lock(&dentry->d_inode->i_mutex);
+	rc = notify_change(dentry, &attr);
+	mutex_unlock(&dentry->d_inode->i_mutex);
+	return rc;
+}
+
+/* NOTE: "portable", but ugly thing. To allocate /dev/pts/N, we open
+ * /dev/ptmx until we get pty with desired index.
+ */
+
+struct file *ptmx_open(int index, unsigned int flags)
+{
+	struct file **held = NULL;	/* page of wrong-index masters kept open */
+	struct file *f;
+	int n = 0;
+
+	for (;;) {
+		struct tty_struct *tty;
+
+		f = filp_open("/dev/ptmx", flags|O_NONBLOCK|O_NOCTTY|O_RDWR, 0);
+		if (IS_ERR(f))
+			break;
+		tty = f->private_data;
+		if (tty->index == index)
+			break;
+
+		/* Wrong index: keep it open and try again, up to one page of files. */
+		if (n == PAGE_SIZE/sizeof(struct file *)) {
+			fput(f);
+			f = ERR_PTR(-EBUSY);
+			break;
+		}
+		if (held == NULL) {
+			held = (struct file **)__get_free_page(GFP_KERNEL);
+			if (!held) {
+				fput(f);
+				f = ERR_PTR(-ENOMEM);
+				break;
+			}
+		}
+		held[n++] = f;
+	}
+
+	/* Release every file that was only held as a placeholder, newest first. */
+	while (n > 0)
+		fput(held[--n]);
+	if (held)
+		free_page((unsigned long)held);
+	return f;
+}
+
+
+/* Reopen the pty pair behind file image fi and return the end that fi refers to. */
+struct file * rst_open_tty(struct cpt_file_image *fi, struct cpt_inode_image *ii,
+			   unsigned flags, struct cpt_context *ctx)
+{
+	int err;
+	cpt_object_t *obj;
+	struct file *master, *slave;
+	struct tty_struct *stty;
+	struct cpt_tty_image *pi;
+	static char *a = "pqrstuvwxyzabcde";	/* NOTE(review): indexed by index/16 without a range check */
+	static char *b = "0123456789abcdef";
+	char pairname[16];	/* NOTE(review): filled by sprintf(); "/dev/pts/%d" fits only up to 6 digits */
+	unsigned master_flags, slave_flags;
+
+	if (fi->cpt_priv == CPT_NULL)
+		return ERR_PTR(-EINVAL);
+	/* A tty object with a file already attached: reuse that file or reopen its dentry. */
+	obj = lookup_cpt_obj_bypos(CPT_OBJ_TTY, fi->cpt_priv, ctx);
+	if (obj && obj->o_parent) {
+		dprintk_ctx("obtained pty as pair to existing\n");
+		master = obj->o_parent;
+		stty = master->private_data;
+		if (stty->driver->subtype == PTY_TYPE_MASTER &&
+		    (stty->driver->flags&TTY_DRIVER_DEVPTS_MEM)) {
+			wprintk_ctx("cloning ptmx\n");
+			get_file(master);
+			return master;
+		}
+
+		master = dentry_open(dget(master->f_dentry),
+				     mntget(master->f_vfsmnt), flags);
+		if (!IS_ERR(master)) {
+			stty = master->private_data;
+			if (stty->driver->subtype != PTY_TYPE_MASTER)
+				fixup_tty_attrs(ii, master, ctx);
+		}
+		return master;
+	}
+	/* Nothing attached yet: read the tty image and open both ends of the pair. */
+	pi = cpt_get_buf(ctx);
+	err = rst_get_object(CPT_OBJ_TTY, fi->cpt_priv, pi, ctx);
+	if (err) {
+		cpt_release_buf(ctx);
+		return ERR_PTR(err);
+	}
+
+	master_flags = slave_flags = 0;
+	if (pi->cpt_drv_subtype == PTY_TYPE_MASTER)
+		master_flags = flags;
+	else
+		slave_flags = flags;
+
+	/*
+	 * Open pair master/slave.
+	 */
+	if (pi->cpt_drv_flags&TTY_DRIVER_DEVPTS_MEM) {
+		master = ptmx_open(pi->cpt_index, master_flags);
+	} else {
+		sprintf(pairname, "/dev/pty%c%c", a[pi->cpt_index/16], b[pi->cpt_index%16]);
+		master = filp_open(pairname, master_flags|O_NONBLOCK|O_NOCTTY|O_RDWR, 0);
+	}
+	if (IS_ERR(master)) {
+		eprintk_ctx("filp_open master: %Ld %ld\n", fi->cpt_priv, PTR_ERR(master));
+		cpt_release_buf(ctx);
+		return master;
+	}
+	stty = master->private_data;
+	clear_bit(TTY_PTY_LOCK, &stty->flags);
+	if (pi->cpt_drv_flags&TTY_DRIVER_DEVPTS_MEM)
+		sprintf(pairname, "/dev/pts/%d", stty->index);
+	else
+		sprintf(pairname, "/dev/tty%c%c", a[stty->index/16], b[stty->index%16]);
+	slave = filp_open(pairname, slave_flags|O_NONBLOCK|O_NOCTTY|O_RDWR, 0);
+	if (IS_ERR(slave)) {
+		eprintk_ctx("filp_open slave %s: %ld\n", pairname, PTR_ERR(slave));
+		fput(master);
+		cpt_release_buf(ctx);
+		return slave;
+	}
+
+	if (pi->cpt_drv_subtype != PTY_TYPE_MASTER)
+		fixup_tty_attrs(ii, slave, ctx);
+	/* NOTE(review): results of cpt_object_add()/lookup_cpt_object() below are not checked. */
+	cpt_object_add(CPT_OBJ_TTY, master->private_data, ctx);
+	cpt_object_add(CPT_OBJ_TTY, slave->private_data, ctx);
+	cpt_object_add(CPT_OBJ_FILE, master, ctx);
+	cpt_object_add(CPT_OBJ_FILE, slave, ctx);
+
+	if (pi->cpt_drv_subtype == PTY_TYPE_MASTER) {
+		loff_t pos;
+		obj = lookup_cpt_object(CPT_OBJ_TTY, master->private_data, ctx);
+		obj->o_parent = master;
+		cpt_obj_setpos(obj, fi->cpt_priv, ctx);
+		pty_setup(stty, fi->cpt_priv, pi, ctx);
+
+		obj = lookup_cpt_object(CPT_OBJ_TTY, slave->private_data, ctx);
+		obj->o_parent = slave;
+		pos = find_pty_pair(stty->link, fi->cpt_priv, pi, ctx);
+		cpt_obj_setpos(obj, pos, ctx);
+
+		obj = lookup_cpt_object(CPT_OBJ_FILE, slave, ctx);
+		cpt_obj_setpos(obj, CPT_NULL, ctx);
+		get_file(master);
+		cpt_release_buf(ctx);
+		return master;
+	} else {
+		loff_t pos;
+		obj = lookup_cpt_object(CPT_OBJ_TTY, slave->private_data, ctx);
+		obj->o_parent = slave;
+		cpt_obj_setpos(obj, fi->cpt_priv, ctx);
+		pty_setup(stty->link, fi->cpt_priv, pi, ctx);
+
+		obj = lookup_cpt_object(CPT_OBJ_TTY, master->private_data, ctx);
+		obj->o_parent = master;
+		pos = find_pty_pair(stty, fi->cpt_priv, pi, ctx);
+		cpt_obj_setpos(obj, pos, ctx);
+
+		obj = lookup_cpt_object(CPT_OBJ_FILE, master, ctx);
+		cpt_obj_setpos(obj, CPT_NULL, ctx);
+		get_file(slave);
+		cpt_release_buf(ctx);
+		return slave;
+	}
+}
+
+/* Walk the TTY section and set pgrp/session on each tty object registered by position. */
+int rst_tty_jobcontrol(struct cpt_context *ctx)
+{
+	int err;
+	loff_t sec = ctx->sections[CPT_SECT_TTY];
+	loff_t endsec;
+	struct cpt_section_hdr h;
+
+	err = ctx->pread(&h, sizeof(h), ctx, sec);
+	if (err)
+		return err;
+	if (h.cpt_section != CPT_SECT_TTY || h.cpt_hdrlen < sizeof(h))
+		return -EINVAL;
+	endsec = sec + h.cpt_next;
+	sec += h.cpt_hdrlen;
+	while (sec < endsec) {
+		cpt_object_t *obj;
+		struct cpt_tty_image *pibuf = cpt_get_buf(ctx);
+
+		if (rst_get_object(CPT_OBJ_TTY, sec, pibuf, ctx)) {
+			cpt_release_buf(ctx);
+			return -EINVAL;
+		}
+		obj = lookup_cpt_obj_bypos(CPT_OBJ_TTY, sec, ctx);
+		if (obj) {
+			struct tty_struct *stty = obj->o_obj;
+			if ((int)pibuf->cpt_pgrp > 0) {
+				stty->pgrp = vpid_to_pid(pibuf->cpt_pgrp);
+				if (stty->pgrp == -1)
+					dprintk_ctx("unknown tty pgrp %d\n", pibuf->cpt_pgrp);
+			} else if (pibuf->cpt_pgrp) {
+				stty->pgrp = alloc_pidmap();	/* only the number is kept, see free below */
+				if (stty->pgrp < 0) {
+					eprintk_ctx("cannot allocate stray tty->pgrp");
+					cpt_release_buf(ctx);
+					return -EINVAL;
+				}
+				free_pidmap(stty->pgrp);
+			}
+			if ((int)pibuf->cpt_session > 0) {
+				int sess;
+				sess = vpid_to_pid(pibuf->cpt_session);
+				if (sess == -1) {
+					dprintk_ctx("unknown tty session %d\n", pibuf->cpt_session);
+				} else if (stty->session <= 0) {
+					stty->session = sess;
+				} else if (stty->session != sess) {
+					wprintk_ctx("tty session mismatch 2\n");
+				}
+			}
+		}
+		sec += pibuf->cpt_next;	/* read before the buffer is released */
+		cpt_release_buf(ctx);
+	}
+	return 0;
+}
diff -uprN linux-2.6.16/kernel/cpt/rst_ubc.c linux-2.6.16.ovz/kernel/cpt/rst_ubc.c
--- linux-2.6.16/kernel/cpt/rst_ubc.c	1969-12-31 19:00:00.000000000 -0500
+++ linux-2.6.16.ovz/kernel/cpt/rst_ubc.c	2006-07-05 08:34:56.000000000 -0400
@@ -0,0 +1,108 @@
+/*
+ *
+ *  kernel/cpt/rst_ubc.c
+ *
+ *  Copyright (C) 2000-2005  SWsoft
+ *  All rights reserved.
+ *
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#include <linux/types.h>
+#include <ub/beancounter.h>
+
+#include "cpt_obj.h"
+#include "cpt_context.h"
+
+/* Beancounter registered at image position pos; falls back to get_exec_ub(). */
+struct user_beancounter *rst_lookup_ubc(__u64 pos, struct cpt_context *ctx)
+{
+	cpt_object_t *o = lookup_cpt_obj_bypos(CPT_OBJ_UBC, pos, ctx);
+
+	if (o != NULL)
+		return get_beancounter(o->o_obj);
+
+	printk(KERN_ERR "RST: unknown ub @%Lu\n", pos);
+	return get_beancounter(get_exec_ub());
+}
+
+static void restore_one_bc_parm(__u64 *dmp, struct ubparm *prm, int held)
+{	/* dmp[0..5]: barrier, limit, held, maxheld, minheld, failcnt */
+	prm->failcnt = dmp[5];
+	prm->minheld = dmp[4];
+	prm->maxheld = dmp[3];
+	if (held)	/* held is only taken over when the caller asks for it */
+		prm->held = dmp[2];
+	prm->limit = (dmp[1] != CPT_NULL) ? dmp[1] : UB_MAXVALUE;
+	prm->barrier = (dmp[0] != CPT_NULL) ? dmp[0] : UB_MAXVALUE;
+}
+
+/* Bind obj to the beancounter described by v and load every resource's record. */
+static int restore_one_bc(struct cpt_beancounter_image *v,
+		cpt_object_t *obj, struct cpt_context *ctx)
+{
+	struct user_beancounter *bc;
+	cpt_object_t *pobj;
+	int i;
+
+	if (v->cpt_parent != CPT_NULL) {
+		pobj = lookup_cpt_obj_bypos(CPT_OBJ_UBC, v->cpt_parent, ctx);
+		if (pobj == NULL)
+			return -ESRCH;
+		bc = get_subbeancounter_byid(pobj->o_obj, v->cpt_id, 1);
+	} else {
+		bc = get_exec_ub();
+		while (bc->parent)
+			bc = bc->parent;
+		get_beancounter(bc);
+	}
+	if (bc == NULL)
+		return -ENOMEM;
+	obj->o_obj = bc;
+	for (i = 0; i < UB_RESOURCES; i++) {	/* six words per resource; ub_store block follows ub_parms */
+		restore_one_bc_parm(v->cpt_parms + i * 6, bc->ub_parms + i, 0);
+		restore_one_bc_parm(v->cpt_parms + (UB_RESOURCES + i) * 6,
+				bc->ub_store + i, 1);
+	}
+	return 0;
+}
+
+/* Restore every beancounter of the CPT_SECT_UBC section, registering each by position. */
+int rst_undump_ubc(struct cpt_context *ctx)
+{
+	loff_t start, end;
+	struct cpt_beancounter_image *v;
+	cpt_object_t *obj;
+	int err;
+
+	err = rst_get_section(CPT_SECT_UBC, ctx, &start, &end);
+	if (err)
+		return err;
+
+	while (start < end) {
+		v = cpt_get_buf(ctx);
+		err = rst_get_object(CPT_OBJ_UBC, start, v, ctx);
+		if (err)
+			break;
+		obj = alloc_cpt_object(GFP_KERNEL, ctx);
+		cpt_obj_setpos(obj, start, ctx);
+		intern_cpt_object(CPT_OBJ_UBC, obj, ctx);
+		err = restore_one_bc(v, obj, ctx);	/* failure used to be ignored */
+		if (err)
+			break;
+		start += v->cpt_next;	/* read v while the buffer is still ours */
+		cpt_release_buf(ctx);
+	}
+	if (err)	/* left the loop with the buffer still held */
+		cpt_release_buf(ctx);
+	return err;
+}
+
+void rst_finish_ubc(struct cpt_context *ctx)
+{
+	cpt_object_t *obj;
+	/* Call put_beancounter() on every registered CPT_OBJ_UBC object. */
+	for_each_object(obj, CPT_OBJ_UBC)
+		put_beancounter(obj->o_obj);
+}
diff -uprN linux-2.6.16/kernel/cpt/rst_undump.c linux-2.6.16.ovz/kernel/cpt/rst_undump.c
--- linux-2.6.16/kernel/cpt/rst_undump.c	1969-12-31 19:00:00.000000000 -0500
+++ linux-2.6.16.ovz/kernel/cpt/rst_undump.c	2006-07-05 08:34:56.000000000 -0400
@@ -0,0 +1,819 @@
+/*
+ *
+ *  kernel/cpt/rst_undump.c
+ *
+ *  Copyright (C) 2000-2005  SWsoft
+ *  All rights reserved.
+ *
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/version.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/file.h>
+#include <linux/mm.h>
+#include <linux/errno.h>
+#include <linux/pagemap.h>
+#include <linux/namespace.h>
+#include <linux/personality.h>
+#include <linux/binfmts.h>
+#include <linux/smp_lock.h>
+#include <linux/ve_proto.h>
+#include <linux/virtinfo.h>
+#include <linux/compat.h>
+#include <linux/vzcalluser.h>
+#include <ub/beancounter.h>
+#include <asm/desc.h>
+#include <asm/unistd.h>
+
+#include "cpt_obj.h"
+#include "cpt_context.h"
+#include "cpt_files.h"
+#include "cpt_mm.h"
+#include "cpt_process.h"
+#include "cpt_socket.h"
+#include "cpt_net.h"
+#include "cpt_ubc.h"
+#include "cpt_kernel.h"
+
+static int rst_utsname(cpt_context_t *ctx);
+
+
+struct thr_context {
+	struct completion init_complete;	/* completed by hook() just before its first schedule() */
+	struct completion task_done;	/* completed by hook() when its restore work is over */
+	int error;	/* result stored by hook(), read by the creator after task_done */
+	struct cpt_context *ctx;
+	cpt_object_t	*tobj;	/* task object whose image hook() restores */
+};
+
+static int rst_clone_children(cpt_object_t *obj, struct cpt_context *ctx);
+
+static int vps_rst_veinfo(struct cpt_context *ctx)
+{
+	int err;
+	struct cpt_veinfo_image *i;
+	struct ve_struct *ve;
+	struct timespec delta;
+	loff_t start, end;
+	/* Apply the CPT_SECT_VEINFO record to the VE returned by get_exec_env(). */
+	err = rst_get_section(CPT_SECT_VEINFO, ctx, &start, &end);
+	if (err)
+		goto out;
+
+	i = cpt_get_buf(ctx);
+	err = rst_get_object(CPT_OBJ_VEINFO, start, i, ctx);
+	if (err)
+		goto out_rel;
+	/* shm, msg and sem control values are copied straight from the image. */
+	ve = get_exec_env();
+	ve->_shm_ctlall = i->shm_ctl_all;
+	ve->_shm_ctlmax = i->shm_ctl_max;
+	ve->_shm_ctlmni = i->shm_ctl_mni;
+
+	ve->_msg_ctlmax = i->msg_ctl_max;
+	ve->_msg_ctlmni = i->msg_ctl_mni;
+	ve->_msg_ctlmnb = i->msg_ctl_mnb;
+
+	BUG_ON(sizeof(ve->_sem_ctls) != sizeof(i->sem_ctl_arr));
+	ve->_sem_ctls[0] = i->sem_ctl_arr[0];
+	ve->_sem_ctls[1] = i->sem_ctl_arr[1];
+	ve->_sem_ctls[2] = i->sem_ctl_arr[2];
+	ve->_sem_ctls[3] = i->sem_ctl_arr[3];
+	/* Move the VE start time back by the deltas recorded in the image. */
+	cpt_timespec_import(&delta, i->start_timespec_delta);
+	_set_normalized_timespec(&ve->start_timespec,
+			ve->start_timespec.tv_sec - delta.tv_sec,
+			ve->start_timespec.tv_nsec - delta.tv_nsec);
+	ve->start_jiffies -= i->start_jiffies_delta;
+	/* FIXME: what??? start_cycles is left unadjusted; the disabled code was: */
+	/* ve->start_cycles -= i->start_jiffies_delta * cycles_per_jiffy; */
+
+	err = 0;
+out_rel:
+	cpt_release_buf(ctx);
+out:
+	return err;
+}
+
+static int vps_rst_reparent_root(cpt_object_t *obj, struct cpt_context *ctx)
+{
+	int err;
+	struct env_create_param2 param;
+	/* Call real_env_create(VE_CREATE|VE_LOCK) for ctx->ve_id, then set jiffies_fixup. */
+	ctx->cpt_jiffies64 = get_jiffies_64();
+	do_gettimespec(&ctx->delta_time);
+	/* delta_time = now - start_time, borrowing one second when nsec would underflow */
+	ctx->delta_time.tv_sec -= ctx->start_time.tv_sec;
+	if (ctx->start_time.tv_nsec > ctx->delta_time.tv_nsec) {
+		ctx->delta_time.tv_sec--;
+		ctx->delta_time.tv_nsec = 1000000000 - (ctx->start_time.tv_nsec - ctx->delta_time.tv_nsec);
+	} else {
+		ctx->delta_time.tv_nsec -= ctx->start_time.tv_nsec;
+	}
+
+	memset(&param, 0, sizeof(param));
+	param.iptables_mask = ctx->iptables_mask;
+
+	err = real_env_create(ctx->ve_id, VE_CREATE|VE_LOCK, 2, &param, sizeof(param));
+	if (err < 0)	/* NOTE(review): no return here, so jiffies_fixup below is written even on failure - confirm intended */
+		eprintk_ctx("real_env_create: %d\n", err);
+	get_exec_env()->jiffies_fixup = ((ctx->delta_time.tv_sec < 0) ?
+		0 : timespec_to_jiffies(&ctx->delta_time)) -
+		(unsigned long)(ctx->cpt_jiffies64 - ctx->virt_jiffies64);
+	return err < 0 ? err : 0;	/* non-negative results are reported as 0 */
+}
+
+
+static int hook(void *arg)
+{
+	struct thr_context *thr_ctx = arg;
+	struct cpt_context *ctx;
+	cpt_object_t *tobj;
+	struct cpt_task_image *ti;
+	int err = 0;
+	/* Body of every restored task; started by create_root_task()/make_baby(). */
+	current->state = TASK_UNINTERRUPTIBLE;
+	complete(&thr_ctx->init_complete);
+	schedule();
+	/* Running again: the creator has set thr_ctx->tobj by now. */
+	ctx = thr_ctx->ctx;
+	tobj = thr_ctx->tobj;
+	ti = tobj->o_image;
+
+	current->fs->umask = 0;
+	/* The task dumped with pid 1 also restores the VE-wide state. */
+	if (ti->cpt_pid == 1) {
+		err = vps_rst_reparent_root(tobj, ctx);
+
+		if (err) {
+			rst_report_error(err, ctx);
+			goto out;
+		}
+
+		memcpy(&get_exec_env()->cap_default, &ti->cpt_ecap, sizeof(kernel_cap_t));
+
+		if (ctx->statusfile) {
+			fput(ctx->statusfile);
+			ctx->statusfile = NULL;
+		}
+
+		if (ctx->lockfile) {
+			mm_segment_t oldfs;
+			ssize_t nread = -EINVAL;	/* was named err, shadowing the outer one */
+			char b;
+			/* One byte is read from the lock file; the result is not examined. */
+			oldfs = get_fs(); set_fs(KERNEL_DS);
+			if (ctx->lockfile->f_op && ctx->lockfile->f_op->read)
+				nread = ctx->lockfile->f_op->read(ctx->lockfile, &b, 1, &ctx->lockfile->f_pos);
+			set_fs(oldfs);
+			fput(ctx->lockfile);
+			ctx->lockfile = NULL;
+		}
+
+		err = vps_rst_veinfo(ctx);
+		if (err) {
+			eprintk_ctx("rst_veinfo: %d\n", err);
+			goto out;
+		}
+
+		err = rst_utsname(ctx);
+		if (err) {
+			eprintk_ctx("rst_utsname: %d\n", err);
+			goto out;
+		}
+
+		err = rst_root_namespace(ctx);
+		if (err) {
+			eprintk_ctx("rst_namespace: %d\n", err);
+			goto out;
+		}
+
+		if ((err = rst_restore_net(ctx)) != 0) {
+			eprintk_ctx("rst_restore_net: %d\n", err);
+			goto out;
+		}
+
+		err = rst_sockets(ctx);
+		if (err) {
+			eprintk_ctx("rst_sockets: %d\n", err);
+			goto out;
+		}
+		err = rst_sysv_ipc(ctx);
+		if (err) {
+			eprintk_ctx("rst_sysv_ipc: %d\n", err);
+			goto out;
+		}
+	}
+	/* Switch to the dumped uid; an alloc_uid() failure is only logged. */
+	do {
+		if (current->user->uid != ti->cpt_user) {
+			struct user_struct *u = alloc_uid(ti->cpt_user);
+			if (!u) {
+				eprintk_ctx("alloc_user\n");
+			} else {
+				switch_uid(u);
+			}
+		}
+	} while (0);
+
+	if ((err = rst_mm_complete(ti, ctx)) != 0) {
+		eprintk_ctx("rst_mm: %d\n", err);
+		goto out;
+	}
+
+	if ((err = rst_files_complete(ti, ctx)) != 0) {
+		eprintk_ctx("rst_files: %d\n", err);
+		goto out;
+	}
+
+	if ((err = rst_fs_complete(ti, ctx)) != 0) {
+		eprintk_ctx("rst_fs: %d\n", err);
+		goto out;
+	}
+
+	if ((err = rst_semundo_complete(ti, ctx)) != 0) {
+		eprintk_ctx("rst_semundo: %d\n", err);
+		goto out;
+	}
+
+	if ((err = rst_signal_complete(ti, ctx)) != 0) {
+		eprintk_ctx("rst_signal: %d\n", err);
+		goto out;
+	}
+
+	if (ti->cpt_namespace == CPT_NULL)
+		exit_namespace(current);
+
+	if (ti->cpt_personality != 0)
+		__set_personality(ti->cpt_personality);
+
+	current->set_child_tid = NULL;
+	current->clear_child_tid = NULL;
+	current->flags &= ~(PF_FORKNOEXEC|PF_SUPERPRIV);
+	current->flags |= ti->cpt_flags&(PF_FORKNOEXEC|PF_SUPERPRIV);
+	current->exit_code = ti->cpt_exit_code;
+	current->pdeath_signal = ti->cpt_pdeath_signal;
+	/* Only nanosleep restart blocks are known; anything else is just logged. */
+	if (ti->cpt_restart.fn != CPT_RBL_0) {
+		if (ti->cpt_restart.fn != CPT_RBL_NANOSLEEP
+		    && ti->cpt_restart.fn != CPT_RBL_COMPAT_NANOSLEEP
+		    ) {
+			eprintk_ctx("unknown restart block\n");
+		} else {
+			current->thread_info->restart_block.fn = nanosleep_restart;
+#ifdef CONFIG_X86_64
+			if (!ti->cpt_64bit)
+				current->thread_info->restart_block.fn = compat_nanosleep_restart;
+#endif
+			if (ctx->image_version != 0) {
+				current->thread_info->restart_block.arg0 = ti->cpt_restart.arg0;
+				current->thread_info->restart_block.arg1 = ti->cpt_restart.arg1;
+				current->thread_info->restart_block.arg2 = ti->cpt_restart.arg2;
+				current->thread_info->restart_block.arg3 = ti->cpt_restart.arg3;
+				if (debug_level > 2) {
+					ktime_t e, e1;
+					struct timespec now;
+
+					do_posix_clock_monotonic_gettime(&now);
+					e = timespec_to_ktime(now);
+					e1.tv64 = ((u64)current->thread_info->restart_block.arg1 << 32) | (u64) current->thread_info->restart_block.arg0;
+					e = ktime_sub(e1, e);
+					dprintk("rst " CPT_FID " RBL %ld/%ld %Ld\n", CPT_TID(current),
+						current->thread_info->restart_block.arg1,
+						current->thread_info->restart_block.arg0, e.tv64);
+				}
+			} else {
+				struct timespec now;
+				ktime_t expire;
+				unsigned long val = ti->cpt_restart.arg0 -
+					timespec_to_jiffies(&ctx->delta_time);
+				if ((long)val <= 0)
+					val = 1;
+				do_posix_clock_monotonic_gettime(&now);
+				expire = ktime_add_ns(timespec_to_ktime(now), (u64)val*TICK_NSEC);
+				current->thread_info->restart_block.arg0 = expire.tv64 & 0xFFFFFFFF;
+				current->thread_info->restart_block.arg1 = expire.tv64 >> 32;
+				current->thread_info->restart_block.arg2 = ti->cpt_restart.arg1;
+				current->thread_info->restart_block.arg3 = CLOCK_MONOTONIC;
+			}
+		}
+	}
+
+	if (thread_group_leader(current)) {
+		current->signal->it_real_incr.tv64 = 0;	/* ktime_add_ns() returns the sum, so it must be stored */
+		if (ctx->image_version != 0) {
+			current->signal->it_real_incr = ktime_add_ns(current->signal->it_real_incr, ti->cpt_it_real_incr);
+		} else {
+			current->signal->it_real_incr = ktime_add_ns(current->signal->it_real_incr, ti->cpt_it_real_incr*TICK_NSEC);
+		}
+		current->signal->it_prof_incr = ti->cpt_it_prof_incr;
+		current->signal->it_virt_incr = ti->cpt_it_virt_incr;
+		current->signal->it_prof_expires = ti->cpt_it_prof_value;
+		current->signal->it_virt_expires = ti->cpt_it_virt_value;
+	}
+
+	err = rst_clone_children(tobj, ctx);
+	if (err) {
+		eprintk_ctx("rst_clone_children\n");
+		goto out;
+	}
+	/* With the whole tree cloned, the pid 1 task finishes the cross-task state. */
+	if (ti->cpt_pid == 1) {
+		if ((err = rst_process_linkage(ctx)) != 0) {
+			eprintk_ctx("rst_process_linkage: %d\n", err);
+			goto out;
+		}
+		if ((err = rst_do_filejobs(ctx)) != 0) {
+			eprintk_ctx("rst_do_filejobs: %d\n", err);
+			goto out;
+		}
+		if ((err = rst_eventpoll(ctx)) != 0) {
+			eprintk_ctx("rst_eventpoll: %d\n", err);
+			goto out;
+		}
+		if ((err = rst_sockets_complete(ctx)) != 0) {
+			eprintk_ctx("rst_sockets_complete: %d\n", err);
+			goto out;
+		}
+		if ((err = rst_stray_files(ctx)) != 0) {
+			eprintk_ctx("rst_stray_files: %d\n", err);
+			goto out;
+		}
+		if ((err = rst_posix_locks(ctx)) != 0) {
+			eprintk_ctx("rst_posix_locks: %d\n", err);
+			goto out;
+		}
+		if ((err = rst_tty_jobcontrol(ctx)) != 0) {
+			eprintk_ctx("rst_tty_jobcontrol: %d\n", err);
+			goto out;
+		}
+		if ((err = rst_restore_fs(ctx)) != 0) {
+			eprintk_ctx("rst_restore_fs: %d\n", err);
+			goto out;
+		}
+	}
+
+out:
+	thr_ctx->error = err;
+	lock_kernel();
+	complete(&thr_ctx->task_done);
+	/* Tasks dumped as zombie/dead are parked as EXIT_ZOMBIE, the rest sleep. */
+	if (!err && (ti->cpt_state & (EXIT_ZOMBIE|EXIT_DEAD))) {
+		preempt_disable();
+		current->exit_state = EXIT_ZOMBIE;
+		write_lock_irq(&tasklist_lock);
+		nr_zombie++;
+		write_unlock_irq(&tasklist_lock);
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,9)
+		atomic_dec(&current->signal->live);
+#endif
+		current->flags |= PF_DEAD;
+		if (!(ti->cpt_flags&PF_DEAD))
+			wprintk_ctx("zombie %d,%d(%s) is not pf_dead\n", current->pid, virt_pid(current), current->comm);
+		module_put(current->thread_info->exec_domain->module);
+		if (current->binfmt)
+			module_put(current->binfmt->module);
+	} else {
+		__set_current_state(TASK_UNINTERRUPTIBLE);
+	}
+
+	schedule();
+
+	dprintk_ctx("leaked through %d/%d %p\n", current->pid, virt_pid(current), current->mm);
+
+	module_put(THIS_MODULE);
+	complete_and_exit(NULL, 0);
+	return 0;
+}
+
+#if 0	/* compiled out, like its two call sites in create_root_task() and make_baby() */
+static void set_task_ubs(struct cpt_task_image *ti, struct cpt_context *ctx)
+{
+	struct task_beancounter *tbc;
+
+	tbc = task_bc(current);
+	/* Swap fork_sub (and exec_ub when the image has an mm ub) for the looked-up ones. */
+	put_beancounter(tbc->fork_sub);
+	tbc->fork_sub = rst_lookup_ubc(ti->cpt_task_ub, ctx);
+	if (ti->cpt_mm_ub != CPT_NULL) {
+		put_beancounter(tbc->exec_ub);
+		tbc->exec_ub = rst_lookup_ubc(ti->cpt_mm_ub, ctx);
+	}
+}
+#endif
+
+static int create_root_task(cpt_object_t *obj, struct cpt_context *ctx,
+		struct thr_context *thr_ctx)
+{
+	task_t *tsk;
+	int pid;
+	/* Start hook() in a new kernel thread for @obj and pin its task_struct. */
+	thr_ctx->ctx = ctx;
+	thr_ctx->error = 0;
+	init_completion(&thr_ctx->init_complete);
+	init_completion(&thr_ctx->task_done);
+#if 0
+	set_task_ubs(obj->o_image, ctx);
+#endif
+
+	pid = local_kernel_thread(hook, thr_ctx, 0, 0);
+	if (pid < 0)
+		return pid;
+	read_lock(&tasklist_lock);
+	tsk = find_task_by_pid_ve(pid);
+	if (tsk)
+		get_task_struct(tsk);	/* reference dropped by rst_resume()/rst_kill() */
+	read_unlock(&tasklist_lock);
+	if (tsk == NULL)
+		return -ESRCH;
+	cpt_obj_setobj(obj, tsk, ctx);
+	thr_ctx->tobj = obj;	/* hook() reads this only after its first schedule() returns */
+	return 0;
+}
+
+static int rst_basic_init_task(cpt_object_t *obj, struct cpt_context *ctx)
+{
+	struct cpt_task_image *image = obj->o_image;
+	task_t *task = obj->o_obj;
+	/* Give the new task the dumped comm, then hand it to rst_mm_basic(). */
+	memcpy(task->comm, image->cpt_comm, sizeof(task->comm));
+	rst_mm_basic(obj, image, ctx);
+	return 0;
+}
+
+static int make_baby(cpt_object_t *cobj,
+		     struct cpt_task_image *pi,
+		     struct cpt_context *ctx)
+{
+	unsigned long flags;
+	struct cpt_task_image *ci = cobj->o_image;
+	struct thr_context thr_ctx;
+	task_t *tsk;
+	pid_t pid;
+	/* Clone the task described by @cobj from the caller (image @pi), run hook() in it, wait. */
+	flags = rst_mm_flag(ci, ctx) | rst_files_flag(ci, ctx)
+		| rst_signal_flag(ci, ctx) | rst_semundo_flag(ci, ctx);
+	if (ci->cpt_rppid != pi->cpt_pid) {	/* not a child of @pi: create it as a thread */
+		flags |= CLONE_THREAD|CLONE_PARENT;
+		if (ci->cpt_signal != pi->cpt_signal ||
+		    !(flags&CLONE_SIGHAND) ||
+		    (!(flags&CLONE_VM) && pi->cpt_mm != CPT_NULL)) {
+			eprintk_ctx("something is wrong with threads: %d %d %d %Ld %Ld %08lx\n",
+			       (int)ci->cpt_pid, (int)ci->cpt_rppid, (int)pi->cpt_pid,
+			       ci->cpt_signal, pi->cpt_signal, flags
+			       );
+			return -EINVAL;
+		}
+	}
+
+	thr_ctx.ctx = ctx;
+	thr_ctx.error = 0;
+	init_completion(&thr_ctx.init_complete);
+	init_completion(&thr_ctx.task_done);
+	thr_ctx.tobj = cobj;
+
+#if 0
+	set_task_ubs(ci, ctx);
+#endif
+
+	pid = local_kernel_thread(hook, &thr_ctx, flags, ci->cpt_pid);
+	if (pid < 0)
+		return pid;
+
+	read_lock(&tasklist_lock);
+	tsk = find_task_by_pid_ve(pid);
+	if (tsk)
+		get_task_struct(tsk);	/* reference dropped by rst_resume()/rst_kill() */
+	read_unlock(&tasklist_lock);
+	if (tsk == NULL)
+		return -ESRCH;
+	cpt_obj_setobj(cobj, tsk, ctx);
+	thr_ctx.tobj = cobj;
+	wait_for_completion(&thr_ctx.init_complete);	/* hook() is about to call schedule() */
+#ifdef CONFIG_SMP
+	wait_task_inactive(cobj->o_obj);
+#endif
+	rst_basic_init_task(cobj, ctx);
+
+	/* clone() increases group_stop_count if it was not zero and
+	 * CLONE_THREAD was asked. Undo.
+	 */
+	if (current->signal->group_stop_count && (flags & CLONE_THREAD)) {
+		if (tsk->signal != current->signal) BUG();
+		current->signal->group_stop_count--;
+	}
+	/* Let hook() run its restore work; its result comes back in thr_ctx.error. */
+	wake_up_process(tsk);
+	wait_for_completion(&thr_ctx.task_done);
+	wait_task_inactive(tsk);
+
+	return thr_ctx.error;
+}
+
+static int rst_clone_children(cpt_object_t *obj, struct cpt_context *ctx)
+{
+	struct cpt_task_image *parent = obj->o_image;
+	cpt_object_t *cand;
+	int rc;
+
+	for_each_object(cand, CPT_OBJ_TASK) {
+		struct cpt_task_image *img = cand->o_image;
+		int is_thread = (img->cpt_tgid != img->cpt_pid);	/* not a group leader */
+		if (cand == obj)
+			continue;
+		if (is_thread ? (img->cpt_leader != parent->cpt_pid || img->cpt_pid == 1)
+			      : (img->cpt_rppid != parent->cpt_pid))
+			continue;	/* neither our child process nor a thread we lead */
+		rc = make_baby(cand, parent, ctx);
+		if (rc) {
+			eprintk_ctx("make_baby: %d\n", rc);
+			return rc;
+		}
+	}
+	return 0;
+}
+
+static int read_task_images(struct cpt_context *ctx)
+{
+	int err;
+	loff_t start, end;
+	/* Intern one CPT_OBJ_TASK object per record of CPT_SECT_TASKS, each with its own image copy. */
+	err = rst_get_section(CPT_SECT_TASKS, ctx, &start, &end);
+	if (err)
+		return err;
+
+	while (start < end) {
+		cpt_object_t *obj;
+		struct cpt_task_image *ti = cpt_get_buf(ctx);
+
+		err = rst_get_object(CPT_OBJ_TASK, start, ti, ctx);
+		if (err) {
+			cpt_release_buf(ctx);
+			return err;
+		}
+		if (ti->cpt_pid != 1 && !__is_virtual_pid(ti->cpt_pid)) {
+			eprintk_ctx("BUG: pid %d is not virtual\n", ti->cpt_pid);
+			cpt_release_buf(ctx);
+			return -EINVAL;
+		}
+		obj = alloc_cpt_object(GFP_KERNEL, ctx);	/* NOTE(review): result is used unchecked */
+		cpt_obj_setpos(obj, start, ctx);
+		intern_cpt_object(CPT_OBJ_TASK, obj, ctx);
+		obj->o_image = kmalloc(ti->cpt_next, GFP_KERNEL);	/* room for the whole record */
+		if (obj->o_image == NULL) {
+			cpt_release_buf(ctx);
+			return -ENOMEM;
+		}
+		memcpy(obj->o_image, ti, sizeof(*ti));	/* NOTE(review): assumes cpt_next >= sizeof(*ti); not checked here */
+		err = ctx->pread(obj->o_image + sizeof(*ti),
+				 ti->cpt_next - sizeof(*ti), ctx, start + sizeof(*ti));
+		cpt_release_buf(ctx);
+		if (err)
+			return err;
+		start += ti->cpt_next;	/* NOTE(review): ti is read after cpt_release_buf() */
+	}
+	return 0;
+}
+
+
+static int vps_rst_restore_tree(struct cpt_context *ctx)
+{
+	int err;
+	cpt_object_t *obj;
+	struct thr_context thr_ctx_root;
+	/* Load task images and beancounters, then start the first task object. */
+	err = read_task_images(ctx);
+	if (err)
+		return err;
+
+	err = rst_undump_ubc(ctx);
+	if (err)
+		return err;
+
+	for_each_object(obj, CPT_OBJ_TASK) {
+		err = create_root_task(obj, ctx, &thr_ctx_root);
+		if (err)
+			return err;
+
+		wait_for_completion(&thr_ctx_root.init_complete);
+#ifdef CONFIG_SMP
+		wait_task_inactive(obj->o_obj);
+#endif
+		rst_basic_init_task(obj, ctx);
+		/* Let hook() run and wait for the result it leaves in thr_ctx_root.error. */
+		wake_up_process(obj->o_obj);
+		wait_for_completion(&thr_ctx_root.task_done);
+		wait_task_inactive(obj->o_obj);
+		err = thr_ctx_root.error;
+		if (err)
+			return err;
+		break;	/* only the first task object is started here; hook() clones the others */
+	}
+
+	return err;
+}
+
+
+int vps_rst_undump(struct cpt_context *ctx)
+{
+	int err;
+	unsigned long umask;
+	/* Open the dump file, restore the task tree, then call rst_restore_process(). */
+	err = rst_open_dumpfile(ctx);
+	if (err)
+		return err;
+
+#ifndef CONFIG_X86_64
+	if (ctx->tasks64) {
+		eprintk_ctx("Cannot restore 64 bit VE on this architecture\n");
+		return -EINVAL;
+	}
+#endif
+
+	umask = current->fs->umask;
+	current->fs->umask = 0;	/* the saved value is put back before returning */
+
+#ifdef CONFIG_VZ_CHECKPOINT_LAZY
+	err = rst_setup_pagein(ctx);
+#endif
+	/* Each stage runs only if everything before it succeeded. */
+	if (err == 0)
+		err = vps_rst_restore_tree(ctx);
+
+	if (err == 0)
+		err = rst_restore_process(ctx);
+
+	current->fs->umask = umask;
+
+        return err;
+}
+
+static int rst_unlock_ve(struct cpt_context *ctx)
+{
+	struct ve_struct *ve = get_ve_by_id(ctx->ve_id);
+	/* Clear is_locked under op_sem; -ESRCH when get_ve_by_id() finds nothing. */
+	if (ve == NULL)
+		return -ESRCH;
+
+	down_write(&ve->op_sem);
+	ve->is_locked = 0;
+	up_write(&ve->op_sem);
+	put_ve(ve);
+	return 0;
+}
+
+int rst_resume(struct cpt_context *ctx)
+{
+	cpt_object_t *obj;
+	int err = 0;
+	/* Drop the references kept during restore, start the tasks, unlock the VE. */
+	for_each_object(obj, CPT_OBJ_FILE) {
+		struct file *file = obj->o_obj;
+
+		fput(file);
+	}
+
+	rst_resume_network(ctx);
+
+	for_each_object(obj, CPT_OBJ_TASK) {
+		task_t *tsk = obj->o_obj;
+		struct cpt_task_image *ti = obj->o_image;
+
+		if (!tsk)
+			continue;
+
+		if (ti->cpt_state == TASK_UNINTERRUPTIBLE) {
+			dprintk_ctx("task %d/%d(%s) is started\n", virt_pid(tsk), tsk->pid, tsk->comm);
+
+			/* Weird... If a signal is sent to a stopped task,
+			 * nobody calls recalc_sigpending(). We have to do
+			 * this by hand after wake_up_process().
+			 * If we did this before, a signal could arrive before
+			 * wake_up_process() and stall.
+			 */
+			spin_lock_irq(&tsk->sighand->siglock);
+			if (!signal_pending(tsk))
+				recalc_sigpending_tsk(tsk);
+			spin_unlock_irq(&tsk->sighand->siglock);
+
+			wake_up_process(tsk);
+		} else {
+			if (ti->cpt_state == TASK_STOPPED ||
+			    ti->cpt_state == TASK_TRACED) {
+				set_task_state(tsk, ti->cpt_state);
+			}
+		}
+		put_task_struct(tsk);	/* pairs with get_task_struct() done when the task was created */
+	}
+
+	rst_unlock_ve(ctx);	/* its return value is not checked */
+
+#ifdef CONFIG_VZ_CHECKPOINT_LAZY
+	rst_complete_pagein(ctx, 0);
+#endif
+
+	rst_finish_ubc(ctx);
+	cpt_object_destroy(ctx);
+
+        return err;
+}
+
+int rst_kill(struct cpt_context *ctx)
+{
+	cpt_object_t *obj;
+	int err = 0;
+	/* Undo a restore: SIGKILL every restored task that has not exited yet. */
+	for_each_object(obj, CPT_OBJ_FILE) {
+		struct file *file = obj->o_obj;
+
+		fput(file);
+	}
+
+	for_each_object(obj, CPT_OBJ_TASK) {
+		task_t *tsk = obj->o_obj;
+
+		if (tsk == NULL)
+			continue;
+
+		if (tsk->exit_state == 0) {
+			send_sig(SIGKILL, tsk, 1);
+			/* Block every signal except SIGKILL and clear the freeze flags. */
+			spin_lock_irq(&tsk->sighand->siglock);
+			sigfillset(&tsk->blocked);
+			sigdelsetmask(&tsk->blocked, sigmask(SIGKILL));
+			set_tsk_thread_flag(tsk, TIF_SIGPENDING);
+			clear_tsk_thread_flag(tsk, TIF_FREEZE);
+			if (tsk->flags & PF_FROZEN)
+				tsk->flags &= ~PF_FROZEN;
+			spin_unlock_irq(&tsk->sighand->siglock);
+
+			wake_up_process(tsk);
+		}
+
+		put_task_struct(tsk);	/* pairs with get_task_struct() done when the task was created */
+	}
+
+#ifdef CONFIG_VZ_CHECKPOINT_LAZY
+	rst_complete_pagein(ctx, 1);
+#endif
+
+	rst_finish_ubc(ctx);
+	cpt_object_destroy(ctx);
+
+        return err;
+}
+
+static int rst_utsname(cpt_context_t *ctx)
+{
+	int err;
+	loff_t sec = ctx->sections[CPT_SECT_UTSNAME];
+	loff_t endsec;
+	struct cpt_section_hdr h;
+	struct cpt_object_hdr o;
+	int i;
+	/* Read ve_utsname.nodename, then .domainname, from CPT_SECT_UTSNAME. */
+	if (sec == CPT_NULL)
+		return 0;	/* the image has no such section */
+
+	err = ctx->pread(&h, sizeof(h), ctx, sec);
+	if (err)
+		return err;
+	if (h.cpt_section != CPT_SECT_UTSNAME || h.cpt_hdrlen < sizeof(h))
+		return -EINVAL;
+
+	i = 0;
+	endsec = sec + h.cpt_next;
+	sec += h.cpt_hdrlen;
+	while (sec < endsec) {
+		int len;
+		char *ptr;
+		err = rst_get_object(CPT_OBJ_NAME, sec, &o, ctx);
+		if (err)
+			return err;
+		len = o.cpt_next - o.cpt_hdrlen;	/* payload size after the object header */
+		if (len < 0 || len > __NEW_UTS_LEN+1)	/* a negative length must never reach pread() */
+			return len < 0 ? -EINVAL : -ENAMETOOLONG;
+		switch (i) {
+		case 0:
+			ptr = ve_utsname.nodename; break;
+		case 1:
+			ptr = ve_utsname.domainname; break;
+		default:
+			return -EINVAL;	/* more than two name objects */
+		}
+		err = ctx->pread(ptr, len, ctx, sec+o.cpt_hdrlen);
+		if (err)
+			return err;
+		i++;
+		sec += o.cpt_next;
+	}
+
+	return 0;
+}
diff -uprN linux-2.6.16/kernel/cpt/rst_x8664.S linux-2.6.16.ovz/kernel/cpt/rst_x8664.S
--- linux-2.6.16/kernel/cpt/rst_x8664.S	1969-12-31 19:00:00.000000000 -0500
+++ linux-2.6.16.ovz/kernel/cpt/rst_x8664.S	2006-07-05 08:34:56.000000000 -0400
@@ -0,0 +1,61 @@
+#define ASSEMBLY 1
+	
+#include <linux/config.h>
+#include <linux/linkage.h>
+#include <asm/segment.h>
+#include <asm/smp.h>
+#include <asm/cache.h>
+#include <asm/errno.h>
+#include <asm/dwarf2.h>
+#include <asm/calling.h>
+#include <asm/msr.h>
+#include <asm/unistd.h>
+#include <asm/thread_info.h>
+#include <asm/hw_irq.h>
+#include <asm/errno.h>
+#include <asm/asm-offsets.h>
+
+	.code64
+	.global schedule_tail_hook, schedule_tail_p
+	.align 8
+schedule_tail_hook:
+	movq schedule_tail_p(%rip),%r11	/* function pointer kept in .data below */
+	call *%r11
+	GET_THREAD_INFO(%rcx)
+	btr $22,threadinfo_flags(%rcx)	/* test and clear TIF_RESUME; old value lands in CF */
+	jc  1f
+	retq
+
+	/* If TIF_RESUME is set, (%rsp) is a pointer to the hook function;
+	 * the hook will do the work and jump to the next hook;
+	 * everything should end at ret_from_fork+5.
+	 */
+1:	addq $8,%rsp	/* skip one stack slot, then return through the next one */
+	retq
+
+	.align 8
+	.global ret_from_fork2
+ret_from_fork2:
+	cmpq $0,ORIG_RAX(%rsp)
+	jge  ret_from_fork+5	/* taken when ORIG_RAX >= 0 */
+	RESTORE_REST
+	jmp  int_ret_from_sys_call
+
+	.align 8
+	.global ret_last_siginfo
+ret_last_siginfo:
+	call rlsi
+	movq %rax,%rsp	/* the value returned by rlsi becomes the stack pointer */
+	retq
+
+	.align 8
+	.global ret_child_tid
+ret_child_tid:
+	movq %rsp,%rdi	/* current stack pointer is the first argument of rct */
+	call rct
+	movq %rax,%rsp	/* the value returned by rct becomes the stack pointer */
+	retq
+
+	.data
+schedule_tail_p:
+	.quad	0	/* presumably set from C before schedule_tail_hook is used - TODO confirm */
diff -uprN linux-2.6.16/kernel/cpu.c linux-2.6.16.ovz/kernel/cpu.c
--- linux-2.6.16/kernel/cpu.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/kernel/cpu.c	2006-07-05 08:34:56.000000000 -0400
@@ -21,6 +21,11 @@ static DECLARE_MUTEX(cpucontrol);
 static struct notifier_block *cpu_chain;
 
 #ifdef CONFIG_HOTPLUG_CPU
+
+#ifdef CONFIG_SCHED_VCPU
+#error "CONFIG_HOTPLUG_CPU isn't supported with CONFIG_SCHED_VCPU"
+#endif
+
 static struct task_struct *lock_cpu_hotplug_owner;
 static int lock_cpu_hotplug_depth;
 
@@ -95,8 +100,8 @@ static inline void check_for_tasks(int c
 	struct task_struct *p;
 
 	write_lock_irq(&tasklist_lock);
-	for_each_process(p) {
-		if (task_cpu(p) == cpu &&
+	for_each_process_all(p) {
+		if (task_pcpu(p) == cpu &&
 		    (!cputime_eq(p->utime, cputime_zero) ||
 		     !cputime_eq(p->stime, cputime_zero)))
 			printk(KERN_WARNING "Task %s (pid = %d) is on cpu %d\
@@ -106,6 +111,13 @@ static inline void check_for_tasks(int c
 	write_unlock_irq(&tasklist_lock);
 }
 
+#ifdef CONFIG_SCHED_VCPU
+#error VCPU vs. HOTPLUG: fix hotplug code below
+/*
+ * What should be fixed:
+ * - check for if (idle_cpu()) yield()
+ */
+#endif
 /* Take this CPU down. */
 static int take_cpu_down(void *unused)
 {
diff -uprN linux-2.6.16/kernel/cpuset.c linux-2.6.16.ovz/kernel/cpuset.c
--- linux-2.6.16/kernel/cpuset.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/kernel/cpuset.c	2006-07-05 08:34:56.000000000 -0400
@@ -897,7 +897,7 @@ static int update_nodemask(struct cpuset
 	n = 0;
 
 	/* Load up mmarray[] with mm reference for each task in cpuset. */
-	do_each_thread(g, p) {
+	do_each_thread_all(g, p) {
 		struct mm_struct *mm;
 
 		if (n >= ntasks) {
@@ -911,7 +911,7 @@ static int update_nodemask(struct cpuset
 		if (!mm)
 			continue;
 		mmarray[n++] = mm;
-	} while_each_thread(g, p);
+	} while_each_thread_all(g, p);
 	write_unlock_irq(&tasklist_lock);
 
 	/*
@@ -1125,7 +1125,7 @@ static int attach_task(struct cpuset *cs
 	if (pid) {
 		read_lock(&tasklist_lock);
 
-		tsk = find_task_by_pid(pid);
+		tsk = find_task_by_pid_all(pid);
 		if (!tsk || tsk->flags & PF_EXITING) {
 			read_unlock(&tasklist_lock);
 			return -ESRCH;
@@ -1561,13 +1561,13 @@ static int pid_array_load(pid_t *pidarra
 
 	read_lock(&tasklist_lock);
 
-	do_each_thread(g, p) {
+	do_each_thread_all(g, p) {
 		if (p->cpuset == cs) {
 			pidarray[n++] = p->pid;
 			if (unlikely(n == npids))
 				goto array_full;
 		}
-	} while_each_thread(g, p);
+	} while_each_thread_all(g, p);
 
 array_full:
 	read_unlock(&tasklist_lock);
diff -uprN linux-2.6.16/kernel/exec_domain.c linux-2.6.16.ovz/kernel/exec_domain.c
--- linux-2.6.16/kernel/exec_domain.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/kernel/exec_domain.c	2006-07-05 08:34:56.000000000 -0400
@@ -140,6 +140,7 @@ __set_personality(u_long personality)
 	ep = lookup_exec_domain(personality);
 	if (ep == current_thread_info()->exec_domain) {
 		current->personality = personality;
+		module_put(ep->module);
 		return 0;
 	}
 
diff -uprN linux-2.6.16/kernel/exit.c linux-2.6.16.ovz/kernel/exit.c
--- linux-2.6.16/kernel/exit.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/kernel/exit.c	2006-07-05 08:34:56.000000000 -0400
@@ -42,7 +42,7 @@ extern struct task_struct *child_reaper;
 
 int getrusage(struct task_struct *, int, struct rusage __user *);
 
-static void exit_mm(struct task_struct * tsk);
+void exit_mm(struct task_struct * tsk);
 
 static void __unhash_process(struct task_struct *p)
 {
@@ -57,18 +57,19 @@ static void __unhash_process(struct task
 	}
 
 	REMOVE_LINKS(p);
+	REMOVE_VE_LINKS(p);
 }
 
 void release_task(struct task_struct * p)
 {
 	int zap_leader;
 	task_t *leader;
-	struct dentry *proc_dentry;
+	struct dentry *proc_dentry[2];
 
 repeat: 
 	atomic_dec(&p->user->processes);
 	spin_lock(&p->proc_lock);
-	proc_dentry = proc_pid_unhash(p);
+	proc_pid_unhash(p, proc_dentry);
 	write_lock_irq(&tasklist_lock);
 	if (unlikely(p->ptrace))
 		__ptrace_unlink(p);
@@ -80,6 +81,8 @@ repeat: 
 	 * the process by __unhash_process.
 	 */
 	__unhash_process(p);
+	nr_zombie--;
+	atomic_inc(&nr_dead);
 
 	/*
 	 * If we are the last non-leader member of the thread
@@ -107,6 +110,10 @@ repeat: 
 	spin_unlock(&p->proc_lock);
 	proc_pid_flush(proc_dentry);
 	release_thread(p);
+#ifdef CONFIG_VE
+	if (atomic_dec_and_test(&VE_TASK_INFO(p)->owner_env->pcounter))
+		do_env_cleanup(VE_TASK_INFO(p)->owner_env);
+#endif
 	put_task_struct(p);
 
 	p = leader;
@@ -118,10 +125,10 @@ repeat: 
 
 void unhash_process(struct task_struct *p)
 {
-	struct dentry *proc_dentry;
+	struct dentry *proc_dentry[2];
 
 	spin_lock(&p->proc_lock);
-	proc_dentry = proc_pid_unhash(p);
+	proc_pid_unhash(p, proc_dentry);
 	write_lock_irq(&tasklist_lock);
 	__unhash_process(p);
 	write_unlock_irq(&tasklist_lock);
@@ -139,14 +146,16 @@ int session_of_pgrp(int pgrp)
 	struct task_struct *p;
 	int sid = -1;
 
+	WARN_ON(is_virtual_pid(pgrp));
+
 	read_lock(&tasklist_lock);
-	do_each_task_pid(pgrp, PIDTYPE_PGID, p) {
+	do_each_task_pid_ve(pgrp, PIDTYPE_PGID, p) {
 		if (p->signal->session > 0) {
 			sid = p->signal->session;
 			goto out;
 		}
-	} while_each_task_pid(pgrp, PIDTYPE_PGID, p);
-	p = find_task_by_pid(pgrp);
+	} while_each_task_pid_ve(pgrp, PIDTYPE_PGID, p);
+	p = find_task_by_pid_ve(pgrp);
 	if (p)
 		sid = p->signal->session;
 out:
@@ -168,17 +177,19 @@ static int will_become_orphaned_pgrp(int
 	struct task_struct *p;
 	int ret = 1;
 
-	do_each_task_pid(pgrp, PIDTYPE_PGID, p) {
+	WARN_ON(is_virtual_pid(pgrp));
+
+	do_each_task_pid_ve(pgrp, PIDTYPE_PGID, p) {
 		if (p == ignored_task
 				|| p->exit_state
-				|| p->real_parent->pid == 1)
+				|| virt_pid(p->real_parent) == 1)
 			continue;
 		if (process_group(p->real_parent) != pgrp
 			    && p->real_parent->signal->session == p->signal->session) {
 			ret = 0;
 			break;
 		}
-	} while_each_task_pid(pgrp, PIDTYPE_PGID, p);
+	} while_each_task_pid_ve(pgrp, PIDTYPE_PGID, p);
 	return ret;	/* (sighing) "Often!" */
 }
 
@@ -186,6 +197,8 @@ int is_orphaned_pgrp(int pgrp)
 {
 	int retval;
 
+	WARN_ON(is_virtual_pid(pgrp));
+
 	read_lock(&tasklist_lock);
 	retval = will_become_orphaned_pgrp(pgrp, NULL);
 	read_unlock(&tasklist_lock);
@@ -198,7 +211,7 @@ static int has_stopped_jobs(int pgrp)
 	int retval = 0;
 	struct task_struct *p;
 
-	do_each_task_pid(pgrp, PIDTYPE_PGID, p) {
+	do_each_task_pid_ve(pgrp, PIDTYPE_PGID, p) {
 		if (p->state != TASK_STOPPED)
 			continue;
 
@@ -214,7 +227,7 @@ static int has_stopped_jobs(int pgrp)
 
 		retval = 1;
 		break;
-	} while_each_task_pid(pgrp, PIDTYPE_PGID, p);
+	} while_each_task_pid_ve(pgrp, PIDTYPE_PGID, p);
 	return retval;
 }
 
@@ -263,6 +276,9 @@ void __set_special_pids(pid_t session, p
 {
 	struct task_struct *curr = current->group_leader;
 
+	WARN_ON(is_virtual_pid(pgrp));
+	WARN_ON(is_virtual_pid(session));
+
 	if (curr->signal->session != session) {
 		detach_pid(curr, PIDTYPE_SID);
 		curr->signal->session = session;
@@ -281,6 +297,7 @@ void set_special_pids(pid_t session, pid
 	__set_special_pids(session, pgrp);
 	write_unlock_irq(&tasklist_lock);
 }
+EXPORT_SYMBOL(set_special_pids);
 
 /*
  * Let kernel threads use this to say that they
@@ -500,7 +517,7 @@ EXPORT_SYMBOL_GPL(exit_fs);
  * Turn us into a lazy TLB process if we
  * aren't already..
  */
-static void exit_mm(struct task_struct * tsk)
+void exit_mm(struct task_struct * tsk)
 {
 	struct mm_struct *mm = tsk->mm;
 
@@ -535,6 +552,7 @@ static void exit_mm(struct task_struct *
 	task_unlock(tsk);
 	mmput(mm);
 }
+EXPORT_SYMBOL_GPL(exit_mm);
 
 static inline void choose_new_parent(task_t *p, task_t *reaper, task_t *child_reaper)
 {
@@ -613,13 +631,12 @@ static void reparent_thread(task_t *p, t
 static void forget_original_parent(struct task_struct * father,
 					  struct list_head *to_release)
 {
-	struct task_struct *p, *reaper = father;
+	struct task_struct *p, *tsk_reaper, *reaper = father;
 	struct list_head *_p, *_n;
 
 	do {
 		reaper = next_thread(reaper);
 		if (reaper == father) {
-			reaper = child_reaper;
 			break;
 		}
 	} while (reaper->exit_state);
@@ -641,9 +658,16 @@ static void forget_original_parent(struc
 		/* if father isn't the real parent, then ptrace must be enabled */
 		BUG_ON(father != p->real_parent && !ptrace);
 
+		tsk_reaper = reaper;
+		if (tsk_reaper == father)
+#ifdef CONFIG_VE
+			tsk_reaper = VE_TASK_INFO(p)->owner_env->init_entry;
+		if (tsk_reaper == p)
+#endif
+			tsk_reaper = child_reaper;
 		if (father == p->real_parent) {
-			/* reparent with a reaper, real father it's us */
-			choose_new_parent(p, reaper, child_reaper);
+			/* reparent with a tsk_reaper, real father it's us */
+			choose_new_parent(p, tsk_reaper, child_reaper);
 			reparent_thread(p, father, 0);
 		} else {
 			/* reparent ptraced task to its real parent */
@@ -664,7 +688,15 @@ static void forget_original_parent(struc
 	}
 	list_for_each_safe(_p, _n, &father->ptrace_children) {
 		p = list_entry(_p,struct task_struct,ptrace_list);
-		choose_new_parent(p, reaper, child_reaper);
+
+		tsk_reaper = reaper;
+		if (tsk_reaper == father)
+#ifdef CONFIG_VE
+			tsk_reaper = VE_TASK_INFO(p)->owner_env->init_entry;
+		if (tsk_reaper == p)
+#endif
+			tsk_reaper = child_reaper;
+		choose_new_parent(p, tsk_reaper, child_reaper);
 		reparent_thread(p, father, 1);
 	}
 }
@@ -760,6 +792,9 @@ static void exit_notify(struct task_stru
 	    && !capable(CAP_KILL))
 		tsk->exit_signal = SIGCHLD;
 
+	if (tsk->exit_signal != -1 && t == child_reaper)
+		/* We dont want people slaying init. */
+		tsk->exit_signal = SIGCHLD;
 
 	/* If something other than our normal parent is ptracing us, then
 	 * send it a SIGCHLD instead of honoring exit_signal.  exit_signal
@@ -778,6 +813,7 @@ static void exit_notify(struct task_stru
 	     unlikely(tsk->parent->signal->flags & SIGNAL_GROUP_EXIT)))
 		state = EXIT_DEAD;
 	tsk->exit_state = state;
+	nr_zombie++;
 
 	write_unlock_irq(&tasklist_lock);
 
@@ -792,6 +828,82 @@ static void exit_notify(struct task_stru
 		release_task(tsk);
 }
 
+#ifdef CONFIG_VE
+/*
+ * Handle exiting of the init process, which is a special case for a VE.
+ */
+static void do_initproc_exit(void)
+{
+	struct task_struct *tsk;
+	struct ve_struct *env;
+	struct siginfo info;
+	struct task_struct *g, *p;
+	long delay = 1L;
+
+	tsk = current;
+	env = VE_TASK_INFO(current)->owner_env;
+	if (env->init_entry != tsk)
+		return;
+
+	if (ve_is_super(env) && tsk->pid == 1)
+		panic("Attempted to kill init!");
+
+	memset(&info, 0, sizeof(info));
+	info.si_errno = 0;
+	info.si_code = SI_KERNEL;
+	info.si_pid = virt_pid(tsk);
+	info.si_uid = current->uid;
+	info.si_signo = SIGKILL;
+
+	/*
+	 * Here the VE changes its state into "not running".
+	 * op_sem taken for write is a barrier to all VE manipulations from
+	 * ioctl: it waits for operations currently in progress and blocks all
+	 * subsequent operations until is_running is set to 0 and op_sem is
+	 * released.
+	 */
+	down_write(&env->op_sem);
+	env->is_running = 0;
+	up_write(&env->op_sem);
+
+	/* send SIGKILL to every task of the VE */
+	read_lock(&tasklist_lock);
+	do_each_thread_ve(g, p) {
+		force_sig_info(SIGKILL, &info, p);
+	} while_each_thread_ve(g, p);
+	read_unlock(&tasklist_lock);
+
+	/* reap children until this task is the only one counted in the VE */
+	while (atomic_read(&env->pcounter) > 1) {
+		if (sys_wait4(-1, NULL, __WALL | WNOHANG, NULL) > 0)
+			continue;
+		/* nothing was reaped; check whether any other task is left */
+		if (atomic_read(&env->pcounter) == 1)
+			break;
+
+		/* clear all signals to avoid wakeups */
+		if (signal_pending(tsk))
+			flush_signals(tsk);
+		/* tasks remain: back off (doubling up to HZ), then re-signal */
+		__set_current_state(TASK_INTERRUPTIBLE);
+		schedule_timeout(delay);
+		delay = (delay < HZ) ? (delay << 1) : HZ;
+		read_lock(&tasklist_lock);
+		do_each_thread_ve(g, p) {
+			if (p != tsk)
+				force_sig_info(SIGKILL, &info, p);
+		} while_each_thread_ve(g, p);
+		read_unlock(&tasklist_lock);
+	}
+	env->init_entry = child_reaper;
+	write_lock_irq(&tasklist_lock);
+	REMOVE_LINKS(tsk);
+	tsk->parent = tsk->real_parent = child_reaper;
+	SET_LINKS(tsk);
+	write_unlock_irq(&tasklist_lock);
+}
+#endif
+
 fastcall NORET_TYPE void do_exit(long code)
 {
 	struct task_struct *tsk = current;
@@ -805,14 +917,20 @@ fastcall NORET_TYPE void do_exit(long co
 		panic("Aiee, killing interrupt handler!");
 	if (unlikely(!tsk->pid))
 		panic("Attempted to kill the idle task!");
+#ifdef CONFIG_VE
+	do_initproc_exit();
+#else
 	if (unlikely(tsk->pid == 1))
 		panic("Attempted to kill init!");
+#endif
 	if (tsk->io_context)
 		exit_io_context();
 
 	if (unlikely(current->ptrace & PT_TRACE_EXIT)) {
 		current->ptrace_message = code;
+		set_pn_state(current, PN_STOP_EXIT);
 		ptrace_notify((PTRACE_EVENT_EXIT << 8) | SIGTRAP);
+		clear_pn_state(current);
 	}
 
 	/*
@@ -828,14 +946,6 @@ fastcall NORET_TYPE void do_exit(long co
 
 	tsk->flags |= PF_EXITING;
 
-	/*
-	 * Make sure we don't try to process any timer firings
-	 * while we are already exiting.
-	 */
- 	tsk->it_virt_expires = cputime_zero;
- 	tsk->it_prof_expires = cputime_zero;
-	tsk->it_sched_expires = 0;
-
 	if (unlikely(in_atomic()))
 		printk(KERN_INFO "note: %s[%d] exited with preempt_count %d\n",
 				current->comm, current->pid,
@@ -911,7 +1021,14 @@ asmlinkage long sys_exit(int error_code)
 
 task_t fastcall *next_thread(const task_t *p)
 {
-	return pid_task(p->pids[PIDTYPE_TGID].pid_list.next, PIDTYPE_TGID);
+	task_t *tsk;
+
+	tsk = pid_task(p->pids[PIDTYPE_TGID].pid_list.next, PIDTYPE_TGID);
+#ifdef CONFIG_VE
+	/* all threads should belong to ONE ve! */
+	BUG_ON(VE_TASK_INFO(tsk)->owner_env != VE_TASK_INFO(p)->owner_env);
+#endif
+	return tsk;
 }
 
 EXPORT_SYMBOL(next_thread);
@@ -960,14 +1077,19 @@ asmlinkage void sys_exit_group(int error
 static int eligible_child(pid_t pid, int options, task_t *p)
 {
 	if (pid > 0) {
-		if (p->pid != pid)
+		if ((is_virtual_pid(pid) ? virt_pid(p) : p->pid) != pid)
 			return 0;
 	} else if (!pid) {
 		if (process_group(p) != process_group(current))
 			return 0;
 	} else if (pid != -1) {
-		if (process_group(p) != -pid)
-			return 0;
+		if (__is_virtual_pid(-pid)) {
+			if (virt_pgid(p) != -pid)
+				return 0;
+		} else {
+			if (process_group(p) != -pid)
+				return 0;
+		}
 	}
 
 	/*
@@ -1157,7 +1279,7 @@ static int wait_task_zombie(task_t *p, i
 		p->exit_state = EXIT_ZOMBIE;
 		return retval;
 	}
-	retval = p->pid;
+	retval = get_task_pid(p);
 	if (p->real_parent != p->parent) {
 		write_lock_irq(&tasklist_lock);
 		/* Double-check with lock held.  */
@@ -1292,7 +1414,7 @@ bail_ref:
 	if (!retval && infop)
 		retval = put_user(p->uid, &infop->si_uid);
 	if (!retval)
-		retval = p->pid;
+		retval = get_task_pid(p);
 	put_task_struct(p);
 
 	BUG_ON(!retval);
@@ -1574,6 +1696,7 @@ asmlinkage long sys_wait4(pid_t pid, int
 	prevent_tail_call(ret);
 	return ret;
 }
+EXPORT_SYMBOL_GPL(sys_wait4);
 
 #ifdef __ARCH_WANT_SYS_WAITPID
 
diff -uprN linux-2.6.16/kernel/fairsched.c linux-2.6.16.ovz/kernel/fairsched.c
--- linux-2.6.16/kernel/fairsched.c	1969-12-31 19:00:00.000000000 -0500
+++ linux-2.6.16.ovz/kernel/fairsched.c	2006-07-05 08:34:56.000000000 -0400
@@ -0,0 +1,1288 @@
+/*
+ * Fair Scheduler
+ *
+ * Copyright (C) 2000-2005  SWsoft
+ * All rights reserved.
+ * 
+ * Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ * Start-tag scheduling follows the theory presented in
+ * http://www.cs.utexas.edu/users/dmcl/papers/ps/SIGCOMM96.ps
+ */
+
+#include <linux/config.h>
+#include <linux/kernel.h>
+#include <asm/timex.h>
+#include <asm/atomic.h>
+#include <linux/spinlock.h>
+#include <asm/semaphore.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
+#include <linux/fs.h>
+#include <linux/dcache.h>
+#include <linux/sysctl.h>
+#include <linux/module.h>
+#include <linux/vmalloc.h>
+#include <linux/sched.h>
+#include <linux/fairsched.h>
+#include <linux/vsched.h>
+
+/* we need it for vsched routines in sched.c */
+spinlock_t fairsched_lock = SPIN_LOCK_UNLOCKED;
+
+#ifdef CONFIG_FAIRSCHED
+
+#define FAIRSHED_DEBUG		" debug"
+
+
+/*********************************************************************/
+/*
+ * Special arithmetics
+ */
+/*********************************************************************/
+
+#define CYCLES_SHIFT (8)
+#define SCYCLES_TIME(time) \
+        ((scycles_t) {((time) + (1 << CYCLES_SHIFT) - 1)  >> CYCLES_SHIFT})
+
+#define CYCLES_ZERO (0)
+static inline int CYCLES_BEFORE(cycles_t x, cycles_t y)
+{
+        return (__s64)(x-y) < 0;
+}
+static inline int CYCLES_AFTER(cycles_t x, cycles_t y)
+{
+        return (__s64)(y-x) < 0;
+}
+static inline void CYCLES_DADD(cycles_t *x, fschdur_t y) {*x+=y.d;}
+
+#define FSCHDUR_ZERO (0)
+#define TICK_DUR ((fschdur_t){cycles_per_jiffy})
+static inline fschdur_t FSCHDURATION(cycles_t x, cycles_t y)
+{
+	return (fschdur_t){x - y};
+}
+/* Three-way comparison of two durations: returns -1, 0 or 1. */
+static inline int FSCHDUR_CMP(fschdur_t x, fschdur_t y)
+{
+	if (x.d == y.d) return 0;
+	return x.d < y.d ? -1 : 1;
+}
+static inline fschdur_t FSCHDUR_SUB(fschdur_t x, fschdur_t y)
+{
+	return (fschdur_t){x.d - y.d};
+}
+
+#define FSCHTAG_ZERO ((fschtag_t){0})
+/* Three-way comparison of two tags: returns -1, 0 or 1. */
+static inline int FSCHTAG_CMP(fschtag_t x, fschtag_t y)
+{
+	if (x.t == y.t) return 0;
+	return x.t < y.t ? -1 : 1;
+}
+static inline fschtag_t FSCHTAG_MAX(fschtag_t x, fschtag_t y)
+{
+	return x.t >= y.t ? x : y;
+}
+/* Advance *tag by dur * w; returns -1 (tag untouched) on wrap or >= 2^48. */
+static inline int FSCHTAG_DADD(fschtag_t *tag, fschdur_t dur, unsigned w)
+{
+	cycles_t sum = tag->t + (cycles_t)dur.d * w;
+
+	/* reject wrap-around; the 2^48 ceiling is marked DEBUG upstream */
+	if (sum < tag->t || sum >= (1ULL << 48))
+		return -1;
+
+	tag->t = sum;
+	return 0;
+}
+/* Add y to *tag; returns -1 and leaves *tag unchanged if the sum wraps. */
+static inline int FSCHTAG_ADD(fschtag_t *tag, fschtag_t y)
+{
+	cycles_t sum = tag->t + y.t;
+	if (sum < tag->t)
+		return -1;
+	tag->t = sum;
+	return 0;
+}
+static inline fschtag_t FSCHTAG_SUB(fschtag_t x, fschtag_t y)
+{
+	return (fschtag_t){x.t - y.t};
+}
+
+#define FSCHVALUE_ZERO ((fschvalue_t){0})
+#define TICK_VALUE ((fschvalue_t){(cycles_t)cycles_per_jiffy << FSCHRATE_SHIFT})
+static inline fschvalue_t FSCHVALUE(unsigned long t)
+{
+	return (fschvalue_t){(cycles_t)t << FSCHRATE_SHIFT};
+}
+/* Three-way comparison of two bucket values: returns -1, 0 or 1. */
+static inline int FSCHVALUE_CMP(fschvalue_t x, fschvalue_t y)
+{
+	if (x.v == y.v) return 0;
+	return x.v < y.v ? -1 : 1;
+}
+static inline void FSCHVALUE_DADD(fschvalue_t *val, fschdur_t dur,
+		unsigned rate)
+{
+	val->v += (cycles_t)dur.d * rate;
+}
+static inline fschvalue_t FSCHVALUE_SUB(fschvalue_t x, fschvalue_t y)
+{
+	return (fschvalue_t){x.v - y.v};
+}
+static inline cycles_t FSCHVALUE_TO_DELAY(fschvalue_t val, unsigned rate)
+{
+	/*
+	 * Precision is deliberately dropped so that the division stays
+	 * 32-bit on IA-32: the value is not greater than TICK_VALUE, and
+	 * (TICK_VALUE >> FSCHRATE_SHIFT) fits unsigned long.
+	 */
+	unsigned long n = (val.v + (1 << FSCHRATE_SHIFT) - 1) >> FSCHRATE_SHIFT;
+	unsigned long quot = (n + rate - 1) / rate;	/* rounds up */
+	return (cycles_t)quot << FSCHRATE_SHIFT;
+}
+
+
+/*********************************************************************/
+/*
+ * Global data
+ */
+/*********************************************************************/
+
+#define fsch_assert(x)							\
+	do {								\
+		static int count;					\
+		if (!(x) && count++ < 10)				\
+			printk("fsch_assert " #x " failed\n");		\
+	} while (0)
+
+/*
+ * Configurable parameters
+ */
+unsigned fairsched_max_latency = 25; /* jiffies */
+
+/*
+ * Parameters initialized at startup
+ */
+/* Number of online CPUs */
+unsigned fairsched_nr_cpus;
+/* Token Bucket depth (burst size) */
+static fschvalue_t max_value;
+
+struct fairsched_node fairsched_init_node = {
+	.id		= INT_MAX,
+#ifdef CONFIG_VE
+	.owner_env	= get_ve0(),
+#endif
+	.weight		= 1,
+};
+EXPORT_SYMBOL(fairsched_init_node);
+
+struct fairsched_node fairsched_idle_node = {
+	.id =			-1,
+};
+
+static int fairsched_nr_nodes;
+static LIST_HEAD(fairsched_node_head);
+static LIST_HEAD(fairsched_running_head);
+static LIST_HEAD(fairsched_delayed_head);
+
+DEFINE_PER_CPU(cycles_t, prev_schedule);
+static fschtag_t max_latency;
+
+static DECLARE_MUTEX(fairsched_mutex);
+
+/*********************************************************************/
+/*
+ * Small helper routines
+ */
+/*********************************************************************/
+
+/* this didn't prove to be a very valuable statistic... */
+#define fairsched_inc_ve_strv(node, cycles)  do {} while(0)
+#define fairsched_dec_ve_strv(node, cycles)  do {} while(0)
+
+/*********************************************************************/
+/*
+ * Runlist management
+ */
+/*********************************************************************/
+
+/*
+ * Returns the start_tag of the first runnable node, or 0.
+ */
+static inline fschtag_t virtual_time(void)
+{
+	struct fairsched_node *head;
+
+	/* empty running list: report a zero tag */
+	if (list_empty(&fairsched_running_head))
+		return FSCHTAG_ZERO;
+	head = list_first_entry(&fairsched_running_head,
+				struct fairsched_node, runlist);
+	return head->start_tag;
+}
+
+/* Recompute max_latency from the smallest weight among all nodes. */
+static void fairsched_recompute_max_latency(void)
+{
+	struct fairsched_node *node;
+	unsigned min_weight = FSCHWEIGHT_MAX;
+	fschtag_t latency = FSCHTAG_ZERO;
+
+	list_for_each_entry(node, &fairsched_node_head, nodelist)
+		if (node->weight < min_weight)
+			min_weight = node->weight;
+
+	/* if FSCHTAG_DADD refuses the sum, latency simply stays zero */
+	(void) FSCHTAG_DADD(&latency, TICK_DUR,
+			fairsched_nr_cpus * fairsched_max_latency * min_weight);
+	max_latency = latency;
+}
+
+/* Rebase every node's start_tag on virtual_time(), clamping at zero. */
+static void fairsched_reset_start_tags(void)
+{
+	struct fairsched_node *node;
+	fschtag_t base = virtual_time();
+
+	list_for_each_entry(node, &fairsched_node_head, nodelist) {
+		if (FSCHTAG_CMP(node->start_tag, base) <= 0) {
+			node->start_tag = FSCHTAG_ZERO;
+			continue;
+		}
+		node->start_tag = FSCHTAG_SUB(node->start_tag, base);
+	}
+}
+
+/* Insert node into the running list, ordered by ascending start_tag. */
+static void fairsched_running_insert(struct fairsched_node *node)
+{
+	struct fairsched_node *cur;
+	struct list_head *pos;
+	fschtag_t cap;
+
+	if (!list_empty(&fairsched_running_head)) {
+		/* clamp to virtual_time() + max_latency unless that wraps */
+		cap = virtual_time();
+		if (FSCHTAG_ADD(&cap, max_latency) == 0 &&
+		    FSCHTAG_CMP(node->start_tag, cap) > 0)
+			node->start_tag = cap;
+	}
+	list_for_each(pos, &fairsched_running_head) {
+		cur = list_entry(pos, struct fairsched_node, runlist);
+		if (FSCHTAG_CMP(cur->start_tag, node->start_tag) >= 0)
+			break;
+	}
+	list_add_tail(&node->runlist, pos);	/* goes just before pos */
+}
+
+static inline void fairsched_running_insert_fromsleep(
+		struct fairsched_node *node)
+{
+	node->start_tag = FSCHTAG_MAX(node->start_tag, virtual_time());
+	fairsched_running_insert(node);
+}
+
+
+/*********************************************************************/
+/*
+ * CPU limiting helper functions
+ *
+ * These functions compute rates, delays and manipulate with sleep
+ * lists and so on.
+ */
+/*********************************************************************/
+
+/*
+ * Insert a node into the list of nodes removed from scheduling,
+ * sorted by the time at which the node is allowed to run,
+ * historically called `delay'.
+ */
+static void fairsched_delayed_insert(struct fairsched_node *node)
+{
+	struct list_head *pos;
+	struct fairsched_node *cur;
+
+	/* stop at the first entry whose delay lies after node's delay */
+	list_for_each(pos, &fairsched_delayed_head) {
+		cur = list_entry(pos, struct fairsched_node, runlist);
+		if (CYCLES_AFTER(cur->delay, node->delay))
+			break;
+	}
+	/* link node just before pos (the list head if none was found) */
+	list_add_tail(&node->runlist, pos);
+}
+
+static inline void nodevalue_add(struct fairsched_node *node,
+		fschdur_t duration, unsigned rate)
+{
+	FSCHVALUE_DADD(&node->value, duration, rate);
+	if (FSCHVALUE_CMP(node->value, max_value) > 0)
+		node->value = max_value;
+}
+
+/*
+ * The node has been selected to run.
+ * This function accounts in advance for the time that the node will run.
+ * The advance not used by the node will be credited back.
+ */
+static void fairsched_ratelimit_charge_advance(
+		struct fairsched_node *node, cycles_t time)
+{
+	fschvalue_t deficit;
+
+	fsch_assert(!node->delayed);
+	fsch_assert(FSCHVALUE_CMP(node->value, TICK_VALUE) >= 0);
+
+	/*
+	 * Refill the bucket for the time elapsed since the last update; a
+	 * node made runnable by a wakeup may not have been updated yet.
+	 */
+	if (CYCLES_AFTER(time, node->last_updated_at)) {
+		nodevalue_add(node, FSCHDURATION(time, node->last_updated_at),
+			      node->rate);
+		node->last_updated_at = time;
+	}
+
+	/* pre-charge one full tick; done if another tick is still covered */
+	node->value = FSCHVALUE_SUB(node->value, TICK_VALUE);
+	if (FSCHVALUE_CMP(node->value, TICK_VALUE) >= 0)
+		return;
+	/* bucket too low: take the node off its list and delay it */
+	deficit = FSCHVALUE_SUB(TICK_VALUE, node->value);
+	list_del(&node->runlist);
+	node->delayed = 1;
+	node->nr_ready = 0;
+	node->delay = node->last_updated_at +
+			FSCHVALUE_TO_DELAY(deficit, node->rate);
+	fairsched_delayed_insert(node);
+}
+
+static void fairsched_ratelimit_credit_unused(
+		struct fairsched_node *node,
+		cycles_t time, fschdur_t duration)
+{
+	/* refill the bucket for the time elapsed since the last update */
+	if (CYCLES_AFTER(time, node->last_updated_at)) {
+		nodevalue_add(node, FSCHDURATION(time, node->last_updated_at),
+			      node->rate);
+		node->last_updated_at = time;
+	}
+
+	/*
+	 * A full tick was charged when the node was given this CPU;
+	 * give back whatever part of that tick was not used.
+	 */
+	if (FSCHDUR_CMP(TICK_DUR, duration) > 0)
+		nodevalue_add(node, FSCHDUR_SUB(TICK_DUR, duration),
+			      1 << FSCHRATE_SHIFT);
+
+	if (FSCHVALUE_CMP(node->value, TICK_VALUE) >= 0) {
+		/*
+		 * Enough credit for a tick.  A delayed node becomes ready
+		 * again; the caller does the list manipulation.
+		 */
+		if (node->delayed) {
+			node->nr_ready = node->nr_runnable;
+			node->delayed = 0;
+		}
+		return;
+	}
+
+	/*
+	 * Still short of a tick: the node was delayed and stays so, but
+	 * the bucket changed, so recompute the time it may run again.
+	 */
+	fsch_assert(node->delayed);
+	node->delay = node->last_updated_at + FSCHVALUE_TO_DELAY(
+				FSCHVALUE_SUB(TICK_VALUE, node->value),
+				node->rate);
+}
+
+static void fairsched_delayed_wake(cycles_t time)
+{
+	struct fairsched_node *p;
+
+	for (;;) {
+		if (list_empty(&fairsched_delayed_head))
+			return;
+		p = list_entry(fairsched_delayed_head.next,
+				struct fairsched_node, runlist);
+		/* the first entry is not due yet: stop waking */
+		if (CYCLES_AFTER(p->delay, time))
+			return;
+		/* delay completed; refill for the time since last update */
+		if (CYCLES_AFTER(time, p->last_updated_at)) {
+			nodevalue_add(p, FSCHDURATION(time, p->last_updated_at),
+					p->rate);
+			p->last_updated_at = time;
+		}
+
+		fsch_assert(FSCHVALUE_CMP(p->value, TICK_VALUE) >= 0);
+		list_del_init(&p->runlist);
+		p->delayed = 0;
+		p->nr_ready = p->nr_runnable;
+		if (p->nr_ready)
+			fairsched_running_insert_fromsleep(p);
+	}
+}
+
+static struct fairsched_node *fairsched_find(unsigned int id);
+
+void fairsched_cpu_online_map(int id, cpumask_t *mask)
+{
+	/* FIXME - obtain real map */
+	*mask = cpu_online_map;
+#if 0
+	struct fairsched_node *node;
+
+	down(&fairsched_mutex);
+	node = fairsched_find(id);
+	if (node == NULL)
+		*mask = CPU_MASK_NONE;
+	else
+		vsched_cpu_online_map(node->vsched, mask);
+	up(&fairsched_mutex);
+#endif
+}
+
+/*********************************************************************/
+/*
+ * The heart of the algorithm:
+ * fairsched_incrun, fairsched_decrun, fairsched_schedule
+ *
+ * Note: old property nr_ready >= nr_pcpu doesn't hold anymore.
+ * However, nr_runnable, nr_ready and delayed are maintained in sync.
+ */
+/*********************************************************************/
+
+/*
+ * Called on a wakeup inside the node.
+ */
+void fairsched_incrun(struct fairsched_node *node)
+{
+	if (!node->delayed && !node->nr_ready++)
+		/* the node wasn't on the running list, insert */
+		fairsched_running_insert_fromsleep(node);
+	node->nr_runnable++;
+}
+
+/*
+ * Called from inside schedule() when a sleeping state is entered.
+ */
+void fairsched_decrun(struct fairsched_node *node)
+{
+	if (!node->delayed && !--node->nr_ready)
+		/* nr_ready changed 1->0, remove from the running list */
+		list_del_init(&node->runlist);
+	--node->nr_runnable;
+}
+
+void fairsched_inccpu(struct fairsched_node *node)
+{
+	node->nr_pcpu++;
+	fairsched_dec_ve_strv(node, cycles);
+}
+
+static inline void __fairsched_deccpu(struct fairsched_node *node)
+{
+	node->nr_pcpu--;
+	fairsched_inc_ve_strv(node, cycles);
+}
+
+void fairsched_deccpu(struct fairsched_node *node)
+{
+	if (node == &fairsched_idle_node)
+		return;
+
+	__fairsched_deccpu(node);
+}
+
+/* Charge node for the CPU time used since prev_schedule and requeue it. */
+static void fairsched_account(struct fairsched_node *node, cycles_t time)
+{
+	fschdur_t used = FSCHDURATION(time, __get_cpu_var(prev_schedule));
+
+#ifdef CONFIG_VE
+	CYCLES_DADD(&node->owner_env->cpu_used_ve, used);
+#endif
+
+	/*
+	 * The duration is not greater than TICK_DUR since
+	 * task->need_resched is always 1.  If the tag cannot be advanced,
+	 * rebase all start tags and try once more.
+	 */
+	if (FSCHTAG_DADD(&node->start_tag, used, node->weight) != 0) {
+		fairsched_reset_start_tags();
+		(void) FSCHTAG_DADD(&node->start_tag, used, node->weight);
+	}
+
+	list_del_init(&node->runlist);
+	if (node->rate_limited)
+		fairsched_ratelimit_credit_unused(node, time, used);
+
+	/* requeue: delayed list, running list, or nowhere if not ready */
+	if (node->delayed)
+		fairsched_delayed_insert(node);
+	else if (node->nr_ready)
+		fairsched_running_insert(node);
+}
+
+/*
+ * Scheduling decision
+ *
+ * Updates CPU usage for the node releasing the CPU and selects a new node.
+ */
+struct fairsched_node *fairsched_schedule(
+		struct fairsched_node *prev_node,
+		struct fairsched_node *cur_node,
+		int cur_node_active,
+		cycles_t time)
+{
+	struct fairsched_node *cand;
+
+	if (prev_node != &fairsched_idle_node)
+		fairsched_account(prev_node, time);
+	__get_cpu_var(prev_schedule) = time;
+	fairsched_delayed_wake(time);
+
+	list_for_each_entry(cand, &fairsched_running_head, runlist) {
+		/* skip unless nr_pcpu < nr_ready or it is the active cur_node */
+		if (cand->nr_pcpu >= cand->nr_ready &&
+		    !(cur_node_active && cand == cur_node))
+			continue;
+		if (cand->rate_limited)
+			fairsched_ratelimit_charge_advance(cand, time);
+		return cand;
+	}
+	return NULL;
+}
+
+
+/*********************************************************************/
+/*
+ * System calls 
+ *
+ * All do_xxx functions are called under fairsched semaphore and after
+ * capability check.
+ *
+ * The binary interfaces follow some other Fair Scheduler implementations
+ * (although some system call arguments are not needed for our implementation).
+ */
+/*********************************************************************/
+
+/* Look up a node by id on fairsched_node_head; NULL when absent. */
+static struct fairsched_node *fairsched_find(unsigned int id)
+{
+	struct fairsched_node *node;
+
+	list_for_each_entry(node, &fairsched_node_head, nodelist)
+		if (node->id == id)
+			return node;
+	return NULL;
+}
+
+/*
+ * Create node newid with the given weight and link it into
+ * fairsched_node_head.  Returns newid on success, -EINVAL for a bad
+ * weight or id, -EBUSY if the id exists, -ENOMEM on allocation failure.
+ */
+static int do_fairsched_mknod(unsigned int parent, unsigned int weight,
+		unsigned int newid)
+{
+	struct fairsched_node *node;
+
+	/* parent is not used by this implementation */
+	if (weight < 1 || weight > FSCHWEIGHT_MAX)
+		return -EINVAL;
+	/* ids are returned as int, so they must not exceed INT_MAX */
+	if (newid > INT_MAX)
+		return -EINVAL;
+	if (fairsched_find(newid) != NULL)
+		return -EBUSY;
+
+	node = kzalloc(sizeof(*node), GFP_KERNEL);
+	if (node == NULL)
+		return -ENOMEM;
+
+	node->weight = weight;
+	node->id = newid;
+	INIT_LIST_HEAD(&node->runlist);
+#ifdef CONFIG_VE
+	node->owner_env = get_exec_env();
+#endif
+
+	/* publish the node and refresh max_latency under fairsched_lock */
+	spin_lock_irq(&fairsched_lock);
+	list_add(&node->nodelist, &fairsched_node_head);
+	fairsched_nr_nodes++;
+	fairsched_recompute_max_latency();
+	spin_unlock_irq(&fairsched_lock);
+
+	return newid;
+}
+
+asmlinkage int sys_fairsched_mknod(unsigned int parent, unsigned int weight,
+				    unsigned int newid)
+{
+	int retval;
+
+	if (!capable(CAP_SETVEID))
+		return -EPERM;
+
+	down(&fairsched_mutex);
+	retval = do_fairsched_mknod(parent, weight, newid);
+	up(&fairsched_mutex);
+
+	return retval;
+}
+EXPORT_SYMBOL(sys_fairsched_mknod);
+
+/*
+ * Remove node id: destroy its vsched, unlink it from the node and run
+ * lists under fairsched_lock, then free it.  Returns 0 on success,
+ * -EINVAL for an unknown id or the init node, or vsched_destroy()'s error.
+ */
+static int do_fairsched_rmnod(unsigned int id)
+{
+	struct fairsched_node *node = fairsched_find(id);
+	int err;
+
+	/* unknown ids and the init node cannot be removed */
+	if (node == NULL || node == &fairsched_init_node)
+		return -EINVAL;
+
+	err = vsched_destroy(node->vsched);
+	if (err)
+		return err;
+
+	spin_lock_irq(&fairsched_lock);
+	list_del(&node->runlist); /* required for delayed nodes */
+	list_del(&node->nodelist);
+	fairsched_nr_nodes--;
+	fairsched_recompute_max_latency();
+	spin_unlock_irq(&fairsched_lock);
+
+	kfree(node);
+	return 0;
+}
+
+asmlinkage int sys_fairsched_rmnod(unsigned int id)
+{
+	int retval;
+
+	if (!capable(CAP_SETVEID))
+		return -EPERM;
+
+	down(&fairsched_mutex);
+	retval = do_fairsched_rmnod(id);
+	up(&fairsched_mutex);
+
+	return retval;
+}
+EXPORT_SYMBOL(sys_fairsched_rmnod);
+
+/* Set the weight of node id; -EINVAL on bad arguments, -ENOENT if absent. */
+int do_fairsched_chwt(unsigned int id, unsigned weight)
+{
+	struct fairsched_node *target;
+
+	/* id 0 and weights outside [1, FSCHWEIGHT_MAX] are rejected */
+	if (id == 0 || weight < 1 || weight > FSCHWEIGHT_MAX)
+		return -EINVAL;
+
+	target = fairsched_find(id);
+	if (!target)
+		return -ENOENT;
+
+	/* max_latency is derived from the node weights, so refresh it */
+	spin_lock_irq(&fairsched_lock);
+	target->weight = weight;
+	fairsched_recompute_max_latency();
+	spin_unlock_irq(&fairsched_lock);
+	return 0;
+}
+
+asmlinkage int sys_fairsched_chwt(unsigned int id, unsigned weight)
+{
+	int retval;
+
+	if (!capable(CAP_SETVEID))
+		return -EPERM;
+
+	down(&fairsched_mutex);
+	retval = do_fairsched_chwt(id, weight);
+	up(&fairsched_mutex);
+
+	return retval;
+}
+
+/*
+ * Rate-limit control for node id.
+ *   op 0: enable limiting at rate, clamped to
+ *         fairsched_nr_cpus << FSCHRATE_SHIFT; returns the rate set
+ *   op 1: disable limiting; returns 0
+ *   op 2: query; returns the rate, or -ENODATA if not limited
+ * Returns -EINVAL for id 0, an out-of-range rate with op 0, or an
+ * unknown op, and -ENOENT if the node does not exist.
+ */
+int do_fairsched_rate(unsigned int id, int op, unsigned rate)
+{
+	struct fairsched_node *node;
+	cycles_t now;
+	int retval = -EINVAL;
+
+	if (id == 0)
+		return -EINVAL;
+	if (op == 0 && (rate < 1 || rate >= (1UL << 31)))
+		return -EINVAL;
+
+	node = fairsched_find(id);
+	if (node == NULL)
+		return -ENOENT;
+
+	spin_lock_irq(&fairsched_lock);
+	now = get_cycles();
+	if (op == 2) {
+		if (node->rate_limited)
+			retval = node->rate;
+		else
+			retval = -ENODATA;
+	} else if (op == 0 || op == 1) {
+		if (op == 0) {
+			node->rate = rate;
+			if (node->rate > (fairsched_nr_cpus << FSCHRATE_SHIFT))
+				node->rate =
+					fairsched_nr_cpus << FSCHRATE_SHIFT;
+			node->rate_limited = 1;
+		} else {
+			/*
+			 * Zeroing the rate is not needed by the kernel code,
+			 * which should not rely on rate being 0 when unset.
+			 * This is a band-aid for some existing tools (don't
+			 * know which one exactly).  --SAW
+			 */
+			node->rate = 0;
+			node->rate_limited = 0;
+		}
+		/* either way the bucket value is reset to max_value */
+		node->value = max_value;
+		if (node->delayed) {
+			/* make the delay expire now and wake what is due */
+			list_del(&node->runlist);
+			node->delay = now;
+			fairsched_delayed_insert(node);
+			node->last_updated_at = now;
+			fairsched_delayed_wake(now);
+		}
+		retval = node->rate;
+	}
+	spin_unlock_irq(&fairsched_lock);
+
+	return retval;
+}
+
+asmlinkage int sys_fairsched_rate(unsigned int id, int op, unsigned rate)
+{
+	int retval;
+
+	if (!capable(CAP_SETVEID))
+		return -EPERM;
+
+	down(&fairsched_mutex);
+	retval = do_fairsched_rate(id, op, rate);
+	up(&fairsched_mutex);
+
+	return retval;
+}
+
+/*
+ * Called under fairsched_mutex.
+ */
+static int __do_fairsched_mvpr(struct task_struct *p,
+		struct fairsched_node *node)
+{
+	/* create the node's vsched if it does not have one yet */
+	if (!node->vsched) {
+		int err = vsched_create(node->id, node);
+
+		if (err < 0)
+			return err;
+	}
+
+	/* no need to destroy vsched in case of mvpr failure */
+	return vsched_mvpr(p, node->vsched);
+}
+
+/*
+ * Move task pid into node nodeid.  Returns -ENOENT for an unknown
+ * node, -ESRCH for an unknown pid, else __do_fairsched_mvpr()'s result.
+ */
+int do_fairsched_mvpr(pid_t pid, unsigned int nodeid)
+{
+	struct fairsched_node *node;
+	struct task_struct *task;
+	int err;
+
+	node = fairsched_find(nodeid);
+	if (!node)
+		return -ENOENT;
+
+	/* take a reference on the task before tasklist_lock is dropped */
+	read_lock(&tasklist_lock);
+	task = find_task_by_pid_all(pid);
+	if (task)
+		get_task_struct(task);
+	read_unlock(&tasklist_lock);
+	if (!task)
+		return -ESRCH;
+
+	err = __do_fairsched_mvpr(task, node);
+	/* drop the reference taken above */
+	put_task_struct(task);
+	return err;
+}
+
+asmlinkage int sys_fairsched_mvpr(pid_t pid, unsigned int nodeid)
+{
+	int retval;
+
+	if (!capable(CAP_SETVEID))
+		return -EPERM;
+
+	down(&fairsched_mutex);
+	retval = do_fairsched_mvpr(pid, nodeid);
+	up(&fairsched_mutex);
+
+	return retval;
+}
+EXPORT_SYMBOL(sys_fairsched_mvpr);
+
+
+/*********************************************************************/
+/*
+ * proc interface
+ */
+/*********************************************************************/
+
+struct fairsched_node_dump {
+#ifdef CONFIG_VE
+	envid_t veid;
+#endif
+	int id;
+	unsigned weight;
+	unsigned rate;
+	unsigned rate_limited : 1,
+		 delayed : 1;
+	fschtag_t start_tag;
+	fschvalue_t value;
+	cycles_t delay;
+	int nr_ready;
+	int nr_runnable;
+	int nr_pcpu;
+	int nr_tasks, nr_runtasks;
+};
+
+struct fairsched_dump {
+	int len, compat;
+	struct fairsched_node_dump nodes[0];
+};
+
+static struct fairsched_dump *fairsched_do_dump(int compat)
+{
+	int nr_nodes;
+	int len, i;
+	struct fairsched_dump *dump;
+	struct fairsched_node *node;
+	struct fairsched_node_dump *p;
+	unsigned long flags;
+
+start:
+	nr_nodes = (ve_is_super(get_exec_env()) ? fairsched_nr_nodes + 16 : 1);
+	len = sizeof(*dump) + nr_nodes * sizeof(dump->nodes[0]);
+	dump = ub_vmalloc(len);
+	if (dump == NULL)
+		goto out;
+
+	spin_lock_irqsave(&fairsched_lock, flags);
+	if (ve_is_super(get_exec_env()) && nr_nodes < fairsched_nr_nodes)
+		goto repeat;
+	p = dump->nodes;
+	list_for_each_entry_reverse(node, &fairsched_node_head, nodelist) {
+		if ((char *)p - (char *)dump >= len)
+			break;
+		p->nr_tasks = 0;
+		p->nr_runtasks = 0;
+#ifdef CONFIG_VE
+		if (!ve_accessible(node->owner_env, get_exec_env()))
+			continue;
+		p->veid = node->owner_env->veid;
+		if (compat) {
+			p->nr_tasks = atomic_read(&node->owner_env->pcounter);
+			for (i = 0; i < NR_CPUS; i++)
+				p->nr_runtasks +=
+					VE_CPU_STATS(node->owner_env, i)
+								->nr_running;
+			if (p->nr_runtasks < 0)
+				p->nr_runtasks = 0;
+		}
+#endif
+		p->id = node->id;
+		p->weight = node->weight;
+		p->rate = node->rate;
+		p->rate_limited = node->rate_limited;
+		p->delayed = node->delayed;
+		p->start_tag = node->start_tag;
+		p->value = node->value;
+		p->delay = node->delay;
+		p->nr_ready = node->nr_ready;
+		p->nr_runnable = node->nr_runnable;
+		p->nr_pcpu = node->nr_pcpu;
+		p++;
+	}
+	dump->len = p - dump->nodes;
+	dump->compat = compat;
+	spin_unlock_irqrestore(&fairsched_lock, flags);
+
+out:
+	return dump;
+
+repeat:
+	spin_unlock_irqrestore(&fairsched_lock, flags);
+	vfree(dump);
+	goto start;
+}
+
+#define FAIRSCHED_PROC_HEADLINES 2
+
+#if defined(CONFIG_VE)
+/*
+ * File format is dictated by compatibility reasons.
+ */
+static int fairsched_seq_show(struct seq_file *m, void *v)
+{
+	struct fairsched_dump *dump;
+	struct fairsched_node_dump *p;
+	unsigned vid, nid, pid, r;
+
+	dump = m->private;
+	p = (struct fairsched_node_dump *)((unsigned long)v & ~3UL);
+	if (p - dump->nodes < FAIRSCHED_PROC_HEADLINES) {
+		if (p == dump->nodes)
+			seq_printf(m, "Version: 2.6 debug\n");
+		else if (p == dump->nodes + 1)
+			seq_printf(m,
+				       "      veid "
+				       "        id "
+				       "    parent "
+				       "weight "
+				       " rate "
+  				       "tasks "
+				       "  run "
+				       "cpus"
+				       " "
+				       "flg "
+				       "ready "
+				       "           start_tag "
+				       "               value "
+				       "               delay"
+				       "\n");
+	} else {
+		p -= FAIRSCHED_PROC_HEADLINES;
+		vid = nid = pid = 0;
+		r = (unsigned long)v & 3;
+		if (p == dump->nodes) {
+			if (r == 2)
+				nid = p->id;
+		} else {
+			if (!r)
+				nid = p->id;
+			else if (r == 1)
+				vid = pid = p->id;
+			else
+				vid = p->id, nid = 1;
+		}
+		seq_printf(m,
+			       "%10u "
+			       "%10u %10u %6u %5u %5u %5u %4u"
+			       " "
+			       " %c%c %5u %20Lu %20Lu %20Lu"
+			       "\n",
+			       vid,
+			       nid,
+			       pid,
+			       p->weight,
+			       p->rate,
+			       p->nr_tasks,
+			       p->nr_runtasks,
+			       p->nr_pcpu,
+			       p->rate_limited ? 'L' : '.',
+			       p->delayed ? 'D' : '.',
+			       p->nr_ready,
+			       p->start_tag.t,
+			       p->value.v,
+			       p->delay
+			       );
+	}
+
+	return 0;
+}
+
+static void *fairsched_seq_start(struct seq_file *m, loff_t *pos)
+{
+	struct fairsched_dump *dump;
+	unsigned long l;
+
+	dump = m->private;
+	if (*pos >= dump->len * 3 - 1 + FAIRSCHED_PROC_HEADLINES)
+		return NULL;
+	if (*pos < FAIRSCHED_PROC_HEADLINES)
+		return dump->nodes + *pos;
+	/* guess why... */
+	l = (unsigned long)(dump->nodes +
+		((unsigned long)*pos + FAIRSCHED_PROC_HEADLINES * 2 + 1) / 3);
+	l |= ((unsigned long)*pos + FAIRSCHED_PROC_HEADLINES * 2 + 1) % 3;
+	return (void *)l;
+}
+static void *fairsched_seq_next(struct seq_file *m, void *v, loff_t *pos)
+{
+	++*pos;
+	return fairsched_seq_start(m, pos);
+}
+#endif
+
+static int fairsched2_seq_show(struct seq_file *m, void *v)
+{
+	struct fairsched_dump *dump;
+	struct fairsched_node_dump *p;
+
+	dump = m->private;
+	p = v;	/* cookie from ->start/->next: dump->nodes + position */
+	if (p - dump->nodes < FAIRSCHED_PROC_HEADLINES) {
+		if (p == dump->nodes)
+			seq_printf(m, "Version: 2.7" FAIRSHED_DEBUG "\n");
+		else if (p == dump->nodes + 1)
+			seq_printf(m,
+				       "        id "
+				       "weight "
+				       " rate "
+				       "  run "
+				       "cpus"
+#ifdef FAIRSHED_DEBUG
+				       " "
+				       "flg "
+				       "ready "
+				       "           start_tag "
+				       "               value "
+				       "               delay"
+#endif
+				       "\n");
+	} else {
+		p -= FAIRSCHED_PROC_HEADLINES;	/* positions count the header lines too */
+		seq_printf(m,
+			       "%10u %6u %5u %5u %4u"
+#ifdef FAIRSHED_DEBUG
+			       " "
+			       " %c%c %5u %20Lu %20Lu %20Lu"
+#endif
+			       "\n",
+			       p->id,
+			       p->weight,
+			       p->rate,
+			       p->nr_runnable,
+			       p->nr_pcpu
+#ifdef FAIRSHED_DEBUG
+			       ,
+			       p->rate_limited ? 'L' : '.',
+			       p->delayed ? 'D' : '.',
+			       p->nr_ready,
+			       p->start_tag.t,
+			       p->value.v,
+			       p->delay
+#endif
+			       );
+	}
+
+	return 0;
+}
+
+static void *fairsched2_seq_start(struct seq_file *m, loff_t *pos)
+{
+	struct fairsched_dump *dump = m->private;
+
+	/* valid positions: the header lines followed by dump->len slots */
+	if (*pos < dump->len + FAIRSCHED_PROC_HEADLINES)
+		return dump->nodes + *pos;
+	return NULL;
+}
+static void *fairsched2_seq_next(struct seq_file *m, void *v, loff_t *pos)
+{
+	*pos += 1;	/* step forward; bounds are checked by start() */
+	return fairsched2_seq_start(m, pos);
+}
+static void fairsched2_seq_stop(struct seq_file *m, void *v)
+{	/* nothing to undo: the ->start handlers here take no locks */
+}
+
+#ifdef CONFIG_VE
+static struct seq_operations fairsched_seq_op = {
+	.start		= fairsched_seq_start,
+	.next		= fairsched_seq_next,
+	.stop		= fairsched2_seq_stop,	/* the no-op stop is shared by both formats */
+	.show		= fairsched_seq_show
+};
+#endif
+static struct seq_operations fairsched2_seq_op = {
+	.start		= fairsched2_seq_start,
+	.next		= fairsched2_seq_next,
+	.stop		= fairsched2_seq_stop,
+	.show		= fairsched2_seq_show
+};
+/* Open /proc/fairsched{,2}; the name length picks compat or v2 output. */
+static int fairsched_seq_open(struct inode *inode, struct file *file)
+{
+	struct seq_file *m;
+	int ret, compat;
+
+#ifdef CONFIG_VE
+	compat = (file->f_dentry->d_name.len == sizeof("fairsched") - 1);
+	ret = seq_open(file, compat ? &fairsched_seq_op : &fairsched2_seq_op);
+#else
+	compat = 0;
+	ret = seq_open(file, &fairsched2_seq_op);
+#endif
+	if (ret)
+		return ret;
+	m = file->private_data;
+	m->private = fairsched_do_dump(compat);
+	if (m->private == NULL) {
+		seq_release(inode, file);
+		ret = -ENOMEM;
+	}
+	return ret;
+}
+/* Release handler: frees the dump attached by fairsched_seq_open(). */
+static int fairsched_seq_release(struct inode *inode, struct file *file)
+{
+	struct seq_file *m = file->private_data;
+	struct fairsched_dump *dump = m->private;
+
+	/* detach the dump before freeing it, then tear down the seq_file */
+	m->private = NULL;
+	vfree(dump);
+	seq_release(inode, file);
+	return 0;
+}
+static struct file_operations proc_fairsched_operations = {
+	.open		= fairsched_seq_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= fairsched_seq_release	/* also vfree()s the dump */
+};
+
+
+/*********************************************************************/
+/*
+ * Fairsched initialization
+ */
+/*********************************************************************/
+
+int fsch_sysctl_latency(ctl_table *ctl, int write, struct file *filp,
+			void *buffer, size_t *lenp, loff_t *ppos)
+{
+	int *valp = ctl->data;
+	int old = *valp;
+	int ret;
+
+	ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
+
+	/* only a write that actually changed the value needs a recompute */
+	if (write && *valp != old) {
+		spin_lock_irq(&fairsched_lock);
+		fairsched_recompute_max_latency();
+		spin_unlock_irq(&fairsched_lock);
+	}
+	return ret;
+}
+
+static void fairsched_calibrate(void)
+{
+	fairsched_nr_cpus = num_online_cpus();	/* sampled at call time */
+	max_value = FSCHVALUE(cycles_per_jiffy * (fairsched_nr_cpus + 1));
+}
+
+void __init fairsched_init_early(void)
+{
+	list_add(&fairsched_init_node.nodelist, &fairsched_node_head);
+	fairsched_nr_nodes++;	/* count the init node just linked in */
+}
+
+/*
+ * Note: this function is executed late in the initialization sequence.
+ * We ourselves need calibrated cycles and initialized procfs...
+ * The consequence of this late initialization is that start tags are
+ * effectively ignored and each node preempts others on insertion.
+ * But it isn't a problem (only init node can be runnable).
+ */
+void __init fairsched_init_late(void)
+{
+	struct proc_dir_entry *entry;
+
+	if (get_cycles() == 0)
+		panic("FAIRSCHED: no TSC!\n");
+	fairsched_calibrate();
+	fairsched_recompute_max_latency();
+
+	entry = create_proc_glob_entry("fairsched", S_IRUGO, NULL);
+	if (entry)	/* a failed creation is tolerated: the file is just absent */
+		entry->proc_fops = &proc_fairsched_operations;
+	entry = create_proc_glob_entry("fairsched2", S_IRUGO, NULL);
+	if (entry)	/* same fops: the open handler checks the entry name */
+		entry->proc_fops = &proc_fairsched_operations;
+}
+
+
+#else /* CONFIG_FAIRSCHED */
+
+
+/*********************************************************************/
+/*
+ * No Fairsched
+ */
+/*********************************************************************/
+
+asmlinkage int sys_fairsched_mknod(unsigned int parent, unsigned int weight,
+				    unsigned int newid)
+{
+	return -ENOSYS;	/* CONFIG_FAIRSCHED is off: all five calls are stubs */
+}
+
+asmlinkage int sys_fairsched_rmnod(unsigned int id)
+{
+	return -ENOSYS;
+}
+
+asmlinkage int sys_fairsched_chwt(unsigned int id, unsigned int weight)
+{
+	return -ENOSYS;
+}
+
+asmlinkage int sys_fairsched_mvpr(pid_t pid, unsigned int nodeid)
+{
+	return -ENOSYS;
+}
+
+asmlinkage int sys_fairsched_rate(unsigned int id, int op, unsigned rate)
+{
+	return -ENOSYS;
+}
+
+void __init fairsched_init_late(void)
+{	/* nothing to initialize without the fair scheduler */
+}
+
+#endif /* CONFIG_FAIRSCHED */
diff -uprN linux-2.6.16/kernel/fork.c linux-2.6.16.ovz/kernel/fork.c
--- linux-2.6.16/kernel/fork.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/kernel/fork.c	2006-07-05 08:34:56.000000000 -0400
@@ -20,6 +20,7 @@
 #include <linux/vmalloc.h>
 #include <linux/completion.h>
 #include <linux/namespace.h>
+#include <linux/file.h>
 #include <linux/personality.h>
 #include <linux/mempolicy.h>
 #include <linux/sem.h>
@@ -52,11 +53,15 @@
 #include <asm/cacheflush.h>
 #include <asm/tlbflush.h>
 
+#include <ub/ub_vmpages.h>
+#include <ub/ub_misc.h>
+
 /*
  * Protected counters by write_lock_irq(&tasklist_lock)
  */
 unsigned long total_forks;	/* Handle normal Linux uptimes. */
 int nr_threads; 		/* The idle threads do not count.. */
+EXPORT_SYMBOL(nr_threads);
 
 int max_threads;		/* tunable limit on nr_threads */
 
@@ -103,6 +108,7 @@ static kmem_cache_t *mm_cachep;
 
 void free_task(struct task_struct *tsk)
 {
+	ub_task_uncharge(tsk);
 	free_thread_info(tsk->thread_info);
 	free_task_struct(tsk);
 }
@@ -122,9 +128,14 @@ void __put_task_struct_cb(struct rcu_hea
 	free_uid(tsk->user);
 	put_group_info(tsk->group_info);
 
+#ifdef CONFIG_VE
+	put_ve(VE_TASK_INFO(tsk)->owner_env);
+	atomic_dec(&nr_dead);
+#endif
 	if (!profile_handoff_task(tsk))
 		free_task(tsk);
 }
+EXPORT_SYMBOL_GPL(__put_task_struct_cb);
 
 void __init fork_init(unsigned long mempages)
 {
@@ -135,7 +146,7 @@ void __init fork_init(unsigned long memp
 	/* create a slab on which task_structs can be allocated */
 	task_struct_cachep =
 		kmem_cache_create("task_struct", sizeof(struct task_struct),
-			ARCH_MIN_TASKALIGN, SLAB_PANIC, NULL, NULL);
+			ARCH_MIN_TASKALIGN, SLAB_PANIC | SLAB_UBC, NULL, NULL);
 #endif
 
 	/*
@@ -166,22 +177,30 @@ static struct task_struct *dup_task_stru
 
 	tsk = alloc_task_struct();
 	if (!tsk)
-		return NULL;
+		goto out;
 
 	ti = alloc_thread_info(tsk);
-	if (!ti) {
-		free_task_struct(tsk);
-		return NULL;
-	}
+	if (!ti)
+		goto out_tsk;
 
 	*tsk = *orig;
 	tsk->thread_info = ti;
 	setup_thread_stack(tsk, orig);
 
+	if (ub_task_charge(orig, tsk))
+		goto out_ti;
+
 	/* One for us, one for whoever does the "release_task()" (usually parent) */
 	atomic_set(&tsk->usage,2);
 	atomic_set(&tsk->fs_excl, 0);
 	return tsk;
+
+out_ti:
+	free_thread_info(ti);
+out_tsk:
+	free_task_struct(tsk);
+out:
+	return NULL;
 }
 
 #ifdef CONFIG_MMU
@@ -219,7 +238,12 @@ static inline int dup_mmap(struct mm_str
 								-pages);
 			continue;
 		}
+
 		charge = 0;
+		if (ub_memory_charge(mm, mpnt->vm_end - mpnt->vm_start,
+					mpnt->vm_flags & ~VM_LOCKED,
+					mpnt->vm_file, UB_HARD))
+			goto fail_noch;
 		if (mpnt->vm_flags & VM_ACCOUNT) {
 			unsigned int len = (mpnt->vm_end - mpnt->vm_start) >> PAGE_SHIFT;
 			if (security_vm_enough_memory(len))
@@ -238,6 +262,7 @@ static inline int dup_mmap(struct mm_str
 		tmp->vm_flags &= ~VM_LOCKED;
 		tmp->vm_mm = mm;
 		tmp->vm_next = NULL;
+		set_vma_rss(tmp, 0);
 		anon_vma_link(tmp);
 		file = tmp->vm_file;
 		if (file) {
@@ -266,7 +291,7 @@ static inline int dup_mmap(struct mm_str
 		rb_parent = &tmp->vm_rb;
 
 		mm->map_count++;
-		retval = copy_page_range(mm, oldmm, mpnt);
+		retval = copy_page_range(mm, oldmm, tmp, mpnt);
 
 		if (tmp->vm_ops && tmp->vm_ops->open)
 			tmp->vm_ops->open(tmp);
@@ -283,6 +308,9 @@ out:
 fail_nomem_policy:
 	kmem_cache_free(vm_area_cachep, tmp);
 fail_nomem:
+	ub_memory_uncharge(mm, mpnt->vm_end - mpnt->vm_start,
+			mpnt->vm_flags & ~VM_LOCKED, mpnt->vm_file);
+fail_noch:
 	retval = -ENOMEM;
 	vm_unacct_memory(charge);
 	goto out;
@@ -313,7 +341,8 @@ static inline void mm_free_pgd(struct mm
 
 #include <linux/init_task.h>
 
-static struct mm_struct * mm_init(struct mm_struct * mm)
+static struct mm_struct * mm_init(struct mm_struct * mm,
+		struct task_struct *tsk)
 {
 	atomic_set(&mm->mm_users, 1);
 	atomic_set(&mm->mm_count, 1);
@@ -328,11 +357,14 @@ static struct mm_struct * mm_init(struct
 	mm->ioctx_list = NULL;
 	mm->free_area_cache = TASK_UNMAPPED_BASE;
 	mm->cached_hole_size = ~0UL;
+	set_mm_ub(mm, tsk);
 
 	if (likely(!mm_alloc_pgd(mm))) {
 		mm->def_flags = 0;
 		return mm;
 	}
+
+	put_mm_ub(mm);
 	free_mm(mm);
 	return NULL;
 }
@@ -347,10 +379,11 @@ struct mm_struct * mm_alloc(void)
 	mm = allocate_mm();
 	if (mm) {
 		memset(mm, 0, sizeof(*mm));
-		mm = mm_init(mm);
+		mm = mm_init(mm, NULL);
 	}
 	return mm;
 }
+EXPORT_SYMBOL_GPL(mm_alloc);
 
 /*
  * Called when the last reference to the mm
@@ -362,8 +395,10 @@ void fastcall __mmdrop(struct mm_struct 
 	BUG_ON(mm == &init_mm);
 	mm_free_pgd(mm);
 	destroy_context(mm);
+	put_mm_ub(mm);
 	free_mm(mm);
 }
+EXPORT_SYMBOL_GPL(__mmdrop);
 
 /*
  * Decrement the use count and release all resources for an mm.
@@ -466,7 +501,7 @@ static struct mm_struct *dup_mm(struct t
 
 	memcpy(mm, oldmm, sizeof(*mm));
 
-	if (!mm_init(mm))
+	if (!mm_init(mm, tsk))
 		goto fail_nomem;
 
 	if (init_new_context(tsk, mm))
@@ -720,7 +755,7 @@ out_release:
 	free_fdset (new_fdt->open_fds, new_fdt->max_fdset);
 	free_fd_array(new_fdt->fd, new_fdt->max_fds);
 	kmem_cache_free(files_cachep, newf);
-	goto out;
+	return NULL;
 }
 
 static int copy_files(unsigned long clone_flags, struct task_struct * tsk)
@@ -896,7 +931,7 @@ asmlinkage long sys_set_tid_address(int 
 {
 	current->clear_child_tid = tidptr;
 
-	return current->pid;
+	return virt_pid(current);
 }
 
 /*
@@ -913,7 +948,7 @@ static task_t *copy_process(unsigned lon
 				 unsigned long stack_size,
 				 int __user *parent_tidptr,
 				 int __user *child_tidptr,
-				 int pid)
+				 int pid, long pid0)
 {
 	int retval;
 	struct task_struct *p = NULL;
@@ -974,12 +1009,20 @@ static task_t *copy_process(unsigned lon
 	p->did_exec = 0;
 	copy_flags(clone_flags, p);
 	p->pid = pid;
+#ifdef CONFIG_VE
+	set_virt_pid(p, alloc_vpid(p->pid, pid0 ? : -1));
+	if (virt_pid(p) < 0)
+		goto bad_fork_cleanup_module;
+#endif
 	retval = -EFAULT;
 	if (clone_flags & CLONE_PARENT_SETTID)
-		if (put_user(p->pid, parent_tidptr))
+		if (put_user(virt_pid(p), parent_tidptr))
 			goto bad_fork_cleanup;
 
 	p->proc_dentry = NULL;
+#ifdef CONFIG_VE
+	p->ve_task_info.glob_proc_dentry = NULL;
+#endif
 
 	INIT_LIST_HEAD(&p->children);
 	INIT_LIST_HEAD(&p->sibling);
@@ -1027,8 +1070,13 @@ static task_t *copy_process(unsigned lon
 #endif
 
 	p->tgid = p->pid;
-	if (clone_flags & CLONE_THREAD)
+	set_virt_tgid(p, virt_pid(p));
+	set_virt_pgid(p, virt_pgid(current));
+	set_virt_sid(p, virt_sid(current));
+	if (clone_flags & CLONE_THREAD) {
 		p->tgid = current->tgid;
+		set_virt_tgid(p, virt_tgid(current));
+	}
 
 	if ((retval = security_task_alloc(p)))
 		goto bad_fork_cleanup_policy;
@@ -1111,8 +1159,8 @@ static task_t *copy_process(unsigned lon
 	 */
 	p->cpus_allowed = current->cpus_allowed;
 	if (unlikely(!cpu_isset(task_cpu(p), p->cpus_allowed) ||
-			!cpu_online(task_cpu(p))))
-		set_task_cpu(p, smp_processor_id());
+			!vcpu_online(task_cpu(p))))
+		set_task_cpu(p, task_cpu(current));
 
 	/*
 	 * Check for pending SIGKILL! The new thread should not be allowed
@@ -1181,6 +1229,12 @@ static task_t *copy_process(unsigned lon
 	if (unlikely(p->ptrace & PT_PTRACED))
 		__ptrace_link(p, current->parent);
 
+#ifdef CONFIG_VE
+	SET_VE_LINKS(p);
+	atomic_inc(&p->ve_task_info.owner_env->pcounter);
+	get_ve(p->ve_task_info.owner_env);
+	seqcount_init(&p->ve_task_info.wakeup_lock);
+#endif
 	if (thread_group_leader(p)) {
 		p->signal->tty = current->signal->tty;
 		p->signal->pgrp = process_group(current);
@@ -1228,6 +1282,11 @@ bad_fork_cleanup_cpuset:
 #endif
 	cpuset_exit(p);
 bad_fork_cleanup:
+#ifdef CONFIG_VE
+	if (virt_pid(p) != p->pid && virt_pid(p) > 0)
+		free_vpid(virt_pid(p), get_exec_env());
+bad_fork_cleanup_module:
+#endif
 	if (p->binfmt)
 		module_put(p->binfmt->module);
 bad_fork_cleanup_put_domain:
@@ -1253,7 +1312,7 @@ task_t * __devinit fork_idle(int cpu)
 	task_t *task;
 	struct pt_regs regs;
 
-	task = copy_process(CLONE_VM, 0, idle_regs(&regs), 0, NULL, NULL, 0);
+	task = copy_process(CLONE_VM, 0, idle_regs(&regs), 0, NULL, NULL, 0, 0);
 	if (!task)
 		return ERR_PTR(-ENOMEM);
 	init_idle(task, cpu);
@@ -1283,12 +1342,13 @@ static inline int fork_traceflag (unsign
  * It copies the process, and if successful kick-starts
  * it and waits for it to finish using the VM if required.
  */
-long do_fork(unsigned long clone_flags,
+long do_fork_pid(unsigned long clone_flags,
 	      unsigned long stack_start,
 	      struct pt_regs *regs,
 	      unsigned long stack_size,
 	      int __user *parent_tidptr,
-	      int __user *child_tidptr)
+	      int __user *child_tidptr,
+	      long pid0)
 {
 	struct task_struct *p;
 	int trace = 0;
@@ -1302,7 +1362,8 @@ long do_fork(unsigned long clone_flags,
 			clone_flags |= CLONE_PTRACE;
 	}
 
-	p = copy_process(clone_flags, stack_start, regs, stack_size, parent_tidptr, child_tidptr, pid);
+	p = copy_process(clone_flags, stack_start, regs, stack_size,
+			parent_tidptr, child_tidptr, pid, pid0);
 	/*
 	 * Do this prior waking up the new thread - the thread pointer
 	 * might get invalid after that point, if the thread exits quickly.
@@ -1310,6 +1371,7 @@ long do_fork(unsigned long clone_flags,
 	if (!IS_ERR(p)) {
 		struct completion vfork;
 
+		pid = virt_pid(p);
 		if (clone_flags & CLONE_VFORK) {
 			p->vfork_done = &vfork;
 			init_completion(&vfork);
@@ -1330,13 +1392,18 @@ long do_fork(unsigned long clone_flags,
 
 		if (unlikely (trace)) {
 			current->ptrace_message = pid;
+			set_pn_state(current, PN_STOP_FORK);
 			ptrace_notify ((trace << 8) | SIGTRAP);
+			clear_pn_state(current);
 		}
 
 		if (clone_flags & CLONE_VFORK) {
 			wait_for_completion(&vfork);
-			if (unlikely (current->ptrace & PT_TRACE_VFORK_DONE))
+			if (unlikely (current->ptrace & PT_TRACE_VFORK_DONE)) {
+				set_pn_state(current, PN_STOP_VFORK);
 				ptrace_notify ((PTRACE_EVENT_VFORK_DONE << 8) | SIGTRAP);
+				clear_pn_state(current);
+			}
 		}
 	} else {
 		free_pidmap(pid);
@@ -1349,26 +1416,39 @@ long do_fork(unsigned long clone_flags,
 #define ARCH_MIN_MMSTRUCT_ALIGN 0
 #endif
 
+EXPORT_SYMBOL(do_fork_pid);
+
+long do_fork(unsigned long clone_flags,
+		unsigned long stack_start,
+		struct pt_regs *regs,
+		unsigned long stack_size,
+		int __user *parent_tidptr,
+		int __user *child_tidptr)
+{
+	return do_fork_pid(clone_flags, stack_start, regs, stack_size,
+			parent_tidptr, child_tidptr, 0);	/* 0: no preset vpid */
+}
+
 void __init proc_caches_init(void)
 {
 	sighand_cachep = kmem_cache_create("sighand_cache",
 			sizeof(struct sighand_struct), 0,
-			SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL);
+			SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_UBC, NULL, NULL);
 	signal_cachep = kmem_cache_create("signal_cache",
 			sizeof(struct signal_struct), 0,
-			SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL);
+			SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_UBC, NULL, NULL);
 	files_cachep = kmem_cache_create("files_cache", 
 			sizeof(struct files_struct), 0,
-			SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL);
+			SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_UBC, NULL, NULL);
 	fs_cachep = kmem_cache_create("fs_cache", 
 			sizeof(struct fs_struct), 0,
-			SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL);
+			SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_UBC, NULL, NULL);
 	vm_area_cachep = kmem_cache_create("vm_area_struct",
 			sizeof(struct vm_area_struct), 0,
 			SLAB_PANIC, NULL, NULL);
 	mm_cachep = kmem_cache_create("mm_struct",
 			sizeof(struct mm_struct), ARCH_MIN_MMSTRUCT_ALIGN,
-			SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL);
+			SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_UBC, NULL, NULL);
 }
 
 
diff -uprN linux-2.6.16/kernel/hrtimer.c linux-2.6.16.ovz/kernel/hrtimer.c
--- linux-2.6.16/kernel/hrtimer.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/kernel/hrtimer.c	2006-07-05 08:34:56.000000000 -0400
@@ -439,6 +439,7 @@ hrtimer_start(struct hrtimer *timer, kti
 
 	return ret;
 }
+EXPORT_SYMBOL_GPL(hrtimer_start);
 
 /**
  * hrtimer_try_to_cancel - try to deactivate a timer
@@ -467,6 +468,7 @@ int hrtimer_try_to_cancel(struct hrtimer
 	return ret;
 
 }
+EXPORT_SYMBOL_GPL(hrtimer_try_to_cancel);
 
 /**
  * hrtimer_cancel - cancel a timer and wait for the handler to finish.
@@ -504,6 +506,7 @@ ktime_t hrtimer_get_remaining(const stru
 
 	return rem;
 }
+EXPORT_SYMBOL_GPL(hrtimer_get_remaining);
 
 #ifdef CONFIG_NO_IDLE_HZ
 /**
@@ -670,7 +673,7 @@ void hrtimer_run_queues(void)
  * The current task state is guaranteed to be TASK_RUNNING when this
  * routine returns.
  */
-static ktime_t __sched
+ktime_t __sched
 schedule_hrtimer(struct hrtimer *timer, const enum hrtimer_mode mode)
 {
 	/* fn stays NULL, meaning single-shot wakeup: */
@@ -697,7 +700,7 @@ schedule_hrtimer_interruptible(struct hr
 	return schedule_hrtimer(timer, mode);
 }
 
-static long __sched nanosleep_restart(struct restart_block *restart)
+long __sched nanosleep_restart(struct restart_block *restart)
 {
 	struct timespec __user *rmtp;
 	struct timespec tu;
@@ -726,6 +729,7 @@ static long __sched nanosleep_restart(st
 	/* The other values in restart are already filled in */
 	return -ERESTART_RESTARTBLOCK;
 }
+EXPORT_SYMBOL_GPL(nanosleep_restart);
 
 long hrtimer_nanosleep(struct timespec *rqtp, struct timespec __user *rmtp,
 		       const enum hrtimer_mode mode, const clockid_t clockid)
diff -uprN linux-2.6.16/kernel/irq/handle.c linux-2.6.16.ovz/kernel/irq/handle.c
--- linux-2.6.16/kernel/irq/handle.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/kernel/irq/handle.c	2006-07-05 08:34:56.000000000 -0400
@@ -14,6 +14,8 @@
 
 #include "internals.h"
 
+#include <ub/beancounter.h>
+
 /*
  * Linux has a controller-independent interrupt architecture.
  * Every controller has a 'controller-template', that is used
@@ -80,10 +82,12 @@ fastcall int handle_IRQ_event(unsigned i
 				struct irqaction *action)
 {
 	int ret, retval = 0, status = 0;
+	struct user_beancounter *ub;
 
 	if (!(action->flags & SA_INTERRUPT))
 		local_irq_enable();
 
+	ub = set_exec_ub(get_ub0());
 	do {
 		ret = action->handler(irq, action->dev_id, regs);
 		if (ret == IRQ_HANDLED)
@@ -91,6 +95,7 @@ fastcall int handle_IRQ_event(unsigned i
 		retval |= ret;
 		action = action->next;
 	} while (action);
+	(void)set_exec_ub(ub);
 
 	if (status & SA_SAMPLE_RANDOM)
 		add_interrupt_randomness(irq);
diff -uprN linux-2.6.16/kernel/kmod.c linux-2.6.16.ovz/kernel/kmod.c
--- linux-2.6.16/kernel/kmod.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/kernel/kmod.c	2006-07-05 08:34:56.000000000 -0400
@@ -78,6 +78,10 @@ int request_module(const char *fmt, ...)
 #define MAX_KMOD_CONCURRENT 50	/* Completely arbitrary value - KAO */
 	static int kmod_loop_msg;
 
+	/* Don't allow request_module() inside VE. */
+	if (!ve_is_super(get_exec_env()))
+		return -EPERM;
+
 	va_start(args, fmt);
 	ret = vsnprintf(module_name, MODULE_NAME_LEN, fmt, args);
 	va_end(args);
@@ -246,6 +250,9 @@ int call_usermodehelper_keys(char *path,
 	};
 	DECLARE_WORK(work, __call_usermodehelper, &sub_info);
 
+	if (!ve_is_super(get_exec_env()))
+		return -EPERM;
+
 	if (!khelper_wq)
 		return -EBUSY;
 
diff -uprN linux-2.6.16/kernel/kthread.c linux-2.6.16.ovz/kernel/kthread.c
--- linux-2.6.16/kernel/kthread.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/kernel/kthread.c	2006-07-05 08:34:56.000000000 -0400
@@ -114,7 +114,7 @@ static void keventd_create_kthread(void 
 		create->result = ERR_PTR(pid);
 	} else {
 		wait_for_completion(&create->started);
-		create->result = find_task_by_pid(pid);
+		create->result = find_task_by_pid_all(pid);
 	}
 	complete(&create->done);
 }
diff -uprN linux-2.6.16/kernel/module.c linux-2.6.16.ovz/kernel/module.c
--- linux-2.6.16/kernel/module.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/kernel/module.c	2006-07-05 08:34:56.000000000 -0400
@@ -2130,6 +2130,8 @@ static void *m_start(struct seq_file *m,
 	loff_t n = 0;
 
 	down(&module_mutex);
+	if (!ve_is_super(get_exec_env()))
+		return NULL;
 	list_for_each(i, &modules) {
 		if (n++ == *pos)
 			break;
diff -uprN linux-2.6.16/kernel/mutex-debug.c linux-2.6.16.ovz/kernel/mutex-debug.c
--- linux-2.6.16/kernel/mutex-debug.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/kernel/mutex-debug.c	2006-07-05 08:34:56.000000000 -0400
@@ -193,12 +193,12 @@ retry:
 	if (count != 10)
 		printk(" locked it.\n");
 
-	do_each_thread(g, p) {
+	do_each_thread_all(g, p) {
 		show_task_locks(p);
 		if (!unlock)
 			if (read_trylock(&tasklist_lock))
 				unlock = 1;
-	} while_each_thread(g, p);
+	} while_each_thread_all(g, p);
 
 	printk("\n");
 	show_held_locks(NULL);
diff -uprN linux-2.6.16/kernel/panic.c linux-2.6.16.ovz/kernel/panic.c
--- linux-2.6.16/kernel/panic.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/kernel/panic.c	2006-07-05 08:34:56.000000000 -0400
@@ -23,6 +23,8 @@
 int panic_timeout;
 int panic_on_oops;
 int tainted;
+int kernel_text_csum_broken;
+EXPORT_SYMBOL(kernel_text_csum_broken);
 
 EXPORT_SYMBOL(panic_timeout);
 
@@ -156,7 +158,8 @@ const char *print_tainted(void)
 {
 	static char buf[20];
 	if (tainted) {
-		snprintf(buf, sizeof(buf), "Tainted: %c%c%c%c%c%c",
+		snprintf(buf, sizeof(buf), "Tainted: %c%c%c%c%c%c%c",
+			kernel_text_csum_broken ? 'B' : ' ',
 			tainted & TAINT_PROPRIETARY_MODULE ? 'P' : 'G',
 			tainted & TAINT_FORCED_MODULE ? 'F' : ' ',
 			tainted & TAINT_UNSAFE_SMP ? 'S' : ' ',
diff -uprN linux-2.6.16/kernel/pid.c linux-2.6.16.ovz/kernel/pid.c
--- linux-2.6.16/kernel/pid.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/kernel/pid.c	2006-07-05 08:34:56.000000000 -0400
@@ -27,6 +27,10 @@
 #include <linux/bootmem.h>
 #include <linux/hash.h>
 
+#ifdef CONFIG_VE
+static void __free_vpid(int vpid, struct ve_struct *ve);
+#endif
+
 #define pid_hashfn(nr) hash_long((unsigned long)nr, pidhash_shift)
 static struct hlist_head *pid_hash[PIDTYPE_MAX];
 static int pidhash_shift;
@@ -57,8 +61,14 @@ typedef struct pidmap {
 	void *page;
 } pidmap_t;
 
+#ifdef CONFIG_VE
+#define PIDMAP_NRFREE (BITS_PER_PAGE/2)
+#else
+#define PIDMAP_NRFREE BITS_PER_PAGE
+#endif
+
 static pidmap_t pidmap_array[PIDMAP_ENTRIES] =
-	 { [ 0 ... PIDMAP_ENTRIES-1 ] = { ATOMIC_INIT(BITS_PER_PAGE), NULL } };
+	 { [ 0 ... PIDMAP_ENTRIES-1 ] = { ATOMIC_INIT(PIDMAP_NRFREE), NULL } };
 
 static  __cacheline_aligned_in_smp DEFINE_SPINLOCK(pidmap_lock);
 
@@ -67,9 +77,13 @@ fastcall void free_pidmap(int pid)
 	pidmap_t *map = pidmap_array + pid / BITS_PER_PAGE;
 	int offset = pid & BITS_PER_PAGE_MASK;
 
-	clear_bit(offset, map->page);
+	BUG_ON(__is_virtual_pid(pid) || pid == 1);
+
+	if (test_and_clear_bit(offset, map->page) == 0)
+		BUG();
 	atomic_inc(&map->nr_free);
 }
+EXPORT_SYMBOL_GPL(free_pidmap);
 
 int alloc_pidmap(void)
 {
@@ -77,6 +91,8 @@ int alloc_pidmap(void)
 	pidmap_t *map;
 
 	pid = last + 1;
+	if (__is_virtual_pid(pid))
+		pid += VPID_DIV;
 	if (pid >= pid_max)
 		pid = RESERVED_PIDS;
 	offset = pid & BITS_PER_PAGE_MASK;
@@ -106,6 +122,8 @@ int alloc_pidmap(void)
 					return pid;
 				}
 				offset = find_next_offset(map, offset);
+				if (__is_virtual_pid(offset))
+					offset += VPID_DIV;
 				pid = mk_pid(map, offset);
 			/*
 			 * find_next_offset() found a bit, the pid from it
@@ -130,6 +148,7 @@ int alloc_pidmap(void)
 	}
 	return -1;
 }
+EXPORT_SYMBOL_GPL(alloc_pidmap);
 
 struct pid * fastcall find_pid(enum pid_type type, int nr)
 {
@@ -143,6 +162,7 @@ struct pid * fastcall find_pid(enum pid_
 	}
 	return NULL;
 }
+EXPORT_SYMBOL(find_pid);
 
 int fastcall attach_pid(task_t *task, enum pid_type type, int nr)
 {
@@ -162,6 +182,7 @@ int fastcall attach_pid(task_t *task, en
 
 	return 0;
 }
+EXPORT_SYMBOL_GPL(attach_pid);
 
 static fastcall int __detach_pid(task_t *task, enum pid_type type)
 {
@@ -201,13 +222,27 @@ void fastcall detach_pid(task_t *task, e
 		if (tmp != type && find_pid(tmp, nr))
 			return;
 
+#ifdef CONFIG_VE
+	__free_vpid(task->pids[type].vnr, VE_TASK_INFO(task)->owner_env);
+#endif
 	free_pidmap(nr);
 }
+EXPORT_SYMBOL_GPL(detach_pid);
 
 task_t *find_task_by_pid_type(int type, int nr)
 {
+	BUG();
+	return NULL;
+}
+
+EXPORT_SYMBOL(find_task_by_pid_type);
+
+task_t *find_task_by_pid_type_all(int type, int nr)
+{
 	struct pid *pid;
 
+	BUG_ON(nr != -1 && is_virtual_pid(nr));
+
 	pid = find_pid(type, nr);
 	if (!pid)
 		return NULL;
@@ -215,7 +250,35 @@ task_t *find_task_by_pid_type(int type, 
 	return pid_task(&pid->pid_list, type);
 }
 
-EXPORT_SYMBOL(find_task_by_pid_type);
+EXPORT_SYMBOL(find_task_by_pid_type_all);
+
+#ifdef CONFIG_VE
+
+task_t *find_task_by_pid_type_ve(int type, int nr)
+{
+	struct pid *link;
+	task_t *task;
+	int gpid = nr;
+
+	/* a virtual number must first be translated to its global pid */
+	if (is_virtual_pid(nr)) {
+		gpid = __vpid_to_pid(nr);
+		if (unlikely(gpid == -1))
+			return NULL;
+	}
+	link = find_pid(type, gpid);
+	if (link == NULL)
+		return NULL;
+	/* hand the task out only if ve_accessible() admits the current VE */
+	task = pid_task(&link->pid_list, type);
+	if (ve_accessible(VE_TASK_INFO(task)->owner_env, get_exec_env()))
+		return task;
+	return NULL;
+}
+
+EXPORT_SYMBOL(find_task_by_pid_type_ve);
+
+#endif
 
 /*
  * This function switches the PIDs if a non-leader thread calls
@@ -234,12 +297,16 @@ void switch_exec_pids(task_t *leader, ta
 
 	leader->pid = leader->tgid = thread->pid;
 	thread->pid = thread->tgid;
+	set_virt_tgid(leader, virt_pid(thread));
+	set_virt_pid(leader, virt_pid(thread));
+	set_virt_pid(thread, virt_tgid(thread));
 
 	attach_pid(thread, PIDTYPE_PID, thread->pid);
 	attach_pid(thread, PIDTYPE_TGID, thread->tgid);
 	attach_pid(thread, PIDTYPE_PGID, thread->signal->pgrp);
 	attach_pid(thread, PIDTYPE_SID, thread->signal->session);
 	list_add_tail(&thread->tasks, &init_task.tasks);
+	SET_VE_LINKS(thread);
 
 	attach_pid(leader, PIDTYPE_PID, leader->pid);
 	attach_pid(leader, PIDTYPE_TGID, leader->tgid);
@@ -247,6 +314,362 @@ void switch_exec_pids(task_t *leader, ta
 	attach_pid(leader, PIDTYPE_SID, leader->signal->session);
 }
 
+#ifdef CONFIG_VE
+
+/* Virtual PID bits.
+ *
+ * At the moment all internal structures in kernel store real global pid.
+ * The only place, where virtual PID is used, is at user frontend. We
+ * remap virtual pids obtained from user to global ones (vpid_to_pid) and
+ * map globals to virtuals before showing them to user (virt_pid_type).
+ *
+ * We hold virtual PIDs inside struct pid, so map global -> virtual is easy.
+ */
+
+pid_t _pid_type_to_vpid(int type, pid_t pid)
+{
+	struct pid *entry;
+	pid_t vpid = -1;
+
+	/* the input must be a global pid; a virtual one yields -1 */
+	if (unlikely(is_virtual_pid(pid)))
+		return -1;
+
+	/* struct pid stores the virtual number (vnr) next to the global one */
+	read_lock(&tasklist_lock);
+	entry = find_pid(type, pid);
+	if (entry != NULL)
+		vpid = entry->vnr;
+	read_unlock(&tasklist_lock);
+	return vpid;
+}
+EXPORT_SYMBOL_GPL(_pid_type_to_vpid);
+
+pid_t pid_type_to_vpid(int type, pid_t pid)
+{
+	int vpid;
+
+	/* non-positive values are handed back untouched */
+	if (unlikely(pid <= 0))
+		return pid;
+
+	BUG_ON(is_virtual_pid(pid));
+
+	/* in the super VE the global number is returned as it is */
+	if (ve_is_super(get_exec_env()))
+		return pid;
+
+	vpid = _pid_type_to_vpid(type, pid);
+	/*
+	 * A miss is allowed: a global pid can be used everywhere. It
+	 * happens when the kernel remembers stray pids (signal queues,
+	 * locks etc.), so fall back to the global number.
+	 */
+	return unlikely(vpid == -1) ? pid : vpid;
+}
+EXPORT_SYMBOL_GPL(pid_type_to_vpid);
+
+/* To map virtual pids to global we maintain special hash table.
+ *
+ * Mapping entries are allocated when a process with non-trivial
+ * mapping is forked, which is possible only after VE migrated.
+ * Mappings are destroyed, when a global pid is removed from global
+ * pidmap, which means we do not need to refcount mappings.
+ */
+
+static struct hlist_head *vpid_hash;
+
+struct vpid_mapping
+{
+	int	vpid;	/* virtual pid; lookup key together with veid */
+	int	veid;	/* id of the VE the vpid belongs to */
+	int	pid;	/* global pid the (vpid, veid) pair translates to */
+	struct hlist_node link;	/* chain in vpid_hash[] or a prealloc list */
+	struct rcu_head rcu;	/* for the call_rcu() free in __free_vpid() */
+};
+
+static kmem_cache_t *vpid_mapping_cachep;
+
+static inline int vpid_hashfn(int vnr, int veid)
+{	/* shift as unsigned: (int)veid << 16 is undefined for veid >= 32768 */
+	return hash_long(vnr + ((unsigned long)veid << 16), pidhash_shift);
+}
+
+struct vpid_mapping *__lookup_vpid_mapping(int vnr, int veid)
+{
+	struct hlist_head *bucket = &vpid_hash[vpid_hashfn(vnr, veid)];
+	struct vpid_mapping *map;
+	struct hlist_node *node;
+
+	/* one table serves every VE, so both halves of the key are compared */
+	hlist_for_each_entry_rcu(map, node, bucket, link)
+		if (map->vpid == vnr && map->veid == veid)
+			return map;
+	return NULL;
+}
+
+/* __vpid_to_pid() is raw version of vpid_to_pid(). It is to be used
+ * only under tasklist_lock. In some places we must use only this version
+ * (f.e. __kill_pg_info is called under write lock!)
+ *
+ * Caller should pass virtual pid. This function returns an error, when
+ * seeing a global pid.
+ */
+int __vpid_to_pid(int pid)
+{
+	struct ve_struct *env = get_exec_env();
+	struct vpid_mapping *map;
+
+	if (unlikely(!is_virtual_pid(pid) || ve_is_super(env)))
+		return -1;
+
+	/* arithmetic mode: only vpid 1 is special, it maps to init_entry */
+	if (!env->sparse_vpid)
+		return pid == 1 ? env->init_entry->pid : pid - VPID_DIV;
+
+	/* sparse mode: the translation lives in the vpid hash */
+	map = __lookup_vpid_mapping(pid, VEID(env));
+	if (map == NULL)
+		return -1;
+	return map->pid;
+}
+EXPORT_SYMBOL_GPL(__vpid_to_pid);
+
+int vpid_to_pid(int pid)
+{
+	int gpid;
+
+	/* Non-positive pids are the caller's problem and global pids
+	 * need no translation: both are handed back unchanged. */
+	if (unlikely(pid <= 0) || !is_virtual_pid(pid))
+		return pid;
+
+	read_lock(&tasklist_lock);
+	gpid = __vpid_to_pid(pid);
+	read_unlock(&tasklist_lock);
+	return gpid;
+}
+EXPORT_SYMBOL_GPL(vpid_to_pid);
+
+/* VEs which never migrated have trivial "arithmetic" mapping pid <-> vpid:
+ *
+ * vpid == 1 -> ve->init_entry->pid
+ * else	        pid & ~VPID_DIV
+ *
+ * In this case VE has ve->sparse_vpid = 0 and we do not use vpid hash table.
+ *
+ * When VE migrates and we see non-trivial mapping the first time, we
+ * scan process table and populate mapping hash table.
+ */
+
+static int add_mapping(int pid, int vpid, int veid, struct hlist_head *cache)
+{
+	struct vpid_mapping *m;
+
+	if (unlikely(pid <= 0 || vpid <= 0))
+		return 0;
+
+	/* VE can contain non-virtual (VE_ENTER'ed) processes when
+	 * switching to sparse mapping. We should not create mappings
+	 * for them. */
+	if (unlikely(!__is_virtual_pid(vpid) && vpid != 1)) {
+		printk("DEBUG (do not worry, but report): non-virtual pid while switching mode %d %d\n", pid, vpid);
+		return 0;
+	}
+	if (__lookup_vpid_mapping(vpid, veid))
+		return 0;	/* already mapped: nothing to add */
+	/* use a preallocated entry while any remain, else allocate atomically */
+	if (!hlist_empty(cache)) {
+		m = hlist_entry(cache->first, struct vpid_mapping, link);
+		hlist_del_rcu(&m->link);
+	} else {
+		m = kmem_cache_alloc(vpid_mapping_cachep, GFP_ATOMIC);
+		if (unlikely(m == NULL))
+			return -ENOMEM;
+	}
+	m->pid = pid;
+	m->vpid = vpid;
+	m->veid = veid;
+	hlist_add_head_rcu(&m->link, &vpid_hash[vpid_hashfn(vpid, veid)]);
+	return 0;
+}
+
+static int switch_to_sparse_mapping(int pid)
+{
+	struct ve_struct *env = get_exec_env();
+	struct hlist_head cache;
+	task_t *g, *t;
+	int pcount;
+	int err;
+
+	/* The transition runs under write_lock_irq, so mapping entries
+	 * are preallocated here to keep it fast and less likely to fail.
+	 * pcounter may be too small: orphaned process groups and sessions
+	 * need mappings as well (add_mapping() then allocates atomically).
+	 */
+	INIT_HLIST_HEAD(&cache);
+	pcount = atomic_read(&env->pcounter);
+	err = -ENOMEM;
+	while (pcount > 0) {
+		struct vpid_mapping *m;
+		m = kmem_cache_alloc(vpid_mapping_cachep, GFP_KERNEL);
+		if (!m)
+			goto out;
+		hlist_add_head(&m->link, &cache);
+		pcount--;
+	}
+
+	write_lock_irq(&tasklist_lock);
+	err = 0;
+	if (env->sparse_vpid)	/* already switched: nothing to do */
+		goto out_unlock;
+
+	err = -ENOMEM;
+	do_each_thread_ve(g, t) {
+		if (t->pid == pid)
+			continue;	/* alloc_vpid() adds the mapping for this pid */
+		if (add_mapping(t->pid, virt_pid(t), VEID(env), &cache))
+			goto out_unlock;
+	} while_each_thread_ve(g, t);
+
+	for_each_process_ve(t) {
+		if (t->pid == pid)
+			continue;
+
+		if (add_mapping(t->tgid, virt_tgid(t), VEID(env), &cache))
+			goto out_unlock;
+		if (add_mapping(t->signal->pgrp, virt_pgid(t), VEID(env), &cache))
+			goto out_unlock;
+		if (add_mapping(t->signal->session, virt_sid(t), VEID(env), &cache))
+			goto out_unlock;
+	}
+	env->sparse_vpid = 1;
+	err = 0;
+
+out_unlock:
+	if (err) {	/* roll back: move this VE's entries from the hash to cache */
+		int i;
+
+		for (i=0; i<(1<<pidhash_shift); i++) {
+			struct hlist_node *elem, *next;
+			struct vpid_mapping *map;
+
+			hlist_for_each_entry_safe(map, elem, next, &vpid_hash[i], link) {
+				if (map->veid == VEID(env)) {
+					hlist_del(elem);
+					hlist_add_head(elem, &cache);
+				}
+			}
+		}
+	}
+	write_unlock_irq(&tasklist_lock);
+
+out:
+	while (!hlist_empty(&cache)) {	/* free unused or rolled-back entries */
+		struct vpid_mapping *m;
+		m = hlist_entry(cache.first, struct vpid_mapping, link);
+		hlist_del_rcu(&m->link);
+		kmem_cache_free(vpid_mapping_cachep, m);
+	}
+	return err;
+}
+
+int alloc_vpid(int pid, int virt_pid)
+{
+	int result;
+	struct vpid_mapping *m;
+	struct ve_struct *env = get_exec_env();
+
+	if (ve_is_super(env) || !env->virt_pids)
+		return pid;
+
+	if (!env->sparse_vpid) {
+		if (virt_pid == -1)
+			return pid + VPID_DIV;	/* default arithmetic mapping */
+
+		if (virt_pid == 1 || virt_pid == pid + VPID_DIV)
+			return virt_pid;	/* request fits the arithmetic scheme */
+
+		if ((result = switch_to_sparse_mapping(pid)) < 0)
+			return result;
+	}
+
+	m = kmem_cache_alloc(vpid_mapping_cachep, GFP_KERNEL);
+	if (!m)
+		return -ENOMEM;
+
+	m->pid = pid;
+	m->veid = VEID(env);
+
+	result = (virt_pid == -1) ? pid + VPID_DIV : virt_pid;
+
+	write_lock_irq(&tasklist_lock);
+	if (unlikely(__lookup_vpid_mapping(result, m->veid))) {
+		if (virt_pid > 0) {
+			result = -EEXIST;
+			goto out;
+		}
+
+		/* No luck: probe for a vpid that is not mapped yet.
+		 * This is a weak spot: a linear search under the lock. */
+		do {
+			result++;
+			if (!__is_virtual_pid(result))
+				result += VPID_DIV;
+			if (result >= pid_max)
+				result = RESERVED_PIDS + VPID_DIV;
+		} while (__lookup_vpid_mapping(result, m->veid) != NULL);
+
+		/* Also move last_pid, in the hope that later alloc_pidmap()
+		 * calls hand out pids that avoid such collisions. */
+		last_pid = result - VPID_DIV;
+	}
+	if (result > 0) {
+		m->vpid = result;
+		hlist_add_head_rcu(&m->link,
+			       &vpid_hash[vpid_hashfn(result, m->veid)]);
+	}
+out:
+	write_unlock_irq(&tasklist_lock);
+	if (result < 0)
+		kmem_cache_free(vpid_mapping_cachep, m);
+	return result;
+}
+EXPORT_SYMBOL(alloc_vpid);
+
+static void vpid_free_cb(struct rcu_head *rhp)
+{
+	struct vpid_mapping *victim = container_of(rhp, struct vpid_mapping, rcu);
+
+	/* RCU callback queued by __free_vpid(): release the mapping */
+	kmem_cache_free(vpid_mapping_cachep, victim);
+}
+
+static void __free_vpid(int vpid, struct ve_struct *ve)
+{
+	struct vpid_mapping *victim;
+
+	/* a VE that never went sparse owns no hash entries */
+	if (!ve->sparse_vpid)
+		return;
+	/* only virtual numbers, plus vpid 1 outside the super VE, are mapped */
+	if (__is_virtual_pid(vpid) || (vpid == 1 && !ve_is_super(ve))) {
+		victim = __lookup_vpid_mapping(vpid, ve->veid);
+		BUG_ON(victim == NULL);
+		hlist_del_rcu(&victim->link);
+		call_rcu(&victim->rcu, vpid_free_cb);
+	}
+}
+
+void free_vpid(int vpid, struct ve_struct *ve)
+{
+	write_lock_irq(&tasklist_lock);	/* vpid hash updates run under this lock */
+	__free_vpid(vpid, ve);
+	write_unlock_irq(&tasklist_lock);
+}
+EXPORT_SYMBOL(free_vpid);
+#endif
+
 /*
  * The pid hash table is scaled according to the amount of memory in the
  * machine.  From a minimum of 16 slots up to 4096 slots at one gigabyte or
@@ -273,6 +696,14 @@ void __init pidhash_init(void)
 		for (j = 0; j < pidhash_size; j++)
 			INIT_HLIST_HEAD(&pid_hash[i][j]);
 	}
+
+#ifdef CONFIG_VE
+	vpid_hash = alloc_bootmem(pidhash_size * sizeof(struct hlist_head));
+	if (!vpid_hash)
+		panic("Could not alloc vpid_hash!\n");
+	for (j = 0; j < pidhash_size; j++)
+		INIT_HLIST_HEAD(&vpid_hash[j]);
+#endif
 }
 
 void __init pidmap_init(void)
@@ -289,4 +720,12 @@ void __init pidmap_init(void)
 
 	for (i = 0; i < PIDTYPE_MAX; i++)
 		attach_pid(current, i, 0);
+
+#ifdef CONFIG_VE
+	vpid_mapping_cachep =
+		kmem_cache_create("vpid_mapping",
+				  sizeof(struct vpid_mapping),
+				  __alignof__(struct vpid_mapping),
+				  SLAB_PANIC|SLAB_UBC, NULL, NULL);
+#endif
 }
diff -uprN linux-2.6.16/kernel/posix-cpu-timers.c linux-2.6.16.ovz/kernel/posix-cpu-timers.c
--- linux-2.6.16/kernel/posix-cpu-timers.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/kernel/posix-cpu-timers.c	2006-07-05 08:34:56.000000000 -0400
@@ -20,7 +20,7 @@ static int check_clock(const clockid_t w
 		return 0;
 
 	read_lock(&tasklist_lock);
-	p = find_task_by_pid(pid);
+	p = find_task_by_pid_ve(pid);
 	if (!p || (CPUCLOCK_PERTHREAD(which_clock) ?
 		   p->tgid != current->tgid : p->tgid != pid)) {
 		error = -EINVAL;
@@ -292,7 +292,7 @@ int posix_cpu_clock_get(const clockid_t 
 		 */
 		struct task_struct *p;
 		read_lock(&tasklist_lock);
-		p = find_task_by_pid(pid);
+		p = find_task_by_pid_ve(pid);
 		if (p) {
 			if (CPUCLOCK_PERTHREAD(which_clock)) {
 				if (p->tgid == current->tgid) {
@@ -336,7 +336,7 @@ int posix_cpu_timer_create(struct k_itim
 		if (pid == 0) {
 			p = current;
 		} else {
-			p = find_task_by_pid(pid);
+			p = find_task_by_pid_ve(pid);
 			if (p && p->tgid != current->tgid)
 				p = NULL;
 		}
@@ -344,7 +344,7 @@ int posix_cpu_timer_create(struct k_itim
 		if (pid == 0) {
 			p = current->group_leader;
 		} else {
-			p = find_task_by_pid(pid);
+			p = find_task_by_pid_ve(pid);
 			if (p && p->tgid != pid)
 				p = NULL;
 		}
@@ -1173,6 +1173,9 @@ static void check_process_timers(struct 
 		}
 		t = tsk;
 		do {
+			if (unlikely(t->flags & PF_EXITING))
+				continue;
+
 			ticks = cputime_add(cputime_add(t->utime, t->stime),
 					    prof_left);
 			if (!cputime_eq(prof_expires, cputime_zero) &&
@@ -1193,11 +1196,7 @@ static void check_process_timers(struct 
 					      t->it_sched_expires > sched)) {
 				t->it_sched_expires = sched;
 			}
-
-			do {
-				t = next_thread(t);
-			} while (unlikely(t->flags & PF_EXITING));
-		} while (t != tsk);
+		} while ((t = next_thread(t)) != tsk);
 	}
 }
 
@@ -1289,30 +1288,30 @@ void run_posix_cpu_timers(struct task_st
 
 #undef	UNEXPIRED
 
-	BUG_ON(tsk->exit_state);
-
 	/*
 	 * Double-check with locks held.
 	 */
 	read_lock(&tasklist_lock);
-	spin_lock(&tsk->sighand->siglock);
+	if (likely(tsk->signal != NULL)) {
+		spin_lock(&tsk->sighand->siglock);
 
-	/*
-	 * Here we take off tsk->cpu_timers[N] and tsk->signal->cpu_timers[N]
-	 * all the timers that are firing, and put them on the firing list.
-	 */
-	check_thread_timers(tsk, &firing);
-	check_process_timers(tsk, &firing);
+		/*
+		 * Here we take off tsk->cpu_timers[N] and tsk->signal->cpu_timers[N]
+		 * all the timers that are firing, and put them on the firing list.
+		 */
+		check_thread_timers(tsk, &firing);
+		check_process_timers(tsk, &firing);
 
-	/*
-	 * We must release these locks before taking any timer's lock.
-	 * There is a potential race with timer deletion here, as the
-	 * siglock now protects our private firing list.  We have set
-	 * the firing flag in each timer, so that a deletion attempt
-	 * that gets the timer lock before we do will give it up and
-	 * spin until we've taken care of that timer below.
-	 */
-	spin_unlock(&tsk->sighand->siglock);
+		/*
+		 * We must release these locks before taking any timer's lock.
+		 * There is a potential race with timer deletion here, as the
+		 * siglock now protects our private firing list.  We have set
+		 * the firing flag in each timer, so that a deletion attempt
+		 * that gets the timer lock before we do will give it up and
+		 * spin until we've taken care of that timer below.
+		 */
+		spin_unlock(&tsk->sighand->siglock);
+	}
 	read_unlock(&tasklist_lock);
 
 	/*
diff -uprN linux-2.6.16/kernel/posix-timers.c linux-2.6.16.ovz/kernel/posix-timers.c
--- linux-2.6.16/kernel/posix-timers.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/kernel/posix-timers.c	2006-07-05 08:34:56.000000000 -0400
@@ -31,6 +31,7 @@
  * POSIX clocks & timers
  */
 #include <linux/mm.h>
+#include <linux/module.h>
 #include <linux/smp_lock.h>
 #include <linux/interrupt.h>
 #include <linux/slab.h>
@@ -48,6 +49,8 @@
 #include <linux/workqueue.h>
 #include <linux/module.h>
 
+#include <ub/beancounter.h>
+
 /*
  * Management arrays for POSIX timers.	 Timers are kept in slab memory
  * Timer ids are allocated by an external routine that keeps track of the
@@ -241,7 +244,8 @@ static __init int init_posix_timers(void
 	register_posix_clock(CLOCK_MONOTONIC, &clock_monotonic);
 
 	posix_timers_cache = kmem_cache_create("posix_timers_cache",
-					sizeof (struct k_itimer), 0, 0, NULL, NULL);
+					sizeof (struct k_itimer), 0,
+					SLAB_UBC, NULL, NULL);
 	idr_init(&posix_timers_id);
 	return 0;
 }
@@ -294,6 +298,13 @@ void do_schedule_next_timer(struct sigin
 
 int posix_timer_event(struct k_itimer *timr,int si_private)
 {
+	int ret;
+	struct ve_struct *ve;
+	struct user_beancounter *ub;
+
+	ve = set_exec_env(timr->it_process->ve_task_info.owner_env);
+	ub = set_exec_ub(timr->it_process->task_bc.task_ub);
+
 	memset(&timr->sigq->info, 0, sizeof(siginfo_t));
 	timr->sigq->info.si_sys_private = si_private;
 	/* Send signal to the process that owns this timer.*/
@@ -306,11 +317,11 @@ int posix_timer_event(struct k_itimer *t
 
 	if (timr->it_sigev_notify & SIGEV_THREAD_ID) {
 		struct task_struct *leader;
-		int ret = send_sigqueue(timr->it_sigev_signo, timr->sigq,
+		ret = send_sigqueue(timr->it_sigev_signo, timr->sigq,
 					timr->it_process);
 
 		if (likely(ret >= 0))
-			return ret;
+			goto out;
 
 		timr->it_sigev_notify = SIGEV_SIGNAL;
 		leader = timr->it_process->group_leader;
@@ -318,8 +329,12 @@ int posix_timer_event(struct k_itimer *t
 		timr->it_process = leader;
 	}
 
-	return send_group_sigqueue(timr->it_sigev_signo, timr->sigq,
+	ret = send_group_sigqueue(timr->it_sigev_signo, timr->sigq,
 				   timr->it_process);
+out:
+	(void)set_exec_ub(ub);
+	(void)set_exec_env(ve);
+	return ret;
 }
 EXPORT_SYMBOL_GPL(posix_timer_event);
 
@@ -366,7 +381,7 @@ static struct task_struct * good_sigeven
 	struct task_struct *rtn = current->group_leader;
 
 	if ((event->sigev_notify & SIGEV_THREAD_ID ) &&
-		(!(rtn = find_task_by_pid(event->sigev_notify_thread_id)) ||
+		(!(rtn = find_task_by_pid_ve(event->sigev_notify_thread_id)) ||
 		 rtn->tgid != current->tgid ||
 		 (event->sigev_notify & ~SIGEV_THREAD_ID) != SIGEV_SIGNAL))
 		return NULL;
diff -uprN linux-2.6.16/kernel/power/Kconfig linux-2.6.16.ovz/kernel/power/Kconfig
--- linux-2.6.16/kernel/power/Kconfig	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/kernel/power/Kconfig	2006-07-05 08:34:56.000000000 -0400
@@ -38,7 +38,7 @@ config PM_DEBUG
 
 config SOFTWARE_SUSPEND
 	bool "Software Suspend"
-	depends on PM && SWAP && (X86 && (!SMP || SUSPEND_SMP)) || ((FRV || PPC32) && !SMP)
+	depends on PM && SWAP && X86 || ((FRV || PPC32) && !SMP)
 	---help---
 	  Enable the possibility of suspending the machine.
 	  It doesn't need APM.
diff -uprN linux-2.6.16/kernel/power/process.c linux-2.6.16.ovz/kernel/power/process.c
--- linux-2.6.16/kernel/power/process.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/kernel/power/process.c	2006-07-05 08:34:56.000000000 -0400
@@ -38,18 +38,23 @@ void refrigerator(void)
 	   processes around? */
 	long save;
 	save = current->state;
+	current->state = TASK_UNINTERRUPTIBLE;
 	pr_debug("%s entered refrigerator\n", current->comm);
-	printk("=");
+	/* printk("="); */
 
-	frozen_process(current);
 	spin_lock_irq(&current->sighand->siglock);
-	recalc_sigpending(); /* We sent fake signal, clean it up */
+	if (test_and_clear_thread_flag(TIF_FREEZE)) {
+		recalc_sigpending(); /* We sent fake signal, clean it up */
+		current->flags |= PF_FROZEN;
+	} else {
+		/* Freeze request could be canceled before we entered
+		 * refrigerator(). In this case we do nothing. */
+		current->state = save;
+	}
 	spin_unlock_irq(&current->sighand->siglock);
 
-	while (frozen(current)) {
-		current->state = TASK_UNINTERRUPTIBLE;
+	while (current->flags & PF_FROZEN)
 		schedule();
-	}
 	pr_debug("%s left refrigerator\n", current->comm);
 	current->state = save;
 }
@@ -67,7 +72,7 @@ int freeze_processes(void)
 	do {
 		todo = 0;
 		read_lock(&tasklist_lock);
-		do_each_thread(g, p) {
+		do_each_thread_all(g, p) {
 			if (!freezeable(p))
 				continue;
 			if (frozen(p))
@@ -78,7 +83,7 @@ int freeze_processes(void)
 			signal_wake_up(p, 0);
 			spin_unlock_irqrestore(&p->sighand->siglock, flags);
 			todo++;
-		} while_each_thread(g, p);
+		} while_each_thread_all(g, p);
 		read_unlock(&tasklist_lock);
 		yield();			/* Yield is okay here */
 		if (todo && time_after(jiffies, start_time + TIMEOUT)) {
@@ -95,15 +100,15 @@ int freeze_processes(void)
 	 */
 	if (todo) {
 		read_lock(&tasklist_lock);
-		do_each_thread(g, p)
+		do_each_thread_all(g, p)
 			if (freezing(p)) {
 				pr_debug("  clean up: %s\n", p->comm);
-				p->flags &= ~PF_FREEZE;
 				spin_lock_irqsave(&p->sighand->siglock, flags);
+				clear_tsk_thread_flag(p, TIF_FREEZE);
 				recalc_sigpending_tsk(p);
 				spin_unlock_irqrestore(&p->sighand->siglock, flags);
 			}
-		while_each_thread(g, p);
+		while_each_thread_all(g, p);
 		read_unlock(&tasklist_lock);
 		return todo;
 	}
@@ -119,12 +124,12 @@ void thaw_processes(void)
 
 	printk( "Restarting tasks..." );
 	read_lock(&tasklist_lock);
-	do_each_thread(g, p) {
+	do_each_thread_all(g, p) {
 		if (!freezeable(p))
 			continue;
 		if (!thaw_process(p))
 			printk(KERN_INFO " Strange, %s not stopped\n", p->comm );
-	} while_each_thread(g, p);
+	} while_each_thread_all(g, p);
 
 	read_unlock(&tasklist_lock);
 	schedule();
diff -uprN linux-2.6.16/kernel/printk.c linux-2.6.16.ovz/kernel/printk.c
--- linux-2.6.16/kernel/printk.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/kernel/printk.c	2006-07-05 08:34:56.000000000 -0400
@@ -30,7 +30,9 @@
 #include <linux/smp.h>
 #include <linux/security.h>
 #include <linux/bootmem.h>
+#include <linux/vzratelimit.h>
 #include <linux/syscalls.h>
+#include <linux/veprintk.h>
 
 #include <asm/uaccess.h>
 
@@ -83,7 +85,7 @@ static int console_locked;
  * It is also used in interesting ways to provide interlocking in
  * release_console_sem().
  */
-static DEFINE_SPINLOCK(logbuf_lock);
+DEFINE_SPINLOCK(logbuf_lock);
 
 #define LOG_BUF_MASK	(log_buf_len-1)
 #define LOG_BUF(idx) (log_buf[(idx) & LOG_BUF_MASK])
@@ -114,6 +116,7 @@ static int preferred_console = -1;
 
 /* Flag: console code may call schedule() */
 static int console_may_schedule;
+int console_silence_loglevel;
 
 #ifdef CONFIG_PRINTK
 
@@ -160,6 +163,19 @@ static int __init console_setup(char *st
 
 __setup("console=", console_setup);
 
+/* "silencelevel=<n>" boot option: store <n> in console_silence_loglevel. */
+static int __init setup_console_silencelevel(char *str)
+{
+	int val;
+
+	if (get_option(&str, &val) == 1) {
+		console_silence_loglevel = val;
+		return 1;
+	}
+	return 0;
+}
+__setup("silencelevel=", setup_console_silencelevel);
+
 static int __init log_buf_len_setup(char *str)
 {
 	unsigned long size = memparse(str, &str);
@@ -223,6 +239,10 @@ int do_syslog(int type, char __user *buf
 	char c;
 	int error = 0;
 
+	if (!ve_is_super(get_exec_env()) &&
+			(type == 6 || type == 7 || type == 8))
+		goto out;
+
 	error = security_syslog(type);
 	if (error)
 		return error;
@@ -243,15 +263,15 @@ int do_syslog(int type, char __user *buf
 			error = -EFAULT;
 			goto out;
 		}
-		error = wait_event_interruptible(log_wait,
-							(log_start - log_end));
+		error = wait_event_interruptible(ve_log_wait,
+						(ve_log_start - ve_log_end));
 		if (error)
 			goto out;
 		i = 0;
 		spin_lock_irq(&logbuf_lock);
-		while (!error && (log_start != log_end) && i < len) {
-			c = LOG_BUF(log_start);
-			log_start++;
+		while (!error && (ve_log_start != ve_log_end) && i < len) {
+			c = VE_LOG_BUF(ve_log_start);
+			ve_log_start++;
 			spin_unlock_irq(&logbuf_lock);
 			error = __put_user(c,buf);
 			buf++;
@@ -277,15 +297,17 @@ int do_syslog(int type, char __user *buf
 			error = -EFAULT;
 			goto out;
 		}
+		if (ve_log_buf == NULL)
+			goto out;
 		count = len;
-		if (count > log_buf_len)
-			count = log_buf_len;
+		if (count > ve_log_buf_len)
+			count = ve_log_buf_len;
 		spin_lock_irq(&logbuf_lock);
-		if (count > logged_chars)
-			count = logged_chars;
+		if (count > ve_logged_chars)
+			count = ve_logged_chars;
 		if (do_clear)
-			logged_chars = 0;
-		limit = log_end;
+			ve_logged_chars = 0;
+		limit = ve_log_end;
 		/*
 		 * __put_user() could sleep, and while we sleep
 		 * printk() could overwrite the messages
@@ -294,9 +316,9 @@ int do_syslog(int type, char __user *buf
 		 */
 		for (i = 0; i < count && !error; i++) {
 			j = limit-1-i;
-			if (j + log_buf_len < log_end)
+			if (j + ve_log_buf_len < ve_log_end)
 				break;
-			c = LOG_BUF(j);
+			c = VE_LOG_BUF(j);
 			spin_unlock_irq(&logbuf_lock);
 			error = __put_user(c,&buf[count-1-i]);
 			cond_resched();
@@ -320,7 +342,7 @@ int do_syslog(int type, char __user *buf
 		}
 		break;
 	case 5:		/* Clear ring buffer */
-		logged_chars = 0;
+		ve_logged_chars = 0;
 		break;
 	case 6:		/* Disable logging to console */
 		console_loglevel = minimum_console_loglevel;
@@ -338,10 +360,10 @@ int do_syslog(int type, char __user *buf
 		error = 0;
 		break;
 	case 9:		/* Number of chars in the log buffer */
-		error = log_end - log_start;
+		error = ve_log_end - ve_log_start;
 		break;
 	case 10:	/* Size of the log buffer */
-		error = log_buf_len;
+		error = ve_log_buf_len;
 		break;
 	default:
 		error = -EINVAL;
@@ -439,14 +461,14 @@ static void call_console_drivers(unsigne
 
 static void emit_log_char(char c)
 {
-	LOG_BUF(log_end) = c;
-	log_end++;
-	if (log_end - log_start > log_buf_len)
-		log_start = log_end - log_buf_len;
-	if (log_end - con_start > log_buf_len)
-		con_start = log_end - log_buf_len;
-	if (logged_chars < log_buf_len)
-		logged_chars++;
+	VE_LOG_BUF(ve_log_end) = c;
+	ve_log_end++;
+	if (ve_log_end - ve_log_start > ve_log_buf_len)
+		ve_log_start = ve_log_end - ve_log_buf_len;
+	if (ve_is_super(get_exec_env()) && ve_log_end - con_start > ve_log_buf_len)
+		con_start = ve_log_end - ve_log_buf_len;
+	if (ve_logged_chars < ve_log_buf_len)
+		ve_logged_chars++;
 }
 
 /*
@@ -511,6 +533,30 @@ __attribute__((weak)) unsigned long long
  * printf(3)
  */
 
+static inline int ve_log_init(void)
+{
+#ifdef CONFIG_VE
+	if (ve_log_buf != NULL)	/* already set up */
+		return 0;
+	/* VE0: point the ve0 log fields at the global printk state */
+	if (ve_is_super(get_exec_env())) {
+		ve0._log_wait = &log_wait;
+		ve0._log_start = &log_start;
+		ve0._log_end = &log_end;
+		ve0._logged_chars = &logged_chars;
+		ve0.log_buf = log_buf;
+		return 0;
+	}
+	/* other VEs: own buffer; GFP_ATOMIC as __vprintk() holds logbuf_lock */
+	ve_log_buf = kmalloc(ve_log_buf_len, GFP_ATOMIC);
+	if (!ve_log_buf)
+		return -ENOMEM;
+
+	memset(ve_log_buf, 0, ve_log_buf_len);
+#endif
+	return 0;
+}
+
 asmlinkage int printk(const char *fmt, ...)
 {
 	va_list args;
@@ -526,13 +572,14 @@ asmlinkage int printk(const char *fmt, .
 /* cpu currently holding logbuf_lock */
 static volatile unsigned int printk_cpu = UINT_MAX;
 
-asmlinkage int vprintk(const char *fmt, va_list args)
+asmlinkage int __vprintk(const char *fmt, va_list args)
 {
 	unsigned long flags;
 	int printed_len;
 	char *p;
 	static char printk_buf[1024];
 	static int log_level_unknown = 1;
+	int err, need_wake;
 
 	preempt_disable();
 	if (unlikely(oops_in_progress) && printk_cpu == smp_processor_id())
@@ -544,6 +591,12 @@ asmlinkage int vprintk(const char *fmt, 
 	spin_lock_irqsave(&logbuf_lock, flags);
 	printk_cpu = smp_processor_id();
 
+	printed_len = err = ve_log_init();	/* error code is the result */
+	if (err) {
+		spin_unlock_irqrestore(&logbuf_lock, flags);
+		goto out;	/* common exit; don't leave preemption disabled */
+	}
+
 	/* Emit the output into the temporary buffer */
 	printed_len = vscnprintf(printk_buf, sizeof(printk_buf), fmt, args);
 
@@ -615,7 +668,12 @@ asmlinkage int vprintk(const char *fmt, 
 		spin_unlock_irqrestore(&logbuf_lock, flags);
 		goto out;
 	}
-	if (!down_trylock(&console_sem)) {
+	if (!ve_is_super(get_exec_env())) {
+		need_wake = (ve_log_start != ve_log_end);
+		spin_unlock_irqrestore(&logbuf_lock, flags);
+		if (!oops_in_progress && need_wake)
+			wake_up_interruptible(&ve_log_wait);
+	} else if (!down_trylock(&console_sem)) {
 		console_locked = 1;
 		/*
 		 * We own the drivers.  We can drop the spinlock and let
@@ -641,6 +699,38 @@ out:
 EXPORT_SYMBOL(printk);
 EXPORT_SYMBOL(vprintk);
 
+/* Always log on behalf of VE0: switch the exec environment to ve0 around
+ * __vprintk() and put the caller's environment back afterwards. */
+asmlinkage int vprintk(const char *fmt, va_list args)
+{
+	struct ve_struct *saved_env = set_exec_env(get_ve0());
+	int printed = __vprintk(fmt, args);
+
+	set_exec_env(saved_env);
+	return printed;
+}
+
+/* Log via vprintk() (caller is VE0, or VE0_LOG set) and/or via __vprintk()
+ * (VE_LOG set, caller not VE0); returns the last call's result, else 0. */
+asmlinkage int ve_printk(int dst, const char *fmt, ...)
+{
+	va_list ap;
+	int len = 0;
+
+	if (ve_is_super(get_exec_env()) || (dst & VE0_LOG)) {
+		va_start(ap, fmt);
+		len = vprintk(fmt, ap);
+		va_end(ap);
+	}
+	if (!ve_is_super(get_exec_env()) && (dst & VE_LOG)) {
+		va_start(ap, fmt);
+		len = __vprintk(fmt, ap);
+		va_end(ap);
+	}
+	return len;
+}
+EXPORT_SYMBOL(ve_printk);
+
 #else
 
 asmlinkage long sys_syslog(int type, char __user *buf, int len)
@@ -732,6 +822,12 @@ int is_console_locked(void)
 }
 EXPORT_SYMBOL(is_console_locked);
 
+void wake_up_klogd(void)
+{
+	if (!oops_in_progress && waitqueue_active(&log_wait))
+		wake_up_interruptible(&log_wait);
+}
+
 /**
  * release_console_sem - unlock the console system
  *
@@ -768,8 +864,8 @@ void release_console_sem(void)
 	console_may_schedule = 0;
 	up(&console_sem);
 	spin_unlock_irqrestore(&logbuf_lock, flags);
-	if (wake_klogd && !oops_in_progress && waitqueue_active(&log_wait))
-		wake_up_interruptible(&log_wait);
+	if (wake_klogd)
+		wake_up_klogd();
 }
 EXPORT_SYMBOL(release_console_sem);
 
@@ -1049,3 +1145,33 @@ int printk_ratelimit(void)
 				printk_ratelimit_burst);
 }
 EXPORT_SYMBOL(printk_ratelimit);
+
+/*
+ * vz_ratelimit - bucket-based rate limiter driven by jiffies.
+ * Returns 1 if the event may proceed and 0 if it must be suppressed;
+ * p->bucket and p->last are updated only when 1 is returned.
+ */
+int vz_ratelimit(struct vz_rate_info *p)
+{
+	static DEFINE_SPINLOCK(ratelimit_lock);
+	unsigned long now, elapsed, flags;
+	long level;
+	int allowed = 1;
+
+	spin_lock_irqsave(&ratelimit_lock, flags);
+	now = jiffies;
+	elapsed = now - p->last;
+	if (elapsed >= p->interval) {
+		/* drain one unit per elapsed interval, never below zero */
+		level = p->bucket - (elapsed / (unsigned)p->interval);
+		p->bucket = (level < 0 ? 0 : level) + 1;
+	} else if (p->bucket < p->burst) {
+		p->bucket++;
+	} else {
+		allowed = 0;	/* burst used up within the interval */
+	}
+	if (allowed)
+		p->last = now;
+	spin_unlock_irqrestore(&ratelimit_lock, flags);
+	return allowed;
+}
diff -uprN linux-2.6.16/kernel/ptrace.c linux-2.6.16.ovz/kernel/ptrace.c
--- linux-2.6.16/kernel/ptrace.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/kernel/ptrace.c	2006-07-05 08:34:56.000000000 -0400
@@ -57,10 +57,6 @@ void ptrace_untrace(task_t *child)
 			signal_wake_up(child, 1);
 		}
 	}
-	if (child->signal->flags & SIGNAL_GROUP_EXIT) {
-		sigaddset(&child->pending.signal, SIGKILL);
-		signal_wake_up(child, 1);
-	}
 	spin_unlock(&child->sighand->siglock);
 }
 
@@ -82,7 +78,8 @@ void __ptrace_unlink(task_t *child)
 		SET_LINKS(child);
 	}
 
-	ptrace_untrace(child);
+	if (child->state == TASK_TRACED)
+		ptrace_untrace(child);
 }
 
 /*
@@ -136,7 +133,10 @@ static int may_attach(struct task_struct
 	smp_rmb();
 	if (!task->mm->dumpable && !capable(CAP_SYS_PTRACE))
 		return -EPERM;
-
+	if (!task->mm->vps_dumpable && !ve_is_super(get_exec_env()))
+		return -EPERM;
+	if (!ve_accessible(VE_TASK_INFO(task)->owner_env, get_exec_env()))
+		return -EPERM;
 	return security_ptrace(current, task);
 }
 
@@ -152,12 +152,34 @@ int ptrace_may_attach(struct task_struct
 int ptrace_attach(struct task_struct *task)
 {
 	int retval;
-	task_lock(task);
+
 	retval = -EPERM;
 	if (task->pid <= 1)
-		goto bad;
+		goto out;
 	if (task->tgid == current->tgid)
-		goto bad;
+		goto out;
+
+repeat:
+	/*
+	 * Nasty, nasty.
+	 *
+	 * We want to hold both the task-lock and the
+	 * tasklist_lock for writing at the same time.
+	 * But that's against the rules (tasklist_lock
+	 * is taken for reading by interrupts on other
+	 * cpu's that may have task_lock).
+	 */
+	task_lock(task);
+	local_irq_disable();
+	if (!write_trylock(&tasklist_lock)) {
+		local_irq_enable();
+		task_unlock(task);
+		do {
+			cpu_relax();
+		} while (!write_can_lock(&tasklist_lock));
+		goto repeat;
+	}
+
 	/* the same process cannot be attached many times */
 	if (task->ptrace & PT_PTRACED)
 		goto bad;
@@ -170,17 +192,15 @@ int ptrace_attach(struct task_struct *ta
 				      ? PT_ATTACHED : 0);
 	if (capable(CAP_SYS_PTRACE))
 		task->ptrace |= PT_PTRACE_CAP;
-	task_unlock(task);
 
-	write_lock_irq(&tasklist_lock);
 	__ptrace_link(task, current);
-	write_unlock_irq(&tasklist_lock);
 
 	force_sig_specific(SIGSTOP, task);
-	return 0;
 
 bad:
+	write_unlock_irq(&tasklist_lock);
 	task_unlock(task);
+out:
 	return retval;
 }
 
@@ -263,6 +283,7 @@ int access_process_vm(struct task_struct
 	
 	return buf - old_buf;
 }
+EXPORT_SYMBOL_GPL(access_process_vm);
 
 int ptrace_readdata(struct task_struct *tsk, unsigned long src, char __user *dst, int len)
 {
@@ -421,21 +442,22 @@ int ptrace_request(struct task_struct *c
  */
 int ptrace_traceme(void)
 {
-	int ret;
+	int ret = -EPERM;
 
 	/*
 	 * Are we already being traced?
 	 */
-	if (current->ptrace & PT_PTRACED)
-		return -EPERM;
-	ret = security_ptrace(current->parent, current);
-	if (ret)
-		return -EPERM;
-	/*
-	 * Set the ptrace bit in the process ptrace flags.
-	 */
-	current->ptrace |= PT_PTRACED;
-	return 0;
+	task_lock(current);
+	if (!(current->ptrace & PT_PTRACED)) {
+		ret = security_ptrace(current->parent, current);
+		/*
+		 * Set the ptrace bit in the process ptrace flags.
+		 */
+		if (!ret)
+			current->ptrace |= PT_PTRACED;
+	}
+	task_unlock(current);
+	return ret;
 }
 
 /**
@@ -459,7 +481,7 @@ struct task_struct *ptrace_get_task_stru
 		return ERR_PTR(-EPERM);
 
 	read_lock(&tasklist_lock);
-	child = find_task_by_pid(pid);
+	child = find_task_by_pid_ve(pid);
 	if (child)
 		get_task_struct(child);
 	read_unlock(&tasklist_lock);
diff -uprN linux-2.6.16/kernel/sched.c linux-2.6.16.ovz/kernel/sched.c
--- linux-2.6.16/kernel/sched.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/kernel/sched.c	2006-07-05 08:34:56.000000000 -0400
@@ -49,6 +49,8 @@
 #include <linux/syscalls.h>
 #include <linux/times.h>
 #include <linux/acct.h>
+#include <linux/vsched.h>
+#include <linux/fairsched.h>
 #include <asm/tlb.h>
 
 #include <asm/unistd.h>
@@ -134,7 +136,7 @@
 #ifdef CONFIG_SMP
 #define TIMESLICE_GRANULARITY(p)	(GRANULARITY * \
 		(1 << (((MAX_BONUS - CURRENT_BONUS(p)) ? : 1) - 1)) * \
-			num_online_cpus())
+			vsched_num_online_vcpus(task_vsched(p)))
 #else
 #define TIMESLICE_GRANULARITY(p)	(GRANULARITY * \
 		(1 << (((MAX_BONUS - CURRENT_BONUS(p)) ? : 1) - 1)))
@@ -199,6 +201,7 @@ struct prio_array {
  * (such as the load balancing or the thread migration code), lock
  * acquire operations must be ordered by ascending &runqueue.
  */
+typedef struct vcpu_info *vcpu_t;
 struct runqueue {
 	spinlock_t lock;
 
@@ -220,9 +223,12 @@ struct runqueue {
 	 */
 	unsigned long nr_uninterruptible;
 
+	unsigned long nr_sleeping;
+	unsigned long nr_stopped;
+
 	unsigned long expired_timestamp;
 	unsigned long long timestamp_last_tick;
-	task_t *curr, *idle;
+	task_t *curr;
 	struct mm_struct *prev_mm;
 	prio_array_t *active, *expired, arrays[2];
 	int best_expired_prio;
@@ -233,11 +239,12 @@ struct runqueue {
 
 	/* For active balancing */
 	int active_balance;
-	int push_cpu;
+#endif
+	vcpu_t push_cpu;
 
 	task_t *migration_thread;
 	struct list_head migration_queue;
-#endif
+	int cpu;
 
 #ifdef CONFIG_SCHEDSTATS
 	/* latency stats */
@@ -260,7 +267,51 @@ struct runqueue {
 #endif
 };
 
-static DEFINE_PER_CPU(struct runqueue, runqueues);
+/* VCPU scheduler state: a set of vcpus and the lists they are queued on */
+struct vcpu_info;
+struct vcpu_scheduler {
+	struct list_head idle_list;	/* vcpus neither active nor running */
+	struct list_head active_list;	/* active vcpus that are not running */
+	struct list_head running_list;	/* vcpus taken by __vcpu_get() */
+#ifdef CONFIG_FAIRSCHED
+	struct fairsched_node *node;	/* handed to the fairsched_*() calls */
+#endif
+	struct vcpu_info *vcpu[NR_CPUS];	/* looked up by vsched_vcpu() */
+	int id;
+	cpumask_t vcpu_online_map, vcpu_running_map;
+	cpumask_t pcpu_running_map;	/* set in __vcpu_get(), cleared in vcpu_put() */
+	int num_online_vcpus;
+} ____cacheline_internodealigned_in_smp;
+
+/* virtual CPU description: a runqueue plus its scheduling state */
+struct vcpu_info {
+	struct runqueue rq;		/* rq_vcpu() maps it back to the vcpu */
+#ifdef CONFIG_SCHED_VCPU
+	unsigned active : 1,		/* vcpu_attach() sets, vcpu_detach() clears */
+		 running : 1;		/* __vcpu_get() sets, vcpu_put() clears */
+	struct list_head list;		/* link on one of the vsched lists */
+	struct vcpu_scheduler *vsched;
+	int last_pcpu;			/* pcpu id recorded by __vcpu_get() */
+	u32 start_time;			/* jiffies at the last __vcpu_get() */
+#endif
+	int id;
+} ____cacheline_internodealigned_in_smp;
+
+/* physical CPU description, one pcpu_info[] slot per CPU */
+struct pcpu_info {
+	struct vcpu_scheduler *vsched;	/* set by __vcpu_get() */
+	struct vcpu_info *vcpu;		/* set by __vcpu_get() */
+	task_t *idle;			/* what idle_task() returns */
+#ifdef CONFIG_SMP
+	struct sched_domain *sd;	/* copied to the vcpu's rq in __vcpu_get() */
+#endif
+	int id;
+} ____cacheline_internodealigned_in_smp;
+
+struct pcpu_info pcpu_info[NR_CPUS];
+
+#define pcpu(nr)		(&pcpu_info[nr])
+#define this_pcpu()		(pcpu(smp_processor_id()))
 
 /*
  * The domain tree (rq->sd) is protected by RCU's quiescent state transition.
@@ -269,13 +320,399 @@ static DEFINE_PER_CPU(struct runqueue, r
  * The domain tree of any CPU may only be accessed from within
  * preempt-disabled sections.
  */
+#define for_each_pdomain(sd, domain) \
+for (domain = rcu_dereference(sd); domain; domain = domain->parent)
+
 #define for_each_domain(cpu, domain) \
-for (domain = rcu_dereference(cpu_rq(cpu)->sd); domain; domain = domain->parent)
+	for_each_pdomain(vcpu_rq(cpu)->sd, domain)
+
+#ifdef CONFIG_SCHED_VCPU
+
+u32 vcpu_sched_timeslice = 5;
+u32 vcpu_timeslice = 0;
+EXPORT_SYMBOL(vcpu_sched_timeslice);
+EXPORT_SYMBOL(vcpu_timeslice);
+
+extern spinlock_t fairsched_lock;
+static struct vcpu_scheduler default_vsched, idle_vsched;
+static struct vcpu_info boot_vcpu, boot_idle_vcpu;
+
+#define vsched_default_vsched()	(&default_vsched)
+#define vsched_default_vcpu(id)	(default_vsched.vcpu[id])
+
+/*
+ * All macros below can be used without locks, provided there are no
+ * strict ordering requirements, because we assume that:
+ *
+ * 1. A VCPU cannot disappear "on the fly" (FIXME)
+ *
+ * 2. p->vsched access is atomic.
+ */
+
+#define task_vsched(tsk)	((tsk)->vsched)
+#define this_vsched()		(task_vsched(current))
+
+#define vsched_vcpu(vsched, id)	((vsched)->vcpu[id])
+#define this_vcpu()		(task_vcpu(current))
+#define task_vcpu(p)		((p)->vcpu)
+
+#define vsched_id(vsched)	((vsched)->id)
+#define vsched_vcpu_online_map(vsched)	((vsched)->vcpu_online_map)
+#define vsched_num_online_vcpus(vsched)	((vsched)->num_online_vcpus)
+#define vsched_pcpu_running_map(vsched)	((vsched)->pcpu_running_map)
+
+#define vcpu_vsched(vcpu)	((vcpu)->vsched)
+#define vcpu_last_pcpu(vcpu)	((vcpu)->last_pcpu)
+#define vcpu_isset(vcpu, mask)	(cpu_isset((vcpu)->id, mask))
+#define vcpu_is_offline(vcpu)	(!vcpu_isset(vcpu, \
+					vcpu_vsched(vcpu)->vcpu_online_map))
+
+static int __add_vcpu(struct vcpu_scheduler *vsched, int id);
+
+#else	/* CONFIG_SCHED_VCPU */
+
+static DEFINE_PER_CPU(struct vcpu_info, vcpu_info);
+
+#define task_vsched(p)		NULL
+#define this_vcpu()		(task_vcpu(current))
+#define task_vcpu(p)		(vcpu(task_cpu(p)))
+
+#define vsched_vcpu(sched, id)	(vcpu(id))
+#define vsched_id(vsched)	0
+#define vsched_default_vsched()	NULL
+#define vsched_default_vcpu(id)	(vcpu(id))
+
+#define vsched_vcpu_online_map(vsched)	(cpu_online_map)
+#define vsched_num_online_vcpus(vsched)	(num_online_cpus())
+#define vsched_pcpu_running_map(vsched)	(cpu_online_map)
+
+#define vcpu(id)		(&per_cpu(vcpu_info, id))
+
+#define vcpu_vsched(vcpu)	NULL
+#define vcpu_last_pcpu(vcpu)	((vcpu)->id)
+#define vcpu_isset(vcpu, mask)	(cpu_isset((vcpu)->id, mask))
+#define vcpu_is_offline(vcpu)	(cpu_is_offline((vcpu)->id))
+
+#endif	/* CONFIG_SCHED_VCPU */
+
+#define this_rq()		(vcpu_rq(this_vcpu()))
+#define task_rq(p)		(vcpu_rq(task_vcpu(p)))
+#define vcpu_rq(vcpu)		(&(vcpu)->rq)
+#define get_vcpu()		({ preempt_disable(); this_vcpu(); })
+#define put_vcpu()		({ put_cpu(); })
+#define rq_vcpu(__rq)		(container_of((__rq), struct vcpu_info, rq))
+
+/**
+ * idle_task - return the idle task recorded for a physical cpu.
+ * @cpu: index of the processor's slot in pcpu_info[].
+ */
+task_t *idle_task(int cpu)
+{
+	return pcpu(cpu)->idle;
+}
+
+#ifdef CONFIG_SMP
+static inline void update_rq_cpu_load(runqueue_t *this_rq)
+{
+	unsigned long target, prev, sample;
+	int idx, span;
+
+	/* an empty runqueue drops every load average straight to zero */
+	if (unlikely(this_rq->nr_running == 0)) {
+		for (idx = 0; idx < 3; idx++)
+			this_rq->cpu_load[idx] = 0;
+		return;
+	}
+
+	target = this_rq->nr_running * SCHED_LOAD_SCALE;
+	for (idx = 0; idx < 3; idx++) {
+		span = 1 << idx;
+		prev = this_rq->cpu_load[idx];
+		sample = target;
+		/*
+		 * When the load is rising, round the averaging division up;
+		 * otherwise we could stay stuck on 9 with a load of 10.
+		 */
+		if (sample > prev)
+			sample += span - 1;
+		this_rq->cpu_load[idx] = (prev * (span - 1) + sample) / span;
+	}
+}
+#else	/* CONFIG_SMP */
+static inline void update_rq_cpu_load(runqueue_t *this_rq)
+{
+}
+#endif	/* CONFIG_SMP */
+
+#ifdef CONFIG_SCHED_VCPU
+
+void fastcall vsched_cpu_online_map(struct vcpu_scheduler *vsched,
+		cpumask_t *mask)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&fairsched_lock, flags);
+	*mask = vsched->vcpu_online_map;
+	spin_unlock_irqrestore(&fairsched_lock, flags);
+}
+
+static inline void set_task_vsched(task_t *p, struct vcpu_scheduler *vsched)
+{
+	/* NOTE: set_task_cpu() is required after every set_task_vsched()! */
+	p->vsched = vsched;
+	p->vsched_id = vsched_id(vsched);
+}
+
+inline void set_task_cpu(struct task_struct *p, unsigned int vcpu_id)
+{
+	p->vcpu = vsched_vcpu(task_vsched(p), vcpu_id);
+	p->vcpu_id = vcpu_id;
+}
+
+static inline void set_task_vcpu(struct task_struct *p, vcpu_t vcpu)
+{
+	p->vcpu = vcpu;
+	p->vcpu_id = vcpu->id;
+}
+
+/*
+ * Called when rq->nr_running goes from 0 to 1: mark the vcpu active and,
+ * unless it is currently running, queue it on its vsched's active_list.
+ */
+static void vcpu_attach(runqueue_t *rq)
+{
+	vcpu_t vcpu = rq_vcpu(rq);
+	struct vcpu_scheduler *vsched = vcpu_vsched(vcpu);
+
+	BUG_ON(vcpu->active);
+
+	spin_lock(&fairsched_lock);
+	vcpu->active = 1;
+	if (!vcpu->running)
+		list_move_tail(&vcpu->list, &vsched->active_list);
+	fairsched_incrun(vsched->node);
+	spin_unlock(&fairsched_lock);
+}
+
+/*
+ * Called when rq->nr_running goes from 1 to 0: mark the vcpu inactive and,
+ * unless it is currently running, park it on its vsched's idle_list.
+ */
+static void vcpu_detach(runqueue_t *rq)
+{
+	vcpu_t vcpu = rq_vcpu(rq);
+	struct vcpu_scheduler *vsched = vcpu_vsched(vcpu);
+
+	BUG_ON(!vcpu->active);
+
+	spin_lock(&fairsched_lock);
+	fairsched_decrun(vsched->node);
+	vcpu->active = 0;
+	if (!vcpu->running)
+		list_move_tail(&vcpu->list, &vsched->idle_list);
+	spin_unlock(&fairsched_lock);
+}
+
+static inline void __vcpu_get(vcpu_t vcpu)
+{
+	struct pcpu_info *pcpu;
+	struct vcpu_scheduler *vsched;
+
+	BUG_ON(!this_vcpu()->running);
+	/* the callers in schedule_vcpu() hold fairsched_lock */
+	pcpu = this_pcpu();
+	vsched = vcpu_vsched(vcpu);
+	/* bind @vcpu and its vsched to the current physical CPU */
+	pcpu->vcpu = vcpu;
+	pcpu->vsched = vsched;
+
+	fairsched_inccpu(vsched->node);
+	/* mark it running; record start time and the pcpu it runs on */
+	list_move_tail(&vcpu->list, &vsched->running_list);
+	vcpu->start_time = jiffies;
+	vcpu->last_pcpu = pcpu->id;
+	vcpu->running = 1;
+	__set_bit(vcpu->id, vsched->vcpu_running_map.bits);
+	__set_bit(pcpu->id, vsched->pcpu_running_map.bits);
+#ifdef CONFIG_SMP
+	vcpu_rq(vcpu)->sd = pcpu->sd;
+#endif
+}
+
+static void vcpu_put(vcpu_t vcpu)
+{
+	struct vcpu_scheduler *vsched;
+	struct pcpu_info *cur_pcpu;
+	runqueue_t *rq;
+
+	vsched = vcpu_vsched(vcpu);
+	rq = vcpu_rq(vcpu);
+	cur_pcpu = this_pcpu();
+
+	BUG_ON(!vcpu->running);
+	/* stop running @vcpu: move it back to the active or the idle list */
+	spin_lock(&fairsched_lock);
+	vcpu->running = 0;
+	list_move_tail(&vcpu->list,
+		vcpu->active ? &vsched->active_list : &vsched->idle_list);
+	fairsched_deccpu(vsched->node);
+	__clear_bit(vcpu->id, vsched->vcpu_running_map.bits);
+	if (vsched != this_vsched())
+		__clear_bit(cur_pcpu->id, vsched->pcpu_running_map.bits);
+
+	if (!rq->nr_running)
+		rq->expired_timestamp = 0;
+	/* from this point on, task_running(prev_rq, prev) will be 0 */
+	rq->curr = cur_pcpu->idle;
+	update_rq_cpu_load(rq);
+	spin_unlock(&fairsched_lock);
+}
+
+static vcpu_t schedule_vcpu(vcpu_t cur_vcpu, cycles_t cycles)
+{
+	struct vcpu_scheduler *vsched;
+	vcpu_t vcpu;
+	runqueue_t *rq;
+#ifdef CONFIG_FAIRSCHED
+	struct fairsched_node *node, *nodec;
+
+	nodec = vcpu_vsched(cur_vcpu)->node;
+	node = nodec;
+#endif
+	/* in: cur_vcpu's rq->lock held; out: returned vcpu's rq->lock held */
+	BUG_ON(!cur_vcpu->running);
+restart:
+	if (unlikely(system_state == SYSTEM_BOOTING))
+		goto affine;
+
+	spin_lock(&fairsched_lock);
+#ifdef CONFIG_FAIRSCHED
+	node = fairsched_schedule(node, nodec,
+			cur_vcpu->active,
+			cycles);
+	if (unlikely(node == NULL))
+		goto idle;
+
+	vsched = node->vsched;
+#else
+	vsched = &default_vsched;
+#endif
+	/* FIXME: optimize vcpu switching; maybe we do not need to call
+	   fairsched_schedule() at all if the vcpu is still active and too
+	   little time has passed so far */
+	if (cur_vcpu->vsched == vsched && cur_vcpu->active &&
+	    jiffies - cur_vcpu->start_time < msecs_to_jiffies(vcpu_sched_timeslice)) {
+		vcpu = cur_vcpu;
+		goto done;
+	}
+
+	if (list_empty(&vsched->active_list)) {
+		/* nothing except for this cpu can be scheduled */
+		if (likely(cur_vcpu->vsched == vsched && cur_vcpu->active)) {
+			/*
+			 * Current vcpu is the one we need. We have not
+			 * put it yet, so it's not on the active_list.
+			 */
+			vcpu = cur_vcpu;
+			goto done;
+		} else
+			goto none;
+	}
+
+	/* select vcpu and add to running list */
+	vcpu = list_entry(vsched->active_list.next, struct vcpu_info, list);
+	__vcpu_get(vcpu);
+done:
+	spin_unlock(&fairsched_lock);
+	/* when switching vcpus, trade cur_vcpu's rq lock for the new one */
+	rq = vcpu_rq(vcpu);
+	if (unlikely(vcpu != cur_vcpu)) {
+		spin_unlock(&vcpu_rq(cur_vcpu)->lock);
+		spin_lock(&rq->lock);
+		if (unlikely(!rq->nr_running)) {
+			/* race with balancing? */
+			spin_unlock(&rq->lock);
+			vcpu_put(vcpu);
+			spin_lock(&vcpu_rq(cur_vcpu)->lock);
+			goto restart;
+		}
+	}
+	BUG_ON(!rq->nr_running);
+	return vcpu;
+
+none:
+#ifdef CONFIG_FAIRSCHED
+	spin_unlock(&fairsched_lock);
+
+	/* fairsched doesn't schedule more CPUs than we have active */
+	BUG_ON(1);
+#else
+	goto idle;
+#endif
+
+idle:
+	vcpu = task_vcpu(this_pcpu()->idle);
+	__vcpu_get(vcpu);
+	spin_unlock(&fairsched_lock);
+	spin_unlock(&vcpu_rq(cur_vcpu)->lock);
+
+	spin_lock(&vcpu_rq(vcpu)->lock);
+	return vcpu;
+
+affine:
+	vcpu = vsched_vcpu(&default_vsched, raw_smp_processor_id());
+	/* current VCPU busy, continue */
+	if (cur_vcpu == vcpu && vcpu->active)
+		return cur_vcpu;
+	/* current is idle and nothing to run, keep idle */
+	if (vcpu_vsched(cur_vcpu) == &idle_vsched && !vcpu->active)
+		return cur_vcpu;
+
+	/* need to switch to idle... */
+	if (cur_vcpu == vcpu) {
+		spin_lock(&fairsched_lock);
+		goto idle;
+	}
+
+	/* ... and from idle */
+	spin_lock(&fairsched_lock);
+	__vcpu_get(vcpu);
+	goto done;
+}
+
+#else /* CONFIG_SCHED_VCPU */
+
+#define set_task_vsched(task, vsched)		do { } while (0)
+
+static inline void vcpu_attach(runqueue_t *rq)
+{
+}
+
+static inline void vcpu_detach(runqueue_t *rq)
+{
+}
+
+static inline void vcpu_put(vcpu_t vcpu)
+{
+}
+
+static inline vcpu_t schedule_vcpu(vcpu_t prev_vcpu, cycles_t cycles)
+{
+	return prev_vcpu;
+}
+
+static inline void set_task_vcpu(struct task_struct *p, vcpu_t vcpu)
+{
+	set_task_pcpu(p, vcpu->id);
+}
+
+#endif /* CONFIG_SCHED_VCPU */
+
+int vcpu_online(int cpu)
+{
+	return cpu_isset(cpu, vsched_vcpu_online_map(this_vsched()));
+}
 
-#define cpu_rq(cpu)		(&per_cpu(runqueues, (cpu)))
-#define this_rq()		(&__get_cpu_var(runqueues))
-#define task_rq(p)		cpu_rq(task_cpu(p))
-#define cpu_curr(cpu)		(cpu_rq(cpu)->curr)
 
 #ifndef prepare_arch_switch
 # define prepare_arch_switch(next)	do { } while (0)
@@ -284,6 +721,11 @@ for (domain = rcu_dereference(cpu_rq(cpu
 # define finish_arch_switch(prev)	do { } while (0)
 #endif
 
+struct kernel_stat_glob kstat_glob;	/* global statistics; update_sched_lat() adds to its sched_lat member */
+spinlock_t kstat_glb_lock = SPIN_LOCK_UNLOCKED;	/* NOTE(review): presumably guards kstat_glob; no user is visible here - confirm */
+EXPORT_SYMBOL(kstat_glob);
+EXPORT_SYMBOL(kstat_glb_lock);
+
 #ifndef __ARCH_WANT_UNLOCKED_CTXSW
 static inline int task_running(runqueue_t *rq, task_t *p)
 {
@@ -300,7 +742,7 @@ static inline void finish_lock_switch(ru
 	/* this is a valid case when another task releases the spinlock */
 	rq->lock.owner = current;
 #endif
-	spin_unlock_irq(&rq->lock);
+	spin_unlock(&rq->lock);
 }
 
 #else /* __ARCH_WANT_UNLOCKED_CTXSW */
@@ -374,6 +816,208 @@ static inline void task_rq_unlock(runque
 	spin_unlock_irqrestore(&rq->lock, *flags);
 }
 
+#ifdef CONFIG_VE
+/* Increment the nr_iowait counter stored in the statistics slot
+ * that VE_CPU_STATS() yields for environment 'env' on CPU 'cpu'. */
+#define ve_nr_iowait_inc(env, cpu)					\
+	do { VE_CPU_STATS((env), (cpu))->nr_iowait++; } while (0)
+/* Decrement the nr_iowait counter stored in the statistics slot
+ * that VE_CPU_STATS() yields for environment 'env' on CPU 'cpu'. */
+#define ve_nr_iowait_dec(env, cpu)					\
+	do { VE_CPU_STATS((env), (cpu))->nr_iowait--; } while (0)
+/* Increment the nr_unint counter stored in the statistics slot
+ * that VE_CPU_STATS() yields for environment 'env' on CPU 'cpu'. */
+#define ve_nr_unint_inc(env, cpu)					\
+	do { VE_CPU_STATS((env), (cpu))->nr_unint++; } while (0)
+/* Decrement the nr_unint counter stored in the statistics slot
+ * that VE_CPU_STATS() yields for environment 'env' on CPU 'cpu'. */
+#define ve_nr_unint_dec(env, cpu)					\
+	do { VE_CPU_STATS((env), (cpu))->nr_unint--; } while (0)
+
+#define cycles_after(a, b)	((long long)(b) - (long long)(a) < 0)	/* nonzero when a is later than b, judged by the sign of b - a */
+
+/* Idle cycles of @ve on @cpu, plus the still-open idle period if one counts. */
+cycles_t ve_sched_get_idle_time(struct ve_struct *ve, int cpu)
+{
+	struct ve_cpu_stats *stats = VE_CPU_STATS(ve, cpu);
+	cycles_t idle_since, total, now;
+	unsigned seq;
+
+	do {	/* seqcount read side: repeat while read_seqcount_retry() asks for it */
+		seq = read_seqcount_begin(&stats->stat_lock);
+		total = stats->idle_time;
+		idle_since = stats->strt_idle_time;
+		if (!idle_since || nr_uninterruptible_ve(ve) != 0)
+			continue;	/* no open idle period, or uninterruptible tasks exist */
+		now = get_cycles();
+		if (cycles_after(now, idle_since))
+			total += now - idle_since;
+	} while (read_seqcount_retry(&stats->stat_lock, seq));
+	return total;
+}
+EXPORT_SYMBOL(ve_sched_get_idle_time);
+
+/* Iowait cycles of @ve on @cpu, plus the still-open idle period if I/O is pending. */
+cycles_t ve_sched_get_iowait_time(struct ve_struct *ve, int cpu)
+{
+	struct ve_cpu_stats *stats = VE_CPU_STATS(ve, cpu);
+	cycles_t idle_since, total, now;
+	unsigned seq;
+
+	do {	/* seqcount read side: repeat while read_seqcount_retry() asks for it */
+		seq = read_seqcount_begin(&stats->stat_lock);
+		total = stats->iowait_time;
+		idle_since = stats->strt_idle_time;
+		if (!idle_since || nr_iowait_ve(ve) == 0)
+			continue;	/* no open idle period, or nobody waits on I/O */
+		now = get_cycles();
+		if (cycles_after(now, idle_since))
+			total += now - idle_since;
+	} while (read_seqcount_retry(&stats->stat_lock, seq));
+	return total;
+}
+
+EXPORT_SYMBOL(ve_sched_get_iowait_time);
+
+/* Close the idle period of @ve on @cpu at time @cycles and account its length. */
+static inline void ve_stop_idle(struct ve_struct *ve,
+		unsigned int cpu, cycles_t cycles)
+{
+	struct ve_cpu_stats *stats = VE_CPU_STATS(ve, cpu);
+	cycles_t began;
+
+	write_seqcount_begin(&stats->stat_lock);
+	began = stats->strt_idle_time;
+	if (began) {
+		if (cycles_after(cycles, began)) {
+			/* the period counts as iowait when nr_iowait_ve() is non-zero */
+			if (nr_iowait_ve(ve) != 0)
+				stats->iowait_time += cycles - began;
+			else
+				stats->idle_time += cycles - began;
+		}
+		stats->strt_idle_time = 0;	/* readers treat 0 as "no idle period open" */
+	}
+	write_seqcount_end(&stats->stat_lock);
+}
+
+/* Open an idle period for @ve on @cpu: remember @cycles as its start stamp. */
+static inline void ve_strt_idle(struct ve_struct *ve,
+		unsigned int cpu, cycles_t cycles)
+{
+	struct ve_cpu_stats *stats = VE_CPU_STATS(ve, cpu);
+
+	/* the stamp is written inside a stat_lock write section */
+	write_seqcount_begin(&stats->stat_lock);
+	stats->strt_idle_time = cycles;
+	write_seqcount_end(&stats->stat_lock);
+}
+
+#define ve_nr_running_inc(env, cpu, cycles)	do {	/* 0 -> 1: idle ends */ \
+		if (++VE_CPU_STATS((env), (cpu))->nr_running == 1)	\
+			ve_stop_idle((env), (cpu), (cycles));		\
+	} while (0)
+#define ve_nr_running_dec(env, cpu, cycles)	do {	/* 1 -> 0: idle starts */ \
+		if (--VE_CPU_STATS((env), (cpu))->nr_running == 0)	\
+			ve_strt_idle((env), (cpu), (cycles));		\
+	} while (0)
+
+void ve_sched_attach(struct ve_struct *envid)
+{	/* moves current's nr_running accounting from its owner VE to @envid */
+	struct task_struct *tsk;
+	unsigned int cpu;
+	cycles_t cycles;
+
+	tsk = current;
+	preempt_disable();
+	cycles = get_cycles();	/* one timestamp shared by both counter updates below */
+	cpu = task_cpu(tsk);
+	ve_nr_running_dec(VE_TASK_INFO(tsk)->owner_env, cpu, cycles);	/* leave the owner VE's count */
+	ve_nr_running_inc(envid, cpu, cycles);	/* show up in @envid's count on the same cpu */
+	preempt_enable();
+}
+EXPORT_SYMBOL(ve_sched_attach);
+
+/* Store @cyc as the wakeup stamp of @p inside a wakeup_lock write section. */
+static inline void write_wakeup_stamp(struct task_struct *p, cycles_t cyc)
+{
+	struct ve_task_info *info = VE_TASK_INFO(p);
+
+	write_seqcount_begin(&info->wakeup_lock);
+	info->wakeup_stamp = cyc;
+	write_seqcount_end(&info->wakeup_lock);
+}
+
+/* Add (@cycles - wakeup stamp of @t) to the global and per-VE latency stats. */
+static inline void update_sched_lat(struct task_struct *t, cycles_t cycles)
+{
+	cycles_t woken = t->ve_task_info.wakeup_stamp;
+	cycles_t delta;
+	int cpu;
+
+	/* no stamp recorded, or the stamp is not in the past: nothing to add */
+	if (!woken || cycles <= woken)
+		return;
+
+	cpu = smp_processor_id();	/* safe due to runqueue lock */
+	delta = cycles - woken;
+	KSTAT_LAT_PCPU_ADD(&kstat_glob.sched_lat, cpu, delta);
+	KSTAT_LAT_PCPU_ADD(&t->ve_task_info.exec_env->sched_lat_ve, cpu, delta);
+}
+
+static inline void update_ve_task_info(task_t *prev, cycles_t cycles)
+{	/* charges the cycles since prev's sched_time to its owner VE, unless prev is the idle task */
+#ifdef CONFIG_FAIRSCHED
+	if (prev != this_pcpu()->idle) {
+#else
+	if (prev != this_rq()->idle) {
+#endif
+		VE_CPU_STATS(prev->ve_task_info.owner_env,
+				smp_processor_id())->used_time +=
+			cycles - prev->ve_task_info.sched_time;
+
+		prev->ve_task_info.sched_time = cycles;	/* restart the measurement interval */
+	}
+}
+
+#else
+#define ve_nr_running_inc(env, cpu, cycles)	do { } while(0)	/* !CONFIG_VE: the VE accounting hooks expand to nothing */
+#define ve_nr_running_dec(env, cpu, cycles)	do { } while(0)
+#define ve_nr_iowait_inc(env, cpu)		do { } while(0)
+#define ve_nr_iowait_dec(env, cpu)		do { } while(0)
+#define ve_nr_unint_inc(env, cpu)		do { } while(0)
+#define ve_nr_unint_dec(env, cpu)		do { } while(0)
+#define update_ve_task_info(prev, cycles)	do { } while (0)
+#endif
+
+struct task_nrs_struct {	/* one slot of global task-state counters; see glob_task_nrs[] */
+	long nr_running;	/* ++ in __activate_task(), -- in deactivate_task() */
+	long nr_unint;		/* ++ when a TASK_UNINTERRUPTIBLE task is deactivated */
+	long nr_stopped;	/* ++ when a TASK_STOPPED task is deactivated */
+	long nr_sleeping;	/* ++ when a TASK_INTERRUPTIBLE task is deactivated */
+	long nr_iowait;		/* summed by nr_iowait(); updaters are outside this chunk */
+	long long nr_switches;	/* summed by nr_context_switches(); updaters are outside this chunk */
+} ____cacheline_aligned_in_smp;
+
+static struct task_nrs_struct glob_task_nrs[NR_CPUS];	/* one slot per CPU number; the helpers below use plain ++/-- on a slot */
+#define nr_running_inc(cpu)	do { glob_task_nrs[cpu].nr_running++; } while (0)
+#define nr_running_dec(cpu)	do { glob_task_nrs[cpu].nr_running--; } while (0)
+#define nr_unint_inc(cpu)	do { glob_task_nrs[cpu].nr_unint++; } while (0)
+#define nr_unint_dec(cpu)	do { glob_task_nrs[cpu].nr_unint--; } while (0)
+#define nr_stopped_inc(cpu)	do { glob_task_nrs[cpu].nr_stopped++; } while (0)
+#define nr_stopped_dec(cpu)	do { glob_task_nrs[cpu].nr_stopped--; } while (0)
+#define nr_sleeping_inc(cpu)	do { glob_task_nrs[cpu].nr_sleeping++; } while (0)
+#define nr_sleeping_dec(cpu)	do { glob_task_nrs[cpu].nr_sleeping--; } while (0)
+#define nr_iowait_inc(cpu)	do { glob_task_nrs[cpu].nr_iowait++; } while (0)
+#define nr_iowait_dec(cpu)	do { glob_task_nrs[cpu].nr_iowait--; } while (0)
+
+
+unsigned long nr_zombie = 0;   /* protected by tasklist_lock */
+EXPORT_SYMBOL(nr_zombie);
+
+atomic_t nr_dead = ATOMIC_INIT(0);	/* NOTE(review): presumably a count of dead tasks; updaters are outside this chunk - confirm */
+EXPORT_SYMBOL(nr_dead);
+ 
 #ifdef CONFIG_SCHEDSTATS
 /*
  * bump this up when changing the output format or the meaning of an existing
@@ -666,8 +1310,19 @@ static int effective_prio(task_t *p)
  */
 static inline void __activate_task(task_t *p, runqueue_t *rq)
 {
+	cycles_t cycles;	/* only assigned (and only consumed) when CONFIG_VE is set */
+
+#ifdef CONFIG_VE
+	cycles = get_cycles();
+	write_wakeup_stamp(p, cycles);	/* stamp later read by update_sched_lat() */
+	p->ve_task_info.sleep_time += cycles;	/* pairs with the -= cycles in deactivate_task() */
+#endif
 	enqueue_task(p, rq->active);
 	rq->nr_running++;
+	ve_nr_running_inc(VE_TASK_INFO(p)->owner_env, task_cpu(p), cycles);
+	nr_running_inc(smp_processor_id());	/* global counter is kept on the executing CPU's slot */
+	if (rq->nr_running == 1)	/* runqueue just became non-empty */
+		vcpu_attach(rq);
 }
 
 /*
@@ -800,9 +1455,38 @@ static void activate_task(task_t *p, run
  */
 static void deactivate_task(struct task_struct *p, runqueue_t *rq)
 {
+	/* pcpu indexes glob_task_nrs[], whose helpers are not CONFIG_VE-only: always declare it */
+	unsigned int pcpu = smp_processor_id();
+#ifdef CONFIG_VE
+	unsigned int cpu = task_cpu(p);
+	struct ve_struct *ve = p->ve_task_info.owner_env;
+	cycles_t cycles = get_cycles();
+
+	/* -= now here and += now in __activate_task(): sleep_time sums the time spent off the runqueue */
+	p->ve_task_info.sleep_time -= cycles;
+#endif
+
+	/* Count the sleeping state in which the task leaves the runqueue. */
+	if (p->state == TASK_UNINTERRUPTIBLE) {
+		ve_nr_unint_inc(ve, cpu);
+		nr_unint_inc(pcpu);
+	}
+	if (p->state == TASK_INTERRUPTIBLE) {
+		rq->nr_sleeping++;
+		nr_sleeping_inc(pcpu);
+	}
+	if (p->state == TASK_STOPPED) {
+		rq->nr_stopped++;
+		nr_stopped_inc(pcpu);
+	}
+
+	ve_nr_running_dec(VE_TASK_INFO(p)->owner_env, cpu, cycles);
+	nr_running_dec(pcpu);
 	rq->nr_running--;
 	dequeue_task(p, p->array);
 	p->array = NULL;
+	if (rq->nr_running == 0)	/* runqueue just became empty */
+		vcpu_detach(rq);
 }
 
 /*
@@ -813,18 +1497,22 @@ static void deactivate_task(struct task_
  * the target CPU.
  */
 #ifdef CONFIG_SMP
+/* FIXME: need to add vsched arg */
 static void resched_task(task_t *p)
 {
 	int cpu;
 
+#if 0
+	/* FIXME: this fails due to idle rq->curr == idle */
 	assert_spin_locked(&task_rq(p)->lock);
+#endif
 
 	if (unlikely(test_tsk_thread_flag(p, TIF_NEED_RESCHED)))
 		return;
 
 	set_tsk_thread_flag(p, TIF_NEED_RESCHED);
 
-	cpu = task_cpu(p);
+	cpu = task_pcpu(p);
 	if (cpu == smp_processor_id())
 		return;
 
@@ -847,15 +1535,35 @@ static inline void resched_task(task_t *
  */
 inline int task_curr(const task_t *p)
 {
-	return cpu_curr(task_cpu(p)) == p;
+	return task_rq(p)->curr == p;	/* is p the ->curr task of its own runqueue? */
 }
 
-#ifdef CONFIG_SMP
+/**
+ * idle_cpu - is a given cpu idle currently?
+ * @cpu: the processor in question.
+ */
+inline int idle_cpu(int cpu)
+{
+	return pcpu(cpu)->vsched == &idle_vsched;	/* idle means: this physical CPU's vsched is idle_vsched */
+}
+
+EXPORT_SYMBOL_GPL(idle_cpu);
+
+static inline int idle_vcpu(vcpu_t cpu)
+{	/* VCPU flavour of idle_cpu() */
+#ifdef CONFIG_SCHED_VCPU
+	return !cpu->active;	/* a VCPU whose ->active flag is clear is reported as idle */
+#else
+	return idle_cpu(cpu->id);	/* without VCPU scheduling, defer to idle_cpu() on the VCPU's id */
+#endif
+}
+
+#if defined(CONFIG_SMP) || defined(CONFIG_SCHED_VCPU)
 typedef struct {
 	struct list_head list;
 
 	task_t *task;
-	int dest_cpu;
+	vcpu_t dest_cpu;	/* migration target, now a vcpu_t rather than an int CPU number */
 
 	struct completion done;
 } migration_req_t;
@@ -864,7 +1572,7 @@ typedef struct {
  * The task's runqueue lock must be held.
  * Returns true if you have to wait for migration thread.
  */
-static int migrate_task(task_t *p, int dest_cpu, migration_req_t *req)
+static int migrate_task(task_t *p, vcpu_t dest_cpu, migration_req_t *req)
 {
 	runqueue_t *rq = task_rq(p);
 
@@ -872,8 +1580,13 @@ static int migrate_task(task_t *p, int d
 	 * If the task is not on a runqueue (and not running), then
 	 * it is sufficient to simply update the task's cpu field.
 	 */
+#ifdef CONFIG_SCHED_VCPU
+	BUG_ON(task_vsched(p) == &idle_vsched);
+	BUG_ON(vcpu_vsched(dest_cpu) == &idle_vsched);
+#endif
 	if (!p->array && !task_running(rq, p)) {
-		set_task_cpu(p, dest_cpu);
+		set_task_vsched(p, vcpu_vsched(dest_cpu));
+		set_task_vcpu(p, dest_cpu);
 		return 0;
 	}
 
@@ -913,6 +1626,7 @@ repeat:
 	}
 	task_rq_unlock(rq, &flags);
 }
+EXPORT_SYMBOL_GPL(wait_task_inactive);
 
 /***
  * kick_process - kick a running thread to enter/exit the kernel
@@ -932,21 +1646,26 @@ void kick_process(task_t *p)
 	int cpu;
 
 	preempt_disable();
-	cpu = task_cpu(p);
+	cpu = task_pcpu(p);
 	if ((cpu != smp_processor_id()) && task_curr(p))
+		/* FIXME: ??? think over */
+		/* should add something like get_pcpu(cpu)->vcpu->id == task_cpu(p),
+		   but with serialization of vcpu access... */
 		smp_send_reschedule(cpu);
 	preempt_enable();
 }
+#endif
 
+#ifdef CONFIG_SMP
 /*
  * Return a low guess at the load of a migration-source cpu.
  *
  * We want to under-estimate the load of migration sources, to
  * balance conservatively.
  */
-static inline unsigned long source_load(int cpu, int type)
+static inline unsigned long source_load(vcpu_t cpu, int type)
 {
-	runqueue_t *rq = cpu_rq(cpu);
+	runqueue_t *rq = vcpu_rq(cpu);
 	unsigned long load_now = rq->nr_running * SCHED_LOAD_SCALE;
 	if (type == 0)
 		return load_now;
@@ -957,9 +1676,9 @@ static inline unsigned long source_load(
 /*
  * Return a high guess at the load of a migration-target cpu
  */
-static inline unsigned long target_load(int cpu, int type)
+static inline unsigned long target_load(vcpu_t cpu, int type)
 {
-	runqueue_t *rq = cpu_rq(cpu);
+	runqueue_t *rq = vcpu_rq(cpu);
 	unsigned long load_now = rq->nr_running * SCHED_LOAD_SCALE;
 	if (type == 0)
 		return load_now;
@@ -972,33 +1691,35 @@ static inline unsigned long target_load(
  * domain.
  */
 static struct sched_group *
-find_idlest_group(struct sched_domain *sd, struct task_struct *p, int this_cpu)
+find_idlest_group(struct sched_domain *sd, struct task_struct *p, vcpu_t this_cpu)
 {
 	struct sched_group *idlest = NULL, *this = NULL, *group = sd->groups;
 	unsigned long min_load = ULONG_MAX, this_load = 0;
 	int load_idx = sd->forkexec_idx;
 	int imbalance = 100 + (sd->imbalance_pct-100)/2;
+	struct vcpu_scheduler *vsched;
+	vcpu_t vcpu;
+	int this_pcpu;
 
+	vsched = vcpu_vsched(this_cpu);
+	this_pcpu = vcpu_last_pcpu(this_cpu);
 	do {
 		unsigned long load, avg_load;
 		int local_group;
 		int i;
 
-		/* Skip over this group if it has no CPUs allowed */
-		if (!cpus_intersects(group->cpumask, p->cpus_allowed))
-			goto nextgroup;
-
-		local_group = cpu_isset(this_cpu, group->cpumask);
+		local_group = cpu_isset(this_pcpu, group->cpumask);
 
 		/* Tally up the load of all CPUs in the group */
 		avg_load = 0;
 
 		for_each_cpu_mask(i, group->cpumask) {
+			vcpu = pcpu(i)->vcpu;
 			/* Bias balancing toward cpus of our domain */
 			if (local_group)
-				load = source_load(i, load_idx);
+				load = source_load(vcpu, load_idx);
 			else
-				load = target_load(i, load_idx);
+				load = target_load(vcpu, load_idx);
 
 			avg_load += load;
 		}
@@ -1013,7 +1734,6 @@ find_idlest_group(struct sched_domain *s
 			min_load = avg_load;
 			idlest = group;
 		}
-nextgroup:
 		group = group->next;
 	} while (group != sd->groups);
 
@@ -1025,23 +1745,31 @@ nextgroup:
 /*
  * find_idlest_queue - find the idlest runqueue among the cpus in group.
  */
-static int
-find_idlest_cpu(struct sched_group *group, struct task_struct *p, int this_cpu)
+static vcpu_t 
+find_idlest_cpu(struct sched_group *group, struct task_struct *p, vcpu_t this_cpu)
 {
-	cpumask_t tmp;
 	unsigned long load, min_load = ULONG_MAX;
-	int idlest = -1;
+	cpumask_t vmask;
+	struct vcpu_scheduler *vsched;
+	vcpu_t idlest = (vcpu_t)-1;
+	vcpu_t vcpu;
 	int i;
 
-	/* Traverse only the allowed CPUs */
-	cpus_and(tmp, group->cpumask, p->cpus_allowed);
+	vsched = vcpu_vsched(this_cpu);
+	BUG_ON(vsched != task_vsched(p));
 
-	for_each_cpu_mask(i, tmp) {
-		load = source_load(i, 0);
+	cpus_and(vmask, vsched_vcpu_online_map(vsched), p->cpus_allowed);
+	for_each_cpu_mask(i, vmask) {
+		vcpu = vsched_vcpu(vsched, i);
 
-		if (load < min_load || (load == min_load && i == this_cpu)) {
+		if (!cpu_isset(vcpu_last_pcpu(vcpu), group->cpumask))
+			continue;
+
+		load = source_load(vcpu, 0);
+
+		if (load < min_load || (load == min_load && vcpu == this_cpu)) {
 			min_load = load;
-			idlest = i;
+			idlest = vcpu;
 		}
 	}
 
@@ -1059,7 +1787,7 @@ find_idlest_cpu(struct sched_group *grou
  *
  * preempt must be disabled.
  */
-static int sched_balance_self(int cpu, int flag)
+static vcpu_t sched_balance_self(vcpu_t cpu, int flag)
 {
 	struct task_struct *t = current;
 	struct sched_domain *tmp, *sd = NULL;
@@ -1071,7 +1799,7 @@ static int sched_balance_self(int cpu, i
 	while (sd) {
 		cpumask_t span;
 		struct sched_group *group;
-		int new_cpu;
+		vcpu_t new_cpu;
 		int weight;
 
 		span = sd->span;
@@ -1080,7 +1808,7 @@ static int sched_balance_self(int cpu, i
 			goto nextlevel;
 
 		new_cpu = find_idlest_cpu(group, t, cpu);
-		if (new_cpu == -1 || new_cpu == cpu)
+		if (new_cpu == (vcpu_t)(-1) || new_cpu == cpu)
 			goto nextlevel;
 
 		/* Now try balancing at a lower domain level */
@@ -1111,21 +1839,27 @@ nextlevel:
  * Returns the CPU we should wake onto.
  */
 #if defined(ARCH_HAS_SCHED_WAKE_IDLE)
-static int wake_idle(int cpu, task_t *p)
+static vcpu_t wake_idle(vcpu_t cpu, task_t *p)
 {
-	cpumask_t tmp;
+	cpumask_t vtmp;
 	struct sched_domain *sd;
+	struct vcpu_scheduler *vsched;
 	int i;
 
-	if (idle_cpu(cpu))
+	if (idle_vcpu(cpu))
 		return cpu;
 
+	vsched = vcpu_vsched(cpu);
+	cpus_and(vtmp, vsched_vcpu_online_map(vsched), p->cpus_allowed);
 	for_each_domain(cpu, sd) {
 		if (sd->flags & SD_WAKE_IDLE) {
-			cpus_and(tmp, sd->span, p->cpus_allowed);
-			for_each_cpu_mask(i, tmp) {
-				if (idle_cpu(i))
-					return i;
+			for_each_cpu_mask(i, vtmp) {
+				vcpu_t vcpu;
+				vcpu = vsched_vcpu(vsched, i);
+				if (!cpu_isset(vcpu_last_pcpu(vcpu), sd->span))
+					continue;
+				if (idle_vcpu(vcpu))
+					return vcpu;
 			}
 		}
 		else
@@ -1134,7 +1868,7 @@ static int wake_idle(int cpu, task_t *p)
 	return cpu;
 }
 #else
-static inline int wake_idle(int cpu, task_t *p)
+static inline vcpu_t wake_idle(vcpu_t cpu, task_t *p)
 {
 	return cpu;
 }
@@ -1156,15 +1890,17 @@ static inline int wake_idle(int cpu, tas
  */
 static int try_to_wake_up(task_t *p, unsigned int state, int sync)
 {
-	int cpu, this_cpu, success = 0;
+	vcpu_t cpu, this_cpu;
+	int success = 0;
 	unsigned long flags;
 	long old_state;
 	runqueue_t *rq;
 #ifdef CONFIG_SMP
 	unsigned long load, this_load;
 	struct sched_domain *sd, *this_sd = NULL;
-	int new_cpu;
+	vcpu_t new_cpu;
 #endif
+	cpu = NULL;
 
 	rq = task_rq_lock(p, &flags);
 	old_state = p->state;
@@ -1174,8 +1910,8 @@ static int try_to_wake_up(task_t *p, uns
 	if (p->array)
 		goto out_running;
 
-	cpu = task_cpu(p);
-	this_cpu = smp_processor_id();
+	cpu = task_vcpu(p);
+	this_cpu = this_vcpu();
 
 #ifdef CONFIG_SMP
 	if (unlikely(task_running(rq, p)))
@@ -1184,20 +1920,23 @@ static int try_to_wake_up(task_t *p, uns
 	new_cpu = cpu;
 
 	schedstat_inc(rq, ttwu_cnt);
+	/* FIXME: add vsched->last_vcpu array to optimize wakeups in different vsched */
+	if (vcpu_vsched(cpu) != vcpu_vsched(this_cpu))
+		goto out_set_cpu;
 	if (cpu == this_cpu) {
 		schedstat_inc(rq, ttwu_local);
 		goto out_set_cpu;
 	}
 
 	for_each_domain(this_cpu, sd) {
-		if (cpu_isset(cpu, sd->span)) {
+		if (cpu_isset(vcpu_last_pcpu(cpu), sd->span)) {
 			schedstat_inc(sd, ttwu_wake_remote);
 			this_sd = sd;
 			break;
 		}
 	}
 
-	if (unlikely(!cpu_isset(this_cpu, p->cpus_allowed)))
+	if (unlikely(!vcpu_isset(this_cpu, p->cpus_allowed)))
 		goto out_set_cpu;
 
 	/*
@@ -1253,7 +1992,7 @@ static int try_to_wake_up(task_t *p, uns
 out_set_cpu:
 	new_cpu = wake_idle(new_cpu, p);
 	if (new_cpu != cpu) {
-		set_task_cpu(p, new_cpu);
+		set_task_vcpu(p, new_cpu);
 		task_rq_unlock(rq, &flags);
 		/* might preempt at this point */
 		rq = task_rq_lock(p, &flags);
@@ -1263,13 +2002,21 @@ out_set_cpu:
 		if (p->array)
 			goto out_running;
 
-		this_cpu = smp_processor_id();
-		cpu = task_cpu(p);
+		this_cpu = this_vcpu();
+		cpu = task_vcpu(p);
 	}
 
 out_activate:
 #endif /* CONFIG_SMP */
-	if (old_state == TASK_UNINTERRUPTIBLE) {
+	if (old_state == TASK_INTERRUPTIBLE) {
+		nr_sleeping_dec(smp_processor_id());
+		rq->nr_sleeping--;
+	} else if (old_state == TASK_STOPPED) {
+		nr_stopped_dec(smp_processor_id());
+		rq->nr_stopped--;
+	} else if (old_state == TASK_UNINTERRUPTIBLE) {
+		nr_unint_dec(smp_processor_id());
+		ve_nr_unint_dec(p->ve_task_info.owner_env, task_cpu(p));
 		rq->nr_uninterruptible--;
 		/*
 		 * Tasks on involuntary sleep don't earn
@@ -1324,17 +2071,45 @@ int fastcall wake_up_state(task_t *p, un
 }
 
 /*
+ * init is special, it is forked from swapper (idle_vsched) and should
+ * belong to default_vsched, so we have to change its vsched/fairsched manually
+ */
+static void wake_up_init(task_t *p)
+{
+	runqueue_t *rq;
+	unsigned long flags;
+
+	/* we should change both fairsched node and vsched here */
+	set_task_vsched(p, &default_vsched);
+	set_task_cpu(p, 0);	/* init is always placed on CPU 0 */
+
+	/*
+	 * can't call wake_up_new_task() directly here,
+	 * since it assumes that a child belongs to the same vsched
+	 */
+	p->state = TASK_RUNNING;
+	p->sleep_avg = 0;
+	p->prio = effective_prio(p);
+
+	rq = task_rq_lock(p, &flags);
+	__activate_task(p, rq);	/* enqueue on rq->active and update the running counters */
+	task_rq_unlock(rq, &flags);
+}
+
+/*
  * Perform scheduler related setup for a newly forked process p.
  * p is forked by current.
  */
 void fastcall sched_fork(task_t *p, int clone_flags)
 {
-	int cpu = get_cpu();
-
+	vcpu_t cpu;
+       
+	preempt_disable();
+	cpu = this_vcpu();
 #ifdef CONFIG_SMP
 	cpu = sched_balance_self(cpu, SD_BALANCE_FORK);
 #endif
-	set_task_cpu(p, cpu);
+	set_task_vcpu(p, cpu);
 
 	/*
 	 * We mark the process as running here, but have not actually
@@ -1369,6 +2144,10 @@ void fastcall sched_fork(task_t *p, int 
 	p->first_time_slice = 1;
 	current->time_slice >>= 1;
 	p->timestamp = sched_clock();
+#ifdef CONFIG_VE
+	/*cosmetic: sleep till wakeup below*/
+	p->ve_task_info.sleep_time -= get_cycles();
+#endif
 	if (unlikely(!current->time_slice)) {
 		/*
 		 * This case is rare, it happens when the parent has only
@@ -1379,7 +2158,7 @@ void fastcall sched_fork(task_t *p, int 
 		scheduler_tick();
 	}
 	local_irq_enable();
-	put_cpu();
+	preempt_enable();
 }
 
 /*
@@ -1392,13 +2171,19 @@ void fastcall sched_fork(task_t *p, int 
 void fastcall wake_up_new_task(task_t *p, unsigned long clone_flags)
 {
 	unsigned long flags;
-	int this_cpu, cpu;
+	vcpu_t this_cpu, cpu;
 	runqueue_t *rq, *this_rq;
 
+	if (unlikely(p->pid == 1)) {
+		wake_up_init(p);
+		return;
+	}
+
 	rq = task_rq_lock(p, &flags);
 	BUG_ON(p->state != TASK_RUNNING);
-	this_cpu = smp_processor_id();
-	cpu = task_cpu(p);
+	BUG_ON(task_vsched(current) != task_vsched(p));
+	this_cpu = this_vcpu();
+	cpu = task_vcpu(p);
 
 	/*
 	 * We decrease the sleep average of forking parents
@@ -1426,6 +2211,9 @@ void fastcall wake_up_new_task(task_t *p
 				p->array = current->array;
 				p->array->nr_active++;
 				rq->nr_running++;
+				ve_nr_running_inc(VE_TASK_INFO(p)->owner_env,
+						task_cpu(p), get_cycles());
+				nr_running_inc(smp_processor_id());
 			}
 			set_need_resched();
 		} else
@@ -1439,7 +2227,7 @@ void fastcall wake_up_new_task(task_t *p
 		 */
 		this_rq = rq;
 	} else {
-		this_rq = cpu_rq(this_cpu);
+		this_rq = vcpu_rq(this_cpu);
 
 		/*
 		 * Not the local CPU - must adjust timestamp. This should
@@ -1482,7 +2270,7 @@ void fastcall sched_exit(task_t *p)
 	 * the sleep_avg of the parent as well.
 	 */
 	rq = task_rq_lock(p->parent, &flags);
-	if (p->first_time_slice && task_cpu(p) == task_cpu(p->parent)) {
+	if (p->first_time_slice && task_vcpu(p) == task_vcpu(p->parent)) {
 		p->parent->time_slice += p->time_slice;
 		if (unlikely(p->parent->time_slice > task_timeslice(p)))
 			p->parent->time_slice = task_timeslice(p);
@@ -1532,7 +2320,10 @@ static inline void finish_task_switch(ru
 {
 	struct mm_struct *mm = rq->prev_mm;
 	unsigned long prev_task_flags;
+	vcpu_t prev_vcpu, vcpu;
 
+	prev_vcpu = task_vcpu(prev);
+	vcpu = rq_vcpu(rq);
 	rq->prev_mm = NULL;
 
 	/*
@@ -1549,6 +2340,10 @@ static inline void finish_task_switch(ru
 	prev_task_flags = prev->flags;
 	finish_arch_switch(prev);
 	finish_lock_switch(rq, prev);
+	if (prev_vcpu != vcpu)
+		vcpu_put(prev_vcpu);
+	local_irq_enable();
+
 	if (mm)
 		mmdrop(mm);
 	if (unlikely(prev_task_flags & PF_DEAD))
@@ -1569,8 +2364,9 @@ asmlinkage void schedule_tail(task_t *pr
 	preempt_enable();
 #endif
 	if (current->set_child_tid)
-		put_user(current->pid, current->set_child_tid);
+		put_user(virt_pid(current), current->set_child_tid);
 }
+EXPORT_SYMBOL_GPL(schedule_tail);
 
 /*
  * context_switch - switch to the new MM and the new
@@ -1610,20 +2406,26 @@ task_t * context_switch(runqueue_t *rq, 
  */
 unsigned long nr_running(void)
 {
-	unsigned long i, sum = 0;
+	unsigned long i, sum;
 
+	sum = 0;
 	for_each_online_cpu(i)
-		sum += cpu_rq(i)->nr_running;
+		sum += glob_task_nrs[i].nr_running;	/* global per-CPU slots replace the per-runqueue counters */
+
+	if (unlikely((long)sum < 0))	/* clamp a negative total to zero */
+		sum = 0;
 
 	return sum;
 }
+EXPORT_SYMBOL(nr_running);
 
 unsigned long nr_uninterruptible(void)
 {
-	unsigned long i, sum = 0;
+	unsigned long i, sum;
 
+	sum = 0;
 	for_each_cpu(i)
-		sum += cpu_rq(i)->nr_uninterruptible;
+		sum += glob_task_nrs[i].nr_unint;
 
 	/*
 	 * Since we read the counters lockless, it might be slightly
@@ -1635,31 +2437,133 @@ unsigned long nr_uninterruptible(void)
 	return sum;
 }
 
+EXPORT_SYMBOL(nr_uninterruptible);
+
 unsigned long long nr_context_switches(void)
 {
-	unsigned long long i, sum = 0;
+	unsigned long long i, sum;
 
+	sum = 0;
 	for_each_cpu(i)
-		sum += cpu_rq(i)->nr_switches;
+		sum += glob_task_nrs[i].nr_switches;	/* per-CPU slots are signed long long */
+
+	if (unlikely((long long)sum < 0))	/* test at full 64-bit width: (long) truncates on 32-bit */
+		sum = 0;
 
 	return sum;
 }
 
+EXPORT_SYMBOL(nr_context_switches);
+
 unsigned long nr_iowait(void)
 {
-	unsigned long i, sum = 0;
+	unsigned long i, sum;
 
+	sum = 0;
 	for_each_cpu(i)
-		sum += atomic_read(&cpu_rq(i)->nr_iowait);
+		sum += glob_task_nrs[i].nr_iowait;	/* plain long slot; no longer an atomic_read() */
+
+	if (unlikely((long)sum < 0))	/* clamp a negative total to zero */
+		sum = 0;
 
 	return sum;
 }
 
-#ifdef CONFIG_SMP
+EXPORT_SYMBOL(nr_iowait);
+
+/* Sum of the per-CPU nr_stopped slots of glob_task_nrs[], clamped at zero. */
+unsigned long nr_stopped(void)
+{
+	unsigned long cpu;
+	long total = 0;
+
+	for_each_cpu(cpu)
+		total += glob_task_nrs[cpu].nr_stopped;
+
+	/* inc and dec may hit different slots, so only the total is meaningful */
+	if (unlikely(total < 0))
+		return 0;
+	return total;
+}
+EXPORT_SYMBOL(nr_stopped);
+
+/* Sum of the per-CPU nr_sleeping slots of glob_task_nrs[], clamped at zero. */
+unsigned long nr_sleeping(void)
+{
+	unsigned long cpu;
+	long total = 0;
+
+	for_each_cpu(cpu)
+		total += glob_task_nrs[cpu].nr_sleeping;
+
+	/* inc and dec may hit different slots, so only the total is meaningful */
+	if (unlikely(total < 0))
+		return 0;
+	return total;
+}
+EXPORT_SYMBOL(nr_sleeping);
+
+#ifdef CONFIG_VE
+/* nr_running of @ve summed over the mask from ve_cpu_online_map(); never negative. */
+unsigned long nr_running_ve(struct ve_struct *ve)
+{
+	cpumask_t mask;
+	long total = 0;
+	int cpu;
+
+	ve_cpu_online_map(ve, &mask);
+	for_each_cpu_mask(cpu, mask)
+		total += VE_CPU_STATS(ve, cpu)->nr_running;
+	return total < 0 ? 0 : (unsigned long)total;
+}
+
+EXPORT_SYMBOL(nr_running_ve);
+
+/* nr_unint of @ve summed over the mask from ve_cpu_online_map(); never negative. */
+unsigned long nr_uninterruptible_ve(struct ve_struct *ve)
+{
+	cpumask_t mask;
+	long total = 0;
+	int cpu;
+
+	ve_cpu_online_map(ve, &mask);
+	for_each_cpu_mask(cpu, mask)
+		total += VE_CPU_STATS(ve, cpu)->nr_unint;
+	return total < 0 ? 0 : (unsigned long)total;
+}
+
+EXPORT_SYMBOL(nr_uninterruptible_ve);
+
+/* nr_iowait of @ve summed over the mask from ve_cpu_online_map(); never negative. */
+unsigned long nr_iowait_ve(struct ve_struct *ve)
+{
+	cpumask_t mask;
+	long total = 0;
+	int cpu;
+
+	ve_cpu_online_map(ve, &mask);
+	for_each_cpu_mask(cpu, mask)
+		total += VE_CPU_STATS(ve, cpu)->nr_iowait;
+	return total < 0 ? 0 : (unsigned long)total;
+}
+
+EXPORT_SYMBOL(nr_iowait_ve);
+#endif
+
+#if defined(CONFIG_SMP) || defined(CONFIG_SCHED_VCPU)
+
+#ifdef CONFIG_SCHED_VCPU
+#define rq_compare(rq1, rq2)	((rq1) < (rq2))		/* lock order: by runqueue address */
+#else
+#define rq_compare(rq1, rq2)	((rq1)->cpu < (rq2)->cpu)	/* lock order: by rq->cpu */
+#endif
 
 /*
  * double_rq_lock - safely lock two runqueues
  *
+ * We must take them in cpu order to match code in
+ * dependent_sleeper and wake_dependent_sleeper.
+ *
  * Note this does not disable interrupts like task_rq_lock,
  * you need to do so manually before calling.
  */
@@ -1671,7 +2575,7 @@ static void double_rq_lock(runqueue_t *r
 		spin_lock(&rq1->lock);
 		__acquire(rq2->lock);	/* Fake it out ;) */
 	} else {
-		if (rq1 < rq2) {
+		if (rq_compare(rq1, rq2)) {
 			spin_lock(&rq1->lock);
 			spin_lock(&rq2->lock);
 		} else {
@@ -1699,38 +2603,20 @@ static void double_rq_unlock(runqueue_t 
 }
 
 /*
- * double_lock_balance - lock the busiest runqueue, this_rq is locked already.
- */
-static void double_lock_balance(runqueue_t *this_rq, runqueue_t *busiest)
-	__releases(this_rq->lock)
-	__acquires(busiest->lock)
-	__acquires(this_rq->lock)
-{
-	if (unlikely(!spin_trylock(&busiest->lock))) {
-		if (busiest < this_rq) {
-			spin_unlock(&this_rq->lock);
-			spin_lock(&busiest->lock);
-			spin_lock(&this_rq->lock);
-		} else
-			spin_lock(&busiest->lock);
-	}
-}
-
-/*
  * If dest_cpu is allowed for this process, migrate the task to it.
  * This is accomplished by forcing the cpu_allowed mask to only
  * allow dest_cpu, which will force the cpu onto dest_cpu.  Then
  * the cpu_allowed mask is restored.
  */
-static void sched_migrate_task(task_t *p, int dest_cpu)
+static void sched_migrate_task(task_t *p, vcpu_t dest_cpu)
 {
 	migration_req_t req;
 	runqueue_t *rq;
 	unsigned long flags;
 
 	rq = task_rq_lock(p, &flags);
-	if (!cpu_isset(dest_cpu, p->cpus_allowed)
-	    || unlikely(cpu_is_offline(dest_cpu)))
+	if (!vcpu_isset(dest_cpu, p->cpus_allowed)
+	    || unlikely(vcpu_is_offline(dest_cpu)))
 		goto out;
 
 	/* force the process onto the specified CPU */
@@ -1747,6 +2633,26 @@ static void sched_migrate_task(task_t *p
 out:
 	task_rq_unlock(rq, &flags);
 }
+#endif
+
+#ifdef CONFIG_SMP
+/*
+ * double_lock_balance - lock the busiest runqueue, this_rq is locked already.
+ */
+static void double_lock_balance(runqueue_t *this_rq, runqueue_t *busiest)
+	__releases(this_rq->lock)
+	__acquires(busiest->lock)
+	__acquires(this_rq->lock)
+{
+	if (unlikely(!spin_trylock(&busiest->lock))) {	/* trylock failed: fall back to ordered locking */
+		if (rq_compare(busiest, this_rq)) {	/* busiest ranks first: drop ours, then take both in order */
+			spin_unlock(&this_rq->lock);
+			spin_lock(&busiest->lock);
+			spin_lock(&this_rq->lock);
+		} else	/* this_rq ranks first and is already held: just wait for busiest */
+			spin_lock(&busiest->lock);
+	}
+}
 
 /*
  * sched_exec - execve() is a valuable balancing opportunity, because at
@@ -1754,9 +2660,12 @@ out:
  */
 void sched_exec(void)
 {
-	int new_cpu, this_cpu = get_cpu();
+	vcpu_t new_cpu, this_cpu;
+
+	preempt_disable();	/* replaces get_cpu(): the VCPU is now read via this_vcpu() */
+	this_cpu = this_vcpu();
 	new_cpu = sched_balance_self(this_cpu, SD_BALANCE_EXEC);
-	put_cpu();
+	preempt_enable();	/* replaces put_cpu() */
 	if (new_cpu != this_cpu)
 		sched_migrate_task(current, new_cpu);
 }
@@ -1767,12 +2676,24 @@ void sched_exec(void)
  */
 static
 void pull_task(runqueue_t *src_rq, prio_array_t *src_array, task_t *p,
-	       runqueue_t *this_rq, prio_array_t *this_array, int this_cpu)
+	       runqueue_t *this_rq, prio_array_t *this_array, vcpu_t this_cpu)
 {
+	struct ve_struct *ve;
+	cycles_t cycles;
+
+	cycles = get_cycles();
+	ve = VE_TASK_INFO(p)->owner_env;
+
 	dequeue_task(p, src_array);
 	src_rq->nr_running--;
-	set_task_cpu(p, this_cpu);
+	ve_nr_running_dec(ve, task_cpu(p), cycles);
+	if (src_rq->nr_running == 0)
+		vcpu_detach(src_rq);
+	set_task_vcpu(p, this_cpu);
+	if (this_rq->nr_running == 0)
+		vcpu_attach(this_rq);
 	this_rq->nr_running++;
+	ve_nr_running_inc(ve, task_cpu(p), cycles);
 	enqueue_task(p, this_array);
 	p->timestamp = (p->timestamp - src_rq->timestamp_last_tick)
 				+ this_rq->timestamp_last_tick;
@@ -1788,7 +2709,7 @@ void pull_task(runqueue_t *src_rq, prio_
  * can_migrate_task - may task p from runqueue rq be migrated to this_cpu?
  */
 static
-int can_migrate_task(task_t *p, runqueue_t *rq, int this_cpu,
+int can_migrate_task(task_t *p, runqueue_t *rq, vcpu_t this_cpu,
 		     struct sched_domain *sd, enum idle_type idle,
 		     int *all_pinned)
 {
@@ -1798,7 +2719,7 @@ int can_migrate_task(task_t *p, runqueue
 	 * 2) cannot be migrated to this CPU due to cpus_allowed, or
 	 * 3) are cache-hot on their current CPU.
 	 */
-	if (!cpu_isset(this_cpu, p->cpus_allowed))
+	if (!vcpu_isset(this_cpu, p->cpus_allowed))
 		return 0;
 	*all_pinned = 0;
 
@@ -1826,7 +2747,7 @@ int can_migrate_task(task_t *p, runqueue
  *
  * Called with both runqueues locked.
  */
-static int move_tasks(runqueue_t *this_rq, int this_cpu, runqueue_t *busiest,
+static int move_tasks(runqueue_t *this_rq, vcpu_t this_cpu, runqueue_t *busiest,
 		      unsigned long max_nr_move, struct sched_domain *sd,
 		      enum idle_type idle, int *all_pinned)
 {
@@ -1919,13 +2840,19 @@ out:
  * moved to restore balance via the imbalance parameter.
  */
 static struct sched_group *
-find_busiest_group(struct sched_domain *sd, int this_cpu,
+find_busiest_group(struct sched_domain *sd, vcpu_t this_cpu,
 		   unsigned long *imbalance, enum idle_type idle, int *sd_idle)
 {
 	struct sched_group *busiest = NULL, *this = NULL, *group = sd->groups;
 	unsigned long max_load, avg_load, total_load, this_load, total_pwr;
 	unsigned long max_pull;
 	int load_idx;
+	struct vcpu_scheduler *vsched;
+	vcpu_t vcpu;
+	int this_pcpu;
+
+	vsched = vcpu_vsched(this_cpu);
+	this_pcpu = vcpu_last_pcpu(this_cpu);
 
 	max_load = this_load = total_load = total_pwr = 0;
 	if (idle == NOT_IDLE)
@@ -1936,24 +2863,27 @@ find_busiest_group(struct sched_domain *
 		load_idx = sd->idle_idx;
 
 	do {
+		cpumask_t tmp;
 		unsigned long load;
 		int local_group;
 		int i;
 
-		local_group = cpu_isset(this_cpu, group->cpumask);
+		local_group = cpu_isset(this_pcpu, group->cpumask);
 
 		/* Tally up the load of all CPUs in the group */
 		avg_load = 0;
+		cpus_and(tmp, group->cpumask, vsched_pcpu_running_map(vsched));
 
-		for_each_cpu_mask(i, group->cpumask) {
+		for_each_cpu_mask(i, tmp) {
+			vcpu = pcpu(i)->vcpu;
 			if (*sd_idle && !idle_cpu(i))
 				*sd_idle = 0;
 
 			/* Bias balancing toward cpus of our domain */
 			if (local_group)
-				load = target_load(i, load_idx);
+				load = target_load(vcpu, load_idx);
 			else
-				load = source_load(i, load_idx);
+				load = source_load(vcpu, load_idx);
 
 			avg_load += load;
 		}
@@ -1976,6 +2906,8 @@ find_busiest_group(struct sched_domain *
 
 	if (!busiest || this_load >= max_load || max_load <= SCHED_LOAD_SCALE)
 		goto out_balanced;
+	if (!this)
+		this = busiest; /* this->cpu_power is needed below */
 
 	avg_load = (SCHED_LOAD_SCALE * total_load) / total_pwr;
 
@@ -2058,25 +2990,57 @@ out_balanced:
 /*
  * find_busiest_queue - find the busiest runqueue among the cpus in group.
  */
-static runqueue_t *find_busiest_queue(struct sched_group *group,
+static vcpu_t find_busiest_queue(vcpu_t this_cpu, struct sched_group *group,
 	enum idle_type idle)
 {
 	unsigned long load, max_load = 0;
-	runqueue_t *busiest = NULL;
+	struct vcpu_scheduler *vsched;
+	vcpu_t vcpu, busiest = NULL;
+	cpumask_t tmp;
 	int i;
 
+	vsched = vcpu_vsched(this_cpu);
 	for_each_cpu_mask(i, group->cpumask) {
-		load = source_load(i, 0);
+		vcpu = pcpu(i)->vcpu;
+		if (vcpu_vsched(vcpu) != vsched && idle != SCHED_IDLE)
+			continue;
+		load = source_load(vcpu, 0);
+		if (load > max_load) {
+			max_load = load;
+			busiest = vcpu;
+		}
+	}
 
+#ifdef CONFIG_SCHED_VCPU
+	cpus_andnot(tmp, vsched->vcpu_online_map, vsched->vcpu_running_map);
+	for_each_cpu_mask(i, tmp) {
+		vcpu = vsched_vcpu(vsched, i);
+		load = source_load(vcpu, 0);
 		if (load > max_load) {
 			max_load = load;
-			busiest = cpu_rq(i);
+			busiest = vcpu;
 		}
 	}
+#endif
 
 	return busiest;
 }
 
+#ifdef CONFIG_SCHED_VCPU
+vcpu_t find_idle_vcpu(struct vcpu_scheduler *vsched)
+{
+	vcpu_t vcpu;
+
+	vcpu = NULL;	/* returned unchanged when vsched->idle_list is empty */
+	spin_lock(&fairsched_lock);
+	if (!list_empty(&vsched->idle_list))
+		vcpu = list_entry(vsched->idle_list.next,
+				struct vcpu_info, list);	/* first entry */
+	spin_unlock(&fairsched_lock);
+	return vcpu;	/* NOTE(review): returned after fairsched_lock is dropped */
+}
+#endif /* CONFIG_SCHED_VCPU */
+
 /*
  * Max backoff if we encounter pinned tasks. Pretty arbitrary value, but
  * so long as it is large enough.
@@ -2089,10 +3053,11 @@ static runqueue_t *find_busiest_queue(st
  *
  * Called with this_rq unlocked.
  */
-static int load_balance(int this_cpu, runqueue_t *this_rq,
+static int load_balance(vcpu_t this_cpu, runqueue_t *this_rq,
 			struct sched_domain *sd, enum idle_type idle)
 {
 	struct sched_group *group;
+	vcpu_t busiest_vcpu;
 	runqueue_t *busiest;
 	unsigned long imbalance;
 	int nr_moved, all_pinned = 0;
@@ -2110,13 +3075,24 @@ static int load_balance(int this_cpu, ru
 		goto out_balanced;
 	}
 
-	busiest = find_busiest_queue(group, idle);
-	if (!busiest) {
+	busiest_vcpu = find_busiest_queue(this_cpu, group, idle);
+	if (!busiest_vcpu) {
 		schedstat_inc(sd, lb_nobusyq[idle]);
 		goto out_balanced;
 	}
 
-	BUG_ON(busiest == this_rq);
+#ifdef CONFIG_SCHED_VCPU
+	if (vcpu_vsched(this_cpu) != vcpu_vsched(busiest_vcpu)) {
+		this_cpu = find_idle_vcpu(vcpu_vsched(busiest_vcpu));
+		if (!this_cpu)
+			goto out_one_pinned;
+		this_rq = vcpu_rq(this_cpu);
+	}
+#endif
+	busiest = vcpu_rq(busiest_vcpu);
+
+	if (unlikely(busiest == this_rq))
+		goto out_balanced;
 
 	schedstat_add(sd, lb_imbalance[idle], imbalance);
 
@@ -2149,7 +3125,7 @@ static int load_balance(int this_cpu, ru
 			/* don't kick the migration_thread, if the curr
 			 * task on busiest cpu can't be moved to this_cpu
 			 */
-			if (!cpu_isset(this_cpu, busiest->curr->cpus_allowed)) {
+			if (!vcpu_isset(this_cpu, busiest->curr->cpus_allowed)) {
 				spin_unlock(&busiest->lock);
 				all_pinned = 1;
 				goto out_one_pinned;
@@ -2214,11 +3190,12 @@ out_one_pinned:
  * Called from schedule when this_rq is about to become idle (NEWLY_IDLE).
  * this_rq is locked.
  */
-static int load_balance_newidle(int this_cpu, runqueue_t *this_rq,
+static int load_balance_newidle(vcpu_t this_cpu, runqueue_t *this_rq,
 				struct sched_domain *sd)
 {
 	struct sched_group *group;
-	runqueue_t *busiest = NULL;
+	runqueue_t *busiest;
+	vcpu_t busiest_vcpu;
 	unsigned long imbalance;
 	int nr_moved = 0;
 	int sd_idle = 0;
@@ -2233,13 +3210,12 @@ static int load_balance_newidle(int this
 		goto out_balanced;
 	}
 
-	busiest = find_busiest_queue(group, NEWLY_IDLE);
-	if (!busiest) {
+	busiest_vcpu = find_busiest_queue(this_cpu, group, NEWLY_IDLE);
+	if (!busiest_vcpu || busiest_vcpu == this_cpu) {
 		schedstat_inc(sd, lb_nobusyq[NEWLY_IDLE]);
 		goto out_balanced;
 	}
-
-	BUG_ON(busiest == this_rq);
+	busiest = vcpu_rq(busiest_vcpu);
 
 	schedstat_add(sd, lb_imbalance[NEWLY_IDLE], imbalance);
 
@@ -2272,8 +3248,11 @@ out_balanced:
 /*
  * idle_balance is called by schedule() if this_cpu is about to become
  * idle. Attempts to pull tasks from other CPUs.
+ *
+ * Returns whether to continue with another runqueue
+ * instead of switching to idle.
  */
-static void idle_balance(int this_cpu, runqueue_t *this_rq)
+static int idle_balance(vcpu_t this_cpu, runqueue_t *this_rq)
 {
 	struct sched_domain *sd;
 
@@ -2281,10 +3260,11 @@ static void idle_balance(int this_cpu, r
 		if (sd->flags & SD_BALANCE_NEWIDLE) {
 			if (load_balance_newidle(this_cpu, this_rq, sd)) {
 				/* We've pulled tasks over so stop searching */
-				break;
+				return 1;
 			}
 		}
 	}
+	return 0;
 }
 
 /*
@@ -2294,18 +3274,26 @@ static void idle_balance(int this_cpu, r
  * logical imbalances.
  *
  * Called with busiest_rq locked.
+ *
+ * In human terms: balancing of CPU load by moving tasks between CPUs is
+ * performed by 2 methods, push and pull.
+ * In certain places when CPU is found to be idle, it performs pull from busy
+ * CPU to current (idle) CPU.
+ * active_load_balance implements push method, with migration thread getting
+ * scheduled on a busy CPU (hence, making all running processes on this CPU sit
+ * in the queue) and selecting where to push and which task.
  */
-static void active_load_balance(runqueue_t *busiest_rq, int busiest_cpu)
+static void active_load_balance(runqueue_t *busiest_rq, vcpu_t busiest_cpu)
 {
 	struct sched_domain *sd;
 	runqueue_t *target_rq;
-	int target_cpu = busiest_rq->push_cpu;
+	vcpu_t target_cpu = busiest_rq->push_cpu;
 
 	if (busiest_rq->nr_running <= 1)
 		/* no task to move */
 		return;
 
-	target_rq = cpu_rq(target_cpu);
+	target_rq = vcpu_rq(target_cpu);
 
 	/*
 	 * This condition is "impossible", if it occurs
@@ -2317,10 +3305,17 @@ static void active_load_balance(runqueue
 	/* move a task from busiest_rq to target_rq */
 	double_lock_balance(busiest_rq, target_rq);
 
+	/*
+	 * Our main candidate where to push our tasks is busiest_rq->push_cpu.
+	 * First, find the domain that spans over both that candidate CPU and
+	 * the current one.
+	 *
+	 * FIXME: make sure that push_cpu doesn't disappear before we get here.
+	 */
 	/* Search for an sd spanning us and the target CPU. */
 	for_each_domain(target_cpu, sd)
 		if ((sd->flags & SD_LOAD_BALANCE) &&
-			cpu_isset(busiest_cpu, sd->span))
+			cpu_isset(vcpu_last_pcpu(busiest_cpu), sd->span))
 				break;
 
 	if (unlikely(sd == NULL))
@@ -2346,31 +3341,17 @@ out:
  */
 
 /* Don't have all balancing operations going off at once */
-#define CPU_OFFSET(cpu) (HZ * cpu / NR_CPUS)
+#define CPU_OFFSET(cpu) (HZ * (cpu) / NR_CPUS)
 
-static void rebalance_tick(int this_cpu, runqueue_t *this_rq,
+static void rebalance_tick(vcpu_t this_cpu, runqueue_t *this_rq,
 			   enum idle_type idle)
 {
-	unsigned long old_load, this_load;
-	unsigned long j = jiffies + CPU_OFFSET(this_cpu);
+	unsigned long j;
 	struct sched_domain *sd;
-	int i;
 
-	this_load = this_rq->nr_running * SCHED_LOAD_SCALE;
 	/* Update our load */
-	for (i = 0; i < 3; i++) {
-		unsigned long new_load = this_load;
-		int scale = 1 << i;
-		old_load = this_rq->cpu_load[i];
-		/*
-		 * Round up the averaging division if load is increasing. This
-		 * prevents us from getting stuck on 9 if the load is 10, for
-		 * example.
-		 */
-		if (new_load > old_load)
-			new_load += scale-1;
-		this_rq->cpu_load[i] = (old_load*(scale-1) + new_load) / scale;
-	}
+	update_rq_cpu_load(this_rq);
+	j = jiffies + CPU_OFFSET(smp_processor_id());
 
 	for_each_domain(this_cpu, sd) {
 		unsigned long interval;
@@ -2404,17 +3385,19 @@ static void rebalance_tick(int this_cpu,
 /*
  * on UP we do not need to balance between CPUs:
  */
-static inline void rebalance_tick(int cpu, runqueue_t *rq, enum idle_type idle)
+static inline void rebalance_tick(vcpu_t cpu, runqueue_t *rq, enum idle_type idle)
 {
 }
-static inline void idle_balance(int cpu, runqueue_t *rq)
+static inline void idle_balance(vcpu_t cpu, runqueue_t *rq)
 {
 }
 #endif
 
-static inline int wake_priority_sleeper(runqueue_t *rq)
+static inline int wake_priority_sleeper(runqueue_t *rq, task_t *idle)
 {
 	int ret = 0;
+#ifndef CONFIG_SCHED_VCPU
+	/* FIXME: can we implement SMT priority sleeping for this? */
 #ifdef CONFIG_SCHED_SMT
 	spin_lock(&rq->lock);
 	/*
@@ -2422,11 +3405,13 @@ static inline int wake_priority_sleeper(
 	 * reasons reschedule the idle task to see if it can now run.
 	 */
 	if (rq->nr_running) {
-		resched_task(rq->idle);
+		/* FIXME */
+		resched_task(idle);
 		ret = 1;
 	}
 	spin_unlock(&rq->lock);
 #endif
+#endif
 	return ret;
 }
 
@@ -2476,6 +3461,15 @@ unsigned long long current_sched_time(co
 			STARVATION_LIMIT * ((rq)->nr_running) + 1))) || \
 			((rq)->curr->static_prio > (rq)->best_expired_prio))
 
+#ifdef CONFIG_VE	/* add tick to field 'time' of p's owner VE stats on task_cpu(p) */
+#define update_ve_cpu_time(p, time, tick)	do {		\
+		VE_CPU_STATS((p)->ve_task_info.owner_env,	\
+			task_cpu(p))->time += (tick);		\
+	} while (0)
+#else /* !CONFIG_VE */
+#define update_ve_cpu_time(p, time, tick)	do { } while (0)
+#endif /* CONFIG_VE */
+
 /*
  * Account user cpu time to a process.
  * @p: the process that the cpu time gets accounted to
@@ -2491,10 +3485,13 @@ void account_user_time(struct task_struc
 
 	/* Add user time to cpustat. */
 	tmp = cputime_to_cputime64(cputime);
-	if (TASK_NICE(p) > 0)
+	if (TASK_NICE(p) > 0) {
 		cpustat->nice = cputime64_add(cpustat->nice, tmp);
-	else
+		update_ve_cpu_time(p, nice, tmp);
+	} else {
 		cpustat->user = cputime64_add(cpustat->user, tmp);
+		update_ve_cpu_time(p, user, tmp);
+	}
 }
 
 /*
@@ -2511,14 +3508,16 @@ void account_system_time(struct task_str
 	cputime64_t tmp;
 
 	p->stime = cputime_add(p->stime, cputime);
+	tmp = cputime_to_cputime64(cputime);
+
+	update_ve_cpu_time(p, system, tmp);
 
 	/* Add system time to cpustat. */
-	tmp = cputime_to_cputime64(cputime);
 	if (hardirq_count() - hardirq_offset)
 		cpustat->irq = cputime64_add(cpustat->irq, tmp);
 	else if (softirq_count())
 		cpustat->softirq = cputime64_add(cpustat->softirq, tmp);
-	else if (p != rq->idle)
+	else if (p != this_pcpu()->idle)
 		cpustat->system = cputime64_add(cpustat->system, tmp);
 	else if (atomic_read(&rq->nr_iowait) > 0)
 		cpustat->iowait = cputime64_add(cpustat->iowait, tmp);
@@ -2539,7 +3538,7 @@ void account_steal_time(struct task_stru
 	cputime64_t tmp = cputime_to_cputime64(steal);
 	runqueue_t *rq = this_rq();
 
-	if (p == rq->idle) {
+	if (p == this_pcpu()->idle) {
 		p->stime = cputime_add(p->stime, steal);
 		if (atomic_read(&rq->nr_iowait) > 0)
 			cpustat->iowait = cputime64_add(cpustat->iowait, tmp);
@@ -2559,18 +3558,23 @@ void account_steal_time(struct task_stru
 void scheduler_tick(void)
 {
 	int cpu = smp_processor_id();
-	runqueue_t *rq = this_rq();
+	vcpu_t vcpu;
+	runqueue_t *rq;
 	task_t *p = current;
 	unsigned long long now = sched_clock();
 
+	vcpu = this_vcpu();
+	rq = vcpu_rq(vcpu);
 	update_cpu_clock(p, rq, now);
 
 	rq->timestamp_last_tick = now;
 
-	if (p == rq->idle) {
-		if (wake_priority_sleeper(rq))
+	set_tsk_need_resched(p); //FIXME
+
+	if (p == pcpu(cpu)->idle) {
+		if (wake_priority_sleeper(rq, pcpu(cpu)->idle))
 			goto out;
-		rebalance_tick(cpu, rq, SCHED_IDLE);
+		rebalance_tick(vcpu, rq, SCHED_IDLE);
 		return;
 	}
 
@@ -2646,10 +3650,14 @@ void scheduler_tick(void)
 out_unlock:
 	spin_unlock(&rq->lock);
 out:
-	rebalance_tick(cpu, rq, NOT_IDLE);
+	rebalance_tick(vcpu, rq, NOT_IDLE);
 }
 
-#ifdef CONFIG_SCHED_SMT
+#if defined(CONFIG_SCHED_SMT) && !defined(CONFIG_SCHED_VCPU)
+/* FIXME: SMT scheduling
+ * rq->cpu is initialized with rq address if FAIRSCHED is on
+ * this is not correct for SMT case
+ */
 static inline void wakeup_busy_runqueue(runqueue_t *rq)
 {
 	/* If an SMT runqueue is sleeping due to priority reasons wake it up */
@@ -2657,7 +3665,7 @@ static inline void wakeup_busy_runqueue(
 		resched_task(rq->idle);
 }
 
-static void wake_sleeping_dependent(int this_cpu, runqueue_t *this_rq)
+static void wake_sleeping_dependent(vcpu_t this_cpu)
 {
 	struct sched_domain *tmp, *sd = NULL;
 	cpumask_t sibling_map;
@@ -2711,7 +3719,7 @@ static inline unsigned long smt_slice(ta
 	return p->time_slice * (100 - sd->per_cpu_gain) / 100;
 }
 
-static int dependent_sleeper(int this_cpu, runqueue_t *this_rq)
+static int dependent_sleeper(vcpu_t this_cpu)
 {
 	struct sched_domain *tmp, *sd = NULL;
 	cpumask_t sibling_map;
@@ -2812,11 +3820,11 @@ out_unlock:
 	return ret;
 }
 #else
-static inline void wake_sleeping_dependent(int this_cpu, runqueue_t *this_rq)
+static inline void wake_sleeping_dependent(vcpu_t this_cpu)
 {
 }
 
-static inline int dependent_sleeper(int this_cpu, runqueue_t *this_rq)
+static inline int dependent_sleeper(vcpu_t this_cpu)
 {
 	return 0;
 }
@@ -2866,7 +3874,9 @@ asmlinkage void __sched schedule(void)
 	struct list_head *queue;
 	unsigned long long now;
 	unsigned long run_time;
-	int cpu, idx, new_prio;
+	int idx, new_prio;
+	vcpu_t vcpu;
+	cycles_t cycles;
 
 	/*
 	 * Test if we are atomic.  Since do_exit() needs to call into
@@ -2888,13 +3898,14 @@ need_resched:
 	prev = current;
 	release_kernel_lock(prev);
 need_resched_nonpreemptible:
+	cycles = get_cycles();
 	rq = this_rq();
 
 	/*
 	 * The idle thread is not allowed to schedule!
 	 * Remove this check after it has been exercised a bit.
 	 */
-	if (unlikely(prev == rq->idle) && prev->state != TASK_RUNNING) {
+	if (unlikely(prev == this_pcpu()->idle) && prev->state != TASK_RUNNING) {
 		printk(KERN_ERR "bad: scheduling from the idle thread!\n");
 		dump_stack();
 	}
@@ -2932,25 +3943,35 @@ need_resched_nonpreemptible:
 		}
 	}
 
-	cpu = smp_processor_id();
+	prev->sleep_avg -= run_time;
+	if ((long)prev->sleep_avg <= 0)
+		prev->sleep_avg = 0;
+
+	vcpu = rq_vcpu(rq);
+	if (rq->nr_running &&
+	    jiffies - vcpu->start_time < msecs_to_jiffies(vcpu_timeslice))
+		goto same_vcpu;
+
+	if (unlikely(!rq->nr_running))
+		idle_balance(vcpu, rq);
+	vcpu = schedule_vcpu(vcpu, cycles);
+	rq = vcpu_rq(vcpu);
+
 	if (unlikely(!rq->nr_running)) {
 go_idle:
-		idle_balance(cpu, rq);
-		if (!rq->nr_running) {
-			next = rq->idle;
-			rq->expired_timestamp = 0;
-			wake_sleeping_dependent(cpu, rq);
-			/*
-			 * wake_sleeping_dependent() might have released
-			 * the runqueue, so break out if we got new
-			 * tasks meanwhile:
-			 */
-			if (!rq->nr_running)
-				goto switch_tasks;
-		}
+		next = this_pcpu()->idle;
+		rq->expired_timestamp = 0;
+		wake_sleeping_dependent(vcpu);
+		/*
+		 * wake_sleeping_dependent() might have released
+		 * the runqueue, so break out if we got new
+		 * tasks meanwhile:
+		 */
+		if (!rq->nr_running)
+			goto switch_tasks;
 	} else {
-		if (dependent_sleeper(cpu, rq)) {
-			next = rq->idle;
+		if (dependent_sleeper(vcpu)) {
+			next = this_pcpu()->idle;
 			goto switch_tasks;
 		}
 		/*
@@ -2962,6 +3983,7 @@ go_idle:
 			goto go_idle;
 	}
 
+same_vcpu:
 	array = rq->active;
 	if (unlikely(!array->nr_active)) {
 		/*
@@ -2998,28 +4020,50 @@ go_idle:
 			requeue_task(next, array);
 	}
 	next->activated = 0;
+
 switch_tasks:
-	if (next == rq->idle)
+	if (next == this_pcpu()->idle)
 		schedstat_inc(rq, sched_goidle);
 	prefetch(next);
 	prefetch_stack(next);
 	clear_tsk_need_resched(prev);
-	rcu_qsctr_inc(task_cpu(prev));
+	rcu_qsctr_inc(task_pcpu(prev));
 
 	update_cpu_clock(prev, rq, now);
 
-	prev->sleep_avg -= run_time;
-	if ((long)prev->sleep_avg <= 0)
-		prev->sleep_avg = 0;
+	/* updated w/o rq->lock, which is ok due to after-read-checks */
 	prev->timestamp = prev->last_ran = now;
 
 	sched_info_switch(prev, next);
 	if (likely(prev != next)) {
+		cycles_t cycles;
+
+		/* current physical CPU id should be valid after switch */
+		set_task_vcpu(next, vcpu);
+		set_task_pcpu(next, task_pcpu(prev));
+		cycles = get_cycles();
 		next->timestamp = now;
 		rq->nr_switches++;
+		glob_task_nrs[smp_processor_id()].nr_switches++;
 		rq->curr = next;
 		++*switch_count;
 
+#ifdef CONFIG_VE
+		prev->ve_task_info.sleep_stamp = cycles;
+		if (prev->state == TASK_RUNNING && prev != this_pcpu()->idle)
+			write_wakeup_stamp(prev, cycles);
+		update_sched_lat(next, cycles);
+
+		/* Because next & prev are protected by the
+		 * runqueue lock, we need not worry about
+		 * wakeup_stamp and sched_time protection
+		 * (same thing in the 'else' branch below).
+		 */
+		update_ve_task_info(prev, cycles);
+		next->ve_task_info.sched_time = cycles;
+		write_wakeup_stamp(next, 0);
+#endif
+
 		prepare_task_switch(rq, next);
 		prev = context_switch(rq, prev, next);
 		barrier();
@@ -3029,8 +4073,10 @@ switch_tasks:
 		 * frame will be invalid.
 		 */
 		finish_task_switch(this_rq(), prev);
-	} else
+	} else {
+		update_ve_task_info(prev, get_cycles());
 		spin_unlock_irq(&rq->lock);
+	}
 
 	prev = current;
 	if (unlikely(reacquire_kernel_lock(prev) < 0))
@@ -3565,27 +4611,9 @@ int task_prio(const task_t *p)
  */
 int task_nice(const task_t *p)
 {
-	return TASK_NICE(p);
-}
-EXPORT_SYMBOL_GPL(task_nice);
-
-/**
- * idle_cpu - is a given cpu idle currently?
- * @cpu: the processor in question.
- */
-int idle_cpu(int cpu)
-{
-	return cpu_curr(cpu) == cpu_rq(cpu)->idle;
-}
-
-/**
- * idle_task - return the idle task for a given cpu.
- * @cpu: the processor in question.
- */
-task_t *idle_task(int cpu)
-{
-	return cpu_rq(cpu)->idle;
+	return TASK_NICE(p);
 }
+EXPORT_SYMBOL_GPL(task_nice);
 
 /**
  * find_process_by_pid - find a process with a matching PID value.
@@ -3593,7 +4621,7 @@ task_t *idle_task(int cpu)
  */
 static inline task_t *find_process_by_pid(pid_t pid)
 {
-	return pid ? find_task_by_pid(pid) : current;
+	return pid ? find_task_by_pid_ve(pid) : current;
 }
 
 /* Actually do priority change: must hold rq lock. */
@@ -3653,7 +4681,7 @@ recheck:
 	/*
 	 * Allow unprivileged RT tasks to decrease priority:
 	 */
-	if (!capable(CAP_SYS_NICE)) {
+	if (!capable(CAP_SYS_ADMIN)) {
 		/*
 		 * can't change policy, except between SCHED_NORMAL
 		 * and SCHED_BATCH:
@@ -4110,10 +5138,19 @@ EXPORT_SYMBOL(yield);
  */
 void __sched io_schedule(void)
 {
-	struct runqueue *rq = &per_cpu(runqueues, raw_smp_processor_id());
+	struct runqueue *rq = this_rq();
+
+#ifdef CONFIG_VE
+	struct ve_struct *ve;
+	ve = current->ve_task_info.owner_env;
+#endif
 
 	atomic_inc(&rq->nr_iowait);
+	ve_nr_iowait_inc(ve, task_cpu(current));
+	nr_iowait_inc(smp_processor_id());
 	schedule();
+	nr_iowait_dec(smp_processor_id());
+	ve_nr_iowait_dec(ve, task_cpu(current));
 	atomic_dec(&rq->nr_iowait);
 }
 
@@ -4121,11 +5158,20 @@ EXPORT_SYMBOL(io_schedule);
 
 long __sched io_schedule_timeout(long timeout)
 {
-	struct runqueue *rq = &per_cpu(runqueues, raw_smp_processor_id());
+	struct runqueue *rq = this_rq();
 	long ret;
 
+#ifdef CONFIG_VE
+	struct ve_struct *ve;
+	ve = current->ve_task_info.owner_env;
+#endif
+
 	atomic_inc(&rq->nr_iowait);
+	ve_nr_iowait_inc(ve, task_cpu(current));
+	nr_iowait_inc(smp_processor_id());
 	ret = schedule_timeout(timeout);
+	nr_iowait_dec(smp_processor_id());
+	ve_nr_iowait_dec(ve, task_cpu(current));
 	atomic_dec(&rq->nr_iowait);
 	return ret;
 }
@@ -4248,15 +5294,9 @@ static void show_task(task_t *p)
 	else
 		printk("?");
 #if (BITS_PER_LONG == 32)
-	if (state == TASK_RUNNING)
-		printk(" running ");
-	else
-		printk(" %08lX ", thread_saved_pc(p));
+	printk(" %08lX ", (unsigned long)p);
 #else
-	if (state == TASK_RUNNING)
-		printk("  running task   ");
-	else
-		printk(" %016lx ", thread_saved_pc(p));
+	printk(" %016lx ", (unsigned long)p);
 #endif
 #ifdef CONFIG_DEBUG_STACK_USAGE
 	{
@@ -4295,26 +5335,41 @@ void show_state(void)
 #if (BITS_PER_LONG == 32)
 	printk("\n"
 	       "                                               sibling\n");
-	printk("  task             PC      pid father child younger older\n");
+	printk("  task       taskaddr      pid father child younger older\n");
 #else
 	printk("\n"
 	       "                                                       sibling\n");
-	printk("  task                 PC          pid father child younger older\n");
+	printk("  task           taskaddr          pid father child younger older\n");
 #endif
 	read_lock(&tasklist_lock);
-	do_each_thread(g, p) {
+	do_each_thread_all(g, p) {
 		/*
 		 * reset the NMI-timeout, listing all files on a slow
 		 * console might take alot of time:
 		 */
 		touch_nmi_watchdog();
 		show_task(p);
-	} while_each_thread(g, p);
+	} while_each_thread_all(g, p);
 
 	read_unlock(&tasklist_lock);
 	mutex_debug_show_all_locks();
 }
 
+static void init_boot_vcpus(long cpu)
+{
+	if (vsched_vcpu(&idle_vsched, cpu) != NULL)
+		return;	/* idle_vsched already has a vcpu for this cpu id */
+
+	if (__add_vcpu(&idle_vsched, cpu) != 0)
+		panic("Can't create idle vcpu %ld\n", cpu);
+
+	/* Also create the vcpu with the same id in default_vsched */
+	if (__add_vcpu(&default_vsched, cpu) != 0)
+		panic("Can't create default vcpu %ld\n", cpu);
+
+	cpu_set(cpu, idle_vsched.pcpu_running_map);
+}
+
 /**
  * init_idle - set up an idle thread for a given CPU
  * @idle: task in question
@@ -4325,22 +5380,47 @@ void show_state(void)
  */
 void __devinit init_idle(task_t *idle, int cpu)
 {
-	runqueue_t *rq = cpu_rq(cpu);
+	struct vcpu_scheduler *vsched;
+	vcpu_t vcpu;
+	runqueue_t *rq;
 	unsigned long flags;
 
+#ifdef CONFIG_SCHED_VCPU
+	init_boot_vcpus(cpu);
+#endif
+	vsched = &idle_vsched;
+	vcpu = vsched_vcpu(vsched, cpu);
+	rq = vcpu_rq(vcpu);
+
 	idle->timestamp = sched_clock();
 	idle->sleep_avg = 0;
 	idle->array = NULL;
 	idle->prio = MAX_PRIO;
 	idle->state = TASK_RUNNING;
 	idle->cpus_allowed = cpumask_of_cpu(cpu);
+	set_task_vsched(idle, &idle_vsched);
 	set_task_cpu(idle, cpu);
 
 	spin_lock_irqsave(&rq->lock, flags);
-	rq->curr = rq->idle = idle;
+	pcpu(cpu)->idle = idle;
+	rq->curr = idle;
 #if defined(CONFIG_SMP) && defined(__ARCH_WANT_UNLOCKED_CTXSW)
 	idle->oncpu = 1;
 #endif
+	set_task_pcpu(idle, cpu);
+	set_task_vsched(idle, vsched);
+	set_task_vcpu(idle, vcpu);
+#ifdef CONFIG_SCHED_VCPU
+	/* the following code is very close to vcpu_get */
+	spin_lock(&fairsched_lock);
+	pcpu(cpu)->vcpu = vcpu;
+	pcpu(cpu)->vsched = vcpu->vsched;
+	list_move_tail(&vcpu->list, &vsched->running_list);
+	__set_bit(cpu, vsched->vcpu_running_map.bits);
+	__set_bit(cpu, vsched->pcpu_running_map.bits);
+	vcpu->running = 1;
+	spin_unlock(&fairsched_lock);
+#endif
 	spin_unlock_irqrestore(&rq->lock, flags);
 
 	/* Set the preempt count _outside_ the spinlocks! */
@@ -4360,7 +5440,6 @@ void __devinit init_idle(task_t *idle, i
  */
 cpumask_t nohz_cpu_mask = CPU_MASK_NONE;
 
-#ifdef CONFIG_SMP
 /*
  * This is how migration works:
  *
@@ -4377,6 +5456,7 @@ cpumask_t nohz_cpu_mask = CPU_MASK_NONE;
  * 7) we wake up and the migration is done.
  */
 
+#ifdef CONFIG_SMP
 /*
  * Change a given task's CPU affinity. Migrate the thread to a
  * proper CPU and schedule it away if the CPU it's executing on
@@ -4392,9 +5472,11 @@ int set_cpus_allowed(task_t *p, cpumask_
 	int ret = 0;
 	migration_req_t req;
 	runqueue_t *rq;
+	struct vcpu_scheduler *vsched;
 
+	vsched = task_vsched(p);
 	rq = task_rq_lock(p, &flags);
-	if (!cpus_intersects(new_mask, cpu_online_map)) {
+	if (!cpus_intersects(new_mask, vsched_vcpu_online_map(vsched))) {
 		ret = -EINVAL;
 		goto out;
 	}
@@ -4404,7 +5486,8 @@ int set_cpus_allowed(task_t *p, cpumask_
 	if (cpu_isset(task_cpu(p), new_mask))
 		goto out;
 
-	if (migrate_task(p, any_online_cpu(new_mask), &req)) {
+	if (migrate_task(p, vsched_vcpu(vsched, any_online_cpu(new_mask)),
+								&req)) {
 		/* Need help from migration thread: drop lock and wait. */
 		task_rq_unlock(rq, &flags);
 		wake_up_process(rq->migration_thread);
@@ -4418,6 +5501,7 @@ out:
 }
 
 EXPORT_SYMBOL_GPL(set_cpus_allowed);
+#endif
 
 /*
  * Move (not current) task off this cpu, onto dest cpu.  We're doing
@@ -4428,25 +5512,30 @@ EXPORT_SYMBOL_GPL(set_cpus_allowed);
  * So we race with normal scheduler movements, but that's OK, as long
  * as the task is no longer on this CPU.
  */
-static void __migrate_task(struct task_struct *p, int src_cpu, int dest_cpu)
+static void __migrate_task(struct task_struct *p, vcpu_t src_cpu, vcpu_t dest_cpu)
 {
 	runqueue_t *rq_dest, *rq_src;
 
-	if (unlikely(cpu_is_offline(dest_cpu)))
+	if (unlikely(vcpu_is_offline(dest_cpu)))
 		return;
 
-	rq_src = cpu_rq(src_cpu);
-	rq_dest = cpu_rq(dest_cpu);
+#ifdef CONFIG_SCHED_VCPU
+	BUG_ON(vcpu_vsched(src_cpu) == &idle_vsched);
+#endif
+	rq_src = vcpu_rq(src_cpu);
+	rq_dest = vcpu_rq(dest_cpu);
 
 	double_rq_lock(rq_src, rq_dest);
 	/* Already moved. */
-	if (task_cpu(p) != src_cpu)
+	if (task_vcpu(p) != src_cpu)
 		goto out;
 	/* Affinity changed (again). */
-	if (!cpu_isset(dest_cpu, p->cpus_allowed))
+	if (!vcpu_isset(dest_cpu, p->cpus_allowed))
 		goto out;
 
-	set_task_cpu(p, dest_cpu);
+	BUG_ON(task_running(rq_src, p));
+	set_task_vsched(p, vcpu_vsched(dest_cpu));
+	set_task_vcpu(p, dest_cpu);
 	if (p->array) {
 		/*
 		 * Sync timestamp with rq_dest's before activating.
@@ -4474,9 +5563,9 @@ out:
 static int migration_thread(void *data)
 {
 	runqueue_t *rq;
-	int cpu = (long)data;
+	vcpu_t cpu = (vcpu_t)data;
 
-	rq = cpu_rq(cpu);
+	rq = vcpu_rq(cpu);
 	BUG_ON(rq->migration_thread != current);
 
 	set_current_state(TASK_INTERRUPTIBLE);
@@ -4488,15 +5577,17 @@ static int migration_thread(void *data)
 
 		spin_lock_irq(&rq->lock);
 
-		if (cpu_is_offline(cpu)) {
+		if (vcpu_is_offline(cpu)) {
 			spin_unlock_irq(&rq->lock);
 			goto wait_to_die;
 		}
 
+#ifdef CONFIG_SMP
 		if (rq->active_balance) {
 			active_load_balance(rq, cpu);
 			rq->active_balance = 0;
 		}
+#endif
 
 		head = &rq->migration_queue;
 
@@ -4529,14 +5620,16 @@ wait_to_die:
 	return 0;
 }
 
-#ifdef CONFIG_HOTPLUG_CPU
 /* Figure out where task on dead CPU should go, use force if neccessary. */
-static void move_task_off_dead_cpu(int dead_cpu, struct task_struct *tsk)
+static void move_task_off_dead_cpu(vcpu_t dead_cpu, struct task_struct *tsk)
 {
 	int dest_cpu;
+	struct vcpu_scheduler *vsched;
 	cpumask_t mask;
 
+#if defined(CONFIG_HOTPLUG_CPU) && !defined(CONFIG_SCHED_VCPU)
 	/* On same node? */
+#error FIXME: wrong code
 	mask = node_to_cpumask(cpu_to_node(dead_cpu));
 	cpus_and(mask, mask, tsk->cpus_allowed);
 	dest_cpu = any_online_cpu(mask);
@@ -4560,9 +5653,20 @@ static void move_task_off_dead_cpu(int d
 			       "longer affine to cpu%d\n",
 			       tsk->pid, tsk->comm, dead_cpu);
 	}
-	__migrate_task(tsk, dead_cpu, dest_cpu);
+#elif defined(CONFIG_SCHED_VCPU) 
+	vsched = vcpu_vsched(dead_cpu);
+	mask = vsched_vcpu_online_map(vsched);
+	cpus_and(mask, mask, tsk->cpus_allowed);
+	dest_cpu = any_online_cpu(mask);
+
+	/* On any allowed CPU? */
+	if (dest_cpu == NR_CPUS)
+		dest_cpu = any_online_cpu(vsched_vcpu_online_map(vsched));
+#endif
+	__migrate_task(tsk, dead_cpu, vsched_vcpu(vsched, dest_cpu));
 }
 
+#ifdef CONFIG_HOTPLUG_CPU
 /*
  * While a dead CPU has no uninterruptible tasks queued at this point,
  * it might still have a nonzero ->nr_uninterruptible counter, because
@@ -4582,25 +5686,30 @@ static void migrate_nr_uninterruptible(r
 	double_rq_unlock(rq_src, rq_dest);
 	local_irq_restore(flags);
 }
+#endif
 
 /* Run through task list and migrate tasks from the dead cpu. */
-static void migrate_live_tasks(int src_cpu)
+static void migrate_live_tasks(vcpu_t src_cpu)
 {
 	struct task_struct *tsk, *t;
 
+	BUG_ON(vcpu_isset(src_cpu, vsched_vcpu_online_map(vcpu_vsched(src_cpu))));
 	write_lock_irq(&tasklist_lock);
 
-	do_each_thread(t, tsk) {
+	do_each_thread_all(t, tsk) {
 		if (tsk == current)
 			continue;
+		if (tsk == vcpu_rq(src_cpu)->migration_thread)
+			continue;
 
-		if (task_cpu(tsk) == src_cpu)
+		if (task_vcpu(tsk) == src_cpu)
 			move_task_off_dead_cpu(src_cpu, tsk);
-	} while_each_thread(t, tsk);
+	} while_each_thread_all(t, tsk);
 
 	write_unlock_irq(&tasklist_lock);
 }
 
+#ifdef CONFIG_HOTPLUG_CPU
 /* Schedules idle task to be the next runnable task on current CPU.
  * It does so by boosting its priority to highest possible and adding it to
  * the _front_ of runqueue. Used by CPU offline code.
@@ -4622,6 +5731,9 @@ void sched_idle_next(void)
 
 	__setscheduler(p, SCHED_FIFO, MAX_RT_PRIO-1);
 	/* Add idle task to _front_ of it's priority queue */
+#ifdef CONFIG_SCHED_VCPU
+#error "FIXME: VCPU vs. HOTPLUG: fix the code below"
+#endif
 	__activate_idle_task(p, rq);
 
 	spin_unlock_irqrestore(&rq->lock, flags);
@@ -4683,48 +5795,83 @@ static void migrate_dead_tasks(unsigned 
 }
 #endif /* CONFIG_HOTPLUG_CPU */
 
+static void migration_thread_bind(struct task_struct *k, vcpu_t cpu)
+{
+	BUG_ON(k->state != TASK_INTERRUPTIBLE);
+	/* Must have done schedule() in kthread() before we rebind it below */
+	wait_task_inactive(k);
+
+	set_task_vsched(k, vcpu_vsched(cpu));
+	set_task_vcpu(k, cpu);
+	k->cpus_allowed = cpumask_of_cpu(cpu->id);	/* pin to this vcpu's id */
+}
+
+static void migration_thread_stop(runqueue_t *rq)
+{
+	struct task_struct *thread;
+
+	thread = rq->migration_thread;
+	if (thread == NULL)
+		return;	/* this runqueue has no migration thread to stop */
+
+	get_task_struct(thread);
+	kthread_stop(thread);
+
+	/* We MUST ensure that the do_exit of the migration thread has
+	 * completed and that it will never be scheduled again before
+	 * vsched_destroy. An unscheduled task with the PF_DEAD flag set
+	 * will never receive the CPU again. */
+	while (!(thread->flags & PF_DEAD) || task_running(rq, thread))
+		yield();
+	put_task_struct(thread);
+
+	rq->migration_thread = NULL;
+}
+
 /*
  * migration_call - callback that gets triggered when a CPU is added.
  * Here we can start up the necessary migration thread for the new CPU.
  */
-static int migration_call(struct notifier_block *nfb, unsigned long action,
+static int vmigration_call(struct notifier_block *nfb, unsigned long action,
 			  void *hcpu)
 {
-	int cpu = (long)hcpu;
+	vcpu_t cpu = (vcpu_t)hcpu;
 	struct task_struct *p;
 	struct runqueue *rq;
 	unsigned long flags;
 
 	switch (action) {
 	case CPU_UP_PREPARE:
-		p = kthread_create(migration_thread, hcpu, "migration/%d",cpu);
+		p = kthread_create(migration_thread, hcpu, "migration/%d/%d", 
+			vsched_id(vcpu_vsched(cpu)), cpu->id);
 		if (IS_ERR(p))
 			return NOTIFY_BAD;
 		p->flags |= PF_NOFREEZE;
-		kthread_bind(p, cpu);
-		/* Must be high prio: stop_machine expects to yield to it. */
+
+		migration_thread_bind(p, cpu);
 		rq = task_rq_lock(p, &flags);
+		/* Must be high prio: stop_machine expects to yield to it. */
 		__setscheduler(p, SCHED_FIFO, MAX_RT_PRIO-1);
 		task_rq_unlock(rq, &flags);
-		cpu_rq(cpu)->migration_thread = p;
+		vcpu_rq(cpu)->migration_thread = p;
 		break;
 	case CPU_ONLINE:
 		/* Strictly unneccessary, as first user will wake it. */
-		wake_up_process(cpu_rq(cpu)->migration_thread);
+		wake_up_process(vcpu_rq(cpu)->migration_thread);
 		break;
-#ifdef CONFIG_HOTPLUG_CPU
+#if defined(CONFIG_HOTPLUG_CPU) && defined(CONFIG_SCHED_VCPU)
+#error "FIXME: CPU down code doesn't work yet with VCPUs"
+#endif
 	case CPU_UP_CANCELED:
 		/* Unbind it from offline cpu so it can run.  Fall thru. */
-		kthread_bind(cpu_rq(cpu)->migration_thread,
-			     any_online_cpu(cpu_online_map));
-		kthread_stop(cpu_rq(cpu)->migration_thread);
-		cpu_rq(cpu)->migration_thread = NULL;
+		migration_thread_bind(vcpu_rq(cpu)->migration_thread, this_vcpu());
+		migration_thread_stop(vcpu_rq(cpu));
 		break;
 	case CPU_DEAD:
 		migrate_live_tasks(cpu);
-		rq = cpu_rq(cpu);
-		kthread_stop(rq->migration_thread);
-		rq->migration_thread = NULL;
+		rq = vcpu_rq(cpu);
+		migration_thread_stop(rq);
+#ifdef CONFIG_HOTPLUG_CPU
 		/* Idle task back to normal (off runqueue, low prio) */
 		rq = task_rq_lock(rq->idle, &flags);
 		deactivate_task(rq->idle, rq);
@@ -4734,6 +5881,7 @@ static int migration_call(struct notifie
 		task_rq_unlock(rq, &flags);
 		migrate_nr_uninterruptible(rq);
 		BUG_ON(rq->nr_running != 0);
+#endif
 
 		/* No need to migrate the tasks: it was best-effort if
 		 * they didn't do lock_cpu_hotplug().  Just wake up
@@ -4748,11 +5896,19 @@ static int migration_call(struct notifie
 		}
 		spin_unlock_irq(&rq->lock);
 		break;
-#endif
 	}
 	return NOTIFY_OK;
 }
 
+static int migration_call(struct notifier_block *nfb, unsigned long action,
+			  void *hcpu)
+{	/* cpu notifier entry point: forwards the event to vmigration_call() */
+	if (action == CPU_UP_PREPARE)
+		init_boot_vcpus((long)hcpu);	/* helper is defined outside this hunk */
+	/* hcpu is a pcpu number; vmigration_call() takes the matching default vcpu */
+	return vmigration_call(nfb, action, vsched_default_vcpu((long)hcpu));
+}
+
 /* Register at highest priority so that task migration (migrate_all_tasks)
  * happens before everything else.
  */
@@ -4770,7 +5926,6 @@ int __init migration_init(void)
 	register_cpu_notifier(&migration_notifier);
 	return 0;
 }
-#endif
 
 #ifdef CONFIG_SMP
 #undef SCHED_DOMAIN_DEBUG
@@ -4798,7 +5953,7 @@ static void sched_domain_debug(struct sc
 		printk(KERN_DEBUG);
 		for (i = 0; i < level + 1; i++)
 			printk(" ");
-		printk("domain %d: ", level);
+		printk("domain %d, flags %x: ", level, sd->flags);
 
 		if (!(sd->flags & SD_LOAD_BALANCE)) {
 			printk("does not load-balance\n");
@@ -4923,7 +6078,7 @@ static int sd_parent_degenerate(struct s
  */
 static void cpu_attach_domain(struct sched_domain *sd, int cpu)
 {
-	runqueue_t *rq = cpu_rq(cpu);
+	runqueue_t *rq = vcpu_rq(vsched_default_vcpu(cpu));
 	struct sched_domain *tmp;
 
 	/* Remove the sched domains which do not contribute to scheduling. */
@@ -4940,6 +6095,7 @@ static void cpu_attach_domain(struct sch
 
 	sched_domain_debug(sd, cpu);
 
+	rcu_assign_pointer(pcpu(cpu)->sd, sd);
 	rcu_assign_pointer(rq->sd, sd);
 }
 
@@ -5118,7 +6274,7 @@ static unsigned long domain_distance(int
 	unsigned long distance = 0;
 	struct sched_domain *sd;
 
-	for_each_domain(cpu1, sd) {
+	for_each_pdomain(pcpu(cpu1)->sd, sd) {
 		WARN_ON(!cpu_isset(cpu1, sd->span));
 		if (cpu_isset(cpu2, sd->span))
 			return distance;
@@ -5440,7 +6596,7 @@ static void calibrate_migration_costs(co
 	 */
 	for_each_cpu_mask(cpu, *cpu_map) {
 		distance = 0;
-		for_each_domain(cpu, sd) {
+		for_each_pdomain(pcpu(cpu)->sd, sd) {
 			sd->cache_hot_time = migration_cost[distance];
 			distance++;
 		}
@@ -6012,42 +7168,398 @@ int in_sched_functions(unsigned long add
 		&& addr < (unsigned long)__sched_text_end);
 }
 
-void __init sched_init(void)
+static void init_rq(struct runqueue *rq, int cpu)
+{	/* reset *rq to an empty runqueue and record 'cpu' in rq->cpu */
+	int j, k;
+	prio_array_t *array;
+
+	spin_lock_init(&rq->lock);
+	rq->nr_running = 0;
+	rq->active = rq->arrays;	/* arrays[0] starts out as the active array */
+	rq->expired = rq->arrays + 1;	/* arrays[1] starts out as the expired array */
+	rq->best_expired_prio = MAX_PRIO;
+
+#ifdef CONFIG_SMP
+	rq->sd = NULL;	/* no sched domain attached yet */
+	for (j = 0; j < 3; j++)
+		rq->cpu_load[j] = 0;
+	rq->active_balance = 0;
+#endif
+	rq->push_cpu = 0;
+	rq->migration_thread = NULL;	/* assigned later by the CPU_UP_PREPARE notifier path */
+	INIT_LIST_HEAD(&rq->migration_queue);
+	rq->cpu = cpu;
+	atomic_set(&rq->nr_iowait, 0);
+
+	for (j = 0; j < 2; j++) {	/* both priority arrays */
+		array = rq->arrays + j;
+		for (k = 0; k < MAX_PRIO; k++) {	/* empty list and cleared bit per priority */
+			INIT_LIST_HEAD(array->queue + k);
+			__clear_bit(k, array->bitmap);
+		}
+		// delimiter for bitsearch
+		__set_bit(MAX_PRIO, array->bitmap);
+	}
+}
+
+#if defined(CONFIG_SCHED_VCPU) || defined(CONFIG_FAIRSCHED)
+static void init_vcpu(vcpu_t vcpu, int id)
+{	/* zero *vcpu, assign its id and initialise its runqueue */
+	memset(vcpu, 0, sizeof(struct vcpu_info));
+	vcpu->id = id;
+#ifdef CONFIG_SCHED_VCPU
+	vcpu->last_pcpu = id;	/* initial last_pcpu equals the vcpu id */
+#endif
+	init_rq(vcpu_rq(vcpu), id);
+}
+
+/* caller holds rq->lock and fairsched_lock, as install_vcpu() does */
+static void __install_vcpu(struct vcpu_scheduler *vsched, vcpu_t vcpu)
+{
+	int id;
+
+	id = vcpu->id;
+	vcpu->vsched = vsched;
+	vsched->vcpu[id] = vcpu;
+	vcpu->last_pcpu = id;	/* NOTE(review): init_vcpu() guards this field with CONFIG_SCHED_VCPU */
+	wmb();	/* order the stores above before the list/mask updates below */
+	/* FIXME: probably locking should be reworked, e.g.
+	   we don't have corresponding rmb(), so we need to update mask
+	   only after quiescent state */
+	/* init_boot_vcpu() should be remade if RCU is used here */
+	list_add(&vcpu->list, &vsched->idle_list);	/* a new vcpu starts on the idle list */
+	cpu_set(id, vsched->vcpu_online_map);
+	vsched->num_online_vcpus++;
+}
+
+static int install_vcpu(vcpu_t vcpu, struct vcpu_scheduler *vsched)
 {
 	runqueue_t *rq;
-	int i, j, k;
+	unsigned long flags;
+	int res = 0;	/* 0 on success, -EBUSY if the slot is occupied */
 
-	for_each_cpu(i) {
-		prio_array_t *array;
+	rq = vcpu_rq(vcpu);
+	spin_lock_irqsave(&rq->lock, flags);	/* lock order: rq->lock, then fairsched_lock */
+	spin_lock(&fairsched_lock);
 
-		rq = cpu_rq(i);
-		spin_lock_init(&rq->lock);
-		rq->nr_running = 0;
-		rq->active = rq->arrays;
-		rq->expired = rq->arrays + 1;
-		rq->best_expired_prio = MAX_PRIO;
+	if (vsched->vcpu[vcpu->id] != NULL)	/* slot for this id already in use */
+		res = -EBUSY;
+	else
+		__install_vcpu(vsched, vcpu);
 
-#ifdef CONFIG_SMP
-		rq->sd = NULL;
-		for (j = 1; j < 3; j++)
-			rq->cpu_load[j] = 0;
-		rq->active_balance = 0;
-		rq->push_cpu = 0;
-		rq->migration_thread = NULL;
-		INIT_LIST_HEAD(&rq->migration_queue);
-#endif
-		atomic_set(&rq->nr_iowait, 0);
-
-		for (j = 0; j < 2; j++) {
-			array = rq->arrays + j;
-			for (k = 0; k < MAX_PRIO; k++) {
-				INIT_LIST_HEAD(array->queue + k);
-				__clear_bit(k, array->bitmap);
-			}
-			// delimiter for bitsearch
-			__set_bit(MAX_PRIO, array->bitmap);
+	spin_unlock(&fairsched_lock);
+	spin_unlock_irqrestore(&rq->lock, flags);
+	return res;
+}
+
+static int __add_vcpu(struct vcpu_scheduler *vsched, int id)
+{	/* allocate a vcpu with the given id and install it into vsched */
+	vcpu_t vcpu;
+	int res;
+
+	res = -ENOMEM;
+	vcpu = kmalloc(sizeof(struct vcpu_info), GFP_KERNEL);	/* GFP_KERNEL: may sleep */
+	if (vcpu == NULL)
+		goto out;
+
+	init_vcpu(vcpu, id);
+	vcpu_rq(vcpu)->curr = this_pcpu()->idle;	/* curr starts as the current pcpu's ->idle */
+	res = install_vcpu(vcpu, vsched);	/* -EBUSY if slot 'id' is already taken */
+	if (res < 0)
+		goto out_free;
+	return 0;
+
+out_free:
+	kfree(vcpu);	/* never became visible in vsched, safe to free directly */
+out:
+	return res;
+}
+
+void vsched_init(struct vcpu_scheduler *vsched, int id)
+{
+	memset(vsched, 0, sizeof(struct vcpu_scheduler));	/* start from an all-zero state */
+	vsched->id = id;
+	vsched->num_online_vcpus = 0;
+	/* all three cpu masks start out empty */
+	cpus_clear(vsched->vcpu_online_map);
+	cpus_clear(vsched->vcpu_running_map);
+	cpus_clear(vsched->pcpu_running_map);
+	INIT_LIST_HEAD(&vsched->running_list);
+	INIT_LIST_HEAD(&vsched->active_list);
+	INIT_LIST_HEAD(&vsched->idle_list);
+}
+
+#ifdef CONFIG_FAIRSCHED
+
+/* No locks supposed to be held: takes a semaphore and allocates with GFP_KERNEL */
+static void vsched_del_vcpu(vcpu_t vcpu);
+static int vsched_add_vcpu(struct vcpu_scheduler *vsched)
+{
+	int res, err;
+	vcpu_t vcpu;
+	int id;
+	static DECLARE_MUTEX(id_mutex);	/* function-static: one mutex shared by all vscheds */
+
+	down(&id_mutex);
+	id = find_first_zero_bit(vsched->vcpu_online_map.bits, NR_CPUS);	/* lowest id not online */
+	if (id >= NR_CPUS) {
+		err = -EBUSY;	/* every id is already online */
+		goto out_up;
+	}
+
+	err = __add_vcpu(vsched, id);
+	if (err < 0)
+		goto out_up;
+
+	vcpu = vsched_vcpu(vsched, id);
+	err = -ENOMEM;	/* reported for any notifier failure below */
+
+	res = vmigration_call(&migration_notifier, CPU_UP_PREPARE, vcpu);
+	if (res != NOTIFY_OK)
+		goto out_del_up;
+
+	res = vmigration_call(&migration_notifier, CPU_ONLINE, vcpu);
+	if (res != NOTIFY_OK)
+		goto out_cancel_del_up;
+
+	err = 0;
+
+out_up:
+	up(&id_mutex);
+	return err;
+
+out_cancel_del_up:
+	vmigration_call(&migration_notifier, CPU_UP_CANCELED, vcpu);
+out_del_up:	/* NOTE(review): vsched_del_vcpu() issues CPU_DEAD, which calls migration_thread_stop() again */
+	vsched_del_vcpu(vcpu);
+	goto out_up;
+}
+
+static void vsched_del_vcpu(vcpu_t vcpu)
+{	/* take vcpu offline, drain it via the CPU_DEAD notifier, unlink it and kfree() it */
+	struct vcpu_scheduler *vsched;
+	runqueue_t *rq;
+
+	vsched = vcpu_vsched(vcpu);
+	rq = vcpu_rq(vcpu);
+
+	spin_lock_irq(&rq->lock);	/* same lock order as install_vcpu(): rq->lock, then fairsched_lock */
+	spin_lock(&fairsched_lock);
+	cpu_clear(vcpu->id, vsched->vcpu_online_map);
+	vsched->num_online_vcpus--;
+	spin_unlock(&fairsched_lock);
+	spin_unlock_irq(&rq->lock);
+
+	/*
+	 * FIXME: ideas for VCPU hotplug:
+	 *
+	 * - push_cpu should be checked/cleaned up
+	 * - serialization
+	 */
+
+	/*
+	 * All tasks should migrate from this VCPU to somewhere else.
+	 * The VCPU is offline from this moment on, so its migration_thread
+	 * won't accept any new tasks...
+	 */
+	vmigration_call(&migration_notifier, CPU_DEAD, vcpu);
+	BUG_ON(rq->nr_running != 0);
+
+	/* vcpu_put() is called after deactivate_task. This loop makes sure
+	 * that vcpu_put() has finished and the vcpu can be freed */
+	while ((volatile int)vcpu->running)	/* cast applies to the loaded value, not the lvalue */
+		yield();
+
+	BUG_ON(vcpu->active);	/* should be in idle_list */
+	BUG_ON(vcpu_rq(vcpu)->prev_mm != NULL);
+
+	spin_lock_irq(&fairsched_lock);
+	list_del(&vcpu->list);
+	vsched_vcpu(vsched, vcpu->id) = NULL;	/* free the slot so the id can be reused */
+	spin_unlock_irq(&fairsched_lock);
+
+	kfree(vcpu);	/* NOTE(review): assumes vcpu came from kmalloc(), as in __add_vcpu() */
+}
+
+int vsched_mvpr(struct task_struct *p, struct vcpu_scheduler *vsched)
+{	/* move task p into vsched; returns 0, or -EINVAL if no allowed vcpu exists */
+	vcpu_t dest_vcpu;
+	int id;
+	int res;
+
+	res = 0;
+	while(1) {
+		/* FIXME: we suppose here that vcpu can't disappear on the fly */
+		for(id = first_cpu(vsched->vcpu_online_map); id < NR_CPUS;
+		    id++) {
+			if ((vsched->vcpu[id] == NULL) ||
+			    !vcpu_isset(vsched->vcpu[id], p->cpus_allowed))
+				continue;	/* skip empty slots and vcpus p may not run on */
+			else
+				break;	/* vsched->vcpu[id] exists and is allowed for p */
+		}
+		if (id >= NR_CPUS) {
+			res = -EINVAL;
+			goto out;
+		}
+
+		dest_vcpu = vsched_vcpu(vsched, id);
+		while(1) {	/* retry until p is in vsched or dest is no longer allowed */
+			sched_migrate_task(p, dest_vcpu);
+			if (task_vsched_id(p) == vsched_id(vsched))
+				goto out;
+			if (!vcpu_isset(vsched->vcpu[id], p->cpus_allowed))
+				break;	/* affinity changed under us: pick another vcpu */
 		}
 	}
+out:
+	return res;
+}
+
+void vsched_fairsched_link(struct vcpu_scheduler *vsched,
+		struct fairsched_node *node)
+{
+	node->vsched = vsched;	/* node -> scheduler */
+	vsched->node = node;	/* scheduler -> node */
+}
+
+void vsched_fairsched_unlink(struct vcpu_scheduler *vsched,
+		struct fairsched_node *node)
+{
+	node->vsched = NULL;	/* drop node -> scheduler */
+	vsched->node = NULL;	/* drop scheduler -> node */
+}
+
+int vsched_create(int id, struct fairsched_node *node)
+{	/* allocate a vsched for 'node' and give it one vcpu per online cpu */
+	struct vcpu_scheduler *vsched;
+	int i, res;
+
+	vsched = kmalloc(sizeof(*vsched), GFP_KERNEL);
+	if (vsched == NULL)
+		return -ENOMEM;
+
+	vsched_init(vsched, node->id);	/* NOTE(review): uses node->id; the 'id' argument is never read */
+	vsched_fairsched_link(vsched, node);	/* the new vsched is reachable via node->vsched */
+
+	for(i = 0; i < num_online_cpus(); i++) {
+		res = vsched_add_vcpu(vsched);
+		if (res < 0)
+			goto err_add;
+	}
+	return 0;
+
+err_add:
+	vsched_destroy(vsched);	/* its own return value (-EBUSY possible) is ignored here */
+	return res;
+}
+
+int vsched_destroy(struct vcpu_scheduler *vsched)
+{	/* delete every vcpu of vsched, unlink its node and free it; NULL is a no-op */
+	vcpu_t vcpu;
+
+	if (vsched == NULL)
+		return 0;
+
+	spin_lock_irq(&fairsched_lock);
+	while(1) {	/* take the first vcpu from the running, active, then idle list */
+		if (!list_empty(&vsched->running_list))
+			vcpu = list_entry(vsched->running_list.next,
+						struct vcpu_info, list);
+		else if (!list_empty(&vsched->active_list))
+			vcpu = list_entry(vsched->active_list.next,
+						struct vcpu_info, list);
+		else if (!list_empty(&vsched->idle_list))
+			vcpu = list_entry(vsched->idle_list.next,
+						struct vcpu_info, list);
+		else
+			break;	/* all three lists are empty */
+		spin_unlock_irq(&fairsched_lock);	/* vsched_del_vcpu() takes the locks itself */
+		vsched_del_vcpu(vcpu);
+		spin_lock_irq(&fairsched_lock);
+	}
+	if (vsched->num_online_vcpus)	/* counter disagrees with the empty lists */
+		goto err_busy;
+	spin_unlock_irq(&fairsched_lock);
+
+	vsched_fairsched_unlink(vsched, vsched->node);
+	kfree(vsched);
+	return 0;
+
+err_busy:	/* vsched is neither unlinked nor freed on this path */
+	printk(KERN_ERR "BUG in vsched_destroy, vsched id %d\n",
+			vsched->id);
+	spin_unlock_irq(&fairsched_lock);
+	return -EBUSY;
+	
+}
+#endif /* defined(CONFIG_FAIRSCHED) */
+#endif /* defined(CONFIG_SCHED_VCPU) || defined(CONFIG_FAIRSCHED) */
+
+static void init_boot_vcpu(void)
+{	/* NOTE(review): not inside the CONFIG_SCHED_VCPU/CONFIG_FAIRSCHED #if that builds init_vcpu()/install_vcpu() */
+	int res;
+
+	/*
+	 * We set up boot_vcpu and its runqueue for use until init_idle()
+	 * happens on cpu0. This is required since timer interrupts can happen
+	 * between sched_init() and init_idle().
+	 */
+	init_vcpu(&boot_idle_vcpu, 0);
+	vcpu_rq(&boot_idle_vcpu)->curr = current;	/* the task running sched_init() */
+	res = install_vcpu(&boot_idle_vcpu, &idle_vsched);
+	if (res < 0)
+		panic("Can't install boot idle vcpu");
+
+	init_vcpu(&boot_vcpu, 0);	/* same id 0, but installed into default_vsched */
+	vcpu_rq(&boot_vcpu)->curr = current;
+	res = install_vcpu(&boot_vcpu, &default_vsched);
+	if (res < 0)
+		panic("Can't install boot vcpu");
+
+	this_pcpu()->vcpu = &boot_idle_vcpu;	/* current pcpu starts on the idle vsched's vcpu */
+	this_pcpu()->vsched = &idle_vsched;
+}
+
+static void init_pcpu(int id)
+{	/* initialise the pcpu_info for physical cpu 'id' */
+	struct pcpu_info *pcpu;
+
+	pcpu = pcpu(id);
+	pcpu->id = id;
+#ifdef CONFIG_SMP
+	pcpu->sd = NULL;	/* set later by cpu_attach_domain() */
+#endif
+
+#ifndef CONFIG_SCHED_VCPU	/* NOTE(review): init_vcpu() is only built if SCHED_VCPU or FAIRSCHED is set */
+	init_vcpu(vcpu(id), id);
+#endif
+}
+
+static void init_pcpus(void)
+{
+	int cpu;	/* walks every possible slot, not just online cpus */
+	for (cpu = 0; cpu != NR_CPUS; ++cpu)
+		init_pcpu(cpu);
+}
+
+void __init sched_init(void)
+{
+	init_pcpus();
+#if defined(CONFIG_SCHED_VCPU)
+	vsched_init(&idle_vsched, -1);
+	vsched_init(&default_vsched, 0);
+#if defined(CONFIG_FAIRSCHED)
+	fairsched_init_early();
+	vsched_fairsched_link(&idle_vsched, &fairsched_idle_node);
+	vsched_fairsched_link(&default_vsched, &fairsched_init_node);
+#endif
+	init_boot_vcpu();
+#else
+#if defined(CONFIG_FAIRSCHED)
+	fairsched_init_early();
+#endif
+#endif
 
 	/*
 	 * The boot idle thread does lazy MMU switching as well:
@@ -6064,6 +7576,149 @@ void __init sched_init(void)
 	init_idle(current, smp_processor_id());
 }
 
+#ifdef CONFIG_SCHED_VCPU
+static void show_vcpu_list(struct vcpu_scheduler *vsched, struct list_head *lh)
+{
+	cpumask_t ids;
+	vcpu_t pos;
+	int id;
+
+	cpus_clear(ids);
+	list_for_each_entry(pos, lh, list)
+		cpu_set(pos->id, ids);	/* collect ids so they print in ascending order */
+	/* emit every collected id, each followed by a space */
+	for (id = 0; id != NR_CPUS; ++id)
+		if (cpu_isset(id, ids))
+			printk("%d ", id);
+}
+
+#define PRINT(s, sz, fmt...)				\
+	do {	/* append formatted text at *s */	\
+		int __out;				\
+		__out = scnprintf(*s, *sz, fmt);	\
+		*s += __out;	/* advance cursor */	\
+		*sz -= __out;	/* shrink room left */	\
+	} while(0)
+
+static void show_rq_array(prio_array_t *array, char *header, char **s, int *sz)
+{	/* append "<header> prio N (comm[pid] ...)..." for every non-empty priority list */
+	struct list_head *list;
+	task_t *p;
+	int k, h;
+
+	h = 0;	/* set once the header has been emitted */
+	for (k = 0; k < MAX_PRIO; k++) {
+		list = array->queue + k;
+		if (list_empty(list))
+			continue;
+
+		if (!h) {
+			PRINT(s, sz, header);	/* header is used as the format; callers here pass literals */
+			h = 1;
+		}
+
+		PRINT(s, sz, " prio %d (", k);
+		list_for_each_entry(p, list, run_list)
+			PRINT(s, sz, "%s[%d] ", p->comm, p->pid);
+		PRINT(s, sz, ")");
+	}
+	if (h)	/* nothing at all is appended for an empty array */
+		PRINT(s, sz, "\n");
+}
+
+static void show_vcpu(vcpu_t vcpu)
+{	/* dump one vcpu and its runqueue to the kernel log; a NULL vcpu is ignored */
+	runqueue_t *rq;
+	char buf[1024], *s;
+	unsigned long flags;
+	int sz;
+
+	if (vcpu == NULL)
+		return;
+
+	rq = vcpu_rq(vcpu);
+	spin_lock_irqsave(&rq->lock, flags);	/* keep the queues stable while they are walked */
+	printk("  vcpu %d: last_pcpu %d, state %s%s\n",
+			vcpu->id, vcpu->last_pcpu,
+			vcpu->active ? "A" : "",
+			vcpu->running ? "R" : "");
+
+	printk("    rq: running %lu, load {%lu,%lu,%lu}, sw %Lu, sd %p, curr %p\n",
+			rq->nr_running,
+#ifdef CONFIG_SMP
+			rq->cpu_load[0], rq->cpu_load[1], rq->cpu_load[2],
+#else
+			0LU, 0LU, 0LU,
+#endif
+			rq->nr_switches,
+#ifdef CONFIG_SMP
+			rq->sd,
+#else
+			NULL,
+#endif
+			rq->curr
+	      );
+
+	s = buf;
+	sz = sizeof(buf) - 1;	/* reserve one byte for the terminator written below */
+
+	show_rq_array(rq->active, "      active:", &s, &sz);
+	show_rq_array(rq->expired, "      expired:", &s, &sz);
+	spin_unlock_irqrestore(&rq->lock, flags);
+	*s = 0;
+	/* buf holds task names (p->comm), so never pass it as the format string */
+	printk("%s", buf);
+}
+
+static inline void fairsched_show_node(struct vcpu_scheduler *vsched)
+{	/* print the fairsched node linked to vsched; empty body without CONFIG_FAIRSCHED */
+#ifdef CONFIG_FAIRSCHED
+	struct fairsched_node *node;
+
+	node = vsched->node;	/* dereferenced below without a NULL check */
+	printk("fsnode: ready %d run %d cpu %d vsched %p, pcpu %d\n",
+			node->nr_ready, node->nr_runnable, node->nr_pcpu,
+			node->vsched, smp_processor_id());
+#endif
+}
+
+static void __show_vsched(struct vcpu_scheduler *vsched)
+{	/* print vsched summary, its three vcpu lists, the online mask, then every vcpu */
+	char mask[NR_CPUS + 1];
+	int i;
+	unsigned long flags;
+
+	spin_lock_irqsave(&fairsched_lock, flags);	/* held across the list walks and counters below */
+	printk("vsched id=%d\n", vsched_id(vsched));
+	fairsched_show_node(vsched);
+
+	printk("  idle cpus ");
+	show_vcpu_list(vsched, &vsched->idle_list);
+	printk("; active cpus ");
+	show_vcpu_list(vsched, &vsched->active_list);
+	printk("; running cpus ");
+	show_vcpu_list(vsched, &vsched->running_list);
+	printk("\n");
+
+	cpumask_scnprintf(mask, NR_CPUS, vsched->vcpu_online_map);
+	printk("  num_online_cpus=%d, mask=%s (w=%d)\n",
+			vsched->num_online_vcpus, mask,
+			cpus_weight(vsched->vcpu_online_map));
+	spin_unlock_irqrestore(&fairsched_lock, flags);
+
+	for (i = 0; i < NR_CPUS; i++)	/* outside fairsched_lock; show_vcpu() takes each rq->lock */
+		show_vcpu(vsched->vcpu[i]);	/* NULL slots are ignored by show_vcpu() */
+}
+
+void show_vsched(void)
+{	/* debug dump of idle_vsched and default_vsched to the kernel log */
+	oops_in_progress = 1;	/* NOTE(review): the previous value is not saved; it is forced to 0 below */
+	__show_vsched(&idle_vsched);
+	__show_vsched(&default_vsched);
+	oops_in_progress = 0;
+}
+#endif /* CONFIG_SCHED_VCPU */
+
 #ifdef CONFIG_DEBUG_SPINLOCK_SLEEP
 void __might_sleep(char *file, int line)
 {
@@ -6095,7 +7750,7 @@ void normalize_rt_tasks(void)
 	runqueue_t *rq;
 
 	read_lock_irq(&tasklist_lock);
-	for_each_process (p) {
+	for_each_process_all (p) {
 		if (!rt_task(p))
 			continue;
 
@@ -6136,7 +7791,7 @@ void normalize_rt_tasks(void)
  */
 task_t *curr_task(int cpu)
 {
-	return cpu_curr(cpu);
+	return vcpu_rq(pcpu(cpu)->vcpu)->curr;
 }
 
 /**
@@ -6156,7 +7811,7 @@ task_t *curr_task(int cpu)
  */
 void set_curr_task(int cpu, task_t *p)
 {
-	cpu_curr(cpu) = p;
+	vcpu_rq(pcpu(cpu)->vcpu)->curr = p;
 }
 
 #endif
diff -uprN linux-2.6.16/kernel/signal.c linux-2.6.16.ovz/kernel/signal.c
--- linux-2.6.16/kernel/signal.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/kernel/signal.c	2006-07-05 08:34:56.000000000 -0400
@@ -25,17 +25,20 @@
 #include <linux/posix-timers.h>
 #include <linux/signal.h>
 #include <linux/audit.h>
+#include <linux/kmem_cache.h>
 #include <linux/capability.h>
 #include <asm/param.h>
 #include <asm/uaccess.h>
 #include <asm/unistd.h>
 #include <asm/siginfo.h>
+#include <ub/ub_misc.h>
 
 /*
  * SLAB caches for signal bits.
  */
 
-static kmem_cache_t *sigqueue_cachep;
+kmem_cache_t *sigqueue_cachep;
+EXPORT_SYMBOL_GPL(sigqueue_cachep);
 
 /*
  * In POSIX a signal is sent either to a specific thread (Linux task)
@@ -221,6 +224,7 @@ fastcall void recalc_sigpending_tsk(stru
 	else
 		clear_tsk_thread_flag(t, TIF_SIGPENDING);
 }
+EXPORT_SYMBOL_GPL(recalc_sigpending_tsk);
 
 void recalc_sigpending(void)
 {
@@ -271,8 +275,13 @@ static struct sigqueue *__sigqueue_alloc
 	atomic_inc(&t->user->sigpending);
 	if (override_rlimit ||
 	    atomic_read(&t->user->sigpending) <=
-			t->signal->rlim[RLIMIT_SIGPENDING].rlim_cur)
+			t->signal->rlim[RLIMIT_SIGPENDING].rlim_cur) {
 		q = kmem_cache_alloc(sigqueue_cachep, flags);
+		if (q && ub_siginfo_charge(q, get_task_ub(t))) {
+			kmem_cache_free(sigqueue_cachep, q);
+			q = NULL;
+		}
+	}
 	if (unlikely(q == NULL)) {
 		atomic_dec(&t->user->sigpending);
 	} else {
@@ -289,6 +298,7 @@ static void __sigqueue_free(struct sigqu
 		return;
 	atomic_dec(&q->user->sigpending);
 	free_uid(q->user);
+	ub_siginfo_uncharge(q);
 	kmem_cache_free(sigqueue_cachep, q);
 }
 
@@ -378,8 +388,11 @@ void __exit_signal(struct task_struct *t
 			wake_up_process(sig->group_exit_task);
 			sig->group_exit_task = NULL;
 		}
-		if (tsk == sig->curr_target)
+		if (tsk == sig->curr_target) {
 			sig->curr_target = next_thread(tsk);
+			if (tsk == sig->curr_target)
+				sig->curr_target = NULL;
+		}
 		tsk->signal = NULL;
 		/*
 		 * Accumulate here the counters for all threads but the
@@ -524,7 +537,16 @@ static int __dequeue_signal(struct sigpe
 {
 	int sig = 0;
 
-	sig = next_signal(pending, mask);
+	/* SIGKILL must have priority, otherwise it is quite easy
+	 * to create an unkillable process, sending sig < SIGKILL
+	 * to self */
+	if (unlikely(sigismember(&pending->signal, SIGKILL))) {
+		if (!sigismember(mask, SIGKILL))
+			sig = SIGKILL;
+	}
+
+	if (likely(!sig))
+		sig = next_signal(pending, mask);
 	if (sig) {
 		if (current->notifier) {
 			if (sigismember(current->notifier_mask, sig)) {
@@ -618,6 +640,7 @@ void signal_wake_up(struct task_struct *
 	if (!wake_up_state(t, mask))
 		kick_process(t);
 }
+EXPORT_SYMBOL_GPL(signal_wake_up);
 
 /*
  * Remove signals in mask from the pending set and queue.
@@ -838,7 +861,7 @@ static int send_signal(int sig, struct s
 			q->info.si_signo = sig;
 			q->info.si_errno = 0;
 			q->info.si_code = SI_USER;
-			q->info.si_pid = current->pid;
+			q->info.si_pid = virt_pid(current);
 			q->info.si_uid = current->uid;
 			break;
 		case (unsigned long) SEND_SIG_PRIV:
@@ -975,7 +998,6 @@ __group_complete_signal(int sig, struct 
 		if (t == NULL)
 			/* restart balancing at this thread */
 			t = p->signal->curr_target = p;
-		BUG_ON(t->tgid != p->tgid);
 
 		while (!wants_signal(sig, t)) {
 			t = next_thread(t);
@@ -1159,13 +1181,18 @@ int __kill_pg_info(int sig, struct sigin
 	if (pgrp <= 0)
 		return -EINVAL;
 
+	/* Use __vpid_to_pid(). This function is used under write_lock
+	 * tasklist_lock. */
+	if (is_virtual_pid(pgrp))
+		pgrp = __vpid_to_pid(pgrp);
+
 	success = 0;
 	retval = -ESRCH;
-	do_each_task_pid(pgrp, PIDTYPE_PGID, p) {
+	do_each_task_pid_ve(pgrp, PIDTYPE_PGID, p) {
 		int err = group_send_sig_info(sig, info, p);
 		success |= !err;
 		retval = err;
-	} while_each_task_pid(pgrp, PIDTYPE_PGID, p);
+	} while_each_task_pid_ve(pgrp, PIDTYPE_PGID, p);
 	return success ? 0 : retval;
 }
 
@@ -1193,7 +1220,7 @@ kill_proc_info(int sig, struct siginfo *
 		read_lock(&tasklist_lock);
 		acquired_tasklist_lock = 1;
 	}
-	p = find_task_by_pid(pid);
+	p = find_task_by_pid_ve(pid);
 	error = -ESRCH;
 	if (p)
 		error = group_send_sig_info(sig, info, p);
@@ -1214,7 +1241,7 @@ int kill_proc_info_as_uid(int sig, struc
 		return ret;
 
 	read_lock(&tasklist_lock);
-	p = find_task_by_pid(pid);
+	p = find_task_by_pid_ve(pid);
 	if (!p) {
 		ret = -ESRCH;
 		goto out_unlock;
@@ -1253,8 +1280,8 @@ static int kill_something_info(int sig, 
 		struct task_struct * p;
 
 		read_lock(&tasklist_lock);
-		for_each_process(p) {
-			if (p->pid > 1 && p->tgid != current->tgid) {
+		for_each_process_ve(p) {
+			if (virt_pid(p) > 1 && p->tgid != current->tgid) {
 				int err = group_send_sig_info(sig, info, p);
 				++count;
 				if (err != -EPERM)
@@ -1562,9 +1589,17 @@ void do_notify_parent(struct task_struct
 	BUG_ON(!tsk->ptrace &&
 	       (tsk->group_leader != tsk || !thread_group_empty(tsk)));
 
+#ifdef CONFIG_VE
+	/* Allow to send only SIGCHLD from VE */
+	if (sig != SIGCHLD &&
+			tsk->ve_task_info.owner_env != 
+			tsk->parent->ve_task_info.owner_env)
+		sig = SIGCHLD;
+#endif
+
 	info.si_signo = sig;
 	info.si_errno = 0;
-	info.si_pid = tsk->pid;
+	info.si_pid = get_task_pid_ve(tsk, tsk->parent->ve_task_info.owner_env);
 	info.si_uid = tsk->uid;
 
 	/* FIXME: find out whether or not this is supposed to be c*time. */
@@ -1629,7 +1664,7 @@ static void do_notify_parent_cldstop(str
 
 	info.si_signo = SIGCHLD;
 	info.si_errno = 0;
-	info.si_pid = tsk->pid;
+	info.si_pid = get_task_pid_ve(tsk, VE_TASK_INFO(parent)->owner_env);
 	info.si_uid = tsk->uid;
 
 	/* FIXME: find out whether or not this is supposed to be c*time. */
@@ -1763,7 +1798,9 @@ finish_stop(int stop_count)
 	read_unlock(&tasklist_lock);
 
 out:
+	set_stop_state(current);
 	schedule();
+	clear_stop_state(current);
 	/*
 	 * Now we don't run again until continued.
 	 */
@@ -1940,11 +1977,13 @@ relock:
 			ptrace_signal_deliver(regs, cookie);
 
 			/* Let the debugger run.  */
+			set_pn_state(current, PN_STOP_SIGNAL);
 			ptrace_stop(signr, signr, info);
+			clear_pn_state(current);
 
-			/* We're back.  Did the debugger cancel the sig or group_exit? */
+			/* We're back.  Did the debugger cancel the sig?  */
 			signr = current->exit_code;
-			if (signr == 0 || current->signal->flags & SIGNAL_GROUP_EXIT)
+			if (signr == 0)
 				continue;
 
 			current->exit_code = 0;
@@ -1957,7 +1996,7 @@ relock:
 				info->si_signo = signr;
 				info->si_errno = 0;
 				info->si_code = SI_USER;
-				info->si_pid = current->parent->pid;
+				info->si_pid = virt_pid(current->parent);
 				info->si_uid = current->parent->uid;
 			}
 
@@ -1988,8 +2027,14 @@ relock:
 			continue;
 
 		/* Init gets no signals it doesn't want.  */
-		if (current->pid == 1)
+		if (virt_pid(current) == 1) {
+			/* Allow SIGKILL for non-root VE */
+#ifdef CONFIG_VE
+			if (current->pid == 1 ||
+			    signr != SIGKILL)
+#endif
 			continue;
+		}
 
 		if (sig_kernel_stop(signr)) {
 			/*
@@ -2307,7 +2352,6 @@ sys_rt_sigtimedwait(const sigset_t __use
 
 			timeout = schedule_timeout_interruptible(timeout);
 
-			try_to_freeze();
 			spin_lock_irq(&current->sighand->siglock);
 			sig = dequeue_signal(current, &these, &info);
 			current->blocked = current->real_blocked;
@@ -2340,7 +2384,7 @@ sys_kill(int pid, int sig)
 	info.si_signo = sig;
 	info.si_errno = 0;
 	info.si_code = SI_USER;
-	info.si_pid = current->tgid;
+	info.si_pid = virt_tgid(current);
 	info.si_uid = current->uid;
 
 	return kill_something_info(sig, &info, pid);
@@ -2356,12 +2400,12 @@ static int do_tkill(int tgid, int pid, i
 	info.si_signo = sig;
 	info.si_errno = 0;
 	info.si_code = SI_TKILL;
-	info.si_pid = current->tgid;
+	info.si_pid = virt_tgid(current);
 	info.si_uid = current->uid;
 
 	read_lock(&tasklist_lock);
-	p = find_task_by_pid(pid);
-	if (p && (tgid <= 0 || p->tgid == tgid)) {
+	p = find_task_by_pid_ve(pid);
+	if (p && (tgid <= 0 || virt_tgid(p) == tgid)) {
 		error = check_kill_permission(sig, &info, p);
 		/*
 		 * The null signal is a permissions and process existence
diff -uprN linux-2.6.16/kernel/softirq.c linux-2.6.16.ovz/kernel/softirq.c
--- linux-2.6.16/kernel/softirq.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/kernel/softirq.c	2006-07-05 08:34:56.000000000 -0400
@@ -13,10 +13,13 @@
 #include <linux/mm.h>
 #include <linux/notifier.h>
 #include <linux/percpu.h>
+#include <linux/sysctl.h>
 #include <linux/cpu.h>
 #include <linux/kthread.h>
 #include <linux/rcupdate.h>
 
+#include <ub/beancounter.h>
+
 #include <asm/irq.h>
 /*
    - No shared variables, all the data are CPU local.
@@ -44,6 +47,8 @@ EXPORT_SYMBOL(irq_stat);
 static struct softirq_action softirq_vec[32] __cacheline_aligned_in_smp;
 
 static DEFINE_PER_CPU(struct task_struct *, ksoftirqd);
+static DEFINE_PER_CPU(struct task_struct *, ksoftirqd_wakeup);
+static int ksoftirqd_stat[NR_CPUS];
 
 /*
  * we cannot loop indefinitely here to avoid userspace starvation,
@@ -54,7 +59,7 @@ static DEFINE_PER_CPU(struct task_struct
 static inline void wakeup_softirqd(void)
 {
 	/* Interrupts are disabled: no need to stop preemption */
-	struct task_struct *tsk = __get_cpu_var(ksoftirqd);
+	struct task_struct *tsk = __get_cpu_var(ksoftirqd_wakeup);
 
 	if (tsk && tsk->state != TASK_RUNNING)
 		wake_up_process(tsk);
@@ -73,10 +78,14 @@ static inline void wakeup_softirqd(void)
 
 asmlinkage void __do_softirq(void)
 {
+	struct user_beancounter *ub;
 	struct softirq_action *h;
 	__u32 pending;
 	int max_restart = MAX_SOFTIRQ_RESTART;
 	int cpu;
+	struct ve_struct *envid;
+
+	envid = set_exec_env(get_ve0());
 
 	pending = local_softirq_pending();
 
@@ -90,6 +99,7 @@ restart:
 
 	h = softirq_vec;
 
+	ub = set_exec_ub(get_ub0());
 	do {
 		if (pending & 1) {
 			h->action(h);
@@ -98,6 +108,7 @@ restart:
 		h++;
 		pending >>= 1;
 	} while (pending);
+	(void)set_exec_ub(ub);
 
 	local_irq_disable();
 
@@ -108,6 +119,7 @@ restart:
 	if (pending)
 		wakeup_softirqd();
 
+	(void)set_exec_env(envid);
 	__local_bh_enable();
 }
 
@@ -483,6 +495,52 @@ static int __devinit cpu_callback(struct
 	return NOTIFY_OK;
 }
 
+static int proc_ksoftirqd(ctl_table *ctl, int write, struct file *filp,
+		void __user *buffer, size_t *lenp, loff_t *ppos)
+{	/* proc handler: after a write, recompute ksoftirqd_wakeup from ksoftirqd_stat[] */
+	int ret, cpu;
+
+	ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
+	if (!write)
+		return ret;	/* reads only report ksoftirqd_stat[] */
+
+	for_each_online_cpu(cpu) {	/* runs even when proc_dointvec() returned an error */
+		per_cpu(ksoftirqd_wakeup, cpu) =
+			ksoftirqd_stat[cpu] ? per_cpu(ksoftirqd, cpu) : NULL;	/* 0 => no wakeup target */
+	}
+	return ret;
+}
+
+static int sysctl_ksoftirqd(ctl_table *table, int *name, int nlen,
+		void *oldval, size_t *oldlenp, void *newval, size_t newlen,
+		void **context)
+{	/* strategy routine for the "ksoftirqd" entry: always fails */
+	return -EINVAL;
+}
+
+static ctl_table debug_table[] = {	/* child table hung under root_table's "debug" entry */
+	{
+		.ctl_name	= 1246,	/* raw numeric id, no named constant in view */
+		.procname	= "ksoftirqd",
+		.data		= ksoftirqd_stat,	/* int array of NR_CPUS entries */
+		.maxlen		= sizeof(ksoftirqd_stat),
+		.mode		= 0644,
+		.proc_handler	= &proc_ksoftirqd,
+		.strategy	= &sysctl_ksoftirqd	/* always returns -EINVAL */
+	},
+	{0}	/* terminator */
+};
+
+static ctl_table root_table[] = {	/* passed to register_sysctl_table() in spawn_ksoftirqd() */
+	{
+		.ctl_name	= CTL_DEBUG,
+		.procname	= "debug",
+		.mode		= 0555,
+		.child		= debug_table	/* holds the "ksoftirqd" entry */
+	},
+	{0}	/* terminator */
+};
+
 static struct notifier_block __devinitdata cpu_nfb = {
 	.notifier_call = cpu_callback
 };
@@ -493,5 +551,6 @@ __init int spawn_ksoftirqd(void)
 	cpu_callback(&cpu_nfb, CPU_UP_PREPARE, cpu);
 	cpu_callback(&cpu_nfb, CPU_ONLINE, cpu);
 	register_cpu_notifier(&cpu_nfb);
+	register_sysctl_table(root_table, 0);
 	return 0;
 }
diff -uprN linux-2.6.16/kernel/stop_machine.c linux-2.6.16.ovz/kernel/stop_machine.c
--- linux-2.6.16/kernel/stop_machine.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/kernel/stop_machine.c	2006-07-05 08:34:56.000000000 -0400
@@ -96,7 +96,7 @@ static int stop_machine(void)
 	stopmachine_state = STOPMACHINE_WAIT;
 
 	for_each_online_cpu(i) {
-		if (i == raw_smp_processor_id())
+		if (i == task_cpu(current))
 			continue;
 		ret = kernel_thread(stopmachine, (void *)(long)i,CLONE_KERNEL);
 		if (ret < 0)
@@ -178,7 +178,7 @@ struct task_struct *__stop_machine_run(i
 
 	/* If they don't care which CPU fn runs on, bind to any online one. */
 	if (cpu == NR_CPUS)
-		cpu = raw_smp_processor_id();
+		cpu = task_cpu(current);
 
 	p = kthread_create(do_stop, &smdata, "kstopmachine");
 	if (!IS_ERR(p)) {
diff -uprN linux-2.6.16/kernel/sys.c linux-2.6.16.ovz/kernel/sys.c
--- linux-2.6.16/kernel/sys.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/kernel/sys.c	2006-07-05 08:34:56.000000000 -0400
@@ -11,6 +11,7 @@
 #include <linux/mman.h>
 #include <linux/smp_lock.h>
 #include <linux/notifier.h>
+#include <linux/virtinfo.h>
 #include <linux/reboot.h>
 #include <linux/prctl.h>
 #include <linux/init.h>
@@ -236,6 +237,94 @@ int capable(int cap)
 EXPORT_SYMBOL(capable);
 #endif
 
+static DECLARE_MUTEX(virtinfo_sem);	/* serialises register and unregister */
+static struct vnotifier_block *virtinfo_chain[VIRT_TYPES];	/* one chain head per type */
+
+void virtinfo_notifier_register(int type, struct vnotifier_block *nb)
+{	/* insert nb into chain 'type'; 'type' is used as an index without a range check */
+	struct vnotifier_block **p;
+
+	down(&virtinfo_sem);
+	for (p = &virtinfo_chain[type];
+	     *p != NULL && nb->priority < (*p)->priority;
+	     p = &(*p)->next);	/* empty body: stops at the first entry whose priority is <= nb's */
+	nb->next = *p;
+	smp_wmb();	/* order the nb->next store before nb is linked into the chain */
+	*p = nb;
+	up(&virtinfo_sem);
+}
+
+EXPORT_SYMBOL(virtinfo_notifier_register);
+
+struct virtinfo_cnt_struct {	/* per-cpu call accounting used by virtinfo_notifier_unregister() */
+	volatile unsigned long exit[NR_CPUS];	/* calls entered on this cpu, indexed by the cpu they finished on */
+	volatile unsigned long entry;	/* calls entered on this cpu */
+};
+static DEFINE_PER_CPU(struct virtinfo_cnt_struct, virtcnt);
+
+void virtinfo_notifier_unregister(int type, struct vnotifier_block *nb)
+{	/* unlink nb from chain 'type', then wait until entry and exit counts match on every cpu */
+	struct vnotifier_block **p;
+	int entry_cpu, exit_cpu;
+	unsigned long cnt, ent;
+
+	down(&virtinfo_sem);
+	for (p = &virtinfo_chain[type]; *p != nb; p = &(*p)->next);	/* no NULL check: nb must be on the chain */
+	*p = nb->next;
+	smp_mb();
+
+	for_each_cpu_mask(entry_cpu, cpu_possible_map) {
+		while (1) {	/* poll until sum(exit[]) equals entry for this entry_cpu */
+			cnt = 0;
+			for_each_cpu_mask(exit_cpu, cpu_possible_map)
+				cnt +=
+				    per_cpu(virtcnt, entry_cpu).exit[exit_cpu];
+			smp_rmb();	/* exit counters are read before the entry counter */
+			ent = per_cpu(virtcnt, entry_cpu).entry;
+			if (cnt == ent)
+				break;
+			__set_current_state(TASK_UNINTERRUPTIBLE);
+			schedule_timeout(HZ / 100);	/* sleep HZ/100 ticks, then re-check */
+		}
+	}
+	up(&virtinfo_sem);
+}
+
+EXPORT_SYMBOL(virtinfo_notifier_unregister);
+
+int virtinfo_notifier_call(int type, unsigned long n, void *data)
+{	/* run chain 'type' in order; returns the last callback result with the stop bit cleared */
+	int ret;
+	int entry_cpu, exit_cpu;
+	struct vnotifier_block *nb;
+
+	entry_cpu = get_cpu();
+	per_cpu(virtcnt, entry_cpu).entry++;	/* count this call as entered on entry_cpu */
+	smp_wmb();
+	put_cpu();
+
+	nb = virtinfo_chain[type];	/* read without taking virtinfo_sem */
+	ret = NOTIFY_DONE;	/* returned unchanged when the chain is empty */
+	while (nb)
+	{
+		ret = nb->notifier_call(nb, n, data, ret);	/* each callback is given the previous result */
+		if(ret & NOTIFY_STOP_MASK) {
+			ret &= ~NOTIFY_STOP_MASK;
+			break;
+		}
+		nb = nb->next;
+	}
+
+	exit_cpu = get_cpu();	/* may differ from entry_cpu */
+	smp_wmb();
+	per_cpu(virtcnt, entry_cpu).exit[exit_cpu]++;	/* recorded in entry_cpu's counters */
+	put_cpu();
+
+	return ret;
+}
+
+EXPORT_SYMBOL(virtinfo_notifier_call);
+
 static int set_one_prio(struct task_struct *p, int niceval, int error)
 {
 	int no_nice;
@@ -281,17 +370,19 @@ asmlinkage long sys_setpriority(int whic
 	switch (which) {
 		case PRIO_PROCESS:
 			if (!who)
-				who = current->pid;
-			p = find_task_by_pid(who);
+				who = virt_pid(current);
+			p = find_task_by_pid_ve(who);
 			if (p)
 				error = set_one_prio(p, niceval, error);
 			break;
 		case PRIO_PGRP:
 			if (!who)
 				who = process_group(current);
-			do_each_task_pid(who, PIDTYPE_PGID, p) {
+			else
+				who = vpid_to_pid(who);
+			do_each_task_pid_ve(who, PIDTYPE_PGID, p) {
 				error = set_one_prio(p, niceval, error);
-			} while_each_task_pid(who, PIDTYPE_PGID, p);
+			} while_each_task_pid_ve(who, PIDTYPE_PGID, p);
 			break;
 		case PRIO_USER:
 			user = current->user;
@@ -301,10 +392,10 @@ asmlinkage long sys_setpriority(int whic
 				if ((who != current->uid) && !(user = find_user(who)))
 					goto out_unlock;	/* No processes for this user */
 
-			do_each_thread(g, p)
+			do_each_thread_ve(g, p)
 				if (p->uid == who)
 					error = set_one_prio(p, niceval, error);
-			while_each_thread(g, p);
+			while_each_thread_ve(g, p);
 			if (who != current->uid)
 				free_uid(user);		/* For find_user() */
 			break;
@@ -334,8 +425,8 @@ asmlinkage long sys_getpriority(int whic
 	switch (which) {
 		case PRIO_PROCESS:
 			if (!who)
-				who = current->pid;
-			p = find_task_by_pid(who);
+				who = virt_pid(current);
+			p = find_task_by_pid_ve(who);
 			if (p) {
 				niceval = 20 - task_nice(p);
 				if (niceval > retval)
@@ -345,11 +436,13 @@ asmlinkage long sys_getpriority(int whic
 		case PRIO_PGRP:
 			if (!who)
 				who = process_group(current);
-			do_each_task_pid(who, PIDTYPE_PGID, p) {
+			else
+				who = vpid_to_pid(who);
+			do_each_task_pid_ve(who, PIDTYPE_PGID, p) {
 				niceval = 20 - task_nice(p);
 				if (niceval > retval)
 					retval = niceval;
-			} while_each_task_pid(who, PIDTYPE_PGID, p);
+			} while_each_task_pid_ve(who, PIDTYPE_PGID, p);
 			break;
 		case PRIO_USER:
 			user = current->user;
@@ -359,13 +452,13 @@ asmlinkage long sys_getpriority(int whic
 				if ((who != current->uid) && !(user = find_user(who)))
 					goto out_unlock;	/* No processes for this user */
 
-			do_each_thread(g, p)
+			do_each_thread_ve(g, p)
 				if (p->uid == who) {
 					niceval = 20 - task_nice(p);
 					if (niceval > retval)
 						retval = niceval;
 				}
-			while_each_thread(g, p);
+			while_each_thread_ve(g, p);
 			if (who != current->uid)
 				free_uid(user);		/* for find_user() */
 			break;
@@ -497,6 +590,35 @@ asmlinkage long sys_reboot(int magic1, i
 	                magic2 != LINUX_REBOOT_MAGIC2C))
 		return -EINVAL;
 
+#ifdef CONFIG_VE
+	if (!ve_is_super(get_exec_env()))
+		switch (cmd) {
+		case LINUX_REBOOT_CMD_RESTART:
+		case LINUX_REBOOT_CMD_HALT:
+		case LINUX_REBOOT_CMD_POWER_OFF:
+		case LINUX_REBOOT_CMD_RESTART2: {
+				struct siginfo info;
+
+				info.si_errno = 0;
+				info.si_code = SI_KERNEL;
+				info.si_pid = virt_pid(current);
+				info.si_uid = current->uid;
+				info.si_signo = SIGKILL;
+
+				/* Sending to real init is safe */
+				send_sig_info(SIGKILL, &info,
+						get_exec_env()->init_entry);
+			}
+
+		case LINUX_REBOOT_CMD_CAD_ON:
+		case LINUX_REBOOT_CMD_CAD_OFF:
+			return 0;
+
+		default:
+			return -EINVAL;
+		}
+#endif
+
 	/* Instead of trying to make the power_off code look like
 	 * halt when pm_power_off is not set do it the easy way.
 	 */
@@ -686,7 +808,7 @@ asmlinkage long sys_setgid(gid_t gid)
 	return 0;
 }
   
-static int set_user(uid_t new_ruid, int dumpclear)
+int set_user(uid_t new_ruid, int dumpclear)
 {
 	struct user_struct *new_user;
 
@@ -711,6 +833,7 @@ static int set_user(uid_t new_ruid, int 
 	current->uid = new_ruid;
 	return 0;
 }
+EXPORT_SYMBOL(set_user);
 
 /*
  * Unprivileged users may change the real uid to the effective uid
@@ -1079,7 +1202,12 @@ asmlinkage long sys_times(struct tms __u
 		if (copy_to_user(tbuf, &tmp, sizeof(struct tms)))
 			return -EFAULT;
 	}
+#ifndef CONFIG_VE
 	return (long) jiffies_64_to_clock_t(get_jiffies_64());
+#else
+	return (long) jiffies_64_to_clock_t(get_jiffies_64() -
+			get_exec_env()->start_jiffies);
+#endif
 }
 
 /*
@@ -1100,21 +1228,24 @@ asmlinkage long sys_setpgid(pid_t pid, p
 	struct task_struct *p;
 	struct task_struct *group_leader = current->group_leader;
 	int err = -EINVAL;
+	int _pgid;
 
 	if (!pid)
-		pid = group_leader->pid;
+		pid = virt_pid(group_leader);
 	if (!pgid)
 		pgid = pid;
 	if (pgid < 0)
 		return -EINVAL;
 
+	_pgid = vpid_to_pid(pgid);
+
 	/* From this point forward we keep holding onto the tasklist lock
 	 * so that our parent does not change from under us. -DaveM
 	 */
 	write_lock_irq(&tasklist_lock);
 
 	err = -ESRCH;
-	p = find_task_by_pid(pid);
+	p = find_task_by_pid_ve(pid);
 	if (!p)
 		goto out;
 
@@ -1139,25 +1270,35 @@ asmlinkage long sys_setpgid(pid_t pid, p
 	if (p->signal->leader)
 		goto out;
 
-	if (pgid != pid) {
+	pgid = virt_pid(p);
+	if (_pgid != p->pid) {
 		struct task_struct *p;
 
-		do_each_task_pid(pgid, PIDTYPE_PGID, p) {
-			if (p->signal->session == group_leader->signal->session)
+		do_each_task_pid_ve(_pgid, PIDTYPE_PGID, p) {
+			if (p->signal->session == group_leader->signal->session) {
+				pgid = virt_pgid(p);
 				goto ok_pgid;
-		} while_each_task_pid(pgid, PIDTYPE_PGID, p);
+			}
+		} while_each_task_pid_ve(_pgid, PIDTYPE_PGID, p);
 		goto out;
 	}
 
 ok_pgid:
-	err = security_task_setpgid(p, pgid);
+	err = security_task_setpgid(p, _pgid);
 	if (err)
 		goto out;
 
-	if (process_group(p) != pgid) {
+	if (process_group(p) != _pgid) {
 		detach_pid(p, PIDTYPE_PGID);
-		p->signal->pgrp = pgid;
-		attach_pid(p, PIDTYPE_PGID, pgid);
+		p->signal->pgrp = _pgid;
+		set_virt_pgid(p, pgid);
+		attach_pid(p, PIDTYPE_PGID, _pgid);
+		if (atomic_read(&p->signal->count) != 1) {
+			task_t *t;
+			for (t = next_thread(p); t != p; t = next_thread(t)) {
+				set_virt_pgid(t, pgid);
+			}
+		}
 	}
 
 	err = 0;
@@ -1170,19 +1311,19 @@ out:
 asmlinkage long sys_getpgid(pid_t pid)
 {
 	if (!pid) {
-		return process_group(current);
+		return virt_pgid(current);
 	} else {
 		int retval;
 		struct task_struct *p;
 
 		read_lock(&tasklist_lock);
-		p = find_task_by_pid(pid);
+		p = find_task_by_pid_ve(pid);
 
 		retval = -ESRCH;
 		if (p) {
 			retval = security_task_getpgid(p);
 			if (!retval)
-				retval = process_group(p);
+				retval = virt_pgid(p);
 		}
 		read_unlock(&tasklist_lock);
 		return retval;
@@ -1194,7 +1335,7 @@ asmlinkage long sys_getpgid(pid_t pid)
 asmlinkage long sys_getpgrp(void)
 {
 	/* SMP - assuming writes are word atomic this is fine */
-	return process_group(current);
+	return virt_pgid(current);
 }
 
 #endif
@@ -1202,19 +1343,19 @@ asmlinkage long sys_getpgrp(void)
 asmlinkage long sys_getsid(pid_t pid)
 {
 	if (!pid) {
-		return current->signal->session;
+		return virt_sid(current);
 	} else {
 		int retval;
 		struct task_struct *p;
 
 		read_lock(&tasklist_lock);
-		p = find_task_by_pid(pid);
+		p = find_task_by_pid_ve(pid);
 
 		retval = -ESRCH;
 		if(p) {
 			retval = security_task_getsid(p);
 			if (!retval)
-				retval = p->signal->session;
+				retval = virt_sid(p);
 		}
 		read_unlock(&tasklist_lock);
 		return retval;
@@ -1236,9 +1377,20 @@ asmlinkage long sys_setsid(void)
 
 	group_leader->signal->leader = 1;
 	__set_special_pids(group_leader->pid, group_leader->pid);
+	set_virt_pgid(group_leader, virt_pid(group_leader));
+	set_virt_sid(group_leader, virt_pid(group_leader));
 	group_leader->signal->tty = NULL;
 	group_leader->signal->tty_old_pgrp = 0;
-	err = process_group(group_leader);
+	if (atomic_read(&group_leader->signal->count) != 1) {
+		task_t *t;
+		for (t = next_thread(group_leader); t != group_leader;
+					t = next_thread(t)) {
+			set_virt_pgid(t, virt_pid(group_leader));
+			set_virt_sid(t, virt_pid(group_leader));
+		}
+	}
+
+	err = virt_pgid(group_leader);
 out:
 	write_unlock_irq(&tasklist_lock);
 	up(&tty_sem);
@@ -1518,7 +1670,7 @@ asmlinkage long sys_newuname(struct new_
 	int errno = 0;
 
 	down_read(&uts_sem);
-	if (copy_to_user(name,&system_utsname,sizeof *name))
+	if (copy_to_user(name,&ve_utsname,sizeof *name))
 		errno = -EFAULT;
 	up_read(&uts_sem);
 	return errno;
@@ -1529,15 +1681,15 @@ asmlinkage long sys_sethostname(char __u
 	int errno;
 	char tmp[__NEW_UTS_LEN];
 
-	if (!capable(CAP_SYS_ADMIN))
+	if (!capable(CAP_VE_SYS_ADMIN))
 		return -EPERM;
 	if (len < 0 || len > __NEW_UTS_LEN)
 		return -EINVAL;
 	down_write(&uts_sem);
 	errno = -EFAULT;
 	if (!copy_from_user(tmp, name, len)) {
-		memcpy(system_utsname.nodename, tmp, len);
-		system_utsname.nodename[len] = 0;
+		memcpy(ve_utsname.nodename, tmp, len);
+		ve_utsname.nodename[len] = 0;
 		errno = 0;
 	}
 	up_write(&uts_sem);
@@ -1553,11 +1705,11 @@ asmlinkage long sys_gethostname(char __u
 	if (len < 0)
 		return -EINVAL;
 	down_read(&uts_sem);
-	i = 1 + strlen(system_utsname.nodename);
+	i = 1 + strlen(ve_utsname.nodename);
 	if (i > len)
 		i = len;
 	errno = 0;
-	if (copy_to_user(name, system_utsname.nodename, i))
+	if (copy_to_user(name, ve_utsname.nodename, i))
 		errno = -EFAULT;
 	up_read(&uts_sem);
 	return errno;
@@ -1574,7 +1726,7 @@ asmlinkage long sys_setdomainname(char _
 	int errno;
 	char tmp[__NEW_UTS_LEN];
 
-	if (!capable(CAP_SYS_ADMIN))
+	if (!capable(CAP_VE_SYS_ADMIN))
 		return -EPERM;
 	if (len < 0 || len > __NEW_UTS_LEN)
 		return -EINVAL;
@@ -1582,8 +1734,8 @@ asmlinkage long sys_setdomainname(char _
 	down_write(&uts_sem);
 	errno = -EFAULT;
 	if (!copy_from_user(tmp, name, len)) {
-		memcpy(system_utsname.domainname, tmp, len);
-		system_utsname.domainname[len] = 0;
+		memcpy(ve_utsname.domainname, tmp, len);
+		ve_utsname.domainname[len] = 0;
 		errno = 0;
 	}
 	up_write(&uts_sem);
@@ -1657,7 +1809,19 @@ asmlinkage long sys_setrlimit(unsigned i
 	    (cputime_eq(current->signal->it_prof_expires, cputime_zero) ||
 	     new_rlim.rlim_cur <= cputime_to_secs(
 		     current->signal->it_prof_expires))) {
-		cputime_t cputime = secs_to_cputime(new_rlim.rlim_cur);
+		unsigned long rlim_cur = new_rlim.rlim_cur;
+		cputime_t cputime;
+
+		if (rlim_cur == 0) {
+			/*
+			 * The caller is asking for an immediate RLIMIT_CPU
+			 * expiry.  But we use the zero value to mean "it was
+			 * never set".  So let's cheat and make it one second
+			 * instead
+			 */
+			rlim_cur = 1;
+		}
+		cputime = secs_to_cputime(rlim_cur);
 		read_lock(&tasklist_lock);
 		spin_lock_irq(&current->sighand->siglock);
 		set_process_cpu_timer(current, CPUCLOCK_PROF,
diff -uprN linux-2.6.16/kernel/sysctl.c linux-2.6.16.ovz/kernel/sysctl.c
--- linux-2.6.16/kernel/sysctl.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/kernel/sysctl.c	2006-07-05 08:34:56.000000000 -0400
@@ -25,6 +25,8 @@
 #include <linux/slab.h>
 #include <linux/sysctl.h>
 #include <linux/proc_fs.h>
+#include <linux/ve_owner.h>
+#include <linux/ve.h>
 #include <linux/capability.h>
 #include <linux/ctype.h>
 #include <linux/utsname.h>
@@ -63,6 +65,7 @@ extern int max_threads;
 extern int sysrq_enabled;
 extern int core_uses_pid;
 extern int suid_dumpable;
+extern int sysctl_at_vsyscall;
 extern char core_pattern[];
 extern int cad_pid;
 extern int pid_max;
@@ -72,6 +75,12 @@ extern int printk_ratelimit_burst;
 extern int pid_max_min, pid_max_max;
 extern int sysctl_drop_caches;
 extern int percpu_pagelist_fraction;
+#ifdef CONFIG_VE
+int glob_virt_pids = 1;
+EXPORT_SYMBOL(glob_virt_pids);
+#endif
+
+extern int ve_area_access_check; /* fs/namei.c */
 
 #if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86)
 int unknown_nmi_panic;
@@ -101,6 +110,10 @@ extern int msg_ctlmnb;
 extern int msg_ctlmni;
 extern int sem_ctls[];
 #endif
+#ifdef CONFIG_SCHED_VCPU
+extern u32 vcpu_sched_timeslice;
+extern u32 vcpu_timeslice;
+#endif
 
 #ifdef __sparc__
 extern char reboot_command [];
@@ -108,6 +121,8 @@ extern int stop_a_enabled;
 extern int scons_pwroff;
 #endif
 
+extern int alloc_fail_warn;
+
 #ifdef __hppa__
 extern int pwrsw_enabled;
 extern int unaligned_enabled;
@@ -122,6 +137,7 @@ extern int spin_retry;
 #endif
 
 extern int sysctl_hz_timer;
+int decode_call_traces = 1;
 
 #ifdef CONFIG_BSD_PROCESS_ACCT
 extern int acct_parm[];
@@ -131,10 +147,14 @@ extern int acct_parm[];
 extern int no_unaligned_warning;
 #endif
 
+#ifdef CONFIG_FAIRSCHED
+extern int fairsched_max_latency;
+int fsch_sysctl_latency(ctl_table *ctl, int write, struct file *filp,
+		        void __user *buffer, size_t *lenp, loff_t *ppos);
+#endif
+
 static int parse_table(int __user *, int, void __user *, size_t __user *, void __user *, size_t,
 		       ctl_table *, void **);
-static int proc_doutsstring(ctl_table *table, int write, struct file *filp,
-		  void __user *buffer, size_t *lenp, loff_t *ppos);
 
 static ctl_table root_table[];
 static struct ctl_table_header root_table_header =
@@ -178,6 +198,8 @@ static void register_proc_table(ctl_tabl
 static void unregister_proc_table(ctl_table *, struct proc_dir_entry *);
 #endif
 
+extern struct new_utsname virt_utsname;
+
 /* The default sysctl tables: */
 
 static ctl_table root_table[] = {
@@ -276,6 +298,15 @@ static ctl_table kern_table[] = {
 		.strategy	= &sysctl_string,
 	},
 	{
+		.ctl_name	= KERN_VIRT_OSRELEASE,
+		.procname	= "virt_osrelease",
+		.data		= virt_utsname.release,
+		.maxlen		= sizeof(virt_utsname.release),
+		.mode		= 0644,
+		.proc_handler	= &proc_doutsstring,
+		.strategy	= &sysctl_string,
+	},
+	{
 		.ctl_name	= KERN_PANIC,
 		.procname	= "panic",
 		.data		= &panic_timeout,
@@ -353,6 +384,22 @@ static ctl_table kern_table[] = {
 		.proc_handler	= &proc_dointvec,
 	},
 #endif
+	{
+		.ctl_name	= KERN_SILENCE_LEVEL,
+		.procname	= "silence-level",
+		.data		= &console_silence_loglevel,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec
+	},
+	{
+		.ctl_name	= KERN_ALLOC_FAIL_WARN,
+		.procname	= "alloc_fail_warn",
+		.data		= &alloc_fail_warn,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec
+	},
 #ifdef __hppa__
 	{
 		.ctl_name	= KERN_HPPA_PWRSW,
@@ -579,6 +626,24 @@ static ctl_table kern_table[] = {
 		.proc_handler	= &proc_dointvec,
 	},
 #endif
+#ifdef CONFIG_SCHED_VCPU
+	{
+		.ctl_name	= KERN_VCPU_SCHED_TIMESLICE,
+		.procname	= "vcpu_sched_timeslice",
+		.data		= &vcpu_sched_timeslice,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec,
+	},
+	{
+		.ctl_name	= KERN_VCPU_TIMESLICE,
+		.procname	= "vcpu_timeslice",
+		.data		= &vcpu_timeslice,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec,
+	},
+#endif
 	{
 		.ctl_name	= KERN_PIDMAX,
 		.procname	= "pid_max",
@@ -590,6 +655,16 @@ static ctl_table kern_table[] = {
 		.extra1		= &pid_max_min,
 		.extra2		= &pid_max_max,
 	},
+#ifdef CONFIG_VE
+	{
+		.ctl_name	= KERN_VIRT_PIDS,
+		.procname	= "virt_pids",
+		.data		= &glob_virt_pids,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec,
+	},
+#endif
 	{
 		.ctl_name	= KERN_PANIC_ON_OOPS,
 		.procname	= "panic_on_oops",
@@ -683,6 +758,16 @@ static ctl_table kern_table[] = {
 		.proc_handler	= &proc_dointvec,
 	},
 #endif
+#ifdef CONFIG_FAIRSCHED
+	{
+		.ctl_name	= KERN_FAIRSCHED_MAX_LATENCY,
+		.procname	= "fairsched-max-latency",
+		.data		=  &fairsched_max_latency,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &fsch_sysctl_latency
+	},
+#endif
 	{ .ctl_name = 0 }
 };
 
@@ -1046,10 +1131,26 @@ static ctl_table fs_table[] = {
 		.mode		= 0644,
 		.proc_handler	= &proc_dointvec,
 	},
+	{
+		.ctl_name	= FS_AT_VSYSCALL,
+		.procname	= "vsyscall",
+		.data		= &sysctl_at_vsyscall,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec
+	},
 	{ .ctl_name = 0 }
 };
 
 static ctl_table debug_table[] = {
+	{
+		.ctl_name	= DBG_DECODE_CALLTRACES,
+		.procname	= "decode_call_traces",
+		.data		= &decode_call_traces,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec
+	},
 	{ .ctl_name = 0 }
 };
 
@@ -1113,6 +1214,7 @@ int do_sysctl(int __user *name, int nlen
 {
 	struct list_head *tmp;
 	int error = -ENOTDIR;
+	struct ve_struct *ve;
 
 	if (nlen <= 0 || nlen >= CTL_MAXNAME)
 		return -ENOTDIR;
@@ -1121,13 +1223,24 @@ int do_sysctl(int __user *name, int nlen
 		if (!oldlenp || get_user(old_len, oldlenp))
 			return -EFAULT;
 	}
+	ve = get_exec_env();
 	spin_lock(&sysctl_lock);
+#ifdef CONFIG_VE
+	tmp = ve->sysctl_lh.next;
+#else
 	tmp = &root_table_header.ctl_entry;
+#endif
 	do {
-		struct ctl_table_header *head =
-			list_entry(tmp, struct ctl_table_header, ctl_entry);
+		struct ctl_table_header *head;
 		void *context = NULL;
 
+#ifdef CONFIG_VE
+		if (tmp == &ve->sysctl_lh)
+			/* second pass over global variables */
+			tmp = &root_table_header.ctl_entry;
+#endif
+
+		head = list_entry(tmp, struct ctl_table_header, ctl_entry);
 		if (!use_table(head))
 			continue;
 
@@ -1181,10 +1294,14 @@ static int test_perm(int mode, int op)
 static inline int ctl_perm(ctl_table *table, int op)
 {
 	int error;
+	int mode = table->mode;
+
 	error = security_sysctl(table, op);
 	if (error)
 		return error;
-	return test_perm(table->mode, op);
+	if (!ve_accessible(table->owner_env, get_exec_env()))
+		mode &= ~0222; /* disable write access */
+	return test_perm(mode, op);
 }
 
 static int parse_table(int __user *name, int nlen,
@@ -1350,6 +1467,8 @@ struct ctl_table_header *register_sysctl
 					       int insert_at_head)
 {
 	struct ctl_table_header *tmp;
+	struct list_head *lh;
+
 	tmp = kmalloc(sizeof(struct ctl_table_header), GFP_KERNEL);
 	if (!tmp)
 		return NULL;
@@ -1358,17 +1477,52 @@ struct ctl_table_header *register_sysctl
 	tmp->used = 0;
 	tmp->unregistering = NULL;
 	spin_lock(&sysctl_lock);
+#ifdef CONFIG_VE
+	lh = &get_exec_env()->sysctl_lh;
+#else
+	lh = &root_table_header.ctl_entry;
+#endif
 	if (insert_at_head)
-		list_add(&tmp->ctl_entry, &root_table_header.ctl_entry);
+		list_add(&tmp->ctl_entry, lh);
 	else
-		list_add_tail(&tmp->ctl_entry, &root_table_header.ctl_entry);
+		list_add_tail(&tmp->ctl_entry, lh);
 	spin_unlock(&sysctl_lock);
 #ifdef CONFIG_PROC_FS
+#ifdef CONFIG_VE
+	register_proc_table(table, get_exec_env()->proc_sys_root, tmp);
+#else
 	register_proc_table(table, proc_sys_root, tmp);
 #endif
+#endif
 	return tmp;
 }
 
+void free_sysctl_clone(ctl_table *clone)
+{
+	kfree(clone);	/* one kfree(): matches the single kmalloc() in clone_sysctl_template() */
+}
+
+ctl_table *clone_sysctl_template(ctl_table *tmpl, int nr)
+{
+	int i;
+	ctl_table *clone;
+	/* Return a kmalloc()ed copy of tmpl[0..nr), or NULL if the allocation fails. */
+	clone = kmalloc(nr * sizeof(ctl_table), GFP_KERNEL);
+	if (clone == NULL)
+		return NULL;
+
+	memcpy(clone, tmpl, nr * sizeof(ctl_table));	/* start from a verbatim copy */
+	for (i = 0; i < nr; i++) {
+		if (tmpl[i].ctl_name == 0)	/* zero ctl_name: leave the copy untouched */
+			continue;
+		clone[i].owner_env = get_exec_env();	/* owner = get_exec_env() at clone time */
+		if (tmpl[i].child == NULL)	/* no child table to re-point */
+			continue;
+		clone[i].child = clone + (tmpl[i].child - tmpl); /* same index, but inside clone */
+	}	/* NOTE(review): assumes every non-NULL child points into tmpl[0..nr) -- confirm */
+	return clone;
+}
+
 /**
  * unregister_sysctl_table - unregister a sysctl table hierarchy
  * @header: the header returned from register_sysctl_table
@@ -1382,8 +1536,12 @@ void unregister_sysctl_table(struct ctl_
 	spin_lock(&sysctl_lock);
 	start_unregistering(header);
 #ifdef CONFIG_PROC_FS
+#ifdef CONFIG_VE
+	unregister_proc_table(header->ctl_table, get_exec_env()->proc_sys_root);
+#else
 	unregister_proc_table(header->ctl_table, proc_sys_root);
 #endif
+#endif
 	spin_unlock(&sysctl_lock);
 	kfree(header);
 }
@@ -1469,11 +1627,6 @@ static void unregister_proc_table(ctl_ta
 		 * its fields.  We are under sysctl_lock here.
 		 */
 		de->data = NULL;
-
-		/* Don't unregister proc entries that are still being used.. */
-		if (atomic_read(&de->count))
-			continue;
-
 		table->de = NULL;
 		remove_proc_entry(table->procname, root);
 	}
@@ -1615,7 +1768,7 @@ int proc_dostring(ctl_table *table, int 
  *	to observe. Should this be in kernel/sys.c ????
  */
  
-static int proc_doutsstring(ctl_table *table, int write, struct file *filp,
+int proc_doutsstring(ctl_table *table, int write, struct file *filp,
 		  void __user *buffer, size_t *lenp, loff_t *ppos)
 {
 	int r;
@@ -2190,7 +2343,7 @@ int proc_dostring(ctl_table *table, int 
 	return -ENOSYS;
 }
 
-static int proc_doutsstring(ctl_table *table, int write, struct file *filp,
+int proc_doutsstring(ctl_table *table, int write, struct file *filp,
 			    void __user *buffer, size_t *lenp, loff_t *ppos)
 {
 	return -ENOSYS;
@@ -2494,6 +2647,14 @@ void unregister_sysctl_table(struct ctl_
 {
 }
 
+ctl_table * clone_sysctl_template(ctl_table *tmpl, int nr)
+{
+	return NULL;	/* stub in the branch closed by "#endif CONFIG_SYSCTL": never allocates */
+}
+
+void free_sysctl_clone(ctl_table *tmpl)
+{	/* stub: the clone_sysctl_template() stub next to it only returns NULL, nothing to free */
+}
 #endif /* CONFIG_SYSCTL */
 
 /*
@@ -2506,6 +2667,7 @@ EXPORT_SYMBOL(proc_dointvec_minmax);
 EXPORT_SYMBOL(proc_dointvec_userhz_jiffies);
 EXPORT_SYMBOL(proc_dointvec_ms_jiffies);
 EXPORT_SYMBOL(proc_dostring);
+EXPORT_SYMBOL(proc_doutsstring);
 EXPORT_SYMBOL(proc_doulongvec_minmax);
 EXPORT_SYMBOL(proc_doulongvec_ms_jiffies_minmax);
 EXPORT_SYMBOL(register_sysctl_table);
@@ -2514,3 +2676,5 @@ EXPORT_SYMBOL(sysctl_jiffies);
 EXPORT_SYMBOL(sysctl_ms_jiffies);
 EXPORT_SYMBOL(sysctl_string);
 EXPORT_SYMBOL(unregister_sysctl_table);
+EXPORT_SYMBOL(clone_sysctl_template);
+EXPORT_SYMBOL(free_sysctl_clone);
diff -uprN linux-2.6.16/kernel/timer.c linux-2.6.16.ovz/kernel/timer.c
--- linux-2.6.16/kernel/timer.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/kernel/timer.c	2006-07-05 08:34:56.000000000 -0400
@@ -460,7 +460,11 @@ static inline void __run_timers(tvec_bas
 			spin_unlock_irq(&base->t_base.lock);
 			{
 				int preempt_count = preempt_count();
+				struct ve_struct *ve;
+
+				ve = set_exec_env(get_ve0());
 				fn(data);
+				(void)set_exec_env(ve);
 				if (preempt_count != preempt_count()) {
 					printk(KERN_WARNING "huh, entered %p "
 					       "with preempt_count %08x, exited"
@@ -868,6 +872,23 @@ EXPORT_SYMBOL(avenrun);
  * calc_load - given tick count, update the avenrun load estimates.
  * This is called while holding a write_lock on xtime_lock.
  */
+
+static void calc_load_ve(void)
+{
+	unsigned long flags, nr_unint;
+	/* Update kstat_glob.nr_unint_avg[]; the "calc_load" comment above is for calc_load(). */
+	nr_unint = nr_uninterruptible() * FIXED_1;	/* current sample, scaled by FIXED_1 */
+	spin_lock_irqsave(&kstat_glb_lock, flags);	/* the three averages change under this lock */
+	CALC_LOAD(kstat_glob.nr_unint_avg[0], EXP_1, nr_unint);	/* same EXP_* constants as avenrun[] in calc_load() */
+	CALC_LOAD(kstat_glob.nr_unint_avg[1], EXP_5, nr_unint);
+	CALC_LOAD(kstat_glob.nr_unint_avg[2], EXP_15, nr_unint);
+	spin_unlock_irqrestore(&kstat_glb_lock, flags);
+
+#ifdef CONFIG_VE
+	do_update_load_avg_ve();	/* defined elsewhere; presumably refreshes per-VE averages -- confirm */
+#endif
+}
+
 static inline void calc_load(unsigned long ticks)
 {
 	unsigned long active_tasks; /* fixed-point */
@@ -880,6 +901,7 @@ static inline void calc_load(unsigned lo
 		CALC_LOAD(avenrun[0], EXP_1, active_tasks);
 		CALC_LOAD(avenrun[1], EXP_5, active_tasks);
 		CALC_LOAD(avenrun[2], EXP_15, active_tasks);
+		calc_load_ve();
 	}
 }
 
@@ -990,7 +1012,7 @@ asmlinkage unsigned long sys_alarm(unsig
  */
 asmlinkage long sys_getpid(void)
 {
-	return current->tgid;
+	return virt_tgid(current);
 }
 
 /*
@@ -1012,12 +1034,13 @@ asmlinkage long sys_getpid(void)
 asmlinkage long sys_getppid(void)
 {
 	int pid;
+#ifndef CONFIG_DEBUG_SLAB
 	struct task_struct *me = current;
 	struct task_struct *parent;
 
 	parent = me->group_leader->real_parent;
 	for (;;) {
-		pid = parent->tgid;
+		pid = virt_tgid(parent);
 #if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT)
 {
 		struct task_struct *old = parent;
@@ -1034,6 +1057,16 @@ asmlinkage long sys_getppid(void)
 #endif
 		break;
 	}
+#else
+	/*
+	 * ->real_parent could be released before we dereference it, and we
+	 * would then access freed kernel memory, which faults with slab
+	 * debugging on.  Keep it simple and stupid: read it under tasklist_lock.
+	 */
+	read_lock(&tasklist_lock);
+	pid = virt_tgid(current->group_leader->real_parent);
+	read_unlock(&tasklist_lock);
+#endif
 	return pid;
 }
 
@@ -1164,7 +1197,7 @@ EXPORT_SYMBOL(schedule_timeout_uninterru
 /* Thread ID - the internal kernel "pid" */
 asmlinkage long sys_gettid(void)
 {
-	return current->pid;
+	return virt_pid(current);
 }
 
 /*
@@ -1176,11 +1209,12 @@ asmlinkage long sys_sysinfo(struct sysin
 	unsigned long mem_total, sav_total;
 	unsigned int mem_unit, bitcount;
 	unsigned long seq;
+	unsigned long *__avenrun;
+	struct timespec tp;
 
 	memset((char *)&val, 0, sizeof(struct sysinfo));
 
 	do {
-		struct timespec tp;
 		seq = read_seqbegin(&xtime_lock);
 
 		/*
@@ -1197,14 +1231,25 @@ asmlinkage long sys_sysinfo(struct sysin
 			tp.tv_nsec = tp.tv_nsec - NSEC_PER_SEC;
 			tp.tv_sec++;
 		}
-		val.uptime = tp.tv_sec + (tp.tv_nsec ? 1 : 0);
-
-		val.loads[0] = avenrun[0] << (SI_LOAD_SHIFT - FSHIFT);
-		val.loads[1] = avenrun[1] << (SI_LOAD_SHIFT - FSHIFT);
-		val.loads[2] = avenrun[2] << (SI_LOAD_SHIFT - FSHIFT);
+	} while (read_seqretry(&xtime_lock, seq));
 
+	if (ve_is_super(get_exec_env())) {
+		val.uptime = tp.tv_sec + (tp.tv_nsec ? 1 : 0);
+		__avenrun = &avenrun[0];
 		val.procs = nr_threads;
-	} while (read_seqretry(&xtime_lock, seq));
+	}
+#ifdef CONFIG_VE
+	else {
+		struct ve_struct *ve;
+		ve = get_exec_env();
+		__avenrun = &ve->avenrun[0];
+		val.procs = atomic_read(&ve->pcounter);
+		val.uptime = tp.tv_sec - ve->start_timespec.tv_sec;
+	}
+#endif
+	val.loads[0] = __avenrun[0] << (SI_LOAD_SHIFT - FSHIFT);
+	val.loads[1] = __avenrun[1] << (SI_LOAD_SHIFT - FSHIFT);
+	val.loads[2] = __avenrun[2] << (SI_LOAD_SHIFT - FSHIFT);
 
 	si_meminfo(&val);
 	si_swapinfo(&val);
diff -uprN linux-2.6.16/kernel/ub/Kconfig linux-2.6.16.ovz/kernel/ub/Kconfig
--- linux-2.6.16/kernel/ub/Kconfig	1969-12-31 19:00:00.000000000 -0500
+++ linux-2.6.16.ovz/kernel/ub/Kconfig	2006-07-05 08:34:56.000000000 -0400
@@ -0,0 +1,89 @@
+#
+# User resources part (UBC)
+#
+# Copyright (C) 2005  SWsoft
+# All rights reserved.
+#
+# Licensing governed by "linux/COPYING.SWsoft" file.
+
+menu "User resources"
+
+config USER_RESOURCE
+	bool "Enable user resource accounting"
+	default y
+	help 
+          This patch provides accounting and allows configuring
+          limits on a user's consumption of exhaustible system resources.
+          The most important resource controlled by this patch is unswappable
+          memory (either mlock'ed or used by internal kernel structures and
+          buffers). The main goal of this patch is to protect processes
+          from running short of important resources because of accidental
+          misbehavior of processes or malicious activity aiming to ``kill''
+          the system. It is worth mentioning that resource limits configured
+          by setrlimit(2) do not give an acceptable level of protection
+          because they cover only a small fraction of resources and work on a
+          per-process basis.  Per-process accounting doesn't prevent malicious
+          users from spawning a lot of resource-consuming processes.
+
+config USER_RSS_ACCOUNTING
+	bool "Account physical memory usage"
+	default y
+	depends on USER_RESOURCE
+	help
+          This allows estimating per-beancounter physical memory usage.
+          The implemented algorithm accounts shared pages of memory as well,
+          dividing them by the number of beancounters which use the page.
+
+config USER_SWAP_ACCOUNTING
+	bool "Account swap usage"
+	default y
+	depends on USER_RESOURCE
+	help
+          This allows accounting of swap usage.
+
+config USER_RESOURCE_PROC
+	bool "Report resource usage in /proc"
+	default y
+	depends on USER_RESOURCE
+	help
+          Allows a system administrator to inspect resource accounts and limits.
+
+config UBC_DEBUG
+	bool "User resources debug features"
+	default n
+	depends on USER_RESOURCE
+	help
+	  Allows setting up debug features for user resource accounting.
+
+config UBC_DEBUG_KMEM
+	bool "Debug kmemsize with cache counters"
+	default n
+	depends on UBC_DEBUG
+	help
+	  Adds /proc/user_beancounters_debug entry to get statistics
+	  about cache usage of each beancounter
+
+config UBC_KEEP_UNUSED
+	bool "Keep unused beancounter alive"
+	default y
+	depends on UBC_DEBUG
+	help
+	  If on, unused beancounters are kept on the hash so that their
+	  maxheld values can still be inspected.
+
+config UBC_DEBUG_ITEMS
+	bool "Account resources in items rather than in bytes"
+	default y
+	depends on UBC_DEBUG
+	help
+	  When true some of the resources (e.g. kmemsize) are accounted
+	  in items instead of bytes.
+
+config UBC_UNLIMITED
+	bool "Use unlimited ubc settings"
+	default y
+	depends on UBC_DEBUG
+	help
+	  When ON all limits and barriers are set to max values.
+
+endmenu
diff -uprN linux-2.6.16/kernel/ub/Makefile linux-2.6.16.ovz/kernel/ub/Makefile
--- linux-2.6.16/kernel/ub/Makefile	1969-12-31 19:00:00.000000000 -0500
+++ linux-2.6.16.ovz/kernel/ub/Makefile	2006-07-05 08:34:56.000000000 -0400
@@ -0,0 +1,20 @@
+#
+# User resources part (UBC)
+#
+# Copyright (C) 2005  SWsoft
+# All rights reserved.
+#
+# Licensing governed by "linux/COPYING.SWsoft" file.
+
+obj-y := ub_sys.o
+obj-$(CONFIG_USER_RESOURCE) += beancounter.o
+obj-$(CONFIG_USER_RESOURCE) += ub_dcache.o
+obj-$(CONFIG_USER_RESOURCE) += ub_mem.o
+obj-$(CONFIG_USER_RESOURCE) += ub_misc.o
+obj-$(CONFIG_USER_RESOURCE) += ub_net.o
+obj-$(CONFIG_USER_RESOURCE) += ub_pages.o
+obj-$(CONFIG_USER_RESOURCE) += ub_stat.o
+# obj-$(CONFIG_USER_RESOURCE) += ub_oom.o
+
+obj-$(CONFIG_USER_RSS_ACCOUNTING) += ub_page_bc.o
+obj-$(CONFIG_USER_RESOURCE_PROC)  += ub_proc.o
diff -uprN linux-2.6.16/kernel/ub/beancounter.c linux-2.6.16.ovz/kernel/ub/beancounter.c
--- linux-2.6.16/kernel/ub/beancounter.c	1969-12-31 19:00:00.000000000 -0500
+++ linux-2.6.16.ovz/kernel/ub/beancounter.c	2006-07-05 08:34:56.000000000 -0400
@@ -0,0 +1,675 @@
+/*
+ *  linux/kernel/ub/beancounter.c
+ *
+ *  Copyright (C) 1998  Alan Cox
+ *                1998-2000  Andrey V. Savochkin <saw@saw.sw.com.sg>
+ *  Copyright (C) 2000-2005 SWsoft
+ *  All rights reserved.
+ *  
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ * TODO:
+ *   - more intelligent limit check in mremap(): currently the new size is
+ *     charged and _then_ old size is uncharged
+ *     (almost done: !move_vma case is completely done,
+ *      move_vma in its current implementation requires too many conditions to
+ *      do things right, because it may be not only expansion, but shrinking
+ *      also, plus do_munmap will require an additional parameter...)
+ *   - problem: bad pmd page handling
+ *   - consider /proc redesign
+ *   - TCP/UDP ports
+ *   + consider whether __charge_beancounter_locked should be inline
+ *
+ * Changes:
+ *   1999/08/17  Marcelo Tosatti <marcelo@conectiva.com.br>
+ *	- Set "barrier" and "limit" parts of limits atomically.
+ *   1999/10/06  Marcelo Tosatti <marcelo@conectiva.com.br>
+ *	- setublimit system call.
+ */
+
+#include <linux/slab.h>
+#include <linux/module.h>
+
+#include <ub/beancounter.h>
+#include <ub/ub_hash.h>
+#include <ub/ub_vmpages.h>
+
+static kmem_cache_t *ub_cachep;
+static struct user_beancounter default_beancounter;
+struct user_beancounter ub0;
+
+const char *ub_rnames[] = {
+	"kmemsize",	/* 0 */
+	"lockedpages",
+	"privvmpages",
+	"shmpages",
+	"dummy",
+	"numproc",	/* 5 */
+	"physpages",
+	"vmguarpages",
+	"oomguarpages",
+	"numtcpsock",
+	"numflock",	/* 10 */
+	"numpty",
+	"numsiginfo",
+	"tcpsndbuf",
+	"tcprcvbuf",
+	"othersockbuf",	/* 15 */
+	"dgramrcvbuf",
+	"numothersock",
+	"dcachesize",
+	"numfile",
+	"dummy",	/* 20 */
+	"dummy",
+	"dummy",
+	"numiptent",
+	"unused_privvmpages",	/* UB_RESOURCES */
+	"tmpfs_respages",
+	"swap_pages",
+	"held_pages",
+};
+
+static void init_beancounter_struct(struct user_beancounter *ub);
+static void init_beancounter_store(struct user_beancounter *ub);
+static void init_beancounter_nolimits(struct user_beancounter *ub);
+
+void print_ub_uid(struct user_beancounter *ub, char *buf, int size)
+{
+	if (ub->parent == NULL)	/* top-level: bare id, else "parent.id" */
+		snprintf(buf, size, "%u", ub->ub_uid);
+	else
+		snprintf(buf, size, "%u.%u", ub->parent->ub_uid, ub->ub_uid);
+}
+EXPORT_SYMBOL(print_ub_uid);
+
+#define ub_hash_fun(x) ((((x) >> 8) ^ (x)) & (UB_HASH_SIZE - 1))
+#define ub_subhash_fun(p, id) ub_hash_fun((p)->ub_uid + (id) * 17)
+struct ub_hash_slot ub_hash[UB_HASH_SIZE];
+spinlock_t ub_hash_lock;
+EXPORT_SYMBOL(ub_hash);
+EXPORT_SYMBOL(ub_hash_lock);
+
+/*
+ *	Per user resource beancounting. Resources are tied to their luid.
+ *	The resource structure itself is tagged both to the process and
+ *	the charging resources (a socket doesn't want to have to search for
+ *	things at irq time for example). Reference counters keep things in
+ *	hand.
+ *
+ *	The case where a user creates a resource, kills all his processes and
+ *	then starts new ones is correctly handled this way. The refcounters
+ *	will mean the old entry is still around with the resource tied to it.
+ */
+struct user_beancounter *get_beancounter_byuid(uid_t uid, int create)
+{
+	struct user_beancounter *new_ub, *ub;
+	unsigned long flags;
+	struct ub_hash_slot *slot;
+
+	slot = &ub_hash[ub_hash_fun(uid)];
+	new_ub = NULL;
+
+retry:
+	spin_lock_irqsave(&ub_hash_lock, flags);
+	ub = slot->ubh_beans;
+	while (ub != NULL && (ub->ub_uid != uid || ub->parent != NULL))
+		ub = ub->ub_next;
+
+	if (ub != NULL) {
+		/* found: take a reference, free the spare if we lost a race */
+		get_beancounter(ub);
+		spin_unlock_irqrestore(&ub_hash_lock, flags);
+		if (new_ub != NULL)
+			kmem_cache_free(ub_cachep, new_ub);
+		return ub;
+	}
+
+	if (!create) {
+		/* no ub found and the caller did not ask to create one */
+		spin_unlock_irqrestore(&ub_hash_lock, flags);
+		return NULL;
+	}
+
+	if (new_ub != NULL) {
+		/* second pass, still absent: install the ub allocated below */
+		new_ub->ub_next = slot->ubh_beans;
+		slot->ubh_beans = new_ub;
+		spin_unlock_irqrestore(&ub_hash_lock, flags);
+		return new_ub;
+	}
+	spin_unlock_irqrestore(&ub_hash_lock, flags);
+
+	/* alloc new ub with the lock dropped, then redo the lookup */
+	new_ub = (struct user_beancounter *)kmem_cache_alloc(ub_cachep, 
+			GFP_KERNEL);
+	if (new_ub == NULL)
+		return NULL;
+
+	ub_debug(UBD_ALLOC, "Creating ub %p in slot %p\n", new_ub, slot);
+	memcpy(new_ub, &default_beancounter, sizeof(*new_ub));
+	init_beancounter_struct(new_ub);
+	new_ub->ub_uid = uid;
+	goto retry;
+}
+EXPORT_SYMBOL(get_beancounter_byuid);
+
+struct user_beancounter *get_subbeancounter_byid(struct user_beancounter *p,
+		int id, int create)
+{
+	struct user_beancounter *new_ub, *ub;
+	unsigned long flags;
+	struct ub_hash_slot *slot;
+
+	slot = &ub_hash[ub_subhash_fun(p, id)];
+	new_ub = NULL;
+
+retry:
+	spin_lock_irqsave(&ub_hash_lock, flags);
+	ub = slot->ubh_beans;
+	while (ub != NULL && (ub->parent != p || ub->ub_uid != id))
+		ub = ub->ub_next;
+
+	if (ub != NULL) {
+		/* found */
+		get_beancounter(ub);
+		spin_unlock_irqrestore(&ub_hash_lock, flags);
+		if (new_ub != NULL) {
+			put_beancounter(new_ub->parent);
+			kmem_cache_free(ub_cachep, new_ub);
+		}
+		return ub;
+	}
+
+	if (!create) {
+		/* no ub found */
+		spin_unlock_irqrestore(&ub_hash_lock, flags);
+		return NULL;
+	}
+
+	if (new_ub != NULL) {
+		/* install new ub */
+		get_beancounter(new_ub);
+		new_ub->ub_next = slot->ubh_beans;
+		slot->ubh_beans = new_ub;
+		spin_unlock_irqrestore(&ub_hash_lock, flags);
+		return new_ub;
+	}
+	spin_unlock_irqrestore(&ub_hash_lock, flags);
+
+	/* alloc new ub */
+	new_ub = (struct user_beancounter *)kmem_cache_alloc(ub_cachep, 
+			GFP_KERNEL);
+	if (new_ub == NULL)
+		return NULL;
+
+	ub_debug(UBD_ALLOC, "Creating sub %p in slot %p\n", new_ub, slot);
+	memset(new_ub, 0, sizeof(*new_ub));
+	init_beancounter_nolimits(new_ub);
+	init_beancounter_store(new_ub);
+	init_beancounter_struct(new_ub);
+	atomic_set(&new_ub->ub_refcount, 0);
+	new_ub->ub_uid = id;
+	new_ub->parent = get_beancounter(p);
+	goto retry;
+}
+EXPORT_SYMBOL(get_subbeancounter_byid);
+
+struct user_beancounter *subbeancounter_findcreate(struct user_beancounter *p,
+		int id)
+{
+	struct user_beancounter *ub;
+	unsigned long flags;
+	struct ub_hash_slot *slot;
+
+	slot = &ub_hash[ub_subhash_fun(p, id)];
+
+	spin_lock_irqsave(&ub_hash_lock, flags);
+	ub = slot->ubh_beans;
+	while (ub != NULL && (ub->parent != p || ub->ub_uid != id))
+		ub = ub->ub_next;
+
+	if (ub != NULL) {
+		/* found: take a reference for the caller */
+		get_beancounter(ub);
+		goto done;
+	}
+
+	/* alloc new ub; NULL is returned if this fails */
+	/* Lock is held, so GFP_ATOMIC even for non-atomic callers. Den */
+	ub = (struct user_beancounter *)kmem_cache_alloc(ub_cachep, GFP_ATOMIC);
+	if (ub == NULL)
+		goto done;
+
+	ub_debug(UBD_ALLOC, "Creating sub %p in slot %p\n", ub, slot);
+	memset(ub, 0, sizeof(*ub));
+	init_beancounter_nolimits(ub);
+	init_beancounter_store(ub);
+	init_beancounter_struct(ub);
+	atomic_set(&ub->ub_refcount, 0);
+	ub->ub_uid = id;
+	ub->parent = get_beancounter(p);
+
+	/* install new ub; this get is the reference handed to the caller */
+	get_beancounter(ub);
+	ub->ub_next = slot->ubh_beans;
+	slot->ubh_beans = ub;
+
+done:
+	spin_unlock_irqrestore(&ub_hash_lock, flags);
+	return ub;
+}
+EXPORT_SYMBOL(subbeancounter_findcreate);
+#ifndef CONFIG_UBC_KEEP_UNUSED
+
+static int verify_res(struct user_beancounter *ub, int resource,
+		unsigned long held)
+{
+	char id[64];
+
+	if (likely(held == 0))
+		return 1;
+
+	print_ub_uid(ub, id, sizeof(id));
+	printk(KERN_WARNING "Ub %s helds %lu in %s on put\n",
+			id, held, ub_rnames[resource]);
+	return 0;
+}
+
+static inline void verify_held(struct user_beancounter *ub)
+{
+	int i, clean;
+
+	clean = 1;
+	for (i = 0; i < UB_RESOURCES; i++)
+		clean &= verify_res(ub, i, ub->ub_parms[i].held);
+
+	clean &= verify_res(ub, UB_UNUSEDPRIVVM, ub->ub_unused_privvmpages);
+	clean &= verify_res(ub, UB_TMPFSPAGES, ub->ub_tmpfs_respages);
+	clean &= verify_res(ub, UB_SWAPPAGES, ub->ub_swap_pages);
+	clean &= verify_res(ub, UB_HELDPAGES, (unsigned long)ub->ub_held_pages);
+
+	ub_debug_trace(!clean, 5, 60*HZ);
+}
+
+static void __unhash_beancounter(struct user_beancounter *ub)
+{
+	struct user_beancounter **ubptr;
+	struct ub_hash_slot *slot;
+
+	if (ub->parent != NULL)
+		slot = &ub_hash[ub_subhash_fun(ub->parent, ub->ub_uid)];
+	else
+	       	slot = &ub_hash[ub_hash_fun(ub->ub_uid)];
+	ubptr = &slot->ubh_beans;
+
+	while (*ubptr != NULL) {
+		if (*ubptr == ub) {
+			verify_held(ub);
+			*ubptr = ub->ub_next;
+			return;
+		}
+		ubptr = &((*ubptr)->ub_next);
+	}
+	printk(KERN_ERR "Invalid beancounter %p, luid=%d on free, slot %p\n",
+			ub, ub->ub_uid, slot);
+}
+#endif
+
+void __put_beancounter(struct user_beancounter *ub)
+{
+	unsigned long flags;
+	struct user_beancounter *parent;
+
+again:
+	parent = ub->parent;	/* saved now: ub may be freed below */
+	ub_debug(UBD_ALLOC, "__put bc %p (cnt %d) for %.20s pid %d "
+			"cur %08lx cpu %d.\n",
+			ub, atomic_read(&ub->ub_refcount), 
+			current->comm, current->pid, 
+			(unsigned long)current, smp_processor_id());
+
+	/* equivalent to atomic_dec_and_lock_irqsave() */
+	local_irq_save(flags);
+	if (likely(!atomic_dec_and_lock(&ub->ub_refcount, &ub_hash_lock))) {
+		if (unlikely(atomic_read(&ub->ub_refcount) < 0))
+			printk(KERN_ERR "UB: Bad ub refcount: ub=%p, "
+					"luid=%d, ref=%d\n",
+					ub, ub->ub_uid,
+					atomic_read(&ub->ub_refcount));
+		local_irq_restore(flags);
+		return;
+	}
+
+	if (unlikely(ub == get_ub0())) {
+		printk(KERN_ERR "Trying to put ub0\n");
+		spin_unlock_irqrestore(&ub_hash_lock, flags);
+		return;
+	}
+
+#ifndef CONFIG_UBC_KEEP_UNUSED
+	__unhash_beancounter(ub);
+	spin_unlock_irqrestore(&ub_hash_lock, flags);
+	ub_free_counters(ub);
+	kmem_cache_free(ub_cachep, ub);
+#else
+	spin_unlock_irqrestore(&ub_hash_lock, flags);
+#endif
+	ub = parent;	/* now drop the reference ub held on its parent */
+	if (ub != NULL)
+		goto again;
+}
+EXPORT_SYMBOL(__put_beancounter);
+
+/*
+ *	Generic resource charging stuff
+ */
+
+int __charge_beancounter_locked(struct user_beancounter *ub,
+		int resource, unsigned long val, enum severity strict)
+{
+	ub_debug_resource(resource, "Charging %lu for %d of %p with %lu\n",
+			val, resource, ub, ub->ub_parms[resource].held);
+	/*
+	 * ub_value <= UB_MAXVALUE, value <= UB_MAXVALUE, and only one addition
+	 * at the moment is possible so an overflow is impossible.
+	 */
+	ub->ub_parms[resource].held += val;
+
+	switch (strict) {
+		case UB_HARD:
+			if (ub->ub_parms[resource].held >
+					ub->ub_parms[resource].barrier)
+				break;
+		case UB_SOFT:	/* fallthrough from UB_HARD */
+			if (ub->ub_parms[resource].held >
+					ub->ub_parms[resource].limit)
+				break;
+		case UB_FORCE:	/* fallthrough: the charge is accepted */
+			ub_adjust_maxheld(ub, resource);
+			return 0;
+		default:
+			BUG();
+	}
+
+	if (strict == UB_SOFT && ub_ratelimit(&ub->ub_limit_rl))
+		printk(KERN_INFO "Fatal resource shortage: %s, UB %d.\n",
+		       ub_rnames[resource], ub->ub_uid);
+	ub->ub_parms[resource].failcnt++;
+	ub->ub_parms[resource].held -= val;	/* roll the charge back */
+	return -ENOMEM;
+}
+
+int charge_beancounter(struct user_beancounter *ub,
+		int resource, unsigned long val, enum severity strict)
+{
+	int retval;
+	struct user_beancounter *p, *q;
+	unsigned long flags;
+
+	retval = -EINVAL;
+	if (val > UB_MAXVALUE)
+		goto out;
+
+	local_irq_save(flags);
+	for (p = ub; p != NULL; p = p->parent) {
+		spin_lock(&p->ub_lock);
+		retval = __charge_beancounter_locked(p, resource, val, strict);
+		spin_unlock(&p->ub_lock);
+		if (retval)
+			goto unroll;
+	}
+out_restore:
+	local_irq_restore(flags);
+out:
+	return retval;
+
+unroll:
+	for (q = ub; q != p; q = q->parent) {
+		spin_lock(&q->ub_lock);
+		__uncharge_beancounter_locked(q, resource, val);
+		spin_unlock(&q->ub_lock);
+	}
+	goto out_restore;
+}
+
+EXPORT_SYMBOL(charge_beancounter);
+
+void charge_beancounter_notop(struct user_beancounter *ub,
+		int resource, unsigned long val)
+{
+	struct user_beancounter *p;
+	unsigned long flags;
+
+	local_irq_save(flags);
+	for (p = ub; p->parent != NULL; p = p->parent) {
+		spin_lock(&p->ub_lock);
+		__charge_beancounter_locked(p, resource, val, UB_FORCE);
+		spin_unlock(&p->ub_lock);
+	}
+	local_irq_restore(flags);
+}
+
+EXPORT_SYMBOL(charge_beancounter_notop);
+
+void uncharge_warn(struct user_beancounter *ub, int resource,
+		unsigned long val, unsigned long held)
+{
+	char id[64];
+
+	print_ub_uid(ub, id, sizeof(id));
+	printk(KERN_ERR "Uncharging too much %lu h %lu, res %s ub %s\n",
+			val, held, ub_rnames[resource], id);
+	ub_debug_trace(1, 10, 10*HZ);
+}
+
+void __uncharge_beancounter_locked(struct user_beancounter *ub,
+		int resource, unsigned long val)
+{
+	ub_debug_resource(resource, "Uncharging %lu for %d of %p with %lu\n",
+			val, resource, ub, ub->ub_parms[resource].held);
+	if (ub->ub_parms[resource].held < val) {
+		uncharge_warn(ub, resource,
+				val, ub->ub_parms[resource].held);
+		val = ub->ub_parms[resource].held;
+	}
+	ub->ub_parms[resource].held -= val;
+}
+
+void uncharge_beancounter(struct user_beancounter *ub,
+		int resource, unsigned long val)
+{
+	unsigned long flags;
+	struct user_beancounter *p;
+
+	for (p = ub; p != NULL; p = p->parent) {
+		spin_lock_irqsave(&p->ub_lock, flags);
+		__uncharge_beancounter_locked(p, resource, val);
+		spin_unlock_irqrestore(&p->ub_lock, flags);
+	}
+}
+
+EXPORT_SYMBOL(uncharge_beancounter);
+
+void uncharge_beancounter_notop(struct user_beancounter *ub,
+		int resource, unsigned long val)
+{
+	struct user_beancounter *p;
+	unsigned long flags;
+
+	local_irq_save(flags);
+	for (p = ub; p->parent != NULL; p = p->parent) {
+		spin_lock(&p->ub_lock);
+		__uncharge_beancounter_locked(p, resource, val);
+		spin_unlock(&p->ub_lock);
+	}
+	local_irq_restore(flags);
+}
+
+EXPORT_SYMBOL(uncharge_beancounter_notop);
+
+
+/*
+ *	Rate limiting: returns 1 if the event may be reported, 0 to drop it.
+ *	p->bucket drains by one per full p->interval jiffies since last pass.
+ */
+int ub_ratelimit(struct ub_rate_info *p)
+{
+	unsigned long cjif, djif, flags;
+	static spinlock_t ratelimit_lock = SPIN_LOCK_UNLOCKED;
+	long new_bucket;
+
+	spin_lock_irqsave(&ratelimit_lock, flags);
+	cjif = jiffies;
+	djif = cjif - p->last;
+	if (djif < p->interval) {
+		if (p->bucket >= p->burst) {
+			spin_unlock_irqrestore(&ratelimit_lock, flags);
+			return 0;
+		}
+		p->bucket++;
+	} else {
+		/* interval == 0 always lands here; don't divide by it */
+		new_bucket = p->interval == 0 ? 0 :
+			p->bucket - (djif / (unsigned)p->interval);
+		p->bucket = (new_bucket < 0 ? 0 : new_bucket) + 1;
+	}
+	p->last = cjif;
+	spin_unlock_irqrestore(&ratelimit_lock, flags);
+	return 1;
+}
+EXPORT_SYMBOL(ub_ratelimit);
+
+
+/*
+ *	Initialization
+ *
+ *	struct user_beancounter contains
+ *	 - limits and other configuration settings,
+ *	   with a copy stored for accounting purposes,
+ *	 - structural fields: lists, spinlocks and so on.
+ *
+ *	Before these parts are initialized, the structure should be memset
+ *	to 0 or copied from a known clean structure.  That takes care of a lot
+ *	of fields not initialized explicitly.
+ */
+
+static void init_beancounter_struct(struct user_beancounter *ub)
+{
+	ub->ub_magic = UB_MAGIC;
+	atomic_set(&ub->ub_refcount, 1);
+	spin_lock_init(&ub->ub_lock);
+	INIT_LIST_HEAD(&ub->ub_tcp_sk_list);
+	INIT_LIST_HEAD(&ub->ub_other_sk_list);
+#ifdef CONFIG_UBC_DEBUG_KMEM
+	INIT_LIST_HEAD(&ub->ub_cclist);
+#endif
+}
+
+static void init_beancounter_store(struct user_beancounter *ub)
+{
+	int res;
+
+	/* snapshot the current parameters of every resource into ub_store */
+	for (res = 0; res < UB_RESOURCES; res++)
+		memcpy(ub->ub_store + res, ub->ub_parms + res,
+				sizeof(struct ubparm));
+}
+
+static void init_beancounter_nolimits(struct user_beancounter *ub)
+{
+	int k;
+
+	for (k = 0; k < UB_RESOURCES; k++) {
+		ub->ub_parms[k].limit = UB_MAXVALUE;
+		/* FIXME: whether this is right for physpages and guarantees? */
+		ub->ub_parms[k].barrier = UB_MAXVALUE;
+	}
+
+	/* FIXME: set unlimited rate? */
+	ub->ub_limit_rl.burst = 4;
+	ub->ub_limit_rl.interval = 300*HZ;
+}
+
+static void init_beancounter_syslimits(struct user_beancounter *ub,
+		unsigned long mp)
+{
+	extern int max_threads;
+	int k;
+
+	ub->ub_parms[UB_KMEMSIZE].limit = 
+		mp > (192*1024*1024 >> PAGE_SHIFT) ?
+				32*1024*1024 : (mp << PAGE_SHIFT) / 6;
+	ub->ub_parms[UB_LOCKEDPAGES].limit = 8;
+	ub->ub_parms[UB_PRIVVMPAGES].limit = UB_MAXVALUE;
+	ub->ub_parms[UB_SHMPAGES].limit = 64;
+	ub->ub_parms[UB_NUMPROC].limit = max_threads / 2;
+	ub->ub_parms[UB_NUMTCPSOCK].limit = 1024;
+	ub->ub_parms[UB_TCPSNDBUF].limit = 1024*4*1024; /* 4k per socket */
+	ub->ub_parms[UB_TCPRCVBUF].limit = 1024*6*1024; /* 6k per socket */
+	ub->ub_parms[UB_NUMOTHERSOCK].limit = 256;
+	ub->ub_parms[UB_DGRAMRCVBUF].limit = 256*4*1024; /* 4k per socket */
+	ub->ub_parms[UB_OTHERSOCKBUF].limit = 256*8*1024; /* 8k per socket */
+	ub->ub_parms[UB_NUMFLOCK].limit = 1024;
+	ub->ub_parms[UB_NUMPTY].limit = 16;
+	ub->ub_parms[UB_NUMSIGINFO].limit = 1024;
+	ub->ub_parms[UB_DCACHESIZE].limit = 1024*1024;
+	ub->ub_parms[UB_NUMFILE].limit = 1024;
+
+	for (k = 0; k < UB_RESOURCES; k++)
+		ub->ub_parms[k].barrier = ub->ub_parms[k].limit;
+
+	ub->ub_limit_rl.burst = 4;
+	ub->ub_limit_rl.interval = 300*HZ;
+}
+
+void __init ub_init_ub0(void)
+{
+	struct user_beancounter *ub;
+
+	init_cache_counters();
+	ub = get_ub0();
+	memset(ub, 0, sizeof(*ub));
+	ub->ub_uid = 0;
+	init_beancounter_nolimits(ub);
+	init_beancounter_store(ub);
+	init_beancounter_struct(ub);
+
+	memset(&current->task_bc, 0, sizeof(struct task_beancounter));
+	(void)set_exec_ub(get_ub0());
+	current->task_bc.fork_sub = get_beancounter(get_ub0());
+	init_mm.mm_ub = get_beancounter(ub);
+}
+
+void __init ub_hash_init(void)
+{
+	struct user_beancounter *ub;
+
+	spin_lock_init(&ub_hash_lock);
+	/* make ub0 the head of the chain its uid hashes to */
+	ub = get_ub0();
+	ub_hash[ub_hash_fun(ub->ub_uid)].ubh_beans = ub;
+}
+
+void __init ub_init_cache(unsigned long mempages)
+{
+	extern int skbc_cache_init(void);
+	int res;
+
+	res = 0; /* skbc_cache_init(); */
+	ub_cachep = kmem_cache_create("user_beancounters",
+			sizeof(struct user_beancounter),
+			0, SLAB_HWCACHE_ALIGN, NULL, NULL);
+	if (res < 0 || ub_cachep == NULL)
+		panic("Can't create ubc caches\n");
+
+	memset(&default_beancounter, 0, sizeof(default_beancounter));
+#ifdef CONFIG_UBC_UNLIMITED
+	init_beancounter_nolimits(&default_beancounter);
+#else
+	init_beancounter_syslimits(&default_beancounter, mempages);
+#endif
+	init_beancounter_store(&default_beancounter);
+	init_beancounter_struct(&default_beancounter);
+
+	ub_hash_init();
+}
diff -uprN linux-2.6.16/kernel/ub/ub_dcache.c linux-2.6.16.ovz/kernel/ub/ub_dcache.c
--- linux-2.6.16/kernel/ub/ub_dcache.c	1969-12-31 19:00:00.000000000 -0500
+++ linux-2.6.16.ovz/kernel/ub/ub_dcache.c	2006-07-05 08:34:56.000000000 -0400
@@ -0,0 +1,325 @@
+/*
+ *  kernel/ub/ub_dcache.c
+ *
+ *  Copyright (C) 2005  SWsoft
+ *  All rights reserved.
+ *  
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/dcache.h>
+#include <linux/slab.h>
+#include <linux/kmem_cache.h>
+#include <linux/fs.h>
+#include <linux/err.h>
+
+#include <ub/beancounter.h>
+#include <ub/ub_mem.h>
+#include <ub/ub_dcache.h>
+
+/*
+ * Locking
+ *                          traverse  dcache_lock  d_lock
+ *        ub_dentry_charge   +         +            +
+ *      ub_dentry_uncharge   +         -            +
+ * ub_dentry_charge_nofail   +         +            -
+ *
+ * d_inuse is atomic so that we can inc dentry's parent d_inuse in
+ * ub_dentry_charge with only the dentry's d_lock held.
+ *
+ * Race in uncharge vs charge_nofail is handled with dcache_lock.
+ * Race in charge vs charge_nofail is inessential since they both inc d_inuse.
+ * Race in uncharge vs charge is handled by altering d_inuse under d_lock.
+ *
+ * Race with d_move is handled this way:
+ *  - charge_nofail and uncharge are protected by dcache_lock;
+ *  - charge works only with dentry and dentry->d_parent->d_inuse, so
+ *    it's enough to lock only the dentry.
+ */
+
+/*
+ * Beancounting
+ * UB argument must NOT be NULL
+ */
+
+static int do_charge_dcache(struct user_beancounter *ub, unsigned long size, 
+		enum severity sv)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&ub->ub_lock, flags);
+	if (__charge_beancounter_locked(ub, UB_KMEMSIZE, CHARGE_SIZE(size), sv))
+		goto out_mem;
+	if (__charge_beancounter_locked(ub, UB_DCACHESIZE, size, sv))
+		goto out_dcache;
+	spin_unlock_irqrestore(&ub->ub_lock, flags);
+	return 0;
+
+out_dcache:
+	__uncharge_beancounter_locked(ub, UB_KMEMSIZE, CHARGE_SIZE(size));
+out_mem:
+	spin_unlock_irqrestore(&ub->ub_lock, flags);
+	return -ENOMEM;
+}
+
+static void do_uncharge_dcache(struct user_beancounter *ub, 
+		unsigned long size)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&ub->ub_lock, flags);
+	__uncharge_beancounter_locked(ub, UB_KMEMSIZE, CHARGE_SIZE(size));
+	__uncharge_beancounter_locked(ub, UB_DCACHESIZE, size);
+	spin_unlock_irqrestore(&ub->ub_lock, flags);
+}
+
+static int charge_dcache(struct user_beancounter *ub, unsigned long size, 
+		enum severity sv)
+{
+	struct user_beancounter *p, *q;
+
+	for (p = ub; p != NULL; p = p->parent) {
+		if (do_charge_dcache(p, size, sv))
+			goto unroll;
+	}
+	return 0;
+
+unroll:
+	for (q = ub; q != p; q = q->parent)
+		do_uncharge_dcache(q, size);
+	return -ENOMEM;
+}
+
+void uncharge_dcache(struct user_beancounter *ub, unsigned long size)
+{
+	for (; ub != NULL; ub = ub->parent)
+		do_uncharge_dcache(ub, size);
+}
+
+static inline void charge_dcache_forced(struct user_beancounter *ub, 
+		unsigned long size)
+{
+	charge_dcache(ub, size, UB_FORCE);
+}
+
+static inline void d_forced_charge(struct dentry_beancounter *d_bc)
+{
+	d_bc->d_ub = get_beancounter(get_exec_ub());
+	if (d_bc->d_ub == NULL)
+		return;
+
+	charge_dcache_forced(d_bc->d_ub, d_bc->d_ubsize);
+}
+
+static inline void d_uncharge(struct dentry_beancounter *d_bc)
+{
+	if (d_bc->d_ub == NULL)
+		return;
+
+	uncharge_dcache(d_bc->d_ub, d_bc->d_ubsize);
+	put_beancounter(d_bc->d_ub);
+	d_bc->d_ub = NULL;
+}
+
+/*
+ * Alloc / free dentry_beancounter
+ */
+
+static inline int d_alloc_beancounter(struct dentry *d)
+{
+	return 0;
+}
+
+static inline void d_free_beancounter(struct dentry_beancounter *d_bc)
+{
+}
+
+static inline unsigned long d_charge_size(struct dentry *dentry)
+{
+	/* dentry's d_name is already set to appropriate value (see d_alloc) */
+	return inode_cachep->objuse + dentry_cache->objuse +
+		(dname_external(dentry) ?
+		 kmem_obj_memusage((void *)dentry->d_name.name) : 0);
+}
+
+/*
+ * dentry mark in use operation
+ * d_lock is held
+ */
+
+static int d_inc_inuse(struct dentry *dentry)
+{
+	struct user_beancounter *ub;
+	struct dentry_beancounter *d_bc;
+
+	if (dentry != dentry->d_parent) {
+		struct dentry *parent;
+
+		/*
+		 * Increment d_inuse of parent.
+		 * It can't change since dentry->d_lock is held.
+		 */
+		parent = dentry->d_parent;
+		if (ub_dget_testone(parent))
+			BUG();
+	}
+
+	d_bc = &dentry->dentry_bc;
+	ub = get_beancounter(get_exec_ub());
+
+	if (ub != NULL && charge_dcache(ub, d_bc->d_ubsize, UB_SOFT))
+		goto out_err;
+
+	d_bc->d_ub = ub;
+	return 0;
+
+out_err:
+	put_beancounter(ub);
+	d_bc->d_ub = NULL;
+	return -ENOMEM;
+}
+
+/*
+ * no locks are held; returns 0 or a negative errno (-ENOMEM on charge failure)
+ */
+int ub_dentry_alloc(struct dentry *dentry)
+{
+	int err;
+	struct dentry_beancounter *d_bc;
+
+	err = d_alloc_beancounter(dentry);
+	if (err < 0)
+		return err;
+
+	d_bc = &dentry->dentry_bc;
+	d_bc->d_ub = get_beancounter(get_exec_ub());
+	atomic_set(&d_bc->d_inuse, INUSE_INIT); /* see comment in ub_dcache.h */
+	d_bc->d_ubsize = d_charge_size(dentry);
+
+	if (d_bc->d_ub != NULL &&
+			charge_dcache(d_bc->d_ub, d_bc->d_ubsize, UB_HARD)) {
+		put_beancounter(d_bc->d_ub);
+		d_bc->d_ub = NULL;	/* reference is gone, as in d_inc_inuse */
+		d_free_beancounter(d_bc);
+		return -ENOMEM;
+	}
+
+	return 0;
+}
+
+/*
+ * Charge / uncharge functions.
+ *
+ * We take d_lock to protect dentry_bc from concurrent access
+ * when simultaneous __d_lookup and d_put happen on one dentry.
+ */
+
+/*
+ * called without dcache_lock, with d_lock and rcu_read_lock held;
+ * drops d_lock and rcu_read_lock, returns an error if any
+ */
+int ub_dentry_charge(struct dentry *dentry)
+{
+	int err;
+
+	err = 0;
+	if (ub_dget_testone(dentry))
+		err = d_inc_inuse(dentry);
+
+	/*
+	 * d_lock and rcu_read_lock are dropped here
+	 * (see also __d_lookup)
+	 */
+	spin_unlock(&dentry->d_lock);
+	rcu_read_unlock();
+
+	if (!err)
+		return 0;
+
+	/*
+	 * d_invalidate is required for real_lookup
+	 * since it tries to create a new dentry on
+	 * d_lookup failure.
+	 */
+	if (!d_invalidate(dentry))
+		return err;
+
+	/* d_invalidate did not succeed, force dentry to be charged */
+	d_forced_charge(&dentry->dentry_bc);
+	return 0;
+}
+
+/*
+ * dcache_lock is held
+ * no d_locks, sequentially takes and drops them from dentry upward
+ */
+void ub_dentry_uncharge(struct dentry *dentry)
+{
+	struct dentry *parent;
+
+	/* go up while the status changes, stopping at the root */
+	while (1) {
+		/*
+		 * We need d_lock here to handle
+		 * the race with ub_dentry_charge
+		 */
+		spin_lock(&dentry->d_lock);
+		if (!ub_dput_testzero(dentry)) {
+			spin_unlock(&dentry->d_lock);
+			break;
+		}
+
+		/* state transition 0 => -1 */
+		d_uncharge(&dentry->dentry_bc);
+		parent = dentry->d_parent;
+		spin_unlock(&dentry->d_lock);
+
+		/*
+		 * dcache_lock is held (see comment in __dget_locked)
+		 * so we can safely move upwards.
+		 */
+		if (dentry == parent)
+			break;
+		dentry = parent;
+	}
+}
+
+/* 
+ * forced version. for dget in clean cache, when error is not an option
+ *
+ * dcache_lock is held
+ * no d_locks
+ */
+void ub_dentry_charge_nofail(struct dentry *dentry)
+{
+	struct dentry *parent;
+
+	/* go up until status is changed and root is not reached */
+	while (1) {
+		if (!ub_dget_testone(dentry))
+			break;
+
+		/*
+		 * state transition -1 => 0
+		 *
+		 * No need to lock dentry before atomic_inc
+		 * like we do in ub_dentry_uncharge.
+		 * We can't race with ub_dentry_uncharge due
+		 * to dcache_lock. The only possible race with
+		 * ub_dentry_charge is OK since they both
+		 * do atomic_inc.
+		 */
+		d_forced_charge(&dentry->dentry_bc);
+		/*
+		 * dcache_lock is held (see comment in __dget_locked)
+		 * so we can safely move upwards.
+		 */
+		parent = dentry->d_parent;
+
+		if (dentry == parent)
+			break;
+		dentry = parent;
+	}
+}
diff -uprN linux-2.6.16/kernel/ub/ub_mem.c linux-2.6.16.ovz/kernel/ub/ub_mem.c
--- linux-2.6.16/kernel/ub/ub_mem.c	1969-12-31 19:00:00.000000000 -0500
+++ linux-2.6.16.ovz/kernel/ub/ub_mem.c	2006-07-05 08:34:56.000000000 -0400
@@ -0,0 +1,384 @@
+/*
+ *  kernel/ub/ub_mem.c
+ *
+ *  Copyright (C) 2005  SWsoft
+ *  All rights reserved.
+ *  
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#include <linux/slab.h>
+#include <linux/kmem_cache.h>
+#include <linux/kmem_slab.h>
+#include <linux/highmem.h>
+#include <linux/vmalloc.h>
+#include <linux/mm.h>
+#include <linux/gfp.h>
+#include <linux/swap.h>
+#include <linux/spinlock.h>
+#include <linux/sched.h>
+#include <linux/module.h>
+#include <ub/beancounter.h>
+#include <ub/ub_mem.h>
+#include <ub/ub_hash.h>
+
+/*
+ * Initialization
+ */
+
+/*
+ * Slab accounting
+ */
+
+#ifdef CONFIG_UBC_DEBUG_KMEM
+
+#define CC_HASH_SIZE	1024
+static struct ub_cache_counter *cc_hash[CC_HASH_SIZE];
+spinlock_t cc_lock;
+
+static void __free_cache_counters(struct user_beancounter *ub,
+		kmem_cache_t *cachep)
+{
+	struct ub_cache_counter *cc, **pprev, *del;
+	int i;
+	unsigned long flags;
+
+	del = NULL;
+	spin_lock_irqsave(&cc_lock, flags);
+	for (i = 0; i < CC_HASH_SIZE; i++) {
+		pprev = &cc_hash[i];
+		cc = cc_hash[i];
+		while (cc != NULL) {
+			if (cc->ub != ub && cc->cachep != cachep) {
+				pprev = &cc->next;
+				cc = cc->next;
+				continue;
+			}
+
+			list_del(&cc->ulist);
+			*pprev = cc->next;
+			cc->next = del;
+			del = cc;
+			cc = *pprev;
+		}
+	}
+	spin_unlock_irqrestore(&cc_lock, flags);
+
+	while (del != NULL) {
+		cc = del->next;
+		kfree(del);
+		del = cc;
+	}
+}
+
+void ub_free_counters(struct user_beancounter *ub)
+{
+	__free_cache_counters(ub, NULL);
+}
+
+void ub_kmemcache_free(kmem_cache_t *cachep)
+{
+	__free_cache_counters(NULL, cachep);
+}
+
+void __init init_cache_counters(void)
+{
+	memset(cc_hash, 0, CC_HASH_SIZE * sizeof(cc_hash[0]));
+	spin_lock_init(&cc_lock);
+}
+
+#define cc_hash_fun(ub, cachep)	(				\
+	(((unsigned long)(ub) >> L1_CACHE_SHIFT) ^		\
+	 ((unsigned long)(ub) >> (BITS_PER_LONG / 2)) ^		\
+	 ((unsigned long)(cachep) >> L1_CACHE_SHIFT) ^		\
+	 ((unsigned long)(cachep) >> (BITS_PER_LONG / 2))	\
+	) & (CC_HASH_SIZE - 1))
+
+static int change_slab_charged(struct user_beancounter *ub, void *objp,
+		unsigned long val, int mask)
+{
+	struct ub_cache_counter *cc, *new_cnt, **pprev;
+	kmem_cache_t *cachep;
+	unsigned long flags;
+
+	cachep = virt_to_cache(objp);
+	new_cnt = NULL;
+
+again:
+	spin_lock_irqsave(&cc_lock, flags);
+	cc = cc_hash[cc_hash_fun(ub, cachep)];
+	while (cc) {
+		if (cc->ub == ub && cc->cachep == cachep)
+			goto found;
+		cc = cc->next;
+	}
+
+	if (new_cnt != NULL)
+		goto insert;
+
+	spin_unlock_irqrestore(&cc_lock, flags);
+
+	new_cnt = kmalloc(sizeof(*new_cnt), mask & ~__GFP_UBC);
+	if (new_cnt == NULL)
+		return -ENOMEM;
+
+	new_cnt->counter = 0;
+	new_cnt->ub = ub;
+	new_cnt->cachep = cachep;
+	goto again;
+
+insert:
+	pprev = &cc_hash[cc_hash_fun(ub, cachep)];
+	new_cnt->next = *pprev;
+	*pprev = new_cnt;
+	list_add(&new_cnt->ulist, &ub->ub_cclist);
+	cc = new_cnt;
+	new_cnt = NULL;
+
+found:
+	cc->counter += val;
+	spin_unlock_irqrestore(&cc_lock, flags);
+	if (new_cnt)
+		kfree(new_cnt);
+	return 0;
+}
+
+static inline int inc_slab_charged(struct user_beancounter *ub,
+		void *objp, int mask)
+{
+	return change_slab_charged(ub, objp, 1, mask);
+}
+
+static inline void dec_slab_charged(struct user_beancounter *ub, void *objp)
+{
+	if (change_slab_charged(ub, objp, -1, 0) < 0)
+		BUG();
+}
+
+#include <linux/vmalloc.h>
+
+static inline int inc_pages_charged(struct user_beancounter *ub,
+		struct page *pg, int order)
+{
+	int cpu;
+
+	cpu = get_cpu();
+	ub->ub_stat[cpu].pages_charged += (1 << order);
+	put_cpu();
+	return 0;
+}
+
+static inline void dec_pages_charged(struct user_beancounter *ub,
+		struct page *pg, int order)
+{
+	int cpu;
+
+	cpu = get_cpu();
+	ub->ub_stat[cpu].pages_charged -= (1 << order);
+	put_cpu();
+}
+
+void inc_vmalloc_charged(struct vm_struct *vm, int flags)
+{
+	int cpu;
+	struct user_beancounter *ub;
+
+	if (!(flags & __GFP_UBC))
+		return;
+
+	ub = get_exec_ub();
+	if (ub == NULL)
+		return;
+
+	cpu = get_cpu();
+	ub->ub_stat[cpu].vmalloc_charged += vm->nr_pages;
+	put_cpu();
+}
+
+void dec_vmalloc_charged(struct vm_struct *vm)
+{
+	int cpu;
+	struct user_beancounter *ub;
+
+	ub = page_ub(vm->pages[0]);
+	if (ub == NULL)
+		return;
+
+	cpu = get_cpu();
+	ub->ub_stat[cpu].vmalloc_charged -= vm->nr_pages;
+	put_cpu();
+}
+
+#else
+#define inc_slab_charged(ub, o, m)	(0)
+#define dec_slab_charged(ub, o)		do { } while (0)
+#define inc_pages_charged(ub, pg, o) 	(0)
+#define dec_pages_charged(ub, pg, o)	do { } while (0)
+#endif
+
+static inline struct user_beancounter **slab_ub_ref(void *objp)
+{
+	kmem_cache_t *cachep;
+	struct slab *slabp;
+	int objnr;
+
+	cachep = virt_to_cache(objp);
+	BUG_ON(!(cachep->flags & SLAB_UBC));
+	slabp = virt_to_slab(objp);
+	objnr = (objp - slabp->s_mem) / cachep->buffer_size;
+	return slab_ubcs(cachep, slabp) + objnr;
+}
+
+struct user_beancounter *slab_ub(void *objp)
+{
+	struct user_beancounter **ub_ref;
+
+	ub_ref = slab_ub_ref(objp);
+	return *ub_ref;
+}
+
+EXPORT_SYMBOL(slab_ub);
+
+static inline int should_charge(void *objp, int flags)
+{
+	kmem_cache_t *cachep;
+
+	cachep = virt_to_cache(objp);
+	if (!(cachep->flags & SLAB_UBC))
+		return 0;
+	if ((cachep->flags & SLAB_NO_CHARGE) && !(flags & __GFP_UBC))
+		return 0;
+	return 1;
+}
+
+#define should_uncharge(objp)	should_charge(objp, __GFP_UBC)
+
+int ub_slab_charge(void *objp, int flags)
+{
+	unsigned int size;
+	struct user_beancounter *ub;
+
+	if (!should_charge(objp, flags))
+		return 0;
+
+	ub = get_beancounter(get_exec_ub());
+	if (ub == NULL)
+		return 0;
+
+	size = CHARGE_SIZE(kmem_obj_memusage(objp));
+	if (charge_beancounter(ub, UB_KMEMSIZE, size,
+				(flags & __GFP_SOFT_UBC ? UB_SOFT : UB_HARD)))
+		goto out_err;
+
+	if (inc_slab_charged(ub, objp, flags) < 0) {
+		uncharge_beancounter(ub, UB_KMEMSIZE, size);
+		goto out_err;
+	}
+	*slab_ub_ref(objp) = ub;
+	return 0;
+
+out_err:
+	put_beancounter(ub);
+	return -ENOMEM;
+}
+
+void ub_slab_uncharge(void *objp)
+{
+	unsigned int size;
+	struct user_beancounter **ub_ref;
+
+	if (!should_uncharge(objp))
+		return;
+
+	ub_ref = slab_ub_ref(objp);
+	if (*ub_ref == NULL)
+		return;
+
+	dec_slab_charged(*ub_ref, objp);
+	size = CHARGE_SIZE(kmem_obj_memusage(objp));
+	uncharge_beancounter(*ub_ref, UB_KMEMSIZE, size);
+	put_beancounter(*ub_ref);
+	*ub_ref = NULL;
+}
+
+/*
+ * Pages accounting
+ */
+
+inline int ub_page_charge(struct page *page, int order, int mask)
+{
+	struct user_beancounter *ub;
+
+	ub = NULL;
+	if (!(mask & __GFP_UBC))
+		goto out;
+
+	ub = get_beancounter(get_exec_ub());
+	if (ub == NULL)
+		goto out;
+
+	if (charge_beancounter(ub, UB_KMEMSIZE, CHARGE_ORDER(order),
+				(mask & __GFP_SOFT_UBC ? UB_SOFT : UB_HARD)))
+		goto err;
+	if (inc_pages_charged(ub, page, order) < 0) {
+		uncharge_beancounter(ub, UB_KMEMSIZE, CHARGE_ORDER(order));
+		goto err;
+	}
+out:
+	BUG_ON(page_ub(page) != NULL);
+	page_ub(page) = ub;
+	return 0;
+
+err:
+	BUG_ON(page_ub(page) != NULL);
+	put_beancounter(ub);
+	return -ENOMEM;
+}
+
+inline void ub_page_uncharge(struct page *page, int order)
+{
+	struct user_beancounter *ub;
+
+	ub = page_ub(page);
+	if (ub == NULL)
+		return;
+
+	dec_pages_charged(ub, page, order);
+	BUG_ON(ub->ub_magic != UB_MAGIC);
+	uncharge_beancounter(ub, UB_KMEMSIZE, CHARGE_ORDER(order));
+	put_beancounter(ub);
+	page_ub(page) = NULL;
+}
+
+/* 
+ * takes init_mm.page_table_lock 
+ * some outer lock to protect pages from vmalloced area must be held
+ */
+struct user_beancounter *vmalloc_ub(void *obj)
+{
+	struct page *pg;
+
+	pg = vmalloc_to_page(obj);
+	if (pg == NULL)
+		return NULL;
+
+	return page_ub(pg);
+}
+
+EXPORT_SYMBOL(vmalloc_ub);
+
+/* Return the beancounter recorded for obj, picking the lookup by address. */
+struct user_beancounter *mem_ub(void *obj)
+{
+	unsigned long addr = (unsigned long)obj;
+
+	/* addresses inside the vmalloc area go through vmalloc_ub() ... */
+	if (addr >= VMALLOC_START && addr < VMALLOC_END)
+		return vmalloc_ub(obj);
+
+	/* ... everything else is handed to slab_ub() */
+	return slab_ub(obj);
+}
+
+EXPORT_SYMBOL(mem_ub);
diff -uprN linux-2.6.16/kernel/ub/ub_misc.c linux-2.6.16.ovz/kernel/ub/ub_misc.c
--- linux-2.6.16/kernel/ub/ub_misc.c	1969-12-31 19:00:00.000000000 -0500
+++ linux-2.6.16.ovz/kernel/ub/ub_misc.c	2006-07-05 08:34:56.000000000 -0400
@@ -0,0 +1,244 @@
+/*
+ *  kernel/ub/ub_misc.c
+ *
+ *  Copyright (C) 2005  SWsoft
+ *  All rights reserved.
+ *  
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#include <linux/tty.h>
+#include <linux/tty_driver.h>
+#include <linux/signal.h>
+#include <linux/slab.h>
+#include <linux/fs.h>
+#include <linux/sched.h>
+#include <linux/kmem_cache.h>
+#include <linux/module.h>
+
+#include <ub/beancounter.h>
+#include <ub/ub_mem.h>
+
+/*
+ * Task stuff
+ */
+
+/* Set up tsk's task_bc; old_bc's task_freserv is handed over to it. */
+static void init_task_sub(struct task_struct *tsk,
+		struct task_beancounter *old_bc)
+{
+	struct task_beancounter *fresh = &tsk->task_bc;
+
+	fresh->fork_sub = get_beancounter(old_bc->fork_sub);
+	fresh->task_fnode = NULL;
+
+	/* move the reserve: the source keeps nothing */
+	fresh->task_freserv = old_bc->task_freserv;
+	old_bc->task_freserv = NULL;
+	memset(&fresh->task_data, 0, sizeof(fresh->task_data));
+}
+
+/*
+ * Charge one UB_NUMPROC to parent's fork_sub; the new task gets it as
+ * task_ub and exec_ub.  Returns 0, or -ENOMEM if the hard charge fails.
+ */
+int ub_task_charge(struct task_struct *parent, struct task_struct *task)
+{
+	struct task_beancounter *old_bc = &parent->task_bc;
+	struct task_beancounter *new_bc = &task->task_bc;
+	struct user_beancounter *ub;
+
+#if 0
+	if (old_bc->exec_ub == NULL) {
+		/* FIXME: this won't work if task_bc is outside task_struct */
+		init_task_sub(task, old_bc);
+		return 0;
+	}
+#endif
+	ub = old_bc->fork_sub;
+	if (charge_beancounter(ub, UB_NUMPROC, 1, UB_HARD) < 0)
+		return -ENOMEM;
+	new_bc->task_ub = get_beancounter(ub);
+	new_bc->exec_ub = get_beancounter(ub);
+	init_task_sub(task, old_bc);
+	return 0;
+}
+
+/*
+ * Undo the task's beancounter state: UB_NUMPROC charge, ub refs, reserve.
+ */
+void ub_task_uncharge(struct task_struct *task)
+{
+	struct task_beancounter *task_bc = &task->task_bc;
+
+	if (task_bc->task_ub != NULL)
+		uncharge_beancounter(task_bc->task_ub, UB_NUMPROC, 1);
+	put_beancounter(task_bc->exec_ub);
+	put_beancounter(task_bc->task_ub);
+	put_beancounter(task_bc->fork_sub);
+	/* can't be freed elsewhere, failures possible in the middle of fork */
+	kfree(task_bc->task_freserv);	/* kfree(NULL) is a no-op */
+	task_bc->task_freserv = NULL;	/* leave no dangling pointer */
+	task_bc->exec_ub = (struct user_beancounter *)0xdeadbcbc;
+}
+
+/*
+ * Files and file locks.
+ */
+
+/* Charge one UB_NUMFILE to the beancounter slab_ub() reports for f, if any. */
+int ub_file_charge(struct file *f)
+{
+	/* No need to get_beancounter here since it's already got in slab */
+	struct user_beancounter *owner = slab_ub(f);
+
+	if (owner != NULL)
+		return charge_beancounter(owner, UB_NUMFILE, 1, UB_HARD);
+
+	return 0;
+}
+
+/*
+ * Give back one UB_NUMFILE to the beancounter slab_ub() reports for f.
+ */
+void ub_file_uncharge(struct file *f)
+{
+	/* Ub will be put in slab */
+	struct user_beancounter *owner = slab_ub(f);
+
+	if (owner != NULL)
+		uncharge_beancounter(owner, UB_NUMFILE, 1);
+}
+
+/* Charge one UB_NUMFLOCK for fl; fl->fl_charged is set only on success. */
+int ub_flock_charge(struct file_lock *fl, int hard)
+{
+	/* No need to get_beancounter here since it's already got in slab */
+	struct user_beancounter *owner = slab_ub(fl);
+	int err = 0;
+
+	if (owner != NULL) {
+		err = charge_beancounter(owner, UB_NUMFLOCK, 1,
+				hard ? UB_HARD : UB_SOFT);
+		if (!err)
+			fl->fl_charged = 1;
+	}
+	return err;
+}
+
+/* Give back the UB_NUMFLOCK unit of fl and clear fl->fl_charged. */
+void ub_flock_uncharge(struct file_lock *fl)
+{
+	/* Ub will be put in slab */
+	struct user_beancounter *owner = slab_ub(fl);
+
+	/* nothing to undo unless the lock is marked as charged */
+	if (owner != NULL && fl->fl_charged) {
+		uncharge_beancounter(owner, UB_NUMFLOCK, 1);
+		fl->fl_charged = 0;
+	}
+}
+
+/*
+ * Signal handling
+ */
+
+/*
+ * Under ub->ub_lock: hard-charge size of UB_KMEMSIZE plus one UB_NUMSIGINFO;
+ * the first charge is rolled back if the second one fails.
+ * Returns 0 or -ENOMEM.
+ */
+static int do_ub_siginfo_charge(struct user_beancounter *ub,
+		unsigned long size)
+{
+	unsigned long flags;
+	int ret = -ENOMEM;
+
+	spin_lock_irqsave(&ub->ub_lock, flags);
+	if (!__charge_beancounter_locked(ub, UB_KMEMSIZE, size, UB_HARD)) {
+		if (!__charge_beancounter_locked(ub, UB_NUMSIGINFO, 1, UB_HARD))
+			ret = 0;
+		else
+			__uncharge_beancounter_locked(ub, UB_KMEMSIZE, size);
+	}
+	spin_unlock_irqrestore(&ub->ub_lock, flags);
+	return ret;
+}
+
+static void do_ub_siginfo_uncharge(struct user_beancounter *ub,
+		unsigned long size)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&ub->ub_lock, flags);	/* both counters, one lock */
+	__uncharge_beancounter_locked(ub, UB_KMEMSIZE, size);
+	__uncharge_beancounter_locked(ub, UB_NUMSIGINFO, 1);
+	spin_unlock_irqrestore(&ub->ub_lock, flags);
+}
+
+/* Charge sq to ub and each ancestor; partial charges are undone on failure. */
+int ub_siginfo_charge(struct sigqueue *sq, struct user_beancounter *ub)
+{
+	unsigned long size = CHARGE_SIZE(kmem_obj_memusage(sq));
+	struct user_beancounter *level, *undo;
+
+	for (level = ub; level != NULL; level = level->parent)
+		if (do_ub_siginfo_charge(level, size))
+			break;
+	if (level == NULL) {
+		sq->sig_ub = get_beancounter(ub);
+		return 0;
+	}
+
+	/* level is the one that refused: all levels before it were charged */
+	for (undo = ub; undo != level; undo = undo->parent)
+		do_ub_siginfo_uncharge(undo, size);
+	return -ENOMEM;
+}
+EXPORT_SYMBOL(ub_siginfo_charge);
+
+/* Uncharge sq from sq->sig_ub and all its ancestors, then drop the ub ref. */
+void ub_siginfo_uncharge(struct sigqueue *sq)
+{
+	struct user_beancounter *owner = sq->sig_ub, *level;
+	unsigned long size;
+
+	sq->sig_ub = NULL;
+	size = CHARGE_SIZE(kmem_obj_memusage(sq));
+	for (level = owner; level != NULL; level = level->parent)
+		do_ub_siginfo_uncharge(level, size);
+	put_beancounter(owner);
+}
+
+/*
+ * PTYs
+ */
+
+/* Charge one UB_NUMPTY for a PTY master not yet marked TTY_CHARGED. */
+int ub_pty_charge(struct tty_struct *tty)
+{
+	struct user_beancounter *owner = slab_ub(tty);
+	int err;
+
+	if (!owner || tty->driver->subtype != PTY_TYPE_MASTER)
+		return 0;
+	if (test_bit(TTY_CHARGED, &tty->flags))
+		return 0;	/* already accounted */
+	err = charge_beancounter(owner, UB_NUMPTY, 1, UB_HARD);
+	if (!err)
+		set_bit(TTY_CHARGED, &tty->flags);
+	return err;
+}
+
+/* Release the UB_NUMPTY unit of a PTY master that carries TTY_CHARGED. */
+void ub_pty_uncharge(struct tty_struct *tty)
+{
+	struct user_beancounter *owner = slab_ub(tty);
+
+	if (!owner || tty->driver->subtype != PTY_TYPE_MASTER ||
+			!test_bit(TTY_CHARGED, &tty->flags))
+		return;
+	uncharge_beancounter(owner, UB_NUMPTY, 1);
+	clear_bit(TTY_CHARGED, &tty->flags);
+}
diff -uprN linux-2.6.16/kernel/ub/ub_net.c linux-2.6.16.ovz/kernel/ub/ub_net.c
--- linux-2.6.16/kernel/ub/ub_net.c	1969-12-31 19:00:00.000000000 -0500
+++ linux-2.6.16.ovz/kernel/ub/ub_net.c	2006-07-05 08:34:56.000000000 -0400
@@ -0,0 +1,1044 @@
+/*
+ *  linux/kernel/ub/ub_net.c
+ *
+ *  Copyright (C) 1998-2004  Andrey V. Savochkin <saw@saw.sw.com.sg>
+ *  Copyright (C) 2005 SWsoft
+ *  All rights reserved.
+ *  
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ * TODO:
+ *   - sizeof(struct inode) charge
+ *   = tcp_mem_schedule() feedback based on ub limits
+ *   + measures so that one socket won't exhaust all send buffers,
+ *     see bug in bugzilla
+ *   = sk->socket check for NULL in snd_wakeups
+ *     (tcp_write_space checks for NULL itself)
+ *   + in tcp_close(), orphaned socket abortion should be based on ubc
+ *     resources (same in tcp_out_of_resources)
+ *     Beancounter should also have separate orphaned socket counter...
+ *   + for rcv, in-order segment should be accepted
+ *     if only barrier is exceeded
+ *   = tcp_rmem_schedule() feedback based on ub limits
+ *   - repair forward_alloc mechanism for receive buffers
+ *     Its idea is that some buffer space is pre-charged so that receive fast
+ *     path doesn't need to take spinlocks and do other heavy stuff
+ *   + tcp_prune_queue actions based on ub limits
+ *   + window adjustments depending on available buffers for receive
+ *   - window adjustments depending on available buffers for send
+ *   + race around usewreserv
+ *   + avoid allocating new page for each tiny-gram, see letter from ANK
+ *   + rename ub_sock_lock
+ *   + sk->sleep wait queue probably can be used for all wakeups, and
+ *     sk->ub_wait is unnecessary
+ *   + for UNIX sockets, the current algorithm will lead to
+ *     UB_UNIX_MINBUF-sized messages only for non-blocking case
+ *   - charge for af_packet sockets
+ *   + all datagram sockets should be charged to NUMUNIXSOCK
+ *   - we do not charge for skb copies and clones staying in device queues
+ *   + live-lock if number of sockets is big and buffer limits are small
+ *     [diff-ubc-dbllim3]
+ *   - check that multiple readers/writers on the same socket won't cause fatal
+ *     consequences
+ *   - check allocation/charge orders
+ *   + There is potential problem with callback_lock.  In *snd_wakeup we take
+ *     beancounter first, in sock_def_error_report - callback_lock first.
+ *     then beancounter.  This is not a problem if callback_lock taken
+ *     readonly, but anyway...
+ *   - SKB_CHARGE_SIZE doesn't include the space wasted by slab allocator
+ * General kernel problems:
+ *   - in tcp_sendmsg(), if allocation fails, non-blocking sockets with ASYNC
+ *     notification won't get signals
+ *   - datagram_poll looks racy
+ *
+ */
+
+#include <linux/net.h>
+#include <linux/slab.h>
+#include <linux/kmem_cache.h>
+#include <linux/gfp.h>
+#include <linux/err.h>
+#include <linux/socket.h>
+#include <linux/module.h>
+#include <linux/sched.h>
+
+#include <net/sock.h>
+
+#include <ub/beancounter.h>
+#include <ub/ub_net.h>
+#include <ub/ub_debug.h>
+
+
+/* Skb truesize definition. Bad place. Den */
+
+static inline int skb_chargesize_head(struct sk_buff *skb)
+{
+	return skb_charge_size(skb->end - skb->head +	/* head..end span */
+				sizeof(struct skb_shared_info));
+}
+
+/*
+ * Charge size of a whole skb: its head, PAGE_SIZE per nr_frags entry, and
+ * (recursively) every skb chained on frag_list.
+ */
+int skb_charge_fullsize(struct sk_buff *skb)
+{
+	struct sk_buff *frag = skb_shinfo(skb)->frag_list;
+	int total;
+
+	total = skb_chargesize_head(skb) +
+		PAGE_SIZE * skb_shinfo(skb)->nr_frags;
+	for (; frag != NULL; frag = frag->next)
+		total += skb_charge_fullsize(frag);
+
+	return total;
+}
+EXPORT_SYMBOL(skb_charge_fullsize);
+
+static int ub_sock_makewreserv_locked(struct sock *sk, 
+		int bufid, int sockid, unsigned long size);
+
+/* 1 if count reaches the top ub's UB_NUMTCPSOCK barrier >> 2, else 0. */
+int __ub_too_many_orphans(struct sock *sk, int count)
+{
+	struct user_beancounter *top;
+
+	if (!sock_has_ubc(sk))
+		return 0;
+
+	for (top = sock_bc(sk)->ub; top->parent != NULL; top = top->parent);
+	return count >= (top->ub_parms[UB_NUMTCPSOCK].barrier >> 2);
+}
+
+/*
+ * Queueing
+ */
+
+/* Wake sockets queued on ub_other_sk_list while their reserve can be made. */
+static void ub_sock_snd_wakeup(struct user_beancounter *ub)
+{
+	struct list_head *p;
+	struct sock_beancounter *skbc;
+	struct sock *sk;
+	struct user_beancounter *cub;
+	unsigned long added;
+
+	while (!list_empty(&ub->ub_other_sk_list)) {
+		p = ub->ub_other_sk_list.next;
+		skbc = list_entry(p, struct sock_beancounter, ub_sock_list);
+		sk = skbc_sock(skbc);
+		ub_debug(UBD_NET_SLEEP, "Found sock to wake up\n");
+		added = -skbc->poll_reserv;
+		if (ub_sock_makewreserv_locked(sk, UB_OTHERSOCKBUF,
+					UB_NUMOTHERSOCK, skbc->ub_waitspc))
+			break;
+		added += skbc->poll_reserv;
+
+		/*
+		 * See comments in ub_tcp_snd_wakeup.
+		 * Locking note: both unix_write_space and
+		 * sock_def_write_space take callback_lock themselves.
+		 * NOTE(review): the old remark that it is also taken
+		 * here is stale - this function never takes it.
+		 */
+		sk->sk_write_space(sk);
+		list_del_init(&skbc->ub_sock_list);
+
+		if (skbc->ub != ub && added) {
+			cub = get_beancounter(skbc->ub);
+			spin_unlock(&ub->ub_lock);
+			charge_beancounter_notop(cub, UB_OTHERSOCKBUF, added);
+			put_beancounter(cub);
+			spin_lock(&ub->ub_lock);
+		}
+	}
+}
+
+/* Wake sockets queued on ub_tcp_sk_list while their reserve can be made. */
+static void ub_tcp_snd_wakeup(struct user_beancounter *ub)
+{
+	struct list_head *p;
+	struct sock *sk;
+	struct sock_beancounter *skbc;
+	struct socket *sock;
+	struct user_beancounter *cub;
+	unsigned long added;
+
+	while (!list_empty(&ub->ub_tcp_sk_list)) {
+		p = ub->ub_tcp_sk_list.next;
+		skbc = list_entry(p, struct sock_beancounter, ub_sock_list);
+		sk = skbc_sock(skbc);
+
+		added = 0;
+		sock = sk->sk_socket;
+		if (sock == NULL)
+			/* sk being destroyed */
+			goto cont;
+		ub_debug(UBD_NET_SLEEP, 
+				"Checking queue, waiting %lu, reserv %lu\n",
+				skbc->ub_waitspc, skbc->poll_reserv);
+		added = -skbc->poll_reserv;
+		if (ub_sock_makewreserv_locked(sk, UB_TCPSNDBUF,
+					UB_NUMTCPSOCK, skbc->ub_waitspc))
+			break;
+		added += skbc->poll_reserv;
+
+		/*
+		 * Send async notifications and wake up.
+		 * Locking note: tcp_write_space is over-optimistic about
+		 * calling context (socket lock is presumed).
+		 * NOTE(review): the original remark says callback_lock is
+		 * taken here; no such lock is taken in this function.
+		 */
+		sk->sk_write_space(sk);
+
+cont:
+		list_del_init(&skbc->ub_sock_list);
+
+		if (skbc->ub != ub && added) {
+			cub = get_beancounter(skbc->ub);
+			spin_unlock(&ub->ub_lock);
+			charge_beancounter_notop(cub, UB_TCPSNDBUF, added);
+			put_beancounter(cub);
+			spin_lock(&ub->ub_lock);
+		}
+	}
+}
+
+/* Reserve size for sk now, or queue sk on the top-level ub's wait list. */
+void ub_sock_snd_queue_add(struct sock *sk, int res, unsigned long size)
+{
+	unsigned long flags;
+	struct sock_beancounter *skbc;
+	struct user_beancounter *ub;
+	unsigned long added_reserv;
+
+	if (!sock_has_ubc(sk))
+		return;
+
+	skbc = sock_bc(sk);
+	for (ub = skbc->ub; ub->parent != NULL; ub = ub->parent);
+	spin_lock_irqsave(&ub->ub_lock, flags);
+	ub_debug(UBD_NET_SLEEP, "attempt to charge for %lu\n", size);
+	added_reserv = -skbc->poll_reserv;
+	if (!ub_sock_makewreserv_locked(sk, res, bid2sid(res), size)) {
+		/*
+		 * It looks a bit hackish, but it is compatible with both
+		 * wait_for_xx_ubspace and poll.
+		 * This __set_current_state is equivalent to a wakeup event
+		 * right after spin_unlock_irqrestore.
+		 */
+		__set_current_state(TASK_RUNNING);
+		added_reserv += skbc->poll_reserv;
+		spin_unlock_irqrestore(&ub->ub_lock, flags);
+		if (added_reserv)
+			charge_beancounter_notop(skbc->ub, res, added_reserv);
+		return;
+	}
+	ub_debug(UBD_NET_SLEEP, "Adding sk to queue\n");
+	skbc->ub_waitspc = size;
+	if (!list_empty(&skbc->ub_sock_list)) {
+		ub_debug(UBD_NET_SOCKET, 
+				"re-adding socket to beancounter %p.\n", ub);
+		goto out;
+	}
+
+	switch (res) {
+		case UB_TCPSNDBUF:
+			list_add_tail(&skbc->ub_sock_list, 
+					&ub->ub_tcp_sk_list);
+			break;
+		case UB_OTHERSOCKBUF:
+			list_add_tail(&skbc->ub_sock_list, 
+					&ub->ub_other_sk_list);
+			break;
+		default:
+			BUG();
+	}
+out:
+	spin_unlock_irqrestore(&ub->ub_lock, flags);
+}
+
+
+/*
+ * Helpers
+ */
+
+/* Record in skb_bc(skb) that size of resource is charged to sk's ub. */
+void ub_skb_set_charge(struct sk_buff *skb, struct sock *sk,
+		       unsigned long size, int resource)
+{
+	if (!sock_has_ubc(sk))
+		return;
+
+	BUG_ON(sock_bc(sk)->ub == NULL);
+	skb_bc(skb)->ub = sock_bc(sk)->ub;
+	skb_bc(skb)->charged = size;
+	skb_bc(skb)->resource = resource;
+
+	/* Ugly. Ugly. Skb in sk writequeue can live without ref to sk */
+	if (skb->sk == NULL)
+		skb->sk = sk;
+}
+
+static inline void ub_skb_set_uncharge(struct sk_buff *skb)
+{
+	skb_bc(skb)->ub = NULL;		/* no owner any more */
+	skb_bc(skb)->charged = 0;
+	skb_bc(skb)->resource = 0;
+}
+
+/* Uncharge size from ub (if any); shrink skbc->ub_wcharged, floored at 0. */
+static inline void __uncharge_sockbuf(struct sock_beancounter *skbc,
+		struct user_beancounter *ub, int resource, unsigned long size)
+{
+	if (ub != NULL)
+		__uncharge_beancounter_locked(ub, resource, size);
+	if (skbc == NULL)
+		return;
+
+	/* never let the counter wrap below zero */
+	skbc->ub_wcharged = skbc->ub_wcharged > size ?
+				skbc->ub_wcharged - size : 0;
+}
+
+/* top ub: ub_rmem_thres = UB_TCPRCVBUF barrier / (UB_NUMTCPSOCK held + 1) */
+static void ub_update_rmem_thres(struct sock_beancounter *skub)
+{
+	struct user_beancounter *top = skub ? skub->ub : NULL;
+
+	for (; top != NULL && top->parent != NULL; top = top->parent);
+	if (top != NULL)
+		top->ub_rmem_thres = top->ub_parms[UB_TCPRCVBUF].barrier /
+			(top->ub_parms[UB_NUMTCPSOCK].held + 1);
+}
+inline int ub_skb_alloc_bc(struct sk_buff *skb, int gfp_mask)
+{
+	memset(skb_bc(skb), 0, sizeof(struct skb_beancounter));
+	return 0;	/* only zeroes skb_bc(skb); gfp_mask is unused */
+}
+
+inline void ub_skb_free_bc(struct sk_buff *skb)
+{	/* no-op */
+}
+
+
+/*
+ * Charge socket number
+ */
+
+/* Zero sk's sock_beancounter; always returns 0. */
+static inline int sk_alloc_beancounter(struct sock *sk)
+{
+	struct sock_beancounter *bc = sock_bc(sk);
+
+	memset(bc, 0, sizeof(*bc));
+	return 0;
+}
+
+static inline void sk_free_beancounter(struct sock *sk)
+{	/* no-op */
+}
+
+/*
+ * Charge one unit of res to the exec beancounter on behalf of sk and store
+ * a reference to it in sk.  Returns 0 (also without an exec ub) or -ENOMEM.
+ */
+static int __sock_charge(struct sock *sk, int res)
+{
+	struct user_beancounter *exec_ub = get_exec_ub();
+	struct sock_beancounter *skbc;
+
+	if (exec_ub == NULL)
+		return 0;
+	if (sk_alloc_beancounter(sk) < 0)
+		return -ENOMEM;
+
+	skbc = sock_bc(sk);
+	INIT_LIST_HEAD(&skbc->ub_sock_list);
+	if (charge_beancounter(exec_ub, res, 1, UB_HARD) < 0) {
+		sk_free_beancounter(sk);
+		return -ENOMEM;
+	}
+
+	/* TCP listen sock or process keeps reference to UB */
+	skbc->ub = get_beancounter(exec_ub);
+	return 0;
+}
+
+/* Charge sk as UB_NUMTCPSOCK, then refresh the receive-memory threshold. */
+int ub_tcp_sock_charge(struct sock *sk)
+{
+	int err = __sock_charge(sk, UB_NUMTCPSOCK);
+
+	/* done whether or not the charge succeeded */
+	ub_update_rmem_thres(sock_bc(sk));
+	return err;
+}
+
+int ub_other_sock_charge(struct sock *sk)
+{
+	return __sock_charge(sk, UB_NUMOTHERSOCK);	/* non-TCP counter */
+}
+
+EXPORT_SYMBOL(ub_other_sock_charge);
+
+int ub_sock_charge(struct sock *sk, int family, int type)
+{
+	return (IS_TCP_SOCK(family, type) ? 	/* pick counter by kind */
+			ub_tcp_sock_charge(sk) : ub_other_sock_charge(sk));
+}
+EXPORT_SYMBOL(ub_sock_charge);
+
+/*
+ * Uncharge socket number
+ */
+
+/* Release sk's socket count, its remaining poll_reserv and its ub reference. */
+void ub_sock_uncharge(struct sock *sk)
+{
+	int is_tcp_sock;
+	unsigned long flags;
+	struct sock_beancounter *skbc;
+	struct user_beancounter *ub;
+	unsigned long reserv;
+
+	if (!sock_has_ubc(sk))
+		return;
+
+	is_tcp_sock = IS_TCP_SOCK(sk->sk_family, sk->sk_type);
+	skbc = sock_bc(sk);
+	ub_debug(UBD_NET_SOCKET, "Calling ub_sock_uncharge on %p\n", sk);
+
+	for (ub = skbc->ub; ub->parent != NULL; ub = ub->parent);
+	spin_lock_irqsave(&ub->ub_lock, flags);
+	if (!list_empty(&skbc->ub_sock_list)) {
+		ub_debug(UBD_NET_SOCKET, 
+			 "ub_sock_uncharge: removing from ub(%p) queue.\n",
+			 skbc);
+		list_del_init(&skbc->ub_sock_list);
+	}
+
+	reserv = skbc->poll_reserv;
+	__uncharge_beancounter_locked(ub,
+			(is_tcp_sock ? UB_TCPSNDBUF : UB_OTHERSOCKBUF),
+			reserv);
+	__uncharge_beancounter_locked(ub,
+			(is_tcp_sock ? UB_NUMTCPSOCK : UB_NUMOTHERSOCK), 1);
+
+	/* The check sk->sk_family != PF_NETLINK is made as the skb is
+	 * queued to the kernel end of socket while changed to the user one.
+	 * Den */
+	if (skbc->ub_wcharged > reserv &&
+	    sk->sk_family != PF_NETLINK) {
+		skbc->ub_wcharged -= reserv;
+		printk(KERN_WARNING
+		       "ub_sock_uncharge: wch=%lu for ub %p (%d).\n",
+		       skbc->ub_wcharged, skbc->ub, skbc->ub->ub_uid);
+	} else
+		skbc->ub_wcharged = 0;
+	skbc->poll_reserv = 0;
+	spin_unlock_irqrestore(&ub->ub_lock, flags);
+
+	uncharge_beancounter_notop(skbc->ub,
+			(is_tcp_sock ? UB_TCPSNDBUF : UB_OTHERSOCKBUF),
+			reserv);
+	uncharge_beancounter_notop(skbc->ub,
+			(is_tcp_sock ? UB_NUMTCPSOCK : UB_NUMOTHERSOCK), 1);
+
+	put_beancounter(skbc->ub);
+	sk_free_beancounter(sk);
+}
+
+/*
+ * Send - receive buffers
+ */
+
+/* Special case for netlink_dump - (un)charges precalculated size */
+int ub_nlrcvbuf_charge(struct sk_buff *skb, struct sock *sk)
+{
+	unsigned long chargesize;
+	int err;
+
+	if (!sock_has_ubc(sk))
+		return 0;
+
+	/* hard charge of the whole skb to UB_DGRAMRCVBUF */
+	chargesize = skb_charge_fullsize(skb);
+	err = charge_beancounter(sock_bc(sk)->ub,
+			UB_DGRAMRCVBUF, chargesize, UB_HARD);
+	if (err >= 0)
+		ub_skb_set_charge(skb, sk, chargesize, UB_DGRAMRCVBUF);
+	return err;
+}
+
+/*
+ * Poll reserv accounting
+ */
+/* Raise sk's poll_reserv to size; callers hold the top ub's ub_lock. */
+static int ub_sock_makewreserv_locked(struct sock *sk, 
+		int bufid, int sockid, unsigned long size)
+{
+	unsigned long wcharge_added;
+	struct sock_beancounter *skbc;
+	struct user_beancounter *ub;
+
+	if (!sock_has_ubc(sk))
+		goto out;
+
+	skbc = sock_bc(sk);
+	if (skbc->poll_reserv >= size) /* no work to be done */
+		goto out;
+
+	for (ub = skbc->ub; ub->parent != NULL; ub = ub->parent);
+	ub->ub_parms[bufid].held += size - skbc->poll_reserv;
+
+	wcharge_added = 0;
+	/*
+	 * Logic:
+	 *  1) when used memory hits barrier, we set wmem_pressure;
+	 *     wmem_pressure is reset under barrier/2;
+	 *     between barrier/2 and barrier we limit per-socket buffer growth;
+	 *  2) each socket is guaranteed to get (limit-barrier)/maxsockets
+	 *     calculated on the base of memory eaten after the barrier is hit
+	 */
+	skbc = sock_bc(sk);
+	if (!ub_hfbarrier_hit(ub, bufid)) {
+		if (ub->ub_wmem_pressure)
+			ub_debug(UBD_NET_SEND, "makewres: pressure -> 0 "
+				"sk %p sz %lu pr %lu hd %lu wc %lu sb %d.\n",
+				sk, size, skbc->poll_reserv,
+				ub->ub_parms[bufid].held,
+				skbc->ub_wcharged, sk->sk_sndbuf);
+		ub->ub_wmem_pressure = 0;
+	}
+	if (ub_barrier_hit(ub, bufid)) {
+		if (!ub->ub_wmem_pressure)
+			ub_debug(UBD_NET_SEND, "makewres: pressure -> 1 "
+				"sk %p sz %lu pr %lu hd %lu wc %lu sb %d.\n",
+				sk, size, skbc->poll_reserv,
+				ub->ub_parms[bufid].held,
+				skbc->ub_wcharged, sk->sk_sndbuf);
+		ub->ub_wmem_pressure = 1;
+		wcharge_added = size - skbc->poll_reserv;
+		skbc->ub_wcharged += wcharge_added;
+		if (skbc->ub_wcharged * ub->ub_parms[sockid].limit +
+				ub->ub_parms[bufid].barrier >
+					ub->ub_parms[bufid].limit)
+			goto unroll;
+	}
+	if (ub->ub_parms[bufid].held > ub->ub_parms[bufid].limit)
+		goto unroll;
+	ub_adjust_maxheld(ub, bufid);
+	skbc->poll_reserv = size;
+out:
+	return 0;
+
+unroll:
+	ub_debug(UBD_NET_SEND, 
+			"makewres: deny "
+			"sk %p sz %lu pr %lu hd %lu wc %lu sb %d.\n",
+			sk, size, skbc->poll_reserv, ub->ub_parms[bufid].held,
+			skbc->ub_wcharged, sk->sk_sndbuf);
+	skbc->ub_wcharged -= wcharge_added;
+	ub->ub_parms[bufid].failcnt++;
+	ub->ub_parms[bufid].held -= size - skbc->poll_reserv;
+	return -ENOMEM;
+}
+
+/*
+ * Ensure sk's poll_reserv for bufid is at least size; 0 or an error.
+ */
+int ub_sock_make_wreserv(struct sock *sk, int bufid, unsigned long size)
+{
+	struct sock_beancounter *skbc = sock_bc(sk);
+	struct user_beancounter *top;
+	unsigned long flags;
+	unsigned long grown;
+	int err;
+
+	/*
+	 * This function provides that there is sufficient reserve upon return
+	 * only if sk has only one user.  We can check poll_reserv without
+	 * serialization and avoid locking if the reserve already exists.
+	 */
+	if (!sock_has_ubc(sk) || skbc->poll_reserv >= size)
+		return 0;
+
+	for (top = skbc->ub; top->parent != NULL; top = top->parent);
+	spin_lock_irqsave(&top->ub_lock, flags);
+	grown = -skbc->poll_reserv;
+	err = ub_sock_makewreserv_locked(sk, bufid, bid2sid(bufid), size);
+	grown += skbc->poll_reserv;
+	spin_unlock_irqrestore(&top->ub_lock, flags);
+
+	if (grown)
+		charge_beancounter_notop(skbc->ub, bufid, grown);
+	return err;
+}
+
+/* Reserve size of bufid for sk and take it out of poll_reserv at once. */
+int ub_sock_get_wreserv(struct sock *sk, int bufid, unsigned long size)
+{
+	struct sock_beancounter *skbc = sock_bc(sk);
+	struct user_beancounter *top;
+	unsigned long flags;
+	unsigned long grown;
+	int err;
+
+	if (!sock_has_ubc(sk))
+		return 0;
+
+	for (top = skbc->ub; top->parent != NULL; top = top->parent);
+	spin_lock_irqsave(&top->ub_lock, flags);
+	grown = -skbc->poll_reserv;
+	err = ub_sock_makewreserv_locked(sk, bufid, bid2sid(bufid), size);
+	grown += skbc->poll_reserv;
+	if (!err)
+		skbc->poll_reserv -= size;	/* consumed immediately */
+	spin_unlock_irqrestore(&top->ub_lock, flags);
+
+	if (grown)
+		charge_beancounter_notop(skbc->ub, bufid, grown);
+
+	return err;
+}
+
+/* Return size to sk's poll_reserv (capped at ressize); wake bufid waiters. */
+void ub_sock_ret_wreserv(struct sock *sk, int bufid, 
+		unsigned long size, unsigned long ressize)
+{
+	struct sock_beancounter *skbc;
+	struct user_beancounter *ub;
+	unsigned long extra = 0;
+	unsigned long flags;
+
+	if (!sock_has_ubc(sk))
+		return;
+
+	skbc = sock_bc(sk);
+	for (ub = skbc->ub; ub->parent != NULL; ub = ub->parent);
+	spin_lock_irqsave(&ub->ub_lock, flags);
+	skbc->poll_reserv += size;
+	if (skbc->poll_reserv > ressize) {
+		extra = skbc->poll_reserv - ressize;
+		__uncharge_beancounter_locked(ub, bufid, extra);
+		skbc->ub_wcharged = skbc->ub_wcharged > extra ?
+					skbc->ub_wcharged - extra : 0;
+		skbc->poll_reserv = ressize;
+	}
+	/* TCP and other sockets wait on separate lists: wake the right one */
+	if (bufid == UB_TCPSNDBUF)
+		ub_tcp_snd_wakeup(ub);
+	else
+		ub_sock_snd_wakeup(ub);
+	spin_unlock_irqrestore(&ub->ub_lock, flags);
+
+	if (extra)
+		uncharge_beancounter_notop(skbc->ub, bufid, extra);
+}
+
+/* Wait until size can be reserved for sk, or a signal/shutdown/error occurs. */
+long ub_sock_wait_for_space(struct sock *sk, long timeo, unsigned long size)
+{
+	DECLARE_WAITQUEUE(wait, current);
+
+	add_wait_queue(sk->sk_sleep, &wait);
+	for (;;) {
+		if (signal_pending(current))
+			break;
+		set_current_state(TASK_INTERRUPTIBLE);
+		if (!ub_sock_make_wreserv(sk, UB_OTHERSOCKBUF, size))
+			break;
+		if (sk->sk_shutdown & SEND_SHUTDOWN)
+			break;
+		if (sk->sk_err)
+			break;
+		ub_sock_snd_queue_add(sk, UB_OTHERSOCKBUF, size);
+		timeo = schedule_timeout(timeo);
+	}
+	__set_current_state(TASK_RUNNING);
+	remove_wait_queue(sk->sk_sleep, &wait);
+	return timeo;
+}
+
+int ub_sock_makewres_other(struct sock *sk, unsigned long size)
+{
+	return ub_sock_make_wreserv(sk, UB_OTHERSOCKBUF, size);	/* non-TCP */
+}
+
+int ub_sock_makewres_tcp(struct sock *sk, unsigned long size)
+{
+	return ub_sock_make_wreserv(sk, UB_TCPSNDBUF, size);	/* TCP */
+}
+
+int ub_sock_getwres_other(struct sock *sk, unsigned long size)
+{
+	return ub_sock_get_wreserv(sk, UB_OTHERSOCKBUF, size);	/* non-TCP */
+}
+
+int ub_sock_getwres_tcp(struct sock *sk, unsigned long size)
+{
+	return ub_sock_get_wreserv(sk, UB_TCPSNDBUF, size);	/* TCP */
+}
+
+void ub_sock_retwres_other(struct sock *sk, unsigned long size, 
+		unsigned long ressize)
+{
+	ub_sock_ret_wreserv(sk, UB_OTHERSOCKBUF, size, ressize); /* non-TCP */
+}
+
+void ub_sock_retwres_tcp(struct sock *sk, unsigned long size, 
+		unsigned long ressize)
+{
+	ub_sock_ret_wreserv(sk, UB_TCPSNDBUF, size, ressize);	/* TCP */
+}
+
+void ub_sock_sndqueueadd_other(struct sock *sk, unsigned long sz)
+{
+	ub_sock_snd_queue_add(sk, UB_OTHERSOCKBUF, sz);	/* non-TCP list */
+}
+
+void ub_sock_sndqueueadd_tcp(struct sock *sk, unsigned long sz)
+{
+	ub_sock_snd_queue_add(sk, UB_TCPSNDBUF, sz);	/* TCP list */
+}
+
+/* Take sk off the send wait list it may be queued on. */
+void ub_sock_sndqueuedel(struct sock *sk)
+{
+	struct user_beancounter *ub;
+	unsigned long flags;
+
+	if (!sock_has_ubc(sk))
+		return;
+	/* lists live on the top-level ub: its lock guards the write_space race */
+	for (ub = sock_bc(sk)->ub; ub->parent != NULL; ub = ub->parent);
+	spin_lock_irqsave(&ub->ub_lock, flags);
+	list_del_init(&sock_bc(sk)->ub_sock_list);
+	spin_unlock_irqrestore(&ub->ub_lock, flags);
+}
+
+/*
+ * UB_DGRAMRCVBUF
+ */
+
+/* Hard-charge the full size of skb to sk's ub as UB_DGRAMRCVBUF. */
+int ub_sockrcvbuf_charge(struct sock *sk, struct sk_buff *skb)
+{
+	unsigned long bytes;
+
+	if (!sock_has_ubc(sk))
+		return 0;
+
+	bytes = skb_charge_fullsize(skb);
+	if (charge_beancounter(sock_bc(sk)->ub, UB_DGRAMRCVBUF, bytes, UB_HARD))
+		return -ENOMEM;	/* any charge error is reported as -ENOMEM */
+
+	ub_skb_set_charge(skb, sk, bytes, UB_DGRAMRCVBUF);
+	return 0;
+}
+
+EXPORT_SYMBOL(ub_sockrcvbuf_charge);
+
+static void ub_sockrcvbuf_uncharge(struct sk_buff *skb)
+{
+	uncharge_beancounter(skb_bc(skb)->ub, UB_DGRAMRCVBUF,
+			     skb_bc(skb)->charged);
+	ub_skb_set_uncharge(skb);	/* clear the skb's charge record */
+}
+
+/*
+ * UB_TCPRCVBUF
+ */
+/* Charge skb as UB_TCPRCVBUF for sk; strict selects how excess is treated. */
+static int charge_tcprcvbuf(struct sock *sk, struct sk_buff *skb,
+			    enum severity strict)
+{
+	int retval;
+	unsigned long flags;
+	struct user_beancounter *ub;
+	unsigned long chargesize;
+
+	if (!sock_has_ubc(sk))
+		return 0;
+
+	/*
+	 * Memory pressure reactions:
+	 *  1) set UB_RMEM_KEEP (clearing UB_RMEM_EXPAND)
+	 *  2) set UB_RMEM_SHRINK and tcp_clamp_window()
+	 *     tcp_collapse_queues() if rmem_alloc > rcvbuf
+	 *  3) drop OFO, tcp_purge_ofo()
+	 *  4) drop all.
+	 * Currently, we do #2 and #3 at once (which means that current
+	 * collapsing of OFO queue in tcp_collapse_queues() is a waste of time,
+	 * for example...)
+	 * On memory pressure we jump from #0 to #3, and when the pressure
+	 * subsides, to #1.
+	 */
+	retval = 0;
+	chargesize = skb_charge_fullsize(skb);
+	for (ub = sock_bc(sk)->ub; ub->parent != NULL; ub = ub->parent);
+	spin_lock_irqsave(&ub->ub_lock, flags);
+	ub->ub_parms[UB_TCPRCVBUF].held += chargesize;
+	if (ub->ub_parms[UB_TCPRCVBUF].held >
+			ub->ub_parms[UB_TCPRCVBUF].barrier &&
+			strict != UB_FORCE)
+		goto excess;
+	ub_adjust_maxheld(ub, UB_TCPRCVBUF);
+	spin_unlock_irqrestore(&ub->ub_lock, flags);
+
+out:
+	if (retval == 0) {
+		charge_beancounter_notop(sock_bc(sk)->ub, UB_TCPRCVBUF,
+				chargesize);
+		ub_skb_set_charge(skb, sk, chargesize, UB_TCPRCVBUF);
+	}
+	return retval;
+
+excess:
+	ub->ub_rmem_pressure = UB_RMEM_SHRINK;
+	if (strict == UB_HARD)
+		retval = -ENOMEM;
+	if (ub->ub_parms[UB_TCPRCVBUF].held > ub->ub_parms[UB_TCPRCVBUF].limit)
+		retval = -ENOMEM;
+	/*
+	 * We try to leave numsock*maxadvmss as a reserve for sockets not
+	 * queueing any data yet (if the difference between the barrier and the
+	 * limit is enough for this reserve).
+	 */
+	if (ub->ub_parms[UB_TCPRCVBUF].held +
+			ub->ub_parms[UB_NUMTCPSOCK].limit * ub->ub_maxadvmss
+			> ub->ub_parms[UB_TCPRCVBUF].limit &&
+			atomic_read(&sk->sk_rmem_alloc))
+		retval = -ENOMEM;
+	if (retval) {
+		ub->ub_parms[UB_TCPRCVBUF].held -= chargesize;
+		ub->ub_parms[UB_TCPRCVBUF].failcnt++;
+	}
+	ub_adjust_maxheld(ub, UB_TCPRCVBUF);
+	spin_unlock_irqrestore(&ub->ub_lock, flags);
+	goto out;
+}
+
+int ub_tcprcvbuf_charge(struct sock *sk, struct sk_buff *skb)
+{
+	return charge_tcprcvbuf(sk, skb, UB_HARD);	/* strict variant */
+}
+
+int ub_tcprcvbuf_charge_forced(struct sock *sk, struct sk_buff *skb)
+{
+	return charge_tcprcvbuf(sk, skb, UB_FORCE);	/* UB_FORCE variant */
+}
+EXPORT_SYMBOL(ub_tcprcvbuf_charge_forced);
+
+/* Uncharge skb from UB_TCPRCVBUF; relax ub_rmem_pressure once held drops. */
+static void ub_tcprcvbuf_uncharge(struct sk_buff *skb)
+{
+	unsigned long flags;
+	unsigned long held, bar;
+	struct user_beancounter *ub;
+
+	for (ub = skb_bc(skb)->ub; ub->parent != NULL; ub = ub->parent);
+	spin_lock_irqsave(&ub->ub_lock, flags);
+	if (ub->ub_parms[UB_TCPRCVBUF].held < skb_bc(skb)->charged) {
+		printk(KERN_ERR "Uncharging %d for tcprcvbuf of %p with %lu\n",
+				skb_bc(skb)->charged,
+				ub, ub->ub_parms[UB_TCPRCVBUF].held);
+		/* clamp so that held cannot underflow */
+		skb_bc(skb)->charged = ub->ub_parms[UB_TCPRCVBUF].held;
+	}
+	ub->ub_parms[UB_TCPRCVBUF].held -= skb_bc(skb)->charged;
+	held = ub->ub_parms[UB_TCPRCVBUF].held;
+	bar = ub->ub_parms[UB_TCPRCVBUF].barrier;
+	/* up to bar - bar/4: expand; up to bar: keep; above: leave as is */
+	if (held <= bar - (bar >> 2))
+		ub->ub_rmem_pressure = UB_RMEM_EXPAND;
+	else if (held <= bar)
+		ub->ub_rmem_pressure = UB_RMEM_KEEP;
+	spin_unlock_irqrestore(&ub->ub_lock, flags);
+
+	uncharge_beancounter_notop(skb_bc(skb)->ub, UB_TCPRCVBUF,
+			skb_bc(skb)->charged);
+	ub_skb_set_uncharge(skb);
+}
+
+
+/*
+ * UB_OTHERSOCKBUF
+ */
+
+/* Uncharge skb from UB_OTHERSOCKBUF and run ub_sock_snd_wakeup() on the top. */
+static void ub_socksndbuf_uncharge(struct sk_buff *skb)
+{
+	unsigned long flags;
+	struct user_beancounter *top, *owner;
+	struct sock_beancounter *sk_bc = NULL;
+
+	/* resource was set. no check for ub required */
+	owner = skb_bc(skb)->ub;
+	for (top = owner; top->parent != NULL; top = top->parent);
+	skb_bc(skb)->ub = NULL;
+	if (skb->sk != NULL)
+		sk_bc = sock_bc(skb->sk);
+
+	spin_lock_irqsave(&top->ub_lock, flags);
+	__uncharge_sockbuf(sk_bc, top, UB_OTHERSOCKBUF, skb_bc(skb)->charged);
+	ub_sock_snd_wakeup(top);
+	spin_unlock_irqrestore(&top->ub_lock, flags);
+
+	uncharge_beancounter_notop(owner, UB_OTHERSOCKBUF,
+			skb_bc(skb)->charged);
+	ub_skb_set_uncharge(skb);
+}
+
+/* Uncharge skb from UB_TCPSNDBUF and run ub_tcp_snd_wakeup() on the top ub. */
+static void ub_tcpsndbuf_uncharge(struct sk_buff *skb)
+{
+	unsigned long flags;
+	struct user_beancounter *top, *owner = skb_bc(skb)->ub;
+
+	/* resource can be not set, called manually */
+	if (owner == NULL)
+		return;
+	for (top = owner; top->parent != NULL; top = top->parent);
+	skb_bc(skb)->ub = NULL;
+	spin_lock_irqsave(&top->ub_lock, flags);
+	__uncharge_sockbuf(sock_bc(skb->sk), top, UB_TCPSNDBUF,
+			   skb_bc(skb)->charged);
+	ub_tcp_snd_wakeup(top);
+	spin_unlock_irqrestore(&top->ub_lock, flags);
+
+	uncharge_beancounter_notop(owner, UB_TCPSNDBUF, skb_bc(skb)->charged);
+	ub_skb_set_uncharge(skb);
+}
+
+/* Undo an skb's charge according to the resource recorded in skb_bc(skb). */
+void ub_skb_uncharge(struct sk_buff *skb)
+{
+	switch (skb_bc(skb)->resource) {
+	case UB_TCPSNDBUF:
+		ub_tcpsndbuf_uncharge(skb);
+		break;
+	case UB_TCPRCVBUF:
+		ub_tcprcvbuf_uncharge(skb);
+		break;
+	case UB_DGRAMRCVBUF:
+		ub_sockrcvbuf_uncharge(skb);
+		break;
+	case UB_OTHERSOCKBUF:
+		ub_socksndbuf_uncharge(skb);
+	}
+}
+
+EXPORT_SYMBOL(ub_skb_uncharge);	/* due to skb_orphan()/conntracks */
+
+/*
+ * TCP send buffers accounting. Paged part
+ */
+/* Charge one PAGE_SIZE of UB_TCPSNDBUF for sk; returns 0 or an error. */
+int ub_sock_tcp_chargepage(struct sock *sk)
+{
+	struct sock_beancounter *skbc;
+	struct user_beancounter *ub;
+	unsigned long added;
+	unsigned long flags;
+	int err;
+
+	if (!sock_has_ubc(sk))
+		return 0;
+
+	skbc = sock_bc(sk);
+	for (ub = skbc->ub; ub->parent != NULL; ub = ub->parent);
+	spin_lock_irqsave(&ub->ub_lock, flags);
+	/* notop gets only the reserve growth, as in ub_sock_get_wreserv() */
+	added = -skbc->poll_reserv;
+	/* Try to charge full page */
+	err = ub_sock_makewreserv_locked(sk, UB_TCPSNDBUF, UB_NUMTCPSOCK,
+					 PAGE_SIZE);
+	if (err == 0) {
+		added += skbc->poll_reserv;
+		skbc->poll_reserv -= PAGE_SIZE;
+		spin_unlock_irqrestore(&ub->ub_lock, flags);
+		if (added)
+			charge_beancounter_notop(skbc->ub, UB_TCPSNDBUF, added);
+		return 0;
+	}
+
+	/* Try to charge page enough to satisfy sys_select. The possible
+	   overdraft for the rest of the page is generally better than
+	   requesting full page in tcp_poll. This should not happen
+	   frequently. Den */
+	err = ub_sock_makewreserv_locked(sk, UB_TCPSNDBUF, UB_NUMTCPSOCK,
+					 SOCK_MIN_UBCSPACE);
+	if (err < 0) {
+		spin_unlock_irqrestore(&ub->ub_lock, flags);
+		return err;
+	}
+	__charge_beancounter_locked(ub, UB_TCPSNDBUF,
+				    PAGE_SIZE - skbc->poll_reserv, UB_FORCE);
+	added += PAGE_SIZE;
+	skbc->poll_reserv = 0;
+	spin_unlock_irqrestore(&ub->ub_lock, flags);
+
+	charge_beancounter_notop(skbc->ub, UB_TCPSNDBUF, added);
+	return 0;
+}
+
+/* Move the PAGE_SIZE charge of a page just detached from sk, or drop it. */
+void ub_sock_tcp_detachpage(struct sock *sk)
+{
+	struct sk_buff *last;
+
+	if (!sock_has_ubc(sk))
+		return;
+
+	/* The page is just detached from socket. The last skb in queue
+	   with paged part holds reference to it */
+	last = skb_peek_tail(&sk->sk_write_queue);
+	if (last != NULL) {
+		/* Last skb approximates the last skb with paged part */
+		skb_bc(last)->charged += PAGE_SIZE;
+		return;
+	}
+	/* Empty queue: all data is sent and the page is about to be freed */
+	uncharge_beancounter(sock_bc(sk)->ub, UB_TCPSNDBUF, PAGE_SIZE);
+}
+
+/* Charge the full size of skb to UB_TCPSNDBUF with the given strictness. */
+static int charge_tcpsndbuf(struct sock *sk, struct sk_buff *skb,
+			    enum severity strict)
+{
+	unsigned long bytes;
+	int err;
+
+	if (!sock_has_ubc(sk))
+		return 0;
+
+	bytes = skb_charge_fullsize(skb);
+	err = charge_beancounter(sock_bc(sk)->ub, UB_TCPSNDBUF, bytes, strict);
+	if (err >= 0) {
+		ub_skb_set_charge(skb, sk, bytes, UB_TCPSNDBUF);
+		sock_bc(sk)->ub_wcharged += bytes;
+	}
+	return err;
+}
+
+int ub_tcpsndbuf_charge(struct sock *sk, struct sk_buff *skb)
+{
+	return charge_tcpsndbuf(sk, skb, UB_HARD);	/* strict variant */
+}
+
+int ub_tcpsndbuf_charge_forced(struct sock *sk,	struct sk_buff *skb)
+{
+	return charge_tcpsndbuf(sk, skb, UB_FORCE);	/* UB_FORCE variant */
+}
+EXPORT_SYMBOL(ub_tcpsndbuf_charge_forced);
+
+/*
+ * Initialization stuff
+ */
+int __init skbc_cache_init(void)
+{
+	return 0;	/* nothing is set up here */
+}
diff -uprN linux-2.6.16/kernel/ub/ub_page_bc.c linux-2.6.16.ovz/kernel/ub/ub_page_bc.c
--- linux-2.6.16/kernel/ub/ub_page_bc.c	1969-12-31 19:00:00.000000000 -0500
+++ linux-2.6.16.ovz/kernel/ub/ub_page_bc.c	2006-07-05 08:34:56.000000000 -0400
@@ -0,0 +1,439 @@
+/*
+ *  kernel/ub/ub_page_bc.c
+ *
+ *  Copyright (C) 2005  SWsoft
+ *  All rights reserved.
+ *  
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#include <linux/spinlock.h>
+#include <linux/slab.h>
+#include <linux/mm.h>
+#include <linux/gfp.h>
+#include <linux/vmalloc.h>
+
+#include <ub/beancounter.h>
+#include <ub/ub_hash.h>
+#include <ub/ub_vmpages.h>
+#include <ub/ub_page.h>
+
+static kmem_cache_t *pb_cachep;
+static spinlock_t pb_lock = SPIN_LOCK_UNLOCKED;
+static struct page_beancounter **pb_hash_table;
+static unsigned int pb_hash_mask;
+
+/*
+ * Auxiliary stuff
+ */
+
+static inline struct page_beancounter *next_page_pb(struct page_beancounter *p)
+{
+	return list_entry(p->page_list.next, struct page_beancounter,
+			page_list);
+}
+
+static inline struct page_beancounter *prev_page_pb(struct page_beancounter *p)
+{
+	return list_entry(p->page_list.prev, struct page_beancounter,
+			page_list);
+}
+
+/*
+ * Held pages manipulation
+ */
+static inline void set_held_pages(struct user_beancounter *bc)
+{
+	/* all three depend on ub_held_pages */
+	__ub_update_physpages(bc);
+	__ub_update_oomguarpages(bc);
+	__ub_update_privvm(bc);
+}
+
+static inline void do_dec_held_pages(struct user_beancounter *ub, int value)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&ub->ub_lock, flags);
+	ub->ub_held_pages -= value;
+	set_held_pages(ub);
+	spin_unlock_irqrestore(&ub->ub_lock, flags);
+}
+
+static void dec_held_pages(struct user_beancounter *ub, int value)
+{
+	for (; ub != NULL; ub = ub->parent)
+		do_dec_held_pages(ub, value);
+}
+
+static inline void do_inc_held_pages(struct user_beancounter *ub, int value)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&ub->ub_lock, flags);
+	ub->ub_held_pages += value;
+	set_held_pages(ub);
+	spin_unlock_irqrestore(&ub->ub_lock, flags);
+}
+
+static void inc_held_pages(struct user_beancounter *ub, int value)
+{
+	for (; ub != NULL; ub = ub->parent)
+		do_inc_held_pages(ub, value);
+}
+
+/*
+ * Alloc - free
+ */
+
+inline int pb_alloc(struct page_beancounter **pbc)
+{
+	*pbc = kmem_cache_alloc(pb_cachep, GFP_KERNEL);
+	if (*pbc != NULL) {
+		(*pbc)->next_hash = NULL;
+		(*pbc)->pb_magic = PB_MAGIC;
+	}
+	return (*pbc == NULL);
+}
+
+inline void pb_free(struct page_beancounter **pb)
+{
+	if (*pb != NULL) {
+		kmem_cache_free(pb_cachep, *pb);
+		*pb = NULL;
+	}
+}
+
+/* Free each element of the next_hash-linked list at *p_pb, then NULL it. */
+void pb_free_list(struct page_beancounter **p_pb)
+{
+	struct page_beancounter *cur, *next;
+
+	/* PBC_COPY_SAME is neither freed nor reset: *p_pb stays as it is */
+	if (*p_pb == PBC_COPY_SAME)
+		return;
+
+	for (cur = *p_pb; cur != NULL; cur = next) {
+		next = cur->next_hash;
+		pb_free(&cur);
+	}
+	*p_pb = NULL;
+}
+
+/*
+ * head -> <new objs> -> <old objs> -> ...
+ */
+static int __alloc_list(struct page_beancounter **head, int num)
+{
+	struct page_beancounter *new_pb;
+
+	/* push num fresh objects onto the front; -1 as soon as one fails */
+	for (; num > 0; num--) {
+		if (pb_alloc(&new_pb) != 0)
+			return -1;
+		new_pb->next_hash = *head;
+		*head = new_pb;
+	}
+	/* 0 once the loop has run down; a non-positive num comes back as is */
+	return num;
+}
+
+/* 
+ * Ensure that the list contains at least num elements.
+ * p_pb points to an initialized list, may be of the zero length. 
+ *
+ * mm->page_table_lock should be held
+ */
+int pb_alloc_list(struct page_beancounter **p_pb, int num)
+{
+	struct page_beancounter *pos;
+
+	for (pos = *p_pb; pos != NULL && num; pos = pos->next_hash)
+		num--;
+	if (num == 0)
+		return 0;
+
+	/*
+	 *  *p_pb(after)       *p_pb (before)
+	 *     \                  \
+	 *     <new objs> -...-> <old objs> -> ...
+	 */
+	if (__alloc_list(p_pb, num) >= 0)
+		return 0;
+
+	/* allocation failed: release the whole list, old elements included */
+	pb_free_list(p_pb);
+	return -ENOMEM;
+}
+
+/*
+ * Allocates a page_beancounter for each
+ * user_beancounter in a hash
+ */
+int pb_alloc_all(struct page_beancounter **pbs)
+{
+	struct user_beancounter *ub;
+	unsigned long flags;
+	int i, count = 0;
+
+	/* the hash is locked only while counting, not while allocating */
+	spin_lock_irqsave(&ub_hash_lock, flags);
+	for_each_beancounter(i, ub)
+		count++;
+	spin_unlock_irqrestore(&ub_hash_lock, flags);
+
+	if (__alloc_list(pbs, count) != 0) {
+		pb_free_list(pbs);
+		return -ENOMEM;
+	}
+	return 0;
+}
+
+/*
+ * Hash routines
+ */
+
+static inline int pb_hash(struct user_beancounter *ub, struct page *page)
+{
+	return (page_to_pfn(page) + (ub->ub_uid << 10)) & pb_hash_mask;
+}
+
+/* pb_lock should be held */
+static inline void insert_pb(struct page_beancounter *p, struct page *page,
+		struct user_beancounter *ub, int hash)
+{
+	p->page = page;
+	p->ub = get_beancounter(ub);
+	p->next_hash = pb_hash_table[hash];
+	pb_hash_table[hash] = p;
+#ifdef CONFIG_UBC_DEBUG_KMEM
+	ub->ub_stat[smp_processor_id()].pbcs++;
+#endif
+}
+
+/*
+ * Heart
+ */
+
+static int __pb_dup_ref(struct page *page, struct user_beancounter *bc,
+		int hash)
+{
+	struct page_beancounter *p;
+
+	for (p = pb_hash_table[hash];
+			p != NULL && (p->page != page || p->ub != bc);
+			p = p->next_hash);
+	if (p == NULL)
+		return -1;
+
+	PB_COUNT_INC(p->refcount);
+	return 0;
+}
+
+static void __pb_add_ref(struct page *page, struct user_beancounter *bc,
+		struct page_beancounter **ppb, int hash)
+{
+	struct page_beancounter *head, *p;
+	int shift;
+
+	p = *ppb;
+	*ppb = p->next_hash;
+
+	insert_pb(p, page, bc, hash);
+	head = page_pbc(page);
+
+	if (head != NULL) {
+		/* 
+		 * Move the first element to the end of the list.
+		 * List head (pb_head) is set to the next entry.
+		 * Note that this code works even if head is the only element
+		 * on the list (because it's cyclic). 
+		 */
+		BUG_ON(head->pb_magic != PB_MAGIC);
+		page_pbc(page) = next_page_pb(head);
+		PB_SHIFT_INC(head->refcount);
+		shift = PB_SHIFT_GET(head->refcount);
+		/* 
+		 * Update user beancounter, the share of head has been changed.
+		 * Note that the shift counter is taken after increment. 
+		 */
+		dec_held_pages(head->ub, UB_PAGE_WEIGHT >> shift);
+		/* add the new page beancounter to the end of the list */
+		list_add_tail(&p->page_list, &page_pbc(page)->page_list);
+	} else {
+		page_pbc(page) = p;
+		shift = 0;
+		INIT_LIST_HEAD(&p->page_list);
+	}
+
+	p->refcount = PB_REFCOUNT_MAKE(shift, 1);
+	/* update user beancounter for the new page beancounter */
+	inc_held_pages(bc, UB_PAGE_WEIGHT >> shift);
+}
+
+void pb_add_ref(struct page *page, struct mm_struct *mm,
+		struct page_beancounter **p_pb)
+{
+	int hash;
+	struct user_beancounter *bc;
+
+	bc = mm->mm_ub;
+	if (bc == NULL)
+		return;
+
+	if (!PageAnon(page) && is_shmem_mapping(page->mapping))
+		return;
+
+	hash = pb_hash(bc, page);
+
+	spin_lock(&pb_lock);
+	if (__pb_dup_ref(page, bc, hash))
+		__pb_add_ref(page, bc, p_pb, hash);
+	spin_unlock(&pb_lock);
+}
+
+void pb_dup_ref(struct page *page, struct mm_struct *mm,
+		struct page_beancounter **p_pb)
+{
+	int hash;
+	struct user_beancounter *bc;
+
+	bc = mm->mm_ub;
+	if (bc == NULL)
+		return;
+
+	if (!PageAnon(page) && is_shmem_mapping(page->mapping))
+		return;
+
+	hash = pb_hash(bc, page);
+
+	spin_lock(&pb_lock);
+	if (page_pbc(page) == NULL)
+		/*
+		 * pages like ZERO_PAGE must not be accounted in pbc
+		 * so on fork we just skip them
+		 */
+		goto out_unlock;
+
+	if (unlikely(*p_pb != PBC_COPY_SAME))
+		__pb_add_ref(page, bc, p_pb, hash);
+	else if (unlikely(__pb_dup_ref(page, bc, hash)))
+		WARN_ON(1);
+out_unlock:
+	spin_unlock(&pb_lock);
+}
+
+void pb_remove_ref(struct page *page, struct mm_struct *mm)
+{
+	int hash;
+	struct user_beancounter *bc;
+	struct page_beancounter *p, **q;
+	int shift, shiftt;
+
+	bc = mm->mm_ub;
+	if (bc == NULL)
+		return;
+
+	if (!PageAnon(page) && is_shmem_mapping(page->mapping))
+		return;
+	/* find the pb that ties this page to the mm's beancounter */
+	hash = pb_hash(bc, page);
+
+	spin_lock(&pb_lock);
+	BUG_ON(page_pbc(page) != NULL && page_pbc(page)->pb_magic != PB_MAGIC);
+	for (q = pb_hash_table + hash, p = *q;
+			p != NULL && (p->page != page || p->ub != bc);
+			q = &p->next_hash, p = *q);
+	if (p == NULL)
+		goto out_unlock;
+
+	PB_COUNT_DEC(p->refcount);
+	if (PB_COUNT_GET(p->refcount))
+		/* 
+		 * More references from the same user beancounter exist.
+		 * Nothing needs to be done. 
+		 */
+		goto out_unlock;
+
+	/* remove from the hash list */
+	*q = p->next_hash;
+
+	shift = PB_SHIFT_GET(p->refcount);
+	/* give back the share of the page this pb was charged with */
+	dec_held_pages(p->ub, UB_PAGE_WEIGHT >> shift);
+
+	if (page_pbc(page) == p) {
+		if (list_empty(&p->page_list))
+			goto out_free;
+		page_pbc(page) = next_page_pb(p);
+	}
+	list_del(&p->page_list);
+#ifdef CONFIG_UBC_DEBUG_KMEM
+	p->ub->ub_stat[smp_processor_id()].pbcs--;
+#endif
+	put_beancounter(p->ub);	/* drop the ref only after the stat update */
+	pb_free(&p);
+
+	/* Now balance the list.  Move the tail and adjust its shift counter. */
+	p = prev_page_pb(page_pbc(page));
+	shiftt = PB_SHIFT_GET(p->refcount);
+	page_pbc(page) = p;
+	PB_SHIFT_DEC(p->refcount);
+
+	inc_held_pages(p->ub, UB_PAGE_WEIGHT >> shiftt);
+
+	/* 
+	 * If the shift counter of the moved beancounter is different from the
+	 * removed one's, repeat the procedure for one more tail beancounter 
+	 */
+	if (shiftt > shift) {
+		p = prev_page_pb(page_pbc(page));
+		page_pbc(page) = p;
+		PB_SHIFT_DEC(p->refcount);
+		inc_held_pages(p->ub, UB_PAGE_WEIGHT >> shiftt);
+	}
+	spin_unlock(&pb_lock);
+	return;
+
+out_free:
+	page_pbc(page) = NULL;
+#ifdef CONFIG_UBC_DEBUG_KMEM
+	p->ub->ub_stat[smp_processor_id()].pbcs--;
+#endif
+	put_beancounter(p->ub);
+	pb_free(&p);
+out_unlock:
+	spin_unlock(&pb_lock);
+	return;
+}
+
+/* Return get_beancounter() of the page's head pb, or ERR_PTR(-EINVAL). */
+struct user_beancounter *pb_grab_page_ub(struct page *page)
+{
+	struct user_beancounter *owner = ERR_PTR(-EINVAL);
+	struct page_beancounter *head;
+
+	spin_lock(&pb_lock);
+	if ((head = page_pbc(page)) != NULL)
+		owner = get_beancounter(head->ub);
+	spin_unlock(&pb_lock);
+	return owner;
+}
+
+void __init ub_init_pbc(void)
+{
+	unsigned long hash_size = num_physpages >> 2;
+	/* hash of (page, ub) pairs: all-ones mask covering num_physpages / 4 */
+	pb_cachep = kmem_cache_create("page_beancounter",
+			sizeof(struct page_beancounter), 0,
+			SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL, NULL);
+	for (pb_hash_mask = 1; (hash_size & pb_hash_mask) != hash_size; )
+		pb_hash_mask = (pb_hash_mask << 1) + 1;
+	hash_size = pb_hash_mask + 1;
+	printk(KERN_INFO "Page beancounter hash is %lu entries.\n", hash_size);
+	pb_hash_table = vmalloc(hash_size * sizeof(struct page_beancounter *));
+	if (pb_hash_table == NULL)	/* memset() below must not see NULL */
+		panic("Can't allocate page beancounter hash!\n");
+	memset(pb_hash_table, 0, hash_size * sizeof(struct page_beancounter *));
+}
diff -uprN linux-2.6.16/kernel/ub/ub_pages.c linux-2.6.16.ovz/kernel/ub/ub_pages.c
--- linux-2.6.16/kernel/ub/ub_pages.c	1969-12-31 19:00:00.000000000 -0500
+++ linux-2.6.16.ovz/kernel/ub/ub_pages.c	2006-07-05 08:34:56.000000000 -0400
@@ -0,0 +1,530 @@
+/*
+ *  kernel/ub/ub_pages.c
+ *
+ *  Copyright (C) 2005  SWsoft
+ *  All rights reserved.
+ *  
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#include <linux/mm.h>
+#include <linux/highmem.h>
+#include <linux/virtinfo.h>
+#include <linux/module.h>
+#include <linux/shmem_fs.h>
+#include <linux/vmalloc.h>
+
+#include <asm/pgtable.h>
+#include <asm/page.h>
+
+#include <ub/beancounter.h>
+#include <ub/ub_vmpages.h>
+
+void warn_bad_rss(struct vm_area_struct *vma, unsigned long freed)
+{
+	static struct ub_rate_info ri = {
+		.burst = 10,
+		.interval = 40 * HZ,
+	};
+	struct user_beancounter *ub;
+	char ubuid[64] = "No UB";
+	unsigned long vmrss;
+
+	if (!ub_ratelimit(&ri))
+		return;
+
+	ub = vma->vm_mm->mm_ub;
+	if (ub)
+		print_ub_uid(ub, ubuid, sizeof(ubuid));
+
+	vmrss = get_vma_rss(vma) + freed;
+	printk(KERN_WARNING
+			"%s vm_rss: process pid %d comm %.20s flags %lx\n"
+			"vma %p/%p rss %lu/%lu freed %lu\n"
+			"flags %lx, ub %s\n",
+			vmrss > freed ? "Positive" : "Negative",
+			current->pid, current->comm, current->flags,
+			vma, vma->vm_mm, vmrss, vma_pages(vma), freed,
+			vma->vm_flags, ubuid);
+	dump_stack();
+}
+
+static inline unsigned long pages_in_pte_range(struct vm_area_struct *vma,
+		pmd_t *pmd, unsigned long addr, unsigned long end,
+		unsigned long *ret)
+{
+	pte_t *pte;
+	spinlock_t *ptl;
+
+	pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
+	do {
+		if (!pte_none(*pte) && pte_present(*pte))
+			(*ret)++;
+	} while (pte++, addr += PAGE_SIZE, (addr != end));
+	pte_unmap_unlock(pte - 1, ptl);
+
+	return addr;
+}
+
+static inline unsigned long pages_in_pmd_range(struct vm_area_struct *vma,
+		pud_t *pud, unsigned long addr, unsigned long end,
+		unsigned long *ret)
+{
+	pmd_t *pmd;
+	unsigned long next;
+
+	pmd = pmd_offset(pud, addr);
+	do {
+		next = pmd_addr_end(addr, end);
+		if (pmd_none_or_clear_bad(pmd))
+			continue;
+		next = pages_in_pte_range(vma, pmd, addr, next, ret);
+	} while (pmd++, addr = next, (addr != end));
+
+	return addr;
+}
+
+static inline unsigned long pages_in_pud_range(struct vm_area_struct *vma,
+		pgd_t *pgd, unsigned long addr, unsigned long end,
+		unsigned long *ret)
+{
+	pud_t *pud;
+	unsigned long next;
+
+	pud = pud_offset(pgd, addr);
+	do {
+		next = pud_addr_end(addr, end);
+		if (pud_none_or_clear_bad(pud))
+			continue;
+		next = pages_in_pmd_range(vma, pud, addr, next, ret);
+	} while (pud++, addr = next, (addr != end));
+
+	return addr;
+}
+
+unsigned long pages_in_vma_range(struct vm_area_struct *vma,
+		unsigned long addr, unsigned long end)
+{
+	pgd_t *pgd;
+	unsigned long next;
+	unsigned long ret;
+
+	ret = 0;
+	BUG_ON(addr >= end);
+	pgd = pgd_offset(vma->vm_mm, addr);
+	do {
+		next = pgd_addr_end(addr, end);
+		if (pgd_none_or_clear_bad(pgd))
+			continue;
+		next = pages_in_pud_range(vma, pgd, addr, next, &ret);
+	} while (pgd++, addr = next, (addr != end));
+	return ret;
+}
+
+void fastcall __ub_update_physpages(struct user_beancounter *ub)
+{
+	ub->ub_parms[UB_PHYSPAGES].held = ub->ub_tmpfs_respages
+		+ (ub->ub_held_pages >> UB_PAGE_WEIGHT_SHIFT);
+	ub_adjust_maxheld(ub, UB_PHYSPAGES);
+}
+
+void fastcall __ub_update_oomguarpages(struct user_beancounter *ub)
+{
+	ub->ub_parms[UB_OOMGUARPAGES].held =
+		ub->ub_parms[UB_PHYSPAGES].held + ub->ub_swap_pages;
+	ub_adjust_maxheld(ub, UB_OOMGUARPAGES);
+}
+
+void fastcall __ub_update_privvm(struct user_beancounter *ub)
+{
+	ub->ub_parms[UB_PRIVVMPAGES].held =
+		(ub->ub_held_pages >> UB_PAGE_WEIGHT_SHIFT)
+		+ ub->ub_unused_privvmpages
+		+ ub->ub_parms[UB_SHMPAGES].held;
+	ub_adjust_maxheld(ub, UB_PRIVVMPAGES);
+}
+
+static inline int __charge_privvm_locked(struct user_beancounter *ub, 
+		unsigned long s, enum severity strict)
+{
+	if (__charge_beancounter_locked(ub, UB_PRIVVMPAGES, s, strict) < 0)
+		return -ENOMEM;
+
+	ub->ub_unused_privvmpages += s;
+	return 0;
+}
+
+static void __unused_privvm_dec_locked(struct user_beancounter *ub, 
+		long size)
+{
+	/* catch possible underflow: never subtract more than is held */
+	if (ub->ub_unused_privvmpages < size) {
+		uncharge_warn(ub, UB_UNUSEDPRIVVM,
+				size, ub->ub_unused_privvmpages);
+		size = ub->ub_unused_privvmpages;
+	}
+	ub->ub_unused_privvmpages -= size;
+	__ub_update_privvm(ub);	/* refresh UB_PRIVVMPAGES held */
+}
+
+void __ub_unused_privvm_dec(struct mm_struct *mm, long size)
+{
+	unsigned long flags;
+	struct user_beancounter *ub;
+
+	ub = mm->mm_ub;
+	if (ub == NULL)
+		return;
+
+	for (; ub->parent != NULL; ub = ub->parent);
+	spin_lock_irqsave(&ub->ub_lock, flags);
+	__unused_privvm_dec_locked(ub, size);
+	spin_unlock_irqrestore(&ub->ub_lock, flags);
+}
+
+void ub_unused_privvm_sub(struct mm_struct *mm,
+		struct vm_area_struct *vma, unsigned long count)
+{
+	if (VM_UB_PRIVATE(vma->vm_flags, vma->vm_file))
+		__ub_unused_privvm_dec(mm, count);
+}
+
+void ub_unused_privvm_add(struct mm_struct *mm,
+		struct vm_area_struct *vma, unsigned long size)
+{
+	unsigned long flags;
+	struct user_beancounter *ub;
+
+	ub = mm->mm_ub;
+	if (ub == NULL || !VM_UB_PRIVATE(vma->vm_flags, vma->vm_file))
+		return;
+
+	for (; ub->parent != NULL; ub = ub->parent);
+	spin_lock_irqsave(&ub->ub_lock, flags);
+	ub->ub_unused_privvmpages += size;
+	spin_unlock_irqrestore(&ub->ub_lock, flags);
+}
+
+int ub_protected_charge(struct mm_struct *mm, unsigned long size,
+		unsigned long newflags, struct vm_area_struct *vma)
+{
+	struct user_beancounter *top = mm->mm_ub;
+	unsigned long oldflags, irqflags;
+	int err;
+
+	if (top == NULL)
+		return PRIVVM_NO_CHARGE;
+
+	/* VM_WRITE unchanged: no charge */
+	oldflags = vma->vm_flags;
+	if (((newflags ^ oldflags) & VM_WRITE) == 0)
+		return PRIVVM_NO_CHARGE;
+
+	/* VM_UB_PRIVATE() says no even with VM_WRITE set: no charge */
+	if (!VM_UB_PRIVATE(newflags | VM_WRITE, vma->vm_file))
+		return PRIVVM_NO_CHARGE;
+
+	/* VM_WRITE is being dropped */
+	if (oldflags & VM_WRITE)
+		return PRIVVM_TO_SHARED;
+
+	/* VM_WRITE is being added: charge the top-level beancounter */
+	while (top->parent != NULL)
+		top = top->parent;
+
+	spin_lock_irqsave(&top->ub_lock, irqflags);
+	err = __charge_privvm_locked(top, size, UB_SOFT);
+	spin_unlock_irqrestore(&top->ub_lock, irqflags);
+
+	return err < 0 ? PRIVVM_ERROR : PRIVVM_TO_PRIVATE;
+}
+
+int ub_memory_charge(struct mm_struct *mm, unsigned long size,
+		unsigned vm_flags, struct file *vm_file, int sv)
+{
+	struct user_beancounter *ub = mm->mm_ub;
+	struct user_beancounter *top;
+	unsigned long irqflags;
+	int locked, err;
+
+	if (ub == NULL)
+		return 0;
+
+	/* size >> PAGE_SHIFT: the charge is made in pages */
+	size >>= PAGE_SHIFT;
+	if (size > UB_MAXVALUE)
+		return -EINVAL;
+
+	BUG_ON(sv != UB_SOFT && sv != UB_HARD);
+
+	locked = (vm_flags & VM_LOCKED) != 0;
+	if (locked && charge_beancounter(ub, UB_LOCKEDPAGES, size, sv))
+		return -ENOMEM;
+	if (!VM_UB_PRIVATE(vm_flags, vm_file))
+		return 0;
+
+	/* the privvm charge goes to the top-level beancounter */
+	for (top = ub; top->parent != NULL; top = top->parent);
+	spin_lock_irqsave(&top->ub_lock, irqflags);
+	err = __charge_privvm_locked(top, size, sv);
+	spin_unlock_irqrestore(&top->ub_lock, irqflags);
+	if (err == 0)
+		return 0;
+	/* roll back the locked-pages charge taken above */
+	if (locked)
+		uncharge_beancounter(ub, UB_LOCKEDPAGES, size);
+	return -ENOMEM;
+}
+
+void ub_memory_uncharge(struct mm_struct *mm, unsigned long size,
+		unsigned vm_flags, struct file *vm_file)
+{
+	struct user_beancounter *ub;
+	unsigned long flags;
+
+	ub = mm->mm_ub;
+	if (ub == NULL)
+		return;
+
+	size >>= PAGE_SHIFT;
+
+	if (vm_flags & VM_LOCKED)
+		uncharge_beancounter(ub, UB_LOCKEDPAGES, size);
+	if (VM_UB_PRIVATE(vm_flags, vm_file)) {
+		for (; ub->parent != NULL; ub = ub->parent);
+		spin_lock_irqsave(&ub->ub_lock, flags);
+		__unused_privvm_dec_locked(ub, size);
+		spin_unlock_irqrestore(&ub->ub_lock, flags);
+	}
+}
+
+int ub_locked_charge(struct mm_struct *mm, unsigned long size)
+{
+	struct user_beancounter *ub;
+
+	ub = mm->mm_ub;
+	if (ub == NULL)
+		return 0;
+
+	return charge_beancounter(ub, UB_LOCKEDPAGES,
+			size >> PAGE_SHIFT, UB_HARD);
+}
+
+void ub_locked_uncharge(struct mm_struct *mm, unsigned long size)
+{
+	struct user_beancounter *ub;
+
+	ub = mm->mm_ub;
+	if (ub == NULL)
+		return;
+
+	uncharge_beancounter(ub, UB_LOCKEDPAGES, size >> PAGE_SHIFT);
+}
+
+int ub_lockedshm_charge(struct shmem_inode_info *shi, unsigned long size)
+{
+	struct user_beancounter *ub;
+
+	ub = shi->shmi_ub;
+	if (ub == NULL)
+		return 0;
+
+	return charge_beancounter(ub, UB_LOCKEDPAGES,
+			size >> PAGE_SHIFT, UB_HARD);
+}
+
+void ub_lockedshm_uncharge(struct shmem_inode_info *shi, unsigned long size)
+{
+	struct user_beancounter *ub;
+
+	ub = shi->shmi_ub;
+	if (ub == NULL)
+		return;
+
+	uncharge_beancounter(ub, UB_LOCKEDPAGES, size >> PAGE_SHIFT);
+}
+
+
+static inline void do_ub_tmpfs_respages_inc(struct user_beancounter *ub)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&ub->ub_lock, flags);
+	ub->ub_tmpfs_respages++;
+	__ub_update_physpages(ub);
+	__ub_update_oomguarpages(ub);
+	spin_unlock_irqrestore(&ub->ub_lock, flags);
+}
+
+void ub_tmpfs_respages_inc(struct shmem_inode_info *shi)
+{
+	struct user_beancounter *ub;
+
+	for (ub = shi->shmi_ub; ub != NULL; ub = ub->parent)
+		do_ub_tmpfs_respages_inc(ub);
+}
+
+static inline void do_ub_tmpfs_respages_sub(struct user_beancounter *ub,
+		unsigned long size)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&ub->ub_lock, flags);
+	/* catch possible underflow: clamp size to what is actually held */
+	if (ub->ub_tmpfs_respages < size) {
+		uncharge_warn(ub, UB_TMPFSPAGES,
+				size, ub->ub_tmpfs_respages);
+		size = ub->ub_tmpfs_respages;
+	}
+	ub->ub_tmpfs_respages -= size;
+	/* refresh the derived physpages and oomguarpages held values */
+	__ub_update_physpages(ub);
+	__ub_update_oomguarpages(ub);
+	spin_unlock_irqrestore(&ub->ub_lock, flags);
+}
+
+void ub_tmpfs_respages_sub(struct shmem_inode_info *shi,
+		unsigned long size)
+{
+	struct user_beancounter *ub;
+
+	for (ub = shi->shmi_ub; ub != NULL; ub = ub->parent)
+		do_ub_tmpfs_respages_sub(ub, size);
+}
+
+int ub_shmpages_charge(struct shmem_inode_info *shi, unsigned long size)
+{
+	int ret;
+	unsigned long flags;
+	struct user_beancounter *ub;
+
+	ub = shi->shmi_ub;
+	if (ub == NULL)
+		return 0;
+
+	for (; ub->parent != NULL; ub = ub->parent);
+	spin_lock_irqsave(&ub->ub_lock, flags);
+	ret = __charge_beancounter_locked(ub, UB_SHMPAGES, size, UB_HARD);
+	if (ret == 0)
+		__ub_update_privvm(ub);
+	spin_unlock_irqrestore(&ub->ub_lock, flags);
+	return ret;
+}
+
+void ub_shmpages_uncharge(struct shmem_inode_info *shi, unsigned long size)
+{
+	unsigned long flags;
+	struct user_beancounter *ub;
+
+	ub = shi->shmi_ub;
+	if (ub == NULL)
+		return;
+
+	for (; ub->parent != NULL; ub = ub->parent);
+	spin_lock_irqsave(&ub->ub_lock, flags);
+	__uncharge_beancounter_locked(ub, UB_SHMPAGES, size);
+	__ub_update_privvm(ub);
+	spin_unlock_irqrestore(&ub->ub_lock, flags);
+}
+
+#ifdef CONFIG_USER_SWAP_ACCOUNTING
+static inline void do_ub_swapentry_inc(struct user_beancounter *ub)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&ub->ub_lock, flags);
+	ub->ub_swap_pages++;
+	__ub_update_oomguarpages(ub);
+	spin_unlock_irqrestore(&ub->ub_lock, flags);
+}
+
+void ub_swapentry_inc(struct swap_info_struct *si, pgoff_t num,
+		struct user_beancounter *ub)
+{
+	si->swap_ubs[num] = get_beancounter(ub);
+	for (; ub != NULL; ub = ub->parent)
+		do_ub_swapentry_inc(ub);
+}
+EXPORT_SYMBOL(ub_swapentry_inc);
+
+static inline void do_ub_swapentry_dec(struct user_beancounter *ub)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&ub->ub_lock, flags);
+	if (ub->ub_swap_pages <= 0)
+		uncharge_warn(ub, UB_SWAPPAGES, 1, ub->ub_swap_pages);
+	else
+		ub->ub_swap_pages--;
+	__ub_update_oomguarpages(ub);
+	spin_unlock_irqrestore(&ub->ub_lock, flags);
+}
+
+void ub_swapentry_dec(struct swap_info_struct *si, pgoff_t num)
+{
+	struct user_beancounter *ub, *ubp;
+
+	ub = si->swap_ubs[num];
+	si->swap_ubs[num] = NULL;
+	for (ubp = ub; ubp != NULL; ubp = ubp->parent)
+		do_ub_swapentry_dec(ubp);
+	put_beancounter(ub);
+}
+EXPORT_SYMBOL(ub_swapentry_dec);
+
+int ub_swap_init(struct swap_info_struct *si, pgoff_t num)
+{
+	struct user_beancounter **ubs;
+
+	ubs = vmalloc(num * sizeof(struct user_beancounter *));
+	if (ubs == NULL)
+		return -ENOMEM;
+
+	memset(ubs, 0, num * sizeof(struct user_beancounter *));
+	si->swap_ubs = ubs;
+	return 0;
+}
+
+void ub_swap_fini(struct swap_info_struct *si)
+{
+	if (si->swap_ubs) {
+		vfree(si->swap_ubs);
+		si->swap_ubs = NULL;
+	}
+}
+#endif
+
+static int vmguar_enough_memory(struct vnotifier_block *self,
+		unsigned long event, void *arg, int old_ret)
+{
+	struct user_beancounter *ub;
+
+	/* kernel threads have no mm, and an mm may carry no beancounter */
+	if (event != VIRTINFO_ENOUGHMEM || current->mm == NULL)
+		return old_ret;
+	ub = current->mm->mm_ub;
+	if (ub == NULL)
+		return old_ret;
+	for (; ub->parent != NULL; ub = ub->parent);
+	return ub->ub_parms[UB_PRIVVMPAGES].held >
+		ub->ub_parms[UB_VMGUARPAGES].barrier ? old_ret : NOTIFY_OK;
+}
+
+static struct vnotifier_block vmguar_notifier_block = {
+	.notifier_call = vmguar_enough_memory
+};
+
+static int __init init_vmguar_notifier(void)
+{
+	virtinfo_notifier_register(VITYPE_GENERAL, &vmguar_notifier_block);
+	return 0;
+}
+
+static void __exit fini_vmguar_notifier(void)
+{
+	virtinfo_notifier_unregister(VITYPE_GENERAL, &vmguar_notifier_block);
+}
+
+module_init(init_vmguar_notifier);
+module_exit(fini_vmguar_notifier);
diff -uprN linux-2.6.16/kernel/ub/ub_proc.c linux-2.6.16.ovz/kernel/ub/ub_proc.c
--- linux-2.6.16/kernel/ub/ub_proc.c	1969-12-31 19:00:00.000000000 -0500
+++ linux-2.6.16.ovz/kernel/ub/ub_proc.c	2006-07-05 08:34:56.000000000 -0400
@@ -0,0 +1,389 @@
+/*
+ *  kernel/ub/ub_proc.c
+ *
+ *  Copyright (C)  1998-2000  Andrey V. Savochkin <saw@saw.sw.com.sg>
+ *  Copyright (C)  2005       SWsoft
+ *  All rights reserved.
+ *  
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ * TODO:
+ *
+ * Changes:
+ */
+
+#include <linux/errno.h>
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/proc_fs.h>
+
+#include <ub/beancounter.h>
+#include <ub/ub_hash.h>
+#include <ub/ub_debug.h>
+#include <ub/ub_page.h>
+
+#include <asm/page.h>
+#include <asm/uaccess.h>
+
+/* 
+ * we have 8 format strings depending on:
+ * 1. BITS_PER_LONG
+ * 2. CONFIG_UBC_KEEP_UNUSED
+ * 3. resource number (see out_proc_beancounter)
+ */
+
+#ifdef CONFIG_UBC_KEEP_UNUSED
+#define REF_FORMAT	"%5.5s %4i: %-12s "
+#define UID_HEAD_STR	"uid ref"
+#else
+#define REF_FORMAT	"%10.10s: %-12s "
+#define UID_HEAD_STR	"uid"
+#endif
+#define REF2_FORMAT	"%10s  %-12s "
+
+#if BITS_PER_LONG == 32
+#define RES_FORMAT	"%10lu %10lu %10lu %10lu %10lu"
+#define HEAD_FORMAT	"%10s %10s %10s %10s %10s"
+#define UB_PROC_LINE_TEXT	(10+2+12+1+10+1+10+1+10+1+10+1+10)
+#else
+#define RES_FORMAT	"%20lu %20lu %20lu %20lu %20lu"
+#define HEAD_FORMAT	"%20s %20s %20s %20s %20s"
+#define UB_PROC_LINE_TEXT	(10+2+12+1+20+1+20+1+20+1+20+1+20)
+#endif
+
+#define UB_PROC_LINE_LEN	(UB_PROC_LINE_TEXT + 1)
+
+static void out_proc_version(char *buf)
+{
+	int len;
+
+	len = sprintf(buf, "Version: 2.5");
+	memset(buf + len, ' ', UB_PROC_LINE_TEXT - len);
+	buf[UB_PROC_LINE_TEXT] = '\n';
+}
+
+static void out_proc_head(char *buf)
+{
+	sprintf(buf, REF2_FORMAT HEAD_FORMAT,
+			UID_HEAD_STR, "resource", "held", "maxheld", 
+			"barrier", "limit", "failcnt");
+	buf[UB_PROC_LINE_TEXT] = '\n';
+}
+
+static void out_proc_beancounter(char *buf, struct user_beancounter *ub, int r)
+{
+	if (r == 0) {
+		char tmpbuf[64];
+		print_ub_uid(ub, tmpbuf, sizeof(tmpbuf));
+		sprintf(buf, REF_FORMAT RES_FORMAT, 
+			tmpbuf, 
+#ifdef CONFIG_UBC_KEEP_UNUSED
+			atomic_read(&ub->ub_refcount),
+#endif
+			ub_rnames[r], ub->ub_parms[r].held, 
+			ub->ub_parms[r].maxheld, ub->ub_parms[r].barrier, 
+			ub->ub_parms[r].limit, ub->ub_parms[r].failcnt);
+	} else
+		sprintf(buf, REF2_FORMAT RES_FORMAT, 
+			"", ub_rnames[r],
+			ub->ub_parms[r].held, ub->ub_parms[r].maxheld,
+			ub->ub_parms[r].barrier, ub->ub_parms[r].limit,
+			ub->ub_parms[r].failcnt);
+
+	buf[UB_PROC_LINE_TEXT] = '\n';
+}
+
+static int ub_accessible(struct user_beancounter *ub,
+		struct user_beancounter *exec_ub,
+		struct file *file)
+{
+	struct user_beancounter *exec_top = exec_ub, *ub_top = ub;
+
+	while (exec_top->parent != NULL)
+		exec_top = exec_top->parent;
+	while (ub_top->parent != NULL)
+		ub_top = ub_top->parent;
+	if (exec_top != get_ub0() && ub_top != exec_top)
+		return 0;	/* foreign tree, and reader is not under ub0 */
+	return ub->parent == NULL || file->private_data != NULL;
+}
+
+static ssize_t ub_proc_read(struct file *file, char *usrbuf, size_t len,
+		loff_t *poff)
+{
+	ssize_t retval;
+	char *buf;
+	unsigned long flags;
+	int i, resource;
+	struct ub_hash_slot *slot;
+	struct user_beancounter *ub;
+	struct user_beancounter *exec_ub = get_exec_ub();
+	loff_t n, off;
+	int rem, produced, job, tocopy;
+	const int is_capable =
+		(capable(CAP_DAC_OVERRIDE) || capable(CAP_DAC_READ_SEARCH));
+
+	retval = -ENOBUFS;
+	buf = (char *)__get_free_page(GFP_KERNEL);
+	if (buf == NULL)
+		goto out;
+
+	retval = 0;	/* readers lacking both capabilities get an empty read */
+	if (!is_capable)
+		goto out_free;
+
+	off = *poff;
+	if (off < 0) /* can't happen, just in case */
+		goto inval;
+
+again:
+	i = 0;
+	slot = ub_hash;
+	n = off; /* The amount of data to skip */
+	produced = 0;
+	if (n < (UB_PROC_LINE_LEN * 2)) {
+		if (n < UB_PROC_LINE_LEN) {
+			out_proc_version(buf);
+			produced += UB_PROC_LINE_LEN;
+			n += UB_PROC_LINE_LEN;
+		}
+		out_proc_head(buf + produced);
+		produced += UB_PROC_LINE_LEN;
+		n += UB_PROC_LINE_LEN;
+	}
+	n -= (2 * UB_PROC_LINE_LEN);	/* skip the two header lines */
+	spin_lock_irqsave(&ub_hash_lock, flags);
+	while (1) {
+		for (ub = slot->ubh_beans; 
+		     ub != NULL && n >= (UB_RESOURCES * UB_PROC_LINE_LEN);
+		     ub = ub->ub_next)
+			if (is_capable && ub_accessible(ub, exec_ub, file))
+				n -= (UB_RESOURCES * UB_PROC_LINE_LEN);
+		if (ub != NULL || ++i >= UB_HASH_SIZE)
+			break;
+		++slot;
+	}
+	rem = n; /* the amount of the data in the buffer to skip */
+	job = PAGE_SIZE - UB_PROC_LINE_LEN + 1; /* end of buffer data */
+	if (len < job - rem)
+		job = rem + len;
+	while (ub != NULL && produced < job) {
+		if (is_capable && ub_accessible(ub, exec_ub, file))
+			for (resource = 0;
+				produced < job && resource < UB_RESOURCES;
+				resource++, produced += UB_PROC_LINE_LEN)
+			{
+				out_proc_beancounter(buf + produced,
+						ub, resource);
+			}
+		if (produced >= job)
+			break;
+		/* Find the next beancounter to produce more data. */
+		ub = ub->ub_next;
+		while (ub == NULL && ++i < UB_HASH_SIZE) {
+			++slot;
+			ub = slot->ubh_beans;
+		}
+	}
+
+	spin_unlock_irqrestore(&ub_hash_lock, flags);
+	ub_debug(UBD_ALLOC, KERN_DEBUG "UB_PROC: produced %d, job %d, rem %d\n",
+			produced, job, rem);
+
+	/* 
+	 * Temporary buffer `buf' contains `produced' bytes.
+	 * Extract no more than `len' bytes at offset `rem'.
+	 */
+	if (produced <= rem)
+		goto out_free;
+	tocopy = produced - rem;
+	if (len < tocopy)
+		tocopy = len;
+	if (!tocopy)
+		goto out_free;
+	if (copy_to_user(usrbuf, buf + rem, tocopy))
+		goto fault;
+	off += tocopy; /* can't overflow */
+	*poff = off;
+	len -= tocopy;
+	retval += tocopy;
+	if (!len)
+		goto out_free;
+	usrbuf += tocopy;
+	goto again;	/* room left in usrbuf: regenerate from the new offset */
+
+fault:
+	retval = -EFAULT;
+out_free:
+	free_page((unsigned long)buf);
+out:
+	return retval;
+
+inval:
+	retval = -EINVAL;
+	goto out_free;
+}
+
+static int ub_proc_open(struct inode *inode, struct file *file)
+{
+	file->private_data = strcmp(file->f_dentry->d_name.name,
+						"user_beancounters") ?
+						(void *)-1 : NULL;
+	return 0;
+}
+
+static struct file_operations ub_file_operations = {
+	.read = &ub_proc_read,
+	.open = &ub_proc_open
+};
+
+#ifdef CONFIG_UBC_DEBUG_KMEM
+#include <linux/seq_file.h>
+#include <linux/kmem_cache.h>
+
+static void *ubd_start(struct seq_file *m, loff_t *pos)
+{
+	loff_t n = *pos;
+	struct user_beancounter *ub;
+	long slot;
+
+	spin_lock_irq(&ub_hash_lock);
+	for (slot = 0; slot < UB_HASH_SIZE; slot++)
+		for (ub = ub_hash[slot].ubh_beans; ub; ub = ub->ub_next) {
+			if (n == 0) {
+				m->private = (void *)slot;
+				return (void *)ub;
+			}
+			n--;
+		}
+	return NULL;
+}
+
+/* seq_file ->next: advance to the beancounter that follows p in the hash. */
+static void *ubd_next(struct seq_file *m, void *p, loff_t *pos)
+{
+	struct user_beancounter *ub = ((struct user_beancounter *)p)->ub_next;
+	long slot = (long)m->private;
+
+	++*pos;
+
+	/*
+	 * The current chain is exhausted: walk the following slots until
+	 * one has an entry.  Running off the table ends the iteration and
+	 * leaves m->private as it was.
+	 */
+	while (ub == NULL) {
+		if (++slot == UB_HASH_SIZE)
+			return NULL;
+		ub = ub_hash[slot].ubh_beans;
+	}
+
+	m->private = (void *)slot;	/* slot of the element handed back */
+	return ub;
+}
+
+static void ubd_stop(struct seq_file *m, void *p)
+{
+	spin_unlock_irq(&ub_hash_lock);
+}
+
+#define PROC_LINE_FMT	"\t%-17s\t%5lu\t%5lu\n"
+
+static int ubd_show(struct seq_file *m, void *p)
+{
+	struct user_beancounter *ub;
+	struct ub_cache_counter *cc;
+	long pages, vmpages, pbc, swap, unmap;
+	int i;
+	char id[64];
+
+	ub = (struct user_beancounter *)p;
+	print_ub_uid(ub, id, sizeof(id));
+	seq_printf(m, "%s:%d\n", id, atomic_read(&ub->ub_refcount));
+
+	pages = vmpages = pbc = swap = unmap = 0;
+	for (i = 0; i < NR_CPUS; i++) {
+		pages += ub->ub_stat[i].pages_charged;
+		vmpages += ub->ub_stat[i].vmalloc_charged;
+		pbc += ub->ub_stat[i].pbcs;
+		swap += ub->ub_stat[i].swapin;
+		unmap += ub->ub_stat[i].unmap;
+	}
+	if (pages < 0)
+		pages = 0;
+	if (vmpages < 0)
+		vmpages = 0;
+	seq_printf(m, PROC_LINE_FMT, "pages", pages, PAGE_SIZE);
+	seq_printf(m, PROC_LINE_FMT, "vmalloced", vmpages, PAGE_SIZE);
+
+	seq_printf(m, PROC_LINE_FMT, ub_rnames[UB_UNUSEDPRIVVM],
+			ub->ub_unused_privvmpages, PAGE_SIZE);
+	seq_printf(m, PROC_LINE_FMT, ub_rnames[UB_TMPFSPAGES],
+			ub->ub_tmpfs_respages, PAGE_SIZE);
+	seq_printf(m, PROC_LINE_FMT, ub_rnames[UB_SWAPPAGES],
+			ub->ub_swap_pages, PAGE_SIZE);
+	seq_printf(m, PROC_LINE_FMT, "pbcs", pbc,
+			(unsigned long)sizeof(struct page_beancounter));
+
+	seq_printf(m, PROC_LINE_FMT, "swapin", swap, 0UL);
+	seq_printf(m, PROC_LINE_FMT, "unmap", unmap, 0UL);
+	/* interrupts are disabled by locking ub_hash_lock */
+	spin_lock(&cc_lock);
+	list_for_each_entry (cc, &ub->ub_cclist, ulist) {
+		kmem_cache_t *cachep;
+
+		cachep = cc->cachep;
+		seq_printf(m, PROC_LINE_FMT,
+				cachep->name,
+				cc->counter,
+				(unsigned long)cachep->objuse);
+	}
+	spin_unlock(&cc_lock);
+	return 0;
+}
+
+static struct seq_operations kmemdebug_op = {
+	.start	= ubd_start,
+	.next	= ubd_next,
+	.stop	= ubd_stop,
+	.show	= ubd_show,
+};
+
+static int kmem_debug_open(struct inode *inode, struct file *file)
+{
+	return seq_open(file, &kmemdebug_op);
+}
+
+static struct file_operations kmem_debug_ops = {
+	.open		= kmem_debug_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= seq_release,
+};
+#endif
+
+void __init ub_init_proc(void)
+{
+	struct proc_dir_entry *entry;
+
+	/* ub_proc_open() tells the files apart by this exact name */
+	entry = create_proc_entry("user_beancounters", S_IRUGO, NULL);
+	if (entry == NULL)
+		panic("Can't create /proc/user_beancounters entry!\n");
+	entry->proc_fops = &ub_file_operations;
+
+	/* same file operations under a second name */
+	entry = create_proc_entry("user_beancounters_sub", S_IRUGO, NULL);
+	if (entry == NULL)
+		panic("Can't create /proc/user_beancounters_sub entry!\n");
+	entry->proc_fops = &ub_file_operations;
+
+#ifdef CONFIG_UBC_DEBUG_KMEM
+	/* debug statistics, served through the seq_file operations */
+	entry = create_proc_entry("user_beancounters_debug", S_IRUGO, NULL);
+	if (entry == NULL)
+		panic("Can't create /proc/user_beancounters_debug entry!\n");
+	entry->proc_fops = &kmem_debug_ops;
+#endif
+}
diff -uprN linux-2.6.16/kernel/ub/ub_stat.c linux-2.6.16.ovz/kernel/ub/ub_stat.c
--- linux-2.6.16/kernel/ub/ub_stat.c	1969-12-31 19:00:00.000000000 -0500
+++ linux-2.6.16.ovz/kernel/ub/ub_stat.c	2006-07-05 08:34:56.000000000 -0400
@@ -0,0 +1,465 @@
+/*
+ *  kernel/ub/ub_stat.c
+ *
+ *  Copyright (C) 2005  SWsoft
+ *  All rights reserved.
+ *  
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/timer.h>
+#include <linux/sched.h>
+#include <linux/init.h>
+#include <linux/jiffies.h>
+#include <linux/list.h>
+#include <linux/errno.h>
+#include <linux/suspend.h>
+
+#include <asm/uaccess.h>
+#include <asm/param.h>
+
+#include <ub/beancounter.h>
+#include <ub/ub_hash.h>
+#include <ub/ub_stat.h>
+
+static spinlock_t ubs_notify_lock = SPIN_LOCK_UNLOCKED;
+static LIST_HEAD(ubs_notify_list);
+static long ubs_min_interval;
+static ubstattime_t ubs_start_time, ubs_end_time;
+static struct timer_list ubs_timer;
+/* Copy the uids of all beancounters that have no parent to user buffer @buf. */
+static int ubstat_get_list(void *buf, long size)
+{
+	int retval;		/* bytes copied so far, or -errno */
+	unsigned long flags;
+	int slotnr;
+	struct ub_hash_slot *slot;
+	struct user_beancounter *ub, *last_ub;
+	long *page, *ptr, *end;	/* one-page staging buffer and its cursors */
+	int len;
+
+	page = (long *)__get_free_page(GFP_KERNEL);
+	if (page == NULL)
+		return -ENOMEM;
+
+	retval = 0;
+	slotnr = 0;
+	slot = ub_hash;
+	last_ub = NULL;
+	while (1) {
+		ptr = page;
+		end = page + PAGE_SIZE / sizeof(*ptr);
+
+		spin_lock_irqsave(&ub_hash_lock, flags);
+		if (last_ub == NULL)
+			ub = slot->ubh_beans;
+		else
+			ub = last_ub->ub_next;	/* resume after the pinned one */
+		while (1) {
+			for (; ub != NULL; ub = ub->ub_next) {
+				if (ub->parent != NULL)
+					continue;
+				*ptr++ = ub->ub_uid;
+				if (ptr == end)
+					break;	/* page full; ub stays non-NULL */
+			}
+			if (ptr == end)
+				break;
+			++slot;
+			if (++slotnr >= UB_HASH_SIZE)
+				break;	/* every hash slot has been visited */
+			ub = slot->ubh_beans;
+		}
+		if (ptr == page)
+			goto out_unlock;	/* nothing collected this pass */
+		if (ub != NULL)
+			get_beancounter(ub);	/* pin it while unlocked */
+		spin_unlock_irqrestore(&ub_hash_lock, flags);
+
+		if (last_ub != NULL)
+			put_beancounter(last_ub);
+		last_ub = ub; /* last visited beancounter in the slot */
+
+		len = min_t(long, (ptr - page) * sizeof(*ptr), size);
+		if (copy_to_user(buf, page, len)) {
+			retval = -EFAULT;
+			break;
+		}
+		retval += len;
+		if (len < PAGE_SIZE)
+			break;	/* short page: list or user buffer exhausted */
+		buf += len;
+		size -= len;
+	}
+out:
+	if (last_ub != NULL)
+		put_beancounter(last_ub);
+	free_page((unsigned long)page);
+	return retval;
+
+out_unlock:
+	spin_unlock_irqrestore(&ub_hash_lock, flags);
+	goto out;
+}
+/* Copy interval start/end and current time to @buf; bytes copied or -EFAULT. */
+static int ubstat_gettime(void *buf, long size)
+{
+	ubgettime_t data;
+	int retval;
+
+	spin_lock(&ubs_notify_lock);
+	data.start_time = ubs_start_time;
+	data.end_time = ubs_end_time;
+	data.cur_time = ubs_start_time + (jiffies - ubs_start_time * HZ) / HZ;
+	spin_unlock(&ubs_notify_lock);
+
+	retval = min_t(long, sizeof(data), size);	/* cap at caller's size */
+	if (copy_to_user(buf, &data, retval))
+		retval = -EFAULT;
+	return retval;
+}
+/* Put interval times and stored maxheld/failcnt of resource @res in @kbuf. */
+static int ubstat_do_read_one(struct user_beancounter *ub, int res, void *kbuf)
+{
+	struct {
+		ubstattime_t	start_time;
+		ubstattime_t	end_time;
+		ubstatparm_t	param[1];
+	} *data;
+
+	data = kbuf;
+	data->start_time = ubs_start_time;
+	data->end_time = ubs_end_time;
+
+	data->param[0].maxheld = ub->ub_store[res].maxheld;
+	data->param[0].failcnt = ub->ub_store[res].failcnt;
+
+	return sizeof(*data);	/* number of bytes filled in */
+}
+/* Store interval times plus maxheld/failcnt per resource while they fit. */
+static int ubstat_do_read_all(struct user_beancounter *ub, void *kbuf, int size)
+{
+	int wrote;	/* running byte count, also the return value */
+	struct {
+		ubstattime_t	start_time;
+		ubstattime_t	end_time;
+		ubstatparm_t	param[UB_RESOURCES];
+	} *data;
+	int resource;
+
+	data = kbuf;
+	data->start_time = ubs_start_time;
+	data->end_time = ubs_end_time;
+	wrote = sizeof(data->start_time) + sizeof(data->end_time);
+
+	for (resource = 0; resource < UB_RESOURCES; resource++) {
+		if (size < wrote + sizeof(data->param[resource]))
+			break;	/* next entry would not fit in @size */
+		data->param[resource].maxheld = ub->ub_store[resource].maxheld;
+		data->param[resource].failcnt = ub->ub_store[resource].failcnt;
+		wrote += sizeof(data->param[resource]); 
+	}
+
+	return wrote;
+}
+/* Store interval times plus a copy of ub_store[] per resource while it fits. */
+static int ubstat_do_read_full(struct user_beancounter *ub, void *kbuf,
+		int size)
+{
+	int wrote;	/* running byte count, also the return value */
+	struct {
+		ubstattime_t	start_time;
+		ubstattime_t	end_time;
+		ubstatparmf_t	param[UB_RESOURCES];
+	} *data;
+	int resource;
+
+	data = kbuf;
+	data->start_time = ubs_start_time;
+	data->end_time = ubs_end_time;
+	wrote = sizeof(data->start_time) + sizeof(data->end_time);
+
+	for (resource = 0; resource < UB_RESOURCES; resource++) {
+		if (size < wrote + sizeof(data->param[resource]))
+			break;	/* next entry would not fit in @size */
+		/* The beginning of ubstatparmf_t matches struct ubparm. */
+		memcpy(&data->param[resource], &ub->ub_store[resource],
+				sizeof(ub->ub_store[resource]));
+		data->param[resource].__unused1 = 0;
+		data->param[resource].__unused2 = 0;
+		wrote += sizeof(data->param[resource]);
+	}
+	return wrote;
+}
+/* Run the UBSTAT_READ_* command in @cmd on @ub and copy the result to @buf. */
+static int ubstat_get_stat(struct user_beancounter *ub, long cmd,
+		void *buf, long size)
+{
+	void *kbuf;	/* one-page kernel staging buffer */
+	int retval;
+
+	kbuf = (void *)__get_free_page(GFP_KERNEL);
+	if (kbuf == NULL)
+		return -ENOMEM;
+
+	spin_lock(&ubs_notify_lock);	/* also held by do_notifies() */
+	switch (UBSTAT_CMD(cmd)) {
+		case UBSTAT_READ_ONE:
+			retval = -EINVAL;
+			if (UBSTAT_PARMID(cmd) >= UB_RESOURCES)
+				break;
+			retval = ubstat_do_read_one(ub,
+					UBSTAT_PARMID(cmd), kbuf);
+			break;
+		case UBSTAT_READ_ALL:
+			retval = ubstat_do_read_all(ub, kbuf, PAGE_SIZE);
+			break;
+		case UBSTAT_READ_FULL:
+			retval = ubstat_do_read_full(ub, kbuf, PAGE_SIZE);
+			break;
+		default:
+			retval = -EINVAL;
+	}
+	spin_unlock(&ubs_notify_lock);
+
+	if (retval > 0) {
+		retval = min_t(long, retval, size);	/* cap at caller's size */
+		if (copy_to_user(buf, kbuf, retval))
+			retval = -EFAULT;
+	}
+
+	free_page((unsigned long)kbuf);
+	return retval;
+}
+/* Register, replace or (signum == 0) cancel current's notification request. */
+static int ubstat_handle_notifrq(ubnotifrq_t *req)
+{
+	struct ub_stat_notify *new_notify;
+	struct list_head *entry;
+	struct task_struct *tsk_to_free;
+
+	/* Validate first, so the error return has no allocation to leak. */
+	if (req->maxinterval < 1)
+		return -EINVAL;
+
+	/* The object itself must be sized, not the pointer to it. */
+	new_notify = kmalloc(sizeof(*new_notify), GFP_KERNEL);
+	if (new_notify == NULL)
+		return -ENOMEM;
+
+	tsk_to_free = NULL;
+	INIT_LIST_HEAD(&new_notify->list);
+
+	spin_lock(&ubs_notify_lock);
+	list_for_each(entry, &ubs_notify_list) {
+		struct ub_stat_notify *notify;
+
+		notify = list_entry(entry, struct ub_stat_notify, list);
+		if (notify->task == current) {
+			kfree(new_notify);
+			new_notify = notify;
+			break;
+		}
+	}
+
+	if (req->maxinterval > TIME_MAX_SEC)
+		req->maxinterval = TIME_MAX_SEC;
+	if (req->maxinterval < ubs_min_interval) {
+		unsigned long dif;
+
+		ubs_min_interval = req->maxinterval;
+		dif = (ubs_timer.expires - jiffies + HZ - 1) / HZ;
+		if (dif > req->maxinterval)
+			mod_timer(&ubs_timer,
+					ubs_timer.expires -
+					(dif - req->maxinterval) * HZ);
+	}
+
+	/* An earlier request by this task is unlinked: replaced or dropped. */
+	if (entry != &ubs_notify_list) {
+		list_del(&new_notify->list);
+		tsk_to_free = new_notify->task;
+	}
+	if (req->signum) {
+		new_notify->task = current;
+		get_task_struct(new_notify->task);
+		new_notify->signum = req->signum;
+		list_add(&new_notify->list, &ubs_notify_list);
+	} else
+		kfree(new_notify);
+	spin_unlock(&ubs_notify_lock);
+	if (tsk_to_free != NULL)
+		put_task_struct(tsk_to_free);
+	return 0;
+}
+
+/*
+ * former sys_ubstat: run query @func, then the notify request at @arg2
+ */
+long do_ubstat(int func, unsigned long arg1, unsigned long arg2, void *buf, 
+		long size)
+{
+	int retval;
+	struct user_beancounter *ub;
+
+	if (func == UBSTAT_UBPARMNUM)
+		return UB_RESOURCES;	/* answered before the capability check */
+	if (func == UBSTAT_UBLIST)
+		return ubstat_get_list(buf, size);	/* likewise */
+	if (!(capable(CAP_DAC_OVERRIDE) || capable(CAP_DAC_READ_SEARCH)))
+		return -EPERM;
+
+	if (func == UBSTAT_GETTIME) {
+		retval = ubstat_gettime(buf, size);
+		goto notify;
+	}
+
+	ub = get_exec_ub();
+	if (ub != NULL && ub->ub_uid == arg1)
+		get_beancounter(ub);
+	else /* FIXME must be if (ve_is_super) */
+		ub = get_beancounter_byuid(arg1, 0);
+
+	if (ub == NULL)
+		return -ESRCH;
+
+	retval = ubstat_get_stat(ub, func, buf, size);
+	put_beancounter(ub);
+notify:
+	/* Handle request for notification; an unreadable @arg2 fails the call */
+	if (retval >= 0) {
+		ubnotifrq_t notifrq;
+		int err;
+
+		err = -EFAULT;
+		if (!copy_from_user(&notifrq, (void *)arg2, sizeof(notifrq)))
+			err = ubstat_handle_notifrq(&notifrq);
+		if (err)
+			retval = err;	/* a notify error replaces the byte count */
+	}
+
+	return retval;
+}
+/* Copy ub_parms[] into ub_store[], then restart minheld/maxheld from held. */
+static void ubstat_save_onestat(struct user_beancounter *ub)
+{
+	int resource;
+
+	/* called with local irq disabled */
+	spin_lock(&ub->ub_lock);
+	for (resource = 0; resource < UB_RESOURCES; resource++) {
+		memcpy(&ub->ub_store[resource], &ub->ub_parms[resource],
+			sizeof(struct ubparm));
+		ub->ub_parms[resource].minheld = 
+			ub->ub_parms[resource].maxheld =
+			ub->ub_parms[resource].held;
+	}
+	spin_unlock(&ub->ub_lock);
+}
+/* Run ubstat_save_onestat() on each beancounter, under ub_hash_lock. */
+static void ubstat_save_statistics(void)
+{
+	unsigned long flags;
+	int i;
+	struct user_beancounter *ub;
+
+	spin_lock_irqsave(&ub_hash_lock, flags);	/* irqs off for the callee */
+	for_each_beancounter(i, ub)
+			ubstat_save_onestat(ub);
+	spin_unlock_irqrestore(&ub_hash_lock, flags);
+}
+/* ubs_timer callback: wake the task whose pointer is passed in @__data. */
+static void ubstatd_timeout(unsigned long __data)
+{
+	struct task_struct *p;
+
+	p = (struct task_struct *) __data;
+	wake_up_process(p);
+}
+
+/*
+ * Safe wrapper for send_sig. It prevents a race with release_task
+ * for sighand.
+ * Should be called under tasklist_lock.
+ */
+static void task_send_sig(struct ub_stat_notify *notify)
+{
+	if (likely(notify->task->sighand != NULL))
+		send_sig(notify->signum, notify->task, 1);
+}
+/* Close the interval: rearm timer, save stats, signal and free all requests. */
+static inline void do_notifies(void)
+{
+	LIST_HEAD(notif_free_list);	/* entries to release after unlocking */
+	struct ub_stat_notify *notify;
+	struct ub_stat_notify *tmp;
+
+	spin_lock(&ubs_notify_lock);
+	ubs_start_time = ubs_end_time;
+	/*
+	 * the expression below relies on time being unsigned long and
+	 * arithmetic promotion rules
+	 */
+	ubs_end_time += (ubs_timer.expires - ubs_start_time * HZ) / HZ;
+	mod_timer(&ubs_timer, ubs_timer.expires + ubs_min_interval * HZ);
+	ubs_min_interval = TIME_MAX_SEC;	/* until a new request lowers it */
+	/* save statistics accumulated for the interval */
+	ubstat_save_statistics();
+	/* send signals; each request is one-shot and moves to the free list */
+	read_lock(&tasklist_lock);
+	while (!list_empty(&ubs_notify_list)) {
+		notify = list_entry(ubs_notify_list.next,
+				struct ub_stat_notify, list);
+		task_send_sig(notify);
+		list_del(&notify->list);
+		list_add(&notify->list, &notif_free_list);
+	}
+	read_unlock(&tasklist_lock);
+	spin_unlock(&ubs_notify_lock);
+
+	list_for_each_entry_safe(notify, tmp, &notif_free_list, list) {
+		put_task_struct(notify->task);
+		kfree(notify);
+	}
+}
+
+/*
+ * Kernel thread: sleeps until ubs_timer is due, then calls do_notifies()
+ */
+static int ubstatd(void *unused)
+{
+	/* daemonize call will take care of signals */
+	daemonize("ubstatd");
+
+	ubs_timer.data = (unsigned long)current;  /* task the callback wakes */
+	ubs_timer.function = ubstatd_timeout;
+	add_timer(&ubs_timer);
+
+	while (1) {
+		set_task_state(current, TASK_INTERRUPTIBLE);
+		if (time_after(ubs_timer.expires, jiffies)) {	/* not due yet */
+			schedule();
+			try_to_freeze();
+			continue;
+		}
+
+		__set_task_state(current, TASK_RUNNING);
+		do_notifies();
+	}
+	return 0;	/* not reached: the loop above never exits */
+}
+/* Prepare ubs_timer and the interval defaults, then spawn the ubstatd thread. */
+static int __init ubstatd_init(void)
+{
+	init_timer(&ubs_timer);		/* armed later, by ubstatd itself */
+	ubs_timer.expires = TIME_MAX_JIF;
+	ubs_min_interval = TIME_MAX_SEC;
+	ubs_start_time = ubs_end_time = 0;
+
+	kernel_thread(ubstatd, NULL, 0);	/* NOTE(review): result unchecked */
+	return 0;
+}
+
+module_init(ubstatd_init);
diff -uprN linux-2.6.16/kernel/ub/ub_sys.c linux-2.6.16.ovz/kernel/ub/ub_sys.c
--- linux-2.6.16/kernel/ub/ub_sys.c	1969-12-31 19:00:00.000000000 -0500
+++ linux-2.6.16.ovz/kernel/ub/ub_sys.c	2006-07-05 08:34:56.000000000 -0400
@@ -0,0 +1,154 @@
+/*
+ *  kernel/ub/ub_sys.c
+ *
+ *  Copyright (C) 2005  SWsoft
+ *  All rights reserved.
+ *  
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#include <linux/config.h>
+#include <asm/uaccess.h>
+
+#include <ub/beancounter.h>
+
+#ifndef CONFIG_USER_RESOURCE
+asmlinkage long sys_getluid(void)
+{
+	return -ENOSYS;
+}
+
+asmlinkage long sys_setluid(uid_t uid)
+{
+	return -ENOSYS;
+}
+
+asmlinkage long sys_setublimit(uid_t uid, unsigned long resource, 
+		unsigned long *limits)
+{
+	return -ENOSYS;
+}
+
+asmlinkage long sys_ubstat(int func, unsigned long arg1, unsigned long arg2, 
+		void *buf, long size)
+{
+	return -ENOSYS;
+}
+#else /* CONFIG_USER_RESOURCE */
+
+/*
+ *	The (rather boring) getluid syscall
+ */
+asmlinkage long sys_getluid(void)
+{
+	struct user_beancounter *ub;
+
+	ub = get_exec_ub();
+	if (ub == NULL)
+		return -EINVAL;
+
+	return ub->ub_uid;
+}
+
+/*
+ *	The setluid syscall
+ */
+asmlinkage long sys_setluid(uid_t uid)
+{
+	struct user_beancounter *ub;
+	struct task_beancounter *task_bc;
+	int error;
+
+	task_bc = &current->task_bc;
+
+	/* You may not disown a setluid */
+	error = -EINVAL;
+	if (uid == (uid_t)-1)
+		goto out;
+
+	/* You may only set an ub as root (the check is CAP_SETUID) */
+	error = -EPERM;
+	if (!capable(CAP_SETUID))
+		goto out;
+
+	/* Ok - set up a beancounter entry for this user */
+	error = -ENOBUFS;
+	ub = get_beancounter_byuid(uid, 1);
+	if (ub == NULL)
+		goto out;
+
+	ub_debug(UBD_ALLOC | UBD_LIMIT, "setluid, bean %p (count %d) "
+			"for %.20s pid %d\n",
+			ub, atomic_read(&ub->ub_refcount),
+			current->comm, current->pid);
+	/* install bc */
+	put_beancounter(task_bc->exec_ub);
+	task_bc->exec_ub = ub;	/* presumably keeps the lookup's reference */
+	put_beancounter(task_bc->fork_sub);
+	task_bc->fork_sub = get_beancounter(ub);	/* second user of ub */
+	error = 0;
+out:
+	return error;
+}
+
+/*
+ *	The setublimit syscall: limits[0] is the barrier, limits[1] the limit
+ */
+asmlinkage long sys_setublimit(uid_t uid, unsigned long resource,
+		unsigned long *limits)
+{
+	int error;
+	unsigned long flags;
+	struct user_beancounter *ub;
+	unsigned long new_limits[2];	/* kernel copy of the user's pair */
+
+	error = -EPERM;
+	if(!capable(CAP_SYS_RESOURCE))
+		goto out;
+
+	if (!ve_is_super(get_exec_env()))
+		goto out;	/* error is still -EPERM here */
+
+	error = -EINVAL;
+	if (resource >= UB_RESOURCES)
+		goto out;
+
+	error = -EFAULT;
+	if (copy_from_user(&new_limits, limits, sizeof(new_limits)))
+		goto out;
+
+	error = -EINVAL;
+	if (new_limits[0] > UB_MAXVALUE || new_limits[1] > UB_MAXVALUE)
+		goto out;
+
+	error = -ENOENT;
+	ub = get_beancounter_byuid(uid, 0);
+	if (ub == NULL) {
+		ub_debug(UBD_LIMIT, "No login bc for uid %d\n", uid);
+		goto out;
+	}
+
+	spin_lock_irqsave(&ub->ub_lock, flags);
+	ub->ub_parms[resource].barrier = new_limits[0];
+	ub->ub_parms[resource].limit = new_limits[1];
+	spin_unlock_irqrestore(&ub->ub_lock, flags);
+
+	put_beancounter(ub);
+
+	error = 0;
+out:
+	return error;
+}
+
+extern long do_ubstat(int func, unsigned long arg1, unsigned long arg2, 
+		void *buf, long size);
+asmlinkage long sys_ubstat(int func, unsigned long arg1, unsigned long arg2, 
+		void *buf, long size)
+{
+	if (!ve_is_super(get_exec_env()))
+		return -EPERM;
+
+	return do_ubstat(func, arg1, arg2, buf, size);
+}
+#endif
diff -uprN linux-2.6.16/kernel/uid16.c linux-2.6.16.ovz/kernel/uid16.c
--- linux-2.6.16/kernel/uid16.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/kernel/uid16.c	2006-07-05 08:34:56.000000000 -0400
@@ -20,43 +20,67 @@
 
 asmlinkage long sys_chown16(const char __user * filename, old_uid_t user, old_gid_t group)
 {
-	return sys_chown(filename, low2highuid(user), low2highgid(group));
+	long ret = sys_chown(filename, low2highuid(user), low2highgid(group));
+	/* avoid REGPARM breakage on x86: */
+	prevent_tail_call(ret);
+	return ret;
 }
 
 asmlinkage long sys_lchown16(const char __user * filename, old_uid_t user, old_gid_t group)
 {
-	return sys_lchown(filename, low2highuid(user), low2highgid(group));
+	long ret = sys_lchown(filename, low2highuid(user), low2highgid(group));
+	/* avoid REGPARM breakage on x86: */
+	prevent_tail_call(ret);
+	return ret;
 }
 
 asmlinkage long sys_fchown16(unsigned int fd, old_uid_t user, old_gid_t group)
 {
-	return sys_fchown(fd, low2highuid(user), low2highgid(group));
+	long ret = sys_fchown(fd, low2highuid(user), low2highgid(group));
+	/* avoid REGPARM breakage on x86: */
+	prevent_tail_call(ret);
+	return ret;
 }
 
 asmlinkage long sys_setregid16(old_gid_t rgid, old_gid_t egid)
 {
-	return sys_setregid(low2highgid(rgid), low2highgid(egid));
+	long ret = sys_setregid(low2highgid(rgid), low2highgid(egid));
+	/* avoid REGPARM breakage on x86: */
+	prevent_tail_call(ret);
+	return ret;
 }
 
 asmlinkage long sys_setgid16(old_gid_t gid)
 {
-	return sys_setgid(low2highgid(gid));
+	long ret = sys_setgid(low2highgid(gid));
+	/* avoid REGPARM breakage on x86: */
+	prevent_tail_call(ret);
+	return ret;
 }
 
 asmlinkage long sys_setreuid16(old_uid_t ruid, old_uid_t euid)
 {
-	return sys_setreuid(low2highuid(ruid), low2highuid(euid));
+	long ret = sys_setreuid(low2highuid(ruid), low2highuid(euid));
+	/* avoid REGPARM breakage on x86: */
+	prevent_tail_call(ret);
+	return ret;
 }
 
 asmlinkage long sys_setuid16(old_uid_t uid)
 {
-	return sys_setuid(low2highuid(uid));
+	long ret = sys_setuid(low2highuid(uid));
+	/* avoid REGPARM breakage on x86: */
+	prevent_tail_call(ret);
+	return ret;
 }
 
 asmlinkage long sys_setresuid16(old_uid_t ruid, old_uid_t euid, old_uid_t suid)
 {
-	return sys_setresuid(low2highuid(ruid), low2highuid(euid),
-		low2highuid(suid));
+	long ret = sys_setresuid(low2highuid(ruid), low2highuid(euid),
+				 low2highuid(suid));
+	/* avoid REGPARM breakage on x86: */
+	prevent_tail_call(ret);
+	return ret;
 }
 
 asmlinkage long sys_getresuid16(old_uid_t __user *ruid, old_uid_t __user *euid, old_uid_t __user *suid)
@@ -72,8 +96,11 @@ asmlinkage long sys_getresuid16(old_uid_
 
 asmlinkage long sys_setresgid16(old_gid_t rgid, old_gid_t egid, old_gid_t sgid)
 {
-	return sys_setresgid(low2highgid(rgid), low2highgid(egid),
-		low2highgid(sgid));
+	long ret = sys_setresgid(low2highgid(rgid), low2highgid(egid),
+				 low2highgid(sgid));
+	/* avoid REGPARM breakage on x86: */
+	prevent_tail_call(ret);
+	return ret;
 }
 
 asmlinkage long sys_getresgid16(old_gid_t __user *rgid, old_gid_t __user *egid, old_gid_t __user *sgid)
@@ -89,12 +116,18 @@ asmlinkage long sys_getresgid16(old_gid_
 
 asmlinkage long sys_setfsuid16(old_uid_t uid)
 {
-	return sys_setfsuid(low2highuid(uid));
+	long ret = sys_setfsuid(low2highuid(uid));
+	/* avoid REGPARM breakage on x86: */
+	prevent_tail_call(ret);
+	return ret;
 }
 
 asmlinkage long sys_setfsgid16(old_gid_t gid)
 {
-	return sys_setfsgid(low2highgid(gid));
+	long ret = sys_setfsgid(low2highgid(gid));
+	/* avoid REGPARM breakage on x86: */
+	prevent_tail_call(ret);
+	return ret;
 }
 
 static int groups16_to_user(old_gid_t __user *grouplist,
diff -uprN linux-2.6.16/kernel/user.c linux-2.6.16.ovz/kernel/user.c
--- linux-2.6.16/kernel/user.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/kernel/user.c	2006-07-05 08:34:56.000000000 -0400
@@ -14,6 +14,7 @@
 #include <linux/bitops.h>
 #include <linux/key.h>
 #include <linux/interrupt.h>
+#include <linux/module.h>
 
 /*
  * UID task count cache, to get fast user lookup in "alloc_uid"
@@ -24,7 +25,20 @@
 #define UIDHASH_SZ		(1 << UIDHASH_BITS)
 #define UIDHASH_MASK		(UIDHASH_SZ - 1)
 #define __uidhashfn(uid)	(((uid >> UIDHASH_BITS) + uid) & UIDHASH_MASK)
-#define uidhashentry(uid)	(uidhash_table + __uidhashfn((uid)))
+#define __uidhashentry(uid)	(uidhash_table + __uidhashfn((uid)))
+
+#ifdef CONFIG_VE
+#define UIDHASH_MASK_VE			(UIDHASH_SZ_VE - 1)
+#define __uidhashfn_ve(uid)		(((uid >> UIDHASH_BITS_VE) ^ uid) & \
+						UIDHASH_MASK_VE)
+#define __uidhashentry_ve(uid, envid)	((envid)->uidhash_table + \
+						__uidhashfn_ve(uid))
+#define uidhashentry_ve(uid)		(ve_is_super(get_exec_env()) ?	\
+						__uidhashentry(uid) :	\
+						__uidhashentry_ve(uid, get_exec_env()))
+#else
+#define uidhashentry_ve(uid)		__uidhashentry(uid)
+#endif
 
 static kmem_cache_t *uid_cachep;
 static struct list_head uidhash_table[UIDHASH_SZ];
@@ -96,7 +110,7 @@ struct user_struct *find_user(uid_t uid)
 	unsigned long flags;
 
 	spin_lock_irqsave(&uidhash_lock, flags);
-	ret = uid_hash_find(uid, uidhashentry(uid));
+	ret = uid_hash_find(uid, uidhashentry_ve(uid));
 	spin_unlock_irqrestore(&uidhash_lock, flags);
 	return ret;
 }
@@ -115,10 +129,11 @@ void free_uid(struct user_struct *up)
 	}
 	local_irq_restore(flags);
 }
+EXPORT_SYMBOL_GPL(free_uid);
 
 struct user_struct * alloc_uid(uid_t uid)
 {
-	struct list_head *hashent = uidhashentry(uid);
+	struct list_head *hashent = uidhashentry_ve(uid);
 	struct user_struct *up;
 
 	spin_lock_irq(&uidhash_lock);
@@ -168,6 +183,7 @@ struct user_struct * alloc_uid(uid_t uid
 	}
 	return up;
 }
+EXPORT_SYMBOL_GPL(alloc_uid);
 
 void switch_uid(struct user_struct *new_user)
 {
@@ -186,21 +202,21 @@ void switch_uid(struct user_struct *new_
 	free_uid(old_user);
 	suid_keys(current);
 }
-
+EXPORT_SYMBOL_GPL(switch_uid);
 
 static int __init uid_cache_init(void)
 {
 	int n;
 
 	uid_cachep = kmem_cache_create("uid_cache", sizeof(struct user_struct),
-			0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL);
+			0, SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_UBC, NULL, NULL);
 
 	for(n = 0; n < UIDHASH_SZ; ++n)
 		INIT_LIST_HEAD(uidhash_table + n);
 
 	/* Insert the root user immediately (init already runs as root) */
 	spin_lock_irq(&uidhash_lock);
-	uid_hash_insert(&root_user, uidhashentry(0));
+	uid_hash_insert(&root_user, __uidhashentry(0));
 	spin_unlock_irq(&uidhash_lock);
 
 	return 0;
diff -uprN linux-2.6.16/kernel/ve.c linux-2.6.16.ovz/kernel/ve.c
--- linux-2.6.16/kernel/ve.c	1969-12-31 19:00:00.000000000 -0500
+++ linux-2.6.16.ovz/kernel/ve.c	2006-07-05 08:34:56.000000000 -0400
@@ -0,0 +1,205 @@
+/*
+ *  linux/kernel/ve.c
+ *
+ *  Copyright (C) 2000-2005  SWsoft
+ *  All rights reserved.
+ *  
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+/*
+ * 've.c' helper file performing VE sub-system initialization
+ */
+
+#include <linux/sched.h>
+#include <linux/delay.h>
+#include <linux/capability.h>
+#include <linux/ve.h>
+#include <linux/smp_lock.h>
+#include <linux/init.h>
+
+#include <linux/errno.h>
+#include <linux/unistd.h>
+#include <linux/slab.h>
+#include <linux/sys.h>
+#include <linux/kdev_t.h>
+#include <linux/termios.h>
+#include <linux/tty_driver.h>
+#include <linux/netdevice.h>
+#include <linux/utsname.h>
+#include <linux/proc_fs.h>
+#include <linux/kernel_stat.h>
+#include <linux/module.h>
+#include <linux/rcupdate.h>
+#include <linux/ve_proto.h>
+#include <linux/ve_owner.h>
+#include <linux/devpts_fs.h>
+
+#include <linux/nfcalls.h>
+
+unsigned long vz_rstamp = 0x37e0f59d;
+
+#ifdef CONFIG_MODULES
+struct module no_module = { .state = MODULE_STATE_GOING };
+EXPORT_SYMBOL(no_module);
+#endif
+
+#ifdef CONFIG_VE
+
+DCL_VE_OWNER(SKB, struct sk_buff, owner_env)
+DCL_VE_OWNER(SK, struct sock, sk_owner_env)
+DCL_VE_OWNER(TW, struct tcp_tw_bucket, tw_owner_env)
+DCL_VE_OWNER(FILP, struct file, owner_env)
+DCL_VE_OWNER(FSTYPE, struct file_system_type, owner_env)
+
+INIT_KSYM_MODULE(x_tables);
+INIT_KSYM_MODULE(xt_tcpudp);
+INIT_KSYM_MODULE(ip_tables);
+INIT_KSYM_MODULE(ip6_tables);
+INIT_KSYM_MODULE(iptable_filter);
+INIT_KSYM_MODULE(ip6table_filter);
+INIT_KSYM_MODULE(iptable_mangle);
+INIT_KSYM_MODULE(ip6table_mangle);
+INIT_KSYM_MODULE(xt_limit);
+INIT_KSYM_MODULE(ipt_multiport);
+INIT_KSYM_MODULE(ip6t_multiport);
+INIT_KSYM_MODULE(ipt_tos);
+INIT_KSYM_MODULE(ipt_TOS);
+INIT_KSYM_MODULE(ipt_REJECT);
+INIT_KSYM_MODULE(ip6t_REJECT);
+INIT_KSYM_MODULE(ipt_TCPMSS);
+INIT_KSYM_MODULE(xt_tcpmss);
+INIT_KSYM_MODULE(ipt_ttl);
+INIT_KSYM_MODULE(ipt_LOG);
+INIT_KSYM_MODULE(ip6t_LOG);
+INIT_KSYM_MODULE(xt_length);
+INIT_KSYM_MODULE(ip_conntrack);
+INIT_KSYM_MODULE(ip_conntrack_ftp);
+INIT_KSYM_MODULE(ip_conntrack_irc);
+INIT_KSYM_MODULE(xt_conntrack);
+INIT_KSYM_MODULE(xt_state);
+INIT_KSYM_MODULE(xt_helper);
+INIT_KSYM_MODULE(ip_nat);
+INIT_KSYM_MODULE(iptable_nat);
+INIT_KSYM_MODULE(ip_nat_ftp);
+INIT_KSYM_MODULE(ip_nat_irc);
+INIT_KSYM_MODULE(ipt_REDIRECT);
+
+INIT_KSYM_CALL(int, init_netfilter, (void));
+INIT_KSYM_CALL(int, init_xtables, (void));
+INIT_KSYM_CALL(int, init_xt_tcpudp, (void));
+INIT_KSYM_CALL(int, init_iptables, (void));
+INIT_KSYM_CALL(int, init_ip6tables, (void));
+INIT_KSYM_CALL(int, init_iptable_filter, (void));
+INIT_KSYM_CALL(int, init_ip6table_filter, (void));
+INIT_KSYM_CALL(int, init_iptable_mangle, (void));
+INIT_KSYM_CALL(int, init_ip6table_mangle, (void));
+INIT_KSYM_CALL(int, init_xt_limit, (void));
+INIT_KSYM_CALL(int, init_iptable_multiport, (void));
+INIT_KSYM_CALL(int, init_ip6table_multiport, (void));
+INIT_KSYM_CALL(int, init_iptable_tos, (void));
+INIT_KSYM_CALL(int, init_iptable_TOS, (void));
+INIT_KSYM_CALL(int, init_iptable_REJECT, (void));
+INIT_KSYM_CALL(int, init_ip6table_REJECT, (void));
+INIT_KSYM_CALL(int, init_iptable_TCPMSS, (void));
+INIT_KSYM_CALL(int, init_xt_tcpmss, (void));
+INIT_KSYM_CALL(int, init_iptable_ttl, (void));
+INIT_KSYM_CALL(int, init_iptable_LOG, (void));
+INIT_KSYM_CALL(int, init_ip6table_LOG, (void));
+INIT_KSYM_CALL(int, init_xt_length, (void));
+INIT_KSYM_CALL(int, init_iptable_conntrack, (void));
+INIT_KSYM_CALL(int, init_iptable_ftp, (void));
+INIT_KSYM_CALL(int, init_iptable_irc, (void));
+INIT_KSYM_CALL(int, init_xt_conntrack_match, (void));
+INIT_KSYM_CALL(int, init_xt_state, (void));
+INIT_KSYM_CALL(int, init_xt_helper, (void));
+INIT_KSYM_CALL(int, ip_nat_init, (void));
+INIT_KSYM_CALL(int, init_iptable_nat, (void));
+INIT_KSYM_CALL(int, init_iptable_nat_ftp, (void));
+INIT_KSYM_CALL(int, init_iptable_nat_irc, (void));
+INIT_KSYM_CALL(int, init_iptable_REDIRECT, (void));
+INIT_KSYM_CALL(void, fini_iptable_nat_irc, (void));
+INIT_KSYM_CALL(void, fini_iptable_nat_ftp, (void));
+INIT_KSYM_CALL(void, fini_iptable_nat, (void));
+INIT_KSYM_CALL(void, ip_nat_cleanup, (void));
+INIT_KSYM_CALL(void, fini_xt_helper, (void));
+INIT_KSYM_CALL(void, fini_xt_state, (void));
+INIT_KSYM_CALL(void, fini_xt_conntrack_match, (void));
+INIT_KSYM_CALL(void, fini_iptable_irc, (void));
+INIT_KSYM_CALL(void, fini_iptable_ftp, (void));
+INIT_KSYM_CALL(void, fini_iptable_conntrack, (void));
+INIT_KSYM_CALL(void, fini_xt_length, (void));
+INIT_KSYM_CALL(void, fini_ip6table_LOG, (void));
+INIT_KSYM_CALL(void, fini_iptable_LOG, (void));
+INIT_KSYM_CALL(void, fini_iptable_ttl, (void));
+INIT_KSYM_CALL(void, fini_xt_tcpmss, (void));
+INIT_KSYM_CALL(void, fini_iptable_TCPMSS, (void));
+INIT_KSYM_CALL(void, fini_ip6table_REJECT, (void));
+INIT_KSYM_CALL(void, fini_iptable_REJECT, (void));
+INIT_KSYM_CALL(void, fini_iptable_TOS, (void));
+INIT_KSYM_CALL(void, fini_iptable_tos, (void));
+INIT_KSYM_CALL(void, fini_ip6table_multiport, (void));
+INIT_KSYM_CALL(void, fini_iptable_multiport, (void));
+INIT_KSYM_CALL(void, fini_xt_limit, (void));
+INIT_KSYM_CALL(void, fini_iptable_filter, (void));
+INIT_KSYM_CALL(void, fini_ip6table_filter, (void));
+INIT_KSYM_CALL(void, fini_iptable_mangle, (void));
+INIT_KSYM_CALL(void, fini_ip6table_mangle, (void));
+INIT_KSYM_CALL(void, fini_ip6tables, (void));
+INIT_KSYM_CALL(void, fini_iptables, (void));
+INIT_KSYM_CALL(void, fini_xt_tcpudp, (void));
+INIT_KSYM_CALL(void, fini_xtables, (void));
+INIT_KSYM_CALL(void, fini_netfilter, (void));
+INIT_KSYM_CALL(void, fini_iptable_REDIRECT, (void));
+
+INIT_KSYM_CALL(void, ipt_flush_table, (struct xt_table *table));
+INIT_KSYM_CALL(void, ip6t_flush_table, (struct xt_table *table));
+
+#if defined(CONFIG_VE_CALLS_MODULE) || defined(CONFIG_VE_CALLS)
+INIT_KSYM_MODULE(vzmon);
+INIT_KSYM_CALL(int, real_get_device_perms_ve,
+		(int dev_type, dev_t dev, int access_mode));
+INIT_KSYM_CALL(void, real_do_env_cleanup, (struct ve_struct *env));
+INIT_KSYM_CALL(void, real_do_env_free, (struct ve_struct *env));
+INIT_KSYM_CALL(void, real_update_load_avg_ve, (void));
+
+int get_device_perms_ve(int dev_type, dev_t dev, int access_mode)
+{
+	return KSYMSAFECALL(int, vzmon, real_get_device_perms_ve,
+					(dev_type, dev, access_mode));
+}
+EXPORT_SYMBOL(get_device_perms_ve);
+
+void do_env_cleanup(struct ve_struct *env)
+{
+	KSYMSAFECALL_VOID(vzmon, real_do_env_cleanup, (env));
+}
+
+void do_env_free(struct ve_struct *env)
+{
+	KSYMSAFECALL_VOID(vzmon, real_do_env_free, (env));
+}
+EXPORT_SYMBOL(do_env_free);
+
+void do_update_load_avg_ve(void)
+{
+	KSYMSAFECALL_VOID(vzmon, real_update_load_avg_ve, ());
+}
+#endif
+
+struct ve_struct ve0 = {
+	.utsname		= &system_utsname,
+	.vetask_lh		= LIST_HEAD_INIT(ve0.vetask_lh),
+#if defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE)
+	._net_dev_tail		= &ve0._net_dev_base,
+	.ifindex		= -1,
+#endif
+#ifdef CONFIG_UNIX98_PTYS
+	.devpts_config		= &devpts_config,
+#endif
+};
+
+EXPORT_SYMBOL(ve0);
+
+#endif /* CONFIG_VE */
diff -uprN linux-2.6.16/kernel/vecalls.c linux-2.6.16.ovz/kernel/vecalls.c
--- linux-2.6.16/kernel/vecalls.c	1969-12-31 19:00:00.000000000 -0500
+++ linux-2.6.16.ovz/kernel/vecalls.c	2006-07-05 08:34:56.000000000 -0400
@@ -0,0 +1,3547 @@
+/*
+ *  linux/kernel/vecalls.c
+ *
+ *  Copyright (C) 2000-2005  SWsoft
+ *  All rights reserved.
+ *
+ */
+
+/*
+ * 'vecalls.c' is the file with basic VE support. It provides basic primitives
+ * along with the initialization script
+ */
+
+#include <linux/sched.h>
+#include <linux/delay.h>
+#include <linux/capability.h>
+#include <linux/ve.h>
+#include <linux/smp_lock.h>
+#include <linux/init.h>
+#include <linux/list.h>
+#include <linux/ve_owner.h>
+#include <linux/errno.h>
+#include <linux/unistd.h>
+#include <linux/slab.h>
+#include <linux/vmalloc.h>
+#include <linux/sys.h>
+#include <linux/fs.h>
+#include <linux/namespace.h>
+#include <linux/termios.h>
+#include <linux/tty_driver.h>
+#include <linux/netdevice.h>
+#include <linux/wait.h>
+#include <linux/inetdevice.h>
+#include <net/addrconf.h>
+#include <linux/utsname.h>
+#include <linux/sysctl.h>
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
+#include <linux/kernel_stat.h>
+#include <linux/module.h>
+#include <linux/suspend.h>
+#include <linux/rcupdate.h>
+#include <linux/in.h>
+#include <linux/major.h>
+#include <linux/kdev_t.h>
+#include <linux/idr.h>
+#include <linux/inetdevice.h>
+#include <net/pkt_sched.h>
+#include <linux/divert.h>
+#include <ub/beancounter.h>
+
+#include <net/route.h>
+#include <net/ip_fib.h>
+#include <net/ip6_route.h>
+#include <net/arp.h>
+#include <net/ipv6.h>
+
+#include <linux/ve_proto.h>
+#include <linux/venet.h>
+#include <linux/vzctl.h>
+#include <linux/vzcalluser.h>
+#ifdef CONFIG_FAIRSCHED
+#include <linux/fairsched.h>
+#endif
+
+#include <linux/nfcalls.h>
+#include <linux/virtinfo.h>
+
+struct ve_struct *ve_list_head = NULL;	/* VE list, linked through ->next */
+int nr_ve = 1;	/* One VE always exists. Compatibility with vestat */
+rwlock_t ve_list_guard = RW_LOCK_UNLOCKED;	/* taken around ve_list_head walks */
+static rwlock_t devperms_hash_guard = RW_LOCK_UNLOCKED;	/* for devperms_hash */
+
+extern int glob_virt_pids;
+
+static int	do_env_enter(struct ve_struct *ve, unsigned int flags);
+static void	do_clean_devperms(envid_t veid);
+static int	alloc_ve_tty_drivers(struct ve_struct* ve);
+static void	free_ve_tty_drivers(struct ve_struct* ve);
+static int	register_ve_tty_drivers(struct ve_struct* ve);
+static void	unregister_ve_tty_drivers(struct ve_struct* ve);
+static int	init_ve_tty_drivers(struct ve_struct *);
+static void	fini_ve_tty_drivers(struct ve_struct *);
+static void	clear_termios(struct tty_driver* driver );
+#if defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE)
+static void	ve_mapped_devs_cleanup(struct ve_struct *ve);
+#endif
+
+static int ve_get_cpu_stat(envid_t veid, struct vz_cpu_stat *buf);
+
+static void vecalls_exit(void);
+
+struct ve_struct *__find_ve_by_id(envid_t veid)
+{	/* linear walk of ve_list_head; callers in this file hold ve_list_guard */
+	struct ve_struct *cur = ve_list_head;
+
+	while (cur != NULL && cur->veid != veid)
+		cur = cur->next;
+	return cur;	/* NULL when no VE on the list carries veid */
+}
+
+struct ve_struct *get_ve_by_id(envid_t veid)
+{	/* look veid up under ve_list_guard and pass the result through get_ve() */
+	struct ve_struct *ve;
+	read_lock(&ve_list_guard);
+	ve = __find_ve_by_id(veid);
+	get_ve(ve);	/* NOTE(review): ve may be NULL here - confirm get_ve() copes */
+	read_unlock(&ve_list_guard);
+	return ve;	/* NULL if not found; real_setdevperms() pairs with real_put_ve() */
+}
+
+/*
+ * Inside vecalls always drop VE references with real_put_ve(), never put_ve().
+ */
+void real_do_env_free(struct ve_struct *ve);
+static inline void real_put_ve(struct ve_struct *ve)
+{
+	if (ve == NULL || !atomic_dec_and_test(&ve->counter))
+		return;
+
+	/* last reference gone: a running VE or a positive pcounter is a bug */
+	if (ve->is_running || atomic_read(&ve->pcounter) > 0)
+		BUG();
+	real_do_env_free(ve);
+}
+
+extern struct file_system_type devpts_fs_type;
+extern struct file_system_type sysfs_fs_type;
+extern struct file_system_type tmpfs_fs_type;
+extern struct file_system_type proc_fs_type;
+
+extern spinlock_t task_capability_lock;
+extern void ve_ipc_free(struct ve_struct * ve);
+extern void ip_fragment_cleanup(struct ve_struct *ve);
+
+/*
+ * Fill *buf (a user-space pointer) with the summed per-CPU counters, uptime
+ * and load averages of VE veid.  Returns 0, -EPERM, -ESRCH, -ENOMEM or -EFAULT.
+ */
+static int ve_get_cpu_stat(envid_t veid, struct vz_cpu_stat *buf)
+{
+	struct ve_struct *ve;
+	struct vz_cpu_stat *snap;
+	struct ve_cpu_stats *percpu;
+	unsigned long load;
+	int cpu, idx, err;
+
+	/* a non-super VE may only ask about itself */
+	if (!ve_is_super(get_exec_env()) && (veid != get_exec_env()->veid))
+		return -EPERM;
+	if (!veid)
+		return -ESRCH;
+
+	/* collect into a kernel buffer, copy out once the lock is dropped */
+	snap = kmalloc(sizeof(*snap), GFP_KERNEL);
+	if (snap == NULL)
+		return -ENOMEM;
+	memset(snap, 0, sizeof(*snap));
+
+	err = -ESRCH;
+	read_lock(&ve_list_guard);
+	ve = __find_ve_by_id(veid);
+	if (ve != NULL) {
+		for (cpu = 0; cpu < NR_CPUS; cpu++) {
+			percpu = VE_CPU_STATS(ve, cpu);
+			snap->user_jif += percpu->user;
+			snap->nice_jif += percpu->nice;
+			snap->system_jif += percpu->system;
+			snap->idle_clk += ve_sched_get_idle_time(ve, cpu);
+		}
+		snap->uptime_clk = get_cycles() - ve->start_cycles;
+		snap->uptime_jif = jiffies - ve->start_jiffies;
+		for (idx = 0; idx < 3; idx++) {
+			/* FIXED_1/200 is added before the int/frac split */
+			load = ve->avenrun[idx] + (FIXED_1/200);
+			snap->avenrun[idx].val_int = LOAD_INT(load);
+			snap->avenrun[idx].val_frac = LOAD_FRAC(load);
+		}
+		err = 0;
+	}
+	read_unlock(&ve_list_guard);
+
+	if (err == 0 && copy_to_user(buf, snap, sizeof(*snap)))
+		err = -EFAULT;
+	kfree(snap);
+	return err;
+}
+
+/**********************************************************************
+ * Devices permissions routines,
+ * character and block devices separately
+ **********************************************************************/
+
+/* Rules are tried in the following order, the first match wins:
+   MAJOR!=0, MINOR!=0	(exact device)
+   MAJOR!=0, MINOR==0	(whole major)
+   MAJOR==0, MINOR==0	(catch-all for the device type)
+*/
+struct devperms_struct
+{
+	dev_t   	dev;	/* device id */
+	unsigned char	mask;	/* granted S_IROTH/S_IWOTH/S_IXGRP bits */
+	unsigned 	type;	/* S_IFCHR or S_IFBLK, may carry VE_USE_* bits */
+	envid_t	 	veid;	/* id of the VE the rule belongs to */
+
+	struct devperms_struct *devhash_next;	/* next entry in the bucket */
+	struct devperms_struct **devhash_pprev;	/* the pointer that points at us */
+};
+
+static struct devperms_struct original_perms[] =	/* hashed at init time */
+{{
+	MKDEV(0,0),	/*device*/
+	S_IROTH | S_IWOTH,	/*mask: read and write*/
+	S_IFCHR,	/*type*/
+	0,		/*veid*/
+	NULL, NULL
+},
+{
+	MKDEV(0,0),	/*device*/
+	S_IXGRP | S_IROTH | S_IWOTH,	/*mask: quotactl, read and write*/
+	S_IFBLK,	/*type*/
+	0,		/*veid*/
+	NULL, NULL
+}};
+
+static struct devperms_struct default_major_perms[] = {	/* matched on major only */
+	{MKDEV(UNIX98_PTY_MASTER_MAJOR, 0), S_IROTH | S_IWOTH, S_IFCHR},
+	{MKDEV(UNIX98_PTY_SLAVE_MAJOR, 0), S_IROTH | S_IWOTH, S_IFCHR},
+	{MKDEV(PTY_MASTER_MAJOR, 0), S_IROTH | S_IWOTH, S_IFCHR},
+	{MKDEV(PTY_SLAVE_MAJOR, 0), S_IROTH | S_IWOTH, S_IFCHR},
+};
+static struct devperms_struct default_minor_perms[] = {	/* exact major:minor */
+	{MKDEV(MEM_MAJOR, 3), S_IROTH | S_IWOTH, S_IFCHR},   /* null */
+	{MKDEV(MEM_MAJOR, 5), S_IROTH | S_IWOTH, S_IFCHR},   /* zero */
+	{MKDEV(MEM_MAJOR, 7), S_IROTH | S_IWOTH, S_IFCHR},   /* full */
+	{MKDEV(TTYAUX_MAJOR, 0), S_IROTH | S_IWOTH, S_IFCHR},/* tty */
+	{MKDEV(TTYAUX_MAJOR, 2), S_IROTH | S_IWOTH, S_IFCHR},/* ptmx */
+	{MKDEV(MEM_MAJOR, 8), S_IROTH, S_IFCHR},  /* random */
+	{MKDEV(MEM_MAJOR, 9), S_IROTH, S_IFCHR},  /* urandom */
+};
+/* returned by find_default_devperms() when nothing matches: empty mask */
+static struct devperms_struct default_deny_perms = {
+	MKDEV(0, 0), 0, S_IFCHR
+};
+
+static inline struct devperms_struct *find_default_devperms(int type,
+						    dev_t dev)
+{
+	struct devperms_struct *rule;
+	struct devperms_struct *minor_end = default_minor_perms +
+		sizeof(default_minor_perms) / sizeof(default_minor_perms[0]);
+	struct devperms_struct *major_end = default_major_perms +
+		sizeof(default_major_perms) / sizeof(default_major_perms[0]);
+
+	/* the built-in defaults only describe character devices */
+	if (type != S_IFCHR)
+		return &default_deny_perms;
+
+	/* exact major:minor rules are tried before whole-major rules */
+	for (rule = default_minor_perms; rule < minor_end; rule++)
+		if (MAJOR(rule->dev) == MAJOR(dev) &&
+		    MINOR(rule->dev) == MINOR(dev))
+			return rule;
+	for (rule = default_major_perms; rule < major_end; rule++)
+		if (MAJOR(rule->dev) == MAJOR(dev))
+			return rule;
+	return &default_deny_perms;
+}
+
+#define DEVPERMS_HASH_SZ 512
+struct devperms_struct *devperms_hash[DEVPERMS_HASH_SZ];
+/* bucket index for (veid, dev); fully parenthesized, safe in any expression */
+#define devperms_hashfn(id,dev) \
+	((((id) << 5) ^ ((id) >> 5) ^ MAJOR(dev) ^ MINOR(dev)) & \
+						(DEVPERMS_HASH_SZ - 1))
+
+static inline void hash_devperms(struct devperms_struct *p)
+{	/* push p at the head of the bucket chain chosen by (veid, dev) */
+	struct devperms_struct **bucket, *old_head;
+	bucket = &devperms_hash[devperms_hashfn(p->veid,p->dev)];
+	old_head = p->devhash_next = *bucket;
+	if (old_head != NULL)
+		old_head->devhash_pprev = &p->devhash_next;
+	p->devhash_pprev = bucket;
+	*bucket = p;
+}
+
+static inline void unhash_devperms(struct devperms_struct *p)
+{	/* splice p out of its bucket chain through the back-pointer */
+	*p->devhash_pprev = p->devhash_next;
+	if (p->devhash_next != NULL)
+		p->devhash_next->devhash_pprev = p->devhash_pprev;
+}
+
+static int __init init_devperms_hash(void)
+{	/* clear the table, then hash in both original_perms[] entries */
+	write_lock_irq(&devperms_hash_guard);
+	memset(devperms_hash, 0, sizeof(devperms_hash));
+	hash_devperms(original_perms);
+	hash_devperms(original_perms+1);
+	write_unlock_irq(&devperms_hash_guard);
+	return 0;	/* always succeeds */
+}
+
+static inline void fini_devperms_hash(void)
+{	/* empty: this function releases nothing */
+}
+
+static inline struct devperms_struct *find_devperms(envid_t veid,
+						    int type,
+						    dev_t dev)
+{	/* first entry in the bucket matching veid, type, major and minor */
+	struct devperms_struct *p;
+
+	for (p = devperms_hash[devperms_hashfn(veid,dev)]; p != NULL;
+	     p = p->devhash_next) {
+		if (p->type != type || p->veid != veid)
+			continue;
+		if (MAJOR(p->dev) == MAJOR(dev) && MINOR(p->dev) == MINOR(dev))
+			break;
+	}
+	return p;
+}
+
+
+static void do_clean_devperms(envid_t veid)
+{
+	struct devperms_struct *cur, *next;
+	int bucket;
+
+	write_lock_irq(&devperms_hash_guard);
+	for (bucket = 0; bucket < DEVPERMS_HASH_SZ; bucket++) {
+		/* the successor is fetched first because cur may be freed */
+		for (cur = devperms_hash[bucket]; cur != NULL; cur = next) {
+			next = cur->devhash_next;
+			if (cur->veid != veid)
+				continue;
+			unhash_devperms(cur);
+			kfree(cur);
+		}
+	}
+	write_unlock_irq(&devperms_hash_guard);
+}
+
+/*
+ * Check device access for the current VE (0 or -EACCES). Mode is a mask of
+ *	FMODE_READ	for read access (configurable by S_IROTH)
+ *	FMODE_WRITE	for write access (configurable by S_IWOTH)
+ *	FMODE_QUOTACTL	for quotactl access (configurable by S_IXGRP)
+ */
+int real_get_device_perms_ve(int dev_type, dev_t dev, int access_mode)
+{
+	struct devperms_struct *perms;
+	envid_t veid;
+	int ret;
+
+	/* FMODE_* bits -> S_I* bits; the index is masked to the 8 table
+	 * entries so that a stray bit cannot read past the literal */
+	access_mode = "\000\004\002\006\010\014\012\016"[access_mode & 7];
+	veid = get_exec_env()->veid;
+
+	read_lock(&devperms_hash_guard);
+	perms = find_devperms(veid, dev_type|VE_USE_MINOR, dev);
+	if (perms)
+		goto end;
+
+	perms = find_devperms(veid, dev_type|VE_USE_MAJOR, MKDEV(MAJOR(dev),0));
+	if (perms)
+		goto end;
+
+	perms = find_devperms(veid, dev_type, MKDEV(0,0));
+	if (perms)
+		goto end;
+
+	perms = find_default_devperms(dev_type, dev);
+end:
+	/* evaluate the rule while the lock still pins it: do_clean_devperms()
+	 * may kfree() hashed entries as soon as the lock is released */
+	ret = perms ?
+		(((perms->mask & access_mode) == access_mode) ? 0 : -EACCES) :
+		-ENODEV;
+	read_unlock(&devperms_hash_guard);
+	return ret;
+}
+EXPORT_SYMBOL(real_get_device_perms_ve);
+
+int do_setdevperms(envid_t veid, unsigned type, dev_t dev, unsigned mask)
+{	/* Create or update the rule of veid for (type, dev); 0 or -ENOMEM */
+	struct devperms_struct *perms, *perms_new;
+
+	/* Rules are stored and hashed under the normalized device id, so
+	 * normalize before the lookup: searching for the raw id would miss an
+	 * existing rule and keep adding duplicates. */
+	switch (type & VE_USE_MASK) {
+	case 0:
+		dev = 0;
+		break;
+	case VE_USE_MAJOR:
+		dev = MKDEV(MAJOR(dev),0);
+		break;
+	}
+	write_lock_irq(&devperms_hash_guard);
+	perms = find_devperms(veid, type, dev);
+	if (!perms) {
+		/* GFP_KERNEL may sleep: allocate with the lock dropped */
+		write_unlock_irq(&devperms_hash_guard);
+		perms_new = kmalloc(sizeof(struct devperms_struct), GFP_KERNEL);
+		if (!perms_new)
+			return -ENOMEM;
+		write_lock_irq(&devperms_hash_guard);
+		/* the rule may have been added while the lock was dropped;
+		 * an entry that is already hashed must not be hashed again */
+		perms = find_devperms(veid, type, dev);
+		if (perms) {
+			kfree(perms_new);
+		} else {
+			perms_new->veid = veid;
+			perms_new->dev = dev;
+			perms_new->type = type;
+			hash_devperms(perms_new);
+			perms = perms_new;
+		}
+	}
+	perms->mask = mask & S_IALLUGO;
+	write_unlock_irq(&devperms_hash_guard);
+	return 0;
+}
+
+int real_setdevperms(envid_t veid, unsigned type, dev_t dev, unsigned mask)
+{	/* set a device rule for a running VE; needs CAP_SETVEID, veid != 0 */
+	struct ve_struct *ve;
+	int err;
+
+	if (!capable(CAP_SETVEID) || veid == 0)
+		return -EPERM;
+
+	if ((ve = get_ve_by_id(veid)) == NULL)
+		return -ESRCH;
+
+	down_read(&ve->op_sem);	/* held while is_running is tested and used */
+	err = -ESRCH;	/* reported when the VE exists but is not running */
+	if (ve->is_running)
+		err = do_setdevperms(veid, type, dev, mask);
+	up_read(&ve->op_sem);
+	real_put_ve(ve);	/* pairs with get_ve_by_id() above */
+	return err;
+}
+
+void real_update_load_avg_ve(void)
+{	/* refresh the three avenrun[] load averages of every VE on the list */
+	struct ve_struct *ve;
+	unsigned long nr_active;
+
+	read_lock(&ve_list_guard);
+	for (ve = ve_list_head; ve != NULL; ve = ve->next) {
+		nr_active = nr_running_ve(ve) + nr_uninterruptible_ve(ve);
+		nr_active *= FIXED_1;	/* scaled by FIXED_1 before CALC_LOAD */
+		CALC_LOAD(ve->avenrun[0], EXP_1, nr_active);
+		CALC_LOAD(ve->avenrun[1], EXP_5, nr_active);
+		CALC_LOAD(ve->avenrun[2], EXP_15, nr_active);
+	}
+	read_unlock(&ve_list_guard);
+}
+
+
+/**********************************************************************
+ **********************************************************************
+ *
+ * FS-related helpers to VE start/stop
+ *
+ **********************************************************************
+ **********************************************************************/
+
+/*
+ * DEVPTS needs virtualization: each environment should see its own list of
+ * pseudo-terminals.
+ * To implement it we need to have separate devpts superblocks for each
+ * VE, and each VE should mount its own one.
+ * Thus, separate vfsmount structures are required.
+ * To minimize intrusion into vfsmount lookup code, separate file_system_type
+ * structures are created.
+ *
+ * In addition to this, a patch for the character device itself is required,
+ * as the file system itself is used only for MINOR/MAJOR lookup.
+ */
+static int register_ve_fs_type(struct ve_struct *ve,
+		struct file_system_type *template,
+		struct file_system_type **p_fs_type, struct vfsmount **p_mnt)
+{	/* clone template into a VE-owned fs type, register and kern_mount() it */
+	struct vfsmount *mnt;
+	struct file_system_type *local_fs_type;
+	int ret;
+
+	VZTRACE("register_ve_fs_type(\"%s\")\n", template->name);
+	/* NOTE(review): extra sizeof(void *) presumably holds the VE owner - confirm */
+	local_fs_type = kmalloc(sizeof(*local_fs_type) + sizeof(void *),
+					GFP_KERNEL);
+	if (local_fs_type == NULL)
+		return -ENOMEM;
+
+	memset(local_fs_type, 0, sizeof(*local_fs_type));
+	local_fs_type->name = template->name;
+	local_fs_type->fs_flags = template->fs_flags;
+	local_fs_type->get_sb = template->get_sb;
+	local_fs_type->kill_sb = template->kill_sb;
+	local_fs_type->owner = template->owner;
+	/*
+	 * 1. we do not have refcounter on fstype
+	 * 2. fstype holds reference to ve using get_ve()/put_ve().
+	 * so we free fstype when freeing ve and we are sure it's ok to free it
+	 */
+	SET_VE_OWNER_FSTYPE(local_fs_type, ve);
+	get_filesystem(local_fs_type);	/* get_ve() inside */
+
+	ret = register_filesystem(local_fs_type); /* does not get */
+	if (ret)
+		goto reg_err;
+
+	mnt = kern_mount(local_fs_type);
+	if (IS_ERR(mnt))
+		goto mnt_err;
+
+	/* Usage counters after successful execution of kern_mount:
+	 * local_fs_type - +1 (get_fs_type,get_sb_single,put_filesystem)
+	 * mnt - +1 == 1 (alloc_vfsmnt)
+	 */
+
+	*p_fs_type = local_fs_type;	/* outputs are written on success only */
+	*p_mnt = mnt;
+	return 0;
+
+mnt_err:
+	ret = PTR_ERR(mnt);
+	unregister_filesystem(local_fs_type); /* does not put */
+
+reg_err:
+	put_filesystem(local_fs_type);
+	kfree(local_fs_type);
+	printk(KERN_DEBUG
+	       "register_ve_fs_type(\"%s\") err=%d\n", template->name, ret);
+	return ret;
+}
+
+static void umount_ve_fs_type(struct file_system_type *local_fs_type)
+{	/* unmount every mount of local_fs_type listed in current->namespace */
+	struct vfsmount *mnt;
+	struct list_head *p, *q;
+	LIST_HEAD(kill);
+	LIST_HEAD(umount_list);
+
+	down_write(&namespace_sem);
+	spin_lock(&vfsmount_lock);
+	list_for_each_safe(p, q, &current->namespace->list) {
+		mnt = list_entry(p, struct vfsmount, mnt_list);
+		if (mnt->mnt_sb->s_type != local_fs_type)
+			continue;
+		list_del(p);	/* move the match onto the private kill list */
+		list_add(p, &kill);
+	}
+	/* NOTE(review): ends only if umount_tree() unlinks mnt from kill - confirm */
+	while (!list_empty(&kill)) {
+		mnt = list_entry(kill.next, struct vfsmount, mnt_list);
+		umount_tree(mnt, 1, &umount_list);
+	}
+	spin_unlock(&vfsmount_lock);
+	up_write(&namespace_sem);
+	release_mounts(&umount_list);	/* called after both locks are dropped */
+}
+
+static void unregister_ve_fs_type(struct file_system_type *local_fs_type,
+		struct vfsmount *local_fs_mount)
+{
+	int have_type = (local_fs_type != NULL);
+	int have_mount = (local_fs_mount != NULL);
+
+	if (!have_type || !have_mount) {
+		/* a half-set pair is a bug; a fully empty pair is a no-op */
+		if (have_type != have_mount)
+			BUG();
+		return;
+	}
+	VZTRACE("unregister_ve_fs_type(\"%s\")\n", local_fs_type->name);
+	unregister_filesystem(local_fs_type);
+	umount_ve_fs_type(local_fs_type);
+	kern_umount(local_fs_mount); /* alias to mntput, drop our ref */
+	put_filesystem(local_fs_type);
+}
+
+
+/**********************************************************************
+ **********************************************************************
+ *
+ * Sysctl-related helpers to VE start/stop
+ *
+ **********************************************************************
+ **********************************************************************/
+
+#ifdef CONFIG_SYSCTL
+static ctl_table ve_sysctl_tables[] = {	/* template; .data is set per VE */
+	/* kernel */
+	{
+		.ctl_name	= CTL_KERN,
+		.procname	= "kernel",
+		.mode		= 0555,
+		.child		= &ve_sysctl_tables[2],	/* entries after the terminator */
+	},
+	{ .ctl_name = 0 },	/* ends the top level */
+	/* kernel/[vars]: order must match table[] in register_ve_sysctltables() */
+	{
+		.ctl_name	= KERN_NODENAME,
+		.procname	= "hostname",
+		.maxlen 	= 64,
+		.mode		= 0644,
+		.proc_handler	= &proc_doutsstring,
+		.strategy	= &sysctl_string,
+	},
+	{
+		.ctl_name	= KERN_DOMAINNAME,
+		.procname	= "domainname",
+		.maxlen		= 64,
+		.mode		= 0644,
+		.proc_handler	= &proc_doutsstring,
+		.strategy	= &sysctl_string,
+	},
+	{
+		.ctl_name	= KERN_SHMMAX,
+		.procname	= "shmmax",
+		.maxlen		= sizeof(size_t),
+		.mode		= 0644,
+		.proc_handler	= &proc_doulongvec_minmax,
+	},
+	{
+		.ctl_name	= KERN_SHMALL,
+		.procname	= "shmall",
+		.maxlen		= sizeof(size_t),
+		.mode		= 0644,
+		.proc_handler	= &proc_doulongvec_minmax,
+	},
+	{
+		.ctl_name	= KERN_SHMMNI,
+		.procname	= "shmmni",
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec,
+	},
+	{
+		.ctl_name	= KERN_MSGMAX,
+		.procname	= "msgmax",
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec,
+	},
+	{
+		.ctl_name	= KERN_MSGMNI,
+		.procname	= "msgmni",
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec,
+	},
+	{
+		.ctl_name	= KERN_MSGMNB,
+		.procname	= "msgmnb",
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec,
+	},
+	{
+		.ctl_name	= KERN_SEM,
+		.procname	= "sem",
+		.maxlen		= 4 * sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec
+	},
+	{ .ctl_name = 0, }	/* ends the kernel/ level */
+};
+
+static int register_ve_sysctltables(struct ve_struct *ve)
+{	/* clone ve_sysctl_tables, aim each entry at ve's fields, register it */
+	struct ctl_table_header *header;
+	ctl_table *root, *table;
+
+	VZTRACE("register_ve_sysctltables\n");
+
+	root = clone_sysctl_template(ve_sysctl_tables,
+			sizeof(ve_sysctl_tables) / sizeof(ctl_table));
+	if (root == NULL)
+		goto out;
+	/* indices follow the order of the kernel/[vars] entries in the template */
+	table = root->child;
+	table[0].data = &ve->utsname->nodename;
+	table[1].data = &ve->utsname->domainname;
+	table[2].data = &ve->_shm_ctlmax;
+	table[3].data = &ve->_shm_ctlall;
+	table[4].data = &ve->_shm_ctlmni;
+	table[5].data = &ve->_msg_ctlmax;
+	table[6].data = &ve->_msg_ctlmni;
+	table[7].data = &ve->_msg_ctlmnb;
+	table[8].data = &ve->_sem_ctls[0];
+
+	/* insert at head to override kern entries */
+	header = register_sysctl_table(root, 1);
+	if (header == NULL)
+		goto out_free;
+
+	ve->kern_header = header;	/* used by unregister_ve_sysctltables() */
+	ve->kern_table = root;		/* used by free_ve_sysctltables() */
+	return 0;
+
+out_free:
+	free_sysctl_clone(root);
+out:
+	return -ENOMEM;	/* both failure kinds are reported as -ENOMEM */
+}
+
+static inline void unregister_ve_sysctltables(struct ve_struct *ve)
+{	/* drops the registration only; the clone stays allocated */
+	unregister_sysctl_table(ve->kern_header);
+}
+
+static inline void free_ve_sysctltables(struct ve_struct *ve)
+{	/* releases the clone made by register_ve_sysctltables() */
+	free_sysctl_clone(ve->kern_table);
+}
+#endif
+
+
+/**********************************************************************
+ **********************************************************************
+ *
+ * VE start: subsystems
+ *
+ **********************************************************************
+ **********************************************************************/
+
+extern struct new_utsname virt_utsname;
+
+static int init_ve_utsname(struct ve_struct *ve)
+{	/* private utsname: system_utsname with virt_utsname's release field */
+	ve->utsname = kmalloc(sizeof(*ve->utsname), GFP_KERNEL);
+	if (ve->utsname == NULL)
+		return -ENOMEM;
+
+	down_read(&uts_sem); /* protect the source */
+	memcpy(ve->utsname, &system_utsname, sizeof(*ve->utsname));
+	memcpy(ve->utsname->release, virt_utsname.release,
+			sizeof(virt_utsname.release));	/* overrides the copy above */
+	up_read(&uts_sem);
+
+	return 0;
+}
+
+static void free_ve_utsname(struct ve_struct *ve)
+{	/* release the copy made by init_ve_utsname() and clear the pointer */
+	kfree(ve->utsname);
+	ve->utsname = NULL;
+}
+
+#if defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE)
+#include <net/ip.h>
+#include <net/tcp.h>
+#include <net/udp.h>
+#include <net/icmp.h>
+
+static int init_fini_ve_mibs(struct ve_struct *ve, int fini)
+{	/* fini == 0: allocate every per-cpu MIB; fini != 0: free all of them */
+	if (fini)
+		goto fini;
+	if (!(ve->_net_statistics[0] = alloc_percpu(struct linux_mib)))
+		goto out1;
+	if (!(ve->_net_statistics[1] = alloc_percpu(struct linux_mib)))
+		goto out2;
+	if (!(ve->_ip_statistics[0] = alloc_percpu(struct ipstats_mib)))
+		goto out3;
+	if (!(ve->_ip_statistics[1] = alloc_percpu(struct ipstats_mib)))
+		goto out4;
+	if (!(ve->_icmp_statistics[0] = alloc_percpu(struct icmp_mib)))
+		goto out5;
+	if (!(ve->_icmp_statistics[1] = alloc_percpu(struct icmp_mib)))
+		goto out6;
+	if (!(ve->_tcp_statistics[0] = alloc_percpu(struct tcp_mib)))
+		goto out7;
+	if (!(ve->_tcp_statistics[1] = alloc_percpu(struct tcp_mib)))
+		goto out8;
+	if (!(ve->_udp_statistics[0] = alloc_percpu(struct udp_mib)))
+		goto out9;
+	if (!(ve->_udp_statistics[1] = alloc_percpu(struct udp_mib)))
+		goto out10;
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+	if (!(ve->_ipv6_statistics[0] = alloc_percpu(struct ipstats_mib)))
+		goto out11;
+	if (!(ve->_ipv6_statistics[1] = alloc_percpu(struct ipstats_mib)))
+		goto out12;
+	if (!(ve->_icmpv6_statistics[0] = alloc_percpu(struct icmpv6_mib)))
+		goto out13;
+	if (!(ve->_icmpv6_statistics[1] = alloc_percpu(struct icmpv6_mib)))
+		goto out14;
+	if (!(ve->_udp_stats_in6[0] = alloc_percpu(struct udp_mib)))
+		goto out15;
+	if (!(ve->_udp_stats_in6[1] = alloc_percpu(struct udp_mib)))
+		goto out16;
+#endif
+	return 0;
+fini:	/* full teardown enters here; outN labels free what was allocated before N */
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+	free_percpu(ve->_udp_stats_in6[1]);
+out16:
+	free_percpu(ve->_udp_stats_in6[0]);
+out15:
+	free_percpu(ve->_icmpv6_statistics[1]);
+out14:
+	free_percpu(ve->_icmpv6_statistics[0]);
+out13:
+	free_percpu(ve->_ipv6_statistics[1]);
+out12:
+	free_percpu(ve->_ipv6_statistics[0]);
+out11:
+#endif
+	free_percpu(ve->_udp_statistics[1]);
+out10:
+	free_percpu(ve->_udp_statistics[0]);
+out9:
+	free_percpu(ve->_tcp_statistics[1]);
+out8:
+	free_percpu(ve->_tcp_statistics[0]);
+out7:
+	free_percpu(ve->_icmp_statistics[1]);
+out6:
+	free_percpu(ve->_icmp_statistics[0]);
+out5:
+	free_percpu(ve->_ip_statistics[1]);
+out4:
+	free_percpu(ve->_ip_statistics[0]);
+out3:
+	free_percpu(ve->_net_statistics[1]);
+out2:
+	free_percpu(ve->_net_statistics[0]);
+out1:
+	return -ENOMEM;	/* also returned after a fini pass; fini_ve_mibs() drops it */
+}
+
+static inline int init_ve_mibs(struct ve_struct *ve)
+{	/* allocation mode of init_fini_ve_mibs(): 0 or -ENOMEM */
+	return init_fini_ve_mibs(ve, 0);
+}
+
+static inline void fini_ve_mibs(struct ve_struct *ve)
+{	/* release mode; the return value carries no information here */
+	(void)init_fini_ve_mibs(ve, 1);
+}
+
+extern struct net_device templ_loopback_dev;
+static void veloop_setup(struct net_device *dev)
+{	/* clone templ_loopback_dev into dev, keeping dev's own padded value */
+	int keep_padded = dev->padded;
+
+	memcpy(dev, &templ_loopback_dev, sizeof(*dev));
+	dev->padded = keep_padded;
+}
+
+static int init_ve_netdev(void)
+{	/* reset the current VE's device list and register its own loopback */
+	struct ve_struct *ve;
+	struct net_device_stats *stats;
+	int err;
+
+	ve = get_exec_env();
+	INIT_HLIST_HEAD(&ve->_net_dev_head);
+	ve->_net_dev_base = NULL;
+	ve->_net_dev_tail = &ve->_net_dev_base;
+
+	ve->_loopback_dev = alloc_netdev(0, templ_loopback_dev.name,
+					 veloop_setup);
+	if (ve->_loopback_dev == NULL)
+		return -ENOMEM;
+	if (loopback_dev.get_stats != NULL) {
+		stats = kmalloc(sizeof(struct net_device_stats), GFP_KERNEL);
+		if (stats != NULL) {	/* best effort: no stats if this fails */
+			memset(stats, 0, sizeof(struct net_device_stats));
+			ve->_loopback_dev->priv = stats;
+			ve->_loopback_dev->get_stats = loopback_dev.get_stats;
+			ve->_loopback_dev->destructor = loopback_dev.destructor;
+		}
+	}
+	err = register_netdev(ve->_loopback_dev);
+	if (err) {
+		kfree(ve->_loopback_dev->priv);	/* kfree(NULL) is a no-op */
+		free_netdev(ve->_loopback_dev);
+		ve->_loopback_dev = NULL;	/* no dangling pointer, as in fini */
+	}
+	return err;
+}
+
+static void fini_ve_netdev(void)
+{	/* unregister and free every device on dev_base, the loopback one last */
+	struct ve_struct *ve;
+	struct net_device *dev;
+
+	ve = get_exec_env();
+	while (1) {
+		rtnl_lock();
+		/*
+		 * loopback is special, it can be referenced in fib's,
+		 * so it must be freed the last. Doing so is
+		 * sufficient to guarantee absence of such references.
+		 */
+		if (dev_base == ve->_loopback_dev)
+			dev = dev_base->next;
+		else
+			dev = dev_base;
+		if (dev == NULL)
+			break;	/* leaves the loop with the rtnl lock still held */
+		unregister_netdevice(dev);
+		rtnl_unlock();
+		free_netdev(dev);
+	}
+	unregister_netdevice(ve->_loopback_dev);
+	rtnl_unlock();
+	free_netdev(ve->_loopback_dev);
+	ve->_loopback_dev = NULL;
+}
+#else
+#define init_ve_mibs(ve)	(0)	/* stubs used when CONFIG_VE_NETDEV is off */
+#define fini_ve_mibs(ve)	do { } while (0)
+#define init_ve_netdev()	(0)
+#define fini_ve_netdev()	do { } while (0)
+#endif
+
+static int prepare_proc_root(struct ve_struct *ve)
+{
+	struct proc_dir_entry *root;
+
+	/* one allocation: the entry followed by room for "/proc" and its NUL */
+	root = kmalloc(sizeof(*root) + 6, GFP_KERNEL);
+	if (!root)
+		return -ENOMEM;
+	memset(root, 0, sizeof(*root));
+	root->name = (char *)(root + 1);
+	memcpy(root + 1, "/proc", 6);
+	root->namelen = 5;
+	root->mode = S_IFDIR | S_IRUGO | S_IXUGO;
+	root->nlink = 2;
+	atomic_set(&root->count, 1);
+	ve->proc_root = root;
+	return 0;
+}
+
+#ifdef CONFIG_PROC_FS
+static int init_ve_proc(struct ve_struct *ve)
+{	/* set up ve's private /proc: root entry, fs type + mount, base dirs */
+	int err;
+	struct proc_dir_entry *de;
+
+	err = prepare_proc_root(ve);
+	if (err)
+		goto out_root;
+
+	err = register_ve_fs_type(ve, &proc_fs_type,
+			&ve->proc_fstype, &ve->proc_mnt);
+	if (err)
+		goto out_reg;
+
+	/* create necessary /proc subdirs in VE local proc tree */
+	err = -ENOMEM;
+	de = create_proc_entry("vz", S_IFDIR|S_IRUGO|S_IXUGO, NULL);
+	if (!de)
+		goto out_vz;
+#ifdef CONFIG_VE_IPTABLES
+	proc_net = proc_mkdir("net", NULL);
+	if (!proc_net)
+		goto out_net;
+#endif
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+	if (ve_snmp_proc_init())
+		goto out_snmp;
+#endif
+
+	return 0;
+
+	/* "net" is ours only with CONFIG_VE_IPTABLES; "vz" is always ours to undo */
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+out_snmp:
+#endif
+#ifdef CONFIG_VE_IPTABLES
+	remove_proc_entry("net", NULL);
+	proc_net = NULL;
+out_net:
+#endif
+	remove_proc_entry("vz", NULL);
+out_vz:
+	unregister_ve_fs_type(ve->proc_fstype, ve->proc_mnt);
+	ve->proc_mnt = NULL;
+out_reg:
+	/* proc_fstype and proc_root are freed in real_put_ve -> free_ve_proc */
+	;
+out_root:
+	return err;
+}
+
+static void fini_ve_proc(struct ve_struct *ve)
+{	/* reverse of init_ve_proc(): entries first, then the mount and fs type */
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+	ve_snmp_proc_fini();
+#endif
+#ifdef CONFIG_VE_IPTABLES
+	remove_proc_entry("net", NULL);
+	proc_net =  NULL;
+#endif
+	remove_proc_entry("vz", NULL);
+	unregister_ve_fs_type(ve->proc_fstype, ve->proc_mnt);
+	ve->proc_mnt = NULL;	/* proc_fstype itself is kfree'd in free_ve_proc() */
+}
+
+static void free_ve_proc(struct ve_struct *ve)
+{
+	/* proc filesystem frees proc_dir_entries on remove_proc_entry() only,
+	   so we check that everything was removed and not lost */
+	if (ve->proc_root && ve->proc_root->subdir) {
+		struct proc_dir_entry *p = ve->proc_root;
+		printk(KERN_WARNING "VPS: %d: proc entry /proc", ve->veid);
+		while ((p = p->subdir) != NULL)	/* follows ->subdir links only */
+			printk("/%s", p->name);
+		printk(" is not removed!\n");
+	}
+	/* the leftover entries are only reported, they are not freed here */
+	kfree(ve->proc_root);
+	kfree(ve->proc_fstype);
+
+	ve->proc_fstype = NULL;
+	ve->proc_root = NULL;
+}
+#else
+#define init_ve_proc(ve)	(0)	/* stubs used when CONFIG_PROC_FS is off */
+#define fini_ve_proc(ve)	do { } while (0)
+#define free_ve_proc(ve)	do { } while (0)
+#endif
+
+#ifdef CONFIG_SYSCTL
+static int init_ve_sysctl(struct ve_struct *ve)
+{	/* create /proc/sys for ve and register its kernel/inet sysctl tables */
+	int err;
+
+#ifdef CONFIG_PROC_FS
+	err = -ENOMEM;
+	ve->proc_sys_root = proc_mkdir("sys", NULL);
+	if (ve->proc_sys_root == NULL)
+		goto out_proc;
+#endif
+	INIT_LIST_HEAD(&ve->sysctl_lh);
+	err = register_ve_sysctltables(ve);
+	if (err)
+		goto out_reg;
+
+	err = devinet_sysctl_init(ve);
+	if (err)
+		goto out_dev;
+
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+	err = addrconf_sysctl_init(ve);
+	if (err)
+		goto out_dev6;
+#endif
+
+	return 0;
+
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+out_dev6:
+	devinet_sysctl_fini(ve);
+#endif
+out_dev:
+	unregister_ve_sysctltables(ve);
+	free_ve_sysctltables(ve);
+out_reg:
+#ifdef CONFIG_PROC_FS
+	remove_proc_entry("sys", NULL);
+out_proc:
+#endif
+	return err;
+}
+
+static void fini_ve_sysctl(struct ve_struct *ve)
+{	/* unregister in reverse order of init_ve_sysctl(); frees nothing */
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+	addrconf_sysctl_fini(ve);
+#endif
+	devinet_sysctl_fini(ve);
+	unregister_ve_sysctltables(ve);
+	remove_proc_entry("sys", NULL);
+}
+
+static void free_ve_sysctl(struct ve_struct *ve)
+{	/* release the tables that fini_ve_sysctl() only unregistered */
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+	addrconf_sysctl_free(ve);
+#endif
+	devinet_sysctl_free(ve);
+	free_ve_sysctltables(ve);
+}
+#else
+#define init_ve_sysctl(ve)	(0)	/* stubs used when CONFIG_SYSCTL is off */
+#define fini_ve_sysctl(ve)	do { } while (0)
+#define free_ve_sysctl(ve)	do { } while (0)
+#endif
+
+#ifdef CONFIG_UNIX98_PTYS
+#include <linux/devpts_fs.h>
+
+static int init_ve_devpts(struct ve_struct *ve)
+{	/* allocate ve's devpts_config (mode 0600) and its private devpts mount */
+	struct devpts_config *cfg;
+	int err;
+
+	cfg = kmalloc(sizeof(*cfg), GFP_KERNEL);
+	ve->devpts_config = cfg;
+	if (cfg == NULL)
+		return -ENOMEM;
+	memset(cfg, 0, sizeof(*cfg));
+	cfg->mode = 0600;
+	err = register_ve_fs_type(ve, &devpts_fs_type,
+			&ve->devpts_fstype, &ve->devpts_mnt);
+	if (err == 0)
+		return 0;
+	kfree(cfg);
+	ve->devpts_config = NULL;
+	return err;
+}
+
+static void fini_ve_devpts(struct ve_struct *ve)
+{	/* drop the devpts mount and registration, then free the config */
+	unregister_ve_fs_type(ve->devpts_fstype, ve->devpts_mnt);
+	/* devpts_fstype is freed in real_put_ve -> free_ve_filesystems */
+	ve->devpts_mnt = NULL;
+	kfree(ve->devpts_config);
+	ve->devpts_config = NULL;
+}
+#else
+#define init_ve_devpts(ve)	(0)	/* stubs used without CONFIG_UNIX98_PTYS */
+#define fini_ve_devpts(ve)	do { } while (0)
+#endif
+
+static int init_ve_shmem(struct ve_struct *ve)
+{	/* per-VE tmpfs type and mount, stored in shmem_fstype / shmem_mnt */
+	return register_ve_fs_type(ve,
+				   &tmpfs_fs_type,
+				   &ve->shmem_fstype,
+				   &ve->shmem_mnt);
+}
+
+static void fini_ve_shmem(struct ve_struct *ve)
+{	/* undo init_ve_shmem(); the fs type structure itself is kept */
+	unregister_ve_fs_type(ve->shmem_fstype, ve->shmem_mnt);
+	/* shmem_fstype is freed in real_put_ve -> free_ve_filesystems */
+	ve->shmem_mnt = NULL;
+}
+
+static inline int init_ve_sysfs_root(struct ve_struct *ve)
+{
+	struct sysfs_dirent *root;
+
+	/* zeroed dirent of type SYSFS_ROOT with empty sibling/children lists */
+	root = kmalloc(sizeof(*root), GFP_KERNEL);
+	if (!root)
+		return -ENOMEM;
+	memset(root, 0, sizeof(*root));
+	root->s_type = SYSFS_ROOT;
+	INIT_LIST_HEAD(&root->s_children);
+	INIT_LIST_HEAD(&root->s_sibling);
+	ve->sysfs_root = root;
+	return 0;
+}
+
+static int init_ve_sysfs(struct ve_struct *ve)
+{	/* sysfs mount (only with VE_FEATURE_SYSFS) plus ve's own class objects */
+	struct subsystem *subsys;
+	struct class *nc;
+	int err;
+	extern struct subsystem class_obj_subsys;
+	extern struct subsystem class_subsys;
+	extern struct class net_class;
+
+#ifdef CONFIG_SYSFS
+	err = 0;
+	if (ve->features & VE_FEATURE_SYSFS) {
+		err = init_ve_sysfs_root(ve);
+		if (err != 0)
+			goto out;
+		err = register_ve_fs_type(ve,
+				   &sysfs_fs_type,
+				   &ve->sysfs_fstype,
+				   &ve->sysfs_mnt);
+	}
+	if (err != 0)
+		goto out_fs_type;
+#endif
+	err = -ENOMEM;
+	subsys = kmalloc(sizeof(*subsys), GFP_KERNEL);
+	if (subsys == NULL)
+		goto out_class_obj;
+	/* ick, this is ugly, the things we go through to keep from showing up
+	 * in sysfs... */
+	memset(subsys, 0, sizeof(*subsys));
+	memcpy(&subsys->kset.kobj.name, &class_obj_subsys.kset.kobj.name,
+			sizeof(subsys->kset.kobj.name));
+	subsys->kset.ktype = class_obj_subsys.kset.ktype;
+	subsys->kset.uevent_ops = class_obj_subsys.kset.uevent_ops;
+	subsystem_init(subsys);	/* initialized only; never registered here */
+	if (!subsys->kset.subsys)
+			subsys->kset.subsys = subsys;
+	ve->class_obj_subsys = subsys;
+
+	err = -ENOMEM;
+	subsys = kmalloc(sizeof(*subsys), GFP_KERNEL);
+	if (subsys == NULL)
+		goto out_class_subsys;
+	/* ick, this is ugly, the things we go through to keep from showing up
+	 * in sysfs... */
+	memset(subsys, 0, sizeof(*subsys));
+	memcpy(&subsys->kset.kobj.name, &class_subsys.kset.kobj.name,
+			sizeof(subsys->kset.kobj.name));
+	subsys->kset.ktype = class_subsys.kset.ktype;
+	subsys->kset.uevent_ops = class_subsys.kset.uevent_ops;
+	ve->class_subsys = subsys;
+	err = subsystem_register(subsys);
+	if (err != 0)
+		goto out_register;
+
+	err = -ENOMEM;
+	nc = kmalloc(sizeof(*nc), GFP_KERNEL);
+	if (nc == NULL)
+		goto out_nc;
+	memset(nc, 0, sizeof(*nc));
+	nc->name = net_class.name;
+	nc->release = net_class.release;
+	nc->uevent = net_class.uevent;
+	err = class_register(nc);
+	if (err != 0)
+		goto out_class_register;
+	ve->net_class = nc;	/* stored only once registration succeeded */
+
+	return err;
+
+out_class_register:
+	kfree(nc);
+out_nc:
+	subsystem_unregister(subsys);
+out_register:
+	kfree(ve->class_subsys);
+out_class_subsys:
+	kfree(ve->class_obj_subsys);
+out_class_obj:
+#ifdef CONFIG_SYSFS
+	unregister_ve_fs_type(ve->sysfs_fstype, ve->sysfs_mnt);
+	/* sysfs_fstype is freed in real_put_ve -> free_ve_filesystems */
+out_fs_type:
+	kfree(ve->sysfs_root);
+	ve->sysfs_root = NULL;
+#endif
+	ve->class_subsys = NULL;
+	ve->class_obj_subsys = NULL;
+out:
+	return err;
+}
+
+/*
+ * Tear down the per-VE sysfs state: unregister the VE's net class and
+ * class subsystem, release the three private objects and, with
+ * CONFIG_SYSFS, drop the VE's sysfs mount and root.
+ */
+static void fini_ve_sysfs(struct ve_struct *ve)
+{
+	/* unregister while both objects are still allocated */
+	class_unregister(ve->net_class);
+	subsystem_unregister(ve->class_subsys);
+
+	kfree(ve->net_class);
+	ve->net_class = NULL;
+
+	kfree(ve->class_subsys);
+	ve->class_subsys = NULL;
+
+	kfree(ve->class_obj_subsys);
+	ve->class_obj_subsys = NULL;
+
+#ifdef CONFIG_SYSFS
+	unregister_ve_fs_type(ve->sysfs_fstype, ve->sysfs_mnt);
+	ve->sysfs_mnt = NULL;
+
+	/* sysfs_fstype is freed in real_put_ve -> free_ve_filesystems */
+	kfree(ve->sysfs_root);
+	ve->sysfs_root = NULL;
+#endif
+}
+
+/*
+ * Release the per-VE filesystem type objects (sysfs when CONFIG_SYSFS is
+ * set, shmem, devpts), reset the pointers to NULL, then call
+ * free_ve_proc().
+ */
+static void free_ve_filesystems(struct ve_struct *ve)
+{
+#ifdef CONFIG_SYSFS
+	kfree(ve->sysfs_fstype);
+	ve->sysfs_fstype = NULL;
+#endif
+	kfree(ve->shmem_fstype);
+	ve->shmem_fstype = NULL;
+
+	kfree(ve->devpts_fstype);
+	ve->devpts_fstype = NULL;
+
+	free_ve_proc(ve);
+}
+
+/*
+ * Allocate the per-VE printk bookkeeping (wait queue plus log counters) as
+ * one zeroed chunk and point the ve->_log_* fields into it.
+ * Returns 0 on success or -ENOMEM if the allocation fails.
+ */
+static int init_printk(struct ve_struct *ve)
+{
+	struct ve_prep_printk {
+		wait_queue_head_t       log_wait;
+		unsigned long           log_start;
+		unsigned long           log_end;
+		unsigned long           logged_chars;
+	} *prep;
+
+	prep = kmalloc(sizeof(*prep), GFP_KERNEL);
+	if (prep == NULL)
+		return -ENOMEM;
+
+	memset(prep, 0, sizeof(*prep));
+	init_waitqueue_head(&prep->log_wait);
+
+	/* log_wait is the first member, so _log_wait also addresses the chunk */
+	ve->_log_wait = &prep->log_wait;
+	ve->_log_start = &prep->log_start;
+	ve->_log_end = &prep->log_end;
+	ve->_logged_chars = &prep->logged_chars;
+	/* ve->log_buf will be initialized later by ve_log_init() */
+	return 0;
+}
+
+/*
+ * Free the VE's log buffer and the printk bookkeeping set up by
+ * init_printk().
+ */
+static void fini_printk(struct ve_struct *ve)
+{
+	/*
+	 * There is no spinlock protection here because nobody can use
+	 * log_buf at the moments when this code is called.
+	 */
+	kfree(ve->log_buf);
+	/*
+	 * _log_wait points at the first member of the chunk allocated in
+	 * init_printk(), so this releases the whole chunk.
+	 */
+	kfree(ve->_log_wait);
+}
+
+/*
+ * Network teardown for a VE; a no-op unless VE netdev support is built in
+ * or built as a module.
+ */
+static void fini_venet(struct ve_struct *ve)
+{
+#if defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE)
+	/* sockets first, then the mapped devices */
+	tcp_v4_kill_ve_sockets(ve);
+	ve_mapped_devs_cleanup(ve);
+#endif
+}
+
+/*
+ * Attach the VE to the scheduler.  With CONFIG_FAIRSCHED a fairsched node
+ * with id ve->veid is created and the current task is moved into it; if
+ * the move fails the node is removed again.
+ *
+ * Returns 0 on success, otherwise the error reported by the failing
+ * fairsched call.
+ */
+static int init_ve_sched(struct ve_struct *ve)
+{
+#ifdef CONFIG_FAIRSCHED
+	int err;
+
+	/*
+	 * We refuse to switch to an already existing node since nodes
+	 * keep a pointer to their ve_struct...
+	 */
+	err = sys_fairsched_mknod(0, 1, ve->veid);
+	if (err < 0) {
+		printk(KERN_WARNING "Can't create fairsched node %d\n",
+				ve->veid);
+		return err;
+	}
+	err = sys_fairsched_mvpr(current->pid, ve->veid);
+	if (err) {
+		printk(KERN_WARNING "Can't switch to fairsched node %d\n",
+				ve->veid);
+		/* roll back the node created above */
+		if (sys_fairsched_rmnod(ve->veid))
+			printk(KERN_ERR "Can't clean fairsched node %d\n",
+					ve->veid);
+		return err;
+	}
+#endif
+	ve_sched_attach(ve);
+	return 0;
+}
+
+/*
+ * With CONFIG_FAIRSCHED: move the current task to fairsched_init_node if
+ * its vsched id is this VE's id, then remove the VE's fairsched node.
+ * Failures are only logged.
+ */
+static void fini_ve_sched(struct ve_struct *ve)
+{
+#ifdef CONFIG_FAIRSCHED
+	if (task_vsched_id(current) == ve->veid &&
+	    sys_fairsched_mvpr(current->pid, fairsched_init_node.id))
+		printk(KERN_WARNING "Can't leave fairsched node %d\n",
+				ve->veid);
+
+	if (sys_fairsched_rmnod(ve->veid))
+		printk(KERN_ERR "Can't remove fairsched node %d\n",
+				ve->veid);
+#endif
+}
+
+/*
+ * Zero and initialise a freshly allocated ve_struct.
+ *
+ * @ve:       structure to initialise
+ * @veid:     id of the new VE
+ * @class_id: stored in ve->class_id
+ * @data:     creation parameters; may be NULL, in which case no feature
+ *            bits are set
+ * @init_tsk: task recorded as ve->init_entry
+ *
+ * Takes one reference on the VE via get_ve().  Always returns 0.
+ */
+static int init_ve_struct(struct ve_struct *ve, envid_t veid,
+		u32 class_id, env_create_param_t *data,
+		struct task_struct *init_tsk)
+{
+	int n;
+
+	memset(ve, 0, sizeof(*ve));
+	(void)get_ve(ve);
+	ve->veid = veid;
+	ve->class_id = class_id;
+	ve->init_entry = init_tsk;
+	/* data is optional: do_env_create() also copes with a NULL block */
+	ve->features = data ? data->feature_mask : 0;
+	INIT_LIST_HEAD(&ve->vetask_lh);
+	init_rwsem(&ve->op_sem);
+#if defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE)
+	ve->ifindex = -1;
+#endif
+
+	for (n = 0; n < UIDHASH_SZ_VE; ++n)
+		INIT_LIST_HEAD(&ve->uidhash_table[n]);
+
+	/* record the VE start time in several clock domains */
+	do_posix_clock_monotonic_gettime(&ve->start_timespec);
+	ve->start_jiffies = jiffies;
+	ve->start_cycles = get_cycles();
+	ve->virt_pids = glob_virt_pids;
+
+	return 0;
+}
+
+/**********************************************************************
+ **********************************************************************
+ *
+ * /proc/meminfo virtualization
+ *
+ **********************************************************************
+ **********************************************************************/
+/*
+ * Store @val as the meminfo_val of the VE with id @veid.
+ * Returns 0 on success or -EINVAL if no such VE is found.
+ */
+static int ve_set_meminfo(envid_t veid, unsigned long val)
+{
+	struct ve_struct *target = get_ve_by_id(veid);
+
+	if (target == NULL)
+		return -EINVAL;
+
+	target->meminfo_val = val;
+	/* pairs with get_ve_by_id() above */
+	real_put_ve(target);
+	return 0;
+}
+
+/* Reset the VE's meminfo_val to 0.  Always returns 0. */
+static int init_ve_meminfo(struct ve_struct *ve)
+{
+	ve->meminfo_val = 0;
+	return 0;
+}
+
+/* Counterpart of init_ve_meminfo(); there is nothing to release. */
+static inline void fini_ve_meminfo(struct ve_struct *ve)
+{
+}
+
+/*
+ * Record @tsk's current root mount and root dentry as the VE's root, then
+ * pass them to mark_tree_virtual().
+ */
+static void set_ve_root(struct ve_struct *ve, struct task_struct *tsk)
+{
+	/* sample both fields under the fs lock so they are read together */
+	read_lock(&tsk->fs->lock);
+	ve->fs_rootmnt = tsk->fs->rootmnt;
+	ve->fs_root = tsk->fs->root;
+	read_unlock(&tsk->fs->lock);
+	mark_tree_virtual(ve->fs_rootmnt, ve->fs_root);
+}
+
+/*
+ * Derive the VE's default capability set from @tsk's effective set, with
+ * CAP_SETVEID lowered.
+ */
+static void set_ve_caps(struct ve_struct *ve, struct task_struct *tsk)
+{
+	/* required for real_setdevperms from register_ve_<fs> above */
+	memcpy(&ve->cap_default, &tsk->cap_effective, sizeof(kernel_cap_t));
+	cap_lower(ve->cap_default, CAP_SETVEID);
+}
+
+/*
+ * Insert @ve at the head of the global VE list unless a VE with the same
+ * id is already present.  Returns 0 on success or -EEXIST.
+ */
+static int ve_list_add(struct ve_struct *ve)
+{
+	int ret = -EEXIST;
+
+	write_lock_irq(&ve_list_guard);
+	if (__find_ve_by_id(ve->veid) == NULL) {
+		/* push onto the head of the doubly linked list */
+		ve->prev = NULL;
+		ve->next = ve_list_head;
+		if (ve_list_head != NULL)
+			ve_list_head->prev = ve;
+		ve_list_head = ve;
+		nr_ve++;
+		ret = 0;
+	}
+	write_unlock_irq(&ve_list_guard);
+
+	return ret;
+}
+
+/*
+ * Unlink @ve from the global VE list and decrement nr_ve.  The prev/next
+ * pointers of @ve itself are left untouched.
+ */
+static void ve_list_del(struct ve_struct *ve)
+{
+	struct ve_struct *before;
+	struct ve_struct *after;
+
+	write_lock_irq(&ve_list_guard);
+	before = ve->prev;
+	after = ve->next;
+
+	if (after != NULL)
+		after->prev = before;
+	if (before != NULL)
+		before->next = after;
+	else
+		ve_list_head = after;	/* @ve was the first entry */
+
+	nr_ve--;
+	write_unlock_irq(&ve_list_guard);
+}
+
+/*
+ * Apply cap_mask() with the VE's default capability set to @tsk's
+ * effective, inheritable and permitted sets, under task_capability_lock.
+ */
+static void set_task_ve_caps(struct task_struct *tsk, struct ve_struct *ve)
+{
+	spin_lock(&task_capability_lock);
+	cap_mask(tsk->cap_effective, ve->cap_default);
+	cap_mask(tsk->cap_inheritable, ve->cap_default);
+	cap_mask(tsk->cap_permitted, ve->cap_default);
+	spin_unlock(&task_capability_lock);
+}
+
+/*
+ * Move @tsk from VE @old into VE @new: clear the mm's vps_dumpable flag,
+ * restrict the task's capabilities to the new VE's defaults, switch the
+ * task's owner/exec environment and VE links, and transfer the process
+ * count and one VE reference from @old to @new.
+ */
+static void move_task(struct task_struct *tsk, struct ve_struct *new,
+		struct ve_struct *old)
+{
+	/* this prohibits ptracing of a task that entered the VPS from the host system */
+	tsk->mm->vps_dumpable = 0;
+	/* setup capabilities before enter */
+	set_task_ve_caps(tsk, new);
+
+	write_lock_irq(&tasklist_lock);
+	VE_TASK_INFO(tsk)->owner_env = new;
+	VE_TASK_INFO(tsk)->exec_env = new;
+	REMOVE_VE_LINKS(tsk);
+	SET_VE_LINKS(tsk);
+
+	atomic_dec(&old->pcounter);
+	atomic_inc(&new->pcounter);
+	/*
+	 * NOTE(review): the reference on @old is dropped before the one on
+	 * @new is taken; this presumably relies on callers holding their
+	 * own reference when old == new - confirm.
+	 */
+	real_put_ve(old);
+	get_ve(new);
+	write_unlock_irq(&tasklist_lock);
+}
+
+#ifdef CONFIG_VE_IPTABLES
+/* With CONFIG_VE_IPTABLES the ve netfilter hooks map straight onto these. */
+extern int init_netfilter(void);
+extern void fini_netfilter(void);
+#define init_ve_netfilter()	init_netfilter()
+#define fini_ve_netfilter()	fini_netfilter()
+
+/*
+ * Start one iptables module for a VE.
+ *
+ * The init function is called (through KSYMERRCALL) only when both
+ * VE_IPT_CMP(mask, full_mask) and
+ * VE_IPT_CMP((ve)->_iptables_modules, full_mask & ~full_mask##_MOD) hold.
+ * A result of 0 records full_mask##_MOD in (ve)->_iptables_modules; a
+ * result of 1 is turned into 0 without recording the bit.  The macro
+ * evaluates to the resulting code, or 0 when the call was skipped.
+ *
+ * NOTE(review): 1 is presumably what KSYMERRCALL yields when the symbol is
+ * not available - confirm against its definition.
+ */
+#define KSYMIPTINIT(mask, ve, full_mask, mod, name, args)	\
+({								\
+	int ret = 0;						\
+	if (VE_IPT_CMP(mask, full_mask) &&			\
+		VE_IPT_CMP((ve)->_iptables_modules, 		\
+			full_mask & ~(full_mask##_MOD))) {	\
+		ret = KSYMERRCALL(1, mod, name, args);		\
+		if (ret == 0)					\
+			(ve)->_iptables_modules |=		\
+					full_mask##_MOD;	\
+		if (ret == 1)					\
+			ret = 0;				\
+	}							\
+	ret;							\
+})
+
+/*
+ * Call a module's cleanup function (through KSYMSAFECALL_VOID) when
+ * VE_IPT_CMP(mask, full_mask##_MOD) holds; otherwise do nothing.
+ */
+#define KSYMIPTFINI(mask, full_mask, mod, name, args)		\
+({								\
+ 	if (VE_IPT_CMP(mask, full_mask##_MOD))			\
+		KSYMSAFECALL_VOID(mod, name, args);		\
+})
+
+
+/*
+ * Initialise (init_or_cleanup != 0) or tear down (init_or_cleanup == 0)
+ * the per-VE iptables modules.
+ *
+ * Init: every configured module is started through KSYMIPTINIT, gated by
+ * init_mask; the first negative result jumps to the matching err_* label
+ * inside the cleanup chain.  Cleanup: the chain runs in the reverse of the
+ * init order, each step going through KSYMIPTFINI keyed on
+ * ve->_iptables_modules.
+ *
+ * Both the cleanup and the error path end by clearing
+ * ve->_iptables_modules.  Returns 0 after a successful init and after a
+ * cleanup, otherwise the negative value reported by the failing init step.
+ */
+static int do_ve_iptables(struct ve_struct *ve, __u64 init_mask,
+		int init_or_cleanup)
+{
+	int err;
+
+	err = 0;
+	if (!init_or_cleanup)
+		goto cleanup;
+
+	/* init part */
+#if defined(CONFIG_NETFILTER_XTABLES) || \
+    defined(CONFIG_NETFILTER_XTABLES_MODULE)
+	err = KSYMIPTINIT(init_mask, ve, VE_IP_IPTABLES,
+			x_tables, init_xtables, ());
+	if (err < 0)
+		goto err_xtables;
+	err = KSYMIPTINIT(init_mask, ve, VE_IP_IPTABLES,
+			xt_tcpudp, init_xt_tcpudp, ());
+	if (err < 0)
+		goto err_xt_tcpudp;
+#endif
+#if defined(CONFIG_IP_NF_IPTABLES) || \
+    defined(CONFIG_IP_NF_IPTABLES_MODULE)
+	err = KSYMIPTINIT(init_mask, ve, VE_IP_IPTABLES,
+			ip_tables, init_iptables, ());
+	if (err < 0)
+		goto err_iptables;
+#endif
+#if defined(CONFIG_IP6_NF_IPTABLES) || \
+    defined(CONFIG_IP6_NF_IPTABLES_MODULE)
+	err = KSYMIPTINIT(init_mask, ve, VE_IP_IPTABLES,
+			ip6_tables, init_ip6tables, ());
+	if (err < 0)
+		goto err_ip6tables;
+#endif
+#if defined(CONFIG_IP_NF_CONNTRACK) || \
+    defined(CONFIG_IP_NF_CONNTRACK_MODULE)
+	err = KSYMIPTINIT(init_mask, ve, VE_IP_CONNTRACK,
+			ip_conntrack, init_iptable_conntrack, ());
+	if (err < 0)
+		goto err_iptable_conntrack;
+#endif
+#if defined(CONFIG_IP_NF_FTP) || \
+    defined(CONFIG_IP_NF_FTP_MODULE)
+	err = KSYMIPTINIT(init_mask, ve, VE_IP_CONNTRACK_FTP,
+			ip_conntrack_ftp, init_iptable_ftp, ());
+	if (err < 0)
+		goto err_iptable_ftp;
+#endif
+#if defined(CONFIG_IP_NF_IRC) || \
+    defined(CONFIG_IP_NF_IRC_MODULE)
+	err = KSYMIPTINIT(init_mask, ve, VE_IP_CONNTRACK_IRC,
+			ip_conntrack_irc, init_iptable_irc, ());
+	if (err < 0)
+		goto err_iptable_irc;
+#endif
+#if defined(CONFIG_NETFILTER_XT_MATCH_CONNTRACK) || \
+    defined(CONFIG_NETFILTER_XT_MATCH_CONNTRACK_MODULE)
+	err = KSYMIPTINIT(init_mask, ve, VE_IP_MATCH_CONNTRACK,
+			xt_conntrack, init_xt_conntrack_match, ());
+	if (err < 0)
+		goto err_xt_conntrack_match;
+#endif
+#if defined(CONFIG_NETFILTER_XT_MATCH_STATE) || \
+    defined(CONFIG_NETFILTER_XT_MATCH_STATE_MODULE)
+	err = KSYMIPTINIT(init_mask, ve, VE_IP_MATCH_STATE,
+			xt_state, init_xt_state, ());
+	if (err < 0)
+		goto err_xt_state;
+#endif
+#if defined(CONFIG_NETFILTER_XT_MATCH_HELPER) || \
+    defined(CONFIG_NETFILTER_XT_MATCH_HELPER_MODULE)
+	err = KSYMIPTINIT(init_mask, ve, VE_IP_MATCH_HELPER,
+			xt_helper, init_xt_helper, ());
+	if (err < 0)
+		goto err_xt_helper;
+#endif
+#if defined(CONFIG_IP_NF_NAT) || \
+    defined(CONFIG_IP_NF_NAT_MODULE)
+	err = KSYMIPTINIT(init_mask, ve, VE_IP_NAT,
+			ip_nat, ip_nat_init, ());
+	if (err < 0)
+		goto err_iptable_nat;
+	err = KSYMIPTINIT(init_mask, ve, VE_IP_NAT,
+			iptable_nat, init_iptable_nat, ());
+	if (err < 0)
+		goto err_iptable_nat2;
+#endif
+#if defined(CONFIG_IP_NF_NAT_FTP) || \
+    defined(CONFIG_IP_NF_NAT_FTP_MODULE)
+	err = KSYMIPTINIT(init_mask, ve, VE_IP_NAT_FTP,
+			ip_nat_ftp, init_iptable_nat_ftp, ());
+	if (err < 0)
+		goto err_iptable_nat_ftp;
+#endif
+#if defined(CONFIG_IP_NF_NAT_IRC) || \
+    defined(CONFIG_IP_NF_NAT_IRC_MODULE)
+	err = KSYMIPTINIT(init_mask, ve, VE_IP_NAT_IRC,
+			ip_nat_irc, init_iptable_nat_irc, ());
+	if (err < 0)
+		goto err_iptable_nat_irc;
+#endif
+#if defined(CONFIG_IP_NF_FILTER) || \
+    defined(CONFIG_IP_NF_FILTER_MODULE)
+	err = KSYMIPTINIT(init_mask, ve, VE_IP_FILTER,
+			iptable_filter,	init_iptable_filter, ());
+	if (err < 0)
+		goto err_iptable_filter;
+#endif
+#if defined(CONFIG_IP6_NF_FILTER) || \
+    defined(CONFIG_IP6_NF_FILTER_MODULE)
+	err = KSYMIPTINIT(init_mask, ve, VE_IP_FILTER,
+			ip6table_filter, init_ip6table_filter, ());
+	if (err < 0)
+		goto err_ip6table_filter;
+#endif
+#if defined(CONFIG_IP_NF_MANGLE) || \
+    defined(CONFIG_IP_NF_MANGLE_MODULE)
+	err = KSYMIPTINIT(init_mask, ve, VE_IP_MANGLE,
+			iptable_mangle,	init_iptable_mangle, ());
+	if (err < 0)
+		goto err_iptable_mangle;
+#endif
+#if defined(CONFIG_IP6_NF_MANGLE) || \
+    defined(CONFIG_IP6_NF_MANGLE_MODULE)
+	err = KSYMIPTINIT(init_mask, ve, VE_IP_MANGLE,
+			ip6table_mangle, init_ip6table_mangle, ());
+	if (err < 0)
+		goto err_ip6table_mangle;
+#endif
+#if defined(CONFIG_NETFILTER_XT_MATCH_LIMIT) || \
+    defined(CONFIG_NETFILTER_XT_MATCH_LIMIT_MODULE)
+	err = KSYMIPTINIT(init_mask, ve, VE_IP_MATCH_LIMIT,
+			xt_limit, init_xt_limit, ());
+	if (err < 0)
+		goto err_xt_limit;
+#endif
+#if defined(CONFIG_IP_NF_MATCH_MULTIPORT) || \
+    defined(CONFIG_IP_NF_MATCH_MULTIPORT_MODULE)
+	err = KSYMIPTINIT(init_mask, ve, VE_IP_MATCH_MULTIPORT,
+			ipt_multiport, init_iptable_multiport, ());
+	if (err < 0)
+		goto err_iptable_multiport;
+#endif
+#if defined(CONFIG_IP6_NF_MATCH_MULTIPORT) || \
+    defined(CONFIG_IP6_NF_MATCH_MULTIPORT_MODULE)
+	err = KSYMIPTINIT(init_mask, ve, VE_IP_MATCH_MULTIPORT,
+			ip6t_multiport, init_ip6table_multiport, ());
+	if (err < 0)
+		goto err_ip6table_multiport;
+#endif
+#if defined(CONFIG_IP_NF_MATCH_TOS) || \
+    defined(CONFIG_IP_NF_MATCH_TOS_MODULE)
+	err = KSYMIPTINIT(init_mask, ve, VE_IP_MATCH_TOS,
+			ipt_tos, init_iptable_tos, ());
+	if (err < 0)
+		goto err_iptable_tos;
+#endif
+#if defined(CONFIG_IP_NF_TARGET_TOS) || \
+    defined(CONFIG_IP_NF_TARGET_TOS_MODULE)
+	err = KSYMIPTINIT(init_mask, ve, VE_IP_TARGET_TOS,
+			ipt_TOS, init_iptable_TOS, ());
+	if (err < 0)
+		goto err_iptable_TOS;
+#endif
+#if defined(CONFIG_IP_NF_TARGET_REJECT) || \
+    defined(CONFIG_IP_NF_TARGET_REJECT_MODULE)
+	err = KSYMIPTINIT(init_mask, ve, VE_IP_TARGET_REJECT,
+			ipt_REJECT, init_iptable_REJECT, ());
+	if (err < 0)
+		goto err_iptable_REJECT;
+#endif
+#if defined(CONFIG_IP6_NF_TARGET_REJECT) || \
+    defined(CONFIG_IP6_NF_TARGET_REJECT_MODULE)
+	err = KSYMIPTINIT(init_mask, ve, VE_IP_TARGET_REJECT,
+			ip6t_REJECT, init_ip6table_REJECT, ());
+	if (err < 0)
+		goto err_ip6table_REJECT;
+#endif
+#if defined(CONFIG_IP_NF_TARGET_TCPMSS) || \
+    defined(CONFIG_IP_NF_TARGET_TCPMSS_MODULE)
+	err = KSYMIPTINIT(init_mask, ve, VE_IP_TARGET_TCPMSS,
+			ipt_TCPMSS, init_iptable_TCPMSS, ());
+	if (err < 0)
+		goto err_iptable_TCPMSS;
+#endif
+#if defined(CONFIG_NETFILTER_XT_MATCH_TCPMSS) || \
+    defined(CONFIG_NETFILTER_XT_MATCH_TCPMSS_MODULE)
+	err = KSYMIPTINIT(init_mask, ve, VE_IP_MATCH_TCPMSS,
+			xt_tcpmss, init_xt_tcpmss, ());
+	if (err < 0)
+		goto err_xt_tcpmss;
+#endif
+#if defined(CONFIG_IP_NF_MATCH_TTL) || \
+    defined(CONFIG_IP_NF_MATCH_TTL_MODULE)
+	err = KSYMIPTINIT(init_mask, ve, VE_IP_MATCH_TTL,
+			ipt_ttl, init_iptable_ttl, ());
+	if (err < 0)
+		goto err_iptable_ttl;
+#endif
+#if defined(CONFIG_IP_NF_TARGET_LOG) || \
+    defined(CONFIG_IP_NF_TARGET_LOG_MODULE)
+	err = KSYMIPTINIT(init_mask, ve, VE_IP_TARGET_LOG,
+			ipt_LOG, init_iptable_LOG, ());
+	if (err < 0)
+		goto err_iptable_LOG;
+#endif
+#if defined(CONFIG_IP6_NF_TARGET_LOG) || \
+    defined(CONFIG_IP6_NF_TARGET_LOG_MODULE)
+	err = KSYMIPTINIT(init_mask, ve, VE_IP_TARGET_LOG,
+			ip6t_LOG, init_ip6table_LOG, ());
+	if (err < 0)
+		goto err_ip6table_LOG;
+#endif
+#if defined(CONFIG_NETFILTER_XT_MATCH_LENGTH) || \
+    defined(CONFIG_NETFILTER_XT_MATCH_LENGTH_MODULE)
+	err = KSYMIPTINIT(init_mask, ve, VE_IP_MATCH_LENGTH,
+			xt_length, init_xt_length, ());
+	if (err < 0)
+		goto err_xt_length;
+#endif
+#if defined(CONFIG_IP_NF_TARGET_REDIRECT) || \
+    defined(CONFIG_IP_NF_TARGET_REDIRECT_MODULE)
+	err = KSYMIPTINIT(init_mask, ve, VE_IP_TARGET_REDIRECT,
+			ipt_REDIRECT, init_iptable_REDIRECT, ());
+	if (err < 0)
+		goto err_iptable_REDIRECT;
+#endif
+	return 0;
+
+/* ------------------------------------------------------------------------- */
+
+	/*
+	 * Teardown chain, in the reverse of the init order.  Each err_* label
+	 * sits just after the fini call of the step that failed, so a failed
+	 * init only visits the steps that came before it.
+	 */
+cleanup:
+#if defined(CONFIG_IP_NF_TARGET_REDIRECT) || \
+    defined(CONFIG_IP_NF_TARGET_REDIRECT_MODULE)
+	KSYMIPTFINI(ve->_iptables_modules, VE_IP_TARGET_REDIRECT,
+			ipt_REDIRECT, fini_iptable_REDIRECT, ());
+err_iptable_REDIRECT:
+#endif
+#if defined(CONFIG_NETFILTER_XT_MATCH_LENGTH) || \
+    defined(CONFIG_NETFILTER_XT_MATCH_LENGTH_MODULE)
+	KSYMIPTFINI(ve->_iptables_modules, VE_IP_MATCH_LENGTH,
+			xt_length, fini_xt_length, ());
+err_xt_length:
+#endif
+#if defined(CONFIG_IP6_NF_TARGET_LOG) || \
+    defined(CONFIG_IP6_NF_TARGET_LOG_MODULE)
+	KSYMIPTFINI(ve->_iptables_modules, VE_IP_TARGET_LOG,
+			ip6t_LOG, fini_ip6table_LOG, ());
+err_ip6table_LOG:
+#endif
+#if defined(CONFIG_IP_NF_TARGET_LOG) || \
+    defined(CONFIG_IP_NF_TARGET_LOG_MODULE)
+	KSYMIPTFINI(ve->_iptables_modules, VE_IP_TARGET_LOG,
+			ipt_LOG, fini_iptable_LOG, ());
+err_iptable_LOG:
+#endif
+#if defined(CONFIG_IP_NF_MATCH_TTL) || \
+    defined(CONFIG_IP_NF_MATCH_TTL_MODULE)
+	KSYMIPTFINI(ve->_iptables_modules, VE_IP_MATCH_TTL,
+			ipt_ttl, fini_iptable_ttl, ());
+err_iptable_ttl:
+#endif
+#if defined(CONFIG_NETFILTER_XT_MATCH_TCPMSS) || \
+    defined(CONFIG_NETFILTER_XT_MATCH_TCPMSS_MODULE)
+	KSYMIPTFINI(ve->_iptables_modules, VE_IP_MATCH_TCPMSS,
+			xt_tcpmss, fini_xt_tcpmss, ());
+err_xt_tcpmss:
+#endif
+#if defined(CONFIG_IP_NF_TARGET_TCPMSS) || \
+    defined(CONFIG_IP_NF_TARGET_TCPMSS_MODULE)
+	KSYMIPTFINI(ve->_iptables_modules, VE_IP_TARGET_TCPMSS,
+			ipt_TCPMSS, fini_iptable_TCPMSS, ());
+err_iptable_TCPMSS:
+#endif
+#if defined(CONFIG_IP6_NF_TARGET_REJECT) || \
+    defined(CONFIG_IP6_NF_TARGET_REJECT_MODULE)
+	KSYMIPTFINI(ve->_iptables_modules, VE_IP_TARGET_REJECT,
+			ip6t_REJECT, fini_ip6table_REJECT, ());
+err_ip6table_REJECT:
+#endif
+#if defined(CONFIG_IP_NF_TARGET_REJECT) || \
+    defined(CONFIG_IP_NF_TARGET_REJECT_MODULE)
+	KSYMIPTFINI(ve->_iptables_modules, VE_IP_TARGET_REJECT,
+			ipt_REJECT, fini_iptable_REJECT, ());
+err_iptable_REJECT:
+#endif
+#if defined(CONFIG_IP_NF_TARGET_TOS) || \
+    defined(CONFIG_IP_NF_TARGET_TOS_MODULE)
+	KSYMIPTFINI(ve->_iptables_modules, VE_IP_TARGET_TOS,
+			ipt_TOS, fini_iptable_TOS, ());
+err_iptable_TOS:
+#endif
+#if defined(CONFIG_IP_NF_MATCH_TOS) || \
+    defined(CONFIG_IP_NF_MATCH_TOS_MODULE)
+	KSYMIPTFINI(ve->_iptables_modules, VE_IP_MATCH_TOS,
+			ipt_tos, fini_iptable_tos, ());
+err_iptable_tos:
+#endif
+#if defined(CONFIG_IP6_NF_MATCH_MULTIPORT) || \
+    defined(CONFIG_IP6_NF_MATCH_MULTIPORT_MODULE)
+	KSYMIPTFINI(ve->_iptables_modules, VE_IP_MATCH_MULTIPORT,
+			ip6t_multiport, fini_ip6table_multiport, ());
+err_ip6table_multiport:
+#endif
+#if defined(CONFIG_IP_NF_MATCH_MULTIPORT) || \
+    defined(CONFIG_IP_NF_MATCH_MULTIPORT_MODULE)
+	KSYMIPTFINI(ve->_iptables_modules, VE_IP_MATCH_MULTIPORT,
+			ipt_multiport, fini_iptable_multiport, ());
+err_iptable_multiport:
+#endif
+#if defined(CONFIG_NETFILTER_XT_MATCH_LIMIT) || \
+    defined(CONFIG_NETFILTER_XT_MATCH_LIMIT_MODULE)
+	KSYMIPTFINI(ve->_iptables_modules, VE_IP_MATCH_LIMIT,
+			xt_limit, fini_xt_limit, ());
+err_xt_limit:
+#endif
+#if defined(CONFIG_IP6_NF_MANGLE) || \
+    defined(CONFIG_IP6_NF_MANGLE_MODULE)
+	KSYMIPTFINI(ve->_iptables_modules, VE_IP_MANGLE,
+			ip6table_mangle, fini_ip6table_mangle, ());
+err_ip6table_mangle:
+#endif
+#if defined(CONFIG_IP_NF_MANGLE) || \
+    defined(CONFIG_IP_NF_MANGLE_MODULE)
+	KSYMIPTFINI(ve->_iptables_modules, VE_IP_MANGLE,
+			iptable_mangle,	fini_iptable_mangle, ());
+err_iptable_mangle:
+#endif
+#if defined(CONFIG_IP6_NF_FILTER) || \
+    defined(CONFIG_IP6_NF_FILTER_MODULE)
+	KSYMIPTFINI(ve->_iptables_modules, VE_IP_FILTER,
+			ip6table_filter, fini_ip6table_filter, ());
+err_ip6table_filter:
+#endif
+#if defined(CONFIG_IP_NF_FILTER) || \
+    defined(CONFIG_IP_NF_FILTER_MODULE)
+	KSYMIPTFINI(ve->_iptables_modules, VE_IP_FILTER,
+			iptable_filter,	fini_iptable_filter, ());
+err_iptable_filter:
+#endif
+#if defined(CONFIG_IP_NF_NAT_IRC) || \
+    defined(CONFIG_IP_NF_NAT_IRC_MODULE)
+	KSYMIPTFINI(ve->_iptables_modules, VE_IP_NAT_IRC,
+			ip_nat_irc, fini_iptable_nat_irc, ());
+err_iptable_nat_irc:
+#endif
+#if defined(CONFIG_IP_NF_NAT_FTP) || \
+    defined(CONFIG_IP_NF_NAT_FTP_MODULE)
+	KSYMIPTFINI(ve->_iptables_modules, VE_IP_NAT_FTP,
+			ip_nat_ftp, fini_iptable_nat_ftp, ());
+err_iptable_nat_ftp:
+#endif
+#if defined(CONFIG_IP_NF_NAT) || \
+    defined(CONFIG_IP_NF_NAT_MODULE)
+	KSYMIPTFINI(ve->_iptables_modules, VE_IP_NAT,
+			iptable_nat, fini_iptable_nat, ());
+err_iptable_nat2:
+	KSYMIPTFINI(ve->_iptables_modules, VE_IP_NAT,
+			ip_nat, ip_nat_cleanup, ());
+err_iptable_nat:
+#endif
+#if defined(CONFIG_NETFILTER_XT_MATCH_HELPER) || \
+    defined(CONFIG_NETFILTER_XT_MATCH_HELPER_MODULE)
+	KSYMIPTFINI(ve->_iptables_modules, VE_IP_MATCH_HELPER,
+			xt_helper, fini_xt_helper, ());
+err_xt_helper:
+#endif
+#if defined(CONFIG_NETFILTER_XT_MATCH_STATE) || \
+    defined(CONFIG_NETFILTER_XT_MATCH_STATE_MODULE)
+	KSYMIPTFINI(ve->_iptables_modules, VE_IP_MATCH_STATE,
+			xt_state, fini_xt_state, ());
+err_xt_state:
+#endif
+#if defined(CONFIG_NETFILTER_XT_MATCH_CONNTRACK) || \
+    defined(CONFIG_NETFILTER_XT_MATCH_CONNTRACK_MODULE)
+	KSYMIPTFINI(ve->_iptables_modules, VE_IP_MATCH_CONNTRACK,
+			xt_conntrack, fini_xt_conntrack_match, ());
+err_xt_conntrack_match:
+#endif
+#if defined(CONFIG_IP_NF_IRC) || \
+    defined(CONFIG_IP_NF_IRC_MODULE)
+	KSYMIPTFINI(ve->_iptables_modules, VE_IP_CONNTRACK_IRC,
+			ip_conntrack_irc, fini_iptable_irc, ());
+err_iptable_irc:
+#endif
+#if defined(CONFIG_IP_NF_FTP) || \
+    defined(CONFIG_IP_NF_FTP_MODULE)
+	KSYMIPTFINI(ve->_iptables_modules, VE_IP_CONNTRACK_FTP,
+			ip_conntrack_ftp, fini_iptable_ftp, ());
+err_iptable_ftp:
+#endif
+#if defined(CONFIG_IP_NF_CONNTRACK) || \
+    defined(CONFIG_IP_NF_CONNTRACK_MODULE)
+	KSYMIPTFINI(ve->_iptables_modules, VE_IP_CONNTRACK,
+			ip_conntrack, fini_iptable_conntrack, ());
+err_iptable_conntrack:
+#endif
+#if defined(CONFIG_IP6_NF_IPTABLES) || \
+    defined(CONFIG_IP6_NF_IPTABLES_MODULE)
+	KSYMIPTFINI(ve->_iptables_modules, VE_IP_IPTABLES,
+			ip6_tables, fini_ip6tables, ());
+err_ip6tables:
+#endif
+#if defined(CONFIG_IP_NF_IPTABLES) || \
+    defined(CONFIG_IP_NF_IPTABLES_MODULE)
+	KSYMIPTFINI(ve->_iptables_modules, VE_IP_IPTABLES,
+			ip_tables, fini_iptables, ());
+err_iptables:
+#endif
+#if defined(CONFIG_NETFILTER_XTABLES) || \
+    defined(CONFIG_NETFILTER_XTABLES_MODULE)
+	KSYMIPTFINI(ve->_iptables_modules, VE_IP_IPTABLES,
+			xt_tcpudp, fini_xt_tcpudp, ());
+err_xt_tcpudp:
+	KSYMIPTFINI(ve->_iptables_modules, VE_IP_IPTABLES,
+			x_tables, fini_xtables, ());
+err_xtables:
+#endif
+	/* nothing is loaded any more, whichever way we got here */
+	ve->_iptables_modules = 0;
+
+	return err;
+}
+
+/* Run the init half of do_ve_iptables() for @ve with @init_mask. */
+static inline int init_ve_iptables(struct ve_struct *ve, __u64 init_mask)
+{
+	return do_ve_iptables(ve, init_mask, 1);
+}
+
+/* Run the cleanup half of do_ve_iptables() for @ve; the result is ignored. */
+static inline void fini_ve_iptables(struct ve_struct *ve, __u64 init_mask)
+{
+	(void)do_ve_iptables(ve, init_mask, 0);
+}
+
+/*
+ * Flush the VE's mangle, filter and NAT rule tables.  Each flush goes
+ * through KSYMIPTFINI, so it only runs when the matching *_MOD bit is set
+ * in ve->_iptables_modules.
+ */
+static void flush_ve_iptables(struct ve_struct *ve)
+{
+	/*
+	 * flush all rule tables first,
+	 * this helps us to avoid refs to freed objs
+	 */
+	KSYMIPTFINI(ve->_iptables_modules, VE_IP_MANGLE, ip_tables,
+			ipt_flush_table, (ve->_ipt_mangle_table));
+	KSYMIPTFINI(ve->_iptables_modules, VE_IP_MANGLE, ip6_tables,
+			ip6t_flush_table, (ve->_ip6t_mangle_table));
+	KSYMIPTFINI(ve->_iptables_modules, VE_IP_FILTER, ip_tables,
+			ipt_flush_table, (ve->_ve_ipt_filter_pf));
+	KSYMIPTFINI(ve->_iptables_modules, VE_IP_FILTER, ip6_tables,
+			ip6t_flush_table, (ve->_ve_ip6t_filter_pf));
+	KSYMIPTFINI(ve->_iptables_modules, VE_IP_NAT, ip_tables,
+			ipt_flush_table, (ve->_ip_conntrack->_ip_nat_table));
+}
+#else
+/* No-op stand-ins used when CONFIG_VE_IPTABLES is not set. */
+#define init_ve_iptables(x, y)	(0)
+#define fini_ve_iptables(x, y)	do { } while (0)
+#define flush_ve_iptables(x)	do { } while (0)
+#define init_ve_netfilter()	(0)
+#define fini_ve_netfilter()	do { } while (0)
+#endif
+
+/*
+ * One list of registered ve_hook entries per hook number; all lists are
+ * protected by ve_hook_sem.
+ * NOTE(review): the list heads are not initialised here - presumably done
+ * at module init; confirm.
+ */
+static struct list_head ve_hooks[VE_MAX_HOOKS];
+static DECLARE_RWSEM(ve_hook_sem);
+
+/*
+ * Register @vh on the list for vh->hooknum, keeping the list sorted by
+ * ascending priority: the new hook goes in front of the first entry with a
+ * strictly greater priority, i.e. after any entries of equal priority.
+ * Always returns 0.
+ */
+int ve_hook_register(struct ve_hook *vh)
+{
+	struct list_head *head = &ve_hooks[vh->hooknum];
+	struct ve_hook *pos;
+
+	down_write(&ve_hook_sem);
+	list_for_each_entry(pos, head, list) {
+		if (vh->priority < pos->priority)
+			break;
+	}
+	/*
+	 * If the loop ran to completion, &pos->list is the list head itself,
+	 * so this appends at the tail.
+	 */
+	list_add_tail(&vh->list, &pos->list);
+	up_write(&ve_hook_sem);
+
+	return 0;
+}
+EXPORT_SYMBOL(ve_hook_register);
+
+/* Remove @vh from its hook list, under the write side of ve_hook_sem. */
+void ve_hook_unregister(struct ve_hook *vh)
+{
+	down_write(&ve_hook_sem);
+	list_del(&vh->list);
+	up_write(&ve_hook_sem);
+}
+EXPORT_SYMBOL(ve_hook_unregister);
+
+/*
+ * Call every hook registered for @hooknum in list order, passing @data.
+ * Hooks whose owner module cannot be pinned are skipped.  The walk stops
+ * at the first hook that returns non-zero; the hooks before it are then
+ * visited in reverse order and their undo callback, when set, is called.
+ *
+ * Returns 0 if all hooks succeeded, otherwise the failing hook's value.
+ */
+static int ve_hook_iterate(unsigned int hooknum, void *data)
+{
+	struct ve_hook *vh;
+	int err;
+
+	err = 0;
+	down_read(&ve_hook_sem);
+	list_for_each_entry(vh, &ve_hooks[hooknum], list) {
+		if (!try_module_get(vh->owner))
+			continue;
+		err = vh->hook(hooknum, data);
+		module_put(vh->owner);
+		if (err)
+			break;
+	}
+
+	if (err) {
+		/* vh is the failing hook; resume just before it, backwards */
+		list_for_each_entry_continue_reverse(vh,
+					&ve_hooks[hooknum], list) {
+			if (!try_module_get(vh->owner))
+				continue;
+			if (vh->undo)
+				vh->undo(hooknum, data);
+			module_put(vh->owner);
+		}
+	}
+	up_read(&ve_hook_sem);
+	return err;
+}
+
+/*
+ * Call every hook registered for @hooknum in reverse list order, ignoring
+ * the hooks' return values.  Hooks whose owner module cannot be pinned are
+ * skipped.
+ */
+static void ve_hook_iterate_cleanup(unsigned int hooknum, void *data)
+{
+	struct ve_hook *entry;
+
+	down_read(&ve_hook_sem);
+	list_for_each_entry_reverse(entry, &ve_hooks[hooknum], list) {
+		if (try_module_get(entry->owner)) {
+			(void)entry->hook(hooknum, data);
+			module_put(entry->owner);
+		}
+	}
+	up_read(&ve_hook_sem);
+}
+
+/*
+ * Create VE @veid and move the calling task into it with virtual pid 1.
+ *
+ * The caller must be a thread group leader without a controlling terminal;
+ * if it is not already its own session and process group leader, no
+ * process group with its pid may exist.
+ *
+ * @flags:    VE_LOCK marks the new VE as locked
+ * @class_id: stored in the new ve_struct
+ * @data:     creation parameters; when NULL, VE_IP_DEFAULT is used as the
+ *            iptables mask
+ * @datalen:  unused here
+ *
+ * The subsystems are initialised in a fixed order and, on failure, torn
+ * down in reverse through the err_* label chain.  Returns @veid on success
+ * or the error code of the step that failed.
+ */
+static int do_env_create(envid_t veid, unsigned int flags, u32 class_id,
+			 env_create_param_t *data, int datalen)
+{
+	struct task_struct *tsk;
+	struct ve_struct *old;
+	struct ve_struct *old_exec;
+	struct ve_struct *ve;
+	__u64 init_mask;
+	int err;
+
+	tsk = current;
+	old = VE_TASK_INFO(tsk)->owner_env;
+
+	if (!thread_group_leader(tsk))
+		return -EINVAL;
+
+	if (tsk->signal->tty) {
+		printk("ERR: VE init has controlling terminal\n");
+		return -EINVAL;
+	}
+	if (tsk->signal->pgrp != tsk->pid || tsk->signal->session != tsk->pid) {
+		int may_setsid;
+		read_lock(&tasklist_lock);
+		may_setsid = (find_pid(PIDTYPE_PGID, tsk->pid) == NULL);
+		read_unlock(&tasklist_lock);
+		if (!may_setsid) {
+			printk("ERR: VE init is process group leader\n");
+			return -EINVAL;
+		}
+	}
+
+
+	VZTRACE("%s: veid=%d classid=%d pid=%d\n",
+		__FUNCTION__, veid, class_id, current->pid);
+
+	err = -ENOMEM;
+	ve = kmalloc(sizeof(struct ve_struct), GFP_KERNEL);
+	if (ve == NULL)
+		goto err_struct;
+
+	init_ve_struct(ve, veid, class_id, data, tsk);
+	__module_get(THIS_MODULE);
+	down_write(&ve->op_sem);
+	if (flags & VE_LOCK)
+		ve->is_locked = 1;
+	if ((err = ve_list_add(ve)) < 0)
+		goto err_exist;
+
+	/* this should be done before context switching */
+	if ((err = init_printk(ve)) < 0)
+		goto err_log_wait;
+
+	old_exec = set_exec_env(ve);
+
+	if ((err = init_ve_sched(ve)) < 0)
+		goto err_sched;
+
+	/* move user to VE */
+	if ((err = set_user(0, 0)) < 0)
+		goto err_set_user;
+
+	set_ve_root(ve, tsk);
+
+	if ((err = init_ve_utsname(ve)))
+		goto err_utsname;
+
+	if ((err = init_ve_mibs(ve)))
+		goto err_mibs;
+
+	if ((err = init_ve_proc(ve)))
+		goto err_proc;
+
+	if ((err = init_ve_sysctl(ve)))
+		goto err_sysctl;
+
+	if ((err = init_ve_sysfs(ve)))
+		goto err_sysfs;
+
+	if ((err = ve_arp_init(ve)) < 0)
+		goto err_route;
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+	if ((err = ve_ndisc_init(ve)) < 0)
+		goto err_route;
+#endif
+
+	if ((err = init_ve_route(ve)) < 0)
+		goto err_route;
+
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+	if ((err = init_ve_route6(ve)) < 0)
+		goto err_route;
+#endif
+
+	if ((err = init_ve_netdev()))
+		goto err_dev;
+
+	if ((err = init_ve_tty_drivers(ve)) < 0)
+		goto err_tty;
+
+	if ((err = init_ve_shmem(ve)))
+		goto err_shmem;
+
+	if ((err = init_ve_devpts(ve)))
+		goto err_devpts;
+
+	if((err = init_ve_meminfo(ve)))
+		goto err_meminf;
+
+	/* init SYSV IPC variables */
+	if ((err = init_ve_ipc(ve)) < 0)
+		goto err_ipc;
+
+	set_ve_caps(ve, tsk);
+
+	/* It is safe to initialize netfilter here as routing initialization and
+	   interface setup will be done below. This means that NO skb can be
+	   passed inside. Den */
+	/* iptables ve initialization for non ve0;
+	   ve0 init is in module_init */
+	if ((err = init_ve_netfilter()) < 0)
+		goto err_netfilter;
+
+	init_mask = data ? data->iptables_mask : VE_IP_DEFAULT;
+	if ((err = init_ve_iptables(ve, init_mask)) < 0)
+		goto err_iptables;
+
+	if ((err = alloc_vpid(tsk->pid, 1)) < 0)
+		goto err_vpid;
+
+	if ((err = ve_hook_iterate(VE_HOOK_INIT, (void *)ve)) < 0)
+		goto err_ve_hook;
+
+	/* finally: set vpids and move inside */
+	move_task(tsk, ve, old);
+
+	set_virt_pid(tsk, 1);
+	set_virt_tgid(tsk, 1);
+
+	set_special_pids(tsk->pid, tsk->pid);
+	current->signal->tty_old_pgrp = 0;
+	set_virt_pgid(tsk, 1);
+	set_virt_sid(tsk, 1);
+
+	ve->is_running = 1;
+	up_write(&ve->op_sem);
+
+	printk(KERN_INFO "VPS: %d: started\n", veid);
+	return veid;
+
+err_ve_hook:
+	free_vpid(1, ve);
+err_vpid:
+	fini_venet(ve);
+	fini_ve_iptables(ve, init_mask);
+err_iptables:
+	fini_ve_netfilter();
+err_netfilter:
+	fini_ve_ipc(ve);
+err_ipc:
+	fini_ve_meminfo(ve);
+err_meminf:
+	fini_ve_devpts(ve);
+err_devpts:
+	fini_ve_shmem(ve);
+err_shmem:
+	fini_ve_tty_drivers(ve);
+err_tty:
+	fini_ve_netdev();
+err_dev:
+	fini_ve_route(ve);
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+	fini_ve_route6(ve);
+#endif
+err_route:
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+	ve_ndisc_fini(ve);
+#endif
+	ve_arp_fini(ve);
+	fini_ve_sysfs(ve);
+err_sysfs:
+	fini_ve_sysctl(ve);
+err_sysctl:
+	fini_ve_proc(ve);
+err_proc:
+	do_clean_devperms(ve->veid); /* register procfs adds devperms */
+	fini_ve_mibs(ve);
+err_mibs:
+	/* free_ve_utsname() is called inside real_put_ve() */ ;
+err_utsname:
+	/* It is safe to restore current->envid here because
+	 * ve_fairsched_detach does not use current->envid. */
+	/* Really fairsched code uses current->envid in sys_fairsched_mknod
+	 * only.  It is correct if sys_fairsched_mknod is called from
+	 * userspace.  If sys_fairsched_mknod is called from
+	 * ve_fairsched_attach, then node->envid and node->parent_node->envid
+	 * are explicitly set to valid value after the call. */
+	/* FIXME */
+	VE_TASK_INFO(tsk)->owner_env = old;
+	VE_TASK_INFO(tsk)->exec_env = old_exec;
+	/* move user back */
+	if (set_user(0, 0) < 0)
+		printk(KERN_WARNING"Can't restore UID\n");
+
+err_set_user:
+	fini_ve_sched(ve);
+err_sched:
+	(void)set_exec_env(old_exec);
+
+	/* we can jump here having incorrect envid */
+	VE_TASK_INFO(tsk)->owner_env = old;
+	fini_printk(ve);
+err_log_wait:
+	ve_list_del(ve);
+	up_write(&ve->op_sem);
+
+	real_put_ve(ve);
+err_struct:
+	printk(KERN_INFO "VPS: %d: failed to start with err=%d\n", veid, err);
+	return err;
+
+err_exist:
+	/*
+	 * The VE never made it onto the list, so it is freed directly instead
+	 * of going through real_put_ve().  Drop the module reference taken by
+	 * __module_get() above by hand so that it is not leaked.
+	 */
+	module_put(THIS_MODULE);
+	kfree(ve);
+	goto err_struct;
+}
+
+
+/**********************************************************************
+ **********************************************************************
+ *
+ * VE start/stop callbacks
+ *
+ **********************************************************************
+ **********************************************************************/
+
+/*
+ * Entry point for the VE test/create/enter operations, selected by @flags.
+ *
+ *  - flags == 0: return the veid of the caller's execution environment.
+ *  - VE_TEST: return 0 if VE @veid exists, -ESRCH otherwise; it may not be
+ *    combined with VE_ENTER or VE_CREATE (-EINVAL).
+ *  - VE_CREATE: create the VE via do_env_create(); if it already exists
+ *    the request is turned into VE_ENTER, unless VE_EXCLUSIVE is set, in
+ *    which case -EACCES is returned.
+ *  - VE_ENTER: enter the existing VE via do_env_enter(), or -ESRCH if it
+ *    does not exist.
+ *
+ * Any non-zero @flags requires CAP_SETVEID (-EPERM otherwise).  Any other
+ * flag combination yields -EINVAL.
+ */
+int real_env_create(envid_t veid, unsigned flags, u32 class_id,
+			env_create_param_t *data, int datalen)
+{
+	int status;
+	struct ve_struct *ve;
+
+	if (!flags) {
+		status = get_exec_env()->veid;
+		goto out;
+	}
+
+	status = -EPERM;
+	if (!capable(CAP_SETVEID))
+		goto out;
+
+	status = -EINVAL;
+	if ((flags & VE_TEST) && (flags & (VE_ENTER|VE_CREATE)))
+		goto out;
+
+	status = -EINVAL;
+	ve = get_ve_by_id(veid);
+	if (ve) {
+		if (flags & VE_TEST) {
+			status = 0;
+			goto out_put;
+		}
+		if (flags & VE_EXCLUSIVE) {
+			status = -EACCES;
+			goto out_put;
+		}
+		if (flags & VE_CREATE) {
+			/* already exists: fall back to entering it */
+			flags &= ~VE_CREATE;
+			flags |= VE_ENTER;
+		}
+	} else {
+		if (flags & (VE_TEST|VE_ENTER)) {
+			status = -ESRCH;
+			goto out;
+		}
+	}
+
+	if (flags & VE_CREATE) {
+		/* ve is NULL here, so there is no reference to drop */
+		status = do_env_create(veid, flags, class_id, data, datalen);
+		goto out;
+	} else if (flags & VE_ENTER)
+		status = do_env_enter(ve, flags);
+
+	/* else: returning EINVAL */
+
+	/*
+	 * NOTE(review): this point is also reached with ve == NULL when the
+	 * VE does not exist and none of VE_TEST/VE_ENTER/VE_CREATE is set;
+	 * that relies on real_put_ve() accepting NULL - confirm.
+	 */
+out_put:
+	real_put_ve(ve);
+out:
+	return status;
+}
+
+/*
+ * Move the current task into the already running VE @ve.
+ *
+ * Fails with -EBUSY if the VE is not running, or if it is locked and
+ * VE_SKIPLOCK is not set in @flags.  With CONFIG_FAIRSCHED, a failure to
+ * switch the fairsched node is returned as is.  On success the veid of the
+ * task's owner environment after the move is returned.
+ */
+static int do_env_enter(struct ve_struct *ve, unsigned int flags)
+{
+	struct task_struct *tsk = current;
+	int err = -EBUSY;
+
+	VZTRACE("%s: veid=%d\n", __FUNCTION__, ve->veid);
+
+	down_read(&ve->op_sem);
+	if (!ve->is_running ||
+	    (ve->is_locked && !(flags & VE_SKIPLOCK)))
+		goto out_up;
+
+#ifdef CONFIG_FAIRSCHED
+	err = sys_fairsched_mvpr(tsk->pid, ve->veid);
+	if (err)
+		goto out_up;
+#endif
+
+	ve_sched_attach(ve);
+	move_task(tsk, ve, VE_TASK_INFO(tsk)->owner_env);
+	err = VE_TASK_INFO(tsk)->owner_env->veid;
+
+out_up:
+	up_read(&ve->op_sem);
+	return err;
+}
+
+static void env_cleanup(struct ve_struct *ve)
+{
+	struct ve_struct *old_ve;
+
+	VZTRACE("real_do_env_cleanup\n");
+	/* tear down ve's subsystems with ve installed as the exec env */
+	down_read(&ve->op_sem);
+	old_ve = set_exec_env(ve);
+
+	ve_hook_iterate_cleanup(VE_HOOK_FINI, (void *)ve);
+
+	fini_venet(ve);
+
+	/* no new packets in flight beyond this point */
+	synchronize_net();
+	/* skb hold dst_entry, and in turn lies in the ip fragment queue */
+	ip_fragment_cleanup(ve);
+
+	fini_ve_netdev();
+	fini_ve_route(ve);
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+	fini_ve_route6(ve);
+#endif
+	ve_arp_fini(ve);
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+	ve_ndisc_fini(ve);
+#endif
+
+	/* kill iptables */
+	/* No skb belonging to VE can exist at this point as unregister_netdev
+	   is an operation awaiting until ALL skb's gone */
+	flush_ve_iptables(ve);
+	fini_ve_iptables(ve, ve->_iptables_modules);
+	fini_ve_netfilter();
+
+	ve_ipc_cleanup();
+
+	fini_ve_sched(ve);
+	do_clean_devperms(ve->veid);
+
+	fini_ve_devpts(ve);
+	fini_ve_shmem(ve);
+	fini_ve_sysfs(ve);
+	unregister_ve_tty_drivers(ve);
+	fini_ve_sysctl(ve);
+	fini_ve_proc(ve);
+	fini_ve_meminfo(ve);
+
+	fini_ve_mibs(ve);
+	/* switch back to the caller's exec env before the printk state goes away */
+	(void)set_exec_env(old_ve);
+	fini_printk(ve);	/* no printk can happen in ve context anymore */
+
+	ve_list_del(ve);
+	up_read(&ve->op_sem);
+	/* NOTE(review): presumably drops the reference held for the cleanup - confirm */
+	real_put_ve(ve);
+}
+
+static struct list_head ve_cleanup_list;
+static spinlock_t ve_cleanup_lock;
+
+static DECLARE_COMPLETION(vzmond_complete);
+static struct task_struct *vzmond_thread;
+static volatile int stop_vzmond;
+
+void real_do_env_cleanup(struct ve_struct *ve)
+{
+	spin_lock(&ve_cleanup_lock);	/* queue ve; env_cleanup() runs later from vzmond */
+	list_add_tail(&ve->cleanup_list, &ve_cleanup_list);
+	spin_unlock(&ve_cleanup_lock);
+	wake_up_process(vzmond_thread);	/* NOTE(review): assigned inside vzmond() itself; confirm it cannot still be NULL here */
+}
+
+static void do_pending_env_cleanups(void)
+{
+	struct ve_struct *ve;
+	/* drain the cleanup queue until it is empty or a reschedule is pending */
+	spin_lock(&ve_cleanup_lock);
+	while (1) {
+		if (list_empty(&ve_cleanup_list) || need_resched())
+			break;
+		ve = list_entry(ve_cleanup_list.next, struct ve_struct,
+				cleanup_list);
+		list_del(&ve->cleanup_list);
+		spin_unlock(&ve_cleanup_lock);	/* env_cleanup() runs without the spinlock held */
+		env_cleanup(ve);
+		spin_lock(&ve_cleanup_lock);
+	}
+	spin_unlock(&ve_cleanup_lock);
+}
+
+static int have_pending_cleanups(void)
+{
+	return list_empty(&ve_cleanup_list) ? 0 : 1;	/* 1 while VEs are still queued for cleanup */
+}
+
+static int vzmond(void *arg)
+{
+	daemonize("vzmond");
+	vzmond_thread = current;	/* target of the wake_up_process() calls in this file */
+	set_current_state(TASK_INTERRUPTIBLE);
+	/* sleep until woken, run the queued cleanups, repeat until stop_vzmond is set */
+	while (!stop_vzmond) {
+		schedule();
+		try_to_freeze();
+		if (signal_pending(current))
+			flush_signals(current);
+
+		do_pending_env_cleanups();
+		set_current_state(TASK_INTERRUPTIBLE);
+		if (have_pending_cleanups())	/* work left over: stay runnable for the next schedule() */
+			__set_current_state(TASK_RUNNING);
+	}
+
+	__set_task_state(current, TASK_RUNNING);
+	complete_and_exit(&vzmond_complete, 0);
+}
+
+static int __init init_vzmond(void)
+{
+	INIT_LIST_HEAD(&ve_cleanup_list);
+	spin_lock_init(&ve_cleanup_lock);
+	stop_vzmond = 0;
+	return kernel_thread(vzmond, NULL, 0);	/* passed through as is; vecalls_init() treats < 0 as failure */
+}
+
+static void fini_vzmond(void)
+{
+	stop_vzmond = 1;	/* makes the vzmond() loop terminate on its next pass */
+	wake_up_process(vzmond_thread);
+	wait_for_completion(&vzmond_complete);	/* completed by vzmond() right before it exits */
+	WARN_ON(!list_empty(&ve_cleanup_list));
+}
+
+void real_do_env_free(struct ve_struct *ve)
+{
+	VZTRACE("real_do_env_free\n");
+	/* release the remaining per-VE data, then the ve_struct itself */
+	ve_ipc_free(ve); /* free SYSV IPC resources */
+	free_ve_tty_drivers(ve);
+	free_ve_utsname(ve);
+	free_ve_sysctl(ve); /* free per ve sysctl data */
+	free_ve_filesystems(ve);
+	printk(KERN_INFO "VPS: %d: stopped\n", VEID(ve));
+	kfree(ve);
+	/* NOTE(review): presumably pairs with a module reference taken at VE creation - confirm */
+	module_put(THIS_MODULE);
+}
+EXPORT_SYMBOL(real_do_env_free);
+
+
+/**********************************************************************
+ **********************************************************************
+ *
+ * VE TTY handling
+ *
+ **********************************************************************
+ **********************************************************************/
+
+DCL_VE_OWNER(TTYDRV, struct tty_driver, owner_env)
+
+static struct tty_driver *alloc_ve_tty_driver(struct tty_driver *base,
+					   struct ve_struct *ve)
+{
+	size_t size;
+	struct tty_driver *driver;
+	/* clone base into a driver owned by ve; returns NULL if an allocation fails */
+	driver = kmalloc(sizeof(struct tty_driver), GFP_KERNEL);
+	if (!driver)
+		goto out;
+
+	memcpy(driver, base, sizeof(struct tty_driver));
+
+	driver->driver_state = NULL;
+	/* one zeroed block, cut into three arrays of base->num pointers each */
+	size = base->num * 3 * sizeof(void *);
+	if (!(driver->flags & TTY_DRIVER_DEVPTS_MEM)) {
+		void **p;
+		p = kmalloc(size, GFP_KERNEL);
+		if (!p)
+			goto out_free;
+		memset(p, 0, size);
+		driver->ttys = (struct tty_struct **)p;
+		driver->termios = (struct termios **)(p + driver->num);
+		driver->termios_locked = (struct termios **)(p + driver->num * 2);
+	} else {
+		driver->ttys = NULL;	/* TTY_DRIVER_DEVPTS_MEM drivers get no arrays here */
+		driver->termios = NULL;
+		driver->termios_locked = NULL;
+	}
+
+	SET_VE_OWNER_TTYDRV(driver, ve);
+	driver->flags |= TTY_DRIVER_INSTALLED;
+
+	return driver;
+
+out_free:
+	kfree(driver);
+out:
+	return NULL;
+}
+
+static void free_ve_tty_driver(struct tty_driver *driver)
+{
+	if (!driver)
+		return;
+	/* termios entries first; the three pointer arrays are one block that starts at ttys */
+	clear_termios(driver);
+	kfree(driver->ttys);
+	kfree(driver);
+}
+
+static int alloc_ve_tty_drivers(struct ve_struct* ve)
+{
+#ifdef CONFIG_LEGACY_PTYS
+	/* Traditional BSD devices */
+	ve->pty_driver = alloc_ve_tty_driver(pty_driver, ve);
+	if (!ve->pty_driver)
+		goto out_mem;
+
+	ve->pty_slave_driver = alloc_ve_tty_driver(pty_slave_driver, ve);
+	if (!ve->pty_slave_driver)
+		goto out_mem;
+	/* cross-link master and slave */
+	ve->pty_driver->other       = ve->pty_slave_driver;
+	ve->pty_slave_driver->other = ve->pty_driver;
+#endif	
+
+#ifdef CONFIG_UNIX98_PTYS
+	ve->ptm_driver = alloc_ve_tty_driver(ptm_driver, ve);
+	if (!ve->ptm_driver)
+		goto out_mem;
+
+	ve->pts_driver = alloc_ve_tty_driver(pts_driver, ve);
+	if (!ve->pts_driver)
+		goto out_mem;
+
+	ve->ptm_driver->other = ve->pts_driver;
+	ve->pts_driver->other = ve->ptm_driver;
+	/* per-VE idr kept in ve->allocated_ptys */
+	ve->allocated_ptys = kmalloc(sizeof(*ve->allocated_ptys), GFP_KERNEL);
+	if (!ve->allocated_ptys)
+		goto out_mem;
+	idr_init(ve->allocated_ptys);
+#endif
+	return 0;
+
+out_mem:
+	free_ve_tty_drivers(ve);	/* NOTE(review): relies on not-yet-set fields being NULL - confirm ve starts zeroed */
+	return -ENOMEM;
+}
+
+static void free_ve_tty_drivers(struct ve_struct* ve)
+{
+#ifdef CONFIG_LEGACY_PTYS
+	free_ve_tty_driver(ve->pty_driver);	/* free_ve_tty_driver() ignores NULL */
+	free_ve_tty_driver(ve->pty_slave_driver);
+	ve->pty_driver = ve->pty_slave_driver = NULL;
+#endif	
+#ifdef CONFIG_UNIX98_PTYS
+	free_ve_tty_driver(ve->ptm_driver);
+	free_ve_tty_driver(ve->pts_driver);
+	kfree(ve->allocated_ptys);
+	ve->ptm_driver = ve->pts_driver = NULL;	/* pointers are reset after freeing */
+	ve->allocated_ptys = NULL;
+#endif
+}
+
+static inline void __register_tty_driver(struct tty_driver *driver)
+{
+	list_add(&driver->tty_drivers, &tty_drivers);	/* register_ve_tty_drivers() calls this under tty_driver_guard */
+}
+
+static inline void __unregister_tty_driver(struct tty_driver *driver)
+{
+	if (!driver)	/* tolerate drivers that were never allocated */
+		return;
+	list_del(&driver->tty_drivers);
+}
+
+static int register_ve_tty_drivers(struct ve_struct* ve)
+{
+	write_lock_irq(&tty_driver_guard);	/* add the per-VE drivers to the tty_drivers list */
+#ifdef CONFIG_UNIX98_PTYS
+	__register_tty_driver(ve->ptm_driver);
+	__register_tty_driver(ve->pts_driver);
+#endif
+#ifdef CONFIG_LEGACY_PTYS
+	__register_tty_driver(ve->pty_driver);
+	__register_tty_driver(ve->pty_slave_driver);
+#endif	
+	write_unlock_irq(&tty_driver_guard);
+
+	return 0;	/* cannot fail; always reports success */
+}
+
+static void unregister_ve_tty_drivers(struct ve_struct* ve)
+{
+	VZTRACE("unregister_ve_tty_drivers\n");
+	write_lock_irq(&tty_driver_guard);	/* unlink the per-VE drivers from tty_drivers */
+#ifdef CONFIG_LEGACY_PTYS	/* same guards as register_ve_tty_drivers() */
+	__unregister_tty_driver(ve->pty_driver);
+	__unregister_tty_driver(ve->pty_slave_driver);
+#endif
+#ifdef CONFIG_UNIX98_PTYS
+	__unregister_tty_driver(ve->ptm_driver);
+	__unregister_tty_driver(ve->pts_driver);
+#endif
+	write_unlock_irq(&tty_driver_guard);
+}
+static int init_ve_tty_drivers(struct ve_struct *ve)
+{
+	int rc;
+
+	/* an allocation failure leaves nothing behind to undo */
+	rc = alloc_ve_tty_drivers(ve);
+	if (rc)
+		return rc;
+
+	/* a registration failure releases what was just allocated */
+	rc = register_ve_tty_drivers(ve);
+	if (rc)
+		free_ve_tty_drivers(ve);
+	return rc;
+}
+
+static void fini_ve_tty_drivers(struct ve_struct *ve)
+{
+	unregister_ve_tty_drivers(ve);	/* unlink from tty_drivers before the memory is freed */
+	free_ve_tty_drivers(ve);
+}
+
+/*
+ * Free the termios and termios_locked structures because
+ * we don't want to get memory leaks when modular tty
+ * drivers are removed from the kernel.
+ */
+static void clear_termios(struct tty_driver *driver)
+{
+	int idx;
+
+	/* drivers without a termios array have nothing to release */
+	if (!driver->termios)
+		return;
+
+	for (idx = 0; idx < driver->num; idx++) {
+		struct termios *old;
+
+		/* detach each entry from its slot before freeing it */
+		old = driver->termios[idx];
+		driver->termios[idx] = NULL;
+		kfree(old);
+		old = driver->termios_locked[idx];
+		driver->termios_locked[idx] = NULL;
+		kfree(old);
+	}
+}
+
+
+/**********************************************************************
+ **********************************************************************
+ *
+ * Pieces of VE network
+ *
+ **********************************************************************
+ **********************************************************************/
+
+#if defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE)
+#include <asm/uaccess.h>
+#include <net/sock.h>
+#include <linux/netlink.h>
+#include <linux/rtnetlink.h>
+#include <net/route.h>
+#include <net/ip_fib.h>
+#endif
+
+#if defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE)
+static void ve_del_ip_addrs(struct net_device *dev)
+{
+	struct in_device *in_dev;
+	/* remove every address on dev's ifa_list; no-op if dev has no in_device */
+	in_dev = in_dev_get(dev);
+	if (in_dev == NULL)
+		return;
+	/* keep deleting the current list head until the list is empty */
+	while (in_dev->ifa_list != NULL) {
+		inet_del_ifa(in_dev, &in_dev->ifa_list, 1);
+	}
+	in_dev_put(in_dev);
+}
+
+static void ve_del_ipv6_addrs(struct net_device *dev)
+{
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+	addrconf_ifdown(dev, 2);	/* the body is empty when IPv6 is not configured */
+#endif
+}
+
+static int ve_netdev_cleanup(struct net_device *dev, int to_ve)
+{
+	int err;
+	/* quiesce dev before it changes VE; callers pass to_ve=1 when moving it into a VE */
+	err = 0;
+	ve_del_ip_addrs(dev);
+	ve_del_ipv6_addrs(dev);
+	if ((dev->flags & IFF_UP) != 0)
+		err = dev_close(dev);
+	synchronize_net();
+	dev_shutdown(dev);
+	dev_mc_discard(dev);
+	free_divert_blk(dev);
+	synchronize_net();
+	/* remember the MTU on the way in, restore it on the way out */
+	if (to_ve)
+		dev->orig_mtu = dev->mtu;
+	else {
+		int rc = dev_set_mtu(dev, dev->orig_mtu);
+		if (err == 0)	/* a dev_close() error takes precedence */
+			err = rc;
+	}
+
+	return err;
+}
+
+static void __ve_dev_move(struct net_device *dev, struct ve_struct *ve_src,
+	struct ve_struct *ve_dst, struct user_beancounter *exec_ub)
+{
+	struct net_device **dp, *d;
+	struct user_beancounter *ub;
+	/* Unlink dev from ve_src.  dp always addresses the link that points
+	 * at d (the list head for the first device), so it is never NULL. */
+	for (dp = &ve_src->_net_dev_base; (d = *dp) != NULL; dp = &d->next) {
+		if (d == dev) {
+			hlist_del(&dev->name_hlist);
+			hlist_del(&dev->index_hlist);
+			if (ve_src->_net_dev_tail == &dev->next)
+				ve_src->_net_dev_tail = dp;
+			/* also correct when dev is the first device of ve_src */
+			*dp = dev->next;
+			dev->next = NULL;
+			break;
+		}
+	}
+	*ve_dst->_net_dev_tail = dev;	/* append to ve_dst's list and rehash under ve_dst */
+	ve_dst->_net_dev_tail = &dev->next;
+	hlist_add_head(&dev->name_hlist, dev_name_hash(dev->name, ve_dst));
+	hlist_add_head(&dev->index_hlist, dev_index_hash(dev->ifindex, ve_dst));
+	dev->owner_env = ve_dst;
+	/* switch the device's exec_ub to exec_ub and drop the previous one */
+	ub = netdev_bc(dev)->exec_ub;
+	netdev_bc(dev)->exec_ub = get_beancounter(exec_ub);
+	put_beancounter(ub);
+}
+
+static int ve_dev_add(envid_t veid, char *dev_name)
+{
+	int err;
+	struct net_device *dev;
+	struct ve_struct *ve;
+	struct hlist_node *p;
+	/* move the VE0 device called dev_name into VE veid */
+	dev = NULL;
+	err = -ESRCH;
+
+	ve = get_ve_by_id(veid);
+	if (ve == NULL)
+		goto out;
+
+	rtnl_lock();
+	/* look the name up in VE0's name hash */
+	read_lock(&dev_base_lock);
+	hlist_for_each(p, dev_name_hash(dev_name, get_ve0())) {
+		struct net_device *d = hlist_entry(p, struct net_device, 
+						   name_hlist);
+		if (strncmp(d->name, dev_name, IFNAMSIZ) == 0) {
+			dev = d;
+			break;
+		}
+	}
+	read_unlock(&dev_base_lock);
+	if (dev == NULL)
+		goto out_unlock;
+
+	err = -EPERM;
+	if (!ve_is_dev_movable(dev))
+		goto out_unlock;
+	/* devices with IFF_SLAVE or IFF_MASTER set are refused */
+	err = -EINVAL;
+	if (dev->flags & (IFF_SLAVE|IFF_MASTER))
+		goto out_unlock;
+	/* NOTE(review): the result of the cleanup is ignored here */
+	ve_netdev_cleanup(dev, 1);
+
+	write_lock_bh(&dev_base_lock);
+	__ve_dev_move(dev, get_ve0(), ve, get_exec_ub());
+	write_unlock_bh(&dev_base_lock);
+
+	err = 0;
+
+out_unlock:
+	rtnl_unlock();
+	real_put_ve(ve);
+
+	if (dev == NULL)
+		printk(KERN_WARNING "Device %s not found\n", dev_name);
+
+out:
+	return err;
+}
+
+static int ve_dev_del(envid_t veid, char *dev_name)
+{
+	int err;
+	struct net_device *dev;
+	struct ve_struct *ve, *old_exec;
+	struct hlist_node *p;
+	/* move the device called dev_name from VE veid back to VE0 */
+	dev = NULL;
+	err = -ESRCH;
+
+	ve = get_ve_by_id(veid);
+	if (ve == NULL)
+		goto out;
+
+	rtnl_lock();
+	/* look the name up in the VE's own name hash */
+	read_lock(&dev_base_lock);
+	hlist_for_each(p, dev_name_hash(dev_name, ve)) {
+		struct net_device *d = hlist_entry(p, struct net_device, 
+						   name_hlist);
+		if (strncmp(d->name, dev_name, IFNAMSIZ) == 0) {
+			dev = d;
+			break;
+		}
+	}
+	read_unlock(&dev_base_lock);
+	if (dev == NULL)
+		goto out_unlock;
+
+	err = -EPERM;
+	if (!ve_is_dev_movable(dev))
+		goto out_unlock;
+	/* the cleanup runs with ve as exec env; its result is ignored */
+	old_exec = set_exec_env(ve);
+	ve_netdev_cleanup(dev, 0);
+	(void)set_exec_env(old_exec);
+
+	write_lock_bh(&dev_base_lock);
+	__ve_dev_move(dev, ve, get_ve0(), netdev_bc(dev)->owner_ub);
+	write_unlock_bh(&dev_base_lock);
+
+	err = 0;
+
+out_unlock:
+	rtnl_unlock();
+	real_put_ve(ve);
+
+	if (dev == NULL)
+		printk(KERN_WARNING "Device %s not found\n", dev_name);
+
+out:
+	return err;
+}
+
+int real_ve_dev_map(envid_t veid, int op, char *dev_name)
+{
+	/*
+	 * Dispatch a network device mapping request for VE veid.
+	 *
+	 * Returns -EPERM without CAP_SETVEID, -EINVAL for an unknown op,
+	 * and otherwise whatever the selected handler returns.
+	 */
+	if (!capable(CAP_SETVEID))
+		return -EPERM;
+
+	/* VE_NETDEV_ADD: give the named device to the VE */
+	if (op == VE_NETDEV_ADD)
+		return ve_dev_add(veid, dev_name);
+
+	/* VE_NETDEV_DEL: take the named device away from the VE */
+	if (op == VE_NETDEV_DEL)
+		return ve_dev_del(veid, dev_name);
+
+	return -EINVAL;
+}
+
+static void ve_mapped_devs_cleanup(struct ve_struct *ve)
+{
+	struct net_device *dev;
+	/* hand ve's devices back to VE0, except NETIF_F_VENET ones and the loopback */
+	rtnl_lock();
+	write_lock_bh(&dev_base_lock);
+restart:
+	for (dev = ve->_net_dev_base; dev != NULL; dev = dev->next)
+	{
+		if ((dev->features & NETIF_F_VENET) ||
+		    (dev == ve->_loopback_dev)) /* Skip loopback dev */
+			continue;
+		write_unlock_bh(&dev_base_lock);	/* dropped around ve_netdev_cleanup() */
+		ve_netdev_cleanup(dev, 0);
+		write_lock_bh(&dev_base_lock);
+		__ve_dev_move(dev, ve, get_ve0(), netdev_bc(dev)->owner_ub);
+		goto restart;	/* the list changed: rescan from its head */
+	}
+	write_unlock_bh(&dev_base_lock);
+	rtnl_unlock();
+}
+#endif
+
+
+/**********************************************************************
+ **********************************************************************
+ *
+ * VE information via /proc
+ *
+ **********************************************************************
+ **********************************************************************/
+#ifdef CONFIG_PROC_FS
+static int devperms_seq_show(struct seq_file *m, void *v)
+{
+	struct devperms_struct *perm = v;
+	char devname[32], kind;
+	unsigned usage, fmt;
+	dev_t devno;
+
+	/*
+	 * Emit one line of the devperms listing.  The token (void *)1 stands
+	 * for the header line rather than a real entry.
+	 */
+	if (perm == (struct devperms_struct *)1L) {
+		seq_printf(m, "Version: 2.7\n");
+		return 0;
+	}
+
+	usage = perm->type & VE_USE_MASK;
+	fmt = perm->type & S_IFMT;
+	devno = perm->dev;
+
+	/* the VE_USE_* bits select how much of the device number is printed */
+	if ((usage & VE_USE_MINOR) == VE_USE_MINOR)
+		snprintf(devname, sizeof(devname), "%d:%d", MAJOR(devno), MINOR(devno));
+	else if ((usage & VE_USE_MAJOR) == VE_USE_MAJOR)
+		snprintf(devname, sizeof(devname), "%d:*", MAJOR(devno));
+	else
+		snprintf(devname, sizeof(devname), "*:*");
+
+	/* 'c' for character devices, 'b' for block devices, '?' otherwise */
+	kind = (fmt == S_IFCHR) ? 'c' : (fmt == S_IFBLK) ? 'b' : '?';
+
+	seq_printf(m, "%10u %c %03o %s\n", perm->veid, kind, perm->mask, devname);
+	return 0;
+}
+
+static void *devperms_seq_start(struct seq_file *m, loff_t *pos)
+{
+	loff_t cpos;
+	long slot;
+	struct devperms_struct *dp;
+
+	cpos = *pos;
+	read_lock(&devperms_hash_guard);	/* released in devperms_seq_stop() */
+	if (cpos-- == 0)
+		return (void *)1L;	/* position 0 is the header token */
+	/* later positions walk the hash slot by slot, chain by chain */
+	for (slot = 0; slot < DEVPERMS_HASH_SZ; slot++)
+		for (dp = devperms_hash[slot]; dp; dp = dp->devhash_next)
+			if (cpos-- == 0) {
+				m->private = (void *)slot;	/* remembered for devperms_seq_next() */
+				return dp;
+			}
+	return NULL;
+}
+
+static void *devperms_seq_next(struct seq_file *m, void *v, loff_t *pos)
+{
+	long slot;
+	struct devperms_struct *dp;
+
+	dp = (struct devperms_struct *)v;
+	/* either step along the current chain or pick the slot to resume scanning at */
+	if (dp == (struct devperms_struct *)1L)
+		slot = 0;	/* after the header token: start with the first slot */
+	else if (dp->devhash_next == NULL)
+		slot = (long)m->private + 1;	/* chain exhausted: continue with the next slot */
+	else {
+		(*pos)++;
+		return dp->devhash_next;
+	}
+
+	for (; slot < DEVPERMS_HASH_SZ; slot++)
+		if (devperms_hash[slot]) {
+			(*pos)++;
+			m->private = (void *)slot;
+			return devperms_hash[slot];
+		}
+	return NULL;	/* *pos is left unchanged when the table is exhausted */
+}
+
+static void devperms_seq_stop(struct seq_file *m, void *v)
+{
+	read_unlock(&devperms_hash_guard);	/* pairs with the read_lock() in devperms_seq_start() */
+}
+
+static struct seq_operations devperms_seq_op = {
+	.start	= devperms_seq_start,
+	.next	= devperms_seq_next,
+	.stop	= devperms_seq_stop,
+	.show	= devperms_seq_show,
+};
+
+static int devperms_open(struct inode *inode, struct file *file)
+{
+        return seq_open(file, &devperms_seq_op);	/* attach the devperms iterator to this file */
+}
+
+static struct file_operations proc_devperms_ops = {
+	.open		= devperms_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= seq_release,
+};
+
+#if BITS_PER_LONG == 32
+#define VESTAT_LINE_WIDTH (6 * 11 + 6 * 21)
+#define VESTAT_LINE_FMT "%10u %10lu %10lu %10lu %10lu %20Lu %20Lu %20Lu %20Lu %20Lu %20Lu %10lu\n"
+#define VESTAT_HEAD_FMT "%10s %10s %10s %10s %10s %20s %20s %20s %20s %20s %20s %10s\n"
+#else
+#define VESTAT_LINE_WIDTH (12 * 21)
+#define VESTAT_LINE_FMT "%20u %20lu %20lu %20lu %20lu %20Lu %20Lu %20Lu %20Lu %20Lu %20Lu %20lu\n"
+#define VESTAT_HEAD_FMT "%20s %20s %20s %20s %20s %20s %20s %20s %20s %20s %20s %20s\n"
+#endif
+
+static int vestat_seq_show(struct seq_file *m, void *v)
+{
+	struct ve_struct *ve = (struct ve_struct *)v;
+	struct ve_struct *curve;
+	int cpu;
+	unsigned long user_ve, nice_ve, system_ve, uptime;
+	cycles_t uptime_cycles, idle_time, strv_time, used;
+	/* the header precedes the first list entry, or a non-super caller's own VE */
+	curve = get_exec_env();
+	if (ve == ve_list_head ||
+	    (!ve_is_super(curve) && ve == curve)) {
+		/* print header */
+		seq_printf(m, "%-*s\n",
+			VESTAT_LINE_WIDTH - 1,
+			"Version: 2.2");
+		seq_printf(m, VESTAT_HEAD_FMT, "VEID",
+					"user", "nice", "system",
+					"uptime", "idle",
+					"strv", "uptime", "used",
+					"maxlat", "totlat", "numsched");
+	}
+	/* VE0 itself gets no statistics line */
+	if (ve == get_ve0())
+		return 0;
+
+	user_ve = nice_ve = system_ve = 0;
+	idle_time = strv_time = used = 0;	/* strv_time is never updated: always printed as 0 */
+	/* sum the per-CPU counters over all NR_CPUS slots */
+	for (cpu = 0; cpu < NR_CPUS; cpu++) {
+		struct ve_cpu_stats *st;
+
+		st = VE_CPU_STATS(ve, cpu);
+		user_ve += st->user;
+		nice_ve += st->nice;
+		system_ve += st->system;
+		used += VE_CPU_STATS(ve, cpu)->used_time;
+		idle_time += ve_sched_get_idle_time(ve, cpu);
+	}
+	uptime_cycles = get_cycles() - ve->start_cycles;
+	uptime = jiffies - ve->start_jiffies;	/* uptime is reported in jiffies and in cycles */
+
+	seq_printf(m, VESTAT_LINE_FMT, ve->veid,
+				user_ve, nice_ve, system_ve,
+				uptime, idle_time, 
+				strv_time, uptime_cycles, used,
+				ve->sched_lat_ve.last.maxlat,
+				ve->sched_lat_ve.last.totlat,
+				ve->sched_lat_ve.last.count);
+	return 0;
+}
+
+static void *ve_seq_start(struct seq_file *m, loff_t *pos)
+{
+	struct ve_struct *ve, *curve;
+	loff_t l;
+	/* ve_list_guard is taken on every path and dropped in ve_seq_stop() */
+	curve = get_exec_env();
+	read_lock(&ve_list_guard);
+	if (!ve_is_super(curve)) {	/* non-super VE: the sequence is just the VE itself */
+		if (*pos != 0)
+			return NULL;
+		return curve;
+	}
+	for (ve = ve_list_head, l = *pos;	/* skip *pos entries from the list head */
+	     ve != NULL && l > 0;
+	     ve = ve->next, l--);
+	return ve;
+}
+
+static void *ve_seq_next(struct seq_file *m, void *v, loff_t *pos)
+{
+	struct ve_struct *ve = (struct ve_struct *)v;
+
+	if (!ve_is_super(get_exec_env()))	/* a non-super VE only ever sees one entry */
+		return NULL;
+	(*pos)++;
+	return ve->next;
+}
+
+static void ve_seq_stop(struct seq_file *m, void *v)
+{
+	read_unlock(&ve_list_guard);	/* pairs with the read_lock() in ve_seq_start() */
+}
+
+static struct seq_operations vestat_seq_op = {	/* iterator behind vestat_open() */
+	.start	= ve_seq_start,
+	.next	= ve_seq_next,
+	.stop	= ve_seq_stop,
+	.show	= vestat_seq_show,
+};
+
+static int vestat_open(struct inode *inode, struct file *file)
+{
+        return seq_open(file, &vestat_seq_op);	/* attach the vestat iterator to this file */
+}
+
+static struct file_operations proc_vestat_operations = {	/* installed on vz/vestat */
+	.open		= vestat_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= seq_release,
+};
+
+static inline unsigned long ve_used_mem(struct user_beancounter *ub)
+{
+	return ub->ub_parms[UB_OOMGUARPAGES].held;	/* "used" is the held value of UB_OOMGUARPAGES */
+}
+
+static inline void ve_mi_replace(struct meminfo *mi)
+{
+	struct user_beancounter *ub;
+	unsigned long meminfo_val;
+	unsigned long nodettram;
+	unsigned long usedmem;
+	/* rewrite *mi from the exec env's meminfo_val and the current task's beancounter */
+	meminfo_val = get_exec_env()->meminfo_val;
+
+	if(!meminfo_val)
+		return; /* No virtualization */
+
+	nodettram = mi->si.totalram;
+	ub = current->mm->mm_ub;
+	usedmem = ve_used_mem(ub);
+	/* every field other than totalram and freeram ends up zero */
+	memset(mi, 0, sizeof(*mi));
+	/* totalram is capped at the original value; freeram bottoms out at 0 */
+	mi->si.totalram = (meminfo_val > nodettram) ?
+			nodettram : meminfo_val;
+	mi->si.freeram = (mi->si.totalram > usedmem) ?
+			(mi->si.totalram - usedmem) : 0;
+}
+
+static int meminfo_call(struct vnotifier_block *self,
+                unsigned long event, void *arg, int old_ret)
+{
+	if (event != VIRTINFO_MEMINFO)	/* other events: pass the previous result through */
+		return old_ret;
+
+	ve_mi_replace((struct meminfo *)arg);
+
+	return NOTIFY_OK;
+}
+
+
+static struct vnotifier_block meminfo_notifier_block = {
+	.notifier_call = meminfo_call
+};
+
+static int __init init_vecalls_proc(void)
+{
+	struct proc_dir_entry *de;
+	/* create vz/vestat and vz/devperms, then hook up the meminfo notifier */
+	de = create_proc_glob_entry("vz/vestat",
+			S_IFREG|S_IRUSR, NULL);
+	if (de == NULL) {
+		/* create "vz" subdirectory, if not exist */
+		(void) create_proc_glob_entry("vz",
+					      S_IFDIR|S_IRUGO|S_IXUGO, NULL);
+		de = create_proc_glob_entry("vz/vestat",
+				S_IFREG|S_IRUSR, NULL);
+	}
+	if (de)
+		de->proc_fops = &proc_vestat_operations;
+	else
+		printk(KERN_WARNING 
+				"VZMON: can't make vestat proc entry\n");
+	/* devperms is created with create_proc_entry(), not the glob variant */
+	de = create_proc_entry("vz/devperms", S_IFREG | S_IRUSR, NULL);
+	if (de)
+		de->proc_fops = &proc_devperms_ops;
+	else
+		printk(KERN_WARNING
+				"VZMON: can't make devperms proc entry\n");
+
+	virtinfo_notifier_register(VITYPE_GENERAL, &meminfo_notifier_block);
+
+	return 0;	/* missing proc entries are only warned about, never fatal */
+}
+
+static void fini_vecalls_proc(void)
+{
+	remove_proc_entry("vz/devperms", NULL);	/* undoes init_vecalls_proc() */
+	remove_proc_entry("vz/vestat", NULL);
+	virtinfo_notifier_unregister(VITYPE_GENERAL, &meminfo_notifier_block);
+}
+#else
+#define init_vecalls_proc()	(0)
+#define fini_vecalls_proc()	do { } while (0)
+#endif /* CONFIG_PROC_FS */
+
+
+/**********************************************************************
+ **********************************************************************
+ *
+ * User ctl
+ *
+ **********************************************************************
+ **********************************************************************/
+
+int vzcalls_ioctl(struct inode *, struct file *, unsigned int, unsigned long);
+static struct vzioctlinfo vzcalls = {	/* passed to vzioctl_register() */
+	.type	= VZCTLTYPE,
+	.func	= vzcalls_ioctl,
+	.owner	= THIS_MODULE,
+};
+
+int vzcalls_ioctl(struct inode *ino, struct file *file, unsigned int cmd,
+		unsigned long arg)
+{
+	int err;
+
+	err = -ENOTTY;	/* result for any cmd not handled below */
+	switch(cmd) {
+	    case VZCTL_MARK_ENV_TO_DOWN: {
+		        /* Compatibility issue */
+		        err = 0;
+		}
+		break;
+	    case VZCTL_SETDEVPERMS: {
+			/* Device type was mistakenly declared as dev_t
+			 * in the old user-kernel interface.
+			 * That's wrong, dev_t is a kernel internal type.
+			 * I use `unsigned' not having anything better in mind.
+			 * 2001/08/11  SAW  */
+			struct vzctl_setdevperms s;
+			err = -EFAULT;
+			if (copy_from_user(&s, (void *)arg, sizeof(s)))
+				break;
+			err = real_setdevperms(s.veid, s.type,
+					new_decode_dev(s.dev), s.mask);
+		}
+		break;
+#if defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE)
+	    case VZCTL_VE_NETDEV: {
+			struct vzctl_ve_netdev d;
+			char *s;
+			err = -EFAULT;
+			if (copy_from_user(&d, (void *)arg, sizeof(d)))
+				break;
+			err = -ENOMEM;
+			s = kmalloc(IFNAMSIZ+1, GFP_KERNEL);	/* one spare byte for the terminator */
+			if (s == NULL)
+				break;
+			err = -EFAULT;	/* also the result for an empty name */
+			if (strncpy_from_user(s, d.dev_name, IFNAMSIZ) > 0) {
+				s[IFNAMSIZ] = 0;
+				err = real_ve_dev_map(d.veid, d.op, s);
+			}
+			kfree(s);
+		}
+		break;
+#endif
+	    case VZCTL_ENV_CREATE: {
+			struct vzctl_env_create s;
+			err = -EFAULT;
+			if (copy_from_user(&s, (void *)arg, sizeof(s)))
+				break;
+			err = real_env_create(s.veid, s.flags, s.class_id,
+				NULL, 0);
+		}
+		break;
+	    case VZCTL_ENV_CREATE_DATA: {
+			struct vzctl_env_create_data s;
+			env_create_param_t *data;
+			err = -EFAULT;
+			if (copy_from_user(&s, (void *)arg, sizeof(s)))
+				break;
+			err=-EINVAL;
+			if (s.datalen < VZCTL_ENV_CREATE_DATA_MINLEN ||
+			    s.datalen > VZCTL_ENV_CREATE_DATA_MAXLEN ||
+			    s.data == 0)
+				break;
+			err = -ENOMEM;
+			data = kmalloc(sizeof(*data), GFP_KERNEL);
+			if (!data)
+				break;
+			memset(data, 0, sizeof(*data));	/* bytes beyond s.datalen stay zero */
+			err = -EFAULT;
+			if (copy_from_user(data, (void *)s.data, s.datalen))	/* NOTE(review): assumes MAXLEN <= sizeof(*data) - confirm */
+				goto free_data;
+			err = real_env_create(s.veid, s.flags, s.class_id,
+				data, s.datalen);
+free_data:
+			kfree(data);
+		}
+		break;
+	    case VZCTL_GET_CPU_STAT: {
+			struct vzctl_cpustatctl s;
+			err = -EFAULT;
+			if (copy_from_user(&s, (void *)arg, sizeof(s)))
+				break;
+			err = ve_get_cpu_stat(s.veid, s.cpustat);
+		}
+		break;
+	    case VZCTL_VE_MEMINFO: {
+			struct vzctl_ve_meminfo s;
+			err = -EFAULT;
+			if (copy_from_user(&s, (void *)arg, sizeof(s)))
+				break;
+			err = ve_set_meminfo(s.veid, s.val);
+		}
+		break;
+	}
+	return err;
+}
+EXPORT_SYMBOL(real_env_create);
+
+
+/**********************************************************************
+ **********************************************************************
+ *
+ * Init/exit stuff
+ *
+ **********************************************************************
+ **********************************************************************/
+
+#ifdef CONFIG_VE_CALLS_MODULE
+static int __init init_vecalls_symbols(void)
+{
+	KSYMRESOLVE(real_get_device_perms_ve);	/* NOTE(review): presumably publishes this module's entry points to the core kernel - confirm */
+	KSYMRESOLVE(real_do_env_cleanup);
+	KSYMRESOLVE(real_do_env_free);
+	KSYMRESOLVE(real_update_load_avg_ve);
+	KSYMMODRESOLVE(vzmon);
+	return 0;
+}
+
+static void fini_vecalls_symbols(void)
+{
+	KSYMMODUNRESOLVE(vzmon);	/* undoes init_vecalls_symbols(), module entry first */
+	KSYMUNRESOLVE(real_get_device_perms_ve);
+	KSYMUNRESOLVE(real_do_env_cleanup);
+	KSYMUNRESOLVE(real_do_env_free);
+	KSYMUNRESOLVE(real_update_load_avg_ve);
+}
+#else
+#define init_vecalls_symbols()	(0)
+#define fini_vecalls_symbols()	do { } while (0)
+#endif
+
+static inline __init int init_vecalls_ioctls(void)
+{
+	vzioctl_register(&vzcalls);	/* from here on vzcalls_ioctl() can be invoked */
+	return 0;
+}
+
+static inline void fini_vecalls_ioctls(void)
+{
+	vzioctl_unregister(&vzcalls);	/* pairs with init_vecalls_ioctls() */
+}
+
+static int __init vecalls_init(void)
+{
+	int err;
+	int i;
+
+	ve_list_head = get_ve0();
+	/* set up the hook lists first: nothing started below may see them raw */
+	for (i = 0; i < VE_MAX_HOOKS; i++)
+		INIT_LIST_HEAD(&ve_hooks[i]);
+
+	err = init_vzmond();
+	if (err < 0)
+		goto out_vzmond;
+
+	err = init_devperms_hash();
+	if (err < 0)
+		goto out_perms;
+
+	err = init_vecalls_symbols();
+	if (err < 0)
+		goto out_sym;
+
+	err = init_vecalls_proc();
+	if (err < 0)
+		goto out_proc;
+
+	err = init_vecalls_ioctls();
+	if (err < 0)
+		goto out_ioctls;
+
+	return 0;
+	/* each label is named after the step that failed and undoes the earlier ones */
+out_ioctls:
+	fini_vecalls_proc();
+out_proc:
+	fini_vecalls_symbols();
+out_sym:
+	fini_devperms_hash();
+out_perms:
+	fini_vzmond();
+out_vzmond:
+	return err;
+}
+
+static void vecalls_exit(void)
+{
+	fini_vecalls_ioctls();	/* teardown in the reverse order of vecalls_init() */
+	fini_vecalls_proc();
+	fini_vecalls_symbols();
+	fini_devperms_hash();
+	fini_vzmond();
+}
+
+EXPORT_SYMBOL(get_ve_by_id);
+EXPORT_SYMBOL(__find_ve_by_id);
+EXPORT_SYMBOL(ve_list_guard);
+EXPORT_SYMBOL(ve_list_head);
+EXPORT_SYMBOL(nr_ve);
+
+MODULE_AUTHOR("SWsoft <info@sw-soft.com>");
+MODULE_DESCRIPTION("Virtuozzo Control");
+MODULE_LICENSE("GPL v2");
+
+module_init(vecalls_init)
+module_exit(vecalls_exit)
diff -uprN linux-2.6.16/kernel/veowner.c linux-2.6.16.ovz/kernel/veowner.c
--- linux-2.6.16/kernel/veowner.c	1969-12-31 19:00:00.000000000 -0500
+++ linux-2.6.16.ovz/kernel/veowner.c	2006-07-05 08:34:56.000000000 -0400
@@ -0,0 +1,308 @@
+/*
+ *  kernel/veowner.c
+ *
+ *  Copyright (C) 2000-2005  SWsoft
+ *  All rights reserved.
+ *  
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#include <linux/sched.h>
+#include <linux/ve.h>
+#include <linux/ve_owner.h>
+#include <linux/ve_proto.h>
+#include <linux/ipc.h>
+#include <linux/fs.h>
+#include <linux/proc_fs.h>
+#include <linux/file.h>
+#include <linux/mm.h>
+#include <linux/delay.h>
+#include <linux/vmalloc.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/list.h>
+#include <linux/inetdevice.h>
+#include <asm/system.h>
+#include <asm/io.h>
+
+#include <net/tcp.h>
+
+void prepare_ve0_process(struct task_struct *tsk)
+{
+	set_virt_pid(tsk, tsk->pid);	/* virtual ids are set to the task's own ids */
+	set_virt_tgid(tsk, tsk->tgid);
+	if (tsk->signal) {
+		set_virt_pgid(tsk, tsk->signal->pgrp);
+		set_virt_sid(tsk, tsk->signal->session);
+	}
+	VE_TASK_INFO(tsk)->exec_env = get_ve0();	/* the task both runs in and belongs to VE0 */
+	VE_TASK_INFO(tsk)->owner_env = get_ve0();
+	VE_TASK_INFO(tsk)->sleep_time = 0;
+	VE_TASK_INFO(tsk)->wakeup_stamp = 0;
+	VE_TASK_INFO(tsk)->sched_time = 0;
+	seqcount_init(&VE_TASK_INFO(tsk)->wakeup_lock);
+	/* only tasks with a non-zero pid are linked and counted in VE0's pcounter */
+	if (tsk->pid) {
+		SET_VE_LINKS(tsk);
+		atomic_inc(&get_ve0()->pcounter);
+	}
+}
+
+#if defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE)
+void prepare_ve0_loopback(void)
+{
+	get_ve0()->_loopback_dev = &loopback_dev;	/* VE0 uses the global loopback device */
+}
+#endif
+
+/*
+ * ------------------------------------------------------------------------
+ * proc entries
+ * ------------------------------------------------------------------------
+ */
+
+#ifdef CONFIG_PROC_FS
+static void proc_move(struct proc_dir_entry *ddir,
+		struct proc_dir_entry *sdir,
+		const char *name)
+{
+	struct proc_dir_entry **p, *q;
+	int len;
+	/* relink the child of sdir called name so that it becomes the first child of ddir */
+	len = strlen(name);
+	for (p = &sdir->subdir, q = *p; q != NULL; p = &q->next, q = *p)
+		if (proc_match(len, name, q))
+			break;
+	if (q == NULL)	/* no such entry: nothing to move */
+		return;
+	*p = q->next;	/* unlink from sdir */
+	q->parent = ddir;
+	q->next = ddir->subdir;
+	ddir->subdir = q;
+}
+static void prepare_proc_misc(void)
+{
+	static char *table[] = {	/* entries to move from ve0.proc_root to proc_root */
+		"loadavg",
+		"uptime",
+		"meminfo",
+		"version",
+		"stat",
+		"filesystems",
+		"locks",
+		"swaps",
+		"mounts",
+		"net",
+		"cpuinfo",
+		"sysvipc",
+		"sys",
+		"fs",
+		"vz",
+		"user_beancounters",
+		"cmdline",
+		"vmstat",
+		"modules",
+		"kmsg",
+		NULL,
+	};
+	char **p;
+
+	for (p = table; *p != NULL; p++)	/* names that do not exist are skipped by proc_move() */
+		proc_move(&proc_root, ve0.proc_root, *p);
+}
+int prepare_proc(void)
+{
+	struct ve_struct *envid;
+	struct proc_dir_entry *de;
+	struct proc_dir_entry *ve_root;
+	/* split /proc into a VE0-only tree and the entries kept in proc_root */
+	envid = set_exec_env(&ve0);
+	ve_root = ve0.proc_root->subdir;
+	/* move the whole tree to be visible in VE0 only */
+	ve0.proc_root->subdir = proc_root.subdir;
+	for (de = ve0.proc_root->subdir; de->next != NULL; de = de->next)	/* NOTE(review): requires proc_root.subdir != NULL */
+		de->parent = ve0.proc_root;
+	de->parent = ve0.proc_root;
+	de->next = ve_root;	/* VE0's previous children follow the moved entries */
+
+	/* move back into the global scope some specific entries */
+	proc_root.subdir = NULL;
+	prepare_proc_misc();
+	proc_net = proc_mkdir("net", ve0.proc_root);
+	proc_net_stat = proc_mkdir("stat", proc_net);
+	proc_mkdir("vz", NULL);
+#ifdef CONFIG_SYSVIPC
+	proc_mkdir("sysvipc", NULL);
+#endif
+	proc_root_fs = proc_mkdir("fs", NULL);
+	/* XXX proc_tty_init(); */
+
+	/* XXX process inodes */
+
+	(void)set_exec_env(envid);
+
+	(void)create_proc_glob_entry("vz", S_IFDIR|S_IRUGO|S_IXUGO, NULL);
+	return 0;	/* always 0; results of the mkdir calls are not checked */
+}
+
+static struct proc_dir_entry ve0_proc_root = {
+	.name = "/proc",
+	.namelen = 5,
+	.mode = S_IFDIR | S_IRUGO | S_IXUGO,
+	.nlink = 2
+};
+
+void prepare_ve0_proc_root(void)
+{
+	ve0.proc_root = &ve0_proc_root;	/* VE0's proc root is the static entry defined above */
+}
+#endif
+
+/*
+ * ------------------------------------------------------------------------
+ * Virtualized sysctl
+ * ------------------------------------------------------------------------
+ */
+
+static int semmin[4] = { 1, 1, 1, 1 };
+static int semmax[4] = { 8000, INT_MAX, 1000, IPCMNI };
+static ctl_table kern_table[] = {
+	{KERN_NODENAME, "hostname", system_utsname.nodename, 64,
+	 0644, NULL, &proc_doutsstring, &sysctl_string},
+	{KERN_DOMAINNAME, "domainname", system_utsname.domainname, 64,
+	 0644, NULL, &proc_doutsstring, &sysctl_string},
+#ifdef CONFIG_SYSVIPC
+#define get_ve0_field(fname) &ve0._##fname
+	{KERN_SHMMAX, "shmmax", get_ve0_field(shm_ctlmax), sizeof (size_t),
+	 0644, NULL, &proc_doulongvec_minmax },
+	{KERN_SHMALL, "shmall", get_ve0_field(shm_ctlall), sizeof (size_t),
+	 0644, NULL, &proc_doulongvec_minmax },
+	{KERN_SHMMNI, "shmmni", get_ve0_field(shm_ctlmni), sizeof (int),
+	 0644, NULL, &proc_dointvec_minmax, NULL,
+	 NULL, &semmin[0], &semmax[3] },
+	{KERN_MSGMAX, "msgmax", get_ve0_field(msg_ctlmax), sizeof (int),
+	 0644, NULL, &proc_dointvec },
+	{KERN_MSGMNI, "msgmni", get_ve0_field(msg_ctlmni), sizeof (int),
+	 0644, NULL, &proc_dointvec_minmax, NULL,
+	 NULL, &semmin[0], &semmax[3] },
+	{KERN_MSGMNB, "msgmnb", get_ve0_field(msg_ctlmnb), sizeof (int),
+	 0644, NULL, &proc_dointvec },
+	{KERN_SEM, "sem", get_ve0_field(sem_ctls), 4*sizeof (int),
+	 0644, NULL, &proc_dointvec },
+#endif
+	{0}
+};
+static ctl_table root_table[] = {
+	{CTL_KERN, "kernel", NULL, 0, 0555, kern_table},
+	{0}
+};
+extern int ip_rt_src_check;
+extern int ve_area_access_check;
+static ctl_table vz_ipv4_route_table[] = {	/* int sysctl "src_check" backed by ip_rt_src_check */
+	{
+		.ctl_name	= NET_IPV4_ROUTE_SRC_CHECK,
+		.procname	= "src_check",
+		.data		= &ip_rt_src_check,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec,
+	},
+	{ 0 }
+};
+static ctl_table vz_ipv4_table[] = {
+	{NET_IPV4_ROUTE, "route", NULL, 0, 0555, vz_ipv4_route_table},
+	{ 0 }
+};
+static ctl_table vz_net_table[] = {
+	{NET_IPV4,   "ipv4",      NULL, 0, 0555, vz_ipv4_table},
+	{ 0 }
+};
+static ctl_table vz_fs_table[] = {	/* int sysctl backed by ve_area_access_check */
+	{
+		.ctl_name	= 226,
+		.procname	= "ve-area-access-check",
+		.data		= &ve_area_access_check,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec,
+	},
+	{ 0 }
+};
+static ctl_table root_table2[] = {
+	{CTL_NET, "net", NULL, 0, 0555, vz_net_table},
+	{CTL_FS, "fs", NULL, 0, 0555, vz_fs_table},
+	{ 0 }
+};
+int prepare_sysctl(void)
+{
+	struct ve_struct *envid;
+	/* both tables are registered with ve0 as the exec env */
+	envid = set_exec_env(&ve0);
+	ve0.kern_header = register_sysctl_table(root_table, 1);
+	register_sysctl_table(root_table2, 0);	/* NOTE(review): returned header is dropped, so this table cannot be unregistered */
+	(void)set_exec_env(envid);
+	return 0;
+}
+
+void prepare_ve0_sysctl(void)
+{
+	INIT_LIST_HEAD(&ve0.sysctl_lh);	/* VE0 starts with an empty sysctl list */
+#ifdef CONFIG_SYSCTL
+	ve0.proc_sys_root = proc_mkdir("sys", NULL);
+#endif
+}
+
+/*
+ * ------------------------------------------------------------------------
+ * XXX init_ve_system
+ * ------------------------------------------------------------------------
+ */
+
+void init_ve_system(void)
+{
+	struct task_struct *init_entry, *p, *tsk;
+	struct ve_struct *ptr;
+	unsigned long flags;
+	int i;
+	/* finish setting up VE0: tasks, init entry, fs root, then proc, sysctl and ipc */
+	ptr = get_ve0();
+	(void)get_ve(ptr);
+	atomic_set(&ptr->pcounter, 1);
+
+	/* Don't forget about idle tasks */
+	write_lock_irqsave(&tasklist_lock, flags);
+	for (i = 0; i < NR_CPUS; i++) {
+		tsk = idle_task(i);
+		if (tsk == NULL)
+			continue;
+
+		prepare_ve0_process(tsk);
+	}
+	do_each_thread_all(p, tsk) {	/* every existing thread is attached to VE0 */
+		prepare_ve0_process(tsk);
+	} while_each_thread_all(p, tsk);
+	write_unlock_irqrestore(&tasklist_lock, flags);
+
+	init_entry = child_reaper;	/* VE0's init entry is the global child reaper */
+	ptr->init_entry = init_entry;
+	/* XXX: why? */
+	cap_set_full(ptr->cap_default);
+
+#if defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE)
+	ptr->_ipv4_devconf = &ipv4_devconf;
+	ptr->_ipv4_devconf_dflt = &ipv4_devconf_dflt;
+#endif
+	/* VE0's root mount and root are copied from the init entry's fs */
+	read_lock(&init_entry->fs->lock);
+	ptr->fs_rootmnt = init_entry->fs->rootmnt;
+	ptr->fs_root = init_entry->fs->root;
+	read_unlock(&init_entry->fs->lock);
+
+	/* common prepares */
+#ifdef CONFIG_PROC_FS
+	prepare_proc();
+#endif
+	prepare_sysctl();
+	prepare_ipc();
+}
diff -uprN linux-2.6.16/kernel/vzdev.c linux-2.6.16.ovz/kernel/vzdev.c
--- linux-2.6.16/kernel/vzdev.c	1969-12-31 19:00:00.000000000 -0500
+++ linux-2.6.16.ovz/kernel/vzdev.c	2006-07-05 08:34:56.000000000 -0400
@@ -0,0 +1,129 @@
+/*
+ *  kernel/vzdev.c
+ *
+ *  Copyright (C) 2000-2005  SWsoft
+ *  All rights reserved.
+ *  
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#include <linux/fs.h>
+#include <linux/list.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/vzctl.h>
+#include <linux/slab.h>
+#include <linux/vmalloc.h>
+#include <linux/vzcalluser.h>
+#include <asm/uaccess.h>
+#include <asm/pgalloc.h>
+#include <linux/device.h>
+#include <linux/smp_lock.h>
+
+#define VZCTL_MAJOR 126
+#define VZCTL_NAME "vzctl"
+
+MODULE_AUTHOR("SWsoft <info@sw-soft.com>");
+MODULE_DESCRIPTION("Virtuozzo Interface");
+MODULE_LICENSE("GPL v2");
+
+static LIST_HEAD(ioctls);		/* registered struct vzioctlinfo entries */
+static DEFINE_SPINLOCK(ioctl_lock);	/* taken around every access to ioctls */
+
+/* Dispatch cmd to the first registered handler whose type matches it. */
+int vzctl_ioctl(struct inode *ino, struct file *file, unsigned int cmd,
+		unsigned long arg)
+{
+	struct vzioctlinfo *handler;
+	int ret;
+
+	spin_lock(&ioctl_lock);
+	list_for_each_entry(handler, &ioctls, list)
+		if (handler->type == _IOC_TYPE(cmd))
+			goto found;
+	spin_unlock(&ioctl_lock);
+	return -ENOTTY;		/* nothing registered for this ioctl type */
+
+found:
+	/* Take the module reference before the list lock is dropped. */
+	ret = try_module_get(handler->owner) ? 0 : -EBUSY;
+	spin_unlock(&ioctl_lock);
+	if (ret)
+		return ret;
+
+	unlock_kernel();	/* the handler is called between unlock/lock_kernel */
+	ret = (*handler->func)(ino, file, cmd, arg);
+	lock_kernel();
+	module_put(handler->owner);
+	return ret;
+}
+
+void vzioctl_register(struct vzioctlinfo *inf)
+{
+	spin_lock(&ioctl_lock);
+	list_add(&inf->list, &ioctls);	/* link inf into the list vzctl_ioctl() searches */
+	spin_unlock(&ioctl_lock);
+}
+
+void vzioctl_unregister(struct vzioctlinfo *inf)
+{
+	spin_lock(&ioctl_lock);
+	list_del_init(&inf->list);	/* unlink inf and re-initialise its list node */
+	spin_unlock(&ioctl_lock);
+}
+
+EXPORT_SYMBOL(vzioctl_register);
+EXPORT_SYMBOL(vzioctl_unregister);
+
+/*
+ * Init/exit stuff.
+ */
+static struct file_operations vzctl_fops = {
+	.owner		= THIS_MODULE,
+	.ioctl		= vzctl_ioctl,	/* the only file operation set for this device */
+};
+
+static struct class *vzctl_class;
+
+static void __exit vzctl_exit(void)
+{
+	class_device_destroy(vzctl_class, MKDEV(VZCTL_MAJOR, 0));	/* reverse of vzctl_init() */
+	class_destroy(vzctl_class);
+	unregister_chrdev(VZCTL_MAJOR, VZCTL_NAME);
+}
+
+/* Register the vzctl char device, then create its class and class device. */
+static int __init vzctl_init(void)
+{
+	struct class_device *clsdev;
+	int err;
+
+	err = register_chrdev(VZCTL_MAJOR, VZCTL_NAME, &vzctl_fops);
+	if (err < 0)
+		return err;
+
+	vzctl_class = class_create(THIS_MODULE, "vzctl");
+	if (IS_ERR(vzctl_class)) {
+		err = PTR_ERR(vzctl_class);
+		goto fail_chrdev;
+	}
+
+	clsdev = class_device_create(vzctl_class, NULL, MKDEV(VZCTL_MAJOR, 0),
+				     NULL, VZCTL_NAME);
+	if (IS_ERR(clsdev)) {
+		err = PTR_ERR(clsdev);
+		goto fail_class;
+	}
+	/* Success: return register_chrdev()'s non-negative result. */
+	return err;
+
+fail_class:
+	class_destroy(vzctl_class);
+fail_chrdev:
+	unregister_chrdev(VZCTL_MAJOR, VZCTL_NAME);
+	return err;
+}
+
+module_init(vzctl_init)
+module_exit(vzctl_exit);
diff -uprN linux-2.6.16/kernel/vzwdog.c linux-2.6.16.ovz/kernel/vzwdog.c
--- linux-2.6.16/kernel/vzwdog.c	1969-12-31 19:00:00.000000000 -0500
+++ linux-2.6.16.ovz/kernel/vzwdog.c	2006-07-05 08:34:56.000000000 -0400
@@ -0,0 +1,278 @@
+/*
+ *  kernel/vzwdog.c
+ *
+ *  Copyright (C) 2000-2005  SWsoft
+ *  All rights reserved.
+ *  
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#include <linux/sched.h>
+#include <linux/fs.h>
+#include <linux/list.h>
+#include <linux/ctype.h>
+#include <linux/kobject.h>
+#include <linux/genhd.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/kernel_stat.h>
+#include <linux/smp_lock.h>
+#include <linux/errno.h>
+#include <linux/suspend.h>
+#include <linux/ve.h>
+#include <linux/vzstat.h>
+
+/* Stuff regarding the kernel thread that periodically polls VE validity */
+static int sleep_timeout = 60;
+static pid_t wdog_thread_pid;
+static int   wdog_thread_continue = 1;
+static DECLARE_COMPLETION(license_thread_exited);
+
+extern void show_mem(void);
+extern struct ve_struct *ve_list_head;
+
+#if 0
+static char page[PAGE_SIZE];
+
+static void parse_irq_list(int len)
+{
+	int i, k, skip;
+	for (i = 0; i < len; ) {
+		k = i;	/* k remembers where the current line of page[] starts */
+		while (i < len && page[i] != '\n' && page[i] != ':')
+			i++;
+		skip = 0;
+		if (i < len && page[i] != '\n') {
+			i++; /* skip ':' */
+			while (i < len && (page[i] == ' ' || page[i] == '0'))
+				i++;
+			skip = (i < len && (page[i] < '0' || page[i] > '9'));	/* non-digit after blanks/zeros */
+			while (i < len && page[i] != '\n')
+				i++;
+		}
+		if (!skip)
+			printk("\n%.*s", i - k, page + k);	/* print the line without its '\n' */
+		if (i < len)
+			i++; /* skip '\n' */
+	}
+}
+#endif
+
+static void show_irq_list(void)
+{
+#if 0	/* body is compiled out, so this function currently does nothing */
+	i = KSYMSAFECALL(int, get_irq_list, (page));
+	parse_irq_list(i);  /* Safe, zero was returned if unassigned */
+#endif
+}
+
+/* Print last.maxlat and avg[0..2] of each kstat_glob.alloc_lat[] entry. */
+static void show_alloc_latency(void)
+{
+	static const char *alloc_descr[KSTAT_ALLOCSTAT_NR] = {
+		"A0",
+		"L0",
+		"H0",
+		"L1",
+		"H1"
+	};
+	struct kstat_lat_struct *lat;
+	cycles_t peak, avg[3];
+	int idx;
+
+	printk("lat: ");
+	for (idx = 0; idx < KSTAT_ALLOCSTAT_NR; idx++) {
+		lat = &kstat_glob.alloc_lat[idx];
+
+		/* Copy the values under the lock, print after dropping it. */
+		spin_lock_irq(&kstat_glb_lock);
+		peak = lat->last.maxlat;
+		avg[0] = lat->avg[0];
+		avg[1] = lat->avg[1];
+		avg[2] = lat->avg[2];
+		spin_unlock_irq(&kstat_glb_lock);
+
+		printk("%s %Lu (%Lu %Lu %Lu)",
+				alloc_descr[idx],
+				peak, avg[0], avg[1], avg[2]);
+	}
+
+	printk("\n");
+}
+
+/*
+ * Print last.{maxlat,totlat,count} and avg[0..2] of kstat_glob.sched_lat.
+ */
+static void show_schedule_latency(void)
+{
+	struct kstat_lat_pcpu_struct *lat = &kstat_glob.sched_lat;
+	cycles_t peak, total, avg[3];
+	unsigned long nr;
+
+	/* Copy the values under the lock, print after dropping it. */
+	spin_lock_irq(&kstat_glb_lock);
+	peak = lat->last.maxlat;
+	total = lat->last.totlat;
+	nr = lat->last.count;
+	avg[0] = lat->avg[0];
+	avg[1] = lat->avg[1];
+	avg[2] = lat->avg[2];
+	spin_unlock_irq(&kstat_glb_lock);
+
+	printk("sched lat: %Lu/%Lu/%lu (%Lu %Lu %Lu)\n",
+			peak, total,
+			nr,
+			avg[0], avg[1], avg[2]);
+}
+
+/* Print the report banner: time of day, jiffies, current CPU and HZ. */
+static void show_header(void)
+{
+	struct timeval now;
+	do_gettimeofday(&now);
+	printk("*** VZWDOG 1.14: time %lu.%06lu uptime %Lu CPU %d ***\n",
+			now.tv_sec, now.tv_usec,
+			get_jiffies_64(), smp_processor_id());
+#ifdef CONFIG_FAIRSCHED
+	printk("*** cycles_per_jiffy %lu jiffies_per_second %u ***\n",
+			cycles_per_jiffy, HZ);
+#else
+	printk("*** jiffies_per_second %u ***\n", HZ);
+#endif
+}
+
+/* Print id, start pfn, present/spanned page counts and mem_map per pgdat. */
+static void show_pgdatinfo(void)
+{
+	pg_data_t *node;
+
+	printk("pgdat:");
+	for_each_pgdat(node) {
+		printk(" %d: %lu,%lu,%lu,%p",
+			node->node_id, node->node_start_pfn,
+			node->node_present_pages, node->node_spanned_pages,
+			node->node_mem_map);
+	}
+
+	printk("\n");
+}
+
+/* Print read/write counters of every block_subsys disk except loopN/ramN. */
+static void show_diskio(void)
+{
+	struct gendisk *disk;
+	char namebuf[BDEVNAME_SIZE];
+	char *name;
+
+	printk("disk_io: ");
+	down_read(&block_subsys.rwsem);
+	list_for_each_entry(disk, &block_subsys.kset.list, kobj.entry) {
+		name = disk_name(disk, 0, namebuf);
+		/* Skip names that are "loop" or "ram" followed by a digit. */
+		if (strlen(name) > 4 && !strncmp(name, "loop", 4) &&
+		    isdigit(name[4]))
+			continue;
+		if (strlen(name) > 3 && !strncmp(name, "ram", 3) &&
+		    isdigit(name[3]))
+			continue;
+		printk("(%u,%u) %s r(%u %u %u) w(%u %u %u)\n",
+			disk->major, disk->first_minor, name,
+			disk_stat_read(disk, ios[READ]),
+			disk_stat_read(disk, sectors[READ]),
+			disk_stat_read(disk, merges[READ]),
+			disk_stat_read(disk, ios[WRITE]),
+			disk_stat_read(disk, sectors[WRITE]),
+			disk_stat_read(disk, merges[WRITE]));
+	}
+	up_read(&block_subsys.rwsem);
+
+	printk("\n");
+}
+
+/*
+ * Print nr_ve, the per-state task counters and nr_threads on one line.
+ */
+static void show_nrprocs(void)
+{
+	unsigned long running = nr_running();
+	unsigned long unint = nr_uninterruptible();
+	unsigned long sleeping = nr_sleeping();
+	unsigned long zombie = nr_zombie;
+	unsigned long dead = atomic_read(&nr_dead);
+	unsigned long stopped = nr_stopped();
+
+	printk("VEnum: %d, proc R %lu, S %lu, D %lu, "
+		"Z %lu, X %lu, T %lu (tot %d)\n",
+		nr_ve, running, sleeping, unint,
+		zombie, dead, stopped, nr_threads);
+}
+
+static void wdog_print(void)
+{
+	show_header();		/* one complete report, banner first */
+	show_irq_list();	/* currently prints nothing: its body is under #if 0 */
+	show_pgdatinfo();
+	show_mem();		/* extern, declared near the top of this file */
+	show_diskio();
+	show_schedule_latency();
+	show_alloc_latency();
+	show_nrprocs();
+}
+
+static int wdog_loop(void* data)
+{
+	struct task_struct *tsk = current;
+	DECLARE_WAIT_QUEUE_HEAD(thread_wait_queue);
+
+	/*
+	 * This thread doesn't need any user-level access,
+	 * so get rid of all our resources
+	 */
+	daemonize("wdogd");
+
+	spin_lock_irq(&tsk->sighand->siglock);
+	sigfillset(&tsk->blocked);	/* block every signal ... */
+	sigdelset(&tsk->blocked, SIGHUP);	/* ... except SIGHUP, which wdog_exit() sends */
+	recalc_sigpending();
+	spin_unlock_irq(&tsk->sighand->siglock);
+
+	while (wdog_thread_continue) {	/* flag is cleared by wdog_exit() */
+		wdog_print();
+		interruptible_sleep_on_timeout(&thread_wait_queue,
+					       sleep_timeout*HZ);	/* sleep_timeout is in seconds */
+		try_to_freeze();
+		/* clear all signals */
+		if (signal_pending(tsk))
+			flush_signals(tsk);
+	}
+
+	complete_and_exit(&license_thread_exited, 0);	/* wdog_exit() waits on this completion */
+}
+
+static int __init wdog_init(void)
+{
+	wdog_thread_pid = kernel_thread(wdog_loop, NULL, 0);
+	if (wdog_thread_pid < 0)
+		return wdog_thread_pid;
+
+	return 0;
+}
+
+static void __exit wdog_exit(void)
+{
+	wdog_thread_continue = 0;	/* wdog_loop() tests this flag each pass */
+	if (wdog_thread_pid <= 0)
+		return;
+	kill_proc(wdog_thread_pid, SIGHUP, 1);	/* the one signal wdog_loop() unblocks */
+	wait_for_completion(&license_thread_exited);
+}
+
+module_param(sleep_timeout, int, 0);
+MODULE_AUTHOR("SWsoft <info@sw-soft.com>");
+MODULE_DESCRIPTION("Virtuozzo WDOG");
+MODULE_LICENSE("GPL v2");
+
+module_init(wdog_init)
+module_exit(wdog_exit)
diff -uprN linux-2.6.16/lib/Kconfig.debug linux-2.6.16.ovz/lib/Kconfig.debug
--- linux-2.6.16/lib/Kconfig.debug	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/lib/Kconfig.debug	2006-07-05 08:34:56.000000000 -0400
@@ -48,7 +48,7 @@ config LOG_BUF_SHIFT
 
 config DETECT_SOFTLOCKUP
 	bool "Detect Soft Lockups"
-	depends on DEBUG_KERNEL
+	depends on DEBUG_KERNEL && !SCHED_VCPU
 	default y
 	help
 	  Say Y here to enable the kernel to detect "soft lockups",
diff -uprN linux-2.6.16/lib/bust_spinlocks.c linux-2.6.16.ovz/lib/bust_spinlocks.c
--- linux-2.6.16/lib/bust_spinlocks.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/lib/bust_spinlocks.c	2006-07-05 08:34:56.000000000 -0400
@@ -20,19 +20,11 @@ void bust_spinlocks(int yes)
 	if (yes) {
 		oops_in_progress = 1;
 	} else {
-		int loglevel_save = console_loglevel;
 #ifdef CONFIG_VT
 		unblank_screen();
 #endif
 		oops_in_progress = 0;
-		/*
-		 * OK, the message is on the console.  Now we call printk()
-		 * without oops_in_progress set so that printk() will give klogd
-		 * and the blanked console a poke.  Hold onto your hats...
-		 */
-		console_loglevel = 15;		/* NMI oopser may have shut the console up */
-		printk(" ");
-		console_loglevel = loglevel_save;
+		wake_up_klogd();
 	}
 }
 
diff -uprN linux-2.6.16/mm/filemap_xip.c linux-2.6.16.ovz/mm/filemap_xip.c
--- linux-2.6.16/mm/filemap_xip.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/mm/filemap_xip.c	2006-07-05 08:34:56.000000000 -0400
@@ -15,6 +15,7 @@
 #include <linux/rmap.h>
 #include <asm/tlbflush.h>
 #include "filemap.h"
+#include <ub/ub_vmpages.h>
 
 /*
  * This is a file read routine for execute in place files, and uses
@@ -190,7 +191,10 @@ __xip_unmap (struct address_space * mapp
 			flush_cache_page(vma, address, pte_pfn(*pte));
 			pteval = ptep_clear_flush(vma, address, pte);
 			page_remove_rmap(page);
+			pb_remove_ref(page, mm);
+			ub_unused_privvm_inc(mm, vma);
 			dec_mm_counter(mm, file_rss);
+			dec_vma_rss(vma);
 			BUG_ON(pte_dirty(pteval));
 			pte_unmap_unlock(pte, ptl);
 			page_cache_release(page);
diff -uprN linux-2.6.16/mm/fremap.c linux-2.6.16.ovz/mm/fremap.c
--- linux-2.6.16/mm/fremap.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/mm/fremap.c	2006-07-05 08:34:56.000000000 -0400
@@ -20,6 +20,8 @@
 #include <asm/cacheflush.h>
 #include <asm/tlbflush.h>
 
+#include <ub/ub_vmpages.h>
+
 static int zap_pte(struct mm_struct *mm, struct vm_area_struct *vma,
 			unsigned long addr, pte_t *ptep)
 {
@@ -34,6 +36,7 @@ static int zap_pte(struct mm_struct *mm,
 			if (pte_dirty(pte))
 				set_page_dirty(page);
 			page_remove_rmap(page);
+			pb_remove_ref(page, mm);
 			page_cache_release(page);
 		}
 	} else {
@@ -57,6 +60,10 @@ int install_page(struct mm_struct *mm, s
 	pte_t *pte;
 	pte_t pte_val;
 	spinlock_t *ptl;
+	struct page_beancounter *pbc;
+
+	if (unlikely(pb_alloc(&pbc)))
+		goto out_nopb;
 
 	pte = get_locked_pte(mm, addr, &ptl);
 	if (!pte)
@@ -75,11 +82,15 @@ int install_page(struct mm_struct *mm, s
 	if (page_mapcount(page) > INT_MAX/2)
 		goto unlock;
 
-	if (pte_none(*pte) || !zap_pte(mm, vma, addr, pte))
+	if (pte_none(*pte) || !zap_pte(mm, vma, addr, pte)) {
+		ub_unused_privvm_dec(mm, vma);
 		inc_mm_counter(mm, file_rss);
+		inc_vma_rss(vma);
+	}
 
 	flush_icache_page(vma, page);
 	set_pte_at(mm, addr, pte, mk_pte(page, prot));
+	pb_add_ref(page, mm, &pbc);
 	page_add_file_rmap(page);
 	pte_val = *pte;
 	update_mmu_cache(vma, addr, pte_val);
@@ -87,6 +98,8 @@ int install_page(struct mm_struct *mm, s
 unlock:
 	pte_unmap_unlock(pte, ptl);
 out:
+	pb_free(&pbc);
+out_nopb:
 	return err;
 }
 EXPORT_SYMBOL(install_page);
@@ -109,7 +122,9 @@ int install_file_pte(struct mm_struct *m
 
 	if (!pte_none(*pte) && zap_pte(mm, vma, addr, pte)) {
 		update_hiwater_rss(mm);
+		ub_unused_privvm_inc(mm, vma);
 		dec_mm_counter(mm, file_rss);
+		dec_vma_rss(vma);
 	}
 
 	set_pte_at(mm, addr, pte, pgoff_to_pte(pgoff));
@@ -220,4 +235,5 @@ asmlinkage long sys_remap_file_pages(uns
 
 	return err;
 }
+EXPORT_SYMBOL_GPL(sys_remap_file_pages);
 
diff -uprN linux-2.6.16/mm/madvise.c linux-2.6.16.ovz/mm/madvise.c
--- linux-2.6.16/mm/madvise.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/mm/madvise.c	2006-07-05 08:34:56.000000000 -0400
@@ -168,6 +168,9 @@ static long madvise_remove(struct vm_are
 			return -EINVAL;
 	}
 
+	if ((vma->vm_flags & (VM_SHARED|VM_WRITE)) != (VM_SHARED|VM_WRITE))
+		return -EACCES;
+
 	mapping = vma->vm_file->f_mapping;
 
 	offset = (loff_t)(start - vma->vm_start)
diff -uprN linux-2.6.16/mm/memory.c linux-2.6.16.ovz/mm/memory.c
--- linux-2.6.16/mm/memory.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/mm/memory.c	2006-07-05 08:34:56.000000000 -0400
@@ -58,6 +58,8 @@
 #include <linux/swapops.h>
 #include <linux/elf.h>
 
+#include <ub/ub_vmpages.h>
+
 #ifndef CONFIG_NEED_MULTIPLE_NODES
 /* use the per-pgdat data instead for discontigmem - mbligh */
 unsigned long max_mapnr;
@@ -81,6 +83,7 @@ unsigned long vmalloc_earlyreserve;
 EXPORT_SYMBOL(num_physpages);
 EXPORT_SYMBOL(high_memory);
 EXPORT_SYMBOL(vmalloc_earlyreserve);
+EXPORT_SYMBOL_GPL(empty_zero_page);
 
 int randomize_va_space __read_mostly = 1;
 
@@ -103,18 +106,21 @@ void pgd_clear_bad(pgd_t *pgd)
 	pgd_ERROR(*pgd);
 	pgd_clear(pgd);
 }
+EXPORT_SYMBOL_GPL(pgd_clear_bad);
 
 void pud_clear_bad(pud_t *pud)
 {
 	pud_ERROR(*pud);
 	pud_clear(pud);
 }
+EXPORT_SYMBOL_GPL(pud_clear_bad);
 
 void pmd_clear_bad(pmd_t *pmd)
 {
 	pmd_ERROR(*pmd);
 	pmd_clear(pmd);
 }
+EXPORT_SYMBOL_GPL(pmd_clear_bad);
 
 /*
  * Note: this doesn't free the actual pages themselves. That
@@ -318,6 +324,7 @@ int __pte_alloc(struct mm_struct *mm, pm
 	spin_unlock(&mm->page_table_lock);
 	return 0;
 }
+EXPORT_SYMBOL_GPL(__pte_alloc);
 
 int __pte_alloc_kernel(pmd_t *pmd, unsigned long address)
 {
@@ -418,6 +425,7 @@ struct page *vm_normal_page(struct vm_ar
 	 */
 	return pfn_to_page(pfn);
 }
+EXPORT_SYMBOL_GPL(vm_normal_page);
 
 /*
  * copy one vm_area from one task to the other. Assumes the page tables
@@ -428,7 +436,7 @@ struct page *vm_normal_page(struct vm_ar
 static inline void
 copy_one_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm,
 		pte_t *dst_pte, pte_t *src_pte, struct vm_area_struct *vma,
-		unsigned long addr, int *rss)
+		unsigned long addr, int *rss, struct page_beancounter **pbc)
 {
 	unsigned long vm_flags = vma->vm_flags;
 	pte_t pte = *src_pte;
@@ -471,6 +479,7 @@ copy_one_pte(struct mm_struct *dst_mm, s
 	if (page) {
 		get_page(page);
 		page_dup_rmap(page);
+		pb_dup_ref(page, dst_mm, pbc);
 		rss[!!PageAnon(page)]++;
 	}
 
@@ -478,20 +487,36 @@ out_set_pte:
 	set_pte_at(dst_mm, addr, dst_pte, pte);
 }
 
+#define pte_ptrs(a)	(PTRS_PER_PTE - (((a) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1)))	/* PTRS_PER_PTE minus a's pte index */
+#ifdef CONFIG_USER_RESOURCE
+#define same_ub(mm1, mm2)	((mm1)->mm_ub == (mm2)->mm_ub)	/* true when both mms share one mm_ub */
+#else
+#define same_ub(mm1, mm2)	(1)	/* no mm_ub to compare without CONFIG_USER_RESOURCE */
+#endif
+
 static int copy_pte_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
-		pmd_t *dst_pmd, pmd_t *src_pmd, struct vm_area_struct *vma,
+		pmd_t *dst_pmd, pmd_t *src_pmd,
+		struct vm_area_struct *dst_vma,
+		struct vm_area_struct *vma,
 		unsigned long addr, unsigned long end)
 {
 	pte_t *src_pte, *dst_pte;
 	spinlock_t *src_ptl, *dst_ptl;
 	int progress = 0;
-	int rss[2];
+	int rss[2], rss_tot;
+	struct page_beancounter *pbc;
+	int err;
 
+	err = -ENOMEM;
+	pbc = same_ub(src_mm, dst_mm) ? PBC_COPY_SAME : NULL;
 again:
+	if (pbc != PBC_COPY_SAME && pb_alloc_list(&pbc, pte_ptrs(addr)))
+		goto out;
 	rss[1] = rss[0] = 0;
 	dst_pte = pte_alloc_map_lock(dst_mm, dst_pmd, addr, &dst_ptl);
 	if (!dst_pte)
-		return -ENOMEM;
+		goto out;
+
 	src_pte = pte_offset_map_nested(src_pmd, addr);
 	src_ptl = pte_lockptr(src_mm, src_pmd);
 	spin_lock(src_ptl);
@@ -512,22 +537,32 @@ again:
 			progress++;
 			continue;
 		}
-		copy_one_pte(dst_mm, src_mm, dst_pte, src_pte, vma, addr, rss);
+		copy_one_pte(dst_mm, src_mm, dst_pte, src_pte,
+				vma, addr, rss, &pbc);
 		progress += 8;
 	} while (dst_pte++, src_pte++, addr += PAGE_SIZE, addr != end);
 
 	spin_unlock(src_ptl);
 	pte_unmap_nested(src_pte - 1);
+	rss_tot = rss[0] + rss[1];
+	add_vma_rss(dst_vma, rss_tot);
+	ub_unused_privvm_sub(dst_mm, dst_vma, rss_tot);
 	add_mm_rss(dst_mm, rss[0], rss[1]);
 	pte_unmap_unlock(dst_pte - 1, dst_ptl);
 	cond_resched();
 	if (addr != end)
 		goto again;
-	return 0;
+
+	err = 0;
+out:
+	pb_free_list(&pbc);
+	return err;
 }
 
 static inline int copy_pmd_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
-		pud_t *dst_pud, pud_t *src_pud, struct vm_area_struct *vma,
+		pud_t *dst_pud, pud_t *src_pud,
+		struct vm_area_struct *dst_vma, 
+		struct vm_area_struct *vma,
 		unsigned long addr, unsigned long end)
 {
 	pmd_t *src_pmd, *dst_pmd;
@@ -542,14 +577,16 @@ static inline int copy_pmd_range(struct 
 		if (pmd_none_or_clear_bad(src_pmd))
 			continue;
 		if (copy_pte_range(dst_mm, src_mm, dst_pmd, src_pmd,
-						vma, addr, next))
+						dst_vma, vma, addr, next))
 			return -ENOMEM;
 	} while (dst_pmd++, src_pmd++, addr = next, addr != end);
 	return 0;
 }
 
 static inline int copy_pud_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
-		pgd_t *dst_pgd, pgd_t *src_pgd, struct vm_area_struct *vma,
+		pgd_t *dst_pgd, pgd_t *src_pgd,
+		struct vm_area_struct *dst_vma,
+		struct vm_area_struct *vma,
 		unsigned long addr, unsigned long end)
 {
 	pud_t *src_pud, *dst_pud;
@@ -564,19 +601,20 @@ static inline int copy_pud_range(struct 
 		if (pud_none_or_clear_bad(src_pud))
 			continue;
 		if (copy_pmd_range(dst_mm, src_mm, dst_pud, src_pud,
-						vma, addr, next))
+						dst_vma, vma, addr, next))
 			return -ENOMEM;
 	} while (dst_pud++, src_pud++, addr = next, addr != end);
 	return 0;
 }
 
-int copy_page_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
-		struct vm_area_struct *vma)
+int __copy_page_range(struct vm_area_struct *dst_vma, struct vm_area_struct *vma,
+		      unsigned long addr, size_t size)
 {
+	struct mm_struct *dst_mm = dst_vma->vm_mm;
+	struct mm_struct *src_mm = vma->vm_mm;
 	pgd_t *src_pgd, *dst_pgd;
 	unsigned long next;
-	unsigned long addr = vma->vm_start;
-	unsigned long end = vma->vm_end;
+	unsigned long end = addr + size;
 
 	/*
 	 * Don't copy ptes where a page fault will fill them correctly.
@@ -599,11 +637,22 @@ int copy_page_range(struct mm_struct *ds
 		if (pgd_none_or_clear_bad(src_pgd))
 			continue;
 		if (copy_pud_range(dst_mm, src_mm, dst_pgd, src_pgd,
-						vma, addr, next))
+						dst_vma, vma, addr, next))
 			return -ENOMEM;
 	} while (dst_pgd++, src_pgd++, addr = next, addr != end);
 	return 0;
 }
+EXPORT_SYMBOL_GPL(__copy_page_range);
+
+int copy_page_range(struct mm_struct *dst, struct mm_struct *src,
+		    struct vm_area_struct *dst_vma, struct vm_area_struct *vma)
+{
+	/* Each vma must belong to the mm passed alongside it. */
+	BUG_ON(dst_vma->vm_mm != dst);
+	BUG_ON(vma->vm_mm != src);
+	return __copy_page_range(dst_vma, vma, vma->vm_start,
+				 vma->vm_end - vma->vm_start);
+}
 
 static unsigned long zap_pte_range(struct mmu_gather *tlb,
 				struct vm_area_struct *vma, pmd_t *pmd,
@@ -615,6 +664,7 @@ static unsigned long zap_pte_range(struc
 	spinlock_t *ptl;
 	int file_rss = 0;
 	int anon_rss = 0;
+	int rss;
 
 	pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
 	do {
@@ -668,6 +718,7 @@ static unsigned long zap_pte_range(struc
 				file_rss--;
 			}
 			page_remove_rmap(page);
+			pb_remove_ref(page, mm);
 			tlb_remove_page(tlb, page);
 			continue;
 		}
@@ -682,6 +733,9 @@ static unsigned long zap_pte_range(struc
 		pte_clear_full(mm, addr, pte, tlb->fullmm);
 	} while (pte++, addr += PAGE_SIZE, (addr != end && *zap_work > 0));
 
+	rss = -(file_rss + anon_rss);
+	ub_unused_privvm_add(mm, vma, rss);
+	sub_vma_rss(vma, rss);
 	add_mm_rss(mm, file_rss, anon_rss);
 	pte_unmap_unlock(pte - 1, ptl);
 
@@ -1087,12 +1141,14 @@ int get_user_pages(struct task_struct *t
 }
 EXPORT_SYMBOL(get_user_pages);
 
-static int zeromap_pte_range(struct mm_struct *mm, pmd_t *pmd,
+static int zeromap_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
 			unsigned long addr, unsigned long end, pgprot_t prot)
 {
 	pte_t *pte;
 	spinlock_t *ptl;
+	struct mm_struct *mm;
 
+	mm = vma->vm_mm;
 	pte = pte_alloc_map_lock(mm, pmd, addr, &ptl);
 	if (!pte)
 		return -ENOMEM;
@@ -1102,6 +1158,8 @@ static int zeromap_pte_range(struct mm_s
 		page_cache_get(page);
 		page_add_file_rmap(page);
 		inc_mm_counter(mm, file_rss);
+		inc_vma_rss(vma);
+		ub_unused_privvm_dec(mm, vma);
 		BUG_ON(!pte_none(*pte));
 		set_pte_at(mm, addr, pte, zero_pte);
 	} while (pte++, addr += PAGE_SIZE, addr != end);
@@ -1109,35 +1167,35 @@ static int zeromap_pte_range(struct mm_s
 	return 0;
 }
 
-static inline int zeromap_pmd_range(struct mm_struct *mm, pud_t *pud,
+static inline int zeromap_pmd_range(struct vm_area_struct *vma, pud_t *pud,
 			unsigned long addr, unsigned long end, pgprot_t prot)
 {
 	pmd_t *pmd;
 	unsigned long next;
 
-	pmd = pmd_alloc(mm, pud, addr);
+	pmd = pmd_alloc(vma->vm_mm, pud, addr);
 	if (!pmd)
 		return -ENOMEM;
 	do {
 		next = pmd_addr_end(addr, end);
-		if (zeromap_pte_range(mm, pmd, addr, next, prot))
+		if (zeromap_pte_range(vma, pmd, addr, next, prot))
 			return -ENOMEM;
 	} while (pmd++, addr = next, addr != end);
 	return 0;
 }
 
-static inline int zeromap_pud_range(struct mm_struct *mm, pgd_t *pgd,
+static inline int zeromap_pud_range(struct vm_area_struct *vma, pgd_t *pgd,
 			unsigned long addr, unsigned long end, pgprot_t prot)
 {
 	pud_t *pud;
 	unsigned long next;
 
-	pud = pud_alloc(mm, pgd, addr);
+	pud = pud_alloc(vma->vm_mm, pgd, addr);
 	if (!pud)
 		return -ENOMEM;
 	do {
 		next = pud_addr_end(addr, end);
-		if (zeromap_pmd_range(mm, pud, addr, next, prot))
+		if (zeromap_pmd_range(vma, pud, addr, next, prot))
 			return -ENOMEM;
 	} while (pud++, addr = next, addr != end);
 	return 0;
@@ -1149,15 +1207,14 @@ int zeromap_page_range(struct vm_area_st
 	pgd_t *pgd;
 	unsigned long next;
 	unsigned long end = addr + size;
-	struct mm_struct *mm = vma->vm_mm;
 	int err;
 
 	BUG_ON(addr >= end);
-	pgd = pgd_offset(mm, addr);
+	pgd = pgd_offset(vma->vm_mm, addr);
 	flush_cache_range(vma, addr, end);
 	do {
 		next = pgd_addr_end(addr, end);
-		err = zeromap_pud_range(mm, pgd, addr, next, prot);
+		err = zeromap_pud_range(vma, pgd, addr, next, prot);
 		if (err)
 			break;
 	} while (pgd++, addr = next, addr != end);
@@ -1183,11 +1240,14 @@ pte_t * fastcall get_locked_pte(struct m
  * old drivers should use this, and they needed to mark their
  * pages reserved for the old functions anyway.
  */
-static int insert_page(struct mm_struct *mm, unsigned long addr, struct page *page, pgprot_t prot)
+static int insert_page(struct vm_area_struct *vma, unsigned long addr, struct page *page, pgprot_t prot)
 {
 	int retval;
 	pte_t *pte;
-	spinlock_t *ptl;  
+	spinlock_t *ptl;
+	struct mm_struct *mm;
+
+	mm = vma->vm_mm;
 
 	retval = -EINVAL;
 	if (PageAnon(page))
@@ -1204,6 +1264,7 @@ static int insert_page(struct mm_struct 
 	/* Ok, finally just insert the thing.. */
 	get_page(page);
 	inc_mm_counter(mm, file_rss);
+	inc_vma_rss(vma);
 	page_add_file_rmap(page);
 	set_pte_at(mm, addr, pte, mk_pte(page, prot));
 
@@ -1240,7 +1301,7 @@ int vm_insert_page(struct vm_area_struct
 	if (!page_count(page))
 		return -EINVAL;
 	vma->vm_flags |= VM_INSERTPAGE;
-	return insert_page(vma->vm_mm, addr, page, vma->vm_page_prot);
+	return insert_page(vma, addr, page, vma->vm_page_prot);
 }
 EXPORT_SYMBOL(vm_insert_page);
 
@@ -1449,6 +1510,7 @@ static int do_wp_page(struct mm_struct *
 	struct page *old_page, *new_page;
 	pte_t entry;
 	int ret = VM_FAULT_MINOR;
+	struct page_beancounter *pbc;
 
 	old_page = vm_normal_page(vma, address, orig_pte);
 	if (!old_page)
@@ -1476,6 +1538,9 @@ static int do_wp_page(struct mm_struct *
 gotten:
 	pte_unmap_unlock(page_table, ptl);
 
+	if (unlikely(pb_alloc(&pbc)))
+		goto oom_nopb;
+
 	if (unlikely(anon_vma_prepare(vma)))
 		goto oom;
 	if (old_page == ZERO_PAGE(address)) {
@@ -1496,12 +1561,16 @@ gotten:
 	if (likely(pte_same(*page_table, orig_pte))) {
 		if (old_page) {
 			page_remove_rmap(old_page);
+			pb_remove_ref(old_page, mm);
 			if (!PageAnon(old_page)) {
 				dec_mm_counter(mm, file_rss);
 				inc_mm_counter(mm, anon_rss);
 			}
-		} else
+		} else {
+			ub_unused_privvm_dec(mm, vma);
 			inc_mm_counter(mm, anon_rss);
+			inc_vma_rss(vma);
+		}
 		flush_cache_page(vma, address, pte_pfn(orig_pte));
 		entry = mk_pte(new_page, vma->vm_page_prot);
 		entry = maybe_mkwrite(pte_mkdirty(entry), vma);
@@ -1510,6 +1579,7 @@ gotten:
 		lazy_mmu_prot_update(entry);
 		lru_cache_add_active(new_page);
 		page_add_new_anon_rmap(new_page, vma, address);
+		pb_add_ref(new_page, mm, &pbc);
 
 		/* Free the old page.. */
 		new_page = old_page;
@@ -1519,10 +1589,13 @@ gotten:
 		page_cache_release(new_page);
 	if (old_page)
 		page_cache_release(old_page);
+	pb_free(&pbc);
 unlock:
 	pte_unmap_unlock(page_table, ptl);
 	return ret;
 oom:
+	pb_free(&pbc);
+oom_nopb:
 	if (old_page)
 		page_cache_release(old_page);
 	return VM_FAULT_OOM;
@@ -1877,10 +1950,16 @@ static int do_swap_page(struct mm_struct
 	swp_entry_t entry;
 	pte_t pte;
 	int ret = VM_FAULT_MINOR;
+	struct page_beancounter *pbc;
+	cycles_t start;
 
 	if (!pte_unmap_same(mm, pmd, page_table, orig_pte))
-		goto out;
+		goto out_nostat;
+
+	if (unlikely(pb_alloc(&pbc)))
+		return VM_FAULT_OOM;
 
+	start = get_cycles();
 	entry = pte_to_swp_entry(orig_pte);
 again:
 	page = lookup_swap_cache(entry);
@@ -1928,6 +2007,8 @@ again:
 	/* The page isn't present yet, go ahead with the fault. */
 
 	inc_mm_counter(mm, anon_rss);
+	inc_vma_rss(vma);
+	ub_swapin_inc(mm);
 	pte = mk_pte(page, vma->vm_page_prot);
 	if (write_access && can_share_swap_page(page)) {
 		pte = maybe_mkwrite(pte_mkdirty(pte), vma);
@@ -1937,6 +2018,8 @@ again:
 	flush_icache_page(vma, page);
 	set_pte_at(mm, address, page_table, pte);
 	page_add_anon_rmap(page, vma, address);
+	pb_add_ref(page, mm, &pbc);
+	ub_unused_privvm_dec(mm, vma);
 
 	swap_free(entry);
 	if (vm_swap_full())
@@ -1947,7 +2030,7 @@ again:
 		if (do_wp_page(mm, vma, address,
 				page_table, pmd, ptl, pte) == VM_FAULT_OOM)
 			ret = VM_FAULT_OOM;
-		goto out;
+		goto out_wp;
 	}
 
 	/* No need to invalidate - it was non-present before */
@@ -1955,10 +2038,16 @@ again:
 	lazy_mmu_prot_update(pte);
 unlock:
 	pte_unmap_unlock(page_table, ptl);
-out:
+out_wp:
+	pb_free(&pbc);
+	spin_lock_irq(&kstat_glb_lock);
+	KSTAT_LAT_ADD(&kstat_glob.swap_in, get_cycles() - start);
+	spin_unlock_irq(&kstat_glb_lock);
+out_nostat:
 	return ret;
 out_nomap:
 	pte_unmap_unlock(page_table, ptl);
+	pb_free(&pbc);
 	unlock_page(page);
 	page_cache_release(page);
 	return ret;
@@ -1976,11 +2065,15 @@ static int do_anonymous_page(struct mm_s
 	struct page *page;
 	spinlock_t *ptl;
 	pte_t entry;
+	struct page_beancounter *pbc;
 
 	if (write_access) {
 		/* Allocate our own private page. */
 		pte_unmap(page_table);
 
+		if (unlikely(pb_alloc(&pbc)))
+			goto oom_nopb;
+
 		if (unlikely(anon_vma_prepare(vma)))
 			goto oom;
 		page = alloc_zeroed_user_highpage(vma, address);
@@ -1996,7 +2089,10 @@ static int do_anonymous_page(struct mm_s
 		inc_mm_counter(mm, anon_rss);
 		lru_cache_add_active(page);
 		page_add_new_anon_rmap(page, vma, address);
+		pb_add_ref(page, mm, &pbc);
 	} else {
+		pbc = NULL;
+
 		/* Map the ZERO_PAGE - vm_page_prot is readonly */
 		page = ZERO_PAGE(address);
 		page_cache_get(page);
@@ -2010,18 +2106,23 @@ static int do_anonymous_page(struct mm_s
 		page_add_file_rmap(page);
 	}
 
+	inc_vma_rss(vma);
+	ub_unused_privvm_dec(mm, vma);
 	set_pte_at(mm, address, page_table, entry);
 
 	/* No need to invalidate - it was non-present before */
 	update_mmu_cache(vma, address, entry);
 	lazy_mmu_prot_update(entry);
 unlock:
+	pb_free(&pbc);
 	pte_unmap_unlock(page_table, ptl);
 	return VM_FAULT_MINOR;
 release:
 	page_cache_release(page);
 	goto unlock;
 oom:
+	pb_free(&pbc);
+oom_nopb:
 	return VM_FAULT_OOM;
 }
 
@@ -2049,6 +2150,7 @@ static int do_no_page(struct mm_struct *
 	unsigned int sequence = 0;
 	int ret = VM_FAULT_MINOR;
 	int anon = 0;
+	struct page_beancounter *pbc;
 
 	pte_unmap(page_table);
 	BUG_ON(vma->vm_flags & VM_PFNMAP);
@@ -2058,6 +2160,9 @@ static int do_no_page(struct mm_struct *
 		sequence = mapping->truncate_count;
 		smp_rmb(); /* serializes i_size against truncate_count */
 	}
+
+	if (unlikely(pb_alloc(&pbc)))
+		goto oom_nopb;
 retry:
 	new_page = vma->vm_ops->nopage(vma, address & PAGE_MASK, &ret);
 	/*
@@ -2070,9 +2175,9 @@ retry:
 
 	/* no page was available -- either SIGBUS or OOM */
 	if (new_page == NOPAGE_SIGBUS)
-		return VM_FAULT_SIGBUS;
+		goto bus_nopg;
 	if (new_page == NOPAGE_OOM)
-		return VM_FAULT_OOM;
+		goto oom_nopg;
 
 	/*
 	 * Should we do an early C-O-W break?
@@ -2131,6 +2236,9 @@ retry:
 			inc_mm_counter(mm, file_rss);
 			page_add_file_rmap(new_page);
 		}
+		inc_vma_rss(vma);
+		pb_add_ref(new_page, mm, &pbc);
+		ub_unused_privvm_dec(mm, vma);
 	} else {
 		/* One of our sibling threads was faster, back out. */
 		page_cache_release(new_page);
@@ -2142,10 +2250,18 @@ retry:
 	lazy_mmu_prot_update(entry);
 unlock:
 	pte_unmap_unlock(page_table, ptl);
+	pb_free(&pbc);
 	return ret;
 oom:
 	page_cache_release(new_page);
+oom_nopg:
+	pb_free(&pbc);
+oom_nopb:
 	return VM_FAULT_OOM;
+
+bus_nopg:
+	pb_free(&pbc);
+	return VM_FAULT_SIGBUS;
 }
 
 /*
@@ -2314,6 +2430,8 @@ int __pud_alloc(struct mm_struct *mm, pg
 }
 #endif /* __PAGETABLE_PUD_FOLDED */
 
+EXPORT_SYMBOL_GPL(__pud_alloc);
+
 #ifndef __PAGETABLE_PMD_FOLDED
 /*
  * Allocate page middle directory.
@@ -2348,6 +2466,8 @@ int __pmd_alloc(struct mm_struct *mm, pu
 }
 #endif /* __PAGETABLE_PMD_FOLDED */
 
+EXPORT_SYMBOL_GPL(__pmd_alloc);
+
 int make_pages_present(unsigned long addr, unsigned long end)
 {
 	int ret, len, write;
diff -uprN linux-2.6.16/mm/mempolicy.c linux-2.6.16.ovz/mm/mempolicy.c
--- linux-2.6.16/mm/mempolicy.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/mm/mempolicy.c	2006-07-05 08:34:56.000000000 -0400
@@ -933,7 +933,7 @@ asmlinkage long sys_migrate_pages(pid_t 
 
 	/* Find the mm_struct */
 	read_lock(&tasklist_lock);
-	task = pid ? find_task_by_pid(pid) : current;
+	task = pid ? find_task_by_pid_ve(pid) : current;
 	if (!task) {
 		read_unlock(&tasklist_lock);
 		return -ESRCH;
@@ -1796,7 +1796,6 @@ static void gather_stats(struct page *pa
 		md->mapcount_max = count;
 
 	md->node[page_to_nid(page)]++;
-	cond_resched();
 }
 
 #ifdef CONFIG_HUGETLB_PAGE
diff -uprN linux-2.6.16/mm/mempool.c linux-2.6.16.ovz/mm/mempool.c
--- linux-2.6.16/mm/mempool.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/mm/mempool.c	2006-07-05 08:34:56.000000000 -0400
@@ -14,6 +14,7 @@
 #include <linux/mempool.h>
 #include <linux/blkdev.h>
 #include <linux/writeback.h>
+#include <linux/kmem_cache.h>
 
 static void add_element(mempool_t *pool, void *element)
 {
@@ -78,6 +79,8 @@ mempool_t *mempool_create_node(int min_n
 	init_waitqueue_head(&pool->wait);
 	pool->alloc = alloc_fn;
 	pool->free = free_fn;
+	if (alloc_fn == mempool_alloc_slab)
+		kmem_mark_nocharge((kmem_cache_t *)pool_data);
 
 	/*
 	 * First pre-allocate the guaranteed number of buffers.
@@ -119,6 +122,7 @@ int mempool_resize(mempool_t *pool, int 
 	unsigned long flags;
 
 	BUG_ON(new_min_nr <= 0);
+	gfp_mask &= ~__GFP_UBC;
 
 	spin_lock_irqsave(&pool->lock, flags);
 	if (new_min_nr <= pool->min_nr) {
@@ -212,6 +216,7 @@ void * mempool_alloc(mempool_t *pool, gf
 	gfp_mask |= __GFP_NOMEMALLOC;	/* don't allocate emergency reserves */
 	gfp_mask |= __GFP_NORETRY;	/* don't loop in __alloc_pages */
 	gfp_mask |= __GFP_NOWARN;	/* failures are OK */
+	gfp_mask &= ~__GFP_UBC;
 
 	gfp_temp = gfp_mask & ~(__GFP_WAIT|__GFP_IO);
 
diff -uprN linux-2.6.16/mm/mlock.c linux-2.6.16.ovz/mm/mlock.c
--- linux-2.6.16/mm/mlock.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/mm/mlock.c	2006-07-05 08:34:56.000000000 -0400
@@ -8,9 +8,11 @@
 #include <linux/capability.h>
 #include <linux/mman.h>
 #include <linux/mm.h>
+#include <linux/module.h>
 #include <linux/mempolicy.h>
 #include <linux/syscalls.h>
 
+#include <ub/ub_vmpages.h>
 
 static int mlock_fixup(struct vm_area_struct *vma, struct vm_area_struct **prev,
 	unsigned long start, unsigned long end, unsigned int newflags)
@@ -25,6 +27,14 @@ static int mlock_fixup(struct vm_area_st
 		goto out;
 	}
 
+	if (newflags & VM_LOCKED) {
+		ret = ub_locked_charge(mm, end - start);
+		if (ret < 0) {
+			*prev = vma;
+			goto out;
+		}
+	}
+
 	pgoff = vma->vm_pgoff + ((start - vma->vm_start) >> PAGE_SHIFT);
 	*prev = vma_merge(mm, *prev, start, end, newflags, vma->anon_vma,
 			  vma->vm_file, pgoff, vma_policy(vma));
@@ -38,13 +48,13 @@ static int mlock_fixup(struct vm_area_st
 	if (start != vma->vm_start) {
 		ret = split_vma(mm, vma, start, 1);
 		if (ret)
-			goto out;
+			goto out_uncharge;
 	}
 
 	if (end != vma->vm_end) {
 		ret = split_vma(mm, vma, end, 0);
 		if (ret)
-			goto out;
+			goto out_uncharge;
 	}
 
 success:
@@ -63,13 +73,19 @@ success:
 		pages = -pages;
 		if (!(newflags & VM_IO))
 			ret = make_pages_present(start, end);
-	}
+	} else
+		ub_locked_uncharge(mm, end - start);
 
 	vma->vm_mm->locked_vm -= pages;
 out:
 	if (ret == -ENOMEM)
 		ret = -EAGAIN;
 	return ret;
+
+out_uncharge:
+	if (newflags & VM_LOCKED)
+		ub_locked_uncharge(mm, end - start);
+	goto out;
 }
 
 static int do_mlock(unsigned long start, size_t len, int on)
@@ -146,6 +162,7 @@ asmlinkage long sys_mlock(unsigned long 
 	up_write(&current->mm->mmap_sem);
 	return error;
 }
+EXPORT_SYMBOL_GPL(sys_mlock);
 
 asmlinkage long sys_munlock(unsigned long start, size_t len)
 {
@@ -158,6 +175,7 @@ asmlinkage long sys_munlock(unsigned lon
 	up_write(&current->mm->mmap_sem);
 	return ret;
 }
+EXPORT_SYMBOL_GPL(sys_munlock);
 
 static int do_mlockall(int flags)
 {
diff -uprN linux-2.6.16/mm/mmap.c linux-2.6.16.ovz/mm/mmap.c
--- linux-2.6.16/mm/mmap.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/mm/mmap.c	2006-07-05 08:34:56.000000000 -0400
@@ -25,14 +25,18 @@
 #include <linux/mount.h>
 #include <linux/mempolicy.h>
 #include <linux/rmap.h>
+#include <linux/virtinfo.h>
 
 #include <asm/uaccess.h>
 #include <asm/cacheflush.h>
 #include <asm/tlb.h>
 
+#include <ub/ub_vmpages.h>
+
 static void unmap_region(struct mm_struct *mm,
 		struct vm_area_struct *vma, struct vm_area_struct *prev,
 		unsigned long start, unsigned long end);
+static unsigned long __do_brk(unsigned long addr, unsigned long len, int soft);
 
 /*
  * WARNING: the debugging will use recursive algorithms so never enable this
@@ -87,6 +91,16 @@ int __vm_enough_memory(long pages, int c
 
 	vm_acct_memory(pages);
 
+	switch (virtinfo_notifier_call(VITYPE_GENERAL, VIRTINFO_ENOUGHMEM,
+				(void *)pages)
+			& (NOTIFY_OK | NOTIFY_FAIL)) {
+		case NOTIFY_OK:
+			return 0;
+		case NOTIFY_FAIL:
+			vm_unacct_memory(pages);
+			return -ENOMEM;
+	}
+
 	/*
 	 * Sometimes we want to use more memory than we have
 	 */
@@ -201,11 +215,16 @@ static struct vm_area_struct *remove_vma
 	struct vm_area_struct *next = vma->vm_next;
 
 	might_sleep();
+
+	ub_memory_uncharge(vma->vm_mm, vma->vm_end - vma->vm_start,
+			vma->vm_flags, vma->vm_file);
 	if (vma->vm_ops && vma->vm_ops->close)
 		vma->vm_ops->close(vma);
 	if (vma->vm_file)
 		fput(vma->vm_file);
 	mpol_free(vma_policy(vma));
+	if (get_vma_rss(vma))
+		warn_bad_rss(vma, 0);
 	kmem_cache_free(vm_area_cachep, vma);
 	return next;
 }
@@ -242,7 +261,7 @@ asmlinkage unsigned long sys_brk(unsigne
 		goto out;
 
 	/* Ok, looks good - let it rip. */
-	if (do_brk(oldbrk, newbrk-oldbrk) != oldbrk)
+	if (__do_brk(oldbrk, newbrk-oldbrk, UB_HARD) != oldbrk)
 		goto out;
 set_brk:
 	mm->brk = brk;
@@ -726,7 +745,7 @@ struct vm_area_struct *vma_merge(struct 
 	else
 		next = mm->mmap;
 	area = next;
-	if (next && next->vm_end == end)		/* cases 6, 7, 8 */
+	if (next && next->vm_end == end) 		/* cases 6, 7, 8 */
 		next = next->vm_next;
 
 	/*
@@ -746,11 +765,22 @@ struct vm_area_struct *vma_merge(struct 
 				is_mergeable_anon_vma(prev->anon_vma,
 						      next->anon_vma)) {
 							/* cases 1, 6 */
+			add_vma_rss(prev, get_vma_rss(next));
+			if (area != next) /* case 6 */
+				add_vma_rss(prev, get_vma_rss(area));
 			vma_adjust(prev, prev->vm_start,
 				next->vm_end, prev->vm_pgoff, NULL);
-		} else					/* cases 2, 5, 7 */
+		} else {				/* cases 2, 5, 7 */
+			if (next && addr == next->vm_start) { /* case 5 */
+				unsigned long rss;
+				rss = pages_in_vma_range(next, addr, end);
+				sub_vma_rss(next, rss);
+				add_vma_rss(prev, rss);
+			} else if (area != next) /* case 7 */
+				add_vma_rss(prev, get_vma_rss(area));
 			vma_adjust(prev, prev->vm_start,
 				end, prev->vm_pgoff, NULL);
+		}
 		return prev;
 	}
 
@@ -761,12 +791,19 @@ struct vm_area_struct *vma_merge(struct 
  			mpol_equal(policy, vma_policy(next)) &&
 			can_vma_merge_before(next, vm_flags,
 					anon_vma, file, pgoff+pglen)) {
-		if (prev && addr < prev->vm_end)	/* case 4 */
+		if (prev && addr < prev->vm_end) {	/* case 4 */
+			unsigned long rss;
+			rss = pages_in_vma_range(prev, addr, end);
+			sub_vma_rss(prev, rss);
+			add_vma_rss(next, rss);
 			vma_adjust(prev, prev->vm_start,
 				addr, prev->vm_pgoff, NULL);
-		else					/* cases 3, 8 */
+		} else {				/* cases 3, 8 */
+			if (area != next) /* case 8 */
+				add_vma_rss(area, get_vma_rss(next));
 			vma_adjust(area, addr, next->vm_end,
 				next->vm_pgoff - pglen, NULL);
+		}
 		return area;
 	}
 
@@ -1033,6 +1070,10 @@ munmap_back:
 		}
 	}
 
+	if (ub_memory_charge(mm, len, vm_flags, file,
+				(flags & MAP_EXECPRIO ? UB_SOFT : UB_HARD)))
+		goto charge_error;
+
 	/*
 	 * Can we just expand an old private anonymous mapping?
 	 * The VM_SHARED test is necessary because shmem_zero_setup
@@ -1048,7 +1089,8 @@ munmap_back:
 	 * specific mapper. the address has already been validated, but
 	 * not unmapped, but the maps are removed from the list.
 	 */
-	vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
+	vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL |
+			(flags & MAP_EXECPRIO ? __GFP_SOFT_UBC : 0));
 	if (!vma) {
 		error = -ENOMEM;
 		goto unacct_error;
@@ -1107,6 +1149,19 @@ munmap_back:
 		if (correct_wcount)
 			atomic_inc(&inode->i_writecount);
 	} else {
+		unsigned long rss;
+
+		rss = get_vma_rss(vma);
+		if (rss > 0) {
+			if (prev->vm_next && prev->vm_next->vm_start == addr)
+				/* vma_merge expanded next vm_area */
+				add_vma_rss(prev->vm_next, rss);
+			else
+				/* vma_merge expanded prev vm_area
+				 * and probably split it with next
+				 */
+				add_vma_rss(prev, rss);
+		}
 		if (file) {
 			if (correct_wcount)
 				atomic_inc(&inode->i_writecount);
@@ -1142,6 +1197,8 @@ unmap_and_free_vma:
 free_vma:
 	kmem_cache_free(vm_area_cachep, vma);
 unacct_error:
+	ub_memory_uncharge(mm, len, vm_flags, file);
+charge_error:
 	if (charged)
 		vm_unacct_memory(charged);
 	return error;
@@ -1471,12 +1528,16 @@ static int acct_stack_growth(struct vm_a
 			return -ENOMEM;
 	}
 
+	if (ub_memory_charge(mm, grow << PAGE_SHIFT, vma->vm_flags,
+				vma->vm_file, UB_SOFT))
+		goto fail_charge;
+
 	/*
 	 * Overcommit..  This must be the final test, as it will
 	 * update security statistics.
 	 */
 	if (security_vm_enough_memory(grow))
-		return -ENOMEM;
+		goto fail_sec;
 
 	/* Ok, everything looks good - let it rip */
 	mm->total_vm += grow;
@@ -1484,6 +1545,11 @@ static int acct_stack_growth(struct vm_a
 		mm->locked_vm += grow;
 	vm_stat_account(mm, vma->vm_flags, vma->vm_file, grow);
 	return 0;
+
+fail_sec:
+	ub_memory_uncharge(mm, grow << PAGE_SHIFT, vma->vm_flags, vma->vm_file);
+fail_charge:
+	return -ENOMEM;
 }
 
 #if defined(CONFIG_STACK_GROWSUP) || defined(CONFIG_IA64)
@@ -1744,8 +1810,13 @@ int split_vma(struct mm_struct * mm, str
 	else
 		vma_adjust(vma, vma->vm_start, addr, vma->vm_pgoff, new);
 
+	/* protected with mmap sem */
+	set_vma_rss(vma, pages_in_vma(vma));
+	set_vma_rss(new, pages_in_vma(new));
+
 	return 0;
 }
+EXPORT_SYMBOL_GPL(split_vma);
 
 /* Munmap is split into 2 main parts -- this part which finds
  * what needs doing, and the areas themselves, which do the
@@ -1839,7 +1910,7 @@ static inline void verify_mm_writelocked
  *  anonymous maps.  eventually we may be able to do some
  *  brk-specific accounting here.
  */
-unsigned long do_brk(unsigned long addr, unsigned long len)
+static unsigned long __do_brk(unsigned long addr, unsigned long len, int soft)
 {
 	struct mm_struct * mm = current->mm;
 	struct vm_area_struct * vma, * prev;
@@ -1891,11 +1962,14 @@ unsigned long do_brk(unsigned long addr,
 	if (mm->map_count > sysctl_max_map_count)
 		return -ENOMEM;
 
-	if (security_vm_enough_memory(len >> PAGE_SHIFT))
-		return -ENOMEM;
-
 	flags = VM_DATA_DEFAULT_FLAGS | VM_ACCOUNT | mm->def_flags;
 
+	if (ub_memory_charge(mm, len, flags, NULL, soft))
+		goto fail_charge;
+
+	if (security_vm_enough_memory(len >> PAGE_SHIFT))
+		goto fail_sec;
+
 	/* Can we just expand an old private anonymous mapping? */
 	if (vma_merge(mm, prev, addr, addr + len, flags,
 					NULL, NULL, pgoff, NULL))
@@ -1904,11 +1978,11 @@ unsigned long do_brk(unsigned long addr,
 	/*
 	 * create a vma struct for an anonymous mapping
 	 */
-	vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
-	if (!vma) {
-		vm_unacct_memory(len >> PAGE_SHIFT);
-		return -ENOMEM;
-	}
+	vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL |
+			(soft == UB_SOFT ? __GFP_SOFT_UBC : 0));
+	if (!vma)
+		goto fail_alloc;
+
 	memset(vma, 0, sizeof(*vma));
 
 	vma->vm_mm = mm;
@@ -1925,8 +1999,19 @@ out:
 		make_pages_present(addr, addr + len);
 	}
 	return addr;
+
+fail_alloc:
+	vm_unacct_memory(len >> PAGE_SHIFT);
+fail_sec:
+	ub_memory_uncharge(mm, len, flags, NULL);
+fail_charge:
+	return -ENOMEM;
 }
 
+unsigned long do_brk(unsigned long addr, unsigned long len)
+{
+	return __do_brk(addr, len, UB_SOFT);	/* sys_brk() calls __do_brk() with UB_HARD itself */
+}
 EXPORT_SYMBOL(do_brk);
 
 /* Release all mmaps. */
@@ -2036,6 +2121,7 @@ struct vm_area_struct *copy_vma(struct v
 			new_vma->vm_start = addr;
 			new_vma->vm_end = addr + len;
 			new_vma->vm_pgoff = pgoff;
+			set_vma_rss(new_vma, 0);
 			if (new_vma->vm_file)
 				get_file(new_vma->vm_file);
 			if (new_vma->vm_ops && new_vma->vm_ops->open)
diff -uprN linux-2.6.16/mm/mprotect.c linux-2.6.16.ovz/mm/mprotect.c
--- linux-2.6.16/mm/mprotect.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/mm/mprotect.c	2006-07-05 08:34:56.000000000 -0400
@@ -9,6 +9,7 @@
  */
 
 #include <linux/mm.h>
+#include <linux/module.h>
 #include <linux/hugetlb.h>
 #include <linux/slab.h>
 #include <linux/shm.h>
@@ -25,6 +26,8 @@
 #include <asm/cacheflush.h>
 #include <asm/tlbflush.h>
 
+#include <ub/ub_vmpages.h>
+
 static void change_pte_range(struct mm_struct *mm, pmd_t *pmd,
 		unsigned long addr, unsigned long end, pgprot_t newprot)
 {
@@ -109,12 +112,20 @@ mprotect_fixup(struct vm_area_struct *vm
 	pgprot_t newprot;
 	pgoff_t pgoff;
 	int error;
+	unsigned long ch_size;
+	int ch_dir;
 
 	if (newflags == oldflags) {
 		*pprev = vma;
 		return 0;
 	}
 
+	error = -ENOMEM;
+	ch_size = nrpages - pages_in_vma_range(vma, start, end);
+	ch_dir = ub_protected_charge(mm, ch_size, newflags, vma);
+	if (ch_dir == PRIVVM_ERROR)
+		goto fail_ch;
+
 	/*
 	 * If we make a private mapping writable we increase our commit;
 	 * but (without finer accounting) cannot reduce our commit if we
@@ -127,7 +138,7 @@ mprotect_fixup(struct vm_area_struct *vm
 		if (!(oldflags & (VM_ACCOUNT|VM_WRITE|VM_SHARED|VM_HUGETLB))) {
 			charged = nrpages;
 			if (security_vm_enough_memory(charged))
-				return -ENOMEM;
+				goto fail_sec;
 			newflags |= VM_ACCOUNT;
 		}
 	}
@@ -169,10 +180,16 @@ success:
 	change_protection(vma, start, end, newprot);
 	vm_stat_account(mm, oldflags, vma->vm_file, -nrpages);
 	vm_stat_account(mm, newflags, vma->vm_file, nrpages);
+	if (ch_dir == PRIVVM_TO_SHARED)
+		__ub_unused_privvm_dec(mm, ch_size);
 	return 0;
 
 fail:
 	vm_unacct_memory(charged);
+fail_sec:
+	if (ch_dir == PRIVVM_TO_PRIVATE)
+		__ub_unused_privvm_dec(mm, ch_size);
+fail_ch:
 	return error;
 }
 
@@ -280,3 +297,4 @@ out:
 	up_write(&current->mm->mmap_sem);
 	return error;
 }
+EXPORT_SYMBOL_GPL(sys_mprotect);
diff -uprN linux-2.6.16/mm/mremap.c linux-2.6.16.ovz/mm/mremap.c
--- linux-2.6.16/mm/mremap.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/mm/mremap.c	2006-07-05 08:34:56.000000000 -0400
@@ -23,6 +23,8 @@
 #include <asm/cacheflush.h>
 #include <asm/tlbflush.h>
 
+#include <ub/ub_vmpages.h>
+
 static pmd_t *get_old_pmd(struct mm_struct *mm, unsigned long addr)
 {
 	pgd_t *pgd;
@@ -106,6 +108,8 @@ static void move_ptes(struct vm_area_str
 		pte = ptep_clear_flush(vma, old_addr, old_pte);
 		/* ZERO_PAGE can be dependant on virtual addr */
 		pte = move_pte(pte, new_vma->vm_page_prot, old_addr, new_addr);
+		dec_vma_rss(vma);
+		inc_vma_rss(new_vma);
 		set_pte_at(mm, new_addr, new_pte, pte);
 	}
 
@@ -166,17 +170,21 @@ static unsigned long move_vma(struct vm_
 	unsigned long hiwater_vm;
 	int split = 0;
 
+	if (ub_memory_charge(mm, new_len, vm_flags,
+				vma->vm_file, UB_HARD))
+		goto err;
+
 	/*
 	 * We'd prefer to avoid failure later on in do_munmap:
 	 * which may split one vma into three before unmapping.
 	 */
 	if (mm->map_count >= sysctl_max_map_count - 3)
-		return -ENOMEM;
+		goto err_nomem;
 
 	new_pgoff = vma->vm_pgoff + ((old_addr - vma->vm_start) >> PAGE_SHIFT);
 	new_vma = copy_vma(&vma, new_addr, new_len, new_pgoff);
 	if (!new_vma)
-		return -ENOMEM;
+		goto err_nomem;
 
 	moved_len = move_page_tables(vma, old_addr, new_vma, new_addr, old_len);
 	if (moved_len < old_len) {
@@ -235,7 +243,13 @@ static unsigned long move_vma(struct vm_
 					   new_addr + new_len);
 	}
 
-	return new_addr;
+	if (new_addr != -ENOMEM)
+		return new_addr;
+
+err_nomem:
+	ub_memory_uncharge(mm, new_len, vm_flags, vma->vm_file);
+err:
+	return -ENOMEM;
 }
 
 /*
@@ -359,7 +373,15 @@ unsigned long do_mremap(unsigned long ad
 			max_addr = vma->vm_next->vm_start;
 		/* can we just expand the current mapping? */
 		if (max_addr - addr >= new_len) {
-			int pages = (new_len - old_len) >> PAGE_SHIFT;
+			unsigned long len;	/* growth in bytes; int would truncate */
+			int pages;
+
+			len = new_len - old_len;
+			pages = len >> PAGE_SHIFT;
+			ret = -ENOMEM;	/* result if the charge below is refused */
+			if (ub_memory_charge(mm, len, vma->vm_flags,
+						vma->vm_file, UB_HARD))
+				goto out;
 
 			vma_adjust(vma, vma->vm_start,
 				addr + new_len, vma->vm_pgoff, NULL);
diff -uprN linux-2.6.16/mm/oom_kill.c linux-2.6.16.ovz/mm/oom_kill.c
--- linux-2.6.16/mm/oom_kill.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/mm/oom_kill.c	2006-07-05 08:34:56.000000000 -0400
@@ -176,7 +176,7 @@ static struct task_struct *select_bad_pr
 	*ppoints = 0;
 
 	do_posix_clock_monotonic_gettime(&uptime);
-	do_each_thread(g, p) {
+	do_each_thread_all(g, p) {
 		unsigned long points;
 		int releasing;
 
@@ -205,7 +205,7 @@ static struct task_struct *select_bad_pr
 			chosen = p;
 			*ppoints = points;
 		}
-	} while_each_thread(g, p);
+	} while_each_thread_all(g, p);
 	return chosen;
 }
 
@@ -261,10 +261,10 @@ static struct mm_struct *oom_kill_task(t
 	 * kill all processes that share the ->mm (i.e. all threads),
 	 * but are in a different thread group
 	 */
-	do_each_thread(g, q)
+	do_each_thread_all(g, q) {
 		if (q->mm == mm && q->tgid != p->tgid)
 			__oom_kill_task(q, message);
-	while_each_thread(g, q);
+	} while_each_thread_all(g, q);
 
 	return mm;
 }
diff -uprN linux-2.6.16/mm/page_alloc.c linux-2.6.16.ovz/mm/page_alloc.c
--- linux-2.6.16/mm/page_alloc.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/mm/page_alloc.c	2006-07-05 08:34:56.000000000 -0400
@@ -41,6 +41,8 @@
 #include <asm/tlbflush.h>
 #include "internal.h"
 
+#include <ub/ub_mem.h>
+
 /*
  * MCD - HACK: Find somewhere to initialize this EARLY, or make this
  * initializer cleaner
@@ -50,6 +52,7 @@ EXPORT_SYMBOL(node_online_map);
 nodemask_t node_possible_map __read_mostly = NODE_MASK_ALL;
 EXPORT_SYMBOL(node_possible_map);
 struct pglist_data *pgdat_list __read_mostly;
+EXPORT_SYMBOL(pgdat_list);
 unsigned long totalram_pages __read_mostly;
 unsigned long totalhigh_pages __read_mostly;
 long nr_swap_pages;
@@ -153,7 +156,8 @@ static void bad_page(struct page *page)
 			1 << PG_reclaim |
 			1 << PG_slab    |
 			1 << PG_swapcache |
-			1 << PG_writeback );
+			1 << PG_writeback |
+			1 << PG_buddy );
 	set_page_count(page, 0);
 	reset_page_mapcount(page);
 	page->mapping = NULL;
@@ -224,12 +228,12 @@ static inline unsigned long page_order(s
 
 static inline void set_page_order(struct page *page, int order) {
 	set_page_private(page, order);
-	__SetPagePrivate(page);
+	__SetPageBuddy(page);
 }
 
 static inline void rmv_page_order(struct page *page)
 {
-	__ClearPagePrivate(page);
+	__ClearPageBuddy(page);
 	set_page_private(page, 0);
 }
 
@@ -268,11 +272,13 @@ __find_combined_index(unsigned long page
  * This function checks whether a page is free && is the buddy
  * we can do coalesce a page and its buddy if
  * (a) the buddy is not in a hole &&
- * (b) the buddy is free &&
- * (c) the buddy is on the buddy system &&
- * (d) a page and its buddy have the same order.
- * for recording page's order, we use page_private(page) and PG_private.
+ * (b) the buddy is in the buddy system &&
+ * (c) a page and its buddy have the same order.
  *
+ * For recording whether a page is in the buddy system, we use PG_buddy.
+ * Setting, clearing, and testing PG_buddy is serialized by zone->lock.
+ *
+ * For recording page's order, we use page_private(page).
  */
 static inline int page_is_buddy(struct page *page, int order)
 {
@@ -281,10 +287,10 @@ static inline int page_is_buddy(struct p
 		return 0;
 #endif
 
-       if (PagePrivate(page)           &&
-           (page_order(page) == order) &&
-            page_count(page) == 0)
+	if (PageBuddy(page) && page_order(page) == order) {
+		BUG_ON(page_count(page) != 0);
                return 1;
+	}
        return 0;
 }
 
@@ -301,7 +307,7 @@ static inline int page_is_buddy(struct p
  * as necessary, plus some accounting needed to play nicely with other
  * parts of the VM system.
  * At each level, we keep a list of pages, which are heads of continuous
- * free pages of length of (1 << order) and marked with PG_Private.Page's
+ * free pages of length of (1 << order) and marked with PG_buddy. Page's
  * order is recorded in page_private(page) field.
  * So when we are allocating or freeing one, we can derive the state of the
  * other.  That is, if we allocate a small block, and both were   
@@ -364,7 +370,8 @@ static inline int free_pages_check(struc
 			1 << PG_slab	|
 			1 << PG_swapcache |
 			1 << PG_writeback |
-			1 << PG_reserved ))))
+			1 << PG_reserved |
+			1 << PG_buddy ))))
 		bad_page(page);
 	if (PageDirty(page))
 		__ClearPageDirty(page);
@@ -434,6 +441,7 @@ static void __free_pages_ok(struct page 
 		return;
 
 	kernel_map_pages(page, 1 << order, 0);
+	ub_page_uncharge(page, order);
 	local_irq_save(flags);
 	__mod_page_state(pgfree, 1 << order);
 	free_one_page(page_zone(page), page, order);
@@ -522,7 +530,8 @@ static int prep_new_page(struct page *pa
 			1 << PG_slab    |
 			1 << PG_swapcache |
 			1 << PG_writeback |
-			1 << PG_reserved ))))
+			1 << PG_reserved |
+			1 << PG_buddy ))))
 		bad_page(page);
 
 	/*
@@ -721,6 +730,7 @@ static void fastcall free_hot_cold_page(
 	kernel_map_pages(page, 1, 0);
 
 	pcp = &zone_pcp(zone, get_cpu())->pcp[cold];
+	ub_page_uncharge(page, 0);
 	local_irq_save(flags);
 	__inc_page_state(pgfree);
 	list_add(&page->lru, &pcp->list);
@@ -894,6 +904,28 @@ get_page_from_freelist(gfp_t gfp_mask, u
 	return page;
 }
 
+static void __alloc_collect_stats(gfp_t gfp_mask,	/* same type as in __alloc_pages() */
+		unsigned int order, struct page *page, cycles_t time)
+{
+	int ind;		/* slot in kstat_glob.alloc_lat[] / alloc_fails[] */
+	unsigned long flags;
+
+	time = get_cycles() - time;	/* start stamp in, elapsed cycles out */
+	if (!(gfp_mask & __GFP_WAIT))
+		ind = 0;			/* request without __GFP_WAIT */
+	else if (!(gfp_mask & __GFP_HIGHMEM))
+		ind = (order > 0 ? 2 : 1);	/* no __GFP_HIGHMEM: 1 = order 0, 2 = higher */
+	else
+		ind = (order > 0 ? 4 : 3);	/* __GFP_HIGHMEM: 3 = order 0, 4 = higher */
+	spin_lock_irqsave(&kstat_glb_lock, flags);
+	KSTAT_LAT_ADD(&kstat_glob.alloc_lat[ind], time);
+	if (!page)			/* NULL page is counted as a failure */
+		kstat_glob.alloc_fails[ind]++;
+	spin_unlock_irqrestore(&kstat_glb_lock, flags);
+}
+
+int alloc_fail_warn;
+
 /*
  * This is the 'heart' of the zoned buddy allocator.
  */
@@ -909,6 +941,7 @@ __alloc_pages(gfp_t gfp_mask, unsigned i
 	int do_retry;
 	int alloc_flags;
 	int did_some_progress;
+	cycles_t start;
 
 	might_sleep_if(wait);
 
@@ -920,6 +953,7 @@ restart:
 		return NULL;
 	}
 
+	start = get_cycles();
 	page = get_page_from_freelist(gfp_mask|__GFP_HARDWALL, order,
 				zonelist, ALLOC_WMARK_LOW|ALLOC_CPUSET);
 	if (page)
@@ -944,7 +978,8 @@ restart:
 		alloc_flags |= ALLOC_HARDER;
 	if (gfp_mask & __GFP_HIGH)
 		alloc_flags |= ALLOC_HIGH;
-	alloc_flags |= ALLOC_CPUSET;
+	if (wait)
+		alloc_flags |= ALLOC_CPUSET;
 
 	/*
 	 * Go through the zonelist again. Let __GFP_HIGH and allocations
@@ -1038,14 +1073,22 @@ rebalance:
 	}
 
 nopage:
-	if (!(gfp_mask & __GFP_NOWARN) && printk_ratelimit()) {
+	__alloc_collect_stats(gfp_mask, order, page, start);
+	if (alloc_fail_warn && !(gfp_mask & __GFP_NOWARN) && 
+			printk_ratelimit()) {
 		printk(KERN_WARNING "%s: page allocation failure."
 			" order:%d, mode:0x%x\n",
 			p->comm, order, gfp_mask);
 		dump_stack();
 		show_mem();
 	}
+	return NULL;
+
 got_pg:
+	if (ub_page_charge(page, order, gfp_mask)) {
+		__free_pages(page, order);
+		page = NULL;
+	}
 	return page;
 }
 
@@ -2378,7 +2421,10 @@ static void *vmstat_start(struct seq_fil
 	m->private = ps;
 	if (!ps)
 		return ERR_PTR(-ENOMEM);
-	get_full_page_state(ps);
+	if (ve_is_super(get_exec_env()))
+		get_full_page_state(ps);
+	else
+		memset(ps, 0, sizeof(*ps));
 	ps->pgpgin /= 2;		/* sectors -> kbytes */
 	ps->pgpgout /= 2;
 	return (unsigned long *)ps + *pos;
diff -uprN linux-2.6.16/mm/rmap.c linux-2.6.16.ovz/mm/rmap.c
--- linux-2.6.16/mm/rmap.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/mm/rmap.c	2006-07-05 08:34:56.000000000 -0400
@@ -56,6 +56,8 @@
 
 #include <asm/tlbflush.h>
 
+#include <ub/ub_vmpages.h>
+
 //#define RMAP_DEBUG /* can be enabled only for debugging */
 
 kmem_cache_t *anon_vma_cachep;
@@ -117,6 +119,7 @@ int anon_vma_prepare(struct vm_area_stru
 	}
 	return 0;
 }
+EXPORT_SYMBOL_GPL(anon_vma_prepare);
 
 void __anon_vma_merge(struct vm_area_struct *vma, struct vm_area_struct *next)
 {
@@ -145,6 +148,7 @@ void anon_vma_link(struct vm_area_struct
 		spin_unlock(&anon_vma->lock);
 	}
 }
+EXPORT_SYMBOL_GPL(anon_vma_link);
 
 void anon_vma_unlink(struct vm_area_struct *vma)
 {
@@ -180,14 +184,15 @@ static void anon_vma_ctor(void *data, km
 void __init anon_vma_init(void)
 {
 	anon_vma_cachep = kmem_cache_create("anon_vma", sizeof(struct anon_vma),
-			0, SLAB_DESTROY_BY_RCU|SLAB_PANIC, anon_vma_ctor, NULL);
+			0, SLAB_DESTROY_BY_RCU|SLAB_PANIC|SLAB_UBC,
+			anon_vma_ctor, NULL);
 }
 
 /*
  * Getting a lock on a stable anon_vma from a page off the LRU is
  * tricky: page_lock_anon_vma rely on RCU to guard against the races.
  */
-static struct anon_vma *page_lock_anon_vma(struct page *page)
+struct anon_vma *page_lock_anon_vma(struct page *page)
 {
 	struct anon_vma *anon_vma = NULL;
 	unsigned long anon_mapping;
@@ -205,6 +210,7 @@ out:
 	rcu_read_unlock();
 	return anon_vma;
 }
+EXPORT_SYMBOL_GPL(page_lock_anon_vma);
 
 #ifdef CONFIG_MIGRATION
 /*
@@ -220,6 +226,7 @@ void remove_from_swap(struct page *page)
 	struct anon_vma *anon_vma;
 	struct vm_area_struct *vma;
 	unsigned long mapping;
+	struct page_beancounter *pb;
 
 	if (!PageSwapCache(page))
 		return;
@@ -229,6 +236,10 @@ void remove_from_swap(struct page *page)
 	if (!mapping || (mapping & PAGE_MAPPING_ANON) == 0)
 		return;
 
+	pb = NULL;
+	if (pb_alloc_all(&pb))
+		return;
+
 	/*
 	 * We hold the mmap_sem lock. So no need to call page_lock_anon_vma.
 	 */
@@ -236,10 +247,12 @@ void remove_from_swap(struct page *page)
 	spin_lock(&anon_vma->lock);
 
 	list_for_each_entry(vma, &anon_vma->head, anon_vma_node)
-		remove_vma_swap(vma, page);
+		remove_vma_swap(vma, page, &pb);
 
 	spin_unlock(&anon_vma->lock);
 	delete_from_swap_cache(page);
+
+	pb_free_list(&pb);
 }
 EXPORT_SYMBOL(remove_from_swap);
 #endif
@@ -638,7 +651,11 @@ static int try_to_unmap_one(struct page 
 	} else
 		dec_mm_counter(mm, file_rss);
 
+	dec_vma_rss(vma);
 	page_remove_rmap(page);
+	ub_unused_privvm_inc(mm, vma);
+	ub_unmap_inc(mm);
+	pb_remove_ref(page, mm);
 	page_cache_release(page);
 
 out_unmap:
@@ -729,8 +746,12 @@ static void try_to_unmap_cluster(unsigne
 			set_page_dirty(page);
 
 		page_remove_rmap(page);
+		ub_unmap_inc(mm);
+		pb_remove_ref(page, mm);
+		ub_unused_privvm_inc(mm, vma);
 		page_cache_release(page);
 		dec_mm_counter(mm, file_rss);
+		dec_vma_rss(vma);
 		(*mapcount)--;
 	}
 	pte_unmap_unlock(pte - 1, ptl);
diff -uprN linux-2.6.16/mm/shmem.c linux-2.6.16.ovz/mm/shmem.c
--- linux-2.6.16/mm/shmem.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/mm/shmem.c	2006-07-05 08:34:56.000000000 -0400
@@ -50,6 +50,8 @@
 #include <asm/div64.h>
 #include <asm/pgtable.h>
 
+#include <ub/ub_vmpages.h>
+
 /* This magic number is used in glibc for posix shared memory */
 #define TMPFS_MAGIC	0x01021994
 
@@ -211,7 +213,7 @@ static void shmem_free_blocks(struct ino
  *
  * It has to be called with the spinlock held.
  */
-static void shmem_recalc_inode(struct inode *inode)
+static void shmem_recalc_inode(struct inode *inode, long swp_freed)
 {
 	struct shmem_inode_info *info = SHMEM_I(inode);
 	long freed;
@@ -221,6 +223,8 @@ static void shmem_recalc_inode(struct in
 		info->alloced -= freed;
 		shmem_unacct_blocks(info->flags, freed);
 		shmem_free_blocks(inode, freed);
+		if (freed > swp_freed)
+			ub_tmpfs_respages_sub(info, freed - swp_freed);
 	}
 }
 
@@ -326,6 +330,11 @@ static void shmem_swp_set(struct shmem_i
 		struct page *page = kmap_atomic_to_page(entry);
 		set_page_private(page, page_private(page) + incdec);
 	}
+
+	if (incdec == 1)
+		ub_tmpfs_respages_dec(info);
+	else
+		ub_tmpfs_respages_inc(info);
 }
 
 /*
@@ -342,14 +351,24 @@ static swp_entry_t *shmem_swp_alloc(stru
 	struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
 	struct page *page = NULL;
 	swp_entry_t *entry;
+	unsigned long ub_val;
 
 	if (sgp != SGP_WRITE &&
 	    ((loff_t) index << PAGE_CACHE_SHIFT) >= i_size_read(inode))
 		return ERR_PTR(-EINVAL);
 
+	ub_val = 0;
+	if (info->next_index <= index) {
+		ub_val = index + 1 - info->next_index;
+		if (ub_shmpages_charge(info, ub_val))
+			return ERR_PTR(-ENOSPC);
+	}
+
 	while (!(entry = shmem_swp_entry(info, index, &page))) {
-		if (sgp == SGP_READ)
-			return shmem_swp_map(ZERO_PAGE(0));
+		if (sgp == SGP_READ) {
+			entry = shmem_swp_map(ZERO_PAGE(0));
+			goto out;
+		}
 		/*
 		 * Test free_blocks against 1 not 0, since we have 1 data
 		 * page (and perhaps indirect index pages) yet to allocate:
@@ -359,7 +378,8 @@ static swp_entry_t *shmem_swp_alloc(stru
 			spin_lock(&sbinfo->stat_lock);
 			if (sbinfo->free_blocks <= 1) {
 				spin_unlock(&sbinfo->stat_lock);
-				return ERR_PTR(-ENOSPC);
+				entry = ERR_PTR(-ENOSPC);
+				goto out;
 			}
 			sbinfo->free_blocks--;
 			inode->i_blocks += BLOCKS_PER_PAGE;
@@ -367,31 +387,43 @@ static swp_entry_t *shmem_swp_alloc(stru
 		}
 
 		spin_unlock(&info->lock);
-		page = shmem_dir_alloc(mapping_gfp_mask(inode->i_mapping) | __GFP_ZERO);
+		page = shmem_dir_alloc(mapping_gfp_mask(inode->i_mapping) |
+				__GFP_ZERO | __GFP_UBC);
 		if (page)
 			set_page_private(page, 0);
 		spin_lock(&info->lock);
 
 		if (!page) {
-			shmem_free_blocks(inode, 1);
-			return ERR_PTR(-ENOMEM);
+			entry = ERR_PTR(-ENOMEM);
+			goto out_block;
 		}
 		if (sgp != SGP_WRITE &&
 		    ((loff_t) index << PAGE_CACHE_SHIFT) >= i_size_read(inode)) {
 			entry = ERR_PTR(-EINVAL);
-			break;
+			goto out_dir;
 		}
-		if (info->next_index <= index)
+		if (info->next_index <= index) {
+			ub_val = 0;
 			info->next_index = index + 1;
+		}
 	}
 	if (page) {
 		/* another task gave its page, or truncated the file */
 		shmem_free_blocks(inode, 1);
 		shmem_dir_free(page);
 	}
-	if (info->next_index <= index && !IS_ERR(entry))
+	if (info->next_index <= index)
 		info->next_index = index + 1;
 	return entry;
+
+out_dir:
+	shmem_dir_free(page);
+out_block:
+	shmem_free_blocks(inode, 1);
+out:
+	if (ub_val)
+		ub_shmpages_uncharge(info, ub_val);
+	return entry;
 }
 
 /*
@@ -484,6 +516,7 @@ static void shmem_truncate_range(struct 
 		return;
 
 	spin_lock(&info->lock);
+	ub_shmpages_uncharge(info, info->next_index - idx);
 	info->flags |= SHMEM_TRUNCATE;
 	if (likely(end == (loff_t) -1)) {
 		limit = info->next_index;
@@ -613,7 +646,7 @@ done2:
 	info->swapped -= nr_swaps_freed;
 	if (nr_pages_to_free)
 		shmem_free_blocks(inode, nr_pages_to_free);
-	shmem_recalc_inode(inode);
+	shmem_recalc_inode(inode, nr_swaps_freed);
 	spin_unlock(&info->lock);
 
 	/*
@@ -696,6 +729,7 @@ static void shmem_delete_inode(struct in
 		sbinfo->free_inodes++;
 		spin_unlock(&sbinfo->stat_lock);
 	}
+	shmi_ub_put(info);
 	clear_inode(inode);
 }
 
@@ -817,6 +851,12 @@ int shmem_unuse(swp_entry_t entry, struc
 	return found;
 }
 
+#ifdef CONFIG_USER_RESOURCE
+#define shm_get_swap_page(info)	(get_swap_page((info)->shmi_ub))
+#else
+#define shm_get_swap_page(info)	(get_swap_page(NULL))
+#endif
+
 /*
  * Move the page from the page cache to the swap cache.
  */
@@ -837,12 +877,12 @@ static int shmem_writepage(struct page *
 	info = SHMEM_I(inode);
 	if (info->flags & VM_LOCKED)
 		goto redirty;
-	swap = get_swap_page();
+	swap = shm_get_swap_page(info);
 	if (!swap.val)
 		goto redirty;
 
 	spin_lock(&info->lock);
-	shmem_recalc_inode(inode);
+	shmem_recalc_inode(inode, 0);
 	if (index >= info->next_index) {
 		BUG_ON(!(info->flags & SHMEM_TRUNCATE));
 		goto unlock;
@@ -1030,7 +1070,7 @@ repeat:
 		goto failed;
 
 	spin_lock(&info->lock);
-	shmem_recalc_inode(inode);
+	shmem_recalc_inode(inode, 0);
 	entry = shmem_swp_alloc(info, idx, sgp);
 	if (IS_ERR(entry)) {
 		spin_unlock(&info->lock);
@@ -1206,6 +1246,7 @@ repeat:
 		spin_unlock(&info->lock);
 		flush_dcache_page(filepage);
 		SetPageUptodate(filepage);
+		ub_tmpfs_respages_inc(info);
 	}
 done:
 	if (*pagep != filepage) {
@@ -1307,28 +1348,6 @@ shmem_get_policy(struct vm_area_struct *
 }
 #endif
 
-int shmem_lock(struct file *file, int lock, struct user_struct *user)
-{
-	struct inode *inode = file->f_dentry->d_inode;
-	struct shmem_inode_info *info = SHMEM_I(inode);
-	int retval = -ENOMEM;
-
-	spin_lock(&info->lock);
-	if (lock && !(info->flags & VM_LOCKED)) {
-		if (!user_shm_lock(inode->i_size, user))
-			goto out_nomem;
-		info->flags |= VM_LOCKED;
-	}
-	if (!lock && (info->flags & VM_LOCKED) && user) {
-		user_shm_unlock(inode->i_size, user);
-		info->flags &= ~VM_LOCKED;
-	}
-	retval = 0;
-out_nomem:
-	spin_unlock(&info->lock);
-	return retval;
-}
-
 int shmem_mmap(struct file *file, struct vm_area_struct *vma)
 {
 	file_accessed(file);
@@ -1365,6 +1384,7 @@ shmem_get_inode(struct super_block *sb, 
 		inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
 		info = SHMEM_I(inode);
 		memset(info, 0, (char *)inode - (char *)info);
+		shmi_ub_set(info, get_exec_ub());
 		spin_lock_init(&info->lock);
 		INIT_LIST_HEAD(&info->swaplist);
 
@@ -2100,6 +2120,7 @@ static int shmem_fill_super(struct super
 	sb->s_blocksize_bits = PAGE_CACHE_SHIFT;
 	sb->s_magic = TMPFS_MAGIC;
 	sb->s_op = &shmem_ops;
+	sb->s_time_gran = 1;
 
 	inode = shmem_get_inode(sb, S_IFDIR | mode, 0);
 	if (!inode)
@@ -2172,6 +2193,7 @@ static struct address_space_operations s
 	.prepare_write	= shmem_prepare_write,
 	.commit_write	= simple_commit_write,
 #endif
+	.migratepage	= migrate_page,
 };
 
 static struct file_operations shmem_file_operations = {
@@ -2226,6 +2248,10 @@ static struct vm_operations_struct shmem
 #endif
 };
 
+int is_shmem_mapping(struct address_space *map)
+{
+	return (map != NULL && map->a_ops == &shmem_aops);
+}
 
 static struct super_block *shmem_get_sb(struct file_system_type *fs_type,
 	int flags, const char *dev_name, void *data)
@@ -2233,13 +2259,19 @@ static struct super_block *shmem_get_sb(
 	return get_sb_nodev(fs_type, flags, data, shmem_fill_super);
 }
 
-static struct file_system_type tmpfs_fs_type = {
+struct file_system_type tmpfs_fs_type = {
 	.owner		= THIS_MODULE,
 	.name		= "tmpfs",
 	.get_sb		= shmem_get_sb,
 	.kill_sb	= kill_litter_super,
 };
+EXPORT_SYMBOL(tmpfs_fs_type);
+
+#ifdef CONFIG_VE
+#define shm_mnt	(get_exec_env()->shmem_mnt)
+#else
 static struct vfsmount *shm_mnt;
+#endif
 
 static int __init init_tmpfs(void)
 {
@@ -2276,6 +2308,36 @@ out3:
 }
 module_init(init_tmpfs)
 
+static inline int shm_charge_ahead(struct inode *inode)
+{
+#ifdef CONFIG_USER_RESOURCE
+	struct shmem_inode_info *info = SHMEM_I(inode);
+	unsigned long idx;
+	swp_entry_t *entry;
+
+	if (!inode->i_size)
+		return 0;	/* zero-size file: no entry to touch */
+	idx = (inode->i_size - 1) >> PAGE_CACHE_SHIFT;
+	/*
+	 * Touch the swap-entry slot of the page holding the file's last
+	 * byte so that its space is allocated and all UBC checks are made.
+	 */
+	spin_lock(&info->lock);
+	entry = shmem_swp_alloc(info, idx, SGP_CACHE);
+	if (IS_ERR(entry))
+		goto err;
+	shmem_swp_unmap(entry);
+	spin_unlock(&info->lock);
+	return 0;
+
+err:
+	spin_unlock(&info->lock);
+	return PTR_ERR(entry);	/* error code from shmem_swp_alloc() */
+#else /* !CONFIG_USER_RESOURCE */
+	return 0;
+#endif /* CONFIG_USER_RESOURCE */
+}
+
 /*
  * shmem_file_setup - get an unlinked file living in tmpfs
  *
@@ -2323,6 +2385,10 @@ struct file *shmem_file_setup(char *name
 	d_instantiate(dentry, inode);
 	inode->i_size = size;
 	inode->i_nlink = 0;	/* It is unlinked */
+	error = shm_charge_ahead(inode);
+	if (error)
+		goto close_file;
+
 	file->f_vfsmnt = mntget(shm_mnt);
 	file->f_dentry = dentry;
 	file->f_mapping = inode->i_mapping;
@@ -2338,6 +2404,7 @@ put_memory:
 	shmem_unacct_size(flags, size);
 	return ERR_PTR(error);
 }
+EXPORT_SYMBOL_GPL(shmem_file_setup);
 
 /*
  * shmem_zero_setup - setup a shared anonymous mapping
@@ -2355,6 +2422,8 @@ int shmem_zero_setup(struct vm_area_stru
 
 	if (vma->vm_file)
 		fput(vma->vm_file);
+	else if (vma->vm_flags & VM_WRITE)
+		__ub_unused_privvm_dec(vma->vm_mm, size >> PAGE_SHIFT);
 	vma->vm_file = file;
 	vma->vm_ops = &shmem_vm_ops;
 	return 0;
diff -uprN linux-2.6.16/mm/slab.c linux-2.6.16.ovz/mm/slab.c
--- linux-2.6.16/mm/slab.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/mm/slab.c	2006-07-05 08:34:56.000000000 -0400
@@ -105,32 +105,19 @@
 #include	<linux/nodemask.h>
 #include	<linux/mempolicy.h>
 #include	<linux/mutex.h>
+#include	<linux/kmem_slab.h>
+#include	<linux/kmem_cache.h>
 
 #include	<asm/uaccess.h>
 #include	<asm/cacheflush.h>
 #include	<asm/tlbflush.h>
 #include	<asm/page.h>
 
-/*
- * DEBUG	- 1 for kmem_cache_create() to honour; SLAB_DEBUG_INITIAL,
- *		  SLAB_RED_ZONE & SLAB_POISON.
- *		  0 for faster, smaller code (especially in the critical paths).
- *
- * STATS	- 1 to collect stats for /proc/slabinfo.
- *		  0 for faster, smaller code (especially in the critical paths).
- *
- * FORCED_DEBUG	- 1 enables SLAB_RED_ZONE and SLAB_POISON (if possible)
- */
+#include	<ub/ub_mem.h>
 
-#ifdef CONFIG_DEBUG_SLAB
-#define	DEBUG		1
-#define	STATS		1
-#define	FORCED_DEBUG	1
-#else
-#define	DEBUG		0
-#define	STATS		0
-#define	FORCED_DEBUG	0
-#endif
+#define DEBUG		SLAB_DEBUG
+#define STATS		SLAB_STATS
+#define FORCED_DEBUG	SLAB_FORCED_DEBUG
 
 /* Shouldn't this be in a header file somewhere? */
 #define	BYTES_PER_WORD		sizeof(void *)
@@ -173,134 +160,20 @@
 			 SLAB_NO_REAP | SLAB_CACHE_DMA | \
 			 SLAB_MUST_HWCACHE_ALIGN | SLAB_STORE_USER | \
 			 SLAB_RECLAIM_ACCOUNT | SLAB_PANIC | \
-			 SLAB_DESTROY_BY_RCU)
+			 SLAB_DESTROY_BY_RCU | SLAB_UBC | SLAB_NO_CHARGE)
 #else
 # define CREATE_MASK	(SLAB_HWCACHE_ALIGN | SLAB_NO_REAP | \
 			 SLAB_CACHE_DMA | SLAB_MUST_HWCACHE_ALIGN | \
 			 SLAB_RECLAIM_ACCOUNT | SLAB_PANIC | \
-			 SLAB_DESTROY_BY_RCU)
+			 SLAB_DESTROY_BY_RCU | SLAB_UBC | SLAB_NO_CHARGE)
 #endif
 
-/*
- * kmem_bufctl_t:
- *
- * Bufctl's are used for linking objs within a slab
- * linked offsets.
- *
- * This implementation relies on "struct page" for locating the cache &
- * slab an object belongs to.
- * This allows the bufctl structure to be small (one int), but limits
- * the number of objects a slab (not a cache) can contain when off-slab
- * bufctls are used. The limit is the size of the largest general cache
- * that does not use off-slab slabs.
- * For 32bit archs with 4 kB pages, is this 56.
- * This is not serious, as it is only for large objects, when it is unwise
- * to have too many per slab.
- * Note: This limit can be raised by introducing a general cache whose size
- * is less than 512 (PAGE_SIZE<<3), but greater than 256.
- */
-
-typedef unsigned int kmem_bufctl_t;
-#define BUFCTL_END	(((kmem_bufctl_t)(~0U))-0)
-#define BUFCTL_FREE	(((kmem_bufctl_t)(~0U))-1)
-#define	SLAB_LIMIT	(((kmem_bufctl_t)(~0U))-2)
-
 /* Max number of objs-per-slab for caches which use off-slab slabs.
  * Needed to avoid a possible looping condition in cache_grow().
  */
 static unsigned long offslab_limit;
 
 /*
- * struct slab
- *
- * Manages the objs in a slab. Placed either at the beginning of mem allocated
- * for a slab, or allocated from an general cache.
- * Slabs are chained into three list: fully used, partial, fully free slabs.
- */
-struct slab {
-	struct list_head list;
-	unsigned long colouroff;
-	void *s_mem;		/* including colour offset */
-	unsigned int inuse;	/* num of objs active in slab */
-	kmem_bufctl_t free;
-	unsigned short nodeid;
-};
-
-/*
- * struct slab_rcu
- *
- * slab_destroy on a SLAB_DESTROY_BY_RCU cache uses this structure to
- * arrange for kmem_freepages to be called via RCU.  This is useful if
- * we need to approach a kernel structure obliquely, from its address
- * obtained without the usual locking.  We can lock the structure to
- * stabilize it and check it's still at the given address, only if we
- * can be sure that the memory has not been meanwhile reused for some
- * other kind of object (which our subsystem's lock might corrupt).
- *
- * rcu_read_lock before reading the address, then rcu_read_unlock after
- * taking the spinlock within the structure expected at that address.
- *
- * We assume struct slab_rcu can overlay struct slab when destroying.
- */
-struct slab_rcu {
-	struct rcu_head head;
-	struct kmem_cache *cachep;
-	void *addr;
-};
-
-/*
- * struct array_cache
- *
- * Purpose:
- * - LIFO ordering, to hand out cache-warm objects from _alloc
- * - reduce the number of linked list operations
- * - reduce spinlock operations
- *
- * The limit is stored in the per-cpu structure to reduce the data cache
- * footprint.
- *
- */
-struct array_cache {
-	unsigned int avail;
-	unsigned int limit;
-	unsigned int batchcount;
-	unsigned int touched;
-	spinlock_t lock;
-	void *entry[0];		/*
-				 * Must have this definition in here for the proper
-				 * alignment of array_cache. Also simplifies accessing
-				 * the entries.
-				 * [0] is for gcc 2.95. It should really be [].
-				 */
-};
-
-/* bootstrap: The caches do not work without cpuarrays anymore,
- * but the cpuarrays are allocated from the generic caches...
- */
-#define BOOT_CPUCACHE_ENTRIES	1
-struct arraycache_init {
-	struct array_cache cache;
-	void *entries[BOOT_CPUCACHE_ENTRIES];
-};
-
-/*
- * The slab lists for all objects.
- */
-struct kmem_list3 {
-	struct list_head slabs_partial;	/* partial list first, better asm code */
-	struct list_head slabs_full;
-	struct list_head slabs_free;
-	unsigned long free_objects;
-	unsigned long next_reap;
-	int free_touched;
-	unsigned int free_limit;
-	unsigned int colour_next;	/* Per-node cache coloring */
-	spinlock_t list_lock;
-	struct array_cache *shared;	/* shared per node */
-	struct array_cache **alien;	/* on other nodes */
-};
-
-/*
  * Need this for bootstrapping a per node allocator.
  */
 #define NUM_INIT_LISTS (2 * MAX_NUMNODES + 1)
@@ -364,79 +237,6 @@ static void kmem_list3_init(struct kmem_
 	MAKE_LIST((cachep), (&(ptr)->slabs_free), slabs_free, nodeid);	\
 	} while (0)
 
-/*
- * struct kmem_cache
- *
- * manages a cache.
- */
-
-struct kmem_cache {
-/* 1) per-cpu data, touched during every alloc/free */
-	struct array_cache *array[NR_CPUS];
-	unsigned int batchcount;
-	unsigned int limit;
-	unsigned int shared;
-	unsigned int buffer_size;
-/* 2) touched by every alloc & free from the backend */
-	struct kmem_list3 *nodelists[MAX_NUMNODES];
-	unsigned int flags;	/* constant flags */
-	unsigned int num;	/* # of objs per slab */
-	spinlock_t spinlock;
-
-/* 3) cache_grow/shrink */
-	/* order of pgs per slab (2^n) */
-	unsigned int gfporder;
-
-	/* force GFP flags, e.g. GFP_DMA */
-	gfp_t gfpflags;
-
-	size_t colour;		/* cache colouring range */
-	unsigned int colour_off;	/* colour offset */
-	struct kmem_cache *slabp_cache;
-	unsigned int slab_size;
-	unsigned int dflags;	/* dynamic flags */
-
-	/* constructor func */
-	void (*ctor) (void *, struct kmem_cache *, unsigned long);
-
-	/* de-constructor func */
-	void (*dtor) (void *, struct kmem_cache *, unsigned long);
-
-/* 4) cache creation/removal */
-	const char *name;
-	struct list_head next;
-
-/* 5) statistics */
-#if STATS
-	unsigned long num_active;
-	unsigned long num_allocations;
-	unsigned long high_mark;
-	unsigned long grown;
-	unsigned long reaped;
-	unsigned long errors;
-	unsigned long max_freeable;
-	unsigned long node_allocs;
-	unsigned long node_frees;
-	atomic_t allochit;
-	atomic_t allocmiss;
-	atomic_t freehit;
-	atomic_t freemiss;
-#endif
-#if DEBUG
-	/*
-	 * If debugging is enabled, then the allocator can add additional
-	 * fields and/or padding to every object. buffer_size contains the total
-	 * object size including these internal fields, the following two
-	 * variables contain the offset to the user object and its size.
-	 */
-	int obj_offset;
-	int obj_size;
-#endif
-};
-
-#define CFLGS_OFF_SLAB		(0x80000000UL)
-#define	OFF_SLAB(x)	((x)->flags & CFLGS_OFF_SLAB)
-
 #define BATCHREFILL_LIMIT	16
 /* Optimization question: fewer reaps means less 
  * probability for unnessary cpucache drain/refill cycles.
@@ -573,42 +373,6 @@ static void **dbg_userword(struct kmem_c
 #define	BREAK_GFP_ORDER_LO	0
 static int slab_break_gfp_order = BREAK_GFP_ORDER_LO;
 
-/* Functions for storing/retrieving the cachep and or slab from the
- * global 'mem_map'. These are used to find the slab an obj belongs to.
- * With kfree(), these are used to find the cache which an obj belongs to.
- */
-static inline void page_set_cache(struct page *page, struct kmem_cache *cache)
-{
-	page->lru.next = (struct list_head *)cache;
-}
-
-static inline struct kmem_cache *page_get_cache(struct page *page)
-{
-	return (struct kmem_cache *)page->lru.next;
-}
-
-static inline void page_set_slab(struct page *page, struct slab *slab)
-{
-	page->lru.prev = (struct list_head *)slab;
-}
-
-static inline struct slab *page_get_slab(struct page *page)
-{
-	return (struct slab *)page->lru.prev;
-}
-
-static inline struct kmem_cache *virt_to_cache(const void *obj)
-{
-	struct page *page = virt_to_page(obj);
-	return page_get_cache(page);
-}
-
-static inline struct slab *virt_to_slab(const void *obj)
-{
-	struct page *page = virt_to_page(obj);
-	return page_get_slab(page);
-}
-
 /* These are the default caches for kmalloc. Custom caches can have other sizes. */
 struct cache_sizes malloc_sizes[] = {
 #define CACHE(x) { .cs_size = (x) },
@@ -715,9 +479,17 @@ struct kmem_cache *kmem_find_general_cac
 }
 EXPORT_SYMBOL(kmem_find_general_cachep);
 
-static size_t slab_mgmt_size(size_t nr_objs, size_t align)
+static size_t slab_mgmt_size_noalign(size_t nr_objs, int flags)
 {
-	return ALIGN(sizeof(struct slab)+nr_objs*sizeof(kmem_bufctl_t), align);
+	size_t size_noub;
+
+	size_noub = sizeof(struct slab) + nr_objs * sizeof(kmem_bufctl_t);
+	return ALIGN(size_noub, UB_ALIGN(flags)) + nr_objs * UB_EXTRA(flags);
+}
+
+static size_t slab_mgmt_size(size_t nr_objs, size_t align, int flags)
+{
+	return ALIGN(slab_mgmt_size_noalign(nr_objs, flags), align);
 }
 
 /* Calculate the number of objects and left-over bytes for a given
@@ -761,20 +533,23 @@ static void cache_estimate(unsigned long
 		 * into account.
 		 */
 		nr_objs = (slab_size - sizeof(struct slab)) /
-			  (buffer_size + sizeof(kmem_bufctl_t));
+			  (buffer_size + sizeof(kmem_bufctl_t) +
+			   	UB_EXTRA(flags));
 
 		/*
 		 * This calculated number will be either the right
 		 * amount, or one greater than what we want.
 		 */
-		if (slab_mgmt_size(nr_objs, align) + nr_objs*buffer_size
-		       > slab_size)
+		if (slab_mgmt_size(nr_objs, align, flags) +
+				nr_objs * buffer_size > slab_size)
 			nr_objs--;
+		BUG_ON(slab_mgmt_size(nr_objs, align, flags) +
+				nr_objs * buffer_size > slab_size);
 
 		if (nr_objs > SLAB_LIMIT)
 			nr_objs = SLAB_LIMIT;
 
-		mgmt_size = slab_mgmt_size(nr_objs, align);
+		mgmt_size = slab_mgmt_size(nr_objs, align, flags);
 	}
 	*num = nr_objs;
 	*left_over = slab_size - nr_objs*buffer_size - mgmt_size;
@@ -1254,6 +1029,7 @@ void __init kmem_cache_init(void)
 						      sizes[INDEX_AC].cs_size,
 						      ARCH_KMALLOC_MINALIGN,
 						      (ARCH_KMALLOC_FLAGS |
+						       SLAB_UBC|SLAB_NO_CHARGE |
 						       SLAB_PANIC), NULL, NULL);
 
 	if (INDEX_AC != INDEX_L3)
@@ -1261,8 +1037,9 @@ void __init kmem_cache_init(void)
 		    kmem_cache_create(names[INDEX_L3].name,
 				      sizes[INDEX_L3].cs_size,
 				      ARCH_KMALLOC_MINALIGN,
-				      (ARCH_KMALLOC_FLAGS | SLAB_PANIC), NULL,
-				      NULL);
+				      (ARCH_KMALLOC_FLAGS |
+				       SLAB_UBC | SLAB_NO_CHARGE |
+				       SLAB_PANIC), NULL, NULL);
 
 	while (sizes->cs_size != ULONG_MAX) {
 		/*
@@ -1277,14 +1054,14 @@ void __init kmem_cache_init(void)
 							     sizes->cs_size,
 							     ARCH_KMALLOC_MINALIGN,
 							     (ARCH_KMALLOC_FLAGS
+							      | SLAB_UBC
+							      | SLAB_NO_CHARGE
 							      | SLAB_PANIC),
 							     NULL, NULL);
 
 		/* Inc off-slab bufctl limit until the ceiling is hit. */
-		if (!(OFF_SLAB(sizes->cs_cachep))) {
-			offslab_limit = sizes->cs_size - sizeof(struct slab);
-			offslab_limit /= sizeof(kmem_bufctl_t);
-		}
+		if (!(OFF_SLAB(sizes->cs_cachep)))
+			offslab_limit = sizes->cs_size;
 
 		sizes->cs_dmacachep = kmem_cache_create(names->name_dma,
 							sizes->cs_size,
@@ -1704,8 +1481,13 @@ static inline size_t calculate_slab_orde
 			continue;
 
 		/* More than offslab_limit objects will cause problems */
-		if ((flags & CFLGS_OFF_SLAB) && num > offslab_limit)
-			break;
+		if (flags & CFLGS_OFF_SLAB) {
+			unsigned long slab_size;
+
+			slab_size = slab_mgmt_size_noalign(num, flags);
+			if (slab_size > offslab_limit)
+				break;
+		}
 
 		/* Found something acceptable - save it away */
 		cachep->num = num;
@@ -1950,8 +1732,7 @@ kmem_cache_create (const char *name, siz
 		cachep = NULL;
 		goto oops;
 	}
-	slab_size = ALIGN(cachep->num * sizeof(kmem_bufctl_t)
-			  + sizeof(struct slab), align);
+	slab_size = slab_mgmt_size(cachep->num, align, flags);
 
 	/*
 	 * If the slab has been placed off-slab, and we have enough space then
@@ -1964,8 +1745,7 @@ kmem_cache_create (const char *name, siz
 
 	if (flags & CFLGS_OFF_SLAB) {
 		/* really off slab. No need for manual alignment */
-		slab_size =
-		    cachep->num * sizeof(kmem_bufctl_t) + sizeof(struct slab);
+		slab_size = slab_mgmt_size_noalign(cachep->num, flags);
 	}
 
 	cachep->colour_off = cache_line_size();
@@ -2045,6 +1825,7 @@ kmem_cache_create (const char *name, siz
 
 	/* cache setup completed, link it into the list */
 	list_add(&cachep->next, &cache_chain);
+	set_cache_objuse(cachep);
       oops:
 	if (!cachep && (flags & SLAB_PANIC))
 		panic("kmem_cache_create(): failed to create slab `%s'\n",
@@ -2266,6 +2047,8 @@ int kmem_cache_destroy(struct kmem_cache
 			kfree(l3);
 		}
 	}
+
+	ub_kmemcache_free(cachep);
 	kmem_cache_free(&cache_cache, cachep);
 
 	unlock_cpu_hotplug();
@@ -2282,7 +2065,8 @@ static struct slab *alloc_slabmgmt(struc
 
 	if (OFF_SLAB(cachep)) {
 		/* Slab management obj is off-slab. */
-		slabp = kmem_cache_alloc(cachep->slabp_cache, local_flags);
+		slabp = kmem_cache_alloc(cachep->slabp_cache,
+				local_flags & (~__GFP_UBC));
 		if (!slabp)
 			return NULL;
 	} else {
@@ -2292,15 +2076,11 @@ static struct slab *alloc_slabmgmt(struc
 	slabp->inuse = 0;
 	slabp->colouroff = colour_off;
 	slabp->s_mem = objp + colour_off;
+	init_slab_ubps(cachep, slabp);
 
 	return slabp;
 }
 
-static inline kmem_bufctl_t *slab_bufctl(struct slab *slabp)
-{
-	return (kmem_bufctl_t *) (slabp + 1);
-}
-
 static void cache_init_objs(struct kmem_cache *cachep,
 			    struct slab *slabp, unsigned long ctor_flags)
 {
@@ -2470,7 +2250,7 @@ static int cache_grow(struct kmem_cache 
 	/* Get mem for the objs.
 	 * Attempt to allocate a physical page from 'nodeid',
 	 */
-	if (!(objp = kmem_getpages(cachep, flags, nodeid)))
+	if (!(objp = kmem_getpages(cachep, flags & (~__GFP_UBC), nodeid)))
 		goto failed;
 
 	/* Get slab management. */
@@ -2823,6 +2603,11 @@ __cache_alloc(struct kmem_cache *cachep,
 	objp = cache_alloc_debugcheck_after(cachep, flags, objp,
 					    caller);
 	prefetchw(objp);
+
+	if (objp && ub_slab_charge(objp, flags)) {
+		kmem_cache_free(cachep, objp);
+		objp = NULL;
+	}
 	return objp;
 }
 
@@ -2997,6 +2782,8 @@ static inline void __cache_free(struct k
 	check_irq_off();
 	objp = cache_free_debugcheck(cachep, objp, __builtin_return_address(0));
 
+	ub_slab_uncharge(objp);
+
 	/* Make sure we are not freeing a object from another
 	 * node to the array cache on this cpu.
 	 */
@@ -3128,6 +2915,10 @@ void *kmem_cache_alloc_node(struct kmem_
 	ptr = cache_alloc_debugcheck_after(cachep, flags, ptr,
 					   __builtin_return_address(0));
 
+	if (ptr && ub_slab_charge(ptr, flags)) {
+		kmem_cache_free(cachep, ptr);
+		ptr = NULL;
+	}
 	return ptr;
 }
 EXPORT_SYMBOL(kmem_cache_alloc_node);
@@ -3543,6 +3334,7 @@ static void cache_reap(void *unused)
 		return;
 	}
 
+	{KSTAT_PERF_ENTER(cache_reap)
 	list_for_each(walk, &cache_chain) {
 		struct kmem_cache *searchp;
 		struct list_head *p;
@@ -3608,6 +3400,7 @@ static void cache_reap(void *unused)
 	check_irq_on();
 	mutex_unlock(&cache_chain_mutex);
 	next_reap_node();
+	KSTAT_PERF_LEAVE(cache_reap)}
 	/* Setup the next iteration */
 	schedule_delayed_work(&__get_cpu_var(reap_work), REAPTIMEOUT_CPUC);
 }
diff -uprN linux-2.6.16/mm/swap_state.c linux-2.6.16.ovz/mm/swap_state.c
--- linux-2.6.16/mm/swap_state.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/mm/swap_state.c	2006-07-05 08:34:56.000000000 -0400
@@ -18,6 +18,8 @@
 
 #include <asm/pgtable.h>
 
+#include <ub/ub_vmpages.h>
+
 /*
  * swapper_space is a fiction, retained to simplify the path through
  * vmscan's shrink_list, to make sync_page look nicer, and to allow
@@ -52,14 +54,18 @@ static struct {
 	unsigned long find_total;
 	unsigned long noent_race;
 	unsigned long exist_race;
+	unsigned long remove_race;
 } swap_cache_info;
+EXPORT_SYMBOL(swap_cache_info);
 
 void show_swap_cache_info(void)
 {
-	printk("Swap cache: add %lu, delete %lu, find %lu/%lu, race %lu+%lu\n",
+	printk("Swap cache: add %lu, delete %lu, find %lu/%lu, "
+		"race %lu+%lu+%lu\n",
 		swap_cache_info.add_total, swap_cache_info.del_total,
 		swap_cache_info.find_success, swap_cache_info.find_total,
-		swap_cache_info.noent_race, swap_cache_info.exist_race);
+		swap_cache_info.noent_race, swap_cache_info.exist_race,
+		swap_cache_info.remove_race);
 	printk("Free swap  = %lukB\n", nr_swap_pages << (PAGE_SHIFT - 10));
 	printk("Total swap = %lukB\n", total_swap_pages << (PAGE_SHIFT - 10));
 }
@@ -151,7 +157,14 @@ int add_to_swap(struct page * page, gfp_
 		BUG();
 
 	for (;;) {
-		entry = get_swap_page();
+		struct user_beancounter *ub;
+
+		ub = pb_grab_page_ub(page);
+		if (IS_ERR(ub))
+			return 0;
+
+		entry = get_swap_page(ub);
+		put_beancounter(ub);
 		if (!entry.val)
 			return 0;
 
@@ -252,10 +265,13 @@ int move_from_swap_cache(struct page *pa
  */
 static inline void free_swap_cache(struct page *page)
 {
-	if (PageSwapCache(page) && !TestSetPageLocked(page)) {
+	if (!PageSwapCache(page))
+		return;
+	if (!TestSetPageLocked(page)) {
 		remove_exclusive_swap_page(page);
 		unlock_page(page);
-	}
+	} else
+		INC_CACHE_INFO(remove_race);
 }
 
 /* 
diff -uprN linux-2.6.16/mm/swapfile.c linux-2.6.16.ovz/mm/swapfile.c
--- linux-2.6.16/mm/swapfile.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/mm/swapfile.c	2006-07-05 08:34:56.000000000 -0400
@@ -33,6 +33,8 @@
 #include <asm/tlbflush.h>
 #include <linux/swapops.h>
 
+#include <ub/ub_vmpages.h>
+
 DEFINE_SPINLOCK(swap_lock);
 unsigned int nr_swapfiles;
 long total_swap_pages;
@@ -172,7 +174,7 @@ no_page:
 	return 0;
 }
 
-swp_entry_t get_swap_page(void)
+swp_entry_t get_swap_page(struct user_beancounter *ub)
 {
 	struct swap_info_struct *si;
 	pgoff_t offset;
@@ -202,6 +204,7 @@ swp_entry_t get_swap_page(void)
 		offset = scan_swap_map(si);
 		if (offset) {
 			spin_unlock(&swap_lock);
+			ub_swapentry_inc(si, offset, ub);
 			return swp_entry(type, offset);
 		}
 		next = swap_list.next;
@@ -277,6 +280,7 @@ static int swap_entry_free(struct swap_i
 		count--;
 		p->swap_map[offset] = count;
 		if (!count) {
+			ub_swapentry_dec(p, offset);
 			if (offset < p->lowest_bit)
 				p->lowest_bit = offset;
 			if (offset > p->highest_bit)
@@ -423,11 +427,18 @@ void free_swap_and_cache(swp_entry_t ent
  * force COW, vm_page_prot omits write permission from any private vma.
  */
 static void unuse_pte(struct vm_area_struct *vma, pte_t *pte,
-		unsigned long addr, swp_entry_t entry, struct page *page)
+		unsigned long addr, swp_entry_t entry, struct page *page,
+		struct page_beancounter **pb)
 {
-	inc_mm_counter(vma->vm_mm, anon_rss);
+	struct mm_struct *mm;
+
+	mm = vma->vm_mm;
+	inc_mm_counter(mm, anon_rss);
+	inc_vma_rss(vma);
+	ub_unused_privvm_dec(mm, vma);
+	pb_add_ref(page, mm, pb);
 	get_page(page);
-	set_pte_at(vma->vm_mm, addr, pte,
+	set_pte_at(mm, addr, pte,
 		   pte_mkold(mk_pte(page, vma->vm_page_prot)));
 	page_add_anon_rmap(page, vma, addr);
 	swap_free(entry);
@@ -440,7 +451,8 @@ static void unuse_pte(struct vm_area_str
 
 static int unuse_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
 				unsigned long addr, unsigned long end,
-				swp_entry_t entry, struct page *page)
+				swp_entry_t entry, struct page *page,
+				struct page_beancounter **pb)
 {
 	pte_t swp_pte = swp_entry_to_pte(entry);
 	pte_t *pte;
@@ -454,7 +466,7 @@ static int unuse_pte_range(struct vm_are
 		 * Test inline before going to call unuse_pte.
 		 */
 		if (unlikely(pte_same(*pte, swp_pte))) {
-			unuse_pte(vma, pte++, addr, entry, page);
+			unuse_pte(vma, pte++, addr, entry, page, pb);
 			found = 1;
 			break;
 		}
@@ -465,7 +477,8 @@ static int unuse_pte_range(struct vm_are
 
 static inline int unuse_pmd_range(struct vm_area_struct *vma, pud_t *pud,
 				unsigned long addr, unsigned long end,
-				swp_entry_t entry, struct page *page)
+				swp_entry_t entry, struct page *page,
+				struct page_beancounter **pb)
 {
 	pmd_t *pmd;
 	unsigned long next;
@@ -475,7 +488,7 @@ static inline int unuse_pmd_range(struct
 		next = pmd_addr_end(addr, end);
 		if (pmd_none_or_clear_bad(pmd))
 			continue;
-		if (unuse_pte_range(vma, pmd, addr, next, entry, page))
+		if (unuse_pte_range(vma, pmd, addr, next, entry, page, pb))
 			return 1;
 	} while (pmd++, addr = next, addr != end);
 	return 0;
@@ -483,7 +496,8 @@ static inline int unuse_pmd_range(struct
 
 static inline int unuse_pud_range(struct vm_area_struct *vma, pgd_t *pgd,
 				unsigned long addr, unsigned long end,
-				swp_entry_t entry, struct page *page)
+				swp_entry_t entry, struct page *page,
+				struct page_beancounter **pb)
 {
 	pud_t *pud;
 	unsigned long next;
@@ -493,14 +507,15 @@ static inline int unuse_pud_range(struct
 		next = pud_addr_end(addr, end);
 		if (pud_none_or_clear_bad(pud))
 			continue;
-		if (unuse_pmd_range(vma, pud, addr, next, entry, page))
+		if (unuse_pmd_range(vma, pud, addr, next, entry, page, pb))
 			return 1;
 	} while (pud++, addr = next, addr != end);
 	return 0;
 }
 
 static int unuse_vma(struct vm_area_struct *vma,
-				swp_entry_t entry, struct page *page)
+				swp_entry_t entry, struct page *page,
+				struct page_beancounter **pb)
 {
 	pgd_t *pgd;
 	unsigned long addr, end, next;
@@ -521,14 +536,15 @@ static int unuse_vma(struct vm_area_stru
 		next = pgd_addr_end(addr, end);
 		if (pgd_none_or_clear_bad(pgd))
 			continue;
-		if (unuse_pud_range(vma, pgd, addr, next, entry, page))
+		if (unuse_pud_range(vma, pgd, addr, next, entry, page, pb))
 			return 1;
 	} while (pgd++, addr = next, addr != end);
 	return 0;
 }
 
 static int unuse_mm(struct mm_struct *mm,
-				swp_entry_t entry, struct page *page)
+				swp_entry_t entry, struct page *page,
+				struct page_beancounter **pb)
 {
 	struct vm_area_struct *vma;
 
@@ -543,7 +559,7 @@ static int unuse_mm(struct mm_struct *mm
 		lock_page(page);
 	}
 	for (vma = mm->mmap; vma; vma = vma->vm_next) {
-		if (vma->anon_vma && unuse_vma(vma, entry, page))
+		if (vma->anon_vma && unuse_vma(vma, entry, page, pb))
 			break;
 	}
 	up_read(&mm->mmap_sem);
@@ -555,11 +571,12 @@ static int unuse_mm(struct mm_struct *mm
 }
 
 #ifdef CONFIG_MIGRATION
-int remove_vma_swap(struct vm_area_struct *vma, struct page *page)
+int remove_vma_swap(struct vm_area_struct *vma, struct page *page,
+		struct page_beancounter **pb)
 {
 	swp_entry_t entry = { .val = page_private(page) };
 
-	return unuse_vma(vma, entry, page);
+	return unuse_vma(vma, entry, page, pb);
 }
 #endif
 
@@ -618,6 +635,7 @@ static int try_to_unuse(unsigned int typ
 	int retval = 0;
 	int reset_overflow = 0;
 	int shmem;
+	struct page_beancounter *pb;
 
 	/*
 	 * When searching mms for an entry, a good strategy is to
@@ -670,6 +688,13 @@ again:
 			break;
 		}
 
+		pb = NULL;
+		if (pb_alloc_all(&pb)) {
+			page_cache_release(page);
+			retval = -ENOMEM;
+			break;
+		}
+
 		/*
 		 * Don't hold on to start_mm if it looks like exiting.
 		 */
@@ -698,6 +723,20 @@ again:
 		}
 		wait_on_page_writeback(page);
 
+		/* If read failed we cannot map not-uptodate page to 
+		 * user space. Actually, we are in serious troubles,
+		 * we do not even know what process to kill. So, the only
+		 * variant remains: to stop swapoff() and allow someone
+		 * to kill processes to zap invalid pages.
+		 */
+		if (unlikely(!PageUptodate(page))) {
+			pb_free_list(&pb);
+			unlock_page(page);
+			page_cache_release(page);
+			retval = -EIO;
+			break;
+		}
+
 		/*
 		 * Remove all references to entry.
 		 * Whenever we reach init_mm, there's no address space
@@ -709,7 +748,7 @@ again:
 			if (start_mm == &init_mm)
 				shmem = shmem_unuse(entry, page);
 			else
-				retval = unuse_mm(start_mm, entry, page);
+				retval = unuse_mm(start_mm, entry, page, &pb);
 		}
 		if (*swap_map > 1) {
 			int set_start_mm = (*swap_map >= swcount);
@@ -741,7 +780,7 @@ again:
 					set_start_mm = 1;
 					shmem = shmem_unuse(entry, page);
 				} else
-					retval = unuse_mm(mm, entry, page);
+					retval = unuse_mm(mm, entry, page, &pb);
 				if (set_start_mm && *swap_map < swcount) {
 					mmput(new_start_mm);
 					atomic_inc(&mm->mm_users);
@@ -755,6 +794,8 @@ again:
 			mmput(start_mm);
 			start_mm = new_start_mm;
 		}
+
+		pb_free_list(&pb);
 		if (retval) {
 			unlock_page(page);
 			page_cache_release(page);
@@ -1100,6 +1141,10 @@ asmlinkage long sys_swapoff(const char _
 	int i, type, prev;
 	int err;
 	
+	/* The VE admin check is just to be on the safe side: the admin can
+	 * affect swaps only if he has access to the special file, i.e. if he
+	 * has been granted access to the block device, or if the swap file
+	 * is in the area visible to him. */
 	if (!capable(CAP_SYS_ADMIN))
 		return -EPERM;
 
@@ -1199,6 +1244,7 @@ asmlinkage long sys_swapoff(const char _
 	spin_unlock(&swap_lock);
 	mutex_unlock(&swapon_mutex);
 	vfree(swap_map);
+	ub_swap_fini(p);
 	inode = mapping->host;
 	if (S_ISBLK(inode->i_mode)) {
 		struct block_device *bdev = I_BDEV(inode);
@@ -1557,6 +1603,11 @@ asmlinkage long sys_swapon(const char __
 		goto bad_swap;
 	}
 
+	if (ub_swap_init(p, maxpages)) {
+		error = -ENOMEM;
+		goto bad_swap;
+	}
+
 	mutex_lock(&swapon_mutex);
 	spin_lock(&swap_lock);
 	p->flags = SWP_ACTIVE;
diff -uprN linux-2.6.16/mm/vmalloc.c linux-2.6.16.ovz/mm/vmalloc.c
--- linux-2.6.16/mm/vmalloc.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/mm/vmalloc.c	2006-07-05 08:34:56.000000000 -0400
@@ -20,6 +20,8 @@
 #include <asm/uaccess.h>
 #include <asm/tlbflush.h>
 
+#include <ub/ub_debug.h>
+
 
 DEFINE_RWLOCK(vmlist_lock);
 struct vm_struct *vmlist;
@@ -256,6 +258,68 @@ struct vm_struct *get_vm_area_node(unsig
 	return __get_vm_area_node(size, flags, VMALLOC_START, VMALLOC_END, node);
 }
 
+struct vm_struct * get_vm_area_best(unsigned long size, unsigned long flags)
+{
+	unsigned long addr, best_addr, delta, best_delta;
+	struct vm_struct **p, **best_p, *tmp, *area;
+
+	area = (struct vm_struct *) kmalloc(sizeof(*area), GFP_KERNEL);
+	if (!area)
+		return NULL;
+
+	size += PAGE_SIZE; /* one-page gap at the end */
+	addr = VMALLOC_START;
+	best_addr = 0UL;	/* 0 doubles as "no gap found yet" */
+	best_p = NULL;
+	best_delta = PAGE_ALIGN(VMALLOC_END) - VMALLOC_START;
+	/* Best fit: a too-small gap wraps delta to a huge value and loses. */
+	write_lock(&vmlist_lock);
+	for (p = &vmlist; (tmp = *p) &&
+			(tmp->addr <= (void *)PAGE_ALIGN(VMALLOC_END));
+			p = &tmp->next) {
+		if ((size + addr) < addr)	/* address wrap-around */
+			break;
+		delta = (unsigned long) tmp->addr - (size + addr);
+		if (delta < best_delta) {
+			best_delta = delta;
+			best_addr = addr;
+			best_p = p;
+		}
+		addr = tmp->size + (unsigned long) tmp->addr;
+		if (addr > VMALLOC_END-size)
+			break;
+	}
+
+	if (!tmp || (tmp->addr > (void *)PAGE_ALIGN(VMALLOC_END))) {
+		/* also try the free space after the last area walked */
+		delta = (unsigned long) PAGE_ALIGN(VMALLOC_END) - (size + addr);
+		if (delta < best_delta) {
+			best_delta = delta;
+			best_addr = addr;
+			best_p = p;
+		}
+	}
+	if (best_addr) {
+		area->flags = flags;
+		/* place the new area at the top of the chosen gap */
+		area->addr = (void *)(best_addr + best_delta);
+		area->size = size;
+		area->next = *best_p;
+		area->pages = NULL;
+		area->nr_pages = 0;
+		area->phys_addr = 0;
+		*best_p = area;
+		/* check like in __vunmap */
+		WARN_ON((PAGE_SIZE - 1) & (unsigned long)area->addr);
+	} else {	/* no suitable gap: give the descriptor back */
+		kfree(area);
+		area = NULL;
+	}
+	write_unlock(&vmlist_lock);
+
+	return area;
+}
+
 /* Caller must hold vmlist_lock */
 struct vm_struct *__remove_vm_area(void *addr)
 {
@@ -296,7 +360,7 @@ struct vm_struct *remove_vm_area(void *a
 	return v;
 }
 
-void __vunmap(void *addr, int deallocate_pages)
+void __vunmap(void *addr, int deallocate_pages, int uncharge)
 {
 	struct vm_struct *area;
 
@@ -320,6 +384,8 @@ void __vunmap(void *addr, int deallocate
 	if (deallocate_pages) {
 		int i;
 
+		if (uncharge)
+			dec_vmalloc_charged(area);
 		for (i = 0; i < area->nr_pages; i++) {
 			if (unlikely(!area->pages[i]))
 				BUG();
@@ -350,7 +416,7 @@ void __vunmap(void *addr, int deallocate
 void vfree(void *addr)
 {
 	BUG_ON(in_interrupt());
-	__vunmap(addr, 1);
+	__vunmap(addr, 1, 1);
 }
 EXPORT_SYMBOL(vfree);
 
@@ -367,7 +433,7 @@ EXPORT_SYMBOL(vfree);
 void vunmap(void *addr)
 {
 	BUG_ON(in_interrupt());
-	__vunmap(addr, 0);
+	__vunmap(addr, 0, 0);
 }
 EXPORT_SYMBOL(vunmap);
 
@@ -439,10 +505,12 @@ void *__vmalloc_area_node(struct vm_stru
 
 	if (map_vm_area(area, prot, &pages))
 		goto fail;
+
+	inc_vmalloc_charged(area, gfp_mask);
 	return area->addr;
 
 fail:
-	vfree(area->addr);
+	__vunmap(area->addr, 1, 0);
 	return NULL;
 }
 
@@ -486,6 +554,21 @@ void *__vmalloc(unsigned long size, gfp_
 }
 EXPORT_SYMBOL(__vmalloc);
 
+static void *____vmalloc(unsigned long size, gfp_t mask, pgprot_t prot)
+{
+	struct vm_struct *area;
+
+	size = PAGE_ALIGN(size);
+	if (!size || (size >> PAGE_SHIFT) > num_physpages)
+		return NULL;
+
+	area = get_vm_area_best(size, VM_ALLOC);
+	if (!area)
+		return NULL;
+
+	return __vmalloc_area_node(area, mask, prot, -1);
+}
+
 /**
  *	vmalloc  -  allocate virtually contiguous memory
  *
@@ -503,6 +586,26 @@ void *vmalloc(unsigned long size)
 }
 EXPORT_SYMBOL(vmalloc);
 
+void *ub_vmalloc(unsigned long size)
+{
+	return __vmalloc(size, GFP_KERNEL_UBC | __GFP_HIGHMEM, PAGE_KERNEL);
+}
+EXPORT_SYMBOL(ub_vmalloc);
+
+void *vmalloc_best(unsigned long size)
+{
+       return ____vmalloc(size, GFP_KERNEL | __GFP_HIGHMEM, PAGE_KERNEL);
+}
+
+EXPORT_SYMBOL(vmalloc_best);
+
+void *ub_vmalloc_best(unsigned long size)
+{
+       return ____vmalloc(size, GFP_KERNEL_UBC | __GFP_HIGHMEM, PAGE_KERNEL);
+}
+
+EXPORT_SYMBOL(ub_vmalloc_best);
+
 /**
  *	vmalloc_node  -  allocate memory on a specific node
  *
@@ -521,6 +624,12 @@ void *vmalloc_node(unsigned long size, i
 }
 EXPORT_SYMBOL(vmalloc_node);
 
+void *ub_vmalloc_node(unsigned long size, int node)
+{
+	return __vmalloc_node(size, GFP_KERNEL_UBC | __GFP_HIGHMEM, PAGE_KERNEL, node);
+}
+EXPORT_SYMBOL(ub_vmalloc_node);
+
 #ifndef PAGE_KERNEL_EXEC
 # define PAGE_KERNEL_EXEC PAGE_KERNEL
 #endif
@@ -631,3 +740,44 @@ finished:
 	read_unlock(&vmlist_lock);
 	return buf - buf_start;
 }
+
+/*
+ * vprintstat - print a one-shot summary of vmalloc address space usage.
+ *
+ * Walks vmlist under the read lock and reports the summed size of all
+ * areas, their number and the largest gap found from VMALLOC_START on.
+ */
+void vprintstat(void)
+{
+	struct vm_struct *p;
+	unsigned long addr, size, free_size, max_free_size;
+	int num;
+
+	addr = VMALLOC_START;
+	size = max_free_size = 0;
+	num = 0;
+
+	read_lock(&vmlist_lock);
+	for (p = vmlist; p; p = p->next) {
+		/* gap between the end of the previous area and this one */
+		free_size = (unsigned long)p->addr - addr;
+		if (free_size > max_free_size)
+			max_free_size = free_size;
+		addr = (unsigned long)p->addr + p->size;
+		size += p->size;
+		++num;
+	}
+	/*
+	 * Tail gap after the last area.  addr is still VMALLOC_START when
+	 * the list is empty, so the whole range is then reported as free.
+	 */
+	free_size = VMALLOC_END - addr;
+	if (free_size > max_free_size)
+		max_free_size = free_size;
+	read_unlock(&vmlist_lock);
+
+	printk("VMALLOC Used: %luKB Total: %luKB Entries: %d\n"
+			"    Max_Free: %luKB Start: %lx End: %lx\n",
+			size/1024, (VMALLOC_END - VMALLOC_START)/1024, num,
+			max_free_size/1024, VMALLOC_START, VMALLOC_END);
+}
diff -uprN linux-2.6.16/mm/vmscan.c linux-2.6.16.ovz/mm/vmscan.c
--- linux-2.6.16/mm/vmscan.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/mm/vmscan.c	2006-07-05 08:34:56.000000000 -0400
@@ -949,6 +949,17 @@ redo:
 			goto unlock_both;
                 }
 
+		/* Make sure the dirty bit is up to date */
+		if (try_to_unmap(page, 1) == SWAP_FAIL) {
+			rc = -EPERM;
+			goto unlock_both;
+		}
+
+		if (page_mapcount(page)) {
+			rc = -EAGAIN;
+			goto unlock_both;
+		}
+
 		/*
 		 * Default handling if a filesystem does not provide
 		 * a migration function. We can only migrate clean
@@ -1243,6 +1254,7 @@ refill_inactive_zone(struct zone *zone, 
 			reclaim_mapped = 1;
 	}
 
+	{KSTAT_PERF_ENTER(refill_inact)
 	lru_add_drain();
 	spin_lock_irq(&zone->lru_lock);
 	pgmoved = isolate_lru_pages(nr_pages, &zone->active_list,
@@ -1322,6 +1334,7 @@ refill_inactive_zone(struct zone *zone, 
 	local_irq_enable();
 
 	pagevec_release(&pvec);
+	KSTAT_PERF_LEAVE(refill_inact)}
 }
 
 /*
@@ -1438,6 +1451,7 @@ int try_to_free_pages(struct zone **zone
 	unsigned long lru_pages = 0;
 	int i;
 
+	KSTAT_PERF_ENTER(ttfp);
 	sc.gfp_mask = gfp_mask;
 	sc.may_writepage = !laptop_mode;
 	sc.may_swap = 1;
@@ -1500,6 +1514,7 @@ out:
 
 		zone->prev_priority = zone->temp_priority;
 	}
+	KSTAT_PERF_LEAVE(ttfp);
 	return ret;
 }
 
@@ -1832,7 +1847,8 @@ static int __init kswapd_init(void)
 	swap_setup();
 	for_each_pgdat(pgdat)
 		pgdat->kswapd
-		= find_task_by_pid(kernel_thread(kswapd, pgdat, CLONE_KERNEL));
+		= find_task_by_pid_all(kernel_thread(kswapd,
+					pgdat, CLONE_KERNEL));
 	total_memory = nr_free_pagecache_pages();
 	hotcpu_notifier(cpu_callback, 0);
 	return 0;
diff -uprN linux-2.6.16/net/atm/clip.c linux-2.6.16.ovz/net/atm/clip.c
--- linux-2.6.16/net/atm/clip.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/net/atm/clip.c	2006-07-05 08:34:56.000000000 -0400
@@ -613,12 +613,19 @@ static int clip_create(int number)
 
 
 static int clip_device_event(struct notifier_block *this,unsigned long event,
-    void *dev)
+			     void *arg)
 {
+	struct net_device *dev = arg;
+
+	if (event == NETDEV_UNREGISTER) {
+		neigh_ifdown(&clip_tbl, dev);
+		return NOTIFY_DONE;
+	}
+
 	/* ignore non-CLIP devices */
-	if (((struct net_device *) dev)->type != ARPHRD_ATM ||
-	    ((struct net_device *) dev)->hard_start_xmit != clip_start_xmit)
+	if (dev->type != ARPHRD_ATM || dev->hard_start_xmit != clip_start_xmit)
 		return NOTIFY_DONE;
+
 	switch (event) {
 		case NETDEV_UP:
 			DPRINTK("clip_device_event NETDEV_UP\n");
@@ -686,14 +693,12 @@ static struct notifier_block clip_inet_n
 static void atmarpd_close(struct atm_vcc *vcc)
 {
 	DPRINTK("atmarpd_close\n");
-	atmarpd = NULL; /* assumed to be atomic */
-	barrier();
-	unregister_inetaddr_notifier(&clip_inet_notifier);
-	unregister_netdevice_notifier(&clip_dev_notifier);
-	if (skb_peek(&sk_atm(vcc)->sk_receive_queue))
-		printk(KERN_ERR "atmarpd_close: closing with requests "
-		    "pending\n");
+
+	rtnl_lock();
+	atmarpd = NULL;
 	skb_queue_purge(&sk_atm(vcc)->sk_receive_queue);
+	rtnl_unlock();
+
 	DPRINTK("(done)\n");
 	module_put(THIS_MODULE);
 }
@@ -714,7 +719,12 @@ static struct atm_dev atmarpd_dev = {
 
 static int atm_init_atmarp(struct atm_vcc *vcc)
 {
-	if (atmarpd) return -EADDRINUSE;
+	rtnl_lock();
+	if (atmarpd) {
+		rtnl_unlock();
+		return -EADDRINUSE;
+	}
+
 	if (start_timer) {
 		start_timer = 0;
 		init_timer(&idle_timer);
@@ -731,10 +741,7 @@ static int atm_init_atmarp(struct atm_vc
 	vcc->push = NULL;
 	vcc->pop = NULL; /* crash */
 	vcc->push_oam = NULL; /* crash */
-	if (register_netdevice_notifier(&clip_dev_notifier))
-		printk(KERN_ERR "register_netdevice_notifier failed\n");
-	if (register_inetaddr_notifier(&clip_inet_notifier))
-		printk(KERN_ERR "register_inetaddr_notifier failed\n");
+	rtnl_unlock();
 	return 0;
 }
 
@@ -992,6 +999,8 @@ static int __init atm_clip_init(void)
 
 	clip_tbl_hook = &clip_tbl;
 	register_atm_ioctl(&clip_ioctl_ops);
+	register_netdevice_notifier(&clip_dev_notifier);
+	register_inetaddr_notifier(&clip_inet_notifier);
 
 #ifdef CONFIG_PROC_FS
 {
@@ -1012,6 +1021,9 @@ static void __exit atm_clip_exit(void)
 
 	remove_proc_entry("arp", atm_proc_root);
 
+	unregister_inetaddr_notifier(&clip_inet_notifier);
+	unregister_netdevice_notifier(&clip_dev_notifier);
+
 	deregister_atm_ioctl(&clip_ioctl_ops);
 
 	/* First, stop the idle timer, so it stops banging
diff -uprN linux-2.6.16/net/bridge/br_netfilter.c linux-2.6.16.ovz/net/bridge/br_netfilter.c
--- linux-2.6.16/net/bridge/br_netfilter.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/net/bridge/br_netfilter.c	2006-07-05 08:34:56.000000000 -0400
@@ -739,6 +739,15 @@ out:
 	return NF_STOLEN;
 }
 
+static int br_nf_dev_queue_xmit(struct sk_buff *skb)
+{
+	if (skb->protocol == htons(ETH_P_IP) &&
+	    skb->len > skb->dev->mtu &&
+	    !(skb_shinfo(skb)->ufo_size || skb_shinfo(skb)->tso_size))
+		return ip_fragment(skb, br_dev_queue_push_xmit);
+	else
+		return br_dev_queue_push_xmit(skb);
+}
 
 /* PF_BRIDGE/POST_ROUTING ********************************************/
 static unsigned int br_nf_post_routing(unsigned int hook, struct sk_buff **pskb,
@@ -798,7 +807,7 @@ static unsigned int br_nf_post_routing(u
 		realoutdev = nf_bridge->netoutdev;
 #endif
 	NF_HOOK(pf, NF_IP_POST_ROUTING, skb, NULL, realoutdev,
-	        br_dev_queue_push_xmit);
+	        br_nf_dev_queue_xmit);
 
 	return NF_STOLEN;
 
@@ -843,7 +852,7 @@ static unsigned int ip_sabotage_out(unsi
 	if ((out->hard_start_xmit == br_dev_xmit &&
 	    okfn != br_nf_forward_finish &&
 	    okfn != br_nf_local_out_finish &&
-	    okfn != br_dev_queue_push_xmit)
+	    okfn != br_nf_dev_queue_xmit)
 #if defined(CONFIG_VLAN_8021Q) || defined(CONFIG_VLAN_8021Q_MODULE)
 	    || ((out->priv_flags & IFF_802_1Q_VLAN) &&
 	    VLAN_DEV_INFO(out)->real_dev->hard_start_xmit == br_dev_xmit)
diff -uprN linux-2.6.16/net/compat.c linux-2.6.16.ovz/net/compat.c
--- linux-2.6.16/net/compat.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/net/compat.c	2006-07-05 08:34:56.000000000 -0400
@@ -308,107 +308,6 @@ void scm_detach_fds_compat(struct msghdr
 }
 
 /*
- * For now, we assume that the compatibility and native version
- * of struct ipt_entry are the same - sfr.  FIXME
- */
-struct compat_ipt_replace {
-	char			name[IPT_TABLE_MAXNAMELEN];
-	u32			valid_hooks;
-	u32			num_entries;
-	u32			size;
-	u32			hook_entry[NF_IP_NUMHOOKS];
-	u32			underflow[NF_IP_NUMHOOKS];
-	u32			num_counters;
-	compat_uptr_t		counters;	/* struct ipt_counters * */
-	struct ipt_entry	entries[0];
-};
-
-static int do_netfilter_replace(int fd, int level, int optname,
-				char __user *optval, int optlen)
-{
-	struct compat_ipt_replace __user *urepl;
-	struct ipt_replace __user *repl_nat;
-	char name[IPT_TABLE_MAXNAMELEN];
-	u32 origsize, tmp32, num_counters;
-	unsigned int repl_nat_size;
-	int ret;
-	int i;
-	compat_uptr_t ucntrs;
-
-	urepl = (struct compat_ipt_replace __user *)optval;
-	if (get_user(origsize, &urepl->size))
-		return -EFAULT;
-
-	/* Hack: Causes ipchains to give correct error msg --RR */
-	if (optlen != sizeof(*urepl) + origsize)
-		return -ENOPROTOOPT;
-
-	/* XXX Assumes that size of ipt_entry is the same both in
-	 *     native and compat environments.
-	 */
-	repl_nat_size = sizeof(*repl_nat) + origsize;
-	repl_nat = compat_alloc_user_space(repl_nat_size);
-
-	ret = -EFAULT;
-	if (put_user(origsize, &repl_nat->size))
-		goto out;
-
-	if (!access_ok(VERIFY_READ, urepl, optlen) ||
-	    !access_ok(VERIFY_WRITE, repl_nat, optlen))
-		goto out;
-
-	if (__copy_from_user(name, urepl->name, sizeof(urepl->name)) ||
-	    __copy_to_user(repl_nat->name, name, sizeof(repl_nat->name)))
-		goto out;
-
-	if (__get_user(tmp32, &urepl->valid_hooks) ||
-	    __put_user(tmp32, &repl_nat->valid_hooks))
-		goto out;
-
-	if (__get_user(tmp32, &urepl->num_entries) ||
-	    __put_user(tmp32, &repl_nat->num_entries))
-		goto out;
-
-	if (__get_user(num_counters, &urepl->num_counters) ||
-	    __put_user(num_counters, &repl_nat->num_counters))
-		goto out;
-
-	if (__get_user(ucntrs, &urepl->counters) ||
-	    __put_user(compat_ptr(ucntrs), &repl_nat->counters))
-		goto out;
-
-	if (__copy_in_user(&repl_nat->entries[0],
-			   &urepl->entries[0],
-			   origsize))
-		goto out;
-
-	for (i = 0; i < NF_IP_NUMHOOKS; i++) {
-		if (__get_user(tmp32, &urepl->hook_entry[i]) ||
-		    __put_user(tmp32, &repl_nat->hook_entry[i]) ||
-		    __get_user(tmp32, &urepl->underflow[i]) ||
-		    __put_user(tmp32, &repl_nat->underflow[i]))
-			goto out;
-	}
-
-	/*
-	 * Since struct ipt_counters just contains two u_int64_t members
-	 * we can just do the access_ok check here and pass the (converted)
-	 * pointer into the standard syscall.  We hope that the pointer is
-	 * not misaligned ...
-	 */
-	if (!access_ok(VERIFY_WRITE, compat_ptr(ucntrs),
-		       num_counters * sizeof(struct ipt_counters)))
-		goto out;
-
-
-	ret = sys_setsockopt(fd, level, optname,
-			     (char __user *)repl_nat, repl_nat_size);
-
-out:
-	return ret;
-}
-
-/*
  * A struct sock_filter is architecture independent.
  */
 struct compat_sock_fprog {
@@ -460,10 +359,6 @@ static int do_set_sock_timeout(int fd, i
 asmlinkage long compat_sys_setsockopt(int fd, int level, int optname,
 				char __user *optval, int optlen)
 {
-	/* SO_SET_REPLACE seems to be the same in all levels */
-	if (optname == IPT_SO_SET_REPLACE)
-		return do_netfilter_replace(fd, level, optname,
-					    optval, optlen);
 	if (level == SOL_SOCKET && optname == SO_ATTACH_FILTER)
 		return do_set_attach_filter(fd, level, optname,
 					    optval, optlen);
diff -uprN linux-2.6.16/net/core/datagram.c linux-2.6.16.ovz/net/core/datagram.c
--- linux-2.6.16/net/core/datagram.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/net/core/datagram.c	2006-07-05 08:34:56.000000000 -0400
@@ -56,6 +56,8 @@
 #include <net/sock.h>
 #include <net/tcp_states.h>
 
+#include <ub/ub_net.h>
+
 /*
  *	Is a socket 'connection oriented' ?
  */
@@ -493,6 +495,7 @@ unsigned int datagram_poll(struct file *
 {
 	struct sock *sk = sock->sk;
 	unsigned int mask;
+	int no_ubc_space;
 
 	poll_wait(file, sk->sk_sleep, wait);
 	mask = 0;
@@ -500,8 +503,14 @@ unsigned int datagram_poll(struct file *
 	/* exceptional events? */
 	if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue))
 		mask |= POLLERR;
-	if (sk->sk_shutdown == SHUTDOWN_MASK)
+	if (sk->sk_shutdown == SHUTDOWN_MASK) {
+		no_ubc_space = 0;
 		mask |= POLLHUP;
+	} else {
+		no_ubc_space = ub_sock_makewres_other(sk, SOCK_MIN_UBCSPACE_CH);
+		if (no_ubc_space)
+			ub_sock_sndqueueadd_other(sk, SOCK_MIN_UBCSPACE_CH);
+	}
 
 	/* readable? */
 	if (!skb_queue_empty(&sk->sk_receive_queue) ||
@@ -518,7 +527,7 @@ unsigned int datagram_poll(struct file *
 	}
 
 	/* writable? */
-	if (sock_writeable(sk))
+	if (!no_ubc_space && sock_writeable(sk))
 		mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
 	else
 		set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
diff -uprN linux-2.6.16/net/core/dev.c linux-2.6.16.ovz/net/core/dev.c
--- linux-2.6.16/net/core/dev.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/net/core/dev.c	2006-07-05 08:34:56.000000000 -0400
@@ -115,6 +115,10 @@
 #include <net/iw_handler.h>
 #endif	/* CONFIG_NET_RADIO */
 #include <asm/current.h>
+#include <ub/beancounter.h>
+
+#include <ub/beancounter.h>
+#include <ub/ub_mem.h>
 
 /*
  *	The list of packet types we will receive (as opposed to discard)
@@ -167,25 +171,40 @@ static struct list_head ptype_all;		/* T
  * unregister_netdevice(), which must be called with the rtnl
  * semaphore held.
  */
+#if defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE)
+#define dev_tail	(get_exec_env()->_net_dev_tail)
+#else
 struct net_device *dev_base;
 static struct net_device **dev_tail = &dev_base;
+EXPORT_SYMBOL(dev_base);
+#endif
 DEFINE_RWLOCK(dev_base_lock);
 
-EXPORT_SYMBOL(dev_base);
 EXPORT_SYMBOL(dev_base_lock);
 
+#ifdef CONFIG_VE
+#define MAX_UNMOVABLE_NETDEVICES (8*4096)
+static uint8_t unmovable_ifindex_list[MAX_UNMOVABLE_NETDEVICES/8];
+static LIST_HEAD(dev_global_list);
+#endif
+
 #define NETDEV_HASHBITS	8
 static struct hlist_head dev_name_head[1<<NETDEV_HASHBITS];
 static struct hlist_head dev_index_head[1<<NETDEV_HASHBITS];
 
-static inline struct hlist_head *dev_name_hash(const char *name)
+struct hlist_head *dev_name_hash(const char *name, struct ve_struct *env)
 {
-	unsigned hash = full_name_hash(name, strnlen(name, IFNAMSIZ));
+	unsigned hash;
+	if (!ve_is_super(env))
+		return visible_dev_head(env);
+	hash = full_name_hash(name, strnlen(name, IFNAMSIZ));
 	return &dev_name_head[hash & ((1<<NETDEV_HASHBITS)-1)];
 }
 
-static inline struct hlist_head *dev_index_hash(int ifindex)
+struct hlist_head *dev_index_hash(int ifindex, struct ve_struct *env)
 {
+	if (!ve_is_super(env))
+		return visible_dev_index_head(env);
 	return &dev_index_head[ifindex & ((1<<NETDEV_HASHBITS)-1)];
 }
 
@@ -469,7 +488,7 @@ struct net_device *__dev_get_by_name(con
 {
 	struct hlist_node *p;
 
-	hlist_for_each(p, dev_name_hash(name)) {
+	hlist_for_each(p, dev_name_hash(name, get_exec_env())) {
 		struct net_device *dev
 			= hlist_entry(p, struct net_device, name_hlist);
 		if (!strncmp(dev->name, name, IFNAMSIZ))
@@ -502,6 +521,32 @@ struct net_device *dev_get_by_name(const
 }
 
 /**
+ *	__dev_global_get_by_name - find a device by its name in dev_global_list
+ *	@name: name to find
+ *
+ *	Find an interface by name. Must be called under RTNL semaphore.
+ *	If the name is found a pointer to the device
+ *	is returned. If the name is not found then %NULL is returned. The
+ *	reference counters are not incremented so the caller must be
+ *	careful with locks.
+ */
+
+#ifdef CONFIG_VE
+struct net_device *__dev_global_get_by_name(const char *name)
+{
+	struct net_device *dev;
+	/* It's called relatively rarely */
+	list_for_each_entry(dev, &dev_global_list, dev_global_list_entry) {
+		if (strncmp(dev->name, name, IFNAMSIZ) == 0)
+			return dev;
+	}
+	return NULL;
+}
+#else	/* CONFIG_VE */
+#define __dev_global_get_by_name(name)		__dev_get_by_name(name)
+#endif	/* CONFIG_VE */
+
+/**
  *	__dev_get_by_index - find a device by its ifindex
  *	@ifindex: index of device
  *
@@ -516,7 +561,7 @@ struct net_device *__dev_get_by_index(in
 {
 	struct hlist_node *p;
 
-	hlist_for_each(p, dev_index_hash(ifindex)) {
+	hlist_for_each(p, dev_index_hash(ifindex, get_exec_env())) {
 		struct net_device *dev
 			= hlist_entry(p, struct net_device, index_hlist);
 		if (dev->ifindex == ifindex)
@@ -635,6 +680,23 @@ int dev_valid_name(const char *name)
 		 || strchr(name, '/'));
 }
 
+static inline void __dev_check_name(const char *dev_name, const char *name, 
+		long *inuse, const int max_netdevices)
+{
+	int i = 0;
+	char buf[IFNAMSIZ];
+
+	if (!sscanf(dev_name, name, &i))
+		return;
+	if (i < 0 || i >= max_netdevices)
+		return;
+
+	/* avoid cases where sscanf is not exact inverse of printf */
+	snprintf(buf, sizeof(buf), name, i);
+	if (!strncmp(buf, dev_name, IFNAMSIZ))
+		set_bit(i, inuse);
+}
+
 /**
  *	dev_alloc_name - allocate a name for a device
  *	@dev: device
@@ -671,16 +733,20 @@ int dev_alloc_name(struct net_device *de
 		if (!inuse)
 			return -ENOMEM;
 
-		for (d = dev_base; d; d = d->next) {
-			if (!sscanf(d->name, name, &i))
-				continue;
-			if (i < 0 || i >= max_netdevices)
-				continue;
-
-			/*  avoid cases where sscanf is not exact inverse of printf */
-			snprintf(buf, sizeof(buf), name, i);
-			if (!strncmp(buf, d->name, IFNAMSIZ))
-				set_bit(i, inuse);
+#ifdef CONFIG_VE
+		if (ve_is_super(get_exec_env())) {
+			list_for_each_entry(d, &dev_global_list, 
+					dev_global_list_entry) {
+				__dev_check_name(d->name, name, inuse, 
+						max_netdevices);
+			}
+		} else
+#endif
+		{
+			for (d = dev_base; d; d = d->next) {
+				__dev_check_name(d->name, name, inuse, 
+						max_netdevices);
+			}
 		}
 
 		i = find_first_zero_bit(inuse, max_netdevices);
@@ -688,7 +754,11 @@ int dev_alloc_name(struct net_device *de
 	}
 
 	snprintf(buf, sizeof(buf), name, i);
-	if (!__dev_get_by_name(buf)) {
+	if (ve_is_super(get_exec_env()))
+		d = __dev_global_get_by_name(buf);
+	else
+		d = __dev_get_by_name(buf);
+	if (d == NULL) {
 		strlcpy(dev->name, buf, IFNAMSIZ);
 		return i;
 	}
@@ -721,13 +791,14 @@ int dev_change_name(struct net_device *d
 	if (!dev_valid_name(newname))
 		return -EINVAL;
 
+	/* Rename of devices in VE is prohibited by CAP_NET_ADMIN */
 	if (strchr(newname, '%')) {
 		err = dev_alloc_name(dev, newname);
 		if (err < 0)
 			return err;
 		strcpy(newname, dev->name);
 	}
-	else if (__dev_get_by_name(newname))
+	else if (__dev_global_get_by_name(newname))
 		return -EEXIST;
 	else
 		strlcpy(dev->name, newname, IFNAMSIZ);
@@ -735,7 +806,8 @@ int dev_change_name(struct net_device *d
 	err = class_device_rename(&dev->class_dev, dev->name);
 	if (!err) {
 		hlist_del(&dev->name_hlist);
-		hlist_add_head(&dev->name_hlist, dev_name_hash(dev->name));
+		hlist_add_head(&dev->name_hlist, dev_name_hash(dev->name,
+					get_exec_env()));
 		notifier_call_chain(&netdev_chain, NETDEV_CHANGENAME, dev);
 	}
 
@@ -1294,6 +1366,25 @@ int dev_queue_xmit(struct sk_buff *skb)
 	skb->tc_verd = SET_TC_AT(skb->tc_verd,AT_EGRESS);
 #endif
 	if (q->enqueue) {
+		struct user_beancounter *ub;
+
+		ub = netdev_bc(dev)->exec_ub;
+		/* the skb CAN already be charged if it was transmitted via
+		 * something like a bonding device */
+		if (ub && (skb_bc(skb)->resource == 0)) {
+			unsigned long chargesize;
+			chargesize = skb_charge_fullsize(skb);
+			if (charge_beancounter(ub, UB_OTHERSOCKBUF,
+						chargesize, UB_SOFT)) {
+				rcu_read_unlock();
+				rc = -ENOMEM;
+				goto out_kfree_skb;
+			}
+			skb_bc(skb)->ub = ub;
+			skb_bc(skb)->charged = chargesize;
+			skb_bc(skb)->resource = UB_OTHERSOCKBUF;
+		}
+
 		/* Grab device queue */
 		spin_lock(&dev->queue_lock);
 
@@ -1580,6 +1671,7 @@ int netif_receive_skb(struct sk_buff *sk
 	struct net_device *orig_dev;
 	int ret = NET_RX_DROP;
 	unsigned short type;
+	struct ve_struct *old_env;
 
 	/* if we've gotten here through NAPI, check netpoll */
 	if (skb->dev->poll && netpoll_rx(skb))
@@ -1598,6 +1690,17 @@ int netif_receive_skb(struct sk_buff *sk
 	skb->h.raw = skb->nh.raw = skb->data;
 	skb->mac_len = skb->nh.raw - skb->mac.raw;
 
+#ifdef CONFIG_VE
+	/*
+	 * The skb may have been allocated in a VE context other than its device's.
+	 * So, set the correct owner_env.
+	 */
+	skb->owner_env = skb->dev->owner_env;
+	BUG_ON(skb->owner_env == NULL);
+#endif
+
+	old_env = set_exec_env(VE_OWNER_SKB(skb));
+
 	pt_prev = NULL;
 
 	rcu_read_lock();
@@ -1663,6 +1766,7 @@ ncls:
 
 out:
 	rcu_read_unlock();
+	(void)set_exec_env(old_env);
 	return ret;
 }
 
@@ -2038,7 +2142,7 @@ static int __init dev_proc_init(void)
 {
 	int rc = -ENOMEM;
 
-	if (!proc_net_fops_create("dev", S_IRUGO, &dev_seq_fops))
+	if (!proc_glob_fops_create("net/dev", S_IRUGO, &dev_seq_fops))
 		goto out;
 	if (!proc_net_fops_create("softnet_stat", S_IRUGO, &softnet_seq_fops))
 		goto out_dev;
@@ -2050,7 +2154,7 @@ out:
 out_softnet:
 	proc_net_remove("softnet_stat");
 out_dev:
-	proc_net_remove("dev");
+	remove_proc_glob_entry("net/dev", NULL);
 	goto out;
 }
 #else
@@ -2115,6 +2219,9 @@ void dev_set_promiscuity(struct net_devi
 		dev->flags &= ~IFF_PROMISC;
 	else
 		dev->flags |= IFF_PROMISC;
+	/* Promiscuous mode on these devices does not mean anything */
+	if (dev->flags & (IFF_LOOPBACK|IFF_POINTOPOINT))
+		return;
 	if (dev->flags != old_flags) {
 		dev_mc_upload(dev);
 		printk(KERN_INFO "device %s %s promiscuous mode\n",
@@ -2529,9 +2636,28 @@ int dev_ioctl(unsigned int cmd, void __u
 		 *	- require strict serialization.
 		 *	- do not return a value
 		 */
+		case SIOCSIFMTU:
+			if (!capable(CAP_NET_ADMIN) &&
+			    !capable(CAP_VE_NET_ADMIN))
+				return -EPERM;
+			dev_load(ifr.ifr_name);
+			rtnl_lock();
+			if (!ve_is_super(get_exec_env())) {
+				struct net_device *dev;
+				ret = -ENODEV;
+				if ((dev = __dev_get_by_name(ifr.ifr_name)) == NULL)
+					goto out_set_mtu_unlock;
+				ret = -EPERM;
+				if (ifr.ifr_mtu > dev->orig_mtu)
+					goto out_set_mtu_unlock;
+			}
+			ret = dev_ifsioc(&ifr, cmd);
+out_set_mtu_unlock:
+			rtnl_unlock();
+			return ret;
+		
 		case SIOCSIFFLAGS:
 		case SIOCSIFMETRIC:
-		case SIOCSIFMTU:
 		case SIOCSIFMAP:
 		case SIOCSIFHWADDR:
 		case SIOCSIFSLAVE:
@@ -2613,20 +2739,73 @@ int dev_ioctl(unsigned int cmd, void __u
  *	dev_new_index	-	allocate an ifindex
  *
  *	Returns a suitable unique value for a new device interface
- *	number.  The caller must hold the rtnl semaphore or the
+ *	number. The caller must hold the rtnl semaphore or the
  *	dev_base_lock to be sure it remains unique.
+ *
+ *	Note: dev->name must be valid on entrance
  */
-static int dev_new_index(void)
+static int dev_ve_new_index(void)
 {
-	static int ifindex;
+#if defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE)
+	int *ifindex = &get_exec_env()->ifindex;
+	int delta = 2;
+#else
+	static int s_ifindex;
+	int *ifindex = &s_ifindex;
+	int delta = 1;
+#endif
 	for (;;) {
-		if (++ifindex <= 0)
-			ifindex = 1;
-		if (!__dev_get_by_index(ifindex))
-			return ifindex;
+		*ifindex += delta;
+		if (*ifindex <= 0)
+			*ifindex = 1;
+		if (!__dev_get_by_index(*ifindex))
+			return *ifindex;
 	}
 }
 
+#ifdef CONFIG_VE
+static int dev_glb_new_index(void)
+{
+	int i;
+
+	i = find_first_zero_bit((long*)unmovable_ifindex_list, 
+			MAX_UNMOVABLE_NETDEVICES);
+
+	if (i == MAX_UNMOVABLE_NETDEVICES)
+		return -EMFILE;
+
+	__set_bit(i, (long*)unmovable_ifindex_list);
+	return (i + 1) * 2;
+}
+#endif
+
+static void dev_glb_free_index(struct net_device *dev)
+{
+#ifdef CONFIG_VE
+	int bit;
+
+	bit = dev->ifindex / 2 - 1;
+	BUG_ON(bit >= MAX_UNMOVABLE_NETDEVICES);
+	__clear_bit(bit, (long*)unmovable_ifindex_list);
+#endif
+}
+
+static int dev_new_index(struct net_device *dev)
+{
+#ifdef CONFIG_VE
+	if (ve_is_super(get_exec_env()) && ve_is_dev_movable(dev))
+		return dev_glb_new_index();
+#endif
+
+	return dev_ve_new_index();
+}
+
+static void dev_free_index(struct net_device *dev)
+{
+	if ((dev->ifindex % 2) == 0)
+		dev_glb_free_index(dev);
+}
+
 static int dev_boot_phase = 1;
 
 /* Delayed registration/unregisteration */
@@ -2669,6 +2848,10 @@ int register_netdevice(struct net_device
 	/* When net_device's are persistent, this will be fatal. */
 	BUG_ON(dev->reg_state != NETREG_UNINITIALIZED);
 
+	ret = -EPERM;
+	if (!ve_is_super(get_exec_env()) && ve_is_dev_movable(dev))
+		goto out;
+
 	spin_lock_init(&dev->queue_lock);
 	spin_lock_init(&dev->xmit_lock);
 	dev->xmit_lock_owner = -1;
@@ -2688,27 +2871,32 @@ int register_netdevice(struct net_device
 		if (ret) {
 			if (ret > 0)
 				ret = -EIO;
-			goto out_err;
+			goto out_free_div;
 		}
 	}
  
 	if (!dev_valid_name(dev->name)) {
 		ret = -EINVAL;
-		goto out_err;
+		goto out_free_div;
+	}
+
+	dev->ifindex = dev_new_index(dev);
+	if (dev->ifindex < 0) {
+		ret = dev->ifindex;
+		goto out_free_div;
 	}
 
-	dev->ifindex = dev_new_index();
 	if (dev->iflink == -1)
 		dev->iflink = dev->ifindex;
 
 	/* Check for existence of name */
-	head = dev_name_hash(dev->name);
+	head = dev_name_hash(dev->name, get_exec_env());
 	hlist_for_each(p, head) {
 		struct net_device *d
 			= hlist_entry(p, struct net_device, name_hlist);
 		if (!strncmp(d->name, dev->name, IFNAMSIZ)) {
 			ret = -EEXIST;
- 			goto out_err;
+ 			goto out_free_ind;
 		}
  	}
 
@@ -2760,12 +2948,21 @@ int register_netdevice(struct net_device
 	set_bit(__LINK_STATE_PRESENT, &dev->state);
 
 	dev->next = NULL;
+	dev->owner_env = get_exec_env();
+	dev->orig_mtu = dev->mtu;
+	netdev_bc(dev)->owner_ub = get_beancounter(get_exec_ub());
+	netdev_bc(dev)->exec_ub = get_beancounter(get_exec_ub());
 	dev_init_scheduler(dev);
+#ifdef CONFIG_VE
+	if (ve_is_super(get_exec_env()))
+		list_add_tail(&dev->dev_global_list_entry, &dev_global_list);
+#endif
 	write_lock_bh(&dev_base_lock);
 	*dev_tail = dev;
 	dev_tail = &dev->next;
 	hlist_add_head(&dev->name_hlist, head);
-	hlist_add_head(&dev->index_hlist, dev_index_hash(dev->ifindex));
+	hlist_add_head(&dev->index_hlist, dev_index_hash(dev->ifindex, 
+						get_exec_env()));
 	dev_hold(dev);
 	dev->reg_state = NETREG_REGISTERING;
 	write_unlock_bh(&dev_base_lock);
@@ -2779,7 +2976,9 @@ int register_netdevice(struct net_device
 
 out:
 	return ret;
-out_err:
+out_free_ind:
+	dev_free_index(dev);
+out_free_div:
 	free_divert_blk(dev);
 	goto out;
 }
@@ -2825,6 +3024,10 @@ int register_netdev(struct net_device *d
 	err = register_netdevice(dev);
 out:
 	rtnl_unlock();
+	if (err == 0 && dev->reg_state != NETREG_REGISTERED) {
+		unregister_netdev(dev);
+		err = -ENOMEM;
+	}
 	return err;
 }
 EXPORT_SYMBOL(register_netdev);
@@ -2907,6 +3110,7 @@ void netdev_run_todo(void)
 {
 	struct list_head list = LIST_HEAD_INIT(list);
 	int err;
+	struct ve_struct *current_env;
 
 
 	/* Need to guard against multiple cpu's getting out of order. */
@@ -2925,22 +3129,30 @@ void netdev_run_todo(void)
 	list_splice_init(&net_todo_list, &list);
 	spin_unlock(&net_todo_list_lock);
 		
+	current_env = get_exec_env();
 	while (!list_empty(&list)) {
 		struct net_device *dev
 			= list_entry(list.next, struct net_device, todo_list);
 		list_del(&dev->todo_list);
 
+		(void)set_exec_env(dev->owner_env);
 		switch(dev->reg_state) {
 		case NETREG_REGISTERING:
+			dev->reg_state = NETREG_REGISTERED;
 			err = netdev_register_sysfs(dev);
-			if (err)
+			if (err) {
 				printk(KERN_ERR "%s: failed sysfs registration (%d)\n",
 				       dev->name, err);
-			dev->reg_state = NETREG_REGISTERED;
+				dev->reg_state = NETREG_REGISTER_ERR;
+				break;
+			}
 			break;
 
 		case NETREG_UNREGISTERING:
 			netdev_unregister_sysfs(dev);
+			/* fall through */
+
+		case NETREG_REGISTER_ERR:
 			dev->reg_state = NETREG_UNREGISTERED;
 
 			netdev_wait_allrefs(dev);
@@ -2951,6 +3163,10 @@ void netdev_run_todo(void)
 			BUG_TRAP(!dev->ip6_ptr);
 			BUG_TRAP(!dev->dn_ptr);
 
+			put_beancounter(netdev_bc(dev)->exec_ub);
+			put_beancounter(netdev_bc(dev)->owner_ub);
+			netdev_bc(dev)->exec_ub = NULL;
+			netdev_bc(dev)->owner_ub = NULL;
 
 			/* It must be the very last action, 
 			 * after this 'dev' may point to freed up memory.
@@ -2965,6 +3181,7 @@ void netdev_run_todo(void)
 			break;
 		}
 	}
+	(void)set_exec_env(current_env);
 
 out:
 	up(&net_todo_run_mutex);
@@ -2990,7 +3207,7 @@ struct net_device *alloc_netdev(int size
 	alloc_size = (sizeof(*dev) + NETDEV_ALIGN_CONST) & ~NETDEV_ALIGN_CONST;
 	alloc_size += sizeof_priv + NETDEV_ALIGN_CONST;
 
-	p = kmalloc(alloc_size, GFP_KERNEL);
+	p = ub_kmalloc(alloc_size, GFP_KERNEL);
 	if (!p) {
 		printk(KERN_ERR "alloc_dev: Unable to allocate device.\n");
 		return NULL;
@@ -3070,7 +3287,8 @@ int unregister_netdevice(struct net_devi
 		return -ENODEV;
 	}
 
-	BUG_ON(dev->reg_state != NETREG_REGISTERED);
+	BUG_ON(dev->reg_state != NETREG_REGISTERED &&
+	       dev->reg_state != NETREG_REGISTER_ERR);
 
 	/* If device is running, close it first. */
 	if (dev->flags & IFF_UP)
@@ -3086,6 +3304,10 @@ int unregister_netdevice(struct net_devi
 				dev_tail = dp;
 			*dp = d->next;
 			write_unlock_bh(&dev_base_lock);
+#ifdef CONFIG_VE
+			if (ve_is_super(get_exec_env()))
+				list_del(&dev->dev_global_list_entry);
+#endif
 			break;
 		}
 	}
@@ -3095,7 +3317,8 @@ int unregister_netdevice(struct net_devi
 		return -ENODEV;
 	}
 
-	dev->reg_state = NETREG_UNREGISTERING;
+	if (dev->reg_state != NETREG_REGISTER_ERR)
+		dev->reg_state = NETREG_UNREGISTERING;
 
 	synchronize_net();
 
@@ -3119,6 +3342,8 @@ int unregister_netdevice(struct net_devi
 	/* Notifier chain MUST detach us from master device. */
 	BUG_TRAP(!dev->master);
 
+	dev_free_index(dev);
+
 	free_divert_blk(dev);
 
 	/* Finish processing unregister after unlock */
@@ -3276,6 +3501,8 @@ EXPORT_SYMBOL(dev_close);
 EXPORT_SYMBOL(dev_get_by_flags);
 EXPORT_SYMBOL(dev_get_by_index);
 EXPORT_SYMBOL(dev_get_by_name);
+EXPORT_SYMBOL(dev_name_hash);
+EXPORT_SYMBOL(dev_index_hash);
 EXPORT_SYMBOL(dev_open);
 EXPORT_SYMBOL(dev_queue_xmit);
 EXPORT_SYMBOL(dev_remove_pack);
diff -uprN linux-2.6.16/net/core/dev_mcast.c linux-2.6.16.ovz/net/core/dev_mcast.c
--- linux-2.6.16/net/core/dev_mcast.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/net/core/dev_mcast.c	2006-07-05 08:34:56.000000000 -0400
@@ -290,9 +290,10 @@ static struct file_operations dev_mc_seq
 
 void __init dev_mcast_init(void)
 {
-	proc_net_fops_create("dev_mcast", 0, &dev_mc_seq_fops);
+	proc_glob_fops_create("net/dev_mcast", 0, &dev_mc_seq_fops);
 }
 
 EXPORT_SYMBOL(dev_mc_add);
 EXPORT_SYMBOL(dev_mc_delete);
 EXPORT_SYMBOL(dev_mc_upload);
+EXPORT_SYMBOL(dev_mc_discard);
diff -uprN linux-2.6.16/net/core/dst.c linux-2.6.16.ovz/net/core/dst.c
--- linux-2.6.16/net/core/dst.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/net/core/dst.c	2006-07-05 08:34:56.000000000 -0400
@@ -95,12 +95,11 @@ static void dst_run_gc(unsigned long dum
 		dst_gc_timer_inc = DST_GC_INC;
 		dst_gc_timer_expires = DST_GC_MIN;
 	}
-	dst_gc_timer.expires = jiffies + dst_gc_timer_expires;
 #if RT_CACHE_DEBUG >= 2
 	printk("dst_total: %d/%d %ld\n",
 	       atomic_read(&dst_total), delayed,  dst_gc_timer_expires);
 #endif
-	add_timer(&dst_gc_timer);
+	mod_timer(&dst_gc_timer, jiffies + dst_gc_timer_expires);
 
 out:
 	spin_unlock(&dst_lock);
@@ -260,11 +259,14 @@ static int dst_dev_event(struct notifier
 	switch (event) {
 	case NETDEV_UNREGISTER:
 	case NETDEV_DOWN:
-		spin_lock_bh(&dst_lock);
+		local_bh_disable();
+		dst_run_gc(0);
+		spin_lock(&dst_lock);
 		for (dst = dst_garbage_list; dst; dst = dst->next) {
 			dst_ifdown(dst, dev, event != NETDEV_DOWN);
 		}
-		spin_unlock_bh(&dst_lock);
+		spin_unlock(&dst_lock);
+		local_bh_enable();
 		break;
 	}
 	return NOTIFY_DONE;
diff -uprN linux-2.6.16/net/core/dv.c linux-2.6.16.ovz/net/core/dv.c
--- linux-2.6.16/net/core/dv.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/net/core/dv.c	2006-07-05 08:34:56.000000000 -0400
@@ -547,3 +547,5 @@ void divert_frame(struct sk_buff *skb)
 		break;
 	}
 }
+
+EXPORT_SYMBOL(free_divert_blk);
diff -uprN linux-2.6.16/net/core/filter.c linux-2.6.16.ovz/net/core/filter.c
--- linux-2.6.16/net/core/filter.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/net/core/filter.c	2006-07-05 08:34:56.000000000 -0400
@@ -34,6 +34,7 @@
 #include <linux/timer.h>
 #include <asm/system.h>
 #include <asm/uaccess.h>
+#include <asm/unaligned.h>
 #include <linux/filter.h>
 
 /* No hurry in this branch */
@@ -177,7 +178,7 @@ unsigned int sk_run_filter(struct sk_buf
 load_w:
 			ptr = load_pointer(skb, k, 4, &tmp);
 			if (ptr != NULL) {
-				A = ntohl(*(u32 *)ptr);
+				A = ntohl(get_unaligned((u32 *)ptr));
 				continue;
 			}
 			break;
@@ -186,7 +187,7 @@ load_w:
 load_h:
 			ptr = load_pointer(skb, k, 2, &tmp);
 			if (ptr != NULL) {
-				A = ntohs(*(u16 *)ptr);
+				A = ntohs(get_unaligned((u16 *)ptr));
 				continue;
 			}
 			break;
@@ -406,7 +407,7 @@ int sk_attach_filter(struct sock_fprog *
 	if (fprog->filter == NULL)
 		return -EINVAL;
 
-	fp = sock_kmalloc(sk, fsize+sizeof(*fp), GFP_KERNEL);
+	fp = sock_kmalloc(sk, fsize+sizeof(*fp), GFP_KERNEL_UBC);
 	if (!fp)
 		return -ENOMEM;
 	if (copy_from_user(fp->insns, fprog->filter, fsize)) {
diff -uprN linux-2.6.16/net/core/neighbour.c linux-2.6.16.ovz/net/core/neighbour.c
--- linux-2.6.16/net/core/neighbour.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/net/core/neighbour.c	2006-07-05 08:34:56.000000000 -0400
@@ -33,6 +33,7 @@
 #include <linux/rtnetlink.h>
 #include <linux/random.h>
 #include <linux/string.h>
+#include <ub/beancounter.h>
 
 #define NEIGH_DEBUG 1
 
@@ -639,6 +640,8 @@ static void neigh_periodic_timer(unsigne
 	struct neigh_table *tbl = (struct neigh_table *)arg;
 	struct neighbour *n, **np;
 	unsigned long expire, now = jiffies;
+	struct ve_struct *env = set_exec_env(tbl->owner_env);
+	struct user_beancounter *ub = set_exec_ub(tbl->owner_ub);
 
 	NEIGH_CACHE_STAT_INC(tbl, periodic_gc_runs);
 
@@ -700,6 +703,8 @@ next_elt:
  	mod_timer(&tbl->gc_timer, now + expire);
 
 	write_unlock(&tbl->lock);
+	set_exec_ub(ub);
+	set_exec_env(env);
 }
 
 static __inline__ int neigh_max_probes(struct neighbour *n)
@@ -727,6 +732,11 @@ static void neigh_timer_handler(unsigned
 	struct neighbour *neigh = (struct neighbour *)arg;
 	unsigned state;
 	int notify = 0;
+	struct ve_struct *env;
+	struct user_beancounter *ub;
+
+	env = set_exec_env(neigh->dev->owner_env);
+	ub = set_exec_ub(netdev_bc(neigh->dev)->exec_ub);
 
 	write_lock(&neigh->lock);
 
@@ -824,6 +834,8 @@ out:
 		neigh_app_notify(neigh);
 #endif
 	neigh_release(neigh);
+	(void)set_exec_ub(ub);
+	(void)set_exec_env(env);
 }
 
 int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb)
@@ -1202,6 +1214,9 @@ static void neigh_proxy_process(unsigned
 	unsigned long now = jiffies;
 	struct sk_buff *skb;
 
+	struct ve_struct *env = set_exec_env(tbl->owner_env);
+	struct user_beancounter *ub = set_exec_ub(tbl->owner_ub);
+
 	spin_lock(&tbl->proxy_queue.lock);
 
 	skb = tbl->proxy_queue.next;
@@ -1213,6 +1228,7 @@ static void neigh_proxy_process(unsigned
 		skb = skb->next;
 		if (tdif <= 0) {
 			struct net_device *dev = back->dev;
+
 			__skb_unlink(back, &tbl->proxy_queue);
 			if (tbl->proxy_redo && netif_running(dev))
 				tbl->proxy_redo(back);
@@ -1220,6 +1236,7 @@ static void neigh_proxy_process(unsigned
 				kfree_skb(back);
 
 			dev_put(dev);
+
 		} else if (!sched_next || tdif < sched_next)
 			sched_next = tdif;
 	}
@@ -1227,6 +1244,8 @@ static void neigh_proxy_process(unsigned
 	if (sched_next)
 		mod_timer(&tbl->proxy_timer, jiffies + sched_next);
 	spin_unlock(&tbl->proxy_queue.lock);
+	(void)set_exec_ub(ub);
+	(void)set_exec_env(env);
 }
 
 void pneigh_enqueue(struct neigh_table *tbl, struct neigh_parms *p,
@@ -1323,12 +1342,15 @@ void neigh_parms_destroy(struct neigh_pa
 }
 
 
-void neigh_table_init(struct neigh_table *tbl)
+int neigh_table_init(struct neigh_table *tbl)
 {
 	unsigned long now = jiffies;
 	unsigned long phsize;
 
 	atomic_set(&tbl->parms.refcnt, 1);
+	atomic_set(&tbl->entries, 0);
+	tbl->hash_chain_gc = 0;
+	tbl->parms.next = NULL;
 	INIT_RCU_HEAD(&tbl->parms.rcu_head);
 	tbl->parms.reachable_time =
 			  neigh_rand_reach_time(tbl->parms.base_reachable_time);
@@ -1336,22 +1358,30 @@ void neigh_table_init(struct neigh_table
 	if (!tbl->kmem_cachep)
 		tbl->kmem_cachep = kmem_cache_create(tbl->id,
 						     tbl->entry_size,
-						     0, SLAB_HWCACHE_ALIGN,
+						     0, SLAB_HWCACHE_ALIGN | SLAB_UBC,
 						     NULL, NULL);
 
 	if (!tbl->kmem_cachep)
-		panic("cannot create neighbour cache");
+		return -ENOMEM;
 
 	tbl->stats = alloc_percpu(struct neigh_statistics);
 	if (!tbl->stats)
-		panic("cannot create neighbour cache statistics");
+		return -ENOMEM;
+	/* take the owner references last: the returns above drop nothing */
+	tbl->owner_env = get_ve(get_exec_env());
+	tbl->owner_ub = get_beancounter(get_exec_ub());
 	
 #ifdef CONFIG_PROC_FS
-	tbl->pde = create_proc_entry(tbl->id, 0, proc_net_stat);
-	if (!tbl->pde) 
-		panic("cannot create neighbour proc dir entry");
-	tbl->pde->proc_fops = &neigh_stat_seq_fops;
-	tbl->pde->data = tbl;
+	if (ve_is_super(get_exec_env())) {
+		char name[strlen(tbl->id) + sizeof("net/stat/")];
+		strcpy(name, "net/stat/");
+		strcat(name, tbl->id);
+		tbl->pde = create_proc_glob_entry(name, S_IRUGO, NULL);
+		if (tbl->pde) {
+			tbl->pde->proc_fops = &neigh_stat_seq_fops;
+			tbl->pde->data = tbl;
+		}
+	}
 #endif
 
 	tbl->hash_mask = 1;
@@ -1361,7 +1391,7 @@ void neigh_table_init(struct neigh_table
 	tbl->phash_buckets = kmalloc(phsize, GFP_KERNEL);
 
 	if (!tbl->hash_buckets || !tbl->phash_buckets)
-		panic("cannot allocate neighbour cache hashes");
+		goto nomem;
 
 	memset(tbl->phash_buckets, 0, phsize);
 
@@ -1385,6 +1415,24 @@ void neigh_table_init(struct neigh_table
 	tbl->next	= neigh_tables;
 	neigh_tables	= tbl;
 	write_unlock(&neigh_tbl_lock);
+	return 0;
+
+nomem:
+	if (tbl->hash_buckets) {
+		neigh_hash_free(tbl->hash_buckets, tbl->hash_mask + 1);
+		tbl->hash_buckets = NULL;
+	}
+	if (tbl->phash_buckets) {
+		kfree(tbl->phash_buckets);
+		tbl->phash_buckets = NULL;
+	}
+	if (tbl->stats) {
+		free_percpu(tbl->stats);
+		tbl->stats = NULL;
+	}
+	put_beancounter(tbl->owner_ub);
+	put_ve(tbl->owner_env);
+	return -ENOMEM;
 }
 
 int neigh_table_clear(struct neigh_table *tbl)
@@ -1398,6 +1446,15 @@ int neigh_table_clear(struct neigh_table
 	neigh_ifdown(tbl, NULL);
 	if (atomic_read(&tbl->entries))
 		printk(KERN_CRIT "neighbour leakage\n");
+#ifdef CONFIG_PROC_FS
+	if (ve_is_super(get_exec_env())) {
+		char name[strlen(tbl->id) + sizeof("net/stat/")];
+		strcpy(name, "net/stat/");
+		strcat(name, tbl->id);
+		remove_proc_glob_entry(name, NULL);
+	}
+#endif
+
 	write_lock(&neigh_tbl_lock);
 	for (tp = &neigh_tables; *tp; tp = &(*tp)->next) {
 		if (*tp == tbl) {
@@ -1413,6 +1470,9 @@ int neigh_table_clear(struct neigh_table
 	kfree(tbl->phash_buckets);
 	tbl->phash_buckets = NULL;
 
+	put_beancounter(tbl->owner_ub);
+	put_ve(tbl->owner_env);
+
 	return 0;
 }
 
@@ -1435,6 +1495,8 @@ int neigh_delete(struct sk_buff *skb, st
 
 		if (tbl->family != ndm->ndm_family)
 			continue;
+		if (!ve_accessible_strict(tbl->owner_env, get_exec_env()))
+			continue;
 		read_unlock(&neigh_tbl_lock);
 
 		err = -EINVAL;
@@ -1488,6 +1550,8 @@ int neigh_add(struct sk_buff *skb, struc
 
 		if (tbl->family != ndm->ndm_family)
 			continue;
+		if (!ve_accessible_strict(tbl->owner_env, get_exec_env()))
+			continue;
 		read_unlock(&neigh_tbl_lock);
 
 		err = -EINVAL;
@@ -1720,6 +1784,9 @@ int neightbl_set(struct sk_buff *skb, st
 		if (ndtmsg->ndtm_family && tbl->family != ndtmsg->ndtm_family)
 			continue;
 
+		if (!ve_accessible_strict(tbl->owner_env, get_exec_env()))
+			continue;
+
 		if (!rtattr_strcmp(tb[NDTA_NAME - 1], tbl->id))
 			break;
 	}
@@ -1941,6 +2008,8 @@ int neigh_dump_info(struct sk_buff *skb,
 	s_t = cb->args[0];
 
 	for (tbl = neigh_tables, t = 0; tbl; tbl = tbl->next, t++) {
+		if (!ve_accessible_strict(tbl->owner_env, get_exec_env()))
+			continue;
 		if (t < s_t || (family && tbl->family != family))
 			continue;
 		if (t > s_t)
@@ -2530,11 +2599,12 @@ int neigh_sysctl_register(struct net_dev
 			  int p_id, int pdev_id, char *p_name, 
 			  proc_handler *handler, ctl_handler *strategy)
 {
-	struct neigh_sysctl_table *t = kmalloc(sizeof(*t), GFP_KERNEL);
+	struct neigh_sysctl_table *t;
 	const char *dev_name_source = NULL;
 	char *dev_name = NULL;
 	int err = 0;
 
+	t = kmalloc(sizeof(*t), GFP_KERNEL);
 	if (!t)
 		return -ENOBUFS;
 	memcpy(t, &neigh_sysctl_template, sizeof(*t));
diff -uprN linux-2.6.16/net/core/net-sysfs.c linux-2.6.16.ovz/net/core/net-sysfs.c
--- linux-2.6.16/net/core/net-sysfs.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/net/core/net-sysfs.c	2006-07-05 08:34:56.000000000 -0400
@@ -388,12 +388,13 @@ static void netdev_release(struct class_
 	struct net_device *dev 
 		= container_of(cd, struct net_device, class_dev);
 
-	BUG_ON(dev->reg_state != NETREG_RELEASED);
+	BUG_ON(dev->reg_state != NETREG_RELEASED &&
+	       dev->reg_state != NETREG_REGISTERING);
 
 	kfree((char *)dev - dev->padded);
 }
 
-static struct class net_class = {
+struct class net_class = {
 	.name = "net",
 	.release = netdev_release,
 	.class_dev_attrs = net_class_attributes,
@@ -401,6 +402,13 @@ static struct class net_class = {
 	.uevent = netdev_uevent,
 #endif
 };
+EXPORT_SYMBOL(net_class);
+
+#ifndef CONFIG_VE
+#define visible_net_class net_class
+#else
+#define visible_net_class (*get_exec_env()->net_class)
+#endif
 
 void netdev_unregister_sysfs(struct net_device * net)
 {
@@ -424,7 +432,7 @@ int netdev_register_sysfs(struct net_dev
 	struct class_device *class_dev = &(net->class_dev);
 	int ret;
 
-	class_dev->class = &net_class;
+	class_dev->class = &visible_net_class;
 	class_dev->class_data = net;
 
 	strlcpy(class_dev->class_id, net->name, BUS_ID_SIZE);
@@ -453,12 +461,21 @@ out_cleanup:
 out_unreg:
 	printk(KERN_WARNING "%s: sysfs attribute registration failed %d\n",
 	       net->name, ret);
-	class_device_unregister(class_dev);
+	/* put is called in free_netdev() */
+	class_device_del(class_dev);
 out:
 	return ret;
 }
 
+void prepare_sysfs_netdev(void)
+{
+#ifdef CONFIG_VE
+	get_ve0()->net_class = &net_class;
+#endif
+}
+
 int netdev_sysfs_init(void)
 {
+	prepare_sysfs_netdev();
 	return class_register(&net_class);
 }
diff -uprN linux-2.6.16/net/core/rtnetlink.c linux-2.6.16.ovz/net/core/rtnetlink.c
--- linux-2.6.16/net/core/rtnetlink.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/net/core/rtnetlink.c	2006-07-05 08:34:56.000000000 -0400
@@ -434,6 +434,8 @@ static int rtnetlink_dump_all(struct sk_
 		if (rtnetlink_links[idx] == NULL ||
 		    rtnetlink_links[idx][type].dumpit == NULL)
 			continue;
+		if (vz_security_proto_check(idx, 0, 0))
+			continue;
 		if (idx > s_idx)
 			memset(&cb->args[0], 0, sizeof(cb->args));
 		if (rtnetlink_links[idx][type].dumpit(skb, cb))
@@ -501,7 +503,7 @@ rtnetlink_rcv_msg(struct sk_buff *skb, s
 		return 0;
 
 	family = ((struct rtgenmsg*)NLMSG_DATA(nlh))->rtgen_family;
-	if (family >= NPROTO) {
+	if (family >= NPROTO || vz_security_proto_check(family, 0, 0)) {
 		*errp = -EAFNOSUPPORT;
 		return -1;
 	}
diff -uprN linux-2.6.16/net/core/scm.c linux-2.6.16.ovz/net/core/scm.c
--- linux-2.6.16/net/core/scm.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/net/core/scm.c	2006-07-05 08:34:56.000000000 -0400
@@ -34,6 +34,7 @@
 #include <net/compat.h>
 #include <net/scm.h>
 
+#include <ub/ub_mem.h>
 
 /*
  *	Only allow a user to send credentials, that they could set with 
@@ -42,7 +43,9 @@
 
 static __inline__ int scm_check_creds(struct ucred *creds)
 {
-	if ((creds->pid == current->tgid || capable(CAP_SYS_ADMIN)) &&
+	if ((creds->pid == virt_tgid(current) ||
+	     creds->pid == current->tgid ||
+	     capable(CAP_VE_SYS_ADMIN)) &&
 	    ((creds->uid == current->uid || creds->uid == current->euid ||
 	      creds->uid == current->suid) || capable(CAP_SETUID)) &&
 	    ((creds->gid == current->gid || creds->gid == current->egid ||
@@ -69,7 +72,7 @@ static int scm_fp_copy(struct cmsghdr *c
 
 	if (!fpl)
 	{
-		fpl = kmalloc(sizeof(struct scm_fp_list), GFP_KERNEL);
+		fpl = ub_kmalloc(sizeof(struct scm_fp_list), GFP_KERNEL);
 		if (!fpl)
 			return -ENOMEM;
 		*fplp = fpl;
@@ -275,7 +278,7 @@ struct scm_fp_list *scm_fp_dup(struct sc
 	if (!fpl)
 		return NULL;
 
-	new_fpl = kmalloc(sizeof(*fpl), GFP_KERNEL);
+	new_fpl = ub_kmalloc(sizeof(*fpl), GFP_KERNEL);
 	if (new_fpl) {
 		for (i=fpl->count-1; i>=0; i--)
 			get_file(fpl->fp[i]);
diff -uprN linux-2.6.16/net/core/skbuff.c linux-2.6.16.ovz/net/core/skbuff.c
--- linux-2.6.16/net/core/skbuff.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/net/core/skbuff.c	2006-07-05 08:34:56.000000000 -0400
@@ -48,6 +48,7 @@
 #include <linux/in.h>
 #include <linux/inet.h>
 #include <linux/slab.h>
+#include <linux/kmem_cache.h>
 #include <linux/netdevice.h>
 #ifdef CONFIG_NET_CLS_ACT
 #include <net/pkt_sched.h>
@@ -68,6 +69,8 @@
 #include <asm/uaccess.h>
 #include <asm/system.h>
 
+#include <ub/ub_net.h>
+
 static kmem_cache_t *skbuff_head_cache __read_mostly;
 static kmem_cache_t *skbuff_fclone_cache __read_mostly;
 
@@ -147,6 +150,9 @@ struct sk_buff *__alloc_skb(unsigned int
 	if (!skb)
 		goto out;
 
+	if (ub_skb_alloc_bc(skb, gfp_mask & ~__GFP_DMA))
+		goto nobc;
+
 	/* Get the DATA. Size must match skb_add_mtu(). */
 	size = SKB_DATA_ALIGN(size);
 	data = kmalloc(size + sizeof(struct skb_shared_info), gfp_mask);
@@ -160,6 +166,7 @@ struct sk_buff *__alloc_skb(unsigned int
 	skb->data = data;
 	skb->tail = data;
 	skb->end  = data + size;
+	SET_VE_OWNER_SKB(skb, get_exec_env());
 	/* make sure we initialize shinfo sequentially */
 	shinfo = skb_shinfo(skb);
 	atomic_set(&shinfo->dataref, 1);
@@ -182,6 +189,8 @@ struct sk_buff *__alloc_skb(unsigned int
 out:
 	return skb;
 nodata:
+	ub_skb_free_bc(skb);
+nobc:
 	kmem_cache_free(cache, skb);
 	skb = NULL;
 	goto out;
@@ -214,6 +223,9 @@ struct sk_buff *alloc_skb_from_cache(kme
 	if (!skb)
 		goto out;
 
+	if (ub_skb_alloc_bc(skb, gfp_mask & ~__GFP_DMA))
+		goto nobc;
+
 	/* Get the DATA. */
 	size = SKB_DATA_ALIGN(size);
 	data = kmem_cache_alloc(cp, gfp_mask);
@@ -227,6 +239,7 @@ struct sk_buff *alloc_skb_from_cache(kme
 	skb->data = data;
 	skb->tail = data;
 	skb->end  = data + size;
+	SET_VE_OWNER_SKB(skb, get_exec_env());
 
 	atomic_set(&(skb_shinfo(skb)->dataref), 1);
 	skb_shinfo(skb)->nr_frags  = 0;
@@ -236,6 +249,8 @@ struct sk_buff *alloc_skb_from_cache(kme
 out:
 	return skb;
 nodata:
+	ub_skb_free_bc(skb);
+nobc:
 	kmem_cache_free(skbuff_head_cache, skb);
 	skb = NULL;
 	goto out;
@@ -290,6 +305,7 @@ void kfree_skbmem(struct sk_buff *skb)
 	atomic_t *fclone_ref;
 
 	skb_release_data(skb);
+	ub_skb_free_bc(skb);
 	switch (skb->fclone) {
 	case SKB_FCLONE_UNAVAILABLE:
 		kmem_cache_free(skbuff_head_cache, skb);
@@ -331,6 +347,7 @@ void __kfree_skb(struct sk_buff *skb)
 #ifdef CONFIG_XFRM
 	secpath_put(skb->sp);
 #endif
+	ub_skb_uncharge(skb);
 	if (skb->destructor) {
 		WARN_ON(in_irq());
 		skb->destructor(skb);
@@ -386,6 +403,11 @@ struct sk_buff *skb_clone(struct sk_buff
 		n->fclone = SKB_FCLONE_UNAVAILABLE;
 	}
 
+	if (ub_skb_alloc_bc(n, gfp_mask)) {
+		kmem_cache_free(skbuff_head_cache, n);
+		return NULL;
+	}
+
 #define C(x) n->x = skb->x
 
 	n->next = n->prev = NULL;
@@ -415,6 +437,7 @@ struct sk_buff *skb_clone(struct sk_buff
 	C(ipvs_property);
 #endif
 	C(protocol);
+	SET_VE_OWNER_SKB(n, VE_OWNER_SKB(skb));
 	n->destructor = NULL;
 #ifdef CONFIG_NETFILTER
 	C(nfmark);
diff -uprN linux-2.6.16/net/core/sock.c linux-2.6.16.ovz/net/core/sock.c
--- linux-2.6.16/net/core/sock.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/net/core/sock.c	2006-07-05 08:34:56.000000000 -0400
@@ -108,6 +108,7 @@
 #include <linux/net.h>
 #include <linux/mm.h>
 #include <linux/slab.h>
+#include <linux/kmem_cache.h>
 #include <linux/interrupt.h>
 #include <linux/poll.h>
 #include <linux/tcp.h>
@@ -124,6 +125,9 @@
 #include <net/xfrm.h>
 #include <linux/ipsec.h>
 
+#include <ub/ub_net.h>
+#include <ub/beancounter.h>
+
 #include <linux/filter.h>
 
 #ifdef CONFIG_INET
@@ -172,7 +176,7 @@ static void sock_warn_obsolete_bsdism(co
 	static char warncomm[TASK_COMM_LEN];
 	if (strcmp(warncomm, current->comm) && warned < 5) { 
 		strcpy(warncomm,  current->comm); 
-		printk(KERN_WARNING "process `%s' is using obsolete "
+		ve_printk(VE_LOG, KERN_WARNING "process `%s' is using obsolete "
 		       "%s SO_BSDCOMPAT\n", warncomm, name);
 		warned++;
 	}
@@ -404,8 +408,9 @@ set_rcvbuf:
 			if (!valbool) {
 				sk->sk_bound_dev_if = 0;
 			} else {
-				if (optlen > IFNAMSIZ) 
-					optlen = IFNAMSIZ; 
+				if (optlen > IFNAMSIZ - 1)
+					optlen = IFNAMSIZ - 1;
+				memset(devname, 0, sizeof(devname));
 				if (copy_from_user(devname, optval, optlen)) {
 					ret = -EFAULT;
 					break;
@@ -659,6 +664,7 @@ struct sock *sk_alloc(int family, gfp_t 
 			 */
 			sk->sk_prot = sk->sk_prot_creator = prot;
 			sock_lock_init(sk);
+			SET_VE_OWNER_SK(sk, get_exec_env());
 		}
 		
 		if (security_sk_alloc(sk, family, priority))
@@ -698,6 +704,7 @@ void sk_free(struct sock *sk)
 		       __FUNCTION__, atomic_read(&sk->sk_omem_alloc));
 
 	security_sk_free(sk);
+	ub_sock_uncharge(sk);
 	if (sk->sk_prot_creator->slab != NULL)
 		kmem_cache_free(sk->sk_prot_creator->slab, sk);
 	else
@@ -742,14 +749,11 @@ struct sock *sk_clone(const struct sock 
 		if (filter != NULL)
 			sk_filter_charge(newsk, filter);
 
-		if (unlikely(xfrm_sk_clone_policy(newsk))) {
-			/* It is still raw copy of parent, so invalidate
-			 * destructor and make plain sk_free() */
-			newsk->sk_destruct = NULL;
-			sk_free(newsk);
-			newsk = NULL;
-			goto out;
-		}
+		if (ub_sock_charge(newsk, newsk->sk_family, newsk->sk_type) < 0)
+			goto out_err;
+
+		if (unlikely(xfrm_sk_clone_policy(newsk)))
+			 goto out_err;
 
 		newsk->sk_err	   = 0;
 		newsk->sk_priority = 0;
@@ -773,8 +777,15 @@ struct sock *sk_clone(const struct sock 
 		if (newsk->sk_prot->sockets_allocated)
 			atomic_inc(newsk->sk_prot->sockets_allocated);
 	}
-out:
 	return newsk;
+
+out_err:
+	/* It is still raw copy of parent, so invalidate
+	 * destructor and make plain sk_free() */
+	sock_reset_flag(newsk, SOCK_TIMESTAMP);
+	newsk->sk_destruct = NULL;
+	sk_free(newsk);
+	return NULL;
 }
 
 EXPORT_SYMBOL_GPL(sk_clone);
@@ -934,14 +945,12 @@ static long sock_wait_for_wmem(struct so
 /*
  *	Generic send/receive buffer handlers
  */
-
-static struct sk_buff *sock_alloc_send_pskb(struct sock *sk,
-					    unsigned long header_len,
-					    unsigned long data_len,
-					    int noblock, int *errcode)
+struct sk_buff *sock_alloc_send_skb2(struct sock *sk, unsigned long size,
+				     unsigned long size2, int noblock,
+				     int *errcode)
 {
 	struct sk_buff *skb;
-	gfp_t gfp_mask;
+	unsigned int gfp_mask;
 	long timeo;
 	int err;
 
@@ -959,46 +968,35 @@ static struct sk_buff *sock_alloc_send_p
 		if (sk->sk_shutdown & SEND_SHUTDOWN)
 			goto failure;
 
-		if (atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf) {
-			skb = alloc_skb(header_len, sk->sk_allocation);
-			if (skb) {
-				int npages;
-				int i;
-
-				/* No pages, we're done... */
-				if (!data_len)
-					break;
-
-				npages = (data_len + (PAGE_SIZE - 1)) >> PAGE_SHIFT;
-				skb->truesize += data_len;
-				skb_shinfo(skb)->nr_frags = npages;
-				for (i = 0; i < npages; i++) {
-					struct page *page;
-					skb_frag_t *frag;
-
-					page = alloc_pages(sk->sk_allocation, 0);
-					if (!page) {
-						err = -ENOBUFS;
-						skb_shinfo(skb)->nr_frags = i;
-						kfree_skb(skb);
-						goto failure;
-					}
-
-					frag = &skb_shinfo(skb)->frags[i];
-					frag->page = page;
-					frag->page_offset = 0;
-					frag->size = (data_len >= PAGE_SIZE ?
-						      PAGE_SIZE :
-						      data_len);
-					data_len -= PAGE_SIZE;
-				}
+		if (ub_sock_getwres_other(sk, skb_charge_size(size))) {
+			if (size2 < size) {
+				size = size2;
+				continue;
+			}
+			set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
+			err = -EAGAIN;
+			if (!timeo)
+				goto failure;
+			if (signal_pending(current))
+				goto interrupted;
+			timeo = ub_sock_wait_for_space(sk, timeo,
+					skb_charge_size(size));
+			continue;
+		}
 
+		if (atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf) {
+			skb = alloc_skb(size, sk->sk_allocation);
+			if (skb)
 				/* Full success... */
 				break;
-			}
+			ub_sock_retwres_other(sk, skb_charge_size(size),
+					SOCK_MIN_UBCSPACE_CH);
 			err = -ENOBUFS;
 			goto failure;
 		}
+		ub_sock_retwres_other(sk,
+				skb_charge_size(size),
+				SOCK_MIN_UBCSPACE_CH);
 		set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
 		set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
 		err = -EAGAIN;
@@ -1009,6 +1007,7 @@ static struct sk_buff *sock_alloc_send_p
 		timeo = sock_wait_for_wmem(sk, timeo);
 	}
 
+	ub_skb_set_charge(skb, sk, skb_charge_size(size), UB_OTHERSOCKBUF);
 	skb_set_owner_w(skb, sk);
 	return skb;
 
@@ -1022,7 +1021,7 @@ failure:
 struct sk_buff *sock_alloc_send_skb(struct sock *sk, unsigned long size, 
 				    int noblock, int *errcode)
 {
-	return sock_alloc_send_pskb(sk, size, 0, noblock, errcode);
+	return sock_alloc_send_skb2(sk, size, size, noblock, errcode);
 }
 
 static void __lock_sock(struct sock *sk)
@@ -1462,7 +1461,8 @@ int proto_register(struct proto *prot, i
 
 	if (alloc_slab) {
 		prot->slab = kmem_cache_create(prot->name, prot->obj_size, 0,
-					       SLAB_HWCACHE_ALIGN, NULL, NULL);
+					       SLAB_HWCACHE_ALIGN | SLAB_UBC,
+					       NULL, NULL);
 
 		if (prot->slab == NULL) {
 			printk(KERN_CRIT "%s: Can't create sock SLAB cache!\n",
@@ -1478,9 +1478,11 @@ int proto_register(struct proto *prot, i
 				goto out_free_sock_slab;
 
 			sprintf(request_sock_slab_name, mask, prot->name);
-			prot->rsk_prot->slab = kmem_cache_create(request_sock_slab_name,
-								 prot->rsk_prot->obj_size, 0,
-								 SLAB_HWCACHE_ALIGN, NULL, NULL);
+			prot->rsk_prot->slab =
+				kmem_cache_create(request_sock_slab_name,
+						prot->rsk_prot->obj_size, 0,
+						SLAB_HWCACHE_ALIGN | SLAB_UBC,
+						NULL, NULL);
 
 			if (prot->rsk_prot->slab == NULL) {
 				printk(KERN_CRIT "%s: Can't create request sock SLAB cache!\n",
@@ -1501,7 +1503,7 @@ int proto_register(struct proto *prot, i
 			prot->twsk_prot->twsk_slab =
 				kmem_cache_create(timewait_sock_slab_name,
 						  prot->twsk_prot->twsk_obj_size,
-						  0, SLAB_HWCACHE_ALIGN,
+						  0, SLAB_HWCACHE_ALIGN | SLAB_UBC,
 						  NULL, NULL);
 			if (prot->twsk_prot->twsk_slab == NULL)
 				goto out_free_timewait_sock_slab_name;
diff -uprN linux-2.6.16/net/core/stream.c linux-2.6.16.ovz/net/core/stream.c
--- linux-2.6.16/net/core/stream.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/net/core/stream.c	2006-07-05 08:34:56.000000000 -0400
@@ -111,8 +111,9 @@ EXPORT_SYMBOL(sk_stream_wait_close);
  * sk_stream_wait_memory - Wait for more memory for a socket
  * @sk: socket to wait for memory
  * @timeo_p: for how long
+ * @amount: amount of memory to wait for (in UB space!)
  */
-int sk_stream_wait_memory(struct sock *sk, long *timeo_p)
+int sk_stream_wait_memory(struct sock *sk, long *timeo_p, unsigned long amount)
 {
 	int err = 0;
 	long vm_wait = 0;
@@ -134,8 +135,11 @@ int sk_stream_wait_memory(struct sock *s
 		if (signal_pending(current))
 			goto do_interrupted;
 		clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
-		if (sk_stream_memory_free(sk) && !vm_wait)
-			break;
+		if (amount == 0) {
+			if (sk_stream_memory_free(sk) && !vm_wait)
+				break;
+		} else
+			ub_sock_sndqueueadd_tcp(sk, amount);
 
 		set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
 		sk->sk_write_pending++;
@@ -144,6 +148,8 @@ int sk_stream_wait_memory(struct sock *s
 						  sk_stream_memory_free(sk) &&
 						  vm_wait);
 		sk->sk_write_pending--;
+		if (amount > 0)
+			ub_sock_sndqueuedel(sk);
 
 		if (vm_wait) {
 			vm_wait -= current_timeo;
diff -uprN linux-2.6.16/net/dccp/ipv6.c linux-2.6.16.ovz/net/dccp/ipv6.c
--- linux-2.6.16/net/dccp/ipv6.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/net/dccp/ipv6.c	2006-07-05 08:34:56.000000000 -0400
@@ -872,6 +872,8 @@ static struct sock *dccp_v6_request_recv
 	ip6_dst_store(newsk, dst, NULL);
 	newsk->sk_route_caps = dst->dev->features &
 		~(NETIF_F_IP_CSUM | NETIF_F_TSO);
+	if (!sysctl_tcp_use_sg)
+		newsk->sk_route_caps &= ~NETIF_F_SG;
 
 	newdp6 = (struct dccp6_sock *)newsk;
 	newinet = inet_sk(newsk);
diff -uprN linux-2.6.16/net/ipv4/af_inet.c linux-2.6.16.ovz/net/ipv4/af_inet.c
--- linux-2.6.16/net/ipv4/af_inet.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/net/ipv4/af_inet.c	2006-07-05 08:34:56.000000000 -0400
@@ -114,6 +114,7 @@
 #ifdef CONFIG_IP_MROUTE
 #include <linux/mroute.h>
 #endif
+#include <ub/ub_net.h>
 
 DEFINE_SNMP_STAT(struct linux_mib, net_statistics) __read_mostly;
 
@@ -298,6 +299,13 @@ lookup_protocol:
 	if (sk == NULL)
 		goto out;
 
+	err = -ENOBUFS;
+	if (ub_sock_charge(sk, PF_INET, sock->type))
+		goto out_sk_free;
+	/* if charge was successful, sock_init_data() MUST be called to
+	 * set sk->sk_type. otherwise sk will be uncharged to wrong resource
+	 */
+
 	err = 0;
 	sk->sk_no_check = answer_no_check;
 	if (INET_PROTOSW_REUSE & answer_flags)
@@ -355,6 +363,9 @@ out:
 out_rcu_unlock:
 	rcu_read_unlock();
 	goto out;
+out_sk_free:
+	sk_free(sk);
+	return err;
 }
 
 
@@ -369,6 +380,9 @@ int inet_release(struct socket *sock)
 
 	if (sk) {
 		long timeout;
+		struct ve_struct *saved_env;
+
+		saved_env = set_exec_env(VE_OWNER_SK(sk));
 
 		/* Applications forget to leave groups before exiting */
 		ip_mc_drop_socket(sk);
@@ -386,6 +400,8 @@ int inet_release(struct socket *sock)
 			timeout = sk->sk_lingertime;
 		sock->sk = NULL;
 		sk->sk_prot->close(sk, timeout);
+
+		(void)set_exec_env(saved_env);
 	}
 	return 0;
 }
@@ -1108,20 +1124,20 @@ static struct net_protocol icmp_protocol
 
 static int __init init_ipv4_mibs(void)
 {
-	net_statistics[0] = alloc_percpu(struct linux_mib);
-	net_statistics[1] = alloc_percpu(struct linux_mib);
-	ip_statistics[0] = alloc_percpu(struct ipstats_mib);
-	ip_statistics[1] = alloc_percpu(struct ipstats_mib);
-	icmp_statistics[0] = alloc_percpu(struct icmp_mib);
-	icmp_statistics[1] = alloc_percpu(struct icmp_mib);
-	tcp_statistics[0] = alloc_percpu(struct tcp_mib);
-	tcp_statistics[1] = alloc_percpu(struct tcp_mib);
-	udp_statistics[0] = alloc_percpu(struct udp_mib);
-	udp_statistics[1] = alloc_percpu(struct udp_mib);
+	ve_net_statistics[0] = alloc_percpu(struct linux_mib);
+	ve_net_statistics[1] = alloc_percpu(struct linux_mib);
+	ve_ip_statistics[0] = alloc_percpu(struct ipstats_mib);
+	ve_ip_statistics[1] = alloc_percpu(struct ipstats_mib);
+	ve_icmp_statistics[0] = alloc_percpu(struct icmp_mib);
+	ve_icmp_statistics[1] = alloc_percpu(struct icmp_mib);
+	ve_tcp_statistics[0] = alloc_percpu(struct tcp_mib);
+	ve_tcp_statistics[1] = alloc_percpu(struct tcp_mib);
+	ve_udp_statistics[0] = alloc_percpu(struct udp_mib);
+	ve_udp_statistics[1] = alloc_percpu(struct udp_mib);
 	if (!
-	    (net_statistics[0] && net_statistics[1] && ip_statistics[0]
-	     && ip_statistics[1] && tcp_statistics[0] && tcp_statistics[1]
-	     && udp_statistics[0] && udp_statistics[1]))
+	    (ve_net_statistics[0] && ve_net_statistics[1] && ve_ip_statistics[0]
+	     && ve_ip_statistics[1] && ve_tcp_statistics[0] && ve_tcp_statistics[1]
+	     && ve_udp_statistics[0] && ve_udp_statistics[1]))
 		return -ENOMEM;
 
 	(void) tcp_mib_init();
diff -uprN linux-2.6.16/net/ipv4/arp.c linux-2.6.16.ovz/net/ipv4/arp.c
--- linux-2.6.16/net/ipv4/arp.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/net/ipv4/arp.c	2006-07-05 08:34:56.000000000 -0400
@@ -175,7 +175,7 @@ struct neigh_ops arp_broken_ops = {
 	.queue_xmit =		dev_queue_xmit,
 };
 
-struct neigh_table arp_tbl = {
+struct neigh_table global_arp_tbl = {
 	.family =	AF_INET,
 	.entry_size =	sizeof(struct neighbour) + 4,
 	.key_len =	4,
@@ -184,7 +184,7 @@ struct neigh_table arp_tbl = {
 	.proxy_redo =	parp_redo,
 	.id =		"arp_cache",
 	.parms = {
-		.tbl =			&arp_tbl,
+		.tbl =			&global_arp_tbl,
 		.base_reachable_time =	30 * HZ,
 		.retrans_time =	1 * HZ,
 		.gc_staletime =	60 * HZ,
@@ -920,6 +920,9 @@ out:
 
 static void parp_redo(struct sk_buff *skb)
 {
+#if defined(CONFIG_NETFILTER) && defined(CONFIG_NETFILTER_DEBUG)
+	skb->nf_debug = 0;
+#endif
 	arp_process(skb);
 }
 
@@ -988,7 +991,7 @@ static int arp_req_set(struct arpreq *r,
 			return 0;
 		}
 		if (dev == NULL) {
-			ipv4_devconf.proxy_arp = 1;
+			ve_ipv4_devconf.proxy_arp = 1;
 			return 0;
 		}
 		if (__in_dev_get_rtnl(dev)) {
@@ -1094,7 +1097,7 @@ static int arp_req_delete(struct arpreq 
 			return pneigh_delete(&arp_tbl, &ip, dev);
 		if (mask == 0) {
 			if (dev == NULL) {
-				ipv4_devconf.proxy_arp = 0;
+				ve_ipv4_devconf.proxy_arp = 0;
 				return 0;
 			}
 			if (__in_dev_get_rtnl(dev)) {
@@ -1240,7 +1243,9 @@ static int arp_proc_init(void);
 
 void __init arp_init(void)
 {
-	neigh_table_init(&arp_tbl);
+	get_ve0()->ve_arp_tbl = &global_arp_tbl;
+	if (neigh_table_init(&arp_tbl))
+		panic("cannot initialize ARP tables\n");
 
 	dev_add_pack(&arp_packet_type);
 	arp_proc_init();
@@ -1372,8 +1377,9 @@ static int arp_seq_open(struct inode *in
 {
 	struct seq_file *seq;
 	int rc = -ENOMEM;
-	struct neigh_seq_state *s = kmalloc(sizeof(*s), GFP_KERNEL);
-       
+	struct neigh_seq_state *s;
+
+	s = kmalloc(sizeof(*s), GFP_KERNEL);
 	if (!s)
 		goto out;
 
@@ -1401,7 +1407,7 @@ static struct file_operations arp_seq_fo
 
 static int __init arp_proc_init(void)
 {
-	if (!proc_net_fops_create("arp", S_IRUGO, &arp_seq_fops))
+	if (!proc_glob_fops_create("net/arp", S_IRUGO, &arp_seq_fops))
 		return -ENOMEM;
 	return 0;
 }
@@ -1421,8 +1427,69 @@ EXPORT_SYMBOL(arp_rcv);
 EXPORT_SYMBOL(arp_create);
 EXPORT_SYMBOL(arp_xmit);
 EXPORT_SYMBOL(arp_send);
-EXPORT_SYMBOL(arp_tbl);
+EXPORT_SYMBOL(global_arp_tbl);
 
 #if defined(CONFIG_ATM_CLIP) || defined(CONFIG_ATM_CLIP_MODULE)
 EXPORT_SYMBOL(clip_tbl_hook);
 #endif
+
+/*
+ * Create the private ARP neighbour table of a VE.
+ *
+ * A new table is allocated in ve->ve_arp_tbl, seeded with a copy of
+ * global_arp_tbl and initialised with @ve installed as the execution
+ * environment.  The previous execution environment is restored before
+ * returning, on success and on failure alike.
+ *
+ * Returns 0 on success, -ENOMEM if the table cannot be allocated, or the
+ * error reported by neigh_table_init().  On failure ve->ve_arp_tbl is NULL.
+ */
+int ve_arp_init(struct ve_struct *ve)
+{
+	struct ve_struct *old_env;
+	int err;
+
+	ve->ve_arp_tbl = kmalloc(sizeof(struct neigh_table), GFP_KERNEL);
+	if (ve->ve_arp_tbl == NULL)
+		return -ENOMEM;
+
+	*(ve->ve_arp_tbl) = global_arp_tbl;
+	/* the copy's parms.tbl still refers to global_arp_tbl; retarget it */
+	ve->ve_arp_tbl->parms.tbl = ve->ve_arp_tbl;
+
+	old_env = set_exec_env(ve);
+	err = neigh_table_init(ve->ve_arp_tbl);
+	if (err)
+		goto out_free;
+#ifdef CONFIG_SYSCTL
+	neigh_sysctl_register(NULL, &arp_tbl.parms, NET_IPV4,
+			      NET_IPV4_NEIGH, "ipv4", NULL, NULL);
+#endif
+	set_exec_env(old_env);
+	return 0;
+
+out_free:
+	/* do not leave the caller running in @ve's environment on failure */
+	set_exec_env(old_env);
+	kfree(ve->ve_arp_tbl);
+	ve->ve_arp_tbl = NULL;
+	return err;
+}
+EXPORT_SYMBOL(ve_arp_init);
+
+/*
+ * Tear down the private ARP neighbour table of a VE, if it has one:
+ * unregister its sysctl entries (CONFIG_SYSCTL), clear the table and free it.
+ */
+void ve_arp_fini(struct ve_struct *ve)
+{
+	if (ve->ve_arp_tbl) {
+#ifdef CONFIG_SYSCTL
+		neigh_sysctl_unregister(&ve->ve_arp_tbl->parms);
+#endif
+		neigh_table_clear(ve->ve_arp_tbl);
+		kfree(ve->ve_arp_tbl);
+		ve->ve_arp_tbl = NULL;
+	}
+}
+EXPORT_SYMBOL(ve_arp_fini);
diff -uprN linux-2.6.16/net/ipv4/devinet.c linux-2.6.16.ovz/net/ipv4/devinet.c
--- linux-2.6.16/net/ipv4/devinet.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/net/ipv4/devinet.c	2006-07-05 08:34:56.000000000 -0400
@@ -71,7 +71,7 @@ struct ipv4_devconf ipv4_devconf = {
 	.shared_media =	  1,
 };
 
-static struct ipv4_devconf ipv4_devconf_dflt = {
+struct ipv4_devconf ipv4_devconf_dflt = {
 	.accept_redirects =  1,
 	.send_redirects =    1,
 	.secure_redirects =  1,
@@ -79,10 +79,16 @@ static struct ipv4_devconf ipv4_devconf_
 	.accept_source_route = 1,
 };
 
+#if defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE)
+#define ve_ipv4_devconf_dflt	(*(get_exec_env()->_ipv4_devconf_dflt))
+#else
+#define ve_ipv4_devconf_dflt	ipv4_devconf_dflt
+#endif
+
 static void rtmsg_ifa(int event, struct in_ifaddr *);
 
 static struct notifier_block *inetaddr_chain;
-static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
+void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
 			 int destroy);
 #ifdef CONFIG_SYSCTL
 static void devinet_sysctl_register(struct in_device *in_dev,
@@ -92,7 +98,7 @@ static void devinet_sysctl_unregister(st
 
 /* Locks all the inet devices. */
 
-static struct in_ifaddr *inet_alloc_ifa(void)
+struct in_ifaddr *inet_alloc_ifa(void)
 {
 	struct in_ifaddr *ifa = kmalloc(sizeof(*ifa), GFP_KERNEL);
 
@@ -103,6 +109,7 @@ static struct in_ifaddr *inet_alloc_ifa(
 
 	return ifa;
 }
+EXPORT_SYMBOL_GPL(inet_alloc_ifa);
 
 static void inet_rcu_free_ifa(struct rcu_head *head)
 {
@@ -175,6 +182,7 @@ out_kfree:
 	in_dev = NULL;
 	goto out;
 }
+EXPORT_SYMBOL_GPL(inetdev_init);
 
 static void in_dev_rcu_put(struct rcu_head *head)
 {
@@ -190,7 +198,7 @@ static void inetdev_destroy(struct in_de
 	ASSERT_RTNL();
 
 	dev = in_dev->dev;
-	if (dev == &loopback_dev)
+	if (dev == &ve0_loopback)
 		return;
 
 	in_dev->dead = 1;
@@ -232,7 +240,7 @@ int inet_addr_onlink(struct in_device *i
 	return 0;
 }
 
-static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
+void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
 			 int destroy)
 {
 	struct in_ifaddr *promote = NULL;
@@ -320,7 +328,7 @@ static void inet_del_ifa(struct in_devic
 	}
 }
 
-static int inet_insert_ifa(struct in_ifaddr *ifa)
+int inet_insert_ifa(struct in_ifaddr *ifa)
 {
 	struct in_device *in_dev = ifa->ifa_dev;
 	struct in_ifaddr *ifa1, **ifap, **last_primary;
@@ -370,6 +378,7 @@ static int inet_insert_ifa(struct in_ifa
 
 	return 0;
 }
+EXPORT_SYMBOL_GPL(inet_insert_ifa);
 
 static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa)
 {
@@ -578,7 +587,7 @@ int devinet_ioctl(unsigned int cmd, void
 
 	case SIOCSIFFLAGS:
 		ret = -EACCES;
-		if (!capable(CAP_NET_ADMIN))
+		if (!capable(CAP_VE_NET_ADMIN))
 			goto out;
 		break;
 	case SIOCSIFADDR:	/* Set interface address (and family) */
@@ -586,7 +595,7 @@ int devinet_ioctl(unsigned int cmd, void
 	case SIOCSIFDSTADDR:	/* Set the destination address */
 	case SIOCSIFNETMASK: 	/* Set the netmask for the interface */
 		ret = -EACCES;
-		if (!capable(CAP_NET_ADMIN))
+		if (!capable(CAP_VE_NET_ADMIN))
 			goto out;
 		ret = -EINVAL;
 		if (sin->sin_family != AF_INET)
@@ -1163,10 +1172,10 @@ static struct rtnetlink_link inet_rtnetl
 void inet_forward_change(void)
 {
 	struct net_device *dev;
-	int on = ipv4_devconf.forwarding;
+	int on = ve_ipv4_devconf.forwarding;
 
-	ipv4_devconf.accept_redirects = !on;
-	ipv4_devconf_dflt.forwarding = on;
+	ve_ipv4_devconf.accept_redirects = !on;
+	ve_ipv4_devconf_dflt.forwarding = on;
 
 	read_lock(&dev_base_lock);
 	for (dev = dev_base; dev; dev = dev->next) {
@@ -1191,9 +1200,9 @@ static int devinet_sysctl_forward(ctl_ta
 	int ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
 
 	if (write && *valp != val) {
-		if (valp == &ipv4_devconf.forwarding)
+		if (valp == &ve_ipv4_devconf.forwarding)
 			inet_forward_change();
-		else if (valp != &ipv4_devconf_dflt.forwarding)
+		else if (valp != &ve_ipv4_devconf_dflt.forwarding)
 			rt_cache_flush(0);
 	}
 
@@ -1464,30 +1473,22 @@ static struct devinet_sysctl_table {
 	},
 };
 
-static void devinet_sysctl_register(struct in_device *in_dev,
-				    struct ipv4_devconf *p)
+static struct devinet_sysctl_table *__devinet_sysctl_register(char *dev_name,
+		int ifindex, struct ipv4_devconf *p)
 {
 	int i;
-	struct net_device *dev = in_dev ? in_dev->dev : NULL;
-	struct devinet_sysctl_table *t = kmalloc(sizeof(*t), GFP_KERNEL);
-	char *dev_name = NULL;
+	struct devinet_sysctl_table *t;
 
+	t = kmalloc(sizeof(*t), GFP_KERNEL);
 	if (!t)
-		return;
+		goto out;
+
 	memcpy(t, &devinet_sysctl, sizeof(*t));
 	for (i = 0; i < ARRAY_SIZE(t->devinet_vars) - 1; i++) {
 		t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf;
 		t->devinet_vars[i].de = NULL;
 	}
 
-	if (dev) {
-		dev_name = dev->name; 
-		t->devinet_dev[0].ctl_name = dev->ifindex;
-	} else {
-		dev_name = "default";
-		t->devinet_dev[0].ctl_name = NET_PROTO_CONF_DEFAULT;
-	}
-
 	/* 
 	 * Make a copy of dev_name, because '.procname' is regarded as const 
 	 * by sysctl and we wouldn't want anyone to change it under our feet
@@ -1495,8 +1496,9 @@ static void devinet_sysctl_register(stru
 	 */	
 	dev_name = kstrdup(dev_name, GFP_KERNEL);
 	if (!dev_name)
-	    goto free;
+	    goto out_free_table;
 
+	t->devinet_dev[0].ctl_name    = ifindex;
 	t->devinet_dev[0].procname    = dev_name;
 	t->devinet_dev[0].child	      = t->devinet_vars;
 	t->devinet_dev[0].de	      = NULL;
@@ -1509,17 +1511,38 @@ static void devinet_sysctl_register(stru
 
 	t->sysctl_header = register_sysctl_table(t->devinet_root_dir, 0);
 	if (!t->sysctl_header)
-	    goto free_procname;
+	    goto out_free_procname;
 
-	p->sysctl = t;
-	return;
+	return t;
 
 	/* error path */
- free_procname:
+out_free_procname:
 	kfree(dev_name);
- free:
+out_free_table:
 	kfree(t);
-	return;
+out:
+	printk(KERN_DEBUG "Can't register net/ipv4/conf sysctls.\n");
+	return NULL;
+}
+
+/*
+ * Register the conf sysctls for @p and remember the table in p->sysctl.
+ * They are named after in_dev's device, or "default" when there is none.
+ */
+static void devinet_sysctl_register(struct in_device *in_dev,
+				    struct ipv4_devconf *p)
+{
+	struct net_device *dev = in_dev ? in_dev->dev : NULL;
+	char *name = "default";
+	int index = NET_PROTO_CONF_DEFAULT;
+
+	if (dev != NULL) {
+		name = dev->name;
+		index = dev->ifindex;
+	}
+
+	/* NULL is stored when registration fails. */
+	p->sysctl = __devinet_sysctl_register(name, index, p);
+}
 
 static void devinet_sysctl_unregister(struct ipv4_devconf *p)
@@ -1532,7 +1555,170 @@ static void devinet_sysctl_unregister(st
 		kfree(t);
 	}
 }
+
+#if defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE)
+static ctl_table net_sysctl_tables[] = {
+	/* 0: net (slot numbers are relied on by ip_forward_sysctl_register()) */
+	{
+		.ctl_name	= CTL_NET,
+		.procname	= "net",
+		.mode		= 0555,
+		.child		= &net_sysctl_tables[2],
+	},
+	{ .ctl_name = 0, },
+	/* 2: net/ipv4 */
+	{
+		.ctl_name	= NET_IPV4,
+		.procname	= "ipv4",
+		.mode		= 0555,
+		.child		= &net_sysctl_tables[4],
+	},
+	{ .ctl_name = 0, },
+	/* 4, 5: entries of net/ipv4 (ip_forward and the route directory) */
+	{
+		.ctl_name	= NET_IPV4_FORWARD,
+		.procname	= "ip_forward",
+		.data		= &ipv4_devconf.forwarding,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &ipv4_sysctl_forward,
+		.strategy	= &ipv4_sysctl_forward_strategy,
+	},
+	{
+		.ctl_name	= NET_IPV4_ROUTE,
+		.procname	= "route",
+		.maxlen		= 0,
+		.mode		= 0555,
+		.child		= &net_sysctl_tables[7],
+	},
+	{ .ctl_name = 0 },
+	/* 7: net/ipv4/route/flush */
+	{
+		.ctl_name 	= NET_IPV4_ROUTE_FLUSH,
+		.procname	= "flush",
+		.data		= NULL, /* filled in by ip_forward_sysctl_register() */
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &ipv4_sysctl_rtcache_flush,
+		.strategy	= &ipv4_sysctl_rtcache_flush_strategy,
+	},
+	{ .ctl_name = 0 },
+};
+
+static int ip_forward_sysctl_register(struct ve_struct *ve,
+		struct ipv4_devconf *p)
+{
+	struct ctl_table_header *hdr;
+	ctl_table *root;
+
+	root = clone_sysctl_template(net_sysctl_tables,
+			sizeof(net_sysctl_tables) / sizeof(ctl_table));
+	if (root == NULL)
+		goto out;
+
+	root[4].data = &p->forwarding;	/* slot 4 is ip_forward: back it with the caller's conf */
+	root[7].data = &ipv4_flush_delay;	/* slot 7 is route/flush */
+
+	hdr = register_sysctl_table(root, 1);
+	if (hdr == NULL)
+		goto out_free;
+
+	ve->forward_header = hdr;	/* kept for ip_forward_sysctl_unregister() */
+	ve->forward_table = root;	/* kept for ip_forward_sysctl_free() */
+	return 0;
+
+out_free:
+	free_sysctl_clone(root);
+out:
+	return -ENOMEM;	/* both failure paths report -ENOMEM */
+}
+
+static inline void ip_forward_sysctl_unregister(struct ve_struct *ve)
+{
+	unregister_sysctl_table(ve->forward_header);
+	ve->forward_header = NULL;
+}
+
+static inline void ip_forward_sysctl_free(struct ve_struct *ve)
+{
+	free_sysctl_clone(ve->forward_table);
+	ve->forward_table = NULL;
+}
+#endif
+#endif
+
+int devinet_sysctl_init(struct ve_struct *ve)
+{
+	int err = 0;	/* stays 0 when the per-VE sysctl code is compiled out */
+#ifdef CONFIG_SYSCTL
+#if defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE)
+	struct ipv4_devconf *conf, *conf_def;
+
+	err = -ENOMEM;	/* result for every allocation/registration failure below */
+
+	conf = kmalloc(sizeof(*conf), GFP_KERNEL);
+	if (!conf)
+		goto err1;
+
+	memcpy(conf, &ipv4_devconf, sizeof(*conf));	/* start from the global values */
+	conf->sysctl = __devinet_sysctl_register("all",
+			NET_PROTO_CONF_ALL, conf);
+	if (!conf->sysctl)
+		goto err2;
+
+	conf_def = kmalloc(sizeof(*conf_def), GFP_KERNEL);
+	if (!conf_def)
+		goto err3;
+
+	memcpy(conf_def, &ipv4_devconf_dflt, sizeof(*conf_def));	/* likewise for "default" */
+	conf_def->sysctl = __devinet_sysctl_register("default",
+			NET_PROTO_CONF_DEFAULT, conf_def);
+	if (!conf_def->sysctl)
+		goto err4;
+
+	err = ip_forward_sysctl_register(ve, conf);
+	if (err)
+		goto err5;
+
+	ve->_ipv4_devconf = conf;	/* published to the VE only once everything succeeded */
+	ve->_ipv4_devconf_dflt = conf_def;
+	return 0;
+
+err5:
+	devinet_sysctl_unregister(conf_def);
+err4:
+	kfree(conf_def);
+err3:
+	devinet_sysctl_unregister(conf);
+err2:
+	kfree(conf);
+err1:
 #endif
+#endif
+	return err;
+}
+
+void devinet_sysctl_fini(struct ve_struct *ve)
+{
+#ifdef CONFIG_SYSCTL
+#if defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE)
+	ip_forward_sysctl_unregister(ve);
+	devinet_sysctl_unregister(ve->_ipv4_devconf);
+	devinet_sysctl_unregister(ve->_ipv4_devconf_dflt);
+#endif
+#endif
+}
+
+void devinet_sysctl_free(struct ve_struct *ve)
+{
+#ifdef CONFIG_SYSCTL
+#if defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE)
+	ip_forward_sysctl_free(ve);
+	kfree(ve->_ipv4_devconf);
+	kfree(ve->_ipv4_devconf_dflt);
+#endif
+#endif
+}
 
 void __init devinet_init(void)
 {
@@ -1542,13 +1728,18 @@ void __init devinet_init(void)
 #ifdef CONFIG_SYSCTL
 	devinet_sysctl.sysctl_header =
 		register_sysctl_table(devinet_sysctl.devinet_root_dir, 0);
-	devinet_sysctl_register(NULL, &ipv4_devconf_dflt);
+	__devinet_sysctl_register("default", NET_PROTO_CONF_DEFAULT,
+			&ipv4_devconf_dflt);
 #endif
 }
 
 EXPORT_SYMBOL(devinet_ioctl);
 EXPORT_SYMBOL(in_dev_finish_destroy);
 EXPORT_SYMBOL(inet_select_addr);
+EXPORT_SYMBOL(inet_del_ifa);
 EXPORT_SYMBOL(inetdev_by_index);
+EXPORT_SYMBOL(devinet_sysctl_init);
+EXPORT_SYMBOL(devinet_sysctl_fini);
+EXPORT_SYMBOL(devinet_sysctl_free);
 EXPORT_SYMBOL(register_inetaddr_notifier);
 EXPORT_SYMBOL(unregister_inetaddr_notifier);
diff -uprN linux-2.6.16/net/ipv4/fib_frontend.c linux-2.6.16.ovz/net/ipv4/fib_frontend.c
--- linux-2.6.16/net/ipv4/fib_frontend.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/net/ipv4/fib_frontend.c	2006-07-05 08:34:56.000000000 -0400
@@ -53,14 +53,46 @@
 
 #define RT_TABLE_MIN RT_TABLE_MAIN
 
+#undef ip_fib_local_table
+#undef ip_fib_main_table
 struct fib_table *ip_fib_local_table;
 struct fib_table *ip_fib_main_table;
+void prepare_fib_tables(void)
+{
+#ifdef CONFIG_VE
+	get_ve0()->_local_table = ip_fib_local_table;
+	ip_fib_local_table = (struct fib_table *)0x12345678;
+	get_ve0()->_main_table = ip_fib_main_table;
+	ip_fib_main_table = (struct fib_table *)0x12345678;
+#endif
+}
+#if defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE)
+#define ip_fib_local_table 	get_exec_env()->_local_table
+#define ip_fib_main_table 	get_exec_env()->_main_table
+#endif
 
 #else
 
 #define RT_TABLE_MIN 1
 
+#undef fib_tables
 struct fib_table *fib_tables[RT_TABLE_MAX+1];
+void prepare_fib_tables(void)
+{
+#if defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE)
+	int i;
+
+	BUG_ON(sizeof(fib_tables) !=
+		sizeof(((struct ve_struct *)0)->_fib_tables));
+	memcpy(get_ve0()->_fib_tables, fib_tables, sizeof(fib_tables));
+	for (i = 0; i <= RT_TABLE_MAX; i++)
+		fib_tables[i] = (void *)0x12366678;
+#endif
+}
+
+#if defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE)
+#define fib_tables get_exec_env()->_fib_tables
+#endif
 
 struct fib_table *__fib_new_table(int id)
 {
@@ -250,7 +282,7 @@ int ip_rt_ioctl(unsigned int cmd, void _
 	switch (cmd) {
 	case SIOCADDRT:		/* Add a route */
 	case SIOCDELRT:		/* Delete a route */
-		if (!capable(CAP_NET_ADMIN))
+		if (!capable(CAP_VE_NET_ADMIN))
 			return -EPERM;
 		if (copy_from_user(&r, arg, sizeof(struct rtentry)))
 			return -EFAULT;
@@ -653,6 +685,7 @@ static struct notifier_block fib_netdev_
 
 void __init ip_fib_init(void)
 {
+	prepare_fib_tables();
 #ifndef CONFIG_IP_MULTIPLE_TABLES
 	ip_fib_local_table = fib_hash_init(RT_TABLE_LOCAL);
 	ip_fib_main_table  = fib_hash_init(RT_TABLE_MAIN);
diff -uprN linux-2.6.16/net/ipv4/fib_hash.c linux-2.6.16.ovz/net/ipv4/fib_hash.c
--- linux-2.6.16/net/ipv4/fib_hash.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/net/ipv4/fib_hash.c	2006-07-05 08:34:56.000000000 -0400
@@ -36,6 +36,7 @@
 #include <linux/skbuff.h>
 #include <linux/netlink.h>
 #include <linux/init.h>
+#include <linux/ve.h>
 
 #include <net/ip.h>
 #include <net/protocol.h>
@@ -73,11 +74,6 @@ struct fn_zone {
  * can be cheaper than memory lookup, so that FZ_* macros are used.
  */
 
-struct fn_hash {
-	struct fn_zone	*fn_zones[33];
-	struct fn_zone	*fn_zone_list;
-};
-
 static inline u32 fn_hash(u32 key, struct fn_zone *fz)
 {
 	u32 h = ntohl(key)>>(32 - fz->fz_order);
@@ -623,7 +619,7 @@ fn_hash_delete(struct fib_table *tb, str
 	return -ESRCH;
 }
 
-static int fn_flush_list(struct fn_zone *fz, int idx)
+static int fn_flush_list(struct fn_zone *fz, int idx, int destroy)
 {
 	struct hlist_head *head = &fz->fz_hash[idx];
 	struct hlist_node *node, *n;
@@ -638,7 +634,9 @@ static int fn_flush_list(struct fn_zone 
 		list_for_each_entry_safe(fa, fa_node, &f->fn_alias, fa_list) {
 			struct fib_info *fi = fa->fa_info;
 
-			if (fi && (fi->fib_flags&RTNH_F_DEAD)) {
+			if (fi == NULL)
+				continue;
+			if (destroy || (fi->fib_flags&RTNH_F_DEAD)) {
 				write_lock_bh(&fib_hash_lock);
 				list_del(&fa->fa_list);
 				if (list_empty(&f->fn_alias)) {
@@ -660,7 +658,7 @@ static int fn_flush_list(struct fn_zone 
 	return found;
 }
 
-static int fn_hash_flush(struct fib_table *tb)
+static int __fn_hash_flush(struct fib_table *tb, int destroy)
 {
 	struct fn_hash *table = (struct fn_hash *) tb->tb_data;
 	struct fn_zone *fz;
@@ -670,11 +668,84 @@ static int fn_hash_flush(struct fib_tabl
 		int i;
 
 		for (i = fz->fz_divisor - 1; i >= 0; i--)
-			found += fn_flush_list(fz, i);
+			found += fn_flush_list(fz, i, destroy);
 	}
 	return found;
 }
 
+static int fn_hash_flush(struct fib_table *tb)
+{
+	return __fn_hash_flush(tb, 0);
+}
+
+#if defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE)
+void fib_hash_destroy(struct fib_table *tb)
+{
+	__fn_hash_flush(tb, 1);	/* destroy=1: flush aliases whether or not RTNH_F_DEAD is set */
+	kfree(tb);	/* NOTE(review): the fn_zone structures do not appear to be freed here - confirm */
+}
+
+/*
+ * Initialization of virtualized networking subsystem.
+ */
+int init_ve_route(struct ve_struct *ve)
+{
+#ifdef CONFIG_IP_MULTIPLE_TABLES
+	if (fib_rules_create())
+		return -ENOMEM;
+	ve->_fib_tables[RT_TABLE_LOCAL] = fib_hash_init(RT_TABLE_LOCAL);
+	if (!ve->_fib_tables[RT_TABLE_LOCAL])
+		goto out_destroy;
+	ve->_fib_tables[RT_TABLE_MAIN] = fib_hash_init(RT_TABLE_MAIN);
+	if (!ve->_fib_tables[RT_TABLE_MAIN])
+		goto out_destroy_local;
+
+	return 0;
+
+out_destroy_local:
+	fib_hash_destroy(ve->_fib_tables[RT_TABLE_LOCAL]);
+out_destroy:
+	fib_rules_destroy();
+	ve->_local_rule = NULL;
+	return -ENOMEM;
+#else
+	ve->_local_table = fib_hash_init(RT_TABLE_LOCAL);
+	if (!ve->_local_table)
+		return -ENOMEM;
+	ve->_main_table = fib_hash_init(RT_TABLE_MAIN);
+	if (!ve->_main_table) {
+		fib_hash_destroy(ve->_local_table);
+		return -ENOMEM;
+	}
+	return 0;
+#endif
+}
+
+/* Tear down the routing state of @ve: its FIB tables and fib_info hashes. */
+void fini_ve_route(struct ve_struct *ve)
+{
+	/* fib_hash_free() wants the size in bytes, not the bucket count. */
+	int bytes = ve->_fib_hash_size * sizeof(struct hlist_head *);
+#ifdef CONFIG_IP_MULTIPLE_TABLES
+	int i;
+	for (i = 0; i <= RT_TABLE_MAX; i++)
+		if (ve->_fib_tables[i])
+			fib_hash_destroy(ve->_fib_tables[i]);
+	fib_rules_destroy();
+	ve->_local_rule = NULL;
+#else
+	fib_hash_destroy(ve->_local_table);
+	fib_hash_destroy(ve->_main_table);
+#endif
+	fib_hash_free(ve->_fib_info_hash, bytes);
+	fib_hash_free(ve->_fib_info_laddrhash, bytes);
+	ve->_fib_info_hash = ve->_fib_info_laddrhash = NULL;
+}
+
+EXPORT_SYMBOL(init_ve_route);
+EXPORT_SYMBOL(fini_ve_route);
+#endif
+
 
 static inline int
 fn_hash_dump_bucket(struct sk_buff *skb, struct netlink_callback *cb,
@@ -766,7 +837,7 @@ static int fn_hash_dump(struct fib_table
 	return skb->len;
 }
 
-#ifdef CONFIG_IP_MULTIPLE_TABLES
+#if defined(CONFIG_IP_MULTIPLE_TABLES) || defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE)
 struct fib_table * fib_hash_init(int id)
 #else
 struct fib_table * __init fib_hash_init(int id)
@@ -1076,13 +1147,13 @@ static struct file_operations fib_seq_fo
 
 int __init fib_proc_init(void)
 {
-	if (!proc_net_fops_create("route", S_IRUGO, &fib_seq_fops))
+	if (!proc_glob_fops_create("net/route", S_IRUGO, &fib_seq_fops))
 		return -ENOMEM;
 	return 0;
 }
 
 void __init fib_proc_exit(void)
 {
-	proc_net_remove("route");
+	remove_proc_glob_entry("net/route", NULL);
 }
 #endif /* CONFIG_PROC_FS */
diff -uprN linux-2.6.16/net/ipv4/fib_lookup.h linux-2.6.16.ovz/net/ipv4/fib_lookup.h
--- linux-2.6.16/net/ipv4/fib_lookup.h	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/net/ipv4/fib_lookup.h	2006-07-05 08:34:56.000000000 -0400
@@ -41,5 +41,6 @@ extern struct fib_alias *fib_find_alias(
 extern int fib_detect_death(struct fib_info *fi, int order,
 			    struct fib_info **last_resort,
 			    int *last_idx, int *dflt);
+void fib_hash_free(struct hlist_head *hash, int bytes);
 
 #endif /* _FIB_LOOKUP_H */
diff -uprN linux-2.6.16/net/ipv4/fib_rules.c linux-2.6.16.ovz/net/ipv4/fib_rules.c
--- linux-2.6.16/net/ipv4/fib_rules.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/net/ipv4/fib_rules.c	2006-07-05 08:34:56.000000000 -0400
@@ -39,6 +39,7 @@
 #include <linux/proc_fs.h>
 #include <linux/skbuff.h>
 #include <linux/netlink.h>
+#include <linux/rtnetlink.h>
 #include <linux/init.h>
 
 #include <net/ip.h>
@@ -99,9 +100,89 @@ static struct fib_rule local_rule = {
 	.r_action =	RTN_UNICAST,
 };
 
-static struct fib_rule *fib_rules = &local_rule;
 static DEFINE_RWLOCK(fib_rules_lock);
 
+void __init prepare_fib_rules(void)
+{
+#if defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE)
+	get_ve0()->_local_rule = &local_rule;
+	get_ve0()->_fib_rules = &local_rule;
+#endif
+}
+
+#if defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE)
+#define local_rule (*(get_exec_env()->_local_rule))
+#define fib_rules (get_exec_env()->_fib_rules)
+#else
+static struct fib_rule *fib_rules = &local_rule;
+#endif
+
+#if defined(CONFIG_VE_CALLS) || defined(CONFIG_VE_CALLS_MODULE)
+/* Allocate a zeroed RTN_UNICAST rule with r_clntref set to 1; NULL on OOM. */
+static struct fib_rule *ve_fib_rule_alloc(int preference, int table,
+		struct fib_rule *next)
+{
+	struct fib_rule *r = kmalloc(sizeof(*r), GFP_KERNEL);
+
+	if (r == NULL)
+		return NULL;
+	memset(r, 0, sizeof(*r));
+	atomic_set(&r->r_clntref, 1);
+	r->r_preference = preference;
+	r->r_table = table;
+	r->r_action = RTN_UNICAST;
+	r->r_next = next;
+	return r;
+}
+
+/*
+ * Build the local -> main -> default rule chain and install it in the
+ * current exec env.  Returns 0 on success, -1 if an allocation fails.
+ */
+int fib_rules_create(void)
+{
+	struct fib_rule *default_rule, *main_rule, *loc_rule;
+
+	default_rule = ve_fib_rule_alloc(0x7FFF, RT_TABLE_DEFAULT, NULL);
+	if (default_rule == NULL)
+		goto out_def;
+	main_rule = ve_fib_rule_alloc(0x7FFE, RT_TABLE_MAIN, default_rule);
+	if (main_rule == NULL)
+		goto out_main;
+	loc_rule = ve_fib_rule_alloc(0, RT_TABLE_LOCAL, main_rule);
+	if (loc_rule == NULL)
+		goto out_loc;
+#if defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE)
+	get_exec_env()->_local_rule = loc_rule;
+	get_exec_env()->_fib_rules = loc_rule;
+#endif
+	return 0;
+
+out_loc:
+	kfree(main_rule);
+out_main:
+	kfree(default_rule);
+out_def:
+	return -1;
+}
+
+/* Empty the active rule list: mark each rule dead and fib_rule_put() it. */
+void fib_rules_destroy(void)
+{
+	struct fib_rule *r;
+
+	rtnl_lock();
+	write_lock_bh(&fib_rules_lock);
+	while ((r = fib_rules) != NULL) {
+		fib_rules = r->r_next;
+		r->r_dead = 1;
+		fib_rule_put(r);
+	}
+	write_unlock_bh(&fib_rules_lock);
+	rtnl_unlock();
+}
+#endif
+
 int inet_rtm_delrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
 {
 	struct rtattr **rta = arg;
@@ -435,5 +516,6 @@ int inet_dump_rules(struct sk_buff *skb,
 
 void __init fib_rules_init(void)
 {
+	prepare_fib_rules();
 	register_netdevice_notifier(&fib_rules_notifier);
 }
diff -uprN linux-2.6.16/net/ipv4/fib_semantics.c linux-2.6.16.ovz/net/ipv4/fib_semantics.c
--- linux-2.6.16/net/ipv4/fib_semantics.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/net/ipv4/fib_semantics.c	2006-07-05 08:34:56.000000000 -0400
@@ -33,6 +33,7 @@
 #include <linux/netdevice.h>
 #include <linux/if_arp.h>
 #include <linux/proc_fs.h>
+#include <linux/ve.h>
 #include <linux/skbuff.h>
 #include <linux/netlink.h>
 #include <linux/init.h>
@@ -56,6 +57,24 @@ static struct hlist_head *fib_info_laddr
 static unsigned int fib_hash_size;
 static unsigned int fib_info_cnt;
 
+void prepare_fib_info(void)
+{
+#if defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE)
+	get_ve0()->_fib_info_hash = fib_info_hash;
+	get_ve0()->_fib_info_laddrhash = fib_info_laddrhash;
+	get_ve0()->_fib_hash_size = fib_hash_size;
+	get_ve0()->_fib_info_cnt = fib_info_cnt;
+#endif
+}
+
+#if defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE)
+#define fib_info_hash (get_exec_env()->_fib_info_hash)
+#define fib_info_laddrhash (get_exec_env()->_fib_info_laddrhash)
+#define fib_hash_size (get_exec_env()->_fib_hash_size)
+#define fib_info_cnt (get_exec_env()->_fib_info_cnt)
+#endif
+
+
 #define DEVINDEX_HASHBITS 8
 #define DEVINDEX_HASHSIZE (1U << DEVINDEX_HASHBITS)
 static struct hlist_head fib_info_devhash[DEVINDEX_HASHSIZE];
@@ -235,13 +254,15 @@ static struct fib_info *fib_find_info(co
 	return NULL;
 }
 
-static inline unsigned int fib_devindex_hashfn(unsigned int val)
+static inline unsigned int fib_devindex_hashfn(unsigned int val,
+		envid_t veid)
 {
 	unsigned int mask = DEVINDEX_HASHSIZE - 1;
 
 	return (val ^
 		(val >> DEVINDEX_HASHBITS) ^
-		(val >> (DEVINDEX_HASHBITS * 2))) & mask;
+		(val >> (DEVINDEX_HASHBITS * 2)) ^
+		(veid ^ (veid >> 16))) & mask;
 }
 
 /* Check, that the gateway is already configured.
@@ -257,7 +278,7 @@ int ip_fib_check_default(u32 gw, struct 
 
 	read_lock(&fib_info_lock);
 
-	hash = fib_devindex_hashfn(dev->ifindex);
+	hash = fib_devindex_hashfn(dev->ifindex, VEID(dev->owner_env));
 	head = &fib_info_devhash[hash];
 	hlist_for_each_entry(nh, node, head, nh_hash) {
 		if (nh->nh_dev == dev &&
@@ -580,7 +601,7 @@ static struct hlist_head *fib_hash_alloc
 			__get_free_pages(GFP_KERNEL, get_order(bytes));
 }
 
-static void fib_hash_free(struct hlist_head *hash, int bytes)
+void fib_hash_free(struct hlist_head *hash, int bytes)
 {
 	if (!hash)
 		return;
@@ -837,7 +858,8 @@ link_it:
 
 		if (!nh->nh_dev)
 			continue;
-		hash = fib_devindex_hashfn(nh->nh_dev->ifindex);
+		hash = fib_devindex_hashfn(nh->nh_dev->ifindex,
+				VEID(nh->nh_dev->owner_env));
 		head = &fib_info_devhash[hash];
 		hlist_add_head(&nh->nh_hash, head);
 	} endfor_nexthops(fi)
@@ -1184,7 +1206,8 @@ int fib_sync_down(u32 local, struct net_
 
 	if (dev) {
 		struct fib_info *prev_fi = NULL;
-		unsigned int hash = fib_devindex_hashfn(dev->ifindex);
+		unsigned int hash = fib_devindex_hashfn(dev->ifindex,
+				VEID(dev->owner_env));
 		struct hlist_head *head = &fib_info_devhash[hash];
 		struct hlist_node *node;
 		struct fib_nh *nh;
@@ -1249,7 +1272,7 @@ int fib_sync_up(struct net_device *dev)
 		return 0;
 
 	prev_fi = NULL;
-	hash = fib_devindex_hashfn(dev->ifindex);
+	hash = fib_devindex_hashfn(dev->ifindex, VEID(dev->owner_env));
 	head = &fib_info_devhash[hash];
 	ret = 0;
 
diff -uprN linux-2.6.16/net/ipv4/fib_trie.c linux-2.6.16.ovz/net/ipv4/fib_trie.c
--- linux-2.6.16/net/ipv4/fib_trie.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/net/ipv4/fib_trie.c	2006-07-05 08:34:56.000000000 -0400
@@ -314,11 +314,6 @@ static void __leaf_free_rcu(struct rcu_h
 	kfree(container_of(head, struct leaf, rcu));
 }
 
-static inline void free_leaf(struct leaf *leaf)
-{
-	call_rcu(&leaf->rcu, __leaf_free_rcu);
-}
-
 static void __leaf_info_free_rcu(struct rcu_head *head)
 {
 	kfree(container_of(head, struct leaf_info, rcu));
@@ -357,7 +352,12 @@ static void __tnode_free_rcu(struct rcu_
 
 static inline void tnode_free(struct tnode *tn)
 {
-	call_rcu(&tn->rcu, __tnode_free_rcu);
+	if(IS_LEAF(tn)) {
+		struct leaf *l = (struct leaf *) tn;
+		call_rcu_bh(&l->rcu, __leaf_free_rcu);
+	}
+        else
+		call_rcu(&tn->rcu, __tnode_free_rcu);
 }
 
 static struct leaf *leaf_new(void)
diff -uprN linux-2.6.16/net/ipv4/igmp.c linux-2.6.16.ovz/net/ipv4/igmp.c
--- linux-2.6.16/net/ipv4/igmp.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/net/ipv4/igmp.c	2006-07-05 08:34:56.000000000 -0400
@@ -2262,6 +2262,8 @@ static inline struct ip_mc_list *igmp_mc
 	     state->dev; 
 	     state->dev = state->dev->next) {
 		struct in_device *in_dev;
+		if (unlikely(!ve_accessible_strict(state->dev->owner_env, get_exec_env())))
+			continue;
 		in_dev = in_dev_get(state->dev);
 		if (!in_dev)
 			continue;
@@ -2291,6 +2293,11 @@ static struct ip_mc_list *igmp_mc_get_ne
 			state->in_dev = NULL;
 			break;
 		}
+		if (unlikely(!ve_accessible_strict(state->dev->owner_env, get_exec_env()))) {
+			/* The previous in_dev was already unlocked and put: forget it. */
+			state->in_dev = NULL;
+			continue;
+		}
 		state->in_dev = in_dev_get(state->dev);
 		if (!state->in_dev)
 			continue;
@@ -2425,6 +2429,8 @@ static inline struct ip_sf_list *igmp_mc
 	     state->dev; 
 	     state->dev = state->dev->next) {
 		struct in_device *idev;
+		if (unlikely(!ve_accessible_strict(state->dev->owner_env, get_exec_env())))
+			continue;
 		idev = in_dev_get(state->dev);
 		if (unlikely(idev == NULL))
 			continue;
@@ -2464,6 +2470,11 @@ static struct ip_sf_list *igmp_mcf_get_n
 				state->idev = NULL;
 				goto out;
 			}
+			if (unlikely(!ve_accessible_strict(state->dev->owner_env, get_exec_env()))) {
+				/* The previous idev was already unlocked and put: forget it. */
+				state->idev = NULL;
+				continue;
+			}
 			state->idev = in_dev_get(state->dev);
 			if (!state->idev)
 				continue;
@@ -2584,8 +2592,8 @@ static struct file_operations igmp_mcf_s
 
 int __init igmp_mc_proc_init(void)
 {
-	proc_net_fops_create("igmp", S_IRUGO, &igmp_mc_seq_fops);
-	proc_net_fops_create("mcfilter", S_IRUGO, &igmp_mcf_seq_fops);
+	proc_glob_fops_create("net/igmp", S_IRUGO, &igmp_mc_seq_fops);
+	proc_glob_fops_create("net/mcfilter", S_IRUGO, &igmp_mcf_seq_fops);
 	return 0;
 }
 #endif
diff -uprN linux-2.6.16/net/ipv4/inet_connection_sock.c linux-2.6.16.ovz/net/ipv4/inet_connection_sock.c
--- linux-2.6.16/net/ipv4/inet_connection_sock.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/net/ipv4/inet_connection_sock.c	2006-07-05 08:34:56.000000000 -0400
@@ -25,6 +25,9 @@
 #include <net/tcp_states.h>
 #include <net/xfrm.h>
 
+#include <ub/ub_net.h>
+#include <ub/ub_orphan.h>
+
 #ifdef INET_CSK_DEBUG
 const char inet_csk_timer_bug_msg[] = "inet_csk BUG: unknown timer value\n";
 EXPORT_SYMBOL(inet_csk_timer_bug_msg);
@@ -48,6 +51,7 @@ int inet_csk_bind_conflict(const struct 
 	sk_for_each_bound(sk2, node, &tb->owners) {
 		if (sk != sk2 &&
 		    !inet_v6_ipv6only(sk2) &&
+		    ve_accessible_strict(VE_OWNER_SK(sk), VE_OWNER_SK(sk2)) && /* same VE only */
 		    (!sk->sk_bound_dev_if ||
 		     !sk2->sk_bound_dev_if ||
 		     sk->sk_bound_dev_if == sk2->sk_bound_dev_if)) {
@@ -77,7 +81,9 @@ int inet_csk_get_port(struct inet_hashin
 	struct hlist_node *node;
 	struct inet_bind_bucket *tb;
 	int ret;
+	struct ve_struct *env;
 
+	env = VE_OWNER_SK(sk);
 	local_bh_disable();
 	if (!snum) {
 		int low = sysctl_local_port_range[0];
@@ -86,11 +92,15 @@ int inet_csk_get_port(struct inet_hashin
 		int rover = net_random() % (high - low) + low;
 
 		do {
-			head = &hashinfo->bhash[inet_bhashfn(rover, hashinfo->bhash_size)];
+			head = &hashinfo->bhash[inet_bhashfn(rover,
+					hashinfo->bhash_size, VEID(env))];
 			spin_lock(&head->lock);
-			inet_bind_bucket_for_each(tb, node, &head->chain)
+			inet_bind_bucket_for_each(tb, node, &head->chain) {
+				if (!ve_accessible_strict(VE_OWNER_TB(tb),env))
+					continue;
 				if (tb->port == rover)
 					goto next;
+			}
 			break;
 		next:
 			spin_unlock(&head->lock);
@@ -113,11 +123,15 @@ int inet_csk_get_port(struct inet_hashin
 		 */
 		snum = rover;
 	} else {
-		head = &hashinfo->bhash[inet_bhashfn(snum, hashinfo->bhash_size)];
+		head = &hashinfo->bhash[inet_bhashfn(snum,
+				hashinfo->bhash_size, VEID(env))];
 		spin_lock(&head->lock);
-		inet_bind_bucket_for_each(tb, node, &head->chain)
+		inet_bind_bucket_for_each(tb, node, &head->chain) {
+			if (!ve_accessible_strict(VE_OWNER_TB(tb), env))
+				continue;
 			if (tb->port == snum)
 				goto tb_found;
+		}
 	}
 	tb = NULL;
 	goto tb_not_found;
@@ -136,7 +150,7 @@ tb_found:
 	}
 tb_not_found:
 	ret = 1;
-	if (!tb && (tb = inet_bind_bucket_create(hashinfo->bind_bucket_cachep, head, snum)) == NULL)
+	if (!tb && (tb = inet_bind_bucket_create(hashinfo->bind_bucket_cachep, head, snum, env)) == NULL)
 		goto fail_unlock;
 	if (hlist_empty(&tb->owners)) {
 		if (sk->sk_reuse && sk->sk_state != TCP_LISTEN)
@@ -541,7 +555,7 @@ void inet_csk_destroy_sock(struct sock *
 
 	sk_refcnt_debug_release(sk);
 
-	atomic_dec(sk->sk_prot->orphan_count);
+	ub_dec_orphan_count(sk);
 	sock_put(sk);
 }
 
@@ -621,7 +635,7 @@ void inet_csk_listen_stop(struct sock *s
 
 		sock_orphan(child);
 
-		atomic_inc(sk->sk_prot->orphan_count);
+		ub_inc_orphan_count(sk);
 
 		inet_csk_destroy_sock(child);
 
diff -uprN linux-2.6.16/net/ipv4/inet_diag.c linux-2.6.16.ovz/net/ipv4/inet_diag.c
--- linux-2.6.16/net/ipv4/inet_diag.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/net/ipv4/inet_diag.c	2006-07-05 08:34:56.000000000 -0400
@@ -673,7 +673,9 @@ static int inet_diag_dump(struct sk_buff
 	struct inet_diag_req *r = NLMSG_DATA(cb->nlh);
 	const struct inet_diag_handler *handler;
 	struct inet_hashinfo *hashinfo;
+	struct ve_struct *ve;
 
+	ve = get_exec_env();
 	handler = inet_diag_table[cb->nlh->nlmsg_type];
 	BUG_ON(handler == NULL);
 	hashinfo = handler->idiag_hashinfo;
@@ -694,6 +696,8 @@ static int inet_diag_dump(struct sk_buff
 			sk_for_each(sk, node, &hashinfo->listening_hash[i]) {
 				struct inet_sock *inet = inet_sk(sk);
 
+				if (!ve_accessible(VE_OWNER_SK(sk), ve))
+					continue;
 				if (num < s_num) {
 					num++;
 					continue;
@@ -754,6 +758,8 @@ skip_listen_ht:
 		sk_for_each(sk, node, &head->chain) {
 			struct inet_sock *inet = inet_sk(sk);
 
+			if (!ve_accessible(VE_OWNER_SK(sk), ve))
+				continue;
 			if (num < s_num)
 				goto next_normal;
 			if (!(r->idiag_states & (1 << sk->sk_state)))
@@ -778,6 +784,8 @@ next_normal:
 			inet_twsk_for_each(tw, node,
 				    &hashinfo->ehash[i + hashinfo->ehash_size].chain) {
 
+				if (!ve_accessible_veid(tw->tw_owner_env, VEID(ve)))
+					continue;	/* this timewait entry is not visible from ve */
 				if (num < s_num)
 					goto next_dying;
 				if (r->id.idiag_sport != tw->tw_sport &&
diff -uprN linux-2.6.16/net/ipv4/inet_hashtables.c linux-2.6.16.ovz/net/ipv4/inet_hashtables.c
--- linux-2.6.16/net/ipv4/inet_hashtables.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/net/ipv4/inet_hashtables.c	2006-07-05 08:34:56.000000000 -0400
@@ -30,7 +30,8 @@
  */
 struct inet_bind_bucket *inet_bind_bucket_create(kmem_cache_t *cachep,
 						 struct inet_bind_hashbucket *head,
-						 const unsigned short snum)
+						 const unsigned short snum,
+						 struct ve_struct *ve)
 {
 	struct inet_bind_bucket *tb = kmem_cache_alloc(cachep, SLAB_ATOMIC);
 
@@ -38,6 +39,7 @@ struct inet_bind_bucket *inet_bind_bucke
 		tb->port      = snum;
 		tb->fastreuse = 0;
 		INIT_HLIST_HEAD(&tb->owners);
+		SET_VE_OWNER_TB(tb, ve);
 		hlist_add_head(&tb->node, &head->chain);
 	}
 	return tb;
@@ -71,10 +73,13 @@ EXPORT_SYMBOL(inet_bind_hash);
  */
 static void __inet_put_port(struct inet_hashinfo *hashinfo, struct sock *sk)
 {
-	const int bhash = inet_bhashfn(inet_sk(sk)->num, hashinfo->bhash_size);
-	struct inet_bind_hashbucket *head = &hashinfo->bhash[bhash];
+	int bhash;
+	struct inet_bind_hashbucket *head;
 	struct inet_bind_bucket *tb;
 
+	bhash = inet_bhashfn(inet_sk(sk)->num, hashinfo->bhash_size,
+			VEID(VE_OWNER_SK(sk)));
+	head = &hashinfo->bhash[bhash];
 	spin_lock(&head->lock);
 	tb = inet_csk(sk)->icsk_bind_hash;
 	__sk_del_bind_node(sk);
@@ -130,7 +135,8 @@ EXPORT_SYMBOL(inet_listen_wlock);
  * wildcarded during the search since they can never be otherwise.
  */
 struct sock *__inet_lookup_listener(const struct hlist_head *head, const u32 daddr,
-				    const unsigned short hnum, const int dif)
+				    const unsigned short hnum, const int dif,
+				    struct ve_struct *env)
 {
 	struct sock *result = NULL, *sk;
 	const struct hlist_node *node;
@@ -139,6 +145,8 @@ struct sock *__inet_lookup_listener(cons
 	sk_for_each(sk, node, head) {
 		const struct inet_sock *inet = inet_sk(sk);
 
+		if (!ve_accessible_strict(VE_OWNER_SK(sk), env))
+			continue;
 		if (inet->num == hnum && !ipv6_only_sock(sk)) {
 			const __u32 rcv_saddr = inet->rcv_saddr;
 			int score = sk->sk_family == PF_INET ? 1 : 0;
@@ -169,7 +177,8 @@ EXPORT_SYMBOL_GPL(__inet_lookup_listener
 /* called with local bh disabled */
 static int __inet_check_established(struct inet_timewait_death_row *death_row,
 				    struct sock *sk, __u16 lport,
-				    struct inet_timewait_sock **twp)
+				    struct inet_timewait_sock **twp,
+				    struct ve_struct *ve)
 {
 	struct inet_hashinfo *hinfo = death_row->hashinfo;
 	struct inet_sock *inet = inet_sk(sk);
@@ -178,12 +187,15 @@ static int __inet_check_established(stru
 	int dif = sk->sk_bound_dev_if;
 	INET_ADDR_COOKIE(acookie, saddr, daddr)
 	const __u32 ports = INET_COMBINED_PORTS(inet->dport, lport);
-	unsigned int hash = inet_ehashfn(daddr, lport, saddr, inet->dport);
-	struct inet_ehash_bucket *head = inet_ehash_bucket(hinfo, hash);
+	unsigned int hash;
+	struct inet_ehash_bucket *head;
 	struct sock *sk2;
 	const struct hlist_node *node;
 	struct inet_timewait_sock *tw;
 
+	hash = inet_ehashfn(daddr, lport, saddr, inet->dport, VEID(ve));
+	head = inet_ehash_bucket(hinfo, hash);
+
 	prefetch(head->chain.first);
 	write_lock(&head->lock);
 
@@ -191,7 +203,8 @@ static int __inet_check_established(stru
 	sk_for_each(sk2, node, &(head + hinfo->ehash_size)->chain) {
 		tw = inet_twsk(sk2);
 
-		if (INET_TW_MATCH(sk2, hash, acookie, saddr, daddr, ports, dif)) {
+		if (INET_TW_MATCH(sk2, hash, acookie, saddr, daddr,
+					ports, dif, ve)) {
 			if (twsk_unique(sk, sk2, twp))
 				goto unique;
 			else
@@ -202,7 +215,8 @@ static int __inet_check_established(stru
 
 	/* And established part... */
 	sk_for_each(sk2, node, &head->chain) {
-		if (INET_MATCH(sk2, hash, acookie, saddr, daddr, ports, dif))
+		if (INET_MATCH(sk2, hash, acookie, saddr, daddr,
+					ports, dif, ve))
 			goto not_unique;
 	}
 
@@ -253,7 +267,9 @@ int inet_hash_connect(struct inet_timewa
  	struct inet_bind_hashbucket *head;
  	struct inet_bind_bucket *tb;
 	int ret;
+	struct ve_struct *ve;
 
+	ve = VE_OWNER_SK(sk);
  	if (!snum) {
  		int low = sysctl_local_port_range[0];
  		int high = sysctl_local_port_range[1];
@@ -268,7 +284,8 @@ int inet_hash_connect(struct inet_timewa
  		local_bh_disable();
 		for (i = 1; i <= range; i++) {
 			port = low + (i + offset) % range;
- 			head = &hinfo->bhash[inet_bhashfn(port, hinfo->bhash_size)];
+ 			head = &hinfo->bhash[inet_bhashfn(port,
+					hinfo->bhash_size, VEID(ve))];
  			spin_lock(&head->lock);
 
  			/* Does not bother with rcv_saddr checks,
@@ -282,13 +299,14 @@ int inet_hash_connect(struct inet_timewa
  						goto next_port;
  					if (!__inet_check_established(death_row,
 								      sk, port,
-								      &tw))
+								      &tw, ve))
  						goto ok;
  					goto next_port;
  				}
  			}
 
- 			tb = inet_bind_bucket_create(hinfo->bind_bucket_cachep, head, port);
+ 			tb = inet_bind_bucket_create(hinfo->bind_bucket_cachep,
+					head, port, ve);
  			if (!tb) {
  				spin_unlock(&head->lock);
  				break;
@@ -323,7 +341,7 @@ ok:
 		goto out;
  	}
 
- 	head = &hinfo->bhash[inet_bhashfn(snum, hinfo->bhash_size)];
+ 	head = &hinfo->bhash[inet_bhashfn(snum, hinfo->bhash_size, VEID(ve))];
  	tb  = inet_csk(sk)->icsk_bind_hash;
 	spin_lock_bh(&head->lock);
 	if (sk_head(&tb->owners) == sk && !sk->sk_bind_node.next) {
@@ -333,7 +351,7 @@ ok:
 	} else {
 		spin_unlock(&head->lock);
 		/* No definite answer... Walk to established hash table */
-		ret = __inet_check_established(death_row, sk, snum, NULL);
+		ret = __inet_check_established(death_row, sk, snum, NULL, ve);
 out:
 		local_bh_enable();
 		return ret;
diff -uprN linux-2.6.16/net/ipv4/inet_timewait_sock.c linux-2.6.16.ovz/net/ipv4/inet_timewait_sock.c
--- linux-2.6.16/net/ipv4/inet_timewait_sock.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/net/ipv4/inet_timewait_sock.c	2006-07-05 08:34:56.000000000 -0400
@@ -32,7 +32,8 @@ void __inet_twsk_kill(struct inet_timewa
 	write_unlock(&ehead->lock);
 
 	/* Disassociate with bind bucket. */
-	bhead = &hashinfo->bhash[inet_bhashfn(tw->tw_num, hashinfo->bhash_size)];
+	bhead = &hashinfo->bhash[inet_bhashfn(tw->tw_num,
+			hashinfo->bhash_size, tw->tw_owner_env)];
 	spin_lock(&bhead->lock);
 	tb = tw->tw_tb;
 	__hlist_del(&tw->tw_bind_node);
@@ -66,7 +67,8 @@ void __inet_twsk_hashdance(struct inet_t
 	   Note, that any socket with inet->num != 0 MUST be bound in
 	   binding cache, even if it is closed.
 	 */
-	bhead = &hashinfo->bhash[inet_bhashfn(inet->num, hashinfo->bhash_size)];
+	bhead = &hashinfo->bhash[inet_bhashfn(inet->num,
+			hashinfo->bhash_size, tw->tw_owner_env)];
 	spin_lock(&bhead->lock);
 	tw->tw_tb = icsk->icsk_bind_hash;
 	BUG_TRAP(icsk->icsk_bind_hash);
@@ -90,9 +92,14 @@ EXPORT_SYMBOL_GPL(__inet_twsk_hashdance)
 
 struct inet_timewait_sock *inet_twsk_alloc(const struct sock *sk, const int state)
 {
-	struct inet_timewait_sock *tw =
-		kmem_cache_alloc(sk->sk_prot_creator->twsk_prot->twsk_slab,
-				 SLAB_ATOMIC);
+	struct user_beancounter *ub;
+	struct inet_timewait_sock *tw;
+
+	ub = set_exec_ub(sock_bc(sk)->ub);
+	tw = kmem_cache_alloc(sk->sk_prot_creator->twsk_prot->twsk_slab,
+			SLAB_ATOMIC);
+	(void)set_exec_ub(ub);
+
 	if (tw != NULL) {
 		const struct inet_sock *inet = inet_sk(sk);
 
diff -uprN linux-2.6.16/net/ipv4/ip_forward.c linux-2.6.16.ovz/net/ipv4/ip_forward.c
--- linux-2.6.16/net/ipv4/ip_forward.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/net/ipv4/ip_forward.c	2006-07-05 08:34:56.000000000 -0400
@@ -87,6 +87,24 @@ int ip_forward(struct sk_buff *skb)
 	if (opt->is_strictroute && rt->rt_dst != rt->rt_gateway)
 		goto sr_failed;
 
+	/*
+	 * Optimize forwarding of VE packets: for packets leaving a VE
+	 * (i.e. arriving from a VENET device) do not decrement the TTL,
+	 * which also saves the skb_cow() the decrement would require.
+	 * For packets going into a VE the TTL is still decremented,
+	 * since such an skb is not cloned and does not require
+	 * an actual copy. So there is at least one place
+	 * on the packet's path with a mandatory TTL decrement, which is
+	 * sufficient to prevent routing loops.
+	 */
+	iph = skb->nh.iph;
+	if (
+#ifdef CONFIG_IP_ROUTE_NAT			
+	    (rt->rt_flags & RTCF_NAT) == 0 &&	  /* no NAT mangling expected */
+#endif						  /* and */
+	    (skb->dev->features & NETIF_F_VENET)) /* src is VENET device */
+		goto no_ttl_decr;
+
 	/* We are about to mangle packet. Copy it! */
 	if (skb_cow(skb, LL_RESERVED_SPACE(rt->u.dst.dev)+rt->u.dst.header_len))
 		goto drop;
@@ -95,6 +113,8 @@ int ip_forward(struct sk_buff *skb)
 	/* Decrease ttl after skb cow done */
 	ip_decrease_ttl(iph);
 
+no_ttl_decr:
+
 	/*
 	 *	We now generate an ICMP HOST REDIRECT giving the route
 	 *	we calculated.
diff -uprN linux-2.6.16/net/ipv4/ip_fragment.c linux-2.6.16.ovz/net/ipv4/ip_fragment.c
--- linux-2.6.16/net/ipv4/ip_fragment.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/net/ipv4/ip_fragment.c	2006-07-05 08:34:56.000000000 -0400
@@ -44,6 +44,7 @@
 #include <linux/udp.h>
 #include <linux/inet.h>
 #include <linux/netfilter_ipv4.h>
+#include <linux/ve_owner.h>
 
 /* NOTE. Logic of IP defragmentation is parallel to corresponding IPv6
  * code now. If you change something here, _PLEASE_ update ipv6/reassembly.c
@@ -97,8 +98,12 @@ struct ipq {
 	int             iif;
 	unsigned int    rid;
 	struct inet_peer *peer;
+	struct ve_struct *owner_env;
 };
 
+DCL_VE_OWNER_PROTO(IPQ, struct ipq, owner_env)
+DCL_VE_OWNER(IPQ, struct ipq, owner_env)
+
 /* Hash table. */
 
 #define IPQ_HASHSZ	64
@@ -182,7 +187,8 @@ static __inline__ void frag_free_queue(s
 
 static __inline__ struct ipq *frag_alloc_queue(void)
 {
-	struct ipq *qp = kmalloc(sizeof(struct ipq), GFP_ATOMIC);
+	struct ipq *qp = kmalloc(sizeof(struct ipq) + sizeof(void *),
+				GFP_ATOMIC);
 
 	if(!qp)
 		return NULL;
@@ -278,6 +284,9 @@ static void ip_evictor(void)
 static void ip_expire(unsigned long arg)
 {
 	struct ipq *qp = (struct ipq *) arg;
+	struct ve_struct *envid;
+
+	envid = set_exec_env(VE_OWNER_IPQ(qp));
 
 	spin_lock(&qp->lock);
 
@@ -300,6 +309,8 @@ static void ip_expire(unsigned long arg)
 out:
 	spin_unlock(&qp->lock);
 	ipq_put(qp, NULL);
+
+	(void)set_exec_env(envid);
 }
 
 /* Creation primitives. */
@@ -321,7 +332,8 @@ static struct ipq *ip_frag_intern(unsign
 		   qp->saddr == qp_in->saddr	&&
 		   qp->daddr == qp_in->daddr	&&
 		   qp->protocol == qp_in->protocol &&
-		   qp->user == qp_in->user) {
+		   qp->user == qp_in->user	&&
+		   qp->owner_env == get_exec_env()) {
 			atomic_inc(&qp->refcnt);
 			write_unlock(&ipfrag_lock);
 			qp_in->last_in |= COMPLETE;
@@ -371,6 +383,8 @@ static struct ipq *ip_frag_create(unsign
 	spin_lock_init(&qp->lock);
 	atomic_set(&qp->refcnt, 1);
 
+	SET_VE_OWNER_IPQ(qp, get_exec_env());
+
 	return ip_frag_intern(hash, qp);
 
 out_nomem:
@@ -397,7 +411,8 @@ static inline struct ipq *ip_find(struct
 		   qp->saddr == saddr	&&
 		   qp->daddr == daddr	&&
 		   qp->protocol == protocol &&
-		   qp->user == user) {
+		   qp->user == user	&&
+		   qp->owner_env == get_exec_env()) {
 			atomic_inc(&qp->refcnt);
 			read_unlock(&ipfrag_lock);
 			return qp;
@@ -719,6 +734,9 @@ struct sk_buff *ip_defrag(struct sk_buff
 		    qp->meat == qp->len)
 			ret = ip_frag_reasm(qp, dev);
 
+		if (ret)
+			SET_VE_OWNER_SKB(ret, VE_OWNER_SKB(skb));
+
 		spin_unlock(&qp->lock);
 		ipq_put(qp, NULL);
 		return ret;
@@ -729,6 +747,51 @@ struct sk_buff *ip_defrag(struct sk_buff
 	return NULL;
 }
 
+#ifdef CONFIG_VE
+/* Kill all ipqs for which ve_accessible_strict(owner VE, envid) holds. */
+void ip_fragment_cleanup(struct ve_struct *envid)
+{
+	int i, progress;
+
+	/* All operations with fragment queues are performed from NET_RX/TX
+	 * soft interrupts or from timer context.  --Den */
+	local_bh_disable();
+	do {
+		progress = 0;
+		for (i = 0; i < IPQ_HASHSZ; i++) {
+			struct ipq *qp;
+			struct hlist_node *p, *n;
+
+			if (hlist_empty(&ipq_hash[i]))
+				continue;
+inner_restart:
+			read_lock(&ipfrag_lock);
+			hlist_for_each_entry_safe(qp, p, n,
+					&ipq_hash[i], list) {
+				if (!ve_accessible_strict(
+						VE_OWNER_IPQ(qp),
+						envid))
+					continue;
+				atomic_inc(&qp->refcnt);	/* pin qp across unlock */
+				read_unlock(&ipfrag_lock);
+
+				spin_lock(&qp->lock);
+				if (!(qp->last_in&COMPLETE))
+					ipq_kill(qp);
+				spin_unlock(&qp->lock);
+
+				ipq_put(qp, NULL);
+				progress = 1;
+				goto inner_restart; /* lock was dropped: rescan */
+			}
+			read_unlock(&ipfrag_lock);
+		}
+	} while(progress);	/* until a pass matches no queue */
+	local_bh_enable();
+}
+EXPORT_SYMBOL(ip_fragment_cleanup);
+#endif
+
 void ipfrag_init(void)
 {
 	ipfrag_hash_rnd = (u32) ((num_physpages ^ (num_physpages>>7)) ^
diff -uprN linux-2.6.16/net/ipv4/ip_output.c linux-2.6.16.ovz/net/ipv4/ip_output.c
--- linux-2.6.16/net/ipv4/ip_output.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/net/ipv4/ip_output.c	2006-07-05 08:34:56.000000000 -0400
@@ -86,8 +86,6 @@
 
 int sysctl_ip_default_ttl = IPDEFTTL;
 
-static int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff*));
-
 /* Generate a checksum for an outgoing IP datagram. */
 __inline__ void ip_send_check(struct iphdr *iph)
 {
@@ -421,7 +419,7 @@ static void ip_copy_metadata(struct sk_b
  *	single device frame, and queue such a frame for sending.
  */
 
-static int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff*))
+int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff*))
 {
 	struct iphdr *iph;
 	int raw = 0;
@@ -673,6 +671,8 @@ fail:
 	return err;
 }
 
+EXPORT_SYMBOL(ip_fragment);
+
 int
 ip_generic_getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb)
 {
@@ -1249,11 +1249,7 @@ int ip_push_pending_frames(struct sock *
 	iph->tos = inet->tos;
 	iph->tot_len = htons(skb->len);
 	iph->frag_off = df;
-	if (!df) {
-		__ip_select_ident(iph, &rt->u.dst, 0);
-	} else {
-		iph->id = htons(inet->id++);
-	}
+	ip_select_ident(iph, &rt->u.dst, sk);
 	iph->ttl = ttl;
 	iph->protocol = sk->sk_protocol;
 	iph->saddr = rt->rt_src;
@@ -1340,12 +1336,13 @@ void ip_send_reply(struct sock *sk, stru
 		char			data[40];
 	} replyopts;
 	struct ipcm_cookie ipc;
-	u32 daddr;
+	u32 saddr, daddr;
 	struct rtable *rt = (struct rtable*)skb->dst;
 
 	if (ip_options_echo(&replyopts.opt, skb))
 		return;
 
+	saddr = skb->nh.iph->daddr;
 	daddr = ipc.addr = rt->rt_src;
 	ipc.opt = NULL;
 
@@ -1359,7 +1356,7 @@ void ip_send_reply(struct sock *sk, stru
 	{
 		struct flowi fl = { .nl_u = { .ip4_u =
 					      { .daddr = daddr,
-						.saddr = rt->rt_spec_dst,
+						.saddr = saddr,
 						.tos = RT_TOS(skb->nh.iph->tos) } },
 				    /* Not quite clean, but right. */
 				    .uli_u = { .ports =
diff -uprN linux-2.6.16/net/ipv4/ipmr.c linux-2.6.16.ovz/net/ipv4/ipmr.c
--- linux-2.6.16/net/ipv4/ipmr.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/net/ipv4/ipmr.c	2006-07-05 08:34:56.000000000 -0400
@@ -837,7 +837,7 @@ static void mrtsock_destruct(struct sock
 {
 	rtnl_lock();
 	if (sk == mroute_socket) {
-		ipv4_devconf.mc_forwarding--;
+		ve_ipv4_devconf.mc_forwarding--;
 
 		write_lock_bh(&mrt_lock);
 		mroute_socket=NULL;
@@ -888,7 +888,7 @@ int ip_mroute_setsockopt(struct sock *sk
 				mroute_socket=sk;
 				write_unlock_bh(&mrt_lock);
 
-				ipv4_devconf.mc_forwarding++;
+				ve_ipv4_devconf.mc_forwarding++;
 			}
 			rtnl_unlock();
 			return ret;
diff -uprN linux-2.6.16/net/ipv4/ipvs/ip_vs_conn.c linux-2.6.16.ovz/net/ipv4/ipvs/ip_vs_conn.c
--- linux-2.6.16/net/ipv4/ipvs/ip_vs_conn.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/net/ipv4/ipvs/ip_vs_conn.c	2006-07-05 08:34:56.000000000 -0400
@@ -902,7 +902,8 @@ int ip_vs_conn_init(void)
 	/* Allocate ip_vs_conn slab cache */
 	ip_vs_conn_cachep = kmem_cache_create("ip_vs_conn",
 					      sizeof(struct ip_vs_conn), 0,
-					      SLAB_HWCACHE_ALIGN, NULL, NULL);
+					      SLAB_HWCACHE_ALIGN | SLAB_UBC,
+					      NULL, NULL);
 	if (!ip_vs_conn_cachep) {
 		vfree(ip_vs_conn_tab);
 		return -ENOMEM;
diff -uprN linux-2.6.16/net/ipv4/ipvs/ip_vs_core.c linux-2.6.16.ovz/net/ipv4/ipvs/ip_vs_core.c
--- linux-2.6.16/net/ipv4/ipvs/ip_vs_core.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/net/ipv4/ipvs/ip_vs_core.c	2006-07-05 08:34:56.000000000 -0400
@@ -952,6 +952,10 @@ ip_vs_in(unsigned int hooknum, struct sk
 	 *	Big tappo: only PACKET_HOST (neither loopback nor mcasts)
 	 *	... don't know why 1st test DOES NOT include 2nd (?)
 	 */
+	/*
+	 * VZ: the question above is right.
+	 * The second test is superfluous.
+	 */
 	if (unlikely(skb->pkt_type != PACKET_HOST
 		     || skb->dev == &loopback_dev || skb->sk)) {
 		IP_VS_DBG(12, "packet type=%d proto=%d daddr=%d.%d.%d.%d ignored\n",
diff -uprN linux-2.6.16/net/ipv4/netfilter/arp_tables.c linux-2.6.16.ovz/net/ipv4/netfilter/arp_tables.c
--- linux-2.6.16/net/ipv4/netfilter/arp_tables.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/net/ipv4/netfilter/arp_tables.c	2006-07-05 08:34:56.000000000 -0400
@@ -941,7 +941,7 @@ static int do_add_counters(void __user *
 
 	write_lock_bh(&t->lock);
 	private = t->private;
-	if (private->number != paddc->num_counters) {
+	if (private->number != tmp.num_counters) {
 		ret = -EINVAL;
 		goto unlock_up_free;
 	}
diff -uprN linux-2.6.16/net/ipv4/netfilter/ip_conntrack_core.c linux-2.6.16.ovz/net/ipv4/netfilter/ip_conntrack_core.c
--- linux-2.6.16/net/ipv4/netfilter/ip_conntrack_core.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/net/ipv4/netfilter/ip_conntrack_core.c	2006-07-05 08:34:56.000000000 -0400
@@ -49,6 +49,7 @@
 #include <linux/netfilter_ipv4/ip_conntrack_helper.h>
 #include <linux/netfilter_ipv4/ip_conntrack_core.h>
 #include <linux/netfilter_ipv4/listhelp.h>
+#include <ub/ub_mem.h>
 
 #define IP_CONNTRACK_VERSION	"2.4"
 
@@ -60,22 +61,41 @@
 
 DEFINE_RWLOCK(ip_conntrack_lock);
 
-/* ip_conntrack_standalone needs this */
-atomic_t ip_conntrack_count = ATOMIC_INIT(0);
+#ifdef CONFIG_VE_IPTABLES
+#define ve_ip_conntrack_helpers \
+	(get_exec_env()->_ip_conntrack->_ip_conntrack_helpers)
+#define ve_ip_conntrack_max \
+	(get_exec_env()->_ip_conntrack->_ip_conntrack_max)
+#define ve_ip_conntrack_count \
+	(get_exec_env()->_ip_conntrack->_ip_conntrack_count)
+#define ve_ip_conntrack_unconfirmed \
+	(get_exec_env()->_ip_conntrack->_ip_conntrack_unconfirmed)
+#else
 
 void (*ip_conntrack_destroyed)(struct ip_conntrack *conntrack) = NULL;
 LIST_HEAD(ip_conntrack_expect_list);
 struct ip_conntrack_protocol *ip_ct_protos[MAX_IP_CT_PROTO];
 static LIST_HEAD(helpers);
+struct list_head *ip_conntrack_hash;
+static LIST_HEAD(unconfirmed);
+#define ve_ip_conntrack_count 		ip_conntrack_count
+#define ve_ip_conntrack_helpers		helpers
+#define ve_ip_conntrack_max 		ip_conntrack_max
+#define ve_ip_conntrack_unconfirmed 	unconfirmed
+#endif
+
+/* ip_conntrack_standalone needs this */
+atomic_t ip_conntrack_count = ATOMIC_INIT(0);
+
 unsigned int ip_conntrack_htable_size = 0;
 int ip_conntrack_max;
-struct list_head *ip_conntrack_hash;
 static kmem_cache_t *ip_conntrack_cachep __read_mostly;
 static kmem_cache_t *ip_conntrack_expect_cachep __read_mostly;
 struct ip_conntrack ip_conntrack_untracked;
 unsigned int ip_ct_log_invalid;
-static LIST_HEAD(unconfirmed);
+#ifndef CONFIG_VE_IPTABLES
 static int ip_conntrack_vmalloc;
+#endif
 
 static unsigned int ip_conntrack_next_id = 1;
 static unsigned int ip_conntrack_expect_next_id = 1;
@@ -105,6 +125,9 @@ void ip_ct_deliver_cached_events(const s
 {
 	struct ip_conntrack_ecache *ecache;
 	
+	if (!ve_is_super(get_exec_env()))
+		return;
+
 	local_bh_disable();
 	ecache = &__get_cpu_var(ip_conntrack_ecache);
 	if (ecache->ct == ct)
@@ -133,6 +156,9 @@ static void ip_ct_event_cache_flush(void
 	struct ip_conntrack_ecache *ecache;
 	int cpu;
 
+	if (!ve_is_super(get_exec_env()))
+		return;
+
 	for_each_cpu(cpu) {
 		ecache = &per_cpu(ip_conntrack_ecache, cpu);
 		if (ecache->ct)
@@ -226,7 +252,7 @@ __ip_conntrack_expect_find(const struct 
 {
 	struct ip_conntrack_expect *i;
 	
-	list_for_each_entry(i, &ip_conntrack_expect_list, list) {
+	list_for_each_entry(i, &ve_ip_conntrack_expect_list, list) {
 		if (ip_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask)) {
 			atomic_inc(&i->use);
 			return i;
@@ -255,7 +281,7 @@ find_expectation(const struct ip_conntra
 {
 	struct ip_conntrack_expect *i;
 
-	list_for_each_entry(i, &ip_conntrack_expect_list, list) {
+	list_for_each_entry(i, &ve_ip_conntrack_expect_list, list) {
 		/* If master is not in hash table yet (ie. packet hasn't left
 		   this machine yet), how can other end know about expected?
 		   Hence these are not the droids you are looking for (if
@@ -284,7 +310,7 @@ void ip_ct_remove_expectations(struct ip
 	if (ct->expecting == 0)
 		return;
 
-	list_for_each_entry_safe(i, tmp, &ip_conntrack_expect_list, list) {
+	list_for_each_entry_safe(i, tmp, &ve_ip_conntrack_expect_list, list) {
 		if (i->master == ct && del_timer(&i->timeout)) {
 			ip_ct_unlink_expect(i);
 			ip_conntrack_expect_put(i);
@@ -302,8 +328,10 @@ clean_from_lists(struct ip_conntrack *ct
 
 	ho = hash_conntrack(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
 	hr = hash_conntrack(&ct->tuplehash[IP_CT_DIR_REPLY].tuple);
-	LIST_DELETE(&ip_conntrack_hash[ho], &ct->tuplehash[IP_CT_DIR_ORIGINAL]);
-	LIST_DELETE(&ip_conntrack_hash[hr], &ct->tuplehash[IP_CT_DIR_REPLY]);
+	LIST_DELETE(&ve_ip_conntrack_hash[ho],
+ 		    &ct->tuplehash[IP_CT_DIR_ORIGINAL]);
+	LIST_DELETE(&ve_ip_conntrack_hash[hr],
+ 		    &ct->tuplehash[IP_CT_DIR_REPLY]);
 
 	/* Destroy all pending expectations */
 	ip_ct_remove_expectations(ct);
@@ -329,8 +357,8 @@ destroy_conntrack(struct nf_conntrack *n
 	if (proto && proto->destroy)
 		proto->destroy(ct);
 
-	if (ip_conntrack_destroyed)
-		ip_conntrack_destroyed(ct);
+	if (ve_ip_conntrack_destroyed)
+		ve_ip_conntrack_destroyed(ct);
 
 	write_lock_bh(&ip_conntrack_lock);
 	/* Expectations will have been removed in clean_from_lists,
@@ -358,7 +386,11 @@ destroy_conntrack(struct nf_conntrack *n
 static void death_by_timeout(unsigned long ul_conntrack)
 {
 	struct ip_conntrack *ct = (void *)ul_conntrack;
+#ifdef CONFIG_VE_IPTABLES
+	struct ve_struct *old;
 
+	old = set_exec_env(VE_OWNER_CT(ct));
+#endif
 	write_lock_bh(&ip_conntrack_lock);
 	/* Inside lock so preempt is disabled on module removal path.
 	 * Otherwise we can get spurious warnings. */
@@ -366,6 +398,9 @@ static void death_by_timeout(unsigned lo
 	clean_from_lists(ct);
 	write_unlock_bh(&ip_conntrack_lock);
 	ip_conntrack_put(ct);
+#ifdef CONFIG_VE_IPTABLES
+	(void)set_exec_env(old);
+#endif
 }
 
 static inline int
@@ -386,7 +421,7 @@ __ip_conntrack_find(const struct ip_conn
 	unsigned int hash = hash_conntrack(tuple);
 
 	ASSERT_READ_LOCK(&ip_conntrack_lock);
-	list_for_each_entry(h, &ip_conntrack_hash[hash], list) {
+	list_for_each_entry(h, &ve_ip_conntrack_hash[hash], list) {
 		if (conntrack_tuple_cmp(h, tuple, ignored_conntrack)) {
 			CONNTRACK_STAT_INC(found);
 			return h;
@@ -418,9 +453,9 @@ static void __ip_conntrack_hash_insert(s
 					unsigned int repl_hash) 
 {
 	ct->id = ++ip_conntrack_next_id;
-	list_prepend(&ip_conntrack_hash[hash],
+	list_prepend(&ve_ip_conntrack_hash[hash],
 		     &ct->tuplehash[IP_CT_DIR_ORIGINAL].list);
-	list_prepend(&ip_conntrack_hash[repl_hash],
+	list_prepend(&ve_ip_conntrack_hash[repl_hash],
 		     &ct->tuplehash[IP_CT_DIR_REPLY].list);
 }
 
@@ -471,11 +506,11 @@ __ip_conntrack_confirm(struct sk_buff **
 	/* See if there's one in the list already, including reverse:
            NAT could have grabbed it without realizing, since we're
            not in the hash.  If there is, we lost race. */
-	if (!LIST_FIND(&ip_conntrack_hash[hash],
+	if (!LIST_FIND(&ve_ip_conntrack_hash[hash],
 		       conntrack_tuple_cmp,
 		       struct ip_conntrack_tuple_hash *,
 		       &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple, NULL)
-	    && !LIST_FIND(&ip_conntrack_hash[repl_hash],
+	    && !LIST_FIND(&ve_ip_conntrack_hash[repl_hash],
 			  conntrack_tuple_cmp,
 			  struct ip_conntrack_tuple_hash *,
 			  &ct->tuplehash[IP_CT_DIR_REPLY].tuple, NULL)) {
@@ -569,7 +604,7 @@ static inline int helper_cmp(const struc
 static struct ip_conntrack_helper *
 __ip_conntrack_helper_find( const struct ip_conntrack_tuple *tuple)
 {
-	return LIST_FIND(&helpers, helper_cmp,
+	return LIST_FIND(&ve_ip_conntrack_helpers, helper_cmp,
 			 struct ip_conntrack_helper *,
 			 tuple);
 }
@@ -605,7 +640,7 @@ void ip_conntrack_helper_put(struct ip_c
 struct ip_conntrack_protocol *
 __ip_conntrack_proto_find(u_int8_t protocol)
 {
-	return ip_ct_protos[protocol];
+	return ve_ip_ct_protos[protocol];
 }
 
 /* this is guaranteed to always return a valid protocol helper, since
@@ -632,29 +667,32 @@ void ip_conntrack_proto_put(struct ip_co
 }
 
 struct ip_conntrack *ip_conntrack_alloc(struct ip_conntrack_tuple *orig,
-					struct ip_conntrack_tuple *repl)
+		struct ip_conntrack_tuple *repl, struct user_beancounter *ub)
 {
 	struct ip_conntrack *conntrack;
+	struct user_beancounter *old_ub;
 
 	if (!ip_conntrack_hash_rnd_initted) {
 		get_random_bytes(&ip_conntrack_hash_rnd, 4);
 		ip_conntrack_hash_rnd_initted = 1;
 	}
 
-	if (ip_conntrack_max
-	    && atomic_read(&ip_conntrack_count) >= ip_conntrack_max) {
+	if (ve_ip_conntrack_max
+	    && atomic_read(&ve_ip_conntrack_count) >= ve_ip_conntrack_max) {
 		unsigned int hash = hash_conntrack(orig);
 		/* Try dropping from this hash chain. */
-		if (!early_drop(&ip_conntrack_hash[hash])) {
+		if (!early_drop(&ve_ip_conntrack_hash[hash])) {
 			if (net_ratelimit())
-				printk(KERN_WARNING
-				       "ip_conntrack: table full, dropping"
-				       " packet.\n");
+				ve_printk(VE_LOG_BOTH, KERN_WARNING
+				       "ip_conntrack: VPS %d: table full, dropping"
+				       " packet.\n", VEID(get_exec_env()));
 			return ERR_PTR(-ENOMEM);
 		}
 	}
 
+	old_ub = set_exec_ub(ub);
 	conntrack = kmem_cache_alloc(ip_conntrack_cachep, GFP_ATOMIC);
+	(void)set_exec_ub(old_ub);
 	if (!conntrack) {
 		DEBUGP("Can't allocate conntrack.\n");
 		return ERR_PTR(-ENOMEM);
@@ -669,8 +707,11 @@ struct ip_conntrack *ip_conntrack_alloc(
 	init_timer(&conntrack->timeout);
 	conntrack->timeout.data = (unsigned long)conntrack;
 	conntrack->timeout.function = death_by_timeout;
+#ifdef CONFIG_VE_IPTABLES
+	SET_VE_OWNER_CT(conntrack, get_exec_env());
+#endif
 
-	atomic_inc(&ip_conntrack_count);
+	atomic_inc(&ve_ip_conntrack_count);
 
 	return conntrack;
 }
@@ -678,7 +719,7 @@ struct ip_conntrack *ip_conntrack_alloc(
 void
 ip_conntrack_free(struct ip_conntrack *conntrack)
 {
-	atomic_dec(&ip_conntrack_count);
+	atomic_dec(&ve_ip_conntrack_count);
 	kmem_cache_free(ip_conntrack_cachep, conntrack);
 }
 
@@ -692,13 +733,22 @@ init_conntrack(struct ip_conntrack_tuple
 	struct ip_conntrack *conntrack;
 	struct ip_conntrack_tuple repl_tuple;
 	struct ip_conntrack_expect *exp;
+	struct user_beancounter *ub;
 
 	if (!ip_ct_invert_tuple(&repl_tuple, tuple, protocol)) {
 		DEBUGP("Can't invert tuple.\n");
 		return NULL;
 	}
 
-	conntrack = ip_conntrack_alloc(tuple, &repl_tuple);
+#ifdef CONFIG_USER_RESOURCE
+	if (skb->dev != NULL)  /* received skb */
+		ub = netdev_bc(skb->dev)->exec_ub;
+	else if (skb->sk != NULL) /* sent skb */
+		ub = sock_bc(skb->sk)->ub;
+	else
+#endif
+		ub = NULL;
+	conntrack = ip_conntrack_alloc(tuple, &repl_tuple, ub);
 	if (conntrack == NULL || IS_ERR(conntrack))
 		return (struct ip_conntrack_tuple_hash *)conntrack;
 
@@ -733,7 +783,8 @@ init_conntrack(struct ip_conntrack_tuple
 	}
 
 	/* Overload tuple linked list to put us in unconfirmed list. */
-	list_add(&conntrack->tuplehash[IP_CT_DIR_ORIGINAL].list, &unconfirmed);
+	list_add(&conntrack->tuplehash[IP_CT_DIR_ORIGINAL].list,
+			&ve_ip_conntrack_unconfirmed);
 
 	write_unlock_bh(&ip_conntrack_lock);
 
@@ -925,7 +976,7 @@ void ip_conntrack_unexpect_related(struc
 
 	write_lock_bh(&ip_conntrack_lock);
 	/* choose the the oldest expectation to evict */
-	list_for_each_entry_reverse(i, &ip_conntrack_expect_list, list) {
+	list_for_each_entry_reverse(i, &ve_ip_conntrack_expect_list, list) {
 		if (expect_matches(i, exp) && del_timer(&i->timeout)) {
 			ip_ct_unlink_expect(i);
 			write_unlock_bh(&ip_conntrack_lock);
@@ -959,11 +1010,11 @@ void ip_conntrack_expect_put(struct ip_c
 		kmem_cache_free(ip_conntrack_expect_cachep, exp);
 }
 
-static void ip_conntrack_expect_insert(struct ip_conntrack_expect *exp)
+void ip_conntrack_expect_insert(struct ip_conntrack_expect *exp)
 {
 	atomic_inc(&exp->use);
 	exp->master->expecting++;
-	list_add(&exp->list, &ip_conntrack_expect_list);
+	list_add(&exp->list, &ve_ip_conntrack_expect_list);
 
 	init_timer(&exp->timeout);
 	exp->timeout.data = (unsigned long)exp;
@@ -975,13 +1026,14 @@ static void ip_conntrack_expect_insert(s
 	atomic_inc(&exp->use);
 	CONNTRACK_STAT_INC(expect_create);
 }
+EXPORT_SYMBOL_GPL(ip_conntrack_expect_insert);
 
 /* Race with expectations being used means we could have none to find; OK. */
 static void evict_oldest_expect(struct ip_conntrack *master)
 {
 	struct ip_conntrack_expect *i;
 
-	list_for_each_entry_reverse(i, &ip_conntrack_expect_list, list) {
+	list_for_each_entry_reverse(i, &ve_ip_conntrack_expect_list, list) {
 		if (i->master == master) {
 			if (del_timer(&i->timeout)) {
 				ip_ct_unlink_expect(i);
@@ -1012,7 +1064,7 @@ int ip_conntrack_expect_related(struct i
 	DEBUGP("mask:  "); DUMP_TUPLE(&expect->mask);
 
 	write_lock_bh(&ip_conntrack_lock);
-	list_for_each_entry(i, &ip_conntrack_expect_list, list) {
+	list_for_each_entry(i, &ve_ip_conntrack_expect_list, list) {
 		if (expect_matches(i, expect)) {
 			/* Refresh timer: if it's dying, ignore.. */
 			if (refresh_timer(i)) {
@@ -1060,18 +1112,48 @@ int ip_conntrack_helper_register(struct 
 {
 	BUG_ON(me->timeout == 0);
 	write_lock_bh(&ip_conntrack_lock);
-	list_prepend(&helpers, me);
+	list_prepend(&ve_ip_conntrack_helpers, me);
 	write_unlock_bh(&ip_conntrack_lock);
 
 	return 0;
 }
 
+int virt_ip_conntrack_helper_register(struct ip_conntrack_helper *me)
+{
+	int ret;
+	struct module *mod = me->me;
+
+	if (!ve_is_super(get_exec_env())) {
+		struct ip_conntrack_helper *tmp;
+		__module_get(mod);
+		ret = -ENOMEM;
+		tmp = kmalloc(sizeof(struct ip_conntrack_helper), GFP_KERNEL);
+		if (!tmp)
+			goto nomem;
+		memcpy(tmp, me, sizeof(struct ip_conntrack_helper));
+		me = tmp;
+	}
+
+	ret = ip_conntrack_helper_register(me);
+	if (ret)
+		goto out;
+
+	return 0;
+out:
+	if (!ve_is_super(get_exec_env())){
+		kfree(me);
+nomem:
+		module_put(mod);
+	}
+	return ret;
+}
+
 struct ip_conntrack_helper *
 __ip_conntrack_helper_find_byname(const char *name)
 {
 	struct ip_conntrack_helper *h;
 
-	list_for_each_entry(h, &helpers, list) {
+	list_for_each_entry(h, &ve_ip_conntrack_helpers, list) {
 		if (!strcmp(h->name, name))
 			return h;
 	}
@@ -1096,19 +1178,20 @@ void ip_conntrack_helper_unregister(stru
 
 	/* Need write lock here, to delete helper. */
 	write_lock_bh(&ip_conntrack_lock);
-	LIST_DELETE(&helpers, me);
+	LIST_DELETE(&ve_ip_conntrack_helpers, me);
 
 	/* Get rid of expectations */
-	list_for_each_entry_safe(exp, tmp, &ip_conntrack_expect_list, list) {
+	list_for_each_entry_safe(exp, tmp, &ve_ip_conntrack_expect_list, list) {
 		if (exp->master->helper == me && del_timer(&exp->timeout)) {
 			ip_ct_unlink_expect(exp);
 			ip_conntrack_expect_put(exp);
 		}
 	}
 	/* Get rid of expecteds, set helpers to NULL. */
-	LIST_FIND_W(&unconfirmed, unhelp, struct ip_conntrack_tuple_hash*, me);
+	LIST_FIND_W(&ve_ip_conntrack_unconfirmed, unhelp,
+			struct ip_conntrack_tuple_hash*, me);
 	for (i = 0; i < ip_conntrack_htable_size; i++)
-		LIST_FIND_W(&ip_conntrack_hash[i], unhelp,
+		LIST_FIND_W(&ve_ip_conntrack_hash[i], unhelp,
 			    struct ip_conntrack_tuple_hash *, me);
 	write_unlock_bh(&ip_conntrack_lock);
 
@@ -1116,6 +1199,25 @@ void ip_conntrack_helper_unregister(stru
 	synchronize_net();
 }
 
+void virt_ip_conntrack_helper_unregister(struct ip_conntrack_helper *me)
+{
+
+	if (!ve_is_super(get_exec_env())) {
+		read_lock_bh(&ip_conntrack_lock);
+		me = list_named_find(&ve_ip_conntrack_helpers, me->name);
+		read_unlock_bh(&ip_conntrack_lock);
+		if (!me)
+			return;
+	}
+
+	ip_conntrack_helper_unregister(me);
+
+	if (!ve_is_super(get_exec_env())) {
+		module_put(me->me);
+		kfree(me);
+	}
+}
+
 /* Refresh conntrack for this many jiffies and do accounting if do_acct is 1 */
 void __ip_ct_refresh_acct(struct ip_conntrack *ct, 
 		        enum ip_conntrack_info ctinfo,
@@ -1246,13 +1348,13 @@ get_next_corpse(int (*iter)(struct ip_co
 
 	write_lock_bh(&ip_conntrack_lock);
 	for (; *bucket < ip_conntrack_htable_size; (*bucket)++) {
-		h = LIST_FIND_W(&ip_conntrack_hash[*bucket], do_iter,
+		h = LIST_FIND_W(&ve_ip_conntrack_hash[*bucket], do_iter,
 				struct ip_conntrack_tuple_hash *, iter, data);
 		if (h)
 			break;
 	}
 	if (!h)
-		h = LIST_FIND_W(&unconfirmed, do_iter,
+		h = LIST_FIND_W(&ve_ip_conntrack_unconfirmed, do_iter,
 				struct ip_conntrack_tuple_hash *, iter, data);
 	if (h)
 		atomic_inc(&tuplehash_to_ctrack(h)->ct_general.use);
@@ -1289,6 +1391,11 @@ getorigdst(struct sock *sk, int optval, 
 	struct ip_conntrack_tuple_hash *h;
 	struct ip_conntrack_tuple tuple;
 	
+#ifdef CONFIG_VE_IPTABLES
+	if (!get_exec_env()->_ip_conntrack)
+		return -ENOPROTOOPT;
+#endif
+
 	IP_CT_TUPLE_U_BLANK(&tuple);
 	tuple.src.ip = inet->rcv_saddr;
 	tuple.src.u.tcp.port = inet->sport;
@@ -1318,6 +1425,7 @@ getorigdst(struct sock *sk, int optval, 
 			.tuple.dst.u.tcp.port;
 		sin.sin_addr.s_addr = ct->tuplehash[IP_CT_DIR_ORIGINAL]
 			.tuple.dst.ip;
+		memset(sin.sin_zero, 0, sizeof(sin.sin_zero));
 
 		DEBUGP("SO_ORIGINAL_DST: %u.%u.%u.%u %u\n",
 		       NIPQUAD(sin.sin_addr.s_addr), ntohs(sin.sin_port));
@@ -1359,12 +1467,17 @@ static void free_conntrack_hash(struct l
 			   get_order(sizeof(struct list_head) * size));
 }
 
+static void ip_conntrack_cache_free(void)
+{	/* Destroys both conntrack slab caches, then drops so_getorigdst. */
+	kmem_cache_destroy(ip_conntrack_expect_cachep);
+	kmem_cache_destroy(ip_conntrack_cachep);
+	nf_unregister_sockopt(&so_getorigdst);
+}
+
 /* Mishearing the voices in his head, our hero wonders how he's
    supposed to kill the mall. */
 void ip_conntrack_cleanup(void)
 {
-	ip_ct_attach = NULL;
-
 	/* This makes sure all current packets have passed through
            netfilter framework.  Roll on, two-stage module
            delete... */
@@ -1373,19 +1486,32 @@ void ip_conntrack_cleanup(void)
 	ip_ct_event_cache_flush();
  i_see_dead_people:
 	ip_conntrack_flush();
-	if (atomic_read(&ip_conntrack_count) != 0) {
+	if (atomic_read(&ve_ip_conntrack_count) != 0) {
 		schedule();
 		goto i_see_dead_people;
 	}
-	/* wait until all references to ip_conntrack_untracked are dropped */
-	while (atomic_read(&ip_conntrack_untracked.ct_general.use) > 1)
-		schedule();
-
-	kmem_cache_destroy(ip_conntrack_cachep);
-	kmem_cache_destroy(ip_conntrack_expect_cachep);
-	free_conntrack_hash(ip_conntrack_hash, ip_conntrack_vmalloc,
+	if (ve_is_super(get_exec_env())) {
+		/* wait until all references to ip_conntrack_untracked are
+		 * dropped */
+		while (atomic_read(&ip_conntrack_untracked.ct_general.use) > 1)
+			schedule();
+		ip_ct_attach = NULL;
+		ip_conntrack_cache_free();
+	}
+	free_conntrack_hash(ve_ip_conntrack_hash, ve_ip_conntrack_vmalloc,
 			    ip_conntrack_htable_size);
-	nf_unregister_sockopt(&so_getorigdst);
+	ve_ip_conntrack_hash = NULL;		    
+	INIT_LIST_HEAD(&ve_ip_conntrack_unconfirmed);
+	INIT_LIST_HEAD(&ve_ip_conntrack_expect_list);
+	INIT_LIST_HEAD(&ve_ip_conntrack_helpers);
+	atomic_set(&ve_ip_conntrack_count, 0);
+	ve_ip_conntrack_max = 0;
+#ifdef CONFIG_VE_IPTABLES
+	kfree(ve_ip_ct_protos);
+	ve_ip_ct_protos = NULL;
+	kfree(get_exec_env()->_ip_conntrack);
+	get_exec_env()->_ip_conntrack = NULL;
+#endif
 }
 
 static struct list_head *alloc_hashtable(int size, int *vmalloced)
@@ -1394,13 +1520,13 @@ static struct list_head *alloc_hashtable
 	unsigned int i;
 
 	*vmalloced = 0; 
-	hash = (void*)__get_free_pages(GFP_KERNEL, 
+	hash = (void*)__get_free_pages(GFP_KERNEL_UBC,
 				       get_order(sizeof(struct list_head)
 						 * size));
 	if (!hash) { 
 		*vmalloced = 1;
 		printk(KERN_WARNING"ip_conntrack: falling back to vmalloc.\n");
-		hash = vmalloc(sizeof(struct list_head) * size);
+		hash = ub_vmalloc(sizeof(struct list_head) * size);
 	}
 
 	if (hash)
@@ -1436,8 +1562,8 @@ static int set_hashsize(const char *val,
 
 	write_lock_bh(&ip_conntrack_lock);
 	for (i = 0; i < ip_conntrack_htable_size; i++) {
-		while (!list_empty(&ip_conntrack_hash[i])) {
-			h = list_entry(ip_conntrack_hash[i].next,
+		while (!list_empty(&ve_ip_conntrack_hash[i])) {
+			h = list_entry(ve_ip_conntrack_hash[i].next,
 				       struct ip_conntrack_tuple_hash, list);
 			list_del(&h->list);
 			bucket = __hash_conntrack(&h->tuple, hashsize, rnd);
@@ -1445,12 +1571,12 @@ static int set_hashsize(const char *val,
 		}
 	}
 	old_size = ip_conntrack_htable_size;
-	old_vmalloced = ip_conntrack_vmalloc;
-	old_hash = ip_conntrack_hash;
+	old_vmalloced = ve_ip_conntrack_vmalloc;
+	old_hash = ve_ip_conntrack_hash;
 
 	ip_conntrack_htable_size = hashsize;
-	ip_conntrack_vmalloc = vmalloced;
-	ip_conntrack_hash = hash;
+	ve_ip_conntrack_vmalloc = vmalloced;
+	ve_ip_conntrack_hash = hash;
 	ip_conntrack_hash_rnd = rnd;
 	write_unlock_bh(&ip_conntrack_lock);
 
@@ -1461,9 +1587,8 @@ static int set_hashsize(const char *val,
 module_param_call(hashsize, set_hashsize, param_get_uint,
 		  &ip_conntrack_htable_size, 0600);
 
-int __init ip_conntrack_init(void)
+static int ip_conntrack_cache_create(void)
 {
-	unsigned int i;
 	int ret;
 
 	/* Idea from tcp.c: use 1/16384 of memory.  On i386: 32MB
@@ -1477,70 +1602,127 @@ int __init ip_conntrack_init(void)
 		if (ip_conntrack_htable_size < 16)
 			ip_conntrack_htable_size = 16;
 	}
-	ip_conntrack_max = 8 * ip_conntrack_htable_size;
+	ve_ip_conntrack_max = 8 * ip_conntrack_htable_size;
 
 	printk("ip_conntrack version %s (%u buckets, %d max)"
 	       " - %Zd bytes per conntrack\n", IP_CONNTRACK_VERSION,
-	       ip_conntrack_htable_size, ip_conntrack_max,
+	       ip_conntrack_htable_size, ve_ip_conntrack_max,
 	       sizeof(struct ip_conntrack));
 
 	ret = nf_register_sockopt(&so_getorigdst);
 	if (ret != 0) {
 		printk(KERN_ERR "Unable to register netfilter socket option\n");
-		return ret;
-	}
-
-	ip_conntrack_hash = alloc_hashtable(ip_conntrack_htable_size,
-					    &ip_conntrack_vmalloc);
-	if (!ip_conntrack_hash) {
-		printk(KERN_ERR "Unable to create ip_conntrack_hash\n");
-		goto err_unreg_sockopt;
+		goto out_sockopt;
 	}
 
+	ret = -ENOMEM;
 	ip_conntrack_cachep = kmem_cache_create("ip_conntrack",
 	                                        sizeof(struct ip_conntrack), 0,
-	                                        0, NULL, NULL);
+	                                        SLAB_UBC, NULL, NULL);
 	if (!ip_conntrack_cachep) {
 		printk(KERN_ERR "Unable to create ip_conntrack slab cache\n");
-		goto err_free_hash;
+		goto err_unreg_sockopt;
 	}
 
 	ip_conntrack_expect_cachep = kmem_cache_create("ip_conntrack_expect",
 					sizeof(struct ip_conntrack_expect),
-					0, 0, NULL, NULL);
+					0, SLAB_UBC, NULL, NULL);
 	if (!ip_conntrack_expect_cachep) {
 		printk(KERN_ERR "Unable to create ip_expect slab cache\n");
 		goto err_free_conntrack_slab;
 	}
 
+	return 0;
+
+err_free_conntrack_slab:
+	kmem_cache_destroy(ip_conntrack_cachep);
+err_unreg_sockopt:
+	nf_unregister_sockopt(&so_getorigdst);
+out_sockopt:
+	return ret;
+}
+
+int ip_conntrack_init(void)
+{
+	struct ve_struct *env;
+	unsigned int i;
+	int ret;
+
+	env = get_exec_env();
+#ifdef CONFIG_VE_IPTABLES
+	ret = -ENOMEM;
+	env->_ip_conntrack =
+		kmalloc(sizeof(struct ve_ip_conntrack), GFP_KERNEL);
+	if (!env->_ip_conntrack)
+		goto out;
+	memset(env->_ip_conntrack, 0, sizeof(struct ve_ip_conntrack));
+	if (ve_is_super(env)) {
+		ret = ip_conntrack_cache_create();
+		if (ret)
+			goto cache_fail;
+	} else
+		ve_ip_conntrack_max = 8 * ip_conntrack_htable_size;
+#else /* CONFIG_VE_IPTABLES */
+	ret = ip_conntrack_cache_create();
+	if (ret)
+		goto out;
+#endif
+
+	ret = -ENOMEM;
+	ve_ip_conntrack_hash = alloc_hashtable(ip_conntrack_htable_size,
+					    &ve_ip_conntrack_vmalloc);
+	if (!ve_ip_conntrack_hash) {
+		printk(KERN_ERR "Unable to create ip_conntrack_hash\n");
+		goto err_free_cache;
+	}
+
+#ifdef CONFIG_VE_IPTABLES
+	ve_ip_ct_protos = (struct ip_conntrack_protocol **)
+		ub_kmalloc(sizeof(void *)*MAX_IP_CT_PROTO, GFP_KERNEL);
+	if (!ve_ip_ct_protos)
+		goto err_free_hash;
+#endif
 	/* Don't NEED lock here, but good form anyway. */
 	write_lock_bh(&ip_conntrack_lock);
 	for (i = 0; i < MAX_IP_CT_PROTO; i++)
-		ip_ct_protos[i] = &ip_conntrack_generic_protocol;
+		ve_ip_ct_protos[i] = &ip_conntrack_generic_protocol;
 	/* Sew in builtin protocols. */
-	ip_ct_protos[IPPROTO_TCP] = &ip_conntrack_protocol_tcp;
-	ip_ct_protos[IPPROTO_UDP] = &ip_conntrack_protocol_udp;
-	ip_ct_protos[IPPROTO_ICMP] = &ip_conntrack_protocol_icmp;
+	ve_ip_ct_protos[IPPROTO_TCP] = &ip_conntrack_protocol_tcp;
+	ve_ip_ct_protos[IPPROTO_UDP] = &ip_conntrack_protocol_udp;
+	ve_ip_ct_protos[IPPROTO_ICMP] = &ip_conntrack_protocol_icmp;
 	write_unlock_bh(&ip_conntrack_lock);
 
-	/* For use by ipt_REJECT */
-	ip_ct_attach = ip_conntrack_attach;
-
-	/* Set up fake conntrack:
-	    - to never be deleted, not in any hashes */
-	atomic_set(&ip_conntrack_untracked.ct_general.use, 1);
-	/*  - and look it like as a confirmed connection */
-	set_bit(IPS_CONFIRMED_BIT, &ip_conntrack_untracked.status);
+	INIT_LIST_HEAD(&ve_ip_conntrack_unconfirmed);
+	INIT_LIST_HEAD(&ve_ip_conntrack_expect_list);
+	INIT_LIST_HEAD(&ve_ip_conntrack_helpers);
+
+	if (ve_is_super(env)) {
+		/* For use by ipt_REJECT */
+		ip_ct_attach = ip_conntrack_attach;
+
+		/* Set up fake conntrack:
+		    - to never be deleted, not in any hashes */
+		atomic_set(&ip_conntrack_untracked.ct_general.use, 1);
+		/*  - and look it like as a confirmed connection */
+		set_bit(IPS_CONFIRMED_BIT, &ip_conntrack_untracked.status);
+	}
 
-	return ret;
+	return 0;
 
-err_free_conntrack_slab:
-	kmem_cache_destroy(ip_conntrack_cachep);
+#ifdef CONFIG_VE_IPTABLES
 err_free_hash:
-	free_conntrack_hash(ip_conntrack_hash, ip_conntrack_vmalloc,
+#endif
+	free_conntrack_hash(ve_ip_conntrack_hash, ve_ip_conntrack_vmalloc,
 			    ip_conntrack_htable_size);
-err_unreg_sockopt:
-	nf_unregister_sockopt(&so_getorigdst);
-
-	return -ENOMEM;
+	ve_ip_conntrack_hash = NULL;		    
+err_free_cache:
+	if (ve_is_super(env))
+		ip_conntrack_cache_free();
+#ifdef CONFIG_VE_IPTABLES
+cache_fail:
+	kfree(env->_ip_conntrack);
+	env->_ip_conntrack = NULL;
+#endif
+out:
+	return ret;
 }
diff -uprN linux-2.6.16/net/ipv4/netfilter/ip_conntrack_ftp.c linux-2.6.16.ovz/net/ipv4/netfilter/ip_conntrack_ftp.c
--- linux-2.6.16/net/ipv4/netfilter/ip_conntrack_ftp.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/net/ipv4/netfilter/ip_conntrack_ftp.c	2006-07-05 08:34:56.000000000 -0400
@@ -15,6 +15,7 @@
 #include <linux/ctype.h>
 #include <net/checksum.h>
 #include <net/tcp.h>
+#include <linux/nfcalls.h>
 
 #include <linux/netfilter_ipv4/ip_conntrack_helper.h>
 #include <linux/netfilter_ipv4/ip_conntrack_ftp.h>
@@ -425,8 +426,8 @@ static int help(struct sk_buff **pskb,
 
 	/* Now, NAT might want to mangle the packet, and register the
 	 * (possibly changed) expectation itself. */
-	if (ip_nat_ftp_hook)
-		ret = ip_nat_ftp_hook(pskb, ctinfo, search[i].ftptype,
+	if (ve_ip_nat_ftp_hook)
+		ret = ve_ip_nat_ftp_hook(pskb, ctinfo, search[i].ftptype,
 				      matchoff, matchlen, exp, &seq);
 	else {
 		/* Can't expect this?  Best to drop packet now. */
@@ -452,16 +453,39 @@ out_update_nl:
 static struct ip_conntrack_helper ftp[MAX_PORTS];
 static char ftp_names[MAX_PORTS][sizeof("ftp-65535")];
 
-/* Not __exit: called from init() */
-static void fini(void)
+void fini_iptable_ftp(void)
 {
 	int i;
 	for (i = 0; i < ports_c; i++) {
 		DEBUGP("ip_ct_ftp: unregistering helper for port %d\n",
 				ports[i]);
-		ip_conntrack_helper_unregister(&ftp[i]);
+		virt_ip_conntrack_helper_unregister(&ftp[i]);
 	}
+}
+
+int init_iptable_ftp(void)
+{
+	int i, err;
 
+	for (i = 0; i < ports_c; i++) {
+		DEBUGP("ip_ct_ftp: registering helper for port %d\n",
+				ports[i]);
+		err = virt_ip_conntrack_helper_register(&ftp[i]);
+		if (!err)
+			continue;
+		fini_iptable_ftp();	/* unregisters every port's helper */
+		return err;
+	}
+	return 0;
+}
+
+/* Not __exit: called from init() */
+static void fini(void)
+{
+	KSYMMODUNRESOLVE(ip_conntrack_ftp);
+	KSYMUNRESOLVE(init_iptable_ftp);
+	KSYMUNRESOLVE(fini_iptable_ftp);
+	fini_iptable_ftp();
 	kfree(ftp_buffer);
 }
 
@@ -496,13 +520,17 @@ static int __init init(void)
 
 		DEBUGP("ip_ct_ftp: registering helper for port %d\n", 
 				ports[i]);
-		ret = ip_conntrack_helper_register(&ftp[i]);
+		ret = virt_ip_conntrack_helper_register(&ftp[i]);
 
 		if (ret) {
 			fini();
 			return ret;
 		}
 	}
+
+	KSYMRESOLVE(init_iptable_ftp);
+	KSYMRESOLVE(fini_iptable_ftp);
+	KSYMMODRESOLVE(ip_conntrack_ftp);
 	return 0;
 }
 
diff -uprN linux-2.6.16/net/ipv4/netfilter/ip_conntrack_irc.c linux-2.6.16.ovz/net/ipv4/netfilter/ip_conntrack_irc.c
--- linux-2.6.16/net/ipv4/netfilter/ip_conntrack_irc.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/net/ipv4/netfilter/ip_conntrack_irc.c	2006-07-05 08:34:56.000000000 -0400
@@ -28,6 +28,7 @@
 #include <linux/ip.h>
 #include <net/checksum.h>
 #include <net/tcp.h>
+#include <linux/nfcalls.h>
 
 #include <linux/netfilter_ipv4/ip_conntrack_helper.h>
 #include <linux/netfilter_ipv4/ip_conntrack_irc.h>
@@ -244,6 +245,33 @@ static char irc_names[MAX_PORTS][sizeof(
 
 static void fini(void);
 
+/* Unregister the IRC conntrack helper of each of the ports_c ports. */
+void fini_iptable_irc(void)
+{
+	int port_idx;
+	for (port_idx = 0; port_idx < ports_c; port_idx++) {
+		DEBUGP("unregistering port %d\n", ports[port_idx]);
+		virt_ip_conntrack_helper_unregister(&irc_helpers[port_idx]);
+	}
+}
+
+/* Register the IRC helper for each port; -EBUSY (after undo) on failure. */
+int init_iptable_irc(void)
+{
+	int i;
+
+	for (i = 0; i < ports_c; i++) {
+		DEBUGP("port #%d: %d\n", i, ports[i]);
+		if (!virt_ip_conntrack_helper_register(&irc_helpers[i]))
+			continue;
+		printk("ip_conntrack_irc: ERROR registering port %d\n",
+			ports[i]);
+		fini_iptable_irc();
+		return -EBUSY;
+	}
+	return 0;
+}
+
 static int __init init(void)
 {
 	int i, ret;
@@ -283,7 +311,7 @@ static int __init init(void)
 
 		DEBUGP("port #%d: %d\n", i, ports[i]);
 
-		ret = ip_conntrack_helper_register(hlpr);
+		ret = virt_ip_conntrack_helper_register(hlpr);
 
 		if (ret) {
 			printk("ip_conntrack_irc: ERROR registering port %d\n",
@@ -292,6 +320,10 @@ static int __init init(void)
 			return -EBUSY;
 		}
 	}
+
+	KSYMRESOLVE(init_iptable_irc);
+	KSYMRESOLVE(fini_iptable_irc);
+	KSYMMODRESOLVE(ip_conntrack_irc);
 	return 0;
 }
 
@@ -299,12 +331,10 @@ static int __init init(void)
  * it is needed by the init function */
 static void fini(void)
 {
-	int i;
-	for (i = 0; i < ports_c; i++) {
-		DEBUGP("unregistering port %d\n",
-		       ports[i]);
-		ip_conntrack_helper_unregister(&irc_helpers[i]);
-	}
+	KSYMMODUNRESOLVE(ip_conntrack_irc);
+	KSYMUNRESOLVE(init_iptable_irc);
+	KSYMUNRESOLVE(fini_iptable_irc);
+	fini_iptable_irc();
 	kfree(irc_buffer);
 }
 
diff -uprN linux-2.6.16/net/ipv4/netfilter/ip_conntrack_netlink.c linux-2.6.16.ovz/net/ipv4/netfilter/ip_conntrack_netlink.c
--- linux-2.6.16/net/ipv4/netfilter/ip_conntrack_netlink.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/net/ipv4/netfilter/ip_conntrack_netlink.c	2006-07-05 08:34:56.000000000 -0400
@@ -29,6 +29,7 @@
 #include <linux/spinlock.h>
 #include <linux/interrupt.h>
 #include <linux/notifier.h>
+#include <net/sock.h>
 
 #include <linux/netfilter.h>
 #include <linux/netfilter_ipv4/ip_conntrack.h>
@@ -39,6 +40,8 @@
 
 #include <linux/netfilter/nfnetlink.h>
 #include <linux/netfilter/nfnetlink_conntrack.h>
+#include <ub/beancounter.h>
+#include <ub/ub_sk.h>
 
 MODULE_LICENSE("GPL");
 
@@ -403,7 +406,7 @@ ctnetlink_dump_table(struct sk_buff *skb
 
 	read_lock_bh(&ip_conntrack_lock);
 	for (; cb->args[0] < ip_conntrack_htable_size; cb->args[0]++, *id = 0) {
-		list_for_each_prev(i, &ip_conntrack_hash[cb->args[0]]) {
+		list_for_each_prev(i, &ve_ip_conntrack_hash[cb->args[0]]) {
 			h = (struct ip_conntrack_tuple_hash *) i;
 			if (DIRECTION(h) != IP_CT_DIR_ORIGINAL)
 				continue;
@@ -440,7 +443,7 @@ ctnetlink_dump_table_w(struct sk_buff *s
 
 	write_lock_bh(&ip_conntrack_lock);
 	for (; cb->args[0] < ip_conntrack_htable_size; cb->args[0]++, *id = 0) {
-		list_for_each_prev(i, &ip_conntrack_hash[cb->args[0]]) {
+		list_for_each_prev(i, &ve_ip_conntrack_hash[cb->args[0]]) {
 			h = (struct ip_conntrack_tuple_hash *) i;
 			if (DIRECTION(h) != IP_CT_DIR_ORIGINAL)
 				continue;
@@ -1003,14 +1006,15 @@ ctnetlink_change_conntrack(struct ip_con
 static int
 ctnetlink_create_conntrack(struct nfattr *cda[], 
 			   struct ip_conntrack_tuple *otuple,
-			   struct ip_conntrack_tuple *rtuple)
+			   struct ip_conntrack_tuple *rtuple,
+			   struct user_beancounter *ub)
 {
 	struct ip_conntrack *ct;
 	int err = -EINVAL;
 
 	DEBUGP("entered %s\n", __FUNCTION__);
 
-	ct = ip_conntrack_alloc(otuple, rtuple);
+	ct = ip_conntrack_alloc(otuple, rtuple, ub);
 	if (ct == NULL || IS_ERR(ct))
 		return -ENOMEM;	
 
@@ -1087,8 +1091,16 @@ ctnetlink_new_conntrack(struct sock *ctn
 		write_unlock_bh(&ip_conntrack_lock);
 		DEBUGP("no such conntrack, create new\n");
 		err = -ENOENT;
-		if (nlh->nlmsg_flags & NLM_F_CREATE)
-			err = ctnetlink_create_conntrack(cda, &otuple, &rtuple);
+		if (nlh->nlmsg_flags & NLM_F_CREATE) {
+#ifdef CONFIG_USER_RESOURCE
+			if (skb->sk)
+				err = ctnetlink_create_conntrack(cda, &otuple,
+						&rtuple, sock_bc(skb->sk)->ub);
+			else
+#endif
+				err = ctnetlink_create_conntrack(cda,
+						&otuple, &rtuple, NULL);
+		}
 		return err;
 	}
 	/* implicit 'else' */
@@ -1249,7 +1261,7 @@ ctnetlink_exp_dump_table(struct sk_buff 
 	DEBUGP("entered %s, last id=%llu\n", __FUNCTION__, *id);
 
 	read_lock_bh(&ip_conntrack_lock);
-	list_for_each_prev(i, &ip_conntrack_expect_list) {
+	list_for_each_prev(i, &ve_ip_conntrack_expect_list) {
 		exp = (struct ip_conntrack_expect *) i;
 		if (exp->id <= *id)
 			continue;
@@ -1395,7 +1407,7 @@ ctnetlink_del_expect(struct sock *ctnl, 
 			write_unlock_bh(&ip_conntrack_lock);
 			return -EINVAL;
 		}
-		list_for_each_entry_safe(exp, tmp, &ip_conntrack_expect_list,
+		list_for_each_entry_safe(exp, tmp, &ve_ip_conntrack_expect_list,
 					 list) {
 			if (exp->master->helper == h 
 			    && del_timer(&exp->timeout)) {
@@ -1407,7 +1419,7 @@ ctnetlink_del_expect(struct sock *ctnl, 
 	} else {
 		/* This basically means we have to flush everything*/
 		write_lock_bh(&ip_conntrack_lock);
-		list_for_each_entry_safe(exp, tmp, &ip_conntrack_expect_list,
+		list_for_each_entry_safe(exp, tmp, &ve_ip_conntrack_expect_list,
 					 list) {
 			if (del_timer(&exp->timeout)) {
 				ip_ct_unlink_expect(exp);
@@ -1619,7 +1631,7 @@ static void __exit ctnetlink_exit(void)
 	printk("ctnetlink: unregistering from nfnetlink.\n");
 
 #ifdef CONFIG_IP_NF_CONNTRACK_EVENTS
-	ip_conntrack_unregister_notifier(&ctnl_notifier_exp);
+	ip_conntrack_expect_unregister_notifier(&ctnl_notifier_exp);
 	ip_conntrack_unregister_notifier(&ctnl_notifier);
 #endif
 
diff -uprN linux-2.6.16/net/ipv4/netfilter/ip_conntrack_proto_generic.c linux-2.6.16.ovz/net/ipv4/netfilter/ip_conntrack_proto_generic.c
--- linux-2.6.16/net/ipv4/netfilter/ip_conntrack_proto_generic.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/net/ipv4/netfilter/ip_conntrack_proto_generic.c	2006-07-05 08:34:56.000000000 -0400
@@ -52,7 +52,7 @@ static int packet(struct ip_conntrack *c
 		  const struct sk_buff *skb,
 		  enum ip_conntrack_info ctinfo)
 {
-	ip_ct_refresh_acct(conntrack, ctinfo, skb, ip_ct_generic_timeout);
+	ip_ct_refresh_acct(conntrack, ctinfo, skb, ve_ip_ct_generic_timeout);
 	return NF_ACCEPT;
 }
 
diff -uprN linux-2.6.16/net/ipv4/netfilter/ip_conntrack_proto_icmp.c linux-2.6.16.ovz/net/ipv4/netfilter/ip_conntrack_proto_icmp.c
--- linux-2.6.16/net/ipv4/netfilter/ip_conntrack_proto_icmp.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/net/ipv4/netfilter/ip_conntrack_proto_icmp.c	2006-07-05 08:34:56.000000000 -0400
@@ -104,7 +104,7 @@ static int icmp_packet(struct ip_conntra
 	} else {
 		atomic_inc(&ct->proto.icmp.count);
 		ip_conntrack_event_cache(IPCT_PROTOINFO_VOLATILE, skb);
-		ip_ct_refresh_acct(ct, ctinfo, skb, ip_ct_icmp_timeout);
+		ip_ct_refresh_acct(ct, ctinfo, skb, ve_ip_ct_icmp_timeout);
 	}
 
 	return NF_ACCEPT;
diff -uprN linux-2.6.16/net/ipv4/netfilter/ip_conntrack_proto_sctp.c linux-2.6.16.ovz/net/ipv4/netfilter/ip_conntrack_proto_sctp.c
--- linux-2.6.16/net/ipv4/netfilter/ip_conntrack_proto_sctp.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/net/ipv4/netfilter/ip_conntrack_proto_sctp.c	2006-07-05 08:34:56.000000000 -0400
@@ -235,12 +235,15 @@ static int do_basic_checks(struct ip_con
 			flag = 1;
 		}
 
-		/* Cookie Ack/Echo chunks not the first OR 
-		   Init / Init Ack / Shutdown compl chunks not the only chunks */
-		if ((sch->type == SCTP_CID_COOKIE_ACK 
+		/*
+		 * Cookie Ack/Echo chunks not the first OR
+		 * Init / Init Ack / Shutdown compl chunks not the only chunks
+		 * OR zero-length.
+		 */
+		if (((sch->type == SCTP_CID_COOKIE_ACK
 			|| sch->type == SCTP_CID_COOKIE_ECHO
 			|| flag)
-		     && count !=0 ) {
+		      && count !=0) || !sch->length) {
 			DEBUGP("Basic checks failed\n");
 			return 1;
 		}
@@ -251,7 +254,7 @@ static int do_basic_checks(struct ip_con
 	}
 
 	DEBUGP("Basic checks passed\n");
-	return 0;
+	return count == 0;
 }
 
 static int new_state(enum ip_conntrack_dir dir,
diff -uprN linux-2.6.16/net/ipv4/netfilter/ip_conntrack_proto_tcp.c linux-2.6.16.ovz/net/ipv4/netfilter/ip_conntrack_proto_tcp.c
--- linux-2.6.16/net/ipv4/netfilter/ip_conntrack_proto_tcp.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/net/ipv4/netfilter/ip_conntrack_proto_tcp.c	2006-07-05 08:34:56.000000000 -0400
@@ -98,7 +98,7 @@ unsigned int ip_ct_tcp_timeout_close =  
    to ~13-30min depending on RTO. */
 unsigned int ip_ct_tcp_timeout_max_retrans =     5 MINS;
  
-static const unsigned int * tcp_timeouts[]
+const unsigned int * tcp_timeouts[]
 = { NULL,                              /*      TCP_CONNTRACK_NONE */
     &ip_ct_tcp_timeout_syn_sent,       /*      TCP_CONNTRACK_SYN_SENT, */
     &ip_ct_tcp_timeout_syn_recv,       /*      TCP_CONNTRACK_SYN_RECV, */
@@ -762,7 +762,7 @@ static int tcp_in_window(struct ip_ct_tc
 			: "SEQ is under the lower bound (already ACKed data retransmitted)"
 			: "SEQ is over the upper bound (over the window of the receiver)");
 
-		res = ip_ct_tcp_be_liberal;
+		res = ve_ip_ct_tcp_be_liberal;
   	}
   
 	DEBUGP("tcp_in_window: res=%i sender end=%u maxend=%u maxwin=%u "
@@ -1033,9 +1033,11 @@ static int tcp_packet(struct ip_conntrac
 	    && (new_state == TCP_CONNTRACK_FIN_WAIT
 	    	|| new_state == TCP_CONNTRACK_CLOSE))
 		conntrack->proto.tcp.seen[dir].flags |= IP_CT_TCP_FLAG_CLOSE_INIT;
-	timeout = conntrack->proto.tcp.retrans >= ip_ct_tcp_max_retrans
-		  && *tcp_timeouts[new_state] > ip_ct_tcp_timeout_max_retrans
-		  ? ip_ct_tcp_timeout_max_retrans : *tcp_timeouts[new_state];
+	timeout = conntrack->proto.tcp.retrans >= ve_ip_ct_tcp_max_retrans &&
+		ve_ip_ct_tcp_timeouts[new_state] >
+					ve_ip_ct_tcp_timeout_max_retrans
+		? ve_ip_ct_tcp_timeout_max_retrans :
+					ve_ip_ct_tcp_timeouts[new_state];
 	write_unlock_bh(&tcp_lock);
 
 	ip_conntrack_event_cache(IPCT_PROTOINFO_VOLATILE, skb);
@@ -1110,7 +1112,7 @@ static int tcp_new(struct ip_conntrack *
 		conntrack->proto.tcp.seen[1].flags = 0;
 		conntrack->proto.tcp.seen[0].loose = 
 		conntrack->proto.tcp.seen[1].loose = 0;
-	} else if (ip_ct_tcp_loose == 0) {
+	} else if (ve_ip_ct_tcp_loose == 0) {
 		/* Don't try to pick up connections. */
 		return 0;
 	} else {
@@ -1134,7 +1136,7 @@ static int tcp_new(struct ip_conntrack *
 		conntrack->proto.tcp.seen[0].flags =
 		conntrack->proto.tcp.seen[1].flags = IP_CT_TCP_FLAG_SACK_PERM;
 		conntrack->proto.tcp.seen[0].loose = 
-		conntrack->proto.tcp.seen[1].loose = ip_ct_tcp_loose;
+		conntrack->proto.tcp.seen[1].loose = ve_ip_ct_tcp_loose;
 	}
     
 	conntrack->proto.tcp.seen[1].td_end = 0;
diff -uprN linux-2.6.16/net/ipv4/netfilter/ip_conntrack_proto_udp.c linux-2.6.16.ovz/net/ipv4/netfilter/ip_conntrack_proto_udp.c
--- linux-2.6.16/net/ipv4/netfilter/ip_conntrack_proto_udp.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/net/ipv4/netfilter/ip_conntrack_proto_udp.c	2006-07-05 08:34:56.000000000 -0400
@@ -71,12 +71,12 @@ static int udp_packet(struct ip_conntrac
 	   stream.  Extend timeout. */
 	if (test_bit(IPS_SEEN_REPLY_BIT, &conntrack->status)) {
 		ip_ct_refresh_acct(conntrack, ctinfo, skb, 
-				   ip_ct_udp_timeout_stream);
+				   ve_ip_ct_udp_timeout_stream);
 		/* Also, more likely to be important, and not a probe */
 		if (!test_and_set_bit(IPS_ASSURED_BIT, &conntrack->status))
 			ip_conntrack_event_cache(IPCT_STATUS, skb);
 	} else
-		ip_ct_refresh_acct(conntrack, ctinfo, skb, ip_ct_udp_timeout);
+		ip_ct_refresh_acct(conntrack, ctinfo, skb, ve_ip_ct_udp_timeout);
 
 	return NF_ACCEPT;
 }
diff -uprN linux-2.6.16/net/ipv4/netfilter/ip_conntrack_standalone.c linux-2.6.16.ovz/net/ipv4/netfilter/ip_conntrack_standalone.c
--- linux-2.6.16/net/ipv4/netfilter/ip_conntrack_standalone.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/net/ipv4/netfilter/ip_conntrack_standalone.c	2006-07-05 08:34:56.000000000 -0400
@@ -28,6 +28,7 @@
 #include <net/checksum.h>
 #include <net/ip.h>
 #include <net/route.h>
+#include <linux/nfcalls.h>
 
 #define ASSERT_READ_LOCK(x)
 #define ASSERT_WRITE_LOCK(x)
@@ -46,9 +47,31 @@
 
 MODULE_LICENSE("GPL");
 
+int ip_conntrack_disable_ve0 = 0;
+module_param(ip_conntrack_disable_ve0, int, 0440);
+
 extern atomic_t ip_conntrack_count;
+#ifdef CONFIG_VE_IPTABLES
+#include <linux/sched.h>
+#define ve_ip_conntrack_count \
+	(get_exec_env()->_ip_conntrack->_ip_conntrack_count)
+#else
+#define ve_ip_conntrack_count	ip_conntrack_count
+#endif
 DECLARE_PER_CPU(struct ip_conntrack_stat, ip_conntrack_stat);
 
+/* Prior to 2.6.15, we had an ip_conntrack_enable_ve0 param. */
+static int warn_set(const char *val, struct kernel_param *kp)
+{
+	printk(KERN_INFO KBUILD_MODNAME
+	       ": parameter ip_conntrack_enable_ve0 is obsoleted. In ovzkernel"
+	       " >= 2.6.15 connection tracking on hardware node is enabled by "
+	       "default, use ip_conntrack_disable_ve0=1 parameter to "
+	       "disable.\n");
+	return 0;	/* val and kp are unused */
+}
+module_param_call(ip_conntrack_enable_ve0, warn_set, NULL, NULL, 0);
+
 static int kill_proto(struct ip_conntrack *i, void *data)
 {
 	return (i->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum == 
@@ -89,8 +112,8 @@ static struct list_head *ct_get_first(st
 	for (st->bucket = 0;
 	     st->bucket < ip_conntrack_htable_size;
 	     st->bucket++) {
-		if (!list_empty(&ip_conntrack_hash[st->bucket]))
-			return ip_conntrack_hash[st->bucket].next;
+		if (!list_empty(&ve_ip_conntrack_hash[st->bucket]))
+			return ve_ip_conntrack_hash[st->bucket].next;
 	}
 	return NULL;
 }
@@ -100,10 +123,10 @@ static struct list_head *ct_get_next(str
 	struct ct_iter_state *st = seq->private;
 
 	head = head->next;
-	while (head == &ip_conntrack_hash[st->bucket]) {
+	while (head == &ve_ip_conntrack_hash[st->bucket]) {
 		if (++st->bucket >= ip_conntrack_htable_size)
 			return NULL;
-		head = ip_conntrack_hash[st->bucket].next;
+		head = ve_ip_conntrack_hash[st->bucket].next;
 	}
 	return head;
 }
@@ -234,7 +257,7 @@ static struct file_operations ct_file_op
 /* expects */
 static void *exp_seq_start(struct seq_file *s, loff_t *pos)
 {
-	struct list_head *e = &ip_conntrack_expect_list;
+	struct list_head *e = &ve_ip_conntrack_expect_list;
 	loff_t i;
 
 	/* strange seq_file api calls stop even if we fail,
@@ -246,7 +269,7 @@ static void *exp_seq_start(struct seq_fi
 
 	for (i = 0; i <= *pos; i++) {
 		e = e->next;
-		if (e == &ip_conntrack_expect_list)
+		if (e == &ve_ip_conntrack_expect_list)
 			return NULL;
 	}
 	return e;
@@ -259,7 +282,7 @@ static void *exp_seq_next(struct seq_fil
 	++*pos;
 	e = e->next;
 
-	if (e == &ip_conntrack_expect_list)
+	if (e == &ve_ip_conntrack_expect_list)
 		return NULL;
 
 	return e;
@@ -344,7 +367,7 @@ static void ct_cpu_seq_stop(struct seq_f
 
 static int ct_cpu_seq_show(struct seq_file *seq, void *v)
 {
-	unsigned int nr_conntracks = atomic_read(&ip_conntrack_count);
+	unsigned int nr_conntracks = atomic_read(&ve_ip_conntrack_count);
 	struct ip_conntrack_stat *st = v;
 
 	if (v == SEQ_START_TOKEN) {
@@ -541,6 +564,28 @@ static struct nf_hook_ops ip_conntrack_l
 
 /* From ip_conntrack_core.c */
 extern int ip_conntrack_max;
+#ifdef CONFIG_VE_IPTABLES
+#define ve_ip_conntrack_max \
+	(get_exec_env()->_ip_conntrack->_ip_conntrack_max)
+#define ve_ip_ct_sysctl_header \
+	(get_exec_env()->_ip_conntrack->_ip_ct_sysctl_header)
+#define ve_ip_ct_net_table \
+	(get_exec_env()->_ip_conntrack->_ip_ct_net_table)
+#define ve_ip_ct_ipv4_table \
+	(get_exec_env()->_ip_conntrack->_ip_ct_ipv4_table)
+#define ve_ip_ct_netfilter_table \
+	(get_exec_env()->_ip_conntrack->_ip_ct_netfilter_table)
+#define ve_ip_ct_sysctl_table \
+	(get_exec_env()->_ip_conntrack->_ip_ct_sysctl_table)
+#else
+#define ve_ip_conntrack_max		ip_conntrack_max
+static struct ctl_table_header *ip_ct_sysctl_header;
+#define ve_ip_ct_sysctl_header		ip_ct_sysctl_header
+#define ve_ip_ct_net_table		ip_ct_net_table
+#define ve_ip_ct_ipv4_table		ip_ct_ipv4_table
+#define ve_ip_ct_netfilter_table	ip_ct_netfilter_table
+#define ve_ip_ct_sysctl_table		ip_ct_sysctl_table
+#endif
 extern unsigned int ip_conntrack_htable_size;
 
 /* From ip_conntrack_proto_tcp.c */
@@ -571,8 +616,6 @@ extern unsigned int ip_ct_generic_timeou
 static int log_invalid_proto_min = 0;
 static int log_invalid_proto_max = 255;
 
-static struct ctl_table_header *ip_ct_sysctl_header;
-
 static ctl_table ip_ct_sysctl_table[] = {
 	{
 		.ctl_name	= NET_IPV4_NF_CONNTRACK_MAX,
@@ -781,6 +824,112 @@ static ctl_table ip_ct_net_table[] = {
 };
 
 EXPORT_SYMBOL(ip_ct_log_invalid);
+
+#ifdef CONFIG_VE_IPTABLES
+/* Forget this VE's sysctl tables, freeing them unless they are VE0's. */
+static void ip_conntrack_sysctl_cleanup(void)
+{
+	if (!ve_is_super(get_exec_env())) {
+		kfree(ve_ip_ct_sysctl_table);
+		kfree(ve_ip_ct_netfilter_table);
+		kfree(ve_ip_ct_ipv4_table);
+		kfree(ve_ip_ct_net_table);
+	}
+
+	ve_ip_ct_net_table = ve_ip_ct_ipv4_table = NULL;
+	ve_ip_ct_netfilter_table = ve_ip_ct_sysctl_table = NULL;
+}
+
+#define ALLOC_ENVCTL(field, k, label) \
+	do { if (!((field) = kmalloc((k) * sizeof(ctl_table), GFP_KERNEL))) \
+		goto label; } while (0)
+/* Wire this VE's conntrack sysctl tables to its own per-VE variables. */
+static int ip_conntrack_sysctl_init(void)
+{
+	int i, ret = -ENOMEM;	/* only the allocations below can fail */
+
+	if (ve_is_super(get_exec_env())) {
+		ve_ip_ct_net_table = ip_ct_net_table;
+		ve_ip_ct_ipv4_table = ip_ct_ipv4_table;
+		ve_ip_ct_netfilter_table = ip_ct_netfilter_table;
+		ve_ip_ct_sysctl_table = ip_ct_sysctl_table;
+	} else {
+		/* allocate structures in ve_struct */
+		ALLOC_ENVCTL(ve_ip_ct_net_table, 2, out);
+		ALLOC_ENVCTL(ve_ip_ct_ipv4_table, 2, nomem_1);
+		ALLOC_ENVCTL(ve_ip_ct_netfilter_table, 3, nomem_2);
+		ALLOC_ENVCTL(ve_ip_ct_sysctl_table, 21, nomem_3);
+
+		memcpy(ve_ip_ct_net_table, ip_ct_net_table,
+				2*sizeof(ctl_table));
+		memcpy(ve_ip_ct_ipv4_table, ip_ct_ipv4_table,
+				2*sizeof(ctl_table));
+		memcpy(ve_ip_ct_netfilter_table, ip_ct_netfilter_table,
+				3*sizeof(ctl_table));
+		memcpy(ve_ip_ct_sysctl_table, ip_ct_sysctl_table,
+				21*sizeof(ctl_table));
+
+		ve_ip_ct_net_table[0].child = ve_ip_ct_ipv4_table;
+		ve_ip_ct_ipv4_table[0].child = ve_ip_ct_netfilter_table;
+		ve_ip_ct_netfilter_table[0].child = ve_ip_ct_sysctl_table;
+	}
+	ve_ip_ct_sysctl_table[0].data = &ve_ip_conntrack_max;
+	ve_ip_ct_netfilter_table[1].data = &ve_ip_conntrack_max;
+	ve_ip_ct_sysctl_table[1].data = &ve_ip_conntrack_count;
+	/* skip ve_ip_ct_sysctl_table[2].data as it is read-only and common
+	 * for all environments */
+	ve_ip_ct_tcp_timeouts[1] = ip_ct_tcp_timeout_syn_sent;
+	ve_ip_ct_sysctl_table[3].data = &ve_ip_ct_tcp_timeouts[1];
+	ve_ip_ct_tcp_timeouts[2] = ip_ct_tcp_timeout_syn_recv;
+	ve_ip_ct_sysctl_table[4].data = &ve_ip_ct_tcp_timeouts[2];
+	ve_ip_ct_tcp_timeouts[3] = ip_ct_tcp_timeout_established;
+	ve_ip_ct_sysctl_table[5].data = &ve_ip_ct_tcp_timeouts[3];
+	ve_ip_ct_tcp_timeouts[4] = ip_ct_tcp_timeout_fin_wait;
+	ve_ip_ct_sysctl_table[6].data = &ve_ip_ct_tcp_timeouts[4];
+	ve_ip_ct_tcp_timeouts[5] = ip_ct_tcp_timeout_close_wait;
+	ve_ip_ct_sysctl_table[7].data = &ve_ip_ct_tcp_timeouts[5];
+	ve_ip_ct_tcp_timeouts[6] = ip_ct_tcp_timeout_last_ack;
+	ve_ip_ct_sysctl_table[8].data = &ve_ip_ct_tcp_timeouts[6];
+	ve_ip_ct_tcp_timeouts[7] = ip_ct_tcp_timeout_time_wait;
+	ve_ip_ct_sysctl_table[9].data = &ve_ip_ct_tcp_timeouts[7];
+	ve_ip_ct_tcp_timeouts[8] = ip_ct_tcp_timeout_close;
+	ve_ip_ct_sysctl_table[10].data = &ve_ip_ct_tcp_timeouts[8];
+	ve_ip_ct_udp_timeout = ip_ct_udp_timeout;
+	ve_ip_ct_sysctl_table[11].data = &ve_ip_ct_udp_timeout;
+	ve_ip_ct_udp_timeout_stream = ip_ct_udp_timeout_stream;
+	ve_ip_ct_sysctl_table[12].data = &ve_ip_ct_udp_timeout_stream;
+	ve_ip_ct_icmp_timeout = ip_ct_icmp_timeout;
+	ve_ip_ct_sysctl_table[13].data = &ve_ip_ct_icmp_timeout;
+	ve_ip_ct_generic_timeout = ip_ct_generic_timeout;
+	ve_ip_ct_sysctl_table[14].data = &ve_ip_ct_generic_timeout;
+	ve_ip_ct_log_invalid = ip_ct_log_invalid;
+	ve_ip_ct_sysctl_table[15].data = &ve_ip_ct_log_invalid;
+	ve_ip_ct_tcp_timeout_max_retrans = ip_ct_tcp_timeout_max_retrans;
+	ve_ip_ct_sysctl_table[16].data = &ve_ip_ct_tcp_timeout_max_retrans;
+	ve_ip_ct_tcp_loose = ip_ct_tcp_loose;
+	ve_ip_ct_sysctl_table[17].data = &ve_ip_ct_tcp_loose;
+	ve_ip_ct_tcp_be_liberal = ip_ct_tcp_be_liberal;
+	ve_ip_ct_sysctl_table[18].data = &ve_ip_ct_tcp_be_liberal;
+	ve_ip_ct_tcp_max_retrans = ip_ct_tcp_max_retrans;
+	ve_ip_ct_sysctl_table[19].data = &ve_ip_ct_tcp_max_retrans;
+	for (i = 0; i < 20; i++)
+		ve_ip_ct_sysctl_table[i].owner_env = get_exec_env();
+	ve_ip_ct_netfilter_table[1].owner_env = get_exec_env();
+	return 0;
+
+nomem_3:
+	kfree(ve_ip_ct_netfilter_table);
+	ve_ip_ct_netfilter_table = NULL;
+nomem_2:
+	kfree(ve_ip_ct_ipv4_table);
+	ve_ip_ct_ipv4_table = NULL;
+nomem_1:
+	kfree(ve_ip_ct_net_table);
+	ve_ip_ct_net_table = NULL;
+out:
+	return ret;
+}
+#endif /* CONFIG_VE_IPTABLES */
 #endif /* CONFIG_SYSCTL */
 
 static int init_or_cleanup(int init)
@@ -792,9 +941,16 @@ static int init_or_cleanup(int init)
 
 	if (!init) goto cleanup;
 
+	ret = -ENOENT;
+	if (!ve_is_super(get_exec_env()))
+		__module_get(THIS_MODULE);
+
 	ret = ip_conntrack_init();
 	if (ret < 0)
-		goto cleanup_nothing;
+		goto cleanup_unget;
+
+	if (ve_is_super(get_exec_env()) && ip_conntrack_disable_ve0)
+		return 0;
 
 #ifdef CONFIG_PROC_FS
 	ret = -ENOMEM;
@@ -804,98 +960,115 @@ static int init_or_cleanup(int init)
 	proc_exp = proc_net_fops_create("ip_conntrack_expect", 0440,
 					&exp_file_ops);
 	if (!proc_exp) goto cleanup_proc;
+	proc_exp->proc_fops = &exp_file_ops;
 
-	proc_stat = create_proc_entry("ip_conntrack", S_IRUGO, proc_net_stat);
-	if (!proc_stat)
-		goto cleanup_proc_exp;
+	if (ve_is_super(get_exec_env())) {
+		proc_stat = create_proc_entry("ip_conntrack", S_IRUGO, proc_net_stat);
+		if (!proc_stat)
+			goto cleanup_proc_exp;
 
-	proc_stat->proc_fops = &ct_cpu_seq_fops;
-	proc_stat->owner = THIS_MODULE;
+		proc_stat->proc_fops = &ct_cpu_seq_fops;
+		proc_stat->owner = THIS_MODULE;
+	}
 #endif
 
-	ret = nf_register_hook(&ip_conntrack_defrag_ops);
+	ret = virt_nf_register_hook(&ip_conntrack_defrag_ops);
 	if (ret < 0) {
 		printk("ip_conntrack: can't register pre-routing defrag hook.\n");
 		goto cleanup_proc_stat;
 	}
-	ret = nf_register_hook(&ip_conntrack_defrag_local_out_ops);
+	ret = virt_nf_register_hook(&ip_conntrack_defrag_local_out_ops);
 	if (ret < 0) {
 		printk("ip_conntrack: can't register local_out defrag hook.\n");
 		goto cleanup_defragops;
 	}
-	ret = nf_register_hook(&ip_conntrack_in_ops);
+	ret = virt_nf_register_hook(&ip_conntrack_in_ops);
 	if (ret < 0) {
 		printk("ip_conntrack: can't register pre-routing hook.\n");
 		goto cleanup_defraglocalops;
 	}
-	ret = nf_register_hook(&ip_conntrack_local_out_ops);
+	ret = virt_nf_register_hook(&ip_conntrack_local_out_ops);
 	if (ret < 0) {
 		printk("ip_conntrack: can't register local out hook.\n");
 		goto cleanup_inops;
 	}
-	ret = nf_register_hook(&ip_conntrack_helper_in_ops);
+	ret = virt_nf_register_hook(&ip_conntrack_helper_in_ops);
 	if (ret < 0) {
 		printk("ip_conntrack: can't register local in helper hook.\n");
 		goto cleanup_inandlocalops;
 	}
-	ret = nf_register_hook(&ip_conntrack_helper_out_ops);
+	ret = virt_nf_register_hook(&ip_conntrack_helper_out_ops);
 	if (ret < 0) {
 		printk("ip_conntrack: can't register postrouting helper hook.\n");
 		goto cleanup_helperinops;
 	}
-	ret = nf_register_hook(&ip_conntrack_out_ops);
+	ret = virt_nf_register_hook(&ip_conntrack_out_ops);
 	if (ret < 0) {
 		printk("ip_conntrack: can't register post-routing hook.\n");
 		goto cleanup_helperoutops;
 	}
-	ret = nf_register_hook(&ip_conntrack_local_in_ops);
+	ret = virt_nf_register_hook(&ip_conntrack_local_in_ops);
 	if (ret < 0) {
 		printk("ip_conntrack: can't register local in hook.\n");
 		goto cleanup_inoutandlocalops;
 	}
 #ifdef CONFIG_SYSCTL
-	ip_ct_sysctl_header = register_sysctl_table(ip_ct_net_table, 0);
-	if (ip_ct_sysctl_header == NULL) {
+#ifdef CONFIG_VE_IPTABLES
+	ret = ip_conntrack_sysctl_init();
+	if (ret < 0)
+		goto cleanup_sysctl;
+#endif
+	ret = -ENOMEM;
+	ve_ip_ct_sysctl_header = register_sysctl_table(ve_ip_ct_net_table, 0);
+	if (ve_ip_ct_sysctl_header == NULL) {
 		printk("ip_conntrack: can't register to sysctl.\n");
-		ret = -ENOMEM;
-		goto cleanup_localinops;
+		goto cleanup_sysctl2;
 	}
 #endif
 
-	return ret;
+	return 0;
 
  cleanup:
+	if (ve_is_super(get_exec_env()) && ip_conntrack_disable_ve0)
+		goto cleanup_init;
 	synchronize_net();
 #ifdef CONFIG_SYSCTL
- 	unregister_sysctl_table(ip_ct_sysctl_header);
- cleanup_localinops:
+ 	unregister_sysctl_table(ve_ip_ct_sysctl_header);
+ cleanup_sysctl2:
+#ifdef CONFIG_VE_IPTABLES
+	ip_conntrack_sysctl_cleanup();
+ cleanup_sysctl:
+#endif
 #endif
-	nf_unregister_hook(&ip_conntrack_local_in_ops);
+	virt_nf_unregister_hook(&ip_conntrack_local_in_ops);
  cleanup_inoutandlocalops:
-	nf_unregister_hook(&ip_conntrack_out_ops);
+	virt_nf_unregister_hook(&ip_conntrack_out_ops);
  cleanup_helperoutops:
-	nf_unregister_hook(&ip_conntrack_helper_out_ops);
+	virt_nf_unregister_hook(&ip_conntrack_helper_out_ops);
  cleanup_helperinops:
-	nf_unregister_hook(&ip_conntrack_helper_in_ops);
+	virt_nf_unregister_hook(&ip_conntrack_helper_in_ops);
  cleanup_inandlocalops:
-	nf_unregister_hook(&ip_conntrack_local_out_ops);
+	virt_nf_unregister_hook(&ip_conntrack_local_out_ops);
  cleanup_inops:
-	nf_unregister_hook(&ip_conntrack_in_ops);
+	virt_nf_unregister_hook(&ip_conntrack_in_ops);
  cleanup_defraglocalops:
-	nf_unregister_hook(&ip_conntrack_defrag_local_out_ops);
+	virt_nf_unregister_hook(&ip_conntrack_defrag_local_out_ops);
  cleanup_defragops:
-	nf_unregister_hook(&ip_conntrack_defrag_ops);
+	virt_nf_unregister_hook(&ip_conntrack_defrag_ops);
  cleanup_proc_stat:
 #ifdef CONFIG_PROC_FS
-	remove_proc_entry("ip_conntrack", proc_net_stat);
+	if (ve_is_super(get_exec_env()))
+		remove_proc_entry("ip_conntrack", proc_net_stat);
  cleanup_proc_exp:
 	proc_net_remove("ip_conntrack_expect");
  cleanup_proc:
 	proc_net_remove("ip_conntrack");
- cleanup_init:
 #endif /* CONFIG_PROC_FS */
+ cleanup_init:
 	ip_conntrack_cleanup();
- cleanup_nothing:
+ cleanup_unget:
+	if (!ve_is_super(get_exec_env()))
+		module_put(THIS_MODULE);
 	return ret;
 }
 
@@ -906,11 +1079,11 @@ int ip_conntrack_protocol_register(struc
 	int ret = 0;
 
 	write_lock_bh(&ip_conntrack_lock);
-	if (ip_ct_protos[proto->proto] != &ip_conntrack_generic_protocol) {
+	if (ve_ip_ct_protos[proto->proto] != &ip_conntrack_generic_protocol) {
 		ret = -EBUSY;
 		goto out;
 	}
-	ip_ct_protos[proto->proto] = proto;
+	ve_ip_ct_protos[proto->proto] = proto;
  out:
 	write_unlock_bh(&ip_conntrack_lock);
 	return ret;
@@ -919,7 +1092,7 @@ int ip_conntrack_protocol_register(struc
 void ip_conntrack_protocol_unregister(struct ip_conntrack_protocol *proto)
 {
 	write_lock_bh(&ip_conntrack_lock);
-	ip_ct_protos[proto->proto] = &ip_conntrack_generic_protocol;
+	ve_ip_ct_protos[proto->proto] = &ip_conntrack_generic_protocol;
 	write_unlock_bh(&ip_conntrack_lock);
 	
 	/* Somebody could be still looking at the proto in bh. */
@@ -929,17 +1102,39 @@ void ip_conntrack_protocol_unregister(st
 	ip_ct_iterate_cleanup(kill_proto, &proto->proto);
 }
 
-static int __init init(void)
+int init_iptable_conntrack(void)
 {
 	return init_or_cleanup(1);
 }
 
-static void __exit fini(void)
+void fini_iptable_conntrack(void)
 {
 	init_or_cleanup(0);
 }
 
-module_init(init);
+static int __init init(void)
+{
+	int err;
+
+	err = init_iptable_conntrack();
+	if (err < 0)
+		return err;
+
+	KSYMRESOLVE(init_iptable_conntrack);
+	KSYMRESOLVE(fini_iptable_conntrack);
+	KSYMMODRESOLVE(ip_conntrack);
+	return 0;
+}
+
+static void __exit fini(void)
+{
+	KSYMMODUNRESOLVE(ip_conntrack);
+	KSYMUNRESOLVE(init_iptable_conntrack);
+	KSYMUNRESOLVE(fini_iptable_conntrack);
+	fini_iptable_conntrack();
+}
+
+subsys_initcall(init);
 module_exit(fini);
 
 /* Some modules need us, but don't depend directly on any symbol.
@@ -956,15 +1151,20 @@ EXPORT_SYMBOL_GPL(ip_conntrack_unregiste
 EXPORT_SYMBOL_GPL(__ip_ct_event_cache_init);
 EXPORT_PER_CPU_SYMBOL_GPL(ip_conntrack_ecache);
 #endif
+EXPORT_SYMBOL(ip_conntrack_disable_ve0);
 EXPORT_SYMBOL(ip_conntrack_protocol_register);
 EXPORT_SYMBOL(ip_conntrack_protocol_unregister);
 EXPORT_SYMBOL(ip_ct_get_tuple);
 EXPORT_SYMBOL(invert_tuplepr);
 EXPORT_SYMBOL(ip_conntrack_alter_reply);
+#ifndef CONFIG_VE_IPTABLES
 EXPORT_SYMBOL(ip_conntrack_destroyed);
+#endif
 EXPORT_SYMBOL(need_conntrack);
 EXPORT_SYMBOL(ip_conntrack_helper_register);
 EXPORT_SYMBOL(ip_conntrack_helper_unregister);
+EXPORT_SYMBOL(virt_ip_conntrack_helper_register);
+EXPORT_SYMBOL(virt_ip_conntrack_helper_unregister);
 EXPORT_SYMBOL(ip_ct_iterate_cleanup);
 EXPORT_SYMBOL(__ip_ct_refresh_acct);
 
@@ -974,14 +1174,18 @@ EXPORT_SYMBOL_GPL(__ip_conntrack_expect_
 EXPORT_SYMBOL_GPL(ip_conntrack_expect_find);
 EXPORT_SYMBOL(ip_conntrack_expect_related);
 EXPORT_SYMBOL(ip_conntrack_unexpect_related);
+#ifndef CONFIG_VE_IPTABLES
 EXPORT_SYMBOL_GPL(ip_conntrack_expect_list);
+#endif
 EXPORT_SYMBOL_GPL(ip_ct_unlink_expect);
 
 EXPORT_SYMBOL(ip_conntrack_tuple_taken);
 EXPORT_SYMBOL(ip_ct_gather_frags);
 EXPORT_SYMBOL(ip_conntrack_htable_size);
 EXPORT_SYMBOL(ip_conntrack_lock);
+#ifndef CONFIG_VE_IPTABLES
 EXPORT_SYMBOL(ip_conntrack_hash);
+#endif
 EXPORT_SYMBOL(ip_conntrack_untracked);
 EXPORT_SYMBOL_GPL(ip_conntrack_find_get);
 #ifdef CONFIG_IP_NF_NAT_NEEDED
diff -uprN linux-2.6.16/net/ipv4/netfilter/ip_nat_core.c linux-2.6.16.ovz/net/ipv4/netfilter/ip_nat_core.c
--- linux-2.6.16/net/ipv4/netfilter/ip_nat_core.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/net/ipv4/netfilter/ip_nat_core.c	2006-07-05 08:34:56.000000000 -0400
@@ -21,6 +21,8 @@
 #include <linux/icmp.h>
 #include <linux/udp.h>
 #include <linux/jhash.h>
+#include <linux/nfcalls.h>
+#include <ub/ub_mem.h>
 
 #define ASSERT_READ_LOCK(x)
 #define ASSERT_WRITE_LOCK(x)
@@ -46,15 +48,24 @@ DEFINE_RWLOCK(ip_nat_lock);
 /* Calculated at init based on memory size */
 static unsigned int ip_nat_htable_size;
 
-static struct list_head *bysource;
-
 #define MAX_IP_NAT_PROTO 256
+
+#ifdef CONFIG_VE_IPTABLES
+#define ve_ip_nat_bysource	\
+	(get_exec_env()->_ip_conntrack->_ip_nat_bysource)
+#define ve_ip_nat_protos	\
+	(get_exec_env()->_ip_conntrack->_ip_nat_protos)
+#else
+static struct list_head *bysource;
+#define ve_ip_nat_bysource	bysource
 static struct ip_nat_protocol *ip_nat_protos[MAX_IP_NAT_PROTO];
+#define ve_ip_nat_protos	ip_nat_protos
+#endif
 
 static inline struct ip_nat_protocol *
 __ip_nat_proto_find(u_int8_t protonum)
 {
-	return ip_nat_protos[protonum];
+	return ve_ip_nat_protos[protonum];
 }
 
 struct ip_nat_protocol *
@@ -177,7 +188,7 @@ find_appropriate_src(const struct ip_con
 	struct ip_conntrack *ct;
 
 	read_lock_bh(&ip_nat_lock);
-	list_for_each_entry(ct, &bysource[h], nat.info.bysource) {
+	list_for_each_entry(ct, &ve_ip_nat_bysource[h], nat.info.bysource) {
 		if (same_src(ct, tuple)) {
 			/* Copy source part from reply tuple. */
 			invert_tuplepr(result,
@@ -291,13 +302,22 @@ get_unique_tuple(struct ip_conntrack_tup
 	ip_nat_proto_put(proto);
 }
 
+void ip_nat_hash_conntrack(struct ip_conntrack *conntrack)
+{
+	unsigned int srchash
+		= hash_by_src(&conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
+	write_lock_bh(&ip_nat_lock);
+	list_add(&conntrack->nat.info.bysource, &ve_ip_nat_bysource[srchash]);
+	write_unlock_bh(&ip_nat_lock);
+}
+EXPORT_SYMBOL_GPL(ip_nat_hash_conntrack);
+
 unsigned int
 ip_nat_setup_info(struct ip_conntrack *conntrack,
 		  const struct ip_nat_range *range,
 		  unsigned int hooknum)
 {
 	struct ip_conntrack_tuple curr_tuple, new_tuple;
-	struct ip_nat_info *info = &conntrack->nat.info;
 	int have_to_hash = !(conntrack->status & IPS_NAT_DONE_MASK);
 	enum ip_nat_manip_type maniptype = HOOK2MANIP(hooknum);
 
@@ -332,14 +352,8 @@ ip_nat_setup_info(struct ip_conntrack *c
 	}
 
 	/* Place in source hash if this is the first time. */
-	if (have_to_hash) {
-		unsigned int srchash
-			= hash_by_src(&conntrack->tuplehash[IP_CT_DIR_ORIGINAL]
-				      .tuple);
-		write_lock_bh(&ip_nat_lock);
-		list_add(&info->bysource, &bysource[srchash]);
-		write_unlock_bh(&ip_nat_lock);
-	}
+	if (have_to_hash)
+		ip_nat_hash_conntrack(conntrack);
 
 	/* It's done. */
 	if (maniptype == IP_NAT_MANIP_DST)
@@ -521,11 +535,11 @@ int ip_nat_protocol_register(struct ip_n
 	int ret = 0;
 
 	write_lock_bh(&ip_nat_lock);
-	if (ip_nat_protos[proto->protonum] != &ip_nat_unknown_protocol) {
+	if (ve_ip_nat_protos[proto->protonum] != &ip_nat_unknown_protocol) {
 		ret = -EBUSY;
 		goto out;
 	}
-	ip_nat_protos[proto->protonum] = proto;
+	ve_ip_nat_protos[proto->protonum] = proto;
  out:
 	write_unlock_bh(&ip_nat_lock);
 	return ret;
@@ -536,7 +550,7 @@ EXPORT_SYMBOL(ip_nat_protocol_register);
 void ip_nat_protocol_unregister(struct ip_nat_protocol *proto)
 {
 	write_lock_bh(&ip_nat_lock);
-	ip_nat_protos[proto->protonum] = &ip_nat_unknown_protocol;
+	ve_ip_nat_protos[proto->protonum] = &ip_nat_unknown_protocol;
 	write_unlock_bh(&ip_nat_lock);
 
 	/* Someone could be still looking at the proto in a bh. */
@@ -589,38 +603,55 @@ EXPORT_SYMBOL_GPL(ip_nat_port_nfattr_to_
 EXPORT_SYMBOL_GPL(ip_nat_port_range_to_nfattr);
 #endif
 
-static int __init ip_nat_init(void)
+static int ip_nat_init(void)
 {
 	size_t i;
+	int ret = -ENOMEM;	/* every failure below is an allocation failure */
 
-	/* Leave them the same for the moment. */
-	ip_nat_htable_size = ip_conntrack_htable_size;
+	if (ve_is_super(get_exec_env()))
+		ip_nat_htable_size = ip_conntrack_htable_size;
 
-	/* One vmalloc for both hash tables */
-	bysource = vmalloc(sizeof(struct list_head) * ip_nat_htable_size);
-	if (!bysource)
-		return -ENOMEM;
+	/* NOTE(review): 2x htable_size allocated but only 1x initialised below */
+	ve_ip_nat_bysource =
+		ub_vmalloc(sizeof(struct list_head)*ip_nat_htable_size*2);
+	if (!ve_ip_nat_bysource)
+		goto nomem;
+
+#ifdef CONFIG_VE_IPTABLES
+	ve_ip_nat_protos =
+		ub_kmalloc(sizeof(void *)*MAX_IP_NAT_PROTO, GFP_KERNEL);
+	if (!ve_ip_nat_protos)
+		goto nomem2;
+#endif
 
 	/* Sew in builtin protocols. */
 	write_lock_bh(&ip_nat_lock);
 	for (i = 0; i < MAX_IP_NAT_PROTO; i++)
-		ip_nat_protos[i] = &ip_nat_unknown_protocol;
-	ip_nat_protos[IPPROTO_TCP] = &ip_nat_protocol_tcp;
-	ip_nat_protos[IPPROTO_UDP] = &ip_nat_protocol_udp;
-	ip_nat_protos[IPPROTO_ICMP] = &ip_nat_protocol_icmp;
+		ve_ip_nat_protos[i] = &ip_nat_unknown_protocol;
+	ve_ip_nat_protos[IPPROTO_TCP] = &ip_nat_protocol_tcp;
+	ve_ip_nat_protos[IPPROTO_UDP] = &ip_nat_protocol_udp;
+	ve_ip_nat_protos[IPPROTO_ICMP] = &ip_nat_protocol_icmp;
 	write_unlock_bh(&ip_nat_lock);
 
 	for (i = 0; i < ip_nat_htable_size; i++) {
-		INIT_LIST_HEAD(&bysource[i]);
+		INIT_LIST_HEAD(&ve_ip_nat_bysource[i]);
 	}
 
 	/* FIXME: Man, this is a hack.  <SIGH> */
-	IP_NF_ASSERT(ip_conntrack_destroyed == NULL);
-	ip_conntrack_destroyed = &ip_nat_cleanup_conntrack;
+	IP_NF_ASSERT(ve_ip_conntrack_destroyed == NULL);
+	ve_ip_conntrack_destroyed = &ip_nat_cleanup_conntrack;
 
-	/* Initialize fake conntrack so that NAT will skip it */
-	ip_conntrack_untracked.status |= IPS_NAT_DONE_MASK;
+	if (ve_is_super(get_exec_env()))
+		/* Initialize fake conntrack so that NAT will skip it */
+		ip_conntrack_untracked.status |= IPS_NAT_DONE_MASK;
 	return 0;
+#ifdef CONFIG_VE_IPTABLES
+nomem2:
+#endif
+	vfree(ve_ip_nat_bysource);
+	ve_ip_nat_bysource = NULL;	/* as ip_nat_cleanup() does: no stale pointer */
+nomem:
+	return ret;
 }
 
 /* Clear NAT section of all conntracks, in case we're loaded again. */
@@ -631,14 +662,41 @@ static int clean_nat(struct ip_conntrack
 	return 0;
 }
 
-static void __exit ip_nat_cleanup(void)
+static void ip_nat_cleanup(void)
 {
 	ip_ct_iterate_cleanup(&clean_nat, NULL);
-	ip_conntrack_destroyed = NULL;
-	vfree(bysource);
+	ve_ip_conntrack_destroyed = NULL;
+	vfree(ve_ip_nat_bysource);
+	ve_ip_nat_bysource = NULL;
+#ifdef CONFIG_VE_IPTABLES
+	kfree(ve_ip_nat_protos);
+	ve_ip_nat_protos = NULL;
+#endif
+}
+
+static int __init init(void)
+{
+	int err;
+
+	err = ip_nat_init();
+	if (err < 0)
+		return err;
+
+	KSYMRESOLVE(ip_nat_init);
+	KSYMRESOLVE(ip_nat_cleanup);
+	KSYMMODRESOLVE(ip_nat);
+	return 0;
+}
+
+static void __exit fini(void)
+{
+	KSYMMODUNRESOLVE(ip_nat);
+	KSYMUNRESOLVE(ip_nat_cleanup);
+	KSYMUNRESOLVE(ip_nat_init);
+	ip_nat_cleanup();
 }
 
 MODULE_LICENSE("GPL");
 
-module_init(ip_nat_init);
-module_exit(ip_nat_cleanup);
+fs_initcall(init);
+module_exit(fini);
diff -uprN linux-2.6.16/net/ipv4/netfilter/ip_nat_ftp.c linux-2.6.16.ovz/net/ipv4/netfilter/ip_nat_ftp.c
--- linux-2.6.16/net/ipv4/netfilter/ip_nat_ftp.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/net/ipv4/netfilter/ip_nat_ftp.c	2006-07-05 08:34:56.000000000 -0400
@@ -19,6 +19,7 @@
 #include <linux/netfilter_ipv4/ip_nat_rule.h>
 #include <linux/netfilter_ipv4/ip_conntrack_ftp.h>
 #include <linux/netfilter_ipv4/ip_conntrack_helper.h>
+#include <linux/nfcalls.h>
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Rusty Russell <rusty@rustcorp.com.au>");
@@ -154,18 +155,43 @@ static unsigned int ip_nat_ftp(struct sk
 	return NF_ACCEPT;
 }
 
-static void __exit fini(void)
+#ifdef CONFIG_VE_IPTABLES
+#undef ve_ip_nat_ftp_hook
+#define ve_ip_nat_ftp_hook \
+		(get_exec_env()->_ip_conntrack->_ip_nat_ftp_hook)
+#endif
+int init_iptable_nat_ftp(void)
 {
-	ip_nat_ftp_hook = NULL;
+	BUG_ON(ve_ip_nat_ftp_hook);
+#ifdef CONFIG_VE_IPTABLES
+	ve_ip_nat_ftp_hook = (ip_nat_helper_func)ip_nat_ftp;
+#else
+	ve_ip_nat_ftp_hook = ip_nat_ftp;
+#endif
+	return 0;
+}
+
+void fini_iptable_nat_ftp(void)
+{
+	ve_ip_nat_ftp_hook = NULL;
 	/* Make sure noone calls it, meanwhile. */
 	synchronize_net();
 }
 
+static void __exit fini(void)
+{
+	KSYMMODUNRESOLVE(ip_nat_ftp);
+	KSYMUNRESOLVE(init_iptable_nat_ftp);
+	KSYMUNRESOLVE(fini_iptable_nat_ftp);
+	fini_iptable_nat_ftp();
+}
+
 static int __init init(void)
 {
-	BUG_ON(ip_nat_ftp_hook);
-	ip_nat_ftp_hook = ip_nat_ftp;
-	return 0;
+	KSYMRESOLVE(init_iptable_nat_ftp);
+	KSYMRESOLVE(fini_iptable_nat_ftp);
+	KSYMMODRESOLVE(ip_nat_ftp);
+	return init_iptable_nat_ftp();
 }
 
 /* Prior to 2.6.11, we had a ports param.  No longer, but don't break users. */
diff -uprN linux-2.6.16/net/ipv4/netfilter/ip_nat_irc.c linux-2.6.16.ovz/net/ipv4/netfilter/ip_nat_irc.c
--- linux-2.6.16/net/ipv4/netfilter/ip_nat_irc.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/net/ipv4/netfilter/ip_nat_irc.c	2006-07-05 08:34:56.000000000 -0400
@@ -23,6 +23,7 @@
 #include <linux/netfilter_ipv4/ip_conntrack_irc.h>
 #include <linux/netfilter_ipv4/ip_conntrack_helper.h>
 #include <linux/moduleparam.h>
+#include <linux/nfcalls.h>
 
 #if 0
 #define DEBUGP printk
@@ -96,18 +97,44 @@ static unsigned int help(struct sk_buff 
 	return ret;
 }
 
-static void __exit fini(void)
+#ifdef CONFIG_VE_IPTABLES
+#undef ve_ip_nat_irc_hook
+#define ve_ip_nat_irc_hook \
+		(get_exec_env()->_ip_conntrack->_ip_nat_irc_hook)
+#endif
+
+int init_iptable_nat_irc(void)
+{
+	BUG_ON(ve_ip_nat_irc_hook);
+#ifdef CONFIG_VE_IPTABLES
+	ve_ip_nat_irc_hook = (ip_nat_helper_func)help;
+#else
+	ve_ip_nat_irc_hook = help;
+#endif
+	return 0;
+}
+
+void fini_iptable_nat_irc(void)
 {
-	ip_nat_irc_hook = NULL;
+	ve_ip_nat_irc_hook = NULL;
 	/* Make sure noone calls it, meanwhile. */
 	synchronize_net();
 }
 
+static void __exit fini(void)
+{
+	KSYMMODUNRESOLVE(ip_nat_irc);
+	KSYMUNRESOLVE(init_iptable_nat_irc);
+	KSYMUNRESOLVE(fini_iptable_nat_irc);
+	fini_iptable_nat_irc();
+}
+
 static int __init init(void)
 {
-	BUG_ON(ip_nat_irc_hook);
-	ip_nat_irc_hook = help;
-	return 0;
+	KSYMRESOLVE(init_iptable_nat_irc);
+	KSYMRESOLVE(fini_iptable_nat_irc);
+	KSYMMODRESOLVE(ip_nat_irc);
+	return init_iptable_nat_irc();
 }
 
 /* Prior to 2.6.11, we had a ports param.  No longer, but don't break users. */
diff -uprN linux-2.6.16/net/ipv4/netfilter/ip_nat_rule.c linux-2.6.16.ovz/net/ipv4/netfilter/ip_nat_rule.c
--- linux-2.6.16/net/ipv4/netfilter/ip_nat_rule.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/net/ipv4/netfilter/ip_nat_rule.c	2006-07-05 08:34:56.000000000 -0400
@@ -34,6 +34,13 @@
 #define DEBUGP(format, args...)
 #endif
 
+#ifdef CONFIG_VE_IPTABLES
+#define ve_ip_nat_table		\
+	(get_exec_env()->_ip_conntrack->_ip_nat_table)
+#else /* !CONFIG_VE_IPTABLES: the single file-scope nat_table */
+#define ve_ip_nat_table		(&nat_table)
+#endif /* CONFIG_VE_IPTABLES */
+
 #define NAT_VALID_HOOKS ((1<<NF_IP_PRE_ROUTING) | (1<<NF_IP_POST_ROUTING) | (1<<NF_IP_LOCAL_OUT))
 
 static struct
@@ -41,7 +48,7 @@ static struct
 	struct ipt_replace repl;
 	struct ipt_standard entries[3];
 	struct ipt_error term;
-} nat_initial_table __initdata
+} nat_initial_table
 = { { "nat", NAT_VALID_HOOKS, 4,
       sizeof(struct ipt_standard) * 3 + sizeof(struct ipt_error),
       { [NF_IP_PRE_ROUTING] = 0,
@@ -235,6 +242,93 @@ static int ipt_dnat_checkentry(const cha
 	return 1;
 }
 
+#ifdef CONFIG_COMPAT
+static int compat_to_user(void *target, void **dstptr,
+		int *size, int off)
+{
+	struct ipt_entry_target *pt;
+	struct ip_nat_multi_range_compat *pinfo;
+	struct compat_ip_nat_multi_range info;
+	u_int16_t tsize;
+
+	pt = (struct ipt_entry_target *)target;
+	tsize = pt->u.user.target_size;
+	if (__copy_to_user(*dstptr, pt, sizeof(struct ipt_entry_target)))
+		return -EFAULT;
+	pinfo = (struct ip_nat_multi_range_compat *)pt->data;
+	memset(&info, 0, sizeof(struct compat_ip_nat_multi_range));
+	info.rangesize = pinfo->rangesize;
+	info.range[0].flags = pinfo->range[0].flags;
+	info.range[0].min_ip = pinfo->range[0].min_ip;
+	info.range[0].max_ip = pinfo->range[0].max_ip;
+	info.range[0].min = pinfo->range[0].min;
+	info.range[0].max = pinfo->range[0].max;
+	if (__copy_to_user(*dstptr + sizeof(struct ipt_entry_target),
+				&info, sizeof(struct compat_ip_nat_multi_range)))
+		return -EFAULT;
+	tsize -= off;
+	if (put_user(tsize, (u_int16_t *)*dstptr))
+		return -EFAULT;
+	*size -= off;
+	*dstptr += tsize;
+	return 0;
+}
+
+static int compat_from_user(void *target, void **dstptr,
+		int *size, int off)
+{
+	struct compat_ipt_entry_target *pt;
+	struct ipt_entry_target *dstpt;
+	struct compat_ip_nat_multi_range *pinfo;
+	struct ip_nat_multi_range_compat info;
+	u_int16_t tsize;
+
+	pt = (struct compat_ipt_entry_target *)target;
+	dstpt = (struct ipt_entry_target *)*dstptr;
+	tsize = pt->u.user.target_size;
+	memcpy(*dstptr, pt, sizeof(struct compat_ipt_entry_target));
+	pinfo = (struct compat_ip_nat_multi_range *)pt->data;
+	memset(&info, 0, sizeof(struct ip_nat_multi_range_compat));
+	info.rangesize = pinfo->rangesize;
+	info.range[0].flags = pinfo->range[0].flags;
+	info.range[0].min_ip = pinfo->range[0].min_ip;
+	info.range[0].max_ip = pinfo->range[0].max_ip;
+	info.range[0].min = pinfo->range[0].min;
+	info.range[0].max = pinfo->range[0].max;
+	memcpy(*dstptr + sizeof(struct compat_ipt_entry_target),
+			&info, sizeof(struct ip_nat_multi_range_compat));
+	tsize += off;
+	dstpt->u.user.target_size = tsize;
+	*size += off;
+	*dstptr += tsize;
+	return 0;
+}
+
+static int compat(void *target, void **dstptr, int *size, int convert)
+{
+	int ret, off;
+
+	off = IPT_ALIGN(sizeof(struct ip_nat_multi_range_compat)) -
+		COMPAT_IPT_ALIGN(sizeof(struct compat_ip_nat_multi_range));
+	switch (convert) {
+		case COMPAT_TO_USER:
+			ret = compat_to_user(target, dstptr, size, off);
+			break;
+		case COMPAT_FROM_USER:
+			ret = compat_from_user(target, dstptr, size, off);
+			break;
+		case COMPAT_CALC_SIZE:
+			*size += off;
+			ret = 0;
+			break;
+		default:
+			ret = -ENOPROTOOPT;
+			break;
+	}
+	return ret;
+}
+#endif
+
 inline unsigned int
 alloc_null_binding(struct ip_conntrack *conntrack,
 		   struct ip_nat_info *info,
@@ -286,7 +380,7 @@ int ip_nat_rule_find(struct sk_buff **ps
 {
 	int ret;
 
-	ret = ipt_do_table(pskb, hooknum, in, out, &nat_table, NULL);
+	ret = ipt_do_table(pskb, hooknum, in, out, ve_ip_nat_table, NULL);
 
 	if (ret == NF_ACCEPT) {
 		if (!ip_nat_initialized(ct, HOOK2MANIP(hooknum)))
@@ -300,21 +394,33 @@ static struct ipt_target ipt_snat_reg = 
 	.name		= "SNAT",
 	.target		= ipt_snat_target,
 	.checkentry	= ipt_snat_checkentry,
+#ifdef CONFIG_COMPAT
+	.compat		= &compat,
+#endif
 };
 
 static struct ipt_target ipt_dnat_reg = {
 	.name		= "DNAT",
 	.target		= ipt_dnat_target,
 	.checkentry	= ipt_dnat_checkentry,
+#ifdef CONFIG_COMPAT
+	.compat		= &compat,
+#endif
 };
 
-int __init ip_nat_rule_init(void)
+int ip_nat_rule_init(void)
 {
 	int ret;
+	struct ipt_table *tmp_table;
+
+	tmp_table = ipt_register_table(&nat_table,
+			&nat_initial_table.repl);
+	if (IS_ERR(tmp_table))
+		return PTR_ERR(tmp_table);
+#ifdef CONFIG_VE_IPTABLES
+	ve_ip_nat_table = tmp_table;
+#endif
 
-	ret = ipt_register_table(&nat_table, &nat_initial_table.repl);
-	if (ret != 0)
-		return ret;
 	ret = ipt_register_target(&ipt_snat_reg);
 	if (ret != 0)
 		goto unregister_table;
@@ -328,7 +434,10 @@ int __init ip_nat_rule_init(void)
  unregister_snat:
 	ipt_unregister_target(&ipt_snat_reg);
  unregister_table:
-	ipt_unregister_table(&nat_table);
+	ipt_unregister_table(ve_ip_nat_table);
+#ifdef CONFIG_VE_IPTABLES
+	ve_ip_nat_table = NULL;
+#endif
 
 	return ret;
 }
@@ -337,5 +446,8 @@ void ip_nat_rule_cleanup(void)
 {
 	ipt_unregister_target(&ipt_dnat_reg);
 	ipt_unregister_target(&ipt_snat_reg);
-	ipt_unregister_table(&nat_table);
+	ipt_unregister_table(ve_ip_nat_table);
+#ifdef CONFIG_VE_IPTABLES
+	ve_ip_nat_table = NULL;
+#endif
 }
diff -uprN linux-2.6.16/net/ipv4/netfilter/ip_nat_snmp_basic.c linux-2.6.16.ovz/net/ipv4/netfilter/ip_nat_snmp_basic.c
--- linux-2.6.16/net/ipv4/netfilter/ip_nat_snmp_basic.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/net/ipv4/netfilter/ip_nat_snmp_basic.c	2006-07-05 08:34:56.000000000 -0400
@@ -1000,12 +1000,12 @@ static unsigned char snmp_trap_decode(st
 		
 	return 1;
 
+err_addr_free:
+	kfree((unsigned long *)trap->ip_address);
+
 err_id_free:
 	kfree(trap->id);
 
-err_addr_free:
-	kfree((unsigned long *)trap->ip_address);
-	
 	return 0;
 }
 
@@ -1123,11 +1123,10 @@ static int snmp_parse_mangle(unsigned ch
 		struct snmp_v1_trap trap;
 		unsigned char ret = snmp_trap_decode(&ctx, &trap, map, check);
 		
-		/* Discard trap allocations regardless */
-		kfree(trap.id);
-		kfree((unsigned long *)trap.ip_address);
-		
-		if (!ret)
+		if (ret) {
+			kfree(trap.id);
+			kfree((unsigned long *)trap.ip_address);
+		} else 
 			return ret;
 		
 	} else {
diff -uprN linux-2.6.16/net/ipv4/netfilter/ip_nat_standalone.c linux-2.6.16.ovz/net/ipv4/netfilter/ip_nat_standalone.c
--- linux-2.6.16/net/ipv4/netfilter/ip_nat_standalone.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/net/ipv4/netfilter/ip_nat_standalone.c	2006-07-05 08:34:56.000000000 -0400
@@ -30,6 +30,7 @@
 #include <net/ip.h>
 #include <net/checksum.h>
 #include <linux/spinlock.h>
+#include <linux/nfcalls.h>
 
 #define ASSERT_READ_LOCK(x)
 #define ASSERT_WRITE_LOCK(x)
@@ -358,45 +359,45 @@ static int init_or_cleanup(int init)
 {
 	int ret = 0;
 
-	need_conntrack();
-
 	if (!init) goto cleanup;
 
-#ifdef CONFIG_XFRM
-	BUG_ON(ip_nat_decode_session != NULL);
-	ip_nat_decode_session = nat_decode_session;
-#endif
+	if (!ve_is_super(get_exec_env()))
+		__module_get(THIS_MODULE);
+
 	ret = ip_nat_rule_init();
 	if (ret < 0) {
 		printk("ip_nat_init: can't setup rules.\n");
-		goto cleanup_decode_session;
+ 		goto cleanup_modput;
 	}
-	ret = nf_register_hook(&ip_nat_in_ops);
+	if (ve_is_super(get_exec_env()) && ip_conntrack_disable_ve0)
+		return 0;
+
+	ret = virt_nf_register_hook(&ip_nat_in_ops);
 	if (ret < 0) {
 		printk("ip_nat_init: can't register in hook.\n");
 		goto cleanup_rule_init;
 	}
-	ret = nf_register_hook(&ip_nat_out_ops);
+	ret = virt_nf_register_hook(&ip_nat_out_ops);
 	if (ret < 0) {
 		printk("ip_nat_init: can't register out hook.\n");
 		goto cleanup_inops;
 	}
-	ret = nf_register_hook(&ip_nat_adjust_in_ops);
+	ret = virt_nf_register_hook(&ip_nat_adjust_in_ops);
 	if (ret < 0) {
 		printk("ip_nat_init: can't register adjust in hook.\n");
 		goto cleanup_outops;
 	}
-	ret = nf_register_hook(&ip_nat_adjust_out_ops);
+	ret = virt_nf_register_hook(&ip_nat_adjust_out_ops);
 	if (ret < 0) {
 		printk("ip_nat_init: can't register adjust out hook.\n");
 		goto cleanup_adjustin_ops;
 	}
-	ret = nf_register_hook(&ip_nat_local_out_ops);
+	ret = virt_nf_register_hook(&ip_nat_local_out_ops);
 	if (ret < 0) {
 		printk("ip_nat_init: can't register local out hook.\n");
 		goto cleanup_adjustout_ops;;
 	}
-	ret = nf_register_hook(&ip_nat_local_in_ops);
+	ret = virt_nf_register_hook(&ip_nat_local_in_ops);
 	if (ret < 0) {
 		printk("ip_nat_init: can't register local in hook.\n");
 		goto cleanup_localoutops;
@@ -404,38 +405,76 @@ static int init_or_cleanup(int init)
 	return ret;
 
  cleanup:
-	nf_unregister_hook(&ip_nat_local_in_ops);
+	if (ve_is_super(get_exec_env()) && ip_conntrack_disable_ve0)
+		goto cleanup_rule_init;
+	virt_nf_unregister_hook(&ip_nat_local_in_ops);
  cleanup_localoutops:
-	nf_unregister_hook(&ip_nat_local_out_ops);
+	virt_nf_unregister_hook(&ip_nat_local_out_ops);
  cleanup_adjustout_ops:
-	nf_unregister_hook(&ip_nat_adjust_out_ops);
+	virt_nf_unregister_hook(&ip_nat_adjust_out_ops);
  cleanup_adjustin_ops:
-	nf_unregister_hook(&ip_nat_adjust_in_ops);
+	virt_nf_unregister_hook(&ip_nat_adjust_in_ops);
  cleanup_outops:
-	nf_unregister_hook(&ip_nat_out_ops);
+	virt_nf_unregister_hook(&ip_nat_out_ops);
  cleanup_inops:
-	nf_unregister_hook(&ip_nat_in_ops);
+	virt_nf_unregister_hook(&ip_nat_in_ops);
  cleanup_rule_init:
 	ip_nat_rule_cleanup();
- cleanup_decode_session:
-#ifdef CONFIG_XFRM
-	ip_nat_decode_session = NULL;
-	synchronize_net();
-#endif
+ cleanup_modput:
+	if (!ve_is_super(get_exec_env()))
+		module_put(THIS_MODULE);
 	return ret;
 }
 
-static int __init init(void)
+int init_iptable_nat(void)
 {
 	return init_or_cleanup(1);
 }
 
-static void __exit fini(void)
+void fini_iptable_nat(void)
 {
 	init_or_cleanup(0);
 }
 
-module_init(init);
+static int __init init(void)
+{
+	int err;
+
+	need_conntrack();
+
+#ifdef CONFIG_XFRM
+	BUG_ON(ip_nat_decode_session != NULL);
+	ip_nat_decode_session = nat_decode_session;
+#endif
+
+	err = init_iptable_nat();
+	if (err < 0) {
+#ifdef CONFIG_XFRM
+		ip_nat_decode_session = NULL;
+		synchronize_net();
+#endif
+		return err;
+	}
+
+	KSYMRESOLVE(init_iptable_nat);
+	KSYMRESOLVE(fini_iptable_nat);
+	KSYMMODRESOLVE(iptable_nat);
+	return 0;
+}
+
+static void __exit fini(void)
+{
+	KSYMMODUNRESOLVE(iptable_nat);
+	KSYMUNRESOLVE(init_iptable_nat);
+	KSYMUNRESOLVE(fini_iptable_nat);
+	fini_iptable_nat();
+#ifdef CONFIG_XFRM
+	ip_nat_decode_session = NULL;
+	synchronize_net();
+#endif
+}
+
+fs_initcall(init);
 module_exit(fini);
 
 MODULE_LICENSE("GPL");
diff -uprN linux-2.6.16/net/ipv4/netfilter/ip_queue.c linux-2.6.16.ovz/net/ipv4/netfilter/ip_queue.c
--- linux-2.6.16/net/ipv4/netfilter/ip_queue.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/net/ipv4/netfilter/ip_queue.c	2006-07-05 08:34:56.000000000 -0400
@@ -542,8 +542,17 @@ ipq_rcv_sk(struct sock *sk, int len)
 	down(&ipqnl_sem);
 			
 	for (qlen = skb_queue_len(&sk->sk_receive_queue); qlen; qlen--) {
+#ifdef CONFIG_VE
+		struct ve_struct *env;
+#endif
 		skb = skb_dequeue(&sk->sk_receive_queue);
+#ifdef CONFIG_VE
+		env = set_exec_env(VE_OWNER_SKB(skb));
 		ipq_rcv_skb(skb);
+		(void)set_exec_env(env);
+#else
+		ipq_rcv_skb(skb);
+#endif
 		kfree_skb(skb);
 	}
 		
diff -uprN linux-2.6.16/net/ipv4/netfilter/ip_tables.c linux-2.6.16.ovz/net/ipv4/netfilter/ip_tables.c
--- linux-2.6.16/net/ipv4/netfilter/ip_tables.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/net/ipv4/netfilter/ip_tables.c	2006-07-05 08:34:56.000000000 -0400
@@ -24,14 +24,17 @@
 #include <linux/module.h>
 #include <linux/icmp.h>
 #include <net/ip.h>
+#include <net/compat.h>
 #include <asm/uaccess.h>
 #include <asm/semaphore.h>
 #include <linux/proc_fs.h>
 #include <linux/err.h>
 #include <linux/cpumask.h>
+#include <ub/ub_mem.h>
 
 #include <linux/netfilter/x_tables.h>
 #include <linux/netfilter_ipv4/ip_tables.h>
+#include <linux/nfcalls.h>
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
@@ -70,6 +73,14 @@ do {								\
 #define inline
 #endif
 
+#ifdef CONFIG_VE_IPTABLES
+/* include ve.h and define get_exec_env */
+#include <linux/sched.h>
+#define ve_ipt_standard_target	(get_exec_env()->_ipt_standard_target)
+#else /* !CONFIG_VE_IPTABLES: the file-scope ipt_standard_target */
+#define ve_ipt_standard_target	(&ipt_standard_target)
+#endif /* CONFIG_VE_IPTABLES */
+
 /*
    We keep a set of rules for each CPU, so we can avoid write-locking
    them in the softirq when updating the counters and therefore
@@ -480,7 +491,7 @@ standard_check(const struct ipt_entry_ta
 	if (t->u.target_size
 	    != IPT_ALIGN(sizeof(struct ipt_standard_target))) {
 		duprintf("standard_check: target size %u != %u\n",
-			 t->u.target_size,
+			 t->u.target_size, (unsigned int)
 			 IPT_ALIGN(sizeof(struct ipt_standard_target)));
 		return 0;
 	}
@@ -565,7 +576,7 @@ check_entry(struct ipt_entry *e, const c
 	}
 	t->u.kernel.target = target;
 
-	if (t->u.kernel.target == &ipt_standard_target) {
+	if (t->u.kernel.target == ve_ipt_standard_target) {
 		if (!standard_check(t, size)) {
 			ret = -EINVAL;
 			goto cleanup_matches;
@@ -790,32 +801,45 @@ get_counters(const struct xt_table_info 
 	}
 }
 
-static int
-copy_entries_to_user(unsigned int total_size,
-		     struct ipt_table *table,
-		     void __user *userptr)
+static inline struct xt_counters * alloc_counters(struct ipt_table *table)
 {
-	unsigned int off, num, countersize;
-	struct ipt_entry *e;
+	unsigned int countersize;
 	struct xt_counters *counters;
 	struct xt_table_info *private = table->private;
-	int ret = 0;
-	void *loc_cpu_entry;
 
 	/* We need atomic snapshot of counters: rest doesn't change
 	   (other than comefrom, which userspace doesn't care
 	   about). */
 	countersize = sizeof(struct xt_counters) * private->number;
-	counters = vmalloc_node(countersize, numa_node_id());
+	counters = ub_vmalloc_node(countersize, numa_node_id());
 
 	if (counters == NULL)
-		return -ENOMEM;
+		return ERR_PTR(-ENOMEM);
 
 	/* First, sum counters... */
 	write_lock_bh(&table->lock);
 	get_counters(private, counters);
 	write_unlock_bh(&table->lock);
 
+	return counters;
+}
+
+static int
+copy_entries_to_user(unsigned int total_size,
+		     struct ipt_table *table,
+		     void __user *userptr)
+{
+	unsigned int off, num;
+	struct ipt_entry *e;
+	struct xt_counters *counters;
+	struct xt_table_info *private = table->private;
+	int ret = 0;
+	void *loc_cpu_entry;
+
+	counters = alloc_counters(table);
+	if (IS_ERR(counters))
+		return PTR_ERR(counters);
+
 	/* choose the copy that is on our node/cpu, ...
 	 * This choice is lazy (because current thread is
 	 * allowed to migrate to another cpu)
@@ -875,25 +899,391 @@ copy_entries_to_user(unsigned int total_
 	return ret;
 }
 
+#ifdef CONFIG_COMPAT
+static DECLARE_MUTEX(compat_ipt_mutex);
+
+struct compat_delta {
+	struct compat_delta *next;
+	u_int16_t offset;
+	short delta;
+};
+
+static struct compat_delta *compat_offsets = NULL;
+
+static int compat_add_offset(u_int16_t offset, short delta)
+{
+	struct compat_delta *tmp;
+
+	tmp = kmalloc(sizeof(struct compat_delta), GFP_KERNEL);
+	if (!tmp)
+		return -ENOMEM;
+	tmp->offset = offset;
+	tmp->delta = delta;
+	if (compat_offsets) {
+		tmp->next = compat_offsets->next;
+		compat_offsets->next = tmp;
+	} else {
+		compat_offsets = tmp;
+		tmp->next = NULL;
+	}
+	return 0;
+}
+
+static void compat_flush_offsets(void)
+{
+	struct compat_delta *tmp, *next;
+
+	if (compat_offsets) {
+		for(tmp = compat_offsets; tmp; tmp = next) {
+			next = tmp->next;
+			kfree(tmp);
+		}
+		compat_offsets = NULL;
+	}
+}
+
+static short compat_calc_jump(u_int16_t offset)
+{
+	struct compat_delta *tmp;
+	short delta;
+
+	for(tmp = compat_offsets, delta = 0; tmp; tmp = tmp->next)
+		if (tmp->offset < offset)
+			delta += tmp->delta;
+	return delta;
+}
+
+struct compat_ipt_standard_target
+{
+	struct compat_ipt_entry_target target;
+	compat_int_t verdict;
+};
+
+#define IPT_ST_OFFSET	(sizeof(struct ipt_standard_target) - \
+				sizeof(struct compat_ipt_standard_target))
+
+struct compat_ipt_standard
+{
+	struct compat_ipt_entry entry;
+	struct compat_ipt_standard_target target;
+};
+
+static int compat_ipt_standard_fn(void *target,
+		void **dstptr, int *size, int convert)
+{
+	struct compat_ipt_standard_target compat_st, *pcompat_st;
+	struct ipt_standard_target st, *pst;
+	int ret;
+
+	ret = 0;
+	switch (convert) {
+		case COMPAT_TO_USER:
+			pst = (struct ipt_standard_target *)target;
+			memcpy(&compat_st.target, &pst->target,
+					sizeof(struct ipt_entry_target));
+			compat_st.verdict = pst->verdict;
+			if (compat_st.verdict > 0)
+				compat_st.verdict -=
+					compat_calc_jump(compat_st.verdict);
+			compat_st.target.u.user.target_size =
+			sizeof(struct compat_ipt_standard_target);
+			if (__copy_to_user(*dstptr, &compat_st,
+				sizeof(struct compat_ipt_standard_target)))
+				ret = -EFAULT;
+			*size -= IPT_ST_OFFSET;
+			*dstptr += sizeof(struct compat_ipt_standard_target);
+			break;
+		case COMPAT_FROM_USER:
+			pcompat_st =
+				(struct compat_ipt_standard_target *)target;
+			memcpy(&st.target, &pcompat_st->target,
+					sizeof(struct ipt_entry_target));
+			st.verdict = pcompat_st->verdict;
+			if (st.verdict > 0)
+				st.verdict += compat_calc_jump(st.verdict);
+			st.target.u.user.target_size =
+			sizeof(struct ipt_standard_target);
+			memcpy(*dstptr, &st,
+					sizeof(struct ipt_standard_target));
+			*size += IPT_ST_OFFSET;
+			*dstptr += sizeof(struct ipt_standard_target);
+			break;
+		case COMPAT_CALC_SIZE:
+			*size += IPT_ST_OFFSET;
+			break;
+		default:
+			ret = -ENOPROTOOPT;
+			break;
+	}
+	return ret;
+}
+
+int ipt_target_align_compat(void *target, void **dstptr,
+		int *size, int off, int convert)
+{
+	struct compat_ipt_entry_target *pcompat;
+	struct ipt_entry_target *pt;
+	u_int16_t tsize;
+	int ret;
+
+	ret = 0;
+	switch (convert) {
+		case COMPAT_TO_USER:
+			pt = (struct ipt_entry_target *)target;
+			tsize = pt->u.user.target_size;
+			if (__copy_to_user(*dstptr, pt, tsize)) {
+				ret = -EFAULT;
+				break;
+			}
+			tsize -= off;
+			if (put_user(tsize, (u_int16_t *)*dstptr))
+				ret = -EFAULT;
+			*size -= off;
+			*dstptr += tsize;
+			break;
+		case COMPAT_FROM_USER:
+			pcompat = (struct compat_ipt_entry_target *)target;
+			pt = (struct ipt_entry_target *)*dstptr;
+			tsize = pcompat->u.user.target_size;
+			memcpy(pt, pcompat, tsize);
+			tsize += off;
+			pt->u.user.target_size = tsize;
+			*size += off;
+			*dstptr += tsize;
+			break;
+		case COMPAT_CALC_SIZE:
+			*size += off;
+			break;
+		default:
+			ret = -ENOPROTOOPT;
+			break;
+	}
+	return ret;
+}
+
+int ipt_match_align_compat(void *match, void **dstptr,
+		int *size, int off, int convert)
+{
+	struct compat_ipt_entry_match *pcompat_m;
+	struct ipt_entry_match *pm;
+	u_int16_t msize;
+	int ret;
+
+	ret = 0;
+	switch (convert) {
+		case COMPAT_TO_USER:
+			pm = (struct ipt_entry_match *)match;
+			msize = pm->u.user.match_size;
+			if (__copy_to_user(*dstptr, pm, msize)) {
+				ret = -EFAULT;
+				break;
+			}
+			msize -= off;
+			if (put_user(msize, (u_int16_t *)*dstptr))
+				ret = -EFAULT;
+			*size -= off;
+			*dstptr += msize;
+			break;
+		case COMPAT_FROM_USER:
+			pcompat_m = (struct compat_ipt_entry_match *)match;
+			pm = (struct ipt_entry_match *)*dstptr;
+			msize = pcompat_m->u.user.match_size;
+			memcpy(pm, pcompat_m, msize);
+			msize += off;
+			pm->u.user.match_size = msize;
+			*size += off;
+			*dstptr += msize;
+			break;
+		case COMPAT_CALC_SIZE:
+			*size += off;
+			break;
+		default:
+			ret = -ENOPROTOOPT;
+			break;
+	}
+	return ret;
+}
+
+static int icmp_compat(void *match,
+		void **dstptr, int *size, int convert)
+{
+	int off;
+
+	off = IPT_ALIGN(sizeof(struct ipt_icmp)) -
+		COMPAT_IPT_ALIGN(sizeof(struct ipt_icmp));
+	return ipt_match_align_compat(match, dstptr, size, off, convert);
+}
+
+static inline int
+compat_calc_match(struct ipt_entry_match *m, int * size)
+{
+	if (m->u.kernel.match->compat)
+		m->u.kernel.match->compat(m, NULL, size, COMPAT_CALC_SIZE);
+	return 0;
+}
+
+static int compat_calc_entry(struct ipt_entry *e, struct xt_table_info *info,
+		void *base, struct xt_table_info *newinfo)
+{
+	struct ipt_entry_target *t;
+	u_int16_t entry_offset;
+	int off, i, ret;
+
+	off = 0;
+	entry_offset = (void *)e - base;
+	IPT_MATCH_ITERATE(e, compat_calc_match, &off);
+	t = ipt_get_target(e);
+	if (t->u.kernel.target->compat)
+		t->u.kernel.target->compat(t, NULL, &off, COMPAT_CALC_SIZE);
+	newinfo->size -= off;
+	ret = compat_add_offset(entry_offset, off);
+	if (ret)
+		return ret;
+
+	for (i = 0; i< NF_IP_NUMHOOKS; i++) {
+		if (info->hook_entry[i] && (e < (struct ipt_entry *)
+				(base + info->hook_entry[i])))
+			newinfo->hook_entry[i] -= off;
+		if (info->underflow[i] && (e < (struct ipt_entry *)
+				(base + info->underflow[i])))
+			newinfo->underflow[i] -= off;
+	}
+	return 0;
+}
+
+static int compat_table_info(struct xt_table_info *info,
+		struct xt_table_info *newinfo)
+{
+	void *loc_cpu_entry;
+	int i;
+
+	if (!newinfo || !info)
+		return -EINVAL;
+
+	memset(newinfo, 0, sizeof(struct xt_table_info));
+	newinfo->size = info->size;
+	for (i = 0; i < NF_IP_NUMHOOKS; i++) {
+		newinfo->hook_entry[i] = info->hook_entry[i];
+		newinfo->underflow[i] = info->underflow[i];
+	}
+	loc_cpu_entry = info->entries[raw_smp_processor_id()];
+	return IPT_ENTRY_ITERATE(loc_cpu_entry, info->size,
+			compat_calc_entry, info, loc_cpu_entry, newinfo);
+}
+#endif
+
+/* IPT_SO_GET_INFO: report the named table's hooks, entry count and size
+ * (compat layout for 32-bit callers).  Returns 0 or a negative errno. */
+static int get_info(void __user *user, int *len)
+{
+	char name[IPT_TABLE_MAXNAMELEN];
+	struct ipt_table *t;
+	int ret = 0, size;
+
+#ifdef CONFIG_COMPAT
+	if (is_current_32bits())
+		size = sizeof(struct compat_ipt_getinfo);
+	else
+#endif
+		size = sizeof(struct ipt_getinfo);
+
+	if (*len != size) {
+		duprintf("length %u != %u\n", *len, (unsigned int)size);
+		return -EINVAL;
+	}
+
+	if (copy_from_user(name, user, sizeof(name)) != 0)
+		return -EFAULT;
+
+	name[IPT_TABLE_MAXNAMELEN-1] = '\0';
+#ifdef CONFIG_COMPAT
+	down(&compat_ipt_mutex);
+#endif
+	t = try_then_request_module(xt_find_table_lock(AF_INET, name),
+			"iptable_%s", name);
+	if (t && !IS_ERR(t)) {
+		struct ipt_getinfo info;
+		struct xt_table_info *private = t->private;
+#ifdef CONFIG_COMPAT
+		struct compat_ipt_getinfo compat_info;
+#endif
+		void *pinfo;
+
+#ifdef CONFIG_COMPAT
+		if (is_current_32bits()) {
+			struct xt_table_info tmp;
+			ret = compat_table_info(private, &tmp);
+			compat_flush_offsets();
+			memcpy(compat_info.hook_entry, tmp.hook_entry,
+					sizeof(compat_info.hook_entry));
+			memcpy(compat_info.underflow, tmp.underflow,
+					sizeof(compat_info.underflow));
+			compat_info.valid_hooks = t->valid_hooks;
+			compat_info.num_entries = private->number;
+			compat_info.size = tmp.size;
+			/* full copy: strcpy leaves stack bytes after the NUL */
+			memcpy(compat_info.name, name,
+					sizeof(compat_info.name));
+			pinfo = (void *)&compat_info;
+		} else
+#endif
+		{
+			info.valid_hooks = t->valid_hooks;
+			memcpy(info.hook_entry, private->hook_entry,
+					sizeof(info.hook_entry));
+			memcpy(info.underflow, private->underflow,
+					sizeof(info.underflow));
+			info.num_entries = private->number;
+			info.size = private->size;
+			memcpy(info.name, name, sizeof(info.name));
+			pinfo = (void *)&info;
+		}
+		/* do not let the copy-out mask a compat_table_info() error */
+		if (!ret && copy_to_user(user, pinfo, *len) != 0)
+			ret = -EFAULT;
+		xt_table_unlock(t);
+		module_put(t->me);
+	} else
+		ret = t ? PTR_ERR(t) : -ENOENT;
+#ifdef CONFIG_COMPAT
+	up(&compat_ipt_mutex);
+#endif
+	return ret;
+}
+
 static int
-get_entries(const struct ipt_get_entries *entries,
-	    struct ipt_get_entries __user *uptr)
+get_entries(struct ipt_get_entries __user *uptr, int *len)
 {
 	int ret;
+	struct ipt_get_entries get;
 	struct ipt_table *t;
 
-	t = xt_find_table_lock(AF_INET, entries->name);
+	if (*len < sizeof(get)) {
+		duprintf("get_entries: %u < %d\n", *len,
+				(unsigned int)sizeof(get));
+		return -EINVAL;
+	}
+	if (copy_from_user(&get, uptr, sizeof(get)) != 0)
+		return -EFAULT;
+	if (*len != sizeof(struct ipt_get_entries) + get.size) {
+		duprintf("get_entries: %u != %u\n", *len,
+				(unsigned int)(sizeof(struct ipt_get_entries) +
+				get.size));
+		return -EINVAL;
+	}
+
+	t = xt_find_table_lock(AF_INET, get.name);
 	if (t && !IS_ERR(t)) {
 		struct xt_table_info *private = t->private;
 		duprintf("t->private->number = %u\n",
 			 private->number);
-		if (entries->size == private->size)
+		if (get.size == private->size)
 			ret = copy_entries_to_user(private->size,
 						   t, uptr->entrytable);
 		else {
 			duprintf("get_entries: I've got %u not %u!\n",
 				 private->size,
-				 entries->size);
+				 get.size);
 			ret = -EINVAL;
 		}
 		module_put(t->me);
@@ -905,71 +1295,39 @@ get_entries(const struct ipt_get_entries
 }
 
 static int
-do_replace(void __user *user, unsigned int len)
+__do_replace(const char *name, unsigned int valid_hooks,
+		struct xt_table_info *newinfo, unsigned int num_counters,
+		void __user *counters_ptr)
 {
 	int ret;
-	struct ipt_replace tmp;
 	struct ipt_table *t;
-	struct xt_table_info *newinfo, *oldinfo;
+	struct xt_table_info *oldinfo;
 	struct xt_counters *counters;
-	void *loc_cpu_entry, *loc_cpu_old_entry;
-
-	if (copy_from_user(&tmp, user, sizeof(tmp)) != 0)
-		return -EFAULT;
-
-	/* Hack: Causes ipchains to give correct error msg --RR */
-	if (len != sizeof(tmp) + tmp.size)
-		return -ENOPROTOOPT;
-
-	/* overflow check */
-	if (tmp.size >= (INT_MAX - sizeof(struct xt_table_info)) / NR_CPUS -
-			SMP_CACHE_BYTES)
-		return -ENOMEM;
-	if (tmp.num_counters >= INT_MAX / sizeof(struct xt_counters))
-		return -ENOMEM;
-
-	newinfo = xt_alloc_table_info(tmp.size);
-	if (!newinfo)
-		return -ENOMEM;
-
-	/* choose the copy that is our node/cpu */
-	loc_cpu_entry = newinfo->entries[raw_smp_processor_id()];
-	if (copy_from_user(loc_cpu_entry, user + sizeof(tmp),
-			   tmp.size) != 0) {
-		ret = -EFAULT;
-		goto free_newinfo;
-	}
+	void *loc_cpu_old_entry;
 
-	counters = vmalloc(tmp.num_counters * sizeof(struct xt_counters));
+	ret = 0;
+	counters = ub_vmalloc_best(num_counters * sizeof(struct xt_counters));
 	if (!counters) {
 		ret = -ENOMEM;
-		goto free_newinfo;
+		goto out;
 	}
 
-	ret = translate_table(tmp.name, tmp.valid_hooks,
-			      newinfo, loc_cpu_entry, tmp.size, tmp.num_entries,
-			      tmp.hook_entry, tmp.underflow);
-	if (ret != 0)
-		goto free_newinfo_counters;
-
-	duprintf("ip_tables: Translated table\n");
-
-	t = try_then_request_module(xt_find_table_lock(AF_INET, tmp.name),
-				    "iptable_%s", tmp.name);
+	t = try_then_request_module(xt_find_table_lock(AF_INET, name),
+				    "iptable_%s", name);
 	if (!t || IS_ERR(t)) {
 		ret = t ? PTR_ERR(t) : -ENOENT;
 		goto free_newinfo_counters_untrans;
 	}
 
 	/* You lied! */
-	if (tmp.valid_hooks != t->valid_hooks) {
+	if (valid_hooks != t->valid_hooks) {
 		duprintf("Valid hook crap: %08X vs %08X\n",
-			 tmp.valid_hooks, t->valid_hooks);
+			 valid_hooks, t->valid_hooks);
 		ret = -EINVAL;
 		goto put_module;
 	}
 
-	oldinfo = xt_replace_table(t, tmp.num_counters, newinfo, &ret);
+	oldinfo = xt_replace_table(t, num_counters, newinfo, &ret);
 	if (!oldinfo)
 		goto put_module;
 
@@ -989,8 +1347,8 @@ do_replace(void __user *user, unsigned i
 	loc_cpu_old_entry = oldinfo->entries[raw_smp_processor_id()];
 	IPT_ENTRY_ITERATE(loc_cpu_old_entry, oldinfo->size, cleanup_entry,NULL);
 	xt_free_table_info(oldinfo);
-	if (copy_to_user(tmp.counters, counters,
-			 sizeof(struct xt_counters) * tmp.num_counters) != 0)
+	if (copy_to_user(counters_ptr, counters,
+			 sizeof(struct xt_counters) * num_counters) != 0)
 		ret = -EFAULT;
 	vfree(counters);
 	xt_table_unlock(t);
@@ -1000,9 +1358,62 @@ do_replace(void __user *user, unsigned i
 	module_put(t->me);
 	xt_table_unlock(t);
  free_newinfo_counters_untrans:
-	IPT_ENTRY_ITERATE(loc_cpu_entry, newinfo->size, cleanup_entry,NULL);
- free_newinfo_counters:
 	vfree(counters);
+ out:
+	return ret;
+}
+
+static int
+do_replace(void __user *user, unsigned int len)
+{
+	int ret;
+	struct ipt_replace tmp;
+	struct xt_table_info *newinfo;
+	void *loc_cpu_entry;
+
+	if (copy_from_user(&tmp, user, sizeof(tmp)) != 0)
+		return -EFAULT;
+
+	/* Hack: Causes ipchains to give correct error msg --RR */
+	if (len != sizeof(tmp) + tmp.size)
+		return -ENOPROTOOPT;
+
+	/* overflow check */
+	if (tmp.size >= (INT_MAX - sizeof(struct xt_table_info)) / NR_CPUS -
+			SMP_CACHE_BYTES)
+		return -ENOMEM;
+	if (tmp.num_counters >= INT_MAX / sizeof(struct xt_counters))
+		return -ENOMEM;
+
+	newinfo = xt_alloc_table_info(tmp.size);
+	if (!newinfo)
+		return -ENOMEM;
+
+	/* choose the copy that is our node/cpu */
+	loc_cpu_entry = newinfo->entries[raw_smp_processor_id()];
+	if (copy_from_user(loc_cpu_entry, user + sizeof(tmp),
+			   tmp.size) != 0) {
+		ret = -EFAULT;
+		goto free_newinfo;
+	}
+
+	ret = translate_table(tmp.name, tmp.valid_hooks,
+			      newinfo, loc_cpu_entry, tmp.size, tmp.num_entries,
+			      tmp.hook_entry, tmp.underflow);
+	if (ret != 0)
+		goto free_newinfo;
+
+	duprintf("ip_tables: Translated table\n");
+
+	ret = __do_replace(tmp.name, tmp.valid_hooks,
+			      newinfo, tmp.num_counters,
+			      tmp.counters);
+	if (ret)
+		goto free_newinfo_untrans;
+	return 0;
+
+ free_newinfo_untrans:
+	IPT_ENTRY_ITERATE(loc_cpu_entry, newinfo->size, cleanup_entry,NULL);
  free_newinfo:
 	xt_free_table_info(newinfo);
 	return ret;
@@ -1034,28 +1445,56 @@ static int
 do_add_counters(void __user *user, unsigned int len)
 {
 	unsigned int i;
-	struct xt_counters_info tmp, *paddc;
+	struct xt_counters_info tmp;
+	struct xt_counters *paddc;
+	unsigned int num_counters;
+	char *name;
+	int size;
+	void *ptmp;
 	struct ipt_table *t;
 	struct xt_table_info *private;
 	int ret = 0;
 	void *loc_cpu_entry;
+#ifdef CONFIG_COMPAT
+	struct compat_xt_counters_info compat_tmp;
 
-	if (copy_from_user(&tmp, user, sizeof(tmp)) != 0)
+	if (is_current_32bits()) {
+		ptmp = &compat_tmp;
+		size = sizeof(struct compat_xt_counters_info);
+	} else
+#endif
+	{
+		ptmp = &tmp;
+		size = sizeof(struct xt_counters_info);
+	}
+
+	if (copy_from_user(ptmp, user, size) != 0)
 		return -EFAULT;
 
-	if (len != sizeof(tmp) + tmp.num_counters*sizeof(struct xt_counters))
+#ifdef CONFIG_COMPAT
+	if (is_current_32bits()) {
+		num_counters = compat_tmp.num_counters;
+		name = compat_tmp.name;
+	} else
+#endif
+	{
+		num_counters = tmp.num_counters;
+		name = tmp.name;
+	}
+
+	if (len != size + num_counters * sizeof(struct xt_counters))
 		return -EINVAL;
 
-	paddc = vmalloc_node(len, numa_node_id());
+	paddc = ub_vmalloc_node(len - size, numa_node_id());
 	if (!paddc)
 		return -ENOMEM;
 
-	if (copy_from_user(paddc, user, len) != 0) {
+	if (copy_from_user(paddc, user + size, len - size) != 0) {
 		ret = -EFAULT;
 		goto free;
 	}
 
-	t = xt_find_table_lock(AF_INET, tmp.name);
+	t = xt_find_table_lock(AF_INET, name);
 	if (!t || IS_ERR(t)) {
 		ret = t ? PTR_ERR(t) : -ENOENT;
 		goto free;
@@ -1063,7 +1502,7 @@ do_add_counters(void __user *user, unsig
 
 	write_lock_bh(&t->lock);
 	private = t->private;
-	if (private->number != paddc->num_counters) {
+	if (private->number != num_counters) {
 		ret = -EINVAL;
 		goto unlock_up_free;
 	}
@@ -1074,7 +1513,7 @@ do_add_counters(void __user *user, unsig
 	IPT_ENTRY_ITERATE(loc_cpu_entry,
 			  private->size,
 			  add_counter_to_entry,
-			  paddc->counters,
+			  paddc,
 			  &i);
  unlock_up_free:
 	write_unlock_bh(&t->lock);
@@ -1086,14 +1525,590 @@ do_add_counters(void __user *user, unsig
 	return ret;
 }
 
+#ifdef CONFIG_COMPAT
+struct compat_ipt_replace {
+	char			name[IPT_TABLE_MAXNAMELEN];
+	u32			valid_hooks;
+	u32			num_entries;
+	u32			size;
+	u32			hook_entry[NF_IP_NUMHOOKS];
+	u32			underflow[NF_IP_NUMHOOKS];
+	u32			num_counters;
+	compat_uptr_t		counters;	/* struct ipt_counters * */
+	struct compat_ipt_entry	entries[0];
+};
+
+static inline int compat_copy_match_to_user(struct ipt_entry_match *m,
+		void __user **dstptr, compat_uint_t *size)
+{
+	if (m->u.kernel.match->compat)
+		m->u.kernel.match->compat(m, dstptr, size, COMPAT_TO_USER);
+	else {
+		if (__copy_to_user(*dstptr, m, m->u.match_size))
+			return -EFAULT;
+		*dstptr += m->u.match_size;
+	}
+	return 0;
+}
+
+static int compat_copy_entry_to_user(struct ipt_entry *e,
+		void __user **dstptr, compat_uint_t *size)
+{
+	struct ipt_entry_target __user *t;
+	struct compat_ipt_entry __user *ce;
+	u_int16_t target_offset, next_offset;
+	compat_uint_t origsize;
+	int ret;
+
+	ret = -EFAULT;
+	origsize = *size;
+	ce = (struct compat_ipt_entry __user *)*dstptr;
+	if (__copy_to_user(ce, e, sizeof(struct ipt_entry)))
+		goto out;
+
+	*dstptr += sizeof(struct compat_ipt_entry);
+	ret = IPT_MATCH_ITERATE(e, compat_copy_match_to_user, dstptr, size);
+	target_offset = e->target_offset - (origsize - *size);
+	if (ret)
+		goto out;
+	t = ipt_get_target(e);
+	if (t->u.kernel.target->compat) {
+		ret = t->u.kernel.target->compat(t,
+				dstptr, size, COMPAT_TO_USER);
+		if (ret)
+			goto out;
+	} else {
+		ret = -EFAULT;
+		if (__copy_to_user(*dstptr, t, t->u.target_size))
+			goto out;
+		*dstptr += t->u.target_size;
+	}
+	ret = -EFAULT;
+	next_offset = e->next_offset - (origsize - *size);
+	if (__put_user(target_offset, &ce->target_offset))
+		goto out;
+	if (__put_user(next_offset, &ce->next_offset))
+		goto out;
+	return 0;
+out:
+	return ret;
+}
+
+static inline int
+compat_check_calc_match(struct ipt_entry_match *m,
+	    const char *name,
+	    const struct ipt_ip *ip,
+	    unsigned int hookmask,
+	    int *size, int *i)
+{
+	struct ipt_match *match;
+
+	match = try_then_request_module(xt_find_match(AF_INET, m->u.user.name,
+						   m->u.user.revision),
+					"ipt_%s", m->u.user.name);
+	if (IS_ERR(match) || !match) {
+		duprintf("compat_check_calc_match: `%s' not found\n",
+				m->u.user.name);
+		return match ? PTR_ERR(match) : -ENOENT;
+	}
+	m->u.kernel.match = match;
+
+	if (m->u.kernel.match->compat)
+		m->u.kernel.match->compat(m, NULL, size, COMPAT_CALC_SIZE);
+
+	(*i)++;
+	return 0;
+}
+
+static inline int
+check_compat_entry_size_and_hooks(struct ipt_entry *e,
+			   struct xt_table_info *newinfo,
+			   unsigned int *size,
+			   unsigned char *base,
+			   unsigned char *limit,
+			   unsigned int *hook_entries,
+			   unsigned int *underflows,
+			   unsigned int *i,
+			   const char *name)
+{
+	struct ipt_entry_target *t;
+	struct ipt_target *target;
+	u_int16_t entry_offset;
+	int ret, off, h, j;
+
+	duprintf("check_compat_entry_size_and_hooks %p\n", e);
+	if ((unsigned long)e % __alignof__(struct compat_ipt_entry) != 0
+	    || (unsigned char *)e + sizeof(struct compat_ipt_entry) >= limit) {
+		duprintf("Bad offset %p, limit = %p\n", e, limit);
+		return -EINVAL;
+	}
+
+	if (e->next_offset < sizeof(struct compat_ipt_entry) +
+			sizeof(struct compat_ipt_entry_target)) {
+		duprintf("checking: element %p size %u\n",
+			 e, e->next_offset);
+		return -EINVAL;
+	}
+
+	if (!ip_checkentry(&e->ip)) {
+		duprintf("ip_tables: ip check failed %p %s.\n", e, name);
+		return -EINVAL;
+	}
+
+	off = 0;
+	entry_offset = (void *)e - (void *)base;
+	j = 0;
+	ret = IPT_MATCH_ITERATE(e, compat_check_calc_match, name, &e->ip,
+			e->comefrom, &off, &j);
+	if (ret != 0)
+		goto out;
+
+	t = ipt_get_target(e);
+	target = try_then_request_module(xt_find_target(AF_INET,
+						     t->u.user.name,
+						     t->u.user.revision),
+					 "ipt_%s", t->u.user.name);
+	if (IS_ERR(target) || !target) {
+		duprintf("check_entry: `%s' not found\n", t->u.user.name);
+		ret = target ? PTR_ERR(target) : -ENOENT;
+		goto out;
+	}
+	t->u.kernel.target = target;
+
+	if (t->u.kernel.target->compat)
+		t->u.kernel.target->compat(t, NULL, &off, COMPAT_CALC_SIZE);
+	*size += off;
+	ret = compat_add_offset(entry_offset, off);
+	if (ret)
+		goto out;
+
+	/* Check hooks & underflows */
+	for (h = 0; h < NF_IP_NUMHOOKS; h++) {
+		if ((unsigned char *)e - base == hook_entries[h])
+			newinfo->hook_entry[h] = hook_entries[h];
+		if ((unsigned char *)e - base == underflows[h])
+			newinfo->underflow[h] = underflows[h];
+	}
+
+	/* Clear counters and comefrom */
+	e->counters = ((struct ipt_counters) { 0, 0 });
+	e->comefrom = 0;
+
+	(*i)++;
+	return 0;
+out:
+	IPT_MATCH_ITERATE(e, cleanup_match, &j);
+	return ret;
+}
+
+static inline int compat_copy_match_from_user(struct ipt_entry_match *m,
+	void **dstptr, compat_uint_t *size, const char *name,
+	const struct ipt_ip *ip, unsigned int hookmask)
+{
+	struct ipt_entry_match *dm;
+
+	dm = (struct ipt_entry_match *)*dstptr;
+	if (m->u.kernel.match->compat)
+		m->u.kernel.match->compat(m, dstptr, size, COMPAT_FROM_USER);
+	else {
+		memcpy(*dstptr, m, m->u.match_size);
+		*dstptr += m->u.match_size;
+	}
+
+	if (dm->u.kernel.match->checkentry
+	    && !dm->u.kernel.match->checkentry(name, ip, dm->data,
+					      dm->u.match_size - sizeof(*dm),
+					      hookmask)) {
+		module_put(dm->u.kernel.match->me);
+		duprintf("ip_tables: check failed for `%s'.\n",
+			 dm->u.kernel.match->name);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+/* Expand the compat entry @e to native layout at *@dstptr, adjust the hook
+ * offsets in @newinfo and validate the target.  Returns 0 or negative errno. */
+static int compat_copy_entry_from_user(struct ipt_entry *e, void **dstptr,
+	unsigned int *size, const char *name,
+	struct xt_table_info *newinfo, unsigned char *base)
+{
+	struct ipt_entry_target *t;
+	struct ipt_entry *de;
+	unsigned int origsize;
+	int ret, h;
+
+	origsize = *size;
+	de = (struct ipt_entry *)*dstptr;
+	memcpy(de, e, sizeof(struct ipt_entry));
+
+	*dstptr += sizeof(struct compat_ipt_entry);
+	ret = IPT_MATCH_ITERATE(e, compat_copy_match_from_user, dstptr, size,
+			name, &de->ip, de->comefrom);
+	if (ret)
+		goto out;
+	de->target_offset = e->target_offset - (origsize - *size);
+	t = ipt_get_target(e);
+	if (t->u.kernel.target->compat)
+		t->u.kernel.target->compat(t, dstptr, size, COMPAT_FROM_USER);
+	else {
+		memcpy(*dstptr, t, t->u.target_size);
+		*dstptr += t->u.target_size;
+	}
+
+	de->next_offset = e->next_offset - (origsize - *size);
+	for (h = 0; h < NF_IP_NUMHOOKS; h++) {
+		if ((unsigned char *)de - base < newinfo->hook_entry[h])
+			newinfo->hook_entry[h] -= origsize - *size;
+		if ((unsigned char *)de - base < newinfo->underflow[h])
+			newinfo->underflow[h] -= origsize - *size;
+	}
+
+	ret = -EINVAL;
+	t = ipt_get_target(de);
+	/* same (possibly per-VE) standard target test as check_entry() */
+	if (t->u.kernel.target == ve_ipt_standard_target) {
+		if (!standard_check(t, *size))
+			goto out;
+	} else if (t->u.kernel.target->checkentry
+		   && !t->u.kernel.target->checkentry(name, de, t->data,
+					t->u.target_size - sizeof(*t),
+					de->comefrom)) {
+		module_put(t->u.kernel.target->me);
+		duprintf("ip_tables: compat: check failed for `%s'.\n",
+			 t->u.kernel.target->name);
+		goto out;
+	}
+	ret = 0;
+out:
+	return ret;
+}
+
+static int
+translate_compat_table(const char *name,
+		unsigned int valid_hooks,
+		struct xt_table_info **pinfo,
+		void **pentry0,
+		unsigned int total_size,
+		unsigned int number,
+		unsigned int *hook_entries,
+		unsigned int *underflows)
+{
+	unsigned int i;
+	struct xt_table_info *newinfo, *info;
+	void *pos, *entry0, *entry1;
+	unsigned int size;
+	int ret;
+
+	info = *pinfo;
+	entry0 = *pentry0;
+	size = total_size;
+	info->number = number;
+
+	/* Init all hooks to impossible value. */
+	for (i = 0; i < NF_IP_NUMHOOKS; i++) {
+		info->hook_entry[i] = 0xFFFFFFFF;
+		info->underflow[i] = 0xFFFFFFFF;
+	}
+
+	duprintf("translate_compat_table: size %u\n", info->size);
+	i = 0;
+	down(&compat_ipt_mutex);
+	/* Walk through entries, checking offsets. */
+	ret = IPT_ENTRY_ITERATE(entry0, total_size,
+				check_compat_entry_size_and_hooks,
+				info, &size, entry0,
+				entry0 + total_size,
+				hook_entries, underflows, &i, name);
+	if (ret != 0)
+		goto out_unlock;
+
+	ret = -EINVAL;
+	if (i != number) {
+		duprintf("translate_compat_table: %u not %u entries\n",
+			 i, number);
+		goto out_unlock;
+	}
+
+	/* Check hooks all assigned */
+	for (i = 0; i < NF_IP_NUMHOOKS; i++) {
+		/* Only hooks which are valid */
+		if (!(valid_hooks & (1 << i)))
+			continue;
+		if (info->hook_entry[i] == 0xFFFFFFFF) {
+			duprintf("Invalid hook entry %u %u\n",
+				 i, hook_entries[i]);
+			goto out_unlock;
+		}
+		if (info->underflow[i] == 0xFFFFFFFF) {
+			duprintf("Invalid underflow %u %u\n",
+				 i, underflows[i]);
+			goto out_unlock;
+		}
+	}
+
+	ret = -ENOMEM;
+	newinfo = xt_alloc_table_info(size);
+	if (!newinfo)
+		goto out_unlock;
+
+	newinfo->number = number;
+	for (i = 0; i < NF_IP_NUMHOOKS; i++) {
+		newinfo->hook_entry[i] = info->hook_entry[i];
+		newinfo->underflow[i] = info->underflow[i];
+	}
+	entry1 = newinfo->entries[raw_smp_processor_id()];
+	pos = entry1;
+	size =  total_size;
+	ret = IPT_ENTRY_ITERATE(entry0, total_size,
+			compat_copy_entry_from_user, &pos, &size,
+			name, newinfo, entry1);
+	compat_flush_offsets();
+	up(&compat_ipt_mutex);
+	if (ret)
+		goto free_newinfo;
+
+	ret = -ELOOP;
+	if (!mark_source_chains(newinfo, valid_hooks, entry1))
+		goto free_newinfo;
+
+	/* And one copy for every other CPU */
+	for_each_cpu(i)
+		if (newinfo->entries[i] && newinfo->entries[i] != entry1)
+			memcpy(newinfo->entries[i], entry1, newinfo->size);
+
+	*pinfo = newinfo;
+	*pentry0 = entry1;
+	xt_free_table_info(info);
+	return 0;
+
+free_newinfo:
+	xt_free_table_info(newinfo);
+out:
+	return ret;
+out_unlock:
+	up(&compat_ipt_mutex);
+	goto out;
+}
+
+static int
+compat_do_replace(void __user *user, unsigned int len)
+{
+	int ret;
+	struct compat_ipt_replace tmp;
+	struct xt_table_info *newinfo;
+	void *loc_cpu_entry;
+
+	if (copy_from_user(&tmp, user, sizeof(tmp)) != 0)
+		return -EFAULT;
+
+	/* Hack: Causes ipchains to give correct error msg --RR */
+	if (len != sizeof(tmp) + tmp.size)
+		return -ENOPROTOOPT;
+
+	/* overflow check */
+	if (tmp.size >= (INT_MAX - sizeof(struct xt_table_info)) / NR_CPUS -
+			SMP_CACHE_BYTES)
+		return -ENOMEM;
+	if (tmp.num_counters >= INT_MAX / sizeof(struct xt_counters))
+		return -ENOMEM;
+
+	newinfo = xt_alloc_table_info(tmp.size);
+	if (!newinfo)
+		return -ENOMEM;
+
+	/* choose the copy that is our node/cpu */
+	loc_cpu_entry = newinfo->entries[raw_smp_processor_id()];
+	if (copy_from_user(loc_cpu_entry, user + sizeof(tmp),
+			   tmp.size) != 0) {
+		ret = -EFAULT;
+		goto free_newinfo;
+	}
+
+	ret = translate_compat_table(tmp.name, tmp.valid_hooks,
+			      &newinfo, &loc_cpu_entry, tmp.size,
+			      tmp.num_entries, tmp.hook_entry, tmp.underflow);
+	if (ret != 0)
+		goto free_newinfo;
+
+	duprintf("compat_do_replace: Translated table\n");
+
+	ret = __do_replace(tmp.name, tmp.valid_hooks,
+			      newinfo, tmp.num_counters,
+			      compat_ptr(tmp.counters));
+	if (ret)
+		goto free_newinfo_untrans;
+	return 0;
+
+ free_newinfo_untrans:
+	IPT_ENTRY_ITERATE(loc_cpu_entry, newinfo->size, cleanup_entry,NULL);
+ free_newinfo:
+	xt_free_table_info(newinfo);
+	return ret;
+}
+
+struct compat_ipt_get_entries
+{
+	char name[IPT_TABLE_MAXNAMELEN];
+	compat_uint_t size;
+	struct compat_ipt_entry entrytable[0];
+};
+
+static int compat_copy_entries_to_user(unsigned int total_size,
+		     struct ipt_table *table, void __user *userptr)
+{
+	unsigned int off, num;
+	struct compat_ipt_entry e;
+	struct xt_counters *counters;
+	struct xt_table_info *private = table->private;
+	void __user *pos;
+	unsigned int size;
+	int ret = 0;
+	void *loc_cpu_entry;
+
+	counters = alloc_counters(table);
+	if (IS_ERR(counters))
+		return PTR_ERR(counters);
+
+	/* choose the copy that is on our node/cpu, ...
+	 * This choice is lazy (because current thread is
+	 * allowed to migrate to another cpu)
+	 */
+	loc_cpu_entry = private->entries[raw_smp_processor_id()];
+	pos = userptr;
+	size = total_size;
+	ret = IPT_ENTRY_ITERATE(loc_cpu_entry, total_size,
+			compat_copy_entry_to_user, &pos, &size);
+	if (ret)
+		goto free_counters;
+
+	/* ... then go back and fix counters and names */
+	for (off = 0, num = 0; off < size; off += e.next_offset, num++) {
+		unsigned int i;
+		struct ipt_entry_match m;
+		struct ipt_entry_target t;
+
+		ret = -EFAULT;
+		if (copy_from_user(&e, userptr + off,
+					sizeof(struct compat_ipt_entry)))
+			goto free_counters;
+		if (copy_to_user(userptr + off +
+			offsetof(struct compat_ipt_entry, counters),
+			 &counters[num], sizeof(counters[num])))
+			goto free_counters;
+
+		for (i = sizeof(struct compat_ipt_entry);
+				i < e.target_offset; i += m.u.match_size) {
+			if (copy_from_user(&m, userptr + off + i,
+					sizeof(struct ipt_entry_match)))
+				goto free_counters;
+			if (copy_to_user(userptr + off + i +
+				offsetof(struct ipt_entry_match, u.user.name),
+				m.u.kernel.match->name,
+				strlen(m.u.kernel.match->name) + 1))
+				goto free_counters;
+		}
+
+		if (copy_from_user(&t, userptr + off + e.target_offset,
+					sizeof(struct ipt_entry_target)))
+			goto free_counters;
+		if (copy_to_user(userptr + off + e.target_offset +
+			offsetof(struct ipt_entry_target, u.user.name),
+			t.u.kernel.target->name,
+			strlen(t.u.kernel.target->name) + 1))
+			goto free_counters;
+	}
+	ret = 0;
+free_counters:
+	vfree(counters);
+	return ret;
+}
+
+static int
+compat_get_entries(struct compat_ipt_get_entries __user *uptr, int *len)
+{
+	int ret;
+	struct compat_ipt_get_entries get;
+	struct ipt_table *t;
+
+
+	if (*len < sizeof(get)) {
+		duprintf("compat_get_entries: %u < %u\n",
+				*len, (unsigned int)sizeof(get));
+		return -EINVAL;
+	}
+
+	if (copy_from_user(&get, uptr, sizeof(get)) != 0)
+		return -EFAULT;
+
+	if (*len != sizeof(struct compat_ipt_get_entries) + get.size) {
+		duprintf("compat_get_entries: %u != %u\n", *len,
+			(unsigned int)(sizeof(struct compat_ipt_get_entries) +
+			get.size));
+		return -EINVAL;
+	}
+
+	down(&compat_ipt_mutex);
+	t = xt_find_table_lock(AF_INET, get.name);
+	if (t && !IS_ERR(t)) {
+		struct xt_table_info *private = t->private;
+		struct xt_table_info info;
+		duprintf("t->private->number = %u\n",
+			 private->number);
+		ret = compat_table_info(private, &info);
+		if (!ret && get.size == info.size) {
+			ret = compat_copy_entries_to_user(private->size,
+						   t, uptr->entrytable);
+		} else if (!ret) {
+			duprintf("compat_get_entries: I've got %u not %u!\n",
+				 private->size,
+				 get.size);
+			ret = -EINVAL;
+		}
+		compat_flush_offsets();
+		module_put(t->me);
+		xt_table_unlock(t);
+	} else
+		ret = t ? PTR_ERR(t) : -ENOENT;
+
+	up(&compat_ipt_mutex);
+	return ret;
+}
+
+static int
+compat_do_ipt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
+{
+	int ret;
+
+	switch (cmd) {
+	case IPT_SO_GET_INFO:
+		ret = get_info(user, len);
+		break;
+	case IPT_SO_GET_ENTRIES:
+		ret = compat_get_entries(user, len);
+		break;
+	default:
+		duprintf("compat_do_ipt_get_ctl: unknown request %i\n", cmd);
+		ret = -EINVAL;
+	}
+	return ret;
+}
+#endif
+
 static int
 do_ipt_set_ctl(struct sock *sk,	int cmd, void __user *user, unsigned int len)
 {
 	int ret;
 
-	if (!capable(CAP_NET_ADMIN))
+	if (!capable(CAP_VE_NET_ADMIN))
 		return -EPERM;
 
+#ifdef CONFIG_COMPAT
+	if (is_current_32bits() && (cmd == IPT_SO_SET_REPLACE))
+		return compat_do_replace(user, len);
+#endif
+
 	switch (cmd) {
 	case IPT_SO_SET_REPLACE:
 		ret = do_replace(user, len);
@@ -1116,69 +2131,22 @@ do_ipt_get_ctl(struct sock *sk, int cmd,
 {
 	int ret;
 
-	if (!capable(CAP_NET_ADMIN))
+	if (!capable(CAP_VE_NET_ADMIN))
 		return -EPERM;
 
-	switch (cmd) {
-	case IPT_SO_GET_INFO: {
-		char name[IPT_TABLE_MAXNAMELEN];
-		struct ipt_table *t;
-
-		if (*len != sizeof(struct ipt_getinfo)) {
-			duprintf("length %u != %u\n", *len,
-				 sizeof(struct ipt_getinfo));
-			ret = -EINVAL;
-			break;
-		}
-
-		if (copy_from_user(name, user, sizeof(name)) != 0) {
-			ret = -EFAULT;
-			break;
-		}
-		name[IPT_TABLE_MAXNAMELEN-1] = '\0';
-
-		t = try_then_request_module(xt_find_table_lock(AF_INET, name),
-					    "iptable_%s", name);
-		if (t && !IS_ERR(t)) {
-			struct ipt_getinfo info;
-			struct xt_table_info *private = t->private;
-
-			info.valid_hooks = t->valid_hooks;
-			memcpy(info.hook_entry, private->hook_entry,
-			       sizeof(info.hook_entry));
-			memcpy(info.underflow, private->underflow,
-			       sizeof(info.underflow));
-			info.num_entries = private->number;
-			info.size = private->size;
-			memcpy(info.name, name, sizeof(info.name));
-
-			if (copy_to_user(user, &info, *len) != 0)
-				ret = -EFAULT;
-			else
-				ret = 0;
-			xt_table_unlock(t);
-			module_put(t->me);
-		} else
-			ret = t ? PTR_ERR(t) : -ENOENT;
-	}
-	break;
+#ifdef CONFIG_COMPAT
+	if (is_current_32bits())
+		return compat_do_ipt_get_ctl(sk, cmd, user, len);
+#endif
 
-	case IPT_SO_GET_ENTRIES: {
-		struct ipt_get_entries get;
+	switch (cmd) {
+	case IPT_SO_GET_INFO:
+		ret = get_info(user, len);
+		break;
 
-		if (*len < sizeof(get)) {
-			duprintf("get_entries: %u < %u\n", *len, sizeof(get));
-			ret = -EINVAL;
-		} else if (copy_from_user(&get, user, sizeof(get)) != 0) {
-			ret = -EFAULT;
-		} else if (*len != sizeof(struct ipt_get_entries) + get.size) {
-			duprintf("get_entries: %u != %u\n", *len,
-				 sizeof(struct ipt_get_entries) + get.size);
-			ret = -EINVAL;
-		} else
-			ret = get_entries(&get, user);
+	case IPT_SO_GET_ENTRIES:
+		ret = get_entries(user, len);
 		break;
-	}
 
 	case IPT_SO_GET_REVISION_MATCH:
 	case IPT_SO_GET_REVISION_TARGET: {
@@ -1214,7 +2182,8 @@ do_ipt_get_ctl(struct sock *sk, int cmd,
 	return ret;
 }
 
-int ipt_register_table(struct xt_table *table, const struct ipt_replace *repl)
+struct xt_table *ipt_register_table(struct xt_table *table,
+		const struct ipt_replace *repl)
 {
 	int ret;
 	struct xt_table_info *newinfo;
@@ -1224,7 +2193,7 @@ int ipt_register_table(struct xt_table *
 
 	newinfo = xt_alloc_table_info(repl->size);
 	if (!newinfo)
-		return -ENOMEM;
+		return ERR_PTR(-ENOMEM);
 
 	/* choose the copy on our node/cpu
 	 * but dont care of preemption
@@ -1239,15 +2208,14 @@ int ipt_register_table(struct xt_table *
 			      repl->underflow);
 	if (ret != 0) {
 		xt_free_table_info(newinfo);
-		return ret;
+		return ERR_PTR(ret);
 	}
 
-	if (xt_register_table(table, &bootstrap, newinfo) != 0) {
+	table = virt_xt_register_table(table, &bootstrap, newinfo);
+	if (IS_ERR(table))
 		xt_free_table_info(newinfo);
-		return ret;
-	}
 
-	return 0;
+	return table;
 }
 
 void ipt_unregister_table(struct ipt_table *table)
@@ -1255,7 +2223,7 @@ void ipt_unregister_table(struct ipt_tab
 	struct xt_table_info *private;
 	void *loc_cpu_entry;
 
- 	private = xt_unregister_table(table);
+ 	private = virt_xt_unregister_table(table);
 
 	/* Decrease module usage counts and free resources */
 	loc_cpu_entry = private->entries[raw_smp_processor_id()];
@@ -1263,6 +2231,29 @@ void ipt_unregister_table(struct ipt_tab
 	xt_free_table_info(private);
 }
 
+void ipt_flush_table(struct xt_table *table)	/* run cleanup_entry over all rules of @table */
+{
+	struct xt_table *t;		/* result of the by-name lookup */
+	void *loc_cpu_entry;		/* private->entries[] slot of this CPU */
+
+	if (table == NULL)		/* nothing to flush */
+		return;
+
+	t = xt_find_table_lock(AF_INET, table->name);
+	if (t && !IS_ERR(t)) {		/* NULL or ERR_PTR: return silently */
+		struct xt_table_info *private;
+		private = t->private;
+		loc_cpu_entry = private->entries[raw_smp_processor_id()];
+		IPT_ENTRY_ITERATE(loc_cpu_entry, private->size,
+			  cleanup_entry, NULL);	/* cleanup_entry on each rule */
+		if (private->number > private->initial_entries)
+			module_put(t->me);	/* NOTE(review): presumably the ref held for user-added rules - confirm */
+		private->size = 0;	/* the bound IPT_ENTRY_ITERATE walks is now 0 */
+		xt_table_unlock(t);
+		module_put(t->me);	/* NOTE(review): presumably pairs with xt_find_table_lock() - confirm */
+	}
+}
+
 /* Returns 1 if the type and code is matched by the range, 0 otherwise */
 static inline int
 icmp_type_code_match(u_int8_t test_type, u_int8_t min_code, u_int8_t max_code,
@@ -1327,6 +2318,9 @@ icmp_checkentry(const char *tablename,
 /* The built-in targets: standard (NULL) and error. */
 static struct ipt_target ipt_standard_target = {
 	.name		= IPT_STANDARD_TARGET,
+#ifdef CONFIG_COMPAT
+	.compat		= &compat_ipt_standard_fn,
+#endif
 };
 
 static struct ipt_target ipt_error_target = {
@@ -1348,43 +2342,107 @@ static struct ipt_match icmp_matchstruct
 	.name		= "icmp",
 	.match		= &icmp_match,
 	.checkentry	= &icmp_checkentry,
+#ifdef CONFIG_COMPAT
+	.compat		= &icmp_compat,
+#endif
 };
 
-static int __init init(void)
+static int init_iptables(void)
 {
 	int ret;
 
-	xt_proto_init(AF_INET);
+	if (ve_ipt_standard_target != NULL)
+		return -EEXIST;	/* pointer still set: already initialised */
+
+	ret = xt_register_target(AF_INET, &ipt_standard_target);
+	if (ret)
+		goto out;
+#ifdef CONFIG_VE_IPTABLES
+	ve_ipt_standard_target = xt_find_target(AF_INET, IPT_STANDARD_TARGET, 0);
+	ret = PTR_ERR(ve_ipt_standard_target);	/* meaningful only if IS_ERR() */
+	if (IS_ERR(ve_ipt_standard_target))
+		goto out_error;	/* clears the ERR_PTR and undoes the registration */
+#endif
+	ret = xt_register_target(AF_INET, &ipt_error_target);
+	if (ret)
+		goto out_error;
+	ret = xt_register_match(AF_INET, &icmp_matchstruct);
+	if (ret)
+		goto out_icmp;
+	ret = xt_proto_init(AF_INET);
+	if (ret)
+		goto out_proc;
+	return 0;
+
+out_proc:	/* unwind in reverse order of registration */
+	xt_unregister_match(AF_INET, &icmp_matchstruct);
+out_icmp:
+	xt_unregister_target(AF_INET, &ipt_error_target);
+out_error:
+#ifdef CONFIG_VE_IPTABLES
+	ve_ipt_standard_target = NULL;	/* so a later call is not refused with -EEXIST */
+#endif
+	xt_unregister_target(AF_INET, &ipt_standard_target);
+out:
+	return ret;
+}
+
+static void fini_iptables(void)
+{
+	xt_proto_fini(AF_INET);		/* reverse order of init_iptables() */
+	xt_unregister_match(AF_INET, &icmp_matchstruct);
+	xt_unregister_target(AF_INET, &ipt_error_target);
+#ifdef CONFIG_VE_IPTABLES
+	ve_ipt_standard_target = NULL;	/* lets init_iptables() pass its -EEXIST check again */
+#endif
+	xt_unregister_target(AF_INET, &ipt_standard_target);
+}
 
-	/* Noone else will be downing sem now, so we won't sleep */
-	xt_register_target(AF_INET, &ipt_standard_target);
-	xt_register_target(AF_INET, &ipt_error_target);
-	xt_register_match(AF_INET, &icmp_matchstruct);
+static int __init init(void)
+{
+	int ret;
+
+	ret = init_iptables();
+	if (ret)
+		goto out;
 
 	/* Register setsockopt */
 	ret = nf_register_sockopt(&ipt_sockopts);
 	if (ret < 0) {
 		duprintf("Unable to register sockopts.\n");
-		return ret;
+		goto out_sockopts;
 	}
 
+	KSYMRESOLVE(init_iptables);
+	KSYMRESOLVE(fini_iptables);
+	KSYMRESOLVE(ipt_flush_table);
+	KSYMMODRESOLVE(ip_tables);
 	printk("ip_tables: (C) 2000-2006 Netfilter Core Team\n");
 	return 0;
+
+out_sockopts:
+	fini_iptables();
+out:
+	return ret;
 }
 
 static void __exit fini(void)
 {
+	KSYMMODUNRESOLVE(ip_tables);
+	KSYMUNRESOLVE(init_iptables);
+	KSYMUNRESOLVE(fini_iptables);
+	KSYMUNRESOLVE(ipt_flush_table);
 	nf_unregister_sockopt(&ipt_sockopts);
-
-	xt_unregister_match(AF_INET, &icmp_matchstruct);
-	xt_unregister_target(AF_INET, &ipt_error_target);
-	xt_unregister_target(AF_INET, &ipt_standard_target);
-
-	xt_proto_fini(AF_INET);
+	fini_iptables();
 }
 
 EXPORT_SYMBOL(ipt_register_table);
 EXPORT_SYMBOL(ipt_unregister_table);
 EXPORT_SYMBOL(ipt_do_table);
-module_init(init);
+#ifdef CONFIG_COMPAT
+EXPORT_SYMBOL(ipt_match_align_compat);
+EXPORT_SYMBOL(ipt_target_align_compat);
+#endif
+EXPORT_SYMBOL(ipt_flush_table);
+subsys_initcall(init);
 module_exit(fini);
diff -uprN linux-2.6.16/net/ipv4/netfilter/ipt_LOG.c linux-2.6.16.ovz/net/ipv4/netfilter/ipt_LOG.c
--- linux-2.6.16/net/ipv4/netfilter/ipt_LOG.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/net/ipv4/netfilter/ipt_LOG.c	2006-07-05 08:34:56.000000000 -0400
@@ -18,6 +18,7 @@
 #include <net/udp.h>
 #include <net/tcp.h>
 #include <net/route.h>
+#include <linux/nfcalls.h>
 
 #include <linux/netfilter.h>
 #include <linux/netfilter_ipv4/ip_tables.h>
@@ -463,10 +464,25 @@ static int ipt_log_checkentry(const char
 	return 1;
 }
 
+#ifdef CONFIG_COMPAT
+/* LOG .compat hook: IPT_ALIGN minus COMPAT_IPT_ALIGN size of ipt_log_info. */
+static int ipt_log_compat(void *target,
+		void **dstptr, int *size, int convert)
+{
+	int delta = IPT_ALIGN(sizeof(struct ipt_log_info));
+
+	delta -= COMPAT_IPT_ALIGN(sizeof(struct ipt_log_info));
+	return ipt_target_align_compat(target, dstptr, size, delta, convert);
+}
+#endif
+
 static struct ipt_target ipt_log_reg = {
 	.name		= "LOG",
 	.target		= ipt_log_target,
 	.checkentry	= ipt_log_checkentry,
+#ifdef CONFIG_COMPAT
+	.compat		= ipt_log_compat,
+#endif
 	.me		= THIS_MODULE,
 };
 
@@ -476,24 +492,44 @@ static struct nf_logger ipt_log_logger =
 	.me		= THIS_MODULE,
 };
 
+int init_iptable_LOG(void)
+{
+	return ipt_register_target(&ipt_log_reg);
+}
+
+void fini_iptable_LOG(void)
+{
+	ipt_unregister_target(&ipt_log_reg);
+}
+
 static int __init init(void)
 {
-	if (ipt_register_target(&ipt_log_reg))
-		return -EINVAL;
+	int err;
+
+	err = init_iptable_LOG();
+	if (err < 0)
+		return err;
 	if (nf_log_register(PF_INET, &ipt_log_logger) < 0) {
-		printk(KERN_WARNING "ipt_LOG: not logging via system console "
+		ve_printk(VE_LOG, KERN_WARNING "ipt_LOG: not logging via system console "
 		       "since somebody else already registered for PF_INET\n");
 		/* we cannot make module load fail here, since otherwise
 		 * iptables userspace would abort */
 	}
 	
+
+	KSYMRESOLVE(init_iptable_LOG);
+	KSYMRESOLVE(fini_iptable_LOG);
+	KSYMMODRESOLVE(ipt_LOG);
 	return 0;
 }
 
 static void __exit fini(void)
 {
+	KSYMMODUNRESOLVE(ipt_LOG);
+	KSYMUNRESOLVE(init_iptable_LOG);
+	KSYMUNRESOLVE(fini_iptable_LOG);
 	nf_log_unregister_logger(&ipt_log_logger);
-	ipt_unregister_target(&ipt_log_reg);
+	fini_iptable_LOG();
 }
 
 module_init(init);
diff -uprN linux-2.6.16/net/ipv4/netfilter/ipt_MASQUERADE.c linux-2.6.16.ovz/net/ipv4/netfilter/ipt_MASQUERADE.c
--- linux-2.6.16/net/ipv4/netfilter/ipt_MASQUERADE.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/net/ipv4/netfilter/ipt_MASQUERADE.c	2006-07-05 08:34:56.000000000 -0400
@@ -120,6 +120,7 @@ masquerade_target(struct sk_buff **pskb,
 	return ip_nat_setup_info(ct, &newrange, hooknum);
 }
 
+#if 0
 static inline int
 device_cmp(struct ip_conntrack *i, void *ifindex)
 {
@@ -175,6 +176,7 @@ static struct notifier_block masq_dev_no
 static struct notifier_block masq_inet_notifier = {
 	.notifier_call	= masq_inet_event,
 };
+#endif
 
 static struct ipt_target masquerade = {
 	.name		= "MASQUERADE",
@@ -189,12 +191,16 @@ static int __init init(void)
 
 	ret = ipt_register_target(&masquerade);
 
+#if 0
+/*	These notifiers are unnecessary and may
+	lead to oops in virtual environments */
 	if (ret == 0) {
 		/* Register for device down reports */
 		register_netdevice_notifier(&masq_dev_notifier);
 		/* Register IP address change reports */
 		register_inetaddr_notifier(&masq_inet_notifier);
 	}
+#endif
 
 	return ret;
 }
@@ -202,8 +208,8 @@ static int __init init(void)
 static void __exit fini(void)
 {
 	ipt_unregister_target(&masquerade);
-	unregister_netdevice_notifier(&masq_dev_notifier);
-	unregister_inetaddr_notifier(&masq_inet_notifier);	
+/*	unregister_netdevice_notifier(&masq_dev_notifier);
+	unregister_inetaddr_notifier(&masq_inet_notifier);	*/
 }
 
 module_init(init);
diff -uprN linux-2.6.16/net/ipv4/netfilter/ipt_REDIRECT.c linux-2.6.16.ovz/net/ipv4/netfilter/ipt_REDIRECT.c
--- linux-2.6.16/net/ipv4/netfilter/ipt_REDIRECT.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/net/ipv4/netfilter/ipt_REDIRECT.c	2006-07-05 08:34:56.000000000 -0400
@@ -17,6 +17,7 @@
 #include <linux/inetdevice.h>
 #include <net/protocol.h>
 #include <net/checksum.h>
+#include <linux/nfcalls.h>
 #include <linux/netfilter_ipv4.h>
 #include <linux/netfilter_ipv4/ip_nat_rule.h>
 
@@ -25,7 +26,7 @@ MODULE_AUTHOR("Netfilter Core Team <core
 MODULE_DESCRIPTION("iptables REDIRECT target module");
 
 #if 0
-#define DEBUGP printk
+#define DEBUGP ve_printk
 #else
 #define DEBUGP(format, args...)
 #endif
@@ -94,8 +95,14 @@ redirect_target(struct sk_buff **pskb,
 		
 		rcu_read_lock();
 		indev = __in_dev_get_rcu((*pskb)->dev);
-		if (indev && (ifa = indev->ifa_list))
+		if (indev && (ifa = indev->ifa_list)) {
+			/* because of venet device specific, we should use
+			 * second ifa in the list */
+			if (IN_LOOPBACK(ntohl(ifa->ifa_local)) &&
+					ifa->ifa_next)
+				ifa = ifa->ifa_next;
 			newdst = ifa->ifa_local;
+		}
 		rcu_read_unlock();
 
 		if (!newdst)
@@ -119,15 +126,37 @@ static struct ipt_target redirect_reg = 
 	.me		= THIS_MODULE,
 };
 
-static int __init init(void)
+int init_iptable_REDIRECT(void)
 {
 	return ipt_register_target(&redirect_reg);
 }
 
-static void __exit fini(void)
+void fini_iptable_REDIRECT(void)
 {
 	ipt_unregister_target(&redirect_reg);
 }
 
+static int __init init(void)
+{
+	int err;
+
+	err = init_iptable_REDIRECT();
+	if (err < 0)
+		return err;
+
+	KSYMRESOLVE(init_iptable_REDIRECT);
+	KSYMRESOLVE(fini_iptable_REDIRECT);
+	KSYMMODRESOLVE(ipt_REDIRECT);
+	return 0;
+}
+
+static void __exit fini(void)
+{
+	KSYMMODUNRESOLVE(ipt_REDIRECT);
+	KSYMUNRESOLVE(init_iptable_REDIRECT);
+	KSYMUNRESOLVE(fini_iptable_REDIRECT);
+	fini_iptable_REDIRECT();
+}
+
 module_init(init);
 module_exit(fini);
diff -uprN linux-2.6.16/net/ipv4/netfilter/ipt_REJECT.c linux-2.6.16.ovz/net/ipv4/netfilter/ipt_REJECT.c
--- linux-2.6.16/net/ipv4/netfilter/ipt_REJECT.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/net/ipv4/netfilter/ipt_REJECT.c	2006-07-05 08:34:56.000000000 -0400
@@ -22,6 +22,7 @@
 #include <net/ip.h>
 #include <net/tcp.h>
 #include <net/route.h>
+#include <linux/nfcalls.h>
 #include <net/dst.h>
 #include <linux/netfilter_ipv4/ip_tables.h>
 #include <linux/netfilter_ipv4/ipt_REJECT.h>
@@ -322,22 +323,59 @@ static int check(const char *tablename,
 	return 1;
 }
 
+#ifdef CONFIG_COMPAT
+/* REJECT .compat hook: IPT_ALIGN minus COMPAT_IPT_ALIGN size of ipt_reject_info. */
+static int compat(void *target,
+		void **dstptr, int *size, int convert)
+{
+	int delta = IPT_ALIGN(sizeof(struct ipt_reject_info));
+
+	delta -= COMPAT_IPT_ALIGN(sizeof(struct ipt_reject_info));
+	return ipt_target_align_compat(target, dstptr, size, delta, convert);
+}
+#endif
+
 static struct ipt_target ipt_reject_reg = {
 	.name		= "REJECT",
 	.target		= reject,
 	.checkentry	= check,
+#ifdef CONFIG_COMPAT
+	.compat		= compat,
+#endif
 	.me		= THIS_MODULE,
 };
 
-static int __init init(void)
+int init_iptable_REJECT(void)
 {
 	return ipt_register_target(&ipt_reject_reg);
 }
 
-static void __exit fini(void)
+void fini_iptable_REJECT(void)
 {
 	ipt_unregister_target(&ipt_reject_reg);
 }
 
+static int __init init(void)
+{
+	int err;
+
+	err = init_iptable_REJECT();
+	if (err < 0)
+		return err;
+
+	KSYMRESOLVE(init_iptable_REJECT);
+	KSYMRESOLVE(fini_iptable_REJECT);
+	KSYMMODRESOLVE(ipt_REJECT);
+	return 0;
+}
+
+static void __exit fini(void)
+{
+	KSYMMODUNRESOLVE(ipt_REJECT);
+	KSYMUNRESOLVE(init_iptable_REJECT);
+	KSYMUNRESOLVE(fini_iptable_REJECT);
+	fini_iptable_REJECT();
+}
+
 module_init(init);
 module_exit(fini);
diff -uprN linux-2.6.16/net/ipv4/netfilter/ipt_TCPMSS.c linux-2.6.16.ovz/net/ipv4/netfilter/ipt_TCPMSS.c
--- linux-2.6.16/net/ipv4/netfilter/ipt_TCPMSS.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/net/ipv4/netfilter/ipt_TCPMSS.c	2006-07-05 08:34:56.000000000 -0400
@@ -13,6 +13,7 @@
 
 #include <linux/ip.h>
 #include <net/tcp.h>
+#include <linux/nfcalls.h>
 
 #include <linux/netfilter_ipv4/ip_tables.h>
 #include <linux/netfilter_ipv4/ipt_TCPMSS.h>
@@ -242,22 +243,59 @@ ipt_tcpmss_checkentry(const char *tablen
 	return 0;
 }
 
+#ifdef CONFIG_COMPAT
+/* TCPMSS .compat hook: IPT_ALIGN minus COMPAT_IPT_ALIGN size of ipt_tcpmss_info. */
+static int ipt_tcpmss_compat(void *target,
+		void **dstptr, int *size, int convert)
+{
+	int delta = IPT_ALIGN(sizeof(struct ipt_tcpmss_info));
+
+	delta -= COMPAT_IPT_ALIGN(sizeof(struct ipt_tcpmss_info));
+	return ipt_target_align_compat(target, dstptr, size, delta, convert);
+}
+#endif
+
 static struct ipt_target ipt_tcpmss_reg = {
 	.name		= "TCPMSS",
 	.target		= ipt_tcpmss_target,
 	.checkentry	= ipt_tcpmss_checkentry,
+#ifdef CONFIG_COMPAT
+	.compat		= ipt_tcpmss_compat,
+#endif
 	.me		= THIS_MODULE,
 };
 
-static int __init init(void)
+int init_iptable_TCPMSS(void)
 {
 	return ipt_register_target(&ipt_tcpmss_reg);
 }
 
-static void __exit fini(void)
+void fini_iptable_TCPMSS(void)
 {
 	ipt_unregister_target(&ipt_tcpmss_reg);
 }
 
+static int __init init(void)
+{
+	int err;
+
+	err = init_iptable_TCPMSS();
+	if (err < 0)
+		return err;
+
+	KSYMRESOLVE(init_iptable_TCPMSS);
+	KSYMRESOLVE(fini_iptable_TCPMSS);
+	KSYMMODRESOLVE(ipt_TCPMSS);
+	return 0;
+}
+
+static void __exit fini(void)
+{
+	KSYMMODUNRESOLVE(ipt_TCPMSS);
+	KSYMUNRESOLVE(init_iptable_TCPMSS);
+	KSYMUNRESOLVE(fini_iptable_TCPMSS);
+	fini_iptable_TCPMSS();
+}
+
 module_init(init);
 module_exit(fini);
diff -uprN linux-2.6.16/net/ipv4/netfilter/ipt_TOS.c linux-2.6.16.ovz/net/ipv4/netfilter/ipt_TOS.c
--- linux-2.6.16/net/ipv4/netfilter/ipt_TOS.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/net/ipv4/netfilter/ipt_TOS.c	2006-07-05 08:34:56.000000000 -0400
@@ -15,6 +15,7 @@
 
 #include <linux/netfilter_ipv4/ip_tables.h>
 #include <linux/netfilter_ipv4/ipt_TOS.h>
+#include <linux/nfcalls.h>
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
@@ -83,22 +84,59 @@ checkentry(const char *tablename,
 	return 1;
 }
 
+#ifdef CONFIG_COMPAT
+/* TOS .compat hook: IPT_ALIGN minus COMPAT_IPT_ALIGN size of ipt_tos_target_info. */
+static int compat(void *target,
+		void **dstptr, int *size, int convert)
+{
+	int delta = IPT_ALIGN(sizeof(struct ipt_tos_target_info));
+
+	delta -= COMPAT_IPT_ALIGN(sizeof(struct ipt_tos_target_info));
+	return ipt_target_align_compat(target, dstptr, size, delta, convert);
+}
+#endif
+
 static struct ipt_target ipt_tos_reg = {
 	.name		= "TOS",
 	.target		= target,
 	.checkentry	= checkentry,
+#ifdef CONFIG_COMPAT
+	.compat		= compat,
+#endif
 	.me		= THIS_MODULE,
 };
 
-static int __init init(void)
+int init_iptable_TOS(void)
 {
 	return ipt_register_target(&ipt_tos_reg);
 }
 
-static void __exit fini(void)
+void fini_iptable_TOS(void)
 {
 	ipt_unregister_target(&ipt_tos_reg);
 }
 
+static int __init init(void)
+{
+	int err;
+
+	err = init_iptable_TOS();
+	if (err < 0)
+		return err;
+
+	KSYMRESOLVE(init_iptable_TOS);
+	KSYMRESOLVE(fini_iptable_TOS);
+	KSYMMODRESOLVE(ipt_TOS);
+	return 0;
+}
+
+static void __exit fini(void)
+{
+	KSYMMODUNRESOLVE(ipt_TOS);
+	KSYMUNRESOLVE(init_iptable_TOS);
+	KSYMUNRESOLVE(fini_iptable_TOS);
+	fini_iptable_TOS();
+}
+
 module_init(init);
 module_exit(fini);
diff -uprN linux-2.6.16/net/ipv4/netfilter/ipt_multiport.c linux-2.6.16.ovz/net/ipv4/netfilter/ipt_multiport.c
--- linux-2.6.16/net/ipv4/netfilter/ipt_multiport.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/net/ipv4/netfilter/ipt_multiport.c	2006-07-05 08:34:56.000000000 -0400
@@ -13,6 +13,7 @@
 #include <linux/types.h>
 #include <linux/udp.h>
 #include <linux/skbuff.h>
+#include <linux/nfcalls.h>
 
 #include <linux/netfilter_ipv4/ipt_multiport.h>
 #include <linux/netfilter_ipv4/ip_tables.h>
@@ -21,6 +22,13 @@ MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
 MODULE_DESCRIPTION("iptables multiple port match module");
 
+#ifdef CONFIG_VE_IPTABLES
+#include <linux/sched.h>
+#define ve_multiport_match	(*(get_exec_env()->_multiport_match))
+#else
+#define ve_multiport_match	multiport_match
+#endif
+
 #if 0
 #define duprintf(format, args...) printk(format , ## args)
 #else
@@ -174,11 +182,36 @@ checkentry_v1(const char *tablename,
 	return (matchsize == IPT_ALIGN(sizeof(struct ipt_multiport_v1)));
 }
 
+#ifdef CONFIG_COMPAT
+/* multiport rev 0 .compat hook: IPT_ALIGN minus COMPAT_IPT_ALIGN size gap. */
+static int compat(void *match,
+		void **dstptr, int *size, int convert)
+{
+	int delta = IPT_ALIGN(sizeof(struct ipt_multiport));
+
+	delta -= COMPAT_IPT_ALIGN(sizeof(struct ipt_multiport));
+	return ipt_match_align_compat(match, dstptr, size, delta, convert);
+}
+
+/* multiport rev 1 .compat hook: IPT_ALIGN minus COMPAT_IPT_ALIGN size gap. */
+static int compat_v1(void *match,
+		void **dstptr, int *size, int convert)
+{
+	int delta = IPT_ALIGN(sizeof(struct ipt_multiport_v1));
+
+	delta -= COMPAT_IPT_ALIGN(sizeof(struct ipt_multiport_v1));
+	return ipt_match_align_compat(match, dstptr, size, delta, convert);
+}
+#endif
+
 static struct ipt_match multiport_match = {
 	.name		= "multiport",
 	.revision	= 0,
 	.match		= &match,
 	.checkentry	= &checkentry,
+#ifdef CONFIG_COMPAT
+	.compat		= &compat,
+#endif
 	.me		= THIS_MODULE,
 };
 
@@ -187,10 +220,13 @@ static struct ipt_match multiport_match_
 	.revision	= 1,
 	.match		= &match_v1,
 	.checkentry	= &checkentry_v1,
+#ifdef CONFIG_COMPAT
+	.compat		= &compat_v1,
+#endif
 	.me		= THIS_MODULE,
 };
 
-static int __init init(void)
+int init_iptable_multiport(void)
 {
 	int err;
 
@@ -204,11 +240,33 @@ static int __init init(void)
 	return err;
 }
 
-static void __exit fini(void)
+void fini_iptable_multiport(void)
 {
 	ipt_unregister_match(&multiport_match);
 	ipt_unregister_match(&multiport_match_v1);
 }
 
+static int __init init(void)
+{
+	int err;
+
+	err = init_iptable_multiport();
+	if (err < 0)
+		return err;
+
+	KSYMRESOLVE(init_iptable_multiport);
+	KSYMRESOLVE(fini_iptable_multiport);
+	KSYMMODRESOLVE(ipt_multiport);
+	return 0;
+}
+
+static void __exit fini(void)
+{
+	KSYMMODUNRESOLVE(ipt_multiport);
+	KSYMUNRESOLVE(init_iptable_multiport);
+	KSYMUNRESOLVE(fini_iptable_multiport);
+	fini_iptable_multiport();
+}
+
 module_init(init);
 module_exit(fini);
diff -uprN linux-2.6.16/net/ipv4/netfilter/ipt_tos.c linux-2.6.16.ovz/net/ipv4/netfilter/ipt_tos.c
--- linux-2.6.16/net/ipv4/netfilter/ipt_tos.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/net/ipv4/netfilter/ipt_tos.c	2006-07-05 08:34:56.000000000 -0400
@@ -10,6 +10,7 @@
 
 #include <linux/module.h>
 #include <linux/skbuff.h>
+#include <linux/nfcalls.h>
 
 #include <linux/netfilter_ipv4/ipt_tos.h>
 #include <linux/netfilter_ipv4/ip_tables.h>
@@ -17,6 +18,13 @@
 MODULE_LICENSE("GPL");
 MODULE_DESCRIPTION("iptables TOS match module");
 
+#ifdef CONFIG_VE_IPTABLES
+#include <linux/sched.h>
+#define ve_tos_match		(*(get_exec_env()->_tos_match))
+#else
+#define ve_tos_match		tos_match
+#endif
+
 static int
 match(const struct sk_buff *skb,
       const struct net_device *in,
@@ -44,22 +52,59 @@ checkentry(const char *tablename,
 	return 1;
 }
 
+#ifdef CONFIG_COMPAT
+/* tos match .compat hook: IPT_ALIGN minus COMPAT_IPT_ALIGN size of ipt_tos_info. */
+static int compat(void *match,
+		void **dstptr, int *size, int convert)
+{
+	int delta = IPT_ALIGN(sizeof(struct ipt_tos_info));
+
+	delta -= COMPAT_IPT_ALIGN(sizeof(struct ipt_tos_info));
+	return ipt_match_align_compat(match, dstptr, size, delta, convert);
+}
+#endif
+
 static struct ipt_match tos_match = {
 	.name		= "tos",
 	.match		= &match,
 	.checkentry	= &checkentry,
+#ifdef CONFIG_COMPAT
+	.compat		= &compat,
+#endif
 	.me		= THIS_MODULE,
 };
 
-static int __init init(void)
+int init_iptable_tos(void)
 {
 	return ipt_register_match(&tos_match);
 }
 
-static void __exit fini(void)
+void fini_iptable_tos(void)
 {
 	ipt_unregister_match(&tos_match);
 }
 
+static int __init init(void)
+{
+	int err;
+
+	err = init_iptable_tos();
+	if (err < 0)
+		return err;
+
+	KSYMRESOLVE(init_iptable_tos);
+	KSYMRESOLVE(fini_iptable_tos);
+	KSYMMODRESOLVE(ipt_tos);
+	return 0;
+}
+
+static void __exit fini(void)
+{
+	KSYMMODUNRESOLVE(ipt_tos);
+	KSYMUNRESOLVE(init_iptable_tos);
+	KSYMUNRESOLVE(fini_iptable_tos);
+	fini_iptable_tos();
+}
+
 module_init(init);
 module_exit(fini);
diff -uprN linux-2.6.16/net/ipv4/netfilter/ipt_ttl.c linux-2.6.16.ovz/net/ipv4/netfilter/ipt_ttl.c
--- linux-2.6.16/net/ipv4/netfilter/ipt_ttl.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/net/ipv4/netfilter/ipt_ttl.c	2006-07-05 08:34:56.000000000 -0400
@@ -11,6 +11,7 @@
 
 #include <linux/module.h>
 #include <linux/skbuff.h>
+#include <linux/nfcalls.h>
 
 #include <linux/netfilter_ipv4/ipt_ttl.h>
 #include <linux/netfilter_ipv4/ip_tables.h>
@@ -57,22 +58,58 @@ static int checkentry(const char *tablen
 	return 1;
 }
 
+#ifdef CONFIG_COMPAT
+/* ttl match .compat hook: IPT_ALIGN minus COMPAT_IPT_ALIGN size of ipt_ttl_info. */
+static int compat(void *match,
+		void **dstptr, int *size, int convert)
+{
+	int delta = IPT_ALIGN(sizeof(struct ipt_ttl_info));
+
+	delta -= COMPAT_IPT_ALIGN(sizeof(struct ipt_ttl_info));
+	return ipt_match_align_compat(match, dstptr, size, delta, convert);
+}
+#endif
+
 static struct ipt_match ttl_match = {
 	.name		= "ttl",
 	.match		= &match,
 	.checkentry	= &checkentry,
+#ifdef CONFIG_COMPAT
+	.compat		= &compat,
+#endif
 	.me		= THIS_MODULE,
 };
 
-static int __init init(void)
+int init_iptable_ttl(void)
 {
 	return ipt_register_match(&ttl_match);
 }
 
-static void __exit fini(void)
+void fini_iptable_ttl(void)
 {
 	ipt_unregister_match(&ttl_match);
+}
 
+static int __init init(void)
+{
+	int err;
+
+	err = init_iptable_ttl();
+	if (err < 0)
+		return err;
+
+	KSYMRESOLVE(init_iptable_ttl);
+	KSYMRESOLVE(fini_iptable_ttl);
+	KSYMMODRESOLVE(ipt_ttl);
+	return 0;
+}
+
+static void __exit fini(void)
+{
+	KSYMMODUNRESOLVE(ipt_ttl);
+	KSYMUNRESOLVE(init_iptable_ttl);
+	KSYMUNRESOLVE(fini_iptable_ttl);
+	fini_iptable_ttl();
 }
 
 module_init(init);
diff -uprN linux-2.6.16/net/ipv4/netfilter/iptable_filter.c linux-2.6.16.ovz/net/ipv4/netfilter/iptable_filter.c
--- linux-2.6.16/net/ipv4/netfilter/iptable_filter.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/net/ipv4/netfilter/iptable_filter.c	2006-07-05 08:34:56.000000000 -0400
@@ -12,12 +12,20 @@
 
 #include <linux/module.h>
 #include <linux/moduleparam.h>
+#include <linux/nfcalls.h>
 #include <linux/netfilter_ipv4/ip_tables.h>
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
 MODULE_DESCRIPTION("iptables filter table");
 
+#ifdef CONFIG_VE_IPTABLES
+#include <linux/sched.h>	/* NOTE(review): presumably for get_exec_env() - confirm */
+#define ve_packet_filter	(get_exec_env()->_ve_ipt_filter_pf)
+#else
+#define ve_packet_filter	(&packet_filter)	/* parenthesised: safe before -> or [] */
+#endif
+
 #define FILTER_VALID_HOOKS ((1 << NF_IP_LOCAL_IN) | (1 << NF_IP_FORWARD) | (1 << NF_IP_LOCAL_OUT))
 
 static struct
@@ -25,7 +33,7 @@ static struct
 	struct ipt_replace repl;
 	struct ipt_standard entries[3];
 	struct ipt_error term;
-} initial_table __initdata 
+} initial_table
 = { { "filter", FILTER_VALID_HOOKS, 4,
       sizeof(struct ipt_standard) * 3 + sizeof(struct ipt_error),
       { [NF_IP_LOCAL_IN] = 0,
@@ -90,7 +98,7 @@ ipt_hook(unsigned int hook,
 	 const struct net_device *out,
 	 int (*okfn)(struct sk_buff *))
 {
-	return ipt_do_table(pskb, hook, in, out, &packet_filter, NULL);
+	return ipt_do_table(pskb, hook, in, out, ve_packet_filter, NULL);
 }
 
 static unsigned int
@@ -108,7 +116,7 @@ ipt_local_out_hook(unsigned int hook,
 		return NF_ACCEPT;
 	}
 
-	return ipt_do_table(pskb, hook, in, out, &packet_filter, NULL);
+	return ipt_do_table(pskb, hook, in, out, ve_packet_filter, NULL);
 }
 
 static struct nf_hook_ops ipt_ops[] = {
@@ -139,56 +147,89 @@ static struct nf_hook_ops ipt_ops[] = {
 static int forward = NF_ACCEPT;
 module_param(forward, bool, 0000);
 
-static int __init init(void)
+int init_iptable_filter(void)
 {
 	int ret;
-
-	if (forward < 0 || forward > NF_MAX_VERDICT) {
-		printk("iptables forward must be 0 or 1\n");
-		return -EINVAL;
-	}
-
-	/* Entry 1 is the FORWARD hook */
-	initial_table.entries[1].target.verdict = -forward - 1;
+	struct ipt_table *tmp_filter;
 
 	/* Register table */
-	ret = ipt_register_table(&packet_filter, &initial_table.repl);
-	if (ret < 0)
-		return ret;
+	tmp_filter = ipt_register_table(&packet_filter,
+			&initial_table.repl);
+	if (IS_ERR(tmp_filter))
+		return PTR_ERR(tmp_filter);
+#ifdef CONFIG_VE_IPTABLES
+	ve_packet_filter = tmp_filter;
+#endif
 
 	/* Register hooks */
-	ret = nf_register_hook(&ipt_ops[0]);
+	ret = virt_nf_register_hook(&ipt_ops[0]);
 	if (ret < 0)
 		goto cleanup_table;
 
-	ret = nf_register_hook(&ipt_ops[1]);
+	ret = virt_nf_register_hook(&ipt_ops[1]);
 	if (ret < 0)
 		goto cleanup_hook0;
 
-	ret = nf_register_hook(&ipt_ops[2]);
+	ret = virt_nf_register_hook(&ipt_ops[2]);
 	if (ret < 0)
 		goto cleanup_hook1;
 
 	return ret;
 
  cleanup_hook1:
-	nf_unregister_hook(&ipt_ops[1]);
+	virt_nf_unregister_hook(&ipt_ops[1]);
  cleanup_hook0:
-	nf_unregister_hook(&ipt_ops[0]);
+	virt_nf_unregister_hook(&ipt_ops[0]);
  cleanup_table:
-	ipt_unregister_table(&packet_filter);
+	ipt_unregister_table(ve_packet_filter);
+#ifdef CONFIG_VE_IPTABLES
+	ve_packet_filter = NULL;
+#endif
 
 	return ret;
 }
 
-static void __exit fini(void)
+void fini_iptable_filter(void)
 {
 	unsigned int i;
 
 	for (i = 0; i < sizeof(ipt_ops)/sizeof(struct nf_hook_ops); i++)
-		nf_unregister_hook(&ipt_ops[i]);
+		virt_nf_unregister_hook(&ipt_ops[i]);
 
-	ipt_unregister_table(&packet_filter);
+	ipt_unregister_table(ve_packet_filter);
+#ifdef CONFIG_VE_IPTABLES
+	ve_packet_filter = NULL;
+#endif
+}
+
+static int __init init(void)
+{
+	int err;
+
+	if (forward < 0 || forward > NF_MAX_VERDICT) {
+		printk("iptables forward must be 0 or 1\n");
+		return -EINVAL;
+	}
+
+	/* Entry 1 is the FORWARD hook */
+	initial_table.entries[1].target.verdict = -forward - 1;
+
+	err = init_iptable_filter();
+	if (err < 0)
+		return err;
+
+	KSYMRESOLVE(init_iptable_filter);
+	KSYMRESOLVE(fini_iptable_filter);
+	KSYMMODRESOLVE(iptable_filter);
+	return 0;
+}
+
+static void __exit fini(void)
+{
+	KSYMMODUNRESOLVE(iptable_filter);
+	KSYMUNRESOLVE(init_iptable_filter);
+	KSYMUNRESOLVE(fini_iptable_filter);
+	fini_iptable_filter();
 }
 
 module_init(init);
diff -uprN linux-2.6.16/net/ipv4/netfilter/iptable_mangle.c linux-2.6.16.ovz/net/ipv4/netfilter/iptable_mangle.c
--- linux-2.6.16/net/ipv4/netfilter/iptable_mangle.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/net/ipv4/netfilter/iptable_mangle.c	2006-07-05 08:34:56.000000000 -0400
@@ -17,6 +17,7 @@
 #include <linux/skbuff.h>
 #include <net/sock.h>
 #include <net/route.h>
+#include <linux/nfcalls.h>
 #include <linux/ip.h>
 
 MODULE_LICENSE("GPL");
@@ -35,7 +36,7 @@ static struct
 	struct ipt_replace repl;
 	struct ipt_standard entries[5];
 	struct ipt_error term;
-} initial_table __initdata
+} initial_table
 = { { "mangle", MANGLE_VALID_HOOKS, 6,
       sizeof(struct ipt_standard) * 5 + sizeof(struct ipt_error),
       { [NF_IP_PRE_ROUTING] 	= 0,
@@ -112,6 +113,13 @@ static struct ipt_table packet_mangler =
 	.af		= AF_INET,
 };
 
+#ifdef CONFIG_VE_IPTABLES
+#include <linux/sched.h>	/* NOTE(review): presumably for get_exec_env() - confirm */
+#define ve_packet_mangler	(get_exec_env()->_ipt_mangle_table)
+#else
+#define ve_packet_mangler	(&packet_mangler)	/* parenthesised: safe before -> or [] */
+#endif
+
 /* The work comes in here from netfilter.c. */
 static unsigned int
 ipt_route_hook(unsigned int hook,
@@ -120,7 +128,7 @@ ipt_route_hook(unsigned int hook,
 	 const struct net_device *out,
 	 int (*okfn)(struct sk_buff *))
 {
-	return ipt_do_table(pskb, hook, in, out, &packet_mangler, NULL);
+	return ipt_do_table(pskb, hook, in, out, ve_packet_mangler, NULL);
 }
 
 static unsigned int
@@ -149,7 +157,8 @@ ipt_local_hook(unsigned int hook,
 	daddr = (*pskb)->nh.iph->daddr;
 	tos = (*pskb)->nh.iph->tos;
 
-	ret = ipt_do_table(pskb, hook, in, out, &packet_mangler, NULL);
+	ret = ipt_do_table(pskb, hook, in, out, ve_packet_mangler, NULL);
+
 	/* Reroute for ANY change. */
 	if (ret != NF_DROP && ret != NF_STOLEN && ret != NF_QUEUE
 	    && ((*pskb)->nh.iph->saddr != saddr
@@ -201,60 +210,103 @@ static struct nf_hook_ops ipt_ops[] = {
 	},
 };
 
-static int __init init(void)
+static int mangle_init(struct nf_hook_ops ipt_ops[])
 {
 	int ret;
+	struct ipt_table *tmp_mangler;
 
 	/* Register table */
-	ret = ipt_register_table(&packet_mangler, &initial_table.repl);
-	if (ret < 0)
-		return ret;
+	tmp_mangler = ipt_register_table(&packet_mangler,
+			&initial_table.repl);
+	if (IS_ERR(tmp_mangler))
+		return PTR_ERR(tmp_mangler);
+#ifdef CONFIG_VE_IPTABLES
+	ve_packet_mangler = tmp_mangler;
+#endif
 
 	/* Register hooks */
-	ret = nf_register_hook(&ipt_ops[0]);
+	ret = virt_nf_register_hook(&ipt_ops[0]);
 	if (ret < 0)
 		goto cleanup_table;
 
-	ret = nf_register_hook(&ipt_ops[1]);
+	ret = virt_nf_register_hook(&ipt_ops[1]);
 	if (ret < 0)
 		goto cleanup_hook0;
 
-	ret = nf_register_hook(&ipt_ops[2]);
+	ret = virt_nf_register_hook(&ipt_ops[2]);
 	if (ret < 0)
 		goto cleanup_hook1;
 
-	ret = nf_register_hook(&ipt_ops[3]);
+	ret = virt_nf_register_hook(&ipt_ops[3]);
 	if (ret < 0)
 		goto cleanup_hook2;
 
-	ret = nf_register_hook(&ipt_ops[4]);
+	ret = virt_nf_register_hook(&ipt_ops[4]);
 	if (ret < 0)
 		goto cleanup_hook3;
 
 	return ret;
 
  cleanup_hook3:
-        nf_unregister_hook(&ipt_ops[3]);
+        virt_nf_unregister_hook(&ipt_ops[3]);
  cleanup_hook2:
-        nf_unregister_hook(&ipt_ops[2]);
+        virt_nf_unregister_hook(&ipt_ops[2]);
  cleanup_hook1:
-	nf_unregister_hook(&ipt_ops[1]);
+	virt_nf_unregister_hook(&ipt_ops[1]);
  cleanup_hook0:
-	nf_unregister_hook(&ipt_ops[0]);
+	virt_nf_unregister_hook(&ipt_ops[0]);
  cleanup_table:
-	ipt_unregister_table(&packet_mangler);
+	ipt_unregister_table(ve_packet_mangler);
+#ifdef CONFIG_VE_IPTABLES
+	ve_packet_mangler = NULL;
+#endif
 
 	return ret;
 }
 
-static void __exit fini(void)
+static void mangle_fini(struct nf_hook_ops ipt_ops[])
 {
 	unsigned int i;
 
-	for (i = 0; i < sizeof(ipt_ops)/sizeof(struct nf_hook_ops); i++)
-		nf_unregister_hook(&ipt_ops[i]);
+	for (i = 0; i < 5; i++)
+		virt_nf_unregister_hook(&ipt_ops[i]);
+
+	ipt_unregister_table(ve_packet_mangler);
+#ifdef CONFIG_VE_IPTABLES
+	ve_packet_mangler = NULL;
+#endif
+}
+
+int init_iptable_mangle(void)
+{
+	return mangle_init(ipt_ops);
+}
+
+void fini_iptable_mangle(void)
+{
+	mangle_fini(ipt_ops);
+}
+
+static int __init init(void)
+{
+	int err;
+
+	err = init_iptable_mangle();
+	if (err < 0)
+		return err;
+
+	KSYMRESOLVE(init_iptable_mangle);
+	KSYMRESOLVE(fini_iptable_mangle);
+	KSYMMODRESOLVE(iptable_mangle);
+	return 0;
+}
 
-	ipt_unregister_table(&packet_mangler);
+static void __exit fini(void)
+{
+	KSYMMODUNRESOLVE(iptable_mangle);
+	KSYMUNRESOLVE(init_iptable_mangle);
+	KSYMUNRESOLVE(fini_iptable_mangle);
+	fini_iptable_mangle();
 }
 
 module_init(init);
diff -uprN linux-2.6.16/net/ipv4/netfilter/iptable_raw.c linux-2.6.16.ovz/net/ipv4/netfilter/iptable_raw.c
--- linux-2.6.16/net/ipv4/netfilter/iptable_raw.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/net/ipv4/netfilter/iptable_raw.c	2006-07-05 08:34:56.000000000 -0400
@@ -118,12 +118,13 @@ static struct nf_hook_ops ipt_ops[] = {
 
 static int __init init(void)
 {
+	struct ipt_table *tmp;
 	int ret;
 
 	/* Register table */
-	ret = ipt_register_table(&packet_raw, &initial_table.repl);
-	if (ret < 0)
-		return ret;
+	tmp = ipt_register_table(&packet_raw, &initial_table.repl);
+	if (IS_ERR(tmp))
+		return PTR_ERR(tmp);
 
 	/* Register hooks */
 	ret = nf_register_hook(&ipt_ops[0]);
diff -uprN linux-2.6.16/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c linux-2.6.16.ovz/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
--- linux-2.6.16/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c	2006-07-05 08:34:56.000000000 -0400
@@ -354,6 +354,7 @@ getorigdst(struct sock *sk, int optval, 
 			.tuple.dst.u.tcp.port;
 		sin.sin_addr.s_addr = ct->tuplehash[IP_CT_DIR_ORIGINAL]
 			.tuple.dst.u3.ip;
+		memset(sin.sin_zero, 0, sizeof(sin.sin_zero));
 
 		DEBUGP("SO_ORIGINAL_DST: %u.%u.%u.%u %u\n",
 		       NIPQUAD(sin.sin_addr.s_addr), ntohs(sin.sin_port));
diff -uprN linux-2.6.16/net/ipv4/proc.c linux-2.6.16.ovz/net/ipv4/proc.c
--- linux-2.6.16/net/ipv4/proc.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/net/ipv4/proc.c	2006-07-05 08:34:56.000000000 -0400
@@ -258,11 +258,12 @@ static int snmp_seq_show(struct seq_file
 		seq_printf(seq, " %s", snmp4_ipstats_list[i].name);
 
 	seq_printf(seq, "\nIp: %d %d",
-			ipv4_devconf.forwarding ? 1 : 2, sysctl_ip_default_ttl);
+			ve_ipv4_devconf.forwarding ? 1 : 2,
+			sysctl_ip_default_ttl);
 
 	for (i = 0; snmp4_ipstats_list[i].name != NULL; i++)
 		seq_printf(seq, " %lu",
-			   fold_field((void **) ip_statistics, 
+			   fold_field((void **) ve_ip_statistics, 
 				      snmp4_ipstats_list[i].entry));
 
 	seq_puts(seq, "\nIcmp:");
@@ -272,7 +273,7 @@ static int snmp_seq_show(struct seq_file
 	seq_puts(seq, "\nIcmp:");
 	for (i = 0; snmp4_icmp_list[i].name != NULL; i++)
 		seq_printf(seq, " %lu",
-			   fold_field((void **) icmp_statistics, 
+			   fold_field((void **) ve_icmp_statistics, 
 				      snmp4_icmp_list[i].entry));
 
 	seq_puts(seq, "\nTcp:");
@@ -284,11 +285,11 @@ static int snmp_seq_show(struct seq_file
 		/* MaxConn field is signed, RFC 2012 */
 		if (snmp4_tcp_list[i].entry == TCP_MIB_MAXCONN)
 			seq_printf(seq, " %ld",
-				   fold_field((void **) tcp_statistics, 
+				   fold_field((void **) ve_tcp_statistics, 
 					      snmp4_tcp_list[i].entry));
 		else
 			seq_printf(seq, " %lu",
-				   fold_field((void **) tcp_statistics,
+				   fold_field((void **) ve_tcp_statistics,
 					      snmp4_tcp_list[i].entry));
 	}
 
@@ -299,7 +300,7 @@ static int snmp_seq_show(struct seq_file
 	seq_puts(seq, "\nUdp:");
 	for (i = 0; snmp4_udp_list[i].name != NULL; i++)
 		seq_printf(seq, " %lu",
-			   fold_field((void **) udp_statistics, 
+			   fold_field((void **) ve_udp_statistics, 
 				      snmp4_udp_list[i].entry));
 
 	seq_putc(seq, '\n');
@@ -333,7 +334,7 @@ static int netstat_seq_show(struct seq_f
 	seq_puts(seq, "\nTcpExt:");
 	for (i = 0; snmp4_net_list[i].name != NULL; i++)
 		seq_printf(seq, " %lu",
-			   fold_field((void **) net_statistics, 
+			   fold_field((void **) ve_net_statistics, 
 				      snmp4_net_list[i].entry));
 
 	seq_putc(seq, '\n');
@@ -357,10 +358,10 @@ int __init ip_misc_proc_init(void)
 {
 	int rc = 0;
 
-	if (!proc_net_fops_create("netstat", S_IRUGO, &netstat_seq_fops))
+	if (!proc_glob_fops_create("net/netstat", S_IRUGO, &netstat_seq_fops))
 		goto out_netstat;
 
-	if (!proc_net_fops_create("snmp", S_IRUGO, &snmp_seq_fops))
+	if (!proc_glob_fops_create("net/snmp", S_IRUGO, &snmp_seq_fops))
 		goto out_snmp;
 
 	if (!proc_net_fops_create("sockstat", S_IRUGO, &sockstat_seq_fops))
@@ -368,9 +369,9 @@ int __init ip_misc_proc_init(void)
 out:
 	return rc;
 out_sockstat:
-	proc_net_remove("snmp");
+	remove_proc_glob_entry("net/snmp", NULL);
 out_snmp:
-	proc_net_remove("netstat");
+	remove_proc_glob_entry("net/netstat", NULL);
 out_netstat:
 	rc = -ENOMEM;
 	goto out;
diff -uprN linux-2.6.16/net/ipv4/raw.c linux-2.6.16.ovz/net/ipv4/raw.c
--- linux-2.6.16/net/ipv4/raw.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/net/ipv4/raw.c	2006-07-05 08:34:56.000000000 -0400
@@ -114,7 +114,8 @@ struct sock *__raw_v4_lookup(struct sock
 		if (inet->num == num 					&&
 		    !(inet->daddr && inet->daddr != raddr) 		&&
 		    !(inet->rcv_saddr && inet->rcv_saddr != laddr)	&&
-		    !(sk->sk_bound_dev_if && sk->sk_bound_dev_if != dif))
+		    !(sk->sk_bound_dev_if && sk->sk_bound_dev_if != dif) &&
+		    ve_accessible_strict(VE_OWNER_SK(sk), get_exec_env()))
 			goto found; /* gotcha */
 	}
 	sk = NULL;
@@ -753,8 +754,12 @@ static struct sock *raw_get_first(struct
 		struct hlist_node *node;
 
 		sk_for_each(sk, node, &raw_v4_htable[state->bucket])
-			if (sk->sk_family == PF_INET)
+			if (sk->sk_family == PF_INET) {
+				if (!ve_accessible(VE_OWNER_SK(sk),
+							get_exec_env()))
+					continue;
 				goto found;
+			}
 	}
 	sk = NULL;
 found:
@@ -768,8 +773,14 @@ static struct sock *raw_get_next(struct 
 	do {
 		sk = sk_next(sk);
 try_again:
-		;
-	} while (sk && sk->sk_family != PF_INET);
+		if (!sk)
+			break;
+		if (sk->sk_family != PF_INET)
+			continue;
+		if (ve_accessible(VE_OWNER_SK(sk),
+					get_exec_env()))
+			break;
+	} while (1);
 
 	if (!sk && ++state->bucket < RAWV4_HTABLE_SIZE) {
 		sk = sk_head(&raw_v4_htable[state->bucket]);
@@ -886,13 +897,13 @@ static struct file_operations raw_seq_fo
 
 int __init raw_proc_init(void)
 {
-	if (!proc_net_fops_create("raw", S_IRUGO, &raw_seq_fops))
+	if (!proc_glob_fops_create("net/raw", S_IRUGO, &raw_seq_fops))
 		return -ENOMEM;
 	return 0;
 }
 
 void __init raw_proc_exit(void)
 {
-	proc_net_remove("raw");
+	remove_proc_glob_entry("net/raw", NULL);
 }
 #endif /* CONFIG_PROC_FS */
diff -uprN linux-2.6.16/net/ipv4/route.c linux-2.6.16.ovz/net/ipv4/route.c
--- linux-2.6.16/net/ipv4/route.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/net/ipv4/route.c	2006-07-05 08:34:56.000000000 -0400
@@ -114,6 +114,8 @@
 
 #define RT_GC_TIMEOUT (300*HZ)
 
+int ip_rt_src_check		= 1;
+
 static int ip_rt_min_delay		= 2 * HZ;
 static int ip_rt_max_delay		= 10 * HZ;
 static int ip_rt_max_size;
@@ -253,11 +255,28 @@ static unsigned int rt_hash_code(u32 dad
 		& rt_hash_mask);
 }
 
+/* Under CONFIG_VE, set owner_env of every cached route to get_ve0(). */
+void prepare_rt_cache(void)
+{
+#ifdef CONFIG_VE
+	struct rtable *r;
+	int i;
+
+	for (i = rt_hash_mask; i >= 0; i--) {
+		spin_lock_bh(rt_hash_lock_addr(i));
+		for (r = rt_hash_table[i].chain; r; r = r->u.rt_next)
+			r->fl.owner_env = get_ve0();
+		spin_unlock_bh(rt_hash_lock_addr(i));
+	}
+#endif
+}
+
 #ifdef CONFIG_PROC_FS
 struct rt_cache_iter_state {
 	int bucket;
 };
 
+static struct rtable *rt_cache_get_next(struct seq_file *seq, struct rtable *r);
 static struct rtable *rt_cache_get_first(struct seq_file *seq)
 {
 	struct rtable *r = NULL;
@@ -270,6 +289,8 @@ static struct rtable *rt_cache_get_first
 			break;
 		rcu_read_unlock_bh();
 	}
+	if (r && !ve_accessible_strict(r->fl.owner_env, get_exec_env()))
+		r = rt_cache_get_next(seq, r);
 	return r;
 }
 
@@ -277,14 +298,19 @@ static struct rtable *rt_cache_get_next(
 {
 	struct rt_cache_iter_state *st = rcu_dereference(seq->private);
 
+	/* Advance until an entry accessible to get_exec_env() is reached. */
+start:
 	r = r->u.rt_next;
 	while (!r) {
 		rcu_read_unlock_bh();
 		if (--st->bucket < 0)
-			break;
+			goto out;
 		rcu_read_lock_bh();
 		r = rt_hash_table[st->bucket].chain;
 	}
+	if (!ve_accessible_strict(r->fl.owner_env, get_exec_env()))
+		goto start;
+out:
 	return r;
 }
 
@@ -556,7 +582,8 @@ static inline int compare_keys(struct fl
 {
 	return memcmp(&fl1->nl_u.ip4_u, &fl2->nl_u.ip4_u, sizeof(fl1->nl_u.ip4_u)) == 0 &&
 	       fl1->oif     == fl2->oif &&
-	       fl1->iif     == fl2->iif;
+	       fl1->iif     == fl2->iif &&
+	       ve_accessible_strict(fl1->owner_env, fl2->owner_env);
 }
 
 #ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
@@ -670,26 +697,105 @@ static void rt_check_expire(unsigned lon
 	mod_timer(&rt_periodic_timer, jiffies + ip_rt_gc_interval);
 }
 
+typedef unsigned long rt_flush_gen_t;
+
+#ifdef CONFIG_VE
+
+static rt_flush_gen_t rt_flush_gen;
+
+/* called under rt_flush_lock */
+static void set_rt_flush_required(struct ve_struct *env)
+{
+	/*
+	 * If the global generation rt_flush_gen is equal to G, then
+	 * the pass considering entries labelled by G is yet to come.
+	 */
+	env->rt_flush_required = rt_flush_gen;
+}
+
+static spinlock_t rt_flush_lock;
+static rt_flush_gen_t reset_rt_flush_required(void)
+{
+	rt_flush_gen_t g;
+
+	spin_lock_bh(&rt_flush_lock);
+	g = rt_flush_gen++;
+	spin_unlock_bh(&rt_flush_lock);
+	return g;
+}
+
+static int check_rt_flush_required(struct ve_struct *env, rt_flush_gen_t gen)
+{
+	/* can be checked without the lock */
+	return env->rt_flush_required >= gen;
+}
+
+#else
+
+static void set_rt_flush_required(struct ve_struct *env)
+{
+}
+
+static rt_flush_gen_t reset_rt_flush_required(void)
+{
+	return 0;
+}
+
+#endif
+
 /* This can run from both BH and non-BH contexts, the latter
  * in the case of a forced flush event.
  */
 static void rt_run_flush(unsigned long dummy)
 {
 	int i;
-	struct rtable *rth, *next;
+	struct rtable * rth, * next;
+	struct rtable * tail;
+	rt_flush_gen_t gen;
 
 	rt_deadline = 0;
 
 	get_random_bytes(&rt_hash_rnd, 4);
 
+	gen = reset_rt_flush_required();
+
 	for (i = rt_hash_mask; i >= 0; i--) {
+#ifdef CONFIG_VE
+		struct rtable ** prev, * p;
+
+		spin_lock_bh(rt_hash_lock_addr(i));
+		rth = rt_hash_table[i].chain;
+
+		/* defer releasing the head of the list after spin_unlock */
+		for (tail = rth; tail; tail = tail->u.rt_next)
+			if (!check_rt_flush_required(tail->fl.owner_env, gen))
+				break;
+		if (rth != tail)
+			rt_hash_table[i].chain = tail;
+
+		/* call rt_free on entries after the tail requiring flush */
+		prev = &rt_hash_table[i].chain;
+		for (p = *prev; p; p = next) {
+			next = p->u.rt_next;
+			if (!check_rt_flush_required(p->fl.owner_env, gen)) {
+				prev = &p->u.rt_next;
+			} else {
+				*prev = next;
+				rt_free(p);
+			}
+		}
+
+#else
 		spin_lock_bh(rt_hash_lock_addr(i));
 		rth = rt_hash_table[i].chain;
 		if (rth)
 			rt_hash_table[i].chain = NULL;
+		tail = NULL;
+
+#endif
 		spin_unlock_bh(rt_hash_lock_addr(i));
 
-		for (; rth; rth = next) {
+		for (; rth != tail; rth = next) {
 			next = rth->u.rt_next;
 			rt_free(rth);
 		}
@@ -728,6 +834,8 @@ void rt_cache_flush(int delay)
 			delay = tmo;
 	}
 
+	set_rt_flush_required(get_exec_env());
+
 	if (delay <= 0) {
 		spin_unlock_bh(&rt_flush_lock);
 		rt_run_flush(0);
@@ -743,9 +851,30 @@ void rt_cache_flush(int delay)
 
 static void rt_secret_rebuild(unsigned long dummy)
 {
+	int i;
+	struct rtable *rth, *next;
 	unsigned long now = jiffies;
 
-	rt_cache_flush(0);
+	spin_lock_bh(&rt_flush_lock);
+	del_timer(&rt_flush_timer);
+	spin_unlock_bh(&rt_flush_lock);
+
+	rt_deadline = 0;
+	get_random_bytes(&rt_hash_rnd, 4);
+
+	for (i = rt_hash_mask; i >= 0; i--) {
+		spin_lock_bh(rt_hash_lock_addr(i));
+		rth = rt_hash_table[i].chain;
+		if (rth)
+			rt_hash_table[i].chain = NULL;
+		spin_unlock_bh(rt_hash_lock_addr(i));
+
+		for (; rth; rth = next) {
+			next = rth->u.rt_next;
+			rt_free(rth);
+		}
+	}
+
 	mod_timer(&rt_secret_timer, now + ip_rt_secret_interval);
 }
 
@@ -1118,7 +1247,9 @@ void ip_rt_redirect(u32 old_gw, u32 dadd
 	struct rtable *rth, **rthp;
 	u32  skeys[2] = { saddr, 0 };
 	int  ikeys[2] = { dev->ifindex, 0 };
+	struct ve_struct *ve;
 
+	ve = get_exec_env();
 	tos &= IPTOS_RT_MASK;
 
 	if (!in_dev)
@@ -1154,6 +1285,10 @@ void ip_rt_redirect(u32 old_gw, u32 dadd
 				    rth->fl.fl4_src != skeys[i] ||
 				    rth->fl.fl4_tos != tos ||
 				    rth->fl.oif != ikeys[k] ||
+#ifdef CONFIG_VE
+				    !ve_accessible_strict(rth->fl.owner_env,
+					    		  ve) ||
+#endif
 				    rth->fl.iif != 0) {
 					rthp = &rth->u.rt_next;
 					continue;
@@ -1192,6 +1327,9 @@ void ip_rt_redirect(u32 old_gw, u32 dadd
 				rt->u.dst.neighbour	= NULL;
 				rt->u.dst.hh		= NULL;
 				rt->u.dst.xfrm		= NULL;
+#ifdef CONFIG_VE
+				rt->fl.owner_env = ve;
+#endif
 
 				rt->rt_flags		|= RTCF_REDIRECTED;
 
@@ -1631,6 +1769,9 @@ static int ip_route_input_mc(struct sk_b
 #ifdef CONFIG_IP_ROUTE_FWMARK
 	rth->fl.fl4_fwmark= skb->nfmark;
 #endif
+#ifdef CONFIG_VE
+	rth->fl.owner_env = get_exec_env();
+#endif
 	rth->fl.fl4_src	= saddr;
 	rth->rt_src	= saddr;
 #ifdef CONFIG_NET_CLS_ROUTE
@@ -1776,6 +1917,9 @@ static inline int __mkroute_input(struct
 #ifdef CONFIG_IP_ROUTE_FWMARK
 	rth->fl.fl4_fwmark= skb->nfmark;
 #endif
+#ifdef CONFIG_VE
+	rth->fl.owner_env = get_exec_env();
+#endif
 	rth->fl.fl4_src	= saddr;
 	rth->rt_src	= saddr;
 	rth->rt_gateway	= daddr;
@@ -2021,6 +2165,9 @@ local_input:
 #ifdef CONFIG_IP_ROUTE_FWMARK
 	rth->fl.fl4_fwmark= skb->nfmark;
 #endif
+#ifdef CONFIG_VE
+	rth->fl.owner_env = get_exec_env();
+#endif
 	rth->fl.fl4_src	= saddr;
 	rth->rt_src	= saddr;
 #ifdef CONFIG_NET_CLS_ROUTE
@@ -2100,6 +2247,9 @@ int ip_route_input(struct sk_buff *skb, 
 #ifdef CONFIG_IP_ROUTE_FWMARK
 		    rth->fl.fl4_fwmark == skb->nfmark &&
 #endif
+#ifdef CONFIG_VE
+		    rth->fl.owner_env == get_exec_env() &&
+#endif
 		    rth->fl.fl4_tos == tos) {
 			rth->u.dst.lastuse = jiffies;
 			dst_hold(&rth->u.dst);
@@ -2226,6 +2376,9 @@ static inline int __mkroute_output(struc
 #ifdef CONFIG_IP_ROUTE_FWMARK
 	rth->fl.fl4_fwmark= oldflp->fl4_fwmark;
 #endif
+#ifdef CONFIG_VE
+	rth->fl.owner_env = get_exec_env();
+#endif
 	rth->rt_dst	= fl->fl4_dst;
 	rth->rt_src	= fl->fl4_src;
 	rth->rt_iif	= oldflp->oif ? : dev_out->ifindex;
@@ -2399,10 +2552,13 @@ static int ip_route_output_slow(struct r
 		    ZERONET(oldflp->fl4_src))
 			goto out;
 
-		/* It is equivalent to inet_addr_type(saddr) == RTN_LOCAL */
-		dev_out = ip_dev_find(oldflp->fl4_src);
-		if (dev_out == NULL)
-			goto out;
+		if (ip_rt_src_check) {
+			/* It is equivalent to
+			   inet_addr_type(saddr) == RTN_LOCAL */
+			dev_out = ip_dev_find(oldflp->fl4_src);
+			if (dev_out == NULL)
+				goto out;
+		}
 
 		/* I removed check for oif == dev_out->oif here.
 		   It was wrong for two reasons:
@@ -2429,6 +2585,12 @@ static int ip_route_output_slow(struct r
 			   Luckily, this hack is good workaround.
 			 */
 
+			if (dev_out == NULL) {
+				dev_out = ip_dev_find(oldflp->fl4_src);
+				if (dev_out == NULL)
+					goto out;
+			}
+
 			fl.oif = dev_out->ifindex;
 			goto make_route;
 		}
@@ -2575,6 +2737,7 @@ int __ip_route_output_key(struct rtable 
 #ifdef CONFIG_IP_ROUTE_FWMARK
 		    rth->fl.fl4_fwmark == flp->fl4_fwmark &&
 #endif
+		    ve_accessible_strict(rth->fl.owner_env, get_exec_env()) &&
 		    !((rth->fl.fl4_tos ^ flp->fl4_tos) &
 			    (IPTOS_RT_MASK | RTO_ONLINK))) {
 
@@ -2705,7 +2868,7 @@ static int rt_fill_info(struct sk_buff *
 		u32 dst = rt->rt_dst;
 
 		if (MULTICAST(dst) && !LOCAL_MCAST(dst) &&
-		    ipv4_devconf.mc_forwarding) {
+		    ve_ipv4_devconf.mc_forwarding) {
 			int err = ipmr_get_route(skb, r, nowait);
 			if (err <= 0) {
 				if (!nowait) {
@@ -2750,7 +2913,10 @@ int inet_rtm_getroute(struct sk_buff *in
 	/* Reserve room for dummy headers, this skb can pass
 	   through good chunk of routing engine.
 	 */
-	skb->mac.raw = skb->data;
+	skb->mac.raw = skb->nh.raw = skb->data;
+
+	/* Bugfix: need to give ip_route_input enough of an IP header to not gag. */
+	skb->nh.iph->protocol = IPPROTO_ICMP;
 	skb_reserve(skb, MAX_HEADER + sizeof(struct iphdr));
 
 	if (rta[RTA_SRC - 1])
@@ -2853,22 +3019,22 @@ void ip_rt_multicast_event(struct in_dev
 }
 
 #ifdef CONFIG_SYSCTL
-static int flush_delay;
+int ipv4_flush_delay;
 
-static int ipv4_sysctl_rtcache_flush(ctl_table *ctl, int write,
+int ipv4_sysctl_rtcache_flush(ctl_table *ctl, int write,
 					struct file *filp, void __user *buffer,
 					size_t *lenp, loff_t *ppos)
 {
 	if (write) {
 		proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
-		rt_cache_flush(flush_delay);
+		rt_cache_flush(ipv4_flush_delay);
 		return 0;
 	} 
 
 	return -EINVAL;
 }
 
-static int ipv4_sysctl_rtcache_flush_strategy(ctl_table *table,
+int ipv4_sysctl_rtcache_flush_strategy(ctl_table *table,
 						int __user *name,
 						int nlen,
 						void __user *oldval,
@@ -2890,7 +3056,7 @@ ctl_table ipv4_route_table[] = {
         {
 		.ctl_name 	= NET_IPV4_ROUTE_FLUSH,
 		.procname	= "flush",
-		.data		= &flush_delay,
+		.data		= &ipv4_flush_delay,
 		.maxlen		= sizeof(int),
 		.mode		= 0200,
 		.proc_handler	= &ipv4_sysctl_rtcache_flush,
@@ -3184,15 +3350,18 @@ int __init ip_rt_init(void)
 #ifdef CONFIG_PROC_FS
 	{
 	struct proc_dir_entry *rtstat_pde = NULL; /* keep gcc happy */
-	if (!proc_net_fops_create("rt_cache", S_IRUGO, &rt_cache_seq_fops) ||
-	    !(rtstat_pde = create_proc_entry("rt_cache", S_IRUGO, 
-			    		     proc_net_stat))) {
+
+	if (!proc_glob_fops_create("net/rt_cache",
+				S_IRUGO, &rt_cache_seq_fops))
+		return -ENOMEM;
+
+	if (!(rtstat_pde = create_proc_glob_entry("net/stat/rt_cache",
+				S_IRUGO, NULL)))
 		return -ENOMEM;
-	}
 	rtstat_pde->proc_fops = &rt_cpu_seq_fops;
 	}
 #ifdef CONFIG_NET_CLS_ROUTE
-	create_proc_read_entry("rt_acct", 0, proc_net, ip_rt_acct_read, NULL);
+	create_proc_read_entry("net/rt_acct", 0, NULL, ip_rt_acct_read, NULL);
 #endif
 #endif
 #ifdef CONFIG_XFRM
diff -uprN linux-2.6.16/net/ipv4/sysctl_net_ipv4.c linux-2.6.16.ovz/net/ipv4/sysctl_net_ipv4.c
--- linux-2.6.16/net/ipv4/sysctl_net_ipv4.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/net/ipv4/sysctl_net_ipv4.c	2006-07-05 08:34:56.000000000 -0400
@@ -22,6 +22,9 @@
 /* From af_inet.c */
 extern int sysctl_ip_nonlocal_bind;
 
+int sysctl_tcp_use_sg = 1;
+EXPORT_SYMBOL(sysctl_tcp_use_sg);
+
 #ifdef CONFIG_SYSCTL
 static int zero;
 static int tcp_retr1_max = 255; 
@@ -33,22 +36,21 @@ struct ipv4_config ipv4_config;
 
 #ifdef CONFIG_SYSCTL
 
-static
 int ipv4_sysctl_forward(ctl_table *ctl, int write, struct file * filp,
 			void __user *buffer, size_t *lenp, loff_t *ppos)
 {
-	int val = ipv4_devconf.forwarding;
+	int val = ve_ipv4_devconf.forwarding;
 	int ret;
 
 	ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
 
-	if (write && ipv4_devconf.forwarding != val)
+	if (write && ve_ipv4_devconf.forwarding != val)
 		inet_forward_change();
 
 	return ret;
 }
 
-static int ipv4_sysctl_forward_strategy(ctl_table *table,
+int ipv4_sysctl_forward_strategy(ctl_table *table,
 			 int __user *name, int nlen,
 			 void __user *oldval, size_t __user *oldlenp,
 			 void __user *newval, size_t newlen, 
@@ -664,6 +666,14 @@ ctl_table ipv4_table[] = {
 		.mode		= 0644,
 		.proc_handler	= &proc_dointvec,
 	},
+	{
+		.ctl_name	= NET_TCP_USE_SG,
+		.procname	= "tcp_use_sg",
+		.data		= &sysctl_tcp_use_sg,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec,
+	},
 
 	{ .ctl_name = 0 }
 };
diff -uprN linux-2.6.16/net/ipv4/tcp.c linux-2.6.16.ovz/net/ipv4/tcp.c
--- linux-2.6.16/net/ipv4/tcp.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/net/ipv4/tcp.c	2006-07-05 08:34:56.000000000 -0400
@@ -248,6 +248,7 @@
  */
 
 #include <linux/config.h>
+#include <linux/kmem_cache.h>
 #include <linux/module.h>
 #include <linux/types.h>
 #include <linux/fcntl.h>
@@ -263,6 +264,9 @@
 #include <net/xfrm.h>
 #include <net/ip.h>
 
+#include <ub/ub_orphan.h>
+#include <ub/ub_net.h>
+#include <ub/ub_tcp.h>
 
 #include <asm/uaccess.h>
 #include <asm/ioctls.h>
@@ -321,6 +325,7 @@ unsigned int tcp_poll(struct file *file,
 	unsigned int mask;
 	struct sock *sk = sock->sk;
 	struct tcp_sock *tp = tcp_sk(sk);
+	int check_send_space;
 
 	poll_wait(file, sk->sk_sleep, wait);
 	if (sk->sk_state == TCP_LISTEN)
@@ -335,6 +340,21 @@ unsigned int tcp_poll(struct file *file,
 	if (sk->sk_err)
 		mask = POLLERR;
 
+	check_send_space = 1;
+#ifdef CONFIG_USER_RESOURCE
+	if (!(sk->sk_shutdown & SEND_SHUTDOWN) && sock_has_ubc(sk)) {
+		unsigned long size;
+		size = MAX_TCP_HEADER + tp->mss_cache;
+		if (size > SOCK_MIN_UBCSPACE)
+			size = SOCK_MIN_UBCSPACE;
+		size = skb_charge_size(size);   
+		if (ub_sock_makewres_tcp(sk, size)) {
+			check_send_space = 0;
+			ub_sock_sndqueueadd_tcp(sk, size);
+		}
+	}
+#endif
+
 	/*
 	 * POLLHUP is certainly not done right. But poll() doesn't
 	 * have a notion of HUP in just one direction, and for a
@@ -378,7 +398,7 @@ unsigned int tcp_poll(struct file *file,
 		     sock_flag(sk, SOCK_URGINLINE) || !tp->urg_data))
 			mask |= POLLIN | POLLRDNORM;
 
-		if (!(sk->sk_shutdown & SEND_SHUTDOWN)) {
+		if (check_send_space && !(sk->sk_shutdown & SEND_SHUTDOWN)) {
 			if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk)) {
 				mask |= POLLOUT | POLLWRNORM;
 			} else {  /* send SIGIO later */
@@ -528,16 +548,23 @@ static ssize_t do_tcp_sendpages(struct s
 		int copy, i, can_coalesce;
 		int offset = poffset % PAGE_SIZE;
 		int size = min_t(size_t, psize, PAGE_SIZE - offset);
+		unsigned long chargesize = 0;
 
 		if (!sk->sk_send_head || (copy = size_goal - skb->len) <= 0) {
 new_segment:
+			chargesize = 0;
 			if (!sk_stream_memory_free(sk))
 				goto wait_for_sndbuf;
 
+			chargesize = skb_charge_size(MAX_TCP_HEADER +
+					tp->mss_cache);
+			if (ub_sock_getwres_tcp(sk, chargesize) < 0)
+				goto wait_for_ubspace;
 			skb = sk_stream_alloc_pskb(sk, 0, 0,
 						   sk->sk_allocation);
 			if (!skb)
 				goto wait_for_memory;
+			ub_skb_set_charge(skb, sk, chargesize, UB_TCPSNDBUF);
 
 			skb_entail(sk, tp, skb);
 			copy = size_goal;
@@ -593,10 +620,14 @@ new_segment:
 wait_for_sndbuf:
 		set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
 wait_for_memory:
+		ub_sock_retwres_tcp(sk, chargesize,
+			skb_charge_size(MAX_TCP_HEADER + tp->mss_cache));
+		chargesize = 0;
+wait_for_ubspace:
 		if (copied)
 			tcp_push(sk, tp, flags & ~MSG_MORE, mss_now, TCP_NAGLE_PUSH);
 
-		if ((err = sk_stream_wait_memory(sk, &timeo)) != 0)
+		if ((err = sk_stream_wait_memory(sk, &timeo, chargesize)) != 0)
 			goto do_error;
 
 		mss_now = tcp_current_mss(sk, !(flags&MSG_OOB));
@@ -699,6 +730,7 @@ int tcp_sendmsg(struct kiocb *iocb, stru
 	while (--iovlen >= 0) {
 		int seglen = iov->iov_len;
 		unsigned char __user *from = iov->iov_base;
+		unsigned long chargesize = 0;
 
 		iov++;
 
@@ -709,18 +741,26 @@ int tcp_sendmsg(struct kiocb *iocb, stru
 
 			if (!sk->sk_send_head ||
 			    (copy = size_goal - skb->len) <= 0) {
+				unsigned long size;
 
 new_segment:
 				/* Allocate new segment. If the interface is SG,
 				 * allocate skb fitting to single page.
 				 */
+				chargesize = 0;
 				if (!sk_stream_memory_free(sk))
 					goto wait_for_sndbuf;
-
-				skb = sk_stream_alloc_pskb(sk, select_size(sk, tp),
-							   0, sk->sk_allocation);
+				size = select_size(sk, tp);
+				chargesize = skb_charge_size(MAX_TCP_HEADER +
+						size);
+				if (ub_sock_getwres_tcp(sk, chargesize) < 0)
+					goto wait_for_ubspace;
+				skb = sk_stream_alloc_pskb(sk, size, 0,
+						sk->sk_allocation);
 				if (!skb)
 					goto wait_for_memory;
+				ub_skb_set_charge(skb, sk, chargesize,
+						UB_TCPSNDBUF);
 
 				/*
 				 * Check whether we can use HW checksum.
@@ -768,6 +808,7 @@ new_segment:
 				} else if (page) {
 					if (off == PAGE_SIZE) {
 						put_page(page);
+						ub_sock_tcp_detachpage(sk);
 						TCP_PAGE(sk) = page = NULL;
 						off = 0;
 					}
@@ -781,6 +822,9 @@ new_segment:
 					goto wait_for_memory;
 
 				if (!page) {
+					chargesize = PAGE_SIZE;
+					if (ub_sock_tcp_chargepage(sk) < 0)
+						goto wait_for_ubspace;
 					/* Allocate new cache page. */
 					if (!(page = sk_stream_alloc_page(sk)))
 						goto wait_for_memory;
@@ -812,7 +856,8 @@ new_segment:
 					} else if (off + copy < PAGE_SIZE) {
 						get_page(page);
 						TCP_PAGE(sk) = page;
-					}
+					} else
+						ub_sock_tcp_detachpage(sk);
 				}
 
 				TCP_OFF(sk) = off + copy;
@@ -843,10 +888,15 @@ new_segment:
 wait_for_sndbuf:
 			set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
 wait_for_memory:
+			ub_sock_retwres_tcp(sk, chargesize,
+				skb_charge_size(MAX_TCP_HEADER+tp->mss_cache));
+			chargesize = 0;
+wait_for_ubspace:
 			if (copied)
 				tcp_push(sk, tp, flags & ~MSG_MORE, mss_now, TCP_NAGLE_PUSH);
 
-			if ((err = sk_stream_wait_memory(sk, &timeo)) != 0)
+			if ((err = sk_stream_wait_memory(sk, &timeo,
+							chargesize)) != 0)
 				goto do_error;
 
 			mss_now = tcp_current_mss(sk, !(flags&MSG_OOB));
@@ -944,7 +994,18 @@ static void cleanup_rbuf(struct sock *sk
 #if TCP_DEBUG
 	struct sk_buff *skb = skb_peek(&sk->sk_receive_queue);
 
-	BUG_TRAP(!skb || before(tp->copied_seq, TCP_SKB_CB(skb)->end_seq));
+	if (!(skb==NULL || before(tp->copied_seq, TCP_SKB_CB(skb)->end_seq))) {
+		printk("KERNEL: assertion: skb==NULL || "
+				"before(tp->copied_seq, skb->end_seq)\n");
+		printk("VE%u pid %d comm %.16s\n", 
+				(get_exec_env() ? VEID(get_exec_env()) : 0),
+				current->pid, current->comm);
+		printk("copied=%d, copied_seq=%d, rcv_nxt=%d\n", copied,
+				tp->copied_seq, tp->rcv_nxt);
+		printk("skb->len=%d, skb->seq=%d, skb->end_seq=%d\n",
+				skb->len, TCP_SKB_CB(skb)->seq, 
+				TCP_SKB_CB(skb)->end_seq);
+	}
 #endif
 
 	if (inet_csk_ack_scheduled(sk)) {
@@ -1168,7 +1229,22 @@ int tcp_recvmsg(struct kiocb *iocb, stru
 				goto found_ok_skb;
 			if (skb->h.th->fin)
 				goto found_fin_ok;
-			BUG_TRAP(flags & MSG_PEEK);
+			if (!(flags & MSG_PEEK)) {
+				printk("KERNEL: assertion: flags&MSG_PEEK\n");
+				printk("VE%u pid %d comm %.16s\n", 
+						(get_exec_env() ? 
+						 VEID(get_exec_env()) : 0),
+						current->pid, current->comm);
+				printk("flags=0x%x, len=%d, copied_seq=%d, "
+						"rcv_nxt=%d\n", flags, len,
+						tp->copied_seq, tp->rcv_nxt);
+				printk("skb->len=%d, *seq=%d, skb->seq=%d, "
+						"skb->end_seq=%d, offset=%d\n",
+						skb->len, *seq, 
+						TCP_SKB_CB(skb)->seq,
+						TCP_SKB_CB(skb)->end_seq, 
+						offset);
+			}
 			skb = skb->next;
 		} while (skb != (struct sk_buff *)&sk->sk_receive_queue);
 
@@ -1231,8 +1307,18 @@ int tcp_recvmsg(struct kiocb *iocb, stru
 
 			tp->ucopy.len = len;
 
-			BUG_TRAP(tp->copied_seq == tp->rcv_nxt ||
-				 (flags & (MSG_PEEK | MSG_TRUNC)));
+			if (!(tp->copied_seq == tp->rcv_nxt || 
+						(flags&(MSG_PEEK|MSG_TRUNC)))) {
+				printk("KERNEL: assertion: tp->copied_seq == "
+						"tp->rcv_nxt || ...\n");
+				printk("VE%u pid %d comm %.16s\n", 
+						(get_exec_env() ?
+						 VEID(get_exec_env()) : 0),
+						current->pid, current->comm);
+				printk("flags=0x%x, len=%d, copied_seq=%d, "
+						"rcv_nxt=%d\n", flags, len,
+						tp->copied_seq, tp->rcv_nxt);
+			}
 
 			/* Ugly... If prequeue is not empty, we have to
 			 * process it before releasing socket, otherwise
@@ -1583,7 +1669,7 @@ adjudge_to_death:
 			if (tmo > TCP_TIMEWAIT_LEN) {
 				inet_csk_reset_keepalive_timer(sk, tcp_fin_time(sk));
 			} else {
-				atomic_inc(sk->sk_prot->orphan_count);
+				ub_inc_orphan_count(sk);
 				tcp_time_wait(sk, TCP_FIN_WAIT2, tmo);
 				goto out;
 			}
@@ -1591,9 +1677,7 @@ adjudge_to_death:
 	}
 	if (sk->sk_state != TCP_CLOSE) {
 		sk_stream_mem_reclaim(sk);
-		if (atomic_read(sk->sk_prot->orphan_count) > sysctl_tcp_max_orphans ||
-		    (sk->sk_wmem_queued > SOCK_MIN_SNDBUF &&
-		     atomic_read(&tcp_memory_allocated) > sysctl_tcp_mem[2])) {
+		if (ub_too_many_orphans(sk, ub_get_orphan_count(sk))) {
 			if (net_ratelimit())
 				printk(KERN_INFO "TCP: too many of orphaned "
 				       "sockets\n");
@@ -1602,7 +1686,7 @@ adjudge_to_death:
 			NET_INC_STATS_BH(LINUX_MIB_TCPABORTONMEMORY);
 		}
 	}
-	atomic_inc(sk->sk_prot->orphan_count);
+	ub_inc_orphan_count(sk);
 
 	if (sk->sk_state == TCP_CLOSE)
 		inet_csk_destroy_sock(sk);
@@ -2051,7 +2135,7 @@ void __init tcp_init(void)
 	tcp_hashinfo.bind_bucket_cachep =
 		kmem_cache_create("tcp_bind_bucket",
 				  sizeof(struct inet_bind_bucket), 0,
-				  SLAB_HWCACHE_ALIGN, NULL, NULL);
+				  SLAB_HWCACHE_ALIGN | SLAB_UBC, NULL, NULL);
 	if (!tcp_hashinfo.bind_bucket_cachep)
 		panic("tcp_init: Cannot alloc tcp_bind_bucket cache.");
 
diff -uprN linux-2.6.16/net/ipv4/tcp_input.c linux-2.6.16.ovz/net/ipv4/tcp_input.c
--- linux-2.6.16/net/ipv4/tcp_input.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/net/ipv4/tcp_input.c	2006-07-05 08:34:56.000000000 -0400
@@ -72,6 +72,8 @@
 #include <linux/ipsec.h>
 #include <asm/unaligned.h>
 
+#include <ub/ub_tcp.h>
+
 int sysctl_tcp_timestamps = 1;
 int sysctl_tcp_window_scaling = 1;
 int sysctl_tcp_sack = 1;
@@ -252,7 +254,7 @@ static void tcp_grow_window(struct sock 
 	/* Check #1 */
 	if (tp->rcv_ssthresh < tp->window_clamp &&
 	    (int)tp->rcv_ssthresh < tcp_space(sk) &&
-	    !tcp_memory_pressure) {
+	    ub_tcp_rmem_allows_expand(sk)) {
 		int incr;
 
 		/* Check #2. Increase window, if skb with such overhead
@@ -321,6 +323,8 @@ static void tcp_init_buffer_space(struct
 
 	tp->rcv_ssthresh = min(tp->rcv_ssthresh, tp->window_clamp);
 	tp->snd_cwnd_stamp = tcp_time_stamp;
+
+	ub_tcp_update_maxadvmss(sk);
 }
 
 /* 5. Recalculate window clamp after socket hit its memory bounds. */
@@ -332,7 +336,7 @@ static void tcp_clamp_window(struct sock
 
 	if (sk->sk_rcvbuf < sysctl_tcp_rmem[2] &&
 	    !(sk->sk_userlocks & SOCK_RCVBUF_LOCK) &&
-	    !tcp_memory_pressure &&
+	    !ub_tcp_memory_pressure(sk) &&
 	    atomic_read(&tcp_memory_allocated) < sysctl_tcp_mem[0]) {
 		sk->sk_rcvbuf = min(atomic_read(&sk->sk_rmem_alloc),
 				    sysctl_tcp_rmem[2]);
@@ -3118,7 +3122,7 @@ queue_and_out:
 			     !sk_stream_rmem_schedule(sk, skb))) {
 				if (tcp_prune_queue(sk) < 0 ||
 				    !sk_stream_rmem_schedule(sk, skb))
-					goto drop;
+					goto drop_part;
 			}
 			sk_stream_set_owner_r(skb, sk);
 			__skb_queue_tail(&sk->sk_receive_queue, skb);
@@ -3162,6 +3166,12 @@ out_of_window:
 drop:
 		__kfree_skb(skb);
 		return;
+
+drop_part:
+		if (after(tp->copied_seq, tp->rcv_nxt))
+			tp->rcv_nxt = tp->copied_seq;
+		__kfree_skb(skb);
+		return;
 	}
 
 	/* Out of window. F.e. zero window probe. */
@@ -3333,6 +3343,10 @@ tcp_collapse(struct sock *sk, struct sk_
 		nskb = alloc_skb(copy+header, GFP_ATOMIC);
 		if (!nskb)
 			return;
+		if (ub_tcprcvbuf_charge_forced(skb->sk, nskb) < 0) {
+			kfree_skb(nskb);
+			return;
+		}
 		skb_reserve(nskb, header);
 		memcpy(nskb->head, skb->head, header);
 		nskb->nh.raw = nskb->head + (skb->nh.raw-skb->head);
@@ -3429,7 +3443,7 @@ static int tcp_prune_queue(struct sock *
 
 	if (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf)
 		tcp_clamp_window(sk, tp);
-	else if (tcp_memory_pressure)
+	else if (ub_tcp_memory_pressure(sk))
 		tp->rcv_ssthresh = min(tp->rcv_ssthresh, 4U * tp->advmss);
 
 	tcp_collapse_ofo_queue(sk);
@@ -3505,7 +3519,7 @@ static int tcp_should_expand_sndbuf(stru
 		return 0;
 
 	/* If we are under global TCP memory pressure, do not expand.  */
-	if (tcp_memory_pressure)
+	if (ub_tcp_memory_pressure(sk))
 		return 0;
 
 	/* If we are under soft global TCP memory pressure, do not expand.  */
@@ -3898,6 +3912,10 @@ int tcp_rcv_established(struct sock *sk,
 
 				if ((int)skb->truesize > sk->sk_forward_alloc)
 					goto step5;
+				/* It is OK not to try to free memory here;
+				 * that is done below on the slow path. Den */
+				if (ub_tcprcvbuf_charge(sk, skb) < 0)
+					goto step5;
 
 				NET_INC_STATS_BH(LINUX_MIB_TCPHPHITS);
 
diff -uprN linux-2.6.16/net/ipv4/tcp_ipv4.c linux-2.6.16.ovz/net/ipv4/tcp_ipv4.c
--- linux-2.6.16/net/ipv4/tcp_ipv4.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/net/ipv4/tcp_ipv4.c	2006-07-05 08:34:56.000000000 -0400
@@ -72,6 +72,8 @@
 #include <net/timewait_sock.h>
 #include <net/xfrm.h>
 
+#include <ub/ub_tcp.h>
+
 #include <linux/inet.h>
 #include <linux/ipv6.h>
 #include <linux/stddef.h>
@@ -705,6 +707,7 @@ struct request_sock_ops tcp_request_sock
 	.destructor	=	tcp_v4_reqsk_destructor,
 	.send_reset	=	tcp_v4_send_reset,
 };
+EXPORT_SYMBOL_GPL(tcp_request_sock_ops);
 
 static struct timewait_sock_ops tcp_timewait_sock_ops = {
 	.twsk_obj_size	= sizeof(struct tcp_timewait_sock),
@@ -979,12 +982,15 @@ static int tcp_v4_checksum_init(struct s
  */
 int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
 {
+	struct user_beancounter *ub;
+
+	ub = set_exec_ub(sock_bc(sk)->ub);
 	if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
 		TCP_CHECK_TIMER(sk);
 		if (tcp_rcv_established(sk, skb, skb->h.th, skb->len))
 			goto reset;
 		TCP_CHECK_TIMER(sk);
-		return 0;
+		goto restore_context;
 	}
 
 	if (skb->len < (skb->h.th->doff << 2) || tcp_checksum_complete(skb))
@@ -998,7 +1004,7 @@ int tcp_v4_do_rcv(struct sock *sk, struc
 		if (nsk != sk) {
 			if (tcp_child_process(sk, nsk, skb))
 				goto reset;
-			return 0;
+			goto restore_context;
 		}
 	}
 
@@ -1006,6 +1012,9 @@ int tcp_v4_do_rcv(struct sock *sk, struc
 	if (tcp_rcv_state_process(sk, skb, skb->h.th, skb->len))
 		goto reset;
 	TCP_CHECK_TIMER(sk);
+
+restore_context:
+	(void)set_exec_ub(ub);
 	return 0;
 
 reset:
@@ -1017,7 +1026,7 @@ discard:
 	 * might be destroyed here. This current version compiles correctly,
 	 * but you have been warned.
 	 */
-	return 0;
+	goto restore_context;
 
 csum_err:
 	TCP_INC_STATS_BH(TCP_MIB_INERRS);
@@ -1302,6 +1311,8 @@ int tcp_v4_destroy_sock(struct sock *sk)
 	 * If sendmsg cached page exists, toss it.
 	 */
 	if (sk->sk_sndmsg_page) {
+		/* queue is empty, uncharge */
+		ub_sock_tcp_detachpage(sk);
 		__free_page(sk->sk_sndmsg_page);
 		sk->sk_sndmsg_page = NULL;
 	}
@@ -1316,16 +1327,34 @@ EXPORT_SYMBOL(tcp_v4_destroy_sock);
 #ifdef CONFIG_PROC_FS
 /* Proc filesystem TCP sock list dumping. */
 
-static inline struct inet_timewait_sock *tw_head(struct hlist_head *head)
+/* First tw bucket on @head passing ve_accessible_veid() for @veid, or NULL. */
+static inline struct inet_timewait_sock *tw_head(struct hlist_head *head,
+		envid_t veid)
 {
-	return hlist_empty(head) ? NULL :
-		list_entry(head->first, struct inet_timewait_sock, tw_node);
+	struct inet_timewait_sock *entry;
+	struct hlist_node *node;
+
+	if (hlist_empty(head))
+		return NULL;
+	hlist_for_each_entry(entry, node, head, tw_node)
+		if (ve_accessible_veid(entry->tw_owner_env, veid))
+			return entry;
+
+	return NULL;
 }
 
-static inline struct inet_timewait_sock *tw_next(struct inet_timewait_sock *tw)
+/* Next tw bucket chained after @tw that passes ve_accessible_veid(). */
+static inline struct inet_timewait_sock *
+	tw_next(struct inet_timewait_sock *tw, envid_t veid)
 {
-	return tw->tw_node.next ?
-		hlist_entry(tw->tw_node.next, typeof(*tw), tw_node) : NULL;
+	struct hlist_node *node;
+
+	for (node = tw->tw_node.next; node != NULL; node = node->next) {
+		tw = hlist_entry(node, typeof(*tw), tw_node);
+		if (ve_accessible_veid(tw->tw_owner_env, veid))
+			return tw;
+	}
+	return NULL;	/* ran off the end of the chain */
 }
 
 static void *listening_get_next(struct seq_file *seq, void *cur)
@@ -1334,7 +1363,9 @@ static void *listening_get_next(struct s
 	struct hlist_node *node;
 	struct sock *sk = cur;
 	struct tcp_iter_state* st = seq->private;
+	struct ve_struct *ve;
 
+	ve = get_exec_env();
 	if (!sk) {
 		st->bucket = 0;
 		sk = sk_head(&tcp_hashinfo.listening_hash[0]);
@@ -1374,6 +1405,8 @@ get_req:
 	}
 get_sk:
 	sk_for_each_from(sk, node) {
+		if (!ve_accessible(VE_OWNER_SK(sk), ve))
+			continue;
 		if (sk->sk_family == st->family) {
 			cur = sk;
 			goto out;
@@ -1414,7 +1447,9 @@ static void *established_get_first(struc
 {
 	struct tcp_iter_state* st = seq->private;
 	void *rc = NULL;
+	struct ve_struct *ve;
 
+	ve = get_exec_env();
 	for (st->bucket = 0; st->bucket < tcp_hashinfo.ehash_size; ++st->bucket) {
 		struct sock *sk;
 		struct hlist_node *node;
@@ -1425,6 +1460,8 @@ static void *established_get_first(struc
 
 		read_lock(&tcp_hashinfo.ehash[st->bucket].lock);
 		sk_for_each(sk, node, &tcp_hashinfo.ehash[st->bucket].chain) {
+			if (!ve_accessible(VE_OWNER_SK(sk), ve))
+				continue;
 			if (sk->sk_family != st->family) {
 				continue;
 			}
@@ -1434,6 +1471,8 @@ static void *established_get_first(struc
 		st->state = TCP_SEQ_STATE_TIME_WAIT;
 		inet_twsk_for_each(tw, node,
 				   &tcp_hashinfo.ehash[st->bucket + tcp_hashinfo.ehash_size].chain) {
+			if (!ve_accessible_veid(tw->tw_owner_env, VEID(ve)))
+				continue;
 			if (tw->tw_family != st->family) {
 				continue;
 			}
@@ -1453,16 +1492,17 @@ static void *established_get_next(struct
 	struct inet_timewait_sock *tw;
 	struct hlist_node *node;
 	struct tcp_iter_state* st = seq->private;
+	struct ve_struct *ve;
 
+	ve = get_exec_env();
 	++st->num;
 
 	if (st->state == TCP_SEQ_STATE_TIME_WAIT) {
 		tw = cur;
-		tw = tw_next(tw);
+		tw = tw_next(tw, VEID(ve));
 get_tw:
-		while (tw && tw->tw_family != st->family) {
-			tw = tw_next(tw);
-		}
+		while (tw && tw->tw_family != st->family)
+			tw = tw_next(tw, VEID(ve));
 		if (tw) {
 			cur = tw;
 			goto out;
@@ -1484,12 +1524,15 @@ get_tw:
 		sk = sk_next(sk);
 
 	sk_for_each_from(sk, node) {
+		if (!ve_accessible(VE_OWNER_SK(sk), ve))
+			continue;
 		if (sk->sk_family == st->family)
 			goto found;
 	}
 
 	st->state = TCP_SEQ_STATE_TIME_WAIT;
-	tw = tw_head(&tcp_hashinfo.ehash[st->bucket + tcp_hashinfo.ehash_size].chain);
+	tw = tw_head(&tcp_hashinfo.ehash[st->bucket +
+			tcp_hashinfo.ehash_size].chain, VEID(ve));
 	goto get_tw;
 found:
 	cur = sk;
@@ -1635,7 +1678,7 @@ int tcp_proc_register(struct tcp_seq_afi
 	afinfo->seq_fops->llseek	= seq_lseek;
 	afinfo->seq_fops->release	= seq_release_private;
 	
-	p = proc_net_fops_create(afinfo->name, S_IRUGO, afinfo->seq_fops);
+	p = proc_glob_fops_create(afinfo->name, S_IRUGO, afinfo->seq_fops);
 	if (p)
 		p->data = afinfo;
 	else
@@ -1647,7 +1690,8 @@ void tcp_proc_unregister(struct tcp_seq_
 {
 	if (!afinfo)
 		return;
-	proc_net_remove(afinfo->name);
+
+	remove_proc_glob_entry(afinfo->name, NULL);
 	memset(afinfo->seq_fops, 0, sizeof(*afinfo->seq_fops)); 
 }
 
@@ -1777,7 +1821,7 @@ out:
 static struct file_operations tcp4_seq_fops;
 static struct tcp_seq_afinfo tcp4_seq_afinfo = {
 	.owner		= THIS_MODULE,
-	.name		= "tcp",
+	.name		= "net/tcp",
 	.family		= AF_INET,
 	.seq_show	= tcp4_seq_show,
 	.seq_fops	= &tcp4_seq_fops,
@@ -1844,6 +1888,86 @@ void __init tcp_v4_init(struct net_proto
 	tcp_socket->sk->sk_prot->unhash(tcp_socket->sk);
 }
 
+#if defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE)
+/* Forcibly finish one VE socket: send a reset, then, depending on its
+ * state, turn it into a time-wait bucket or close it outright. */
+static void tcp_kill_ve_onesk(struct sock *sk)
+{
+	static int printed;	/* the complaint below is logged only once */
+	struct tcp_sock *tp = tcp_sk(sk);
+
+	/* Check the assumed state of the socket: SOCK_DEAD must be set. */
+	if (!sock_flag(sk, SOCK_DEAD))
+		goto invalid;
+
+	tcp_send_active_reset(sk, GFP_ATOMIC);
+	switch (sk->sk_state) {
+	case TCP_FIN_WAIT1:
+	case TCP_CLOSING:
+		/* In these two states the peer may still want us to
+		 * retransmit data and/or the FIN; enter "resetting mode"
+		 * instead.
+		 */
+		tcp_time_wait(sk, TCP_CLOSE, 0);
+		return;
+	case TCP_FIN_WAIT2:
+		/* The socket may linger here without becoming a TW bucket. */
+		tcp_time_wait(sk, TCP_FIN_WAIT2, 0);
+		return;
+	case TCP_LAST_ACK:
+		/* Just jump into CLOSED state. */
+		tcp_done(sk);
+		return;
+	default:
+		/* Unexpected: the socket must already be close()d. */
+		break;
+	}
+
+invalid:
+	if (!printed)
+		printk(KERN_DEBUG "Killing sk: dead %d, state %d, "
+			"wrseq %u unseq %u, wrqu %d.\n",
+			sock_flag(sk, SOCK_DEAD), sk->sk_state,
+			tp->write_seq, tp->snd_una,
+			!skb_queue_empty(&sk->sk_write_queue));
+	printed = 1;
+}
+
+void tcp_v4_kill_ve_sockets(struct ve_struct *envid)
+{
+	struct inet_ehash_bucket *head;
+	int i;
+
+	/* Kill sockets matching @envid in the alive (non-TW) ehash chains. */
+	local_bh_disable();
+	head = tcp_hashinfo.ehash;
+	for (i = 0; i < tcp_hashinfo.ehash_size; i++) {
+		struct sock *sk;
+		struct hlist_node *node;
+more_work:	/* rescan this bucket from its head after every kill */
+		write_lock(&head[i].lock);
+		sk_for_each(sk, node, &head[i].chain) {
+			if (ve_accessible_strict(VE_OWNER_SK(sk), envid)) {
+				sock_hold(sk);	/* keep a reference across the unlock */
+				write_unlock(&head[i].lock);
+
+				bh_lock_sock(sk);
+				/* sk may have left the hash between dropping the
+				 * bucket lock and taking the socket lock */
+				if (sk->sk_state != TCP_CLOSE)
+					tcp_kill_ve_onesk(sk);
+				bh_unlock_sock(sk);
+				sock_put(sk);
+				goto more_work;	/* NOTE(review): spins if sk stays hashed - confirm */
+			}
+		}
+		write_unlock(&head[i].lock);
+	}
+	local_bh_enable();
+}
+EXPORT_SYMBOL(tcp_v4_kill_ve_sockets);
+#endif
+
 EXPORT_SYMBOL(ipv4_specific);
 EXPORT_SYMBOL(tcp_hashinfo);
 EXPORT_SYMBOL(tcp_prot);
diff -uprN linux-2.6.16/net/ipv4/tcp_minisocks.c linux-2.6.16.ovz/net/ipv4/tcp_minisocks.c
--- linux-2.6.16/net/ipv4/tcp_minisocks.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/net/ipv4/tcp_minisocks.c	2006-07-05 08:34:56.000000000 -0400
@@ -29,6 +29,8 @@
 #include <net/inet_common.h>
 #include <net/xfrm.h>
 
+#include <ub/ub_net.h>
+
 #ifdef CONFIG_SYSCTL
 #define SYNC_INIT 0 /* let the user enable it */
 #else
@@ -307,6 +309,8 @@ void tcp_time_wait(struct sock *sk, int 
 			tw->tw_ipv6only = np->ipv6only;
 		}
 #endif
+		tw->tw_owner_env = VEID(VE_OWNER_SK(sk));
+
 		/* Linkage updates. */
 		__inet_twsk_hashdance(tw, sk, &tcp_hashinfo);
 
@@ -355,6 +359,8 @@ struct sock *tcp_create_openreq_child(st
 		struct tcp_sock *newtp;
 
 		/* Now setup tcp_sock */
+		SET_VE_OWNER_SK(newsk, VE_OWNER_SK(sk));
+
 		newtp = tcp_sk(newsk);
 		newtp->pred_flags = 0;
 		newtp->rcv_nxt = treq->rcv_isn + 1;
diff -uprN linux-2.6.16/net/ipv4/tcp_output.c linux-2.6.16.ovz/net/ipv4/tcp_output.c
--- linux-2.6.16/net/ipv4/tcp_output.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/net/ipv4/tcp_output.c	2006-07-05 08:34:56.000000000 -0400
@@ -42,6 +42,9 @@
 #include <linux/module.h>
 #include <linux/smp_lock.h>
 
+#include <ub/ub_net.h>
+#include <ub/ub_tcp.h>
+
 /* People can turn this off for buggy TCP's found in printers etc. */
 int sysctl_tcp_retrans_collapse = 1;
 
@@ -528,16 +531,26 @@ int tcp_fragment(struct sock *sk, struct
 	if (nsize < 0)
 		nsize = 0;
 
-	if (skb_cloned(skb) &&
-	    skb_is_nonlinear(skb) &&
-	    pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
-		return -ENOMEM;
+	if (skb_cloned(skb) && skb_is_nonlinear(skb)) {
+		unsigned long chargesize;
+		chargesize = skb_bc(skb)->charged;
+		if (pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
+			return -ENOMEM;
+		ub_sock_retwres_tcp(sk, chargesize, chargesize);
+		ub_tcpsndbuf_charge_forced(sk, skb);
+	}
 
 	/* Get a new skb... force flag on. */
 	buff = sk_stream_alloc_skb(sk, nsize, GFP_ATOMIC);
 	if (buff == NULL)
 		return -ENOMEM; /* We'll just try again later. */
-	sk_charge_skb(sk, buff);
+	if (ub_tcpsndbuf_charge(sk, buff) < 0) {
+		kfree_skb(buff);
+		return -ENOMEM;
+	}
+
+	buff->truesize = skb->len - len;
+	skb->truesize -= buff->truesize;
 
 	/* Correct the sequence numbers. */
 	TCP_SKB_CB(buff)->seq = TCP_SKB_CB(skb)->seq + len;
@@ -978,6 +991,11 @@ static int tso_fragment(struct sock *sk,
 	if (unlikely(buff == NULL))
 		return -ENOMEM;
 
+	if (ub_tcpsndbuf_charge(sk, buff) < 0) {
+		kfree_skb(buff);
+		return -ENOMEM;
+	}
+
 	buff->truesize = nlen;
 	skb->truesize -= nlen;
 
@@ -1281,7 +1299,7 @@ u32 __tcp_select_window(struct sock *sk)
 	if (free_space < full_space/2) {
 		icsk->icsk_ack.quick = 0;
 
-		if (tcp_memory_pressure)
+		if (ub_tcp_shrink_rcvbuf(sk))
 			tp->rcv_ssthresh = min(tp->rcv_ssthresh, 4U*tp->advmss);
 
 		if (free_space < mss)
@@ -1708,6 +1726,7 @@ void tcp_send_fin(struct sock *sk)
 				break;
 			yield();
 		}
+		ub_tcpsndbuf_charge_forced(sk, skb);
 
 		/* Reserve space for headers and prepare control bits. */
 		skb_reserve(skb, MAX_TCP_HEADER);
@@ -1777,6 +1796,11 @@ int tcp_send_synack(struct sock *sk)
 			struct sk_buff *nskb = skb_copy(skb, GFP_ATOMIC);
 			if (nskb == NULL)
 				return -ENOMEM;
+			/* Charge the copy: nskb replaces skb on the write queue. */
+			if (ub_tcpsndbuf_charge(sk, nskb) < 0) {
+				kfree_skb(nskb);
+				return -ENOMEM;
+			}
 			__skb_unlink(skb, &sk->sk_write_queue);
 			skb_header_release(nskb);
 			__skb_queue_head(&sk->sk_write_queue, nskb);
@@ -1928,6 +1952,10 @@ int tcp_connect(struct sock *sk)
 	buff = alloc_skb_fclone(MAX_TCP_HEADER + 15, sk->sk_allocation);
 	if (unlikely(buff == NULL))
 		return -ENOBUFS;
+	if (ub_tcpsndbuf_charge(sk, buff) < 0) {
+		kfree_skb(buff);
+		return -ENOBUFS;
+	}
 
 	/* Reserve space for headers. */
 	skb_reserve(buff, MAX_TCP_HEADER);
diff -uprN linux-2.6.16/net/ipv4/tcp_timer.c linux-2.6.16.ovz/net/ipv4/tcp_timer.c
--- linux-2.6.16/net/ipv4/tcp_timer.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/net/ipv4/tcp_timer.c	2006-07-05 08:34:56.000000000 -0400
@@ -22,6 +22,8 @@
 
 #include <linux/module.h>
 #include <net/tcp.h>
+#include <ub/ub_orphan.h>
+#include <ub/ub_tcp.h>
 
 int sysctl_tcp_syn_retries = TCP_SYN_RETRIES; 
 int sysctl_tcp_synack_retries = TCP_SYNACK_RETRIES; 
@@ -67,7 +69,7 @@ static void tcp_write_err(struct sock *s
 static int tcp_out_of_resources(struct sock *sk, int do_reset)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
-	int orphans = atomic_read(&tcp_orphan_count);
+	int orphans = ub_get_orphan_count(sk);
 
 	/* If peer does not open window for long time, or did not transmit 
 	 * anything for long time, penalize it. */
@@ -78,9 +80,7 @@ static int tcp_out_of_resources(struct s
 	if (sk->sk_err_soft)
 		orphans <<= 1;
 
-	if (orphans >= sysctl_tcp_max_orphans ||
-	    (sk->sk_wmem_queued > SOCK_MIN_SNDBUF &&
-	     atomic_read(&tcp_memory_allocated) > sysctl_tcp_mem[2])) {
+	if (ub_too_many_orphans(sk, orphans)) {
 		if (net_ratelimit())
 			printk(KERN_INFO "Out of socket memory\n");
 
@@ -173,9 +173,12 @@ static int tcp_write_timeout(struct sock
 static void tcp_delack_timer(unsigned long data)
 {
 	struct sock *sk = (struct sock*)data;
+	struct ve_struct *env;
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct inet_connection_sock *icsk = inet_csk(sk);
 
+	env = set_exec_env(VE_OWNER_SK(sk));
+
 	bh_lock_sock(sk);
 	if (sock_owned_by_user(sk)) {
 		/* Try again later. */
@@ -224,11 +227,12 @@ static void tcp_delack_timer(unsigned lo
 	TCP_CHECK_TIMER(sk);
 
 out:
-	if (tcp_memory_pressure)
+	if (ub_tcp_memory_pressure(sk))
 		sk_stream_mem_reclaim(sk);
 out_unlock:
 	bh_unlock_sock(sk);
 	sock_put(sk);
+	(void)set_exec_env(env);
 }
 
 static void tcp_probe_timer(struct sock *sk)
@@ -283,8 +287,11 @@ static void tcp_probe_timer(struct sock 
 static void tcp_retransmit_timer(struct sock *sk)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
+	struct ve_struct *env;
 	struct inet_connection_sock *icsk = inet_csk(sk);
 
+	env = set_exec_env(VE_OWNER_SK(sk));
+
 	if (!tp->packets_out)
 		goto out;
 
@@ -381,15 +388,19 @@ out_reset_timer:
 	if (icsk->icsk_retransmits > sysctl_tcp_retries1)
 		__sk_dst_reset(sk);
 
-out:;
+out:
+	(void)set_exec_env(env);
 }
 
 static void tcp_write_timer(unsigned long data)
 {
 	struct sock *sk = (struct sock*)data;
+	struct ve_struct *env;
 	struct inet_connection_sock *icsk = inet_csk(sk);
 	int event;
 
+	env = set_exec_env(VE_OWNER_SK(sk));
+
 	bh_lock_sock(sk);
 	if (sock_owned_by_user(sk)) {
 		/* Try again later */
@@ -423,6 +434,7 @@ out:
 out_unlock:
 	bh_unlock_sock(sk);
 	sock_put(sk);
+	(void)set_exec_env(env);
 }
 
 /*
@@ -450,10 +462,13 @@ void tcp_set_keepalive(struct sock *sk, 
 static void tcp_keepalive_timer (unsigned long data)
 {
 	struct sock *sk = (struct sock *) data;
+	struct ve_struct *env;
 	struct inet_connection_sock *icsk = inet_csk(sk);
 	struct tcp_sock *tp = tcp_sk(sk);
 	__u32 elapsed;
 
+	env = set_exec_env(VE_OWNER_SK(sk));
+
 	/* Only process if socket is not in use. */
 	bh_lock_sock(sk);
 	if (sock_owned_by_user(sk)) {
@@ -525,4 +540,5 @@ death:	
 out:
 	bh_unlock_sock(sk);
 	sock_put(sk);
+	(void)set_exec_env(env);
 }
diff -uprN linux-2.6.16/net/ipv4/udp.c linux-2.6.16.ovz/net/ipv4/udp.c
--- linux-2.6.16/net/ipv4/udp.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/net/ipv4/udp.c	2006-07-05 08:34:56.000000000 -0400
@@ -127,7 +127,9 @@ static int udp_v4_get_port(struct sock *
 	struct hlist_node *node;
 	struct sock *sk2;
 	struct inet_sock *inet = inet_sk(sk);
+	struct ve_struct *env;
 
+	env = VE_OWNER_SK(sk);
 	write_lock_bh(&udp_hash_lock);
 	if (snum == 0) {
 		int best_size_so_far, best, result, i;
@@ -141,7 +143,7 @@ static int udp_v4_get_port(struct sock *
 			struct hlist_head *list;
 			int size;
 
-			list = &udp_hash[result & (UDP_HTABLE_SIZE - 1)];
+			list = &udp_hash[udp_hashfn(result, VEID(env))];
 			if (hlist_empty(list)) {
 				if (result > sysctl_local_port_range[1])
 					result = sysctl_local_port_range[0] +
@@ -163,7 +165,7 @@ static int udp_v4_get_port(struct sock *
 				result = sysctl_local_port_range[0]
 					+ ((result - sysctl_local_port_range[0]) &
 					   (UDP_HTABLE_SIZE - 1));
-			if (!udp_lport_inuse(result))
+			if (!udp_lport_inuse(result, env))
 				break;
 		}
 		if (i >= (1 << 16) / UDP_HTABLE_SIZE)
@@ -172,11 +174,12 @@ gotit:
 		udp_port_rover = snum = result;
 	} else {
 		sk_for_each(sk2, node,
-			    &udp_hash[snum & (UDP_HTABLE_SIZE - 1)]) {
+			    &udp_hash[udp_hashfn(snum, VEID(env))]) {
 			struct inet_sock *inet2 = inet_sk(sk2);
 
 			if (inet2->num == snum &&
 			    sk2 != sk &&
+			    ve_accessible_strict(VE_OWNER_SK(sk2), env) &&
 			    !ipv6_only_sock(sk2) &&
 			    (!sk2->sk_bound_dev_if ||
 			     !sk->sk_bound_dev_if ||
@@ -190,7 +193,7 @@ gotit:
 	}
 	inet->num = snum;
 	if (sk_unhashed(sk)) {
-		struct hlist_head *h = &udp_hash[snum & (UDP_HTABLE_SIZE - 1)];
+		struct hlist_head *h = &udp_hash[udp_hashfn(snum, VEID(env))];
 
 		sk_add_node(sk, h);
 		sock_prot_inc_use(sk->sk_prot);
@@ -228,11 +231,15 @@ static struct sock *udp_v4_lookup_longwa
 	struct hlist_node *node;
 	unsigned short hnum = ntohs(dport);
 	int badness = -1;
+	struct ve_struct *env;
 
-	sk_for_each(sk, node, &udp_hash[hnum & (UDP_HTABLE_SIZE - 1)]) {
+	env = get_exec_env();
+	sk_for_each(sk, node, &udp_hash[udp_hashfn(hnum, VEID(env))]) {
 		struct inet_sock *inet = inet_sk(sk);
 
-		if (inet->num == hnum && !ipv6_only_sock(sk)) {
+		if (inet->num == hnum &&
+		    ve_accessible_strict(VE_OWNER_SK(sk), env) &&
+		    !ipv6_only_sock(sk)) {
 			int score = (sk->sk_family == PF_INET ? 1 : 0);
 			if (inet->rcv_saddr) {
 				if (inet->rcv_saddr != daddr)
@@ -1049,7 +1056,8 @@ static int udp_v4_mcast_deliver(struct s
 	int dif;
 
 	read_lock(&udp_hash_lock);
-	sk = sk_head(&udp_hash[ntohs(uh->dest) & (UDP_HTABLE_SIZE - 1)]);
+	sk = sk_head(&udp_hash[udp_hashfn(ntohs(uh->dest),
+				VEID(VE_OWNER_SKB(skb)))]);
 	dif = skb->dev->ifindex;
 	sk = udp_v4_mcast_next(sk, uh->dest, daddr, uh->source, saddr, dif);
 	if (sk) {
@@ -1367,10 +1375,14 @@ static struct sock *udp_get_first(struct
 {
 	struct sock *sk;
 	struct udp_iter_state *state = seq->private;
+	struct ve_struct *env;
 
+	env = get_exec_env();
 	for (state->bucket = 0; state->bucket < UDP_HTABLE_SIZE; ++state->bucket) {
 		struct hlist_node *node;
 		sk_for_each(sk, node, &udp_hash[state->bucket]) {
+			if (!ve_accessible(VE_OWNER_SK(sk), env))
+				continue;
 			if (sk->sk_family == state->family)
 				goto found;
 		}
@@ -1387,8 +1399,13 @@ static struct sock *udp_get_next(struct 
 	do {
 		sk = sk_next(sk);
 try_again:
-		;
-	} while (sk && sk->sk_family != state->family);
+		if (!sk)
+			break;
+		if (sk->sk_family != state->family)
+			continue;
+		if (ve_accessible(VE_OWNER_SK(sk), get_exec_env()))
+			break;
+	} while (1);
 
 	if (!sk && ++state->bucket < UDP_HTABLE_SIZE) {
 		sk = sk_head(&udp_hash[state->bucket]);
@@ -1474,7 +1491,7 @@ int udp_proc_register(struct udp_seq_afi
 	afinfo->seq_fops->llseek	= seq_lseek;
 	afinfo->seq_fops->release	= seq_release_private;
 
-	p = proc_net_fops_create(afinfo->name, S_IRUGO, afinfo->seq_fops);
+	p = proc_glob_fops_create(afinfo->name, S_IRUGO, afinfo->seq_fops);
 	if (p)
 		p->data = afinfo;
 	else
@@ -1486,7 +1503,8 @@ void udp_proc_unregister(struct udp_seq_
 {
 	if (!afinfo)
 		return;
-	proc_net_remove(afinfo->name);
+
+	remove_proc_glob_entry(afinfo->name, NULL);
 	memset(afinfo->seq_fops, 0, sizeof(*afinfo->seq_fops));
 }
 
@@ -1529,7 +1547,7 @@ static int udp4_seq_show(struct seq_file
 static struct file_operations udp4_seq_fops;
 static struct udp_seq_afinfo udp4_seq_afinfo = {
 	.owner		= THIS_MODULE,
-	.name		= "udp",
+	.name		= "net/udp",
 	.family		= AF_INET,
 	.seq_show	= udp4_seq_show,
 	.seq_fops	= &udp4_seq_fops,
diff -uprN linux-2.6.16/net/ipv6/addrconf.c linux-2.6.16.ovz/net/ipv6/addrconf.c
--- linux-2.6.16/net/ipv6/addrconf.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/net/ipv6/addrconf.c	2006-07-05 08:34:56.000000000 -0400
@@ -100,6 +100,7 @@
 #define TIME_DELTA(a,b) ((unsigned long)((long)(a) - (long)(b)))
 
 #ifdef CONFIG_SYSCTL
+static struct addrconf_sysctl_table * __addrconf_sysctl_register(struct inet6_dev *idev, char *devname, int ifindex, struct ipv6_devconf *p);
 static void addrconf_sysctl_register(struct inet6_dev *idev, struct ipv6_devconf *p);
 static void addrconf_sysctl_unregister(struct ipv6_devconf *p);
 #endif
@@ -133,8 +134,6 @@ static DEFINE_SPINLOCK(addrconf_verify_l
 static void addrconf_join_anycast(struct inet6_ifaddr *ifp);
 static void addrconf_leave_anycast(struct inet6_ifaddr *ifp);
 
-static int addrconf_ifdown(struct net_device *dev, int how);
-
 static void addrconf_dad_start(struct inet6_ifaddr *ifp, u32 flags);
 static void addrconf_dad_timer(unsigned long data);
 static void addrconf_dad_completed(struct inet6_ifaddr *ifp);
@@ -149,7 +148,7 @@ static int ipv6_chk_same_addr(const stru
 
 static struct notifier_block *inet6addr_chain;
 
-struct ipv6_devconf ipv6_devconf = {
+struct ipv6_devconf global_ipv6_devconf = {
 	.forwarding		= 0,
 	.hop_limit		= IPV6_DEFAULT_HOPLIMIT,
 	.mtu6			= IPV6_MIN_MTU,
@@ -171,7 +170,7 @@ struct ipv6_devconf ipv6_devconf = {
 	.max_addresses		= IPV6_MAX_ADDRESSES,
 };
 
-static struct ipv6_devconf ipv6_devconf_dflt = {
+struct ipv6_devconf global_ipv6_devconf_dflt = {
 	.forwarding		= 0,
 	.hop_limit		= IPV6_DEFAULT_HOPLIMIT,
 	.mtu6			= IPV6_MIN_MTU,
@@ -192,6 +191,12 @@ static struct ipv6_devconf ipv6_devconf_
 	.max_addresses		= IPV6_MAX_ADDRESSES,
 };
 
+#if defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE)
+#define ipv6_devconf_dflt	(*(get_exec_env()->_ipv6_devconf_dflt))
+#else
+#define ipv6_devconf_dflt	global_ipv6_devconf_dflt
+#endif
+
 /* IPv6 Wildcard Address and Loopback Address defined by RFC2553 */
 #if 0
 const struct in6_addr in6addr_any = IN6ADDR_ANY_INIT;
@@ -463,8 +468,8 @@ static void addrconf_forward_change(void
 		read_lock(&addrconf_lock);
 		idev = __in6_dev_get(dev);
 		if (idev) {
-			int changed = (!idev->cnf.forwarding) ^ (!ipv6_devconf.forwarding);
-			idev->cnf.forwarding = ipv6_devconf.forwarding;
+			int changed = (!idev->cnf.forwarding) ^ (!ve_ipv6_devconf.forwarding);
+			idev->cnf.forwarding = ve_ipv6_devconf.forwarding;
 			if (changed)
 				dev_forward_change(idev);
 		}
@@ -1148,9 +1153,10 @@ int ipv6_chk_addr(struct in6_addr *addr,
 	read_lock_bh(&addrconf_hash_lock);
 	for(ifp = inet6_addr_lst[hash]; ifp; ifp=ifp->lst_next) {
 		if (ipv6_addr_equal(&ifp->addr, addr) &&
-		    !(ifp->flags&IFA_F_TENTATIVE)) {
+		    !(ifp->flags&IFA_F_TENTATIVE) &&
+		    ve_accessible_strict(ifp->idev->dev->owner_env, get_exec_env())) {
 			if (dev == NULL || ifp->idev->dev == dev ||
-			    !(ifp->scope&(IFA_LINK|IFA_HOST) || strict))
+			    !((ifp->scope&(IFA_LINK|IFA_HOST)) || strict))
 				break;
 		}
 	}
@@ -1166,7 +1172,9 @@ int ipv6_chk_same_addr(const struct in6_
 
 	for(ifp = inet6_addr_lst[hash]; ifp; ifp=ifp->lst_next) {
 		if (ipv6_addr_equal(&ifp->addr, addr)) {
-			if (dev == NULL || ifp->idev->dev == dev)
+			if ((dev == NULL &&
+			     ve_accessible_strict(ifp->idev->dev->owner_env, get_exec_env()))
+			    || ifp->idev->dev == dev)
 				break;
 		}
 	}
@@ -1180,9 +1188,10 @@ struct inet6_ifaddr * ipv6_get_ifaddr(st
 
 	read_lock_bh(&addrconf_hash_lock);
 	for(ifp = inet6_addr_lst[hash]; ifp; ifp=ifp->lst_next) {
-		if (ipv6_addr_equal(&ifp->addr, addr)) {
+		if (ipv6_addr_equal(&ifp->addr, addr) &&
+		    ve_accessible_strict(ifp->idev->dev->owner_env, get_exec_env())) {
 			if (dev == NULL || ifp->idev->dev == dev ||
-			    !(ifp->scope&(IFA_LINK|IFA_HOST) || strict)) {
+			    !((ifp->scope&(IFA_LINK|IFA_HOST)) || strict)) {
 				in6_ifa_hold(ifp);
 				break;
 			}
@@ -1842,7 +1851,7 @@ err_exit:
 /*
  *	Manual configuration of address on an interface
  */
-static int inet6_addr_add(int ifindex, struct in6_addr *pfx, int plen)
+int inet6_addr_add(int ifindex, struct in6_addr *pfx, int plen)
 {
 	struct inet6_ifaddr *ifp;
 	struct inet6_dev *idev;
@@ -1871,6 +1880,7 @@ static int inet6_addr_add(int ifindex, s
 
 	return PTR_ERR(ifp);
 }
+EXPORT_SYMBOL_GPL(inet6_addr_add);
 
 static int inet6_addr_del(int ifindex, struct in6_addr *pfx, int plen)
 {
@@ -1911,7 +1921,7 @@ int addrconf_add_ifaddr(void __user *arg
 	struct in6_ifreq ireq;
 	int err;
 	
-	if (!capable(CAP_NET_ADMIN))
+	if (!capable(CAP_VE_NET_ADMIN))
 		return -EPERM;
 	
 	if (copy_from_user(&ireq, arg, sizeof(struct in6_ifreq)))
@@ -1928,7 +1938,7 @@ int addrconf_del_ifaddr(void __user *arg
 	struct in6_ifreq ireq;
 	int err;
 	
-	if (!capable(CAP_NET_ADMIN))
+	if (!capable(CAP_VE_NET_ADMIN))
 		return -EPERM;
 
 	if (copy_from_user(&ireq, arg, sizeof(struct in6_ifreq)))
@@ -2270,7 +2280,7 @@ static struct notifier_block ipv6_dev_no
 	.priority = 0
 };
 
-static int addrconf_ifdown(struct net_device *dev, int how)
+int addrconf_ifdown(struct net_device *dev, int how)
 {
 	struct inet6_dev *idev;
 	struct inet6_ifaddr *ifa, **bifa;
@@ -2278,7 +2288,7 @@ static int addrconf_ifdown(struct net_de
 
 	ASSERT_RTNL();
 
-	if (dev == &loopback_dev && how == 1)
+	if (dev == get_ve0()->_loopback_dev && how == 1)
 		how = 0;
 
 	rt6_ifdown(dev);
@@ -2386,10 +2396,12 @@ static int addrconf_ifdown(struct net_de
 	}
 	return 0;
 }
+EXPORT_SYMBOL_GPL(addrconf_ifdown);
 
 static void addrconf_rs_timer(unsigned long data)
 {
 	struct inet6_ifaddr *ifp = (struct inet6_ifaddr *) data;
+	struct ve_struct *old_env = set_exec_env(ifp->idev->dev->owner_env);
 
 	if (ifp->idev->cnf.forwarding)
 		goto out;
@@ -2428,6 +2440,7 @@ static void addrconf_rs_timer(unsigned l
 
 out:
 	in6_ifa_put(ifp);
+	set_exec_env(old_env);
 }
 
 /*
@@ -2495,6 +2508,7 @@ static void addrconf_dad_timer(unsigned 
 	struct inet6_dev *idev = ifp->idev;
 	struct in6_addr unspec;
 	struct in6_addr mcaddr;
+	struct ve_struct *old_env = set_exec_env(ifp->idev->dev->owner_env);
 
 	read_lock_bh(&idev->lock);
 	if (idev->dead) {
@@ -2527,6 +2541,7 @@ static void addrconf_dad_timer(unsigned 
 	ndisc_send_ns(ifp->idev->dev, NULL, &ifp->addr, &mcaddr, &unspec);
 out:
 	in6_ifa_put(ifp);
+	set_exec_env(old_env);
 }
 
 static void addrconf_dad_completed(struct inet6_ifaddr *ifp)
@@ -2594,8 +2609,11 @@ static struct inet6_ifaddr *if6_get_firs
 
 	for (state->bucket = 0; state->bucket < IN6_ADDR_HSIZE; ++state->bucket) {
 		ifa = inet6_addr_lst[state->bucket];
-		if (ifa)
-			break;
+		while (ifa) {
+			if (ve_accessible_strict(ifa->idev->dev->owner_env, get_exec_env()))
+				return ifa;
+			ifa = ifa->lst_next;
+		}
 	}
 	return ifa;
 }
@@ -2606,6 +2624,11 @@ static struct inet6_ifaddr *if6_get_next
 
 	ifa = ifa->lst_next;
 try_again:
+	while (ifa) {
+		if (ve_accessible_strict(ifa->idev->dev->owner_env, get_exec_env()))
+			break;
+		ifa = ifa->lst_next;
+	}
 	if (!ifa && ++state->bucket < IN6_ADDR_HSIZE) {
 		ifa = inet6_addr_lst[state->bucket];
 		goto try_again;
@@ -2697,14 +2720,14 @@ static struct file_operations if6_fops =
 
 int __init if6_proc_init(void)
 {
-	if (!proc_net_fops_create("if_inet6", S_IRUGO, &if6_fops))
+	if (!proc_glob_fops_create("net/if_inet6", S_IRUGO, &if6_fops))
 		return -ENOMEM;
 	return 0;
 }
 
 void if6_proc_exit(void)
 {
-	proc_net_remove("if_inet6");
+	remove_proc_glob_entry("net/if_inet6", NULL);
 }
 #endif	/* CONFIG_PROC_FS */
 
@@ -2717,6 +2740,7 @@ static void addrconf_verify(unsigned lon
 	struct inet6_ifaddr *ifp;
 	unsigned long now, next;
 	int i;
+	struct ve_struct *old_env;
 
 	spin_lock_bh(&addrconf_verify_lock);
 	now = jiffies;
@@ -2737,6 +2761,8 @@ restart:
 			if (ifp->flags & IFA_F_PERMANENT)
 				continue;
 
+			old_env = set_exec_env(ifp->idev->dev->owner_env);
+
 			spin_lock(&ifp->lock);
 			age = (now - ifp->tstamp) / HZ;
 
@@ -2751,6 +2777,7 @@ restart:
 				in6_ifa_hold(ifp);
 				read_unlock(&addrconf_hash_lock);
 				ipv6_del_addr(ifp);
+				set_exec_env(old_env);
 				goto restart;
 			} else if (age >= ifp->prefered_lft) {
 				/* jiffies - ifp->tsamp > age >= ifp->prefered_lft */
@@ -2772,6 +2799,7 @@ restart:
 
 					ipv6_ifa_notify(0, ifp);
 					in6_ifa_put(ifp);
+					set_exec_env(old_env);
 					goto restart;
 				}
 #ifdef CONFIG_IPV6_PRIVACY
@@ -2793,6 +2821,7 @@ restart:
 						ipv6_create_tempaddr(ifpub, ifp);
 						in6_ifa_put(ifpub);
 						in6_ifa_put(ifp);
+						set_exec_env(old_env);
 						goto restart;
 					}
 				} else if (time_before(ifp->tstamp + ifp->prefered_lft * HZ - regen_advance * HZ, next))
@@ -2805,6 +2834,7 @@ restart:
 					next = ifp->tstamp + ifp->prefered_lft * HZ;
 				spin_unlock(&ifp->lock);
 			}
+			set_exec_env(old_env);
 		}
 		read_unlock(&addrconf_hash_lock);
 	}
@@ -3360,7 +3390,7 @@ int addrconf_sysctl_forward(ctl_table *c
 	ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
 
 	if (write && valp != &ipv6_devconf_dflt.forwarding) {
-		if (valp != &ipv6_devconf.forwarding) {
+		if (valp != &ve_ipv6_devconf.forwarding) {
 			if ((!*valp) ^ (!val)) {
 				struct inet6_dev *idev = (struct inet6_dev *)ctl->extra1;
 				if (idev == NULL)
@@ -3368,7 +3398,7 @@ int addrconf_sysctl_forward(ctl_table *c
 				dev_forward_change(idev);
 			}
 		} else {
-			ipv6_devconf_dflt.forwarding = ipv6_devconf.forwarding;
+			ipv6_devconf_dflt.forwarding = ve_ipv6_devconf.forwarding;
 			addrconf_forward_change();
 		}
 		if (*valp)
@@ -3411,7 +3441,7 @@ static int addrconf_sysctl_forward_strat
 	}
 
 	if (valp != &ipv6_devconf_dflt.forwarding) {
-		if (valp != &ipv6_devconf.forwarding) {
+		if (valp != &ve_ipv6_devconf.forwarding) {
 			struct inet6_dev *idev = (struct inet6_dev *)table->extra1;
 			int changed;
 			if (unlikely(idev == NULL))
@@ -3447,7 +3477,7 @@ static struct addrconf_sysctl_table
         	{
 			.ctl_name	=	NET_IPV6_FORWARDING,
 			.procname	=	"forwarding",
-         		.data		=	&ipv6_devconf.forwarding,
+         		.data		=	&global_ipv6_devconf.forwarding,
 			.maxlen		=	sizeof(int),
 			.mode		=	0644,
          		.proc_handler	=	&addrconf_sysctl_forward,
@@ -3456,7 +3486,7 @@ static struct addrconf_sysctl_table
 		{
 			.ctl_name	=	NET_IPV6_HOP_LIMIT,
 			.procname	=	"hop_limit",
-         		.data		=	&ipv6_devconf.hop_limit,
+         		.data		=	&global_ipv6_devconf.hop_limit,
 			.maxlen		=	sizeof(int),
 			.mode		=	0644,
 			.proc_handler	=	proc_dointvec,
@@ -3464,7 +3494,7 @@ static struct addrconf_sysctl_table
 		{
 			.ctl_name	=	NET_IPV6_MTU,
 			.procname	=	"mtu",
-			.data		=	&ipv6_devconf.mtu6,
+			.data		=	&global_ipv6_devconf.mtu6,
          		.maxlen		=	sizeof(int),
 			.mode		=	0644,
          		.proc_handler	=	&proc_dointvec,
@@ -3472,7 +3502,7 @@ static struct addrconf_sysctl_table
 		{
 			.ctl_name	=	NET_IPV6_ACCEPT_RA,
 			.procname	=	"accept_ra",
-         		.data		=	&ipv6_devconf.accept_ra,
+         		.data		=	&global_ipv6_devconf.accept_ra,
 			.maxlen		=	sizeof(int),
 			.mode		=	0644,
          		.proc_handler	=	&proc_dointvec,
@@ -3480,7 +3510,7 @@ static struct addrconf_sysctl_table
 		{
 			.ctl_name	=	NET_IPV6_ACCEPT_REDIRECTS,
 			.procname	=	"accept_redirects",
-         		.data		=	&ipv6_devconf.accept_redirects,
+         		.data		=	&global_ipv6_devconf.accept_redirects,
 			.maxlen		=	sizeof(int),
 			.mode		=	0644,
          		.proc_handler	=	&proc_dointvec,
@@ -3488,7 +3518,7 @@ static struct addrconf_sysctl_table
 		{
 			.ctl_name	=	NET_IPV6_AUTOCONF,
 			.procname	=	"autoconf",
-         		.data		=	&ipv6_devconf.autoconf,
+         		.data		=	&global_ipv6_devconf.autoconf,
 			.maxlen		=	sizeof(int),
 			.mode		=	0644,
          		.proc_handler	=	&proc_dointvec,
@@ -3496,7 +3526,7 @@ static struct addrconf_sysctl_table
 		{
 			.ctl_name	=	NET_IPV6_DAD_TRANSMITS,
 			.procname	=	"dad_transmits",
-         		.data		=	&ipv6_devconf.dad_transmits,
+         		.data		=	&global_ipv6_devconf.dad_transmits,
 			.maxlen		=	sizeof(int),
 			.mode		=	0644,
          		.proc_handler	=	&proc_dointvec,
@@ -3504,7 +3534,7 @@ static struct addrconf_sysctl_table
 		{
 			.ctl_name	=	NET_IPV6_RTR_SOLICITS,
 			.procname	=	"router_solicitations",
-         		.data		=	&ipv6_devconf.rtr_solicits,
+         		.data		=	&global_ipv6_devconf.rtr_solicits,
 			.maxlen		=	sizeof(int),
 			.mode		=	0644,
          		.proc_handler	=	&proc_dointvec,
@@ -3512,7 +3542,7 @@ static struct addrconf_sysctl_table
 		{
 			.ctl_name	=	NET_IPV6_RTR_SOLICIT_INTERVAL,
 			.procname	=	"router_solicitation_interval",
-         		.data		=	&ipv6_devconf.rtr_solicit_interval,
+         		.data		=	&global_ipv6_devconf.rtr_solicit_interval,
 			.maxlen		=	sizeof(int),
 			.mode		=	0644,
          		.proc_handler	=	&proc_dointvec_jiffies,
@@ -3521,7 +3551,7 @@ static struct addrconf_sysctl_table
 		{
 			.ctl_name	=	NET_IPV6_RTR_SOLICIT_DELAY,
 			.procname	=	"router_solicitation_delay",
-         		.data		=	&ipv6_devconf.rtr_solicit_delay,
+         		.data		=	&global_ipv6_devconf.rtr_solicit_delay,
 			.maxlen		=	sizeof(int),
 			.mode		=	0644,
          		.proc_handler	=	&proc_dointvec_jiffies,
@@ -3530,7 +3560,7 @@ static struct addrconf_sysctl_table
 		{
 			.ctl_name	=	NET_IPV6_FORCE_MLD_VERSION,
 			.procname	=	"force_mld_version",
-         		.data		=	&ipv6_devconf.force_mld_version,
+         		.data		=	&global_ipv6_devconf.force_mld_version,
 			.maxlen		=	sizeof(int),
 			.mode		=	0644,
          		.proc_handler	=	&proc_dointvec,
@@ -3539,7 +3569,7 @@ static struct addrconf_sysctl_table
 		{
 			.ctl_name	=	NET_IPV6_USE_TEMPADDR,
 			.procname	=	"use_tempaddr",
-	 		.data		=	&ipv6_devconf.use_tempaddr,
+	 		.data		=	&global_ipv6_devconf.use_tempaddr,
 			.maxlen		=	sizeof(int),
 			.mode		=	0644,
 	 		.proc_handler	=	&proc_dointvec,
@@ -3547,7 +3577,7 @@ static struct addrconf_sysctl_table
 		{
 			.ctl_name	=	NET_IPV6_TEMP_VALID_LFT,
 			.procname	=	"temp_valid_lft",
-	 		.data		=	&ipv6_devconf.temp_valid_lft,
+	 		.data		=	&global_ipv6_devconf.temp_valid_lft,
 			.maxlen		=	sizeof(int),
 			.mode		=	0644,
 	 		.proc_handler	=	&proc_dointvec,
@@ -3555,7 +3585,7 @@ static struct addrconf_sysctl_table
 		{
 			.ctl_name	=	NET_IPV6_TEMP_PREFERED_LFT,
 			.procname	=	"temp_prefered_lft",
-	 		.data		=	&ipv6_devconf.temp_prefered_lft,
+	 		.data		=	&global_ipv6_devconf.temp_prefered_lft,
 			.maxlen		=	sizeof(int),
 			.mode		=	0644,
 	 		.proc_handler	=	&proc_dointvec,
@@ -3563,7 +3593,7 @@ static struct addrconf_sysctl_table
 		{
 			.ctl_name	=	NET_IPV6_REGEN_MAX_RETRY,
 			.procname	=	"regen_max_retry",
-	 		.data		=	&ipv6_devconf.regen_max_retry,
+	 		.data		=	&global_ipv6_devconf.regen_max_retry,
 			.maxlen		=	sizeof(int),
 			.mode		=	0644,
 	 		.proc_handler	=	&proc_dointvec,
@@ -3571,7 +3601,7 @@ static struct addrconf_sysctl_table
 		{
 			.ctl_name	=	NET_IPV6_MAX_DESYNC_FACTOR,
 			.procname	=	"max_desync_factor",
-	 		.data		=	&ipv6_devconf.max_desync_factor,
+	 		.data		=	&global_ipv6_devconf.max_desync_factor,
 			.maxlen		=	sizeof(int),
 			.mode		=	0644,
 	 		.proc_handler	=	&proc_dointvec,
@@ -3580,7 +3610,7 @@ static struct addrconf_sysctl_table
 		{
 			.ctl_name	=	NET_IPV6_MAX_ADDRESSES,
 			.procname	=	"max_addresses",
-			.data		=	&ipv6_devconf.max_addresses,
+			.data		=	&global_ipv6_devconf.max_addresses,
 			.maxlen		=	sizeof(int),
 			.mode		=	0644,
 			.proc_handler	=	&proc_dointvec,
@@ -3635,29 +3665,22 @@ static struct addrconf_sysctl_table
 	},
 };
 
-static void addrconf_sysctl_register(struct inet6_dev *idev, struct ipv6_devconf *p)
+static struct addrconf_sysctl_table *
+__addrconf_sysctl_register(struct inet6_dev *idev, char *dev_name, int ifindex, struct ipv6_devconf *p)
 {
 	int i;
-	struct net_device *dev = idev ? idev->dev : NULL;
 	struct addrconf_sysctl_table *t;
-	char *dev_name = NULL;
 
 	t = kmalloc(sizeof(*t), GFP_KERNEL);
 	if (t == NULL)
-		return;
+		return NULL;
+
 	memcpy(t, &addrconf_sysctl, sizeof(*t));
 	for (i=0; t->addrconf_vars[i].data; i++) {
-		t->addrconf_vars[i].data += (char*)p - (char*)&ipv6_devconf;
+		t->addrconf_vars[i].data += (char*)p - (char*)&global_ipv6_devconf;
 		t->addrconf_vars[i].de = NULL;
 		t->addrconf_vars[i].extra1 = idev; /* embedded; no ref */
 	}
-	if (dev) {
-		dev_name = dev->name; 
-		t->addrconf_dev[0].ctl_name = dev->ifindex;
-	} else {
-		dev_name = "default";
-		t->addrconf_dev[0].ctl_name = NET_PROTO_CONF_DEFAULT;
-	}
 
 	/* 
 	 * Make a copy of dev_name, because '.procname' is regarded as const 
@@ -3668,6 +3691,7 @@ static void addrconf_sysctl_register(str
 	if (!dev_name)
 	    goto free;
 
+	t->addrconf_dev[0].ctl_name = ifindex;
 	t->addrconf_dev[0].procname = dev_name;
 
 	t->addrconf_dev[0].child = t->addrconf_vars;
@@ -3682,9 +3706,7 @@ static void addrconf_sysctl_register(str
 	t->sysctl_header = register_sysctl_table(t->addrconf_root_dir, 0);
 	if (t->sysctl_header == NULL)
 		goto free_procname;
-	else
-		p->sysctl = t;
-	return;
+	return t;
 
 	/* error path */
  free_procname:
@@ -3692,7 +3714,26 @@ static void addrconf_sysctl_register(str
  free:
 	kfree(t);
 
-	return;
+	return NULL;
+}
+
+/*
+ * Register the sysctl table for idev's device, or the "default" table
+ * when idev is NULL, and store the result in p->sysctl.
+ */
+static void addrconf_sysctl_register(struct inet6_dev *idev, struct ipv6_devconf *p)
+{
+	struct net_device *dev = idev ? idev->dev : NULL;
+	char *dev_name = "default";
+	int ifindex = NET_PROTO_CONF_DEFAULT;
+
+	if (dev) {
+		dev_name = dev->name;
+		ifindex = dev->ifindex;
+	}
+
+	/* the helper returns NULL on any failure */
+	p->sysctl = __addrconf_sysctl_register(idev, dev_name, ifindex, p);
+}
 
 static void addrconf_sysctl_unregister(struct ipv6_devconf *p)
@@ -3706,6 +3747,73 @@ static void addrconf_sysctl_unregister(s
 	}
 }
 
+/* Allocate and register per-VE "all" and "default" IPv6 devconf copies. */
+int addrconf_sysctl_init(struct ve_struct *ve)
+{
+	int err = 0;
+#if defined(CONFIG_SYSCTL) && \
+    (defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE))
+	struct ipv6_devconf *all_conf, *dflt_conf;
+
+	err = -ENOMEM;
+
+	all_conf = kmalloc(sizeof(*all_conf), GFP_KERNEL);
+	if (all_conf == NULL)
+		goto out;
+
+	memcpy(all_conf, &global_ipv6_devconf, sizeof(*all_conf));
+	all_conf->sysctl = __addrconf_sysctl_register(NULL, "all",
+			NET_PROTO_CONF_ALL, all_conf);
+	if (all_conf->sysctl == NULL)
+		goto free_all;
+
+	dflt_conf = kmalloc(sizeof(*dflt_conf), GFP_KERNEL);
+	if (dflt_conf == NULL)
+		goto unregister_all;
+
+	memcpy(dflt_conf, &global_ipv6_devconf_dflt, sizeof(*dflt_conf));
+	dflt_conf->sysctl = __addrconf_sysctl_register(NULL, "default",
+			NET_PROTO_CONF_DEFAULT, dflt_conf);
+	if (dflt_conf->sysctl == NULL)
+		goto free_dflt;
+
+	ve->_ipv6_devconf = all_conf;
+	ve->_ipv6_devconf_dflt = dflt_conf;
+	return 0;
+
+free_dflt:
+	kfree(dflt_conf);
+unregister_all:
+	addrconf_sysctl_unregister(all_conf);
+free_all:
+	kfree(all_conf);
+out:
+#endif
+	return err;
+}
+EXPORT_SYMBOL(addrconf_sysctl_init);
+
+/* Call addrconf_sysctl_unregister() on both of the VE's devconf copies. */
+void addrconf_sysctl_fini(struct ve_struct *ve)
+{
+#if defined(CONFIG_SYSCTL) && \
+    (defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE))
+	addrconf_sysctl_unregister(ve->_ipv6_devconf);
+	addrconf_sysctl_unregister(ve->_ipv6_devconf_dflt);
+#endif
+}
+EXPORT_SYMBOL(addrconf_sysctl_fini);
+
+/* Free the memory of both of the VE's devconf copies. */
+void addrconf_sysctl_free(struct ve_struct *ve)
+{
+#if defined(CONFIG_SYSCTL) && \
+    (defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE))
+	kfree(ve->_ipv6_devconf);
+	kfree(ve->_ipv6_devconf_dflt);
+#endif
+}
+EXPORT_SYMBOL(addrconf_sysctl_free);
 
 #endif
 
@@ -3731,6 +3839,11 @@ int __init addrconf_init(void)
 {
 	int err = 0;
 
+#ifdef CONFIG_VE
+	get_ve0()->_ipv6_devconf = &global_ipv6_devconf;
+	get_ve0()->_ipv6_devconf_dflt = &global_ipv6_devconf_dflt;
+#endif
+
 	/* The addrconf netdev notifier requires that loopback_dev
 	 * has it's ipv6 private information allocated and setup
 	 * before it can bring up and give link-local addresses
@@ -3772,7 +3885,7 @@ int __init addrconf_init(void)
 #ifdef CONFIG_SYSCTL
 	addrconf_sysctl.sysctl_header =
 		register_sysctl_table(addrconf_sysctl.addrconf_root_dir, 0);
-	addrconf_sysctl_register(NULL, &ipv6_devconf_dflt);
+	addrconf_sysctl_register(NULL, &global_ipv6_devconf_dflt); /* stores ->sysctl for cleanup */
 #endif
 
 	return 0;
@@ -3789,8 +3902,8 @@ void __exit addrconf_cleanup(void)
 
 	rtnetlink_links[PF_INET6] = NULL;
 #ifdef CONFIG_SYSCTL
-	addrconf_sysctl_unregister(&ipv6_devconf_dflt);
-	addrconf_sysctl_unregister(&ipv6_devconf);
+	addrconf_sysctl_unregister(&global_ipv6_devconf_dflt);
+	addrconf_sysctl_unregister(&global_ipv6_devconf);
 #endif
 
 	rtnl_lock();
@@ -3835,6 +3948,6 @@ void __exit addrconf_cleanup(void)
 #endif
 
 #ifdef CONFIG_PROC_FS
-	proc_net_remove("if_inet6");
+	remove_proc_glob_entry("net/if_inet6", NULL);
 #endif
 }
diff -uprN linux-2.6.16/net/ipv6/af_inet6.c linux-2.6.16.ovz/net/ipv6/af_inet6.c
--- linux-2.6.16/net/ipv6/af_inet6.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/net/ipv6/af_inet6.c	2006-07-05 08:34:56.000000000 -0400
@@ -60,6 +60,7 @@
 #ifdef CONFIG_IPV6_TUNNEL
 #include <net/ip6_tunnel.h>
 #endif
+#include <ub/ub_net.h>
 
 #include <asm/uaccess.h>
 #include <asm/system.h>
@@ -160,6 +161,13 @@ lookup_protocol:
 	if (sk == NULL)
 		goto out;
 
+	err = -ENOBUFS;
+	if (ub_sock_charge(sk, PF_INET6, sock->type))
+		goto out_sk_free;
+	/* If the charge was successful, sock_init_data() MUST be called to
+	 * set sk->sk_type; otherwise sk will be uncharged from the wrong resource.
+	 */
+
 	sock_init_data(sock, sk);
 
 	err = 0;
@@ -234,6 +242,9 @@ out:
 out_rcu_unlock:
 	rcu_read_unlock();
 	goto out;
+out_sk_free:
+	sk_free(sk);
+	return err;
 }
 
 
@@ -650,6 +661,8 @@ int inet6_sk_rebuild_header(struct sock 
 		ip6_dst_store(sk, dst, NULL);
 		sk->sk_route_caps = dst->dev->features &
 			~(NETIF_F_IP_CSUM | NETIF_F_TSO);
+		if (!sysctl_tcp_use_sg)
+			sk->sk_route_caps &= ~NETIF_F_SG;
 	}
 
 	return 0;
@@ -715,21 +728,21 @@ snmp6_mib_free(void *ptr[2])
 
 static int __init init_ipv6_mibs(void)
 {
-	if (snmp6_mib_init((void **)ipv6_statistics, sizeof (struct ipstats_mib),
+	if (snmp6_mib_init((void **)ve_ipv6_statistics, sizeof (struct ipstats_mib),
 			   __alignof__(struct ipstats_mib)) < 0)
 		goto err_ip_mib;
-	if (snmp6_mib_init((void **)icmpv6_statistics, sizeof (struct icmpv6_mib),
+	if (snmp6_mib_init((void **)ve_icmpv6_statistics, sizeof (struct icmpv6_mib),
 			   __alignof__(struct icmpv6_mib)) < 0)
 		goto err_icmp_mib;
-	if (snmp6_mib_init((void **)udp_stats_in6, sizeof (struct udp_mib),
+	if (snmp6_mib_init((void **)ve_udp_stats_in6, sizeof (struct udp_mib),
 			   __alignof__(struct udp_mib)) < 0)
 		goto err_udp_mib;
 	return 0;
 
 err_udp_mib:
-	snmp6_mib_free((void **)icmpv6_statistics);
+	snmp6_mib_free((void **)ve_icmpv6_statistics);
 err_icmp_mib:
-	snmp6_mib_free((void **)ipv6_statistics);
+	snmp6_mib_free((void **)ve_ipv6_statistics);
 err_ip_mib:
 	return -ENOMEM;
 	
@@ -737,9 +750,9 @@ err_ip_mib:
 
 static void cleanup_ipv6_mibs(void)
 {
-	snmp6_mib_free((void **)ipv6_statistics);
-	snmp6_mib_free((void **)icmpv6_statistics);
-	snmp6_mib_free((void **)udp_stats_in6);
+	snmp6_mib_free((void **)ve_ipv6_statistics);
+	snmp6_mib_free((void **)ve_icmpv6_statistics);
+	snmp6_mib_free((void **)ve_udp_stats_in6);
 }
 
 static int __init inet6_init(void)
diff -uprN linux-2.6.16/net/ipv6/anycast.c linux-2.6.16.ovz/net/ipv6/anycast.c
--- linux-2.6.16/net/ipv6/anycast.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/net/ipv6/anycast.c	2006-07-05 08:34:56.000000000 -0400
@@ -83,7 +83,7 @@ int ipv6_sock_ac_join(struct sock *sk, i
 	struct net_device *dev = NULL;
 	struct inet6_dev *idev;
 	struct ipv6_ac_socklist *pac;
-	int	ishost = !ipv6_devconf.forwarding;
+	int	ishost = !ve_ipv6_devconf.forwarding;
 	int	err = 0;
 
 	if (!capable(CAP_NET_ADMIN))
@@ -455,6 +455,8 @@ static inline struct ifacaddr6 *ac6_get_
 	     state->dev;
 	     state->dev = state->dev->next) {
 		struct inet6_dev *idev;
+		if (unlikely(!ve_accessible_strict(state->dev->owner_env, get_exec_env())))
+			continue;
 		idev = in6_dev_get(state->dev);
 		if (!idev)
 			continue;
@@ -484,6 +486,8 @@ static struct ifacaddr6 *ac6_get_next(st
 			state->idev = NULL;
 			break;
 		}
+		if (unlikely(!ve_accessible_strict(state->dev->owner_env, get_exec_env())))
+			continue;
 		state->idev = in6_dev_get(state->dev);
 		if (!state->idev)
 			continue;
@@ -579,7 +583,7 @@ static struct file_operations ac6_seq_fo
 
 int __init ac6_proc_init(void)
 {
-	if (!proc_net_fops_create("anycast6", S_IRUGO, &ac6_seq_fops))
+	if (!proc_glob_fops_create("net/anycast6", S_IRUGO, &ac6_seq_fops))
 		return -ENOMEM;
 
 	return 0;
@@ -587,7 +591,7 @@ int __init ac6_proc_init(void)
 
 void ac6_proc_exit(void)
 {
-	proc_net_remove("anycast6");
+	remove_proc_glob_entry("net/anycast6", NULL);
 }
 #endif
 
diff -uprN linux-2.6.16/net/ipv6/exthdrs.c linux-2.6.16.ovz/net/ipv6/exthdrs.c
--- linux-2.6.16/net/ipv6/exthdrs.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/net/ipv6/exthdrs.c	2006-07-05 08:34:56.000000000 -0400
@@ -489,6 +489,18 @@ int ipv6_parse_hopopts(struct sk_buff *s
 {
 	struct inet6_skb_parm *opt = IP6CB(skb);
 
+	/*
+	 * skb->nh.raw is equal to skb->data, and
+	 * skb->h.raw - skb->nh.raw is always equal to
+	 * sizeof(struct ipv6hdr) by definition of
+	 * hop-by-hop options.
+	 */
+	if (!pskb_may_pull(skb, sizeof(struct ipv6hdr) + 8) ||
+	    !pskb_may_pull(skb, sizeof(struct ipv6hdr) + ((skb->h.raw[1] + 1) << 3))) {
+		kfree_skb(skb);
+		return -1;
+	}
+
 	opt->hop = sizeof(struct ipv6hdr);
 	if (ip6_parse_tlv(tlvprochopopt_lst, skb)) {
 		skb->h.raw += (skb->h.raw[1]+1)<<3;
diff -uprN linux-2.6.16/net/ipv6/inet6_connection_sock.c linux-2.6.16.ovz/net/ipv6/inet6_connection_sock.c
--- linux-2.6.16/net/ipv6/inet6_connection_sock.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/net/ipv6/inet6_connection_sock.c	2006-07-05 08:34:56.000000000 -0400
@@ -26,6 +26,8 @@
 #include <net/ip6_route.h>
 #include <net/sock.h>
 #include <net/inet6_connection_sock.h>
+#include <ub/ub_net.h>
+#include <ub/ub_orphan.h>
 
 int inet6_csk_bind_conflict(const struct sock *sk,
 			    const struct inet_bind_bucket *tb)
@@ -36,6 +38,7 @@ int inet6_csk_bind_conflict(const struct
 	/* We must walk the whole port owner list in this case. -DaveM */
 	sk_for_each_bound(sk2, node, &tb->owners) {
 		if (sk != sk2 &&
+		    !ve_accessible_strict(VE_OWNER_SK(sk), VE_OWNER_SK(sk2)) &&
 		    (!sk->sk_bound_dev_if ||
 		     !sk2->sk_bound_dev_if ||
 		     sk->sk_bound_dev_if == sk2->sk_bound_dev_if) &&
@@ -173,6 +176,7 @@ int inet6_csk_xmit(struct sk_buff *skb, 
 
 		if (err) {
 			sk->sk_err_soft = -err;
+			kfree_skb(skb);
 			return err;
 		}
 
@@ -181,12 +185,15 @@ int inet6_csk_xmit(struct sk_buff *skb, 
 
 		if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) {
 			sk->sk_route_caps = 0;
+			kfree_skb(skb);
 			return err;
 		}
 
 		ip6_dst_store(sk, dst, NULL);
 		sk->sk_route_caps = dst->dev->features &
 			~(NETIF_F_IP_CSUM | NETIF_F_TSO);
+		if (!sysctl_tcp_use_sg)
+			sk->sk_route_caps &= ~NETIF_F_SG;
 	}
 
 	skb->dst = dst_clone(dst);
diff -uprN linux-2.6.16/net/ipv6/inet6_hashtables.c linux-2.6.16.ovz/net/ipv6/inet6_hashtables.c
--- linux-2.6.16/net/ipv6/inet6_hashtables.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/net/ipv6/inet6_hashtables.c	2006-07-05 08:34:56.000000000 -0400
@@ -31,9 +31,14 @@ struct sock *inet6_lookup_listener(struc
 	const struct hlist_node *node;
 	struct sock *result = NULL;
 	int score, hiscore = 0;
+	struct ve_struct *env;
+
+	env = get_exec_env();
 
 	read_lock(&hashinfo->lhash_lock);
-	sk_for_each(sk, node, &hashinfo->listening_hash[inet_lhashfn(hnum)]) {
+	sk_for_each(sk, node, &hashinfo->listening_hash[inet_lhashfn(hnum, VEID(env))]) {
+		if (!ve_accessible_strict(VE_OWNER_SK(sk), env))
+			continue;
 		if (inet_sk(sk)->num == hnum && sk->sk_family == PF_INET6) {
 			const struct ipv6_pinfo *np = inet6_sk(sk);
 			
@@ -84,7 +89,8 @@ EXPORT_SYMBOL_GPL(inet6_lookup);
 
 static int __inet6_check_established(struct inet_timewait_death_row *death_row,
 				     struct sock *sk, const __u16 lport,
-				     struct inet_timewait_sock **twp)
+				     struct inet_timewait_sock **twp,
+				     struct ve_struct *ve)
 {
 	struct inet_hashinfo *hinfo = death_row->hashinfo;
 	struct inet_sock *inet = inet_sk(sk);
@@ -94,7 +100,7 @@ static int __inet6_check_established(str
 	const int dif = sk->sk_bound_dev_if;
 	const u32 ports = INET_COMBINED_PORTS(inet->dport, lport);
 	const unsigned int hash = inet6_ehashfn(daddr, inet->num, saddr,
-						inet->dport);
+						inet->dport, VEID(ve));
 	struct inet_ehash_bucket *head = inet_ehash_bucket(hinfo, hash);
 	struct sock *sk2;
 	const struct hlist_node *node;
@@ -113,7 +119,8 @@ static int __inet6_check_established(str
 		   sk2->sk_family	       == PF_INET6	 &&
 		   ipv6_addr_equal(&tw6->tw_v6_daddr, saddr)	 &&
 		   ipv6_addr_equal(&tw6->tw_v6_rcv_saddr, daddr) &&
-		   sk2->sk_bound_dev_if == sk->sk_bound_dev_if) {
+		   sk2->sk_bound_dev_if == sk->sk_bound_dev_if &&
+		   ve_accessible_strict(tw->tw_owner_env, VEID(ve))) {
 			if (twsk_unique(sk, sk2, twp))
 				goto unique;
 			else
@@ -124,7 +131,7 @@ static int __inet6_check_established(str
 
 	/* And established part... */
 	sk_for_each(sk2, node, &head->chain) {
-		if (INET6_MATCH(sk2, hash, saddr, daddr, ports, dif))
+		if (INET6_MATCH(sk2, hash, saddr, daddr, ports, dif, ve))
 			goto not_unique;
 	}
 
@@ -173,7 +180,9 @@ int inet6_hash_connect(struct inet_timew
  	struct inet_bind_hashbucket *head;
  	struct inet_bind_bucket *tb;
 	int ret;
+	struct ve_struct *ve;
 
+	ve = VE_OWNER_SK(sk);
  	if (snum == 0) {
  		const int low = sysctl_local_port_range[0];
  		const int high = sysctl_local_port_range[1];
@@ -187,7 +196,8 @@ int inet6_hash_connect(struct inet_timew
  		local_bh_disable();
 		for (i = 1; i <= range; i++) {
 			port = low + (i + offset) % range;
- 			head = &hinfo->bhash[inet_bhashfn(port, hinfo->bhash_size)];
+ 			head = &hinfo->bhash[inet_bhashfn(port,
+					hinfo->bhash_size, VEID(ve))];
  			spin_lock(&head->lock);
 
  			/* Does not bother with rcv_saddr checks,
@@ -201,14 +211,14 @@ int inet6_hash_connect(struct inet_timew
  						goto next_port;
  					if (!__inet6_check_established(death_row,
 								       sk, port,
-								       &tw))
+								       &tw, ve))
  						goto ok;
  					goto next_port;
  				}
  			}
 
  			tb = inet_bind_bucket_create(hinfo->bind_bucket_cachep,
-						     head, port);
+						     head, port, ve);
  			if (!tb) {
  				spin_unlock(&head->lock);
  				break;
@@ -243,7 +253,7 @@ ok:
 		goto out;
  	}
 
- 	head = &hinfo->bhash[inet_bhashfn(snum, hinfo->bhash_size)];
+ 	head = &hinfo->bhash[inet_bhashfn(snum, hinfo->bhash_size, VEID(ve))];
  	tb   = inet_csk(sk)->icsk_bind_hash;
 	spin_lock_bh(&head->lock);
 
@@ -254,7 +264,7 @@ ok:
 	} else {
 		spin_unlock(&head->lock);
 		/* No definite answer... Walk to established hash table */
-		ret = __inet6_check_established(death_row, sk, snum, NULL);
+		ret = __inet6_check_established(death_row, sk, snum, NULL, ve);
 out:
 		local_bh_enable();
 		return ret;
diff -uprN linux-2.6.16/net/ipv6/ip6_fib.c linux-2.6.16.ovz/net/ipv6/ip6_fib.c
--- linux-2.6.16/net/ipv6/ip6_fib.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/net/ipv6/ip6_fib.c	2006-07-05 08:34:56.000000000 -0400
@@ -1128,8 +1128,12 @@ static int fib6_age(struct rt6_info *rt,
 
 static DEFINE_SPINLOCK(fib6_gc_lock);
 
+LIST_HEAD(fib6_table_list);
+
 void fib6_run_gc(unsigned long dummy)
 {
+	struct fib6_table *tbl;
+
 	if (dummy != ~0UL) {
 		spin_lock_bh(&fib6_gc_lock);
 		gc_args.timeout = dummy ? (int)dummy : ip6_rt_gc_interval;
@@ -1147,7 +1151,11 @@ void fib6_run_gc(unsigned long dummy)
 
 	write_lock_bh(&rt6_lock);
 	ndisc_dst_gc(&gc_args.more);
-	fib6_clean_tree(&ip6_routing_table, fib6_age, 0, NULL);
+	list_for_each_entry(tbl, &fib6_table_list, list) {
+		struct ve_struct *old_env = set_exec_env(tbl->owner_env);
+		fib6_clean_tree(&tbl->root, fib6_age, 0, NULL);
+		set_exec_env(old_env);
+	}
 	write_unlock_bh(&rt6_lock);
 
 	if (gc_args.more)
@@ -1163,7 +1171,7 @@ void __init fib6_init(void)
 {
 	fib6_node_kmem = kmem_cache_create("fib6_nodes",
 					   sizeof(struct fib6_node),
-					   0, SLAB_HWCACHE_ALIGN,
+					   0, SLAB_HWCACHE_ALIGN | SLAB_UBC,
 					   NULL, NULL);
 	if (!fib6_node_kmem)
 		panic("cannot create fib6_nodes cache");
diff -uprN linux-2.6.16/net/ipv6/ip6_flowlabel.c linux-2.6.16.ovz/net/ipv6/ip6_flowlabel.c
--- linux-2.6.16/net/ipv6/ip6_flowlabel.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/net/ipv6/ip6_flowlabel.c	2006-07-05 08:34:56.000000000 -0400
@@ -417,6 +417,9 @@ int ipv6_flowlabel_opt(struct sock *sk, 
 	struct ipv6_fl_socklist *sfl, **sflp;
 	struct ip6_flowlabel *fl;
 
+	if (!ve_is_super(get_exec_env()))
+		return -EPERM;
+
 	if (optlen < sizeof(freq))
 		return -EINVAL;
 
diff -uprN linux-2.6.16/net/ipv6/ip6_output.c linux-2.6.16.ovz/net/ipv6/ip6_output.c
--- linux-2.6.16/net/ipv6/ip6_output.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/net/ipv6/ip6_output.c	2006-07-05 08:34:56.000000000 -0400
@@ -319,7 +319,7 @@ int ip6_forward(struct sk_buff *skb)
 	struct ipv6hdr *hdr = skb->nh.ipv6h;
 	struct inet6_skb_parm *opt = IP6CB(skb);
 	
-	if (ipv6_devconf.forwarding == 0)
+	if (ve_ipv6_devconf.forwarding == 0)
 		goto error;
 
 	if (!xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) {
@@ -407,6 +407,20 @@ int ip6_forward(struct sk_buff *skb)
 		return -EMSGSIZE;
 	}
 
+	/*
+	 * We try to optimize forwarding of VE packets:
+	 * do not decrement TTL (and so save skb_cow)
+	 * during forwarding of outgoing pkts from VE.
+	 * For incoming pkts we still do ttl decr,
+	 * since such skb is not cloned and does not require
+	 * actual cow. So, there is at least one place
+	 * in pkts path with mandatory ttl decr, that is
+	 * sufficient to prevent routing loops.
+	 */
+	hdr = skb->nh.ipv6h;
+	if (skb->dev->features & NETIF_F_VENET) /* src is VENET device */
+		goto no_ttl_decr;
+
 	if (skb_cow(skb, dst->dev->hard_header_len)) {
 		IP6_INC_STATS(IPSTATS_MIB_OUTDISCARDS);
 		goto drop;
@@ -418,6 +432,7 @@ int ip6_forward(struct sk_buff *skb)
  
 	hdr->hop_limit--;
 
+no_ttl_decr:
 	IP6_INC_STATS_BH(IPSTATS_MIB_OUTFORWDATAGRAMS);
 	return NF_HOOK(PF_INET6,NF_IP6_FORWARD, skb, skb->dev, dst->dev, ip6_forward_finish);
 
diff -uprN linux-2.6.16/net/ipv6/mcast.c linux-2.6.16.ovz/net/ipv6/mcast.c
--- linux-2.6.16/net/ipv6/mcast.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/net/ipv6/mcast.c	2006-07-05 08:34:56.000000000 -0400
@@ -156,7 +156,7 @@ static int ip6_mc_leave_src(struct sock 
 #define IGMP6_UNSOLICITED_IVAL	(10*HZ)
 #define MLD_QRV_DEFAULT		2
 
-#define MLD_V1_SEEN(idev) (ipv6_devconf.force_mld_version == 1 || \
+#define MLD_V1_SEEN(idev) (ve_ipv6_devconf.force_mld_version == 1 || \
 		(idev)->cnf.force_mld_version == 1 || \
 		((idev)->mc_v1_seen && \
 		time_before(jiffies, (idev)->mc_v1_seen)))
@@ -248,6 +248,7 @@ int ipv6_sock_mc_join(struct sock *sk, i
 
 	return 0;
 }
+EXPORT_SYMBOL_GPL(ipv6_sock_mc_join);
 
 /*
  *	socket leave on multicast group
@@ -2166,15 +2167,18 @@ static void igmp6_leave_group(struct ifm
 static void mld_gq_timer_expire(unsigned long data)
 {
 	struct inet6_dev *idev = (struct inet6_dev *)data;
+	struct ve_struct *old_env = set_exec_env(idev->dev->owner_env);
 
 	idev->mc_gq_running = 0;
 	mld_send_report(idev, NULL);
 	__in6_dev_put(idev);
+	set_exec_env(old_env);
 }
 
 static void mld_ifc_timer_expire(unsigned long data)
 {
 	struct inet6_dev *idev = (struct inet6_dev *)data;
+	struct ve_struct *old_env = set_exec_env(idev->dev->owner_env);
 
 	mld_send_cr(idev);
 	if (idev->mc_ifc_count) {
@@ -2183,6 +2187,7 @@ static void mld_ifc_timer_expire(unsigne
 			mld_ifc_start_timer(idev, idev->mc_maxdelay);
 	}
 	__in6_dev_put(idev);
+	set_exec_env(old_env);
 }
 
 static void mld_ifc_event(struct inet6_dev *idev)
@@ -2197,6 +2202,7 @@ static void mld_ifc_event(struct inet6_d
 static void igmp6_timer_handler(unsigned long data)
 {
 	struct ifmcaddr6 *ma = (struct ifmcaddr6 *) data;
+	struct ve_struct *old_env = set_exec_env(ma->idev->dev->owner_env);
 
 	if (MLD_V1_SEEN(ma->idev))
 		igmp6_send(&ma->mca_addr, ma->idev->dev, ICMPV6_MGM_REPORT);
@@ -2208,6 +2214,7 @@ static void igmp6_timer_handler(unsigned
 	ma->mca_flags &= ~MAF_TIMER_RUNNING;
 	spin_unlock(&ma->mca_lock);
 	ma_put(ma);
+	set_exec_env(old_env);
 }
 
 /* Device going down */
@@ -2331,6 +2338,8 @@ static inline struct ifmcaddr6 *igmp6_mc
 	     state->dev; 
 	     state->dev = state->dev->next) {
 		struct inet6_dev *idev;
+		if (unlikely(!ve_accessible_strict(state->dev->owner_env, get_exec_env())))
+			continue;
 		idev = in6_dev_get(state->dev);
 		if (!idev)
 			continue;
@@ -2361,6 +2370,8 @@ static struct ifmcaddr6 *igmp6_mc_get_ne
 			state->idev = NULL;
 			break;
 		}
+		if (unlikely(!ve_accessible_strict(state->dev->owner_env, get_exec_env())))
+			continue;
 		state->idev = in6_dev_get(state->dev);
 		if (!state->idev)
 			continue;
@@ -2476,6 +2487,8 @@ static inline struct ip6_sf_list *igmp6_
 	     state->dev; 
 	     state->dev = state->dev->next) {
 		struct inet6_dev *idev;
+		if (unlikely(!ve_accessible_strict(state->dev->owner_env, get_exec_env())))
+			continue;
 		idev = in6_dev_get(state->dev);
 		if (unlikely(idev == NULL))
 			continue;
@@ -2515,6 +2528,8 @@ static struct ip6_sf_list *igmp6_mcf_get
 				state->idev = NULL;
 				goto out;
 			}
+			if (unlikely(!ve_accessible_strict(state->dev->owner_env, get_exec_env())))
+				continue;
 			state->idev = in6_dev_get(state->dev);
 			if (!state->idev)
 				continue;
@@ -2657,8 +2672,8 @@ int __init igmp6_init(struct net_proto_f
 	np->hop_limit = 1;
 
 #ifdef CONFIG_PROC_FS
-	proc_net_fops_create("igmp6", S_IRUGO, &igmp6_mc_seq_fops);
-	proc_net_fops_create("mcfilter6", S_IRUGO, &igmp6_mcf_seq_fops);
+	proc_glob_fops_create("net/igmp6", S_IRUGO, &igmp6_mc_seq_fops);
+	proc_glob_fops_create("net/mcfilter6", S_IRUGO, &igmp6_mcf_seq_fops);
 #endif
 
 	return 0;
@@ -2670,7 +2685,7 @@ void igmp6_cleanup(void)
 	igmp6_socket = NULL; /* for safety */
 
 #ifdef CONFIG_PROC_FS
-	proc_net_remove("mcfilter6");
-	proc_net_remove("igmp6");
+	remove_proc_glob_entry("net/mcfilter6", NULL);
+	remove_proc_glob_entry("net/igmp6", NULL);
 #endif
 }
diff -uprN linux-2.6.16/net/ipv6/ndisc.c linux-2.6.16.ovz/net/ipv6/ndisc.c
--- linux-2.6.16/net/ipv6/ndisc.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/net/ipv6/ndisc.c	2006-07-05 08:34:56.000000000 -0400
@@ -124,7 +124,7 @@ static struct neigh_ops ndisc_direct_ops
 	.queue_xmit =		dev_queue_xmit,
 };
 
-struct neigh_table nd_tbl = {
+struct neigh_table global_nd_tbl = {
 	.family =	AF_INET6,
 	.entry_size =	sizeof(struct neighbour) + sizeof(struct in6_addr),
 	.key_len =	sizeof(struct in6_addr),
@@ -135,7 +135,7 @@ struct neigh_table nd_tbl = {
 	.proxy_redo =	pndisc_redo,
 	.id =		"ndisc_cache",
 	.parms = {
-		.tbl =			&nd_tbl,
+		.tbl =			&global_nd_tbl,
 		.base_reachable_time =	30 * HZ,
 		.retrans_time =	 1 * HZ,
 		.gc_staletime =	60 * HZ,
@@ -1660,7 +1660,9 @@ int __init ndisc_init(struct net_proto_f
          * Initialize the neighbour table
          */
 	
-	neigh_table_init(&nd_tbl);
+	get_ve0()->ve_nd_tbl = &global_nd_tbl;
+	if (neigh_table_init(&nd_tbl))
+		panic("cannot initialize IPv6 NDISC tables\n");
 
 #ifdef CONFIG_SYSCTL
 	neigh_sysctl_register(NULL, &nd_tbl.parms, NET_IPV6, NET_IPV6_NEIGH, 
@@ -1682,3 +1684,52 @@ void ndisc_cleanup(void)
 	sock_release(ndisc_socket);
 	ndisc_socket = NULL; /* For safety. */
 }
+
+/* Set up a private NDISC neighbour table for @ve; returns 0 or -errno. */
+int ve_ndisc_init(struct ve_struct *ve)
+{
+	struct ve_struct *old_env;
+	int err;
+
+	ve->ve_nd_tbl = kmalloc(sizeof(struct neigh_table), GFP_KERNEL);
+	if (ve->ve_nd_tbl == NULL)
+		return -ENOMEM;
+
+	/* start from a copy of the global table, with parms pointing at the copy */
+	*(ve->ve_nd_tbl) = global_nd_tbl;
+	ve->ve_nd_tbl->parms.tbl = ve->ve_nd_tbl;
+
+	/* initialise and register while running in ve's context */
+	old_env = set_exec_env(ve);
+	err = neigh_table_init(ve->ve_nd_tbl);
+	if (err)
+		goto out_free;
+#ifdef CONFIG_SYSCTL
+	neigh_sysctl_register(NULL, &nd_tbl.parms, NET_IPV6, NET_IPV6_NEIGH,
+			      "ipv6", &ndisc_ifinfo_sysctl_change,
+			      &ndisc_ifinfo_sysctl_strategy);
+#endif
+	set_exec_env(old_env);
+	return 0;
+
+out_free:
+	/* the error path must also switch back to the caller's context */
+	set_exec_env(old_env);
+	kfree(ve->ve_nd_tbl);
+	ve->ve_nd_tbl = NULL;
+	return err;
+}
+EXPORT_SYMBOL(ve_ndisc_init);
+
+void ve_ndisc_fini(struct ve_struct *ve)
+{
+	if (ve->ve_nd_tbl == NULL)	/* no per-VE table to release */
+		return;
+#ifdef CONFIG_SYSCTL
+	neigh_sysctl_unregister(&ve->ve_nd_tbl->parms);
+#endif
+	neigh_table_clear(ve->ve_nd_tbl);
+	kfree(ve->ve_nd_tbl);
+	ve->ve_nd_tbl = NULL;	/* a repeated call takes the early return */
+}
+EXPORT_SYMBOL(ve_ndisc_fini);
diff -uprN linux-2.6.16/net/ipv6/netfilter/ip6_queue.c linux-2.6.16.ovz/net/ipv6/netfilter/ip6_queue.c
--- linux-2.6.16/net/ipv6/netfilter/ip6_queue.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/net/ipv6/netfilter/ip6_queue.c	2006-07-05 08:34:56.000000000 -0400
@@ -540,8 +540,11 @@ ipq_rcv_sk(struct sock *sk, int len)
 	down(&ipqnl_sem);
 			
 	for (qlen = skb_queue_len(&sk->sk_receive_queue); qlen; qlen--) {
+		struct ve_struct *env;
 		skb = skb_dequeue(&sk->sk_receive_queue);
+		env = set_exec_env(VE_OWNER_SKB(skb));
 		ipq_rcv_skb(skb);
+		(void)set_exec_env(env);
 		kfree_skb(skb);
 	}
 		
diff -uprN linux-2.6.16/net/ipv6/netfilter/ip6_tables.c linux-2.6.16.ovz/net/ipv6/netfilter/ip6_tables.c
--- linux-2.6.16/net/ipv6/netfilter/ip6_tables.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/net/ipv6/netfilter/ip6_tables.c	2006-07-05 08:34:56.000000000 -0400
@@ -32,9 +32,11 @@
 #include <asm/semaphore.h>
 #include <linux/proc_fs.h>
 #include <linux/cpumask.h>
+#include <ub/ub_mem.h>
 
 #include <linux/netfilter_ipv6/ip6_tables.h>
 #include <linux/netfilter/x_tables.h>
+#include <linux/nfcalls.h>
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
@@ -79,6 +81,14 @@ do {								\
 #define inline
 #endif
 
+#ifdef CONFIG_VE_IPTABLES
+/* include ve.h and define get_exec_env; the target pointer is then per exec env */
+#include <linux/sched.h>
+#define ve_ip6t_standard_target	(get_exec_env()->_ip6t_standard_target)
+#else	/* !CONFIG_VE_IPTABLES: parenthesized so the expansion is one operand */
+#define ve_ip6t_standard_target	(&ip6t_standard_target)
+#endif
+
 /*
    We keep a set of rules for each CPU, so we can avoid write-locking
    them in the softirq when updating the counters and therefore
@@ -632,7 +642,7 @@ check_entry(struct ip6t_entry *e, const 
 	}
 	t->u.kernel.target = target;
 
-	if (t->u.kernel.target == &ip6t_standard_target) {
+	if (t->u.kernel.target == ve_ip6t_standard_target) {
 		if (!standard_check(t, size)) {
 			ret = -EINVAL;
 			goto cleanup_matches;
@@ -1120,7 +1130,7 @@ do_add_counters(void __user *user, unsig
 
 	write_lock_bh(&t->lock);
 	private = t->private;
-	if (private->number != paddc->num_counters) {
+	if (private->number != tmp.num_counters) {
 		ret = -EINVAL;
 		goto unlock_up_free;
 	}
@@ -1148,7 +1158,7 @@ do_ip6t_set_ctl(struct sock *sk, int cmd
 {
 	int ret;
 
-	if (!capable(CAP_NET_ADMIN))
+	if (!capable(CAP_VE_NET_ADMIN))
 		return -EPERM;
 
 	switch (cmd) {
@@ -1173,7 +1183,7 @@ do_ip6t_get_ctl(struct sock *sk, int cmd
 {
 	int ret;
 
-	if (!capable(CAP_NET_ADMIN))
+	if (!capable(CAP_VE_NET_ADMIN))
 		return -EPERM;
 
 	switch (cmd) {
@@ -1271,7 +1281,7 @@ do_ip6t_get_ctl(struct sock *sk, int cmd
 	return ret;
 }
 
-int ip6t_register_table(struct xt_table *table,
+struct ip6t_table *ip6t_register_table(struct xt_table *table,
 			const struct ip6t_replace *repl)
 {
 	int ret;
@@ -1282,7 +1292,7 @@ int ip6t_register_table(struct xt_table 
 
 	newinfo = xt_alloc_table_info(repl->size);
 	if (!newinfo)
-		return -ENOMEM;
+		return ERR_PTR(-ENOMEM);
 
 	/* choose the copy on our node/cpu */
 	loc_cpu_entry = newinfo->entries[raw_smp_processor_id()];
@@ -1295,15 +1305,13 @@ int ip6t_register_table(struct xt_table 
 			      repl->underflow);
 	if (ret != 0) {
 		xt_free_table_info(newinfo);
-		return ret;
+		return ERR_PTR(ret);
 	}
 
-	if (xt_register_table(table, &bootstrap, newinfo) != 0) {
+	table = virt_xt_register_table(table, &bootstrap, newinfo);
+	if (IS_ERR(table))
 		xt_free_table_info(newinfo);
-		return ret;
-	}
-
-	return 0;
+	return table;
 }
 
 void ip6t_unregister_table(struct xt_table *table)
@@ -1311,7 +1319,7 @@ void ip6t_unregister_table(struct xt_tab
 	struct xt_table_info *private;
 	void *loc_cpu_entry;
 
-	private = xt_unregister_table(table);
+	private = virt_xt_unregister_table(table);
 
 	/* Decrease module usage counts and free resources */
 	loc_cpu_entry = private->entries[raw_smp_processor_id()];
@@ -1319,6 +1327,29 @@ void ip6t_unregister_table(struct xt_tab
 	xt_free_table_info(private);
 }
 
+/* Clean up every rule of the AF_INET6 table named table->name, zero its size. */
+void ip6t_flush_table(struct xt_table *table)
+{
+	struct xt_table_info *info;
+	struct xt_table *found;
+	void *entries;
+
+	if (!table)
+		return;
+	found = xt_find_table_lock(AF_INET6, table->name);
+	if (!found || IS_ERR(found))
+		return;
+
+	info = found->private;
+	entries = info->entries[raw_smp_processor_id()];
+	IP6T_ENTRY_ITERATE(entries, info->size, cleanup_entry, NULL);
+	if (info->number > info->initial_entries)
+		module_put(found->me);
+	info->size = 0;
+	xt_table_unlock(found);
+	module_put(found->me);
+}
+
 /* Returns 1 if the type and code is matched by the range, 0 otherwise */
 static inline int
 icmp6_type_code_match(u_int8_t test_type, u_int8_t min_code, u_int8_t max_code,
@@ -1405,36 +1436,103 @@ static struct ip6t_match icmp6_matchstru
 	.checkentry	= &icmp6_checkentry,
 };
 
-static int __init init(void)
+/*
+ * Register the standard/error targets and the icmp6 match, then call
+ * xt_proto_init(AF_INET6).  Returns 0, or a negative errno after undoing
+ * the steps that had already succeeded.
+ */
+static int init_ip6tables(void)
 {
 	int ret;
 
-	xt_proto_init(AF_INET6);
+#ifdef CONFIG_VE_IPTABLES
+	/* Without VE support the macro is an address, i.e. never NULL. */
+	if (ve_ip6t_standard_target != NULL)
+		return -EEXIST;
+#endif
 
-	/* Noone else will be downing sem now, so we won't sleep */
-	xt_register_target(AF_INET6, &ip6t_standard_target);
-	xt_register_target(AF_INET6, &ip6t_error_target);
-	xt_register_match(AF_INET6, &icmp6_matchstruct);
+	ret = xt_register_target(AF_INET6, &ip6t_standard_target);
+	if (ret)
+		goto out;
+#ifdef CONFIG_VE_IPTABLES
+	ve_ip6t_standard_target = xt_find_target(AF_INET6, IP6T_STANDARD_TARGET, 0);
+	if (IS_ERR(ve_ip6t_standard_target)) {
+		/* Propagate the lookup error; ret is still 0 at this point. */
+		ret = PTR_ERR(ve_ip6t_standard_target);
+		goto out_error;
+	}
+#endif
+	ret = xt_register_target(AF_INET6, &ip6t_error_target);
+	if (ret)
+		goto out_error;
+	ret = xt_register_match(AF_INET6, &icmp6_matchstruct);
+	if (ret)
+		goto out_icmp;
+	ret = xt_proto_init(AF_INET6);
+	if (ret)
+		goto out_proc;
+	return 0;
+
+out_proc:
+	xt_unregister_match(AF_INET6, &icmp6_matchstruct);
+out_icmp:
+	xt_unregister_target(AF_INET6, &ip6t_error_target);
+out_error:
+#ifdef CONFIG_VE_IPTABLES
+	ve_ip6t_standard_target = NULL;
+#endif
+	xt_unregister_target(AF_INET6, &ip6t_standard_target);
+out:
+	return ret;
+}
+
+static void fini_ip6tables(void)	/* undoes init_ip6tables() in reverse order */
+{
+	xt_proto_fini(AF_INET6);
+	xt_unregister_match(AF_INET6, &icmp6_matchstruct);
+	xt_unregister_target(AF_INET6, &ip6t_error_target);
+#ifdef CONFIG_VE_IPTABLES
+	ve_ip6t_standard_target = NULL;	/* lets init_ip6tables() pass its -EEXIST check again */
+#endif
+	xt_unregister_target(AF_INET6, &ip6t_standard_target);
+}
+
+static int __init init(void)
+{
+	int ret;
+
+	ret = init_ip6tables();
+	if (ret)
+		goto out;
 
 	/* Register setsockopt */
 	ret = nf_register_sockopt(&ip6t_sockopts);
 	if (ret < 0) {
 		duprintf("Unable to register sockopts.\n");
-		xt_proto_fini(AF_INET6);
-		return ret;
+		goto out_sockopts;
 	}
 
+	KSYMRESOLVE(init_ip6tables);
+	KSYMRESOLVE(fini_ip6tables);
+	KSYMRESOLVE(ip6t_flush_table);
+	KSYMMODRESOLVE(ip6_tables);
 	printk("ip6_tables: (C) 2000-2006 Netfilter Core Team\n");
 	return 0;
+
+out_sockopts:
+	fini_ip6tables();
+out:
+	return ret;
 }
 
 static void __exit fini(void)
 {
+	KSYMMODUNRESOLVE(ip6_tables);
+	KSYMUNRESOLVE(init_ip6tables);
+	KSYMUNRESOLVE(fini_ip6tables);
+	KSYMUNRESOLVE(ip6t_flush_table);
 	nf_unregister_sockopt(&ip6t_sockopts);
-	xt_unregister_match(AF_INET6, &icmp6_matchstruct);
-	xt_unregister_target(AF_INET6, &ip6t_error_target);
-	xt_unregister_target(AF_INET6, &ip6t_standard_target);
-	xt_proto_fini(AF_INET6);
+	fini_ip6tables();
 }
 
 /*
@@ -1516,6 +1604,7 @@ EXPORT_SYMBOL(ip6t_do_table);
 EXPORT_SYMBOL(ip6t_ext_hdr);
 EXPORT_SYMBOL(ipv6_find_hdr);
 EXPORT_SYMBOL(ip6_masked_addrcmp);
+EXPORT_SYMBOL(ip6t_flush_table);
 
-module_init(init);
+subsys_initcall(init);
 module_exit(fini);
diff -uprN linux-2.6.16/net/ipv6/netfilter/ip6t_LOG.c linux-2.6.16.ovz/net/ipv6/netfilter/ip6t_LOG.c
--- linux-2.6.16/net/ipv6/netfilter/ip6t_LOG.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/net/ipv6/netfilter/ip6t_LOG.c	2006-07-05 08:34:56.000000000 -0400
@@ -20,6 +20,7 @@
 #include <net/udp.h>
 #include <net/tcp.h>
 #include <net/ipv6.h>
+#include <linux/nfcalls.h>
 #include <linux/netfilter.h>
 #include <linux/netfilter_ipv6/ip6_tables.h>
 
@@ -488,10 +489,23 @@ static struct nf_logger ip6t_logger = {
 	.me		= THIS_MODULE,
 };
 
+int init_ip6table_LOG(void)	/* returns ip6t_register_target()'s result */
+{
+	return ip6t_register_target(&ip6t_log_reg);
+}
+
+void fini_ip6table_LOG(void)	/* unregisters the target added by init_ip6table_LOG() */
+{
+	ip6t_unregister_target(&ip6t_log_reg);
+}
+
 static int __init init(void)
 {
-	if (ip6t_register_target(&ip6t_log_reg))
-		return -EINVAL;
+	int err;
+
+	err = init_ip6table_LOG();
+	if (err < 0)
+		return err;
 	if (nf_log_register(PF_INET6, &ip6t_logger) < 0) {
 		printk(KERN_WARNING "ip6t_LOG: not logging via system console "
 		       "since somebody else already registered for PF_INET6\n");
@@ -499,13 +513,19 @@ static int __init init(void)
 		 * ip6tables userspace would abort */
 	}
 
+	KSYMRESOLVE(init_ip6table_LOG);
+	KSYMRESOLVE(fini_ip6table_LOG);
+	KSYMMODRESOLVE(ip6t_LOG);
 	return 0;
 }
 
 static void __exit fini(void)
 {
+	KSYMMODUNRESOLVE(ip6t_LOG);
+	KSYMUNRESOLVE(init_ip6table_LOG);
+	KSYMUNRESOLVE(fini_ip6table_LOG);
 	nf_log_unregister_logger(&ip6t_logger);
-	ip6t_unregister_target(&ip6t_log_reg);
+	fini_ip6table_LOG();
 }
 
 module_init(init);
diff -uprN linux-2.6.16/net/ipv6/netfilter/ip6t_REJECT.c linux-2.6.16.ovz/net/ipv6/netfilter/ip6t_REJECT.c
--- linux-2.6.16/net/ipv6/netfilter/ip6t_REJECT.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/net/ipv6/netfilter/ip6t_REJECT.c	2006-07-05 08:34:56.000000000 -0400
@@ -26,6 +26,7 @@
 #include <net/ip6_checksum.h>
 #include <net/ip6_fib.h>
 #include <net/ip6_route.h>
+#include <linux/nfcalls.h>
 #include <net/flow.h>
 #include <linux/netfilter_ipv6/ip6_tables.h>
 #include <linux/netfilter_ipv6/ip6t_REJECT.h>
@@ -268,17 +269,39 @@ static struct ip6t_target ip6t_reject_re
 	.me		= THIS_MODULE
 };
 
-static int __init init(void)
+int init_ip6table_REJECT(void)
 {
 	if (ip6t_register_target(&ip6t_reject_reg))
 		return -EINVAL;
 	return 0;
 }
 
-static void __exit fini(void)
+void fini_ip6table_REJECT(void)
 {
 	ip6t_unregister_target(&ip6t_reject_reg);
 }
 
+/* Module init: set up the REJECT target, then do the KSYM*RESOLVE calls. */
+static int __init init(void)
+{
+	int rc = init_ip6table_REJECT();
+
+	if (rc < 0)
+		return rc;
+
+	KSYMRESOLVE(init_ip6table_REJECT);
+	KSYMRESOLVE(fini_ip6table_REJECT);
+	KSYMMODRESOLVE(ip6t_REJECT);
+	return 0;
+}
+
+static void __exit fini(void)	/* module exit: mirror of init() in reverse order */
+{
+	KSYMMODUNRESOLVE(ip6t_REJECT);
+	KSYMUNRESOLVE(init_ip6table_REJECT);
+	KSYMUNRESOLVE(fini_ip6table_REJECT);
+	fini_ip6table_REJECT();	/* target is unregistered last */
+}
+
 module_init(init);
 module_exit(fini);
diff -uprN linux-2.6.16/net/ipv6/netfilter/ip6t_multiport.c linux-2.6.16.ovz/net/ipv6/netfilter/ip6t_multiport.c
--- linux-2.6.16/net/ipv6/netfilter/ip6t_multiport.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/net/ipv6/netfilter/ip6t_multiport.c	2006-07-05 08:34:56.000000000 -0400
@@ -14,6 +14,7 @@
 #include <linux/udp.h>
 #include <linux/skbuff.h>
 #include <linux/in.h>
+#include <linux/nfcalls.h>
 
 #include <linux/netfilter_ipv6/ip6t_multiport.h>
 #include <linux/netfilter_ipv6/ip6_tables.h>
@@ -112,15 +113,37 @@ static struct ip6t_match multiport_match
 	.me		= THIS_MODULE,
 };
 
-static int __init init(void)
+int init_ip6table_multiport(void)
 {
 	return ip6t_register_match(&multiport_match);
 }
 
-static void __exit fini(void)
+void fini_ip6table_multiport(void)
 {
 	ip6t_unregister_match(&multiport_match);
 }
 
+/* Module init: set up the multiport match, then do the KSYM*RESOLVE calls. */
+static int __init init(void)
+{
+	int rc = init_ip6table_multiport();
+
+	if (rc < 0)
+		return rc;
+
+	KSYMRESOLVE(init_ip6table_multiport);
+	KSYMRESOLVE(fini_ip6table_multiport);
+	KSYMMODRESOLVE(ip6t_multiport);
+	return 0;
+}
+
+static void __exit fini(void)	/* module exit: mirror of init() in reverse order */
+{
+	KSYMMODUNRESOLVE(ip6t_multiport);
+	KSYMUNRESOLVE(init_ip6table_multiport);
+	KSYMUNRESOLVE(fini_ip6table_multiport);
+	fini_ip6table_multiport();	/* match is unregistered last */
+}
+
 module_init(init);
 module_exit(fini);
diff -uprN linux-2.6.16/net/ipv6/netfilter/ip6table_filter.c linux-2.6.16.ovz/net/ipv6/netfilter/ip6table_filter.c
--- linux-2.6.16/net/ipv6/netfilter/ip6table_filter.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/net/ipv6/netfilter/ip6table_filter.c	2006-07-05 08:34:56.000000000 -0400
@@ -11,12 +11,20 @@
 
 #include <linux/module.h>
 #include <linux/moduleparam.h>
+#include <linux/nfcalls.h>
 #include <linux/netfilter_ipv6/ip6_tables.h>
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
 MODULE_DESCRIPTION("ip6tables filter table");
 
+#ifdef CONFIG_VE_IPTABLES
+#include <linux/sched.h>
+#define ve_packet_filter	(get_exec_env()->_ve_ip6t_filter_pf)
+#else	/* !CONFIG_VE_IPTABLES: parenthesized so the expansion is one operand */
+#define	ve_packet_filter	(&packet_filter)
+#endif
+
 #define FILTER_VALID_HOOKS ((1 << NF_IP6_LOCAL_IN) | (1 << NF_IP6_FORWARD) | (1 << NF_IP6_LOCAL_OUT))
 
 /* Standard entry. */
@@ -43,7 +51,7 @@ static struct
 	struct ip6t_replace repl;
 	struct ip6t_standard entries[3];
 	struct ip6t_error term;
-} initial_table __initdata
+} initial_table
 = { { "filter", FILTER_VALID_HOOKS, 4,
       sizeof(struct ip6t_standard) * 3 + sizeof(struct ip6t_error),
       { [NF_IP6_LOCAL_IN] = 0,
@@ -108,7 +116,7 @@ ip6t_hook(unsigned int hook,
 	 const struct net_device *out,
 	 int (*okfn)(struct sk_buff *))
 {
-	return ip6t_do_table(pskb, hook, in, out, &packet_filter, NULL);
+	return ip6t_do_table(pskb, hook, in, out, ve_packet_filter, NULL);
 }
 
 static unsigned int
@@ -128,7 +136,7 @@ ip6t_local_out_hook(unsigned int hook,
 	}
 #endif
 
-	return ip6t_do_table(pskb, hook, in, out, &packet_filter, NULL);
+	return ip6t_do_table(pskb, hook, in, out, ve_packet_filter, NULL);
 }
 
 static struct nf_hook_ops ip6t_ops[] = {
@@ -159,56 +167,89 @@ static struct nf_hook_ops ip6t_ops[] = {
 static int forward = NF_ACCEPT;
 module_param(forward, bool, 0000);
 
-static int __init init(void)
+int init_ip6table_filter(void)
 {
 	int ret;
-
-	if (forward < 0 || forward > NF_MAX_VERDICT) {
-		printk("iptables forward must be 0 or 1\n");
-		return -EINVAL;
-	}
-
-	/* Entry 1 is the FORWARD hook */
-	initial_table.entries[1].target.verdict = -forward - 1;
+	struct ip6t_table *tmp_filter;
 
 	/* Register table */
-	ret = ip6t_register_table(&packet_filter, &initial_table.repl);
-	if (ret < 0)
-		return ret;
+	tmp_filter = ip6t_register_table(&packet_filter,
+			&initial_table.repl);
+	if (IS_ERR(tmp_filter))
+		return PTR_ERR(tmp_filter);
+#ifdef CONFIG_VE_IPTABLES
+	ve_packet_filter = tmp_filter;
+#endif
 
 	/* Register hooks */
-	ret = nf_register_hook(&ip6t_ops[0]);
+	ret = virt_nf_register_hook(&ip6t_ops[0]);
 	if (ret < 0)
 		goto cleanup_table;
 
-	ret = nf_register_hook(&ip6t_ops[1]);
+	ret = virt_nf_register_hook(&ip6t_ops[1]);
 	if (ret < 0)
 		goto cleanup_hook0;
 
-	ret = nf_register_hook(&ip6t_ops[2]);
+	ret = virt_nf_register_hook(&ip6t_ops[2]);
 	if (ret < 0)
 		goto cleanup_hook1;
 
 	return ret;
 
  cleanup_hook1:
-	nf_unregister_hook(&ip6t_ops[1]);
+	virt_nf_unregister_hook(&ip6t_ops[1]);
  cleanup_hook0:
-	nf_unregister_hook(&ip6t_ops[0]);
+	virt_nf_unregister_hook(&ip6t_ops[0]);
  cleanup_table:
-	ip6t_unregister_table(&packet_filter);
+	ip6t_unregister_table(ve_packet_filter);
+#ifdef CONFIG_VE_IPTABLES
+	ve_packet_filter = NULL;
+#endif
 
 	return ret;
 }
 
-static void __exit fini(void)
+void fini_ip6table_filter(void)
 {
 	unsigned int i;
 
 	for (i = 0; i < sizeof(ip6t_ops)/sizeof(struct nf_hook_ops); i++)
-		nf_unregister_hook(&ip6t_ops[i]);
+		virt_nf_unregister_hook(&ip6t_ops[i]);
 
-	ip6t_unregister_table(&packet_filter);
+	ip6t_unregister_table(ve_packet_filter);
+#ifdef CONFIG_VE_IPTABLES
+	ve_packet_filter = NULL;
+#endif
+}
+
+static int __init init(void)	/* module entry point */
+{
+	int err;
+
+	if (forward < 0 || forward > NF_MAX_VERDICT) {
+		printk("iptables forward must be 0 or 1\n");
+		return -EINVAL;
+	}
+
+	/* Entry 1 is the FORWARD hook: store the `forward` module parameter as its verdict */
+	initial_table.entries[1].target.verdict = -forward - 1;
+
+	err = init_ip6table_filter();
+	if (err < 0)
+		return err;
+
+	KSYMRESOLVE(init_ip6table_filter);	/* NOTE(review): KSYM* macros are not defined in this file - confirm semantics */
+	KSYMRESOLVE(fini_ip6table_filter);
+	KSYMMODRESOLVE(ip6table_filter);
+	return 0;
+}
+
+static void __exit fini(void)
+{
+	KSYMMODUNRESOLVE(ip6table_filter);
+	KSYMUNRESOLVE(init_ip6table_filter);
+	KSYMUNRESOLVE(fini_ip6table_filter);
+	fini_ip6table_filter();
 }
 
 module_init(init);
diff -uprN linux-2.6.16/net/ipv6/netfilter/ip6table_mangle.c linux-2.6.16.ovz/net/ipv6/netfilter/ip6table_mangle.c
--- linux-2.6.16/net/ipv6/netfilter/ip6table_mangle.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/net/ipv6/netfilter/ip6table_mangle.c	2006-07-05 08:34:56.000000000 -0400
@@ -12,6 +12,7 @@
  */
 #include <linux/module.h>
 #include <linux/netfilter_ipv6/ip6_tables.h>
+#include <linux/nfcalls.h>
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
@@ -53,7 +54,7 @@ static struct
 	struct ip6t_replace repl;
 	struct ip6t_standard entries[5];
 	struct ip6t_error term;
-} initial_table __initdata
+} initial_table
 = { { "mangle", MANGLE_VALID_HOOKS, 6,
       sizeof(struct ip6t_standard) * 5 + sizeof(struct ip6t_error),
       { [NF_IP6_PRE_ROUTING] 	= 0,
@@ -130,6 +131,13 @@ static struct ip6t_table packet_mangler 
 	.af		= AF_INET6,
 };
 
+#ifdef CONFIG_VE_IPTABLES
+#include <linux/sched.h>
+#define ve_packet_mangler	(get_exec_env()->_ip6t_mangle_table)
+#else	/* !CONFIG_VE_IPTABLES: parenthesized so the expansion is one operand */
+#define ve_packet_mangler	(&packet_mangler)
+#endif
+
 /* The work comes in here from netfilter.c. */
 static unsigned int
 ip6t_route_hook(unsigned int hook,
@@ -138,7 +146,7 @@ ip6t_route_hook(unsigned int hook,
 	 const struct net_device *out,
 	 int (*okfn)(struct sk_buff *))
 {
-	return ip6t_do_table(pskb, hook, in, out, &packet_mangler, NULL);
+	return ip6t_do_table(pskb, hook, in, out, ve_packet_mangler, NULL);
 }
 
 static unsigned int
@@ -174,7 +182,7 @@ ip6t_local_hook(unsigned int hook,
 	/* flowlabel and prio (includes version, which shouldn't change either */
 	flowlabel = *((u_int32_t *) (*pskb)->nh.ipv6h);
 
-	ret = ip6t_do_table(pskb, hook, in, out, &packet_mangler, NULL);
+	ret = ip6t_do_table(pskb, hook, in, out, ve_packet_mangler, NULL);
 
 	if (ret != NF_DROP && ret != NF_STOLEN 
 		&& (memcmp(&(*pskb)->nh.ipv6h->saddr, &saddr, sizeof(saddr))
@@ -228,60 +236,93 @@ static struct nf_hook_ops ip6t_ops[] = {
 	},
 };
 
-static int __init init(void)
+int init_ip6table_mangle(void)
 {
 	int ret;
+	struct ip6t_table *tmp_mangler;
 
 	/* Register table */
-	ret = ip6t_register_table(&packet_mangler, &initial_table.repl);
-	if (ret < 0)
-		return ret;
+	tmp_mangler = ip6t_register_table(&packet_mangler,
+			&initial_table.repl);
+	if (IS_ERR(tmp_mangler))
+		return PTR_ERR(tmp_mangler);
+#ifdef CONFIG_VE_IPTABLES
+	ve_packet_mangler = tmp_mangler;
+#endif
 
 	/* Register hooks */
-	ret = nf_register_hook(&ip6t_ops[0]);
+	ret = virt_nf_register_hook(&ip6t_ops[0]);
 	if (ret < 0)
 		goto cleanup_table;
 
-	ret = nf_register_hook(&ip6t_ops[1]);
+	ret = virt_nf_register_hook(&ip6t_ops[1]);
 	if (ret < 0)
 		goto cleanup_hook0;
 
-	ret = nf_register_hook(&ip6t_ops[2]);
+	ret = virt_nf_register_hook(&ip6t_ops[2]);
 	if (ret < 0)
 		goto cleanup_hook1;
 
-	ret = nf_register_hook(&ip6t_ops[3]);
+	ret = virt_nf_register_hook(&ip6t_ops[3]);
 	if (ret < 0)
 		goto cleanup_hook2;
 
-	ret = nf_register_hook(&ip6t_ops[4]);
+	ret = virt_nf_register_hook(&ip6t_ops[4]);
 	if (ret < 0)
 		goto cleanup_hook3;
 
 	return ret;
 
  cleanup_hook3:
-        nf_unregister_hook(&ip6t_ops[3]);
+        virt_nf_unregister_hook(&ip6t_ops[3]);
  cleanup_hook2:
-	nf_unregister_hook(&ip6t_ops[2]);
+	virt_nf_unregister_hook(&ip6t_ops[2]);
  cleanup_hook1:
-	nf_unregister_hook(&ip6t_ops[1]);
+	virt_nf_unregister_hook(&ip6t_ops[1]);
  cleanup_hook0:
-	nf_unregister_hook(&ip6t_ops[0]);
+	virt_nf_unregister_hook(&ip6t_ops[0]);
  cleanup_table:
-	ip6t_unregister_table(&packet_mangler);
+	ip6t_unregister_table(ve_packet_mangler);
+#ifdef CONFIG_VE_IPTABLES
+	ve_packet_mangler = NULL;
+#endif
 
 	return ret;
 }
 
-static void __exit fini(void)
+void fini_ip6table_mangle(void)
 {
 	unsigned int i;
 
 	for (i = 0; i < sizeof(ip6t_ops)/sizeof(struct nf_hook_ops); i++)
-		nf_unregister_hook(&ip6t_ops[i]);
+		virt_nf_unregister_hook(&ip6t_ops[i]);
+
+	ip6t_unregister_table(ve_packet_mangler);
+#ifdef CONFIG_VE_IPTABLES
+	ve_packet_mangler = NULL;
+#endif
+}
+
+static int __init init(void)
+{
+	int err;
 
-	ip6t_unregister_table(&packet_mangler);
+	err = init_ip6table_mangle();
+	if (err < 0)
+		return err;
+
+	KSYMRESOLVE(init_ip6table_mangle);
+	KSYMRESOLVE(fini_ip6table_mangle);
+	KSYMMODRESOLVE(ip6table_mangle);
+	return 0;
+}
+
+static void __exit fini(void)
+{
+	KSYMMODUNRESOLVE(ip6table_mangle);
+	KSYMUNRESOLVE(init_ip6table_mangle);
+	KSYMUNRESOLVE(fini_ip6table_mangle);
+	fini_ip6table_mangle();
 }
 
 module_init(init);
diff -uprN linux-2.6.16/net/ipv6/netfilter/ip6table_raw.c linux-2.6.16.ovz/net/ipv6/netfilter/ip6table_raw.c
--- linux-2.6.16/net/ipv6/netfilter/ip6table_raw.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/net/ipv6/netfilter/ip6table_raw.c	2006-07-05 08:34:56.000000000 -0400
@@ -145,11 +145,12 @@ static struct nf_hook_ops ip6t_ops[] = {
 static int __init init(void)
 {
 	int ret;
+	struct ip6t_table *tmp;
 
 	/* Register table */
-	ret = ip6t_register_table(&packet_raw, &initial_table.repl);
-	if (ret < 0)
-		return ret;
+	tmp = ip6t_register_table(&packet_raw, &initial_table.repl);
+	if (IS_ERR(tmp))
+		return PTR_ERR(tmp);
 
 	/* Register hooks */
 	ret = nf_register_hook(&ip6t_ops[0]);
diff -uprN linux-2.6.16/net/ipv6/proc.c linux-2.6.16.ovz/net/ipv6/proc.c
--- linux-2.6.16/net/ipv6/proc.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/net/ipv6/proc.c	2006-07-05 08:34:56.000000000 -0400
@@ -25,13 +25,18 @@
 #include <linux/proc_fs.h>
 #include <linux/seq_file.h>
 #include <linux/stddef.h>
+#include <linux/ve.h>
 #include <net/sock.h>
 #include <net/tcp.h>
 #include <net/transp_v6.h>
 #include <net/ipv6.h>
 
 #ifdef CONFIG_PROC_FS
+#ifdef CONFIG_VE
+#define proc_net_devsnmp6	(get_exec_env()->_proc_net_devsnmp6)
+#else
 static struct proc_dir_entry *proc_net_devsnmp6;
+#endif
 
 static int fold_prot_inuse(struct proto *proto)
 {
@@ -164,9 +169,9 @@ static int snmp6_seq_show(struct seq_fil
 		seq_printf(seq, "%-32s\t%u\n", "ifIndex", idev->dev->ifindex);
 		snmp6_seq_show_item(seq, (void **)idev->stats.icmpv6, snmp6_icmp6_list);
 	} else {
-		snmp6_seq_show_item(seq, (void **)ipv6_statistics, snmp6_ipstats_list);
-		snmp6_seq_show_item(seq, (void **)icmpv6_statistics, snmp6_icmp6_list);
-		snmp6_seq_show_item(seq, (void **)udp_stats_in6, snmp6_udp6_list);
+		snmp6_seq_show_item(seq, (void **)ve_ipv6_statistics, snmp6_ipstats_list);
+		snmp6_seq_show_item(seq, (void **)ve_icmpv6_statistics, snmp6_icmp6_list);
+		snmp6_seq_show_item(seq, (void **)ve_udp_stats_in6, snmp6_udp6_list);
 	}
 	return 0;
 }
@@ -229,15 +234,27 @@ int snmp6_unregister_dev(struct inet6_de
 	return 0;
 }
 
+int ve_snmp_proc_init(void)	/* 0 on success, -ENOMEM if proc_mkdir() gives NULL */
+{
+	proc_net_devsnmp6 = proc_mkdir("dev_snmp6", proc_net);
+	return proc_net_devsnmp6 ? 0 : -ENOMEM;
+}
+EXPORT_SYMBOL(ve_snmp_proc_init);
+
+void ve_snmp_proc_fini(void)	/* removes the "dev_snmp6" entry made by ve_snmp_proc_init() */
+{
+	proc_net_remove("dev_snmp6");
+}
+EXPORT_SYMBOL(ve_snmp_proc_fini);
+
 int __init ipv6_misc_proc_init(void)
 {
 	int rc = 0;
 
-	if (!proc_net_fops_create("snmp6", S_IRUGO, &snmp6_seq_fops))
+	if (!proc_glob_fops_create("net/snmp6", S_IRUGO, &snmp6_seq_fops))
 		goto proc_snmp6_fail;
 
-	proc_net_devsnmp6 = proc_mkdir("dev_snmp6", proc_net);
-	if (!proc_net_devsnmp6)
+	if (ve_snmp_proc_init())
 		goto proc_dev_snmp6_fail;
 
 	if (!proc_net_fops_create("sockstat6", S_IRUGO, &sockstat6_seq_fops))
@@ -246,9 +263,9 @@ out:
 	return rc;
 
 proc_sockstat6_fail:
-	proc_net_remove("dev_snmp6");
+	ve_snmp_proc_fini();
 proc_dev_snmp6_fail:
-	proc_net_remove("snmp6");
+	remove_proc_glob_entry("net/snmp6", NULL);
 proc_snmp6_fail:
 	rc = -ENOMEM;
 	goto out;
diff -uprN linux-2.6.16/net/ipv6/raw.c linux-2.6.16.ovz/net/ipv6/raw.c
--- linux-2.6.16/net/ipv6/raw.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/net/ipv6/raw.c	2006-07-05 08:34:56.000000000 -0400
@@ -99,6 +99,9 @@ struct sock *__raw_v6_lookup(struct sock
 			if (sk->sk_bound_dev_if && sk->sk_bound_dev_if != dif)
 				continue;
 
+			if (!ve_accessible_strict(VE_OWNER_SK(sk), get_exec_env()))
+				continue;
+
 			if (!ipv6_addr_any(&np->rcv_saddr)) {
 				if (ipv6_addr_equal(&np->rcv_saddr, loc_addr))
 					goto found;
@@ -1046,8 +1049,14 @@ static struct sock *raw6_get_next(struct
 	do {
 		sk = sk_next(sk);
 try_again:
-		;
-	} while (sk && sk->sk_family != PF_INET6);
+		if (!sk)
+			break;
+		if (sk->sk_family != PF_INET6)
+			continue;
+		if (ve_accessible(VE_OWNER_SK(sk),
+					get_exec_env()))
+			break;
+	} while (1);
 
 	if (!sk && ++state->bucket < RAWV6_HTABLE_SIZE) {
 		sk = sk_head(&raw_v6_htable[state->bucket]);
@@ -1166,13 +1175,13 @@ static struct file_operations raw6_seq_f
 
 int __init raw6_proc_init(void)
 {
-	if (!proc_net_fops_create("raw6", S_IRUGO, &raw6_seq_fops))
+	if (!proc_glob_fops_create("net/raw6", S_IRUGO, &raw6_seq_fops))
 		return -ENOMEM;
 	return 0;
 }
 
 void raw6_proc_exit(void)
 {
-	proc_net_remove("raw6");
+	remove_proc_glob_entry("net/raw6", NULL);
 }
 #endif	/* CONFIG_PROC_FS */
diff -uprN linux-2.6.16/net/ipv6/reassembly.c linux-2.6.16.ovz/net/ipv6/reassembly.c
--- linux-2.6.16/net/ipv6/reassembly.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/net/ipv6/reassembly.c	2006-07-05 08:34:56.000000000 -0400
@@ -43,6 +43,7 @@
 #include <linux/icmpv6.h>
 #include <linux/random.h>
 #include <linux/jhash.h>
+#include <linux/ve_owner.h>
 
 #include <net/sock.h>
 #include <net/snmp.h>
@@ -53,6 +54,7 @@
 #include <net/rawv6.h>
 #include <net/ndisc.h>
 #include <net/addrconf.h>
+#include <linux/ve_owner.h>
 
 int sysctl_ip6frag_high_thresh = 256*1024;
 int sysctl_ip6frag_low_thresh = 192*1024;
@@ -95,8 +97,12 @@ struct frag_queue
 #define FIRST_IN		2
 #define LAST_IN			1
 	__u16			nhoffset;
+	struct ve_struct *owner_env;
 };
 
+DCL_VE_OWNER_PROTO(IP6Q, struct frag_queue, owner_env)
+DCL_VE_OWNER(IP6Q, struct frag_queue, owner_env)
+
 /* Hash table. */
 
 #define IP6Q_HASHSZ	64
@@ -288,6 +294,9 @@ static void ip6_evictor(void)
 static void ip6_frag_expire(unsigned long data)
 {
 	struct frag_queue *fq = (struct frag_queue *) data;
+	struct ve_struct *envid;
+
+	envid = set_exec_env(VE_OWNER_IP6Q(fq));
 
 	spin_lock(&fq->lock);
 
@@ -318,6 +327,8 @@ static void ip6_frag_expire(unsigned lon
 out:
 	spin_unlock(&fq->lock);
 	fq_put(fq, NULL);
+
+	(void)set_exec_env(envid);
 }
 
 /* Creation primitives. */
@@ -336,7 +347,8 @@ static struct frag_queue *ip6_frag_inter
 	hlist_for_each_entry(fq, n, &ip6_frag_hash[hash], list) {
 		if (fq->id == fq_in->id && 
 		    ipv6_addr_equal(&fq_in->saddr, &fq->saddr) &&
-		    ipv6_addr_equal(&fq_in->daddr, &fq->daddr)) {
+		    ipv6_addr_equal(&fq_in->daddr, &fq->daddr) &&
+		    fq->owner_env == get_exec_env()) {
 			atomic_inc(&fq->refcnt);
 			write_unlock(&ip6_frag_lock);
 			fq_in->last_in |= COMPLETE;
@@ -380,6 +392,8 @@ ip6_frag_create(unsigned int hash, u32 i
 	spin_lock_init(&fq->lock);
 	atomic_set(&fq->refcnt, 1);
 
+	SET_VE_OWNER_IP6Q(fq, get_exec_env());
+
 	return ip6_frag_intern(hash, fq);
 
 oom:
@@ -398,7 +412,8 @@ fq_find(u32 id, struct in6_addr *src, st
 	hlist_for_each_entry(fq, n, &ip6_frag_hash[hash], list) {
 		if (fq->id == id && 
 		    ipv6_addr_equal(src, &fq->saddr) &&
-		    ipv6_addr_equal(dst, &fq->daddr)) {
+		    ipv6_addr_equal(dst, &fq->daddr) &&
+		    fq->owner_env == get_exec_env()) {
 			atomic_inc(&fq->refcnt);
 			read_unlock(&ip6_frag_lock);
 			return fq;
@@ -727,6 +742,9 @@ static int ipv6_frag_rcv(struct sk_buff 
 		    fq->meat == fq->len)
 			ret = ip6_frag_reasm(fq, skbp, dev);
 
+		if (ret > 0)
+			SET_VE_OWNER_SKB(*skbp, VE_OWNER_SKB(skb));
+
 		spin_unlock(&fq->lock);
 		fq_put(fq, NULL);
 		return ret;
@@ -737,6 +755,50 @@ static int ipv6_frag_rcv(struct sk_buff 
 	return -1;
 }
 
+#ifdef CONFIG_VE
+/* XXX: kill unfinished frag queues whose owner passes ve_accessible_strict() for envid */
+void ip6_frag_cleanup(struct ve_struct *envid)
+{
+	int i, progress;
+
+	local_bh_disable();
+	do {
+		progress = 0;
+		for (i = 0; i < IP6Q_HASHSZ; i++) {
+			struct frag_queue *fq;
+			struct hlist_node *p, *n;
+
+			if (hlist_empty(&ip6_frag_hash[i]))
+				continue;
+inner_restart:
+			read_lock(&ip6_frag_lock);
+			hlist_for_each_entry_safe(fq, p, n,
+					&ip6_frag_hash[i], list) {
+				if (!ve_accessible_strict(
+						VE_OWNER_IP6Q(fq),
+						envid))
+					continue;
+				atomic_inc(&fq->refcnt);	/* hold fq across the unlock below */
+				read_unlock(&ip6_frag_lock);
+
+				spin_lock(&fq->lock);
+				if (!(fq->last_in&COMPLETE))
+					fq_kill(fq);
+				spin_unlock(&fq->lock);
+
+				fq_put(fq, NULL);	/* drops the reference taken above */
+				progress = 1;
+				goto inner_restart;	/* hash lock was dropped: rescan this bucket */
+			}
+			read_unlock(&ip6_frag_lock);
+		}
+	} while(progress);	/* repeat until a full pass handles no queue */
+	local_bh_enable();
+}
+EXPORT_SYMBOL(ip6_frag_cleanup);
+#endif
+
+
 static struct inet6_protocol frag_protocol =
 {
 	.handler	=	ipv6_frag_rcv,
diff -uprN linux-2.6.16/net/ipv6/route.c linux-2.6.16.ovz/net/ipv6/route.c
--- linux-2.6.16/net/ipv6/route.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/net/ipv6/route.c	2006-07-05 08:34:56.000000000 -0400
@@ -52,7 +52,6 @@
 #include <net/addrconf.h>
 #include <net/tcp.h>
 #include <linux/rtnetlink.h>
-#include <net/dst.h>
 #include <net/xfrm.h>
 
 #include <asm/uaccess.h>
@@ -113,7 +112,6 @@ struct rt6_info ip6_null_entry = {
 		.dst = {
 			.__refcnt	= ATOMIC_INIT(1),
 			.__use		= 1,
-			.dev		= &loopback_dev,
 			.obsolete	= -1,
 			.error		= -ENETUNREACH,
 			.metrics	= { [RTAX_HOPLIMIT - 1] = 255, },
@@ -128,11 +126,19 @@ struct rt6_info ip6_null_entry = {
 	.rt6i_ref	= ATOMIC_INIT(1),
 };
 
-struct fib6_node ip6_routing_table = {
-	.leaf		= &ip6_null_entry,
-	.fn_flags	= RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO,
+struct fib6_table global_fib6_table = {
+	.root = {
+		.leaf		= &ip6_null_entry,
+		.fn_flags	= RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO,
+	}
 };
 
+#ifdef CONFIG_VE	/* root of the fib6 table of the current exec env */
+#define ip6_routing_table (get_exec_env()->_fib6_table->root)
+#else	/* !CONFIG_VE: root of the single table defined above */
+#define ip6_routing_table (global_fib6_table.root)
+#endif
+
 /* Protects all the ip6 fib */
 
 DEFINE_RWLOCK(rt6_lock);
@@ -778,7 +784,7 @@ static int ipv6_get_mtu(struct net_devic
 
 int ipv6_get_hoplimit(struct net_device *dev)
 {
-	int hoplimit = ipv6_devconf.hop_limit;
+	int hoplimit = ve_ipv6_devconf.hop_limit;
 	struct inet6_dev *idev;
 
 	idev = in6_dev_get(dev);
@@ -1421,10 +1427,12 @@ struct rt6_info *addrconf_dst_alloc(stru
 		rt->rt6i_flags |= RTF_ANYCAST;
 	else
 		rt->rt6i_flags |= RTF_LOCAL;
-	rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
-	if (rt->rt6i_nexthop == NULL) {
+	rt->rt6i_nexthop = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, rt->rt6i_dev);
+	if (IS_ERR(rt->rt6i_nexthop)) {
+		void *err = rt->rt6i_nexthop;
+		rt->rt6i_nexthop = NULL;
 		dst_free((struct dst_entry *) rt);
-		return ERR_PTR(-ENOMEM);
+		return err;
 	}
 
 	ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
@@ -1640,8 +1648,12 @@ static int rt6_fill_node(struct sk_buff 
 		goto rtattr_failure;
 	if (rt->u.dst.neighbour)
 		RTA_PUT(skb, RTA_GATEWAY, 16, &rt->u.dst.neighbour->primary_key);
-	if (rt->u.dst.dev)
-		RTA_PUT(skb, RTA_OIF, sizeof(int), &rt->rt6i_dev->ifindex);
+	if (rt->u.dst.dev) {
+		struct net_device *odev = rt->rt6i_dev;
+		if (rt == &ip6_null_entry)
+			odev = &loopback_dev;
+		RTA_PUT(skb, RTA_OIF, sizeof(int), &odev->ifindex);
+	}
 	RTA_PUT(skb, RTA_PRIORITY, 4, &rt->rt6i_metric);
 	ci.rta_lastuse = jiffies_to_clock_t(jiffies - rt->u.dst.lastuse);
 	if (rt->rt6i_expires)
@@ -2110,23 +2122,31 @@ void __init ip6_route_init(void)
 	if (!ip6_dst_ops.kmem_cachep)
 		panic("cannot create ip6_dst_cache");
 
+#ifdef CONFIG_VE
+	global_fib6_table.owner_env = get_ve0();
+	get_ve0()->_fib6_table = &global_fib6_table;
+#endif
+	list_add(&global_fib6_table.list, &fib6_table_list);
 	fib6_init();
 #ifdef 	CONFIG_PROC_FS
-	p = proc_net_create("ipv6_route", 0, rt6_proc_info);
-	if (p)
+	p = create_proc_glob_entry("net/ipv6_route", 0, NULL);
+	if (p) {
 		p->owner = THIS_MODULE;
+		p->get_info = rt6_proc_info;
+	}
 
 	proc_net_fops_create("rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
 #endif
 #ifdef CONFIG_XFRM
 	xfrm6_init();
 #endif
+	ip6_null_entry.u.dst.dev = &loopback_dev;
 }
 
 void ip6_route_cleanup(void)
 {
 #ifdef CONFIG_PROC_FS
-	proc_net_remove("ipv6_route");
+	remove_proc_glob_entry("net/ipv6_route", NULL);
 	proc_net_remove("rt6_stats");
 #endif
 #ifdef CONFIG_XFRM
@@ -2136,3 +2156,46 @@ void ip6_route_cleanup(void)
 	fib6_gc_cleanup();
 	kmem_cache_destroy(ip6_dst_ops.kmem_cachep);
 }
+
+/*
+ * Allocate the per-VE IPv6 routing table for @ve, root it at ip6_null_entry
+ * and link it into fib6_table_list.  Returns 0, or -ENOMEM if the table
+ * cannot be allocated.
+ */
+int init_ve_route6(struct ve_struct *ve)
+{
+	struct ve_struct *old_env = set_exec_env(ve);
+	ve->_fib6_table = kzalloc(sizeof(struct fib6_table), GFP_KERNEL_UBC);
+	if (ve->_fib6_table) {
+		ve->_fib6_table->owner_env = ve;
+		ve->_fib6_table->root.leaf = &ip6_null_entry;
+		ve->_fib6_table->root.fn_flags = RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO;
+		write_lock_bh(&rt6_lock);
+		list_add(&ve->_fib6_table->list, &fib6_table_list);
+		write_unlock_bh(&rt6_lock);
+	}
+	set_exec_env(old_env);
+	return ve->_fib6_table ? 0 : -ENOMEM;
+}
+EXPORT_SYMBOL(init_ve_route6);
+
+/*
+ * Tear down the table created by init_ve_route6(): run rt6_ifdown(NULL) in
+ * the context of @ve, unlink the table from fib6_table_list and free it.
+ * The pointer is cleared so a stale table is never reachable through @ve.
+ */
+void fini_ve_route6(struct ve_struct *ve)
+{
+	struct ve_struct *old_env = set_exec_env(ve);
+
+	if (ve->_fib6_table) {
+		rt6_ifdown(NULL);
+		write_lock_bh(&rt6_lock);
+		list_del(&ve->_fib6_table->list);
+		write_unlock_bh(&rt6_lock);
+		kfree(ve->_fib6_table);
+		ve->_fib6_table = NULL;
+	}
+	set_exec_env(old_env);
+}
+EXPORT_SYMBOL(fini_ve_route6);
diff -uprN linux-2.6.16/net/ipv6/tcp_ipv6.c linux-2.6.16.ovz/net/ipv6/tcp_ipv6.c
--- linux-2.6.16/net/ipv6/tcp_ipv6.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/net/ipv6/tcp_ipv6.c	2006-07-05 08:34:56.000000000 -0400
@@ -62,6 +62,8 @@
 #include <net/dsfield.h>
 #include <net/timewait_sock.h>
 
+#include <ub/ub_tcp.h>
+
 #include <asm/uaccess.h>
 
 #include <linux/proc_fs.h>
@@ -77,7 +79,7 @@ static void	tcp_v6_send_check(struct soc
 
 static int	tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb);
 
-static struct inet_connection_sock_af_ops ipv6_mapped;
+struct inet_connection_sock_af_ops ipv6_mapped;
 static struct inet_connection_sock_af_ops ipv6_specific;
 
 static int tcp_v6_get_port(struct sock *sk, unsigned short snum)
@@ -273,6 +275,8 @@ static int tcp_v6_connect(struct sock *s
 	ip6_dst_store(sk, dst, NULL);
 	sk->sk_route_caps = dst->dev->features &
 		~(NETIF_F_IP_CSUM | NETIF_F_TSO);
+	if (!sysctl_tcp_use_sg)
+		sk->sk_route_caps &= ~NETIF_F_SG;
 
 	icsk->icsk_ext_hdr_len = 0;
 	if (np->opt)
@@ -933,6 +937,8 @@ static struct sock * tcp_v6_syn_recv_soc
 	ip6_dst_store(newsk, dst, NULL);
 	newsk->sk_route_caps = dst->dev->features &
 		~(NETIF_F_IP_CSUM | NETIF_F_TSO);
+	if (!sysctl_tcp_use_sg)
+		newsk->sk_route_caps &= ~NETIF_F_SG;
 
 	newtcp6sk = (struct tcp6_sock *)newsk;
 	inet_sk(newsk)->pinet6 = &newtcp6sk->inet6;
@@ -1040,6 +1046,8 @@ static int tcp_v6_do_rcv(struct sock *sk
 	struct ipv6_pinfo *np = inet6_sk(sk);
 	struct tcp_sock *tp;
 	struct sk_buff *opt_skb = NULL;
+	struct user_beancounter *ub;
+
 
 	/* Imagine: socket is IPv6. IPv4 packet arrives,
 	   goes to IPv4 receive handler and backlogged.
@@ -1052,6 +1060,8 @@ static int tcp_v6_do_rcv(struct sock *sk
 	if (skb->protocol == htons(ETH_P_IP))
 		return tcp_v4_do_rcv(sk, skb);
 
+	ub = set_exec_ub(sock_bc(sk)->ub);
+
 	if (sk_filter(sk, skb, 0))
 		goto discard;
 
@@ -1083,7 +1093,7 @@ static int tcp_v6_do_rcv(struct sock *sk
 		TCP_CHECK_TIMER(sk);
 		if (opt_skb)
 			goto ipv6_pktoptions;
-		return 0;
+		goto restore_context;
 	}
 
 	if (skb->len < (skb->h.th->doff<<2) || tcp_checksum_complete(skb))
@@ -1104,7 +1114,7 @@ static int tcp_v6_do_rcv(struct sock *sk
 				goto reset;
 			if (opt_skb)
 				__kfree_skb(opt_skb);
-			return 0;
+			goto restore_context;
 		}
 	}
 
@@ -1114,6 +1124,9 @@ static int tcp_v6_do_rcv(struct sock *sk
 	TCP_CHECK_TIMER(sk);
 	if (opt_skb)
 		goto ipv6_pktoptions;
+
+restore_context:
+	(void)set_exec_ub(ub);
 	return 0;
 
 reset:
@@ -1122,7 +1135,7 @@ discard:
 	if (opt_skb)
 		__kfree_skb(opt_skb);
 	kfree_skb(skb);
-	return 0;
+	goto restore_context;
 csum_err:
 	TCP_INC_STATS_BH(TCP_MIB_INERRS);
 	goto discard;
@@ -1154,7 +1167,7 @@ ipv6_pktoptions:
 
 	if (opt_skb)
 		kfree_skb(opt_skb);
-	return 0;
+	goto restore_context;
 }
 
 static int tcp_v6_rcv(struct sk_buff **pskb)
@@ -1315,7 +1328,7 @@ static struct inet_connection_sock_af_op
  *	TCP over IPv4 via INET6 API
  */
 
-static struct inet_connection_sock_af_ops ipv6_mapped = {
+struct inet_connection_sock_af_ops ipv6_mapped = {
 	.queue_xmit	=	ip_queue_xmit,
 	.send_check	=	tcp_v4_send_check,
 	.rebuild_header	=	inet_sk_rebuild_header,
@@ -1329,6 +1342,7 @@ static struct inet_connection_sock_af_op
 	.addr2sockaddr	=	inet6_csk_addr2sockaddr,
 	.sockaddr_len	=	sizeof(struct sockaddr_in6)
 };
+EXPORT_SYMBOL_GPL(ipv6_mapped);
 
 
 
@@ -1535,7 +1549,7 @@ out:
 static struct file_operations tcp6_seq_fops;
 static struct tcp_seq_afinfo tcp6_seq_afinfo = {
 	.owner		= THIS_MODULE,
-	.name		= "tcp6",
+	.name		= "net/tcp6",
 	.family		= AF_INET6,
 	.seq_show	= tcp6_seq_show,
 	.seq_fops	= &tcp6_seq_fops,
diff -uprN linux-2.6.16/net/ipv6/udp.c linux-2.6.16.ovz/net/ipv6/udp.c
--- linux-2.6.16/net/ipv6/udp.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/net/ipv6/udp.c	2006-07-05 08:34:56.000000000 -0400
@@ -69,7 +69,9 @@ static int udp_v6_get_port(struct sock *
 {
 	struct sock *sk2;
 	struct hlist_node *node;
+	struct ve_struct *env;
 
+	env = VE_OWNER_SK(sk);
 	write_lock_bh(&udp_hash_lock);
 	if (snum == 0) {
 		int best_size_so_far, best, result, i;
@@ -83,7 +85,7 @@ static int udp_v6_get_port(struct sock *
 			int size;
 			struct hlist_head *list;
 
-			list = &udp_hash[result & (UDP_HTABLE_SIZE - 1)];
+			list = &udp_hash[udp_hashfn(result, VEID(env))];
 			if (hlist_empty(list)) {
 				if (result > sysctl_local_port_range[1])
 					result = sysctl_local_port_range[0] +
@@ -105,7 +107,7 @@ static int udp_v6_get_port(struct sock *
 				result = sysctl_local_port_range[0]
 					+ ((result - sysctl_local_port_range[0]) &
 					   (UDP_HTABLE_SIZE - 1));
-			if (!udp_lport_inuse(result))
+			if (!udp_lport_inuse(result, env))
 				break;
 		}
 		if (i >= (1 << 16) / UDP_HTABLE_SIZE)
@@ -114,9 +116,10 @@ gotit:
 		udp_port_rover = snum = result;
 	} else {
 		sk_for_each(sk2, node,
-			    &udp_hash[snum & (UDP_HTABLE_SIZE - 1)]) {
+			    &udp_hash[udp_hashfn(snum, VEID(env))]) {
 			if (inet_sk(sk2)->num == snum &&
 			    sk2 != sk &&
+			    ve_accessible_strict(VE_OWNER_SK(sk2), env) &&
 			    (!sk2->sk_bound_dev_if ||
 			     !sk->sk_bound_dev_if ||
 			     sk2->sk_bound_dev_if == sk->sk_bound_dev_if) &&
@@ -128,7 +131,7 @@ gotit:
 
 	inet_sk(sk)->num = snum;
 	if (sk_unhashed(sk)) {
-		sk_add_node(sk, &udp_hash[snum & (UDP_HTABLE_SIZE - 1)]);
+		sk_add_node(sk, &udp_hash[udp_hashfn(snum, VEID(env))]);
 		sock_prot_inc_use(sk->sk_prot);
 	}
 	write_unlock_bh(&udp_hash_lock);
@@ -161,12 +164,15 @@ static struct sock *udp_v6_lookup(struct
 	struct hlist_node *node;
 	unsigned short hnum = ntohs(dport);
 	int badness = -1;
+	struct ve_struct *env;
 
  	read_lock(&udp_hash_lock);
-	sk_for_each(sk, node, &udp_hash[hnum & (UDP_HTABLE_SIZE - 1)]) {
+	env = get_exec_env();
+	sk_for_each(sk, node, &udp_hash[udp_hashfn(hnum, VEID(env))]) {
 		struct inet_sock *inet = inet_sk(sk);
 
-		if (inet->num == hnum && sk->sk_family == PF_INET6) {
+		if (inet->num == hnum && sk->sk_family == PF_INET6 &&
+		    ve_accessible_strict(VE_OWNER_SK(sk), env)) {
 			struct ipv6_pinfo *np = inet6_sk(sk);
 			int score = 0;
 			if (inet->dport) {
@@ -415,7 +421,8 @@ static void udpv6_mcast_deliver(struct u
 	int dif;
 
 	read_lock(&udp_hash_lock);
-	sk = sk_head(&udp_hash[ntohs(uh->dest) & (UDP_HTABLE_SIZE - 1)]);
+	sk = sk_head(&udp_hash[udp_hashfn(ntohs(uh->dest),
+					  VEID(VE_OWNER_SKB(skb)))]);
 	dif = skb->dev->ifindex;
 	sk = udp_v6_mcast_next(sk, uh->dest, daddr, uh->source, saddr, dif);
 	if (!sk) {
@@ -1018,7 +1025,7 @@ static int udp6_seq_show(struct seq_file
 static struct file_operations udp6_seq_fops;
 static struct udp_seq_afinfo udp6_seq_afinfo = {
 	.owner		= THIS_MODULE,
-	.name		= "udp6",
+	.name		= "net/udp6",
 	.family		= AF_INET6,
 	.seq_show	= udp6_seq_show,
 	.seq_fops	= &udp6_seq_fops,
diff -uprN linux-2.6.16/net/ipv6/xfrm6_policy.c linux-2.6.16.ovz/net/ipv6/xfrm6_policy.c
--- linux-2.6.16/net/ipv6/xfrm6_policy.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/net/ipv6/xfrm6_policy.c	2006-07-05 08:34:56.000000000 -0400
@@ -191,16 +191,18 @@ error:
 static inline void
 _decode_session6(struct sk_buff *skb, struct flowi *fl)
 {
-	u16 offset = sizeof(struct ipv6hdr);
+	u16 offset = skb->h.raw - skb->nh.raw;
 	struct ipv6hdr *hdr = skb->nh.ipv6h;
-	struct ipv6_opt_hdr *exthdr = (struct ipv6_opt_hdr*)(skb->nh.raw + offset);
-	u8 nexthdr = skb->nh.ipv6h->nexthdr;
+	struct ipv6_opt_hdr *exthdr;
+	u8 nexthdr = skb->nh.raw[IP6CB(skb)->nhoff];
 
 	memset(fl, 0, sizeof(struct flowi));
 	ipv6_addr_copy(&fl->fl6_dst, &hdr->daddr);
 	ipv6_addr_copy(&fl->fl6_src, &hdr->saddr);
 
 	while (pskb_may_pull(skb, skb->nh.raw + offset + 1 - skb->data)) {
+		exthdr = (struct ipv6_opt_hdr*)(skb->nh.raw + offset);
+
 		switch (nexthdr) {
 		case NEXTHDR_ROUTING:
 		case NEXTHDR_HOP:
diff -uprN linux-2.6.16/net/netfilter/core.c linux-2.6.16.ovz/net/netfilter/core.c
--- linux-2.6.16/net/netfilter/core.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/net/netfilter/core.c	2006-07-05 08:34:56.000000000 -0400
@@ -32,16 +32,24 @@
  * of skbuffs queued for userspace, and not deregister a hook unless
  * this is zero, but that sucks.  Now, we simply check when the
  * packets come back: if the hook is gone, the packet is discarded. */
+static DEFINE_SPINLOCK(nf_hook_lock);
+
 struct list_head nf_hooks[NPROTO][NF_MAX_HOOKS];
 EXPORT_SYMBOL(nf_hooks);
-static DEFINE_SPINLOCK(nf_hook_lock);
+#ifdef CONFIG_VE_IPTABLES
+#define ve_nf_hooks \
+       ((struct list_head (*)[NF_MAX_HOOKS])(get_exec_env()->_nf_hooks))
+#else
+#define ve_nf_hooks nf_hooks
+#endif
+
 
 int nf_register_hook(struct nf_hook_ops *reg)
 {
 	struct list_head *i;
 
 	spin_lock_bh(&nf_hook_lock);
-	list_for_each(i, &nf_hooks[reg->pf][reg->hooknum]) {
+	list_for_each(i, &ve_nf_hooks[reg->pf][reg->hooknum]) {
 		if (reg->priority < ((struct nf_hook_ops *)i)->priority)
 			break;
 	}
@@ -53,6 +61,33 @@ int nf_register_hook(struct nf_hook_ops 
 }
 EXPORT_SYMBOL(nf_register_hook);
 
+int virt_nf_register_hook(struct nf_hook_ops *reg)
+{
+       int ret = 0;
+
+       if (!ve_is_super(get_exec_env())) {
+               struct nf_hook_ops *tmp;
+               ret = -ENOMEM;
+               tmp = kmalloc(sizeof(struct nf_hook_ops), GFP_KERNEL);
+               if (!tmp)
+                       goto nomem;
+               memcpy(tmp, reg, sizeof(struct nf_hook_ops));
+               reg =  tmp;
+       }
+
+       ret = nf_register_hook(reg);
+       if (ret)
+               goto out;
+
+       return 0;
+out:
+       if (!ve_is_super(get_exec_env()))
+               kfree(reg);
+nomem:
+       return ret;
+}
+EXPORT_SYMBOL(virt_nf_register_hook);
+
 void nf_unregister_hook(struct nf_hook_ops *reg)
 {
 	spin_lock_bh(&nf_hook_lock);
@@ -63,6 +98,29 @@ void nf_unregister_hook(struct nf_hook_o
 }
 EXPORT_SYMBOL(nf_unregister_hook);
 
+int virt_nf_unregister_hook(struct nf_hook_ops *reg)
+{
+       struct nf_hook_ops *i;
+
+       spin_lock_bh(&nf_hook_lock);
+       list_for_each_entry(i, &ve_nf_hooks[reg->pf][reg->hooknum], list) {
+               if (reg->hook == i->hook) {
+                       reg = i;
+                       break;
+               }
+       }
+       spin_unlock_bh(&nf_hook_lock);
+       if (reg != i)
+               return -ENOENT;
+
+       nf_unregister_hook(reg);
+
+       if (!ve_is_super(get_exec_env()))
+               kfree(reg);
+       return 0;
+}
+EXPORT_SYMBOL(virt_nf_unregister_hook);
+
 unsigned int nf_iterate(struct list_head *head,
 			struct sk_buff **skb,
 			int hook,
@@ -120,9 +178,9 @@ int nf_hook_slow(int pf, unsigned int ho
 	/* We may already have this, but read-locks nest anyway */
 	rcu_read_lock();
 
-	elem = &nf_hooks[pf][hook];
+	elem = &ve_nf_hooks[pf][hook];
 next_hook:
-	verdict = nf_iterate(&nf_hooks[pf][hook], pskb, hook, indev,
+	verdict = nf_iterate(&ve_nf_hooks[pf][hook], pskb, hook, indev,
 			     outdev, &elem, okfn, hook_thresh);
 	if (verdict == NF_ACCEPT || verdict == NF_STOP) {
 		ret = 1;
@@ -195,13 +253,54 @@ struct proc_dir_entry *proc_net_netfilte
 EXPORT_SYMBOL(proc_net_netfilter);
 #endif
 
-void __init netfilter_init(void)
+void init_nf_hooks(struct list_head (*nh)[NF_MAX_HOOKS])
 {
 	int i, h;
 	for (i = 0; i < NPROTO; i++) {
 		for (h = 0; h < NF_MAX_HOOKS; h++)
-			INIT_LIST_HEAD(&nf_hooks[i][h]);
+			INIT_LIST_HEAD(&nh[i][h]);
 	}
+}
+
+int init_netfilter(void)
+{
+#ifdef CONFIG_VE_IPTABLES
+       struct ve_struct *envid;
+
+       envid = get_exec_env();
+       envid->_nf_hooks = kmalloc(sizeof(nf_hooks), GFP_KERNEL);
+       if (envid->_nf_hooks == NULL)
+               return -ENOMEM;
+
+       /* FIXME: charge ubc */
+
+       init_nf_hooks(envid->_nf_hooks);
+       return 0;
+#else
+       init_nf_hooks(nf_hooks);
+       return 0;
+#endif
+}
+EXPORT_SYMBOL(init_netfilter);
+
+#ifdef CONFIG_VE_IPTABLES
+void fini_netfilter(void)
+{
+       struct ve_struct *envid;
+
+       envid = get_exec_env();
+       /* kfree(NULL) is a no-op, so no NULL guard is needed */
+       kfree(envid->_nf_hooks);
+       envid->_nf_hooks = NULL;
+
+       /* FIXME: uncharge ubc */
+}
+EXPORT_SYMBOL(fini_netfilter);
+#endif
+
+void __init netfilter_init(void)
+{
+       init_netfilter();
 
 #ifdef CONFIG_PROC_FS
 	proc_net_netfilter = proc_mkdir("netfilter", proc_net);
@@ -214,3 +313,4 @@ void __init netfilter_init(void)
 	if (netfilter_log_init() < 0)
 		panic("cannot initialize nf_log");
 }
+
diff -uprN linux-2.6.16/net/netfilter/nf_conntrack_netlink.c linux-2.6.16.ovz/net/netfilter/nf_conntrack_netlink.c
--- linux-2.6.16/net/netfilter/nf_conntrack_netlink.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/net/netfilter/nf_conntrack_netlink.c	2006-07-05 08:34:56.000000000 -0400
@@ -1641,7 +1641,7 @@ static void __exit ctnetlink_exit(void)
 	printk("ctnetlink: unregistering from nfnetlink.\n");
 
 #ifdef CONFIG_NF_CONNTRACK_EVENTS
-	nf_conntrack_unregister_notifier(&ctnl_notifier_exp);
+	nf_conntrack_expect_unregister_notifier(&ctnl_notifier_exp);
 	nf_conntrack_unregister_notifier(&ctnl_notifier);
 #endif
 
diff -uprN linux-2.6.16/net/netfilter/nf_conntrack_proto_sctp.c linux-2.6.16.ovz/net/netfilter/nf_conntrack_proto_sctp.c
--- linux-2.6.16/net/netfilter/nf_conntrack_proto_sctp.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/net/netfilter/nf_conntrack_proto_sctp.c	2006-07-05 08:34:56.000000000 -0400
@@ -240,12 +240,15 @@ static int do_basic_checks(struct nf_con
 			flag = 1;
 		}
 
-		/* Cookie Ack/Echo chunks not the first OR 
-		   Init / Init Ack / Shutdown compl chunks not the only chunks */
-		if ((sch->type == SCTP_CID_COOKIE_ACK 
+		/*
+		 * Cookie Ack/Echo chunks not the first OR
+		 * Init / Init Ack / Shutdown compl chunks not the only chunks
+		 * OR zero-length.
+		 */
+		if (((sch->type == SCTP_CID_COOKIE_ACK
 			|| sch->type == SCTP_CID_COOKIE_ECHO
 			|| flag)
-		     && count !=0 ) {
+		      && count !=0) || !sch->length) {
 			DEBUGP("Basic checks failed\n");
 			return 1;
 		}
@@ -256,7 +259,7 @@ static int do_basic_checks(struct nf_con
 	}
 
 	DEBUGP("Basic checks passed\n");
-	return 0;
+	return count == 0;
 }
 
 static int new_state(enum ip_conntrack_dir dir,
diff -uprN linux-2.6.16/net/netfilter/nf_queue.c linux-2.6.16.ovz/net/netfilter/nf_queue.c
--- linux-2.6.16/net/netfilter/nf_queue.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/net/netfilter/nf_queue.c	2006-07-05 08:34:56.000000000 -0400
@@ -209,12 +209,12 @@ void nf_reinject(struct sk_buff *skb, st
 	/* Drop reference to owner of hook which queued us. */
 	module_put(info->elem->owner);
 
-	list_for_each_rcu(i, &nf_hooks[info->pf][info->hook]) {
+	list_for_each_rcu(i, &ve_nf_hooks[info->pf][info->hook]) {
 		if (i == elem) 
   			break;
   	}
   
-	if (i == &nf_hooks[info->pf][info->hook]) {
+	if (i == &ve_nf_hooks[info->pf][info->hook]) {
 		/* The module which sent it to userspace is gone. */
 		NFDEBUG("%s: module disappeared, dropping packet.\n",
 			__FUNCTION__);
@@ -235,7 +235,7 @@ void nf_reinject(struct sk_buff *skb, st
 
 	if (verdict == NF_ACCEPT) {
 	next_hook:
-		verdict = nf_iterate(&nf_hooks[info->pf][info->hook],
+		verdict = nf_iterate(&ve_nf_hooks[info->pf][info->hook],
 				     &skb, info->hook, 
 				     info->indev, info->outdev, &elem,
 				     info->okfn, INT_MIN);
diff -uprN linux-2.6.16/net/netfilter/nf_sockopt.c linux-2.6.16.ovz/net/netfilter/nf_sockopt.c
--- linux-2.6.16/net/netfilter/nf_sockopt.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/net/netfilter/nf_sockopt.c	2006-07-05 08:34:56.000000000 -0400
@@ -80,6 +80,12 @@ static int nf_sockopt(struct sock *sk, i
 	struct nf_sockopt_ops *ops;
 	int ret;
 
+#ifdef CONFIG_VE_IPTABLES
+       if (!get_exec_env()->_nf_hooks ||
+           !get_exec_env()->_ipt_standard_target)
+               return -ENOPROTOOPT;
+#endif
+
 	if (down_interruptible(&nf_sockopt_mutex) != 0)
 		return -EINTR;
 
diff -uprN linux-2.6.16/net/netfilter/x_tables.c linux-2.6.16.ovz/net/netfilter/x_tables.c
--- linux-2.6.16/net/netfilter/x_tables.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/net/netfilter/x_tables.c	2006-07-05 08:34:56.000000000 -0400
@@ -24,6 +24,10 @@
 
 #include <linux/netfilter/x_tables.h>
 #include <linux/netfilter_arp.h>
+#include <linux/nfcalls.h>
+
+#include <ub/beancounter.h>
+#include <ub/ub_mem.h>
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>");
@@ -38,7 +42,13 @@ struct xt_af {
 	struct list_head tables;
 };
 
+#ifdef CONFIG_VE_IPTABLES
+/* include ve.h and define get_exec_env */
+#include <linux/sched.h>
+#define xt	(get_exec_env()->_xt)
+#else
 static struct xt_af *xt;
+#endif
 
 #ifdef DEBUG_IP_FIREWALL_USER
 #define duprintf(format, args...) printk(format , ## args)
@@ -52,17 +62,52 @@ enum {
 	MATCH,
 };
 
+#ifdef CONFIG_USER_RESOURCE
+#define UB_NUMXTENT 23
+static int charge_xtables(struct user_beancounter *ub, unsigned long size)
+{
+	if (ub == NULL)
+		return 0;
+	return charge_beancounter(ub, UB_NUMXTENT, size, 1);
+}
+static void uncharge_xtables(struct user_beancounter *ub, unsigned long size)
+{
+	if (ub == NULL)
+		return;
+	uncharge_beancounter(ub, UB_NUMXTENT, size);
+}
+#endif	/* CONFIG_USER_RESOURCE */
+
 /* Registration hooks for targets. */
 int
 xt_register_target(int af, struct xt_target *target)
 {
 	int ret;
+	struct module *mod = target->me;
+
+	if (!ve_is_super(get_exec_env())) {
+		struct xt_target *tmp;
+		__module_get(mod);
+		ret = -ENOMEM;
+		tmp = ub_kmalloc(sizeof(struct xt_target), GFP_KERNEL);
+		if (!tmp)
+			goto nomem;
+		memcpy(tmp, target, sizeof(struct xt_target));
+		target = tmp;
+	}
 
 	ret = down_interruptible(&xt[af].mutex);
 	if (ret != 0)
-		return ret;
+		goto out;
 	list_add(&target->list, &xt[af].target);
 	up(&xt[af].mutex);
+	return 0;
+out:
+	if (!ve_is_super(get_exec_env())) {
+		kfree(target);
+nomem:
+		module_put(mod);
+	}
 	return ret;
 }
 EXPORT_SYMBOL(xt_register_target);
@@ -71,8 +116,21 @@ void
 xt_unregister_target(int af, struct xt_target *target)
 {
 	down(&xt[af].mutex);
+	if (!ve_is_super(get_exec_env())) {
+		target = list_named_find(&xt[af].target, target->name);
+		if (!target) {
+			up(&xt[af].mutex);
+			return;
+		}
+	}
+
 	LIST_DELETE(&xt[af].target, target);
 	up(&xt[af].mutex);
+
+	if (!ve_is_super(get_exec_env())) {
+		module_put(target->me);
+		kfree(target);
+	}
 }
 EXPORT_SYMBOL(xt_unregister_target);
 
@@ -80,14 +138,33 @@ int
 xt_register_match(int af, struct xt_match *match)
 {
 	int ret;
+	struct module *mod = match->me;
+
+	if (!ve_is_super(get_exec_env())) {
+		struct xt_match *tmp;
+		__module_get(mod);
+		ret = -ENOMEM;
+		tmp = ub_kmalloc(sizeof(struct xt_match), GFP_KERNEL);
+		if (!tmp)
+			goto nomem;
+		memcpy(tmp, match, sizeof(struct xt_match));
+		match = tmp;
+	}
 
 	ret = down_interruptible(&xt[af].mutex);
 	if (ret != 0)
-		return ret;
+		goto out;
 
 	list_add(&match->list, &xt[af].match);
 	up(&xt[af].mutex);
 
+	return 0;
+out:
+	if (!ve_is_super(get_exec_env())) {
+		kfree(match);
+nomem:
+		module_put(mod);
+	}
 	return ret;
 }
 EXPORT_SYMBOL(xt_register_match);
@@ -96,8 +173,21 @@ void
 xt_unregister_match(int af, struct xt_match *match)
 {
 	down(&xt[af].mutex);
+	if (!ve_is_super(get_exec_env())) {
+		match = list_named_find(&xt[af].match, match->name);
+		if (!match) {
+			up(&xt[af].mutex);
+			return;
+		}
+	}
+
 	LIST_DELETE(&xt[af].match, match);
 	up(&xt[af].mutex);
+
+	if (!ve_is_super(get_exec_env())) {
+		module_put(match->me);
+		kfree(match);
+	}
 }
 EXPORT_SYMBOL(xt_unregister_match);
 
@@ -246,7 +336,7 @@ struct xt_table_info *xt_alloc_table_inf
 	if ((SMP_ALIGN(size) >> PAGE_SHIFT) + 2 > num_physpages)
 		return NULL;
 
-	newinfo = kzalloc(sizeof(struct xt_table_info), GFP_KERNEL);
+	newinfo = kzalloc(sizeof(struct xt_table_info), GFP_KERNEL_UBC);
 	if (!newinfo)
 		return NULL;
 
@@ -255,10 +345,10 @@ struct xt_table_info *xt_alloc_table_inf
 	for_each_cpu(cpu) {
 		if (size <= PAGE_SIZE)
 			newinfo->entries[cpu] = kmalloc_node(size,
-							GFP_KERNEL,
+							GFP_KERNEL_UBC,
 							cpu_to_node(cpu));
 		else
-			newinfo->entries[cpu] = vmalloc_node(size,
+			newinfo->entries[cpu] = ub_vmalloc_node(size,
 							cpu_to_node(cpu));
 
 		if (newinfo->entries[cpu] == NULL) {
@@ -315,6 +405,9 @@ xt_replace_table(struct xt_table *table,
 	      int *error)
 {
 	struct xt_table_info *oldinfo, *private;
+#ifdef CONFIG_USER_RESOURCE
+	struct user_beancounter *old_ub, *new_ub;
+#endif
 
 	/* Do the substitution. */
 	write_lock_bh(&table->lock);
@@ -328,6 +421,21 @@ xt_replace_table(struct xt_table *table,
 		return NULL;
 	}
 	oldinfo = private;
+
+#ifdef CONFIG_USER_RESOURCE
+	new_ub = mem_ub(newinfo);
+	if (charge_xtables(new_ub, newinfo->number)) {
+		/* charge refused: table->private is left untouched */
+		write_unlock_bh(&table->lock);
+		*error = -ENOMEM;
+		return NULL;
+	}
+	if (num_counters) {
+		old_ub = mem_ub(oldinfo);
+		uncharge_xtables(old_ub, oldinfo->number);
+	}
+#endif
+
 	table->private = newinfo;
 	newinfo->initial_entries = oldinfo->initial_entries;
 	write_unlock_bh(&table->lock);
@@ -355,6 +463,7 @@ int xt_register_table(struct xt_table *t
 
 	/* Simplifies replace_table code. */
 	table->private = bootstrap;
+	rwlock_init(&table->lock);
 	if (!xt_replace_table(table, 0, newinfo, &ret))
 		goto unlock;
 
@@ -364,7 +473,6 @@ int xt_register_table(struct xt_table *t
 	/* save number of initial entries */
 	private->initial_entries = private->number;
 
-	rwlock_init(&table->lock);
 	list_prepend(&xt[table->af].tables, table);
 
 	ret = 0;
@@ -374,6 +482,39 @@ int xt_register_table(struct xt_table *t
 }
 EXPORT_SYMBOL_GPL(xt_register_table);
 
+struct xt_table * virt_xt_register_table(struct xt_table *table,
+		      struct xt_table_info *bootstrap,
+		      struct xt_table_info *newinfo)
+{
+	int ret;
+	struct module *mod = table->me;
+
+	if (!ve_is_super(get_exec_env())) {
+		struct xt_table *tmp;
+		__module_get(mod);
+		ret = -ENOMEM;
+		tmp = ub_kmalloc(sizeof(struct xt_table), GFP_KERNEL);
+		if (!tmp)
+			goto nomem;
+		memcpy(tmp, table, sizeof(struct xt_table));
+		table = tmp;
+	}
+
+	ret = xt_register_table(table, bootstrap, newinfo);
+	if (ret)
+		goto out;
+
+	return table;
+out:
+	if (!ve_is_super(get_exec_env())) {
+		kfree(table);
+nomem:
+		module_put(mod);
+	}
+	return ERR_PTR(ret);
+}
+EXPORT_SYMBOL_GPL(virt_xt_register_table);
+
 void *xt_unregister_table(struct xt_table *table)
 {
 	struct xt_table_info *private;
@@ -383,10 +524,27 @@ void *xt_unregister_table(struct xt_tabl
 	LIST_DELETE(&xt[table->af].tables, table);
 	up(&xt[table->af].mutex);
 
+#ifdef CONFIG_USER_RESOURCE
+	uncharge_xtables(mem_ub(private), private->number);
+#endif
+
 	return private;
 }
 EXPORT_SYMBOL_GPL(xt_unregister_table);
 
+void *virt_xt_unregister_table(struct xt_table *table)
+{
+	void *ret;
+
+	ret = xt_unregister_table(table);
+	if (!ve_is_super(get_exec_env())) {
+		module_put(table->me);
+		kfree(table);
+	}
+	return ret;
+}
+EXPORT_SYMBOL_GPL(virt_xt_unregister_table);
+
 #ifdef CONFIG_PROC_FS
 static char *xt_proto_prefix[NPROTO] = {
 	[AF_INET]	= "ip",
@@ -597,10 +755,13 @@ void xt_proto_fini(int af)
 EXPORT_SYMBOL_GPL(xt_proto_fini);
 
 
-static int __init xt_init(void)
+int init_xtables(void)
 {
 	int i;
 
+	if (xt)
+		return -EEXIST;
+
 	xt = kmalloc(sizeof(struct xt_af) * NPROTO, GFP_KERNEL);
 	if (!xt)
 		return -ENOMEM;
@@ -614,11 +775,34 @@ static int __init xt_init(void)
 	return 0;
 }
 
-static void __exit xt_fini(void)
+void fini_xtables(void)
 {
 	kfree(xt);
+	xt = NULL;
+}
+
+static int __init xt_init(void)
+{
+	int err;
+
+	err = init_xtables();
+	if (err)
+		return err;
+
+	KSYMRESOLVE(init_xtables);
+	KSYMRESOLVE(fini_xtables);
+	KSYMMODRESOLVE(x_tables);
+	return 0;
+}
+
+static void __exit xt_fini(void)
+{
+	KSYMMODUNRESOLVE(x_tables);
+	KSYMUNRESOLVE(init_xtables);
+	KSYMUNRESOLVE(fini_xtables);
+	fini_xtables();
 }
 
-module_init(xt_init);
+subsys_initcall(xt_init);
 module_exit(xt_fini);
 
diff -uprN linux-2.6.16/net/netfilter/xt_conntrack.c linux-2.6.16.ovz/net/netfilter/xt_conntrack.c
--- linux-2.6.16/net/netfilter/xt_conntrack.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/net/netfilter/xt_conntrack.c	2006-07-05 08:34:56.000000000 -0400
@@ -20,6 +20,8 @@
 
 #include <linux/netfilter/x_tables.h>
 #include <linux/netfilter/xt_conntrack.h>
+#include <linux/netfilter_ipv4/ip_tables.h>
+#include <linux/nfcalls.h>
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Marc Boucher <marc@mbsi.ca>");
@@ -213,25 +215,145 @@ static int check(const char *tablename,
 	return 1;
 }
 
+#ifdef CONFIG_COMPAT
+static int compat_to_user(void *match, void **dstptr,
+		int *size, int off)
+{
+	struct ipt_entry_match *pm;
+	struct xt_conntrack_info *pinfo;
+	struct compat_xt_conntrack_info info;
+	u_int16_t msize;
+
+	pm = (struct ipt_entry_match *)match;
+	msize = pm->u.user.match_size;
+	if (__copy_to_user(*dstptr, pm, sizeof(struct ipt_entry_match)))
+		return -EFAULT;
+	pinfo = (struct xt_conntrack_info *)pm->data;
+	memset(&info, 0, sizeof(struct compat_xt_conntrack_info));
+	info.statemask = pinfo->statemask;
+	info.statusmask = pinfo->statusmask;
+	memcpy(info.tuple, pinfo->tuple, IP_CT_DIR_MAX *
+			sizeof(struct ip_conntrack_tuple));
+	memcpy(info.sipmsk, pinfo->sipmsk,
+			IP_CT_DIR_MAX * sizeof(struct in_addr));
+	memcpy(info.dipmsk, pinfo->dipmsk,
+			IP_CT_DIR_MAX * sizeof(struct in_addr));
+	info.expires_min = pinfo->expires_min;
+	info.expires_max = pinfo->expires_max;
+	info.flags = pinfo->flags;
+	info.invflags = pinfo->invflags;
+	if (__copy_to_user(*dstptr + sizeof(struct ipt_entry_match),
+				&info, sizeof(struct compat_xt_conntrack_info)))
+		return -EFAULT;
+	msize -= off;
+	if (put_user(msize, (u_int16_t *)*dstptr))
+		return -EFAULT;
+	*size -= off;
+	*dstptr += msize;
+	return 0;
+}
+
+static int compat_from_user(void *match, void **dstptr,
+		int *size, int off)
+{
+	struct compat_ipt_entry_match *pm;
+	struct ipt_entry_match *dstpm;
+	struct compat_xt_conntrack_info *pinfo;
+	struct xt_conntrack_info info;
+	u_int16_t msize;
+
+	pm = (struct compat_ipt_entry_match *)match;
+	dstpm = (struct ipt_entry_match *)*dstptr;
+	msize = pm->u.user.match_size;
+	memcpy(*dstptr, pm, sizeof(struct compat_ipt_entry_match));
+	pinfo = (struct compat_xt_conntrack_info *)pm->data;
+	memset(&info, 0, sizeof(struct xt_conntrack_info));
+	info.statemask = pinfo->statemask;
+	info.statusmask = pinfo->statusmask;
+	memcpy(info.tuple, pinfo->tuple, IP_CT_DIR_MAX *
+			sizeof(struct ip_conntrack_tuple));
+	memcpy(info.sipmsk, pinfo->sipmsk,
+			IP_CT_DIR_MAX * sizeof(struct in_addr));
+	memcpy(info.dipmsk, pinfo->dipmsk,
+			IP_CT_DIR_MAX * sizeof(struct in_addr));
+	info.expires_min = pinfo->expires_min;
+	info.expires_max = pinfo->expires_max;
+	info.flags = pinfo->flags;
+	info.invflags = pinfo->invflags;
+	memcpy(*dstptr + sizeof(struct compat_ipt_entry_match),
+				&info, sizeof(struct xt_conntrack_info));
+	msize += off;
+	dstpm->u.user.match_size = msize;
+	*size += off;
+	*dstptr += msize;
+	return 0;
+}
+
+static int compat(void *match, void **dstptr, int *size, int convert)
+{
+	int ret, off;
+
+	off = XT_ALIGN(sizeof(struct xt_conntrack_info)) -
+		COMPAT_XT_ALIGN(sizeof(struct compat_xt_conntrack_info));
+	switch (convert) {
+		case COMPAT_TO_USER:
+			ret = compat_to_user(match, dstptr, size, off);
+			break;
+		case COMPAT_FROM_USER:
+			ret = compat_from_user(match, dstptr, size, off);
+			break;
+		case COMPAT_CALC_SIZE:
+			*size += off;
+			ret = 0;
+			break;
+		default:
+			ret = -ENOPROTOOPT;
+			break;
+	}
+	return ret;
+}
+#endif
+
 static struct xt_match conntrack_match = {
 	.name		= "conntrack",
 	.match		= &match,
 	.checkentry	= &check,
+#ifdef CONFIG_COMPAT
+	.compat		= &compat,
+#endif
 	.me		= THIS_MODULE,
 };
 
+int init_xt_conntrack_match(void)
+{
+	return xt_register_match(AF_INET, &conntrack_match);
+}
+
+void fini_xt_conntrack_match(void)
+{
+	xt_unregister_match(AF_INET, &conntrack_match);
+}
+
 static int __init init(void)
 {
 	int ret;
 	need_conntrack();
-	ret = xt_register_match(AF_INET, &conntrack_match);
-
+	ret = init_xt_conntrack_match();
+	if (ret < 0)
+		return ret;
+
+	KSYMRESOLVE(init_xt_conntrack_match);
+	KSYMRESOLVE(fini_xt_conntrack_match);
+	KSYMMODRESOLVE(xt_conntrack);
 	return ret;
 }
 
 static void __exit fini(void)
 {
-	xt_unregister_match(AF_INET, &conntrack_match);
+	KSYMMODUNRESOLVE(xt_conntrack);
+	KSYMUNRESOLVE(init_xt_conntrack_match);
+	KSYMUNRESOLVE(fini_xt_conntrack_match);
+	fini_xt_conntrack_match();
 }
 
 module_init(init);
diff -uprN linux-2.6.16/net/netfilter/xt_helper.c linux-2.6.16.ovz/net/netfilter/xt_helper.c
--- linux-2.6.16/net/netfilter/xt_helper.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/net/netfilter/xt_helper.c	2006-07-05 08:34:56.000000000 -0400
@@ -24,6 +24,8 @@
 #endif
 #include <linux/netfilter/x_tables.h>
 #include <linux/netfilter/xt_helper.h>
+#include <linux/netfilter_ipv4/ip_tables.h>
+#include <linux/nfcalls.h>
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Martin Josefsson <gandalf@netfilter.org>");
@@ -148,23 +150,107 @@ static int check(const char *tablename,
 	return 1;
 }
 
+#ifdef CONFIG_COMPAT
+static int compat_to_user(void *match, void **dstptr,
+		int *size, int off)
+{
+	struct ipt_entry_match *pm;
+	struct xt_helper_info *pinfo;
+	struct compat_xt_helper_info info;
+	u_int16_t msize;
+
+	pm = (struct ipt_entry_match *)match;
+	msize = pm->u.user.match_size;
+	if (__copy_to_user(*dstptr, pm, sizeof(struct ipt_entry_match)))
+		return -EFAULT;
+	pinfo = (struct xt_helper_info *)pm->data;
+	memset(&info, 0, sizeof(struct compat_xt_helper_info));
+	info.invert = pinfo->invert;
+	memcpy(info.name, pinfo->name, sizeof(info.name));
+	if (__copy_to_user(*dstptr + sizeof(struct ipt_entry_match),
+				&info, sizeof(struct compat_xt_helper_info)))
+		return -EFAULT;
+	msize -= off;
+	if (put_user(msize, (u_int16_t *)*dstptr))
+		return -EFAULT;
+	*size -= off;
+	*dstptr += msize;
+	return 0;
+}
+
+static int compat_from_user(void *match, void **dstptr,
+		int *size, int off)
+{
+	struct compat_ipt_entry_match *pm;
+	struct ipt_entry_match *dstpm;
+	struct compat_xt_helper_info *pinfo;
+	struct xt_helper_info info;
+	u_int16_t msize;
+
+	pm = (struct compat_ipt_entry_match *)match;
+	dstpm = (struct ipt_entry_match *)*dstptr;
+	msize = pm->u.user.match_size;
+	memcpy(*dstptr, pm, sizeof(struct compat_ipt_entry_match));
+	pinfo = (struct compat_xt_helper_info *)pm->data;
+	memset(&info, 0, sizeof(struct xt_helper_info));
+	info.invert = pinfo->invert;
+	memcpy(info.name, pinfo->name, sizeof(info.name));
+	memcpy(*dstptr + sizeof(struct compat_ipt_entry_match),
+				&info, sizeof(struct xt_helper_info));
+	msize += off;
+	dstpm->u.user.match_size = msize;
+	*size += off;
+	*dstptr += msize;
+	return 0;
+}
+
+static int compat(void *match, void **dstptr, int *size, int convert)
+{
+	int ret, off;
+
+	off = XT_ALIGN(sizeof(struct xt_helper_info)) -
+		COMPAT_XT_ALIGN(sizeof(struct compat_xt_helper_info));
+	switch (convert) {
+		case COMPAT_TO_USER:
+			ret = compat_to_user(match, dstptr, size, off);
+			break;
+		case COMPAT_FROM_USER:
+			ret = compat_from_user(match, dstptr, size, off);
+			break;
+		case COMPAT_CALC_SIZE:
+			*size += off;
+			ret = 0;
+			break;
+		default:
+			ret = -ENOPROTOOPT;
+			break;
+	}
+	return ret;
+}
+#endif
+
 static struct xt_match helper_match = {
 	.name		= "helper",
 	.match		= &match,
 	.checkentry	= &check,
+#ifdef CONFIG_COMPAT
+	.compat		= &compat,
+#endif
 	.me		= THIS_MODULE,
 };
 static struct xt_match helper6_match = {
 	.name		= "helper",
 	.match		= &match,
 	.checkentry	= &check,
+#ifdef CONFIG_COMPAT
+	.compat		= &compat,
+#endif
 	.me		= THIS_MODULE,
 };
 
-static int __init init(void)
+int init_xt_helper(void)
 {
 	int ret;
-	need_conntrack();
 
 	ret = xt_register_match(AF_INET, &helper_match);
 	if (ret < 0)
@@ -177,12 +263,35 @@ static int __init init(void)
 	return ret;
 }
 
-static void __exit fini(void)
+void fini_xt_helper(void)
 {
 	xt_unregister_match(AF_INET, &helper_match);
 	xt_unregister_match(AF_INET6, &helper6_match);
 }
 
+static int __init init(void)
+{
+	int err;
+
+	need_conntrack();
+	err = init_xt_helper();
+	if (err < 0)
+		return err;
+
+	KSYMRESOLVE(init_xt_helper);
+	KSYMRESOLVE(fini_xt_helper);
+	KSYMMODRESOLVE(xt_helper);
+	return 0;
+}
+
+static void __exit fini(void)
+{
+	KSYMMODUNRESOLVE(xt_helper);
+	KSYMUNRESOLVE(init_xt_helper);
+	KSYMUNRESOLVE(fini_xt_helper);
+	fini_xt_helper();
+}
+
 module_init(init);
 module_exit(fini);
 
diff -uprN linux-2.6.16/net/netfilter/xt_length.c linux-2.6.16.ovz/net/netfilter/xt_length.c
--- linux-2.6.16/net/netfilter/xt_length.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/net/netfilter/xt_length.c	2006-07-05 08:34:56.000000000 -0400
@@ -13,6 +13,7 @@
 
 #include <linux/netfilter/xt_length.h>
 #include <linux/netfilter/x_tables.h>
+#include <linux/nfcalls.h>
 
 MODULE_AUTHOR("James Morris <jmorris@intercode.com.au>");
 MODULE_DESCRIPTION("IP tables packet length matching module");
@@ -63,20 +64,38 @@ checkentry(const char *tablename,
 	return 1;
 }
 
+#ifdef CONFIG_COMPAT
+static int compat(void *match,
+		void **dstptr, int *size, int convert)
+{
+	int off;
+
+	off = XT_ALIGN(sizeof(struct xt_length_info)) -
+		COMPAT_XT_ALIGN(sizeof(struct xt_length_info));
+	return ipt_match_align_compat(match, dstptr, size, off, convert);
+}
+#endif
+
 static struct xt_match length_match = {
 	.name		= "length",
 	.match		= &match,
 	.checkentry	= &checkentry,
+#ifdef CONFIG_COMPAT
+	.compat		= &compat,
+#endif
 	.me		= THIS_MODULE,
 };
 static struct xt_match length6_match = {
 	.name		= "length",
 	.match		= &match6,
 	.checkentry	= &checkentry,
+#ifdef CONFIG_COMPAT
+	.compat		= &compat,
+#endif
 	.me		= THIS_MODULE,
 };
 
-static int __init init(void)
+int init_xt_length(void)
 {
 	int ret;
 	ret = xt_register_match(AF_INET, &length_match);
@@ -89,11 +108,33 @@ static int __init init(void)
 	return ret;
 }
 
-static void __exit fini(void)
+void fini_xt_length(void)
 {
 	xt_unregister_match(AF_INET, &length_match);
 	xt_unregister_match(AF_INET6, &length6_match);
 }
 
+static int __init init(void)
+{
+	int err;
+
+	err = init_xt_length();
+	if (err < 0)
+		return err;
+
+	KSYMRESOLVE(init_xt_length);
+	KSYMRESOLVE(fini_xt_length);
+	KSYMMODRESOLVE(xt_length);
+	return 0;
+}
+
+static void __exit fini(void)
+{
+	KSYMMODUNRESOLVE(xt_length);
+	KSYMUNRESOLVE(init_xt_length);
+	KSYMUNRESOLVE(fini_xt_length);
+	fini_xt_length();
+}
+
 module_init(init);
 module_exit(fini);
diff -uprN linux-2.6.16/net/netfilter/xt_limit.c linux-2.6.16.ovz/net/netfilter/xt_limit.c
--- linux-2.6.16/net/netfilter/xt_limit.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/net/netfilter/xt_limit.c	2006-07-05 08:34:56.000000000 -0400
@@ -17,9 +17,11 @@
 #include <linux/skbuff.h>
 #include <linux/spinlock.h>
 #include <linux/interrupt.h>
+#include <linux/nfcalls.h>
 
 #include <linux/netfilter/x_tables.h>
 #include <linux/netfilter/xt_limit.h>
+#include <linux/netfilter_ipv4/ip_tables.h>
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Herve Eychenne <rv@wallfire.org>");
@@ -27,6 +29,13 @@ MODULE_DESCRIPTION("iptables rate limit 
 MODULE_ALIAS("ipt_limit");
 MODULE_ALIAS("ip6t_limit");
 
+#ifdef CONFIG_VE_IPTABLES
+#include <linux/sched.h>
+#define ve_ipt_limit_reg	(*(get_exec_env()->_ipt_limit_reg))
+#else
+#define ve_ipt_limit_reg	ipt_limit_reg
+#endif
+
 /* The algorithm used is the Simple Token Bucket Filter (TBF)
  * see net/sched/sch_tbf.c in the linux source tree
  */
@@ -137,20 +146,108 @@ ipt_limit_checkentry(const char *tablena
 	return 1;
 }
 
+#ifdef CONFIG_COMPAT
+static int ipt_limit_compat_to_user(void *match, void **dstptr,
+		int *size, int off)
+{
+	struct ipt_entry_match *pm;
+	struct xt_rateinfo *pinfo;
+	struct compat_xt_rateinfo rinfo;
+	u_int16_t msize;
+
+	pm = (struct ipt_entry_match *)match;
+	msize = pm->u.user.match_size;
+	if (__copy_to_user(*dstptr, pm, sizeof(struct ipt_entry_match)))
+		return -EFAULT;
+	pinfo = (struct xt_rateinfo *)pm->data;
+	memset(&rinfo, 0, sizeof(struct compat_xt_rateinfo));
+	rinfo.avg = pinfo->avg;
+	rinfo.burst = pinfo->burst;
+	if (__copy_to_user(*dstptr + sizeof(struct ipt_entry_match),
+				&rinfo, sizeof(struct compat_xt_rateinfo)))
+		return -EFAULT;
+	msize -= off;
+	if (put_user(msize, (u_int16_t *)*dstptr))
+		return -EFAULT;
+	*size -= off;
+	*dstptr += msize;
+	return 0;
+}
+
+static int ipt_limit_compat_from_user(void *match, void **dstptr,
+		int *size, int off)
+{
+	struct compat_ipt_entry_match *pm;
+	struct ipt_entry_match *dstpm;
+	struct compat_xt_rateinfo *pinfo;
+	struct xt_rateinfo rinfo;
+	u_int16_t msize;
+
+	pm = (struct compat_ipt_entry_match *)match;
+	dstpm = (struct ipt_entry_match *)*dstptr;
+	msize = pm->u.user.match_size;
+	memcpy(*dstptr, pm, sizeof(struct compat_ipt_entry_match));
+	pinfo = (struct compat_xt_rateinfo *)pm->data;
+	memset(&rinfo, 0, sizeof(struct xt_rateinfo));
+	rinfo.avg = pinfo->avg;
+	rinfo.burst = pinfo->burst;
+	memcpy(*dstptr + sizeof(struct compat_ipt_entry_match),
+				&rinfo, sizeof(struct xt_rateinfo));
+	msize += off;
+	dstpm->u.user.match_size = msize;
+	*size += off;
+	*dstptr += msize;
+	return 0;
+}
+
+static int ipt_limit_compat(void *match, void **dstptr,
+		int *size, int convert)
+{
+	int ret, off;
+
+	off = XT_ALIGN(sizeof(struct xt_rateinfo)) -
+		COMPAT_XT_ALIGN(sizeof(struct compat_xt_rateinfo));
+	switch (convert) {
+		case COMPAT_TO_USER:
+			ret = ipt_limit_compat_to_user(match,
+					dstptr, size, off);
+			break;
+		case COMPAT_FROM_USER:
+			ret = ipt_limit_compat_from_user(match,
+					dstptr, size, off);
+			break;
+		case COMPAT_CALC_SIZE:
+			*size += off;
+			ret = 0;
+			break;
+		default:
+			ret = -ENOPROTOOPT;
+			break;
+	}
+	return ret;
+}
+#endif
+
 static struct xt_match ipt_limit_reg = {
 	.name		= "limit",
 	.match		= ipt_limit_match,
 	.checkentry	= ipt_limit_checkentry,
+#ifdef CONFIG_COMPAT
+	.compat		= ipt_limit_compat,
+#endif
 	.me		= THIS_MODULE,
 };
 static struct xt_match limit6_reg = {
 	.name		= "limit",
 	.match		= ipt_limit_match,
 	.checkentry	= ipt_limit_checkentry,
+#ifdef CONFIG_COMPAT
+	.compat		= ipt_limit_compat,
+#endif
 	.me		= THIS_MODULE,
 };
 
-static int __init init(void)
+int init_xt_limit(void)
 {
 	int ret;
 	
@@ -165,11 +262,33 @@ static int __init init(void)
 	return ret;
 }
 
-static void __exit fini(void)
+void fini_xt_limit(void)
 {
 	xt_unregister_match(AF_INET, &ipt_limit_reg);
 	xt_unregister_match(AF_INET6, &limit6_reg);
 }
 
+static int __init init(void)
+{
+	int err;
+
+	err = init_xt_limit();
+	if (err < 0)
+		return err;
+
+	KSYMRESOLVE(init_xt_limit);
+	KSYMRESOLVE(fini_xt_limit);
+	KSYMMODRESOLVE(xt_limit);
+	return 0;
+}
+
+static void __exit fini(void)
+{
+	KSYMMODUNRESOLVE(xt_limit);
+	KSYMUNRESOLVE(init_xt_limit);
+	KSYMUNRESOLVE(fini_xt_limit);
+	fini_xt_limit();
+}
+
 module_init(init);
 module_exit(fini);
diff -uprN linux-2.6.16/net/netfilter/xt_sctp.c linux-2.6.16.ovz/net/netfilter/xt_sctp.c
--- linux-2.6.16/net/netfilter/xt_sctp.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/net/netfilter/xt_sctp.c	2006-07-05 08:34:56.000000000 -0400
@@ -62,7 +62,7 @@ match_packet(const struct sk_buff *skb,
 
 	do {
 		sch = skb_header_pointer(skb, offset, sizeof(_sch), &_sch);
-		if (sch == NULL) {
+		if (sch == NULL || sch->length == 0) {
 			duprintf("Dropping invalid SCTP packet.\n");
 			*hotdrop = 1;
 			return 0;
diff -uprN linux-2.6.16/net/netfilter/xt_state.c linux-2.6.16.ovz/net/netfilter/xt_state.c
--- linux-2.6.16/net/netfilter/xt_state.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/net/netfilter/xt_state.c	2006-07-05 08:34:56.000000000 -0400
@@ -10,9 +10,11 @@
 
 #include <linux/module.h>
 #include <linux/skbuff.h>
+#include <linux/nfcalls.h>
 #include <net/netfilter/nf_conntrack_compat.h>
 #include <linux/netfilter/x_tables.h>
 #include <linux/netfilter/xt_state.h>
+#include <linux/netfilter_ipv4/ip_tables.h>
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Rusty Russell <rusty@rustcorp.com.au>");
@@ -55,10 +57,90 @@ static int check(const char *tablename,
 	return 1;
 }
 
+#ifdef CONFIG_COMPAT
+static int compat_to_user(void *match, void **dstptr,
+		int *size, int off)
+{
+	struct ipt_entry_match *pm;
+	struct xt_state_info *pinfo;
+	struct compat_xt_state_info info;
+	u_int16_t msize;
+
+	pm = (struct ipt_entry_match *)match;
+	msize = pm->u.user.match_size;
+	if (__copy_to_user(*dstptr, pm, sizeof(struct ipt_entry_match)))
+		return -EFAULT;
+	pinfo = (struct xt_state_info *)pm->data;
+	memset(&info, 0, sizeof(struct compat_xt_state_info));
+	info.statemask = pinfo->statemask;
+	if (__copy_to_user(*dstptr + sizeof(struct ipt_entry_match),
+				&info, sizeof(struct compat_xt_state_info)))
+		return -EFAULT;
+	msize -= off;
+	if (put_user(msize, (u_int16_t *)*dstptr))
+		return -EFAULT;
+	*size -= off;
+	*dstptr += msize;
+	return 0;
+}
+
+static int compat_from_user(void *match, void **dstptr,
+		int *size, int off)
+{
+	struct compat_ipt_entry_match *pm;
+	struct ipt_entry_match *dstpm;
+	struct compat_xt_state_info *pinfo;
+	struct xt_state_info info;
+	u_int16_t msize;
+
+	pm = (struct compat_ipt_entry_match *)match;
+	dstpm = (struct ipt_entry_match *)*dstptr;
+	msize = pm->u.user.match_size;
+	memcpy(*dstptr, pm, sizeof(struct compat_ipt_entry_match));
+	pinfo = (struct compat_xt_state_info *)pm->data;
+	memset(&info, 0, sizeof(struct xt_state_info));
+	info.statemask = pinfo->statemask;
+	memcpy(*dstptr + sizeof(struct compat_ipt_entry_match),
+				&info, sizeof(struct xt_state_info));
+	msize += off;
+	dstpm->u.user.match_size = msize;
+	*size += off;
+	*dstptr += msize;
+	return 0;
+}
+
+static int compat(void *match, void **dstptr, int *size, int convert)
+{
+	int ret, off;
+
+	off = XT_ALIGN(sizeof(struct xt_state_info)) -
+		COMPAT_XT_ALIGN(sizeof(struct compat_xt_state_info));
+	switch (convert) {
+		case COMPAT_TO_USER:
+			ret = compat_to_user(match, dstptr, size, off);
+			break;
+		case COMPAT_FROM_USER:
+			ret = compat_from_user(match, dstptr, size, off);
+			break;
+		case COMPAT_CALC_SIZE:
+			*size += off;
+			ret = 0;
+			break;
+		default:
+			ret = -ENOPROTOOPT;
+			break;
+	}
+	return ret;
+}
+#endif
+
 static struct xt_match state_match = {
 	.name		= "state",
 	.match		= &match,
 	.checkentry	= &check,
+#ifdef CONFIG_COMPAT
+	.compat		= &compat,
+#endif
 	.me		= THIS_MODULE,
 };
 
@@ -66,15 +148,16 @@ static struct xt_match state6_match = {
 	.name		= "state",
 	.match		= &match,
 	.checkentry	= &check,
+#ifdef CONFIG_COMPAT
+	.compat		= &compat,
+#endif
 	.me		= THIS_MODULE,
 };
 
-static int __init init(void)
+int init_xt_state(void)
 {
 	int ret;
 
-	need_conntrack();
-
 	ret = xt_register_match(AF_INET, &state_match);
 	if (ret < 0)
 		return ret;
@@ -86,11 +169,34 @@ static int __init init(void)
 	return ret;
 }
 
-static void __exit fini(void)
+void fini_xt_state(void)
 {
 	xt_unregister_match(AF_INET, &state_match);
 	xt_unregister_match(AF_INET6, &state6_match);
 }
 
+static int __init init(void)
+{
+	int err;
+
+	need_conntrack();
+	err = init_xt_state();
+	if (err < 0)
+		return err;
+
+	KSYMRESOLVE(init_xt_state);
+	KSYMRESOLVE(fini_xt_state);
+	KSYMMODRESOLVE(xt_state);
+	return 0;
+}
+
+static void __exit fini(void)
+{
+	KSYMMODUNRESOLVE(xt_state);
+	KSYMUNRESOLVE(init_xt_state);
+	KSYMUNRESOLVE(fini_xt_state);
+	fini_xt_state();
+}
+
 module_init(init);
 module_exit(fini);
diff -uprN linux-2.6.16/net/netfilter/xt_tcpmss.c linux-2.6.16.ovz/net/netfilter/xt_tcpmss.c
--- linux-2.6.16/net/netfilter/xt_tcpmss.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/net/netfilter/xt_tcpmss.c	2006-07-05 08:34:56.000000000 -0400
@@ -11,6 +11,7 @@
 #include <linux/module.h>
 #include <linux/skbuff.h>
 #include <net/tcp.h>
+#include <linux/nfcalls.h>
 
 #include <linux/netfilter/xt_tcpmss.h>
 #include <linux/netfilter/x_tables.h>
@@ -133,10 +134,25 @@ checkentry6(const char *tablename,
 	return 1;
 }
 
+#ifdef CONFIG_COMPAT
+static int compat(void *match,
+		void **dstptr, int *size, int convert)
+{
+	int off;
+
+	off = XT_ALIGN(sizeof(struct xt_tcpmss_match_info)) -
+		COMPAT_XT_ALIGN(sizeof(struct xt_tcpmss_match_info));
+	return ipt_match_align_compat(match, dstptr, size, off, convert);
+}
+#endif
+
 static struct xt_match tcpmss_match = {
 	.name		= "tcpmss",
 	.match		= &match,
 	.checkentry	= &checkentry,
+#ifdef CONFIG_COMPAT
+	.compat		= &compat,
+#endif
 	.me		= THIS_MODULE,
 };
 
@@ -144,11 +160,14 @@ static struct xt_match tcpmss6_match = {
 	.name		= "tcpmss",
 	.match		= &match,
 	.checkentry	= &checkentry6,
+#ifdef CONFIG_COMPAT
+	.compat		= &compat,
+#endif
 	.me		= THIS_MODULE,
 };
 
 
-static int __init init(void)
+int init_xt_tcpmss(void)
 {
 	int ret;
 	ret = xt_register_match(AF_INET, &tcpmss_match);
@@ -162,11 +181,33 @@ static int __init init(void)
 	return ret;
 }
 
-static void __exit fini(void)
+void fini_xt_tcpmss(void)
 {
 	xt_unregister_match(AF_INET6, &tcpmss6_match);
 	xt_unregister_match(AF_INET, &tcpmss_match);
 }
 
+static int __init init(void)
+{
+	int err;
+
+	err = init_xt_tcpmss();
+	if (err < 0)
+		return err;
+
+	KSYMRESOLVE(init_xt_tcpmss);
+	KSYMRESOLVE(fini_xt_tcpmss);
+	KSYMMODRESOLVE(xt_tcpmss);
+	return 0;
+}
+
+static void __exit fini(void)
+{
+	KSYMMODUNRESOLVE(xt_tcpmss);
+	KSYMUNRESOLVE(init_xt_tcpmss);
+	KSYMUNRESOLVE(fini_xt_tcpmss);
+	fini_xt_tcpmss();
+}
+
 module_init(init);
 module_exit(fini);
diff -uprN linux-2.6.16/net/netfilter/xt_tcpudp.c linux-2.6.16.ovz/net/netfilter/xt_tcpudp.c
--- linux-2.6.16/net/netfilter/xt_tcpudp.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/net/netfilter/xt_tcpudp.c	2006-07-05 08:34:56.000000000 -0400
@@ -5,6 +5,7 @@
 #include <net/ipv6.h>
 #include <net/tcp.h>
 #include <net/udp.h>
+#include <linux/nfcalls.h>
 #include <linux/netfilter/x_tables.h>
 #include <linux/netfilter/xt_tcpudp.h>
 #include <linux/netfilter_ipv4/ip_tables.h>
@@ -266,10 +267,35 @@ udp6_checkentry(const char *tablename,
 	return 1;
 }
 
+#ifdef CONFIG_COMPAT
+static int tcp_compat(void *match,
+		void **dstptr, int *size, int convert)
+{
+	int off;
+
+	off = XT_ALIGN(sizeof(struct xt_tcp)) -
+		COMPAT_XT_ALIGN(sizeof(struct xt_tcp));
+	return ipt_match_align_compat(match, dstptr, size, off, convert);
+}
+
+static int udp_compat(void *match,
+		void **dstptr, int *size, int convert)
+{
+	int off;
+
+	off = XT_ALIGN(sizeof(struct xt_udp)) -
+		COMPAT_XT_ALIGN(sizeof(struct xt_udp));
+	return ipt_match_align_compat(match, dstptr, size, off, convert);
+}
+#endif
+
 static struct xt_match tcp_matchstruct = {
 	.name		= "tcp",
 	.match		= &tcp_match,
 	.checkentry	= &tcp_checkentry,
+#ifdef CONFIG_COMPAT
+	.compat		= &tcp_compat,
+#endif
 	.me		= THIS_MODULE,
 };
 static struct xt_match tcp6_matchstruct = {
@@ -283,6 +309,9 @@ static struct xt_match udp_matchstruct =
 	.name		= "udp",
 	.match		= &udp_match,
 	.checkentry	= &udp_checkentry,
+#ifdef CONFIG_COMPAT
+	.compat		= &udp_compat,
+#endif
 	.me		= THIS_MODULE,
 };
 static struct xt_match udp6_matchstruct = {
@@ -292,7 +321,7 @@ static struct xt_match udp6_matchstruct 
 	.me		= THIS_MODULE,
 };
 
-static int __init init(void)
+int init_xt_tcpudp(void)
 {
 	int ret;
 	ret = xt_register_match(AF_INET, &tcp_matchstruct);
@@ -322,7 +351,7 @@ out_unreg_tcp:
 	return ret;
 }
 
-static void __exit fini(void)
+void fini_xt_tcpudp(void)
 {
 	xt_unregister_match(AF_INET6, &udp6_matchstruct);
 	xt_unregister_match(AF_INET, &udp_matchstruct);
@@ -330,5 +359,27 @@ static void __exit fini(void)
 	xt_unregister_match(AF_INET, &tcp_matchstruct);
 }
 
+static int __init init(void)
+{
+	int err;
+
+	err = init_xt_tcpudp();
+	if (err < 0)
+		return err;
+
+	KSYMRESOLVE(init_xt_tcpudp);
+	KSYMRESOLVE(fini_xt_tcpudp);
+	KSYMMODRESOLVE(xt_tcpudp);
+	return 0;
+}
+
+static void __exit fini(void)
+{
+	KSYMMODUNRESOLVE(xt_tcpudp);
+	KSYMUNRESOLVE(init_xt_tcpudp);
+	KSYMUNRESOLVE(fini_xt_tcpudp);
+	fini_xt_tcpudp();
+}
+
 module_init(init);
 module_exit(fini);
diff -uprN linux-2.6.16/net/netlink/af_netlink.c linux-2.6.16.ovz/net/netlink/af_netlink.c
--- linux-2.6.16/net/netlink/af_netlink.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/net/netlink/af_netlink.c	2006-07-05 08:34:56.000000000 -0400
@@ -60,27 +60,14 @@
 #include <net/sock.h>
 #include <net/scm.h>
 #include <net/netlink.h>
+#include <net/netlink_sock.h>
+
+#include <ub/beancounter.h>
+#include <ub/ub_net.h>
 
 #define Nprintk(a...)
 #define NLGRPSZ(x)	(ALIGN(x, sizeof(unsigned long) * 8) / 8)
 
-struct netlink_sock {
-	/* struct sock has to be the first member of netlink_sock */
-	struct sock		sk;
-	u32			pid;
-	u32			dst_pid;
-	u32			dst_group;
-	u32			flags;
-	u32			subscriptions;
-	u32			ngroups;
-	unsigned long		*groups;
-	unsigned long		state;
-	wait_queue_head_t	wait;
-	struct netlink_callback	*cb;
-	spinlock_t		cb_lock;
-	void			(*data_ready)(struct sock *sk, int bytes);
-	struct module		*module;
-};
 
 #define NETLINK_KERNEL_SOCKET	0x1
 #define NETLINK_RECV_PKTINFO	0x2
@@ -209,7 +196,10 @@ static __inline__ struct sock *netlink_l
 	read_lock(&nl_table_lock);
 	head = nl_pid_hashfn(hash, pid);
 	sk_for_each(sk, node, head) {
-		if (nlk_sk(sk)->pid == pid) {
+		/* VEs should find sockets, created by kernel */
+		if ((nlk_sk(sk)->pid == pid) &&
+				(!pid || ve_accessible_strict(VE_OWNER_SK(sk),
+							      get_exec_env()))){
 			sock_hold(sk);
 			goto found;
 		}
@@ -309,7 +299,9 @@ static int netlink_insert(struct sock *s
 	head = nl_pid_hashfn(hash, pid);
 	len = 0;
 	sk_for_each(osk, node, head) {
-		if (nlk_sk(osk)->pid == pid)
+		/* test the hashed socket osk, not the sk being inserted */
+		if (nlk_sk(osk)->pid == pid &&
+		    ve_accessible_strict(VE_OWNER_SK(osk), get_exec_env()))
 			break;
 		len++;
 	}
@@ -362,6 +354,8 @@ static int __netlink_create(struct socke
 	sk = sk_alloc(PF_NETLINK, GFP_KERNEL, &netlink_proto, 1);
 	if (!sk)
 		return -ENOMEM;
+	if (ub_other_sock_charge(sk))
+		goto out_free;
 
 	sock_init_data(sock, sk);
 
@@ -372,6 +366,10 @@ static int __netlink_create(struct socke
 	sk->sk_destruct = netlink_sock_destruct;
 	sk->sk_protocol = protocol;
 	return 0;
+
+out_free:
+	sk_free(sk);
+	return -ENOMEM;
 }
 
 static int netlink_create(struct socket *sock, int protocol)
@@ -425,6 +423,7 @@ static int netlink_release(struct socket
 		return 0;
 
 	netlink_remove(sk);
+	sock_orphan(sk);
 	nlk = nlk_sk(sk);
 
 	spin_lock(&nlk->cb_lock);
@@ -439,7 +438,6 @@ static int netlink_release(struct socket
 	/* OK. Socket is unlinked, and, therefore,
 	   no new packets will arrive */
 
-	sock_orphan(sk);
 	sock->sk = NULL;
 	wake_up_interruptible_all(&nlk->wait);
 
@@ -477,7 +475,7 @@ static int netlink_autobind(struct socke
 	struct hlist_head *head;
 	struct sock *osk;
 	struct hlist_node *node;
-	s32 pid = current->tgid;
+	s32 pid = virt_pid(current);
 	int err;
 	static s32 rover = -4097;
 
@@ -486,7 +484,9 @@ retry:
 	netlink_table_grab();
 	head = nl_pid_hashfn(hash, pid);
 	sk_for_each(osk, node, head) {
-		if (nlk_sk(osk)->pid == pid) {
+		if ((nlk_sk(osk)->pid == pid) &&
+				ve_accessible_strict(VE_OWNER_SK(osk),
+					get_exec_env())) {
 			/* Bind collision, search negative pid values. */
 			pid = rover--;
 			if (rover > -4097)
@@ -511,7 +511,7 @@ retry:
 static inline int netlink_capable(struct socket *sock, unsigned int flag) 
 { 
 	return (nl_table[sock->sk->sk_protocol].nl_nonroot & flag) ||
-	       capable(CAP_NET_ADMIN);
+	       capable(CAP_VE_NET_ADMIN);
 } 
 
 static void
@@ -845,6 +845,9 @@ static inline int do_one_broadcast(struc
 	    !test_bit(p->group - 1, nlk->groups))
 		goto out;
 
+	if (!ve_accessible_strict(get_exec_env(), VE_OWNER_SK(sk)))
+		goto out;
+
 	if (p->failure) {
 		netlink_overrun(sk);
 		goto out;
@@ -942,6 +945,9 @@ static inline int do_one_set_err(struct 
 	    !test_bit(p->group - 1, nlk->groups))
 		goto out;
 
+	if (!ve_accessible_strict(get_exec_env(), VE_OWNER_SK(sk)))
+		goto out;
+
 	sk->sk_err = p->code;
 	sk->sk_error_report(sk);
 out:
@@ -1076,12 +1082,17 @@ static int netlink_sendmsg(struct kiocb 
 	struct sock_iocb *siocb = kiocb_to_siocb(kiocb);
 	struct sock *sk = sock->sk;
 	struct netlink_sock *nlk = nlk_sk(sk);
-	struct sockaddr_nl *addr=msg->msg_name;
+	struct sockaddr_nl *addr = msg->msg_name;
 	u32 dst_pid;
-	u32 dst_group;
 	struct sk_buff *skb;
 	int err;
 	struct scm_cookie scm;
+	struct sock *dstsk;
+	long timeo;
+	int no_ubc, no_buf;
+	unsigned long chargesize;
+
+	DECLARE_WAITQUEUE(wait, current);
 
 	if (msg->msg_flags&MSG_OOB)
 		return -EOPNOTSUPP;
@@ -1092,17 +1103,16 @@ static int netlink_sendmsg(struct kiocb 
 	if (err < 0)
 		return err;
 
+	/* Broadcasts from user to kernel are disabled. This is OK
+	 * according to ANK */
 	if (msg->msg_namelen) {
 		if (addr->nl_family != AF_NETLINK)
 			return -EINVAL;
 		dst_pid = addr->nl_pid;
-		dst_group = ffs(addr->nl_groups);
-		if (dst_group && !netlink_capable(sock, NL_NONROOT_SEND))
+		if (addr->nl_groups && !netlink_capable(sock, NL_NONROOT_SEND))
 			return -EPERM;
-	} else {
+	} else
 		dst_pid = nlk->dst_pid;
-		dst_group = nlk->dst_group;
-	}
 
 	if (!nlk->pid) {
 		err = netlink_autobind(sock);
@@ -1115,12 +1125,12 @@ static int netlink_sendmsg(struct kiocb 
 		goto out;
 	err = -ENOBUFS;
 	skb = alloc_skb(len, GFP_KERNEL);
-	if (skb==NULL)
+	if (skb == NULL)
 		goto out;
 
 	NETLINK_CB(skb).pid	= nlk->pid;
 	NETLINK_CB(skb).dst_pid = dst_pid;
-	NETLINK_CB(skb).dst_group = dst_group;
+	NETLINK_CB(skb).dst_group = 0;
 	NETLINK_CB(skb).loginuid = audit_get_loginuid(current->audit_context);
 	memcpy(NETLINK_CREDS(skb), &siocb->scm->creds, sizeof(struct ucred));
 
@@ -1131,25 +1141,88 @@ static int netlink_sendmsg(struct kiocb 
 	 */
 
 	err = -EFAULT;
-	if (memcpy_fromiovec(skb_put(skb,len), msg->msg_iov, len)) {
-		kfree_skb(skb);
-		goto out;
-	}
+	if (memcpy_fromiovec(skb_put(skb,len), msg->msg_iov, len))
+		goto out_free;
 
 	err = security_netlink_send(sk, skb);
-	if (err) {
-		kfree_skb(skb);
-		goto out;
+	if (err)
+		goto out_free;
+
+	timeo = sock_sndtimeo(sk, msg->msg_flags&MSG_DONTWAIT);
+retry:
+	dstsk = netlink_getsockbypid(sk, dst_pid);
+	if (IS_ERR(dstsk)) {
+		err = PTR_ERR(dstsk);
+		goto out_free;
 	}
 
-	if (dst_group) {
-		atomic_inc(&skb->users);
-		netlink_broadcast(sk, skb, dst_pid, dst_group, GFP_KERNEL);
+	nlk = nlk_sk(dstsk);
+#ifdef NL_EMULATE_DEV
+	if (nlk->handler) {
+		skb_orphan(skb);
+		err = nlk->handler(protocol, skb);
+		goto out_put;
+	}
+#endif
+
+	/* BTW, it could be done once, before the retry loop */
+	chargesize = skb_charge_fullsize(skb);
+	no_ubc = ub_sock_getwres_other(sk, chargesize);
+	no_buf = atomic_read(&dstsk->sk_rmem_alloc) > dstsk->sk_rcvbuf ||
+		test_bit(0, &nlk->state);
+	if (no_ubc || no_buf) {
+		wait_queue_head_t *sleep;
+
+		if (!no_ubc)
+			ub_sock_retwres_other(sk, chargesize,
+					      SOCK_MIN_UBCSPACE_CH);
+		err = -EAGAIN;
+		if (timeo == 0) {
+			kfree_skb(skb);
+			goto out_put;
+		}
+
+		/* wake up comes to different queues */
+		sleep = no_ubc ? sk->sk_sleep : &nlk->wait;
+		__set_current_state(TASK_INTERRUPTIBLE);
+		add_wait_queue(sleep, &wait);
+
+		/* this if can't be moved upper because ub_sock_snd_queue_add()
+		 * may change task state to TASK_RUNNING */
+		if (no_ubc)
+			ub_sock_sndqueueadd_other(sk, chargesize);
+
+		if ((atomic_read(&dstsk->sk_rmem_alloc) > dstsk->sk_rcvbuf ||
+		     test_bit(0, &nlk->state) || no_ubc) &&
+		    !sock_flag(dstsk, SOCK_DEAD))
+			timeo = schedule_timeout(timeo);
+
+		__set_current_state(TASK_RUNNING);
+		remove_wait_queue(sleep, &wait);
+		if (no_ubc)
+			ub_sock_sndqueuedel(sk);
+		sock_put(dstsk);
+
+		if (!signal_pending(current))
+			goto retry;
+		err = sock_intr_errno(timeo);
+		goto out_free;
 	}
-	err = netlink_unicast(sk, skb, dst_pid, msg->msg_flags&MSG_DONTWAIT);
 
+	skb_orphan(skb);
+	skb_set_owner_r(skb, dstsk);
+	ub_skb_set_charge(skb, sk, chargesize, UB_OTHERSOCKBUF);
+	skb_queue_tail(&dstsk->sk_receive_queue, skb);
+	dstsk->sk_data_ready(dstsk, len);
+	err = len;
+out_put:
+	sock_put(dstsk);
 out:
 	return err;
+
+out_free:
+	kfree_skb(skb);
+	return err;
 }
 
 static int netlink_recvmsg(struct kiocb *kiocb, struct socket *sock,
@@ -1303,6 +1376,10 @@ static int netlink_dump(struct sock *sk)
 	skb = sock_rmalloc(sk, NLMSG_GOODSIZE, 0, GFP_KERNEL);
 	if (!skb)
 		return -ENOBUFS;
+	if (ub_nlrcvbuf_charge(skb, sk) < 0) {
+		kfree_skb(skb);
+		return -EACCES;
+	}
 
 	spin_lock(&nlk->cb_lock);
 
@@ -1365,9 +1442,9 @@ int netlink_dump_start(struct sock *ssk,
 		return -ECONNREFUSED;
 	}
 	nlk = nlk_sk(sk);
-	/* A dump is in progress... */
+	/* A dump or destruction is in progress... */
 	spin_lock(&nlk->cb_lock);
-	if (nlk->cb) {
+	if (nlk->cb || sock_flag(sk, SOCK_DEAD)) {
 		spin_unlock(&nlk->cb_lock);
 		netlink_destroy_callback(cb);
 		sock_put(sk);
@@ -1471,8 +1548,15 @@ void netlink_run_queue(struct sock *sk, 
 		*qlen = skb_queue_len(&sk->sk_receive_queue);
 
 	for (; *qlen; (*qlen)--) {
+		int ret;
+		struct ve_struct *old_env;
 		skb = skb_dequeue(&sk->sk_receive_queue);
-		if (netlink_rcv_skb(skb, cb)) {
+
+		old_env = set_exec_env(VE_OWNER_SKB(skb));
+		ret = netlink_rcv_skb(skb, cb);
+		(void)set_exec_env(old_env);
+
+		if (ret) {
 			if (skb->len)
 				skb_queue_head(&sk->sk_receive_queue, skb);
 			else {
@@ -1740,6 +1824,7 @@ enomem:
 
 	sock_register(&netlink_family_ops);
 #ifdef CONFIG_PROC_FS
+	/* FIXME: virtualize before give access from VEs */
 	proc_net_fops_create("netlink", 0, &netlink_seq_fops);
 #endif
 	/* The netlink device handler may be needed early. */ 
diff -uprN linux-2.6.16/net/packet/af_packet.c linux-2.6.16.ovz/net/packet/af_packet.c
--- linux-2.6.16/net/packet/af_packet.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/net/packet/af_packet.c	2006-07-05 08:34:56.000000000 -0400
@@ -79,6 +79,8 @@
 #include <linux/module.h>
 #include <linux/init.h>
 
+#include <ub/ub_net.h>
+
 #ifdef CONFIG_INET
 #include <net/inet_common.h>
 #endif
@@ -280,7 +282,8 @@ static int packet_rcv_spkt(struct sk_buf
 	 *	so that this procedure is noop.
 	 */
 
-	if (skb->pkt_type == PACKET_LOOPBACK)
+	if (skb->pkt_type == PACKET_LOOPBACK ||
+	    !ve_accessible(VE_OWNER_SKB(skb), VE_OWNER_SK(sk)))
 		goto out;
 
 	if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL)
@@ -472,6 +475,9 @@ static int packet_rcv(struct sk_buff *sk
 	sk = pt->af_packet_priv;
 	po = pkt_sk(sk);
 
+	if (!ve_accessible(VE_OWNER_SKB(skb), VE_OWNER_SK(sk)))
+		goto drop;
+
 	skb->dev = dev;
 
 	if (dev->hard_header) {
@@ -531,6 +537,9 @@ static int packet_rcv(struct sk_buff *sk
 	if (pskb_trim(skb, snaplen))
 		goto drop_n_acct;
 
+	if (ub_sockrcvbuf_charge(sk, skb))
+		goto drop_n_acct;
+
 	skb_set_owner_r(skb, sk);
 	skb->dev = NULL;
 	dst_release(skb->dst);
@@ -581,6 +590,9 @@ static int tpacket_rcv(struct sk_buff *s
 	sk = pt->af_packet_priv;
 	po = pkt_sk(sk);
 
+	if (!ve_accessible(VE_OWNER_SKB(skb), VE_OWNER_SK(sk)))
+		goto drop;
+
 	if (dev->hard_header) {
 		if (sk->sk_type != SOCK_DGRAM)
 			skb_push(skb, skb->data - skb->mac.raw);
@@ -630,6 +642,12 @@ static int tpacket_rcv(struct sk_buff *s
 	if (snaplen > skb->len-skb->data_len)
 		snaplen = skb->len-skb->data_len;
 
+	if (copy_skb &&
+	    ub_sockrcvbuf_charge(sk, copy_skb)) {
+		spin_lock(&sk->sk_receive_queue.lock);
+		goto ring_is_full;
+	}
+
 	spin_lock(&sk->sk_receive_queue.lock);
 	h = (struct tpacket_hdr *)packet_lookup_frame(po, po->head);
 	
@@ -1010,6 +1028,8 @@ static int packet_create(struct socket *
 	sk = sk_alloc(PF_PACKET, GFP_KERNEL, &packet_proto, 1);
 	if (sk == NULL)
 		goto out;
+	if (ub_other_sock_charge(sk))
+		goto out_free;
 
 	sock->ops = &packet_ops;
 #ifdef CONFIG_SOCK_PACKET
@@ -1048,6 +1068,9 @@ static int packet_create(struct socket *
 	sk_add_node(sk, &packet_sklist);
 	write_unlock_bh(&packet_sklist_lock);
 	return(0);
+
+out_free:
+	sk_free(sk);
 out:
 	return err;
 }
@@ -1430,11 +1453,16 @@ static int packet_notifier(struct notifi
 	struct sock *sk;
 	struct hlist_node *node;
 	struct net_device *dev = (struct net_device*)data;
+	struct ve_struct *ve;
 
+	ve = get_exec_env();
 	read_lock(&packet_sklist_lock);
 	sk_for_each(sk, node, &packet_sklist) {
 		struct packet_sock *po = pkt_sk(sk);
 
+		if (!ve_accessible_strict(VE_OWNER_SK(sk), ve))
+			continue;
+
 		switch (msg) {
 		case NETDEV_UNREGISTER:
 #ifdef CONFIG_PACKET_MULTICAST
@@ -1845,6 +1873,8 @@ static inline struct sock *packet_seq_id
 	struct hlist_node *node;
 
 	sk_for_each(s, node, &packet_sklist) {
+		if (!ve_accessible(VE_OWNER_SK(s), get_exec_env()))
+			continue;
 		if (!off--)
 			return s;
 	}
@@ -1860,9 +1890,13 @@ static void *packet_seq_start(struct seq
 static void *packet_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 {
 	++*pos;
-	return  (v == SEQ_START_TOKEN) 
-		? sk_head(&packet_sklist) 
-		: sk_next((struct sock*)v) ;
+	do {
+		v = (v == SEQ_START_TOKEN) 
+			? sk_head(&packet_sklist) 
+			: sk_next((struct sock*)v);
+	} while (v != NULL &&
+		!ve_accessible(VE_OWNER_SK((struct sock*)v), get_exec_env()));	
+	return v;
 }
 
 static void packet_seq_stop(struct seq_file *seq, void *v)
@@ -1918,7 +1952,7 @@ static struct file_operations packet_seq
 
 static void __exit packet_exit(void)
 {
-	proc_net_remove("packet");
+	remove_proc_glob_entry("net/packet", NULL);
 	unregister_netdevice_notifier(&packet_netdev_notifier);
 	sock_unregister(PF_PACKET);
 	proto_unregister(&packet_proto);
@@ -1933,7 +1967,7 @@ static int __init packet_init(void)
 
 	sock_register(&packet_family_ops);
 	register_netdevice_notifier(&packet_netdev_notifier);
-	proc_net_fops_create("packet", 0, &packet_seq_fops);
+	proc_glob_fops_create("net/packet", 0, &packet_seq_fops);
 out:
 	return rc;
 }
diff -uprN linux-2.6.16/net/sched/sch_cbq.c linux-2.6.16.ovz/net/sched/sch_cbq.c
--- linux-2.6.16/net/sched/sch_cbq.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/net/sched/sch_cbq.c	2006-07-05 08:34:56.000000000 -0400
@@ -932,8 +932,8 @@ cbq_dequeue_prio(struct Qdisc *sch, int 
 
 			if (cl->deficit <= 0) {
 				q->active[prio] = cl;
-				cl = cl->next_alive;
 				cl->deficit += cl->quantum;
+				cl = cl->next_alive;
 			}
 			return skb;
 
@@ -1109,17 +1109,19 @@ static void cbq_normalize_quanta(struct 
 
 	for (h=0; h<16; h++) {
 		for (cl = q->classes[h]; cl; cl = cl->next) {
+			long mtu;
 			/* BUGGGG... Beware! This expression suffer of
 			   arithmetic overflows!
 			 */
 			if (cl->priority == prio) {
-				cl->quantum = (cl->weight*cl->allot*q->nclasses[prio])/
-					q->quanta[prio];
-			}
-			if (cl->quantum <= 0 || cl->quantum>32*cl->qdisc->dev->mtu) {
-				printk(KERN_WARNING "CBQ: class %08x has bad quantum==%ld, repaired.\n", cl->classid, cl->quantum);
-				cl->quantum = cl->qdisc->dev->mtu/2 + 1;
+				cl->quantum = (cl->weight * cl->allot) /
+					(q->quanta[prio] / q->nclasses[prio]);
 			}
+			mtu = cl->qdisc->dev->mtu;
+			if (cl->quantum <= mtu/2)
+				cl->quantum = mtu/2 + 1;
+			else if (cl->quantum > 32*mtu) 
+				cl->quantum = 32*mtu;
 		}
 	}
 }
diff -uprN linux-2.6.16/net/sched/sch_generic.c linux-2.6.16.ovz/net/sched/sch_generic.c
--- linux-2.6.16/net/sched/sch_generic.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/net/sched/sch_generic.c	2006-07-05 08:34:56.000000000 -0400
@@ -97,6 +97,7 @@ int qdisc_restart(struct net_device *dev
 
 	/* Dequeue packet */
 	if ((skb = q->dequeue(q)) != NULL) {
+		struct ve_struct *envid;
 		unsigned nolock = (dev->features & NETIF_F_LLTX);
 		/*
 		 * When the driver has LLTX set it does its own locking
@@ -107,6 +108,7 @@ int qdisc_restart(struct net_device *dev
 		 * of lock congestion it should return -1 and the packet
 		 * will be requeued.
 		 */
+		envid = set_exec_env(VE_OWNER_SKB(skb));
 		if (!nolock) {
 			if (!spin_trylock(&dev->xmit_lock)) {
 			collision:
@@ -121,6 +123,7 @@ int qdisc_restart(struct net_device *dev
 					kfree_skb(skb);
 					if (net_ratelimit())
 						printk(KERN_DEBUG "Dead loop on netdevice %s, fix it urgently!\n", dev->name);
+					(void)set_exec_env(envid);
 					return -1;
 				}
 				__get_cpu_var(netdev_rx_stat).cpu_collision++;
@@ -146,6 +149,7 @@ int qdisc_restart(struct net_device *dev
 						spin_unlock(&dev->xmit_lock);
 					}
 					spin_lock(&dev->queue_lock);
+					(void)set_exec_env(envid);
 					return -1;
 				}
 				if (ret == NETDEV_TX_LOCKED && nolock) {
@@ -177,6 +181,7 @@ int qdisc_restart(struct net_device *dev
 requeue:
 		q->ops->requeue(skb, q);
 		netif_schedule(dev);
+		(void)set_exec_env(envid);
 		return 1;
 	}
 	BUG_ON((int) q->q.qlen < 0);
@@ -625,3 +630,4 @@ EXPORT_SYMBOL(qdisc_reset);
 EXPORT_SYMBOL(qdisc_restart);
 EXPORT_SYMBOL(qdisc_lock_tree);
 EXPORT_SYMBOL(qdisc_unlock_tree);
+EXPORT_SYMBOL(dev_shutdown);
diff -uprN linux-2.6.16/net/sched/sch_teql.c linux-2.6.16.ovz/net/sched/sch_teql.c
--- linux-2.6.16/net/sched/sch_teql.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/net/sched/sch_teql.c	2006-07-05 08:34:56.000000000 -0400
@@ -189,6 +189,9 @@ static int teql_qdisc_init(struct Qdisc 
 	struct teql_master *m = (struct teql_master*)sch->ops;
 	struct teql_sched_data *q = qdisc_priv(sch);
 
+	if (!capable(CAP_NET_ADMIN))
+		return -EPERM;
+
 	if (dev->hard_header_len > m->dev->hard_header_len)
 		return -EINVAL;
 
diff -uprN linux-2.6.16/net/sctp/inqueue.c linux-2.6.16.ovz/net/sctp/inqueue.c
--- linux-2.6.16/net/sctp/inqueue.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/net/sctp/inqueue.c	2006-07-05 08:34:56.000000000 -0400
@@ -149,6 +149,7 @@ struct sctp_chunk *sctp_inq_pop(struct s
 		/* This is the first chunk in the packet.  */
 		chunk->singleton = 1;
 		ch = (sctp_chunkhdr_t *) chunk->skb->data;
+		chunk->data_accepted = 0;
 	}
 
         chunk->chunk_hdr = ch;
diff -uprN linux-2.6.16/net/sctp/sm_statefuns.c linux-2.6.16.ovz/net/sctp/sm_statefuns.c
--- linux-2.6.16/net/sctp/sm_statefuns.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/net/sctp/sm_statefuns.c	2006-07-05 08:34:56.000000000 -0400
@@ -636,8 +636,9 @@ sctp_disposition_t sctp_sf_do_5_1D_ce(co
 	 */
         chunk->subh.cookie_hdr =
 		(struct sctp_signed_cookie *)chunk->skb->data;
-	skb_pull(chunk->skb,
-		 ntohs(chunk->chunk_hdr->length) - sizeof(sctp_chunkhdr_t));
+	if (!pskb_pull(chunk->skb, ntohs(chunk->chunk_hdr->length) -
+					 sizeof(sctp_chunkhdr_t)))
+		goto nomem;
 
 	/* 5.1 D) Upon reception of the COOKIE ECHO chunk, Endpoint
 	 * "Z" will reply with a COOKIE ACK chunk after building a TCB
@@ -965,7 +966,8 @@ sctp_disposition_t sctp_sf_beat_8_3(cons
 	 */
 	chunk->subh.hb_hdr = (sctp_heartbeathdr_t *) chunk->skb->data;
 	paylen = ntohs(chunk->chunk_hdr->length) - sizeof(sctp_chunkhdr_t);
-	skb_pull(chunk->skb, paylen);
+	if (!pskb_pull(chunk->skb, paylen))
+		goto nomem;
 
 	reply = sctp_make_heartbeat_ack(asoc, chunk,
 					chunk->subh.hb_hdr, paylen);
@@ -1028,6 +1030,12 @@ sctp_disposition_t sctp_sf_backbeat_8_3(
 						  commands);
 
 	hbinfo = (sctp_sender_hb_info_t *) chunk->skb->data;
+	/* Make sure that the length of the parameter is what we expect */
+	if (ntohs(hbinfo->param_hdr.length) !=
+				    sizeof(sctp_sender_hb_info_t)) {
+		return SCTP_DISPOSITION_DISCARD;
+	}
+
 	from_addr = hbinfo->daddr;
 	link = sctp_assoc_lookup_paddr(asoc, &from_addr);
 
@@ -1860,8 +1868,9 @@ sctp_disposition_t sctp_sf_do_5_2_4_dupc
 	 * are in good shape.
 	 */
         chunk->subh.cookie_hdr = (struct sctp_signed_cookie *)chunk->skb->data;
-	skb_pull(chunk->skb, ntohs(chunk->chunk_hdr->length) -
-		 sizeof(sctp_chunkhdr_t));
+	if (!pskb_pull(chunk->skb, ntohs(chunk->chunk_hdr->length) -
+					sizeof(sctp_chunkhdr_t)))
+		goto nomem;
 
 	/* In RFC 2960 5.2.4 3, if both Verification Tags in the State Cookie
 	 * of a duplicate COOKIE ECHO match the Verification Tags of the
@@ -5151,7 +5160,9 @@ static int sctp_eat_data(const struct sc
 	int tmp;
 	__u32 tsn;
 	int account_value;
+	struct sctp_tsnmap *map = (struct sctp_tsnmap *)&asoc->peer.tsn_map;
 	struct sock *sk = asoc->base.sk;
+	int rcvbuf_over = 0;
 
 	data_hdr = chunk->subh.data_hdr = (sctp_datahdr_t *)chunk->skb->data;
 	skb_pull(chunk->skb, sizeof(sctp_datahdr_t));
@@ -5162,10 +5173,16 @@ static int sctp_eat_data(const struct sc
 	/* ASSERT:  Now skb->data is really the user data.  */
 
 	/*
-	 * if we are established, and we have used up our receive
-	 * buffer memory, drop the frame
+	 * If we are established, and we have used up our receive buffer
+	 * memory, think about dropping the frame.
+	 * Note that we have an opportunity to improve performance here.
+	 * If we accept one chunk from an skbuff, we have to keep all the
+	 * memory of that skbuff around until the chunk is read into user
+	 * space. Therefore, once we accept 1 chunk we may as well accept all
+	 * remaining chunks in the skbuff. The data_accepted flag helps us do
+	 * that.
 	 */
-	if (asoc->state == SCTP_STATE_ESTABLISHED) {
+	if ((asoc->state == SCTP_STATE_ESTABLISHED) && (!chunk->data_accepted)) {
 		/*
 		 * If the receive buffer policy is 1, then each
 		 * association can allocate up to sk_rcvbuf bytes
@@ -5176,9 +5193,25 @@ static int sctp_eat_data(const struct sc
 			account_value = atomic_read(&asoc->rmem_alloc);
 		else
 			account_value = atomic_read(&sk->sk_rmem_alloc);
-
-		if (account_value > sk->sk_rcvbuf)
-			return SCTP_IERROR_IGNORE_TSN;
+		if (account_value > sk->sk_rcvbuf) {
+			/*
+			 * We need to make forward progress, even when we are
+			 * under memory pressure, so we always allow the
+			 * next tsn after the ctsn ack point to be accepted.
+			 * This lets us avoid deadlocks in which we have to
+			 * drop frames that would otherwise let us drain the
+			 * receive queue.
+			 */
+			if ((sctp_tsnmap_get_ctsn(map) + 1) != tsn)
+				return SCTP_IERROR_IGNORE_TSN;
+
+			/*
+			 * We're going to accept the frame but we should renege
+			 * to make space for it. This will send us down that
+			 * path later in this function.
+			 */
+			rcvbuf_over = 1;
+		}
 	}
 
 	/* Process ECN based congestion.
@@ -5226,6 +5259,7 @@ static int sctp_eat_data(const struct sc
 	datalen -= sizeof(sctp_data_chunk_t);
 
 	deliver = SCTP_CMD_CHUNK_ULP;
+	chunk->data_accepted = 1;
 
 	/* Think about partial delivery. */
 	if ((datalen >= asoc->rwnd) && (!asoc->ulpq.pd_mode)) {
@@ -5242,7 +5276,8 @@ static int sctp_eat_data(const struct sc
 	 * large spill over.
 	 */
 	if (!asoc->rwnd || asoc->rwnd_over ||
-	    (datalen > asoc->rwnd + asoc->frag_point)) {
+	    (datalen > asoc->rwnd + asoc->frag_point) ||
+	    rcvbuf_over) {
 
 		/* If this is the next TSN, consider reneging to make
 		 * room.   Note: Playing nice with a confused sender.  A
@@ -5250,8 +5285,8 @@ static int sctp_eat_data(const struct sc
 		 * space and in the future we may want to detect and
 		 * do more drastic reneging.
 		 */
-		if (sctp_tsnmap_has_gap(&asoc->peer.tsn_map) &&
-		    (sctp_tsnmap_get_ctsn(&asoc->peer.tsn_map) + 1) == tsn) {
+		if (sctp_tsnmap_has_gap(map) &&
+		    (sctp_tsnmap_get_ctsn(map) + 1) == tsn) {
 			SCTP_DEBUG_PRINTK("Reneging for tsn:%u\n", tsn);
 			deliver = SCTP_CMD_RENEGE;
 		} else {
diff -uprN linux-2.6.16/net/sctp/sm_statetable.c linux-2.6.16.ovz/net/sctp/sm_statetable.c
--- linux-2.6.16/net/sctp/sm_statetable.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/net/sctp/sm_statetable.c	2006-07-05 08:34:56.000000000 -0400
@@ -366,9 +366,9 @@ const sctp_sm_table_entry_t *sctp_sm_loo
 	/* SCTP_STATE_EMPTY */ \
 	{.fn = sctp_sf_ootb, .name = "sctp_sf_ootb"}, \
 	/* SCTP_STATE_CLOSED */ \
-	{.fn = sctp_sf_bug, .name = "sctp_sf_bug"}, \
+	{.fn = sctp_sf_discard_chunk, .name = "sctp_sf_discard_chunk"}, \
 	/* SCTP_STATE_COOKIE_WAIT */ \
-	{.fn = sctp_sf_bug, .name = "sctp_sf_bug"}, \
+	{.fn = sctp_sf_discard_chunk, .name = "sctp_sf_discard_chunk"}, \
 	/* SCTP_STATE_COOKIE_ECHOED */ \
 	{.fn = sctp_sf_do_ecne, .name = "sctp_sf_do_ecne"}, \
 	/* SCTP_STATE_ESTABLISHED */ \
@@ -380,7 +380,7 @@ const sctp_sm_table_entry_t *sctp_sm_loo
 	/* SCTP_STATE_SHUTDOWN_RECEIVED */ \
 	{.fn = sctp_sf_do_ecne, .name = "sctp_sf_do_ecne"}, \
 	/* SCTP_STATE_SHUTDOWN_ACK_SENT */ \
-	{.fn = sctp_sf_bug, .name = "sctp_sf_bug"}, \
+	{.fn = sctp_sf_discard_chunk, .name = "sctp_sf_discard_chunk"}, \
 } /* TYPE_SCTP_ECN_ECNE */
 
 #define TYPE_SCTP_ECN_CWR { \
@@ -401,7 +401,7 @@ const sctp_sm_table_entry_t *sctp_sm_loo
 	/* SCTP_STATE_SHUTDOWN_RECEIVED */ \
 	{.fn = sctp_sf_discard_chunk, .name = "sctp_sf_discard_chunk"}, \
 	/* SCTP_STATE_SHUTDOWN_ACK_SENT */ \
-	{.fn = sctp_sf_bug, .name = "sctp_sf_bug"}, \
+	{.fn = sctp_sf_discard_chunk, .name = "sctp_sf_discard_chunk"}, \
 } /* TYPE_SCTP_ECN_CWR */
 
 #define TYPE_SCTP_SHUTDOWN_COMPLETE { \
@@ -647,7 +647,7 @@ chunk_event_table_unknown[SCTP_STATE_NUM
 	/* SCTP_STATE_EMPTY */ \
 	{.fn = sctp_sf_bug, .name = "sctp_sf_bug"}, \
 	/* SCTP_STATE_CLOSED */ \
-	{.fn = sctp_sf_bug, .name = "sctp_sf_bug"}, \
+	{.fn = sctp_sf_error_closed, .name = "sctp_sf_error_closed"}, \
 	/* SCTP_STATE_COOKIE_WAIT */ \
 	{.fn = sctp_sf_do_prm_requestheartbeat,		      \
 	 .name = "sctp_sf_do_prm_requestheartbeat"},          \
diff -uprN linux-2.6.16/net/sctp/ulpqueue.c linux-2.6.16.ovz/net/sctp/ulpqueue.c
--- linux-2.6.16/net/sctp/ulpqueue.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/net/sctp/ulpqueue.c	2006-07-05 08:34:56.000000000 -0400
@@ -279,6 +279,7 @@ static inline void sctp_ulpq_store_reasm
 static struct sctp_ulpevent *sctp_make_reassembled_event(struct sk_buff_head *queue, struct sk_buff *f_frag, struct sk_buff *l_frag)
 {
 	struct sk_buff *pos;
+	struct sk_buff *new = NULL;
 	struct sctp_ulpevent *event;
 	struct sk_buff *pnext, *last;
 	struct sk_buff *list = skb_shinfo(f_frag)->frag_list;
@@ -297,11 +298,33 @@ static struct sctp_ulpevent *sctp_make_r
 	 */
 	if (last)
 		last->next = pos;
-	else
-		skb_shinfo(f_frag)->frag_list = pos;
+ 	else {
+ 		if (skb_cloned(f_frag)) {
+ 			/* This is a cloned skb, we can't just modify
+ 			 * the frag_list.  We need a new skb to do that.
+ 			 * Instead of calling skb_unshare(), we'll do it
+ 			 * ourselves since we need to delay the free.
+ 			 */
+ 			new = skb_copy(f_frag, GFP_ATOMIC);
+ 			if (!new)
+ 				return NULL;	/* try again later */
+
+ 			new->sk = f_frag->sk;
+
+ 			skb_shinfo(new)->frag_list = pos;
+ 		} else
+ 			skb_shinfo(f_frag)->frag_list = pos;
+ 	}
 
 	/* Remove the first fragment from the reassembly queue.  */
 	__skb_unlink(f_frag, queue);
+
+ 	/* if we did unshare, then free the old skb and re-assign */
+ 	if (new) {
+ 		kfree_skb(f_frag);
+ 		f_frag = new;
+ 	}
+
 	while (pos) {
 
 		pnext = pos->next;
diff -uprN linux-2.6.16/net/socket.c linux-2.6.16.ovz/net/socket.c
--- linux-2.6.16/net/socket.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/net/socket.c	2006-07-05 08:34:56.000000000 -0400
@@ -84,6 +84,7 @@
 #include <linux/compat.h>
 #include <linux/kmod.h>
 #include <linux/audit.h>
+#include <linux/in.h>
 
 #ifdef CONFIG_NET_RADIO
 #include <linux/wireless.h>		/* Note : will define WIRELESS_EXT */
@@ -1075,6 +1076,49 @@ int sock_wake_async(struct socket *sock,
 	return 0;
 }
 
+int vz_security_proto_check(int family, int type, int protocol)
+{
+#ifdef CONFIG_VE
+	if (ve_is_super(get_exec_env()))
+		return 0;	/* ve_is_super() context: nothing is filtered */
+	/* Otherwise only the cases below pass; 'type' is never examined. */
+	switch (family) {
+	case PF_UNSPEC:
+	case PF_PACKET:
+	case PF_NETLINK:
+	case PF_UNIX:
+		break;		/* accepted for any protocol value */
+	case PF_INET:
+		switch (protocol) {
+		case  IPPROTO_IP:
+		case  IPPROTO_ICMP:
+		case  IPPROTO_TCP:
+		case  IPPROTO_UDP:
+		case  IPPROTO_RAW:
+			break;
+		default:
+			return -EAFNOSUPPORT;	/* any other PF_INET protocol */
+		}
+		break;
+	case PF_INET6:
+		switch (protocol) {
+		case  IPPROTO_IP:
+		case  IPPROTO_ICMPV6:
+		case  IPPROTO_TCP:
+		case  IPPROTO_UDP:
+		case  IPPROTO_RAW:
+			break;
+		default:
+			return -EAFNOSUPPORT;	/* any other PF_INET6 protocol */
+		}
+		break;
+	default:
+		return -EAFNOSUPPORT;	/* family not in the list above */
+	}
+#endif
+	return 0;	/* accepted; the only path when CONFIG_VE is not set */
+}
+
 static int __sock_create(int family, int type, int protocol, struct socket **res, int kern)
 {
 	int err;
@@ -1102,6 +1146,11 @@ static int __sock_create(int family, int
 		family = PF_PACKET;
 	}
 
+	/* VZ compatibility layer */
+	err = vz_security_proto_check(family, type, protocol);
+	if (err < 0)
+		return err;
+
 	err = security_socket_create(family, type, protocol, kern);
 	if (err)
 		return err;
diff -uprN linux-2.6.16/net/sunrpc/clnt.c linux-2.6.16.ovz/net/sunrpc/clnt.c
--- linux-2.6.16/net/sunrpc/clnt.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/net/sunrpc/clnt.c	2006-07-05 08:34:56.000000000 -0400
@@ -168,10 +168,10 @@ rpc_new_client(struct rpc_xprt *xprt, ch
 	}
 
 	/* save the nodename */
-	clnt->cl_nodelen = strlen(system_utsname.nodename);
+	clnt->cl_nodelen = strlen(ve_utsname.nodename);
 	if (clnt->cl_nodelen > UNX_MAXNODENAME)
 		clnt->cl_nodelen = UNX_MAXNODENAME;
-	memcpy(clnt->cl_nodename, system_utsname.nodename, clnt->cl_nodelen);
+	memcpy(clnt->cl_nodename, ve_utsname.nodename, clnt->cl_nodelen);
 	return clnt;
 
 out_no_auth:
diff -uprN linux-2.6.16/net/sunrpc/sched.c linux-2.6.16.ovz/net/sunrpc/sched.c
--- linux-2.6.16/net/sunrpc/sched.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/net/sunrpc/sched.c	2006-07-05 08:34:56.000000000 -0400
@@ -605,7 +605,9 @@ EXPORT_SYMBOL(rpc_exit_task);
 static int __rpc_execute(struct rpc_task *task)
 {
 	int		status = 0;
+	struct ve_struct *env;
 
+	env = set_exec_env(get_ve0());
 	dprintk("RPC: %4d rpc_execute flgs %x\n",
 				task->tk_pid, task->tk_flags);
 
@@ -693,6 +695,7 @@ static int __rpc_execute(struct rpc_task
 	rpc_mark_complete_task(task);
 	/* Release all resources associated with the task */
 	rpc_release_task(task);
+	(void)set_exec_env(env);
 	return status;
 }
 
diff -uprN linux-2.6.16/net/sunrpc/svcsock.c linux-2.6.16.ovz/net/sunrpc/svcsock.c
--- linux-2.6.16/net/sunrpc/svcsock.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/net/sunrpc/svcsock.c	2006-07-05 08:34:56.000000000 -0400
@@ -361,6 +361,9 @@ svc_sendto(struct svc_rqst *rqstp, struc
 	size_t		base = xdr->page_base;
 	unsigned int	pglen = xdr->page_len;
 	unsigned int	flags = MSG_MORE;
+	struct ve_struct *old_env;
+
+	old_env = set_exec_env(get_ve0());
 
 	slen = xdr->len;
 
@@ -425,6 +428,8 @@ out:
 			rqstp->rq_sock, xdr->head[0].iov_base, xdr->head[0].iov_len, xdr->len, len,
 		rqstp->rq_addr.sin_addr.s_addr);
 
+	(void)set_exec_env(old_env);
+
 	return len;
 }
 
@@ -437,9 +442,12 @@ svc_recv_available(struct svc_sock *svsk
 	mm_segment_t	oldfs;
 	struct socket	*sock = svsk->sk_sock;
 	int		avail, err;
+	struct ve_struct *old_env;
 
 	oldfs = get_fs(); set_fs(KERNEL_DS);
+	old_env = set_exec_env(get_ve0());
 	err = sock->ops->ioctl(sock, TIOCINQ, (unsigned long) &avail);
+	(void)set_exec_env(old_env);
 	set_fs(oldfs);
 
 	return (err >= 0)? avail : err;
@@ -454,6 +462,7 @@ svc_recvfrom(struct svc_rqst *rqstp, str
 	struct msghdr	msg;
 	struct socket	*sock;
 	int		len, alen;
+	struct ve_struct *old_env;
 
 	rqstp->rq_addrlen = sizeof(rqstp->rq_addr);
 	sock = rqstp->rq_sock->sk_sock;
@@ -465,7 +474,9 @@ svc_recvfrom(struct svc_rqst *rqstp, str
 
 	msg.msg_flags	= MSG_DONTWAIT;
 
+	old_env = set_exec_env(get_ve0());
 	len = kernel_recvmsg(sock, &msg, iov, nr, buflen, MSG_DONTWAIT);
+	(void)set_exec_env(old_env);
 
 	/* sock_recvmsg doesn't fill in the name/namelen, so we must..
 	 * possibly we should cache this in the svc_sock structure
@@ -761,17 +772,19 @@ svc_tcp_accept(struct svc_sock *svsk)
 	const struct proto_ops *ops;
 	struct svc_sock	*newsvsk;
 	int		err, slen;
+	struct ve_struct *old_env;
 
 	dprintk("svc: tcp_accept %p sock %p\n", svsk, sock);
 	if (!sock)
 		return;
 
+	old_env = set_exec_env(get_ve0());
 	err = sock_create_lite(PF_INET, SOCK_STREAM, IPPROTO_TCP, &newsock);
 	if (err) {
 		if (err == -ENOMEM)
 			printk(KERN_WARNING "%s: no more sockets!\n",
 			       serv->sv_name);
-		return;
+		goto restore;
 	}
 
 	dprintk("svc: tcp_accept %p allocated\n", newsock);
@@ -865,6 +878,8 @@ svc_tcp_accept(struct svc_sock *svsk)
 
 	}
 
+	(void)set_exec_env(old_env);
+
 	if (serv->sv_stats)
 		serv->sv_stats->nettcpconn++;
 
@@ -872,6 +887,8 @@ svc_tcp_accept(struct svc_sock *svsk)
 
 failed:
 	sock_release(newsock);
+restore:
+	(void)set_exec_env(old_env);
 	return;
 }
 
@@ -1388,6 +1405,7 @@ svc_create_socket(struct svc_serv *serv,
 	struct socket	*sock;
 	int		error;
 	int		type;
+	struct ve_struct *old_env;
 
 	dprintk("svc: svc_create_socket(%s, %d, %u.%u.%u.%u:%d)\n",
 				serv->sv_program->pg_name, protocol,
@@ -1401,8 +1419,10 @@ svc_create_socket(struct svc_serv *serv,
 	}
 	type = (protocol == IPPROTO_UDP)? SOCK_DGRAM : SOCK_STREAM;
 
+	old_env = set_exec_env(get_ve0());
+
 	if ((error = sock_create_kern(PF_INET, type, protocol, &sock)) < 0)
-		return error;
+		goto restore;
 
 	if (sin != NULL) {
 		if (type == SOCK_STREAM)
@@ -1418,12 +1438,16 @@ svc_create_socket(struct svc_serv *serv,
 			goto bummer;
 	}
 
-	if ((svsk = svc_setup_socket(serv, sock, &error, 1)) != NULL)
+	if ((svsk = svc_setup_socket(serv, sock, &error, 1)) != NULL) {
+		(void)set_exec_env(old_env);
 		return 0;
+	}
 
 bummer:
 	dprintk("svc: svc_create_socket error = %d\n", -error);
 	sock_release(sock);
+restore:
+	(void)set_exec_env(old_env);
 	return error;
 }
 
diff -uprN linux-2.6.16/net/unix/af_unix.c linux-2.6.16.ovz/net/unix/af_unix.c
--- linux-2.6.16/net/unix/af_unix.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/net/unix/af_unix.c	2006-07-05 08:34:56.000000000 -0400
@@ -118,6 +118,9 @@
 #include <net/checksum.h>
 #include <linux/security.h>
 
+#include <ub/ub_net.h>
+#include <ub/beancounter.h>
+
 int sysctl_unix_max_dgram_qlen = 10;
 
 struct hlist_head unix_socket_table[UNIX_HASH_SIZE + 1];
@@ -235,6 +238,8 @@ static struct sock *__unix_find_socket_b
 	sk_for_each(s, node, &unix_socket_table[hash ^ type]) {
 		struct unix_sock *u = unix_sk(s);
 
+		if (!ve_accessible(VE_OWNER_SK(s), get_exec_env()))
+			continue;
 		if (u->addr->len == len &&
 		    !memcmp(u->addr->name, sunname, len))
 			goto found;
@@ -439,7 +444,7 @@ static int unix_listen(struct socket *so
 	sk->sk_max_ack_backlog	= backlog;
 	sk->sk_state		= TCP_LISTEN;
 	/* set credentials so connect can copy them */
-	sk->sk_peercred.pid	= current->tgid;
+	sk->sk_peercred.pid	= virt_tgid(current);
 	sk->sk_peercred.uid	= current->euid;
 	sk->sk_peercred.gid	= current->egid;
 	err = 0;
@@ -553,6 +558,8 @@ static struct sock * unix_create1(struct
 	sk = sk_alloc(PF_UNIX, GFP_KERNEL, &unix_proto, 1);
 	if (!sk)
 		goto out;
+	if (ub_other_sock_charge(sk))
+		goto out_sk_free;
 
 	atomic_inc(&unix_nr_socks);
 
@@ -571,6 +578,9 @@ static struct sock * unix_create1(struct
 	unix_insert_socket(unix_sockets_unbound, sk);
 out:
 	return sk;
+out_sk_free:
+	sk_free(sk);
+	return NULL;
 }
 
 static int unix_create(struct socket *sock, int protocol)
@@ -676,7 +686,7 @@ static struct sock *unix_find_other(stru
 		err = path_lookup(sunname->sun_path, LOOKUP_FOLLOW, &nd);
 		if (err)
 			goto fail;
-		err = vfs_permission(&nd, MAY_WRITE);
+		err = vfs_permission(&nd, MAY_WRITE, NULL);
 		if (err)
 			goto put_fail;
 
@@ -932,6 +942,7 @@ static int unix_stream_connect(struct so
 	int st;
 	int err;
 	long timeo;
+	unsigned long chargesize;
 
 	err = unix_mkname(sunaddr, addr_len, &hash);
 	if (err < 0)
@@ -960,6 +971,10 @@ static int unix_stream_connect(struct so
 	skb = sock_wmalloc(newsk, 1, 0, GFP_KERNEL);
 	if (skb == NULL)
 		goto out;
+	chargesize = skb_charge_fullsize(skb);
+	if (ub_sock_getwres_other(newsk, chargesize) < 0)
+		goto out;	
+	ub_skb_set_charge(skb, newsk, chargesize, UB_OTHERSOCKBUF);
 
 restart:
 	/*  Find listening sock. */
@@ -1043,7 +1058,7 @@ restart:
 	unix_peer(newsk)	= sk;
 	newsk->sk_state		= TCP_ESTABLISHED;
 	newsk->sk_type		= sk->sk_type;
-	newsk->sk_peercred.pid	= current->tgid;
+	newsk->sk_peercred.pid	= virt_tgid(current);
 	newsk->sk_peercred.uid	= current->euid;
 	newsk->sk_peercred.gid	= current->egid;
 	newu = unix_sk(newsk);
@@ -1107,7 +1122,7 @@ static int unix_socketpair(struct socket
 	sock_hold(skb);
 	unix_peer(ska)=skb;
 	unix_peer(skb)=ska;
-	ska->sk_peercred.pid = skb->sk_peercred.pid = current->tgid;
+	ska->sk_peercred.pid = skb->sk_peercred.pid = virt_tgid(current);
 	ska->sk_peercred.uid = skb->sk_peercred.uid = current->euid;
 	ska->sk_peercred.gid = skb->sk_peercred.gid = current->egid;
 
@@ -1433,6 +1448,16 @@ static int unix_stream_sendmsg(struct ki
 
 		size=len-sent;
 
+		if (msg->msg_flags & MSG_DONTWAIT)
+			ub_sock_makewres_other(sk, skb_charge_size(size));
+		if (sock_bc(sk) != NULL && 
+				sock_bc(sk)->poll_reserv >= 
+					SOCK_MIN_UBCSPACE &&
+				skb_charge_size(size) >
+					sock_bc(sk)->poll_reserv)
+			size = skb_charge_datalen(sock_bc(sk)->poll_reserv);
+				
+
 		/* Keep two messages in the pipe so it schedules better */
 		if (size > sk->sk_sndbuf / 2 - 64)
 			size = sk->sk_sndbuf / 2 - 64;
@@ -1444,7 +1469,8 @@ static int unix_stream_sendmsg(struct ki
 		 *	Grab a buffer
 		 */
 		 
-		skb=sock_alloc_send_skb(sk,size,msg->msg_flags&MSG_DONTWAIT, &err);
+		skb = sock_alloc_send_skb2(sk, size, SOCK_MIN_UBCSPACE,
+				msg->msg_flags&MSG_DONTWAIT, &err);
 
 		if (skb==NULL)
 			goto out_err;
@@ -1869,6 +1895,7 @@ static unsigned int unix_poll(struct fil
 {
 	struct sock *sk = sock->sk;
 	unsigned int mask;
+	int no_ub_res;
 
 	poll_wait(file, sk->sk_sleep, wait);
 	mask = 0;
@@ -1879,6 +1906,10 @@ static unsigned int unix_poll(struct fil
 	if (sk->sk_shutdown == SHUTDOWN_MASK)
 		mask |= POLLHUP;
 
+	no_ub_res = ub_sock_makewres_other(sk, SOCK_MIN_UBCSPACE_CH);
+	if (no_ub_res)
+		ub_sock_sndqueueadd_other(sk, SOCK_MIN_UBCSPACE_CH);
+
 	/* readable? */
 	if (!skb_queue_empty(&sk->sk_receive_queue) ||
 	    (sk->sk_shutdown & RCV_SHUTDOWN))
@@ -1892,7 +1923,7 @@ static unsigned int unix_poll(struct fil
 	 * we set writable also when the other side has shut down the
 	 * connection. This prevents stuck sockets.
 	 */
-	if (unix_writable(sk))
+	if (!no_ub_res && unix_writable(sk))
 		mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
 
 	return mask;
@@ -2044,7 +2075,7 @@ static int __init af_unix_init(void)
 
 	sock_register(&unix_family_ops);
 #ifdef CONFIG_PROC_FS
-	proc_net_fops_create("unix", 0, &unix_seq_fops);
+	proc_glob_fops_create("net/unix", 0, &unix_seq_fops);
 #endif
 	unix_sysctl_register();
 out:
@@ -2055,7 +2086,7 @@ static void __exit af_unix_exit(void)
 {
 	sock_unregister(PF_UNIX);
 	unix_sysctl_unregister();
-	proc_net_remove("unix");
+	remove_proc_glob_entry("net/unix", NULL);
 	proto_unregister(&unix_proto);
 }
 
diff -uprN linux-2.6.16/net/unix/garbage.c linux-2.6.16.ovz/net/unix/garbage.c
--- linux-2.6.16/net/unix/garbage.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/net/unix/garbage.c	2006-07-05 08:34:56.000000000 -0400
@@ -76,6 +76,7 @@
 #include <linux/netdevice.h>
 #include <linux/file.h>
 #include <linux/proc_fs.h>
+#include <linux/module.h>
 
 #include <net/sock.h>
 #include <net/af_unix.h>
@@ -135,7 +136,7 @@ void unix_notinflight(struct file *fp)
 		atomic_dec(&unix_tot_inflight);
 	}
 }
-
+EXPORT_SYMBOL_GPL(unix_notinflight);
 
 /*
  *	Garbage Collector Support Functions
diff -uprN linux-2.6.16/scripts/kconfig/Makefile linux-2.6.16.ovz/scripts/kconfig/Makefile
--- linux-2.6.16/scripts/kconfig/Makefile	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/scripts/kconfig/Makefile	2006-07-05 08:34:55.000000000 -0400
@@ -42,6 +42,10 @@ update-po-config: $(obj)/kxgettext
 	$(Q)rm -f arch/um/Kconfig_arch
 	$(Q)rm -f scripts/kconfig/linux_*.pot scripts/kconfig/config.pot
 
+nonint_oldconfig: scripts/kconfig/conf
+	./scripts/kconfig/conf -b arch/$(ARCH)/Kconfig
+
+
 .PHONY: randconfig allyesconfig allnoconfig allmodconfig defconfig
 
 randconfig: $(obj)/conf
diff -uprN linux-2.6.16/scripts/kconfig/conf.c linux-2.6.16.ovz/scripts/kconfig/conf.c
--- linux-2.6.16/scripts/kconfig/conf.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/scripts/kconfig/conf.c	2006-07-05 08:34:55.000000000 -0400
@@ -20,6 +20,7 @@ enum {
 	ask_all,
 	ask_new,
 	ask_silent,
+	dont_ask,
 	set_default,
 	set_yes,
 	set_mod,
@@ -36,6 +37,8 @@ static struct menu *rootEntry;
 
 static char nohelp_text[] = N_("Sorry, no help available for this option yet.\n");
 
+static int return_value = 0;
+
 static void strip(char *str)
 {
 	char *p = str;
@@ -116,6 +119,12 @@ static void conf_askvalue(struct symbol 
 		fflush(stdout);
 		fgets_check_stream(line, 128, stdin);
 		return;
+	case dont_ask:
+		if (!sym_has_value(sym)) {
+			fprintf(stderr,"CONFIG_%s\n",sym->name);
+			return_value++;
+		}
+		return;
 	case set_default:
 		printf("%s\n", def);
 		return;
@@ -360,6 +369,10 @@ static int conf_choice(struct menu *menu
 			printf("?");
 		printf("]: ");
 		switch (input_mode) {
+		case dont_ask:
+			cnt = def;
+			printf("%d\n", cnt);
+			break;
 		case ask_new:
 		case ask_silent:
 			if (!is_new) {
@@ -496,7 +509,10 @@ static void check_conf(struct menu *menu
 			if (!conf_cnt++)
 				printf(_("*\n* Restart config...\n*\n"));
 			rootEntry = menu_get_parent_menu(menu);
-			conf(rootEntry);
+			if (input_mode == dont_ask)
+				fprintf(stderr,"CONFIG_%s\n",sym->name);
+			else
+				conf(rootEntry);
 		}
 	}
 
@@ -515,6 +531,9 @@ int main(int ac, char **av)
 		case 'o':
 			input_mode = ask_new;
 			break;
+		case 'b':
+			input_mode = dont_ask;
+			break;
 		case 's':
 			input_mode = ask_silent;
 			valid_stdin = isatty(0) && isatty(1) && isatty(2);
@@ -579,6 +598,7 @@ int main(int ac, char **av)
 		}
 	case ask_all:
 	case ask_new:
+	case dont_ask:
 		conf_read(NULL);
 		break;
 	case set_no:
@@ -617,10 +637,10 @@ int main(int ac, char **av)
 	do {
 		conf_cnt = 0;
 		check_conf(&rootmenu);
-	} while (conf_cnt);
+	} while ((conf_cnt) && (input_mode != dont_ask));
 	if (conf_write(NULL)) {
 		fprintf(stderr, _("\n*** Error during writing of the kernel configuration.\n\n"));
 		return 1;
 	}
-	return 0;
+	return return_value;
 }
diff -uprN linux-2.6.16/security/commoncap.c linux-2.6.16.ovz/security/commoncap.c
--- linux-2.6.16/security/commoncap.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/security/commoncap.c	2006-07-05 08:34:56.000000000 -0400
@@ -35,7 +35,7 @@ EXPORT_SYMBOL(cap_netlink_send);
 
 int cap_netlink_recv(struct sk_buff *skb)
 {
-	if (!cap_raised(NETLINK_CB(skb).eff_cap, CAP_NET_ADMIN))
+	if (!cap_raised(NETLINK_CB(skb).eff_cap, CAP_VE_NET_ADMIN))
 		return -EPERM;
 	return 0;
 }
@@ -197,7 +197,7 @@ int cap_inode_setxattr(struct dentry *de
 {
 	if (!strncmp(name, XATTR_SECURITY_PREFIX,
 		     sizeof(XATTR_SECURITY_PREFIX) - 1)  &&
-	    !capable(CAP_SYS_ADMIN))
+	    !capable(CAP_SYS_ADMIN) && !capable(CAP_VE_ADMIN))
 		return -EPERM;
 	return 0;
 }
@@ -206,7 +206,7 @@ int cap_inode_removexattr(struct dentry 
 {
 	if (!strncmp(name, XATTR_SECURITY_PREFIX,
 		     sizeof(XATTR_SECURITY_PREFIX) - 1)  &&
-	    !capable(CAP_SYS_ADMIN))
+	    !capable(CAP_SYS_ADMIN) && !capable(CAP_VE_ADMIN))
 		return -EPERM;
 	return 0;
 }
@@ -312,7 +312,7 @@ void cap_task_reparent_to_init (struct t
 
 int cap_syslog (int type)
 {
-	if ((type != 3 && type != 10) && !capable(CAP_SYS_ADMIN))
+	if ((type != 3 && type != 10) && !capable(CAP_VE_SYS_ADMIN))
 		return -EPERM;
 	return 0;
 }
diff -uprN linux-2.6.16/security/keys/key.c linux-2.6.16.ovz/security/keys/key.c
--- linux-2.6.16/security/keys/key.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/security/keys/key.c	2006-07-05 08:34:56.000000000 -0400
@@ -785,6 +785,10 @@ key_ref_t key_create_or_update(key_ref_t
 
 	key_check(keyring);
 
+	key_ref = ERR_PTR(-ENOTDIR);
+	if (keyring->type != &key_type_keyring)
+		goto error_2;
+
 	down_write(&keyring->sem);
 
 	/* if we're going to allocate a new key, we're going to have
diff -uprN linux-2.6.16/security/keys/keyring.c linux-2.6.16.ovz/security/keys/keyring.c
--- linux-2.6.16/security/keys/keyring.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/security/keys/keyring.c	2006-07-05 08:34:56.000000000 -0400
@@ -437,6 +437,7 @@ EXPORT_SYMBOL(keyring_search);
 /*
  * search the given keyring only (no recursion)
  * - keyring must be locked by caller
+ * - caller must guarantee that the keyring is a keyring
  */
 key_ref_t __keyring_search_one(key_ref_t keyring_ref,
 			       const struct key_type *ktype,
diff -uprN linux-2.6.16/security/selinux/hooks.c linux-2.6.16.ovz/security/selinux/hooks.c
--- linux-2.6.16/security/selinux/hooks.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/security/selinux/hooks.c	2006-07-05 08:34:56.000000000 -0400
@@ -4167,12 +4167,12 @@ static int selinux_setprocattr(struct ta
 			struct task_struct *g, *t;
 			struct mm_struct *mm = p->mm;
 			read_lock(&tasklist_lock);
-			do_each_thread(g, t)
+			do_each_thread_ve(g, t)
 				if (t->mm == mm && t != p) {
 					read_unlock(&tasklist_lock);
 					return -EPERM;
 				}
-			while_each_thread(g, t);
+			while_each_thread_ve(g, t);
 			read_unlock(&tasklist_lock);
                 }
 
diff -uprN linux-2.6.16/security/selinux/ss/mls.c linux-2.6.16.ovz/security/selinux/ss/mls.c
--- linux-2.6.16/security/selinux/ss/mls.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/security/selinux/ss/mls.c	2006-07-05 08:34:56.000000000 -0400
@@ -264,7 +264,7 @@ int mls_context_to_sid(char oldc,
 
 	if (!selinux_mls_enabled) {
 		if (def_sid != SECSID_NULL && oldc)
-			*scontext += strlen(*scontext);
+			*scontext += strlen(*scontext)+1;
 		return 0;
 	}
 
diff -uprN linux-2.6.16/security/selinux/ss/services.c linux-2.6.16.ovz/security/selinux/ss/services.c
--- linux-2.6.16/security/selinux/ss/services.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/security/selinux/ss/services.c	2006-07-05 08:34:56.000000000 -0400
@@ -592,6 +592,10 @@ int security_sid_to_context(u32 sid, cha
 
 			*scontext_len = strlen(initial_sid_to_string[sid]) + 1;
 			scontextp = kmalloc(*scontext_len,GFP_ATOMIC);
+			if (!scontextp) {
+				rc = -ENOMEM;
+				goto out;
+			}
 			strcpy(scontextp, initial_sid_to_string[sid]);
 			*scontext = scontextp;
 			goto out;
diff -uprN linux-2.6.16/sound/isa/opti9xx/opti92x-ad1848.c linux-2.6.16.ovz/sound/isa/opti9xx/opti92x-ad1848.c
--- linux-2.6.16/sound/isa/opti9xx/opti92x-ad1848.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/sound/isa/opti9xx/opti92x-ad1848.c	2006-07-05 08:34:56.000000000 -0400
@@ -2088,9 +2088,11 @@ static int __init alsa_card_opti9xx_init
 	int error;
 	struct platform_device *device;
 
+#ifdef CONFIG_PNP
 	pnp_register_card_driver(&opti9xx_pnpc_driver);
 	if (snd_opti9xx_pnp_is_probed)
 		return 0;
+#endif
 	if (! is_isapnp_selected()) {
 		error = platform_driver_register(&snd_opti9xx_driver);
 		if (error < 0)
@@ -2102,7 +2104,9 @@ static int __init alsa_card_opti9xx_init
 		}
 		platform_driver_unregister(&snd_opti9xx_driver);
 	}
+#ifdef CONFIG_PNP
 	pnp_unregister_card_driver(&opti9xx_pnpc_driver);
+#endif
 #ifdef MODULE
 	printk(KERN_ERR "no OPTi " CHIP_NAME " soundcard found\n");
 #endif
@@ -2115,7 +2119,9 @@ static void __exit alsa_card_opti9xx_exi
 		platform_device_unregister(snd_opti9xx_platform_device);
 		platform_driver_unregister(&snd_opti9xx_driver);
 	}
+#ifdef CONFIG_PNP
 	pnp_unregister_card_driver(&opti9xx_pnpc_driver);
+#endif
 }
 
 module_init(alsa_card_opti9xx_init)
diff -uprN linux-2.6.16/sound/oss/dmasound/tas_common.c linux-2.6.16.ovz/sound/oss/dmasound/tas_common.c
--- linux-2.6.16/sound/oss/dmasound/tas_common.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/sound/oss/dmasound/tas_common.c	2006-07-05 08:34:56.000000000 -0400
@@ -195,8 +195,8 @@ tas_init(int driver_id, const char *driv
 
 	printk(KERN_INFO "tas driver [%s])\n", driver_name);
 
-#ifndef CONFIG_I2C_KEYWEST
-	request_module("i2c-keywest");
+#ifndef CONFIG_I2C_POWERMAC
+	request_module("i2c-powermac");
 #endif
 	tas_node = find_devices("deq");
 	if (tas_node == NULL)
diff -uprN linux-2.6.16/sound/pci/hda/patch_realtek.c linux-2.6.16.ovz/sound/pci/hda/patch_realtek.c
--- linux-2.6.16/sound/pci/hda/patch_realtek.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/sound/pci/hda/patch_realtek.c	2006-07-05 08:34:56.000000000 -0400
@@ -2948,6 +2948,8 @@ static struct hda_board_config alc260_cf
 	{ .modelname = "basic", .config = ALC260_BASIC },
 	{ .pci_subvendor = 0x104d, .pci_subdevice = 0x81bb,
 	  .config = ALC260_BASIC }, /* Sony VAIO */
+	{ .pci_subvendor = 0x152d, .pci_subdevice = 0x0729,
+	  .config = ALC260_BASIC }, /* CTL Travel Master U553W */
 	{ .modelname = "hp", .config = ALC260_HP },
 	{ .pci_subvendor = 0x103c, .pci_subdevice = 0x3010, .config = ALC260_HP },
 	{ .pci_subvendor = 0x103c, .pci_subdevice = 0x3011, .config = ALC260_HP },
diff -uprN linux-2.6.16/sound/ppc/daca.c linux-2.6.16.ovz/sound/ppc/daca.c
--- linux-2.6.16/sound/ppc/daca.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/sound/ppc/daca.c	2006-07-05 08:34:56.000000000 -0400
@@ -256,7 +256,7 @@ int __init snd_pmac_daca_init(struct snd
 
 #ifdef CONFIG_KMOD
 	if (current->fs->root)
-		request_module("i2c-keywest");
+		request_module("i2c-powermac");
 #endif /* CONFIG_KMOD */	
 
 	mix = kmalloc(sizeof(*mix), GFP_KERNEL);
diff -uprN linux-2.6.16/sound/ppc/tumbler.c linux-2.6.16.ovz/sound/ppc/tumbler.c
--- linux-2.6.16/sound/ppc/tumbler.c	2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16.ovz/sound/ppc/tumbler.c	2006-07-05 08:34:56.000000000 -0400
@@ -1314,7 +1314,7 @@ int __init snd_pmac_tumbler_init(struct 
 
 #ifdef CONFIG_KMOD
 	if (current->fs->root)
-		request_module("i2c-keywest");
+		request_module("i2c-powermac");
 #endif /* CONFIG_KMOD */	
 
 	mix = kmalloc(sizeof(*mix), GFP_KERNEL);
