Index: linux-2.6.22/arch/i386/Kconfig
===================================================================
--- linux-2.6.22.orig/arch/i386/Kconfig	2007-07-15 17:13:44.000000000 +0200
+++ linux-2.6.22/arch/i386/Kconfig	2007-07-15 17:13:47.000000000 +0200
@@ -18,6 +18,10 @@ config GENERIC_TIME
 	bool
 	default y
 
+config GENERIC_CMOS_UPDATE
+	bool
+	default y
+
 config CLOCKSOURCE_WATCHDOG
 	bool
 	default y
@@ -31,6 +35,10 @@ config GENERIC_CLOCKEVENTS_BROADCAST
 	default y
 	depends on X86_LOCAL_APIC
 
+config NONIRQ_WAKEUP
+	bool
+	default y
+
 config LOCKDEP_SUPPORT
 	bool
 	default y
@@ -1053,6 +1061,8 @@ endif # APM
 
 source "arch/i386/kernel/cpu/cpufreq/Kconfig"
 
+source "drivers/cpuidle/Kconfig"
+
 endmenu
 
 menu "Bus options (PCI, PCMCIA, EISA, MCA, ISA)"
Index: linux-2.6.22/arch/x86_64/Kconfig
===================================================================
--- linux-2.6.22.orig/arch/x86_64/Kconfig	2007-07-15 17:13:44.000000000 +0200
+++ linux-2.6.22/arch/x86_64/Kconfig	2007-07-15 17:13:51.000000000 +0200
@@ -28,10 +28,26 @@ config GENERIC_TIME
 	bool
 	default y
 
+config GENERIC_CLOCKEVENTS
+	bool
+	default y
+
+config GENERIC_CLOCKEVENTS_BROADCAST
+	bool
+	default y
+
+config NONIRQ_WAKEUP
+	bool
+	default y
+
 config GENERIC_TIME_VSYSCALL
 	bool
 	default y
 
+config GENERIC_CMOS_UPDATE
+	bool
+	default y
+
 config ZONE_DMA32
 	bool
 	default y
@@ -126,6 +142,8 @@ source "init/Kconfig"
 
 menu "Processor type and features"
 
+source "kernel/time/Kconfig"
+
 choice
 	prompt "Subarchitecture Type"
 	default X86_PC
@@ -698,6 +716,8 @@ source "drivers/acpi/Kconfig"
 
 source "arch/x86_64/kernel/cpufreq/Kconfig"
 
+source "drivers/cpuidle/Kconfig"
+
 endmenu
 
 menu "Bus options (PCI etc.)"
Index: linux-2.6.22/drivers/Makefile
===================================================================
--- linux-2.6.22.orig/drivers/Makefile	2007-07-15 17:13:44.000000000 +0200
+++ linux-2.6.22/drivers/Makefile	2007-07-15 17:13:44.000000000 +0200
@@ -70,6 +70,7 @@ obj-$(CONFIG_EDAC)		+= edac/
 obj-$(CONFIG_MCA)		+= mca/
 obj-$(CONFIG_EISA)		+= eisa/
 obj-$(CONFIG_CPU_FREQ)		+= cpufreq/
+obj-$(CONFIG_CPU_IDLE)		+= cpuidle/
 obj-$(CONFIG_MMC)		+= mmc/
 obj-$(CONFIG_NEW_LEDS)		+= leds/
 obj-$(CONFIG_INFINIBAND)	+= infiniband/
Index: linux-2.6.22/drivers/cpuidle/Kconfig
===================================================================
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ linux-2.6.22/drivers/cpuidle/Kconfig	2007-07-15 17:13:44.000000000 +0200
@@ -0,0 +1,39 @@
+menu "CPU idle PM support"
+
+config CPU_IDLE
+	bool "CPU idle PM support"
+	help
+	  CPU idle is a generic framework for supporting software-controlled
+	  idle processor power management.  It includes modular cross-platform
+	  governors that can be swapped during runtime.
+
+	  If you're using a mobile platform that supports CPU idle PM (e.g.
+	  an ACPI-capable notebook), you should say Y here.
+
+if CPU_IDLE
+
+comment "Governors"
+
+config CPU_IDLE_GOV_LADDER
+	tristate "'ladder' governor"
+	depends on CPU_IDLE
+	default y
+	help
+	  This cpuidle governor promotes and demotes through the supported idle
+	  states using residency time and bus master activity as metrics.  This
+	  algorithm was originally introduced in the old ACPI processor driver.
+
+config CPU_IDLE_GOV_MENU
+	tristate "'menu' governor"
+	depends on CPU_IDLE && NO_HZ
+	default y
+	help
+	  This cpuidle governor evaluates all available states and chooses the
+	  deepest state that meets all of the following constraints: BM activity,
+	  expected time until next timer interrupt, and last break event time
+	  delta.  It is designed to minimize power consumption.  Currently
+	  dynticks is required.
+
+endif	# CPU_IDLE
+
+endmenu
Index: linux-2.6.22/drivers/cpuidle/Makefile
===================================================================
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ linux-2.6.22/drivers/cpuidle/Makefile	2007-07-15 17:13:44.000000000 +0200
@@ -0,0 +1,5 @@
+#
+# Makefile for cpuidle.
+#
+
+obj-y += cpuidle.o driver.o governor.o sysfs.o governors/
Index: linux-2.6.22/drivers/cpuidle/cpuidle.c
===================================================================
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ linux-2.6.22/drivers/cpuidle/cpuidle.c	2007-07-15 17:13:44.000000000 +0200
@@ -0,0 +1,306 @@
+/*
+ * cpuidle.c - core cpuidle infrastructure
+ *
+ * (C) 2006-2007 Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
+ *               Shaohua Li <shaohua.li@intel.com>
+ *               Adam Belay <abelay@novell.com>
+ *
+ * This code is licenced under the GPL.
+ */
+
+#include <linux/kernel.h>
+#include <linux/mutex.h>
+#include <linux/sched.h>
+#include <linux/notifier.h>
+#include <linux/cpu.h>
+#include <linux/latency.h>
+#include <linux/cpuidle.h>
+
+#include "cpuidle.h"
+
+DEFINE_PER_CPU(struct cpuidle_device *, cpuidle_devices);
+EXPORT_PER_CPU_SYMBOL_GPL(cpuidle_devices);
+
+DEFINE_MUTEX(cpuidle_lock);
+LIST_HEAD(cpuidle_detected_devices);
+static void (*pm_idle_old)(void);
+
+/**
+ * cpuidle_idle_call - the main idle loop
+ *
+ * NOTE: no locks or semaphores should be used here
+ */
+static void cpuidle_idle_call(void)
+{
+	struct cpuidle_device *dev = __get_cpu_var(cpuidle_devices);
+	struct cpuidle_state *target_state;
+	int next_state;
+
+	/* check if the device is ready */
+	if (!dev || dev->status != CPUIDLE_STATUS_DOIDLE) {
+		if (pm_idle_old)
+			pm_idle_old();
+		else
+			local_irq_enable();
+		return;
+	}
+
+	/* ask the governor for the next state */
+	next_state = cpuidle_curr_governor->select(dev);
+	if (need_resched())
+		return;
+	target_state = &dev->states[next_state];
+
+	/* enter the state and update stats */
+	dev->last_residency = target_state->enter(dev, target_state);
+	dev->last_state = target_state;
+	target_state->time += dev->last_residency;
+	target_state->usage++;
+
+	/* give the governor an opportunity to reflect on the outcome */
+	if (cpuidle_curr_governor->reflect)
+		cpuidle_curr_governor->reflect(dev);
+}
+
+/**
+ * cpuidle_install_idle_handler - installs the cpuidle idle loop handler
+ */
+void cpuidle_install_idle_handler(void)
+{
+	if (pm_idle != cpuidle_idle_call) {
+		/* Make sure all changes finished before we switch to new idle */
+		smp_wmb();
+		pm_idle = cpuidle_idle_call;
+	}
+}
+
+/**
+ * cpuidle_uninstall_idle_handler - uninstalls the cpuidle idle loop handler
+ */
+void cpuidle_uninstall_idle_handler(void)
+{
+	if (pm_idle != pm_idle_old) {
+		pm_idle = pm_idle_old;
+		cpu_idle_wait();
+	}
+}
+
+/**
+ * cpuidle_rescan_device - prepares for a new state configuration
+ * @dev: the target device
+ *
+ * Must be called with cpuidle_lock aquired.
+ */
+void cpuidle_rescan_device(struct cpuidle_device *dev)
+{
+	int i;
+
+	if (cpuidle_curr_governor->scan)
+		cpuidle_curr_governor->scan(dev);
+
+	for (i = 0; i < dev->state_count; i++) {
+		dev->states[i].usage = 0;
+		dev->states[i].time = 0;
+	}
+}
+
+/**
+ * cpuidle_add_device - attaches the driver to a CPU instance
+ * @sys_dev: the system device (driver model CPU representation)
+ */
+static int cpuidle_add_device(struct sys_device *sys_dev)
+{
+	int cpu = sys_dev->id;
+	struct cpuidle_device *dev;
+
+	dev = per_cpu(cpuidle_devices, cpu);
+
+	mutex_lock(&cpuidle_lock);
+	if (cpu_is_offline(cpu)) {
+		mutex_unlock(&cpuidle_lock);
+		return 0;
+	}
+
+	if (!dev) {
+		dev = kzalloc(sizeof(struct cpuidle_device), GFP_KERNEL);
+		if (!dev) {
+			mutex_unlock(&cpuidle_lock);
+			return -ENOMEM;
+		}
+		init_completion(&dev->kobj_unregister);
+		per_cpu(cpuidle_devices, cpu) = dev;
+	}
+	dev->cpu = cpu;
+
+	if (dev->status & CPUIDLE_STATUS_DETECTED) {
+		mutex_unlock(&cpuidle_lock);
+		return 0;
+	}
+
+	cpuidle_add_sysfs(sys_dev);
+
+	if (cpuidle_curr_driver) {
+		if (cpuidle_attach_driver(dev))
+			goto err_ret;
+	}
+
+	if (cpuidle_curr_governor) {
+		if (cpuidle_attach_governor(dev)) {
+			cpuidle_detach_driver(dev);
+			goto err_ret;
+		}
+	}
+
+	if (cpuidle_device_can_idle(dev))
+		cpuidle_install_idle_handler();
+
+	list_add(&dev->device_list, &cpuidle_detected_devices);
+	dev->status |= CPUIDLE_STATUS_DETECTED;
+
+err_ret:
+	mutex_unlock(&cpuidle_lock);
+
+	return 0;
+}
+
+/**
+ * __cpuidle_remove_device - detaches the driver from a CPU instance
+ * @sys_dev: the system device (driver model CPU representation)
+ *
+ * Must be called with cpuidle_lock aquired.
+ */
+static int __cpuidle_remove_device(struct sys_device *sys_dev)
+{
+	struct cpuidle_device *dev;
+
+	dev = per_cpu(cpuidle_devices, sys_dev->id);
+
+	if (!(dev->status & CPUIDLE_STATUS_DETECTED)) {
+		return 0;
+	}
+	dev->status &= ~CPUIDLE_STATUS_DETECTED;
+	/* NOTE: we don't wait because the cpu is already offline */
+	if (cpuidle_curr_governor)
+		cpuidle_detach_governor(dev);
+	if (cpuidle_curr_driver)
+		cpuidle_detach_driver(dev);
+	cpuidle_remove_sysfs(sys_dev);
+	list_del(&dev->device_list);
+	wait_for_completion(&dev->kobj_unregister);
+	per_cpu(cpuidle_devices, sys_dev->id) = NULL;
+	kfree(dev);
+
+	return 0;
+}
+
+/**
+ * cpuidle_remove_device - detaches the driver from a CPU instance
+ * @sys_dev: the system device (driver model CPU representation)
+ */
+static int cpuidle_remove_device(struct sys_device *sys_dev)
+{
+	int ret;
+	mutex_lock(&cpuidle_lock);
+	ret = __cpuidle_remove_device(sys_dev);
+	mutex_unlock(&cpuidle_lock);
+
+	return ret;
+}
+
+static struct sysdev_driver cpuidle_sysdev_driver = {
+	.add		= cpuidle_add_device,
+	.remove		= cpuidle_remove_device,
+};
+
+static int cpuidle_cpu_callback(struct notifier_block *nfb,
+					unsigned long action, void *hcpu)
+{
+	struct sys_device *sys_dev;
+
+	sys_dev = get_cpu_sysdev((unsigned long)hcpu);
+
+	switch (action) {
+	case CPU_ONLINE:
+		cpuidle_add_device(sys_dev);
+		break;
+	case CPU_DOWN_PREPARE:
+		mutex_lock(&cpuidle_lock);
+		break;
+	case CPU_DEAD:
+		__cpuidle_remove_device(sys_dev);
+		mutex_unlock(&cpuidle_lock);
+		break;
+	case CPU_DOWN_FAILED:
+		mutex_unlock(&cpuidle_lock);
+		break;
+	}
+
+	return NOTIFY_OK;
+}
+
+static struct notifier_block __cpuinitdata cpuidle_cpu_notifier =
+{
+    .notifier_call = cpuidle_cpu_callback,
+};
+
+#ifdef CONFIG_SMP
+
+static void smp_callback(void *v)
+{
+	/* we already woke the CPU up, nothing more to do */
+}
+
+/*
+ * This function gets called when a part of the kernel has a new latency
+ * requirement.  This means we need to get all processors out of their C-state,
+ * and then recalculate a new suitable C-state. Just do a cross-cpu IPI; that
+ * wakes them all right up.
+ */
+static int cpuidle_latency_notify(struct notifier_block *b,
+		unsigned long l, void *v)
+{
+	smp_call_function(smp_callback, NULL, 0, 1);
+	return NOTIFY_OK;
+}
+
+static struct notifier_block cpuidle_latency_notifier = {
+	.notifier_call = cpuidle_latency_notify,
+};
+
+#define latency_notifier_init(x) do { register_latency_notifier(x); } while (0)
+
+#else /* CONFIG_SMP */
+
+#define latency_notifier_init(x) do { } while (0)
+
+#endif /* CONFIG_SMP */
+
+/**
+ * cpuidle_init - core initializer
+ */
+static int __init cpuidle_init(void)
+{
+	int ret;
+
+	pm_idle_old = pm_idle;
+
+	ret = cpuidle_add_class_sysfs(&cpu_sysdev_class);
+	if (ret)
+		return ret;
+
+	register_hotcpu_notifier(&cpuidle_cpu_notifier);
+
+	ret = sysdev_driver_register(&cpu_sysdev_class, &cpuidle_sysdev_driver);
+
+	if (ret) {
+		cpuidle_remove_class_sysfs(&cpu_sysdev_class);
+		printk(KERN_ERR "cpuidle: failed to initialize\n");
+		return ret;
+	}
+
+	latency_notifier_init(&cpuidle_latency_notifier);
+
+	return 0;
+}
+
+core_initcall(cpuidle_init);
Index: linux-2.6.22/drivers/cpuidle/cpuidle.h
===================================================================
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ linux-2.6.22/drivers/cpuidle/cpuidle.h	2007-07-15 17:13:44.000000000 +0200
@@ -0,0 +1,50 @@
+/*
+ * cpuidle.h - The internal header file
+ */
+
+#ifndef __DRIVER_CPUIDLE_H
+#define __DRIVER_CPUIDLE_H
+
+#include <linux/sysdev.h>
+
+/* For internal use only */
+extern struct cpuidle_governor *cpuidle_curr_governor;
+extern struct cpuidle_driver *cpuidle_curr_driver;
+extern struct list_head cpuidle_drivers;
+extern struct list_head cpuidle_governors;
+extern struct list_head cpuidle_detected_devices;
+extern struct mutex cpuidle_lock;
+
+/* idle loop */
+extern void cpuidle_install_idle_handler(void);
+extern void cpuidle_uninstall_idle_handler(void);
+extern void cpuidle_rescan_device(struct cpuidle_device *dev);
+
+/* drivers */
+extern int cpuidle_attach_driver(struct cpuidle_device *dev);
+extern void cpuidle_detach_driver(struct cpuidle_device *dev);
+extern int cpuidle_switch_driver(struct cpuidle_driver *drv);
+
+/* governors */
+extern int cpuidle_attach_governor(struct cpuidle_device *dev);
+extern void cpuidle_detach_governor(struct cpuidle_device *dev);
+extern int cpuidle_switch_governor(struct cpuidle_governor *gov);
+
+/* sysfs */
+extern int cpuidle_add_class_sysfs(struct sysdev_class *cls);
+extern void cpuidle_remove_class_sysfs(struct sysdev_class *cls);
+extern int cpuidle_add_driver_sysfs(struct cpuidle_device *device);
+extern void cpuidle_remove_driver_sysfs(struct cpuidle_device *device);
+extern int cpuidle_add_sysfs(struct sys_device *sysdev);
+extern void cpuidle_remove_sysfs(struct sys_device *sysdev);
+
+/**
+ * cpuidle_device_can_idle - determines if a CPU can utilize the idle loop
+ * @dev: the target CPU
+ */
+static inline int cpuidle_device_can_idle(struct cpuidle_device *dev)
+{
+	return (dev->status == CPUIDLE_STATUS_DOIDLE);
+}
+
+#endif /* __DRIVER_CPUIDLE_H */
Index: linux-2.6.22/drivers/cpuidle/driver.c
===================================================================
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ linux-2.6.22/drivers/cpuidle/driver.c	2007-07-15 17:13:44.000000000 +0200
@@ -0,0 +1,276 @@
+/*
+ * driver.c - driver support
+ *
+ * (C) 2006-2007 Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
+ *               Shaohua Li <shaohua.li@intel.com>
+ *               Adam Belay <abelay@novell.com>
+ *
+ * This code is licenced under the GPL.
+ */
+
+#include <linux/mutex.h>
+#include <linux/module.h>
+#include <linux/cpuidle.h>
+
+#include "cpuidle.h"
+
+LIST_HEAD(cpuidle_drivers);
+struct cpuidle_driver *cpuidle_curr_driver;
+
+
+/**
+ * cpuidle_attach_driver - attaches a driver to a CPU
+ * @dev: the target CPU
+ *
+ * Must be called with cpuidle_lock aquired.
+ */
+int cpuidle_attach_driver(struct cpuidle_device *dev)
+{
+	int ret;
+
+	if (dev->status & CPUIDLE_STATUS_DRIVER_ATTACHED)
+		return -EIO;
+
+	if (!try_module_get(cpuidle_curr_driver->owner))
+		return -EINVAL;
+
+	ret = cpuidle_curr_driver->init(dev);
+	if (ret) {
+		module_put(cpuidle_curr_driver->owner);
+		printk(KERN_INFO "cpuidle: driver %s failed to attach to "
+			"cpu %d\n", cpuidle_curr_driver->name, dev->cpu);
+	} else {
+		if (dev->status & CPUIDLE_STATUS_GOVERNOR_ATTACHED)
+			cpuidle_rescan_device(dev);
+		smp_wmb();
+		dev->status |= CPUIDLE_STATUS_DRIVER_ATTACHED;
+		cpuidle_add_driver_sysfs(dev);
+	}
+
+	return ret;
+}
+
+/**
+ * cpuidle_detach_govenor - detaches a driver from a CPU
+ * @dev: the target CPU
+ *
+ * Must be called with cpuidle_lock aquired.
+ */
+void cpuidle_detach_driver(struct cpuidle_device *dev)
+{
+	if (dev->status & CPUIDLE_STATUS_DRIVER_ATTACHED) {
+		cpuidle_remove_driver_sysfs(dev);
+		dev->status &= ~CPUIDLE_STATUS_DRIVER_ATTACHED;
+		if (cpuidle_curr_driver->exit)
+			cpuidle_curr_driver->exit(dev);
+		module_put(cpuidle_curr_driver->owner);
+	}
+}
+
+/**
+ * __cpuidle_find_driver - finds a driver of the specified name
+ * @str: the name
+ *
+ * Must be called with cpuidle_lock aquired.
+ */
+static struct cpuidle_driver * __cpuidle_find_driver(const char *str)
+{
+	struct cpuidle_driver *drv;
+
+	list_for_each_entry(drv, &cpuidle_drivers, driver_list)
+		if (!strnicmp(str, drv->name, CPUIDLE_NAME_LEN))
+			return drv;
+
+	return NULL;
+}
+
+/**
+ * cpuidle_switch_driver - changes the driver
+ * @drv: the new target driver
+ *
+ * NOTE: "drv" can be NULL to specify disabled
+ * Must be called with cpuidle_lock aquired.
+ */
+int cpuidle_switch_driver(struct cpuidle_driver *drv)
+{
+	struct cpuidle_device *dev;
+
+	if (drv == cpuidle_curr_driver)
+		return -EINVAL;
+
+	cpuidle_uninstall_idle_handler();
+
+	if (cpuidle_curr_driver)
+		list_for_each_entry(dev, &cpuidle_detected_devices, device_list)
+			cpuidle_detach_driver(dev);
+
+	cpuidle_curr_driver = drv;
+
+	if (drv) {
+		int ret = 1;
+		list_for_each_entry(dev, &cpuidle_detected_devices, device_list)
+			if (cpuidle_attach_driver(dev) == 0)
+				ret = 0;
+
+		/* If attach on all devices fail, switch to NULL driver */
+		if (ret)
+			cpuidle_curr_driver = NULL;
+
+		if (cpuidle_curr_driver && cpuidle_curr_governor) {
+			printk(KERN_INFO "cpuidle: using driver %s\n",
+					drv->name);
+			cpuidle_install_idle_handler();
+		}
+	}
+
+	return 0;
+}
+
+/**
+ * cpuidle_register_driver - registers a driver
+ * @drv: the driver
+ */
+int cpuidle_register_driver(struct cpuidle_driver *drv)
+{
+	int ret = -EEXIST;
+
+	if (!drv || !drv->init)
+		return -EINVAL;
+
+	mutex_lock(&cpuidle_lock);
+	if (__cpuidle_find_driver(drv->name) == NULL) {
+		ret = 0;
+		list_add_tail(&drv->driver_list, &cpuidle_drivers);
+		if (!cpuidle_curr_driver)
+			cpuidle_switch_driver(drv);
+	}
+	mutex_unlock(&cpuidle_lock);
+
+	return ret;
+}
+
+EXPORT_SYMBOL_GPL(cpuidle_register_driver);
+
+/**
+ * cpuidle_unregister_driver - unregisters a driver
+ * @drv: the driver
+ */
+void cpuidle_unregister_driver(struct cpuidle_driver *drv)
+{
+	if (!drv)
+		return;
+
+	mutex_lock(&cpuidle_lock);
+	if (drv == cpuidle_curr_driver)
+		cpuidle_switch_driver(NULL);
+	list_del(&drv->driver_list);
+	mutex_unlock(&cpuidle_lock);
+}
+
+EXPORT_SYMBOL_GPL(cpuidle_unregister_driver);
+
+static void __cpuidle_force_redetect(struct cpuidle_device *dev)
+{
+	cpuidle_remove_driver_sysfs(dev);
+	cpuidle_curr_driver->redetect(dev);
+	cpuidle_add_driver_sysfs(dev);
+}
+
+/**
+ * cpuidle_force_redetect - redetects the idle states of a CPU
+ *
+ * @dev: the CPU to redetect
+ * @drv: the target driver
+ *
+ * Generally, the driver will call this when the supported states set has
+ * changed. (e.g. as the result of an ACPI transition to battery power)
+ */
+int cpuidle_force_redetect(struct cpuidle_device *dev,
+		struct cpuidle_driver *drv)
+{
+	int uninstalled = 0;
+
+	mutex_lock(&cpuidle_lock);
+
+	if (drv != cpuidle_curr_driver) {
+		mutex_unlock(&cpuidle_lock);
+		return 0;
+	}
+
+	if (!(dev->status & CPUIDLE_STATUS_DRIVER_ATTACHED) ||
+	    !cpuidle_curr_driver->redetect) {
+		mutex_unlock(&cpuidle_lock);
+		return -EIO;
+	}
+
+	if (cpuidle_device_can_idle(dev)) {
+		uninstalled = 1;
+		cpuidle_uninstall_idle_handler();
+	}
+
+	__cpuidle_force_redetect(dev);
+
+	if (cpuidle_device_can_idle(dev)) {
+		cpuidle_rescan_device(dev);
+		cpuidle_install_idle_handler();
+	}
+
+	/* other devices are still ok */
+	if (uninstalled)
+		cpuidle_install_idle_handler();
+
+	mutex_unlock(&cpuidle_lock);
+
+	return 0;
+}
+
+EXPORT_SYMBOL_GPL(cpuidle_force_redetect);
+
+/**
+ * cpuidle_force_redetect_devices - redetects the idle states of all CPUs
+ *
+ * @drv: the target driver
+ *
+ * Generally, the driver will call this when the supported states set has
+ * changed. (e.g. as the result of an ACPI transition to battery power)
+ */
+int cpuidle_force_redetect_devices(struct cpuidle_driver *drv)
+{
+	struct cpuidle_device *dev;
+	int ret = 0;
+
+	mutex_lock(&cpuidle_lock);
+
+	if (drv != cpuidle_curr_driver)
+		goto out;
+
+	if (!cpuidle_curr_driver->redetect) {
+		ret = -EIO;
+		goto out;
+	}
+
+	cpuidle_uninstall_idle_handler();
+
+	list_for_each_entry(dev, &cpuidle_detected_devices, device_list)
+		__cpuidle_force_redetect(dev);
+
+	cpuidle_install_idle_handler();
+out:
+	mutex_unlock(&cpuidle_lock);
+	return ret;
+}
+
+EXPORT_SYMBOL_GPL(cpuidle_force_redetect_devices);
+
+/**
+ * cpuidle_get_bm_activity - determines if BM activity has occured
+ */
+int cpuidle_get_bm_activity(void)
+{
+	if (cpuidle_curr_driver->bm_check)
+		return cpuidle_curr_driver->bm_check();
+	else
+		return 0;
+}
+EXPORT_SYMBOL_GPL(cpuidle_get_bm_activity);
+
Index: linux-2.6.22/drivers/cpuidle/governor.c
===================================================================
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ linux-2.6.22/drivers/cpuidle/governor.c	2007-07-15 17:13:44.000000000 +0200
@@ -0,0 +1,187 @@
+/*
+ * governor.c - governor support
+ *
+ * (C) 2006-2007 Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
+ *               Shaohua Li <shaohua.li@intel.com>
+ *               Adam Belay <abelay@novell.com>
+ *
+ * This code is licenced under the GPL.
+ */
+
+#include <linux/mutex.h>
+#include <linux/module.h>
+#include <linux/cpuidle.h>
+
+#include "cpuidle.h"
+
+LIST_HEAD(cpuidle_governors);
+struct cpuidle_governor *cpuidle_curr_governor;
+
+
+/**
+ * cpuidle_attach_governor - attaches a governor to a CPU
+ * @dev: the target CPU
+ *
+ * Must be called with cpuidle_lock aquired.
+ */
+int cpuidle_attach_governor(struct cpuidle_device *dev)
+{
+	int ret = 0;
+
+	if(dev->status & CPUIDLE_STATUS_GOVERNOR_ATTACHED)
+		return -EIO;
+
+	if (!try_module_get(cpuidle_curr_governor->owner))
+		return -EINVAL;
+
+	if (cpuidle_curr_governor->init)
+		ret = cpuidle_curr_governor->init(dev);
+	if (ret) {
+		module_put(cpuidle_curr_governor->owner);
+		printk(KERN_ERR "cpuidle: governor %s failed to attach to cpu %d\n",
+			cpuidle_curr_governor->name, dev->cpu);
+	} else {
+		if (dev->status & CPUIDLE_STATUS_DRIVER_ATTACHED)
+			cpuidle_rescan_device(dev);
+		smp_wmb();
+		dev->status |= CPUIDLE_STATUS_GOVERNOR_ATTACHED;
+	}
+
+	return ret;
+}
+
+/**
+ * cpuidle_detach_govenor - detaches a governor from a CPU
+ * @dev: the target CPU
+ *
+ * Must be called with cpuidle_lock aquired.
+ */
+void cpuidle_detach_governor(struct cpuidle_device *dev)
+{
+	if (dev->status & CPUIDLE_STATUS_GOVERNOR_ATTACHED) {
+		dev->status &= ~CPUIDLE_STATUS_GOVERNOR_ATTACHED;
+		if (cpuidle_curr_governor->exit)
+			cpuidle_curr_governor->exit(dev);
+		module_put(cpuidle_curr_governor->owner);
+	}
+}
+
+/**
+ * __cpuidle_find_governor - finds a governor of the specified name
+ * @str: the name
+ *
+ * Must be called with cpuidle_lock aquired.
+ */
+static struct cpuidle_governor * __cpuidle_find_governor(const char *str)
+{
+	struct cpuidle_governor *gov;
+
+	list_for_each_entry(gov, &cpuidle_governors, governor_list)
+		if (!strnicmp(str, gov->name, CPUIDLE_NAME_LEN))
+			return gov;
+
+	return NULL;
+}
+
+/**
+ * cpuidle_switch_governor - changes the governor
+ * @gov: the new target governor
+ *
+ * NOTE: "gov" can be NULL to specify disabled
+ * Must be called with cpuidle_lock aquired.
+ */
+int cpuidle_switch_governor(struct cpuidle_governor *gov)
+{
+	struct cpuidle_device *dev;
+
+	if (gov == cpuidle_curr_governor)
+		return -EINVAL;
+
+	cpuidle_uninstall_idle_handler();
+
+	if (cpuidle_curr_governor)
+		list_for_each_entry(dev, &cpuidle_detected_devices, device_list)
+			cpuidle_detach_governor(dev);
+
+	cpuidle_curr_governor = gov;
+
+	if (gov) {
+		list_for_each_entry(dev, &cpuidle_detected_devices, device_list)
+			cpuidle_attach_governor(dev);
+		if (cpuidle_curr_driver)
+			cpuidle_install_idle_handler();
+		printk(KERN_INFO "cpuidle: using governor %s\n", gov->name);
+	}
+
+	return 0;
+}
+
+/**
+ * cpuidle_register_governor - registers a governor
+ * @gov: the governor
+ */
+int cpuidle_register_governor(struct cpuidle_governor *gov)
+{
+	int ret = -EEXIST;
+
+	if (!gov || !gov->select)
+		return -EINVAL;
+
+	mutex_lock(&cpuidle_lock);
+	if (__cpuidle_find_governor(gov->name) == NULL) {
+		ret = 0;
+		list_add_tail(&gov->governor_list, &cpuidle_governors);
+		if (!cpuidle_curr_governor ||
+		    cpuidle_curr_governor->rating < gov->rating)
+			cpuidle_switch_governor(gov);
+	}
+	mutex_unlock(&cpuidle_lock);
+
+	return ret;
+}
+
+EXPORT_SYMBOL_GPL(cpuidle_register_governor);
+
+/**
+ * cpuidle_replace_governor - find a replacement governor
+ * @exclude_rating: the rating that will be skipped while looking for
+ * new governor.
+ */
+struct cpuidle_governor *cpuidle_replace_governor(int exclude_rating)
+{
+	struct cpuidle_governor *gov;
+	struct cpuidle_governor *ret_gov = NULL;
+	unsigned int max_rating = 0;
+
+	list_for_each_entry(gov, &cpuidle_governors, governor_list) {
+		if (gov->rating == exclude_rating)
+			continue;
+		if (gov->rating > max_rating) {
+			max_rating = gov->rating;
+			ret_gov = gov;
+		}
+	}
+
+	return ret_gov;
+}
+
+/**
+ * cpuidle_unregister_governor - unregisters a governor
+ * @gov: the governor
+ */
+void cpuidle_unregister_governor(struct cpuidle_governor *gov)
+{
+	if (!gov)
+		return;
+
+	mutex_lock(&cpuidle_lock);
+	if (gov == cpuidle_curr_governor) {
+		struct cpuidle_governor *new_gov;
+		new_gov = cpuidle_replace_governor(gov->rating);
+		cpuidle_switch_governor(new_gov);
+	}
+	list_del(&gov->governor_list);
+	mutex_unlock(&cpuidle_lock);
+}
+
+EXPORT_SYMBOL_GPL(cpuidle_unregister_governor);
Index: linux-2.6.22/drivers/cpuidle/governors/Makefile
===================================================================
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ linux-2.6.22/drivers/cpuidle/governors/Makefile	2007-07-15 17:13:44.000000000 +0200
@@ -0,0 +1,6 @@
+#
+# Makefile for cpuidle governors.
+#
+
+obj-$(CONFIG_CPU_IDLE_GOV_LADDER) += ladder.o
+obj-$(CONFIG_CPU_IDLE_GOV_MENU) += menu.o
Index: linux-2.6.22/drivers/cpuidle/governors/ladder.c
===================================================================
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ linux-2.6.22/drivers/cpuidle/governors/ladder.c	2007-07-15 17:13:44.000000000 +0200
@@ -0,0 +1,228 @@
+/*
+ * ladder.c - the residency ladder algorithm
+ *
+ *  Copyright (C) 2001, 2002 Andy Grover <andrew.grover@intel.com>
+ *  Copyright (C) 2001, 2002 Paul Diefenbaugh <paul.s.diefenbaugh@intel.com>
+ *  Copyright (C) 2004, 2005 Dominik Brodowski <linux@brodo.de>
+ *
+ * (C) 2006-2007 Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
+ *               Shaohua Li <shaohua.li@intel.com>
+ *               Adam Belay <abelay@novell.com>
+ *
+ * This code is licenced under the GPL.
+ */
+
+#include <linux/kernel.h>
+#include <linux/cpuidle.h>
+#include <linux/latency.h>
+#include <linux/moduleparam.h>
+#include <linux/jiffies.h>
+
+#include <asm/io.h>
+#include <asm/uaccess.h>
+
+#define PROMOTION_COUNT 4
+#define DEMOTION_COUNT 1
+
+/*
+ * bm_history -- bit-mask with a bit per jiffy of bus-master activity
+ * 1000 HZ: 0xFFFFFFFF: 32 jiffies = 32ms
+ * 800 HZ: 0xFFFFFFFF: 32 jiffies = 40ms
+ * 100 HZ: 0x0000000F: 4 jiffies = 40ms
+ * reduce history for more aggressive entry into C3
+ */
+static unsigned int bm_history __read_mostly =
+    (HZ >= 800 ? 0xFFFFFFFF : ((1U << (HZ / 25)) - 1));
+module_param(bm_history, uint, 0644);
+
+struct ladder_device_state {
+	struct {
+		u32 promotion_count;
+		u32 demotion_count;
+		u32 promotion_time;
+		u32 demotion_time;
+		u32 bm;
+	} threshold;
+	struct {
+		int promotion_count;
+		int demotion_count;
+	} stats;
+};
+
+struct ladder_device {
+	struct ladder_device_state states[CPUIDLE_STATE_MAX];
+	unsigned int bm_check:1;
+	unsigned long bm_check_timestamp;
+	unsigned long bm_activity; /* FIXME: bm activity should be global */
+	int last_state_idx;
+};
+
+/**
+ * ladder_do_selection - prepares private data for a state change
+ * @ldev: the ladder device
+ * @old_idx: the current state index
+ * @new_idx: the new target state index
+ */
+static inline void ladder_do_selection(struct ladder_device *ldev,
+				       int old_idx, int new_idx)
+{
+	ldev->states[old_idx].stats.promotion_count = 0;
+	ldev->states[old_idx].stats.demotion_count = 0;
+	ldev->last_state_idx = new_idx;
+}
+
+/**
+ * ladder_select_state - selects the next state to enter
+ * @dev: the CPU
+ */
+static int ladder_select_state(struct cpuidle_device *dev)
+{
+	struct ladder_device *ldev = dev->governor_data;
+	struct ladder_device_state *last_state;
+	int last_residency, last_idx = ldev->last_state_idx;
+
+	if (unlikely(!ldev))
+		return 0;
+
+	last_state = &ldev->states[last_idx];
+
+	/* demote if within BM threshold */
+	if (ldev->bm_check) {
+		unsigned long diff;
+
+		diff = jiffies - ldev->bm_check_timestamp;
+		if (diff > 31)
+			diff = 31;
+
+		ldev->bm_activity <<= diff;
+		if (cpuidle_get_bm_activity())
+			ldev->bm_activity |= ((1 << diff) - 1);
+
+		ldev->bm_check_timestamp = jiffies;
+		if ((last_idx > 0) &&
+		    (last_state->threshold.bm & ldev->bm_activity)) {
+			ladder_do_selection(ldev, last_idx, last_idx - 1);
+			return last_idx - 1;
+		}
+	}
+
+	if (dev->states[last_idx].flags & CPUIDLE_FLAG_TIME_VALID)
+		last_residency = cpuidle_get_last_residency(dev) - dev->states[last_idx].exit_latency;
+	else
+		last_residency = last_state->threshold.promotion_time + 1;
+
+	/* consider promotion */
+	if (last_idx < dev->state_count - 1 &&
+	    last_residency > last_state->threshold.promotion_time &&
+	    dev->states[last_idx + 1].exit_latency <= system_latency_constraint()) {
+		last_state->stats.promotion_count++;
+		last_state->stats.demotion_count = 0;
+		if (last_state->stats.promotion_count >= last_state->threshold.promotion_count) {
+			ladder_do_selection(ldev, last_idx, last_idx + 1);
+			return last_idx + 1;
+		}
+	}
+
+	/* consider demotion */
+	if (last_idx > 0 &&
+	    last_residency < last_state->threshold.demotion_time) {
+		last_state->stats.demotion_count++;
+		last_state->stats.promotion_count = 0;
+		if (last_state->stats.demotion_count >= last_state->threshold.demotion_count) {
+			ladder_do_selection(ldev, last_idx, last_idx - 1);
+			return last_idx - 1;
+		}
+	}
+
+	/* otherwise remain at the current state */
+	return last_idx;
+}
+
+/**
+ * ladder_scan_device - scans a CPU's states and does setup
+ * @dev: the CPU
+ */
+static void ladder_scan_device(struct cpuidle_device *dev)
+{
+	int i, bm_check = 0;
+	struct ladder_device *ldev = dev->governor_data;
+	struct ladder_device_state *lstate;
+	struct cpuidle_state *state;
+
+	ldev->last_state_idx = 0;
+	ldev->bm_check_timestamp = 0;
+	ldev->bm_activity = 0;
+
+	for (i = 0; i < dev->state_count; i++) {
+		state = &dev->states[i];
+		lstate = &ldev->states[i];
+
+		lstate->stats.promotion_count = 0;
+		lstate->stats.demotion_count = 0;
+
+		lstate->threshold.promotion_count = PROMOTION_COUNT;
+		lstate->threshold.demotion_count = DEMOTION_COUNT;
+
+		if (i < dev->state_count - 1)
+			lstate->threshold.promotion_time = state->exit_latency;
+		if (i > 0)
+			lstate->threshold.demotion_time = state->exit_latency;
+		if (state->flags & CPUIDLE_FLAG_CHECK_BM) {
+			lstate->threshold.bm = bm_history;
+			bm_check = 1;
+		} else
+			lstate->threshold.bm = 0;
+	}
+
+	ldev->bm_check = bm_check;
+}
+
+/**
+ * ladder_init_device - initializes a CPU-instance
+ * @dev: the CPU
+ */
+static int ladder_init_device(struct cpuidle_device *dev)
+{
+	dev->governor_data = kmalloc(sizeof(struct ladder_device), GFP_KERNEL);
+
+	return !dev->governor_data;
+}
+
+/**
+ * ladder_exit_device - exits a CPU-instance
+ * @dev: the CPU
+ */
+static void ladder_exit_device(struct cpuidle_device *dev)
+{
+	kfree(dev->governor_data);
+}
+
+static struct cpuidle_governor ladder_governor = {
+	.name =		"ladder",
+	.rating =	10,
+	.init =		ladder_init_device,
+	.exit =		ladder_exit_device,
+	.scan =		ladder_scan_device,
+	.select =	ladder_select_state,
+	.owner =	THIS_MODULE,
+};
+
+/**
+ * init_ladder - initializes the governor
+ */
+static int __init init_ladder(void)
+{
+	return cpuidle_register_governor(&ladder_governor);
+}
+
+/**
+ * exit_ladder - exits the governor
+ */
+static void __exit exit_ladder(void)
+{
+	cpuidle_unregister_governor(&ladder_governor);
+}
+
+MODULE_LICENSE("GPL");
+module_init(init_ladder);
+module_exit(exit_ladder);
Index: linux-2.6.22/drivers/cpuidle/sysfs.c
===================================================================
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ linux-2.6.22/drivers/cpuidle/sysfs.c	2007-07-15 17:13:44.000000000 +0200
@@ -0,0 +1,393 @@
+/*
+ * sysfs.c - sysfs support
+ *
+ * (C) 2006-2007 Shaohua Li <shaohua.li@intel.com>
+ *
+ * This code is licenced under the GPL.
+ */
+
+#include <linux/kernel.h>
+#include <linux/cpuidle.h>
+#include <linux/sysfs.h>
+#include <linux/cpu.h>
+
+#include "cpuidle.h"
+
+static unsigned int sysfs_switch;
+static int __init cpuidle_sysfs_setup(char *unused)
+{
+	sysfs_switch = 1;
+	return 1;
+}
+__setup("cpuidle_sysfs_switch", cpuidle_sysfs_setup);
+
+static ssize_t show_available_drivers(struct sys_device *dev, char *buf)
+{
+	ssize_t i = 0;
+	struct cpuidle_driver *tmp;
+
+	mutex_lock(&cpuidle_lock);
+	list_for_each_entry(tmp, &cpuidle_drivers, driver_list) {
+		if (i >= (ssize_t)((PAGE_SIZE/sizeof(char)) - CPUIDLE_NAME_LEN - 2))
+			goto out;
+		i += scnprintf(&buf[i], CPUIDLE_NAME_LEN, "%s ", tmp->name);
+	}
+out:
+	i+= sprintf(&buf[i], "\n");
+	mutex_unlock(&cpuidle_lock);
+	return i;
+}
+
+static ssize_t show_available_governors(struct sys_device *dev, char *buf)
+{
+	ssize_t i = 0;
+	struct cpuidle_governor *tmp;
+
+	mutex_lock(&cpuidle_lock);
+	list_for_each_entry(tmp, &cpuidle_governors, governor_list) {
+		if (i >= (ssize_t)((PAGE_SIZE/sizeof(char)) - CPUIDLE_NAME_LEN - 2))
+			goto out;
+		i += scnprintf(&buf[i], CPUIDLE_NAME_LEN, "%s ", tmp->name);
+	}
+	if (list_empty(&cpuidle_governors))
+		i+= sprintf(&buf[i], "no governors");
+out:
+	i+= sprintf(&buf[i], "\n");
+	mutex_unlock(&cpuidle_lock);
+	return i;
+}
+
+static ssize_t show_current_driver(struct sys_device *dev, char *buf)
+{
+	ssize_t ret;
+
+	mutex_lock(&cpuidle_lock);
+	ret = sprintf(buf, "%s\n", cpuidle_curr_driver->name);
+	mutex_unlock(&cpuidle_lock);
+	return ret;
+}
+
+static ssize_t store_current_driver(struct sys_device *dev,
+	const char *buf, size_t count)
+{
+	char str[CPUIDLE_NAME_LEN];
+	int len = count;
+	struct cpuidle_driver *tmp, *found = NULL;
+
+	if (len > CPUIDLE_NAME_LEN)
+		len = CPUIDLE_NAME_LEN;
+
+	if (sscanf(buf, "%s", str) != 1)
+		return -EINVAL;
+
+	mutex_lock(&cpuidle_lock);
+	list_for_each_entry(tmp, &cpuidle_drivers, driver_list) {
+		if (strncmp(tmp->name, str, CPUIDLE_NAME_LEN) == 0) {
+			found = tmp;
+			break;
+		}
+	}
+	if (found)
+		cpuidle_switch_driver(found);
+	mutex_unlock(&cpuidle_lock);
+
+	return count;
+}
+
+static ssize_t show_current_governor(struct sys_device *dev, char *buf)
+{
+	ssize_t i;
+
+	mutex_lock(&cpuidle_lock);
+	if (cpuidle_curr_governor)
+		i = sprintf(buf, "%s\n", cpuidle_curr_governor->name);
+	else
+		i = sprintf(buf, "no governor\n");
+	mutex_unlock(&cpuidle_lock);
+
+	return i;
+}
+
+static ssize_t store_current_governor(struct sys_device *dev,
+	const char *buf, size_t count)
+{
+	char str[CPUIDLE_NAME_LEN];
+	int len = count;
+	struct cpuidle_governor *tmp, *found = NULL;
+
+	if (len > CPUIDLE_NAME_LEN)
+		len = CPUIDLE_NAME_LEN;
+
+	if (sscanf(buf, "%s", str) != 1)
+		return -EINVAL;
+
+	mutex_lock(&cpuidle_lock);
+	list_for_each_entry(tmp, &cpuidle_governors, governor_list) {
+		if (strncmp(tmp->name, str, CPUIDLE_NAME_LEN) == 0) {
+			found = tmp;
+			break;
+		}
+	}
+	if (found)
+		cpuidle_switch_governor(found);
+	mutex_unlock(&cpuidle_lock);
+
+	return count;
+}
+
+static SYSDEV_ATTR(current_driver_ro, 0444, show_current_driver, NULL);
+static SYSDEV_ATTR(current_governor_ro, 0444, show_current_governor, NULL);
+
+static struct attribute *cpuclass_default_attrs[] = {
+	&attr_current_driver_ro.attr,
+	&attr_current_governor_ro.attr,
+	NULL
+};
+
+static SYSDEV_ATTR(available_drivers, 0444, show_available_drivers, NULL);
+static SYSDEV_ATTR(available_governors, 0444, show_available_governors, NULL);
+static SYSDEV_ATTR(current_driver, 0644, show_current_driver,
+	store_current_driver);
+static SYSDEV_ATTR(current_governor, 0644, show_current_governor,
+	store_current_governor);
+
+static struct attribute *cpuclass_switch_attrs[] = {
+	&attr_available_drivers.attr,
+	&attr_available_governors.attr,
+	&attr_current_driver.attr,
+	&attr_current_governor.attr,
+	NULL
+};
+
+static struct attribute_group cpuclass_attr_group = {
+	.attrs = cpuclass_default_attrs,
+	.name = "cpuidle",
+};
+
+/**
+ * cpuidle_add_class_sysfs - add CPU global sysfs attributes
+ */
+int cpuidle_add_class_sysfs(struct sysdev_class *cls)
+{
+	if (sysfs_switch)
+		cpuclass_attr_group.attrs = cpuclass_switch_attrs;
+
+	return sysfs_create_group(&cls->kset.kobj, &cpuclass_attr_group);
+}
+
+/**
+ * cpuidle_remove_class_sysfs - remove CPU global sysfs attributes
+ */
+void cpuidle_remove_class_sysfs(struct sysdev_class *cls)
+{
+	sysfs_remove_group(&cls->kset.kobj, &cpuclass_attr_group);
+}
+
+struct cpuidle_attr {
+	struct attribute attr;
+	ssize_t (*show)(struct cpuidle_device *, char *);
+	ssize_t (*store)(struct cpuidle_device *, const char *, size_t count);
+};
+
+#define define_one_ro(_name, show) \
+	static struct cpuidle_attr attr_##_name = __ATTR(_name, 0444, show, NULL)
+#define define_one_rw(_name, show, store) \
+	static struct cpuidle_attr attr_##_name = __ATTR(_name, 0644, show, store)
+
+#define kobj_to_cpuidledev(k) container_of(k, struct cpuidle_device, kobj)
+#define attr_to_cpuidleattr(a) container_of(a, struct cpuidle_attr, attr)
+static ssize_t cpuidle_show(struct kobject * kobj, struct attribute * attr ,char * buf)
+{
+	int ret = -EIO;
+	struct cpuidle_device *dev = kobj_to_cpuidledev(kobj);
+	struct cpuidle_attr * cattr = attr_to_cpuidleattr(attr);
+
+	if (cattr->show) {
+		mutex_lock(&cpuidle_lock);
+		ret = cattr->show(dev, buf);
+		mutex_unlock(&cpuidle_lock);
+	}
+	return ret;
+}
+
+static ssize_t cpuidle_store(struct kobject * kobj, struct attribute * attr,
+		     const char * buf, size_t count)
+{
+	int ret = -EIO;
+	struct cpuidle_device *dev = kobj_to_cpuidledev(kobj);
+	struct cpuidle_attr * cattr = attr_to_cpuidleattr(attr);
+
+	if (cattr->store) {
+		mutex_lock(&cpuidle_lock);
+		ret = cattr->store(dev, buf, count);
+		mutex_unlock(&cpuidle_lock);
+	}
+	return ret;
+}
+
+static struct sysfs_ops cpuidle_sysfs_ops = {
+	.show = cpuidle_show,
+	.store = cpuidle_store,
+};
+
+static void cpuidle_sysfs_release(struct kobject *kobj)
+{
+	struct cpuidle_device *dev = kobj_to_cpuidledev(kobj);
+
+	complete(&dev->kobj_unregister);
+}
+
+static struct kobj_type ktype_cpuidle = {
+	.sysfs_ops = &cpuidle_sysfs_ops,
+	.release = cpuidle_sysfs_release,
+};
+
+struct cpuidle_state_attr {
+	struct attribute attr;
+	ssize_t (*show)(struct cpuidle_state *, char *);
+	ssize_t (*store)(struct cpuidle_state *, const char *, size_t);
+};
+
+#define define_one_state_ro(_name, show) \
+static struct cpuidle_state_attr attr_##_name = __ATTR(_name, 0444, show, NULL)
+
+#define define_show_state_function(_name) \
+static ssize_t show_state_##_name(struct cpuidle_state *state, char *buf) \
+{ \
+	return sprintf(buf, "%d\n", state->_name);\
+}
+
+define_show_state_function(exit_latency)
+define_show_state_function(power_usage)
+define_show_state_function(usage)
+define_show_state_function(time)
+define_one_state_ro(latency, show_state_exit_latency);
+define_one_state_ro(power, show_state_power_usage);
+define_one_state_ro(usage, show_state_usage);
+define_one_state_ro(time, show_state_time);
+
+static struct attribute *cpuidle_state_default_attrs[] = {
+	&attr_latency.attr,
+	&attr_power.attr,
+	&attr_usage.attr,
+	&attr_time.attr,
+	NULL
+};
+
+#define kobj_to_state_obj(k) container_of(k, struct cpuidle_state_kobj, kobj)
+#define kobj_to_state(k) (kobj_to_state_obj(k)->state)
+#define attr_to_stateattr(a) container_of(a, struct cpuidle_state_attr, attr)
+static ssize_t cpuidle_state_show(struct kobject * kobj,
+	struct attribute * attr ,char * buf)
+{
+	int ret = -EIO;
+	struct cpuidle_state *state = kobj_to_state(kobj);
+	struct cpuidle_state_attr * cattr = attr_to_stateattr(attr);
+
+	if (cattr->show)
+		ret = cattr->show(state, buf);
+
+	return ret;
+}
+
+static struct sysfs_ops cpuidle_state_sysfs_ops = {
+	.show = cpuidle_state_show,
+};
+
+static void cpuidle_state_sysfs_release(struct kobject *kobj)
+{
+	struct cpuidle_state_kobj *state_obj = kobj_to_state_obj(kobj);
+
+	complete(&state_obj->kobj_unregister);
+}
+
+static struct kobj_type ktype_state_cpuidle = {
+	.sysfs_ops = &cpuidle_state_sysfs_ops,
+	.default_attrs = cpuidle_state_default_attrs,
+	.release = cpuidle_state_sysfs_release,
+};
+
+static void inline cpuidle_free_state_kobj(struct cpuidle_device *device, int i)
+{
+	kobject_unregister(&device->kobjs[i]->kobj);
+	wait_for_completion(&device->kobjs[i]->kobj_unregister);
+	kfree(device->kobjs[i]);
+	device->kobjs[i] = NULL;
+}
+
+/**
+ * cpuidle_add_driver_sysfs - adds driver-specific sysfs attributes
+ * @device: the target device
+ */
+int cpuidle_add_driver_sysfs(struct cpuidle_device *device)
+{
+	int i, ret = -ENOMEM;
+	struct cpuidle_state_kobj *kobj;
+
+	/* state statistics */
+	for (i = 0; i < device->state_count; i++) {
+		kobj = kzalloc(sizeof(struct cpuidle_state_kobj), GFP_KERNEL);
+		if (!kobj)
+			goto error_state;
+		kobj->state = &device->states[i];
+		init_completion(&kobj->kobj_unregister);
+
+		kobj->kobj.parent = &device->kobj;
+		kobj->kobj.ktype = &ktype_state_cpuidle;
+		kobject_set_name(&kobj->kobj, "state%d", i);
+		ret = kobject_register(&kobj->kobj);
+		if (ret) {
+			kfree(kobj);
+			goto error_state;
+		}
+		device->kobjs[i] = kobj;
+	}
+
+	return 0;
+
+error_state:
+	for (i = i - 1; i >= 0; i--)
+		cpuidle_free_state_kobj(device, i);
+	return ret;
+}
+
+/**
+ * cpuidle_remove_driver_sysfs - removes driver-specific sysfs attributes
+ * @device: the target device
+ */
+void cpuidle_remove_driver_sysfs(struct cpuidle_device *device)
+{
+	int i;
+
+	for (i = 0; i < device->state_count; i++)
+		cpuidle_free_state_kobj(device, i);
+}
+
+/**
+ * cpuidle_add_sysfs - creates a sysfs instance for the target device
+ * @sysdev: the target device
+ */
+int cpuidle_add_sysfs(struct sys_device *sysdev)
+{
+	int cpu = sysdev->id;
+	struct cpuidle_device *dev;
+
+	dev = per_cpu(cpuidle_devices, cpu);
+	dev->kobj.parent = &sysdev->kobj;
+	dev->kobj.ktype = &ktype_cpuidle;
+	kobject_set_name(&dev->kobj, "%s", "cpuidle");
+	return kobject_register(&dev->kobj);
+}
+
+/**
+ * cpuidle_remove_sysfs - deletes a sysfs instance on the target device
+ * @sysdev: the target device
+ */
+void cpuidle_remove_sysfs(struct sys_device *sysdev)
+{
+	int cpu = sysdev->id;
+	struct cpuidle_device *dev;
+
+	dev = per_cpu(cpuidle_devices, cpu);
+	kobject_unregister(&dev->kobj);
+}
Index: linux-2.6.22/include/linux/cpuidle.h
===================================================================
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ linux-2.6.22/include/linux/cpuidle.h	2007-07-15 17:13:44.000000000 +0200
@@ -0,0 +1,190 @@
+/*
+ * cpuidle.h - a generic framework for CPU idle power management
+ *
+ * (C) 2007 Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
+ *          Shaohua Li <shaohua.li@intel.com>
+ *          Adam Belay <abelay@novell.com>
+ *
+ * This code is licenced under the GPL.
+ */
+
+#ifndef _LINUX_CPUIDLE_H
+#define _LINUX_CPUIDLE_H
+
+#include <linux/percpu.h>
+#include <linux/list.h>
+#include <linux/module.h>
+#include <linux/kobject.h>
+#include <linux/completion.h>
+
+#define CPUIDLE_STATE_MAX	8
+#define CPUIDLE_NAME_LEN	16
+
+struct cpuidle_device;
+
+
+/****************************
+ * CPUIDLE DEVICE INTERFACE *
+ ****************************/
+
+struct cpuidle_state {
+	char		name[CPUIDLE_NAME_LEN];
+	void		*driver_data;
+
+	unsigned int	flags;
+	unsigned int	exit_latency; /* in US */
+	unsigned int	power_usage; /* in mW */
+	unsigned int	target_residency; /* in US */
+
+	unsigned int	usage;
+	unsigned int	time; /* in US */
+
+	int (*enter)	(struct cpuidle_device *dev,
+			 struct cpuidle_state *state);
+};
+
+/* Idle State Flags */
+#define CPUIDLE_FLAG_TIME_VALID	(0x01) /* is residency time measurable? */
+#define CPUIDLE_FLAG_CHECK_BM	(0x02) /* BM activity will exit state */
+#define CPUIDLE_FLAG_SHALLOW	(0x10) /* low latency, minimal savings */
+#define CPUIDLE_FLAG_BALANCED	(0x20) /* medium latency, moderate savings */
+#define CPUIDLE_FLAG_DEEP	(0x40) /* high latency, large savings */
+
+#define CPUIDLE_DRIVER_FLAGS_MASK (0xFFFF0000)
+
+/**
+ * cpuidle_get_statedata - retrieves private driver state data
+ * @state: the state
+ */
+static inline void * cpuidle_get_statedata(struct cpuidle_state *state)
+{
+	return state->driver_data;
+}
+
+/**
+ * cpuidle_set_statedata - stores private driver state data
+ * @state: the state
+ * @data: the private data
+ */
+static inline void
+cpuidle_set_statedata(struct cpuidle_state *state, void *data)
+{
+	state->driver_data = data;
+}
+
+struct cpuidle_state_kobj {
+	struct cpuidle_state *state;
+	struct completion kobj_unregister;
+	struct kobject kobj;
+};
+
+struct cpuidle_device {
+	unsigned int		status;
+	int			cpu;
+
+	int			last_residency;
+	int			state_count;
+	struct cpuidle_state	states[CPUIDLE_STATE_MAX];
+	struct cpuidle_state_kobj *kobjs[CPUIDLE_STATE_MAX];
+	struct cpuidle_state	*last_state;
+
+	struct list_head 	device_list;
+	struct kobject		kobj;
+	struct completion	kobj_unregister;
+	void			*governor_data;
+};
+
+DECLARE_PER_CPU(struct cpuidle_device *, cpuidle_devices);
+
+/* Device Status Flags */
+#define CPUIDLE_STATUS_DETECTED		 (0x1)
+#define CPUIDLE_STATUS_DRIVER_ATTACHED	 (0x2)
+#define CPUIDLE_STATUS_GOVERNOR_ATTACHED (0x4)
+#define CPUIDLE_STATUS_DOIDLE		 (CPUIDLE_STATUS_DETECTED | \
+					  CPUIDLE_STATUS_DRIVER_ATTACHED | \
+					  CPUIDLE_STATUS_GOVERNOR_ATTACHED)
+
+/**
+ * cpuidle_get_last_residency - retrieves the last state's residency time
+ * @dev: the target CPU
+ *
+ * NOTE: this value is invalid if CPUIDLE_FLAG_TIME_VALID isn't set
+ */
+static inline int cpuidle_get_last_residency(struct cpuidle_device *dev)
+{
+	return dev->last_residency;
+}
+
+
+/****************************
+ * CPUIDLE DRIVER INTERFACE *
+ ****************************/
+
+struct cpuidle_driver {
+	char			name[CPUIDLE_NAME_LEN];
+	struct list_head 	driver_list;
+
+	int  (*init)		(struct cpuidle_device *dev);
+	void (*exit)		(struct cpuidle_device *dev);
+	int  (*redetect)	(struct cpuidle_device *dev);
+
+	int  (*bm_check)	(void);
+
+	struct module 		*owner;
+};
+
+#ifdef CONFIG_CPU_IDLE
+
+extern int cpuidle_register_driver(struct cpuidle_driver *drv);
+extern void cpuidle_unregister_driver(struct cpuidle_driver *drv);
+extern int cpuidle_force_redetect(struct cpuidle_device *dev, struct cpuidle_driver *drv);
+extern int cpuidle_force_redetect_devices(struct cpuidle_driver *drv);
+
+#else
+
+static inline int cpuidle_register_driver(struct cpuidle_driver *drv)
+{return 0;}
+static inline void cpuidle_unregister_driver(struct cpuidle_driver *drv) { }
+static inline int cpuidle_force_redetect(struct cpuidle_device *dev, struct cpuidle_driver *drv)
+{return 0;}
+static inline int cpuidle_force_redetect_devices(struct cpuidle_driver *drv)
+{return 0;}
+
+#endif
+
+/******************************
+ * CPUIDLE GOVERNOR INTERFACE *
+ ******************************/
+
+struct cpuidle_governor {
+	char			name[CPUIDLE_NAME_LEN];
+	struct list_head 	governor_list;
+	unsigned int		rating;
+
+	int  (*init)		(struct cpuidle_device *dev);
+	void (*exit)		(struct cpuidle_device *dev);
+	void (*scan)		(struct cpuidle_device *dev);
+
+	int  (*select)		(struct cpuidle_device *dev);
+	void (*reflect)		(struct cpuidle_device *dev);
+
+	struct module 		*owner;
+};
+
+#ifdef CONFIG_CPU_IDLE
+
+extern int cpuidle_register_governor(struct cpuidle_governor *gov);
+extern void cpuidle_unregister_governor(struct cpuidle_governor *gov);
+extern int cpuidle_get_bm_activity(void);
+
+#else
+
+static inline int cpuidle_register_governor(struct cpuidle_governor *gov)
+{return 0;}
+static inline void cpuidle_unregister_governor(struct cpuidle_governor *gov) { }
+static inline int cpuidle_get_bm_activity(void)
+{return 0;}
+
+#endif
+
+#endif /* _LINUX_CPUIDLE_H */
Index: linux-2.6.22/drivers/acpi/processor_idle.c
===================================================================
--- linux-2.6.22.orig/drivers/acpi/processor_idle.c	2007-07-15 17:13:44.000000000 +0200
+++ linux-2.6.22/drivers/acpi/processor_idle.c	2007-07-15 17:13:52.000000000 +0200
@@ -40,6 +40,7 @@
 #include <linux/sched.h>	/* need_resched() */
 #include <linux/latency.h>
 #include <linux/clockchips.h>
+#include <linux/cpuidle.h>
 
 /*
  * Include the apic definitions for x86 to have the APIC timer related defines
@@ -62,26 +63,34 @@
 #define _COMPONENT              ACPI_PROCESSOR_COMPONENT
 ACPI_MODULE_NAME("processor_idle");
 #define ACPI_PROCESSOR_FILE_POWER	"power"
-#define US_TO_PM_TIMER_TICKS(t)		((t * (PM_TIMER_FREQUENCY/1000)) / 1000)
-#define PM_TIMER_TICK_NS		(1000000000ULL/PM_TIMER_FREQUENCY)
-#define C2_OVERHEAD			4	/* 1us (3.579 ticks per us) */
-#define C3_OVERHEAD			4	/* 1us (3.579 ticks per us) */
-static void (*pm_idle_save) (void) __read_mostly;
-module_param(max_cstate, uint, 0644);
+#define PM_TIMER_TICKS_TO_US(p)		(((p) * 1000)/(PM_TIMER_FREQUENCY/1000))
+#define C2_OVERHEAD			1	/* 1us */
+#define C3_OVERHEAD			1	/* 1us */
+
+void acpi_max_cstate_changed(void)
+{
+	/* Driver will reset devices' max cstate limit */
+	cpuidle_force_redetect_devices(&acpi_idle_driver);
+}
+
+static int change_max_cstate(const char *val, struct kernel_param *kp)
+{
+	int max;
+
+	max = simple_strtol(val, NULL, 0);
+	if (!max)
+		return -EINVAL;
+	max_cstate = max;
+	if (acpi_do_set_cstate_limit)
+		acpi_do_set_cstate_limit();
+	return 0;
+}
+
+module_param_call(max_cstate, change_max_cstate, param_get_uint, &max_cstate, 0644);
 
 static unsigned int nocst __read_mostly;
 module_param(nocst, uint, 0000);
 
-/*
- * bm_history -- bit-mask with a bit per jiffy of bus-master activity
- * 1000 HZ: 0xFFFFFFFF: 32 jiffies = 32ms
- * 800 HZ: 0xFFFFFFFF: 32 jiffies = 40ms
- * 100 HZ: 0x0000000F: 4 jiffies = 40ms
- * reduce history for more aggressive entry into C3
- */
-static unsigned int bm_history __read_mostly =
-    (HZ >= 800 ? 0xFFFFFFFF : ((1U << (HZ / 25)) - 1));
-module_param(bm_history, uint, 0644);
 /* --------------------------------------------------------------------------
                                 Power Management
    -------------------------------------------------------------------------- */
@@ -166,88 +176,6 @@ static struct dmi_system_id __cpuinitdat
 	{},
 };
 
-static inline u32 ticks_elapsed(u32 t1, u32 t2)
-{
-	if (t2 >= t1)
-		return (t2 - t1);
-	else if (!(acpi_gbl_FADT.flags & ACPI_FADT_32BIT_TIMER))
-		return (((0x00FFFFFF - t1) + t2) & 0x00FFFFFF);
-	else
-		return ((0xFFFFFFFF - t1) + t2);
-}
-
-static void
-acpi_processor_power_activate(struct acpi_processor *pr,
-			      struct acpi_processor_cx *new)
-{
-	struct acpi_processor_cx *old;
-
-	if (!pr || !new)
-		return;
-
-	old = pr->power.state;
-
-	if (old)
-		old->promotion.count = 0;
-	new->demotion.count = 0;
-
-	/* Cleanup from old state. */
-	if (old) {
-		switch (old->type) {
-		case ACPI_STATE_C3:
-			/* Disable bus master reload */
-			if (new->type != ACPI_STATE_C3 && pr->flags.bm_check)
-				acpi_set_register(ACPI_BITREG_BUS_MASTER_RLD, 0);
-			break;
-		}
-	}
-
-	/* Prepare to use new state. */
-	switch (new->type) {
-	case ACPI_STATE_C3:
-		/* Enable bus master reload */
-		if (old->type != ACPI_STATE_C3 && pr->flags.bm_check)
-			acpi_set_register(ACPI_BITREG_BUS_MASTER_RLD, 1);
-		break;
-	}
-
-	pr->power.state = new;
-
-	return;
-}
-
-static void acpi_safe_halt(void)
-{
-	current_thread_info()->status &= ~TS_POLLING;
-	/*
-	 * TS_POLLING-cleared state must be visible before we
-	 * test NEED_RESCHED:
-	 */
-	smp_mb();
-	if (!need_resched())
-		safe_halt();
-	current_thread_info()->status |= TS_POLLING;
-}
-
-static atomic_t c3_cpu_count;
-
-/* Common C-state entry for C2, C3, .. */
-static void acpi_cstate_enter(struct acpi_processor_cx *cstate)
-{
-	if (cstate->space_id == ACPI_CSTATE_FFH) {
-		/* Call into architectural FFH based C-state */
-		acpi_processor_ffh_cstate_enter(cstate);
-	} else {
-		int unused;
-		/* IO port based C-state */
-		inb(cstate->address);
-		/* Dummy wait op - must do something useless after P_LVL2 read
-		   because chipsets cannot guarantee that STPCLK# signal
-		   gets asserted in time to freeze execution properly. */
-		unused = inl(acpi_gbl_FADT.xpm_timer_block.address);
-	}
-}
-
 #ifdef ARCH_APICTIMER_STOPS_ON_C3
 
 /*
@@ -275,21 +203,12 @@ static void acpi_timer_check_state(int s
 
 static void acpi_propagate_timer_broadcast(struct acpi_processor *pr)
 {
-#ifdef CONFIG_GENERIC_CLOCKEVENTS
 	unsigned long reason;
 
 	reason = pr->power.timer_broadcast_on_state < INT_MAX ?
 		CLOCK_EVT_NOTIFY_BROADCAST_ON : CLOCK_EVT_NOTIFY_BROADCAST_OFF;
 
 	clockevents_notify(reason, &pr->id);
-#else
-	cpumask_t mask = cpumask_of_cpu(pr->id);
-
-	if (pr->power.timer_broadcast_on_state < INT_MAX)
-		on_each_cpu(switch_APIC_timer_to_ipi, &mask, 1, 1);
-	else
-		on_each_cpu(switch_ipi_to_APIC_timer, &mask, 1, 1);
-#endif
 }
 
 /* Power(C) State timer broadcast control */
@@ -297,8 +216,6 @@ static void acpi_state_timer_broadcast(s
 				       struct acpi_processor_cx *cx,
 				       int broadcast)
 {
-#ifdef CONFIG_GENERIC_CLOCKEVENTS
-
 	int state = cx - pr->power.states;
 
 	if (state >= pr->power.timer_broadcast_on_state) {
@@ -308,7 +225,6 @@ static void acpi_state_timer_broadcast(s
 			CLOCK_EVT_NOTIFY_BROADCAST_EXIT;
 		clockevents_notify(reason, &pr->id);
 	}
-#endif
 }
 
 #else
@@ -323,395 +239,6 @@ static void acpi_state_timer_broadcast(s
 }
 
 #endif
-
-static void acpi_processor_idle(void)
-{
-	struct acpi_processor *pr = NULL;
-	struct acpi_processor_cx *cx = NULL;
-	struct acpi_processor_cx *next_state = NULL;
-	int sleep_ticks = 0;
-	u32 t1, t2 = 0;
-
-	/*
-	 * Interrupts must be disabled during bus mastering calculations and
-	 * for C2/C3 transitions.
-	 */
-	local_irq_disable();
-
-	pr = processors[smp_processor_id()];
-	if (!pr) {
-		local_irq_enable();
-		return;
-	}
-
-	/*
-	 * Check whether we truly need to go idle, or should
-	 * reschedule:
-	 */
-	if (unlikely(need_resched())) {
-		local_irq_enable();
-		return;
-	}
-
-	cx = pr->power.state;
-	if (!cx) {
-		if (pm_idle_save)
-			pm_idle_save();
-		else
-			acpi_safe_halt();
-		return;
-	}
-
-	/*
-	 * Check BM Activity
-	 * -----------------
-	 * Check for bus mastering activity (if required), record, and check
-	 * for demotion.
-	 */
-	if (pr->flags.bm_check) {
-		u32 bm_status = 0;
-		unsigned long diff = jiffies - pr->power.bm_check_timestamp;
-
-		if (diff > 31)
-			diff = 31;
-
-		pr->power.bm_activity <<= diff;
-
-		acpi_get_register(ACPI_BITREG_BUS_MASTER_STATUS, &bm_status);
-		if (bm_status) {
-			pr->power.bm_activity |= 0x1;
-			acpi_set_register(ACPI_BITREG_BUS_MASTER_STATUS, 1);
-		}
-		/*
-		 * PIIX4 Erratum #18: Note that BM_STS doesn't always reflect
-		 * the true state of bus mastering activity; forcing us to
-		 * manually check the BMIDEA bit of each IDE channel.
-		 */
-		else if (errata.piix4.bmisx) {
-			if ((inb_p(errata.piix4.bmisx + 0x02) & 0x01)
-			    || (inb_p(errata.piix4.bmisx + 0x0A) & 0x01))
-				pr->power.bm_activity |= 0x1;
-		}
-
-		pr->power.bm_check_timestamp = jiffies;
-
-		/*
-		 * If bus mastering is or was active this jiffy, demote
-		 * to avoid a faulty transition.  Note that the processor
-		 * won't enter a low-power state during this call (to this
-		 * function) but should upon the next.
-		 *
-		 * TBD: A better policy might be to fallback to the demotion
-		 *      state (use it for this quantum only) istead of
-		 *      demoting -- and rely on duration as our sole demotion
-		 *      qualification.  This may, however, introduce DMA
-		 *      issues (e.g. floppy DMA transfer overrun/underrun).
-		 */
-		if ((pr->power.bm_activity & 0x1) &&
-		    cx->demotion.threshold.bm) {
-			local_irq_enable();
-			next_state = cx->demotion.state;
-			goto end;
-		}
-	}
-
-#ifdef CONFIG_HOTPLUG_CPU
-	/*
-	 * Check for P_LVL2_UP flag before entering C2 and above on
-	 * an SMP system. We do it here instead of doing it at _CST/P_LVL
-	 * detection phase, to work cleanly with logical CPU hotplug.
-	 */
-	if ((cx->type != ACPI_STATE_C1) && (num_online_cpus() > 1) && 
-	    !pr->flags.has_cst && !(acpi_gbl_FADT.flags & ACPI_FADT_C2_MP_SUPPORTED))
-		cx = &pr->power.states[ACPI_STATE_C1];
-#endif
-
-	/*
-	 * Sleep:
-	 * ------
-	 * Invoke the current Cx state to put the processor to sleep.
-	 */
-	if (cx->type == ACPI_STATE_C2 || cx->type == ACPI_STATE_C3) {
-		current_thread_info()->status &= ~TS_POLLING;
-		/*
-		 * TS_POLLING-cleared state must be visible before we
-		 * test NEED_RESCHED:
-		 */
-		smp_mb();
-		if (need_resched()) {
-			current_thread_info()->status |= TS_POLLING;
-			local_irq_enable();
-			return;
-		}
-	}
-
-	switch (cx->type) {
-
-	case ACPI_STATE_C1:
-		/*
-		 * Invoke C1.
-		 * Use the appropriate idle routine, the one that would
-		 * be used without acpi C-states.
-		 */
-		if (pm_idle_save)
-			pm_idle_save();
-		else
-			acpi_safe_halt();
-
-		/*
-		 * TBD: Can't get time duration while in C1, as resumes
-		 *      go to an ISR rather than here.  Need to instrument
-		 *      base interrupt handler.
-		 *
-		 * Note: the TSC better not stop in C1, sched_clock() will
-		 *       skew otherwise.
-		 */
-		sleep_ticks = 0xFFFFFFFF;
-		break;
-
-	case ACPI_STATE_C2:
-		/* Get start time (ticks) */
-		t1 = inl(acpi_gbl_FADT.xpm_timer_block.address);
-		/* Tell the scheduler that we are going deep-idle: */
-		sched_clock_idle_sleep_event();
-		/* Invoke C2 */
-		acpi_state_timer_broadcast(pr, cx, 1);
-		acpi_cstate_enter(cx);
-		/* Get end time (ticks) */
-		t2 = inl(acpi_gbl_FADT.xpm_timer_block.address);
-
-#ifdef CONFIG_GENERIC_TIME
-		/* TSC halts in C2, so notify users */
-		mark_tsc_unstable("possible TSC halt in C2");
-#endif
-		/* Compute time (ticks) that we were actually asleep */
-		sleep_ticks = ticks_elapsed(t1, t2);
-
-		/* Tell the scheduler how much we idled: */
-		sched_clock_idle_wakeup_event(sleep_ticks*PM_TIMER_TICK_NS);
-
-		/* Re-enable interrupts */
-		local_irq_enable();
-		/* Do not account our idle-switching overhead: */
-		sleep_ticks -= cx->latency_ticks + C2_OVERHEAD;
-
-		current_thread_info()->status |= TS_POLLING;
-		acpi_state_timer_broadcast(pr, cx, 0);
-		break;
-
-	case ACPI_STATE_C3:
-		if (pr->flags.bm_check) {
-			if (atomic_inc_return(&c3_cpu_count) ==
-			    num_online_cpus()) {
-				/*
-				 * All CPUs are trying to go to C3
-				 * Disable bus master arbitration
-				 */
-				acpi_set_register(ACPI_BITREG_ARB_DISABLE, 1);
-			}
-		} else {
-			/* SMP with no shared cache... Invalidate cache  */
-			ACPI_FLUSH_CPU_CACHE();
-		}
-
-		/* Get start time (ticks) */
-		t1 = inl(acpi_gbl_FADT.xpm_timer_block.address);
-		/* Invoke C3 */
-		acpi_state_timer_broadcast(pr, cx, 1);
-		/* Tell the scheduler that we are going deep-idle: */
-		sched_clock_idle_sleep_event();
-		acpi_cstate_enter(cx);
-		/* Get end time (ticks) */
-		t2 = inl(acpi_gbl_FADT.xpm_timer_block.address);
-		if (pr->flags.bm_check) {
-			/* Enable bus master arbitration */
-			atomic_dec(&c3_cpu_count);
-			acpi_set_register(ACPI_BITREG_ARB_DISABLE, 0);
-		}
-
-#ifdef CONFIG_GENERIC_TIME
-		/* TSC halts in C3, so notify users */
-		mark_tsc_unstable("TSC halts in C3");
-#endif
-		/* Compute time (ticks) that we were actually asleep */
-		sleep_ticks = ticks_elapsed(t1, t2);
-		/* Tell the scheduler how much we idled: */
-		sched_clock_idle_wakeup_event(sleep_ticks*PM_TIMER_TICK_NS);
-
-		/* Re-enable interrupts */
-		local_irq_enable();
-		/* Do not account our idle-switching overhead: */
-		sleep_ticks -= cx->latency_ticks + C3_OVERHEAD;
-
-		current_thread_info()->status |= TS_POLLING;
-		acpi_state_timer_broadcast(pr, cx, 0);
-		break;
-
-	default:
-		local_irq_enable();
-		return;
-	}
-	cx->usage++;
-	if ((cx->type != ACPI_STATE_C1) && (sleep_ticks > 0))
-		cx->time += sleep_ticks;
-
-	next_state = pr->power.state;
-
-#ifdef CONFIG_HOTPLUG_CPU
-	/* Don't do promotion/demotion */
-	if ((cx->type == ACPI_STATE_C1) && (num_online_cpus() > 1) &&
-	    !pr->flags.has_cst && !(acpi_gbl_FADT.flags & ACPI_FADT_C2_MP_SUPPORTED)) {
-		next_state = cx;
-		goto end;
-	}
-#endif
-
-	/*
-	 * Promotion?
-	 * ----------
-	 * Track the number of longs (time asleep is greater than threshold)
-	 * and promote when the count threshold is reached.  Note that bus
-	 * mastering activity may prevent promotions.
-	 * Do not promote above max_cstate.
-	 */
-	if (cx->promotion.state &&
-	    ((cx->promotion.state - pr->power.states) <= max_cstate)) {
-		if (sleep_ticks > cx->promotion.threshold.ticks &&
-		  cx->promotion.state->latency <= system_latency_constraint()) {
-			cx->promotion.count++;
-			cx->demotion.count = 0;
-			if (cx->promotion.count >=
-			    cx->promotion.threshold.count) {
-				if (pr->flags.bm_check) {
-					if (!
-					    (pr->power.bm_activity & cx->
-					     promotion.threshold.bm)) {
-						next_state =
-						    cx->promotion.state;
-						goto end;
-					}
-				} else {
-					next_state = cx->promotion.state;
-					goto end;
-				}
-			}
-		}
-	}
-
-	/*
-	 * Demotion?
-	 * ---------
-	 * Track the number of shorts (time asleep is less than time threshold)
-	 * and demote when the usage threshold is reached.
-	 */
-	if (cx->demotion.state) {
-		if (sleep_ticks < cx->demotion.threshold.ticks) {
-			cx->demotion.count++;
-			cx->promotion.count = 0;
-			if (cx->demotion.count >= cx->demotion.threshold.count) {
-				next_state = cx->demotion.state;
-				goto end;
-			}
-		}
-	}
-
-      end:
-	/*
-	 * Demote if current state exceeds max_cstate
-	 * or if the latency of the current state is unacceptable
-	 */
-	if ((pr->power.state - pr->power.states) > max_cstate ||
-		pr->power.state->latency > system_latency_constraint()) {
-		if (cx->demotion.state)
-			next_state = cx->demotion.state;
-	}
-
-	/*
-	 * New Cx State?
-	 * -------------
-	 * If we're going to start using a new Cx state we must clean up
-	 * from the previous and prepare to use the new.
-	 */
-	if (next_state != pr->power.state)
-		acpi_processor_power_activate(pr, next_state);
-}
-
-static int acpi_processor_set_power_policy(struct acpi_processor *pr)
-{
-	unsigned int i;
-	unsigned int state_is_set = 0;
-	struct acpi_processor_cx *lower = NULL;
-	struct acpi_processor_cx *higher = NULL;
-	struct acpi_processor_cx *cx;
-
-
-	if (!pr)
-		return -EINVAL;
-
-	/*
-	 * This function sets the default Cx state policy (OS idle handler).
-	 * Our scheme is to promote quickly to C2 but more conservatively
-	 * to C3.  We're favoring C2  for its characteristics of low latency
-	 * (quick response), good power savings, and ability to allow bus
-	 * mastering activity.  Note that the Cx state policy is completely
-	 * customizable and can be altered dynamically.
-	 */
-
-	/* startup state */
-	for (i = 1; i < ACPI_PROCESSOR_MAX_POWER; i++) {
-		cx = &pr->power.states[i];
-		if (!cx->valid)
-			continue;
-
-		if (!state_is_set)
-			pr->power.state = cx;
-		state_is_set++;
-		break;
-	}
-
-	if (!state_is_set)
-		return -ENODEV;
-
-	/* demotion */
-	for (i = 1; i < ACPI_PROCESSOR_MAX_POWER; i++) {
-		cx = &pr->power.states[i];
-		if (!cx->valid)
-			continue;
-
-		if (lower) {
-			cx->demotion.state = lower;
-			cx->demotion.threshold.ticks = cx->latency_ticks;
-			cx->demotion.threshold.count = 1;
-			if (cx->type == ACPI_STATE_C3)
-				cx->demotion.threshold.bm = bm_history;
-		}
-
-		lower = cx;
-	}
-
-	/* promotion */
-	for (i = (ACPI_PROCESSOR_MAX_POWER - 1); i > 0; i--) {
-		cx = &pr->power.states[i];
-		if (!cx->valid)
-			continue;
-
-		if (higher) {
-			cx->promotion.state = higher;
-			cx->promotion.threshold.ticks = cx->latency_ticks;
-			if (cx->type >= ACPI_STATE_C2)
-				cx->promotion.threshold.count = 4;
-			else
-				cx->promotion.threshold.count = 10;
-			if (higher->type == ACPI_STATE_C3)
-				cx->promotion.threshold.bm = bm_history;
-		}
-
-		higher = cx;
-	}
-
-	return 0;
-}
-
 static int acpi_processor_get_power_info_fadt(struct acpi_processor *pr)
 {
  
@@ -912,7 +456,7 @@ static void acpi_processor_power_verify_
 	 * Normalize the C2 latency to expidite policy
 	 */
 	cx->valid = 1;
-	cx->latency_ticks = US_TO_PM_TIMER_TICKS(cx->latency);
+	cx->latency_ticks = cx->latency;
 
 	return;
 }
@@ -986,7 +530,7 @@ static void acpi_processor_power_verify_
 	 * use this in our C3 policy
 	 */
 	cx->valid = 1;
-	cx->latency_ticks = US_TO_PM_TIMER_TICKS(cx->latency);
+	cx->latency_ticks = cx->latency;
 
 	return;
 }
@@ -1052,18 +596,6 @@ static int acpi_processor_get_power_info
 	pr->power.count = acpi_processor_power_verify(pr);
 
 	/*
-	 * Set Default Policy
-	 * ------------------
-	 * Now that we know which states are supported, set the default
-	 * policy.  Note that this policy can be changed dynamically
-	 * (e.g. encourage deeper sleeps to conserve battery life when
-	 * not on AC).
-	 */
-	result = acpi_processor_set_power_policy(pr);
-	if (result)
-		return result;
-
-	/*
 	 * if one state of type C2 or C3 is available, mark this
 	 * CPU as being "idle manageable"
 	 */
@@ -1080,9 +612,6 @@ static int acpi_processor_get_power_info
 
 int acpi_processor_cst_has_changed(struct acpi_processor *pr)
 {
-	int result = 0;
-
-
 	if (!pr)
 		return -EINVAL;
 
@@ -1093,16 +622,9 @@ int acpi_processor_cst_has_changed(struc
 	if (!pr->flags.power_setup_done)
 		return -ENODEV;
 
-	/* Fall back to the default idle loop */
-	pm_idle = pm_idle_save;
-	synchronize_sched();	/* Relies on interrupts forcing exit from idle. */
-
-	pr->flags.power = 0;
-	result = acpi_processor_get_power_info(pr);
-	if ((pr->flags.power == 1) && (pr->flags.power_setup_done))
-		pm_idle = acpi_processor_idle;
-
-	return result;
+	acpi_processor_get_power_info(pr);
+	return cpuidle_force_redetect(per_cpu(cpuidle_devices, pr->id),
+		&acpi_idle_driver);
 }
 
 /* proc interface */
@@ -1188,30 +710,6 @@ static const struct file_operations acpi
 	.release = single_release,
 };
 
-#ifdef CONFIG_SMP
-static void smp_callback(void *v)
-{
-	/* we already woke the CPU up, nothing more to do */
-}
-
-/*
- * This function gets called when a part of the kernel has a new latency
- * requirement.  This means we need to get all processors out of their C-state,
- * and then recalculate a new suitable C-state. Just do a cross-cpu IPI; that
- * wakes them all right up.
- */
-static int acpi_processor_latency_notify(struct notifier_block *b,
-		unsigned long l, void *v)
-{
-	smp_call_function(smp_callback, NULL, 0, 1);
-	return NOTIFY_OK;
-}
-
-static struct notifier_block acpi_processor_latency_notifier = {
-	.notifier_call = acpi_processor_latency_notify,
-};
-#endif
-
 int __cpuinit acpi_processor_power_init(struct acpi_processor *pr,
 			      struct acpi_device *device)
 {
@@ -1228,9 +726,6 @@ int __cpuinit acpi_processor_power_init(
 			       "ACPI: processor limited to max C-state %d\n",
 			       max_cstate);
 		first_run++;
-#ifdef CONFIG_SMP
-		register_latency_notifier(&acpi_processor_latency_notifier);
-#endif
 	}
 
 	if (!pr)
@@ -1247,6 +742,7 @@ int __cpuinit acpi_processor_power_init(
 
 	acpi_processor_get_power_info(pr);
 
+
 	/*
 	 * Install the idle handler if processor power management is supported.
 	 * Note that we use previously set idle handler will be used on
@@ -1259,11 +755,6 @@ int __cpuinit acpi_processor_power_init(
 				printk(" C%d[C%d]", i,
 				       pr->power.states[i].type);
 		printk(")\n");
-
-		if (pr->id == 0) {
-			pm_idle_save = pm_idle;
-			pm_idle = acpi_processor_idle;
-		}
 	}
 
 	/* 'power' [R] */
@@ -1291,21 +782,349 @@ int acpi_processor_power_exit(struct acp
 	if (acpi_device_dir(device))
 		remove_proc_entry(ACPI_PROCESSOR_FILE_POWER,
 				  acpi_device_dir(device));
+	return 0;
+}
 
-	/* Unregister the idle handler when processor #0 is removed. */
-	if (pr->id == 0) {
-		pm_idle = pm_idle_save;
+/**
+ * ticks_elapsed - a helper function that determines how many ticks (in US)
+ *		   have elapsed between two PM Timer timestamps
+ * @t1: the start time
+ * @t2: the end time
+ */
+static inline u32 ticks_elapsed_in_us(u32 t1, u32 t2)
+{
+	if (t2 >= t1)
+		return PM_TIMER_TICKS_TO_US(t2 - t1);
+	else if (!(acpi_gbl_FADT.flags & ACPI_FADT_32BIT_TIMER))
+		return PM_TIMER_TICKS_TO_US(((0x00FFFFFF - t1) + t2) & 0x00FFFFFF);
+	else
+		return PM_TIMER_TICKS_TO_US((0xFFFFFFFF - t1) + t2);
+}
 
-		/*
-		 * We are about to unload the current idle thread pm callback
-		 * (pm_idle), Wait for all processors to update cached/local
-		 * copies of pm_idle before proceeding.
-		 */
-		cpu_idle_wait();
-#ifdef CONFIG_SMP
-		unregister_latency_notifier(&acpi_processor_latency_notifier);
+static inline u32 ticks_elapsed(u32 t1, u32 t2)
+{
+	if (t2 >= t1)
+		return (t2 - t1);
+	else if (!(acpi_gbl_FADT.flags & ACPI_FADT_32BIT_TIMER))
+		return (((0x00FFFFFF - t1) + t2) & 0x00FFFFFF);
+	else
+		return ((0xFFFFFFFF - t1) + t2);
+}
+
+/**
+ * acpi_idle_update_bm_rld - updates the BM_RLD bit depending on target state
+ * @pr: the processor
+ * @target: the new target state
+ */
+static inline void acpi_idle_update_bm_rld(struct acpi_processor *pr,
+					   struct acpi_processor_cx *target)
+{
+	if (pr->flags.bm_rld_set && target->type != ACPI_STATE_C3) {
+		acpi_set_register(ACPI_BITREG_BUS_MASTER_RLD, 0);
+		pr->flags.bm_rld_set = 0;
+	}
+
+	if (!pr->flags.bm_rld_set && target->type == ACPI_STATE_C3) {
+		acpi_set_register(ACPI_BITREG_BUS_MASTER_RLD, 1);
+		pr->flags.bm_rld_set = 1;
+	}
+}
+
+/**
+ * acpi_idle_do_entry - a helper function that does C2 and C3 type entry
+ * @cx: cstate data
+ */
+static inline void acpi_idle_do_entry(struct acpi_processor_cx *cx)
+{
+	if (cx->space_id == ACPI_CSTATE_FFH) {
+		/* Call into architectural FFH based C-state */
+		acpi_processor_ffh_cstate_enter(cx);
+	} else {
+		int unused;
+		/* IO port based C-state */
+		inb(cx->address);
+		/* Dummy wait op - must do something useless after P_LVL2 read
+		   because chipsets cannot guarantee that STPCLK# signal
+		   gets asserted in time to freeze execution properly. */
+		unused = inl(acpi_gbl_FADT.xpm_timer_block.address);
+	}
+}
+
+/**
+ * acpi_idle_enter_c1 - enters an ACPI C1 state-type
+ * @dev: the target CPU
+ * @state: the state data
+ *
+ * This is equivalent to the HALT instruction.
+ */
+static int acpi_idle_enter_c1(struct cpuidle_device *dev,
+			      struct cpuidle_state *state)
+{
+	struct acpi_processor *pr;
+	struct acpi_processor_cx *cx = cpuidle_get_statedata(state);
+	pr = processors[smp_processor_id()];
+
+	if (unlikely(!pr))
+		return 0;
+
+	if (pr->flags.bm_check)
+		acpi_idle_update_bm_rld(pr, cx);
+
+	current_thread_info()->status &= ~TS_POLLING;
+	/*
+	 * TS_POLLING-cleared state must be visible before we test
+	 * NEED_RESCHED:
+	 */
+	smp_mb();
+	if (!need_resched())
+		safe_halt();
+	current_thread_info()->status |= TS_POLLING;
+
+	cx->usage++;
+
+	return 0;
+}
+
+/**
+ * acpi_idle_enter_c2 - enters an ACPI C2 state-type
+ * @dev: the target CPU
+ * @state: the state data
+ */
+static int acpi_idle_enter_c2(struct cpuidle_device *dev,
+			      struct cpuidle_state *state)
+{
+	struct acpi_processor *pr;
+	struct acpi_processor_cx *cx = cpuidle_get_statedata(state);
+	u32 t1, t2;
+	pr = processors[smp_processor_id()];
+
+	if (unlikely(!pr))
+		return 0;
+
+	if (pr->flags.bm_check)
+		acpi_idle_update_bm_rld(pr, cx);
+
+	local_irq_disable();
+	current_thread_info()->status &= ~TS_POLLING;
+	/*
+	 * TS_POLLING-cleared state must be visible before we test
+	 * NEED_RESCHED:
+	 */
+	smp_mb();
+
+	if (unlikely(need_resched())) {
+		current_thread_info()->status |= TS_POLLING;
+		local_irq_enable();
+		return 0;
+	}
+
+	t1 = inl(acpi_gbl_FADT.xpm_timer_block.address);
+	acpi_state_timer_broadcast(pr, cx, 1);
+	acpi_idle_do_entry(cx);
+	t2 = inl(acpi_gbl_FADT.xpm_timer_block.address);
+
+#ifdef CONFIG_GENERIC_TIME
+	/* TSC halts in C2, so notify users */
+	mark_tsc_unstable("possible TSC halt in C2");
+#endif
+
+	local_irq_enable();
+	current_thread_info()->status |= TS_POLLING;
+
+	cx->usage++;
+
+	acpi_state_timer_broadcast(pr, cx, 0);
+	cx->time += ticks_elapsed(t1, t2);
+	return ticks_elapsed_in_us(t1, t2);
+}
+
+static int c3_cpu_count;
+static DEFINE_SPINLOCK(c3_lock);
+
+/**
+ * acpi_idle_enter_c3 - enters an ACPI C3 state-type
+ * @dev: the target CPU
+ * @state: the state data
+ *
+ * Similar to C2 entry, except special bus master handling is needed.
+ */
+static int acpi_idle_enter_c3(struct cpuidle_device *dev,
+			      struct cpuidle_state *state)
+{
+	struct acpi_processor *pr;
+	struct acpi_processor_cx *cx = cpuidle_get_statedata(state);
+	u32 t1, t2;
+	pr = processors[smp_processor_id()];
+
+	if (unlikely(!pr))
+		return 0;
+
+	if (pr->flags.bm_check)
+		acpi_idle_update_bm_rld(pr, cx);
+
+	local_irq_disable();
+	current_thread_info()->status &= ~TS_POLLING;
+	/*
+	 * TS_POLLING-cleared state must be visible before we test
+	 * NEED_RESCHED:
+	 */
+	smp_mb();
+
+	if (unlikely(need_resched())) {
+		current_thread_info()->status |= TS_POLLING;
+		local_irq_enable();
+		return 0;
+	}
+
+	/*
+	 * Must be done before busmaster disable as we might need to
+	 * access HPET !
+	 */
+	acpi_state_timer_broadcast(pr, cx, 1);
+
+	/* disable bus master */
+	if (pr->flags.bm_check) {
+		spin_lock(&c3_lock);
+			c3_cpu_count++;
+		if (c3_cpu_count == num_online_cpus()) {
+			/*
+			 * All CPUs are trying to go to C3
+			 * Disable bus master arbitration
+			 */
+			acpi_set_register(ACPI_BITREG_ARB_DISABLE, 1);
+		}
+		spin_unlock(&c3_lock);
+	} else {
+		/* SMP with no shared cache... Invalidate cache  */
+		ACPI_FLUSH_CPU_CACHE();
+	}
+
+	/* Get start time (ticks) */
+	t1 = inl(acpi_gbl_FADT.xpm_timer_block.address);
+	acpi_idle_do_entry(cx);
+	t2 = inl(acpi_gbl_FADT.xpm_timer_block.address);
+
+	if (pr->flags.bm_check) {
+		spin_lock(&c3_lock);
+		/* Enable bus master arbitration */
+		if (c3_cpu_count == num_online_cpus())
+			acpi_set_register(ACPI_BITREG_ARB_DISABLE, 0);
+		c3_cpu_count--;
+		spin_unlock(&c3_lock);
+	}
+
+#ifdef CONFIG_GENERIC_TIME
+	/* TSC halts in C3, so notify users */
+	mark_tsc_unstable("TSC halts in C3");
+#endif
+
+	local_irq_enable();
+	current_thread_info()->status |= TS_POLLING;
+
+	cx->usage++;
+
+	acpi_state_timer_broadcast(pr, cx, 0);
+	cx->time += ticks_elapsed(t1, t2);
+	return ticks_elapsed_in_us(t1, t2);
+}
+
+/**
+ * acpi_idle_bm_check - checks if bus master activity was detected
+ */
+static int acpi_idle_bm_check(void)
+{
+	u32 bm_status = 0;
+
+	acpi_get_register(ACPI_BITREG_BUS_MASTER_STATUS, &bm_status);
+	if (bm_status)
+		acpi_set_register(ACPI_BITREG_BUS_MASTER_STATUS, 1);
+	/*
+	 * PIIX4 Erratum #18: Note that BM_STS doesn't always reflect
+	 * the true state of bus mastering activity; forcing us to
+	 * manually check the BMIDEA bit of each IDE channel.
+	 */
+	else if (errata.piix4.bmisx) {
+		if ((inb_p(errata.piix4.bmisx + 0x02) & 0x01)
+		    || (inb_p(errata.piix4.bmisx + 0x0A) & 0x01))
+			bm_status = 1;
+	}
+	return bm_status;
+}
+
+/**
+ * acpi_idle_init - attaches the driver to a CPU
+ * @dev: the CPU
+ */
+static int acpi_idle_init(struct cpuidle_device *dev)
+{
+	int cpu = dev->cpu;
+	int i, count = 0;
+	struct acpi_processor_cx *cx;
+	struct cpuidle_state *state;
+
+	struct acpi_processor *pr = processors[cpu];
+
+	if (!pr->flags.power_setup_done)
+		return -EINVAL;
+
+	if (pr->flags.power == 0) {
+		return -EINVAL;
+	}
+
+	for (i = 1; i < ACPI_PROCESSOR_MAX_POWER && i <= max_cstate; i++) {
+		cx = &pr->power.states[i];
+		state = &dev->states[count];
+
+		if (!cx->valid)
+			continue;
+
+#ifdef CONFIG_HOTPLUG_CPU
+		if ((cx->type != ACPI_STATE_C1) && (num_online_cpus() > 1) &&
+		    !pr->flags.has_cst &&
+		    !(acpi_gbl_FADT.flags & ACPI_FADT_C2_MP_SUPPORTED))
+			continue;
 #endif
+		cpuidle_set_statedata(state, cx);
+
+		state->exit_latency = cx->latency;
+		state->target_residency = cx->latency * 6;
+		state->power_usage = cx->power;
+
+		state->flags = 0;
+		switch (cx->type) {
+			case ACPI_STATE_C1:
+			state->flags |= CPUIDLE_FLAG_SHALLOW;
+			state->enter = acpi_idle_enter_c1;
+			break;
+
+			case ACPI_STATE_C2:
+			state->flags |= CPUIDLE_FLAG_BALANCED;
+			state->flags |= CPUIDLE_FLAG_TIME_VALID;
+			state->enter = acpi_idle_enter_c2;
+			break;
+
+			case ACPI_STATE_C3:
+			state->flags |= CPUIDLE_FLAG_DEEP;
+			state->flags |= CPUIDLE_FLAG_TIME_VALID;
+			state->flags |= CPUIDLE_FLAG_CHECK_BM;
+			state->enter = acpi_idle_enter_c3;
+			break;
+		}
+
+		count++;
 	}
 
+	if (!count)
+		return -EINVAL;
+
+	dev->state_count = count;
 	return 0;
 }
+
+struct cpuidle_driver acpi_idle_driver = {
+	.name =		"acpi_idle",
+	.init =		acpi_idle_init,
+	.redetect =	acpi_idle_init,
+	.bm_check =	acpi_idle_bm_check,
+	.owner =	THIS_MODULE,
+};
Index: linux-2.6.22/drivers/acpi/processor_core.c
===================================================================
--- linux-2.6.22.orig/drivers/acpi/processor_core.c	2007-07-15 17:13:44.000000000 +0200
+++ linux-2.6.22/drivers/acpi/processor_core.c	2007-07-15 17:13:44.000000000 +0200
@@ -44,6 +44,7 @@
 #include <linux/seq_file.h>
 #include <linux/dmi.h>
 #include <linux/moduleparam.h>
+#include <linux/cpuidle.h>
 
 #include <asm/io.h>
 #include <asm/system.h>
@@ -1022,11 +1023,15 @@ static int __init acpi_processor_init(vo
 
 	acpi_processor_ppc_init();
 
+	cpuidle_register_driver(&acpi_idle_driver);
+	acpi_do_set_cstate_limit = acpi_max_cstate_changed;
 	return 0;
 }
 
 static void __exit acpi_processor_exit(void)
 {
+	acpi_do_set_cstate_limit = NULL;
+	cpuidle_unregister_driver(&acpi_idle_driver);
 
 	acpi_processor_ppc_exit();
 
Index: linux-2.6.22/include/acpi/processor.h
===================================================================
--- linux-2.6.22.orig/include/acpi/processor.h	2007-07-15 17:13:44.000000000 +0200
+++ linux-2.6.22/include/acpi/processor.h	2007-07-15 17:13:44.000000000 +0200
@@ -161,6 +161,7 @@ struct acpi_processor_flags {
 	u8 bm_check:1;
 	u8 has_cst:1;
 	u8 power_setup_done:1;
+	u8 bm_rld_set:1;
 };
 
 struct acpi_processor {
@@ -279,6 +280,8 @@ int acpi_processor_power_init(struct acp
 int acpi_processor_cst_has_changed(struct acpi_processor *pr);
 int acpi_processor_power_exit(struct acpi_processor *pr,
 			      struct acpi_device *device);
+extern struct cpuidle_driver acpi_idle_driver;
+void acpi_max_cstate_changed(void);
 
 /* in processor_thermal.c */
 int acpi_processor_get_limit_info(struct acpi_processor *pr);
Index: linux-2.6.22/Documentation/cpuidle/core.txt
===================================================================
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ linux-2.6.22/Documentation/cpuidle/core.txt	2007-07-15 17:13:44.000000000 +0200
@@ -0,0 +1,17 @@
+
+		Supporting multiple CPU idle levels in kernel
+
+				cpuidle
+
+General Information:
+
+Various CPUs today support multiple idle levels that are differentiated
+by varying exit latencies and power consumption during idle.
+cpuidle is a generic in-kernel infrastructure that separates
+idle policy (governor) from idle mechanism (driver) and provides a
+standardized infrastructure to support independent development of
+governors and drivers.
+
+cpuidle resides under drivers/cpuidle.
+
+
Index: linux-2.6.22/Documentation/cpuidle/driver.txt
===================================================================
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ linux-2.6.22/Documentation/cpuidle/driver.txt	2007-07-15 17:13:44.000000000 +0200
@@ -0,0 +1,29 @@
+
+
+		Supporting multiple CPU idle levels in kernel
+
+				cpuidle drivers
+
+
+
+
+cpuidle driver hooks into the cpuidle infrastructure and does the
+architecture/platform dependent part of CPU idle states. Driver
+provides the platform idle state detection capability and also
+has mechanisms in place to support actusl entry-exit into a CPU idle state.
+
+cpuidle driver supports capability detection for a platform using the
+init and exit routines. They will be called for each online CPU, with a
+percpu cpuidle_driver object and driver should fill in cpuidle_states
+inside cpuidle_driver depending on the CPU capability.
+
+Driver can handle dynamic state changes (like battery<->AC), by calling
+force_redetect interface.
+
+It is possible to have more than one driver registered at the same time and
+user can switch between drivers using /sysfs interface (when enabled).
+
+Interfaces:
+int cpuidle_register_driver(struct cpuidle_driver *drv);
+void cpuidle_unregister_driver(struct cpuidle_driver *drv);
+int cpuidle_force_redetect(struct cpuidle_device *dev);
Index: linux-2.6.22/Documentation/cpuidle/governor.txt
===================================================================
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ linux-2.6.22/Documentation/cpuidle/governor.txt	2007-07-15 17:13:44.000000000 +0200
@@ -0,0 +1,28 @@
+
+
+
+		Supporting multiple CPU idle levels in kernel
+
+				cpuidle governors
+
+
+
+
+cpuidle governor is policy routine that decides what idle state to enter at
+any given time. cpuidle core uses different callbacks to governor while
+handling idle entry.
+* select_state() callback where governor can determine next idle state to enter
+* prepare_idle() callback is called before entering an idle state
+* scan() callback is called after a driver forces redetection of the states
+
+More than one governor can be registered at the same time and
+user can switch between drivers using /sysfs interface (when supported).
+
+More than one governor part is supported for developers to easily experiment
+with different governors. By default, most optimal governor based on your
+kernel configuration and platform will be selected by cpuidle.
+
+Interfaces:
+int cpuidle_register_governor(struct cpuidle_governor *gov);
+void cpuidle_unregister_governor(struct cpuidle_governor *gov);
+
Index: linux-2.6.22/Documentation/cpuidle/sysfs.txt
===================================================================
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ linux-2.6.22/Documentation/cpuidle/sysfs.txt	2007-07-15 17:13:44.000000000 +0200
@@ -0,0 +1,35 @@
+
+
+		Supporting multiple CPU idle levels in kernel
+
+				cpuidle sysfs
+
+System global cpuidle related information and tunables are under
+/sys/devices/system/cpu/cpuidle
+
+The current interfaces in this directory has self-explanatory names:
+* current_driver_ro
+* current_governor_ro
+
+With cpuidle_sysfs_switch boot option (meant for developer testing)
+following objects are visible instead.
+* available_drivers
+* available_governors
+* current_driver
+* current_governor
+In this case user can switch the driver, governor at run time by writing
+onto current_driver and current_governor.
+
+
+Per logical CPU specific cpuidle information are under
+/sys/devices/system/cpu/cpuX/cpuidle
+for each online cpu X
+
+Under this percpu directory, there is a directory for each idle state supported
+by the driver, which in turn has
+* latency : Latency to exit out of this idle state (in microseconds)
+* power : Power consumed while in this idle state (in milliwatts)
+* time : Total time spent in this idle state (in microseconds)
+* usage : Number of times this state was entered (count)
+
+
Index: linux-2.6.22/drivers/acpi/osl.c
===================================================================
--- linux-2.6.22.orig/drivers/acpi/osl.c	2007-07-15 17:13:44.000000000 +0200
+++ linux-2.6.22/drivers/acpi/osl.c	2007-07-15 17:13:44.000000000 +0200
@@ -1056,6 +1056,17 @@ unsigned int max_cstate = ACPI_PROCESSOR
 
 EXPORT_SYMBOL(max_cstate);
 
+void (*acpi_do_set_cstate_limit)(void);
+EXPORT_SYMBOL(acpi_do_set_cstate_limit);
+
+void acpi_set_cstate_limit(unsigned int new_limit)
+{
+	max_cstate = new_limit;
+	if (acpi_do_set_cstate_limit)
+		acpi_do_set_cstate_limit();
+}
+EXPORT_SYMBOL(acpi_set_cstate_limit);
+
 /*
  * Acquire a spinlock.
  *
Index: linux-2.6.22/include/linux/acpi.h
===================================================================
--- linux-2.6.22.orig/include/linux/acpi.h	2007-07-15 17:13:44.000000000 +0200
+++ linux-2.6.22/include/linux/acpi.h	2007-07-15 17:13:44.000000000 +0200
@@ -206,11 +206,8 @@ static inline unsigned int acpi_get_csta
 {
 	return max_cstate;
 }
-static inline void acpi_set_cstate_limit(unsigned int new_limit)
-{
-	max_cstate = new_limit;
-	return;
-}
+extern void (*acpi_do_set_cstate_limit)(void);
+extern void acpi_set_cstate_limit(unsigned int new_limit);
 #else
 static inline unsigned int acpi_get_cstate_limit(void) { return 0; }
 static inline void acpi_set_cstate_limit(unsigned int new_limit) { return; }
Index: linux-2.6.22/arch/i386/kernel/process.c
===================================================================
--- linux-2.6.22.orig/arch/i386/kernel/process.c	2007-07-15 17:13:44.000000000 +0200
+++ linux-2.6.22/arch/i386/kernel/process.c	2007-07-15 17:13:44.000000000 +0200
@@ -179,13 +179,14 @@ void cpu_idle(void)
 
 	/* endless idle loop with no priority at all */
 	while (1) {
-		tick_nohz_stop_sched_tick();
 		while (!need_resched()) {
 			void (*idle)(void);
 
 			if (__get_cpu_var(cpu_idle_state))
 				__get_cpu_var(cpu_idle_state) = 0;
 
+			tick_nohz_stop_sched_tick();
+
 			check_pgt_cache();
 			rmb();
 			idle = pm_idle;
Index: linux-2.6.22/include/linux/tick.h
===================================================================
--- linux-2.6.22.orig/include/linux/tick.h	2007-07-15 17:13:44.000000000 +0200
+++ linux-2.6.22/include/linux/tick.h	2007-07-15 17:13:44.000000000 +0200
@@ -40,6 +40,7 @@ enum tick_nohz_mode {
  * @idle_sleeps:	Number of idle calls, where the sched tick was stopped
  * @idle_entrytime:	Time when the idle call was entered
  * @idle_sleeptime:	Sum of the time slept in idle with sched tick stopped
+ * @sleep_length:	Duration of the current idle sleep
  */
 struct tick_sched {
 	struct hrtimer			sched_timer;
@@ -52,6 +53,7 @@ struct tick_sched {
 	unsigned long			idle_sleeps;
 	ktime_t				idle_entrytime;
 	ktime_t				idle_sleeptime;
+	ktime_t				sleep_length;
 	unsigned long			last_jiffies;
 	unsigned long			next_jiffies;
 	ktime_t				idle_expires;
@@ -100,10 +102,18 @@ static inline int tick_check_oneshot_cha
 extern void tick_nohz_stop_sched_tick(void);
 extern void tick_nohz_restart_sched_tick(void);
 extern void tick_nohz_update_jiffies(void);
+extern ktime_t tick_nohz_get_sleep_length(void);
+extern unsigned long tick_nohz_get_idle_jiffies(void);
 # else
 static inline void tick_nohz_stop_sched_tick(void) { }
 static inline void tick_nohz_restart_sched_tick(void) { }
 static inline void tick_nohz_update_jiffies(void) { }
+static inline ktime_t tick_nohz_get_sleep_length(void)
+{
+	ktime_t len = { .tv64 = NSEC_PER_SEC/HZ };
+
+	return len;
+}
 # endif /* !NO_HZ */
 
 #endif
Index: linux-2.6.22/kernel/softirq.c
===================================================================
--- linux-2.6.22.orig/kernel/softirq.c	2007-07-15 17:13:44.000000000 +0200
+++ linux-2.6.22/kernel/softirq.c	2007-07-15 17:13:45.000000000 +0200
@@ -303,7 +303,7 @@ void irq_exit(void)
 	if (!in_interrupt() && local_softirq_pending())
 		invoke_softirq();
 
-#ifdef CONFIG_NO_HZ
+#if defined(CONFIG_NO_HZ) && !defined(CONFIG_NONIRQ_WAKEUP)
 	/* Make sure that timer wheel updates are propagated */
 	if (!in_interrupt() && idle_cpu(smp_processor_id()) && !need_resched())
 		tick_nohz_stop_sched_tick();
Index: linux-2.6.22/kernel/time/tick-sched.c
===================================================================
--- linux-2.6.22.orig/kernel/time/tick-sched.c	2007-07-15 17:13:44.000000000 +0200
+++ linux-2.6.22/kernel/time/tick-sched.c	2007-07-15 17:13:46.000000000 +0200
@@ -153,6 +153,7 @@ void tick_nohz_stop_sched_tick(void)
 	unsigned long seq, last_jiffies, next_jiffies, delta_jiffies, flags;
 	struct tick_sched *ts;
 	ktime_t last_update, expires, now, delta;
+	struct clock_event_device *dev = __get_cpu_var(tick_cpu_device).evtdev;
 	int cpu;
 
 	local_irq_save(flags);
@@ -290,11 +291,36 @@ void tick_nohz_stop_sched_tick(void)
 out:
 	ts->next_jiffies = next_jiffies;
 	ts->last_jiffies = last_jiffies;
+	ts->sleep_length = ktime_sub(dev->next_event, now);
 end:
 	local_irq_restore(flags);
 }
 
 /**
+ * tick_nohz_get_sleep_length - return the length of the current sleep
+ *
+ * Called from power state control code with interrupts disabled
+ */
+ktime_t tick_nohz_get_sleep_length(void)
+{
+       struct tick_sched *ts = &__get_cpu_var(tick_cpu_sched);
+
+       return ts->sleep_length;
+}
+EXPORT_SYMBOL_GPL(tick_nohz_get_sleep_length);
+
+/**
+ * tick_nohz_get_idle_jiffies - returns the current idle jiffie count
+ */
+unsigned long tick_nohz_get_idle_jiffies(void)
+{
+       struct tick_sched *ts = &__get_cpu_var(tick_cpu_sched);
+
+       return ts->idle_jiffies;
+}
+EXPORT_SYMBOL_GPL(tick_nohz_get_idle_jiffies);
+
+/**
  * nohz_restart_sched_tick - restart the idle tick from the idle task
  *
  * Restart the idle tick when the CPU is woken up from idle
@@ -546,6 +572,7 @@ void tick_setup_sched_timer(void)
 {
 	struct tick_sched *ts = &__get_cpu_var(tick_cpu_sched);
 	ktime_t now = ktime_get();
+	u64 offset;
 
 	/*
 	 * Emulate tick processing via per-CPU hrtimers:
@@ -554,8 +581,12 @@ void tick_setup_sched_timer(void)
 	ts->sched_timer.function = tick_sched_timer;
 	ts->sched_timer.cb_mode = HRTIMER_CB_IRQSAFE_NO_SOFTIRQ;
 
-	/* Get the next period */
+	/* Get the next period (per cpu) */
 	ts->sched_timer.expires = tick_init_jiffy_update();
+	offset = ktime_to_ns(tick_period) >> 1;
+	do_div(offset, NR_CPUS);
+	offset *= smp_processor_id();
+	ts->sched_timer.expires = ktime_add_ns(ts->sched_timer.expires, offset);
 
 	for (;;) {
 		hrtimer_forward(&ts->sched_timer, now, tick_period);
Index: linux-2.6.22/drivers/cpuidle/governors/menu.c
===================================================================
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ linux-2.6.22/drivers/cpuidle/governors/menu.c	2007-07-15 17:13:44.000000000 +0200
@@ -0,0 +1,181 @@
+/*
+ * menu.c - the menu idle governor
+ *
+ * Copyright (C) 2006-2007 Adam Belay <abelay@novell.com>
+ *
+ * This code is licenced under the GPL.
+ */
+
+#include <linux/kernel.h>
+#include <linux/cpuidle.h>
+#include <linux/latency.h>
+#include <linux/time.h>
+#include <linux/ktime.h>
+#include <linux/hrtimer.h>
+#include <linux/tick.h>
+
+#define BM_HOLDOFF			20000	/* 20 ms */
+#define DEMOTION_THRESHOLD		5
+#define DEMOTION_TIMEOUT_MULTIPLIER	1000
+
+struct menu_device {
+	int		last_state_idx;
+
+	int		deepest_break_state;
+	struct timespec break_expire_time_ts;
+	int		break_last_cnt;
+
+	int		deepest_bm_state;
+	int		bm_elapsed_us;
+	int		bm_holdoff_us;
+};
+
+static DEFINE_PER_CPU(struct menu_device, menu_devices);
+
+/**
+ * menu_select - selects the next idle state to enter
+ * @dev: the CPU
+ */
+static int menu_select(struct cpuidle_device *dev)
+{
+	struct menu_device *data = &__get_cpu_var(menu_devices);
+	int i, expected_us, max_state = dev->state_count;
+
+	/* discard BM history because it is sticky */
+	cpuidle_get_bm_activity();
+
+	/* determine the expected residency time */
+	expected_us = (s32) ktime_to_ns(tick_nohz_get_sleep_length()) / 1000;
+
+	/* determine the maximum state compatible with current BM status */
+	if (cpuidle_get_bm_activity())
+		data->bm_elapsed_us = 0;
+	if (data->bm_elapsed_us <= data->bm_holdoff_us)
+		max_state = data->deepest_bm_state + 1;
+
+	/* determine the maximum state compatible with recent idle breaks */
+	if (data->deepest_break_state >= 0) {
+		struct timespec now;
+		ktime_get_ts(&now);
+		if (timespec_compare(&data->break_expire_time_ts, &now) > 0) {
+			max_state = min(max_state,
+					data->deepest_break_state + 1);
+		} else {
+			data->deepest_break_state = -1;
+		}
+	}
+	
+	/* find the deepest idle state that satisfies our constraints */
+	for (i = 1; i < max_state; i++) {
+		struct cpuidle_state *s = &dev->states[i];
+
+		if (s->target_residency > expected_us)
+			break;
+
+		if (s->exit_latency > system_latency_constraint())
+			break;
+	}
+
+	if (data->last_state_idx != i - 1)
+		data->break_last_cnt = 0;
+
+	data->last_state_idx = i - 1;
+	return i - 1;
+}
+
+/**
+ * menu_reflect - attempts to guess what happened after entry
+ * @dev: the CPU
+ *
+ * NOTE: it's important to be fast here because this operation will add to
+ *       the overall exit latency.
+ */
+static void menu_reflect(struct cpuidle_device *dev)
+{
+	struct menu_device *data = &__get_cpu_var(menu_devices);
+	int last_idx = data->last_state_idx;
+	int measured_us = cpuidle_get_last_residency(dev);
+	struct cpuidle_state *target = &dev->states[last_idx];
+
+	/*
+	 * Ugh, this idle state doesn't support residency measurements, so we
+	 * are basically lost in the dark.  As a compromise, assume we slept
+	 * for one full standard timer tick.  However, be aware that this
+	 * could potentially result in a suboptimal state transition.
+	 */
+	if (!(target->flags & CPUIDLE_FLAG_TIME_VALID))
+		measured_us = USEC_PER_SEC / HZ;
+
+	data->bm_elapsed_us += measured_us;
+
+	if (data->last_state_idx == 0)
+		return;
+
+	/*
+	 * Did something other than the timer interrupt
+	 * cause an early break event?
+	 */
+	if (unlikely(measured_us < target->target_residency)) {
+		if (data->break_last_cnt > DEMOTION_THRESHOLD) {
+			data->deepest_break_state = data->last_state_idx - 1;
+			ktime_get_ts(&data->break_expire_time_ts);
+			timespec_add_ns(&data->break_expire_time_ts,
+						target->target_residency *
+						DEMOTION_TIMEOUT_MULTIPLIER);
+		} else {
+			data->break_last_cnt++;
+		}
+	} else {
+		if (data->break_last_cnt > 0)
+			data->break_last_cnt--;
+	}
+}
+
+/**
+ * menu_scan_device - scans a CPU's states and does setup
+ * @dev: the CPU
+ */
+static void menu_scan_device(struct cpuidle_device *dev)
+{
+	struct menu_device *data = &per_cpu(menu_devices, dev->cpu);
+	int i;
+
+	data->last_state_idx = 0;
+	data->bm_elapsed_us = 0;
+	data->bm_holdoff_us = BM_HOLDOFF;
+	data->deepest_break_state = -1;
+
+	for (i = 1; i < dev->state_count; i++)
+		if (dev->states[i].flags & CPUIDLE_FLAG_CHECK_BM)
+			break;
+	data->deepest_bm_state = i - 1;
+}
+
+struct cpuidle_governor menu_governor = {
+	.name =		"menu",
+	.rating =	20,
+	.scan =		menu_scan_device,
+	.select =	menu_select,
+	.reflect =	menu_reflect,
+	.owner =	THIS_MODULE,
+};
+
+/**
+ * init_menu - initializes the governor
+ */
+static int __init init_menu(void)
+{
+	return cpuidle_register_governor(&menu_governor);
+}
+
+/**
+ * exit_menu - exits the governor
+ */
+static void __exit exit_menu(void)
+{
+	cpuidle_unregister_governor(&menu_governor);
+}
+
+MODULE_LICENSE("GPL");
+module_init(init_menu);
+module_exit(exit_menu);
Index: linux-2.6.22/arch/i386/kernel/hpet.c
===================================================================
--- linux-2.6.22.orig/arch/i386/kernel/hpet.c	2007-07-15 17:13:44.000000000 +0200
+++ linux-2.6.22/arch/i386/kernel/hpet.c	2007-07-15 17:13:53.000000000 +0200
@@ -1,16 +1,18 @@
 #include <linux/clocksource.h>
 #include <linux/clockchips.h>
+#include <linux/delay.h>
 #include <linux/errno.h>
 #include <linux/hpet.h>
 #include <linux/init.h>
+#include <linux/ioport.h>
 #include <linux/sysdev.h>
 #include <linux/pm.h>
 
+#include <asm/fixmap.h>
 #include <asm/hpet.h>
+#include <asm/i8253.h>
 #include <asm/io.h>
 
-extern struct clock_event_device *global_clock_event;
-
 #define HPET_MASK	CLOCKSOURCE_MASK(32)
 #define HPET_SHIFT	22
 
@@ -21,9 +23,9 @@ extern struct clock_event_device *global
  * HPET address is set in acpi/boot.c, when an ACPI entry exists
  */
 unsigned long hpet_address;
-static void __iomem * hpet_virt_address;
+static void __iomem *hpet_virt_address;
 
-static inline unsigned long hpet_readl(unsigned long a)
+unsigned long hpet_readl(unsigned long a)
 {
 	return readl(hpet_virt_address + a);
 }
@@ -33,6 +35,36 @@ static inline void hpet_writel(unsigned 
 	writel(d, hpet_virt_address + a);
 }
 
+#ifdef CONFIG_X86_64
+
+#include <asm/pgtable.h>
+
+static inline void hpet_set_mapping(void)
+{
+	set_fixmap_nocache(FIX_HPET_BASE, hpet_address);
+	__set_fixmap(VSYSCALL_HPET, hpet_address, PAGE_KERNEL_VSYSCALL_NOCACHE);
+	hpet_virt_address = (void __iomem *)fix_to_virt(FIX_HPET_BASE);
+}
+
+static inline void hpet_clear_mapping(void)
+{
+	hpet_virt_address = NULL;
+}
+
+#else
+
+static inline void hpet_set_mapping(void)
+{
+	hpet_virt_address = ioremap_nocache(hpet_address, HPET_MMAP_SIZE);
+}
+
+static inline void hpet_clear_mapping(void)
+{
+	iounmap(hpet_virt_address);
+	hpet_virt_address = NULL;
+}
+#endif
+
 /*
  * HPET command line enable / disable
  */
@@ -48,6 +80,13 @@ static int __init hpet_setup(char* str)
 }
 __setup("hpet=", hpet_setup);
 
+static int __init disable_hpet(char *str)
+{
+	boot_hpet_disable = 1;
+	return 1;
+}
+__setup("nohpet", disable_hpet);
+
 static inline int is_hpet_capable(void)
 {
 	return (!boot_hpet_disable && hpet_address);
@@ -82,7 +121,7 @@ static void hpet_reserve_platform_timers
 
 	memset(&hd, 0, sizeof (hd));
 	hd.hd_phys_address = hpet_address;
-	hd.hd_address = hpet_virt_address;
+	hd.hd_address = hpet;
 	hd.hd_nirqs = nrtimers;
 	hd.hd_flags = HPET_DATA_PLATFORM;
 	hpet_reserve_timer(&hd, 0);
@@ -110,9 +149,9 @@ static void hpet_reserve_platform_timers
  */
 static unsigned long hpet_period;
 
-static void hpet_set_mode(enum clock_event_mode mode,
+static void hpet_legacy_set_mode(enum clock_event_mode mode,
 			  struct clock_event_device *evt);
-static int hpet_next_event(unsigned long delta,
+static int hpet_legacy_next_event(unsigned long delta,
 			   struct clock_event_device *evt);
 
 /*
@@ -121,10 +160,11 @@ static int hpet_next_event(unsigned long
 static struct clock_event_device hpet_clockevent = {
 	.name		= "hpet",
 	.features	= CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT,
-	.set_mode	= hpet_set_mode,
-	.set_next_event = hpet_next_event,
+	.set_mode	= hpet_legacy_set_mode,
+	.set_next_event = hpet_legacy_next_event,
 	.shift		= 32,
 	.irq		= 0,
+	.rating		= 50,
 };
 
 static void hpet_start_counter(void)
@@ -139,7 +179,18 @@ static void hpet_start_counter(void)
 	hpet_writel(cfg, HPET_CFG);
 }
 
-static void hpet_enable_int(void)
+static void hpet_resume_device(void)
+{
+	force_hpet_resume();
+}
+
+static void hpet_restart_counter(void)
+{
+	hpet_resume_device();
+	hpet_start_counter();
+}
+
+static void hpet_enable_legacy_int(void)
 {
 	unsigned long cfg = hpet_readl(HPET_CFG);
 
@@ -148,7 +199,39 @@ static void hpet_enable_int(void)
 	hpet_legacy_int_enabled = 1;
 }
 
-static void hpet_set_mode(enum clock_event_mode mode,
+static void hpet_legacy_clockevent_register(void)
+{
+	uint64_t hpet_freq;
+
+	/* Start HPET legacy interrupts */
+	hpet_enable_legacy_int();
+
+	/*
+	 * The period is a femto seconds value. We need to calculate the
+	 * scaled math multiplication factor for nanosecond to hpet tick
+	 * conversion.
+	 */
+	hpet_freq = 1000000000000000ULL;
+	do_div(hpet_freq, hpet_period);
+	hpet_clockevent.mult = div_sc((unsigned long) hpet_freq,
+				      NSEC_PER_SEC, 32);
+	/* Calculate the min / max delta */
+	hpet_clockevent.max_delta_ns = clockevent_delta2ns(0x7FFFFFFF,
+							   &hpet_clockevent);
+	hpet_clockevent.min_delta_ns = clockevent_delta2ns(0x30,
+							   &hpet_clockevent);
+
+	/*
+	 * Start hpet with the boot cpu mask and make it
+	 * global after the IO_APIC has been initialized.
+	 */
+	hpet_clockevent.cpumask = cpumask_of_cpu(smp_processor_id());
+	clockevents_register_device(&hpet_clockevent);
+	global_clock_event = &hpet_clockevent;
+	printk(KERN_DEBUG "hpet clockevent registered\n");
+}
+
+static void hpet_legacy_set_mode(enum clock_event_mode mode,
 			  struct clock_event_device *evt)
 {
 	unsigned long cfg, cmp, now;
@@ -187,10 +270,14 @@ static void hpet_set_mode(enum clock_eve
 		cfg &= ~HPET_TN_ENABLE;
 		hpet_writel(cfg, HPET_T0_CFG);
 		break;
+
+	case CLOCK_EVT_MODE_RESUME:
+		hpet_enable_legacy_int();
+		break;
 	}
 }
 
-static int hpet_next_event(unsigned long delta,
+static int hpet_legacy_next_event(unsigned long delta,
 			   struct clock_event_device *evt)
 {
 	unsigned long cnt;
@@ -210,6 +297,13 @@ static cycle_t read_hpet(void)
 	return (cycle_t)hpet_readl(HPET_COUNTER);
 }
 
+#ifdef CONFIG_X86_64
+static cycle_t __vsyscall_fn vread_hpet(void)
+{
+	return readl((const void __iomem *)fix_to_virt(VSYSCALL_HPET) + 0xf0);
+}
+#endif
+
 static struct clocksource clocksource_hpet = {
 	.name		= "hpet",
 	.rating		= 250,
@@ -217,22 +311,73 @@ static struct clocksource clocksource_hp
 	.mask		= HPET_MASK,
 	.shift		= HPET_SHIFT,
 	.flags		= CLOCK_SOURCE_IS_CONTINUOUS,
+	.resume		= hpet_restart_counter,
+#ifdef CONFIG_X86_64
+	.vread		= vread_hpet,
+#endif
 };
 
+static int hpet_clocksource_register(void)
+{
+	u64 tmp, start, now;
+	cycle_t t1;
+
+	/* Start the counter */
+	hpet_start_counter();
+
+	/* Verify whether hpet counter works */
+	t1 = read_hpet();
+	rdtscll(start);
+
+	/*
+	 * We don't know the TSC frequency yet, but waiting for
+	 * 200000 TSC cycles is safe:
+	 * 4 GHz == 50us
+	 * 1 GHz == 200us
+	 */
+	do {
+		rep_nop();
+		rdtscll(now);
+	} while ((now - start) < 200000UL);
+
+	if (t1 == read_hpet()) {
+		printk(KERN_WARNING
+		       "HPET counter not counting. HPET disabled\n");
+		return -ENODEV;
+	}
+
+	/* Initialize and register HPET clocksource
+	 *
+	 * hpet period is in femto seconds per cycle
+	 * so we need to convert this to ns/cyc units
+	 * aproximated by mult/2^shift
+	 *
+	 *  fsec/cyc * 1nsec/1000000fsec = nsec/cyc = mult/2^shift
+	 *  fsec/cyc * 1ns/1000000fsec * 2^shift = mult
+	 *  fsec/cyc * 2^shift * 1nsec/1000000fsec = mult
+	 *  (fsec/cyc << shift)/1000000 = mult
+	 *  (hpet_period << shift)/FSEC_PER_NSEC = mult
+	 */
+	tmp = (u64)hpet_period << HPET_SHIFT;
+	do_div(tmp, FSEC_PER_NSEC);
+	clocksource_hpet.mult = (u32)tmp;
+
+	clocksource_register(&clocksource_hpet);
+
+	return 0;
+}
+
 /*
  * Try to setup the HPET timer
  */
 int __init hpet_enable(void)
 {
 	unsigned long id;
-	uint64_t hpet_freq;
-	u64 tmp, start, now;
-	cycle_t t1;
 
 	if (!is_hpet_capable())
 		return 0;
 
-	hpet_virt_address = ioremap_nocache(hpet_address, HPET_MMAP_SIZE);
+	hpet_set_mapping();
 
 	/*
 	 * Read the period and check for a sane value:
@@ -241,21 +387,6 @@ int __init hpet_enable(void)
 		goto out_nohpet;
 
 	/*
-	 * The period is a femto seconds value. We need to calculate the
-	 * scaled math multiplication factor for nanosecond to hpet tick
-	 * conversion.
-	 */
-	hpet_freq = 1000000000000000ULL;
-	do_div(hpet_freq, hpet_period);
-	hpet_clockevent.mult = div_sc((unsigned long) hpet_freq,
-				      NSEC_PER_SEC, 32);
-	/* Calculate the min / max delta */
-	hpet_clockevent.max_delta_ns = clockevent_delta2ns(0x7FFFFFFF,
-							   &hpet_clockevent);
-	hpet_clockevent.min_delta_ns = clockevent_delta2ns(0x30,
-							   &hpet_clockevent);
-
-	/*
 	 * Read the HPET ID register to retrieve the IRQ routing
 	 * information and the number of channels
 	 */
@@ -270,70 +401,47 @@ int __init hpet_enable(void)
 		goto out_nohpet;
 #endif
 
-	/* Start the counter */
-	hpet_start_counter();
-
-	/* Verify whether hpet counter works */
-	t1 = read_hpet();
-	rdtscll(start);
-
-	/*
-	 * We don't know the TSC frequency yet, but waiting for
-	 * 200000 TSC cycles is safe:
-	 * 4 GHz == 50us
-	 * 1 GHz == 200us
-	 */
-	do {
-		rep_nop();
-		rdtscll(now);
-	} while ((now - start) < 200000UL);
-
-	if (t1 == read_hpet()) {
-		printk(KERN_WARNING
-		       "HPET counter not counting. HPET disabled\n");
-		goto out_nohpet;
-	}
-
-	/* Initialize and register HPET clocksource
-	 *
-	 * hpet period is in femto seconds per cycle
-	 * so we need to convert this to ns/cyc units
-	 * aproximated by mult/2^shift
-	 *
-	 *  fsec/cyc * 1nsec/1000000fsec = nsec/cyc = mult/2^shift
-	 *  fsec/cyc * 1ns/1000000fsec * 2^shift = mult
-	 *  fsec/cyc * 2^shift * 1nsec/1000000fsec = mult
-	 *  (fsec/cyc << shift)/1000000 = mult
-	 *  (hpet_period << shift)/FSEC_PER_NSEC = mult
-	 */
-	tmp = (u64)hpet_period << HPET_SHIFT;
-	do_div(tmp, FSEC_PER_NSEC);
-	clocksource_hpet.mult = (u32)tmp;
-
-	clocksource_register(&clocksource_hpet);
-
+	if (hpet_clocksource_register())
+		goto out_nohpet;
 
 	if (id & HPET_ID_LEGSUP) {
-		hpet_enable_int();
-		hpet_reserve_platform_timers(id);
-		/*
-		 * Start hpet with the boot cpu mask and make it
-		 * global after the IO_APIC has been initialized.
-		 */
-		hpet_clockevent.cpumask =cpumask_of_cpu(0);
-		clockevents_register_device(&hpet_clockevent);
-		global_clock_event = &hpet_clockevent;
+		hpet_legacy_clockevent_register();
 		return 1;
 	}
 	return 0;
 
 out_nohpet:
-	iounmap(hpet_virt_address);
-	hpet_virt_address = NULL;
+	hpet_clear_mapping();
 	boot_hpet_disable = 1;
 	return 0;
 }
 
+/*
+ * Needs to be late, as the reserve_timer code calls kalloc !
+ *
+ * Not a problem on i386 as hpet_enable is called from late_time_init,
+ * but on x86_64 it is necessary !
+ */
+static __init int hpet_late_init(void)
+{
+	if (boot_hpet_disable)
+		return -ENODEV;
+
+	if (!hpet_address) {
+		if (!force_hpet_address)
+			return -ENODEV;
+
+		hpet_address = force_hpet_address;
+		hpet_enable();
+		if (!hpet_virt_address)
+			return -ENODEV;
+	}
+
+	hpet_reserve_platform_timers(hpet_readl(HPET_ID));
+
+	return 0;
+}
+fs_initcall(hpet_late_init);
 
 #ifdef CONFIG_HPET_EMULATE_RTC
 
@@ -524,68 +653,3 @@ irqreturn_t hpet_rtc_interrupt(int irq, 
 	return IRQ_HANDLED;
 }
 #endif
-
-
-/*
- * Suspend/resume part
- */
-
-#ifdef CONFIG_PM
-
-static int hpet_suspend(struct sys_device *sys_device, pm_message_t state)
-{
-	unsigned long cfg = hpet_readl(HPET_CFG);
-
-	cfg &= ~(HPET_CFG_ENABLE|HPET_CFG_LEGACY);
-	hpet_writel(cfg, HPET_CFG);
-
-	return 0;
-}
-
-static int hpet_resume(struct sys_device *sys_device)
-{
-	unsigned int id;
-
-	hpet_start_counter();
-
-	id = hpet_readl(HPET_ID);
-
-	if (id & HPET_ID_LEGSUP)
-		hpet_enable_int();
-
-	return 0;
-}
-
-static struct sysdev_class hpet_class = {
-	set_kset_name("hpet"),
-	.suspend	= hpet_suspend,
-	.resume		= hpet_resume,
-};
-
-static struct sys_device hpet_device = {
-	.id		= 0,
-	.cls		= &hpet_class,
-};
-
-
-static __init int hpet_register_sysfs(void)
-{
-	int err;
-
-	if (!is_hpet_capable())
-		return 0;
-
-	err = sysdev_class_register(&hpet_class);
-
-	if (!err) {
-		err = sysdev_register(&hpet_device);
-		if (err)
-			sysdev_class_unregister(&hpet_class);
-	}
-
-	return err;
-}
-
-device_initcall(hpet_register_sysfs);
-
-#endif
Index: linux-2.6.22/kernel/time/clockevents.c
===================================================================
--- linux-2.6.22.orig/kernel/time/clockevents.c	2007-07-15 17:13:44.000000000 +0200
+++ linux-2.6.22/kernel/time/clockevents.c	2007-07-15 17:13:48.000000000 +0200
@@ -204,47 +204,7 @@ void clockevents_exchange_device(struct 
 	local_irq_restore(flags);
 }
 
-/**
- * clockevents_request_device
- */
-struct clock_event_device *clockevents_request_device(unsigned int features,
-						      cpumask_t cpumask)
-{
-	struct clock_event_device *cur, *dev = NULL;
-	struct list_head *tmp;
-
-	spin_lock(&clockevents_lock);
-
-	list_for_each(tmp, &clockevent_devices) {
-		cur = list_entry(tmp, struct clock_event_device, list);
-
-		if ((cur->features & features) == features &&
-		    cpus_equal(cpumask, cur->cpumask)) {
-			if (!dev || dev->rating < cur->rating)
-				dev = cur;
-		}
-	}
-
-	clockevents_exchange_device(NULL, dev);
-
-	spin_unlock(&clockevents_lock);
-
-	return dev;
-}
-
-/**
- * clockevents_release_device
- */
-void clockevents_release_device(struct clock_event_device *dev)
-{
-	spin_lock(&clockevents_lock);
-
-	clockevents_exchange_device(dev, NULL);
-	clockevents_notify_released();
-
-	spin_unlock(&clockevents_lock);
-}
-
+#ifdef CONFIG_GENERIC_CLOCKEVENTS
 /**
  * clockevents_notify - notification about relevant events
  */
@@ -273,4 +233,4 @@ void clockevents_notify(unsigned long re
 	spin_unlock(&clockevents_lock);
 }
 EXPORT_SYMBOL_GPL(clockevents_notify);
-
+#endif
Index: linux-2.6.22/drivers/clocksource/acpi_pm.c
===================================================================
--- linux-2.6.22.orig/drivers/clocksource/acpi_pm.c	2007-07-15 17:13:44.000000000 +0200
+++ linux-2.6.22/drivers/clocksource/acpi_pm.c	2007-07-15 17:13:45.000000000 +0200
@@ -71,7 +71,7 @@ static struct clocksource clocksource_ac
 	.rating		= 200,
 	.read		= acpi_pm_read,
 	.mask		= (cycle_t)ACPI_PM_MASK,
-	.mult		= 0, /*to be caluclated*/
+	.mult		= 0, /*to be calculated*/
 	.shift		= 22,
 	.flags		= CLOCK_SOURCE_IS_CONTINUOUS,
 
Index: linux-2.6.22/kernel/time/timekeeping.c
===================================================================
--- linux-2.6.22.orig/kernel/time/timekeeping.c	2007-07-15 17:13:43.000000000 +0200
+++ linux-2.6.22/kernel/time/timekeeping.c	2007-07-15 17:13:45.000000000 +0200
@@ -391,7 +391,7 @@ static __always_inline int clocksource_b
  * this is optimized for the most common adjustments of -1,0,1,
  * for other values we can do a bit more work.
  */
-static void clocksource_adjust(struct clocksource *clock, s64 offset)
+static void clocksource_adjust(s64 offset)
 {
 	s64 error, interval = clock->cycle_interval;
 	int adj;
@@ -466,7 +466,7 @@ void update_wall_time(void)
 	}
 
 	/* correct the clock when NTP error is too big */
-	clocksource_adjust(clock, offset);
+	clocksource_adjust(offset);
 
 	/* store full nanoseconds into xtime */
 	xtime.tv_nsec = (s64)clock->xtime_nsec >> clock->shift;
Index: linux-2.6.22/kernel/timer.c
===================================================================
--- linux-2.6.22.orig/kernel/timer.c	2007-07-15 17:13:43.000000000 +0200
+++ linux-2.6.22/kernel/timer.c	2007-07-15 17:13:45.000000000 +0200
@@ -103,14 +103,14 @@ static inline tvec_base_t *tbase_get_bas
 static inline void timer_set_deferrable(struct timer_list *timer)
 {
 	timer->base = ((tvec_base_t *)((unsigned long)(timer->base) |
-	                               TBASE_DEFERRABLE_FLAG));
+				       TBASE_DEFERRABLE_FLAG));
 }
 
 static inline void
 timer_set_base(struct timer_list *timer, tvec_base_t *new_base)
 {
 	timer->base = (tvec_base_t *)((unsigned long)(new_base) |
-	                              tbase_get_deferrable(timer->base));
+				      tbase_get_deferrable(timer->base));
 }
 
 /**
@@ -431,10 +431,10 @@ EXPORT_SYMBOL(__mod_timer);
 void add_timer_on(struct timer_list *timer, int cpu)
 {
 	tvec_base_t *base = per_cpu(tvec_bases, cpu);
-  	unsigned long flags;
+	unsigned long flags;
 
 	timer_stats_timer_set_start_info(timer);
-  	BUG_ON(timer_pending(timer) || !timer->function);
+	BUG_ON(timer_pending(timer) || !timer->function);
 	spin_lock_irqsave(&base->lock, flags);
 	timer_set_base(timer, base);
 	internal_add_timer(base, timer);
@@ -613,7 +613,7 @@ static inline void __run_timers(tvec_bas
 	while (time_after_eq(jiffies, base->timer_jiffies)) {
 		struct list_head work_list;
 		struct list_head *head = &work_list;
- 		int index = base->timer_jiffies & TVR_MASK;
+		int index = base->timer_jiffies & TVR_MASK;
 
 		/*
 		 * Cascade timers:
@@ -630,8 +630,8 @@ static inline void __run_timers(tvec_bas
 			unsigned long data;
 
 			timer = list_first_entry(head, struct timer_list,entry);
- 			fn = timer->function;
- 			data = timer->data;
+			fn = timer->function;
+			data = timer->data;
 
 			timer_stats_account_timer(timer);
 
@@ -675,8 +675,8 @@ static unsigned long __next_timer_interr
 	index = slot = timer_jiffies & TVR_MASK;
 	do {
 		list_for_each_entry(nte, base->tv1.vec + slot, entry) {
- 			if (tbase_get_deferrable(nte->base))
- 				continue;
+			if (tbase_get_deferrable(nte->base))
+				continue;
 
 			found = 1;
 			expires = nte->expires;
@@ -820,7 +820,7 @@ void update_process_times(int user_tick)
 	if (rcu_pending(cpu))
 		rcu_check_callbacks(cpu, user_tick);
 	scheduler_tick();
- 	run_posix_cpu_timers(p);
+	run_posix_cpu_timers(p);
 }
 
 /*
@@ -895,7 +895,7 @@ static inline void update_times(unsigned
 	update_wall_time();
 	calc_load(ticks);
 }
-  
+
 /*
  * The 64-bit jiffies value is not atomic - you MUST NOT read it
  * without sampling the sequence number in xtime_lock.
@@ -1091,7 +1091,7 @@ asmlinkage long sys_gettid(void)
 /**
  * do_sysinfo - fill in sysinfo struct
  * @info: pointer to buffer to fill
- */ 
+ */
 int do_sysinfo(struct sysinfo *info)
 {
 	unsigned long mem_total, sav_total;
Index: linux-2.6.22/include/linux/clockchips.h
===================================================================
--- linux-2.6.22.orig/include/linux/clockchips.h	2007-07-15 17:13:43.000000000 +0200
+++ linux-2.6.22/include/linux/clockchips.h	2007-07-15 17:13:48.000000000 +0200
@@ -8,7 +8,7 @@
 #ifndef _LINUX_CLOCKCHIPS_H
 #define _LINUX_CLOCKCHIPS_H
 
-#ifdef CONFIG_GENERIC_CLOCKEVENTS
+#ifdef CONFIG_GENERIC_CLOCKEVENTS_BUILD
 
 #include <linux/clocksource.h>
 #include <linux/cpumask.h>
@@ -23,6 +23,7 @@ enum clock_event_mode {
 	CLOCK_EVT_MODE_SHUTDOWN,
 	CLOCK_EVT_MODE_PERIODIC,
 	CLOCK_EVT_MODE_ONESHOT,
+	CLOCK_EVT_MODE_RESUME,
 };
 
 /* Clock event notification values */
@@ -119,10 +120,6 @@ extern void clockevents_register_device(
 
 extern void clockevents_exchange_device(struct clock_event_device *old,
 					struct clock_event_device *new);
-extern
-struct clock_event_device *clockevents_request_device(unsigned int features,
-						      cpumask_t cpumask);
-extern void clockevents_release_device(struct clock_event_device *dev);
 extern void clockevents_set_mode(struct clock_event_device *dev,
 				 enum clock_event_mode mode);
 extern int clockevents_register_notifier(struct notifier_block *nb);
@@ -130,11 +127,14 @@ extern void clockevents_unregister_notif
 extern int clockevents_program_event(struct clock_event_device *dev,
 				     ktime_t expires, ktime_t now);
 
+#ifdef CONFIG_GENERIC_CLOCKEVENTS
 extern void clockevents_notify(unsigned long reason, void *arg);
-
 #else
+# define clockevents_notify(reason, arg) do { } while (0)
+#endif
+
+#else /* CONFIG_GENERIC_CLOCKEVENTS_BUILD */
 
-static inline void clockevents_resume_events(void) { }
 #define clockevents_notify(reason, arg) do { } while (0)
 
 #endif
Index: linux-2.6.22/arch/arm/mach-davinci/time.c
===================================================================
--- linux-2.6.22.orig/arch/arm/mach-davinci/time.c	2007-07-15 17:13:43.000000000 +0200
+++ linux-2.6.22/arch/arm/mach-davinci/time.c	2007-07-15 17:13:46.000000000 +0200
@@ -285,6 +285,8 @@ static void davinci_set_mode(enum clock_
 	case CLOCK_EVT_MODE_SHUTDOWN:
 		t->opts = TIMER_OPTS_DISABLED;
 		break;
+	case CLOCK_EVT_MODE_RESUME:
+		break;
 	}
 }
 
Index: linux-2.6.22/arch/arm/mach-ixp4xx/common.c
===================================================================
--- linux-2.6.22.orig/arch/arm/mach-ixp4xx/common.c	2007-07-15 17:13:43.000000000 +0200
+++ linux-2.6.22/arch/arm/mach-ixp4xx/common.c	2007-07-15 17:13:46.000000000 +0200
@@ -459,6 +459,8 @@ static void ixp4xx_set_mode(enum clock_e
 	default:
 		osrt = opts = 0;
 		break;
+	case CLOCK_EVT_MODE_RESUME:
+		break;
 	}
 
 	*IXP4XX_OSRT1 = osrt | opts;
Index: linux-2.6.22/arch/arm/mach-omap1/time.c
===================================================================
--- linux-2.6.22.orig/arch/arm/mach-omap1/time.c	2007-07-15 17:13:43.000000000 +0200
+++ linux-2.6.22/arch/arm/mach-omap1/time.c	2007-07-15 17:13:46.000000000 +0200
@@ -156,6 +156,7 @@ static void omap_mpu_set_mode(enum clock
 		break;
 	case CLOCK_EVT_MODE_UNUSED:
 	case CLOCK_EVT_MODE_SHUTDOWN:
+	case CLOCK_EVT_MODE_RESUME:
 		break;
 	}
 }
Index: linux-2.6.22/arch/arm/plat-omap/timer32k.c
===================================================================
--- linux-2.6.22.orig/arch/arm/plat-omap/timer32k.c	2007-07-15 17:13:43.000000000 +0200
+++ linux-2.6.22/arch/arm/plat-omap/timer32k.c	2007-07-15 17:13:46.000000000 +0200
@@ -156,6 +156,8 @@ static void omap_32k_timer_set_mode(enum
 	case CLOCK_EVT_MODE_SHUTDOWN:
 		omap_32k_timer_stop();
 		break;
+	case CLOCK_EVT_MODE_RESUME:
+		break;
 	}
 }
 
Index: linux-2.6.22/arch/i386/kernel/apic.c
===================================================================
--- linux-2.6.22.orig/arch/i386/kernel/apic.c	2007-07-15 17:13:43.000000000 +0200
+++ linux-2.6.22/arch/i386/kernel/apic.c	2007-07-15 17:13:47.000000000 +0200
@@ -263,6 +263,9 @@ static void lapic_timer_setup(enum clock
 		v |= (APIC_LVT_MASKED | LOCAL_TIMER_VECTOR);
 		apic_write_around(APIC_LVTT, v);
 		break;
+	case CLOCK_EVT_MODE_RESUME:
+		/* Nothing to do here */
+		break;
 	}
 
 	local_irq_restore(flags);
@@ -315,7 +318,7 @@ static void __devinit setup_APIC_timer(v
 
 #define LAPIC_CAL_LOOPS		(HZ/10)
 
-static __initdata volatile int lapic_cal_loops = -1;
+static __initdata int lapic_cal_loops = -1;
 static __initdata long lapic_cal_t1, lapic_cal_t2;
 static __initdata unsigned long long lapic_cal_tsc1, lapic_cal_tsc2;
 static __initdata unsigned long lapic_cal_pm1, lapic_cal_pm2;
@@ -485,7 +488,7 @@ void __init setup_boot_APIC_clock(void)
 		/* Let the interrupts run */
 		local_irq_enable();
 
-		while(lapic_cal_loops <= LAPIC_CAL_LOOPS)
+		while (lapic_cal_loops <= LAPIC_CAL_LOOPS)
 			cpu_relax();
 
 		local_irq_disable();
@@ -521,6 +524,9 @@ void __init setup_boot_APIC_clock(void)
 		 */
 		if (nmi_watchdog != NMI_IO_APIC)
 			lapic_clockevent.features &= ~CLOCK_EVT_FEAT_DUMMY;
+		else
+			printk(KERN_WARNING "APIC timer registered as dummy,"
+			       " due to nmi_watchdog=1!\n");
 	}
 
 	/* Setup the lapic or request the broadcast */
Index: linux-2.6.22/arch/i386/kernel/i8253.c
===================================================================
--- linux-2.6.22.orig/arch/i386/kernel/i8253.c	2007-07-15 17:13:43.000000000 +0200
+++ linux-2.6.22/arch/i386/kernel/i8253.c	2007-07-15 17:13:50.000000000 +0200
@@ -3,19 +3,17 @@
  *
  */
 #include <linux/clockchips.h>
-#include <linux/spinlock.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
 #include <linux/jiffies.h>
-#include <linux/sysdev.h>
 #include <linux/module.h>
-#include <linux/init.h>
+#include <linux/spinlock.h>
 
 #include <asm/smp.h>
 #include <asm/delay.h>
 #include <asm/i8253.h>
 #include <asm/io.h>
 
-#include "io_ports.h"
-
 DEFINE_SPINLOCK(i8253_lock);
 EXPORT_SYMBOL(i8253_lock);
 
@@ -41,26 +39,27 @@ static void init_pit_timer(enum clock_ev
 	case CLOCK_EVT_MODE_PERIODIC:
 		/* binary, mode 2, LSB/MSB, ch 0 */
 		outb_p(0x34, PIT_MODE);
-		udelay(10);
 		outb_p(LATCH & 0xff , PIT_CH0);	/* LSB */
-		udelay(10);
 		outb(LATCH >> 8 , PIT_CH0);	/* MSB */
 		break;
 
-	/*
-	 * Avoid unnecessary state transitions, as it confuses
-	 * Geode / Cyrix based boxen.
-	 */
 	case CLOCK_EVT_MODE_SHUTDOWN:
-		if (evt->mode == CLOCK_EVT_MODE_UNUSED)
-			break;
 	case CLOCK_EVT_MODE_UNUSED:
-		if (evt->mode == CLOCK_EVT_MODE_SHUTDOWN)
-			break;
+		if (evt->mode == CLOCK_EVT_MODE_PERIODIC ||
+		    evt->mode == CLOCK_EVT_MODE_ONESHOT) {
+			outb_p(0x30, PIT_MODE);
+			outb_p(0, PIT_CH0);
+			outb_p(0, PIT_CH0);
+		}
+		break;
+
 	case CLOCK_EVT_MODE_ONESHOT:
 		/* One shot setup */
 		outb_p(0x38, PIT_MODE);
-		udelay(10);
+		break;
+
+	case CLOCK_EVT_MODE_RESUME:
+		/* Nothing to do here */
 		break;
 	}
 	spin_unlock_irqrestore(&i8253_lock, flags);
@@ -120,6 +119,7 @@ void __init setup_pit_timer(void)
 	global_clock_event = &pit_clockevent;
 }
 
+#ifndef CONFIG_X86_64
 /*
  * Since the PIT overflows every tick, its not very useful
  * to just read by itself. So use jiffies to emulate a free
@@ -204,3 +204,5 @@ static int __init init_pit_clocksource(v
 	return clocksource_register(&clocksource_pit);
 }
 arch_initcall(init_pit_clocksource);
+
+#endif
Index: linux-2.6.22/arch/i386/kernel/vmiclock.c
===================================================================
--- linux-2.6.22.orig/arch/i386/kernel/vmiclock.c	2007-07-15 17:13:43.000000000 +0200
+++ linux-2.6.22/arch/i386/kernel/vmiclock.c	2007-07-15 17:13:47.000000000 +0200
@@ -32,6 +32,7 @@
 #include <asm/apicdef.h>
 #include <asm/apic.h>
 #include <asm/timer.h>
+#include <asm/i8253.h>
 
 #include <irq_vectors.h>
 #include "io_ports.h"
@@ -142,6 +143,7 @@ static void vmi_timer_set_mode(enum cloc
 
 	switch (mode) {
 	case CLOCK_EVT_MODE_ONESHOT:
+	case CLOCK_EVT_MODE_RESUME:
 		break;
 	case CLOCK_EVT_MODE_PERIODIC:
 		cycles_per_hz = vmi_timer_ops.get_cycle_frequency();
Index: linux-2.6.22/arch/sh/kernel/timers/timer-tmu.c
===================================================================
--- linux-2.6.22.orig/arch/sh/kernel/timers/timer-tmu.c	2007-07-15 17:13:43.000000000 +0200
+++ linux-2.6.22/arch/sh/kernel/timers/timer-tmu.c	2007-07-15 17:13:46.000000000 +0200
@@ -80,6 +80,7 @@ static void tmu_set_mode(enum clock_even
 		break;
 	case CLOCK_EVT_MODE_UNUSED:
 	case CLOCK_EVT_MODE_SHUTDOWN:
+	case CLOCK_EVT_MODE_RESUME:
 		break;
 	}
 }
Index: linux-2.6.22/arch/sparc64/kernel/time.c
===================================================================
--- linux-2.6.22.orig/arch/sparc64/kernel/time.c	2007-07-15 17:13:43.000000000 +0200
+++ linux-2.6.22/arch/sparc64/kernel/time.c	2007-07-15 17:13:47.000000000 +0200
@@ -403,58 +403,9 @@ static struct sparc64_tick_ops hbtick_op
 
 static unsigned long timer_ticks_per_nsec_quotient __read_mostly;
 
-#define TICK_SIZE (tick_nsec / 1000)
-
-#define USEC_AFTER	500000
-#define USEC_BEFORE	500000
-
-static void sync_cmos_clock(unsigned long dummy);
-
-static DEFINE_TIMER(sync_cmos_timer, sync_cmos_clock, 0, 0);
-
-static void sync_cmos_clock(unsigned long dummy)
-{
-	struct timeval now, next;
-	int fail = 1;
-
-	/*
-	 * If we have an externally synchronized Linux clock, then update
-	 * CMOS clock accordingly every ~11 minutes. Set_rtc_mmss() has to be
-	 * called as close as possible to 500 ms before the new second starts.
-	 * This code is run on a timer.  If the clock is set, that timer
-	 * may not expire at the correct time.  Thus, we adjust...
-	 */
-	if (!ntp_synced())
-		/*
-		 * Not synced, exit, do not restart a timer (if one is
-		 * running, let it run out).
-		 */
-		return;
-
-	do_gettimeofday(&now);
-	if (now.tv_usec >= USEC_AFTER - ((unsigned) TICK_SIZE) / 2 &&
-	    now.tv_usec <= USEC_BEFORE + ((unsigned) TICK_SIZE) / 2)
-		fail = set_rtc_mmss(now.tv_sec);
-
-	next.tv_usec = USEC_AFTER - now.tv_usec;
-	if (next.tv_usec <= 0)
-		next.tv_usec += USEC_PER_SEC;
-
-	if (!fail)
-		next.tv_sec = 659;
-	else
-		next.tv_sec = 0;
-
-	if (next.tv_usec >= USEC_PER_SEC) {
-		next.tv_sec++;
-		next.tv_usec -= USEC_PER_SEC;
-	}
-	mod_timer(&sync_cmos_timer, jiffies + timeval_to_jiffies(&next));
-}
-
-void notify_arch_cmos_timer(void)
+int update_persistent_clock(struct timespec now)
 {
-	mod_timer(&sync_cmos_timer, jiffies + 1);
+	return set_rtc_mmss(now.tv_sec);
 }
 
 /* Kick start a stopped clock (procedure from the Sun NVRAM/hostid FAQ). */
@@ -938,6 +889,7 @@ static void sparc64_timer_setup(enum clo
 {
 	switch (mode) {
 	case CLOCK_EVT_MODE_ONESHOT:
+	case CLOCK_EVT_MODE_RESUME:
 		break;
 
 	case CLOCK_EVT_MODE_SHUTDOWN:
Index: linux-2.6.22/kernel/time/tick-broadcast.c
===================================================================
--- linux-2.6.22.orig/kernel/time/tick-broadcast.c	2007-07-15 17:13:43.000000000 +0200
+++ linux-2.6.22/kernel/time/tick-broadcast.c	2007-07-15 17:13:52.000000000 +0200
@@ -31,6 +31,12 @@ struct tick_device tick_broadcast_device
 static cpumask_t tick_broadcast_mask;
 static DEFINE_SPINLOCK(tick_broadcast_lock);
 
+#ifdef CONFIG_TICK_ONESHOT
+static void tick_broadcast_clear_oneshot(int cpu);
+#else
+static inline void tick_broadcast_clear_oneshot(int cpu) { }
+#endif
+
 /*
  * Debugging: see timer_list.c
  */
@@ -49,7 +55,7 @@ cpumask_t *tick_get_broadcast_mask(void)
  */
 static void tick_broadcast_start_periodic(struct clock_event_device *bc)
 {
-	if (bc && bc->mode == CLOCK_EVT_MODE_SHUTDOWN)
+	if (bc)
 		tick_setup_periodic(bc, 1);
 }
 
@@ -58,8 +64,9 @@ static void tick_broadcast_start_periodi
  */
 int tick_check_broadcast_device(struct clock_event_device *dev)
 {
-	if (tick_broadcast_device.evtdev ||
-	    (dev->features & CLOCK_EVT_FEAT_C3STOP))
+	if ((tick_broadcast_device.evtdev &&
+	     tick_broadcast_device.evtdev->rating >= dev->rating) ||
+	     (dev->features & CLOCK_EVT_FEAT_C3STOP))
 		return 0;
 
 	clockevents_exchange_device(NULL, dev);
@@ -99,8 +106,19 @@ int tick_device_uses_broadcast(struct cl
 		cpu_set(cpu, tick_broadcast_mask);
 		tick_broadcast_start_periodic(tick_broadcast_device.evtdev);
 		ret = 1;
-	}
+	} else {
+		/*
+		 * When the new device is not affected by the stop
+		 * feature and the cpu is marked in the broadcast mask
+		 * then clear the broadcast bit.
+		 */
+		if (!(dev->features & CLOCK_EVT_FEAT_C3STOP)) {
+			int cpu = smp_processor_id();
 
+			cpu_clear(cpu, tick_broadcast_mask);
+			tick_broadcast_clear_oneshot(cpu);
+		}
+	}
 	spin_unlock_irqrestore(&tick_broadcast_lock, flags);
 	return ret;
 }
@@ -299,7 +317,7 @@ void tick_suspend_broadcast(void)
 	spin_lock_irqsave(&tick_broadcast_lock, flags);
 
 	bc = tick_broadcast_device.evtdev;
-	if (bc && tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC)
+	if (bc)
 		clockevents_set_mode(bc, CLOCK_EVT_MODE_SHUTDOWN);
 
 	spin_unlock_irqrestore(&tick_broadcast_lock, flags);
@@ -316,6 +334,8 @@ int tick_resume_broadcast(void)
 	bc = tick_broadcast_device.evtdev;
 
 	if (bc) {
+		clockevents_set_mode(bc, CLOCK_EVT_MODE_RESUME);
+
 		switch (tick_broadcast_device.mode) {
 		case TICKDEV_MODE_PERIODIC:
 			if(!cpus_empty(tick_broadcast_mask))
@@ -485,16 +505,24 @@ out:
 	spin_unlock_irqrestore(&tick_broadcast_lock, flags);
 }
 
+/*
+ * Reset the one shot broadcast for a cpu
+ *
+ * Called with tick_broadcast_lock held
+ */
+static void tick_broadcast_clear_oneshot(int cpu)
+{
+	cpu_clear(cpu, tick_broadcast_oneshot_mask);
+}
+
 /**
  * tick_broadcast_setup_highres - setup the broadcast device for highres
  */
 void tick_broadcast_setup_oneshot(struct clock_event_device *bc)
 {
-	if (bc->mode != CLOCK_EVT_MODE_ONESHOT) {
-		bc->event_handler = tick_handle_oneshot_broadcast;
-		clockevents_set_mode(bc, CLOCK_EVT_MODE_ONESHOT);
-		bc->next_event.tv64 = KTIME_MAX;
-	}
+	bc->event_handler = tick_handle_oneshot_broadcast;
+	clockevents_set_mode(bc, CLOCK_EVT_MODE_ONESHOT);
+	bc->next_event.tv64 = KTIME_MAX;
 }
 
 /*
Index: linux-2.6.22/kernel/time/tick-common.c
===================================================================
--- linux-2.6.22.orig/kernel/time/tick-common.c	2007-07-15 17:13:43.000000000 +0200
+++ linux-2.6.22/kernel/time/tick-common.c	2007-07-15 17:13:52.000000000 +0200
@@ -200,7 +200,7 @@ static int tick_check_new_device(struct 
 
 	cpu = smp_processor_id();
 	if (!cpu_isset(cpu, newdev->cpumask))
-		goto out;
+		goto out_bc;
 
 	td = &per_cpu(tick_cpu_device, cpu);
 	curdev = td->evtdev;
@@ -265,7 +265,7 @@ out_bc:
 	 */
 	if (tick_check_broadcast_device(newdev))
 		ret = NOTIFY_STOP;
-out:
+
 	spin_unlock_irqrestore(&tick_device_lock, flags);
 
 	return ret;
@@ -318,12 +318,17 @@ static void tick_resume(void)
 {
 	struct tick_device *td = &__get_cpu_var(tick_cpu_device);
 	unsigned long flags;
+	int broadcast = tick_resume_broadcast();
 
 	spin_lock_irqsave(&tick_device_lock, flags);
-	if (td->mode == TICKDEV_MODE_PERIODIC)
-		tick_setup_periodic(td->evtdev, 0);
-	else
-		tick_resume_oneshot();
+	clockevents_set_mode(td->evtdev, CLOCK_EVT_MODE_RESUME);
+
+	if (!broadcast) {
+		if (td->mode == TICKDEV_MODE_PERIODIC)
+			tick_setup_periodic(td->evtdev, 0);
+		else
+			tick_resume_oneshot();
+	}
 	spin_unlock_irqrestore(&tick_device_lock, flags);
 }
 
@@ -360,8 +365,7 @@ static int tick_notify(struct notifier_b
 		break;
 
 	case CLOCK_EVT_NOTIFY_RESUME:
-		if (!tick_resume_broadcast())
-			tick_resume();
+		tick_resume();
 		break;
 
 	default:
Index: linux-2.6.22/kernel/hrtimer.c
===================================================================
--- linux-2.6.22.orig/kernel/hrtimer.c	2007-07-15 17:13:43.000000000 +0200
+++ linux-2.6.22/kernel/hrtimer.c	2007-07-15 17:13:46.000000000 +0200
@@ -558,7 +558,8 @@ static inline int hrtimer_enqueue_reprog
  */
 static int hrtimer_switch_to_hres(void)
 {
-	struct hrtimer_cpu_base *base = &__get_cpu_var(hrtimer_bases);
+	int cpu = smp_processor_id();
+	struct hrtimer_cpu_base *base = &per_cpu(hrtimer_bases, cpu);
 	unsigned long flags;
 
 	if (base->hres_active)
@@ -568,6 +569,8 @@ static int hrtimer_switch_to_hres(void)
 
 	if (tick_init_highres()) {
 		local_irq_restore(flags);
+		printk(KERN_WARNING "Could not switch to high resolution "
+				    "mode on CPU %d\n", cpu);
 		return 0;
 	}
 	base->hres_active = 1;
@@ -683,6 +686,7 @@ static void enqueue_hrtimer(struct hrtim
 	struct rb_node **link = &base->active.rb_node;
 	struct rb_node *parent = NULL;
 	struct hrtimer *entry;
+	int leftmost = 1;
 
 	/*
 	 * Find the right place in the rbtree:
@@ -694,18 +698,19 @@ static void enqueue_hrtimer(struct hrtim
 		 * We dont care about collisions. Nodes with
 		 * the same expiry time stay together.
 		 */
-		if (timer->expires.tv64 < entry->expires.tv64)
+		if (timer->expires.tv64 < entry->expires.tv64) {
 			link = &(*link)->rb_left;
-		else
+		} else {
 			link = &(*link)->rb_right;
+			leftmost = 0;
+		}
 	}
 
 	/*
 	 * Insert the timer to the rbtree and check whether it
 	 * replaces the first pending timer
 	 */
-	if (!base->first || timer->expires.tv64 <
-	    rb_entry(base->first, struct hrtimer, node)->expires.tv64) {
+	if (leftmost) {
 		/*
 		 * Reprogram the clock event device. When the timer is already
 		 * expired hrtimer_enqueue_reprogram has either called the
Index: linux-2.6.22/kernel/time/tick-oneshot.c
===================================================================
--- linux-2.6.22.orig/kernel/time/tick-oneshot.c	2007-07-15 17:13:43.000000000 +0200
+++ linux-2.6.22/kernel/time/tick-oneshot.c	2007-07-15 17:13:46.000000000 +0200
@@ -73,8 +73,21 @@ int tick_switch_to_oneshot(void (*handle
 	struct clock_event_device *dev = td->evtdev;
 
 	if (!dev || !(dev->features & CLOCK_EVT_FEAT_ONESHOT) ||
-	    !tick_device_is_functional(dev))
+		    !tick_device_is_functional(dev)) {
+
+		printk(KERN_INFO "Clockevents: "
+		       "could not switch to one-shot mode:");
+		if (!dev) {
+			printk(" no tick device\n");
+		} else {
+			if (!tick_device_is_functional(dev))
+				printk(" %s is not functional.\n", dev->name);
+			else
+				printk(" %s does not support one-shot mode.\n",
+				       dev->name);
+		}
 		return -EINVAL;
+	}
 
 	td->mode = TICKDEV_MODE_ONESHOT;
 	dev->event_handler = handler;
Index: linux-2.6.22/arch/x86_64/kernel/time.c
===================================================================
--- linux-2.6.22.orig/arch/x86_64/kernel/time.c	2007-07-15 17:13:42.000000000 +0200
+++ linux-2.6.22/arch/x86_64/kernel/time.c	2007-07-15 17:13:51.000000000 +0200
@@ -28,11 +28,13 @@
 #include <linux/cpu.h>
 #include <linux/kallsyms.h>
 #include <linux/acpi.h>
+#include <linux/clockchips.h>
+
 #ifdef CONFIG_ACPI
 #include <acpi/achware.h>	/* for PM timer frequency */
 #include <acpi/acpi_bus.h>
 #endif
-#include <asm/8253pit.h>
+#include <asm/i8253.h>
 #include <asm/pgtable.h>
 #include <asm/vsyscall.h>
 #include <asm/timex.h>
@@ -45,11 +47,8 @@
 #include <asm/mpspec.h>
 #include <asm/nmi.h>
 
-static char *timename = NULL;
-
 DEFINE_SPINLOCK(rtc_lock);
 EXPORT_SYMBOL(rtc_lock);
-DEFINE_SPINLOCK(i8253_lock);
 
 volatile unsigned long __jiffies __section_jiffies = INITIAL_JIFFIES;
 
@@ -79,8 +78,9 @@ EXPORT_SYMBOL(profile_pc);
  * sheet for details.
  */
 
-static void set_rtc_mmss(unsigned long nowtime)
+static int set_rtc_mmss(unsigned long nowtime)
 {
+	int retval = 0;
 	int real_seconds, real_minutes, cmos_minutes;
 	unsigned char control, freq_select;
 
@@ -120,6 +120,7 @@ static void set_rtc_mmss(unsigned long n
 	if (abs(real_minutes - cmos_minutes) >= 30) {
 		printk(KERN_WARNING "time.c: can't update CMOS clock "
 		       "from %d to %d\n", cmos_minutes, real_minutes);
+		retval = -1;
 	} else {
 		BIN_TO_BCD(real_seconds);
 		BIN_TO_BCD(real_minutes);
@@ -139,67 +140,23 @@ static void set_rtc_mmss(unsigned long n
 	CMOS_WRITE(freq_select, RTC_FREQ_SELECT);
 
 	spin_unlock(&rtc_lock);
-}
 
+	return retval;
+}
 
-void main_timer_handler(void)
+int update_persistent_clock(struct timespec now)
 {
-	static unsigned long rtc_update = 0;
-/*
- * Here we are in the timer irq handler. We have irqs locally disabled (so we
- * don't need spin_lock_irqsave()) but we don't know if the timer_bh is running
- * on the other CPU, so we need a lock. We also need to lock the vsyscall
- * variables, because both do_timer() and us change them -arca+vojtech
- */
-
-	write_seqlock(&xtime_lock);
-
-/*
- * Do the timer stuff.
- */
-
-	do_timer(1);
-#ifndef CONFIG_SMP
-	update_process_times(user_mode(get_irq_regs()));
-#endif
-
-/*
- * In the SMP case we use the local APIC timer interrupt to do the profiling,
- * except when we simulate SMP mode on a uniprocessor system, in that case we
- * have to call the local interrupt handler.
- */
-
-	if (!using_apic_timer)
-		smp_local_timer_interrupt();
-
-/*
- * If we have an externally synchronized Linux clock, then update CMOS clock
- * accordingly every ~11 minutes. set_rtc_mmss() will be called in the jiffy
- * closest to exactly 500 ms before the next second. If the update fails, we
- * don't care, as it'll be updated on the next turn, and the problem (time way
- * off) isn't likely to go away much sooner anyway.
- */
-
-	if (ntp_synced() && xtime.tv_sec > rtc_update &&
-		abs(xtime.tv_nsec - 500000000) <= tick_nsec / 2) {
-		set_rtc_mmss(xtime.tv_sec);
-		rtc_update = xtime.tv_sec + 660;
-	}
- 
-	write_sequnlock(&xtime_lock);
+	return set_rtc_mmss(now.tv_sec);
 }
 
-static irqreturn_t timer_interrupt(int irq, void *dev_id)
+static irqreturn_t timer_event_interrupt(int irq, void *dev_id)
 {
-	if (apic_runs_main_timer > 1)
-		return IRQ_HANDLED;
-	main_timer_handler();
-	if (using_apic_timer)
-		smp_send_timer_broadcast_ipi();
+	global_clock_event->event_handler(global_clock_event);
+
 	return IRQ_HANDLED;
 }
 
-static unsigned long get_cmos_time(void)
+unsigned long read_persistent_clock(void)
 {
 	unsigned int year, mon, day, hour, min, sec;
 	unsigned long flags;
@@ -226,7 +183,7 @@ static unsigned long get_cmos_time(void)
 	/*
 	 * We know that x86-64 always uses BCD format, no need to check the
 	 * config register.
- 	 */
+	 */
 
 	BCD_TO_BIN(sec);
 	BCD_TO_BIN(min);
@@ -239,11 +196,11 @@ static unsigned long get_cmos_time(void)
 		BCD_TO_BIN(century);
 		year += century * 100;
 		printk(KERN_INFO "Extended CMOS year: %d\n", century * 100);
-	} else { 
+	} else {
 		/*
 		 * x86-64 systems only exists since 2002.
 		 * This will work up to Dec 31, 2100
-	 	 */
+		 */
 		year += 2000;
 	}
 
@@ -255,143 +212,62 @@ static unsigned long get_cmos_time(void)
 #define TICK_COUNT 100000000
 static unsigned int __init tsc_calibrate_cpu_khz(void)
 {
-       int tsc_start, tsc_now;
-       int i, no_ctr_free;
-       unsigned long evntsel3 = 0, pmc3 = 0, pmc_now = 0;
-       unsigned long flags;
-
-       for (i = 0; i < 4; i++)
-               if (avail_to_resrv_perfctr_nmi_bit(i))
-                       break;
-       no_ctr_free = (i == 4);
-       if (no_ctr_free) {
-               i = 3;
-               rdmsrl(MSR_K7_EVNTSEL3, evntsel3);
-               wrmsrl(MSR_K7_EVNTSEL3, 0);
-               rdmsrl(MSR_K7_PERFCTR3, pmc3);
-       } else {
-               reserve_perfctr_nmi(MSR_K7_PERFCTR0 + i);
-               reserve_evntsel_nmi(MSR_K7_EVNTSEL0 + i);
-       }
-       local_irq_save(flags);
-       /* start meauring cycles, incrementing from 0 */
-       wrmsrl(MSR_K7_PERFCTR0 + i, 0);
-       wrmsrl(MSR_K7_EVNTSEL0 + i, 1 << 22 | 3 << 16 | 0x76);
-       rdtscl(tsc_start);
-       do {
-               rdmsrl(MSR_K7_PERFCTR0 + i, pmc_now);
-               tsc_now = get_cycles_sync();
-       } while ((tsc_now - tsc_start) < TICK_COUNT);
-
-       local_irq_restore(flags);
-       if (no_ctr_free) {
-               wrmsrl(MSR_K7_EVNTSEL3, 0);
-               wrmsrl(MSR_K7_PERFCTR3, pmc3);
-               wrmsrl(MSR_K7_EVNTSEL3, evntsel3);
-       } else {
-               release_perfctr_nmi(MSR_K7_PERFCTR0 + i);
-               release_evntsel_nmi(MSR_K7_EVNTSEL0 + i);
-       }
-
-       return pmc_now * tsc_khz / (tsc_now - tsc_start);
-}
-
-/*
- * pit_calibrate_tsc() uses the speaker output (channel 2) of
- * the PIT. This is better than using the timer interrupt output,
- * because we can read the value of the speaker with just one inb(),
- * where we need three i/o operations for the interrupt channel.
- * We count how many ticks the TSC does in 50 ms.
- */
-
-static unsigned int __init pit_calibrate_tsc(void)
-{
-	unsigned long start, end;
-	unsigned long flags;
-
-	spin_lock_irqsave(&i8253_lock, flags);
-
-	outb((inb(0x61) & ~0x02) | 0x01, 0x61);
-
-	outb(0xb0, 0x43);
-	outb((PIT_TICK_RATE / (1000 / 50)) & 0xff, 0x42);
-	outb((PIT_TICK_RATE / (1000 / 50)) >> 8, 0x42);
-	start = get_cycles_sync();
-	while ((inb(0x61) & 0x20) == 0);
-	end = get_cycles_sync();
-
-	spin_unlock_irqrestore(&i8253_lock, flags);
-	
-	return (end - start) / 50;
-}
-
-#define PIT_MODE 0x43
-#define PIT_CH0  0x40
-
-static void __pit_init(int val, u8 mode)
-{
+	int tsc_start, tsc_now;
+	int i, no_ctr_free;
+	unsigned long evntsel3 = 0, pmc3 = 0, pmc_now = 0;
 	unsigned long flags;
 
-	spin_lock_irqsave(&i8253_lock, flags);
-	outb_p(mode, PIT_MODE);
-	outb_p(val & 0xff, PIT_CH0);	/* LSB */
-	outb_p(val >> 8, PIT_CH0);	/* MSB */
-	spin_unlock_irqrestore(&i8253_lock, flags);
-}
-
-void __init pit_init(void)
-{
-	__pit_init(LATCH, 0x34); /* binary, mode 2, LSB/MSB, ch 0 */
-}
-
-void pit_stop_interrupt(void)
-{
-	__pit_init(0, 0x30); /* mode 0 */
-}
-
-void stop_timer_interrupt(void)
-{
-	char *name;
-	if (hpet_address) {
-		name = "HPET";
-		hpet_timer_stop_set_go(0);
+	for (i = 0; i < 4; i++)
+		if (avail_to_resrv_perfctr_nmi_bit(i))
+			break;
+	no_ctr_free = (i == 4);
+	if (no_ctr_free) {
+		i = 3;
+		rdmsrl(MSR_K7_EVNTSEL3, evntsel3);
+		wrmsrl(MSR_K7_EVNTSEL3, 0);
+		rdmsrl(MSR_K7_PERFCTR3, pmc3);
 	} else {
-		name = "PIT";
-		pit_stop_interrupt();
+		reserve_perfctr_nmi(MSR_K7_PERFCTR0 + i);
+		reserve_evntsel_nmi(MSR_K7_EVNTSEL0 + i);
 	}
-	printk(KERN_INFO "timer: %s interrupt stopped.\n", name);
+	local_irq_save(flags);
+	/* start meauring cycles, incrementing from 0 */
+	wrmsrl(MSR_K7_PERFCTR0 + i, 0);
+	wrmsrl(MSR_K7_EVNTSEL0 + i, 1 << 22 | 3 << 16 | 0x76);
+	rdtscl(tsc_start);
+	do {
+		rdmsrl(MSR_K7_PERFCTR0 + i, pmc_now);
+		tsc_now = get_cycles_sync();
+	} while ((tsc_now - tsc_start) < TICK_COUNT);
+
+	local_irq_restore(flags);
+	if (no_ctr_free) {
+		wrmsrl(MSR_K7_EVNTSEL3, 0);
+		wrmsrl(MSR_K7_PERFCTR3, pmc3);
+		wrmsrl(MSR_K7_EVNTSEL3, evntsel3);
+	} else {
+		release_perfctr_nmi(MSR_K7_PERFCTR0 + i);
+		release_evntsel_nmi(MSR_K7_EVNTSEL0 + i);
+	}
+
+	return pmc_now * tsc_khz / (tsc_now - tsc_start);
 }
 
 static struct irqaction irq0 = {
-	.handler	= timer_interrupt,
-	.flags		= IRQF_DISABLED | IRQF_IRQPOLL,
+	.handler	= timer_event_interrupt,
+	.flags		= IRQF_DISABLED | IRQF_IRQPOLL | IRQF_NOBALANCING,
 	.mask		= CPU_MASK_NONE,
-	.name 		= "timer"
+	.name		= "timer"
 };
 
 void __init time_init(void)
 {
-	if (nohpet)
-		hpet_address = 0;
-	xtime.tv_sec = get_cmos_time();
-	xtime.tv_nsec = 0;
-
-	set_normalized_timespec(&wall_to_monotonic,
-	                        -xtime.tv_sec, -xtime.tv_nsec);
-
-	if (hpet_arch_init())
-		hpet_address = 0;
-
-	if (hpet_use_timer) {
-		/* set tick_nsec to use the proper rate for HPET */
-	  	tick_nsec = TICK_NSEC_HPET;
-		tsc_khz = hpet_calibrate_tsc();
-		timename = "HPET";
-	} else {
-		pit_init();
-		tsc_khz = pit_calibrate_tsc();
-		timename = "PIT";
-	}
+	if (!hpet_enable())
+		setup_pit_timer();
+
+	setup_irq(0, &irq0);
+
+	tsc_calibrate();
 
 	cpu_khz = tsc_khz;
 	if (cpu_has(&boot_cpu_data, X86_FEATURE_CONSTANT_TSC) &&
@@ -411,79 +287,4 @@ void __init time_init(void)
 	printk(KERN_INFO "time.c: Detected %d.%03d MHz processor.\n",
 		cpu_khz / 1000, cpu_khz % 1000);
 	init_tsc_clocksource();
-
-	setup_irq(0, &irq0);
 }
-
-
-static long clock_cmos_diff;
-static unsigned long sleep_start;
-
-/*
- * sysfs support for the timer.
- */
-
-static int timer_suspend(struct sys_device *dev, pm_message_t state)
-{
-	/*
-	 * Estimate time zone so that set_time can update the clock
-	 */
-	long cmos_time =  get_cmos_time();
-
-	clock_cmos_diff = -cmos_time;
-	clock_cmos_diff += get_seconds();
-	sleep_start = cmos_time;
-	return 0;
-}
-
-static int timer_resume(struct sys_device *dev)
-{
-	unsigned long flags;
-	unsigned long sec;
-	unsigned long ctime = get_cmos_time();
-	long sleep_length = (ctime - sleep_start) * HZ;
-
-	if (sleep_length < 0) {
-		printk(KERN_WARNING "Time skew detected in timer resume!\n");
-		/* The time after the resume must not be earlier than the time
-		 * before the suspend or some nasty things will happen
-		 */
-		sleep_length = 0;
-		ctime = sleep_start;
-	}
-	if (hpet_address)
-		hpet_reenable();
-	else
-		i8254_timer_resume();
-
-	sec = ctime + clock_cmos_diff;
-	write_seqlock_irqsave(&xtime_lock,flags);
-	xtime.tv_sec = sec;
-	xtime.tv_nsec = 0;
-	jiffies += sleep_length;
-	write_sequnlock_irqrestore(&xtime_lock,flags);
-	touch_softlockup_watchdog();
-	return 0;
-}
-
-static struct sysdev_class timer_sysclass = {
-	.resume = timer_resume,
-	.suspend = timer_suspend,
-	set_kset_name("timer"),
-};
-
-/* XXX this sysfs stuff should probably go elsewhere later -john */
-static struct sys_device device_timer = {
-	.id	= 0,
-	.cls	= &timer_sysclass,
-};
-
-static int time_init_device(void)
-{
-	int error = sysdev_class_register(&timer_sysclass);
-	if (!error)
-		error = sysdev_register(&device_timer);
-	return error;
-}
-
-device_initcall(time_init_device);
Index: linux-2.6.22/drivers/input/misc/pcspkr.c
===================================================================
--- linux-2.6.22.orig/drivers/input/misc/pcspkr.c	2007-07-15 17:13:42.000000000 +0200
+++ linux-2.6.22/drivers/input/misc/pcspkr.c	2007-07-15 17:13:46.000000000 +0200
@@ -24,7 +24,12 @@ MODULE_AUTHOR("Vojtech Pavlik <vojtech@u
 MODULE_DESCRIPTION("PC Speaker beeper driver");
 MODULE_LICENSE("GPL");
 
-static DEFINE_SPINLOCK(i8253_beep_lock);
+#ifdef CONFIG_X86
+/* Use the global PIT lock ! */
+#include <asm/i8253.h>
+#else
+static DEFINE_SPINLOCK(i8253_lock);
+#endif
 
 static int pcspkr_event(struct input_dev *dev, unsigned int type, unsigned int code, int value)
 {
@@ -43,7 +48,7 @@ static int pcspkr_event(struct input_dev
 	if (value > 20 && value < 32767)
 		count = PIT_TICK_RATE / value;
 
-	spin_lock_irqsave(&i8253_beep_lock, flags);
+	spin_lock_irqsave(&i8253_lock, flags);
 
 	if (count) {
 		/* enable counter 2 */
@@ -58,7 +63,7 @@ static int pcspkr_event(struct input_dev
 		outb(inb_p(0x61) & 0xFC, 0x61);
 	}
 
-	spin_unlock_irqrestore(&i8253_beep_lock, flags);
+	spin_unlock_irqrestore(&i8253_lock, flags);
 
 	return 0;
 }
Index: linux-2.6.22/include/asm-x86_64/i8253.h
===================================================================
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ linux-2.6.22/include/asm-x86_64/i8253.h	2007-07-15 17:13:50.000000000 +0200
@@ -0,0 +1,2 @@
+#include <asm-i386/i8253.h>
+#include <asm/8253pit.h>
Index: linux-2.6.22/arch/i386/kernel/time.c
===================================================================
--- linux-2.6.22.orig/arch/i386/kernel/time.c	2007-07-15 17:13:42.000000000 +0200
+++ linux-2.6.22/arch/i386/kernel/time.c	2007-07-15 17:13:47.000000000 +0200
@@ -207,55 +207,9 @@ unsigned long read_persistent_clock(void
 	return retval;
 }
 
-static void sync_cmos_clock(unsigned long dummy);
-
-static DEFINE_TIMER(sync_cmos_timer, sync_cmos_clock, 0, 0);
-int no_sync_cmos_clock;
-
-static void sync_cmos_clock(unsigned long dummy)
-{
-	struct timeval now, next;
-	int fail = 1;
-
-	/*
-	 * If we have an externally synchronized Linux clock, then update
-	 * CMOS clock accordingly every ~11 minutes. Set_rtc_mmss() has to be
-	 * called as close as possible to 500 ms before the new second starts.
-	 * This code is run on a timer.  If the clock is set, that timer
-	 * may not expire at the correct time.  Thus, we adjust...
-	 */
-	if (!ntp_synced())
-		/*
-		 * Not synced, exit, do not restart a timer (if one is
-		 * running, let it run out).
-		 */
-		return;
-
-	do_gettimeofday(&now);
-	if (now.tv_usec >= USEC_AFTER - ((unsigned) TICK_SIZE) / 2 &&
-	    now.tv_usec <= USEC_BEFORE + ((unsigned) TICK_SIZE) / 2)
-		fail = set_rtc_mmss(now.tv_sec);
-
-	next.tv_usec = USEC_AFTER - now.tv_usec;
-	if (next.tv_usec <= 0)
-		next.tv_usec += USEC_PER_SEC;
-
-	if (!fail)
-		next.tv_sec = 659;
-	else
-		next.tv_sec = 0;
-
-	if (next.tv_usec >= USEC_PER_SEC) {
-		next.tv_sec++;
-		next.tv_usec -= USEC_PER_SEC;
-	}
-	mod_timer(&sync_cmos_timer, jiffies + timeval_to_jiffies(&next));
-}
-
-void notify_arch_cmos_timer(void)
+int update_persistent_clock(struct timespec now)
 {
-	if (!no_sync_cmos_clock)
-		mod_timer(&sync_cmos_timer, jiffies + 1);
+	return set_rtc_mmss(now.tv_sec);
 }
 
 extern void (*late_time_init)(void);
Index: linux-2.6.22/arch/sparc64/Kconfig
===================================================================
--- linux-2.6.22.orig/arch/sparc64/Kconfig	2007-07-15 17:13:42.000000000 +0200
+++ linux-2.6.22/arch/sparc64/Kconfig	2007-07-15 17:13:47.000000000 +0200
@@ -23,6 +23,10 @@ config GENERIC_TIME
 	bool
 	default y
 
+config GENERIC_CMOS_UPDATE
+	bool
+	default y
+
 config GENERIC_CLOCKEVENTS
 	bool
 	default y
Index: linux-2.6.22/include/asm-i386/timer.h
===================================================================
--- linux-2.6.22.orig/include/asm-i386/timer.h	2007-07-15 17:13:42.000000000 +0200
+++ linux-2.6.22/include/asm-i386/timer.h	2007-07-15 17:13:47.000000000 +0200
@@ -5,13 +5,11 @@
 
 #define TICK_SIZE (tick_nsec / 1000)
 
-void setup_pit_timer(void);
 unsigned long long native_sched_clock(void);
 unsigned long native_calculate_cpu_khz(void);
 
 extern int timer_ack;
 extern int no_timer_check;
-extern int no_sync_cmos_clock;
 extern int recalibrate_cpu_khz(void);
 
 #ifndef CONFIG_PARAVIRT
Index: linux-2.6.22/include/linux/time.h
===================================================================
--- linux-2.6.22.orig/include/linux/time.h	2007-07-15 17:13:42.000000000 +0200
+++ linux-2.6.22/include/linux/time.h	2007-07-15 17:13:47.000000000 +0200
@@ -4,6 +4,7 @@
 #include <linux/types.h>
 
 #ifdef __KERNEL__
+# include <linux/cache.h>
 # include <linux/seqlock.h>
 #endif
 
@@ -93,6 +94,8 @@ extern struct timespec wall_to_monotonic
 extern seqlock_t xtime_lock __attribute__((weak));
 
 extern unsigned long read_persistent_clock(void);
+extern int update_persistent_clock(struct timespec now);
+extern int no_sync_cmos_clock __read_mostly;
 void timekeeping_init(void);
 
 static inline unsigned long get_seconds(void)
Index: linux-2.6.22/kernel/time/ntp.c
===================================================================
--- linux-2.6.22.orig/kernel/time/ntp.c	2007-07-15 17:13:42.000000000 +0200
+++ linux-2.6.22/kernel/time/ntp.c	2007-07-15 17:13:47.000000000 +0200
@@ -10,6 +10,7 @@
 
 #include <linux/mm.h>
 #include <linux/time.h>
+#include <linux/timer.h>
 #include <linux/timex.h>
 #include <linux/jiffies.h>
 #include <linux/hrtimer.h>
@@ -185,12 +186,64 @@ u64 current_tick_length(void)
 	return tick_length;
 }
 
+#ifdef CONFIG_GENERIC_CMOS_UPDATE
 
-void __attribute__ ((weak)) notify_arch_cmos_timer(void)
+/* Disable the cmos update - used by virtualization and embedded */
+int no_sync_cmos_clock  __read_mostly;
+
+static void sync_cmos_clock(unsigned long dummy);
+
+static DEFINE_TIMER(sync_cmos_timer, sync_cmos_clock, 0, 0);
+
+static void sync_cmos_clock(unsigned long dummy)
 {
-	return;
+	struct timespec now, next;
+	int fail = 1;
+
+	/*
+	 * If we have an externally synchronized Linux clock, then update
+	 * CMOS clock accordingly every ~11 minutes. Set_rtc_mmss() has to be
+	 * called as close as possible to 500 ms before the new second starts.
+	 * This code is run on a timer.  If the clock is set, that timer
+	 * may not expire at the correct time.  Thus, we adjust...
+	 */
+	if (!ntp_synced())
+		/*
+		 * Not synced, exit, do not restart a timer (if one is
+		 * running, let it run out).
+		 */
+		return;
+
+	getnstimeofday(&now);
+	if (abs(xtime.tv_nsec - (NSEC_PER_SEC / 2)) <= tick_nsec / 2)
+		fail = update_persistent_clock(now);
+
+	next.tv_nsec = (NSEC_PER_SEC / 2) - now.tv_nsec;
+	if (next.tv_nsec <= 0)
+		next.tv_nsec += NSEC_PER_SEC;
+
+	if (!fail)
+		next.tv_sec = 659;
+	else
+		next.tv_sec = 0;
+
+	if (next.tv_nsec >= NSEC_PER_SEC) {
+		next.tv_sec++;
+		next.tv_nsec -= NSEC_PER_SEC;
+	}
+	mod_timer(&sync_cmos_timer, jiffies + timespec_to_jiffies(&next));
 }
 
+static void notify_cmos_timer(void)
+{
+	if (no_sync_cmos_clock)
+		mod_timer(&sync_cmos_timer, jiffies + 1);
+}
+
+#else
+static inline void notify_cmos_timer(void) { }
+#endif
+
 /* adjtimex mainly allows reading (and writing, if superuser) of
  * kernel time-keeping variables. used by xntpd.
  */
@@ -355,6 +408,6 @@ leave:	if ((time_status & (STA_UNSYNC|ST
 	txc->stbcnt	   = 0;
 	write_sequnlock_irq(&xtime_lock);
 	do_gettimeofday(&txc->time);
-	notify_arch_cmos_timer();
+	notify_cmos_timer();
 	return(result);
 }
Index: linux-2.6.22/include/asm-i386/i8253.h
===================================================================
--- linux-2.6.22.orig/include/asm-i386/i8253.h	2007-07-15 17:13:41.000000000 +0200
+++ linux-2.6.22/include/asm-i386/i8253.h	2007-07-15 17:13:52.000000000 +0200
@@ -1,21 +1,15 @@
 #ifndef __ASM_I8253_H__
 #define __ASM_I8253_H__
 
-#include <linux/clockchips.h>
+/* i8253A PIT registers */
+#define PIT_MODE		0x43
+#define PIT_CH0			0x40
+#define PIT_CH2			0x42
 
 extern spinlock_t i8253_lock;
 
 extern struct clock_event_device *global_clock_event;
 
-/**
- * pit_interrupt_hook - hook into timer tick
- * @regs:	standard registers from interrupt
- *
- * Call the global clock event handler.
- **/
-static inline void pit_interrupt_hook(void)
-{
-	global_clock_event->event_handler(global_clock_event);
-}
+extern void setup_pit_timer(void);
 
 #endif	/* __ASM_I8253_H__ */
Index: linux-2.6.22/include/asm-i386/mach-default/io_ports.h
===================================================================
--- linux-2.6.22.orig/include/asm-i386/mach-default/io_ports.h	2007-07-15 17:13:41.000000000 +0200
+++ linux-2.6.22/include/asm-i386/mach-default/io_ports.h	2007-07-15 17:13:47.000000000 +0200
@@ -7,11 +7,6 @@
 #ifndef _MACH_IO_PORTS_H
 #define _MACH_IO_PORTS_H
 
-/* i8253A PIT registers */
-#define PIT_MODE		0x43
-#define PIT_CH0			0x40
-#define PIT_CH2			0x42
-
 /* i8259A PIC registers */
 #define PIC_MASTER_CMD		0x20
 #define PIC_MASTER_IMR		0x21
Index: linux-2.6.22/kernel/time/Kconfig
===================================================================
--- linux-2.6.22.orig/kernel/time/Kconfig	2007-07-15 17:13:41.000000000 +0200
+++ linux-2.6.22/kernel/time/Kconfig	2007-07-15 17:13:48.000000000 +0200
@@ -23,3 +23,8 @@ config HIGH_RES_TIMERS
 	  hardware is not capable then this option only increases
 	  the size of the kernel image.
 
+config GENERIC_CLOCKEVENTS_BUILD
+	bool
+	default y
+	depends on GENERIC_CLOCKEVENTS || GENERIC_CLOCKEVENTS_MIGR
+
Index: linux-2.6.22/kernel/time/Makefile
===================================================================
--- linux-2.6.22.orig/kernel/time/Makefile	2007-07-15 17:13:41.000000000 +0200
+++ linux-2.6.22/kernel/time/Makefile	2007-07-15 17:13:48.000000000 +0200
@@ -1,6 +1,6 @@
 obj-y += timekeeping.o ntp.o clocksource.o jiffies.o timer_list.o
 
-obj-$(CONFIG_GENERIC_CLOCKEVENTS)		+= clockevents.o
+obj-$(CONFIG_GENERIC_CLOCKEVENTS_BUILD)		+= clockevents.o
 obj-$(CONFIG_GENERIC_CLOCKEVENTS)		+= tick-common.o
 obj-$(CONFIG_GENERIC_CLOCKEVENTS_BROADCAST)	+= tick-broadcast.o
 obj-$(CONFIG_TICK_ONESHOT)			+= tick-oneshot.o
Index: linux-2.6.22/include/asm-i386/mach-default/do_timer.h
===================================================================
--- linux-2.6.22.orig/include/asm-i386/mach-default/do_timer.h	2007-07-15 17:13:41.000000000 +0200
+++ linux-2.6.22/include/asm-i386/mach-default/do_timer.h	2007-07-15 17:13:48.000000000 +0200
@@ -12,5 +12,5 @@
 
 static inline void do_timer_interrupt_hook(void)
 {
-	pit_interrupt_hook();
+	global_clock_event->event_handler(global_clock_event);
 }
Index: linux-2.6.22/include/asm-i386/mach-voyager/do_timer.h
===================================================================
--- linux-2.6.22.orig/include/asm-i386/mach-voyager/do_timer.h	2007-07-15 17:13:41.000000000 +0200
+++ linux-2.6.22/include/asm-i386/mach-voyager/do_timer.h	2007-07-15 17:13:48.000000000 +0200
@@ -12,7 +12,7 @@
  **/
 static inline void do_timer_interrupt_hook(void)
 {
-	pit_interrupt_hook();
+	global_clock_event->event_handler(global_clock_event);
 	voyager_timer_interrupt();
 }
 
Index: linux-2.6.22/arch/x86_64/kernel/hpet.c
===================================================================
--- linux-2.6.22.orig/arch/x86_64/kernel/hpet.c	2007-07-15 17:13:41.000000000 +0200
+++ /dev/null	1970-01-01 00:00:00.000000000 +0000
@@ -1,493 +0,0 @@
-#include <linux/kernel.h>
-#include <linux/sched.h>
-#include <linux/init.h>
-#include <linux/mc146818rtc.h>
-#include <linux/time.h>
-#include <linux/clocksource.h>
-#include <linux/ioport.h>
-#include <linux/acpi.h>
-#include <linux/hpet.h>
-#include <asm/pgtable.h>
-#include <asm/vsyscall.h>
-#include <asm/timex.h>
-#include <asm/hpet.h>
-
-#define HPET_MASK	0xFFFFFFFF
-#define HPET_SHIFT	22
-
-/* FSEC = 10^-15 NSEC = 10^-9 */
-#define FSEC_PER_NSEC	1000000
-
-int nohpet __initdata;
-
-unsigned long hpet_address;
-unsigned long hpet_period;	/* fsecs / HPET clock */
-unsigned long hpet_tick;	/* HPET clocks / interrupt */
-
-int hpet_use_timer;		/* Use counter of hpet for time keeping,
-				 * otherwise PIT
-				 */
-
-#ifdef	CONFIG_HPET
-static __init int late_hpet_init(void)
-{
-	struct hpet_data	hd;
-	unsigned int 		ntimer;
-
-	if (!hpet_address)
-        	return 0;
-
-	memset(&hd, 0, sizeof(hd));
-
-	ntimer = hpet_readl(HPET_ID);
-	ntimer = (ntimer & HPET_ID_NUMBER) >> HPET_ID_NUMBER_SHIFT;
-	ntimer++;
-
-	/*
-	 * Register with driver.
-	 * Timer0 and Timer1 is used by platform.
-	 */
-	hd.hd_phys_address = hpet_address;
-	hd.hd_address = (void __iomem *)fix_to_virt(FIX_HPET_BASE);
-	hd.hd_nirqs = ntimer;
-	hd.hd_flags = HPET_DATA_PLATFORM;
-	hpet_reserve_timer(&hd, 0);
-#ifdef	CONFIG_HPET_EMULATE_RTC
-	hpet_reserve_timer(&hd, 1);
-#endif
-	hd.hd_irq[0] = HPET_LEGACY_8254;
-	hd.hd_irq[1] = HPET_LEGACY_RTC;
-	if (ntimer > 2) {
-		struct hpet		*hpet;
-		struct hpet_timer	*timer;
-		int			i;
-
-		hpet = (struct hpet *) fix_to_virt(FIX_HPET_BASE);
-		timer = &hpet->hpet_timers[2];
-		for (i = 2; i < ntimer; timer++, i++)
-			hd.hd_irq[i] = (timer->hpet_config &
-					Tn_INT_ROUTE_CNF_MASK) >>
-				Tn_INT_ROUTE_CNF_SHIFT;
-
-	}
-
-	hpet_alloc(&hd);
-	return 0;
-}
-fs_initcall(late_hpet_init);
-#endif
-
-int hpet_timer_stop_set_go(unsigned long tick)
-{
-	unsigned int cfg;
-
-/*
- * Stop the timers and reset the main counter.
- */
-
-	cfg = hpet_readl(HPET_CFG);
-	cfg &= ~(HPET_CFG_ENABLE | HPET_CFG_LEGACY);
-	hpet_writel(cfg, HPET_CFG);
-	hpet_writel(0, HPET_COUNTER);
-	hpet_writel(0, HPET_COUNTER + 4);
-
-/*
- * Set up timer 0, as periodic with first interrupt to happen at hpet_tick,
- * and period also hpet_tick.
- */
-	if (hpet_use_timer) {
-		hpet_writel(HPET_TN_ENABLE | HPET_TN_PERIODIC | HPET_TN_SETVAL |
-		    HPET_TN_32BIT, HPET_T0_CFG);
-		hpet_writel(hpet_tick, HPET_T0_CMP); /* next interrupt */
-		hpet_writel(hpet_tick, HPET_T0_CMP); /* period */
-		cfg |= HPET_CFG_LEGACY;
-	}
-/*
- * Go!
- */
-
-	cfg |= HPET_CFG_ENABLE;
-	hpet_writel(cfg, HPET_CFG);
-
-	return 0;
-}
-
-static cycle_t read_hpet(void)
-{
-	return (cycle_t)hpet_readl(HPET_COUNTER);
-}
-
-static cycle_t __vsyscall_fn vread_hpet(void)
-{
-	return readl((void __iomem *)fix_to_virt(VSYSCALL_HPET) + 0xf0);
-}
-
-struct clocksource clocksource_hpet = {
-	.name		= "hpet",
-	.rating		= 250,
-	.read		= read_hpet,
-	.mask		= (cycle_t)HPET_MASK,
-	.mult		= 0, /* set below */
-	.shift		= HPET_SHIFT,
-	.flags		= CLOCK_SOURCE_IS_CONTINUOUS,
-	.vread		= vread_hpet,
-};
-
-int hpet_arch_init(void)
-{
-	unsigned int id;
-	u64 tmp;
-
-	if (!hpet_address)
-		return -1;
-	set_fixmap_nocache(FIX_HPET_BASE, hpet_address);
-	__set_fixmap(VSYSCALL_HPET, hpet_address, PAGE_KERNEL_VSYSCALL_NOCACHE);
-
-/*
- * Read the period, compute tick and quotient.
- */
-
-	id = hpet_readl(HPET_ID);
-
-	if (!(id & HPET_ID_VENDOR) || !(id & HPET_ID_NUMBER))
-		return -1;
-
-	hpet_period = hpet_readl(HPET_PERIOD);
-	if (hpet_period < 100000 || hpet_period > 100000000)
-		return -1;
-
-	hpet_tick = (FSEC_PER_TICK + hpet_period / 2) / hpet_period;
-
-	hpet_use_timer = (id & HPET_ID_LEGSUP);
-
-	/*
-	 * hpet period is in femto seconds per cycle
-	 * so we need to convert this to ns/cyc units
-	 * aproximated by mult/2^shift
-	 *
-	 *  fsec/cyc * 1nsec/1000000fsec = nsec/cyc = mult/2^shift
-	 *  fsec/cyc * 1ns/1000000fsec * 2^shift = mult
-	 *  fsec/cyc * 2^shift * 1nsec/1000000fsec = mult
-	 *  (fsec/cyc << shift)/1000000 = mult
-	 *  (hpet_period << shift)/FSEC_PER_NSEC = mult
-	 */
-	tmp = (u64)hpet_period << HPET_SHIFT;
-	do_div(tmp, FSEC_PER_NSEC);
-	clocksource_hpet.mult = (u32)tmp;
-	clocksource_register(&clocksource_hpet);
-
-	return hpet_timer_stop_set_go(hpet_tick);
-}
-
-int hpet_reenable(void)
-{
-	return hpet_timer_stop_set_go(hpet_tick);
-}
-
-/*
- * calibrate_tsc() calibrates the processor TSC in a very simple way, comparing
- * it to the HPET timer of known frequency.
- */
-
-#define TICK_COUNT 100000000
-#define TICK_MIN   5000
-#define MAX_TRIES  5
-
-/*
- * Some platforms take periodic SMI interrupts with 5ms duration. Make sure none
- * occurs between the reads of the hpet & TSC.
- */
-static void __init read_hpet_tsc(int *hpet, int *tsc)
-{
-	int tsc1, tsc2, hpet1, i;
-
-	for (i = 0; i < MAX_TRIES; i++) {
-		tsc1 = get_cycles_sync();
-		hpet1 = hpet_readl(HPET_COUNTER);
-		tsc2 = get_cycles_sync();
-		if (tsc2 - tsc1 > TICK_MIN)
-			break;
-	}
-	*hpet = hpet1;
-	*tsc = tsc2;
-}
-
-unsigned int __init hpet_calibrate_tsc(void)
-{
-	int tsc_start, hpet_start;
-	int tsc_now, hpet_now;
-	unsigned long flags;
-
-	local_irq_save(flags);
-
-	read_hpet_tsc(&hpet_start, &tsc_start);
-
-	do {
-		local_irq_disable();
-		read_hpet_tsc(&hpet_now, &tsc_now);
-		local_irq_restore(flags);
-	} while ((tsc_now - tsc_start) < TICK_COUNT &&
-		(hpet_now - hpet_start) < TICK_COUNT);
-
-	return (tsc_now - tsc_start) * 1000000000L
-		/ ((hpet_now - hpet_start) * hpet_period / 1000);
-}
-
-#ifdef CONFIG_HPET_EMULATE_RTC
-/* HPET in LegacyReplacement Mode eats up RTC interrupt line. When, HPET
- * is enabled, we support RTC interrupt functionality in software.
- * RTC has 3 kinds of interrupts:
- * 1) Update Interrupt - generate an interrupt, every sec, when RTC clock
- *    is updated
- * 2) Alarm Interrupt - generate an interrupt at a specific time of day
- * 3) Periodic Interrupt - generate periodic interrupt, with frequencies
- *    2Hz-8192Hz (2Hz-64Hz for non-root user) (all freqs in powers of 2)
- * (1) and (2) above are implemented using polling at a frequency of
- * 64 Hz. The exact frequency is a tradeoff between accuracy and interrupt
- * overhead. (DEFAULT_RTC_INT_FREQ)
- * For (3), we use interrupts at 64Hz or user specified periodic
- * frequency, whichever is higher.
- */
-#include <linux/rtc.h>
-
-#define DEFAULT_RTC_INT_FREQ 	64
-#define RTC_NUM_INTS 		1
-
-static unsigned long UIE_on;
-static unsigned long prev_update_sec;
-
-static unsigned long AIE_on;
-static struct rtc_time alarm_time;
-
-static unsigned long PIE_on;
-static unsigned long PIE_freq = DEFAULT_RTC_INT_FREQ;
-static unsigned long PIE_count;
-
-static unsigned long hpet_rtc_int_freq; /* RTC interrupt frequency */
-static unsigned int hpet_t1_cmp; /* cached comparator register */
-
-int is_hpet_enabled(void)
-{
-	return hpet_address != 0;
-}
-
-/*
- * Timer 1 for RTC, we do not use periodic interrupt feature,
- * even if HPET supports periodic interrupts on Timer 1.
- * The reason being, to set up a periodic interrupt in HPET, we need to
- * stop the main counter. And if we do that everytime someone diables/enables
- * RTC, we will have adverse effect on main kernel timer running on Timer 0.
- * So, for the time being, simulate the periodic interrupt in software.
- *
- * hpet_rtc_timer_init() is called for the first time and during subsequent
- * interuppts reinit happens through hpet_rtc_timer_reinit().
- */
-int hpet_rtc_timer_init(void)
-{
-	unsigned int cfg, cnt;
-	unsigned long flags;
-
-	if (!is_hpet_enabled())
-		return 0;
-	/*
-	 * Set the counter 1 and enable the interrupts.
-	 */
-	if (PIE_on && (PIE_freq > DEFAULT_RTC_INT_FREQ))
-		hpet_rtc_int_freq = PIE_freq;
-	else
-		hpet_rtc_int_freq = DEFAULT_RTC_INT_FREQ;
-
-	local_irq_save(flags);
-
-	cnt = hpet_readl(HPET_COUNTER);
-	cnt += ((hpet_tick*HZ)/hpet_rtc_int_freq);
-	hpet_writel(cnt, HPET_T1_CMP);
-	hpet_t1_cmp = cnt;
-
-	cfg = hpet_readl(HPET_T1_CFG);
-	cfg &= ~HPET_TN_PERIODIC;
-	cfg |= HPET_TN_ENABLE | HPET_TN_32BIT;
-	hpet_writel(cfg, HPET_T1_CFG);
-
-	local_irq_restore(flags);
-
-	return 1;
-}
-
-static void hpet_rtc_timer_reinit(void)
-{
-	unsigned int cfg, cnt, ticks_per_int, lost_ints;
-
-	if (unlikely(!(PIE_on | AIE_on | UIE_on))) {
-		cfg = hpet_readl(HPET_T1_CFG);
-		cfg &= ~HPET_TN_ENABLE;
-		hpet_writel(cfg, HPET_T1_CFG);
-		return;
-	}
-
-	if (PIE_on && (PIE_freq > DEFAULT_RTC_INT_FREQ))
-		hpet_rtc_int_freq = PIE_freq;
-	else
-		hpet_rtc_int_freq = DEFAULT_RTC_INT_FREQ;
-
-	/* It is more accurate to use the comparator value than current count.*/
-	ticks_per_int = hpet_tick * HZ / hpet_rtc_int_freq;
-	hpet_t1_cmp += ticks_per_int;
-	hpet_writel(hpet_t1_cmp, HPET_T1_CMP);
-
-	/*
-	 * If the interrupt handler was delayed too long, the write above tries
-	 * to schedule the next interrupt in the past and the hardware would
-	 * not interrupt until the counter had wrapped around.
-	 * So we have to check that the comparator wasn't set to a past time.
-	 */
-	cnt = hpet_readl(HPET_COUNTER);
-	if (unlikely((int)(cnt - hpet_t1_cmp) > 0)) {
-		lost_ints = (cnt - hpet_t1_cmp) / ticks_per_int + 1;
-		/* Make sure that, even with the time needed to execute
-		 * this code, the next scheduled interrupt has been moved
-		 * back to the future: */
-		lost_ints++;
-
-		hpet_t1_cmp += lost_ints * ticks_per_int;
-		hpet_writel(hpet_t1_cmp, HPET_T1_CMP);
-
-		if (PIE_on)
-			PIE_count += lost_ints;
-
-		if (printk_ratelimit())
-			printk(KERN_WARNING "rtc: lost some interrupts at %ldHz.\n",
-			       hpet_rtc_int_freq);
-	}
-}
-
-/*
- * The functions below are called from rtc driver.
- * Return 0 if HPET is not being used.
- * Otherwise do the necessary changes and return 1.
- */
-int hpet_mask_rtc_irq_bit(unsigned long bit_mask)
-{
-	if (!is_hpet_enabled())
-		return 0;
-
-	if (bit_mask & RTC_UIE)
-		UIE_on = 0;
-	if (bit_mask & RTC_PIE)
-		PIE_on = 0;
-	if (bit_mask & RTC_AIE)
-		AIE_on = 0;
-
-	return 1;
-}
-
-int hpet_set_rtc_irq_bit(unsigned long bit_mask)
-{
-	int timer_init_reqd = 0;
-
-	if (!is_hpet_enabled())
-		return 0;
-
-	if (!(PIE_on | AIE_on | UIE_on))
-		timer_init_reqd = 1;
-
-	if (bit_mask & RTC_UIE) {
-		UIE_on = 1;
-	}
-	if (bit_mask & RTC_PIE) {
-		PIE_on = 1;
-		PIE_count = 0;
-	}
-	if (bit_mask & RTC_AIE) {
-		AIE_on = 1;
-	}
-
-	if (timer_init_reqd)
-		hpet_rtc_timer_init();
-
-	return 1;
-}
-
-int hpet_set_alarm_time(unsigned char hrs, unsigned char min, unsigned char sec)
-{
-	if (!is_hpet_enabled())
-		return 0;
-
-	alarm_time.tm_hour = hrs;
-	alarm_time.tm_min = min;
-	alarm_time.tm_sec = sec;
-
-	return 1;
-}
-
-int hpet_set_periodic_freq(unsigned long freq)
-{
-	if (!is_hpet_enabled())
-		return 0;
-
-	PIE_freq = freq;
-	PIE_count = 0;
-
-	return 1;
-}
-
-int hpet_rtc_dropped_irq(void)
-{
-	if (!is_hpet_enabled())
-		return 0;
-
-	return 1;
-}
-
-irqreturn_t hpet_rtc_interrupt(int irq, void *dev_id, struct pt_regs *regs)
-{
-	struct rtc_time curr_time;
-	unsigned long rtc_int_flag = 0;
-	int call_rtc_interrupt = 0;
-
-	hpet_rtc_timer_reinit();
-
-	if (UIE_on | AIE_on) {
-		rtc_get_rtc_time(&curr_time);
-	}
-	if (UIE_on) {
-		if (curr_time.tm_sec != prev_update_sec) {
-			/* Set update int info, call real rtc int routine */
-			call_rtc_interrupt = 1;
-			rtc_int_flag = RTC_UF;
-			prev_update_sec = curr_time.tm_sec;
-		}
-	}
-	if (PIE_on) {
-		PIE_count++;
-		if (PIE_count >= hpet_rtc_int_freq/PIE_freq) {
-			/* Set periodic int info, call real rtc int routine */
-			call_rtc_interrupt = 1;
-			rtc_int_flag |= RTC_PF;
-			PIE_count = 0;
-		}
-	}
-	if (AIE_on) {
-		if ((curr_time.tm_sec == alarm_time.tm_sec) &&
-		    (curr_time.tm_min == alarm_time.tm_min) &&
-		    (curr_time.tm_hour == alarm_time.tm_hour)) {
-			/* Set alarm int info, call real rtc int routine */
-			call_rtc_interrupt = 1;
-			rtc_int_flag |= RTC_AF;
-		}
-	}
-	if (call_rtc_interrupt) {
-		rtc_int_flag |= (RTC_IRQF | (RTC_NUM_INTS << 8));
-		rtc_interrupt(rtc_int_flag, dev_id);
-	}
-	return IRQ_HANDLED;
-}
-#endif
-
-static int __init nohpet_setup(char *s)
-{
-	nohpet = 1;
-	return 1;
-}
-
-__setup("nohpet", nohpet_setup);
Index: linux-2.6.22/drivers/char/rtc.c
===================================================================
--- linux-2.6.22.orig/drivers/char/rtc.c	2007-07-15 17:13:41.000000000 +0200
+++ linux-2.6.22/drivers/char/rtc.c	2007-07-15 17:13:48.000000000 +0200
@@ -82,7 +82,7 @@
 #include <asm/uaccess.h>
 #include <asm/system.h>
 
-#if defined(__i386__)
+#ifdef CONFIG_X86
 #include <asm/hpet.h>
 #endif
 
Index: linux-2.6.22/include/asm-x86_64/apic.h
===================================================================
--- linux-2.6.22.orig/include/asm-x86_64/apic.h	2007-07-15 17:13:41.000000000 +0200
+++ linux-2.6.22/include/asm-x86_64/apic.h	2007-07-15 17:13:51.000000000 +0200
@@ -19,7 +19,6 @@
 extern int apic_verbosity;
 extern int apic_runs_main_timer;
 extern int ioapic_force;
-extern int apic_mapped;
 
 /*
  * Define the default level of output to be very little
@@ -79,12 +78,12 @@ extern void smp_local_timer_interrupt (v
 extern void setup_boot_APIC_clock (void);
 extern void setup_secondary_APIC_clock (void);
 extern int APIC_init_uniprocessor (void);
-extern void disable_APIC_timer(void);
-extern void enable_APIC_timer(void);
 extern void setup_apic_routing(void);
 
-extern void setup_APIC_extened_lvt(unsigned char lvt_off, unsigned char vector,
-				   unsigned char msg_type, unsigned char mask);
+extern void setup_APIC_extended_lvt(unsigned char lvt_off, unsigned char vector,
+				    unsigned char msg_type, unsigned char mask);
+
+extern int apic_is_clustered_box(void);
 
 #define K8_APIC_EXT_LVT_BASE    0x500
 #define K8_APIC_EXT_INT_MSG_FIX 0x0
@@ -93,10 +92,6 @@ extern void setup_APIC_extened_lvt(unsig
 #define K8_APIC_EXT_INT_MSG_EXT 0x7
 #define K8_APIC_EXT_LVT_ENTRY_THRESHOLD    0
 
-void smp_send_timer_broadcast_ipi(void);
-void switch_APIC_timer_to_ipi(void *cpumask);
-void switch_ipi_to_APIC_timer(void *cpumask);
-
 #define ARCH_APICTIMER_STOPS_ON_C3	1
 
 extern unsigned boot_cpu_id;
Index: linux-2.6.22/include/asm-x86_64/hpet.h
===================================================================
--- linux-2.6.22.orig/include/asm-x86_64/hpet.h	2007-07-15 17:13:41.000000000 +0200
+++ linux-2.6.22/include/asm-x86_64/hpet.h	2007-07-15 17:13:51.000000000 +0200
@@ -1,78 +1,2 @@
-#ifndef _ASM_X8664_HPET_H
-#define _ASM_X8664_HPET_H 1
 
-/*
- * Documentation on HPET can be found at:
- *      http://www.intel.com/ial/home/sp/pcmmspec.htm
- *      ftp://download.intel.com/ial/home/sp/mmts098.pdf
- */
-
-#define HPET_MMAP_SIZE	1024
-
-#define HPET_ID		0x000
-#define HPET_PERIOD	0x004
-#define HPET_CFG	0x010
-#define HPET_STATUS	0x020
-#define HPET_COUNTER	0x0f0
-#define HPET_Tn_OFFSET	0x20
-#define HPET_Tn_CFG(n)	 (0x100 + (n) * HPET_Tn_OFFSET)
-#define HPET_Tn_ROUTE(n) (0x104 + (n) * HPET_Tn_OFFSET)
-#define HPET_Tn_CMP(n)	 (0x108 + (n) * HPET_Tn_OFFSET)
-#define HPET_T0_CFG	HPET_Tn_CFG(0)
-#define HPET_T0_CMP	HPET_Tn_CMP(0)
-#define HPET_T1_CFG	HPET_Tn_CFG(1)
-#define HPET_T1_CMP	HPET_Tn_CMP(1)
-
-#define HPET_ID_VENDOR	0xffff0000
-#define HPET_ID_LEGSUP	0x00008000
-#define HPET_ID_64BIT	0x00002000
-#define HPET_ID_NUMBER	0x00001f00
-#define HPET_ID_REV	0x000000ff
-#define	HPET_ID_NUMBER_SHIFT	8
-
-#define HPET_ID_VENDOR_SHIFT	16
-#define HPET_ID_VENDOR_8086	0x8086
-
-#define HPET_CFG_ENABLE	0x001
-#define HPET_CFG_LEGACY	0x002
-#define	HPET_LEGACY_8254	2
-#define	HPET_LEGACY_RTC		8
-
-#define HPET_TN_LEVEL		0x0002
-#define HPET_TN_ENABLE		0x0004
-#define HPET_TN_PERIODIC	0x0008
-#define HPET_TN_PERIODIC_CAP	0x0010
-#define HPET_TN_64BIT_CAP	0x0020
-#define HPET_TN_SETVAL		0x0040
-#define HPET_TN_32BIT		0x0100
-#define HPET_TN_ROUTE		0x3e00
-#define HPET_TN_FSB		0x4000
-#define HPET_TN_FSB_CAP		0x8000
-
-#define HPET_TN_ROUTE_SHIFT	9
-
-#define HPET_TICK_RATE (HZ * 100000UL)
-
-extern int is_hpet_enabled(void);
-extern int hpet_rtc_timer_init(void);
-extern int apic_is_clustered_box(void);
-extern int hpet_arch_init(void);
-extern int hpet_timer_stop_set_go(unsigned long tick);
-extern int hpet_reenable(void);
-extern unsigned int hpet_calibrate_tsc(void);
-
-extern int hpet_use_timer;
-extern unsigned long hpet_address;
-extern unsigned long hpet_period;
-extern unsigned long hpet_tick;
-
-#ifdef CONFIG_HPET_EMULATE_RTC
-extern int hpet_mask_rtc_irq_bit(unsigned long bit_mask);
-extern int hpet_set_rtc_irq_bit(unsigned long bit_mask);
-extern int hpet_set_alarm_time(unsigned char hrs, unsigned char min, unsigned char sec);
-extern int hpet_set_periodic_freq(unsigned long freq);
-extern int hpet_rtc_dropped_irq(void);
-extern int hpet_rtc_timer_init(void);
-#endif /* CONFIG_HPET_EMULATE_RTC */
-
-#endif
+#include <asm-i386/hpet.h>
Index: linux-2.6.22/include/asm-x86_64/timex.h
===================================================================
--- linux-2.6.22.orig/include/asm-x86_64/timex.h	2007-07-15 17:13:41.000000000 +0200
+++ linux-2.6.22/include/asm-x86_64/timex.h	2007-07-15 17:13:48.000000000 +0200
@@ -9,7 +9,6 @@
 #include <asm/8253pit.h>
 #include <asm/msr.h>
 #include <asm/vsyscall.h>
-#include <asm/hpet.h>
 #include <asm/system.h>
 #include <asm/processor.h>
 #include <asm/tsc.h>
Index: linux-2.6.22/arch/x86_64/kernel/tsc.c
===================================================================
--- linux-2.6.22.orig/arch/x86_64/kernel/tsc.c	2007-07-15 17:13:40.000000000 +0200
+++ linux-2.6.22/arch/x86_64/kernel/tsc.c	2007-07-15 17:13:49.000000000 +0200
@@ -6,7 +6,9 @@
 #include <linux/time.h>
 #include <linux/acpi.h>
 #include <linux/cpufreq.h>
+#include <linux/acpi_pmtmr.h>
 
+#include <asm/hpet.h>
 #include <asm/timex.h>
 
 static int notsc __initdata = 0;
@@ -61,25 +63,9 @@ static inline int check_tsc_unstable(voi
  * first tick after the change will be slightly wrong.
  */
 
-#include <linux/workqueue.h>
-
-static unsigned int cpufreq_delayed_issched = 0;
-static unsigned int cpufreq_init = 0;
-static struct work_struct cpufreq_delayed_get_work;
-
-static void handle_cpufreq_delayed_get(struct work_struct *v)
-{
-	unsigned int cpu;
-	for_each_online_cpu(cpu) {
-		cpufreq_get(cpu);
-	}
-	cpufreq_delayed_issched = 0;
-}
-
-static unsigned int  ref_freq = 0;
-static unsigned long loops_per_jiffy_ref = 0;
-
-static unsigned long tsc_khz_ref = 0;
+static unsigned int  ref_freq;
+static unsigned long loops_per_jiffy_ref;
+static unsigned long tsc_khz_ref;
 
 static int time_cpufreq_notifier(struct notifier_block *nb, unsigned long val,
 				 void *data)
@@ -125,10 +111,8 @@ static struct notifier_block time_cpufre
 
 static int __init cpufreq_tsc(void)
 {
-	INIT_WORK(&cpufreq_delayed_get_work, handle_cpufreq_delayed_get);
-	if (!cpufreq_register_notifier(&time_cpufreq_notifier_block,
-				       CPUFREQ_TRANSITION_NOTIFIER))
-		cpufreq_init = 1;
+	cpufreq_register_notifier(&time_cpufreq_notifier_block,
+				  CPUFREQ_TRANSITION_NOTIFIER);
 	return 0;
 }
 
@@ -136,7 +120,93 @@ core_initcall(cpufreq_tsc);
 
 #endif
 
-static int tsc_unstable = 0;
+#define MAX_RETRIES	5
+#define SMI_TRESHOLD	50000
+
+/*
+ * Read TSC and the reference counters. Take care of SMI disturbance
+ */
+static unsigned long __init tsc_read_refs(unsigned long *pm,
+					  unsigned long *hpet)
+{
+	unsigned long t1, t2;
+	int i;
+
+	for (i = 0; i < MAX_RETRIES; i++) {
+		t1 = get_cycles_sync();
+		if (hpet)
+			*hpet = hpet_readl(HPET_COUNTER) & 0xFFFFFFFF;
+		else
+			*pm = acpi_pm_read_early();
+		t2 = get_cycles_sync();
+		if ((t2 - t1) < SMI_TRESHOLD)
+			return t2;
+	}
+	return ULONG_MAX;
+}
+
+/**
+ * tsc_calibrate - calibrate the tsc on boot
+ */
+void __init tsc_calibrate(void)
+{
+	unsigned long flags, tsc1, tsc2, tr1, tr2, pm1, pm2, hpet1, hpet2;
+	int hpet = is_hpet_enabled();
+
+	local_irq_save(flags);
+
+	tsc1 = tsc_read_refs(&pm1, hpet ? &hpet1 : NULL);
+
+	outb((inb(0x61) & ~0x02) | 0x01, 0x61);
+
+	outb(0xb0, 0x43);
+	outb((CLOCK_TICK_RATE / (1000 / 50)) & 0xff, 0x42);
+	outb((CLOCK_TICK_RATE / (1000 / 50)) >> 8, 0x42);
+	tr1 = get_cycles_sync();
+	while ((inb(0x61) & 0x20) == 0);
+	tr2 = get_cycles_sync();
+
+	tsc2 = tsc_read_refs(&pm2, hpet ? &hpet2 : NULL);
+
+	local_irq_restore(flags);
+
+	/*
+	 * Preset the result with the raw and inaccurate PIT
+	 * calibration value
+	 */
+	tsc_khz = (tr2 - tr1) / 50;
+
+	/* hpet or pmtimer available ? */
+	if (!hpet && !pm1 && !pm2) {
+		printk(KERN_INFO "TSC calibrated against PIT\n");
+		return;
+	}
+
+	/* Check, whether the sampling was disturbed by an SMI */
+	if (tsc1 == ULONG_MAX || tsc2 == ULONG_MAX) {
+		printk(KERN_WARNING "TSC calibration disturbed by SMI, "
+		       "using PIT calibration result\n");
+		return;
+	}
+
+	tsc2 = (tsc2 - tsc1) * 1000000L;
+
+	if (hpet) {
+		printk(KERN_INFO "TSC calibrated against HPET\n");
+		if (hpet2 < hpet1)
+			hpet2 += 0x100000000;
+		hpet2 -= hpet1;
+		tsc1 = (hpet2 * hpet_readl(HPET_PERIOD)) / 1000000;
+	} else {
+		printk(KERN_INFO "TSC calibrated against PM_TIMER\n");
+		if (pm2 < pm1)
+			pm2 += ACPI_PM_OVRRUN;
+		pm2 -= pm1;
+		tsc1 = (pm2 * 1000000000) / PMTMR_TICKS_PER_SEC;
+	}
+
+	tsc_khz = tsc2 / tsc1;
+}
 
 /*
  * Make an educated guess if the TSC is trustworthy and synchronized
@@ -153,17 +223,18 @@ __cpuinit int unsynchronized_tsc(void)
 #endif
 	/* Most intel systems have synchronized TSCs except for
 	   multi node systems */
- 	if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) {
+	if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) {
 #ifdef CONFIG_ACPI
 		/* But TSC doesn't tick in C3 so don't use it there */
-		if (acpi_gbl_FADT.header.length > 0 && acpi_gbl_FADT.C3latency < 1000)
+		if (acpi_gbl_FADT.header.length > 0 &&
+		    acpi_gbl_FADT.C3latency < 1000)
 			return 1;
 #endif
- 		return 0;
+		return 0;
 	}
 
- 	/* Assume multi socket systems are not synchronized */
- 	return num_present_cpus() > 1;
+	/* Assume multi socket systems are not synchronized */
+	return num_present_cpus() > 1;
 }
 
 int __init notsc_setup(char *s)
Index: linux-2.6.22/arch/x86_64/kernel/apic.c
===================================================================
--- linux-2.6.22.orig/arch/x86_64/kernel/apic.c	2007-07-15 17:13:40.000000000 +0200
+++ linux-2.6.22/arch/x86_64/kernel/apic.c	2007-07-15 17:13:51.000000000 +0200
@@ -25,6 +25,7 @@
 #include <linux/sysdev.h>
 #include <linux/module.h>
 #include <linux/ioport.h>
+#include <linux/clockchips.h>
 
 #include <asm/atomic.h>
 #include <asm/smp.h>
@@ -39,12 +40,10 @@
 #include <asm/hpet.h>
 #include <asm/apic.h>
 
-int apic_mapped;
 int apic_verbosity;
-int apic_runs_main_timer;
-int apic_calibrate_pmtmr __initdata;
-
-int disable_apic_timer __initdata;
+static int apic_mapped;
+static int apic_calibrate_pmtmr __initdata;
+static int disable_apic_timer __initdata;
 
 /* Local APIC timer works in C2? */
 int local_apic_timer_c2_ok;
@@ -56,14 +55,78 @@ static struct resource lapic_resource = 
 	.flags = IORESOURCE_MEM | IORESOURCE_BUSY,
 };
 
+static unsigned int calibration_result;
+
+static int lapic_next_event(unsigned long delta,
+			    struct clock_event_device *evt);
+static void lapic_timer_setup(enum clock_event_mode mode,
+			      struct clock_event_device *evt);
+
+static void lapic_timer_broadcast(cpumask_t mask);
+
+static void __setup_APIC_LVTT(unsigned int clocks, int oneshot, int irqen);
+
+static struct clock_event_device lapic_clockevent = {
+	.name		= "lapic",
+	.features	= CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT
+			| CLOCK_EVT_FEAT_C3STOP | CLOCK_EVT_FEAT_DUMMY,
+	.shift		= 32,
+	.set_mode	= lapic_timer_setup,
+	.set_next_event	= lapic_next_event,
+	.broadcast	= lapic_timer_broadcast,
+	.rating		= 100,
+	.irq		= -1,
+};
+static DEFINE_PER_CPU(struct clock_event_device, lapic_events);
+
+static int lapic_next_event(unsigned long delta,
+			    struct clock_event_device *evt)
+{
+	apic_write(APIC_TMICT, delta);
+	return 0;
+}
+
+static void lapic_timer_setup(enum clock_event_mode mode,
+			      struct clock_event_device *evt)
+{
+	unsigned long flags;
+	unsigned int v;
+
+	/* Lapic used as dummy for broadcast ? */
+	if (evt->features & CLOCK_EVT_FEAT_DUMMY)
+		return;
+
+	local_irq_save(flags);
+
+	switch (mode) {
+	case CLOCK_EVT_MODE_PERIODIC:
+	case CLOCK_EVT_MODE_ONESHOT:
+		__setup_APIC_LVTT(calibration_result,
+				  mode != CLOCK_EVT_MODE_PERIODIC, 1);
+		break;
+	case CLOCK_EVT_MODE_UNUSED:
+	case CLOCK_EVT_MODE_SHUTDOWN:
+		v = apic_read(APIC_LVTT);
+		v |= (APIC_LVT_MASKED | LOCAL_TIMER_VECTOR);
+		apic_write(APIC_LVTT, v);
+		break;
+	case CLOCK_EVT_MODE_RESUME:
+		/* Nothing to do here */
+		break;
+	}
+
+	local_irq_restore(flags);
+}
+
 /*
- * cpu_mask that denotes the CPUs that needs timer interrupt coming in as
- * IPIs in place of local APIC timers
+ * Local APIC timer broadcast function
  */
-static cpumask_t timer_interrupt_broadcast_ipi_mask;
-
-/* Using APIC to generate smp_local_timer_interrupt? */
-int using_apic_timer __read_mostly = 0;
+static void lapic_timer_broadcast(cpumask_t mask)
+{
+#ifdef CONFIG_SMP
+	send_IPI_mask(mask, LOCAL_TIMER_VECTOR);
+#endif
+}
 
 static void apic_pm_activate(void);
 
@@ -92,8 +155,9 @@ unsigned int safe_apic_wait_icr_idle(voi
 void enable_NMI_through_LVT0 (void * dummy)
 {
 	unsigned int v;
-	
-	v = APIC_DM_NMI;                        /* unmask and set to NMI */
+
+	/* unmask and set to NMI */
+	v = APIC_DM_NMI;
 	apic_write(APIC_LVT0, v);
 }
 
@@ -120,7 +184,7 @@ void ack_bad_irq(unsigned int irq)
 	 * holds up an irq slot - in excessive cases (when multiple
 	 * unexpected vectors occur) that might lock up the APIC
 	 * completely.
-  	 * But don't ack when the APIC is disabled. -AK
+	 * But don't ack when the APIC is disabled. -AK
 	 */
 	if (!disable_apic)
 		ack_APIC_irq();
@@ -616,7 +680,7 @@ early_param("apic", apic_set_verbosity);
  * Detect and enable local APICs on non-SMP boards.
  * Original code written by Keir Fraser.
  * On AMD64 we trust the BIOS - if it says no APIC it is likely
- * not correctly set up (usually the APIC timer won't work etc.) 
+ * not correctly set up (usually the APIC timer won't work etc.)
  */
 
 static int __init detect_init_APIC (void)
@@ -757,16 +821,14 @@ void __init init_apic_mappings(void)
  * P5 APIC double write bug.
  */
 
-#define APIC_DIVISOR 16
-
-static void __setup_APIC_LVTT(unsigned int clocks)
+static void __setup_APIC_LVTT(unsigned int clocks, int oneshot, int irqen)
 {
 	unsigned int lvtt_value, tmp_value;
-	int cpu = smp_processor_id();
 
-	lvtt_value = APIC_LVT_TIMER_PERIODIC | LOCAL_TIMER_VECTOR;
-
-	if (cpu_isset(cpu, timer_interrupt_broadcast_ipi_mask))
+	lvtt_value = LOCAL_TIMER_VECTOR;
+	if (!oneshot)
+		lvtt_value |= APIC_LVT_TIMER_PERIODIC;
+	if (!irqen)
 		lvtt_value |= APIC_LVT_MASKED;
 
 	apic_write(APIC_LVTT, lvtt_value);
@@ -779,46 +841,18 @@ static void __setup_APIC_LVTT(unsigned i
 				& ~(APIC_TDR_DIV_1 | APIC_TDR_DIV_TMBASE))
 				| APIC_TDR_DIV_16);
 
-	apic_write(APIC_TMICT, clocks/APIC_DIVISOR);
+	if (!oneshot)
+		apic_write(APIC_TMICT, clocks);
 }
 
-static void setup_APIC_timer(unsigned int clocks)
+static void setup_APIC_timer(void)
 {
-	unsigned long flags;
+	struct clock_event_device *levt = &__get_cpu_var(lapic_events);
 
-	local_irq_save(flags);
+	memcpy(levt, &lapic_clockevent, sizeof(*levt));
+	levt->cpumask = cpumask_of_cpu(smp_processor_id());
 
-	/* wait for irq slice */
- 	if (hpet_address && hpet_use_timer) {
- 		int trigger = hpet_readl(HPET_T0_CMP);
- 		while (hpet_readl(HPET_COUNTER) >= trigger)
- 			/* do nothing */ ;
- 		while (hpet_readl(HPET_COUNTER) <  trigger)
- 			/* do nothing */ ;
- 	} else {
-		int c1, c2;
-		outb_p(0x00, 0x43);
-		c2 = inb_p(0x40);
-		c2 |= inb_p(0x40) << 8;
-		do {
-			c1 = c2;
-			outb_p(0x00, 0x43);
-			c2 = inb_p(0x40);
-			c2 |= inb_p(0x40) << 8;
-		} while (c2 - c1 < 300);
-	}
-	__setup_APIC_LVTT(clocks);
-	/* Turn off PIT interrupt if we use APIC timer as main timer.
-	   Only works with the PM timer right now
-	   TBD fix it for HPET too. */
-	if ((pmtmr_ioport != 0) &&
-		smp_processor_id() == boot_cpu_id &&
-		apic_runs_main_timer == 1 &&
-		!cpu_isset(boot_cpu_id, timer_interrupt_broadcast_ipi_mask)) {
-		stop_timer_interrupt();
-		apic_runs_main_timer++;
-	}
-	local_irq_restore(flags);
+	clockevents_register_device(levt);
 }
 
 /*
@@ -836,17 +870,22 @@ static void setup_APIC_timer(unsigned in
 
 #define TICK_COUNT 100000000
 
-static int __init calibrate_APIC_clock(void)
+static void __init calibrate_APIC_clock(void)
 {
 	unsigned apic, apic_start;
 	unsigned long tsc, tsc_start;
 	int result;
+
+	local_irq_disable();
+
 	/*
 	 * Put whatever arbitrary (but long enough) timeout
 	 * value into the APIC clock, we just want to get the
 	 * counter running for calibration.
+	 *
+	 * No interrupt enable !
 	 */
-	__setup_APIC_LVTT(4000000000);
+	__setup_APIC_LVTT(250000000, 0, 0);
 
 	apic_start = apic_read(APIC_TMCCT);
 #ifdef CONFIG_X86_PM_TIMER
@@ -868,138 +907,75 @@ static int __init calibrate_APIC_clock(v
 		result = (apic_start - apic) * 1000L * tsc_khz /
 					(tsc - tsc_start);
 	}
-	printk("result %d\n", result);
 
+	local_irq_enable();
+
+	printk(KERN_DEBUG "APIC timer calibration result %d\n", result);
 
 	printk(KERN_INFO "Detected %d.%03d MHz APIC timer.\n",
 		result / 1000 / 1000, result / 1000 % 1000);
 
-	return result * APIC_DIVISOR / HZ;
-}
+	/* Calculate the scaled math multiplication factor */
+	lapic_clockevent.mult = div_sc(result, NSEC_PER_SEC, 32);
+	lapic_clockevent.max_delta_ns =
+		clockevent_delta2ns(0x7FFFFF, &lapic_clockevent);
+	lapic_clockevent.min_delta_ns =
+		clockevent_delta2ns(0xF, &lapic_clockevent);
 
-static unsigned int calibration_result;
+	calibration_result = result / HZ;
+}
 
 void __init setup_boot_APIC_clock (void)
 {
-	if (disable_apic_timer) { 
-		printk(KERN_INFO "Disabling APIC timer\n"); 
-		return; 
-	} 
+	/*
+	 * The local apic timer can be disabled via the kernel commandline.
+	 * Register the lapic timer as a dummy clock event source on SMP
+	 * systems, so the broadcast mechanism is used. On UP systems simply
+	 * ignore it.
+	 */
+	if (disable_apic_timer) {
+		printk(KERN_INFO "Disabling APIC timer\n");
+		/* No broadcast on UP ! */
+		if (num_possible_cpus() > 1)
+			setup_APIC_timer();
+		return;
+	}
 
 	printk(KERN_INFO "Using local APIC timer interrupts.\n");
-	using_apic_timer = 1;
-
-	local_irq_disable();
+	calibrate_APIC_clock();
 
-	calibration_result = calibrate_APIC_clock();
 	/*
-	 * Now set up the timer for real.
+	 * If nmi_watchdog is set to IO_APIC, we need the
+	 * PIT/HPET going.  Otherwise register lapic as a dummy
+	 * device.
 	 */
-	setup_APIC_timer(calibration_result);
+	if (nmi_watchdog != NMI_IO_APIC)
+		lapic_clockevent.features &= ~CLOCK_EVT_FEAT_DUMMY;
+	else
+		printk(KERN_WARNING "APIC timer registered as dummy,"
+		       " due to nmi_watchdog=1!\n");
 
-	local_irq_enable();
+	setup_APIC_timer();
 }
 
 void __cpuinit setup_secondary_APIC_clock(void)
 {
-	local_irq_disable(); /* FIXME: Do we need this? --RR */
-	setup_APIC_timer(calibration_result);
-	local_irq_enable();
+	setup_APIC_timer();
 }
 
-void disable_APIC_timer(void)
-{
-	if (using_apic_timer) {
-		unsigned long v;
-
-		v = apic_read(APIC_LVTT);
-		/*
-		 * When an illegal vector value (0-15) is written to an LVT
-		 * entry and delivery mode is Fixed, the APIC may signal an
-		 * illegal vector error, with out regard to whether the mask
-		 * bit is set or whether an interrupt is actually seen on input.
-		 *
-		 * Boot sequence might call this function when the LVTT has
-		 * '0' vector value. So make sure vector field is set to
-		 * valid value.
-		 */
-		v |= (APIC_LVT_MASKED | LOCAL_TIMER_VECTOR);
-		apic_write(APIC_LVTT, v);
-	}
-}
-
-void enable_APIC_timer(void)
-{
-	int cpu = smp_processor_id();
-
-	if (using_apic_timer &&
-	    !cpu_isset(cpu, timer_interrupt_broadcast_ipi_mask)) {
-		unsigned long v;
-
-		v = apic_read(APIC_LVTT);
-		apic_write(APIC_LVTT, v & ~APIC_LVT_MASKED);
-	}
-}
-
-void switch_APIC_timer_to_ipi(void *cpumask)
-{
-	cpumask_t mask = *(cpumask_t *)cpumask;
-	int cpu = smp_processor_id();
-
-	if (cpu_isset(cpu, mask) &&
-	    !cpu_isset(cpu, timer_interrupt_broadcast_ipi_mask)) {
-		disable_APIC_timer();
-		cpu_set(cpu, timer_interrupt_broadcast_ipi_mask);
-	}
-}
-EXPORT_SYMBOL(switch_APIC_timer_to_ipi);
-
-void smp_send_timer_broadcast_ipi(void)
-{
-	int cpu = smp_processor_id();
-	cpumask_t mask;
-
-	cpus_and(mask, cpu_online_map, timer_interrupt_broadcast_ipi_mask);
-
-	if (cpu_isset(cpu, mask)) {
-		cpu_clear(cpu, mask);
-		add_pda(apic_timer_irqs, 1);
-		smp_local_timer_interrupt();
-	}
-
-	if (!cpus_empty(mask)) {
-		send_IPI_mask(mask, LOCAL_TIMER_VECTOR);
-	}
-}
-
-void switch_ipi_to_APIC_timer(void *cpumask)
-{
-	cpumask_t mask = *(cpumask_t *)cpumask;
-	int cpu = smp_processor_id();
-
-	if (cpu_isset(cpu, mask) &&
-	    cpu_isset(cpu, timer_interrupt_broadcast_ipi_mask)) {
-		cpu_clear(cpu, timer_interrupt_broadcast_ipi_mask);
-		enable_APIC_timer();
-	}
-}
-EXPORT_SYMBOL(switch_ipi_to_APIC_timer);
-
 int setup_profiling_timer(unsigned int multiplier)
 {
 	return -EINVAL;
 }
 
-void setup_APIC_extened_lvt(unsigned char lvt_off, unsigned char vector,
-			    unsigned char msg_type, unsigned char mask)
+void setup_APIC_extended_lvt(unsigned char lvt_off, unsigned char vector,
+			     unsigned char msg_type, unsigned char mask)
 {
 	unsigned long reg = (lvt_off << 4) + K8_APIC_EXT_LVT_BASE;
 	unsigned int  v   = (mask << 16) | (msg_type << 8) | vector;
 	apic_write(reg, v);
 }
 
-#undef APIC_DIVISOR
-
 /*
  * Local timer interrupt handler. It does both profiling and
  * process statistics/rescheduling.
@@ -1012,22 +988,34 @@ void setup_APIC_extened_lvt(unsigned cha
 
 void smp_local_timer_interrupt(void)
 {
-	profile_tick(CPU_PROFILING);
-#ifdef CONFIG_SMP
-	update_process_times(user_mode(get_irq_regs()));
-#endif
-	if (apic_runs_main_timer > 1 && smp_processor_id() == boot_cpu_id)
-		main_timer_handler();
+	int cpu = smp_processor_id();
+	struct clock_event_device *evt = &per_cpu(lapic_events, cpu);
+
 	/*
-	 * We take the 'long' return path, and there every subsystem
-	 * grabs the appropriate locks (kernel lock/ irq lock).
-	 *
-	 * We might want to decouple profiling from the 'long path',
-	 * and do the profiling totally in assembly.
+	 * Normally we should not be here till LAPIC has been initialized but
+	 * in some cases like kdump, its possible that there is a pending LAPIC
+	 * timer interrupt from previous kernel's context and is delivered in
+	 * new kernel the moment interrupts are enabled.
 	 *
-	 * Currently this isn't too much of an issue (performance wise),
-	 * we can take more than 100K local irqs per second on a 100 MHz P5.
+	 * Interrupts are enabled early and LAPIC is setup much later, hence
+	 * its possible that when we get here evt->event_handler is NULL.
+	 * Check for event_handler being NULL and discard the interrupt as
+	 * spurious.
+	 */
+	if (!evt->event_handler) {
+		printk(KERN_WARNING
+		       "Spurious LAPIC timer interrupt on cpu %d\n", cpu);
+		/* Switch it off */
+		lapic_timer_setup(CLOCK_EVT_MODE_SHUTDOWN, evt);
+		return;
+	}
+
+	/*
+	 * the NMI deadlock-detector uses this.
 	 */
+	add_pda(apic_timer_irqs, 1);
+
+	evt->event_handler(evt);
 }
 
 /*
@@ -1043,11 +1031,6 @@ void smp_apic_timer_interrupt(struct pt_
 	struct pt_regs *old_regs = set_irq_regs(regs);
 
 	/*
-	 * the NMI deadlock-detector uses this.
-	 */
-	add_pda(apic_timer_irqs, 1);
-
-	/*
 	 * NOTE! We'd better ACK the irq immediately,
 	 * because timer handling can be slow.
 	 */
@@ -1127,21 +1110,6 @@ asmlinkage void smp_spurious_interrupt(v
 	v = apic_read(APIC_ISR + ((SPURIOUS_APIC_VECTOR & ~0x1f) >> 1));
 	if (v & (1 << (SPURIOUS_APIC_VECTOR & 0x1f)))
 		ack_APIC_irq();
-
-#if 0
-	static unsigned long last_warning; 
-	static unsigned long skipped; 
-
-	/* see sw-dev-man vol 3, chapter 7.4.13.5 */
-	if (time_before(last_warning+30*HZ,jiffies)) { 
-		printk(KERN_INFO "spurious APIC interrupt on CPU#%d, %ld skipped.\n",
-		       smp_processor_id(), skipped);
-		last_warning = jiffies; 
-		skipped = 0;
-	} else { 
-		skipped++; 
-	} 
-#endif 
 	irq_exit();
 }
 
@@ -1173,11 +1141,11 @@ asmlinkage void smp_error_interrupt(void
 	   7: Illegal register address
 	*/
 	printk (KERN_DEBUG "APIC error on CPU%d: %02x(%02x)\n",
-	        smp_processor_id(), v , v1);
+		smp_processor_id(), v , v1);
 	irq_exit();
 }
 
-int disable_apic; 
+int disable_apic;
 
 /*
  * This initializes the IO-APIC and APIC hardware if this is
@@ -1185,11 +1153,11 @@ int disable_apic; 
  */
 int __init APIC_init_uniprocessor (void)
 {
-	if (disable_apic) { 
+	if (disable_apic) {
 		printk(KERN_INFO "Apic disabled\n");
-		return -1; 
+		return -1;
 	}
-	if (!cpu_has_apic) { 
+	if (!cpu_has_apic) {
 		disable_apic = 1;
 		printk(KERN_INFO "Apic disabled by BIOS\n");
 		return -1;
@@ -1211,8 +1179,8 @@ int __init APIC_init_uniprocessor (void)
 	return 0;
 }
 
-static __init int setup_disableapic(char *str) 
-{ 
+static __init int setup_disableapic(char *str)
+{
 	disable_apic = 1;
 	clear_bit(X86_FEATURE_APIC, boot_cpu_data.x86_capability);
 	return 0;
@@ -1220,10 +1188,10 @@ static __init int setup_disableapic(char
 early_param("disableapic", setup_disableapic);
 
 /* same as disableapic, for compatibility */
-static __init int setup_nolapic(char *str) 
-{ 
+static __init int setup_nolapic(char *str)
+{
 	return setup_disableapic(str);
-} 
+}
 early_param("nolapic", setup_nolapic);
 
 static int __init parse_lapic_timer_c2_ok(char *arg)
@@ -1233,36 +1201,20 @@ static int __init parse_lapic_timer_c2_o
 }
 early_param("lapic_timer_c2_ok", parse_lapic_timer_c2_ok);
 
-static __init int setup_noapictimer(char *str) 
-{ 
+static __init int setup_noapictimer(char *str)
+{
 	if (str[0] != ' ' && str[0] != 0)
 		return 0;
 	disable_apic_timer = 1;
 	return 1;
-} 
-
-static __init int setup_apicmaintimer(char *str)
-{
-	apic_runs_main_timer = 1;
-	nohpet = 1;
-	return 1;
-}
-__setup("apicmaintimer", setup_apicmaintimer);
-
-static __init int setup_noapicmaintimer(char *str)
-{
-	apic_runs_main_timer = -1;
-	return 1;
 }
-__setup("noapicmaintimer", setup_noapicmaintimer);
+__setup("noapictimer", setup_noapictimer);
 
 static __init int setup_apicpmtimer(char *s)
 {
 	apic_calibrate_pmtmr = 1;
 	notsc_setup(NULL);
-	return setup_apicmaintimer(NULL);
+	return 0;
 }
 __setup("apicpmtimer", setup_apicpmtimer);
 
-__setup("noapictimer", setup_noapictimer); 
-
Index: linux-2.6.22/arch/x86_64/kernel/mce_amd.c
===================================================================
--- linux-2.6.22.orig/arch/x86_64/kernel/mce_amd.c	2007-07-15 17:13:40.000000000 +0200
+++ linux-2.6.22/arch/x86_64/kernel/mce_amd.c	2007-07-15 17:13:49.000000000 +0200
@@ -157,9 +157,9 @@ void __cpuinit mce_amd_feature_init(stru
 			high |= K8_APIC_EXT_LVT_ENTRY_THRESHOLD << 20;
 			wrmsr(address, low, high);
 
-			setup_APIC_extened_lvt(K8_APIC_EXT_LVT_ENTRY_THRESHOLD,
-					       THRESHOLD_APIC_VECTOR,
-					       K8_APIC_EXT_INT_MSG_FIX, 0);
+			setup_APIC_extended_lvt(K8_APIC_EXT_LVT_ENTRY_THRESHOLD,
+						THRESHOLD_APIC_VECTOR,
+						K8_APIC_EXT_INT_MSG_FIX, 0);
 
 			threshold_defaults.address = address;
 			threshold_restart_bank(&threshold_defaults, 0, 0);
Index: linux-2.6.22/include/asm-i386/hpet.h
===================================================================
--- linux-2.6.22.orig/include/asm-i386/hpet.h	2007-07-15 17:13:40.000000000 +0200
+++ linux-2.6.22/include/asm-i386/hpet.h	2007-07-15 17:13:53.000000000 +0200
@@ -4,117 +4,91 @@
 
 #ifdef CONFIG_HPET_TIMER
 
-#include <linux/errno.h>
-#include <linux/module.h>
-#include <linux/sched.h>
-#include <linux/kernel.h>
-#include <linux/param.h>
-#include <linux/string.h>
-#include <linux/mm.h>
-#include <linux/interrupt.h>
-#include <linux/time.h>
-#include <linux/delay.h>
-#include <linux/init.h>
-#include <linux/smp.h>
-
-#include <asm/io.h>
-#include <asm/smp.h>
-#include <asm/irq.h>
-#include <asm/msr.h>
-#include <asm/delay.h>
-#include <asm/mpspec.h>
-#include <asm/uaccess.h>
-#include <asm/processor.h>
-
-#include <linux/timex.h>
-
 /*
  * Documentation on HPET can be found at:
  *      http://www.intel.com/ial/home/sp/pcmmspec.htm
  *      ftp://download.intel.com/ial/home/sp/mmts098.pdf
  */
 
-#define HPET_MMAP_SIZE	1024
+#define HPET_MMAP_SIZE		1024
 
-#define HPET_ID		0x000
-#define HPET_PERIOD	0x004
-#define HPET_CFG	0x010
-#define HPET_STATUS	0x020
-#define HPET_COUNTER	0x0f0
-#define HPET_T0_CFG	0x100
-#define HPET_T0_CMP	0x108
-#define HPET_T0_ROUTE	0x110
-#define HPET_T1_CFG	0x120
-#define HPET_T1_CMP	0x128
-#define HPET_T1_ROUTE	0x130
-#define HPET_T2_CFG	0x140
-#define HPET_T2_CMP	0x148
-#define HPET_T2_ROUTE	0x150
-
-#define HPET_ID_LEGSUP	0x00008000
-#define HPET_ID_NUMBER	0x00001f00
-#define HPET_ID_REV	0x000000ff
+#define HPET_ID			0x000
+#define HPET_PERIOD		0x004
+#define HPET_CFG		0x010
+#define HPET_STATUS		0x020
+#define HPET_COUNTER		0x0f0
+#define HPET_T0_CFG		0x100
+#define HPET_T0_CMP		0x108
+#define HPET_T0_ROUTE		0x110
+#define HPET_T1_CFG		0x120
+#define HPET_T1_CMP		0x128
+#define HPET_T1_ROUTE		0x130
+#define HPET_T2_CFG		0x140
+#define HPET_T2_CMP		0x148
+#define HPET_T2_ROUTE		0x150
+
+#define HPET_ID_REV		0x000000ff
+#define HPET_ID_NUMBER		0x00001f00
+#define HPET_ID_64BIT		0x00002000
+#define HPET_ID_LEGSUP		0x00008000
+#define HPET_ID_VENDOR		0xffff0000
 #define	HPET_ID_NUMBER_SHIFT	8
+#define HPET_ID_VENDOR_SHIFT	16
 
-#define HPET_CFG_ENABLE	0x001
-#define HPET_CFG_LEGACY	0x002
+#define HPET_ID_VENDOR_8086	0x8086
+
+#define HPET_CFG_ENABLE		0x001
+#define HPET_CFG_LEGACY		0x002
 #define	HPET_LEGACY_8254	2
 #define	HPET_LEGACY_RTC		8
 
-#define HPET_TN_ENABLE		0x004
-#define HPET_TN_PERIODIC	0x008
-#define HPET_TN_PERIODIC_CAP	0x010
-#define HPET_TN_SETVAL		0x040
-#define HPET_TN_32BIT		0x100
-
-/* Use our own asm for 64 bit multiply/divide */
-#define ASM_MUL64_REG(eax_out,edx_out,reg_in,eax_in) 			\
-		__asm__ __volatile__("mull %2" 				\
-				:"=a" (eax_out), "=d" (edx_out) 	\
-				:"r" (reg_in), "0" (eax_in))
-
-#define ASM_DIV64_REG(eax_out,edx_out,reg_in,eax_in,edx_in) 		\
-		__asm__ __volatile__("divl %2" 				\
-				:"=a" (eax_out), "=d" (edx_out) 	\
-				:"r" (reg_in), "0" (eax_in), "1" (edx_in))
+#define HPET_TN_LEVEL		0x0002
+#define HPET_TN_ENABLE		0x0004
+#define HPET_TN_PERIODIC	0x0008
+#define HPET_TN_PERIODIC_CAP	0x0010
+#define HPET_TN_64BIT_CAP	0x0020
+#define HPET_TN_SETVAL		0x0040
+#define HPET_TN_32BIT		0x0100
+#define HPET_TN_ROUTE		0x3e00
+#define HPET_TN_FSB		0x4000
+#define HPET_TN_FSB_CAP		0x8000
+#define HPET_TN_ROUTE_SHIFT	9
 
-#define KERNEL_TICK_USEC 	(1000000UL/HZ)	/* tick value in microsec */
 /* Max HPET Period is 10^8 femto sec as in HPET spec */
-#define HPET_MAX_PERIOD (100000000UL)
+#define HPET_MAX_PERIOD		100000000UL
 /*
  * Min HPET period is 10^5 femto sec just for safety. If it is less than this,
  * then 32 bit HPET counter wrapsaround in less than 0.5 sec.
  */
-#define HPET_MIN_PERIOD (100000UL)
-#define HPET_TICK_RATE  (HZ * 100000UL)
+#define HPET_MIN_PERIOD		100000UL
 
-extern unsigned long hpet_address;	/* hpet memory map physical address */
+/* hpet memory map physical address */
+extern unsigned long hpet_address;
+extern unsigned long force_hpet_address;
 extern int is_hpet_enabled(void);
-
-#ifdef CONFIG_X86_64
-extern unsigned long hpet_tick;	/* hpet clks count per tick */
-extern int hpet_use_timer;
-extern int hpet_rtc_timer_init(void);
 extern int hpet_enable(void);
-extern int is_hpet_capable(void);
-extern int hpet_readl(unsigned long a);
-#else
-extern int hpet_enable(void);
-#endif
+extern unsigned long hpet_readl(unsigned long a);
+extern void force_hpet_resume(void);
 
 #ifdef CONFIG_HPET_EMULATE_RTC
+
+#include <linux/interrupt.h>
+
 extern int hpet_mask_rtc_irq_bit(unsigned long bit_mask);
 extern int hpet_set_rtc_irq_bit(unsigned long bit_mask);
-extern int hpet_set_alarm_time(unsigned char hrs, unsigned char min, unsigned char sec);
+extern int hpet_set_alarm_time(unsigned char hrs, unsigned char min,
+			       unsigned char sec);
 extern int hpet_set_periodic_freq(unsigned long freq);
 extern int hpet_rtc_dropped_irq(void);
 extern int hpet_rtc_timer_init(void);
 extern irqreturn_t hpet_rtc_interrupt(int irq, void *dev_id);
+
 #endif /* CONFIG_HPET_EMULATE_RTC */
 
 #else
 
 static inline int hpet_enable(void) { return 0; }
+static inline unsigned long hpet_readl(unsigned long a) { return 0; }
 
 #endif /* CONFIG_HPET_TIMER */
 #endif /* _I386_HPET_H */
Index: linux-2.6.22/arch/x86_64/kernel/i8259.c
===================================================================
--- linux-2.6.22.orig/arch/x86_64/kernel/i8259.c	2007-07-15 17:13:40.000000000 +0200
+++ linux-2.6.22/arch/x86_64/kernel/i8259.c	2007-07-15 17:13:51.000000000 +0200
@@ -460,47 +460,6 @@ void invalidate_interrupt6(void);
 void invalidate_interrupt7(void);
 void thermal_interrupt(void);
 void threshold_interrupt(void);
-void i8254_timer_resume(void);
-
-static void setup_timer_hardware(void)
-{
-	outb_p(0x34,0x43);		/* binary, mode 2, LSB/MSB, ch 0 */
-	udelay(10);
-	outb_p(LATCH & 0xff , 0x40);	/* LSB */
-	udelay(10);
-	outb(LATCH >> 8 , 0x40);	/* MSB */
-}
-
-static int timer_resume(struct sys_device *dev)
-{
-	setup_timer_hardware();
-	return 0;
-}
-
-void i8254_timer_resume(void)
-{
-	setup_timer_hardware();
-}
-
-static struct sysdev_class timer_sysclass = {
-	set_kset_name("timer_pit"),
-	.resume		= timer_resume,
-};
-
-static struct sys_device device_timer = {
-	.id		= 0,
-	.cls		= &timer_sysclass,
-};
-
-static int __init init_timer_sysfs(void)
-{
-	int error = sysdev_class_register(&timer_sysclass);
-	if (!error)
-		error = sysdev_register(&device_timer);
-	return error;
-}
-
-device_initcall(init_timer_sysfs);
 
 void __init init_IRQ(void)
 {
@@ -551,12 +510,6 @@ void __init init_IRQ(void)
 	set_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt);
 	set_intr_gate(ERROR_APIC_VECTOR, error_interrupt);
 
-	/*
-	 * Set the clock to HZ Hz, we already have a valid
-	 * vector now:
-	 */
-	setup_timer_hardware();
-
 	if (!acpi_ioapic)
 		setup_irq(2, &irq2);
 }
Index: linux-2.6.22/include/asm-i386/tsc.h
===================================================================
--- linux-2.6.22.orig/include/asm-i386/tsc.h	2007-07-15 17:13:40.000000000 +0200
+++ linux-2.6.22/include/asm-i386/tsc.h	2007-07-15 17:13:49.000000000 +0200
@@ -71,4 +71,8 @@ extern void init_tsc_clocksource(void);
 extern void check_tsc_sync_source(int cpu);
 extern void check_tsc_sync_target(void);
 
+#ifdef CONFIG_X86_64
+extern void tsc_calibrate(void);
+#endif
+
 #endif
Index: linux-2.6.22/arch/x86_64/kernel/process.c
===================================================================
--- linux-2.6.22.orig/arch/x86_64/kernel/process.c	2007-07-15 17:13:38.000000000 +0200
+++ linux-2.6.22/arch/x86_64/kernel/process.c	2007-07-15 17:13:51.000000000 +0200
@@ -37,6 +37,7 @@
 #include <linux/notifier.h>
 #include <linux/kprobes.h>
 #include <linux/kdebug.h>
+#include <linux/tick.h>
 
 #include <asm/uaccess.h>
 #include <asm/pgtable.h>
@@ -207,6 +208,8 @@ void cpu_idle (void)
 			if (__get_cpu_var(cpu_idle_state))
 				__get_cpu_var(cpu_idle_state) = 0;
 
+			tick_nohz_stop_sched_tick();
+
 			rmb();
 			idle = pm_idle;
 			if (!idle)
@@ -227,6 +230,7 @@ void cpu_idle (void)
 			__exit_idle();
 		}
 
+		tick_nohz_restart_sched_tick();
 		preempt_enable_no_resched();
 		schedule();
 		preempt_disable();
Index: linux-2.6.22/arch/x86_64/kernel/Makefile
===================================================================
--- linux-2.6.22.orig/arch/x86_64/kernel/Makefile	2007-07-15 17:13:38.000000000 +0200
+++ linux-2.6.22/arch/x86_64/kernel/Makefile	2007-07-15 17:13:51.000000000 +0200
@@ -9,7 +9,7 @@ obj-y	:= process.o signal.o entry.o trap
 		x8664_ksyms.o i387.o syscall.o vsyscall.o \
 		setup64.o bootflag.o e820.o reboot.o quirks.o i8237.o \
 		pci-dma.o pci-nommu.o alternative.o hpet.o tsc.o bugs.o \
-		perfctr-watchdog.o
+		perfctr-watchdog.o i8253.o
 
 obj-$(CONFIG_STACKTRACE)	+= stacktrace.o
 obj-$(CONFIG_X86_MCE)		+= mce.o therm_throt.o
@@ -48,6 +48,8 @@ obj-y				+= pcspeaker.o
 
 CFLAGS_vsyscall.o		:= $(PROFILING) -g0
 
+i8253-y				+= ../../i386/kernel/i8253.o
+hpet-y				+= ../../i386/kernel/hpet.o
 therm_throt-y                   += ../../i386/kernel/cpu/mcheck/therm_throt.o
 bootflag-y			+= ../../i386/kernel/bootflag.o
 legacy_serial-y			+= ../../i386/kernel/legacy_serial.o
Index: linux-2.6.22/arch/x86_64/kernel/smpboot.c
===================================================================
--- linux-2.6.22.orig/arch/x86_64/kernel/smpboot.c	2007-07-15 17:13:38.000000000 +0200
+++ linux-2.6.22/arch/x86_64/kernel/smpboot.c	2007-07-15 17:13:51.000000000 +0200
@@ -223,8 +223,6 @@ void __cpuinit smp_callin(void)
 	local_irq_disable();
 	Dprintk("Stack at about %p\n",&cpuid);
 
-	disable_APIC_timer();
-
 	/*
 	 * Save our processor parameters
 	 */
@@ -348,8 +346,6 @@ void __cpuinit start_secondary(void)
 		enable_8259A_irq(0);
 	}
 
-	enable_APIC_timer();
-
 	/*
 	 * The sibling maps must be set before turing the online map on for
 	 * this cpu
Index: linux-2.6.22/include/asm-x86_64/proto.h
===================================================================
--- linux-2.6.22.orig/include/asm-x86_64/proto.h	2007-07-15 17:13:38.000000000 +0200
+++ linux-2.6.22/include/asm-x86_64/proto.h	2007-07-15 17:13:51.000000000 +0200
@@ -51,9 +51,6 @@ extern void reserve_bootmem_generic(unsi
 
 extern void load_gs_index(unsigned gs);
 
-extern void stop_timer_interrupt(void);
-extern void main_timer_handler(void);
-
 extern unsigned long end_pfn_map; 
 
 extern void show_trace(struct task_struct *, struct pt_regs *, unsigned long * rsp);
@@ -110,14 +107,10 @@ extern int timer_over_8254;
 
 extern int gsi_irq_sharing(int gsi);
 
-extern void smp_local_timer_interrupt(void);
-
 extern int force_mwait;
 
 long do_arch_prctl(struct task_struct *task, int code, unsigned long addr);
 
-void i8254_timer_resume(void);
-
 #define round_up(x,y) (((x) + (y) - 1) & ~((y)-1))
 #define round_down(x,y) ((x) & ~((y)-1))
 
Index: linux-2.6.22/include/linux/jiffies.h
===================================================================
--- linux-2.6.22.orig/include/linux/jiffies.h	2007-07-15 17:13:38.000000000 +0200
+++ linux-2.6.22/include/linux/jiffies.h	2007-07-15 17:13:52.000000000 +0200
@@ -36,8 +36,6 @@
 /* LATCH is used in the interval timer and ftape setup. */
 #define LATCH  ((CLOCK_TICK_RATE + HZ/2) / HZ)	/* For divider */
 
-#define LATCH_HPET ((HPET_TICK_RATE + HZ/2) / HZ)
-
 /* Suppose we want to devide two numbers NOM and DEN: NOM/DEN, the we can
  * improve accuracy by shifting LSH bits, hence calculating:
  *     (NOM << LSH) / DEN
@@ -53,13 +51,9 @@
 /* HZ is the requested value. ACTHZ is actual HZ ("<< 8" is for accuracy) */
 #define ACTHZ (SH_DIV (CLOCK_TICK_RATE, LATCH, 8))
 
-#define ACTHZ_HPET (SH_DIV (HPET_TICK_RATE, LATCH_HPET, 8))
-
 /* TICK_NSEC is the time between ticks in nsec assuming real ACTHZ */
 #define TICK_NSEC (SH_DIV (1000000UL * 1000, ACTHZ, 8))
 
-#define TICK_NSEC_HPET (SH_DIV(1000000UL * 1000, ACTHZ_HPET, 8))
-
 /* TICK_USEC is the time between ticks in usec assuming fake USER_HZ */
 #define TICK_USEC ((1000000UL + USER_HZ/2) / USER_HZ)
 
Index: linux-2.6.22/include/asm-x86_64/vsyscall.h
===================================================================
--- linux-2.6.22.orig/include/asm-x86_64/vsyscall.h	2007-07-15 17:13:37.000000000 +0200
+++ linux-2.6.22/include/asm-x86_64/vsyscall.h	2007-07-15 17:13:52.000000000 +0200
@@ -27,9 +27,6 @@ enum vsyscall_num {
 #define VGETCPU_RDTSCP	1
 #define VGETCPU_LSL	2
 
-#define hpet_readl(a)           readl((const void __iomem *)fix_to_virt(FIX_HPET_BASE) + a)
-#define hpet_writel(d,a)        writel(d, (void __iomem *)fix_to_virt(FIX_HPET_BASE) + a)
-
 extern int __vgetcpu_mode;
 extern volatile unsigned long __jiffies;
 
Index: linux-2.6.22/arch/i386/kernel/quirks.c
===================================================================
--- linux-2.6.22.orig/arch/i386/kernel/quirks.c	2007-07-15 17:13:37.000000000 +0200
+++ linux-2.6.22/arch/i386/kernel/quirks.c	2007-07-15 17:13:53.000000000 +0200
@@ -4,6 +4,8 @@
 #include <linux/pci.h>
 #include <linux/irq.h>
 
+#include <asm/hpet.h>
+
 #if defined(CONFIG_X86_IO_APIC) && defined(CONFIG_SMP) && defined(CONFIG_PCI)
 
 static void __devinit quirk_intel_irqbalance(struct pci_dev *dev)
@@ -47,3 +49,280 @@ DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_IN
 DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL,	PCI_DEVICE_ID_INTEL_E7525_MCH,	quirk_intel_irqbalance);
 DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL,	PCI_DEVICE_ID_INTEL_E7520_MCH,	quirk_intel_irqbalance);
 #endif
+
+#if defined(CONFIG_HPET_TIMER)
+unsigned long force_hpet_address;
+
+static enum {
+	NONE_FORCE_HPET_RESUME,
+	OLD_ICH_FORCE_HPET_RESUME,
+	ICH_FORCE_HPET_RESUME,
+	VT8237_FORCE_HPET_RESUME
+} force_hpet_resume_type;
+
+static void __iomem *rcba_base;
+
+static void ich_force_hpet_resume(void)
+{
+	u32 val;
+
+	if (!force_hpet_address)
+		return;
+
+	if (rcba_base == NULL)
+		BUG();
+
+	/* read the Function Disable register, dword mode only */
+	val = readl(rcba_base + 0x3404);
+	if (!(val & 0x80)) {
+		/* HPET disabled in HPTC. Trying to enable */
+		writel(val | 0x80, rcba_base + 0x3404);
+	}
+
+	val = readl(rcba_base + 0x3404);
+	if (!(val & 0x80))
+		BUG();
+	else
+		printk(KERN_DEBUG "Force enabled HPET at resume\n");
+
+	return;
+}
+
+static void ich_force_enable_hpet(struct pci_dev *dev)
+{
+	u32 val;
+	u32 uninitialized_var(rcba);
+	int err = 0;
+
+	if (hpet_address || force_hpet_address)
+		return;
+
+	pci_read_config_dword(dev, 0xF0, &rcba);
+	rcba &= 0xFFFFC000;
+	if (rcba == 0) {
+		printk(KERN_DEBUG "RCBA disabled. Cannot force enable HPET\n");
+		return;
+	}
+
+	/* use bits 31:14, 16 kB aligned */
+	rcba_base = ioremap_nocache(rcba, 0x4000);
+	if (rcba_base == NULL) {
+		printk(KERN_DEBUG "ioremap failed. Cannot force enable HPET\n");
+		return;
+	}
+
+	/* read the Function Disable register, dword mode only */
+	val = readl(rcba_base + 0x3404);
+
+	if (val & 0x80) {
+		/* HPET is enabled in HPTC. Just not reported by BIOS */
+		val = val & 0x3;
+		force_hpet_address = 0xFED00000 | (val << 12);
+		printk(KERN_DEBUG "Force enabled HPET at base address 0x%lx\n",
+			       force_hpet_address);
+		iounmap(rcba_base);
+		return;
+	}
+
+	/* HPET disabled in HPTC. Trying to enable */
+	writel(val | 0x80, rcba_base + 0x3404);
+
+	val = readl(rcba_base + 0x3404);
+	if (!(val & 0x80)) {
+		err = 1;
+	} else {
+		val = val & 0x3;
+		force_hpet_address = 0xFED00000 | (val << 12);
+	}
+
+	if (err) {
+		force_hpet_address = 0;
+		iounmap(rcba_base);
+		printk(KERN_DEBUG "Failed to force enable HPET\n");
+	} else {
+		force_hpet_resume_type = ICH_FORCE_HPET_RESUME;
+		printk(KERN_DEBUG "Force enabled HPET at base address 0x%lx\n",
+			       force_hpet_address);
+	}
+}
+
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ESB2_0,
+                         ich_force_enable_hpet);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ICH6_1,
+                         ich_force_enable_hpet);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ICH7_0,
+                         ich_force_enable_hpet);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ICH7_1,
+                         ich_force_enable_hpet);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ICH7_31,
+                         ich_force_enable_hpet);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ICH8_1,
+                         ich_force_enable_hpet);
+
+
+static struct pci_dev *cached_dev;
+
+static void old_ich_force_hpet_resume(void)
+{
+	u32 val;
+	u32 uninitialized_var(gen_cntl);
+
+	if (!force_hpet_address || !cached_dev)
+		return;
+
+	pci_read_config_dword(cached_dev, 0xD0, &gen_cntl);
+	gen_cntl &= (~(0x7 << 15));
+	gen_cntl |= (0x4 << 15);
+
+	pci_write_config_dword(cached_dev, 0xD0, gen_cntl);
+	pci_read_config_dword(cached_dev, 0xD0, &gen_cntl);
+	val = gen_cntl >> 15;
+	val &= 0x7;
+	if (val == 0x4)
+		printk(KERN_DEBUG "Force enabled HPET at resume\n");
+	else
+		BUG();
+}
+
+static void old_ich_force_enable_hpet(struct pci_dev *dev)
+{
+	u32 val;
+	u32 uninitialized_var(gen_cntl);
+
+	if (hpet_address || force_hpet_address)
+		return;
+
+	pci_read_config_dword(dev, 0xD0, &gen_cntl);
+	/*
+	 * Bit 17 is HPET enable bit.
+	 * Bit 16:15 control the HPET base address.
+	 */
+	val = gen_cntl >> 15;
+	val &= 0x7;
+	if (val & 0x4) {
+		val &= 0x3;
+		force_hpet_address = 0xFED00000 | (val << 12);
+		printk(KERN_DEBUG "HPET at base address 0x%lx\n",
+			       force_hpet_address);
+		return;
+	}
+
+	/*
+	 * HPET is disabled. Trying enabling at FED00000 and check
+	 * whether it sticks
+	 */
+	gen_cntl &= (~(0x7 << 15));
+	gen_cntl |= (0x4 << 15);
+	pci_write_config_dword(dev, 0xD0, gen_cntl);
+
+	pci_read_config_dword(dev, 0xD0, &gen_cntl);
+
+	val = gen_cntl >> 15;
+	val &= 0x7;
+	if (val & 0x4) {
+		/* HPET is enabled in HPTC. Just not reported by BIOS */
+		val &= 0x3;
+		force_hpet_address = 0xFED00000 | (val << 12);
+		printk(KERN_DEBUG "Force enabled HPET at base address 0x%lx\n",
+			       force_hpet_address);
+		cached_dev = dev;
+		force_hpet_resume_type = OLD_ICH_FORCE_HPET_RESUME;
+		return;
+	}
+
+	printk(KERN_DEBUG "Failed to force enable HPET\n");
+}
+
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801CA_0,
+                         old_ich_force_enable_hpet);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801CA_12,
+                         old_ich_force_enable_hpet);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801DB_0,
+                         old_ich_force_enable_hpet);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801DB_12,
+                         old_ich_force_enable_hpet);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801EB_0,
+                         old_ich_force_enable_hpet);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801EB_12,
+                         old_ich_force_enable_hpet);
+
+
+static void vt8237_force_hpet_resume(void)
+{
+	u32 val;
+
+	if (!force_hpet_address || !cached_dev)
+		return;
+
+	val = 0xfed00000 | 0x80;
+	pci_write_config_dword(cached_dev, 0x68, val);
+
+	pci_read_config_dword(cached_dev, 0x68, &val);
+	if (val & 0x80)
+		printk(KERN_DEBUG "Force enabled HPET at resume\n");
+	else
+		BUG();
+}
+
+static void vt8237_force_enable_hpet(struct pci_dev *dev)
+{
+	u32 val;
+
+	if (hpet_address || force_hpet_address)
+		return;
+
+	pci_read_config_dword(dev, 0x68, &val);
+	/*
+	 * Bit 7 is HPET enable bit.
+	 * Bit 31:10 is HPET base address (contrary to what datasheet claims)
+	 */
+	if (val & 0x80) {
+		force_hpet_address = (val & ~0x3ff);
+		printk(KERN_DEBUG "HPET at base address 0x%lx\n",
+			       force_hpet_address);
+		return;
+	}
+
+	/*
+	 * HPET is disabled. Trying enabling at FED00000 and check
+	 * whether it sticks
+	 */
+	val = 0xfed00000 | 0x80;
+	pci_write_config_dword(dev, 0x68, val);
+
+	pci_read_config_dword(dev, 0x68, &val);
+	if (val & 0x80) {
+		force_hpet_address = (val & ~0x3ff);
+		printk(KERN_DEBUG "Force enabled HPET at base address 0x%lx\n",
+			       force_hpet_address);
+		cached_dev = dev;
+		force_hpet_resume_type = VT8237_FORCE_HPET_RESUME;
+		return;
+	}
+
+	printk(KERN_DEBUG "Failed to force enable HPET\n");
+}
+
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_8235,
+			 vt8237_force_enable_hpet);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_8237,
+			 vt8237_force_enable_hpet);
+
+void force_hpet_resume(void)
+{
+	switch (force_hpet_resume_type) {
+	    case ICH_FORCE_HPET_RESUME:
+		return ich_force_hpet_resume();
+
+	    case OLD_ICH_FORCE_HPET_RESUME:
+		return old_ich_force_hpet_resume();
+
+	    case VT8237_FORCE_HPET_RESUME:
+		return vt8237_force_hpet_resume();
+
+	    default:
+		break;
+	}
+}
+
+#endif
Index: linux-2.6.22/include/linux/pci_ids.h
===================================================================
--- linux-2.6.22.orig/include/linux/pci_ids.h	2007-07-15 17:13:37.000000000 +0200
+++ linux-2.6.22/include/linux/pci_ids.h	2007-07-15 17:13:53.000000000 +0200
@@ -2221,6 +2221,7 @@
 #define PCI_DEVICE_ID_INTEL_82801EB_5	0x24d5
 #define PCI_DEVICE_ID_INTEL_82801EB_6	0x24d6
 #define PCI_DEVICE_ID_INTEL_82801EB_11	0x24db
+#define PCI_DEVICE_ID_INTEL_82801EB_12	0x24dc
 #define PCI_DEVICE_ID_INTEL_82801EB_13	0x24dd
 #define PCI_DEVICE_ID_INTEL_ESB_1	0x25a1
 #define PCI_DEVICE_ID_INTEL_ESB_2	0x25a2

